[
  {
    "path": ".clang-format",
    "content": "---\nLanguage:        Cpp\n# BasedOnStyle:  Google\nAccessModifierOffset: -1\nAlignAfterOpenBracket: Align\nAlignArrayOfStructures: None\nAlignConsecutiveMacros: None\nAlignConsecutiveAssignments: None\nAlignConsecutiveBitFields: None\nAlignConsecutiveDeclarations: None\nAlignEscapedNewlines: Left\nAlignOperands:   Align\nAlignTrailingComments: true\nAllowAllArgumentsOnNextLine: true\nAllowAllParametersOfDeclarationOnNextLine: true\nAllowShortEnumsOnASingleLine: true\nAllowShortBlocksOnASingleLine: Never\nAllowShortCaseLabelsOnASingleLine: false\nAllowShortFunctionsOnASingleLine: All\nAllowShortLambdasOnASingleLine: Inline\nAllowShortIfStatementsOnASingleLine: WithoutElse\nAllowShortLoopsOnASingleLine: true\nAlwaysBreakAfterDefinitionReturnType: None\nAlwaysBreakAfterReturnType: None\nAlwaysBreakBeforeMultilineStrings: true\nAlwaysBreakTemplateDeclarations: Yes\nAttributeMacros:\n  - __capability\nBinPackArguments: true\nBinPackParameters: true\nBraceWrapping:\n  AfterCaseLabel:  false\n  AfterClass:      false\n  AfterControlStatement: Never\n  AfterEnum:       false\n  AfterFunction:   false\n  AfterNamespace:  false\n  AfterObjCDeclaration: false\n  AfterStruct:     false\n  AfterUnion:      false\n  AfterExternBlock: false\n  BeforeCatch:     false\n  BeforeElse:      false\n  BeforeLambdaBody: false\n  BeforeWhile:     false\n  IndentBraces:    false\n  SplitEmptyFunction: true\n  SplitEmptyRecord: true\n  SplitEmptyNamespace: true\nBreakBeforeBinaryOperators: None\nBreakBeforeConceptDeclarations: true\nBreakBeforeBraces: Attach\nBreakBeforeInheritanceComma: false\nBreakInheritanceList: BeforeColon\nBreakBeforeTernaryOperators: true\nBreakConstructorInitializersBeforeComma: false\nBreakConstructorInitializers: BeforeColon\nBreakAfterJavaFieldAnnotations: false\nBreakStringLiterals: true\nColumnLimit:     100\nCommentPragmas:  '^ IWYU pragma:'\nQualifierAlignment: Leave\nCompactNamespaces: false\nConstructorInitializerIndentWidth: 
4\nContinuationIndentWidth: 4\nCpp11BracedListStyle: true\nDeriveLineEnding: true\nDerivePointerAlignment: true\nDisableFormat:   false\nEmptyLineAfterAccessModifier: Never\nEmptyLineBeforeAccessModifier: LogicalBlock\nExperimentalAutoDetectBinPacking: false\nPackConstructorInitializers: NextLine\nBasedOnStyle:    ''\nConstructorInitializerAllOnOneLineOrOnePerLine: false\nAllowAllConstructorInitializersOnNextLine: true\nFixNamespaceComments: true\nForEachMacros:\n  - foreach\n  - Q_FOREACH\n  - BOOST_FOREACH\nIfMacros:\n  - KJ_IF_MAYBE\nIncludeBlocks:   Regroup\nIncludeCategories:\n  - Regex:           '^<ext/.*\\.h>'\n    Priority:        2\n    SortPriority:    0\n    CaseSensitive:   false\n  - Regex:           '^<.*\\.h>'\n    Priority:        1\n    SortPriority:    0\n    CaseSensitive:   false\n  - Regex:           '^<.*'\n    Priority:        2\n    SortPriority:    0\n    CaseSensitive:   false\n  - Regex:           '.*'\n    Priority:        3\n    SortPriority:    0\n    CaseSensitive:   false\nIncludeIsMainRegex: '([-_](test|unittest))?$'\nIncludeIsMainSourceRegex: ''\nIndentAccessModifiers: false\nIndentCaseLabels: true\nIndentCaseBlocks: false\nIndentGotoLabels: true\nIndentPPDirectives: None\nIndentExternBlock: AfterExternBlock\nIndentRequires:  false\nIndentWidth:     2\nIndentWrappedFunctionNames: false\nInsertTrailingCommas: None\nJavaScriptQuotes: Leave\nJavaScriptWrapImports: true\nKeepEmptyLinesAtTheStartOfBlocks: false\nLambdaBodyIndentation: Signature\nMacroBlockBegin: ''\nMacroBlockEnd:   ''\nMaxEmptyLinesToKeep: 1\nNamespaceIndentation: None\nObjCBinPackProtocolList: Never\nObjCBlockIndentWidth: 2\nObjCBreakBeforeNestedBlockParam: true\nObjCSpaceAfterProperty: false\nObjCSpaceBeforeProtocolList: true\nPenaltyBreakAssignment: 2\nPenaltyBreakBeforeFirstCallParameter: 1\nPenaltyBreakComment: 300\nPenaltyBreakFirstLessLess: 120\nPenaltyBreakString: 1000\nPenaltyBreakTemplateDeclaration: 10\nPenaltyExcessCharacter: 
1000000\nPenaltyReturnTypeOnItsOwnLine: 200\nPenaltyIndentedWhitespace: 0\nPointerAlignment: Left\nPPIndentWidth:   -1\nRawStringFormats:\n  - Language:        Cpp\n    Delimiters:\n      - cc\n      - CC\n      - cpp\n      - Cpp\n      - CPP\n      - 'c++'\n      - 'C++'\n    CanonicalDelimiter: ''\n    BasedOnStyle:    google\n  - Language:        TextProto\n    Delimiters:\n      - pb\n      - PB\n      - proto\n      - PROTO\n    EnclosingFunctions:\n      - EqualsProto\n      - EquivToProto\n      - PARSE_PARTIAL_TEXT_PROTO\n      - PARSE_TEST_PROTO\n      - PARSE_TEXT_PROTO\n      - ParseTextOrDie\n      - ParseTextProtoOrDie\n      - ParseTestProto\n      - ParsePartialTestProto\n    CanonicalDelimiter: pb\n    BasedOnStyle:    google\nReferenceAlignment: Pointer\nReflowComments:  false\nShortNamespaceLines: 1\nSortIncludes:    CaseSensitive\nSortJavaStaticImport: Before\nSortUsingDeclarations: true\nSpaceAfterCStyleCast: false\nSpaceAfterLogicalNot: false\nSpaceAfterTemplateKeyword: true\nSpaceBeforeAssignmentOperators: true\nSpaceBeforeCaseColon: false\nSpaceBeforeCpp11BracedList: false\nSpaceBeforeCtorInitializerColon: true\nSpaceBeforeInheritanceColon: true\nSpaceBeforeParens: ControlStatements\nSpaceAroundPointerQualifiers: Default\nSpaceBeforeRangeBasedForLoopColon: true\nSpaceInEmptyBlock: false\nSpaceInEmptyParentheses: false\nSpacesBeforeTrailingComments: 2\nSpacesInAngles:  Never\nSpacesInConditionalStatement: false\nSpacesInContainerLiterals: true\nSpacesInCStyleCastParentheses: false\nSpacesInLineCommentPrefix:\n  Minimum:         1\n  Maximum:         -1\nSpacesInParentheses: false\nSpacesInSquareBrackets: false\nSpaceBeforeSquareBrackets: false\nBitFieldColonSpacing: Both\nStandard:        Auto\nStatementAttributeLikeMacros:\n  - Q_EMIT\nStatementMacros:\n  - Q_UNUSED\n  - QT_REQUIRE_VERSION\nTabWidth:        8\nUseCRLF:         false\nUseTab:          Never\nWhitespaceSensitiveMacros:\n  - STRINGIZE\n  - PP_STRINGIZE\n  - BOOST_PP_STRINGIZE\n 
 - NS_SWIFT_NAME\n  - CF_SWIFT_NAME\n...\n"
  },
  {
    "path": ".clang-tidy",
    "content": "Checks: 'modernize-*,-modernize-use-nodiscard,-modernize-concat-nested-namespaces,-modernize-make-*,-modernize-use-auto,-modernize-raw-string-literal,-modernize-avoid-c-arrays,-modernize-use-trailing-return-type,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming'\nCheckOptions:\n  - { key: readability-identifier-naming.ClassCase,                 value: CamelCase  }\n  - { key: readability-identifier-naming.StructCase,                value: CamelCase  }\n  - { key: readability-identifier-naming.TypeAliasCase,             value: CamelCase  }\n  - { key: readability-identifier-naming.TypedefCase,               value: CamelCase  }\n  - { key: readability-identifier-naming.TypeTemplateParameterCase, value: CamelCase  }\n  - { key: readability-identifier-naming.MemberCase,                value: lower_case }\n  - { key: readability-identifier-naming.PrivateMemberSuffix,       value: '_'        }\n  - { key: readability-identifier-naming.ProtectedMemberSuffix,     value: '_'        }\n  - { key: readability-identifier-naming.EnumCase,                  value: CamelCase  }\n  - { key: readability-identifier-naming.EnumConstant,              value: CamelCase  }\n  - { key: readability-identifier-naming.EnumConstantPrefix,        value: k          }\n  - { key: readability-identifier-naming.GlobalConstantCase,        value: CamelCase  }\n  - { key: readability-identifier-naming.GlobalConstantPrefix,      value: k          }\n  - { key: readability-identifier-naming.StaticConstantCase,        value: CamelCase  }\n  - { key: readability-identifier-naming.StaticConstantPrefix,      value: k          }\n  - { key: readability-identifier-naming.ConstexprVariableCase,     value: CamelCase  }\n  - { key: readability-identifier-naming.ConstexprVariablePrefix,   value: k          }\n  - { key: readability-identifier-naming.FunctionCase,              value: CamelCase  }\n  - { key: 
readability-identifier-naming.NamespaceCase,             value: lower_case }\n"
  },
  {
    "path": ".editorconfig",
    "content": "root = true\n\n[*]\ncharset=utf-8\nindent_style = space\nindent_size = 2\ninsert_final_newline = true\n\n[*.py]\nindent_style = space\nindent_size = 4\n"
  },
  {
    "path": ".gitattributes",
    "content": "* text=auto\n\n*.c   text eol=lf\n*.h   text eol=lf\n*.cc  text eol=lf\n*.cuh text eol=lf\n*.cu  text eol=lf\n*.py  text eol=lf\n*.txt text eol=lf\n*.R   text eol=lf\n*.scala text eol=lf\n*.java  text eol=lf\n\n*.sh text eol=lf\n\n*.rst text eol=lf\n*.md  text eol=lf\n*.csv text eol=lf"
  },
  {
    "path": ".github/FUNDING.yml",
    "content": "open_collective: xgboost\ncustom: https://xgboost.ai/sponsors\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE.md",
    "content": "Thanks for participating in the XGBoost community! The issue tracker is used for actionable items such as feature proposals discussion, roadmaps, and bug tracking.\n\nIssues that are inactive for a period of time may get closed. We adopt this policy so that we won't lose track of actionable issues that may fall at the bottom of the pile. Feel free to reopen a new one if you feel there is an additional problem that needs attention when an old one gets closed.\n\nFor bug reports, to help the developer act on the issues, please include a description of your environment, preferably a minimum script to reproduce the problem.\n\nFor feature proposals, list clear, small actionable items so we can track the progress of the change.\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: /\n    schedule:\n      interval: \"weekly\"\n    groups:\n      github-actions:\n        patterns:\n          - \"*\"\n"
  },
  {
    "path": ".github/lock.yml",
    "content": "# Configuration for lock-threads - https://github.com/dessant/lock-threads\n\n# Number of days of inactivity before a closed issue or pull request is locked\ndaysUntilLock: 90\n\n# Issues and pull requests with these labels will not be locked. Set to `[]` to disable\nexemptLabels:\n  - feature-request\n\n# Label to add before locking, such as `outdated`. Set to `false` to disable\nlockLabel: false\n\n# Comment to post before locking. Set to `false` to disable\nlockComment: false\n\n# Assign `resolved` as the reason for locking. Set to `false` to disable\nsetLockReason: true\n\n# Limit to only `issues` or `pulls`\n# only: issues\n\n# Optionally, specify configuration settings just for `issues` or `pulls`\n# issues:\n#   exemptLabels:\n#     - help-wanted\n#   lockLabel: outdated\n\n# pulls:\n#   daysUntilLock: 30\n\n# Repository to extend settings from\n# _extends: repo\n"
  },
  {
    "path": ".github/runs-on.yml",
    "content": "# Custom images with CUDA toolkit installed\n# See ops/packer for instructions for building the images\nimages:\n  linux-amd64:\n    platform: \"linux\"\n    arch: \"x64\"\n    owner: \"492475357299\"  # XGBooost CI\n    name: \"xgboost-ci-runs-on-linux-amd64-*\"\n  linux-arm64:\n    platform: \"linux\"\n    arch: \"arm64\"\n    owner: \"492475357299\"  # XGBooost CI\n    name: \"xgboost-ci-runs-on-linux-arm64-*\"\n  windows-amd64:\n    platform: \"windows\"\n    arch: \"x64\"\n    owner: \"492475357299\"  # XGBooost CI\n    name: \"xgboost-ci-runs-on-windows-*\"\n\nrunners:\n  linux-amd64-cpu:\n    cpu: 16\n    family: [\"c7i-flex\", \"c7i\", \"c7a\", \"c5\", \"c5a\"]\n    image: linux-amd64\n    spot: \"false\"\n  linux-amd64-gpu:\n    family: [\"g4dn.xlarge\"]\n    image: linux-amd64\n    spot: \"false\"\n  linux-amd64-mgpu:\n    family: [\"g4dn.12xlarge\"]\n    image: linux-amd64\n    spot: \"false\"\n  linux-arm64-cpu:\n    cpu: 16\n    family: [\"c6g\", \"c7g\"]\n    image: linux-arm64\n    spot: \"false\"\n  linux-arm64-gpu:\n    family: [\"g5g.xlarge\"]\n    image: linux-arm64\n    spot: \"false\"\n  windows-gpu:\n    family: [\"g4dn.2xlarge\"]\n    image: windows-amd64\n    spot: \"false\"\n  windows-cpu:\n    cpu: 32\n    family: [\"c7i-flex\", \"c7i\", \"c7a\", \"c5\", \"c5a\"]\n    image: windows-amd64\n    spot: \"false\"\n"
  },
  {
    "path": ".github/workflows/cccl_nightly.yml",
    "content": "name: Test XGBoost with latest CCCL and RMM\n\non:\n  workflow_dispatch:  # Can be manually triggered\n  schedule:\n    - cron: \"0 7 * * *\"  # Run once daily\n\npermissions:\n  contents: read\n\ndefaults:\n  run:\n    shell: bash -l {0}\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\njobs:\n  get-latest-cccl-version:\n    name: Query the latest version of CCCL\n    runs-on: ubuntu-latest\n    outputs:\n      cccl_version: ${{ steps.query_version_step.outputs.cccl_version }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - id: query_version_step\n        name: Query version\n        run: |\n          source ops/pipeline/query-latest-cccl.sh\n          echo \"cccl_version=${CCCL_VERSION}\" >> \"$GITHUB_OUTPUT\"\n\n  test-latest-cccl:\n    name: Test building XGBoost with latest CCCL (RC included)\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=nightly-cccl\n    needs: get-latest-cccl-version\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - name: Build XGBoost with latest CCCL\n        run: |\n          bash ops/pipeline/nightly-test-cccl.sh ${{ needs.get-latest-cccl-version.outputs.cccl_version }}\n\n  get-latest-rmm-version:\n    name: Query the latest version of RMM\n    runs-on: ubuntu-latest\n    outputs:\n      rmm_version: ${{ steps.query_version_step.outputs.rmm_version }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - id: query_version_step\n        name: Query version\n        run: |\n          source ops/pipeline/query-latest-rmm.sh\n          echo \"rmm_version=${RMM_VERSION}\" >> \"$GITHUB_OUTPUT\"\n\n  test-latest-rmm:\n    name: Test building XGBoost with latest 
nightly version of RMM\n    # This job uses the stable CCCL used by RMM and rest of RAPIDS\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=nightly-rmm\n    needs: get-latest-rmm-version\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - name: Build XGBoost with latest RMM\n        run: |\n          bash ops/pipeline/nightly-test-rmm.sh ${{ needs.get-latest-rmm-version.outputs.rmm_version }}\n"
  },
  {
    "path": ".github/workflows/freebsd.yml",
    "content": "name: FreeBSD\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  test:\n    runs-on: ubuntu-latest\n    timeout-minutes: 20\n    name: A job to run test in FreeBSD\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - name: Test in FreeBSD\n        id: test\n        uses: vmactions/freebsd-vm@v1.4.3\n        with:\n          usesh: true\n          prepare: |\n            pkg install -y cmake git ninja googletest bash\n          run: |\n            bash ops/pipeline/test-freebsd.sh\n"
  },
  {
    "path": ".github/workflows/jvm_tests.yml",
    "content": "name: JVM Tests\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  ci-configure:\n    name: Configure variables for CI\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=jvm-tests-ci-configure\n    steps:\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2.0.2\n        with:\n          mask-password: 'false'\n          registries: '492475357299'\n      - uses: actions/checkout@v6.0.2\n      - name: Get image tag\n        id: get-image-tag\n        run: |\n          source ops/pipeline/get-image-tag.sh\n          echo \"Using image tag $IMAGE_TAG\"\n          echo \"image_tag=$IMAGE_TAG\" >> \"$GITHUB_OUTPUT\"\n    outputs:\n      docker_registry: ${{ steps.login-ecr.outputs.registry }}\n      docker_username: ${{ steps.login-ecr.outputs.docker_username_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      docker_password: ${{ steps.login-ecr.outputs.docker_password_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      image_tag: ${{ steps.get-image-tag.outputs.image_tag }}\n\n  build-jvm-manylinux2014:\n    name: >-\n      Build libxgboost4j.so targeting glibc 2.17\n      (arch ${{ matrix.arch }}, runner ${{ matrix.runner }})\n    runs-on:\n      - runs-on\n      - runner=${{ matrix.runner }}\n      - run-id=${{ github.run_id }}\n      - tag=jvm-tests-build-jvm-manylinux2014-${{ matrix.arch }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        - arch: aarch64\n          runner: linux-arm64-cpu\n        - arch: x86_64\n          runner: 
linux-amd64-cpu\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - run: bash ops/pipeline/build-jvm-manylinux2014.sh ${{ matrix.arch }}\n\n  build-jvm-gpu:\n    name: Build libxgboost4j.so with CUDA\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=jvm-tests-build-jvm-gpu\n      - extras=s3-cache\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.jvm_gpu_build:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - name: Build libxgboost4j.so with CUDA\n        run: bash ops/pipeline/build-jvm-gpu.sh\n      - run: sccache --show-stats\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-jvm-gpu \\\n            lib/libxgboost4j.so\n\n  build-jvm-mac:\n    name: \"Build libxgboost4j.dylib for ${{ matrix.description }}\"\n    runs-on: ${{ matrix.runner }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - description: \"MacOS (Apple Silicon)\"\n            libname: libxgboost4j_m1.dylib\n            runner: macos-14\n          - description: \"MacOS (Intel)\"\n            libname: libxgboost4j_intel.dylib\n            runner: macos-15-intel\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: 
dmlc/xgboost-devops/actions/sccache@main\n        with:\n          cache-key-prefix: ${{ github.job }}-${{ matrix.runner }}\n      - run: bash ops/pipeline/build-jvm-macos.sh\n      - run: sccache --show-stats\n      - name: Upload libxgboost4j.dylib\n        if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')\n        run: |\n          mv -v lib/libxgboost4j.dylib ${{ matrix.libname }}\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket xgboost-nightly-builds \\\n            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public \\\n            ${{ matrix.libname }}\n        env:\n          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}\n          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}\n\n  build-test-jvm-packages-linux:\n    name: Build and test JVM packages (Linux, Scala ${{ matrix.scala_version }})\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=jvm-tests-build-test-jvm-packages-scala${{ matrix.scala_version }}\n    strategy:\n      fail-fast: false\n      matrix:\n        scala_version: [\"2.12\", \"2.13\"]\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.jvm:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    env:\n      SCALA_VERSION: ${{ matrix.scala_version }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Build and test JVM packages (Scala ${{ matrix.scala_version }})\n        run: bash ops/pipeline/build-test-jvm-packages.sh\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ 
env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-test-jvm-packages \\\n            lib/libxgboost4j.so\n        if: matrix.scala_version == '2.13'\n\n  build-test-jvm-packages-other-os:\n    name: Build and test JVM packages (${{ matrix.os }})\n    timeout-minutes: 30\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [windows-latest, macos-15-intel]\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: actions/setup-java@v5\n        with:\n          distribution: 'temurin'\n          java-version: '8'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: minimal\n          environment-file: ops/conda_env/minimal.yml\n      - name: Cache Maven packages\n        uses: actions/cache@v5.0.3\n        with:\n          path: ~/.m2\n          key: ${{ runner.os }}-m2-${{ hashFiles('/jvm-packages/pom.xml') }}\n          restore-keys: ${{ runner.os }}-m2-${{ hashFiles('/jvm-packages/pom.xml') }}\n      - name: Test XGBoost4J (Core) on macos\n        if: matrix.os == 'macos-15-intel'\n        run: |\n          cd jvm-packages\n          mvn test -B -pl :xgboost4j_2.12 -Duse.openmp=OFF\n      - name: Test XGBoost4J (Core) on windows\n        if: matrix.os == 'windows-latest'\n        run: |\n          cd jvm-packages\n          mvn test -B -pl :xgboost4j_2.12\n      - name: Publish artifact xgboost4j.dll to S3\n        run: |\n          python ops/pipeline/manage-artifacts.py upload `\n            --s3-bucket xgboost-nightly-builds `\n            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public `\n            lib/xgboost4j.dll\n        if: |\n          (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&\n          matrix.os == 'windows-latest'\n        env:\n          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER 
}}\n          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}\n\n  test-jvm-packages-gpu:\n    name: Test JVM packages with CUDA (Scala ${{ matrix.scala_version }})\n    needs: [ci-configure, build-jvm-gpu]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-mgpu\n      - tag=jvm-tests-test-jvm-packages-gpu-scala${{ matrix.scala_version }}\n    strategy:\n      fail-fast: false\n      matrix:\n        scala_version: [\"2.12\", \"2.13\"]\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.jvm_gpu_build:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n      options: --gpus all --shm-size=4g --privileged\n    env:\n      SCALA_VERSION: ${{ matrix.scala_version }}\n      USE_CUDA: \"1\"\n      SKIP_NATIVE_BUILD: \"1\"\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Unstash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-jvm-gpu \\\n            --dest-dir lib \\\n            libxgboost4j.so\n      - name: Test JVM packages with CUDA\n        run: bash ops/pipeline/build-test-jvm-packages.sh\n\n  deploy-jvm-packages:\n    name: Deploy JVM packages to S3 (${{ matrix.variant.name }}, Scala ${{ matrix.scala_version }})\n    needs: [ci-configure, build-jvm-gpu, build-test-jvm-packages-linux, test-jvm-packages-gpu]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=jvm-tests-deploy-jvm-packages-${{ matrix.variant.name }}-scala${{ matrix.scala_version }}\n    strategy:\n      fail-fast: false\n      matrix:\n        variant:\n          - name: cpu\n            image_repo: 
xgb-ci.jvm\n            artifact_from: build-test-jvm-packages\n          - name: gpu\n            image_repo: xgb-ci.jvm_gpu_build\n            artifact_from: build-jvm-gpu\n        scala_version: ['2.12', '2.13']\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/${{ matrix.variant.image_repo }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Unstash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/${{ matrix.variant.artifact_from }} \\\n            --dest-dir lib \\\n            libxgboost4j.so\n          ls -lh lib/libxgboost4j.so\n      - name: Deploy JVM packages to S3\n        run: bash ops/pipeline/deploy-jvm-packages.sh ${{ matrix.variant.name }} ${{ matrix.scala_version }}\n        if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release_')\n"
  },
  {
    "path": ".github/workflows/lint.yml",
    "content": "name: XGBoost CI (Lint)\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  clang-tidy:\n    name: Run clang-tidy\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=lint-clang-tidy\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - run: bash ops/pipeline/run-clang-tidy.sh\n\n  python-mypy:\n    runs-on: ubuntu-latest\n    name: Type checks for the Python package\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: python_lint\n          environment-file: ops/conda_env/python_lint.yml\n      - name: Run mypy\n        shell: bash -el {0}\n        run: |\n          python ops/script/type_check_python.py\n"
  },
  {
    "path": ".github/workflows/main.yml",
    "content": "name: XGBoost CI\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  ci-configure:\n    name: Configure variables for CI\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=main-ci-configure\n    steps:\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2.0.2\n        with:\n          mask-password: 'false'\n          registries: '492475357299'\n      - uses: actions/checkout@v6.0.2\n      - name: Get image tag\n        id: get-image-tag\n        run: |\n          source ops/pipeline/get-image-tag.sh\n          echo \"Using image tag $IMAGE_TAG\"\n          echo \"image_tag=$IMAGE_TAG\" >> \"$GITHUB_OUTPUT\"\n    outputs:\n      docker_registry: ${{ steps.login-ecr.outputs.registry }}\n      docker_username: ${{ steps.login-ecr.outputs.docker_username_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      docker_password: ${{ steps.login-ecr.outputs.docker_password_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      image_tag: ${{ steps.get-image-tag.outputs.image_tag }}\n\n  build-cpu:\n    name: Build CPU (${{ matrix.variant }})\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=main-build-cpu-${{ matrix.variant }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - variant: default\n            build_suite: cpu\n            # Default build doesn't need privileged mode\n            # Using --init as harmless default (proper signal handling)\n            
container_options: \"--init\"\n          - variant: sanitizer\n            build_suite: cpu-sanitizer\n            # Sanitizer needs privileged for: sysctl vm.mmap_rnd_bits=28\n            # See https://github.com/google/sanitizers/issues/1614\n            container_options: \"--privileged\"\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.cpu:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n      options: ${{ matrix.container_options }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      # Remove default build config to ensure CMake-configured header is used\n      - name: Remove default build config\n        run: rm -fv dmlc-core/include/dmlc/build_config_default.h\n      - name: Configure the system for sanitizers\n        if: matrix.variant == 'sanitizer'\n        run: |\n          echo \"ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer\" >> $GITHUB_ENV\n          echo \"ASAN_OPTIONS=symbolize=1\" >> $GITHUB_ENV\n          echo \"UBSAN_OPTIONS=print_stacktrace=1:log_path=ubsan_error.log\" >> $GITHUB_ENV\n          # Work around https://github.com/google/sanitizers/issues/1614\n          sysctl vm.mmap_rnd_bits=28\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n        with:\n          cache-key-prefix: ${{ github.job }}-${{ matrix.build_suite }}\n      - name: Build and test\n        run: bash ops/pipeline/build-cpu.sh ${{ matrix.build_suite }}\n      - run: sccache --show-stats\n\n  build-cuda:\n    name: Build CUDA ${{ matrix.cuda_version }} (${{ matrix.arch }})\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=${{ matrix.runner }}\n      - tag=main-build-cuda${{ matrix.cuda_version }}-${{ matrix.arch }}\n      - extras=s3-cache\n    strategy:\n      fail-fast: false\n   
   matrix:\n        include:\n        # CUDA 12\n        - cuda_version: 12\n          arch: aarch64\n          runner: linux-arm64-cpu\n          image_repo: xgb-ci.gpu_build_rockylinux8_aarch64\n          use_rmm: 0\n          use_federated: 1\n        - cuda_version: 12\n          arch: x86_64\n          runner: linux-amd64-cpu\n          image_repo: xgb-ci.gpu_build_rockylinux8\n          use_rmm: 0\n          use_federated: 1\n        # CUDA 13\n        - cuda_version: 13\n          arch: aarch64\n          runner: linux-arm64-cpu\n          image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64\n          use_rmm: 0\n          use_federated: 0\n        - cuda_version: 13\n          arch: x86_64\n          runner: linux-amd64-cpu\n          image_repo: xgb-ci.gpu_build_cuda13_rockylinux8\n          use_rmm: 0\n          use_federated: 0\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/${{ matrix.image_repo }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n        with:\n          cache-key-prefix: ${{ github.job }}-${{ matrix.cuda_version }}\n      - run: >-\n          bash ops/pipeline/build-cuda.sh\n          --cuda-version ${{ matrix.cuda_version }}\n          --use-rmm ${{ matrix.use_rmm }}\n          --use-federated ${{ matrix.use_federated }}\n      - run: sccache --show-stats\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-cuda${{ matrix.cuda_version }}-${{ matrix.arch }} \\\n            build/testxgboost 
python-package/dist/*.whl\n\n  audit-cuda-wheel:\n    name: Audit CUDA ${{ matrix.cuda_version }} wheel for manylinux_2_28_${{ matrix.arch }}\n    needs: [ci-configure, build-cuda]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=${{ matrix.runner }}\n      - tag=main-audit-cuda${{ matrix.cuda_version }}-wheel-${{ matrix.arch }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        # CUDA 12\n        - cuda_version: 12\n          cuda_variant_flag: \"\"\n          arch: aarch64\n          runner: linux-arm64-cpu\n        - cuda_version: 12\n          cuda_variant_flag: \"\"\n          arch: x86_64\n          runner: linux-amd64-cpu\n        # CUDA 13\n        - cuda_version: 13\n          cuda_variant_flag: \"--cuda-variant cuda13\"\n          arch: aarch64\n          runner: linux-arm64-cpu\n        - cuda_version: 13\n          cuda_variant_flag: \"--cuda-variant cuda13\"\n          arch: x86_64\n          runner: linux-amd64-cpu\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.manylinux_2_28_${{ matrix.arch }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - name: Pick Python\n        run: |\n          export PATH=/opt/python/cp310-cp310/bin/:$PATH\n          echo ${PATH} >> $GITHUB_PATH\n      - name: Install dependencies\n        run: |\n          pip install awscli wheel auditwheel pydistcheck\n      - name: Unstash raw wheel\n        run: |\n          mkdir -p python-package/dist\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-cuda${{ matrix.cuda_version }}-${{ matrix.arch }} \\\n            --dest-dir python-package/dist \\\n          
  *.whl\n      - run: bash ops/pipeline/audit-cuda-wheel.sh ${{ matrix.arch }} ${{ matrix.cuda_variant_flag }}\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/audit-cuda${{ matrix.cuda_version }}-wheel-${{ matrix.arch }} \\\n            python-package/dist/*.whl\n\n  build-cuda-with-rmm:\n    name: Build CUDA with RMM\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=main-build-cuda-with-rmm\n      - extras=s3-cache\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.gpu_build_rockylinux8:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - run: >-\n          bash ops/pipeline/build-cuda.sh\n          --cuda-version 12\n          --use-rmm 1\n          --use-federated 1\n      - run: sccache --show-stats\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-cuda-with-rmm \\\n            build/testxgboost\n\n  build-python-wheels-cpu:\n    name: Build CPU wheel (xgboost-cpu) for ${{ matrix.manylinux_target }}_${{ matrix.arch }}\n    runs-on:\n      - runs-on\n      - runner=${{ matrix.runner }}\n      - run-id=${{ github.run_id }}\n      - tag=main-build-python-wheels-cpu-${{ matrix.manylinux_target }}-${{ matrix.arch }}\n    strategy:\n      fail-fast: false\n      matrix:\n        
include:\n        - manylinux_target: manylinux_2_28\n          arch: aarch64\n          runner: linux-arm64-cpu\n        - manylinux_target: manylinux_2_28\n          arch: x86_64\n          runner: linux-amd64-cpu\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - run: |\n          bash ops/pipeline/build-python-wheels-cpu.sh \\\n            ${{ matrix.manylinux_target }} ${{ matrix.arch }}\n\n  build-gpu-rpkg:\n    name: Build GPU-enabled R package\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=main-build-gpu-rpkg\n      - extras=s3-cache\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.gpu_build_r_rockylinux8:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: runs-on/action@v2\n      - name: Trust git cloning project sources\n        run: |\n          git config --global --add safe.directory \"${GITHUB_WORKSPACE}\"\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n        with:\n          cache-key-prefix: ${{ github.job }}\n      - run: bash ops/pipeline/build-gpu-rpkg.sh\n      - run: sccache --show-stats\n      - name: Upload R package tarball\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket xgboost-nightly-builds \\\n            --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \\\n            xgboost_r_gpu_linux.tar.gz\n        if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release_')\n\n  test-cpp-gpu:\n    name: >-\n      Google Test 
(C++) CUDA ${{ matrix.cuda_version }}\n      (${{ matrix.suite }}, ${{ matrix.runner }})\n    needs: [ci-configure, build-cuda, build-cuda-with-rmm]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=${{ matrix.runner }}\n      - tag=main-test-cpp-gpu-cuda${{ matrix.cuda_version }}-${{ matrix.suite }}-${{ matrix.arch }}\n      - extras=s3-cache\n    timeout-minutes: 30\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          # CUDA 12 tests\n          # Note: --gpus all provides GPU access. --privileged may not be strictly needed for\n          # basic GPU tests, but every entry below still passes it.\n          - cuda_version: 12\n            suite: gpu\n            arch: x86_64\n            runner: linux-amd64-gpu\n            image_repo: xgb-ci.gpu\n            artifact_from: build-cuda12-x86_64\n            container_options: \"--gpus all --privileged\"\n            test_args: \"\"\n          - cuda_version: 12\n            suite: gpu\n            arch: aarch64\n            runner: linux-arm64-gpu\n            image_repo: xgb-ci.gpu_aarch64\n            artifact_from: build-cuda12-aarch64\n            container_options: \"--gpus all --privileged\"\n            test_args: \"\"\n          - cuda_version: 12\n            suite: gpu-rmm\n            arch: x86_64\n            runner: linux-amd64-gpu\n            image_repo: xgb-ci.gpu\n            artifact_from: build-cuda-with-rmm\n            container_options: \"--gpus all --privileged\"\n            test_args: \"--use-rmm-pool\"\n          - cuda_version: 12\n            suite: mgpu\n            arch: x86_64\n            runner: linux-amd64-mgpu\n            image_repo: xgb-ci.gpu\n            artifact_from: build-cuda12-x86_64\n            # mgpu needs --shm-size for NCCL shared memory communication\n            container_options: \"--gpus all --shm-size=4g --privileged\"\n            test_args: \"--gtest_filter=*MGPU*\"\n          # CUDA 13 tests\n          - cuda_version: 13\n            suite: gpu\n            arch: x86_64\n       
     runner: linux-amd64-gpu\n            image_repo: xgb-ci.gpu_build_cuda13_rockylinux8\n            artifact_from: build-cuda13-x86_64\n            container_options: \"--gpus all --privileged\"\n            test_args: \"\"\n          - cuda_version: 13\n            suite: gpu\n            arch: aarch64\n            runner: linux-arm64-gpu\n            image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64\n            artifact_from: build-cuda13-aarch64\n            container_options: \"--gpus all --privileged\"\n            test_args: \"\"\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/${{ matrix.image_repo }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n      options: ${{ matrix.container_options }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n      - name: Unstash gtest\n        run: |\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \\\n            --dest-dir build \\\n            testxgboost\n          chmod +x build/testxgboost\n      - name: Run Google Tests (${{ matrix.suite }})\n        run: build/testxgboost ${{ matrix.test_args }}\n\n  test-python-wheel-gpu:\n    name: Python tests CUDA ${{ matrix.cuda_version }} (${{ matrix.description }})\n    needs: [ci-configure, audit-cuda-wheel]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=${{ matrix.runner }}\n      - tag=main-test-python-wheel-cuda${{ matrix.cuda_version }}-${{ matrix.description }}\n      - extras=s3-cache\n    timeout-minutes: 60\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          # CUDA 12 tests\n          - cuda_version: 12\n            description: 
GPU-x86_64-CUDA-12\n            image_repo: xgb-ci.gpu\n            suite: gpu\n            runner: linux-amd64-gpu\n            artifact_from: audit-cuda12-wheel-x86_64\n            container_options: \"--gpus all --privileged\"\n          - cuda_version: 12\n            description: Multi-GPU-x86_64-CUDA-12\n            image_repo: xgb-ci.gpu\n            suite: mgpu\n            runner: linux-amd64-mgpu\n            artifact_from: audit-cuda12-wheel-x86_64\n            # mgpu needs --shm-size for NCCL shared memory communication\n            container_options: \"--gpus all --shm-size=4g --privileged\"\n          # CUDA 12 aarch64 tests\n          - cuda_version: 12\n            description: GPU-arm64-CUDA-12\n            image_repo: xgb-ci.gpu_aarch64\n            suite: gpu-arm64\n            runner: linux-arm64-gpu\n            artifact_from: audit-cuda12-wheel-aarch64\n            container_options: \"--gpus all --privileged\"\n          # CUDA 13 tests\n          - cuda_version: 13\n            description: GPU-x86_64-CUDA-13\n            image_repo: xgb-ci.gpu_build_cuda13_rockylinux8\n            suite: gpu\n            runner: linux-amd64-gpu\n            artifact_from: audit-cuda13-wheel-x86_64\n            container_options: \"--gpus all --privileged\"\n          - cuda_version: 13\n            description: GPU-arm64-CUDA-13\n            image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64\n            suite: gpu-arm64\n            runner: linux-arm64-gpu\n            artifact_from: audit-cuda13-wheel-aarch64\n            container_options: \"--gpus all --privileged\"\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/${{ matrix.image_repo }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n      options: ${{ matrix.container_options }}\n    steps:\n      - uses: 
runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n      - name: Unstash Python wheel\n        run: |\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \\\n            --dest-dir wheelhouse \\\n            *.whl\n      - name: Run Python tests (${{ matrix.description }})\n        run: >-\n          bash ops/pipeline/test-python-wheel.sh\n          --suite ${{ matrix.suite }}\n          --cuda-version ${{ matrix.cuda_version }}\n      # Train a model for cross-platform testing (only for CUDA 12 x86_64)\n      - name: Train cross-platform test model\n        if: matrix.cuda_version == 12 && matrix.suite == 'gpu'\n        shell: bash -l {0}\n        run: |\n          source activate gpu_test\n          python tests/cross-platform/test_cross_platform_model.py \\\n            --train --model-path cross_platform_model.ubj\n      - name: Upload cross-platform model artifact\n        if: matrix.cuda_version == 12 && matrix.suite == 'gpu'\n        uses: actions/upload-artifact@v7.0.0\n        with:\n          name: cross-platform-model\n          path: |\n            cross_platform_model.ubj\n            cross_platform_model.pkl\n          retention-days: 1\n\n  test-python-wheel-cpu:\n    name: Python tests CPU (${{ matrix.description }})\n    needs: [ci-configure, audit-cuda-wheel]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=${{ matrix.runner }}\n      - tag=main-test-python-wheel-cpu-${{ matrix.description }}\n      - extras=s3-cache\n    timeout-minutes: 60\n    strategy:\n      fail-fast: false\n      # Uses the wheel from cuda12 for tests.\n      matrix:\n        include:\n          - description: CPU-amd64\n            image_repo: xgb-ci.cpu\n            suite: cpu\n            runner: linux-amd64-cpu\n            artifact_from: audit-cuda12-wheel-x86_64\n            container_options: 
\"--init\"\n          - description: CPU-arm64\n            image_repo: xgb-ci.cpu_aarch64\n            suite: cpu-arm64\n            runner: linux-arm64-cpu\n            artifact_from: audit-cuda12-wheel-aarch64\n            container_options: \"--init\"\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/${{ matrix.image_repo }}:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n      options: ${{ matrix.container_options }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n      - name: Unstash Python wheel\n        run: |\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \\\n            --dest-dir wheelhouse \\\n            *.whl\n      - name: Run Python tests (${{ matrix.description }})\n        run: bash ops/pipeline/test-python-wheel.sh --suite ${{ matrix.suite }}\n\n  python-wheels-macos:\n    name: Build macOS wheel (${{ matrix.platform_id }})\n    runs-on: ${{ matrix.os }}\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        - os: macos-15-intel\n          platform_id: macosx_x86_64\n        - os: macos-14\n          platform_id: macosx_arm64\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - name: Set up homebrew\n        uses: Homebrew/actions/setup-homebrew@13341b4d5e459a98bbe0b122b12c11bf90518cc8\n      - name: Install libomp\n        run: brew install libomp\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: minimal\n          environment-file: ops/conda_env/minimal.yml\n      - name: Build 
wheels\n        run: bash ops/pipeline/build-python-wheels-macos.sh ${{ matrix.platform_id }} ${{ github.sha }}\n      - name: Verify wheel can be installed\n        run: |\n          python -m pip install -vvv wheelhouse/*.whl\n      - name: Upload wheel artifact\n        uses: actions/upload-artifact@v7.0.0\n        with:\n          name: python-wheel-${{ matrix.platform_id }}\n          path: wheelhouse/*.whl\n          retention-days: 1\n      - name: Upload Python wheel to S3\n        if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')\n        run: |\n          python ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket xgboost-nightly-builds \\\n            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public \\\n            wheelhouse/*.whl\n        env:\n          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}\n          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}\n\n  test-cross-platform-inference:\n    name: Cross-platform inference test (macOS Apple Silicon)\n    needs: [test-python-wheel-gpu, python-wheels-macos]\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        - os: macos-15-intel\n          platform_id: macosx_x86_64\n        - os: macos-14\n          platform_id: macosx_arm64\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: macos_test\n          environment-file: ops/conda_env/minimal.yml\n      - run: conda install scikit-learn numpy -y\n      - name: Download macOS wheel artifact\n        uses: actions/download-artifact@v8.0.1\n        with:\n          name: python-wheel-${{ matrix.platform_id }}\n          path: wheelhouse\n      - name: Install XGBoost wheel\n        run: |\n          python -m pip install -v 
wheelhouse/*.whl\n      - name: Download cross-platform model artifact\n        uses: actions/download-artifact@v8.0.1\n        with:\n          name: cross-platform-model\n          path: .\n      - name: Run cross-platform inference test\n        run: |\n          python tests/cross-platform/test_cross_platform_model.py \\\n            --inference --model-path cross_platform_model.ubj\n"
  },
  {
    "path": ".github/workflows/misc.yml",
    "content": "name: Miscellaneous\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  ci-configure:\n    name: Configure variables for CI\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=misc-ci-configure\n    steps:\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2.0.2\n        with:\n          mask-password: 'false'\n          registries: '492475357299'\n      - uses: actions/checkout@v6.0.2\n      - name: Get image tag\n        id: get-image-tag\n        run: |\n          source ops/pipeline/get-image-tag.sh\n          echo \"Using image tag $IMAGE_TAG\"\n          echo \"image_tag=$IMAGE_TAG\" >> \"$GITHUB_OUTPUT\"\n    outputs:\n      docker_registry: ${{ steps.login-ecr.outputs.registry }}\n      docker_username: ${{ steps.login-ecr.outputs.docker_username_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      docker_password: ${{ steps.login-ecr.outputs.docker_password_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      image_tag: ${{ steps.get-image-tag.outputs.image_tag }}\n\n  gtest-cpu-nonomp:\n    name: Test Google C++ unittest (CPU Non-OMP)\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: cpp_test\n          environment-file: ops/conda_env/cpp_test.yml\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - name: Build and 
test XGBoost\n        run: bash ops/pipeline/build-cpu.sh cpu-nonomp\n      - run: sccache --show-stats\n\n  c-api-demo:\n    name: Test installing XGBoost lib + building the C API demo\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: cpp_test\n          environment-file: ops/conda_env/cpp_test.yml\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - name: Build and run C API demo with shared\n        run: bash ops/pipeline/test-c-api-demo.sh\n\n  build-i386:\n    name: Build 32-bit (i386)\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=misc-build-i386\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Log into Docker registry (AWS ECR)\n        run: bash ops/pipeline/login-docker-registry.sh\n      - run: bash ops/pipeline/test-cpp-i386.sh\n\n  build-jvm-docs:\n    name: Build docs for JVM packages\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=misc-build-jvm-docs\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.jvm_gpu_build:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: runs-on/action@v2\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - name: Build libxgboost4j.so with CUDA\n        run: bash ops/pipeline/build-jvm-gpu.sh\n      - run: sccache --show-stats\n      - name: Build JVM docs\n        run: bash 
ops/pipeline/build-jvm-doc.sh\n      - name: Upload JVM doc\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket xgboost-docs \\\n            --prefix ${BRANCH_NAME}/${{ github.event.pull_request.head.sha || github.sha }} --make-public \\\n            jvm-packages/${BRANCH_NAME}.tar.bz2\n\n  build-r-docs:\n    name: Build docs for the R package\n    needs: ci-configure\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=misc-build-r-docs\n    container:\n      image: ${{ needs.ci-configure.outputs.docker_registry }}/xgb-ci.cpu_build_r_doc:${{ needs.ci-configure.outputs.image_tag }}\n      credentials:\n        username: ${{ needs.ci-configure.outputs.docker_username }}\n        password: ${{ needs.ci-configure.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - run: bash ops/script/setup_r_sccache.sh\n      - name: Build R docs\n        run: bash ops/pipeline/build-r-docs.sh\n      - name: Upload R doc\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket xgboost-docs \\\n            --prefix ${BRANCH_NAME}/${{ github.event.pull_request.head.sha || github.sha }} --make-public \\\n            r-docs-${BRANCH_NAME}.tar.bz2\n\n  trigger-rtd-build:\n    name: Trigger Read The Docs build\n    needs: [build-jvm-docs, build-r-docs]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=misc-trigger-rtd-build\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - name: Trigger RTD\n        run: bash ops/pipeline/trigger-rtd.sh\n"
  },
  {
    "path": ".github/workflows/pre-commit.yml",
    "content": "name: XGBoost CI (Pre-commit)\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read\n\njobs:\n  pre-commit:\n    name: Run pre-commit\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          fetch-depth: 0\n      - uses: actions/setup-python@v6.2.0\n        with:\n          python-version: \"3.11\"\n      - name: Install pre-commit\n        run: python -m pip install pre-commit\n      - name: Run pre-commit on updated files\n        shell: bash\n        run: |\n          if [ \"${{ github.event_name }}\" = \"pull_request\" ]; then\n            FROM_REF=\"${{ github.event.pull_request.base.sha }}\"\n            TO_REF=\"${{ github.event.pull_request.head.sha }}\"\n          else\n            FROM_REF=\"${{ github.event.before }}\"\n            TO_REF=\"${{ github.sha }}\"\n          fi\n\n          if [ \"${FROM_REF}\" = \"0000000000000000000000000000000000000000\" ]; then\n            FROM_REF=\"$(git hash-object -t tree /dev/null)\"\n          fi\n\n          pre-commit run --from-ref \"${FROM_REF}\" --to-ref \"${TO_REF}\" --show-diff-on-failure\n"
  },
  {
    "path": ".github/workflows/python_tests.yml",
    "content": "name: Python tests\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read # to fetch code (actions/checkout)\n\ndefaults:\n  run:\n    shell: bash -l {0}\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  python-sdist-test:\n    runs-on: ${{ matrix.os }}\n    name: Test installing Python XGBoost from the source distribution (${{ matrix.os }})\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [macos-15-intel, windows-latest, ubuntu-latest]\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: sdist_test\n          environment-file: ops/conda_env/sdist_test.yml\n      - name: Install extra package for MacOS\n        run: |\n          mamba install -c conda-forge llvm-openmp\n        if: matrix.os == 'macos-15-intel'\n      - name: Build and install XGBoost\n        run: bash ops/pipeline/test-python-sdist.sh\n\n  python-tests-on-macos:\n    name: Test XGBoost Python package on macos-15-intel\n    runs-on: macos-15-intel\n    timeout-minutes: 60\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: macos_cpu_test\n          environment-file: ops/conda_env/macos_cpu_test.yml\n      - run: bash ops/pipeline/test-python-macos.sh\n\n  python-system-installation-on-ubuntu:\n    name: Test XGBoost Python package System Installation on Ubuntu\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - name: Set up Python 3.10\n        uses: actions/setup-python@v6.2.0\n        with:\n          python-version: \"3.10\"\n     
 - run: bash ops/pipeline/test-python-with-sysprefix.sh\n"
  },
  {
    "path": ".github/workflows/python_wheels_variants.yml",
    "content": "name: Build Python wheels using Wheel Variant prototype (WheelNext)\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\ndefaults:\n  run:\n    shell: bash -l {0}\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  ecr-login:\n    name: Login to AWS ECR\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=python-wheels-variants-ecr-login\n    steps:\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2.0.2\n        with:\n          mask-password: 'false'\n          registries: '492475357299'\n    outputs:\n      docker_registry: ${{ steps.login-ecr.outputs.registry }}\n      docker_username: ${{ steps.login-ecr.outputs.docker_username_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n      docker_password: ${{ steps.login-ecr.outputs.docker_password_492475357299_dkr_ecr_us_west_2_amazonaws_com }}\n\n  build-variant-wheels:\n    name: Build raw wheel for variant\n    needs: ecr-login\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=python-wheels-variants-build\n    container:\n      image: ${{ needs.ecr-login.outputs.docker_registry }}/xgb-ci.gpu_build_rockylinux8:main\n      credentials:\n        username: ${{ needs.ecr-login.outputs.docker_username }}\n        password: ${{ needs.ecr-login.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n        with:\n          cache-key-prefix: build-variant-wheels\n      - run: bash 
ops/pipeline/build-variant-wheels.sh\n      - run: sccache --show-stats\n      - name: Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-variant-wheels \\\n            python-package/dist/*.whl\n\n  audit-variant-wheel:\n    name: Audit variant wheel for manylinux_2_28_x86_64\n    needs: [ecr-login, build-variant-wheels]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=python-wheels-variants-audit\n    container:\n      image: ${{ needs.ecr-login.outputs.docker_registry }}/xgb-ci.manylinux_2_28_x86_64:main\n      credentials:\n        username: ${{ needs.ecr-login.outputs.docker_username }}\n        password: ${{ needs.ecr-login.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n      - name: Pick Python\n        run: |\n          export PATH=/opt/python/cp310-cp310/bin/:$PATH\n          echo ${PATH} >> $GITHUB_PATH\n      - name: Install dependencies\n        run: |\n          pip install awscli wheel auditwheel pydistcheck\n      - name: Unstash raw wheel\n        run: |\n          mkdir -p python-package/dist\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/build-variant-wheels \\\n            --dest-dir python-package/dist \\\n            *.whl\n      - name: Audit wheel\n        run: |\n          WHEEL_TAG=manylinux_2_28_x86_64\n          echo \"--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard\"\n          auditwheel repair --only-plat --plat ${WHEEL_TAG} python-package/dist/*.whl\n          python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \\\n            wheelhouse/*.whl\n          mv -v wheelhouse/*.whl python-package/dist/\n      - name: 
Stash files\n        run: |\n          python3 ops/pipeline/manage-artifacts.py upload \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/audit-variant-wheel \\\n            python-package/dist/*.whl\n\n  convert-variant-wheel:\n    name: Convert to variant wheel (WheelNext)\n    needs: [ecr-login, audit-variant-wheel]\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=linux-amd64-cpu\n      - tag=python-wheels-variants-convert\n    container:\n      image: ${{ needs.ecr-login.outputs.docker_registry }}/xgb-ci.gpu_build_rockylinux8:main\n      credentials:\n        username: ${{ needs.ecr-login.outputs.docker_username }}\n        password: ${{ needs.ecr-login.outputs.docker_password }}\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Unstash audited wheel\n        run: |\n          mkdir -p python-package/dist\n          python3 ops/pipeline/manage-artifacts.py download \\\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n            --prefix cache/${{ github.run_id }}/audit-variant-wheel \\\n            --dest-dir python-package/dist \\\n            *.whl\n      - name: Convert to variant wheel\n        run: bash ops/pipeline/build-variant-wheels-impl.sh\n"
  },
  {
    "path": ".github/workflows/python_wheels_winarm64.yml",
    "content": "name: Build Python wheels targeting Windows ARM64\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\ndefaults:\n  run:\n    shell: pwsh\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  python-wheels-Win-ARM64:\n    name: Build wheel for Windows ARM64\n    runs-on: windows-11-arm\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n\n      - name: Setup Python\n        uses: actions/setup-python@v6.2.0\n        with:\n          python-version: '3.11'\n\n      - name: Install build dependencies\n        run: |\n          python -m pip install --upgrade pip\n          python -m pip install wheel setuptools awscli packaging\n\n      - uses: dmlc/xgboost-devops/actions/msvc-dev-env@main\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n\n      - name: Build XGBoost for Win-ARM64\n        run: |\n          mkdir build\n          cd build\n          cmake .. -G\"Ninja\" -DCMAKE_BUILD_TYPE=Release `\n            -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache\n          cmake --build . -v\n      - run: sccache --show-stats\n\n      - name: Build Python wheel xgboost for Win-ARM64\n        run: |\n          cd python-package\n          mkdir -p wheelhouse\n          pip wheel --no-deps -v . 
--wheel-dir wheelhouse/\n          $wheelFile = Get-ChildItem wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName\n          python -m wheel tags --python-tag py3 --abi-tag none --platform win_arm64 --remove $wheelFile\n\n      - name: Upload Python wheel xgboost\n        if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')\n        run: |\n          $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName\n          python ops/pipeline/manage-artifacts.py upload `\n            --s3-bucket xgboost-nightly-builds `\n            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public `\n            $wheelFile\n        env:\n          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}\n          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}\n\n      - name: Clean up\n        run: |\n          $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName\n          Remove-Item -Path $wheelFile -Verbose\n\n      - name: Build Python wheel xgboost-cpu for Win-ARM64\n        run: |\n          # Patch to rename pkg to xgboost-cpu\n          python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na\n          cd python-package\n          pip wheel --no-deps -v . 
--wheel-dir wheelhouse/\n          $wheelFile = Get-ChildItem wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName\n          python -m wheel tags --python-tag py3 --abi-tag none --platform win_arm64 --remove $wheelFile\n\n      - name: Upload Python wheel xgboost-cpu\n        if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')\n        run: |\n          $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName\n          python ops/pipeline/manage-artifacts.py upload `\n            --s3-bucket xgboost-nightly-builds `\n            --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public `\n            $wheelFile\n        env:\n          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}\n          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}\n"
  },
  {
    "path": ".github/workflows/r_nold.yml",
    "content": "# Run expensive R tests with the help of rhub. Only triggered by a pull request review\n# See discussion at https://github.com/dmlc/xgboost/pull/6378\n\nname: XGBoost-R-noLD\n\non:\n  pull_request_review_comment:\n    types: [created]\n\npermissions:\n  contents: read # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  test-R-noLD:\n    if: github.event.comment.body == '/gha run r-nold-test' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association)\n    timeout-minutes: 120\n    runs-on: ubuntu-latest\n    container:\n      image: rhub/debian-gcc-devel-nold\n    steps:\n      - name: Install git and system packages\n        shell: bash\n        run: |\n          apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - name: Install dependencies\n        shell: bash -l {0}\n        run: |\n          /tmp/R-devel/bin/Rscript -e \"source('./R-package/tests/helper_scripts/install_deps.R')\"\n      - name: Run R tests\n        shell: bash\n        run: |\n          cd R-package && \\\n          /tmp/R-devel/bin/R CMD INSTALL . && \\\n          /tmp/R-devel/bin/R -q -e \"library(testthat); setwd('tests'); source('testthat.R')\"\n"
  },
  {
    "path": ".github/workflows/r_tests.yml",
    "content": "name: R Tests\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\nenv:\n  GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}\n\npermissions:\n  contents: read # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  test-Rpkg:\n    runs-on: ${{ matrix.os }}\n    name: Test R on OS ${{ matrix.os }}, R ${{ matrix.r }}, Compiler ${{ matrix.compiler }}, Build ${{ matrix.build }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - os: windows-latest\n            r: release\n            compiler: mingw\n            build: autotools\n          - os: ubuntu-latest\n            r: release\n            compiler: none\n            build: cmake\n    env:\n      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true\n    steps:\n      - name: Install system dependencies\n        run: |\n          sudo apt update\n          sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev librsvg2-dev librsvg2-2\n        if: matrix.os == 'ubuntu-latest'\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: r-lib/actions/setup-r@v2\n        with:\n          r-version: ${{ matrix.r }}\n      - uses: actions/setup-python@v6.2.0\n        with:\n          python-version: \"3.10\"\n          architecture: 'x64'\n      - uses: r-lib/actions/setup-tinytex@v2\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n        if: matrix.os == 'ubuntu-latest'\n      - run: bash ops/script/setup_r_sccache.sh\n        if: matrix.os == 'ubuntu-latest'\n      - name: Install dependencies\n        shell: Rscript {0}\n        run: |\n          source(\"./R-package/tests/helper_scripts/install_deps.R\")\n      - name: Test R\n        run: |\n          python ops/script/test_r_package.py --compiler='${{ 
matrix.compiler }}' --build-tool=\"${{ matrix.build }}\" --task=check\n        if: matrix.compiler != 'none'\n      - name: Test R\n        run: |\n          python ops/script/test_r_package.py --build-tool=\"${{ matrix.build }}\" --task=check\n        if: matrix.compiler == 'none'\n      - run: sccache --show-stats\n        if: matrix.os == 'ubuntu-latest'\n\n  test-R-on-Debian:\n    name: Test R package on Debian\n    runs-on: ubuntu-latest\n    container:\n      image: rhub/ubuntu-release # rhub uses ubuntu for debian tests.\n    steps:\n      - name: Install system dependencies\n        run: |\n          # Must run before checkout to have the latest git installed.\n          apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git librsvg2-dev librsvg2-2 pandoc -y\n      - name: Trust git cloning project sources\n        run: |\n          git config --global --add safe.directory \"${GITHUB_WORKSPACE}\"\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - run: bash ops/script/setup_r_sccache.sh\n      - name: Install dependencies\n        shell: bash -l {0}\n        run: |\n          Rscript -e \"source('./R-package/tests/helper_scripts/install_deps.R')\"\n      - name: Test R\n        shell: bash -l {0}\n        run: |\n          python3 ops/script/test_r_package.py --r=/opt/R/release/bin/R --build-tool=autotools --task=check\n      - uses: dorny/paths-filter@v4\n        # Run the document check if there are changes in the R package.\n        id: changes\n        with:\n          filters: |\n            r_package:\n              - 'R-package/**'\n      - name: Run document check\n        if: steps.changes.outputs.r_package == 'true'\n        run: |\n          python3 ops/script/test_r_package.py --r=/opt/R/release/bin/R --task=doc\n      - name: Run lintr\n        run: |\n          
# Prevent the linter from checking generated R scripts.\n          if [ -d ./xgboost.Rcheck ] ; then\n            rm -rf ./xgboost.Rcheck\n          fi\n          MAKEFLAGS=\"-j$(nproc)\" R CMD INSTALL R-package/\n          Rscript ops/script/lint_r.R $(pwd)\n      - run: sccache --show-stats\n"
  },
  {
    "path": ".github/workflows/scorecards.yml",
    "content": "name: Scorecards supply-chain security\non:\n  # Only the default branch is supported.\n  branch_protection_rule:\n  schedule:\n    - cron: '17 2 * * 6'\n  push:\n    branches: [ \"master\" ]\n\n# Declare default permissions as read only.\npermissions: read-all\n\njobs:\n  analysis:\n    name: Scorecards analysis\n    runs-on: ubuntu-latest\n    permissions:\n      # Needed to upload the results to code-scanning dashboard.\n      security-events: write\n      # Used to receive a badge.\n      id-token: write\n\n    steps:\n      - name: \"Checkout code\"\n        uses: actions/checkout@v6.0.2\n        with:\n          persist-credentials: false\n\n      - name: \"Run analysis\"\n        uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3\n        with:\n          results_file: results.sarif\n          results_format: sarif\n\n          # Publish the results for public repositories to enable scorecard badges. For more details, see\n          # https://github.com/ossf/scorecard-action#publishing-results.\n          # For private repositories, `publish_results` will automatically be set to `false`, regardless\n          # of the value entered here.\n          publish_results: true\n\n      # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF\n      # format to the repository Actions tab.\n      - name: \"Upload artifact\"\n        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v4.4.3\n        with:\n          name: SARIF file\n          path: results.sarif\n          retention-days: 5\n\n      # Upload the results to GitHub's code scanning dashboard.\n      - name: \"Upload to code-scanning\"\n        uses: github/codeql-action/upload-sarif@83a02f7883b12e0e4e1a146174f5e2292a01e601 # v2.16.4\n        with:\n          sarif_file: results.sarif\n"
  },
  {
    "path": ".github/workflows/sycl_tests.yml",
    "content": "name: XGBoost CI (oneAPI)\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\ndefaults:\n  run:\n    shell: bash -l {0}\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  gtest-cpu-sycl:\n    name: Test Google C++ unittest (CPU SYCL)\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v6.0.2\n      with:\n        submodules: 'true'\n    - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n      with:\n        environment-name: linux_sycl_test\n        environment-file: ops/conda_env/linux_sycl_test.yml\n    - name: Run gtest\n      run: bash ops/pipeline/build-test-sycl.sh gtest\n\n  python-sycl-tests-on-ubuntu:\n    name: Test XGBoost Python package with SYCL\n    runs-on: ubuntu-latest\n    timeout-minutes: 90\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: 'true'\n      - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n        with:\n          environment-name: linux_sycl_test\n          environment-file: ops/conda_env/linux_sycl_test.yml\n      - name: Test Python package\n        run: bash ops/pipeline/build-test-sycl.sh pytest\n"
  },
  {
    "path": ".github/workflows/windows.yml",
    "content": "name: Windows\n\non:\n  push:\n    branches:\n      - master\n      - 'release_*'\n  pull_request:\n\npermissions:\n  contents: read  # to fetch code (actions/checkout)\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\ndefaults:\n  run:\n    shell: powershell\n\nenv:\n  BRANCH_NAME: >-\n    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}\n\njobs:\n  build-win64-gpu:\n    name: Build XGBoost for Windows with CUDA\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=windows-cpu\n      - tag=windows-build-win64-gpu\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/msvc-dev-env@main\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - run: ops/pipeline/build-win64.ps1 -variant gpu\n      - run: sccache --show-stats\n      - name: Stash files\n        shell: powershell\n        run: |\n          conda activate\n          python ops/pipeline/manage-artifacts.py upload `\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} `\n            --prefix cache/${{ github.run_id }}/build-win64-gpu `\n            build/testxgboost.exe `\n            (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)\n\n  build-win64-cpu:\n    name: Build XGBoost for Windows (minimal)\n    runs-on:\n      - runs-on=${{ github.run_id }}\n      - runner=windows-cpu\n      - tag=windows-build-win64-cpu\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - uses: dmlc/xgboost-devops/actions/msvc-dev-env@main\n      - uses: dmlc/xgboost-devops/actions/sccache@main\n      - run: ops/pipeline/build-win64.ps1 -variant cpu\n      - run: sccache --show-stats\n\n  test-win64-gpu:\n    name: Test XGBoost on Windows\n    needs: build-win64-gpu\n    runs-on:\n 
     - runs-on=${{ github.run_id }}\n      - runner=windows-gpu\n      - tag=windows-test-win64-gpu\n    steps:\n      - uses: actions/checkout@v6.0.2\n        with:\n          submodules: \"true\"\n      - name: Unstash files\n        shell: powershell\n        run: |\n          conda activate\n          python ops/pipeline/manage-artifacts.py download `\n            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} `\n            --prefix cache/${{ github.run_id }}/build-win64-gpu `\n            --dest-dir build `\n            *.whl testxgboost.exe\n          New-Item -ItemType Directory -Path python-package/dist/ -Force\n          Move-Item -Path (Get-ChildItem build/*.whl | Select-Object -Expand FullName) `\n            -Destination python-package/dist/\n      - run: powershell ops/pipeline/test-win64-gpu.ps1\n"
  },
  {
    "path": ".gitignore",
    "content": "# Compiled Object files\n*.slo\n*.lo\n*.o\n*.page\n# Compiled Dynamic libraries\n*.so\n*.dylib\n*.page\n# Compiled Static libraries\n*.lai\n*.la\n*.a\n*~\n*.Rcheck\n*.rds\n*.tar.gz\n*.tar.bz2\n*conf\n*buffer\n*.model\n*pyc\n*.train\n*.test\n*.tar\n*group\n*rar\n*vali\n*sdf\nRelease\n*exe\n*exp\nipch\n*.filters\n*.user\n*log\nrmm_log.txt\nDebug\n*suo\n.Rhistory\n*.dll\n*i386\n*x64\n*dump\n*save\n*csv\n.Rproj.user\n*.cpage.col\n*.cpage\n*.Rproj\n./xgboost.mpi\n./xgboost.mock\n*.bak\n#.Rbuildignore\nR-package.Rproj\nR-package/build/*\n*.cache*\n.mypy_cache/\ndoxygen\n\n# java\njava/xgboost4j/target\njava/xgboost4j/tmp\njava/xgboost4j-demo/target\njava/xgboost4j-demo/data/\njava/xgboost4j-demo/tmp/\njava/xgboost4j-demo/model/\nnb-configuration*\n*.xml.versionsBackup\n\n# Eclipse\n.project\n.cproject\n.classpath\n.pydevproject\n.settings/\n/build\n/build-gpu\n/xgboost\n*.data\nbuild_plugin\nrecommonmark/\ntags\nTAGS\n*.class\ntarget\n*.swp\n\n# cpp tests and gcov generated files\n*.gcov\n*.gcda\n*.gcno\n*.ubj\nbuild_tests\n/tests/cpp/xgboost_test\n\n.DS_Store\nlib/\n\n# spark\nmetastore_db\n\n/include/xgboost/build_config.h\n\n# files from R-package source install\n**/config.status\nR-package/config.h\nR-package/src/Makevars\n*.lib\n\n# Visual Studio\n.vs/\nCMakeSettings.json\n*.ilk\n*.pdb\n\n# IntelliJ/CLion\n.idea\n*.iml\n/cmake-build-debug/\n\n# GDB\n.gdb_history\n\n# Python joblib.Memory used in pytest.\ncachedir/\n\n# Files from local Dask work\ndask-worker-space/\n\n# Jupyter notebook checkpoints\n.ipynb_checkpoints/\n\n# credentials and key material\nconfig\ncredentials\ncredentials.csv\n*.env\n*.pem\n*.pub\n*.rdp\n*_rsa\n\n# Visual Studio code + extensions\n.vscode\n.metals\n.bloop\n\n# python tests\n*.bin\ndemo/**/*.txt\n*.dmatrix\n.hypothesis\n__MACOSX/\nmodel*.json\n/tests/python/models/models/\n\n# R tests\n*.htm\n*.html\n*.libsvm\n*.rds\nRplots.pdf\n*.zip\n\n# nsys\n*.nsys-rep\nrmm_log.dev*"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"dmlc-core\"]\n\tpath = dmlc-core\n\turl = https://github.com/dmlc/dmlc-core\n\tbranch = main\n[submodule \"gputreeshap\"]\n\tpath = gputreeshap\n\turl = https://github.com/rapidsai/gputreeshap.git\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.6.0\n    hooks:\n      - id: trailing-whitespace\n      - id: end-of-file-fixer\n      - id: check-merge-conflict\n      - id: check-case-conflict\n      - id: check-yaml\n      - id: check-toml\n      - id: check-json\n      - id: check-executables-have-shebangs\n      - id: check-shebang-scripts-are-executable\n      - id: mixed-line-ending\n  - repo: https://github.com/astral-sh/ruff-pre-commit\n    rev: v0.14.14\n    hooks:\n      - id: ruff-check\n        args:\n          - --fix\n          - --select=I\n        files: \\.py$\n        exclude: (dmlc-core|gputreeshap)\n      - id: ruff-format\n        files: \\.py$\n        exclude: (dmlc-core|gputreeshap)\n  - repo: https://github.com/pre-commit/mirrors-clang-format\n    rev: v21.1.8\n    hooks:\n      - id: clang-format\n        args: [--style=file]\n        files: \\.(cc|c|cpp|h|cu|hpp)$\n        exclude: (dmlc-core|gputreeshap)\n  - repo: local\n    hooks:\n      - id: lint-cpp\n        name: C++ lint (cpplint)\n        entry: python ops/script/lint_cpp.py\n        language: python\n        types: [file]\n        files: \\.(cc|c|cpp|h|cu|hpp)$\n        exclude: (dmlc-core|gputreeshap)\n        additional_dependencies:\n          - cpplint==1.6.1\n      - id: lint-cmake\n        name: CMake lint (cmakelint)\n        entry: cmakelint --linelength=120 --filter=-convention/filename,-package/stdargs,-readability/wonkycase\n        language: python\n        types: [file]\n        files: (CMakeLists\\.txt$|^cmake/.*\\.cmake$)\n        exclude: (dmlc-core|gputreeshap)\n        additional_dependencies:\n          - cmakelint\n      - id: pylint\n        name: Python lint (pylint)\n        entry: pylint\n        language: python\n        types: [file]\n        files: \\.py$\n        exclude: (dmlc-core|gputreeshap|^ops/|^tests/)\n        args:\n          - --rcfile=python-package/pyproject.toml\n        
additional_dependencies:\n          - pylint\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\nsubmodules:\n  include: all\n\n# Set the version of Python and other tools you might need\nbuild:\n  os: ubuntu-22.04\n  tools:\n    python: \"3.10\"\n  apt_packages:\n    - graphviz\n    - cmake\n    - g++\n    - doxygen\n    - ninja-build\n\n# Build documentation in the docs/ directory with Sphinx\nsphinx:\n   configuration: doc/conf.py\n\n# If using Sphinx, optionally build your docs in additional formats such as PDF\nformats:\n   - pdf\n\n# Optionally declare the Python requirements required to build your docs\npython:\n  install:\n   - requirements: doc/requirements.txt\n"
  },
  {
    "path": "CITATION",
    "content": "@inproceedings{Chen:2016:XST:2939672.2939785,\n author = {Chen, Tianqi and Guestrin, Carlos},\n title = {{XGBoost}: A Scalable Tree Boosting System},\n booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},\n series = {KDD '16},\n year = {2016},\n isbn = {978-1-4503-4232-2},\n location = {San Francisco, California, USA},\n pages = {785--794},\n numpages = {10},\n url = {http://doi.acm.org/10.1145/2939672.2939785},\n doi = {10.1145/2939672.2939785},\n acmid = {2939785},\n publisher = {ACM},\n address = {New York, NY, USA},\n keywords = {large-scale machine learning},\n}\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.18 FATAL_ERROR)\n\nif(PLUGIN_SYCL)\n  string(REPLACE \" -isystem ${CONDA_PREFIX}/include\" \"\" CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\")\nendif()\n\nproject(xgboost LANGUAGES CXX C VERSION 3.3.0)\ninclude(cmake/Utils.cmake)\nlist(APPEND CMAKE_MODULE_PATH \"${xgboost_SOURCE_DIR}/cmake/modules\")\n\n# These policies are already set from 3.18 but we still need to set the policy\n# default variables here for lower minimum versions in the submodules\nset(CMAKE_POLICY_DEFAULT_CMP0063 NEW)\nset(CMAKE_POLICY_DEFAULT_CMP0069 NEW)\nset(CMAKE_POLICY_DEFAULT_CMP0076 NEW)\nset(CMAKE_POLICY_DEFAULT_CMP0077 NEW)\nset(CMAKE_POLICY_DEFAULT_CMP0079 NEW)\n\nmessage(STATUS \"CMake version ${CMAKE_VERSION}\")\n\n# Check compiler versions\n# Use recent compilers to ensure that std::filesystem is available\nif(MSVC)\n  if(MSVC_VERSION LESS 1920)\n    message(FATAL_ERROR \"Need Visual Studio 2019 or newer to build XGBoost\")\n  endif()\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS \"8.1\")\n    message(FATAL_ERROR \"Need GCC 8.1 or newer to build XGBoost\")\n  endif()\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"AppleClang\")\n  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS \"11.0\")\n    message(FATAL_ERROR \"Need Xcode 11.0 (AppleClang 11.0) or newer to build XGBoost\")\n  endif()\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\")\n  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS \"9.0\")\n    message(FATAL_ERROR \"Need Clang 9.0 or newer to build XGBoost\")\n  endif()\nendif()\n\ninclude(${xgboost_SOURCE_DIR}/cmake/PrefetchIntrinsics.cmake)\nfind_prefetch_intrinsics()\ninclude(${xgboost_SOURCE_DIR}/cmake/Version.cmake)\nwrite_version()\nset_default_configuration_release()\n\n#-- Options\ninclude(CMakeDependentOption)\n\n## User options\noption(BUILD_C_DOC \"Build documentation for C APIs using Doxygen.\" OFF)\noption(USE_OPENMP \"Build with OpenMP support.\" ON)\noption(BUILD_STATIC_LIB \"Build static 
library\" OFF)\noption(BUILD_DEPRECATED_CLI \"Build the deprecated command line interface\" OFF)\noption(FORCE_SHARED_CRT \"Build with dynamic CRT on Windows (/MD)\" OFF)\noption(BUILD_WITH_GIT_HASH \"Add a short git hash to the build info.\" OFF)\noption(BUILD_WITH_SYSTEM_DMLC \"Use system package for dmlc.\" OFF)\n## Bindings\noption(JVM_BINDINGS \"Build JVM bindings\" OFF)\noption(R_LIB \"Build shared library for R package\" OFF)\n## Dev\noption(USE_DEBUG_OUTPUT \"Dump internal training results like gradients and predictions to stdout.\nShould only be used for debugging.\" OFF)\noption(FORCE_COLORED_OUTPUT \"Force colored output from compilers, useful when ninja is used instead of make.\" OFF)\noption(ENABLE_ALL_WARNINGS \"Enable all compiler warnings. Only effective for GCC/Clang\" OFF)\noption(LOG_CAPI_INVOCATION \"Log all C API invocations for debugging\" OFF)\noption(GOOGLE_TEST \"Build google tests\" OFF)\noption(USE_DMLC_GTEST \"Use google tests bundled with dmlc-core submodule\" OFF)\noption(USE_DEVICE_DEBUG \"Generate CUDA device debug info.\" OFF)\noption(USE_NVTX \"Build with cuda profiling annotations. Developers only.\" OFF)\nset(NVTX_HEADER_DIR \"\" CACHE PATH \"Path to the stand-alone nvtx header\")\noption(HIDE_CXX_SYMBOLS \"Build shared library and hide all C++ symbols\" OFF)\noption(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR \"Output build artifacts in CMake binary dir\" OFF)\n## CUDA\noption(USE_CUDA  \"Build with GPU acceleration\" OFF)\noption(USE_NCCL  \"Build with NCCL to enable distributed GPU support.\" OFF)\noption(USE_NVCOMP \"Build with nvcomp to enable sparse data compression. 
(experimental)\" OFF)\n# This is specifically designed for PyPI binary release and should be disabled for most of the cases.\noption(USE_DLOPEN_NCCL \"Whether to load nccl dynamically.\" OFF)\noption(BUILD_WITH_SHARED_NCCL \"Build with shared NCCL library.\" OFF)\n\nif(USE_CUDA)\n  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})\n    set(GPU_COMPUTE_VER \"\" CACHE STRING\n      \"Semicolon separated list of compute versions to be built against, e.g. '35;61'\")\n  else()\n    # Clear any cached values from previous runs\n    unset(GPU_COMPUTE_VER)\n    unset(GPU_COMPUTE_VER CACHE)\n  endif()\nendif()\n\n# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still\n# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO\n# is not a supported configuration\ncmake_dependent_option(USE_CUDA_LTO\n  \"Enable link-time optimization for CUDA device code\"\n  \"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}\"\n  \"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION\"\n  OFF)\n## Sanitizers\noption(USE_SANITIZER \"Use sanitizer flags\" OFF)\noption(SANITIZER_PATH \"Path to sanitizers.\")\nset(ENABLED_SANITIZERS \"address\" \"leak\" CACHE STRING\n  \"Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are\naddress, leak, undefined and thread.\")\n## Plugins\noption(PLUGIN_RMM \"Build with RAPIDS Memory Manager (RMM)\" OFF)\noption(PLUGIN_FEDERATED \"Build with Federated Learning\" OFF)\n## TODO: 1. 
Add check if DPC++ compiler is used for building\noption(PLUGIN_SYCL \"SYCL plugin\" OFF)\noption(ADD_PKGCONFIG \"Add xgboost.pc into system.\" ON)\n\n#-- Checks for building XGBoost\nif(USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))\n  message(SEND_ERROR \"Do not enable `USE_DEBUG_OUTPUT' with release build.\")\nendif()\nif(USE_NVTX AND (NOT USE_CUDA))\n  message(SEND_ERROR \"`USE_NVTX` must be enabled with `USE_CUDA` flag.\")\nendif()\nif(USE_NVTX)\n  if(CMAKE_VERSION VERSION_LESS \"3.25.0\")\n    # CUDA:nvtx3 target is added in 3.25\n    message(\"cmake >= 3.25 is required for NVTX.\")\n  endif()\nendif()\nif(USE_NCCL AND (NOT USE_CUDA))\n  message(SEND_ERROR \"`USE_NCCL` must be enabled with `USE_CUDA` flag.\")\nendif()\nif(USE_NVCOMP AND (NOT USE_CUDA))\n  message(SEND_ERROR \"`USE_NVCOMP` must be enabled with `USE_CUDA` flag.\")\nendif()\nif(USE_DEVICE_DEBUG AND (NOT USE_CUDA))\n  message(SEND_ERROR \"`USE_DEVICE_DEBUG` must be enabled with `USE_CUDA` flag.\")\nendif()\nif(BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))\n  message(SEND_ERROR \"Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.\")\nendif()\nif(USE_DLOPEN_NCCL AND (NOT USE_NCCL))\n  message(SEND_ERROR \"Build XGBoost with -DUSE_NCCL=ON to enable USE_DLOPEN_NCCL.\")\nendif()\nif(USE_DLOPEN_NCCL AND (NOT (CMAKE_SYSTEM_NAME STREQUAL \"Linux\")))\n  message(SEND_ERROR \"`USE_DLOPEN_NCCL` supports only Linux at the moment.\")\nendif()\nif(JVM_BINDINGS AND R_LIB)\n  message(SEND_ERROR \"`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.\")\nendif()\nif(R_LIB AND GOOGLE_TEST)\n  message(\n    WARNING\n    \"Some C++ tests will fail with `R_LIB` enabled, as R package redirects some functions to R runtime implementation.\"\n  )\nendif()\nif(R_LIB AND USE_NCCL)\n  message(SEND_ERROR \"`R_LIB` doesn't support distributed computing with NCCL yet.\")\nendif()\nif(PLUGIN_RMM AND NOT (USE_CUDA))\n  message(SEND_ERROR \"`PLUGIN_RMM` must be 
enabled with `USE_CUDA` flag.\")\nendif()\nif(PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\") OR (CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")))\n  message(SEND_ERROR \"`PLUGIN_RMM` must be used with GCC or Clang compiler.\")\nendif()\nif(PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL \"Linux\"))\n  message(SEND_ERROR \"`PLUGIN_RMM` must be used with Linux.\")\nendif()\nif(ENABLE_ALL_WARNINGS)\n  if((NOT CMAKE_CXX_COMPILER_ID MATCHES \"Clang\") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\"))\n    message(SEND_ERROR \"ENABLE_ALL_WARNINGS is only available for Clang and GCC.\")\n  endif()\nendif()\nif(BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))\n  message(SEND_ERROR \"Cannot build a static library libxgboost.a when R or JVM packages are enabled.\")\nendif()\nif(PLUGIN_FEDERATED)\n  if(CMAKE_CROSSCOMPILING)\n    message(SEND_ERROR \"Cannot cross compile with federated learning support\")\n  endif()\n  if(BUILD_STATIC_LIB)\n    message(SEND_ERROR \"Cannot build static lib with federated learning support\")\n  endif()\n  if(R_LIB OR JVM_BINDINGS)\n    message(SEND_ERROR \"Cannot enable federated learning support when R or JVM packages are enabled.\")\n  endif()\n  if(WIN32)\n    message(SEND_ERROR \"Federated learning not supported for Windows platform\")\n  endif()\nendif()\n\n#-- Removed options\nif(USE_AVX)\n  message(SEND_ERROR  \"The option `USE_AVX` is deprecated as experimental AVX features have been removed from XGBoost.\")\nendif()\nif(PLUGIN_LZ4)\n  message(SEND_ERROR  \"The option `PLUGIN_LZ4` is removed from XGBoost.\")\nendif()\nif(RABIT_BUILD_MPI)\n  message(SEND_ERROR \"The option `RABIT_BUILD_MPI` has been removed from XGBoost.\")\nendif()\nif(USE_S3)\n  message(SEND_ERROR \"The option `USE_S3` has been removed from XGBoost\")\nendif()\nif(USE_AZURE)\n  message(SEND_ERROR \"The option `USE_AZURE` has been removed from XGBoost\")\nendif()\nif(USE_HDFS)\n  message(SEND_ERROR \"The option `USE_HDFS` has been removed from 
XGBoost\")\nendif()\nif(PLUGIN_DENSE_PARSER)\n  message(SEND_ERROR \"The option `PLUGIN_DENSE_PARSER` has been removed from XGBoost.\")\nendif()\nif(BUILD_DEPRECATED_CLI)\n  message(SEND_ERROR  \"The option `BUILD_DEPRECATED_CLI` is removed from XGBoost.\")\nendif()\n\n#-- Sanitizer\nif(USE_SANITIZER)\n  include(cmake/Sanitizer.cmake)\n  enable_sanitizers(\"${ENABLED_SANITIZERS}\")\nendif()\n\nif(USE_CUDA)\n  set(USE_OPENMP ON CACHE BOOL \"CUDA requires OpenMP\" FORCE)\n  # `export CXX=' is ignored by CMake CUDA.\n  if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})\n    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH\n      \"The compiler executable to use when compiling host code for CUDA or HIP language files.\")\n    mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)\n    message(STATUS \"Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}\")\n  endif()\n\n  if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)\n    set(CMAKE_CUDA_RUNTIME_LIBRARY Static)\n  endif()\n\n  enable_language(CUDA)\n  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.9)\n    message(FATAL_ERROR \"CUDA version must be at least 12.9!\")\n  endif()\n  if(DEFINED GPU_COMPUTE_VER)\n    compute_cmake_cuda_archs(\"${GPU_COMPUTE_VER}\")\n  endif()\n\n  find_package(CUDAToolkit 12.8 REQUIRED)\nendif()\n\nif(USE_NVCOMP)\n  find_package(nvcomp REQUIRED)\nendif()\n\n\nif(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL \"Ninja\") AND\n    ((CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\") OR\n      (CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\")))\n  set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -fdiagnostics-color=always\")\nendif()\n\nif(NOT (CMAKE_SYSTEM_NAME STREQUAL \"Emscripten\"))\n  find_package(Threads REQUIRED)\nendif()\n\n# -- OpenMP\ninclude(cmake/FindOpenMPMacOS.cmake)\nif(USE_OPENMP)\n  if(APPLE)\n    find_openmp_macos()\n  else()\n    find_package(OpenMP REQUIRED C CXX)\n  endif()\nendif()\n\n# Add for IBM i\nif(${CMAKE_SYSTEM_NAME} MATCHES \"OS400\")\n  
set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -pthread\")\n  set(CMAKE_CXX_ARCHIVE_CREATE \"<CMAKE_AR> -X64 qc <TARGET> <OBJECTS>\")\nendif()\n\nif(USE_NCCL)\n  find_package(Nccl REQUIRED)\nendif()\n\nif(MSVC)\n  if(FORCE_SHARED_CRT)\n    message(STATUS \"XGBoost: Using dynamically linked MSVC runtime...\")\n    set(CMAKE_MSVC_RUNTIME_LIBRARY \"MultiThreaded$<$<CONFIG:Debug>:Debug>DLL\")\n  else()\n    message(STATUS \"XGBoost: Using statically linked MSVC runtime...\")\n    set(CMAKE_MSVC_RUNTIME_LIBRARY \"MultiThreaded$<$<CONFIG:Debug>:Debug>\")\n  endif()\nendif()\n\n# dmlc-core\nset(DMLC_FORCE_SHARED_CRT ${FORCE_SHARED_CRT})\nif(BUILD_WITH_SYSTEM_DMLC)\n  find_package(dmlc REQUIRED)\nelse()\n  set(DMLC_FORCE_SHARED_CRT ${FORCE_SHARED_CRT})\n  add_subdirectory(${xgboost_SOURCE_DIR}/dmlc-core)\nendif()\n\nif(MSVC)\n  if(TARGET dmlc_unit_tests)\n    target_compile_options(\n        dmlc_unit_tests PRIVATE\n        -D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE\n    )\n  endif()\nendif()\n\n# core xgboost\nadd_subdirectory(${xgboost_SOURCE_DIR}/src)\n# dmlc-core\nif(BUIILD_WTIH_SYSTEM_DMLC)\n  target_link_libraries(objxgboost PUBLIC ${dmlc-LIBRARIES})\nelse()\n  target_link_libraries(objxgboost PUBLIC dmlc)\nendif()\n\n# Link -lstdc++fs for GCC 8.x\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS \"9.0\")\n  target_link_libraries(objxgboost PUBLIC stdc++fs)\nendif()\n\n# Exports some R specific definitions and objects\nif(R_LIB)\n  add_subdirectory(${xgboost_SOURCE_DIR}/R-package)\nendif()\n\n# This creates its own shared library `xgboost4j'.\nif(JVM_BINDINGS)\n  add_subdirectory(${xgboost_SOURCE_DIR}/jvm-packages)\nendif()\n\n# Plugin\nadd_subdirectory(${xgboost_SOURCE_DIR}/plugin)\n\nif(PLUGIN_RMM)\n  find_package(rmm REQUIRED)\n\n  # Patch the rmm targets so they reference the static cudart\n  # Remove this patch once RMM stops specifying cudart requirement\n  # (since RMM is a header-only library, it should not specify 
cudart in its CMake config)\n  get_target_property(rmm_link_libs rmm::rmm INTERFACE_LINK_LIBRARIES)\n  list(REMOVE_ITEM rmm_link_libs CUDA::cudart)\n  list(APPEND rmm_link_libs CUDA::cudart_static)\n  set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES \"${rmm_link_libs}\")\n\n  # Pick up patched CCCL from RMM\nelseif(USE_CUDA)\n  # If using CUDA and not RMM, search for CCCL.\n  # Use HINTS to prioritize CCCL from the same CUDA toolkit as nvcc (e.g., in conda environments).\n  # CUDAToolkit_TARGET_DIR points to the target-specific directory (e.g., targets/x86_64-linux).\n  find_package(CCCL CONFIG\n    HINTS ${CUDAToolkit_TARGET_DIR}/lib/cmake ${CUDAToolkit_LIBRARY_DIR}/cmake)\n  if(CCCL_FOUND)\n    message(STATUS \"Standalone CCCL found.\")\n  else()\n    message(STATUS \"Standalone CCCL not found. Attempting to use CCCL from CUDA Toolkit...\")\n    find_package(CCCL CONFIG\n      HINTS ${CUDAToolkit_TARGET_DIR}/lib/cmake ${CUDAToolkit_LIBRARY_DIR}/cmake)\n    if(NOT CCCL_FOUND)\n      message(STATUS \"Could not locate CCCL from CUDA Toolkit. 
Using Thrust and CUB from CUDA Toolkit...\")\n      find_package(libcudacxx CONFIG REQUIRED\n        HINTS ${CUDAToolkit_TARGET_DIR}/lib/cmake ${CUDAToolkit_LIBRARY_DIR}/cmake)\n      find_package(CUB CONFIG REQUIRED\n        HINTS ${CUDAToolkit_TARGET_DIR}/lib/cmake ${CUDAToolkit_LIBRARY_DIR}/cmake)\n      find_package(Thrust CONFIG REQUIRED\n        HINTS ${CUDAToolkit_TARGET_DIR}/lib/cmake ${CUDAToolkit_LIBRARY_DIR}/cmake)\n      thrust_create_target(Thrust HOST CPP DEVICE CUDA)\n      add_library(CCCL::CCCL INTERFACE IMPORTED GLOBAL)\n      target_link_libraries(CCCL::CCCL INTERFACE libcudacxx::libcudacxx CUB::CUB Thrust)\n    endif()\n  endif()\n  # Define guard macros to prevent windows.h from conflicting with winsock2.h\n  if(WIN32)\n    target_compile_definitions(CCCL::CCCL INTERFACE NOMINMAX WIN32_LEAN_AND_MEAN _WINSOCKAPI_)\n  endif()\nendif()\n\nif(PLUGIN_SYCL)\n  set(CMAKE_CXX_LINK_EXECUTABLE\n      \"icpx <FLAGS> <CMAKE_CXX_LINK_FLAGS> -qopenmp <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>\")\n  set(CMAKE_CXX_CREATE_SHARED_LIBRARY\n      \"icpx -shared <CMAKE_SHARED_LIBRARY_CXX_FLAGS> -qopenmp <LANGUAGE_COMPILE_FLAGS> \\\n      <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG>,<TARGET_SONAME> \\\n      -o <TARGET> <OBJECTS> <LINK_LIBRARIES>\")\nendif()\n\n#-- library\nif(BUILD_STATIC_LIB)\n  add_library(xgboost STATIC)\nelse()\n  # Provide versioned shared object\n  add_library(xgboost SHARED)\n  set_target_properties(xgboost PROPERTIES\n    VERSION ${PROJECT_VERSION}\n    SOVERSION ${PROJECT_VERSION_MAJOR})\nendif()\ntarget_link_libraries(xgboost PRIVATE objxgboost)\ntarget_include_directories(xgboost\n  INTERFACE\n  $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>\n  $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/include>)\n#-- End shared library\n\n# Common setup for all targets\nforeach(target xgboost objxgboost)\n  xgboost_target_properties(${target})\n  xgboost_target_link_libraries(${target})\n  
xgboost_target_defs(${target})\nendforeach()\nif(NOT BUILD_WITH_SYSTEM_DMLC)\n  xgboost_target_properties(dmlc)\n  xgboost_target_link_libraries(dmlc)\n  xgboost_target_defs(dmlc)\nendif()\n\nif(JVM_BINDINGS)\n  xgboost_target_properties(xgboost4j)\n  xgboost_target_link_libraries(xgboost4j)\n  xgboost_target_defs(xgboost4j)\nendif()\n\nif(USE_OPENMP AND APPLE AND NOT BUILD_STATIC_LIB)\n  patch_openmp_path_macos(xgboost libxgboost)\nendif()\n\nif(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)\n  set_output_directory(xgboost ${xgboost_BINARY_DIR}/lib)\nelse()\n  set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib)\nendif()\n\n#-- Installing XGBoost\nif(R_LIB)\n  include(cmake/RPackageInstallTargetSetup.cmake)\n  set_target_properties(xgboost PROPERTIES PREFIX \"\")\n  if(APPLE)\n    set_target_properties(xgboost PROPERTIES SUFFIX \".so\")\n  endif()\n  setup_rpackage_install_target(xgboost \"${CMAKE_CURRENT_BINARY_DIR}/R-package-install\")\n  set(CMAKE_INSTALL_PREFIX \"${CMAKE_CURRENT_BINARY_DIR}/dummy_inst\")\nendif()\nif(MINGW)\n  set_target_properties(xgboost PROPERTIES PREFIX \"\")\nendif()\n\nif(BUILD_C_DOC)\n  include(cmake/Doc.cmake)\n  run_doxygen()\nendif()\n\ninclude(CPack)\n\ninclude(GNUInstallDirs)\n# Install all headers.  Please note that currently the C++ headers does not form an \"API\".\ninstall(DIRECTORY ${xgboost_SOURCE_DIR}/include/xgboost\n  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})\n\n# Install libraries. If `xgboost` is a static lib, specify `objxgboost` also, to avoid the\n# following error:\n#\n#  > install(EXPORT ...) 
includes target \"xgboost\" which requires target \"objxgboost\" that is not\n#  > in any export set.\n#\n# https://github.com/dmlc/xgboost/issues/6085\nif(BUILD_STATIC_LIB)\n  set(INSTALL_TARGETS xgboost objxgboost dmlc)\nelse()\n  set(INSTALL_TARGETS xgboost)\nendif()\n\ninstall(TARGETS ${INSTALL_TARGETS}\n  EXPORT XGBoostTargets\n  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}\n  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}\n  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}\n  INCLUDES DESTINATION ${LIBLEGACY_INCLUDE_DIRS})\ninstall(EXPORT XGBoostTargets\n  FILE XGBoostTargets.cmake\n  NAMESPACE xgboost::\n  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)\n\ninclude(CMakePackageConfigHelpers)\nconfigure_package_config_file(\n  ${CMAKE_CURRENT_LIST_DIR}/cmake/xgboost-config.cmake.in\n  ${CMAKE_CURRENT_BINARY_DIR}/cmake/xgboost-config.cmake\n  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)\nwrite_basic_package_version_file(\n  ${CMAKE_BINARY_DIR}/cmake/xgboost-config-version.cmake\n  VERSION ${XGBOOST_VERSION}\n  COMPATIBILITY AnyNewerVersion)\ninstall(\n  FILES\n  ${CMAKE_CURRENT_BINARY_DIR}/cmake/xgboost-config.cmake\n  ${CMAKE_BINARY_DIR}/cmake/xgboost-config-version.cmake\n  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)\n\n#-- Test\nif(GOOGLE_TEST)\n  enable_testing()\n  # Unittests.\n  add_executable(testxgboost)\n  target_link_libraries(testxgboost PRIVATE objxgboost)\n  xgboost_target_properties(testxgboost)\n  xgboost_target_link_libraries(testxgboost)\n  xgboost_target_defs(testxgboost)\n\n  add_subdirectory(${xgboost_SOURCE_DIR}/tests/cpp)\n\n  add_test(\n    NAME TestXGBoostLib\n    COMMAND testxgboost\n    WORKING_DIRECTORY ${xgboost_BINARY_DIR})\nendif()\n\n# Add xgboost.pc\nif(ADD_PKGCONFIG)\n  configure_file(${xgboost_SOURCE_DIR}/cmake/xgboost.pc.in ${xgboost_BINARY_DIR}/xgboost.pc @ONLY)\n\n  install(\n    FILES ${xgboost_BINARY_DIR}/xgboost.pc\n    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)\nendif()\n"
  },
  {
    "path": "CONTRIBUTORS.md",
    "content": "Contributors of DMLC/XGBoost\n============================\nXGBoost has been developed and used by a group of active community. Everyone is more than welcomed to is a great way to make the project better and more accessible to more users.\n\nProject Management Committee(PMC) \n----------\nThe Project Management Committee(PMC) consists group of active committers that moderate the discussion, manage the project release, and proposes new committer/PMC members. \n\n* [Tianqi Chen](https://github.com/tqchen), University of Washington\n  - Tianqi is a Ph.D. student working on large-scale machine learning. He is the creator of the project.\n* [Michael Benesty](https://github.com/pommedeterresautee)\n  - Michael is a lawyer and data scientist in France. He is the creator of XGBoost interactive analysis module in R.\n* [Yuan Tang](https://github.com/terrytangyuan), Red Hat\n  - Yuan is a Senior Principal Software Engineer at Red Hat AI. He contributed mostly in R and Python packages.\n* [Nan Zhu](https://github.com/CodingCat), Uber\n  - Nan is a software engineer in Uber. He contributed mostly in JVM packages.\n* [Jiaming Yuan](https://github.com/trivialfis)\n  - Jiaming contributed to the GPU algorithms. He has also introduced new abstractions to improve the quality of the C++ codebase.\n* [Hyunsu Cho](http://hyunsu-cho.io/), NVIDIA\n  - Hyunsu is the maintainer of the XGBoost Python package. He also manages the Jenkins continuous integration system (https://xgboost-ci.net/). He is the initial author of the CPU 'hist' updater.\n* [Rory Mitchell](https://github.com/RAMitchell), University of Waikato\n  - Rory is a Ph.D. student at University of Waikato. He is the original creator of the GPU training algorithms. He improved the CMake build system and continuous integration. 
\n* [Hongliang Liu](https://github.com/phunterlau)\n\n\nCommitters\n----------\nCommitters are people who have made substantial contribution to the project and granted write access to the project.\n\n* [Tong He](https://github.com/hetong007), Amazon AI\n  - Tong is an applied scientist in Amazon AI. He is the maintainer of XGBoost R package.\n* [Vadim Khotilovich](https://github.com/khotilov)\n  - Vadim contributes many improvements in R and core packages.\n* [Bing Xu](https://github.com/antinucleon)\n  - Bing is the original creator of XGBoost Python package and currently the maintainer of [XGBoost.jl](https://github.com/antinucleon/XGBoost.jl).\n* [Sergei Lebedev](https://github.com/superbobry), Criteo\n  - Sergei is a software engineer in Criteo. He contributed mostly in JVM packages.\n* [Scott Lundberg](http://scottlundberg.com/), University of Washington\n  - Scott is a Ph.D. student at University of Washington. He is the creator of SHAP, a unified approach to explain the output of machine learning models such as decision tree ensembles. 
He also helps maintain the XGBoost Julia package.\n* [Egor Smirnov](https://github.com/SmirnovEgorRu), Intel\n  - Egor has led a major effort to improve the performance of XGBoost on multi-core CPUs.\n\n\nBecome a Committer\n------------------\nXGBoost is an open source project and we are actively looking for new committers who are willing to help maintain and lead the project.\nCommitters come from contributors who:\n* Made substantial contributions to the project.\n* Are willing to spend time on maintaining and leading the project.\n\nNew committers will be proposed by current committer members, with support from more than two of the current committers.\n\nList of Contributors\n--------------------\n* [Full List of Contributors](https://github.com/dmlc/xgboost/graphs/contributors)\n  - To contributors: please add your name to the list when you submit a patch to the project:)\n* [Kailong Chen](https://github.com/kalenhaha)\n  - Kailong is an early contributor of XGBoost; he is the creator of ranking objectives in XGBoost.\n* [Skipper Seabold](https://github.com/jseabold)\n  - Skipper is the major contributor to the scikit-learn module of XGBoost.\n* [Zygmunt Zając](https://github.com/zygmuntz)\n  - Zygmunt is the master behind the early stopping feature frequently used by Kagglers.\n* [Ajinkya Kale](https://github.com/ajkl)\n* [Boliang Chen](https://github.com/cblsjtu)\n* [Yangqing Men](https://github.com/yanqingmen)\n  - Yangqing is the creator of XGBoost java package.\n* [Engpeng Yao](https://github.com/yepyao)\n* [Giulio](https://github.com/giuliohome)\n  - Giulio is the creator of Windows project of XGBoost\n* [Jamie Hall](https://github.com/nerdcha)\n  - Jamie is the initial creator of XGBoost scikit-learn module.\n* [Yen-Ying Lee](https://github.com/white1033)\n* [Masaaki Horikoshi](https://github.com/sinhrks)\n  - Masaaki is the initial creator of XGBoost Python plotting module.\n* [daiyl0320](https://github.com/daiyl0320)\n  - daiyl0320 contributed patch to XGBoost 
distributed version more robust, and scales stably on TB scale datasets.\n* [Huayi Zhang](https://github.com/irachex)\n* [Johan Manders](https://github.com/johanmanders)\n* [yoori](https://github.com/yoori)\n* [Mathias Müller](https://github.com/far0n)\n* [Sam Thomson](https://github.com/sammthomson)\n* [ganesh-krishnan](https://github.com/ganesh-krishnan)\n* [Damien Carol](https://github.com/damiencarol)\n* [Alex Bain](https://github.com/convexquad)\n* [Baltazar Bieniek](https://github.com/bbieniek)\n* [Adam Pocock](https://github.com/Craigacp)\n* [Gideon Whitehead](https://github.com/gaw89)\n* [Yi-Lin Juang](https://github.com/frankyjuang)\n* [Andrew Hannigan](https://github.com/andrewhannigan)\n* [Andy Adinets](https://github.com/canonizer)\n* [Henry Gouk](https://github.com/henrygouk)\n* [Pierre de Sahb](https://github.com/pdesahb)\n* [liuliang01](https://github.com/liuliang01)\n  - liuliang01 added support for the qid column for LIBSVM input format. This makes ranking task easier in distributed setting.\n* [Andrew Thia](https://github.com/BlueTea88)\n  - Andrew Thia implemented feature interaction constraints\n* [Wei Tian](https://github.com/weitian)\n* [Chen Qin](https://github.com/chenqin)\n* [Sam Wilkinson](https://samwilkinson.io)\n* [Matthew Jones](https://github.com/mt-jones)\n* [Jiaxiang Li](https://github.com/JiaxiangBU)\n* [Bryan Woods](https://github.com/bryan-woods)\n  - Bryan added support for cross-validation for the ranking objective\n* [Haoda Fu](https://github.com/fuhaoda)\n* [Evan Kepner](https://github.com/EvanKepner)\n  - Evan Kepner added support for os.PathLike file paths in Python\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright (c) 2019 by Contributors\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "NEWS.md",
    "content": "XGBoost Change Log\n==================\n\n**Starting from 2.1.0, release note is recorded in the documentation.**\n\nThis file records the changes in xgboost library in reverse chronological order.\n\n## 2.0.0 (2023 Aug 16)\n\nWe are excited to announce the release of XGBoost 2.0. This note will begin by covering some overall changes and then highlight specific updates to the package.\n\n### Initial work on multi-target trees with vector-leaf outputs\nWe have been working on vector-leaf tree models for multi-target regression, multi-label classification, and multi-class classification in version 2.0. Previously, XGBoost would build a separate model for each target. However, with this new feature that's still being developed, XGBoost can build one tree for all targets. The feature has multiple benefits and trade-offs compared to the existing approach. It can help prevent overfitting, produce smaller models, and build trees that consider the correlation between targets. In addition, users can combine vector leaf and scalar leaf trees during a training session using a callback. Please note that the feature is still a working in progress, and many parts are not yet available. See #9043 for the current status. Related PRs: (#8538, #8697, #8902, #8884, #8895, #8898, #8612, #8652, #8698, #8908, #8928, #8968, #8616, #8922, #8890, #8872, #8889, #9509) Please note that, only the `hist` (default) tree method on CPU can be used for building vector leaf trees at the moment.\n\n### New `device` parameter.\n\nA new `device` parameter is set to replace the existing `gpu_id`, `gpu_hist`, `gpu_predictor`, `cpu_predictor`, `gpu_coord_descent`, and the PySpark specific parameter `use_gpu`. Onward, users need only the `device` parameter to select which device to run along with the ordinal of the device. For more information, please see our document page (https://xgboost.readthedocs.io/en/stable/parameter.html#general-parameters) . 
For example, with  `device=\"cuda\", tree_method=\"hist\"`, XGBoost will run the `hist` tree method on GPU. (#9363, #8528, #8604, #9354, #9274, #9243, #8896, #9129, #9362, #9402, #9385, #9398, #9390, #9386, #9412, #9507, #9536). The old behavior of ``gpu_hist``  is preserved but deprecated. In addition, the `predictor` parameter is removed.\n\n\n### `hist` is now the default tree method\nStarting from 2.0, the `hist` tree method will be the default. In previous versions, XGBoost chooses `approx` or `exact` depending on the input data and training environment. The new default can help XGBoost train models more efficiently and consistently. (#9320, #9353)\n\n### GPU-based approx tree method\nThere's initial support for using the `approx` tree method on GPU. The performance of the `approx` is not yet well optimized but is feature complete except for the JVM packages. It can be accessed through the use of the parameter combination `device=\"cuda\", tree_method=\"approx\"`. (#9414, #9399, #9478). Please note that the Scala-based Spark interface is not yet supported.\n\n### Optimize and bound the size of the histogram on CPU, to control memory footprint\n\nXGBoost has a new parameter `max_cached_hist_node` for users to limit the CPU cache size for histograms. It can help prevent XGBoost from caching histograms too aggressively. Without the cache, performance is likely to decrease. However, the size of the cache grows exponentially with the depth of the tree. The limit can be crucial when growing deep trees. In most cases, users need not configure this parameter as it does not affect the model's accuracy. (#9455, #9441, #9440, #9427, #9400).\n\nAlong with the cache limit, XGBoost also reduces the memory usage of the `hist` and `approx` tree method on distributed systems by cutting the size of the cache by half. (#9433)\n\n### Improved external memory support\nThere is some exciting development around external memory support in XGBoost. 
It's still an experimental feature, but the performance has been significantly improved with the default `hist` tree method. We replaced the old file IO logic with memory map. In addition to performance, we have reduced CPU memory usage and added extensive documentation. Beginning from 2.0.0, we encourage users to try it with the `hist` tree method when the memory saving by `QuantileDMatrix` is not sufficient. (#9361, #9317, #9282, #9315, #8457)\n\n### Learning to rank\nWe created a brand-new implementation for the learning-to-rank task. With the latest version, XGBoost gained a set of new features for ranking task including:\n\n- A new parameter `lambdarank_pair_method` for choosing the pair construction strategy.\n- A new parameter `lambdarank_num_pair_per_sample` for controlling the number of samples for each group.\n- An experimental implementation of unbiased learning-to-rank, which can be accessed using the `lambdarank_unbiased` parameter.\n- Support for custom gain function with `NDCG` using the `ndcg_exp_gain` parameter.\n- Deterministic GPU computation for all objectives and metrics.\n- `NDCG` is now the default objective function.\n- Improved performance of metrics using caches.\n- Support scikit-learn utilities for `XGBRanker`.\n- Extensive documentation on how learning-to-rank works with XGBoost.\n\nFor more information, please see the [tutorial](https://xgboost.readthedocs.io/en/latest/tutorials/learning_to_rank.html). Related PRs: (#8771, #8692, #8783, #8789, #8790, #8859, #8887, #8893, #8906, #8931, #9075, #9015, #9381, #9336, #8822, #9222, #8984, #8785, #8786, #8768)\n\n### Automatically estimated intercept\n\nIn the previous version, `base_score` was a constant that could be set as a training parameter. In the new version, XGBoost can automatically estimate this parameter based on input labels for optimal accuracy. 
(#8539, #8498, #8272, #8793, #8607)\n\n### Quantile regression\nThe XGBoost algorithm now supports quantile regression, which involves minimizing the quantile loss (also called \"pinball loss\"). Furthermore, XGBoost allows for training with multiple target quantiles simultaneously with one tree per quantile. (#8775, #8761, #8760, #8758, #8750)\n\n### L1 and Quantile regression now supports learning rate\nBoth objectives use adaptive trees due to the lack of proper Hessian values. In the new version, XGBoost can scale the leaf value with the learning rate accordingly. (#8866)\n\n### Export cut value\n\nUsing the Python or the C package, users can export the quantile values (not to be confused with quantile regression) used for the `hist` tree method. (#9356)\n\n### column-based split and federated learning\nWe made progress on column-based split for federated learning. In 2.0, both `approx`, `hist`, and `hist` with vector leaf can work with column-based data split, along with support for vertical federated learning. Work on GPU support is still on-going, stay tuned. (#8576, #8468, #8442, #8847, #8811, #8985, #8623, #8568, #8828, #8932, #9081, #9102, #9103, #9124, #9120, #9367, #9370, #9343, #9171, #9346, #9270, #9244, #8494, #8434, #8742, #8804, #8710, #8676, #9020, #9002, #9058, #9037, #9018, #9295, #9006, #9300, #8765, #9365, #9060)\n\n### PySpark\nAfter the initial introduction of the PySpark interface, it has gained some new features and optimizations in 2.0.\n\n- GPU-based prediction. (#9292, #9542)\n- Optimization for data initialization by avoiding the stack operation. (#9088)\n- Support predict feature contribution. (#8633)\n- Python typing support. (#9156, #9172, #9079, #8375)\n- `use_gpu` is deprecated. 
The `device` parameter is preferred.\n- Update eval_metric validation to support list of strings (#8826)\n- Improved logs for training (#9449)\n- Maintenance, including refactoring and document updates (#8324, #8465, #8605, #9202, #9460, #9302, #8385, #8630, #8525, #8496)\n- Fix for GPU setup. (#9495)\n\n### Other General New Features\nHere's a list of new features that don't have their own section and yet are general to all language bindings.\n\n- Use array interface for CSC matrix. This helps XGBoost to use a consistent number of threads and align the interface of the CSC matrix with other interfaces. In addition, memory usage is likely to decrease with CSC input thanks to on-the-fly type conversion. (#8672)\n- CUDA compute 90 is now part of the default build. (#9397)\n\n### Other General Optimization\nThese optimizations are general to all language bindings. For language-specific optimization, please visit the corresponding sections.\n\n- Performance for input with `array_interface` on CPU (like `numpy`) is significantly improved. (#9090)\n- Some optimization with CUDA for data initialization. (#9199, #9209, #9144)\n- Use the latest thrust policy to prevent synchronizing GPU devices. (#9212)\n- XGBoost now uses a per-thread CUDA stream, which prevents synchronization with other streams. (#9416, #9396, #9413)\n\n### Notable breaking change\n\nOther than the aforementioned change with the `device` parameter, here's a list of breaking changes affecting all packages.\n\n- Users must specify the format for text input (#9077). However, we suggest using third-party data structures such as `numpy.ndarray` instead of relying on text inputs. See https://github.com/dmlc/xgboost/issues/9472 for more info.\n\n### Notable bug fixes\n\nSome noteworthy bug fixes that are not related to specific language bindings are listed in this section.\n\n- Some language environments use a different thread to perform garbage collection, which breaks the thread-local cache used in XGBoost. 
XGBoost 2.0 implements a new thread-safe cache using a lightweight lock to replace the thread-local cache. (#8851)\n- Fix model IO by clearing the prediction cache. (#8904)\n- `inf` is checked during data construction. (#8911)\n- Preserve order of saved updaters configuration. Usually, this is not an issue unless the `updater` parameter is used instead of the `tree_method` parameter (#9355)\n- Fix GPU memory allocation issue with categorical splits. (#9529)\n- Handle escape sequence like `\\t\\n` in feature names for JSON model dump. (#9474)\n- Normalize file path for model IO and text input. This handles short paths on Windows and paths that contain `~` on Unix (#9463). In addition, all path inputs are required to be encoded in UTF-8 (#9448, #9443)\n- Fix integer overflow on H100. (#9380)\n- Fix weighted sketching on GPU with categorical features. (#9341)\n- Fix metric serialization. The bug might cause some of the metrics to be dropped during evaluation. (#9405)\n- Fixes compilation errors on MSVC x86 targets (#8823)\n- Pick up the dmlc-core fix for the CSV parser. (#8897)\n\n\n### Documentation\nAside from documents for new features, we have many smaller updates to improve user experience, from troubleshooting guides to typo fixes.\n\n- Explain CPU/GPU interop. (#8450)\n- Guide to troubleshoot NCCL errors. (#8943, #9206)\n- Add a note for rabit port selection. (#8879)\n- How to build the docs using conda (#9276)\n- Explain how to obtain reproducible results on distributed systems. (#8903)\n\n* Fixes and small updates to document and demonstration scripts. (#8626, #8436, #8995, #8907, #8923, #8926, #9358, #9232, #9201, #9469, #9462, #9458, #8543, #8597, #8401, #8784, #9213, #9098, #9008, #9223, #9333, #9434, #9435, #9415, #8773, #8752, #9291, #9549)\n\n### Python package\n* New Features and Improvements\n- Support primitive types of pyarrow-backed pandas dataframe. (#8653)\n- Warning messages emitted by XGBoost are now emitted using Python warnings. 
(#9387)\n- User can now format the value printed near the bars on the `plot_importance` plot (#8540)\n- XGBoost has improved half-type support (float16) with pandas, cupy, and cuDF. With GPU input, the handling is through CUDA `__half` type, and no data copy is made. (#8487, #9207, #8481)\n- Support `Series` and Python primitive types in `inplace_predict` and `QuantileDMatrix` (#8547, #8542)\n- Support all pandas' nullable integer types. (#8480)\n- Custom metric with the scikit-learn interface now supports `sample_weight`. (#8706)\n- Enable Installation of Python Package with System lib in a Virtual Environment (#9349)\n- Raise if expected workers are not alive in `xgboost.dask.train` (#9421)\n\n* Optimization\n- Cache transformed data in `QuantileDMatrix` for efficiency. (#8666, #9445)\n- Take datatable as row-major input. (#8472)\n- Remove unnecessary conversions between data structures (#8546)\n\n* Adopt modern Python packaging conventions (PEP 517, PEP 518, PEP 621)\n-  XGBoost adopted the modern Python packaging conventions. The old setup script `setup.py` is now replaced with the new configuration file `pyproject.toml`. Along with this, XGBoost now supports Python 3.11. (#9021, #9112, #9114, #9115) Consult the latest documentation for the updated instructions to build and install XGBoost.\n\n* Fixes\n- `DataIter` now accepts only keyword arguments. (#9431)\n- Fix empty DMatrix with categorical features. (#8739)\n- Convert ``DaskXGBClassifier.classes_`` to an array (#8452)\n- Define `best_iteration` only if early stopping is used to be consistent with documented behavior. (#9403)\n- Make feature validation immutable. (#9388)\n\n* Breaking changes\n- Discussed in the new `device` parameter section,  the `predictor` parameter is now removed. (#9129)\n- Remove support for single-string feature info. Feature type and names should be a sequence of strings (#9401)\n- Remove parameters in the `save_model` call for the scikit-learn interface. 
(#8963)\n- Remove the `ntree_limit` in the python package. This has been deprecated in previous versions. (#8345)\n\n* Maintenance including formatting and refactoring along with type hints.\n- More consistent use of `black` and `isort` for code formatting (#8420, #8748, #8867)\n- Improved type support. Most of the type changes happen in the PySpark module; here, we list the remaining changes. (#8444, #8617, #9197, #9005)\n- Set `enable_categorical` to True in predict. (#8592)\n- Some refactoring and updates for tests (#8395, #8372, #8557, #8379, #8702, #9459, #9316, #8446, #8695, #8409, #8993, #9480)\n\n* Documentation\n- Add introduction and notes for the sklearn interface. (#8948)\n- Demo for using dask for hyper-parameter optimization. (#8891)\n- Document all supported Python input types. (#8643)\n- Other documentation updates (#8944, #9304)\n\n### R package\n- Use the new data consumption interface for CSR and CSC. This provides better control for the number of threads and improves performance. (#8455, #8673)\n- Accept multiple evaluation metrics during training. (#8657)\n- Fix integer inputs with `NA`. (#9522)\n- Some refactoring for the R package (#8545, #8430, #8614, #8624, #8613, #9457, #8689, #8563, #9461, #8647, #8564, #8565, #8736, #8610, #8609, #8599, #8704, #9456, #9450, #9476, #9477, #9481). Special thanks to @jameslamb.\n- Document updates (#8886, #9323, #9437, #8998)\n\n### JVM packages\nFollowing are changes specific to various JVM-based packages.\n\n- Stop using Rabit in prediction (#9054)\n- Set feature_names and feature_types in jvm-packages. This is to prepare support for categorical features (#9364)\n- Scala 2.13 support. (#9099)\n- Change training stage from `ResultStage` to `ShuffleMapStage` (#9423)\n- Automatically set the max/min direction for the best score during early stopping. (#9404)\n* Revised support for `flink` (#9046)\n\n* Breaking changes\n- Scala-based tracker is removed. 
(#9078, #9045)\n- Change `DeviceQuantileDmatrix` into `QuantileDMatrix` (#8461)\n\n* Maintenance (#9253, #9166, #9395, #9389, #9224, #9233, #9351, #9479)\n\n* CI bot PRs\nWe employed GitHub dependent bot to help us keep the dependencies up-to-date for JVM packages. With the help from the bot, we have cleared up all the dependencies that are lagging behind (#8501, #8507).\n\nHere's a list of dependency update PRs including those made by dependent bots (#8456, #8560, #8571, #8561, #8562, #8600, #8594, #8524, #8509, #8548, #8549, #8533, #8521, #8534, #8532, #8516, #8503, #8531, #8530, #8518, #8512, #8515, #8517, #8506, #8504, #8502, #8629, #8815, #8813, #8814, #8877, #8876, #8875, #8874, #8873, #9049, #9070, #9073, #9039, #9083, #8917, #8952, #8980, #8973, #8962, #9252, #9208, #9131, #9136, #9219, #9160, #9158, #9163, #9184, #9192, #9265, #9268, #8882, #8837, #8662, #8661, #8390, #9056, #8508, #8925, #8920, #9149, #9230, #9097, #8648, #9203, #8593).\n\n### Maintenance\nMaintenance work includes refactoring, fixing small issues that don't affect end users. (#9256, #8627, #8756, #8735, #8966, #8864, #8747, #8892, #9057, #8921, #8949, #8941, #8942, #9108, #9125, #9155, #9153, #9176, #9447, #9444, #9436, #9438, #9430, #9200, #9210, #9055, #9014, #9004, #8999, #9154, #9148, #9283, #9246, #8888, #8900, #8871, #8861, #8858, #8791, #8807, #8751, #8703, #8696, #8693, #8677, #8686, #8665, #8660, #8386, #8371, #8410, #8578, #8574, #8483, #8443, #8454, #8733)\n\n### CI\n- Build pip wheel with RMM support (#9383)\n- Other CI updates including updating dependencies and work on the CI infrastructure. (#9464, #9428, #8767, #9394, #9278, #9214, #9234, #9205, #9034, #9104, #8878, #9294, #8625, #8806, #8741, #8707, #8381, #8382, #8388, #8402, #8397, #8445, #8602, #8628, #8583, #8460, #9544)\n\n## 1.7.6 (2023 Jun 16)\n\nThis is a patch release for bug fixes. 
The CRAN package for the R binding is kept at 1.7.5.\n\n### Bug Fixes\n* Fix distributed training with mixed dense and sparse partitions. (#9272)\n* Fix monotone constraints on CPU with large trees. (#9122)\n* [spark] Make the spark model have the same UID as its estimator (#9022)\n* Optimize prediction with `QuantileDMatrix`. (#9096)\n\n### Document\n* Improve doxygen (#8959)\n* Update the cuDF pip index URL. (#9106)\n\n### Maintenance\n* Fix tests with pandas 2.0. (#9014)\n\n## 1.7.5 (2023 Mar 30)\nThis is a patch release for bug fixes.\n\n* C++ requirement is updated to C++-17, along with which, CUDA 11.8 is used as the default CTK. (#8860, #8855, #8853)\n* Fix import for pyspark ranker. (#8692)\n* Fix Windows binary wheel to be compatible with Poetry (#8991)\n* Fix GPU hist with column sampling. (#8850)\n* Make sure iterative DMatrix is properly initialized. (#8997)\n* [R] Update link in document. (#8998)\n\n## 1.7.4 (2023 Feb 16)\nThis is a patch release for bug fixes.\n\n* [R] Fix OpenMP detection on macOS. (#8684)\n* [Python] Make sure input numpy array is aligned. (#8690)\n* Fix feature interaction with column sampling in gpu_hist evaluator. (#8754)\n* Fix GPU L1 error. (#8749)\n* [PySpark] Fix feature types param (#8772)\n* Fix ranking with quantile dmatrix and group weight. (#8762)\n\n## 1.7.3 (2023 Jan 6)\nThis is a patch release for bug fixes.\n\n* [Breaking] XGBoost Sklearn estimator method `get_params` no longer returns internally configured values. (#8634)\n* Fix linalg iterator, which may crash the L1 error. (#8603)\n* Fix loading pickled GPU model with a CPU-only XGBoost build. (#8632)\n* Fix inference with unseen categories with categorical features. (#8591, #8602)\n* CI fixes. (#8620, #8631, #8579)\n\n## v1.7.2 (2022 Dec 8)\nThis is a patch release for bug fixes.\n\n* Work with newer thrust and libcudacxx (#8432)\n* Support null value in CUDA array interface namespace. (#8486)\n* Use `getsockname` instead of `SO_DOMAIN` on AIX. 
(#8437)\n* [pyspark] Make QDM optional based on a cuDF check (#8471)\n* [pyspark] sort qid for SparkRanker. (#8497)\n* [dask] Properly await async method client.wait_for_workers. (#8558)\n\n* [R] Fix CRAN test notes. (#8428)\n\n* [doc] Fix outdated document [skip ci]. (#8527)\n* [CI] Fix github action mismatched glibcxx. (#8551)\n\n## v1.7.1 (2022 Nov 3)\nThis is a patch release to incorporate the following hotfix:\n\n* Add back xgboost.rabit for backwards compatibility (#8411)\n\n\n## v1.7.0 (2022 Oct 20)\n\nWe are excited to announce the feature packed XGBoost 1.7 release. The release note will walk through some of the major new features first, then make a summary for other improvements and language-binding-specific changes.\n\n### PySpark\n\nXGBoost 1.7 features initial support for PySpark integration. The new interface is adapted from the existing PySpark XGBoost interface developed by databricks with additional features like `QuantileDMatrix` and the rapidsai plugin (GPU pipeline) support. The new Spark XGBoost Python estimators not only benefit from PySpark ml facilities for powerful distributed computing but also enjoy the rest of the Python ecosystem. Users can define a custom objective, callbacks, and metrics in Python and use them with this interface on distributed clusters. The support is labeled as experimental with more features to come in future releases. For a brief introduction please visit the tutorial on XGBoost's [document page](https://xgboost.readthedocs.io/en/latest/tutorials/spark_estimator.html). (#8355, #8344, #8335, #8284, #8271, #8283, #8250, #8231, #8219, #8245, #8217, #8200, #8173, #8172, #8145, #8117, #8131, #8088, #8082, #8085, #8066, #8068, #8067, #8020, #8385)\n\nDue to its initial support status, the new interface has some limitations; categorical features and multi-output models are not yet supported.\n\n### Development of categorical data support\nMore progress on the experimental support for categorical features. 
In 1.7, XGBoost can handle missing values in categorical features and features a new parameter `max_cat_threshold`, which limits the number of categories that can be used in the split evaluation. The parameter is enabled when the partitioning algorithm is used and helps prevent over-fitting. Also, the sklearn interface can now accept the `feature_types` parameter to use data types other than dataframe for categorical features. (#8280, #7821, #8285, #8080, #7948, #7858, #7853, #8212, #7957, #7937, #7934)\n\n\n###  Experimental support for federated learning and new communication collective\n\nAn exciting addition to XGBoost is the experimental federated learning support. The federated learning is implemented with a gRPC federated server that aggregates allreduce calls, and federated clients that train on local data and use existing tree methods (approx, hist, gpu_hist). Currently, this only supports horizontal federated learning (samples are split across participants, and each participant has all the features and labels). Future plans include vertical federated learning (features split across participants), and stronger privacy guarantees with homomorphic encryption and differential privacy. See [Demo with NVFlare integration](demo/nvflare/README.md) for example usage with nvflare.\n\nAs part of the work, XGBoost 1.7 has replaced the old rabit module with the new collective module as the network communication interface with added support for runtime backend selection. In previous versions, the backend is defined at compile time and can not be changed once built. 
In this new release, users can choose between `rabit` and `federated.` (#8029, #8351, #8350, #8342, #8340, #8325, #8279, #8181, #8027, #7958, #7831, #7879, #8257, #8316, #8242, #8057, #8203, #8038, #7965, #7930, #7911)\n\nThe feature is available in the public PyPI binary package for testing.\n\n### Quantile DMatrix\nBefore 1.7, XGBoost has an internal data structure called `DeviceQuantileDMatrix` (and its distributed version). We now extend its support to CPU and renamed it to `QuantileDMatrix`. This data structure is used for optimizing memory usage for the `hist` and `gpu_hist` tree methods. The new feature helps reduce CPU memory usage significantly, especially for dense data. The new `QuantileDMatrix` can be initialized from both CPU and GPU data, and regardless of where the data comes from, the constructed instance can be used by both the CPU algorithm and GPU algorithm including training and prediction (with some overhead of conversion if the device of data and training algorithm doesn't match). Also, a new parameter `ref` is added to `QuantileDMatrix`, which can be used to construct validation/test datasets. Lastly, it's set as default in the scikit-learn interface when a supported tree method is specified by users. (#7889, #7923, #8136, #8215, #8284, #8268, #8220, #8346, #8327, #8130, #8116, #8103, #8094, #8086, #7898, #8060, #8019, #8045, #7901, #7912, #7922)\n\n### Mean absolute error\nThe mean absolute error is a new member of the collection of objectives in XGBoost. It's noteworthy since MAE has zero hessian value, which is unusual to XGBoost as XGBoost relies on Newton optimization. Without valid Hessian values, the convergence speed can be slow. As part of the support for MAE, we added line searches into the XGBoost training algorithm to overcome the difficulty of training without valid Hessian values. In the future, we will extend the line search to other objectives where it's appropriate for faster convergence speed. 
(#8343, #8107, #7812, #8380)\n\n### XGBoost on Browser\nWith the help of the [pyodide](https://github.com/pyodide/pyodide) project, you can now run XGBoost on browsers. (#7954, #8369)\n\n### Experimental IPv6 Support for Dask\n\nWith the growing adoption of the new internet protocol, XGBoost joined the club. In the latest release, the Dask interface can be used on IPv6 clusters, see XGBoost's Dask tutorial for details. (#8225, #8234)\n\n### Optimizations\nWe have new optimizations for both the `hist` and `gpu_hist` tree methods to make XGBoost's training even more efficient.\n\n* Hist\nHist now supports optional by-column histogram build, which is automatically configured based on various conditions of input data. This helps the XGBoost CPU hist algorithm to scale better with different shapes of training datasets. (#8233, #8259). Also, the build histogram kernel now can better utilize CPU registers (#8218)\n\n* GPU Hist\nGPU hist performance is significantly improved for wide datasets. GPU hist now supports batched node build, which reduces kernel latency and increases throughput. The improvement is particularly significant when growing deep trees with the default ``depthwise`` policy. (#7919, #8073, #8051, #8118, #7867, #7964, #8026)\n\n### Breaking Changes\nBreaking changes made in the 1.7 release are summarized below.\n- The  `grow_local_histmaker`  updater is removed. This updater is rarely used in practice and has no test. We decided to remove it and have XGBoost focus on other more efficient algorithms. (#7992, #8091)\n- Single precision histogram is removed due to its lack of accuracy caused by significant floating point error. In some cases the error can be difficult to detect due to log-scale operations, which makes the parameter dangerous to use. (#7892, #7828)\n- Deprecated CUDA architectures are no longer supported in the release binaries. 
(#7774)\n- As part of the federated learning development, the `rabit` module is replaced with the new `collective` module. It's a drop-in replacement with added runtime backend selection, see the federated learning section for more details (#8257)\n\n### General new features and improvements\nBefore diving into package-specific changes, some general new features other than those listed at the beginning are summarized here.\n* Users of `DMatrix` and `QuantileDMatrix` can get the data from XGBoost. In previous versions, only getters for meta info like labels are available. The new method is available in Python (`DMatrix::get_data`) and C. (#8269, #8323)\n* In previous versions, the GPU histogram tree method may generate phantom gradient for missing values due to floating point error. We fixed such an error in this release and XGBoost is much better equipped to handle floating point errors when training on GPU. (#8274, #8246)\n* Parameter validation is no longer experimental. (#8206)\n* C pointer parameters and JSON parameters are rigorously checked. (#8254, #8254)\n* Improved handling of JSON model input. (#7953, #7918)\n* Support IBM i OS (#7920, #8178)\n\n### Fixes\nSome noteworthy bug fixes that are not related to specific language binding are listed in this section.\n* Rename misspelled config parameter for pseudo-Huber (#7904)\n* Fix feature weights with nested column sampling. (#8100)\n* Fix loading DMatrix binary in distributed env. (#8149)\n* Force auc.cc to be statically linked for unusual compiler platforms. (#8039)\n* New logic for detecting libomp on macos (#8384).\n\n### Python Package\n* Python 3.8 is now the minimum required Python version. (#8071)\n* More progress on type hint support. Except for the new PySpark interface, the XGBoost module is fully typed. 
(#7742, #7945, #8302, #7914, #8052)\n* XGBoost now validates the feature names in `inplace_predict`, which also affects the predict function in scikit-learn estimators as it uses `inplace_predict` internally. (#8359)\n* Users can now get the data from `DMatrix` using `DMatrix::get_data` or `QuantileDMatrix::get_data`.\n* Show `libxgboost.so` path in build info. (#7893)\n* Raise import error when using the sklearn module while scikit-learn is missing. (#8049)\n* Use `config_context` in the sklearn interface. (#8141)\n* Validate features for inplace prediction. (#8359)\n* Pandas dataframe handling is refactored to reduce data fragmentation. (#7843)\n* Support more pandas nullable types (#8262)\n* Remove pyarrow workaround. (#7884)\n\n* Binary wheel size\nWe aim to enable as many features as possible in XGBoost's default binary distribution on PyPI (package installed with pip), but there's an upper limit on the size of the binary wheel. In 1.7, XGBoost reduces the size of the wheel by pruning unused CUDA architectures. (#8179, #8152, #8150)\n\n* Fixes\n  Some noteworthy fixes are listed here:\n  - Fix the Dask interface with the latest cupy. (#8210)\n  - Check cuDF lazily to avoid potential errors with cuda-python. (#8084)\n  - Fix potential error in DMatrix constructor on 32-bit platform. (#8369)\n\n* Maintenance work\n  - Linter script is moved from dmlc-core to XGBoost with added support for formatting, mypy, and parallel run, along with some fixes (#7967, #8101, #8216)\n  - We now require the use of `isort` and `black` for selected files. (#8137, #8096)\n  - Code cleanups. (#7827)\n  - Deprecate `use_label_encoder` in XGBClassifier. The label encoder has already been deprecated and removed in the previous version. These changes only affect the indicator parameter (#7822)\n  - Remove the use of distutils. (#7770)\n  - Refactor and fixes for tests (#8077, #8064, #8078, #8076, #8013, #8010, #8244, #7833)\n\n* Documents\n  - [dask] Fix potential error in demo. 
(#8079)\n  - Improved documentation for the ranker. (#8356, #8347)\n  - Indicate lack of py-xgboost-gpu on Windows (#8127)\n  - Clarification for feature importance. (#8151)\n  - Simplify Python getting started example (#8153)\n\n### R Package\nWe summarize improvements for the R package briefly here:\n* Feature info including names and types are now passed to DMatrix in preparation for categorical feature support. (#804)\n* XGBoost 1.7 can now gracefully load old R models from RDS for better compatibility with 3-party tuning libraries (#7864)\n* The R package now can be built with parallel compilation, along with fixes for warnings in CRAN tests. (#8330)\n* Emit error early if DiagrammeR is missing (#8037)\n* Fix R package Windows build. (#8065)\n\n### JVM Packages\nThe consistency between JVM packages and other language bindings is greatly improved in 1.7, improvements range from model serialization format to the default value of hyper-parameters.\n\n* Java package now supports feature names and feature types for DMatrix in preparation for categorical feature support. (#7966)\n* Models trained by the JVM packages can now be safely used with other language bindings. (#7896, #7907)\n* Users can specify the model format when saving models with a stream. (#7940, #7955)\n* The default value for training parameters is now sourced from XGBoost directly, which helps JVM packages be consistent with other packages. (#7938)\n* Set the correct objective if the user doesn't explicitly set it (#7781)\n* Auto-detection of MUSL is replaced by system properties (#7921)\n* Improved error message for launching tracker. (#7952, #7968)\n* Fix a race condition in parameter configuration. (#8025)\n* [Breaking] ` timeoutRequestWorkers` is now removed. With the support for barrier mode, this parameter is no longer needed. (#7839)\n* Dependencies updates. 
(#7791, #8157, #7801, #8240)\n\n### Documents\n- Document for the C interface is greatly improved and is now displayed at the [sphinx document page](https://xgboost.readthedocs.io/en/latest/c.html). Thanks to the breathe project, you can view the C API just like the Python API. (#8300)\n- We now avoid having XGBoost internal text parser in demos and recommend users use dedicated libraries for loading data whenever it's feasible. (#7753)\n- Python survival training demos are now displayed at [sphinx gallery](https://xgboost.readthedocs.io/en/latest/python/survival-examples/index.html). (#8328)\n- Some typos, links, format, and grammar fixes. (#7800, #7832, #7861, #8099, #8163, #8166, #8229, #8028, #8214, #7777, #7905, #8270, #8309, d70e59fef, #7806)\n- Updated winning solution under readme.md (#7862)\n- New security policy. (#8360)\n- GPU document is overhauled as we consider CUDA support to be feature-complete. (#8378)\n\n### Maintenance\n* Code refactoring and cleanups. (#7850, #7826, #7910, #8332, #8204)\n* Reduce compiler warnings. (#7768, #7916, #8046, #8059, #7974, #8031, #8022)\n* Compiler workarounds. (#8211, #8314, #8226, #8093)\n* Dependencies update. (#8001, #7876, #7973, #8298, #7816)\n* Remove warnings emitted in previous versions. (#7815)\n* Small fixes occurred during development. (#8008)\n\n### CI and Tests\n* We overhauled the CI infrastructure to reduce the CI cost and lift the maintenance burdens. Jenkins is replaced with buildkite for better automation, with which, finer control of test runs is implemented to reduce overall cost. Also, we refactored some of the existing tests to reduce their runtime, reduced the size of docker images, and removed multi-GPU C++ tests. Lastly, `pytest-timeout` is added as an optional dependency for running Python tests to keep the test time in check. 
(#7772, #8291, #8286, #8276, #8306, #8287, #8243, #8313, #8235, #8288, #8303, #8142, #8092, #8333, #8312, #8348)\n* New documents for how to reproduce the CI environment (#7971, #8297)\n* Improved automation for JVM release. (#7882)\n* GitHub Action security-related updates. (#8263, #8267, #8360)\n* Other fixes and maintenance work. (#8154, #7848, #8069, #7943)\n* Small updates and fixes to GitHub action pipelines. (#8364, #8321, #8241, #7950, #8011)\n\n## v1.6.1 (2022 May 9)\nThis is a patch release for bug fixes and Spark barrier mode support. The R package is unchanged.\n\n### Experimental support for categorical data\n- Fix segfault when the number of samples is smaller than the number of categories. (https://github.com/dmlc/xgboost/pull/7853)\n- Enable partition-based split for all model types. (https://github.com/dmlc/xgboost/pull/7857)\n\n### JVM packages\nWe replaced the old parallelism tracker with spark barrier mode to improve the robustness of the JVM package and fix the GPU training pipeline.\n- Fix GPU training pipeline quantile synchronization. (#7823, #7834)\n- Use barrier model in spark package. (https://github.com/dmlc/xgboost/pull/7836, https://github.com/dmlc/xgboost/pull/7840, https://github.com/dmlc/xgboost/pull/7845, https://github.com/dmlc/xgboost/pull/7846)\n- Fix shared object loading on some platforms. (https://github.com/dmlc/xgboost/pull/7844)\n\n## v1.6.0 (2022 Apr 16)\n\nAfter a long period of development, XGBoost v1.6.0 is packed with many new features and\nimprovements. We summarize them in the following sections starting with an introduction to\nsome major new features, then moving on to language binding specific changes including new\nfeatures and notable bug fixes for that binding.\n\n### Development of categorical data support\nThis version of XGBoost features new improvements and full coverage of experimental\ncategorical data support in Python and C package with tree model.  
Both `hist`, `approx`\nand `gpu_hist` now support training with categorical data.  Also, partition-based\ncategorical split is introduced in this release. This split type is first available in\nLightGBM in the context of gradient boosting. The previous XGBoost release supported one-hot split where the splitting criterion is of form `x \\in {c}`, i.e. the categorical feature `x` is tested against a single candidate. The new release allows for more expressive conditions: `x \\in S` where the categorical feature `x` is tested against multiple candidates. Moreover, it is now possible to use any tree algorithms (`hist`, `approx`, `gpu_hist`) when creating categorical splits. For more\ninformation, please see our tutorial on [categorical\ndata](https://xgboost.readthedocs.io/en/latest/tutorials/categorical.html), along with\nexamples linked on that page. (#7380, #7708, #7695, #7330, #7307, #7322, #7705,\n#7652, #7592, #7666, #7576, #7569, #7529, #7575, #7393, #7465, #7385, #7371, #7745, #7810)\n\nIn the future, we will continue to improve categorical data support with new features and\noptimizations. Also, we are looking forward to bringing the feature beyond Python binding,\ncontributions and feedback are welcomed! Lastly, as a result of experimental status, the\nbehavior might be subject to change, especially the default value of related\nhyper-parameters.\n\n### Experimental support for multi-output model\n\nXGBoost 1.6 features initial support for the multi-output model, which includes\nmulti-output regression and multi-label classification. Along with this, the XGBoost\nclassifier has proper support for base margin without the need for the user to flatten the\ninput. 
In this initial support, XGBoost builds one model for each target similar to the\nsklearn meta estimator, for more details, please see our [quick\nintroduction](https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html).\n\n(#7365, #7736, #7607, #7574, #7521, #7514, #7456, #7453, #7455, #7434, #7429, #7405, #7381)\n\n### External memory support\nExternal memory support for both approx and hist tree method is considered feature\ncomplete in XGBoost 1.6.  Building upon the iterator-based interface introduced in the\nprevious version, now both `hist` and `approx` iterates over each batch of data during\ntraining and prediction.  In previous versions, `hist` concatenates all the batches into\nan internal representation, which is removed in this version.  As a result, users can\nexpect higher scalability in terms of data size but might experience lower performance due\nto disk IO. (#7531, #7320, #7638, #7372)\n\n### Rewritten approx\n\nThe `approx` tree method is rewritten based on the existing `hist` tree method. The\nrewrite closes the feature gap between `approx` and `hist` and improves the performance.\nNow the behavior of `approx` should be more aligned with `hist` and `gpu_hist`. Here is a\nlist of user-visible changes:\n\n- Supports both `max_leaves` and `max_depth`.\n- Supports `grow_policy`.\n- Supports monotonic constraint.\n- Supports feature weights.\n- Use `max_bin` to replace `sketch_eps`.\n- Supports categorical data.\n- Faster performance for many of the datasets.\n- Improved performance and robustness for distributed training.\n- Supports prediction cache.\n- Significantly better performance for external memory when `depthwise` policy is used.\n\n### New serialization format\nBased on the existing JSON serialization format, we introduce UBJSON support as a more\nefficient alternative. 
Both formats will be available in the future and we plan to\ngradually [phase out](https://github.com/dmlc/xgboost/issues/7547) support for the old\nbinary model format.  Users can opt to use the different formats in the serialization\nfunction by providing the file extension `json` or `ubj`. Also, the `save_raw` function in\nall supported language bindings gains a new parameter for exporting the model in different\nformats, available options are `json`, `ubj`, and `deprecated`, see document for the\nlanguage binding you are using for details. Lastly, the default internal serialization\nformat is set to UBJSON, which affects Python pickle and R RDS. (#7572, #7570, #7358,\n#7571, #7556, #7549, #7416)\n\n### General new features and improvements\nAside from the major new features mentioned above, some others are summarized here:\n\n* Users can now access the build information of XGBoost binary in Python and C\n  interface. (#7399, #7553)\n* Auto-configuration of `seed_per_iteration` is removed, now distributed training should\n  generate closer results to single node training when sampling is used. (#7009)\n* A new parameter `huber_slope` is introduced for the `Pseudo-Huber` objective.\n* During source build, XGBoost can choose cub in the system path automatically. (#7579)\n* XGBoost now honors the CPU counts from CFS, which is usually set in docker\n  environments. (#7654, #7704)\n* The metric `aucpr` is rewritten for better performance and GPU support. (#7297, #7368)\n* Metric calculation is now performed in double precision. (#7364)\n* XGBoost no longer mutates the global OpenMP thread limit. (#7537, #7519, #7608, #7590,\n  #7589, #7588, #7687)\n* The default behavior of `max_leaves` and `max_depth` is now unified (#7302, #7551).\n* CUDA fat binary is now compressed. (#7601)\n* Deterministic result for evaluation metric and linear model. 
In previous versions of\n  XGBoost, evaluation results might differ slightly for each run due to parallel reduction\n  for floating-point values, which is now addressed. (#7362, #7303, #7316, #7349)\n* XGBoost now uses double for GPU Hist node sum, which improves the accuracy of\n  `gpu_hist`. (#7507)\n\n### Performance improvements\nMost of the performance improvements are integrated into other refactors during feature\ndevelopments. The `approx` should see significant performance gain for many datasets as\nmentioned in the previous section, while the `hist` tree method also enjoys improved\nperformance with the removal of the internal `pruner` along with some other\nrefactoring. Lastly, `gpu_hist` no longer synchronizes the device during training. (#7737)\n\n### General bug fixes\nThis section lists bug fixes that are not specific to any language binding.\n* The `num_parallel_tree` is now a model parameter instead of a training hyper-parameter,\n  which fixes model IO with random forest. (#7751)\n* Fixes in CMake script for exporting configuration. (#7730)\n* XGBoost can now handle unsorted sparse input. This includes text file formats like\n  libsvm and scipy sparse matrix where column index might not be sorted. (#7731)\n* Fix tree param feature type, this affects inputs with the number of columns greater than\n  the maximum value of int32. (#7565)\n* Fix external memory with gpu_hist and subsampling. (#7481)\n* Check the number of trees in inplace predict, this avoids a potential segfault when an\n  incorrect value for `iteration_range` is provided. (#7409)\n* Fix non-stable result in cox regression (#7756)\n\n### Changes in the Python package\nOther than the changes in Dask, the XGBoost Python package gained some new features and\nimprovements along with small bug fixes.\n\n* Python 3.7 is required as the lowest Python version. (#7682)\n* Pre-built binary wheel for Apple Silicon. 
(#7621, #7612, #7747) Apple Silicon users will\n  now be able to run `pip install xgboost` to install XGBoost.\n* MacOS users no longer need to install `libomp` from Homebrew, as the XGBoost wheel now\n  bundles `libomp.dylib` library.\n* There are new parameters for users to specify the custom metric with new\n  behavior. XGBoost can now output transformed prediction values when a custom objective is\n  not supplied.  See our explanation in the\n  [tutorial](https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html#reverse-link-function)\n  for details.\n* For the sklearn interface, following the estimator guideline from scikit-learn, all\n  parameters in `fit` that are not related to input data are moved into the constructor\n  and can be set by `set_params`. (#6751, #7420, #7375, #7369)\n* Apache arrow format is now supported, which can bring better performance to users'\n  pipeline (#7512)\n* Pandas nullable types are now supported (#7760)\n* A new function `get_group` is introduced for `DMatrix` to allow users to get the group\n  information in the custom objective function. (#7564)\n* More training parameters are exposed in the sklearn interface instead of relying on the\n  `**kwargs`. (#7629)\n* A new attribute `feature_names_in_` is defined for all sklearn estimators like\n  `XGBRegressor` to follow the convention of sklearn. (#7526)\n* More work on Python type hint. (#7432, #7348, #7338, #7513, #7707)\n* Support the latest pandas Index type. (#7595)\n* Fix for Feature shape mismatch error on s390x platform (#7715)\n* Fix using feature names for constraints with multiple groups (#7711)\n* We clarified the behavior of the callback function when it contains mutable\n  states. (#7685)\n* Lastly, there are some code cleanups and maintenance work. 
(#7585, #7426, #7634, #7665,\n  #7667, #7377, #7360, #7498, #7438, #7667, #7752, #7749, #7751)\n\n### Changes in the Dask interface\n* Dask module now supports user-supplied host IP and port address of scheduler node.\n  Please see [introduction](https://xgboost.readthedocs.io/en/latest/tutorials/dask.html#troubleshooting) and\n  [API document](https://xgboost.readthedocs.io/en/latest/python/python_api.html#optional-dask-configuration)\n  for reference. (#7645, #7581)\n* Internal `DMatrix` construction in dask now honors thread configuration. (#7337)\n* A fix for `nthread` configuration using the Dask sklearn interface. (#7633)\n* The Dask interface can now handle empty partitions.  An empty partition is different\n  from an empty worker, the latter refers to the case when a worker has no partition of an\n  input dataset, while the former refers to some partitions on a worker that has zero\n  sizes. (#7644, #7510)\n* Scipy sparse matrix is supported as Dask array partition. (#7457)\n* Dask interface is no longer considered experimental. (#7509)\n\n### Changes in the R package\nThis section summarizes the new features, improvements, and bug fixes to the R package.\n\n* `load.raw` can optionally construct a booster as return. (#7686)\n* Fix parsing decision stump, which affects both transforming text representation to data\n  table and plotting. (#7689)\n* Implement feature weights. (#7660)\n* Some improvements for complying with the CRAN release policy. (#7672, #7661, #7763)\n* Support CSR data for predictions (#7615)\n* Document update (#7263, #7606)\n* New maintainer for the CRAN package (#7691, #7649)\n* Handle non-standard installation of toolchain on macos (#7759)\n\n### Changes in JVM-packages\nSome new features for JVM-packages are introduced for a more integrated GPU pipeline and\nbetter compatibility with musl-based Linux. 
Aside from this, we have a few notable bug\nfixes.\n\n* User can specify the tracker IP address for training, which helps running XGBoost on\n  restricted network environments. (#7808)\n* Add support for detecting musl-based Linux (#7624)\n* Add `DeviceQuantileDMatrix` to Scala binding (#7459)\n* Add Rapids plugin support, now more of the JVM pipeline can be accelerated by RAPIDS (#7491, #7779, #7793, #7806)\n* The setters for CPU and GPU are more aligned (#7692, #7798)\n* Control logging for early stopping (#7326)\n* Do not repartition when nWorker = 1 (#7676)\n* Fix the prediction issue for `multi:softmax` (#7694)\n* Fix for serialization of custom objective and eval (#7274)\n* Update documentation about Python tracker (#7396)\n* Remove jackson from dependency, which fixes CVE-2020-36518. (#7791)\n* Some refactoring to the training pipeline for better compatibility between CPU and\n  GPU. (#7440, #7401, #7789, #7784)\n* Maintenance work. (#7550, #7335, #7641, #7523, #6792, #4676)\n\n### Deprecation\nOther than the changes in the Python package and serialization, we removed some deprecated\nfeatures in previous releases. Also, as mentioned in the previous section, we plan to\nphase out the old binary format in future releases.\n\n* Remove old warning in 1.3 (#7279)\n* Remove label encoder deprecated in 1.3. (#7357)\n* Remove old callback deprecated in 1.3. (#7280)\n* Pre-built binary will no longer support deprecated CUDA architectures including sm35 and\n  sm50. Users can continue to use these platforms with source build. (#7767)\n\n### Documentation\nThis section lists some of the general changes to XGBoost's document, for language binding\nspecific change please visit related sections.\n\n* Document is overhauled to use the new RTD theme, along with integration of Python\n  examples using Sphinx gallery. Also, we replaced most of the hard-coded URLs with sphinx\n  references. 
(#7347, #7346, #7468, #7522, #7530)\n* Small update along with fixes for broken links, typos, etc. (#7684, #7324, #7334, #7655,\n  #7628, #7623, #7487, #7532, #7500, #7341, #7648, #7311)\n* Update document for GPU. [skip ci] (#7403)\n* Document the status of RTD hosting. (#7353)\n* Update document for building from source. (#7664)\n* Add note about CRAN release [skip ci] (#7395)\n\n### Maintenance\nThis is a summary of maintenance work that is not specific to any language binding.\n\n* Add CMake option to use /MD runtime (#7277)\n* Add clang-format configuration. (#7383)\n* Code cleanups (#7539, #7536, #7466, #7499, #7533, #7735, #7722, #7668, #7304, #7293,\n  #7321, #7356, #7345, #7387, #7577, #7548, #7469, #7680, #7433, #7398)\n* Improved tests with better coverage and latest dependency (#7573, #7446, #7650, #7520,\n  #7373, #7723, #7611, #7771)\n* Improved automation of the release process. (#7278, #7332, #7470)\n* Compiler workarounds (#7673)\n* Change shebang used in CLI demo. (#7389)\n* Update affiliation (#7289)\n\n### CI\nSome fixes and update to XGBoost's CI infrastructure. (#7739, #7701, #7382, #7662, #7646,\n#7582, #7407, #7417, #7475, #7474, #7479, #7472, #7626)\n\n\n## v1.5.0 (2021 Oct 11)\n\nThis release comes with many exciting new features and optimizations, along with some bug\nfixes.  We will describe the experimental categorical data support and the external memory\ninterface independently. Package-specific new features will be listed in respective\nsections.\n\n### Development on categorical data support\nIn version 1.3, XGBoost introduced an experimental feature for handling categorical data\nnatively, without one-hot encoding. XGBoost can fit categorical splits in decision\ntrees. (Currently, the generated splits will be of form `x \\in {v}`, where the input is\ncompared to a single category value. 
A future version of XGBoost will generate splits that\ncompare the input against a list of multiple category values.)\n\nMost of the other features, including prediction, SHAP value computation, feature\nimportance, and model plotting were revised to natively handle categorical splits.  Also,\nall Python interfaces including native interface with and without quantized `DMatrix`,\nscikit-learn interface, and Dask interface now accept categorical data with a wide range\nof data structures support including numpy/cupy array and cuDF/pandas/modin dataframe.  In\npractice, the following are required for enabling categorical data support during\ntraining:\n\n  - Use Python package.\n  - Use `gpu_hist` to train the model.\n  - Use JSON model file format for saving the model.\n\nOnce the model is trained, it can be used with most of the features that are available on\nthe Python package.  For a quick introduction, see\nhttps://xgboost.readthedocs.io/en/latest/tutorials/categorical.html\n\nRelated PRs: (#7011, #7001, #7042, #7041, #7047, #7043, #7036, #7054, #7053, #7065, #7213, #7228, #7220, #7221, #7231, #7306)\n\n* Next steps\n\n\t- Revise the CPU training algorithm to handle categorical data natively and generate categorical splits\n\t- Extend the CPU and GPU algorithms to generate categorical splits of form `x \\in S`\n\twhere the input is compared with multiple category values.  split. (#7081)\n\n### External memory\nThis release features a brand-new interface and implementation for external memory (also\nknown as out-of-core training).  (#6901, #7064, #7088, #7089, #7087, #7092, #7070,\n#7216). The new implementation leverages the data iterator interface, which is currently\nused to create `DeviceQuantileDMatrix`. For a quick introduction, see\nhttps://xgboost.readthedocs.io/en/latest/tutorials/external_memory.html#data-iterator\n. During the development of this new interface, `lz4` compression is removed. 
(#7076).\nPlease note that external memory support is still experimental and not ready for\nproduction use yet.  All future development will focus on this new interface and users are\nadvised to migrate. (You are using the old interface if you are using a URL suffix to use\nexternal memory.)\n\n### New features in Python package\n* Support numpy array interface and all numeric types from numpy in `DMatrix`\n  construction and `inplace_predict` (#6998, #7003).  Now XGBoost no longer makes data\n  copy when input is numpy array view.\n* The early stopping callback in Python has a new `min_delta` parameter to control the\n  stopping behavior (#7137)\n* Python package now supports calculating feature scores for the linear model, which is\n  also available on R package. (#7048)\n* Python interface now supports configuring constraints using feature names instead of\n  feature indices.\n* Typehint support for more Python code including scikit-learn interface and rabit\n  module. (#6799, #7240)\n* Add tutorial for XGBoost-Ray (#6884)\n\n### New features in R package\n* In 1.4 we have a new prediction function in the C API which is used by the Python\n  package.  This release revises the R package to use the new prediction function as well.\n  A new parameter `iteration_range` for the predict function is available, which can be\n  used for specifying the range of trees for running prediction. (#6819, #7126)\n* R package now supports the `nthread` parameter in `DMatrix` construction. (#7127)\n\n### New features in JVM packages\n* Support GPU dataframe and `DeviceQuantileDMatrix` (#7195).  Constructing `DMatrix`\n  with GPU data structures and the interface for quantized `DMatrix` were first\n  introduced in the Python package and are now available in the xgboost4j package.\n* JVM packages now support saving and getting early stopping attributes. 
(#7095) Here is a\n  quick [example](https://github.com/dmlc/xgboost/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java \"example\") in Java (#7252).\n\n### General new features\n* We now have a pre-built binary package for R on Windows with GPU support. (#7185)\n* CUDA compute capability 86 is now part of the default CMake build configuration with\n  newly added support for CUDA 11.4. (#7131, #7182, #7254)\n* XGBoost can be compiled using system CUB provided by CUDA 11.x installation. (#7232)\n\n### Optimizations\nThe performance for both `hist` and `gpu_hist` has been significantly improved in 1.5\nwith the following optimizations:\n* GPU multi-class model training now supports prediction cache. (#6860)\n* GPU histogram building is sped up and the overall training time is 2-3 times faster on\n  large datasets (#7180, #7198).  In addition, we removed the parameter `deterministic_histogram` and now\n  the GPU algorithm is always deterministic.\n* CPU hist has an optimized procedure for data sampling (#6922)\n* More performance optimization in regression and binary classification objectives on\n  CPU (#7206)\n* Tree model dump is now performed in parallel (#7040)\n\n### Breaking changes\n* `n_gpus` was deprecated in 1.0 release and is now removed.\n* Feature grouping in CPU hist tree method is removed, which was disabled long\n  ago. (#7018)\n* C API for Quantile DMatrix is changed to be consistent with the new external memory\n  implementation. (#7082)\n\n### Notable general bug fixes\n* XGBoost no longer changes global CUDA device ordinal when `gpu_id` is specified (#6891,\n  #6987)\n* Fix `gamma` negative likelihood evaluation metric. (#7275)\n* Fix integer value of `verbose_eval` for `xgboost.cv` function in Python. (#7291)\n* Remove extra sync in CPU hist for dense data, which can lead to incorrect tree node\n  statistics. 
(#7120, #7128)\n* Fix a bug in GPU hist when data size is larger than `UINT32_MAX` with missing\n  values. (#7026)\n* Fix a thread safety issue in prediction with the `softmax` objective. (#7104)\n* Fix a thread safety issue in CPU SHAP value computation. (#7050) Please note that all\n  prediction functions in Python are thread-safe.\n* Fix model slicing. (#7149, #7078)\n* Workaround a bug in old GCC which can lead to segfault during construction of\n  DMatrix. (#7161)\n* Fix histogram truncation in GPU hist, which can lead to slightly-off results. (#7181)\n* Fix loading GPU linear model pickle files on CPU-only machine. (#7154)\n* Check input value is duplicated when CPU quantile queue is full (#7091)\n* Fix parameter loading with training continuation. (#7121)\n* Fix CMake interface for exposing C library by specifying dependencies. (#7099)\n* Callback and early stopping are explicitly disabled for the scikit-learn interface\n  random forest estimator. (#7236)\n* Fix compilation error on x86 (32-bit machine) (#6964)\n* Fix CPU memory usage with extremely sparse datasets (#7255)\n* Fix a bug in GPU multi-class AUC implementation with weighted data (#7300)\n\n### Python package\nOther than the items mentioned in the previous sections, there are some Python-specific\nimprovements.\n* Change development release postfix to `dev` (#6988)\n* Fix early stopping behavior with MAPE metric (#7061)\n* Fixed incorrect feature mismatch error message (#6949)\n* Add predictor to skl constructor. (#7000, #7159)\n* Re-enable feature validation in predict proba. (#7177)\n* scikit learn interface regression estimator now can pass the scikit-learn estimator\n  check and is fully compatible with scikit-learn utilities.  `__sklearn_is_fitted__` is\n  implemented as part of the changes (#7130, #7230)\n* Conform the latest pylint. (#7071, #7241)\n* Support latest panda range index in DMatrix construction. (#7074)\n* Fix DMatrix construction from pandas series. 
(#7243)\n* Fix typo and grammatical mistake in error message (#7134)\n* [dask] disable work stealing explicitly for training tasks (#6794)\n* [dask] Set dataframe index in predict. (#6944)\n* [dask] Fix prediction on df with latest dask. (#6969)\n* [dask] Fix dask predict on `DaskDMatrix` with `iteration_range`. (#7005)\n* [dask] Disallow importing non-dask estimators from xgboost.dask (#7133)\n\n### R package\nImprovements other than new features on R package:\n* Optimization for updating R handles in-place (#6903)\n* Removed the magrittr dependency. (#6855, #6906, #6928)\n* The R package now hides all C++ symbols to avoid conflicts. (#7245)\n* Other maintenance including code cleanups, document updates. (#6863, #6915, #6930, #6966, #6967)\n\n### JVM packages\nImprovements other than new features on JVM packages:\n* Constructors with implicit missing value are deprecated due to confusing behaviors. (#7225)\n* Reduce scala-compiler, scalatest dependency scopes (#6730)\n* Making the Java library loader emit helpful error messages on missing dependencies. (#6926)\n* JVM packages now use the Python tracker in XGBoost instead of dmlc.  The one in XGBoost\n  is shared between JVM packages and Python Dask and enjoys better maintenance (#7132)\n* Fix \"key not found: train\" error (#6842)\n* Fix model loading from stream (#7067)\n\n### General document improvements\n* Overhaul the installation documents. (#6877)\n* A few demos are added for AFT with dask (#6853), callback with dask (#6995), inference\n  in C (#7151), `process_type`. (#7135)\n* Fix PDF format of document. (#7143)\n* Clarify the behavior of `use_rmm`. (#6808)\n* Clarify prediction function. (#6813)\n* Improve tutorial on feature interactions (#7219)\n* Add small example for dask sklearn interface. (#6970)\n* Update Python intro.  
(#7235)\n* Some fixes/updates (#6810, #6856, #6935, #6948, #6976, #7084, #7097, #7170, #7173, #7174, #7226, #6979, #6809, #6796, #6979)\n\n### Maintenance\n* Some refactoring around CPU hist, which leads to better performance but are listed under general maintenance tasks:\n  - Extract evaluate splits from CPU hist. (#7079)\n  - Merge lossguide and depthwise strategies for CPU hist (#7007)\n  - Simplify sparse and dense CPU hist kernels (#7029)\n  - Extract histogram builder from CPU Hist. (#7152)\n\n* Others\n  - Fix `gpu_id` with custom objective. (#7015)\n  - Fix typos in AUC. (#6795)\n  - Use constexpr in `dh::CopyIf`. (#6828)\n  - Update dmlc-core. (#6862)\n  - Bump version to 1.5.0 snapshot in master. (#6875)\n  - Relax shotgun test. (#6900)\n  - Guard against index error in prediction. (#6982)\n  - Hide symbols in CI build + hide symbols for C and CUDA (#6798)\n  - Persist data in dask test. (#7077)\n  - Fix typo in arguments of PartitionBuilder::Init (#7113)\n  - Fix typo in src/common/hist.cc BuildHistKernel (#7116)\n  - Use upstream URI in distributed quantile tests. (#7129)\n  - Include cpack (#7160)\n  - Remove synchronization in monitor. (#7164)\n  - Remove unused code. (#7175)\n  - Fix building on CUDA 11.0. (#7187)\n  - Better error message for `ncclUnhandledCudaError`. (#7190)\n  - Add noexcept to JSON objects. (#7205)\n  - Improve wording for warning (#7248)\n  - Fix typo in release script. [skip ci] (#7238)\n  - Relax shotgun test. (#6918)\n  - Relax test for decision stump in distributed environment. (#6919)\n  -\t[dask] speed up tests (#7020)\n\n### CI\n* [CI] Rotate access keys for uploading MacOS artifacts from Travis CI (#7253)\n* Reduce Travis environment setup time. (#6912)\n* Restore R cache on github action. (#6985)\n* [CI] Remove stray build artifact to avoid error in artifact packaging (#6994)\n* [CI] Move appveyor tests to action (#6986)\n* Remove appveyor badge. 
[skip ci] (#7035)\n* [CI] Configure RAPIDS, dask, modin (#7033)\n* Test on s390x. (#7038)\n* [CI] Upgrade to CMake 3.14 (#7060)\n* [CI] Update R cache. (#7102)\n* [CI] Pin libomp to 11.1.0  (#7107)\n* [CI] Upgrade build image to CentOS 7 + GCC 8; require CUDA 10.1 and later (#7141)\n* [dask] Work around segfault in prediction. (#7112)\n* [dask] Remove the workaround for segfault. (#7146)\n* [CI] Fix hanging Python setup in Windows CI (#7186)\n* [CI] Clean up in beginning of each task in Win CI (#7189)\n* Fix travis. (#7237)\n\n### Acknowledgement\n* **Contributors**: Adam Pocock (@Craigacp), Jeff H (@JeffHCross), Johan Hansson (@JohanWork), Jose Manuel Llorens (@JoseLlorensRipolles), Benjamin Szőke (@Livius90), @ReeceGoding, @ShvetsKS, Robert Zabel (@ZabelTech), Ali (@ali5h), Andrew Ziem (@az0), Andy Adinets (@canonizer), @david-cortes, Daniel Saxton (@dsaxton), Emil Sadek (@esadek), @farfarawayzyt, Gil Forsyth (@gforsyth), @giladmaya, @graue70, Philip Hyunsu Cho (@hcho3), James Lamb (@jameslamb), José Morales (@jmoralez), Kai Fricke (@krfricke), Christian Lorentzen (@lorentzenchr), Mads R. B. 
Kristensen (@madsbk), Anton Kostin (@masguit42), Martin Petříček (@mpetricek-corp), @naveenkb, Taewoo Kim (@oOTWK), Viktor Szathmáry (@phraktle), Robert Maynard (@robertmaynard), TP Boudreau (@tpboudreau), Jiaming Yuan (@trivialfis), Paul Taylor (@trxcllnt), @vslaykovsky, Bobby Wang (@wbo4958),\n* **Reviewers**: Nan Zhu (@CodingCat), Adam Pocock (@Craigacp), Jose Manuel Llorens (@JoseLlorensRipolles), Kodi Arfer (@Kodiologist), Benjamin Szőke (@Livius90), Mark Guryanov (@MarkGuryanov), Rory Mitchell (@RAMitchell), @ReeceGoding, @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Andrew Ziem (@az0), @candalfigomoro, Andy Adinets (@canonizer), Dante Gama Dessavre (@dantegd), @david-cortes, Daniel Saxton (@dsaxton), @farfarawayzyt, Gil Forsyth (@gforsyth), Harutaka Kawamura (@harupy), Philip Hyunsu Cho (@hcho3), @jakirkham, James Lamb (@jameslamb), José Morales (@jmoralez), James Bourbeau (@jrbourbeau), Christian Lorentzen (@lorentzenchr), Martin Petříček (@mpetricek-corp), Nikolay Petrov (@napetrov), @naveenkb, Viktor Szathmáry (@phraktle), Robin Teuwens (@rteuwens), Yuan Tang (@terrytangyuan), TP Boudreau (@tpboudreau), Jiaming Yuan (@trivialfis), @vkuzmin-uber, Bobby Wang (@wbo4958), William Hicks (@wphicks)\n\n\n## v1.4.2 (2021.05.13)\nThis is a patch release for Python package with following fixes:\n\n* Handle the latest version of cupy.ndarray in inplace_predict. (#6933)\n* Ensure output array from predict_leaf is (n_samples, ) when there's only 1 tree. 1.4.0 outputs (n_samples, 1). (#6889)\n* Fix empty dataset handling with multi-class AUC. (#6947)\n* Handle object type from pandas in inplace_predict. (#6927)\n\n\n## v1.4.1 (2021.04.20)\nThis is a bug fix release.\n\n* Fix GPU implementation of AUC on some large datasets. (#6866)\n\n## v1.4.0 (2021.04.12)\n\n### Introduction of pre-built binary package for R, with GPU support\nStarting with release 1.4.0, users now have the option of installing `{xgboost}` without\nhaving to build it from the source. 
This is particularly advantageous for users who want\nto take advantage of the GPU algorithm (`gpu_hist`), as previously they'd have to build\n`{xgboost}` from the source using CMake and NVCC. Now installing `{xgboost}` with GPU\nsupport is as easy as: `R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz`. (#6827)\n\nSee the instructions at https://xgboost.readthedocs.io/en/latest/build.html\n\n### Improvements on prediction functions\nXGBoost has many prediction types including shap value computation and inplace prediction.\nIn 1.4 we overhauled the underlying prediction functions for C API and Python API with an\nunified interface. (#6777, #6693, #6653, #6662, #6648, #6668, #6804)\n* Starting with 1.4, sklearn interface prediction will use inplace predict by default when\n  input data is supported.\n* Users can use inplace predict with `dart` booster and enable GPU acceleration just\n  like `gbtree`.\n* Also all prediction functions with tree models are now thread-safe.  Inplace predict is\n  improved with `base_margin` support.\n* A new set of C predict functions are exposed in the public interface.\n* A user-visible change is a newly added parameter called `strict_shape`.  See\n  https://xgboost.readthedocs.io/en/latest/prediction.html for more details.\n\n\n### Improvement on Dask interface\n* Starting with 1.4, the Dask interface is considered to be feature-complete, which means\n  all of the models found in the single node Python interface are now supported in Dask,\n  including but not limited to ranking and random forest.  Also, the prediction function\n  is significantly faster and supports shap value computation.\n  - Most of the parameters found in single node sklearn interface are supported by\n    Dask interface. (#6471, #6591)\n  - Implements learning to rank.  On the Dask interface, we use the newly added support of\n    query ID to enable group structure. (#6576)\n  - The Dask interface has Python type hints support. 
(#6519)\n  - All models can be safely pickled. (#6651)\n  - Random forest estimators are now supported. (#6602)\n  - Shap value computation is now supported. (#6575, #6645, #6614)\n  - Evaluation result is printed on the scheduler process. (#6609)\n  - `DaskDMatrix` (and device quantile dmatrix) now accepts all meta-information. (#6601)\n\n* Prediction optimization.  We enhanced and speeded up the prediction function for the\n  Dask interface.  See the latest Dask tutorial page in our document for an overview of\n  how you can optimize it even further. (#6650, #6645, #6648, #6668)\n\n* Bug fixes\n  - If you are using the latest Dask and distributed where `distributed.MultiLock` is\n    present, XGBoost supports training multiple models on the same cluster in\n    parallel. (#6743)\n  - A bug fix for when using `dask.client` to launch async task, XGBoost might use a\n    different client object internally. (#6722)\n\n* Other improvements on documents, blogs, tutorials, and demos. (#6389, #6366, #6687,\n  #6699, #6532, #6501)\n\n### Python package\nWith changes from Dask and general improvement on prediction, we have made some\nenhancements on the general Python interface and IO for booster information.  Starting\nfrom 1.4, booster feature names and types can be saved into the JSON model.  Also some\nmodel attributes like `best_iteration`, `best_score` are restored upon model load.  On\nsklearn interface, some attributes are now implemented as Python object property with\nbetter documents.\n\n* Breaking change: All `data` parameters in prediction functions are renamed to `X`\n  for better compliance to sklearn estimator interface guidelines.\n* Breaking change: XGBoost used to generate some pseudo feature names with `DMatrix`\n  when inputs like `np.ndarray` don't have column names.  The procedure is removed to\n  avoid conflict with other inputs. (#6605)\n* Early stopping with training continuation is now supported. 
(#6506)\n* Optional import for Dask and cuDF are now lazy. (#6522)\n* As mentioned in the prediction improvement summary, the sklearn interface uses inplace\n  prediction whenever possible. (#6718)\n* Booster information like feature names and feature types are now saved into the JSON\n  model file. (#6605)\n* All `DMatrix` interfaces including `DeviceQuantileDMatrix` and counterparts in Dask\n  interface (as mentioned in the Dask changes summary) now accept all the meta-information\n  like `group` and `qid` in their constructor for better consistency. (#6601)\n* Booster attributes are restored upon model load so users don't have to call `attr`\n  manually. (#6593)\n* On sklearn interface, all models accept `base_margin` for evaluation datasets. (#6591)\n* Improvements over the setup script including smaller sdist size and faster installation\n  if the C++ library is already built (#6611, #6694, #6565).\n\n* Bug fixes for Python package:\n  - Don't validate feature when number of rows is 0. (#6472)\n  - Move metric configuration into booster. (#6504)\n  - Calling XGBModel.fit() should clear the Booster by default (#6562)\n  - Support `_estimator_type`. (#6582)\n  - [dask, sklearn] Fix predict proba. (#6566, #6817)\n  - Restore unknown data support. (#6595)\n  - Fix learning rate scheduler with cv. (#6720)\n  - Fixes small typo in sklearn documentation (#6717)\n  - [python-package] Fix class Booster: feature_types = None (#6705)\n  - Fix divide by 0 in feature importance when no split is found. (#6676)\n\n\n### JVM package\n* [jvm-packages] fix early stopping doesn't work even without custom_eval setting (#6738)\n* fix potential TaskFailedListener's callback won't be called (#6612)\n* [jvm] Add ability to load booster direct from byte array (#6655)\n* [jvm-packages] JVM library loader extensions (#6630)\n\n### R package\n* R documentation: Make construction of DMatrix consistent.\n* Fix R documentation for xgb.train. 
(#6764)\n\n### ROC-AUC\nWe re-implemented the ROC-AUC metric in XGBoost.  The new implementation supports\nmulti-class classification and has better support for learning to rank tasks that are not\nbinary.  Also, it has a better-defined average on distributed environments with additional\nhandling for invalid datasets. (#6749, #6747, #6797)\n\n### Global configuration.\nStarting from 1.4, XGBoost's Python, R and C interfaces support a new global configuration\nmodel where users can specify some global parameters.  Currently, supported parameters are\n`verbosity` and `use_rmm`.  The latter is experimental, see rmm plugin demo and\nrelated README file for details. (#6414, #6656)\n\n### Other New features.\n* Better handling for input data types that support `__array_interface__`.  For some\n  data types including GPU inputs and `scipy.sparse.csr_matrix`, XGBoost employs\n  `__array_interface__` for processing the underlying data.  Starting from 1.4, XGBoost\n  can accept arbitrary array strides (which means column-major is supported) without\n  making data copies, potentially reducing a significant amount of memory consumption.\n  Also version 3 of `__cuda_array_interface__` is now supported.  (#6776, #6765, #6459,\n  #6675)\n* Improved parameter validation, now feeding XGBoost with parameters that contain\n  whitespace will trigger an error. (#6769)\n* For Python and R packages, file paths containing the home indicator `~` are supported.\n* As mentioned in the Python changes summary, the JSON model can now save feature\n  information of the trained booster.  The JSON schema is updated accordingly. (#6605)\n* Development of categorical data support is continued.  Newly added weighted data support\n  and `dart` booster support. (#6508, #6693)\n* As mentioned in Dask change summary, ranking now supports the `qid` parameter for\n  query groups. (#6576)\n* `DMatrix.slice` can now consume a numpy array. 
(#6368)\n\n### Other breaking changes\n* Aside from the feature name generation, there are 2 breaking changes:\n  - Drop saving binary format for memory snapshot. (#6513, #6640)\n  - Change default evaluation metric for binary:logitraw objective to logloss (#6647)\n\n### CPU Optimization\n* Aside from the general changes on predict function, some optimizations are applied on\n  CPU implementation. (#6683, #6550, #6696, #6700)\n* Also performance for sampling initialization in `hist` is improved. (#6410)\n\n### Notable fixes in the core library\nThese fixes do not reside in particular language bindings:\n* Fixes for gamma regression.  This includes checking for invalid input values, fixes for\n  gamma deviance metric, and better floating point guard for gamma negative log-likelihood\n  metric. (#6778, #6537, #6761)\n* Random forest with `gpu_hist` might generate low accuracy in previous versions. (#6755)\n* Fix a bug in GPU sketching when data size exceeds limit of 32-bit integer. (#6826)\n* Memory consumption fix for row-major adapters (#6779)\n* Don't estimate sketch batch size when rmm is used. (#6807) (#6830)\n* Fix in-place predict with missing value. (#6787)\n* Re-introduce double buffer in UpdatePosition, to fix perf regression in gpu_hist (#6757)\n* Pass correct split_type to GPU predictor (#6491)\n* Fix DMatrix feature names/types IO. (#6507)\n* Use view for `SparsePage` exclusively to avoid some data access races. (#6590)\n* Check for invalid data. (#6742)\n* Fix relocatable include in CMakeList (#6734) (#6737)\n* Fix DMatrix slice with feature types. (#6689)\n\n### Other deprecation notices:\n\n* This release will be the last release to support CUDA 10.0. (#6642)\n\n* Starting in the next release, the Python package will require Pip 19.3+ due to the use\n  of manylinux2014 tag. Also, CentOS 6, RHEL 6 and other old distributions will not be\n  supported.\n\n### Known issue:\n\nMacOS build of the JVM packages doesn't support multi-threading out of the box. 
To enable\nmulti-threading with JVM packages, MacOS users will need to build the JVM packages from\nthe source. See https://xgboost.readthedocs.io/en/latest/jvm/index.html#installation-from-source\n\n\n### Doc\n* Dedicated page for `tree_method` parameter is added. (#6564, #6633)\n* [doc] Add FLAML as a fast tuning tool for XGBoost  (#6770)\n* Add document for tests directory. [skip ci] (#6760)\n* Fix doc string of config.py to use correct `versionadded` (#6458)\n* Update demo for prediction. (#6789)\n* [Doc] Document that AUCPR is for binary classification/ranking (#5899)\n* Update the C API comments (#6457)\n* Fix document. [skip ci] (#6669)\n\n### Maintenance: Testing, continuous integration\n* Use CPU input for test_boost_from_prediction. (#6818)\n* [CI] Upload xgboost4j.dll to S3 (#6781)\n* Update dmlc-core submodule (#6745)\n* [CI] Use manylinux2010_x86_64 container to vendor libgomp (#6485)\n* Add conda-forge badge (#6502)\n* Fix merge conflict. (#6512)\n* [CI] Split up main.yml, add mypy. (#6515)\n* [Breaking] Upgrade cuDF and RMM to 0.18 nightlies; require RMM 0.18+ for RMM plugin (#6510)\n* \"featue_map\" typo changed to  \"feature_map\" (#6540)\n* Add script for generating release tarball. (#6544)\n* Add credentials to .gitignore (#6559)\n* Remove warnings in tests. (#6554)\n* Update dmlc-core submodule and conform to new API (#6431)\n* Suppress hypothesis health check for dask client. (#6589)\n* Fix pylint. (#6714)\n* [CI] Clear R package cache (#6746)\n* Exclude dmlc test on github action. (#6625)\n* Tests for regression metrics with weights. (#6729)\n* Add helper script and doc for releasing pip package. (#6613)\n* Support pylint 2.7.0 (#6726)\n* Remove R cache in github action. (#6695)\n* [CI] Do not mix up stashed executable built for ARM and x86_64 platforms (#6646)\n* [CI] Add ARM64 test to Jenkins pipeline (#6643)\n* Disable s390x and arm64 tests on travis for now. (#6641)\n* Move sdist test to action. (#6635)\n* [dask] Rework base margin test. 
(#6627)\n\n\n### Maintenance: Refactor code for legibility and maintainability\n* Improve OpenMP exception handling (#6680)\n* Improve string view to reduce string allocation. (#6644)\n* Simplify Span checks. (#6685)\n* Use generic dispatching routine for array interface. (#6672)\n\n\n## v1.3.0 (2020.12.08)\n\n### XGBoost4J-Spark: Exceptions should cancel jobs gracefully instead of killing SparkContext (#6019).\n* By default, exceptions in XGBoost4J-Spark cause the whole SparkContext to shut down, necessitating the restart of the Spark cluster. This behavior is often a major inconvenience.\n* Starting from 1.3.0 release, XGBoost adds a new parameter `killSparkContextOnWorkerFailure` to optionally prevent killing SparkContext. If this parameter is set, exceptions will gracefully cancel training jobs instead of killing SparkContext.\n\n### GPUTreeSHAP: GPU acceleration of the TreeSHAP algorithm (#6038, #6064, #6087, #6099, #6163, #6281, #6332)\n* [SHAP (SHapley Additive exPlanations)](https://github.com/slundberg/shap) is a game theoretic approach to explain predictions of machine learning models. It computes feature importance scores for individual examples, establishing how each feature influences a particular prediction. TreeSHAP is an optimized SHAP algorithm specifically designed for decision tree ensembles.\n* Starting with 1.3.0 release, it is now possible to leverage CUDA-capable GPUs to accelerate the TreeSHAP algorithm. Check out [the demo notebook](https://github.com/dmlc/xgboost/blob/master/demo/gpu_acceleration/shap.ipynb).\n* The CUDA implementation of the TreeSHAP algorithm is hosted at [rapidsai/GPUTreeSHAP](https://github.com/rapidsai/gputreeshap). XGBoost imports it as a Git submodule.\n\n### New style Python callback API (#6199, #6270, #6320, #6348, #6376, #6399, #6441)\n* The XGBoost Python package now offers a re-designed callback API. The new callback API lets you design various extensions of training in idiomatic Python. 
In addition, the new callback API allows you to use early stopping with the native Dask API (`xgboost.dask`). Check out [the tutorial](https://xgboost.readthedocs.io/en/release_1.3.0/python/callbacks.html) and [the demo](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/callbacks.py).\n\n### Enable the use of `DeviceQuantileDMatrix` / `DaskDeviceQuantileDMatrix` with large data (#6201, #6229, #6234).\n* `DeviceQuantileDMatrix` can achieve memory saving by avoiding extra copies of the training data, and the saving is bigger for large data. Unfortunately, large data with more than 2^31 elements was triggering integer overflow bugs in CUB and Thrust. Tracking issue: #6228.\n* This release contains a series of work-arounds to allow the use of `DeviceQuantileDMatrix` with large data:\n  - Loop over `copy_if` (#6201)\n  - Loop over `thrust::reduce` (#6229)\n  - Implement the inclusive scan algorithm in-house, to handle large offsets (#6234)\n\n### Support slicing of tree models (#6302)\n* Accessing the best iteration of a model after the application of early stopping used to be error-prone, as users needed to manually pass the `ntree_limit` argument to the `predict()` function.\n* Now we provide a simple interface to slice tree models by specifying a range of boosting rounds. The tree ensemble can be split into multiple sub-ensembles via the slicing interface. Check out [an example](https://xgboost.readthedocs.io/en/release_1.3.0/python/model.html).\n* In addition, the early stopping callback now supports the `save_best` option. When enabled, XGBoost will save (persist) the model at the best boosting round and discard the trees that were fit subsequent to the best round.\n\n### Weighted subsampling of features (columns) (#5962)\n* It is now possible to sample features (columns) via weighted subsampling, in which features with higher weights are more likely to be selected in the sample. 
Weighted subsampling allows you to encode domain knowledge by emphasizing a particular set of features in the choice of tree splits. In addition, you can prevent particular features from being used in any splits, by assigning them zero weights.\n* Check out [the demo](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/feature_weights.py).\n\n### Improved integration with Dask\n* Support reverse-proxy environment such as Google Kubernetes Engine (#6343, #6475)\n* An XGBoost training job will no longer use all available workers. Instead, it will only use the workers that contain input data (#6343).\n* The new callback API works well with the Dask training API.\n* The `predict()` and `fit()` functions of `DaskXGBClassifier` and `DaskXGBRegressor` now accept a base margin (#6155).\n* Support more meta data in the Dask API (#6130, #6132, #6333).\n* Allow passing extra keyword arguments as `kwargs` in `predict()` (#6117)\n* Fix typo in dask interface: `sample_weights` -> `sample_weight` (#6240)\n* Allow empty data matrix in AFT survival, as Dask may produce empty partitions (#6379)\n* Speed up prediction by overlapping prediction jobs in all workers (#6412)\n\n### Experimental support for direct splits with categorical features (#6028, #6128, #6137, #6140, #6164, #6165, #6166, #6179, #6194, #6219)\n* Currently, XGBoost requires users to one-hot-encode categorical variables. This has adverse performance implications, as the creation of many dummy variables results in higher memory consumption and may require fitting deeper trees to achieve equivalent model accuracy.\n* The 1.3.0 release of XGBoost contains experimental support for direct handling of categorical variables in test nodes. Each test node will have the condition of form `feature_value \\in match_set`, where the `match_set` on the right hand side contains one or more matching categories. The matching categories in `match_set` represent the condition for traversing to the right child node. 
Currently, XGBoost will only generate categorical splits with only a single matching category (\"one-vs-rest split\"). In a future release, we plan to remove this restriction and produce splits with multiple matching categories in `match_set`.\n* The categorical split requires the use of JSON model serialization. The legacy binary serialization method cannot be used to save (persist) models with categorical splits.\n* Note. This feature is currently highly experimental. Use it at your own risk. See the detailed list of limitations at [#5949](https://github.com/dmlc/xgboost/pull/5949).\n\n### Experimental plugin for RAPIDS Memory Manager (#5873, #6131, #6146, #6150, #6182)\n* RAPIDS Memory Manager library ([rapidsai/rmm](https://github.com/rapidsai/rmm)) provides a collection of efficient memory allocators for NVIDIA GPUs. It is now possible to use XGBoost with memory allocators provided by RMM, by enabling the RMM integration plugin. With this plugin, XGBoost is now able to share a common GPU memory pool with other applications using RMM, such as the RAPIDS data science packages.\n* See [the demo](https://github.com/dmlc/xgboost/blob/master/demo/rmm_plugin/README.md) for a working example, as well as directions for building XGBoost with the RMM plugin.\n* The plugin will soon be considered non-experimental, once #6297 is resolved.\n\n### Experimental plugin for oneAPI programming model (#5825)\n* oneAPI is a programming interface developed by Intel aimed at providing one programming model for many types of hardware such as CPU, GPU, FPGA and other hardware accelerators.\n* XGBoost now includes an experimental plugin for using oneAPI for the predictor and objective functions. 
The plugin is hosted in the directory `plugin/updater_oneapi`.\n* Roadmap: #5442\n\n### Pickling the XGBoost model will now trigger JSON serialization (#6027)\n* The pickle will now contain the JSON string representation of the XGBoost model, as well as related configuration.\n\n### Performance improvements\n* Various performance improvements on multi-core CPUs\n  - Optimize DMatrix build time by up to 3.7x. (#5877)\n  - CPU predict performance improvement, by up to 3.6x. (#6127)\n  - Optimize CPU sketch allreduce for sparse data (#6009)\n  - Thread local memory allocation for BuildHist, leading to a speedup of up to 1.7x. (#6358)\n  - Disable hyperthreading for DMatrix creation (#6386). This speeds up DMatrix creation by up to 2x.\n  - Simple fix for static schedule in predict (#6357)\n* Unify thread configuration, to make it easy to utilize all CPU cores (#6186)\n* [jvm-packages] Clean the way deterministic partitioning is computed (#6033)\n* Speed up JSON serialization by implementing an intrusive pointer class (#6129). It leads to a 1.5x-2x performance boost.\n\n### API additions\n* [R] Add SHAP summary plot using ggplot2 (#5882)\n* Modin DataFrame can now be used as input (#6055)\n* [jvm-packages] Add `getNumFeature` method (#6075)\n* Add MAPE metric (#6119)\n* Implement GPU predict leaf. (#6187)\n* Enable cuDF/cuPy inputs in `XGBClassifier` (#6269)\n* Document tree method for feature weights. (#6312)\n* Add `fail_on_invalid_gpu_id` parameter, which will cause XGBoost to terminate upon seeing an invalid value of `gpu_id` (#6342)\n\n### Breaking: the default evaluation metric for classification is changed to `logloss` / `mlogloss` (#6183)\n* The default metric used to be accuracy, and it is not statistically consistent to perform early stopping with the accuracy metric when we are really optimizing the log loss for the `binary:logistic` objective.\n* For statistical consistency, the default metric for classification has been changed to `logloss`. 
Users may choose to preserve the old behavior by explicitly specifying `eval_metric`.\n\n### Breaking: `skmaker` is now removed (#5971)\n* The `skmaker` updater has been neither documented nor tested.\n\n### Breaking: the JSON model format no longer stores the leaf child count (#6094).\n* The leaf child count field has been deprecated and is not used anywhere in the XGBoost codebase.\n\n### Breaking: XGBoost now requires MacOS 10.14 (Mojave) and later.\n* Homebrew has dropped support for MacOS 10.13 (High Sierra), so we are not able to install the OpenMP runtime (`libomp`) from Homebrew on MacOS 10.13. Please use MacOS 10.14 (Mojave) or later.\n\n### Deprecation notices\n* The use of `LabelEncoder` in `XGBClassifier` is now deprecated and will be removed in the next minor release (#6269). The deprecation is necessary to support multiple types of inputs, such as cuDF data frames or cuPy arrays.\n* The use of certain positional arguments in the Python interface is deprecated (#6365). Users will see deprecation warnings for the use of positional arguments for certain function parameters. New code should use keyword arguments as much as possible. We have not yet decided when we will fully require the use of keyword arguments.\n\n### Bug-fixes\n* On big-endian arch, swap the byte order in the binary serializer to enable loading models that were produced by a little-endian machine (#5813).\n* [jvm-packages] Fix deterministic partitioning with dataset containing Double.NaN (#5996)\n* Limit tree depth for GPU hist to 31 to prevent integer overflow (#6045)\n* [jvm-packages] Set `maxBins` to 256 to align with the default value in the C++ code (#6066)\n* [R] Fix CRAN check (#6077)\n* Add back support for `scipy.sparse.coo_matrix` (#6162)\n* Handle duplicated values in sketching. (#6178)\n* Catch all standard exceptions in C API. (#6220)\n* Fix linear GPU input (#6255)\n* Fix inplace prediction interval. 
(#6259)\n* [R] allow `xgb.plot.importance()` calls to fill a grid (#6294)\n* Lazy import dask libraries. (#6309)\n* Deterministic data partitioning for external memory (#6317)\n* Avoid resetting seed for every configuration. (#6349)\n* Fix label errors in graph visualization (#6369)\n* [jvm-packages] fix potential unit test suites aborted issue due to race condition (#6373)\n* [R] Fix warnings from `R check --as-cran` (#6374)\n* [R] Fix a crash that occurs with noLD R (#6378)\n* [R] Do not convert continuous labels to factors (#6380)\n* [R] remove uses of `exists()` (#6387)\n* Propagate parameters to the underlying `Booster` handle from `XGBClassifier.set_param` / `XGBRegressor.set_param`. (#6416)\n* [R] Fix R package installation via CMake (#6423)\n* Enforce row-major order in cuPy array (#6459)\n* Fix filtering callable objects in the parameters passed to the scikit-learn API. (#6466)\n\n### Maintenance: Testing, continuous integration, build system\n* [CI] Improve JVM test in GitHub Actions (#5930)\n* Refactor plotting test so that it can run independently (#6040)\n* [CI] Cancel builds on subsequent pushes (#6011)\n* Fix Dask Pytest fixture (#6024)\n* [CI] Migrate linters to GitHub Actions (#6035)\n* [CI] Remove win2016 JVM test from GitHub Actions (#6042)\n* Fix CMake build with `BUILD_STATIC_LIB` option (#6090)\n* Don't link imported target in CMake (#6093)\n* Work around a compiler bug in MacOS AppleClang 11 (#6103)\n* [CI] Fix CTest by running it in a correct directory (#6104)\n* [R] Check warnings explicitly for model compatibility tests (#6114)\n* [jvm-packages] add xgboost4j-gpu/xgboost4j-spark-gpu module to facilitate release (#6136)\n* [CI] Time GPU tests. (#6141)\n* [R] remove warning in configure.ac (#6152)\n* [CI] Upgrade cuDF and RMM to 0.16 nightlies; upgrade to Ubuntu 18.04 (#6157)\n* [CI] Test C API demo (#6159)\n* Option for generating device debug info. 
(#6168)\n* Update `.gitignore` (#6175, #6193, #6346)\n* Hide C++ symbols from dmlc-core (#6188)\n* [CI] Added arm64 job in Travis-CI (#6200)\n* [CI] Fix Docker build for CUDA 11 (#6202)\n* [CI] Move non-OpenMP gtest to GitHub Actions (#6210)\n* [jvm-packages] Fix up build for xgboost4j-gpu, xgboost4j-spark-gpu (#6216)\n* Add more tests for categorical data support (#6219)\n* [dask] Test for data initialization. (#6226)\n* Bump junit from 4.11 to 4.13.1 in /jvm-packages/xgboost4j (#6230)\n* Bump junit from 4.11 to 4.13.1 in /jvm-packages/xgboost4j-gpu (#6233)\n* [CI] Reduce testing load with RMM (#6249)\n* [CI] Build a Python wheel for aarch64 platform (#6253)\n* [CI] Time the CPU tests on Jenkins. (#6257)\n* [CI] Skip Dask tests on ARM. (#6267)\n* Fix a typo in `is_arm()` in testing.py (#6271)\n* [CI] replace `egrep` with `grep -E` (#6287)\n* Support unity build. (#6295)\n* [CI] Mark flaky tests as XFAIL (#6299)\n* [CI] Use separate Docker cache for each CUDA version (#6305)\n* Added `USE_NCCL_LIB_PATH` option to enable user to set `NCCL_LIBRARY` during build  (#6310)\n* Fix flaky data initialization test. (#6318)\n* Add a badge for GitHub Actions (#6321)\n* Optional `find_package` for sanitizers. (#6329)\n* Use pytest conventions consistently in Python tests (#6337)\n* Fix missing space in warning message (#6340)\n* Update `custom_metric_obj.rst` (#6367)\n* [CI] Run R check with `--as-cran` flag on GitHub Actions (#6371)\n* [CI] Remove R check from Jenkins (#6372)\n* Mark GPU external memory test as XFAIL. (#6381)\n* [CI] Add noLD R test (#6382)\n* Fix MPI build. (#6403)\n* [CI] Upgrade to MacOS Mojave image (#6406)\n* Fix flaky sparse page dmatrix test. 
(#6417)\n* [CI] Upgrade cuDF and RMM to 0.17 nightlies (#6434)\n* [CI] Fix CentOS 6 Docker images (#6467)\n* [CI] Vendor libgomp in the manylinux Python wheel (#6461)\n* [CI] Hot fix for libgomp vendoring (#6482)\n\n### Maintenance: Clean up and merge the Rabit submodule (#6023, #6095, #6096, #6105, #6110, #6262, #6275, #6290)\n* The Rabit submodule is now maintained as part of the XGBoost codebase.\n* Tests for Rabit are now part of the test suites of XGBoost.\n* Rabit can now be built on the Windows platform.\n* We made various code re-formatting for the C++ code with clang-tidy.\n* Public headers of XGBoost no longer depend on Rabit headers.\n* Unused CMake targets for Rabit were removed.\n* Single-point model recovery has been dropped and removed from Rabit, simplifying the Rabit code greatly. The single-point model recovery feature has not been adequately maintained over the years.\n* We removed the parts of Rabit that were not useful for XGBoost.\n\n### Maintenance: Refactor code for legibility and maintainability\n* Unify CPU hist sketching (#5880)\n* [R] fix uses of 1:length(x) and other small things (#5992)\n* Unify evaluation functions. (#6037)\n* Make binary bin search reusable. (#6058)\n* Unify set index data. (#6062)\n* [R] Remove `stringi` dependency (#6109)\n* Merge extract cuts into QuantileContainer. (#6125)\n* Reduce C++ compiler warnings (#6197, #6198, #6213, #6286, #6325)\n* Cleanup Python code. (#6223)\n* Small cleanup to evaluator. (#6400)\n\n### Usability Improvements, Documentation\n* [jvm-packages] add example to handle missing value other than 0 (#5677)\n* Add DMatrix usage examples to the C API demo (#5854)\n* List `DaskDeviceQuantileDMatrix` in the doc. (#5975)\n* Update Python custom objective demo. (#5981)\n* Update the JSON model schema to document more objective functions. (#5982)\n* [Python] Fix warning when `missing` field is not used. 
(#5969)\n* Fix typo in tracker logging (#5994)\n* Move a warning about empty dataset, so that it's shown for all objectives and metrics (#5998)\n* Fix the instructions for installing the nightly build. (#6004)\n* [Doc] Add dtreeviz as a showcase example of integration with 3rd-party software (#6013)\n* [jvm-packages] [doc] Update install doc for JVM packages (#6051)\n* Fix typo in `xgboost.callback.early_stop` docstring (#6071)\n* Add cache suffix to the files used in the external memory demo. (#6088)\n* [Doc] Document the parameter `kill_spark_context_on_worker_failure` (#6097)\n* Fix link to the demo for custom objectives (#6100)\n* Update Dask doc. (#6108)\n* Validate weights are positive values. (#6115)\n* Document the updated CMake version requirement. (#6123)\n* Add demo for `DaskDeviceQuantileDMatrix`. (#6156)\n* Cosmetic fixes in `faq.rst` (#6161)\n* Fix error message. (#6176)\n* [Doc] Add list of winning solutions in data science competitions using XGBoost (#6177)\n* Fix a comment in demo to use correct reference (#6190)\n* Update the list of winning solutions using XGBoost (#6192)\n* Consistent style for build status badge (#6203)\n* [Doc] Add info on GPU compiler (#6204)\n* Update the list of winning solutions (#6222, #6254)\n* Add link to XGBoost's Twitter handle (#6244)\n* Fix minor typos in XGBClassifier methods' docstrings (#6247)\n* Add sponsors link to FUNDING.yml (#6252)\n* Group CLI demo into subdirectory. (#6258)\n* Reduce warning messages from `gbtree`. 
(#6273)\n* Create a tutorial for using the C API in a C/C++ application (#6285)\n* Update plugin instructions for CMake build (#6289)\n* [doc] make Dask distributed example copy-pastable (#6345)\n* [Python] Add option to use `libxgboost.so` from the system path (#6362)\n* Fixed few grammatical mistakes in doc (#6393)\n* Fix broken link in CLI doc (#6396)\n* Improve documentation for the Dask API (#6413)\n* Revise misleading exception information: no such param of `allow_non_zero_missing` (#6418)\n* Fix CLI ranking demo. (#6439)\n* Fix broken links. (#6455)\n\n### Acknowledgement\n**Contributors**: Nan Zhu (@CodingCat), @FelixYBW, Jack Dunn (@JackDunnNZ), Jean Lescut-Muller (@JeanLescut),  Boris Feld (@Lothiraldan), Nikhil Choudhary (@Nikhil1O1), Rory Mitchell (@RAMitchell), @ShvetsKS, Anthony D'Amato (@Totoketchup), @Wittty-Panda, neko (@akiyamaneko), Alexander Gugel (@alexanderGugel), @dependabot[bot], DIVYA CHAUHAN (@divya661), Daniel Steinberg (@dstein64), Akira Funahashi (@funasoul), Philip Hyunsu Cho (@hcho3), Tong He (@hetong007), Hristo Iliev (@hiliev), Honza Sterba (@honzasterba), @hzy001, Igor Moura (@igormp), @jameskrach, James Lamb (@jameslamb), Naveed Ahmed Saleem Janvekar (@janvekarnaveed), Kyle Nicholson (@kylejn27), lacrosse91 (@lacrosse91), Christian Lorentzen (@lorentzenchr), Manikya Bardhan (@manikyabard), @nabokovas, John Quitto-Graham (@nvidia-johnq), @odidev, Qi Zhang (@qzhang90), Sergio Gavilán (@sgavil), Tanuja Kirthi Doddapaneni (@tanuja3), Cuong Duong (@tcuongd), Yuan Tang (@terrytangyuan), Jiaming Yuan (@trivialfis), vcarpani (@vcarpani), Vladislav Epifanov (@vepifanov), Vitalie Spinu (@vspinu), Bobby Wang (@wbo4958), Zeno Gantner (@zenogantner), zhang_jf (@zuston)\n\n**Reviewers**: Nan Zhu (@CodingCat), John Zedlewski (@JohnZed), Rory Mitchell (@RAMitchell), @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Anthony D'Amato (@Totoketchup), @Wittty-Panda, Alexander Gugel (@alexanderGugel), Codecov Comments Bot (@codecov-commenter), Codecov 
(@codecov-io), DIVYA CHAUHAN (@divya661), Devin Robison (@drobison00), Geoffrey Blake (@geoffreyblake), Mark Harris (@harrism), Philip Hyunsu Cho (@hcho3), Honza Sterba (@honzasterba), Igor Moura (@igormp), @jakirkham, @jameskrach, James Lamb (@jameslamb), Janakarajan Natarajan (@janaknat), Jake Hemstad (@jrhemstad), Keith Kraus (@kkraus14), Kyle Nicholson (@kylejn27), Christian Lorentzen (@lorentzenchr), Michael Mayer (@mayer79), Nikolay Petrov (@napetrov), @odidev, PSEUDOTENSOR / Jonathan McKinney (@pseudotensor), Qi Zhang (@qzhang90), Sergio Gavilán (@sgavil), Scott Lundberg (@slundberg), Cuong Duong (@tcuongd), Yuan Tang (@terrytangyuan), Jiaming Yuan (@trivialfis), vcarpani (@vcarpani), Vladislav Epifanov (@vepifanov), Vincent Nijs (@vnijs), Vitalie Spinu (@vspinu), Bobby Wang (@wbo4958), William Hicks (@wphicks)\n\n## v1.2.0 (2020.08.22)\n\n### XGBoost4J-Spark now supports the GPU algorithm (#5171)\n* Now XGBoost4J-Spark is able to leverage NVIDIA GPU hardware to speed up training.\n* There is on-going work for accelerating the rest of the data pipeline with NVIDIA GPUs (#5950, #5972).\n\n### XGBoost now supports CUDA 11 (#5808)\n* It is now possible to build XGBoost with CUDA 11. Note that we do not yet distribute pre-built binaries built with CUDA 11; all current distributions use CUDA 10.0.\n\n### Better guidance for persisting XGBoost models in an R environment (#5940, #5964)\n* Users are strongly encouraged to use `xgb.save()` and `xgb.save.raw()` instead of `saveRDS()`. This is so that the persisted models can be accessed with future releases of XGBoost.\n* The previous release (1.1.0) had problems loading models that were saved with `saveRDS()`. This release adds a compatibility layer to restore access to the old RDS files. 
Note that this is meant to be a temporary measure; users are advised to stop using `saveRDS()` and migrate to `xgb.save()` and `xgb.save.raw()`.\n\n### New objectives and metrics\n* The pseudo-Huber loss `reg:pseudohubererror` is added (#5647). The corresponding metric is `mphe`. Right now, the slope is hard-coded to 1.\n* The Accelerated Failure Time objective for survival analysis (`survival:aft`) is now accelerated on GPUs (#5714, #5716). The survival metrics `aft-nloglik` and `interval-regression-accuracy` are also accelerated on GPUs.\n\n### Improved integration with scikit-learn\n* Added `n_features_in_` attribute to the scikit-learn interface to store the number of features used (#5780). This is useful for integrating with some scikit-learn features such as `StackingClassifier`.  See [this link](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep010/proposal.html) for more details.\n* `XGBoostError` now inherits `ValueError`, which conforms scikit-learn's exception requirement (#5696).\n\n### Improved integration with Dask\n* The XGBoost Dask API now exposes an asynchronous interface (#5862). See [the document](https://xgboost.readthedocs.io/en/latest/tutorials/dask.html#working-with-asyncio) for details.\n* Zero-copy ingestion of GPU arrays via `DaskDeviceQuantileDMatrix` (#5623, #5799, #5800, #5803, #5837, #5874, #5901): Previously, the Dask interface had to make 2 data copies: one for concatenating the Dask partition/block into a single block and another for internal representation. To save memory, we introduce `DaskDeviceQuantileDMatrix`. As long as Dask partitions are resident in the GPU memory, `DaskDeviceQuantileDMatrix` is able to ingest them directly without making copies. This matrix type wraps `DeviceQuantileDMatrix`.\n* The prediction function now returns GPU Series type if the input is from Dask-cuDF (#5710). 
This is to preserve the input data type.\n\n### Robust handling of external data types (#5689, #5893)\n- As we support more and more external data types, the handling logic has proliferated all over the code base and became hard to keep track of. It also became unclear how missing values and threads are handled. We refactored the Python package code to collect all data handling logic to a central location, and now we have an explicit list of all supported data types.\n\n### Improvements in GPU-side data matrix (`DeviceQuantileDMatrix`)\n* The GPU-side data matrix now implements its own quantile sketching logic, so that data don't have to be transported back to the main memory (#5700, #5747, #5760, #5846, #5870, #5898). The GK sketching algorithm is also now better documented.\n  - Now we can load extremely sparse dataset like URL, although performance is still sub-optimal.\n* The GPU-side data matrix now exposes an iterative interface (#5783), so that users are able to construct a matrix from a data iterator. See the [Python demo](https://github.com/dmlc/xgboost/blob/release_1.2.0/demo/guide-python/data_iterator.py).\n\n### New language binding: Swift (#5728)\n* Visit https://github.com/kongzii/SwiftXGBoost for more details.\n\n### Robust model serialization with JSON (#5772, #5804, #5831, #5857, #5934)\n* We continue efforts from the 1.0.0 release to adopt JSON as the format to save and load models robustly.\n* JSON model IO is significantly faster and produces smaller model files.\n* Round-trip reproducibility is guaranteed, via the introduction of an efficient float-to-string conversion algorithm known as [the Ryū algorithm](https://dl.acm.org/doi/10.1145/3192366.3192369). 
The conversion is locale-independent, producing consistent numeric representation regardless of the locale setting of the user's machine.\n* We fixed an issue in loading large JSON files to memory.\n* It is now possible to load a JSON file from a remote source such as S3.\n\n### Performance improvements\n* CPU hist tree method optimization\n  - Skip missing lookup in hist row partitioning if data is dense. (#5644)\n  - Specialize training procedures for CPU hist tree method on distributed environment. (#5557)\n  - Add single point histogram for CPU hist.  Previously gradient histogram for CPU hist is hard coded to be 64 bit, now users can specify the parameter `single_precision_histogram` to use 32 bit histogram instead for faster training performance. (#5624, #5811)\n* GPU hist tree method optimization\n  - Removed some unnecessary synchronizations and better memory allocation pattern. (#5707)\n  - Optimize GPU Hist for wide dataset.  Previously for wide dataset the atomic operation is performed on global memory, now it can run on shared memory for faster histogram building. But there's a known small regression on GeForce cards with dense data. (#5795, #5926, #5948, #5631)\n\n### API additions\n* Support passing fmap to importance plot (#5719). Now importance plot can show actual names of features instead of default ones.\n* Support 64bit seed. (#5643)\n* A new C API `XGBoosterGetNumFeature` is added for getting number of features in booster (#5856).\n* Feature names and feature types are now stored in C++ core and saved in binary DMatrix (#5858).\n\n### Breaking: The `predict()` method of `DaskXGBClassifier` now produces class predictions (#5986). Use `predict_proba()` to obtain probability predictions.\n* Previously, `DaskXGBClassifier.predict()` produced probability predictions. This is inconsistent with the behavior of other scikit-learn classifiers, where `predict()` returns class predictions. 
We make a breaking change in 1.2.0 release so that `DaskXGBClassifier.predict()` now correctly produces class predictions and thus behaves like other scikit-learn classifiers. Furthermore, we introduce the `predict_proba()` method for obtaining probability predictions, again to be in line with other scikit-learn classifiers.\n\n### Breaking: Custom evaluation metric now receives raw prediction (#5954)\n* Previously, the custom evaluation metric received a transformed prediction result when used with a classifier. Now the custom metric will receive a raw (untransformed) prediction and will need to transform the prediction itself.  See [demo/guide-python/custom\\_softmax.py](https://github.com/dmlc/xgboost/blob/release_1.2.0/demo/guide-python/custom_softmax.py) for an example.\n* This change is to make the custom metric behave consistently with the custom objective, which already receives raw prediction (#5564).\n\n### Breaking: XGBoost4J-Spark now requires Spark 3.0 and Scala 2.12 (#5836, #5890)\n* Starting with version 3.0, Spark can manage GPU resources and allocate them among executors.\n* Spark 3.0 dropped support for Scala 2.11 and now only supports Scala 2.12. Thus, XGBoost4J-Spark also only supports Scala 2.12.\n\n### Breaking: XGBoost Python package now requires Python 3.6 and later (#5715)\n* Python 3.6 has many useful features such as f-strings.\n\n### Breaking: XGBoost now adopts the C++14 standard (#5664)\n* Make sure to use a sufficiently modern C++ compiler that supports C++14, such as Visual Studio 2017, GCC 5.0+, and Clang 3.4+.\n\n### Bug-fixes\n* Fix a data race in the prediction function (#5853). As a byproduct, the prediction function now uses a thread-local data store and became thread-safe.\n* Restore capability to run prediction when the test input has fewer features than the training data (#5955). This capability is necessary to support predicting with LIBSVM inputs. 
The previous release (1.1) had broken this capability, so we restore it in this version with better tests.\n* Fix OpenMP build with CMake for R package, to support CMake 3.13 (#5895).\n* Fix Windows 2016 build (#5902, #5918).\n* Fix edge cases in scikit-learn interface with Pandas input by disabling feature validation. (#5953)\n* [R] Enable weighted learning to rank (#5945)\n* [R] Fix early stopping with custom objective (#5923)\n* Fix NDK Build (#5886)\n* Add missing explicit template specializations for greater portability (#5921)\n* Handle empty rows in data iterators correctly (#5929). This bug affects file loader and JVM data frames.\n* Fix `IsDense` (#5702)\n* [jvm-packages] Fix wrong method name `setAllowZeroForMissingValue` (#5740)\n* Fix shape inference for Dask predict (#5989)\n\n### Usability Improvements, Documentation\n* [Doc] Document that CUDA 10.0 is required (#5872)\n* Refactored command line interface (CLI). Now CLI is able to handle user errors and output basic document. (#5574)\n* Better error handling in Python: use `raise from` syntax to preserve full stacktrace (#5787).\n* The JSON model dump now has a formal schema (#5660, #5818). The benefit is to prevent `dump_model()` function from breaking. See [this document](https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html#difference-between-saving-model-and-dumping-model) to understand the difference between saving and dumping models.\n* Add a reference to the GPU external memory paper (#5684)\n* Document more objective parameters in the R package (#5682)\n* Document the existence of pre-built binary wheels for MacOS (#5711)\n* Remove `max.depth` in the R gblinear example. (#5753)\n* Added conda environment file for building docs (#5773)\n* Mention dask blog post in the doc, which introduces using Dask with GPU and some internal workings. 
(#5789)\n* Fix rendering of Markdown docs (#5821)\n* Document new objectives and metrics available on GPUs (#5909)\n* Better message when no GPU is found. (#5594)\n* Remove the use of `silent` parameter from R demos. (#5675)\n* Don't use masked array in array interface. (#5730)\n* Update affiliation of @terrytangyuan: Ant Financial -> Ant Group (#5827)\n* Move dask tutorial closer to other distributed tutorials (#5613)\n* Update XGBoost + Dask overview documentation (#5961)\n* Show `n_estimators` in the docstring of the scikit-learn interface (#6041)\n* Fix a typo in a docstring of the scikit-learn interface (#5980)\n\n### Maintenance: testing, continuous integration, build system\n* [CI] Remove CUDA 9.0 from CI (#5674, #5745)\n* Require CUDA 10.0+ in CMake build (#5718)\n* [R] Remove dependency on gendef for Visual Studio builds (fixes #5608) (#5764). This enables building XGBoost with GPU support with R 4.x.\n* [R-package] Reduce duplication in configure.ac (#5693)\n* Bump com.esotericsoftware to 4.0.2 (#5690)\n* Migrate some tests from AppVeyor to GitHub Actions to speed up the tests. (#5911, #5917, #5919, #5922, #5928)\n* Reduce cost of the Jenkins CI server (#5884, #5904, #5892). We now enforce a daily budget via an automated monitor. We also dramatically reduced the workload for the Windows platform, since the cloud VM cost is vastly greater for Windows.\n* [R] Set up automated R linter (#5944)\n* [R] replace uses of T and F with TRUE and FALSE (#5778)\n* Update Docker container 'CPU' (#5956)\n* Simplify CMake build with modern CMake techniques (#5871)\n* Use `hypothesis` package for testing (#5759, #5835, #5849).\n* Define `_CRT_SECURE_NO_WARNINGS` to remove unneeded warnings in MSVC (#5434)\n* Run all Python demos in CI, to ensure that they don't break (#5651)\n* Enhance nvtx support (#5636). Now we can use unified timer between CPU and GPU. Also CMake is able to find nvtx automatically.\n* Speed up python test. 
(#5752)\n* Add helper for generating batches of data. (#5756)\n* Add c-api-demo to .gitignore (#5855)\n* Add option to enable all compiler warnings in GCC/Clang (#5897)\n* Make Python model compatibility test runnable locally (#5941)\n* Add cupy to Windows CI (#5797)\n* [CI] Fix cuDF install; merge 'gpu' and 'cudf' test suite (#5814)\n* Update rabit submodule (#5680, #5876)\n* Force colored output for Ninja build. (#5959)\n* [CI] Assign larger /dev/shm to NCCL (#5966)\n* Add missing Pytest marks to AsyncIO unit test (#5968)\n* [CI] Use latest cuDF and dask-cudf (#6048)\n* Add CMake flag to log C API invocations, to aid debugging (#5925)\n* Fix a unit test on CLI, to handle RC versions (#6050)\n* [CI] Use mgpu machine to run gpu hist unit tests (#6050)\n* [CI] Build GPU-enabled JAR artifact and deploy to xgboost-maven-repo (#6050)\n\n### Maintenance: Refactor code for legibility and maintainability\n* Remove dead code in DMatrix initialization. (#5635)\n* Catch dmlc error by ref. (#5678)\n* Refactor the `gpu_hist` split evaluation in preparation for batched nodes enumeration. (#5610)\n* Remove column major specialization. (#5755)\n* Remove unused imports in Python (#5776)\n* Avoid including `c_api.h` in header files. (#5782)\n* Remove unweighted GK quantile, which is unused. (#5816)\n* Add Python binding for rabit ops. (#5743)\n* Implement `Empty` method for host device vector. (#5781)\n* Remove print (#5867)\n* Enforce tree order in JSON (#5974)\n\n### Acknowledgement\n**Contributors**: Nan Zhu (@CodingCat), @LionOrCatThatIsTheQuestion, Dmitry Mottl (@Mottl), Rory Mitchell (@RAMitchell), @ShvetsKS, Alex Wozniakowski (@a-wozniakowski), Alexander Gugel (@alexanderGugel), @anttisaukko, @boxdot, Andy Adinets (@canonizer), Ram Rachum (@cool-RR), Elliot Hershberg (@elliothershberg), Jason E. Aten, Ph.D. 
(@glycerine), Philip Hyunsu Cho (@hcho3), @jameskrach, James Lamb (@jameslamb), James Bourbeau (@jrbourbeau), Peter Jung (@kongzii), Lorenz Walthert (@lorenzwalthert), Oleksandr Kuvshynov (@okuvshynov), Rong Ou (@rongou), Shaochen Shi (@shishaochen), Yuan Tang (@terrytangyuan), Jiaming Yuan (@trivialfis), Bobby Wang (@wbo4958), Zhang Zhang (@zhangzhang10)\n\n**Reviewers**: Nan Zhu (@CodingCat), @LionOrCatThatIsTheQuestion, Hao Yang (@QuantHao), Rory Mitchell (@RAMitchell), @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Alex Wozniakowski (@a-wozniakowski), Amit Kumar (@aktech), Avinash Barnwal (@avinashbarnwal), @boxdot, Andy Adinets (@canonizer), Chandra Shekhar Reddy (@chandrureddy), Ram Rachum (@cool-RR), Cristiano Goncalves (@cristianogoncalves), Elliot Hershberg (@elliothershberg), Jason E. Aten, Ph.D. (@glycerine), Philip Hyunsu Cho (@hcho3), Tong He (@hetong007), James Lamb (@jameslamb), James Bourbeau (@jrbourbeau), Lee Drake (@leedrake5), DougM (@mengdong), Oleksandr Kuvshynov (@okuvshynov), RongOu (@rongou), Shaochen Shi (@shishaochen), Xu Xiao (@sperlingxx), Yuan Tang (@terrytangyuan), Theodore Vasiloudis (@thvasilo), Jiaming Yuan (@trivialfis), Bobby Wang (@wbo4958), Zhang Zhang (@zhangzhang10)\n\n## v1.1.1 (2020.06.06)\nThis patch release applies the following patches to 1.1.0 release:\n\n* CPU performance improvement in the PyPI wheels (#5720)\n* Fix loading old model (#5724)\n* Install pkg-config file (#5744)\n\n## v1.1.0 (2020.05.17)\n\n### Better performance on multi-core CPUs (#5244, #5334, #5522)\n* Poor performance scaling of the `hist` algorithm for multi-core CPUs has been under investigation (#3810). #5244 concludes the ongoing effort to improve performance scaling on multi-CPUs, in particular Intel CPUs. 
Roadmap: #5104\n* #5334 makes steps toward reducing memory consumption for the `hist` tree method on CPU.\n* #5522 optimizes random number generation for data sampling.\n\n### Deterministic GPU algorithm for regression and classification (#5361)\n* GPU algorithm for regression and classification tasks is now deterministic.\n* Roadmap: #5023. Currently only single-GPU training is deterministic. Distributed training with multiple GPUs is not yet deterministic.\n\n### Improve external memory support on GPUs (#5093, #5365)\n* Starting from 1.0.0 release, we added support for external memory on GPUs to enable training with larger datasets. Gradient-based sampling (#5093) speeds up the external memory algorithm by intelligently sampling a subset of the training data to copy into the GPU memory. [Learn more about out-of-core GPU gradient boosting.](https://arxiv.org/abs/2005.09148)\n* GPU-side data sketching now works with data from external memory (#5365).\n\n### Parameter validation: detection of unused or incorrect parameters (#5477, #5569, #5508)\n* Mis-spelled training parameter is a common user mistake. In previous versions of XGBoost, mis-spelled parameters were silently ignored. Starting with 1.0.0 release, XGBoost will produce a warning message if there are any unused training parameters. The 1.1.0 release makes parameter validation available to the scikit-learn interface (#5477) and the R binding (#5569).\n\n### Thread-safe, in-place prediction method (#5389, #5512)\n* Previously, the prediction method was not thread-safe (#5339). This release adds a new API function `inplace_predict()` that is thread-safe. 
It is now possible to serve concurrent requests for prediction using a shared model object.\n* It is now possible to compute prediction in-place for selected data formats (`numpy.ndarray` / `scipy.sparse.csr_matrix` / `cupy.ndarray` / `cudf.DataFrame` / `pd.DataFrame`) without creating a `DMatrix` object.\n\n### Addition of Accelerated Failure Time objective for survival analysis (#4763, #5473, #5486, #5552, #5553)\n* Survival analysis (regression) models the time it takes for an event of interest to occur. The target label is potentially censored, i.e. the label is a range rather than a single number. We added a new objective `survival:aft` to support survival analysis. Also added is the new API to specify the ranged labels. Check out [the tutorial](https://xgboost.readthedocs.io/en/release_1.1.0/tutorials/aft_survival_analysis.html) and the [demos](https://github.com/dmlc/xgboost/tree/release_1.1.0/demo/aft_survival).\n* GPU support is work in progress (#5714).\n\n### Improved installation experience on Mac OSX (#5597, #5602, #5606, #5701)\n* It only takes two commands to install the XGBoost Python package: `brew install libomp` followed by `pip install xgboost`. The installed XGBoost will use all CPU cores. Even better, starting with this release, we distribute pre-compiled binary wheels targeting Mac OSX. Now the install command `pip install xgboost` finishes instantly, as it no longer compiles the C++ source of XGBoost. The last three Mac versions (High Sierra, Mojave, Catalina) are supported.\n* R package: the 1.1.0 release fixes the error `Initializing libomp.dylib, but found libomp.dylib already initialized` (#5701)\n\n### Ranking metrics are now accelerated on GPUs (#5380, #5387, #5398)\n\n### GPU-side data matrix to ingest data directly from other GPU libraries (#5420, #5465)\n* Previously, data on GPU memory had to be copied back to the main memory before it could be used by XGBoost. 
Starting with 1.1.0 release, XGBoost provides a dedicated interface (`DeviceQuantileDMatrix`) so that it can ingest data from GPU memory directly. The result is that XGBoost interoperates better with GPU-accelerated data science libraries, such as cuDF, cuPy, and PyTorch.\n* Set device in device dmatrix. (#5596)\n\n### Robust model serialization with JSON (#5123, #5217)\n* We continue efforts from the 1.0.0 release to adopt JSON as the format to save and load models robustly. Refer to the release note for 1.0.0 to learn more.\n* It is now possible to store internal configuration of the trained model (`Booster`) object in R as a JSON string (#5123, #5217).\n\n### Improved integration with Dask\n* Pass through `verbose` parameter for dask fit (#5413)\n* Use `DMLC_TASK_ID`. (#5415)\n* Order the prediction result. (#5416)\n* Honor `nthreads` from dask worker. (#5414)\n* Enable grid searching with scikit-learn. (#5417)\n* Check non-equal when setting threads. (#5421)\n* Accept other inputs for prediction. (#5428)\n* Fix missing value for scikit-learn interface. (#5435)\n\n### XGBoost4J-Spark: Check number of columns in the data iterator (#5202, #5303)\n* Before, the native layer in XGBoost did not know the number of columns (features) ahead of time and had to guess the number of columns by counting the feature index when ingesting data. This method has a failure mode in distributed setting: if the training data is highly sparse, some features may be completely missing in one or more worker partitions. Thus, one or more workers may deduce an incorrect data shape, leading to crashes or silently wrong models.\n* Enforce correct data shape by passing the number of columns explicitly from the JVM layer into the native layer.\n\n### Major refactoring of the `DMatrix` class\n* Continued from 1.0.0 release.\n* Remove update prediction cache from predictors. (#5312)\n* Predict on Ellpack. 
(#5327)\n* Partial rewrite EllpackPage (#5352)\n* Use ellpack for prediction only when sparsepage doesn't exist. (#5504)\n* RFC: #4354, Roadmap: #5143\n\n### Breaking: XGBoost Python package now requires Pip 19.0 and higher (#5589)\n* Your Linux machine may have an old version of Pip and may attempt to install a source package, leading to long installation time. This is because we are now using `manylinux2010` tag in the binary wheel release. Ensure you have Pip 19.0 or newer by running `python3 -m pip -V` to check the version. Upgrade Pip with command\n```\npython3 -m pip install --upgrade pip\n```\nUpgrading to latest pip allows us to depend on newer versions of system libraries. [TensorFlow](https://www.tensorflow.org/install/pip) also requires Pip 19.0+.\n\n### Breaking: GPU algorithm now requires CUDA 10.0 and higher (#5649)\n* CUDA 10.0 is necessary to make the GPU algorithm deterministic (#5361).\n\n### Breaking: `silent` parameter is now removed (#5476)\n* Please use `verbosity` instead.\n\n### Breaking: Set `output_margin` to True for custom objectives (#5564)\n* Now both R and Python interface custom objectives get un-transformed (raw) prediction outputs.\n\n### Breaking: `Makefile` is now removed. We use CMake exclusively to build XGBoost (#5513)\n* Exception: the R package uses Autotools, as the CRAN ecosystem did not yet adopt CMake widely.\n\n### Breaking: `distcol` updater is now removed (#5507)\n* The `distcol` updater has been long broken, and currently we lack resources to implement a working implementation from scratch.\n\n### Deprecation notices\n* **Python 3.5**. This release is the last release to support Python 3.5. The following release (1.2.0) will require Python 3.6.\n* **Scala 2.11**. Currently XGBoost4J supports Scala 2.11. However, if a future release of XGBoost adopts Spark 3, it will not support Scala 2.11, as Spark 3 requires Scala 2.12+. 
We do not yet know which XGBoost release will adopt Spark 3.\n\n### Known limitations\n* (Python package) When early stopping is activated with `early_stopping_rounds` at training time, the prediction method (`xgb.predict()`) behaves in a surprising way. If XGBoost runs for M rounds and chooses iteration N (N < M) as the best iteration, then the prediction method will use M trees by default. To use the best iteration (N trees), users will need to manually take the best iteration field `bst.best_iteration` and pass it as the `ntree_limit` argument to `xgb.predict()`. See #5209 and #4052 for additional context.\n* GPU ranking objective is currently not deterministic (#5561).\n* When training parameter `reg_lambda` is set to zero, some leaf nodes may be assigned a NaN value. (See [discussion](https://discuss.xgboost.ai/t/still-getting-unexplained-nans-new-replication-code/1383/9).) For now, please set `reg_lambda` to a nonzero value.\n\n### Community and Governance\n* The XGBoost Project Management Committee (PMC) is pleased to announce a new committer: Egor Smirnov (@SmirnovEgorRu). He has led a major initiative to improve the performance of XGBoost on multi-core CPUs.\n\n### Bug-fixes\n* Improved compatibility with scikit-learn (#5255, #5505, #5538)\n* Remove f-string, since it's not supported by Python 3.5 (#5330). Note that Python 3.5 support is deprecated and scheduled to be dropped in the upcoming release (1.2.0).\n* Fix the pruner so that it doesn't prune the same branch twice (#5335)\n* Enforce only major version in JSON model schema (#5336). 
Any major revision of the model schema would bump up the major version.\n* Fix a small typo in sklearn.py that broke multiple eval metrics (#5341)\n* Restore loading model from a memory buffer (#5360)\n* Define lazy isinstance for Python compat (#5364)\n* [R] fixed uses of `class()` (#5426)\n* Force compressed buffer to be 4 bytes aligned, to keep cuda-memcheck happy (#5441)\n* Remove warning for calling host function (`std::max`) on a GPU device (#5453)\n* Fix uninitialized value bug in xgboost callback (#5463)\n* Fix model dump in CLI (#5485)\n* Fix out-of-bound array access in `WQSummary::SetPrune()` (#5493)\n* Ensure that configured `dmlc/build_config.h` is picked up by Rabit and XGBoost, to fix build on Alpine (#5514)\n* Fix a misspelled method, made in a git merge (#5509)\n* Fix a bug in binary model serialization (#5532)\n* Fix CLI model IO (#5535)\n* Don't use `uint` for threads (#5542)\n* Fix R interaction constraints to handle more than 100000 features (#5543)\n* [jvm-packages] XGBoost Spark should deal with NaN when parsing evaluation output (#5546)\n* GPU-side data sketching is now aware of query groups in learning-to-rank data (#5551)\n* Fix DMatrix slicing for newly added fields (#5552)\n* Fix configuration status with loading binary model (#5562)\n* Fix build when OpenMP is disabled (#5566)\n* R compatibility patches (#5577, #5600)\n* gpu\\_hist performance fixes (#5558)\n* Don't set seed on CLI interface (#5563)\n* [R] When serializing model, preserve model attributes related to early stopping (#5573)\n* Avoid rabit calls in learner configuration (#5581)\n* Hide C++ symbols in libxgboost.so when building Python wheel (#5590). 
This fixes apache/incubator-tvm#4953.\n* Fix compilation on Mac OSX High Sierra (10.13) (#5597)\n* Fix build on big endian CPUs (#5617)\n* Resolve crash due to use of `vector<bool>::iterator` (#5642)\n* Validation JSON model dump using JSON schema (#5660)\n\n### Performance improvements\n* Wide dataset quantile performance improvement (#5306)\n* Reduce memory usage of GPU-side data sketching (#5407)\n* Reduce span check overhead (#5464)\n* Serialise booster after training to free up GPU memory (#5484)\n* Use the maximum amount of GPU shared memory available to speed up the histogram kernel (#5491)\n* Use non-synchronising scan in Thrust (#5560)\n* Use `cudaDeviceGetAttribute()` instead of `cudaGetDeviceProperties()` for speed (#5570)\n\n### API changes\n* Support importing data from a Pandas SparseArray (#5431)\n* `HostDeviceVector` (vector shared between CPU and GPU memory) now exposes `HostSpan` interface, to enable access on the CPU side with bound check (#5459)\n* Accept other gradient types for `SplitEntry` (#5467)\n\n### Usability Improvements, Documentation\n* Add `JVM_CHECK_CALL` to prevent C++ exceptions from leaking into the JVM layer (#5199)\n* Updated Windows build docs (#5283)\n* Update affiliation of @hcho3 (#5292)\n* Display Sponsor button, link to OpenCollective (#5325)\n* Update docs for GPU external memory (#5332)\n* Add link to GPU documentation (#5437)\n* Small updates to GPU documentation (#5483)\n* Edits on tutorial for XGBoost job on Kubernetes (#5487)\n* Add reference to GPU external memory (#5490)\n* Fix typos (#5346, #5371, #5384, #5399, #5482, #5515)\n* Update Python doc (#5517)\n* Add Neptune and Optuna to list of examples (#5528)\n* Raise error if the number of data weights doesn't match the number of data sets (#5540)\n* Add a note about GPU ranking (#5572)\n* Clarify meaning of `training` parameter in the C API function `XGBoosterPredict()` (#5604)\n* Better error handling for situations where existing trees cannot be modified (#5406, 
#5418). This feature is enabled when `process_type` is set to `update`.\n\n### Maintenance: testing, continuous integration, build system\n* Add C++ test coverage for data sketching (#5251)\n* Ignore gdb\\_history (#5257)\n* Rewrite setup.py. (#5271, #5280)\n* Use `scikit-learn` in extra dependencies (#5310)\n* Add CMake option to build static library (#5397)\n* [R] changed FindLibR to take advantage of CMake cache (#5427)\n* [R] fixed inconsistency in R -e calls in FindLibR.cmake (#5438)\n* Refactor tests with data generator (#5439)\n* Resolve failing Travis CI (#5445)\n* Update dmlc-core. (#5466)\n* [CI] Use clang-tidy 10 (#5469)\n* De-duplicate code for checking maximum number of nodes (#5497)\n* [CI] Use Ubuntu 18.04 LTS in JVM CI, because 19.04 is EOL (#5537)\n* [jvm-packages] [CI] Create a Maven repository to host SNAPSHOT JARs (#5533)\n* [jvm-packages] [CI] Publish XGBoost4J JARs with Scala 2.11 and 2.12 (#5539)\n* [CI] Use Vault repository to re-gain access to devtoolset-4 (#5589)\n\n### Maintenance: Refactor code for legibility and maintainability\n* Move prediction cache to Learner (#5220, #5302)\n* Remove SimpleCSRSource (#5315)\n* Refactor SparsePageSource, delete cache files after use (#5321)\n* Remove unnecessary DMatrix methods (#5324)\n* Split up `LearnerImpl` (#5350)\n* Move segment sorter to common (#5378)\n* Move thread local entry into Learner (#5396)\n* Split up test helpers header (#5455)\n* Requires setting leaf stat when expanding tree (#5501)\n* Purge device\\_helpers.cuh (#5534)\n* Use thrust functions instead of custom functions (#5544)\n\n### Acknowledgement\n**Contributors**: Nan Zhu (@CodingCat), Rory Mitchell (@RAMitchell), @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Andrew Kane (@ankane), Avinash Barnwal (@avinashbarnwal), Bart Broere (@bartbroere), Andy Adinets (@canonizer), Chen Qin (@chenqin), Daiki Katsuragawa (@daikikatsuragawa), David Díaz Vico (@daviddiazvico), Darius Kharazi (@dkharazi), Darby Payne (@dpayne), Jason E. 
Aten, Ph.D. (@glycerine), Philip Hyunsu Cho (@hcho3), James Lamb (@jameslamb), Jan Borchmann (@jborchma), Kamil A. Kaczmarek (@kamil-kaczmarek), Melissa Kohl (@mjkohl32), Nicolas Scozzaro (@nscozzaro), Paul Kaefer (@paulkaefer), Rong Ou (@rongou), Samrat Pandiri (@samratp), Sriram Chandramouli (@sriramch), Yuan Tang (@terrytangyuan), Jiaming Yuan (@trivialfis), Liang-Chi Hsieh (@viirya), Bobby Wang (@wbo4958), Zhang Zhang (@zhangzhang10),\n\n**Reviewers**: Nan Zhu (@CodingCat), @LeZhengThu, Rory Mitchell (@RAMitchell), @ShvetsKS, Egor Smirnov (@SmirnovEgorRu), Steve Bronder (@SteveBronder), Nikita Titov (@StrikerRUS), Andrew Kane (@ankane), Avinash Barnwal (@avinashbarnwal), @brydag, Andy Adinets (@canonizer), Chandra Shekhar Reddy (@chandrureddy), Chen Qin (@chenqin), Codecov (@codecov-io), David Díaz Vico (@daviddiazvico), Darby Payne (@dpayne), Jason E. Aten, Ph.D. (@glycerine), Philip Hyunsu Cho (@hcho3), James Lamb (@jameslamb), @johnny-cat, Mu Li (@mli), Mate Soos (@msoos), @rnyak, Rong Ou (@rongou), Sriram Chandramouli (@sriramch), Toby Dylan Hocking (@tdhock), Yuan Tang (@terrytangyuan), Oleksandr Pryimak (@trams), Jiaming Yuan (@trivialfis), Liang-Chi Hsieh (@viirya), Bobby Wang (@wbo4958),\n\n## v1.0.2 (2020.03.03)\nThis patch release applies the following patches to 1.0.0 release:\n\n* Fix a small typo in sklearn.py that broke multiple eval metrics (#5341)\n* Restore loading model from buffer (#5360)\n* Use type name for data type check (#5364)\n\n## v1.0.1 (2020.02.21)\nThis release is identical to the 1.0.0 release, except that it fixes a small bug that rendered 1.0.0 incompatible with Python 3.5. See #5328.\n\n## v1.0.0 (2020.02.19)\nThis release marks a major milestone for the XGBoost project.\n\n### Apache-style governance, contribution policy, and semantic versioning (#4646, #4659)\n* Starting with 1.0.0 release, the XGBoost Project is adopting Apache-style governance. 
The full community guideline is [available in the doc website](https://xgboost.readthedocs.io/en/release_1.0.0/contrib/community.html). Note that we now have Project Management Committee (PMC) who would steward the project on the long-term basis. The PMC is also entrusted to run and fund the project's continuous integration (CI) infrastructure (https://xgboost-ci.net).\n* We also adopt the [semantic versioning](https://semver.org/). See [our release versioning policy](https://xgboost.readthedocs.io/en/release_1.0.0/contrib/release.html).\n\n### Better performance scaling for multi-core CPUs (#4502, #4529, #4716, #4851, #5008, #5107, #5138, #5156)\n* Poor performance scaling of the `hist` algorithm for multi-core CPUs has been under investigation (#3810). Previous effort #4529 was replaced with a series of pull requests (#5107, #5138, #5156) aimed at achieving the same performance benefits while keeping the C++ codebase legible. The latest performance benchmark results show [up to 5x speedup on Intel CPUs with many cores](https://github.com/dmlc/xgboost/pull/5156#issuecomment-580024413). Note: #5244, which concludes the effort, will become part of the upcoming release 1.1.0.\n\n### Improved installation experience on Mac OSX (#4672, #5074, #5080, #5146, #5240)\n* It used to be quite complicated to install XGBoost on Mac OSX. XGBoost uses OpenMP to distribute work among multiple CPU cores, and Mac's default C++ compiler (Apple Clang) does not come with OpenMP. Existing work-around (using another C++ compiler) was complex and prone to fail with cryptic diagnosis (#4933, #4949, #4969).\n* Now it only takes two commands to install XGBoost: `brew install libomp` followed by `pip install xgboost`. The installed XGBoost will use all CPU cores.\n* Even better, XGBoost is now available from Homebrew: `brew install xgboost`. 
See Homebrew/homebrew-core#50467.\n* Previously, if you installed the XGBoost R package using the command `install.packages('xgboost')`, it could only use a single CPU core and you would experience slow training performance. With 1.0.0 release, the R package will use all CPU cores out of box.\n\n### Distributed XGBoost now available on Kubernetes (#4621, #4939)\n* Check out the [tutorial for setting up distributed XGBoost on a Kubernetes cluster](https://xgboost.readthedocs.io/en/release_1.0.0/tutorials/kubernetes.html).\n\n### Ruby binding for XGBoost (#4856)\n\n### New Native Dask interface for multi-GPU and multi-node scaling (#4473, #4507, #4617, #4819, #4907, #4914, #4941, #4942, #4951, #4973, #5048, #5077, #5144, #5270)\n* XGBoost now integrates seamlessly with [Dask](https://dask.org/), a lightweight distributed framework for data processing. Together with the first-class support for cuDF data frames (see below), it is now easier than ever to create end-to-end data pipeline running on one or more NVIDIA GPUs.\n* Multi-GPU training with Dask is now up to 20% faster than the previous release (#4914, #4951).\n\n### First-class support for cuDF data frames and cuPy arrays (#4737, #4745, #4794, #4850, #4891, #4902, #4918, #4927, #4928, #5053, #5189, #5194, #5206, #5219, #5225)\n* [cuDF](https://github.com/rapidsai/cudf) is a data frame library for loading and processing tabular data on NVIDIA GPUs. 
It provides a Pandas-like API.\n* [cuPy](https://github.com/cupy/cupy) implements a NumPy-compatible multi-dimensional array on NVIDIA GPUs.\n* Now users can keep the data on the GPU memory throughout the end-to-end data pipeline, obviating the need for copying data between the main memory and GPU memory.\n* XGBoost can accept any data structure that exposes `__array_interface__` signature, opening the way to support other columnar formats that are compatible with Apache Arrow.\n\n### [Feature interaction constraint](https://xgboost.readthedocs.io/en/release_1.0.0/tutorials/feature_interaction_constraint.html) is now available with `approx` and `gpu_hist` algorithms (#4534, #4587, #4596, #5034).\n\n### Learning to rank is now GPU accelerated (#4873, #5004, #5129)\n* Supported ranking objectives: NDCG, Map, Pairwise.\n* [Up to 2x improved training performance on GPUs](https://devblogs.nvidia.com/learning-to-rank-with-xgboost-and-gpu/).\n\n### Enable `gamma` parameter for GPU training (#4874, #4953)\n* The `gamma` parameter specifies the minimum loss reduction required to add a new split in a tree. A larger value for `gamma` has the effect of pre-pruning the tree, by making it harder to add splits.\n\n### External memory for GPU training (#4486, #4526, #4747, #4833, #4879, #5014)\n* It is now possible to use NVIDIA GPUs even when the size of training data exceeds the available GPU memory. Note that the external memory support for GPU is still experimental. #5093 will further improve performance and will become part of the upcoming release 1.1.0.\n* RFC for enabling external memory with GPU algorithms: #4357\n\n### Improve Scikit-Learn interface (#4558, #4842, #4929, #5049, #5151, #5130, #5227)\n* Many users of XGBoost enjoy the convenience and breadth of Scikit-Learn ecosystem. 
In this release, we revise the Scikit-Learn API of XGBoost (`XGBRegressor`, `XGBClassifier`, and `XGBRanker`) to achieve feature parity with the traditional XGBoost interface (`xgboost.train()`).\n* Insert check to validate data shapes.\n* Produce an error message if `eval_set` is not a tuple. An error message is better than silently crashing.\n* Allow using `numpy.random.RandomState` object.\n* Add `n_jobs` as an alias of `nthread`.\n* Roadmap: #5152\n\n### XGBoost4J-Spark: Redesigning checkpointing mechanism\n* RFC is available at #4786\n* Clean up checkpoint file after a successful training job (#4754): The current implementation in XGBoost4J-Spark does not clean up the checkpoint file after a successful training job. If the user runs another job with the same checkpointing directory, she will get a wrong model because the second job will re-use the checkpoint file left over from the first job. To prevent this scenario, we propose to always clean up the checkpoint file after every successful training job.\n* Avoid Multiple Jobs for Checkpointing (#5082): The current method for checkpoint is to collect the booster produced at the last iteration of each checkpoint interval to Driver and persist it in HDFS. The major issue with this approach is that it needs to re-perform the data preparation for training if the user did not choose to cache the training dataset. To avoid re-performing data prep, we build external-memory checkpointing in the XGBoost4J layer as well.\n* Enable deterministic repartitioning when checkpoint is enabled (#4807): Distributed algorithm for gradient boosting assumes a fixed partition of the training data between multiple iterations. In previous versions, there was no guarantee that data partition would stay the same, especially when a worker goes down and some data had to be recovered from previous checkpoint. 
In this release, we make data partition deterministic by using the data hash value of each data row in computing the partition.\n\n### XGBoost4J-Spark: handle errors thrown by the native code (#4560)\n* All core logic of XGBoost is written in C++, so XGBoost4J-Spark internally uses the C++ code via Java Native Interface (JNI). #4560 adds a proper error handling for any errors or exceptions arising from the C++ code, so that the XGBoost Spark application can be torn down in an orderly fashion.\n\n### XGBoost4J-Spark: Refine method to count the number of alive cores  (#4858)\n* The `SparkParallelismTracker` class ensures that sufficient number of executor cores are alive. To that end, it is important to query the number of alive cores reliably.\n\n### XGBoost4J: Add `BigDenseMatrix` to store more than `Integer.MAX_VALUE` elements (#4383)\n\n### Robust model serialization with JSON (#4632, #4708, #4739, #4868, #4936, #4945, #4974, #5086, #5087, #5089, #5091, #5094, #5110, #5111, #5112, #5120, #5137, #5218, #5222, #5236, #5245, #5248, #5281)\n* In this release, we introduce an experimental support of using [JSON](https://www.json.org/json-en.html) for serializing (saving/loading) XGBoost models and related hyperparameters for training. We would like to eventually replace the old binary format with JSON, since it is an open format and parsers are available in many programming languages and platforms. See [the documentation for model I/O using JSON](https://xgboost.readthedocs.io/en/release_1.0.0/tutorials/saving_model.html). #3980 explains why JSON was chosen over other alternatives.\n* To maximize interoperability and compatibility of the serialized models, we now split serialization into two parts (#4855):\n  1. Model, e.g. decision trees and strictly related metadata like `num_features`.\n  2. Internal configuration, consisting of training parameters and other configurable parameters. 
For example, `max_delta_step`, `tree_method`, `objective`, `predictor`, `gpu_id`.\n\n  Previously, users often ran into issues where the model file produced by one machine could not load or run on another machine. For example, models trained using a machine with an NVIDIA GPU could not run on another machine without a GPU (#5291, #5234). The reason is that the old binary format saved some internal configuration that was not universally applicable to all machines, e.g. `predictor='gpu_predictor'`.\n\n  Now, model saving function (`Booster.save_model()` in Python) will save only the model, without internal configuration. This will guarantee that your model file can be used anywhere. Internal configuration will be serialized in limited circumstances such as:\n  * Multiple nodes in a distributed system exchange model details over the network.\n  * Model checkpointing, to recover from possible crashes.\n\n  This work proved to be useful for parameter validation as well (see below).\n* Starting with 1.0.0 release, we will use semantic versioning to indicate whether the model produced by one version of XGBoost would be compatible with another version of XGBoost. Any change in the major version indicates a breaking change in the serialization format.\n* We now provide a robust method to save and load scikit-learn related attributes (#5245). Previously, we used Python pickle to save Python attributes related to `XGBClassifier`, `XGBRegressor`, and `XGBRanker` objects. The attributes are necessary to properly interact with scikit-learn. See #4639 for more details. The use of pickling hampered interoperability, as a pickle from one machine may not necessarily work on another machine. Starting with this release, we use an alternative method to serialize the scikit-learn related attributes. 
The use of Python pickle is now discouraged (#5236, #5281).\n\n### Parameter validation: detection of unused or incorrect parameters (#4553, #4577, #4738, #4801, #4961, #5101, #5157, #5167, #5256)\n* Mis-spelled training parameter is a common user mistake. In previous versions of XGBoost, mis-spelled parameters were silently ignored. Starting with 1.0.0 release, XGBoost will produce a warning message if there are any unused training parameters. Currently, parameter validation is available to R users and Python XGBoost API users. We are working to extend its support to scikit-learn users.\n* Configuration steps now have well-defined semantics (#4542, #4738), so we know exactly where and how the internal configurable parameters are changed.\n* The user can now use `save_config()` function to inspect all (used) training parameters. This is helpful for debugging model performance.\n\n### Allow individual workers to recover from faults (#4808, #4966)\n* Status quo: if a worker fails, all workers are shut down and restarted, and learning resumes from the last checkpoint. This involves requesting resources from the scheduler (e.g. Spark) and shuffling all the data again from scratch. Both of these operations can be quite costly and block training for extended periods of time, especially if the training data is big and the number of worker nodes is in the hundreds.\n* The proposed solution is to recover the single node that failed, instead of shutting down all workers. The rest of the cluster waits until the single failed worker is bootstrapped and catches up with the rest.\n* See roadmap at #4753. Note that this is work in progress. 
In particular, the feature is not yet available from XGBoost4J-Spark.\n\n### Accurate prediction for DART models\n* Use DART tree weights when computing SHAPs (#5050)\n* Don't drop trees during DART prediction by default (#5115)\n* Fix DART prediction in R (#5204)\n\n### Make external memory more robust\n* Fix issues with training with external memory on cpu (#4487)\n* Fix crash with approx tree method on cpu (#4510)\n* Fix external memory race in `exact` (#4980). Note: `dmlc::ThreadedIter` is not actually thread-safe. We would like to re-design it in the long term.\n\n### Major refactoring of the `DMatrix` class (#4686, #4744, #4748, #5044, #5092, #5108, #5188, #5198)\n* Goal 1: improve performance and reduce memory consumption. Right now, if the user trains a model with a NumPy array as training data, the array gets copied 2-3 times before training begins. We'd like to reduce duplication of the data matrix.\n* Goal 2: Expose a common interface to external data, unify the way DMatrix objects are constructed and simplify the process of adding new external data sources. This work is essential for ingesting cuPy arrays.\n* Goal 3: Handle missing values consistently.\n* RFC: #4354, Roadmap: #5143\n* This work is also relevant to external memory support on GPUs.\n\n### Breaking: XGBoost Python package now requires Python 3.5 or newer (#5021, #5274)\n* Python 3.4 has reached its end-of-life on March 16, 2019, so we now require Python 3.5 or newer.\n\n### Breaking: GPU algorithm now requires CUDA 9.0 and higher (#4527, #4580)\n\n### Breaking: `n_gpus` parameter removed; multi-GPU training now requires a distributed framework (#4579, #4749, #4773, #4810, #4867, #4908)\n* #4531 proposed removing support for single-process multi-GPU training. Contributors would focus on multi-GPU support through distributed frameworks such as Dask and Spark, where the framework would be expected to assign a worker process for each GPU independently. 
By delegating GPU management and data movement to the distributed framework, we can greatly simplify the core XGBoost codebase, make multi-GPU training more robust, and reduce burden for future development.\n\n### Breaking: Some deprecated features have been removed\n* ``gpu_exact`` training method (#4527, #4742, #4777). Use ``gpu_hist`` instead.\n* ``learning_rates`` parameter in Python (#5155). Use the callback API instead.\n* ``num_roots`` (#5059, #5165), since the current training code always uses a single root node.\n* GPU-specific objectives (#4690), such as `gpu:reg:linear`. Use objectives without `gpu:` prefix; GPU will be used automatically if your machine has one.\n\n### Breaking: the C API function `XGBoosterPredict()` now asks for an extra parameter `training`.\n\n### Breaking: We now use CMake exclusively to build XGBoost. `Makefile` is being sunset.\n* Exception: the R package uses Autotools, as the CRAN ecosystem did not yet adopt CMake widely.\n\n### Performance improvements\n* Smarter choice of histogram construction for distributed `gpu_hist` (#4519)\n* Optimizations for quantization on device (#4572)\n* Introduce caching memory allocator to avoid latency associated with GPU memory allocation (#4554, #4615)\n* Optimize the initialization stage of the CPU `hist` algorithm for sparse datasets (#4625)\n* Prevent unnecessary data copies from GPU memory to the host (#4795)\n* Improve operation efficiency for single prediction (#5016)\n* Group builder modified for incremental building, to speed up building large `DMatrix` (#5098)\n\n### Bug-fixes\n* Eliminate `FutureWarning: Series.base is deprecated` (#4337)\n* Ensure pandas DataFrame column names are treated as strings in type error message (#4481)\n* [jvm-packages] Add back `reg:linear` for scala, as it is only deprecated and not meant to be removed yet (#4490)\n* Fix library loading for Cygwin users (#4499)\n* Fix prediction from loaded pickle (#4516)\n* Enforce exclusion between 
`pred_contribs=True` and `pred_interactions=True` (#4522)\n* Do not return dangling reference to local `std::string` (#4543)\n* Set the appropriate device before freeing device memory (#4566)\n* Mark `SparsePageDmatrix` destructor default. (#4568)\n* Choose the appropriate tree method only when the tree method is 'auto' (#4571)\n* Fix `benchmark_tree.py` (#4593)\n* [jvm-packages] Fix silly bug in feature scoring (#4604)\n* Fix GPU predictor when the test data matrix has different number of features than the training data matrix used to train the model (#4613)\n* Fix external memory for get column batches. (#4622)\n* [R] Use built-in label when xgb.DMatrix is given to xgb.cv() (#4631)\n* Fix early stopping in the Python package (#4638)\n* Fix AUC error in distributed mode caused by imbalanced dataset (#4645, #4798)\n* [jvm-packages] Expose `setMissing` method in `XGBoostClassificationModel` / `XGBoostRegressionModel` (#4643)\n* Remove initializing stringstream reference. (#4788)\n* [R] `xgb.get.handle` now checks all class listed of `object` (#4800)\n* Do not use `gpu_predictor` unless data comes from GPU (#4836)\n* Fix data loading (#4862)\n* Workaround `isnan` across different environments. (#4883)\n* [jvm-packages] Handle Long-type parameter (#4885)\n* Don't `set_params` at the end of `set_state` (#4947). Ensure that the model does not change after pickling and unpickling multiple times.\n* C++ exceptions should not crash OpenMP loops (#4960)\n* Fix `usegpu` flag in DART. (#4984)\n* Run training with empty `DMatrix` (#4990, #5159)\n* Ensure that no two processes can use the same GPU (#4990)\n* Fix repeated split and 0 cover nodes (#5010)\n* Reset histogram hit counter between multiple data batches (#5035)\n* Fix `feature_name` created from int64index dataframe. 
(#5081)\n* Don't use 0 for \"fresh leaf\" (#5084)\n* Throw error when user attempts to use multi-GPU training and XGBoost has not been compiled with NCCL (#5170)\n* Fix metric name loading (#5122)\n* Quick fix for memory leak in CPU `hist` algorithm (#5153)\n* Fix wrapping GPU ID and prevent data copying (#5160)\n* Fix signature of Span constructor (#5166)\n* Lazy initialization of device vector, so that XGBoost compiled with CUDA can run on a machine without any GPU (#5173)\n* Model loading should not change system locale (#5314)\n* Distributed training jobs would sometimes hang; revert Rabit to fix this regression (dmlc/rabit#132, #5237)\n\n### API changes\n* Add support for cross-validation using query ID (#4474)\n* Enable feature importance property for DART model (#4525)\n* Add `rmsle` metric and `reg:squaredlogerror` objective (#4541)\n* All objective and evaluation metrics are now exposed to JVM packages (#4560)\n* `dump_model()` and `get_dump()` now support exporting in GraphViz language (#4602)\n* Support metrics `ndcg-` and `map-` (#4635)\n* [jvm-packages] Allow chaining prediction (transform) in XGBoost4J-Spark (#4667)\n* [jvm-packages] Add option to bypass missing value check in the Spark layer (#4805). Only use this option if you know what you are doing.\n* [jvm-packages] Add public group getter (#4838)\n* `XGDMatrixSetGroup` C API is now deprecated (#4864). 
Use `XGDMatrixSetUIntInfo` instead.\n* [R] Added new `train_folds` parameter to `xgb.cv()` (#5114)\n* Ingest meta information from Pandas DataFrame, such as data weights (#5216)\n\n### Maintenance: Refactor code for legibility and maintainability\n* De-duplicate GPU parameters (#4454)\n* Simplify INI-style config reader using C++11 STL (#4478, #4521)\n* Refactor histogram building code for `gpu_hist` (#4528)\n* Overload device memory allocator, to enable instrumentation for compiling memory usage statistics (#4532)\n* Refactor out row partitioning logic from `gpu_hist` (#4554)\n* Remove an unused variable (#4588)\n* Implement tree model dump with code generator, to de-duplicate code for generating dumps in 3 different formats (#4602)\n* Remove `RowSet` class which is no longer being used (#4697)\n* Remove some unused functions as reported by cppcheck (#4743)\n* Mimic CUDA assert output in Span check (#4762)\n* [jvm-packages] Refactor `XGBoost.scala` to put all params processing in one place (#4815)\n* Add some comments for GPU row partitioner (#4832)\n* Span: use `size_t` for `index_type`, add `front` and `back`. (#4935)\n* Remove dead code in `exact` algorithm (#5034, #5105)\n* Unify integer types used for row and column indices (#5034)\n* Extract feature interaction constraint from `SplitEvaluator` class. (#5034)\n* [Breaking] De-duplicate parameters and docstrings in the constructors of Scikit-Learn models (#5130)\n* Remove benchmark code from GPU tests (#5141)\n* Clean up Python 2 compatibility code. (#5161)\n* Extensible binary serialization format for `DMatrix::MetaInfo` (#5187). This will be useful for implementing censored labels for survival analysis applications.\n* Cleanup clang-tidy warnings. (#5247)\n\n### Maintenance: testing, continuous integration, build system\n* Use `yaml.safe_load` instead of `yaml.load`. 
(#4537)\n* Ensure GCC is at least 5.x (#4538)\n* Remove all mention of `reg:linear` from tests (#4544)\n* [jvm-packages] Upgrade to Scala 2.12 (#4574)\n* [jvm-packages] Update kryo dependency to 2.22 (#4575)\n* [CI] Specify account ID when logging into ECR Docker registry (#4584)\n* Use Sphinx 2.1+ to compile documentation (#4609)\n* Make Pandas optional for running Python unit tests (#4620)\n* Fix spark tests on machines with many cores (#4634)\n* [jvm-packages] Update local dev build process (#4640)\n* Add optional dependencies to setup.py (#4655)\n* [jvm-packages] Fix maven warnings (#4664)\n* Remove extraneous files from the R package, to comply with CRAN policy (#4699)\n* Remove VC-2013 support, since it is not C++11 compliant (#4701)\n* [CI] Fix broken installation of Pandas (#4704, #4722)\n* [jvm-packages] Clean up temporary files after running tests (#4706)\n* Specify version macro in CMake. (#4730)\n* Include dmlc-tracker into XGBoost Python package (#4731)\n* [CI] Use long key ID for Ubuntu repository fingerprints. (#4783)\n* Remove plugin, CUDA related code in automake & autoconf files (#4789)\n* Skip related tests when scikit-learn is not installed. (#4791)\n* Ignore vscode and clion files (#4866)\n* Use bundled Google Test by default (#4900)\n* [CI] Raise timeout threshold in Jenkins (#4938)\n* Copy CMake parameter from dmlc-core. (#4948)\n* Set correct file permission. (#4964)\n* [CI] Update lint configuration to support latest pylint convention (#4971)\n* [CI] Upload nightly builds to S3 (#4976, #4979)\n* Add asan.so.5 to cmake script. (#4999)\n* [CI] Fix Travis tests. (#5062)\n* [CI] Locate vcomp140.dll from System32 directory (#5078)\n* Implement training observer to dump internal states of objects (#5088). 
This will be useful for debugging.\n* Fix visual studio output library directories (#5119)\n* [jvm-packages] Comply with scala style convention + fix broken unit test (#5134)\n* [CI] Repair download URL for Maven 3.6.1 (#5139)\n* Don't use modernize-use-trailing-return-type in clang-tidy. (#5169)\n* Explicitly use UTF-8 codepage when using MSVC (#5197)\n* Add CMake option to run Undefined Behavior Sanitizer (UBSan) (#5211)\n* Make some GPU tests deterministic (#5229)\n* [R] Robust endian detection in CRAN xgboost build (#5232)\n* Support FreeBSD (#5233)\n* Make `pip install xgboost*.tar.gz` work by fixing build-python.sh (#5241)\n* Fix compilation error due to 64-bit integer narrowing to `size_t` (#5250)\n* Remove use of `std::cout` from R package, to comply with CRAN policy (#5261)\n* Update DMLC-Core submodule (#4674, #4688, #4726, #4924)\n* Update Rabit submodule (#4560, #4667, #4718, #4808, #4966, #5237)\n\n### Usability Improvements, Documentation\n* Add Random Forest API to Python API doc (#4500)\n* Fix Python demo and doc. (#4545)\n* Remove doc about not supporting CUDA 10.1 (#4578)\n* Address some sphinx warnings and errors, add doc for building doc. 
(#4589)\n* Add instruction to run formatting checks locally (#4591)\n* Fix docstring for `XGBModel.predict()` (#4592)\n* Doc and demo for customized metric and objective (#4598, #4608)\n* Add to documentation how to run tests locally (#4610)\n* Empty evaluation list in early stopping should produce meaningful error message (#4633)\n* Fixed year to 2019 in conf.py, helpers.h and LICENSE (#4661)\n* Minor updates to links and grammar (#4673)\n* Remove `silent` in doc (#4689)\n* Remove old Python trouble shooting doc (#4729)\n* Add `os.PathLike` support for file paths to DMatrix and Booster Python classes (#4757)\n* Update XGBoost4J-Spark doc (#4804)\n* Regular formatting for evaluation metrics (#4803)\n* [jvm-packages] Refine documentation for handling missing values in XGBoost4J-Spark (#4805)\n* Monitor for distributed environment (#4829). This is useful for identifying performance bottleneck.\n* Add check for length of weights and produce a good error message (#4872)\n* Fix DMatrix doc (#4884)\n* Export C++ headers in CMake installation (#4897)\n* Update license year in README.md to 2019 (#4940)\n* Fix incorrectly displayed Note in the doc (#4943)\n* Follow PEP 257 Docstring Conventions (#4959)\n* Document minimum version required for Google Test (#5001)\n* Add better error message for invalid feature names (#5024)\n* Some guidelines on device memory usage (#5038)\n* [doc] Some notes for external memory. (#5065)\n* Update document for `tree_method` (#5106)\n* Update demo for ranking. (#5154)\n* Add new lines for Spark XGBoost missing values section (#5180)\n* Fix simple typo: utilty -> utility (#5182)\n* Update R doc by roxygen2 (#5201)\n* [R] Direct user to use `set.seed()` instead of setting `seed` parameter (#5125)\n* Add Optuna badge to `README.md` (#5208)\n* Fix compilation error in `c-api-demo.c` (#5215)\n\n### Acknowledgement\n**Contributors**: Nan Zhu (@CodingCat), Crissman Loomis (@Crissman), Cyprien Ricque (@Cyprien-Ricque), Evan Kepner (@EvanKepner), K.O. 
(@Hi-king), KaiJin Ji (@KerryJi), Peter Badida (@KeyWeeUsr), Kodi Arfer (@Kodiologist), Rory Mitchell (@RAMitchell), Egor Smirnov (@SmirnovEgorRu), Jacob Kim (@TheJacobKim), Vibhu Jawa (@VibhuJawa), Marcos (@astrowonk), Andy Adinets (@canonizer), Chen Qin (@chenqin), Christopher Cowden (@cowden), @cpfarrell, @david-cortes, Liangcai Li (@firestarman), @fuhaoda, Philip Hyunsu Cho (@hcho3), @here-nagini, Tong He (@hetong007), Michal Kurka (@michalkurka), Honza Sterba (@honzasterba), @iblumin, @koertkuipers, mattn (@mattn), Mingjie Tang (@merlintang), OrdoAbChao (@mglowacki100), Matthew Jones (@mt-jones), mitama (@nigimitama), Nathan Moore (@nmoorenz), Daniel Stahl (@phillyfan1138), Michaël Benesty (@pommedeterresautee), Rong Ou (@rongou), Sebastian (@sfahnens), Xu Xiao (@sperlingxx), @sriramch, Sean Owen (@srowen), Stephanie Yang (@stpyang), Yuan Tang (@terrytangyuan), Mathew Wicks (@thesuperzapper), Tim Gates (@timgates42), TinkleG (@tinkle1129), Oleksandr Pryimak (@trams), Jiaming Yuan (@trivialfis), Matvey Turkov (@turk0v), Bobby Wang (@wbo4958), yage (@yage99), @yellowdolphin\n\n**Reviewers**: Nan Zhu (@CodingCat), Crissman Loomis (@Crissman), Cyprien Ricque (@Cyprien-Ricque), Evan Kepner (@EvanKepner), John Zedlewski (@JohnZed), KOLANICH (@KOLANICH), KaiJin Ji (@KerryJi), Kodi Arfer (@Kodiologist), Rory Mitchell (@RAMitchell), Egor Smirnov (@SmirnovEgorRu), Nikita Titov (@StrikerRUS), Jacob Kim (@TheJacobKim), Vibhu Jawa (@VibhuJawa), Andrew Kane (@ankane), Arno Candel (@arnocandel), Marcos (@astrowonk), Bryan Woods (@bryan-woods), Andy Adinets (@canonizer), Chen Qin (@chenqin), Thomas Franke (@coding-komek), Peter  (@codingforfun), @cpfarrell, Joshua Patterson (@datametrician), @fuhaoda, Philip Hyunsu Cho (@hcho3), Tong He (@hetong007), Honza Sterba (@honzasterba), @iblumin, @jakirkham, Vadim Khotilovich (@khotilov), Keith Kraus (@kkraus14), @koertkuipers, @melonki, Mingjie Tang (@merlintang), OrdoAbChao (@mglowacki100), Daniel Mahler (@mhlr), Matthew Rocklin 
(@mrocklin), Matthew Jones (@mt-jones), Michaël Benesty (@pommedeterresautee), PSEUDOTENSOR / Jonathan McKinney (@pseudotensor), Rong Ou (@rongou), Vladimir (@sh1ng), Scott Lundberg (@slundberg), Xu Xiao (@sperlingxx), @sriramch, Pasha Stetsenko (@st-pasha), Stephanie Yang (@stpyang), Yuan Tang (@terrytangyuan), Mathew Wicks (@thesuperzapper), Theodore Vasiloudis (@thvasilo), TinkleG (@tinkle1129), Oleksandr Pryimak (@trams), Jiaming Yuan (@trivialfis), Bobby Wang (@wbo4958), yage (@yage99), @yellowdolphin, Yin Lou (@yinlou)\n\n## v0.90 (2019.05.18)\n\n### XGBoost Python package drops Python 2.x (#4379, #4381)\nPython 2.x is reaching its end-of-life at the end of this year. [Many scientific Python packages are now moving to drop Python 2.x](https://python3statement.github.io/).\n\n### XGBoost4J-Spark now requires Spark 2.4.x (#4377)\n* Spark 2.3 is reaching its end-of-life soon. See discussion at #4389.\n* **Consistent handling of missing values** (#4309, #4349, #4411): Many users had reported issue with inconsistent predictions between XGBoost4J-Spark and the Python XGBoost package. The issue was caused by Spark mis-handling non-zero missing values (NaN, -1, 999 etc). We now alert the user whenever Spark doesn't handle missing values correctly (#4309, #4349). See [the tutorial for dealing with missing values in XGBoost4J-Spark](https://xgboost.readthedocs.io/en/release_0.90/jvm/xgboost4j_spark_tutorial.html#dealing-with-missing-values). This fix also depends on the availability of Spark 2.4.x.\n\n### Roadmap: better performance scaling for multi-core CPUs (#4310)\n* Poor performance scaling of the `hist` algorithm for multi-core CPUs has been under investigation (#3810). #4310 optimizes quantile sketches and other pre-processing tasks. 
Special thanks to @SmirnovEgorRu.\n\n### Roadmap: Harden distributed training (#4250)\n* Make distributed training in XGBoost more robust by hardening [Rabit](https://github.com/dmlc/rabit), which implements [the AllReduce primitive](https://en.wikipedia.org/wiki/Reduce_%28parallel_pattern%29). In particular, improve test coverage on mechanisms for fault tolerance and recovery. Special thanks to @chenqin.\n\n### New feature: Multi-class metric functions for GPUs (#4368)\n* Metrics for multi-class classification have been ported to GPU: `merror`, `mlogloss`. Special thanks to @trivialfis.\n* With supported metrics, XGBoost will select the correct devices based on your system and `n_gpus` parameter.\n\n### New feature: Scikit-learn-like random forest API (#4148, #4255, #4258)\n* XGBoost Python package now offers `XGBRFClassifier` and `XGBRFRegressor` API to train random forests. See [the tutorial](https://xgboost.readthedocs.io/en/release_0.90/tutorials/rf.html). Special thanks to @canonizer\n\n### New feature: use external memory in GPU predictor (#4284, #4396, #4438, #4457)\n* It is now possible to make predictions on GPU when the input is read from external memory. This is useful when you want to make predictions with big dataset that does not fit into the GPU memory. Special thanks to @rongou, @canonizer, @sriramch.\n\n  ```python\n  dtest = xgboost.DMatrix('test_data.libsvm#dtest.cache')\n  bst.set_param('predictor', 'gpu_predictor')\n  bst.predict(dtest)\n  ```\n\n* Coming soon: GPU training (`gpu_hist`) with external memory\n\n### New feature: XGBoost can now handle comments in LIBSVM files (#4430)\n* Special thanks to @trivialfis and @hcho3\n\n### New feature: Embed XGBoost in your C/C++ applications using CMake (#4323, #4333, #4453)\n* It is now easier than ever to embed XGBoost in your C/C++ applications. 
In your CMakeLists.txt, add `xgboost::xgboost` as a linked library:\n\n  ```cmake\n  find_package(xgboost REQUIRED)\n  add_executable(api-demo c-api-demo.c)\n  target_link_libraries(api-demo xgboost::xgboost)\n  ```\n\n  [XGBoost C API documentation is available.](https://xgboost.readthedocs.io/en/release_0.90/dev) Special thanks to @trivialfis\n\n### Performance improvements\n* Use feature interaction constraints to narrow split search space (#4341, #4428)\n* Additional optimizations for `gpu_hist` (#4248, #4283)\n* Reduce OpenMP thread launches in `gpu_hist` (#4343)\n* Additional optimizations for multi-node multi-GPU random forests. (#4238)\n* Allocate unique prediction buffer for each input matrix, to avoid re-sizing GPU array (#4275)\n* Remove various synchronisations from CUDA API calls (#4205)\n* XGBoost4J-Spark\n  - Allow the user to control whether to cache partitioned training data, to potentially reduce execution time (#4268)\n\n### Bug-fixes\n* Fix node reuse in `hist` (#4404)\n* Fix GPU histogram allocation (#4347)\n* Fix matrix attributes not sliced (#4311)\n* Revise AUC and AUCPR metrics now work with weighted ranking task (#4216, #4436)\n* Fix timer invocation for InitDataOnce() in `gpu_hist` (#4206)\n* Fix R-devel errors (#4251)\n* Make gradient update in GPU linear updater thread-safe (#4259)\n* Prevent out-of-range access in column matrix (#4231)\n* Don't store DMatrix handle in Python object until it's initialized, to improve exception safety (#4317)\n* XGBoost4J-Spark\n  - Fix non-deterministic order within a zipped partition on prediction (#4388)\n  - Remove race condition on tracker shutdown (#4224)\n  - Allow set the parameter `maxLeaves`. (#4226)\n  - Allow partial evaluation of dataframe before prediction (#4407)\n  - Automatically set `maximize_evaluation_metrics` if not explicitly given (#4446)\n\n### API changes\n* Deprecate `reg:linear` in favor of `reg:squarederror`. 
(#4267, #4427)\n* Add attribute getter and setter to the Booster object in XGBoost4J (#4336)\n\n### Maintenance: Refactor C++ code for legibility and maintainability\n* Fix clang-tidy warnings. (#4149)\n* Remove deprecated C APIs. (#4266)\n* Use Monitor class to time functions in `hist`. (#4273)\n* Retire DVec class in favour of c++20 style span for device memory. (#4293)\n* Improve HostDeviceVector exception safety (#4301)\n\n### Maintenance: testing, continuous integration, build system\n* **Major refactor of CMakeLists.txt** (#4323, #4333, #4453): adopt modern CMake and export XGBoost as a target\n* **Major improvement in Jenkins CI pipeline** (#4234)\n  - Migrate all Linux tests to Jenkins (#4401)\n  - Builds and tests are now de-coupled, to test an artifact against multiple versions of CUDA, JDK, and other dependencies (#4401)\n  - Add Windows GPU to Jenkins CI pipeline (#4463, #4469)\n* Support CUDA 10.1 (#4223, #4232, #4265, #4468)\n* Python wheels are now built with CUDA 9.0, so that JIT is not required on Volta architecture (#4459)\n* Integrate with NVTX CUDA profiler (#4205)\n* Add a test for cpu predictor using external memory (#4308)\n* Refactor tests to get rid of duplication (#4358)\n* Remove test dependency on `craigcitro/r-travis`, since it's deprecated (#4353)\n* Add files from local R build to `.gitignore` (#4346)\n* Make XGBoost4J compatible with Java 9+ by revising NativeLibLoader (#4351)\n* Jenkins build for CUDA 10.0 (#4281)\n* Remove remaining `silent` and `debug_verbose` in Python tests (#4299)\n* Use all cores to build XGBoost4J lib on linux (#4304)\n* Upgrade Jenkins Linux build environment to GCC 5.3.1, CMake 3.6.0 (#4306)\n* Make CMakeLists.txt compatible with CMake 3.3 (#4420)\n* Add OpenMP option in CMakeLists.txt (#4339)\n* Get rid of a few trivial compiler warnings (#4312)\n* Add external Docker build cache, to speed up builds on Jenkins CI (#4331, #4334, #4458)\n* Fix Windows tests (#4403)\n* Fix a broken python test (#4395)\n* Use 
a fixed seed to split data in XGBoost4J-Spark tests, for reproducibility (#4417)\n* Add additional Python tests to test training under constraints (#4426)\n* Enable building with shared NCCL. (#4447)\n\n### Usability Improvements, Documentation\n* Document limitation of one-split-at-a-time Greedy tree learning heuristic (#4233)\n* Update build doc: PyPI wheel now supports multi-GPU (#4219)\n* Fix docs for `num_parallel_tree` (#4221)\n* Fix document about `colsample_by*` parameter (#4340)\n* Make the train and test input with same colnames. (#4329)\n* Update R contribute link. (#4236)\n* Fix travis R tests (#4277)\n* Log version number in crash log in XGBoost4J-Spark (#4271, #4303)\n* Allow suppression of Rabit output in Booster::train in XGBoost4J (#4262)\n* Add tutorial on handling missing values in XGBoost4J-Spark (#4425)\n* Fix typos (#4345, #4393, #4432, #4435)\n* Added language classifier in setup.py (#4327)\n* Added Travis CI badge (#4344)\n* Add BentoML to use case section (#4400)\n* Remove subtly sexist remark (#4418)\n* Add R vignette about parsing JSON dumps (#4439)\n\n### Acknowledgement\n**Contributors**: Nan Zhu (@CodingCat), Adam Pocock (@Craigacp), Daniel Hen (@Daniel8hen), Jiaxiang Li (@JiaxiangBU), Rory Mitchell (@RAMitchell), Egor Smirnov (@SmirnovEgorRu), Andy Adinets (@canonizer), Jonas (@elcombato), Harry Braviner (@harrybraviner), Philip Hyunsu Cho (@hcho3), Tong He (@hetong007), James Lamb (@jameslamb), Jean-Francois Zinque (@jeffzi), Yang Yang (@jokerkeny), Mayank Suman (@mayanksuman), jess (@monkeywithacupcake), Hajime Morrita (@omo), Ravi Kalia (@project-delphi), @ras44, Rong Ou (@rongou), Shaochen Shi (@shishaochen), Xu Xiao (@sperlingxx), @sriramch, Jiaming Yuan (@trivialfis), Christopher Suchanek (@wsuchy), Bozhao (@yubozhao)\n\n**Reviewers**: Nan Zhu (@CodingCat), Adam Pocock (@Craigacp), Daniel Hen (@Daniel8hen), Jiaxiang Li (@JiaxiangBU), Laurae (@Laurae2), Rory Mitchell (@RAMitchell), Egor Smirnov (@SmirnovEgorRu), @alois-bissuel, 
Andy Adinets (@canonizer), Chen Qin (@chenqin), Harry Braviner (@harrybraviner), Philip Hyunsu Cho (@hcho3), Tong He (@hetong007), @jakirkham, James Lamb (@jameslamb), Julien Schueller (@jschueller), Mayank Suman (@mayanksuman), Hajime Morrita (@omo), Rong Ou (@rongou), Sara Robinson (@sararob), Shaochen Shi (@shishaochen), Xu Xiao (@sperlingxx), @sriramch, Sean Owen (@srowen), Sergei Lebedev (@superbobry), Yuan (Terry) Tang (@terrytangyuan), Theodore Vasiloudis (@thvasilo), Matthew Tovbin (@tovbinm), Jiaming Yuan (@trivialfis), Xin Yin (@xydrolase)\n\n## v0.82 (2019.03.03)\nThis release is packed with many new features and bug fixes.\n\n### Roadmap: better performance scaling for multi-core CPUs (#3957)\n* Poor performance scaling of the `hist` algorithm for multi-core CPUs has been under investigation (#3810). #3957 marks an important step toward better performance scaling, by using software pre-fetching and replacing STL vectors with C-style arrays. Special thanks to @Laurae2 and @SmirnovEgorRu.\n* See #3810 for latest progress on this roadmap.\n\n### New feature: Distributed Fast Histogram Algorithm (`hist`) (#4011, #4102, #4140, #4128)\n* It is now possible to run the `hist` algorithm in distributed setting. Special thanks to @CodingCat. The benefits include:\n  1. Faster local computation via feature binning\n  2. Support for monotonic constraints and feature interaction constraints\n  3. Simpler codebase than `approx`, allowing for future improvement\n* Depth-wise tree growing is now performed in a separate code path, so that cross-node synchronization is performed only once per level.\n\n### New feature: Multi-Node, Multi-GPU training (#4095)\n* Distributed training is now able to utilize clusters equipped with NVIDIA GPUs. In particular, the rabit AllReduce layer will communicate GPU device information. 
Special thanks to @mt-jones, @RAMitchell, @rongou, @trivialfis, @canonizer, and @jeffdk.\n* Resource management systems will be able to assign a rank for each GPU in the cluster.\n* In Dask, users will be able to construct a collection of XGBoost processes over an inhomogeneous device cluster (i.e. workers with different number and/or kinds of GPUs).\n\n### New feature: Multiple validation datasets in XGBoost4J-Spark (#3904, #3910)\n* You can now track the performance of the model during training with multiple evaluation datasets. By specifying `eval_sets` or call `setEvalSets` over a `XGBoostClassifier` or `XGBoostRegressor`, you can pass in multiple evaluation datasets typed as a `Map` from `String` to `DataFrame`. Special thanks to @CodingCat.\n* See the usage of multiple validation datasets [here](https://github.com/dmlc/xgboost/blob/0c1d5f1120c0a159f2567b267f0ec4ffadee00d0/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala#L66-L78)\n\n### New feature: Additional metric functions for GPUs (#3952)\n* Element-wise metrics have been ported to GPU: `rmse`, `mae`, `logloss`, `poisson-nloglik`, `gamma-deviance`, `gamma-nloglik`, `error`, `tweedie-nloglik`. Special thanks to @trivialfis and @RAMitchell.\n* With supported metrics, XGBoost will select the correct devices based on your system and `n_gpus` parameter.\n\n### New feature: Column sampling at individual nodes (splits) (#3971)\n* Columns (features) can now be sampled at individual tree nodes, in addition to per-tree and per-level sampling. To enable per-node sampling, set `colsample_bynode` parameter, which represents the fraction of columns sampled at each node. This parameter is set to 1.0 by default (i.e. no sampling per node). 
Special thanks to @canonizer.\n* The `colsample_bynode` parameter works cumulatively with other `colsample_by*` parameters: for example, `{'colsample_bynode':0.5, 'colsample_bytree':0.5}` with 100 columns will give 25 features to choose from at each split.\n\n### Major API change: consistent logging level via `verbosity` (#3982, #4002, #4138)\n* XGBoost now allows fine-grained control over logging. You can set `verbosity` to 0 (silent), 1 (warning), 2 (info), and 3 (debug). This is useful for controlling the amount of logging outputs. Special thanks to @trivialfis.\n* Parameters `silent` and `debug_verbose` are now deprecated.\n* Note: Sometimes XGBoost tries to change configurations based on heuristics, which is displayed as a warning message.  If there's unexpected behaviour, please try to increase value of verbosity.\n\n### Major bug fix: external memory (#4040, #4193)\n* Clarify object ownership in multi-threaded prefetcher, to avoid memory error.\n* Correctly merge two column batches (which uses [CSC layout](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_(CSC_or_CCS))).\n* Add unit tests for external memory.\n* Special thanks to @trivialfis and @hcho3.\n\n### Major bug fix: early stopping fixed in XGBoost4J and XGBoost4J-Spark (#3928, #4176)\n* Early stopping in XGBoost4J and XGBoost4J-Spark is now consistent with its counterpart in the Python package. Training stops if the current iteration is `earlyStoppingSteps` away from the best iteration. If there are multiple evaluation sets, only the last one is used to determine early stopping.\n* See the updated documentation [here](https://xgboost.readthedocs.io/en/release_0.82/jvm/xgboost4j_spark_tutorial.html#early-stopping)\n* Special thanks to @CodingCat, @yanboliang, and @mingyang.\n\n### Major bug fix: infrequent features should not crash distributed training (#4045)\n* For infrequently occurring features, some partitions may not get any instance. 
This scenario used to crash distributed training due to mal-formed ranges. The problem has now been fixed.\n* In practice, one-hot-encoded categorical variables tend to produce rare features, particularly when the cardinality is high.\n* Special thanks to @CodingCat.\n\n### Performance improvements\n* Faster, more space-efficient radix sorting in `gpu_hist` (#3895)\n* Subtraction trick in histogram calculation in `gpu_hist` (#3945)\n* More performant re-partition in XGBoost4J-Spark (#4049)\n\n### Bug-fixes\n* Fix semantics of `gpu_id` when running multiple XGBoost processes on a multi-GPU machine (#3851)\n* Fix page storage path for external memory on Windows (#3869)\n* Fix configuration setup so that DART utilizes GPU (#4024)\n* Eliminate NAN values from SHAP prediction (#3943)\n* Prevent empty quantile sketches in `hist` (#4155)\n* Enable running objectives with 0 GPU (#3878)\n* Parameters are no longer dependent on system locale (#3891, #3907)\n* Use consistent data type in the GPU coordinate descent code (#3917)\n* Remove undefined behavior in the CLI config parser on the ARM platform (#3976)\n* Initialize counters in GPU AllReduce (#3987)\n* Prevent deadlocks in GPU AllReduce (#4113)\n* Load correct values from sliced NumPy arrays (#4147, #4165)\n* Fix incorrect GPU device selection (#4161)\n* Make feature binning logic in `hist` aware of query groups when running a ranking task (#4115). 
For ranking task, query groups are weighted, not individual instances.\n* Generate correct C++ exception type for `LOG(FATAL)` macro (#4159)\n* Python package\n  - Python package should run on system without `PATH` environment variable (#3845)\n  - Fix `coef_` and `intercept_` signature to be compatible with `sklearn.RFECV` (#3873)\n  - Use UTF-8 encoding in Python package README, to support non-English locale (#3867)\n  - Add AUC-PR to list of metrics to maximize for early stopping (#3936)\n  - Allow loading pickles without `self.booster` attribute, for backward compatibility (#3938, #3944)\n  - White-list DART for feature importances (#4073)\n  - Update usage of [h2oai/datatable](https://github.com/h2oai/datatable) (#4123)\n* XGBoost4J-Spark\n  - Address scalability issue in prediction (#4033)\n  - Enforce the use of per-group weights for ranking task (#4118)\n  - Fix vector size of `rawPredictionCol` in `XGBoostClassificationModel` (#3932)\n  - More robust error handling in Spark tracker (#4046, #4108)\n  - Fix return type of `setEvalSets` (#4105)\n  - Return correct value of `getMaxLeaves` (#4114)\n\n### API changes\n* Add experimental parameter `single_precision_histogram` to use single-precision histograms for the `gpu_hist` algorithm (#3965)\n* Python package\n  - Add option to select type of feature importances in the scikit-learn interface (#3876)\n  - Add `trees_to_df()` method to dump decision trees as Pandas data frame (#4153)\n  - Add options to control node shapes in the GraphViz plotting function (#3859)\n  - Add `xgb_model` option to `XGBClassifier`, to load previously saved model (#4092)\n  - Passing lists into `DMatrix` is now deprecated (#3970)\n* XGBoost4J\n  - Support multiple feature importance features (#3801)\n\n### Maintenance: Refactor C++ code for legibility and maintainability\n* Refactor `hist` algorithm code and add unit tests (#3836)\n* Minor refactoring of split evaluator in `gpu_hist` (#3889)\n* Removed unused leaf vector field in 
the tree model (#3989)\n* Simplify the tree representation by combining `TreeModel` and `RegTree` classes (#3995)\n* Simplify and harden tree expansion code (#4008, #4015)\n* De-duplicate parameter classes in the linear model algorithms (#4013)\n* Robust handling of ranges with C++20 span in `gpu_exact` and `gpu_coord_descent` (#4020, #4029)\n* Simplify tree training code (#3825). Also use Span class for robust handling of ranges.\n\n### Maintenance: testing, continuous integration, build system\n* Disallow `std::regex` since it's not supported by GCC 4.8.x (#3870)\n* Add multi-GPU tests for coordinate descent algorithm for linear models (#3893, #3974)\n* Enforce naming style in Python lint (#3896)\n* Refactor Python tests (#3897, #3901): Use pytest exclusively, display full trace upon failure\n* Address `DeprecationWarning` when using Python collections (#3909)\n* Use correct group for maven site plugin (#3937)\n* Jenkins CI is now using on-demand EC2 instances exclusively, due to unreliability of Spot instances (#3948)\n* Better GPU performance logging (#3945)\n* Fix GPU tests on machines with only 1 GPU (#4053)\n* Eliminate CRAN check warnings and notes (#3988)\n* Add unit tests for tree serialization (#3989)\n* Add unit tests for tree fitting functions in `hist` (#4155)\n* Add a unit test for `gpu_exact` algorithm (#4020)\n* Correct JVM CMake GPU flag (#4071)\n* Fix failing Travis CI on Mac (#4086)\n* Speed up Jenkins by not compiling CMake (#4099)\n* Analyze C++ and CUDA code using clang-tidy, as part of Jenkins CI pipeline (#4034)\n* Fix broken R test: Install Homebrew GCC (#4142)\n* Check for empty datasets in GPU unit tests (#4151)\n* Fix Windows compilation (#4139)\n* Comply with latest convention of cpplint (#4157)\n* Fix a unit test in `gpu_hist` (#4158)\n* Speed up data generation in Python tests (#4164)\n\n### Usability Improvements\n* Add link to [InfoWorld 2019 Technology of the Year 
Award](https://www.infoworld.com/article/3336072/application-development/infoworlds-2019-technology-of-the-year-award-winners.html) (#4116)\n* Remove outdated AWS YARN tutorial (#3885)\n* Document current limitation in number of features (#3886)\n* Remove unnecessary warning when `gblinear` is selected (#3888)\n* Document limitation of CSV parser: header not supported (#3934)\n* Log training parameters in XGBoost4J-Spark (#4091)\n* Clarify early stopping behavior in the scikit-learn interface (#3967)\n* Clarify behavior of `max_depth` parameter (#4078)\n* Revise Python docstrings for ranking task (#4121). In particular, weights must be per-group in learning-to-rank setting.\n* Document parameter `num_parallel_tree` (#4022)\n* Add Jenkins status badge (#4090)\n* Warn users against using internal functions of `Booster` object (#4066)\n* Reformat `benchmark_tree.py` to comply with Python style convention (#4126)\n* Clarify a comment in `objectiveTrait` (#4174)\n* Fix typos and broken links in documentation (#3890, #3872, #3902, #3919, #3975, #4027, #4156, #4167)\n\n### Acknowledgement\n**Contributors** (in no particular order): Jiaming Yuan (@trivialfis), Hyunsu Cho (@hcho3), Nan Zhu (@CodingCat), Rory Mitchell (@RAMitchell), Yanbo Liang (@yanboliang), Andy Adinets (@canonizer), Tong He (@hetong007), Yuan Tang (@terrytangyuan)\n\n**First-time Contributors** (in no particular order): Jelle Zijlstra (@JelleZijlstra), Jiacheng Xu (@jiachengxu), @ajing, Kashif Rasul (@kashif), @theycallhimavi, Joey Gao (@pjgao), Prabakaran Kumaresshan (@nixphix), Huafeng Wang (@huafengw), @lyxthe, Sam Wilkinson (@scwilkinson), Tatsuhito Kato (@stabacov), Shayak Banerjee (@shayakbanerjee), Kodi Arfer (@Kodiologist), @KyleLi1985, Egor Smirnov (@SmirnovEgorRu), @tmitanitky, Pasha Stetsenko (@st-pasha), Kenichi Nagahara (@keni-chi), Abhai Kollara Dilip (@abhaikollara), Patrick Ford (@pford221), @hshujuan, Matthew Jones (@mt-jones), Thejaswi Rao (@teju85), Adam November 
(@anovember)\n\n**First-time Reviewers** (in no particular order): Mingyang Hu (@mingyang), Theodore Vasiloudis (@thvasilo), Jakub Troszok (@troszok), Rong Ou (@rongou), @Denisevi4, Matthew Jones (@mt-jones), Jeff Kaplan (@jeffdk)\n\n## v0.81 (2018.11.04)\n### New feature: feature interaction constraints\n* Users are now able to control which features (independent variables) are allowed to interact by specifying feature interaction constraints (#3466).\n* [Tutorial](https://xgboost.readthedocs.io/en/release_0.81/tutorials/feature_interaction_constraint.html) is available, as well as [R](https://github.com/dmlc/xgboost/blob/9254c58e4dfff6a59dc0829a2ceb02e45ed17cd0/R-package/demo/interaction_constraints.R) and [Python](https://github.com/dmlc/xgboost/blob/9254c58e4dfff6a59dc0829a2ceb02e45ed17cd0/tests/python/test_interaction_constraints.py) examples.\n\n### New feature: learning to rank using scikit-learn interface\n* Learning to rank task is now available for the scikit-learn interface of the Python package (#3560, #3848). It is now possible to integrate the XGBoost ranking model into the scikit-learn learning pipeline.\n* Examples of using `XGBRanker` class is found at [demo/rank/rank_sklearn.py](https://github.com/dmlc/xgboost/blob/24a268a2e3cb17302db3d72da8f04016b7d352d9/demo/rank/rank_sklearn.py).\n\n### New feature: R interface for SHAP interactions\n* SHAP (SHapley Additive exPlanations) is a unified approach to explain the output of any machine learning model. 
Previously, this feature was only available from the Python package; now it is available from the R package as well (#3636).\n\n### New feature: GPU predictor now use multiple GPUs to predict\n* GPU predictor is now able to utilize multiple GPUs at once to accelerate prediction (#3738)\n\n### New feature: Scale distributed XGBoost to large-scale clusters\n* Fix OS file descriptor limit assertion error on large cluster (#3835, dmlc/rabit#73) by replacing `select()` based AllReduce/Broadcast with `poll()` based implementation.\n* Mitigate tracker \"thundering herd\" issue on large cluster. Add exponential backoff retry when workers connect to tracker.\n* With this change, we were able to scale to 1.5k executors on a 12 billion row dataset after some tweaks here and there.\n\n### New feature: Additional objective functions for GPUs\n* New objective functions ported to GPU: `hinge`, `multi:softmax`, `multi:softprob`, `count:poisson`, `reg:gamma`, `reg:tweedie`.\n* With supported objectives, XGBoost will select the correct devices based on your system and `n_gpus` parameter.\n\n### Major bug fix: learning to rank with XGBoost4J-Spark\n* Previously, `repartitionForData` would shuffle data and lose ordering necessary for ranking task.\n* To fix this issue, data points within each RDD partition are explicitly grouped by their group (query session) IDs (#3654). Also handle empty RDD partition carefully (#3750).\n\n### Major bug fix: early stopping fixed in XGBoost4J-Spark\n* Earlier implementation of early stopping had incorrect semantics and didn't let users specify direction for optimizing (maximize / minimize)\n* A parameter `maximize_evaluation_metrics` is defined so as to tell whether a metric should be maximized or minimized as part of early stopping criteria (#3808). 
Also early stopping now has correct semantics.\n\n### API changes\n* Column sampling by level (`colsample_bylevel`) is now functional for `hist` algorithm (#3635, #3862)\n* GPU tag `gpu:` for regression objectives is now deprecated. XGBoost will select the correct devices automatically (#3643)\n* Add `disable_default_eval_metric` parameter to disable default metric (#3606)\n* Experimental AVX support for gradient computation is removed (#3752)\n* XGBoost4J-Spark\n  - Add `rank:ndcg` and `rank:map` to supported objectives (#3697)\n* Python package\n  - Add `callbacks` argument to `fit()` function of scikit-learn API (#3682)\n  - Add `XGBRanker` to scikit-learn interface (#3560, #3848)\n  - Add `validate_features` argument to `predict()` function of scikit-learn API (#3653)\n  - Allow scikit-learn grid search over parameters specified as keyword arguments (#3791)\n  - Add `coef_` and `intercept_` as properties of scikit-learn wrapper (#3855). Some scikit-learn functions expect these properties.\n\n### Performance improvements\n* Address very high GPU memory usage for large data (#3635)\n* Fix performance regression within `EvaluateSplits()` of `gpu_hist` algorithm. (#3680)\n\n### Bug-fixes\n* Fix a problem in GPU quantile sketch with tiny instance weights. 
(#3628)\n* Fix copy constructor for `HostDeviceVectorImpl` to prevent dangling pointers (#3657)\n* Fix a bug in partitioned file loading (#3673)\n* Fixed an uninitialized pointer in `gpu_hist` (#3703)\n* Reshared data among GPUs when number of GPUs is changed (#3721)\n* Add back `max_delta_step` to split evaluation (#3668)\n* Do not round up integer thresholds for integer features in JSON dump (#3717)\n* Use `dmlc::TemporaryDirectory` to handle temporaries in cross-platform way (#3783)\n* Fix accuracy problem with `gpu_hist` when `min_child_weight` and `lambda` are set to 0 (#3793)\n* Make sure that `tree_method` parameter is recognized and not silently ignored (#3849)\n* XGBoost4J-Spark\n  - Make sure `thresholds` are considered when executing `predict()` method (#3577)\n  - Avoid losing precision when computing probabilities by converting to `Double` early (#3576)\n  - `getTreeLimit()` should return `Int` (#3602)\n  - Fix checkpoint serialization on HDFS (#3614)\n  - Throw `ControlThrowable` instead of `InterruptedException` so that it is properly re-thrown (#3632)\n  - Remove extraneous output to stdout (#3665)\n  - Allow specification of task type for custom objectives and evaluations (#3646)\n  - Fix distributed updater check (#3739)\n  - Fix issue when spark job execution thread cannot return before we execute `first()` (#3758)\n* Python package\n  - Fix accessing `DMatrix.handle` before it is set (#3599)\n  - `XGBClassifier.predict()` should return margin scores when `output_margin` is set to true (#3651)\n  - Early stopping callback should maximize metric of form `NDCG@n-` (#3685)\n  - Preserve feature names when slicing `DMatrix` (#3766)\n* R package\n  - Replace `nround` with `nrounds` to match actual parameter (#3592)\n  - Amend `xgb.createFolds` to handle classes of a single element (#3630)\n  - Fix buggy random generator and make `colsample_bytree` functional (#3781)\n\n### Maintenance: testing, continuous integration, build system\n* Add sanitizers 
tests to Travis CI (#3557)\n* Add NumPy, Matplotlib, Graphviz as requirements for doc build (#3669)\n* Comply with CRAN submission policy (#3660, #3728)\n* Remove copy-paste error in JVM test suite (#3692)\n* Disable flaky tests in `R-package/tests/testthat/test_update.R` (#3723)\n* Make Python tests compatible with scikit-learn 0.20 release (#3731)\n* Separate out restricted and unrestricted tasks, so that pull requests don't build downloadable artifacts (#3736)\n* Add multi-GPU unit test environment (#3741)\n* Allow plug-ins to be built by CMake (#3752)\n* Test wheel compatibility on CPU containers for pull requests (#3762)\n* Fix broken doc build due to Matplotlib 3.0 release (#3764)\n* Produce `xgboost.so` for XGBoost-R on Mac OSX, so that `make install` works (#3767)\n* Retry Jenkins CI tests up to 3 times to improve reliability (#3769, #3769, #3775, #3776, #3777)\n* Add basic unit tests for `gpu_hist` algorithm (#3785)\n* Fix Python environment for distributed unit tests (#3806)\n* Test wheels on CUDA 10.0 container for compatibility (#3838)\n* Fix JVM doc build (#3853)\n\n### Maintenance: Refactor C++ code for legibility and maintainability\n* Merge generic device helper functions into `GPUSet` class (#3626)\n* Re-factor column sampling logic into `ColumnSampler` class (#3635, #3637)\n* Replace `std::vector` with `HostDeviceVector` in `MetaInfo` and `SparsePage` (#3446)\n* Simplify `DMatrix` class (#3395)\n* De-duplicate CPU/GPU code using `Transform` class (#3643, #3751)\n* Remove obsoleted `QuantileHistMaker` class (#3761)\n* Remove obsoleted `NoConstraint` class (#3792)\n\n### Other Features\n* C++20-compliant Span class for safe pointer indexing (#3548, #3588)\n* Add helper functions to manipulate multiple GPU devices (#3693)\n* XGBoost4J-Spark\n  - Allow specifying host ip from the `xgboost-tracker.properties file` (#3833). 
This comes in handy when the `hosts` file doesn't correctly define localhost.\n\n### Usability Improvements\n* Add reference to GitHub repository in `pom.xml` of JVM packages (#3589)\n* Add R demo of multi-class classification (#3695)\n* Document JSON dump functionality (#3600, #3603)\n* Document CUDA requirement and lack of external memory for GPU algorithms (#3624)\n* Document LambdaMART objectives, both pairwise and listwise (#3672)\n* Document `aucpr` evaluation metric (#3687)\n* Document gblinear parameters: `feature_selector` and `top_k` (#3780)\n* Add instructions for using MinGW-built XGBoost with Python. (#3774)\n* Removed nonexistent parameter `use_buffer` from documentation (#3610)\n* Update Python API doc to include all classes and members (#3619, #3682)\n* Fix typos and broken links in documentation (#3618, #3640, #3676, #3713, #3759, #3784, #3843, #3852)\n* Binary classification demo should produce LIBSVM with 0-based indexing (#3652)\n* Process data once for Python and CLI examples of learning to rank (#3666)\n* Include full text of Apache 2.0 license in the repository (#3698)\n* Save predictor parameters in model file (#3856)\n* JVM packages\n  - Let users specify feature names when calling `getModelDump` and `getFeatureScore` (#3733)\n  - Warn the user about the lack of over-the-wire encryption (#3667)\n  - Fix errors in examples (#3719)\n  - Document choice of trackers (#3831)\n  - Document that vanilla Apache Spark is required (#3854)\n* Python package\n  - Document that custom objective can't contain colon (:) (#3601)\n  - Show a better error message for failed library loading (#3690)\n  - Document that feature importance is unavailable for non-tree learners (#3765)\n  - Document behavior of `get_fscore()` for zero-importance features (#3763)\n  - Recommend pickling as the way to save `XGBClassifier` / `XGBRegressor` / `XGBRanker` (#3829)\n* R package\n  - Enlarge variable importance plot to make it more visible (#3820)\n\n### BREAKING CHANGES\n* 
External memory page files have changed, breaking backwards compatibility for temporary storage used during external memory training. This only affects external memory users upgrading their xgboost version - we recommend clearing all `*.page` files before resuming training. Model serialization is unaffected.\n\n### Known issues\n* Quantile sketcher fails to produce any quantile for some edge cases (#2943)\n* The `hist` algorithm leaks memory when used with learning rate decay callback (#3579)\n* Using custom evaluation function together with early stopping causes assertion failure in XGBoost4J-Spark (#3595)\n* Early stopping doesn't work with `gblinear` learner (#3789)\n* Label and weight vectors are not reshared upon the change in number of GPUs (#3794). To get around this issue, delete the `DMatrix` object and re-load.\n* The `DMatrix` Python objects are initialized with incorrect values when given array slices (#3841)\n* The `gpu_id` parameter is broken and not yet properly supported (#3850)\n\n### Acknowledgement\n**Contributors** (in no particular order): Hyunsu Cho (@hcho3), Jiaming Yuan (@trivialfis), Nan Zhu (@CodingCat), Rory Mitchell (@RAMitchell), Andy Adinets (@canonizer), Vadim Khotilovich (@khotilov), Sergei Lebedev (@superbobry)\n\n**First-time Contributors** (in no particular order): Matthew Tovbin (@tovbinm), Jakob Richter (@jakob-r), Grace Lam (@grace-lam), Grant W Schneider (@grantschneider), Andrew Thia (@BlueTea88), Sergei Chipiga (@schipiga), Joseph Bradley (@jkbradley), Chen Qin (@chenqin), Jerry Lin (@linjer), Dmitriy Rybalko (@rdtft), Michael Mui (@mmui), Takahiro Kojima (@515hikaru), Bruce Zhao (@BruceZhaoR), Wei Tian (@weitian), Saumya Bhatnagar (@Sam1301), Juzer Shakir (@JuzerShakir), Zhao Hang (@cleghom), Jonathan Friedman (@jontonsoup), Bruno Tremblay (@meztez), Boris Filippov (@frenzykryger), @Shiki-H, @mrgutkun, @gorogm, @htgeis, @jakehoare, @zengxy, @KOLANICH\n\n**First-time Reviewers** (in no particular order): Nikita Titov 
(@StrikerRUS), Xiangrui Meng (@mengxr), Nirmal Borah (@Nirmal-Neel)\n\n\n## v0.80 (2018.08.13)\n* **JVM packages received a major upgrade**: To consolidate the APIs and improve the user experience, we refactored the design of XGBoost4J-Spark in a significant manner. (#3387)\n  - Consolidated APIs: It is now much easier to integrate XGBoost models into a Spark ML pipeline. Users can control behaviors like output leaf prediction results by setting corresponding column names. Training is now more consistent with other Estimators in Spark MLLIB: there is now one single method `fit()` to train decision trees.\n  - Better user experience: we refactored the parameters relevant modules in XGBoost4J-Spark to provide both camel-case (Spark ML style) and underscore (XGBoost style) parameters\n  - A brand-new tutorial is [available](https://xgboost.readthedocs.io/en/release_0.80/jvm/xgboost4j_spark_tutorial.html) for XGBoost4J-Spark.\n  - Latest API documentation is now hosted at https://xgboost.readthedocs.io/.\n* XGBoost documentation now keeps track of multiple versions:\n  - Latest master: https://xgboost.readthedocs.io/en/latest\n  - 0.80 stable: https://xgboost.readthedocs.io/en/release_0.80\n  - 0.72 stable: https://xgboost.readthedocs.io/en/release_0.72\n* Support for per-group weights in ranking objective (#3379)\n* Fix inaccurate decimal parsing (#3546)\n* New functionality\n  - Query ID column support in LIBSVM data files (#2749). 
This is convenient for performing ranking task in distributed setting.\n  - Hinge loss for binary classification (`binary:hinge`) (#3477)\n  - Ability to specify delimiter and instance weight column for CSV files (#3546)\n  - Ability to use 1-based indexing instead of 0-based (#3546)\n* GPU support\n  - Quantile sketch, binning, and index compression are now performed on GPU, eliminating PCIe transfer for 'gpu_hist' algorithm (#3319, #3393)\n  - Upgrade to NCCL2 for multi-GPU training (#3404).\n  - Use shared memory atomics for faster training (#3384).\n  - Dynamically allocate GPU memory, to prevent large allocations for deep trees (#3519)\n  - Fix memory copy bug for large files (#3472)\n* Python package\n  - Importing data from Python datatable (#3272)\n  - Pre-built binary wheels available for 64-bit Linux and Windows (#3424, #3443)\n  - Add new importance measures 'total_gain', 'total_cover' (#3498)\n  - Sklearn API now supports saving and loading models (#3192)\n  - Arbitrary cross validation fold indices (#3353)\n  - `predict()` function in Sklearn API uses `best_ntree_limit` if available, to make early stopping easier to use (#3445)\n  - Informational messages are now directed to Python's `print()` rather than standard output (#3438). This way, messages appear inside Jupyter notebooks.\n* R package\n  - Oracle Solaris support, per CRAN policy (#3372)\n* JVM packages\n  - Single-instance prediction (#3464)\n  - Pre-built JARs are now available from Maven Central (#3401)\n  - Add NULL pointer check (#3021)\n  - Consider `spark.task.cpus` when controlling parallelism (#3530)\n  - Handle missing values in prediction (#3529)\n  - Eliminate outputs of `System.out` (#3572)\n* Refactored C++ DMatrix class for simplicity and de-duplication (#3301)\n* Refactored C++ histogram facilities (#3564)\n* Refactored constraints / regularization mechanism for split finding (#3335, #3429). 
Users may specify an elastic net (L2 + L1 regularization) on leaf weights as well as monotonic constraints on test nodes. The refactor will be useful for a future addition of feature interaction constraints.\n* Statically link `libstdc++` for MinGW32 (#3430)\n* Enable loading from `group`, `base_margin` and `weight` (see [here](http://xgboost.readthedocs.io/en/latest/tutorials/input_format.html#auxiliary-files-for-additional-information)) for Python, R, and JVM packages (#3431)\n* Fix model saving for `count:poisson` so that `max_delta_step` doesn't get truncated (#3515)\n* Fix loading of sparse CSC matrix (#3553)\n* Fix incorrect handling of `base_score` parameter for Tweedie regression (#3295)\n\n## v0.72.1 (2018.07.08)\nThis version is only applicable for the Python package. The content is identical to that of v0.72.\n\n## v0.72 (2018.06.01)\n* Starting with this release, we plan to make a new release every two months. See #3252 for more details.\n* Fix a pathological behavior (near-zero second-order gradients) in multiclass objective (#3304)\n* Tree dumps now use high precision in storing floating-point values (#3298)\n* Submodules `rabit` and `dmlc-core` have been brought up to date, bringing bug fixes (#3330, #3221).\n* GPU support\n  - Continuous integration tests for GPU code (#3294, #3309)\n  - GPU accelerated coordinate descent algorithm (#3178)\n  - Abstract 1D vector class now works with multiple GPUs (#3287)\n  - Generate PTX code for most recent architecture (#3316)\n  - Fix a memory bug on NVIDIA K80 cards (#3293)\n  - Address performance instability for single-GPU, multi-core machines (#3324)\n* Python package\n  - FreeBSD support (#3247)\n  - Validation of feature names in `Booster.predict()` is now optional (#3323)\n* Updated Sklearn API\n  - Validation sets now support instance weights (#2354)\n  - `XGBClassifier.predict_proba()` should not support `output_margin` option. 
(#3343) See BREAKING CHANGES below.\n* R package:\n  - Better handling of NULL in `print.xgb.Booster()` (#3338)\n  - Comply with CRAN policy by removing compiler warning suppression (#3329)\n  - Updated CRAN submission\n* JVM packages\n  - JVM packages will now use the same versioning scheme as other packages (#3253)\n  - Update Spark to 2.3 (#3254)\n  - Add scripts to cross-build and deploy artifacts (#3276, #3307)\n  - Fix a compilation error for Scala 2.10 (#3332)\n* BREAKING CHANGES\n  - `XGBClassifier.predict_proba()` no longer accepts parameter `output_margin`. The parameter makes no sense for `predict_proba()` because the method is to predict class probabilities, not raw margin scores.\n\n## v0.71 (2018.04.11)\n* This is a minor release, mainly motivated by issues concerning `pip install`, e.g. #2426, #3189, #3118, and #3194.\n  With this release, users of Linux and MacOS will be able to run `pip install` for the most part.\n* Refactored linear booster class (`gblinear`), so as to support multiple coordinate descent updaters (#3103, #3134). See BREAKING CHANGES below.\n* Fix slow training for multiclass classification with high number of classes (#3109)\n* Fix a corner case in approximate quantile sketch (#3167). Applicable for 'hist' and 'gpu_hist' algorithms\n* Fix memory leak in DMatrix (#3182)\n* New functionality\n  - Better linear booster class (#3103, #3134)\n  - Pairwise SHAP interaction effects (#3043)\n  - Cox loss (#3043)\n  - AUC-PR metric for ranking task (#3172)\n  - Monotonic constraints for 'hist' algorithm (#3085)\n* GPU support\n    - Create an abstract 1D vector class that moves data seamlessly between the main and GPU memory (#2935, #3116, #3068). 
This eliminates unnecessary PCIe data transfer during training time.\n  - Fix minor bugs (#3051, #3217)\n  - Fix compatibility error for CUDA 9.1 (#3218)\n* Python package:\n  - Correctly handle parameter `verbose_eval=0` (#3115)\n* R package:\n  - Eliminate segmentation fault on 32-bit Windows platform (#2994)\n* JVM packages\n  - Fix a memory bug involving double-freeing Booster objects (#3005, #3011)\n  - Handle empty partition in predict (#3014)\n  - Update docs and unify terminology (#3024)\n  - Delete cache files after job finishes (#3022)\n  - Compatibility fixes for latest Spark versions (#3062, #3093)\n* BREAKING CHANGES: Updated linear modelling algorithms. In particular L1/L2 regularisation penalties are now normalised to number of training examples. This makes the implementation consistent with sklearn/glmnet. L2 regularisation has also been removed from the intercept. To produce linear models with the old regularisation behaviour, the alpha/lambda regularisation parameters can be manually scaled by dividing them by the number of training examples.\n\n## v0.7 (2017.12.30)\n* **This version represents a major change from the last release (v0.6), which was released one year and half ago.**\n* Updated Sklearn API\n  - Add compatibility layer for scikit-learn v0.18: `sklearn.cross_validation` now deprecated\n  - Updated to allow use of all XGBoost parameters via `**kwargs`.\n  - Updated `nthread` to `n_jobs` and `seed` to `random_state` (as per Sklearn convention); `nthread` and `seed` are now marked as deprecated\n  - Updated to allow choice of Booster (`gbtree`, `gblinear`, or `dart`)\n  - `XGBRegressor` now supports instance weights (specify `sample_weight` parameter)\n  - Pass `n_jobs` parameter to the `DMatrix` constructor\n  - Add `xgb_model` parameter to `fit` method, to allow continuation of training\n* Refactored gbm to allow more friendly cache strategy\n  - Specialized some prediction routine\n* Robust `DMatrix` construction from a sparse 
matrix\n* Faster construction of `DMatrix` from 2D NumPy matrices: elide copies, use of multiple threads\n* Automatically remove nan from input data when it is sparse.\n  - This can solve some of user reported problem of istart != hist.size\n* Fix the single-instance prediction function to obtain correct predictions\n* Minor fixes\n  - Thread local variable is upgraded so it is automatically freed at thread exit.\n  - Fix saving and loading `count:poisson` models\n  - Fix CalcDCG to use base-2 logarithm\n  - Messages are now written to stderr instead of stdout\n  - Keep built-in evaluations while using customized evaluation functions\n  - Use `bst_float` consistently to minimize type conversion\n  - Copy the base margin when slicing `DMatrix`\n  - Evaluation metrics are now saved to the model file\n  - Use `int32_t` explicitly when serializing version\n  - In distributed training, synchronize the number of features after loading a data matrix.\n* Migrate to C++11\n  - The current master version now requires a C++11-enabled compiler (g++ 4.8 or higher)\n* Predictor interface was factored out (in a manner similar to the updater interface).\n* Makefile support for Solaris and ARM\n* Test code coverage using Codecov\n* Add CPP tests\n* Add `Dockerfile` and `Jenkinsfile` to support continuous integration for GPU code\n* New functionality\n  - Ability to adjust tree model's statistics to a new dataset without changing tree structures.\n  - Ability to extract feature contributions from individual predictions, as described in [here](http://blog.datadive.net/interpreting-random-forests/) and [here](https://arxiv.org/abs/1706.06060).\n  - Faster, histogram-based tree algorithm (`tree_method='hist'`).\n  - GPU/CUDA accelerated tree algorithms (`tree_method='gpu_hist'` or `'gpu_exact'`), including the GPU-based predictor.\n  - Monotonic constraints: when other features are fixed, force the prediction to be monotonic increasing with respect to a certain specified feature.\n  - 
Faster gradient calculation using AVX SIMD\n  - Ability to export models in JSON format\n  - Support for Tweedie regression\n  - Additional dropout options for DART: binomial+1, epsilon\n  - Ability to update an existing model in-place: this is useful for many applications, such as determining feature importance\n* Python package:\n  - New parameters:\n    - `learning_rates` in `cv()`\n    - `shuffle` in `mknfold()`\n    - `max_features` and `show_values` in `plot_importance()`\n    - `sample_weight` in `XGBRegressor.fit()`\n  - Support binary wheel builds\n  - Fix `MultiIndex` detection to support Pandas 0.21.0 and higher\n  - Support metrics and evaluation sets whose names contain `-`\n  - Support feature maps when plotting trees\n  - Compatibility fix for Python 2.6\n  - Call `print_evaluation` callback at last iteration\n  - Use appropriate integer types when calling native code, to prevent truncation and memory error\n  - Fix shared library loading on Mac OS X\n* R package:\n  - New parameters:\n    - `silent` in `xgb.DMatrix()`\n    - `use_int_id` in `xgb.model.dt.tree()`\n    - `predcontrib` in `predict()`\n    - `monotone_constraints` in `xgb.train()`\n  - Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`).\n  - It's possible to custom-build the R package with GPU acceleration support.\n  - Enable JVM build for Mac OS X and Windows\n  - Integration with AppVeyor CI\n  - Improved safety for garbage collection\n  - Store numeric attributes with higher precision\n  - Easier installation for devel version\n  - Improved `xgb.plot.tree()`\n  - Various minor fixes to improve user experience and robustness\n  - Register native code to pass CRAN check\n  - Updated CRAN submission\n* JVM packages\n  - Add Spark pipeline persistence API\n  - Fix data persistence: loss evaluation on test data had wrongly used caches for training data.\n  - Clean external cache after training\n  - Implement early stopping\n  - 
Enable training of multiple models by distinguishing stage IDs\n  - Better Spark integration: support RDD / dataframe / dataset, integrate with Spark ML package\n  - XGBoost4j now supports ranking task\n  - Support training with missing data\n  - Refactor JVM package to separate regression and classification models to be consistent with other machine learning libraries\n  - Support XGBoost4j compilation on Windows\n  - Parameter tuning tool\n  - Publish source code for XGBoost4j to maven local repo\n  - Scala implementation of the Rabit tracker (drop-in replacement for the Java implementation)\n  - Better exception handling for the Rabit tracker\n  - Persist `num_class`, number of classes (for classification task)\n  - `XGBoostModel` now holds `BoosterParams`\n  - libxgboost4j is now part of CMake build\n  - Release `DMatrix` when no longer needed, to conserve memory\n  - Expose `baseMargin`, to allow initialization of boosting with predictions from an external model\n  - Support instance weights\n  - Use `SparkParallelismTracker` to prevent jobs from hanging forever\n  - Expose train-time evaluation metrics via `XGBoostModel.summary`\n  - Option to specify `host-ip` explicitly in the Rabit tracker\n* Documentation\n  - Better math notation for gradient boosting\n  - Updated build instructions for Mac OS X\n  - Template for GitHub issues\n  - Add `CITATION` file for citing XGBoost in scientific writing\n  - Fix dropdown menu in xgboost.readthedocs.io\n  - Document `updater_seq` parameter\n  - Style fixes for Python documentation\n  - Links to additional examples and tutorials\n  - Clarify installation requirements\n* Changes that break backward compatibility\n  - [#1519](https://github.com/dmlc/xgboost/pull/1519) XGBoost-spark no longer contains APIs for DMatrix; use the public booster interface instead.\n  - [#2476](https://github.com/dmlc/xgboost/pull/2476) `XGBoostModel.predict()` now has a different signature\n\n\n## v0.6 (2016.07.29)\n* Version 0.5 is skipped 
due to major improvements in the core\n* Major refactor of core library.\n  - Goal: more flexible and modular code as a portable library.\n  - Switch to use of c++11 standard code.\n  - Random number generator defaults to ```std::mt19937```.\n  - Share the data loading pipeline and logging module from dmlc-core.\n  - Enable registry pattern to allow optionally plugin of objective, metric, tree constructor, data loader.\n    - Future plugin modules can be put into xgboost/plugin and register back to the library.\n  - Remove most of the raw pointers to smart ptrs, for RAII safety.\n* Add official option to approximate algorithm `tree_method` to parameter.\n  - Change default behavior to switch to prefer faster algorithm.\n  - User will get a message when approximate algorithm is chosen.\n* Change library name to libxgboost.so\n* Backward compatibility\n  - The binary buffer file is not backward compatible with previous version.\n  - The model file is backward compatible on 64 bit platforms.\n* The model file is compatible between 64/32 bit platforms (not yet tested).\n* External memory version and other advanced features will be exposed to R library as well on linux.\n  - Previously some of the features are blocked due to C++11 and threading limits.\n  - The windows version is still blocked due to Rtools do not support ```std::thread```.\n* rabit and dmlc-core are maintained through git submodule\n  - Anyone can open PR to update these dependencies now.\n* Improvements\n  - Rabit and xgboost libs are not thread-safe and use thread local PRNGs\n  - This could fix some of the previous problem which runs xgboost on multiple threads.\n* JVM Package\n  - Enable xgboost4j for java and scala\n  - XGBoost distributed now runs on Flink and Spark.\n* Support model attributes listing for meta data.\n  - https://github.com/dmlc/xgboost/pull/1198\n  - https://github.com/dmlc/xgboost/pull/1166\n* Support callback API\n  - https://github.com/dmlc/xgboost/issues/892\n  - 
https://github.com/dmlc/xgboost/pull/1211\n  - https://github.com/dmlc/xgboost/pull/1264\n* Support new booster DART (dropout in tree boosting)\n  - https://github.com/dmlc/xgboost/pull/1220\n* Add CMake build system\n  - https://github.com/dmlc/xgboost/pull/1314\n\n## v0.47 (2016.01.14)\n\n* Changes in R library\n  - fixed possible problem of poisson regression.\n  - switched from 0 to NA for missing values.\n  - exposed access to additional model parameters.\n* Changes in Python library\n  - throws exception instead of crash terminal when a parameter error happens.\n  - has importance plot and tree plot functions.\n  - accepts different learning rates for each boosting round.\n  - allows model training continuation from previously saved model.\n  - allows early stopping in CV.\n  - allows feval to return a list of tuples.\n  - allows eval_metric to handle additional format.\n  - improved compatibility in sklearn module.\n  - additional parameters added for sklearn wrapper.\n  - added pip installation functionality.\n  - supports more Pandas DataFrame dtypes.\n  - added best_ntree_limit attribute, in addition to best_score and best_iteration.\n* Java api is ready for use\n* Added more test cases and continuous integration to make each build more robust.\n\n## v0.4 (2015.05.11)\n\n* Distributed version of xgboost that runs on YARN, scales to billions of examples\n* Direct save/load data and model from/to S3 and HDFS\n* Feature importance visualization in R module, by Michael Benesty\n* Predict leaf index\n* Poisson regression for counts data\n* Early stopping option in training\n* Native save load support in R and python\n  - xgboost models now can be saved using save/load in R\n  - xgboost python model is now picklable\n* sklearn wrapper is supported in python module\n* Experimental External memory version\n\n\n## v0.3 (2014.09.07)\n\n* Faster tree construction module\n  - Allows subsample columns during tree construction via ```bst:col_samplebytree=ratio```\n* 
Support for boosting from initial predictions\n* Experimental version of LambdaRank\n* Linear booster is now parallelized, using parallel coordinated descent.\n* Add [Code Guide](src/README.md) for customizing objective function and evaluation\n* Add R module\n\n\n## v0.2x (2014.05.20)\n\n* Python module\n* Weighted samples instances\n* Initial version of pairwise rank\n\n\n## v0.1 (2014.03.26)\n\n* Initial release\n"
  },
  {
    "path": "R-package/.Rbuildignore",
    "content": "\\.o$\n\\.so$\n\\.dll$\n^.*\\.Rproj$\n^\\.Rproj\\.user$\nREADME.md\n^doc$\n^Meta$\n^_pkgdown\\.yml$\n^docs$\n^pkgdown$\n"
  },
  {
    "path": "R-package/.gitignore",
    "content": "docs\n"
  },
  {
    "path": "R-package/CMakeLists.txt",
    "content": "find_package(LibR REQUIRED)\nmessage(STATUS \"LIBR_CORE_LIBRARY \" ${LIBR_CORE_LIBRARY})\n\nfile(\n  GLOB_RECURSE R_SOURCES\n  ${CMAKE_CURRENT_LIST_DIR}/src/*.cc\n  ${CMAKE_CURRENT_LIST_DIR}/src/*.c\n)\n\n# Use object library to expose symbols\nadd_library(xgboost-r OBJECT ${R_SOURCES})\n\nif(ENABLE_ALL_WARNINGS)\n  target_compile_options(xgboost-r PRIVATE -Wall -Wextra)\nendif()\n\nif(MSVC)\n  # https://github.com/lightgbm-org/LightGBM/pull/6061\n  # MSVC doesn't work with anonymous types in structs. (R complex)\n  #\n  # syntax error: missing ';' before identifier 'private_data_c'\n  #\n  target_compile_definitions(xgboost-r PRIVATE -DR_LEGACY_RCOMPLEX)\nendif()\n\ntarget_compile_definitions(\n  xgboost-r PUBLIC\n  -DXGBOOST_STRICT_R_MODE=1\n  -DDMLC_LOG_BEFORE_THROW=0\n  -DDMLC_DISABLE_STDIN=1\n  -DDMLC_LOG_CUSTOMIZE=1\n)\n\ntarget_include_directories(\n  xgboost-r PRIVATE\n  ${LIBR_INCLUDE_DIRS}\n  ${PROJECT_SOURCE_DIR}/include\n  ${PROJECT_SOURCE_DIR}/dmlc-core/include\n)\n\ntarget_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})\n\nif(USE_OPENMP)\n  find_package(OpenMP REQUIRED)\n  target_link_libraries(xgboost-r PUBLIC OpenMP::OpenMP_CXX OpenMP::OpenMP_C)\nendif()\n\nset_target_properties(\n  xgboost-r PROPERTIES\n  CXX_STANDARD 17\n  CXX_STANDARD_REQUIRED ON\n  POSITION_INDEPENDENT_CODE ON\n)\n\n# Get compilation and link flags of xgboost-r and propagate to objxgboost\ntarget_link_libraries(objxgboost PUBLIC xgboost-r)\n\n# Add all objects of xgboost-r to objxgboost\ntarget_sources(objxgboost INTERFACE $<TARGET_OBJECTS:xgboost-r>)\n\nset(LIBR_HOME \"${LIBR_HOME}\" PARENT_SCOPE)\nset(LIBR_EXECUTABLE \"${LIBR_EXECUTABLE}\" PARENT_SCOPE)\n"
  },
  {
    "path": "R-package/DESCRIPTION",
    "content": "Package: xgboost\nType: Package\nTitle: Extreme Gradient Boosting\nVersion: 3.3.0.0\nDate: 2026-02-10\nAuthors@R: c(\n  person(\"Tianqi\", \"Chen\", role = c(\"aut\"),\n         email = \"tianqi.tchen@gmail.com\"),\n  person(\"Tong\", \"He\", role = c(\"aut\"),\n         email = \"hetong007@gmail.com\"),\n  person(\"Michael\", \"Benesty\", role = c(\"aut\"),\n         email = \"michael@benesty.fr\"),\n  person(\"Vadim\", \"Khotilovich\", role = c(\"aut\"),\n         email = \"khotilovich@gmail.com\"),\n  person(\"Yuan\", \"Tang\", role = c(\"aut\"),\n         email = \"terrytangyuan@gmail.com\",\n         comment = c(ORCID = \"0000-0001-5243-233X\")),\n  person(\"Hyunsu\", \"Cho\", role = c(\"aut\"),\n         email = \"chohyu01@cs.washington.edu\"),\n  person(\"Kailong\", \"Chen\", role = c(\"aut\")),\n  person(\"Rory\", \"Mitchell\", role = c(\"aut\")),\n  person(\"Ignacio\", \"Cano\", role = c(\"aut\")),\n  person(\"Tianyi\", \"Zhou\", role = c(\"aut\")),\n  person(\"Mu\", \"Li\", role = c(\"aut\")),\n  person(\"Junyuan\", \"Xie\", role = c(\"aut\")),\n  person(\"Min\", \"Lin\", role = c(\"aut\")),\n  person(\"Yifeng\", \"Geng\", role = c(\"aut\")),\n  person(\"Yutian\", \"Li\", role = c(\"aut\")),\n  person(\"Jiaming\", \"Yuan\", role = c(\"aut\", \"cre\"),\n         email = \"jm.yuan@outlook.com\"),\n  person(\"David\", \"Cortes\", role = c(\"aut\")),\n  person(\"XGBoost contributors\", role = c(\"cph\"),\n         comment = \"base XGBoost implementation\")\n  )\nMaintainer: Jiaming Yuan <jm.yuan@outlook.com>\nDescription: Extreme Gradient Boosting, which is an efficient implementation\n    of the gradient boosting framework from Chen & Guestrin (2016) <doi:10.1145/2939672.2939785>.\n    This package is its R interface. The package includes efficient linear\n    model solver and tree learning algorithms. 
The package can automatically\n    do parallel computation on a single machine which could be more than 10\n    times faster than existing gradient boosting packages. It supports\n    various objective functions, including regression, classification and ranking.\n    The package is made to be extensible, so that users are also allowed to define\n    their own objectives easily.\nLicense: Apache License (== 2.0) | file LICENSE\nURL: https://github.com/dmlc/xgboost\nBugReports: https://github.com/dmlc/xgboost/issues\nNeedsCompilation: yes\nVignetteBuilder: knitr\nSuggests:\n    knitr,\n    rmarkdown,\n    ggplot2 (>= 1.0.1),\n    DiagrammeR (>= 0.9.0),\n    DiagrammeRsvg,\n    rsvg,\n    htmlwidgets,\n    Ckmeans.1d.dp (>= 3.3.1),\n    vcd (>= 1.3),\n    testthat,\n    igraph (>= 1.0.1),\n    float,\n    titanic,\n    RhpcBLASctl,\n    survival\nDepends:\n    R (>= 4.3.0)\nImports:\n    Matrix (>= 1.1-0),\n    methods,\n    data.table (>= 1.9.6),\n    jsonlite (>= 1.0)\nRoxygen: list(markdown = TRUE)\nRoxygenNote: 7.3.3\nEncoding: UTF-8\nSystemRequirements: GNU make, C++17\n"
  },
  {
    "path": "R-package/NAMESPACE",
    "content": "# Generated by roxygen2: do not edit by hand\n\nS3method(\"[\",xgb.Booster)\nS3method(\"[\",xgb.DMatrix)\nS3method(\"dimnames<-\",xgb.DMatrix)\nS3method(coef,xgb.Booster)\nS3method(dim,xgb.DMatrix)\nS3method(dimnames,xgb.DMatrix)\nS3method(getinfo,xgb.Booster)\nS3method(getinfo,xgb.DMatrix)\nS3method(length,xgb.Booster)\nS3method(predict,xgb.Booster)\nS3method(predict,xgboost)\nS3method(print,xgb.Booster)\nS3method(print,xgb.DMatrix)\nS3method(print,xgb.cv.synchronous)\nS3method(print,xgboost)\nS3method(setinfo,xgb.Booster)\nS3method(setinfo,xgb.DMatrix)\nS3method(variable.names,xgb.Booster)\nexport(\"xgb.attr<-\")\nexport(\"xgb.attributes<-\")\nexport(\"xgb.config<-\")\nexport(\"xgb.model.parameters<-\")\nexport(getinfo)\nexport(setinfo)\nexport(xgb.Callback)\nexport(xgb.DMatrix)\nexport(xgb.DMatrix.hasinfo)\nexport(xgb.DMatrix.save)\nexport(xgb.DataBatch)\nexport(xgb.DataIter)\nexport(xgb.ExtMemDMatrix)\nexport(xgb.QuantileDMatrix)\nexport(xgb.QuantileDMatrix.from_iterator)\nexport(xgb.attr)\nexport(xgb.attributes)\nexport(xgb.cb.cv.predict)\nexport(xgb.cb.early.stop)\nexport(xgb.cb.evaluation.log)\nexport(xgb.cb.gblinear.history)\nexport(xgb.cb.print.evaluation)\nexport(xgb.cb.reset.parameters)\nexport(xgb.cb.save.model)\nexport(xgb.config)\nexport(xgb.copy.Booster)\nexport(xgb.create.features)\nexport(xgb.cv)\nexport(xgb.dump)\nexport(xgb.gblinear.history)\nexport(xgb.get.DMatrix.data)\nexport(xgb.get.DMatrix.num.non.missing)\nexport(xgb.get.DMatrix.qcut)\nexport(xgb.get.config)\nexport(xgb.get.num.boosted.rounds)\nexport(xgb.ggplot.deepness)\nexport(xgb.ggplot.importance)\nexport(xgb.ggplot.shap.summary)\nexport(xgb.importance)\nexport(xgb.is.same.Booster)\nexport(xgb.load)\nexport(xgb.load.raw)\nexport(xgb.model.dt.tree)\nexport(xgb.params)\nexport(xgb.plot.deepness)\nexport(xgb.plot.importance)\nexport(xgb.plot.multi.trees)\nexport(xgb.plot.shap)\nexport(xgb.plot.shap.summary)\nexport(xgb.plot.tree)\nexport(xgb.save)\nexport(xgb.save.raw)\nexp
ort(xgb.set.config)\nexport(xgb.slice.Booster)\nexport(xgb.slice.DMatrix)\nexport(xgb.train)\nexport(xgboost)\nimport(methods)\nimportClassesFrom(Matrix,CsparseMatrix)\nimportClassesFrom(Matrix,dgCMatrix)\nimportClassesFrom(Matrix,dgRMatrix)\nimportFrom(Matrix,sparse.model.matrix)\nimportFrom(data.table,\":=\")\nimportFrom(data.table,as.data.table)\nimportFrom(data.table,data.table)\nimportFrom(data.table,is.data.table)\nimportFrom(data.table,rbindlist)\nimportFrom(data.table,setkey)\nimportFrom(data.table,setkeyv)\nimportFrom(data.table,setnames)\nimportFrom(grDevices,rgb)\nimportFrom(graphics,barplot)\nimportFrom(graphics,grid)\nimportFrom(graphics,lines)\nimportFrom(graphics,par)\nimportFrom(graphics,points)\nimportFrom(graphics,title)\nimportFrom(jsonlite,fromJSON)\nimportFrom(jsonlite,toJSON)\nimportFrom(methods,new)\nimportFrom(stats,coef)\nimportFrom(stats,median)\nimportFrom(stats,predict)\nimportFrom(stats,sd)\nimportFrom(stats,variable.names)\nimportFrom(utils,hasName)\nimportFrom(utils,head)\nimportFrom(utils,object.size)\nimportFrom(utils,str)\nimportFrom(utils,tail)\nuseDynLib(xgboost, .registration = TRUE)\n"
  },
  {
    "path": "R-package/R/callbacks.R",
    "content": ".reserved_cb_names <- c(\"names\", \"class\", \"call\", \"params\", \"niter\", \"nfeatures\", \"folds\")\n\n#' XGBoost Callback Constructor\n#'\n#' Constructor for defining the structure of callback functions that can be executed\n#' at different stages of model training (before / after training, before / after each boosting\n#' iteration).\n#'\n#' @details\n#' Arguments that will be passed to the supplied functions are as follows:\n#' - env The same environment that is passed under argument `env`.\n#'\n#'   It may be modified by the functions in order to e.g. keep track of what happens\n#'   across iterations or similar.\n#'\n#'   This environment is only used by the functions supplied to the callback, and will\n#'   not be kept after the model fitting function terminates (see parameter `f_after_training`).\n#'\n#' - model The booster object when using [xgb.train()], or the folds when using [xgb.cv()].\n#'\n#'   For [xgb.cv()], folds are a list with a structure as follows:\n#'     - `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).\n#'     - `bst`: The `xgb.Booster` object for the fold.\n#'     - `evals`: A list containing two DMatrices, with names `train` and `test`\n#'       (`test` is the held-out data for the fold).\n#'     - `index`: The indices of the hold-out data for that fold (base-1 indexing),\n#'       from which the `test` entry in `evals` was obtained.\n#'\n#'   This object should **not** be in-place modified in ways that conflict with the\n#'   training (e.g. resetting the parameters for a training update in a way that resets\n#'   the number of rounds to zero in order to overwrite rounds).\n#'\n#'   Note that any R attributes that are assigned to the booster during the callback functions,\n#'   will not be kept thereafter as the booster object variable is not re-assigned during\n#'   training. 
It is however possible to set C-level attributes of the booster through\n#'   [xgb.attr()] or [xgb.attributes()], which should remain available for the rest\n#'   of the iterations and after the training is done.\n#'\n#'   For keeping variables across iterations, it's recommended to use `env` instead.\n#' - data The data to which the model is being fit, as an `xgb.DMatrix` object.\n#'\n#'   Note that, for [xgb.cv()], this will be the full data, while data for the specific\n#'   folds can be found in the `model` object.\n#' - evals The evaluation data, as passed under argument `evals` to [xgb.train()].\n#'\n#'   For [xgb.cv()], this will always be `NULL`.\n#' - begin_iteration Index of the first boosting iteration that will be executed (base-1 indexing).\n#'\n#'   This will typically be '1', but when using training continuation, depending on the\n#'   parameters for updates, boosting rounds will be continued from where the previous\n#'   model ended, in which case this will be larger than 1.\n#'\n#' - end_iteration Index of the last boosting iteration that will be executed\n#'   (base-1 indexing, inclusive of this end).\n#'\n#'   It should match with argument `nrounds` passed to [xgb.train()] or [xgb.cv()].\n#'\n#'   Note that boosting might be interrupted before reaching this last iteration, for\n#'   example by using the early stopping callback [xgb.cb.early.stop()].\n#' - iteration Index of the iteration number that is being executed (first iteration\n#'   will be the same as parameter `begin_iteration`, then next one will add +1, and so on).\n#'\n#' - iter_feval Evaluation metrics for `evals` that were supplied, either\n#'   determined by the objective, or by parameter `custom_metric`.\n#'\n#'   For [xgb.train()], this will be a named vector with one entry per element in\n#'   `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for\n#'   example, if `evals` contains an entry named \"tr\" and the metric is \"rmse\",\n#'   this will be 
a one-element vector with name \"tr-rmse\".\n#'\n#'   For [xgb.cv()], this will be a 2d matrix with dimensions `[length(evals), nfolds]`,\n#'   where the row names will follow the same naming logic as the one-dimensional vector\n#'   that is passed in [xgb.train()].\n#'\n#'   Note that, internally, the built-in callbacks such as [xgb.cb.print.evaluation] summarize\n#'   this table by calculating the row-wise means and standard deviations.\n#'\n#' - final_feval The evaluation results after the last boosting round is executed\n#'   (same format as `iter_feval`, and will be the exact same input as passed under\n#'   `iter_feval` to the last round that is executed during model fitting).\n#'\n#' - prev_cb_res Result from a previous run of a callback sharing the same name\n#'   (as given by parameter `cb_name`) when conducting training continuation, if there\n#'   was any in the booster R attributes.\n#'\n#'   Sometimes, one might want to append the new results to the previous one, and this will\n#'   be done automatically by the built-in callbacks such as [xgb.cb.evaluation.log],\n#'   which will append the new rows to the previous table.\n#'\n#'   If no such previous callback result is available (which it never will when fitting\n#'   a model from start instead of updating an existing model), this will be `NULL`.\n#'\n#'   For [xgb.cv()], which doesn't support training continuation, this will always be `NULL`.\n#'\n#' The following names (`cb_name` values) are reserved for internal callbacks:\n#' - print_evaluation\n#' - evaluation_log\n#' - reset_parameters\n#' - early_stop\n#' - save_model\n#' - cv_predict\n#' - gblinear_history\n#'\n#' The following names are reserved for other non-callback attributes:\n#' - names\n#' - class\n#' - call\n#' - params\n#' - niter\n#' - nfeatures\n#' - folds\n#'\n#' When using the built-in early stopping callback ([xgb.cb.early.stop]), said callback\n#' will always be executed before the others, as it sets some booster C-level 
attributes\n#' that other callbacks might also use. Otherwise, the order of execution will match with\n#' the order in which the callbacks are passed to the model fitting function.\n#'\n#' @param cb_name Name for the callback.\n#'\n#'   If the callback produces some non-NULL result (from executing the function passed under\n#'   `f_after_training`), that result will be added as an R attribute to the resulting booster\n#'   (or as a named element in the result of CV), with the attribute name specified here.\n#'\n#'   Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.\n#' @param env An environment object that will be passed to the different functions in the callback.\n#'   Note that this environment will not be shared with other callbacks.\n#' @param f_before_training A function that will be executed before the training has started.\n#'\n#'   If passing `NULL` for this or for the other function inputs, then no function will be executed.\n#'\n#'   If passing a function, it will be called with parameters supplied as non-named arguments\n#'   matching the function signatures that are shown in the default value for each function argument.\n#' @param f_before_iter A function that will be executed before each boosting round.\n#'\n#'   This function can signal whether the training should be finalized or not, by outputting\n#'   a value that evaluates to `TRUE` - i.e. if the output from the function provided here at\n#'   a given round is `TRUE`, then training will be stopped before the current iteration happens.\n#'\n#'   Return values of `NULL` will be interpreted as `FALSE`.\n#' @param f_after_iter A function that will be executed after each boosting round.\n#'\n#'   This function can signal whether the training should be finalized or not, by outputting\n#'   a value that evaluates to `TRUE` - i.e. 
if the output from the function provided here at\n#'   a given round is `TRUE`, then training will be stopped at that round.\n#'\n#'   Return values of `NULL` will be interpreted as `FALSE`.\n#' @param f_after_training A function that will be executed after training is finished.\n#'\n#'   This function can optionally output something non-NULL, which will become part of the R\n#'   attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to [xgb.train()])\n#'   under the name supplied for parameter `cb_name` in the case of [xgb.train()]; or a part\n#'   of the named elements in the result of [xgb.cv()].\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].\n#'\n#' @seealso Built-in callbacks:\n#' - [xgb.cb.print.evaluation]\n#' - [xgb.cb.evaluation.log]\n#' - [xgb.cb.reset.parameters]\n#' - [xgb.cb.early.stop]\n#' - [xgb.cb.save.model]\n#' - [xgb.cb.cv.predict]\n#' - [xgb.cb.gblinear.history]\n#\n#' @examples\n#' # Example constructing a custom callback that calculates\n#' # squared error on the training data (no separate test set),\n#' # and outputs the per-iteration results.\n#' ssq_callback <- xgb.Callback(\n#'   cb_name = \"ssq\",\n#'   f_before_training = function(env, model, data, evals,\n#'                                begin_iteration, end_iteration) {\n#'     # A vector to keep track of a number at each iteration\n#'     env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)\n#'   },\n#'   f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n#'     # This calculates the sum of squared errors on the training data.\n#'     # Note that this can be better done by passing an 'evals' entry,\n#'     # but this demonstrates a way in which callbacks can be structured.\n#'     pred <- predict(model, data)\n#'     err <- pred - getinfo(data, \"label\")\n#'     sq_err <- sum(err^2)\n#'     env$logs[iteration] <- sq_err\n#'     cat(\n#'       sprintf(\n#'         \"Squared error at 
iteration %d: %.2f\\n\",\n#'         iteration, sq_err\n#'       )\n#'     )\n#'\n#'     # A return value of 'TRUE' here would signal to finalize the training\n#'     return(FALSE)\n#'   },\n#'   f_after_training = function(env, model, data, evals, iteration,\n#'                               final_feval, prev_cb_res) {\n#'     return(env$logs)\n#'   }\n#' )\n#'\n#' data(mtcars)\n#'\n#' y <- mtcars$mpg\n#' x <- as.matrix(mtcars[, -1])\n#'\n#' dm <- xgb.DMatrix(x, label = y, nthread = 1)\n#' model <- xgb.train(\n#'   data = dm,\n#'   params = xgb.params(objective = \"reg:squarederror\", nthread = 1),\n#'   nrounds = 5,\n#'   callbacks = list(ssq_callback)\n#' )\n#'\n#' # Result from 'f_after_training' will be available as an attribute\n#' attributes(model)$ssq\n#' @export\nxgb.Callback <- function(\n  cb_name = \"custom_callback\",\n  env = new.env(),\n  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,\n  f_before_iter = function(env, model, data, evals, iteration) NULL,\n  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,\n  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL\n) {\n  stopifnot(is.null(f_before_training) || is.function(f_before_training))\n  stopifnot(is.null(f_before_iter) || is.function(f_before_iter))\n  stopifnot(is.null(f_after_iter) || is.function(f_after_iter))\n  stopifnot(is.null(f_after_training) || is.function(f_after_training))\n  stopifnot(is.character(cb_name) && length(cb_name) == 1)\n\n  if (cb_name %in% .reserved_cb_names) {\n    stop(\"Cannot use reserved callback name '\", cb_name, \"'.\")\n  }\n\n  out <- list(\n    cb_name = cb_name,\n    env = env,\n    f_before_training = f_before_training,\n    f_before_iter = f_before_iter,\n    f_after_iter = f_after_iter,\n    f_after_training = f_after_training\n  )\n  class(out) <- \"xgb.Callback\"\n  return(out)\n}\n\n.execute.cb.before.training <- function(\n  callbacks,\n  
model,\n  data,\n  evals,\n  begin_iteration,\n  end_iteration\n) {\n  for (callback in callbacks) {\n    if (!is.null(callback$f_before_training)) {\n      callback$f_before_training(\n        callback$env,\n        model,\n        data,\n        evals,\n        begin_iteration,\n        end_iteration\n      )\n    }\n  }\n}\n\n.execute.cb.before.iter <- function(\n  callbacks,\n  model,\n  data,\n  evals,\n  iteration\n) {\n  if (!length(callbacks)) {\n    return(FALSE)\n  }\n  out <- sapply(callbacks, function(cb) {\n    if (is.null(cb$f_before_iter)) {\n      return(FALSE)\n    }\n    should_stop <- cb$f_before_iter(\n      cb$env,\n      model,\n      data,\n      evals,\n      iteration\n    )\n    if (!NROW(should_stop)) {\n      should_stop <- FALSE\n    } else if (NROW(should_stop) > 1) {\n      should_stop <- head(as.logical(should_stop), 1)\n    }\n    return(should_stop)\n  })\n  return(any(out))\n}\n\n.execute.cb.after.iter <- function(\n  callbacks,\n  model,\n  data,\n  evals,\n  iteration,\n  iter_feval\n) {\n  if (!length(callbacks)) {\n    return(FALSE)\n  }\n  out <- sapply(callbacks, function(cb) {\n    if (is.null(cb$f_after_iter)) {\n      return(FALSE)\n    }\n    should_stop <- cb$f_after_iter(\n      cb$env,\n      model,\n      data,\n      evals,\n      iteration,\n      iter_feval\n    )\n    if (!NROW(should_stop)) {\n      should_stop <- FALSE\n    } else if (NROW(should_stop) > 1) {\n      should_stop <- head(as.logical(should_stop), 1)\n    }\n    return(should_stop)\n  })\n  return(any(out))\n}\n\n.execute.cb.after.training <- function(\n  callbacks,\n  model,\n  data,\n  evals,\n  iteration,\n  final_feval,\n  prev_cb_res\n) {\n  if (!length(callbacks)) {\n    return(NULL)\n  }\n  old_cb_res <- attributes(model)\n  out <- lapply(callbacks, function(cb) {\n    if (is.null(cb$f_after_training)) {\n      return(NULL)\n    } else {\n      return(\n        cb$f_after_training(\n          cb$env,\n          model,\n          data,\n      
    evals,\n          iteration,\n          final_feval,\n          getElement(old_cb_res, cb$cb_name)\n        )\n      )\n    }\n  })\n  names(out) <- sapply(callbacks, function(cb) cb$cb_name)\n  if (NROW(out)) {\n    out <- out[!sapply(out, is.null)]\n  }\n  return(out)\n}\n\n.summarize.feval <- function(iter_feval, showsd) {\n  if (NCOL(iter_feval) > 1L && showsd) {\n    stdev <- apply(iter_feval, 1, sd)\n  } else {\n    stdev <- NULL\n  }\n  if (NCOL(iter_feval) > 1L) {\n    iter_feval <- rowMeans(iter_feval)\n  }\n  return(list(feval = iter_feval, stdev = stdev))\n}\n\n.print.evaluation <- function(iter_feval, showsd, iteration) {\n  tmp <- .summarize.feval(iter_feval, showsd)\n  msg <- .format_eval_string(iteration, tmp$feval, tmp$stdev)\n  cat(msg, '\\n')\n}\n\n# Format the evaluation metric string\n.format_eval_string <- function(iter, eval_res, eval_err = NULL) {\n  if (length(eval_res) == 0)\n    stop('no evaluation results')\n  enames <- names(eval_res)\n  if (is.null(enames))\n    stop('evaluation results must have names')\n  iter <- sprintf('[%d]\\t', iter)\n  if (!is.null(eval_err)) {\n    if (length(eval_res) != length(eval_err))\n      stop('eval_res & eval_err lengths mismatch')\n    # Note: UTF-8 code for plus/minus sign is U+00B1\n    res <- paste0(sprintf(\"%s:%f\\U00B1%f\", enames, eval_res, eval_err), collapse = '\\t')\n  } else {\n    res <- paste0(sprintf(\"%s:%f\", enames, eval_res), collapse = '\\t')\n  }\n  return(paste0(iter, res))\n}\n\n#' Callback for printing the result of evaluation\n#'\n#' @description\n#' The callback function prints the result of evaluation at every `period` iterations.\n#' The initial and the last iteration's evaluations are always printed.\n#'\n#' Does not leave any attribute in the booster (see [xgb.cb.evaluation.log] for that).\n#'\n#' @param period Results would be printed every number of periods.\n#' @param showsd Whether standard deviations should be printed (when available).\n#' @return An `xgb.Callback` 
object, which can be passed to [xgb.train()] or [xgb.cv()].\n#' @seealso [xgb.Callback]\n#' @export\nxgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {\n  if (length(period) != 1 || period != floor(period) || period < 1) {\n    stop(\"'period' must be a positive integer.\")\n  }\n\n  xgb.Callback(\n    cb_name = \"print_evaluation\",\n    env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),\n    f_before_training = NULL,\n    f_before_iter = NULL,\n    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n      if (is.null(iter_feval)) {\n        return(FALSE)\n      }\n      if (env$is_first_call || (iteration - 1) %% env$period == 0) {\n        .print.evaluation(iter_feval, env$showsd, iteration)\n        env$last_printed_iter <- iteration\n      }\n      env$is_first_call <- FALSE\n      return(FALSE)\n    },\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      if (is.null(final_feval)) {\n        return(NULL)\n      }\n      if (is.null(env$last_printed_iter) || iteration > env$last_printed_iter) {\n        .print.evaluation(final_feval, env$showsd, iteration)\n      }\n    }\n  )\n}\n\n#' Callback for logging the evaluation history\n#'\n#' @details This callback creates a table with per-iteration evaluation metrics (see parameters\n#' `evals` and `custom_metric` in [xgb.train()]).\n#'\n#' Note: in the column names of the final data.table, the dash '-' character is replaced with\n#' the underscore '_' in order to make the column names more like regular R identifiers.\n#'\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].\n#' @seealso [xgb.cb.print.evaluation]\n#' @export\nxgb.cb.evaluation.log <- function() {\n  xgb.Callback(\n    cb_name = \"evaluation_log\",\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      env$evaluation_log <- vector(\"list\", 
end_iteration - begin_iteration + 1)\n      env$next_log <- 1\n    },\n    f_before_iter = NULL,\n    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n      tmp <- .summarize.feval(iter_feval, TRUE)\n      env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)\n      env$next_log <- env$next_log + 1\n      return(FALSE)\n    },\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      if (!NROW(env$evaluation_log)) {\n        return(prev_cb_res)\n      }\n      # in case of early stopping\n      if (env$next_log <= length(env$evaluation_log)) {\n        env$evaluation_log <- head(env$evaluation_log, env$next_log - 1)\n      }\n\n      iters <- data.frame(iter = sapply(env$evaluation_log, function(x) x$iter))\n      metrics <- do.call(rbind, lapply(env$evaluation_log, function(x) x$metrics))\n      mnames <- gsub(\"-\", \"_\", names(env$evaluation_log[[1]]$metrics), fixed = TRUE)\n      colnames(metrics) <- mnames\n      has_sds <- !is.null(env$evaluation_log[[1]]$sds)\n      if (has_sds) {\n        sds <- do.call(rbind, lapply(env$evaluation_log, function(x) x$sds))\n        colnames(sds) <- mnames\n        metrics <- lapply(\n          mnames,\n          function(metric) {\n            out <- cbind(metrics[, metric], sds[, metric])\n            colnames(out) <- paste0(metric, c(\"_mean\", \"_std\"))\n            return(out)\n          }\n        )\n        metrics <- do.call(cbind, metrics)\n      }\n      evaluation_log <- cbind(iters, metrics)\n\n      if (!is.null(prev_cb_res)) {\n        if (!is.data.table(prev_cb_res)) {\n          prev_cb_res <- data.table::as.data.table(prev_cb_res)\n        }\n        prev_take <- prev_cb_res[prev_cb_res$iter < min(evaluation_log$iter)]\n        if (nrow(prev_take)) {\n          evaluation_log <- rbind(prev_cb_res, evaluation_log)\n        }\n      }\n      evaluation_log <- 
data.table::as.data.table(evaluation_log)\n      return(evaluation_log)\n    }\n  )\n}\n\n#' Callback for resetting booster parameters at each iteration\n#'\n#' @details\n#' Note that when training is resumed from some previous model, and a function is used to\n#' reset a parameter value, the `nrounds` argument in this function would be\n#' the number of boosting rounds in the current training.\n#'\n#' Does not leave any attribute in the booster.\n#'\n#' @param new_params List of parameters needed to be reset.\n#'   Each element's value must be either a vector of values of length `nrounds`\n#'   to be set at each iteration,\n#'   or a function of two parameters `learning_rates(iteration, nrounds)`\n#'   which returns a new parameter value by using the current iteration number\n#'   and the total number of boosting rounds.\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].\n#' @export\nxgb.cb.reset.parameters <- function(new_params) {\n  stopifnot(is.list(new_params))\n  pnames <- gsub(\".\", \"_\", names(new_params), fixed = TRUE)\n  not_allowed <- pnames %in%\n    c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')\n  if (any(not_allowed))\n    stop('Parameters ', paste(pnames[not_allowed]), \" cannot be changed during boosting.\")\n\n  xgb.Callback(\n    cb_name = \"reset_parameters\",\n    env = as.environment(list(new_params = new_params)),\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      env$end_iteration <- end_iteration\n\n      pnames <- gsub(\".\", \"_\", names(env$new_params), fixed = TRUE)\n      for (n in pnames) {\n        p <- env$new_params[[n]]\n        if (is.function(p)) {\n          if (length(formals(p)) != 2)\n            stop(\"Parameter '\", n, \"' is a function but not of two arguments\")\n        } else if (is.numeric(p) || is.character(p)) {\n          if (length(p) != env$end_iteration)\n            stop(\"Length of '\", n, \"' 
has to be equal to 'nrounds'\")\n        } else {\n          stop(\"Parameter '\", n, \"' is not a function or a vector\")\n        }\n      }\n    },\n    f_before_iter = function(env, model, data, evals, iteration) {\n      params <- lapply(env$new_params, function(p) {\n        if (is.function(p)) {\n          return(p(iteration, env$end_iteration))\n        } else {\n          return(p[iteration])\n        }\n      })\n\n      if (inherits(model, \"xgb.Booster\")) {\n        xgb.model.parameters(model) <- params\n      } else {\n        for (fd in model) {\n          xgb.model.parameters(fd$bst) <- params\n        }\n      }\n      return(FALSE)\n    },\n    f_after_iter = NULL,\n    f_after_training = NULL\n  )\n}\n\n#' Callback to activate early stopping\n#'\n#' @description\n#' This callback function determines the condition for early stopping.\n#'\n#' The following attributes are assigned to the booster's object:\n#' - `best_score` the evaluation score at the best iteration\n#' - `best_iteration` at which boosting iteration the best score has occurred\n#' (0-based index for interoperability of binary models)\n#'\n#' The same values are also stored as R attributes as a result of the callback, plus an additional\n#' attribute `stopped_by_max_rounds` which indicates whether an early stopping by the `stopping_rounds`\n#' condition occurred. Note that the `best_iteration` that is stored under R attributes will follow\n#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed\n#' through [xgb.attr()] or  [xgb.attributes()].\n#'\n#' At least one dataset is required in `evals` for early stopping to work.\n#'\n#' @param stopping_rounds The number of rounds with no improvement in\n#'   the evaluation metric in order to stop the training.\n#' @param maximize Whether to maximize the evaluation metric.\n#' @param metric_name The name of an evaluation column to use as a criteria for early\n#'   stopping. 
If not set, the last column would be used.\n#'   Let's say the test data in `evals` was labelled as `dtest`,\n#'   and one wants to use the AUC in test data for early stopping regardless of where\n#'   it is in the `evals`, then one of the following would need to be set:\n#'   `metric_name = 'dtest-auc'` or `metric_name = 'dtest_auc'`.\n#'   All dash '-' characters in metric names are considered equivalent to '_'.\n#' @param verbose Whether to print the early stopping information.\n#'\n#' @param save_best Whether training should return the best model or the last model. If\n#'   set to `TRUE`, it will only keep the boosting rounds up to the detected best\n#'   iteration, discarding the ones that come after. This parameter is not supported by\n#'   the `xgb.cv` function and the `gblinear` booster yet.\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].\n#' @export\nxgb.cb.early.stop <- function(\n  stopping_rounds,\n  maximize = FALSE,\n  metric_name = NULL,\n  verbose = TRUE,\n  save_best = FALSE\n) {\n  if (!is.null(metric_name)) {\n    stopifnot(is.character(metric_name))\n    stopifnot(length(metric_name) == 1L)\n  }\n\n  xgb.Callback(\n    cb_name = \"early_stop\",\n    env = as.environment(\n      list(\n        checked_evnames = FALSE,\n        stopping_rounds = stopping_rounds,\n        maximize = maximize,\n        metric_name = metric_name,\n        verbose = verbose,\n        save_best = save_best,\n        stopped_by_max_rounds = FALSE\n      )\n    ),\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      if (inherits(model, \"xgb.Booster\") && !length(evals)) {\n        stop(\"For early stopping, 'evals' must have at least one element\")\n      }\n      if (!inherits(model, \"xgb.Booster\") && save_best) {\n        stop(\"'save_best' must be set to FALSE when using early stopping in 'xgb.cv'.\")\n      }\n      env$begin_iteration <- begin_iteration\n      return(NULL)\n  
  },\n    f_before_iter = function(env, model, data, evals, iteration) NULL,\n    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n      sds <- NULL\n      if (NCOL(iter_feval) > 1) {\n        tmp <- .summarize.feval(iter_feval, TRUE)\n        iter_feval <- tmp$feval\n        sds <- tmp$stdev\n      }\n\n      if (!env$checked_evnames) {\n\n        eval_names <- gsub('-', '_', names(iter_feval), fixed = TRUE)\n        if (!is.null(env$metric_name)) {\n          env$metric_idx <- which(gsub('-', '_', env$metric_name, fixed = TRUE) == eval_names)\n          if (length(env$metric_idx) == 0)\n            stop(\"'metric_name' for early stopping is not one of the following:\\n\",\n                 paste(eval_names, collapse = ' '), '\\n')\n        }\n\n        if (is.null(env$metric_name)) {\n          if (NROW(iter_feval) == 1) {\n            env$metric_idx <- 1L\n          } else {\n            env$metric_idx <- length(eval_names)\n            if (env$verbose)\n              cat('Multiple eval metrics are present. 
Will use ',\n                  eval_names[env$metric_idx], ' for early stopping.\\n', sep = '')\n          }\n        }\n\n        env$metric_name <- eval_names[env$metric_idx]\n\n        # maximize is usually NULL when not set in xgb.train and built-in metrics\n        if (is.null(env$maximize))\n          env$maximize <- grepl('(_auc|_aupr|_map|_ndcg|_pre)', env$metric_name)\n\n        if (env$verbose)\n          cat(\"Will train until \", env$metric_name, \" hasn't improved in \",\n              env$stopping_rounds, \" rounds.\\n\\n\", sep = '')\n\n        env$best_iteration <- env$begin_iteration\n        if (env$maximize) {\n          env$best_score <- -Inf\n        } else {\n          env$best_score <- Inf\n        }\n\n        if (inherits(model, \"xgb.Booster\")) {\n          best_score <- xgb.attr(model, 'best_score')\n          if (NROW(best_score)) env$best_score <- as.numeric(best_score)\n          best_iteration <- xgb.attr(model, 'best_iteration')\n          if (NROW(best_iteration)) env$best_iteration <- as.numeric(best_iteration) + 1\n        }\n\n        env$checked_evnames <- TRUE\n      }\n\n      score <- iter_feval[env$metric_idx]\n      if ((env$maximize && score > env$best_score) ||\n          (!env$maximize && score < env$best_score)) {\n\n        env$best_score <- score\n        env$best_iteration <- iteration\n        # save the property to attributes, so they will occur in checkpoint\n        if (inherits(model, \"xgb.Booster\")) {\n          xgb.attributes(model) <- list(\n            best_iteration = env$best_iteration - 1, # convert to 0-based index\n            best_score = env$best_score\n          )\n        }\n      } else if (iteration - env$best_iteration >= env$stopping_rounds) {\n        if (env$verbose) {\n          best_msg <- .format_eval_string(iteration, iter_feval, sds)\n          cat(\"Stopping. 
Best iteration:\\n\", best_msg, \"\\n\\n\", sep = '')\n        }\n        env$stopped_by_max_rounds <- TRUE\n        return(TRUE)\n      }\n      return(FALSE)\n    },\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      if (inherits(model, \"xgb.Booster\") && env$save_best && env$best_iteration < iteration) {\n        # Note: it loses the attributes after being sliced,\n        # so they have to be re-assigned afterwards.\n        prev_attr <- xgb.attributes(model)\n        if (NROW(prev_attr)) {\n          suppressWarnings({\n            prev_attr <- within(prev_attr, rm(\"best_score\", \"best_iteration\"))\n          })\n        }\n        .Call(XGBoosterSliceAndReplace_R, xgb.get.handle(model), 0L, env$best_iteration, 1L)\n        if (NROW(prev_attr)) {\n          xgb.attributes(model) <- prev_attr\n        }\n      }\n      attrs_set <- list(best_iteration = env$best_iteration - 1, best_score = env$best_score)\n      if (inherits(model, \"xgb.Booster\")) {\n        xgb.attributes(model) <- attrs_set\n      } else {\n        for (fd in model) {\n          xgb.attributes(fd$bst) <- attrs_set # to use in the cv.predict callback\n        }\n      }\n      return(\n        list(\n          best_iteration = env$best_iteration,\n          best_score = env$best_score,\n          stopped_by_max_rounds = env$stopped_by_max_rounds\n        )\n      )\n    }\n  )\n}\n\n.save.model.w.formatted.name <- function(model, save_name, iteration) {\n  # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'\n  suppressWarnings({\n    save_name <- sprintf(save_name, iteration)\n  })\n  xgb.save(model, save_name)\n}\n\n#' Callback for saving a model file\n#'\n#' @description\n#' This callback function allows to save an xgb-model file, either periodically\n#' after each `save_period`'s or at the end.\n#'\n#' Does not leave any attribute in the booster.\n#'\n#' @param save_period Save the model to 
disk after every `save_period` iterations;\n#'   0 means save the model at the end.\n#' @param save_name The name or path for the saved model file.\n#'   It can contain a [sprintf()] formatting specifier to include the integer\n#'   iteration number in the file name. E.g., with `save_name = 'xgboost_%04d.model'`,\n#'   the file saved at iteration 50 would be named \"xgboost_0050.model\".\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()],\n#'   but **not** to [xgb.cv()].\n#' @export\nxgb.cb.save.model <- function(save_period = 0, save_name = \"xgboost.ubj\") {\n  if (save_period < 0) {\n    stop(\"'save_period' cannot be negative\")\n  }\n  if (!is.character(save_name) || length(save_name) != 1L) {\n    stop(\"'save_name' must be a single character refering to file name.\")\n  }\n\n  xgb.Callback(\n    cb_name = \"save_model\",\n    env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      env$begin_iteration <- begin_iteration\n    },\n    f_before_iter = NULL,\n    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n      if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {\n        .save.model.w.formatted.name(model, env$save_name, iteration)\n        env$last_save <- iteration\n      }\n      return(FALSE)\n    },\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      if (env$save_period == 0 && iteration > env$last_save) {\n        .save.model.w.formatted.name(model, env$save_name, iteration)\n      }\n    }\n  )\n}\n\n#' Callback for returning cross-validation based predictions\n#'\n#' This callback function saves predictions for all of the test folds,\n#' and also allows to save the folds' models.\n#'\n#' @details\n#' Predictions are saved inside of the `pred` element, which is either a vector or 
a matrix,\n#' depending on the number of prediction outputs per data row. The order of predictions corresponds\n#' to the order of rows in the original dataset. Note that when a custom `folds` list is\n#' provided in [xgb.cv()], the predictions would only be returned properly when this list is a\n#' non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be\n#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.\n#' When some of the indices in the training dataset are not included into user-provided `folds`,\n#' their prediction value would be `NA`.\n#'\n#' @param save_models A flag for whether to save the folds' models.\n#' @param outputmargin Whether to save margin predictions (same effect as passing this\n#'   parameter to [predict.xgb.Booster]).\n#' @return An `xgb.Callback` object, which can be passed to [xgb.cv()],\n#'   but **not** to [xgb.train()].\n#' @export\nxgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {\n  xgb.Callback(\n    cb_name = \"cv_predict\",\n    env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      if (inherits(model, \"xgb.Booster\")) {\n        stop(\"'cv.predict' callback is only for 'xgb.cv'.\")\n      }\n    },\n    f_before_iter = NULL,\n    f_after_iter = NULL,\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      pred <- NULL\n      for (fd in model) {\n        pr <- predict(\n          fd$bst,\n          fd$evals[[2L]],\n          outputmargin = env$outputmargin\n        )\n        if (is.null(pred)) {\n          if (NCOL(pr) > 1L) {\n            pred <- matrix(NA_real_, nrow(data), ncol(pr))\n          } else {\n            pred <- matrix(NA_real_, nrow(data))\n          }\n        }\n        if (is.matrix(pred)) {\n          pred[fd$index, ] 
<- pr\n        } else {\n          pred[fd$index] <- pr\n        }\n      }\n      out <- list(pred = pred)\n      if (env$save_models) {\n        out$models <- lapply(model, function(fd) fd$bst)\n      }\n      return(out)\n    }\n  )\n}\n\n.list2mat <- function(coef_list, sparse) {\n  if (sparse) {\n    coef_mat <- methods::new(\"dgRMatrix\")\n    coef_mat@p <- as.integer(c(0, cumsum(sapply(coef_list, function(x) length(x@x)))))\n    coef_mat@j <- as.integer(unlist(lapply(coef_list, slot, \"i\")) - 1L)\n    coef_mat@x <- unlist(lapply(coef_list, slot, \"x\"))\n    coef_mat@Dim <- as.integer(c(length(coef_list), length(coef_list[[1L]])))\n    # Note: function 'xgb.gblinear.history' might later on try to slice by columns\n    coef_mat <- methods::as(coef_mat, \"CsparseMatrix\")\n    return(coef_mat)\n  } else {\n    return(unname(do.call(rbind, coef_list)))\n  }\n}\n\n.extract.coef <- function(model, sparse) {\n  coefs <- .internal.coef.xgb.Booster(model, add_names = FALSE)\n  if (NCOL(coefs) > 1L) {\n    coefs <- as.vector(coefs)\n  }\n  if (sparse) {\n    coefs <- methods::as(coefs, \"sparseVector\")\n  }\n  return(coefs)\n}\n\n#' Callback for collecting coefficients history of a gblinear booster\n#'\n#' @details\n#' To keep things fast and simple, gblinear booster does not internally store the history of linear\n#' model coefficients at each boosting iteration. This callback provides a workaround for storing\n#' the coefficients' path, by extracting them after each training iteration.\n#'\n#' This callback will construct a matrix where rows are boosting iterations and columns are\n#' feature coefficients (same order as when calling [coef.xgb.Booster], with the intercept\n#' corresponding to the first column).\n#'\n#' When there is more than one coefficient per feature (e.g. multi-class classification),\n#' the result will be reshaped into a vector where coefficients are arranged first by features and\n#' then by class (e.g. 
first 1 through N coefficients will be for the first class, then\n#' coefficients N+1 through 2N for the second class, and so on).\n#'\n#' If the result has only one coefficient per feature in the data, then the resulting matrix\n#' will have column names matching with the feature names, otherwise (when there's more than\n#' one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'\n#' (so e.g. column 'c1' for class '0' will be named 'c1:0').\n#'\n#' With [xgb.train()], the output is either a dense or a sparse matrix.\n#' With [xgb.cv()], it is a list (one element per fold) of such matrices.\n#'\n#' Function [xgb.gblinear.history] provides an easy way to retrieve the\n#' outputs from this callback.\n#'\n#' @param sparse When set to `FALSE`/`TRUE`, a dense/sparse matrix is used to store the result.\n#'   Sparse format is useful when one expects only a subset of coefficients to be non-zero,\n#'   when using the \"thrifty\" feature selector with a fairly small number of top features\n#'   selected per iteration.\n#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].\n#' @seealso [xgb.gblinear.history], [coef.xgb.Booster].\n#' @examples\n#' #### Binary classification:\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest\n#' # without considering the 2nd order interactions:\n#' x <- model.matrix(Species ~ .^2, iris)[, -1]\n#' colnames(x)\n#' dtrain <- xgb.DMatrix(\n#'   scale(x),\n#'   label = 1 * (iris$Species == \"versicolor\"),\n#'   nthread = nthread\n#' )\n#' param <- xgb.params(\n#'   booster = \"gblinear\",\n#'   objective = \"reg:logistic\",\n#'   eval_metric = \"auc\",\n#'   reg_lambda = 0.0003,\n#'   reg_alpha = 0.0003,\n#'   nthread = nthread\n#' )\n#'\n#' # For 'shotgun', which is a default linear updater, using high learning_rate 
values may result in\n#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning\n#' # rate does not break the convergence, but allows us to illustrate the typical pattern of\n#' # \"stochastic explosion\" behaviour of this lock-free algorithm at early boosting iterations.\n#' bst <- xgb.train(\n#'   c(param, list(learning_rate = 1.)),\n#'   dtrain,\n#'   evals = list(tr = dtrain),\n#'   nrounds = 200,\n#'   callbacks = list(xgb.cb.gblinear.history())\n#' )\n#'\n#' # Extract the coefficients' path and plot them vs boosting iteration number:\n#' coef_path <- xgb.gblinear.history(bst)\n#' matplot(coef_path, type = \"l\")\n#'\n#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates.\n#' # Will try the classical componentwise boosting which selects a single best feature per round:\n#' bst <- xgb.train(\n#'   c(\n#'     param,\n#'     xgb.params(\n#'       learning_rate = 0.8,\n#'       updater = \"coord_descent\",\n#'       feature_selector = \"thrifty\",\n#'       top_k = 1\n#'     )\n#'   ),\n#'   dtrain,\n#'   evals = list(tr = dtrain),\n#'   nrounds = 200,\n#'   callbacks = list(xgb.cb.gblinear.history())\n#' )\n#' matplot(xgb.gblinear.history(bst), type = \"l\")\n#' #  Componentwise boosting is known to have similar effect to Lasso regularization.\n#' # Try experimenting with various values of top_k, learning_rate, nrounds,\n#' # as well as different feature_selectors.\n#'\n#' # For xgb.cv:\n#' bst <- xgb.cv(\n#'   c(\n#'     param,\n#'     xgb.params(\n#'       learning_rate = 0.8,\n#'       updater = \"coord_descent\",\n#'       feature_selector = \"thrifty\",\n#'       top_k = 1\n#'     )\n#'   ),\n#'   dtrain,\n#'   nfold = 5,\n#'   nrounds = 100,\n#'   callbacks = list(xgb.cb.gblinear.history())\n#' )\n#' # coefficients in the CV fold #3\n#' matplot(xgb.gblinear.history(bst)[[3]], type = \"l\")\n#'\n#'\n#' #### Multiclass classification:\n#' dtrain <- xgb.DMatrix(scale(x), label = 
as.numeric(iris$Species) - 1, nthread = nthread)\n#'\n#' param <- xgb.params(\n#'   booster = \"gblinear\",\n#'   objective = \"multi:softprob\",\n#'   num_class = 3,\n#'   reg_lambda = 0.0003,\n#'   reg_alpha = 0.0003,\n#'   nthread = nthread\n#' )\n#'\n#' # For the default linear updater 'shotgun' it sometimes is helpful\n#' # to use smaller learning_rate to reduce instability\n#' bst <- xgb.train(\n#'   c(param, list(learning_rate = 0.5)),\n#'   dtrain,\n#'   evals = list(tr = dtrain),\n#'   nrounds = 50,\n#'   callbacks = list(xgb.cb.gblinear.history())\n#' )\n#'\n#' # Will plot the coefficient paths separately for each class:\n#' matplot(xgb.gblinear.history(bst, class_index = 0), type = \"l\")\n#' matplot(xgb.gblinear.history(bst, class_index = 1), type = \"l\")\n#' matplot(xgb.gblinear.history(bst, class_index = 2), type = \"l\")\n#'\n#' # CV:\n#' bst <- xgb.cv(\n#'   c(param, list(learning_rate = 0.5)),\n#'   dtrain,\n#'   nfold = 5,\n#'   nrounds = 70,\n#'   callbacks = list(xgb.cb.gblinear.history(FALSE))\n#' )\n#' # 1st fold of 1st class\n#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = \"l\")\n#'\n#' @export\nxgb.cb.gblinear.history <- function(sparse = FALSE) {\n  xgb.Callback(\n    cb_name = \"gblinear_history\",\n    env = as.environment(list(sparse = sparse)),\n    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {\n      if (!inherits(model, \"xgb.Booster\")) {\n        model <- model[[1L]]$bst\n      }\n      if (xgb.booster_type(model) != \"gblinear\") {\n        stop(\"Callback 'xgb.cb.gblinear.history' is only for booster='gblinear'.\")\n      }\n      env$coef_hist <- vector(\"list\", end_iteration - begin_iteration + 1)\n      env$next_idx <- 1\n    },\n    f_before_iter = NULL,\n    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n      if (inherits(model, \"xgb.Booster\")) {\n        coef_this <- .extract.coef(model, env$sparse)\n      } else {\n        
coef_this <- lapply(model, function(fd) .extract.coef(fd$bst, env$sparse))\n      }\n      env$coef_hist[[env$next_idx]] <- coef_this\n      env$next_idx <- env$next_idx + 1\n      return(FALSE)\n    },\n    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {\n      # in case of early stopping\n      if (env$next_idx <= length(env$coef_hist)) {\n        env$coef_hist <- head(env$coef_hist, env$next_idx - 1)\n      }\n\n      is_booster <- inherits(model, \"xgb.Booster\")\n      if (is_booster) {\n        out <- .list2mat(env$coef_hist, env$sparse)\n      } else {\n        out <- lapply(\n          X = lapply(\n            X = seq_along(env$coef_hist[[1]]),\n            FUN = function(i) lapply(env$coef_hist, \"[[\", i)\n          ),\n          FUN = .list2mat,\n          env$sparse\n        )\n      }\n      if (!is.null(prev_cb_res)) {\n        if (is_booster) {\n          out <- rbind(prev_cb_res, out)\n        } else {\n          # Note: this case should never be encountered, since training cannot\n          # be continued from the result of xgb.cv, but this code should in\n          # theory do the job if the situation were to be encountered.\n          out <- lapply(\n            out,\n            function(lst) {\n              lapply(\n                seq_along(lst),\n                function(i) rbind(prev_cb_res[[i]], lst[[i]])\n              )\n            }\n          )\n        }\n      }\n      feature_names <- getinfo(data, \"feature_name\")\n      if (!NROW(feature_names)) {\n        feature_names <- paste0(\"V\", seq(1L, ncol(data)))\n      }\n      expected_ncols <- length(feature_names) + 1\n      if (is_booster) {\n        mat_ncols <- ncol(out)\n      } else {\n        mat_ncols <- ncol(out[[1L]])\n      }\n      if (mat_ncols %% expected_ncols == 0) {\n        feature_names <- c(\"(Intercept)\", feature_names)\n        n_rep <- mat_ncols / expected_ncols\n        if (n_rep > 1) {\n          feature_names <- 
unlist(\n            lapply(\n              seq(1, n_rep),\n              function(cl) paste(feature_names, cl - 1, sep = \":\")\n            )\n          )\n        }\n        if (is_booster) {\n          colnames(out) <- feature_names\n        } else {\n          out <- lapply(\n            out,\n            function(mat) {\n              colnames(mat) <- feature_names\n              return(mat)\n            }\n          )\n        }\n      }\n      return(out)\n    }\n  )\n}\n\n#' Extract gblinear coefficients history\n#'\n#' A helper function to extract the matrix of linear coefficients' history\n#' from a gblinear model created while using the [xgb.cb.gblinear.history]\n#' callback (which must be added manually as by default it is not used).\n#'\n#' @details\n#' Note that this is an R-specific function that relies on R attributes that\n#' are not saved when using XGBoost's own serialization functions like [xgb.load()]\n#' or [xgb.load.raw()].\n#'\n#' In order for a serialized model to be accepted by this function, one must use R\n#' serializers such as [saveRDS()].\n#' @param model Either an `xgb.Booster` or a result of [xgb.cv()], trained\n#'   using the [xgb.cb.gblinear.history] callback, but **not** a booster\n#'   loaded from [xgb.load()] or [xgb.load.raw()].\n#' @param class_index zero-based class index to extract the coefficients for only that\n#'   specific class in a multinomial multiclass model. When it is `NULL`, all the\n#'   coefficients are returned. Has no effect in non-multiclass models.\n#'\n#' @return\n#' For an [xgb.train()] result, a matrix (either dense or sparse) with the columns\n#' corresponding to iteration's coefficients and the rows corresponding to boosting iterations.\n#'\n#' For an [xgb.cv()] result, a list of such matrices is returned with the elements\n#' corresponding to CV folds.\n#'\n#' When there is more than one coefficient per feature (e.g. 
multi-class classification)\n#' and `class_index` is not provided,\n#' the result will be reshaped into a vector where coefficients are arranged first by features and\n#' then by class (e.g. first 1 through N coefficients will be for the first class, then\n#' coefficients N+1 through 2N for the second class, and so on).\n#' @seealso [xgb.cb.gblinear.history], [coef.xgb.Booster].\n#' @export\nxgb.gblinear.history <- function(model, class_index = NULL) {\n\n  if (!(inherits(model, \"xgb.Booster\") ||\n        inherits(model, \"xgb.cv.synchronous\")))\n    stop(\"model must be an object of either xgb.Booster or xgb.cv.synchronous class\")\n  is_cv <- inherits(model, \"xgb.cv.synchronous\")\n\n  if (!is_cv) {\n    coef_path <- getElement(attributes(model), \"gblinear_history\")\n  } else {\n    coef_path <- getElement(model, \"gblinear_history\")\n  }\n  if (is.null(coef_path)) {\n    stop(\"model must be trained while using the xgb.cb.gblinear.history() callback\")\n  }\n\n  if (!is_cv) {\n    num_class <- xgb.num_class(model)\n    num_feat <- xgb.num_feature(model)\n  } else {\n    # in case of CV, the object is expected to have this info\n    if (model$params$booster != \"gblinear\")\n      stop(\"It does not appear to be a gblinear model\")\n    num_class <- NVL(model$params$num_class, 1)\n    num_feat <- model$nfeatures\n    if (is.null(num_feat))\n      stop(\"This xgb.cv result does not have nfeatures info\")\n  }\n\n  if (!is.null(class_index) &&\n      num_class > 1 &&\n      (class_index[1] < 0 || class_index[1] >= num_class))\n    stop(\"class_index has to be within [0,\", num_class - 1, \"]\")\n\n  if (!is.null(class_index) && num_class > 1) {\n    seq_take <- seq(1 + class_index * (num_feat + 1), (class_index + 1) * (num_feat + 1))\n    coef_path <- if (is.list(coef_path)) {\n      lapply(coef_path, function(x) x[, seq_take])\n    } else {\n      coef_path <- coef_path[, seq_take]\n    }\n  }\n  return(coef_path)\n}\n\n.callbacks.only.train <- 
\"save_model\"\n.callbacks.only.cv <- \"cv_predict\"\n\n.process.callbacks <- function(callbacks, is_cv) {\n  if (inherits(callbacks, \"xgb.Callback\")) {\n    callbacks <- list(callbacks)\n  }\n  if (!is.list(callbacks)) {\n    stop(\"'callbacks' must be a list.\")\n  }\n  cb_names <- character()\n  if (length(callbacks)) {\n    is_callback <- sapply(callbacks, inherits, \"xgb.Callback\")\n    if (!all(is_callback)) {\n      stop(\"Entries in 'callbacks' must be 'xgb.Callback' objects.\")\n    }\n    cb_names <- sapply(callbacks, function(cb) cb$cb_name)\n    if (length(cb_names) != length(callbacks)) {\n      stop(\"Passed invalid callback(s).\")\n    }\n    if (anyDuplicated(cb_names) > 0) {\n      stop(\"Callbacks must have unique names.\")\n    }\n    if (is_cv) {\n      if (any(.callbacks.only.train %in% cb_names)) {\n        stop(\n          \"Passed callback(s) not supported for 'xgb.cv': \",\n          paste(intersect(.callbacks.only.train, cb_names), collapse = \", \")\n        )\n      }\n    } else {\n      if (any(.callbacks.only.cv %in% cb_names)) {\n        stop(\n          \"Passed callback(s) not supported for 'xgb.train': \",\n          paste(intersect(.callbacks.only.cv, cb_names), collapse = \", \")\n        )\n      }\n    }\n    # Early stopping callback needs to be executed before the others\n    if (\"early_stop\" %in% cb_names) {\n      mask <- cb_names == \"early_stop\"\n      callbacks <- c(list(callbacks[[which(mask)]]), callbacks[!mask])\n    }\n  }\n  return(list(callbacks = callbacks, cb_names = cb_names))\n}\n\n# Note: don't try to use functions like 'append', as they will\n# merge the elements of the different callbacks into a single list.\nadd.callback <- function(callbacks, cb, as_first_elt = FALSE) {\n  if (!as_first_elt) {\n    callbacks[[length(callbacks) + 1]] <- cb\n    return(callbacks)\n  } else {\n    if (!length(callbacks)) {\n      return(list(cb))\n    }\n    new_cb <- vector(\"list\", length(callbacks) + 1)\n    
new_cb[[1]] <- cb\n    new_cb[seq(2, length(new_cb))] <- callbacks\n    return(new_cb)\n  }\n}\n\nhas.callbacks <- function(callbacks, cb_name) {\n  cb_names <- sapply(callbacks, function(cb) cb$name)\n  return(cb_name %in% cb_names)\n}\n"
  },
  {
    "path": "R-package/R/utils.R",
    "content": "#\n# This file is for the low level reusable utility functions\n# that are not supposed to be visible to a user.\n#\n\n#\n# General helper utilities ----------------------------------------------------\n#\n\n# SQL-style NVL shortcut.\nNVL <- function(x, val) {\n  if (is.null(x))\n    return(val)\n  if (is.vector(x)) {\n    x[is.na(x)] <- val\n    return(x)\n  }\n  if (typeof(x) == 'closure')\n    return(x)\n  stop(\"typeof(x) == \", typeof(x), \" is not supported by NVL\")\n}\n\n# List of classification and ranking objectives\n.CLASSIFICATION_OBJECTIVES <- function() {\n  return(c('binary:logistic', 'binary:logitraw', 'binary:hinge', 'multi:softmax',\n           'multi:softprob', 'rank:pairwise', 'rank:ndcg', 'rank:map'))\n}\n\n.RANKING_OBJECTIVES <- function() {\n  return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))\n}\n\n.OBJECTIVES_NON_DEFAULT_MODE <- function() {\n  return(c(\"reg:logistic\", \"binary:logitraw\", \"multi:softmax\"))\n}\n\n.BINARY_CLASSIF_OBJECTIVES <- function() {\n  return(c(\"binary:logistic\", \"binary:hinge\"))\n}\n\n.MULTICLASS_CLASSIF_OBJECTIVES <- function() {\n  return(\"multi:softprob\")\n}\n\n.SURVIVAL_RIGHT_CENSORING_OBJECTIVES <- function() { # nolint\n  return(c(\"survival:cox\", \"survival:aft\"))\n}\n\n.SURVIVAL_ALL_CENSORING_OBJECTIVES <- function() { # nolint\n  return(\"survival:aft\")\n}\n\n.REGRESSION_OBJECTIVES <- function() {\n  return(c(\n    \"reg:squarederror\", \"reg:squaredlogerror\", \"reg:logistic\", \"reg:pseudohubererror\",\n    \"reg:absoluteerror\", \"reg:quantileerror\", \"reg:expectileerror\", \"count:poisson\",\n    \"reg:gamma\", \"reg:tweedie\"\n  ))\n}\n\n.MULTI_TARGET_OBJECTIVES <- function() {\n  return(c(\n    \"reg:squarederror\", \"reg:squaredlogerror\", \"reg:logistic\", \"reg:pseudohubererror\",\n    \"reg:quantileerror\", \"reg:gamma\"\n  ))\n}\n\n\n#\n# Low-level functions for boosting --------------------------------------------\n#\n\n# Merges booster params with whatever is 
provided in ...\n# plus runs some checks\ncheck.booster.params <- function(params) {\n  if (!identical(class(params), \"list\"))\n    stop(\"params must be a list\")\n\n  # in R interface, allow for '.' instead of '_' in parameter names\n  names(params) <- gsub(\".\", \"_\", names(params), fixed = TRUE)\n\n  # providing a parameter multiple times makes sense only for 'eval_metric'\n  name_freqs <- table(names(params))\n  multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')\n  if (length(multi_names) > 0) {\n    warning(\"The following parameters were provided multiple times:\\n\\t\",\n            paste(multi_names, collapse = ', '), \"\\n  Only the last value for each of them will be used.\\n\")\n    # While xgboost internals would choose the last value for a multiple-times parameter,\n    # enforce it here in R as well (b/c multi-parameters might be used further in R code,\n    # and R takes the 1st value when multiple elements with the same name are present in a list).\n    for (n in multi_names) {\n      del_idx <- which(n == names(params))\n      del_idx <- del_idx[-length(del_idx)]\n      params[[del_idx]] <- NULL\n    }\n  }\n\n  # for multiclass, expect num_class to be set\n  if (typeof(params[['objective']]) == \"character\" &&\n      startsWith(NVL(params[['objective']], 'x'), 'multi:') &&\n      as.numeric(NVL(params[['num_class']], 0)) < 2) {\n        stop(\"'num_class' > 1 parameter must be set for multiclass classification\")\n  }\n\n  # monotone_constraints parser\n  if (!is.null(params[['monotone_constraints']]) &&\n      typeof(params[['monotone_constraints']]) != \"character\") {\n        vec2str <- paste(params[['monotone_constraints']], collapse = ',')\n        vec2str <- paste0('(', vec2str, ')')\n        params[['monotone_constraints']] <- vec2str\n  }\n\n  # interaction constraints parser (convert from list of column indices to string)\n  if (!is.null(params[['interaction_constraints']]) &&\n      
typeof(params[['interaction_constraints']]) != \"character\") {\n    # check input class\n    if (!identical(class(params[['interaction_constraints']]), 'list')) stop('interaction_constraints should be class list')\n    if (!all(unique(sapply(params[['interaction_constraints']], class)) %in% c('numeric', 'integer'))) {\n      stop('interaction_constraints should be a list of numeric/integer vectors')\n    }\n\n    # recast parameter as string\n    interaction_constraints <- sapply(params[['interaction_constraints']], function(x) paste0('[', paste(x, collapse = ','), ']'))\n    params[['interaction_constraints']] <- paste0('[', paste(interaction_constraints, collapse = ','), ']')\n  }\n\n  # for evaluation metrics, should generate multiple entries per metric\n  if (NROW(params[['eval_metric']]) > 1) {\n    eval_metrics <- as.list(params[[\"eval_metric\"]])\n    names(eval_metrics) <- rep(\"eval_metric\", length(eval_metrics))\n    params_without_ev_metrics <- within(params, rm(\"eval_metric\"))\n    params <- c(params_without_ev_metrics, eval_metrics)\n  }\n  return(params)\n}\n\n\n# Performs some checks related to custom objective function.\ncheck.custom.obj <- function(params, objective) {\n  if (!is.null(params[['objective']]) && !is.null(objective))\n    stop(\"Setting objectives in 'params' and 'objective' at the same time is not allowed\")\n\n  if (!is.null(objective) && typeof(objective) != 'closure') {\n    if (is.character(objective)) {\n      msg <- paste(\n        \"Argument 'objective' is only for custom objectives.\",\n        \"For built-in objectives, pass the objective under 'params'.\",\n        sep = \" \"\n      )\n      error_on_deprecated <- getOption(\"xgboost.strict_mode\", default = FALSE)\n      if (error_on_deprecated) {\n        stop(msg)\n      } else {\n        warning(msg, \" This warning will become an error in a future version.\")\n      }\n      params$objective <- objective\n      return(list(params = params, objective = NULL))\n    
}\n    stop(\"'objective' must be a function\")\n  }\n\n  # handle the case when custom objective function was provided through params\n  if (!is.null(params[['objective']]) &&\n      typeof(params$objective) == 'closure') {\n    objective <- params$objective\n    params$objective <- NULL\n  }\n  return(list(params = params, objective = objective))\n}\n\n# Performs some checks related to custom evaluation function.\ncheck.custom.eval <- function(params, custom_metric, maximize, early_stopping_rounds, callbacks) {\n  if (!is.null(params[['eval_metric']]) && !is.null(custom_metric))\n    stop(\"Setting evaluation metrics in 'params' and 'custom_metric' at the same time is not allowed\")\n\n  if (!is.null(custom_metric) && typeof(custom_metric) != 'closure')\n    stop(\"'custom_metric' must be a function\")\n\n  # handle a situation when custom eval function was provided through params\n  if (!is.null(params[['eval_metric']]) &&\n      typeof(params$eval_metric) == 'closure') {\n    custom_metric <- params$eval_metric\n    params$eval_metric <- NULL\n  }\n\n  # require maximize to be set when custom metric and early stopping are used together\n  if (!is.null(custom_metric) &&\n      is.null(maximize) && (\n        !is.null(early_stopping_rounds) ||\n        has.callbacks(callbacks, \"early_stop\")))\n    stop(\"Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not\")\n\n  return(list(params = params, custom_metric = custom_metric))\n}\n\n\n# Update a booster handle for an iteration with dtrain data\nxgb.iter.update <- function(bst, dtrain, iter, objective) {\n  if (!inherits(dtrain, \"xgb.DMatrix\")) {\n    stop(\"dtrain must be of xgb.DMatrix class\")\n  }\n  handle <- xgb.get.handle(bst)\n\n  if (is.null(objective)) {\n    .Call(XGBoosterUpdateOneIter_R, handle, as.integer(iter), dtrain)\n  } else {\n    pred <- predict(\n      bst,\n      dtrain,\n      outputmargin = TRUE,\n      training = TRUE\n    )\n    gpair <- 
objective(pred, dtrain)\n    n_samples <- dim(dtrain)[1L]\n    grad <- gpair$grad\n    hess <- gpair$hess\n\n    if ((is.matrix(grad) && dim(grad)[1L] != n_samples) ||\n        (is.vector(grad) && length(grad) != n_samples) ||\n        (is.vector(grad) != is.vector(hess))) {\n      warning(paste(\n        \"Since 2.1.0, the shape of the gradient and hessian is required to be \",\n        \"(n_samples, n_targets) or (n_samples, n_classes). Will reshape assuming \",\n        \"column-major order.\",\n        sep = \"\"\n      ))\n      grad <- matrix(grad, nrow = n_samples)\n      hess <- matrix(hess, nrow = n_samples)\n    }\n\n    .Call(\n      XGBoosterTrainOneIter_R, handle, dtrain, iter, grad, hess\n    )\n  }\n  return(TRUE)\n}\n\n\n# Evaluate one iteration.\n# Returns a named vector of evaluation metrics\n# with the names in a 'datasetname-metricname' format.\nxgb.iter.eval <- function(bst, evals, iter, custom_metric) {\n  handle <- xgb.get.handle(bst)\n\n  if (length(evals) == 0)\n    return(NULL)\n\n  evnames <- names(evals)\n  if (is.null(custom_metric)) {\n    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))\n    mat <- matrix(strsplit(msg, '\\\\s+|:')[[1]][-1], nrow = 2)\n    res <- structure(as.numeric(mat[2, ]), names = mat[1, ])\n  } else {\n    res <- sapply(seq_along(evals), function(j) {\n      w <- evals[[j]]\n      ## predict using all trees\n      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = \"all\")\n      eval_res <- custom_metric(preds, w)\n      out <- eval_res$value\n      names(out) <- paste0(evnames[j], \"-\", eval_res$metric)\n      out\n    })\n  }\n  return(res)\n}\n\n\n#\n# Helper functions for cross validation ---------------------------------------\n#\n\n# Possibly convert the labels into factors, depending on the objective.\n# The labels are converted into factors only when the given objective refers to the classification\n# or ranking tasks.\nconvert.labels <- 
function(labels, objective_name) {\n  if (objective_name %in% .CLASSIFICATION_OBJECTIVES()) {\n    return(as.factor(labels))\n  } else {\n    return(labels)\n  }\n}\n\n# Generates random (stratified if needed) CV folds\ngenerate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {\n  if (NROW(group)) {\n    if (stratified) {\n      warning(\n        paste0(\n          \"Stratified splitting is not supported when using 'group' attribute.\",\n          \" Will use unstratified splitting.\"\n        )\n      )\n    }\n    return(generate.group.folds(nfold, group))\n  }\n  objective <- params$objective\n  if (stratified && !is.character(objective)) {\n    warning(\"Will use unstratified splitting (custom objective used)\")\n    stratified <- FALSE\n  }\n  # cannot stratify if label is NULL\n  if (stratified && is.null(label)) {\n    warning(\"Will use unstratified splitting (no 'labels' available)\")\n    stratified <- FALSE\n  }\n\n  # cannot do it for rank\n  if (is.character(objective) && strtrim(objective, 5) == 'rank:') {\n    stop(\"\\n\\tAutomatic generation of CV-folds is not implemented for ranking without 'group' field!\\n\",\n         \"\\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\\n\")\n  }\n  # shuffle\n  rnd_idx <- sample.int(nrows)\n  if (stratified && length(label) == length(rnd_idx)) {\n    y <- label[rnd_idx]\n    #  - For classification, need to convert y labels to factor before making the folds,\n    #    and then do stratification by factor levels.\n    #  - For regression, leave y numeric and do stratification by quantiles.\n    if (is.character(objective)) {\n      y <- convert.labels(y, objective)\n    }\n    folds <- xgb.createFolds(y = y, k = nfold)\n  } else {\n    # make simple non-stratified folds\n    kstep <- length(rnd_idx) %/% nfold\n    folds <- list()\n    for (i in seq_len(nfold - 1)) {\n      folds[[i]] <- rnd_idx[seq_len(kstep)]\n      rnd_idx <- rnd_idx[-seq_len(kstep)]\n    }\n    
folds[[nfold]] <- rnd_idx\n  }\n  return(folds)\n}\n\ngenerate.group.folds <- function(nfold, group) {\n  ngroups <- length(group) - 1\n  if (ngroups < nfold) {\n    stop(\"DMatrix has fewer groups than folds.\")\n  }\n  seq_groups <- seq_len(ngroups)\n  indices <- lapply(seq_groups, function(gr) seq(group[gr] + 1, group[gr + 1]))\n  assignments <- base::split(seq_groups, as.integer(seq_groups %% nfold))\n  assignments <- unname(assignments)\n\n  out <- vector(\"list\", nfold)\n  randomized_groups <- sample(ngroups)\n  for (idx in seq_len(nfold)) {\n    groups_idx_test <- randomized_groups[assignments[[idx]]]\n    groups_test <- indices[groups_idx_test]\n    idx_test <- unlist(groups_test)\n    attributes(idx_test)$group_test <- lengths(groups_test)\n    attributes(idx_test)$group_train <- lengths(indices[-groups_idx_test])\n    out[[idx]] <- idx_test\n  }\n  return(out)\n}\n\n# Creates CV folds stratified by the values of y.\n# It was borrowed from caret::createFolds and simplified\n# by always returning an unnamed list of fold indices.\nxgb.createFolds <- function(y, k) {\n  if (is.numeric(y)) {\n    ## Group the numeric data based on their magnitudes\n    ## and sample within those groups.\n\n    ## When the number of samples is low, we may have\n    ## issues further slicing the numeric data into\n    ## groups. The number of groups will depend on the\n    ## ratio of the number of folds to the sample size.\n    ## At most, we will use quantiles. 
If the sample\n    ## is too small, we just do regular unstratified\n    ## CV\n    cuts <- floor(length(y) / k)\n    if (cuts < 2) cuts <- 2\n    if (cuts > 5) cuts <- 5\n    y <- cut(y,\n             unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),\n             include.lowest = TRUE)\n  }\n\n  if (k < length(y)) {\n    ## reset levels so that the possible levels and\n    ## the levels in the vector are the same\n    y <- factor(as.character(y))\n    numInClass <- table(y)\n    foldVector <- vector(mode = \"integer\", length(y))\n\n    ## For each class, balance the fold allocation as far\n    ## as possible, then resample the remainder.\n    ## The final assignment of folds is also randomized.\n    for (i in seq_along(numInClass)) {\n      ## create a vector of integers from 1:k as many times as possible without\n      ## going over the number of samples in the class. Note that if the number\n      ## of samples in a class is less than k, nothing is produced here.\n      seqVector <- rep(seq_len(k), numInClass[i] %/% k)\n      ## add enough random integers to get  length(seqVector) == numInClass[i]\n      if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k))\n      ## shuffle the integers for fold assignment and assign to this classes's data\n      ## seqVector[sample.int(length(seqVector))] is used to handle length(seqVector) == 1\n      foldVector[y == dimnames(numInClass)$y[i]] <- seqVector[sample.int(length(seqVector))]\n    }\n  } else {\n    foldVector <- seq(along = y)\n  }\n\n  out <- split(seq(along = y), foldVector)\n  names(out) <- NULL\n  out\n}\n\n#' Model Serialization and Compatibility\n#'\n#' @description\n#' When it comes to serializing XGBoost models, it's possible to use R serializers such as\n#' [save()] or [saveRDS()] to serialize an XGBoost model object, but XGBoost also provides\n#' its own serializers with better compatibility guarantees, which allow loading\n#' said models in other language 
bindings of XGBoost.\n#'\n#' Note that an `xgb.Booster` object (**as produced by [xgb.train()]**, see rest of the doc\n#' for objects produced by [xgboost()]), outside of its core components, might also keep:\n#' - Additional model configuration (accessible through [xgb.config()]), which includes\n#'   model fitting parameters like `max_depth` and runtime parameters like `nthread`.\n#'   These are not necessarily useful for prediction/importance/plotting.\n#' - Additional R specific attributes  - e.g. results of callbacks, such as evaluation logs,\n#'   which are kept as a `data.table` object, accessible through\n#'   `attributes(model)$evaluation_log` if present.\n#'\n#' The first one (configurations) does not have the same compatibility guarantees as\n#' the model itself, including attributes that are set and accessed through\n#' [xgb.attributes()] - that is, such configuration might be lost after loading the\n#' booster in a different XGBoost version, regardless of the serializer that was used.\n#' These are saved when using [saveRDS()], but will be discarded if loaded into an\n#' incompatible XGBoost version. They are not saved when using XGBoost's\n#' serializers from its public interface including [xgb.save()] and [xgb.save.raw()].\n#'\n#' The second ones (R attributes) are not part of the standard XGBoost model structure,\n#' and thus are not saved when using XGBoost's own serializers. 
These attributes are\n#' only used for informational purposes, such as keeping track of evaluation metrics as\n#' the model was fit, or saving the R call that produced the model, but are otherwise\n#' not used for prediction / importance / plotting / etc.\n#' These R attributes are only preserved when using R's serializers.\n#'\n#' In addition to the regular `xgb.Booster` objects produced by [xgb.train()], the\n#' function [xgboost()] produces objects with a different subclass `xgboost` (which\n#' inherits from `xgb.Booster`), which keeps other additional metadata as R attributes\n#' such as class names in classification problems, and which has a dedicated `predict`\n#' method that uses different defaults and takes different argument names. XGBoost's\n#' own serializers can work with this `xgboost` class, but as they do not keep R\n#' attributes, the resulting object, when deserialized, is downcast to the regular\n#' `xgb.Booster` class (i.e. it loses the metadata, and the resulting object will use\n#' [predict.xgb.Booster()] instead of [predict.xgboost()]) - for these `xgboost` objects,\n#' `saveRDS` might thus be a better option if the extra functionalities are needed.\n#'\n#' Note that XGBoost models in R starting from version `2.1.0` and onwards, and\n#' XGBoost models before version `2.1.0`, have a very different R object structure and\n#' are incompatible with each other. Hence, models that were saved with R serializers\n#' like [saveRDS()] or [save()] before version `2.1.0` will not work with later\n#' `xgboost` versions and vice versa. Be aware that the structure of R model objects\n#' could in theory change again in the future, so XGBoost's serializers should be\n#' preferred for long-term storage.\n#'\n#' Furthermore, note that model objects from XGBoost might not be serializable with third-party\n#' R packages like `qs` or `qs2`.\n#'\n#' @details\n#' Use [xgb.save()] to save the XGBoost model as a stand-alone file. 
You may opt into\n#' the JSON format by specifying the JSON extension. To read the model back, use\n#' [xgb.load()].\n#'\n#' Use [xgb.save.raw()] to save the XGBoost model as a sequence (vector) of raw bytes\n#' in a future-proof manner. Future releases of XGBoost will be able to read the raw bytes and\n#' re-construct the corresponding model. To read the model back, use [xgb.load.raw()].\n#' The [xgb.save.raw()] function is useful if you would like to persist the XGBoost model\n#' as part of another R object.\n#'\n#' Use [saveRDS()] if you require the R-specific attributes that a booster might have, such\n#' as evaluation logs or the model class `xgboost` instead of `xgb.Booster`, but note that\n#' future compatibility of such objects is outside XGBoost's control as it relies on R's\n#' serialization format (see e.g. the details section in [serialize] and [save()] from base R).\n#'\n#' For more details and explanation about model persistence and archival, consult the page\n#' \\url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' # Save as a stand-alone file; load it with xgb.load()\n#' fname <- file.path(tempdir(), \"xgb_model.ubj\")\n#' xgb.save(bst, fname)\n#' bst2 <- xgb.load(fname)\n#'\n#' # Save as a stand-alone file (JSON); load it with xgb.load()\n#' fname <- file.path(tempdir(), \"xgb_model.json\")\n#' xgb.save(bst, fname)\n#' bst2 <- xgb.load(fname)\n#'\n#' # Save as a raw byte vector; load it with xgb.load.raw()\n#' xgb_bytes <- xgb.save.raw(bst)\n#' bst2 <- xgb.load.raw(xgb_bytes)\n#'\n#' # Persist XGBoost model as part of another R object\n#' obj <- 
list(xgb_model_bytes = xgb.save.raw(bst), description = \"My first XGBoost model\")\n#' # Persist the R object. Here, saveRDS() is okay, since it doesn't persist\n#' # xgb.Booster directly. What's being persisted is the future-proof byte representation\n#' # as given by xgb.save.raw().\n#' fname <- file.path(tempdir(), \"my_object.Rds\")\n#' saveRDS(obj, fname)\n#' # Read back the R object\n#' obj2 <- readRDS(fname)\n#' # Re-construct xgb.Booster object from the bytes\n#' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)\n#'\n#' @name a-compatibility-note-for-saveRDS-save\nNULL\n\n#' @name xgboost-options\n#' @title XGBoost Options\n#' @description XGBoost offers an \\link[base:options]{option setting} for controlling the behavior\n#' of deprecated and removed function arguments.\n#'\n#' Some of the arguments in functions like [xgb.train()] or [predict.xgb.Booster()] have been renamed\n#' from how they were in previous versions, or have been removed.\n#'\n#' In order to make the transition to newer XGBoost versions easier, some of these parameters are\n#' still accepted but issue a warning when using them. \\bold{Note that these warnings will become\n#' errors in the future!!} - this is just a temporary workaround to make the transition easier.\n#'\n#' One can optionally use 'strict mode' to turn these warnings into errors, in order to ensure\n#' that code calling xgboost will still work once those are removed in future releases.\n#'\n#' Currently, the only supported option is `xgboost.strict_mode`, which can be set to `TRUE` or\n#' `FALSE` (default).\n#'\n#' In addition to an R option, it can also be enabled by setting the environment variable\n#' `XGB_STRICT_MODE=1`. 
If set, this environment variable will take precedence over the option.\n#' @examples\n#' options(\"xgboost.strict_mode\" = FALSE)\n#' options(\"xgboost.strict_mode\" = TRUE)\n#' Sys.setenv(\"XGB_STRICT_MODE\" = \"1\")\n#' Sys.setenv(\"XGB_STRICT_MODE\" = \"0\")\nNULL\n\nget.strict.mode.option <- function() {\n  env_var_option <- Sys.getenv(\"XGB_STRICT_MODE\")\n  if (!nchar(env_var_option)) {\n    return(getOption(\"xgboost.strict_mode\", default = FALSE))\n  }\n  return(tolower(as.character(env_var_option)) %in% c(\"1\", \"true\", \"t\", \"yes\", \"y\"))\n}\n\n# Lookup table for the deprecated parameters bookkeeping\ndeprecated_train_params <- list(\n  renamed = list(\n    'print.every.n' = 'print_every_n',\n    'early.stop.round' = 'early_stopping_rounds',\n    'training.data' = 'data',\n    'dtrain' = 'data',\n    'watchlist' = 'evals',\n    'feval' = 'custom_metric'\n  ),\n  removed = character()\n)\ndeprecated_cv_params <- deprecated_train_params\ndeprecated_cv_params$removed <- 'label'\ndeprecated_xgboost_params <- list(\n  renamed = list(\n    'data' = 'x',\n    'label' = 'y',\n    'eta' = 'learning_rate',\n    'gamma' = 'min_split_loss',\n    'lambda' = 'reg_lambda',\n    'alpha' = 'reg_alpha',\n    'min.split.loss' = 'min_split_loss',\n    'reg.lambda' = 'reg_lambda',\n    'reg.alpha' = 'reg_alpha',\n    'watchlist' = 'evals'\n  ),\n  removed = c(\n    'params',\n    'save_period',\n    'save_name',\n    'xgb_model',\n    'callbacks',\n    'missing',\n    'maximize'\n  )\n)\ndeprecated_dttree_params <- list(\n  renamed = list('n_first_tree' = 'trees'),\n  removed = c(\"feature_names\", \"text\")\n)\ndeprecated_plotimp_params <- list(\n  renamed = list(\n    'plot.height' = 'plot_height',\n    'plot.width' = 'plot_width'\n  ),\n  removed = character()\n)\ndeprecated_multitrees_params <- list(\n  renamed = c(\n    deprecated_plotimp_params$renamed,\n    list('features.keep' = 'features_keep')\n  ),\n  removed = \"feature_names\"\n)\ndeprecated_dump_params 
<- list(\n  renamed = list('with.stats' = 'with_stats'),\n  removed = character()\n)\ndeprecated_plottree_params <- c(\n  renamed = list(\n    deprecated_plotimp_params$renamed,\n    deprecated_dump_params$renamed,\n    list('trees' = 'tree_idx')\n  ),\n  removed = c(\"show_node_id\", \"feature_names\")\n)\ndeprecated_predict_params <- list(\n  renamed = list(\"ntreelimit\" = \"iterationrange\"),\n  removed = \"reshape\"\n)\ndeprecated_dmatrix_params <- list(\n  renamed = character(),\n  removed = \"info\"\n)\n\n# These got moved from 'info' to function arguments\nargs_previous_dmatrix_info <- c(\"label\", \"weight\", \"base_margin\", \"group\")\n\n# Checks the dot-parameters for deprecated names\n# (including partial matching), gives a deprecation warning,\n# and sets new parameters to the old parameters' values within its parent frame.\n# WARNING: has side-effects\ncheck.deprecation <- function(\n  deprecated_list,\n  fn_call,\n  ...,\n  env = parent.frame(),\n  allow_unrecognized = FALSE\n) {\n  params <- list(...)\n  if (length(params) == 0) {\n    return(NULL)\n  }\n  error_on_deprecated <- get.strict.mode.option()\n  throw_err_or_depr_msg <- function(...) 
{\n    if (error_on_deprecated) {\n      stop(...)\n    } else {\n      warning(..., \" This warning will become an error in a future version.\")\n    }\n  }\n\n  if (is.null(names(params)) || min(nchar(names(params))) == 0L) {\n    throw_err_or_depr_msg(\"Passed invalid positional arguments\")\n  }\n  list_renamed <- deprecated_list$renamed\n  list_removed <- deprecated_list$removed\n  has_params_arg <-\n    length(list_renamed) == length(deprecated_train_params$renamed) &&\n    list_renamed[[1L]] == deprecated_train_params$renamed[[1L]]\n  is_dmatrix_constructor <-\n    length(list_removed) == length(deprecated_dmatrix_params$removed) &&\n    list_removed[[1L]] == deprecated_dmatrix_params$removed[[1L]]\n  all_match <- pmatch(names(params), names(list_renamed))\n  # throw error on unrecognized parameters\n  if (!allow_unrecognized && anyNA(all_match)) {\n\n    names_unrecognized <- names(params)[is.na(all_match)]\n    # make it informative if they match something that goes under 'params'\n    if (has_params_arg) {\n      names_params <- formalArgs(xgb.params)\n      names_params <- c(names_params, gsub(\"_\", \".\", names_params, fixed = TRUE))\n      names_under_params <- intersect(names_unrecognized, names_params)\n      if (length(names_under_params)) {\n        if (error_on_deprecated) {\n          stop(\n            \"Passed invalid function arguments: \",\n            paste(head(names_under_params), collapse = \", \"),\n            \". These should be passed as a list to argument 'params'.\"\n          )\n        } else {\n          warning(\n            \"Passed invalid function arguments: \",\n            paste(head(names_under_params), collapse = \", \"),\n            \". 
These should be passed as a list to argument 'params'.\",\n            \" Conversion from argument to 'params' entry will be done automatically, but this \",\n            \"behavior will become an error in a future version.\"\n          )\n          if (any(names_under_params %in% names(env[[\"params\"]]))) {\n            repeteated_params <- intersect(names_under_params, names(env[[\"params\"]]))\n            stop(\n              \"Passed entries as both function argument(s) and as elements under 'params': \",\n              paste(head(repeteated_params), collapse = \", \")\n            )\n          } else {\n            env[[\"params\"]] <- c(env[[\"params\"]], params[names_under_params])\n          }\n        }\n        names_unrecognized <- setdiff(names_unrecognized, names_under_params)\n      }\n    } else if (is_dmatrix_constructor && NROW(params$info)) {\n      # same thing for the earlier 'info' in 'xgb.DMatrix'\n      throw_err_or_depr_msg(\n        \"Passed invalid argument 'info' - entries on it should be passed as direct arguments.\"\n      )\n      entries_info <- names(params$info)\n      if (length(setdiff(entries_info, args_previous_dmatrix_info))) {\n        stop(\n          \"Passed unrecognized entries under info: \",\n          paste(setdiff(entries_info, args_previous_dmatrix_info) |> head(), collapse = \", \")\n        )\n      }\n      for (entry_name in entries_info) {\n        if (!is.null(env[[entry_name]])) {\n          stop(\"Passed entry under both 'info' and function argument(s): \", entry_name)\n        }\n        env[[entry_name]] <- params$info[[entry_name]]\n      }\n      names_unrecognized <- setdiff(names_unrecognized, \"info\")\n    }\n\n    # check for parameters that were removed from a previous version\n    names_removed <- intersect(names_unrecognized, list_removed)\n    if (length(names_removed)) {\n      throw_err_or_depr_msg(\n        \"Parameter(s) have been removed from this function: \",\n        paste(names_removed, 
collapse = \", \"), \".\"\n      )\n      names_unrecognized <- setdiff(names_unrecognized, list_removed)\n    }\n\n    # otherwise throw a generic error\n    if (length(names_unrecognized)) {\n      throw_err_or_depr_msg(\n        \"Passed unrecognized parameters: \",\n        paste(head(names_unrecognized), collapse = \", \"), \".\"\n      )\n    }\n\n  } else {\n\n    names_removed <- intersect(names(params)[is.na(all_match)], list_removed)\n    if (length(names_removed)) {\n      throw_err_or_depr_msg(\n        \"Parameter(s) have been removed from this function: \",\n        paste(names_removed, collapse = \", \"), \".\"\n      )\n    }\n\n  }\n\n  matched_params <- list_renamed[all_match[!is.na(all_match)]]\n  idx_orig <- seq_along(params)[!is.na(all_match)]\n  function_args_passed <- names(as.list(fn_call))[-1L]\n  for (idx in seq_along(matched_params)) {\n    match_old <- names(matched_params)[[idx]]\n    match_new <- matched_params[[idx]]\n    throw_err_or_depr_msg(\n      \"Parameter '\", match_old, \"' has been renamed to '\",\n      match_new, \"'.\"\n    )\n    if (match_new %in% function_args_passed) {\n      stop(\"Passed both '\", match_new, \"' and '\", match_old, \"'.\")\n    }\n    env[[match_new]] <- params[[idx_orig[idx]]]\n  }\n}\n"
  },
  {
    "path": "R-package/R/xgb.Booster.R",
    "content": "# Construct an internal XGBoost Booster and get its current number of rounds.\n# internal utility function\n# Note: the number of rounds in the C booster gets reset to zero when changing\n# key booster parameters like 'process_type=update', but in some cases, when\n# replacing previous iterations, it needs to make a check that the new number\n# of iterations doesn't exceed the previous ones, hence it keeps track of the\n# current number of iterations before resetting the parameters in order to\n# perform the check later on.\nxgb.Booster <- function(params, cachelist, modelfile) {\n  if (typeof(cachelist) != \"list\" ||\n      !all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {\n    stop(\"cachelist must be a list of xgb.DMatrix objects\")\n  }\n  ## Load existing model, dispatch for on disk model file and in memory buffer\n  if (!is.null(modelfile)) {\n    if (is.character(modelfile)) {\n      ## A filename\n      bst <- .Call(XGBoosterCreate_R, cachelist)\n      modelfile <- path.expand(modelfile)\n      .Call(XGBoosterLoadModel_R, xgb.get.handle(bst), enc2utf8(modelfile[1]))\n      niter <- xgb.get.num.boosted.rounds(bst)\n      if (length(params) > 0) {\n        xgb.model.parameters(bst) <- params\n      }\n      return(list(bst = bst, niter = niter))\n    } else if (is.raw(modelfile)) {\n      ## A memory buffer\n      bst <- xgb.load.raw(modelfile)\n      niter <- xgb.get.num.boosted.rounds(bst)\n      xgb.model.parameters(bst) <- params\n      return(list(bst = bst, niter = niter))\n    } else if (inherits(modelfile, \"xgb.Booster\")) {\n      ## A booster object\n      bst <- .Call(XGDuplicate_R, modelfile)\n      niter <- xgb.get.num.boosted.rounds(bst)\n      xgb.model.parameters(bst) <- params\n      return(list(bst = bst, niter = niter))\n    } else {\n      stop(\"modelfile must be either character filename, or raw booster dump, or xgb.Booster object\")\n    }\n  }\n  ## Create new model\n  bst <- 
.Call(XGBoosterCreate_R, cachelist)\n  if (length(params) > 0) {\n    xgb.model.parameters(bst) <- params\n  }\n  return(list(bst = bst, niter = 0L))\n}\n\n# Check whether xgb.Booster handle is null\n# internal utility function\nis.null.handle <- function(handle) {\n  if (is.null(handle)) return(TRUE)\n\n  if (!inherits(handle, \"externalptr\"))\n    stop(\"argument type must be 'externalptr'\")\n\n  return(.Call(XGCheckNullPtr_R, handle))\n}\n\n# Return a verified to be valid handle out of xgb.Booster\n# internal utility function\nxgb.get.handle <- function(object) {\n  if (inherits(object, \"xgb.Booster\")) {\n    handle <- object$ptr\n    if (is.null(handle) || !inherits(handle, \"externalptr\")) {\n      stop(\"'xgb.Booster' object is corrupted or is from an incompatible XGBoost version.\")\n    }\n  } else {\n    stop(\"argument must be an 'xgb.Booster' object.\")\n  }\n  if (is.null.handle(handle)) {\n    stop(\"invalid 'xgb.Booster' (blank 'externalptr').\")\n  }\n  return(handle)\n}\n\n#' Predict method for XGBoost model\n#'\n#' Predict values on data based on XGBoost model.\n#'\n#' @param object Object of class `xgb.Booster`.\n#' @param newdata Takes `data.frame`, `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,\n#'   local data file, or `xgb.DMatrix`.\n#'\n#'   For single-row predictions on sparse data, it is recommended to use CSR format. 
If passing\n#'   a sparse vector, it will take it as a row vector.\n#'\n#'   Note that, for repeated predictions on the same data, one might want to create a DMatrix to\n#'   pass here instead of passing R types like matrices or data frames, as predictions will be\n#'   faster on DMatrix.\n#'\n#'   If `newdata` is a `data.frame`, be aware that:\n#'   - Columns will be converted to numeric if they aren't already, which could potentially make\n#'     the operation slower than in an equivalent `matrix` object.\n#'   - The order of the columns must match with that of the data from which the model was fitted\n#'     (i.e. columns will not be referenced by their names, just by their order in the data),\n#'     unless passing `validate_features = TRUE` (which is not the default).\n#'   - If the model was fitted to data with categorical columns, these columns must be of\n#'     `factor` type here, and must use the same encoding (i.e. have the same levels).\n#'   - If `newdata` contains any `factor` columns, they will be converted to base-0\n#'     encoding (same as during DMatrix creation) - hence, one should not pass a `factor`\n#'     under a column which during training had a different type.\n#'   - Any columns with type other than `factor` will be interpreted as numeric.\n#' @param missing Float value that represents missing values in data\n#'   (e.g., 0 or some other extreme value).\n#'\n#'   This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases,\n#'   should pass this as an argument to the DMatrix constructor instead.\n#' @param outputmargin Whether the prediction should be returned in the form of\n#'   original untransformed sum of predictions from boosting iterations' results.\n#'   E.g., setting `outputmargin = TRUE` for logistic regression would return log-odds\n#'   instead of probabilities.\n#' @param predleaf Whether to predict per-tree leaf indices.\n#' @param predcontrib Whether to return feature contributions to individual 
predictions (see Details).\n#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).\n#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).\n#' @param training Whether the prediction result is used for training. When enabled,\n#'   XGBoost uses the training prediction path instead of inplace prediction.\n#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing\n#'   a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.\n#'   base-1 indexing, and inclusive of both ends).\n#'\n#'   For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will\n#'   predict using only the first one.\n#'\n#'   If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all\n#'   of the iterations (rounds) otherwise.\n#'\n#'   If passing \"all\", will use all of the rounds regardless of whether the model had early stopping or not.\n#'\n#'   Not applicable to `gblinear` booster.\n#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode\n#'   regardless of the model type - meaning that, for example, both a multi-class and a binary classification\n#'   model would generate output arrays with the same number of dimensions, with the 'class' dimension having\n#'   size equal to '1' for the binary model.\n#'\n#'   If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a\n#'   binary classification model for example would not have a redundant dimension for 'class'.\n#'\n#'   See documentation for the return type for the exact shape of the output arrays for each prediction mode.\n#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they\n#'   are 
generated by the core XGBoost library, without transposing them to match the expected output shape.\n#'\n#'   Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major\n#'   order, hence the result needs to be transposed in order to have the expected shape when represented as\n#'   an R array or matrix, which might be a slow operation.\n#'\n#'   If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows\n#'   will be the last dimension instead of the first dimension.\n#' @param base_margin Base margin used for boosting from existing model (raw score that gets added to\n#'   all observations independently of the trees in the model).\n#'\n#'   If supplied, should be either a vector with length equal to the number of rows in `newdata`\n#'   (for objectives which produce a single score per observation), or a matrix with number of\n#'   rows matching the number of rows in `newdata` and number of columns matching the number\n#'   of scores estimated by the model (e.g. number of classes for multi-class classification).\n#'\n#'   Note that, if `newdata` is an `xgb.DMatrix` object, passing this argument will\n#'   throw an error, as it needs to be added to the DMatrix instead (e.g. 
by passing it as\n#'   an argument in its constructor, or by calling [setinfo.xgb.DMatrix()]).\n#' @param validate_features When `TRUE`, validate that the Booster's and newdata's\n#'   feature_names match (only applicable when both `object` and `newdata` have feature names).\n#'\n#'   If the column names differ and `newdata` is not an `xgb.DMatrix`, will try to reorder\n#'   the columns in `newdata` to match with the booster's.\n#'\n#'   If the booster has feature types and `newdata` is either an `xgb.DMatrix` or\n#'   `data.frame`, will additionally verify that categorical columns are of the\n#'   correct type in `newdata`, throwing an error if they do not match.\n#'\n#'   If passing `FALSE`, it is assumed that the feature names and types are the same,\n#'   and come in the same order as in the training data.\n#'\n#'   Note that this check might add some sizable latency to the predictions, so it's\n#'   recommended to disable it for performance-sensitive applications.\n#' @param ... Not used.\n#'\n#' @details\n#' Note that `iterationrange` would currently do nothing for predictions from \"gblinear\",\n#' since \"gblinear\" doesn't keep its boosting history.\n#'\n#' One possible practical application of the `predleaf` option is to use the model\n#' as a generator of new features which capture non-linearity and interactions,\n#' e.g., as implemented in [xgb.create.features()].\n#'\n#' Setting `predcontrib = TRUE` allows to calculate contributions of each feature to\n#' individual predictions. For \"gblinear\" booster, feature contributions are simply linear terms\n#' (feature_beta * feature_value). 
For \"gbtree\" booster, feature contributions are SHAP\n#' values (Lundberg 2017) that sum to the difference between the expected output\n#' of the model and the current prediction (where the hessian weights are used to compute the expectations).\n#' Setting `approxcontrib = TRUE` approximates these values following the idea explained\n#' in \\url{http://blog.datadive.net/interpreting-random-forests/}.\n#'\n#' With `predinteraction = TRUE`, SHAP values of contributions of interaction of each pair of features\n#' are computed. Note that this operation might be rather expensive in terms of compute and memory.\n#' Since it quadratically depends on the number of features, it is recommended to perform selection\n#' of the most important features first. See below about the format of the returned results.\n#'\n#' The `predict()` method uses as many threads as defined in `xgb.Booster` object (all by default).\n#' If you want to change their number, assign a new number to `nthread` using [xgb.model.parameters<-()].\n#' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.\n#'\n#' @return\n#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on\n#' parameter `strict_shape` as follows:\n#'\n#' If passing `strict_shape=FALSE`:\\itemize{\n#' \\item For regression or binary classification: a vector of length `nrows`.\n#' \\item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`.\n#'\n#' Note that objective variant `multi:softmax` defaults towards predicting most likely class (a vector\n#' `nrows`) instead of per-class probabilities.\n#' \\item For `predleaf`: a matrix with one column per tree.\n#'\n#' For multi-class / multi-target, they will be arranged so that columns in the output will have\n#' the leafs from one group followed by leafs of the other group (e.g. 
order will be `group1:feat1`,\n#' `group1:feat2`, ..., `group2:feat1`, `group2:feat2`, ...).\n#'\n#' If there is more than one parallel tree (e.g. random forests), the parallel trees will be the\n#' last grouping in the resulting order, which will still be 2D.\n#' \\item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions\n#' `[nrows, nfeats+1]`. The last \"+ 1\" column corresponds to the baseline value.\n#'\n#' For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`.\n#'\n#' The contribution values are on the scale of untransformed margin (e.g., for binary classification,\n#' the values are log-odds deviations from the baseline).\n#' \\item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of\n#' dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions)\n#' elements represent different feature interaction contributions. The array is symmetric w.r.t. the last\n#' two dimensions. The \"+ 1\" columns correspond to the baselines. 
Summing this array along the last\n#' dimension should produce practically the same result as `predcontrib = TRUE`.\n#'\n#' For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`\n#' }\n#'\n#' If passing `strict_shape=TRUE`, the result is always a matrix (if 2D) or array (if 3D or higher):\n#' - For normal predictions, the dimension is `[nrows, ngroups]`.\n#' - For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`.\n#' - For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`.\n#' - For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`.\n#'\n#' If passing `avoid_transpose=TRUE`, then the dimensions in all cases will be in reverse order - for\n#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]`\n#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`.\n#' @seealso [xgb.train()]\n#' @references\n#' 1. Scott M. Lundberg, Su-In Lee, \"A Unified Approach to Interpreting Model Predictions\",\n#'   NIPS Proceedings 2017, \\url{https://arxiv.org/abs/1705.07874}\n#' 2. Scott M. 
Lundberg, Su-In Lee, \"Consistent feature attribution for tree ensembles\",\n#'   \\url{https://arxiv.org/abs/1706.06060}\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' ## binary classification:\n#'\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 2 for examples\n#' nthread <- 2\n#' data.table::setDTthreads(nthread)\n#'\n#' train <- agaricus.train\n#' test <- agaricus.test\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 5,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' # use all trees by default\n#' pred <- predict(bst, test$data)\n#' # use only the 1st tree\n#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))\n#'\n#' # Predicting tree leafs:\n#' # the result is an nsamples X ntrees matrix\n#' pred_leaf <- predict(bst, test$data, predleaf = TRUE)\n#' str(pred_leaf)\n#'\n#' # Predicting feature contributions to predictions:\n#' # the result is an nsamples X (nfeatures + 1) matrix\n#' pred_contr <- predict(bst, test$data, predcontrib = TRUE)\n#' str(pred_contr)\n#' # verify that contributions' sums are equal to log-odds of predictions (up to float precision):\n#' summary(rowSums(pred_contr) - qlogis(pred))\n#' # for the 1st record, let's inspect its features that had non-zero contribution to prediction:\n#' contr1 <- pred_contr[1,]\n#' contr1 <- contr1[-length(contr1)]    # drop intercept\n#' contr1 <- contr1[contr1 != 0]        # drop non-contributing features\n#' contr1 <- contr1[order(abs(contr1))] # order by contribution magnitude\n#' old_mar <- par(\"mar\")\n#' par(mar = old_mar + c(0,7,0,0))\n#' barplot(contr1, horiz = TRUE, las = 2, xlab = \"contribution to prediction in log-odds\")\n#' par(mar = old_mar)\n#'\n#'\n#' ## multiclass classification in iris dataset:\n#'\n#' lb <- 
as.numeric(iris$Species) - 1\n#' num_class <- 3\n#'\n#' set.seed(11)\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(as.matrix(iris[, -5], nthread = 1), label = lb),\n#'   nrounds = 10,\n#'   params = xgb.params(\n#'     max_depth = 4,\n#'     nthread = 2,\n#'     subsample = 0.5,\n#'     objective = \"multi:softprob\",\n#'     num_class = num_class\n#'   )\n#' )\n#'\n#' # predict for softmax returns num_class probability numbers per case:\n#' pred <- predict(bst, as.matrix(iris[, -5]))\n#' str(pred)\n#' # convert the probabilities to softmax labels\n#' pred_labels <- max.col(pred) - 1\n#' # the following should result in the same error as seen in the last iteration\n#' sum(pred_labels != lb) / length(lb)\n#'\n#' # compare with predictions from softmax:\n#' set.seed(11)\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(as.matrix(iris[, -5], nthread = 1), label = lb),\n#'   nrounds = 10,\n#'   params = xgb.params(\n#'     max_depth = 4,\n#'     nthread = 2,\n#'     subsample = 0.5,\n#'     objective = \"multi:softmax\",\n#'     num_class = num_class\n#'   )\n#' )\n#'\n#' pred <- predict(bst, as.matrix(iris[, -5]))\n#' str(pred)\n#' all.equal(pred, pred_labels)\n#' # prediction from using only 5 iterations should result\n#' # in the same error as seen in iteration 5:\n#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))\n#' sum(pred5 != lb) / length(lb)\n#'\n#' @export\npredict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,\n                                predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,\n                                training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE,\n                                validate_features = FALSE, base_margin = NULL, ...) 
{\n  check.deprecation(deprecated_predict_params, match.call(), ..., allow_unrecognized = TRUE)\n  if (validate_features) {\n    newdata <- validate.features(object, newdata)\n  }\n  is_dmatrix <- inherits(newdata, \"xgb.DMatrix\")\n  if (is_dmatrix && !is.null(base_margin)) {\n    stop(\n      \"'base_margin' is not supported when passing 'xgb.DMatrix' as input.\",\n      \" Should be passed as argument to 'xgb.DMatrix' constructor.\"\n    )\n  }\n  if (is_dmatrix) {\n    rnames <- NULL\n  } else {\n    rnames <- row.names(newdata)\n  }\n\n  use_as_df <- FALSE\n  use_as_dense_matrix <- FALSE\n  use_as_csr_matrix <- FALSE\n  n_row <- NULL\n  if (!is_dmatrix) {\n\n    inplace_predict_supported <- !predcontrib && !predinteraction && !predleaf\n    if (inplace_predict_supported) {\n      booster_type <- xgb.booster_type(object)\n      if (booster_type == \"gblinear\" || training) {\n        inplace_predict_supported <- FALSE\n      }\n    }\n    if (inplace_predict_supported) {\n\n      if (is.matrix(newdata)) {\n        use_as_dense_matrix <- TRUE\n      } else if (is.data.frame(newdata)) {\n        # note: since here it turns it into a non-data-frame list,\n        # needs to keep track of the number of rows it had for later\n        n_row <- nrow(newdata)\n        newdata <- lapply(\n          newdata,\n          function(x) {\n            if (is.factor(x)) {\n              return(as.numeric(x) - 1)\n            } else {\n              return(as.numeric(x))\n            }\n          }\n        )\n        use_as_df <- TRUE\n      } else if (inherits(newdata, \"dgRMatrix\")) {\n        use_as_csr_matrix <- TRUE\n        csr_data <- list(newdata@p, newdata@j, newdata@x, ncol(newdata))\n      } else if (inherits(newdata, \"dsparseVector\")) {\n        use_as_csr_matrix <- TRUE\n        n_row <- 1L\n        i <- newdata@i - 1L\n        if (storage.mode(i) != \"integer\") {\n          storage.mode(i) <- \"integer\"\n        }\n        csr_data <- list(c(0L, length(i)), 
i, newdata@x, length(newdata))\n      }\n\n    }\n\n  } # if (!is_dmatrix)\n\n  if (!is_dmatrix && !use_as_dense_matrix && !use_as_csr_matrix && !use_as_df) {\n    nthread <- xgb.nthread(object)\n    newdata <- xgb.DMatrix(\n      newdata,\n      missing = missing,\n      base_margin = base_margin,\n      nthread = NVL(nthread, -1)\n    )\n    is_dmatrix <- TRUE\n  }\n\n  if (is.null(n_row)) {\n    n_row <- nrow(newdata)\n  }\n\n\n  if (!is.null(iterationrange)) {\n    if (is.character(iterationrange)) {\n      stopifnot(iterationrange == \"all\")\n      iterationrange <- c(0, 0)\n    } else {\n      iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing\n    }\n  } else {\n    ## no limit is supplied, use best\n    best_iteration <- xgb.best_iteration(object)\n    if (is.null(best_iteration)) {\n      iterationrange <- c(0, 0)\n    } else {\n      iterationrange <- c(0, as.integer(best_iteration) + 1L)\n    }\n  }\n  ## Handle the 0 length values.\n  box <- function(val) {\n    if (length(val) == 0) {\n      cval <- vector(, 1)\n      cval[0] <- val\n      return(cval)\n    }\n    return(val)\n  }\n\n  args <- list(\n    training = box(training),\n    strict_shape = as.logical(strict_shape),\n    iteration_begin = box(as.integer(iterationrange[1])),\n    iteration_end = box(as.integer(iterationrange[2])),\n    type = box(as.integer(0))\n  )\n\n  set_type <- function(type) {\n    if (args$type != 0) {\n      stop(\"One type of prediction at a time.\")\n    }\n    return(box(as.integer(type)))\n  }\n  if (outputmargin) {\n    args$type <- set_type(1)\n  }\n  if (predcontrib) {\n    args$type <- set_type(if (approxcontrib) 3 else 2)\n  }\n  if (predinteraction) {\n    args$type <- set_type(if (approxcontrib) 5 else 4)\n  }\n  if (predleaf) {\n    args$type <- set_type(6)\n  }\n\n  json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)\n  if (is_dmatrix) {\n    arr <- .Call(\n      XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf\n    
)\n  } else if (use_as_dense_matrix) {\n    arr <- .Call(\n      XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin\n    )\n  } else if (use_as_csr_matrix) {\n    arr <- .Call(\n      XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin\n    )\n  } else if (use_as_df) {\n    arr <- .Call(\n      XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin\n    )\n  }\n\n  ## Needed regardless of whether strict shape is being used.\n  if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) {\n    cnames <- c(colnames(newdata), \"(Intercept)\")\n    dim_names <- vector(mode = \"list\", length = length(dim(arr)))\n    dim_names[[1L]] <- cnames\n    if (predinteraction) dim_names[[2L]] <- cnames\n    .Call(XGSetArrayDimNamesInplace_R, arr, dim_names)\n  }\n\n  if (NROW(rnames)) {\n    if (is.null(dim(arr))) {\n      .Call(XGSetVectorNamesInplace_R, arr, rnames)\n    } else {\n      dim_names <- dimnames(arr)\n      if (is.null(dim_names)) {\n        dim_names <- vector(mode = \"list\", length = length(dim(arr)))\n      }\n      dim_names[[length(dim_names)]] <- rnames\n      .Call(XGSetArrayDimNamesInplace_R, arr, dim_names)\n    }\n  }\n\n  if (!avoid_transpose && is.array(arr)) {\n    arr <- aperm(arr)\n  }\n\n  return(arr)\n}\n\nvalidate.features <- function(bst, newdata) {\n  if (is.character(newdata)) {\n    # this will be encountered when passing file paths\n    return(newdata)\n  }\n  if (inherits(newdata, \"sparseVector\")) {\n    # in this case, newdata won't have metadata\n    return(newdata)\n  }\n  if (is.vector(newdata)) {\n    newdata <- as.matrix(newdata)\n  }\n\n  booster_names <- getinfo(bst, \"feature_name\")\n  checked_names <- FALSE\n  if (NROW(booster_names)) {\n\n    try_reorder <- FALSE\n    if (inherits(newdata, \"xgb.DMatrix\")) {\n      curr_names <- getinfo(newdata, \"feature_name\")\n    } else {\n      
curr_names <- colnames(newdata)\n      try_reorder <- TRUE\n    }\n\n    if (NROW(curr_names)) {\n      checked_names <- TRUE\n\n      if (length(curr_names) != length(booster_names) || any(curr_names != booster_names)) {\n\n        if (!try_reorder) {\n          stop(\"Feature names in 'newdata' do not match with booster's.\")\n        } else {\n          if (inherits(newdata, \"data.table\")) {\n            newdata <- newdata[, booster_names, with = FALSE]\n          } else {\n            newdata <- newdata[, booster_names, drop = FALSE]\n          }\n        }\n\n      }\n\n    } # if (NROW(curr_names)) {\n\n  } # if (NROW(booster_names)) {\n\n  if (inherits(newdata, c(\"data.frame\", \"xgb.DMatrix\"))) {\n\n    booster_types <- getinfo(bst, \"feature_type\")\n    if (!NROW(booster_types)) {\n      # Note: types in the booster are optional. Other interfaces\n      # might not even save it as booster attributes for example,\n      # even if the model uses categorical features.\n      return(newdata)\n    }\n    if (inherits(newdata, \"xgb.DMatrix\")) {\n      curr_types <- getinfo(newdata, \"feature_type\")\n      if (length(curr_types) != length(booster_types) || any(curr_types != booster_types)) {\n        stop(\"Feature types in 'newdata' do not match with booster's.\")\n      }\n    }\n    if (inherits(newdata, \"data.frame\")) {\n      is_factor <- sapply(newdata, is.factor)\n      if (any(is_factor != (booster_types == \"c\"))) {\n        stop(\n          paste0(\n            \"Feature types in 'newdata' do not match with booster's for same columns (by \",\n            ifelse(checked_names, \"name\", \"position\"),\n            \").\"\n          )\n        )\n      }\n    }\n\n  }\n\n  return(newdata)\n}\n\n\n#' Accessors for serializable attributes of a model\n#'\n#' These methods allow to manipulate the key-value attribute strings of an XGBoost model.\n#'\n#' @details\n#' The primary purpose of XGBoost model attributes is to store some meta data about the 
model.\n#' Note that they are a separate concept from the object attributes in R.\n#' Specifically, they refer to key-value strings that can be attached to an XGBoost model,\n#' stored together with the model's binary representation, and accessed later\n#' (from R or any other interface).\n#' In contrast, any R attribute assigned to an R object of `xgb.Booster` class\n#' would not be saved by [xgb.save()] because an XGBoost model is an external memory object\n#' and its serialization is handled externally.\n#' Also, setting an attribute that has the same name as one of XGBoost's parameters wouldn't\n#' change the value of that parameter for a model.\n#' Use [xgb.model.parameters<-()] to set or change model parameters.\n#'\n#' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes,\n#' but it doesn't delete the other existing attributes.\n#'\n#' Important: since this modifies the booster's C object, semantics for assignment here\n#' will differ from R's, as any object reference to the same booster will be modified\n#' too, while assignment of R attributes through `attributes(model)$<attr> <- <value>`\n#' will follow the usual copy-on-write R semantics (see [xgb.copy.Booster()] for an\n#' example of these behaviors).\n#'\n#' @param object Object of class `xgb.Booster`. 
**Will be modified in-place** when assigning to it.\n#' @param name A non-empty character string specifying which attribute is to be accessed.\n#' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`,\n#'   it is a list (or an object coercible to a list) with the names of attributes to set\n#'   and the elements corresponding to attribute values.\n#'   Non-character values are converted to character.\n#'   When an attribute value is not a scalar, only the first index is used.\n#'   Use `NULL` to remove an attribute.\n#' @return\n#' - `xgb.attr()` returns either a string value of an attribute\n#'   or `NULL` if an attribute wasn't stored in a model.\n#' - `xgb.attributes()` returns a list of all attributes stored in a model\n#'   or `NULL` if a model has no stored attributes.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#' train <- agaricus.train\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' xgb.attr(bst, \"my_attribute\") <- \"my attribute value\"\n#' print(xgb.attr(bst, \"my_attribute\"))\n#' xgb.attributes(bst) <- list(a = 123, b = \"abc\")\n#'\n#' fname <- file.path(tempdir(), \"xgb.ubj\")\n#' xgb.save(bst, fname)\n#' bst1 <- xgb.load(fname)\n#' print(xgb.attr(bst1, \"my_attribute\"))\n#' print(xgb.attributes(bst1))\n#'\n#' # deletion:\n#' xgb.attr(bst1, \"my_attribute\") <- NULL\n#' print(xgb.attributes(bst1))\n#' xgb.attributes(bst1) <- list(a = NULL, b = NULL)\n#' print(xgb.attributes(bst1))\n#'\n#' @rdname xgb.attr\n#' @export\nxgb.attr <- function(object, name) {\n  if (is.null(name) || nchar(as.character(name[1])) == 0) stop(\"invalid attribute name\")\n  handle <- xgb.get.handle(object)\n  out <- .Call(XGBoosterGetAttr_R, handle, as.character(name[1]))\n  if (!NROW(out) || !nchar(out)) {\n    return(NULL)\n  
}\n  if (!is.null(out)) {\n    if (name %in% c(\"best_iteration\", \"best_score\")) {\n      out <- as.numeric(out)\n    }\n  }\n  return(out)\n}\n\n#' @rdname xgb.attr\n#' @export\n`xgb.attr<-` <- function(object, name, value) {\n  name <- as.character(name[1])\n  if (!NROW(name) || !nchar(name)) stop(\"invalid attribute name\")\n  handle <- xgb.get.handle(object)\n\n  if (!is.null(value)) {\n    # Coerce the elements to be scalar strings.\n    # Q: should we warn user about non-scalar elements?\n    if (is.numeric(value[1])) {\n      value <- format(value[1], digits = 17)\n    } else {\n      value <- as.character(value[1])\n    }\n  }\n  .Call(XGBoosterSetAttr_R, handle, name, value)\n  return(object)\n}\n\n#' @rdname xgb.attr\n#' @export\nxgb.attributes <- function(object) {\n  handle <- xgb.get.handle(object)\n  attr_names <- .Call(XGBoosterGetAttrNames_R, handle)\n  if (!NROW(attr_names)) return(list())\n  out <- lapply(attr_names, function(name) xgb.attr(object, name))\n  names(out) <- attr_names\n  return(out)\n}\n\n#' @rdname xgb.attr\n#' @export\n`xgb.attributes<-` <- function(object, value) {\n  a <- as.list(value)\n  if (is.null(names(a)) || any(nchar(names(a)) == 0)) {\n    stop(\"attribute names cannot be empty strings\")\n  }\n  for (i in seq_along(a)) {\n    xgb.attr(object, names(a[i])) <- a[[i]]\n  }\n  return(object)\n}\n\n#' Accessors for model parameters as JSON string\n#'\n#' @details\n#' Note that assignment is performed in-place on the booster C object, which unlike assignment\n#' of R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. 
all references\n#' to the same booster will also get updated.\n#'\n#' See [xgb.copy.Booster()] for an example of this behavior.\n#'\n#' @param object Object of class `xgb.Booster`. **Will be modified in-place** when assigning to it.\n#' @param value A list.\n#' @return Parameters as a list.\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#' train <- agaricus.train\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' config <- xgb.config(bst)\n#'\n#' @rdname xgb.config\n#' @export\nxgb.config <- function(object) {\n  handle <- xgb.get.handle(object)\n  return(jsonlite::fromJSON(.Call(XGBoosterSaveJsonConfig_R, handle)))\n}\n\n#' @rdname xgb.config\n#' @export\n`xgb.config<-` <- function(object, value) {\n  handle <- xgb.get.handle(object)\n  .Call(\n    XGBoosterLoadJsonConfig_R,\n    handle,\n    jsonlite::toJSON(value, auto_unbox = TRUE, null = \"null\")\n  )\n  return(object)\n}\n\n#' Accessors for model parameters\n#'\n#' Only the setter for XGBoost parameters is currently implemented.\n#'\n#' @details\n#' Just like [xgb.attr()], this function will make in-place modifications\n#' on the booster object which do not follow typical R assignment semantics - that is,\n#' all references to the same booster will also be updated, unlike assignment of R\n#' attributes which follow copy-on-write semantics.\n#'\n#' See [xgb.copy.Booster()] for an example of this behavior.\n#'\n#' Be aware that setting parameters of a fitted booster related to training continuation / updates\n#' will reset its number of rounds indicator to zero.\n#' @param object Object of class `xgb.Booster`. 
**Will be modified in-place**.\n#' @param value A list (or an object coercible to a list) with the names of parameters to set\n#'        and the elements corresponding to parameter values.\n#' @return The same booster `object`, which gets modified in-place.\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' train <- agaricus.train\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     learning_rate = 1,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' xgb.model.parameters(bst) <- list(learning_rate = 0.1)\n#'\n#' @rdname xgb.model.parameters\n#' @export\n`xgb.model.parameters<-` <- function(object, value) {\n  if (length(value) == 0) return(object)\n  p <- as.list(value)\n  if (is.null(names(p)) || any(nchar(names(p)) == 0)) {\n    stop(\"parameter names cannot be empty strings\")\n  }\n  names(p) <- gsub(\".\", \"_\", names(p), fixed = TRUE)\n  p <- lapply(p, function(x) {\n    if (is.vector(x) && length(x) == 1) {\n      return(as.character(x)[1])\n    } else {\n      return(jsonlite::toJSON(x, auto_unbox = TRUE))\n    }\n  })\n  handle <- xgb.get.handle(object)\n  for (i in seq_along(p)) {\n    .Call(XGBoosterSetParam_R, handle, names(p[i]), p[[i]])\n  }\n  return(object)\n}\n\n#' @rdname getinfo\n#' @export\ngetinfo.xgb.Booster <- function(object, name) {\n  name <- as.character(head(name, 1L))\n  allowed_fields <- c(\"feature_name\", \"feature_type\")\n  if (!(name %in% allowed_fields)) {\n    stop(\"getinfo: name must be one of the following: \", paste(allowed_fields, collapse = \", \"))\n  }\n  handle <- xgb.get.handle(object)\n  out <- .Call(\n    XGBoosterGetStrFeatureInfo_R,\n    handle,\n    name\n  )\n  if (!NROW(out)) {\n    return(NULL)\n  }\n  return(out)\n}\n\n#' @rdname getinfo\n#' @export\nsetinfo.xgb.Booster <- function(object, name, info) {\n  name <- as.character(head(name, 
1L))\n  allowed_fields <- c(\"feature_name\", \"feature_type\")\n  if (!(name %in% allowed_fields)) {\n    stop(\"setinfo: unknown info name \", name)\n  }\n  info <- as.character(info)\n  handle <- xgb.get.handle(object)\n  .Call(\n    XGBoosterSetStrFeatureInfo_R,\n    handle,\n    name,\n    info\n  )\n  return(TRUE)\n}\n\n#' Get number of boosting in a fitted booster\n#'\n#' @param model,x A fitted `xgb.Booster` model.\n#' @return The number of rounds saved in the model as an integer.\n#' @details Note that setting booster parameters related to training\n#' continuation / updates through [xgb.model.parameters<-()] will reset the\n#' number of rounds to zero.\n#' @export\n#' @rdname xgb.get.num.boosted.rounds\nxgb.get.num.boosted.rounds <- function(model) {\n  return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))\n}\n\n#' @rdname xgb.get.num.boosted.rounds\n#' @export\nlength.xgb.Booster <- function(x) {\n  return(xgb.get.num.boosted.rounds(x))\n}\n\n#' Slice Booster by Rounds\n#'\n#' Creates a new booster including only a selected range of rounds / iterations\n#' from an existing booster, as given by the sequence `seq(start, end, step)`.\n#'\n#' @details\n#' Note that any R attributes that the booster might have, will not be copied into\n#' the resulting object.\n#'\n#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset\n#' of its rounds / iterations.\n#' @param start Start of the slice (base-1 and inclusive, like R's [seq()]).\n#' @param end End of the slice (base-1 and inclusive, like R's [seq()]).\n#' Passing a value of zero here is equivalent to passing the full number of rounds in the\n#' booster object.\n#' @param step Step size of the slice. 
Passing '1' will take every round in the sequence defined by\n#' `(start, end)`, while passing '2' will take every second value, and so on.\n#' @return A sliced booster object containing only the requested rounds.\n#' @examples\n#' data(mtcars)\n#'\n#' y <- mtcars$mpg\n#' x <- as.matrix(mtcars[, -1])\n#'\n#' dm <- xgb.DMatrix(x, label = y, nthread = 1)\n#' model <- xgb.train(data = dm, params = xgb.params(nthread = 1), nrounds = 5)\n#' model_slice <- xgb.slice.Booster(model, 1, 3)\n#' # Prediction for first three rounds\n#' predict(model, x, predleaf = TRUE)[, 1:3]\n#'\n#' # The new model has only those rounds, so\n#' # a full prediction from it is equivalent\n#' predict(model_slice, x, predleaf = TRUE)\n#' @export\n#' @rdname xgb.slice.Booster\nxgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {\n  # This makes the slice mimic the behavior of R's 'seq',\n  # which truncates on the end of the slice when the step\n  # doesn't reach it.\n  if (end > start && step > 1) {\n    d <- (end - start + 1) / step\n    if (d != floor(d)) {\n      end <- start + step * ceiling(d) - 1\n    }\n  }\n  return(\n    .Call(\n      XGBoosterSlice_R,\n      xgb.get.handle(model),\n      start - 1,\n      end,\n      step\n    )\n  )\n}\n\n#' @export\n#' @rdname xgb.slice.Booster\n#' @param i The indices - must be an increasing sequence as generated by e.g. 
`seq(...)`.\n`[.xgb.Booster` <- function(x, i) {\n  if (missing(i)) {\n    return(xgb.slice.Booster(x, 1, 0))\n  }\n  if (length(i) == 1) {\n    return(xgb.slice.Booster(x, i, i))\n  }\n  steps <- diff(i)\n  if (any(steps < 0)) {\n    stop(\"Can only slice booster with ascending sequences.\")\n  }\n  if (length(unique(steps)) > 1) {\n    stop(\"Can only slice booster with fixed-step sequences.\")\n  }\n  return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))\n}\n\n#' Get Features Names from Booster\n#'\n#' @description\n#' Returns the feature / variable / column names from a fitted\n#' booster object, which are set automatically during the call to [xgb.train()]\n#' from the DMatrix names, or which can be set manually through [setinfo()].\n#'\n#' If the object doesn't have feature names, will return `NULL`.\n#'\n#' It is equivalent to calling `getinfo(object, \"feature_name\")`.\n#' @param object An `xgb.Booster` object.\n#' @param ... Not used.\n#' @export\nvariable.names.xgb.Booster <- function(object, ...) 
{\n  return(getinfo(object, \"feature_name\"))\n}\n\nxgb.nthread <- function(bst) {\n  config <- xgb.config(bst)\n  out <- strtoi(config$learner$generic_param$nthread)\n  return(out)\n}\n\nxgb.booster_type <- function(bst) {\n  config <- xgb.config(bst)\n  out <- config$learner$learner_train_param$booster\n  return(out)\n}\n\nxgb.num_class <- function(bst) {\n  config <- xgb.config(bst)\n  out <- strtoi(config$learner$learner_model_param$num_class)\n  return(out)\n}\n\nxgb.feature_names <- function(bst) {\n  return(getinfo(bst, \"feature_name\"))\n}\n\nxgb.feature_types <- function(bst) {\n  return(getinfo(bst, \"feature_type\"))\n}\n\nxgb.num_feature <- function(bst) {\n  handle <- xgb.get.handle(bst)\n  return(.Call(XGBoosterGetNumFeature_R, handle))\n}\n\nxgb.best_iteration <- function(bst) {\n  out <- xgb.attr(bst, \"best_iteration\")\n  if (!NROW(out) || !nchar(out)) {\n    out <- NULL\n  }\n  return(out)\n}\n\nxgb.has_categ_features <- function(bst) {\n  return(\"c\" %in% xgb.feature_types(bst))\n}\n\n#' Extract coefficients from linear booster\n#'\n#' @description\n#' Extracts the coefficients from a 'gblinear' booster object,\n#' as produced by [xgb.train()] when using parameter `booster=\"gblinear\"`.\n#'\n#' Note: this function will error out if passing a booster model\n#' which is not of \"gblinear\" type.\n#'\n#' @param object A fitted booster of 'gblinear' type.\n#' @param ... Not used.\n#' @return The extracted coefficients:\n#'   - If there is only one coefficient per column in the data, will be returned as a\n#'     vector, potentially containing the feature names if available, with the intercept\n#'     as first column.\n#'   - If there is more than one coefficient per column in the data (e.g. when using\n#'     `objective=\"multi:softmax\"`), will be returned as a matrix with dimensions equal\n#'     to `[num_features, num_cols]`, with the intercepts as first row. 
Note that the column\n#'     (classes in multi-class classification) dimension will not be named.\n#'\n#' The intercept returned here will include the 'base_score' parameter (unlike the 'bias'\n#' or the last coefficient in the model dump, which doesn't have 'base_score' added to it),\n#' hence one should get the same values from calling `predict(..., outputmargin = TRUE)` and\n#' from performing a matrix multiplication with `model.matrix(~., ...)`.\n#'\n#' Be aware that the coefficients are obtained by first converting them to strings and\n#' back, so there will always be some very small loss of precision compared to the actual\n#' coefficients as used by [predict.xgb.Booster].\n#' @examples\n#' library(xgboost)\n#'\n#' data(mtcars)\n#'\n#' y <- mtcars[, 1]\n#' x <- as.matrix(mtcars[, -1])\n#'\n#' dm <- xgb.DMatrix(data = x, label = y, nthread = 1)\n#' params <- xgb.params(booster = \"gblinear\", nthread = 1)\n#' model <- xgb.train(data = dm, params = params, nrounds = 2)\n#' coef(model)\n#' @export\ncoef.xgb.Booster <- function(object, ...) 
{\n  return(.internal.coef.xgb.Booster(object, add_names = TRUE))\n}\n\n.internal.coef.xgb.Booster <- function(object, add_names = TRUE) {\n  booster_type <- xgb.booster_type(object)\n  if (booster_type != \"gblinear\") {\n    stop(\"Coefficients are not defined for Booster type \", booster_type)\n  }\n  model_json <- jsonlite::fromJSON(\n    rawToChar(xgb.save.raw(object, raw_format = \"json\"))\n  )\n  num_feature <- as.numeric(model_json$learner$learner_model_param$num_feature)\n\n  weights <- model_json$learner$gradient_booster$model$weights\n  n_cols <- length(weights) / (num_feature + 1)\n  if (n_cols != floor(n_cols) || n_cols < 1) {\n    stop(\"Internal error: could not determine shape of coefficients.\")\n  }\n  sep <- num_feature * n_cols\n  coefs <- weights[seq(1, sep)]\n  intercepts <- weights[seq(sep + 1, length(weights))]\n  base_score <- jsonlite::fromJSON(\n    model_json$learner$learner_model_param$base_score\n  )\n  intercepts <- intercepts + as.numeric(base_score)\n\n  if (add_names) {\n    feature_names <- xgb.feature_names(object)\n    if (!NROW(feature_names)) {\n      # This mimics the default naming in R which names columns as \"V1..N\"\n      # when names are needed but not available\n      feature_names <- paste0(\"V\", seq(1L, num_feature))\n    }\n    feature_names <- c(\"(Intercept)\", feature_names)\n  }\n  if (n_cols == 1L) {\n    out <- c(intercepts, coefs)\n    if (add_names) {\n      .Call(XGSetVectorNamesInplace_R, out, feature_names)\n    }\n  } else {\n    coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)\n    dim(intercepts) <- c(1L, n_cols)\n    out <- rbind(intercepts, coefs)\n    out_names <- vector(mode = \"list\", length = 2)\n    if (add_names) {\n      out_names[[1L]] <- feature_names\n    }\n    if (inherits(object, \"xgboost\")) {\n      metadata <- attributes(object)$metadata\n      if (NROW(metadata$y_levels)) {\n        out_names[[2L]] <- metadata$y_levels\n      } else if (NROW(metadata$y_names)) {\n        
out_names[[2L]] <- metadata$y_names\n      }\n    }\n    .Call(XGSetArrayDimNamesInplace_R, out, out_names)\n  }\n  return(out)\n}\n\n#' Deep-copies a Booster Object\n#'\n#' Creates a deep copy of an 'xgb.Booster' object, such that the\n#' C object pointer contained will be a different object, and hence functions\n#' like [xgb.attr()] will not affect the object from which it was copied.\n#'\n#' @param model An 'xgb.Booster' object.\n#' @return A deep copy of `model` - it will be identical in every way, but C-level\n#'   functions called on that copy will not affect the `model` variable.\n#' @examples\n#' library(xgboost)\n#'\n#' data(mtcars)\n#'\n#' y <- mtcars$mpg\n#' x <- mtcars[, -1]\n#'\n#' dm <- xgb.DMatrix(x, label = y, nthread = 1)\n#'\n#' model <- xgb.train(\n#'   data = dm,\n#'   params = xgb.params(nthread = 1),\n#'   nrounds = 3\n#' )\n#'\n#' # Set an arbitrary attribute kept at the C level\n#' xgb.attr(model, \"my_attr\") <- 100\n#' print(xgb.attr(model, \"my_attr\"))\n#'\n#' # Just assigning to a new variable will not create\n#' # a deep copy - C object pointer is shared, and in-place\n#' # modifications will affect both objects\n#' model_shallow_copy <- model\n#' xgb.attr(model_shallow_copy, \"my_attr\") <- 333\n#' # 'model' was also affected by this change:\n#' print(xgb.attr(model, \"my_attr\"))\n#'\n#' model_deep_copy <- xgb.copy.Booster(model)\n#' xgb.attr(model_deep_copy, \"my_attr\") <- 444\n#' # 'model' was NOT affected by this change\n#' # (keeps previous value that was assigned before)\n#' print(xgb.attr(model, \"my_attr\"))\n#'\n#' # Verify that the new object was actually modified\n#' print(xgb.attr(model_deep_copy, \"my_attr\"))\n#' @export\nxgb.copy.Booster <- function(model) {\n  if (!inherits(model, \"xgb.Booster\")) {\n    stop(\"'model' must be an 'xgb.Booster' object.\")\n  }\n  return(.Call(XGDuplicate_R, model))\n}\n\nxgb.reset.Booster <- function(model) {\n  if (!inherits(model, \"xgb.Booster\")) {\n    stop(\"'model' must be an 
'xgb.Booster' object.\")\n  }\n  .Call(XGBoosterReset_R, xgb.get.handle(model))\n  return(model)\n}\n\n#' Check if two boosters share the same C object\n#'\n#' Checks whether two booster objects refer to the same underlying C object.\n#'\n#' @details\n#' As booster objects (as returned by e.g. [xgb.train()]) contain an R 'externalptr'\n#' object, they don't follow typical copy-on-write semantics of other R objects - that is, if\n#' one assigns a booster to a different variable and modifies that new variable through in-place\n#' methods like [xgb.attr<-()], the modification will be applied to both the old and the new\n#' variable, unlike typical R assignments which would only modify the latter.\n#'\n#' This function allows checking whether two booster objects share the same 'externalptr',\n#' regardless of the R attributes that they might have.\n#'\n#' In order to duplicate a booster in such a way that the copy wouldn't share the same\n#' 'externalptr', one can use function [xgb.copy.Booster()].\n#' @param obj1 Booster model to compare with `obj2`.\n#' @param obj2 Booster model to compare with `obj1`.\n#' @return Either `TRUE` or `FALSE` according to whether the two boosters share the\n#'   underlying C object.\n#' @seealso [xgb.copy.Booster()]\n#' @examples\n#' library(xgboost)\n#'\n#' data(mtcars)\n#'\n#' y <- mtcars$mpg\n#' x <- as.matrix(mtcars[, -1])\n#'\n#' model <- xgb.train(\n#'   params = xgb.params(nthread = 1),\n#'   data = xgb.DMatrix(x, label = y, nthread = 1),\n#'   nrounds = 3\n#' )\n#'\n#' model_shallow_copy <- model\n#' xgb.is.same.Booster(model, model_shallow_copy) # same C object\n#'\n#' model_deep_copy <- xgb.copy.Booster(model)\n#' xgb.is.same.Booster(model, model_deep_copy) # different C objects\n#'\n#' # In-place assignments modify all references,\n#' # but not full/deep copies of the booster\n#' xgb.attr(model_shallow_copy, \"my_attr\") <- 111\n#' xgb.attr(model, \"my_attr\") # gets modified\n#' xgb.attr(model_deep_copy, \"my_attr\") # 
doesn't get modified\n#' @export\nxgb.is.same.Booster <- function(obj1, obj2) {\n  if (!inherits(obj1, \"xgb.Booster\") || !inherits(obj2, \"xgb.Booster\")) {\n    stop(\"'xgb.is.same.Booster' is only applicable to 'xgb.Booster' objects.\")\n  }\n  return(\n    .Call(\n      XGPointerEqComparison_R,\n      xgb.get.handle(obj1),\n      xgb.get.handle(obj2)\n    )\n  )\n}\n\n#' @title Print xgb.Booster\n#' @description Print information about `xgb.Booster`.\n#' @param x An `xgb.Booster` object.\n#' @param ... Not used.\n#' @return The same `x` object, returned invisibly\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#' train <- agaricus.train\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' attr(bst, \"myattr\") <- \"memo\"\n#'\n#' print(bst)\n#' @method print xgb.Booster\n#' @export\nprint.xgb.Booster <- function(x, ...) {\n  # this lets it error out when the object comes from an earlier R XGBoost version\n  handle <- xgb.get.handle(x)\n  cat('##### xgb.Booster\\n')\n\n  R_attrs <- attributes(x)\n  if (!is.null(R_attrs$call)) {\n    cat('call:\\n  ')\n    print(R_attrs$call)\n  }\n\n  cat('# of features:', xgb.num_feature(x), '\\n')\n  cat('# of rounds: ', xgb.get.num.boosted.rounds(x), '\\n')\n\n  attr_names <- .Call(XGBoosterGetAttrNames_R, handle)\n  if (NROW(attr_names)) {\n    cat('xgb.attributes:\\n')\n    cat(\"  \", paste(attr_names, collapse = \", \"), \"\\n\")\n  }\n\n  additional_attr <- setdiff(names(R_attrs), .reserved_cb_names)\n  if (NROW(additional_attr)) {\n    cat(\"callbacks:\\n  \", paste(additional_attr, collapse = \", \"), \"\\n\")\n  }\n\n  if (!is.null(R_attrs$evaluation_log)) {\n    cat('evaluation_log:\\n')\n    print(R_attrs$evaluation_log, row.names = FALSE, topn = 2)\n  }\n\n  return(invisible(x))\n}\n"
  },
  {
    "path": "R-package/R/xgb.DMatrix.R",
    "content": "#' Construct xgb.DMatrix object\n#'\n#' Construct an 'xgb.DMatrix' object from a given data source, which can then be passed to functions\n#' such as [xgb.train()] or [predict()].\n#'\n#' Function `xgb.QuantileDMatrix()` will construct a DMatrix with quantization for the histogram\n#' method already applied to it, which can be used to reduce memory usage (compared to using a\n#' a regular DMatrix first and then creating a quantization out of it) when using the histogram\n#' method (`tree_method = \"hist\"`, which is the default algorithm), but is not usable for the\n#' sorted-indices method (`tree_method = \"exact\"`), nor for the approximate method\n#' (`tree_method = \"approx\"`).\n#'\n#' @param data Data from which to create a DMatrix, which can then be used for fitting models or\n#' for getting predictions out of a fitted model.\n#'\n#' Supported input types are as follows:\n#' - `matrix` objects, with types `numeric`, `integer`, or `logical`.\n#' - `data.frame` objects, with columns of types `numeric`, `integer`, `logical`, or `factor`\n#'\n#' Note that xgboost uses base-0 encoding for categorical types, hence `factor` types (which use base-1\n#' encoding') will be converted inside the function call. 
Be aware that the encoding used for `factor`\n#' types is not kept as part of the model, so in subsequent calls to `predict`, it is the user's\n#' responsibility to ensure that factor columns have the same levels as the ones from which the DMatrix\n#' was constructed.\n#'\n#' Other column types are not supported.\n#' - CSR matrices, as class `dgRMatrix` from package `Matrix`.\n#' - CSC matrices, as class `dgCMatrix` from package `Matrix`.\n#'\n#' These are **not** supported by `xgb.QuantileDMatrix`.\n#' - XGBoost's own binary format for DMatrices, as produced by [xgb.DMatrix.save()].\n#' - Single-row CSR matrices, as class `dsparseVector` from package `Matrix`, which is interpreted\n#'   as a single row (only when making predictions from a fitted model).\n#'\n#' @param label Label of the training data. For classification problems, should be passed encoded as\n#' integers with numeration starting at zero.\n#' @param weight Weight for each instance.\n#'\n#'   Note that, for ranking task, weights are per-group.  In ranking task, one weight\n#'   is assigned to each group (not each data point). This is because we\n#'   only care about the relative ordering of data points within each group,\n#'   so it doesn't make sense to assign weights to individual data points.\n#' @param base_margin Base margin used for boosting from existing model.\n#'\n#'   In the case of multi-output models, one can also pass multi-dimensional base_margin.\n#' @param missing A float value to represent missing values in data (not used when creating DMatrix\n#'   from text files). It is useful to change when a zero, infinite, or some other\n#'   extreme value represents missing values in data.\n#' @param silent whether to suppress printing an informational message after loading from a file.\n#' @param feature_names Set names for features. 
Overrides column names in data frame and matrix.\n#'\n#'   Note: columns are not referenced by name when calling `predict`, so the column order there\n#'   must be the same as in the DMatrix construction, regardless of the column names.\n#' @param feature_types Set types for features.\n#'\n#'   If `data` is a `data.frame` and passing `feature_types` is not supplied,\n#'   feature types will be deduced automatically from the column types.\n#'\n#'   Otherwise, one can pass a character vector with the same length as number of columns in `data`,\n#'   with the following possible values:\n#'   - \"c\", which represents categorical columns.\n#'   - \"q\", which represents numeric columns.\n#'   - \"int\", which represents integer columns.\n#'   - \"i\", which represents logical (boolean) columns.\n#'\n#'   Note that, while categorical types are treated differently from the rest for model fitting\n#'   purposes, the other types do not influence the generated model, but have effects in other\n#'   functionalities such as feature importances.\n#'\n#'   **Important**: Categorical features, if specified manually through `feature_types`, must\n#'   be encoded as integers with numeration starting at zero, and the same encoding needs to be\n#'   applied when passing data to [predict()]. Even if passing `factor` types, the encoding will\n#'   not be saved, so make sure that `factor` columns passed to `predict` have the same `levels`.\n#' @param nthread Number of threads used for creating DMatrix.\n#' @param group Group size for all ranking group.\n#' @param qid Query ID for data samples, used for ranking.\n#' @param label_lower_bound Lower bound for survival training.\n#' @param label_upper_bound Upper bound for survival training.\n#' @param feature_weights Set feature weights for column sampling.\n#' @param data_split_mode Not used yet. 
This parameter is for distributed training, which is not yet available for the R package.\n#' @inheritParams xgb.train\n#' @return An 'xgb.DMatrix' object. If calling `xgb.QuantileDMatrix`, it will have additional\n#' subclass `xgb.QuantileDMatrix`.\n#'\n#' @details\n#' Note that DMatrix objects are not serializable through R functions such as [saveRDS()] or [save()].\n#' If a DMatrix gets serialized and then de-serialized (for example, when saving data in an R session or caching\n#' chunks in an Rmd file), the resulting object will not be usable anymore and will need to be reconstructed\n#' from the original source of data.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#' dtrain <- with(\n#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)\n#' )\n#' fname <- file.path(tempdir(), \"xgb.DMatrix.data\")\n#' xgb.DMatrix.save(dtrain, fname)\n#' dtrain <- xgb.DMatrix(fname, nthread = 1)\n#' @export\n#' @rdname xgb.DMatrix\nxgb.DMatrix <- function(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  missing = NA,\n  silent = FALSE,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  nthread = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL,\n  data_split_mode = \"row\",\n  ...\n) {\n  check.deprecation(deprecated_dmatrix_params, match.call(), ...)\n  if (!is.null(group) && !is.null(qid)) {\n    stop(\"Either one of 'group' or 'qid' should be NULL\")\n  }\n  if (data_split_mode != \"row\") {\n    stop(\"'data_split_mode' is not supported yet.\")\n  }\n  nthread <- as.integer(NVL(nthread, -1L))\n  if (typeof(data) == \"character\") {\n    if (length(data) > 1) {\n      stop(\n        \"'data' has class 'character' and length \", length(data),\n        \".\\n  'data' accepts either a numeric matrix or a single 
filename.\"\n      )\n    }\n    data <- path.expand(data)\n    if (data_split_mode == \"row\") {\n      data_split_mode <- 0L\n    } else if (data_split_mode == \"col\") {\n      data_split_mode <- 1L\n    } else {\n      stop(\"Passed invalid 'data_split_mode': \", data_split_mode)\n    }\n    handle <- .Call(XGDMatrixCreateFromURI_R, data, as.integer(silent), data_split_mode)\n  } else if (is.matrix(data)) {\n    handle <- .Call(\n      XGDMatrixCreateFromMat_R, data, missing, nthread\n    )\n  } else if (inherits(data, \"dgCMatrix\")) {\n    handle <- .Call(\n      XGDMatrixCreateFromCSC_R,\n      data@p,\n      data@i,\n      data@x,\n      nrow(data),\n      missing,\n      nthread\n    )\n  } else if (inherits(data, \"dgRMatrix\")) {\n    handle <- .Call(\n      XGDMatrixCreateFromCSR_R,\n      data@p,\n      data@j,\n      data@x,\n      ncol(data),\n      missing,\n      nthread\n    )\n  } else if (inherits(data, \"dsparseVector\")) {\n    indptr <- c(0L, as.integer(length(data@i)))\n    ind <- as.integer(data@i) - 1L\n    handle <- .Call(\n      XGDMatrixCreateFromCSR_R,\n      indptr,\n      ind,\n      data@x,\n      length(data),\n      missing,\n      nthread\n    )\n  } else if (is.data.frame(data)) {\n    tmp <- .process.df.for.dmatrix(data, feature_types)\n    feature_types <- tmp$feature_types\n    handle <- .Call(\n      XGDMatrixCreateFromDF_R, tmp$lst, missing, nthread\n    )\n    rm(tmp)\n  } else {\n    stop(\"xgb.DMatrix does not support construction from \", typeof(data))\n  }\n\n  dmat <- handle\n  attributes(dmat) <- list(\n    class = \"xgb.DMatrix\",\n    fields = new.env()\n  )\n  .set.dmatrix.fields(\n    dmat = dmat,\n    label = label,\n    weight = weight,\n    base_margin = base_margin,\n    feature_names = feature_names,\n    feature_types = feature_types,\n    group = group,\n    qid = qid,\n    label_lower_bound = label_lower_bound,\n    label_upper_bound = label_upper_bound,\n    feature_weights = feature_weights\n  )\n\n  
return(dmat)\n}\n\n.process.df.for.dmatrix <- function(df, feature_types) {\n  if (!nrow(df) || !ncol(df)) {\n    stop(\"'data' is an empty data.frame.\")\n  }\n  if (!is.null(feature_types)) {\n    if (!is.character(feature_types) || length(feature_types) != ncol(df)) {\n      stop(\n        \"'feature_types' must be a character vector with one entry per column in 'data'.\"\n      )\n    }\n  } else {\n    feature_types <- sapply(df, function(col) {\n      if (is.factor(col)) {\n        return(\"c\")\n      } else if (is.integer(col)) {\n        return(\"int\")\n      } else if (is.logical(col)) {\n        return(\"i\")\n      } else {\n        if (!is.numeric(col)) {\n          stop(\"Invalid type in dataframe.\")\n        }\n        return(\"float\")\n      }\n    })\n  }\n\n  lst <- lapply(df, function(col) {\n    is_factor <- is.factor(col)\n    col <- as.numeric(col)\n    if (is_factor) {\n      col <- col - 1\n    }\n    return(col)\n  })\n\n  return(list(lst = lst, feature_types = feature_types))\n}\n\n.set.dmatrix.fields <- function(\n  dmat,\n  label,\n  weight,\n  base_margin,\n  feature_names,\n  feature_types,\n  group,\n  qid,\n  label_lower_bound,\n  label_upper_bound,\n  feature_weights\n) {\n  if (!is.null(label)) {\n    setinfo(dmat, \"label\", label)\n  }\n  if (!is.null(weight)) {\n    setinfo(dmat, \"weight\", weight)\n  }\n  if (!is.null(base_margin)) {\n    setinfo(dmat, \"base_margin\", base_margin)\n  }\n  if (!is.null(feature_names)) {\n    setinfo(dmat, \"feature_name\", feature_names)\n  }\n  if (!is.null(feature_types)) {\n    setinfo(dmat, \"feature_type\", feature_types)\n  }\n  if (!is.null(group)) {\n    setinfo(dmat, \"group\", group)\n  }\n  if (!is.null(qid)) {\n    setinfo(dmat, \"qid\", qid)\n  }\n  if (!is.null(label_lower_bound)) {\n    setinfo(dmat, \"label_lower_bound\", label_lower_bound)\n  }\n  if (!is.null(label_upper_bound)) {\n    setinfo(dmat, \"label_upper_bound\", label_upper_bound)\n  }\n  if 
(!is.null(feature_weights)) {\n    setinfo(dmat, \"feature_weights\", feature_weights)\n  }\n}\n\n#' @param ref The training dataset that provides quantile information, needed when creating\n#' validation/test dataset with [xgb.QuantileDMatrix()]. Supplying the training DMatrix\n#' as a reference means that the same quantisation applied to the training data is\n#' applied to the validation/test data\n#' @param max_bin The number of histogram bin, should be consistent with the training parameter\n#'   `max_bin`.\n#'\n#'   This is only supported when constructing a QuantileDMatrix.\n#' @export\n#' @rdname xgb.DMatrix\nxgb.QuantileDMatrix <- function(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  missing = NA,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  nthread = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL,\n  ref = NULL,\n  max_bin = NULL\n) {\n  nthread <- as.integer(NVL(nthread, -1L))\n  if (!is.null(ref) && !inherits(ref, \"xgb.DMatrix\")) {\n    stop(\"'ref' must be an xgb.DMatrix object.\")\n  }\n\n  # Note: when passing an integer matrix, it won't get casted to numeric.\n  # Since 'int' values as understood by languages like C cannot have missing values,\n  # R represents missingness there by assigning them a value equal to the minimum\n  # integer. 
The 'missing' value here is set before the data, so in case of integers,\n  # need to make the conversion manually beforehand.\n  if (is.matrix(data) && storage.mode(data) %in% c(\"integer\", \"logical\") && is.na(missing)) {\n    missing <- .Call(XGGetRNAIntAsDouble)\n  }\n\n  iterator_env <- as.environment(\n    list(\n      data = data,\n      label = label,\n      weight = weight,\n      base_margin = base_margin,\n      missing = missing,\n      feature_names = feature_names,\n      feature_types = feature_types,\n      group = group,\n      qid = qid,\n      label_lower_bound = label_lower_bound,\n      label_upper_bound = label_upper_bound,\n      feature_weights = feature_weights\n    )\n  )\n  data_iterator <- .single.data.iterator(iterator_env)\n\n  env_keep_alive <- new.env()\n  env_keep_alive$keepalive <- NULL\n\n  # Note: the ProxyDMatrix has its finalizer assigned in the R externalptr\n  # object, but that finalizer will only be called once the object is\n  # garbage-collected, which doesn't happen immediately after it goes out\n  # of scope, hence this piece of code to tigger its destruction earlier\n  # and free memory right away.\n  proxy_handle <- .make.proxy.handle()\n  on.exit({\n    .Call(XGDMatrixFree_R, proxy_handle)\n  })\n  iterator_next <- function() {\n    return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive))\n  }\n  iterator_reset <- function() {\n    env_keep_alive$keepalive <- NULL\n    return(data_iterator$f_reset(iterator_env))\n  }\n  calling_env <- environment()\n\n  dmat <- .Call(\n    XGQuantileDMatrixCreateFromCallback_R,\n    iterator_next,\n    iterator_reset,\n    calling_env,\n    proxy_handle,\n    nthread,\n    missing,\n    max_bin,\n    ref\n  )\n  attributes(dmat) <- list(\n    class = c(\"xgb.DMatrix\", \"xgb.QuantileDMatrix\"),\n    fields = attributes(proxy_handle)$fields\n  )\n  return(dmat)\n}\n\n#' XGBoost Data Iterator\n#'\n#' @description\n#' Interface to create a custom data iterator in order to 
construct a DMatrix\n#' from external memory.\n#'\n#' This function is responsible for generating an R object structure containing callback\n#' functions and an environment shared with them.\n#'\n#' The output structure from this function is then meant to be passed to [xgb.ExtMemDMatrix()],\n#' which will consume the data and create a DMatrix from it by executing the callback functions.\n#'\n#' For more information, and for a usage example, see the documentation for [xgb.ExtMemDMatrix()].\n#'\n#' @param env An R environment to pass to the callback functions supplied here, which can be\n#'   used to keep track of variables to determine how to handle the batches.\n#'\n#'   For example, one might want to keep track of an iteration number in this environment in order\n#'   to know which part of the data to pass next.\n#' @param f_next `function(env)` which is responsible for:\n#'   - Accessing or retrieving the next batch of data in the iterator.\n#'   - Supplying this data by calling function [xgb.DataBatch()] on it and returning the result.\n#'   - Keeping track of where in the iterator batch it is or will go next, which can for example\n#'     be done by modifiying variables in the `env` variable that is passed here.\n#'   - Signaling whether there are more batches to be consumed or not, by returning `NULL`\n#'     when the stream of data ends (all batches in the iterator have been consumed), or the result from\n#'     calling [xgb.DataBatch()] when there are more batches in the line to be consumed.\n#' @param f_reset `function(env)` which is responsible for reseting the data iterator\n#'   (i.e. 
taking it back to the first batch, called before and after the sequence of batches\n#'   has been consumed).\n#'\n#'   Note that, after resetting the iterator, the batches will be accessed again, so the same data\n#'   (and in the same order) must be passed in subsequent iterations.\n#' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then\n#'   be passed to [xgb.ExtMemDMatrix()].\n#' @seealso [xgb.ExtMemDMatrix()], [xgb.DataBatch()].\n#' @export\nxgb.DataIter <- function(env = new.env(), f_next, f_reset) {\n  if (!is.function(f_next)) {\n    stop(\"'f_next' must be a function.\")\n  }\n  if (!is.function(f_reset)) {\n    stop(\"'f_reset' must be a function.\")\n  }\n  out <- list(\n    env = env,\n    f_next = f_next,\n    f_reset = f_reset\n  )\n  class(out) <- \"xgb.DataIter\"\n  return(out)\n}\n\n.qdm.single.fnext <- function(env) {\n  curr_iter <- env[[\"iter\"]]\n  if (curr_iter >= 1L) {\n    return(NULL)\n  }\n\n  on.exit({\n    env[[\"iter\"]] <- curr_iter + 1L\n  })\n  return(\n    xgb.DataBatch(\n      data = env[[\"data\"]],\n      label = env[[\"label\"]],\n      weight = env[[\"weight\"]],\n      base_margin = env[[\"base_margin\"]],\n      feature_names = env[[\"feature_names\"]],\n      feature_types = env[[\"feature_types\"]],\n      group = env[[\"group\"]],\n      qid = env[[\"qid\"]],\n      label_lower_bound = env[[\"label_lower_bound\"]],\n      label_upper_bound = env[[\"label_upper_bound\"]],\n      feature_weights = env[[\"feature_weights\"]]\n    )\n  )\n}\n\n.qdm.single.freset <- function(env) {\n  env[[\"iter\"]] <- 0L\n  return(invisible(NULL))\n}\n\n.single.data.iterator <- function(env) {\n  env[[\"iter\"]] <- 0L\n  return(xgb.DataIter(env, .qdm.single.fnext, .qdm.single.freset))\n}\n\n# Only for internal usage\n.make.proxy.handle <- function() {\n  out <- .Call(XGProxyDMatrixCreate_R)\n  attributes(out) <- list(\n    class = c(\"xgb.DMatrix\", \"xgb.ProxyDMatrix\"),\n    fields = new.env()\n  )\n  
return(out)\n}\n\n#' Structure for Data Batches\n#'\n#' @description\n#' Helper function to supply data in batches of a data iterator when\n#' constructing a DMatrix from external memory through [xgb.ExtMemDMatrix()]\n#' or through [xgb.QuantileDMatrix.from_iterator()].\n#'\n#' This function is **only** meant to be called inside of a callback function (which\n#' is passed as argument to function [xgb.DataIter()] to construct a data iterator)\n#' when constructing a DMatrix through external memory - otherwise, one should call\n#' [xgb.DMatrix()] or [xgb.QuantileDMatrix()].\n#'\n#' The object that results from calling this function directly is **not** like\n#' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only\n#' possible usage is to supply data to an iterator, from which a DMatrix is then constructed.\n#'\n#' For more information and for example usage, see the documentation for [xgb.ExtMemDMatrix()].\n#' @inheritParams xgb.DMatrix\n#' @param data Batch of data belonging to this batch.\n#'\n#'   Note that not all of the input types supported by [xgb.DMatrix()] are possible\n#'   to pass here. Supported types are:\n#'   - `matrix`, with types `numeric`, `integer`, and `logical`. Note that for types\n#'     `integer` and `logical`, missing values might not be automatically recognized as\n#'     as such - see the documentation for parameter `missing` in [xgb.ExtMemDMatrix()]\n#'     for details on this.\n#'   - `data.frame`, with the same types as supported by 'xgb.DMatrix' and same\n#'     conversions applied to it. See the documentation for parameter `data` in\n#'     [xgb.DMatrix()] for details on it.\n#'   - CSR matrices, as class `dgRMatrix` from package \"Matrix\".\n#' @return An object of class `xgb.DataBatch`, which is just a list containing the\n#'   data and parameters passed here. 
It does **not** inherit from `xgb.DMatrix`.\n#' @seealso [xgb.DataIter()], [xgb.ExtMemDMatrix()].\n#' @export\nxgb.DataBatch <- function(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL\n) {\n  stopifnot(inherits(data, c(\"matrix\", \"data.frame\", \"dgRMatrix\")))\n  out <- list(\n    data = data,\n    label = label,\n    weight = weight,\n    base_margin = base_margin,\n    feature_names = feature_names,\n    feature_types = feature_types,\n    group = group,\n    qid = qid,\n    label_lower_bound = label_lower_bound,\n    label_upper_bound = label_upper_bound,\n    feature_weights = feature_weights\n  )\n  class(out) <- \"xgb.DataBatch\"\n  return(out)\n}\n\n# This is only for internal usage, class is not exposed to the user.\nxgb.ProxyDMatrix <- function(proxy_handle, data_iterator, env_keep_alive) {\n  env_keep_alive$keepalive <- NULL\n  lst <- data_iterator$f_next(data_iterator$env)\n  if (is.null(lst)) {\n    return(0L)\n  }\n  if (!inherits(lst, \"xgb.DataBatch\")) {\n    stop(\"DataIter 'f_next' must return either NULL or the result from calling 'xgb.DataBatch'.\")\n  }\n\n  if (!is.null(lst$group) && !is.null(lst$qid)) {\n    stop(\"Either one of 'group' or 'qid' should be NULL\")\n  }\n  if (is.data.frame(lst$data)) {\n    data <- lst$data\n    lst$data <- NULL\n    tmp <- .process.df.for.dmatrix(data, lst$feature_types)\n    lst$feature_types <- tmp$feature_types\n    data <- NULL\n    env_keep_alive$keepalive <- tmp\n    .Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)\n  } else if (is.matrix(lst$data)) {\n    env_keep_alive$keepalive <- lst\n    .Call(XGProxyDMatrixSetDataDense_R, proxy_handle, lst$data)\n  } else if (inherits(lst$data, \"dgRMatrix\")) {\n    tmp <- list(p = lst$data@p, j = lst$data@j, x = lst$data@x, ncol = ncol(lst$data))\n    
env_keep_alive$keepalive <- tmp\n    .Call(XGProxyDMatrixSetDataCSR_R, proxy_handle, tmp)\n  } else {\n    stop(\"'data' has unsupported type.\")\n  }\n\n  .set.dmatrix.fields(\n    dmat = proxy_handle,\n    label = lst$label,\n    weight = lst$weight,\n    base_margin = lst$base_margin,\n    feature_names = lst$feature_names,\n    feature_types = lst$feature_types,\n    group = lst$group,\n    qid = lst$qid,\n    label_lower_bound = lst$label_lower_bound,\n    label_upper_bound = lst$label_upper_bound,\n    feature_weights = lst$feature_weights\n  )\n\n  return(1L)\n}\n\n#' DMatrix from External Data\n#'\n#' @description\n#' Create a special type of XGBoost 'DMatrix' object from external data\n#' supplied by an [xgb.DataIter()] object, potentially passed in batches from a\n#' bigger set that might not fit entirely in memory.\n#'\n#' The data supplied by the iterator is accessed on-demand as needed, multiple times,\n#' without being concatenated, but note that fields like 'label' **will** be\n#' concatenated from multiple calls to the data iterator.\n#'\n#' For more information, see the guide 'Using XGBoost External Memory Version':\n#' \\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}\n#' @details\n#' Be aware that construction of external data DMatrices \\bold{will cache data on disk}\n#' in a compressed format, under the path supplied in `cache_prefix`.\n#'\n#' External data is not supported for the exact tree method.\n#' @inheritParams xgb.DMatrix\n#' @param data_iterator A data iterator structure as returned by [xgb.DataIter()],\n#'   which includes an environment shared between function calls, and functions to access\n#'   the data in batches on-demand.\n#' @param cache_prefix The path of cache file, caller must initialize all the directories in this path.\n#' @param missing A float value to represents missing values in data.\n#'\n#'   Note that, while functions like [xgb.DMatrix()] can take a generic `NA` and interpret it\n#'   
correctly for different types like `numeric` and `integer`, if an `NA` value is passed here,\n#'   it will not be adapted for different input types.\n#'\n#'   For example, in R `integer` types, missing values are represented by integer number `-2147483648`\n#'   (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes `NA`,\n#'   which is interpreted as a floating-point NaN by [xgb.ExtMemDMatrix()] and by\n#'   [xgb.QuantileDMatrix.from_iterator()], these integer missing values will not be treated as missing.\n#'   This should not pose any problem for `numeric` types, since they do have an inheret NaN value.\n#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not\n#'   held internally but accessed through the iterator when needed.\n#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.QuantileDMatrix.from_iterator()]\n#' @examples\n#' data(mtcars)\n#'\n#' # This custom environment will be passed to the iterator\n#' # functions at each call. 
It is up to the user to keep\n#' # track of the iteration number in this environment.\n#' iterator_env <- as.environment(\n#'   list(\n#'     iter = 0,\n#'     x = mtcars[, -1],\n#'     y = mtcars[, 1]\n#'   )\n#' )\n#'\n#' # Data is passed in two batches.\n#' # In this example, batches are obtained by subsetting the 'x' variable.\n#' # This is not advantageous to do, since the data is already loaded in memory\n#' # and can be passed in full in one go, but there can be situations in which\n#' # only a subset of the data will fit in the computer's memory, and it can\n#' # be loaded in batches that are accessed one-at-a-time only.\n#' iterator_next <- function(iterator_env) {\n#'   curr_iter <- iterator_env[[\"iter\"]]\n#'   if (curr_iter >= 2) {\n#'     # there are only two batches, so this signals end of the stream\n#'     return(NULL)\n#'   }\n#'\n#'   if (curr_iter == 0) {\n#'     x_batch <- iterator_env[[\"x\"]][1:16, ]\n#'     y_batch <- iterator_env[[\"y\"]][1:16]\n#'   } else {\n#'     x_batch <- iterator_env[[\"x\"]][17:32, ]\n#'     y_batch <- iterator_env[[\"y\"]][17:32]\n#'   }\n#'   on.exit({\n#'     iterator_env[[\"iter\"]] <- curr_iter + 1\n#'   })\n#'\n#'   # Function 'xgb.DataBatch' must be called manually\n#'   # at each batch with all the appropriate attributes,\n#'   # such as feature names and feature types.\n#'   return(xgb.DataBatch(data = x_batch, label = y_batch))\n#' }\n#'\n#' # This moves the iterator back to its beginning\n#' iterator_reset <- function(iterator_env) {\n#'   iterator_env[[\"iter\"]] <- 0\n#' }\n#'\n#' data_iterator <- xgb.DataIter(\n#'   env = iterator_env,\n#'   f_next = iterator_next,\n#'   f_reset = iterator_reset\n#' )\n#' cache_prefix <- tempdir()\n#'\n#' # DMatrix will be constructed from the iterator's batches\n#' dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)\n#'\n#' # After construction, can be used as a regular DMatrix\n#' params <- xgb.params(nthread = 1, objective = \"reg:squarederror\")\n#' 
model <- xgb.train(data = dm, nrounds = 2, params = params)\n#'\n#' # Predictions can also be called on it, and should be the same\n#' # as if the data were passed differently.\n#' pred_dm <- predict(model, dm)\n#' pred_mat <- predict(model, as.matrix(mtcars[, -1]))\n#' @export\nxgb.ExtMemDMatrix <- function(\n  data_iterator,\n  cache_prefix = tempdir(),\n  missing = NA,\n  nthread = NULL\n) {\n  stopifnot(inherits(data_iterator, \"xgb.DataIter\"))\n  stopifnot(is.character(cache_prefix))\n\n  cache_prefix <- path.expand(cache_prefix)\n  nthread <- as.integer(NVL(nthread, -1L))\n\n  # The purpose of this environment is to keep data alive (protected from the\n  # garbage collector) after setting the data in the proxy dmatrix. The data\n  # held here (under name 'keepalive') should be unset (leaving it unprotected\n  # for garbage collection) before the start of each data iteration batch and\n  # during each iterator reset.\n  env_keep_alive <- new.env()\n  env_keep_alive$keepalive <- NULL\n\n  proxy_handle <- .make.proxy.handle()\n  on.exit({\n    .Call(XGDMatrixFree_R, proxy_handle)\n  })\n  iterator_next <- function() {\n    return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive))\n  }\n  iterator_reset <- function() {\n    env_keep_alive$keepalive <- NULL\n    return(data_iterator$f_reset(data_iterator$env))\n  }\n  calling_env <- environment()\n\n  dmat <- .Call(\n    XGDMatrixCreateFromCallback_R,\n    iterator_next,\n    iterator_reset,\n    calling_env,\n    proxy_handle,\n    nthread,\n    missing,\n    cache_prefix\n  )\n\n  attributes(dmat) <- list(\n    class = c(\"xgb.DMatrix\", \"xgb.ExtMemDMatrix\"),\n    fields = attributes(proxy_handle)$fields\n  )\n  return(dmat)\n}\n\n\n#' QuantileDMatrix from External Data\n#'\n#' @description\n#' Create an `xgb.QuantileDMatrix` object (exact same class as would be returned by\n#' calling function [xgb.QuantileDMatrix()], with the same advantages and limitations) from\n#' external data supplied by 
[xgb.DataIter()], potentially passed in batches from\n#' a bigger set that might not fit entirely in memory, same way as [xgb.ExtMemDMatrix()].\n#'\n#' Note that, while external data will only be loaded through the iterator (thus the full data\n#' might not be held entirely in-memory), the quantized representation of the data will get\n#' created in-memory, being concatenated from multiple calls to the data iterator. The quantized\n#' version is typically lighter than the original data, so there might be cases in which this\n#' representation could potentially fit in memory even if the full data does not.\n#'\n#' For more information, see the guide 'Using XGBoost External Memory Version':\n#' \\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}\n#' @inheritParams xgb.ExtMemDMatrix\n#' @inheritParams xgb.QuantileDMatrix\n#' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.\n#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExtMemDMatrix()],\n#' [xgb.QuantileDMatrix()]\n#' @export\nxgb.QuantileDMatrix.from_iterator <- function( # nolint\n  data_iterator,\n  missing = NA,\n  nthread = NULL,\n  ref = NULL,\n  max_bin = NULL\n) {\n  stopifnot(inherits(data_iterator, \"xgb.DataIter\"))\n  if (!is.null(ref) && !inherits(ref, \"xgb.DMatrix\")) {\n    stop(\"'ref' must be an xgb.DMatrix object.\")\n  }\n\n  nthread <- as.integer(NVL(nthread, -1L))\n\n  env_keep_alive <- new.env()\n  env_keep_alive$keepalive <- NULL\n  proxy_handle <- .make.proxy.handle()\n  on.exit({\n    .Call(XGDMatrixFree_R, proxy_handle)\n  })\n  iterator_next <- function() {\n    return(xgb.ProxyDMatrix(proxy_handle, data_iterator, env_keep_alive))\n  }\n  iterator_reset <- function() {\n    env_keep_alive$keepalive <- NULL\n    return(data_iterator$f_reset(data_iterator$env))\n  }\n  calling_env <- environment()\n\n  dmat <- .Call(\n    XGQuantileDMatrixCreateFromCallback_R,\n    iterator_next,\n    iterator_reset,\n    calling_env,\n    
proxy_handle,\n    nthread,\n    missing,\n    max_bin,\n    ref\n  )\n\n  attributes(dmat) <- list(\n    class = c(\"xgb.DMatrix\", \"xgb.QuantileDMatrix\"),\n    fields = attributes(proxy_handle)$fields\n  )\n  return(dmat)\n}\n\n#' Check whether DMatrix object has a field\n#'\n#' Checks whether an xgb.DMatrix object has a given field assigned to\n#' it, such as weights, labels, etc.\n#' @param object The DMatrix object to check for the given `info` field.\n#' @param info The field to check for presence or absence in `object`.\n#' @seealso [xgb.DMatrix()], [getinfo.xgb.DMatrix()], [setinfo.xgb.DMatrix()]\n#' @examples\n#' x <- matrix(1:10, nrow = 5)\n#' dm <- xgb.DMatrix(x, nthread = 1)\n#'\n#' # 'dm' so far does not have any fields set\n#' xgb.DMatrix.hasinfo(dm, \"label\")\n#'\n#' # Fields can be added after construction\n#' setinfo(dm, \"label\", 1:5)\n#' xgb.DMatrix.hasinfo(dm, \"label\")\n#' @export\nxgb.DMatrix.hasinfo <- function(object, info) {\n  if (!inherits(object, \"xgb.DMatrix\")) {\n    stop(\"Object is not an 'xgb.DMatrix'.\")\n  }\n  if (.Call(XGCheckNullPtr_R, object)) {\n    warning(\"xgb.DMatrix object is invalid. 
Must be constructed again.\")\n    return(FALSE)\n  }\n  return(NVL(attr(object, \"fields\")[[info]], FALSE))\n}\n\n\n#' Dimensions of xgb.DMatrix\n#'\n#' Returns a vector of numbers of rows and of columns in an `xgb.DMatrix`.\n#'\n#' @param x Object of class `xgb.DMatrix`\n#'\n#' @details\n#' Note: since [nrow()] and [ncol()] internally use [dim()], they can also\n#' be directly used with an `xgb.DMatrix` object.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' train <- agaricus.train\n#' dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)\n#'\n#' stopifnot(nrow(dtrain) == nrow(train$data))\n#' stopifnot(ncol(dtrain) == ncol(train$data))\n#' stopifnot(all(dim(dtrain) == dim(train$data)))\n#'\n#' @export\ndim.xgb.DMatrix <- function(x) {\n  c(.Call(XGDMatrixNumRow_R, x), .Call(XGDMatrixNumCol_R, x))\n}\n\n\n#' Handling of column names of `xgb.DMatrix`\n#'\n#' Only column names are supported for `xgb.DMatrix`, thus setting of\n#' row names would have no effect and returned row names would be `NULL`.\n#'\n#' @param x Object of class `xgb.DMatrix`.\n#' @param value A list of two elements: the first one is ignored\n#'   and the second one is column names\n#'\n#' @details\n#' Generic [dimnames()] methods are used by [colnames()].\n#' Since row names are irrelevant, it is recommended to use [colnames()] directly.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' train <- agaricus.train\n#' dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)\n#' dimnames(dtrain)\n#' colnames(dtrain)\n#' colnames(dtrain) <- make.names(1:ncol(train$data))\n#' print(dtrain, verbose = TRUE)\n#'\n#' @rdname dimnames.xgb.DMatrix\n#' @export\ndimnames.xgb.DMatrix <- function(x) {\n  fn <- getinfo(x, \"feature_name\")\n  ## row names is null.\n  list(NULL, fn)\n}\n\n#' @rdname dimnames.xgb.DMatrix\n#' @export\n`dimnames<-.xgb.DMatrix` <- function(x, value) {\n  if (!is.list(value) || length(value) != 2L)\n    
stop(\"invalid 'dimnames' given: must be a list of two elements\")\n  if (!is.null(value[[1L]]))\n    stop(\"xgb.DMatrix does not have rownames\")\n  if (is.null(value[[2]])) {\n    setinfo(x, \"feature_name\", NULL)\n    return(x)\n  }\n  if (ncol(x) != length(value[[2]])) {\n    stop(\"can't assign \", length(value[[2]]), \" colnames to a \", ncol(x), \" column xgb.DMatrix\")\n  }\n  setinfo(x, \"feature_name\", value[[2]])\n  x\n}\n\n\n#' Get or set information of xgb.DMatrix and xgb.Booster objects\n#'\n#' @param object Object of class `xgb.DMatrix` or `xgb.Booster`.\n#' @param name The name of the information field to get (see details).\n#' @return For `getinfo()`, will return the requested field. For `setinfo()`,\n#'   will always return value `TRUE` if it succeeds.\n#' @details\n#' The `name` field can be one of the following for `xgb.DMatrix`:\n#' - label\n#' - weight\n#' - base_margin\n#' - label_lower_bound\n#' - label_upper_bound\n#' - group\n#' - feature_type\n#' - feature_name\n#' - nrow\n#'\n#' See the documentation for [xgb.DMatrix()] for more information about these fields.\n#'\n#' For `xgb.Booster`, can be one of the following:\n#' - `feature_type`\n#' - `feature_name`\n#'\n#' Note that, while 'qid' cannot be retrieved, it is possible to get the equivalent 'group'\n#' for a DMatrix that had 'qid' assigned.\n#'\n#' **Important**: when calling [setinfo()], the objects are modified in-place. 
See\n#' [xgb.copy.Booster()] for an idea of how this in-place assignment works.\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#'\n#' labels <- getinfo(dtrain, \"label\")\n#' setinfo(dtrain, \"label\", 1 - labels)\n#'\n#' labels2 <- getinfo(dtrain, \"label\")\n#' stopifnot(all(labels2 == 1 - labels))\n#' @rdname getinfo\n#' @export\ngetinfo <- function(object, name) UseMethod(\"getinfo\")\n\n#' @rdname getinfo\n#' @export\ngetinfo.xgb.DMatrix <- function(object, name) {\n  allowed_int_fields <- 'group'\n  allowed_float_fields <- c(\n    'label', 'weight', 'base_margin',\n    'label_lower_bound', 'label_upper_bound'\n  )\n  allowed_str_fields <- c(\"feature_type\", \"feature_name\")\n  allowed_fields <- c(allowed_float_fields, allowed_int_fields, allowed_str_fields, 'nrow')\n\n  if (typeof(name) != \"character\" ||\n        length(name) != 1 ||\n        !name %in% allowed_fields) {\n    stop(\"getinfo: name must be one of the following\\n\",\n         paste(paste0(\"'\", allowed_fields, \"'\"), collapse = \", \"))\n  }\n  if (name == \"nrow\") {\n    ret <- nrow(object)\n  } else if (name %in% allowed_str_fields) {\n    ret <- .Call(XGDMatrixGetStrFeatureInfo_R, object, name)\n  } else if (name %in% allowed_float_fields) {\n    ret <- .Call(XGDMatrixGetFloatInfo_R, object, name)\n    if (length(ret) > nrow(object)) {\n      ret <- matrix(ret, nrow = nrow(object), byrow = TRUE)\n    }\n  } else if (name %in% allowed_int_fields) {\n    if (name == \"group\") {\n      name <- \"group_ptr\"\n    }\n    ret <- .Call(XGDMatrixGetUIntInfo_R, object, name)\n    if (length(ret) > nrow(object)) {\n      ret <- matrix(ret, nrow = nrow(object), byrow = TRUE)\n    }\n  }\n  if (length(ret) == 0) return(NULL)\n  return(ret)\n}\n\n#' @rdname getinfo\n#' @param info The specific field of information to set.\n#'\n#' @details\n#' See the documentation for [xgb.DMatrix()] for possible 
fields that can be set\n#' (which correspond to arguments in that function).\n#'\n#' Note that the following fields are allowed in the construction of an `xgb.DMatrix`\n#' but **are not** allowed here:\n#' - data\n#' - missing\n#' - silent\n#' - nthread\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#'\n#' labels <- getinfo(dtrain, \"label\")\n#' setinfo(dtrain, \"label\", 1 - labels)\n#'\n#' labels2 <- getinfo(dtrain, \"label\")\n#' stopifnot(all.equal(labels2, 1 - labels))\n#' @export\nsetinfo <- function(object, name, info) UseMethod(\"setinfo\")\n\n#' @rdname getinfo\n#' @export\nsetinfo.xgb.DMatrix <- function(object, name, info) {\n  .internal.setinfo.xgb.DMatrix(object, name, info)\n  attr(object, \"fields\")[[name]] <- TRUE\n  return(TRUE)\n}\n\n.internal.setinfo.xgb.DMatrix <- function(object, name, info) {\n  if (name == \"label\") {\n    if (NROW(info) != nrow(object))\n      stop(\"The length of labels must equal to the number of rows in the input data\")\n    if (is.factor(info)) {\n      stop(\"'label' must be a numeric variable.\")\n    }\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"label_lower_bound\") {\n    if (NROW(info) != nrow(object))\n      stop(\"The length of lower-bound labels must equal to the number of rows in the input data\")\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"label_upper_bound\") {\n    if (NROW(info) != nrow(object))\n      stop(\"The length of upper-bound labels must equal to the number of rows in the input data\")\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"weight\") {\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"base_margin\") {\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"group\") {\n    
if (sum(info) != nrow(object))\n      stop(\"The sum of groups must equal to the number of rows in the input data\")\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"qid\") {\n    if (NROW(info) != nrow(object))\n      stop(\"The length of qid assignments must equal to the number of rows in the input data\")\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n  if (name == \"feature_weights\") {\n    if (NROW(info) != ncol(object)) {\n      stop(\"The number of feature weights must equal to the number of columns in the input data\")\n    }\n    .Call(XGDMatrixSetInfo_R, object, name, info)\n    return(TRUE)\n  }\n\n  set_feat_info <- function(name) {\n    msg <- sprintf(\n      \"The number of %s must equal to the number of columns in the input data. %s vs. %s\",\n      name,\n      length(info),\n      ncol(object)\n    )\n    if (!is.null(info)) {\n      info <- as.list(info)\n      if (length(info) != ncol(object)) {\n        stop(msg)\n      }\n    }\n    .Call(XGDMatrixSetStrFeatureInfo_R, object, name, info)\n  }\n  if (name == \"feature_name\") {\n    set_feat_info(\"feature_name\")\n    return(TRUE)\n  }\n  if (name == \"feature_type\") {\n    set_feat_info(\"feature_type\")\n    return(TRUE)\n  }\n  stop(\"setinfo: unknown info name \", name)\n}\n\n#' Get Quantile Cuts from DMatrix\n#'\n#' @description\n#' Get the quantile cuts (a.k.a. borders) from an `xgb.DMatrix`\n#' that has been quantized for the histogram method (`tree_method = \"hist\"`).\n#'\n#' These cuts are used in order to assign observations to bins - i.e. 
these are ordered\n#' boundaries which are used to determine assignment condition `border_low < x < border_high`.\n#' As such, the first and last bin will be outside of the range of the data, so as to include\n#' all of the observations there.\n#'\n#' If a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column,\n#' which will be output in sorted order from lowest to highest.\n#'\n#' Different columns can have different numbers of bins according to their range.\n#' @param dmat An `xgb.DMatrix` object, as returned by [xgb.DMatrix()].\n#' @param output Output format for the quantile cuts. Possible options are:\n#'   - `\"list\"` will return the output as a list with one entry per column, where\n#'     each column will have a numeric vector with the cuts. The list will be named if\n#'     `dmat` has column names assigned to it.\n#'   - `\"arrays\"` will return a list with entries `indptr` (base-0 indexing) and\n#'     `data`. Here, the cuts for column 'i' are obtained by slicing 'data' from entries\n#'     `indptr[i]+1` to `indptr[i+1]`.\n#' @return The quantile cuts, in the format specified by parameter `output`.\n#' @examples\n#' data(mtcars)\n#'\n#' y <- mtcars$mpg\n#' x <- as.matrix(mtcars[, -1])\n#' dm <- xgb.DMatrix(x, label = y, nthread = 1)\n#'\n#' # DMatrix is not quantized right away, but will be once a hist model is generated\n#' model <- xgb.train(\n#'   data = dm,\n#'   params = xgb.params(tree_method = \"hist\", max_bin = 8, nthread = 1),\n#'   nrounds = 3\n#' )\n#'\n#' # Now can get the quantile cuts\n#' xgb.get.DMatrix.qcut(dm)\n#' @export\nxgb.get.DMatrix.qcut <- function(dmat, output = c(\"list\", \"arrays\")) { # nolint\n  stopifnot(inherits(dmat, \"xgb.DMatrix\"))\n  output <- head(output, 1L)\n  stopifnot(output %in% c(\"list\", \"arrays\"))\n  res <- .Call(XGDMatrixGetQuantileCut_R, dmat)\n  if (output == \"arrays\") {\n    return(res)\n  } else {\n    feature_names <- getinfo(dmat, \"feature_name\")\n    ncols <- 
length(res$indptr) - 1\n    out <- lapply(\n      seq(1, ncols),\n      function(col) {\n        st <- res$indptr[col]\n        end <- res$indptr[col + 1]\n        if (end <= st) {\n          return(numeric())\n        }\n        return(res$data[seq(1 + st, end)])\n      }\n    )\n    if (NROW(feature_names)) {\n      names(out) <- feature_names\n    }\n    return(out)\n  }\n}\n\n#' Get Number of Non-Missing Entries in DMatrix\n#'\n#' @param dmat An `xgb.DMatrix` object, as returned by [xgb.DMatrix()].\n#' @return The number of non-missing entries in the DMatrix.\n#' @export\nxgb.get.DMatrix.num.non.missing <- function(dmat) { # nolint\n  stopifnot(inherits(dmat, \"xgb.DMatrix\"))\n  return(.Call(XGDMatrixNumNonMissing_R, dmat))\n}\n\n#' Get DMatrix Data\n#'\n#' @param dmat An `xgb.DMatrix` object, as returned by [xgb.DMatrix()].\n#' @return The data held in the DMatrix, as a sparse CSR matrix (class `dgRMatrix`\n#' from package `Matrix`). If it had feature names, these will be added as column names\n#' in the output.\n#' @export\nxgb.get.DMatrix.data <- function(dmat) {\n  stopifnot(inherits(dmat, \"xgb.DMatrix\"))\n  res <- .Call(XGDMatrixGetDataAsCSR_R, dmat)\n  out <- methods::new(\"dgRMatrix\")\n  nrows <- as.integer(length(res$indptr) - 1)\n  out@p <- res$indptr\n  out@j <- res$indices\n  out@x <- res$data\n  out@Dim <- as.integer(c(nrows, res$ncols))\n\n  feature_names <- getinfo(dmat, \"feature_name\")\n  dim_names <- list(NULL, NULL)\n  if (NROW(feature_names)) {\n    dim_names[[2L]] <- feature_names\n  }\n  out@Dimnames <- dim_names\n  return(out)\n}\n\n#' Slice DMatrix\n#'\n#' Get a new DMatrix containing the specified rows of original xgb.DMatrix object.\n#'\n#' @param object Object of class `xgb.DMatrix`.\n#' @param idxset An integer vector of indices of rows needed (base-1 indexing).\n#' @param allow_groups Whether to allow slicing an `xgb.DMatrix` with `group` (or\n#'   equivalently `qid`) field. 
Note that in such case, the result will not have\n#'   the groups anymore - they need to be set manually through [setinfo()].\n#' @param colset Currently not used (columns subsetting is not available).\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#'\n#' dsub <- xgb.slice.DMatrix(dtrain, 1:42)\n#' labels1 <- getinfo(dsub, \"label\")\n#'\n#' dsub <- dtrain[1:42, ]\n#' labels2 <- getinfo(dsub, \"label\")\n#' all.equal(labels1, labels2)\n#'\n#' @rdname xgb.slice.DMatrix\n#' @export\nxgb.slice.DMatrix <- function(object, idxset, allow_groups = FALSE) {\n  if (!inherits(object, \"xgb.DMatrix\")) {\n    stop(\"object must be xgb.DMatrix\")\n  }\n  ret <- .Call(XGDMatrixSliceDMatrix_R, object, idxset, allow_groups)\n\n  attr_list <- attributes(object)\n  nr <- nrow(object)\n  len <- sapply(attr_list, NROW)\n  ind <- which(len == nr)\n  if (length(ind) > 0) {\n    nms <- names(attr_list)[ind]\n    for (i in seq_along(ind)) {\n      obj_attr <- attr(object, nms[i])\n      if (NCOL(obj_attr) > 1) {\n        attr(ret, nms[i]) <- obj_attr[idxset, ]\n      } else {\n        attr(ret, nms[i]) <- obj_attr[idxset]\n      }\n    }\n  }\n\n  out <- structure(ret, class = \"xgb.DMatrix\")\n  parent_fields <- as.list(attributes(object)$fields)\n  if (NROW(parent_fields)) {\n    child_fields <- parent_fields[!(names(parent_fields) %in% c(\"group\", \"qid\"))]\n    child_fields <- as.environment(child_fields)\n    attributes(out)$fields <- child_fields\n  }\n  return(out)\n}\n\n#' @rdname xgb.slice.DMatrix\n#' @export\n`[.xgb.DMatrix` <- function(object, idxset, colset = NULL) {\n  xgb.slice.DMatrix(object, idxset)\n}\n\n\n#' Print xgb.DMatrix\n#'\n#' Print information about xgb.DMatrix.\n#' Currently it displays dimensions and presence of info-fields and colnames.\n#'\n#' @param x An xgb.DMatrix object.\n#' @param verbose Whether to print colnames (when present).\n#' @param ... 
Not currently used.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#' dtrain\n#'\n#' print(dtrain, verbose = TRUE)\n#'\n#' @method print xgb.DMatrix\n#' @export\nprint.xgb.DMatrix <- function(x, verbose = FALSE, ...) {\n  if (.Call(XGCheckNullPtr_R, x)) {\n    cat(\"INVALID xgb.DMatrix object. Must be constructed anew.\\n\")\n    return(invisible(x))\n  }\n  class_print <- if (inherits(x, \"xgb.QuantileDMatrix\")) {\n    \"xgb.QuantileDMatrix\"\n  } else if (inherits(x, \"xgb.ExtMemDMatrix\")) {\n    \"xgb.ExtMemDMatrix\"\n  } else if (inherits(x, \"xgb.ProxyDMatrix\")) {\n    \"xgb.ProxyDMatrix\"\n  } else {\n    \"xgb.DMatrix\"\n  }\n\n  cat(class_print, ' dim:', nrow(x), 'x', ncol(x), ' info: ')\n  infos <- names(attributes(x)$fields)\n  infos <- infos[infos != \"feature_name\"]\n  if (!NROW(infos)) infos <- \"NA\"\n  infos <- infos[order(infos)]\n  infos <- paste(infos, collapse = \", \")\n  cat(infos)\n  cnames <- colnames(x)\n  cat('  colnames:')\n  if (verbose && !is.null(cnames)) {\n    cat(\"\\n'\")\n    cat(cnames, sep = \"','\")\n    cat(\"'\")\n  } else {\n    if (is.null(cnames)) cat(' no')\n    else cat(' yes')\n  }\n  cat(\"\\n\")\n  invisible(x)\n}\n"
  },
  {
    "path": "R-package/R/xgb.DMatrix.save.R",
    "content": "#' Save xgb.DMatrix object to binary file\n#'\n#' Save xgb.DMatrix object to binary file\n#'\n#' @param dmatrix the `xgb.DMatrix` object\n#' @param fname the name of the file to write.\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#' fname <- file.path(tempdir(), \"xgb.DMatrix.data\")\n#' xgb.DMatrix.save(dtrain, fname)\n#' dtrain <- xgb.DMatrix(fname, nthread = 1)\n#' @export\nxgb.DMatrix.save <- function(dmatrix, fname) {\n  if (typeof(fname) != \"character\")\n    stop(\"fname must be character\")\n  if (!inherits(dmatrix, \"xgb.DMatrix\"))\n    stop(\"dmatrix must be xgb.DMatrix\")\n\n  fname <- path.expand(fname)\n  .Call(XGDMatrixSaveBinary_R, dmatrix, fname[1], 0L)\n  return(TRUE)\n}\n"
  },
  {
    "path": "R-package/R/xgb.config.R",
    "content": "#' Set and get global configuration\n#'\n#' Global configuration consists of a collection of parameters that can be applied in the global\n#' scope. See \\url{https://xgboost.readthedocs.io/en/stable/parameter.html} for the full list of\n#' parameters supported in the global configuration. Use `xgb.set.config()` to update the\n#' values of one or more global-scope parameters. Use `xgb.get.config()` to fetch the current\n#' values of all global-scope parameters (listed in\n#' \\url{https://xgboost.readthedocs.io/en/stable/parameter.html}).\n#'\n#' @details\n#' Note that serialization-related functions might use a globally-configured number of threads,\n#' which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods\n#' accept an `nthreads` parameter, but some methods like [readRDS()] might get executed before such\n#' parameter can be supplied.\n#'\n#' The number of OMP threads can in turn be configured for example through an environment variable\n#' `OMP_NUM_THREADS` (needs to be set before R is started), or through `RhpcBLASctl::omp_set_num_threads`.\n#' @rdname xgbConfig\n#' @name xgb.set.config, xgb.get.config\n#' @export xgb.set.config xgb.get.config\n#' @param ... List of parameters to be set, as keyword arguments\n#' @return\n#' `xgb.set.config()` returns `TRUE` to signal success. `xgb.get.config()` returns\n#' a list containing all global-scope parameters and their values.\n#'\n#' @examples\n#' # Set verbosity level to silent (0)\n#' xgb.set.config(verbosity = 0)\n#' # Now global verbosity level is 0\n#' config <- xgb.get.config()\n#' print(config$verbosity)\n#' # Set verbosity level to warning (1)\n#' xgb.set.config(verbosity = 1)\n#' # Now global verbosity level is 1\n#' config <- xgb.get.config()\n#' print(config$verbosity)\nxgb.set.config <- function(...) 
{\n  new_config <- list(...)\n  .Call(XGBSetGlobalConfig_R, jsonlite::toJSON(new_config, auto_unbox = TRUE))\n  return(TRUE)\n}\n\n#' @rdname xgbConfig\nxgb.get.config <- function() {\n  config <- .Call(XGBGetGlobalConfig_R)\n  return(jsonlite::fromJSON(config))\n}\n"
  },
  {
    "path": "R-package/R/xgb.create.features.R",
    "content": "#' Create new features from a previously learned model\n#'\n#' May improve the learning by adding new features to the training data based on the\n#' decision trees from a previously learned model.\n#'\n#' @details\n#' This is the function inspired from the paragraph 3.1 of the paper:\n#'\n#' **Practical Lessons from Predicting Clicks on Ads at Facebook**\n#'\n#' *(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,\n#' Joaquin Quinonero Candela)*\n#'\n#' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014\n#'\n#' \\url{https://research.facebook.com/publications/practical-lessons-from-predicting-clicks-on-ads-at-facebook/}.\n#'\n#' Extract explaining the method:\n#'\n#' \"We found that boosted decision trees are a powerful and very\n#' convenient way to implement non-linear and tuple transformations\n#' of the kind we just described. We treat each individual\n#' tree as a categorical feature that takes as value the\n#' index of the leaf an instance ends up falling in. We use\n#' 1-of-K coding of this type of features.\n#'\n#' For example, consider the boosted tree model in Figure 1 with 2 subtrees,\n#' where the first subtree has 3 leafs and the second 2 leafs. If an\n#' instance ends up in leaf 2 in the first subtree and leaf 1 in\n#' second subtree, the overall input to the linear classifier will\n#' be the binary vector `[0, 1, 0, 1, 0]`, where the first 3 entries\n#' correspond to the leaves of the first subtree and last 2 to\n#' those of the second subtree.\n#'\n#' ...\n#'\n#' We can understand boosted decision tree\n#' based transformation as a supervised feature encoding that\n#' converts a real-valued vector into a compact binary-valued\n#' vector. 
A traversal from root node to a leaf node represents\n#' a rule on certain features.\"\n#'\n#' @param model Decision tree boosting model learned on the original data.\n#' @param data Original data (usually provided as a `dgCMatrix` matrix).\n#'\n#' @return A `dgCMatrix` matrix including both the original data and the new features.\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))\n#'\n#' param <- list(max_depth = 2, learning_rate = 1, objective = 'binary:logistic', nthread = 1)\n#' nrounds = 4\n#'\n#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds)\n#'\n#' # Model accuracy without new features\n#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /\n#'                    length(agaricus.test$label)\n#'\n#' # Convert previous features to one hot encoding\n#' new.features.train <- xgb.create.features(model = bst, agaricus.train$data)\n#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)\n#'\n#' # learning with new features\n#' new.dtrain <- xgb.DMatrix(\n#'   data = new.features.train, label = agaricus.train$label, nthread = 1\n#' )\n#' new.dtest <- xgb.DMatrix(\n#'   data = new.features.test, label = agaricus.test$label, nthread = 1\n#' )\n#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds)\n#'\n#' # Model accuracy with new features\n#' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /\n#'                   length(agaricus.test$label)\n#'\n#' # Here the accuracy was already good and is now perfect.\n#' cat(paste(\"The accuracy was\", accuracy.before, \"before adding leaf features and it is now\",\n#'           accuracy.after, \"!\\n\"))\n#'\n#' @export\nxgb.create.features <- function(model, data) {\n  
pred_with_leaf <- predict.xgb.Booster(model, data, predleaf = TRUE)\n  cols <- lapply(as.data.frame(pred_with_leaf), factor)\n  cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint\n}\n"
  },
  {
    "path": "R-package/R/xgb.cv.R",
    "content": "#' Cross Validation\n#'\n#' The cross validation function of xgboost.\n#'\n#' @inheritParams xgb.train\n#' @param data An `xgb.DMatrix` object, with corresponding fields like `label` or bounds as required\n#'   for model training by the objective.\n#'\n#'   Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`\n#'   or `xgb.ExtMemDMatrix` are not supported here.\n#' @param nfold The original dataset is randomly partitioned into `nfold` equal size subsamples.\n#' @param prediction A logical value indicating whether to return the test fold predictions\n#'   from each CV model. This parameter engages the [xgb.cb.cv.predict()] callback.\n#' @param showsd Logical value whether to show standard deviation of cross validation.\n#' @param metrics List of evaluation metrics to be used in cross validation,\n#'   when it is not specified, the evaluation metric is chosen according to objective function.\n#'   Possible options are:\n#'   - `error`: Binary classification error rate\n#'   - `rmse`: Root mean square error\n#'   - `logloss`: Negative log-likelihood function\n#'   - `mae`: Mean absolute error\n#'   - `mape`: Mean absolute percentage error\n#'   - `auc`: Area under curve\n#'   - `aucpr`: Area under PR curve\n#'   - `merror`: Exact matching error used to evaluate multi-class classification\n#' @param stratified Logical flag indicating whether sampling of folds should be stratified\n#'   by the values of outcome labels. 
For real-valued labels in regression objectives,\n#'   stratification will be done by discretizing the labels into up to 5 buckets beforehand.\n#'\n#'   If passing \"auto\", will be set to `TRUE` if the objective in `params` is a classification\n#'   objective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to\n#'   `FALSE` otherwise.\n#'\n#'   This parameter is ignored when `data` has a `group` field - in such case, the splitting\n#'   will be based on whole groups (note that this might make the folds have different sizes).\n#'\n#'   Value `TRUE` here is **not** supported for custom objectives.\n#' @param folds List with pre-defined CV folds (each element must be a vector of test fold's indices).\n#'   When folds are supplied, the `nfold` and `stratified` parameters are ignored.\n#'\n#'   If `data` has a `group` field and the objective requires this field, each fold (list element)\n#'   must additionally have two attributes (retrievable through `attributes`) named `group_test`\n#'   and `group_train`, which should hold the `group` to assign through [setinfo.xgb.DMatrix()] to\n#'   the resulting DMatrices.\n#' @param train_folds List specifying which indices to use for training. If `NULL`\n#'   (the default) all indices not specified in `folds` will be used for training.\n#'\n#'   This is not supported when `data` has a `group` field.\n#' @param callbacks A list of callback functions to perform various tasks during boosting.\n#'   See [xgb.Callback()]. Some of the callbacks are automatically created depending on the\n#'   parameters' values. 
Users can provide either existing or their own callback methods in order\n#'   to customize the training process.\n#' @details\n#' The original sample is randomly partitioned into `nfold` equal size subsamples.\n#'\n#' Of the `nfold` subsamples, a single subsample is retained as the validation data for testing the model,\n#' and the remaining `nfold - 1` subsamples are used as training data.\n#'\n#' The cross-validation process is then repeated `nfold` times, with each of the\n#' `nfold` subsamples used exactly once as the validation data.\n#'\n#' All observations are used for both training and validation.\n#'\n#' Adapted from \\url{https://en.wikipedia.org/wiki/Cross-validation_\\%28statistics\\%29}\n#'\n#' @return\n#'   An object of class 'xgb.cv.synchronous' with the following elements:\n#'   - `call`: Function call.\n#'   - `params`: Parameters that were passed to the xgboost library. Note that it does not\n#'     capture parameters changed by the [xgb.cb.reset.parameters()] callback.\n#'   - `evaluation_log`: Evaluation history stored as a `data.table` with the\n#'     first column corresponding to iteration number and the rest corresponding to the\n#'     CV-based evaluation means and standard deviations for the training and test CV-sets.\n#'     It is created by the [xgb.cb.evaluation.log()] callback.\n#'   - `niter`: Number of boosting iterations.\n#'   - `nfeatures`: Number of features in training data.\n#'   - `folds`: The list of CV folds' indices - either those passed through the `folds`\n#'      parameter or randomly generated.\n#'\n#'   Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with\n#'   a sub-element `pred` when passing `prediction = TRUE`, which is added by the [xgb.cb.cv.predict()]\n#'   callback (note that one can also pass it manually under `callbacks` with different settings,\n#'   such as saving also the models created during cross validation); or a list `early_stop` which\n#'   will contain 
elements such as `best_iteration` when using the early stopping callback ([xgb.cb.early.stop()]).\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n#'\n#' cv <- xgb.cv(\n#'   data = dtrain,\n#'   nrounds = 20,\n#'   early_stopping_rounds = 1,\n#'   params = xgb.params(\n#'     nthread = 2,\n#'     max_depth = 3,\n#'     objective = \"binary:logistic\"\n#'   ),\n#'   nfold = 5,\n#'   metrics = list(\"rmse\",\"auc\"),\n#'   prediction = TRUE\n#' )\n#' print(cv)\n#' print(cv, verbose = TRUE)\n#'\n#' # Callbacks might add additional attributes, separated by the name of the callback\n#' cv$early_stop$best_iteration\n#' head(cv$cv_predict$pred)\n#' @export\nxgb.cv <- function(params = xgb.params(), data, nrounds, nfold,\n                   prediction = FALSE, showsd = TRUE, metrics = list(),\n                   objective = NULL, custom_metric = NULL, stratified = \"auto\",\n                   folds = NULL, train_folds = NULL, verbose = TRUE, print_every_n = 1L,\n                   early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {\n  check.deprecation(deprecated_cv_params, match.call(), ...)\n\n  stopifnot(inherits(data, \"xgb.DMatrix\"))\n\n  if (inherits(data, \"xgb.DMatrix\") && .Call(XGCheckNullPtr_R, data)) {\n    stop(\"'data' is an invalid 'xgb.DMatrix' object. 
Must be constructed again.\")\n  }\n  if (inherits(data, \"xgb.QuantileDMatrix\")) {\n    stop(\"'xgb.QuantileDMatrix' is not supported as input to 'xgb.cv'.\")\n  }\n\n  params <- check.booster.params(params)\n  # TODO: should we deprecate the redundant 'metrics' parameter?\n  for (m in metrics)\n    params <- c(params, list(\"eval_metric\" = m))\n\n  tmp <- check.custom.obj(params, objective)\n  params <- tmp$params\n  objective <- tmp$objective\n  tmp <- check.custom.eval(params, custom_metric, maximize, early_stopping_rounds, callbacks)\n  params <- tmp$params\n  custom_metric <- tmp$custom_metric\n\n  if (stratified == \"auto\") {\n    if (is.character(params$objective)) {\n      stratified <- (\n        (params$objective %in% .CLASSIFICATION_OBJECTIVES())\n        && !(params$objective %in% .RANKING_OBJECTIVES())\n      )\n    } else {\n      stratified <- FALSE\n    }\n  }\n\n  # Check the labels and groups\n  cv_label <- getinfo(data, \"label\")\n  cv_group <- getinfo(data, \"group\")\n  if (!is.null(train_folds) && NROW(cv_group)) {\n    stop(\"'train_folds' is not supported for DMatrix object with 'group' field.\")\n  }\n\n  # CV folds\n  if (!is.null(folds)) {\n    if (!is.list(folds) || length(folds) < 2)\n      stop(\"'folds' must be a list with 2 or more elements that are vectors of indices for each CV-fold\")\n    nfold <- length(folds)\n  } else {\n    if (nfold <= 1)\n      stop(\"'nfold' must be > 1\")\n    folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, cv_group, params)\n  }\n\n  # Callbacks\n  tmp <- .process.callbacks(callbacks, is_cv = TRUE)\n  callbacks <- tmp$callbacks\n  cb_names <- tmp$cb_names\n  rm(tmp)\n\n  # Early stopping callback\n  if (!is.null(early_stopping_rounds) && !(\"early_stop\" %in% cb_names)) {\n    callbacks <- add.callback(\n      callbacks,\n      xgb.cb.early.stop(\n        early_stopping_rounds,\n        maximize = maximize,\n        verbose = verbose,\n        save_best = FALSE\n      ),\n      
as_first_elt = TRUE\n    )\n  }\n  # verbosity & evaluation printing callback:\n  params <- c(params, list(silent = 1))\n  print_every_n <- max(as.integer(print_every_n), 1L)\n  if (verbose && !(\"print_evaluation\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n, showsd = showsd))\n  }\n  # evaluation log callback: always is on in CV\n  if (!(\"evaluation_log\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())\n  }\n  # CV-predictions callback\n  if (prediction && !(\"cv_predict\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.cv.predict(save_models = FALSE))\n  }\n\n  # create the booster-folds\n  # train_folds\n  dall <- data\n  bst_folds <- lapply(seq_along(folds), function(k) {\n    dtest <- xgb.slice.DMatrix(dall, folds[[k]], allow_groups = TRUE)\n    # code originally contributed by @RolandASc on stackoverflow\n    if (is.null(train_folds))\n       dtrain <- xgb.slice.DMatrix(dall, unlist(folds[-k]), allow_groups = TRUE)\n    else\n       dtrain <- xgb.slice.DMatrix(dall, train_folds[[k]], allow_groups = TRUE)\n    if (!is.null(attributes(folds[[k]])$group_test)) {\n      setinfo(dtest, \"group\", attributes(folds[[k]])$group_test)\n      setinfo(dtrain, \"group\", attributes(folds[[k]])$group_train)\n    }\n    bst <- xgb.Booster(\n      params = params,\n      cachelist = list(dtrain, dtest),\n      modelfile = NULL\n    )\n    bst <- bst$bst\n    list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])\n  })\n\n  # extract parameters that can affect the relationship b/w #trees and #iterations\n  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint\n\n  # those are fixed for CV (no training continuation)\n  begin_iteration <- 1\n  end_iteration <- nrounds\n\n  .execute.cb.before.training(\n    callbacks,\n    bst_folds,\n    dall,\n    NULL,\n    begin_iteration,\n    end_iteration\n  )\n\n  # 
synchronous CV boosting: run CV folds' models within each iteration\n  for (iteration in begin_iteration:end_iteration) {\n\n    .execute.cb.before.iter(\n      callbacks,\n      bst_folds,\n      dall,\n      NULL,\n      iteration\n    )\n\n    msg <- lapply(bst_folds, function(fd) {\n      xgb.iter.update(\n        bst = fd$bst,\n        dtrain = fd$dtrain,\n        iter = iteration - 1,\n        objective = objective\n      )\n      xgb.iter.eval(\n        bst = fd$bst,\n        evals = fd$evals,\n        iter = iteration - 1,\n        custom_metric = custom_metric\n      )\n    })\n    msg <- simplify2array(msg)\n\n    should_stop <- .execute.cb.after.iter(\n      callbacks,\n      bst_folds,\n      dall,\n      NULL,\n      iteration,\n      msg\n    )\n\n    if (should_stop) break\n  }\n\n  cb_outputs <- .execute.cb.after.training(\n    callbacks,\n    bst_folds,\n    dall,\n    NULL,\n    iteration,\n    msg\n  )\n\n  # Just in case if the model is referenced in callbacks.\n  lapply(bst_folds, function(fd) {\n    xgb.reset.Booster(fd$bst)\n  })\n\n  # the CV result\n  ret <- list(\n    call = match.call(),\n    params = params,\n    niter = iteration,\n    nfeatures = ncol(dall),\n    folds = folds\n  )\n  ret <- c(ret, cb_outputs)\n\n  class(ret) <- 'xgb.cv.synchronous'\n  return(invisible(ret))\n}\n\n\n\n#' Print xgb.cv result\n#'\n#' Prints formatted results of [xgb.cv()].\n#'\n#' @param x An `xgb.cv.synchronous` object.\n#' @param verbose Whether to print detailed data.\n#' @param ... 
Passed to `data.table.print()`.\n#'\n#' @details\n#' When not verbose, it would only print the evaluation results,\n#' including the best iteration (when available).\n#'\n#' @examples\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' train <- agaricus.train\n#' cv <- xgb.cv(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nfold = 5,\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#' print(cv)\n#' print(cv, verbose = TRUE)\n#'\n#' @rdname print.xgb.cv\n#' @method print xgb.cv.synchronous\n#' @export\nprint.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {\n  cat('##### xgb.cv ', length(x$folds), '-folds\\n', sep = '')\n\n  if (verbose) {\n    if (!is.null(x$call)) {\n      cat('call:\\n  ')\n      print(x$call)\n    }\n    if (!is.null(x$params)) {\n      cat('params (as set within xgb.cv):\\n')\n      cat('  ',\n          paste(names(x$params),\n                paste0('\"', unlist(x$params), '\"'),\n                sep = ' = ', collapse = ', '), '\\n', sep = '')\n    }\n\n    for (n in c('niter', 'best_iteration')) {\n      if (is.null(x$early_stop[[n]]))\n        next\n      cat(n, ': ', x$early_stop[[n]], '\\n', sep = '')\n    }\n\n    if (!is.null(x$cv_predict$pred)) {\n      cat('pred:\\n')\n      str(x$cv_predict$pred)\n    }\n  }\n\n  if (verbose)\n    cat('evaluation_log:\\n')\n  print(x$evaluation_log, row.names = FALSE, ...)\n\n  if (!is.null(x$early_stop$best_iteration)) {\n    cat('Best iteration:\\n')\n    print(x$evaluation_log[x$early_stop$best_iteration], row.names = FALSE, ...)\n  }\n  invisible(x)\n}\n"
  },
  {
    "path": "R-package/R/xgb.dump.R",
    "content": "#' Dump an XGBoost model in text format.\n#'\n#' Dump an XGBoost model in text format.\n#'\n#' @param model The model object.\n#' @param fname The name of the text file where to save the model text dump.\n#'   If not provided or set to `NULL`, the model is returned as a character vector.\n#' @param fmap Feature map file representing feature types. See demo/ for a walkthrough\n#'   example in R, and \\url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}\n#'   to see an example of the value.\n#' @param with_stats Whether to dump some additional statistics about the splits.\n#'   When this option is on, the model dump contains two additional values:\n#'   gain is the approximate loss function gain we get in each split;\n#'   cover is the sum of second order gradient in each node.\n#' @param dump_format Either 'text', 'json', or 'dot' (graphviz) format could be specified.\n#'\n#'   Format 'dot' for a single tree can be passed directly to packages that consume this format\n#'   for graph visualization, such as function `DiagrammeR::grViz()`\n#' @inheritParams xgb.train\n#' @return\n#' If fname is not provided or set to `NULL` the function will return the model\n#' as a character vector. 
Otherwise it will return `TRUE`.\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' train <- agaricus.train\n#' test <- agaricus.test\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = 2,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' # save the model in file 'xgb.model.dump'\n#' dump_path = file.path(tempdir(), 'model.dump')\n#' xgb.dump(bst, dump_path, with_stats = TRUE)\n#'\n#' # print the model without saving it to a file\n#' print(xgb.dump(bst, with_stats = TRUE))\n#'\n#' # print in JSON format:\n#' cat(xgb.dump(bst, with_stats = TRUE, dump_format = \"json\"))\n#'\n#' # plot first tree leveraging the 'dot' format\n#' if (requireNamespace('DiagrammeR', quietly = TRUE)) {\n#'   DiagrammeR::grViz(xgb.dump(bst, dump_format = \"dot\")[[1L]])\n#' }\n#' @export\nxgb.dump <- function(model, fname = NULL, fmap = \"\", with_stats = FALSE,\n                     dump_format = c(\"text\", \"json\", \"dot\"), ...) 
{\n  check.deprecation(deprecated_dump_params, match.call(), ...)\n  dump_format <- match.arg(dump_format)\n  if (!inherits(model, \"xgb.Booster\"))\n    stop(\"model: argument must be of type xgb.Booster\")\n  if (!(is.null(fname) || is.character(fname)))\n    stop(\"fname: argument must be a character string (when provided)\")\n  if (!(is.null(fmap) || is.character(fmap)))\n    stop(\"fmap: argument must be a character string (when provided)\")\n\n  model_dump <- .Call(\n    XGBoosterDumpModel_R,\n    xgb.get.handle(model),\n    NVL(fmap, \"\")[1],\n    as.integer(with_stats),\n    as.character(dump_format)\n  )\n  if (dump_format == \"dot\") {\n    return(sapply(model_dump, function(x) gsub(\"^booster\\\\[\\\\d+\\\\]\\\\n\", \"\\\\1\", x)))\n  }\n\n  if (is.null(fname))\n    model_dump <- gsub('\\t', '', model_dump, fixed = TRUE)\n\n  if (dump_format == \"text\")\n    model_dump <- unlist(strsplit(model_dump, '\\n', fixed = TRUE))\n\n  model_dump <- grep('^\\\\s*$', model_dump, invert = TRUE, value = TRUE)\n\n  if (is.null(fname)) {\n    return(model_dump)\n  } else {\n    fname <- path.expand(fname)\n    writeLines(model_dump, fname[1])\n    return(TRUE)\n  }\n}\n"
  },
  {
    "path": "R-package/R/xgb.ggplot.R",
    "content": "# ggplot backend for the xgboost plotting facilities\n\n#' @rdname xgb.plot.importance\n#' @export\nxgb.ggplot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,\n                                  rel_to_first = FALSE, n_clusters = seq_len(10), ...) {\n\n  importance_matrix <- xgb.plot.importance(importance_matrix, top_n = top_n, measure = measure,\n                                           rel_to_first = rel_to_first, plot = FALSE, ...)\n  if (!requireNamespace(\"ggplot2\", quietly = TRUE)) {\n    stop(\"ggplot2 package is required\", call. = FALSE)\n  }\n  if (!requireNamespace(\"Ckmeans.1d.dp\", quietly = TRUE)) {\n    stop(\"Ckmeans.1d.dp package is required\", call. = FALSE)\n  }\n\n  clusters <- suppressWarnings(\n    Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix$Importance, n_clusters)\n  )\n  importance_matrix[, Cluster := as.character(clusters$cluster)]\n\n  plot <-\n    ggplot2::ggplot(importance_matrix,\n                    ggplot2::aes(x = factor(Feature, levels = rev(Feature)), y = Importance, width = 0.5),\n                    environment = environment()) +\n    ggplot2::geom_bar(ggplot2::aes(fill = Cluster), stat = \"identity\", position = \"identity\") +\n    ggplot2::coord_flip() +\n    ggplot2::xlab(\"Features\") +\n    ggplot2::ggtitle(\"Feature importance\") +\n    ggplot2::theme(plot.title = ggplot2::element_text(lineheight = .9, face = \"bold\"),\n                   panel.grid.major.y = ggplot2::element_blank())\n  return(plot)\n}\n\n\n#' @rdname xgb.plot.deepness\n#' @export\nxgb.ggplot.deepness <- function(model = NULL, which = c(\"2x1\", \"max.depth\", \"med.depth\", \"med.weight\")) {\n\n  if (!requireNamespace(\"ggplot2\", quietly = TRUE))\n    stop(\"ggplot2 package is required for plotting the graph deepness.\", call. 
= FALSE)\n\n  which <- match.arg(which)\n\n  dt_depths <- xgb.plot.deepness(model = model, plot = FALSE)\n  dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]\n  setkey(dt_summaries, 'Depth')\n\n  if (which == \"2x1\") {\n    p1 <-\n      ggplot2::ggplot(dt_summaries) +\n      ggplot2::geom_bar(ggplot2::aes(x = Depth, y = N), stat = \"Identity\") +\n      ggplot2::xlab(\"\") +\n      ggplot2::ylab(\"Number of leafs\") +\n      ggplot2::ggtitle(\"Model complexity\") +\n      ggplot2::theme(\n        plot.title = ggplot2::element_text(lineheight = 0.9, face = \"bold\"),\n        panel.grid.major.y = ggplot2::element_blank(),\n        axis.ticks = ggplot2::element_blank(),\n        axis.text.x = ggplot2::element_blank()\n      )\n\n    p2 <-\n      ggplot2::ggplot(dt_summaries) +\n      ggplot2::geom_bar(ggplot2::aes(x = Depth, y = Cover), stat = \"Identity\") +\n      ggplot2::xlab(\"Leaf depth\") +\n      ggplot2::ylab(\"Weighted cover\")\n\n    multiplot(p1, p2, cols = 1)\n    return(invisible(list(p1, p2)))\n\n  } else if (which == \"max.depth\") {\n    p <-\n      ggplot2::ggplot(dt_depths[, max(Depth), Tree]) +\n      ggplot2::geom_jitter(ggplot2::aes(x = Tree, y = V1),\n                           height = 0.15, alpha = 0.4, size = 3, stroke = 0) +\n      ggplot2::xlab(\"tree #\") +\n      ggplot2::ylab(\"Max tree leaf depth\")\n    return(p)\n\n  } else if (which == \"med.depth\") {\n    p <-\n      ggplot2::ggplot(dt_depths[, median(as.numeric(Depth)), Tree]) +\n      ggplot2::geom_jitter(ggplot2::aes(x = Tree, y = V1),\n                           height = 0.15, alpha = 0.4, size = 3, stroke = 0) +\n      ggplot2::xlab(\"tree #\") +\n      ggplot2::ylab(\"Median tree leaf depth\")\n    return(p)\n\n  } else if (which == \"med.weight\") {\n    p <-\n      ggplot2::ggplot(dt_depths[, median(abs(Weight)), Tree]) +\n      ggplot2::geom_point(ggplot2::aes(x = Tree, y = V1),\n                          alpha = 0.4, size = 3, stroke = 0) +\n      
ggplot2::xlab(\"tree #\") +\n      ggplot2::ylab(\"Median absolute leaf weight\")\n    return(p)\n  }\n}\n\n#' @rdname xgb.plot.shap.summary\n#' @export\nxgb.ggplot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, top_n = 10, model = NULL,\n                                    trees = NULL, target_class = NULL, approxcontrib = FALSE, subsample = NULL) {\n  if (inherits(data, \"xgb.DMatrix\")) {\n    stop(\n      \"'xgb.ggplot.shap.summary' is not compatible with 'xgb.DMatrix' objects. Try passing a matrix or data.frame.\"\n    )\n  }\n  cols_categ <- NULL\n  if (!is.null(model)) {\n    ftypes <- getinfo(model, \"feature_type\")\n    if (NROW(ftypes)) {\n      if (length(ftypes) != ncol(data)) {\n        stop(sprintf(\"'data' has incorrect number of columns (expected: %d, got: %d).\", length(ftypes), ncol(data)))\n      }\n      cols_categ <- colnames(data)[ftypes == \"c\"]\n    }\n  } else if (inherits(data, \"data.frame\")) {\n    cols_categ <- names(data)[sapply(data, function(x) is.factor(x) || is.character(x))]\n  }\n  if (NROW(cols_categ)) {\n    warning(\"Categorical features are ignored in 'xgb.ggplot.shap.summary'.\")\n  }\n\n  data_list <- xgb.shap.data(\n    data = data,\n    shap_contrib = shap_contrib,\n    features = features,\n    top_n = top_n,\n    model = model,\n    trees = trees,\n    target_class = target_class,\n    approxcontrib = approxcontrib,\n    subsample = subsample,\n    max_observations = 10000  # 10,000 samples per feature.\n  )\n  if (NROW(cols_categ)) {\n    data_list <- lapply(data_list, function(x) x[, !(colnames(x) %in% cols_categ), drop = FALSE])\n  }\n\n  p_data <- prepare.ggplot.shap.data(data_list, normalize = TRUE)\n  # Reverse factor levels so that the first level is at the top of the plot\n  p_data[, \"feature\" := factor(feature, rev(levels(feature)))]\n  p <- ggplot2::ggplot(p_data, ggplot2::aes(x = feature, y = p_data$shap_value, colour = p_data$feature_value)) +\n    ggplot2::geom_jitter(alpha = 0.5, 
width = 0.1) +\n    ggplot2::scale_colour_viridis_c(limits = c(-3, 3), option = \"plasma\", direction = -1) +\n    ggplot2::geom_abline(slope = 0, intercept = 0, colour = \"darkgrey\") +\n    ggplot2::coord_flip()\n\n  p\n}\n\n#' Combine feature values and SHAP values\n#'\n#' Internal function used to combine and melt feature values and SHAP contributions\n#' as required for ggplot functions related to SHAP.\n#'\n#' @param data_list The result of `xgb.shap.data()`.\n#' @param normalize Whether to standardize feature values to mean 0 and\n#'   standard deviation 1. This is useful for comparing multiple features on the same\n#'   plot. Default is `FALSE`. Note that it cannot be used when the data contains\n#'   categorical features.\n#' @return A `data.table` containing the observation ID, the feature name, the\n#'   feature value (normalized if specified), and the SHAP contribution value.\n#' @noRd\n#' @keywords internal\nprepare.ggplot.shap.data <- function(data_list, normalize = FALSE) {\n  data <- data_list[[\"data\"]]\n  shap_contrib <- data_list[[\"shap_contrib\"]]\n\n  data <- data.table::as.data.table(as.matrix(data))\n  if (normalize) {\n    data[, (names(data)) := lapply(.SD, normalize)]\n  }\n  data[, \"id\" := seq_len(nrow(data))]\n  data_m <- data.table::melt.data.table(data, id.vars = \"id\", variable.name = \"feature\", value.name = \"feature_value\")\n\n  shap_contrib <- data.table::as.data.table(as.matrix(shap_contrib))\n  shap_contrib[, \"id\" := seq_len(nrow(shap_contrib))]\n  shap_contrib_m <- data.table::melt.data.table(shap_contrib, id.vars = \"id\", variable.name = \"feature\", value.name = \"shap_value\")\n\n  p_data <- data.table::merge.data.table(data_m, shap_contrib_m, by = c(\"id\", \"feature\"))\n\n  p_data\n}\n\n#' Scale feature values\n#'\n#' Internal function that scales feature values to mean 0 and standard deviation 1.\n#' Useful to compare multiple features on the same plot.\n#'\n#' @param x Numeric vector.\n#' @return Numeric 
vector with mean 0 and standard deviation 1.\n#' @noRd\n#' @keywords internal\nnormalize <- function(x) {\n  loc <- mean(x, na.rm = TRUE)\n  scale <- stats::sd(x, na.rm = TRUE)\n\n  (x - loc) / scale\n}\n\n# Plot multiple ggplot graph aligned by rows and columns.\n# ... the plots\n# cols number of columns\n# internal utility function\nmultiplot <- function(..., cols) {\n  plots <- list(...)\n  num_plots <- length(plots)\n\n  layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),\n                   ncol = cols, nrow = ceiling(num_plots / cols))\n\n  if (num_plots == 1) {\n    print(plots[[1]])\n  } else {\n    grid::grid.newpage()\n    grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout))))\n    for (i in 1:num_plots) {\n      # Get the i,j matrix positions of the regions that contain this subplot\n      matchidx <- as.data.table(which(layout == i, arr.ind = TRUE))\n\n      print(\n        plots[[i]], vp = grid::viewport(\n          layout.pos.row = matchidx$row,\n          layout.pos.col = matchidx$col\n        )\n      )\n    }\n  }\n}\n\nglobalVariables(c(\n  \"Cluster\", \"ggplot\", \"aes\", \"geom_bar\", \"coord_flip\", \"xlab\", \"ylab\", \"ggtitle\", \"theme\",\n  \"element_blank\", \"element_text\", \"V1\", \"Weight\", \"feature\"\n))\n"
  },
  {
    "path": "R-package/R/xgb.importance.R",
    "content": "#' Feature importance\n#'\n#' Creates a `data.table` of feature importances.\n#'\n#' @details\n#' This function works for both linear and tree models.\n#'\n#' For linear models, the importance is the absolute magnitude of linear coefficients.\n#' To obtain a meaningful ranking by importance for linear models, the features need to\n#' be on the same scale (which is also recommended when using L1 or L2 regularization).\n#'\n#' @param feature_names Character vector used to overwrite the feature names\n#'   of the model. The default is `NULL` (use original feature names).\n#' @param model Object of class `xgb.Booster`.\n#' @param trees An integer vector of (base-1) tree indices that should be included\n#'   into the importance calculation (only for the \"gbtree\" booster).\n#'   The default (`NULL`) parses all trees.\n#'   It could be useful, e.g., in multiclass classification to get feature importances\n#'   for each class separately.\n#' @return A `data.table` with the following columns:\n#'\n#' For a tree model:\n#' - `Features`: Names of the features used in the model.\n#' - `Gain`: Fractional contribution of each feature to the model based on\n#'    the total gain of this feature's splits. Higher percentage means higher importance.\n#' - `Cover`: Metric of the number of observation related to this feature.\n#' - `Frequency`: Percentage of times a feature has been used in trees.\n#'\n#' For a linear model:\n#' - `Features`: Names of the features used in the model.\n#' - `Weight`: Linear coefficient of this feature.\n#' - `Class`: Class label (only for multiclass models). For objects of class `xgboost` (as\n#'   produced by [xgboost()]), it will be a `factor`, while for objects of class `xgb.Booster`\n#'   (as produced by [xgb.train()]), it will be a zero-based integer vector.\n#'\n#' If `feature_names` is not provided and `model` doesn't have `feature_names`,\n#' the index of the features will be used instead. 
Because the index is extracted from the model dump\n#' (based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R).\n#'\n#' @examples\n#' # binary classification using \"gbtree\":\n#' data(\"ToothGrowth\")\n#' x <- ToothGrowth[, c(\"len\", \"dose\")]\n#' y <- ToothGrowth$supp\n#' model_tree_binary <- xgboost(\n#'   x, y,\n#'   nrounds = 5L,\n#'   nthreads = 1L,\n#'   booster = \"gbtree\",\n#'   max_depth = 2L\n#' )\n#' xgb.importance(model_tree_binary)\n#'\n#' # binary classification using \"gblinear\":\n#' model_tree_linear <- xgboost(\n#'   x, y,\n#'   nrounds = 5L,\n#'   nthreads = 1L,\n#'   booster = \"gblinear\",\n#'   learning_rate = 0.3\n#' )\n#' xgb.importance(model_tree_linear)\n#'\n#' # multi-class classification using \"gbtree\":\n#' data(\"iris\")\n#' x <- iris[, c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")]\n#' y <- iris$Species\n#' model_tree_multi <- xgboost(\n#'   x, y,\n#'   nrounds = 5L,\n#'   nthreads = 1L,\n#'   booster = \"gbtree\",\n#'   max_depth = 3\n#' )\n#' # all classes clumped together:\n#' xgb.importance(model_tree_multi)\n#' # inspect importances separately for each class:\n#' num_classes <- 3L\n#' nrounds <- 5L\n#' xgb.importance(\n#'   model_tree_multi, trees = seq(from = 1, by = num_classes, length.out = nrounds)\n#' )\n#' xgb.importance(\n#'   model_tree_multi, trees = seq(from = 2, by = num_classes, length.out = nrounds)\n#' )\n#' xgb.importance(\n#'   model_tree_multi, trees = seq(from = 3, by = num_classes, length.out = nrounds)\n#' )\n#'\n#' # multi-class classification using \"gblinear\":\n#' model_linear_multi <- xgboost(\n#'   x, y,\n#'   nrounds = 5L,\n#'   nthreads = 1L,\n#'   booster = \"gblinear\",\n#'   learning_rate = 0.2\n#' )\n#' xgb.importance(model_linear_multi)\n#' @export\nxgb.importance <- function(model = NULL, feature_names = getinfo(model, \"feature_name\"), trees = NULL) {\n\n  if (!(is.null(feature_names) || is.character(feature_names)))\n    
stop(\"feature_names: Has to be a character vector\")\n\n  if (!is.null(trees)) {\n    if (!is.vector(trees)) {\n      stop(\"'trees' must be a vector of tree indices.\")\n    }\n    trees <- trees - 1L\n    if (anyNA(trees)) {\n      stop(\"Passed invalid tree indices.\")\n    }\n  }\n\n  handle <- xgb.get.handle(model)\n  if (xgb.booster_type(model) == \"gblinear\") {\n    args <- list(importance_type = \"weight\", feature_names = feature_names)\n    results <- .Call(\n      XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = \"null\")\n    )\n    names(results) <- c(\"features\", \"shape\", \"weight\")\n    if (length(results$shape) == 2) {\n        n_classes <- results$shape[2]\n    } else {\n        n_classes <- 0\n    }\n    importance <- if (n_classes == 0) {\n      return(data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))])\n    } else {\n      out <- data.table(\n        Feature = rep(results$features, each = n_classes), Weight = results$weight, Class = seq_len(n_classes) - 1\n      )[order(Class, -abs(Weight))]\n      if (inherits(model, \"xgboost\") && NROW(attributes(model)$metadata$y_levels)) {\n        class_vec <- out$Class\n        class_vec <- as.integer(class_vec) + 1L\n        attributes(class_vec)$levels <- attributes(model)$metadata$y_levels\n        attributes(class_vec)$class <- \"factor\"\n        out[, Class := class_vec]\n      }\n      return(out[])\n    }\n  } else {\n    concatenated <- list()\n    output_names <- vector()\n    for (importance_type in c(\"weight\", \"total_gain\", \"total_cover\")) {\n      args <- list(importance_type = importance_type, feature_names = feature_names, tree_idx = trees)\n      results <- .Call(\n        XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = \"null\")\n      )\n      names(results) <- c(\"features\", \"shape\", importance_type)\n      concatenated[\n        switch(importance_type, \"weight\" = 
\"Frequency\", \"total_gain\" = \"Gain\", \"total_cover\" = \"Cover\")\n      ] <- results[importance_type]\n      output_names <- results$features\n    }\n    importance <- data.table(\n        Feature = output_names,\n        Gain = concatenated$Gain / sum(concatenated$Gain),\n        Cover = concatenated$Cover / sum(concatenated$Cover),\n        Frequency = concatenated$Frequency / sum(concatenated$Frequency)\n    )[order(Gain, decreasing = TRUE)]\n  }\n  importance\n}\n\n# Avoid error messages during CRAN check.\n# The reason is that these variables are never declared\n# They are mainly column names inferred by Data.table...\nglobalVariables(c(\".\", \".N\", \"Gain\", \"Cover\", \"Frequency\", \"Feature\", \"Class\"))\n"
  },
  {
    "path": "R-package/R/xgb.load.R",
    "content": "#' Load XGBoost model from binary file\n#'\n#' Load XGBoost model from binary model file.\n#'\n#' @param modelfile The name of the binary input file.\n#'\n#' @details\n#' The input file is expected to contain a model saved in an XGBoost model format\n#' using either [xgb.save()] in R, or using some\n#' appropriate methods from other XGBoost interfaces. E.g., a model trained in Python and\n#' saved from there in XGBoost format, could be loaded from R.\n#'\n#' Note: a model saved as an R object has to be loaded using corresponding R-methods,\n#' not by [xgb.load()].\n#'\n#' @return\n#' An object of `xgb.Booster` class.\n#'\n#' @seealso [xgb.save()]\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' train <- agaricus.train\n#' test <- agaricus.test\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' fname <- file.path(tempdir(), \"xgb.ubj\")\n#' xgb.save(bst, fname)\n#' bst <- xgb.load(fname)\n#' @export\nxgb.load <- function(modelfile) {\n  if (is.null(modelfile))\n    stop(\"xgb.load: modelfile cannot be NULL\")\n\n  bst <- xgb.Booster(\n    params = list(),\n    cachelist = list(),\n    modelfile = modelfile\n  )\n  bst <- bst$bst\n  # re-use modelfile if it is raw so we do not need to serialize\n  if (typeof(modelfile) == \"raw\") {\n    warning(\n      paste(\n        \"The support for loading raw booster with `xgb.load` will be \",\n        \"discontinued in upcoming release. Use `xgb.load.raw` instead. \"\n      )\n    )\n  }\n  return(bst)\n}\n"
  },
  {
    "path": "R-package/R/xgb.load.raw.R",
    "content": "#' Load serialised XGBoost model from R's raw vector\n#'\n#' User can generate raw memory buffer by calling [xgb.save.raw()].\n#'\n#' @param buffer The buffer returned by [xgb.save.raw()].\n#' @export\nxgb.load.raw <- function(buffer) {\n  cachelist <- list()\n  bst <- .Call(XGBoosterCreate_R, cachelist)\n  .Call(XGBoosterLoadModelFromRaw_R, xgb.get.handle(bst), buffer)\n  return(bst)\n}\n"
  },
  {
    "path": "R-package/R/xgb.model.dt.tree.R",
    "content": "#' Parse model text dump\n#'\n#' Parse a boosted tree model text dump into a `data.table` structure.\n#'\n#' Note that this function does not work with models that were fitted to\n#' categorical data, and is only applicable to tree-based boosters (not `gblinear`).\n#' @param model Object of class `xgb.Booster`. If it contains feature names (they can\n#'   be set through [setinfo()]), they will be used in the output from this function.\n#'\n#'   If the model contains categorical features, an error will be thrown.\n#' @param trees An integer vector of (base-1) tree indices that should be used. The default\n#'   (`NULL`) uses all trees. Useful, e.g., in multiclass classification to get only\n#'   the trees of one class.\n#' @param use_int_id A logical flag indicating whether nodes in columns \"Yes\", \"No\", and\n#'   \"Missing\" should be represented as integers (when `TRUE`) or as \"Tree-Node\"\n#'   character strings (when `FALSE`, default).\n#' @inheritParams xgb.train\n#' @return\n#' A `data.table` with detailed information about tree nodes. 
It has the following columns:\n#' - `Tree`: integer ID of a tree in a model (zero-based index).\n#' - `Node`: integer ID of a node in a tree (zero-based index).\n#' - `ID`: character identifier of a node in a model (only when `use_int_id = FALSE`).\n#' - `Feature`: for a branch node, a feature ID or name (when available);\n#'              for a leaf node, it simply labels it as `\"Leaf\"`.\n#' - `Split`: location of the split for a branch node (split condition is always \"less than\").\n#' - `Yes`: ID of the next node when the split condition is met.\n#' - `No`: ID of the next node when the split condition is not met.\n#' - `Missing`: ID of the next node when the branch value is missing.\n#' - `Gain`: either the split gain (change in loss) or the leaf value.\n#' - `Cover`: metric related to the number of observations either seen by a split\n#'            or collected by a leaf during training.\n#'\n#' When `use_int_id = FALSE`, columns \"Yes\", \"No\", and \"Missing\" point to model-wide node identifiers\n#' in the \"ID\" column. 
When `use_int_id = TRUE`, those columns point to node identifiers from\n#' the corresponding trees in the \"Node\" column.\n#'\n#' @examples\n#' # Basic use:\n#'\n#' data(agaricus.train, package = \"xgboost\")\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' # This bst model already has feature_names stored with it, so those would be used when\n#' # feature_names is not set:\n#' dt <- xgb.model.dt.tree(bst)\n#'\n#' # How to match feature names of splits that are following a current 'Yes' branch:\n#' merge(\n#'   dt,\n#'   dt[, .(ID, Y.Feature = Feature)], by.x = \"Yes\", by.y = \"ID\", all.x = TRUE\n#' )[\n#'   order(Tree, Node)\n#' ]\n#'\n#' @export\nxgb.model.dt.tree <- function(model, trees = NULL, use_int_id = FALSE, ...) {\n  check.deprecation(deprecated_dttree_params, match.call(), ...)\n\n  if (!inherits(model, \"xgb.Booster\")) {\n    stop(\"Either 'model' must be an object of class xgb.Booster\")\n  }\n\n  if (xgb.has_categ_features(model)) {\n    stop(\"Cannot produce tables for models having categorical features.\")\n  }\n\n  if (!is.null(trees)) {\n    if (!is.vector(trees) || (!is.numeric(trees) && !is.integer(trees))) {\n      stop(\"trees: must be a vector of integers.\")\n    }\n    trees <- trees - 1L\n    if (anyNA(trees) || min(trees) < 0) {\n      stop(\"Passed invalid tree indices.\")\n    }\n  }\n\n  feature_names <- NULL\n  if (inherits(model, \"xgb.Booster\")) {\n    feature_names <- xgb.feature_names(model)\n  }\n\n  text <- xgb.dump(model = model, with_stats = TRUE)\n\n  if (length(text) < 2 || !any(grepl('leaf=(-?\\\\d+)', text))) {\n    stop(\"Non-tree model detected! 
This function can only be used with tree models.\")\n  }\n\n  position <- which(grepl(\"booster\", text, fixed = TRUE))\n\n  add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = \"-\")\n\n  anynumber_regex <- \"[-+]?[0-9]*\\\\.?[0-9]+([eE][-+]?[0-9]+)?\"\n\n  td <- data.table(t = text)\n  td[position, Tree := 1L]\n  td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]\n\n  if (is.null(trees)) {\n    trees <- 0:max(td$Tree)\n  } else {\n    trees <- trees[trees >= 0 & trees <= max(td$Tree)]\n  }\n  td <- td[Tree %in% trees & !is.na(t) & !startsWith(t, 'booster')]\n\n  td[, Node := as.integer(sub(\"^([0-9]+):.*\", \"\\\\1\", t))]\n  if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]\n  td[, isLeaf := grepl(\"leaf\", t, fixed = TRUE)]\n\n  # parse branch lines\n  branch_rx_nonames <- paste0(\"f(\\\\d+)<(\", anynumber_regex, \")\\\\] yes=(\\\\d+),no=(\\\\d+),missing=(\\\\d+),\",\n                              \"gain=(\", anynumber_regex, \"),cover=(\", anynumber_regex, \")\")\n  branch_rx_w_names <- paste0(\"\\\\d+:\\\\[(.+)<(\", anynumber_regex, \")\\\\] yes=(\\\\d+),no=(\\\\d+),missing=(\\\\d+),\",\n                              \"gain=(\", anynumber_regex, \"),cover=(\", anynumber_regex, \")\")\n  text_has_feature_names <- FALSE\n  if (NROW(feature_names)) {\n    branch_rx <- branch_rx_w_names\n    text_has_feature_names <- TRUE\n  } else {\n    branch_rx <- branch_rx_nonames\n  }\n  branch_cols <- c(\"Feature\", \"Split\", \"Yes\", \"No\", \"Missing\", \"Gain\", \"Cover\")\n  td[\n    isLeaf == FALSE,\n    (branch_cols) := {\n      matches <- regmatches(t, regexec(branch_rx, t))\n      # skip some indices with spurious capture groups from anynumber_regex\n      xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]\n      xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)\n      if (length(xtr) == 0) {\n        as.data.table(\n          list(Feature = \"NA\", Split = \"NA\", Yes = \"NA\", No = \"NA\", Missing = 
\"NA\", Gain = \"NA\", Cover = \"NA\")\n        )\n      } else {\n        as.data.table(xtr)\n      }\n    }\n  ]\n\n  # assign feature_names when available\n  is_stump <- function() {\n    return(length(td$Feature) == 1 && is.na(td$Feature))\n  }\n  if (!text_has_feature_names) {\n    if (!is.null(feature_names) && !is_stump()) {\n      if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))\n        stop(\"feature_names has less elements than there are features used in the model\")\n      td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]\n    }\n  }\n\n  # parse leaf lines\n  leaf_rx <- paste0(\"leaf=(\", anynumber_regex, \"),cover=(\", anynumber_regex, \")\")\n  leaf_cols <- c(\"Feature\", \"Gain\", \"Cover\")\n  td[\n    isLeaf == TRUE,\n    (leaf_cols) := {\n      matches <- regmatches(t, regexec(leaf_rx, t))\n      xtr <- do.call(rbind, matches)[, c(2, 4)]\n      if (length(xtr) == 2) {\n        c(\"Leaf\", as.data.table(xtr[1]), as.data.table(xtr[2]))\n      } else {\n        c(\"Leaf\", as.data.table(xtr))\n      }\n    }\n  ]\n\n  # convert some columns to numeric\n  numeric_cols <- c(\"Split\", \"Gain\", \"Cover\")\n  td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]\n  if (use_int_id) {\n    int_cols <- c(\"Yes\", \"No\", \"Missing\")\n    td[, (int_cols) := lapply(.SD, as.integer), .SDcols = int_cols]\n  }\n\n  td[, t := NULL]\n  td[, isLeaf := NULL]\n\n  td[order(Tree, Node)]\n}\n\n# Avoid notes during CRAN check.\n# The reason is that these variables are never declared\n# They are mainly column names inferred by Data.table...\nglobalVariables(c(\"Tree\", \"Node\", \"ID\", \"Feature\", \"t\", \"isLeaf\", \".SD\", \".SDcols\"))\n"
  },
  {
    "path": "R-package/R/xgb.plot.deepness.R",
    "content": "#' Plot model tree depth\n#'\n#' Visualizes distributions related to the depth of tree leaves.\n#' - `xgb.plot.deepness()` uses base R graphics, while\n#' - `xgb.ggplot.deepness()` uses \"ggplot2\".\n#'\n#' @param model Either an `xgb.Booster` model, or the \"data.table\" returned\n#'   by [xgb.model.dt.tree()].\n#' @param which Which distribution to plot (see details).\n#' @param plot Should the plot be shown? Default is `TRUE`.\n#' @param ... Other parameters passed to [graphics::barplot()] or [graphics::plot()].\n#'\n#' @details\n#'\n#' When `which = \"2x1\"`, two distributions with respect to the leaf depth\n#' are plotted on top of each other:\n#' 1. The distribution of the number of leaves in a tree model at a certain depth.\n#' 2. The distribution of the average weighted number of observations (\"cover\")\n#'   ending up in leaves at a certain depth.\n#'\n#' Those could be helpful in determining sensible ranges of the `max_depth`\n#' and `min_child_weight` parameters.\n#'\n#' When `which = \"max.depth\"` or `which = \"med.depth\"`, plots of either maximum or\n#' median depth per tree with respect to the tree number are created.\n#'\n#' Finally, `which = \"med.weight\"` allows to see how\n#' a tree's median absolute leaf weight changes through the iterations.\n#'\n#' These functions have been inspired by the blog post\n#' <https://github.com/aysent/random-forest-leaf-visualization>.\n#'\n#' @return\n#' The return value of the two functions is as follows:\n#' - `xgb.plot.deepness()`: A \"data.table\" (invisibly).\n#'   Each row corresponds to a terminal leaf in the model. 
It contains its information\n#'   about depth, cover, and weight (used in calculating predictions).\n#'   If `plot = TRUE`, also a plot is shown.\n#' - `xgb.ggplot.deepness()`: When `which = \"2x1\"`, a list of two \"ggplot\" objects,\n#'   and a single \"ggplot\" object otherwise.\n#'\n#' @seealso [xgb.train()] and [xgb.model.dt.tree()].\n#'\n#' @examples\n#'\n#' data(agaricus.train, package = \"xgboost\")\n#' ## Keep the number of threads to 2 for examples\n#' nthread <- 2\n#' data.table::setDTthreads(nthread)\n#'\n#' ## Change max_depth to a higher number to get a more significant result\n#' model <- xgboost(\n#'   agaricus.train$data, factor(agaricus.train$label),\n#'   nrounds = 50,\n#'   max_depth = 6,\n#'   nthreads = nthread,\n#'   subsample = 0.5,\n#'   min_child_weight = 2\n#' )\n#'\n#' xgb.plot.deepness(model)\n#' xgb.ggplot.deepness(model)\n#'\n#' xgb.plot.deepness(\n#'   model, which = \"max.depth\", pch = 16, col = rgb(0, 0, 1, 0.3), cex = 2\n#' )\n#'\n#' xgb.plot.deepness(\n#'   model, which = \"med.weight\", pch = 16, col = rgb(0, 0, 1, 0.3), cex = 2\n#' )\n#'\n#' @rdname xgb.plot.deepness\n#' @export\nxgb.plot.deepness <- function(model = NULL, which = c(\"2x1\", \"max.depth\", \"med.depth\", \"med.weight\"),\n                              plot = TRUE, ...) {\n\n  if (!(inherits(model, \"xgb.Booster\") || is.data.table(model)))\n    stop(\"model: Has to be either an xgb.Booster model generaged by the xgb.train function\\n\",\n         \"or a data.table result of the xgb.importance function\")\n\n  if (!requireNamespace(\"igraph\", quietly = TRUE))\n    stop(\"igraph package is required for plotting the graph deepness.\", call. 
= FALSE)\n\n  which <- match.arg(which)\n\n  dt_tree <- model\n  if (inherits(model, \"xgb.Booster\"))\n    dt_tree <- xgb.model.dt.tree(model = model)\n\n  if (!all(c(\"Feature\", \"Tree\", \"ID\", \"Yes\", \"No\", \"Cover\") %in% colnames(dt_tree)))\n    stop(\"Model tree columns are not as expected!\\n\",\n         \"  Note that this function works only for tree models.\")\n\n  dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Gain)], by = \"ID\")\n  setkeyv(dt_depths, c(\"Tree\", \"ID\"))\n  # count by depth levels, and also calculate average cover at a depth\n  dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]\n  setkey(dt_summaries, \"Depth\")\n\n  if (plot) {\n    if (which == \"2x1\") {\n      op <- par(no.readonly = TRUE)\n      par(mfrow = c(2, 1),\n          oma = c(3, 1, 3, 1) + 0.1,\n          mar = c(1, 4, 1, 0) + 0.1)\n\n      dt_summaries[, barplot(N, border = NA, ylab = 'Number of leafs', ...)]\n\n      dt_summaries[, barplot(Cover, border = NA, ylab = \"Weighted cover\", names.arg = Depth, ...)]\n\n      title(\"Model complexity\", xlab = \"Leaf depth\", outer = TRUE, line = 1)\n      par(op)\n    } else if (which == \"max.depth\") {\n      dt_depths[, max(Depth), Tree][\n                , plot(jitter(V1, amount = 0.1) ~ Tree, ylab = 'Max tree leaf depth', xlab = \"tree #\", ...)]\n    } else if (which == \"med.depth\") {\n      dt_depths[, median(as.numeric(Depth)), Tree][\n                , plot(jitter(V1, amount = 0.1) ~ Tree, ylab = 'Median tree leaf depth', xlab = \"tree #\", ...)]\n    } else if (which == \"med.weight\") {\n      dt_depths[, median(abs(Weight)), Tree][\n                , plot(V1 ~ Tree, ylab = 'Median absolute leaf weight', xlab = \"tree #\", ...)]\n    }\n  }\n  invisible(dt_depths)\n}\n\n# Extract path depths from root to leaf\n# from data.table containing the nodes and edges of the trees.\n# internal utility function\nget.leaf.depth <- function(dt_tree) {\n  # extract tree graph's 
edges\n  dt_edges <- rbindlist(list(\n      dt_tree[Feature != \"Leaf\", .(ID, To = Yes, Tree)],\n      dt_tree[Feature != \"Leaf\", .(ID, To = No, Tree)]\n    ))\n  # whether \"To\" is a leaf:\n  dt_edges <-\n    merge(dt_edges,\n          dt_tree[Feature == \"Leaf\", .(ID, Leaf = TRUE)],\n          all.x = TRUE, by.x = \"To\", by.y = \"ID\")\n  dt_edges[is.na(Leaf), Leaf := FALSE]\n\n  dt_edges[, {\n    graph <- igraph::graph_from_data_frame(.SD[, .(ID, To)])\n    # min(ID) in a tree is a root node\n    paths_tmp <- igraph::shortest_paths(graph, from = min(ID), to = To[Leaf == TRUE])\n    # list of paths to each leaf in a tree\n    paths <- lapply(paths_tmp$vpath, names)\n    # combine into a resulting path lengths table for a tree\n    data.table(Depth = lengths(paths), ID = To[Leaf == TRUE])\n  }, by = Tree]\n}\n\n# Avoid error messages during CRAN check.\n# The reason is that these variables are never declared\n# They are mainly column names inferred by Data.table...\nglobalVariables(\n  c(\n    \".N\", \"N\", \"Depth\", \"Gain\", \"Cover\", \"Tree\", \"ID\", \"Yes\", \"No\", \"Feature\", \"Leaf\", \"Weight\"\n  )\n)\n"
  },
  {
    "path": "R-package/R/xgb.plot.importance.R",
    "content": "#' Plot feature importance\n#'\n#' Represents previously calculated feature importance as a bar graph.\n#' - `xgb.plot.importance()` uses base R graphics, while\n#' - `xgb.ggplot.importance()` uses \"ggplot\".\n#'\n#' @details\n#' The graph represents each feature as a horizontal bar of length proportional to the\n#' importance of a feature. Features are sorted by decreasing importance.\n#' It works for both \"gblinear\" and \"gbtree\" models.\n#'\n#' When `rel_to_first = FALSE`, the values would be plotted as in `importance_matrix`.\n#' For a \"gbtree\" model, that would mean being normalized to the total of 1\n#' (\"what is feature's importance contribution relative to the whole model?\").\n#' For linear models, `rel_to_first = FALSE` would show actual values of the coefficients.\n#' Setting `rel_to_first = TRUE` allows to see the picture from the perspective of\n#' \"what is feature's importance contribution relative to the most important feature?\"\n#'\n#' The \"ggplot\" backend performs 1-D clustering of the importance values,\n#' with bar colors corresponding to different clusters having similar importance values.\n#'\n#' @param importance_matrix A `data.table` as returned by [xgb.importance()].\n#' @param top_n Maximal number of top features to include into the plot.\n#' @param measure The name of importance measure to plot.\n#'   When `NULL`, 'Gain' would be used for trees and 'Weight' would be used for gblinear.\n#' @param rel_to_first Whether importance values should be represented as relative to\n#'   the highest ranked feature, see Details.\n#' @param left_margin Adjust the left margin size to fit feature names.\n#'   When `NULL`, the existing `par(\"mar\")` is used.\n#' @param cex Passed as `cex.names` parameter to [graphics::barplot()].\n#' @param plot Should the barplot be shown? 
Default is `TRUE`.\n#' @param n_clusters A numeric vector containing the min and the max range\n#'   of the possible number of clusters of bars.\n#' @param ... Other parameters passed to [graphics::barplot()]\n#'   (except `horiz`, `border`, `cex.names`, `names.arg`, and `las`).\n#'   Only used in `xgb.plot.importance()`.\n#' @return\n#' The return value depends on the function:\n#' - `xgb.plot.importance()`: Invisibly, a \"data.table\" with `top_n` features sorted\n#'   by importance. If `plot = TRUE`, the values are also plotted as barplot.\n#' - `xgb.ggplot.importance()`: A customizable \"ggplot\" object.\n#'   E.g., to change the title, set `+ ggtitle(\"A GRAPH NAME\")`.\n#'\n#' @seealso [graphics::barplot()]\n#'\n#' @examples\n#' data(agaricus.train)\n#'\n#' ## Keep the number of threads to 2 for examples\n#' nthread <- 2\n#' data.table::setDTthreads(nthread)\n#'\n#' model <- xgboost(\n#'   agaricus.train$data, factor(agaricus.train$label),\n#'   nrounds = 2,\n#'   max_depth = 3,\n#'   nthreads = nthread\n#' )\n#'\n#' importance_matrix <- xgb.importance(model)\n#' xgb.plot.importance(\n#'   importance_matrix, rel_to_first = TRUE, xlab = \"Relative importance\"\n#' )\n#'\n#' gg <- xgb.ggplot.importance(\n#'   importance_matrix, measure = \"Frequency\", rel_to_first = TRUE\n#' )\n#' gg\n#' gg + ggplot2::ylab(\"Frequency\")\n#'\n#' @rdname xgb.plot.importance\n#' @export\nxgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,\n                                rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) 
{\n  check.deprecation(deprecated_plotimp_params, match.call(), ..., allow_unrecognized = TRUE)\n  if (!is.data.table(importance_matrix))  {\n    stop(\"importance_matrix: must be a data.table\")\n  }\n\n  imp_names <- colnames(importance_matrix)\n  if (is.null(measure)) {\n    if (all(c(\"Feature\", \"Gain\") %in% imp_names)) {\n      measure <- \"Gain\"\n    } else if (all(c(\"Feature\", \"Weight\") %in% imp_names)) {\n      measure <- \"Weight\"\n    } else {\n      stop(\"Importance matrix column names are not as expected!\")\n    }\n  } else {\n    if (!measure %in% imp_names)\n      stop(\"Invalid `measure`\")\n    if (!\"Feature\" %in% imp_names)\n      stop(\"Importance matrix column names are not as expected!\")\n  }\n\n  # also aggregate, just in case when the values were not yet summed up by feature\n  importance_matrix <- importance_matrix[\n    , lapply(.SD, sum)\n    , .SDcols = setdiff(names(importance_matrix), \"Feature\")\n    , by = Feature\n  ][\n    , Importance := get(measure)\n  ]\n\n  # make sure it's ordered\n  importance_matrix <- importance_matrix[order(-abs(Importance))]\n\n  if (!is.null(top_n)) {\n    top_n <- min(top_n, nrow(importance_matrix))\n    importance_matrix <- head(importance_matrix, top_n)\n  }\n  if (rel_to_first) {\n    importance_matrix[, Importance := Importance / max(abs(Importance))]\n  }\n  if (is.null(cex)) {\n    cex <- 2.5 / log2(1 + nrow(importance_matrix))\n  }\n\n  if (plot) {\n    original_mar <- par()$mar\n\n    # reset margins so this function doesn't have side effects\n    on.exit({\n        par(mar = original_mar)\n    })\n\n    mar <- original_mar\n    if (!is.null(left_margin))\n      mar[2] <- left_margin\n    par(mar = mar)\n\n    # reverse the order of rows to have the highest ranked at the top\n    importance_matrix[rev(seq_len(nrow(importance_matrix))),\n                      barplot(Importance, horiz = TRUE, border = NA, cex.names = cex,\n                              names.arg = Feature, las = 1, 
...)]\n  }\n\n  invisible(importance_matrix)\n}\n\n# Avoid error messages during CRAN check.\n# The reason is that these variables are never declared\n# They are mainly column names inferred by Data.table...\nglobalVariables(c(\"Feature\", \"Importance\"))\n"
  },
  {
    "path": "R-package/R/xgb.plot.multi.trees.R",
    "content": "#' Project all trees on one tree\n#'\n#' Visualization of the ensemble of trees as a single collective unit.\n#'\n#' Note that this function does not work with models that were fitted to\n#' categorical data.\n#' @details\n#' This function tries to capture the complexity of a gradient boosted tree model\n#' in a cohesive way by compressing an ensemble of trees into a single tree-graph representation.\n#' The goal is to improve the interpretability of a model generally seen as black box.\n#'\n#' Note: this function is applicable to tree booster-based models only.\n#'\n#' It takes advantage of the fact that the shape of a binary tree is only defined by\n#' its depth (therefore, in a boosting model, all trees have similar shape).\n#'\n#' Moreover, the trees tend to reuse the same features.\n#'\n#' The function projects each tree onto one, and keeps for each position the\n#' `features_keep` first features (based on the Gain per feature measure).\n#'\n#' This function is inspired by this blog post:\n#' <https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/>\n#'\n#' @inheritParams xgb.plot.tree\n#' @param features_keep Number of features to keep in each position of the multi trees,\n#'   by default 5.\n#' @param render Should the graph be rendered or not? 
The default is `TRUE`.\n#' @inherit xgb.plot.tree return\n#'\n#' @examples\n#'\n#' data(agaricus.train, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 2 for examples\n#' nthread <- 2\n#' data.table::setDTthreads(nthread)\n#'\n#' model <- xgboost(\n#'   agaricus.train$data, factor(agaricus.train$label),\n#'   nrounds = 30,\n#'   verbosity = 0L,\n#'   nthreads = nthread,\n#'   max_depth = 15,\n#'   learning_rate = 1,\n#'   min_child_weight = 50\n#' )\n#'\n#' p <- xgb.plot.multi.trees(model, features_keep = 3)\n#' print(p)\n#'\n#' # Below is an example of how to save this plot to a file.\n#' if (require(\"DiagrammeR\") && require(\"DiagrammeRsvg\") && require(\"rsvg\")) {\n#'   fname <- file.path(tempdir(), \"tree.pdf\")\n#'   gr <- xgb.plot.multi.trees(model, features_keep = 3, render = FALSE)\n#'   export_graph(gr, fname, width = 1500, height = 600)\n#' }\n#' @export\nxgb.plot.multi.trees <- function(model, features_keep = 5, plot_width = NULL, plot_height = NULL,\n                                 render = TRUE, ...) 
{\n  check.deprecation(deprecated_multitrees_params, match.call(), ...)\n  if (!requireNamespace(\"DiagrammeR\", quietly = TRUE)) {\n    stop(\"DiagrammeR is required for xgb.plot.multi.trees\")\n  }\n  if (xgb.has_categ_features(model)) {\n    stop(\n      \"Cannot use 'xgb.plot.multi.trees' for models with categorical features.\",\n      \" Try 'xgb.plot.tree' instead.\"\n    )\n  }\n  tree.matrix <- xgb.model.dt.tree(model = model)\n\n  # first number of the path represents the tree, then the following numbers are related to the path to follow\n  # root init\n  root.nodes <- tree.matrix[Node == 0, ID]\n  tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes]\n\n  precedent.nodes <- root.nodes\n\n  while (tree.matrix[, sum(is.na(abs.node.position))] > 0) {\n    yes.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(Yes)]\n    no.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(No)]\n    yes.nodes.abs.pos <- paste0(yes.row.nodes[, abs.node.position], \"_0\")\n    no.nodes.abs.pos <- paste0(no.row.nodes[, abs.node.position], \"_1\")\n\n    tree.matrix[ID %in% yes.row.nodes[, Yes], abs.node.position := yes.nodes.abs.pos]\n    tree.matrix[ID %in% no.row.nodes[, No], abs.node.position := no.nodes.abs.pos]\n    precedent.nodes <- c(yes.nodes.abs.pos, no.nodes.abs.pos)\n  }\n\n  tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, \"_0\")]\n  tree.matrix[!is.na(No), No := paste0(abs.node.position, \"_1\")]\n\n  for (nm in c(\"abs.node.position\", \"Yes\", \"No\"))\n    data.table::set(tree.matrix, j = nm, value = sub(\"^\\\\d+-\", \"\", tree.matrix[[nm]]))\n\n  nodes.dt <- tree.matrix[\n        , .(Gain = sum(Gain))\n        , by = .(abs.node.position, Feature)\n      ][, .(Text = paste0(\n              paste0(\n                Feature[seq_len(min(length(Feature), features_keep))],\n                \" (\",\n                format(Gain[seq_len(min(length(Gain), features_keep))], digits = 5),\n                
\")\"\n              ),\n              collapse = \"\\n\"\n            )\n          )\n        , by = abs.node.position\n      ]\n\n  edges.dt <- data.table::rbindlist(\n    l = list(\n      tree.matrix[Feature != \"Leaf\", .(From = abs.node.position, To = Yes)],\n      tree.matrix[Feature != \"Leaf\", .(From = abs.node.position, To = No)]\n    )\n  )\n  edges.dt <- edges.dt[, .N, .(From, To)]\n  edges.dt[, N := NULL]\n\n  nodes <- DiagrammeR::create_node_df(\n    n = nrow(nodes.dt),\n    label = nodes.dt[, Text]\n  )\n\n  edges <- DiagrammeR::create_edge_df(\n    from = match(edges.dt[, From], nodes.dt[, abs.node.position]),\n    to = match(edges.dt[, To], nodes.dt[, abs.node.position]),\n    rel = \"leading_to\")\n\n  graph <- DiagrammeR::create_graph(\n      nodes_df = nodes,\n      edges_df = edges,\n      attr_theme = NULL\n  )\n  graph <- DiagrammeR::add_global_graph_attrs(\n      graph = graph,\n      attr_type = \"graph\",\n      attr  = c(\"layout\", \"rankdir\"),\n      value = c(\"dot\", \"LR\")\n  )\n  graph <- DiagrammeR::add_global_graph_attrs(\n      graph = graph,\n      attr_type = \"node\",\n      attr  = c(\"color\", \"fillcolor\", \"style\", \"shape\", \"fontname\"),\n      value = c(\"DimGray\", \"beige\", \"filled\", \"rectangle\", \"Helvetica\")\n  )\n  graph <- DiagrammeR::add_global_graph_attrs(\n      graph = graph,\n      attr_type = \"edge\",\n      attr  = c(\"color\", \"arrowsize\", \"arrowhead\", \"fontname\"),\n      value = c(\"DimGray\", \"1.5\", \"vee\", \"Helvetica\")\n  )\n\n  if (!render) return(invisible(graph))\n\n  DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)\n}\n\nglobalVariables(c(\".N\", \"N\", \"From\", \"To\", \"Text\", \"Feature\", \"no.nodes.abs.pos\",\n                  \"ID\", \"Yes\", \"No\", \"Tree\", \"yes.nodes.abs.pos\", \"abs.node.position\"))\n"
  },
  {
    "path": "R-package/R/xgb.plot.shap.R",
    "content": "#' SHAP dependence plots\n#'\n#' Visualizes SHAP values against feature values to gain an impression of feature effects.\n#'\n#' @param data The data to explain as a `matrix`, `dgCMatrix`, or `data.frame`.\n#' @param shap_contrib Matrix of SHAP contributions of `data`.\n#'   The default (`NULL`) computes it from `model` and `data`.\n#' @param features Vector of column indices or feature names to plot. When `NULL`\n#'   (default), the `top_n` most important features are selected by [xgb.importance()].\n#' @param top_n How many of the most important features (<= 100) should be selected?\n#'   By default 1 for SHAP dependence and 10 for SHAP summary.\n#'   Only used when `features = NULL`.\n#' @param model An `xgb.Booster` model. Only required when `shap_contrib = NULL` or\n#'   `features = NULL`.\n#' @param trees Passed to [xgb.importance()] when `features = NULL`.\n#' @param target_class Only relevant for multiclass models. The default (`NULL`)\n#'   averages the SHAP values over all classes. 
Pass a (0-based) class index\n#'   to show only SHAP values of that class.\n#' @param approxcontrib Passed to [predict.xgb.Booster()] when `shap_contrib = NULL`.\n#' @param subsample Fraction of data points randomly picked for plotting.\n#'   The default (`NULL`) will use up to 100k data points.\n#' @param n_col Number of columns in a grid of plots.\n#' @param col Color of the scatterplot markers.\n#' @param pch Scatterplot marker.\n#' @param discrete_n_uniq Maximal number of unique feature values to consider the\n#'   feature as discrete.\n#' @param discrete_jitter Jitter amount added to the values of discrete features.\n#' @param ylab The y-axis label in 1D plots.\n#' @param plot_NA Should contributions of cases with missing values be plotted?\n#'   Default is `TRUE`.\n#' @param col_NA Color of marker for missing value contributions.\n#' @param pch_NA Marker type for `NA` values.\n#' @param pos_NA Relative position of the x-location where `NA` values are shown:\n#'   `min(x) + (max(x) - min(x)) * pos_NA`.\n#' @param plot_loess Should loess-smoothed curves be plotted? (Default is `TRUE`).\n#'   The smoothing is only done for features with more than 5 distinct values.\n#' @param col_loess Color of loess curves.\n#' @param span_loess The `span` parameter of [stats::loess()].\n#' @param which Whether to do univariate or bivariate plotting. Currently, only \"1d\" is implemented.\n#' @param plot Should the plot be drawn? (Default is `TRUE`).\n#'   If `FALSE`, only a list of matrices is returned.\n#' @param ... Other parameters passed to [graphics::plot()].\n#'\n#' @details\n#'\n#' These scatterplots represent how SHAP feature contributions depend on feature values.\n#' The similarity to partial dependence plots is that they also give an idea for how feature values\n#' affect predictions. 
However, in partial dependence plots, we see marginal dependencies\n#' of model prediction on feature value, while SHAP dependence plots display the estimated\n#' contributions of a feature to the prediction for each individual case.\n#'\n#' When `plot_loess = TRUE`, feature values are rounded to three significant digits and\n#' weighted LOESS is computed and plotted, where the weights are the numbers of data points\n#' at each rounded value.\n#'\n#' Note: SHAP contributions are on the scale of the model margin.\n#' E.g., for a logistic binomial objective, the margin is on log-odds scale.\n#' Also, since SHAP stands for \"SHapley Additive exPlanation\" (model prediction = sum of SHAP\n#' contributions for all features + bias), depending on the objective used, transforming SHAP\n#' contributions for a feature from the marginal to the prediction space is not necessarily\n#' a meaningful thing to do.\n#'\n#' @return\n#' In addition to producing plots (when `plot = TRUE`), it silently returns a list of two matrices:\n#' - `data`: Feature value matrix.\n#' - `shap_contrib`: Corresponding SHAP value matrix.\n#'\n#' @references\n#' 1. Scott M. Lundberg, Su-In Lee, \"A Unified Approach to Interpreting Model Predictions\",\n#'    NIPS Proceedings 2017, <https://arxiv.org/abs/1705.07874>\n#' 2. Scott M. 
Lundberg, Su-In Lee, \"Consistent feature attribution for tree ensembles\",\n#'    <https://arxiv.org/abs/1706.06060>\n#'\n#' @examples\n#'\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#' nrounds <- 20\n#'\n#' model_binary <- xgboost(\n#'   agaricus.train$data, factor(agaricus.train$label),\n#'   nrounds = nrounds,\n#'   verbosity = 0L,\n#'   learning_rate = 0.1,\n#'   max_depth = 3L,\n#'   subsample = 0.5,\n#'   nthreads = nthread\n#' )\n#'\n#' xgb.plot.shap(agaricus.test$data, model = model_binary, features = \"odor=none\")\n#'\n#' contr <- predict(model_binary, agaricus.test$data, type = \"contrib\")\n#' xgb.plot.shap(agaricus.test$data, contr, model = model_binary, top_n = 12, n_col = 3)\n#'\n#' # Summary plot\n#' xgb.ggplot.shap.summary(agaricus.test$data, contr, model = model_binary, top_n = 12)\n#'\n#' # Multiclass example - plots for each class separately:\n#' x <- as.matrix(iris[, -5])\n#' set.seed(123)\n#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values\n#'\n#' model_multiclass <- xgboost(\n#'   x, iris$Species,\n#'   nrounds = nrounds,\n#'   verbosity = 0,\n#'   max_depth = 2,\n#'   subsample = 0.5,\n#'   nthreads = nthread\n#' )\n#' nclass <- 3\n#' trees0 <- seq(from = 1, by = nclass, length.out = nrounds)\n#' col <- rgb(0, 0, 1, 0.5)\n#'\n#' xgb.plot.shap(\n#'   x,\n#'   model = model_multiclass,\n#'   trees = trees0,\n#'   target_class = 0,\n#'   top_n = 4,\n#'   n_col = 2,\n#'   col = col,\n#'   pch = 16,\n#'   pch_NA = 17\n#' )\n#'\n#' xgb.plot.shap(\n#'   x,\n#'   model = model_multiclass,\n#'   trees = trees0 + 1,\n#'   target_class = 1,\n#'   top_n = 4,\n#'   n_col = 2,\n#'   col = col,\n#'   pch = 16,\n#'   pch_NA = 17\n#' )\n#'\n#' xgb.plot.shap(\n#'   x,\n#'   model = model_multiclass,\n#'   trees = trees0 + 2,\n#'   target_class = 2,\n#'   top_n = 4,\n#'   
n_col = 2,\n#'   col = col,\n#'   pch = 16,\n#'   pch_NA = 17\n#' )\n#'\n#' # Summary plot\n#' xgb.ggplot.shap.summary(x, model = model_multiclass, target_class = 0, top_n = 4)\n#'\n#' @rdname xgb.plot.shap\n#' @export\nxgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, model = NULL,\n                          trees = NULL, target_class = NULL, approxcontrib = FALSE,\n                          subsample = NULL, n_col = 1, col = rgb(0, 0, 1, 0.2), pch = '.',\n                          discrete_n_uniq = 5, discrete_jitter = 0.01, ylab = \"SHAP\",\n                          plot_NA = TRUE, col_NA = rgb(0.7, 0, 1, 0.6), pch_NA = '.', pos_NA = 1.07,\n                          plot_loess = TRUE, col_loess = 2, span_loess = 0.5,\n                          which = c(\"1d\", \"2d\"), plot = TRUE, ...) {\n  data_list <- xgb.shap.data(\n    data = data,\n    shap_contrib = shap_contrib,\n    features = features,\n    top_n = top_n,\n    model = model,\n    trees = trees,\n    target_class = target_class,\n    approxcontrib = approxcontrib,\n    subsample = subsample,\n    max_observations = 100000\n  )\n  data <- data_list[[\"data\"]]\n  shap_contrib <- data_list[[\"shap_contrib\"]]\n  features <- colnames(data)\n\n  which <- match.arg(which)\n  if (which == \"2d\")\n    stop(\"2D plots are not implemented yet\")\n\n  if (n_col > length(features)) n_col <- length(features)\n  if (plot && which == \"1d\") {\n    op <- par(mfrow = c(ceiling(length(features) / n_col), n_col),\n              oma = c(0, 0, 0, 0) + 0.2,\n              mar = c(3.5, 3.5, 0, 0) + 0.1,\n              mgp = c(1.7, 0.6, 0))\n    for (f in features) {\n      ord <- order(data[, f])\n      x <- data[, f][ord]\n      y <- shap_contrib[, f][ord]\n      x_lim <- range(x, na.rm = TRUE)\n      y_lim <- range(y, na.rm = TRUE)\n      do_na <- plot_NA && anyNA(x)\n      if (do_na) {\n        x_range <- diff(x_lim)\n        loc_na <- min(x, na.rm = TRUE) + x_range * pos_NA\n        
x_lim <- range(c(x_lim, loc_na))\n      }\n      x_uniq <- unique(x)\n      x2plot <- x\n      # add small jitter for discrete features with <= 5 distinct values\n      if (length(x_uniq) <= discrete_n_uniq)\n        x2plot <- jitter(x, amount = discrete_jitter * min(diff(x_uniq), na.rm = TRUE))\n      plot(x2plot, y, pch = pch, xlab = f, col = col, xlim = x_lim, ylim = y_lim, ylab = ylab, ...)\n      grid()\n      if (plot_loess) {\n        # compress x to 3 digits, and mean-aggregate y\n        zz <- data.table(x = signif(x, 3), y)[, .(.N, y = mean(y)), x]\n        if (nrow(zz) <= 5) {\n          lines(zz$x, zz$y, col = col_loess)\n        } else {\n          lo <- stats::loess(y ~ x, data = zz, weights = zz$N, span = span_loess)\n          zz$y_lo <- predict(lo, zz, type = \"link\")\n          lines(zz$x, zz$y_lo, col = col_loess)\n        }\n      }\n      if (do_na) {\n        i_na <- which(is.na(x))\n        x_na <- rep(loc_na, length(i_na))\n        x_na <- jitter(x_na, amount = x_range * 0.01)\n        points(x_na, y[i_na], pch = pch_NA, col = col_NA)\n      }\n    }\n    par(op)\n  }\n  if (plot && which == \"2d\") {\n    # TODO\n    warning(\"Bivariate plotting is currently not available.\")\n  }\n  invisible(list(data = data, shap_contrib = shap_contrib))\n}\n\n#' SHAP summary plot\n#'\n#' Visualizes SHAP contributions of different features.\n#'\n#' A point plot (each point representing one observation from `data`) is\n#' produced for each feature, with the points plotted on the SHAP value axis.\n#' Each point (observation) is coloured based on its feature value.\n#'\n#' The plot allows to see which features have a negative / positive contribution\n#' on the model prediction, and whether the contribution is different for larger\n#' or smaller values of the feature. 
Inspired by the summary plot of\n#' <https://github.com/shap/shap>.\n#'\n#' @inheritParams xgb.plot.shap\n#'\n#' @return A `ggplot2` object.\n#' @export\n#'\n#' @examples\n#' # See examples in xgb.plot.shap()\n#'\n#' @seealso [xgb.plot.shap()], [xgb.ggplot.shap.summary()],\n#'   and the Python library <https://github.com/shap/shap>.\nxgb.plot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, top_n = 10, model = NULL,\n                                  trees = NULL, target_class = NULL, approxcontrib = FALSE, subsample = NULL) {\n  # Only ggplot implementation is available.\n  xgb.ggplot.shap.summary(data, shap_contrib, features, top_n, model, trees, target_class, approxcontrib, subsample)\n}\n\n#' Prepare data for SHAP plots\n#'\n#' Internal function used in [xgb.plot.shap()], [xgb.plot.shap.summary()], etc.\n#'\n#' @inheritParams xgb.plot.shap\n#' @param max_observations Maximum number of observations to consider.\n#' @keywords internal\n#' @noRd\n#'\n#' @return\n#' A list containing:\n#' - `data`: The matrix of feature values.\n#' - `shap_contrib`: The matrix with corresponding SHAP values.\nxgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, model = NULL,\n                          trees = NULL, target_class = NULL, approxcontrib = FALSE,\n                          subsample = NULL, max_observations = 100000) {\n  if (!inherits(data, c(\"matrix\", \"dsparseMatrix\", \"data.frame\")))\n    stop(\"data: must be matrix, sparse matrix, or data.frame.\")\n  if (inherits(data, \"data.frame\") && length(class(data)) > 1L) {\n    data <- as.data.frame(data)\n  }\n\n  if (is.null(shap_contrib) && (is.null(model) || !inherits(model, \"xgb.Booster\")))\n    stop(\"when shap_contrib is not provided, one must provide an xgb.Booster model\")\n\n  if (is.null(features) && (is.null(model) || !inherits(model, \"xgb.Booster\")))\n    stop(\"when features are not provided, one must provide an xgb.Booster model to rank the 
features\")\n\n  last_dim <- function(v) dim(v)[length(dim(v))]\n\n  if (!is.null(shap_contrib) &&\n      (!is.array(shap_contrib) || nrow(shap_contrib) != nrow(data) || last_dim(shap_contrib) != ncol(data) + 1))\n    stop(\"shap_contrib is not compatible with the provided data\")\n\n  if (is.character(features) && is.null(colnames(data)))\n    stop(\"either provide `data` with column names or provide `features` as column indices\")\n\n  model_feature_names <- NULL\n  if (is.null(features) && !is.null(model)) {\n    model_feature_names <- xgb.feature_names(model)\n  }\n  if (is.null(model_feature_names) && xgb.num_feature(model) != ncol(data))\n    stop(\"if model has no feature_names, columns in `data` must match features in model\")\n\n  if (!is.null(subsample)) {\n    if (subsample <= 0 || subsample >= 1) {\n      stop(\"'subsample' must be a number between zero and one (non-inclusive).\")\n    }\n    sample_size <- as.integer(subsample * nrow(data))\n    if (sample_size < 2) {\n      stop(\"Sampling fraction involves less than 2 rows.\")\n    }\n    idx <- sample(x = seq_len(nrow(data)), size = sample_size, replace = FALSE)\n  } else {\n    idx <- seq_len(min(nrow(data), max_observations))\n  }\n  data <- data[idx, ]\n  if (is.null(colnames(data))) {\n    colnames(data) <- paste0(\"X\", seq_len(ncol(data)))\n  }\n\n  reshape_3d_shap_contrib <- function(shap_contrib, target_class) {\n    # multiclass: either choose a class or merge\n    if (is.list(shap_contrib)) {\n      if (!is.null(target_class)) {\n        shap_contrib <- shap_contrib[[target_class + 1]]\n      } else {\n        shap_contrib <- Reduce(\"+\", lapply(shap_contrib, abs))\n      }\n    } else if (length(dim(shap_contrib)) > 2) {\n      if (!is.null(target_class)) {\n        orig_shape <- dim(shap_contrib)\n        shap_contrib <- shap_contrib[, target_class + 1, , drop = TRUE]\n        if (!is.matrix(shap_contrib)) {\n          shap_contrib <- matrix(shap_contrib, orig_shape[c(1L, 3L)])\n        
}\n      } else {\n        shap_contrib <- apply(abs(shap_contrib), c(1L, 3L), sum)\n      }\n    }\n    return(shap_contrib)\n  }\n\n  if (is.null(shap_contrib)) {\n    shap_contrib <- predict.xgb.Booster(\n      model,\n      newdata = data,\n      predcontrib = TRUE,\n      approxcontrib = approxcontrib\n    )\n  }\n  shap_contrib <- reshape_3d_shap_contrib(shap_contrib, target_class)\n  if (is.null(colnames(shap_contrib))) {\n    colnames(shap_contrib) <- paste0(\"X\", seq_len(ncol(data)))\n  }\n\n  if (is.null(features)) {\n    if (!is.null(model_feature_names)) {\n      imp <- xgb.importance(model = model, trees = trees)\n    } else {\n      imp <- xgb.importance(model = model, trees = trees, feature_names = colnames(data))\n    }\n    top_n <- top_n[1]\n    if (top_n < 1 || top_n > 100) stop(\"top_n: must be an integer within [1, 100]\")\n    features <- imp$Feature[seq_len(min(top_n, NROW(imp)))]\n  }\n  if (is.character(features)) {\n    features <- match(features, colnames(data))\n  }\n\n  shap_contrib <- shap_contrib[, features, drop = FALSE]\n  data <- data[, features, drop = FALSE]\n\n  list(\n    data = data,\n    shap_contrib = shap_contrib\n  )\n}\n"
  },
  {
    "path": "R-package/R/xgb.plot.tree.R",
    "content": "#' Plot boosted trees\n#'\n#' Read a tree model text dump and plot the model.\n#'\n#' @details\n#' The content of each node is visualized as follows:\n#' - For non-terminal nodes, it will display the split condition (number or name\n#'   if available, and the condition that would decide to which node to go\n#'   next).\n#' - Those nodes will be connected to their children by arrows that indicate\n#'   whether the branch corresponds to the condition being met or not being met.\n#' - Terminal (leaf) nodes contain the margin to add when ending there.\n#'\n#' The \"Yes\" branches are marked by the \"< split_value\" label.\n#' The branches also used for missing values are marked as bold\n#' (as in \"carrying extra capacity\").\n#'\n#' This function uses [GraphViz](https://www.graphviz.org/) as DiagrammeR\n#' backend.\n#'\n#' @param model Object of class `xgb.Booster`. If it contains feature names\n#'   (they can be set through [setinfo()]), they will be used in the\n#'   output from this function.\n#' @param tree_idx An integer of the tree index that should be used. This\n#'   is a 1-based index.\n#' @param plot_width,plot_height Width and height of the graph in pixels.\n#'   The values are passed to `DiagrammeR::grViz()`.\n#' @param with_stats Whether to dump some additional statistics about the\n#'   splits.  When this option is on, the model dump contains two additional\n#'   values: gain is the approximate loss function gain we get in each split;\n#'   cover is the sum of second order gradient in each node.\n#' @inheritParams xgb.train\n#' @return\n#'\n#' Rendered graph object which is an htmlwidget of class `grViz`. Similar to\n#' \"ggplot\" objects, it needs to be printed when not running from the command\n#' line.\n#'\n#' @examples\n#' data(\"ToothGrowth\")\n#' x <- ToothGrowth[, c(\"len\", \"dose\")]\n#' y <- ToothGrowth$supp\n#' model <- xgboost(\n#'   x, y,\n#'   nthreads = 1L,\n#'   nrounds = 3L,\n#'   max_depth = 3L\n#' )\n#'\n#' # plot the first tree\n#' xgb.plot.tree(model, tree_idx = 1)\n#'\n#' # Below is an example of how to save this plot to a file.\n#' if (require(\"DiagrammeR\") && require(\"htmlwidgets\")) {\n#'   fname <- file.path(tempdir(), \"plot.html\")\n#'   gr <- xgb.plot.tree(model, tree_idx = 1)\n#'   htmlwidgets::saveWidget(gr, fname)\n#' }\n#' @export\nxgb.plot.tree <- function(model,\n                          tree_idx = 1,\n                          plot_width = NULL,\n                          plot_height = NULL,\n                          with_stats = FALSE, ...) {\n  check.deprecation(deprecated_plottree_params, match.call(), ...)\n  if (!inherits(model, \"xgb.Booster\")) {\n    stop(\"model has to be an object of the class xgb.Booster\")\n  }\n  if (!requireNamespace(\"DiagrammeR\", quietly = TRUE)) {\n    stop(\"The DiagrammeR package is required for xgb.plot.tree\", call. = FALSE)\n  }\n\n  txt <- xgb.dump(model, dump_format = \"dot\", with_stats = with_stats)\n  DiagrammeR::grViz(\n    txt[[tree_idx]], width = plot_width, height = plot_height\n  )\n}\n"
  },
  {
    "path": "R-package/R/xgb.save.R",
    "content": "#' Save XGBoost model to binary file\n#'\n#' Save XGBoost model to a file in binary or JSON format.\n#'\n#' @param model Model object of `xgb.Booster` class.\n#' @param fname Name of the file to write. Its extension determines the serialization format:\n#'   - \".ubj\": Use the universal binary JSON format (recommended).\n#'     This format uses binary types for e.g. floating point numbers, thereby preventing any loss\n#'     of precision when converting to a human-readable JSON text or similar.\n#'   - \".json\": Use plain JSON, which is a human-readable format.\n#'   - \".deprecated\": Use **deprecated** binary format. This format will\n#'     not be able to save attributes introduced after v1 of XGBoost, such as the \"best_iteration\"\n#'     attribute that boosters might keep, nor feature names or user-specified attributes.\n#'   - If the format is not specified by passing one of the file extensions above, will\n#'     default to UBJ.\n#'\n#' @details\n#'\n#' This method allows saving a model in an XGBoost-internal binary or text format which is universal\n#' among the various xgboost interfaces. In R, the saved model file could be read later\n#' using either the [xgb.load()] function or the `xgb_model` parameter of [xgb.train()].\n#'\n#' Note: a model can also be saved as an R object (e.g., by using [saveRDS()]\n#' or [save()]). However, it would then only be compatible with R, and\n#' corresponding R methods would need to be used to load it. Moreover, persisting the model with\n#' [saveRDS()] or [save()] might cause compatibility problems in\n#' future versions of XGBoost. Consult [a-compatibility-note-for-saveRDS-save] to learn\n#' how to persist models in a future-proof way, i.e., to make the model accessible in future\n#' releases of XGBoost.\n#'\n#' @seealso [xgb.load()]\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' train <- agaricus.train\n#' test <- agaricus.test\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' fname <- file.path(tempdir(), \"xgb.ubj\")\n#' xgb.save(bst, fname)\n#' bst <- xgb.load(fname)\n#' @export\nxgb.save <- function(model, fname) {\n  if (typeof(fname) != \"character\")\n    stop(\"fname must be character\")\n  if (!inherits(model, \"xgb.Booster\")) {\n    stop(\"model must be xgb.Booster.\",\n         if (inherits(model, \"xgb.DMatrix\")) \" Use xgb.DMatrix.save to save an xgb.DMatrix object.\" else \"\")\n  }\n  fname <- path.expand(fname)\n  .Call(XGBoosterSaveModel_R, xgb.get.handle(model), enc2utf8(fname[1]))\n  return(TRUE)\n}\n"
  },
  {
    "path": "R-package/R/xgb.save.raw.R",
    "content": "#' Save XGBoost model to R's raw vector\n#'\n#' Save XGBoost model from [xgboost()] or [xgb.train()].\n#' Call [xgb.load.raw()] to load the model back from raw vector.\n#'\n#' @param model The model object.\n#' @param raw_format The format for encoding the booster:\n#'   - \"json\": Encode the booster into JSON text document.\n#'   - \"ubj\":  Encode the booster into Universal Binary JSON.\n#'   - \"deprecated\": Encode the booster into old customized binary format.\n#'\n#' @examples\n#' \\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n#' data(agaricus.train, package = \"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' train <- agaricus.train\n#' test <- agaricus.test\n#'\n#' bst <- xgb.train(\n#'   data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n#'   nrounds = 2,\n#'   params = xgb.params(\n#'     max_depth = 2,\n#'     nthread = nthread,\n#'     objective = \"binary:logistic\"\n#'   )\n#' )\n#'\n#' raw <- xgb.save.raw(bst)\n#' bst <- xgb.load.raw(raw)\n#'\n#' @export\nxgb.save.raw <- function(model, raw_format = \"ubj\") {\n  handle <- xgb.get.handle(model)\n  args <- list(format = raw_format)\n  .Call(XGBoosterSaveModelToRaw_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE))\n}\n"
  },
  {
    "path": "R-package/R/xgb.train.R",
    "content": "#' @title Fit XGBoost Model\n#' @description Fits an XGBoost model to given data in DMatrix format (e.g. as produced by [xgb.DMatrix()]).\n#' See the tutorial [Introduction to Boosted Trees](https://xgboost.readthedocs.io/en/stable/tutorials/model.html)\n#' for a longer explanation of what XGBoost does, and the rest of the\n#' [XGBoost Tutorials](https://xgboost.readthedocs.io/en/latest/tutorials/index.html) for further\n#' explanations XGBoost's features and usage.\n#'\n#' Compared to function [xgboost()] which is a user-friendly function targeted towards interactive\n#' usage, ``xgb.train`` is a lower-level interface which allows finer-grained control and exposes\n#' further functionalities offered by the core library (such as learning-to-rank objectives), but\n#' which works exclusively with XGBoost's own data format (\"DMatrices\") instead of with regular R\n#' objects.\n#'\n#' The syntax of this function closely mimics the same function from the Python package for XGBoost,\n#' and is recommended to use for package developers over `xgboost()` as it will provide a more\n#' stable interface (with fewer breaking changes) and lower overhead from data validations.\n#'\n#' See also the [migration guide](https://xgboost.readthedocs.io/en/latest/R-package/migration_guide.html)\n#' if coming from a previous version of XGBoost in the 1.x series.\n#' @param params List of XGBoost parameters which control the model building process.\n#' See the [online documentation](https://xgboost.readthedocs.io/en/latest/parameter.html)\n#' and the documentation for [xgb.params()] for details.\n#'\n#' Should be passed as list with named entries. Parameters that are not specified in this\n#' list will use their default values.\n#'\n#' A list of named parameters can be created through the function [xgb.params()], which\n#' accepts all valid parameters as function arguments.\n#' @param data Training dataset. 
`xgb.train()` accepts only an `xgb.DMatrix` as the input.\n#'\n#' Note that there is a function [xgboost()] which is meant to accept R data objects\n#' as inputs, such as data frames and matrices.\n#' @param nrounds Max number of boosting iterations.\n#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.\n#'   Metrics specified in either `eval_metric` (under params) or `custom_metric` (function\n#'   argument here) will be computed for each of these datasets during each boosting iteration,\n#'   and stored in the end as a field named `evaluation_log` in the resulting object.\n#'\n#'   When either `verbose>=1` or [xgb.cb.print.evaluation()] callback is engaged, the performance\n#'   results are continuously printed out during the training.\n#'\n#'   E.g., specifying `evals=list(validation1=mat1, validation2=mat2)` allows to track\n#'   the performance of each round's model on `mat1` and `mat2`.\n#' @param objective Customized objective function. Should take two arguments: the first one will be the\n#'   current predictions (either a numeric vector or matrix depending on the number of targets / classes),\n#'   and the second one will be the `data` DMatrix object that is used for training.\n#'\n#'   It should return a list with two elements `grad` and `hess` (in that order), as either\n#'   numeric vectors or numeric matrices depending on the number of targets / classes (same\n#'   dimension as the predictions that are passed as first argument).\n#' @param custom_metric Customized evaluation function. 
Just like `objective`, should take two arguments,\n#'   with the first one being the predictions and the second one the `data` DMatrix.\n#'\n#'   Should return a list with two elements `metric` (name that will be displayed for this metric,\n#'   should be a string / character), and `value` (the number that the function calculates, should\n#'   be a numeric scalar).\n#'\n#'   Note that even if passing `custom_metric`, objectives also have an associated default metric that\n#'   will be evaluated in addition to it. In order to disable the built-in metric, one can pass\n#'   parameter `disable_default_eval_metric = TRUE`.\n#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.\n#'   If 2, some additional information will be printed out.\n#'   Note that setting `verbose > 0` automatically engages the\n#'   `xgb.cb.print.evaluation(period=1)` callback function.\n#' @param print_every_n When passing `verbose>0`, evaluation logs (metrics calculated on the\n#' data passed under `evals`) will be printed every nth iteration according to the value passed\n#' here. The first and last iteration are always included regardless of this 'n'.\n#'\n#' Only has an effect when passing data under `evals` and when passing `verbose>0`. The parameter\n#' is passed to the [xgb.cb.print.evaluation()] callback.\n#' @param early_stopping_rounds Number of boosting rounds after which training will be stopped\n#'   if there is no improvement in performance (as measured by the evaluation metric that is\n#'   supplied or selected by default for the objective) on the evaluation data passed under\n#'   `evals`.\n#'\n#'   Must pass `evals` in order to use this functionality. Setting this parameter adds the\n#'   [xgb.cb.early.stop()] callback.\n#'\n#'   If `NULL`, early stopping will not be used.\n#' @param maximize If `custom_metric` and `early_stopping_rounds` are set, then this parameter must be set as well.\n#'   When it is `TRUE`, it means the larger the evaluation score the better.\n#'   This parameter is passed to the [xgb.cb.early.stop()] callback.\n#' @param save_period When not `NULL`, model is saved to disk after every `save_period` rounds.\n#'   0 means save at the end. The saving is handled by the [xgb.cb.save.model()] callback.\n#' @param save_name the name or path for periodically saved model file.\n#' @param xgb_model A previously built model to continue the training from.\n#'   Could be either an object of class `xgb.Booster`, or its raw data, or the name of a\n#'   file with a previously saved model.\n#' @param callbacks A list of callback functions to perform various tasks during boosting.\n#'   See [xgb.Callback()]. Some of the callbacks are automatically created depending on the\n#'   parameters' values. User can provide either existing or their own callback methods in order\n#'   to customize the training process.\n#'\n#'   Note that some callbacks might try to leave attributes in the resulting model object,\n#'   such as an evaluation log (a `data.table` object) - be aware that these objects are kept\n#'   as R attributes, and thus do not get saved when using XGBoost's own serializers like\n#'   [xgb.save()] (but are kept when using R serializers like [saveRDS()]).\n#' @param ... Not used.\n#'\n#' Some arguments that were part of this function in previous XGBoost versions are currently\n#' deprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\n#' a warning (by default) and use its current equivalent instead. This warning will become an\n#' error if using the \\link[=xgboost-options]{'strict mode' option}.\n#'\n#' If some additional argument is passed that is neither a current function argument nor\n#' a deprecated or renamed argument, a warning or error will be thrown depending on the\n#' 'strict mode' option.\n#'\n#' \\bold{Important:} `...` will be removed in a future version, and all the current\n#' deprecation warnings will become errors. Please use only arguments that form part of\n#' the function signature.\n#' @return An object of class `xgb.Booster`.\n#' @details\n#' Compared to [xgboost()], the `xgb.train()` interface supports advanced features such as\n#' `evals`, customized objective and evaluation metric functions, among others, with the\n#' difference that these work on `xgb.DMatrix` objects and do not follow typical R idioms.\n#'\n#' Parallelization is automatically enabled if OpenMP is present.\n#' Number of threads can also be manually specified via the `nthread` parameter.\n#'\n#' While in XGBoost language bindings, the default random seed defaults to zero, in R, if a parameter `seed`\n#' is not manually supplied, it will generate a random seed through R's own random number generator,\n#' whose seed in turn is controllable through `set.seed`. 
If `seed` is passed, it will override the\n#' RNG from R.\n#'\n#' The following callbacks are automatically created when certain parameters are set:\n#' - [xgb.cb.print.evaluation()] is turned on when `verbose > 0` and the `print_every_n`\n#'   parameter is passed to it.\n#' - [xgb.cb.evaluation.log()] is on when `evals` is present.\n#' - [xgb.cb.early.stop()]: When `early_stopping_rounds` is set.\n#' - [xgb.cb.save.model()]: When `save_period > 0` is set.\n#'\n#' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently\n#' from typical R objects (it's an 'altrep' list class), and it makes a separation between\n#' internal booster attributes (restricted to jsonifyable data), accessed through [xgb.attr()]\n#' and shared between interfaces through serialization functions like [xgb.save()]; and\n#' R-specific attributes (typically the result from a callback), accessed through [attributes()]\n#' and [attr()], which are otherwise\n#' only used in the R interface, only kept when using R's serializers like [saveRDS()], and\n#' not anyhow used by functions like `predict.xgb.Booster()`.\n#'\n#' Be aware that one such R attribute that is automatically added is `params` - this attribute\n#' is assigned from the `params` argument to this function, and is only meant to serve as a\n#' reference for what went into the booster, but is not used in other methods that take a booster\n#' object - so for example, changing the booster's configuration requires calling `xgb.config<-`\n#' or `xgb.model.parameters<-`, while simply modifying `attributes(model)$params$<...>` will have no\n#' effect elsewhere.\n#'\n#' @seealso [xgb.Callback()], [predict.xgb.Booster()], [xgb.cv()]\n#'\n#' @references\n#' Tianqi Chen and Carlos Guestrin, \"XGBoost: A Scalable Tree Boosting System\",\n#' 22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \\url{https://arxiv.org/abs/1603.02754}\n#'\n#' @examples\n#' data(agaricus.train, package = 
\"xgboost\")\n#' data(agaricus.test, package = \"xgboost\")\n#'\n#' ## Keep the number of threads to 1 for examples\n#' nthread <- 1\n#' data.table::setDTthreads(nthread)\n#'\n#' dtrain <- with(\n#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)\n#' )\n#' dtest <- with(\n#'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)\n#' )\n#' evals <- list(train = dtrain, eval = dtest)\n#'\n#' ## A simple xgb.train example:\n#' param <- xgb.params(\n#'   max_depth = 2,\n#'   nthread = nthread,\n#'   objective = \"binary:logistic\",\n#'   eval_metric = \"auc\"\n#' )\n#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)\n#'\n#' ## An xgb.train example where custom objective and evaluation metric are\n#' ## used:\n#' logregobj <- function(preds, dtrain) {\n#'    labels <- getinfo(dtrain, \"label\")\n#'    preds <- 1/(1 + exp(-preds))\n#'    grad <- preds - labels\n#'    hess <- preds * (1 - preds)\n#'    return(list(grad = grad, hess = hess))\n#' }\n#' evalerror <- function(preds, dtrain) {\n#'   labels <- getinfo(dtrain, \"label\")\n#'   err <- as.numeric(sum(labels != (preds > 0)))/length(labels)\n#'   return(list(metric = \"error\", value = err))\n#' }\n#'\n#' # These functions could be used by passing them as 'objective' and\n#' # 'eval_metric' parameters in the params list:\n#' param <- xgb.params(\n#'   max_depth = 2,\n#'   nthread = nthread,\n#'   objective = logregobj,\n#'   eval_metric = evalerror\n#' )\n#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)\n#'\n#' # ... 
or as dedicated 'objective' and 'custom_metric' parameters of xgb.train:\n#' bst <- xgb.train(\n#'   within(param, rm(\"objective\", \"eval_metric\")),\n#'   dtrain, nrounds = 2, evals = evals,\n#'   objective = logregobj, custom_metric = evalerror\n#' )\n#'\n#'\n#' ## An xgb.train example of using variable learning rates at each iteration:\n#' param <- xgb.params(\n#'   max_depth = 2,\n#'   learning_rate = 1,\n#'   nthread = nthread,\n#'   objective = \"binary:logistic\",\n#'   eval_metric = \"auc\"\n#' )\n#' my_learning_rates <- list(learning_rate = c(0.5, 0.1))\n#'\n#' bst <- xgb.train(\n#'  param,\n#'  dtrain,\n#'  nrounds = 2,\n#'  evals = evals,\n#'  verbose = 0,\n#'  callbacks = list(xgb.cb.reset.parameters(my_learning_rates))\n#' )\n#'\n#' ## Early stopping:\n#' bst <- xgb.train(\n#'   param, dtrain, nrounds = 25, evals = evals, early_stopping_rounds = 3\n#' )\n#' @export\nxgb.train <- function(params = xgb.params(), data, nrounds, evals = list(),\n                      objective = NULL, custom_metric = NULL, verbose = 1, print_every_n = 1L,\n                      early_stopping_rounds = NULL, maximize = NULL,\n                      save_period = NULL, save_name = \"xgboost.model\",\n                      xgb_model = NULL, callbacks = list(), ...) 
{\n  check.deprecation(deprecated_train_params, match.call(), ...)\n\n  params <- check.booster.params(params)\n  tmp <- check.custom.obj(params, objective)\n  params <- tmp$params\n  objective <- tmp$objective\n  tmp <- check.custom.eval(params, custom_metric, maximize, early_stopping_rounds, callbacks)\n  params <- tmp$params\n  custom_metric <- tmp$custom_metric\n\n  # data & evals checks\n  dtrain <- data\n  if (!inherits(dtrain, \"xgb.DMatrix\"))\n    stop(\"second argument dtrain must be xgb.DMatrix\")\n  if (length(evals) > 0) {\n    if (typeof(evals) != \"list\" ||\n        !all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))\n      stop(\"'evals' must be a list of xgb.DMatrix elements\")\n    evnames <- names(evals)\n    if (is.null(evnames) || any(evnames == \"\"))\n      stop(\"each element of 'evals' must have a name tag\")\n  }\n  # Handle multiple evaluation metrics given as a list\n  for (m in params$eval_metric) {\n    params <- c(params, list(eval_metric = m))\n  }\n\n  params <- c(params)\n  params['validate_parameters'] <- TRUE\n  if (!(\"seed\" %in% names(params))) {\n    params[[\"seed\"]] <- sample(.Machine$integer.max, size = 1)\n  }\n\n  # callbacks\n  tmp <- .process.callbacks(callbacks, is_cv = FALSE)\n  callbacks <- tmp$callbacks\n  cb_names <- tmp$cb_names\n  rm(tmp)\n\n  # Early stopping callback (should always come first)\n  if (!is.null(early_stopping_rounds) && !(\"early_stop\" %in% cb_names)) {\n    callbacks <- add.callback(\n      callbacks,\n      xgb.cb.early.stop(\n        early_stopping_rounds,\n        maximize = maximize,\n        verbose = verbose\n      ),\n      as_first_elt = TRUE\n    )\n  }\n  # evaluation printing callback\n  print_every_n <- max(as.integer(print_every_n), 1L)\n  if (verbose && !(\"print_evaluation\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))\n  }\n  # evaluation log callback:  it is automatically enabled when 'evals' is provided\n  
if (length(evals) && !(\"evaluation_log\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())\n  }\n  # Model saving callback\n  if (!is.null(save_period) && !(\"save_model\" %in% cb_names)) {\n    callbacks <- add.callback(callbacks, xgb.cb.save.model(save_period, save_name))\n  }\n\n  # The tree updating process would need slightly different handling\n  is_update <- NVL(params[['process_type']], '.') == 'update'\n\n  # Construct a booster (either a new one or load from xgb_model)\n  bst <- xgb.Booster(\n    params = params,\n    cachelist = append(evals, dtrain),\n    modelfile = xgb_model\n  )\n  niter_init <- bst$niter\n  bst <- bst$bst\n  .Call(\n    XGBoosterCopyInfoFromDMatrix_R,\n    xgb.get.handle(bst),\n    dtrain\n  )\n\n  if (is_update && nrounds > niter_init)\n    stop(\"nrounds cannot be larger than \", niter_init, \" (nrounds of xgb_model)\")\n\n  niter_skip <- ifelse(is_update, 0, niter_init)\n  begin_iteration <- niter_skip + 1\n  end_iteration <- niter_skip + nrounds\n\n  .execute.cb.before.training(\n    callbacks,\n    bst,\n    dtrain,\n    evals,\n    begin_iteration,\n    end_iteration\n  )\n\n  # the main loop for boosting iterations\n  # FIX: Handle nrounds=0 to prevent 1:0 sequence and ensure 'iteration' is defined\n  if (nrounds == 0) iteration <- end_iteration\n  for (iteration in seq(from = begin_iteration, length.out = nrounds)) {\n\n    .execute.cb.before.iter(\n      callbacks,\n      bst,\n      dtrain,\n      evals,\n      iteration\n    )\n\n    xgb.iter.update(\n      bst = bst,\n      dtrain = dtrain,\n      iter = iteration - 1,\n      objective = objective\n    )\n\n    bst_evaluation <- NULL\n    if (length(evals) > 0) {\n      bst_evaluation <- xgb.iter.eval(\n        bst = bst,\n        evals = evals,\n        iter = iteration - 1,\n        custom_metric = custom_metric\n      )\n    }\n\n    should_stop <- .execute.cb.after.iter(\n      callbacks,\n      bst,\n      dtrain,\n      evals,\n  
    iteration,\n      bst_evaluation\n    )\n\n    if (should_stop) break\n  }\n\n  cb_outputs <- .execute.cb.after.training(\n    callbacks,\n    bst,\n    dtrain,\n    evals,\n    iteration,\n    bst_evaluation\n  )\n\n  extra_attrs <- list(\n    call = match.call(),\n    params = params\n  )\n  bst <- xgb.reset.Booster(bst)\n  curr_attrs <- attributes(bst)\n  if (NROW(curr_attrs)) {\n    curr_attrs <- curr_attrs[\n      setdiff(\n        names(curr_attrs),\n        c(names(extra_attrs), names(cb_outputs))\n      )\n    ]\n  }\n  curr_attrs <- c(extra_attrs, curr_attrs)\n  if (NROW(cb_outputs)) {\n    curr_attrs <- c(curr_attrs, cb_outputs)\n  }\n  attributes(bst) <- curr_attrs\n\n  return(bst)\n}\n\n# nolint start: line_length_linter.\n#' @title XGBoost Parameters\n#' @description Convenience function to generate a list of named XGBoost parameters, which\n#' can be passed as argument `params` to [xgb.train()]. See the [online documentation](\n#' https://xgboost.readthedocs.io/en/stable/parameter.html) for more details.\n#'\n#' The purpose of this function is to enable IDE autocompletions and to provide in-package\n#' documentation for all the possible parameters that XGBoost accepts. The output from this\n#' function is just a regular R list containing the parameters that were set to non-default\n#' values. Note that this function will not perform any validation on the supplied arguments.\n#'\n#' If passing `NULL` for a given parameter (the default for all of them), then the default\n#' value for that parameter will be used. Default values are automatically determined by the\n#' XGBoost core library upon calls to [xgb.train()] or [xgb.cv()], and are subject to change\n#' over XGBoost library versions. Some of them might differ according to the\n#' booster type (e.g. defaults for regularization are different for linear and tree-based boosters).\n#' @return A list with the entries that were passed non-NULL values. 
It is intended to\n#' be passed as argument `params` to [xgb.train()] or [xgb.cv()].\n#' @export\n#' @param objective (default=`\"reg:squarederror\"`)\n#' Specify the learning task and the corresponding learning objective or a custom objective function to be used.\n#'\n#' For custom objective, see [Custom Objective and Evaluation Metric](https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html)\n#' and [Custom objective and metric](https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html#custom-obj-metric) for more information,\n#' along with the end note for function signatures.\n#'\n#' Supported values are:\n#' - `\"reg:squarederror\"`: regression with squared loss.\n#' - `\"reg:squaredlogerror\"`: regression with squared log loss \\eqn{\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}.  All input labels are required to be greater than -1.  Also, see metric `rmsle` for possible issue  with this objective.\n#' - `\"reg:logistic\"`: logistic regression, output probability\n#' - `\"reg:pseudohubererror\"`: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.\n#' - `\"reg:absoluteerror\"`: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.\n#'\n#'   Version added: 1.7.0\n#' - `\"reg:quantileerror\"`: Quantile loss, also known as \"pinball loss\". See later sections for its parameter and [Quantile Regression](https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py) for a worked example.\n#'\n#'   Version added: 2.0.0\n#' - `\"reg:expectileerror\"`: Expectile loss. 
See later sections for its parameter.\n#' - `\"binary:logistic\"`: logistic regression for binary classification, output probability\n#' - `\"binary:logitraw\"`: logistic regression for binary classification, output score before logistic transformation\n#' - `\"binary:hinge\"`: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.\n#' - `\"count:poisson\"`: Poisson regression for count data, output mean of Poisson distribution.\n#'   `\"max_delta_step\"` is set to 0.7 by default in Poisson regression (used to safeguard optimization)\n#' - `\"survival:cox\"`: Cox regression for right censored survival time data (negative values are considered right censored).\n#'\n#'   Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function `h(t) = h0(t) * HR`).\n#' - `\"survival:aft\"`: Accelerated failure time model for censored survival time data.\n#' See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details.\n#' - `\"multi:softmax\"`: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes)\n#' - `\"multi:softprob\"`: same as softmax, but output a vector of `ndata * nclass`, which can be further reshaped to `ndata * nclass` matrix. The result contains predicted probability of each data point belonging to each class.\n#' - `\"rank:ndcg\"`: Use LambdaMART to perform pair-wise ranking where the normalized discounted cumulative gain (NDCG) is maximized. This objective supports position debiasing for click data.\n#' - `\"rank:map\"`: Use LambdaMART to perform pair-wise ranking where the mean average precision (MAP) is maximized\n#' - `\"rank:pairwise\"`: Use LambdaRank to perform pair-wise ranking using the `ranknet` objective.\n#' - `\"reg:gamma\"`: gamma regression with log-link. 
Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be gamma-distributed.\n#' - `\"reg:tweedie\"`: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be tweedie-distributed.\n#' @param verbosity (default=1)\n#' Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3\n#' (debug). Sometimes XGBoost tries to change configurations based on heuristics, which\n#' is displayed as warning message. If there's unexpected behaviour, please try to\n#' increase value of verbosity.\n#' @param nthread (default to maximum number of threads available if not set)\n#' Number of parallel threads used to run XGBoost. When choosing it, please keep thread\n#' contention and hyperthreading in mind.\n#' @param seed Random number seed. If not specified, will take a random seed through R's own RNG engine.\n#' @param booster (default= `\"gbtree\"`)\n#' Which booster to use. Can be `\"gbtree\"`, `\"gblinear\"` or `\"dart\"`; `\"gbtree\"` and `\"dart\"` use tree based models while `\"gblinear\"` uses linear functions.\n#' @param eta,learning_rate (two aliases for the same parameter)\n#' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `eta` shrinks the feature weights to make the boosting process more conservative.\n#' - range: \\eqn{[0,1]}\n#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.\n#'\n#' Note: should only pass one of `eta` or `learning_rate`. Both refer to the same parameter and there's thus no difference between one or the other.\n#' @param gamma,min_split_loss (two aliases for the same parameter) (for Tree Booster) (default=0, alias: `gamma`)\n#' Minimum loss reduction required to make a further partition on a leaf node of the tree. 
The larger `min_split_loss` is, the more conservative the algorithm will be. Note that a tree where no splits were made might still contain a single terminal node with a non-zero score.\n#'\n#' range: \\eqn{[0, \\infty)}\n#'\n#' Note: should only pass one of `gamma` or `min_split_loss`. Both refer to the same parameter and there's thus no difference between one or the other.\n#' @param max_depth (for Tree Booster) (default=6, type=int32)\n#' Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. `\"exact\"` tree method requires non-zero value.\n#'\n#' range: \\eqn{[0, \\infty)}\n#' @param min_child_weight (for Tree Booster) (default=1)\n#' Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than `min_child_weight`, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger `min_child_weight` is, the more conservative the algorithm will be.\n#'\n#' range: \\eqn{[0, \\infty)}\n#' @param max_delta_step (for Tree Booster) (default=0)\n#' Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update.\n#'\n#' range: \\eqn{[0, \\infty)}\n#' @param subsample (for Tree Booster) (default=1)\n#' Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. 
Subsampling will occur once in every boosting iteration.\n#'\n#' range: \\eqn{(0,1]}\n#' @param sampling_method (for Tree Booster) (default= `\"uniform\"`)\n#' The method to use to sample the training instances.\n#' - `\"uniform\"`: each training instance has an equal probability of being selected. Typically set\n#'   `\"subsample\"` >= 0.5 for good results.\n#' - `\"gradient_based\"`: the selection probability for each training instance is proportional to the\n#'   \\bold{regularized absolute value} of gradients (more specifically, \\eqn{\\sqrt{g^2+\\lambda h^2}}).\n#'   `\"subsample\"` may be set to as low as 0.1 without loss of model accuracy. Note that this\n#'   sampling method is only supported when `\"tree_method\"` is set to `\"hist\"`; other tree\n#'   methods only support `\"uniform\"` sampling.\n#' @param colsample_bytree,colsample_bylevel,colsample_bynode (for Tree Booster) (default=1)\n#' This is a family of parameters for subsampling of columns.\n#' - All `\"colsample_by*\"` parameters have a range of \\eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.\n#' - `\"colsample_bytree\"` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.\n#' - `\"colsample_bylevel\"` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.\n#' - `\"colsample_bynode\"` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.\n#' - `\"colsample_by*\"` parameters work cumulatively. 
For instance,\n#'   the combination `{'colsample_bytree'=0.5, 'colsample_bylevel'=0.5, 'colsample_bynode'=0.5}` with 64 features will leave 8 features to choose from at\n#'   each split.\n#'\n#' One can set the `\"feature_weights\"` for DMatrix to\n#' define the probability of each feature being selected when using column sampling.\n#' @param lambda,reg_lambda (two aliases for the same parameter)\n#'\n#' - For tree-based boosters:\n#'   - L2 regularization term on weights. Increasing this value will make model more conservative.\n#'   - default: 1\n#'   - range: \\eqn{[0, \\infty)}\n#' - For linear booster:\n#'   - L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.\n#'   - default: 0\n#'   - range: \\eqn{[0, \\infty)}\n#'\n#' Note: should only pass one of `lambda` or `reg_lambda`. Both refer to the same parameter and there's thus no difference between one or the other.\n#' @param alpha,reg_alpha (two aliases for the same parameter)\n#' - L1 regularization term on weights. Increasing this value will make model more conservative.\n#' - For the linear booster, it's normalised to number of training examples.\n#' - default: 0\n#' - range: \\eqn{[0, \\infty)}\n#'\n#' Note: should only pass one of `alpha` or `reg_alpha`. Both refer to the same parameter and there's thus no difference between one or the other.\n#' @param tree_method (for Tree Booster) (default= `\"auto\"`)\n#' The tree construction algorithm used in XGBoost. See description in the [reference paper](https://arxiv.org/abs/1603.02754) and [Tree Methods](https://xgboost.readthedocs.io/en/latest/treemethod.html).\n#'\n#' Choices: `\"auto\"`, `\"exact\"`, `\"approx\"`, `\"hist\"`, this is a combination of commonly\n#' used updaters.  For other updaters like `\"refresh\"`, set the parameter `updater`\n#' directly.\n#' - `\"auto\"`: Same as the `\"hist\"` tree method.\n#' - `\"exact\"`: Exact greedy algorithm.  
Enumerates all split candidates.\n#' - `\"approx\"`: Approximate greedy algorithm using quantile sketch and gradient histogram.\n#' - `\"hist\"`: Faster histogram optimized approximate greedy algorithm.\n#' @param scale_pos_weight (for Tree Booster) (default=1)\n#' Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: `sum(negative instances) / sum(positive instances)`. See [Parameters Tuning](https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html) for more discussion. Also, see Higgs Kaggle competition demo for examples: [R](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R), [py1](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py), [py2](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py), [py3](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py).\n#' @param updater Has different meanings depending on the type of booster.\n#'\n#' - For tree-based boosters:\n#'   A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. 
The following updaters exist:\n#'   - `\"grow_colmaker\"`: non-distributed column-based construction of trees.\n#'   - `\"grow_histmaker\"`: distributed tree construction with row-based data splitting based on global proposal of histogram counting.\n#'   - `\"grow_quantile_histmaker\"`: Grow tree using quantized histogram.\n#'   - `\"grow_gpu_hist\"`:  Enabled when `tree_method` is set to `\"hist\"` along with `device=\"cuda\"`.\n#'   - `\"grow_gpu_approx\"`: Enabled when `tree_method` is set to `\"approx\"` along with `device=\"cuda\"`.\n#'   - `\"sync\"`: synchronizes trees in all distributed nodes.\n#'   - `\"refresh\"`: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.\n#'   - `\"prune\"`: prunes the splits where loss < `min_split_loss` (or `gamma`) and nodes that have depth greater than `max_depth`.\n#'\n#' - For `booster=\"gblinear\"`:\n#' (default= `\"shotgun\"`) Choice of algorithm to fit linear model\n#'   - `\"shotgun\"`: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.\n#'   - `\"coord_descent\"`: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the `device` parameter is set to `\"cuda\"` or `\"gpu\"`, a GPU variant would be used.\n#' @param refresh_leaf (for Tree Booster) (default=1)\n#' This is a parameter of the `\"refresh\"` updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. 
When it is 0, only node stats are updated.\n#' @param grow_policy (for Tree Booster) (default= `\"depthwise\"`)\n#' - Controls a way new nodes are added to the tree.\n#' - Currently supported only if `tree_method` is set to `\"hist\"` or `\"approx\"`.\n#' - Choices: `\"depthwise\"`, `\"lossguide\"`\n#'   - `\"depthwise\"`: split at nodes closest to the root.\n#'   - `\"lossguide\"`: split at nodes with highest loss change.\n#' @param max_leaves (for Tree Booster) (default=0, type=int32)\n#' Maximum number of nodes to be added.  Not used by `\"exact\"` tree method.\n#' @param max_bin (for Tree Booster) (default=256, type=int32)\n#' - Only used if `tree_method` is set to `\"hist\"` or `\"approx\"`.\n#' - Maximum number of discrete bins to bucket continuous features.\n#' - Increasing this number improves the optimality of splits at the cost of higher computation time.\n#' @param num_parallel_tree (for Tree Booster) (default=1)\n#' Number of parallel trees constructed during each iteration. This option is used to support boosted random forest.\n#' @param monotone_constraints (for Tree Booster)\n#' Constraint of variable monotonicity. See [Monotonic Constraints](https://xgboost.readthedocs.io/en/latest/tutorials/monotonic.html) for more information.\n#' @param interaction_constraints (for Tree Booster)\n#' Constraints for interaction representing permitted interactions. The constraints must\n#' be specified in the form of a nested list, e.g. `list(c(0, 1), c(2, 3, 4))`, where each inner\n#' list is a group of indices of features (base-0 numeration) that are allowed to interact with each other.\n#' See [Feature Interaction Constraints](https://xgboost.readthedocs.io/en/latest/tutorials/feature_interaction_constraint.html) for more information.\n#' @param multi_strategy (for Tree Booster) (default = `\"one_output_per_tree\"`)\n#' The strategy used for training multi-target models, including multi-target regression\n#' and multi-class classification. 
See [Multiple Outputs](https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html) for more information.\n#' - `\"one_output_per_tree\"`: One model for each target.\n#' - `\"multi_output_tree\"`:  Use multi-target trees.\n#'\n#' Version added: 2.0.0\n#'\n#' Note: This parameter is working-in-progress.\n#' @param base_score\n#' - The initial prediction score of all instances, global bias\n#' - The parameter is automatically estimated for selected objectives before training. To\n#'   disable the estimation, specify a real number argument.\n#' - If `base_margin` is supplied, `base_score` will not be added.\n#' - For sufficient number of iterations, changing this value will not have too much effect.\n#' @param eval_metric (default according to objective)\n#' - Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, `mean average precision` for ``rank:map``, etc.)\n#' - User can add multiple evaluation metrics.\n#' - The choices are listed below:\n#'   - `\"rmse\"`: root mean square error\n#'   - `\"rmsle\"`: root mean square log error: \\eqn{\\sqrt{\\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of `\"reg:squaredlogerror\"` objective. This metric reduces errors generated by outliers in dataset.  But because `log` function is employed, `\"rmsle\"` might output `nan` when prediction value is less than -1.  See `\"reg:squaredlogerror\"` for other requirements.\n#'   - `\"mae\"`: mean absolute error.\n#'   - `\"mape\"`: mean absolute percentage error.\n#'   - `\"mphe\"`: mean Pseudo Huber error. Default metric of `\"reg:pseudohubererror\"` objective.\n#'   - `\"logloss\"`: negative log-likelihood.\n#'   - `\"error\"`: Binary classification error rate. It is calculated as `#(wrong cases)/#(all cases)`. 
For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.\n#'   - `\"error@t\"`: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'.\n#'   - `\"merror\"`: Multiclass classification error rate. It is calculated as `#(wrong cases)/#(all cases)`.\n#'   - `\"mlogloss\"`: [Multiclass logloss](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html).\n#'   - `\"auc\"`: area under the receiver-operating characteristic curve.\n#'     Available for classification and learning-to-rank tasks.\n#'     - When used with binary classification, the objective should be `\"binary:logistic\"` or similar functions that work on probability.\n#'     - When used with multi-class classification, objective should be `\"multi:softprob\"` instead of `\"multi:softmax\"`, as the latter doesn't output probability.  Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence.\n#'     - When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs.  This corresponds to pairwise learning to rank.  The implementation has some issues with average AUC around groups and distributed workers not being well-defined.\n#'     - On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.\n#'     - When input dataset contains only negative or positive samples, the output is `NaN`.  
The behavior is implementation defined, for instance, `scikit-learn` returns \\eqn{0.5} instead.\n#'   - `\"aucpr\"`: area under the PR curve\n#'     Available for classification and learning-to-rank tasks.\n#'\n#'     After XGBoost 1.6, both of the requirements and restrictions for using `\"aucpr\"` in classification problem are similar to `\"auc\"`.  For ranking task, only binary relevance label \\eqn{y \\in [0, 1]} is supported.  Different from `\"map\"` (mean average precision), `\"aucpr\"` calculates the *interpolated* area under precision recall curve using continuous interpolation.\n#'\n#'   - `\"pre\"`: Precision at \\eqn{k}. Supports only learning to rank task.\n#'   - `\"ndcg\"`: normalized discounted cumulative gain\n#'   - `\"map\"`: mean average precision\n#'\n#'     The `average precision` is defined as:\n#'\n#'       \\eqn{AP@l = \\frac{1}{min{(l, N)}}\\sum^l_{k=1}P@k \\cdot I_{(k)}}\n#'\n#'     where \\eqn{I_{(k)}} is an indicator function that equals to \\eqn{1} when the document at \\eqn{k} is relevant and \\eqn{0} otherwise. The \\eqn{P@k} is the precision at \\eqn{k}, and \\eqn{N} is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries.\n#'\n#'   - `\"ndcg@n\"`, `\"map@n\"`, `\"pre@n\"`: \\eqn{n} can be assigned as an integer to cut off the top positions in the lists for evaluation.\n#'   - `\"ndcg-\"`, `\"map-\"`, `\"ndcg@n-\"`, `\"map@n-\"`: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as \\eqn{1}. 
By appending \"-\" to the evaluation metric name, we can ask XGBoost to evaluate these scores as \\eqn{0} to be consistent under some conditions.\n#'   - `\"poisson-nloglik\"`: negative log-likelihood for Poisson regression\n#'   - `\"gamma-nloglik\"`: negative log-likelihood for gamma regression\n#'   - `\"cox-nloglik\"`: negative partial log-likelihood for Cox proportional hazards regression\n#'   - `\"gamma-deviance\"`: residual deviance for gamma regression\n#'   - `\"tweedie-nloglik\"`: negative log-likelihood for Tweedie regression (at a specified value of the `tweedie_variance_power` parameter)\n#'   - `\"aft-nloglik\"`: Negative log likelihood of Accelerated Failure Time model.\n#'     See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details.\n#'   - `\"interval-regression-accuracy\"`: Fraction of data points whose predicted labels fall in the interval-censored labels.\n#'     Only applicable for interval-censored data.  See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details.\n#' @param seed_per_iteration (default= `FALSE`)\n#' Seed PRNG deterministically via iterator number.\n#' @param device (default= `\"cpu\"`)\n#' Device for XGBoost to run. User can set it to one of the following values:\n#' - `\"cpu\"`: Use CPU.\n#' - `\"cuda\"`: Use a GPU (CUDA device).\n#' - `\"cuda:<ordinal>\"`: `<ordinal>` is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one device).\n#' - `\"gpu\"`: Default GPU device selection from the list of available and supported devices. Only `\"cuda\"` devices are supported currently.\n#' - `\"gpu:<ordinal>\"`: Default GPU device selection from the list of available and supported devices. 
Only `\"cuda\"` devices are supported currently.\n#'\n#' For more information about GPU acceleration, see [XGBoost GPU Support](https://xgboost.readthedocs.io/en/latest/gpu/index.html). In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using `\"cuda:<ordinal>\"` will result in an error. Use `\"cuda\"` instead.\n#'\n#' Version added: 2.0.0\n#'\n#' Note: if XGBoost was installed from CRAN, it won't have GPU support enabled, thus only `\"cpu\"` will be available.\n#' To get GPU support, the R package for XGBoost must be installed from source or from the GitHub releases - see\n#' [instructions](https://xgboost.readthedocs.io/en/latest/install.html#r).\n#' @param disable_default_eval_metric (default= `FALSE`)\n#' Flag to disable default metric. Set to 1 or `TRUE` to disable.\n#' @param use_rmm Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU\n#' memory. The primary memory is always allocated on the RMM pool when XGBoost is built\n#' (compiled) with the RMM plugin enabled. Valid values are `TRUE` and `FALSE`. See\n#' [Using XGBoost with RAPIDS Memory Manager (RMM) plugin](https://xgboost.readthedocs.io/en/latest/python/rmm-examples/index.html) for details.\n#' @param max_cached_hist_node (for Non-Exact Tree Methods) (default = 65536)\n#' Maximum number of cached nodes for histogram. This can be used with the `\"hist\"` and the\n#' `\"approx\"` tree methods.\n#'\n#' Version added: 2.0.0\n#'\n#' - For most of the cases this parameter should not be set except for growing deep\n#'   trees. After 3.0, this parameter affects GPU algorithms as well.\n#' @param max_cat_to_onehot (for Non-Exact Tree Methods)\n#' A threshold for deciding whether XGBoost should use one-hot encoding based split for\n#' categorical data.  
When number of categories is less than the threshold then one-hot\n#' encoding is chosen, otherwise the categories will be partitioned into children nodes.\n#'\n#' Version added: 1.6.0\n#' @param max_cat_threshold (for Non-Exact Tree Methods)\n#' Maximum number of categories considered for each split. Used only by partition-based\n#' splits for preventing over-fitting.\n#'\n#' Version added: 1.7.0\n#' @param sample_type (for Dart Booster) (default= `\"uniform\"`)\n#' Type of sampling algorithm.\n#' - `\"uniform\"`: dropped trees are selected uniformly.\n#' - `\"weighted\"`: dropped trees are selected in proportion to weight.\n#' @param normalize_type (for Dart Booster) (default= `\"tree\"`)\n#' Type of normalization algorithm.\n#' - `\"tree\"`: new trees have the same weight of each of dropped trees.\n#'   - Weight of new trees is `1 / (k + learning_rate)`.\n#'   - Dropped trees are scaled by a factor of `k / (k + learning_rate)`.\n#' - `\"forest\"`: new trees have the same weight of sum of dropped trees (forest).\n#'   - Weight of new trees is `1 / (1 + learning_rate)`.\n#'   - Dropped trees are scaled by a factor of `1 / (1 + learning_rate)`.\n#' @param rate_drop (for Dart Booster) (default=0.0)\n#' Dropout rate (a fraction of previous trees to drop during the dropout).\n#'\n#' range: \\eqn{[0.0, 1.0]}\n#' @param one_drop (for Dart Booster) (default=0)\n#' When this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper).\n#' @param skip_drop (for Dart Booster) (default=0.0)\n#' Probability of skipping the dropout procedure during a boosting iteration.\n#' - If a dropout is skipped, new trees are added in the same manner as `\"gbtree\"`.\n#' - Note that non-zero `skip_drop` has higher priority than `rate_drop` or `one_drop`.\n#'\n#' range: \\eqn{[0.0, 1.0]}\n#' @param feature_selector (for Linear Booster) (default= `\"cyclic\"`)\n#' Feature selection and ordering 
method\n#' - `\"cyclic\"`: Deterministic selection by cycling through features one at a time.\n#' - `\"shuffle\"`: Similar to `\"cyclic\"` but with random feature shuffling prior to each update.\n#' - `\"random\"`: A random (with replacement) coordinate selector.\n#' - `\"greedy\"`: Select coordinate with the greatest gradient magnitude.  It has `O(num_feature^2)` complexity. It is fully deterministic. It allows restricting the selection to `top_k` features per group with the largest magnitude of univariate weight change, by setting the `top_k` parameter. Doing so would reduce the complexity to `O(num_feature*top_k)`.\n#' - `\"thrifty\"`: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to `top_k` features per group with the largest magnitude of univariate weight change, by setting the `top_k` parameter.\n#' @param top_k (for Linear Booster) (default=0)\n#' The number of top features to select in `greedy` and `thrifty` feature selector. The value of 0 means using all the features.\n#' @param num_class Number of classes when using multi-class classification objectives (e.g. 
`objective=\"multi:softprob\"`)\n#' @param tweedie_variance_power (for Tweedie Regression (`\"objective=reg:tweedie\"`)) (default=1.5)\n#' - Parameter that controls the variance of the Tweedie distribution `var(y) ~ E(y)^tweedie_variance_power`\n#' - range: \\eqn{(1,2)}\n#' - Set closer to 2 to shift towards a gamma distribution\n#' - Set closer to 1 to shift towards a Poisson distribution.\n#' @param huber_slope (for using Pseudo-Huber (`\"reg:pseudohubererror\"`)) (default = 1.0)\n#' A parameter used for Pseudo-Huber loss to define the \\eqn{\\delta} term.\n#' @param quantile_alpha (for using Quantile Loss (`\"reg:quantileerror\"`))\n#' A scalar or a list of targeted quantiles (passed as a numeric vector).\n#'\n#' Version added: 2.0.0\n#' @param expectile_alpha (for using Expectile Loss (`\"reg:expectileerror\"`))\n#' A scalar or a list of targeted expectiles (passed as a numeric vector).\n#' @param aft_loss_distribution (when using AFT Survival Loss (`\"survival:aft\"`) and Negative Log Likelihood of AFT metric (`\"aft-nloglik\"`))\n#' Probability Density Function, `\"normal\"`, `\"logistic\"`, or `\"extreme\"`.\n#' @param aft_loss_distribution_scale (when using AFT Survival Loss (`\"survival:aft\"`) and Negative Log Likelihood of AFT metric (`\"aft-nloglik\"`))\n#' Scaling factor for the AFT distribution. Range: \\eqn{(0, \\infty)}.\n#' @param lambdarank_pair_method (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`)) (default = `\"topk\"`)\n#' How to construct pairs for pair-wise learning.\n#' - `\"mean\"`: Sample `lambdarank_num_pair_per_sample` pairs for each document in the query list.\n#' - `\"topk\"`: Focus on top-`lambdarank_num_pair_per_sample` documents. 
Construct \\eqn{|query|} pairs for each document at the top-`lambdarank_num_pair_per_sample` ranked by the model.\n#' @param lambdarank_num_pair_per_sample (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`))\n#' It specifies the number of pairs sampled for each document when pair method is `\"mean\"`, or the truncation level for queries when the pair method is `\"topk\"`. For example, to train with `ndcg@6`, set `\"lambdarank_num_pair_per_sample\"` to \\eqn{6} and `lambdarank_pair_method` to `\"topk\"`.\n#'\n#' range = \\eqn{[1, \\infty)}\n#' @param lambdarank_normalization (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`)) (default = `TRUE`)\n#' Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress.\n#'\n#' Version added: 2.1.0\n#'\n#' @param lambdarank_score_normalization\n#'\n#' Whether to normalize the delta metric by the difference of prediction scores. This can\n#' sometimes stagnate the training progress. With pairwise ranking, we can normalize the\n#' gradient using the difference between two samples in each pair to reduce influence from\n#' the pairs that have large difference in ranking scores. This can help us regularize the\n#' model to reduce bias and prevent overfitting. Similar to other regularization\n#' techniques, this might prevent training from converging.\n#'\n#' There was no normalization before 2.0. In 2.0 and later versions this is used by\n#' default. In 3.0, we made this an option that users can disable.\n#'\n#' Version added: 3.0.0\n#'\n#' @param lambdarank_unbiased (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`)) (default = `FALSE`)\n#' Specify whether we need to debias input click data.\n#' @param lambdarank_bias_norm (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`)) (default = 2.0)\n#' \\eqn{L_p} normalization for position debiasing, default is \\eqn{L_2}. 
Only relevant when `lambdarank_unbiased` is set to `TRUE`.\n#' @param ndcg_exp_gain (for learning to rank (`\"rank:ndcg\"`, `\"rank:map\"`, `\"rank:pairwise\"`)) (default = `TRUE`)\n#' Whether we should use exponential gain function for `NDCG`. There are two forms of gain function for `NDCG`, one is using relevance value directly while the other is using \\eqn{2^{rel} - 1} to emphasize on retrieving relevant documents. When `ndcg_exp_gain` is `TRUE` (the default), relevance degree cannot be greater than 31.\nxgb.params <- function(\n  objective = NULL,\n  verbosity = NULL,\n  nthread = NULL,\n  seed = NULL,\n  booster = NULL,\n  eta = NULL,\n  learning_rate = NULL,\n  gamma = NULL,\n  min_split_loss = NULL,\n  max_depth = NULL,\n  min_child_weight = NULL,\n  max_delta_step = NULL,\n  subsample = NULL,\n  sampling_method = NULL,\n  colsample_bytree = NULL,\n  colsample_bylevel = NULL,\n  colsample_bynode = NULL,\n  lambda = NULL,\n  reg_lambda = NULL,\n  alpha = NULL,\n  reg_alpha = NULL,\n  tree_method = NULL,\n  scale_pos_weight = NULL,\n  updater = NULL,\n  refresh_leaf = NULL,\n  grow_policy = NULL,\n  max_leaves = NULL,\n  max_bin = NULL,\n  num_parallel_tree = NULL,\n  monotone_constraints = NULL,\n  interaction_constraints = NULL,\n  multi_strategy = NULL,\n  base_score = NULL,\n  eval_metric = NULL,\n  seed_per_iteration = NULL,\n  device = NULL,\n  disable_default_eval_metric = NULL,\n  use_rmm = NULL,\n  max_cached_hist_node = NULL,\n  max_cat_to_onehot = NULL,\n  max_cat_threshold = NULL,\n  sample_type = NULL,\n  normalize_type = NULL,\n  rate_drop = NULL,\n  one_drop = NULL,\n  skip_drop = NULL,\n  feature_selector = NULL,\n  top_k = NULL,\n  num_class = NULL,\n  tweedie_variance_power = NULL,\n  huber_slope = NULL,\n  quantile_alpha = NULL,\n  expectile_alpha = NULL,\n  aft_loss_distribution = NULL,\n  aft_loss_distribution_scale = NULL,\n  lambdarank_pair_method = NULL,\n  lambdarank_num_pair_per_sample = NULL,\n  lambdarank_normalization = NULL,\n  
lambdarank_score_normalization = NULL,\n  lambdarank_unbiased = NULL,\n  lambdarank_bias_norm = NULL,\n  ndcg_exp_gain = NULL\n) {\n# nolint end\n  out <- as.list(environment())\n  out <- out[!sapply(out, is.null)]\n  return(out)\n}\n"
  },
  {
    "path": "R-package/R/xgboost.R",
    "content": "prescreen.objective <- function(objective) {\n  if (!is.null(objective)) {\n    if (!is.character(objective) || length(objective) != 1L || is.na(objective)) {\n      stop(\"'objective' must be a single character/string variable.\")\n    }\n\n    if (objective %in% .OBJECTIVES_NON_DEFAULT_MODE()) {\n      stop(\n        \"Objectives with non-default prediction mode (\",\n        paste(.OBJECTIVES_NON_DEFAULT_MODE(), collapse = \", \"),\n        \") are not supported in 'xgboost()'. Try 'xgb.train()'.\"\n      )\n    }\n\n    if (objective %in% .RANKING_OBJECTIVES()) {\n      stop(\"Ranking objectives are not supported in 'xgboost()'. Try 'xgb.train()'.\")\n    }\n  }\n}\n\nprocess.base.margin <- function(base_margin, nrows, ncols) {\n  if (!NROW(base_margin)) {\n    return(NULL)\n  }\n  if (is.array(base_margin) && length(dim(base_margin)) > 2) {\n    stop(\n      \"'base_margin' should not have more than 2 dimensions for any objective (got: \",\n      length(dim(base_margin)),\n      \" dimensions).\"\n    )\n  }\n  if (inherits(base_margin, c(\"sparseMatrix\", \"sparseVector\"))) {\n    warning(\n      \"Got a sparse matrix type (class: \",\n      paste(class(base_margin), collapse = \", \"),\n      \") for 'base_margin'. Will convert to dense matrix.\"\n    )\n    base_margin <- as.matrix(base_margin)\n  }\n  if (NROW(base_margin) != nrows) {\n    stop(\n      \"'base_margin' has incorrect number of rows. Expected: \",\n      nrows,\n      \". 
Got: \",\n      NROW(base_margin)\n    )\n  }\n\n  if (ncols == 1L) {\n    if (inherits(base_margin, c(\"matrix\", \"data.frame\"))) {\n      if (ncol(base_margin) != 1L) {\n        stop(\"'base_margin' should be a 1-d vector for the given objective and data.\")\n      }\n      if (is.data.frame(base_margin)) {\n        base_margin <- base_margin[[1L]]\n      } else {\n        base_margin <- base_margin[, 1L]\n      }\n    }\n    if (!is.numeric(base_margin)) {\n      base_margin <- as.numeric(base_margin)\n    }\n  } else {\n    supported_multicol <- c(\"matrix\", \"data.frame\")\n    if (!inherits(base_margin, supported_multicol)) {\n      stop(\n        \"'base_margin' should be a matrix with \",\n        ncols,\n        \" columns for the given objective and data. Got class: \",\n        paste(class(base_margin), collapse = \", \")\n      )\n    }\n    if (ncol(base_margin) != ncols) {\n      stop(\n        \"'base_margin' has incorrect number of columns. Expected: \",\n        ncols,\n        \". Got: \",\n        ncol(base_margin)\n      )\n    }\n    if (!is.matrix(base_margin)) {\n      base_margin <- as.matrix(base_margin)\n    }\n  }\n\n  return(base_margin)\n}\n\nprocess.y.margin.and.objective <- function(\n  y,\n  base_margin,\n  objective,\n  params\n) {\n\n  if (!NROW(y)) {\n    stop(\"Passed empty 'y'.\")\n  }\n\n  if (is.array(y) && length(dim(y)) > 2) {\n    stop(\n      \"'y' should not have more than 2 dimensions for any objective (got: \",\n      length(dim(y)),\n      \").\"\n    )\n  }\n\n  if (inherits(y, c(\"sparseMatrix\", \"sparseVector\"))) {\n    warning(\n      \"Got a sparse matrix type (class: \",\n      paste(class(y), collapse = \", \"),\n      \") for 'y'. 
Will convert to dense matrix.\"\n    )\n    y <- as.matrix(y)\n  }\n\n  if (is.character(y)) {\n    if (!is.vector(y)) {\n      if (NCOL(y) > 1L) {\n        stop(\"Multi-column categorical 'y' is not supported.\")\n      }\n      y <- as.vector(y)\n    }\n    y <- factor(y)\n  }\n\n  if (is.logical(y)) {\n    if (!is.vector(y)) {\n      if (NCOL(y) > 1L) {\n        stop(\"Multi-column logical/boolean 'y' is not supported.\")\n      }\n      y <- as.vector(y)\n    }\n    y <- factor(y, c(FALSE, TRUE))\n  }\n\n  if (is.factor(y)) {\n\n    y_levels <- levels(y)\n    if (length(y_levels) < 2) {\n      stop(\"Factor 'y' has less than 2 levels.\")\n    }\n    if (length(y_levels) == 2) {\n      if (is.null(objective)) {\n        objective <- \"binary:logistic\"\n      } else {\n        if (!(objective %in% .BINARY_CLASSIF_OBJECTIVES())) {\n          stop(\n            \"Got binary 'y' - supported objectives for this data are: \",\n            paste(.BINARY_CLASSIF_OBJECTIVES(), collapse = \", \"),\n            \". Was passed: \",\n            objective\n          )\n        }\n      }\n\n      if (!is.null(base_margin)) {\n        base_margin <- process.base.margin(base_margin, length(y), 1)\n      }\n\n      out <- list(\n        params = list(\n          objective = objective\n        ),\n        metadata = list(\n          y_levels = y_levels,\n          n_targets = 1\n        )\n      )\n    } else { # length(levels) > 2\n      if (is.null(objective)) {\n        objective <- \"multi:softprob\"\n      } else {\n        if (!(objective %in% .MULTICLASS_CLASSIF_OBJECTIVES())) {\n          stop(\n            \"Got non-binary factor 'y' - supported objectives for this data are: \",\n            paste(.MULTICLASS_CLASSIF_OBJECTIVES(), collapse = \", \"),\n            \". 
Was passed: \",\n            objective\n          )\n        }\n      }\n\n      if (!is.null(base_margin)) {\n        base_margin <- process.base.margin(base_margin, length(y), length(y_levels))\n      }\n\n      out <- list(\n        params = list(\n          objective = objective,\n          num_class = length(y_levels)\n        ),\n        metadata = list(\n          y_levels = y_levels,\n          n_targets = length(y_levels)\n        )\n      )\n    }\n\n    out$dmatrix_args <- list(\n      label = as.numeric(y) - 1,\n      base_margin = base_margin\n    )\n\n  } else if (inherits(y, \"Surv\")) {\n\n    y_attr <- attributes(y)\n    supported_surv_types <- c(\"left\", \"right\", \"interval\")\n    if (!(y_attr$type %in% supported_surv_types)) {\n      stop(\n        \"Survival objectives are only supported for types: \",\n        paste(supported_surv_types, collapse = \", \"),\n        \". Was passed: \",\n        y_attr$type\n      )\n    }\n\n    if (is.null(objective)) {\n      objective <- \"survival:aft\"\n    } else {\n      if (y_attr$type == \"right\") {\n        if (!(objective %in% .SURVIVAL_RIGHT_CENSORING_OBJECTIVES())) {\n          stop(\n            \"Got right-censored 'y' variable - supported objectives for this data are: \",\n            paste(.SURVIVAL_RIGHT_CENSORING_OBJECTIVES(), collapse = \", \"),\n            \". Was passed: \",\n            objective\n          )\n        }\n      } else {\n        if (!(objective %in% .SURVIVAL_ALL_CENSORING_OBJECTIVES())) {\n          stop(\n            \"Got \", y_attr$type, \"-censored 'y' variable - supported objectives for this data are:\",\n            paste(.SURVIVAL_ALL_CENSORING_OBJECTIVES(), collapse = \", \"),\n            \". 
Was passed: \",\n            objective\n          )\n        }\n      }\n    }\n\n    if (!is.null(base_margin)) {\n      base_margin <- process.base.margin(base_margin, nrow(y), 1)\n    }\n\n    out <- list(\n      params = list(\n        objective = objective\n      ),\n      metadata = list(\n        n_targets = 1\n      )\n    )\n\n    # Note: the 'Surv' object class that is passed as 'y' might have either 2 or 3 columns\n    # depending on the type of censoring, and the last column in both cases is the one that\n    # indicates the observation type (e.g. censored / uncensored).\n    # In the case of interval censoring, the second column will not always have values with\n    # infinites filled in. For more information, see the code behind the 'print.Surv' method.\n\n    if (objective == \"survival:cox\") {\n      # Can only get here when using right censoring\n      if (y_attr$type != \"right\") {\n        stop(\"Internal error.\")\n      }\n\n      out$dmatrix_args <- list(\n        label = y[, 1L] * (2 * (y[, 2L] - 0.5))\n      )\n\n    } else {\n      if (y_attr$type == \"left\") {\n        lb <- ifelse(\n          y[, 2L] == 0,\n          0,\n          y[, 1L]\n        )\n        ub <- y[, 1L]\n        out$dmatrix_args <- list(\n          label_lower_bound = lb,\n          label_upper_bound = ub\n        )\n      } else if (y_attr$type == \"right\") {\n        lb <- y[, 1L]\n        ub <- ifelse(\n          y[, 2L] == 0,\n          Inf,\n          y[, 1L]\n        )\n        out$dmatrix_args <- list(\n          label_lower_bound = lb,\n          label_upper_bound = ub\n        )\n      } else if (y_attr$type == \"interval\") {\n        out$dmatrix_args <- list(\n          label_lower_bound = ifelse(y[, 3L] == 2, 0, y[, 1L]),\n          label_upper_bound = ifelse(\n            y[, 3L] == 0, Inf,\n            ifelse(y[, 3L] == 3, y[, 2L], y[, 1L])\n          )\n        )\n      }\n\n      if (min(out$dmatrix_args$label_lower_bound) < 0) {\n        
stop(\"Survival objectives are only defined for non-negative 'y'.\")\n      }\n    }\n\n    out$dmatrix_args$base_margin <- base_margin\n\n  } else if (is.vector(y)) {\n\n    if (is.null(objective)) {\n      objective <- \"reg:squarederror\"\n    } else if (!(objective %in% .REGRESSION_OBJECTIVES())) {\n      stop(\n        \"Got numeric 'y' - supported objectives for this data are: \",\n        paste(.REGRESSION_OBJECTIVES(), collapse = \", \"),\n        \". Was passed: \",\n        objective\n      )\n    }\n\n    n_targets <- 1L\n    if (objective == \"reg:quantileerror\" && NROW(params$quantile_alpha) > 1) {\n      n_targets <- NROW(params$quantile_alpha)\n    } else if (objective == \"reg:expectileerror\" && NROW(params$expectile_alpha) > 1) {\n      n_targets <- NROW(params$expectile_alpha)\n    }\n\n    if (!is.null(base_margin)) {\n      base_margin <- process.base.margin(base_margin, length(y), n_targets)\n    }\n\n    out <- list(\n      params = list(\n        objective = objective\n      ),\n      metadata = list(\n        n_targets = n_targets\n      ),\n      dmatrix_args = list(\n        label = as.numeric(y),\n        base_margin = base_margin\n      )\n    )\n\n  } else if (is.data.frame(y)) {\n    if (ncol(y) == 1L) {\n      return(process.y.margin.and.objective(y[[1L]], base_margin, objective, params))\n    }\n\n    if (is.null(objective)) {\n      objective <- \"reg:squarederror\"\n    } else if (!(objective %in% .MULTI_TARGET_OBJECTIVES())) {\n      stop(\n        \"Got multi-column 'y' - supported objectives for this data are: \",\n        paste(.MULTI_TARGET_OBJECTIVES(), collapse = \", \"),\n        \". Was passed: \",\n        objective\n      )\n    }\n\n    y_names <- names(y)\n    y <- lapply(y, function(x) {\n      if (!inherits(x, c(\"numeric\", \"integer\"))) {\n        stop(\n          \"Multi-target 'y' only supports 'numeric' and 'integer' types. 
Got: \",\n          paste(class(x), collapse = \", \")\n        )\n      }\n      return(as.numeric(x))\n    })\n    y <- as.data.frame(y) |> as.matrix()\n\n    if (!is.null(base_margin)) {\n      base_margin <- process.base.margin(base_margin, length(y), ncol(y))\n    }\n\n    out <- list(\n      params = list(\n        objective = objective\n      ),\n      dmatrix_args = list(\n        label = y,\n        base_margin = base_margin\n      ),\n      metadata = list(\n        y_names = y_names,\n        n_targets = ncol(y)\n      )\n    )\n\n  } else if (is.matrix(y)) {\n    if (ncol(y) == 1L) {\n      return(process.y.margin.and.objective(as.vector(y), base_margin, objective, params))\n    }\n\n    if (!is.null(objective) && !(objective %in% .MULTI_TARGET_OBJECTIVES())) {\n      stop(\n        \"Got multi-column 'y' - supported objectives for this data are: \",\n        paste(.MULTI_TARGET_OBJECTIVES(), collapse = \", \"),\n        \". Was passed: \",\n        objective\n      )\n    }\n    if (is.null(objective)) {\n      objective <- \"reg:squarederror\"\n    }\n\n    y_names <- colnames(y)\n    if (storage.mode(y) != \"double\") {\n      storage.mode(y) <- \"double\"\n    }\n\n    if (!is.null(base_margin)) {\n      base_margin <- process.base.margin(base_margin, nrow(y), ncol(y))\n    }\n\n    out <- list(\n      params = list(\n        objective = objective\n      ),\n      dmatrix_args = list(\n        label = y,\n        base_margin = base_margin\n      ),\n      metadata = list(\n        n_targets = ncol(y)\n      )\n    )\n\n    if (NROW(y_names) == ncol(y)) {\n      out$metadata$y_names <- y_names\n    }\n\n  } else {\n    stop(\"Passed 'y' object with unsupported class: \", paste(class(y), collapse = \", \"))\n  }\n\n  return(out)\n}\n\nprocess.row.weights <- function(w, lst_args) {\n  if (!is.null(w)) {\n    if (\"label\" %in% names(lst_args$dmatrix_args)) {\n      nrow_y <- NROW(lst_args$dmatrix_args$label)\n    } else if (\"label_lower_bound\" %in% 
names(lst_args$dmatrix_args)) {\n      nrow_y <- length(lst_args$dmatrix_args$label_lower_bound)\n    } else {\n      stop(\"Internal error.\")\n    }\n    if (!is.numeric(w)) {\n      w <- as.numeric(w)\n    }\n    if (length(w) != nrow_y) {\n      stop(\n        \"'weights' must be a 1-d vector with the same length as 'y' (\",\n        length(w), \" vs. \", nrow_y, \").\"\n      )\n    }\n    lst_args$dmatrix_args$weight <- w\n  }\n  return(lst_args)\n}\n\ncheck.nthreads <- function(nthreads) {\n  if (is.null(nthreads)) {\n    return(1L)\n  }\n  if (!inherits(nthreads, c(\"numeric\", \"integer\")) || !NROW(nthreads)) {\n    stop(\"'nthreads' must be a positive scalar value.\")\n  }\n  if (length(nthreads) > 1L) {\n    nthreads <- utils::head(nthreads, 1L)\n  }\n  if (is.na(nthreads) || nthreads < 0) {\n    stop(\"Passed invalid 'nthreads': \", nthreads)\n  }\n  if (is.numeric(nthreads)) {\n    if (floor(nthreads) != nthreads) {\n      stop(\"'nthreads' must be an integer.\")\n    }\n  }\n  return(as.integer(nthreads))\n}\n\ncheck.can.use.qdm <- function(x, params, eval_set) {\n  if (\"booster\" %in% names(params)) {\n    if (params$booster == \"gblinear\") {\n      return(FALSE)\n    }\n  }\n  if (\"tree_method\" %in% names(params)) {\n    if (params$tree_method %in% c(\"exact\", \"approx\")) {\n      return(FALSE)\n    }\n  }\n  if (NROW(eval_set)) {\n    return(FALSE)\n  }\n  return(TRUE)\n}\n\nprocess.x.and.col.args <- function(\n  x,\n  monotone_constraints,\n  interaction_constraints,\n  feature_weights,\n  lst_args,\n  use_qdm\n) {\n  if (is.null(x)) {\n    stop(\"'x' cannot be NULL.\")\n  }\n  if (inherits(x, \"xgb.DMatrix\")) {\n    stop(\"Cannot pass 'xgb.DMatrix' as 'x' to 'xgboost()'. 
Try 'xgb.train()' instead.\")\n  }\n  supported_x_types <- c(\"data.frame\", \"matrix\", \"dgTMatrix\", \"dgCMatrix\", \"dgRMatrix\")\n  if (!inherits(x, supported_x_types)) {\n    stop(\n      \"'x' must be one of the following classes: \",\n      paste(supported_x_types, collapse = \", \"),\n      \". Got: \",\n      paste(class(x), collapse = \", \")\n    )\n  }\n  if (use_qdm && inherits(x, \"sparseMatrix\") && !inherits(x, \"dgRMatrix\")) {\n    x <- methods::as(x, \"RsparseMatrix\")\n    if (!inherits(x, \"RsparseMatrix\")) {\n      stop(\"Internal error: casting sparse matrix did not yield 'dgRMatrix'.\")\n    }\n  }\n\n  if (NROW(feature_weights)) {\n    if (is.list(feature_weights)) {\n      feature_weights <- unlist(feature_weights)\n    }\n    if (!inherits(feature_weights, c(\"numeric\", \"integer\"))) {\n      stop(\"'feature_weights' must be a numeric vector or named list matching to columns of 'x'.\")\n    }\n    if (NROW(names(feature_weights)) && NROW(colnames(x))) {\n      matched <- match(colnames(x), names(feature_weights))\n      matched <- matched[!is.na(matched)]\n      matched <- matched[!duplicated(matched)]\n      if (length(matched) > 0 && length(matched) < length(feature_weights)) {\n        stop(\n          \"'feature_weights' names do not contain all columns of 'x'. Missing: \",\n          utils::head(setdiff(colnames(x), names(feature_weights)))\n        )\n      }\n      if (length(matched)) {\n        feature_weights <- feature_weights[matched]\n      } else {\n        warning(\"Names of 'feature_weights' do not match with 'x'. Names will be ignored.\")\n      }\n    }\n\n    lst_args$dmatrix_args$feature_weights <- unname(feature_weights)\n  }\n\n  if (NROW(monotone_constraints)) {\n\n    if (NROW(monotone_constraints) > ncol(x)) {\n      stop(\n        \"'monotone_constraints' contains more entries than there are columns in 'x' (\",\n        NROW(monotone_constraints), \" vs. 
\", ncol(x), \").\"\n      )\n    }\n\n    if (is.list(monotone_constraints)) {\n\n      if (!NROW(names(monotone_constraints))) {\n        stop(\n          \"If passing 'monotone_constraints' as a named list,\",\n          \" must have names matching to columns of 'x'.\"\n        )\n      }\n      if (!NROW(colnames(x))) {\n        stop(\"If passing 'monotone_constraints' as a named list, 'x' must have column names.\")\n      }\n      if (anyDuplicated(names(monotone_constraints))) {\n        stop(\n          \"'monotone_constraints' contains duplicated names: \",\n          paste(\n            names(monotone_constraints)[duplicated(names(monotone_constraints))] |> utils::head(),\n            collapse = \", \"\n          )\n        )\n      }\n      if (NROW(setdiff(names(monotone_constraints), colnames(x)))) {\n        stop(\n          \"'monotone_constraints' contains column names not present in 'x': \",\n          paste(utils::head(names(monotone_constraints)), collapse = \", \")\n        )\n      }\n\n      vec_monotone_constr <- rep(0, ncol(x))\n      matched <- match(names(monotone_constraints), colnames(x))\n      vec_monotone_constr[matched] <- unlist(monotone_constraints)\n      lst_args$params$monotone_constraints <- unname(vec_monotone_constr)\n\n    } else if (inherits(monotone_constraints, c(\"numeric\", \"integer\"))) {\n\n      if (NROW(names(monotone_constraints)) && NROW(colnames(x))) {\n        if (length(monotone_constraints) < ncol(x)) {\n          return(\n            process.x.and.col.args(\n              x,\n              as.list(monotone_constraints),\n              interaction_constraints,\n              feature_weights,\n              lst_args,\n              use_qdm\n            )\n          )\n        } else {\n          matched <- match(names(monotone_constraints), colnames(x))\n          matched <- matched[!is.na(matched)]\n          matched <- matched[!duplicated(matched)]\n          if (length(matched)) {\n            
monotone_constraints <- monotone_constraints[matched]\n          } else {\n            warning(\"Names of 'monotone_constraints' do not match with 'x'. Names will be ignored.\")\n          }\n        }\n      } else {\n        if (length(monotone_constraints) != ncol(x)) {\n          stop(\n            \"If passing 'monotone_constraints' as unnamed vector or not using column names,\",\n            \" must have length matching to number of columns in 'x'. Got: \",\n            length(monotone_constraints), \" (vs. \", ncol(x), \")\"\n          )\n        }\n      }\n\n      lst_args$params$monotone_constraints <- unname(monotone_constraints)\n\n    } else if (is.character(monotone_constraints)) {\n      lst_args$params$monotone_constraints <- monotone_constraints\n    } else {\n      stop(\n        \"Passed unsupported type for 'monotone_constraints': \",\n        paste(class(monotone_constraints), collapse = \", \")\n      )\n    }\n  }\n\n  if (NROW(interaction_constraints)) {\n    if (!is.list(interaction_constraints)) {\n      stop(\"'interaction_constraints' must be a list of vectors.\")\n    }\n    cnames <- colnames(x)\n    lst_args$params$interaction_constraints <- lapply(interaction_constraints, function(idx) {\n      if (!NROW(idx)) {\n        stop(\"Elements in 'interaction_constraints' cannot be empty.\")\n      }\n\n      if (is.character(idx)) {\n        if (!NROW(cnames)) {\n          stop(\n            \"Passed a character vector for 'interaction_constraints', but 'x' \",\n            \"has no column names to match them against.\"\n          )\n        }\n        out <- match(idx, cnames) - 1L\n        if (anyNA(out)) {\n          stop(\n            \"'interaction_constraints' contains column names not present in 'x': \",\n            paste(utils::head(idx[which(is.na(out))]), collapse = \", \")\n          )\n        }\n        return(out)\n      } else if (inherits(idx, c(\"numeric\", \"integer\"))) {\n        if (anyNA(idx)) {\n          
stop(\"'interaction_constraints' cannot contain NA values.\")\n        }\n        if (min(idx) < 1) {\n          stop(\"Column indices for 'interaction_constraints' must follow base-1 indexing.\")\n        }\n        if (max(idx) > ncol(x)) {\n          stop(\"'interaction_constraints' contains invalid column indices.\")\n        }\n        if (is.numeric(idx)) {\n          if (any(idx != floor(idx))) {\n            stop(\n              \"'interaction_constraints' must contain only integer indices. Got non-integer: \",\n              paste(utils::head(idx[which(idx != floor(idx))]), collapse = \", \")\n            )\n          }\n        }\n        return(idx - 1L)\n      } else {\n        stop(\n          \"Elements in 'interaction_constraints' must be vectors of types \",\n          \"'integer', 'numeric', or 'character'. Got: \",\n          paste(class(idx), collapse = \", \")\n        )\n      }\n    })\n  }\n\n  lst_args$dmatrix_args$data <- x\n  return(lst_args)\n}\n\nprocess.eval.set <- function(eval_set, lst_args) {\n  if (!NROW(eval_set)) {\n    return(NULL)\n  }\n  nrows <- nrow(lst_args$dmatrix_args$data)\n  is_classif <- hasName(lst_args$metadata, \"y_levels\")\n  processed_y <- lst_args$dmatrix_args$label\n  eval_set <- as.vector(eval_set)\n  if (length(eval_set) == 1L) {\n\n    eval_set <- as.numeric(eval_set)\n    if (is.na(eval_set) || eval_set < 0 || eval_set >= 1) {\n      stop(\"'eval_set' as a fraction must be a number between zero and one (non-inclusive).\")\n    }\n    if (eval_set == 0) {\n      return(NULL)\n    }\n    nrow_eval <- as.integer(round(nrows * eval_set, 0))\n    if (nrow_eval < 1) {\n      warning(\n        \"Desired 'eval_set' fraction amounts to zero observations.\",\n        \" Will not create evaluation set.\"\n      )\n      return(NULL)\n    }\n    nrow_train <- nrows - nrow_eval\n    if (nrow_train < 2L) {\n      stop(\"Desired 'eval_set' fraction would leave less than 2 observations for training data.\")\n    }\n    if 
(is_classif && nrow_train < length(lst_args$metadata$y_levels)) {\n      stop(\"Desired 'eval_set' fraction would not leave enough samples for each class of 'y'.\")\n    }\n\n    seed <- lst_args$params$seed\n    if (!is.null(seed)) {\n      set.seed(seed)\n    }\n\n    idx_shuffled <- sample(nrows, nrows, replace = FALSE)\n    idx_eval <- idx_shuffled[seq(1L, nrow_eval)]\n    idx_train <- idx_shuffled[seq(nrow_eval + 1L, nrows)]\n    # Here we want the training set to include all of the classes of 'y' for classification\n    # objectives. If that condition doesn't hold with the random sample, then it forcibly\n    # makes a new random selection in such a way that the condition would always hold, by\n    # first sampling one random example of 'y' for training and then choosing the evaluation\n    # set from the remaining rows. The procedure here is quite inefficient, but there aren't\n    # enough random-related functions in base R to be able to construct an efficient version.\n    if (is_classif && length(unique(processed_y[idx_train])) < length(lst_args$metadata$y_levels)) {\n      # These are defined in order to avoid NOTEs from CRAN checks\n      # when using non-standard data.table evaluation with column names.\n      idx <- NULL\n      y <- NULL\n      ranked_idx <- NULL\n      chosen <- NULL\n\n      dt <- data.table::data.table(y = processed_y, idx = seq(1L, nrows))[\n        , .(\n            ranked_idx = seq(1L, .N),\n            chosen = rep(sample(.N, 1L), .N),\n            idx\n          )\n        , by = y\n      ]\n      min_idx_train <- dt[ranked_idx == chosen, idx]\n      rem_idx <- dt[ranked_idx != chosen, idx]\n      if (length(rem_idx) == nrow_eval) {\n        idx_train <- min_idx_train\n        idx_eval <- rem_idx\n      } else {\n        rem_idx <- rem_idx[sample(length(rem_idx), length(rem_idx), replace = FALSE)]\n        idx_eval <- rem_idx[seq(1L, nrow_eval)]\n        idx_train <- c(min_idx_train, rem_idx[seq(nrow_eval + 1L, 
length(rem_idx))])\n      }\n    }\n\n  } else {\n\n    if (any(eval_set != floor(eval_set))) {\n      stop(\"'eval_set' as indices must contain only integers.\")\n    }\n    eval_set <- as.integer(eval_set)\n    idx_min <- min(eval_set)\n    if (is.na(idx_min) || idx_min < 1L) {\n      stop(\"'eval_set' contains invalid indices.\")\n    }\n    idx_max <- max(eval_set)\n    if (is.na(idx_max) || idx_max > nrows) {\n      stop(\"'eval_set' contains row indices beyond the size of the input data.\")\n    }\n    idx_train <- seq(1L, nrows)[-eval_set]\n    if (is_classif && length(unique(processed_y[idx_train])) < length(lst_args$metadata$y_levels)) {\n      warning(\"'eval_set' indices will leave some classes of 'y' outside of the training data.\")\n    }\n    idx_eval <- eval_set\n\n  }\n\n  # Note: slicing is done in the constructed DMatrix object instead of in the\n  # original input, because objects from 'Matrix' might change class after\n  # being sliced (e.g. 'dgRMatrix' turns into 'dgCMatrix').\n  return(list(idx_train = idx_train, idx_eval = idx_eval))\n}\n\ncheck.early.stopping.rounds <- function(early_stopping_rounds, eval_set) {\n  if (is.null(early_stopping_rounds)) {\n    return(NULL)\n  }\n  if (is.null(eval_set)) {\n    stop(\"'early_stopping_rounds' requires passing 'eval_set'.\")\n  }\n  if (NROW(early_stopping_rounds) != 1L) {\n    stop(\"'early_stopping_rounds' must be NULL or an integer greater than zero.\")\n  }\n  early_stopping_rounds <- as.integer(early_stopping_rounds)\n  if (is.na(early_stopping_rounds) || early_stopping_rounds <= 0L) {\n    stop(\n      \"'early_stopping_rounds' must be NULL or an integer greater than zero. 
Got: \",\n      early_stopping_rounds\n    )\n  }\n  return(early_stopping_rounds)\n}\n\n# nolint start: line_length_linter.\n#' Fit XGBoost Model\n#'\n#' @export\n#' @description\n#' Fits an XGBoost model (boosted decision tree ensemble) to given x/y data.\n#'\n#' See the tutorial [Introduction to Boosted Trees](https://xgboost.readthedocs.io/en/stable/tutorials/model.html)\n#' for a longer explanation of what XGBoost does, and the rest of the\n#' [XGBoost Tutorials](https://xgboost.readthedocs.io/en/latest/tutorials/index.html) for further\n#' explanations of XGBoost's features and usage.\n#'\n#' This function is intended to provide a user-friendly interface for XGBoost that follows\n#' R's conventions for model fitting and predictions, but which doesn't expose all of the\n#' possible functionalities of the core XGBoost library.\n#'\n#' See [xgb.train()] for a more flexible low-level alternative which is similar across different\n#' language bindings of XGBoost and which exposes additional functionalities such as training on\n#' external memory data and learning-to-rank objectives.\n#'\n#' See also the [migration guide](https://xgboost.readthedocs.io/en/latest/R-package/migration_guide.html)\n#' if coming from a previous version of XGBoost in the 1.x series.\n#'\n#' By default, most of the parameters here have a value of `NULL`, which signals XGBoost to use its\n#' default value. Default values are automatically determined by the XGBoost core library, and are\n#' subject to change over XGBoost library versions. Some of them might differ according to the\n#' booster type (e.g. 
defaults for regularization are different for linear and tree-based boosters).\n#' See [xgb.params()] and the [online documentation](https://xgboost.readthedocs.io/en/latest/parameter.html)\n#' for more details about parameters - but note that some of the parameters are not supported in\n#' the `xgboost()` interface.\n#' @details\n#' For package authors using 'xgboost' as a dependency, it is highly recommended to use\n#' [xgb.train()] in package code instead of [xgboost()], since it has a more stable interface\n#' and performs fewer data conversions and copies along the way.\n#' @references\n#'   - Chen, Tianqi, and Carlos Guestrin. \"Xgboost: A scalable tree boosting system.\"\n#'     Proceedings of the 22nd acm sigkdd international conference on knowledge discovery and\n#'     data mining. 2016.\n#'   - \\url{https://xgboost.readthedocs.io/en/stable/}\n#' @param x The features / covariates. Can be passed as:\n#'   - A numeric or integer `matrix`.\n#'   - A `data.frame`, in which all columns are one of the following types:\n#'     - `numeric`\n#'     - `integer`\n#'     - `logical`\n#'     - `factor`\n#'\n#'     Columns of `factor` type will be assumed to be categorical, while other column types will\n#'     be assumed to be numeric.\n#'   - A sparse matrix from the `Matrix` package, either as `dgCMatrix` or `dgRMatrix` class.\n#'\n#'   Note that categorical features are only supported for `data.frame` inputs, and are automatically\n#'   determined based on their types. See [xgb.train()] with [xgb.DMatrix()] for more flexible\n#'   variants that would allow something like categorical features on sparse matrices.\n#' @param y The response variable. 
Allowed values are:\n#'   - A numeric or integer vector (for regression tasks).\n#'   - A factor or character vector (for binary and multi-class classification tasks).\n#'   - A logical (boolean) vector (for binary classification tasks).\n#'   - A numeric or integer matrix or `data.frame` with numeric/integer columns\n#'     (for multi-task regression tasks).\n#'   - A `Surv` object from the 'survival' package (for survival tasks).\n#'\n#'   If `objective` is `NULL`, the right task will be determined automatically based on\n#'   the class of `y`.\n#'\n#'   If `objective` is not `NULL`, it must match with the type of `y` - e.g. `factor` types of `y`\n#'   can only be used with classification objectives and vice-versa.\n#'\n#'   For binary classification, the last factor level of `y` will be used as the \"positive\"\n#'   class - that is, the numbers from `predict` will reflect the probabilities of belonging to this\n#'   class instead of to the first factor level. If `y` is a `logical` vector, then `TRUE` will be\n#'   set as the last level.\n#' @param objective Optimization objective to minimize based on the supplied data, to be passed\n#' by name as a string / character (e.g. `reg:absoluteerror`). 
See the\n#' [Learning Task Parameters](https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters)\n#' page and the [xgb.params()] documentation for more detailed information on allowed values.\n#'\n#' If `NULL` (the default), will be automatically determined from `y` according to the following\n#' logic:\n#' - If `y` is a factor with 2 levels, will use `binary:logistic`.\n#' - If `y` is a factor with more than 2 levels, will use `multi:softprob` (number of classes\n#'   will be determined automatically, should not be passed under `params`).\n#' - If `y` is a `Surv` object from the `survival` package, will use `survival:aft` (note that\n#'   the only types supported are left / right / interval censored).\n#' - Otherwise, will use `reg:squarederror`.\n#'\n#' If `objective` is not `NULL`, it must match with the type of `y` - e.g. `factor` types of `y`\n#' can only be used with classification objectives and vice-versa.\n#'\n#' Note that not all possible `objective` values supported by the core XGBoost library are allowed\n#' here - for example, objectives which are a variation of another but with a different default\n#' prediction type (e.g. `multi:softmax` vs. `multi:softprob`) are not allowed, and neither are\n#' ranking objectives, nor custom objectives at the moment.\n#'\n#' Supported values are:\n#' - `\"reg:squarederror\"`: regression with squared loss.\n#' - `\"reg:squaredlogerror\"`: regression with squared log loss \\eqn{\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}.  All input labels are required to be greater than -1.  Also, see metric `rmsle` for possible issue  with this objective.\n#' - `\"reg:pseudohubererror\"`: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.\n#' - `\"reg:absoluteerror\"`: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. 
If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.\n#' - `\"reg:quantileerror\"`: Quantile loss, also known as \"pinball loss\". See later sections for its parameter and [Quantile Regression](https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py) for a worked example.\n#' - `\"reg:expectileerror\"`: Expectile loss. See later sections for its parameter.\n#' - `\"binary:logistic\"`: logistic regression for binary classification, output probability\n#' - `\"binary:hinge\"`: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.\n#' - `\"count:poisson\"`: Poisson regression for count data, output mean of Poisson distribution.\n#'   `\"max_delta_step\"` is set to 0.7 by default in Poisson regression (used to safeguard optimization)\n#' - `\"survival:cox\"`: Cox regression for right censored survival time data (negative values are considered right censored).\n#'\n#'   Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function `h(t) = h0(t) * HR`).\n#' - `\"survival:aft\"`: Accelerated failure time model for censored survival time data.\n#' See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details.\n#' - `\"multi:softprob\"`: multi-class classification through multinomial logistic likelihood.\n#' - `\"reg:gamma\"`: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be [gamma-distributed](https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications).\n#' - `\"reg:tweedie\"`: Tweedie regression with log-link. 
It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be [Tweedie-distributed](https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications).\n#'\n#' The following values are \\bold{NOT} supported by `xgboost`, but are supported by [xgb.train()]\n#' (see [xgb.params()] for details):\n#' - `\"reg:logistic\"`\n#' - `\"binary:logitraw\"`\n#' - `\"multi:softmax\"`\n#' - `\"rank:ndcg\"`\n#' - `\"rank:map\"`\n#' - `\"rank:pairwise\"`\n#' @param nrounds Number of boosting iterations / rounds.\n#'\n#'   Note that the number of default boosting rounds here is not automatically tuned, and different\n#'   problems will have vastly different optimal numbers of boosting rounds.\n#' @param weights Sample weights for each row in `x` and `y`. If `NULL` (the default), each row\n#'   will have the same weight.\n#'\n#'   If not `NULL`, should be passed as a numeric vector with length matching to the number of rows in `x`.\n#' @param verbosity Verbosity of printing messages. Valid values are 0 (silent), 1 (warning),\n#'   2 (info), and 3 (debug).\n#' @param monitor_training Whether to monitor objective optimization progress on the input data.\n#' Note that the same 'x' and 'y' data are used for both model fitting and evaluation.\n#' @param eval_set Subset of the data to use as evaluation set. Can be passed as:\n#' - A vector of row indices (base-1 numeration) indicating the observations that are to be designated\n#'   as evaluation data.\n#' - A number between zero and one indicating a random fraction of the input data to use as\n#'   evaluation data. Note that the selection will be done uniformly at random, regardless of\n#'   argument `weights`.\n#'\n#' If passed, this subset of the data will be excluded from the training procedure, and the\n#' evaluation metric(s) supplied under `eval_metric` will be calculated on this dataset after each\n#' boosting iteration (pass `verbosity>0` to have these metrics printed during training). 
If\n#' `eval_metric` is not passed, a default metric will be selected according to `objective`.\n#'\n#' If passing a fraction, in classification problems, the evaluation set will be chosen in such a\n#' way that at least one observation of each class will be kept in the training data.\n#'\n#' For more elaborate evaluation variants (e.g. custom metrics, multiple evaluation sets, etc.),\n#' one might want to use [xgb.train()] instead.\n#' @param early_stopping_rounds Number of boosting rounds after which training will be stopped\n#' if there is no improvement in performance (as measured by the last metric passed under\n#' `eval_metric`, or by the default metric for the objective if `eval_metric` is not passed) on the\n#' evaluation data from `eval_set`. Must pass `eval_set` in order to use this functionality.\n#'\n#' If `NULL`, early stopping will not be used.\n#' @param print_every_n When passing `verbosity>0` and either `monitor_training=TRUE` or `eval_set`,\n#' evaluation logs (metrics calculated on the training and/or evaluation data) will be printed every\n#' nth iteration according to the value passed here. The first and last iteration are always\n#' included regardless of this 'n'.\n#'\n#' Only has an effect when passing `verbosity>0`.\n#' @param nthreads Number of parallel threads to use. If passing zero, will use all CPU threads.\n#' @param seed Seed to use for random number generation. If passing `NULL`, will draw a random\n#'   number using R's PRNG system to use as seed.\n#' @param monotone_constraints Optional monotonicity constraints for features.\n#'\n#'   Can be passed either as a named list (when `x` has column names), or as a vector. 
If passed\n#'   as a vector and `x` has column names, will try to match the elements by name.\n#'\n#'   A value of `+1` for a given feature makes the model predictions / scores constrained to be\n#'   a monotonically increasing function of that feature (that is, as the value of the feature\n#'   increases, the model prediction cannot decrease), while a value of `-1` makes it a monotonically\n#'   decreasing function. A value of zero imposes no constraint.\n#'\n#'   The input for `monotone_constraints` can be a subset of the columns of `x` if named, in which\n#'   case the columns that are not referred to in `monotone_constraints` will be assumed to have\n#'   a value of zero (no constraint imposed on the model for those features).\n#'\n#'   See the tutorial [Monotonic Constraints](https://xgboost.readthedocs.io/en/stable/tutorials/monotonic.html)\n#'   for a more detailed explanation.\n#' @param interaction_constraints Constraints for interaction representing permitted interactions.\n#'   The constraints must be specified in the form of a list of vectors referencing columns in the\n#'   data, e.g. `list(c(1, 2), c(3, 4, 5))` (with these numbers being column indices, numeration\n#'   starting at 1 - i.e. the first sublist references the first and second columns) or\n#'   `list(c(\"Sepal.Length\", \"Sepal.Width\"), c(\"Petal.Length\", \"Petal.Width\"))` (references\n#'   columns by names), where each vector is a group of indices of features that are allowed to\n#'   interact with each other.\n#'\n#'   See the tutorial [Feature Interaction Constraints](https://xgboost.readthedocs.io/en/stable/tutorials/feature_interaction_constraint.html)\n#'   for more information.\n#' @param feature_weights Feature weights for column sampling.\n#'\n#'   Can be passed either as a vector with length matching to columns of `x`, or as a named\n#'   list (only if `x` has column names) with names matching to columns of 'x'. 
If it is a\n#'   named vector, will try to match the entries to column names of `x` by name.\n#'\n#'   If `NULL` (the default), all columns will have the same weight.\n#' @param base_margin Base margin used for boosting from existing model.\n#'\n#'   If passing it, will start the gradient boosting procedure from the scores that are provided\n#'   here - for example, one can pass the raw scores from a previous model, or some per-observation\n#'   offset, or similar.\n#'\n#'   Should be either a numeric vector or numeric matrix (for multi-class and multi-target objectives)\n#'   with the same number of rows as `x` and number of columns corresponding to number of optimization\n#'   targets, and should be in the untransformed scale (for example, for objective `binary:logistic`,\n#'   it should have log-odds, not probabilities; and for objective `multi:softprob`, should have\n#'   number of columns matching to number of classes in the data).\n#'\n#'   Note that, if it contains more than one column, then columns will not be matched by name to\n#'   the corresponding `y` - `base_margin` should have the same column order that the model will use\n#'   (for example, for objective `multi:softprob`, columns of `base_margin` will be matched against\n#'   `levels(y)` by their position, regardless of what `colnames(base_margin)` returns).\n#'\n#'   If `NULL`, will start from zero, but note that for most objectives, an intercept is usually\n#'   added (controllable through parameter `base_score` instead) when `base_margin` is not passed.\n#' @param min_split_loss (for Tree Booster) (default=0, alias: `gamma`)\n#' Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger `min_split_loss` is, the more conservative the algorithm will be. 
Note that a tree where no splits were made might still contain a single terminal node with a non-zero score.\n#'\n#' range: \\eqn{[0, \\infty)}\n#' @param learning_rate (alias: `eta`)\n#' Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and `learning_rate` shrinks the feature weights to make the boosting process more conservative.\n#' - range: \\eqn{[0,1]}\n#' - default value: 0.3 for tree-based boosters, 0.5 for linear booster.\n#' @param reg_lambda (alias: `lambda`)\n#' - For tree-based boosters:\n#'   - L2 regularization term on weights. Increasing this value will make model more conservative.\n#'   - default: 1\n#'   - range: \\eqn{[0, \\infty)}\n#' - For linear booster:\n#'   - L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.\n#'   - default: 0\n#'   - range: \\eqn{[0, \\infty)}\n#' @param reg_alpha (alias: `alpha`)\n#' - L1 regularization term on weights. Increasing this value will make model more conservative.\n#' - For the linear booster, it's normalised to number of training examples.\n#' - default: 0\n#' - range: \\eqn{[0, \\infty)}\n#' @param updater (for Linear Booster) (default= `\"shotgun\"`)\n#' Choice of algorithm to fit linear model\n#' - `\"shotgun\"`: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.\n#' - `\"coord_descent\"`: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the `device` parameter is set to `\"cuda\"` or `\"gpu\"`, a GPU variant would be used.\n#' @inheritParams xgb.params\n#' @inheritParams xgb.train\n#' @return A model object, inheriting from both `xgboost` and `xgb.Booster`. 
Compared to the regular\n#'   `xgb.Booster` model class produced by [xgb.train()], this `xgboost` class will have an\n#'   additional attribute `metadata` containing information which is used for formatting prediction\n#'   outputs, such as class names for classification problems.\n#'\n#' @examples\n#' data(mtcars)\n#'\n#' # Fit a small regression model on the mtcars data\n#' model_regression <- xgboost(mtcars[, -1], mtcars$mpg, nthreads = 1, nrounds = 3)\n#' predict(model_regression, mtcars, validate_features = TRUE)\n#'\n#' # Task objective is determined automatically according to the type of 'y'\n#' data(iris)\n#' model_classif <- xgboost(iris[, -5], iris$Species, nthreads = 1, nrounds = 5)\n#' predict(model_classif, iris[1:10,])\n#' predict(model_classif, iris[1:10,], type = \"class\")\n#'\n#' # Can nevertheless choose a non-default objective if needed\n#' model_poisson <- xgboost(\n#'   mtcars[, -1], mtcars$mpg,\n#'   objective = \"count:poisson\",\n#'   nthreads = 1,\n#'   nrounds = 3\n#' )\n#'\n#' # Can calculate evaluation metrics during boosting rounds\n#' data(ToothGrowth)\n#' xgboost(\n#'   ToothGrowth[, c(\"len\", \"dose\")],\n#'   ToothGrowth$supp,\n#'   eval_metric = c(\"auc\", \"logloss\"),\n#'   eval_set = 0.2,\n#'   monitor_training = TRUE,\n#'   verbosity = 1,\n#'   nthreads = 1,\n#'   nrounds = 3\n#' )\nxgboost <- function(\n  x,\n  y,\n  objective = NULL,\n  nrounds = 100L,\n  max_depth = NULL,\n  learning_rate = NULL,\n  min_child_weight = NULL,\n  min_split_loss = NULL,\n  reg_lambda = NULL,\n  weights = NULL,\n  verbosity = if (is.null(eval_set)) 0L else 1L,\n  monitor_training = verbosity > 0,\n  eval_set = NULL,\n  early_stopping_rounds = NULL,\n  print_every_n = 1L,\n  eval_metric = NULL,\n  nthreads = parallel::detectCores(),\n  seed = 0L,\n  base_margin = NULL,\n  monotone_constraints = NULL,\n  interaction_constraints = NULL,\n  reg_alpha = NULL,\n  max_bin = NULL,\n  max_leaves = NULL,\n  booster = NULL,\n  subsample = NULL,\n  
sampling_method = NULL,\n  feature_weights = NULL,\n  colsample_bytree = NULL,\n  colsample_bylevel = NULL,\n  colsample_bynode = NULL,\n  tree_method = NULL,\n  max_delta_step = NULL,\n  scale_pos_weight = NULL,\n  updater = NULL,\n  grow_policy = NULL,\n  num_parallel_tree = NULL,\n  multi_strategy = NULL,\n  base_score = NULL,\n  seed_per_iteration = NULL,\n  device = NULL,\n  disable_default_eval_metric = NULL,\n  use_rmm = NULL,\n  max_cached_hist_node = NULL,\n  max_cat_to_onehot = NULL,\n  max_cat_threshold = NULL,\n  sample_type = NULL,\n  normalize_type = NULL,\n  rate_drop = NULL,\n  one_drop = NULL,\n  skip_drop = NULL,\n  feature_selector = NULL,\n  top_k = NULL,\n  tweedie_variance_power = NULL,\n  huber_slope = NULL,\n  quantile_alpha = NULL,\n  expectile_alpha = NULL,\n  aft_loss_distribution = NULL,\n  ...\n) {\n# nolint end\n  check.deprecation(deprecated_xgboost_params, match.call(), ...)\n  params <- as.list(environment())\n  params <- params[\n    (names(params) %in% formalArgs(xgb.params))\n    & !sapply(params, is.null)\n    & !(names(params) %in% c( # these undergo additional processing here\n      \"objective\", \"base_margin\", \"monotone_constraints\", \"interaction_constraints\"\n    ))\n  ]\n\n  prescreen.objective(objective)\n  use_qdm <- check.can.use.qdm(x, params, eval_set)\n  lst_args <- process.y.margin.and.objective(y, base_margin, objective, params)\n  lst_args <- process.row.weights(weights, lst_args)\n  lst_args <- process.x.and.col.args(\n    x,\n    monotone_constraints,\n    interaction_constraints,\n    feature_weights,\n    lst_args,\n    use_qdm\n  )\n  eval_set <- process.eval.set(eval_set, lst_args)\n\n  if (use_qdm && hasName(params, \"max_bin\")) {\n    lst_args$dmatrix_args$max_bin <- params$max_bin\n  }\n\n  nthreads <- check.nthreads(nthreads)\n  lst_args$dmatrix_args$nthread <- nthreads\n  lst_args$params$nthread <- nthreads\n\n  params <- c(lst_args$params, params)\n  params$verbosity <- verbosity\n\n  fn_dm <- 
if (use_qdm) xgb.QuantileDMatrix else xgb.DMatrix\n  dm <- do.call(fn_dm, lst_args$dmatrix_args)\n  if (!is.null(eval_set)) {\n    dm_eval <- xgb.slice.DMatrix(dm, eval_set$idx_eval)\n    dm <- xgb.slice.DMatrix(dm, eval_set$idx_train)\n  }\n  evals <- list()\n  if (monitor_training) {\n    evals <- list(train = dm)\n  }\n  if (!is.null(eval_set)) {\n    evals <- c(evals, list(eval = dm_eval))\n  }\n  model <- xgb.train(\n    params = params,\n    data = dm,\n    nrounds = nrounds,\n    verbose = verbosity,\n    print_every_n = print_every_n,\n    evals = evals\n  )\n  attributes(model)$metadata <- lst_args$metadata\n  attributes(model)$call <- match.call()\n  class(model) <- c(\"xgboost\", class(model))\n  return(model)\n}\n\n#' @title Compute predictions from XGBoost model on new data\n#' @description Predict values on data based on XGBoost model.\n#' @param object An XGBoost model object of class `xgboost`, as produced by function [xgboost()].\n#'\n#' Note that there is also a lower-level [predict.xgb.Booster()] method for models of class\n#' `xgb.Booster` as produced by [xgb.train()], which can also be used for `xgboost` class models as\n#' an alternative that performs fewer validations and post-processings.\n#' @param newdata Data on which to compute predictions from the model passed in `object`. Supported\n#' input classes are:\n#' - Data Frames (class `data.frame` from base R and subclasses like `data.table`).\n#' - Matrices (class `matrix` from base R).\n#' - Sparse matrices from package `Matrix`, either as class `dgRMatrix` (CSR) or `dgCMatrix` (CSC).\n#' - Sparse vectors from package `Matrix`, which will be interpreted as containing a single\n#'   observation.\n#'\n#' In the case of data frames, if there are any categorical features, they should be of class\n#' `factor` and should have the same levels as the `factor` columns of the data from which the model\n#' was constructed. 
Any columns with type other than `factor` will be interpreted as numeric.\n#'\n#' If there are named columns and the model was fitted to data with named columns, they will be\n#' matched by name by default (see `validate_features`).\n#' @param type Type of prediction to make. Supported options are:\n#' - `\"response\"`: will output model predictions on the scale of the response variable (e.g.\n#'  probabilities of belonging to the last class in the case of binary classification). Result will\n#'  be either a numeric vector with length matching to rows in `newdata`, or a numeric matrix with\n#'  shape `[nrows(newdata), nscores]` (for objectives that produce more than one score per\n#'  observation such as multi-class classification or multi-quantile regression).\n#' - `\"raw\"`: will output the unprocessed boosting scores (e.g. log-odds in the case of objective\n#'   `binary:logistic`). Same output shape and type as for `\"response\"`.\n#' - `\"class\"`: will output the class with the highest predicted probability, returned as a `factor`\n#'   (only applicable to classification objectives) with length matching to rows in `newdata`.\n#' - `\"leaf\"`: will output the terminal node indices of each observation across each tree, as an\n#'   integer matrix of shape `[nrows(newdata), ntrees]`, or as an integer array with an extra one or\n#'   two dimensions, up to `[nrows(newdata), ntrees, nscores, n_parallel_trees]` for models that\n#'   produce more than one score per tree and/or which have more than one parallel tree (e.g.\n#'   random forests).\n#'\n#'   Only applicable to tree-based boosters (not `gblinear`).\n#' - `\"contrib\"`: will produce per-feature contribution estimates towards the model score for a\n#'   given observation, based on SHAP values. 
The contribution values are on the scale of\n#'   untransformed margin (e.g., for binary classification, the values are log-odds deviations from\n#'   the baseline).\n#'\n#'   Output will be a numeric matrix with shape `[nrows, nfeatures+1]`, with the intercept being the\n#'   last feature, or a numeric array with shape `[nrows, nscores, nfeatures+1]` if the model\n#'   produces more than one score per observation.\n#' - `\"interaction\"`: similar to `\"contrib\"`, but computing SHAP values of contributions of\n#'   interaction of each pair of features. Note that this operation might be rather expensive in\n#'   terms of compute and memory.\n#'\n#'   Since it quadratically depends on the number of features, it is recommended to perform\n#'   selection of the most important features first.\n#'\n#'   Output will be a numeric array of shape `[nrows, nfeatures+1, nfeatures+1]`, or shape\n#'   `[nrows, nscores, nfeatures+1, nfeatures+1]` (for objectives that produce more than one score\n#'   per observation).\n#' @param base_margin Base margin used for boosting from existing model (raw score that gets added to\n#' all observations independently of the trees in the model).\n#'\n#' If supplied, should be either a vector with length equal to the number of rows in `newdata`\n#' (for objectives which produce a single score per observation), or a matrix with number of\n#' rows matching to the number of rows in `newdata` and number of columns matching to the number\n#' of scores estimated by the model (e.g. 
number of classes for multi-class classification).\n#' @param iteration_range Sequence of rounds/iterations from the model to use for prediction, specified by passing\n#' a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.\n#' base-1 indexing, and inclusive of both ends).\n#'\n#' For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will\n#' predict using only the first one.\n#'\n#' If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all\n#' of the iterations (rounds) otherwise.\n#'\n#' If passing \"all\", will use all of the rounds regardless of whether the model had early stopping or not.\n#'\n#' Not applicable to `gblinear` booster.\n#' @param validate_features Validate that the feature names in the data match to the feature names\n#' in the column, and reorder them in the data otherwise.\n#'\n#' If passing `FALSE`, it is assumed that the feature names and types are the same,\n#' and come in the same order as in the training data.\n#'\n#' Be aware that this only applies to column names and not to factor levels in categorical columns.\n#'\n#' Note that this check might add some sizable latency to the predictions, so it's\n#' recommended to disable it for performance-sensitive applications.\n#' @param ... Not used.\n#' @return Either a numeric vector (for 1D outputs), numeric matrix (for 2D outputs), numeric array\n#' (for 3D and higher), or `factor` (for class predictions). 
See documentation for parameter `type`\n#' for details about what the output type and shape will be.\n#' @method predict xgboost\n#' @export\n#' @examples\n#' data(\"ToothGrowth\")\n#' y <- ToothGrowth$supp\n#' x <- ToothGrowth[, -2L]\n#' model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n#' pred_prob <- predict(model, x[1:5, ], type = \"response\")\n#' pred_raw <- predict(model, x[1:5, ], type = \"raw\")\n#' pred_class <- predict(model, x[1:5, ], type = \"class\")\n#'\n#' # Relationships between these\n#' manual_probs <- 1 / (1 + exp(-pred_raw))\n#' manual_class <- ifelse(manual_probs < 0.5, levels(y)[1], levels(y)[2])\n#'\n#' # They should match up to numerical precision\n#' round(pred_prob, 6) == round(manual_probs, 6)\n#' pred_class == manual_class\npredict.xgboost <- function(\n  object,\n  newdata,\n  type = \"response\",\n  base_margin = NULL,\n  iteration_range = NULL,\n  validate_features = TRUE,\n  ...\n) {\n  if (inherits(newdata, \"xgb.DMatrix\")) {\n    stop(\n      \"Predictions on 'xgb.DMatrix' objects are not supported with 'xgboost' class.\",\n      \" Try 'xgb.train' or 'predict.xgb.Booster'.\"\n    )\n  }\n\n  outputmargin <- FALSE\n  predleaf <- FALSE\n  predcontrib <- FALSE\n  predinteraction <- FALSE\n  pred_class <- FALSE\n  strict_shape <- FALSE\n  allowed_types <- c(\n    \"response\",\n    \"raw\",\n    \"class\",\n    \"leaf\",\n    \"contrib\",\n    \"interaction\"\n  )\n  type <- head(type, 1L)\n  if (!is.character(type) || !(type %in% allowed_types)) {\n    stop(\"'type' must be one of: \", paste(allowed_types, collapse = \", \"))\n  }\n\n  if (type != \"response\")  {\n    switch(\n      type,\n      \"raw\" = {\n        outputmargin <- TRUE\n      }, \"class\" = {\n        if (is.null(attributes(object)$metadata$y_levels)) {\n          stop(\"Prediction type 'class' is only for classification objectives.\")\n        }\n        pred_class <- TRUE\n        outputmargin <- TRUE\n      }, \"leaf\" = {\n        predleaf 
<- TRUE\n        strict_shape <- TRUE # required for 3D and 4D outputs\n      }, \"contrib\" = {\n        predcontrib <- TRUE\n      }, \"interaction\" = {\n        predinteraction <- TRUE\n      }\n    )\n  }\n  out <- predict.xgb.Booster(\n    object,\n    newdata,\n    outputmargin = outputmargin,\n    predleaf = predleaf,\n    predcontrib = predcontrib,\n    predinteraction = predinteraction,\n    iterationrange = iteration_range,\n    strict_shape = strict_shape,\n    validate_features = validate_features,\n    base_margin = base_margin\n  )\n\n  if (strict_shape) {\n    # Should only end up here for leaf predictions\n    out_dims <- dim(out)\n    dims_remove <- integer()\n    if (out_dims[3L] == 1L) {\n      dims_remove <- c(dims_remove, -3L)\n    }\n    if (length(out_dims) >= 4L && out_dims[4L] == 1L) {\n      dims_remove <- c(dims_remove, -4L)\n    }\n    if (length(dims_remove)) {\n      new_dimnames <- dimnames(out)[dims_remove]\n      dim(out) <- out_dims[dims_remove]\n      dimnames(out) <- new_dimnames\n    }\n  }\n\n  if (pred_class) {\n\n    if (is.null(dim(out))) {\n      out <- as.integer(out >= 0) + 1L\n    } else {\n      out <- max.col(out, ties.method = \"first\")\n    }\n    attr_out <- attributes(out)\n    attr_out$class <- \"factor\"\n    attr_out$levels <- attributes(object)$metadata$y_levels\n    attributes(out) <- attr_out\n\n  } else if (NCOL(out) > 1L || (strict_shape && length(dim(out)) >= 3L)) {\n\n    names_use <- NULL\n    if (NROW(attributes(object)$metadata$y_levels) > 2L) {\n      names_use <- attributes(object)$metadata$y_levels\n    } else if (NROW(attributes(object)$metadata$y_names)) {\n      names_use <- attributes(object)$metadata$y_names\n    } else if (NROW(attributes(object)$params$quantile_alpha) > 1L) {\n      names_use <- paste0(\"q\", attributes(object)$params$quantile_alpha)\n      if (anyDuplicated(names_use)) {\n        warning(\"Cannot add quantile names to output due to clashes in their character 
conversions\")\n        names_use <- NULL\n      }\n    } else if (NROW(attributes(object)$params$expectile_alpha) > 1L) {\n      names_use <- paste0(\"e\", attributes(object)$params$expectile_alpha)\n      if (anyDuplicated(names_use)) {\n        warning(\"Cannot add expectile names to output due to clashes in their character conversions\")\n        names_use <- NULL\n      }\n    }\n    if (NROW(names_use)) {\n      dimnames_out <- dimnames(out)\n      dim_with_names <- if (type == \"leaf\") 3L else 2L\n      dimnames_out[[dim_with_names]] <- names_use\n      .Call(XGSetArrayDimNamesInplace_R, out, dimnames_out)\n    }\n\n  }\n\n  return(out)\n}\n\n#' @title Print info from XGBoost model\n#' @description Prints basic properties of an XGBoost model object.\n#' @param x An XGBoost model object of class `xgboost`, as produced by function [xgboost()].\n#' @param ... Not used.\n#' @return Same object `x`, after printing its info.\n#' @method print xgboost\n#' @export\nprint.xgboost <- function(x, ...) 
{\n  cat(\"XGBoost model object\\n\")\n  cat(\"Call:\\n  \")\n  print(attributes(x)$call)\n  cat(\"Objective: \", attributes(x)$params$objective, \"\\n\", sep = \"\")\n  cat(\"Number of iterations: \", xgb.get.num.boosted.rounds(x), \"\\n\", sep = \"\")\n  cat(\"Number of features: \", xgb.num_feature(x), \"\\n\", sep = \"\")\n\n  printable_head <- function(v) {\n    v_sub <- utils::head(v, 5L)\n    return(\n      sprintf(\n        \"%s%s\",\n        paste(v_sub, collapse = \", \"),\n        ifelse(length(v_sub) < length(v), \", ...\", \"\")\n      )\n    )\n  }\n\n  if (NROW(attributes(x)$metadata$y_levels)) {\n    cat(\n      \"Classes: \",\n      printable_head(attributes(x)$metadata$y_levels),\n      \"\\n\",\n      sep = \"\"\n    )\n  } else if (NROW(attributes(x)$params$quantile_alpha)) {\n    cat(\n      \"Prediction quantile\",\n      ifelse(length(attributes(x)$params$quantile_alpha) > 1L, \"s\", \"\"),\n      \": \",\n      printable_head(attributes(x)$params$quantile_alpha),\n      \"\\n\",\n      sep = \"\"\n    )\n  } else if (NROW(attributes(x)$params$expectile_alpha)) {\n    cat(\n      \"Prediction expectile\",\n      ifelse(length(attributes(x)$params$expectile_alpha) > 1L, \"s\", \"\"),\n      \": \",\n      printable_head(attributes(x)$params$expectile_alpha),\n      \"\\n\",\n      sep = \"\"\n    )\n  } else if (NROW(attributes(x)$metadata$y_names)) {\n    cat(\n      \"Prediction targets: \",\n      printable_head(attributes(x)$metadata$y_names),\n      \"\\n\",\n      sep = \"\"\n    )\n  } else if (attributes(x)$metadata$n_targets > 1L) {\n    cat(\n      \"Number of predition targets: \",\n      attributes(x)$metadata$n_targets,\n      \"\\n\",\n      sep = \"\"\n    )\n  }\n\n  return(x)\n}\n\n\n#' Training part from Mushroom Data Set\n#'\n#' This data set is originally from the Mushroom data set,\n#' UCI Machine Learning Repository.\n#'\n#' It includes the following fields:\n#'  - `label`: The label for each record.\n#'  - `data`: A 
sparse Matrix of 'dgCMatrix' class with 126 columns.\n#'\n#' @references\n#' <https://archive.ics.uci.edu/ml/datasets/Mushroom>\n#'\n#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository\n#' <http://archive.ics.uci.edu/ml>. Irvine, CA: University of California,\n#' School of Information and Computer Science.\n#'\n#' @docType data\n#' @keywords datasets\n#' @name agaricus.train\n#' @usage data(agaricus.train)\n#' @format A list containing a label vector, and a dgCMatrix object with 6513\n#' rows and 126 variables\nNULL\n\n#' Test part from Mushroom Data Set\n#'\n#' This data set is originally from the Mushroom data set,\n#' UCI Machine Learning Repository.\n#'\n#' It includes the following fields:\n#'  - `label`: The label for each record.\n#'  - `data`: A sparse Matrix of 'dgCMatrix' class with 126 columns.\n#'\n#' @references\n#' <https://archive.ics.uci.edu/ml/datasets/Mushroom>\n#'\n#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository\n#' <http://archive.ics.uci.edu/ml>. 
Irvine, CA: University of California,\n#' School of Information and Computer Science.\n#'\n#' @docType data\n#' @keywords datasets\n#' @name agaricus.test\n#' @usage data(agaricus.test)\n#' @format A list containing a label vector, and a dgCMatrix object with 1611\n#' rows and 126 variables\nNULL\n\n# Various imports\n#' @importClassesFrom Matrix dgCMatrix dgRMatrix CsparseMatrix\n#' @importFrom Matrix sparse.model.matrix\n#' @importFrom data.table data.table\n#' @importFrom data.table is.data.table\n#' @importFrom data.table as.data.table\n#' @importFrom data.table :=\n#' @importFrom data.table rbindlist\n#' @importFrom data.table setkey\n#' @importFrom data.table setkeyv\n#' @importFrom data.table setnames\n#' @importFrom jsonlite fromJSON\n#' @importFrom jsonlite toJSON\n#' @importFrom methods new\n#' @importFrom utils object.size str tail\n#' @importFrom stats coef\n#' @importFrom stats predict\n#' @importFrom stats median\n#' @importFrom stats sd\n#' @importFrom stats variable.names\n#' @importFrom utils head\n#' @importFrom utils hasName\n#' @importFrom graphics barplot\n#' @importFrom graphics lines\n#' @importFrom graphics points\n#' @importFrom graphics grid\n#' @importFrom graphics par\n#' @importFrom graphics title\n#' @importFrom grDevices rgb\n#'\n#' @import methods\n#' @useDynLib xgboost, .registration = TRUE\nNULL\n"
  },
  {
    "path": "R-package/README.md",
    "content": "XGBoost R Package\n=================\n\n[![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](https://cran.r-project.org/web/packages/xgboost)\n[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](https://cran.rstudio.com/web/packages/xgboost/index.html)\n[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org/en/latest/R-package/index.html)\n\nResources\n---------\n* [XGBoost R Package Online Documentation](https://xgboost.readthedocs.org/en/stable/R-package/index.html)\n  - Check this out for detailed documents, examples and tutorials.\n\nInstallation\n------------\n\nWe are [on CRAN](https://cran.r-project.org/web/packages/xgboost/index.html) now. For stable/pre-compiled(for Windows and OS X) version, please install from CRAN:\n\n```r\ninstall.packages('xgboost')\n```\n\nFor more detailed installation instructions, please see [here](https://xgboost.readthedocs.io/en/stable/install.html).\n\nDevelopment\n-----------\n\n* See the [R Package section](https://xgboost.readthedocs.io/en/latest/contrib/coding_guide.html#r-coding-guideline) of the contributors guide.\n"
  },
  {
    "path": "R-package/bootstrap.R",
    "content": "## Script used to bootstrap R-universe build.\n\n## Execute git commands to initialize git submodules\nsystem(\"git submodule init\")\nsystem(\"git submodule update\")\n\n## core\nfile.copy(\"../src\", \"./src/\", recursive = TRUE)\nfile.copy(\"../include\", \"./src/\", recursive = TRUE)\nfile.copy(\"../amalgamation\", \"./src/\", recursive = TRUE)\n\n## dmlc-core\ndir.create(\"./src/dmlc-core\")\nfile.copy(\"../dmlc-core/include\", \"./src/dmlc-core/\", recursive = TRUE)\nfile.copy(\"../dmlc-core/src\", \"./src/dmlc-core/\", recursive = TRUE)\n\npkgroot <- function(path) {\n  ## read the file from path, replace the PKGROOT=../../ with PKGROOT=.\n  lines <- readLines(path)\n  lines <- gsub(\"PKGROOT=../../\", \"PKGROOT=.\", lines, fixed = TRUE)\n  writeLines(lines, path)\n}\n\n## makefile and license\nfile.copy(\"../LICENSE\", \"./LICENSE\")\npkgroot(\"./src/Makevars.in\")\npkgroot(\"./src/Makevars.win.in\")\n\n## misc\npath <- file.path(\"remove_warning_suppression_pragma.sh\")\nfile.remove(path)\npath <- file.path(\"CMakeLists.txt\")\nfile.remove(path)\n\n## remove the directory recursively ./tests/helper_scripts\nunlink(\"tests/helper_scripts\", recursive = TRUE)\n"
  },
  {
    "path": "R-package/cleanup",
    "content": "#!/bin/sh\n\nrm -f src/Makevars\n"
  },
  {
    "path": "R-package/configure",
    "content": "#! /bin/sh\n# Guess values for system-dependent variables and create Makefiles.\n# Generated by GNU Autoconf 2.71 for xgboost 3.3.0.\n#\n#\n# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,\n# Inc.\n#\n#\n# This configure script is free software; the Free Software Foundation\n# gives unlimited permission to copy, distribute and modify it.\n## -------------------- ##\n## M4sh Initialization. ##\n## -------------------- ##\n\n# Be more Bourne compatible\nDUALCASE=1; export DUALCASE # for MKS sh\nas_nop=:\nif test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1\nthen :\n  emulate sh\n  NULLCMD=:\n  # Pre-4.2 versions of Zsh do word splitting on ${1+\"$@\"}, which\n  # is contrary to our usage.  Disable this feature.\n  alias -g '${1+\"$@\"}'='\"$@\"'\n  setopt NO_GLOB_SUBST\nelse $as_nop\n  case `(set -o) 2>/dev/null` in #(\n  *posix*) :\n    set -o posix ;; #(\n  *) :\n     ;;\nesac\nfi\n\n\n\n# Reset variables that may have inherited troublesome values from\n# the environment.\n\n# IFS needs to be set, to space, tab, and newline, in precisely that order.\n# (If _AS_PATH_WALK were called with IFS unset, it would have the\n# side effect of setting IFS to empty, thus disabling word splitting.)\n# Quoting is to prevent editors from complaining about space-tab.\nas_nl='\n'\nexport as_nl\nIFS=\" \"\"\t$as_nl\"\n\nPS1='$ '\nPS2='> '\nPS4='+ '\n\n# Ensure predictable behavior from utilities with locale-dependent output.\nLC_ALL=C\nexport LC_ALL\nLANGUAGE=C\nexport LANGUAGE\n\n# We cannot yet rely on \"unset\" to work, but we need these variables\n# to be unset--not just set to an empty or harmless value--now, to\n# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh).  This construct\n# also avoids known problems related to \"unset\" and subshell syntax\n# in other old shells (e.g. 
bash 2.01 and pdksh 5.2.14).\nfor as_var in BASH_ENV ENV MAIL MAILPATH CDPATH\ndo eval test \\${$as_var+y} \\\n  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :\ndone\n\n# Ensure that fds 0, 1, and 2 are open.\nif (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi\nif (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi\nif (exec 3>&2)            ; then :; else exec 2>/dev/null; fi\n\n# The user is always right.\nif ${PATH_SEPARATOR+false} :; then\n  PATH_SEPARATOR=:\n  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {\n    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||\n      PATH_SEPARATOR=';'\n  }\nfi\n\n\n# Find who we are.  Look in the path if we contain no directory separator.\nas_myself=\ncase $0 in #((\n  *[\\\\/]* ) as_myself=$0 ;;\n  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nfor as_dir in $PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n    test -r \"$as_dir$0\" && as_myself=$as_dir$0 && break\n  done\nIFS=$as_save_IFS\n\n     ;;\nesac\n# We did not find ourselves, most probably we were run as `sh COMMAND'\n# in which case we are not to be found in the path.\nif test \"x$as_myself\" = x; then\n  as_myself=$0\nfi\nif test ! 
-f \"$as_myself\"; then\n  printf \"%s\\n\" \"$as_myself: error: cannot find myself; rerun with an absolute file name\" >&2\n  exit 1\nfi\n\n\n# Use a proper internal environment variable to ensure we don't fall\n  # into an infinite loop, continuously re-executing ourselves.\n  if test x\"${_as_can_reexec}\" != xno && test \"x$CONFIG_SHELL\" != x; then\n    _as_can_reexec=no; export _as_can_reexec;\n    # We cannot yet assume a decent shell, so we have to provide a\n# neutralization value for shells without unset; and this also\n# works around shells that cannot unset nonexistent variables.\n# Preserve -v and -x to the replacement shell.\nBASH_ENV=/dev/null\nENV=/dev/null\n(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV\ncase $- in # ((((\n  *v*x* | *x*v* ) as_opts=-vx ;;\n  *v* ) as_opts=-v ;;\n  *x* ) as_opts=-x ;;\n  * ) as_opts= ;;\nesac\nexec $CONFIG_SHELL $as_opts \"$as_myself\" ${1+\"$@\"}\n# Admittedly, this is quite paranoid, since all the known shells bail\n# out after a failed `exec'.\nprintf \"%s\\n\" \"$0: could not re-execute with $CONFIG_SHELL\" >&2\nexit 255\n  fi\n  # We don't want this to propagate to other subprocesses.\n          { _as_can_reexec=; unset _as_can_reexec;}\nif test \"x$CONFIG_SHELL\" = x; then\n  as_bourne_compatible=\"as_nop=:\nif test \\${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1\nthen :\n  emulate sh\n  NULLCMD=:\n  # Pre-4.2 versions of Zsh do word splitting on \\${1+\\\"\\$@\\\"}, which\n  # is contrary to our usage.  
Disable this feature.\n  alias -g '\\${1+\\\"\\$@\\\"}'='\\\"\\$@\\\"'\n  setopt NO_GLOB_SUBST\nelse \\$as_nop\n  case \\`(set -o) 2>/dev/null\\` in #(\n  *posix*) :\n    set -o posix ;; #(\n  *) :\n     ;;\nesac\nfi\n\"\n  as_required=\"as_fn_return () { (exit \\$1); }\nas_fn_success () { as_fn_return 0; }\nas_fn_failure () { as_fn_return 1; }\nas_fn_ret_success () { return 0; }\nas_fn_ret_failure () { return 1; }\n\nexitcode=0\nas_fn_success || { exitcode=1; echo as_fn_success failed.; }\nas_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }\nas_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }\nas_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }\nif ( set x; as_fn_ret_success y && test x = \\\"\\$1\\\" )\nthen :\n\nelse \\$as_nop\n  exitcode=1; echo positional parameters were not saved.\nfi\ntest x\\$exitcode = x0 || exit 1\nblah=\\$(echo \\$(echo blah))\ntest x\\\"\\$blah\\\" = xblah || exit 1\ntest -x / || exit 1\"\n  as_suggested=\"  as_lineno_1=\";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested\" as_lineno_1a=\\$LINENO\n  as_lineno_2=\";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested\" as_lineno_2a=\\$LINENO\n  eval 'test \\\"x\\$as_lineno_1'\\$as_run'\\\" != \\\"x\\$as_lineno_2'\\$as_run'\\\" &&\n  test \\\"x\\`expr \\$as_lineno_1'\\$as_run' + 1\\`\\\" = \\\"x\\$as_lineno_2'\\$as_run'\\\"' || exit 1\"\n  if (eval \"$as_required\") 2>/dev/null\nthen :\n  as_have_required=yes\nelse $as_nop\n  as_have_required=no\nfi\n  if test x$as_have_required = xyes && (eval \"$as_suggested\") 2>/dev/null\nthen :\n\nelse $as_nop\n  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nas_found=false\nfor as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n  as_found=:\n  case $as_dir in #(\n\t /*)\n\t   for as_base in sh bash ksh sh5; do\n\t     # Try only shells that exist, to save several 
forks.\n\t     as_shell=$as_dir$as_base\n\t     if { test -f \"$as_shell\" || test -f \"$as_shell.exe\"; } &&\n\t\t    as_run=a \"$as_shell\" -c \"$as_bourne_compatible\"\"$as_required\" 2>/dev/null\nthen :\n  CONFIG_SHELL=$as_shell as_have_required=yes\n\t\t   if as_run=a \"$as_shell\" -c \"$as_bourne_compatible\"\"$as_suggested\" 2>/dev/null\nthen :\n  break 2\nfi\nfi\n\t   done;;\n       esac\n  as_found=false\ndone\nIFS=$as_save_IFS\nif $as_found\nthen :\n\nelse $as_nop\n  if { test -f \"$SHELL\" || test -f \"$SHELL.exe\"; } &&\n\t      as_run=a \"$SHELL\" -c \"$as_bourne_compatible\"\"$as_required\" 2>/dev/null\nthen :\n  CONFIG_SHELL=$SHELL as_have_required=yes\nfi\nfi\n\n\n      if test \"x$CONFIG_SHELL\" != x\nthen :\n  export CONFIG_SHELL\n             # We cannot yet assume a decent shell, so we have to provide a\n# neutralization value for shells without unset; and this also\n# works around shells that cannot unset nonexistent variables.\n# Preserve -v and -x to the replacement shell.\nBASH_ENV=/dev/null\nENV=/dev/null\n(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV\ncase $- in # ((((\n  *v*x* | *x*v* ) as_opts=-vx ;;\n  *v* ) as_opts=-v ;;\n  *x* ) as_opts=-x ;;\n  * ) as_opts= ;;\nesac\nexec $CONFIG_SHELL $as_opts \"$as_myself\" ${1+\"$@\"}\n# Admittedly, this is quite paranoid, since all the known shells bail\n# out after a failed `exec'.\nprintf \"%s\\n\" \"$0: could not re-execute with $CONFIG_SHELL\" >&2\nexit 255\nfi\n\n    if test x$as_have_required = xno\nthen :\n  printf \"%s\\n\" \"$0: This script requires a shell more modern than all\"\n  printf \"%s\\n\" \"$0: the shells that I found on your system.\"\n  if test ${ZSH_VERSION+y} ; then\n    printf \"%s\\n\" \"$0: In particular, zsh $ZSH_VERSION has bugs and should\"\n    printf \"%s\\n\" \"$0: be upgraded to zsh 4.3.4 or later.\"\n  else\n    printf \"%s\\n\" \"$0: Please tell bug-autoconf@gnu.org about your system,\n$0: including any error possibly output before this\n$0: message. 
Then install a modern shell, or manually run\n$0: the script under such a shell if you do have one.\"\n  fi\n  exit 1\nfi\nfi\nfi\nSHELL=${CONFIG_SHELL-/bin/sh}\nexport SHELL\n# Unset more variables known to interfere with behavior of common tools.\nCLICOLOR_FORCE= GREP_OPTIONS=\nunset CLICOLOR_FORCE GREP_OPTIONS\n\n## --------------------- ##\n## M4sh Shell Functions. ##\n## --------------------- ##\n# as_fn_unset VAR\n# ---------------\n# Portably unset VAR.\nas_fn_unset ()\n{\n  { eval $1=; unset $1;}\n}\nas_unset=as_fn_unset\n\n\n# as_fn_set_status STATUS\n# -----------------------\n# Set $? to STATUS, without forking.\nas_fn_set_status ()\n{\n  return $1\n} # as_fn_set_status\n\n# as_fn_exit STATUS\n# -----------------\n# Exit the shell with STATUS, even in a \"trap 0\" or \"set -e\" context.\nas_fn_exit ()\n{\n  set +e\n  as_fn_set_status $1\n  exit $1\n} # as_fn_exit\n# as_fn_nop\n# ---------\n# Do nothing but, unlike \":\", preserve the value of $?.\nas_fn_nop ()\n{\n  return $?\n}\nas_nop=as_fn_nop\n\n# as_fn_mkdir_p\n# -------------\n# Create \"$as_dir\" as a directory, including parents if necessary.\nas_fn_mkdir_p ()\n{\n\n  case $as_dir in #(\n  -*) as_dir=./$as_dir;;\n  esac\n  test -d \"$as_dir\" || eval $as_mkdir_p || {\n    as_dirs=\n    while :; do\n      case $as_dir in #(\n      *\\'*) as_qdir=`printf \"%s\\n\" \"$as_dir\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"`;; #'(\n      *) as_qdir=$as_dir;;\n      esac\n      as_dirs=\"'$as_qdir' $as_dirs\"\n      as_dir=`$as_dirname -- \"$as_dir\" ||\n$as_expr X\"$as_dir\" : 'X\\(.*[^/]\\)//*[^/][^/]*/*$' \\| \\\n\t X\"$as_dir\" : 'X\\(//\\)[^/]' \\| \\\n\t X\"$as_dir\" : 'X\\(//\\)$' \\| \\\n\t X\"$as_dir\" : 'X\\(/\\)' \\| . 
2>/dev/null ||\nprintf \"%s\\n\" X\"$as_dir\" |\n    sed '/^X\\(.*[^/]\\)\\/\\/*[^/][^/]*\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)[^/].*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n      test -d \"$as_dir\" && break\n    done\n    test -z \"$as_dirs\" || eval \"mkdir $as_dirs\"\n  } || test -d \"$as_dir\" || as_fn_error $? \"cannot create directory $as_dir\"\n\n\n} # as_fn_mkdir_p\n\n# as_fn_executable_p FILE\n# -----------------------\n# Test if FILE is an executable regular file.\nas_fn_executable_p ()\n{\n  test -f \"$1\" && test -x \"$1\"\n} # as_fn_executable_p\n# as_fn_append VAR VALUE\n# ----------------------\n# Append the text in VALUE to the end of the definition contained in VAR. Take\n# advantage of any shell optimizations that allow amortized linear growth over\n# repeated appends, instead of the typical quadratic growth present in naive\n# implementations.\nif (eval \"as_var=1; as_var+=2; test x\\$as_var = x12\") 2>/dev/null\nthen :\n  eval 'as_fn_append ()\n  {\n    eval $1+=\\$2\n  }'\nelse $as_nop\n  as_fn_append ()\n  {\n    eval $1=\\$$1\\$2\n  }\nfi # as_fn_append\n\n# as_fn_arith ARG...\n# ------------------\n# Perform arithmetic evaluation on the ARGs, and store the result in the\n# global $as_val. Take advantage of shells that can avoid forks. The arguments\n# must be portable across $(()) and expr.\nif (eval \"test \\$(( 1 + 1 )) = 2\") 2>/dev/null\nthen :\n  eval 'as_fn_arith ()\n  {\n    as_val=$(( $* ))\n  }'\nelse $as_nop\n  as_fn_arith ()\n  {\n    as_val=`expr \"$@\" || test $? -eq 1`\n  }\nfi # as_fn_arith\n\n# as_fn_nop\n# ---------\n# Do nothing but, unlike \":\", preserve the value of $?.\nas_fn_nop ()\n{\n  return $?\n}\nas_nop=as_fn_nop\n\n# as_fn_error STATUS ERROR [LINENO LOG_FD]\n# ----------------------------------------\n# Output \"`basename $0`: error: ERROR\" to stderr. 
If LINENO and LOG_FD are\n# provided, also output the error to LOG_FD, referencing LINENO. Then exit the\n# script with STATUS, using 1 if that was 0.\nas_fn_error ()\n{\n  as_status=$1; test $as_status -eq 0 && as_status=1\n  if test \"$4\"; then\n    as_lineno=${as_lineno-\"$3\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n    printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: $2\" >&$4\n  fi\n  printf \"%s\\n\" \"$as_me: error: $2\" >&2\n  as_fn_exit $as_status\n} # as_fn_error\n\nif expr a : '\\(a\\)' >/dev/null 2>&1 &&\n   test \"X`expr 00001 : '.*\\(...\\)'`\" = X001; then\n  as_expr=expr\nelse\n  as_expr=false\nfi\n\nif (basename -- /) >/dev/null 2>&1 && test \"X`basename -- / 2>&1`\" = \"X/\"; then\n  as_basename=basename\nelse\n  as_basename=false\nfi\n\nif (as_dir=`dirname -- /` && test \"X$as_dir\" = X/) >/dev/null 2>&1; then\n  as_dirname=dirname\nelse\n  as_dirname=false\nfi\n\nas_me=`$as_basename -- \"$0\" ||\n$as_expr X/\"$0\" : '.*/\\([^/][^/]*\\)/*$' \\| \\\n\t X\"$0\" : 'X\\(//\\)$' \\| \\\n\t X\"$0\" : 'X\\(/\\)' \\| . 2>/dev/null ||\nprintf \"%s\\n\" X/\"$0\" |\n    sed '/^.*\\/\\([^/][^/]*\\)\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\/\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\/\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n\n# Avoid depending upon Character Ranges.\nas_cr_letters='abcdefghijklmnopqrstuvwxyz'\nas_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'\nas_cr_Letters=$as_cr_letters$as_cr_LETTERS\nas_cr_digits='0123456789'\nas_cr_alnum=$as_cr_Letters$as_cr_digits\n\n\n  as_lineno_1=$LINENO as_lineno_1a=$LINENO\n  as_lineno_2=$LINENO as_lineno_2a=$LINENO\n  eval 'test \"x$as_lineno_1'$as_run'\" != \"x$as_lineno_2'$as_run'\" &&\n  test \"x`expr $as_lineno_1'$as_run' + 1`\" = \"x$as_lineno_2'$as_run'\"' || {\n  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  
:-)\n  sed -n '\n    p\n    /[$]LINENO/=\n  ' <$as_myself |\n    sed '\n      s/[$]LINENO.*/&-/\n      t lineno\n      b\n      :lineno\n      N\n      :loop\n      s/[$]LINENO\\([^'$as_cr_alnum'_].*\\n\\)\\(.*\\)/\\2\\1\\2/\n      t loop\n      s/-\\n.*//\n    ' >$as_me.lineno &&\n  chmod +x \"$as_me.lineno\" ||\n    { printf \"%s\\n\" \"$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell\" >&2; as_fn_exit 1; }\n\n  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have\n  # already done that, so ensure we don't try to do so again and fall\n  # in an infinite loop.  This has already happened in practice.\n  _as_can_reexec=no; export _as_can_reexec\n  # Don't try to exec as it changes $[0], causing all sort of problems\n  # (the dirname of $[0] is not the place where we might find the\n  # original and so on.  Autoconf is especially sensitive to this).\n  . \"./$as_me.lineno\"\n  # Exit status is that of the last command.\n  exit\n}\n\n\n# Determine whether it's possible to make 'echo' print without a newline.\n# These variables are no longer used directly by Autoconf, but are AC_SUBSTed\n# for compatibility with existing Makefiles.\nECHO_C= ECHO_N= ECHO_T=\ncase `echo -n x` in #(((((\n-n*)\n  case `echo 'xy\\c'` in\n  *c*) ECHO_T='\t';;\t# ECHO_T is single tab character.\n  xy)  ECHO_C='\\c';;\n  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null\n       ECHO_T='\t';;\n  esac;;\n*)\n  ECHO_N='-n';;\nesac\n\n# For backward compatibility with old third-party macros, we provide\n# the shell variables $as_echo and $as_echo_n.  
New code should use\n# AS_ECHO([\"message\"]) and AS_ECHO_N([\"message\"]), respectively.\nas_echo='printf %s\\n'\nas_echo_n='printf %s'\n\n\nrm -f conf$$ conf$$.exe conf$$.file\nif test -d conf$$.dir; then\n  rm -f conf$$.dir/conf$$.file\nelse\n  rm -f conf$$.dir\n  mkdir conf$$.dir 2>/dev/null\nfi\nif (echo >conf$$.file) 2>/dev/null; then\n  if ln -s conf$$.file conf$$ 2>/dev/null; then\n    as_ln_s='ln -s'\n    # ... but there are two gotchas:\n    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.\n    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.\n    # In both cases, we have to default to `cp -pR'.\n    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||\n      as_ln_s='cp -pR'\n  elif ln conf$$.file conf$$ 2>/dev/null; then\n    as_ln_s=ln\n  else\n    as_ln_s='cp -pR'\n  fi\nelse\n  as_ln_s='cp -pR'\nfi\nrm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file\nrmdir conf$$.dir 2>/dev/null\n\nif mkdir -p . 2>/dev/null; then\n  as_mkdir_p='mkdir -p \"$as_dir\"'\nelse\n  test -d ./-p && rmdir ./-p\n  as_mkdir_p=false\nfi\n\nas_test_x='test -x'\nas_executable_p=as_fn_executable_p\n\n# Sed expression to map a string onto a valid CPP name.\nas_tr_cpp=\"eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'\"\n\n# Sed expression to map a string onto a valid variable name.\nas_tr_sh=\"eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'\"\n\n\ntest -n \"$DJDIR\" || exec 7<&0 </dev/null\nexec 6>&1\n\n# Name of the host.\n# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,\n# so uname gets run too.\nac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`\n\n#\n# Initializations.\n#\nac_default_prefix=/usr/local\nac_clean_files=\nac_config_libobj_dir=.\nLIBOBJS=\ncross_compiling=no\nsubdirs=\nMFLAGS=\nMAKEFLAGS=\n\n# Identity of this package.\nPACKAGE_NAME='xgboost'\nPACKAGE_TARNAME='xgboost'\nPACKAGE_VERSION='3.3.0'\nPACKAGE_STRING='xgboost 
3.3.0'\nPACKAGE_BUGREPORT=''\nPACKAGE_URL=''\n\n# Factoring default headers for most tests.\nac_includes_default=\"\\\n#include <stddef.h>\n#ifdef HAVE_STDIO_H\n# include <stdio.h>\n#endif\n#ifdef HAVE_STDLIB_H\n# include <stdlib.h>\n#endif\n#ifdef HAVE_STRING_H\n# include <string.h>\n#endif\n#ifdef HAVE_INTTYPES_H\n# include <inttypes.h>\n#endif\n#ifdef HAVE_STDINT_H\n# include <stdint.h>\n#endif\n#ifdef HAVE_STRINGS_H\n# include <strings.h>\n#endif\n#ifdef HAVE_SYS_TYPES_H\n# include <sys/types.h>\n#endif\n#ifdef HAVE_SYS_STAT_H\n# include <sys/stat.h>\n#endif\n#ifdef HAVE_UNISTD_H\n# include <unistd.h>\n#endif\"\n\nac_header_cxx_list=\nac_subst_vars='LTLIBOBJS\nLIBOBJS\nXGBOOST_MM_PREFETCH_PRESENT\nXGBOOST_BUILTIN_PREFETCH_PRESENT\nBACKTRACE_LIB\nDMLC_DEFS\nENDIAN_FLAG\nOPENMP_LIB\nOPENMP_CXXFLAGS\nUSE_LITTLE_ENDIAN\nOBJEXT\nEXEEXT\nac_ct_CXX\nCPPFLAGS\nLDFLAGS\nCXXFLAGS\nCXX\ntarget_alias\nhost_alias\nbuild_alias\nLIBS\nECHO_T\nECHO_N\nECHO_C\nDEFS\nmandir\nlocaledir\nlibdir\npsdir\npdfdir\ndvidir\nhtmldir\ninfodir\ndocdir\noldincludedir\nincludedir\nrunstatedir\nlocalstatedir\nsharedstatedir\nsysconfdir\ndatadir\ndatarootdir\nlibexecdir\nsbindir\nbindir\nprogram_transform_name\nprefix\nexec_prefix\nPACKAGE_URL\nPACKAGE_BUGREPORT\nPACKAGE_STRING\nPACKAGE_VERSION\nPACKAGE_TARNAME\nPACKAGE_NAME\nPATH_SEPARATOR\nSHELL'\nac_subst_files=''\nac_user_opts='\nenable_option_checking\n'\n      ac_precious_vars='build_alias\nhost_alias\ntarget_alias\nCXX\nCXXFLAGS\nLDFLAGS\nLIBS\nCPPFLAGS\nCCC\nUSE_LITTLE_ENDIAN'\n\n\n# Initialize some variables set by options.\nac_init_help=\nac_init_version=false\nac_unrecognized_opts=\nac_unrecognized_sep=\n# The variables have the same names as the options, with\n# dashes changed to underlines.\ncache_file=/dev/null\nexec_prefix=NONE\nno_create=\nno_recursion=\nprefix=NONE\nprogram_prefix=NONE\nprogram_suffix=NONE\nprogram_transform_name=s,x,x,\nsilent=\nsite=\nsrcdir=\nverbose=\nx_includes=NONE\nx_libraries=NONE\n\n# Installation 
directory options.\n# These are left unexpanded so users can \"make install exec_prefix=/foo\"\n# and all the variables that are supposed to be based on exec_prefix\n# by default will actually change.\n# Use braces instead of parens because sh, perl, etc. also accept them.\n# (The list follows the same order as the GNU Coding Standards.)\nbindir='${exec_prefix}/bin'\nsbindir='${exec_prefix}/sbin'\nlibexecdir='${exec_prefix}/libexec'\ndatarootdir='${prefix}/share'\ndatadir='${datarootdir}'\nsysconfdir='${prefix}/etc'\nsharedstatedir='${prefix}/com'\nlocalstatedir='${prefix}/var'\nrunstatedir='${localstatedir}/run'\nincludedir='${prefix}/include'\noldincludedir='/usr/include'\ndocdir='${datarootdir}/doc/${PACKAGE_TARNAME}'\ninfodir='${datarootdir}/info'\nhtmldir='${docdir}'\ndvidir='${docdir}'\npdfdir='${docdir}'\npsdir='${docdir}'\nlibdir='${exec_prefix}/lib'\nlocaledir='${datarootdir}/locale'\nmandir='${datarootdir}/man'\n\nac_prev=\nac_dashdash=\nfor ac_option\ndo\n  # If the previous option needs an argument, assign it.\n  if test -n \"$ac_prev\"; then\n    eval $ac_prev=\\$ac_option\n    ac_prev=\n    continue\n  fi\n\n  case $ac_option in\n  *=?*) ac_optarg=`expr \"X$ac_option\" : '[^=]*=\\(.*\\)'` ;;\n  *=)   ac_optarg= ;;\n  *)    ac_optarg=yes ;;\n  esac\n\n  case $ac_dashdash$ac_option in\n  --)\n    ac_dashdash=yes ;;\n\n  -bindir | --bindir | --bindi | --bind | --bin | --bi)\n    ac_prev=bindir ;;\n  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)\n    bindir=$ac_optarg ;;\n\n  -build | --build | --buil | --bui | --bu)\n    ac_prev=build_alias ;;\n  -build=* | --build=* | --buil=* | --bui=* | --bu=*)\n    build_alias=$ac_optarg ;;\n\n  -cache-file | --cache-file | --cache-fil | --cache-fi \\\n  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)\n    ac_prev=cache_file ;;\n  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \\\n  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)\n    
cache_file=$ac_optarg ;;\n\n  --config-cache | -C)\n    cache_file=config.cache ;;\n\n  -datadir | --datadir | --datadi | --datad)\n    ac_prev=datadir ;;\n  -datadir=* | --datadir=* | --datadi=* | --datad=*)\n    datadir=$ac_optarg ;;\n\n  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \\\n  | --dataroo | --dataro | --datar)\n    ac_prev=datarootdir ;;\n  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \\\n  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)\n    datarootdir=$ac_optarg ;;\n\n  -disable-* | --disable-*)\n    ac_useropt=`expr \"x$ac_option\" : 'x-*disable-\\(.*\\)'`\n    # Reject names that are not valid shell variable names.\n    expr \"x$ac_useropt\" : \".*[^-+._$as_cr_alnum]\" >/dev/null &&\n      as_fn_error $? \"invalid feature name: \\`$ac_useropt'\"\n    ac_useropt_orig=$ac_useropt\n    ac_useropt=`printf \"%s\\n\" \"$ac_useropt\" | sed 's/[-+.]/_/g'`\n    case $ac_user_opts in\n      *\"\n\"enable_$ac_useropt\"\n\"*) ;;\n      *) ac_unrecognized_opts=\"$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig\"\n\t ac_unrecognized_sep=', ';;\n    esac\n    eval enable_$ac_useropt=no ;;\n\n  -docdir | --docdir | --docdi | --doc | --do)\n    ac_prev=docdir ;;\n  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)\n    docdir=$ac_optarg ;;\n\n  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)\n    ac_prev=dvidir ;;\n  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)\n    dvidir=$ac_optarg ;;\n\n  -enable-* | --enable-*)\n    ac_useropt=`expr \"x$ac_option\" : 'x-*enable-\\([^=]*\\)'`\n    # Reject names that are not valid shell variable names.\n    expr \"x$ac_useropt\" : \".*[^-+._$as_cr_alnum]\" >/dev/null &&\n      as_fn_error $? 
\"invalid feature name: \\`$ac_useropt'\"\n    ac_useropt_orig=$ac_useropt\n    ac_useropt=`printf \"%s\\n\" \"$ac_useropt\" | sed 's/[-+.]/_/g'`\n    case $ac_user_opts in\n      *\"\n\"enable_$ac_useropt\"\n\"*) ;;\n      *) ac_unrecognized_opts=\"$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig\"\n\t ac_unrecognized_sep=', ';;\n    esac\n    eval enable_$ac_useropt=\\$ac_optarg ;;\n\n  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \\\n  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \\\n  | --exec | --exe | --ex)\n    ac_prev=exec_prefix ;;\n  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \\\n  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \\\n  | --exec=* | --exe=* | --ex=*)\n    exec_prefix=$ac_optarg ;;\n\n  -gas | --gas | --ga | --g)\n    # Obsolete; use --with-gas.\n    with_gas=yes ;;\n\n  -help | --help | --hel | --he | -h)\n    ac_init_help=long ;;\n  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)\n    ac_init_help=recursive ;;\n  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)\n    ac_init_help=short ;;\n\n  -host | --host | --hos | --ho)\n    ac_prev=host_alias ;;\n  -host=* | --host=* | --hos=* | --ho=*)\n    host_alias=$ac_optarg ;;\n\n  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)\n    ac_prev=htmldir ;;\n  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \\\n  | --ht=*)\n    htmldir=$ac_optarg ;;\n\n  -includedir | --includedir | --includedi | --included | --include \\\n  | --includ | --inclu | --incl | --inc)\n    ac_prev=includedir ;;\n  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \\\n  | --includ=* | --inclu=* | --incl=* | --inc=*)\n    includedir=$ac_optarg ;;\n\n  -infodir | --infodir | --infodi | --infod | --info | --inf)\n    ac_prev=infodir ;;\n  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)\n    infodir=$ac_optarg ;;\n\n  -libdir | --libdir | 
--libdi | --libd)\n    ac_prev=libdir ;;\n  -libdir=* | --libdir=* | --libdi=* | --libd=*)\n    libdir=$ac_optarg ;;\n\n  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \\\n  | --libexe | --libex | --libe)\n    ac_prev=libexecdir ;;\n  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \\\n  | --libexe=* | --libex=* | --libe=*)\n    libexecdir=$ac_optarg ;;\n\n  -localedir | --localedir | --localedi | --localed | --locale)\n    ac_prev=localedir ;;\n  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)\n    localedir=$ac_optarg ;;\n\n  -localstatedir | --localstatedir | --localstatedi | --localstated \\\n  | --localstate | --localstat | --localsta | --localst | --locals)\n    ac_prev=localstatedir ;;\n  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \\\n  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)\n    localstatedir=$ac_optarg ;;\n\n  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)\n    ac_prev=mandir ;;\n  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)\n    mandir=$ac_optarg ;;\n\n  -nfp | --nfp | --nf)\n    # Obsolete; use --without-fp.\n    with_fp=no ;;\n\n  -no-create | --no-create | --no-creat | --no-crea | --no-cre \\\n  | --no-cr | --no-c | -n)\n    no_create=yes ;;\n\n  -no-recursion | --no-recursion | --no-recursio | --no-recursi \\\n  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)\n    no_recursion=yes ;;\n\n  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \\\n  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \\\n  | --oldin | --oldi | --old | --ol | --o)\n    ac_prev=oldincludedir ;;\n  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \\\n  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \\\n  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)\n    oldincludedir=$ac_optarg ;;\n\n  
-prefix | --prefix | --prefi | --pref | --pre | --pr | --p)\n    ac_prev=prefix ;;\n  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)\n    prefix=$ac_optarg ;;\n\n  -program-prefix | --program-prefix | --program-prefi | --program-pref \\\n  | --program-pre | --program-pr | --program-p)\n    ac_prev=program_prefix ;;\n  -program-prefix=* | --program-prefix=* | --program-prefi=* \\\n  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)\n    program_prefix=$ac_optarg ;;\n\n  -program-suffix | --program-suffix | --program-suffi | --program-suff \\\n  | --program-suf | --program-su | --program-s)\n    ac_prev=program_suffix ;;\n  -program-suffix=* | --program-suffix=* | --program-suffi=* \\\n  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)\n    program_suffix=$ac_optarg ;;\n\n  -program-transform-name | --program-transform-name \\\n  | --program-transform-nam | --program-transform-na \\\n  | --program-transform-n | --program-transform- \\\n  | --program-transform | --program-transfor \\\n  | --program-transfo | --program-transf \\\n  | --program-trans | --program-tran \\\n  | --progr-tra | --program-tr | --program-t)\n    ac_prev=program_transform_name ;;\n  -program-transform-name=* | --program-transform-name=* \\\n  | --program-transform-nam=* | --program-transform-na=* \\\n  | --program-transform-n=* | --program-transform-=* \\\n  | --program-transform=* | --program-transfor=* \\\n  | --program-transfo=* | --program-transf=* \\\n  | --program-trans=* | --program-tran=* \\\n  | --progr-tra=* | --program-tr=* | --program-t=*)\n    program_transform_name=$ac_optarg ;;\n\n  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)\n    ac_prev=pdfdir ;;\n  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)\n    pdfdir=$ac_optarg ;;\n\n  -psdir | --psdir | --psdi | --psd | --ps)\n    ac_prev=psdir ;;\n  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)\n    psdir=$ac_optarg ;;\n\n  -q | 
-quiet | --quiet | --quie | --qui | --qu | --q \\\n  | -silent | --silent | --silen | --sile | --sil)\n    silent=yes ;;\n\n  -runstatedir | --runstatedir | --runstatedi | --runstated \\\n  | --runstate | --runstat | --runsta | --runst | --runs \\\n  | --run | --ru | --r)\n    ac_prev=runstatedir ;;\n  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \\\n  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \\\n  | --run=* | --ru=* | --r=*)\n    runstatedir=$ac_optarg ;;\n\n  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)\n    ac_prev=sbindir ;;\n  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \\\n  | --sbi=* | --sb=*)\n    sbindir=$ac_optarg ;;\n\n  -sharedstatedir | --sharedstatedir | --sharedstatedi \\\n  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \\\n  | --sharedst | --shareds | --shared | --share | --shar \\\n  | --sha | --sh)\n    ac_prev=sharedstatedir ;;\n  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \\\n  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \\\n  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \\\n  | --sha=* | --sh=*)\n    sharedstatedir=$ac_optarg ;;\n\n  -site | --site | --sit)\n    ac_prev=site ;;\n  -site=* | --site=* | --sit=*)\n    site=$ac_optarg ;;\n\n  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)\n    ac_prev=srcdir ;;\n  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)\n    srcdir=$ac_optarg ;;\n\n  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \\\n  | --syscon | --sysco | --sysc | --sys | --sy)\n    ac_prev=sysconfdir ;;\n  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \\\n  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)\n    sysconfdir=$ac_optarg ;;\n\n  -target | --target | --targe | --targ | --tar | --ta | --t)\n    ac_prev=target_alias ;;\n  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | 
--t=*)\n    target_alias=$ac_optarg ;;\n\n  -v | -verbose | --verbose | --verbos | --verbo | --verb)\n    verbose=yes ;;\n\n  -version | --version | --versio | --versi | --vers | -V)\n    ac_init_version=: ;;\n\n  -with-* | --with-*)\n    ac_useropt=`expr \"x$ac_option\" : 'x-*with-\\([^=]*\\)'`\n    # Reject names that are not valid shell variable names.\n    expr \"x$ac_useropt\" : \".*[^-+._$as_cr_alnum]\" >/dev/null &&\n      as_fn_error $? \"invalid package name: \\`$ac_useropt'\"\n    ac_useropt_orig=$ac_useropt\n    ac_useropt=`printf \"%s\\n\" \"$ac_useropt\" | sed 's/[-+.]/_/g'`\n    case $ac_user_opts in\n      *\"\n\"with_$ac_useropt\"\n\"*) ;;\n      *) ac_unrecognized_opts=\"$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig\"\n\t ac_unrecognized_sep=', ';;\n    esac\n    eval with_$ac_useropt=\\$ac_optarg ;;\n\n  -without-* | --without-*)\n    ac_useropt=`expr \"x$ac_option\" : 'x-*without-\\(.*\\)'`\n    # Reject names that are not valid shell variable names.\n    expr \"x$ac_useropt\" : \".*[^-+._$as_cr_alnum]\" >/dev/null &&\n      as_fn_error $? 
\"invalid package name: \\`$ac_useropt'\"\n    ac_useropt_orig=$ac_useropt\n    ac_useropt=`printf \"%s\\n\" \"$ac_useropt\" | sed 's/[-+.]/_/g'`\n    case $ac_user_opts in\n      *\"\n\"with_$ac_useropt\"\n\"*) ;;\n      *) ac_unrecognized_opts=\"$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig\"\n\t ac_unrecognized_sep=', ';;\n    esac\n    eval with_$ac_useropt=no ;;\n\n  --x)\n    # Obsolete; use --with-x.\n    with_x=yes ;;\n\n  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \\\n  | --x-incl | --x-inc | --x-in | --x-i)\n    ac_prev=x_includes ;;\n  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \\\n  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)\n    x_includes=$ac_optarg ;;\n\n  -x-libraries | --x-libraries | --x-librarie | --x-librari \\\n  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)\n    ac_prev=x_libraries ;;\n  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \\\n  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)\n    x_libraries=$ac_optarg ;;\n\n  -*) as_fn_error $? \"unrecognized option: \\`$ac_option'\nTry \\`$0 --help' for more information\"\n    ;;\n\n  *=*)\n    ac_envvar=`expr \"x$ac_option\" : 'x\\([^=]*\\)='`\n    # Reject names that are not valid shell variable names.\n    case $ac_envvar in #(\n      '' | [0-9]* | *[!_$as_cr_alnum]* )\n      as_fn_error $? 
\"invalid variable name: \\`$ac_envvar'\" ;;\n    esac\n    eval $ac_envvar=\\$ac_optarg\n    export $ac_envvar ;;\n\n  *)\n    # FIXME: should be removed in autoconf 3.0.\n    printf \"%s\\n\" \"$as_me: WARNING: you should use --build, --host, --target\" >&2\n    expr \"x$ac_option\" : \".*[^-._$as_cr_alnum]\" >/dev/null &&\n      printf \"%s\\n\" \"$as_me: WARNING: invalid host type: $ac_option\" >&2\n    : \"${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}\"\n    ;;\n\n  esac\ndone\n\nif test -n \"$ac_prev\"; then\n  ac_option=--`echo $ac_prev | sed 's/_/-/g'`\n  as_fn_error $? \"missing argument to $ac_option\"\nfi\n\nif test -n \"$ac_unrecognized_opts\"; then\n  case $enable_option_checking in\n    no) ;;\n    fatal) as_fn_error $? \"unrecognized options: $ac_unrecognized_opts\" ;;\n    *)     printf \"%s\\n\" \"$as_me: WARNING: unrecognized options: $ac_unrecognized_opts\" >&2 ;;\n  esac\nfi\n\n# Check all directory arguments for consistency.\nfor ac_var in\texec_prefix prefix bindir sbindir libexecdir datarootdir \\\n\t\tdatadir sysconfdir sharedstatedir localstatedir includedir \\\n\t\toldincludedir docdir infodir htmldir dvidir pdfdir psdir \\\n\t\tlibdir localedir mandir runstatedir\ndo\n  eval ac_val=\\$$ac_var\n  # Remove trailing slashes.\n  case $ac_val in\n    */ )\n      ac_val=`expr \"X$ac_val\" : 'X\\(.*[^/]\\)' \\| \"X$ac_val\" : 'X\\(.*\\)'`\n      eval $ac_var=\\$ac_val;;\n  esac\n  # Be sure to have absolute directory names.\n  case $ac_val in\n    [\\\\/$]* | ?:[\\\\/]* )  continue;;\n    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;\n  esac\n  as_fn_error $? 
\"expected an absolute directory name for --$ac_var: $ac_val\"\ndone\n\n# There might be people who depend on the old broken behavior: `$host'\n# used to hold the argument of --host etc.\n# FIXME: To remove some day.\nbuild=$build_alias\nhost=$host_alias\ntarget=$target_alias\n\n# FIXME: To remove some day.\nif test \"x$host_alias\" != x; then\n  if test \"x$build_alias\" = x; then\n    cross_compiling=maybe\n  elif test \"x$build_alias\" != \"x$host_alias\"; then\n    cross_compiling=yes\n  fi\nfi\n\nac_tool_prefix=\ntest -n \"$host_alias\" && ac_tool_prefix=$host_alias-\n\ntest \"$silent\" = yes && exec 6>/dev/null\n\n\nac_pwd=`pwd` && test -n \"$ac_pwd\" &&\nac_ls_di=`ls -di .` &&\nac_pwd_ls_di=`cd \"$ac_pwd\" && ls -di .` ||\n  as_fn_error $? \"working directory cannot be determined\"\ntest \"X$ac_ls_di\" = \"X$ac_pwd_ls_di\" ||\n  as_fn_error $? \"pwd does not report name of working directory\"\n\n\n# Find the source files, if location was not specified.\nif test -z \"$srcdir\"; then\n  ac_srcdir_defaulted=yes\n  # Try the directory containing this script, then the parent directory.\n  ac_confdir=`$as_dirname -- \"$as_myself\" ||\n$as_expr X\"$as_myself\" : 'X\\(.*[^/]\\)//*[^/][^/]*/*$' \\| \\\n\t X\"$as_myself\" : 'X\\(//\\)[^/]' \\| \\\n\t X\"$as_myself\" : 'X\\(//\\)$' \\| \\\n\t X\"$as_myself\" : 'X\\(/\\)' \\| . 2>/dev/null ||\nprintf \"%s\\n\" X\"$as_myself\" |\n    sed '/^X\\(.*[^/]\\)\\/\\/*[^/][^/]*\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)[^/].*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n  srcdir=$ac_confdir\n  if test ! -r \"$srcdir/$ac_unique_file\"; then\n    srcdir=..\n  fi\nelse\n  ac_srcdir_defaulted=no\nfi\nif test ! -r \"$srcdir/$ac_unique_file\"; then\n  test \"$ac_srcdir_defaulted\" = yes && srcdir=\"$ac_confdir or ..\"\n  as_fn_error $? 
\"cannot find sources ($ac_unique_file) in $srcdir\"\nfi\nac_msg=\"sources are in $srcdir, but \\`cd $srcdir' does not work\"\nac_abs_confdir=`(\n\tcd \"$srcdir\" && test -r \"./$ac_unique_file\" || as_fn_error $? \"$ac_msg\"\n\tpwd)`\n# When building in place, set srcdir=.\nif test \"$ac_abs_confdir\" = \"$ac_pwd\"; then\n  srcdir=.\nfi\n# Remove unnecessary trailing slashes from srcdir.\n# Double slashes in file names in object file debugging info\n# mess up M-x gdb in Emacs.\ncase $srcdir in\n*/) srcdir=`expr \"X$srcdir\" : 'X\\(.*[^/]\\)' \\| \"X$srcdir\" : 'X\\(.*\\)'`;;\nesac\nfor ac_var in $ac_precious_vars; do\n  eval ac_env_${ac_var}_set=\\${${ac_var}+set}\n  eval ac_env_${ac_var}_value=\\$${ac_var}\n  eval ac_cv_env_${ac_var}_set=\\${${ac_var}+set}\n  eval ac_cv_env_${ac_var}_value=\\$${ac_var}\ndone\n\n#\n# Report the --help message.\n#\nif test \"$ac_init_help\" = \"long\"; then\n  # Omit some internal or obsolete options to make the list less imposing.\n  # This message is too long to be a string in the A/UX 3.1 sh.\n  cat <<_ACEOF\n\\`configure' configures xgboost 3.3.0 to adapt to many kinds of systems.\n\nUsage: $0 [OPTION]... [VAR=VALUE]...\n\nTo assign environment variables (e.g., CC, CFLAGS...), specify them as\nVAR=VALUE.  See below for descriptions of some of the useful variables.\n\nDefaults for the options are specified in brackets.\n\nConfiguration:\n  -h, --help              display this help and exit\n      --help=short        display options specific to this package\n      --help=recursive    display the short help of all the included packages\n  -V, --version           display version information and exit\n  -q, --quiet, --silent   do not print \\`checking ...' 
messages\n      --cache-file=FILE   cache test results in FILE [disabled]\n  -C, --config-cache      alias for \\`--cache-file=config.cache'\n  -n, --no-create         do not create output files\n      --srcdir=DIR        find the sources in DIR [configure dir or \\`..']\n\nInstallation directories:\n  --prefix=PREFIX         install architecture-independent files in PREFIX\n                          [$ac_default_prefix]\n  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX\n                          [PREFIX]\n\nBy default, \\`make install' will install all the files in\n\\`$ac_default_prefix/bin', \\`$ac_default_prefix/lib' etc.  You can specify\nan installation prefix other than \\`$ac_default_prefix' using \\`--prefix',\nfor instance \\`--prefix=\\$HOME'.\n\nFor better control, use the options below.\n\nFine tuning of the installation directories:\n  --bindir=DIR            user executables [EPREFIX/bin]\n  --sbindir=DIR           system admin executables [EPREFIX/sbin]\n  --libexecdir=DIR        program executables [EPREFIX/libexec]\n  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]\n  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]\n  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]\n  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]\n  --libdir=DIR            object code libraries [EPREFIX/lib]\n  --includedir=DIR        C header files [PREFIX/include]\n  --oldincludedir=DIR     C header files for non-gcc [/usr/include]\n  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]\n  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]\n  --infodir=DIR           info documentation [DATAROOTDIR/info]\n  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]\n  --mandir=DIR            man documentation [DATAROOTDIR/man]\n  --docdir=DIR            documentation root [DATAROOTDIR/doc/xgboost]\n  
--htmldir=DIR           html documentation [DOCDIR]\n  --dvidir=DIR            dvi documentation [DOCDIR]\n  --pdfdir=DIR            pdf documentation [DOCDIR]\n  --psdir=DIR             ps documentation [DOCDIR]\n_ACEOF\n\n  cat <<\\_ACEOF\n_ACEOF\nfi\n\nif test -n \"$ac_init_help\"; then\n  case $ac_init_help in\n     short | recursive ) echo \"Configuration of xgboost 3.3.0:\";;\n   esac\n  cat <<\\_ACEOF\n\nSome influential environment variables:\n  CXX         C++ compiler command\n  CXXFLAGS    C++ compiler flags\n  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a\n              nonstandard directory <lib dir>\n  LIBS        libraries to pass to the linker, e.g. -l<library>\n  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if\n              you have headers in a nonstandard directory <include dir>\n  USE_LITTLE_ENDIAN\n              \"Whether to build with little endian (checks at compile time if\n              unset)\"\n\nUse these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n\nReport bugs to the package provider.\n_ACEOF\nac_status=$?\nfi\n\nif test \"$ac_init_help\" = \"recursive\"; then\n  # If there are subdirs, report their specific --help.\n  for ac_dir in : $ac_subdirs_all; do test \"x$ac_dir\" = x: && continue\n    test -d \"$ac_dir\" ||\n      { cd \"$srcdir\" && ac_pwd=`pwd` && srcdir=. && test -d \"$ac_dir\"; } ||\n      continue\n    ac_builddir=.\n\ncase \"$ac_dir\" in\n.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;\n*)\n  ac_dir_suffix=/`printf \"%s\\n\" \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`\n  # A \"..\" for each directory in $ac_dir_suffix.\n  ac_top_builddir_sub=`printf \"%s\\n\" \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`\n  case $ac_top_builddir_sub in\n  \"\") ac_top_builddir_sub=. 
ac_top_build_prefix= ;;\n  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;\n  esac ;;\nesac\nac_abs_top_builddir=$ac_pwd\nac_abs_builddir=$ac_pwd$ac_dir_suffix\n# for backward compatibility:\nac_top_builddir=$ac_top_build_prefix\n\ncase $srcdir in\n  .)  # We are building in place.\n    ac_srcdir=.\n    ac_top_srcdir=$ac_top_builddir_sub\n    ac_abs_top_srcdir=$ac_pwd ;;\n  [\\\\/]* | ?:[\\\\/]* )  # Absolute name.\n    ac_srcdir=$srcdir$ac_dir_suffix;\n    ac_top_srcdir=$srcdir\n    ac_abs_top_srcdir=$srcdir ;;\n  *) # Relative name.\n    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix\n    ac_top_srcdir=$ac_top_build_prefix$srcdir\n    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;\nesac\nac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix\n\n    cd \"$ac_dir\" || { ac_status=$?; continue; }\n    # Check for configure.gnu first; this name is used for a wrapper for\n    # Metaconfig's \"Configure\" on case-insensitive file systems.\n    if test -f \"$ac_srcdir/configure.gnu\"; then\n      echo &&\n      $SHELL \"$ac_srcdir/configure.gnu\" --help=recursive\n    elif test -f \"$ac_srcdir/configure\"; then\n      echo &&\n      $SHELL \"$ac_srcdir/configure\" --help=recursive\n    else\n      printf \"%s\\n\" \"$as_me: WARNING: no configuration information is in $ac_dir\" >&2\n    fi || ac_status=$?\n    cd \"$ac_pwd\" || { ac_status=$?; break; }\n  done\nfi\n\ntest -n \"$ac_init_help\" && exit $ac_status\nif $ac_init_version; then\n  cat <<\\_ACEOF\nxgboost configure 3.3.0\ngenerated by GNU Autoconf 2.71\n\nCopyright (C) 2021 Free Software Foundation, Inc.\nThis configure script is free software; the Free Software Foundation\ngives unlimited permission to copy, distribute and modify it.\n_ACEOF\n  exit\nfi\n\n## ------------------------ ##\n## Autoconf initialization. 
##\n## ------------------------ ##\n\n# ac_fn_cxx_try_compile LINENO\n# ----------------------------\n# Try to compile conftest.$ac_ext, and return whether this succeeded.\nac_fn_cxx_try_compile ()\n{\n  as_lineno=${as_lineno-\"$1\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n  rm -f conftest.$ac_objext conftest.beam\n  if { { ac_try=\"$ac_compile\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_compile\") 2>conftest.err\n  ac_status=$?\n  if test -s conftest.err; then\n    grep -v '^ *+' conftest.err >conftest.er1\n    cat conftest.er1 >&5\n    mv -f conftest.er1 conftest.err\n  fi\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; } && {\n\t test -z \"$ac_cxx_werror_flag\" ||\n\t test ! -s conftest.err\n       } && test -s conftest.$ac_objext\nthen :\n  ac_retval=0\nelse $as_nop\n  printf \"%s\\n\" \"$as_me: failed program was:\" >&5\nsed 's/^/| /' conftest.$ac_ext >&5\n\n\tac_retval=1\nfi\n  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno\n  as_fn_set_status $ac_retval\n\n} # ac_fn_cxx_try_compile\n\n# ac_fn_cxx_try_link LINENO\n# -------------------------\n# Try to link conftest.$ac_ext, and return whether this succeeded.\nac_fn_cxx_try_link ()\n{\n  as_lineno=${as_lineno-\"$1\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n  rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext\n  if { { ac_try=\"$ac_link\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_link\") 2>conftest.err\n  ac_status=$?\n  if test -s conftest.err; then\n    grep -v '^ *+' conftest.err >conftest.er1\n    cat conftest.er1 >&5\n    mv 
-f conftest.er1 conftest.err\n  fi\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; } && {\n\t test -z \"$ac_cxx_werror_flag\" ||\n\t test ! -s conftest.err\n       } && test -s conftest$ac_exeext && {\n\t test \"$cross_compiling\" = yes ||\n\t test -x conftest$ac_exeext\n       }\nthen :\n  ac_retval=0\nelse $as_nop\n  printf \"%s\\n\" \"$as_me: failed program was:\" >&5\nsed 's/^/| /' conftest.$ac_ext >&5\n\n\tac_retval=1\nfi\n  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information\n  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would\n  # interfere with the next link command; also delete a directory that is\n  # left behind by Apple's compiler.  We do this before executing the actions.\n  rm -rf conftest.dSYM conftest_ipa8_conftest.oo\n  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno\n  as_fn_set_status $ac_retval\n\n} # ac_fn_cxx_try_link\n\n# ac_fn_cxx_check_func LINENO FUNC VAR\n# ------------------------------------\n# Tests whether FUNC exists, setting the cache variable VAR accordingly\nac_fn_cxx_check_func ()\n{\n  as_lineno=${as_lineno-\"$1\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $2\" >&5\nprintf %s \"checking for $2... \" >&6; }\nif eval test \\${$3+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n/* Define $2 to an innocuous variant, in case <limits.h> declares $2.\n   For example, HP-UX 11i <limits.h> declares gettimeofday.  */\n#define $2 innocuous_$2\n\n/* System header to define __stub macros and hopefully few prototypes,\n   which can conflict with char $2 (); below.  */\n\n#include <limits.h>\n#undef $2\n\n/* Override any GCC internal prototype to avoid an error.\n   Use char because int might match the return type of a GCC\n   builtin and then its argument prototype would still apply.  
*/\n#ifdef __cplusplus\nextern \"C\"\n#endif\nchar $2 ();\n/* The GNU C library defines this for functions which it implements\n    to always fail with ENOSYS.  Some functions are actually named\n    something starting with __ and the normal name is an alias.  */\n#if defined __stub_$2 || defined __stub___$2\nchoke me\n#endif\n\nint\nmain (void)\n{\nreturn $2 ();\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_link \"$LINENO\"\nthen :\n  eval \"$3=yes\"\nelse $as_nop\n  eval \"$3=no\"\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam \\\n    conftest$ac_exeext conftest.$ac_ext\nfi\neval ac_res=\\$$3\n\t       { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_res\" >&5\nprintf \"%s\\n\" \"$ac_res\" >&6; }\n  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno\n\n} # ac_fn_cxx_check_func\n\n# ac_fn_cxx_try_run LINENO\n# ------------------------\n# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that\n# executables *can* be run.\nac_fn_cxx_try_run ()\n{\n  as_lineno=${as_lineno-\"$1\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n  if { { ac_try=\"$ac_link\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_link\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'\n  { { case \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_try\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? 
= $ac_status\" >&5\n  test $ac_status = 0; }; }\nthen :\n  ac_retval=0\nelse $as_nop\n  printf \"%s\\n\" \"$as_me: program exited with status $ac_status\" >&5\n       printf \"%s\\n\" \"$as_me: failed program was:\" >&5\nsed 's/^/| /' conftest.$ac_ext >&5\n\n       ac_retval=$ac_status\nfi\n  rm -rf conftest.dSYM conftest_ipa8_conftest.oo\n  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno\n  as_fn_set_status $ac_retval\n\n} # ac_fn_cxx_try_run\n\n# ac_fn_cxx_check_header_compile LINENO HEADER VAR INCLUDES\n# ---------------------------------------------------------\n# Tests whether HEADER exists and can be compiled using the include files in\n# INCLUDES, setting the cache variable VAR accordingly.\nac_fn_cxx_check_header_compile ()\n{\n  as_lineno=${as_lineno-\"$1\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $2\" >&5\nprintf %s \"checking for $2... \" >&6; }\nif eval test \\${$3+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n$4\n#include <$2>\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  eval \"$3=yes\"\nelse $as_nop\n  eval \"$3=no\"\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\neval ac_res=\\$$3\n\t       { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_res\" >&5\nprintf \"%s\\n\" \"$ac_res\" >&6; }\n  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno\n\n} # ac_fn_cxx_check_header_compile\nac_configure_args_raw=\nfor ac_arg\ndo\n  case $ac_arg in\n  *\\'*)\n    ac_arg=`printf \"%s\\n\" \"$ac_arg\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"` ;;\n  esac\n  as_fn_append ac_configure_args_raw \" '$ac_arg'\"\ndone\n\ncase $ac_configure_args_raw in\n  *$as_nl*)\n    ac_safe_unquote= ;;\n  *)\n    ac_unsafe_z='|&;<>()$`\\\\\"*?[ ''\t' # This string ends in space, tab.\n    ac_unsafe_a=\"$ac_unsafe_z#~\"\n    ac_safe_unquote=\"s/ '\\\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\\\)'/ \\\\1/g\"\n    ac_configure_args_raw=`      printf \"%s\\n\" \"$ac_configure_args_raw\" | sed \"$ac_safe_unquote\"`;;\nesac\n\ncat >config.log <<_ACEOF\nThis file contains any messages produced by compilers while\nrunning configure, to aid debugging if configure makes a mistake.\n\nIt was created by xgboost $as_me 3.3.0, which was\ngenerated by GNU Autoconf 2.71.  Invocation command line was\n\n  $ $0$ac_configure_args_raw\n\n_ACEOF\nexec 5>>config.log\n{\ncat <<_ASUNAME\n## --------- ##\n## Platform. 
##\n## --------- ##\n\nhostname = `(hostname || uname -n) 2>/dev/null | sed 1q`\nuname -m = `(uname -m) 2>/dev/null || echo unknown`\nuname -r = `(uname -r) 2>/dev/null || echo unknown`\nuname -s = `(uname -s) 2>/dev/null || echo unknown`\nuname -v = `(uname -v) 2>/dev/null || echo unknown`\n\n/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`\n/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`\n\n/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`\n/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`\n/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`\n/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`\n/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`\n/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`\n/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`\n\n_ASUNAME\n\nas_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nfor as_dir in $PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n    printf \"%s\\n\" \"PATH: $as_dir\"\n  done\nIFS=$as_save_IFS\n\n} >&5\n\ncat >&5 <<_ACEOF\n\n\n## ----------- ##\n## Core tests. 
##\n## ----------- ##\n\n_ACEOF\n\n\n# Keep a trace of the command line.\n# Strip out --no-create and --no-recursion so they do not pile up.\n# Strip out --silent because we don't want to record it for future runs.\n# Also quote any args containing shell meta-characters.\n# Make two passes to allow for proper duplicate-argument suppression.\nac_configure_args=\nac_configure_args0=\nac_configure_args1=\nac_must_keep_next=false\nfor ac_pass in 1 2\ndo\n  for ac_arg\n  do\n    case $ac_arg in\n    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;\n    -q | -quiet | --quiet | --quie | --qui | --qu | --q \\\n    | -silent | --silent | --silen | --sile | --sil)\n      continue ;;\n    *\\'*)\n      ac_arg=`printf \"%s\\n\" \"$ac_arg\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"` ;;\n    esac\n    case $ac_pass in\n    1) as_fn_append ac_configure_args0 \" '$ac_arg'\" ;;\n    2)\n      as_fn_append ac_configure_args1 \" '$ac_arg'\"\n      if test $ac_must_keep_next = true; then\n\tac_must_keep_next=false # Got value, back to normal.\n      else\n\tcase $ac_arg in\n\t  *=* | --config-cache | -C | -disable-* | --disable-* \\\n\t  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \\\n\t  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \\\n\t  | -with-* | --with-* | -without-* | --without-* | --x)\n\t    case \"$ac_configure_args0 \" in\n\t      \"$ac_configure_args1\"*\" '$ac_arg' \"* ) continue ;;\n\t    esac\n\t    ;;\n\t  -* ) ac_must_keep_next=true ;;\n\tesac\n      fi\n      as_fn_append ac_configure_args \" '$ac_arg'\"\n      ;;\n    esac\n  done\ndone\n{ ac_configure_args0=; unset ac_configure_args0;}\n{ ac_configure_args1=; unset ac_configure_args1;}\n\n# When interrupted or exit'd, cleanup temporary files, and complete\n# config.log.  
We remove comments because anyway the quotes in there\n# would cause problems or look ugly.\n# WARNING: Use '\\'' to represent an apostrophe within the trap.\n# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.\ntrap 'exit_status=$?\n  # Sanitize IFS.\n  IFS=\" \"\"\t$as_nl\"\n  # Save into config.log some information that might help in debugging.\n  {\n    echo\n\n    printf \"%s\\n\" \"## ---------------- ##\n## Cache variables. ##\n## ---------------- ##\"\n    echo\n    # The following way of writing the cache mishandles newlines in values,\n(\n  for ac_var in `(set) 2>&1 | sed -n '\\''s/^\\([a-zA-Z_][a-zA-Z0-9_]*\\)=.*/\\1/p'\\''`; do\n    eval ac_val=\\$$ac_var\n    case $ac_val in #(\n    *${as_nl}*)\n      case $ac_var in #(\n      *_cv_*) { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: cache variable $ac_var contains a newline\" >&2;} ;;\n      esac\n      case $ac_var in #(\n      _ | IFS | as_nl) ;; #(\n      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(\n      *) { eval $ac_var=; unset $ac_var;} ;;\n      esac ;;\n    esac\n  done\n  (set) 2>&1 |\n    case $as_nl`(ac_space='\\'' '\\''; set) 2>&1` in #(\n    *${as_nl}ac_space=\\ *)\n      sed -n \\\n\t\"s/'\\''/'\\''\\\\\\\\'\\'''\\''/g;\n\t  s/^\\\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\\\)=\\\\(.*\\\\)/\\\\1='\\''\\\\2'\\''/p\"\n      ;; #(\n    *)\n      sed -n \"/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p\"\n      ;;\n    esac |\n    sort\n)\n    echo\n\n    printf \"%s\\n\" \"## ----------------- ##\n## Output variables. 
##\n## ----------------- ##\"\n    echo\n    for ac_var in $ac_subst_vars\n    do\n      eval ac_val=\\$$ac_var\n      case $ac_val in\n      *\\'\\''*) ac_val=`printf \"%s\\n\" \"$ac_val\" | sed \"s/'\\''/'\\''\\\\\\\\\\\\\\\\'\\'''\\''/g\"`;;\n      esac\n      printf \"%s\\n\" \"$ac_var='\\''$ac_val'\\''\"\n    done | sort\n    echo\n\n    if test -n \"$ac_subst_files\"; then\n      printf \"%s\\n\" \"## ------------------- ##\n## File substitutions. ##\n## ------------------- ##\"\n      echo\n      for ac_var in $ac_subst_files\n      do\n\teval ac_val=\\$$ac_var\n\tcase $ac_val in\n\t*\\'\\''*) ac_val=`printf \"%s\\n\" \"$ac_val\" | sed \"s/'\\''/'\\''\\\\\\\\\\\\\\\\'\\'''\\''/g\"`;;\n\tesac\n\tprintf \"%s\\n\" \"$ac_var='\\''$ac_val'\\''\"\n      done | sort\n      echo\n    fi\n\n    if test -s confdefs.h; then\n      printf \"%s\\n\" \"## ----------- ##\n## confdefs.h. ##\n## ----------- ##\"\n      echo\n      cat confdefs.h\n      echo\n    fi\n    test \"$ac_signal\" != 0 &&\n      printf \"%s\\n\" \"$as_me: caught signal $ac_signal\"\n    printf \"%s\\n\" \"$as_me: exit $exit_status\"\n  } >&5\n  rm -f core *.core core.conftest.* &&\n    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&\n    exit $exit_status\n' 0\nfor ac_signal in 1 2 13 15; do\n  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal\ndone\nac_signal=0\n\n# confdefs.h avoids OS command line length limits that DEFS can exceed.\nrm -f -r conftest* confdefs.h\n\nprintf \"%s\\n\" \"/* confdefs.h */\" > confdefs.h\n\n# Predefined preprocessor variables.\n\nprintf \"%s\\n\" \"#define PACKAGE_NAME \\\"$PACKAGE_NAME\\\"\" >>confdefs.h\n\nprintf \"%s\\n\" \"#define PACKAGE_TARNAME \\\"$PACKAGE_TARNAME\\\"\" >>confdefs.h\n\nprintf \"%s\\n\" \"#define PACKAGE_VERSION \\\"$PACKAGE_VERSION\\\"\" >>confdefs.h\n\nprintf \"%s\\n\" \"#define PACKAGE_STRING \\\"$PACKAGE_STRING\\\"\" >>confdefs.h\n\nprintf \"%s\\n\" \"#define PACKAGE_BUGREPORT \\\"$PACKAGE_BUGREPORT\\\"\" >>confdefs.h\n\nprintf 
\"%s\\n\" \"#define PACKAGE_URL \\\"$PACKAGE_URL\\\"\" >>confdefs.h\n\n\n# Let the site file select an alternate cache file if it wants to.\n# Prefer an explicitly selected file to automatically selected ones.\nif test -n \"$CONFIG_SITE\"; then\n  ac_site_files=\"$CONFIG_SITE\"\nelif test \"x$prefix\" != xNONE; then\n  ac_site_files=\"$prefix/share/config.site $prefix/etc/config.site\"\nelse\n  ac_site_files=\"$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site\"\nfi\n\nfor ac_site_file in $ac_site_files\ndo\n  case $ac_site_file in #(\n  */*) :\n     ;; #(\n  *) :\n    ac_site_file=./$ac_site_file ;;\nesac\n  if test -f \"$ac_site_file\" && test -r \"$ac_site_file\"; then\n    { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file\" >&5\nprintf \"%s\\n\" \"$as_me: loading site script $ac_site_file\" >&6;}\n    sed 's/^/| /' \"$ac_site_file\" >&5\n    . \"$ac_site_file\" \\\n      || { { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\nas_fn_error $? \"failed to load site script $ac_site_file\nSee \\`config.log' for more details\" \"$LINENO\" 5; }\n  fi\ndone\n\nif test -r \"$cache_file\"; then\n  # Some versions of bash will fail to source /dev/null (special files\n  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.\n  if test /dev/null != \"$cache_file\" && test -f \"$cache_file\"; then\n    { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: loading cache $cache_file\" >&5\nprintf \"%s\\n\" \"$as_me: loading cache $cache_file\" >&6;}\n    case $cache_file in\n      [\\\\/]* | ?:[\\\\/]* ) . \"$cache_file\";;\n      *)                      . 
\"./$cache_file\";;\n    esac\n  fi\nelse\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: creating cache $cache_file\" >&5\nprintf \"%s\\n\" \"$as_me: creating cache $cache_file\" >&6;}\n  >$cache_file\nfi\n\n# Test code for whether the C++ compiler supports C++98 (global declarations)\nac_cxx_conftest_cxx98_globals='\n// Does the compiler advertise C++98 conformance?\n#if !defined __cplusplus || __cplusplus < 199711L\n# error \"Compiler does not advertise C++98 conformance\"\n#endif\n\n// These inclusions are to reject old compilers that\n// lack the unsuffixed header files.\n#include <cstdlib>\n#include <exception>\n\n// <cassert> and <cstring> are *not* freestanding headers in C++98.\nextern void assert (int);\nnamespace std {\n  extern int strcmp (const char *, const char *);\n}\n\n// Namespaces, exceptions, and templates were all added after \"C++ 2.0\".\nusing std::exception;\nusing std::strcmp;\n\nnamespace {\n\nvoid test_exception_syntax()\n{\n  try {\n    throw \"test\";\n  } catch (const char *s) {\n    // Extra parentheses suppress a warning when building autoconf itself,\n    // due to lint rules shared with more typical C programs.\n    assert (!(strcmp) (s, \"test\"));\n  }\n}\n\ntemplate <typename T> struct test_template\n{\n  T const val;\n  explicit test_template(T t) : val(t) {}\n  template <typename U> T add(U u) { return static_cast<T>(u) + val; }\n};\n\n} // anonymous namespace\n'\n\n# Test code for whether the C++ compiler supports C++98 (body of main)\nac_cxx_conftest_cxx98_main='\n  assert (argc);\n  assert (! 
argv[0]);\n{\n  test_exception_syntax ();\n  test_template<double> tt (2.0);\n  assert (tt.add (4) == 6.0);\n  assert (true && !false);\n}\n'\n\n# Test code for whether the C++ compiler supports C++11 (global declarations)\nac_cxx_conftest_cxx11_globals='\n// Does the compiler advertise C++ 2011 conformance?\n#if !defined __cplusplus || __cplusplus < 201103L\n# error \"Compiler does not advertise C++11 conformance\"\n#endif\n\nnamespace cxx11test\n{\n  constexpr int get_val() { return 20; }\n\n  struct testinit\n  {\n    int i;\n    double d;\n  };\n\n  class delegate\n  {\n  public:\n    delegate(int n) : n(n) {}\n    delegate(): delegate(2354) {}\n\n    virtual int getval() { return this->n; };\n  protected:\n    int n;\n  };\n\n  class overridden : public delegate\n  {\n  public:\n    overridden(int n): delegate(n) {}\n    virtual int getval() override final { return this->n * 2; }\n  };\n\n  class nocopy\n  {\n  public:\n    nocopy(int i): i(i) {}\n    nocopy() = default;\n    nocopy(const nocopy&) = delete;\n    nocopy & operator=(const nocopy&) = delete;\n  private:\n    int i;\n  };\n\n  // for testing lambda expressions\n  template <typename Ret, typename Fn> Ret eval(Fn f, Ret v)\n  {\n    return f(v);\n  }\n\n  // for testing variadic templates and trailing return types\n  template <typename V> auto sum(V first) -> V\n  {\n    return first;\n  }\n  template <typename V, typename... Args> auto sum(V first, Args... 
rest) -> V\n  {\n    return first + sum(rest...);\n  }\n}\n'\n\n# Test code for whether the C++ compiler supports C++11 (body of main)\nac_cxx_conftest_cxx11_main='\n{\n  // Test auto and decltype\n  auto a1 = 6538;\n  auto a2 = 48573953.4;\n  auto a3 = \"String literal\";\n\n  int total = 0;\n  for (auto i = a3; *i; ++i) { total += *i; }\n\n  decltype(a2) a4 = 34895.034;\n}\n{\n  // Test constexpr\n  short sa[cxx11test::get_val()] = { 0 };\n}\n{\n  // Test initializer lists\n  cxx11test::testinit il = { 4323, 435234.23544 };\n}\n{\n  // Test range-based for\n  int array[] = {9, 7, 13, 15, 4, 18, 12, 10, 5, 3,\n                 14, 19, 17, 8, 6, 20, 16, 2, 11, 1};\n  for (auto &x : array) { x += 23; }\n}\n{\n  // Test lambda expressions\n  using cxx11test::eval;\n  assert (eval ([](int x) { return x*2; }, 21) == 42);\n  double d = 2.0;\n  assert (eval ([&](double x) { return d += x; }, 3.0) == 5.0);\n  assert (d == 5.0);\n  assert (eval ([=](double x) mutable { return d += x; }, 4.0) == 9.0);\n  assert (d == 5.0);\n}\n{\n  // Test use of variadic templates\n  using cxx11test::sum;\n  auto a = sum(1);\n  auto b = sum(1, 2);\n  auto c = sum(1.0, 2.0, 3.0);\n}\n{\n  // Test constructor delegation\n  cxx11test::delegate d1;\n  cxx11test::delegate d2();\n  cxx11test::delegate d3(45);\n}\n{\n  // Test override and final\n  cxx11test::overridden o1(55464);\n}\n{\n  // Test nullptr\n  char *c = nullptr;\n}\n{\n  // Test template brackets\n  test_template<::test_template<int>> v(test_template<int>(12));\n}\n{\n  // Unicode literals\n  char const *utf8 = u8\"UTF-8 string \\u2500\";\n  char16_t const *utf16 = u\"UTF-8 string \\u2500\";\n  char32_t const *utf32 = U\"UTF-32 string \\u2500\";\n}\n'\n\n# Test code for whether the C compiler supports C++11 (complete).\nac_cxx_conftest_cxx11_program=\"${ac_cxx_conftest_cxx98_globals}\n${ac_cxx_conftest_cxx11_globals}\n\nint\nmain (int argc, char **argv)\n{\n  int ok = 0;\n  ${ac_cxx_conftest_cxx98_main}\n  
${ac_cxx_conftest_cxx11_main}\n  return ok;\n}\n\"\n\n# Test code for whether the C compiler supports C++98 (complete).\nac_cxx_conftest_cxx98_program=\"${ac_cxx_conftest_cxx98_globals}\nint\nmain (int argc, char **argv)\n{\n  int ok = 0;\n  ${ac_cxx_conftest_cxx98_main}\n  return ok;\n}\n\"\n\nas_fn_append ac_header_cxx_list \" stdio.h stdio_h HAVE_STDIO_H\"\nas_fn_append ac_header_cxx_list \" stdlib.h stdlib_h HAVE_STDLIB_H\"\nas_fn_append ac_header_cxx_list \" string.h string_h HAVE_STRING_H\"\nas_fn_append ac_header_cxx_list \" inttypes.h inttypes_h HAVE_INTTYPES_H\"\nas_fn_append ac_header_cxx_list \" stdint.h stdint_h HAVE_STDINT_H\"\nas_fn_append ac_header_cxx_list \" strings.h strings_h HAVE_STRINGS_H\"\nas_fn_append ac_header_cxx_list \" sys/stat.h sys_stat_h HAVE_SYS_STAT_H\"\nas_fn_append ac_header_cxx_list \" sys/types.h sys_types_h HAVE_SYS_TYPES_H\"\nas_fn_append ac_header_cxx_list \" unistd.h unistd_h HAVE_UNISTD_H\"\n# Check that the precious variables saved in the cache have kept the same\n# value.\nac_cache_corrupted=false\nfor ac_var in $ac_precious_vars; do\n  eval ac_old_set=\\$ac_cv_env_${ac_var}_set\n  eval ac_new_set=\\$ac_env_${ac_var}_set\n  eval ac_old_val=\\$ac_cv_env_${ac_var}_value\n  eval ac_new_val=\\$ac_env_${ac_var}_value\n  case $ac_old_set,$ac_new_set in\n    set,)\n      { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: \\`$ac_var' was set to \\`$ac_old_val' in the previous run\" >&5\nprintf \"%s\\n\" \"$as_me: error: \\`$ac_var' was set to \\`$ac_old_val' in the previous run\" >&2;}\n      ac_cache_corrupted=: ;;\n    ,set)\n      { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: \\`$ac_var' was not set in the previous run\" >&5\nprintf \"%s\\n\" \"$as_me: error: \\`$ac_var' was not set in the previous run\" >&2;}\n      ac_cache_corrupted=: ;;\n    ,);;\n    *)\n      if test \"x$ac_old_val\" != \"x$ac_new_val\"; then\n\t# differences in whitespace do not lead to failure.\n\tac_old_val_w=`echo x 
$ac_old_val`\n\tac_new_val_w=`echo x $ac_new_val`\n\tif test \"$ac_old_val_w\" != \"$ac_new_val_w\"; then\n\t  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: \\`$ac_var' has changed since the previous run:\" >&5\nprintf \"%s\\n\" \"$as_me: error: \\`$ac_var' has changed since the previous run:\" >&2;}\n\t  ac_cache_corrupted=:\n\telse\n\t  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \\`$ac_var' since the previous run:\" >&5\nprintf \"%s\\n\" \"$as_me: warning: ignoring whitespace changes in \\`$ac_var' since the previous run:\" >&2;}\n\t  eval $ac_var=\\$ac_old_val\n\tfi\n\t{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}:   former value:  \\`$ac_old_val'\" >&5\nprintf \"%s\\n\" \"$as_me:   former value:  \\`$ac_old_val'\" >&2;}\n\t{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}:   current value: \\`$ac_new_val'\" >&5\nprintf \"%s\\n\" \"$as_me:   current value: \\`$ac_new_val'\" >&2;}\n      fi;;\n  esac\n  # Pass precious variables to config.status.\n  if test \"$ac_new_set\" = set; then\n    case $ac_new_val in\n    *\\'*) ac_arg=$ac_var=`printf \"%s\\n\" \"$ac_new_val\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"` ;;\n    *) ac_arg=$ac_var=$ac_new_val ;;\n    esac\n    case \" $ac_configure_args \" in\n      *\" '$ac_arg' \"*) ;; # Avoid dups.  Use of quotes ensures accuracy.\n      *) as_fn_append ac_configure_args \" '$ac_arg'\" ;;\n    esac\n  fi\ndone\nif $ac_cache_corrupted; then\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build\" >&5\nprintf \"%s\\n\" \"$as_me: error: changes in the environment can compromise the build\" >&2;}\n  as_fn_error $? \"run \\`${MAKE-make} distclean' and/or \\`rm $cache_file'\n\t    and start over\" \"$LINENO\" 5\nfi\n## -------------------- ##\n## Main body of script. 
##\n## -------------------- ##\n\nac_ext=c\nac_cpp='$CPP $CPPFLAGS'\nac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'\nac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'\nac_compiler_gnu=$ac_cv_c_compiler_gnu\n\n\n\n: ${R_HOME=`R RHOME`}\nif test -z \"${R_HOME}\"; then\n  echo \"could not determine R_HOME\"\n  exit 1\nfi\n\nCXX17=`\"${R_HOME}/bin/R\" CMD config CXX17`\nCXX17STD=`\"${R_HOME}/bin/R\" CMD config CXX17STD`\nCXX=\"${CXX17} ${CXX17STD}\"\nCXXFLAGS=`\"${R_HOME}/bin/R\" CMD config CXXFLAGS`\n\nCC=`\"${R_HOME}/bin/R\" CMD config CC`\nCFLAGS=`\"${R_HOME}/bin/R\" CMD config CFLAGS`\nCPPFLAGS=`\"${R_HOME}/bin/R\" CMD config CPPFLAGS`\n\nLDFLAGS=`\"${R_HOME}/bin/R\" CMD config LDFLAGS`\nac_ext=cpp\nac_cpp='$CXXCPP $CPPFLAGS'\nac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'\nac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'\nac_compiler_gnu=$ac_cv_cxx_compiler_gnu\n\n\nDMLC_DEFS=\"\"\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Checking if/where backtrace is available\" >&5\nprintf \"%s\\n\" \"$as_me: Checking if/where backtrace is available\" >&6;}\n\n\n\n\n\n\nac_ext=cpp\nac_cpp='$CXXCPP $CPPFLAGS'\nac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'\nac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'\nac_compiler_gnu=$ac_cv_cxx_compiler_gnu\nif test -z \"$CXX\"; then\n  if test -n \"$CCC\"; then\n    CXX=$CCC\n  else\n    if test -n \"$ac_tool_prefix\"; then\n  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++\n  do\n    # Extract the first word of \"$ac_tool_prefix$ac_prog\", so it can be a program name with args.\nset dummy $ac_tool_prefix$ac_prog; ac_word=$2\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $ac_word\" >&5\nprintf %s \"checking for $ac_word... 
\" >&6; }\nif test ${ac_cv_prog_CXX+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  if test -n \"$CXX\"; then\n  ac_cv_prog_CXX=\"$CXX\" # Let the user override the test.\nelse\nas_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nfor as_dir in $PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n    for ac_exec_ext in '' $ac_executable_extensions; do\n  if as_fn_executable_p \"$as_dir$ac_word$ac_exec_ext\"; then\n    ac_cv_prog_CXX=\"$ac_tool_prefix$ac_prog\"\n    printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext\" >&5\n    break 2\n  fi\ndone\n  done\nIFS=$as_save_IFS\n\nfi\nfi\nCXX=$ac_cv_prog_CXX\nif test -n \"$CXX\"; then\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $CXX\" >&5\nprintf \"%s\\n\" \"$CXX\" >&6; }\nelse\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: no\" >&5\nprintf \"%s\\n\" \"no\" >&6; }\nfi\n\n\n    test -n \"$CXX\" && break\n  done\nfi\nif test -z \"$CXX\"; then\n  ac_ct_CXX=$CXX\n  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++\ndo\n  # Extract the first word of \"$ac_prog\", so it can be a program name with args.\nset dummy $ac_prog; ac_word=$2\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $ac_word\" >&5\nprintf %s \"checking for $ac_word... 
\" >&6; }\nif test ${ac_cv_prog_ac_ct_CXX+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  if test -n \"$ac_ct_CXX\"; then\n  ac_cv_prog_ac_ct_CXX=\"$ac_ct_CXX\" # Let the user override the test.\nelse\nas_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nfor as_dir in $PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n    for ac_exec_ext in '' $ac_executable_extensions; do\n  if as_fn_executable_p \"$as_dir$ac_word$ac_exec_ext\"; then\n    ac_cv_prog_ac_ct_CXX=\"$ac_prog\"\n    printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext\" >&5\n    break 2\n  fi\ndone\n  done\nIFS=$as_save_IFS\n\nfi\nfi\nac_ct_CXX=$ac_cv_prog_ac_ct_CXX\nif test -n \"$ac_ct_CXX\"; then\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX\" >&5\nprintf \"%s\\n\" \"$ac_ct_CXX\" >&6; }\nelse\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: no\" >&5\nprintf \"%s\\n\" \"no\" >&6; }\nfi\n\n\n  test -n \"$ac_ct_CXX\" && break\ndone\n\n  if test \"x$ac_ct_CXX\" = x; then\n    CXX=\"g++\"\n  else\n    case $cross_compiling:$ac_tool_warned in\nyes:)\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: using cross tools not prefixed with host triplet\" >&2;}\nac_tool_warned=yes ;;\nesac\n    CXX=$ac_ct_CXX\n  fi\nfi\n\n  fi\nfi\n# Provide some information about the compiler.\nprintf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for C++ compiler version\" >&5\nset X $ac_compile\nac_compiler=$2\nfor ac_option in --version -v -V -qversion; do\n  { { ac_try=\"$ac_compiler $ac_option >&5\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_compiler $ac_option >&5\") 2>conftest.err\n  
ac_status=$?\n  if test -s conftest.err; then\n    sed '10a\\\n... rest of stderr output deleted ...\n         10q' conftest.err >conftest.er1\n    cat conftest.er1 >&5\n  fi\n  rm -f conftest.er1 conftest.err\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; }\ndone\n\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n\nint\nmain (void)\n{\n\n  ;\n  return 0;\n}\n_ACEOF\nac_clean_files_save=$ac_clean_files\nac_clean_files=\"$ac_clean_files a.out a.out.dSYM a.exe b.out\"\n# Try to create an executable without -o first, disregard a.out.\n# It will help us diagnose broken compilers, and finding out an intuition\n# of exeext.\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler works\" >&5\nprintf %s \"checking whether the C++ compiler works... \" >&6; }\nac_link_default=`printf \"%s\\n\" \"$ac_link\" | sed 's/ -o *conftest[^ ]*//'`\n\n# The possible output files:\nac_files=\"a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*\"\n\nac_rmfiles=\nfor ac_file in $ac_files\ndo\n  case $ac_file in\n    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;\n    * ) ac_rmfiles=\"$ac_rmfiles $ac_file\";;\n  esac\ndone\nrm -f $ac_rmfiles\n\nif { { ac_try=\"$ac_link_default\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_link_default\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; }\nthen :\n  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.\n# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'\n# in a Makefile.  
We should not override ac_cv_exeext if it was cached,\n# so that the user can short-circuit this test for compilers unknown to\n# Autoconf.\nfor ac_file in $ac_files ''\ndo\n  test -f \"$ac_file\" || continue\n  case $ac_file in\n    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )\n\t;;\n    [ab].out )\n\t# We found the default executable, but exeext='' is most\n\t# certainly right.\n\tbreak;;\n    *.* )\n\tif test ${ac_cv_exeext+y} && test \"$ac_cv_exeext\" != no;\n\tthen :; else\n\t   ac_cv_exeext=`expr \"$ac_file\" : '[^.]*\\(\\..*\\)'`\n\tfi\n\t# We set ac_cv_exeext here because the later test for it is not\n\t# safe: cross compilers may not add the suffix if given an `-o'\n\t# argument, so we may need to know it at that point already.\n\t# Even if this section looks crufty: it has the advantage of\n\t# actually working.\n\tbreak;;\n    * )\n\tbreak;;\n  esac\ndone\ntest \"$ac_cv_exeext\" = no && ac_cv_exeext=\n\nelse $as_nop\n  ac_file=''\nfi\nif test -z \"$ac_file\"\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: no\" >&5\nprintf \"%s\\n\" \"no\" >&6; }\nprintf \"%s\\n\" \"$as_me: failed program was:\" >&5\nsed 's/^/| /' conftest.$ac_ext >&5\n\n{ { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\nas_fn_error 77 \"C++ compiler cannot create executables\nSee \\`config.log' for more details\" \"$LINENO\" 5; }\nelse $as_nop\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: yes\" >&5\nprintf \"%s\\n\" \"yes\" >&6; }\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for C++ compiler default output file name\" >&5\nprintf %s \"checking for C++ compiler default output file name... 
\" >&6; }\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_file\" >&5\nprintf \"%s\\n\" \"$ac_file\" >&6; }\nac_exeext=$ac_cv_exeext\n\nrm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out\nac_clean_files=$ac_clean_files_save\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for suffix of executables\" >&5\nprintf %s \"checking for suffix of executables... \" >&6; }\nif { { ac_try=\"$ac_link\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_link\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; }\nthen :\n  # If both `conftest.exe' and `conftest' are `present' (well, observable)\n# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will\n# work properly (i.e., refer to `conftest.exe'), while it won't with\n# `rm'.\nfor ac_file in conftest.exe conftest conftest.*; do\n  test -f \"$ac_file\" || continue\n  case $ac_file in\n    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;\n    *.* ) ac_cv_exeext=`expr \"$ac_file\" : '[^.]*\\(\\..*\\)'`\n\t  break;;\n    * ) break;;\n  esac\ndone\nelse $as_nop\n  { { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\nas_fn_error $? \"cannot compute suffix of executables: cannot compile and link\nSee \\`config.log' for more details\" \"$LINENO\" 5; }\nfi\nrm -f conftest conftest$ac_cv_exeext\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext\" >&5\nprintf \"%s\\n\" \"$ac_cv_exeext\" >&6; }\n\nrm -f conftest.$ac_ext\nEXEEXT=$ac_cv_exeext\nac_exeext=$EXEEXT\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n#include <stdio.h>\nint\nmain (void)\n{\nFILE *f = fopen (\"conftest.out\", \"w\");\n return ferror (f) || fclose (f) != 0;\n\n  ;\n  return 0;\n}\n_ACEOF\nac_clean_files=\"$ac_clean_files conftest.out\"\n# Check that the compiler produces executables we can run.  If not, either\n# the compiler is broken, or we cross compile.\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling\" >&5\nprintf %s \"checking whether we are cross compiling... \" >&6; }\nif test \"$cross_compiling\" != yes; then\n  { { ac_try=\"$ac_link\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_link\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? = $ac_status\" >&5\n  test $ac_status = 0; }\n  if { ac_try='./conftest$ac_cv_exeext'\n  { { case \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_try\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? 
= $ac_status\" >&5\n  test $ac_status = 0; }; }; then\n    cross_compiling=no\n  else\n    if test \"$cross_compiling\" = maybe; then\n\tcross_compiling=yes\n    else\n\t{ { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\nas_fn_error 77 \"cannot run C++ compiled programs.\nIf you meant to cross compile, use \\`--host'.\nSee \\`config.log' for more details\" \"$LINENO\" 5; }\n    fi\n  fi\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $cross_compiling\" >&5\nprintf \"%s\\n\" \"$cross_compiling\" >&6; }\n\nrm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out\nac_clean_files=$ac_clean_files_save\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for suffix of object files\" >&5\nprintf %s \"checking for suffix of object files... \" >&6; }\nif test ${ac_cv_objext+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n\nint\nmain (void)\n{\n\n  ;\n  return 0;\n}\n_ACEOF\nrm -f conftest.o conftest.obj\nif { { ac_try=\"$ac_compile\"\ncase \"(($ac_try\" in\n  *\\\"* | *\\`* | *\\\\*) ac_try_echo=\\$ac_try;;\n  *) ac_try_echo=$ac_try;;\nesac\neval ac_try_echo=\"\\\"\\$as_me:${as_lineno-$LINENO}: $ac_try_echo\\\"\"\nprintf \"%s\\n\" \"$ac_try_echo\"; } >&5\n  (eval \"$ac_compile\") 2>&5\n  ac_status=$?\n  printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: \\$? 
= $ac_status\" >&5\n  test $ac_status = 0; }\nthen :\n  for ac_file in conftest.o conftest.obj conftest.*; do\n  test -f \"$ac_file\" || continue;\n  case $ac_file in\n    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;\n    *) ac_cv_objext=`expr \"$ac_file\" : '.*\\.\\(.*\\)'`\n       break;;\n  esac\ndone\nelse $as_nop\n  printf \"%s\\n\" \"$as_me: failed program was:\" >&5\nsed 's/^/| /' conftest.$ac_ext >&5\n\n{ { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: in \\`$ac_pwd':\" >&5\nprintf \"%s\\n\" \"$as_me: error: in \\`$ac_pwd':\" >&2;}\nas_fn_error $? \"cannot compute suffix of object files: cannot compile\nSee \\`config.log' for more details\" \"$LINENO\" 5; }\nfi\nrm -f conftest.$ac_cv_objext conftest.$ac_ext\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext\" >&5\nprintf \"%s\\n\" \"$ac_cv_objext\" >&6; }\nOBJEXT=$ac_cv_objext\nac_objext=$OBJEXT\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C++\" >&5\nprintf %s \"checking whether the compiler supports GNU C++... \" >&6; }\nif test ${ac_cv_cxx_compiler_gnu+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n\nint\nmain (void)\n{\n#ifndef __GNUC__\n       choke me\n#endif\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_compiler_gnu=yes\nelse $as_nop\n  ac_compiler_gnu=no\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nac_cv_cxx_compiler_gnu=$ac_compiler_gnu\n\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu\" >&5\nprintf \"%s\\n\" \"$ac_cv_cxx_compiler_gnu\" >&6; }\nac_compiler_gnu=$ac_cv_cxx_compiler_gnu\n\nif test $ac_compiler_gnu = yes; then\n  GXX=yes\nelse\n  GXX=\nfi\nac_test_CXXFLAGS=${CXXFLAGS+y}\nac_save_CXXFLAGS=$CXXFLAGS\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g\" >&5\nprintf %s \"checking whether $CXX accepts -g... \" >&6; }\nif test ${ac_cv_prog_cxx_g+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  ac_save_cxx_werror_flag=$ac_cxx_werror_flag\n   ac_cxx_werror_flag=yes\n   ac_cv_prog_cxx_g=no\n   CXXFLAGS=\"-g\"\n   cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n\nint\nmain (void)\n{\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_prog_cxx_g=yes\nelse $as_nop\n  CXXFLAGS=\"\"\n      cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n\nint\nmain (void)\n{\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n\nelse $as_nop\n  ac_cxx_werror_flag=$ac_save_cxx_werror_flag\n\t CXXFLAGS=\"-g\"\n\t cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n\nint\nmain (void)\n{\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_prog_cxx_g=yes\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\n   ac_cxx_werror_flag=$ac_save_cxx_werror_flag\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g\" >&5\nprintf \"%s\\n\" \"$ac_cv_prog_cxx_g\" >&6; }\nif test $ac_test_CXXFLAGS; then\n  CXXFLAGS=$ac_save_CXXFLAGS\nelif test $ac_cv_prog_cxx_g = yes; then\n  if test \"$GXX\" = yes; then\n    CXXFLAGS=\"-g -O2\"\n  else\n    CXXFLAGS=\"-g\"\n  fi\nelse\n  if test \"$GXX\" = yes; then\n    CXXFLAGS=\"-O2\"\n  else\n    CXXFLAGS=\n  fi\nfi\nac_prog_cxx_stdcxx=no\nif test x$ac_prog_cxx_stdcxx = xno\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features\" >&5\nprintf %s \"checking for $CXX option to enable C++11 features... \" >&6; }\nif test ${ac_cv_prog_cxx_cxx11+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  ac_cv_prog_cxx_cxx11=no\nac_save_CXX=$CXX\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n$ac_cxx_conftest_cxx11_program\n_ACEOF\nfor ac_arg in '' -std=gnu++11 -std=gnu++0x -std=c++11 -std=c++0x -qlanglvl=extended0x -AA\ndo\n  CXX=\"$ac_save_CXX $ac_arg\"\n  if ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_prog_cxx_cxx11=$ac_arg\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam\n  test \"x$ac_cv_prog_cxx_cxx11\" != \"xno\" && break\ndone\nrm -f conftest.$ac_ext\nCXX=$ac_save_CXX\nfi\n\nif test \"x$ac_cv_prog_cxx_cxx11\" = xno\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: unsupported\" >&5\nprintf \"%s\\n\" \"unsupported\" >&6; }\nelse $as_nop\n  if test \"x$ac_cv_prog_cxx_cxx11\" = x\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: none needed\" >&5\nprintf \"%s\\n\" \"none needed\" >&6; }\nelse $as_nop\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11\" >&5\nprintf \"%s\\n\" \"$ac_cv_prog_cxx_cxx11\" >&6; }\n     CXX=\"$CXX $ac_cv_prog_cxx_cxx11\"\nfi\n  ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11\n  ac_prog_cxx_stdcxx=cxx11\nfi\nfi\nif test x$ac_prog_cxx_stdcxx = xno\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features\" >&5\nprintf %s \"checking for $CXX option to enable C++98 features... \" >&6; }\nif test ${ac_cv_prog_cxx_cxx98+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  ac_cv_prog_cxx_cxx98=no\nac_save_CXX=$CXX\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n$ac_cxx_conftest_cxx98_program\n_ACEOF\nfor ac_arg in '' -std=gnu++98 -std=c++98 -qlanglvl=extended -AA\ndo\n  CXX=\"$ac_save_CXX $ac_arg\"\n  if ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_prog_cxx_cxx98=$ac_arg\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam\n  test \"x$ac_cv_prog_cxx_cxx98\" != \"xno\" && break\ndone\nrm -f conftest.$ac_ext\nCXX=$ac_save_CXX\nfi\n\nif test \"x$ac_cv_prog_cxx_cxx98\" = xno\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: unsupported\" >&5\nprintf \"%s\\n\" \"unsupported\" >&6; }\nelse $as_nop\n  if test \"x$ac_cv_prog_cxx_cxx98\" = x\nthen :\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: none needed\" >&5\nprintf \"%s\\n\" \"none needed\" >&6; }\nelse $as_nop\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98\" >&5\nprintf \"%s\\n\" \"$ac_cv_prog_cxx_cxx98\" >&6; }\n     CXX=\"$CXX $ac_cv_prog_cxx_cxx98\"\nfi\n  ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98\n  ac_prog_cxx_stdcxx=cxx98\nfi\nfi\n\nac_ext=cpp\nac_cpp='$CXXCPP $CPPFLAGS'\nac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'\nac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'\nac_compiler_gnu=$ac_cv_cxx_compiler_gnu\n\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking for backtrace in -lexecinfo\" >&5\nprintf %s \"checking for backtrace in -lexecinfo... \" >&6; }\nif test ${ac_cv_lib_execinfo_backtrace+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  ac_check_lib_save_LIBS=$LIBS\nLIBS=\"-lexecinfo  $LIBS\"\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n\nnamespace conftest {\n  extern \"C\" int backtrace ();\n}\nint\nmain (void)\n{\nreturn conftest::backtrace ();\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_link \"$LINENO\"\nthen :\n  ac_cv_lib_execinfo_backtrace=yes\nelse $as_nop\n  ac_cv_lib_execinfo_backtrace=no\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam \\\n    conftest$ac_exeext conftest.$ac_ext\nLIBS=$ac_check_lib_save_LIBS\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_execinfo_backtrace\" >&5\nprintf \"%s\\n\" \"$ac_cv_lib_execinfo_backtrace\" >&6; }\nif test \"x$ac_cv_lib_execinfo_backtrace\" = xyes\nthen :\n  BACKTRACE_LIB=-lexecinfo\nelse $as_nop\n  BACKTRACE_LIB=''\nfi\n\nif test -z \"${BACKTRACE_LIB}\"\nthen :\n        ac_fn_cxx_check_func \"$LINENO\" \"backtrace\" \"ac_cv_func_backtrace\"\nif test \"x$ac_cv_func_backtrace\" = xyes\nthen :\n\nelse $as_nop\n  DMLC_DEFS=\"$DMLC_DEFS -DDMLC_LOG_STACK_TRACE=0\"\nfi\n\nfi\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Checking whether fopen64 is available\" >&5\nprintf \"%s\\n\" \"$as_me: Checking whether fopen64 is available\" >&6;}\nac_fn_cxx_check_func \"$LINENO\" \"fopen64\" \"ac_cv_func_fopen64\"\nif test \"x$ac_cv_func_fopen64\" = xyes\nthen :\n\nelse $as_nop\n  DMLC_DEFS=\"$DMLC_DEFS -DDMLC_USE_FOPEN64=0\"\nfi\n\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Endian detection\" >&5\nprintf \"%s\\n\" \"$as_me: Endian detection\" >&6;}\n\nac_header= ac_cache=\nfor ac_item in $ac_header_cxx_list\ndo\n  if test $ac_cache; then\n    ac_fn_cxx_check_header_compile \"$LINENO\" $ac_header ac_cv_header_$ac_cache \"$ac_includes_default\"\n    if eval test \\\"x\\$ac_cv_header_$ac_cache\\\" = xyes; then\n      printf \"%s\\n\" \"#define $ac_item 1\" >> confdefs.h\n    fi\n    ac_header= ac_cache=\n  elif test $ac_header; then\n    ac_cache=$ac_item\n  else\n    ac_header=$ac_item\n  fi\ndone\n\n\n\n\n\n\n\n\nif test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes\nthen 
:\n\nprintf \"%s\\n\" \"#define STDC_HEADERS 1\" >>confdefs.h\n\nfi\nif test -z \"${USE_LITTLE_ENDIAN+x}\"\nthen :\n\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Checking system endianness as USE_LITTLE_ENDIAN is unset\" >&5\nprintf \"%s\\n\" \"$as_me: Checking system endianness as USE_LITTLE_ENDIAN is unset\" >&6;}\n   { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian\" >&5\nprintf %s \"checking whether byte ordering is bigendian... \" >&6; }\nif test ${ac_cv_c_bigendian+y}\nthen :\n  printf %s \"(cached) \" >&6\nelse $as_nop\n  ac_cv_c_bigendian=unknown\n    # See if we're dealing with a universal compiler.\n    cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n#ifndef __APPLE_CC__\n\t       not a universal capable compiler\n\t     #endif\n\t     typedef int dummy;\n\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n\n\t# Check for potential -arch flags.  It is not universal unless\n\t# there are at least two -arch flags with different values.\n\tac_arch=\n\tac_prev=\n\tfor ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do\n\t if test -n \"$ac_prev\"; then\n\t   case $ac_word in\n\t     i?86 | x86_64 | ppc | ppc64)\n\t       if test -z \"$ac_arch\" || test \"$ac_arch\" = \"$ac_word\"; then\n\t\t ac_arch=$ac_word\n\t       else\n\t\t ac_cv_c_bigendian=universal\n\t\t break\n\t       fi\n\t       ;;\n\t   esac\n\t   ac_prev=\n\t elif test \"x$ac_word\" = \"x-arch\"; then\n\t   ac_prev=arch\n\t fi\n       done\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\n    if test $ac_cv_c_bigendian = unknown; then\n      # See if sys/param.h defines the BYTE_ORDER macro.\n      cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n#include <sys/types.h>\n\t     #include <sys/param.h>\n\nint\nmain (void)\n{\n#if ! 
(defined BYTE_ORDER && defined BIG_ENDIAN \\\n\t\t     && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \\\n\t\t     && LITTLE_ENDIAN)\n\t      bogus endian macros\n\t     #endif\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  # It does; now see whether it defined to BIG_ENDIAN or not.\n\t cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n#include <sys/types.h>\n\t\t#include <sys/param.h>\n\nint\nmain (void)\n{\n#if BYTE_ORDER != BIG_ENDIAN\n\t\t not big endian\n\t\t#endif\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_c_bigendian=yes\nelse $as_nop\n  ac_cv_c_bigendian=no\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\n    fi\n    if test $ac_cv_c_bigendian = unknown; then\n      # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).\n      cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n#include <limits.h>\n\nint\nmain (void)\n{\n#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)\n\t      bogus endian macros\n\t     #endif\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  # It does; now see whether it defined to _BIG_ENDIAN or not.\n\t cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n#include <limits.h>\n\nint\nmain (void)\n{\n#ifndef _BIG_ENDIAN\n\t\t not big endian\n\t\t#endif\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  ac_cv_c_bigendian=yes\nelse $as_nop\n  ac_cv_c_bigendian=no\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\n    fi\n    if test $ac_cv_c_bigendian = unknown; then\n      # Compile a test program.\n      if test \"$cross_compiling\" = yes\nthen :\n  # Try to guess by grepping values from an object file.\n\t cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\nunsigned short int ascii_mm[] =\n\t\t  { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };\n\t\tunsigned short int ascii_ii[] =\n\t\t  { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };\n\t\tint use_ascii (int i) {\n\t\t  return ascii_mm[i] + ascii_ii[i];\n\t\t}\n\t\tunsigned short int ebcdic_ii[] =\n\t\t  { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };\n\t\tunsigned short int ebcdic_mm[] =\n\t\t  { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };\n\t\tint use_ebcdic (int i) {\n\t\t  return ebcdic_mm[i] + ebcdic_ii[i];\n\t\t}\n\t\textern int foo;\n\nint\nmain (void)\n{\nreturn use_ascii (foo) == use_ebcdic (foo);\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_compile \"$LINENO\"\nthen :\n  if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then\n\t      ac_cv_c_bigendian=yes\n\t    fi\n\t    if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then\n\t      if test \"$ac_cv_c_bigendian\" = unknown; then\n\t\tac_cv_c_bigendian=no\n\t      else\n\t\t# finding both strings is unlikely to happen, but who knows?\n\t\tac_cv_c_bigendian=unknown\n\t      fi\n\t    fi\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext\nelse $as_nop\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n$ac_includes_default\nint\nmain (void)\n{\n\n\t     /* Are we little or big endian?  From Harbison&Steele.  */\n\t     union\n\t     {\n\t       long int l;\n\t       char c[sizeof (long int)];\n\t     } u;\n\t     u.l = 1;\n\t     return u.c[sizeof (long int) - 1] == 1;\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_run \"$LINENO\"\nthen :\n  ac_cv_c_bigendian=no\nelse $as_nop\n  ac_cv_c_bigendian=yes\nfi\nrm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \\\n  conftest.$ac_objext conftest.beam conftest.$ac_ext\nfi\n\n    fi\nfi\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian\" >&5\nprintf \"%s\\n\" \"$ac_cv_c_bigendian\" >&6; }\n case $ac_cv_c_bigendian in #(\n   yes)\n     { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: using big endian\" >&5\nprintf \"%s\\n\" \"using big endian\" >&6; }\n     ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=0\";; #(\n   no)\n     { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: using little endian\" >&5\nprintf \"%s\\n\" \"using little endian\" >&6; }\n     ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=1\" ;; #(\n   universal)\n\nprintf \"%s\\n\" \"#define AC_APPLE_UNIVERSAL_BUILD 1\" >>confdefs.h\n\n     ;; #(\n   *)\n     { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: unknown\" >&5\nprintf \"%s\\n\" \"unknown\" >&6; }\n     as_fn_error $? \"Could not determine endianness. Please set USE_LITTLE_ENDIAN\" \"$LINENO\" 5\n   ;;\n esac\n\n\nelse $as_nop\n\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Forcing endianness to: ${USE_LITTLE_ENDIAN}\" >&5\nprintf \"%s\\n\" \"$as_me: Forcing endianness to: ${USE_LITTLE_ENDIAN}\" >&6;}\n  ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}\"\n\nfi\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Checking for prefetch builtin\" >&5\nprintf \"%s\\n\" \"$as_me: Checking for prefetch builtin\" >&6;}\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n\nint\nmain (void)\n{\n__builtin_prefetch\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_link \"$LINENO\"\nthen :\n  XGBOOST_BUILTIN_PREFETCH_PRESENT=\"-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1\"\nelse $as_nop\n  XGBOOST_BUILTIN_PREFETCH_PRESENT=\"\"\n\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam \\\n    conftest$ac_exeext conftest.$ac_ext\nif [ \"$XGBOOST_BUILTIN_PREFETCH_PRESENT\" = \"\" ]; then\n  echo \"Doesn't have __builtin_prefetch\"\nelse\n  echo \"Has __builtin_prefetch\"\nfi\n\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: Checking for mm_prefetch\" >&5\nprintf \"%s\\n\" \"$as_me: Checking for mm_prefetch\" >&6;}\ncat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  */\n#include <xmmintrin.h>\nint\nmain (void)\n{\n_mm_prefetch\n\n  ;\n  return 0;\n}\n_ACEOF\nif ac_fn_cxx_try_link \"$LINENO\"\nthen :\n  XGBOOST_MM_PREFETCH_PRESENT=\"-DXGBOOST_MM_PREFETCH_PRESENT=1\"\nelse $as_nop\n  XGBOOST_MM_PREFETCH_PRESENT=\"\"\n\nfi\nrm -f core conftest.err conftest.$ac_objext conftest.beam \\\n    conftest$ac_exeext conftest.$ac_ext\nif [ \"$XGBOOST_MM_PREFETCH_PRESENT\" = \"\" ]; then\n  echo \"Doesn't have _mm_prefetch\"\nelse\n  echo \"Has _mm_prefetch\"\nfi\n\nOPENMP_CXXFLAGS=\"\"\n\nif test `uname -s` = \"Linux\"\nthen\n  OPENMP_CXXFLAGS=\"\\$(SHLIB_OPENMP_CXXFLAGS)\"\nfi\n\nif test `uname -s` = \"Darwin\"\nthen\n  if command -v brew &> /dev/null\n  then\n    HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`\n  else\n    # Homebrew not found\n    HOMEBREW_LIBOMP_PREFIX=''\n  fi\n  OPENMP_CXXFLAGS=\"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include\"\n  OPENMP_LIB=\"-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib\"\n  ac_pkg_openmp=no\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package\" >&5\nprintf %s \"checking whether OpenMP will work in a package... \" >&6; }\n  cat confdefs.h - <<_ACEOF >conftest.$ac_ext\n/* end confdefs.h.  
*/\n#include <omp.h>\nint\nmain (void)\n{\n return (omp_get_max_threads() <= 1);\n  ;\n  return 0;\n}\n_ACEOF\n  ${CXX} -o conftest conftest.cpp ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}\" >&5\nprintf \"%s\\n\" \"${ac_pkg_openmp}\" >&6; }\n  if test \"${ac_pkg_openmp}\" = no; then\n    OPENMP_CXXFLAGS=''\n    OPENMP_LIB=''\n    echo '*****************************************************************************************'\n    echo '         OpenMP is unavailable on this Mac OSX system. Training speed may be suboptimal.'\n    echo '         To use all CPU cores for training jobs, you should install OpenMP by running'\n    echo '             brew install libomp'\n    echo '*****************************************************************************************'\n  fi\nfi\n\n\n\n\n\n\n\n\nac_config_files=\"$ac_config_files src/Makevars\"\n\nac_config_headers=\"$ac_config_headers src/config.h\"\n\ncat >confcache <<\\_ACEOF\n# This file is a shell script that caches the results of configure\n# tests run on this system so they can be shared between configure\n# scripts and configure runs, see configure's option --config-cache.\n# It is not useful on other systems.  
If it contains results you don't\n# want to keep, you may remove or edit it.\n#\n# config.status only pays attention to the cache file if you give it\n# the --recheck option to rerun configure.\n#\n# `ac_cv_env_foo' variables (set or unset) will be overridden when\n# loading this file, other *unset* `ac_cv_foo' will be assigned the\n# following values.\n\n_ACEOF\n\n# The following way of writing the cache mishandles newlines in values,\n# but we know of no workaround that is simple, portable, and efficient.\n# So, we kill variables containing newlines.\n# Ultrix sh set writes to stderr and can't be redirected directly,\n# and sets the high bit in the cache file unless we assign to the vars.\n(\n  for ac_var in `(set) 2>&1 | sed -n 's/^\\([a-zA-Z_][a-zA-Z0-9_]*\\)=.*/\\1/p'`; do\n    eval ac_val=\\$$ac_var\n    case $ac_val in #(\n    *${as_nl}*)\n      case $ac_var in #(\n      *_cv_*) { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: cache variable $ac_var contains a newline\" >&2;} ;;\n      esac\n      case $ac_var in #(\n      _ | IFS | as_nl) ;; #(\n      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(\n      *) { eval $ac_var=; unset $ac_var;} ;;\n      esac ;;\n    esac\n  done\n\n  (set) 2>&1 |\n    case $as_nl`(ac_space=' '; set) 2>&1` in #(\n    *${as_nl}ac_space=\\ *)\n      # `set' does not quote correctly, so add quotes: double-quote\n      # substitution turns \\\\\\\\ into \\\\, and sed turns \\\\ into \\.\n      sed -n \\\n\t\"s/'/'\\\\\\\\''/g;\n\t  s/^\\\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\\\)=\\\\(.*\\\\)/\\\\1='\\\\2'/p\"\n      ;; #(\n    *)\n      # `set' quotes correctly as required by POSIX, so do not add quotes.\n      sed -n \"/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p\"\n      ;;\n    esac |\n    sort\n) |\n  sed '\n     /^ac_cv_env_/b end\n     t clear\n     :clear\n     s/^\\([^=]*\\)=\\(.*[{}].*\\)$/test ${\\1+y} || &/\n     t end\n     
s/^\\([^=]*\\)=\\(.*\\)$/\\1=${\\1=\\2}/\n     :end' >>confcache\nif diff \"$cache_file\" confcache >/dev/null 2>&1; then :; else\n  if test -w \"$cache_file\"; then\n    if test \"x$cache_file\" != \"x/dev/null\"; then\n      { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: updating cache $cache_file\" >&5\nprintf \"%s\\n\" \"$as_me: updating cache $cache_file\" >&6;}\n      if test ! -f \"$cache_file\" || test -h \"$cache_file\"; then\n\tcat confcache >\"$cache_file\"\n      else\n        case $cache_file in #(\n        */* | ?:*)\n\t  mv -f confcache \"$cache_file\"$$ &&\n\t  mv -f \"$cache_file\"$$ \"$cache_file\" ;; #(\n        *)\n\t  mv -f confcache \"$cache_file\" ;;\n\tesac\n      fi\n    fi\n  else\n    { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file\" >&5\nprintf \"%s\\n\" \"$as_me: not updating unwritable cache $cache_file\" >&6;}\n  fi\nfi\nrm -f confcache\n\ntest \"x$prefix\" = xNONE && prefix=$ac_default_prefix\n# Let make expand exec_prefix.\ntest \"x$exec_prefix\" = xNONE && exec_prefix='${prefix}'\n\nDEFS=-DHAVE_CONFIG_H\n\nac_libobjs=\nac_ltlibobjs=\nU=\nfor ac_i in : $LIBOBJS; do test \"x$ac_i\" = x: && continue\n  # 1. Remove the extension, and $U if already installed.\n  ac_script='s/\\$U\\././;s/\\.o$//;s/\\.obj$//'\n  ac_i=`printf \"%s\\n\" \"$ac_i\" | sed \"$ac_script\"`\n  # 2. Prepend LIBOBJDIR.  
When used with automake>=1.10 LIBOBJDIR\n  #    will be set to the directory where LIBOBJS objects are built.\n  as_fn_append ac_libobjs \" \\${LIBOBJDIR}$ac_i\\$U.$ac_objext\"\n  as_fn_append ac_ltlibobjs \" \\${LIBOBJDIR}$ac_i\"'$U.lo'\ndone\nLIBOBJS=$ac_libobjs\n\nLTLIBOBJS=$ac_ltlibobjs\n\n\n\n\n: \"${CONFIG_STATUS=./config.status}\"\nac_write_fail=0\nac_clean_files_save=$ac_clean_files\nac_clean_files=\"$ac_clean_files $CONFIG_STATUS\"\n{ printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS\" >&5\nprintf \"%s\\n\" \"$as_me: creating $CONFIG_STATUS\" >&6;}\nas_write_fail=0\ncat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1\n#! $SHELL\n# Generated by $as_me.\n# Run this file to recreate the current configuration.\n# Compiler output produced by configure, useful for debugging\n# configure, is in config.log if it exists.\n\ndebug=false\nac_cs_recheck=false\nac_cs_silent=false\n\nSHELL=\\${CONFIG_SHELL-$SHELL}\nexport SHELL\n_ASEOF\ncat >>$CONFIG_STATUS <<\\_ASEOF || as_write_fail=1\n## -------------------- ##\n## M4sh Initialization. ##\n## -------------------- ##\n\n# Be more Bourne compatible\nDUALCASE=1; export DUALCASE # for MKS sh\nas_nop=:\nif test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1\nthen :\n  emulate sh\n  NULLCMD=:\n  # Pre-4.2 versions of Zsh do word splitting on ${1+\"$@\"}, which\n  # is contrary to our usage.  
Disable this feature.\n  alias -g '${1+\"$@\"}'='\"$@\"'\n  setopt NO_GLOB_SUBST\nelse $as_nop\n  case `(set -o) 2>/dev/null` in #(\n  *posix*) :\n    set -o posix ;; #(\n  *) :\n     ;;\nesac\nfi\n\n\n\n# Reset variables that may have inherited troublesome values from\n# the environment.\n\n# IFS needs to be set, to space, tab, and newline, in precisely that order.\n# (If _AS_PATH_WALK were called with IFS unset, it would have the\n# side effect of setting IFS to empty, thus disabling word splitting.)\n# Quoting is to prevent editors from complaining about space-tab.\nas_nl='\n'\nexport as_nl\nIFS=\" \"\"\t$as_nl\"\n\nPS1='$ '\nPS2='> '\nPS4='+ '\n\n# Ensure predictable behavior from utilities with locale-dependent output.\nLC_ALL=C\nexport LC_ALL\nLANGUAGE=C\nexport LANGUAGE\n\n# We cannot yet rely on \"unset\" to work, but we need these variables\n# to be unset--not just set to an empty or harmless value--now, to\n# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh).  This construct\n# also avoids known problems related to \"unset\" and subshell syntax\n# in other old shells (e.g. bash 2.01 and pdksh 5.2.14).\nfor as_var in BASH_ENV ENV MAIL MAILPATH CDPATH\ndo eval test \\${$as_var+y} \\\n  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :\ndone\n\n# Ensure that fds 0, 1, and 2 are open.\nif (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi\nif (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi\nif (exec 3>&2)            ; then :; else exec 2>/dev/null; fi\n\n# The user is always right.\nif ${PATH_SEPARATOR+false} :; then\n  PATH_SEPARATOR=:\n  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {\n    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||\n      PATH_SEPARATOR=';'\n  }\nfi\n\n\n# Find who we are.  
Look in the path if we contain no directory separator.\nas_myself=\ncase $0 in #((\n  *[\\\\/]* ) as_myself=$0 ;;\n  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR\nfor as_dir in $PATH\ndo\n  IFS=$as_save_IFS\n  case $as_dir in #(((\n    '') as_dir=./ ;;\n    */) ;;\n    *) as_dir=$as_dir/ ;;\n  esac\n    test -r \"$as_dir$0\" && as_myself=$as_dir$0 && break\n  done\nIFS=$as_save_IFS\n\n     ;;\nesac\n# We did not find ourselves, most probably we were run as `sh COMMAND'\n# in which case we are not to be found in the path.\nif test \"x$as_myself\" = x; then\n  as_myself=$0\nfi\nif test ! -f \"$as_myself\"; then\n  printf \"%s\\n\" \"$as_myself: error: cannot find myself; rerun with an absolute file name\" >&2\n  exit 1\nfi\n\n\n\n# as_fn_error STATUS ERROR [LINENO LOG_FD]\n# ----------------------------------------\n# Output \"`basename $0`: error: ERROR\" to stderr. If LINENO and LOG_FD are\n# provided, also output the error to LOG_FD, referencing LINENO. Then exit the\n# script with STATUS, using 1 if that was 0.\nas_fn_error ()\n{\n  as_status=$1; test $as_status -eq 0 && as_status=1\n  if test \"$4\"; then\n    as_lineno=${as_lineno-\"$3\"} as_lineno_stack=as_lineno_stack=$as_lineno_stack\n    printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: error: $2\" >&$4\n  fi\n  printf \"%s\\n\" \"$as_me: error: $2\" >&2\n  as_fn_exit $as_status\n} # as_fn_error\n\n\n\n# as_fn_set_status STATUS\n# -----------------------\n# Set $? to STATUS, without forking.\nas_fn_set_status ()\n{\n  return $1\n} # as_fn_set_status\n\n# as_fn_exit STATUS\n# -----------------\n# Exit the shell with STATUS, even in a \"trap 0\" or \"set -e\" context.\nas_fn_exit ()\n{\n  set +e\n  as_fn_set_status $1\n  exit $1\n} # as_fn_exit\n\n# as_fn_unset VAR\n# ---------------\n# Portably unset VAR.\nas_fn_unset ()\n{\n  { eval $1=; unset $1;}\n}\nas_unset=as_fn_unset\n\n# as_fn_append VAR VALUE\n# ----------------------\n# Append the text in VALUE to the end of the definition contained in VAR. 
Take\n# advantage of any shell optimizations that allow amortized linear growth over\n# repeated appends, instead of the typical quadratic growth present in naive\n# implementations.\nif (eval \"as_var=1; as_var+=2; test x\\$as_var = x12\") 2>/dev/null\nthen :\n  eval 'as_fn_append ()\n  {\n    eval $1+=\\$2\n  }'\nelse $as_nop\n  as_fn_append ()\n  {\n    eval $1=\\$$1\\$2\n  }\nfi # as_fn_append\n\n# as_fn_arith ARG...\n# ------------------\n# Perform arithmetic evaluation on the ARGs, and store the result in the\n# global $as_val. Take advantage of shells that can avoid forks. The arguments\n# must be portable across $(()) and expr.\nif (eval \"test \\$(( 1 + 1 )) = 2\") 2>/dev/null\nthen :\n  eval 'as_fn_arith ()\n  {\n    as_val=$(( $* ))\n  }'\nelse $as_nop\n  as_fn_arith ()\n  {\n    as_val=`expr \"$@\" || test $? -eq 1`\n  }\nfi # as_fn_arith\n\n\nif expr a : '\\(a\\)' >/dev/null 2>&1 &&\n   test \"X`expr 00001 : '.*\\(...\\)'`\" = X001; then\n  as_expr=expr\nelse\n  as_expr=false\nfi\n\nif (basename -- /) >/dev/null 2>&1 && test \"X`basename -- / 2>&1`\" = \"X/\"; then\n  as_basename=basename\nelse\n  as_basename=false\nfi\n\nif (as_dir=`dirname -- /` && test \"X$as_dir\" = X/) >/dev/null 2>&1; then\n  as_dirname=dirname\nelse\n  as_dirname=false\nfi\n\nas_me=`$as_basename -- \"$0\" ||\n$as_expr X/\"$0\" : '.*/\\([^/][^/]*\\)/*$' \\| \\\n\t X\"$0\" : 'X\\(//\\)$' \\| \\\n\t X\"$0\" : 'X\\(/\\)' \\| . 
2>/dev/null ||\nprintf \"%s\\n\" X/\"$0\" |\n    sed '/^.*\\/\\([^/][^/]*\\)\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\/\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\/\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n\n# Avoid depending upon Character Ranges.\nas_cr_letters='abcdefghijklmnopqrstuvwxyz'\nas_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'\nas_cr_Letters=$as_cr_letters$as_cr_LETTERS\nas_cr_digits='0123456789'\nas_cr_alnum=$as_cr_Letters$as_cr_digits\n\n\n# Determine whether it's possible to make 'echo' print without a newline.\n# These variables are no longer used directly by Autoconf, but are AC_SUBSTed\n# for compatibility with existing Makefiles.\nECHO_C= ECHO_N= ECHO_T=\ncase `echo -n x` in #(((((\n-n*)\n  case `echo 'xy\\c'` in\n  *c*) ECHO_T='\t';;\t# ECHO_T is single tab character.\n  xy)  ECHO_C='\\c';;\n  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null\n       ECHO_T='\t';;\n  esac;;\n*)\n  ECHO_N='-n';;\nesac\n\n# For backward compatibility with old third-party macros, we provide\n# the shell variables $as_echo and $as_echo_n.  New code should use\n# AS_ECHO([\"message\"]) and AS_ECHO_N([\"message\"]), respectively.\nas_echo='printf %s\\n'\nas_echo_n='printf %s'\n\nrm -f conf$$ conf$$.exe conf$$.file\nif test -d conf$$.dir; then\n  rm -f conf$$.dir/conf$$.file\nelse\n  rm -f conf$$.dir\n  mkdir conf$$.dir 2>/dev/null\nfi\nif (echo >conf$$.file) 2>/dev/null; then\n  if ln -s conf$$.file conf$$ 2>/dev/null; then\n    as_ln_s='ln -s'\n    # ... but there are two gotchas:\n    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.\n    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.\n    # In both cases, we have to default to `cp -pR'.\n    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe ||\n      as_ln_s='cp -pR'\n  elif ln conf$$.file conf$$ 2>/dev/null; then\n    as_ln_s=ln\n  else\n    as_ln_s='cp -pR'\n  fi\nelse\n  as_ln_s='cp -pR'\nfi\nrm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file\nrmdir conf$$.dir 2>/dev/null\n\n\n# as_fn_mkdir_p\n# -------------\n# Create \"$as_dir\" as a directory, including parents if necessary.\nas_fn_mkdir_p ()\n{\n\n  case $as_dir in #(\n  -*) as_dir=./$as_dir;;\n  esac\n  test -d \"$as_dir\" || eval $as_mkdir_p || {\n    as_dirs=\n    while :; do\n      case $as_dir in #(\n      *\\'*) as_qdir=`printf \"%s\\n\" \"$as_dir\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"`;; #'(\n      *) as_qdir=$as_dir;;\n      esac\n      as_dirs=\"'$as_qdir' $as_dirs\"\n      as_dir=`$as_dirname -- \"$as_dir\" ||\n$as_expr X\"$as_dir\" : 'X\\(.*[^/]\\)//*[^/][^/]*/*$' \\| \\\n\t X\"$as_dir\" : 'X\\(//\\)[^/]' \\| \\\n\t X\"$as_dir\" : 'X\\(//\\)$' \\| \\\n\t X\"$as_dir\" : 'X\\(/\\)' \\| . 2>/dev/null ||\nprintf \"%s\\n\" X\"$as_dir\" |\n    sed '/^X\\(.*[^/]\\)\\/\\/*[^/][^/]*\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)[^/].*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n      test -d \"$as_dir\" && break\n    done\n    test -z \"$as_dirs\" || eval \"mkdir $as_dirs\"\n  } || test -d \"$as_dir\" || as_fn_error $? \"cannot create directory $as_dir\"\n\n\n} # as_fn_mkdir_p\nif mkdir -p . 
2>/dev/null; then\n  as_mkdir_p='mkdir -p \"$as_dir\"'\nelse\n  test -d ./-p && rmdir ./-p\n  as_mkdir_p=false\nfi\n\n\n# as_fn_executable_p FILE\n# -----------------------\n# Test if FILE is an executable regular file.\nas_fn_executable_p ()\n{\n  test -f \"$1\" && test -x \"$1\"\n} # as_fn_executable_p\nas_test_x='test -x'\nas_executable_p=as_fn_executable_p\n\n# Sed expression to map a string onto a valid CPP name.\nas_tr_cpp=\"eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'\"\n\n# Sed expression to map a string onto a valid variable name.\nas_tr_sh=\"eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'\"\n\n\nexec 6>&1\n## ----------------------------------- ##\n## Main body of $CONFIG_STATUS script. ##\n## ----------------------------------- ##\n_ASEOF\ntest $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n# Save the log message, to keep $0 and so on meaningful, and to\n# report actual input values of CONFIG_FILES etc. instead of their\n# values after options handling.\nac_log=\"\nThis file was extended by xgboost $as_me 3.3.0, which was\ngenerated by GNU Autoconf 2.71.  Invocation command line was\n\n  CONFIG_FILES    = $CONFIG_FILES\n  CONFIG_HEADERS  = $CONFIG_HEADERS\n  CONFIG_LINKS    = $CONFIG_LINKS\n  CONFIG_COMMANDS = $CONFIG_COMMANDS\n  $ $0 $@\n\non `(hostname || uname -n) 2>/dev/null | sed 1q`\n\"\n\n_ACEOF\n\ncase $ac_config_files in *\"\n\"*) set x $ac_config_files; shift; ac_config_files=$*;;\nesac\n\ncase $ac_config_headers in *\"\n\"*) set x $ac_config_headers; shift; ac_config_headers=$*;;\nesac\n\n\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n# Files that config.status was made for.\nconfig_files=\"$ac_config_files\"\nconfig_headers=\"$ac_config_headers\"\n\n_ACEOF\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\nac_cs_usage=\"\\\n\\`$as_me' instantiates files and other configuration actions\nfrom templates according to the current configuration.  
Unless the files\nand actions are specified as TAGs, all are instantiated by default.\n\nUsage: $0 [OPTION]... [TAG]...\n\n  -h, --help       print this help, then exit\n  -V, --version    print version number and configuration settings, then exit\n      --config     print configuration, then exit\n  -q, --quiet, --silent\n                   do not print progress messages\n  -d, --debug      don't remove temporary files\n      --recheck    update $as_me by reconfiguring in the same conditions\n      --file=FILE[:TEMPLATE]\n                   instantiate the configuration file FILE\n      --header=FILE[:TEMPLATE]\n                   instantiate the configuration header FILE\n\nConfiguration files:\n$config_files\n\nConfiguration headers:\n$config_headers\n\nReport bugs to the package provider.\"\n\n_ACEOF\nac_cs_config=`printf \"%s\\n\" \"$ac_configure_args\" | sed \"$ac_safe_unquote\"`\nac_cs_config_escaped=`printf \"%s\\n\" \"$ac_cs_config\" | sed \"s/^ //; s/'/'\\\\\\\\\\\\\\\\''/g\"`\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\nac_cs_config='$ac_cs_config_escaped'\nac_cs_version=\"\\\\\nxgboost config.status 3.3.0\nconfigured by $0, generated by GNU Autoconf 2.71,\n  with options \\\\\"\\$ac_cs_config\\\\\"\n\nCopyright (C) 2021 Free Software Foundation, Inc.\nThis config.status script is free software; the Free Software Foundation\ngives unlimited permission to copy, distribute and modify it.\"\n\nac_pwd='$ac_pwd'\nsrcdir='$srcdir'\ntest -n \"\\$AWK\" || AWK=awk\n_ACEOF\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n# The default lists apply if the user does not specify any file.\nac_need_defaults=:\nwhile test $# != 0\ndo\n  case $1 in\n  --*=?*)\n    ac_option=`expr \"X$1\" : 'X\\([^=]*\\)='`\n    ac_optarg=`expr \"X$1\" : 'X[^=]*=\\(.*\\)'`\n    ac_shift=:\n    ;;\n  --*=)\n    ac_option=`expr \"X$1\" : 'X\\([^=]*\\)='`\n    ac_optarg=\n    ac_shift=:\n    ;;\n  *)\n    ac_option=$1\n    ac_optarg=$2\n    ac_shift=shift\n    ;;\n  esac\n\n  
case $ac_option in\n  # Handling of the options.\n  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)\n    ac_cs_recheck=: ;;\n  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )\n    printf \"%s\\n\" \"$ac_cs_version\"; exit ;;\n  --config | --confi | --conf | --con | --co | --c )\n    printf \"%s\\n\" \"$ac_cs_config\"; exit ;;\n  --debug | --debu | --deb | --de | --d | -d )\n    debug=: ;;\n  --file | --fil | --fi | --f )\n    $ac_shift\n    case $ac_optarg in\n    *\\'*) ac_optarg=`printf \"%s\\n\" \"$ac_optarg\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"` ;;\n    '') as_fn_error $? \"missing file argument\" ;;\n    esac\n    as_fn_append CONFIG_FILES \" '$ac_optarg'\"\n    ac_need_defaults=false;;\n  --header | --heade | --head | --hea )\n    $ac_shift\n    case $ac_optarg in\n    *\\'*) ac_optarg=`printf \"%s\\n\" \"$ac_optarg\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"` ;;\n    esac\n    as_fn_append CONFIG_HEADERS \" '$ac_optarg'\"\n    ac_need_defaults=false;;\n  --he | --h)\n    # Conflict between --help and --header\n    as_fn_error $? \"ambiguous option: \\`$1'\nTry \\`$0 --help' for more information.\";;\n  --help | --hel | -h )\n    printf \"%s\\n\" \"$ac_cs_usage\"; exit ;;\n  -q | -quiet | --quiet | --quie | --qui | --qu | --q \\\n  | -silent | --silent | --silen | --sile | --sil | --si | --s)\n    ac_cs_silent=: ;;\n\n  # This is an error.\n  -*) as_fn_error $? 
\"unrecognized option: \\`$1'\nTry \\`$0 --help' for more information.\" ;;\n\n  *) as_fn_append ac_config_targets \" $1\"\n     ac_need_defaults=false ;;\n\n  esac\n  shift\ndone\n\nac_configure_extra_args=\n\nif $ac_cs_silent; then\n  exec 6>/dev/null\n  ac_configure_extra_args=\"$ac_configure_extra_args --silent\"\nfi\n\n_ACEOF\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\nif \\$ac_cs_recheck; then\n  set X $SHELL '$0' $ac_configure_args \\$ac_configure_extra_args --no-create --no-recursion\n  shift\n  \\printf \"%s\\n\" \"running CONFIG_SHELL=$SHELL \\$*\" >&6\n  CONFIG_SHELL='$SHELL'\n  export CONFIG_SHELL\n  exec \"\\$@\"\nfi\n\n_ACEOF\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\nexec 5>>config.log\n{\n  echo\n  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX\n## Running $as_me. ##\n_ASBOX\n  printf \"%s\\n\" \"$ac_log\"\n} >&5\n\n_ACEOF\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n_ACEOF\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n\n# Handling of arguments.\nfor ac_config_target in $ac_config_targets\ndo\n  case $ac_config_target in\n    \"src/Makevars\") CONFIG_FILES=\"$CONFIG_FILES src/Makevars\" ;;\n    \"src/config.h\") CONFIG_HEADERS=\"$CONFIG_HEADERS src/config.h\" ;;\n\n  *) as_fn_error $? \"invalid argument: \\`$ac_config_target'\" \"$LINENO\" 5;;\n  esac\ndone\n\n\n# If the user did not use the arguments to specify the items to instantiate,\n# then the envvar interface is used.  Set only those that are not.\n# We use the long form for the default assignment because of an extremely\n# bizarre bug on SunOS 4.1.3.\nif $ac_need_defaults; then\n  test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files\n  test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers\nfi\n\n# Have a temporary directory for convenience.  
Make it in the build tree\n# simply because there is no reason against having it here, and in addition,\n# creating and moving files from /tmp can sometimes cause problems.\n# Hook for its removal unless debugging.\n# Note that there is a small window in which the directory will not be cleaned:\n# after its creation but before its name has been assigned to `$tmp'.\n$debug ||\n{\n  tmp= ac_tmp=\n  trap 'exit_status=$?\n  : \"${ac_tmp:=$tmp}\"\n  { test ! -d \"$ac_tmp\" || rm -fr \"$ac_tmp\"; } && exit $exit_status\n' 0\n  trap 'as_fn_exit 1' 1 2 13 15\n}\n# Create a (secure) tmp directory for tmp files.\n\n{\n  tmp=`(umask 077 && mktemp -d \"./confXXXXXX\") 2>/dev/null` &&\n  test -d \"$tmp\"\n}  ||\n{\n  tmp=./conf$$-$RANDOM\n  (umask 077 && mkdir \"$tmp\")\n} || as_fn_error $? \"cannot create a temporary directory in .\" \"$LINENO\" 5\nac_tmp=$tmp\n\n# Set up the scripts for CONFIG_FILES section.\n# No need to generate them if there are no CONFIG_FILES.\n# This happens for instance with `./config.status config.h'.\nif test -n \"$CONFIG_FILES\"; then\n\n\nac_cr=`echo X | tr X '\\015'`\n# On cygwin, bash can eat \\r inside `` if the user requested igncr.\n# But we know of no other shell where ac_cr would be empty at this\n# point, so we can use a bashism as a fallback.\nif test \"x$ac_cr\" = x; then\n  eval ac_cr=\\$\\'\\\\r\\'\nfi\nac_cs_awk_cr=`$AWK 'BEGIN { print \"a\\rb\" }' </dev/null 2>/dev/null`\nif test \"$ac_cs_awk_cr\" = \"a${ac_cr}b\"; then\n  ac_cs_awk_cr='\\\\r'\nelse\n  ac_cs_awk_cr=$ac_cr\nfi\n\necho 'BEGIN {' >\"$ac_tmp/subs1.awk\" &&\n_ACEOF\n\n\n{\n  echo \"cat >conf$$subs.awk <<_ACEOF\" &&\n  echo \"$ac_subst_vars\" | sed 's/.*/&!$&$ac_delim/' &&\n  echo \"_ACEOF\"\n} >conf$$subs.sh ||\n  as_fn_error $? \"could not make $CONFIG_STATUS\" \"$LINENO\" 5\nac_delim_num=`echo \"$ac_subst_vars\" | grep -c '^'`\nac_delim='%!_!# '\nfor ac_last_try in false false false false false :; do\n  . ./conf$$subs.sh ||\n    as_fn_error $? 
\"could not make $CONFIG_STATUS\" \"$LINENO\" 5\n\n  ac_delim_n=`sed -n \"s/.*$ac_delim\\$/X/p\" conf$$subs.awk | grep -c X`\n  if test $ac_delim_n = $ac_delim_num; then\n    break\n  elif $ac_last_try; then\n    as_fn_error $? \"could not make $CONFIG_STATUS\" \"$LINENO\" 5\n  else\n    ac_delim=\"$ac_delim!$ac_delim _$ac_delim!! \"\n  fi\ndone\nrm -f conf$$subs.sh\n\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\ncat >>\"\\$ac_tmp/subs1.awk\" <<\\\\_ACAWK &&\n_ACEOF\nsed -n '\nh\ns/^/S[\"/; s/!.*/\"]=/\np\ng\ns/^[^!]*!//\n:repl\nt repl\ns/'\"$ac_delim\"'$//\nt delim\n:nl\nh\ns/\\(.\\{148\\}\\)..*/\\1/\nt more1\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\\\\n\"\\\\/\np\nn\nb repl\n:more1\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\"\\\\/\np\ng\ns/.\\{148\\}//\nt nl\n:delim\nh\ns/\\(.\\{148\\}\\)..*/\\1/\nt more2\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\"/\np\nb\n:more2\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\"\\\\/\np\ng\ns/.\\{148\\}//\nt delim\n' <conf$$subs.awk | sed '\n/^[^\"\"]/{\n  N\n  s/\\n//\n}\n' >>$CONFIG_STATUS || ac_write_fail=1\nrm -f conf$$subs.awk\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n_ACAWK\ncat >>\"\\$ac_tmp/subs1.awk\" <<_ACAWK &&\n  for (key in S) S_is_set[key] = 1\n  FS = \"\u0007\"\n\n}\n{\n  line = $ 0\n  nfields = split(line, field, \"@\")\n  substed = 0\n  len = length(field[1])\n  for (i = 2; i < nfields; i++) {\n    key = field[i]\n    keylen = length(key)\n    if (S_is_set[key]) {\n      value = S[key]\n      line = substr(line, 1, len) \"\" value \"\" substr(line, len + keylen + 3)\n      len += length(value) + length(field[++i])\n      substed = 1\n    } else\n      len += 1 + keylen\n  }\n\n  print line\n}\n\n_ACAWK\n_ACEOF\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\nif sed \"s/$ac_cr//\" < /dev/null > /dev/null 2>&1; then\n  sed \"s/$ac_cr\\$//; s/$ac_cr/$ac_cs_awk_cr/g\"\nelse\n  cat\nfi < \"$ac_tmp/subs1.awk\" > \"$ac_tmp/subs.awk\" \\\n  || as_fn_error $? 
\"could not setup config files machinery\" \"$LINENO\" 5\n_ACEOF\n\n# VPATH may cause trouble with some makes, so we remove sole $(srcdir),\n# ${srcdir} and @srcdir@ entries from VPATH if srcdir is \".\", strip leading and\n# trailing colons and then remove the whole line if VPATH becomes empty\n# (actually we leave an empty line to preserve line numbers).\nif test \"x$srcdir\" = x.; then\n  ac_vpsub='/^[\t ]*VPATH[\t ]*=[\t ]*/{\nh\ns///\ns/^/:/\ns/[\t ]*$/:/\ns/:\\$(srcdir):/:/g\ns/:\\${srcdir}:/:/g\ns/:@srcdir@:/:/g\ns/^:*//\ns/:*$//\nx\ns/\\(=[\t ]*\\).*/\\1/\nG\ns/\\n//\ns/^[^=]*=[\t ]*$//\n}'\nfi\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\nfi # test -n \"$CONFIG_FILES\"\n\n# Set up the scripts for CONFIG_HEADERS section.\n# No need to generate them if there are no CONFIG_HEADERS.\n# This happens for instance with `./config.status Makefile'.\nif test -n \"$CONFIG_HEADERS\"; then\ncat >\"$ac_tmp/defines.awk\" <<\\_ACAWK ||\nBEGIN {\n_ACEOF\n\n# Transform confdefs.h into an awk script `defines.awk', embedded as\n# here-document in config.status, that substitutes the proper values into\n# config.h.in to produce config.h.\n\n# Create a delimiter string that does not exist in confdefs.h, to ease\n# handling of long lines.\nac_delim='%!_!# '\nfor ac_last_try in false false :; do\n  ac_tt=`sed -n \"/$ac_delim/p\" confdefs.h`\n  if test -z \"$ac_tt\"; then\n    break\n  elif $ac_last_try; then\n    as_fn_error $? \"could not make $CONFIG_HEADERS\" \"$LINENO\" 5\n  else\n    ac_delim=\"$ac_delim!$ac_delim _$ac_delim!! \"\n  fi\ndone\n\n# For the awk script, D is an array of macro values keyed by name,\n# likewise P contains macro parameters if any.  
Preserve backslash\n# newline sequences.\n\nac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*\nsed -n '\ns/.\\{148\\}/&'\"$ac_delim\"'/g\nt rset\n:rset\ns/^[\t ]*#[\t ]*define[\t ][\t ]*/ /\nt def\nd\n:def\ns/\\\\$//\nt bsnl\ns/[\"\\\\]/\\\\&/g\ns/^ \\('\"$ac_word_re\"'\\)\\(([^()]*)\\)[\t ]*\\(.*\\)/P[\"\\1\"]=\"\\2\"\\\nD[\"\\1\"]=\" \\3\"/p\ns/^ \\('\"$ac_word_re\"'\\)[\t ]*\\(.*\\)/D[\"\\1\"]=\" \\2\"/p\nd\n:bsnl\ns/[\"\\\\]/\\\\&/g\ns/^ \\('\"$ac_word_re\"'\\)\\(([^()]*)\\)[\t ]*\\(.*\\)/P[\"\\1\"]=\"\\2\"\\\nD[\"\\1\"]=\" \\3\\\\\\\\\\\\n\"\\\\/p\nt cont\ns/^ \\('\"$ac_word_re\"'\\)[\t ]*\\(.*\\)/D[\"\\1\"]=\" \\2\\\\\\\\\\\\n\"\\\\/p\nt cont\nd\n:cont\nn\ns/.\\{148\\}/&'\"$ac_delim\"'/g\nt clear\n:clear\ns/\\\\$//\nt bsnlc\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\"/p\nd\n:bsnlc\ns/[\"\\\\]/\\\\&/g; s/^/\"/; s/$/\\\\\\\\\\\\n\"\\\\/p\nb cont\n' <confdefs.h | sed '\ns/'\"$ac_delim\"'/\"\\\\\\\n\"/g' >>$CONFIG_STATUS || ac_write_fail=1\n\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n  for (key in D) D_is_set[key] = 1\n  FS = \"\u0007\"\n}\n/^[\\t ]*#[\\t ]*(define|undef)[\\t ]+$ac_word_re([\\t (]|\\$)/ {\n  line = \\$ 0\n  split(line, arg, \" \")\n  if (arg[1] == \"#\") {\n    defundef = arg[2]\n    mac1 = arg[3]\n  } else {\n    defundef = substr(arg[1], 2)\n    mac1 = arg[2]\n  }\n  split(mac1, mac2, \"(\") #)\n  macro = mac2[1]\n  prefix = substr(line, 1, index(line, defundef) - 1)\n  if (D_is_set[macro]) {\n    # Preserve the white space surrounding the \"#\".\n    print prefix \"define\", macro P[macro] D[macro]\n    next\n  } else {\n    # Replace #undef with comments.  This is necessary, for example,\n    # in the case of _POSIX_SOURCE, which is predefined and required\n    # on some systems where configure will not decide to define it.\n    if (defundef == \"undef\") {\n      print \"/*\", prefix defundef, macro, \"*/\"\n      next\n    }\n  }\n}\n{ print }\n_ACAWK\n_ACEOF\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n  as_fn_error $? 
\"could not setup config headers machinery\" \"$LINENO\" 5\nfi # test -n \"$CONFIG_HEADERS\"\n\n\neval set X \"  :F $CONFIG_FILES  :H $CONFIG_HEADERS    \"\nshift\nfor ac_tag\ndo\n  case $ac_tag in\n  :[FHLC]) ac_mode=$ac_tag; continue;;\n  esac\n  case $ac_mode$ac_tag in\n  :[FHL]*:*);;\n  :L* | :C*:*) as_fn_error $? \"invalid tag \\`$ac_tag'\" \"$LINENO\" 5;;\n  :[FH]-) ac_tag=-:-;;\n  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;\n  esac\n  ac_save_IFS=$IFS\n  IFS=:\n  set x $ac_tag\n  IFS=$ac_save_IFS\n  shift\n  ac_file=$1\n  shift\n\n  case $ac_mode in\n  :L) ac_source=$1;;\n  :[FH])\n    ac_file_inputs=\n    for ac_f\n    do\n      case $ac_f in\n      -) ac_f=\"$ac_tmp/stdin\";;\n      *) # Look for the file first in the build tree, then in the source tree\n\t # (if the path is not absolute).  The absolute path cannot be DOS-style,\n\t # because $ac_f cannot contain `:'.\n\t test -f \"$ac_f\" ||\n\t   case $ac_f in\n\t   [\\\\/$]*) false;;\n\t   *) test -f \"$srcdir/$ac_f\" && ac_f=\"$srcdir/$ac_f\";;\n\t   esac ||\n\t   as_fn_error 1 \"cannot find input file: \\`$ac_f'\" \"$LINENO\" 5;;\n      esac\n      case $ac_f in *\\'*) ac_f=`printf \"%s\\n\" \"$ac_f\" | sed \"s/'/'\\\\\\\\\\\\\\\\''/g\"`;; esac\n      as_fn_append ac_file_inputs \" '$ac_f'\"\n    done\n\n    # Let's still pretend it is `configure' which instantiates (i.e., don't\n    # use $as_me), people would be surprised to read:\n    #    /* config.h.  Generated by config.status.  */\n    configure_input='Generated from '`\n\t  printf \"%s\\n\" \"$*\" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'\n\t`' by configure.'\n    if test x\"$ac_file\" != x-; then\n      configure_input=\"$ac_file.  
$configure_input\"\n      { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: creating $ac_file\" >&5\nprintf \"%s\\n\" \"$as_me: creating $ac_file\" >&6;}\n    fi\n    # Neutralize special characters interpreted by sed in replacement strings.\n    case $configure_input in #(\n    *\\&* | *\\|* | *\\\\* )\n       ac_sed_conf_input=`printf \"%s\\n\" \"$configure_input\" |\n       sed 's/[\\\\\\\\&|]/\\\\\\\\&/g'`;; #(\n    *) ac_sed_conf_input=$configure_input;;\n    esac\n\n    case $ac_tag in\n    *:-:* | *:-) cat >\"$ac_tmp/stdin\" \\\n      || as_fn_error $? \"could not create $ac_file\" \"$LINENO\" 5 ;;\n    esac\n    ;;\n  esac\n\n  ac_dir=`$as_dirname -- \"$ac_file\" ||\n$as_expr X\"$ac_file\" : 'X\\(.*[^/]\\)//*[^/][^/]*/*$' \\| \\\n\t X\"$ac_file\" : 'X\\(//\\)[^/]' \\| \\\n\t X\"$ac_file\" : 'X\\(//\\)$' \\| \\\n\t X\"$ac_file\" : 'X\\(/\\)' \\| . 2>/dev/null ||\nprintf \"%s\\n\" X\"$ac_file\" |\n    sed '/^X\\(.*[^/]\\)\\/\\/*[^/][^/]*\\/*$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)[^/].*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\/\\)$/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  /^X\\(\\/\\).*/{\n\t    s//\\1/\n\t    q\n\t  }\n\t  s/.*/./; q'`\n  as_dir=\"$ac_dir\"; as_fn_mkdir_p\n  ac_builddir=.\n\ncase \"$ac_dir\" in\n.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;\n*)\n  ac_dir_suffix=/`printf \"%s\\n\" \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`\n  # A \"..\" for each directory in $ac_dir_suffix.\n  ac_top_builddir_sub=`printf \"%s\\n\" \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`\n  case $ac_top_builddir_sub in\n  \"\") ac_top_builddir_sub=. ac_top_build_prefix= ;;\n  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;\n  esac ;;\nesac\nac_abs_top_builddir=$ac_pwd\nac_abs_builddir=$ac_pwd$ac_dir_suffix\n# for backward compatibility:\nac_top_builddir=$ac_top_build_prefix\n\ncase $srcdir in\n  .)  
# We are building in place.\n    ac_srcdir=.\n    ac_top_srcdir=$ac_top_builddir_sub\n    ac_abs_top_srcdir=$ac_pwd ;;\n  [\\\\/]* | ?:[\\\\/]* )  # Absolute name.\n    ac_srcdir=$srcdir$ac_dir_suffix;\n    ac_top_srcdir=$srcdir\n    ac_abs_top_srcdir=$srcdir ;;\n  *) # Relative name.\n    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix\n    ac_top_srcdir=$ac_top_build_prefix$srcdir\n    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;\nesac\nac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix\n\n\n  case $ac_mode in\n  :F)\n  #\n  # CONFIG_FILE\n  #\n\n_ACEOF\n\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n# If the template does not know about datarootdir, expand it.\n# FIXME: This hack should be removed a few years after 2.60.\nac_datarootdir_hack=; ac_datarootdir_seen=\nac_sed_dataroot='\n/datarootdir/ {\n  p\n  q\n}\n/@datadir@/p\n/@docdir@/p\n/@infodir@/p\n/@localedir@/p\n/@mandir@/p'\ncase `eval \"sed -n \\\"\\$ac_sed_dataroot\\\" $ac_file_inputs\"` in\n*datarootdir*) ac_datarootdir_seen=yes;;\n*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting\" >&2;}\n_ACEOF\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n  ac_datarootdir_hack='\n  s&@datadir@&$datadir&g\n  s&@docdir@&$docdir&g\n  s&@infodir@&$infodir&g\n  s&@localedir@&$localedir&g\n  s&@mandir@&$mandir&g\n  s&\\\\\\${datarootdir}&$datarootdir&g' ;;\nesac\n_ACEOF\n\n# Neutralize VPATH when `$srcdir' = `.'.\n# Shell code in configure.ac might set extrasub.\n# FIXME: do we really want to maintain this feature?\ncat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\nac_sed_extra=\"$ac_vpsub\n$extrasub\n_ACEOF\ncat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n:t\n/@[a-zA-Z_][a-zA-Z_0-9]*@/!b\ns|@configure_input@|$ac_sed_conf_input|;t t\ns&@top_builddir@&$ac_top_builddir_sub&;t 
t\ns&@top_build_prefix@&$ac_top_build_prefix&;t t\ns&@srcdir@&$ac_srcdir&;t t\ns&@abs_srcdir@&$ac_abs_srcdir&;t t\ns&@top_srcdir@&$ac_top_srcdir&;t t\ns&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t\ns&@builddir@&$ac_builddir&;t t\ns&@abs_builddir@&$ac_abs_builddir&;t t\ns&@abs_top_builddir@&$ac_abs_top_builddir&;t t\n$ac_datarootdir_hack\n\"\neval sed \\\"\\$ac_sed_extra\\\" \"$ac_file_inputs\" | $AWK -f \"$ac_tmp/subs.awk\" \\\n  >$ac_tmp/out || as_fn_error $? \"could not create $ac_file\" \"$LINENO\" 5\n\ntest -z \"$ac_datarootdir_hack$ac_datarootdir_seen\" &&\n  { ac_out=`sed -n '/\\${datarootdir}/p' \"$ac_tmp/out\"`; test -n \"$ac_out\"; } &&\n  { ac_out=`sed -n '/^[\t ]*datarootdir[\t ]*:*=/p' \\\n      \"$ac_tmp/out\"`; test -z \"$ac_out\"; } &&\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \\`datarootdir'\nwhich seems to be undefined.  Please make sure it is defined\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: $ac_file contains a reference to the variable \\`datarootdir'\nwhich seems to be undefined.  Please make sure it is defined\" >&2;}\n\n  rm -f \"$ac_tmp/stdin\"\n  case $ac_file in\n  -) cat \"$ac_tmp/out\" && rm -f \"$ac_tmp/out\";;\n  *) rm -f \"$ac_file\" && mv \"$ac_tmp/out\" \"$ac_file\";;\n  esac \\\n  || as_fn_error $? \"could not create $ac_file\" \"$LINENO\" 5\n ;;\n  :H)\n  #\n  # CONFIG_HEADER\n  #\n  if test x\"$ac_file\" != x-; then\n    {\n      printf \"%s\\n\" \"/* $configure_input  */\" >&1 \\\n      && eval '$AWK -f \"$ac_tmp/defines.awk\"' \"$ac_file_inputs\"\n    } >\"$ac_tmp/config.h\" \\\n      || as_fn_error $? \"could not create $ac_file\" \"$LINENO\" 5\n    if diff \"$ac_file\" \"$ac_tmp/config.h\" >/dev/null 2>&1; then\n      { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: $ac_file is unchanged\" >&5\nprintf \"%s\\n\" \"$as_me: $ac_file is unchanged\" >&6;}\n    else\n      rm -f \"$ac_file\"\n      mv \"$ac_tmp/config.h\" \"$ac_file\" \\\n\t|| as_fn_error $? 
\"could not create $ac_file\" \"$LINENO\" 5\n    fi\n  else\n    printf \"%s\\n\" \"/* $configure_input  */\" >&1 \\\n      && eval '$AWK -f \"$ac_tmp/defines.awk\"' \"$ac_file_inputs\" \\\n      || as_fn_error $? \"could not create -\" \"$LINENO\" 5\n  fi\n ;;\n\n\n  esac\n\ndone # for ac_tag\n\n\nas_fn_exit 0\n_ACEOF\nac_clean_files=$ac_clean_files_save\n\ntest $ac_write_fail = 0 ||\n  as_fn_error $? \"write failure creating $CONFIG_STATUS\" \"$LINENO\" 5\n\n\n# configure is writing to config.log, and then calls config.status.\n# config.status does its own redirection, appending to config.log.\n# Unfortunately, on DOS this fails, as config.log is still kept open\n# by configure, so config.status won't be able to write to it; its\n# output is simply discarded.  So we exec the FD to /dev/null,\n# effectively closing config.log, so it can be properly (re)opened and\n# appended to by config.status.  When coming back to configure, we\n# need to make the FD available again.\nif test \"$no_create\" != yes; then\n  ac_cs_success=:\n  ac_config_status_args=\n  test \"$silent\" = yes &&\n    ac_config_status_args=\"$ac_config_status_args --quiet\"\n  exec 5>/dev/null\n  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false\n  exec 5>>config.log\n  # Use ||, not &&, to avoid exiting from the if with $? = 1, which\n  # would make configure fail if this is the last instruction.\n  $ac_cs_success || as_fn_exit 1\nfi\nif test -n \"$ac_unrecognized_opts\" && test \"$enable_option_checking\" != no; then\n  { printf \"%s\\n\" \"$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts\" >&5\nprintf \"%s\\n\" \"$as_me: WARNING: unrecognized options: $ac_unrecognized_opts\" >&2;}\nfi\n"
  },
  {
    "path": "R-package/configure.ac",
    "content": "### configure.ac\t\t\t\t\t-*- Autoconf -*-\n\nAC_PREREQ(2.69)\n\nAC_INIT([xgboost],[3.3.0],[],[xgboost],[])\n\n: ${R_HOME=`R RHOME`}\nif test -z \"${R_HOME}\"; then\n  echo \"could not determine R_HOME\"\n  exit 1\nfi\n\nCXX17=`\"${R_HOME}/bin/R\" CMD config CXX17`\nCXX17STD=`\"${R_HOME}/bin/R\" CMD config CXX17STD`\nCXX=\"${CXX17} ${CXX17STD}\"\nCXXFLAGS=`\"${R_HOME}/bin/R\" CMD config CXXFLAGS`\n\nCC=`\"${R_HOME}/bin/R\" CMD config CC`\nCFLAGS=`\"${R_HOME}/bin/R\" CMD config CFLAGS`\nCPPFLAGS=`\"${R_HOME}/bin/R\" CMD config CPPFLAGS`\n\nLDFLAGS=`\"${R_HOME}/bin/R\" CMD config LDFLAGS`\nAC_LANG(C++)\n\nDMLC_DEFS=\"\"\n\nAC_MSG_NOTICE([Checking if/where backtrace is available])\nAC_CHECK_LIB([execinfo], [backtrace], [BACKTRACE_LIB=-lexecinfo], [BACKTRACE_LIB=''])\nAS_IF([test -z \"${BACKTRACE_LIB}\"],\n      dnl backtrace() might be unavailable (e.g., in musl libc)\n      [AC_CHECK_FUNC(backtrace, [], [DMLC_DEFS=\"$DMLC_DEFS -DDMLC_LOG_STACK_TRACE=0\"])])\n\nAC_MSG_NOTICE([Checking whether fopen64 is available])\nAC_CHECK_FUNC(fopen64, [], [DMLC_DEFS=\"$DMLC_DEFS -DDMLC_USE_FOPEN64=0\"])\n\nAC_MSG_NOTICE([Endian detection])\nAC_ARG_VAR(USE_LITTLE_ENDIAN, \"Whether to build with little endian (checks at compile time if unset)\")\nAS_IF([test -z \"${USE_LITTLE_ENDIAN+x}\"], [\n  AC_MSG_NOTICE([Checking system endianness as USE_LITTLE_ENDIAN is unset])\n  AC_C_BIGENDIAN(\n    [AC_MSG_RESULT([using big endian])\n     ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=0\"],\n    [AC_MSG_RESULT([using little endian])\n     ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=1\"],\n    [AC_MSG_RESULT([unknown])\n     AC_MSG_ERROR([Could not determine endianness. 
Please set USE_LITTLE_ENDIAN])]\n  )\n], [\n  AC_MSG_NOTICE([Forcing endianness to: ${USE_LITTLE_ENDIAN}])\n  ENDIAN_FLAG=\"-DDMLC_CMAKE_LITTLE_ENDIAN=${USE_LITTLE_ENDIAN}\"\n])\n\nAC_MSG_NOTICE([Checking for prefetch builtin])\nAC_LINK_IFELSE(\n  [AC_LANG_PROGRAM(\n    [],\n    [__builtin_prefetch]\n  )],\n  [XGBOOST_BUILTIN_PREFETCH_PRESENT=\"-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1\"],\n  [XGBOOST_BUILTIN_PREFETCH_PRESENT=\"\"]\n)\nif [[ \"$XGBOOST_BUILTIN_PREFETCH_PRESENT\" = \"\" ]]; then\n  echo \"Has __builtin_prefetch\"\nelse\n  echo \"Doesn't have __builtin_prefetch\"\nfi\n\nAC_MSG_NOTICE([Checking for mm_prefetch])\nAC_LINK_IFELSE(\n  [AC_LANG_PROGRAM(\n    [#include <xmmintrin.h>],\n    [_mm_prefetch]\n  )],\n  [XGBOOST_MM_PREFETCH_PRESENT=\"-DXGBOOST_MM_PREFETCH_PRESENT=1\"],\n  [XGBOOST_MM_PREFETCH_PRESENT=\"\"]\n)\nif [[ \"$XGBOOST_MM_PREFETCH_PRESENT\" = \"\" ]]; then\n  echo \"Has _mm_prefetch\"\nelse\n  echo \"Doesn't have _mm_prefetch\"\nfi\n\nOPENMP_CXXFLAGS=\"\"\n\nif test `uname -s` = \"Linux\"\nthen\n  OPENMP_CXXFLAGS=\"\\$(SHLIB_OPENMP_CXXFLAGS)\"\nfi\n\nif test `uname -s` = \"Darwin\"\nthen\n  if command -v brew &> /dev/null\n  then\n    HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`\n  else\n    # Homebrew not found\n    HOMEBREW_LIBOMP_PREFIX=''\n  fi\n  OPENMP_CXXFLAGS=\"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include\"\n  OPENMP_LIB=\"-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib\"\n  ac_pkg_openmp=no\n  AC_MSG_CHECKING([whether OpenMP will work in a package])\n  AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])\n  ${CXX} -o conftest conftest.cpp ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes\n  AC_MSG_RESULT([${ac_pkg_openmp}])\n  if test \"${ac_pkg_openmp}\" = no; then\n    OPENMP_CXXFLAGS=''\n    OPENMP_LIB=''\n    echo '*****************************************************************************************'\n    echo '         
OpenMP is unavailable on this Mac OSX system. Training speed may be suboptimal.'\n    echo '         To use all CPU cores for training jobs, you should install OpenMP by running'\n    echo '             brew install libomp'\n    echo '*****************************************************************************************'\n  fi\nfi\n\nAC_SUBST(OPENMP_CXXFLAGS)\nAC_SUBST(OPENMP_LIB)\nAC_SUBST(ENDIAN_FLAG)\nAC_SUBST(DMLC_DEFS)\nAC_SUBST(BACKTRACE_LIB)\nAC_SUBST(XGBOOST_BUILTIN_PREFETCH_PRESENT)\nAC_SUBST(XGBOOST_MM_PREFETCH_PRESENT)\nAC_CONFIG_FILES([src/Makevars])\nAC_CONFIG_HEADERS([src/config.h])\nAC_OUTPUT\n"
  },
  {
    "path": "R-package/configure.win",
    "content": "R_EXE=\"${R_HOME}/bin${R_ARCH_BIN}/R.exe\"\nCXX=`\"${R_EXE}\" CMD config CXX`\n\ncat > test.cpp <<EOL\n#include <xmmintrin.h>\nint main() {\n  char data = 0;\n  const char* address = &data;\n  _mm_prefetch(address, _MM_HINT_NTA);\n  return 0;\n}\nEOL\n\nXGBOOST_MM_PREFETCH_PRESENT=\"\"\n${CXX} -o test test.cpp 2>/dev/null && ./test && XGBOOST_MM_PREFETCH_PRESENT=\"-DXGBOOST_MM_PREFETCH_PRESENT=1\"\nrm -f ./test\nrm -f ./test.cpp\n\nsed \\\n    -e \"s/@XGBOOST_MM_PREFETCH_PRESENT@/$XGBOOST_MM_PREFETCH_PRESENT/\" \\\n    < src/Makevars.win.in > src/Makevars.win\n"
  },
  {
    "path": "R-package/inst/make-r-def.R",
    "content": "# [description]\n#     Create a definition file (.def) from a .dll file, using objdump. This\n#     is used by FindLibR.cmake when building the R package with MSVC.\n#\n# [usage]\n#\n#     Rscript make-r-def.R something.dll something.def\n#\n# [references]\n#    * https://www.cs.colorado.edu/~main/cs1300/doc/mingwfaq.html\n\nargs <- commandArgs(trailingOnly = TRUE)\n\nIN_DLL_FILE <- args[[1L]]\nOUT_DEF_FILE <- args[[2L]]\nDLL_BASE_NAME <- basename(IN_DLL_FILE)\n\nmessage(sprintf(\"Creating '%s' from '%s'\", OUT_DEF_FILE, IN_DLL_FILE))\n\n# system() will not raise an R exception if the process called\n# fails. Wrapping it here to get that behavior.\n#\n# system() introduces a lot of overhead, at least on Windows,\n# so trying processx if it is available\n.pipe_shell_command_to_stdout <- function(command, args, out_file) {\n    has_processx <- suppressMessages({\n        suppressWarnings({\n            require(\"processx\")  # nolint\n        })\n    })\n    if (has_processx) {\n        p <- processx::process$new(\n            command = command\n            , args = args\n            , stdout = out_file\n            , windows_verbatim_args = FALSE\n        )\n        invisible(p$wait())\n    } else {\n        message(paste0(\n            \"Using system2() to run shell commands. 
Installing \"\n            , \"'processx' with install.packages('processx') might \"\n            , \"make this faster.\"\n        ))\n        exit_code <- system2(\n            command = command\n            , args = shQuote(args)\n            , stdout = out_file\n        )\n        if (exit_code != 0L) {\n            stop(paste0(\"Command failed with exit code: \", exit_code))\n        }\n    }\n    return(invisible(NULL))\n}\n\n# use objdump to dump all the symbols\nOBJDUMP_FILE <- file.path(tempdir(), \"objdump-out.txt\")\n.pipe_shell_command_to_stdout(\n    command = \"objdump\"\n    , args = c(\"-p\", IN_DLL_FILE)\n    , out_file = OBJDUMP_FILE\n)\n\nobjdump_results <- readLines(OBJDUMP_FILE)\nresult <- file.remove(OBJDUMP_FILE)\n\n# Only one table in the objdump results matters for our purposes,\n# see https://www.cs.colorado.edu/~main/cs1300/doc/mingwfaq.html\nstart_index <- which(\n    grepl(\n        pattern = \"[Ordinal/Name Pointer] Table\"\n        , x = objdump_results\n        , fixed = TRUE\n    )\n)\nempty_lines <- which(objdump_results == \"\")\nend_of_table <- empty_lines[empty_lines > start_index][1L]\n\n# Read the contents of the table\nexported_symbols <- objdump_results[(start_index + 1L):end_of_table]\nexported_symbols <- gsub(\"\\t\", \"\", exported_symbols, fixed = TRUE)\nexported_symbols <- gsub(\".*\\\\] \", \"\", exported_symbols)\nexported_symbols <- gsub(\" \", \"\", exported_symbols, fixed = TRUE)\n\n# Write R.def file\nwriteLines(\n    text = c(\n        paste0(\"LIBRARY \\\"\", DLL_BASE_NAME, \"\\\"\")\n        , \"EXPORTS\"\n        , exported_symbols\n    )\n    , con = OUT_DEF_FILE\n    , sep = \"\\n\"\n)\nmessage(sprintf(\"Successfully created '%s'\", OUT_DEF_FILE))\n"
  },
  {
    "path": "R-package/man/a-compatibility-note-for-saveRDS-save.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils.R\n\\name{a-compatibility-note-for-saveRDS-save}\n\\alias{a-compatibility-note-for-saveRDS-save}\n\\title{Model Serialization and Compatibility}\n\\description{\nWhen it comes to serializing XGBoost models, it's possible to use R serializers such as\n\\code{\\link[=save]{save()}} or \\code{\\link[=saveRDS]{saveRDS()}} to serialize an XGBoost model object, but XGBoost also provides\nits own serializers with better compatibility guarantees, which allow loading\nsaid models in other language bindings of XGBoost.\n\nNote that an \\code{xgb.Booster} object (\\strong{as produced by \\code{\\link[=xgb.train]{xgb.train()}}}, see rest of the doc\nfor objects produced by \\code{\\link[=xgboost]{xgboost()}}), outside of its core components, might also keep:\n\\itemize{\n\\item Additional model configuration (accessible through \\code{\\link[=xgb.config]{xgb.config()}}), which includes\nmodel fitting parameters like \\code{max_depth} and runtime parameters like \\code{nthread}.\nThese are not necessarily useful for prediction/importance/plotting.\n\\item Additional R specific attributes  - e.g. results of callbacks, such as evaluation logs,\nwhich are kept as a \\code{data.table} object, accessible through\n\\code{attributes(model)$evaluation_log} if present.\n}\n\nThe first one (configurations) does not have the same compatibility guarantees as\nthe model itself, including attributes that are set and accessed through\n\\code{\\link[=xgb.attributes]{xgb.attributes()}} - that is, such configuration might be lost after loading the\nbooster in a different XGBoost version, regardless of the serializer that was used.\nThese are saved when using \\code{\\link[=saveRDS]{saveRDS()}}, but will be discarded if loaded into an\nincompatible XGBoost version. 
They are not saved when using XGBoost's\nserializers from its public interface including \\code{\\link[=xgb.save]{xgb.save()}} and \\code{\\link[=xgb.save.raw]{xgb.save.raw()}}.\n\nThe second ones (R attributes) are not part of the standard XGBoost model structure,\nand thus are not saved when using XGBoost's own serializers. These attributes are\nonly used for informational purposes, such as keeping track of evaluation metrics as\nthe model was fit, or saving the R call that produced the model, but are otherwise\nnot used for prediction / importance / plotting / etc.\nThese R attributes are only preserved when using R's serializers.\n\nIn addition to the regular \\code{xgb.Booster} objects produced by \\code{\\link[=xgb.train]{xgb.train()}}, the\nfunction \\code{\\link[=xgboost]{xgboost()}} produces objects with a different subclass \\code{xgboost} (which\ninherits from \\code{xgb.Booster}), which keeps other additional metadata as R attributes\nsuch as class names in classification problems, and which has a dedicated \\code{predict}\nmethod that uses different defaults and takes different argument names. XGBoost's\nown serializers can work with this \\code{xgboost} class, but as they do not keep R\nattributes, the resulting object, when deserialized, is downcast to the regular\n\\code{xgb.Booster} class (i.e. it loses the metadata, and the resulting object will use\n\\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}} instead of \\code{\\link[=predict.xgboost]{predict.xgboost()}}) - for these \\code{xgboost} objects,\n\\code{saveRDS} might thus be a better option if the extra functionalities are needed.\n\nNote that XGBoost models in R starting from version \\verb{2.1.0} and onwards, and\nXGBoost models before version \\verb{2.1.0} have a very different R object structure and\nare incompatible with each other. 
Hence, models that were saved with R serializers\nlike \\code{\\link[=saveRDS]{saveRDS()}} or \\code{\\link[=save]{save()}} before version \\verb{2.1.0} will not work with later\n\\code{xgboost} versions and vice versa. Be aware that the structure of R model objects\ncould in theory change again in the future, so XGBoost's serializers should be\npreferred for long-term storage.\n\nFurthermore, note that model objects from XGBoost might not be serializable with third-party\nR packages like \\code{qs} or \\code{qs2}.\n}\n\\details{\nUse \\code{\\link[=xgb.save]{xgb.save()}} to save the XGBoost model as a stand-alone file. You may opt into\nthe JSON format by specifying the JSON extension. To read the model back, use\n\\code{\\link[=xgb.load]{xgb.load()}}.\n\nUse \\code{\\link[=xgb.save.raw]{xgb.save.raw()}} to save the XGBoost model as a sequence (vector) of raw bytes\nin a future-proof manner. Future releases of XGBoost will be able to read the raw bytes and\nre-construct the corresponding model. To read the model back, use \\code{\\link[=xgb.load.raw]{xgb.load.raw()}}.\nThe \\code{\\link[=xgb.save.raw]{xgb.save.raw()}} function is useful if you would like to persist the XGBoost model\nas part of another R object.\n\nUse \\code{\\link[=saveRDS]{saveRDS()}} if you require the R-specific attributes that a booster might have, such\nas evaluation logs or the model class \\code{xgboost} instead of \\code{xgb.Booster}, but note that\nfuture compatibility of such objects is outside XGBoost's control as it relies on R's\nserialization format (see e.g. 
the details section in \\link{serialize} and \\code{\\link[=save]{save()}} from base R).\n\nFor more details and explanation about model persistence and archival, consult the page\n\\url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\n\nbst <- xgb.train(\n  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\n\n# Save as a stand-alone file; load it with xgb.load()\nfname <- file.path(tempdir(), \"xgb_model.ubj\")\nxgb.save(bst, fname)\nbst2 <- xgb.load(fname)\n\n# Save as a stand-alone file (JSON); load it with xgb.load()\nfname <- file.path(tempdir(), \"xgb_model.json\")\nxgb.save(bst, fname)\nbst2 <- xgb.load(fname)\n\n# Save as a raw byte vector; load it with xgb.load.raw()\nxgb_bytes <- xgb.save.raw(bst)\nbst2 <- xgb.load.raw(xgb_bytes)\n\n# Persist XGBoost model as part of another R object\nobj <- list(xgb_model_bytes = xgb.save.raw(bst), description = \"My first XGBoost model\")\n# Persist the R object. Here, saveRDS() is okay, since it doesn't persist\n# xgb.Booster directly. What's being persisted is the future-proof byte representation\n# as given by xgb.save.raw().\nfname <- file.path(tempdir(), \"my_object.Rds\")\nsaveRDS(obj, fname)\n# Read back the R object\nobj2 <- readRDS(fname)\n# Re-construct xgb.Booster object from the bytes\nbst2 <- xgb.load.raw(obj2$xgb_model_bytes)\n\n}\n"
  },
  {
    "path": "R-package/man/agaricus.test.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgboost.R\n\\docType{data}\n\\name{agaricus.test}\n\\alias{agaricus.test}\n\\title{Test part from Mushroom Data Set}\n\\format{\nA list containing a label vector, and a dgCMatrix object with 1611\nrows and 126 variables\n}\n\\usage{\ndata(agaricus.test)\n}\n\\description{\nThis data set is originally from the Mushroom data set,\nUCI Machine Learning Repository.\n}\n\\details{\nIt includes the following fields:\n\\itemize{\n\\item \\code{label}: The label for each record.\n\\item \\code{data}: A sparse Matrix of 'dgCMatrix' class with 126 columns.\n}\n}\n\\references{\n\\url{https://archive.ics.uci.edu/ml/datasets/Mushroom}\n\nBache, K. & Lichman, M. (2013). UCI Machine Learning Repository\n\\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California,\nSchool of Information and Computer Science.\n}\n\\keyword{datasets}\n"
  },
  {
    "path": "R-package/man/agaricus.train.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgboost.R\n\\docType{data}\n\\name{agaricus.train}\n\\alias{agaricus.train}\n\\title{Training part from Mushroom Data Set}\n\\format{\nA list containing a label vector, and a dgCMatrix object with 6513\nrows and 126 variables\n}\n\\usage{\ndata(agaricus.train)\n}\n\\description{\nThis data set is originally from the Mushroom data set,\nUCI Machine Learning Repository.\n}\n\\details{\nIt includes the following fields:\n\\itemize{\n\\item \\code{label}: The label for each record.\n\\item \\code{data}: A sparse Matrix of 'dgCMatrix' class with 126 columns.\n}\n}\n\\references{\n\\url{https://archive.ics.uci.edu/ml/datasets/Mushroom}\n\nBache, K. & Lichman, M. (2013). UCI Machine Learning Repository\n\\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California,\nSchool of Information and Computer Science.\n}\n\\keyword{datasets}\n"
  },
  {
    "path": "R-package/man/coef.xgb.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{coef.xgb.Booster}\n\\alias{coef.xgb.Booster}\n\\title{Extract coefficients from linear booster}\n\\usage{\n\\method{coef}{xgb.Booster}(object, ...)\n}\n\\arguments{\n\\item{object}{A fitted booster of 'gblinear' type.}\n\n\\item{...}{Not used.}\n}\n\\value{\nThe extracted coefficients:\n\\itemize{\n\\item If there is only one coefficient per column in the data, will be returned as a\nvector, potentially containing the feature names if available, with the intercept\nas first column.\n\\item If there is more than one coefficient per column in the data (e.g. when using\n\\code{objective=\"multi:softmax\"}), will be returned as a matrix with dimensions equal\nto \\verb{[num_features, num_cols]}, with the intercepts as first row. Note that the column\n(classes in multi-class classification) dimension will not be named.\n}\n\nThe intercept returned here will include the 'base_score' parameter (unlike the 'bias'\nor the last coefficient in the model dump, which doesn't have 'base_score' added to it),\nhence one should get the same values from calling \\code{predict(..., outputmargin = TRUE)} and\nfrom performing a matrix multiplication with \\code{model.matrix(~., ...)}.\n\nBe aware that the coefficients are obtained by first converting them to strings and\nback, so there will always be some very small loss of precision compared to the actual\ncoefficients as used by \\link{predict.xgb.Booster}.\n}\n\\description{\nExtracts the coefficients from a 'gblinear' booster object,\nas produced by \\code{\\link[=xgb.train]{xgb.train()}} when using parameter \\code{booster=\"gblinear\"}.\n\nNote: this function will error out if passing a booster model\nwhich is not of \"gblinear\" type.\n}\n\\examples{\nlibrary(xgboost)\n\ndata(mtcars)\n\ny <- mtcars[, 1]\nx <- as.matrix(mtcars[, -1])\n\ndm <- xgb.DMatrix(data = x, label = y, nthread = 1)\nparams <- 
xgb.params(booster = \"gblinear\", nthread = 1)\nmodel <- xgb.train(data = dm, params = params, nrounds = 2)\ncoef(model)\n}\n"
  },
  {
    "path": "R-package/man/dim.xgb.DMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{dim.xgb.DMatrix}\n\\alias{dim.xgb.DMatrix}\n\\title{Dimensions of xgb.DMatrix}\n\\usage{\n\\method{dim}{xgb.DMatrix}(x)\n}\n\\arguments{\n\\item{x}{Object of class \\code{xgb.DMatrix}}\n}\n\\description{\nReturns a vector of numbers of rows and of columns in an \\code{xgb.DMatrix}.\n}\n\\details{\nNote: since \\code{\\link[=nrow]{nrow()}} and \\code{\\link[=ncol]{ncol()}} internally use \\code{\\link[=dim]{dim()}}, they can also\nbe directly used with an \\code{xgb.DMatrix} object.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ntrain <- agaricus.train\ndtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)\n\nstopifnot(nrow(dtrain) == nrow(train$data))\nstopifnot(ncol(dtrain) == ncol(train$data))\nstopifnot(all(dim(dtrain) == dim(train$data)))\n\n}\n"
  },
  {
    "path": "R-package/man/dimnames.xgb.DMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{dimnames.xgb.DMatrix}\n\\alias{dimnames.xgb.DMatrix}\n\\alias{dimnames<-.xgb.DMatrix}\n\\title{Handling of column names of \\code{xgb.DMatrix}}\n\\usage{\n\\method{dimnames}{xgb.DMatrix}(x)\n\n\\method{dimnames}{xgb.DMatrix}(x) <- value\n}\n\\arguments{\n\\item{x}{Object of class \\code{xgb.DMatrix}.}\n\n\\item{value}{A list of two elements: the first one is ignored\nand the second one is column names}\n}\n\\description{\nOnly column names are supported for \\code{xgb.DMatrix}, thus setting of\nrow names would have no effect and returned row names would be \\code{NULL}.\n}\n\\details{\nGeneric \\code{\\link[=dimnames]{dimnames()}} methods are used by \\code{\\link[=colnames]{colnames()}}.\nSince row names are irrelevant, it is recommended to use \\code{\\link[=colnames]{colnames()}} directly.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ntrain <- agaricus.train\ndtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)\ndimnames(dtrain)\ncolnames(dtrain)\ncolnames(dtrain) <- make.names(1:ncol(train$data))\nprint(dtrain, verbose = TRUE)\n\n}\n"
  },
  {
    "path": "R-package/man/getinfo.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R, R/xgb.DMatrix.R\n\\name{getinfo.xgb.Booster}\n\\alias{getinfo.xgb.Booster}\n\\alias{setinfo.xgb.Booster}\n\\alias{getinfo}\n\\alias{getinfo.xgb.DMatrix}\n\\alias{setinfo}\n\\alias{setinfo.xgb.DMatrix}\n\\title{Get or set information of xgb.DMatrix and xgb.Booster objects}\n\\usage{\n\\method{getinfo}{xgb.Booster}(object, name)\n\n\\method{setinfo}{xgb.Booster}(object, name, info)\n\ngetinfo(object, name)\n\n\\method{getinfo}{xgb.DMatrix}(object, name)\n\nsetinfo(object, name, info)\n\n\\method{setinfo}{xgb.DMatrix}(object, name, info)\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.DMatrix} or \\code{xgb.Booster}.}\n\n\\item{name}{The name of the information field to get (see details).}\n\n\\item{info}{The specific field of information to set.}\n}\n\\value{\nFor \\code{getinfo()}, will return the requested field. For \\code{setinfo()},\nwill always return value \\code{TRUE} if it succeeds.\n}\n\\description{\nGet or set information of xgb.DMatrix and xgb.Booster objects\n}\n\\details{\nThe \\code{name} field can be one of the following for \\code{xgb.DMatrix}:\n\\itemize{\n\\item label\n\\item weight\n\\item base_margin\n\\item label_lower_bound\n\\item label_upper_bound\n\\item group\n\\item feature_type\n\\item feature_name\n\\item nrow\n}\n\nSee the documentation for \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} for more information about these fields.\n\nFor \\code{xgb.Booster}, can be one of the following:\n\\itemize{\n\\item \\code{feature_type}\n\\item \\code{feature_name}\n}\n\nNote that, while 'qid' cannot be retrieved, it is possible to get the equivalent 'group'\nfor a DMatrix that had 'qid' assigned.\n\n\\strong{Important}: when calling \\code{\\link[=setinfo]{setinfo()}}, the objects are modified in-place. 
See\n\\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an idea of how this in-place assignment works.\n\nSee the documentation for \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} for possible fields that can be set\n(which correspond to arguments in that function).\n\nNote that the following fields are allowed in the construction of an \\code{xgb.DMatrix}\nbut \\strong{are not} allowed here:\n\\itemize{\n\\item data\n\\item missing\n\\item silent\n\\item nthread\n}\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n\nlabels <- getinfo(dtrain, \"label\")\nsetinfo(dtrain, \"label\", 1 - labels)\n\nlabels2 <- getinfo(dtrain, \"label\")\nstopifnot(all(labels2 == 1 - labels))\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n\nlabels <- getinfo(dtrain, \"label\")\nsetinfo(dtrain, \"label\", 1 - labels)\n\nlabels2 <- getinfo(dtrain, \"label\")\nstopifnot(all.equal(labels2, 1 - labels))\n}\n"
  },
  {
    "path": "R-package/man/predict.xgb.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{predict.xgb.Booster}\n\\alias{predict.xgb.Booster}\n\\title{Predict method for XGBoost model}\n\\usage{\n\\method{predict}{xgb.Booster}(\n  object,\n  newdata,\n  missing = NA,\n  outputmargin = FALSE,\n  predleaf = FALSE,\n  predcontrib = FALSE,\n  approxcontrib = FALSE,\n  predinteraction = FALSE,\n  training = FALSE,\n  iterationrange = NULL,\n  strict_shape = FALSE,\n  avoid_transpose = FALSE,\n  validate_features = FALSE,\n  base_margin = NULL,\n  ...\n)\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.Booster}.}\n\n\\item{newdata}{Takes \\code{data.frame}, \\code{matrix}, \\code{dgCMatrix}, \\code{dgRMatrix}, \\code{dsparseVector},\nlocal data file, or \\code{xgb.DMatrix}.\n\nFor single-row predictions on sparse data, it is recommended to use CSR format. If passing\na sparse vector, it will take it as a row vector.\n\nNote that, for repeated predictions on the same data, one might want to create a DMatrix to\npass here instead of passing R types like matrices or data frames, as predictions will be\nfaster on DMatrix.\n\nIf \\code{newdata} is a \\code{data.frame}, be aware that:\n\\itemize{\n\\item Columns will be converted to numeric if they aren't already, which could potentially make\nthe operation slower than in an equivalent \\code{matrix} object.\n\\item The order of the columns must match with that of the data from which the model was fitted\n(i.e. columns will not be referenced by their names, just by their order in the data),\nunless passing \\code{validate_features = TRUE} (which is not the default).\n\\item If the model was fitted to data with categorical columns, these columns must be of\n\\code{factor} type here, and must use the same encoding (i.e. 
have the same levels).\n\\item If \\code{newdata} contains any \\code{factor} columns, they will be converted to base-0\nencoding (same as during DMatrix creation) - hence, one should not pass a \\code{factor}\nunder a column which during training had a different type.\n\\item Any columns with type other than \\code{factor} will be interpreted as numeric.\n}}\n\n\\item{missing}{Float value that represents missing values in data\n(e.g., 0 or some other extreme value).\n\nThis parameter is not used when \\code{newdata} is an \\code{xgb.DMatrix} - in such cases,\nshould pass this as an argument to the DMatrix constructor instead.}\n\n\\item{outputmargin}{Whether the prediction should be returned in the form of\noriginal untransformed sum of predictions from boosting iterations' results.\nE.g., setting \\code{outputmargin = TRUE} for logistic regression would return log-odds\ninstead of probabilities.}\n\n\\item{predleaf}{Whether to predict per-tree leaf indices.}\n\n\\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}\n\n\\item{approxcontrib}{Whether to use a fast approximation for feature contributions (see Details).}\n\n\\item{predinteraction}{Whether to return contributions of feature interactions to individual predictions (see Details).}\n\n\\item{training}{Whether the prediction result is used for training. 
When enabled,\nXGBoost uses the training prediction path instead of inplace prediction.}\n\n\\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing\na two-dimensional vector with the start and end numbers in the sequence (same format as R's \\code{seq} - i.e.\nbase-1 indexing, and inclusive of both ends).\n\nFor example, passing \\code{c(1,20)} will predict using the first twenty iterations, while passing \\code{c(1,1)} will\npredict using only the first one.\n\nIf passing \\code{NULL}, will either stop at the best iteration if the model used early stopping, or use all\nof the iterations (rounds) otherwise.\n\nIf passing \"all\", will use all of the rounds regardless of whether the model had early stopping or not.\n\nNot applicable to \\code{gblinear} booster.}\n\n\\item{strict_shape}{Whether to always return an array with the same dimensions for the given prediction mode\nregardless of the model type - meaning that, for example, both a multi-class and a binary classification\nmodel would generate output arrays with the same number of dimensions, with the 'class' dimension having\nsize equal to '1' for the binary model.\n\nIf passing \\code{FALSE} (the default), dimensions will be simplified according to the model type, so that a\nbinary classification model for example would not have a redundant dimension for 'class'.\n\nSee documentation for the return type for the exact shape of the output arrays for each prediction mode.}\n\n\\item{avoid_transpose}{Whether to output the resulting predictions in the same memory layout in which they\nare generated by the core XGBoost library, without transposing them to match the expected output shape.\n\nInternally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major\norder, hence the result needs to be transposed in order to have the expected shape when represented as\nan R array or matrix, which might be a slow operation.\n\nIf 
passing \\code{TRUE}, then the result will have dimensions in reverse order - for example, rows\nwill be the last dimensions instead of the first dimension.}\n\n\\item{validate_features}{When \\code{TRUE}, validate that the Booster's and newdata's\nfeature_names match (only applicable when both \\code{object} and \\code{newdata} have feature names).\n\nIf the column names differ and \\code{newdata} is not an \\code{xgb.DMatrix}, will try to reorder\nthe columns in \\code{newdata} to match with the booster's.\n\nIf the booster has feature types and \\code{newdata} is either an \\code{xgb.DMatrix} or\n\\code{data.frame}, will additionally verify that categorical columns are of the\ncorrect type in \\code{newdata}, throwing an error if they do not match.\n\nIf passing \\code{FALSE}, it is assumed that the feature names and types are the same,\nand come in the same order as in the training data.\n\nNote that this check might add some sizable latency to the predictions, so it's\nrecommended to disable it for performance-sensitive applications.}\n\n\\item{base_margin}{Base margin used for boosting from existing model (raw score that gets added to\nall observations independently of the trees in the model).\n\nIf supplied, should be either a vector with length equal to the number of rows in \\code{newdata}\n(for objectives which produces a single score per observation), or a matrix with number of\nrows matching to the number rows in \\code{newdata} and number of columns matching to the number\nof scores estimated by the model (e.g. number of classes for multi-class classification).\n\nNote that, if \\code{newdata} is an \\code{xgb.DMatrix} object, this argument will\nbe ignored as it needs to be added to the DMatrix instead (e.g. 
by passing it as\nan argument in its constructor, or by calling \\code{\\link[=setinfo.xgb.DMatrix]{setinfo.xgb.DMatrix()}}).}\n\n\\item{...}{Not used.}\n}\n\\value{\nA numeric vector or array, with corresponding dimensions depending on the prediction mode and on\nparameter \\code{strict_shape} as follows:\n\nIf passing \\code{strict_shape=FALSE}:\\itemize{\n\\item For regression or binary classification: a vector of length \\code{nrows}.\n\\item For multi-class and multi-target objectives: a matrix of dimensions \\verb{[nrows, ngroups]}.\n\nNote that objective variant \\code{multi:softmax} defaults towards predicting most likely class (a vector\n\\code{nrows}) instead of per-class probabilities.\n\\item For \\code{predleaf}: a matrix with one column per tree.\n\nFor multi-class / multi-target, they will be arranged so that columns in the output will have\nthe leafs from one group followed by leafs of the other group (e.g. order will be \\code{group1:feat1},\n\\code{group1:feat2}, ..., \\code{group2:feat1}, \\code{group2:feat2}, ...).\n\nIf there is more than one parallel tree (e.g. random forests), the parallel trees will be the\nlast grouping in the resulting order, which will still be 2D.\n\\item For \\code{predcontrib}: when not multi-class / multi-target, a matrix with dimensions\n\\verb{[nrows, nfeats+1]}. The last \"+ 1\" column corresponds to the baseline value.\n\nFor multi-class and multi-target objectives, will be an array with dimensions \\verb{[nrows, ngroups, nfeats+1]}.\n\nThe contribution values are on the scale of untransformed margin (e.g., for binary classification,\nthe values are log-odds deviations from the baseline).\n\\item For \\code{predinteraction}: when not multi-class / multi-target, the output is a 3D array of\ndimensions \\verb{[nrows, nfeats+1, nfeats+1]}. The off-diagonal (in the last two dimensions)\nelements represent different feature interaction contributions. The array is symmetric w.r.t. the last\ntwo dimensions. 
The \"+ 1\" columns correspond to the baselines. Summing this array along the last\ndimension should produce practically the same result as \\code{predcontrib = TRUE}.\n\nFor multi-class and multi-target, will be a 4D array with dimensions \\verb{[nrows, ngroups, nfeats+1, nfeats+1]}\n}\n\nIf passing \\code{strict_shape=TRUE}, the result is always a matrix (if 2D) or array (if 3D or higher):\n\\itemize{\n\\item For normal predictions, the dimension is \\verb{[nrows, ngroups]}.\n\\item For \\code{predcontrib=TRUE}, the dimension is \\verb{[nrows, ngroups, nfeats+1]}.\n\\item For \\code{predinteraction=TRUE}, the dimension is \\verb{[nrows, ngroups, nfeats+1, nfeats+1]}.\n\\item For \\code{predleaf=TRUE}, the dimension is \\verb{[nrows, niter, ngroups, num_parallel_tree]}.\n}\n\nIf passing \\code{avoid_transpose=TRUE}, then the dimensions in all cases will be in reverse order - for\nexample, for \\code{predinteraction}, they will be \\verb{[nfeats+1, nfeats+1, ngroups, nrows]}\ninstead of \\verb{[nrows, ngroups, nfeats+1, nfeats+1]}.\n}\n\\description{\nPredict values on data based on XGBoost model.\n}\n\\details{\nNote that \\code{iterationrange} would currently do nothing for predictions from \"gblinear\",\nsince \"gblinear\" doesn't keep its boosting history.\n\nOne possible practical application of the \\code{predleaf} option is to use the model\nas a generator of new features which capture non-linearity and interactions,\ne.g., as implemented in \\code{\\link[=xgb.create.features]{xgb.create.features()}}.\n\nSetting \\code{predcontrib = TRUE} allows to calculate contributions of each feature to\nindividual predictions. For \"gblinear\" booster, feature contributions are simply linear terms\n(feature_beta * feature_value). 
For \"gbtree\" booster, feature contributions are SHAP\nvalues (Lundberg 2017) that sum to the difference between the expected output\nof the model and the current prediction (where the hessian weights are used to compute the expectations).\nSetting \\code{approxcontrib = TRUE} approximates these values following the idea explained\nin \\url{http://blog.datadive.net/interpreting-random-forests/}.\n\nWith \\code{predinteraction = TRUE}, SHAP values of contributions of interaction of each pair of features\nare computed. Note that this operation might be rather expensive in terms of compute and memory.\nSince it quadratically depends on the number of features, it is recommended to perform selection\nof the most important features first. See below about the format of the returned results.\n\nThe \\code{predict()} method uses as many threads as defined in \\code{xgb.Booster} object (all by default).\nIf you want to change their number, assign a new number to \\code{nthread} using \\code{\\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}}.\nNote that converting a matrix to \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} uses multiple threads too.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\n## binary classification:\n\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 2 for examples\nnthread <- 2\ndata.table::setDTthreads(nthread)\n\ntrain <- agaricus.train\ntest <- agaricus.test\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 5,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\n# use all trees by default\npred <- predict(bst, test$data)\n# use only the 1st tree\npred1 <- predict(bst, test$data, iterationrange = c(1, 1))\n\n# Predicting tree leafs:\n# the result is an nsamples X ntrees matrix\npred_leaf <- predict(bst, test$data, predleaf = 
TRUE)\nstr(pred_leaf)\n\n# Predicting feature contributions to predictions:\n# the result is an nsamples X (nfeatures + 1) matrix\npred_contr <- predict(bst, test$data, predcontrib = TRUE)\nstr(pred_contr)\n# verify that contributions' sums are equal to log-odds of predictions (up to float precision):\nsummary(rowSums(pred_contr) - qlogis(pred))\n# for the 1st record, let's inspect its features that had non-zero contribution to prediction:\ncontr1 <- pred_contr[1,]\ncontr1 <- contr1[-length(contr1)]    # drop intercept\ncontr1 <- contr1[contr1 != 0]        # drop non-contributing features\ncontr1 <- contr1[order(abs(contr1))] # order by contribution magnitude\nold_mar <- par(\"mar\")\npar(mar = old_mar + c(0,7,0,0))\nbarplot(contr1, horiz = TRUE, las = 2, xlab = \"contribution to prediction in log-odds\")\npar(mar = old_mar)\n\n\n## multiclass classification in iris dataset:\n\nlb <- as.numeric(iris$Species) - 1\nnum_class <- 3\n\nset.seed(11)\n\nbst <- xgb.train(\n  data = xgb.DMatrix(as.matrix(iris[, -5], nthread = 1), label = lb),\n  nrounds = 10,\n  params = xgb.params(\n    max_depth = 4,\n    nthread = 2,\n    subsample = 0.5,\n    objective = \"multi:softprob\",\n    num_class = num_class\n  )\n)\n\n# predict for softmax returns num_class probability numbers per case:\npred <- predict(bst, as.matrix(iris[, -5]))\nstr(pred)\n# convert the probabilities to softmax labels\npred_labels <- max.col(pred) - 1\n# the following should result in the same error as seen in the last iteration\nsum(pred_labels != lb) / length(lb)\n\n# compare with predictions from softmax:\nset.seed(11)\n\nbst <- xgb.train(\n  data = xgb.DMatrix(as.matrix(iris[, -5], nthread = 1), label = lb),\n  nrounds = 10,\n  params = xgb.params(\n    max_depth = 4,\n    nthread = 2,\n    subsample = 0.5,\n    objective = \"multi:softmax\",\n    num_class = num_class\n  )\n)\n\npred <- predict(bst, as.matrix(iris[, -5]))\nstr(pred)\nall.equal(pred, pred_labels)\n# prediction from using only 5 
iterations should result\n# in the same error as seen in iteration 5:\npred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))\nsum(pred5 != lb) / length(lb)\n\n}\n\\references{\n\\enumerate{\n\\item Scott M. Lundberg, Su-In Lee, \"A Unified Approach to Interpreting Model Predictions\",\nNIPS Proceedings 2017, \\url{https://arxiv.org/abs/1705.07874}\n\\item Scott M. Lundberg, Su-In Lee, \"Consistent feature attribution for tree ensembles\",\n\\url{https://arxiv.org/abs/1706.06060}\n}\n}\n\\seealso{\n\\code{\\link[=xgb.train]{xgb.train()}}\n}\n"
  },
  {
    "path": "R-package/man/predict.xgboost.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgboost.R\n\\name{predict.xgboost}\n\\alias{predict.xgboost}\n\\title{Compute predictions from XGBoost model on new data}\n\\usage{\n\\method{predict}{xgboost}(\n  object,\n  newdata,\n  type = \"response\",\n  base_margin = NULL,\n  iteration_range = NULL,\n  validate_features = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{object}{An XGBoost model object of class \\code{xgboost}, as produced by function \\code{\\link[=xgboost]{xgboost()}}.\n\nNote that there is also a lower-level \\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}} method for models of class\n\\code{xgb.Booster} as produced by \\code{\\link[=xgb.train]{xgb.train()}}, which can also be used for \\code{xgboost} class models as\nan alternative that performs fewer validations and post-processings.}\n\n\\item{newdata}{Data on which to compute predictions from the model passed in \\code{object}. Supported\ninput classes are:\n\\itemize{\n\\item Data Frames (class \\code{data.frame} from base R and subclasses like \\code{data.table}).\n\\item Matrices (class \\code{matrix} from base R).\n\\item Sparse matrices from package \\code{Matrix}, either as class \\code{dgRMatrix} (CSR) or \\code{dgCMatrix} (CSC).\n\\item Sparse vectors from package \\code{Matrix}, which will be interpreted as containing a single\nobservation.\n}\n\nIn the case of data frames, if there are any categorical features, they should be of class\n\\code{factor} and should have the same levels as the \\code{factor} columns of the data from which the model\nwas constructed. Any columns with type other than \\code{factor} will be interpreted as numeric.\n\nIf there are named columns and the model was fitted to data with named columns, they will be\nmatched by name by default (see \\code{validate_features}).}\n\n\\item{type}{Type of prediction to make. 
Supported options are:\n\\itemize{\n\\item \\code{\"response\"}: will output model predictions on the scale of the response variable (e.g.\nprobabilities of belonging to the last class in the case of binary classification). Result will\nbe either a numeric vector with length matching to rows in \\code{newdata}, or a numeric matrix with\nshape \\verb{[nrows(newdata), nscores]} (for objectives that produce more than one score per\nobservation such as multi-class classification or multi-quantile regression).\n\\item \\code{\"raw\"}: will output the unprocessed boosting scores (e.g. log-odds in the case of objective\n\\code{binary:logistic}). Same output shape and type as for \\code{\"response\"}.\n\\item \\code{\"class\"}: will output the class with the highest predicted probability, returned as a \\code{factor}\n(only applicable to classification objectives) with length matching to rows in \\code{newdata}.\n\\item \\code{\"leaf\"}: will output the terminal node indices of each observation across each tree, as an\ninteger matrix of shape \\verb{[nrows(newdata), ntrees]}, or as an integer array with an extra one or\ntwo dimensions, up to \\verb{[nrows(newdata), ntrees, nscores, n_parallel_trees]} for models that\nproduce more than one score per tree and/or which have more than one parallel tree (e.g.\nrandom forests).\n\nOnly applicable to tree-based boosters (not \\code{gblinear}).\n\\item \\code{\"contrib\"}: will produce per-feature contribution estimates towards the model score for a\ngiven observation, based on SHAP values. 
The contribution values are on the scale of\nuntransformed margin (e.g., for binary classification, the values are log-odds deviations from\nthe baseline).\n\nOutput will be a numeric matrix with shape \\verb{[nrows, nfeatures+1]}, with the intercept being the\nlast feature, or a numeric array with shape \\verb{[nrows, nscores, nfeatures+1]} if the model\nproduces more than one score per observation.\n\\item \\code{\"interaction\"}: similar to \\code{\"contrib\"}, but computing SHAP values of contributions of\ninteraction of each pair of features. Note that this operation might be rather expensive in\nterms of compute and memory.\n\nSince it quadratically depends on the number of features, it is recommended to perform\nselection of the most important features first.\n\nOutput will be a numeric array of shape \\verb{[nrows, nfeatures+1, nfeatures+1]}, or shape\n\\verb{[nrows, nscores, nfeatures+1, nfeatures+1]} (for objectives that produce more than one score\nper observation).\n}}\n\n\\item{base_margin}{Base margin used for boosting from existing model (raw score that gets added to\nall observations independently of the trees in the model).\n\nIf supplied, should be either a vector with length equal to the number of rows in \\code{newdata}\n(for objectives which produces a single score per observation), or a matrix with number of\nrows matching to the number rows in \\code{newdata} and number of columns matching to the number\nof scores estimated by the model (e.g. 
number of classes for multi-class classification).}\n\n\\item{iteration_range}{Sequence of rounds/iterations from the model to use for prediction, specified by passing\na two-dimensional vector with the start and end numbers in the sequence (same format as R's \\code{seq} - i.e.\nbase-1 indexing, and inclusive of both ends).\n\nFor example, passing \\code{c(1,20)} will predict using the first twenty iterations, while passing \\code{c(1,1)} will\npredict using only the first one.\n\nIf passing \\code{NULL}, will either stop at the best iteration if the model used early stopping, or use all\nof the iterations (rounds) otherwise.\n\nIf passing \"all\", will use all of the rounds regardless of whether the model had early stopping or not.\n\nNot applicable to \\code{gblinear} booster.}\n\n\\item{validate_features}{Validate that the feature names in the data match to the feature names\nin the column, and reorder them in the data otherwise.\n\nIf passing \\code{FALSE}, it is assumed that the feature names and types are the same,\nand come in the same order as in the training data.\n\nBe aware that this only applies to column names and not to factor levels in categorical columns.\n\nNote that this check might add some sizable latency to the predictions, so it's\nrecommended to disable it for performance-sensitive applications.}\n\n\\item{...}{Not used.}\n}\n\\value{\nEither a numeric vector (for 1D outputs), numeric matrix (for 2D outputs), numeric array\n(for 3D and higher), or \\code{factor} (for class predictions). 
See documentation for parameter \\code{type}\nfor details about what the output type and shape will be.\n}\n\\description{\nPredict values on data based on XGBoost model.\n}\n\\examples{\ndata(\"ToothGrowth\")\ny <- ToothGrowth$supp\nx <- ToothGrowth[, -2L]\nmodel <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\npred_prob <- predict(model, x[1:5, ], type = \"response\")\npred_raw <- predict(model, x[1:5, ], type = \"raw\")\npred_class <- predict(model, x[1:5, ], type = \"class\")\n\n# Relationships between these\nmanual_probs <- 1 / (1 + exp(-pred_raw))\nmanual_class <- ifelse(manual_probs < 0.5, levels(y)[1], levels(y)[2])\n\n# They should match up to numerical precision\nround(pred_prob, 6) == round(manual_probs, 6)\npred_class == manual_class\n}\n"
  },
  {
    "path": "R-package/man/print.xgb.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{print.xgb.Booster}\n\\alias{print.xgb.Booster}\n\\title{Print xgb.Booster}\n\\usage{\n\\method{print}{xgb.Booster}(x, ...)\n}\n\\arguments{\n\\item{x}{An \\code{xgb.Booster} object.}\n\n\\item{...}{Not used.}\n}\n\\value{\nThe same \\code{x} object, returned invisibly\n}\n\\description{\nPrint information about \\code{xgb.Booster}.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\ntrain <- agaricus.train\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\n\nattr(bst, \"myattr\") <- \"memo\"\n\nprint(bst)\n}\n"
  },
  {
    "path": "R-package/man/print.xgb.DMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{print.xgb.DMatrix}\n\\alias{print.xgb.DMatrix}\n\\title{Print xgb.DMatrix}\n\\usage{\n\\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...)\n}\n\\arguments{\n\\item{x}{An xgb.DMatrix object.}\n\n\\item{verbose}{Whether to print colnames (when present).}\n\n\\item{...}{Not currently used.}\n}\n\\description{\nPrint information about xgb.DMatrix.\nCurrently it displays dimensions and presence of info-fields and colnames.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\ndtrain\n\nprint(dtrain, verbose = TRUE)\n\n}\n"
  },
  {
    "path": "R-package/man/print.xgb.cv.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.cv.R\n\\name{print.xgb.cv.synchronous}\n\\alias{print.xgb.cv.synchronous}\n\\title{Print xgb.cv result}\n\\usage{\n\\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...)\n}\n\\arguments{\n\\item{x}{An \\code{xgb.cv.synchronous} object.}\n\n\\item{verbose}{Whether to print detailed data.}\n\n\\item{...}{Passed to \\code{data.table.print()}.}\n}\n\\description{\nPrints formatted results of \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\details{\nWhen not verbose, it would only print the evaluation results,\nincluding the best iteration (when available).\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ntrain <- agaricus.train\ncv <- xgb.cv(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nfold = 5,\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\nprint(cv)\nprint(cv, verbose = TRUE)\n\n}\n"
  },
  {
    "path": "R-package/man/print.xgboost.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgboost.R\n\\name{print.xgboost}\n\\alias{print.xgboost}\n\\title{Print info from XGBoost model}\n\\usage{\n\\method{print}{xgboost}(x, ...)\n}\n\\arguments{\n\\item{x}{An XGBoost model object of class \\code{xgboost}, as produced by function \\code{\\link[=xgboost]{xgboost()}}.}\n\n\\item{...}{Not used.}\n}\n\\value{\nSame object \\code{x}, after printing its info.\n}\n\\description{\nPrints basic properties of an XGBoost model object.\n}\n"
  },
  {
    "path": "R-package/man/variable.names.xgb.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{variable.names.xgb.Booster}\n\\alias{variable.names.xgb.Booster}\n\\title{Get Features Names from Booster}\n\\usage{\n\\method{variable.names}{xgb.Booster}(object, ...)\n}\n\\arguments{\n\\item{object}{An \\code{xgb.Booster} object.}\n\n\\item{...}{Not used.}\n}\n\\description{\nReturns the feature / variable / column names from a fitted\nbooster object, which are set automatically during the call to \\code{\\link[=xgb.train]{xgb.train()}}\nfrom the DMatrix names, or which can be set manually through \\code{\\link[=setinfo]{setinfo()}}.\n\nIf the object doesn't have feature names, will return \\code{NULL}.\n\nIt is equivalent to calling \\code{getinfo(object, \"feature_name\")}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.Callback.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.Callback}\n\\alias{xgb.Callback}\n\\title{XGBoost Callback Constructor}\n\\usage{\nxgb.Callback(\n  cb_name = \"custom_callback\",\n  env = new.env(),\n  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)\n    NULL,\n  f_before_iter = function(env, model, data, evals, iteration) NULL,\n  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,\n  f_after_training = function(env, model, data, evals, iteration, final_feval,\n    prev_cb_res) NULL\n)\n}\n\\arguments{\n\\item{cb_name}{Name for the callback.\n\nIf the callback produces some non-NULL result (from executing the function passed under\n\\code{f_after_training}), that result will be added as an R attribute to the resulting booster\n(or as a named element in the result of CV), with the attribute name specified here.\n\nNames of callbacks must be unique - i.e. there cannot be two callbacks with the same name.}\n\n\\item{env}{An environment object that will be passed to the different functions in the callback.\nNote that this environment will not be shared with other callbacks.}\n\n\\item{f_before_training}{A function that will be executed before the training has started.\n\nIf passing \\code{NULL} for this or for the other function inputs, then no function will be executed.\n\nIf passing a function, it will be called with parameters supplied as non-named arguments\nmatching the function signatures that are shown in the default value for each function argument.}\n\n\\item{f_before_iter}{A function that will be executed before each boosting round.\n\nThis function can signal whether the training should be finalized or not, by outputting\na value that evaluates to \\code{TRUE} - i.e. 
if the output from the function provided here at\na given round is \\code{TRUE}, then training will be stopped before the current iteration happens.\n\nReturn values of \\code{NULL} will be interpreted as \\code{FALSE}.}\n\n\\item{f_after_iter}{A function that will be executed after each boosting round.\n\nThis function can signal whether the training should be finalized or not, by outputting\na value that evaluates to \\code{TRUE} - i.e. if the output from the function provided here at\na given round is \\code{TRUE}, then training will be stopped at that round.\n\nReturn values of \\code{NULL} will be interpreted as \\code{FALSE}.}\n\n\\item{f_after_training}{A function that will be executed after training is finished.\n\nThis function can optionally output something non-NULL, which will become part of the R\nattributes of the booster (assuming one passes \\code{keep_extra_attributes=TRUE} to \\code{\\link[=xgb.train]{xgb.train()}})\nunder the name supplied for parameter \\code{cb_name} in the case of \\code{\\link[=xgb.train]{xgb.train()}}; or a part\nof the named elements in the result of \\code{\\link[=xgb.cv]{xgb.cv()}}.}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nConstructor for defining the structure of callback functions that can be executed\nat different stages of model training (before / after training, before / after each boosting\niteration).\n}\n\\details{\nArguments that will be passed to the supplied functions are as follows:\n\\itemize{\n\\item env The same environment that is passed under argument \\code{env}.\n\nIt may be modified by the functions in order to e.g. 
keep track of what happens\nacross iterations or similar.\n\nThis environment is only used by the functions supplied to the callback, and will\nnot be kept after the model fitting function terminates (see parameter \\code{f_after_training}).\n\\item model The booster object when using \\code{\\link[=xgb.train]{xgb.train()}}, or the folds when using \\code{\\link[=xgb.cv]{xgb.cv()}}.\n\nFor \\code{\\link[=xgb.cv]{xgb.cv()}}, folds are a list with a structure as follows:\n\\itemize{\n\\item \\code{dtrain}: The training data for the fold (as an \\code{xgb.DMatrix} object).\n\\item \\code{bst}: The \\code{xgb.Booster} object for the fold.\n\\item \\code{evals}: A list containing two DMatrices, with names \\code{train} and \\code{test}\n(\\code{test} is the held-out data for the fold).\n\\item \\code{index}: The indices of the hold-out data for that fold (base-1 indexing),\nfrom which the \\code{test} entry in \\code{evals} was obtained.\n}\n\nThis object should \\strong{not} be in-place modified in ways that conflict with the\ntraining (e.g. resetting the parameters for a training update in a way that resets\nthe number of rounds to zero in order to overwrite rounds).\n\nNote that any R attributes that are assigned to the booster during the callback functions,\nwill not be kept thereafter as the booster object variable is not re-assigned during\ntraining. 
It is however possible to set C-level attributes of the booster through\n\\code{\\link[=xgb.attr]{xgb.attr()}} or \\code{\\link[=xgb.attributes]{xgb.attributes()}}, which should remain available for the rest\nof the iterations and after the training is done.\n\nFor keeping variables across iterations, it's recommended to use \\code{env} instead.\n\\item data The data to which the model is being fit, as an \\code{xgb.DMatrix} object.\n\nNote that, for \\code{\\link[=xgb.cv]{xgb.cv()}}, this will be the full data, while data for the specific\nfolds can be found in the \\code{model} object.\n\\item evals The evaluation data, as passed under argument \\code{evals} to \\code{\\link[=xgb.train]{xgb.train()}}.\n\nFor \\code{\\link[=xgb.cv]{xgb.cv()}}, this will always be \\code{NULL}.\n\\item begin_iteration Index of the first boosting iteration that will be executed (base-1 indexing).\n\nThis will typically be '1', but when using training continuation, depending on the\nparameters for updates, boosting rounds will be continued from where the previous\nmodel ended, in which case this will be larger than 1.\n\\item end_iteration Index of the last boosting iteration that will be executed\n(base-1 indexing, inclusive of this end).\n\nIt should match with argument \\code{nrounds} passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n\nNote that boosting might be interrupted before reaching this last iteration, for\nexample by using the early stopping callback \\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}}.\n\\item iteration Index of the iteration number that is being executed (first iteration\nwill be the same as parameter \\code{begin_iteration}, then next one will add +1, and so on).\n\\item iter_feval Evaluation metrics for \\code{evals} that were supplied, either\ndetermined by the objective, or by parameter \\code{custom_metric}.\n\nFor \\code{\\link[=xgb.train]{xgb.train()}}, this will be a named vector with one entry per 
element in\n\\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for\nexample, if \\code{evals} contains an entry named \"tr\" and the metric is \"rmse\",\nthis will be a one-element vector with name \"tr-rmse\".\n\nFor \\code{\\link[=xgb.cv]{xgb.cv()}}, this will be a 2d matrix with dimensions \\verb{[length(evals), nfolds]},\nwhere the row names will follow the same naming logic as the one-dimensional vector\nthat is passed in \\code{\\link[=xgb.train]{xgb.train()}}.\n\nNote that, internally, the built-in callbacks such as \\link{xgb.cb.print.evaluation} summarize\nthis table by calculating the row-wise means and standard deviations.\n\\item final_feval The evaluation results after the last boosting round is executed\n(same format as \\code{iter_feval}, and will be the exact same input as passed under\n\\code{iter_feval} to the last round that is executed during model fitting).\n\\item prev_cb_res Result from a previous run of a callback sharing the same name\n(as given by parameter \\code{cb_name}) when conducting training continuation, if there\nwas any in the booster R attributes.\n\nSometimes, one might want to append the new results to the previous one, and this will\nbe done automatically by the built-in callbacks such as \\link{xgb.cb.evaluation.log},\nwhich will append the new rows to the previous table.\n\nIf no such previous callback result is available (which it never will when fitting\na model from start instead of updating an existing model), this will be \\code{NULL}.\n\nFor \\code{\\link[=xgb.cv]{xgb.cv()}}, which doesn't support training continuation, this will always be \\code{NULL}.\n}\n\nThe following names (\\code{cb_name} values) are reserved for internal callbacks:\n\\itemize{\n\\item print_evaluation\n\\item evaluation_log\n\\item reset_parameters\n\\item early_stop\n\\item save_model\n\\item cv_predict\n\\item gblinear_history\n}\n\nThe following names are reserved for other non-callback 
attributes:\n\\itemize{\n\\item names\n\\item class\n\\item call\n\\item params\n\\item niter\n\\item nfeatures\n\\item folds\n}\n\nWhen using the built-in early stopping callback (\\link{xgb.cb.early.stop}), said callback\nwill always be executed before the others, as it sets some booster C-level attributes\nthat other callbacks might also use. Otherwise, the order of execution will match with\nthe order in which the callbacks are passed to the model fitting function.\n}\n\\examples{\n# Example constructing a custom callback that calculates\n# squared error on the training data (no separate test set),\n# and outputs the per-iteration results.\nssq_callback <- xgb.Callback(\n  cb_name = \"ssq\",\n  f_before_training = function(env, model, data, evals,\n                               begin_iteration, end_iteration) {\n    # A vector to keep track of a number at each iteration\n    env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)\n  },\n  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {\n    # This calculates the sum of squared errors on the training data.\n    # Note that this can be better done by passing an 'evals' entry,\n    # but this demonstrates a way in which callbacks can be structured.\n    pred <- predict(model, data)\n    err <- pred - getinfo(data, \"label\")\n    sq_err <- sum(err^2)\n    env$logs[iteration] <- sq_err\n    cat(\n      sprintf(\n        \"Squared error at iteration \\%d: \\%.2f\\n\",\n        iteration, sq_err\n      )\n    )\n\n    # A return value of 'TRUE' here would signal to finalize the training\n    return(FALSE)\n  },\n  f_after_training = function(env, model, data, evals, iteration,\n                              final_feval, prev_cb_res) {\n    return(env$logs)\n  }\n)\n\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- as.matrix(mtcars[, -1])\n\ndm <- xgb.DMatrix(x, label = y, nthread = 1)\nmodel <- xgb.train(\n  data = dm,\n  params = xgb.params(objective = \"reg:squarederror\", nthread = 1),\n  
nrounds = 5,\n  callbacks = list(ssq_callback)\n)\n\n# Result from 'f_after_iter' will be available as an attribute\nattributes(model)$ssq\n}\n\\seealso{\nBuilt-in callbacks:\n\\itemize{\n\\item \\link{xgb.cb.print.evaluation}\n\\item \\link{xgb.cb.evaluation.log}\n\\item \\link{xgb.cb.reset.parameters}\n\\item \\link{xgb.cb.early.stop}\n\\item \\link{xgb.cb.save.model}\n\\item \\link{xgb.cb.cv.predict}\n\\item \\link{xgb.cb.gblinear.history}\n}\n}\n"
  },
  {
    "path": "R-package/man/xgb.DMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.DMatrix}\n\\alias{xgb.DMatrix}\n\\alias{xgb.QuantileDMatrix}\n\\title{Construct xgb.DMatrix object}\n\\usage{\nxgb.DMatrix(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  missing = NA,\n  silent = FALSE,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  nthread = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL,\n  data_split_mode = \"row\",\n  ...\n)\n\nxgb.QuantileDMatrix(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  missing = NA,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  nthread = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL,\n  ref = NULL,\n  max_bin = NULL\n)\n}\n\\arguments{\n\\item{data}{Data from which to create a DMatrix, which can then be used for fitting models or\nfor getting predictions out of a fitted model.\n\nSupported input types are as follows:\n\\itemize{\n\\item \\code{matrix} objects, with types \\code{numeric}, \\code{integer}, or \\code{logical}.\n\\item \\code{data.frame} objects, with columns of types \\code{numeric}, \\code{integer}, \\code{logical}, or \\code{factor}\n}\n\nNote that xgboost uses base-0 encoding for categorical types, hence \\code{factor} types (which use base-1\nencoding) will be converted inside the function call. 
Be aware that the encoding used for \\code{factor}\ntypes is not kept as part of the model, so in subsequent calls to \\code{predict}, it is the user's\nresponsibility to ensure that factor columns have the same levels as the ones from which the DMatrix\nwas constructed.\n\nOther column types are not supported.\n\\itemize{\n\\item CSR matrices, as class \\code{dgRMatrix} from package \\code{Matrix}.\n\\item CSC matrices, as class \\code{dgCMatrix} from package \\code{Matrix}.\n}\n\nThese are \\strong{not} supported by \\code{xgb.QuantileDMatrix}.\n\\itemize{\n\\item XGBoost's own binary format for DMatrices, as produced by \\code{\\link[=xgb.DMatrix.save]{xgb.DMatrix.save()}}.\n\\item Single-row CSR matrices, as class \\code{dsparseVector} from package \\code{Matrix}, which is interpreted\nas a single row (only when making predictions from a fitted model).\n}}\n\n\\item{label}{Label of the training data. For classification problems, should be passed encoded as\nintegers with numeration starting at zero.}\n\n\\item{weight}{Weight for each instance.\n\nNote that, for ranking task, weights are per-group.  In ranking task, one weight\nis assigned to each group (not each data point). This is because we\nonly care about the relative ordering of data points within each group,\nso it doesn't make sense to assign weights to individual data points.}\n\n\\item{base_margin}{Base margin used for boosting from existing model.\n\nIn the case of multi-output models, one can also pass multi-dimensional base_margin.}\n\n\\item{missing}{A float value to represents missing values in data (not used when creating DMatrix\nfrom text files). It is useful to change when a zero, infinite, or some other\nextreme value represents missing values in data.}\n\n\\item{silent}{whether to suppress printing an informational message after loading from a file.}\n\n\\item{feature_names}{Set names for features. 
Overrides column names in data frame and matrix.\n\nNote: columns are not referenced by name when calling \\code{predict}, so the column order there\nmust be the same as in the DMatrix construction, regardless of the column names.}\n\n\\item{feature_types}{Set types for features.\n\nIf \\code{data} is a \\code{data.frame} and passing \\code{feature_types} is not supplied,\nfeature types will be deduced automatically from the column types.\n\nOtherwise, one can pass a character vector with the same length as number of columns in \\code{data},\nwith the following possible values:\n\\itemize{\n\\item \"c\", which represents categorical columns.\n\\item \"q\", which represents numeric columns.\n\\item \"int\", which represents integer columns.\n\\item \"i\", which represents logical (boolean) columns.\n}\n\nNote that, while categorical types are treated differently from the rest for model fitting\npurposes, the other types do not influence the generated model, but have effects in other\nfunctionalities such as feature importances.\n\n\\strong{Important}: Categorical features, if specified manually through \\code{feature_types}, must\nbe encoded as integers with numeration starting at zero, and the same encoding needs to be\napplied when passing data to \\code{\\link[=predict]{predict()}}. Even if passing \\code{factor} types, the encoding will\nnot be saved, so make sure that \\code{factor} columns passed to \\code{predict} have the same \\code{levels}.}\n\n\\item{nthread}{Number of threads used for creating DMatrix.}\n\n\\item{group}{Group size for all ranking group.}\n\n\\item{qid}{Query ID for data samples, used for ranking.}\n\n\\item{label_lower_bound}{Lower bound for survival training.}\n\n\\item{label_upper_bound}{Upper bound for survival training.}\n\n\\item{feature_weights}{Set feature weights for column sampling.}\n\n\\item{data_split_mode}{Not used yet. 
This parameter is for distributed training, which is not yet available for the R package.}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n\n\\item{ref}{The training dataset that provides quantile information, needed when creating\nvalidation/test dataset with \\code{\\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}. Supplying the training DMatrix\nas a reference means that the same quantisation applied to the training data is\napplied to the validation/test data}\n\n\\item{max_bin}{The number of histogram bin, should be consistent with the training parameter\n\\code{max_bin}.\n\nThis is only supported when constructing a QuantileDMatrix.}\n}\n\\value{\nAn 'xgb.DMatrix' object. 
If calling \\code{xgb.QuantileDMatrix}, it will have additional\nsubclass \\code{xgb.QuantileDMatrix}.\n}\n\\description{\nConstruct an 'xgb.DMatrix' object from a given data source, which can then be passed to functions\nsuch as \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=predict]{predict()}}.\n}\n\\details{\nFunction \\code{xgb.QuantileDMatrix()} will construct a DMatrix with quantization for the histogram\nmethod already applied to it, which can be used to reduce memory usage (compared to using\na regular DMatrix first and then creating a quantization out of it) when using the histogram\nmethod (\\code{tree_method = \"hist\"}, which is the default algorithm), but is not usable for the\nsorted-indices method (\\code{tree_method = \"exact\"}), nor for the approximate method\n(\\code{tree_method = \"approx\"}).\n\nNote that DMatrix objects are not serializable through R functions such as \\code{\\link[=saveRDS]{saveRDS()}} or \\code{\\link[=save]{save()}}.\nIf a DMatrix gets serialized and then de-serialized (for example, when saving data in an R session or caching\nchunks in an Rmd file), the resulting object will not be usable anymore and will need to be reconstructed\nfrom the original source of data.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\ndtrain <- with(\n  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)\n)\nfname <- file.path(tempdir(), \"xgb.DMatrix.data\")\nxgb.DMatrix.save(dtrain, fname)\ndtrain <- xgb.DMatrix(fname, nthread = 1)\n}\n"
  },
  {
    "path": "R-package/man/xgb.DMatrix.hasinfo.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.DMatrix.hasinfo}\n\\alias{xgb.DMatrix.hasinfo}\n\\title{Check whether DMatrix object has a field}\n\\usage{\nxgb.DMatrix.hasinfo(object, info)\n}\n\\arguments{\n\\item{object}{The DMatrix object to check for the given \\code{info} field.}\n\n\\item{info}{The field to check for presence or absence in \\code{object}.}\n}\n\\description{\nChecks whether an xgb.DMatrix object has a given field assigned to\nit, such as weights, labels, etc.\n}\n\\examples{\nx <- matrix(1:10, nrow = 5)\ndm <- xgb.DMatrix(x, nthread = 1)\n\n# 'dm' so far does not have any fields set\nxgb.DMatrix.hasinfo(dm, \"label\")\n\n# Fields can be added after construction\nsetinfo(dm, \"label\", 1:5)\nxgb.DMatrix.hasinfo(dm, \"label\")\n}\n\\seealso{\n\\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}}, \\code{\\link[=getinfo.xgb.DMatrix]{getinfo.xgb.DMatrix()}}, \\code{\\link[=setinfo.xgb.DMatrix]{setinfo.xgb.DMatrix()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.DMatrix.save.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.save.R\n\\name{xgb.DMatrix.save}\n\\alias{xgb.DMatrix.save}\n\\title{Save xgb.DMatrix object to binary file}\n\\usage{\nxgb.DMatrix.save(dmatrix, fname)\n}\n\\arguments{\n\\item{dmatrix}{the \\code{xgb.DMatrix} object}\n\n\\item{fname}{the name of the file to write.}\n}\n\\description{\nSave xgb.DMatrix object to binary file\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\nfname <- file.path(tempdir(), \"xgb.DMatrix.data\")\nxgb.DMatrix.save(dtrain, fname)\ndtrain <- xgb.DMatrix(fname, nthread = 1)\n}\n"
  },
  {
    "path": "R-package/man/xgb.DataBatch.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.DataBatch}\n\\alias{xgb.DataBatch}\n\\title{Structure for Data Batches}\n\\usage{\nxgb.DataBatch(\n  data,\n  label = NULL,\n  weight = NULL,\n  base_margin = NULL,\n  feature_names = colnames(data),\n  feature_types = NULL,\n  group = NULL,\n  qid = NULL,\n  label_lower_bound = NULL,\n  label_upper_bound = NULL,\n  feature_weights = NULL\n)\n}\n\\arguments{\n\\item{data}{Batch of data belonging to this batch.\n\nNote that not all of the input types supported by \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} are possible\nto pass here. Supported types are:\n\\itemize{\n\\item \\code{matrix}, with types \\code{numeric}, \\code{integer}, and \\code{logical}. Note that for types\n\\code{integer} and \\code{logical}, missing values might not be automatically recognized\nas such - see the documentation for parameter \\code{missing} in \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}\nfor details on this.\n\\item \\code{data.frame}, with the same types as supported by 'xgb.DMatrix' and same\nconversions applied to it. See the documentation for parameter \\code{data} in\n\\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} for details on it.\n\\item CSR matrices, as class \\code{dgRMatrix} from package \"Matrix\".\n}}\n\n\\item{label}{Label of the training data. For classification problems, should be passed encoded as\nintegers with numeration starting at zero.}\n\n\\item{weight}{Weight for each instance.\n\nNote that, for ranking task, weights are per-group.  In ranking task, one weight\nis assigned to each group (not each data point). 
This is because we\nonly care about the relative ordering of data points within each group,\nso it doesn't make sense to assign weights to individual data points.}\n\n\\item{base_margin}{Base margin used for boosting from existing model.\n\nIn the case of multi-output models, one can also pass multi-dimensional base_margin.}\n\n\\item{feature_names}{Set names for features. Overrides column names in data frame and matrix.\n\nNote: columns are not referenced by name when calling \\code{predict}, so the column order there\nmust be the same as in the DMatrix construction, regardless of the column names.}\n\n\\item{feature_types}{Set types for features.\n\nIf \\code{data} is a \\code{data.frame} and passing \\code{feature_types} is not supplied,\nfeature types will be deduced automatically from the column types.\n\nOtherwise, one can pass a character vector with the same length as number of columns in \\code{data},\nwith the following possible values:\n\\itemize{\n\\item \"c\", which represents categorical columns.\n\\item \"q\", which represents numeric columns.\n\\item \"int\", which represents integer columns.\n\\item \"i\", which represents logical (boolean) columns.\n}\n\nNote that, while categorical types are treated differently from the rest for model fitting\npurposes, the other types do not influence the generated model, but have effects in other\nfunctionalities such as feature importances.\n\n\\strong{Important}: Categorical features, if specified manually through \\code{feature_types}, must\nbe encoded as integers with numeration starting at zero, and the same encoding needs to be\napplied when passing data to \\code{\\link[=predict]{predict()}}. 
Even if passing \\code{factor} types, the encoding will\nnot be saved, so make sure that \\code{factor} columns passed to \\code{predict} have the same \\code{levels}.}\n\n\\item{group}{Group size for all ranking group.}\n\n\\item{qid}{Query ID for data samples, used for ranking.}\n\n\\item{label_lower_bound}{Lower bound for survival training.}\n\n\\item{label_upper_bound}{Upper bound for survival training.}\n\n\\item{feature_weights}{Set feature weights for column sampling.}\n}\n\\value{\nAn object of class \\code{xgb.DataBatch}, which is just a list containing the\ndata and parameters passed here. It does \\strong{not} inherit from \\code{xgb.DMatrix}.\n}\n\\description{\nHelper function to supply data in batches of a data iterator when\nconstructing a DMatrix from external memory through \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}\nor through \\code{\\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}.\n\nThis function is \\strong{only} meant to be called inside of a callback function (which\nis passed as argument to function \\code{\\link[=xgb.DataIter]{xgb.DataIter()}} to construct a data iterator)\nwhen constructing a DMatrix through external memory - otherwise, one should call\n\\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} or \\code{\\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}.\n\nThe object that results from calling this function directly is \\strong{not} like\nan \\code{xgb.DMatrix} - i.e. cannot be used to train a model, nor to get predictions - only\npossible usage is to supply data to an iterator, from which a DMatrix is then constructed.\n\nFor more information and for example usage, see the documentation for \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.\n}\n\\seealso{\n\\code{\\link[=xgb.DataIter]{xgb.DataIter()}}, \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.DataIter.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.DataIter}\n\\alias{xgb.DataIter}\n\\title{XGBoost Data Iterator}\n\\usage{\nxgb.DataIter(env = new.env(), f_next, f_reset)\n}\n\\arguments{\n\\item{env}{An R environment to pass to the callback functions supplied here, which can be\nused to keep track of variables to determine how to handle the batches.\n\nFor example, one might want to keep track of an iteration number in this environment in order\nto know which part of the data to pass next.}\n\n\\item{f_next}{\\verb{function(env)} which is responsible for:\n\\itemize{\n\\item Accessing or retrieving the next batch of data in the iterator.\n\\item Supplying this data by calling function \\code{\\link[=xgb.DataBatch]{xgb.DataBatch()}} on it and returning the result.\n\\item Keeping track of where in the iterator batch it is or will go next, which can for example\nbe done by modifying variables in the \\code{env} variable that is passed here.\n\\item Signaling whether there are more batches to be consumed or not, by returning \\code{NULL}\nwhen the stream of data ends (all batches in the iterator have been consumed), or the result from\ncalling \\code{\\link[=xgb.DataBatch]{xgb.DataBatch()}} when there are more batches in the line to be consumed.\n}}\n\n\\item{f_reset}{\\verb{function(env)} which is responsible for resetting the data iterator\n(i.e. 
taking it back to the first batch, called before and after the sequence of batches\nhas been consumed).\n\nNote that, after resetting the iterator, the batches will be accessed again, so the same data\n(and in the same order) must be passed in subsequent iterations.}\n}\n\\value{\nAn \\code{xgb.DataIter} object, containing the same inputs supplied here, which can then\nbe passed to \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.\n}\n\\description{\nInterface to create a custom data iterator in order to construct a DMatrix\nfrom external memory.\n\nThis function is responsible for generating an R object structure containing callback\nfunctions and an environment shared with them.\n\nThe output structure from this function is then meant to be passed to \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}},\nwhich will consume the data and create a DMatrix from it by executing the callback functions.\n\nFor more information, and for a usage example, see the documentation for \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.\n}\n\\seealso{\n\\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}, \\code{\\link[=xgb.DataBatch]{xgb.DataBatch()}}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.ExtMemDMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.ExtMemDMatrix}\n\\alias{xgb.ExtMemDMatrix}\n\\title{DMatrix from External Data}\n\\usage{\nxgb.ExtMemDMatrix(\n  data_iterator,\n  cache_prefix = tempdir(),\n  missing = NA,\n  nthread = NULL\n)\n}\n\\arguments{\n\\item{data_iterator}{A data iterator structure as returned by \\code{\\link[=xgb.DataIter]{xgb.DataIter()}},\nwhich includes an environment shared between function calls, and functions to access\nthe data in batches on-demand.}\n\n\\item{cache_prefix}{The path of cache file, caller must initialize all the directories in this path.}\n\n\\item{missing}{A float value to represent missing values in data.\n\nNote that, while functions like \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} can take a generic \\code{NA} and interpret it\ncorrectly for different types like \\code{numeric} and \\code{integer}, if an \\code{NA} value is passed here,\nit will not be adapted for different input types.\n\nFor example, in R \\code{integer} types, missing values are represented by integer number \\code{-2147483648}\n(since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \\code{NA},\nwhich is interpreted as a floating-point NaN by \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by\n\\code{\\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing.\nThis should not pose any problem for \\code{numeric} types, since they do have an inherent NaN value.}\n\n\\item{nthread}{Number of threads used for creating DMatrix.}\n}\n\\value{\nAn 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not\nheld internally but accessed through the iterator when needed.\n}\n\\description{\nCreate a special type of XGBoost 'DMatrix' object from external data\nsupplied by an \\code{\\link[=xgb.DataIter]{xgb.DataIter()}} object, 
potentially passed in batches from a\nbigger set that might not fit entirely in memory.\n\nThe data supplied by the iterator is accessed on-demand as needed, multiple times,\nwithout being concatenated, but note that fields like 'label' \\strong{will} be\nconcatenated from multiple calls to the data iterator.\n\nFor more information, see the guide 'Using XGBoost External Memory Version':\n\\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}\n}\n\\details{\nBe aware that construction of external data DMatrices \\bold{will cache data on disk}\nin a compressed format, under the path supplied in \\code{cache_prefix}.\n\nExternal data is not supported for the exact tree method.\n}\n\\examples{\ndata(mtcars)\n\n# This custom environment will be passed to the iterator\n# functions at each call. It is up to the user to keep\n# track of the iteration number in this environment.\niterator_env <- as.environment(\n  list(\n    iter = 0,\n    x = mtcars[, -1],\n    y = mtcars[, 1]\n  )\n)\n\n# Data is passed in two batches.\n# In this example, batches are obtained by subsetting the 'x' variable.\n# This is not advantageous to do, since the data is already loaded in memory\n# and can be passed in full in one go, but there can be situations in which\n# only a subset of the data will fit in the computer's memory, and it can\n# be loaded in batches that are accessed one-at-a-time only.\niterator_next <- function(iterator_env) {\n  curr_iter <- iterator_env[[\"iter\"]]\n  if (curr_iter >= 2) {\n    # there are only two batches, so this signals end of the stream\n    return(NULL)\n  }\n\n  if (curr_iter == 0) {\n    x_batch <- iterator_env[[\"x\"]][1:16, ]\n    y_batch <- iterator_env[[\"y\"]][1:16]\n  } else {\n    x_batch <- iterator_env[[\"x\"]][17:32, ]\n    y_batch <- iterator_env[[\"y\"]][17:32]\n  }\n  on.exit({\n    iterator_env[[\"iter\"]] <- curr_iter + 1\n  })\n\n  # Function 'xgb.DataBatch' must be called manually\n  # at each batch with all the 
appropriate attributes,\n  # such as feature names and feature types.\n  return(xgb.DataBatch(data = x_batch, label = y_batch))\n}\n\n# This moves the iterator back to its beginning\niterator_reset <- function(iterator_env) {\n  iterator_env[[\"iter\"]] <- 0\n}\n\ndata_iterator <- xgb.DataIter(\n  env = iterator_env,\n  f_next = iterator_next,\n  f_reset = iterator_reset\n)\ncache_prefix <- tempdir()\n\n# DMatrix will be constructed from the iterator's batches\ndm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)\n\n# After construction, can be used as a regular DMatrix\nparams <- xgb.params(nthread = 1, objective = \"reg:squarederror\")\nmodel <- xgb.train(data = dm, nrounds = 2, params = params)\n\n# Predictions can also be called on it, and should be the same\n# as if the data were passed differently.\npred_dm <- predict(model, dm)\npred_mat <- predict(model, as.matrix(mtcars[, -1]))\n}\n\\seealso{\n\\code{\\link[=xgb.DataIter]{xgb.DataIter()}}, \\code{\\link[=xgb.DataBatch]{xgb.DataBatch()}}, \\code{\\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.QuantileDMatrix.from_iterator.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.QuantileDMatrix.from_iterator}\n\\alias{xgb.QuantileDMatrix.from_iterator}\n\\title{QuantileDMatrix from External Data}\n\\usage{\nxgb.QuantileDMatrix.from_iterator(\n  data_iterator,\n  missing = NA,\n  nthread = NULL,\n  ref = NULL,\n  max_bin = NULL\n)\n}\n\\arguments{\n\\item{data_iterator}{A data iterator structure as returned by \\code{\\link[=xgb.DataIter]{xgb.DataIter()}},\nwhich includes an environment shared between function calls, and functions to access\nthe data in batches on-demand.}\n\n\\item{missing}{A float value to represent missing values in data.\n\nNote that, while functions like \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} can take a generic \\code{NA} and interpret it\ncorrectly for different types like \\code{numeric} and \\code{integer}, if an \\code{NA} value is passed here,\nit will not be adapted for different input types.\n\nFor example, in R \\code{integer} types, missing values are represented by integer number \\code{-2147483648}\n(since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \\code{NA},\nwhich is interpreted as a floating-point NaN by \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by\n\\code{\\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing.\nThis should not pose any problem for \\code{numeric} types, since they do have an inherent NaN value.}\n\n\\item{nthread}{Number of threads used for creating DMatrix.}\n\n\\item{ref}{The training dataset that provides quantile information, needed when creating\nvalidation/test dataset with \\code{\\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}. 
Supplying the training DMatrix\nas a reference means that the same quantisation applied to the training data is\napplied to the validation/test data.}\n\n\\item{max_bin}{The number of histogram bins, should be consistent with the training parameter\n\\code{max_bin}.\n\nThis is only supported when constructing a QuantileDMatrix.}\n}\n\\value{\nAn 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.\n}\n\\description{\nCreate an \\code{xgb.QuantileDMatrix} object (exact same class as would be returned by\ncalling function \\code{\\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}, with the same advantages and limitations) from\nexternal data supplied by \\code{\\link[=xgb.DataIter]{xgb.DataIter()}}, potentially passed in batches from\na bigger set that might not fit entirely in memory, same way as \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.\n\nNote that, while external data will only be loaded through the iterator (thus the full data\nmight not be held entirely in-memory), the quantized representation of the data will get\ncreated in-memory, being concatenated from multiple calls to the data iterator. The quantized\nversion is typically lighter than the original data, so there might be cases in which this\nrepresentation could potentially fit in memory even if the full data does not.\n\nFor more information, see the guide 'Using XGBoost External Memory Version':\n\\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}\n}\n\\seealso{\n\\code{\\link[=xgb.DataIter]{xgb.DataIter()}}, \\code{\\link[=xgb.DataBatch]{xgb.DataBatch()}}, \\code{\\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}},\n\\code{\\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.attr.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.attr}\n\\alias{xgb.attr}\n\\alias{xgb.attr<-}\n\\alias{xgb.attributes}\n\\alias{xgb.attributes<-}\n\\title{Accessors for serializable attributes of a model}\n\\usage{\nxgb.attr(object, name)\n\nxgb.attr(object, name) <- value\n\nxgb.attributes(object)\n\nxgb.attributes(object) <- value\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.Booster}. \\strong{Will be modified in-place} when assigning to it.}\n\n\\item{name}{A non-empty character string specifying which attribute is to be accessed.}\n\n\\item{value}{For \\verb{xgb.attr<-}, a value of an attribute; for \\verb{xgb.attributes<-},\nit is a list (or an object coercible to a list) with the names of attributes to set\nand the elements corresponding to attribute values.\nNon-character values are converted to character.\nWhen an attribute value is not a scalar, only the first index is used.\nUse \\code{NULL} to remove an attribute.}\n}\n\\value{\n\\itemize{\n\\item \\code{xgb.attr()} returns either a string value of an attribute\nor \\code{NULL} if an attribute wasn't stored in a model.\n\\item \\code{xgb.attributes()} returns a list of all attributes stored in a model\nor \\code{NULL} if a model has no stored attributes.\n}\n}\n\\description{\nThese methods allow to manipulate the key-value attribute strings of an XGBoost model.\n}\n\\details{\nThe primary purpose of XGBoost model attributes is to store some meta data about the model.\nNote that they are a separate concept from the object attributes in R.\nSpecifically, they refer to key-value strings that can be attached to an XGBoost model,\nstored together with the model's binary representation, and accessed later\n(from R or any other interface).\nIn contrast, any R attribute assigned to an R object of \\code{xgb.Booster} class\nwould not be saved by \\code{\\link[=xgb.save]{xgb.save()}} because an XGBoost model is an external 
memory object\nand its serialization is handled externally.\nAlso, setting an attribute that has the same name as one of XGBoost's parameters wouldn't\nchange the value of that parameter for a model.\nUse \\code{\\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}} to set or change model parameters.\n\nThe \\verb{xgb.attributes<-} setter either updates the existing or adds one or several attributes,\nbut it doesn't delete the other existing attributes.\n\nImportant: since this modifies the booster's C object, semantics for assignment here\nwill differ from R's, as any object reference to the same booster will be modified\ntoo, while assignment of R attributes through \\verb{attributes(model)$<attr> <- <value>}\nwill follow the usual copy-on-write R semantics (see \\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an\nexample of these behaviors).\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\ntrain <- agaricus.train\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\n\nxgb.attr(bst, \"my_attribute\") <- \"my attribute value\"\nprint(xgb.attr(bst, \"my_attribute\"))\nxgb.attributes(bst) <- list(a = 123, b = \"abc\")\n\nfname <- file.path(tempdir(), \"xgb.ubj\")\nxgb.save(bst, fname)\nbst1 <- xgb.load(fname)\nprint(xgb.attr(bst1, \"my_attribute\"))\nprint(xgb.attributes(bst1))\n\n# deletion:\nxgb.attr(bst1, \"my_attribute\") <- NULL\nprint(xgb.attributes(bst1))\nxgb.attributes(bst1) <- list(a = NULL, b = NULL)\nprint(xgb.attributes(bst1))\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.cv.predict.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.cv.predict}\n\\alias{xgb.cb.cv.predict}\n\\title{Callback for returning cross-validation based predictions}\n\\usage{\nxgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)\n}\n\\arguments{\n\\item{save_models}{A flag for whether to save the folds' models.}\n\n\\item{outputmargin}{Whether to save margin predictions (same effect as passing this\nparameter to \\link{predict.xgb.Booster}).}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.cv]{xgb.cv()}},\nbut \\strong{not} to \\code{\\link[=xgb.train]{xgb.train()}}.\n}\n\\description{\nThis callback function saves predictions for all of the test folds,\nand also allows to save the folds' models.\n}\n\\details{\nPredictions are saved inside of the \\code{pred} element, which is either a vector or a matrix,\ndepending on the number of prediction outputs per data row. The order of predictions corresponds\nto the order of rows in the original dataset. Note that when a custom \\code{folds} list is\nprovided in \\code{\\link[=xgb.cv]{xgb.cv()}}, the predictions would only be returned properly when this list is a\nnon-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be\nmeaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.\nWhen some of the indices in the training dataset are not included into user-provided \\code{folds},\ntheir prediction value would be \\code{NA}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.early.stop.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.early.stop}\n\\alias{xgb.cb.early.stop}\n\\title{Callback to activate early stopping}\n\\usage{\nxgb.cb.early.stop(\n  stopping_rounds,\n  maximize = FALSE,\n  metric_name = NULL,\n  verbose = TRUE,\n  save_best = FALSE\n)\n}\n\\arguments{\n\\item{stopping_rounds}{The number of rounds with no improvement in\nthe evaluation metric in order to stop the training.}\n\n\\item{maximize}{Whether to maximize the evaluation metric.}\n\n\\item{metric_name}{The name of an evaluation column to use as a criteria for early\nstopping. If not set, the last column would be used.\nLet's say the test data in \\code{evals} was labelled as \\code{dtest},\nand one wants to use the AUC in test data for early stopping regardless of where\nit is in the \\code{evals}, then one of the following would need to be set:\n\\code{metric_name = 'dtest-auc'} or \\code{metric_name = 'dtest_auc'}.\nAll dash '-' characters in metric names are considered equivalent to '_'.}\n\n\\item{verbose}{Whether to print the early stopping information.}\n\n\\item{save_best}{Whether training should return the best model or the last model. If\nset to \\code{TRUE}, it will only keep the boosting rounds up to the detected best\niteration, discarding the ones that come after. 
This parameter is not supported by\nthe \\code{xgb.cv} function and the \\code{gblinear} booster yet.}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nThis callback function determines the condition for early stopping.\n\nThe following attributes are assigned to the booster's object:\n\\itemize{\n\\item \\code{best_score} the evaluation score at the best iteration\n\\item \\code{best_iteration} at which boosting iteration the best score has occurred\n(0-based index for interoperability of binary models)\n}\n\nThe same values are also stored as R attributes as a result of the callback, plus an additional\nattribute \\code{stopped_by_max_rounds} which indicates whether an early stopping by the \\code{stopping_rounds}\ncondition occurred. Note that the \\code{best_iteration} that is stored under R attributes will follow\nbase-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed\nthrough \\code{\\link[=xgb.attr]{xgb.attr()}} or  \\code{\\link[=xgb.attributes]{xgb.attributes()}}.\n\nAt least one dataset is required in \\code{evals} for early stopping to work.\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.evaluation.log.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.evaluation.log}\n\\alias{xgb.cb.evaluation.log}\n\\title{Callback for logging the evaluation history}\n\\usage{\nxgb.cb.evaluation.log()\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nCallback for logging the evaluation history\n}\n\\details{\nThis callback creates a table with per-iteration evaluation metrics (see parameters\n\\code{evals} and \\code{custom_metric} in \\code{\\link[=xgb.train]{xgb.train()}}).\n\nNote: in the column names of the final data.table, the dash '-' character is replaced with\nthe underscore '_' in order to make the column names more like regular R identifiers.\n}\n\\seealso{\n\\link{xgb.cb.print.evaluation}\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.gblinear.history.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.gblinear.history}\n\\alias{xgb.cb.gblinear.history}\n\\title{Callback for collecting coefficients history of a gblinear booster}\n\\usage{\nxgb.cb.gblinear.history(sparse = FALSE)\n}\n\\arguments{\n\\item{sparse}{When set to \\code{FALSE}/\\code{TRUE}, a dense/sparse matrix is used to store the result.\nSparse format is useful when one expects only a subset of coefficients to be non-zero,\nwhen using the \"thrifty\" feature selector with fairly small number of top features\nselected per iteration.}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nCallback for collecting coefficients history of a gblinear booster\n}\n\\details{\nTo keep things fast and simple, gblinear booster does not internally store the history of linear\nmodel coefficients at each boosting iteration. This callback provides a workaround for storing\nthe coefficients' path, by extracting them after each training iteration.\n\nThis callback will construct a matrix where rows are boosting iterations and columns are\nfeature coefficients (same order as when calling \\link{coef.xgb.Booster}, with the intercept\ncorresponding to the first column).\n\nWhen there is more than one coefficient per feature (e.g. multi-class classification),\nthe result will be reshaped into a vector where coefficients are arranged first by features and\nthen by class (e.g. first 1 through N coefficients will be for the first class, then\ncoefficients N+1 through 2N for the second class, and so on).\n\nIf the result has only one coefficient per feature in the data, then the resulting matrix\nwill have column names matching with the feature names, otherwise (when there's more than\none coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'\n(so e.g. 
column 'c1' for class '0' will be named 'c1:0').\n\nWith \\code{\\link[=xgb.train]{xgb.train()}}, the output is either a dense or a sparse matrix.\nWith \\code{\\link[=xgb.cv]{xgb.cv()}}, it is a list (one element per each fold) of such matrices.\n\nFunction \\link{xgb.gblinear.history} provides an easy way to retrieve the\noutputs from this callback.\n}\n\\examples{\n#### Binary classification:\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\n# In the iris dataset, it is hard to linearly separate Versicolor class from the rest\n# without considering the 2nd order interactions:\nx <- model.matrix(Species ~ .^2, iris)[, -1]\ncolnames(x)\ndtrain <- xgb.DMatrix(\n  scale(x),\n  label = 1 * (iris$Species == \"versicolor\"),\n  nthread = nthread\n)\nparam <- xgb.params(\n  booster = \"gblinear\",\n  objective = \"reg:logistic\",\n  eval_metric = \"auc\",\n  reg_lambda = 0.0003,\n  reg_alpha = 0.0003,\n  nthread = nthread\n)\n\n# For 'shotgun', which is a default linear updater, using high learning_rate values may result in\n# unstable behaviour in some datasets. 
With this simple dataset, however, the high learning\n# rate does not break the convergence, but allows us to illustrate the typical pattern of\n# \"stochastic explosion\" behaviour of this lock-free algorithm at early boosting iterations.\nbst <- xgb.train(\n  c(param, list(learning_rate = 1.)),\n  dtrain,\n  evals = list(tr = dtrain),\n  nrounds = 200,\n  callbacks = list(xgb.cb.gblinear.history())\n)\n\n# Extract the coefficients' path and plot them vs boosting iteration number:\ncoef_path <- xgb.gblinear.history(bst)\nmatplot(coef_path, type = \"l\")\n\n# With the deterministic coordinate descent updater, it is safer to use higher learning rates.\n# Will try the classical componentwise boosting which selects a single best feature per round:\nbst <- xgb.train(\n  c(\n    param,\n    xgb.params(\n      learning_rate = 0.8,\n      updater = \"coord_descent\",\n      feature_selector = \"thrifty\",\n      top_k = 1\n    )\n  ),\n  dtrain,\n  evals = list(tr = dtrain),\n  nrounds = 200,\n  callbacks = list(xgb.cb.gblinear.history())\n)\nmatplot(xgb.gblinear.history(bst), type = \"l\")\n#  Componentwise boosting is known to have similar effect to Lasso regularization.\n# Try experimenting with various values of top_k, learning_rate, nrounds,\n# as well as different feature_selectors.\n\n# For xgb.cv:\nbst <- xgb.cv(\n  c(\n    param,\n    xgb.params(\n      learning_rate = 0.8,\n      updater = \"coord_descent\",\n      feature_selector = \"thrifty\",\n      top_k = 1\n    )\n  ),\n  dtrain,\n  nfold = 5,\n  nrounds = 100,\n  callbacks = list(xgb.cb.gblinear.history())\n)\n# coefficients in the CV fold #3\nmatplot(xgb.gblinear.history(bst)[[3]], type = \"l\")\n\n\n#### Multiclass classification:\ndtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)\n\nparam <- xgb.params(\n  booster = \"gblinear\",\n  objective = \"multi:softprob\",\n  num_class = 3,\n  reg_lambda = 0.0003,\n  reg_alpha = 0.0003,\n  nthread = nthread\n)\n\n# For 
the default linear updater 'shotgun' it sometimes is helpful\n# to use smaller learning_rate to reduce instability\nbst <- xgb.train(\n  c(param, list(learning_rate = 0.5)),\n  dtrain,\n  evals = list(tr = dtrain),\n  nrounds = 50,\n  callbacks = list(xgb.cb.gblinear.history())\n)\n\n# Will plot the coefficient paths separately for each class:\nmatplot(xgb.gblinear.history(bst, class_index = 0), type = \"l\")\nmatplot(xgb.gblinear.history(bst, class_index = 1), type = \"l\")\nmatplot(xgb.gblinear.history(bst, class_index = 2), type = \"l\")\n\n# CV:\nbst <- xgb.cv(\n  c(param, list(learning_rate = 0.5)),\n  dtrain,\n  nfold = 5,\n  nrounds = 70,\n  callbacks = list(xgb.cb.gblinear.history(FALSE))\n)\n# 1st fold of 1st class\nmatplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = \"l\")\n\n}\n\\seealso{\n\\link{xgb.gblinear.history}, \\link{coef.xgb.Booster}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.print.evaluation.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.print.evaluation}\n\\alias{xgb.cb.print.evaluation}\n\\title{Callback for printing the result of evaluation}\n\\usage{\nxgb.cb.print.evaluation(period = 1, showsd = TRUE)\n}\n\\arguments{\n\\item{period}{Results would be printed every number of periods.}\n\n\\item{showsd}{Whether standard deviations should be printed (when available).}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nThe callback function prints the result of evaluation at every \\code{period} iterations.\nThe initial and the last iteration's evaluations are always printed.\n\nDoes not leave any attribute in the booster (see \\link{xgb.cb.evaluation.log} for that).\n}\n\\seealso{\n\\link{xgb.Callback}\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.reset.parameters.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.reset.parameters}\n\\alias{xgb.cb.reset.parameters}\n\\title{Callback for resetting booster parameters at each iteration}\n\\usage{\nxgb.cb.reset.parameters(new_params)\n}\n\\arguments{\n\\item{new_params}{List of parameters needed to be reset.\nEach element's value must be either a vector of values of length \\code{nrounds}\nto be set at each iteration,\nor a function of two parameters \\code{learning_rates(iteration, nrounds)}\nwhich returns a new parameter value by using the current iteration number\nand the total number of boosting rounds.}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nCallback for resetting booster parameters at each iteration\n}\n\\details{\nNote that when training is resumed from some previous model, and a function is used to\nreset a parameter value, the \\code{nrounds} argument in this function would be the\nnumber of boosting rounds in the current training.\n\nDoes not leave any attribute in the booster.\n}\n"
  },
  {
    "path": "R-package/man/xgb.cb.save.model.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.cb.save.model}\n\\alias{xgb.cb.save.model}\n\\title{Callback for saving a model file}\n\\usage{\nxgb.cb.save.model(save_period = 0, save_name = \"xgboost.ubj\")\n}\n\\arguments{\n\\item{save_period}{Save the model to disk after every \\code{save_period} iterations;\n0 means save the model at the end.}\n\n\\item{save_name}{The name or path for the saved model file.\nIt can contain a \\code{\\link[=sprintf]{sprintf()}} formatting specifier to include the integer\niteration number in the file name. E.g., with \\code{save_name = 'xgboost_\\%04d.model'},\nthe file saved at iteration 50 would be named \"xgboost_0050.model\".}\n}\n\\value{\nAn \\code{xgb.Callback} object, which can be passed to \\code{\\link[=xgb.train]{xgb.train()}},\nbut \\strong{not} to \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nThis callback function allows to save an xgb-model file, either periodically\nafter each \\code{save_period}'s or at the end.\n\nDoes not leave any attribute in the booster.\n}\n"
  },
  {
    "path": "R-package/man/xgb.config.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.config}\n\\alias{xgb.config}\n\\alias{xgb.config<-}\n\\title{Accessors for model parameters as JSON string}\n\\usage{\nxgb.config(object)\n\nxgb.config(object) <- value\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.Booster}. \\strong{Will be modified in-place} when assigning to it.}\n\n\\item{value}{A list.}\n}\n\\value{\nParameters as a list.\n}\n\\description{\nAccessors for model parameters as JSON string\n}\n\\details{\nNote that assignment is performed in-place on the booster C object, which unlike assignment\nof R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. all references\nto the same booster will also get updated.\n\nSee \\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an example of this behavior.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\ntrain <- agaricus.train\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\nconfig <- xgb.config(bst)\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.copy.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.copy.Booster}\n\\alias{xgb.copy.Booster}\n\\title{Deep-copies a Booster Object}\n\\usage{\nxgb.copy.Booster(model)\n}\n\\arguments{\n\\item{model}{An 'xgb.Booster' object.}\n}\n\\value{\nA deep copy of \\code{model} - it will be identical in every way, but C-level\nfunctions called on that copy will not affect the \\code{model} variable.\n}\n\\description{\nCreates a deep copy of an 'xgb.Booster' object, such that the\nC object pointer contained will be a different object, and hence functions\nlike \\code{\\link[=xgb.attr]{xgb.attr()}} will not affect the object from which it was copied.\n}\n\\examples{\nlibrary(xgboost)\n\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- mtcars[, -1]\n\ndm <- xgb.DMatrix(x, label = y, nthread = 1)\n\nmodel <- xgb.train(\n  data = dm,\n  params = xgb.params(nthread = 1),\n  nrounds = 3\n)\n\n# Set an arbitrary attribute kept at the C level\nxgb.attr(model, \"my_attr\") <- 100\nprint(xgb.attr(model, \"my_attr\"))\n\n# Just assigning to a new variable will not create\n# a deep copy - C object pointer is shared, and in-place\n# modifications will affect both objects\nmodel_shallow_copy <- model\nxgb.attr(model_shallow_copy, \"my_attr\") <- 333\n# 'model' was also affected by this change:\nprint(xgb.attr(model, \"my_attr\"))\n\nmodel_deep_copy <- xgb.copy.Booster(model)\nxgb.attr(model_deep_copy, \"my_attr\") <- 444\n# 'model' was NOT affected by this change\n# (keeps previous value that was assigned before)\nprint(xgb.attr(model, \"my_attr\"))\n\n# Verify that the new object was actually modified\nprint(xgb.attr(model_deep_copy, \"my_attr\"))\n}\n"
  },
  {
    "path": "R-package/man/xgb.create.features.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.create.features.R\n\\name{xgb.create.features}\n\\alias{xgb.create.features}\n\\title{Create new features from a previously learned model}\n\\usage{\nxgb.create.features(model, data)\n}\n\\arguments{\n\\item{model}{Decision tree boosting model learned on the original data.}\n\n\\item{data}{Original data (usually provided as a \\code{dgCMatrix} matrix).}\n}\n\\value{\nA \\code{dgCMatrix} matrix including both the original data and the new features.\n}\n\\description{\nMay improve the learning by adding new features to the training data based on the\ndecision trees from a previously learned model.\n}\n\\details{\nThis is the function inspired from the paragraph 3.1 of the paper:\n\n\\strong{Practical Lessons from Predicting Clicks on Ads at Facebook}\n\n\\emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,\nJoaquin Quinonero Candela)}\n\nInternational Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014\n\n\\url{https://research.facebook.com/publications/practical-lessons-from-predicting-clicks-on-ads-at-facebook/}.\n\nExtract explaining the method:\n\n\"We found that boosted decision trees are a powerful and very\nconvenient way to implement non-linear and tuple transformations\nof the kind we just described. We treat each individual\ntree as a categorical feature that takes as value the\nindex of the leaf an instance ends up falling in. We use\n1-of-K coding of this type of features.\n\nFor example, consider the boosted tree model in Figure 1 with 2 subtrees,\nwhere the first subtree has 3 leafs and the second 2 leafs. 
If an\ninstance ends up in leaf 2 in the first subtree and leaf 1 in\nsecond subtree, the overall input to the linear classifier will\nbe the binary vector \\verb{[0, 1, 0, 1, 0]}, where the first 3 entries\ncorrespond to the leaves of the first subtree and last 2 to\nthose of the second subtree.\n\n...\n\nWe can understand boosted decision tree\nbased transformation as a supervised feature encoding that\nconverts a real-valued vector into a compact binary-valued\nvector. A traversal from root node to a leaf node represents\na rule on certain features.\"\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\ndtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))\n\nparam <- list(max_depth = 2, learning_rate = 1, objective = 'binary:logistic', nthread = 1)\nnrounds = 4\n\nbst <- xgb.train(params = param, data = dtrain, nrounds = nrounds)\n\n# Model accuracy without new features\naccuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /\n                   length(agaricus.test$label)\n\n# Convert previous features to one hot encoding\nnew.features.train <- xgb.create.features(model = bst, agaricus.train$data)\nnew.features.test <- xgb.create.features(model = bst, agaricus.test$data)\n\n# learning with new features\nnew.dtrain <- xgb.DMatrix(\n  data = new.features.train, label = agaricus.train$label, nthread = 1\n)\nnew.dtest <- xgb.DMatrix(\n  data = new.features.test, label = agaricus.test$label, nthread = 1\n)\nbst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds)\n\n# Model accuracy with new features\naccuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /\n                  length(agaricus.test$label)\n\n# Here the accuracy was already good and is now perfect.\ncat(paste(\"The accuracy was\", accuracy.before, \"before adding leaf features 
and it is now\",\n          accuracy.after, \"!\\n\"))\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.cv.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.cv.R\n\\name{xgb.cv}\n\\alias{xgb.cv}\n\\title{Cross Validation}\n\\usage{\nxgb.cv(\n  params = xgb.params(),\n  data,\n  nrounds,\n  nfold,\n  prediction = FALSE,\n  showsd = TRUE,\n  metrics = list(),\n  objective = NULL,\n  custom_metric = NULL,\n  stratified = \"auto\",\n  folds = NULL,\n  train_folds = NULL,\n  verbose = TRUE,\n  print_every_n = 1L,\n  early_stopping_rounds = NULL,\n  maximize = NULL,\n  callbacks = list(),\n  ...\n)\n}\n\\arguments{\n\\item{params}{List of XGBoost parameters which control the model building process.\nSee the \\href{https://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}\nand the documentation for \\code{\\link[=xgb.params]{xgb.params()}} for details.\n\nShould be passed as list with named entries. Parameters that are not specified in this\nlist will use their default values.\n\nA list of named parameters can be created through the function \\code{\\link[=xgb.params]{xgb.params()}}, which\naccepts all valid parameters as function arguments.}\n\n\\item{data}{An \\code{xgb.DMatrix} object, with corresponding fields like \\code{label} or bounds as required\nfor model training by the objective.\n\nNote that only the basic \\code{xgb.DMatrix} class is supported - variants such as \\code{xgb.QuantileDMatrix}\nor \\code{xgb.ExtMemDMatrix} are not supported here.}\n\n\\item{nrounds}{Max number of boosting iterations.}\n\n\\item{nfold}{The original dataset is randomly partitioned into \\code{nfold} equal size subsamples.}\n\n\\item{prediction}{A logical value indicating whether to return the test fold predictions\nfrom each CV model. 
This parameter engages the \\code{\\link[=xgb.cb.cv.predict]{xgb.cb.cv.predict()}} callback.}\n\n\\item{showsd}{Logical value whether to show standard deviation of cross validation.}\n\n\\item{metrics}{List of evaluation metrics to be used in cross validation,\nwhen it is not specified, the evaluation metric is chosen according to objective function.\nPossible options are:\n\\itemize{\n\\item \\code{error}: Binary classification error rate\n\\item \\code{rmse}: Root mean square error\n\\item \\code{logloss}: Negative log-likelihood function\n\\item \\code{mae}: Mean absolute error\n\\item \\code{mape}: Mean absolute percentage error\n\\item \\code{auc}: Area under curve\n\\item \\code{aucpr}: Area under PR curve\n\\item \\code{merror}: Exact matching error used to evaluate multi-class classification\n}}\n\n\\item{objective}{Customized objective function. Should take two arguments: the first one will be the\ncurrent predictions (either a numeric vector or matrix depending on the number of targets / classes),\nand the second one will be the \\code{data} DMatrix object that is used for training.\n\nIt should return a list with two elements \\code{grad} and \\code{hess} (in that order), as either\nnumeric vectors or numeric matrices depending on the number of targets / classes (same\ndimension as the predictions that are passed as first argument).}\n\n\\item{custom_metric}{Customized evaluation function. Just like \\code{objective}, should take two arguments,\nwith the first one being the predictions and the second one the \\code{data} DMatrix.\n\nShould return a list with two elements \\code{metric} (name that will be displayed for this metric,\nshould be a string / character), and \\code{value} (the number that the function calculates, should\nbe a numeric scalar).\n\nNote that even if passing \\code{custom_metric}, objectives also have an associated default metric that\nwill be evaluated in addition to it. 
In order to disable the built-in metric, one can pass\nparameter \\code{disable_default_eval_metric = TRUE}.}\n\n\\item{stratified}{Logical flag indicating whether sampling of folds should be stratified\nby the values of outcome labels. For real-valued labels in regression objectives,\nstratification will be done by discretizing the labels into up to 5 buckets beforehand.\n\nIf passing \"auto\", will be set to \\code{TRUE} if the objective in \\code{params} is a classification\nobjective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to\n\\code{FALSE} otherwise.\n\nThis parameter is ignored when \\code{data} has a \\code{group} field - in such case, the splitting\nwill be based on whole groups (note that this might make the folds have different sizes).\n\nValue \\code{TRUE} here is \\strong{not} supported for custom objectives.}\n\n\\item{folds}{List with pre-defined CV folds (each element must be a vector of test fold's indices).\nWhen folds are supplied, the \\code{nfold} and \\code{stratified} parameters are ignored.\n\nIf \\code{data} has a \\code{group} field and the objective requires this field, each fold (list element)\nmust additionally have two attributes (retrievable through \\code{attributes}) named \\code{group_test}\nand \\code{group_train}, which should hold the \\code{group} to assign through \\code{\\link[=setinfo.xgb.DMatrix]{setinfo.xgb.DMatrix()}} to\nthe resulting DMatrices.}\n\n\\item{train_folds}{List specifying which indices to use for training. If \\code{NULL}\n(the default) all indices not specified in \\code{folds} will be used for training.\n\nThis is not supported when \\code{data} has \\code{group} field.}\n\n\\item{verbose}{If 0, xgboost will stay silent. 
If 1, it will print information about performance.\nIf 2, some additional information will be printed out.\nNote that setting \\code{verbose > 0} automatically engages the\n\\code{xgb.cb.print.evaluation(period=1)} callback function.}\n\n\\item{print_every_n}{When passing \\code{verbose>0}, evaluation logs (metrics calculated on the\ndata passed under \\code{evals}) will be printed every nth iteration according to the value passed\nhere. The first and last iteration are always included regardless of this 'n'.\n\nOnly has an effect when passing data under \\code{evals} and when passing \\code{verbose>0}. The parameter\nis passed to the \\code{\\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback.}\n\n\\item{early_stopping_rounds}{Number of boosting rounds after which training will be stopped\nif there is no improvement in performance (as measured by the evaluation metric that is\nsupplied or selected by default for the objective) on the evaluation data passed under\n\\code{evals}.\n\nMust pass \\code{evals} in order to use this functionality. Setting this parameter adds the\n\\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.\n\nIf \\code{NULL}, early stopping will not be used.}\n\n\\item{maximize}{If \\code{custom_metric} and \\code{early_stopping_rounds} are set, then this parameter must be set as well.\nWhen it is \\code{TRUE}, it means the larger the evaluation score the better.\nThis parameter is passed to the \\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.}\n\n\\item{callbacks}{A list of callback functions to perform various tasks during boosting.\nSee \\code{\\link[=xgb.Callback]{xgb.Callback()}}. Some of the callbacks are automatically created depending on the\nparameters' values. 
User can provide either existing or their own callback methods in order\nto customize the training process.}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nAn object of class 'xgb.cv.synchronous' with the following elements:\n\\itemize{\n\\item \\code{call}: Function call.\n\\item \\code{params}: Parameters that were passed to the xgboost library. 
Note that it does not\ncapture parameters changed by the \\code{\\link[=xgb.cb.reset.parameters]{xgb.cb.reset.parameters()}} callback.\n\\item \\code{evaluation_log}: Evaluation history stored as a \\code{data.table} with the\nfirst column corresponding to iteration number and the rest corresponding to the\nCV-based evaluation means and standard deviations for the training and test CV-sets.\nIt is created by the \\code{\\link[=xgb.cb.evaluation.log]{xgb.cb.evaluation.log()}} callback.\n\\item \\code{niter}: Number of boosting iterations.\n\\item \\code{nfeatures}: Number of features in training data.\n\\item \\code{folds}: The list of CV folds' indices - either those passed through the \\code{folds}\nparameter or randomly generated.\n}\n\nPlus other potential elements that are the result of callbacks, such as a list \\code{cv_predict} with\na sub-element \\code{pred} when passing \\code{prediction = TRUE}, which is added by the \\code{\\link[=xgb.cb.cv.predict]{xgb.cb.cv.predict()}}\ncallback (note that one can also pass it manually under \\code{callbacks} with different settings,\nsuch as saving also the models created during cross validation); or a list \\code{early_stop} which\nwill contain elements such as \\code{best_iteration} when using the early stopping callback (\\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}}).\n}\n\\description{\nThe cross validation function of xgboost.\n}\n\\details{\nThe original sample is randomly partitioned into \\code{nfold} equal size subsamples.\n\nOf the \\code{nfold} subsamples, a single subsample is retained as the validation data for testing the model,\nand the remaining \\code{nfold - 1} subsamples are used as training data.\n\nThe cross-validation process is then repeated \\code{nfold} times, with each of the\n\\code{nfold} subsamples used exactly once as the validation data.\n\nAll observations are used for both training and validation.\n\nAdapted from 
\\url{https://en.wikipedia.org/wiki/Cross-validation_\\%28statistics\\%29}\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n\ncv <- xgb.cv(\n  data = dtrain,\n  nrounds = 20,\n  early_stopping_rounds = 1,\n  params = xgb.params(\n    nthread = 2,\n    max_depth = 3,\n    objective = \"binary:logistic\"\n  ),\n  nfold = 5,\n  metrics = list(\"rmse\",\"auc\"),\n  prediction = TRUE\n)\nprint(cv)\nprint(cv, verbose = TRUE)\n\n# Callbacks might add additional attributes, separated by the name of the callback\ncv$early_stop$best_iteration\nhead(cv$cv_predict$pred)\n}\n"
  },
  {
    "path": "R-package/man/xgb.dump.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.dump.R\n\\name{xgb.dump}\n\\alias{xgb.dump}\n\\title{Dump an XGBoost model in text format.}\n\\usage{\nxgb.dump(\n  model,\n  fname = NULL,\n  fmap = \"\",\n  with_stats = FALSE,\n  dump_format = c(\"text\", \"json\", \"dot\"),\n  ...\n)\n}\n\\arguments{\n\\item{model}{The model object.}\n\n\\item{fname}{The name of the text file where to save the model text dump.\nIf not provided or set to \\code{NULL}, the model is returned as a character vector.}\n\n\\item{fmap}{Feature map file representing feature types. See demo/ for a walkthrough\nexample in R, and \\url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}\nto see an example of the value.}\n\n\\item{with_stats}{Whether to dump some additional statistics about the splits.\nWhen this option is on, the model dump contains two additional values:\ngain is the approximate loss function gain we get in each split;\ncover is the sum of second order gradient in each node.}\n\n\\item{dump_format}{Either 'text', 'json', or 'dot' (graphviz) format could be specified.\n\nFormat 'dot' for a single tree can be passed directly to packages that consume this format\nfor graph visualization, such as function \\code{DiagrammeR::grViz()}}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. 
This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nIf fname is not provided or set to \\code{NULL} the function will return the model\nas a character vector. Otherwise it will return \\code{TRUE}.\n}\n\\description{\nDump an XGBoost model in text format.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\ntrain <- agaricus.train\ntest <- agaricus.test\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\n\n# save the model in file 'xgb.model.dump'\ndump_path = file.path(tempdir(), 'model.dump')\nxgb.dump(bst, dump_path, with_stats = TRUE)\n\n# print the model without saving it to a file\nprint(xgb.dump(bst, with_stats = TRUE))\n\n# print in JSON format:\ncat(xgb.dump(bst, with_stats = TRUE, dump_format = \"json\"))\n\n# plot first tree leveraging the 'dot' format\nif (requireNamespace('DiagrammeR', quietly = TRUE)) {\n  DiagrammeR::grViz(xgb.dump(bst, dump_format = \"dot\")[[1L]])\n}\n}\n"
  },
  {
    "path": "R-package/man/xgb.gblinear.history.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/callbacks.R\n\\name{xgb.gblinear.history}\n\\alias{xgb.gblinear.history}\n\\title{Extract gblinear coefficients history}\n\\usage{\nxgb.gblinear.history(model, class_index = NULL)\n}\n\\arguments{\n\\item{model}{Either an \\code{xgb.Booster} or a result of \\code{\\link[=xgb.cv]{xgb.cv()}}, trained\nusing the \\link{xgb.cb.gblinear.history} callback, but \\strong{not} a booster\nloaded from \\code{\\link[=xgb.load]{xgb.load()}} or \\code{\\link[=xgb.load.raw]{xgb.load.raw()}}.}\n\n\\item{class_index}{zero-based class index to extract the coefficients for only that\nspecific class in a multinomial multiclass model. When it is \\code{NULL}, all the\ncoefficients are returned. Has no effect in non-multiclass models.}\n}\n\\value{\nFor an \\code{\\link[=xgb.train]{xgb.train()}} result, a matrix (either dense or sparse) with the columns\ncorresponding to iteration's coefficients and the rows corresponding to boosting iterations.\n\nFor an \\code{\\link[=xgb.cv]{xgb.cv()}} result, a list of such matrices is returned with the elements\ncorresponding to CV folds.\n\nWhen there is more than one coefficient per feature (e.g. multi-class classification)\nand \\code{class_index} is not provided,\nthe result will be reshaped into a vector where coefficients are arranged first by features and\nthen by class (e.g. 
first 1 through N coefficients will be for the first class, then\ncoefficients N+1 through 2N for the second class, and so on).\n}\n\\description{\nA helper function to extract the matrix of linear coefficients' history\nfrom a gblinear model created while using the \\link{xgb.cb.gblinear.history}\ncallback (which must be added manually as by default it is not used).\n}\n\\details{\nNote that this is an R-specific function that relies on R attributes that\nare not saved when using XGBoost's own serialization functions like \\code{\\link[=xgb.load]{xgb.load()}}\nor \\code{\\link[=xgb.load.raw]{xgb.load.raw()}}.\n\nIn order for a serialized model to be accepted by this function, one must use R\nserializers such as \\code{\\link[=saveRDS]{saveRDS()}}.\n}\n\\seealso{\n\\link{xgb.cb.gblinear.history}, \\link{coef.xgb.Booster}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.get.DMatrix.data.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.get.DMatrix.data}\n\\alias{xgb.get.DMatrix.data}\n\\title{Get DMatrix Data}\n\\usage{\nxgb.get.DMatrix.data(dmat)\n}\n\\arguments{\n\\item{dmat}{An \\code{xgb.DMatrix} object, as returned by \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}}.}\n}\n\\value{\nThe data held in the DMatrix, as a sparse CSR matrix (class \\code{dgRMatrix}\nfrom package \\code{Matrix}). If it had feature names, these will be added as column names\nin the output.\n}\n\\description{\nGet DMatrix Data\n}\n"
  },
  {
    "path": "R-package/man/xgb.get.DMatrix.num.non.missing.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.get.DMatrix.num.non.missing}\n\\alias{xgb.get.DMatrix.num.non.missing}\n\\title{Get Number of Non-Missing Entries in DMatrix}\n\\usage{\nxgb.get.DMatrix.num.non.missing(dmat)\n}\n\\arguments{\n\\item{dmat}{An \\code{xgb.DMatrix} object, as returned by \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}}.}\n}\n\\value{\nThe number of non-missing entries in the DMatrix.\n}\n\\description{\nGet Number of Non-Missing Entries in DMatrix\n}\n"
  },
  {
    "path": "R-package/man/xgb.get.DMatrix.qcut.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.get.DMatrix.qcut}\n\\alias{xgb.get.DMatrix.qcut}\n\\title{Get Quantile Cuts from DMatrix}\n\\usage{\nxgb.get.DMatrix.qcut(dmat, output = c(\"list\", \"arrays\"))\n}\n\\arguments{\n\\item{dmat}{An \\code{xgb.DMatrix} object, as returned by \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}}.}\n\n\\item{output}{Output format for the quantile cuts. Possible options are:\n\\itemize{\n\\item \"list\"\\verb{will return the output as a list with one entry per column, where each column will have a numeric vector with the cuts. The list will be named if}dmat` has column names assigned to it.\n\\item \\code{\"arrays\"} will return a list with entries \\code{indptr} (base-0 indexing) and\n\\code{data}. Here, the cuts for column 'i' are obtained by slicing 'data' from entries\n\\code{   indptr[i]+1} to \\code{indptr[i+1]}.\n}}\n}\n\\value{\nThe quantile cuts, in the format specified by parameter \\code{output}.\n}\n\\description{\nGet the quantile cuts (a.k.a. borders) from an \\code{xgb.DMatrix}\nthat has been quantized for the histogram method (\\code{tree_method = \"hist\"}).\n\nThese cuts are used in order to assign observations to bins - i.e. 
these are ordered\nboundaries which are used to determine assignment condition \\verb{border_low < x < border_high}.\nAs such, the first and last bin will be outside of the range of the data, so as to include\nall of the observations there.\n\nIf a given column has 'n' bins, then there will be 'n+1' cuts / borders for that column,\nwhich will be output in sorted order from lowest to highest.\n\nDifferent columns can have different numbers of bins according to their range.\n}\n\\examples{\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- as.matrix(mtcars[, -1])\ndm <- xgb.DMatrix(x, label = y, nthread = 1)\n\n# DMatrix is not quantized right away, but will be once a hist model is generated\nmodel <- xgb.train(\n  data = dm,\n  params = xgb.params(tree_method = \"hist\", max_bin = 8, nthread = 1),\n  nrounds = 3\n)\n\n# Now can get the quantile cuts\nxgb.get.DMatrix.qcut(dm)\n}\n"
  },
  {
    "path": "R-package/man/xgb.get.num.boosted.rounds.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.get.num.boosted.rounds}\n\\alias{xgb.get.num.boosted.rounds}\n\\alias{length.xgb.Booster}\n\\title{Get number of boosting in a fitted booster}\n\\usage{\nxgb.get.num.boosted.rounds(model)\n\n\\method{length}{xgb.Booster}(x)\n}\n\\arguments{\n\\item{model, x}{A fitted \\code{xgb.Booster} model.}\n}\n\\value{\nThe number of rounds saved in the model as an integer.\n}\n\\description{\nGet number of boosting in a fitted booster\n}\n\\details{\nNote that setting booster parameters related to training\ncontinuation / updates through \\code{\\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}} will reset the\nnumber of rounds to zero.\n}\n"
  },
  {
    "path": "R-package/man/xgb.importance.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.importance.R\n\\name{xgb.importance}\n\\alias{xgb.importance}\n\\title{Feature importance}\n\\usage{\nxgb.importance(\n  model = NULL,\n  feature_names = getinfo(model, \"feature_name\"),\n  trees = NULL\n)\n}\n\\arguments{\n\\item{model}{Object of class \\code{xgb.Booster}.}\n\n\\item{feature_names}{Character vector used to overwrite the feature names\nof the model. The default is \\code{NULL} (use original feature names).}\n\n\\item{trees}{An integer vector of (base-1) tree indices that should be included\ninto the importance calculation (only for the \"gbtree\" booster).\nThe default (\\code{NULL}) parses all trees.\nIt could be useful, e.g., in multiclass classification to get feature importances\nfor each class separately.}\n}\n\\value{\nA \\code{data.table} with the following columns:\n\nFor a tree model:\n\\itemize{\n\\item \\code{Features}: Names of the features used in the model.\n\\item \\code{Gain}: Fractional contribution of each feature to the model based on\nthe total gain of this feature's splits. Higher percentage means higher importance.\n\\item \\code{Cover}: Metric of the number of observation related to this feature.\n\\item \\code{Frequency}: Percentage of times a feature has been used in trees.\n}\n\nFor a linear model:\n\\itemize{\n\\item \\code{Features}: Names of the features used in the model.\n\\item \\code{Weight}: Linear coefficient of this feature.\n\\item \\code{Class}: Class label (only for multiclass models). For objects of class \\code{xgboost} (as\nproduced by \\code{\\link[=xgboost]{xgboost()}}), it will be a \\code{factor}, while for objects of class \\code{xgb.Booster}\n(as produced by \\code{\\link[=xgb.train]{xgb.train()}}), it will be a zero-based integer vector.\n}\n\nIf \\code{feature_names} is not provided and \\code{model} doesn't have \\code{feature_names},\nthe index of the features will be used instead. 
Because the index is extracted from the model dump\n(based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (usual in R).\n}\n\\description{\nCreates a \\code{data.table} of feature importances.\n}\n\\details{\nThis function works for both linear and tree models.\n\nFor linear models, the importance is the absolute magnitude of linear coefficients.\nTo obtain a meaningful ranking by importance for linear models, the features need to\nbe on the same scale (which is also recommended when using L1 or L2 regularization).\n}\n\\examples{\n# binary classification using \"gbtree\":\ndata(\"ToothGrowth\")\nx <- ToothGrowth[, c(\"len\", \"dose\")]\ny <- ToothGrowth$supp\nmodel_tree_binary <- xgboost(\n  x, y,\n  nrounds = 5L,\n  nthreads = 1L,\n  booster = \"gbtree\",\n  max_depth = 2L\n)\nxgb.importance(model_tree_binary)\n\n# binary classification using \"gblinear\":\nmodel_tree_linear <- xgboost(\n  x, y,\n  nrounds = 5L,\n  nthreads = 1L,\n  booster = \"gblinear\",\n  learning_rate = 0.3\n)\nxgb.importance(model_tree_linear)\n\n# multi-class classification using \"gbtree\":\ndata(\"iris\")\nx <- iris[, c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")]\ny <- iris$Species\nmodel_tree_multi <- xgboost(\n  x, y,\n  nrounds = 5L,\n  nthreads = 1L,\n  booster = \"gbtree\",\n  max_depth = 3\n)\n# all classes clumped together:\nxgb.importance(model_tree_multi)\n# inspect importances separately for each class:\nnum_classes <- 3L\nnrounds <- 5L\nxgb.importance(\n  model_tree_multi, trees = seq(from = 1, by = num_classes, length.out = nrounds)\n)\nxgb.importance(\n  model_tree_multi, trees = seq(from = 2, by = num_classes, length.out = nrounds)\n)\nxgb.importance(\n  model_tree_multi, trees = seq(from = 3, by = num_classes, length.out = nrounds)\n)\n\n# multi-class classification using \"gblinear\":\nmodel_linear_multi <- xgboost(\n  x, y,\n  nrounds = 5L,\n  nthreads = 1L,\n  booster = \"gblinear\",\n  learning_rate = 
0.2\n)\nxgb.importance(model_linear_multi)\n}\n"
  },
  {
    "path": "R-package/man/xgb.is.same.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.is.same.Booster}\n\\alias{xgb.is.same.Booster}\n\\title{Check if two boosters share the same C object}\n\\usage{\nxgb.is.same.Booster(obj1, obj2)\n}\n\\arguments{\n\\item{obj1}{Booster model to compare with \\code{obj2}.}\n\n\\item{obj2}{Booster model to compare with \\code{obj1}.}\n}\n\\value{\nEither \\code{TRUE} or \\code{FALSE} according to whether the two boosters share the\nunderlying C object.\n}\n\\description{\nChecks whether two booster objects refer to the same underlying C object.\n}\n\\details{\nAs booster objects (as returned by e.g. \\code{\\link[=xgb.train]{xgb.train()}}) contain an R 'externalptr'\nobject, they don't follow typical copy-on-write semantics of other R objects - that is, if\none assigns a booster to a different variable and modifies that new variable through in-place\nmethods like \\code{\\link[=xgb.attr<-]{xgb.attr<-()}}, the modification will be applied to both the old and the new\nvariable, unlike typical R assignments which would only modify the latter.\n\nThis function allows checking whether two booster objects share the same 'externalptr',\nregardless of the R attributes that they might have.\n\nIn order to duplicate a booster in such a way that the copy wouldn't share the same\n'externalptr', one can use function \\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}}.\n}\n\\examples{\nlibrary(xgboost)\n\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- as.matrix(mtcars[, -1])\n\nmodel <- xgb.train(\n  params = xgb.params(nthread = 1),\n  data = xgb.DMatrix(x, label = y, nthread = 1),\n  nrounds = 3\n)\n\nmodel_shallow_copy <- model\nxgb.is.same.Booster(model, model_shallow_copy) # same C object\n\nmodel_deep_copy <- xgb.copy.Booster(model)\nxgb.is.same.Booster(model, model_deep_copy) # different C objects\n\n# In-place assignments modify all references,\n# but not full/deep copies of the 
booster\nxgb.attr(model_shallow_copy, \"my_attr\") <- 111\nxgb.attr(model, \"my_attr\") # gets modified\nxgb.attr(model_deep_copy, \"my_attr\") # doesn't get modified\n}\n\\seealso{\n\\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.load.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.load.R\n\\name{xgb.load}\n\\alias{xgb.load}\n\\title{Load XGBoost model from binary file}\n\\usage{\nxgb.load(modelfile)\n}\n\\arguments{\n\\item{modelfile}{The name of the binary input file.}\n}\n\\value{\nAn object of \\code{xgb.Booster} class.\n}\n\\description{\nLoad XGBoost model from binary model file.\n}\n\\details{\nThe input file is expected to contain a model saved in an XGBoost model format\nusing either \\code{\\link[=xgb.save]{xgb.save()}} in R, or using some\nappropriate methods from other XGBoost interfaces. E.g., a model trained in Python and\nsaved from there in XGBoost format, could be loaded from R.\n\nNote: a model saved as an R object has to be loaded using corresponding R-methods,\nnot by \\code{\\link[=xgb.load]{xgb.load()}}.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\ntrain <- agaricus.train\ntest <- agaricus.test\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\nfname <- file.path(tempdir(), \"xgb.ubj\")\nxgb.save(bst, fname)\nbst <- xgb.load(fname)\n}\n\\seealso{\n\\code{\\link[=xgb.save]{xgb.save()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.load.raw.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.load.raw.R\n\\name{xgb.load.raw}\n\\alias{xgb.load.raw}\n\\title{Load serialised XGBoost model from R's raw vector}\n\\usage{\nxgb.load.raw(buffer)\n}\n\\arguments{\n\\item{buffer}{The buffer returned by \\code{\\link[=xgb.save.raw]{xgb.save.raw()}}.}\n}\n\\description{\nUser can generate raw memory buffer by calling \\code{\\link[=xgb.save.raw]{xgb.save.raw()}}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.model.dt.tree.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.model.dt.tree.R\n\\name{xgb.model.dt.tree}\n\\alias{xgb.model.dt.tree}\n\\title{Parse model text dump}\n\\usage{\nxgb.model.dt.tree(model, trees = NULL, use_int_id = FALSE, ...)\n}\n\\arguments{\n\\item{model}{Object of class \\code{xgb.Booster}. If it contains feature names (they can\nbe set through \\code{\\link[=setinfo]{setinfo()}}), they will be used in the output from this function.\n\nIf the model contains categorical features, an error will be thrown.}\n\n\\item{trees}{An integer vector of (base-1) tree indices that should be used. The default\n(\\code{NULL}) uses all trees. Useful, e.g., in multiclass classification to get only\nthe trees of one class.}\n\n\\item{use_int_id}{A logical flag indicating whether nodes in columns \"Yes\", \"No\", and\n\"Missing\" should be represented as integers (when \\code{TRUE}) or as \"Tree-Node\"\ncharacter strings (when \\code{FALSE}, default).}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nA \\code{data.table} with detailed information about tree nodes. 
It has the following columns:\n\\itemize{\n\\item \\code{Tree}: integer ID of a tree in a model (zero-based index).\n\\item \\code{Node}: integer ID of a node in a tree (zero-based index).\n\\item \\code{ID}: character identifier of a node in a model (only when \\code{use_int_id = FALSE}).\n\\item \\code{Feature}: for a branch node, a feature ID or name (when available);\nfor a leaf node, it simply labels it as \\code{\"Leaf\"}.\n\\item \\code{Split}: location of the split for a branch node (split condition is always \"less than\").\n\\item \\code{Yes}: ID of the next node when the split condition is met.\n\\item \\code{No}: ID of the next node when the split condition is not met.\n\\item \\code{Missing}: ID of the next node when the branch value is missing.\n\\item \\code{Gain}: either the split gain (change in loss) or the leaf value.\n\\item \\code{Cover}: metric related to the number of observations either seen by a split\nor collected by a leaf during training.\n}\n\nWhen \\code{use_int_id = FALSE}, columns \"Yes\", \"No\", and \"Missing\" point to model-wide node identifiers\nin the \"ID\" column. 
When \\code{use_int_id = TRUE}, those columns point to node identifiers from\nthe corresponding trees in the \"Node\" column.\n}\n\\description{\nParse a boosted tree model text dump into a \\code{data.table} structure.\n}\n\\details{\nNote that this function does not work with models that were fitted to\ncategorical data, and is only applicable to tree-based boosters (not \\code{gblinear}).\n}\n\\examples{\n# Basic use:\n\ndata(agaricus.train, package = \"xgboost\")\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\nbst <- xgb.train(\n  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\n# This bst model already has feature_names stored with it, so those would be used when\n# feature_names is not set:\ndt <- xgb.model.dt.tree(bst)\n\n# How to match feature names of splits that are following a current 'Yes' branch:\nmerge(\n  dt,\n  dt[, .(ID, Y.Feature = Feature)], by.x = \"Yes\", by.y = \"ID\", all.x = TRUE\n)[\n  order(Tree, Node)\n]\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.model.parameters.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.model.parameters<-}\n\\alias{xgb.model.parameters<-}\n\\title{Accessors for model parameters}\n\\usage{\nxgb.model.parameters(object) <- value\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.Booster}. \\strong{Will be modified in-place}.}\n\n\\item{value}{A list (or an object coercible to a list) with the names of parameters to set\nand the elements corresponding to parameter values.}\n}\n\\value{\nThe same booster \\code{object}, which gets modified in-place.\n}\n\\description{\nOnly the setter for XGBoost parameters is currently implemented.\n}\n\\details{\nJust like \\code{\\link[=xgb.attr]{xgb.attr()}}, this function will make in-place modifications\non the booster object which do not follow typical R assignment semantics - that is,\nall references to the same booster will also be updated, unlike assingment of R\nattributes which follow copy-on-write semantics.\n\nSee \\code{\\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an example of this behavior.\n\nBe aware that setting parameters of a fitted booster related to training continuation / updates\nwill reset its number of rounds indicator to zero.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ntrain <- agaricus.train\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    learning_rate = 1,\n    nthread = 2,\n    objective = \"binary:logistic\"\n  )\n)\n\nxgb.model.parameters(bst) <- list(learning_rate = 0.1)\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.params.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.train.R\n\\name{xgb.params}\n\\alias{xgb.params}\n\\title{XGBoost Parameters}\n\\usage{\nxgb.params(\n  objective = NULL,\n  verbosity = NULL,\n  nthread = NULL,\n  seed = NULL,\n  booster = NULL,\n  eta = NULL,\n  learning_rate = NULL,\n  gamma = NULL,\n  min_split_loss = NULL,\n  max_depth = NULL,\n  min_child_weight = NULL,\n  max_delta_step = NULL,\n  subsample = NULL,\n  sampling_method = NULL,\n  colsample_bytree = NULL,\n  colsample_bylevel = NULL,\n  colsample_bynode = NULL,\n  lambda = NULL,\n  reg_lambda = NULL,\n  alpha = NULL,\n  reg_alpha = NULL,\n  tree_method = NULL,\n  scale_pos_weight = NULL,\n  updater = NULL,\n  refresh_leaf = NULL,\n  grow_policy = NULL,\n  max_leaves = NULL,\n  max_bin = NULL,\n  num_parallel_tree = NULL,\n  monotone_constraints = NULL,\n  interaction_constraints = NULL,\n  multi_strategy = NULL,\n  base_score = NULL,\n  eval_metric = NULL,\n  seed_per_iteration = NULL,\n  device = NULL,\n  disable_default_eval_metric = NULL,\n  use_rmm = NULL,\n  max_cached_hist_node = NULL,\n  max_cat_to_onehot = NULL,\n  max_cat_threshold = NULL,\n  sample_type = NULL,\n  normalize_type = NULL,\n  rate_drop = NULL,\n  one_drop = NULL,\n  skip_drop = NULL,\n  feature_selector = NULL,\n  top_k = NULL,\n  num_class = NULL,\n  tweedie_variance_power = NULL,\n  huber_slope = NULL,\n  quantile_alpha = NULL,\n  expectile_alpha = NULL,\n  aft_loss_distribution = NULL,\n  aft_loss_distribution_scale = NULL,\n  lambdarank_pair_method = NULL,\n  lambdarank_num_pair_per_sample = NULL,\n  lambdarank_normalization = NULL,\n  lambdarank_score_normalization = NULL,\n  lambdarank_unbiased = NULL,\n  lambdarank_bias_norm = NULL,\n  ndcg_exp_gain = NULL\n)\n}\n\\arguments{\n\\item{objective}{(default=\\code{\"reg:squarederror\"})\nSpecify the learning task and the corresponding learning objective or a custom objective function to be used.\n\nFor custom objective, 
see \\href{https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html}{Custom Objective and Evaluation Metric}\nand \\href{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html#custom-obj-metric}{Custom objective and metric} for more information,\nalong with the end note for function signatures.\n\nSupported values are:\n\\itemize{\n\\item \\code{\"reg:squarederror\"}: regression with squared loss.\n\\item \\code{\"reg:squaredlogerror\"}: regression with squared log loss \\eqn{\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}.  All input labels are required to be greater than -1.  Also, see metric \\code{rmsle} for possible issue  with this objective.\n\\item \\code{\"reg:logistic\"}: logistic regression, output probability\n\\item \\code{\"reg:pseudohubererror\"}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.\n\\item \\code{\"reg:absoluteerror\"}: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.\n\nVersion added: 1.7.0\n\\item \\code{\"reg:quantileerror\"}: Quantile loss, also known as \"pinball loss\". See later sections for its parameter and \\href{https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py}{Quantile Regression} for a worked example.\n\nVersion added: 2.0.0\n\\item \\code{\"reg:expectileerror\"}: Expectile loss. See later sections for its parameter.\n\\item \\code{\"binary:logistic\"}: logistic regression for binary classification, output probability\n\\item \\code{\"binary:logitraw\"}: logistic regression for binary classification, output score before logistic transformation\n\\item \\code{\"binary:hinge\"}: hinge loss for binary classification. 
This makes predictions of 0 or 1, rather than producing probabilities.\n\\item \\code{\"count:poisson\"}: Poisson regression for count data, output mean of Poisson distribution.\n\\code{\"max_delta_step\"} is set to 0.7 by default in Poisson regression (used to safeguard optimization)\n\\item \\code{\"survival:cox\"}: Cox regression for right censored survival time data (negative values are considered right censored).\n\nNote that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \\code{h(t) = h0(t) * HR}).\n\\item \\code{\"survival:aft\"}: Accelerated failure time model for censored survival time data.\nSee \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n\\item \\code{\"multi:softmax\"}: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes)\n\\item \\code{\"multi:softprob\"}: same as softmax, but output a vector of \\code{ndata * nclass}, which can be further reshaped to \\code{ndata * nclass} matrix. The result contains predicted probability of each data point belonging to each class.\n\\item \\code{\"rank:ndcg\"}: Use LambdaMART to perform pair-wise ranking where the normalized discounted cumulative gain (NDCG) is maximized. This objective supports position debiasing for click data.\n\\item \\code{\"rank:map\"}: Use LambdaMART to perform pair-wise ranking where the mean average precision (MAP) is maximized\n\\item \\code{\"rank:pairwise\"}: Use LambdaRank to perform pair-wise ranking using the \\code{ranknet} objective.\n\\item \\code{\"reg:gamma\"}: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be gamma-distributed.\n\\item \\code{\"reg:tweedie\"}: Tweedie regression with log-link. 
It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be tweedie-distributed.\n}}\n\n\\item{verbosity}{(default=1)\nVerbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3\n(debug). Sometimes XGBoost tries to change configurations based on heuristics, which\nis displayed as warning message. If there's unexpected behaviour, please try to\nincrease value of verbosity.}\n\n\\item{nthread}{(default to maximum number of threads available if not set)\nNumber of parallel threads used to run XGBoost. When choosing it, please keep thread\ncontention and hyperthreading in mind.}\n\n\\item{seed}{Random number seed. If not specified, will take a random seed through R's own RNG engine.}\n\n\\item{booster}{(default= \\code{\"gbtree\"})\nWhich booster to use. Can be \\code{\"gbtree\"}, \\code{\"gblinear\"} or \\code{\"dart\"}; \\code{\"gbtree\"} and \\code{\"dart\"} use tree based models while \\code{\"gblinear\"} uses linear functions.}\n\n\\item{eta, learning_rate}{(two aliases for the same parameter)\nStep size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and \\code{eta} shrinks the feature weights to make the boosting process more conservative.\n\\itemize{\n\\item range: \\eqn{[0,1]}\n\\item default value: 0.3 for tree-based boosters, 0.5 for linear booster.\n}\n\nNote: should only pass one of \\code{eta} or \\code{learning_rate}. Both refer to the same parameter and there's thus no difference between one or the other.}\n\n\\item{gamma, min_split_loss}{(two aliases for the same parameter) (for Tree Booster) (default=0, alias: \\code{gamma})\nMinimum loss reduction required to make a further partition on a leaf node of the tree. The larger \\code{min_split_loss} is, the more conservative the algorithm will be. 
Note that a tree where no splits were made might still contain a single terminal node with a non-zero score.\n\nrange: \\eqn{[0, \\infty)}\n\nNote: should only pass one of \\code{gamma} or \\code{min_split_loss}. Both refer to the same parameter and there's thus no difference between one or the other.}\n\n\\item{max_depth}{(for Tree Booster) (default=6, type=int32)\nMaximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. \\code{\"exact\"} tree method requires non-zero value.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{min_child_weight}{(for Tree Booster) (default=1)\nMinimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than \\code{min_child_weight}, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger \\code{min_child_weight} is, the more conservative the algorithm will be.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{max_delta_step}{(for Tree Booster) (default=0)\nMaximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Setting it to a value of 1-10 might help control the update.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{subsample}{(for Tree Booster) (default=1)\nSubsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. 
Subsampling will occur once in every boosting iteration.\n\nrange: \\eqn{(0,1]}}\n\n\\item{sampling_method}{(for Tree Booster) (default= \\code{\"uniform\"})\nThe method to use to sample the training instances.\n\\itemize{\n\\item \\code{\"uniform\"}: each training instance has an equal probability of being selected. Typically set\n\\code{\"subsample\"} >= 0.5 for good results.\n\\item \\code{\"gradient_based\"}: the selection probability for each training instance is proportional to the\n\\bold{regularized absolute value} of gradients (more specifically, \\eqn{\\sqrt{g^2+\\lambda h^2}}).\n\\code{\"subsample\"} may be set to as low as 0.1 without loss of model accuracy. Note that this\nsampling method is only supported when \\code{\"tree_method\"} is set to \\code{\"hist\"}; other tree\nmethods only support \\code{\"uniform\"} sampling.\n}}\n\n\\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (default=1)\nThis is a family of parameters for subsampling of columns.\n\\itemize{\n\\item All \\code{\"colsample_by*\"} parameters have a range of \\eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.\n\\item \\code{\"colsample_bytree\"} is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.\n\\item \\code{\"colsample_bylevel\"} is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.\n\\item \\code{\"colsample_bynode\"} is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.\n\\item \\code{\"colsample_by*\"} parameters work cumulatively. 
For instance,\nthe combination \\verb{\\{'colsample_bytree'=0.5, 'colsample_bylevel'=0.5, 'colsample_bynode'=0.5\\}} with 64 features will leave 8 features to choose from at\neach split.\n}\n\nOne can set the \\code{\"feature_weights\"} for DMatrix to\ndefine the probability of each feature being selected when using column sampling.}\n\n\\item{lambda, reg_lambda}{(two aliases for the same parameter)\n\\itemize{\n\\item For tree-based boosters:\n\\itemize{\n\\item L2 regularization term on weights. Increasing this value will make model more conservative.\n\\item default: 1\n\\item range: \\eqn{[0, \\infty)}\n}\n\\item For linear booster:\n\\itemize{\n\\item L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.\n\\item default: 0\n\\item range: \\eqn{[0, \\infty)}\n}\n}\n\nNote: should only pass one of \\code{lambda} or \\code{reg_lambda}. Both refer to the same parameter and there's thus no difference between one or the other.}\n\n\\item{alpha, reg_alpha}{(two aliases for the same parameter)\n\\itemize{\n\\item L1 regularization term on weights. Increasing this value will make model more conservative.\n\\item For the linear booster, it's normalised to number of training examples.\n\\item default: 0\n\\item range: \\eqn{[0, \\infty)}\n}\n\nNote: should only pass one of \\code{alpha} or \\code{reg_alpha}. Both refer to the same parameter and there's thus no difference between one or the other.}\n\n\\item{tree_method}{(for Tree Booster) (default= \\code{\"auto\"})\nThe tree construction algorithm used in XGBoost. See description in the \\href{https://arxiv.org/abs/1603.02754}{reference paper} and \\href{https://xgboost.readthedocs.io/en/latest/treemethod.html}{Tree Methods}.\n\nChoices: \\code{\"auto\"}, \\code{\"exact\"}, \\code{\"approx\"}, \\code{\"hist\"}, this is a combination of commonly\nused updaters.  
For other updaters like \\code{\"refresh\"}, set the parameter \\code{updater}\ndirectly.\n\\itemize{\n\\item \\code{\"auto\"}: Same as the \\code{\"hist\"} tree method.\n\\item \\code{\"exact\"}: Exact greedy algorithm.  Enumerates all split candidates.\n\\item \\code{\"approx\"}: Approximate greedy algorithm using quantile sketch and gradient histogram.\n\\item \\code{\"hist\"}: Faster histogram optimized approximate greedy algorithm.\n}}\n\n\\item{scale_pos_weight}{(for Tree Booster) (default=1)\nControl the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: \\verb{sum(negative instances) / sum(positive instances)}. See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html}{Parameters Tuning} for more discussion. Also, see Higgs Kaggle competition demo for examples: \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{R}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py}{py1}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py}{py2}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py}{py3}.}\n\n\\item{updater}{Has different meanings depending on the type of booster.\n\\itemize{\n\\item For tree-based boosters:\nA comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. 
The following updaters exist:\n\\itemize{\n\\item \\code{\"grow_colmaker\"}: non-distributed column-based construction of trees.\n\\item \\code{\"grow_histmaker\"}: distributed tree construction with row-based data splitting based on global proposal of histogram counting.\n\\item \\code{\"grow_quantile_histmaker\"}: Grow tree using quantized histogram.\n\\item \\code{\"grow_gpu_hist\"}:  Enabled when \\code{tree_method} is set to \\code{\"hist\"} along with \\code{device=\"cuda\"}.\n\\item \\code{\"grow_gpu_approx\"}: Enabled when \\code{tree_method} is set to \\code{\"approx\"} along with \\code{device=\"cuda\"}.\n\\item \\code{\"sync\"}: synchronizes trees in all distributed nodes.\n\\item \\code{\"refresh\"}: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.\n\\item \\code{\"prune\"}: prunes the splits where loss < \\code{min_split_loss} (or \\code{gamma}) and nodes that have depth greater than \\code{max_depth}.\n}\n\\item For \\code{booster=\"gblinear\"}:\n(default= \\code{\"shotgun\"}) Choice of algorithm to fit linear model\n\\itemize{\n\\item \\code{\"shotgun\"}: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.\n\\item \\code{\"coord_descent\"}: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the \\code{device} parameter is set to \\code{\"cuda\"} or \\code{\"gpu\"}, a GPU variant would be used.\n}\n}}\n\n\\item{refresh_leaf}{(for Tree Booster) (default=1)\nThis is a parameter of the \\code{\"refresh\"} updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. 
When it is 0, only node stats are updated.}\n\n\\item{grow_policy}{(for Tree Booster) (default= \\code{\"depthwise\"})\n\\itemize{\n\\item Controls the way new nodes are added to the tree.\n\\item Currently supported only if \\code{tree_method} is set to \\code{\"hist\"} or \\code{\"approx\"}.\n\\item Choices: \\code{\"depthwise\"}, \\code{\"lossguide\"}\n\\itemize{\n\\item \\code{\"depthwise\"}: split at nodes closest to the root.\n\\item \\code{\"lossguide\"}: split at nodes with highest loss change.\n}\n}}\n\n\\item{max_leaves}{(for Tree Booster) (default=0, type=int32)\nMaximum number of nodes to be added.  Not used by \\code{\"exact\"} tree method.}\n\n\\item{max_bin}{(for Tree Booster) (default=256, type=int32)\n\\itemize{\n\\item Only used if \\code{tree_method} is set to \\code{\"hist\"} or \\code{\"approx\"}.\n\\item Maximum number of discrete bins to bucket continuous features.\n\\item Increasing this number improves the optimality of splits at the cost of higher computation time.\n}}\n\n\\item{num_parallel_tree}{(for Tree Booster) (default=1)\nNumber of parallel trees constructed during each iteration. This option is used to support boosted random forest.}\n\n\\item{monotone_constraints}{(for Tree Booster)\nConstraint of variable monotonicity. See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/monotonic.html}{Monotonic Constraints} for more information.}\n\n\\item{interaction_constraints}{(for Tree Booster)\nConstraints for interaction representing permitted interactions. The constraints must\nbe specified in the form of a nested list, e.g. 
\\code{list(c(0, 1), c(2, 3, 4))}, where each inner\nlist is a group of indices of features (base-0 numeration) that are allowed to interact with each other.\nSee \\href{https://xgboost.readthedocs.io/en/latest/tutorials/feature_interaction_constraint.html}{Feature Interaction Constraints} for more information.}\n\n\\item{multi_strategy}{(for Tree Booster) (default = \\code{\"one_output_per_tree\"})\nThe strategy used for training multi-target models, including multi-target regression\nand multi-class classification. See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html}{Multiple Outputs} for more information.\n\\itemize{\n\\item \\code{\"one_output_per_tree\"}: One model for each target.\n\\item \\code{\"multi_output_tree\"}:  Use multi-target trees.\n}\n\nVersion added: 2.0.0\n\nNote: This parameter is a work in progress.}\n\n\\item{base_score}{\\itemize{\n\\item The initial prediction score of all instances, global bias\n\\item The parameter is automatically estimated for selected objectives before training. To\ndisable the estimation, specify a real number argument.\n\\item If \\code{base_margin} is supplied, \\code{base_score} will not be added.\n\\item For sufficient number of iterations, changing this value will not have too much effect.\n}}\n\n\\item{eval_metric}{(default according to objective)\n\\itemize{\n\\item Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, \\verb{mean average precision} for \\code{rank:map}, etc.)\n\\item User can add multiple evaluation metrics.\n\\item The choices are listed below:\n\\itemize{\n\\item \\code{\"rmse\"}: root mean square error\n\\item \\code{\"rmsle\"}: root mean square log error: \\eqn{\\sqrt{\\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of \\code{\"reg:squaredlogerror\"} objective. This metric reduces errors generated by outliers in dataset.  
But because \\code{log} function is employed, \\code{\"rmsle\"} might output \\code{nan} when prediction value is less than -1.  See \\code{\"reg:squaredlogerror\"} for other requirements.\n\\item \\code{\"mae\"}: mean absolute error.\n\\item \\code{\"mape\"}: mean absolute percentage error.\n\\item \\code{\"mphe\"}: mean Pseudo Huber error. Default metric of \\code{\"reg:pseudohubererror\"} objective.\n\\item \\code{\"logloss\"}: negative log-likelihood.\n\\item \\code{\"error\"}: Binary classification error rate. It is calculated as \\verb{#(wrong cases)/#(all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.\n\\item \\code{\"error@t\"}: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'.\n\\item \\code{\"merror\"}: Multiclass classification error rate. It is calculated as \\verb{#(wrong cases)/#(all cases)}.\n\\item \\code{\"mlogloss\"}: \\href{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}{Multiclass logloss}.\n\\item \\code{\"auc\"}: area under the receiver-operating characteristic curve.\nAvailable for classification and learning-to-rank tasks.\n\\itemize{\n\\item When used with binary classification, the objective should be \\code{\"binary:logistic\"} or similar functions that work on probability.\n\\item When used with multi-class classification, objective should be \\code{\"multi:softprob\"} instead of \\code{\"multi:softmax\"}, as the latter doesn't output probability.  Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence.\n\\item When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs.  This corresponds to pairwise learning to rank.  
The implementation has some issues with average AUC around groups and distributed workers not being well-defined.\n\\item On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.\n\\item When input dataset contains only negative or positive samples, the output is \\code{NaN}.  The behavior is implementation defined, for instance, \\code{scikit-learn} returns \\eqn{0.5} instead.\n}\n\\item \\code{\"aucpr\"}: area under the PR curve\nAvailable for classification and learning-to-rank tasks.\n\nAfter XGBoost 1.6, both of the requirements and restrictions for using \\code{\"aucpr\"} in classification problem are similar to \\code{\"auc\"}.  For ranking task, only binary relevance label \\eqn{y \\in [0, 1]} is supported.  Different from \\code{\"map\"} (mean average precision), \\code{\"aucpr\"} calculates the \\emph{interpolated} area under precision recall curve using continuous interpolation.\n\\item \\code{\"pre\"}: Precision at \\eqn{k}. Supports only learning to rank task.\n\\item \\code{\"ndcg\"}: normalized discounted cumulative gain\n\\item \\code{\"map\"}: mean average precision\n\nThe \\verb{average precision} is defined as:\n\n\\eqn{AP@l = \\frac{1}{min{(l, N)}}\\sum^l_{k=1}P@k \\cdot I_{(k)}}\n\nwhere \\eqn{I_{(k)}} is an indicator function that equals to \\eqn{1} when the document at \\eqn{k} is relevant and \\eqn{0} otherwise. The \\eqn{P@k} is the precision at \\eqn{k}, and \\eqn{N} is the total number of relevant documents. 
Lastly, the \\verb{mean average precision} is defined as the weighted average across all queries.\n\\item \\code{\"ndcg@n\"}, \\code{\"map@n\"}, \\code{\"pre@n\"}: \\eqn{n} can be assigned as an integer to cut off the top positions in the lists for evaluation.\n\\item \\code{\"ndcg-\"}, \\code{\"map-\"}, \\code{\"ndcg@n-\"}, \\code{\"map@n-\"}: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as \\eqn{1}. By appending \"-\" to the evaluation metric name, we can ask XGBoost to evaluate these scores as \\eqn{0} to be consistent under some conditions.\n\\item \\code{\"poisson-nloglik\"}: negative log-likelihood for Poisson regression\n\\item \\code{\"gamma-nloglik\"}: negative log-likelihood for gamma regression\n\\item \\code{\"cox-nloglik\"}: negative partial log-likelihood for Cox proportional hazards regression\n\\item \\code{\"gamma-deviance\"}: residual deviance for gamma regression\n\\item \\code{\"tweedie-nloglik\"}: negative log-likelihood for Tweedie regression (at a specified value of the \\code{tweedie_variance_power} parameter)\n\\item \\code{\"aft-nloglik\"}: Negative log likelihood of Accelerated Failure Time model.\nSee \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n\\item \\code{\"interval-regression-accuracy\"}: Fraction of data points whose predicted labels fall in the interval-censored labels.\nOnly applicable for interval-censored data.  See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n}\n}}\n\n\\item{seed_per_iteration}{(default= \\code{FALSE})\nSeed PRNG deterministically via iteration number.}\n\n\\item{device}{(default= \\code{\"cpu\"})\nDevice for XGBoost to run. 
User can set it to one of the following values:\n\\itemize{\n\\item \\code{\"cpu\"}: Use CPU.\n\\item \\code{\"cuda\"}: Use a GPU (CUDA device).\n\\item \\code{\"cuda:<ordinal>\"}: \\verb{<ordinal>} is an integer that specifies the ordinal of the GPU (which GPU to use if you have more than one device).\n\\item \\code{\"gpu\"}: Default GPU device selection from the list of available and supported devices. Only \\code{\"cuda\"} devices are supported currently.\n\\item \\code{\"gpu:<ordinal>\"}: Default GPU device selection from the list of available and supported devices. Only \\code{\"cuda\"} devices are supported currently.\n}\n\nFor more information about GPU acceleration, see \\href{https://xgboost.readthedocs.io/en/latest/gpu/index.html}{XGBoost GPU Support}. In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using \\code{\"cuda:<ordinal>\"} will result in an error. Use \\code{\"cuda\"} instead.\n\nVersion added: 2.0.0\n\nNote: if XGBoost was installed from CRAN, it won't have GPU support enabled, thus only \\code{\"cpu\"} will be available.\nTo get GPU support, the R package for XGBoost must be installed from source or from the GitHub releases - see\n\\href{https://xgboost.readthedocs.io/en/latest/install.html#r}{instructions}.}\n\n\\item{disable_default_eval_metric}{(default= \\code{FALSE})\nFlag to disable default metric. Set to 1 or \\code{TRUE} to disable.}\n\n\\item{use_rmm}{Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU\nmemory. The primary memory is always allocated on the RMM pool when XGBoost is built\n(compiled) with the RMM plugin enabled. Valid values are \\code{TRUE} and \\code{FALSE}. 
See\n\\href{https://xgboost.readthedocs.io/en/latest/python/rmm-examples/index.html}{Using XGBoost with RAPIDS Memory Manager (RMM) plugin} for details.}\n\n\\item{max_cached_hist_node}{(for Non-Exact Tree Methods) (default = 65536)\nMaximum number of cached nodes for histogram. This can be used with the \\code{\"hist\"} and the\n\\code{\"approx\"} tree methods.\n\nVersion added: 2.0.0\n\\itemize{\n\\item For most of the cases this parameter should not be set except for growing deep\ntrees. After 3.0, this parameter affects GPU algorithms as well.\n}}\n\n\\item{max_cat_to_onehot}{(for Non-Exact Tree Methods)\nA threshold for deciding whether XGBoost should use one-hot encoding based split for\ncategorical data.  When the number of categories is less than the threshold then one-hot\nencoding is chosen, otherwise the categories will be partitioned into children nodes.\n\nVersion added: 1.6.0}\n\n\\item{max_cat_threshold}{(for Non-Exact Tree Methods)\nMaximum number of categories considered for each split. 
Used only by partition-based\nsplits for preventing over-fitting.\n\nVersion added: 1.7.0}\n\n\\item{sample_type}{(for Dart Booster) (default= \\code{\"uniform\"})\nType of sampling algorithm.\n\\itemize{\n\\item \\code{\"uniform\"}: dropped trees are selected uniformly.\n\\item \\code{\"weighted\"}: dropped trees are selected in proportion to weight.\n}}\n\n\\item{normalize_type}{(for Dart Booster) (default= \\code{\"tree\"})\nType of normalization algorithm.\n\\itemize{\n\\item \\code{\"tree\"}: new trees have the same weight of each of dropped trees.\n\\itemize{\n\\item Weight of new trees are \\code{1 / (k + learning_rate)}.\n\\item Dropped trees are scaled by a factor of \\code{k / (k + learning_rate)}.\n}\n\\item \\code{\"forest\"}: new trees have the same weight of sum of dropped trees (forest).\n\\itemize{\n\\item Weight of new trees are \\code{1 / (1 + learning_rate)}.\n\\item Dropped trees are scaled by a factor of \\code{1 / (1 + learning_rate)}.\n}\n}}\n\n\\item{rate_drop}{(for Dart Booster) (default=0.0)\nDropout rate (a fraction of previous trees to drop during the dropout).\n\nrange: \\eqn{[0.0, 1.0]}}\n\n\\item{one_drop}{(for Dart Booster) (default=0)\nWhen this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper).}\n\n\\item{skip_drop}{(for Dart Booster) (default=0.0)\nProbability of skipping the dropout procedure during a boosting iteration.\n\\itemize{\n\\item If a dropout is skipped, new trees are added in the same manner as \\code{\"gbtree\"}.\n\\item Note that non-zero \\code{skip_drop} has higher priority than \\code{rate_drop} or \\code{one_drop}.\n}\n\nrange: \\eqn{[0.0, 1.0]}}\n\n\\item{feature_selector}{(for Linear Booster) (default= \\code{\"cyclic\"})\nFeature selection and ordering method\n\\itemize{\n\\item \\code{\"cyclic\"}: Deterministic selection by cycling through features one at a time.\n\\item \\code{\"shuffle\"}: Similar to 
\\code{\"cyclic\"} but with random feature shuffling prior to each update.\n\\item \\code{\"random\"}: A random (with replacement) coordinate selector.\n\\item \\code{\"greedy\"}: Select coordinate with the greatest gradient magnitude.  It has \\code{O(num_feature^2)} complexity. It is fully deterministic. It allows restricting the selection to \\code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \\code{top_k} parameter. Doing so would reduce the complexity to \\code{O(num_feature*top_k)}.\n\\item \\code{\"thrifty\"}: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to \\code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \\code{top_k} parameter.\n}}\n\n\\item{top_k}{(for Linear Booster) (default=0)\nThe number of top features to select in \\code{greedy} and \\code{thrifty} feature selector. The value of 0 means using all the features.}\n\n\\item{num_class}{Number of classes when using multi-class classification objectives (e.g. 
\\code{objective=\"multi:softprob\"})}\n\n\\item{tweedie_variance_power}{(for Tweedie Regression (\\code{\"objective=reg:tweedie\"})) (default=1.5)\n\\itemize{\n\\item Parameter that controls the variance of the Tweedie distribution \\code{var(y) ~ E(y)^tweedie_variance_power}\n\\item range: \\eqn{(1,2)}\n\\item Set closer to 2 to shift towards a gamma distribution\n\\item Set closer to 1 to shift towards a Poisson distribution.\n}}\n\n\\item{huber_slope}{(for using Pseudo-Huber (\\code{\"reg:pseudohubererror\"})) (default = 1.0)\nA parameter used for Pseudo-Huber loss to define the \\eqn{\\delta} term.}\n\n\\item{quantile_alpha}{(for using Quantile Loss (\\code{\"reg:quantileerror\"}))\nA scalar or a list of targeted quantiles (passed as a numeric vector).\n\nVersion added: 2.0.0}\n\n\\item{expectile_alpha}{(for using Expectile Loss (\\code{\"reg:expectileerror\"}))\nA scalar or a list of targeted expectiles (passed as a numeric vector).}\n\n\\item{aft_loss_distribution}{(when using AFT Survival Loss (\\code{\"survival:aft\"}) and Negative Log Likelihood of AFT metric (\\code{\"aft-nloglik\"}))\nProbability Density Function, \\code{\"normal\"}, \\code{\"logistic\"}, or \\code{\"extreme\"}.}\n\n\\item{aft_loss_distribution_scale}{(when using AFT Survival Loss (\\code{\"survival:aft\"}) and Negative Log Likelihood of AFT metric (\\code{\"aft-nloglik\"}))\nScaling factor for the AFT distribution. Range: \\eqn{(0, \\infty)}.}\n\n\\item{lambdarank_pair_method}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"})) (default = \\code{\"topk\"})\nHow to construct pairs for pair-wise learning.\n\\itemize{\n\\item \\code{\"mean\"}: Sample \\code{lambdarank_num_pair_per_sample} pairs for each document in the query list.\n\\item \\code{\"topk\"}: Focus on top-\\code{lambdarank_num_pair_per_sample} documents. 
Construct \\eqn{|query|} pairs for each document at the top-\\code{lambdarank_num_pair_per_sample} ranked by the model.\n}}\n\n\\item{lambdarank_num_pair_per_sample}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"}))\nIt specifies the number of pairs sampled for each document when pair method is \\code{\"mean\"}, or the truncation level for queries when the pair method is \\code{\"topk\"}. For example, to train with \\verb{ndcg@6}, set \\code{\"lambdarank_num_pair_per_sample\"} to \\eqn{6} and \\code{lambdarank_pair_method} to \\code{\"topk\"}.\n\nrange = \\eqn{[1, \\infty)}}\n\n\\item{lambdarank_normalization}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"})) (default = \\code{TRUE})\nWhether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress.\n\nVersion added: 2.1.0}\n\n\\item{lambdarank_score_normalization}{Whether to normalize the delta metric by the difference of prediction scores. This can\nsometimes stagnate the training progress. With pairwise ranking, we can normalize the\ngradient using the difference between two samples in each pair to reduce influence from\nthe pairs that have large difference in ranking scores. This can help us regularize the\nmodel to reduce bias and prevent overfitting. Similar to other regularization\ntechniques, this might prevent training from converging.\n\nThere was no normalization before 2.0. In 2.0 and later versions this is used by\ndefault. 
In 3.0, we made this an option that users can disable.\n\nVersion added: 3.0.0}\n\n\\item{lambdarank_unbiased}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"})) (default = \\code{FALSE})\nSpecify whether we need to debias input click data.}\n\n\\item{lambdarank_bias_norm}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"})) (default = 2.0)\n\\eqn{L_p} normalization for position debiasing, default is \\eqn{L_2}. Only relevant when \\code{lambdarank_unbiased} is set to \\code{TRUE}.}\n\n\\item{ndcg_exp_gain}{(for learning to rank (\\code{\"rank:ndcg\"}, \\code{\"rank:map\"}, \\code{\"rank:pairwise\"})) (default = \\code{TRUE})\nWhether we should use exponential gain function for \\code{NDCG}. There are two forms of gain function for \\code{NDCG}, one is using relevance value directly while the other is using \\eqn{2^{rel} - 1} to emphasize retrieving relevant documents. When \\code{ndcg_exp_gain} is \\code{TRUE} (the default), relevance degree cannot be greater than 31.}\n}\n\\value{\nA list with the entries that were passed non-NULL values. It is intended to\nbe passed as argument \\code{params} to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}.\n}\n\\description{\nConvenience function to generate a list of named XGBoost parameters, which\ncan be passed as argument \\code{params} to \\code{\\link[=xgb.train]{xgb.train()}}. See the \\href{https://xgboost.readthedocs.io/en/stable/parameter.html}{online documentation} for more details.\n\nThe purpose of this function is to enable IDE autocompletions and to provide in-package\ndocumentation for all the possible parameters that XGBoost accepts. The output from this\nfunction is just a regular R list containing the parameters that were set to non-default\nvalues. 
Note that this function will not perform any validation on the supplied arguments.\n\nIf passing \\code{NULL} for a given parameter (the default for all of them), then the default\nvalue for that parameter will be used. Default values are automatically determined by the\nXGBoost core library upon calls to \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=xgb.cv]{xgb.cv()}}, and are subject to change\nover XGBoost library versions. Some of them might differ according to the\nbooster type (e.g. defaults for regularization are different for linear and tree-based boosters).\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.deepness.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.deepness.R\n\\name{xgb.ggplot.deepness}\n\\alias{xgb.ggplot.deepness}\n\\alias{xgb.plot.deepness}\n\\title{Plot model tree depth}\n\\usage{\nxgb.ggplot.deepness(\n  model = NULL,\n  which = c(\"2x1\", \"max.depth\", \"med.depth\", \"med.weight\")\n)\n\nxgb.plot.deepness(\n  model = NULL,\n  which = c(\"2x1\", \"max.depth\", \"med.depth\", \"med.weight\"),\n  plot = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{model}{Either an \\code{xgb.Booster} model, or the \"data.table\" returned\nby \\code{\\link[=xgb.model.dt.tree]{xgb.model.dt.tree()}}.}\n\n\\item{which}{Which distribution to plot (see details).}\n\n\\item{plot}{Should the plot be shown? Default is \\code{TRUE}.}\n\n\\item{...}{Other parameters passed to \\code{\\link[graphics:barplot]{graphics::barplot()}} or \\code{\\link[graphics:plot.default]{graphics::plot()}}.}\n}\n\\value{\nThe return value of the two functions is as follows:\n\\itemize{\n\\item \\code{xgb.plot.deepness()}: A \"data.table\" (invisibly).\nEach row corresponds to a terminal leaf in the model. 
It contains its information\nabout depth, cover, and weight (used in calculating predictions).\nIf \\code{plot = TRUE}, also a plot is shown.\n\\item \\code{xgb.ggplot.deepness()}: When \\code{which = \"2x1\"}, a list of two \"ggplot\" objects,\nand a single \"ggplot\" object otherwise.\n}\n}\n\\description{\nVisualizes distributions related to the depth of tree leaves.\n\\itemize{\n\\item \\code{xgb.plot.deepness()} uses base R graphics, while\n\\item \\code{xgb.ggplot.deepness()} uses \"ggplot2\".\n}\n}\n\\details{\nWhen \\code{which = \"2x1\"}, two distributions with respect to the leaf depth\nare plotted on top of each other:\n\\enumerate{\n\\item The distribution of the number of leaves in a tree model at a certain depth.\n\\item The distribution of the average weighted number of observations (\"cover\")\nending up in leaves at a certain depth.\n}\n\nThose could be helpful in determining sensible ranges of the \\code{max_depth}\nand \\code{min_child_weight} parameters.\n\nWhen \\code{which = \"max.depth\"} or \\code{which = \"med.depth\"}, plots of either maximum or\nmedian depth per tree with respect to the tree number are created.\n\nFinally, \\code{which = \"med.weight\"} allows to see how\na tree's median absolute leaf weight changes through the iterations.\n\nThese functions have been inspired by the blog post\n\\url{https://github.com/aysent/random-forest-leaf-visualization}.\n}\n\\examples{\n\ndata(agaricus.train, package = \"xgboost\")\n## Keep the number of threads to 2 for examples\nnthread <- 2\ndata.table::setDTthreads(nthread)\n\n## Change max_depth to a higher number to get a more significant result\nmodel <- xgboost(\n  agaricus.train$data, factor(agaricus.train$label),\n  nrounds = 50,\n  max_depth = 6,\n  nthreads = nthread,\n  subsample = 0.5,\n  min_child_weight = 2\n)\n\nxgb.plot.deepness(model)\nxgb.ggplot.deepness(model)\n\nxgb.plot.deepness(\n  model, which = \"max.depth\", pch = 16, col = rgb(0, 0, 1, 0.3), cex = 
2\n)\n\nxgb.plot.deepness(\n  model, which = \"med.weight\", pch = 16, col = rgb(0, 0, 1, 0.3), cex = 2\n)\n\n}\n\\seealso{\n\\code{\\link[=xgb.train]{xgb.train()}} and \\code{\\link[=xgb.model.dt.tree]{xgb.model.dt.tree()}}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.importance.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.importance.R\n\\name{xgb.ggplot.importance}\n\\alias{xgb.ggplot.importance}\n\\alias{xgb.plot.importance}\n\\title{Plot feature importance}\n\\usage{\nxgb.ggplot.importance(\n  importance_matrix = NULL,\n  top_n = NULL,\n  measure = NULL,\n  rel_to_first = FALSE,\n  n_clusters = seq_len(10),\n  ...\n)\n\nxgb.plot.importance(\n  importance_matrix = NULL,\n  top_n = NULL,\n  measure = NULL,\n  rel_to_first = FALSE,\n  left_margin = 10,\n  cex = NULL,\n  plot = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{importance_matrix}{A \\code{data.table} as returned by \\code{\\link[=xgb.importance]{xgb.importance()}}.}\n\n\\item{top_n}{Maximal number of top features to include in the plot.}\n\n\\item{measure}{The name of importance measure to plot.\nWhen \\code{NULL}, 'Gain' would be used for trees and 'Weight' would be used for gblinear.}\n\n\\item{rel_to_first}{Whether importance values should be represented as relative to\nthe highest ranked feature, see Details.}\n\n\\item{n_clusters}{A numeric vector containing the min and the max range\nof the possible number of clusters of bars.}\n\n\\item{...}{Other parameters passed to \\code{\\link[graphics:barplot]{graphics::barplot()}}\n(except \\code{horiz}, \\code{border}, \\code{cex.names}, \\code{names.arg}, and \\code{las}).\nOnly used in \\code{xgb.plot.importance()}.}\n\n\\item{left_margin}{Adjust the left margin size to fit feature names.\nWhen \\code{NULL}, the existing \\code{par(\"mar\")} is used.}\n\n\\item{cex}{Passed as \\code{cex.names} parameter to \\code{\\link[graphics:barplot]{graphics::barplot()}}.}\n\n\\item{plot}{Should the barplot be shown? Default is \\code{TRUE}.}\n}\n\\value{\nThe return value depends on the function:\n\\itemize{\n\\item \\code{xgb.plot.importance()}: Invisibly, a \"data.table\" with \\code{top_n} features sorted\nby importance. 
If \\code{plot = TRUE}, the values are also plotted as barplot.\n\\item \\code{xgb.ggplot.importance()}: A customizable \"ggplot\" object.\nE.g., to change the title, set \\code{+ ggtitle(\"A GRAPH NAME\")}.\n}\n}\n\\description{\nRepresents previously calculated feature importance as a bar graph.\n\\itemize{\n\\item \\code{xgb.plot.importance()} uses base R graphics, while\n\\item \\code{xgb.ggplot.importance()} uses \"ggplot\".\n}\n}\n\\details{\nThe graph represents each feature as a horizontal bar of length proportional to the\nimportance of a feature. Features are sorted by decreasing importance.\nIt works for both \"gblinear\" and \"gbtree\" models.\n\nWhen \\code{rel_to_first = FALSE}, the values would be plotted as in \\code{importance_matrix}.\nFor a \"gbtree\" model, that would mean being normalized to the total of 1\n(\"what is feature's importance contribution relative to the whole model?\").\nFor linear models, \\code{rel_to_first = FALSE} would show actual values of the coefficients.\nSetting \\code{rel_to_first = TRUE} allows to see the picture from the perspective of\n\"what is feature's importance contribution relative to the most important feature?\"\n\nThe \"ggplot\" backend performs 1-D clustering of the importance values,\nwith bar colors corresponding to different clusters having similar importance values.\n}\n\\examples{\ndata(agaricus.train)\n\n## Keep the number of threads to 2 for examples\nnthread <- 2\ndata.table::setDTthreads(nthread)\n\nmodel <- xgboost(\n  agaricus.train$data, factor(agaricus.train$label),\n  nrounds = 2,\n  max_depth = 3,\n  nthreads = nthread\n)\n\nimportance_matrix <- xgb.importance(model)\nxgb.plot.importance(\n  importance_matrix, rel_to_first = TRUE, xlab = \"Relative importance\"\n)\n\ngg <- xgb.ggplot.importance(\n  importance_matrix, measure = \"Frequency\", rel_to_first = TRUE\n)\ngg\ngg + ggplot2::ylab(\"Frequency\")\n\n}\n\\seealso{\n\\code{\\link[graphics:barplot]{graphics::barplot()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.multi.trees.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.plot.multi.trees.R\n\\name{xgb.plot.multi.trees}\n\\alias{xgb.plot.multi.trees}\n\\title{Project all trees on one tree}\n\\usage{\nxgb.plot.multi.trees(\n  model,\n  features_keep = 5,\n  plot_width = NULL,\n  plot_height = NULL,\n  render = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{model}{Object of class \\code{xgb.Booster}. If it contains feature names\n(they can be set through \\code{\\link[=setinfo]{setinfo()}}), they will be used in the\noutput from this function.}\n\n\\item{features_keep}{Number of features to keep in each position of the multi trees,\nby default 5.}\n\n\\item{plot_width, plot_height}{Width and height of the graph in pixels.\nThe values are passed to \\code{DiagrammeR::render_graph()}.}\n\n\\item{render}{Should the graph be rendered or not? The default is \\code{TRUE}.}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nRendered graph object which is an htmlwidget of class \\code{grViz}. 
Similar to\n\"ggplot\" objects, it needs to be printed when not running from the command\nline.\n}\n\\description{\nVisualization of the ensemble of trees as a single collective unit.\n}\n\\details{\nNote that this function does not work with models that were fitted to\ncategorical data.\n\nThis function tries to capture the complexity of a gradient boosted tree model\nin a cohesive way by compressing an ensemble of trees into a single tree-graph representation.\nThe goal is to improve the interpretability of a model generally seen as black box.\n\nNote: this function is applicable to tree booster-based models only.\n\nIt takes advantage of the fact that the shape of a binary tree is only defined by\nits depth (therefore, in a boosting model, all trees have similar shape).\n\nMoreover, the trees tend to reuse the same features.\n\nThe function projects each tree onto one, and keeps for each position the\n\\code{features_keep} first features (based on the Gain per feature measure).\n\nThis function is inspired by this blog post:\n\\url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}\n}\n\\examples{\n\ndata(agaricus.train, package = \"xgboost\")\n\n## Keep the number of threads to 2 for examples\nnthread <- 2\ndata.table::setDTthreads(nthread)\n\nmodel <- xgboost(\n  agaricus.train$data, factor(agaricus.train$label),\n  nrounds = 30,\n  verbosity = 0L,\n  nthreads = nthread,\n  max_depth = 15,\n  learning_rate = 1,\n  min_child_weight = 50\n)\n\np <- xgb.plot.multi.trees(model, features_keep = 3)\nprint(p)\n\n# Below is an example of how to save this plot to a file.\nif (require(\"DiagrammeR\") && require(\"DiagrammeRsvg\") && require(\"rsvg\")) {\n  fname <- file.path(tempdir(), \"tree.pdf\")\n  gr <- xgb.plot.multi.trees(model, features_keep = 3, render = FALSE)\n  export_graph(gr, fname, width = 1500, height = 600)\n}\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.shap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.plot.shap.R\n\\name{xgb.plot.shap}\n\\alias{xgb.plot.shap}\n\\title{SHAP dependence plots}\n\\usage{\nxgb.plot.shap(\n  data,\n  shap_contrib = NULL,\n  features = NULL,\n  top_n = 1,\n  model = NULL,\n  trees = NULL,\n  target_class = NULL,\n  approxcontrib = FALSE,\n  subsample = NULL,\n  n_col = 1,\n  col = rgb(0, 0, 1, 0.2),\n  pch = \".\",\n  discrete_n_uniq = 5,\n  discrete_jitter = 0.01,\n  ylab = \"SHAP\",\n  plot_NA = TRUE,\n  col_NA = rgb(0.7, 0, 1, 0.6),\n  pch_NA = \".\",\n  pos_NA = 1.07,\n  plot_loess = TRUE,\n  col_loess = 2,\n  span_loess = 0.5,\n  which = c(\"1d\", \"2d\"),\n  plot = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{The data to explain as a \\code{matrix}, \\code{dgCMatrix}, or \\code{data.frame}.}\n\n\\item{shap_contrib}{Matrix of SHAP contributions of \\code{data}.\nThe default (\\code{NULL}) computes it from \\code{model} and \\code{data}.}\n\n\\item{features}{Vector of column indices or feature names to plot. When \\code{NULL}\n(default), the \\code{top_n} most important features are selected by \\code{\\link[=xgb.importance]{xgb.importance()}}.}\n\n\\item{top_n}{How many of the most important features (<= 100) should be selected?\nBy default 1 for SHAP dependence and 10 for SHAP summary.\nOnly used when \\code{features = NULL}.}\n\n\\item{model}{An \\code{xgb.Booster} model. Only required when \\code{shap_contrib = NULL} or\n\\code{features = NULL}.}\n\n\\item{trees}{Passed to \\code{\\link[=xgb.importance]{xgb.importance()}} when \\code{features = NULL}.}\n\n\\item{target_class}{Only relevant for multiclass models. The default (\\code{NULL})\naverages the SHAP values over all classes. 
Pass a (0-based) class index\nto show only SHAP values of that class.}\n\n\\item{approxcontrib}{Passed to \\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}} when \\code{shap_contrib = NULL}.}\n\n\\item{subsample}{Fraction of data points randomly picked for plotting.\nThe default (\\code{NULL}) will use up to 100k data points.}\n\n\\item{n_col}{Number of columns in a grid of plots.}\n\n\\item{col}{Color of the scatterplot markers.}\n\n\\item{pch}{Scatterplot marker.}\n\n\\item{discrete_n_uniq}{Maximal number of unique feature values to consider the\nfeature as discrete.}\n\n\\item{discrete_jitter}{Jitter amount added to the values of discrete features.}\n\n\\item{ylab}{The y-axis label in 1D plots.}\n\n\\item{plot_NA}{Should contributions of cases with missing values be plotted?\nDefault is \\code{TRUE}.}\n\n\\item{col_NA}{Color of marker for missing value contributions.}\n\n\\item{pch_NA}{Marker type for \\code{NA} values.}\n\n\\item{pos_NA}{Relative position of the x-location where \\code{NA} values are shown:\n\\code{min(x) + (max(x) - min(x)) * pos_NA}.}\n\n\\item{plot_loess}{Should loess-smoothed curves be plotted? (Default is \\code{TRUE}).\nThe smoothing is only done for features with more than 5 distinct values.}\n\n\\item{col_loess}{Color of loess curves.}\n\n\\item{span_loess}{The \\code{span} parameter of \\code{\\link[stats:loess]{stats::loess()}}.}\n\n\\item{which}{Whether to do univariate or bivariate plotting. Currently, only \"1d\" is implemented.}\n\n\\item{plot}{Should the plot be drawn? 
(Default is \\code{TRUE}).\nIf \\code{FALSE}, only a list of matrices is returned.}\n\n\\item{...}{Other parameters passed to \\code{\\link[graphics:plot.default]{graphics::plot()}}.}\n}\n\\value{\nIn addition to producing plots (when \\code{plot = TRUE}), it silently returns a list of two matrices:\n\\itemize{\n\\item \\code{data}: Feature value matrix.\n\\item \\code{shap_contrib}: Corresponding SHAP value matrix.\n}\n}\n\\description{\nVisualizes SHAP values against feature values to gain an impression of feature effects.\n}\n\\details{\nThese scatterplots represent how SHAP feature contributions depend on feature values.\nThe similarity to partial dependence plots is that they also give an idea for how feature values\naffect predictions. However, in partial dependence plots, we see marginal dependencies\nof model prediction on feature value, while SHAP dependence plots display the estimated\ncontributions of a feature to the prediction for each individual case.\n\nWhen \\code{plot_loess = TRUE}, feature values are rounded to three significant digits and\nweighted LOESS is computed and plotted, where the weights are the numbers of data points\nat each rounded value.\n\nNote: SHAP contributions are on the scale of the model margin.\nE.g., for a logistic binomial objective, the margin is on log-odds scale.\nAlso, since SHAP stands for \"SHapley Additive exPlanation\" (model prediction = sum of SHAP\ncontributions for all features + bias), depending on the objective used, transforming SHAP\ncontributions for a feature from the marginal to the prediction space is not necessarily\na meaningful thing to do.\n}\n\\examples{\n\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\nnrounds <- 20\n\nmodel_binary <- xgboost(\n  agaricus.train$data, factor(agaricus.train$label),\n  nrounds = nrounds,\n  verbosity = 0L,\n  learning_rate = 
0.1,\n  max_depth = 3L,\n  subsample = 0.5,\n  nthreads = nthread\n)\n\nxgb.plot.shap(agaricus.test$data, model = model_binary, features = \"odor=none\")\n\ncontr <- predict(model_binary, agaricus.test$data, type = \"contrib\")\nxgb.plot.shap(agaricus.test$data, contr, model = model_binary, top_n = 12, n_col = 3)\n\n# Summary plot\nxgb.ggplot.shap.summary(agaricus.test$data, contr, model = model_binary, top_n = 12)\n\n# Multiclass example - plots for each class separately:\nx <- as.matrix(iris[, -5])\nset.seed(123)\nis.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values\n\nmodel_multiclass <- xgboost(\n  x, iris$Species,\n  nrounds = nrounds,\n  verbosity = 0,\n  max_depth = 2,\n  subsample = 0.5,\n  nthreads = nthread\n)\nnclass <- 3\ntrees0 <- seq(from = 1, by = nclass, length.out = nrounds)\ncol <- rgb(0, 0, 1, 0.5)\n\nxgb.plot.shap(\n  x,\n  model = model_multiclass,\n  trees = trees0,\n  target_class = 0,\n  top_n = 4,\n  n_col = 2,\n  col = col,\n  pch = 16,\n  pch_NA = 17\n)\n\nxgb.plot.shap(\n  x,\n  model = model_multiclass,\n  trees = trees0 + 1,\n  target_class = 1,\n  top_n = 4,\n  n_col = 2,\n  col = col,\n  pch = 16,\n  pch_NA = 17\n)\n\nxgb.plot.shap(\n  x,\n  model = model_multiclass,\n  trees = trees0 + 2,\n  target_class = 2,\n  top_n = 4,\n  n_col = 2,\n  col = col,\n  pch = 16,\n  pch_NA = 17\n)\n\n# Summary plot\nxgb.ggplot.shap.summary(x, model = model_multiclass, target_class = 0, top_n = 4)\n\n}\n\\references{\n\\enumerate{\n\\item Scott M. Lundberg, Su-In Lee, \"A Unified Approach to Interpreting Model Predictions\",\nNIPS Proceedings 2017, \\url{https://arxiv.org/abs/1705.07874}\n\\item Scott M. Lundberg, Su-In Lee, \"Consistent feature attribution for tree ensembles\",\n\\url{https://arxiv.org/abs/1706.06060}\n}\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.shap.summary.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.shap.R\n\\name{xgb.ggplot.shap.summary}\n\\alias{xgb.ggplot.shap.summary}\n\\alias{xgb.plot.shap.summary}\n\\title{SHAP summary plot}\n\\usage{\nxgb.ggplot.shap.summary(\n  data,\n  shap_contrib = NULL,\n  features = NULL,\n  top_n = 10,\n  model = NULL,\n  trees = NULL,\n  target_class = NULL,\n  approxcontrib = FALSE,\n  subsample = NULL\n)\n\nxgb.plot.shap.summary(\n  data,\n  shap_contrib = NULL,\n  features = NULL,\n  top_n = 10,\n  model = NULL,\n  trees = NULL,\n  target_class = NULL,\n  approxcontrib = FALSE,\n  subsample = NULL\n)\n}\n\\arguments{\n\\item{data}{The data to explain as a \\code{matrix}, \\code{dgCMatrix}, or \\code{data.frame}.}\n\n\\item{shap_contrib}{Matrix of SHAP contributions of \\code{data}.\nThe default (\\code{NULL}) computes it from \\code{model} and \\code{data}.}\n\n\\item{features}{Vector of column indices or feature names to plot. When \\code{NULL}\n(default), the \\code{top_n} most important features are selected by \\code{\\link[=xgb.importance]{xgb.importance()}}.}\n\n\\item{top_n}{How many of the most important features (<= 100) should be selected?\nBy default 1 for SHAP dependence and 10 for SHAP summary.\nOnly used when \\code{features = NULL}.}\n\n\\item{model}{An \\code{xgb.Booster} model. Only required when \\code{shap_contrib = NULL} or\n\\code{features = NULL}.}\n\n\\item{trees}{Passed to \\code{\\link[=xgb.importance]{xgb.importance()}} when \\code{features = NULL}.}\n\n\\item{target_class}{Only relevant for multiclass models. The default (\\code{NULL})\naverages the SHAP values over all classes. 
Pass a (0-based) class index\nto show only SHAP values of that class.}\n\n\\item{approxcontrib}{Passed to \\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}} when \\code{shap_contrib = NULL}.}\n\n\\item{subsample}{Fraction of data points randomly picked for plotting.\nThe default (\\code{NULL}) will use up to 100k data points.}\n}\n\\value{\nA \\code{ggplot2} object.\n}\n\\description{\nVisualizes SHAP contributions of different features.\n}\n\\details{\nA point plot (each point representing one observation from \\code{data}) is\nproduced for each feature, with the points plotted on the SHAP value axis.\nEach point (observation) is coloured based on its feature value.\n\nThe plot allows to see which features have a negative / positive contribution\non the model prediction, and whether the contribution is different for larger\nor smaller values of the feature. Inspired by the summary plot of\n\\url{https://github.com/shap/shap}.\n}\n\\examples{\n# See examples in xgb.plot.shap()\n\n}\n\\seealso{\n\\code{\\link[=xgb.plot.shap]{xgb.plot.shap()}}, \\code{\\link[=xgb.ggplot.shap.summary]{xgb.ggplot.shap.summary()}},\nand the Python library \\url{https://github.com/shap/shap}.\n}\n"
  },
  {
    "path": "R-package/man/xgb.plot.tree.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.plot.tree.R\n\\name{xgb.plot.tree}\n\\alias{xgb.plot.tree}\n\\title{Plot boosted trees}\n\\usage{\nxgb.plot.tree(\n  model,\n  tree_idx = 1,\n  plot_width = NULL,\n  plot_height = NULL,\n  with_stats = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{model}{Object of class \\code{xgb.Booster}. If it contains feature names\n(they can be set through \\code{\\link[=setinfo]{setinfo()}}), they will be used in the\noutput from this function.}\n\n\\item{tree_idx}{An integer of the tree index that should be used. This\nis a 1-based index.}\n\n\\item{plot_width, plot_height}{Width and height of the graph in pixels.\nThe values are passed to \\code{DiagrammeR::render_graph()}.}\n\n\\item{with_stats}{Whether to dump some additional statistics about the\nsplits.  When this option is on, the model dump contains two additional\nvalues: gain is the approximate loss function gain we get in each split;\ncover is the sum of second order gradient in each node.}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nRendered graph object which is an htmlwidget of class \\code{grViz}. 
Similar to\n\"ggplot\" objects, it needs to be printed when not running from the command\nline.\n}\n\\description{\nRead a tree model text dump and plot the model.\n}\n\\details{\nThe content of each node is visualized as follows:\n\\itemize{\n\\item For non-terminal nodes, it will display the split condition (number or name\nif available, and the condition that would decide to which node to go\nnext).\n\\item Those nodes will be connected to their children by arrows that indicate\nwhether the branch corresponds to the condition being met or not being met.\n\\item Terminal (leaf) nodes contain the margin to add when ending there.\n}\n\nThe \"Yes\" branches are marked by the \"< split_value\" label.\nThe branches also used for missing values are marked as bold\n(as in \"carrying extra capacity\").\n\nThis function uses \\href{https://www.graphviz.org/}{GraphViz} as DiagrammeR\nbackend.\n}\n\\examples{\ndata(\"ToothGrowth\")\nx <- ToothGrowth[, c(\"len\", \"dose\")]\ny <- ToothGrowth$supp\nmodel <- xgboost(\n  x, y,\n  nthreads = 1L,\n  nrounds = 3L,\n  max_depth = 3L\n)\n\n# plot the first tree\nxgb.plot.tree(model, tree_idx = 1)\n\n# Below is an example of how to save this plot to a file.\nif (require(\"DiagrammeR\") && require(\"htmlwidgets\")) {\n  fname <- file.path(tempdir(), \"plot.html\")\n  gr <- xgb.plot.tree(model, tree_idx = 1)\n  htmlwidgets::saveWidget(gr, fname)\n}\n}\n"
  },
  {
    "path": "R-package/man/xgb.save.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.save.R\n\\name{xgb.save}\n\\alias{xgb.save}\n\\title{Save XGBoost model to binary file}\n\\usage{\nxgb.save(model, fname)\n}\n\\arguments{\n\\item{model}{Model object of \\code{xgb.Booster} class.}\n\n\\item{fname}{Name of the file to write. Its extension determines the serialization format:\n\\itemize{\n\\item \".ubj\": Use the universal binary JSON format (recommended).\nThis format uses binary types for e.g. floating point numbers, thereby preventing any loss\nof precision when converting to a human-readable JSON text or similar.\n\\item \".json\": Use plain JSON, which is a human-readable format.\n\\item \".deprecated\": Use \\strong{deprecated} binary format. This format will\nnot be able to save attributes introduced after v1 of XGBoost, such as the \"best_iteration\"\nattribute that boosters might keep, nor feature names or user-specified attributes.\n\\item If the format is not specified by passing one of the file extensions above, will\ndefault to UBJ.\n}}\n}\n\\description{\nSave XGBoost model to a file in binary or JSON format.\n}\n\\details{\nThis method allows saving a model in an XGBoost-internal binary or text format which is universal\namong the various xgboost interfaces. In R, the saved model file could be read later\nusing either the \\code{\\link[=xgb.load]{xgb.load()}} function or the \\code{xgb_model} parameter of \\code{\\link[=xgb.train]{xgb.train()}}.\n\nNote: a model can also be saved as an R object (e.g., by using \\code{\\link[=saveRDS]{saveRDS()}}\nor \\code{\\link[=save]{save()}}). However, it would then only be compatible with R, and\ncorresponding R methods would need to be used to load it. Moreover, persisting the model with\n\\code{\\link[=saveRDS]{saveRDS()}} or \\code{\\link[=save]{save()}} might cause compatibility problems in\nfuture versions of XGBoost. 
Consult \\link{a-compatibility-note-for-saveRDS-save} to learn\nhow to persist models in a future-proof way, i.e., to make the model accessible in future\nreleases of XGBoost.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\ntrain <- agaricus.train\ntest <- agaricus.test\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\nfname <- file.path(tempdir(), \"xgb.ubj\")\nxgb.save(bst, fname)\nbst <- xgb.load(fname)\n}\n\\seealso{\n\\code{\\link[=xgb.load]{xgb.load()}}\n}\n"
  },
  {
    "path": "R-package/man/xgb.save.raw.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.save.raw.R\n\\name{xgb.save.raw}\n\\alias{xgb.save.raw}\n\\title{Save XGBoost model to R's raw vector}\n\\usage{\nxgb.save.raw(model, raw_format = \"ubj\")\n}\n\\arguments{\n\\item{model}{The model object.}\n\n\\item{raw_format}{The format for encoding the booster:\n\\itemize{\n\\item \"json\": Encode the booster into JSON text document.\n\\item \"ubj\":  Encode the booster into Universal Binary JSON.\n\\item \"deprecated\": Encode the booster into old customized binary format.\n}}\n}\n\\description{\nSave XGBoost model from \\code{\\link[=xgboost]{xgboost()}} or \\code{\\link[=xgb.train]{xgb.train()}}.\nCall \\code{\\link[=xgb.load.raw]{xgb.load.raw()}} to load the model back from raw vector.\n}\n\\examples{\n\\dontshow{RhpcBLASctl::omp_set_num_threads(1)}\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\ntrain <- agaricus.train\ntest <- agaricus.test\n\nbst <- xgb.train(\n  data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n  nrounds = 2,\n  params = xgb.params(\n    max_depth = 2,\n    nthread = nthread,\n    objective = \"binary:logistic\"\n  )\n)\n\nraw <- xgb.save.raw(bst)\nbst <- xgb.load.raw(raw)\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.slice.Booster.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.Booster.R\n\\name{xgb.slice.Booster}\n\\alias{xgb.slice.Booster}\n\\alias{[.xgb.Booster}\n\\title{Slice Booster by Rounds}\n\\usage{\nxgb.slice.Booster(\n  model,\n  start,\n  end = xgb.get.num.boosted.rounds(model),\n  step = 1L\n)\n\n\\method{[}{xgb.Booster}(x, i)\n}\n\\arguments{\n\\item{model, x}{A fitted \\code{xgb.Booster} object, which is to be sliced by taking only a subset\nof its rounds / iterations.}\n\n\\item{start}{Start of the slice (base-1 and inclusive, like R's \\code{\\link[=seq]{seq()}}).}\n\n\\item{end}{End of the slice (base-1 and inclusive, like R's \\code{\\link[=seq]{seq()}}).\nPassing a value of zero here is equivalent to passing the full number of rounds in the\nbooster object.}\n\n\\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by\n\\verb{(start, end)}, while passing '2' will take every second value, and so on.}\n\n\\item{i}{The indices - must be an increasing sequence as generated by e.g. \\code{seq(...)}.}\n}\n\\value{\nA sliced booster object containing only the requested rounds.\n}\n\\description{\nCreates a new booster including only a selected range of rounds / iterations\nfrom an existing booster, as given by the sequence \\code{seq(start, end, step)}.\n}\n\\details{\nNote that any R attributes that the booster might have, will not be copied into\nthe resulting object.\n}\n\\examples{\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- as.matrix(mtcars[, -1])\n\ndm <- xgb.DMatrix(x, label = y, nthread = 1)\nmodel <- xgb.train(data = dm, params = xgb.params(nthread = 1), nrounds = 5)\nmodel_slice <- xgb.slice.Booster(model, 1, 3)\n# Prediction for first three rounds\npredict(model, x, predleaf = TRUE)[, 1:3]\n\n# The new model has only those rounds, so\n# a full prediction from it is equivalent\npredict(model_slice, x, predleaf = TRUE)\n}\n"
  },
  {
    "path": "R-package/man/xgb.slice.DMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.DMatrix.R\n\\name{xgb.slice.DMatrix}\n\\alias{xgb.slice.DMatrix}\n\\alias{[.xgb.DMatrix}\n\\title{Slice DMatrix}\n\\usage{\nxgb.slice.DMatrix(object, idxset, allow_groups = FALSE)\n\n\\method{[}{xgb.DMatrix}(object, idxset, colset = NULL)\n}\n\\arguments{\n\\item{object}{Object of class \\code{xgb.DMatrix}.}\n\n\\item{idxset}{An integer vector of indices of rows needed (base-1 indexing).}\n\n\\item{allow_groups}{Whether to allow slicing an \\code{xgb.DMatrix} with \\code{group} (or\nequivalently \\code{qid}) field. Note that in such case, the result will not have\nthe groups anymore - they need to be set manually through \\code{\\link[=setinfo]{setinfo()}}.}\n\n\\item{colset}{Currently not used (columns subsetting is not available).}\n}\n\\description{\nGet a new DMatrix containing the specified rows of original xgb.DMatrix object.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\n\ndtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))\n\ndsub <- xgb.slice.DMatrix(dtrain, 1:42)\nlabels1 <- getinfo(dsub, \"label\")\n\ndsub <- dtrain[1:42, ]\nlabels2 <- getinfo(dsub, \"label\")\nall.equal(labels1, labels2)\n\n}\n"
  },
  {
    "path": "R-package/man/xgb.train.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.train.R\n\\name{xgb.train}\n\\alias{xgb.train}\n\\title{Fit XGBoost Model}\n\\usage{\nxgb.train(\n  params = xgb.params(),\n  data,\n  nrounds,\n  evals = list(),\n  objective = NULL,\n  custom_metric = NULL,\n  verbose = 1,\n  print_every_n = 1L,\n  early_stopping_rounds = NULL,\n  maximize = NULL,\n  save_period = NULL,\n  save_name = \"xgboost.model\",\n  xgb_model = NULL,\n  callbacks = list(),\n  ...\n)\n}\n\\arguments{\n\\item{params}{List of XGBoost parameters which control the model building process.\nSee the \\href{https://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}\nand the documentation for \\code{\\link[=xgb.params]{xgb.params()}} for details.\n\nShould be passed as list with named entries. Parameters that are not specified in this\nlist will use their default values.\n\nA list of named parameters can be created through the function \\code{\\link[=xgb.params]{xgb.params()}}, which\naccepts all valid parameters as function arguments.}\n\n\\item{data}{Training dataset. 
\\code{xgb.train()} accepts only an \\code{xgb.DMatrix} as the input.\n\nNote that there is a function \\code{\\link[=xgboost]{xgboost()}} which is meant to accept R data objects\nas inputs, such as data frames and matrices.}\n\n\\item{nrounds}{Max number of boosting iterations.}\n\n\\item{evals}{Named list of \\code{xgb.DMatrix} datasets to use for evaluating model performance.\nMetrics specified in either \\code{eval_metric} (under params) or \\code{custom_metric} (function\nargument here) will be computed for each of these datasets during each boosting iteration,\nand stored in the end as a field named \\code{evaluation_log} in the resulting object.\n\nWhen either \\code{verbose>=1} or \\code{\\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback is engaged, the performance\nresults are continuously printed out during the training.\n\nE.g., specifying \\code{evals=list(validation1=mat1, validation2=mat2)} allows to track\nthe performance of each round's model on \\code{mat1} and \\code{mat2}.}\n\n\\item{objective}{Customized objective function. Should take two arguments: the first one will be the\ncurrent predictions (either a numeric vector or matrix depending on the number of targets / classes),\nand the second one will be the \\code{data} DMatrix object that is used for training.\n\nIt should return a list with two elements \\code{grad} and \\code{hess} (in that order), as either\nnumeric vectors or numeric matrices depending on the number of targets / classes (same\ndimension as the predictions that are passed as first argument).}\n\n\\item{custom_metric}{Customized evaluation function. 
Just like \\code{objective}, should take two arguments,\nwith the first one being the predictions and the second one the \\code{data} DMatrix.\n\nShould return a list with two elements \\code{metric} (name that will be displayed for this metric,\nshould be a string / character), and \\code{value} (the number that the function calculates, should\nbe a numeric scalar).\n\nNote that even if passing \\code{custom_metric}, objectives also have an associated default metric that\nwill be evaluated in addition to it. In order to disable the built-in metric, one can pass\nparameter \\code{disable_default_eval_metric = TRUE}.}\n\n\\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.\nIf 2, some additional information will be printed out.\nNote that setting \\code{verbose > 0} automatically engages the\n\\code{xgb.cb.print.evaluation(period=1)} callback function.}\n\n\\item{print_every_n}{When passing \\code{verbose>0}, evaluation logs (metrics calculated on the\ndata passed under \\code{evals}) will be printed every nth iteration according to the value passed\nhere. The first and last iteration are always included regardless of this 'n'.\n\nOnly has an effect when passing data under \\code{evals} and when passing \\code{verbose>0}. The parameter\nis passed to the \\code{\\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} callback.}\n\n\\item{early_stopping_rounds}{Number of boosting rounds after which training will be stopped\nif there is no improvement in performance (as measured by the evaluation metric that is\nsupplied or selected by default for the objective) on the evaluation data passed under\n\\code{evals}.\n\nMust pass \\code{evals} in order to use this functionality. 
Setting this parameter adds the\n\\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.\n\nIf \\code{NULL}, early stopping will not be used.}\n\n\\item{maximize}{If \\code{custom_metric} and \\code{early_stopping_rounds} are set, then this parameter must be set as well.\nWhen it is \\code{TRUE}, it means the larger the evaluation score the better.\nThis parameter is passed to the \\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}} callback.}\n\n\\item{save_period}{When not \\code{NULL}, model is saved to disk after every \\code{save_period} rounds.\n0 means save at the end. The saving is handled by the \\code{\\link[=xgb.cb.save.model]{xgb.cb.save.model()}} callback.}\n\n\\item{save_name}{the name or path for periodically saved model file.}\n\n\\item{xgb_model}{A previously built model to continue the training from.\nCould be either an object of class \\code{xgb.Booster}, or its raw data, or the name of a\nfile with a previously saved model.}\n\n\\item{callbacks}{A list of callback functions to perform various tasks during boosting.\nSee \\code{\\link[=xgb.Callback]{xgb.Callback()}}. Some of the callbacks are automatically created depending on the\nparameters' values. User can provide either existing or their own callback methods in order\nto customize the training process.\n\nNote that some callbacks might try to leave attributes in the resulting model object,\nsuch as an evaluation log (a \\code{data.table} object) - be aware that these objects are kept\nas R attributes, and thus do not get saved when using XGBoost's own serializers like\n\\code{\\link[=xgb.save]{xgb.save()}} (but are kept when using R serializers like \\code{\\link[=saveRDS]{saveRDS()}}).}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. 
This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nAn object of class \\code{xgb.Booster}.\n}\n\\description{\nFits an XGBoost model to given data in DMatrix format (e.g. as produced by \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}}).\nSee the tutorial \\href{https://xgboost.readthedocs.io/en/stable/tutorials/model.html}{Introduction to Boosted Trees}\nfor a longer explanation of what XGBoost does, and the rest of the\n\\href{https://xgboost.readthedocs.io/en/latest/tutorials/index.html}{XGBoost Tutorials} for further\nexplanations of XGBoost's features and usage.\n\nCompared to function \\code{\\link[=xgboost]{xgboost()}} which is a user-friendly function targeted towards interactive\nusage, \\code{xgb.train} is a lower-level interface which allows finer-grained control and exposes\nfurther functionalities offered by the core library (such as learning-to-rank objectives), but\nwhich works exclusively with XGBoost's own data format (\"DMatrices\") instead of with regular R\nobjects.\n\nThe syntax of this function closely mimics the same function from the Python package for XGBoost,\nand is recommended to use for package developers over \\code{xgboost()} as it will provide a more\nstable interface (with fewer breaking changes) and lower overhead from data validations.\n\nSee also the \\href{https://xgboost.readthedocs.io/en/latest/R-package/migration_guide.html}{migration guide}\nif coming from a previous version of XGBoost in the 1.x series.\n}\n\\details{\nCompared to \\code{\\link[=xgboost]{xgboost()}}, the 
\\code{xgb.train()} interface supports advanced features such as\n\\code{evals}, customized objective and evaluation metric functions, among others, with the\ndifference that these work with \\code{xgb.DMatrix} objects and do not follow typical R idioms.\n\nParallelization is automatically enabled if OpenMP is present.\nNumber of threads can also be manually specified via the \\code{nthread} parameter.\n\nWhile in other XGBoost language bindings, the random seed defaults to zero, in R, if a parameter \\code{seed}\nis not manually supplied, it will generate a random seed through R's own random number generator,\nwhose seed in turn is controllable through \\code{set.seed}. If \\code{seed} is passed, it will override the\nRNG from R.\n\nThe following callbacks are automatically created when certain parameters are set:\n\\itemize{\n\\item \\code{\\link[=xgb.cb.print.evaluation]{xgb.cb.print.evaluation()}} is turned on when \\code{verbose > 0} and the \\code{print_every_n}\nparameter is passed to it.\n\\item \\code{\\link[=xgb.cb.evaluation.log]{xgb.cb.evaluation.log()}} is on when \\code{evals} is present.\n\\item \\code{\\link[=xgb.cb.early.stop]{xgb.cb.early.stop()}}: When \\code{early_stopping_rounds} is set.\n\\item \\code{\\link[=xgb.cb.save.model]{xgb.cb.save.model()}}: When \\code{save_period > 0} is set.\n}\n\nNote that objects of type \\code{xgb.Booster} as returned by this function behave a bit differently\nfrom typical R objects (it's an 'altrep' list class), and it makes a separation between\ninternal booster attributes (restricted to jsonifyable data), accessed through \\code{\\link[=xgb.attr]{xgb.attr()}}\nand shared between interfaces through serialization functions like \\code{\\link[=xgb.save]{xgb.save()}}; and\nR-specific attributes (typically the result from a callback), accessed through \\code{\\link[=attributes]{attributes()}}\nand \\code{\\link[=attr]{attr()}}, which are otherwise\nonly used in the R interface, only kept when using R's serializers like 
\\code{\\link[=saveRDS]{saveRDS()}}, and\nnot anyhow used by functions like \\code{predict.xgb.Booster()}.\n\nBe aware that one such R attribute that is automatically added is \\code{params} - this attribute\nis assigned from the \\code{params} argument to this function, and is only meant to serve as a\nreference for what went into the booster, but is not used in other methods that take a booster\nobject - so for example, changing the booster's configuration requires calling \\verb{xgb.config<-}\nor \\verb{xgb.model.parameters<-}, while simply modifying \\verb{attributes(model)$params$<...>} will have no\neffect elsewhere.\n}\n\\examples{\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\n\n## Keep the number of threads to 1 for examples\nnthread <- 1\ndata.table::setDTthreads(nthread)\n\ndtrain <- with(\n  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)\n)\ndtest <- with(\n  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)\n)\nevals <- list(train = dtrain, eval = dtest)\n\n## A simple xgb.train example:\nparam <- xgb.params(\n  max_depth = 2,\n  nthread = nthread,\n  objective = \"binary:logistic\",\n  eval_metric = \"auc\"\n)\nbst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)\n\n## An xgb.train example where custom objective and evaluation metric are\n## used:\nlogregobj <- function(preds, dtrain) {\n   labels <- getinfo(dtrain, \"label\")\n   preds <- 1/(1 + exp(-preds))\n   grad <- preds - labels\n   hess <- preds * (1 - preds)\n   return(list(grad = grad, hess = hess))\n}\nevalerror <- function(preds, dtrain) {\n  labels <- getinfo(dtrain, \"label\")\n  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)\n  return(list(metric = \"error\", value = err))\n}\n\n# These functions could be used by passing them as 'objective' and\n# 'eval_metric' parameters in the params list:\nparam <- xgb.params(\n  max_depth = 2,\n  nthread = nthread,\n  objective = 
logregobj,\n  eval_metric = evalerror\n)\nbst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)\n\n# ... or as dedicated 'objective' and 'custom_metric' parameters of xgb.train:\nbst <- xgb.train(\n  within(param, rm(\"objective\", \"eval_metric\")),\n  dtrain, nrounds = 2, evals = evals,\n  objective = logregobj, custom_metric = evalerror\n)\n\n\n## An xgb.train example of using variable learning rates at each iteration:\nparam <- xgb.params(\n  max_depth = 2,\n  learning_rate = 1,\n  nthread = nthread,\n  objective = \"binary:logistic\",\n  eval_metric = \"auc\"\n)\nmy_learning_rates <- list(learning_rate = c(0.5, 0.1))\n\nbst <- xgb.train(\n param,\n dtrain,\n nrounds = 2,\n evals = evals,\n verbose = 0,\n callbacks = list(xgb.cb.reset.parameters(my_learning_rates))\n)\n\n## Early stopping:\nbst <- xgb.train(\n  param, dtrain, nrounds = 25, evals = evals, early_stopping_rounds = 3\n)\n}\n\\references{\nTianqi Chen and Carlos Guestrin, \"XGBoost: A Scalable Tree Boosting System\",\n22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \\url{https://arxiv.org/abs/1603.02754}\n}\n\\seealso{\n\\code{\\link[=xgb.Callback]{xgb.Callback()}}, \\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}}, \\code{\\link[=xgb.cv]{xgb.cv()}}\n}\n"
  },
  {
    "path": "R-package/man/xgbConfig.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgb.config.R\n\\name{xgb.set.config, xgb.get.config}\n\\alias{xgb.set.config, xgb.get.config}\n\\alias{xgb.set.config}\n\\alias{xgb.get.config}\n\\title{Set and get global configuration}\n\\usage{\nxgb.set.config(...)\n\nxgb.get.config()\n}\n\\arguments{\n\\item{...}{List of parameters to be set, as keyword arguments}\n}\n\\value{\n\\code{xgb.set.config()} returns \\code{TRUE} to signal success. \\code{xgb.get.config()} returns\na list containing all global-scope parameters and their values.\n}\n\\description{\nGlobal configuration consists of a collection of parameters that can be applied in the global\nscope. See \\url{https://xgboost.readthedocs.io/en/stable/parameter.html} for the full list of\nparameters supported in the global configuration. Use \\code{xgb.set.config()} to update the\nvalues of one or more global-scope parameters. Use \\code{xgb.get.config()} to fetch the current\nvalues of all global-scope parameters (listed in\n\\url{https://xgboost.readthedocs.io/en/stable/parameter.html}).\n}\n\\details{\nNote that serialization-related functions might use a globally-configured number of threads,\nwhich is managed by the system's OpenMP (OMP) configuration instead. 
Typically, XGBoost methods\naccept an \\code{nthreads} parameter, but some methods like \\code{\\link[=readRDS]{readRDS()}} might get executed before such\nparameter can be supplied.\n\nThe number of OMP threads can in turn be configured for example through an environment variable\n\\code{OMP_NUM_THREADS} (needs to be set before R is started), or through \\code{RhpcBLASctl::omp_set_num_threads}.\n}\n\\examples{\n# Set verbosity level to silent (0)\nxgb.set.config(verbosity = 0)\n# Now global verbosity level is 0\nconfig <- xgb.get.config()\nprint(config$verbosity)\n# Set verbosity level to warning (1)\nxgb.set.config(verbosity = 1)\n# Now global verbosity level is 1\nconfig <- xgb.get.config()\nprint(config$verbosity)\n}\n"
  },
  {
    "path": "R-package/man/xgboost-options.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils.R\n\\name{xgboost-options}\n\\alias{xgboost-options}\n\\title{XGBoost Options}\n\\description{\nXGBoost offers an \\link[base:options]{option setting} for controlling the behavior\nof deprecated and removed function arguments.\n\nSome of the arguments in functions like \\code{\\link[=xgb.train]{xgb.train()}} or \\code{\\link[=predict.xgb.Booster]{predict.xgb.Booster()}} have been renamed\nfrom how they were in previous versions, or have been removed.\n\nIn order to make the transition to newer XGBoost versions easier, some of these parameters are\nstill accepted but issue a warning when using them. \\bold{Note that these warnings will become\nerrors in the future!!} - this is just a temporary workaround to make the transition easier.\n\nOne can optionally use 'strict mode' to turn these warnings into errors, in order to ensure\nthat code calling xgboost will still work once those are removed in future releases.\n\nCurrently, the only supported option is \\code{xgboost.strict_mode}, which can be set to \\code{TRUE} or\n\\code{FALSE} (default).\n\nIn addition to an R option, it can also be enabled by setting the environment variable\n\\code{XGB_STRICT_MODE=1}. If set, this environment variable will take precedence over the option.\n}\n\\examples{\noptions(\"xgboost.strict_mode\" = FALSE)\noptions(\"xgboost.strict_mode\" = TRUE)\nSys.setenv(\"XGB_STRICT_MODE\" = \"1\")\nSys.setenv(\"XGB_STRICT_MODE\" = \"0\")\n}\n"
  },
  {
    "path": "R-package/man/xgboost.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/xgboost.R\n\\name{xgboost}\n\\alias{xgboost}\n\\title{Fit XGBoost Model}\n\\usage{\nxgboost(\n  x,\n  y,\n  objective = NULL,\n  nrounds = 100L,\n  max_depth = NULL,\n  learning_rate = NULL,\n  min_child_weight = NULL,\n  min_split_loss = NULL,\n  reg_lambda = NULL,\n  weights = NULL,\n  verbosity = if (is.null(eval_set)) 0L else 1L,\n  monitor_training = verbosity > 0,\n  eval_set = NULL,\n  early_stopping_rounds = NULL,\n  print_every_n = 1L,\n  eval_metric = NULL,\n  nthreads = parallel::detectCores(),\n  seed = 0L,\n  base_margin = NULL,\n  monotone_constraints = NULL,\n  interaction_constraints = NULL,\n  reg_alpha = NULL,\n  max_bin = NULL,\n  max_leaves = NULL,\n  booster = NULL,\n  subsample = NULL,\n  sampling_method = NULL,\n  feature_weights = NULL,\n  colsample_bytree = NULL,\n  colsample_bylevel = NULL,\n  colsample_bynode = NULL,\n  tree_method = NULL,\n  max_delta_step = NULL,\n  scale_pos_weight = NULL,\n  updater = NULL,\n  grow_policy = NULL,\n  num_parallel_tree = NULL,\n  multi_strategy = NULL,\n  base_score = NULL,\n  seed_per_iteration = NULL,\n  device = NULL,\n  disable_default_eval_metric = NULL,\n  use_rmm = NULL,\n  max_cached_hist_node = NULL,\n  max_cat_to_onehot = NULL,\n  max_cat_threshold = NULL,\n  sample_type = NULL,\n  normalize_type = NULL,\n  rate_drop = NULL,\n  one_drop = NULL,\n  skip_drop = NULL,\n  feature_selector = NULL,\n  top_k = NULL,\n  tweedie_variance_power = NULL,\n  huber_slope = NULL,\n  quantile_alpha = NULL,\n  expectile_alpha = NULL,\n  aft_loss_distribution = NULL,\n  ...\n)\n}\n\\arguments{\n\\item{x}{The features / covariates. 
Can be passed as:\n\\itemize{\n\\item A numeric or integer \\code{matrix}.\n\\item A \\code{data.frame}, in which all columns are one of the following types:\n\\itemize{\n\\item \\code{numeric}\n\\item \\code{integer}\n\\item \\code{logical}\n\\item \\code{factor}\n}\n\nColumns of \\code{factor} type will be assumed to be categorical, while other column types will\nbe assumed to be numeric.\n\\item A sparse matrix from the \\code{Matrix} package, either as \\code{dgCMatrix} or \\code{dgRMatrix} class.\n}\n\nNote that categorical features are only supported for \\code{data.frame} inputs, and are automatically\ndetermined based on their types. See \\code{\\link[=xgb.train]{xgb.train()}} with \\code{\\link[=xgb.DMatrix]{xgb.DMatrix()}} for more flexible\nvariants that would allow something like categorical features on sparse matrices.}\n\n\\item{y}{The response variable. Allowed values are:\n\\itemize{\n\\item A numeric or integer vector (for regression tasks).\n\\item A factor or character vector (for binary and multi-class classification tasks).\n\\item A logical (boolean) vector (for binary classification tasks).\n\\item A numeric or integer matrix or \\code{data.frame} with numeric/integer columns\n(for multi-task regression tasks).\n\\item A \\code{Surv} object from the 'survival' package (for survival tasks).\n}\n\nIf \\code{objective} is \\code{NULL}, the right task will be determined automatically based on\nthe class of \\code{y}.\n\nIf \\code{objective} is not \\code{NULL}, it must match with the type of \\code{y} - e.g. \\code{factor} types of \\code{y}\ncan only be used with classification objectives and vice-versa.\n\nFor binary classification, the last factor level of \\code{y} will be used as the \"positive\"\nclass - that is, the numbers from \\code{predict} will reflect the probabilities of belonging to this\nclass instead of to the first factor level. 
If \\code{y} is a \\code{logical} vector, then \\code{TRUE} will be\nset as the last level.}\n\n\\item{objective}{Optimization objective to minimize based on the supplied data, to be passed\nby name as a string / character (e.g. \\code{reg:absoluteerror}). See the\n\\href{https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters}{Learning Task Parameters}\npage and the \\code{\\link[=xgb.params]{xgb.params()}} documentation for more detailed information on allowed values.\n\nIf \\code{NULL} (the default), will be automatically determined from \\code{y} according to the following\nlogic:\n\\itemize{\n\\item If \\code{y} is a factor with 2 levels, will use \\code{binary:logistic}.\n\\item If \\code{y} is a factor with more than 2 levels, will use \\code{multi:softprob} (number of classes\nwill be determined automatically, should not be passed under \\code{params}).\n\\item If \\code{y} is a \\code{Surv} object from the \\code{survival} package, will use \\code{survival:aft} (note that\nthe only types supported are left / right / interval censored).\n\\item Otherwise, will use \\code{reg:squarederror}.\n}\n\nIf \\code{objective} is not \\code{NULL}, it must match with the type of \\code{y} - e.g. \\code{factor} types of \\code{y}\ncan only be used with classification objectives and vice-versa.\n\nNote that not all possible \\code{objective} values supported by the core XGBoost library are allowed\nhere - for example, objectives which are a variation of another but with a different default\nprediction type (e.g. \\code{multi:softmax} vs. \\code{multi:softprob}) are not allowed, and neither are\nranking objectives, nor custom objectives at the moment.\n\nSupported values are:\n\\itemize{\n\\item \\code{\"reg:squarederror\"}: regression with squared loss.\n\\item \\code{\"reg:squaredlogerror\"}: regression with squared log loss \\eqn{\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2}.  All input labels are required to be greater than -1.  
Also, see metric \\code{rmsle} for possible issue  with this objective.\n\\item \\code{\"reg:pseudohubererror\"}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.\n\\item \\code{\"reg:absoluteerror\"}: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.\n\\item \\code{\"reg:quantileerror\"}: Quantile loss, also known as \"pinball loss\". See later sections for its parameter and \\href{https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html#sphx-glr-python-examples-quantile-regression-py}{Quantile Regression} for a worked example.\n\\item \\code{\"reg:expectileerror\"}: Expectile loss. See later sections for its parameter.\n\\item \\code{\"binary:logistic\"}: logistic regression for binary classification, output probability\n\\item \\code{\"binary:hinge\"}: hinge loss for binary classification. 
This makes predictions of 0 or 1, rather than producing probabilities.\n\\item \\code{\"count:poisson\"}: Poisson regression for count data, output mean of Poisson distribution.\n\\code{\"max_delta_step\"} is set to 0.7 by default in Poisson regression (used to safeguard optimization)\n\\item \\code{\"survival:cox\"}: Cox regression for right censored survival time data (negative values are considered right censored).\n\nNote that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \\code{h(t) = h0(t) * HR}).\n\\item \\code{\"survival:aft\"}: Accelerated failure time model for censored survival time data.\nSee \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n\\item \\code{\"multi:softprob\"}: multi-class classification through multinomial logistic likelihood.\n\\item \\code{\"reg:gamma\"}: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be \\href{https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications}{gamma-distributed}.\n\\item \\code{\"reg:tweedie\"}: Tweedie regression with log-link. 
It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be \\href{https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications}{Tweedie-distributed}.\n}\n\nThe following values are \\bold{NOT} supported by \\code{xgboost}, but are supported by \\code{\\link[=xgb.train]{xgb.train()}}\n(see \\code{\\link[=xgb.params]{xgb.params()}} for details):\n\\itemize{\n\\item \\code{\"reg:logistic\"}\n\\item \\code{\"binary:logitraw\"}\n\\item \\code{\"multi:softmax\"}\n\\item \\code{\"rank:ndcg\"}\n\\item \\code{\"rank:map\"}\n\\item \\code{\"rank:pairwise\"}\n}}\n\n\\item{nrounds}{Number of boosting iterations / rounds.\n\nNote that the number of default boosting rounds here is not automatically tuned, and different\nproblems will have vastly different optimal numbers of boosting rounds.}\n\n\\item{max_depth}{(for Tree Booster) (default=6, type=int32)\nMaximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. \\code{\"exact\"} tree method requires non-zero value.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{learning_rate}{(alias: \\code{eta})\nStep size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and \\code{learning_rate} shrinks the feature weights to make the boosting process more conservative.\n\\itemize{\n\\item range: \\eqn{[0,1]}\n\\item default value: 0.3 for tree-based boosters, 0.5 for linear booster.\n}}\n\n\\item{min_child_weight}{(for Tree Booster) (default=1)\nMinimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than \\code{min_child_weight}, then the building process will give up further partitioning. 
In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger \\code{min_child_weight} is, the more conservative the algorithm will be.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{min_split_loss}{(for Tree Booster) (default=0, alias: \\code{gamma})\nMinimum loss reduction required to make a further partition on a leaf node of the tree. The larger \\code{min_split_loss} is, the more conservative the algorithm will be. Note that a tree where no splits were made might still contain a single terminal node with a non-zero score.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{reg_lambda}{(alias: \\code{lambda})\n\\itemize{\n\\item For tree-based boosters:\n\\itemize{\n\\item L2 regularization term on weights. Increasing this value will make model more conservative.\n\\item default: 1\n\\item range: \\eqn{[0, \\infty]}\n}\n\\item For linear booster:\n\\itemize{\n\\item L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.\n\\item default: 0\n\\item range: \\eqn{[0, \\infty)}\n}\n}}\n\n\\item{weights}{Sample weights for each row in \\code{x} and \\code{y}. If \\code{NULL} (the default), each row\nwill have the same weight.\n\nIf not \\code{NULL}, should be passed as a numeric vector with length matching to the number of rows in \\code{x}.}\n\n\\item{verbosity}{Verbosity of printing messages. Valid values of 0 (silent), 1 (warning),\n2 (info), and 3 (debug).}\n\n\\item{monitor_training}{Whether to monitor objective optimization progress on the input data.\nNote that same 'x' and 'y' data are used for both model fitting and evaluation.}\n\n\\item{eval_set}{Subset of the data to use as evaluation set. 
Can be passed as:\n\\itemize{\n\\item A vector of row indices (base-1 numeration) indicating the observations that are to be designated\nas evaluation data.\n\\item A number between zero and one indicating a random fraction of the input data to use as\nevaluation data. Note that the selection will be done uniformly at random, regardless of\nargument \\code{weights}.\n}\n\nIf passed, this subset of the data will be excluded from the training procedure, and the\nevaluation metric(s) supplied under \\code{eval_metric} will be calculated on this dataset after each\nboosting iteration (pass \\code{verbosity>0} to have these metrics printed during training). If\n\\code{eval_metric} is not passed, a default metric will be selected according to \\code{objective}.\n\nIf passing a fraction, in classification problems, the evaluation set will be chosen in such a\nway that at least one observation of each class will be kept in the training data.\n\nFor more elaborate evaluation variants (e.g. custom metrics, multiple evaluation sets, etc.),\none might want to use \\code{\\link[=xgb.train]{xgb.train()}} instead.}\n\n\\item{early_stopping_rounds}{Number of boosting rounds after which training will be stopped\nif there is no improvement in performance (as measured by the last metric passed under\n\\code{eval_metric}, or by the default metric for the objective if \\code{eval_metric} is not passed) on the\nevaluation data from \\code{eval_set}. Must pass \\code{eval_set} in order to use this functionality.\n\nIf \\code{NULL}, early stopping will not be used.}\n\n\\item{print_every_n}{When passing \\code{verbosity>0} and either \\code{monitor_training=TRUE} or \\code{eval_set},\nevaluation logs (metrics calculated on the training and/or evaluation data) will be printed every\nnth iteration according to the value passed here. 
The first and last iteration are always\nincluded regardless of this 'n'.\n\nOnly has an effect when passing \\code{verbosity>0}.}\n\n\\item{eval_metric}{(default according to objective)\n\\itemize{\n\\item Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, \\verb{mean average precision} for \\code{rank:map}, etc.)\n\\item User can add multiple evaluation metrics.\n\\item The choices are listed below:\n\\itemize{\n\\item \\code{\"rmse\"}: root mean square error\n\\item \\code{\"rmsle\"}: root mean square log error: \\eqn{\\sqrt{\\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of \\code{\"reg:squaredlogerror\"} objective. This metric reduces errors generated by outliers in dataset.  But because \\code{log} function is employed, \\code{\"rmsle\"} might output \\code{nan} when prediction value is less than -1.  See \\code{\"reg:squaredlogerror\"} for other requirements.\n\\item \\code{\"mae\"}: mean absolute error.\n\\item \\code{\"mape\"}: mean absolute percentage error.\n\\item \\code{\"mphe\"}: mean Pseudo Huber error. Default metric of \\code{\"reg:pseudohubererror\"} objective.\n\\item \\code{\"logloss\"}: negative log-likelihood.\n\\item \\code{\"error\"}: Binary classification error rate. It is calculated as \\verb{#(wrong cases)/#(all cases)}. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.\n\\item \\code{\"error@t\"}: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'.\n\\item \\code{\"merror\"}: Multiclass classification error rate. 
It is calculated as \\verb{#(wrong cases)/#(all cases)}.\n\\item \\code{\"mlogloss\"}: \\href{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}{Multiclass logloss}.\n\\item \\code{\"auc\"}: area under the receiver-operating characteristic curve.\nAvailable for classification and learning-to-rank tasks.\n\\itemize{\n\\item When used with binary classification, the objective should be \\code{\"binary:logistic\"} or similar functions that work on probability.\n\\item When used with multi-class classification, objective should be \\code{\"multi:softprob\"} instead of \\code{\"multi:softmax\"}, as the latter doesn't output probability.  Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence.\n\\item When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs.  This corresponds to pairwise learning to rank.  The implementation has some issues with average AUC around groups and distributed workers not being well-defined.\n\\item On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.\n\\item When input dataset contains only negative or positive samples, the output is \\code{NaN}.  The behavior is implementation defined, for instance, \\code{scikit-learn} returns \\eqn{0.5} instead.\n}\n\\item \\code{\"aucpr\"}: area under the PR curve\nAvailable for classification and learning-to-rank tasks.\n\nAfter XGBoost 1.6, both of the requirements and restrictions for using \\code{\"aucpr\"} in classification problem are similar to \\code{\"auc\"}.  For ranking task, only binary relevance label \\eqn{y \\in [0, 1]} is supported.  
Different from \\code{\"map\"} (mean average precision), \\code{\"aucpr\"} calculates the \\emph{interpolated} area under precision recall curve using continuous interpolation.\n\\item \\code{\"pre\"}: Precision at \\eqn{k}. Supports only learning to rank task.\n\\item \\code{\"ndcg\"}: normalized discounted cumulative gain\n\\item \\code{\"map\"}: mean average precision\n\nThe \\verb{average precision} is defined as:\n\n\\eqn{AP@l = \\frac{1}{min{(l, N)}}\\sum^l_{k=1}P@k \\cdot I_{(k)}}\n\nwhere \\eqn{I_{(k)}} is an indicator function that equals to \\eqn{1} when the document at \\eqn{k} is relevant and \\eqn{0} otherwise. The \\eqn{P@k} is the precision at \\eqn{k}, and \\eqn{N} is the total number of relevant documents. Lastly, the \\verb{mean average precision} is defined as the weighted average across all queries.\n\\item \\code{\"ndcg@n\"}, \\code{\"map@n\"}, \\code{\"pre@n\"}: \\eqn{n} can be assigned as an integer to cut off the top positions in the lists for evaluation.\n\\item \\code{\"ndcg-\"}, \\code{\"map-\"}, \\code{\"ndcg@n-\"}, \\code{\"map@n-\"}: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as \\eqn{1}. 
By appending \"-\" to the evaluation metric name, we can ask XGBoost to evaluate these scores as \\eqn{0} to be consistent under some conditions.\n\\item \\code{\"poisson-nloglik\"}: negative log-likelihood for Poisson regression\n\\item \\code{\"gamma-nloglik\"}: negative log-likelihood for gamma regression\n\\item \\code{\"cox-nloglik\"}: negative partial log-likelihood for Cox proportional hazards regression\n\\item \\code{\"gamma-deviance\"}: residual deviance for gamma regression\n\\item \\code{\"tweedie-nloglik\"}: negative log-likelihood for Tweedie regression (at a specified value of the \\code{tweedie_variance_power} parameter)\n\\item \\code{\"aft-nloglik\"}: Negative log likelihood of Accelerated Failure Time model.\nSee \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n\\item \\code{\"interval-regression-accuracy\"}: Fraction of data points whose predicted labels fall in the interval-censored labels.\nOnly applicable for interval-censored data.  See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details.\n}\n}}\n\n\\item{nthreads}{Number of parallel threads to use. If passing zero, will use all CPU threads.}\n\n\\item{seed}{Seed to use for random number generation. 
If passing \\code{NULL}, will draw a random\nnumber using R's PRNG system to use as seed.}\n\n\\item{base_margin}{Base margin used for boosting from existing model.\n\nIf passing it, will start the gradient boosting procedure from the scores that are provided\nhere - for example, one can pass the raw scores from a previous model, or some per-observation\noffset, or similar.\n\nShould be either a numeric vector or numeric matrix (for multi-class and multi-target objectives)\nwith the same number of rows as \\code{x} and number of columns corresponding to number of optimization\ntargets, and should be in the untransformed scale (for example, for objective \\code{binary:logistic},\nit should have log-odds, not probabilities; and for objective \\code{multi:softprob}, should have\nnumber of columns matching to number of classes in the data).\n\nNote that, if it contains more than one column, then columns will not be matched by name to\nthe corresponding \\code{y} - \\code{base_margin} should have the same column order that the model will use\n(for example, for objective \\code{multi:softprob}, columns of \\code{base_margin} will be matched against\n\\code{levels(y)} by their position, regardless of what \\code{colnames(base_margin)} returns).\n\nIf \\code{NULL}, will start from zero, but note that for most objectives, an intercept is usually\nadded (controllable through parameter \\code{base_score} instead) when \\code{base_margin} is not passed.}\n\n\\item{monotone_constraints}{Optional monotonicity constraints for features.\n\nCan be passed either as a named list (when \\code{x} has column names), or as a vector. 
If passed\nas a vector and \\code{x} has column names, will try to match the elements by name.\n\nA value of \\code{+1} for a given feature makes the model predictions / scores constrained to be\na monotonically increasing function of that feature (that is, as the value of the feature\nincreases, the model prediction cannot decrease), while a value of \\code{-1} makes it a monotonically\ndecreasing function. A value of zero imposes no constraint.\n\nThe input for \\code{monotone_constraints} can be a subset of the columns of \\code{x} if named, in which\ncase the columns that are not referred to in \\code{monotone_constraints} will be assumed to have\na value of zero (no constraint imposed on the model for those features).\n\nSee the tutorial \\href{https://xgboost.readthedocs.io/en/stable/tutorials/monotonic.html}{Monotonic Constraints}\nfor a more detailed explanation.}\n\n\\item{interaction_constraints}{Constraints for interaction representing permitted interactions.\nThe constraints must be specified in the form of a list of vectors referencing columns in the\ndata, e.g. \\code{list(c(1, 2), c(3, 4, 5))} (with these numbers being column indices, numeration\nstarting at 1 - i.e. the first sublist references the first and second columns) or\n\\code{list(c(\"Sepal.Length\", \"Sepal.Width\"), c(\"Petal.Length\", \"Petal.Width\"))} (references\ncolumns by names), where each vector is a group of indices of features that are allowed to\ninteract with each other.\n\nSee the tutorial \\href{https://xgboost.readthedocs.io/en/stable/tutorials/feature_interaction_constraint.html}{Feature Interaction Constraints}\nfor more information.}\n\n\\item{reg_alpha}{(alias: \\code{reg_alpha})\n\\itemize{\n\\item L1 regularization term on weights. 
Increasing this value will make model more conservative.\n\\item For the linear booster, it's normalised to number of training examples.\n\\item default: 0\n\\item range: \\eqn{[0, \\infty)}\n}}\n\n\\item{max_bin}{(for Tree Booster) (default=256, type=int32)\n\\itemize{\n\\item Only used if \\code{tree_method} is set to \\code{\"hist\"} or \\code{\"approx\"}.\n\\item Maximum number of discrete bins to bucket continuous features.\n\\item Increasing this number improves the optimality of splits at the cost of higher computation time.\n}}\n\n\\item{max_leaves}{(for Tree Booster) (default=0, type=int32)\nMaximum number of nodes to be added.  Not used by \\code{\"exact\"} tree method.}\n\n\\item{booster}{(default= \\code{\"gbtree\"})\nWhich booster to use. Can be \\code{\"gbtree\"}, \\code{\"gblinear\"} or \\code{\"dart\"}; \\code{\"gbtree\"} and \\code{\"dart\"} use tree based models while \\code{\"gblinear\"} uses linear functions.}\n\n\\item{subsample}{(for Tree Booster) (default=1)\nSubsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. Subsampling will occur once in every boosting iteration.\n\nrange: \\eqn{(0,1]}}\n\n\\item{sampling_method}{(for Tree Booster) (default= \\code{\"uniform\"})\nThe method to use to sample the training instances.\n\\itemize{\n\\item \\code{\"uniform\"}: each training instance has an equal probability of being selected. Typically set\n\\code{\"subsample\"} >= 0.5 for good results.\n\\item \\code{\"gradient_based\"}: the selection probability for each training instance is proportional to the\n\\bold{regularized absolute value} of gradients (more specifically, \\eqn{\\sqrt{g^2+\\lambda h^2}}).\n\\code{\"subsample\"} may be set to as low as 0.1 without loss of model accuracy. 
Note that this\nsampling method is only supported when \\code{\"tree_method\"} is set to \\code{\"hist\"}; other tree\nmethods only support \\code{\"uniform\"} sampling.\n}}\n\n\\item{feature_weights}{Feature weights for column sampling.\n\nCan be passed either as a vector with length matching to columns of \\code{x}, or as a named\nlist (only if \\code{x} has column names) with names matching to columns of 'x'. If it is a\nnamed vector, will try to match the entries to column names of \\code{x} by name.\n\nIf \\code{NULL} (the default), all columns will have the same weight.}\n\n\\item{colsample_bytree, colsample_bylevel, colsample_bynode}{(for Tree Booster) (default=1)\nThis is a family of parameters for subsampling of columns.\n\\itemize{\n\\item All \\code{\"colsample_by*\"} parameters have a range of \\eqn{(0, 1]}, the default value of 1, and specify the fraction of columns to be subsampled.\n\\item \\code{\"colsample_bytree\"} is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.\n\\item \\code{\"colsample_bylevel\"} is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.\n\\item \\code{\"colsample_bynode\"} is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.\n\\item \\code{\"colsample_by*\"} parameters work cumulatively. 
For instance,\nthe combination \\verb{\\{'colsample_bytree'=0.5, 'colsample_bylevel'=0.5, 'colsample_bynode'=0.5\\}} with 64 features will leave 8 features to choose from at\neach split.\n}\n\nOne can set the \\code{\"feature_weights\"} for DMatrix to\ndefine the probability of each feature being selected when using column sampling.}\n\n\\item{tree_method}{(for Tree Booster) (default= \\code{\"auto\"})\nThe tree construction algorithm used in XGBoost. See description in the \\href{https://arxiv.org/abs/1603.02754}{reference paper} and \\href{https://xgboost.readthedocs.io/en/latest/treemethod.html}{Tree Methods}.\n\nChoices: \\code{\"auto\"}, \\code{\"exact\"}, \\code{\"approx\"}, \\code{\"hist\"}, this is a combination of commonly\nused updaters.  For other updaters like \\code{\"refresh\"}, set the parameter \\code{updater}\ndirectly.\n\\itemize{\n\\item \\code{\"auto\"}: Same as the \\code{\"hist\"} tree method.\n\\item \\code{\"exact\"}: Exact greedy algorithm.  Enumerates all split candidates.\n\\item \\code{\"approx\"}: Approximate greedy algorithm using quantile sketch and gradient histogram.\n\\item \\code{\"hist\"}: Faster histogram optimized approximate greedy algorithm.\n}}\n\n\\item{max_delta_step}{(for Tree Booster) (default=0)\nMaximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Set it to value of 1-10 might help control the update.\n\nrange: \\eqn{[0, \\infty)}}\n\n\\item{scale_pos_weight}{(for Tree Booster) (default=1)\nControl the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: \\verb{sum(negative instances) / sum(positive instances)}. 
See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html}{Parameters Tuning} for more discussion. Also, see Higgs Kaggle competition demo for examples: \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{R}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py}{py1}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py}{py2}, \\href{https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py}{py3}.}\n\n\\item{updater}{(for Linear Booster) (default= \\code{\"shotgun\"})\nChoice of algorithm to fit linear model\n\\itemize{\n\\item \\code{\"shotgun\"}: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.\n\\item \\code{\"coord_descent\"}: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the \\code{device} parameter is set to \\code{\"cuda\"} or \\code{\"gpu\"}, a GPU variant would be used.\n}}\n\n\\item{grow_policy}{(for Tree Booster) (default= \\code{\"depthwise\"})\n\\itemize{\n\\item Controls a way new nodes are added to the tree.\n\\item Currently supported only if \\code{tree_method} is set to \\code{\"hist\"} or \\code{\"approx\"}.\n\\item Choices: \\code{\"depthwise\"}, \\code{\"lossguide\"}\n\\itemize{\n\\item \\code{\"depthwise\"}: split at nodes closest to the root.\n\\item \\code{\"lossguide\"}: split at nodes with highest loss change.\n}\n}}\n\n\\item{num_parallel_tree}{(for Tree Booster) (default=1)\nNumber of parallel trees constructed during each iteration. This option is used to support boosted random forest.}\n\n\\item{multi_strategy}{(for Tree Booster) (default = \\code{\"one_output_per_tree\"})\nThe strategy used for training multi-target models, including multi-target regression\nand multi-class classification. 
See \\href{https://xgboost.readthedocs.io/en/latest/tutorials/multioutput.html}{Multiple Outputs} for more information.\n\\itemize{\n\\item \\code{\"one_output_per_tree\"}: One model for each target.\n\\item \\code{\"multi_output_tree\"}:  Use multi-target trees.\n}\n\nVersion added: 2.0.0\n\nNote: This parameter is a work in progress.}\n\n\\item{base_score}{\\itemize{\n\\item The initial prediction score of all instances, global bias\n\\item The parameter is automatically estimated for selected objectives before training. To\ndisable the estimation, specify a real number argument.\n\\item If \\code{base_margin} is supplied, \\code{base_score} will not be added.\n\\item For sufficient number of iterations, changing this value will not have too much effect.\n}}\n\n\\item{seed_per_iteration}{(default= \\code{FALSE})\nSeed PRNG deterministically via iterator number.}\n\n\\item{device}{(default= \\code{\"cpu\"})\nDevice for XGBoost to run. User can set it to one of the following values:\n\\itemize{\n\\item \\code{\"cpu\"}: Use CPU.\n\\item \\code{\"cuda\"}: Use a GPU (CUDA device).\n\\item \\code{\"cuda:<ordinal>\"}: \\verb{<ordinal>} is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one device).\n\\item \\code{\"gpu\"}: Default GPU device selection from the list of available and supported devices. Only \\code{\"cuda\"} devices are supported currently.\n\\item \\code{\"gpu:<ordinal>\"}: Default GPU device selection from the list of available and supported devices. Only \\code{\"cuda\"} devices are supported currently.\n}\n\nFor more information about GPU acceleration, see \\href{https://xgboost.readthedocs.io/en/latest/gpu/index.html}{XGBoost GPU Support}. In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using \\code{\"cuda:<ordinal>\"} will result in an error. 
Use \\code{\"cuda\"} instead.\n\nVersion added: 2.0.0\n\nNote: if XGBoost was installed from CRAN, it won't have GPU support enabled, thus only \\code{\"cpu\"} will be available.\nTo get GPU support, the R package for XGBoost must be installed from source or from the GitHub releases - see\n\\href{https://xgboost.readthedocs.io/en/latest/install.html#r}{instructions}.}\n\n\\item{disable_default_eval_metric}{(default= \\code{FALSE})\nFlag to disable default metric. Set to 1 or \\code{TRUE} to disable.}\n\n\\item{use_rmm}{Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU\nmemory. The primary memory is always allocated on the RMM pool when XGBoost is built\n(compiled) with the RMM plugin enabled. Valid values are \\code{TRUE} and \\code{FALSE}. See\n\\href{https://xgboost.readthedocs.io/en/latest/python/rmm-examples/index.html}{Using XGBoost with RAPIDS Memory Manager (RMM) plugin} for details.}\n\n\\item{max_cached_hist_node}{(for Non-Exact Tree Methods) (default = 65536)\nMaximum number of cached nodes for histogram. This can be used with the \\code{\"hist\"} and the\n\\code{\"approx\"} tree methods.\n\nVersion added: 2.0.0\n\\itemize{\n\\item For most of the cases this parameter should not be set except for growing deep\ntrees. After 3.0, this parameter affects GPU algorithms as well.\n}}\n\n\\item{max_cat_to_onehot}{(for Non-Exact Tree Methods)\nA threshold for deciding whether XGBoost should use one-hot encoding based split for\ncategorical data.  When the number of categories is less than the threshold, one-hot\nencoding is chosen; otherwise the categories will be partitioned into children nodes.\n\nVersion added: 1.6.0}\n\n\\item{max_cat_threshold}{(for Non-Exact Tree Methods)\nMaximum number of categories considered for each split. 
Used only by partition-based\nsplits for preventing over-fitting.\n\nVersion added: 1.7.0}\n\n\\item{sample_type}{(for Dart Booster) (default= \\code{\"uniform\"})\nType of sampling algorithm.\n\\itemize{\n\\item \\code{\"uniform\"}: dropped trees are selected uniformly.\n\\item \\code{\"weighted\"}: dropped trees are selected in proportion to weight.\n}}\n\n\\item{normalize_type}{(for Dart Booster) (default= \\code{\"tree\"})\nType of normalization algorithm.\n\\itemize{\n\\item \\code{\"tree\"}: new trees have the same weight of each of dropped trees.\n\\itemize{\n\\item Weight of new trees are \\code{1 / (k + learning_rate)}.\n\\item Dropped trees are scaled by a factor of \\code{k / (k + learning_rate)}.\n}\n\\item \\code{\"forest\"}: new trees have the same weight of sum of dropped trees (forest).\n\\itemize{\n\\item Weight of new trees are \\code{1 / (1 + learning_rate)}.\n\\item Dropped trees are scaled by a factor of \\code{1 / (1 + learning_rate)}.\n}\n}}\n\n\\item{rate_drop}{(for Dart Booster) (default=0.0)\nDropout rate (a fraction of previous trees to drop during the dropout).\n\nrange: \\eqn{[0.0, 1.0]}}\n\n\\item{one_drop}{(for Dart Booster) (default=0)\nWhen this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper).}\n\n\\item{skip_drop}{(for Dart Booster) (default=0.0)\nProbability of skipping the dropout procedure during a boosting iteration.\n\\itemize{\n\\item If a dropout is skipped, new trees are added in the same manner as \\code{\"gbtree\"}.\n\\item Note that non-zero \\code{skip_drop} has higher priority than \\code{rate_drop} or \\code{one_drop}.\n}\n\nrange: \\eqn{[0.0, 1.0]}}\n\n\\item{feature_selector}{(for Linear Booster) (default= \\code{\"cyclic\"})\nFeature selection and ordering method\n\\itemize{\n\\item \\code{\"cyclic\"}: Deterministic selection by cycling through features one at a time.\n\\item \\code{\"shuffle\"}: Similar to 
\\code{\"cyclic\"} but with random feature shuffling prior to each update.\n\\item \\code{\"random\"}: A random (with replacement) coordinate selector.\n\\item \\code{\"greedy\"}: Select coordinate with the greatest gradient magnitude.  It has \\code{O(num_feature^2)} complexity. It is fully deterministic. It allows restricting the selection to \\code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \\code{top_k} parameter. Doing so would reduce the complexity to \\code{O(num_feature*top_k)}.\n\\item \\code{\"thrifty\"}: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to \\code{top_k} features per group with the largest magnitude of univariate weight change, by setting the \\code{top_k} parameter.\n}}\n\n\\item{top_k}{(for Linear Booster) (default=0)\nThe number of top features to select in \\code{greedy} and \\code{thrifty} feature selector. 
The value of 0 means using all the features.}\n\n\\item{tweedie_variance_power}{(for Tweedie Regression (\\code{\"objective=reg:tweedie\"})) (default=1.5)\n\\itemize{\n\\item Parameter that controls the variance of the Tweedie distribution \\code{var(y) ~ E(y)^tweedie_variance_power}\n\\item range: \\eqn{(1,2)}\n\\item Set closer to 2 to shift towards a gamma distribution\n\\item Set closer to 1 to shift towards a Poisson distribution.\n}}\n\n\\item{huber_slope}{(for using Pseudo-Huber (\\code{\"reg:pseudohubererror\"})) (default = 1.0)\nA parameter used for Pseudo-Huber loss to define the \\eqn{\\delta} term.}\n\n\\item{quantile_alpha}{(for using Quantile Loss (\\code{\"reg:quantileerror\"}))\nA scalar or a list of targeted quantiles (passed as a numeric vector).\n\nVersion added: 2.0.0}\n\n\\item{expectile_alpha}{(for using Expectile Loss (\\code{\"reg:expectileerror\"}))\nA scalar or a list of targeted expectiles (passed as a numeric vector).}\n\n\\item{aft_loss_distribution}{(when using AFT Survival Loss (\\code{\"survival:aft\"}) and Negative Log Likelihood of AFT metric (\\code{\"aft-nloglik\"}))\nProbability Density Function, \\code{\"normal\"}, \\code{\"logistic\"}, or \\code{\"extreme\"}.}\n\n\\item{...}{Not used.\n\nSome arguments that were part of this function in previous XGBoost versions are currently\ndeprecated or have been renamed. If a deprecated or renamed argument is passed, will throw\na warning (by default) and use its current equivalent instead. This warning will become an\nerror if using the \\link[=xgboost-options]{'strict mode' option}.\n\nIf some additional argument is passed that is neither a current function argument nor\na deprecated or renamed argument, a warning or error will be thrown depending on the\n'strict mode' option.\n\n\\bold{Important:} \\code{...} will be removed in a future version, and all the current\ndeprecation warnings will become errors. 
Please use only arguments that form part of\nthe function signature.}\n}\n\\value{\nA model object, inheriting from both \\code{xgboost} and \\code{xgb.Booster}. Compared to the regular\n\\code{xgb.Booster} model class produced by \\code{\\link[=xgb.train]{xgb.train()}}, this \\code{xgboost} class will have an\nadditional attribute \\code{metadata} containing information which is used for formatting prediction\noutputs, such as class names for classification problems.\n}\n\\description{\nFits an XGBoost model (boosted decision tree ensemble) to given x/y data.\n\nSee the tutorial \\href{https://xgboost.readthedocs.io/en/stable/tutorials/model.html}{Introduction to Boosted Trees}\nfor a longer explanation of what XGBoost does, and the rest of the\n\\href{https://xgboost.readthedocs.io/en/latest/tutorials/index.html}{XGBoost Tutorials} for further\nexplanations of XGBoost's features and usage.\n\nThis function is intended to provide a user-friendly interface for XGBoost that follows\nR's conventions for model fitting and predictions, but which doesn't expose all of the\npossible functionalities of the core XGBoost library.\n\nSee \\code{\\link[=xgb.train]{xgb.train()}} for a more flexible low-level alternative which is similar across different\nlanguage bindings of XGBoost and which exposes additional functionalities such as training on\nexternal memory data and learning-to-rank objectives.\n\nSee also the \\href{https://xgboost.readthedocs.io/en/latest/R-package/migration_guide.html}{migration guide}\nif coming from a previous version of XGBoost in the 1.x series.\n\nBy default, most of the parameters here have a value of \\code{NULL}, which signals XGBoost to use its\ndefault value. Default values are automatically determined by the XGBoost core library, and are\nsubject to change over XGBoost library versions. Some of them might differ according to the\nbooster type (e.g. 
defaults for regularization are different for linear and tree-based boosters).\nSee \\code{\\link[=xgb.params]{xgb.params()}} and the \\href{https://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}\nfor more details about parameters - but note that some of the parameters are not supported in\nthe \\code{xgboost()} interface.\n}\n\\details{\nFor package authors using 'xgboost' as a dependency, it is highly recommended to use\n\\code{\\link[=xgb.train]{xgb.train()}} in package code instead of \\code{\\link[=xgboost]{xgboost()}}, since it has a more stable interface\nand performs fewer data conversions and copies along the way.\n}\n\\examples{\ndata(mtcars)\n\n# Fit a small regression model on the mtcars data\nmodel_regression <- xgboost(mtcars[, -1], mtcars$mpg, nthreads = 1, nrounds = 3)\npredict(model_regression, mtcars, validate_features = TRUE)\n\n# Task objective is determined automatically according to the type of 'y'\ndata(iris)\nmodel_classif <- xgboost(iris[, -5], iris$Species, nthreads = 1, nrounds = 5)\npredict(model_classif, iris[1:10,])\npredict(model_classif, iris[1:10,], type = \"class\")\n\n# Can nevertheless choose a non-default objective if needed\nmodel_poisson <- xgboost(\n  mtcars[, -1], mtcars$mpg,\n  objective = \"count:poisson\",\n  nthreads = 1,\n  nrounds = 3\n)\n\n# Can calculate evaluation metrics during boosting rounds\ndata(ToothGrowth)\nxgboost(\n  ToothGrowth[, c(\"len\", \"dose\")],\n  ToothGrowth$supp,\n  eval_metric = c(\"auc\", \"logloss\"),\n  eval_set = 0.2,\n  monitor_training = TRUE,\n  verbosity = 1,\n  nthreads = 1,\n  nrounds = 3\n)\n}\n\\references{\n\\itemize{\n\\item Chen, Tianqi, and Carlos Guestrin. \"Xgboost: A scalable tree boosting system.\"\nProceedings of the 22nd acm sigkdd international conference on knowledge discovery and\ndata mining. 2016.\n\\item \\url{https://xgboost.readthedocs.io/en/stable/}\n}\n}\n"
  },
  {
    "path": "R-package/pkgdown/_pkgdown.yml",
    "content": "url: https://github.com/dmlc/xgboost\n\ntemplate:\n  bootstrap: 5\n  math-rendering: mathjax\n\nreference:\n  - title: High Level Interface\n    desc: High level XGBoost interface\n    contents:\n      - \"xgboost\"\n      - \"predict.xgboost\"\n      - \"print.xgboost\"\n  - title: Datasets\n    desc: Test datasets bundled with the R package.\n    contents:\n      - \"agaricus.train\"\n      - \"agaricus.test\"\n  - title: Global Configuration\n    desc: Global configuration for the XGBoost library.\n    contents:\n      - \"xgb.config\"\n      - \"xgb.set.config\"\n      - \"xgb.get.config\"\n  - title: DMatrix\n    desc: Low-level data storage.\n    contents:\n      - \"xgb.DMatrix\"\n      - \"xgb.DMatrix.hasinfo\"\n      - \"xgb.DMatrix.save\"\n      - \"dim.xgb.DMatrix\"\n      - \"dimnames.xgb.DMatrix\"\n      - \"print.xgb.DMatrix\"\n      - \"xgb.DataBatch\"\n      - \"xgb.DataIter\"\n      - \"xgb.get.DMatrix.data\"\n      - \"xgb.get.DMatrix.num.non.missing\"\n      - \"xgb.ExtMemDMatrix\"\n      - \"xgb.QuantileDMatrix.from_iterator\"\n      - \"xgb.get.DMatrix.qcut\"\n      - \"xgb.slice.DMatrix\"\n  - title: Booster\n    desc: The model for XGBoost.\n    contents:\n      - \"a-compatibility-note-for-saveRDS-save\"\n      - \"coef.xgb.Booster\"\n      - \"getinfo.xgb.Booster\"\n      - \"predict.xgb.Booster\"\n      - \"print.xgb.Booster\"\n      - \"xgb.load\"\n      - \"xgb.load.raw\"\n      - \"xgb.save\"\n      - \"xgb.save.raw\"\n      - \"xgb.copy.Booster\"\n      - \"xgb.slice.Booster\"\n      - \"xgb.get.num.boosted.rounds\"\n      - \"xgb.is.same.Booster\"\n      - \"xgb.importance\"\n      - \"xgb.attr\"\n      - \"xgb.create.features\"\n      - \"xgb.model.dt.tree\"\n      - \"xgb.model.parameters<-\"\n      - \"xgb.ggplot.deepness\"\n      - \"xgb.dump\"\n      - \"variable.names.xgb.Booster\"\n      - \"xgb.ggplot.importance\"\n      - \"xgb.plot.multi.trees\"\n      - \"xgb.plot.shap\"\n      - 
\"xgb.ggplot.shap.summary\"\n      - \"xgb.plot.tree\"\n      - \"xgb.gblinear.history\"\n  - title: Training Callbacks\n    desc: Callback functions used for training.\n    contents:\n      - \"xgb.Callback\"\n      - \"xgb.cb.cv.predict\"\n      - \"xgb.cb.early.stop\"\n      - \"xgb.cb.evaluation.log\"\n      - \"xgb.cb.gblinear.history\"\n      - \"xgb.cb.print.evaluation\"\n      - \"xgb.cb.reset.parameters\"\n      - \"xgb.cb.save.model\"\n  - title: Low-level Training Functions\n    desc: Low-level Training Functions with DMatrix and Booster\n    contents:\n      - \"xgb.params\"\n      - \"xgb.train\"\n      - \"xgb.cv\"\n      - \"print.xgb.cv.synchronous\"\n  - title: Deprecation Settings\n    contents:\n      - \"xgboost-options\"\n"
  },
  {
    "path": "R-package/remove_warning_suppression_pragma.sh",
    "content": "#!/bin/bash\n# remove all #pragma's that suppress compiler warnings\nset -e\nset -x\nfor file in xgboost/src/dmlc-core/include/dmlc/*.h\ndo\n  sed -i.bak -e 's/^.*#pragma GCC diagnostic.*$//' -e 's/^.*#pragma clang diagnostic.*$//' -e 's/^.*#pragma warning.*$//' \"${file}\"\ndone\nfor file in xgboost/src/dmlc-core/include/dmlc/*.h.bak\ndo\n  rm \"${file}\"\ndone\nset +x\nset +e\n"
  },
  {
    "path": "R-package/src/Makevars.in",
    "content": "# package root\nPKGROOT=../../\nENABLE_STD_THREAD=1\n# _*_ mode: Makefile; _*_\n\nCXX_STD = CXX17\n\nXGB_RFLAGS = \\\n    @DMLC_DEFS@ \\\n    @XGBOOST_BUILTIN_PREFETCH_PRESENT@ \\\n    @XGBOOST_MM_PREFETCH_PRESENT@ \\\n    -DXGBOOST_STRICT_R_MODE=1 \\\n    -DDMLC_LOG_BEFORE_THROW=0 \\\n    -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) \\\n    -DDMLC_DISABLE_STDIN=1 \\\n    -DDMLC_LOG_CUSTOMIZE=1\n\n# disable the use of thread_local for 32 bit windows:\nifeq ($(R_OSTYPE)$(WIN),windows)\n    XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0\nendif\n$(foreach v, $(XGB_RFLAGS), $(warning $(v)))\n\nPKG_CPPFLAGS = \\\n    -I$(PKGROOT)/include \\\n    -I$(PKGROOT)/dmlc-core/include \\\n    -I$(PKGROOT) \\\n    $(XGB_RFLAGS)\n\nPKG_CXXFLAGS = \\\n    @OPENMP_CXXFLAGS@ \\\n    @ENDIAN_FLAG@ \\\n    -pthread \\\n    $(CXX_VISIBILITY)\n\nPKG_LIBS = \\\n    @OPENMP_CXXFLAGS@ \\\n    @OPENMP_LIB@ \\\n    @ENDIAN_FLAG@ \\\n    @BACKTRACE_LIB@ \\\n    -pthread\n\nOBJECTS= \\\n    ./xgboost_R.o \\\n    ./xgboost_custom.o \\\n    ./init.o \\\n    $(PKGROOT)/src/metric/metric.o \\\n    $(PKGROOT)/src/metric/elementwise_metric.o \\\n    $(PKGROOT)/src/metric/multiclass_metric.o \\\n    $(PKGROOT)/src/metric/rank_metric.o \\\n    $(PKGROOT)/src/metric/auc.o \\\n    $(PKGROOT)/src/metric/survival_metric.o \\\n    $(PKGROOT)/src/objective/objective.o \\\n    $(PKGROOT)/src/objective/regression_obj.o \\\n    $(PKGROOT)/src/objective/multiclass_obj.o \\\n    $(PKGROOT)/src/objective/lambdarank_obj.o \\\n    $(PKGROOT)/src/objective/hinge.o \\\n    $(PKGROOT)/src/objective/aft_obj.o \\\n    $(PKGROOT)/src/objective/adaptive.o \\\n    $(PKGROOT)/src/objective/init_estimation.o \\\n    $(PKGROOT)/src/objective/quantile_obj.o \\\n    $(PKGROOT)/src/gbm/gbm.o \\\n    $(PKGROOT)/src/gbm/gbtree.o \\\n    $(PKGROOT)/src/gbm/gbtree_model.o \\\n    $(PKGROOT)/src/gbm/gblinear.o \\\n    $(PKGROOT)/src/gbm/gblinear_model.o \\\n    $(PKGROOT)/src/data/adapter.o \\\n    
$(PKGROOT)/src/data/array_interface.o \\\n    $(PKGROOT)/src/data/cat_container.o \\\n    $(PKGROOT)/src/data/simple_dmatrix.o \\\n    $(PKGROOT)/src/data/data.o \\\n    $(PKGROOT)/src/data/sparse_page_raw_format.o \\\n    $(PKGROOT)/src/data/ellpack_page.o \\\n    $(PKGROOT)/src/data/file_iterator.o \\\n    $(PKGROOT)/src/data/gradient_index.o \\\n    $(PKGROOT)/src/data/gradient_index_page_source.o \\\n    $(PKGROOT)/src/data/gradient_index_format.o \\\n    $(PKGROOT)/src/data/metainfo.o \\\n    $(PKGROOT)/src/data/sparse_page_dmatrix.o \\\n    $(PKGROOT)/src/data/sparse_page_source.o \\\n    $(PKGROOT)/src/data/extmem_quantile_dmatrix.o \\\n    $(PKGROOT)/src/data/quantile_dmatrix.o \\\n    $(PKGROOT)/src/data/batch_utils.o \\\n    $(PKGROOT)/src/data/proxy_dmatrix.o \\\n    $(PKGROOT)/src/data/iterative_dmatrix.o \\\n    $(PKGROOT)/src/predictor/predictor.o \\\n    $(PKGROOT)/src/predictor/cpu_predictor.o \\\n    $(PKGROOT)/src/predictor/interpretability/shap.o \\\n    $(PKGROOT)/src/predictor/treeshap.o \\\n    $(PKGROOT)/src/tree/constraints.o \\\n    $(PKGROOT)/src/tree/param.o \\\n    $(PKGROOT)/src/tree/fit_stump.o \\\n    $(PKGROOT)/src/tree/tree_model.o \\\n    $(PKGROOT)/src/tree/tree_view.o \\\n    $(PKGROOT)/src/tree/tree_updater.o \\\n    $(PKGROOT)/src/tree/multi_target_tree_model.o \\\n    $(PKGROOT)/src/tree/updater_approx.o \\\n    $(PKGROOT)/src/tree/updater_colmaker.o \\\n    $(PKGROOT)/src/tree/updater_prune.o \\\n    $(PKGROOT)/src/tree/updater_quantile_hist.o \\\n    $(PKGROOT)/src/tree/updater_refresh.o \\\n    $(PKGROOT)/src/tree/updater_sync.o \\\n    $(PKGROOT)/src/tree/hist/hist_param.o \\\n    $(PKGROOT)/src/tree/hist/histogram.o \\\n    $(PKGROOT)/src/tree/hist/sampler.o \\\n    $(PKGROOT)/src/linear/linear_updater.o \\\n    $(PKGROOT)/src/linear/updater_coordinate.o \\\n    $(PKGROOT)/src/linear/updater_shotgun.o \\\n    $(PKGROOT)/src/learner.o \\\n    $(PKGROOT)/src/context.o \\\n    $(PKGROOT)/src/logging.o \\\n    
$(PKGROOT)/src/global_config.o \\\n    $(PKGROOT)/src/collective/result.o \\\n    $(PKGROOT)/src/collective/allgather.o \\\n    $(PKGROOT)/src/collective/allreduce.o \\\n    $(PKGROOT)/src/collective/broadcast.o \\\n    $(PKGROOT)/src/collective/comm.o \\\n    $(PKGROOT)/src/collective/comm_group.o \\\n    $(PKGROOT)/src/collective/coll.o \\\n    $(PKGROOT)/src/collective/tracker.o \\\n    $(PKGROOT)/src/collective/in_memory_handler.o \\\n    $(PKGROOT)/src/collective/loop.o \\\n    $(PKGROOT)/src/collective/socket.o \\\n    $(PKGROOT)/src/common/cache_manager.o \\\n    $(PKGROOT)/src/common/charconv.o \\\n    $(PKGROOT)/src/common/column_matrix.o \\\n    $(PKGROOT)/src/common/common.o \\\n    $(PKGROOT)/src/common/cuda_rt_utils.o \\\n    $(PKGROOT)/src/common/error_msg.o \\\n    $(PKGROOT)/src/common/hist_util.o \\\n    $(PKGROOT)/src/common/host_device_vector.o \\\n    $(PKGROOT)/src/common/io.o \\\n    $(PKGROOT)/src/common/json.o \\\n    $(PKGROOT)/src/common/linalg_op.o \\\n    $(PKGROOT)/src/common/numeric.o \\\n    $(PKGROOT)/src/common/optional_weight.o \\\n    $(PKGROOT)/src/common/pseudo_huber.o \\\n    $(PKGROOT)/src/common/quantile.o \\\n    $(PKGROOT)/src/common/random.o \\\n    $(PKGROOT)/src/common/stats.o \\\n    $(PKGROOT)/src/common/survival_util.o \\\n    $(PKGROOT)/src/common/threading_utils.o \\\n    $(PKGROOT)/src/common/ranking_utils.o \\\n    $(PKGROOT)/src/common/param_array.o \\\n    $(PKGROOT)/src/common/expectile_loss_utils.o \\\n    $(PKGROOT)/src/common/quantile_loss_utils.o \\\n    $(PKGROOT)/src/common/timer.o \\\n    $(PKGROOT)/src/common/version.o \\\n    $(PKGROOT)/src/c_api/c_api.o \\\n    $(PKGROOT)/src/c_api/c_api_error.o \\\n    $(PKGROOT)/amalgamation/dmlc-minimum0.o\n"
  },
  {
    "path": "R-package/src/Makevars.win.in",
    "content": "# package root\nPKGROOT=../../\nENABLE_STD_THREAD=0\n# _*_ mode: Makefile; _*_\n\nCXX_STD = CXX17\n\nXGB_RFLAGS = \\\n    -DXGBOOST_STRICT_R_MODE=1 \\\n    -DDMLC_LOG_BEFORE_THROW=0 \\\n    -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) \\\n    -DDMLC_DISABLE_STDIN=1 \\\n    -DDMLC_LOG_CUSTOMIZE=1\n\n# disable the use of thread_local for 32 bit windows:\nifeq ($(R_OSTYPE)$(WIN),windows)\n    XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0\nendif\n$(foreach v, $(XGB_RFLAGS), $(warning $(v)))\n\nPKG_CPPFLAGS = \\\n    -I$(PKGROOT)/include \\\n    -I$(PKGROOT)/dmlc-core/include \\\n    -I$(PKGROOT) \\\n    -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1 \\\n    @XGBOOST_MM_PREFETCH_PRESENT@ \\\n    $(XGB_RFLAGS)\n\nPKG_CXXFLAGS = \\\n    $(SHLIB_OPENMP_CXXFLAGS) \\\n    -DDMLC_CMAKE_LITTLE_ENDIAN=1 \\\n    $(SHLIB_PTHREAD_FLAGS) \\\n    $(CXX_VISIBILITY)\n\nPKG_LIBS = \\\n    $(SHLIB_OPENMP_CXXFLAGS) \\\n    -DDMLC_CMAKE_LITTLE_ENDIAN=1 \\\n    $(SHLIB_PTHREAD_FLAGS) \\\n    -lwsock32 \\\n    -lws2_32\n\nOBJECTS= \\\n    ./xgboost_R.o \\\n    ./xgboost_custom.o \\\n    ./init.o \\\n    $(PKGROOT)/src/metric/metric.o \\\n    $(PKGROOT)/src/metric/elementwise_metric.o \\\n    $(PKGROOT)/src/metric/multiclass_metric.o \\\n    $(PKGROOT)/src/metric/rank_metric.o \\\n    $(PKGROOT)/src/metric/auc.o \\\n    $(PKGROOT)/src/metric/survival_metric.o \\\n    $(PKGROOT)/src/objective/objective.o \\\n    $(PKGROOT)/src/objective/regression_obj.o \\\n    $(PKGROOT)/src/objective/multiclass_obj.o \\\n    $(PKGROOT)/src/objective/lambdarank_obj.o \\\n    $(PKGROOT)/src/objective/hinge.o \\\n    $(PKGROOT)/src/objective/aft_obj.o \\\n    $(PKGROOT)/src/objective/adaptive.o \\\n    $(PKGROOT)/src/objective/init_estimation.o \\\n    $(PKGROOT)/src/objective/quantile_obj.o \\\n    $(PKGROOT)/src/gbm/gbm.o \\\n    $(PKGROOT)/src/gbm/gbtree.o \\\n    $(PKGROOT)/src/gbm/gbtree_model.o \\\n    $(PKGROOT)/src/gbm/gblinear.o \\\n    $(PKGROOT)/src/gbm/gblinear_model.o \\\n    
$(PKGROOT)/src/data/adapter.o \\\n    $(PKGROOT)/src/data/array_interface.o \\\n    $(PKGROOT)/src/data/cat_container.o \\\n    $(PKGROOT)/src/data/simple_dmatrix.o \\\n    $(PKGROOT)/src/data/data.o \\\n    $(PKGROOT)/src/data/sparse_page_raw_format.o \\\n    $(PKGROOT)/src/data/ellpack_page.o \\\n    $(PKGROOT)/src/data/file_iterator.o \\\n    $(PKGROOT)/src/data/gradient_index.o \\\n    $(PKGROOT)/src/data/gradient_index_page_source.o \\\n    $(PKGROOT)/src/data/gradient_index_format.o \\\n    $(PKGROOT)/src/data/metainfo.o \\\n    $(PKGROOT)/src/data/sparse_page_dmatrix.o \\\n    $(PKGROOT)/src/data/sparse_page_source.o \\\n    $(PKGROOT)/src/data/extmem_quantile_dmatrix.o \\\n    $(PKGROOT)/src/data/quantile_dmatrix.o \\\n    $(PKGROOT)/src/data/batch_utils.o \\\n    $(PKGROOT)/src/data/proxy_dmatrix.o \\\n    $(PKGROOT)/src/data/iterative_dmatrix.o \\\n    $(PKGROOT)/src/predictor/predictor.o \\\n    $(PKGROOT)/src/predictor/cpu_predictor.o \\\n    $(PKGROOT)/src/predictor/interpretability/shap.o \\\n    $(PKGROOT)/src/predictor/treeshap.o \\\n    $(PKGROOT)/src/tree/constraints.o \\\n    $(PKGROOT)/src/tree/param.o \\\n    $(PKGROOT)/src/tree/fit_stump.o \\\n    $(PKGROOT)/src/tree/tree_model.o \\\n    $(PKGROOT)/src/tree/tree_view.o \\\n    $(PKGROOT)/src/tree/multi_target_tree_model.o \\\n    $(PKGROOT)/src/tree/tree_updater.o \\\n    $(PKGROOT)/src/tree/updater_approx.o \\\n    $(PKGROOT)/src/tree/updater_colmaker.o \\\n    $(PKGROOT)/src/tree/updater_prune.o \\\n    $(PKGROOT)/src/tree/updater_quantile_hist.o \\\n    $(PKGROOT)/src/tree/updater_refresh.o \\\n    $(PKGROOT)/src/tree/updater_sync.o \\\n    $(PKGROOT)/src/tree/hist/hist_param.o \\\n    $(PKGROOT)/src/tree/hist/histogram.o \\\n    $(PKGROOT)/src/tree/hist/sampler.o \\\n    $(PKGROOT)/src/linear/linear_updater.o \\\n    $(PKGROOT)/src/linear/updater_coordinate.o \\\n    $(PKGROOT)/src/linear/updater_shotgun.o \\\n    $(PKGROOT)/src/learner.o \\\n    $(PKGROOT)/src/context.o \\\n    
$(PKGROOT)/src/logging.o \\\n    $(PKGROOT)/src/global_config.o \\\n    $(PKGROOT)/src/collective/result.o \\\n    $(PKGROOT)/src/collective/allgather.o \\\n    $(PKGROOT)/src/collective/allreduce.o \\\n    $(PKGROOT)/src/collective/broadcast.o \\\n    $(PKGROOT)/src/collective/comm.o \\\n    $(PKGROOT)/src/collective/comm_group.o \\\n    $(PKGROOT)/src/collective/coll.o \\\n    $(PKGROOT)/src/collective/tracker.o \\\n    $(PKGROOT)/src/collective/in_memory_handler.o \\\n    $(PKGROOT)/src/collective/loop.o \\\n    $(PKGROOT)/src/collective/socket.o \\\n    $(PKGROOT)/src/common/cache_manager.o \\\n    $(PKGROOT)/src/common/charconv.o \\\n    $(PKGROOT)/src/common/column_matrix.o \\\n    $(PKGROOT)/src/common/common.o \\\n    $(PKGROOT)/src/common/cuda_rt_utils.o \\\n    $(PKGROOT)/src/common/error_msg.o \\\n    $(PKGROOT)/src/common/hist_util.o \\\n    $(PKGROOT)/src/common/host_device_vector.o \\\n    $(PKGROOT)/src/common/io.o \\\n    $(PKGROOT)/src/common/json.o \\\n    $(PKGROOT)/src/common/linalg_op.o \\\n    $(PKGROOT)/src/common/numeric.o \\\n    $(PKGROOT)/src/common/optional_weight.o \\\n    $(PKGROOT)/src/common/pseudo_huber.o \\\n    $(PKGROOT)/src/common/quantile.o \\\n    $(PKGROOT)/src/common/random.o \\\n    $(PKGROOT)/src/common/stats.o \\\n    $(PKGROOT)/src/common/survival_util.o \\\n    $(PKGROOT)/src/common/threading_utils.o \\\n    $(PKGROOT)/src/common/ranking_utils.o \\\n    $(PKGROOT)/src/common/param_array.o \\\n    $(PKGROOT)/src/common/expectile_loss_utils.o \\\n    $(PKGROOT)/src/common/quantile_loss_utils.o \\\n    $(PKGROOT)/src/common/timer.o \\\n    $(PKGROOT)/src/common/version.o \\\n    $(PKGROOT)/src/c_api/c_api.o \\\n    $(PKGROOT)/src/c_api/c_api_error.o \\\n    $(PKGROOT)/amalgamation/dmlc-minimum0.o\n"
  },
  {
    "path": "R-package/src/config.h.in",
    "content": "/* src/config.h.in.  Generated from configure.ac by autoheader.  */\n\n/* Define if building universal (internal helper macro) */\n#undef AC_APPLE_UNIVERSAL_BUILD\n\n/* Define to 1 if you have the <inttypes.h> header file. */\n#undef HAVE_INTTYPES_H\n\n/* Define to 1 if you have the <stdint.h> header file. */\n#undef HAVE_STDINT_H\n\n/* Define to 1 if you have the <stdio.h> header file. */\n#undef HAVE_STDIO_H\n\n/* Define to 1 if you have the <stdlib.h> header file. */\n#undef HAVE_STDLIB_H\n\n/* Define to 1 if you have the <strings.h> header file. */\n#undef HAVE_STRINGS_H\n\n/* Define to 1 if you have the <string.h> header file. */\n#undef HAVE_STRING_H\n\n/* Define to 1 if you have the <sys/stat.h> header file. */\n#undef HAVE_SYS_STAT_H\n\n/* Define to 1 if you have the <sys/types.h> header file. */\n#undef HAVE_SYS_TYPES_H\n\n/* Define to 1 if you have the <unistd.h> header file. */\n#undef HAVE_UNISTD_H\n\n/* Define to the address where bug reports for this package should be sent. */\n#undef PACKAGE_BUGREPORT\n\n/* Define to the full name of this package. */\n#undef PACKAGE_NAME\n\n/* Define to the full name and version of this package. */\n#undef PACKAGE_STRING\n\n/* Define to the one symbol short name of this package. */\n#undef PACKAGE_TARNAME\n\n/* Define to the home page for this package. */\n#undef PACKAGE_URL\n\n/* Define to the version of this package. */\n#undef PACKAGE_VERSION\n\n/* Define to 1 if all of the C90 standard headers exist (not just the ones\n   required in a freestanding environment). This macro is provided for\n   backward compatibility; new code need not use it. */\n#undef STDC_HEADERS\n\n/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most\n   significant byte first (like Motorola and SPARC, unlike Intel). */\n#if defined AC_APPLE_UNIVERSAL_BUILD\n# if defined __BIG_ENDIAN__\n#  define WORDS_BIGENDIAN 1\n# endif\n#else\n# ifndef WORDS_BIGENDIAN\n#  undef WORDS_BIGENDIAN\n# endif\n#endif\n"
  },
  {
    "path": "R-package/src/init.c",
    "content": "/* Copyright (c) 2015 by Contributors\n *\n * This file was initially generated using the following R command:\n * tools::package_native_routine_registration_skeleton('.', con = 'src/init.c', character_only = F)\n * and edited to conform to xgboost C linter requirements. For details, see\n * https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines\n */\n#include <Rinternals.h>\n#include <stdlib.h>\n#include <R_ext/Rdynload.h>\n#include <R_ext/Visibility.h>\n\n/* FIXME:\nCheck these declarations against the C/Fortran source code.\n*/\n\n/* .Call calls */\nextern void XGBInitializeAltrepClass_R(DllInfo *info);\nextern SEXP XGDuplicate_R(SEXP);\nextern SEXP XGPointerEqComparison_R(SEXP, SEXP);\nextern SEXP XGBoosterTrainOneIter_R(SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGBoosterCreate_R(SEXP);\nextern SEXP XGBoosterReset_R(SEXP);\nextern SEXP XGBoosterCopyInfoFromDMatrix_R(SEXP, SEXP);\nextern SEXP XGBoosterSetStrFeatureInfo_R(SEXP, SEXP, SEXP);\nextern SEXP XGBoosterGetStrFeatureInfo_R(SEXP, SEXP);\nextern SEXP XGBoosterBoostedRounds_R(SEXP);\nextern SEXP XGBoosterGetNumFeature_R(SEXP);\nextern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGBoosterGetAttrNames_R(SEXP);\nextern SEXP XGBoosterGetAttr_R(SEXP, SEXP);\nextern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);\nextern SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP config);\nextern SEXP XGBoosterLoadModel_R(SEXP, SEXP);\nextern SEXP XGBoosterSaveJsonConfig_R(SEXP handle);\nextern SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value);\nextern SEXP XGBoosterSerializeToBuffer_R(SEXP handle);\nextern SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw);\nextern SEXP XGBoosterPredictFromDMatrix_R(SEXP, SEXP, SEXP);\nextern SEXP XGBoosterPredictFromDense_R(SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGBoosterPredictFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP 
XGBoosterPredictFromColumnar_R(SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGBoosterSaveModel_R(SEXP, SEXP);\nextern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);\nextern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);\nextern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);\nextern SEXP XGCheckNullPtr_R(SEXP);\nextern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);\nextern SEXP XGSetVectorNamesInplace_R(SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromURI_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixGetFloatInfo_R(SEXP, SEXP);\nextern SEXP XGDMatrixGetUIntInfo_R(SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromDF_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixGetStrFeatureInfo_R(SEXP, SEXP);\nextern SEXP XGDMatrixNumCol_R(SEXP);\nextern SEXP XGDMatrixNumRow_R(SEXP);\nextern SEXP XGProxyDMatrixCreate_R(void);\nextern SEXP XGProxyDMatrixSetDataDense_R(SEXP, SEXP);\nextern SEXP XGProxyDMatrixSetDataCSR_R(SEXP, SEXP);\nextern SEXP XGProxyDMatrixSetDataColumnar_R(SEXP, SEXP);\nextern SEXP XGDMatrixCreateFromCallback_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGQuantileDMatrixCreateFromCallback_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixFree_R(SEXP);\nextern SEXP XGGetRNAIntAsDouble(void);\nextern SEXP XGDMatrixGetQuantileCut_R(SEXP);\nextern SEXP XGDMatrixNumNonMissing_R(SEXP);\nextern SEXP XGDMatrixGetDataAsCSR_R(SEXP);\nextern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP);\nextern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP, SEXP);\nextern SEXP XGBSetGlobalConfig_R(SEXP);\nextern SEXP XGBGetGlobalConfig_R(void);\nextern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);\nextern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);\nextern SEXP 
XGBoosterSliceAndReplace_R(SEXP, SEXP, SEXP, SEXP);\n\nstatic const R_CallMethodDef CallEntries[] = {\n  {\"XGDuplicate_R\",               (DL_FUNC) &XGDuplicate_R,               1},\n  {\"XGPointerEqComparison_R\",     (DL_FUNC) &XGPointerEqComparison_R,     2},\n  {\"XGBoosterTrainOneIter_R\",     (DL_FUNC) &XGBoosterTrainOneIter_R,     5},\n  {\"XGBoosterCreate_R\",           (DL_FUNC) &XGBoosterCreate_R,           1},\n  {\"XGBoosterReset_R\",            (DL_FUNC) &XGBoosterReset_R,            1},\n  {\"XGBoosterCopyInfoFromDMatrix_R\", (DL_FUNC) &XGBoosterCopyInfoFromDMatrix_R, 2},\n  {\"XGBoosterSetStrFeatureInfo_R\",(DL_FUNC) &XGBoosterSetStrFeatureInfo_R,3},  // NOLINT\n  {\"XGBoosterGetStrFeatureInfo_R\",(DL_FUNC) &XGBoosterGetStrFeatureInfo_R,2},  // NOLINT\n  {\"XGBoosterBoostedRounds_R\",    (DL_FUNC) &XGBoosterBoostedRounds_R,    1},\n  {\"XGBoosterGetNumFeature_R\",    (DL_FUNC) &XGBoosterGetNumFeature_R,    1},\n  {\"XGBoosterDumpModel_R\",        (DL_FUNC) &XGBoosterDumpModel_R,        4},\n  {\"XGBoosterEvalOneIter_R\",      (DL_FUNC) &XGBoosterEvalOneIter_R,      4},\n  {\"XGBoosterGetAttrNames_R\",     (DL_FUNC) &XGBoosterGetAttrNames_R,     1},\n  {\"XGBoosterGetAttr_R\",          (DL_FUNC) &XGBoosterGetAttr_R,          2},\n  {\"XGBoosterLoadModelFromRaw_R\", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},\n  {\"XGBoosterSaveModelToRaw_R\",   (DL_FUNC) &XGBoosterSaveModelToRaw_R,   2},\n  {\"XGBoosterLoadModel_R\",        (DL_FUNC) &XGBoosterLoadModel_R,        2},\n  {\"XGBoosterSaveJsonConfig_R\",   (DL_FUNC) &XGBoosterSaveJsonConfig_R,   1},\n  {\"XGBoosterLoadJsonConfig_R\",   (DL_FUNC) &XGBoosterLoadJsonConfig_R,   2},\n  {\"XGBoosterSerializeToBuffer_R\",     (DL_FUNC) &XGBoosterSerializeToBuffer_R,     1},\n  {\"XGBoosterUnserializeFromBuffer_R\", (DL_FUNC) &XGBoosterUnserializeFromBuffer_R, 2},\n  {\"XGBoosterPredictFromDMatrix_R\", (DL_FUNC) &XGBoosterPredictFromDMatrix_R, 3},\n  {\"XGBoosterPredictFromDense_R\", (DL_FUNC) 
&XGBoosterPredictFromDense_R, 5},\n  {\"XGBoosterPredictFromCSR_R\",   (DL_FUNC) &XGBoosterPredictFromCSR_R,   5},\n  {\"XGBoosterPredictFromColumnar_R\", (DL_FUNC) &XGBoosterPredictFromColumnar_R, 5},\n  {\"XGBoosterSaveModel_R\",        (DL_FUNC) &XGBoosterSaveModel_R,        2},\n  {\"XGBoosterSetAttr_R\",          (DL_FUNC) &XGBoosterSetAttr_R,          3},\n  {\"XGBoosterSetParam_R\",         (DL_FUNC) &XGBoosterSetParam_R,         3},\n  {\"XGBoosterUpdateOneIter_R\",    (DL_FUNC) &XGBoosterUpdateOneIter_R,    3},\n  {\"XGCheckNullPtr_R\",            (DL_FUNC) &XGCheckNullPtr_R,            1},\n  {\"XGSetArrayDimNamesInplace_R\", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},\n  {\"XGSetVectorNamesInplace_R\",   (DL_FUNC) &XGSetVectorNamesInplace_R,   2},\n  {\"XGDMatrixCreateFromCSC_R\",    (DL_FUNC) &XGDMatrixCreateFromCSC_R,    6},\n  {\"XGDMatrixCreateFromCSR_R\",    (DL_FUNC) &XGDMatrixCreateFromCSR_R,    6},\n  {\"XGDMatrixCreateFromURI_R\",    (DL_FUNC) &XGDMatrixCreateFromURI_R,    3},\n  {\"XGDMatrixCreateFromMat_R\",    (DL_FUNC) &XGDMatrixCreateFromMat_R,    3},\n  {\"XGDMatrixGetFloatInfo_R\",     (DL_FUNC) &XGDMatrixGetFloatInfo_R,     2},\n  {\"XGDMatrixGetUIntInfo_R\",      (DL_FUNC) &XGDMatrixGetUIntInfo_R,      2},\n  {\"XGDMatrixCreateFromDF_R\",     (DL_FUNC) &XGDMatrixCreateFromDF_R,     3},\n  {\"XGDMatrixGetStrFeatureInfo_R\", (DL_FUNC) &XGDMatrixGetStrFeatureInfo_R, 2},\n  {\"XGDMatrixNumCol_R\",           (DL_FUNC) &XGDMatrixNumCol_R,           1},\n  {\"XGDMatrixNumRow_R\",           (DL_FUNC) &XGDMatrixNumRow_R,           1},\n  {\"XGProxyDMatrixCreate_R\",      (DL_FUNC) &XGProxyDMatrixCreate_R,      0},\n  {\"XGProxyDMatrixSetDataDense_R\", (DL_FUNC) &XGProxyDMatrixSetDataDense_R, 2},\n  {\"XGProxyDMatrixSetDataCSR_R\",  (DL_FUNC) &XGProxyDMatrixSetDataCSR_R,  2},\n  {\"XGProxyDMatrixSetDataColumnar_R\", (DL_FUNC) &XGProxyDMatrixSetDataColumnar_R, 2},\n  {\"XGDMatrixCreateFromCallback_R\", (DL_FUNC) &XGDMatrixCreateFromCallback_R, 
7},\n  {\"XGQuantileDMatrixCreateFromCallback_R\", (DL_FUNC) &XGQuantileDMatrixCreateFromCallback_R, 8},\n  {\"XGDMatrixFree_R\",             (DL_FUNC) &XGDMatrixFree_R,             1},\n  {\"XGGetRNAIntAsDouble\",         (DL_FUNC) &XGGetRNAIntAsDouble,         0},\n  {\"XGDMatrixGetQuantileCut_R\",   (DL_FUNC) &XGDMatrixGetQuantileCut_R,   1},\n  {\"XGDMatrixNumNonMissing_R\",    (DL_FUNC) &XGDMatrixNumNonMissing_R,    1},\n  {\"XGDMatrixGetDataAsCSR_R\",     (DL_FUNC) &XGDMatrixGetDataAsCSR_R,     1},\n  {\"XGDMatrixSaveBinary_R\",       (DL_FUNC) &XGDMatrixSaveBinary_R,       3},\n  {\"XGDMatrixSetInfo_R\",          (DL_FUNC) &XGDMatrixSetInfo_R,          3},\n  {\"XGDMatrixSetStrFeatureInfo_R\", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3},\n  {\"XGDMatrixSliceDMatrix_R\",     (DL_FUNC) &XGDMatrixSliceDMatrix_R,     3},\n  {\"XGBSetGlobalConfig_R\",        (DL_FUNC) &XGBSetGlobalConfig_R,        1},\n  {\"XGBGetGlobalConfig_R\",        (DL_FUNC) &XGBGetGlobalConfig_R,        0},\n  {\"XGBoosterFeatureScore_R\",     (DL_FUNC) &XGBoosterFeatureScore_R,     2},\n  {\"XGBoosterSlice_R\",            (DL_FUNC) &XGBoosterSlice_R,            4},\n  {\"XGBoosterSliceAndReplace_R\",  (DL_FUNC) &XGBoosterSliceAndReplace_R,  4},\n  {NULL, NULL, 0}\n};\n\n#if defined(_WIN32)\n__declspec(dllexport)\n#endif  // defined(_WIN32)\nvoid attribute_visible R_init_xgboost(DllInfo *dll) {\n  R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);\n  R_useDynamicSymbols(dll, FALSE);\n  XGBInitializeAltrepClass_R(dll);\n}\n"
  },
  {
    "path": "R-package/src/xgboost-win.def",
    "content": "LIBRARY xgboost.dll\nEXPORTS\n R_init_xgboost\n"
  },
  {
    "path": "R-package/src/xgboost_R.cc",
    "content": "/**\n * Copyright 2014-2024, XGBoost Contributors\n */\n#include <dmlc/common.h>\n#include <dmlc/omp.h>\n#include <xgboost/c_api.h>\n#include <xgboost/context.h>\n#include <xgboost/data.h>\n#include <xgboost/logging.h>\n\n#include <algorithm>\n#include <cmath>\n#include <cstdint>\n#include <cstdio>\n#include <cstring>\n#include <memory>\n#include <limits>\n#include <sstream>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../../src/c_api/c_api_error.h\"\n#include \"../../src/c_api/c_api_utils.h\"  // MakeSparseFromPtr\n#include \"../../src/common/threading_utils.h\"\n#include \"../../src/data/array_interface.h\"  // for ArrayInterface\n\n#include \"./xgboost_R.h\"  // Must follow other includes.\n\n#ifdef _MSC_VER\n#error \"Compilation of R package with MSVC is not supported due to issues handling R headers\"\n#endif\n\nnamespace {\n\n/* Note: this class is used as a throwable exception.\nSome xgboost C functions that use callbacks will catch exceptions\nthat happen inside of the callback execution, hence it purposefully\ndoesn't inherit from 'std::exception' even if used as such. 
*/\nstruct ErrorWithUnwind {};\n\nvoid ThrowExceptionFromRError(void *, Rboolean jump) {\n  if (jump) {\n    throw ErrorWithUnwind();\n  }\n}\n\nstruct PtrToConstChar {\n  const char *ptr;\n};\n\nSEXP WrappedMkChar(void *void_ptr) {\n  return Rf_mkChar(static_cast<PtrToConstChar*>(void_ptr)->ptr);\n}\n\nSEXP SafeMkChar(const char *c_str, SEXP continuation_token) {\n  PtrToConstChar ptr_struct{c_str};\n  return R_UnwindProtect(\n    WrappedMkChar, static_cast<void*>(&ptr_struct),\n    ThrowExceptionFromRError, nullptr,\n    continuation_token);\n}\n\nstruct RFunAndEnv {\n  SEXP R_fun;\n  SEXP R_calling_env;\n};\n\nSEXP WrappedExecFun(void *void_ptr) {\n  RFunAndEnv *r_fun_and_env = static_cast<RFunAndEnv*>(void_ptr);\n  SEXP f_expr = Rf_protect(Rf_lang1(r_fun_and_env->R_fun));\n  SEXP out = Rf_protect(Rf_eval(f_expr, r_fun_and_env->R_calling_env));\n  Rf_unprotect(2);\n  return out;\n}\n\nSEXP SafeExecFun(SEXP R_fun, SEXP R_calling_env, SEXP continuation_token) {\n  RFunAndEnv r_fun_and_env{R_fun, R_calling_env};\n  return R_UnwindProtect(\n    WrappedExecFun, static_cast<void*>(&r_fun_and_env),\n    ThrowExceptionFromRError, nullptr,\n    continuation_token);\n}\n\nSEXP WrappedAllocReal(void *void_ptr) {\n  size_t *size = static_cast<size_t*>(void_ptr);\n  return Rf_allocVector(REALSXP, *size);\n}\n\nSEXP SafeAllocReal(size_t size, SEXP continuation_token) {\n  return R_UnwindProtect(\n    WrappedAllocReal, static_cast<void*>(&size),\n    ThrowExceptionFromRError, nullptr,\n    continuation_token);\n}\n\nSEXP WrappedAllocInteger(void *void_ptr) {\n  size_t *size = static_cast<size_t*>(void_ptr);\n  return Rf_allocVector(INTSXP, *size);\n}\n\nSEXP SafeAllocInteger(size_t size, SEXP continuation_token) {\n  return R_UnwindProtect(\n    WrappedAllocInteger, static_cast<void*>(&size),\n    ThrowExceptionFromRError, nullptr,\n    continuation_token);\n}\n\n[[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) {\n  SEXP mat_dims = Rf_getAttrib(R_mat, 
R_DimSymbol);\n  if (Rf_xlength(mat_dims) > 2) {\n    LOG(FATAL) << \"Passed input array with more than two dimensions, which is not supported.\";\n  }\n  const int *ptr_mat_dims = INTEGER(mat_dims);\n\n  // Lambda for type dispatch.\n  auto make_matrix = [=](auto const *ptr) {\n    using namespace xgboost;  // NOLINT\n    using T = std::remove_pointer_t<decltype(ptr)>;\n\n    auto m = linalg::MatrixView<T>{\n        common::Span{ptr,\n          static_cast<std::size_t>(ptr_mat_dims[0]) * static_cast<std::size_t>(ptr_mat_dims[1])},\n        {ptr_mat_dims[0], ptr_mat_dims[1]},  // Shape\n        DeviceOrd::CPU(),\n        linalg::Order::kF  // R uses column-major\n    };\n    CHECK(m.FContiguous());\n    return linalg::ArrayInterfaceStr(m);\n  };\n\n  const SEXPTYPE arr_type = TYPEOF(R_mat);\n  switch (arr_type) {\n    case REALSXP:\n      return make_matrix(REAL(R_mat));\n    case INTSXP:\n      return make_matrix(INTEGER(R_mat));\n    case LGLSXP:\n      return make_matrix(LOGICAL(R_mat));\n    default:\n      LOG(FATAL) << \"Array or matrix has unsupported type.\";\n  }\n\n  LOG(FATAL) << \"Not reachable\";\n  return \"\";\n}\n\n[[nodiscard]] std::string MakeArrayInterfaceFromRVector(SEXP R_vec) {\n  const size_t vec_len = Rf_xlength(R_vec);\n\n  // Lambda for type dispatch.\n  auto make_vec = [=](auto const *ptr) {\n    using namespace xgboost;  // NOLINT\n    auto v = linalg::MakeVec(ptr, vec_len);\n    return linalg::ArrayInterfaceStr(v);\n  };\n\n  const SEXPTYPE arr_type = TYPEOF(R_vec);\n  switch (arr_type) {\n    case REALSXP:\n      return make_vec(REAL(R_vec));\n    case INTSXP:\n      return make_vec(INTEGER(R_vec));\n    case LGLSXP:\n      return make_vec(LOGICAL(R_vec));\n    default:\n      LOG(FATAL) << \"Array or matrix has unsupported type.\";\n  }\n\n  LOG(FATAL) << \"Not reachable\";\n  return \"\";\n}\n\n[[nodiscard]] std::string MakeArrayInterfaceFromRDataFrame(SEXP R_df) {\n  auto make_vec = [&](auto const *ptr, std::size_t len) {\n    auto 
v = xgboost::linalg::MakeVec(ptr, len);\n    return xgboost::linalg::ArrayInterface(v);\n  };\n\n  R_xlen_t n_features = Rf_xlength(R_df);\n  std::vector<xgboost::Json> array(n_features);\n  CHECK_GT(n_features, 0);\n  std::size_t len = Rf_xlength(VECTOR_ELT(R_df, 0));\n\n  // The `data.frame` in R actually converts all data into numeric. The other type\n  // handlers here are not used. At the moment they are kept as a reference for when we\n  // can avoid making data copies during transformation.\n  for (R_xlen_t i = 0; i < n_features; ++i) {\n    switch (TYPEOF(VECTOR_ELT(R_df, i))) {\n      case INTSXP: {\n        auto const *ptr = INTEGER(VECTOR_ELT(R_df, i));\n        array[i] = make_vec(ptr, len);\n        break;\n      }\n      case REALSXP: {\n        auto const *ptr = REAL(VECTOR_ELT(R_df, i));\n        array[i] = make_vec(ptr, len);\n        break;\n      }\n      case LGLSXP: {\n        auto const *ptr = LOGICAL(VECTOR_ELT(R_df, i));\n        array[i] = make_vec(ptr, len);\n        break;\n      }\n      default: {\n        LOG(FATAL) << \"data.frame has unsupported type.\";\n      }\n    }\n  }\n\n  xgboost::Json jinterface{std::move(array)};\n  return xgboost::Json::Dump(jinterface);\n}\n\nvoid AddMissingToJson(xgboost::Json *jconfig, SEXP missing, SEXPTYPE arr_type) {\n  if (Rf_isNull(missing) || ISNAN(Rf_asReal(missing))) {\n    // missing is not specified\n    if (arr_type == REALSXP) {\n      (*jconfig)[\"missing\"] = std::numeric_limits<double>::quiet_NaN();\n    } else {\n      (*jconfig)[\"missing\"] = R_NaInt;\n    }\n  } else {\n    // missing specified\n    (*jconfig)[\"missing\"] = Rf_asReal(missing);\n  }\n}\n\n[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {\n  using namespace ::xgboost;  // NOLINT\n  Json jconfig{Object{}};\n  AddMissingToJson(&jconfig, missing, arr_type);\n  jconfig[\"nthread\"] = Rf_asInteger(n_threads);\n  return Json::Dump(jconfig);\n}\n\n// Allocate a R vector and 
copy an array interface encoded object to it.\n[[nodiscard]] SEXP CopyArrayToR(const char *array_str, SEXP ctoken) {\n  xgboost::ArrayInterface<1> array{xgboost::StringView{array_str}};\n  // R supports only int and double.\n  bool is_int_type =\n      xgboost::DispatchDType(array.type, [](auto t) { return std::is_integral_v<decltype(t)>; });\n  bool is_float = xgboost::DispatchDType(\n      array.type, [](auto v) { return std::is_floating_point_v<decltype(v)>; });\n  CHECK(is_int_type || is_float) << \"Internal error: Invalid DType.\";\n  CHECK(array.is_contiguous) << \"Internal error: Return by XGBoost should be contiguous\";\n\n  // Note: the only case in which this will receive an integer type is\n  // for the 'indptr' part of the quantile cut outputs, which comes\n  // in sorted order, so the last element contains the maximum value.\n  bool fits_into_C_int = xgboost::DispatchDType(array.type, [&](auto t) {\n    using T = decltype(t);\n    if (!std::is_integral_v<decltype(t)>) {\n      return false;\n    }\n    auto ptr = static_cast<T const *>(array.data);\n    T last_elt = ptr[array.n - 1];\n    if (last_elt < 0) {\n      last_elt = -last_elt;  // no std::abs overload for all possible types\n    }\n    return last_elt <= std::numeric_limits<int>::max();\n  });\n  bool use_int = is_int_type && fits_into_C_int;\n\n  // Allocate memory in R\n  SEXP out =\n      Rf_protect(use_int ? 
SafeAllocInteger(array.n, ctoken) : SafeAllocReal(array.n, ctoken));\n\n  xgboost::DispatchDType(array.type, [&](auto t) {\n    using T = decltype(t);\n    auto in_ptr = static_cast<T const *>(array.data);\n    if (use_int) {\n      auto out_ptr = INTEGER(out);\n      std::copy_n(in_ptr, array.n, out_ptr);\n    } else {\n      auto out_ptr = REAL(out);\n      std::copy_n(in_ptr, array.n, out_ptr);\n    }\n  });\n\n  Rf_unprotect(1);\n  return out;\n}\n}  // namespace\n\n/*!\n * \\brief macro to annotate begin of api\n */\n#define R_API_BEGIN()                           \\\n  try {                                         \\\n/* Note: an R error triggers a long jump, hence all C++ objects that\nallocated memory through non-R allocators, including the exception\nobject, need to be destructed before triggering the R error.\nIn order to preserve the error message, it gets copied to a temporary\nbuffer, and the R error section is reached through a 'goto' statement\nthat bypasses usual function control flow. 
*/\nnamespace {\nconstexpr std::size_t MsgSize = 512;\nchar cpp_ex_msg[MsgSize];\n}  // anonymous namespace\n\n/*!\n * \\brief macro to annotate end of api\n */\n#define R_API_END()                                  \\\n  }                                                  \\\n  catch (std::exception & e) {                       \\\n    cpp_ex_msg[MsgSize - 1] = 0;                     \\\n    std::strncpy(cpp_ex_msg, e.what(), MsgSize - 1); \\\n    goto throw_cpp_ex_as_R_err;                      \\\n  }                                                  \\\n  if (false) {                                       \\\n  throw_cpp_ex_as_R_err:                             \\\n    Rf_error(\"%s\", cpp_ex_msg);                      \\\n  }\n\n/**\n * @brief Macro for checking XGBoost return code.\n */\n#define CHECK_CALL(__rc)               \\\n  if ((__rc) != 0) {                   \\\n    Rf_error(\"%s\", XGBGetLastError()); \\\n  }\n\nusing dmlc::BeginPtr;\n\nXGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {\n  return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr);\n}\n\nXGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) {\n  Rf_setAttrib(arr, R_DimNamesSymbol, dim_names);\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGSetVectorNamesInplace_R(SEXP arr, SEXP names) {\n  Rf_setAttrib(arr, R_NamesSymbol, names);\n  return R_NilValue;\n}\n\nnamespace {\nvoid _DMatrixFinalizer(SEXP ext) {\n  R_API_BEGIN();\n  if (R_ExternalPtrAddr(ext) == NULL) return;\n  CHECK_CALL(XGDMatrixFree(R_ExternalPtrAddr(ext)));\n  R_ClearExternalPtr(ext);\n  R_API_END();\n}\n} /* namespace */\n\nXGB_DLL SEXP XGBSetGlobalConfig_R(SEXP json_str) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBSetGlobalConfig(CHAR(Rf_asChar(json_str))));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBGetGlobalConfig_R() {\n  const char* json_str;\n  R_API_BEGIN();\n  CHECK_CALL(XGBGetGlobalConfig(&json_str));\n  R_API_END();\n  return Rf_mkString(json_str);\n}\n\nXGB_DLL SEXP 
XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  SEXP uri_char = Rf_protect(Rf_asChar(uri));\n  const char *uri_ptr = CHAR(uri_char);\n  R_API_BEGIN();\n  xgboost::Json jconfig{xgboost::Object{}};\n  jconfig[\"uri\"] = std::string(uri_ptr);\n  jconfig[\"silent\"] = Rf_asLogical(silent);\n  jconfig[\"data_split_mode\"] = Rf_asInteger(data_split_mode);\n  const std::string sconfig = xgboost::Json::Dump(jconfig);\n  DMatrixHandle handle;\n  CHECK_CALL(XGDMatrixCreateFromURI(sconfig.c_str(), &handle));\n  R_SetExternalPtrAddr(ret, handle);\n  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(2);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n\n  DMatrixHandle handle;\n  int res_code;\n  {\n    auto array_str = MakeArrayInterfaceFromRMat(mat);\n    auto config_str = MakeJsonConfigForArray(missing, n_threads, TYPEOF(mat));\n\n    res_code = XGDMatrixCreateFromDense(array_str.c_str(), config_str.c_str(), &handle);\n  }\n  CHECK_CALL(res_code);\n  R_SetExternalPtrAddr(ret, handle);\n  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixCreateFromDF_R(SEXP df, SEXP missing, SEXP n_threads) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n\n  DMatrixHandle handle;\n  std::int32_t rc{0};\n  {\n    const std::string sinterface = MakeArrayInterfaceFromRDataFrame(df);\n    xgboost::Json jconfig{xgboost::Object{}};\n    jconfig[\"missing\"] = Rf_asReal(missing);\n    jconfig[\"nthread\"] = Rf_asInteger(n_threads);\n    std::string sconfig = xgboost::Json::Dump(jconfig);\n\n    rc = XGDMatrixCreateFromColumnar(sinterface.c_str(), sconfig.c_str(), &handle);\n  }\n\n  
CHECK_CALL(rc);\n  R_SetExternalPtrAddr(ret, handle);\n  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n\n  return ret;\n}\n\nnamespace {\nvoid CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_str,\n                      std::string *indices_str, std::string *data_str) {\n  const int *p_indptr = INTEGER(indptr);\n  const int *p_indices = INTEGER(indices);\n  const double *p_data = REAL(data);\n\n  auto nindptr = static_cast<std::size_t>(Rf_xlength(indptr));\n  auto ndata = static_cast<std::size_t>(Rf_xlength(data));\n  CHECK_EQ(ndata, p_indptr[nindptr - 1]);\n  xgboost::detail::MakeSparseFromPtr(p_indptr, p_indices, p_data, nindptr, indptr_str, indices_str,\n                                     data_str);\n}\n}  // namespace\n\nXGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,\n                                      SEXP missing, SEXP n_threads) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n  std::int32_t threads = Rf_asInteger(n_threads);\n  DMatrixHandle handle;\n\n  int res_code;\n  {\n    using xgboost::Integer;\n    using xgboost::Json;\n    using xgboost::Object;\n    std::string sindptr, sindices, sdata;\n    CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);\n    auto nrow = static_cast<std::size_t>(INTEGER(num_row)[0]);\n\n    Json jconfig{Object{}};\n    // Construct configuration\n    jconfig[\"nthread\"] = Integer{threads};\n    AddMissingToJson(&jconfig, missing, TYPEOF(data));\n    std::string config;\n    Json::Dump(jconfig, &config);\n    res_code = XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,\n                                      config.c_str(), &handle);\n  }\n  CHECK_CALL(res_code);\n\n  R_SetExternalPtrAddr(ret, handle);\n  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n  return 
ret;\n}\n\nXGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,\n                                      SEXP missing, SEXP n_threads) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n  std::int32_t threads = Rf_asInteger(n_threads);\n  DMatrixHandle handle;\n\n  int res_code;\n  {\n    using xgboost::Integer;\n    using xgboost::Json;\n    using xgboost::Object;\n\n    std::string sindptr, sindices, sdata;\n    CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);\n    auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);\n\n    Json jconfig{Object{}};\n    // Construct configuration\n    jconfig[\"nthread\"] = Integer{threads};\n    AddMissingToJson(&jconfig, missing, TYPEOF(data));\n    std::string config;\n    Json::Dump(jconfig, &config);\n    res_code = XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,\n                                      config.c_str(), &handle);\n  }\n  CHECK_CALL(res_code);\n  R_SetExternalPtrAddr(ret, handle);\n  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups) {\n  SEXP ret = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n  R_xlen_t len = Rf_xlength(idxset);\n  const int *idxset_ = INTEGER(idxset);\n  DMatrixHandle res;\n\n  int res_code;\n  {\n    std::vector<int> idxvec(len);\n    #ifndef _MSC_VER\n    #pragma omp simd\n    #endif\n    for (R_xlen_t i = 0; i < len; ++i) {\n      idxvec[i] = idxset_[i] - 1;\n    }\n    res_code = XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle),\n                                       BeginPtr(idxvec), len,\n                                       &res,\n                                       Rf_asLogical(allow_groups));\n  }\n  CHECK_CALL(res_code);\n  R_SetExternalPtrAddr(ret, res);\n  
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {\n  R_API_BEGIN();\n  CHECK_CALL(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),\n                                 CHAR(Rf_asChar(fname)),\n                                 Rf_asInteger(silent)));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {\n  R_API_BEGIN();\n  SEXP field_ = Rf_protect(Rf_asChar(field));\n  SEXP arr_dim = Rf_getAttrib(array, R_DimSymbol);\n  int res_code;\n  {\n    const std::string array_str = Rf_isNull(arr_dim)?\n      MakeArrayInterfaceFromRVector(array) : MakeArrayInterfaceFromRMat(array);\n    res_code = XGDMatrixSetInfoFromInterface(\n      R_ExternalPtrAddr(handle), CHAR(field_), array_str.c_str());\n  }\n  CHECK_CALL(res_code);\n  Rf_unprotect(1);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGDMatrixSetStrFeatureInfo_R(SEXP handle, SEXP field, SEXP array) {\n  R_API_BEGIN();\n  size_t len{0};\n  if (!Rf_isNull(array)) {\n    len = Rf_xlength(array);\n  }\n\n  SEXP str_info_holder = Rf_protect(Rf_allocVector(VECSXP, len));\n  if (TYPEOF(array) == STRSXP) {\n    for (size_t i = 0; i < len; ++i) {\n      SET_VECTOR_ELT(str_info_holder, i, STRING_ELT(array, i));\n    }\n  } else {\n    for (size_t i = 0; i < len; ++i) {\n      SET_VECTOR_ELT(str_info_holder, i, Rf_asChar(VECTOR_ELT(array, i)));\n    }\n  }\n\n  SEXP field_ = Rf_protect(Rf_asChar(field));\n  const char *name = CHAR(field_);\n  int res_code;\n  {\n    std::vector<std::string> str_info;\n    str_info.reserve(len);\n    for (size_t i = 0; i < len; ++i) {\n      str_info.emplace_back(CHAR(VECTOR_ELT(str_info_holder, i)));\n    }\n    std::vector<char const*> vec(len);\n    std::transform(str_info.cbegin(), str_info.cend(), vec.begin(),\n                   [](std::string const &str) { return str.c_str(); });\n    
res_code = XGDMatrixSetStrFeatureInfo(R_ExternalPtrAddr(handle), name, vec.data(), len);\n  }\n  CHECK_CALL(res_code);\n  Rf_unprotect(2);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGDMatrixGetStrFeatureInfo_R(SEXP handle, SEXP field) {\n  SEXP ret;\n  R_API_BEGIN();\n  char const **out_features{nullptr};\n  bst_ulong len{0};\n  const char *name = CHAR(Rf_asChar(field));\n  XGDMatrixGetStrFeatureInfo(R_ExternalPtrAddr(handle), name, &len, &out_features);\n\n  if (len > 0) {\n    ret = Rf_protect(Rf_allocVector(STRSXP, len));\n    for (size_t i = 0; i < len; ++i) {\n      SET_STRING_ELT(ret, i, Rf_mkChar(out_features[i]));\n    }\n  } else {\n    ret = Rf_protect(R_NilValue);\n  }\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixGetFloatInfo_R(SEXP handle, SEXP field) {\n  SEXP ret;\n  R_API_BEGIN();\n  bst_ulong olen;\n  const float *res;\n  CHECK_CALL(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(Rf_asChar(field)), &olen, &res));\n  ret = Rf_protect(Rf_allocVector(REALSXP, olen));\n  std::copy(res, res + olen, REAL(ret));\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixGetUIntInfo_R(SEXP handle, SEXP field) {\n  SEXP ret;\n  R_API_BEGIN();\n  bst_ulong olen;\n  const unsigned *res;\n  CHECK_CALL(XGDMatrixGetUIntInfo(R_ExternalPtrAddr(handle), CHAR(Rf_asChar(field)), &olen, &res));\n  ret = Rf_protect(Rf_allocVector(INTSXP, olen));\n  std::copy(res, res + olen, INTEGER(ret));\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle) {\n  bst_ulong nrow;\n  R_API_BEGIN();\n  CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));\n  R_API_END();\n  return Rf_ScalarInteger(static_cast<int>(nrow));\n}\n\nXGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle) {\n  bst_ulong ncol;\n  R_API_BEGIN();\n  CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncol));\n  R_API_END();\n  return Rf_ScalarInteger(static_cast<int>(ncol));\n}\n\nXGB_DLL SEXP 
XGProxyDMatrixCreate_R() {\n  SEXP out = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n  DMatrixHandle proxy_dmat_handle;\n  CHECK_CALL(XGProxyDMatrixCreate(&proxy_dmat_handle));\n  R_SetExternalPtrAddr(out, proxy_dmat_handle);\n  R_RegisterCFinalizerEx(out, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGProxyDMatrixSetDataDense_R(SEXP handle, SEXP R_mat) {\n  R_API_BEGIN();\n  DMatrixHandle proxy_dmat = R_ExternalPtrAddr(handle);\n  int res_code;\n  {\n    std::string array_str = MakeArrayInterfaceFromRMat(R_mat);\n    res_code = XGProxyDMatrixSetDataDense(proxy_dmat, array_str.c_str());\n  }\n  CHECK_CALL(res_code);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGProxyDMatrixSetDataCSR_R(SEXP handle, SEXP lst) {\n  R_API_BEGIN();\n  DMatrixHandle proxy_dmat = R_ExternalPtrAddr(handle);\n  int res_code;\n  {\n    std::string array_str_indptr = MakeArrayInterfaceFromRVector(VECTOR_ELT(lst, 0));\n    std::string array_str_indices = MakeArrayInterfaceFromRVector(VECTOR_ELT(lst, 1));\n    std::string array_str_data = MakeArrayInterfaceFromRVector(VECTOR_ELT(lst, 2));\n    const int ncol = Rf_asInteger(VECTOR_ELT(lst, 3));\n    res_code = XGProxyDMatrixSetDataCSR(proxy_dmat,\n                                        array_str_indptr.c_str(),\n                                        array_str_indices.c_str(),\n                                        array_str_data.c_str(),\n                                        ncol);\n  }\n  CHECK_CALL(res_code);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGProxyDMatrixSetDataColumnar_R(SEXP handle, SEXP lst) {\n  R_API_BEGIN();\n  DMatrixHandle proxy_dmat = R_ExternalPtrAddr(handle);\n  int res_code;\n  {\n    std::string sinterface = MakeArrayInterfaceFromRDataFrame(lst);\n    res_code = XGProxyDMatrixSetDataColumnar(proxy_dmat, sinterface.c_str());\n  }\n  CHECK_CALL(res_code);\n  R_API_END();\n  return 
R_NilValue;\n}\n\nnamespace {\n\nstruct _RDataIterator {\n  SEXP f_next;\n  SEXP f_reset;\n  SEXP calling_env;\n  SEXP continuation_token;\n\n  _RDataIterator(\n    SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP continuation_token) :\n  f_next(f_next), f_reset(f_reset), calling_env(calling_env),\n  continuation_token(continuation_token) {}\n\n  void reset() {\n    SafeExecFun(this->f_reset, this->calling_env, this->continuation_token);\n  }\n\n  int next() {\n    SEXP R_res = Rf_protect(\n      SafeExecFun(this->f_next, this->calling_env, this->continuation_token));\n    int res = Rf_asInteger(R_res);\n    Rf_unprotect(1);\n    return res;\n  }\n};\n\nvoid _reset_RDataIterator(DataIterHandle iter) {\n  static_cast<_RDataIterator*>(iter)->reset();\n}\n\nint _next_RDataIterator(DataIterHandle iter) {\n  return static_cast<_RDataIterator*>(iter)->next();\n}\n\nSEXP XGDMatrixCreateFromCallbackGeneric_R(\n  SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP proxy_dmat,\n  SEXP n_threads, SEXP missing, SEXP max_bin, SEXP ref_dmat,\n  SEXP cache_prefix, bool as_quantile_dmatrix) {\n  SEXP continuation_token = Rf_protect(R_MakeUnwindCont());\n  SEXP out = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  R_API_BEGIN();\n  DMatrixHandle out_dmat;\n\n  int res_code;\n  try {\n    _RDataIterator data_iterator(f_next, f_reset, calling_env, continuation_token);\n\n    std::string str_cache_prefix;\n    xgboost::Json jconfig{xgboost::Object{}};\n    jconfig[\"missing\"] = Rf_asReal(missing);\n    if (!Rf_isNull(n_threads)) {\n      jconfig[\"nthread\"] = Rf_asInteger(n_threads);\n    }\n    if (as_quantile_dmatrix) {\n      if (!Rf_isNull(max_bin)) {\n        jconfig[\"max_bin\"] = Rf_asInteger(max_bin);\n      }\n    } else {\n      str_cache_prefix = std::string(CHAR(Rf_asChar(cache_prefix)));\n      jconfig[\"cache_prefix\"] = str_cache_prefix;\n    }\n    std::string json_str = xgboost::Json::Dump(jconfig);\n\n    DMatrixHandle ref_dmat_handle = nullptr;\n 
   if (as_quantile_dmatrix && !Rf_isNull(ref_dmat)) {\n      ref_dmat_handle = R_ExternalPtrAddr(ref_dmat);\n    }\n\n    if (as_quantile_dmatrix) {\n      res_code = XGQuantileDMatrixCreateFromCallback(\n        &data_iterator,\n        R_ExternalPtrAddr(proxy_dmat),\n        ref_dmat_handle,\n        _reset_RDataIterator,\n        _next_RDataIterator,\n        json_str.c_str(),\n        &out_dmat);\n    } else {\n      res_code = XGDMatrixCreateFromCallback(\n        &data_iterator,\n        R_ExternalPtrAddr(proxy_dmat),\n        _reset_RDataIterator,\n        _next_RDataIterator,\n        json_str.c_str(),\n        &out_dmat);\n    }\n  } catch (ErrorWithUnwind &e) {\n    R_ContinueUnwind(continuation_token);\n  }\n  CHECK_CALL(res_code);\n\n  R_SetExternalPtrAddr(out, out_dmat);\n  R_RegisterCFinalizerEx(out, _DMatrixFinalizer, TRUE);\n  R_API_END();\n  Rf_unprotect(2);\n  return out;\n}\n\n} /* namespace */\n\nXGB_DLL SEXP XGQuantileDMatrixCreateFromCallback_R(\n  SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP proxy_dmat,\n  SEXP n_threads, SEXP missing, SEXP max_bin, SEXP ref_dmat) {\n  return XGDMatrixCreateFromCallbackGeneric_R(\n    f_next, f_reset, calling_env, proxy_dmat,\n    n_threads, missing, max_bin, ref_dmat,\n    R_NilValue, true);\n}\n\nXGB_DLL SEXP XGDMatrixCreateFromCallback_R(\n  SEXP f_next, SEXP f_reset, SEXP calling_env, SEXP proxy_dmat,\n  SEXP n_threads, SEXP missing, SEXP cache_prefix) {\n  return XGDMatrixCreateFromCallbackGeneric_R(\n    f_next, f_reset, calling_env, proxy_dmat,\n    n_threads, missing, R_NilValue, R_NilValue,\n    cache_prefix, false);\n}\n\nXGB_DLL SEXP XGDMatrixFree_R(SEXP proxy_dmat) {\n  _DMatrixFinalizer(proxy_dmat);\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGGetRNAIntAsDouble() {\n  double sentinel_as_double = static_cast<double>(R_NaInt);\n  return Rf_ScalarReal(sentinel_as_double);\n}\n\nXGB_DLL SEXP XGDuplicate_R(SEXP obj) {\n  return Rf_duplicate(obj);\n}\n\nXGB_DLL SEXP XGPointerEqComparison_R(SEXP 
obj1, SEXP obj2) {\n  return Rf_ScalarLogical(R_ExternalPtrAddr(obj1) == R_ExternalPtrAddr(obj2));\n}\n\nXGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle) {\n  const char *out_names[] = {\"indptr\", \"data\", \"\"};\n  SEXP continuation_token = Rf_protect(R_MakeUnwindCont());\n  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));\n  R_API_BEGIN();\n  const char *out_indptr;\n  const char *out_data;\n  CHECK_CALL(XGDMatrixGetQuantileCut(R_ExternalPtrAddr(handle), \"{}\", &out_indptr, &out_data));\n  try {\n    SET_VECTOR_ELT(out, 0, CopyArrayToR(out_indptr, continuation_token));\n    SET_VECTOR_ELT(out, 1, CopyArrayToR(out_data, continuation_token));\n  } catch (ErrorWithUnwind &e) {\n    R_ContinueUnwind(continuation_token);\n  }\n  R_API_END();\n  Rf_unprotect(2);\n  return out;\n}\n\nXGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle) {\n  SEXP out = Rf_protect(Rf_allocVector(REALSXP, 1));\n  R_API_BEGIN();\n  bst_ulong out_;\n  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &out_));\n  REAL(out)[0] = static_cast<double>(out_);\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle) {\n  const char *out_names[] = {\"indptr\", \"indices\", \"data\", \"ncols\", \"\"};\n  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));\n  R_API_BEGIN();\n\n  bst_ulong nrows, ncols, nnz;\n  CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrows));\n  CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncols));\n  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &nnz));\n  if (std::max(nrows, ncols) > std::numeric_limits<int>::max()) {\n    Rf_error(\"%s\", \"Error: resulting DMatrix data does not fit into R 'dgRMatrix'.\");\n  }\n\n  SET_VECTOR_ELT(out, 0, Rf_allocVector(INTSXP, nrows + 1));\n  SET_VECTOR_ELT(out, 1, Rf_allocVector(INTSXP, nnz));\n  SET_VECTOR_ELT(out, 2, Rf_allocVector(REALSXP, nnz));\n  SET_VECTOR_ELT(out, 3, Rf_ScalarInteger(ncols));\n\n  std::unique_ptr<bst_ulong[]> 
indptr(new bst_ulong[nrows + 1]);\n  std::unique_ptr<unsigned[]> indices(new unsigned[nnz]);\n  std::unique_ptr<float[]> data(new float[nnz]);\n\n  CHECK_CALL(XGDMatrixGetDataAsCSR(R_ExternalPtrAddr(handle),\n                                   \"{}\",\n                                   indptr.get(),\n                                   indices.get(),\n                                   data.get()));\n\n  std::copy(indptr.get(), indptr.get() + nrows + 1, INTEGER(VECTOR_ELT(out, 0)));\n  std::copy(indices.get(), indices.get() + nnz, INTEGER(VECTOR_ELT(out, 1)));\n  std::copy(data.get(), data.get() + nnz, REAL(VECTOR_ELT(out, 2)));\n\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\n// functions related to booster\nnamespace {\nvoid _BoosterFinalizer(SEXP R_ptr) {\n  if (R_ExternalPtrAddr(R_ptr) == NULL) return;\n  CHECK_CALL(XGBoosterFree(R_ExternalPtrAddr(R_ptr)));\n  R_ClearExternalPtr(R_ptr);\n}\n\n/* Booster is represented as an altrep list with one element which\ncorresponds to an 'externalptr' holding the C object, forbidding\nmodification by not implementing setters, and adding custom serialization. 
*/\nR_altrep_class_t XGBAltrepPointerClass;\n\nR_xlen_t XGBAltrepPointerLength_R(SEXP R_altrepped_obj) {\n  return 1;\n}\n\nSEXP XGBAltrepPointerGetElt_R(SEXP R_altrepped_obj, R_xlen_t idx) {\n  return R_altrep_data1(R_altrepped_obj);\n}\n\nSEXP XGBMakeEmptyAltrep() {\n  SEXP class_name = Rf_protect(Rf_mkString(\"xgb.Booster\"));\n  SEXP elt_names = Rf_protect(Rf_mkString(\"ptr\"));\n  SEXP R_ptr = Rf_protect(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));\n  SEXP R_altrepped_obj = Rf_protect(R_new_altrep(XGBAltrepPointerClass, R_ptr, R_NilValue));\n  Rf_setAttrib(R_altrepped_obj, R_NamesSymbol, elt_names);\n  Rf_setAttrib(R_altrepped_obj, R_ClassSymbol, class_name);\n  Rf_unprotect(4);\n  return R_altrepped_obj;\n}\n\n/* Note: the idea for separating this function from the one above is to be\nable to trigger all R allocations first before doing non-R allocations. */\nvoid XGBAltrepSetPointer(SEXP R_altrepped_obj, BoosterHandle handle) {\n  SEXP R_ptr = R_altrep_data1(R_altrepped_obj);\n  R_SetExternalPtrAddr(R_ptr, handle);\n  R_RegisterCFinalizerEx(R_ptr, _BoosterFinalizer, TRUE);\n}\n\nSEXP XGBAltrepSerializer_R(SEXP R_altrepped_obj) {\n  R_API_BEGIN();\n  BoosterHandle handle = R_ExternalPtrAddr(R_altrep_data1(R_altrepped_obj));\n  char const *serialized_bytes;\n  bst_ulong serialized_length;\n  CHECK_CALL(XGBoosterSerializeToBuffer(\n    handle, &serialized_length, &serialized_bytes));\n  SEXP R_state = Rf_protect(Rf_allocVector(RAWSXP, serialized_length));\n  if (serialized_length != 0) {\n    std::memcpy(RAW(R_state), serialized_bytes, serialized_length);\n  }\n  Rf_unprotect(1);\n  return R_state;\n  R_API_END();\n  return R_NilValue; /* <- should not be reached */\n}\n\nSEXP XGBAltrepDeserializer_R(SEXP /*unused*/, SEXP R_state) {\n  SEXP R_altrepped_obj = Rf_protect(XGBMakeEmptyAltrep());\n  R_API_BEGIN();\n  BoosterHandle handle = nullptr;\n  CHECK_CALL(XGBoosterCreate(nullptr, 0, &handle));\n  int res_code = XGBoosterUnserializeFromBuffer(handle,\n 
                                               RAW(R_state),\n                                                Rf_xlength(R_state));\n  if (res_code != 0) {\n    XGBoosterFree(handle);\n  }\n  CHECK_CALL(res_code);\n  XGBAltrepSetPointer(R_altrepped_obj, handle);\n  R_API_END();\n  Rf_unprotect(1);\n  return R_altrepped_obj;\n}\n\n// https://purrple.cat/blog/2018/10/14/altrep-and-cpp/\nRboolean XGBAltrepInspector_R(\n  SEXP x, int pre, int deep, int pvec,\n  void (*inspect_subtree)(SEXP, int, int, int)) {\n  Rprintf(\"Altrepped external pointer [address:%p]\\n\",\n          R_ExternalPtrAddr(R_altrep_data1(x)));\n  return TRUE;\n}\n\nSEXP XGBAltrepDuplicate_R(SEXP R_altrepped_obj, Rboolean deep) {\n  R_API_BEGIN();\n  if (!deep) {\n    SEXP out = Rf_protect(XGBMakeEmptyAltrep());\n    R_set_altrep_data1(out, R_altrep_data1(R_altrepped_obj));\n    Rf_unprotect(1);\n    return out;\n  } else {\n    SEXP out = Rf_protect(XGBMakeEmptyAltrep());\n    char const *serialized_bytes;\n    bst_ulong serialized_length;\n    CHECK_CALL(XGBoosterSerializeToBuffer(\n      R_ExternalPtrAddr(R_altrep_data1(R_altrepped_obj)),\n      &serialized_length, &serialized_bytes));\n    BoosterHandle new_handle = nullptr;\n    CHECK_CALL(XGBoosterCreate(nullptr, 0, &new_handle));\n    int res_code = XGBoosterUnserializeFromBuffer(new_handle,\n                                                  serialized_bytes,\n                                                  serialized_length);\n    if (res_code != 0) {\n      XGBoosterFree(new_handle);\n    }\n    CHECK_CALL(res_code);\n    XGBAltrepSetPointer(out, new_handle);\n    Rf_unprotect(1);\n    return out;\n  }\n  R_API_END();\n  return R_NilValue; /* <- should not be reached */\n}\n\n} /* namespace */\n\nXGB_DLL void XGBInitializeAltrepClass_R(DllInfo *dll) {\n  XGBAltrepPointerClass = R_make_altlist_class(\"XGBAltrepPointerClass\", \"xgboost\", dll);\n  R_set_altrep_Length_method(XGBAltrepPointerClass, XGBAltrepPointerLength_R);\n  
R_set_altlist_Elt_method(XGBAltrepPointerClass, XGBAltrepPointerGetElt_R);\n  R_set_altrep_Inspect_method(XGBAltrepPointerClass, XGBAltrepInspector_R);\n  R_set_altrep_Serialized_state_method(XGBAltrepPointerClass, XGBAltrepSerializer_R);\n  R_set_altrep_Unserialize_method(XGBAltrepPointerClass, XGBAltrepDeserializer_R);\n  R_set_altrep_Duplicate_method(XGBAltrepPointerClass, XGBAltrepDuplicate_R);\n}\n\nXGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) {\n  SEXP out = Rf_protect(XGBMakeEmptyAltrep());\n  R_API_BEGIN();\n  R_xlen_t len = Rf_xlength(dmats);\n  BoosterHandle handle;\n\n  int res_code;\n  {\n    std::vector<void*> dvec(len);\n    for (R_xlen_t i = 0; i < len; ++i) {\n      dvec[i] = R_ExternalPtrAddr(VECTOR_ELT(dmats, i));\n    }\n    res_code = XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle);\n  }\n  CHECK_CALL(res_code);\n  XGBAltrepSetPointer(out, handle);\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterReset_R(SEXP handle) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterReset(R_ExternalPtrAddr(handle)));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterCopyInfoFromDMatrix_R(SEXP booster, SEXP dmat) {\n  R_API_BEGIN();\n  char const **feature_names;\n  bst_ulong len_feature_names = 0;\n  CHECK_CALL(XGDMatrixGetStrFeatureInfo(R_ExternalPtrAddr(dmat),\n                                        \"feature_name\",\n                                        &len_feature_names,\n                                        &feature_names));\n  if (len_feature_names) {\n    CHECK_CALL(XGBoosterSetStrFeatureInfo(R_ExternalPtrAddr(booster),\n                                          \"feature_name\",\n                                          feature_names,\n                                          len_feature_names));\n  }\n\n  char const **feature_types;\n  bst_ulong len_feature_types = 0;\n  CHECK_CALL(XGDMatrixGetStrFeatureInfo(R_ExternalPtrAddr(dmat),\n                                        \"feature_type\",\n          
                              &len_feature_types,\n                                        &feature_types));\n  if (len_feature_types) {\n    CHECK_CALL(XGBoosterSetStrFeatureInfo(R_ExternalPtrAddr(booster),\n                                          \"feature_type\",\n                                          feature_types,\n                                          len_feature_types));\n  }\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterSetStrFeatureInfo_R(SEXP handle, SEXP field, SEXP features) {\n  R_API_BEGIN();\n  SEXP field_char = Rf_protect(Rf_asChar(field));\n  bst_ulong len_features = Rf_xlength(features);\n\n  int res_code;\n  {\n    std::vector<const char*> str_arr(len_features);\n    for (bst_ulong idx = 0; idx < len_features; idx++) {\n      str_arr[idx] = CHAR(STRING_ELT(features, idx));\n    }\n    res_code = XGBoosterSetStrFeatureInfo(R_ExternalPtrAddr(handle),\n                                          CHAR(field_char),\n                                          str_arr.data(),\n                                          len_features);\n  }\n  CHECK_CALL(res_code);\n  Rf_unprotect(1);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterGetStrFeatureInfo_R(SEXP handle, SEXP field) {\n  R_API_BEGIN();\n  bst_ulong len;\n  const char **out_features;\n  SEXP field_char = Rf_protect(Rf_asChar(field));\n  CHECK_CALL(XGBoosterGetStrFeatureInfo(R_ExternalPtrAddr(handle),\n                                        CHAR(field_char), &len, &out_features));\n  SEXP out = Rf_protect(Rf_allocVector(STRSXP, len));\n  for (bst_ulong idx = 0; idx < len; idx++) {\n    SET_STRING_ELT(out, idx, Rf_mkChar(out_features[idx]));\n  }\n  Rf_unprotect(2);\n  return out;\n  R_API_END();\n  return R_NilValue; /* <- should not be reached */\n}\n\nXGB_DLL SEXP XGBoosterBoostedRounds_R(SEXP handle) {\n  SEXP out = Rf_protect(Rf_allocVector(INTSXP, 1));\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterBoostedRounds(R_ExternalPtrAddr(handle), 
INTEGER(out)));\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\n/* Note: R's integer class is 32-bit-and-signed only, while xgboost\nsupports more, so it returns it as a floating point instead */\nXGB_DLL SEXP XGBoosterGetNumFeature_R(SEXP handle) {\n  SEXP out = Rf_protect(Rf_allocVector(REALSXP, 1));\n  R_API_BEGIN();\n  bst_ulong res;\n  CHECK_CALL(XGBoosterGetNumFeature(R_ExternalPtrAddr(handle), &res));\n  REAL(out)[0] = static_cast<double>(res);\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {\n  R_API_BEGIN();\n  SEXP name_ = Rf_protect(Rf_asChar(name));\n  SEXP val_ = Rf_protect(Rf_asChar(val));\n  CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle),\n                               CHAR(name_),\n                               CHAR(val_)));\n  Rf_unprotect(2);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),\n                                    Rf_asInteger(iter),\n                                    R_ExternalPtrAddr(dtrain)));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP grad, SEXP hess) {\n  R_API_BEGIN();\n  CHECK_EQ(Rf_xlength(grad), Rf_xlength(hess)) << \"gradient and hess must have same length.\";\n  SEXP gdim = Rf_protect(Rf_getAttrib(grad, R_DimSymbol));\n  SEXP hdim = Rf_protect(Rf_getAttrib(hess, R_DimSymbol));\n\n  int res_code;\n  {\n    const std::string s_grad = Rf_isNull(gdim)?\n      MakeArrayInterfaceFromRVector(grad) : MakeArrayInterfaceFromRMat(grad);\n    const std::string s_hess = Rf_isNull(hdim)?\n      MakeArrayInterfaceFromRVector(hess) : MakeArrayInterfaceFromRMat(hess);\n    res_code = XGBoosterTrainOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain),\n                                     Rf_asInteger(iter), 
s_grad.c_str(), s_hess.c_str());\n  }\n  CHECK_CALL(res_code);\n\n  Rf_unprotect(2);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {\n  const char *ret;\n  R_API_BEGIN();\n  CHECK_EQ(Rf_xlength(dmats), Rf_xlength(evnames))\n      << \"dmats and evnams must have same length\";\n  R_xlen_t len = Rf_xlength(dmats);\n  SEXP evnames_lst = Rf_protect(Rf_allocVector(VECSXP, len));\n  for (R_xlen_t i = 0; i < len; i++) {\n    SET_VECTOR_ELT(evnames_lst, i, Rf_asChar(VECTOR_ELT(evnames, i)));\n  }\n\n  int res_code;\n  {\n    std::vector<void*> vec_dmats(len);\n    std::vector<std::string> vec_names;\n    vec_names.reserve(len);\n    std::vector<const char*> vec_sptr(len);\n    for (R_xlen_t i = 0; i < len; ++i) {\n      vec_dmats[i] = R_ExternalPtrAddr(VECTOR_ELT(dmats, i));\n      vec_names.emplace_back(CHAR(VECTOR_ELT(evnames_lst, i)));\n    }\n    for (R_xlen_t i = 0; i < len; ++i) {\n      vec_sptr[i] = vec_names[i].c_str();\n    }\n    res_code = XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),\n                                    Rf_asInteger(iter),\n                                    BeginPtr(vec_dmats),\n                                    BeginPtr(vec_sptr),\n                                    len, &ret);\n  }\n  CHECK_CALL(res_code);\n  Rf_unprotect(1);\n  R_API_END();\n  return Rf_mkString(ret);\n}\n\nnamespace {\n\nstruct ProxyDmatrixError : public std::exception {};\n\nstruct ProxyDmatrixWrapper {\n  DMatrixHandle proxy_dmat_handle;\n\n  ProxyDmatrixWrapper() {\n    int res_code = XGProxyDMatrixCreate(&this->proxy_dmat_handle);\n    if (res_code != 0) {\n      throw ProxyDmatrixError();\n    }\n  }\n\n  ~ProxyDmatrixWrapper() {\n    if (this->proxy_dmat_handle) {\n      XGDMatrixFree(this->proxy_dmat_handle);\n      this->proxy_dmat_handle = nullptr;\n    }\n  }\n\n  DMatrixHandle get_handle() {\n    return this->proxy_dmat_handle;\n  
}\n};\n\nstd::unique_ptr<ProxyDmatrixWrapper> GetProxyDMatrixWithBaseMargin(SEXP base_margin) {\n  if (Rf_isNull(base_margin)) {\n    return std::unique_ptr<ProxyDmatrixWrapper>(nullptr);\n  }\n\n  SEXP base_margin_dim = Rf_getAttrib(base_margin, R_DimSymbol);\n  int res_code;\n  try {\n    const std::string array_str = Rf_isNull(base_margin_dim)?\n      MakeArrayInterfaceFromRVector(base_margin) : MakeArrayInterfaceFromRMat(base_margin);\n    std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat(new ProxyDmatrixWrapper());\n    res_code = XGDMatrixSetInfoFromInterface(proxy_dmat->get_handle(),\n                                             \"base_margin\",\n                                             array_str.c_str());\n    if (res_code != 0) {\n      throw ProxyDmatrixError();\n    }\n    return proxy_dmat;\n  } catch(ProxyDmatrixError &err) {\n    Rf_error(\"%s\", XGBGetLastError());\n  }\n}\n\nenum class PredictionInputType {DMatrix, DenseMatrix, CSRMatrix, DataFrame};\n\nSEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config,\n                                    PredictionInputType input_type, SEXP missing,\n                                    SEXP base_margin) {\n  SEXP r_out_result = R_NilValue;\n  R_API_BEGIN();\n  SEXP json_config_ = Rf_protect(Rf_asChar(json_config));\n  char const *c_json_config = CHAR(json_config_);\n\n  bst_ulong out_dim;\n  bst_ulong const *out_shape;\n  float const *out_result;\n\n  int res_code;\n  {\n    switch (input_type) {\n      case PredictionInputType::DMatrix: {\n        res_code = XGBoosterPredictFromDMatrix(R_ExternalPtrAddr(handle),\n                                               R_ExternalPtrAddr(input_data), c_json_config,\n                                               &out_shape, &out_dim, &out_result);\n        break;\n      }\n\n      case PredictionInputType::CSRMatrix: {\n        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(\n          base_margin);\n        
DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;\n\n        SEXP indptr = VECTOR_ELT(input_data, 0);\n        SEXP indices = VECTOR_ELT(input_data, 1);\n        SEXP data = VECTOR_ELT(input_data, 2);\n        const int ncol_csr = Rf_asInteger(VECTOR_ELT(input_data, 3));\n        const SEXPTYPE type_data = TYPEOF(data);\n        CHECK_EQ(type_data, REALSXP);\n        std::string sindptr, sindices, sdata;\n        CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);\n\n        xgboost::StringView json_str(c_json_config);\n        xgboost::Json new_json = xgboost::Json::Load(json_str);\n        AddMissingToJson(&new_json, missing, type_data);\n        const std::string new_c_json = xgboost::Json::Dump(new_json);\n\n        res_code = XGBoosterPredictFromCSR(\n          R_ExternalPtrAddr(handle), sindptr.c_str(), sindices.c_str(), sdata.c_str(),\n          ncol_csr, new_c_json.c_str(), proxy_dmat_handle, &out_shape, &out_dim, &out_result);\n        break;\n      }\n\n      case PredictionInputType::DenseMatrix: {\n        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(\n          base_margin);\n        DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? 
proxy_dmat->get_handle() : nullptr;\n        const std::string array_str = MakeArrayInterfaceFromRMat(input_data);\n\n        xgboost::StringView json_str(c_json_config);\n        xgboost::Json new_json = xgboost::Json::Load(json_str);\n        AddMissingToJson(&new_json, missing, TYPEOF(input_data));\n        const std::string new_c_json = xgboost::Json::Dump(new_json);\n\n        res_code = XGBoosterPredictFromDense(\n          R_ExternalPtrAddr(handle), array_str.c_str(), new_c_json.c_str(),\n          proxy_dmat_handle, &out_shape, &out_dim, &out_result);\n        break;\n      }\n\n      case PredictionInputType::DataFrame: {\n        std::unique_ptr<ProxyDmatrixWrapper> proxy_dmat = GetProxyDMatrixWithBaseMargin(\n          base_margin);\n        DMatrixHandle proxy_dmat_handle = proxy_dmat.get()? proxy_dmat->get_handle() : nullptr;\n\n        const std::string df_str = MakeArrayInterfaceFromRDataFrame(input_data);\n\n        xgboost::StringView json_str(c_json_config);\n        xgboost::Json new_json = xgboost::Json::Load(json_str);\n        AddMissingToJson(&new_json, missing, REALSXP);\n        const std::string new_c_json = xgboost::Json::Dump(new_json);\n\n        res_code = XGBoosterPredictFromColumnar(\n          R_ExternalPtrAddr(handle), df_str.c_str(), new_c_json.c_str(),\n          proxy_dmat_handle, &out_shape, &out_dim, &out_result);\n        break;\n      }\n    }\n  }\n  CHECK_CALL(res_code);\n\n  SEXP r_out_shape = Rf_protect(Rf_allocVector(INTSXP, out_dim));\n  size_t len = 1;\n  int *r_out_shape_ = INTEGER(r_out_shape);\n  for (size_t i = 0; i < out_dim; ++i) {\n    r_out_shape_[out_dim - i - 1] = out_shape[i];\n    len *= out_shape[i];\n  }\n  r_out_result = Rf_protect(Rf_allocVector(REALSXP, len));\n  std::copy(out_result, out_result + len, REAL(r_out_result));\n\n  if (out_dim > 1) {\n    Rf_setAttrib(r_out_result, R_DimSymbol, r_out_shape);\n  }\n\n  R_API_END();\n  Rf_unprotect(3);\n\n  return r_out_result;\n}\n\n}  // 
namespace\n\nXGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config)  {\n  return XGBoosterPredictGeneric(handle, dmat, json_config,\n                                 PredictionInputType::DMatrix, R_NilValue, R_NilValue);\n}\n\nXGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,\n                                         SEXP json_config, SEXP base_margin) {\n  return XGBoosterPredictGeneric(handle, R_mat, json_config,\n                                 PredictionInputType::DenseMatrix, missing, base_margin);\n}\n\nXGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,\n                                       SEXP json_config, SEXP base_margin) {\n  return XGBoosterPredictGeneric(handle, lst, json_config,\n                                 PredictionInputType::CSRMatrix, missing, base_margin);\n}\n\nXGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,\n                                            SEXP json_config, SEXP base_margin) {\n  return XGBoosterPredictGeneric(handle, R_df, json_config,\n                                 PredictionInputType::DataFrame, missing, base_margin);\n}\n\nXGB_DLL SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(Rf_asChar(fname))));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(Rf_asChar(fname))));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),\n                                          RAW(raw),\n                                          Rf_xlength(raw)));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP 
json_config) {\n  SEXP ret;\n  R_API_BEGIN();\n  bst_ulong olen;\n  char const *c_json_config = CHAR(Rf_asChar(json_config));\n  char const *raw;\n  CHECK_CALL(XGBoosterSaveModelToBuffer(R_ExternalPtrAddr(handle), c_json_config, &olen, &raw))\n  ret = Rf_protect(Rf_allocVector(RAWSXP, olen));\n  if (olen != 0) {\n    std::memcpy(RAW(ret), raw, olen);\n  }\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle) {\n  const char* ret;\n  R_API_BEGIN();\n  bst_ulong len {0};\n  CHECK_CALL(XGBoosterSaveJsonConfig(R_ExternalPtrAddr(handle),\n                                     &len,\n                                     &ret));\n  R_API_END();\n  return Rf_mkString(ret);\n}\n\nXGB_DLL SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterLoadJsonConfig(R_ExternalPtrAddr(handle), CHAR(Rf_asChar(value))));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterSerializeToBuffer_R(SEXP handle) {\n  SEXP ret;\n  R_API_BEGIN();\n  bst_ulong out_len;\n  const char *raw;\n  CHECK_CALL(XGBoosterSerializeToBuffer(R_ExternalPtrAddr(handle), &out_len, &raw));\n  ret = Rf_protect(Rf_allocVector(RAWSXP, out_len));\n  if (out_len != 0) {\n    memcpy(RAW(ret), raw, out_len);\n  }\n  R_API_END();\n  Rf_unprotect(1);\n  return ret;\n}\n\nXGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) {\n  R_API_BEGIN();\n  CHECK_CALL(XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle),\n                                 RAW(raw),\n                                 Rf_xlength(raw)));\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) {\n  SEXP out;\n  SEXP continuation_token = Rf_protect(R_MakeUnwindCont());\n  SEXP dump_format_ = Rf_protect(Rf_asChar(dump_format));\n  SEXP fmap_ = Rf_protect(Rf_asChar(fmap));\n  R_API_BEGIN();\n  bst_ulong olen;\n  const char **res;\n  const char *fmt = 
CHAR(dump_format_);\n  CHECK_CALL(XGBoosterDumpModelEx(R_ExternalPtrAddr(handle),\n                                CHAR(fmap_),\n                                Rf_asInteger(with_stats),\n                                fmt,\n                                &olen, &res));\n  out = Rf_protect(Rf_allocVector(STRSXP, olen));\n  try {\n    if (!strcmp(\"json\", fmt)) {\n      std::stringstream stream;\n      stream <<  \"[\\n\";\n      for (size_t i = 0; i < olen; ++i) {\n        stream << res[i];\n        if (i < olen - 1) {\n          stream << \",\\n\";\n        } else {\n          stream << \"\\n\";\n        }\n      }\n      stream <<  \"]\";\n      const std::string temp_str = stream.str();\n      SET_STRING_ELT(out, 0, SafeMkChar(temp_str.c_str(), continuation_token));\n    } else {\n      for (size_t i = 0; i < olen; ++i) {\n        std::stringstream stream;\n        stream <<  \"booster[\" << i <<\"]\\n\" << res[i];\n        const std::string temp_str = stream.str();\n        SET_STRING_ELT(out, i, SafeMkChar(temp_str.c_str(), continuation_token));\n      }\n    }\n  } catch (ErrorWithUnwind &e) {\n    R_ContinueUnwind(continuation_token);\n  }\n  R_API_END();\n  Rf_unprotect(4);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {\n  SEXP out;\n  R_API_BEGIN();\n  int success;\n  const char *val;\n  CHECK_CALL(XGBoosterGetAttr(R_ExternalPtrAddr(handle),\n                              CHAR(Rf_asChar(name)),\n                              &val,\n                              &success));\n  if (success) {\n    out = Rf_protect(Rf_allocVector(STRSXP, 1));\n    SET_STRING_ELT(out, 0, Rf_mkChar(val));\n  } else {\n    out = Rf_protect(R_NilValue);\n  }\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {\n  R_API_BEGIN();\n  const char *v = nullptr;\n  SEXP name_ = Rf_protect(Rf_asChar(name));\n  SEXP val_;\n  int n_protected = 1;\n  if (!Rf_isNull(val)) {\n    val_ = 
Rf_protect(Rf_asChar(val));\n    n_protected++;\n    v = CHAR(val_);\n  }\n\n  CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle),\n                              CHAR(name_), v));\n  Rf_unprotect(n_protected);\n  R_API_END();\n  return R_NilValue;\n}\n\nXGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle) {\n  SEXP out;\n  R_API_BEGIN();\n  bst_ulong len;\n  const char **res;\n  CHECK_CALL(XGBoosterGetAttrNames(R_ExternalPtrAddr(handle),\n                                   &len, &res));\n  if (len > 0) {\n    out = Rf_protect(Rf_allocVector(STRSXP, len));\n    for (size_t i = 0; i < len; ++i) {\n      SET_STRING_ELT(out, i, Rf_mkChar(res[i]));\n    }\n  } else {\n    out = Rf_protect(R_NilValue);\n  }\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {\n  SEXP out_features_sexp;\n  SEXP out_scores_sexp;\n  SEXP out_shape_sexp;\n  SEXP r_out = Rf_protect(Rf_allocVector(VECSXP, 3));\n\n  R_API_BEGIN();\n  char const *c_json_config = CHAR(Rf_asChar(json_config));\n  bst_ulong out_n_features;\n  char const **out_features;\n\n  bst_ulong out_dim;\n  bst_ulong const *out_shape;\n  float const *out_scores;\n\n  CHECK_CALL(XGBoosterFeatureScore(R_ExternalPtrAddr(handle), c_json_config,\n                                   &out_n_features, &out_features,\n                                   &out_dim, &out_shape, &out_scores));\n  out_shape_sexp = Rf_protect(Rf_allocVector(INTSXP, out_dim));\n  size_t len = 1;\n  int *out_shape_sexp_ = INTEGER(out_shape_sexp);\n  for (size_t i = 0; i < out_dim; ++i) {\n    out_shape_sexp_[i] = out_shape[i];\n    len *= out_shape[i];\n  }\n\n  out_features_sexp = Rf_protect(Rf_allocVector(STRSXP, out_n_features));\n  for (size_t i = 0; i < out_n_features; ++i) {\n    SET_STRING_ELT(out_features_sexp, i, Rf_mkChar(out_features[i]));\n  }\n\n  out_scores_sexp = Rf_protect(Rf_allocVector(REALSXP, len));\n  std::copy(out_scores, out_scores + len, REAL(out_scores_sexp));\n\n  
SET_VECTOR_ELT(r_out, 0, out_features_sexp);\n  SET_VECTOR_ELT(r_out, 1, out_shape_sexp);\n  SET_VECTOR_ELT(r_out, 2, out_scores_sexp);\n\n  R_API_END();\n  Rf_unprotect(4);\n\n  return r_out;\n}\n\nXGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {\n  SEXP out = Rf_protect(XGBMakeEmptyAltrep());\n  R_API_BEGIN();\n  BoosterHandle handle_out = nullptr;\n  CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),\n                            Rf_asInteger(begin_layer),\n                            Rf_asInteger(end_layer),\n                            Rf_asInteger(step),\n                            &handle_out));\n  XGBAltrepSetPointer(out, handle_out);\n  R_API_END();\n  Rf_unprotect(1);\n  return out;\n}\n\nXGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {\n  R_API_BEGIN();\n  BoosterHandle old_handle = R_ExternalPtrAddr(handle);\n  BoosterHandle new_handle = nullptr;\n  CHECK_CALL(XGBoosterSlice(old_handle,\n                            Rf_asInteger(begin_layer),\n                            Rf_asInteger(end_layer),\n                            Rf_asInteger(step),\n                            &new_handle));\n  R_SetExternalPtrAddr(handle, new_handle);\n  CHECK_CALL(XGBoosterFree(old_handle));\n  R_API_END();\n  return R_NilValue;\n}\n"
  },
  {
    "path": "R-package/src/xgboost_R.h",
    "content": "/*!\n * Copyright 2014-2022 by XGBoost Contributors\n * \\file xgboost_R.h\n * \\author Tianqi Chen\n * \\brief R wrapper of xgboost\n */\n#ifndef XGBOOST_R_H_ // NOLINT(*)\n#define XGBOOST_R_H_ // NOLINT(*)\n\n\n#ifndef R_NO_REMAP\n#  define R_NO_REMAP\n#endif\n#include <R.h>\n#include <Rinternals.h>\n#include <R_ext/Altrep.h>\n#include <Rmath.h>\n\n#include <xgboost/c_api.h>\n\n/*!\n * \\brief check whether a handle is NULL\n * \\param handle\n * \\return whether it is null ptr\n */\nXGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);\n\n/*!\n * \\brief set the names of the dimensions of an array in-place\n * \\param arr\n * \\param dim_names names for the dimensions to set\n * \\return NULL value\n */\nXGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names);\n\n/*!\n * \\brief set the names of a vector in-place\n * \\param arr\n * \\param names names for the dimensions to set\n * \\return NULL value\n */\nXGB_DLL SEXP XGSetVectorNamesInplace_R(SEXP arr, SEXP names);\n\n/*!\n * \\brief Set global configuration\n * \\param json_str a JSON string representing the list of key-value pairs\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBSetGlobalConfig_R(SEXP json_str);\n\n/*!\n * \\brief Get global configuration\n * \\return JSON string\n */\nXGB_DLL SEXP XGBGetGlobalConfig_R();\n\n/*!\n * \\brief load a data matrix from URI\n * \\param uri URI to the source file to read data from\n * \\param silent whether print messages\n * \\param Data split mode (0=rows, 1=columns)\n * \\return a loaded data matrix\n */\nXGB_DLL SEXP XGDMatrixCreateFromURI_R(SEXP uri, SEXP silent, SEXP data_split_mode);\n\n/*!\n * \\brief create matrix content from dense matrix\n * This assumes the matrix is stored in column major format\n * \\param data R Matrix object\n * \\param missing which value to represent missing value\n * \\param n_threads Number of threads used to construct DMatrix from dense matrix.\n * \\return created dmatrix\n */\nXGB_DLL SEXP 
XGDMatrixCreateFromMat_R(SEXP mat,\n                                      SEXP missing,\n                                      SEXP n_threads);\n\n/**\n * @brief Create matrix content from a data frame.\n * @param data R data.frame object\n * @param missing which value to represent missing value\n * @param n_threads Number of threads used to construct DMatrix from dense matrix.\n * @return created dmatrix\n */\nXGB_DLL SEXP XGDMatrixCreateFromDF_R(SEXP df, SEXP missing, SEXP n_threads);\n\n/*!\n * \\brief create a matrix content from CSC format\n * \\param indptr pointer to column headers\n * \\param indices row indices\n * \\param data content of the data\n * \\param num_row numer of rows (when it's set to 0, then guess from data)\n * \\param missing which value to represent missing value\n * \\param n_threads Number of threads used to construct DMatrix from csc matrix.\n * \\return created dmatrix\n */\nXGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,\n                                      SEXP missing, SEXP n_threads);\n\n/*!\n * \\brief create a matrix content from CSR format\n * \\param indptr pointer to row headers\n * \\param indices column indices\n * \\param data content of the data\n * \\param num_col numer of columns (when it's set to 0, then guess from data)\n * \\param missing which value to represent missing value\n * \\param n_threads Number of threads used to construct DMatrix from csr matrix.\n * \\return created dmatrix\n */\nXGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,\n                                      SEXP missing, SEXP n_threads);\n\n/*!\n * \\brief create a new dmatrix from sliced content of existing matrix\n * \\param handle instance of data matrix to be sliced\n * \\param idxset index set\n * \\param allow_groups Whether to allow slicing the DMatrix if it has a 'group' field\n * \\return a sliced new matrix\n */\nXGB_DLL SEXP 
XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups);\n\n/*!\n * \\brief save a data matrix into a binary file\n * \\param handle an instance of data matrix\n * \\param fname file name\n * \\param silent print statistics when saving\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);\n\n/*!\n * \\brief set information to dmatrix\n * \\param handle an instance of data matrix\n * \\param field field name, can be label, weight\n * \\param array pointer to float vector\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);\n\n/*!\n * \\brief get info vector (float type) from matrix\n * \\param handle an instance of data matrix\n * \\param field field name\n * \\return info vector\n */\nXGB_DLL SEXP XGDMatrixGetFloatInfo_R(SEXP handle, SEXP field);\n\n/*!\n * \\brief get info vector (uint type) from matrix\n * \\param handle an instance of data matrix\n * \\param field field name\n * \\return info vector\n */\nXGB_DLL SEXP XGDMatrixGetUIntInfo_R(SEXP handle, SEXP field);\n\n/*!\n * \\brief return number of rows\n * \\param handle an instance of data matrix\n */\nXGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);\n\n/*!\n * \\brief return number of columns\n * \\param handle an instance of data matrix\n */\nXGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle);\n\n/*!\n * \\brief create a ProxyDMatrix and get an R externalptr object for it\n */\nXGB_DLL SEXP XGProxyDMatrixCreate_R();\n\n/*!\n * \\brief Set dense matrix data on a proxy dmatrix\n * \\param handle R externalptr pointing to a ProxyDMatrix\n * \\param R_mat R matrix to set in the proxy dmatrix\n */\nXGB_DLL SEXP XGProxyDMatrixSetDataDense_R(SEXP handle, SEXP R_mat);\n\n/*!\n * \\brief Set CSR matrix data on a proxy dmatrix\n * \\param handle R externalptr pointing to a ProxyDMatrix\n * \\param lst R list containing, in this order:\n * 1. 'p' or 'indptr' vector of the CSR matrix.\n * 2. 
'j' or 'indices' vector of the CSR matrix.\n * 3. 'x' or 'data' vector of the CSR matrix.\n * 4. Number of columns in the CSR matrix.\n */\nXGB_DLL SEXP XGProxyDMatrixSetDataCSR_R(SEXP handle, SEXP lst);\n\n/*!\n * \\brief Set dense matrix data on a proxy dmatrix\n * \\param handle R externalptr pointing to a ProxyDMatrix\n * \\param lst R list or data.frame object containing its columns as numeric vectors\n */\nXGB_DLL SEXP XGProxyDMatrixSetDataColumnar_R(SEXP handle, SEXP lst);\n\n/*!\n * \\brief Create a DMatrix from a DataIter with callbacks\n * \\param expr_f_next expression for function(env, proxy_dmat) that sets the data on the proxy\n * dmatrix and returns either zero (end of batch) or one (batch continues).\n * \\param expr_f_reset expression for function(env) that resets the data iterator to\n * the beginning (first batch).\n * \\param calling_env R environment where to evaluate the expressions above\n * \\param proxy_dmat R externalptr holding a ProxyDMatrix.\n * \\param n_threads number of parallel threads to use for constructing the DMatrix.\n * \\param missing which value to represent missing value.\n * \\param cache_prefix path of cache file\n * \\return handle R externalptr holding the resulting DMatrix.\n */\nXGB_DLL SEXP XGDMatrixCreateFromCallback_R(\n  SEXP expr_f_next, SEXP expr_f_reset, SEXP calling_env, SEXP proxy_dmat,\n  SEXP n_threads, SEXP missing, SEXP cache_prefix);\n\n/*!\n * \\brief Create a QuantileDMatrix from a DataIter with callbacks\n * \\param expr_f_next expression for function(env, proxy_dmat) that sets the data on the proxy\n * dmatrix and returns either zero (end of batch) or one (batch continues).\n * \\param expr_f_reset expression for function(env) that resets the data iterator to\n * the beginning (first batch).\n * \\param calling_env R environment where to evaluate the expressions above\n * \\param proxy_dmat R externalptr holding a ProxyDMatrix.\n * \\param n_threads number of parallel threads to use for constructing 
the QuantileDMatrix.\n * \\param missing which value to represent missing value.\n * \\param max_bin maximum number of bins to have in the resulting QuantileDMatrix.\n * \\param ref_dmat an optional reference DMatrix from which to get the bin boundaries.\n * \\return handle R externalptr holding the resulting QuantileDMatrix.\n */\nXGB_DLL SEXP XGQuantileDMatrixCreateFromCallback_R(\n  SEXP expr_f_next, SEXP expr_f_reset, SEXP calling_env, SEXP proxy_dmat,\n  SEXP n_threads, SEXP missing, SEXP max_bin, SEXP ref_dmat);\n\n/*!\n * \\brief Frees a ProxyDMatrix and empties out the R externalptr object that holds it\n * \\param proxy_dmat R externalptr containing a ProxyDMatrix\n * \\return NULL\n */\nXGB_DLL SEXP XGDMatrixFree_R(SEXP proxy_dmat);\n\n/*!\n * \\brief Get the value that represents missingness in R integers as a numeric non-missing value.\n */\nXGB_DLL SEXP XGGetRNAIntAsDouble();\n\n/*!\n * \\brief Call R C-level function 'duplicate'\n * \\param obj Object to duplicate\n */\nXGB_DLL SEXP XGDuplicate_R(SEXP obj);\n\n/*!\n * \\brief Equality comparison for two pointers\n * \\param obj1 R 'externalptr'\n * \\param obj2 R 'externalptr'\n */\nXGB_DLL SEXP XGPointerEqComparison_R(SEXP obj1, SEXP obj2);\n\n/*!\n * \\brief Register the Altrep class used for the booster\n * \\param dll DLL info as provided by R_init\n */\nXGB_DLL void XGBInitializeAltrepClass_R(DllInfo *dll);\n\n/*!\n * \\brief return the quantile cuts used for the histogram method\n * \\param handle an instance of data matrix\n * \\return A list with entries 'indptr' and 'data'\n */\nXGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle);\n\n/*!\n * \\brief get the number of non-missing entries in a dmatrix\n * \\param handle an instance of data matrix\n * \\return the number of non-missing entries\n */\nXGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle);\n\n/*!\n * \\brief get the data in a dmatrix in CSR format\n * \\param handle an instance of data matrix\n * \\return R list with the following 
entries in this order:\n * - 'indptr\n * - 'indices\n * - 'data'\n * - 'ncol'\n */\nXGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle);\n\n/*!\n * \\brief create xgboost learner\n * \\param dmats a list of dmatrix handles that will be cached\n */\nXGB_DLL SEXP XGBoosterCreate_R(SEXP dmats);\n\n/*!\n * \\brief copy information about features from a DMatrix into a Booster\n * \\param booster R 'externalptr' pointing to a booster object\n * \\param dmat R 'externalptr' pointing to a DMatrix object\n */\nXGB_DLL SEXP XGBoosterCopyInfoFromDMatrix_R(SEXP booster, SEXP dmat);\n\n/*!\n * \\brief handle R 'externalptr' holding the booster object\n * \\param field field name\n * \\param features features to set for the field\n */\nXGB_DLL SEXP XGBoosterSetStrFeatureInfo_R(SEXP handle, SEXP field, SEXP features);\n\n/*!\n * \\brief handle R 'externalptr' holding the booster object\n * \\param field field name\n */\nXGB_DLL SEXP XGBoosterGetStrFeatureInfo_R(SEXP handle, SEXP field);\n\n/*!\n * \\brief Get the number of boosted rounds from a model\n * \\param handle R 'externalptr' holding the booster object\n */\nXGB_DLL SEXP XGBoosterBoostedRounds_R(SEXP handle);\n\n/*!\n * \\brief Get the number of features to which the model was fitted\n * \\param handle R 'externalptr' holding the booster object\n */\nXGB_DLL SEXP XGBoosterGetNumFeature_R(SEXP handle);\n\n/*!\n * \\brief set parameters\n * \\param handle handle\n * \\param name  parameter name\n * \\param val value of parameter\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);\n\n/*!\n * \\brief update the model in one round using dtrain\n * \\param handle handle\n * \\param iter current iteration rounds\n * \\param dtrain training data\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);\n\n/*!\n * \\brief update the model, by directly specify gradient and second order gradient,\n *        this can be used to replace 
UpdateOneIter, to support customized loss function\n * \\param handle handle\n * \\param iter The current training iteration.\n * \\param dtrain training data\n * \\param grad gradient statistics\n * \\param hess second order gradient statistics\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP grad, SEXP hess);\n\n/*!\n * \\brief get evaluation statistics for xgboost\n * \\param handle handle\n * \\param iter current iteration rounds\n * \\param dmats list of handles to dmatrices\n * \\param evname name of evaluation\n * \\return the string containing evaluation stats\n */\nXGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);\n\n/*!\n * \\brief Run prediction on DMatrix, replacing `XGBoosterPredict_R`\n * \\param handle handle\n * \\param dmat data matrix\n * \\param json_config See `XGBoosterPredictFromDMatrix` in xgboost c_api.h\n *\n * \\return A list containing 2 vectors, first one for shape while second one for prediction result.\n */\nXGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config);\n\n/*!\n * \\brief Run prediction on R dense matrix\n * \\param handle handle\n * \\param R_mat R matrix\n * \\param missing missing value\n * \\param json_config See `XGBoosterPredictFromDense` in xgboost c_api.h. Doesn't include 'missing'\n * \\param base_margin base margin for the prediction\n *\n * \\return A list containing 2 vectors, first one for shape while second one for prediction result.\n */\nXGB_DLL SEXP XGBoosterPredictFromDense_R(SEXP handle, SEXP R_mat, SEXP missing,\n                                         SEXP json_config, SEXP base_margin);\n\n/*!\n * \\brief Run prediction on R CSR matrix\n * \\param handle handle\n * \\param lst An R list, containing, in this order:\n *              (a) 'p' array (a.k.a. indptr)\n *              (b) 'j' array (a.k.a. indices)\n *              (c) 'x' array (a.k.a. 
data / values)\n *              (d) number of columns\n * \\param missing missing value\n * \\param json_config See `XGBoosterPredictFromCSR` in xgboost c_api.h. Doesn't include 'missing'\n * \\param base_margin base margin for the prediction\n *\n * \\return A list containing 2 vectors, first one for shape while second one for prediction result.\n */\nXGB_DLL SEXP XGBoosterPredictFromCSR_R(SEXP handle, SEXP lst, SEXP missing,\n                                       SEXP json_config, SEXP base_margin);\n\n/*!\n * \\brief Run prediction on R data.frame\n * \\param handle handle\n * \\param R_df R data.frame\n * \\param missing missing value\n * \\param json_config See `XGBoosterPredictFromDense` in xgboost c_api.h. Doesn't include 'missing'\n * \\param base_margin base margin for the prediction\n *\n * \\return A list containing 2 vectors, first one for shape while second one for prediction result.\n */\nXGB_DLL SEXP XGBoosterPredictFromColumnar_R(SEXP handle, SEXP R_df, SEXP missing,\n                                            SEXP json_config, SEXP base_margin);\n\n/*!\n * \\brief load model from existing file\n * \\param handle handle\n * \\param fname file name\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname);\n\n/*!\n * \\brief save model into existing file\n * \\param handle handle\n * \\param fname file name\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname);\n\n/*!\n * \\brief load model from raw array\n * \\param handle handle\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);\n\n/*!\n * \\brief Save model into R's raw array\n *\n * \\param handle handle\n * \\param json_config JSON encoded string storing parameters for the function.  
Following\n *                    keys are expected in the JSON document:\n *\n *     \"format\": str\n *       - json: Output booster will be encoded as JSON.\n *       - ubj:  Output booster will be encoded as Univeral binary JSON.\n *       - deprecated: Output booster will be encoded as old custom binary format.  Do now use\n *         this format except for compatibility reasons.\n *\n * \\return Raw array\n */\nXGB_DLL SEXP XGBoosterSaveModelToRaw_R(SEXP handle, SEXP json_config);\n\n/*!\n * \\brief Save internal parameters as a JSON string\n * \\param handle handle\n * \\return JSON string\n */\n\nXGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle);\n/*!\n * \\brief Load the JSON string returnd by XGBoosterSaveJsonConfig_R\n * \\param handle handle\n * \\param value JSON string\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value);\n\n/*!\n  * \\brief Memory snapshot based serialization method.  Saves everything states\n  *        into buffer.\n  * \\param handle handle to booster\n  */\nXGB_DLL SEXP XGBoosterSerializeToBuffer_R(SEXP handle);\n\n/*!\n * \\brief Memory snapshot based serialization method.  
Loads the buffer returned\n *        from `XGBoosterSerializeToBuffer`.\n * \\param handle handle to booster\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw);\n\n/*!\n * \\brief dump model into a string\n * \\param handle handle\n * \\param fmap  name to fmap can be empty string\n * \\param with_stats whether dump statistics of splits\n * \\param dump_format the format to dump the model in\n */\nXGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format);\n\n/*!\n * \\brief get learner attribute value\n * \\param handle handle\n * \\param name  attribute name\n * \\return character containing attribute value\n */\nXGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name);\n\n/*!\n * \\brief set learner attribute value\n * \\param handle handle\n * \\param name  attribute name\n * \\param val attribute value; NULL value would delete an attribute\n * \\return R_NilValue\n */\nXGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val);\n\n/*!\n * \\brief get the names of learner attributes\n * \\return string vector containing attribute names\n */\nXGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);\n\n/*!\n * \\brief Get feature scores from the model.\n * \\param json_config See `XGBoosterFeatureScore` in xgboost c_api.h\n * \\return A vector with the first element as feature names, second element as shape of\n *         feature scores and third element as feature scores.\n */\nXGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);\n\n/*!\n * \\brief Slice a fitted booster model (by rounds)\n * \\param handle handle to the fitted booster\n * \\param begin_layer start of the slice\n * \\param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round\n * \\param step step size of the slice\n * \\return The sliced booster with the requested rounds only\n */\nXGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP 
step);\n\n/*!\n * \\brief Slice a fitted booster model (by rounds), and replace its handle with the result\n * \\param handle handle to the fitted booster\n * \\param begin_layer start of the slice\n * \\param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round\n * \\param step step size of the slice\n * \\return NULL\n */\nXGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);\n\n#endif  // XGBOOST_R_H_ // NOLINT(*)\n"
  },
  {
    "path": "R-package/src/xgboost_custom.cc",
    "content": "// Copyright (c) 2015 by Contributors\n// This file contains the customization implementations of R module\n// to change behavior of libxgboost\n\n#include <xgboost/logging.h>\n#include \"../../src/common/random.h\"\n#include \"./xgboost_R.h\"\n\n// redirect the messages to R's console.\nnamespace dmlc {\nvoid CustomLogMessage::Log(const std::string& msg) {\n  Rprintf(\"%s\\n\", msg.c_str());\n}\n}  // namespace dmlc\n\nnamespace xgboost {\nConsoleLogger::~ConsoleLogger() {\n  if (cur_verbosity_ == LogVerbosity::kIgnore ||\n      cur_verbosity_ <= GlobalVerbosity()) {\n    if (cur_verbosity_ == LogVerbosity::kWarning) {\n      REprintf(\"%s\\n\", log_stream_.str().c_str());\n    } else {\n      dmlc::CustomLogMessage::Log(log_stream_.str());\n    }\n  }\n}\nTrackerLogger::~TrackerLogger() {\n  dmlc::CustomLogMessage::Log(log_stream_.str());\n}\n}  // namespace xgboost\n\nnamespace xgboost {\nnamespace common {\n\n// redirect the nath functions.\nbool CheckNAN(double v) {\n  return ISNAN(v);\n}\n#if !defined(XGBOOST_USE_CUDA)\ndouble LogGamma(double v) {\n  return lgammafn(v);\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "R-package/tests/helper_scripts/generate_models.R",
    "content": "# Script to generate reference models. The reference models are used to test backward compatibility\n# of saved model files from XGBoost version 0.90 and 1.0.x.\nlibrary(xgboost)\nlibrary(Matrix)\n\nset.seed(0)\nmetadata <- list(\n  kRounds = 2,\n  kRows = 1000,\n  kCols = 4,\n  kForests = 2,\n  kMaxDepth = 2,\n  kClasses = 3\n)\nX <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,\n            ncol = metadata$kCols, sparse = TRUE)\nw <- runif(metadata$kRows)\n\nversion <- packageVersion('xgboost')\ntarget_dir <- 'models'\n\nsave_booster <- function(booster, model_name) {\n  booster_bin <- function(model_name) {\n    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.bin', sep = '')))\n  }\n  booster_json <- function(model_name) {\n    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.json', sep = '')))\n  }\n  booster_rds <- function(model_name) {\n    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.rds', sep = '')))\n  }\n  xgb.save(booster, booster_bin(model_name))\n  saveRDS(booster, booster_rds(model_name))\n  if (version >= '1.0.0') {\n    xgb.save(booster, booster_json(model_name))\n  }\n}\n\ngenerate_regression_model <- function() {\n  print('Regression')\n  y <- rnorm(metadata$kRows)\n\n  data <- xgb.DMatrix(X, label = y, nthread = 1)\n  params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,\n                 max_depth = metadata$kMaxDepth)\n  booster <- xgb.train(params, data, nrounds = metadata$kRounds)\n  save_booster(booster, 'reg')\n}\n\ngenerate_logistic_model <- function() {\n  print('Binary classification with logistic loss')\n  y <- sample(0:1, size = metadata$kRows, replace = TRUE)\n  stopifnot(max(y) == 1, min(y) == 0)\n\n  objective <- c('binary:logistic', 'binary:logitraw')\n  name <- c('logit', 'logitraw')\n\n  for (i in seq_along(objective)) {\n    data <- xgb.DMatrix(X, label = y, weight = w, nthread 
= 1)\n    params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,\n                   max_depth = metadata$kMaxDepth, objective = objective[i])\n    booster <- xgb.train(params, data, nrounds = metadata$kRounds)\n    save_booster(booster, name[i])\n  }\n}\n\ngenerate_classification_model <- function() {\n  print('Multi-class classification')\n  y <- sample(0:(metadata$kClasses - 1), size = metadata$kRows, replace = TRUE)\n  stopifnot(max(y) == metadata$kClasses - 1, min(y) == 0)\n\n  data <- xgb.DMatrix(X, label = y, weight = w, nthread = 1)\n  params <- list(num_class = metadata$kClasses, tree_method = 'hist',\n                 num_parallel_tree = metadata$kForests, max_depth = metadata$kMaxDepth,\n                 objective = 'multi:softmax')\n  booster <- xgb.train(params, data, nrounds = metadata$kRounds)\n  save_booster(booster, 'cls')\n}\n\ngenerate_ranking_model <- function() {\n  print('Learning to rank')\n  y <- sample(0:4, size = metadata$kRows, replace = TRUE)\n  stopifnot(max(y) == 4, min(y) == 0)\n  kGroups <- 20\n  w <- runif(kGroups)\n  g <- rep(50, times = kGroups)\n\n  data <- xgb.DMatrix(X, label = y, group = g, nthread = 1)\n  # setinfo(data, 'weight', w)\n  # ^^^ does not work in version <= 1.1.0; see https://github.com/dmlc/xgboost/issues/5942\n  # So call low-level function XGDMatrixSetInfo_R directly. Since this function is not an exported\n  # symbol, use the triple-colon operator.\n  .Call(xgboost:::XGDMatrixSetInfo_R, data, 'weight', as.numeric(w))\n  params <- list(objective = 'rank:ndcg', num_parallel_tree = metadata$kForests,\n                 tree_method = 'hist', max_depth = metadata$kMaxDepth)\n  booster <- xgb.train(params, data, nrounds = metadata$kRounds)\n  save_booster(booster, 'ltr')\n}\n\ndir.create(target_dir)\n\ninvisible(generate_regression_model())\ninvisible(generate_logistic_model())\ninvisible(generate_classification_model())\ninvisible(generate_ranking_model())\n"
  },
  {
    "path": "R-package/tests/helper_scripts/install_deps.R",
    "content": "## Install dependencies of R package for testing. The list might not be\n## up-to-date; check DESCRIPTION for the latest list and update this one if\n## an inconsistency is found.\npkgs <- c(\n  ## CI\n  \"pkgbuild\",\n  \"roxygen2\",\n  \"XML\",\n  \"cplm\",\n  \"e1071\",\n  ## suggests\n  \"knitr\",\n  \"rmarkdown\",\n  \"ggplot2\",\n  \"DiagrammeR\",\n  \"DiagrammeRsvg\",\n  \"rsvg\",\n  \"htmlwidgets\",\n  \"Ckmeans.1d.dp\",\n  \"vcd\",\n  \"lintr\",\n  \"testthat\",\n  \"igraph\",\n  \"float\",\n  \"titanic\",\n  \"RhpcBLASctl\",\n  ## imports\n  \"Matrix\",\n  \"data.table\",\n  \"jsonlite\"\n)\n\nncpus <- parallel::detectCores()\nprint(paste0(\"Using \", ncpus, \" cores to install dependencies.\"))\n\nif (.Platform$OS.type == \"unix\") {\n  print(\"Installing source packages on unix.\")\n  install.packages(\n    pkgs,\n    repo = \"https://cloud.r-project.org\",\n    dependencies = c(\"Depends\", \"Imports\", \"LinkingTo\"),\n    Ncpus = parallel::detectCores()\n  )\n} else {\n  print(\"Installing binary packages on Windows.\")\n  install.packages(\n    pkgs,\n    repo = \"https://cloud.r-project.org\",\n    dependencies = c(\"Depends\", \"Imports\", \"LinkingTo\"),\n    Ncpus = parallel::detectCores(),\n    type = \"binary\"\n  )\n}\n"
  },
  {
    "path": "R-package/tests/helper_scripts/run-examples.R",
    "content": "## Helper script for running individual examples.\nlibrary(pkgload)\nlibrary(xgboost)\n\nfiles <- list.files(\"./man\")\n\n\nrun_example_timeit <- function(f) {\n  path <- paste(\"./man/\", f, sep = \"\")\n  print(paste(\"Test\", f))\n  flush.console()\n  t0 <- proc.time()\n  run_example(path)\n  t1 <- proc.time()\n  list(file = f, time = t1 - t0)\n}\n\ntimings <- lapply(files, run_example_timeit)\n\nfor (t in timings) {\n  ratio <- t$time[1] / t$time[3]\n  if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {\n    print(paste(\"Offending example:\", t$file, ratio))\n  }\n}\n"
  },
  {
    "path": "R-package/tests/testthat/helper_model.R",
    "content": "## A special file sourced by testthat.\n\nget_basescore <- function(model) {\n  as.numeric(\n    jsonlite::fromJSON(model$learner$learner_model_param$base_score)\n  )\n}\n"
  },
  {
    "path": "R-package/tests/testthat/test_basic.R",
    "content": "context(\"basic functions\")\n\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\ntrain <- agaricus.train\ntest <- agaricus.test\nset.seed(1994)\n\n# disable some tests for Win32\nwindows_flag <- .Platform$OS.type == \"windows\" &&\n  .Machine$sizeof.pointer != 8\nsolaris_flag <- (Sys.info()[\"sysname\"] == \"SunOS\")\nn_threads <- 1\n\n\ntest_that(\"train and predict binary classification\", {\n  nrounds <- 2\n  expect_output(\n    bst <- xgb.train(\n      data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n      nrounds = nrounds,\n      params = xgb.params(\n        max_depth = 2,\n        learning_rate = 1,\n        nthread = n_threads,\n        objective = \"binary:logistic\",\n        eval_metric = \"error\"\n      ),\n      evals = list(train = xgb.DMatrix(train$data, label = train$label, nthread = 1))\n    ),\n    \"train-error\"\n  )\n  expect_equal(class(bst), \"xgb.Booster\")\n  expect_equal(xgb.get.num.boosted.rounds(bst), nrounds)\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_equal(nrow(attributes(bst)$evaluation_log), nrounds)\n  expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.03)\n\n  pred <- predict(bst, test$data)\n  expect_length(pred, 1611)\n\n  pred1 <- predict(bst, train$data, iterationrange = c(1, 1))\n  expect_length(pred1, 6513)\n  err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)\n  err_log <- attributes(bst)$evaluation_log[1, train_error]\n  expect_lt(abs(err_pred1 - err_log), 10e-6)\n})\n\ntest_that(\"parameter validation works\", {\n  p <- list(foo = \"bar\")\n  nrounds <- 1\n  set.seed(1994)\n\n  d <- cbind(\n    x1 = rnorm(10),\n    x2 = rnorm(10),\n    x3 = rnorm(10)\n  )\n  y <- d[, \"x1\"] + d[, \"x2\"]^2 +\n    ifelse(d[, \"x3\"] > .5, d[, \"x3\"]^2, 2^d[, \"x3\"]) +\n    rnorm(10)\n  dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads)\n\n  correct <- function() {\n    params <- list(\n      
max_depth = 2,\n      booster = \"dart\",\n      rate_drop = 0.5,\n      one_drop = TRUE,\n      nthread = n_threads,\n      objective = \"reg:squarederror\"\n    )\n    xgb.train(params = params, data = dtrain, nrounds = nrounds)\n  }\n  expect_silent(correct())\n  incorrect <- function() {\n    params <- list(\n      max_depth = 2,\n      booster = \"dart\",\n      rate_drop = 0.5,\n      one_drop = TRUE,\n      objective = \"reg:squarederror\",\n      nthread = n_threads,\n      foo = \"bar\",\n      bar = \"foo\"\n    )\n    output <- capture.output(\n      xgb.train(params = params, data = dtrain, nrounds = nrounds),\n      type = \"message\"\n    )\n    print(output)\n  }\n  expect_output(incorrect(), '\\\\\\\\\"bar\\\\\\\\\", \\\\\\\\\"foo\\\\\\\\\"')\n})\n\n\ntest_that(\"dart prediction works\", {\n  nrounds <- 32\n  set.seed(1994)\n\n  d <- cbind(\n    x1 = rnorm(100),\n    x2 = rnorm(100),\n    x3 = rnorm(100)\n  )\n  y <- d[, \"x1\"] + d[, \"x2\"]^2 +\n    ifelse(d[, \"x3\"] > .5, d[, \"x3\"]^2, 2^d[, \"x3\"]) +\n    rnorm(100)\n\n  set.seed(1994)\n  booster_by_xgboost <- xgb.train(\n    data = xgb.DMatrix(d, label = y, nthread = 1),\n    nrounds = nrounds,\n    params = xgb.params(\n      max_depth = 2,\n      booster = \"dart\",\n      rate_drop = 0.5,\n      one_drop = TRUE,\n      learning_rate = 1,\n      nthread = n_threads,\n      objective = \"reg:squarederror\"\n    )\n  )\n  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)\n  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))\n  expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))\n\n  pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)\n  expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))\n\n  set.seed(1994)\n  dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads)\n  
booster_by_train <- xgb.train(\n    params = xgb.params(\n      booster = \"dart\",\n      max_depth = 2,\n      learning_rate = 1,\n      rate_drop = 0.5,\n      one_drop = TRUE,\n      nthread = n_threads,\n      objective = \"reg:squarederror\"\n    ),\n    data = dtrain,\n    nrounds = nrounds\n  )\n  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)\n  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))\n  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)\n\n  expect_equal(pred_by_train_0, pred_by_xgboost_0, tolerance = 1e-6)\n  expect_equal(pred_by_train_1, pred_by_xgboost_1, tolerance = 1e-6)\n  expect_true(all(matrix(pred_by_train_2, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))\n})\n\ntest_that(\"train and predict softprob\", {\n  lb <- as.numeric(iris$Species) - 1\n  set.seed(11)\n  expect_output(\n    bst <- xgb.train(\n      data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),\n      nrounds = 5,\n      params = xgb.params(\n        max_depth = 3, learning_rate = 0.5, nthread = n_threads,\n        objective = \"multi:softprob\", num_class = 3, eval_metric = \"merror\"\n      ),\n      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))\n    ),\n    \"train-merror\"\n  )\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)\n  expect_equal(xgb.get.num.boosted.rounds(bst), 5)\n  pred <- predict(bst, as.matrix(iris[, -5]))\n  expect_length(pred, nrow(iris) * 3)\n  # row sums add up to total probability of 1:\n  expect_equal(rowSums(pred), rep(1, nrow(iris)), tolerance = 1e-7)\n  # manually calculate error at the last iteration:\n  mpred <- predict(bst, as.matrix(iris[, -5]))\n  expect_equal(mpred, pred)\n  pred_labels <- max.col(mpred) - 1\n  err <- sum(pred_labels != lb) / length(lb)\n  
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)\n  # manually calculate error at the 1st iteration:\n  mpred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))\n  pred_labels <- max.col(mpred) - 1\n  err <- sum(pred_labels != lb) / length(lb)\n  expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)\n\n  mpred1 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))\n  expect_equal(mpred, mpred1)\n\n  d <- cbind(\n    x1 = rnorm(100),\n    x2 = rnorm(100),\n    x3 = rnorm(100)\n  )\n  y <- sample.int(10, 100, replace = TRUE) - 1\n  dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads)\n  booster <- xgb.train(\n    params = xgb.params(\n      objective = \"multi:softprob\",\n      num_class = 10,\n      tree_method = \"hist\",\n      nthread = n_threads\n    ),\n    data = dtrain,\n    nrounds = 4\n  )\n  predt <- predict(booster, as.matrix(d), strict_shape = FALSE)\n  expect_equal(ncol(predt), 10)\n  expect_equal(rowSums(predt), rep(1, 100), tolerance = 1e-7)\n})\n\ntest_that(\"train and predict softmax\", {\n  lb <- as.numeric(iris$Species) - 1\n  set.seed(11)\n  expect_output(\n    bst <- xgb.train(\n      data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),\n      nrounds = 5,\n      params = xgb.params(\n        max_depth = 3, learning_rate = 0.5, nthread = n_threads,\n        objective = \"multi:softmax\", num_class = 3, eval_metric = \"merror\"\n      ),\n      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))\n    ),\n    \"train-merror\"\n  )\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)\n  expect_equal(xgb.get.num.boosted.rounds(bst), 5)\n\n  pred <- predict(bst, as.matrix(iris[, -5]))\n  expect_length(pred, nrow(iris))\n  err <- sum(pred != lb) / length(lb)\n  expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, 
tolerance = 5e-6)\n})\n\ntest_that(\"train and predict RF\", {\n  set.seed(11)\n  lb <- train$label\n  # single iteration\n  bst <- xgb.train(\n    data = xgb.DMatrix(train$data, label = lb, nthread = 1),\n    nrounds = 1,\n    params = xgb.params(\n      max_depth = 5,\n      nthread = n_threads,\n      objective = \"binary:logistic\", eval_metric = \"error\",\n      num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1\n    ),\n    evals = list(train = xgb.DMatrix(train$data, label = lb, nthread = 1)),\n    verbose = 0\n  )\n  expect_equal(xgb.get.num.boosted.rounds(bst), 1)\n\n  pred <- predict(bst, train$data)\n  pred_err <- sum((pred > 0.5) != lb) / length(lb)\n  expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)\n  # expect_lt(pred_err, 0.03)\n\n  pred <- predict(bst, train$data, iterationrange = c(1, 1))\n  pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)\n  expect_equal(pred_err_20, pred_err)\n})\n\ntest_that(\"train and predict RF with softprob\", {\n  lb <- as.numeric(iris$Species) - 1\n  nrounds <- 15\n  set.seed(11)\n  bst <- xgb.train(\n    data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),\n    nrounds = nrounds,\n    verbose = 0,\n    params = xgb.params(\n      max_depth = 3,\n      learning_rate = 0.9,\n      nthread = n_threads,\n      objective = \"multi:softprob\",\n      eval_metric = \"merror\",\n      num_class = 3,\n      num_parallel_tree = 4,\n      subsample = 0.5,\n      colsample_bytree = 0.5\n    ),\n    evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1))\n  )\n  expect_equal(xgb.get.num.boosted.rounds(bst), 15)\n  # predict for all iterations:\n  pred <- predict(bst, as.matrix(iris[, -5]))\n  expect_equal(dim(pred), c(nrow(iris), 3))\n  pred_labels <- max.col(pred) - 1\n  err <- sum(pred_labels != lb) / length(lb)\n  expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)\n  # predict for 7 iterations and adjust 
for 4 parallel trees per iteration\n  pred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 7))\n  err <- sum((max.col(pred) - 1) != lb) / length(lb)\n  expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)\n})\n\ntest_that(\"use of multiple eval metrics works\", {\n  expect_output(\n    bst <- xgb.train(\n      data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n      nrounds = 2,\n      params = list(\n        max_depth = 2,\n        learning_rate = 1, nthread = n_threads, objective = \"binary:logistic\",\n        eval_metric = \"error\", eval_metric = \"auc\", eval_metric = \"logloss\"\n      ),\n      evals = list(train = xgb.DMatrix(train$data, label = train$label, nthread = 1))\n    ),\n    \"train-error.*train-auc.*train-logloss\"\n  )\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_equal(dim(attributes(bst)$evaluation_log), c(2, 4))\n  expect_equal(colnames(attributes(bst)$evaluation_log), c(\"iter\", \"train_error\", \"train_auc\", \"train_logloss\"))\n  expect_output(\n    bst2 <- xgb.train(\n      data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        learning_rate = 1,\n        nthread = n_threads,\n        objective = \"binary:logistic\",\n        eval_metric = list(\"error\", \"auc\", \"logloss\")\n      ),\n      evals = list(train = xgb.DMatrix(train$data, label = train$label, nthread = 1))\n    ),\n    \"train-error.*train-auc.*train-logloss\"\n  )\n  expect_false(is.null(attributes(bst2)$evaluation_log))\n  expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 4))\n  expect_equal(colnames(attributes(bst2)$evaluation_log), c(\"iter\", \"train_error\", \"train_auc\", \"train_logloss\"))\n})\n\n\ntest_that(\"training continuation works\", {\n  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)\n  evals <- list(train = dtrain)\n  params <- xgb.params(\n    
objective = \"binary:logistic\", max_depth = 2, learning_rate = 1, nthread = n_threads\n  )\n\n  # for the reference, use 4 iterations at once:\n  set.seed(11)\n  bst <- xgb.train(params, dtrain, nrounds = 4, evals = evals, verbose = 0)\n  # first two iterations:\n  set.seed(11)\n  bst1 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0)\n  # continue for two more:\n  bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)\n  if (!windows_flag && !solaris_flag) {\n    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))\n  }\n  expect_false(is.null(attributes(bst2)$evaluation_log))\n  expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))\n  expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)\n  # test continuing from raw model data\n  bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))\n  if (!windows_flag && !solaris_flag) {\n    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))\n  }\n  expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 2))\n  # test continuing from a model in file\n  fname <- file.path(tempdir(), \"xgboost.json\")\n  xgb.save(bst1, fname)\n  bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)\n  if (!windows_flag && !solaris_flag) {\n    expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))\n  }\n  expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 2))\n})\n\ntest_that(\"xgb.cv works\", {\n  set.seed(11)\n  expect_output(\n    cv <- xgb.cv(\n      data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n      nfold = 5,\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        learning_rate = 1.,\n        nthread = n_threads,\n        objective = \"binary:logistic\",\n        eval_metric = \"error\"\n      ),\n      verbose = TRUE\n    ),\n    \"train-error:\"\n  )\n  expect_is(cv, \"xgb.cv.synchronous\")\n  
expect_false(is.null(cv$evaluation_log))\n  expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)\n  expect_lt(cv$evaluation_log[, min(test_error_std)], 0.0085)\n  expect_equal(cv$niter, 2)\n  expect_false(is.null(cv$folds) && is.list(cv$folds))\n  expect_length(cv$folds, 5)\n  expect_false(is.null(cv$params) && is.list(cv$params))\n  expect_false(is.null(cv$call))\n})\n\ntest_that(\"xgb.cv invalid inputs\", {\n  data(\"mtcars\")\n  y <- mtcars$mpg\n  x_df <- mtcars[, -1]\n\n  expect_error(\n    cv <- xgb.cv(\n      data = xgb.QuantileDMatrix(x_df, label = y),\n      nfold = 5,\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        nthread = n_threads\n      )\n    ),\n    regexp = \".*QuantileDMatrix.*\"\n  )\n  expect_error(\n    cv <- xgb.cv(\n      data = xgb.DMatrix(x_df, label = y),\n      nfold = 5,\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        nthread = n_threads,\n      ),\n      callbacks = list(\n        xgb.cb.early.stop(stopping_rounds = 3, save_best = TRUE)\n      )\n    ),\n    regexp = \".*save_best.*\"\n  )\n})\n\ntest_that(\"xgb.cv works with stratified folds\", {\n  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)\n  set.seed(314159)\n  cv <- xgb.cv(\n    data = dtrain,\n    nrounds = 2,\n    nfold = 5,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = n_threads,\n      objective = \"binary:logistic\"\n    ),\n    verbose = FALSE, stratified = FALSE\n  )\n  set.seed(314159)\n  cv2 <- xgb.cv(\n    data = dtrain,\n    nfold = 5,\n    nrounds = 2,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = n_threads,\n      objective = \"binary:logistic\"\n    ),\n    verbose = FALSE, stratified = TRUE\n  )\n  # Stratified folds should result in a different evaluation logs\n  expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))\n})\n\ntest_that(\"train and predict with non-strict classes\", 
{\n  # standard dense matrix input\n  train_dense <- as.matrix(train$data)\n  bst <- xgb.train(\n    data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),\n    nrounds = 2,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = n_threads,\n      objective = \"binary:logistic\"\n    ),\n    verbose = 0\n  )\n  pr0 <- predict(bst, train_dense)\n\n  # dense matrix-like input of non-matrix class\n  class(train_dense) <- \"shmatrix\"\n  expect_true(is.matrix(train_dense))\n  expect_error(\n    bst <- xgb.train(\n      data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        nthread = n_threads,\n        objective = \"binary:logistic\"\n      ),\n      verbose = 0\n    ),\n    regexp = NA\n  )\n  expect_error(pr <- predict(bst, train_dense), regexp = NA)\n  expect_equal(pr0, pr)\n\n  # dense matrix-like input of non-matrix class with some inheritance\n  class(train_dense) <- c(\"pphmatrix\", \"shmatrix\")\n  expect_true(is.matrix(train_dense))\n  expect_error(\n    bst <- xgb.train(\n      data = xgb.DMatrix(train_dense, label = train$label, nthread = 1),\n      nrounds = 2,\n      params = xgb.params(\n        max_depth = 2,\n        nthread = n_threads,\n        objective = \"binary:logistic\"\n      ),\n      verbose = 0\n    ),\n    regexp = NA\n  )\n  expect_error(pr <- predict(bst, train_dense), regexp = NA)\n  expect_equal(pr0, pr)\n\n  # when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster\n  class(bst) <- c(\"super.Booster\", \"xgb.Booster\")\n  expect_error(pr <- predict(bst, train_dense), regexp = NA)\n  expect_equal(pr0, pr)\n})\n\ntest_that(\"max_delta_step works\", {\n  dtrain <- xgb.DMatrix(\n    agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n  )\n  evals <- list(train = dtrain)\n  params <- xgb.params(\n    objective = \"binary:logistic\", eval_metric = \"logloss\", max_depth = 
2,\n    nthread = n_threads,\n    learning_rate = 0.5\n  )\n  nrounds <- 5\n  # model with no restriction on max_delta_step\n  bst1 <- xgb.train(params, dtrain, nrounds, evals = evals, verbose = 0)\n  # model with restricted max_delta_step\n  bst2 <- xgb.train(c(params, list(max_delta_step = 1)), dtrain, nrounds, evals = evals, verbose = 0)\n  # the no-restriction model is expected to have consistently lower loss during the initial iterations\n  expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))\n  expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)\n})\n\ntest_that(\"colsample_bytree works\", {\n  # Randomly generate data matrix by sampling from uniform distribution [-1, 1]\n  set.seed(1)\n  train_x <- matrix(runif(1000, min = -1, max = 1), ncol = 100)\n  train_y <- as.numeric(rowSums(train_x) > 0)\n  test_x <- matrix(runif(1000, min = -1, max = 1), ncol = 100)\n  test_y <- as.numeric(rowSums(test_x) > 0)\n  colnames(train_x) <- paste0(\"Feature_\", sprintf(\"%03d\", 1:100))\n  colnames(test_x) <- paste0(\"Feature_\", sprintf(\"%03d\", 1:100))\n  dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)\n  dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)\n  evals <- list(train = dtrain, eval = dtest)\n  ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for\n  ## each tree\n  params <- xgb.params(\n    max_depth = 2, learning_rate = 0, nthread = n_threads,\n    colsample_bytree = 0.01, objective = \"binary:logistic\",\n    eval_metric = \"auc\"\n  )\n  set.seed(2)\n  bst <- xgb.train(params, dtrain, nrounds = 100, evals = evals, verbose = 0)\n  xgb.importance(model = bst)\n  # If colsample_bytree works properly, a variety of features should be used\n  # in the 100 trees\n  expect_gte(nrow(xgb.importance(model = bst)), 28)\n})\n\ntest_that(\"Configuration works\", {\n  bst <- 
xgb.train(\n    data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n    nrounds = 2,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = n_threads,\n      objective = \"binary:logistic\"\n    )\n  )\n  config <- xgb.config(bst)\n  xgb.config(bst) <- config\n  reloaded_config <- xgb.config(bst)\n  expect_equal(config, reloaded_config)\n})\n\ntest_that(\"strict_shape works\", {\n  n_rounds <- 2\n\n  test_strict_shape <- function(bst, X, n_groups) {\n    predt <- predict(bst, X, strict_shape = TRUE)\n    margin <- predict(bst, X, outputmargin = TRUE, strict_shape = TRUE)\n    contri <- predict(bst, X, predcontrib = TRUE, strict_shape = TRUE)\n    interact <- predict(bst, X, predinteraction = TRUE, strict_shape = TRUE)\n    leaf <- predict(bst, X, predleaf = TRUE, strict_shape = TRUE)\n\n    n_rows <- nrow(X)\n    n_cols <- ncol(X)\n\n    expect_equal(dim(predt), c(n_rows, n_groups))\n    expect_equal(dim(margin), c(n_rows, n_groups))\n    expect_equal(dim(contri), c(n_rows, n_groups, n_cols + 1))\n    expect_equal(dim(interact), c(n_rows, n_groups, n_cols + 1, n_cols + 1))\n    expect_equal(dim(leaf), c(n_rows, n_rounds, n_groups, 1))\n\n    if (n_groups != 1) {\n      for (g in seq_len(n_groups)) {\n        expect_lt(max(abs(rowSums(contri[, g, ]) - margin[, g])), 1e-5)\n      }\n\n      leaf_no_strict <- predict(bst, X, strict_shape = FALSE, predleaf = TRUE)\n      for (g in seq_len(n_groups)) {\n        g_mask <- rep(FALSE, n_groups)\n        g_mask[g] <- TRUE\n        expect_equal(\n          leaf[, , g, 1L],\n          leaf_no_strict[, g_mask]\n        )\n      }\n    }\n  }\n\n  test_iris <- function() {\n    y <- as.numeric(iris$Species) - 1\n    X <- as.matrix(iris[, -5])\n\n    bst <- xgb.train(\n      data = xgb.DMatrix(X, label = y, nthread = 1),\n      nrounds = n_rounds,\n      params = xgb.params(\n        max_depth = 2, nthread = n_threads,\n        objective = \"multi:softprob\", num_class = 3\n      )\n    )\n\n    
test_strict_shape(bst, X, 3)\n  }\n\n\n  test_agaricus <- function() {\n    data(agaricus.train, package = \"xgboost\")\n    X <- agaricus.train$data\n    y <- agaricus.train$label\n\n    bst <- xgb.train(\n      data = xgb.DMatrix(X, label = y, nthread = 1),\n      nrounds = n_rounds,\n      params = xgb.params(\n        max_depth = 2, nthread = n_threads,\n        objective = \"binary:logistic\"\n      )\n    )\n\n    test_strict_shape(bst, X, 1)\n  }\n\n  test_iris()\n  test_agaricus()\n})\n\ntest_that(\"'predict' accepts CSR data\", {\n  X <- agaricus.train$data\n  y <- agaricus.train$label\n  x_csc <- as(X[1L, , drop = FALSE], \"CsparseMatrix\")\n  x_csr <- as(x_csc, \"RsparseMatrix\")\n  x_spv <- as(x_csc, \"sparseVector\")\n  bst <- xgb.train(\n    data = xgb.DMatrix(X, label = y, nthread = 1),\n    nrounds = 5L, verbose = FALSE,\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      nthread = n_threads\n    )\n  )\n  p_csc <- predict(bst, x_csc)\n  p_csr <- predict(bst, x_csr)\n  p_spv <- predict(bst, x_spv)\n  expect_equal(p_csc, p_csr)\n  expect_equal(p_csc, p_spv)\n})\n\ntest_that(\"Quantile regression accepts multiple quantiles\", {\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(data = x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = xgb.params(\n      objective = \"reg:quantileerror\",\n      tree_method = \"exact\",\n      quantile_alpha = c(0.05, 0.5, 0.95),\n      nthread = n_threads\n    ),\n    nrounds = 15\n  )\n  pred <- predict(model, x)\n\n  expect_equal(dim(pred)[1], nrow(x))\n  expect_equal(dim(pred)[2], 3)\n  expect_true(all(pred[, 1] <= pred[, 3]))\n\n  cors <- cor(y, pred)\n  expect_true(cors[2] > cors[1])\n  expect_true(cors[2] > cors[3])\n  expect_true(cors[2] > 0.85)\n})\n\ntest_that(\"Can use multi-output labels with built-in objectives\", {\n  data(\"mtcars\")\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  y_mirrored <- cbind(y, -y)\n  
dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)\n  model <- xgb.train(\n    params = xgb.params(\n      tree_method = \"hist\",\n      multi_strategy = \"multi_output_tree\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads\n    ),\n    data = dm,\n    nrounds = 5\n  )\n  pred <- predict(model, x)\n  expect_equal(pred[, 1], -pred[, 2])\n  expect_true(cor(y, pred[, 1]) > 0.9)\n  expect_true(cor(y, pred[, 2]) < -0.9)\n})\n\ntest_that(\"Can use multi-output labels with custom objectives\", {\n  data(\"mtcars\")\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  y_mirrored <- cbind(y, -y)\n  dm <- xgb.DMatrix(x, label = y_mirrored, nthread = n_threads)\n  model <- xgb.train(\n    params = xgb.params(\n      tree_method = \"hist\",\n      multi_strategy = \"multi_output_tree\",\n      base_score = 0,\n      objective = function(pred, dtrain) {\n        y <- getinfo(dtrain, \"label\")\n        grad <- pred - y\n        hess <- rep(1, nrow(grad) * ncol(grad))\n        hess <- matrix(hess, nrow = nrow(grad))\n        return(list(grad = grad, hess = hess))\n      },\n      nthread = n_threads\n    ),\n    data = dm,\n    nrounds = 5\n  )\n  pred <- predict(model, x)\n  expect_equal(pred[, 1], -pred[, 2])\n  expect_true(cor(y, pred[, 1]) > 0.9)\n  expect_true(cor(y, pred[, 2]) < -0.9)\n})\n\ntest_that(\"Can use ranking objectives with either 'qid' or 'group'\", {\n  set.seed(123)\n  x <- matrix(rnorm(100 * 10), nrow = 100)\n  y <- sample(2, size = 100, replace = TRUE) - 1\n  qid <- c(rep(1, 20), rep(2, 20), rep(3, 60))\n  gr <- c(20, 20, 60)\n\n  dmat_qid <- xgb.DMatrix(x, label = y, qid = qid, nthread = 1)\n  dmat_gr <- xgb.DMatrix(x, label = y, group = gr, nthread = 1)\n\n  params <- xgb.params(\n    tree_method = \"hist\",\n    lambdarank_num_pair_per_sample = 8,\n    objective = \"rank:ndcg\",\n    lambdarank_pair_method = \"topk\",\n    nthread = n_threads\n  )\n  set.seed(123)\n  model_qid <- xgb.train(params, dmat_qid, nrounds = 
5)\n  set.seed(123)\n  model_gr <- xgb.train(params, dmat_gr, nrounds = 5)\n\n  pred_qid <- predict(model_qid, x)\n  pred_gr <- predict(model_gr, x)\n  expect_equal(pred_qid, pred_gr)\n})\n\ntest_that(\"Can predict on data.frame objects\", {\n  data(\"mtcars\")\n  y <- mtcars$mpg\n  x_df <- mtcars[, -1]\n  x_mat <- as.matrix(x_df)\n  dm <- xgb.DMatrix(x_mat, label = y, nthread = n_threads)\n  model <- xgb.train(\n    params = xgb.params(\n      tree_method = \"hist\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads\n    ),\n    data = dm,\n    nrounds = 5\n  )\n\n  pred_mat <- predict(model, xgb.DMatrix(x_mat, nthread = 1))\n  pred_df <- predict(model, x_df)\n  expect_equal(pred_mat, unname(pred_df))\n})\n\ntest_that(\"'base_margin' gives the same result in DMatrix as in inplace_predict\", {\n  data(\"mtcars\")\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = n_threads)\n  model <- xgb.train(\n    params = xgb.params(\n      tree_method = \"hist\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads\n    ),\n    data = dm,\n    nrounds = 5\n  )\n\n  set.seed(123)\n  base_margin <- rnorm(nrow(x))\n  dm_w_base <- xgb.DMatrix(data = x, base_margin = base_margin, nthread = 1)\n  pred_from_dm <- predict(model, dm_w_base)\n  pred_from_mat <- predict(model, x, base_margin = base_margin)\n\n  expect_equal(pred_from_dm, unname(pred_from_mat))\n})\n\ntest_that(\"Coefficients from gblinear have the expected shape and names\", {\n  # Single-column coefficients\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  mm <- model.matrix(~., data = mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = xgb.params(\n      booster = \"gblinear\",\n      nthread = 1\n    ),\n    nrounds = 3\n  )\n  coefs <- coef(model)\n  expect_equal(length(coefs), ncol(x) + 1)\n  expect_equal(names(coefs), c(\"(Intercept)\", colnames(x)))\n  
pred_auto <- predict(model, x)\n  pred_manual <- as.numeric(mm %*% coefs)\n  expect_equal(pred_manual, unname(pred_auto), tolerance = 1e-5)\n\n  # Multi-column coefficients\n  data(iris)\n  y <- as.numeric(iris$Species) - 1\n  x <- as.matrix(iris[, -5])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  mm <- model.matrix(~., data = iris[, -5])\n  model <- xgb.train(\n    data = dm,\n    params = xgb.params(\n      booster = \"gblinear\",\n      objective = \"multi:softprob\",\n      num_class = 3,\n      nthread = 1\n    ),\n    nrounds = 3\n  )\n  coefs <- coef(model)\n  expect_equal(nrow(coefs), ncol(x) + 1)\n  expect_equal(ncol(coefs), 3)\n  expect_equal(row.names(coefs), c(\"(Intercept)\", colnames(x)))\n  pred_auto <- predict(model, x, outputmargin = TRUE)\n  pred_manual <- unname(mm %*% coefs)\n  expect_equal(pred_manual, pred_auto, tolerance = 1e-7)\n\n  # xgboost() with additional metadata\n  model <- xgboost(\n    iris[, -5],\n    iris$Species,\n    booster = \"gblinear\",\n    objective = \"multi:softprob\",\n    nrounds = 3,\n    nthread = 1\n  )\n  coefs <- coef(model)\n  expect_equal(row.names(coefs), c(\"(Intercept)\", colnames(x)))\n  expect_equal(colnames(coefs), levels(iris$Species))\n})\n\ntest_that(\"Deep copies work as expected\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- mtcars[, -1]\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n   data = dm,\n   params = xgb.params(nthread = 1),\n   nrounds = 3\n  )\n\n  xgb.attr(model, \"my_attr\") <- 100\n  model_shallow_copy <- model\n  xgb.attr(model_shallow_copy, \"my_attr\") <- 333\n  attr_orig <- xgb.attr(model, \"my_attr\")\n  attr_shallow <- xgb.attr(model_shallow_copy, \"my_attr\")\n  expect_equal(attr_orig, attr_shallow)\n\n  model_deep_copy <- xgb.copy.Booster(model)\n  xgb.attr(model_deep_copy, \"my_attr\") <- 444\n  attr_orig <- xgb.attr(model, \"my_attr\")\n  attr_deep <- xgb.attr(model_deep_copy, \"my_attr\")\n  expect_false(attr_orig == 
attr_deep)\n})\n\ntest_that(\"Pointer comparison works as expected\", {\n  library(xgboost)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  model <- xgb.train(\n    params = xgb.params(nthread = 1),\n    data = xgb.DMatrix(x, label = y, nthread = 1),\n    nrounds = 3\n  )\n\n  model_shallow_copy <- model\n  expect_true(xgb.is.same.Booster(model, model_shallow_copy))\n\n  model_deep_copy <- xgb.copy.Booster(model)\n  expect_false(xgb.is.same.Booster(model, model_deep_copy))\n\n  xgb.attr(model_shallow_copy, \"my_attr\") <- 111\n  expect_equal(xgb.attr(model, \"my_attr\"), \"111\")\n  expect_null(xgb.attr(model_deep_copy, \"my_attr\"))\n})\n\ntest_that(\"DMatrix field are set to booster when training\", {\n  set.seed(123)\n  y <- rnorm(100)\n  x <- matrix(rnorm(100 * 3), nrow = 100)\n  x[, 2] <- abs(as.integer(x[, 2]))\n\n  dm_unnamed <- xgb.DMatrix(x, label = y, nthread = 1)\n  dm_feature_names <- xgb.DMatrix(x, label = y, feature_names = c(\"a\", \"b\", \"c\"), nthread = 1)\n  dm_feature_types <- xgb.DMatrix(x, label = y, nthread = 1)\n  setinfo(dm_feature_types, \"feature_type\", c(\"q\", \"c\", \"q\"))\n  dm_both <- xgb.DMatrix(x, label = y, feature_names = c(\"a\", \"b\", \"c\"), nthread = 1)\n  setinfo(dm_both, \"feature_type\", c(\"q\", \"c\", \"q\"))\n\n  params <- xgb.params(nthread = 1)\n  model_unnamed <- xgb.train(data = dm_unnamed, params = params, nrounds = 3)\n  model_feature_names <- xgb.train(data = dm_feature_names, params = params, nrounds = 3)\n  model_feature_types <- xgb.train(data = dm_feature_types, params = params, nrounds = 3)\n  model_both <- xgb.train(data = dm_both, params = params, nrounds = 3)\n\n  expect_null(getinfo(model_unnamed, \"feature_name\"))\n  expect_equal(getinfo(model_feature_names, \"feature_name\"), c(\"a\", \"b\", \"c\"))\n  expect_null(getinfo(model_feature_types, \"feature_name\"))\n  expect_equal(getinfo(model_both, \"feature_name\"), c(\"a\", \"b\", \"c\"))\n\n  expect_null(variable.names(model_unnamed))\n  
expect_equal(variable.names(model_feature_names), c(\"a\", \"b\", \"c\"))\n  expect_null(variable.names(model_feature_types))\n  expect_equal(variable.names(model_both), c(\"a\", \"b\", \"c\"))\n\n  expect_null(getinfo(model_unnamed, \"feature_type\"))\n  expect_null(getinfo(model_feature_names, \"feature_type\"))\n  expect_equal(getinfo(model_feature_types, \"feature_type\"), c(\"q\", \"c\", \"q\"))\n  expect_equal(getinfo(model_both, \"feature_type\"), c(\"q\", \"c\", \"q\"))\n})\n\ntest_that(\"Seed in params override PRNG from R\", {\n  set.seed(123)\n  model1 <- xgb.train(\n    data = xgb.DMatrix(\n      agaricus.train$data,\n      label = agaricus.train$label, nthread = 1L\n    ),\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      max_depth = 3L,\n      subsample = 0.1,\n      colsample_bytree = 0.1,\n      seed = 111L\n    ),\n    nrounds = 3L\n  )\n\n  set.seed(456)\n  model2 <- xgb.train(\n    data = xgb.DMatrix(\n      agaricus.train$data,\n      label = agaricus.train$label, nthread = 1L\n    ),\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      max_depth = 3L,\n      subsample = 0.1,\n      colsample_bytree = 0.1,\n      seed = 111L\n    ),\n    nrounds = 3L\n  )\n\n  expect_equal(\n    xgb.save.raw(model1, raw_format = \"json\"),\n    xgb.save.raw(model2, raw_format = \"json\")\n  )\n\n  set.seed(123)\n  model3 <- xgb.train(\n    data = xgb.DMatrix(\n      agaricus.train$data,\n      label = agaricus.train$label, nthread = 1L\n    ),\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      max_depth = 3L,\n      subsample = 0.1,\n      colsample_bytree = 0.1,\n      seed = 222L\n    ),\n    nrounds = 3L\n  )\n  expect_false(\n    isTRUE(\n      all.equal(\n        xgb.save.raw(model1, raw_format = \"json\"),\n        xgb.save.raw(model3, raw_format = \"json\")\n      )\n    )\n  )\n})\n\ntest_that(\"xgb.cv works for AFT\", {\n  X <- matrix(c(1, -1, -1, 1, 0, 1, 1, 0), nrow = 4, byrow = TRUE)  # 
4x2 matrix\n  dtrain <- xgb.DMatrix(X, nthread = n_threads)\n\n  params <- xgb.params(objective = 'survival:aft', learning_rate = 0.2, max_depth = 2L, nthread = n_threads)\n\n  # data must have bounds\n  expect_error(\n    xgb.cv(\n      params = params,\n      data = dtrain,\n      nround = 5L,\n      nfold = 4L\n    )\n  )\n\n  setinfo(dtrain, 'label_lower_bound', c(2, 3, 0, 4))\n  setinfo(dtrain, 'label_upper_bound', c(2, Inf, 4, 5))\n\n  # automatic stratified splitting is turned off\n  expect_warning(\n    xgb.cv(\n      params = params, data = dtrain, nround = 5L, nfold = 4L,\n      stratified = TRUE, verbose = FALSE\n    )\n  )\n\n  # this works without any issue\n  expect_no_warning(\n    xgb.cv(params = params, data = dtrain, nround = 5L, nfold = 4L, verbose = FALSE)\n  )\n})\n\ntest_that(\"xgb.cv works for ranking\", {\n  data(iris)\n  x <- iris[, -(4:5)]\n  y <- as.integer(iris$Petal.Width)\n  group <- rep(50, 3)\n  dm <- xgb.DMatrix(x, label = y, group = group, nthread = 1)\n  res <- xgb.cv(\n    data = dm,\n    params = xgb.params(\n      objective = \"rank:pairwise\",\n      max_depth = 3,\n      nthread = 1L\n    ),\n    nrounds = 3,\n    nfold = 2,\n    verbose = FALSE,\n    stratified = FALSE\n  )\n  expect_equal(length(res$folds), 2L)\n})\n\ntest_that(\"Row names are preserved in outputs\", {\n  data(iris)\n  x <- iris[, -5]\n  y <- as.numeric(iris$Species) - 1\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = xgb.params(\n      objective = \"multi:softprob\",\n      num_class = 3,\n      max_depth = 2,\n      nthread = 1\n    ),\n    nrounds = 3\n  )\n  row.names(x) <- paste0(\"r\", seq(1, nrow(x)))\n  pred <- predict(model, x)\n  expect_equal(row.names(pred), row.names(x))\n  pred <- predict(model, x, avoid_transpose = TRUE)\n  expect_equal(colnames(pred), row.names(x))\n\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(data = x, label = y, nthread = 1)\n  
model <- xgb.train(\n    data = dm,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = 1\n    ),\n    nrounds = 3\n  )\n  row.names(x) <- paste0(\"r\", seq(1, nrow(x)))\n  pred <- predict(model, x)\n  expect_equal(names(pred), row.names(x))\n  pred <- predict(model, x, avoid_transpose = TRUE)\n  expect_equal(names(pred), row.names(x))\n  pred <- predict(model, x, predleaf = TRUE)\n  expect_equal(row.names(pred), row.names(x))\n  pred <- predict(model, x, predleaf = TRUE, avoid_transpose = TRUE)\n  expect_equal(colnames(pred), row.names(x))\n})\n\ntest_that(\"xgb.train works with nrounds=0 (serialization, continuation, callbacks)\", {\n  # Reuse global data variable 'train' defined at the top of test_basic.R\n  dtrain <- xgb.DMatrix(train$data, label = train$label)\n  watchlist <- list(train = dtrain)\n\n  # --- Case 1: Basic check & Serialization symmetry ---\n  bst_0 <- xgb.train(\n    params = list(objective = \"binary:logistic\", nthread = 1),\n    data = dtrain,\n    nrounds = 0,\n    verbose = 0\n  )\n\n  # Check niter is 0 (handling NULL case common for empty boosters)\n  iter_0 <- bst_0$niter\n  if (is.null(iter_0)) {\n    iter_0 <- 0\n  }\n  expect_equal(iter_0, 0)\n\n  # Check that 0-round model provides a valid \"base score\" prediction\n  preds_0 <- predict(bst_0, dtrain)\n  expect_true(all((preds_0 >= 0) & (preds_0 <= 1)))\n\n  # Serialize via RAM (Raw) instead of disk (tempfile) for cleaner tests\n  raw <- xgb.save.raw(bst_0)\n  bst_loaded <- xgb.load.raw(raw)\n\n  # Verify predictions match before/after serialization\n  preds_loaded <- predict(bst_loaded, dtrain)\n  expect_equal(preds_0, preds_loaded, tolerance = 1e-6)\n\n  # --- Case 2: Training Continuation Numeric Consistency ---\n  # Initialize empty model with fixed seed & single thread\n  bst_init <- xgb.train(\n    params = list(objective = \"binary:logistic\", seed = 123, nthread = 1),\n    data = dtrain,\n    nrounds = 0,\n    verbose = 0\n  )\n\n  # Continue training for 10 
rounds from empty booster\n  bst_cont <- xgb.train(\n    params = list(objective = \"binary:logistic\", seed = 123, nthread = 1),\n    data = dtrain,\n    nrounds = 10,\n    xgb_model = bst_init,\n    verbose = 0\n  )\n\n  # Reference training from scratch\n  bst_ref <- xgb.train(\n    params = list(objective = \"binary:logistic\", seed = 123, nthread = 1),\n    data = dtrain,\n    nrounds = 10,\n    verbose = 0\n  )\n\n  # Predictions must be numerically identical within 1e-6\n  p_cont <- predict(bst_cont, dtrain)\n  p_ref <- predict(bst_ref, dtrain)\n  expect_equal(p_cont, p_ref, tolerance = 1e-6)\n\n  # --- Case 3: Callback Robustness ---\n  # Verify early stopping and evals work with nrounds=0\n  bst_cb <- xgb.train(\n    params = list(objective = \"binary:logistic\", seed = 456, nthread = 1),\n    data = dtrain,\n    nrounds = 0,\n    evals = watchlist,\n    early_stopping_rounds = 3,\n    verbose = 0\n  )\n\n  # Verify that continuation works\n  bst_cb_cont <- xgb.train(\n    params = list(objective = \"binary:logistic\", seed = 456, nthread = 1),\n    data = dtrain,\n    nrounds = 5,\n    evals = watchlist,\n    early_stopping_rounds = 3,\n    xgb_model = bst_cb,\n    verbose = 0\n  )\n\n  # Handle NULL niter for continued model with early stopping\n  iter_cb <- bst_cb_cont$niter\n  if (is.null(iter_cb)) {\n    # Verify the continued model works and learned signal\n    preds_cb <- predict(bst_cb_cont, dtrain)\n    expect_true(stats::sd(preds_cb) > 0)\n    expect_equal(length(preds_cb), nrow(train$data))\n  } else {\n    expect_equal(iter_cb, 5)\n  }\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_booster_slicing.R",
    "content": "context(\"testing xgb.Booster slicing\")\n\ndata(agaricus.train, package = \"xgboost\")\ndm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)\n# Note: here need large step sizes in order for the predictions\n# to have substantially different leaf assignments on each tree\nmodel <- xgb.train(\n  params = xgb.params(objective = \"binary:logistic\", nthread = 1, max_depth = 4),\n  data = dm,\n  nrounds = 20\n)\npred <- predict(model, dm, predleaf = TRUE)\n\ntest_that(\"Slicing full model\", {\n  new_model <- xgb.slice.Booster(model, 1, 0)\n  expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))\n\n  new_model <- model[]\n  expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))\n\n  new_model <- model[1:length(model)] # nolint\n  expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))\n})\n\ntest_that(\"Slicing sequence from start\", {\n  new_model <- xgb.slice.Booster(model, 1, 10)\n  new_pred <- predict(new_model, dm, predleaf = TRUE)\n  expect_equal(new_pred, pred[, seq(1, 10)])\n\n  new_model <- model[1:10]\n  new_pred <- predict(new_model, dm, predleaf = TRUE)\n  expect_equal(new_pred, pred[, seq(1, 10)])\n})\n\ntest_that(\"Slicing sequence from middle\", {\n  new_model <- xgb.slice.Booster(model, 5, 10)\n  new_pred <- predict(new_model, dm, predleaf = TRUE)\n  expect_equal(new_pred, pred[, seq(5, 10)])\n\n  new_model <- model[5:10]\n  new_pred <- predict(new_model, dm, predleaf = TRUE)\n  expect_equal(new_pred, pred[, seq(5, 10)])\n})\n\ntest_that(\"Slicing with non-unit step\", {\n  for (s in 2:5) {\n    new_model <- xgb.slice.Booster(model, 1, 17, s)\n    new_pred <- predict(new_model, dm, predleaf = TRUE)\n    expect_equal(new_pred, pred[, seq(1, 17, s)])\n\n    new_model <- model[seq(1, 17, s)]\n    new_pred <- predict(new_model, dm, predleaf = TRUE)\n    expect_equal(new_pred, pred[, seq(1, 17, s)])\n  }\n})\n\ntest_that(\"Slicing with non-unit step from middle\", {\n  for (s in 2:5) {\n    new_model 
<- xgb.slice.Booster(model, 4, 17, s)\n    new_pred <- predict(new_model, dm, predleaf = TRUE)\n    expect_equal(new_pred, pred[, seq(4, 17, s)])\n\n    new_model <- model[seq(4, 17, s)]\n    new_pred <- predict(new_model, dm, predleaf = TRUE)\n    expect_equal(new_pred, pred[, seq(4, 17, s)])\n  }\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_callbacks.R",
    "content": "# More specific testing of callbacks\ncontext(\"callbacks\")\n\ndata(agaricus.train, package = 'xgboost')\ndata(agaricus.test, package = 'xgboost')\ntrain <- agaricus.train\ntest <- agaricus.test\n\nn_threads <- 2\n\n# add some label noise for early stopping tests\nadd.noise <- function(label, frac) {\n  inoise <- sample(length(label), length(label) * frac)\n  label[inoise] <- !label[inoise]\n  label\n}\nset.seed(11)\nltrain <- add.noise(train$label, 0.2)\nltest <- add.noise(test$label, 0.2)\ndtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)\ndtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)\nevals <- list(train = dtrain, test = dtest)\n\n\nerr <- function(label, pr) sum((pr > 0.5) != label) / length(label)\n\nparams <- xgb.params(\n  objective = \"binary:logistic\", eval_metric = \"error\",\n  max_depth = 2, nthread = n_threads\n)\n\n\ntest_that(\"xgb.cb.print.evaluation works as expected for xgb.train\", {\n  logs1 <- capture.output({\n    model <- xgb.train(\n      data = dtrain,\n      params = xgb.params(\n        objective = \"binary:logistic\",\n        eval_metric = \"auc\",\n        max_depth = 2,\n        nthread = n_threads\n      ),\n      nrounds = 10,\n      evals = list(train = dtrain, test = dtest),\n      callbacks = list(xgb.cb.print.evaluation(period = 1))\n    )\n  })\n  expect_equal(length(logs1), 10)\n  expect_true(all(grepl(\"^\\\\[\\\\d{1,2}\\\\]\\ttrain-auc:0\\\\.\\\\d+\\ttest-auc:0\\\\.\\\\d+\\\\s*$\", logs1)))\n  lapply(seq(1, 10), function(x) expect_true(grepl(paste0(\"^\\\\[\", x), logs1[x])))\n\n  logs2 <- capture.output({\n    model <- xgb.train(\n      data = dtrain,\n      params = xgb.params(\n        objective = \"binary:logistic\",\n        eval_metric = \"auc\",\n        max_depth = 2,\n        nthread = n_threads\n      ),\n      nrounds = 10,\n      evals = list(train = dtrain, test = dtest),\n      callbacks = list(xgb.cb.print.evaluation(period = 2))\n    )\n  })\n  
expect_equal(length(logs2), 6)\n  expect_true(all(grepl(\"^\\\\[\\\\d{1,2}\\\\]\\ttrain-auc:0\\\\.\\\\d+\\ttest-auc:0\\\\.\\\\d+\\\\s*$\", logs2)))\n  seq_matches <- c(seq(1, 10, 2), 10)\n  lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0(\"^\\\\[\", seq_matches[x]), logs2[x])))\n})\n\ntest_that(\"xgb.cb.print.evaluation works as expected for xgb.cv\", {\n  logs1 <- capture.output({\n    model <- xgb.cv(\n      data = dtrain,\n      params = xgb.params(\n        objective = \"binary:logistic\",\n        eval_metric = \"auc\",\n        max_depth = 2,\n        nthread = n_threads\n      ),\n      nrounds = 10,\n      nfold = 3,\n      callbacks = list(xgb.cb.print.evaluation(period = 1, showsd = TRUE))\n    )\n  })\n  expect_equal(length(logs1), 10)\n  expect_true(all(grepl(\"^\\\\[\\\\d{1,2}\\\\]\\ttrain-auc:0\\\\.\\\\d+±0\\\\.\\\\d+\\ttest-auc:0\\\\.\\\\d+±0\\\\.\\\\d+\\\\s*$\", logs1)))\n  lapply(seq(1, 10), function(x) expect_true(grepl(paste0(\"^\\\\[\", x), logs1[x])))\n\n  logs2 <- capture.output({\n    model <- xgb.cv(\n      data = dtrain,\n      params = xgb.params(\n        objective = \"binary:logistic\",\n        eval_metric = \"auc\",\n        max_depth = 2,\n        nthread = n_threads\n      ),\n      nrounds = 10,\n      nfold = 3,\n      callbacks = list(xgb.cb.print.evaluation(period = 2, showsd = TRUE))\n    )\n  })\n  expect_equal(length(logs2), 6)\n  expect_true(all(grepl(\"^\\\\[\\\\d{1,2}\\\\]\\ttrain-auc:0\\\\.\\\\d+±0\\\\.\\\\d+\\ttest-auc:0\\\\.\\\\d+±0\\\\.\\\\d+\\\\s*$\", logs2)))\n  seq_matches <- c(seq(1, 10, 2), 10)\n  lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0(\"^\\\\[\", seq_matches[x]), logs2[x])))\n})\n\ntest_that(\"xgb.cb.evaluation.log works as expected for xgb.train\", {\n  model <- xgb.train(\n    data = dtrain,\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      eval_metric = \"auc\",\n      max_depth = 2,\n      nthread = n_threads\n    ),\n    nrounds = 10,\n   
 verbose = FALSE,\n    evals = list(train = dtrain, test = dtest),\n    callbacks = list(xgb.cb.evaluation.log())\n  )\n  logs <- attributes(model)$evaluation_log\n\n  expect_equal(nrow(logs), 10)\n  expect_equal(colnames(logs), c(\"iter\", \"train_auc\", \"test_auc\"))\n})\n\ntest_that(\"xgb.cb.evaluation.log works as expected for xgb.cv\", {\n  model <- xgb.cv(\n    data = dtrain,\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      eval_metric = \"auc\",\n      max_depth = 2,\n      nthread = n_threads\n    ),\n    nrounds = 10,\n    verbose = FALSE,\n    nfold = 3,\n    callbacks = list(xgb.cb.evaluation.log())\n  )\n  logs <- model$evaluation_log\n\n  expect_equal(nrow(logs), 10)\n  expect_equal(\n    colnames(logs),\n    c(\"iter\", \"train_auc_mean\", \"train_auc_std\", \"test_auc_mean\", \"test_auc_std\")\n  )\n})\n\n\nparams <- xgb.params(\n  objective = \"binary:logistic\", eval_metric = \"error\",\n  max_depth = 4, nthread = n_threads\n)\n\ntest_that(\"can store evaluation_log without printing\", {\n  expect_silent(\n    bst <- xgb.train(params, dtrain, nrounds = 10, evals = evals, verbose = 0)\n  )\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_false(is.null(attributes(bst)$evaluation_log$train_error))\n  expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2)\n})\n\ntest_that(\"xgb.cb.reset.parameters works as expected\", {\n\n  # fixed learning_rate\n  params <- c(params, list(learning_rate = 0.9))\n  set.seed(111)\n  bst0 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0)\n  expect_false(is.null(attributes(bst0)$evaluation_log))\n  expect_false(is.null(attributes(bst0)$evaluation_log$train_error))\n\n  # same learning_rate but re-set as a vector parameter in the callback\n  set.seed(111)\n  my_par <- list(learning_rate = c(0.9, 0.9))\n  bst1 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                    callbacks = 
list(xgb.cb.reset.parameters(my_par)))\n  expect_false(is.null(attributes(bst1)$evaluation_log$train_error))\n  expect_equal(attributes(bst0)$evaluation_log$train_error,\n               attributes(bst1)$evaluation_log$train_error)\n\n  # same learning_rate but re-set via a function in the callback\n  set.seed(111)\n  my_par <- list(learning_rate = function(itr, itr_end) 0.9)\n  bst2 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                    callbacks = list(xgb.cb.reset.parameters(my_par)))\n  expect_false(is.null(attributes(bst2)$evaluation_log$train_error))\n  expect_equal(attributes(bst0)$evaluation_log$train_error,\n               attributes(bst2)$evaluation_log$train_error)\n\n  # different learning_rate re-set as a vector parameter in the callback\n  set.seed(111)\n  my_par <- list(learning_rate = c(0.6, 0.5))\n  bst3 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                    callbacks = list(xgb.cb.reset.parameters(my_par)))\n  expect_false(is.null(attributes(bst3)$evaluation_log$train_error))\n  expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))\n\n  # resetting multiple parameters at the same time runs with no error\n  my_par <- list(learning_rate = c(1., 0.5), min_split_loss = c(1, 2), max_depth = c(4, 8))\n  expect_error(\n    bst4 <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                      callbacks = list(xgb.cb.reset.parameters(my_par)))\n  , NA) # NA = no error\n  # CV works as well\n  expect_error(\n    bst4 <- xgb.cv(params, dtrain, nfold = 2, nrounds = 2, verbose = 0,\n                   callbacks = list(xgb.cb.reset.parameters(my_par)))\n  , NA) # NA = no error\n\n  # expect no learning with 0 learning rate\n  my_par <- list(learning_rate = c(0., 0.))\n  bstX <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                    callbacks = 
list(xgb.cb.reset.parameters(my_par)))\n  expect_false(is.null(attributes(bstX)$evaluation_log$train_error))\n  er <- unique(attributes(bstX)$evaluation_log$train_error)\n  expect_length(er, 1)\n  expect_gt(er, 0.4)\n})\n\ntest_that(\"xgb.cb.save.model works as expected\", {\n  files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json')\n  files <- unname(sapply(files, function(f) file.path(tempdir(), f)))\n  for (f in files) if (file.exists(f)) file.remove(f)\n\n  bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                   save_period = 1, save_name = file.path(tempdir(), \"xgboost_%02d.json\"))\n  expect_true(file.exists(files[1]))\n  expect_true(file.exists(files[2]))\n  b1 <- xgb.load(files[1])\n  xgb.model.parameters(b1) <- list(nthread = 2)\n  expect_equal(xgb.get.num.boosted.rounds(b1), 1)\n  b2 <- xgb.load(files[2])\n  xgb.model.parameters(b2) <- list(nthread = 2)\n  expect_equal(xgb.get.num.boosted.rounds(b2), 2)\n\n  xgb.config(b2) <- xgb.config(bst)\n  expect_equal(xgb.config(bst), xgb.config(b2))\n  expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))\n\n  # save_period = 0 saves the last iteration's model\n  bst <- xgb.train(params, dtrain, nrounds = 2, evals = evals, verbose = 0,\n                   save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))\n  expect_true(file.exists(files[3]))\n  b2 <- xgb.load(files[3])\n  xgb.config(b2) <- xgb.config(bst)\n  expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))\n\n  for (f in files) if (file.exists(f)) file.remove(f)\n})\n\ntest_that(\"early stopping xgb.train works\", {\n  params <- c(params, list(learning_rate = 0.3))\n  set.seed(11)\n  expect_output(\n    bst <- xgb.train(params, dtrain, nrounds = 20, evals = evals,\n                     early_stopping_rounds = 3, maximize = FALSE)\n  , \"Stopping. 
Best iteration\")\n  expect_false(is.null(xgb.attr(bst, \"best_iteration\")))\n  expect_lt(xgb.attr(bst, \"best_iteration\"), 19)\n\n  pred <- predict(bst, dtest)\n  expect_equal(length(pred), 1611)\n  err_pred <- err(ltest, pred)\n  err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, \"best_iteration\") + 1, test_error]\n  expect_equal(err_log, err_pred, tolerance = 5e-6)\n\n  set.seed(11)\n  expect_silent(\n    bst0 <- xgb.train(params, dtrain, nrounds = 20, evals = evals,\n                      early_stopping_rounds = 3, maximize = FALSE, verbose = 0)\n  )\n  expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)\n\n  fname <- file.path(tempdir(), \"model.ubj\")\n  xgb.save(bst, fname)\n  loaded <- xgb.load(fname)\n\n  expect_false(is.null(xgb.attr(loaded, \"best_iteration\")))\n  expect_equal(xgb.attr(loaded, \"best_iteration\"), xgb.attr(bst, \"best_iteration\"))\n})\n\ntest_that(\"early stopping using a specific metric works\", {\n  set.seed(11)\n  expect_output(\n    bst <- xgb.train(\n      c(\n        within(params, rm(\"eval_metric\")),\n        list(\n          learning_rate = 0.6,\n          eval_metric = \"logloss\",\n          eval_metric = \"auc\"\n        )\n      ),\n      dtrain,\n      nrounds = 20,\n      evals = evals,\n      callbacks = list(\n        xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE, metric_name = 'test_logloss')\n      )\n    )\n  , \"Stopping. 
Best iteration\")\n  expect_false(is.null(xgb.attr(bst, \"best_iteration\")))\n  expect_lt(xgb.attr(bst, \"best_iteration\"), 19)\n\n  pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, \"best_iteration\") + 1))\n  expect_equal(length(pred), 1611)\n  logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)\n  logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, \"best_iteration\") + 1, test_logloss]\n  expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)\n})\n\ntest_that(\"early stopping works with titanic\", {\n  if (!requireNamespace(\"titanic\")) {\n    testthat::skip(\"Optional testing dependency 'titanic' not found.\")\n  }\n  # This test was inspired by https://github.com/dmlc/xgboost/issues/5935\n  # It catches possible issues on noLD R\n  titanic <- titanic::titanic_train\n  titanic$Pclass <-  as.factor(titanic$Pclass)\n  dtx <- model.matrix(~ 0 + ., data = titanic[, c(\"Pclass\", \"Sex\")])\n  dty <- titanic$Survived\n\n  xgb.train(\n    data = xgb.DMatrix(dtx, label = dty, nthread = 1),\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      eval_metric = \"auc\",\n      nthread = n_threads\n    ),\n    nrounds = 100,\n    early_stopping_rounds = 3,\n    verbose = 0,\n    evals = list(train = xgb.DMatrix(dtx, label = dty, nthread = 1))\n  )\n\n  expect_true(TRUE)  # should not crash\n})\n\ntest_that(\"early stopping xgb.cv works\", {\n  set.seed(11)\n  expect_output(\n    {\n      cv <- xgb.cv(\n        c(params, list(learning_rate = 0.3)),\n        dtrain,\n        nfold = 5,\n        nrounds = 20,\n        early_stopping_rounds = 3,\n        maximize = FALSE\n      )\n    },\n    \"Stopping. 
Best iteration\"\n  )\n  expect_false(is.null(cv$early_stop$best_iteration))\n  expect_lt(cv$early_stop$best_iteration, 19)\n  # the best error is min error:\n  expect_true(cv$evaluation_log[, test_error_mean[cv$early_stop$best_iteration] == min(test_error_mean)])\n})\n\ntest_that(\"prediction in xgb.cv works\", {\n  params <- c(params, list(learning_rate = 0.5))\n  set.seed(11)\n  nrounds <- 4\n  cv <- xgb.cv(params, dtrain, nfold = 5, nrounds = nrounds, prediction = TRUE, verbose = 0)\n  expect_false(is.null(cv$evaluation_log))\n  expect_false(is.null(cv$cv_predict$pred))\n  expect_length(cv$cv_predict$pred, nrow(train$data))\n  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))\n  err_log <- cv$evaluation_log[nrounds, test_error_mean]\n  expect_equal(err_pred, err_log, tolerance = 1e-6)\n\n  # save CV models\n  set.seed(11)\n  cvx <- xgb.cv(params, dtrain, nfold = 5, nrounds = nrounds, prediction = TRUE, verbose = 0,\n                callbacks = list(xgb.cb.cv.predict(save_models = TRUE)))\n  expect_equal(cv$evaluation_log, cvx$evaluation_log)\n  expect_length(cvx$cv_predict$models, 5)\n  expect_true(all(sapply(cvx$cv_predict$models, class) == 'xgb.Booster'))\n})\n\ntest_that(\"prediction in xgb.cv works for gblinear too\", {\n  set.seed(11)\n  p <- xgb.params(\n    booster = 'gblinear',\n    objective = \"reg:logistic\",\n    learning_rate = 0.5,\n    nthread = n_threads\n  )\n  cv <- xgb.cv(p, dtrain, nfold = 5, nrounds = 2, prediction = TRUE, verbose = 0)\n  expect_false(is.null(cv$evaluation_log))\n  expect_false(is.null(cv$cv_predict$pred))\n  expect_length(cv$cv_predict$pred, nrow(train$data))\n})\n\ntest_that(\"prediction in early-stopping xgb.cv works\", {\n  params <- c(params, list(learning_rate = 0.1, base_score = 0.5))\n  set.seed(11)\n  expect_output(\n    cv <- xgb.cv(params, dtrain, nfold = 5, nrounds = 20,\n                 early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE,\n                 
prediction = TRUE, verbose = TRUE)\n  , \"Stopping. Best iteration\")\n\n  expect_false(is.null(cv$early_stop$best_iteration))\n  expect_lt(cv$early_stop$best_iteration, 19)\n  expect_false(is.null(cv$evaluation_log))\n  expect_false(is.null(cv$cv_predict$pred))\n  expect_length(cv$cv_predict$pred, nrow(train$data))\n\n  err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))\n  err_log <- cv$evaluation_log[cv$early_stop$best_iteration, test_error_mean]\n  expect_equal(err_pred, err_log, tolerance = 1e-6)\n  err_log_last <- cv$evaluation_log[cv$niter, test_error_mean]\n  expect_gt(abs(err_pred - err_log_last), 1e-4)\n})\n\ntest_that(\"prediction in xgb.cv for softprob works\", {\n  lb <- as.numeric(iris$Species) - 1\n  set.seed(11)\n  expect_warning(\n    {\n      cv <- xgb.cv(\n        data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb, nthread = 1),\n        nfold = 4,\n        nrounds = 5,\n        params = xgb.params(\n          objective = \"multi:softprob\",\n          num_class = 3,\n          learning_rate = 0.5,\n          max_depth = 3,\n          nthread = n_threads,\n          subsample = 0.8,\n          min_split_loss = 2\n        ),\n        verbose = 0,\n        prediction = TRUE\n      )\n    },\n    NA\n  )\n  expect_false(is.null(cv$cv_predict$pred))\n  expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3))\n  expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6)\n})\n\ntest_that(\"prediction in xgb.cv works for multi-quantile\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  cv <- xgb.cv(\n    data = dm,\n    params = xgb.params(\n      objective = \"reg:quantileerror\",\n      quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),\n      nthread = 1\n    ),\n    nrounds = 5,\n    nfold = 3,\n    prediction = TRUE,\n    verbose = 0\n  )\n  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))\n})\n\ntest_that(\"prediction in xgb.cv works for 
multi-output\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)\n  cv <- xgb.cv(\n    data = dm,\n    params = xgb.params(\n      tree_method = \"hist\",\n      multi_strategy = \"multi_output_tree\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads\n    ),\n    nrounds = 5,\n    nfold = 3,\n    prediction = TRUE,\n    verbose = 0\n  )\n  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))\n})\n\ntest_that(\"prediction in xgb.cv works for multi-quantile\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  cv <- xgb.cv(\n    data = dm,\n    params = xgb.params(\n      objective = \"reg:quantileerror\",\n      quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),\n      nthread = 1\n    ),\n    nrounds = 5,\n    nfold = 3,\n    prediction = TRUE,\n    verbose = 0\n  )\n  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))\n})\n\ntest_that(\"prediction in xgb.cv works for multi-output\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)\n  cv <- xgb.cv(\n    data = dm,\n    params = xgb.params(\n      tree_method = \"hist\",\n      multi_strategy = \"multi_output_tree\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads\n    ),\n    nrounds = 5,\n    nfold = 3,\n    prediction = TRUE,\n    verbose = 0\n  )\n  expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_config.R",
    "content": "context('Test global configuration')\n\ntest_that('Global configuration works with verbosity', {\n  old_verbosity <- xgb.get.config()$verbosity\n  for (v in c(0, 1, 2, 3)) {\n    xgb.set.config(verbosity = v)\n    expect_equal(xgb.get.config()$verbosity, v)\n  }\n  xgb.set.config(verbosity = old_verbosity)\n  expect_equal(xgb.get.config()$verbosity, old_verbosity)\n})\n\ntest_that('Global configuration works with use_rmm flag', {\n  old_use_rmm_flag <- xgb.get.config()$use_rmm\n  for (v in c(TRUE, FALSE)) {\n    xgb.set.config(use_rmm = v)\n    expect_equal(xgb.get.config()$use_rmm, v)\n  }\n  xgb.set.config(use_rmm = old_use_rmm_flag)\n  expect_equal(xgb.get.config()$use_rmm, old_use_rmm_flag)\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_custom_objective.R",
    "content": "context('Test models with custom objective')\n\nset.seed(1994)\n\nn_threads <- 2\n\ndata(agaricus.train, package = 'xgboost')\ndata(agaricus.test, package = 'xgboost')\ndtrain <- xgb.DMatrix(\n  agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n)\ndtest <- xgb.DMatrix(\n  agaricus.test$data, label = agaricus.test$label, nthread = n_threads\n)\nevals <- list(eval = dtest, train = dtrain)\n\nlogregobj <- function(preds, dtrain) {\n  labels <- getinfo(dtrain, \"label\")\n  preds <- 1 / (1 + exp(-preds))\n  grad <- preds - labels\n  hess <- preds * (1 - preds)\n  return(list(grad = grad, hess = hess))\n}\n\nevalerror <- function(preds, dtrain) {\n  labels <- getinfo(dtrain, \"label\")\n  err <- as.numeric(sum(labels != (preds > 0.5))) / length(labels)\n  return(list(metric = \"error\", value = err))\n}\n\nparam <- list(max_depth = 2, learning_rate = 1, nthread = n_threads,\n              objective = logregobj, eval_metric = evalerror)\nnum_round <- 2\n\ntest_that(\"custom objective works\", {\n  bst <- xgb.train(param, dtrain, num_round, evals, verbose = 0)\n  expect_equal(class(bst), \"xgb.Booster\")\n  expect_false(is.null(attributes(bst)$evaluation_log))\n  expect_false(is.null(attributes(bst)$evaluation_log$eval_error))\n  expect_lt(attributes(bst)$evaluation_log[num_round, eval_error], 0.03)\n})\n\ntest_that(\"custom objective in CV works\", {\n  cv <- xgb.cv(param, dtrain, num_round, nfold = 10, verbose = FALSE, stratified = FALSE)\n  expect_false(is.null(cv$evaluation_log))\n  expect_equal(dim(cv$evaluation_log), c(2, 5))\n  expect_lt(cv$evaluation_log[num_round, test_error_mean], 0.03)\n})\n\ntest_that(\"custom objective with early stop works\", {\n  bst <- xgb.train(param, dtrain, 10, evals, verbose = 0)\n  expect_equal(class(bst), \"xgb.Booster\")\n  train_log <- attributes(bst)$evaluation_log$train_error\n  expect_true(all(diff(train_log) <= 0))\n})\n\ntest_that(\"custom objective using DMatrix attr works\", {\n\n  
attr(dtrain, 'label') <- getinfo(dtrain, 'label')\n\n  logregobjattr <- function(preds, dtrain) {\n    labels <- attr(dtrain, 'label')\n    preds <- 1 / (1 + exp(-preds))\n    grad <- preds - labels\n    hess <- preds * (1 - preds)\n    return(list(grad = grad, hess = hess))\n  }\n  param$objective <- logregobjattr\n  bst <- xgb.train(param, dtrain, num_round, evals, verbose = 0)\n  expect_equal(class(bst), \"xgb.Booster\")\n})\n\ntest_that(\"custom objective with multi-class shape\", {\n  data <- as.matrix(iris[, -5])\n  label <-  as.numeric(iris$Species) - 1\n  dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)\n  n_classes <- 3\n\n  fake_softprob <- function(preds, dtrain) {\n    mpreds <- as.matrix(preds)\n    expect_equal(\n      mpreds,\n      matrix(0.5, nrow = nrow(mpreds), ncol = ncol(mpreds)),\n      tolerance = 1e-4\n    )\n    ## use numeric vector here to test compatibility with XGBoost < 2.1\n    grad <- rnorm(length(as.matrix(preds)))\n    expect_equal(dim(data)[1] * n_classes, dim(as.matrix(preds))[1] * n_classes)\n    hess <- rnorm(length(as.matrix(preds)))\n    list(grad = grad, hess = hess)\n  }\n  fake_merror <- function(preds, dtrain) {\n    expect_equal(dim(data)[1] * n_classes, dim(as.matrix(preds))[1])\n  }\n  param$objective <- fake_softprob\n  param$eval_metric <- fake_merror\n  expect_warning({\n    bst <- xgb.train(c(param, list(num_class = n_classes)), dtrain, nrounds = 1)\n  })\n})\n\nsoftmax <- function(values) {\n  values <- as.numeric(values)\n  exps <- exp(values)\n  den <- sum(exps)\n  return(exps / den)\n}\n\nsoftprob <- function(predt, dtrain) {\n  y <- getinfo(dtrain, \"label\")\n\n  n_samples <- dim(predt)[1]\n  n_classes <- dim(predt)[2]\n\n  grad <- matrix(nrow = n_samples, ncol = n_classes)\n  hess <- matrix(nrow = n_samples, ncol = n_classes)\n\n  for (i in seq_len(n_samples)) {\n    t <- y[i]\n    p <- softmax(predt[i, ])\n    for (c in seq_len(n_classes)) {\n      g <- if (c - 1 == t) {\n        p[c] 
- 1.0\n      } else {\n        p[c]\n      }\n      h <- max((2.0 * p[c] * (1.0 - p[c])), 1e-6)\n      grad[i, c] <- g\n      hess[i, c] <- h\n    }\n  }\n\n  return(list(grad = grad, hess = hess))\n}\n\n\ntest_that(\"custom objective with multi-class works\", {\n  data <- as.matrix(iris[, -5])\n  label <- as.numeric(iris$Species) - 1\n\n  dtrain <- xgb.DMatrix(data = data, label = label, nthread = 1)\n\n  param$num_class <- 3\n  param$objective <- softprob\n  param$eval_metric <- \"merror\"\n  param$base_score <- 0.5\n\n  custom_bst <- xgb.train(param, dtrain, 2)\n  custom_predt <- predict(custom_bst, dtrain)\n\n  param$objective <- \"multi:softmax\"\n  builtin_bst <- xgb.train(param, dtrain, 2)\n  builtin_predt <- predict(builtin_bst, dtrain)\n\n  expect_equal(custom_predt, builtin_predt)\n})\n\ntest_that(\"custom metric with multi-target passes reshaped data to feval\", {\n  x <- as.matrix(iris[, -5])\n  y <- as.numeric(iris$Species) - 1\n  dtrain <- xgb.DMatrix(data = x, label = y, nthread = 1)\n\n  multinomial.ll <- function(predt, dtrain) {\n    expect_equal(dim(predt), c(nrow(iris), 3L))\n    y <- getinfo(dtrain, \"label\")\n    probs <- apply(predt, 1, softmax) |> t()\n    probs.y <- probs[cbind(seq(1L, nrow(predt)), y + 1L)]\n    ll <- sum(log(probs.y))\n    return(list(metric = \"multinomial-ll\", value = -ll))\n  }\n\n  model <- xgb.train(\n    params = list(\n      objective = \"multi:softmax\",\n      num_class = 3L,\n      base_score = 0,\n      disable_default_eval_metric = TRUE,\n      eval_metric = multinomial.ll,\n      max_depth = 123,\n      seed = 123\n    ),\n    data = dtrain,\n    nrounds = 2L,\n    evals = list(Train = dtrain),\n    verbose = 0\n  )\n\n  model <- xgb.train(\n    params = list(\n      objective = \"multi:softmax\",\n      num_class = 3L,\n      base_score = 0,\n      disable_default_eval_metric = TRUE,\n      max_depth = 123,\n      seed = 123\n    ),\n    data = dtrain,\n    nrounds = 2L,\n    evals = list(Train = 
dtrain),\n    custom_metric = multinomial.ll,\n    verbose = 0\n  )\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_dmatrix.R",
    "content": "library(Matrix)\ncontext(\"testing xgb.DMatrix functionality\")\n\ndata(agaricus.test, package = \"xgboost\")\ntest_data <- agaricus.test$data[1:100, ]\ntest_label <- agaricus.test$label[1:100]\n\nn_threads <- 2\n\ntest_that(\"xgb.DMatrix: basic construction\", {\n  # from sparse matrix\n  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)\n\n  # from dense matrix\n  dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)\n  expect_equal(getinfo(dtest1, \"label\"), getinfo(dtest2, \"label\"))\n  expect_equal(dim(dtest1), dim(dtest2))\n\n  # from dense integer matrix\n  int_data <- as.matrix(test_data)\n  storage.mode(int_data) <- \"integer\"\n  dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)\n  expect_equal(dim(dtest1), dim(dtest3))\n\n  n_samples <- 100\n  X <- cbind(\n    x1 = sample(x = 4, size = n_samples, replace = TRUE),\n    x2 = sample(x = 4, size = n_samples, replace = TRUE),\n    x3 = sample(x = 4, size = n_samples, replace = TRUE)\n  )\n  X <- matrix(X, nrow = n_samples)\n  y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)\n\n  fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)\n\n  dgc <- as(X, \"dgCMatrix\")\n  fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)\n\n  dgr <- as(X, \"dgRMatrix\")\n  fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)\n\n  params <- list(tree_method = \"hist\", nthread = n_threads)\n  bst_fd <- xgb.train(\n    params, nrounds = 8, fd, evals = list(train = fd), verbose = 0\n  )\n  bst_dgr <- xgb.train(\n    params, nrounds = 8, fdgr, evals = list(train = fdgr), verbose = 0\n  )\n  bst_dgc <- xgb.train(\n    params, nrounds = 8, fdgc, evals = list(train = fdgc), verbose = 0\n  )\n\n  raw_fd <- xgb.save.raw(bst_fd, raw_format = \"ubj\")\n  raw_dgr <- xgb.save.raw(bst_dgr, raw_format = \"ubj\")\n  raw_dgc <- xgb.save.raw(bst_dgc, raw_format = \"ubj\")\n\n  expect_equal(raw_fd, 
raw_dgr)\n  expect_equal(raw_fd, raw_dgc)\n})\n\ntest_that(\"xgb.DMatrix: NA\", {\n  n_samples <- 3\n  x <- cbind(\n    x1 = sample(x = 4, size = n_samples, replace = TRUE),\n    x2 = sample(x = 4, size = n_samples, replace = TRUE)\n  )\n  x[1, \"x1\"] <- NA\n\n  m <- xgb.DMatrix(x, nthread = n_threads)\n  fname_int <- file.path(tempdir(), \"int.dmatrix\")\n  xgb.DMatrix.save(m, fname_int)\n\n  x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)\n  colnames(x) <- c(\"x1\", \"x2\")\n  m <- xgb.DMatrix(x, nthread = n_threads)\n\n  fname_float <- file.path(tempdir(), \"float.dmatrix\")\n  xgb.DMatrix.save(m, fname_float)\n\n  iconn <- file(fname_int, \"rb\")\n  fconn <- file(fname_float, \"rb\")\n\n  expect_equal(file.size(fname_int), file.size(fname_float))\n\n  bytes <- file.size(fname_int)\n  idmatrix <- readBin(iconn, \"raw\", n = bytes)\n  fdmatrix <- readBin(fconn, \"raw\", n = bytes)\n\n  expect_equal(length(idmatrix), length(fdmatrix))\n  expect_equal(idmatrix, fdmatrix)\n\n  close(iconn)\n  close(fconn)\n\n  file.remove(fname_int)\n  file.remove(fname_float)\n})\n\ntest_that(\"xgb.DMatrix: saving, loading\", {\n  # save to a local file\n  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)\n  tmp_file <- tempfile('xgb.DMatrix_')\n  on.exit(unlink(tmp_file))\n  expect_true(xgb.DMatrix.save(dtest1, tmp_file))\n  # read from a local file\n  xgb.set.config(verbosity = 2)\n  expect_output(dtest3 <- xgb.DMatrix(tmp_file, nthread = 1), \"entries loaded from\")\n  xgb.set.config(verbosity = 1)\n  expect_output(dtest3 <- xgb.DMatrix(tmp_file, nthread = 1), NA)\n  unlink(tmp_file)\n  expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))\n\n  # from a libsvm text file\n  tmp <- c(\"0 1:1 2:1\", \"1 3:1\", \"0 1:1\")\n  tmp_file <- tempfile(fileext = \".libsvm\")\n  writeLines(tmp, tmp_file)\n  expect_true(file.exists(tmp_file))\n  dtest4 <- xgb.DMatrix(\n    paste(tmp_file, \"?format=libsvm\", sep = \"\"), silent = TRUE, nthread = 
n_threads\n  )\n  expect_equal(dim(dtest4), c(3, 4))\n  expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))\n\n  # check that feature info is saved\n  data(agaricus.train, package = 'xgboost')\n  dtrain <- xgb.DMatrix(\n    data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n  )\n  cnames <- colnames(dtrain)\n  expect_equal(length(cnames), 126)\n  tmp_file <- tempfile('xgb.DMatrix_')\n  xgb.DMatrix.save(dtrain, tmp_file)\n  xgb.set.config(verbosity = 0)\n  dtrain <- xgb.DMatrix(tmp_file, nthread = 1)\n  expect_equal(colnames(dtrain), cnames)\n\n  ft <- rep(c(\"c\", \"q\"), each = length(cnames) / 2)\n  setinfo(dtrain, \"feature_type\", ft)\n  expect_equal(ft, getinfo(dtrain, \"feature_type\"))\n})\n\ntest_that(\"xgb.DMatrix: getinfo & setinfo\", {\n  dtest <- xgb.DMatrix(test_data, nthread = n_threads)\n  expect_true(setinfo(dtest, 'label', test_label))\n  labels <- getinfo(dtest, 'label')\n  expect_equal(test_label, getinfo(dtest, 'label'))\n\n  expect_true(setinfo(dtest, 'label_lower_bound', test_label))\n  expect_equal(test_label, getinfo(dtest, 'label_lower_bound'))\n\n  expect_true(setinfo(dtest, 'label_upper_bound', test_label))\n  expect_equal(test_label, getinfo(dtest, 'label_upper_bound'))\n\n  expect_true(length(getinfo(dtest, 'weight')) == 0)\n  expect_true(length(getinfo(dtest, 'base_margin')) == 0)\n\n  expect_true(setinfo(dtest, 'weight', test_label))\n  expect_true(setinfo(dtest, 'base_margin', test_label))\n  expect_true(setinfo(dtest, 'group', c(50, 50)))\n  expect_error(setinfo(dtest, 'group', test_label))\n\n  # providing character values will give an error\n  expect_error(setinfo(dtest, 'weight', rep('a', nrow(test_data))))\n\n  # any other label should error\n  expect_error(setinfo(dtest, 'asdf', test_label))\n})\n\ntest_that(\"xgb.DMatrix: slice, dim\", {\n  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)\n  expect_equal(dim(dtest), dim(test_data))\n  dsub1 <- xgb.slice.DMatrix(dtest, 1:42)\n  
expect_equal(nrow(dsub1), 42)\n  expect_equal(ncol(dsub1), ncol(test_data))\n\n  dsub2 <- dtest[1:42, ]\n  expect_equal(dim(dtest), dim(test_data))\n  expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))\n})\n\ntest_that(\"xgb.DMatrix: slice, trailing empty rows\", {\n  data(agaricus.train, package = 'xgboost')\n  train_data <- agaricus.train$data\n  train_label <- agaricus.train$label\n  dtrain <- xgb.DMatrix(\n    data = train_data, label = train_label, nthread = n_threads\n  )\n  xgb.slice.DMatrix(dtrain, 6513L)\n  train_data[6513, ] <- 0\n  dtrain <- xgb.DMatrix(\n    data = train_data, label = train_label, nthread = n_threads\n  )\n  xgb.slice.DMatrix(dtrain, 6513L)\n  expect_equal(nrow(dtrain), 6513)\n})\n\ntest_that(\"xgb.DMatrix: colnames\", {\n  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)\n  expect_equal(colnames(dtest), colnames(test_data))\n  expect_error(colnames(dtest) <- 'asdf')\n  new_names <- make.names(seq_len(ncol(test_data)))\n  expect_silent(colnames(dtest) <- new_names)\n  expect_equal(colnames(dtest), new_names)\n  expect_silent(colnames(dtest) <- NULL)\n  expect_null(colnames(dtest))\n})\n\ntest_that(\"xgb.DMatrix: nrow is correct for a very sparse matrix\", {\n  set.seed(123)\n  nr <- 1000\n  x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)\n  # we want it very sparse, so that last rows are empty\n  expect_lt(max(x@i), nr)\n  dtest <- xgb.DMatrix(x, nthread = n_threads)\n  expect_equal(dim(dtest), dim(x))\n})\n\ntest_that(\"xgb.DMatrix: print\", {\n    data(agaricus.train, package = 'xgboost')\n\n    # core DMatrix with just data and labels\n    dtrain <- xgb.DMatrix(\n      data = agaricus.train$data, label = agaricus.train$label,\n      nthread = n_threads\n    )\n    txt <- capture.output({\n        print(dtrain)\n    })\n    expect_equal(txt, \"xgb.DMatrix  dim: 6513 x 126  info: label  colnames: yes\")\n\n    # verbose=TRUE prints feature names\n    txt <- capture.output({\n        
print(dtrain, verbose = TRUE)\n    })\n    expect_equal(txt[[1L]], \"xgb.DMatrix  dim: 6513 x 126  info: label  colnames:\")\n    expect_equal(txt[[2L]], sprintf(\"'%s'\", paste(colnames(dtrain), collapse = \"','\")))\n\n    # DMatrix with weights and base_margin\n    dtrain <- xgb.DMatrix(\n      data = agaricus.train$data,\n      label = agaricus.train$label,\n      weight = seq_along(agaricus.train$label),\n      base_margin = agaricus.train$label,\n      nthread = n_threads\n    )\n    txt <- capture.output({\n        print(dtrain)\n    })\n    expect_equal(txt, \"xgb.DMatrix  dim: 6513 x 126  info: base_margin, label, weight  colnames: yes\")\n\n    # DMatrix with just features\n    dtrain <- xgb.DMatrix(\n      data = agaricus.train$data,\n      nthread = n_threads\n    )\n    txt <- capture.output({\n        print(dtrain)\n    })\n    expect_equal(txt, \"xgb.DMatrix  dim: 6513 x 126  info: NA  colnames: yes\")\n\n    # DMatrix with no column names\n    data_no_colnames <- agaricus.train$data\n    colnames(data_no_colnames) <- NULL\n    dtrain <- xgb.DMatrix(\n      data = data_no_colnames,\n      nthread = n_threads\n    )\n    txt <- capture.output({\n        print(dtrain)\n    })\n    expect_equal(txt, \"xgb.DMatrix  dim: 6513 x 126  info: NA  colnames: no\")\n})\n\ntest_that(\"xgb.DMatrix: Inf as missing\", {\n  x_inf <- matrix(as.numeric(1:10), nrow = 5)\n  x_inf[2, 1] <- Inf\n\n  x_nan <- x_inf\n  x_nan[2, 1] <- NA_real_\n\n  m_inf <- xgb.DMatrix(x_inf, nthread = n_threads, missing = Inf)\n  fname_inf <- file.path(tempdir(), \"inf.dmatrix\")\n  xgb.DMatrix.save(m_inf, fname_inf)\n\n  m_nan <- xgb.DMatrix(x_nan, nthread = n_threads, missing = NA_real_)\n  fname_nan <- file.path(tempdir(), \"nan.dmatrix\")\n  xgb.DMatrix.save(m_nan, fname_nan)\n\n  infconn <- file(fname_inf, \"rb\")\n  nanconn <- file(fname_nan, \"rb\")\n\n  expect_equal(file.size(fname_inf), file.size(fname_nan))\n\n  bytes <- file.size(fname_inf)\n  infdmatrix <- readBin(infconn, 
\"raw\", n = bytes)\n  nandmatrix <- readBin(nanconn, \"raw\", n = bytes)\n\n  expect_equal(length(infdmatrix), length(nandmatrix))\n  expect_equal(infdmatrix, nandmatrix)\n\n  close(infconn)\n  close(nanconn)\n\n  file.remove(fname_inf)\n  file.remove(fname_nan)\n})\n\ntest_that(\"xgb.DMatrix: missing in CSR\", {\n  x_dense <- matrix(as.numeric(1:10), nrow = 5)\n  x_dense[2, 1] <- NA_real_\n\n  x_csr <- as(x_dense, \"RsparseMatrix\")\n\n  m_dense <- xgb.DMatrix(x_dense, nthread = n_threads, missing = NA_real_)\n  xgb.DMatrix.save(m_dense, \"dense.dmatrix\")\n\n  m_csr <- xgb.DMatrix(x_csr, nthread = n_threads, missing = NA)\n  xgb.DMatrix.save(m_csr, \"csr.dmatrix\")\n\n  denseconn <- file(\"dense.dmatrix\", \"rb\")\n  csrconn <- file(\"csr.dmatrix\", \"rb\")\n\n  expect_equal(file.size(\"dense.dmatrix\"), file.size(\"csr.dmatrix\"))\n\n  bytes <- file.size(\"dense.dmatrix\")\n  densedmatrix <- readBin(denseconn, \"raw\", n = bytes)\n  csrmatrix <- readBin(csrconn, \"raw\", n = bytes)\n\n  expect_equal(length(densedmatrix), length(csrmatrix))\n  expect_equal(densedmatrix, csrmatrix)\n\n  close(denseconn)\n  close(csrconn)\n\n  file.remove(\"dense.dmatrix\")\n  file.remove(\"csr.dmatrix\")\n})\n\ntest_that(\"xgb.DMatrix: error on three-dimensional array\", {\n  set.seed(123)\n  x <- matrix(rnorm(500), nrow = 50)\n  y <- rnorm(400)\n  dim(y) <- c(50, 4, 2)\n  expect_error(xgb.DMatrix(data = x, label = y))\n})\n\ntest_that(\"xgb.DMatrix: can get group for both 'qid' and 'group' constructors\", {\n  set.seed(123)\n  x <- matrix(rnorm(1000), nrow = 100)\n  group <- c(20, 20, 60)\n  qid <- c(rep(1, 20), rep(2, 20), rep(3, 60))\n\n  gr_mat <- xgb.DMatrix(x, group = group, nthread = 1)\n  qid_mat <- xgb.DMatrix(x, qid = qid, nthread = 1)\n\n  info_gr <- getinfo(gr_mat, \"group\")\n  info_qid <- getinfo(qid_mat, \"group\")\n  expect_equal(info_gr, info_qid)\n\n  expected_gr <- c(0, 20, 40, 100)\n  expect_equal(info_gr, expected_gr)\n})\n\ntest_that(\"xgb.DMatrix: 
data.frame\", {\n  df <- data.frame(\n    a = (1:4) / 10,\n    num = c(1, NA, 3, 4),\n    as.int = as.integer(c(1, 2, 3, 4)),\n    lo = c(TRUE, FALSE, NA, TRUE),\n    str.fac = c(\"a\", \"b\", \"d\", \"c\"),\n    as.fac = as.factor(c(3, 5, 8, 11)),\n    stringsAsFactors = TRUE\n  )\n\n  m <- xgb.DMatrix(df, nthread = 1)\n  expect_equal(colnames(m), colnames(df))\n  expect_equal(\n    getinfo(m, \"feature_type\"), c(\"float\", \"float\", \"int\", \"i\", \"c\", \"c\")\n  )\n\n  df <- data.frame(\n    missing = c(\"a\", \"b\", \"d\", NA),\n    valid = c(\"a\", \"b\", \"d\", \"c\"),\n    stringsAsFactors = TRUE\n  )\n  m <- xgb.DMatrix(df, nthread = 1)\n  expect_equal(getinfo(m, \"feature_type\"), c(\"c\", \"c\"))\n})\n\ntest_that(\"xgb.DMatrix: can take multi-dimensional 'base_margin'\", {\n  set.seed(123)\n  x <- matrix(rnorm(100 * 10), nrow = 100)\n  y <- matrix(rnorm(100 * 2), nrow = 100)\n  b <- matrix(rnorm(100 * 2), nrow = 100)\n  model <- xgb.train(\n    data = xgb.DMatrix(data = x, label = y, nthread = n_threads),\n    params = list(\n      objective = \"reg:squarederror\",\n      tree_method = \"hist\",\n      multi_strategy = \"multi_output_tree\",\n      base_score = 0,\n      nthread = n_threads\n    ),\n    nround = 1\n  )\n  pred_only_x <- predict(model, x)\n  pred_w_base <- predict(\n    model,\n    xgb.DMatrix(data = x, base_margin = b, nthread = 1)\n  )\n  expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5)\n})\n\ntest_that(\"xgb.DMatrix: QuantileDMatrix produces same result as DMatrix\", {\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- mtcars[, -1]\n\n  cast_matrix <- function(x) as.matrix(x)\n  cast_df <- function(x) as.data.frame(x)\n  cast_csr <- function(x) as(as.matrix(x), \"RsparseMatrix\")\n  casting_funs <- list(cast_matrix, cast_df, cast_csr)\n\n  for (casting_fun in casting_funs) {\n\n    qdm <- xgb.QuantileDMatrix(\n      data = casting_fun(x),\n      label = y,\n      nthread = n_threads,\n      max_bin = 5\n    )\n    params <- 
list(\n      tree_method = \"hist\",\n      objective = \"reg:squarederror\",\n      nthread = n_threads,\n      max_bin = 5\n    )\n    model_qdm <- xgb.train(\n      params = params,\n      data = qdm,\n      nrounds = 2\n    )\n    pred_qdm <- predict(model_qdm, x)\n\n    dm <- xgb.DMatrix(\n      data = x,\n      label = y,\n      nthread = n_threads\n    )\n    model_dm <- xgb.train(\n      params = params,\n      data = dm,\n      nrounds = 2\n    )\n    pred_dm <- predict(model_dm, x)\n\n    expect_equal(pred_qdm, pred_dm)\n  }\n})\n\ntest_that(\"xgb.DMatrix: QuantileDMatrix is not accepted by exact method\", {\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- as.matrix(mtcars[, -1])\n  qdm <- xgb.QuantileDMatrix(\n    data = x,\n    label = y,\n    nthread = n_threads\n  )\n  params <- list(\n    tree_method = \"exact\",\n    objective = \"reg:squarederror\",\n    nthread = n_threads\n  )\n  expect_error({\n    xgb.train(\n      params = params,\n      data = qdm,\n      nrounds = 2\n    )\n  })\n})\n\ntest_that(\"xgb.DMatrix: ExtMemDMatrix produces the same results as regular DMatrix\", {\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- as.matrix(mtcars[, -1])\n  set.seed(123)\n  params <- list(\n    objective = \"reg:squarederror\",\n    nthread = n_threads\n  )\n  model <- xgb.train(\n    data = xgb.DMatrix(x, label = y, nthread = 1),\n    params = params,\n    nrounds = 5\n  )\n  pred <- predict(model, x)\n  pred <- unname(pred)\n\n  iterator_env <- as.environment(\n    list(\n      iter = 0,\n      x = mtcars[, -1],\n      y = mtcars[, 1]\n    )\n  )\n  iterator_next <- function(iterator_env) {\n    curr_iter <- iterator_env[[\"iter\"]]\n    if (curr_iter >= 2) {\n      return(NULL)\n    }\n    if (curr_iter == 0) {\n      x_batch <- iterator_env[[\"x\"]][1:16, ]\n      y_batch <- iterator_env[[\"y\"]][1:16]\n    } else {\n      x_batch <- iterator_env[[\"x\"]][17:32, ]\n      y_batch <- iterator_env[[\"y\"]][17:32]\n    }\n    on.exit({\n      
iterator_env[[\"iter\"]] <- curr_iter + 1\n    })\n    return(xgb.DataBatch(data = x_batch, label = y_batch))\n  }\n  iterator_reset <- function(iterator_env) {\n    iterator_env[[\"iter\"]] <- 0\n  }\n  data_iterator <- xgb.DataIter(\n    env = iterator_env,\n    f_next = iterator_next,\n    f_reset = iterator_reset\n  )\n  cache_prefix <- tempdir()\n  edm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)\n  expect_true(inherits(edm, \"xgb.ExtMemDMatrix\"))\n  expect_true(inherits(edm, \"xgb.DMatrix\"))\n  set.seed(123)\n  model_ext <- xgb.train(\n    data = edm,\n    params = params,\n    nrounds = 5\n  )\n\n  pred_model1_edm <- predict(model, edm)\n  pred_model2_mat <- predict(model_ext, x) |> unname()\n  pred_model2_edm <- predict(model_ext, edm)\n\n  expect_equal(pred_model1_edm, pred)\n  expect_equal(pred_model2_mat, pred)\n  expect_equal(pred_model2_edm, pred)\n})\n\ntest_that(\"xgb.DMatrix: External QDM produces same results as regular QDM\", {\n  data(mtcars)\n  y <- mtcars[, 1]\n  x <- as.matrix(mtcars[, -1])\n  set.seed(123)\n  params <- list(\n    objective = \"reg:squarederror\",\n    nthread = n_threads,\n    max_bin = 3\n  )\n  model <- xgb.train(\n    data = xgb.QuantileDMatrix(\n      x,\n      label = y,\n      nthread = 1,\n      max_bin = 3\n    ),\n    params = params,\n    nrounds = 5\n  )\n  pred <- predict(model, x)\n  pred <- unname(pred)\n\n  iterator_env <- as.environment(\n    list(\n      iter = 0,\n      x = mtcars[, -1],\n      y = mtcars[, 1]\n    )\n  )\n  iterator_next <- function(iterator_env) {\n    curr_iter <- iterator_env[[\"iter\"]]\n    if (curr_iter >= 2) {\n      return(NULL)\n    }\n    if (curr_iter == 0) {\n      x_batch <- iterator_env[[\"x\"]][1:16, ]\n      y_batch <- iterator_env[[\"y\"]][1:16]\n    } else {\n      x_batch <- iterator_env[[\"x\"]][17:32, ]\n      y_batch <- iterator_env[[\"y\"]][17:32]\n    }\n    on.exit({\n      iterator_env[[\"iter\"]] <- curr_iter + 1\n    })\n    
return(xgb.DataBatch(data = x_batch, label = y_batch))\n  }\n  iterator_reset <- function(iterator_env) {\n    iterator_env[[\"iter\"]] <- 0\n  }\n  data_iterator <- xgb.DataIter(\n    env = iterator_env,\n    f_next = iterator_next,\n    f_reset = iterator_reset\n  )\n  cache_prefix <- tempdir()\n  qdm <- xgb.QuantileDMatrix.from_iterator(\n    data_iterator,\n    max_bin = 3,\n    nthread = 1\n  )\n  expect_true(inherits(qdm, \"xgb.QuantileDMatrix\"))\n  expect_true(inherits(qdm, \"xgb.DMatrix\"))\n  set.seed(123)\n  model_ext <- xgb.train(\n    data = qdm,\n    params = params,\n    nrounds = 5\n  )\n\n  pred_model1_qdm <- predict(model, qdm)\n  pred_model2_mat <- predict(model_ext, x) |> unname()\n  pred_model2_qdm <- predict(model_ext, qdm)\n\n  expect_equal(pred_model1_qdm, pred)\n  expect_equal(pred_model2_mat, pred)\n  expect_equal(pred_model2_qdm, pred)\n})\n\ntest_that(\"xgb.DMatrix: R errors thrown on DataIterator are thrown back to the user\", {\n  data(mtcars)\n  iterator_env <- as.environment(\n    list(\n      iter = 0,\n      x = mtcars[, -1],\n      y = mtcars[, 1]\n    )\n  )\n  iterator_next <- function(iterator_env) {\n    curr_iter <- iterator_env[[\"iter\"]]\n    if (curr_iter >= 2) {\n      return(0)\n    }\n    if (curr_iter == 0) {\n      x_batch <- iterator_env[[\"x\"]][1:16, ]\n      y_batch <- iterator_env[[\"y\"]][1:16]\n    } else {\n      stop(\"custom error\")\n    }\n    on.exit({\n      iterator_env[[\"iter\"]] <- curr_iter + 1\n    })\n    return(xgb.DataBatch(data = x_batch, label = y_batch))\n  }\n  iterator_reset <- function(iterator_env) {\n    iterator_env[[\"iter\"]] <- 0\n  }\n  data_iterator <- xgb.DataIter(\n    env = iterator_env,\n    f_next = iterator_next,\n    f_reset = iterator_reset\n  )\n  expect_error(\n    {xgb.ExtMemDMatrix(data_iterator, nthread = 1)},\n    \"custom error\"\n  )\n})\n\ntest_that(\"xgb.DMatrix: number of non-missing matches data\", {\n  x <- matrix(1:10, nrow = 5)\n  dm1 <- xgb.DMatrix(x, 
nthread = 1)\n  expect_equal(xgb.get.DMatrix.num.non.missing(dm1), 10)\n\n  x[2, 2] <- NA\n  x[4, 1] <- NA\n  dm2 <- xgb.DMatrix(x, nthread = 1)\n  expect_equal(xgb.get.DMatrix.num.non.missing(dm2), 8)\n})\n\ntest_that(\"xgb.DMatrix: retrieving data as CSR\", {\n  data(mtcars)\n  dm <- xgb.DMatrix(as.matrix(mtcars), nthread = 1)\n  csr <- xgb.get.DMatrix.data(dm)\n  expect_equal(dim(csr), dim(mtcars))\n  expect_equal(colnames(csr), colnames(mtcars))\n  expect_equal(unname(as.matrix(csr)), unname(as.matrix(mtcars)), tolerance = 1e-6)\n})\n\ntest_that(\"xgb.DMatrix: quantile cuts look correct\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = list(\n      tree_method = \"hist\",\n      max_bin = 8,\n      nthread = 1\n    ),\n    nrounds = 3\n  )\n  qcut_list <- xgb.get.DMatrix.qcut(dm, \"list\")\n  qcut_arrays <- xgb.get.DMatrix.qcut(dm, \"arrays\")\n\n  expect_equal(length(qcut_arrays), 2)\n  expect_equal(names(qcut_arrays), c(\"indptr\", \"data\"))\n  expect_equal(length(qcut_arrays$indptr), ncol(x) + 1)\n  expect_true(min(diff(qcut_arrays$indptr)) > 0)\n\n  col_min <- apply(x, 2, min)\n  col_max <- apply(x, 2, max)\n\n  expect_equal(length(qcut_list), ncol(x))\n  expect_equal(names(qcut_list), colnames(x))\n  lapply(\n    seq(1, ncol(x)),\n    function(col) {\n      cuts <- qcut_list[[col]]\n      expect_true(min(diff(cuts)) > 0)\n      expect_true(col_min[col] > cuts[1])\n      expect_true(col_max[col] < cuts[length(cuts)])\n      expect_true(length(cuts) <= 9)\n    }\n  )\n})\n\ntest_that(\"xgb.DMatrix: slicing keeps field indicators\", {\n  data(mtcars)\n  x <- as.matrix(mtcars[, -1])\n  y <- mtcars[, 1]\n  dm <- xgb.DMatrix(\n    data = x,\n    label_lower_bound = -y,\n    label_upper_bound = y,\n    nthread = 1\n  )\n  idx_take <- seq(1, 5)\n  dm_slice <- xgb.slice.DMatrix(dm, idx_take)\n\n  expect_true(xgb.DMatrix.hasinfo(dm_slice, 
\"label_lower_bound\"))\n  expect_true(xgb.DMatrix.hasinfo(dm_slice, \"label_upper_bound\"))\n  expect_false(xgb.DMatrix.hasinfo(dm_slice, \"label\"))\n\n  expect_equal(getinfo(dm_slice, \"label_lower_bound\"), -y[idx_take], tolerance = 1e-6)\n  expect_equal(getinfo(dm_slice, \"label_upper_bound\"), y[idx_take], tolerance = 1e-6)\n})\n\ntest_that(\"xgb.DMatrix: can slice with groups\", {\n  data(iris)\n  x <- as.matrix(iris[, -5])\n  set.seed(123)\n  y <- sample(3, size = nrow(x), replace = TRUE)\n  group <- c(50, 50, 50)\n  dm <- xgb.DMatrix(x, label = y, group = group, nthread = 1)\n  idx_take <- seq(1, 50)\n  dm_slice <- xgb.slice.DMatrix(dm, idx_take, allow_groups = TRUE)\n\n  expect_true(xgb.DMatrix.hasinfo(dm_slice, \"label\"))\n  expect_false(xgb.DMatrix.hasinfo(dm_slice, \"group\"))\n  expect_false(xgb.DMatrix.hasinfo(dm_slice, \"qid\"))\n  expect_null(getinfo(dm_slice, \"group\"))\n  expect_equal(getinfo(dm_slice, \"label\"), y[idx_take], tolerance = 1e-6)\n})\n\ntest_that(\"xgb.DMatrix: can read CSV\", {\n  txt <- paste(\n    \"1,2,3\",\n    \"-1,3,2\",\n    sep = \"\\n\"\n  )\n  fname <- file.path(tempdir(), \"data.csv\")\n  writeChar(txt, fname)\n  uri <- paste0(fname, \"?format=csv&label_column=0\")\n  dm <- xgb.DMatrix(uri, silent = TRUE, nthread = 1)\n  expect_equal(getinfo(dm, \"label\"), c(1, -1))\n  expect_equal(\n    as.matrix(xgb.get.DMatrix.data(dm)),\n    matrix(c(2, 3, 3, 2), nrow = 2, byrow = TRUE)\n  )\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_feature_weights.R",
    "content": "context(\"feature weights\")\n\nn_threads <- 2\n\ntest_that(\"training with feature weights works\", {\n  nrows <- 1000\n  ncols <- 9\n  set.seed(2022)\n  x <- matrix(rnorm(nrows * ncols), nrow = nrows)\n  y <- rowSums(x)\n  weights <- seq(from = 1, to = ncols)\n\n  test <- function(tm) {\n    names <- paste0(\"f\", 1:ncols)\n    xy <- xgb.DMatrix(\n      data = x, label = y, feature_weights = weights, nthread = n_threads\n    )\n    params <- list(\n      colsample_bynode = 0.4, tree_method = tm, nthread = n_threads\n    )\n    model <- xgb.train(params = params, data = xy, nrounds = 32)\n    importance <- xgb.importance(model = model, feature_names = names)\n    expect_equal(dim(importance), c(ncols, 4))\n    importance <- importance[order(importance$Feature)]\n    expect_lt(importance[1, Frequency], importance[9, Frequency])\n  }\n\n  for (tm in c(\"hist\", \"approx\")) {\n    test(tm)\n  }\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_glm.R",
    "content": "context('Test generalized linear models')\n\nn_threads <- 2\n\ntest_that(\"gblinear works\", {\n  data(agaricus.train, package = 'xgboost')\n  data(agaricus.test, package = 'xgboost')\n  dtrain <- xgb.DMatrix(\n    agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n  )\n  dtest <- xgb.DMatrix(\n    agaricus.test$data, label = agaricus.test$label, nthread = n_threads\n  )\n\n  param <- list(objective = \"binary:logistic\", eval_metric = \"error\", booster = \"gblinear\",\n                nthread = n_threads, learning_rate = 0.8, reg_alpha = 0.0001, reg_lambda = 0.0001)\n  evals <- list(eval = dtest, train = dtrain)\n\n  n <- 5         # iterations\n  ERR_UL <- 0.005 # upper limit for the test set error\n  VERB <- 0      # chatterbox switch\n\n  param$updater <- 'shotgun'\n  bst <- xgb.train(c(param, list(feature_selector = 'shuffle')), dtrain, n, evals, verbose = VERB)\n  ypred <- predict(bst, dtest)\n  expect_equal(length(getinfo(dtest, 'label')), 1611)\n  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)\n\n  bst <- xgb.train(c(param, list(feature_selector = 'cyclic')), dtrain, n, evals, verbose = VERB,\n                   callbacks = list(xgb.cb.gblinear.history()))\n  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)\n  h <- xgb.gblinear.history(bst)\n  expect_equal(dim(h), c(n, ncol(dtrain) + 1))\n  expect_is(h, \"matrix\")\n\n  param$updater <- 'coord_descent'\n  bst <- xgb.train(c(param, list(feature_selector = 'cyclic')), dtrain, n, evals, verbose = VERB)\n  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)\n\n  bst <- xgb.train(c(param, list(feature_selector = 'shuffle')), dtrain, n, evals, verbose = VERB)\n  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)\n\n  bst <- xgb.train(c(param, list(feature_selector = 'greedy')), dtrain, 2, evals, verbose = VERB)\n  expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)\n\n  bst <- xgb.train(c(param, 
list(feature_selector = 'thrifty', top_k = 50)), dtrain, n, evals, verbose = VERB,\n                   callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))\n  expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)\n  h <- xgb.gblinear.history(bst)\n  expect_equal(dim(h), c(n, ncol(dtrain) + 1))\n  expect_s4_class(h, \"dgCMatrix\")\n})\n\ntest_that(\"gblinear early stopping works\", {\n  data(agaricus.train, package = 'xgboost')\n  data(agaricus.test, package = 'xgboost')\n  dtrain <- xgb.DMatrix(\n    agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n  )\n  dtest <- xgb.DMatrix(\n    agaricus.test$data, label = agaricus.test$label, nthread = n_threads\n  )\n\n  param <- xgb.params(\n    objective = \"binary:logistic\", eval_metric = \"error\", booster = \"gblinear\",\n    nthread = n_threads, learning_rate = 0.8, reg_alpha = 0.0001, reg_lambda = 0.0001,\n    updater = \"coord_descent\"\n  )\n\n  es_round <- 1\n  n <- 10\n  booster <- xgb.train(\n    param, dtrain, nrounds = n, evals = list(eval = dtest, train = dtrain),\n    early_stopping_rounds = es_round, verbose = 0\n  )\n  expect_equal(xgb.attr(booster, \"best_iteration\"), 4)\n  predt_es <- predict(booster, dtrain)\n\n  n <- xgb.attr(booster, \"best_iteration\") + es_round + 1\n  booster <- xgb.train(\n    param, dtrain, nrounds = n, evals = list(eval = dtest, train = dtrain),\n    early_stopping_rounds = es_round, verbose = 0\n  )\n  predt <- predict(booster, dtrain)\n  expect_equal(predt_es, predt)\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_helpers.R",
    "content": "context('Test helper functions')\n\nVCD_AVAILABLE <- requireNamespace(\"vcd\", quietly = TRUE)\n.skip_if_vcd_not_available <- function() {\n    if (!VCD_AVAILABLE) {\n        testthat::skip(\"Optional testing dependency 'vcd' not found.\")\n    }\n}\n\nfloat_tolerance <- 5e-6\n\n# disable some tests for 32-bit environment\nflag_32bit <- .Machine$sizeof.pointer != 8\n\nset.seed(1982)\n\nnrounds <- 12\nif (isTRUE(VCD_AVAILABLE)) {\n    data(Arthritis, package = \"vcd\")\n    df <- data.table::data.table(Arthritis, keep.rownames = FALSE)\n    df[, AgeDiscret := as.factor(round(Age / 10, 0))]\n    df[, AgeCat := as.factor(ifelse(Age > 30, \"Old\", \"Young\"))]\n    df[, ID := NULL]\n    sparse_matrix <- Matrix::sparse.model.matrix(Improved~.-1, data = df) # nolint\n    label <- df[, ifelse(Improved == \"Marked\", 1, 0)]\n\n    # binary\n    bst.Tree <- xgb.train(\n      data = xgb.DMatrix(sparse_matrix, label = label, nthread = 1),\n      nrounds = nrounds, verbose = 0,\n      params = xgb.params(\n        max_depth = 9,\n        learning_rate = 1,\n        nthread = 2,\n        objective = \"binary:logistic\",\n        booster = \"gbtree\",\n        base_score = 0.5\n      )\n    )\n\n    bst.GLM <- xgb.train(\n      data = xgb.DMatrix(sparse_matrix, label = label, nthread = 1),\n      nrounds = nrounds, verbose = 0,\n      params = xgb.params(\n        learning_rate = 1,\n        nthread = 1,\n        objective = \"binary:logistic\",\n        booster = \"gblinear\",\n        base_score = 0.5\n      )\n    )\n\n    feature.names <- colnames(sparse_matrix)\n\n    # without feature names\n    bst.Tree.unnamed <- xgb.copy.Booster(bst.Tree)\n    setinfo(bst.Tree.unnamed, \"feature_name\", NULL)\n}\n\n# multiclass\nmlabel <- as.numeric(iris$Species) - 1\nnclass <- 3\nmbst.Tree <- xgb.train(\n  data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel, nthread = 1),\n  verbose = 0,\n  nrounds = nrounds,\n  params = xgb.params(\n    max_depth = 3, 
learning_rate = 0.5, nthread = 2,\n    objective = \"multi:softprob\", num_class = nclass, base_score = 0\n  )\n)\n\nmbst.GLM <- xgb.train(\n  data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel, nthread = 1),\n  verbose = 0,\n  nrounds = nrounds,\n  params = xgb.params(\n    booster = \"gblinear\", learning_rate = 0.1, nthread = 1,\n    objective = \"multi:softprob\", num_class = nclass, base_score = 0\n  )\n)\n\ntest_that(\"xgb.dump works\", {\n  .skip_if_vcd_not_available()\n  if (!flag_32bit)\n    expect_length(xgb.dump(bst.Tree), 200)\n  dump_file <- file.path(tempdir(), 'xgb.model.dump')\n  expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))\n  expect_true(file.exists(dump_file))\n  expect_gt(file.size(dump_file), 8000)\n\n  # JSON format\n  dmp <- xgb.dump(bst.Tree, dump_format = \"json\")\n  expect_length(dmp, 1)\n  if (!flag_32bit)\n    expect_length(grep('nodeid', strsplit(dmp, '\\n', fixed = TRUE)[[1]], fixed = TRUE), 188)\n})\n\ntest_that(\"xgb.dump works for gblinear\", {\n  .skip_if_vcd_not_available()\n  expect_length(xgb.dump(bst.GLM), 14)\n  # also make sure that it works properly for a sparse model where some coefficients\n  # are 0 from setting large L1 regularization:\n  bst.GLM.sp <- xgb.train(\n    data = xgb.DMatrix(sparse_matrix, label = label, nthread = 1),\n    nrounds = 1,\n    params = xgb.params(\n      learning_rate = 1,\n      nthread = 2,\n      reg_alpha = 2,\n      objective = \"binary:logistic\",\n      booster = \"gblinear\"\n    )\n  )\n  d.sp <- xgb.dump(bst.GLM.sp)\n  expect_length(d.sp, 14)\n  expect_gt(sum(d.sp == \"0\"), 0)\n\n  # JSON format\n  dmp <- xgb.dump(bst.GLM.sp, dump_format = \"json\")\n  expect_length(dmp, 1)\n  expect_length(grep('\\\\d', strsplit(dmp, '\\n', fixed = TRUE)[[1]]), 11)\n})\n\ntest_that(\"predict leafs works\", {\n  .skip_if_vcd_not_available()\n  # no error for gbtree\n  expect_error(pred_leaf <- predict(bst.Tree, sparse_matrix, predleaf = TRUE), regexp = NA)\n  
expect_equal(dim(pred_leaf), c(nrow(sparse_matrix), nrounds))\n  # error for gblinear\n  expect_error(predict(bst.GLM, sparse_matrix, predleaf = TRUE))\n})\n\ntest_that(\"predict feature contributions works\", {\n  .skip_if_vcd_not_available()\n  # gbtree binary classifier\n  expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE), regexp = NA)\n  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))\n  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), \"(Intercept)\"))\n  pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)\n  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)\n  # must work with data that has no column names\n  X <- sparse_matrix\n  colnames(X) <- NULL\n  expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)\n  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,\n               tolerance = float_tolerance)\n\n  # gbtree binary classifier (approximate method)\n  expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)\n  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))\n  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), \"(Intercept)\"))\n  pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)\n  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)\n\n  # gblinear binary classifier\n  expect_error(pred_contr <- predict(bst.GLM, sparse_matrix, predcontrib = TRUE), regexp = NA)\n  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))\n  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), \"(Intercept)\"))\n  pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)\n  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)\n  # manual calculation of linear terms\n  coefs <- as.numeric(xgb.dump(bst.GLM)[-c(1, 2, 4)])\n  coefs <- c(coefs[-1], coefs[1]) # intercept must be the last\n  
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN = \"*\")\n  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),\n               tolerance = float_tolerance)\n\n  # gbtree multiclass\n  pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE)\n  pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE)\n  expect_is(pred_contr, \"array\")\n  expect_length(dim(pred_contr), 3)\n  for (g in seq_len(dim(pred_contr)[2])) {\n    expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), \"(Intercept)\"))\n    expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), 1e-5)\n  }\n\n  # gblinear multiclass (set base_score = 0, which is base margin in multiclass)\n  pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE)\n  pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)\n  expect_length(dim(pred_contr), 3)\n  coefs_all <- matrix(\n    data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]),\n    ncol = 3,\n    byrow = TRUE\n  )\n  # seq_len(), not seq_along(): iterate over every class, not only the first one\n  for (g in seq_len(dim(pred_contr)[2])) {\n    expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), \"(Intercept)\"))\n    expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), float_tolerance)\n    # manual calculation of linear terms\n    coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last\n    pred_contr_manual <- sweep(as.matrix(cbind(iris[, -5], 1)), 2, coefs, FUN = \"*\")\n    expect_equal(as.numeric(pred_contr[, g, ]), as.numeric(pred_contr_manual),\n                 tolerance = float_tolerance)\n  }\n})\n\ntest_that(\"SHAPs sum to predictions, with or without DART\", {\n  d <- cbind(\n    x1 = rnorm(100),\n    x2 = rnorm(100),\n    x3 = rnorm(100))\n  y <- d[, \"x1\"] + d[, \"x2\"]^2 +\n    ifelse(d[, \"x3\"] > .5, d[, \"x3\"]^2, 2^d[, \"x3\"]) +\n    rnorm(100)\n  nrounds <- 30\n\n  for (booster in list(\"gbtree\", \"dart\")) {\n    fit <- xgb.train(\n      params = c(\n        list(\n 
         nthread = 2,\n          booster = booster,\n          objective = \"reg:squarederror\",\n          eval_metric = \"rmse\"),\n        if (booster == \"dart\")\n          list(rate_drop = .01, one_drop = TRUE)),\n      data = xgb.DMatrix(d, label = y, nthread = 1),\n      nrounds = nrounds)\n\n    pr <- function(...) {\n      predict(fit, newdata = d, ...)\n    }\n    pred <- pr()\n    shap <- pr(predcontrib = TRUE)\n    shapi <- pr(predinteraction = TRUE)\n    tol <- 1e-5\n\n    expect_equal(rowSums(shap), pred, tol = tol)\n    expect_equal(rowSums(shapi), pred, tol = tol)\n    for (i in seq_len(nrow(d)))\n      for (f in list(rowSums, colSums))\n        expect_equal(f(shapi[i, , ]), shap[i, ], tol = tol)\n  }\n})\n\ntest_that(\"xgb-attribute functionality\", {\n  .skip_if_vcd_not_available()\n  val <- \"my attribute value\"\n  list.val <- list(my_attr = val, a = 123, b = 'ok')\n  list.ch <- list.val[order(names(list.val))]\n  list.ch <- lapply(list.ch, as.character)\n  # note: iter is 0-index in xgb attributes\n  list.default <- list()\n  list.ch <- c(list.ch, list.default)\n  # proper input:\n  expect_error(xgb.attr(bst.Tree, NULL))\n  expect_error(xgb.attr(val, val))\n  # set & get:\n  expect_null(xgb.attr(bst.Tree, \"asdf\"))\n  expect_equal(xgb.attributes(bst.Tree), list.default)\n  bst.Tree.copy <- xgb.copy.Booster(bst.Tree)\n  xgb.attr(bst.Tree.copy, \"my_attr\") <- val\n  expect_equal(xgb.attr(bst.Tree.copy, \"my_attr\"), val)\n  xgb.attributes(bst.Tree.copy) <- list.val\n  expect_equal(xgb.attributes(bst.Tree.copy), list.ch)\n  # serializing:\n  fname <- file.path(tempdir(), \"xgb.ubj\")\n  xgb.save(bst.Tree.copy, fname)\n  bst <- xgb.load(fname)\n  expect_equal(xgb.attr(bst, \"my_attr\"), val)\n  expect_equal(xgb.attributes(bst), list.ch)\n  # deletion:\n  xgb.attr(bst, \"my_attr\") <- NULL\n  expect_null(xgb.attr(bst, \"my_attr\"))\n  expect_equal(xgb.attributes(bst), list.ch[c(\"a\", \"b\")])\n  xgb.attributes(bst) <- list(a = NULL, b = NULL)\n  
expect_equal(xgb.attributes(bst), list.default)\n  xgb.attributes(bst) <- list(niter = NULL)\n  expect_equal(xgb.attributes(bst), list())\n})\n\nif (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) ||\n    grepl('Linux', Sys.info()[['sysname']], fixed = TRUE) ||\n    grepl('Darwin', Sys.info()[['sysname']], fixed = TRUE)) {\n    test_that(\"xgb-attribute numeric precision\", {\n      .skip_if_vcd_not_available()\n      # check that lossless conversion works with 17 digits\n      # numeric -> character -> numeric\n      X <- 10^runif(100, -20, 20)\n      if (capabilities('long.double')) {\n          X2X <- as.numeric(format(X, digits = 17))\n          expect_equal(X, X2X, tolerance = float_tolerance)\n      }\n      # retrieved attributes to be the same as written\n      for (x in X) {\n        xgb.attr(bst.Tree, \"x\") <- x\n        expect_equal(as.numeric(xgb.attr(bst.Tree, \"x\")), x, tolerance = float_tolerance)\n        xgb.attributes(bst.Tree) <- list(a = \"A\", b = x)\n        expect_equal(as.numeric(xgb.attr(bst.Tree, \"b\")), x, tolerance = float_tolerance)\n      }\n    })\n}\n\ntest_that(\"xgb.Booster serializing as R object works\", {\n  .skip_if_vcd_not_available()\n  fname_rds <- file.path(tempdir(), \"xgb.model.rds\")\n  saveRDS(bst.Tree, fname_rds)\n  bst <- readRDS(fname_rds)\n  dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)\n  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)\n  expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))\n\n  fname_bin <- file.path(tempdir(), \"xgb.model\")\n  xgb.save(bst, fname_bin)\n  bst <- readRDS(fname_rds)\n  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)\n})\n\ntest_that(\"xgb.model.dt.tree works with and without feature names\", {\n  .skip_if_vcd_not_available()\n  names.dt.trees <- c(\"Tree\", \"Node\", \"ID\", \"Feature\", \"Split\", \"Yes\", \"No\", \"Missing\", \"Gain\", \"Cover\")\n  dt.tree <- 
xgb.model.dt.tree(model = bst.Tree)\n  expect_equal(names.dt.trees, names(dt.tree))\n  if (!flag_32bit)\n    expect_equal(dim(dt.tree), c(188, 10))\n  expect_output(str(dt.tree), 'Feature.*\\\\\"Age\\\\\"')\n\n  # when model contains no feature names:\n  dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.unnamed)\n  expect_output(str(dt.tree.x), 'Feature.*\\\\\"3\\\\\"')\n  expect_equal(dt.tree[, -4, with = FALSE], dt.tree.x[, -4, with = FALSE])\n\n  # using integer node ID instead of character\n  dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)\n  expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-', fixed = TRUE)[[2]]), dt.tree.int$Yes)\n  expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-', fixed = TRUE)[[2]]), dt.tree.int$No)\n  expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-', fixed = TRUE)[[2]]), dt.tree.int$Missing)\n})\n\ntest_that(\"xgb.model.dt.tree throws error for gblinear\", {\n  .skip_if_vcd_not_available()\n  expect_error(xgb.model.dt.tree(model = bst.GLM))\n})\n\ntest_that(\"xgb.importance works with and without feature names\", {\n  .skip_if_vcd_not_available()\n  importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree.unnamed)\n  if (!flag_32bit)\n    expect_equal(dim(importance.Tree), c(7, 4))\n  expect_equal(colnames(importance.Tree), c(\"Feature\", \"Gain\", \"Cover\", \"Frequency\"))\n  expect_output(str(importance.Tree), 'Feature.*\\\\\"Age\\\\\"')\n\n  importance.Tree.0 <- xgb.importance(model = bst.Tree)\n  expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)\n\n  # when model contains no feature names:\n  importance.Tree.x <- xgb.importance(model = bst.Tree.unnamed)\n  expect_equal(importance.Tree[, -1, with = FALSE], importance.Tree.x[, -1, with = FALSE],\n               tolerance = float_tolerance)\n\n  imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)\n  expect_equal(colnames(imp2plot), c(\"Feature\", \"Gain\", 
\"Cover\", \"Frequency\", \"Importance\"))\n  xgb.ggplot.importance(importance_matrix = importance.Tree)\n\n  # for multiclass\n  imp.Tree <- xgb.importance(model = mbst.Tree)\n  expect_equal(dim(imp.Tree), c(4, 4))\n\n  trees <- seq(from = 1, by = 2, length.out = 2)\n  importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)\n\n  importance_from_dump <- function() {\n    imp <- xgb.model.dt.tree(\n      model = bst.Tree,\n      trees = trees\n    )[\n      Feature != \"Leaf\", .(\n        Gain = sum(Gain),\n        Cover = sum(Cover),\n        Frequency = .N\n      ),\n      by = Feature\n    ][\n      , `:=`(\n        Gain = Gain / sum(Gain),\n        Cover = Cover / sum(Cover),\n        Frequency = Frequency / sum(Frequency)\n      )\n    ][\n      order(Gain, decreasing = TRUE)\n    ]\n    imp\n  }\n  expect_equal(importance_from_dump(), importance, tolerance = 1e-6)\n\n  ## decision stump\n  m <- xgb.train(\n    data = xgb.DMatrix(as.matrix(data.frame(x = c(0, 1))), label = c(1, 2), nthread = 1),\n    nrounds = 1,\n    params = xgb.params(\n      base_score = 0.5,\n      nthread = 2\n    )\n  )\n  df <- xgb.model.dt.tree(model = m)\n  expect_equal(df$Feature, \"Leaf\")\n  expect_equal(df$Cover, 2)\n})\n\ntest_that(\"xgb.importance works with GLM model\", {\n  .skip_if_vcd_not_available()\n  importance.GLM <- xgb.importance(feature_names = feature.names, model = bst.GLM)\n  expect_equal(dim(importance.GLM), c(10, 2))\n  expect_equal(colnames(importance.GLM), c(\"Feature\", \"Weight\"))\n  xgb.importance(model = bst.GLM)\n  imp2plot <- xgb.plot.importance(importance.GLM)\n  expect_equal(colnames(imp2plot), c(\"Feature\", \"Weight\", \"Importance\"))\n  xgb.ggplot.importance(importance.GLM)\n\n  # check that the input is not modified in-place\n  expect_false(\"Importance\" %in% names(importance.GLM))\n\n  # for multiclass\n  imp.GLM <- xgb.importance(model = mbst.GLM)\n  expect_equal(dim(imp.GLM), c(12, 3))\n  
expect_equal(imp.GLM$Class, rep(0:2, each = 4))\n})\n\ntest_that(\"xgb.model.dt.tree and xgb.importance work with a single split model\", {\n  .skip_if_vcd_not_available()\n  bst1 <- xgb.train(\n    data = xgb.DMatrix(sparse_matrix, label = label, nthread = 1),\n    nrounds = 1, verbose = 0,\n    params = xgb.params(\n      max_depth = 1,\n      learning_rate = 1,\n      nthread = 2,\n      objective = \"binary:logistic\"\n    )\n  )\n  expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error\n  expect_equal(nrow(dt), 3)\n  expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error\n  expect_equal(nrow(imp), 1)\n  expect_equal(imp$Gain, 1)\n})\n\ntest_that(\"xgb.plot.importance de-duplicates features\", {\n  importances <- data.table(\n    Feature = c(\"col1\", \"col2\", \"col2\"),\n    Gain = c(0.4, 0.3, 0.3)\n  )\n  imp2plot <- xgb.plot.importance(importances)\n  expect_equal(nrow(imp2plot), 2L)\n  expect_equal(imp2plot$Feature, c(\"col2\", \"col1\"))\n})\n\ntest_that(\"xgb.plot.tree works with and without feature names\", {\n  .skip_if_vcd_not_available()\n  expect_silent(xgb.plot.tree(model = bst.Tree.unnamed))\n  expect_silent(xgb.plot.tree(model = bst.Tree))\n\n  ## Categorical\n  y <- rnorm(100)\n  x <- sample(3, size = 100 * 3, replace = TRUE) |> matrix(nrow = 100)\n  x <- x - 1\n  dm <- xgb.DMatrix(data = x, label = y, nthread = 1)\n  setinfo(dm, \"feature_type\", c(\"c\", \"c\", \"c\"))\n  model <- xgb.train(\n    data = dm,\n    params = list(tree_method = \"hist\"),\n    nrounds = 2\n  )\n  expect_silent(xgb.plot.tree(model = model))\n})\n\ntest_that(\"xgb.plot.multi.trees works with and without feature names\", {\n  .skip_if_vcd_not_available()\n  xgb.plot.multi.trees(model = bst.Tree.unnamed, features_keep = 3)\n  xgb.plot.multi.trees(model = bst.Tree, features_keep = 3)\n  expect_true(TRUE)\n})\n\ntest_that(\"xgb.plot.deepness works\", {\n  .skip_if_vcd_not_available()\n  d2p <- xgb.plot.deepness(model = bst.Tree)\n  
expect_equal(colnames(d2p), c(\"ID\", \"Tree\", \"Depth\", \"Cover\", \"Weight\"))\n  xgb.plot.deepness(model = bst.Tree, which = \"med.depth\")\n  xgb.ggplot.deepness(model = bst.Tree)\n})\n\ntest_that(\"xgb.shap.data works when top_n is provided\", {\n  .skip_if_vcd_not_available()\n  data_list <- xgb.shap.data(data = sparse_matrix, model = bst.Tree, top_n = 2)\n  expect_equal(names(data_list), c(\"data\", \"shap_contrib\"))\n  expect_equal(NCOL(data_list$data), 2)\n  expect_equal(NCOL(data_list$shap_contrib), 2)\n  expect_equal(NROW(data_list$data), NROW(data_list$shap_contrib))\n  expect_gt(length(colnames(data_list$data)), 0)\n  expect_gt(length(colnames(data_list$shap_contrib)), 0)\n\n  # for multiclass without target class provided\n  data_list <- xgb.shap.data(data = as.matrix(iris[, -5]), model = mbst.Tree, top_n = 2)\n  expect_equal(dim(data_list$shap_contrib), c(nrow(iris), 2))\n  # for multiclass with target class provided\n  data_list <- xgb.shap.data(data = as.matrix(iris[, -5]), model = mbst.Tree, top_n = 2, target_class = 0)\n  expect_equal(dim(data_list$shap_contrib), c(nrow(iris), 2))\n})\n\ntest_that(\"xgb.shap.data works with subsampling\", {\n  .skip_if_vcd_not_available()\n  data_list <- xgb.shap.data(data = sparse_matrix, model = bst.Tree, top_n = 2, subsample = 0.8)\n  expect_equal(NROW(data_list$data), as.integer(0.8 * nrow(sparse_matrix)))\n  expect_equal(NROW(data_list$data), NROW(data_list$shap_contrib))\n})\n\ntest_that(\"xgb.shap.data works with data frames\", {\n  data(mtcars)\n  df <- mtcars\n  df$cyl <- factor(df$cyl)\n  x <- df[, -1]\n  y <- df$mpg\n  dm <- xgb.DMatrix(x, label = y, nthread = 1L)\n  model <- xgb.train(\n    data = dm,\n    params = list(\n      max_depth = 2,\n      nthread = 1\n    ),\n    nrounds = 2\n  )\n  data_list <- xgb.shap.data(data = df[, -1], model = model, top_n = 2, subsample = 0.8)\n  expect_equal(NROW(data_list$data), as.integer(0.8 * nrow(df)))\n  expect_equal(NROW(data_list$data), 
NROW(data_list$shap_contrib))\n})\n\ntest_that(\"prepare.ggplot.shap.data works\", {\n  .skip_if_vcd_not_available()\n  data_list <- xgb.shap.data(data = sparse_matrix, model = bst.Tree, top_n = 2)\n  plot_data <- prepare.ggplot.shap.data(data_list, normalize = TRUE)\n  expect_s3_class(plot_data, \"data.frame\")\n  expect_equal(names(plot_data), c(\"id\", \"feature\", \"feature_value\", \"shap_value\"))\n  expect_s3_class(plot_data$feature, \"factor\")\n  # Each observation should have 1 row for each feature\n  expect_equal(nrow(plot_data), nrow(sparse_matrix) * 2)\n})\n\ntest_that(\"xgb.plot.shap works\", {\n  .skip_if_vcd_not_available()\n  sh <- xgb.plot.shap(data = sparse_matrix, model = bst.Tree, top_n = 2, col = 4)\n  expect_equal(names(sh), c(\"data\", \"shap_contrib\"))\n})\n\ntest_that(\"xgb.plot.shap.summary works\", {\n  .skip_if_vcd_not_available()\n  expect_silent(xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))\n  expect_silent(xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))\n})\n\ntest_that(\"xgb.plot.shap.summary ignores categorical features\", {\n  .skip_if_vcd_not_available()\n  data(mtcars)\n  df <- mtcars\n  df$cyl <- factor(df$cyl)\n  levels(df$cyl) <- c(\"a\", \"b\", \"c\")\n  x <- df[, -1]\n  y <- df$mpg\n  dm <- xgb.DMatrix(x, label = y, nthread = 1L)\n  model <- xgb.train(\n    data = dm,\n    params = list(\n      max_depth = 2,\n      nthread = 1\n    ),\n    nrounds = 2\n  )\n  expect_warning({\n    xgb.ggplot.shap.summary(data = x, model = model, top_n = 2)\n  })\n\n  x_num <- mtcars[, -1]\n  x_num$gear <- as.numeric(x_num$gear) - 1\n  x_num <- as.matrix(x_num)\n  dm <- xgb.DMatrix(x_num, label = y, feature_types = c(rep(\"q\", 8), \"c\", \"q\"), nthread = 1L)\n  model <- xgb.train(\n    data = dm,\n    params = list(\n      max_depth = 2,\n      nthread = 1\n    ),\n    nrounds = 2\n  )\n  expect_warning({\n    xgb.ggplot.shap.summary(data = x_num, model = model, top_n = 2)\n  
})\n})\n\ntest_that(\"check.deprecation works\", {\n  data(mtcars)\n  dm <- xgb.DMatrix(mtcars[, -1L], label = mtcars$mpg, nthread = 1)\n  params <- xgb.params(nthread = 1, max_depth = 2, eval_metric = \"rmse\")\n  args_train <- list(\n    data = dm,\n    params = params,\n    nrounds = 10,\n    verbose = 0\n  )\n\n  # with exact name\n  options(\"xgboost.strict_mode\" = TRUE)\n  expect_error({\n    model <- xgb.train(\n      data = dm,\n      params = params,\n      nrounds = 10,\n      watchlist = list(tr = dm),\n      verbose = 0\n    )\n  }, regexp = \"watchlist\")\n  options(\"xgboost.strict_mode\" = FALSE)\n  expect_warning({\n    model <- xgb.train(\n      data = dm,\n      params = params,\n      nrounds = 10,\n      watchlist = list(tr = dm),\n      verbose = 0\n    )\n  }, regexp = \"watchlist\")\n  expect_true(hasName(attributes(model), \"evaluation_log\"))\n  expect_equal(names(attributes(model)$evaluation_log), c(\"iter\", \"tr_rmse\"))\n\n  # with partial name match\n  expect_warning({\n    model <- xgb.train(\n      data = dm,\n      params = params,\n      nrounds = 10,\n      watchlis = list(train = dm),\n      verbose = 0\n    )\n  }, regexp = \"watchlist\")\n  expect_true(hasName(attributes(model), \"evaluation_log\"))\n  expect_equal(names(attributes(model)$evaluation_log), c(\"iter\", \"train_rmse\"))\n\n  # error/warning is thrown if argument cannot be matched\n  options(\"xgboost.strict_mode\" = TRUE)\n  expect_error({\n    model <- xgb.train(\n      data = dm,\n      params = params,\n      nrounds = 10,\n      watchlistt = list(train = dm),\n      verbose = 0\n    )\n  }, regexp = \"unrecognized\")\n  options(\"xgboost.strict_mode\" = FALSE)\n  expect_warning({\n    model <- xgb.train(\n      data = dm,\n      params = params,\n      nrounds = 10,\n      watchlistt = list(train = dm),\n      verbose = 0\n    )\n  }, regexp = \"unrecognized\")\n\n  # error should suggest to put under 'params' if it goes there\n  
options(\"xgboost.strict_mode\" = TRUE)\n  expect_error({\n    model <- xgb.train(\n      data = dm,\n      nthread = 1, max_depth = 2, eval_metric = \"rmse\",\n      nrounds = 10,\n      evals = list(train = dm),\n      verbose = 0\n    )\n  }, regexp = \"should be passed as a list to argument 'params'\")\n  options(\"xgboost.strict_mode\" = FALSE)\n  expect_warning({\n    model <- xgb.train(\n      data = dm,\n      nthread = 1, max_depth = 2, eval_metric = \"mae\",\n      nrounds = 10,\n      evals = list(train = dm),\n      verbose = 0\n    )\n  }, regexp = \"should be passed as a list to argument 'params'\")\n  expect_true(hasName(attributes(model), \"evaluation_log\"))\n  expect_equal(names(attributes(model)$evaluation_log), c(\"iter\", \"train_mae\"))\n\n  # can take more than one deprecated parameter\n  expect_warning({\n    model <- xgb.train(\n      training.data = dm,\n      params = params,\n      nrounds = 10,\n      watchlis = list(tr = dm),\n      verbose = 0\n    )\n  }, regexp = \"training.data\")\n  expect_true(hasName(attributes(model), \"evaluation_log\"))\n  expect_equal(names(attributes(model)$evaluation_log), c(\"iter\", \"tr_rmse\"))\n})\n\ntest_that('convert.labels works', {\n  y <- c(0, 1, 0, 0, 1)\n  for (objective in c('binary:logistic', 'binary:logitraw', 'binary:hinge')) {\n    res <- xgboost:::convert.labels(y, objective_name = objective)\n    expect_s3_class(res, 'factor')\n    expect_equal(res, factor(res))\n  }\n  y <- c(0, 1, 3, 2, 1, 4)\n  for (objective in c('multi:softmax', 'multi:softprob', 'rank:pairwise', 'rank:ndcg',\n                      'rank:map')) {\n    res <- xgboost:::convert.labels(y, objective_name = objective)\n    expect_s3_class(res, 'factor')\n    expect_equal(res, factor(res))\n  }\n  y <- c(1.2, 3.0, -1.0, 10.0)\n  for (objective in c('reg:squarederror', 'reg:squaredlogerror', 'reg:logistic',\n                      'reg:pseudohubererror', 'count:poisson', 'survival:cox', 'survival:aft',\n                     
 'reg:gamma', 'reg:tweedie')) {\n    res <- xgboost:::convert.labels(y, objective_name = objective)\n    expect_equal(class(res), 'numeric')\n  }\n})\n\ntest_that(\"validate.features works as expected\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    params = list(nthread = 1),\n    data = dm,\n    nrounds = 3\n  )\n\n  # result is output as-is when needed\n  res <- validate.features(model, x)\n  expect_equal(res, x)\n  res <- validate.features(model, dm)\n  expect_identical(res, dm)\n  res <- validate.features(model, as(x[1, ], \"dsparseVector\"))\n  expect_equal(as.numeric(res), unname(x[1, ]))\n  res <- validate.features(model, \"file.txt\")\n  expect_equal(res, \"file.txt\")\n\n  # columns are reordered\n  res <- validate.features(model, mtcars[, rev(names(mtcars))])\n  expect_equal(names(res), colnames(x))\n  expect_equal(as.matrix(res), x)\n  res <- validate.features(model, as.matrix(mtcars[, rev(names(mtcars))]))\n  expect_equal(colnames(res), colnames(x))\n  expect_equal(res, x)\n  res <- validate.features(model, mtcars[1, rev(names(mtcars)), drop = FALSE])\n  expect_equal(names(res), colnames(x))\n  expect_equal(unname(as.matrix(res)), unname(x[1, , drop = FALSE]))\n  res <- validate.features(model, as.data.table(mtcars[, rev(names(mtcars))]))\n  expect_equal(names(res), colnames(x))\n  expect_equal(unname(as.matrix(res)), unname(x))\n\n  # error when columns are missing\n  expect_error({\n    validate.features(model, mtcars[, 1:3])\n  })\n  expect_error({\n    validate.features(model, as.matrix(mtcars[, 1:ncol(x)])) # nolint\n  })\n  expect_error({\n    validate.features(model, xgb.DMatrix(mtcars[, 1:3], nthread = 1))\n  })\n  expect_error({\n    validate.features(model, as(x[, 1:3], \"CsparseMatrix\"))\n  })\n\n  # error when it cannot reorder or subset\n  expect_error({\n    validate.features(model, xgb.DMatrix(mtcars, nthread = 1))\n  }, \"Feature 
names\")\n  expect_error({\n    validate.features(model, xgb.DMatrix(x[, rev(colnames(x))], nthread = 1))\n  }, \"Feature names\")\n\n  # no error about types if the booster doesn't have types\n  expect_error({\n    validate.features(model, xgb.DMatrix(x, feature_types = c(rep(\"q\", 5), rep(\"c\", 5)), nthread = 1))\n  }, NA)\n  tmp <- mtcars\n  tmp[[\"vs\"]] <- factor(tmp[[\"vs\"]])\n  expect_error({\n    validate.features(model, tmp)\n  }, NA)\n\n  # error when types do not match\n  setinfo(model, \"feature_type\", rep(\"q\", 10))\n  expect_error({\n    validate.features(model, xgb.DMatrix(x, feature_types = c(rep(\"q\", 5), rep(\"c\", 5)), nthread = 1))\n  }, \"Feature types\")\n  tmp <- mtcars\n  tmp[[\"vs\"]] <- factor(tmp[[\"vs\"]])\n  expect_error({\n    validate.features(model, tmp)\n  }, \"Feature types\")\n})\n\ntest_that(\"Parameters constructor works as expected\", {\n  empty_list <- list()\n  names(empty_list) <- character()\n\n  params <- xgb.params()\n  expect_equal(params, empty_list)\n\n  params <- xgb.params(max_depth = 2)\n  expect_equal(params, list(max_depth = 2))\n\n  params <- xgb.params(max_depth = NULL)\n  expect_equal(params, empty_list)\n\n  max_depth <- 3\n  params <- xgb.params(max_depth = max_depth)\n  expect_equal(params, list(max_depth = 3))\n\n  four <- 4L\n  params <- xgb.params(max_depth = four)\n  expect_equal(params, list(max_depth = 4L))\n\n  params <- xgb.params(objective = \"binary:logistic\", nthread = 10)\n  expect_equal(params, list(objective = \"binary:logistic\", nthread = 10))\n\n  expect_error({\n    xgb.params(max_xgboost = 10)\n  })\n  expect_error({\n    xgb.params(max_depth = 2, max_depth = 3)\n  })\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_interaction_constraints.R",
    "content": "require(xgboost)\n\ncontext(\"interaction constraints\")\n\nn_threads <- 2\n\nset.seed(1024)\nx1 <- rnorm(1000, 1)\nx2 <- rnorm(1000, 1)\nx3 <- sample(c(1, 2, 3), size = 1000, replace = TRUE)\ny <- x1 + x2 + x3 + x1 * x2 * x3 + rnorm(1000, 0.001) + 3 * sin(x1)\ntrain <- matrix(c(x1, x2, x3), ncol = 3)\n\ntest_that(\"interaction constraints for regression\", {\n  # Fit a model that only allows interaction between x1 and x2\n  bst <- xgb.train(\n    data = xgb.DMatrix(train, label = y, nthread = 1),\n    nrounds = 100, verbose = 0,\n    params = xgb.params(\n      max_depth = 3,\n      learning_rate = 0.1,\n      nthread = 2,\n      interaction_constraints = list(c(0, 1))\n    )\n  )\n\n  # Set all observations to have the same x3 values then increment\n  #  by the same amount\n  preds <- lapply(c(1, 2, 3), function(x) {\n    tmat <- matrix(c(x1, x2, rep(x, 1000)), ncol = 3)\n    return(predict(bst, tmat))\n  })\n\n  # Check incrementing x3 has the same effect on all observations\n  #   since x3 is constrained to be independent of x1 and x2\n  #   and all observations start off from the same x3 value\n  diff1 <- preds[[2]] - preds[[1]]\n  test1 <- all(abs(diff1 - diff1[1]) < 1e-4)\n\n  diff2 <- preds[[3]] - preds[[2]]\n  test2 <- all(abs(diff2 - diff2[1]) < 1e-4)\n\n  expect_true({\n    test1 & test2\n  }, \"Interaction Contraint Satisfied\")\n})\n\ntest_that(\"interaction constraints scientific representation\", {\n  rows <- 10\n  ## When number exceeds 1e5, R paste function uses scientific representation.\n  ## See: https://github.com/dmlc/xgboost/issues/5179\n  cols <- 1e5 + 10\n\n  d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)\n  y <- rnorm(rows)\n\n  dtrain <- xgb.DMatrix(data = d, label = y, nthread = n_threads)\n  inc <- list(c(seq.int(from = 0, to = cols, by = 1)))\n\n  with_inc <- xgb.train(\n    data = dtrain,\n    nrounds = 10,\n    params = xgb.params(\n      tree_method = 'hist',\n      interaction_constraints = inc,\n      
nthread = n_threads\n    )\n  )\n  without_inc <- xgb.train(\n    data = dtrain,\n    nrounds = 10,\n    params = xgb.params(\n      tree_method = 'hist',\n      nthread = n_threads\n    )\n  )\n  expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_interactions.R",
    "content": "context('Test prediction of feature interactions')\n\nset.seed(123)\nn_threads <- 2\n\ntest_that(\"predict feature interactions works\", {\n  # simulate some binary data and a linear outcome with an interaction term\n  N <- 1000\n  P <- 5\n  X <- matrix(rbinom(N * P, 1, 0.5), ncol = P, dimnames = list(NULL, letters[1:P]))\n  # center the data (as contributions are computed WRT feature means)\n  X <- scale(X, scale = FALSE)\n\n  # outcome without any interactions, without any noise:\n  f <- function(x) 2 * x[, 1] - 3 * x[, 2]\n  # outcome with interactions, without noise:\n  f_int <- function(x) f(x) + 2 * x[, 2] * x[, 3]\n  # outcome with interactions, with noise:\n  #f_int_noise <- function(x) f_int(x) + rnorm(N, 0, 0.3)\n\n  y <- f_int(X)\n\n  dm <- xgb.DMatrix(X, label = y, nthread = n_threads)\n  param <- xgb.params(\n    learning_rate = 0.1,\n    max_depth = 4,\n    base_score = mean(y),\n    reg_lambda = 0,\n    nthread = n_threads\n  )\n  b <- xgb.train(param, dm, 100)\n\n  pred <- predict(b, dm, outputmargin = TRUE)\n\n  # SHAP contributions:\n  cont <- predict(b, dm, predcontrib = TRUE)\n  expect_equal(dim(cont), c(N, P + 1))\n  # make sure for each row they add up to marginal predictions\n  expect_lt(max(abs(rowSums(cont) - pred)), 0.001)\n  # Hand-construct the 'ground truth' feature contributions:\n  gt_cont <- cbind(\n      2. * X[, 1],\n     -3. * X[, 2] + 1. * X[, 2] * X[, 3], # attribute a HALF of the interaction term to feature #2\n      1. 
* X[, 2] * X[, 3]               # and another HALF of the interaction term to feature #3\n     )\n  gt_cont <- cbind(gt_cont, matrix(0, nrow = N, ncol = P + 1 - 3))\n  # These should be relatively close:\n  expect_lt(max(abs(cont - gt_cont)), 0.05)\n\n\n  # SHAP interaction contributions:\n  intr <- predict(b, dm, predinteraction = TRUE)\n  expect_equal(dim(intr), c(N, P + 1, P + 1))\n  # check assigned colnames\n  cn <- c(letters[1:P], \"(Intercept)\")\n  expect_equal(dimnames(intr), list(NULL, cn, cn))\n\n  # check the symmetry\n  expect_lt(max(abs(aperm(intr, c(1, 3, 2)) - intr)), 0.00001)\n\n  # sums WRT columns must be close to feature contributions\n  expect_lt(max(abs(apply(intr, c(1, 2), sum) - cont)), 0.00001)\n\n  # diagonal terms for features 3,4,5 must be close to zero\n  expect_lt(Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))), 0.05)\n\n  # Intercept must have no interactions\n  expect_lt(max(abs(intr[, 1:P, P + 1])), 0.00001)\n\n  # interactions other than 2 x 3 must be close to zero\n  intr23 <- intr\n  intr23[, 2, 3] <- 0\n  expect_lt(\n    Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))),\n    0.05\n  )\n\n  # Construct the 'ground truth' contributions of interactions directly from the linear terms:\n  gt_intr <- array(0, c(N, P + 1, P + 1))\n  gt_intr[, 2, 3] <- 1. 
* X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element\n  gt_intr[, 3, 2] <- gt_intr[, 2, 3]\n  # merge-in the diagonal based on 'ground truth' feature contributions\n  intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)\n  for (j in seq_len(P)) {\n    gt_intr[, j, j] <- intr_diag[, j]\n  }\n  # These should be relatively close:\n  expect_lt(max(abs(intr - gt_intr)), 0.1)\n})\n\ntest_that(\"SHAP contribution values are not NAN\", {\n  d <- data.frame(\n    x1 = c(-2.3, 1.4, 5.9, 2, 2.5, 0.3, -3.6, -0.2, 0.5, -2.8, -4.6, 3.3, -1.2,\n           -1.1, -2.3, 0.4, -1.5, -0.2, -1, 3.7),\n    x2 = c(291.179171, 269.198331, 289.942097, 283.191669, 269.673332,\n           294.158346, 287.255835, 291.530838, 285.899586, 269.290833,\n           268.649586, 291.530841, 280.074593, 269.484168, 293.94042,\n           294.327506, 296.20709, 295.441669, 283.16792, 270.227085),\n    y = c(9, 15, 5.7, 9.2, 22.4, 5, 9, 3.2, 7.2, 13.1, 7.8, 16.9, 6.5, 22.1,\n          5.3, 10.4, 11.1, 13.9, 11, 20.5),\n    fold = c(2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2))\n\n  ivs <- c(\"x1\", \"x2\")\n\n  fit <- xgb.train(\n    verbose = 0,\n    params = list(\n      objective = \"reg:squarederror\",\n      eval_metric = \"rmse\",\n      nthread = n_threads\n    ),\n    data = xgb.DMatrix(as.matrix(subset(d, fold == 2)[, ivs]), label = subset(d, fold == 2)$y, nthread = 1),\n    nrounds = 3\n  )\n\n  shaps <- as.data.frame(predict(fit,\n    newdata = as.matrix(subset(d, fold == 1)[, ivs]),\n    predcontrib = TRUE))\n  result <- cbind(shaps, sum = rowSums(shaps), pred = predict(fit,\n      newdata = as.matrix(subset(d, fold == 1)[, ivs])))\n\n  expect_true(identical(TRUE, all.equal(result$sum, result$pred, tol = 1e-6)))\n})\n\n\ntest_that(\"multiclass feature interactions work\", {\n  dm <- xgb.DMatrix(\n    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads\n  )\n  param <- xgb.params(\n    learning_rate = 0.1,\n    
max_depth = 4,\n    objective = 'multi:softprob',\n    num_class = 3,\n    nthread = n_threads\n  )\n  b <- xgb.train(param, dm, 40)\n  pred <- predict(b, dm, outputmargin = TRUE)\n\n  # SHAP contributions:\n  cont <- predict(b, dm, predcontrib = TRUE)\n  expect_length(dim(cont), 3)\n\n  # make sure for each row they add up to marginal predictions\n  expect_lt(max(abs(apply(cont, c(1, 2), sum) - pred)), 0.001)\n\n  # SHAP interaction contributions:\n  intr <- predict(b, dm, predinteraction = TRUE)\n  expect_length(dim(intr), 4)\n\n  # check the symmetry\n  expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001)\n  # sums WRT columns must be close to feature contributions\n  expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - cont)), 0.00001)\n})\n\n\ntest_that(\"SHAP single sample works\", {\n  train <- agaricus.train\n  test <- agaricus.test\n  booster <- xgb.train(\n    data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n    nrounds = 4,\n    params = xgb.params(\n      max_depth = 2,\n      objective = \"binary:logistic\",\n      nthread = n_threads\n    )\n  )\n\n  predt <- predict(\n    booster,\n    newdata = train$data[1, , drop = FALSE], predcontrib = TRUE\n  )\n  expect_equal(dim(predt), c(1, dim(train$data)[2] + 1))\n\n  predt <- predict(\n    booster,\n    newdata = train$data[1, , drop = FALSE], predinteraction = TRUE\n  )\n  expect_equal(dim(predt), c(1, dim(train$data)[2] + 1, dim(train$data)[2] + 1))\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_io.R",
    "content": "context(\"Test model IO.\")\n\ndata(agaricus.train, package = \"xgboost\")\ndata(agaricus.test, package = \"xgboost\")\ntrain <- agaricus.train\ntest <- agaricus.test\n\ntest_that(\"load/save raw works\", {\n  nrounds <- 8\n  booster <- xgb.train(\n    data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n    nrounds = nrounds,\n    params = xgb.params(\n      objective = \"binary:logistic\",\n      nthread = 2\n    )\n  )\n\n  json_bytes <- xgb.save.raw(booster, raw_format = \"json\")\n  ubj_bytes <- xgb.save.raw(booster, raw_format = \"ubj\")\n\n  from_json <- xgb.load.raw(json_bytes)\n  from_ubj <- xgb.load.raw(ubj_bytes)\n\n  json2ubj <- xgb.save.raw(from_json, raw_format = \"ubj\")\n  ubj2ubj <- xgb.save.raw(from_ubj, raw_format = \"ubj\")\n\n  expect_equal(json2ubj, ubj2ubj)\n})\n\ntest_that(\"saveRDS preserves C and R attributes\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = xgb.params(nthread = 1, max_depth = 2),\n    nrounds = 5\n  )\n  attributes(model)$my_attr <- \"qwerty\"\n  xgb.attr(model, \"c_attr\") <- \"asdf\"\n\n  fname <- file.path(tempdir(), \"xgb_model.Rds\")\n  saveRDS(model, fname)\n  model_new <- readRDS(fname)\n\n  expect_equal(attributes(model_new)$my_attr, attributes(model)$my_attr)\n  expect_equal(xgb.attr(model, \"c_attr\"), xgb.attr(model_new, \"c_attr\"))\n})\n\ntest_that(\"R serializers keep C config\", {\n  data(mtcars)\n  y <- mtcars$mpg\n  x <- as.matrix(mtcars[, -1])\n  dm <- xgb.DMatrix(x, label = y, nthread = 1)\n  model <- xgb.train(\n    data = dm,\n    params = list(\n      tree_method = \"approx\",\n      nthread = 1,\n      max_depth = 2\n    ),\n    nrounds = 3\n  )\n  model_new <- unserialize(serialize(model, NULL))\n  expect_equal(\n    xgb.config(model)$learner$gradient_booster$gbtree_train_param$tree_method,\n    
xgb.config(model_new)$learner$gradient_booster$gbtree_train_param$tree_method\n  )\n  expect_equal(variable.names(model), variable.names(model_new))\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_model_compatibility.R",
    "content": "context(\"Models from previous versions of XGBoost can be loaded\")\n\nmetadata <- list(\n  kRounds = 4,\n  kRows = 1000,\n  kCols = 4,\n  kForests = 2,\n  kMaxDepth = 2,\n  kClasses = 3\n)\n\nrun_model_param_check <- function(config) {\n  testthat::expect_equal(config$learner$learner_model_param$num_feature, \"4\")\n  testthat::expect_equal(config$learner$learner_train_param$booster, \"gbtree\")\n}\n\nget_n_rounds <- function(model_file) {\n  is_10 <- grepl(\"1.0.0rc1\", model_file, fixed = TRUE)\n  if (is_10) {\n    2\n  } else {\n    metadata$kRounds\n  }\n}\n\nget_num_tree <- function(booster) {\n  dump <- xgb.dump(booster)\n  m <- regexec(\"booster\\\\[[0-9]+\\\\]\", dump, perl = TRUE)\n  m <- regmatches(dump, m)\n  num_tree <- Reduce(\"+\", lapply(m, length))\n  num_tree\n}\n\nrun_booster_check <- function(booster, model_file) {\n  config <- xgb.config(booster)\n  run_model_param_check(config)\n  is_model <- function(typ) {\n    grepl(typ, model_file, fixed = TRUE)\n  }\n  n_rounds <- get_n_rounds(model_file = model_file)\n  if (is_model(\"cls\")) {\n    testthat::expect_equal(\n      get_num_tree(booster), metadata$kForests * n_rounds * metadata$kClasses\n    )\n    testthat::expect_equal(get_basescore(config), c(0.5, 0.5, 0.5))  # nolint\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"multi:softmax\"\n    )\n    testthat::expect_equal(\n      as.numeric(config$learner$learner_model_param$num_class),\n      metadata$kClasses\n    )\n  } else if (is_model(\"logitraw\")) {\n    testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds)\n    testthat::expect_equal(\n      as.numeric(config$learner$learner_model_param$num_class), 0\n    )\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"binary:logitraw\"\n    )\n  } else if (is_model(\"logit\")) {\n    testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds)\n    testthat::expect_equal(\n    
  as.numeric(config$learner$learner_model_param$num_class), 0\n    )\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"binary:logistic\"\n    )\n  } else if (is_model(\"ltr\")) {\n    testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds)\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"rank:ndcg\"\n    )\n  } else if (is_model(\"aft\")) {\n    testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds)\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"survival:aft\"\n    )\n  } else {\n    testthat::expect_true(is_model(\"reg\"))\n    testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds)\n    testthat::expect_equal(get_basescore(config), 0.5)  # nolint\n    testthat::expect_equal(\n      config$learner$learner_train_param$objective, \"reg:squarederror\"\n    )\n  }\n}\n\ntest_that(\"Models from previous versions of XGBoost can be loaded\", {\n  bucket <- \"xgboost-ci-jenkins-artifacts\"\n  region <- \"us-west-2\"\n  file_name <- \"xgboost_model_compatibility_tests-3.0.2.zip\"\n  zipfile <- tempfile(fileext = \".zip\")\n  extract_dir <- tempdir()\n  result <- tryCatch(\n    {\n      download.file(\n        paste(\n          \"https://\", bucket, \".s3-\", region, \".amazonaws.com/\", file_name,\n          sep = \"\"\n        ),\n        destfile = zipfile, mode = \"wb\", quiet = TRUE\n      )\n      zipfile\n    },\n    error = function(e) {\n      print(e)\n      NA_character_\n    }\n  )\n  if (is.na(result)) {\n    print(\"Failed to download old models.\")\n    return()\n  }\n\n  unzip(zipfile, exdir = extract_dir, overwrite = TRUE)\n  model_dir <- file.path(extract_dir, \"models\")\n\n  pred_data <- xgb.DMatrix(\n    matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4),\n    nthread = 2\n  )\n\n  lapply(list.files(model_dir), function(x) {\n    model_file <- file.path(model_dir, x)\n    is_skl <- 
grepl(\"scikit\", model_file, fixed = TRUE)\n    if (is_skl) {\n      return()\n    }\n    booster <- xgb.load(model_file)\n    xgb.model.parameters(booster) <- list(nthread = 2)\n    predict(booster, newdata = pred_data)\n    run_booster_check(booster, model_file)\n  })\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_monotone.R",
    "content": "context(\"monotone constraints\")\n\nset.seed(1024)\nx <- rnorm(1000, 10)\ny <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)\ntrain <- matrix(x, ncol = 1)\n\n\ntest_that(\"monotone constraints for regression\", {\n    bst <- xgb.train(\n        data = xgb.DMatrix(train, label = y, nthread = 1),\n        nrounds = 100, verbose = 0,\n        params = xgb.params(\n            max_depth = 2,\n            learning_rate = 0.1,\n            nthread = 2,\n            monotone_constraints = -1\n        )\n    )\n\n    pred <- predict(bst, train)\n\n    ind <- order(train[, 1])\n    pred.ord <- pred[ind]\n    expect_true({\n        !any(diff(pred.ord) > 0)\n    }, \"Monotone constraint satisfied\")\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_parameter_exposure.R",
    "content": "context('Test model params and call are exposed to R')\n\ndata(agaricus.train, package = 'xgboost')\ndata(agaricus.test, package = 'xgboost')\n\ndtrain <- xgb.DMatrix(\n  agaricus.train$data, label = agaricus.train$label, nthread = 2\n)\ndtest <- xgb.DMatrix(\n  agaricus.test$data, label = agaricus.test$label, nthread = 2\n)\n\nbst <- xgb.train(\n  data = dtrain,\n  verbose = 0,\n  nrounds = 10,\n  params = xgb.params(\n    max_depth = 2,\n    learning_rate = 1,\n    nthread = 1,\n    objective = \"binary:logistic\"\n  )\n)\n\ntest_that(\"call is exposed to R\", {\n  expect_false(is.null(attributes(bst)$call))\n  expect_is(attributes(bst)$call, \"call\")\n})\n\ntest_that(\"params is exposed to R\", {\n  model_params <- attributes(bst)$params\n  expect_is(model_params, \"list\")\n  expect_equal(model_params$learning_rate, 1)\n  expect_equal(model_params$max_depth, 2)\n  expect_equal(model_params$objective, \"binary:logistic\")\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_poisson_regression.R",
    "content": "context('Test Poisson regression model')\n\nset.seed(1994)\n\ntest_that(\"Poisson regression works\", {\n  data(mtcars)\n  bst <- xgb.train(\n    data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11], nthread = 1),\n    nrounds = 10, verbose = 0,\n    params = xgb.params(objective = 'count:poisson',  nthread = 2)\n  )\n  expect_equal(class(bst), \"xgb.Booster\")\n  pred <- predict(bst, as.matrix(mtcars[, -11]))\n  expect_equal(length(pred), 32)\n  expect_lt(sqrt(mean((pred - mtcars[, 11])^2)), 1.2)\n})\n\ntest_that(\"Poisson regression is centered around mean\", {\n  m <- 50L\n  n <- 10L\n  y <- rpois(m, n)\n  x <- matrix(rnorm(m * n), nrow = m)\n  model <- xgb.train(\n    data = xgb.DMatrix(x, label = y, nthread = 1),\n    params = xgb.params(objective = \"count:poisson\", min_split_loss = 1e4),\n    nrounds = 1\n  )\n  model_json <- xgb.save.raw(model, \"json\") |> rawToChar() |> jsonlite::fromJSON()\n  expect_equal(\n    get_basescore(model_json),\n    mean(y),\n    tolerance = 1e-4\n  )\n\n  pred <- predict(model, x)\n  expect_equal(\n    pred,\n    rep(mean(y), m),\n    tolerance = 1e-4\n  )\n\n  w <- y + 1\n  model_weighted <- xgb.train(\n    data = xgb.DMatrix(x, label = y, weight = w, nthread = 1),\n    params = xgb.params(objective = \"count:poisson\", min_split_loss = 1e4),\n    nrounds = 1\n  )\n  model_json <- xgb.save.raw(model_weighted, \"json\") |> rawToChar() |> jsonlite::fromJSON()\n  expect_equal(\n    get_basescore(model_json),\n    weighted.mean(y, w),\n    tolerance = 1e-4\n  )\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_ranking.R",
    "content": "context('Learning to rank')\n\nn_threads <- 2\n\ntest_that('Test ranking with unweighted data', {\n  X <- Matrix::sparseMatrix(\n    i = c(2, 3, 7, 9, 12, 15, 17, 18)\n    , j = c(1, 1, 2, 2,  3,  3,  4,  4)\n    , x = rep(1.0, 8)\n    , dims = c(20, 4)\n  )\n  y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)\n  group <- c(5, 5, 5, 5)\n  dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)\n\n  params <- xgb.params(\n    learning_rate = 1,\n    tree_method = 'exact',\n    objective = 'rank:pairwise',\n    max_depth = 1,\n    eval_metric = c('auc', 'aucpr'),\n    nthread = n_threads\n  )\n  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain), verbose = 0)\n  # Check if the metric is monotone increasing\n  expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))\n  expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))\n})\n\ntest_that('Test ranking with weighted data', {\n  X <- Matrix::sparseMatrix(\n    i = c(2, 3, 7, 9, 12, 15, 17, 18)\n    , j = c(1, 1, 2, 2,  3,  3,  4,  4)\n    , x = rep(1.0, 8)\n    , dims = c(20, 4)\n  )\n  y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)\n  group <- c(5, 5, 5, 5)\n  weight <- c(1.0, 2.0, 3.0, 4.0)\n  dtrain <- xgb.DMatrix(\n    X, label = y, group = group, weight = weight, nthread = n_threads\n  )\n\n  params <- xgb.params(\n    learning_rate = 1,\n    tree_method = \"exact\",\n    objective = \"rank:pairwise\",\n    max_depth = 1,\n    eval_metric = c(\"auc\", \"aucpr\"),\n    nthread = n_threads\n  )\n  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain), verbose = 0)\n  # Check if the metric is monotone increasing\n  expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))\n  expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))\n  for (i in 1:10) {\n    pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))\n    # is_sorted[i]: is 
i-th group correctly sorted by the ranking predictor?\n    is_sorted <- lapply(seq(1, 20, by = 5),\n      function(k) {\n        ind <- order(-pred[k:(k + 4)])\n        z <- y[ind + (k - 1)]\n        all(diff(z) <= 0)  # Check if z is monotone decreasing\n      })\n    # Since we give weights 1, 2, 3, 4 to the four query groups,\n    # the ranking predictor will first try to correctly sort the last query group\n    # before correctly sorting other groups.\n    expect_true(all(diff(as.numeric(is_sorted)) >= 0))\n  }\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_unicode.R",
    "content": "context(\"Test Unicode handling\")\n\ndata(agaricus.train, package = 'xgboost')\ndata(agaricus.test, package = 'xgboost')\ntrain <- agaricus.train\ntest <- agaricus.test\nset.seed(1994)\n\ntest_that(\"Can save and load models with Unicode paths\", {\n  nrounds <- 2\n  bst <- xgb.train(\n    data = xgb.DMatrix(train$data, label = train$label, nthread = 1),\n    nrounds = nrounds,\n    params = xgb.params(\n      max_depth = 2,\n      nthread = 2,\n      objective = \"binary:logistic\"\n    )\n  )\n  tmpdir <- tempdir()\n  lapply(c(\"모델.json\", \"がうる・ぐら.json\", \"类继承.ubj\"), function(x) {\n    path <- file.path(tmpdir, x)\n    xgb.save(bst, path)\n    bst2 <- xgb.load(path)\n    xgb.model.parameters(bst2) <- list(nthread = 2)\n    expect_equal(predict(bst, test$data), predict(bst2, test$data))\n  })\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_update.R",
    "content": "context(\"update trees in an existing model\")\n\ndata(agaricus.train, package = 'xgboost')\ndata(agaricus.test, package = 'xgboost')\n\nn_threads <- 1\n\ndtrain <- xgb.DMatrix(\n  agaricus.train$data, label = agaricus.train$label, nthread = n_threads\n)\ndtest <- xgb.DMatrix(\n  agaricus.test$data, label = agaricus.test$label, nthread = n_threads\n)\n\n# Disable flaky tests for 32-bit Windows.\n# See https://github.com/dmlc/xgboost/issues/3720\nwin32_flag <- .Platform$OS.type == \"windows\" && .Machine$sizeof.pointer != 8\n\ntest_that(\"updating the model works\", {\n  evals <- list(train = dtrain, test = dtest)\n\n  # no-subsampling\n  p1 <- xgb.params(\n    objective = \"binary:logistic\",\n    max_depth = 2,\n    learning_rate = 0.05,\n    nthread = n_threads,\n    updater = \"grow_colmaker,prune\"\n  )\n  set.seed(11)\n  bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)\n  tr1 <- xgb.model.dt.tree(model = bst1)\n\n  # with subsampling\n  p2 <- modifyList(p1, list(subsample = 0.1))\n  set.seed(11)\n  bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)\n  tr2 <- xgb.model.dt.tree(model = bst2)\n\n  # the same no-subsampling boosting with an extra 'refresh' updater:\n  p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))\n  set.seed(11)\n  bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)\n  tr1r <- xgb.model.dt.tree(model = bst1r)\n  # all should be the same when no subsampling\n  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)\n  expect_equal(\n    jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1, raw_format = \"json\"))),\n    jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1r, raw_format = \"json\"))),\n    tolerance = 1e-6\n  )\n  if (!win32_flag) {\n    expect_equal(tr1, tr1r, tolerance = 0.00001, check.attributes = FALSE)\n  }\n\n  # the same boosting with subsampling with an extra 'refresh' updater:\n  p2r <- 
modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))\n  set.seed(11)\n  bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)\n  tr2r <- xgb.model.dt.tree(model = bst2r)\n  # should be the same evaluation but different gains and larger cover\n  expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)\n  if (!win32_flag) {\n    expect_equal(tr2[Feature == 'Leaf']$Gain, tr2r[Feature == 'Leaf']$Gain)\n  }\n  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2r[Feature != 'Leaf']$Gain)), 100)\n  expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)\n\n  # process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:\n  set.seed(123)\n  p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))\n  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)\n  tr1u <- xgb.model.dt.tree(model = bst1u)\n  # all should be the same when no subsampling\n  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)\n  expect_equal(\n    jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1, raw_format = \"json\"))),\n    jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1u, raw_format = \"json\"))),\n    tolerance = 1e-6\n  )\n  expect_equal(tr1, tr1u, tolerance = 0.00001, check.attributes = FALSE)\n\n  # same thing but with a serialized model\n  set.seed(123)\n  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))\n  tr1u <- xgb.model.dt.tree(model = bst1u)\n  # all should be the same when no subsampling\n  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)\n  expect_equal(tr1, tr1u, tolerance = 0.00001, check.attributes = FALSE)\n\n  # process type 'update' for model with subsampling, refreshing only the tree stats from training data:\n  p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', 
refresh_leaf = FALSE))\n  bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)\n  tr2u <- xgb.model.dt.tree(model = bst2u)\n  # should be the same evaluation but different gains and larger cover\n  expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)\n  expect_equal(tr2[Feature == 'Leaf']$Gain, tr2u[Feature == 'Leaf']$Gain)\n  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2u[Feature != 'Leaf']$Gain)), 100)\n  expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)\n  # the results should be the same as for the model with an extra 'refresh' updater\n  expect_equal(attributes(bst2r)$evaluation_log, attributes(bst2u)$evaluation_log)\n  if (!win32_flag) {\n    expect_equal(tr2r, tr2u, tolerance = 0.00001, check.attributes = FALSE)\n  }\n\n  # process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:\n  p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))\n  bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)\n  tr1ut <- xgb.model.dt.tree(model = bst1ut)\n  # should be the same evaluations but different gains and smaller cover (test data is smaller)\n  expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)\n  expect_equal(tr1[Feature == 'Leaf']$Gain, tr1ut[Feature == 'Leaf']$Gain)\n  expect_gt(sum(abs(tr1[Feature != 'Leaf']$Gain - tr1ut[Feature != 'Leaf']$Gain)), 100)\n  expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)\n})\n\ntest_that(\"updating works for multiclass & multitree\", {\n  dtr <- xgb.DMatrix(\n    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads\n  )\n  evals <- list(train = dtr)\n  p0 <- xgb.params(\n    max_depth = 2,\n    learning_rate = 0.5,\n    nthread = n_threads,\n    subsample = 0.6,\n    objective = \"multi:softprob\",\n    num_class = 3,\n    num_parallel_tree = 2,\n    base_score = 0\n  )\n  
set.seed(121)\n  bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)\n  tr0 <- xgb.model.dt.tree(model = bst0)\n\n  # run update process for an original model with subsampling\n  p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))\n  bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),\n                     evals = evals, xgb_model = bst0, verbose = 0)\n  tr0u <- xgb.model.dt.tree(model = bst0u)\n\n  # should be the same evaluation but different gains and larger cover\n  expect_equal(attributes(bst0)$evaluation_log, attributes(bst0u)$evaluation_log)\n  expect_equal(tr0[Feature == 'Leaf']$Gain, tr0u[Feature == 'Leaf']$Gain)\n  expect_gt(sum(abs(tr0[Feature != 'Leaf']$Gain - tr0u[Feature != 'Leaf']$Gain)), 100)\n  expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)\n})\n"
  },
  {
    "path": "R-package/tests/testthat/test_xgboost.R",
    "content": "library(survival)\nlibrary(data.table)\ndata(\"iris\")\ndata(\"mtcars\")\ndata(\"ToothGrowth\")\n\ntest_that(\"Auto determine objective\", {\n  y_num <- seq(1, 10)\n  res_num <- process.y.margin.and.objective(y_num, NULL, NULL, NULL)\n  expect_equal(res_num$params$objective, \"reg:squarederror\")\n\n  y_bin <- factor(c('a', 'b', 'a', 'b'), c('a', 'b'))\n  res_bin <- process.y.margin.and.objective(y_bin, NULL, NULL, NULL)\n  expect_equal(res_bin$params$objective, \"binary:logistic\")\n\n  y_multi <- factor(c('a', 'b', 'a', 'b', 'c'), c('a', 'b', 'c'))\n  res_multi <- process.y.margin.and.objective(y_multi, NULL, NULL, NULL)\n  expect_equal(res_multi$params$objective, \"multi:softprob\")\n\n  y_surv <- Surv(1:10, rep(c(0, 1), 5), type = \"right\")\n  res_surv <- process.y.margin.and.objective(y_surv, NULL, NULL, NULL)\n  expect_equal(res_surv$params$objective, \"survival:aft\")\n\n  y_multicol <- matrix(seq(1, 20), nrow = 5)\n  res_multicol <- process.y.margin.and.objective(y_multicol, NULL, NULL, NULL)\n  expect_equal(res_multicol$params$objective, \"reg:squarederror\")\n})\n\ntest_that(\"Process vectors\", {\n  y <- seq(1, 10)\n  for (y_inp in list(as.integer(y), as.numeric(y))) {\n    res <- process.y.margin.and.objective(y_inp, NULL, \"reg:pseudohubererror\", NULL)\n    expect_equal(\n      res$dmatrix_args$label,\n      y\n    )\n    expect_equal(\n      res$params$objective,\n      \"reg:pseudohubererror\"\n    )\n  }\n})\n\ntest_that(\"Process factors\", {\n  y_bin <- factor(c('a', 'b', 'a', 'b'), c('a', 'b'))\n  expect_error({\n    process.y.margin.and.objective(y_bin, NULL, \"multi:softprob\", NULL)\n  })\n  for (bin_obj in c(\"binary:logistic\", \"binary:hinge\")) {\n    for (y_inp in list(y_bin, as.ordered(y_bin))) {\n      res_bin <- process.y.margin.and.objective(y_inp, NULL, bin_obj, NULL)\n      expect_equal(\n        res_bin$dmatrix_args$label,\n        c(0, 1, 0, 1)\n      )\n      expect_equal(\n        res_bin$metadata$y_levels,\n   
     c('a', 'b')\n      )\n      expect_equal(\n        res_bin$params$objective,\n        bin_obj\n      )\n    }\n  }\n\n  y_bin2 <- factor(c(1, 0, 1, 0), c(1, 0))\n  res_bin <- process.y.margin.and.objective(y_bin2, NULL, \"binary:logistic\", NULL)\n  expect_equal(\n    res_bin$dmatrix_args$label,\n    c(0, 1, 0, 1)\n  )\n  expect_equal(\n    res_bin$metadata$y_levels,\n    c(\"1\", \"0\")\n  )\n\n  y_bin3 <- c(TRUE, FALSE, TRUE)\n  res_bin <- process.y.margin.and.objective(y_bin3, NULL, \"binary:logistic\", NULL)\n  expect_equal(\n    res_bin$dmatrix_args$label,\n    c(1, 0, 1)\n  )\n  expect_equal(\n    res_bin$metadata$y_levels,\n    c(\"FALSE\", \"TRUE\")\n  )\n\n  y_multi <- factor(c('a', 'b', 'c', 'd', 'a', 'b'), c('a', 'b', 'c', 'd'))\n  expect_error({\n    process.y.margin.and.objective(y_multi, NULL, \"binary:logistic\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y_multi, NULL, \"binary:logistic\", NULL)\n  })\n  res_multi <- process.y.margin.and.objective(y_multi, NULL, \"multi:softprob\", NULL)\n  expect_equal(\n    res_multi$dmatrix_args$label,\n    c(0, 1, 2, 3, 0, 1)\n  )\n  expect_equal(\n    res_multi$metadata$y_levels,\n    c('a', 'b', 'c', 'd')\n  )\n  expect_equal(\n    res_multi$params$num_class,\n    4\n  )\n  expect_equal(\n    res_multi$params$objective,\n    \"multi:softprob\"\n  )\n})\n\ntest_that(\"Process survival objects\", {\n  data(cancer, package = \"survival\")\n  y_right <- Surv(cancer$time, cancer$status - 1, type = \"right\")\n  res_cox <- process.y.margin.and.objective(y_right, NULL, \"survival:cox\", NULL)\n  expect_equal(\n    res_cox$dmatrix_args$label,\n    ifelse(cancer$status == 2, cancer$time, -cancer$time)\n  )\n  expect_equal(\n    res_cox$params$objective,\n    \"survival:cox\"\n  )\n\n  res_aft <- process.y.margin.and.objective(y_right, NULL, \"survival:aft\", NULL)\n  expect_equal(\n    res_aft$dmatrix_args$label_lower_bound,\n    cancer$time\n  )\n  expect_equal(\n    
res_aft$dmatrix_args$label_upper_bound,\n    ifelse(cancer$status == 2, cancer$time, Inf)\n  )\n  expect_equal(\n    res_aft$params$objective,\n    \"survival:aft\"\n  )\n\n  y_left <- Surv(seq(1, 4), c(1, 0, 1, 0), type = \"left\")\n  expect_error({\n    process.y.margin.and.objective(y_left, NULL, \"survival:cox\", NULL)\n  })\n  res_aft <- process.y.margin.and.objective(y_left, NULL, \"survival:aft\", NULL)\n  expect_equal(\n    res_aft$dmatrix_args$label_lower_bound,\n    c(1, 0, 3, 0)\n  )\n  expect_equal(\n    res_aft$dmatrix_args$label_upper_bound,\n    seq(1, 4)\n  )\n  expect_equal(\n    res_aft$params$objective,\n    \"survival:aft\"\n  )\n\n  y_interval <- Surv(\n    time = c(1, 5, 2, 10, 3),\n    time2 = c(2, 5, 2.5, 10, 3),\n    event = c(3, 1, 3, 0, 2),\n    type = \"interval\"\n  )\n  expect_error({\n    process.y.margin.and.objective(y_interval, NULL, \"survival:cox\", NULL)\n  })\n  res_aft <- process.y.margin.and.objective(y_interval, NULL, \"survival:aft\", NULL)\n  expect_equal(\n    res_aft$dmatrix_args$label_lower_bound,\n    c(1, 5, 2, 10, 0)\n  )\n  expect_equal(\n    res_aft$dmatrix_args$label_upper_bound,\n    c(2, 5, 2.5, Inf, 3)\n  )\n  expect_equal(\n    res_aft$params$objective,\n    \"survival:aft\"\n  )\n\n  y_interval_neg <- Surv(\n    time = c(1, -5, 2, 10, 3),\n    time2 = c(2, -5, 2.5, 10, 3),\n    event = c(3, 1, 3, 0, 2),\n    type = \"interval\"\n  )\n  expect_error({\n    process.y.margin.and.objective(y_interval_neg, NULL, \"survival:aft\", NULL)\n  })\n})\n\ntest_that(\"Process multi-target\", {\n  data(mtcars)\n  y_multi <- data.frame(\n    y1 = mtcars$mpg,\n    y2 = mtcars$mpg ^ 2\n  )\n  for (y_inp in list(y_multi, as.matrix(y_multi), data.table::as.data.table(y_multi))) {\n    res_multi <- process.y.margin.and.objective(y_inp, NULL, \"reg:pseudohubererror\", NULL)\n    expect_equal(\n      res_multi$dmatrix_args$label,\n      as.matrix(y_multi)\n    )\n    expect_equal(\n      res_multi$metadata$y_names,\n      
c(\"y1\", \"y2\")\n    )\n    expect_equal(\n      res_multi$params$objective,\n      \"reg:pseudohubererror\"\n    )\n  }\n\n  expect_error({\n    process.y.margin.and.objective(y_multi, NULL, \"count:poisson\", NULL)\n  })\n\n  y_bad <- data.frame(\n    c1 = seq(1, 3),\n    c2 = rep(as.Date(\"2024-01-01\"), 3)\n  )\n  expect_error({\n    process.y.margin.and.objective(y_bad, NULL, \"reg:squarederror\", NULL)\n  })\n\n  y_bad <- data.frame(\n    c1 = seq(1, 3),\n    c2 = factor(c('a', 'b', 'a'), c('a', 'b'))\n  )\n  expect_error({\n    process.y.margin.and.objective(y_bad, NULL, \"reg:squarederror\", NULL)\n  })\n\n  y_bad <- seq(1, 20)\n  dim(y_bad) <- c(5, 2, 2)\n  expect_error({\n    process.y.margin.and.objective(y_bad, NULL, \"reg:squarederror\", NULL)\n  })\n})\n\ntest_that(\"Process base_margin\", {\n  y <- seq(101, 110)\n  bm_good <- seq(1, 10)\n  for (bm in list(bm_good, as.matrix(bm_good), as.data.frame(as.matrix(bm_good)))) {\n    res <- process.y.margin.and.objective(y, bm, \"reg:squarederror\", NULL)\n    expect_equal(\n      res$dmatrix_args$base_margin,\n      seq(1, 10)\n    )\n  }\n  expect_error({\n    process.y.margin.and.objective(y, 5, \"reg:squarederror\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, seq(1, 5), \"reg:squarederror\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, matrix(seq(1, 20), ncol = 2), \"reg:squarederror\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(\n      y,\n      as.data.frame(matrix(seq(1, 20), ncol = 2)),\n      \"reg:squarederror\",\n      NULL\n    )\n  })\n\n  y <- factor(c('a', 'b', 'c', 'a'))\n  bm_good <- matrix(seq(1, 12), ncol = 3)\n  for (bm in list(bm_good, as.data.frame(bm_good))) {\n    res <- process.y.margin.and.objective(y, bm, \"multi:softprob\", NULL)\n    expect_equal(\n      res$dmatrix_args$base_margin |> unname(),\n      matrix(seq(1, 12), ncol = 3)\n    )\n  }\n  expect_error({\n    process.y.margin.and.objective(y, 
as.numeric(bm_good), \"multi:softprob\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, 5, \"multi:softprob\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, 1], \"multi:softprob\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, c(1, 2)], \"multi:softprob\", NULL)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[c(1, 2), ], \"multi:softprob\", NULL)\n  })\n\n  y <- seq(101, 110)\n  bm_good <- matrix(seq(1, 30), ncol = 3)\n  params <- list(quantile_alpha = c(0.1, 0.5, 0.9))\n  for (bm in list(bm_good, as.data.frame(bm_good))) {\n    res <- process.y.margin.and.objective(y, bm, \"reg:quantileerror\", params)\n    expect_equal(\n      res$dmatrix_args$base_margin |> unname(),\n      matrix(seq(1, 30), ncol = 3)\n    )\n  }\n  expect_error({\n    process.y.margin.and.objective(y, as.numeric(bm_good), \"reg:quantileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, 5, \"reg:quantileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, 1], \"reg:quantileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, c(1, 2)], \"reg:quantileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[c(1, 2, 3), ], \"reg:quantileerror\", params)\n  })\n\n  params <- list(expectile_alpha = c(0.1, 0.5, 0.9))\n  for (bm in list(bm_good, as.data.frame(bm_good))) {\n    res <- process.y.margin.and.objective(y, bm, \"reg:expectileerror\", params)\n    expect_equal(\n      res$dmatrix_args$base_margin |> unname(),\n      matrix(seq(1, 30), ncol = 3)\n    )\n  }\n  expect_error({\n    process.y.margin.and.objective(y, as.numeric(bm_good), \"reg:expectileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, 5, \"reg:expectileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, 1], 
\"reg:expectileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, c(1, 2)], \"reg:expectileerror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[c(1, 2, 3), ], \"reg:expectileerror\", params)\n  })\n\n  y <- matrix(seq(101, 130), ncol = 3)\n  for (bm in list(bm_good, as.data.frame(bm_good))) {\n    res <- process.y.margin.and.objective(y, bm, \"reg:squarederror\", params)\n    expect_equal(\n      res$dmatrix_args$base_margin |> unname(),\n      matrix(seq(1, 30), ncol = 3)\n    )\n  }\n  expect_error({\n    process.y.margin.and.objective(y, as.numeric(bm_good), \"reg:squarederror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, 5, \"reg:squarederror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, 1], \"reg:squarederror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[, c(1, 2)], \"reg:squarederror\", params)\n  })\n  expect_error({\n    process.y.margin.and.objective(y, bm_good[c(1, 2, 3), ], \"reg:squarederror\", params)\n  })\n})\n\ntest_that(\"Process monotone constraints\", {\n  data(iris)\n  mc_list <- list(Sepal.Width = 1)\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = mc_list,\n    interaction_constraints = NULL,\n    feature_weights = NULL,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$params$monotone_constraints,\n    c(0, 1, 0, 0, 0)\n  )\n\n  mc_list2 <- list(Sepal.Width = 1, Petal.Width = -1)\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = mc_list2,\n    interaction_constraints = NULL,\n    feature_weights = NULL,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$params$monotone_constraints,\n    c(0, 1, 0, -1, 0)\n  )\n\n  mc_vec <- c(0, 1, -1, 0, 0)\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = mc_vec,\n    interaction_constraints = NULL,\n    
feature_weights = NULL,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$params$monotone_constraints,\n    c(0, 1, -1, 0, 0)\n  )\n\n  mc_named_vec <- c(1, 1)\n  names(mc_named_vec) <- names(iris)[1:2]\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = mc_named_vec,\n    interaction_constraints = NULL,\n    feature_weights = NULL,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$params$monotone_constraints,\n    c(1, 1, 0, 0, 0)\n  )\n\n  mc_named_all <- c(0, -1, 1, 0, -1)\n  names(mc_named_all) <- rev(names(iris))\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = mc_named_all,\n    interaction_constraints = NULL,\n    feature_weights = NULL,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$params$monotone_constraints,\n    rev(mc_named_all) |> unname()\n  )\n\n  expect_error({\n    process.x.and.col.args(\n      iris,\n      monotone_constraints = list(\n        Sepal.Width = 1,\n        Petal.Width = -1,\n        Sepal.Width = -1\n      ),\n      interaction_constraints = NULL,\n      feature_weights = NULL,\n      lst_args = list(),\n      use_qdm = FALSE\n    )\n  })\n\n  expect_error({\n    process.x.and.col.args(\n      iris,\n      monotone_constraints = rep(0, 6),\n      interaction_constraints = NULL,\n      feature_weights = NULL,\n      lst_args = list(),\n      use_qdm = FALSE\n    )\n  })\n})\n\ntest_that(\"Process interaction_constraints\", {\n  data(iris)\n  res <- process.x.and.col.args(iris, NULL, list(c(1L, 2L)), NULL, NULL, FALSE)\n  expect_equal(\n    res$params$interaction_constraints,\n    list(c(0, 1))\n  )\n  res <- process.x.and.col.args(iris, NULL, list(c(1.0, 2.0)), NULL, NULL, FALSE)\n  expect_equal(\n    res$params$interaction_constraints,\n    list(c(0, 1))\n  )\n  res <- process.x.and.col.args(iris, NULL, list(c(1, 2), c(3, 4)), NULL, NULL, FALSE)\n  expect_equal(\n    res$params$interaction_constraints,\n    
list(c(0, 1), c(2, 3))\n  )\n  res <- process.x.and.col.args(\n    iris, NULL, list(c(\"Sepal.Length\", \"Sepal.Width\")), NULL, NULL, FALSE\n  )\n  expect_equal(\n    res$params$interaction_constraints,\n    list(c(0, 1))\n  )\n  res <- process.x.and.col.args(\n    as.matrix(iris),\n    NULL,\n    list(c(\"Sepal.Length\", \"Sepal.Width\")),\n    NULL,\n    NULL,\n    FALSE\n  )\n  expect_equal(\n    res$params$interaction_constraints,\n    list(c(0, 1))\n  )\n  res <- process.x.and.col.args(\n    iris,\n    NULL,\n    list(c(\"Sepal.Width\", \"Petal.Length\"), c(\"Sepal.Length\", \"Petal.Width\", \"Species\")),\n    NULL,\n    NULL,\n    FALSE\n  )\n  expect_equal(\n    res$params$interaction_constraints,\n    list(c(1, 2), c(0, 3, 4))\n  )\n\n  expect_error({\n    process.x.and.col.args(iris, NULL, list(c(1L, 20L)), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, list(c(0L, 2L)), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, list(c(\"1\", \"2\")), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, list(c(\"Sepal\", \"Petal\")), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, c(1L, 2L), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, matrix(c(1L, 2L)), NULL, NULL, FALSE)\n  })\n  expect_error({\n    process.x.and.col.args(iris, NULL, list(c(1, 2.5)), NULL, NULL, FALSE)\n  })\n})\n\ntest_that(\"Sparse matrices are casted to CSR for QDM\", {\n  data(agaricus.test, package = \"xgboost\")\n  x <- agaricus.test$data\n  for (x_in in list(x, methods::as(x, \"TsparseMatrix\"))) {\n    res <- process.x.and.col.args(\n      x_in,\n      NULL,\n      NULL,\n      NULL,\n      NULL,\n      TRUE\n    )\n    expect_s4_class(res$dmatrix_args$data, \"dgRMatrix\")\n  }\n})\n\ntest_that(\"Process feature_weights\", {\n  data(iris)\n  w_vector <- seq(1, 5)\n  res <-  process.x.and.col.args(\n    iris,\n    
monotone_constraints = NULL,\n    interaction_constraints = NULL,\n    feature_weights = w_vector,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$dmatrix_args$feature_weights,\n    seq(1, 5)\n  )\n\n  w_named_vector <- seq(1, 5)\n  names(w_named_vector) <- rev(names(iris))\n  res <-  process.x.and.col.args(\n    iris,\n    monotone_constraints = NULL,\n    interaction_constraints = NULL,\n    feature_weights = w_named_vector,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$dmatrix_args$feature_weights,\n    rev(seq(1, 5))\n  )\n\n  w_list <- list(\n    Species = 5,\n    Sepal.Length = 1,\n    Sepal.Width = 2,\n    Petal.Length = 3,\n    Petal.Width = 4\n  )\n  res <- process.x.and.col.args(\n    iris,\n    monotone_constraints = NULL,\n    interaction_constraints = NULL,\n    feature_weights = w_list,\n    lst_args = list(),\n    use_qdm = FALSE\n  )\n  expect_equal(\n    res$dmatrix_args$feature_weights,\n    seq(1, 5)\n  )\n})\n\ntest_that(\"Whole function works\", {\n  data(cancer, package = \"survival\")\n  y <- Surv(cancer$time, cancer$status - 1, type = \"right\")\n  x <- as.data.table(cancer)[, -c(\"time\", \"status\")]\n  model <- xgboost(\n    x,\n    y,\n    monotone_constraints = list(age = -1),\n    nthreads = 1L,\n    nrounds = 5L,\n    learning_rate = 3\n  )\n  expect_equal(\n    attributes(model)$params$objective,\n    \"survival:aft\"\n  )\n  expect_equal(\n    attributes(model)$metadata$n_targets,\n    1L\n  )\n  expect_equal(\n    attributes(model)$params$monotone_constraints,\n    \"(0,-1,0,0,0,0,0,0)\"\n  )\n  expect_false(\n    \"interaction_constraints\" %in% names(attributes(model)$params)\n  )\n  expect_equal(\n    attributes(model)$params$learning_rate,\n    3\n  )\n  txt <- capture.output({\n    print(model)\n  })\n  expect_true(any(grepl(\"Objective: survival:aft\", txt, fixed = TRUE)))\n  expect_true(any(grepl(\"monotone_constraints\", txt, fixed = TRUE)))\n  
expect_true(any(grepl(\"Number of iterations: 5\", txt, fixed = TRUE)))\n  expect_true(any(grepl(\"Number of features: 8\", txt, fixed = TRUE)))\n})\n\ntest_that(\"Print shows expectile metadata\", {\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 2L,\n    max_depth = 2L,\n    objective = \"reg:expectileerror\",\n    expectile_alpha = c(0.25, 0.5)\n  )\n  txt <- capture.output({\n    print(model)\n  })\n  expect_true(any(grepl(\"Prediction expectile\", txt, fixed = TRUE)))\n})\n\ntest_that(\"Can predict probabilities and raw scores\", {\n  y <- ToothGrowth$supp\n  x <- ToothGrowth[, -2L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n  pred_prob <- predict(model, x, type = \"response\")\n  pred_raw <- predict(model, x, type = \"raw\")\n  expect_true(is.vector(pred_prob))\n  expect_equal(length(pred_prob), nrow(x))\n  expect_true(min(pred_prob) >= 0)\n  expect_true(max(pred_prob) <= 1)\n\n  expect_equal(length(pred_raw), nrow(x))\n  expect_true(is.vector(pred_raw))\n  expect_true(min(pred_raw) < 0)\n  expect_true(max(pred_raw) > 0)\n\n  expect_equal(\n    pred_prob,\n    1 / (1 + exp(-pred_raw)),\n    tolerance = 1e-6\n  )\n})\n\ntest_that(\"Can predict class\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n  pred_class <- predict(model, x, type = \"class\")\n  expect_true(is.factor(pred_class))\n  expect_equal(levels(pred_class), levels(y))\n\n  y <- ToothGrowth$supp\n  x <- ToothGrowth[, -2L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n  pred_class <- predict(model, x, type = \"class\")\n  expect_true(is.factor(pred_class))\n  expect_equal(levels(pred_class), levels(y))\n\n  probs <- predict(model, x, type = \"response\")\n  expect_true(all(pred_class[probs >= 0.5] == levels(y)[[2L]]))\n  expect_true(all(pred_class[probs < 0.5] == levels(y)[[1L]]))\n\n  # Check that it fails for 
regression models\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n  expect_error({\n    predict(model, x, type = \"class\")\n  })\n})\n\ntest_that(\"Metadata survives serialization\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model_fresh <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n  temp_file <- file.path(tempdir(), \"xgb_model.Rds\")\n  saveRDS(model_fresh, temp_file)\n  model <- readRDS(temp_file)\n  pred_class <- predict(model, x, type = \"class\")\n  expect_true(is.factor(pred_class))\n  expect_equal(levels(pred_class), levels(y))\n})\n\ntest_that(\"Column names aren't added when not appropriate\", {\n  pred_types <- c(\n    \"response\",\n    \"raw\",\n    \"leaf\"\n  )\n  for (pred_type in pred_types) {\n    y <- mtcars$mpg\n    x <- mtcars[, -1L]\n    model <- xgboost(\n      x,\n      y,\n      nthreads = 1L,\n      nrounds = 3L,\n      max_depth = 2L,\n      objective = \"reg:quantileerror\",\n      quantile_alpha = 0.5\n    )\n    pred <- predict(model, x, type = pred_type)\n    if (pred_type %in% c(\"raw\", \"response\")) {\n      expect_true(is.vector(pred))\n    } else {\n      expect_true(length(dim(pred)) >= 2L)\n      expect_null(colnames(pred))\n    }\n\n    y <- ToothGrowth$supp\n    x <- ToothGrowth[, -2L]\n    model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n    pred <- predict(model, x, type = pred_type)\n    if (pred_type %in% c(\"raw\", \"response\")) {\n      expect_true(is.vector(pred))\n    } else {\n      expect_true(length(dim(pred)) >= 2L)\n      expect_null(colnames(pred))\n    }\n  }\n})\n\ntest_that(\"Column names from multiclass are added to non-class predictions\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n\n  pred_types_with_colnames <- c(\n    \"response\",\n    \"raw\",\n    \"contrib\",\n    \"interaction\"\n  )\n\n  for (pred_type in 
pred_types_with_colnames) {\n    pred <- predict(model, x, type = pred_type)\n    expect_equal(nrow(pred), nrow(x))\n    expect_equal(ncol(pred), 3L)\n    expect_equal(colnames(pred), levels(y))\n  }\n})\n\ntest_that(\"Column names from multitarget are added to predictions\", {\n  y <- data.frame(\n    ylog = log(mtcars$mpg),\n    ysqrt = sqrt(mtcars$mpg)\n  )\n  x <- mtcars[, -1L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 3L, max_depth = 2L)\n\n  pred_types_with_colnames <- c(\n    \"response\",\n    \"raw\",\n    \"contrib\",\n    \"interaction\"\n  )\n\n  for (pred_type in pred_types_with_colnames) {\n    pred <- predict(model, x, type = pred_type)\n    expect_equal(nrow(pred), nrow(x))\n    expect_equal(ncol(pred), 2L)\n    expect_equal(colnames(pred), colnames(y))\n  }\n})\n\ntest_that(\"Column names from multiquantile are added to predictions\", {\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 3L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = c(0.25, 0.5, 0.75)\n  )\n\n  pred_types_with_colnames <- c(\n    \"response\",\n    \"raw\",\n    \"contrib\",\n    \"interaction\"\n  )\n\n  for (pred_type in pred_types_with_colnames) {\n    pred <- predict(model, x, type = pred_type)\n    expect_equal(nrow(pred), nrow(x))\n    expect_equal(ncol(pred), 3L)\n    expect_equal(colnames(pred), c(\"q0.25\", \"q0.5\", \"q0.75\"))\n  }\n})\n\ntest_that(\"Column names from multiexpectile are added to predictions\", {\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 3L,\n    max_depth = 2L,\n    objective = \"reg:expectileerror\",\n    expectile_alpha = c(0.25, 0.5, 0.75)\n  )\n\n  pred_types_with_colnames <- c(\n    \"response\",\n    \"raw\",\n    \"contrib\",\n    \"interaction\"\n  )\n\n  for (pred_type in pred_types_with_colnames) {\n    pred <- predict(model, x, type = pred_type)\n    
expect_equal(nrow(pred), nrow(x))\n    expect_equal(ncol(pred), 3L)\n    expect_equal(colnames(pred), c(\"e0.25\", \"e0.5\", \"e0.75\"))\n  }\n})\n\ntest_that(\"Leaf predictions have multiple dimensions when needed\", {\n  # single score, multiple trees\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 4L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = 0.5\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L))\n  expect_equal(row.names(pred), row.names(x))\n  expect_null(colnames(pred))\n\n  # single score, single tree\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 1L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = 0.5\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L))\n  expect_equal(row.names(pred), row.names(x))\n  expect_null(colnames(pred))\n\n  # multiple score, multiple trees\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 4L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = c(0.25, 0.5, 0.75)\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L, 3L))\n  expect_equal(row.names(pred), row.names(x))\n  expect_null(colnames(pred))\n  expect_equal(dimnames(pred)[[3L]], c(\"q0.25\", \"q0.5\", \"q0.75\"))\n\n  # multiple score, single tree\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 1L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = c(0.25, 0.5, 0.75)\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 3L))\n  expect_equal(row.names(pred), row.names(x))\n  expect_null(colnames(pred))\n  expect_equal(dimnames(pred)[[3L]], c(\"q0.25\", \"q0.5\", \"q0.75\"))\n\n  # parallel trees, single tree, single score\n  model <- xgboost(\n    
x,\n    y,\n    nthreads = 1L,\n    nrounds = 1L,\n    max_depth = 2L,\n    objective = \"count:poisson\",\n    num_parallel_tree = 2L\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 2L))\n  expect_equal(row.names(pred), row.names(x))\n  expect_null(colnames(pred))\n  expect_null(dimnames(pred)[[3L]])\n\n  # num_parallel_tree>1 + multiple scores is not supported at the moment so no test for it.\n})\n\ntest_that(\"Column names from multiclass are added to leaf predictions\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 4L, max_depth = 2L)\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L, 3L))\n  expect_equal(dimnames(pred)[[3L]], levels(y))\n\n  # Check also for a single tree\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 1L, max_depth = 2L)\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 3L))\n  expect_equal(dimnames(pred)[[3L]], levels(y))\n})\n\ntest_that(\"Column names from multitarget are added to leaf predictions\", {\n  y <- data.frame(\n    ylog = log(mtcars$mpg),\n    ysqrt = sqrt(mtcars$mpg)\n  )\n  x <- mtcars[, -1L]\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 4L, max_depth = 2L)\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L, 2L))\n  expect_equal(dimnames(pred)[[3L]], colnames(y))\n\n  # Check also for a single tree\n  model <- xgboost(x, y, nthreads = 1L, nrounds = 1L, max_depth = 2L)\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 2L))\n  expect_equal(dimnames(pred)[[3L]], colnames(y))\n})\n\ntest_that(\"Column names from multiquantile are added to leaf predictions\", {\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 4L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = c(0.25, 0.5, 0.75)\n  )\n  
pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L, 3L))\n  expect_equal(dimnames(pred)[[3L]], c(\"q0.25\", \"q0.5\", \"q0.75\"))\n\n  # Check also for a single tree\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 1L,\n    max_depth = 2L,\n    objective = \"reg:quantileerror\",\n    quantile_alpha = c(0.25, 0.5, 0.75)\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 3L))\n  expect_equal(dimnames(pred)[[3L]], c(\"q0.25\", \"q0.5\", \"q0.75\"))\n})\n\ntest_that(\"Column names from multiexpectile are added to leaf predictions\", {\n  y <- mtcars$mpg\n  x <- mtcars[, -1L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 4L,\n    max_depth = 2L,\n    objective = \"reg:expectileerror\",\n    expectile_alpha = c(0.25, 0.5, 0.75)\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 4L, 3L))\n  expect_equal(dimnames(pred)[[3L]], c(\"e0.25\", \"e0.5\", \"e0.75\"))\n\n  # Check also for a single tree\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 1L,\n    max_depth = 2L,\n    objective = \"reg:expectileerror\",\n    expectile_alpha = c(0.25, 0.5, 0.75)\n  )\n  pred <- predict(model, x, type = \"leaf\")\n  expect_equal(dim(pred), c(nrow(x), 1L, 3L))\n  expect_equal(dimnames(pred)[[3L]], c(\"e0.25\", \"e0.5\", \"e0.75\"))\n})\n\ntest_that(\"Evaluation fraction leaves examples of all classes for training\", {\n  # With minimal sample leave no remainder\n  lst_args <- list(\n    dmatrix_args = list(\n      data = matrix(seq(1, 4), ncol = 1L),\n      label = c(0, 0, 1, 1)\n    ),\n    metadata = list(\n      y_levels = c(\"a\", \"b\")\n    ),\n    params = list(\n      seed = 123\n    )\n  )\n  for (retry in seq_len(10)) {\n    lst_args$params$seed <- retry\n    res <- process.eval.set(0.5, lst_args)\n    expect_equal(length(intersect(res$idx_train, res$idx_eval)), 0)\n    
expect_equal(length(res$idx_train), 2L)\n    expect_equal(length(res$idx_eval), 2L)\n    expect_true(length(intersect(c(1L, 2L), res$idx_train)) >= 1L)\n    expect_true(length(intersect(c(3L, 4L), res$idx_train)) >= 1L)\n  }\n\n  # With minimal sample leaving some remainder\n  lst_args <- list(\n    dmatrix_args = list(\n      data = matrix(seq(1, 5), ncol = 1L),\n      label = c(0, 0, 1, 1, 1)\n    ),\n    metadata = list(\n      y_levels = c(\"a\", \"b\")\n    ),\n    params = list(\n      seed = 123\n    )\n  )\n  for (retry in seq_len(20)) {\n    lst_args$params$seed <- retry\n    res <- process.eval.set(0.4, lst_args)\n    expect_equal(length(intersect(res$idx_train, res$idx_eval)), 0)\n    expect_equal(length(res$idx_train), 3L)\n    expect_equal(length(res$idx_eval), 2L)\n    expect_true(length(intersect(c(1L, 2L), res$idx_train)) >= 1L)\n    expect_true(length(intersect(c(3L, 4L, 5L), res$idx_train)) >= 1L)\n  }\n})\n\ntest_that(\"'eval_set' as fraction works\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model <- xgboost(\n    x,\n    y,\n    base_margin = matrix(0.1, nrow = nrow(x), ncol = 3L),\n    eval_set = 0.2,\n    nthreads = 1L,\n    nrounds = 4L,\n    max_depth = 2L,\n    verbosity = 0L\n  )\n  expect_true(hasName(attributes(model), \"evaluation_log\"))\n  evaluation_log <- attributes(model)$evaluation_log\n  expect_equal(nrow(evaluation_log), 4L)\n  expect_true(hasName(evaluation_log, \"eval_mlogloss\"))\n  expect_equal(length(attributes(model)$metadata$y_levels), 3L)\n})\n\ntest_that(\"Linear booster importance uses class names\", {\n  y <- iris$Species\n  x <- iris[, -5L]\n  model <- xgboost(\n    x,\n    y,\n    nthreads = 1L,\n    nrounds = 4L,\n    verbosity = 0L,\n    booster = \"gblinear\",\n    learning_rate = 0.2\n  )\n  imp <- xgb.importance(model)\n  expect_true(is.factor(imp$Class))\n  expect_equal(levels(imp$Class), levels(y))\n})\n"
  },
  {
    "path": "R-package/tests/testthat.R",
    "content": "library(testthat)\nlibrary(xgboost)\nlibrary(Matrix)\n\ntest_check(\"xgboost\", reporter = ProgressReporter)\nRhpcBLASctl::omp_set_num_threads(1)\n"
  },
  {
    "path": "R-package/vignettes/xgboost_introduction.Rmd",
    "content": "---\ntitle: \"XGBoost for R introduction\"\nvignette: >\n    %\\VignetteIndexEntry{XGBoost for R introduction}\n    %\\VignetteEncoding{UTF-8}\n    %\\VignetteEngine{knitr::rmarkdown}\noutput:\n    html_document:\n        theme: \"spacelab\"\n        highlight: \"kate\"\n        toc: true\n        toc_float: true\n---\n\nXGBoost for R introduction\n==========================\n\n## Introduction\n\n**XGBoost** is an optimized distributed gradient boosting library designed to be highly **efficient**, **flexible** and **portable**. It implements machine learning algorithms under the `gradient boosting` framework. XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solve many data science problems in a fast and accurate way. The same code runs on major distributed environment (Hadoop, SGE, MPI) and can solve problems beyond billions of examples.\n\nFor an introduction to the concept of gradient boosting, see the tutorial [Introduction to Boosted Trees](https://xgboost.readthedocs.io/en/stable/tutorials/model.html) in XGBoost's online docs.\n\nFor more details about XGBoost's features and usage, see the [online documentation](https://xgboost.readthedocs.io/en/stable/) which contains more tutorials, examples, and details.\n\nThis short vignette outlines the basic usage of the R interface for XGBoost, assuming the reader has some familiarity with the underlying concepts behind statistical modeling with gradient-boosted decision trees.\n\n## Building a predictive model\n\nAt its core, XGBoost consists of a C++ library which offers bindings for different programming languages, including R. 
The R package for XGBoost provides an idiomatic interface similar to those of other statistical modeling packages using an x/y design, as well as a lower-level interface that interacts more directly with the underlying core library and which is similar to those of other language bindings like Python, plus various helpers to interact with its model objects such as by plotting their feature importances or converting them to other formats.\n\nThe main function of interest is `xgboost(x, y, ...)`, which calls the XGBoost model building procedure on observed data of covariates/features/predictors \"x\", and a response variable \"y\" - it should feel familiar to users of packages like `glmnet` or `ncvreg`:\n\n```{r}\nlibrary(xgboost)\ndata(ToothGrowth)\n\ny <- ToothGrowth$supp # the response which we want to model/predict\nx <- ToothGrowth[, c(\"len\", \"dose\")] # the features from which we want to predict it\nmodel <- xgboost(x, y, nthreads = 1, nrounds = 2)\nmodel\n```\n\nIn this case, the \"y\" response variable that was supplied is a \"factor\" type with two classes (\"OJ\" and \"VC\") - hence, XGBoost builds a binary classification model for it based on the features \"x\", by finding a maximum likelihood estimate (similar to the `family=\"binomial\"` model from R's `glm` function) through rule buckets obtained from the sum of two decision trees (from `nrounds=2`), from which we can then predict probabilities, log-odds, class with highest likelihood, among others:\n\n```{r}\npredict(model, x[1:6, ], type = \"response\") # probabilities for y's last level (\"VC\")\npredict(model, x[1:6, ], type = \"raw\")      # log-odds\npredict(model, x[1:6, ], type = \"class\")    # class with highest probability\n```\n\nCompared to R's `glm` function which follows the concepts of \"families\" and \"links\" from GLM theory to fit models for different kinds of response distributions, XGBoost follows the simpler concept of \"objectives\" which mix both of them into one, and which 
just like `glm`, allow modeling very different kinds of response distributions (e.g. discrete choices, real-valued numbers, counts, censored measurements, etc.) through a common framework.\n\nXGBoost will automatically determine a suitable objective for the response given its object class (can pass factors for classification, numeric vectors for regression, `Surv` objects from the `survival` package for survival, etc. - see `?xgboost` for more details), but this can be controlled manually through an `objective` parameter based on the kind of model that is desired:\n\n```{r}\ndata(mtcars)\n\ny <- mtcars$mpg\nx <- mtcars[, -1]\nmodel_gaussian <- xgboost(x, y, nthreads = 1, nrounds = 2) # default is squared loss (Gaussian)\nmodel_poisson <- xgboost(x, y, objective = \"count:poisson\", nthreads = 1, nrounds = 2)\nmodel_abserr <- xgboost(x, y, objective = \"reg:absoluteerror\", nthreads = 1, nrounds = 2)\n```\n\n_Note: the objective must match with the type of the \"y\" response variable - for example, classification objectives for discrete choices require \"factor\" types, while regression models for real-valued data require \"numeric\" types._\n\n## Model parameters\n\nXGBoost models allow a large degree of control over how they are built. By their nature, gradient-boosted decision tree ensembles are able to capture very complex patterns between features in the data and a response variable, which also means they can suffer from overfitting if not controlled appropriately.\n\nFor best results, one needs to find suitable parameters for the data being modeled. 
Note that XGBoost does not adjust its default hyperparameters based on the data, and different datasets will require vastly different hyperparameters for optimal predictive performance.\n\nFor example, for a small dataset like \"ToothGrowth\" which has only two features and 60 observations, the defaults from XGBoost are an overkill which lead to severe overfitting - for such data, one might want to have smaller trees (i.e. more conservative decision rules, capturing simpler patterns) and fewer of them, for example.\n\nParameters can be controlled by passing additional arguments to `xgboost()`. See `?xgb.params` for details about what parameters are available to control.\n\n```{r}\ny <- ToothGrowth$supp\nx <- ToothGrowth[, c(\"len\", \"dose\")]\nmodel_conservative <- xgboost(\n    x, y, nthreads = 1,\n    nrounds = 5,\n    max_depth = 2,\n    reg_lambda = 0.5,\n    learning_rate = 0.15\n)\npred_conservative <- predict(\n    model_conservative,\n    x\n)\npred_conservative[1:6] # probabilities are all closer to 0.5 now\n```\n\nXGBoost also allows the possibility of calculating evaluation metrics for model quality over boosting rounds, with a wide variety of built-in metrics available to use. 
It's possible to automatically set aside a fraction of the data to use as evaluation set, from which one can then visually monitor progress and overfitting:\n\n```{r}\nxgboost(\n    x, y, nthreads = 1,\n    eval_set = 0.2,\n    monitor_training = TRUE,\n    verbosity = 1,\n    eval_metric = c(\"auc\", \"logloss\"),\n    nrounds = 5,\n    max_depth = 2,\n    reg_lambda = 0.5,\n    learning_rate = 0.15\n)\n```\n\n## Examining model objects\n\nXGBoost model objects for the most part consist of a pointer to a C++ object where most of the information is held and which is interfaced through the utility functions and methods in the package, but also contains some R attributes that can be retrieved (and new ones added) through `attributes()`:\n\n```{r}\nattributes(model)\n```\n\nIn addition to R attributes (which can be arbitrary R objects), it may also keep some standardized C-level attributes that one can access and modify (but which can only be JSON-format):\n\n```{r}\nxgb.attributes(model)\n```\n\n(they are empty for this model)\n\n... but usually, when it comes to getting something out of a model object, one would typically want to do this through the built-in utility functions. 
Some examples:\n\n```{r}\nxgb.importance(model)\n```\n\n```{r}\nxgb.model.dt.tree(model)\n```\n\n## Other features\n\nXGBoost supports many additional features on top of its traditional gradient-boosting framework, including, among others:\n\n* Building decision tree models with characteristics such as per-feature monotonicity constraints or interaction constraints.\n* Calculating feature contributions in individual predictions.\n* Using custom objectives and custom evaluation metrics.\n* Fitting linear models.\n* Fitting models on GPUs and/or on data that doesn't fit in RAM (\"external memory\").\n\nSee the [online documentation](https://xgboost.readthedocs.io/en/stable/index.html) - particularly the [tutorials section](https://xgboost.readthedocs.io/en/stable/tutorials/index.html) - for a glimpse over further functionalities that XGBoost offers.\n\n## The low-level interface\n\nIn addition to the `xgboost(x, y, ...)` function, XGBoost also provides a lower-level interface for creating model objects through the function `xgb.train()`, which resembles the same `xgb.train` functions in other language bindings of XGBoost.\n\nThis `xgb.train()` interface exposes additional functionalities (such as user-supplied callbacks or external-memory data support) and performs fewer data validations and castings compared to the `xgboost()` function interface.\n\nSome key differences between the two interfaces:\n\n* Unlike `xgboost()` which takes R objects such as `matrix` or `data.frame` as inputs, the function `xgb.train()` uses XGBoost's own data container called \"DMatrix\", which can be created from R objects through the function `xgb.DMatrix()`. 
Note that there are other \"DMatrix\" constructors too, such as \"xgb.QuantileDMatrix()\", which might be more beneficial for some use-cases.\n* A \"DMatrix\" object may contain a mixture of features/covariates, the response variable, observation weights, base margins, among others; and unlike `xgboost()`, requires its inputs to have already been encoded into the representation that XGBoost uses behind the scenes - for example, while `xgboost()` may take a `factor` object as \"y\", `xgb.DMatrix()` requires instead a binary response variable to be passed as a vector of zeros and ones.\n* Hyperparameters are passed as function arguments in `xgboost()`, while they are passed as a named list to `xgb.train()`.\n* The `xgb.train()` interface keeps less metadata about its inputs - for example, it will not add levels of factors as column names to estimated probabilities when calling `predict`.\n\nExample usage of `xgb.train()`:\n\n```{r}\ndata(\"agaricus.train\")\ndmatrix <- xgb.DMatrix(\n    data = agaricus.train$data,  # a sparse CSC matrix ('dgCMatrix')\n    label = agaricus.train$label, # zeros and ones\n    nthread = 1\n)\nbooster <- xgb.train(\n    data = dmatrix,\n    nrounds = 10,\n    params = xgb.params(\n        objective = \"binary:logistic\",\n        nthread = 1,\n        max_depth = 3\n    )\n)\n\ndata(\"agaricus.test\")\ndmatrix_test <- xgb.DMatrix(agaricus.test$data, nthread = 1)\npred_prob <- predict(booster, dmatrix_test)\npred_raw <- predict(booster, dmatrix_test, outputmargin = TRUE)\n```\n\nModel objects produced by `xgb.train()` have class `xgb.Booster`, while model objects produced by `xgboost()` have class `xgboost`, which is a subclass of `xgb.Booster`. 
Their `predict` methods also take different arguments - for example, `predict.xgboost` has a `type` parameter, while `predict.xgb.Booster` controls this through binary arguments - but as `xgboost` is a subclass of `xgb.Booster`, methods for `xgb.Booster` can be called on `xgboost` objects if needed.\n\nUtility functions in the XGBoost R package will work with both model classes - for example:\n\n```{r}\nxgb.importance(model)\nxgb.importance(booster)\n```\n\nWhile `xgboost()` aims to provide a user-friendly interface, there are still many situations where one should prefer the `xgb.train()` interface - for example:\n\n* For latency-sensitive applications (e.g. when serving models in real time), `xgb.train()` will have a speed advantage, as it performs fewer validations, conversions, and post-processings with metadata.\n* If you are developing an R package that depends on XGBoost, `xgb.train()` will provide a more stable interface (less subject to changes) and will have lower time/memory overhead.\n* If you need functionalities that are not exposed by the `xgboost()` interface - for example, if your dataset does not fit into the computer's RAM, it's still possible to construct a DMatrix from it if the data is loaded in batches through `xgb.ExtMemDMatrix()`.\n"
  },
  {
    "path": "R-package/vignettes/xgboostfromJSON.Rmd",
    "content": "---\ntitle: \"XGBoost from JSON\"\noutput:\n  rmarkdown::html_vignette:\n    number_sections: yes\n    toc: yes\nauthor: Roland Stevenson\nvignette: >\n  %\\VignetteIndexEntry{XGBoost from JSON}\n  %\\VignetteEngine{knitr::rmarkdown}\n  \\usepackage[utf8]{inputenc}\n---\n\nXGBoost from JSON\n=================\n\n## Introduction\n\nThe purpose of this Vignette is to show you how to correctly load and work with an **XGBoost** model that has been dumped to JSON.  **XGBoost** internally converts all data to [32-bit floats](https://en.wikipedia.org/wiki/Single-precision_floating-point_format), and the values dumped to JSON are decimal representations of these values.  When working with a model that has been parsed from a JSON file, care must be taken to correctly treat:\n\n- the input data, which should be converted to 32-bit floats\n- any 32-bit floats that were stored in JSON as decimal representations\n- any calculations must be done with 32-bit mathematical operators\n\n## Setup\n\nFor the purpose of this tutorial we will load the xgboost, jsonlite, and float packages.  We'll also set `digits=22` in our options in case we want to inspect many digits of our results.\n\n```{r}\nrequire(xgboost)\nrequire(jsonlite)\nrequire(float)\noptions(digits = 22)\n```\n\nWe will create a toy binary logistic model based on the example first provided [here](https://github.com/dmlc/xgboost/issues/3960), so that we can easily understand the structure of the dumped JSON model object.  
This will allow us to understand where discrepancies can occur and how they should be handled.\n\n```{r}\ndates <- c(20180130, 20180130, 20180130,\n           20180130, 20180130, 20180130,\n           20180131, 20180131, 20180131,\n           20180131, 20180131, 20180131,\n           20180131, 20180131, 20180131,\n           20180134, 20180134, 20180134)\n\nlabels <- c(1, 1, 1,\n            1, 1, 1,\n            0, 0, 0,\n            0, 0, 0,\n            0, 0, 0,\n            0, 0, 0)\n\ndata <- data.frame(dates = dates, labels = labels)\n\nbst <- xgb.train(\n  data = xgb.DMatrix(as.matrix(data$dates), label = labels, missing = NA, nthread = 1),\n  nrounds = 1,\n  params = xgb.params(\n    objective = \"binary:logistic\",\n    nthread = 2,\n    max_depth = 1\n  )\n)\n```\n\n## Comparing results\nWe will now dump the model to JSON and attempt to illustrate a variety of issues that can arise, and how to properly deal with them.\n\nFirst let's dump the model to JSON:\n\n```{r}\nbst_json <- xgb.dump(bst, with_stats = FALSE, dump_format = 'json')\nbst_from_json <- fromJSON(bst_json, simplifyDataFrame = FALSE)\nnode <- bst_from_json[[1]]\ncat(bst_json)\n```\n\nThe tree JSON shown by the above code-chunk tells us that if the data is less than 20180132, the tree will output the value in the first leaf.  Otherwise it will output the value in the second leaf.  Let's try to reproduce this manually with the data we have and confirm that it matches the model predictions we've already calculated.\n\n```{r}\nbst_preds_logodds <- predict(bst, as.matrix(data$dates), outputmargin = TRUE)\n\n# calculate the logodds values using the JSON representation\nbst_from_json_logodds <- ifelse(data$dates < node$split_condition,\n                                node$children[[1]]$leaf,\n                                node$children[[2]]$leaf)\n\nbst_preds_logodds\nbst_from_json_logodds\n\n# test that values are equal\nbst_preds_logodds == bst_from_json_logodds\n\n```\nNone are equal.  
What happened?\n\nAt this stage two things happened:\n\n- input data was not converted to 32-bit floats\n- the JSON variables were not converted to 32-bit floats\n\n### Lesson 1: All data is 32-bit floats\n\n> When working with imported JSON, all data must be converted to 32-bit floats\n\nTo explain this, let's repeat the comparison and round to two decimals:\n\n```{r}\nround(bst_preds_logodds, 2) == round(bst_from_json_logodds, 2)\n```\n\nIf we round to two decimals, we see that only the elements related to data values of `20180131` don't agree.  If we convert the data to floats, they agree:\n\n```{r}\n# now convert the dates to floats first\nbst_from_json_logodds <- ifelse(fl(data$dates) < node$split_condition,\n                                node$children[[1]]$leaf,\n                                node$children[[2]]$leaf)\n\n# test that values are equal\nround(bst_preds_logodds, 2) == round(bst_from_json_logodds, 2)\n```\n\nWhat's the lesson?  If we are going to work with an imported JSON model, any data must be converted to floats first.  In this case, since '20180131' cannot be represented as a 32-bit float, it is rounded up to 20180132, as shown here:\n\n```{r}\nfl(20180131)\n```\n\n\n### Lesson 2: JSON parameters are 32-bit floats\n\n> All JSON parameters stored as floats must be converted to floats.\n\nLet's now say we do care about numbers past the first two decimals.\n\n```{r}\n# test that values are equal\nbst_preds_logodds == bst_from_json_logodds\n```\n\nNone are exactly equal.  What happened?  Although we've converted the data to 32-bit floats, we also need to convert the JSON parameters to 32-bit floats.  
Let's do this:\n\n```{r}\n# now convert the dates to floats first\nbst_from_json_logodds <- ifelse(fl(data$dates) < fl(node$split_condition),\n                                as.numeric(fl(node$children[[1]]$leaf)),\n                                as.numeric(fl(node$children[[2]]$leaf)))\n\n# test that values are equal\nbst_preds_logodds == bst_from_json_logodds\n```\nAll equal.  What's the lesson?  If we are going to work with an imported JSON model, any JSON parameters that were stored as floats must also be converted to floats first.\n\n### Lesson 3: Use 32-bit math\n\n> Always use 32-bit numbers and operators\n\nWe were able to get the log-odds to agree, so now let's manually calculate the sigmoid of the log-odds.  This should agree with the xgboost predictions.\n\n\n```{r}\nbst_preds <- predict(bst, as.matrix(data$dates))\n\n# calculate the predictions casting doubles to floats\nbst_from_json_preds <- ifelse(\n  fl(data$dates) < fl(node$split_condition)\n  , as.numeric(1 / (1 + exp(-1 * fl(node$children[[1]]$leaf))))\n  , as.numeric(1 / (1 + exp(-1 * fl(node$children[[2]]$leaf))))\n)\n\n# test that values are equal\nbst_preds == bst_from_json_preds\n```\n\nNone are exactly equal again.  What is going on here?  Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation.  Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used.  On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).\n\nHow do we fix this?  We have to ensure we use the correct data types everywhere and the correct operators.  
If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.\n```{r}\n# calculate the predictions casting doubles to floats\nbst_from_json_preds <- ifelse(\n  fl(data$dates) < fl(node$split_condition)\n  , as.numeric(fl(1) / (fl(1) + exp(fl(-1) * fl(node$children[[1]]$leaf))))\n  , as.numeric(fl(1) / (fl(1) + exp(fl(-1) * fl(node$children[[2]]$leaf))))\n)\n\n# test that values are equal\nbst_preds == bst_from_json_preds\n```\n\nAll equal.  What's the lesson?  We have to ensure that all calculations are done with 32-bit floating point operators if we want to reproduce the results that we see with xgboost.\n"
  },
  {
    "path": "README.md",
    "content": "<img src=\"https://xgboost.ai/images/logo/xgboost-logo-trimmed.png\" width=200/> eXtreme Gradient Boosting\n===========\n\n[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost%20CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions)\n[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org)\n[![GitHub license](https://dmlc.github.io/img/apache2.svg)](./LICENSE)\n[![CRAN Status Badge](https://www.r-pkg.org/badges/version/xgboost)](https://cran.r-project.org/web/packages/xgboost)\n[![PyPI version](https://badge.fury.io/py/xgboost.svg)](https://pypi.python.org/pypi/xgboost/)\n[![Conda version](https://img.shields.io/conda/vn/conda-forge/py-xgboost.svg)](https://anaconda.org/conda-forge/py-xgboost)\n[![Optuna](https://img.shields.io/badge/Optuna-integrated-blue)](https://optuna.org)\n[![Twitter](https://img.shields.io/badge/@XGBoostProject--_.svg?style=social&logo=twitter)](https://twitter.com/XGBoostProject)\n[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/dmlc/xgboost/badge)](https://api.securityscorecards.dev/projects/github.com/dmlc/xgboost)\n[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/comet-examples/blob/master/integrations/model-training/xgboost/notebooks/how_to_use_comet_with_xgboost_tutorial.ipynb)\n\n[Community](https://xgboost.ai/community) |\n[Documentation](https://xgboost.readthedocs.org) |\n[Resources](demo/README.md) |\n[Contributors](CONTRIBUTORS.md) |\n[Release Notes](https://xgboost.readthedocs.io/en/latest/changes/index.html)\n\nXGBoost is an optimized distributed gradient boosting library designed to be highly ***efficient***, ***flexible*** and ***portable***.\nIt implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework.\nXGBoost provides a parallel tree boosting 
(also known as GBDT, GBM) that solves many data science problems in a fast and accurate way.\nThe same code runs on major distributed environments (Kubernetes, Hadoop, SGE, Dask, Spark, PySpark) and can solve problems beyond billions of examples.\n\nLicense\n-------\n© Contributors, 2021. Licensed under an [Apache-2](https://github.com/dmlc/xgboost/blob/master/LICENSE) license.\n\nContribute to XGBoost\n---------------------\nXGBoost has been developed and used by a group of active community members. Your help is very valuable to make the package better for everyone.\nCheck out the [Community Page](https://xgboost.ai/community).\n\nReference\n---------\n- Tianqi Chen and Carlos Guestrin. [XGBoost: A Scalable Tree Boosting System](https://arxiv.org/abs/1603.02754). In 22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016\n- XGBoost originates from a research project at the University of Washington.\n\nSponsors\n--------\nBecome a sponsor and get a logo here. See details at [Sponsoring the XGBoost Project](https://xgboost.ai/sponsors). 
The funds are used to defray the cost of continuous integration and testing infrastructure (https://xgboost-ci.net).\n\n## Open Source Collective sponsors\n[![Backers on Open Collective](https://opencollective.com/xgboost/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/xgboost/sponsors/badge.svg)](#sponsors)\n\n### Sponsors\n[[Become a sponsor](https://opencollective.com/xgboost#sponsor)]\n\n<a href=\"https://www.nvidia.com/en-us/\" target=\"_blank\"><img src=\"https://raw.githubusercontent.com/xgboost-ai/xgboost-ai.github.io/master/images/sponsors/nvidia.jpg\" alt=\"NVIDIA\" width=\"72\" height=\"72\"></a>\n<a href=\"https://www.comet.com/site/?utm_source=xgboost&utm_medium=github&utm_content=readme\" target=\"_blank\"><img src=\"https://cdn.comet.ml/img/notebook_logo.png\" height=\"72\"></a>\n<a href=\"https://opencollective.com/tomislav1\" target=\"_blank\"><img src=\"https://images.opencollective.com/tomislav1/avatar/256.png\" height=\"72\"></a>\n<a href=\"https://databento.com/?utm_source=xgboost&utm_medium=sponsor&utm_content=display\"><img src=\"https://raw.githubusercontent.com/xgboost-ai/xgboost-ai.github.io/refs/heads/master/images/sponsors/databento.png\" height=\"72\"></a>\n<a href=\"https://www.intel.com/\" target=\"_blank\"><img src=\"https://images.opencollective.com/intel-corporation/2fa85c1/logo/256.png\" width=\"72\" height=\"72\"></a>\n\n### Backers\n[[Become a backer](https://opencollective.com/xgboost#backer)]\n\n<a href=\"https://opencollective.com/xgboost#backers\" target=\"_blank\"><img src=\"https://opencollective.com/xgboost/backers.svg?width=890\"></a>\n"
  },
  {
    "path": "SECURITY.md",
    "content": "# Security Policy\n\n## Supported Versions\n\n<!-- Use this section to tell people about which versions of your project are\ncurrently being supported with security updates. -->\nSecurity updates are applied only to the most recent release.\n\n## Reporting a Vulnerability\n\n<!-- Use this section to tell people how to report a vulnerability.\n\nTell them where to go, how often they can expect to get an update on a\nreported vulnerability, what to expect if the vulnerability is accepted or\ndeclined, etc. -->\n\nTo report a security issue, please email\n[security@xgboost-ci.net](mailto:security@xgboost-ci.net)\nwith a description of the issue, the steps you took to create the issue,\naffected versions, and, if known, mitigations for the issue.\n\nAll support will be made on the best effort base, so please indicate the \"urgency level\" of the vulnerability as Critical, High, Medium or Low.\n"
  },
  {
    "path": "amalgamation/dmlc-minimum0.cc",
    "content": "/*!\n * Copyright 2015 by Contributors.\n * \\brief Mininum DMLC library Amalgamation, used for easy plugin of dmlc lib.\n *  Normally this is not needed.\n */\n#include \"../dmlc-core/src/io/line_split.cc\"\n#include \"../dmlc-core/src/io/recordio_split.cc\"\n#include \"../dmlc-core/src/io/input_split_base.cc\"\n#include \"../dmlc-core/src/io/local_filesys.cc\"\n#include \"../dmlc-core/src/io/filesys.cc\"\n#include \"../dmlc-core/src/io/indexed_recordio_split.cc\"\n#include \"../dmlc-core/src/data.cc\"\n#include \"../dmlc-core/src/io.cc\"\n#include \"../dmlc-core/src/recordio.cc\"\n"
  },
  {
    "path": "cmake/Doc.cmake",
    "content": "function(run_doxygen)\n  find_package(Doxygen REQUIRED)\n\n  if(NOT DOXYGEN_DOT_FOUND)\n    message(FATAL_ERROR \"Command `dot` not found.  Please install graphviz.\")\n  endif()\n\n  configure_file(\n    ${xgboost_SOURCE_DIR}/doc/Doxyfile.in\n    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)\n  add_custom_target(\n    doc_doxygen ALL\n    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile\n    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}\n    COMMENT \"Generate C APIs documentation.\"\n    VERBATIM)\nendfunction()\n"
  },
  {
    "path": "cmake/FindOpenMPMacOS.cmake",
    "content": "# Find OpenMP library on MacOS\n# Automatically handle locating libomp from the Homebrew package manager\n\n# lint_cmake: -package/consistency\n\nmacro(find_openmp_macos)\n  if(NOT APPLE)\n    message(FATAL_ERROR \"${CMAKE_CURRENT_FUNCTION}() must only be used on MacOS\")\n  endif()\n  find_package(OpenMP)\n  if(NOT OpenMP_FOUND)\n    # Try again with extra path info. This step is required for libomp 15+ from Homebrew,\n    # as libomp 15.0+ from brew is keg-only\n    # See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.\n    execute_process(COMMAND brew --prefix libomp\n                    OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX\n                    OUTPUT_STRIP_TRAILING_WHITESPACE)\n    set(OpenMP_C_FLAGS\n      \"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include\")\n    set(OpenMP_CXX_FLAGS\n      \"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include\")\n    set(OpenMP_C_LIB_NAMES omp)\n    set(OpenMP_CXX_LIB_NAMES omp)\n    set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)\n    find_package(OpenMP REQUIRED)\n  endif()\nendmacro()\n\n# Patch libxgboost.dylib so that it depends on @rpath/libomp.dylib instead of\n# /opt/homebrew/opt/libomp/lib/libomp.dylib or other hard-coded paths.\n# Doing so enables XGBoost to interoperate with multiple kinds of OpenMP\n# libraries. See https://github.com/lightgbm-org/LightGBM/pull/6391 for detailed\n# explanation. 
Adapted from https://github.com/lightgbm-org/LightGBM/pull/6391\n# by James Lamb.\n# MacOS only.\nfunction(patch_openmp_path_macos target target_default_output_name)\n  if(NOT APPLE)\n    message(FATAL_ERROR \"${CMAKE_CURRENT_FUNCTION}() must only be used on MacOS\")\n  endif()\n  # Get path to libomp found at build time\n  get_target_property(\n    __OpenMP_LIBRARY_LOCATION\n    OpenMP::OpenMP_CXX\n    INTERFACE_LINK_LIBRARIES\n  )\n  # Get the base name of the OpenMP lib\n  # Usually: libomp.dylib, libgomp.dylib, or libiomp.dylib\n  get_filename_component(\n    __OpenMP_LIBRARY_NAME\n    ${__OpenMP_LIBRARY_LOCATION}\n    NAME\n  )\n  # Get the directory containing the OpenMP lib\n  get_filename_component(\n    __OpenMP_LIBRARY_DIR\n    ${__OpenMP_LIBRARY_LOCATION}\n    DIRECTORY\n  )\n  # Get the name of the XGBoost lib, e.g. libxgboost\n  get_target_property(\n    __LIBXGBOOST_OUTPUT_NAME\n    ${target}\n    OUTPUT_NAME\n  )\n  if(NOT __LIBXGBOOST_OUTPUT_NAME)\n    set(__LIBXGBOOST_OUTPUT_NAME \"${target_default_output_name}\")\n  endif()\n\n  # Get the file name of the XGBoost lib, e.g. 
libxgboost.dylib\n  if(CMAKE_SHARED_LIBRARY_SUFFIX_CXX)\n    set(\n      __LIBXGBOOST_FILENAME_${target} \"${__LIBXGBOOST_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX_CXX}\"\n      CACHE INTERNAL \"Shared library filename ${target}\"\n    )\n  else()\n    set(\n      __LIBXGBOOST_FILENAME_${target} \"${__LIBXGBOOST_OUTPUT_NAME}.dylib\"\n      CACHE INTERNAL \"Shared library filename ${target}\"\n    )\n  endif()\n\n  message(STATUS \"Creating shared lib for target ${target}: ${__LIBXGBOOST_FILENAME_${target}}\")\n\n  # Override the absolute path to OpenMP with a relative one using @rpath.\n  #\n  # This also ensures that if a libomp.dylib has already been loaded, it'll just use that.\n  if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)\n    set(__LIB_DIR ${xgboost_BINARY_DIR}/lib)\n  else()\n    set(__LIB_DIR ${xgboost_SOURCE_DIR}/lib)\n  endif()\n  add_custom_command(\n    TARGET ${target}\n    POST_BUILD\n      COMMAND\n        install_name_tool\n        -change\n        ${__OpenMP_LIBRARY_LOCATION}\n        \"@rpath/${__OpenMP_LIBRARY_NAME}\"\n        \"${__LIBXGBOOST_FILENAME_${target}}\"\n      WORKING_DIRECTORY ${__LIB_DIR}\n  )\n  message(STATUS\n    \"${__LIBXGBOOST_FILENAME_${target}}: \"\n    \"Replacing hard-coded OpenMP install_name with '@rpath/${__OpenMP_LIBRARY_NAME}'...\"\n  )\n  # Add RPATH entries to ensure the loader looks in the following, in the following order:\n  #\n  #   - /opt/homebrew/opt/libomp/lib  (where 'brew install' / 'brew link' puts libomp.dylib)\n  #   - ${__OpenMP_LIBRARY_DIR}       (wherever find_package(OpenMP) found OpenMP at build time)\n  #\n  # Note: This list will only be used if libomp.dylib isn't already loaded into memory.\n  #       So Conda users will likely use ${CONDA_PREFIX}/libomp.dylib\n  execute_process(COMMAND brew --prefix libomp\n                  OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX\n                  OUTPUT_STRIP_TRAILING_WHITESPACE)\n  set_target_properties(\n    ${target}\n    PROPERTIES\n      
BUILD_WITH_INSTALL_RPATH TRUE\n      INSTALL_RPATH \"${HOMEBREW_LIBOMP_PREFIX}/lib;${__OpenMP_LIBRARY_DIR}\"\n      INSTALL_RPATH_USE_LINK_PATH FALSE\n  )\nendfunction()\n"
  },
  {
    "path": "cmake/PrefetchIntrinsics.cmake",
    "content": "function(find_prefetch_intrinsics)\n  include(CheckCXXSourceCompiles)\n  check_cxx_source_compiles(\"\n  #include <xmmintrin.h>\n  int main() {\n    char data = 0;\n    const char* address = &data;\n    _mm_prefetch(address, _MM_HINT_NTA);\n    return 0;\n  }\n  \" XGBOOST_MM_PREFETCH_PRESENT)\n  check_cxx_source_compiles(\"\n  int main() {\n    char data = 0;\n    const char* address = &data;\n    __builtin_prefetch(address, 0, 0);\n    return 0;\n  }\n  \" XGBOOST_BUILTIN_PREFETCH_PRESENT)\n  set(XGBOOST_MM_PREFETCH_PRESENT ${XGBOOST_MM_PREFETCH_PRESENT} PARENT_SCOPE)\n  set(XGBOOST_BUILTIN_PREFETCH_PRESENT ${XGBOOST_BUILTIN_PREFETCH_PRESENT} PARENT_SCOPE)\nendfunction()\n"
  },
  {
    "path": "cmake/RPackageInstall.cmake.in",
    "content": "# Commands to install the R package as a CMake install target\n\nfunction(check_call)\n  set(cmd COMMAND)\n  cmake_parse_arguments(\n    PARSE_ARGV 0\n    CALL_ARG \"\" \"\" \"${cmd}\"\n  )\n  string(REPLACE \";\" \" \" commands \"${CALL_ARG_COMMAND}\")\n  message(\"Command: ${commands}\")\n  execute_process(COMMAND ${CALL_ARG_COMMAND}\n                  OUTPUT_VARIABLE _out\n\t\t\t\t  ERROR_VARIABLE _err\n\t\t\t\t  RESULT_VARIABLE _res)\n  if(NOT \"${_res}\" EQUAL \"0\")\n    message(FATAL_ERROR \"out: ${_out}, err: ${_err}, res: ${_res}\")\n  endif()\nendfunction()\n\n# Important paths\nset(build_dir \"@build_dir@\")\nset(LIBR_EXECUTABLE \"@LIBR_EXECUTABLE@\")\n\n# Back up cmake_install.cmake\nfile(WRITE \"${build_dir}/R-package/src/Makevars\" \"all:\")\nfile(WRITE \"${build_dir}/R-package/src/Makevars.win\" \"all:\")\n\n# Install dependencies\nset(XGB_DEPS_SCRIPT\n    \"deps = setdiff(c('data.table', 'jsonlite', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')\")\ncheck_call(COMMAND \"${LIBR_EXECUTABLE}\" -q -e \"${XGB_DEPS_SCRIPT}\")\n\n# Install the XGBoost R package\ncheck_call(COMMAND \"${LIBR_EXECUTABLE}\" CMD INSTALL --no-multiarch --build \"${build_dir}/R-package\")"
  },
  {
    "path": "cmake/RPackageInstallTargetSetup.cmake",
    "content": "# Assembles the R-package files in build_dir;\n# if necessary, installs the main R package dependencies;\n# runs R CMD INSTALL.\nfunction(setup_rpackage_install_target rlib_target build_dir)\n  configure_file(${PROJECT_SOURCE_DIR}/cmake/RPackageInstall.cmake.in ${PROJECT_BINARY_DIR}/RPackageInstall.cmake @ONLY)\n  install(\n    DIRECTORY \"${xgboost_SOURCE_DIR}/R-package\"\n    DESTINATION \"${build_dir}\"\n    PATTERN \"src/*\" EXCLUDE\n    PATTERN \"R-package/configure\" EXCLUDE\n  )\n  install(TARGETS ${rlib_target}\n    LIBRARY DESTINATION \"${build_dir}/R-package/src/\"\n    RUNTIME DESTINATION \"${build_dir}/R-package/src/\")\n  install(SCRIPT ${PROJECT_BINARY_DIR}/RPackageInstall.cmake)\nendfunction()\n"
  },
  {
    "path": "cmake/Sanitizer.cmake",
    "content": "# Set appropriate compiler and linker flags for sanitizers.\n#\n# Usage of this module:\n#  enable_sanitizers(\"address;leak\")\n\n# Add flags\nmacro(enable_sanitizer sanitizer)\n  if(${sanitizer} MATCHES \"address\")\n    find_package(ASan)\n    set(SAN_COMPILE_FLAGS \"${SAN_COMPILE_FLAGS} -fsanitize=address\")\n\n  elseif(${sanitizer} MATCHES \"thread\")\n    find_package(TSan)\n    set(SAN_COMPILE_FLAGS \"${SAN_COMPILE_FLAGS} -fsanitize=thread\")\n    if(TSan_FOUND)\n      link_libraries(${TSan_LIBRARY})\n    endif()\n\n  elseif(${sanitizer} MATCHES \"leak\")\n    find_package(LSan)\n    set(SAN_COMPILE_FLAGS \"${SAN_COMPILE_FLAGS} -fsanitize=leak\")\n\n  elseif(${sanitizer} MATCHES \"undefined\")\n    find_package(UBSan)\n    set(SAN_COMPILE_FLAGS \"${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined\")\n\n  else()\n    message(FATAL_ERROR \"Santizer ${sanitizer} not supported.\")\n  endif()\nendmacro()\n\nmacro(enable_sanitizers SANITIZERS)\n  # Check sanitizers compatibility.\n  # Idealy, we should use if(san IN_LIST SANITIZERS) ... 
endif()\n  # But I haven't figure out how to make it work.\n  foreach( _san ${SANITIZERS} )\n    string(TOLOWER ${_san} _san)\n    if(_san MATCHES \"thread\")\n      if(${_use_other_sanitizers})\n        message(FATAL_ERROR\n          \"thread sanitizer is not compatible with ${_san} sanitizer.\")\n      endif()\n      set(_use_thread_sanitizer 1)\n    else()\n      if(${_use_thread_sanitizer})\n        message(FATAL_ERROR\n          \"${_san} sanitizer is not compatible with thread sanitizer.\")\n      endif()\n      set(_use_other_sanitizers 1)\n    endif()\n  endforeach()\n\n  message(\"Sanitizers: ${SANITIZERS}\")\n\n  foreach( _san ${SANITIZERS} )\n    string(TOLOWER ${_san} _san)\n    enable_sanitizer(${_san})\n  endforeach()\n  message(\"Sanitizers compile flags: ${SAN_COMPILE_FLAGS}\")\n  set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${SAN_COMPILE_FLAGS}\")\n  set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} ${SAN_COMPILE_FLAGS}\")\nendmacro()\n"
  },
  {
    "path": "cmake/Utils.cmake",
    "content": "# Automatically set source group based on folder\nfunction(auto_source_group SOURCES)\n\n  foreach(FILE ${SOURCES})\n      get_filename_component(PARENT_DIR \"${FILE}\" PATH)\n\n      # skip src or include and changes /'s to \\\\'s\n      string(REPLACE \"${CMAKE_CURRENT_LIST_DIR}\" \"\" GROUP \"${PARENT_DIR}\")\n      string(REPLACE \"/\" \"\\\\\\\\\" GROUP \"${GROUP}\")\n      string(REGEX REPLACE \"^\\\\\\\\\" \"\" GROUP \"${GROUP}\")\n\n      source_group(\"${GROUP}\" FILES \"${FILE}\")\n  endforeach()\nendfunction()\n\n# Set output directory of target, ignoring debug or release\nfunction(set_output_directory target dir)\n  set_target_properties(${target} PROPERTIES\n    RUNTIME_OUTPUT_DIRECTORY ${dir}\n    RUNTIME_OUTPUT_DIRECTORY_DEBUG ${dir}\n    RUNTIME_OUTPUT_DIRECTORY_RELEASE ${dir}\n    RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${dir}\n    RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL ${dir}\n    LIBRARY_OUTPUT_DIRECTORY ${dir}\n    LIBRARY_OUTPUT_DIRECTORY_DEBUG ${dir}\n    LIBRARY_OUTPUT_DIRECTORY_RELEASE ${dir}\n    LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${dir}\n    LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL ${dir}\n    ARCHIVE_OUTPUT_DIRECTORY ${dir}\n    ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${dir}\n    ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${dir}\n    ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${dir}\n    ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL ${dir})\nendfunction()\n\n# Set a default build type to release if none was specified\nfunction(set_default_configuration_release)\n    if(CMAKE_CONFIGURATION_TYPES STREQUAL \"Debug;Release;MinSizeRel;RelWithDebInfo\") # multiconfig generator?\n        set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING \"\" FORCE)\n    elseif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)\n      message(STATUS \"Setting build type to 'Release' as none was specified.\")\n      set(CMAKE_BUILD_TYPE Release CACHE STRING \"Choose the type of build.\" FORCE)\n    endif()\nendfunction()\n\nif(BUILD_WITH_GIT_HASH)\n  execute_process(COMMAND 
git rev-parse --short HEAD\n    WORKING_DIRECTORY ${xgboost_SOURCE_DIR}\n    OUTPUT_STRIP_TRAILING_WHITESPACE\n    OUTPUT_VARIABLE XGBOOST_GIT_HASH\n    ERROR_VARIABLE XGBOOST_GIT_ERROR\n    RESULT_VARIABLE GIT_COMMAND_RESULT)\n\n  if(NOT GIT_COMMAND_RESULT EQUAL 0)\n    message(FATAL_ERROR \"Failed to retrieve the git hash:\\n${XGBOOST_GIT_ERROR}\")\n  endif()\n  message(STATUS \"Git hash: ${XGBOOST_GIT_HASH}\")\nendif()\n\n# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures\n# Also generates PTX for the most recent architecture for forwards compatibility\nfunction(compute_cmake_cuda_archs archs)\n  if(CMAKE_CUDA_COMPILER_VERSION MATCHES \"^([0-9]+\\\\.[0-9]+)\")\n    set(CUDA_VERSION \"${CMAKE_MATCH_1}\")\n  endif()\n  list(SORT archs)\n  unset(CMAKE_CUDA_ARCHITECTURES CACHE)\n  set(CMAKE_CUDA_ARCHITECTURES ${archs})\n\n  # Set up defaults based on CUDA version\n  # Remember to update arch-specific tunings when supporting new archs.\n  if(NOT CMAKE_CUDA_ARCHITECTURES)\n    if(CUDA_VERSION VERSION_GREATER_EQUAL \"13.0\")\n      set(CMAKE_CUDA_ARCHITECTURES 75 80 90 100 120)\n    elseif(CUDA_VERSION VERSION_GREATER_EQUAL \"12.8\")\n      set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90 100 120)\n    elseif(CUDA_VERSION VERSION_GREATER_EQUAL \"11.8\")\n      set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)\n    elseif(CUDA_VERSION VERSION_GREATER_EQUAL \"11.0\")\n      set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)\n    elseif(CUDA_VERSION VERSION_GREATER_EQUAL \"10.0\")\n      set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)\n    elseif(CUDA_VERSION VERSION_GREATER_EQUAL \"9.0\")\n      set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)\n    else()\n      set(CMAKE_CUDA_ARCHITECTURES 35 50 60)\n    endif()\n  endif()\n\n  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND \"-real\")\n  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE \"([0-9]+)-real\" \"\\\\0;\\\\1-virtual\" AT -1)\n  set(CMAKE_CUDA_ARCHITECTURES \"${CMAKE_CUDA_ARCHITECTURES}\" PARENT_SCOPE)\n  message(STATUS 
\"CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}\")\nendfunction()\n\n# Set CUDA related flags to target.  Must be used after code `format_gencode_flags`.\nfunction(xgboost_set_cuda_flags target)\n  target_compile_options(${target} PRIVATE\n    $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>\n    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>\n    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>\n    $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>\n    $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>\n  )\n\n  if(FORCE_COLORED_OUTPUT)\n    if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL \"Ninja\") AND\n        ((CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\") OR\n          (CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\")))\n      target_compile_options(${target} PRIVATE\n        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fdiagnostics-color=always>)\n    endif()\n  endif()\n\n  if(USE_DEVICE_DEBUG)\n    target_compile_options(${target} PRIVATE\n      $<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-G;-src-in-ptx>)\n  endif()\n\n  if(USE_NVTX)\n    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)\n    if(NOT USE_DEVICE_DEBUG)\n      target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)\n    endif()\n  endif()\n\n  # Use CCCL we find before CUDA Toolkit to make sure we get newer headers as intended\n  # The CUDA Toolkit includes its own copy of CCCL that often lags the latest releases\n  # (and would be picked up otherwise)\n  if(BUILD_STATIC_LIB)\n    # If the downstream user is statically linking with libxgboost, it needs to\n    # explicitly link with CCCL and CUDA runtime.\n    target_link_libraries(${target}\n      PUBLIC CCCL::CCCL CUDA::cudart_static)\n  else()\n    # If the downstream user is dynamically linking with libxgboost, it does not\n    # need to link with CCCL and CUDA runtime.\n    target_link_libraries(${target} PRIVATE CCCL::CCCL CUDA::cudart_static)\n  endif()\n  
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)\n  target_include_directories(\n    ${target} PRIVATE\n    ${xgboost_SOURCE_DIR}/gputreeshap)\n\n  if(MSVC)\n    target_compile_options(${target} PRIVATE\n      $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)\n  endif()\n\n  set_target_properties(${target} PROPERTIES\n    CUDA_STANDARD 17\n    CUDA_STANDARD_REQUIRED ON)\n  if(USE_CUDA_LTO)\n    set_target_properties(${target} PROPERTIES\n      INTERPROCEDURAL_OPTIMIZATION ON\n      CUDA_SEPARABLE_COMPILATION ON)\n  else()\n    set_target_properties(${target} PROPERTIES\n      CUDA_SEPARABLE_COMPILATION OFF)\n  endif()\nendfunction()\n\nfunction(xgboost_link_nccl target)\n  set(xgboost_nccl_flags -DXGBOOST_USE_NCCL=1)\n  if(USE_DLOPEN_NCCL)\n    list(APPEND xgboost_nccl_flags -DXGBOOST_USE_DLOPEN_NCCL=1)\n    target_link_libraries(${target} PRIVATE ${CMAKE_DL_LIBS})\n  endif()\n\n  if(BUILD_STATIC_LIB)\n    target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})\n    target_compile_definitions(${target} PUBLIC ${xgboost_nccl_flags})\n    target_link_libraries(${target} PUBLIC ${NCCL_LIBRARY})\n  else()\n    target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIR})\n    target_compile_definitions(${target} PRIVATE ${xgboost_nccl_flags})\n    if(NOT USE_DLOPEN_NCCL)\n      target_link_libraries(${target} PRIVATE ${NCCL_LIBRARY})\n    endif()\n  endif()\nendfunction()\n\n# compile options\nmacro(xgboost_target_properties target)\n  set_target_properties(${target} PROPERTIES\n    CXX_STANDARD 17\n    CXX_STANDARD_REQUIRED ON\n    POSITION_INDEPENDENT_CODE ON)\n\n  if(HIDE_CXX_SYMBOLS)\n    #-- Hide all C++ symbols\n    set_target_properties(${target} PROPERTIES\n      C_VISIBILITY_PRESET hidden\n      CXX_VISIBILITY_PRESET hidden\n      CUDA_VISIBILITY_PRESET hidden\n    )\n  endif()\n\n  if(ENABLE_ALL_WARNINGS)\n    target_compile_options(${target} PUBLIC\n      $<IF:$<COMPILE_LANGUAGE:CUDA>,\n      -Xcompiler=-Wall -Xcompiler=-Wextra 
-Xcompiler=-Wno-expansion-to-defined,\n      -Wall -Wextra -Wno-expansion-to-defined>\n    )\n  endif()\n\n  target_compile_options(${target}\n    PRIVATE\n    $<$<AND:$<CXX_COMPILER_ID:MSVC>,$<COMPILE_LANGUAGE:CXX>>:/MP>\n    $<$<AND:$<NOT:$<CXX_COMPILER_ID:MSVC>>,$<COMPILE_LANGUAGE:CXX>>:-funroll-loops>)\n\n  if(MSVC)\n    target_compile_options(${target} PRIVATE\n      $<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>\n      -D_CRT_SECURE_NO_WARNINGS\n      -D_CRT_SECURE_NO_DEPRECATE\n    )\n  endif()\n\n  if(WIN32 AND MINGW)\n    target_compile_options(${target} PUBLIC -static-libstdc++)\n  endif()\n\n  if(NOT WIN32 AND ENABLE_ALL_WARNINGS)\n    target_compile_options(${target} PRIVATE\n      $<$<COMPILE_LANGUAGE:CUDA>:-Werror=cross-execution-space-call>\n    )\n  endif()\nendmacro()\n\n# Custom definitions used in xgboost.\nmacro(xgboost_target_defs target)\n  if(NOT ${target} STREQUAL \"dmlc\") # skip dmlc core for custom logging.\n    target_compile_definitions(${target}\n      PRIVATE\n      -DDMLC_LOG_CUSTOMIZE=1\n      $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:_MWAITXINTRIN_H_INCLUDED>)\n  endif()\n  if(USE_DEBUG_OUTPUT)\n    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_DEBUG_OUTPUT=1)\n  endif()\n  if(XGBOOST_MM_PREFETCH_PRESENT)\n    target_compile_definitions(${target}\n      PRIVATE\n      -DXGBOOST_MM_PREFETCH_PRESENT=1)\n  endif()\n  if(XGBOOST_BUILTIN_PREFETCH_PRESENT)\n    target_compile_definitions(${target}\n      PRIVATE\n      -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)\n  endif()\n\n  if(PLUGIN_RMM)\n    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)\n  endif()\n\n  if(USE_NVCOMP)\n    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_NVCOMP=1)\n  endif()\n  if(BUILD_WITH_GIT_HASH)\n    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_GIT_HASH=\"${XGBOOST_GIT_HASH}\")\n  endif()\nendmacro()\n\n# handles dependencies\nmacro(xgboost_target_link_libraries target)\n  if(NOT (CMAKE_SYSTEM_NAME STREQUAL \"Emscripten\"))\n   
 if(BUILD_STATIC_LIB)\n      target_link_libraries(${target} PUBLIC Threads::Threads ${CMAKE_THREAD_LIBS_INIT})\n    else()\n      target_link_libraries(${target} PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})\n    endif()\n  endif()\n\n  if(USE_OPENMP)\n    if(BUILD_STATIC_LIB)\n      target_link_libraries(${target} PUBLIC OpenMP::OpenMP_CXX)\n    else()\n      target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX)\n    endif()\n  endif()\n\n  if(USE_CUDA)\n    xgboost_set_cuda_flags(${target})\n  endif()\n\n  if(PLUGIN_RMM)\n    target_link_libraries(${target} PRIVATE rmm::rmm)\n  endif()\n\n  if(USE_NVCOMP)\n    target_link_libraries(${target} PRIVATE nvcomp::nvcomp)\n  endif()\n\n  if(USE_NCCL)\n    xgboost_link_nccl(${target})\n  endif()\n\n  if(USE_NVTX)\n    target_link_libraries(${target} PRIVATE CUDA::nvtx3)\n  endif()\n\n  if(MINGW)\n    target_link_libraries(${target} PRIVATE wsock32 ws2_32)\n  endif()\nendmacro()\n"
  },
  {
    "path": "cmake/Version.cmake",
    "content": "function(write_version)\n  message(STATUS \"xgboost VERSION: ${xgboost_VERSION}\")\n  configure_file(\n    ${xgboost_SOURCE_DIR}/cmake/version_config.h.in\n    ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h\n    @ONLY\n    NEWLINE_STYLE UNIX)\nendfunction()\n"
  },
  {
    "path": "cmake/modules/FindASan.cmake",
    "content": "set(ASan_LIB_NAME ASan)\n\nfind_library(ASan_LIBRARY\n  NAMES libasan.so libasan.so.6 libasan.so.5 libasan.so.4 libasan.so.3 libasan.so.2 libasan.so.1 libasan.so.0\n  PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib)\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(ASan DEFAULT_MSG\n  ASan_LIBRARY)\n\nmark_as_advanced(\n  ASan_LIBRARY\n  ASan_LIB_NAME)\n"
  },
  {
    "path": "cmake/modules/FindLSan.cmake",
    "content": "set(LSan_LIB_NAME lsan)\n\nfind_library(LSan_LIBRARY\n  NAMES liblsan.so liblsan.so.0 liblsan.so.0.0.0\n  PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib)\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(LSan DEFAULT_MSG\n  LSan_LIBRARY)\n\nmark_as_advanced(\n  LSan_LIBRARY\n  LSan_LIB_NAME)\n"
  },
  {
    "path": "cmake/modules/FindLibR.cmake",
    "content": "# CMake module for R\n# Borrows ideas from RStudio's FindLibR.cmake\n#\n# Defines the following:\n#  LIBR_FOUND\n#  LIBR_HOME\n#  LIBR_EXECUTABLE\n#  LIBR_INCLUDE_DIRS\n#  LIBR_LIB_DIR\n#  LIBR_CORE_LIBRARY\n# and a cmake function to create R.lib for MSVC\n#\n# The following could be provided by user through cmake's -D options:\n#  LIBR_EXECUTABLE (for unix and win)\n#  R_VERSION (for win)\n#  R_ARCH (for win 64 when want 32 bit build)\n#\n# TODO:\n# - someone to verify OSX detection,\n# - possibly, add OSX detection based on current R in PATH or LIBR_EXECUTABLE\n# - improve registry-based R_HOME detection in Windows (from a set of R_VERSION's)\n\n\n# Windows users might want to change this to their R version:\nif(NOT R_VERSION)\n  set(R_VERSION \"4.0.0\")\nendif()\nif(NOT R_ARCH)\n  if(\"${CMAKE_SIZEOF_VOID_P}\" STREQUAL \"4\")\n    set(R_ARCH \"i386\")\n  else()\n    set(R_ARCH \"x64\")\n  endif()\nendif()\n\n\n# Creates R.lib and R.def in the build directory for linking with MSVC\nfunction(create_rlib_for_msvc)\n  # various checks and warnings\n  if(NOT WIN32 OR (NOT MSVC AND NOT MINGW))\n    message(FATAL_ERROR \"create_rlib_for_msvc() can only be used with MSVC or MINGW\")\n  endif()\n  if(NOT EXISTS \"${LIBR_LIB_DIR}\")\n    message(FATAL_ERROR \"LIBR_LIB_DIR was not set!\")\n  endif()\n  find_program(DLLTOOL_EXE dlltool)\n  if(NOT DLLTOOL_EXE)\n    message(FATAL_ERROR \"\\ndlltool.exe not found!\\\n      \\nDo you have Rtools installed with its MinGW's bin/ in PATH?\")\n  endif()\n\n  # extract symbols from R.dll into R.def and R.lib import library\n  get_filename_component(\n    LIBR_RSCRIPT_EXECUTABLE_DIR\n    ${LIBR_EXECUTABLE}\n    DIRECTORY\n  )\n  set(LIBR_RSCRIPT_EXECUTABLE \"${LIBR_RSCRIPT_EXECUTABLE_DIR}/Rscript\")\n\n  execute_process(\n    COMMAND ${LIBR_RSCRIPT_EXECUTABLE}\n    \"${CMAKE_CURRENT_BINARY_DIR}/../../R-package/inst/make-r-def.R\"\n    \"${LIBR_LIB_DIR}/R.dll\" \"${CMAKE_CURRENT_BINARY_DIR}/R.def\"\n  )\n\n  
execute_process(COMMAND ${DLLTOOL_EXE}\n    \"--input-def\" \"${CMAKE_CURRENT_BINARY_DIR}/R.def\"\n    \"--output-lib\" \"${CMAKE_CURRENT_BINARY_DIR}/R.lib\"\n    \"--temp-prefix\" \"Rlibtemp\"\n    COMMAND_ECHO STDOUT\n    COMMAND_ERROR_IS_FATAL ANY)\nendfunction()\n\n\n# detection for OSX\nif(APPLE)\n\n  find_library(LIBR_LIBRARIES R)\n\n  if(LIBR_LIBRARIES MATCHES \".*\\\\.framework\")\n    set(LIBR_HOME \"${LIBR_LIBRARIES}/Resources\" CACHE PATH \"R home directory\")\n    set(LIBR_INCLUDE_DIRS \"${LIBR_HOME}/include\" CACHE PATH \"R include directory\")\n    set(LIBR_EXECUTABLE \"${LIBR_HOME}/R\" CACHE PATH \"R executable\")\n    set(LIBR_LIB_DIR \"${LIBR_HOME}/lib\" CACHE PATH \"R lib directory\")\n  else()\n    get_filename_component(_LIBR_LIBRARIES \"${LIBR_LIBRARIES}\" REALPATH)\n    get_filename_component(_LIBR_LIBRARIES_DIR \"${_LIBR_LIBRARIES}\" DIRECTORY)\n    set(LIBR_EXECUTABLE \"${_LIBR_LIBRARIES_DIR}/../bin/R\")\n    execute_process(\n      COMMAND ${LIBR_EXECUTABLE} \"--slave\" \"--vanilla\" \"-e\" \"cat(R.home())\"\n      OUTPUT_VARIABLE LIBR_HOME)\n    set(LIBR_HOME ${LIBR_HOME} CACHE PATH \"R home directory\")\n    set(LIBR_INCLUDE_DIRS \"${LIBR_HOME}/include\" CACHE PATH \"R include directory\")\n    set(LIBR_LIB_DIR \"${LIBR_HOME}/lib\" CACHE PATH \"R lib directory\")\n  endif()\n\n# detection for UNIX & Win32\nelse()\n\n  # attempt to find R executable\n  if(NOT LIBR_EXECUTABLE)\n    find_program(LIBR_EXECUTABLE NAMES R R.exe)\n  endif()\n\n  if(UNIX)\n\n    if(NOT LIBR_EXECUTABLE)\n      message(FATAL_ERROR \"Unable to locate R executable.\\\n        \\nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable\")\n    endif()\n\n    # ask R for the home path\n    execute_process(\n      COMMAND ${LIBR_EXECUTABLE} \"--slave\" \"--vanilla\" \"-e\" \"cat(R.home())\"\n      OUTPUT_VARIABLE LIBR_HOME\n    )\n    # ask R for the include dir\n    execute_process(\n      COMMAND ${LIBR_EXECUTABLE} \"--slave\" 
\"--vanilla\" \"-e\" \"cat(R.home('include'))\"\n      OUTPUT_VARIABLE LIBR_INCLUDE_DIRS\n    )\n    # ask R for the lib dir\n    execute_process(\n      COMMAND ${LIBR_EXECUTABLE} \"--slave\" \"--vanilla\" \"-e\" \"cat(R.home('lib'))\"\n      OUTPUT_VARIABLE LIBR_LIB_DIR\n    )\n\n  # Windows\n  else()\n    # ask R for R_HOME\n    if(LIBR_EXECUTABLE)\n      execute_process(\n        COMMAND ${LIBR_EXECUTABLE} \"--slave\" \"--no-save\" \"-e\" \"cat(normalizePath(R.home(),winslash='/'))\"\n        OUTPUT_VARIABLE LIBR_HOME)\n    endif()\n    # if R executable not available, query R_HOME path from registry\n    if(NOT LIBR_HOME)\n      get_filename_component(LIBR_HOME\n        \"[HKEY_LOCAL_MACHINE\\\\SOFTWARE\\\\R-core\\\\R\\\\${R_VERSION};InstallPath]\"\n        ABSOLUTE)\n      if(NOT LIBR_HOME)\n        message(FATAL_ERROR \"\\nUnable to locate R executable.\\\n          \\nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable\")\n      endif()\n    endif()\n    # set exe location based on R_ARCH\n    if(NOT LIBR_EXECUTABLE)\n      set(LIBR_EXECUTABLE \"${LIBR_HOME}/bin/${R_ARCH}/R.exe\")\n    endif()\n    # set other R paths based on home path\n    set(LIBR_INCLUDE_DIRS \"${LIBR_HOME}/include\")\n    set(LIBR_LIB_DIR \"${LIBR_HOME}/bin/${R_ARCH}\")\n\nmessage(STATUS \"LIBR_HOME [${LIBR_HOME}]\")\nmessage(STATUS \"LIBR_EXECUTABLE [${LIBR_EXECUTABLE}]\")\nmessage(STATUS \"LIBR_INCLUDE_DIRS [${LIBR_INCLUDE_DIRS}]\")\nmessage(STATUS \"LIBR_LIB_DIR [${LIBR_LIB_DIR}]\")\nmessage(STATUS \"LIBR_CORE_LIBRARY [${LIBR_CORE_LIBRARY}]\")\n\n  endif()\n\nendif()\n\nif((WIN32 AND MSVC) OR (WIN32 AND MINGW))\n  # create a local R.lib import library for R.dll if it doesn't exist\n  if(NOT EXISTS \"${CMAKE_CURRENT_BINARY_DIR}/R.lib\")\n    create_rlib_for_msvc()\n  endif()\nendif()\n\n# look for the core R library\nfind_library(LIBR_CORE_LIBRARY NAMES R\n  HINTS \"${CMAKE_CURRENT_BINARY_DIR}\" \"${LIBR_LIB_DIR}\" \"${LIBR_HOME}/bin\" 
\"${LIBR_LIBRARIES}\")\nif(LIBR_CORE_LIBRARY-NOTFOUND)\n  message(STATUS \"Could not find R core shared library.\")\nendif()\n\nset(LIBR_HOME ${LIBR_HOME} CACHE PATH \"R home directory\")\nset(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH \"R executable\")\nset(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH \"R include directory\")\nset(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH \"R shared libraries directory\")\nset(LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY} CACHE PATH \"R core shared library\")\n\n# define find requirements\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(LibR DEFAULT_MSG\n  LIBR_HOME\n  LIBR_EXECUTABLE\n  LIBR_INCLUDE_DIRS\n  LIBR_LIB_DIR\n  LIBR_CORE_LIBRARY\n)\n\nif(LIBR_FOUND)\n  message(STATUS \"Found R: ${LIBR_EXECUTABLE}\")\nendif()\n"
  },
  {
    "path": "cmake/modules/FindNVML.cmake",
    "content": "if(NVML_LIBRARY)\n  unset(NVML_LIBRARY CACHE)\nendif()\n\nset(NVML_LIB_NAME nvml)\n\nfind_path(NVML_INCLUDE_DIR\n  NAMES nvml.h\n  PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include)\n\nfind_library(NVML_LIBRARY\n  NAMES nvidia-ml)\n\nmessage(STATUS \"Using nvml library: ${NVML_LIBRARY}\")\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(NVML DEFAULT_MSG\n                                  NVML_INCLUDE_DIR NVML_LIBRARY)\n\nmark_as_advanced(\n  NVML_INCLUDE_DIR\n  NVML_LIBRARY\n)\n"
  },
  {
    "path": "cmake/modules/FindNccl.cmake",
    "content": "#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# Tries to find NCCL headers and libraries.\n#\n# Usage of this module as follows:\n#\n#  find_package(NCCL)\n#\n# Variables used by this module, they can change the default behaviour and need\n# to be set before calling find_package:\n#\n#  NCCL_ROOT - When set, this path is inspected instead of standard library\n#              locations as the root of the NCCL installation.\n#              The environment variable NCCL_ROOT overrides this variable.\n#\n# This module defines\n#  Nccl_FOUND, whether nccl has been found\n#  NCCL_INCLUDE_DIR, directory containing header\n#  NCCL_LIBRARY, directory containing nccl library\n#  NCCL_LIB_NAME, nccl library name\n#  USE_NCCL_LIB_PATH, when set, NCCL_LIBRARY path is also inspected for the\n#                     location of the nccl library. 
This would disable\n#                     switching between static and shared.\n#\n# This module assumes that the user has already called find_package(CUDA)\n\nif(NCCL_LIBRARY)\n  if(NOT USE_NCCL_LIB_PATH)\n    # Don't cache NCCL_LIBRARY to enable switching between static and shared.\n    unset(NCCL_LIBRARY CACHE)\n  endif()\nendif()\n\nif(BUILD_WITH_SHARED_NCCL)\n  # libnccl.so\n  set(NCCL_LIB_NAME nccl)\nelse()\n  # libnccl_static.a\n  set(NCCL_LIB_NAME nccl_static)\nendif()\n\nfind_path(NCCL_INCLUDE_DIR\n  NAMES nccl.h\n  HINTS  ${NCCL_ROOT}/include $ENV{NCCL_ROOT}/include)\n\nif(USE_DLOPEN_NCCL)\n  include(FindPackageHandleStandardArgs)\n  find_package_handle_standard_args(Nccl DEFAULT_MSG NCCL_INCLUDE_DIR)\n\n  mark_as_advanced(NCCL_INCLUDE_DIR)\nelse()\n  find_library(NCCL_LIBRARY\n    NAMES ${NCCL_LIB_NAME}\n    HINTS ${NCCL_ROOT}/lib $ENV{NCCL_ROOT}/lib/)\n\n  message(STATUS \"Using nccl library: ${NCCL_LIBRARY}\")\n\n  include(FindPackageHandleStandardArgs)\n  find_package_handle_standard_args(Nccl DEFAULT_MSG\n    NCCL_INCLUDE_DIR NCCL_LIBRARY)\n\n  mark_as_advanced(\n    NCCL_INCLUDE_DIR\n    NCCL_LIBRARY\n  )\nendif()\n"
  },
  {
    "path": "cmake/modules/FindTSan.cmake",
    "content": "set(TSan_LIB_NAME tsan)\n\nfind_library(TSan_LIBRARY\n  NAMES libtsan.so libtsan.so.0 libtsan.so.0.0.0\n  PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib)\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(TSan DEFAULT_MSG\n  TSan_LIBRARY)\n\nmark_as_advanced(\n  TSan_LIBRARY\n  TSan_LIB_NAME)\n"
  },
  {
    "path": "cmake/modules/FindUBSan.cmake",
    "content": "set(UBSan_LIB_NAME UBSan)\n\nfind_library(UBSan_LIBRARY\n  NAMES libubsan.so libubsan.so.5 libubsan.so.4 libubsan.so.3 libubsan.so.2 libubsan.so.1 libubsan.so.0\n  PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib)\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(UBSan DEFAULT_MSG\n  UBSan_LIBRARY)\n\nmark_as_advanced(\n  UBSan_LIBRARY\n  UBSan_LIB_NAME)\n"
  },
  {
    "path": "cmake/version_config.h.in",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n */\n#ifndef XGBOOST_VERSION_CONFIG_H_\n#define XGBOOST_VERSION_CONFIG_H_\n\n#define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@ /* NOLINT */\n#define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@ /* NOLINT */\n#define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@ /* NOLINT */\n\n#endif  // XGBOOST_VERSION_CONFIG_H_\n"
  },
  {
    "path": "cmake/xgboost-config.cmake.in",
    "content": "@PACKAGE_INIT@\n\nset(USE_OPENMP @USE_OPENMP@)\nset(USE_CUDA @USE_CUDA@)\nset(USE_NCCL @USE_NCCL@)\nset(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@)\n\ninclude(CMakeFindDependencyMacro)\n\nif (XGBOOST_BUILD_STATIC_LIB)\n  find_dependency(Threads)\n  if(USE_OPENMP)\n    find_dependency(OpenMP)\n  endif()\n  if(USE_CUDA)\n    find_dependency(CUDA)\n  endif()\n  # nccl should be linked statically if xgboost is built as static library.\nendif (XGBOOST_BUILD_STATIC_LIB)\n\nif(NOT TARGET xgboost::xgboost)\n  include(${CMAKE_CURRENT_LIST_DIR}/XGBoostTargets.cmake)\nendif()\n\nmessage(STATUS \"Found XGBoost (found version \\\"${xgboost_VERSION}\\\")\")\n"
  },
  {
    "path": "cmake/xgboost.pc.in",
    "content": "prefix=@CMAKE_INSTALL_PREFIX@\nversion=@xgboost_VERSION@\nexec_prefix=${prefix}/bin\nlibdir=${prefix}/lib\nincludedir=${prefix}/include\n\nName: xgboost\nDescription: XGBoost - Scalable and Flexible Gradient Boosting.\nVersion: ${version}\n\nCflags: -I${includedir}\nLibs: -L${libdir} -lxgboost\n"
  },
  {
    "path": "demo/.gitignore",
    "content": "*.libsvm\n*.pkl\n"
  },
  {
    "path": "demo/README.md",
    "content": "Awesome XGBoost\n===============\nThis page contains a curated list of examples, tutorials, blogs about XGBoost usecases.\nIt is inspired by [awesome-MXNet](https://github.com/dmlc/mxnet/blob/master/example/README.md),\n[awesome-php](https://github.com/ziadoz/awesome-php) and [awesome-machine-learning](https://github.com/josephmisiti/awesome-machine-learning).\n\nPlease send a pull request if you find things that belongs to here.\n\nContents\n--------\n- [Code Examples](#code-examples)\n  - [Features Walkthrough](#features-walkthrough)\n  - [Benchmarks](#benchmarks)\n- [Machine Learning Challenge Winning Solutions](#machine-learning-challenge-winning-solutions)\n- [Tutorials](#tutorials)\n- [Usecases](#usecases)\n- [Tools using XGBoost](#tools-using-xgboost)\n- [Integrations with 3rd party software](#integrations-with-3rd-party-software)\n- [Awards](#awards)\n- [Windows Binaries](#windows-binaries)\n\nCode Examples\n-------------\n\n### Features Walkthrough\n\n_Note: for the R package, see the in-package examples and vignettes instead_\n\n_Note: For the Python package, see [Feature Walk through](https://xgboost.readthedocs.io/en/stable/python/examples/index.html)._\n\nThis is a list of short codes introducing different functionalities of xgboost packages.\n\n* Basic walkthrough of packages\n  [python](guide-python/basic_walkthrough.py)\n  [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/basic_walkthrough.jl)\n* Customize loss function, and evaluation metric\n  [python](guide-python/custom_objective.py)\n  [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/custom_objective.jl)\n* Boosting from existing prediction\n  [python](guide-python/boost_from_prediction.py)\n  [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/boost_from_prediction.jl)\n* Predicting using first n trees\n  [python](guide-python/predict_first_ntree.py)\n  
[Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/predict_first_ntree.jl)\n* Generalized Linear Model\n  [python](guide-python/generalized_linear_model.py)\n  [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/generalized_linear_model.jl)\n* Cross validation\n  [python](guide-python/cross_validation.py)\n  [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/cross_validation.jl)\n* Predicting leaf indices\n  [python](guide-python/predict_leaf_indices.py)\n\n### Benchmarks\n\n- [Starter script for Kaggle Higgs Boson](kaggle-higgs)\n- [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution)\n- [Benchmarking the most commonly used open source tools for binary classification](https://github.com/szilard/benchm-ml#boosting-gradient-boosted-treesgradient-boosting-machines)\n\n\n## Machine Learning Challenge Winning Solutions\n\nXGBoost is extensively used by machine learning practitioners to create state-of-the-art data science solutions.\nThis is a list of winning machine learning solutions that use XGBoost.\nPlease send pull requests if you find ones that are missing here.\n\n- Gábor Melis, 1st place winner of [Kaggle Higgs competition](https://github.com/ghl3/higgs-kaggle) conducted between May and September 2014. Link to [discussion](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/), [code](https://github.com/phunterlau/kaggle_higgs) and [news article](https://atlas.cern/updates/news/machine-learning-wins-higgs-challenge)\n- Bishwarup Bhattacharjee, 1st place winner of [Allstate Claims Severity](https://www.kaggle.com/competitions/allstate-claims-severity/overview) conducted in December 2016. 
Link to [discussion](https://www.kaggle.com/competitions/allstate-claims-severity/discussion/26416)\n- Benedikt Schifferer, Gilberto Titericz, Chris Deotte, Christof Henkel, Kazuki Onodera, Jiwei Liu, Bojan Tunguz, Even Oldridge, Gabriel De Souza Pereira Moreira and Ahmet Erdem, 1st place winner of [Twitter RecSys Challenge 2020](https://recsys-twitter.com/) conducted from June,20-August,20. [GPU Accelerated Feature Engineering and Training for Recommender Systems](https://medium.com/rapids-ai/winning-solution-of-recsys2020-challenge-gpu-accelerated-feature-engineering-and-training-for-cd67c5a87b1f)\n- Eugene Khvedchenya,Jessica Fridrich, Jan Butora, Yassine Yousfi 1st place winner in [ALASKA2 Image Steganalysis](https://www.kaggle.com/c/alaska2-image-steganalysis/overview). Link to [discussion](https://www.kaggle.com/c/alaska2-image-steganalysis/discussion/168546)\n- Dan Ofer, Seffi Cohen, Noa Dagan, Nurit, 1st place in WiDS Datathon 2020. Link to [discussion](https://www.kaggle.com/c/widsdatathon2020/discussion/133189)\n- Chris Deotte, Konstantin Yakovlev 1st place in [IEEE-CIS Fraud Detection](https://www.kaggle.com/c/ieee-fraud-detection/overview). Link to [discussion](https://www.kaggle.com/c/ieee-fraud-detection/discussion/111308)\n- Giba, Lucasz, 1st place winner in [Santander Value Prediction Challenge](https://www.kaggle.com/c/santander-value-prediction-challenge) organized on August,2018. Solution [discussion](https://www.kaggle.com/c/santander-value-prediction-challenge/discussion/65272) and [code](https://www.kaggle.com/titericz/winner-model-giba-single-xgb-lb0-5178/comments)\n- Beluga, 2nd place and Evgeny Nekrasov, 3rd place winner in Statoil/C-CORE Iceberg Classifier Challenge'2018. Link to [discussion](https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/discussion/48294)\n- Radek Osmulski, 1st place of the [iMaterialist Challenge (Fashion) at FGVC5](https://www.kaggle.com/c/imaterialist-challenge-fashion-2018/overview). 
Link to [the winning solution](https://www.kaggle.com/c/imaterialist-challenge-fashion-2018/discussion/57944).\n- Maksims Volkovs, Guangwei Yu and Tomi Poutanen, 1st place of the [2017 ACM RecSys challenge](http://2017.recsyschallenge.com/). Link to [paper](http://www.cs.toronto.edu/~mvolkovs/recsys2017_challenge.pdf).\n- Vlad Sandulescu, Mihai Chiru, 1st place of the [KDD Cup 2016 competition](https://kddcup2016.azurewebsites.net). Link to [the arxiv paper](http://arxiv.org/abs/1609.02728).\n- Marios Michailidis, Mathias Müller and HJ van Veen, 1st place of the [Dato Truely Native? competition](https://www.kaggle.com/c/dato-native). Link to [the Kaggle interview](http://blog.kaggle.com/2015/12/03/dato-winners-interview-1st-place-mad-professors/).\n- Vlad Mironov, Alexander Guschin, 1st place of the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Link to [the Kaggle interview](http://blog.kaggle.com/2015/11/30/flavour-of-physics-technical-write-up-1st-place-go-polar-bears/).\n- Josef Slavicek, 3rd place of the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Link to [the Kaggle interview](http://blog.kaggle.com/2015/11/23/flavour-of-physics-winners-interview-3rd-place-josef-slavicek/).\n- Mario Filho, Josef Feigl, Lucas, Gilberto, 1st place of the [Caterpillar Tube Pricing competition](https://www.kaggle.com/c/caterpillar-tube-pricing). Link to [the Kaggle interview](http://blog.kaggle.com/2015/09/22/caterpillar-winners-interview-1st-place-gilberto-josef-leustagos-mario/).\n- Qingchen Wang, 1st place of the [Liberty Mutual Property Inspection](https://www.kaggle.com/c/liberty-mutual-group-property-inspection-prediction). 
Link to [the Kaggle interview](http://blog.kaggle.com/2015/09/28/liberty-mutual-property-inspection-winners-interview-qingchen-wang/).\n- Chenglong Chen, 1st place of the [Crowdflower Search Results Relevance](https://www.kaggle.com/c/crowdflower-search-relevance). Link to [the winning solution](https://www.kaggle.com/c/crowdflower-search-relevance/forums/t/15186/1st-place-winner-solution-chenglong-chen/).\n- Alexandre Barachant (“Cat”) and Rafał Cycoń (“Dog”), 1st place of the [Grasp-and-Lift EEG Detection](https://www.kaggle.com/c/grasp-and-lift-eeg-detection). Link to [the Kaggle interview](http://blog.kaggle.com/2015/10/12/grasp-and-lift-eeg-winners-interview-1st-place-cat-dog/).\n- Halla Yang, 2nd place of the [Recruit Coupon Purchase Prediction Challenge](https://www.kaggle.com/c/coupon-purchase-prediction). Link to [the Kaggle interview](http://blog.kaggle.com/2015/10/21/recruit-coupon-purchase-winners-interview-2nd-place-halla-yang/).\n- Owen Zhang, 1st place of the [Avito Context Ad Clicks competition](https://www.kaggle.com/c/avito-context-ad-clicks). Link to [the Kaggle interview](http://blog.kaggle.com/2015/08/26/avito-winners-interview-1st-place-owen-zhang/).\n- Keiichi Kuroyanagi, 2nd place of the [Airbnb New User Bookings](https://www.kaggle.com/c/airbnb-recruiting-new-user-bookings). Link to [the Kaggle interview](http://blog.kaggle.com/2016/03/17/airbnb-new-user-bookings-winners-interview-2nd-place-keiichi-kuroyanagi-keiku/).\n- Marios Michailidis, Mathias Müller and Ning Situ, 1st place [Homesite Quote Conversion](https://www.kaggle.com/c/homesite-quote-conversion). Link to [the Kaggle interview](http://blog.kaggle.com/2016/04/08/homesite-quote-conversion-winners-write-up-1st-place-kazanova-faron-clobber/).\n- Gilberto Titericz, Stanislav Semenov, 1st place in challenge to classify products into the correct category organized by Otto Group in 2015. Link to [challenge](https://www.kaggle.com/c/otto-group-product-classification-challenge). 
Link to [kaggle winning solution](https://www.kaggle.com/c/otto-group-product-classification-challenge/discussion/14335)\n- Darius Barušauskas, 1st place winner in [Predicting Red Hat Business Value](https://www.kaggle.com/c/predicting-red-hat-business-value). Link to [interview](https://medium.com/kaggle-blog/red-hat-business-value-competition-1st-place-winners-interview-darius-baru%C5%A1auskas-646692a2841b). Link to [discussion](https://www.kaggle.com/c/predicting-red-hat-business-value/discussion/23786)\n- David Austin, Weimin Wang, 1st place winner in [Iceberg-classifier-challenge](https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/leaderboard) Link to [discussion](https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/discussion/48241)\n- Kazuki Onodera, Kazuki Fujikawa, 2nd place winner in [OpenVaccine: COVID-19 mRNA Vaccine Degradation Prediction](https://www.kaggle.com/c/stanford-covid-vaccine/overview) Link to [Discussion](https://www.kaggle.com/c/stanford-covid-vaccine/discussion/189709)\n- Prarthana Bhat, 2nd place winner in [DYD Competition](https://datahack.analyticsvidhya.com/contest/date-your-data/). Link to [Solution](https://github.com/analyticsvidhya/DateYourData/blob/master/Prathna_Bhat_Model.R).\n- Benedikt Schifferer, Chris Deotte, Gilberto Titericz, Bo Liu, 1st place winner of [ACM RecSys Challenge 2021](https://recsys.acm.org/recsys21/challenge/). An ensemble of 5 XGBoost models and 3 neural networks. Link to [writeup](https://medium.com/nvidia-merlin/winning-the-recsys2021-challenge-by-a-diverse-set-of-xgboost-and-neural-network-models-4c5422a642d8) and [paper](https://dl.acm.org/doi/10.1145/3487572.3487605).\n- Chris Deotte, 1st place winner of [Kaggle Playground Series S5E2 - Backpack Prediction Challenge](https://www.kaggle.com/competitions/playground-series-s5e2) (Feb 2025). A single XGBoost model with 500 engineered features. 
Link to [discussion](https://www.kaggle.com/competitions/playground-series-s5e2/discussion/565539).\n- Chris Deotte, 1st place winner of [Kaggle Playground Series S5E4 - Predict Podcast Listening Time](https://www.kaggle.com/competitions/playground-series-s5e4) (Apr 2025). A three-level ensemble that includes XGBoost/GBDT models. Link to [writeup](https://www.kaggle.com/competitions/playground-series-s5e4/writeups/chris-deotte-1st-place-rapids-cuml-stack-3-levels) and [technical blog](https://developer.nvidia.com/blog/grandmaster-pro-tip-winning-first-place-in-a-kaggle-competition-with-stacking-using-cuml/).\n- Mohammad Odeh, 1st place winner of [March Machine Learning Mania 2025](https://www.kaggle.com/competitions/march-machine-learning-mania-2025) ($50K prize). XGBoost outperformed CatBoost and LightGBM. Link to [writeup](https://www.kaggle.com/competitions/march-machine-learning-mania-2025/writeups/mohammad-odeh-first-place-solution).\n\n## Talks\n- XGBoost: A Scalable Tree Boosting System ([video] (https://www.youtube.com/watch?v=Vly8xGnNiWs) + [slides](https://speakerdeck.com/datasciencela/tianqi-chen-xgboost-overview-and-latest-news-la-meetup-talk)) by Tianqi Chen at the Los Angeles Data Science meetup\n\n## Tutorials\n\n- [XGBoost Training with Dask, using Saturn Cloud](https://www.saturncloud.io/docs/tutorials/xgboost/)\n- [Machine Learning with XGBoost on Qubole Spark Cluster](https://www.qubole.com/blog/machine-learning-xgboost-qubole-spark-cluster/)\n- [XGBoost Official RMarkdown Tutorials](https://xgboost.readthedocs.org/en/latest/R-package/index.html#tutorials)\n- [An Introduction to XGBoost R Package](http://dmlc.ml/rstats/2016/03/10/xgboost.html) by Tong He\n- [Open Source Tools & Data Science Competitions](http://www.slideshare.net/odsc/owen-zhangopen-sourcetoolsanddscompetitions1) by Owen Zhang - XGBoost parameter tuning tips\n* [Feature Importance Analysis with XGBoost in Tax 
audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit)\n* [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)\n- [XGBoost - eXtreme Gradient Boosting](http://www.slideshare.net/ShangxuanZhang/xgboost) by Tong He\n- [How to use XGBoost algorithm in R in easy steps](http://www.analyticsvidhya.com/blog/2016/01/xgboost-algorithm-easy-steps/) by TAVISH SRIVASTAVA ([Chinese Translation 中文翻译](https://segmentfault.com/a/1190000004421821) by [HarryZhu](https://segmentfault.com/u/harryprince))\n- [Kaggle Solution: What’s Cooking ? (Text Mining Competition)](http://www.analyticsvidhya.com/blog/2015/12/kaggle-solution-cooking-text-mining-competition/) by MANISH SARASWAT\n- Better Optimization with Repeated Cross Validation and the XGBoost model - Machine Learning with R) by Manuel Amunategui ([Youtube Link](https://www.youtube.com/watch?v=Og7CGAfSr_Y)) ([GitHub Link](https://github.com/amunategui/BetterCrossValidation))\n- [XGBoost Rossman Parameter Tuning](https://www.kaggle.com/khozzy/rossmann-store-sales/xgboost-parameter-tuning-template/run/90168/notebook) by [Norbert Kozlowski](https://www.kaggle.com/khozzy)\n- [Featurizing log data before XGBoost](http://www.slideshare.net/DataRobot/featurizing-log-data-before-xgboost) by Xavier Conort, Owen Zhang etc\n- [West Nile Virus Competition Benchmarks & Tutorials](http://blog.kaggle.com/2015/07/21/west-nile-virus-competition-benchmarks-tutorials/) by [Anna Montoya](http://blog.kaggle.com/author/annamontoya/)\n- [Ensemble Decision Tree with XGBoost](https://www.kaggle.com/binghsu/predict-west-nile-virus/xgboost-starter-code-python-0-69) by [Bing Xu](https://www.kaggle.com/binghsu)\n- [Notes on eXtreme Gradient Boosting](http://startup.ml/blog/xgboost) by ARSHAK NAVRUZYAN ([iPython 
Notebook](https://github.com/startupml/koan/blob/master/eXtreme%20Gradient%20Boosting.ipynb))\n- [Complete Guide to Parameter Tuning in XGBoost](http://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/) by Aarshay Jain\n- [Practical XGBoost in Python online course](http://education.parrotprediction.teachable.com/courses/practical-xgboost-in-python) by Parrot Prediction\n- [Spark and XGBoost using Scala](http://www.elenacuoco.com/2016/10/10/scala-spark-xgboost-classification/) by Elena Cuoco\n\n## Usecases\nIf you have particular usecase of xgboost that you would like to highlight.\nSend a PR to add a one sentence description:)\n\n- XGBoost is used in [Kaggle Script](https://www.kaggle.com/scripts) to solve data science challenges.\n- Distribute XGBoost as Rest API server from Jupyter notebook with [BentoML](https://github.com/bentoml/bentoml). [Link to notebook](https://github.com/bentoml/BentoML/blob/master/examples/xgboost-predict-titanic-survival/XGBoost-titanic-survival-prediction.ipynb)\n- [Seldon predictive service powered by XGBoost](https://docs.seldon.io/projects/seldon-core/en/latest/servers/xgboost.html)\n- XGBoost Distributed is used in [ODPS Cloud Service by Alibaba](https://yq.aliyun.com/articles/6355) (in Chinese)\n- XGBoost is incoporated as part of [Graphlab Create](https://dato.com/products/create/) for scalable machine learning.\n- [Hanjing Su](https://www.52cs.org) from Tencent data platform team: \"We use distributed XGBoost for click through prediction in wechat shopping and lookalikes. The problems involve hundreds millions of users and thousands of features. 
XGBoost is cleanly designed and can be easily integrated into our production environment, reducing our cost in developments.\"\n- [CNevd](https://github.com/CNevd) from autohome.com ad platform team: \"Distributed XGBoost is used for click through rate prediction in our display advertising, XGBoost is highly efficient and flexible and can be easily used on our distributed platform, our ctr made a great improvement with hundred millions samples and millions features due to this awesome XGBoost\"\n\n## Tools using XGBoost\n\n- [BayesBoost](https://github.com/mpearmain/BayesBoost) - Bayesian Optimization using xgboost and sklearn API\n- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library\ndesigned to automatically produce accurate machine learning models with low computational cost. FLAML includes [XGBoost as one of the default learners](https://github.com/microsoft/FLAML/blob/main/flaml/model.py) and can also be used as a fast hyperparameter tuning tool for XGBoost ([code example](https://microsoft.github.io/FLAML/docs/Examples/AutoML-for-XGBoost)).\n- [gp_xgboost_gridsearch](https://github.com/vatsan/gp_xgboost_gridsearch) - In-database parallel grid-search for XGBoost on [Greenplum](https://github.com/greenplum-db/gpdb) using PL/Python\n- [tpot](https://github.com/rhiever/tpot) - A Python tool that automatically creates and optimizes machine learning pipelines using genetic programming.\n\n## Integrations with 3rd party software\nOpen source integrations with XGBoost:\n* [Neptune.ai](http://neptune.ai/) - Experiment management and collaboration tool for ML/DL/RL specialists. Integration has a form of the [XGBoost callback](https://docs.neptune.ai/integrations/xgboost.html) that automatically logs training and evaluation metrics, as well as saved model (booster), feature importance chart and visualized trees.\n* [Optuna](https://optuna.org/) - An open source hyperparameter optimization framework to automate hyperparameter search. 
Optuna integrates with XGBoost in the [XGBoostPruningCallback](https://optuna.readthedocs.io/en/stable/reference/integration.html#optuna.integration.XGBoostPruningCallback) that let users easily prune unpromising trials.\n* [dtreeviz](https://github.com/parrt/dtreeviz) - A python library for decision tree visualization and model interpretation. Starting from version 1.0, dtreeviz is able to visualize tree ensembles produced by XGBoost.\n\n## Awards\n- [John Chambers Award](http://stat-computing.org/awards/jmc/winners.html) - 2016 Winner: XGBoost R Package, by Tong He (Simon Fraser University) and Tianqi Chen (University of Washington)\n- [InfoWorld’s 2019 Technology of the Year Award](https://www.infoworld.com/article/3336072/application-development/infoworlds-2019-technology-of-the-year-award-winners.html)\n\n## Windows Binaries\nUnofficial windows binaries and instructions on how to use them are hosted on [Guido Tapia's blog](http://www.picnet.com.au/blogs/guido/post/2016/09/22/xgboost-windows-x64-binaries-for-download/)\n"
  },
  {
    "path": "demo/aft_survival/README.rst",
    "content": "Survival Analysis Walkthrough\n=============================\n\nThis is a collection of examples for using the XGBoost Python package for training\nsurvival models. For an introduction, see :doc:`/tutorials/aft_survival_analysis`\n"
  },
  {
    "path": "demo/aft_survival/aft_survival_demo.py",
    "content": "\"\"\"\nDemo for survival analysis (regression).\n========================================\n\nDemo for survival analysis (regression). using Accelerated Failure Time (AFT) model.\n\"\"\"\n\nimport os\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.model_selection import ShuffleSplit\n\nimport xgboost as xgb\n\n# The Veterans' Administration Lung Cancer Trial\n# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)\nCURRENT_DIR = os.path.dirname(__file__)\ndf = pd.read_csv(os.path.join(CURRENT_DIR, '../data/veterans_lung_cancer.csv'))\nprint('Training data:')\nprint(df)\n\n# Split features and labels\ny_lower_bound = df['Survival_label_lower_bound']\ny_upper_bound = df['Survival_label_upper_bound']\nX = df.drop(['Survival_label_lower_bound', 'Survival_label_upper_bound'], axis=1)\n\n# Split data into training and validation sets\nrs = ShuffleSplit(n_splits=2, test_size=.7, random_state=0)\ntrain_index, valid_index = next(rs.split(X))\ndtrain = xgb.DMatrix(X.values[train_index, :])\ndtrain.set_float_info('label_lower_bound', y_lower_bound[train_index])\ndtrain.set_float_info('label_upper_bound', y_upper_bound[train_index])\ndvalid = xgb.DMatrix(X.values[valid_index, :])\ndvalid.set_float_info('label_lower_bound', y_lower_bound[valid_index])\ndvalid.set_float_info('label_upper_bound', y_upper_bound[valid_index])\n\n# Train gradient boosted trees using AFT loss and metric\nparams = {'verbosity': 0,\n          'objective': 'survival:aft',\n          'eval_metric': 'aft-nloglik',\n          'tree_method': 'hist',\n          'learning_rate': 0.05,\n          'aft_loss_distribution': 'normal',\n          'aft_loss_distribution_scale': 1.20,\n          'max_depth': 6,\n          'lambda': 0.01,\n          'alpha': 0.02}\nbst = xgb.train(params, dtrain, num_boost_round=10000,\n                evals=[(dtrain, 'train'), (dvalid, 'valid')],\n                early_stopping_rounds=50)\n\n# Run prediction on the validation 
set\ndf = pd.DataFrame({'Label (lower bound)': y_lower_bound[valid_index],\n                   'Label (upper bound)': y_upper_bound[valid_index],\n                   'Predicted label': bst.predict(dvalid)})\nprint(df)\n# Show only data points with right-censored labels\nprint(df[np.isinf(df['Label (upper bound)'])])\n\n# Save trained model\nbst.save_model('aft_model.json')\n"
  },
  {
    "path": "demo/aft_survival/aft_survival_demo_with_optuna.py",
    "content": "\"\"\"\nDemo for survival analysis (regression) with Optuna.\n====================================================\n\nDemo for survival analysis (regression) using Accelerated Failure Time (AFT) model,\nusing Optuna to tune hyperparameters\n\n\"\"\"\nimport numpy as np\nimport optuna\nimport pandas as pd\nfrom sklearn.model_selection import ShuffleSplit\n\nimport xgboost as xgb\n\n# The Veterans' Administration Lung Cancer Trial\n# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)\ndf = pd.read_csv('../data/veterans_lung_cancer.csv')\nprint('Training data:')\nprint(df)\n\n# Split features and labels\ny_lower_bound = df['Survival_label_lower_bound']\ny_upper_bound = df['Survival_label_upper_bound']\nX = df.drop(['Survival_label_lower_bound', 'Survival_label_upper_bound'], axis=1)\n\n# Split data into training and validation sets\nrs = ShuffleSplit(n_splits=2, test_size=.7, random_state=0)\ntrain_index, valid_index = next(rs.split(X))\ndtrain = xgb.DMatrix(X.values[train_index, :])\ndtrain.set_float_info('label_lower_bound', y_lower_bound[train_index])\ndtrain.set_float_info('label_upper_bound', y_upper_bound[train_index])\ndvalid = xgb.DMatrix(X.values[valid_index, :])\ndvalid.set_float_info('label_lower_bound', y_lower_bound[valid_index])\ndvalid.set_float_info('label_upper_bound', y_upper_bound[valid_index])\n\n# Define hyperparameter search space\nbase_params = {'verbosity': 0,\n              'objective': 'survival:aft',\n              'eval_metric': 'aft-nloglik',\n              'tree_method': 'hist'}  # Hyperparameters common to all trials\ndef objective(trial):\n    params = {'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 1.0),\n              'aft_loss_distribution': trial.suggest_categorical('aft_loss_distribution',\n                                                                  ['normal', 'logistic', 'extreme']),\n              'aft_loss_distribution_scale': 
trial.suggest_loguniform('aft_loss_distribution_scale', 0.1, 10.0),\n              'max_depth': trial.suggest_int('max_depth', 3, 8),\n              'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),\n              'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0)}  # Search space\n    params.update(base_params)\n    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, 'valid-aft-nloglik')\n    bst = xgb.train(params, dtrain, num_boost_round=10000,\n                    evals=[(dtrain, 'train'), (dvalid, 'valid')],\n                    early_stopping_rounds=50, verbose_eval=False, callbacks=[pruning_callback])\n    if bst.best_iteration >= 25:\n        return bst.best_score\n    else:\n        return np.inf  # Reject models with < 25 trees\n\n# Run hyperparameter search\nstudy = optuna.create_study(direction='minimize')\nstudy.optimize(objective, n_trials=200)\nprint('Completed hyperparameter tuning with best aft-nloglik = {}.'.format(study.best_trial.value))\nparams = {}\nparams.update(base_params)\nparams.update(study.best_trial.params)\n\n# Re-run training with the best hyperparameter combination\nprint('Re-running the best trial... params = {}'.format(params))\nbst = xgb.train(params, dtrain, num_boost_round=10000,\n                evals=[(dtrain, 'train'), (dvalid, 'valid')],\n                early_stopping_rounds=50)\n\n# Run prediction on the validation set\ndf = pd.DataFrame({'Label (lower bound)': y_lower_bound[valid_index],\n                   'Label (upper bound)': y_upper_bound[valid_index],\n                   'Predicted label': bst.predict(dvalid)})\nprint(df)\n# Show only data points with right-censored labels\nprint(df[np.isinf(df['Label (upper bound)'])])\n\n# Save trained model\nbst.save_model('aft_best_model.json')\n"
  },
  {
    "path": "demo/aft_survival/aft_survival_viz_demo.py",
    "content": "\"\"\"\nVisual demo for survival analysis (regression) with Accelerated Failure Time (AFT) model.\n=========================================================================================\n\nThis demo uses 1D toy data and visualizes how XGBoost fits a tree ensemble. The ensemble\nmodel starts out as a flat line and evolves into a step function in order to account for\nall ranged labels.\n\"\"\"\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nimport xgboost as xgb\n\nplt.rcParams.update({\"font.size\": 13})\n\n\n# Function to visualize censored labels\ndef plot_censored_labels(\n    X: np.ndarray, y_lower: np.ndarray, y_upper: np.ndarray\n) -> None:\n    def replace_inf(x: np.ndarray, target_value: float) -> np.ndarray:\n        x[np.isinf(x)] = target_value\n        return x\n\n    plt.plot(X, y_lower, \"o\", label=\"y_lower\", color=\"blue\")\n    plt.plot(X, y_upper, \"o\", label=\"y_upper\", color=\"fuchsia\")\n    plt.vlines(\n        X,\n        ymin=replace_inf(y_lower, 0.01),\n        ymax=replace_inf(y_upper, 1000.0),\n        label=\"Range for y\",\n        color=\"gray\",\n    )\n\n\n# Toy data\nX = np.array([1, 2, 3, 4, 5]).reshape((-1, 1))\nINF = np.inf\ny_lower = np.array([10, 15, -INF, 30, 100])\ny_upper = np.array([INF, INF, 20, 50, INF])\n\n# Visualize toy data\nplt.figure(figsize=(5, 4))\nplot_censored_labels(X, y_lower, y_upper)\nplt.ylim((6, 200))\nplt.legend(loc=\"lower right\")\nplt.title(\"Toy data\")\nplt.xlabel(\"Input feature\")\nplt.ylabel(\"Label\")\nplt.yscale(\"log\")\nplt.tight_layout()\nplt.show(block=True)\n\n# Will be used to visualize XGBoost model\ngrid_pts = np.linspace(0.8, 5.2, 1000).reshape((-1, 1))\n\n# Train AFT model using XGBoost\ndmat = xgb.DMatrix(X)\ndmat.set_float_info(\"label_lower_bound\", y_lower)\ndmat.set_float_info(\"label_upper_bound\", y_upper)\nparams = {\"max_depth\": 3, \"objective\": \"survival:aft\", \"min_child_weight\": 0}\n\naccuracy_history = []\n\n\nclass 
PlotIntermediateModel(xgb.callback.TrainingCallback):\n    \"\"\"Custom callback to plot intermediate models.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__()\n\n    def after_iteration(\n        self,\n        model: xgb.Booster,\n        epoch: int,\n        evals_log: xgb.callback.TrainingCallback.EvalsLog,\n    ) -> bool:\n        \"\"\"Run after training is finished.\"\"\"\n        # Compute y_pred = prediction using the intermediate model, at current boosting\n        # iteration\n        y_pred = model.predict(dmat)\n        # \"Accuracy\" = the number of data points whose ranged label (y_lower, y_upper)\n        #              includes the corresponding predicted label (y_pred)\n        acc = np.sum(\n            np.logical_and(y_pred >= y_lower, y_pred <= y_upper) / len(X) * 100\n        )\n        accuracy_history.append(acc)\n\n        # Plot ranged labels as well as predictions by the model\n        plt.subplot(5, 3, epoch + 1)\n        plot_censored_labels(X, y_lower, y_upper)\n        y_pred_grid_pts = model.predict(xgb.DMatrix(grid_pts))\n        plt.plot(\n            grid_pts, y_pred_grid_pts, \"r-\", label=\"XGBoost AFT model\", linewidth=4\n        )\n        plt.title(\"Iteration {}\".format(epoch), x=0.5, y=0.8)\n        plt.xlim((0.8, 5.2))\n        plt.ylim((1 if np.min(y_pred) < 6 else 6, 200))\n        plt.yscale(\"log\")\n        return False\n\n\nres: xgb.callback.TrainingCallback.EvalsLog = {}\nplt.figure(figsize=(12, 13))\nbst = xgb.train(\n    params,\n    dmat,\n    num_boost_round=15,\n    evals=[(dmat, \"train\")],\n    evals_result=res,\n    callbacks=[PlotIntermediateModel()],\n)\nplt.tight_layout()\nplt.legend(\n    loc=\"lower center\",\n    ncol=4,\n    bbox_to_anchor=(0.5, 0),\n    bbox_transform=plt.gcf().transFigure,\n)\nplt.tight_layout()\n\n# Plot negative log likelihood over boosting iterations\nplt.figure(figsize=(8, 3))\nplt.subplot(1, 2, 1)\nplt.plot(res[\"train\"][\"aft-nloglik\"], \"b-o\", 
label=\"aft-nloglik\")\nplt.xlabel(\"# Boosting Iterations\")\nplt.legend(loc=\"best\")\n\n# Plot \"accuracy\" over boosting iterations\n# \"Accuracy\" = the number of data points whose ranged label (y_lower, y_upper) includes\n#              the corresponding predicted label (y_pred)\nplt.subplot(1, 2, 2)\nplt.plot(accuracy_history, \"r-o\", label=\"Accuracy (%)\")\nplt.xlabel(\"# Boosting Iterations\")\nplt.legend(loc=\"best\")\nplt.tight_layout()\n\nplt.show()\n"
  },
  {
    "path": "demo/c-api/.gitignore",
    "content": "c-api-demo\n"
  },
  {
    "path": "demo/c-api/basic/Makefile",
    "content": "SRC=c-api-demo.c\nTGT=c-api-demo\n\ncc=cc\nCFLAGS ?=-O3\nXGBOOST_ROOT ?=../..\nINCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include\nLIB_DIR=-L$(XGBOOST_ROOT)/lib\n\nbuild: $(TGT)\n\n$(TGT): $(SRC) Makefile\n\t$(cc) $(CFLAGS) $(INCLUDE_DIR) $(LIB_DIR) -o $(TGT) $(SRC) -lxgboost\n\nrun: $(TGT)\n\tLD_LIBRARY_PATH=$(XGBOOST_ROOT)/lib ./$(TGT)\n\nclean:\n\trm -f $(TGT)\n"
  },
  {
    "path": "demo/c-api/basic/README.md",
    "content": "C-APIs\n===\n\n**XGBoost** implements a C API originally designed for various language\nbindings.  For a detailed reference, please check xgboost/c_api.h.  Here is a\ndemonstration of using the API.\n\n# CMake\nIf you use **CMake** for your project, you can either install **XGBoost**\nsomewhere in your system and tell CMake to find it by calling\n`find_package(xgboost)`, or put **XGBoost** inside your project's source tree\nand call the **CMake** command: `add_subdirectory(xgboost)`.  To use\n`find_package()`, put the following in your **CMakeLists.txt**:\n\n``` CMake\nfind_package(xgboost REQUIRED)\nadd_executable(api-demo c-api-demo.c)\ntarget_link_libraries(api-demo xgboost::xgboost)\n```\n\nIf you want to put XGBoost inside your project (e.g. as a git submodule), use this\ninstead:\n``` CMake\nadd_subdirectory(xgboost)\nadd_executable(api-demo c-api-demo.c)\ntarget_link_libraries(api-demo xgboost)\n```\n\n# make\nYou can start by modifying the makefile in this directory to fit your needs.\n"
  },
  {
    "path": "demo/c-api/basic/c-api-demo.c",
    "content": "/**\n * Copyright 2019-2023 by XGBoost contributors\n *\n * \\file c-api-demo.c\n * \\brief A simple example of using xgboost C API.\n */\n\n#include <assert.h>\n#include <stddef.h>\n#include <stdint.h> /* uint32_t,uint64_t */\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <xgboost/c_api.h>\n\n#define safe_xgboost(call) {                                            \\\nint err = (call);                                                       \\\nif (err != 0) {                                                         \\\n  fprintf(stderr, \"%s:%d: error in %s: %s\\n\", __FILE__, __LINE__, #call, XGBGetLastError()); \\\n  exit(1);                                                              \\\n}                                                                       \\\n}\n\n/* Make Json encoded array interface. */\nstatic void MakeArrayInterface(size_t data, size_t n, char const* typestr, size_t length,\n                               char* out) {\n  static char const kTemplate[] =\n      \"{\\\"data\\\": [%lu, true], \\\"shape\\\": [%lu, %lu], \\\"typestr\\\": \\\"%s\\\", \\\"version\\\": 3}\";\n  memset(out, '\\0', length);\n  sprintf(out, kTemplate, data, n, 1ul, typestr);\n}\n/* Make Json encoded DMatrix configuration. 
*/\nstatic void MakeConfig(int n_threads, size_t length, char* out) {\n  static char const kTemplate[] = \"{\\\"missing\\\": NaN, \\\"nthread\\\": %d}\";\n  memset(out, '\\0', length);\n  sprintf(out, kTemplate, n_threads);\n}\n\nint main() {\n  int silent = 0;\n  int use_gpu = 0;  // set to 1 to use the GPU for training\n\n  // load the data\n  DMatrixHandle dtrain, dtest;\n  safe_xgboost(XGDMatrixCreateFromFile(\"../../data/agaricus.txt.train?format=libsvm\", silent, &dtrain));\n  safe_xgboost(XGDMatrixCreateFromFile(\"../../data/agaricus.txt.test?format=libsvm\", silent, &dtest));\n\n  // create the booster\n  BoosterHandle booster;\n  DMatrixHandle eval_dmats[2] = {dtrain, dtest};\n  safe_xgboost(XGBoosterCreate(eval_dmats, 2, &booster));\n\n  // configure the training\n  // available parameters are described here:\n  //   https://xgboost.readthedocs.io/en/latest/parameter.html\n  safe_xgboost(XGBoosterSetParam(booster, \"device\", use_gpu ? \"cuda\" : \"cpu\"));\n\n  safe_xgboost(XGBoosterSetParam(booster, \"objective\", \"binary:logistic\"));\n  safe_xgboost(XGBoosterSetParam(booster, \"min_child_weight\", \"1\"));\n  safe_xgboost(XGBoosterSetParam(booster, \"gamma\", \"0.1\"));\n  safe_xgboost(XGBoosterSetParam(booster, \"max_depth\", \"3\"));\n  safe_xgboost(XGBoosterSetParam(booster, \"verbosity\", silent ? 
\"0\" : \"1\"));\n\n  // train and evaluate for 10 iterations\n  int n_trees = 10;\n  const char* eval_names[2] = {\"train\", \"test\"};\n  const char* eval_result = NULL;\n  for (int i = 0; i < n_trees; ++i) {\n    safe_xgboost(XGBoosterUpdateOneIter(booster, i, dtrain));\n    safe_xgboost(XGBoosterEvalOneIter(booster, i, eval_dmats, eval_names, 2, &eval_result));\n    printf(\"%s\\n\", eval_result);\n  }\n\n  bst_ulong num_feature = 0;\n  safe_xgboost(XGBoosterGetNumFeature(booster, &num_feature));\n  printf(\"num_feature: %lu\\n\", (unsigned long)(num_feature));\n\n  // predict\n  bst_ulong out_len = 0;\n  int n_print = 10;\n\n  /* Run prediction with DMatrix object. */\n  char const config[] =\n      \"{\\\"training\\\": false, \\\"type\\\": 0, \"\n      \"\\\"iteration_begin\\\": 0, \\\"iteration_end\\\": 0, \\\"strict_shape\\\": false}\";\n  /* Shape of output prediction */\n  uint64_t const* out_shape;\n  /* Dimension of output prediction */\n  uint64_t out_dim;\n  /* Pointer to a thread local contigious array, assigned in prediction function. 
*/\n  float const* out_result = NULL;\n  safe_xgboost(\n      XGBoosterPredictFromDMatrix(booster, dtest, config, &out_shape, &out_dim, &out_result));\n\n  printf(\"y_pred: \");\n  for (int i = 0; i < n_print; ++i) {\n    printf(\"%1.4f \", out_result[i]);\n  }\n  printf(\"\\n\");\n\n  // print true labels\n  safe_xgboost(XGDMatrixGetFloatInfo(dtest, \"label\", &out_len, &out_result));\n  printf(\"y_test: \");\n  for (int i = 0; i < n_print; ++i) {\n    printf(\"%1.4f \", out_result[i]);\n  }\n  printf(\"\\n\");\n\n  {\n    printf(\"Dense Matrix Example (XGDMatrixCreateFromMat): \");\n\n    const float values[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n      0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,\n      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n      0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n      1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n      1, 0, 0, 0, 0, 1, 0, 0, 0, 0};\n\n    DMatrixHandle dmat;\n    safe_xgboost(XGDMatrixCreateFromMat(values, 1, 127, 0.0, &dmat));\n\n    const float* out_result = NULL;\n\n    safe_xgboost(\n        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));\n    assert(out_dim == 1);\n    assert(out_shape[0] == 1);\n\n    printf(\"%1.4f \\n\", out_result[0]);\n    safe_xgboost(XGDMatrixFree(dmat));\n  }\n\n  {\n    printf(\"Sparse Matrix Example (XGDMatrixCreateFromCSR): \");\n\n    const uint64_t indptr[] = {0, 22};\n    const uint32_t indices[] = {1,  9,  19, 21, 24, 34, 36, 39,  42,  53,  56,\n                                65, 69, 77, 86, 88, 92, 95, 102, 106, 117, 122};\n    const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};\n\n    DMatrixHandle dmat;\n    char j_indptr[128];\n    MakeArrayInterface((size_t)indptr, 2ul, \"<u8\", 
sizeof(j_indptr), j_indptr);\n    char j_indices[128];\n    MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(uint32_t), \"<u4\",\n                       sizeof(j_indices), j_indices);\n    char j_data[128];\n    MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), \"<f4\", sizeof(j_data), j_data);\n\n    char j_config[64];\n    MakeConfig(0, sizeof(j_config), j_config);\n\n    safe_xgboost(XGDMatrixCreateFromCSR(j_indptr, j_indices, j_data, 127, j_config, &dmat));\n\n    const float* out_result = NULL;\n\n    safe_xgboost(\n        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));\n    assert(out_dim == 1);\n    assert(out_shape[0] == 1);\n\n    printf(\"%1.4f \\n\", out_result[0]);\n    safe_xgboost(XGDMatrixFree(dmat));\n  }\n\n  {\n    printf(\"Sparse Matrix Example (XGDMatrixCreateFromCSC): \");\n\n    const uint64_t indptr[] = {\n        0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,\n        4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  8,  9,\n        9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,\n        12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,\n        15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,\n        20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};\n\n    const uint32_t indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};\n\n    const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n                          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};\n\n    char j_indptr[128];\n    MakeArrayInterface((size_t)indptr, 128ul, \"<u8\", sizeof(j_indptr), j_indptr);\n    char j_indices[128];\n    MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(unsigned), \"<u4\",\n                       
sizeof(j_indices), j_indices);\n    char j_data[128];\n    MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), \"<f4\", sizeof(j_data), j_data);\n\n    char j_config[64];\n    MakeConfig(0, sizeof(j_config), j_config);\n\n    DMatrixHandle dmat;\n    safe_xgboost(XGDMatrixCreateFromCSC(j_indptr, j_indices, j_data, 1, j_config, &dmat));\n\n    const float* out_result = NULL;\n\n    safe_xgboost(\n        XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));\n    assert(out_dim == 1);\n    assert(out_shape[0] == 1);\n\n    printf(\"%1.4f \\n\", out_result[0]);\n    safe_xgboost(XGDMatrixFree(dmat));\n  }\n\n  // free everything\n  safe_xgboost(XGBoosterFree(booster));\n  safe_xgboost(XGDMatrixFree(dtrain));\n  safe_xgboost(XGDMatrixFree(dtest));\n  return 0;\n}\n"
  },
  {
    "path": "demo/c-api/external-memory/README.md",
    "content": "Defining a Custom Data Iterator to Load Data from External Memory\n=================================================================\n\nA simple demo for using a custom data iterator with XGBoost.  The feature is still\n**experimental** and not ready for production use.  If you are not familiar with the C API,\nplease read its introduction in our tutorials and visit the basic demo first.\n\nDefining Data Iterator\n----------------------\n\nIn the example, we define a custom data iterator with 2 methods: `reset` and `next`.  The\n`next` method passes data into XGBoost and tells XGBoost whether the iterator has reached\nits end, and the `reset` method resets the iteration. One important detail when using the C\nAPI for a data iterator is that users need to make sure the data passed into the `next`\nmethod is kept in memory until the next iteration or until `reset` is called.  The external\nmemory DMatrix is not limited to training; it is also valid for other features like prediction."
  },
  {
    "path": "demo/c-api/external-memory/external_memory.c",
    "content": "/*!\n * Copyright 2021 XGBoost contributors\n *\n * \\brief A simple example of using xgboost data callback API.\n */\n\n#include <stddef.h>\n#include <stdlib.h>\n#include <string.h>\n#include <xgboost/c_api.h>\n\n#define safe_xgboost(err)                                                      \\\n  if ((err) != 0) {                                                            \\\n    fprintf(stderr, \"%s:%d: error in %s: %s\\n\", __FILE__, __LINE__, #err,      \\\n            XGBGetLastError());                                                \\\n    exit(1);                                                                   \\\n  }\n\n#define N_BATCHS 32\n#define BATCH_LEN 512\n\n/* Shorthands. */\ntypedef DMatrixHandle DMatrix;\ntypedef BoosterHandle Booster;\n\ntypedef struct _DataIter {\n  /* Data of each batch. */\n  float **data;\n  /* Labels of each batch */\n  float **labels;\n  /* Length of each batch. */\n  size_t *lengths;\n  /* Total number of batches. */\n  size_t n;\n  /* Current iteration. */\n  size_t cur_it;\n\n  /* Private fields */\n  DMatrix _proxy;\n  char _array[128];\n} DataIter;\n\n#define safe_malloc(ptr)                                                       \\\n  if ((ptr) == NULL) {                                                         \\\n    fprintf(stderr, \"%s:%d: Failed to allocate memory.\\n\", __FILE__,           \\\n            __LINE__);                                                         \\\n    exit(1);                                                                   \\\n  }\n\n/**\n * Initialize with random data for demo. In practice the data should be loaded\n * from external memory.  We just demonstrate how to use the iterator in\n * XGBoost.\n *\n * \\param batch_size  Number of elements for each batch.  
The demo here is only using 1\n *                    column.\n * \\param n_batches   Number of batches.\n */\nvoid DataIterator_Init(DataIter *self, size_t batch_size, size_t n_batches) {\n  self->n = n_batches;\n\n  self->lengths = (size_t *)malloc(self->n * sizeof(size_t));\n  safe_malloc(self->lengths);\n  for (size_t i = 0; i < self->n; ++i) {\n    self->lengths[i] = batch_size;\n  }\n\n  self->data = (float **)malloc(self->n * sizeof(float *));\n  safe_malloc(self->data);\n  self->labels = (float **)malloc(self->n * sizeof(float *));\n  safe_malloc(self->labels);\n\n  /* Generate some random data. */\n  for (size_t i = 0; i < self->n; ++i) {\n    self->data[i] = (float *)malloc(self->lengths[i] * sizeof(float));\n    safe_malloc(self->data[i]);\n    for (size_t j = 0; j < self->lengths[i]; ++j) {\n      float x = (float)rand() / (float)(RAND_MAX);\n      self->data[i][j] = x;\n    }\n\n    self->labels[i] = (float *)malloc(self->lengths[i] * sizeof(float));\n    safe_malloc(self->labels[i]);\n    for (size_t j = 0; j < self->lengths[i]; ++j) {\n      float y = (float)rand() / (float)(RAND_MAX);\n      self->labels[i][j] = y;\n    }\n  }\n\n  self->cur_it = 0;\n  safe_xgboost(XGProxyDMatrixCreate(&self->_proxy));\n}\n\nvoid DataIterator_Free(DataIter *self) {\n  for (size_t i = 0; i < self->n; ++i) {\n    free(self->data[i]);\n    free(self->labels[i]);\n  }\n  free(self->data);\n  free(self->lengths);\n  free(self->labels);\n  safe_xgboost(XGDMatrixFree(self->_proxy));\n};\n\nint DataIterator_Next(DataIterHandle handle) {\n  DataIter *self = (DataIter *)(handle);\n  if (self->cur_it == self->n) {\n    self->cur_it = 0;\n    return 0;  /* At end */\n  }\n\n  /* A JSON string encoding array interface (standard from numpy). 
*/\n  char array[] = \"{\\\"data\\\": [%lu, false], \\\"shape\\\":[%lu, 1], \\\"typestr\\\": \"\n                 \"\\\"<f4\\\", \\\"version\\\": 3}\";\n  memset(self->_array, '\\0', sizeof(self->_array));\n  sprintf(self->_array, array, (size_t)self->data[self->cur_it],\n          self->lengths[self->cur_it]);\n\n  safe_xgboost(XGProxyDMatrixSetDataDense(self->_proxy, self->_array));\n  /* The data passed in the iterator must remain valid (not being freed until the next\n   * iteration or reset) */\n  safe_xgboost(XGDMatrixSetDenseInfo(self->_proxy, \"label\",\n                                     self->labels[self->cur_it],\n                                     self->lengths[self->cur_it], 1));\n  self->cur_it++;\n  return 1;  /* Continue. */\n}\n\nvoid DataIterator_Reset(DataIterHandle handle) {\n  DataIter *self = (DataIter *)(handle);\n  self->cur_it = 0;\n}\n\n/**\n * Train a regression model and save it into JSON model file.\n */\nvoid TrainModel(DMatrix Xy) {\n  /* Create booster for training. */\n  Booster booster;\n  DMatrix cache[] = {Xy};\n  safe_xgboost(XGBoosterCreate(cache, 1, &booster));\n  /* Use approx or hist for external memory training. */\n  safe_xgboost(XGBoosterSetParam(booster, \"tree_method\", \"hist\"));\n  safe_xgboost(XGBoosterSetParam(booster, \"objective\", \"reg:squarederror\"));\n\n  /* Start training. */\n  const char *validation_names[1] = {\"train\"};\n  const char *validation_result = NULL;\n  size_t n_rounds = 10;\n  for (size_t i = 0; i < n_rounds; ++i) {\n    safe_xgboost(XGBoosterUpdateOneIter(booster, i, Xy));\n    safe_xgboost(XGBoosterEvalOneIter(booster, i, cache, validation_names, 1,\n                                      &validation_result));\n    printf(\"%s\\n\", validation_result);\n  }\n\n  /* Save the model to a JSON file. 
*/\n  safe_xgboost(XGBoosterSaveModel(booster, \"model.json\"));\n\n  safe_xgboost(XGBoosterFree(booster));\n}\n\nint main() {\n  DataIter iter;\n  DataIterator_Init(&iter, BATCH_LEN, N_BATCHS);\n\n  /* Create DMatrix from iterator.  During training, some cache files with the\n   * prefix \"cache-\" will be generated in current directory */\n  char config[] = \"{\\\"missing\\\": NaN, \\\"cache_prefix\\\": \\\"cache\\\"}\";\n  DMatrix Xy;\n  safe_xgboost(XGDMatrixCreateFromCallback(\n      &iter, iter._proxy, DataIterator_Reset, DataIterator_Next, config, &Xy));\n\n  TrainModel(Xy);\n\n  safe_xgboost(XGDMatrixFree(Xy));\n\n  DataIterator_Free(&iter);\n  return 0;\n}\n"
  },
  {
    "path": "demo/c-api/inference/inference.c",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n *\n * @brief A simple example of using prediction functions.\n *\n * See more examples in test_c_api.cc on how to reuse a ProxyDMatrix object for reducing\n * the latency of DMatrix creation.\n */\n#include <stddef.h>\n#include <stdlib.h>\n#include <string.h>\n#include <xgboost/c_api.h>\n\n#define safe_xgboost(err)                                                      \\\n  if ((err) != 0) {                                                            \\\n    fprintf(stderr, \"%s:%d: error in %s: %s\\n\", __FILE__, __LINE__, #err,      \\\n            XGBGetLastError());                                                \\\n    exit(1);                                                                   \\\n  }\n\n#define safe_malloc(ptr)                                                       \\\n  if ((ptr) == NULL) {                                                         \\\n    fprintf(stderr, \"%s:%d: Failed to allocate memory.\\n\", __FILE__,           \\\n            __LINE__);                                                         \\\n    exit(1);                                                                   \\\n  }\n\n#define N_SAMPLES 128\n#define N_FEATURES 16\n\ntypedef BoosterHandle Booster;\ntypedef DMatrixHandle DMatrix;\n\n/* Row-major matrix */\nstruct _Matrix {\n  float *data;\n  size_t shape[2];\n\n  /* private members */\n  char _array_intrerface[256];\n};\n\n/* A custom data type for demo. */\ntypedef struct _Matrix *Matrix;\n\n/* Initialize matrix, copy data from `data` if it's not NULL. 
*/\nvoid Matrix_Create(Matrix *self, float const *data, size_t n_samples,\n                   size_t n_features) {\n  if (self == NULL) {\n    fprintf(stderr, \"Invalid pointer to %s\\n\", __func__);\n    exit(-1);\n  }\n\n  *self = (Matrix)malloc(sizeof(struct _Matrix));\n  safe_malloc(*self);\n  (*self)->data = (float *)malloc(n_samples * n_features * sizeof(float));\n  safe_malloc((*self)->data);\n  (*self)->shape[0] = n_samples;\n  (*self)->shape[1] = n_features;\n\n  if (data != NULL) {\n    memcpy((*self)->data, data,\n           (*self)->shape[0] * (*self)->shape[1] * sizeof(float));\n  }\n}\n\n/* Generate random matrix. */\nvoid Matrix_Random(Matrix *self, size_t n_samples, size_t n_features) {\n  Matrix_Create(self, NULL, n_samples, n_features);\n  for (size_t i = 0; i < n_samples * n_features; ++i) {\n    float x = (float)rand() / (float)(RAND_MAX);\n    (*self)->data[i] = x;\n  }\n}\n\n/* Array interface specified by numpy. */\nchar const *Matrix_ArrayInterface(Matrix self) {\n  char const template[] = \"{\\\"data\\\": [%lu, true], \\\"shape\\\": [%lu, %lu], \"\n                          \"\\\"typestr\\\": \\\"<f4\\\", \\\"version\\\": 3}\";\n  memset(self->_array_intrerface, '\\0', sizeof(self->_array_intrerface));\n  sprintf(self->_array_intrerface, template, (size_t)self->data, self->shape[0],\n          self->shape[1]);\n  return self->_array_intrerface;\n}\n\nsize_t Matrix_NSamples(Matrix self) { return self->shape[0]; }\n\nsize_t Matrix_NFeatures(Matrix self) { return self->shape[1]; }\n\nfloat Matrix_At(Matrix self, size_t i, size_t j) {\n  return self->data[i * self->shape[1] + j];\n}\n\nvoid Matrix_Print(Matrix self) {\n  for (size_t i = 0; i < Matrix_NSamples(self); i++) {\n    for (size_t j = 0; j < Matrix_NFeatures(self); ++j) {\n      printf(\"%f, \", Matrix_At(self, i, j));\n    }\n  }\n  printf(\"\\n\");\n}\n\nvoid Matrix_Free(Matrix self) {\n  if (self != NULL) {\n    if (self->data != NULL) {\n      self->shape[0] = 0;\n      
self->shape[1] = 0;\n      free(self->data);\n      self->data = NULL;\n    }\n    free(self);\n  }\n}\n\nint main() {\n  Matrix X;\n  Matrix y;\n\n  Matrix_Random(&X, N_SAMPLES, N_FEATURES);\n  Matrix_Random(&y, N_SAMPLES, 1);\n\n  char const *X_interface = Matrix_ArrayInterface(X);\n  char config[] = \"{\\\"nthread\\\": 16, \\\"missing\\\": NaN}\";\n  DMatrix Xy;\n  /* Dense means \"dense matrix\". */\n  safe_xgboost(XGDMatrixCreateFromDense(X_interface, config, &Xy));\n  /* Label must be in a contigious array. */\n  safe_xgboost(XGDMatrixSetDenseInfo(Xy, \"label\", y->data, y->shape[0], 1));\n\n  DMatrix cache[] = {Xy};\n  Booster booster;\n  /* Train a booster for demo. */\n  safe_xgboost(XGBoosterCreate(cache, 1, &booster));\n\n  size_t n_rounds = 10;\n  for (size_t i = 0; i < n_rounds; ++i) {\n    safe_xgboost(XGBoosterUpdateOneIter(booster, i, Xy));\n  }\n\n  /* Save the trained model in JSON format. */\n  safe_xgboost(XGBoosterSaveModel(booster, \"model.json\"));\n  safe_xgboost(XGBoosterFree(booster));\n\n  /* Load it back for inference.  The save and load is not required, only shown here for\n   * demonstration purpose. */\n  safe_xgboost(XGBoosterCreate(NULL, 0, &booster));\n  safe_xgboost(XGBoosterLoadModel(booster, \"model.json\"));\n  {\n    /* Run prediction with DMatrix object. */\n    char const config[] =\n        \"{\\\"training\\\": false, \\\"type\\\": 0, \"\n        \"\\\"iteration_begin\\\": 0, \\\"iteration_end\\\": 0, \\\"strict_shape\\\": true}\";\n    /* Shape of output prediction */\n    uint64_t const *out_shape;\n    /* Dimension of output prediction */\n    uint64_t out_dim;\n    /* Pointer to a thread local contigious array, assigned in prediction function. 
*/\n    float const *out_results;\n\n    safe_xgboost(XGBoosterPredictFromDMatrix(booster, Xy, config, &out_shape,\n                                             &out_dim, &out_results));\n    if (out_dim != 2 || out_shape[0] != N_SAMPLES || out_shape[1] != 1) {\n      fprintf(stderr, \"Regression model should output prediction as vector.\");\n      exit(-1);\n    }\n\n    Matrix predt;\n    /* Always copy output from XGBoost before calling next API function. */\n    Matrix_Create(&predt, out_results, out_shape[0], out_shape[1]);\n    printf(\"Results from prediction\\n\");\n    Matrix_Print(predt);\n    Matrix_Free(predt);\n  }\n\n  {\n    /* Run inplace prediction, which is faster and more memory efficient, but supports\n     * only basic inference types. */\n    char const config[] = \"{\\\"type\\\": 0, \\\"iteration_begin\\\": 0, \"\n                          \"\\\"iteration_end\\\": 0, \\\"strict_shape\\\": true, \"\n                          \"\\\"cache_id\\\": 0, \\\"missing\\\": NaN}\";\n    /* Shape of output prediction */\n    uint64_t const *out_shape;\n    /* Dimension of output prediction */\n    uint64_t out_dim;\n    /* Pointer to a thread local contigious array, assigned in prediction function. */\n    float const *out_results;\n\n    char const *X_interface = Matrix_ArrayInterface(X);\n    safe_xgboost(XGBoosterPredictFromDense(booster, X_interface, config, NULL,\n                                           &out_shape, &out_dim, &out_results));\n\n    if (out_dim != 2 || out_shape[0] != N_SAMPLES || out_shape[1] != 1) {\n      fprintf(stderr,\n              \"Regression model should output prediction as vector, %lu, %lu\",\n              out_dim, out_shape[0]);\n      exit(-1);\n    }\n\n    Matrix predt;\n    /* Always copy output from XGBoost before calling next API function. 
*/\n    Matrix_Create(&predt, out_results, out_shape[0], out_shape[1]);\n    printf(\"Results from inplace prediction\\n\");\n    Matrix_Print(predt);\n    Matrix_Free(predt);\n  }\n\n  XGBoosterFree(booster);\n\n  XGDMatrixFree(Xy);\n  Matrix_Free(X);\n  Matrix_Free(y);\n  return 0;\n}\n"
  },
  {
    "path": "demo/dask/README.rst",
    "content": ".. _dask-examples:\n\nXGBoost Dask Feature Walkthrough\n================================\n\nThis directory contains some demonstrations for using `dask` with `XGBoost`.  For an\noverview, see :doc:`/tutorials/dask`\n"
  },
  {
    "path": "demo/dask/cpu_survival.py",
    "content": "\"\"\"\nExample of training survival model with Dask on CPU\n===================================================\n\n\"\"\"\n\nimport os\n\nimport dask.array as da\nimport dask.dataframe as dd\nfrom dask.distributed import Client, LocalCluster\n\nfrom xgboost import dask as dxgb\nfrom xgboost.dask import DaskDMatrix\n\n\ndef main(client: Client) -> da.Array:\n    # Load an example survival data from CSV into a Dask data frame.\n    # The Veterans' Administration Lung Cancer Trial\n    # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)\n    CURRENT_DIR = os.path.dirname(__file__)\n    df = dd.read_csv(\n        os.path.join(CURRENT_DIR, os.pardir, \"data\", \"veterans_lung_cancer.csv\")\n    )\n\n    # DaskDMatrix acts like normal DMatrix, works as a proxy for local\n    # DMatrix scatter around workers.\n    # For AFT survival, you'd need to extract the lower and upper bounds for the label\n    # and pass them as arguments to DaskDMatrix.\n    y_lower_bound = df[\"Survival_label_lower_bound\"]\n    y_upper_bound = df[\"Survival_label_upper_bound\"]\n    X = df.drop([\"Survival_label_lower_bound\", \"Survival_label_upper_bound\"], axis=1)\n    dtrain = DaskDMatrix(\n        client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound\n    )\n\n    # Use train method from xgboost.dask instead of xgboost.  
This\n    # distributed version of train returns a dictionary containing the\n    # resulting booster and evaluation history obtained from\n    # evaluation metrics.\n    params = {\n        \"verbosity\": 1,\n        \"objective\": \"survival:aft\",\n        \"eval_metric\": \"aft-nloglik\",\n        \"learning_rate\": 0.05,\n        \"aft_loss_distribution_scale\": 1.20,\n        \"aft_loss_distribution\": \"normal\",\n        \"max_depth\": 6,\n        \"lambda\": 0.01,\n        \"alpha\": 0.02,\n    }\n    output = dxgb.train(\n        client, params, dtrain, num_boost_round=100, evals=[(dtrain, \"train\")]\n    )\n    bst = output[\"booster\"]\n    history = output[\"history\"]\n\n    # you can pass output directly into `predict` too.\n    prediction = dxgb.predict(client, bst, dtrain)\n    print(\"Evaluation history: \", history)\n\n    # Uncomment the following line to save the model to the disk\n    # bst.save_model('survival_model.json')\n\n    return prediction\n\n\nif __name__ == \"__main__\":\n    # or use other clusters for scaling\n    with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:\n        with Client(cluster) as client:\n            main(client)\n"
  },
  {
    "path": "demo/dask/cpu_training.py",
    "content": "\"\"\"\nExample of training with Dask on CPU\n====================================\n\n\"\"\"\n\nfrom dask import array as da\nfrom dask.distributed import Client, LocalCluster\n\nfrom xgboost import dask as dxgb\nfrom xgboost.dask import DaskDMatrix\n\n\ndef main(client: Client) -> None:\n    # generate some random data for demonstration\n    m = 100000\n    n = 100\n    rng = da.random.default_rng(1)\n    X = rng.normal(size=(m, n), chunks=(10000, -1))\n    y = X.sum(axis=1)\n\n    # DaskDMatrix acts like normal DMatrix, works as a proxy for local\n    # DMatrix scatter around workers.\n    dtrain = DaskDMatrix(client, X, y)\n\n    # Use train method from xgboost.dask instead of xgboost.  This\n    # distributed version of train returns a dictionary containing the\n    # resulting booster and evaluation history obtained from\n    # evaluation metrics.\n    output = dxgb.train(\n        client,\n        {\"verbosity\": 1, \"tree_method\": \"hist\"},\n        dtrain,\n        num_boost_round=4,\n        evals=[(dtrain, \"train\")],\n    )\n    bst = output[\"booster\"]\n    history = output[\"history\"]\n\n    # you can pass output directly into `predict` too.\n    prediction = dxgb.predict(client, bst, dtrain)\n    print(\"Evaluation history:\", history)\n    print(\"Error:\", da.sqrt((prediction - y) ** 2).mean().compute())\n\n\nif __name__ == \"__main__\":\n    # or use other clusters for scaling\n    with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:\n        with Client(cluster) as client:\n            main(client)\n"
  },
  {
    "path": "demo/dask/dask_callbacks.py",
    "content": "\"\"\"\nExample of using callbacks with Dask\n====================================\n\"\"\"\n\nfrom typing import Any\n\nimport numpy as np\nfrom dask.distributed import Client, LocalCluster\nfrom dask_ml.datasets import make_regression\nfrom dask_ml.model_selection import train_test_split\n\nimport xgboost as xgb\nimport xgboost.dask as dxgb\nfrom xgboost.dask import DaskDMatrix\n\n\ndef probability_for_going_backward(epoch: int) -> float:\n    return 0.999 / (1.0 + 0.05 * np.log(1.0 + epoch))\n\n\n# All callback functions must inherit from TrainingCallback\nclass CustomEarlyStopping(xgb.callback.TrainingCallback):\n    \"\"\"A custom early stopping class where early stopping is determined stochastically.\n    In the beginning, allow the metric to become worse with a probability of 0.999.\n    As boosting progresses, the probability should be adjusted downward\"\"\"\n\n    def __init__(\n        self, *, validation_set: str, target_metric: str, maximize: bool, seed: int\n    ) -> None:\n        self.validation_set = validation_set\n        self.target_metric = target_metric\n        self.maximize = maximize\n        self.seed = seed\n        self.rng = np.random.default_rng(seed=seed)\n        if maximize:\n            self.better = lambda x, y: x > y\n        else:\n            self.better = lambda x, y: x < y\n\n    def after_iteration(\n        self, model: Any, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog\n    ) -> bool:\n        metric_history = evals_log[self.validation_set][self.target_metric]\n        if len(metric_history) < 2 or self.better(\n            metric_history[-1], metric_history[-2]\n        ):\n            return False  # continue training\n        p = probability_for_going_backward(epoch)\n        go_backward = self.rng.choice(2, size=(1,), replace=True, p=[1 - p, p]).astype(\n            np.bool_\n        )[0]\n        print(\n            \"The validation metric went into the wrong direction. 
\"\n            + f\"Stopping training with probability {1 - p}...\"\n        )\n        if go_backward:\n            return False  # continue training\n        else:\n            return True  # stop training\n\n\ndef main(client: Client) -> None:\n    m = 100000\n    n = 100\n    X, y = make_regression(n_samples=m, n_features=n, chunks=200, random_state=0)\n    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n\n    dtrain = DaskDMatrix(client, X_train, y_train)\n    dtest = DaskDMatrix(client, X_test, y_test)\n\n    output = dxgb.train(\n        client,\n        {\n            \"verbosity\": 1,\n            \"tree_method\": \"hist\",\n            \"objective\": \"reg:squarederror\",\n            \"eval_metric\": \"rmse\",\n            \"max_depth\": 6,\n            \"learning_rate\": 1.0,\n        },\n        dtrain,\n        num_boost_round=1000,\n        evals=[(dtrain, \"train\"), (dtest, \"test\")],\n        callbacks=[\n            CustomEarlyStopping(\n                validation_set=\"test\", target_metric=\"rmse\", maximize=False, seed=0\n            )\n        ],\n    )\n\n\nif __name__ == \"__main__\":\n    # or use other clusters for scaling\n    with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:\n        with Client(cluster) as client:\n            main(client)\n"
  },
  {
    "path": "demo/dask/dask_learning_to_rank.py",
    "content": "\"\"\"\nLearning to rank with the Dask Interface\n========================================\n\n  .. versionadded:: 3.0.0\n\nThis is a demonstration of using XGBoost for learning to rank tasks using the\nMSLR_10k_letor dataset. For more infomation about the dataset, please visit its\n`description page <https://www.microsoft.com/en-us/research/project/mslr/>`_.\n\nSee :ref:`ltr-dist` for a general description for distributed learning to rank and\n:ref:`ltr-dask` for Dask-specific features.\n\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport os\nfrom contextlib import contextmanager\nfrom typing import Generator\n\nimport dask\nimport numpy as np\nfrom dask import dataframe as dd\nfrom distributed import Client, LocalCluster, wait\nfrom sklearn.datasets import load_svmlight_file\n\nfrom xgboost import dask as dxgb\n\n\ndef load_mslr_10k(\n    device: str, data_path: str, cache_path: str\n) -> tuple[dd.DataFrame, dd.DataFrame, dd.DataFrame]:\n    \"\"\"Load the MSLR10k dataset from data_path and save parquet files in the cache_path.\"\"\"\n    root_path = os.path.expanduser(args.data)\n    cache_path = os.path.expanduser(args.cache)\n\n    # Use only the Fold1 for demo:\n    # Train,      Valid, Test\n    # {S1,S2,S3}, S4,    S5\n    fold = 1\n\n    if not os.path.exists(cache_path):\n        os.mkdir(cache_path)\n        fold_path = os.path.join(root_path, f\"Fold{fold}\")\n        train_path = os.path.join(fold_path, \"train.txt\")\n        valid_path = os.path.join(fold_path, \"vali.txt\")\n        test_path = os.path.join(fold_path, \"test.txt\")\n\n        X_train, y_train, qid_train = load_svmlight_file(\n            train_path, query_id=True, dtype=np.float32\n        )\n        columns = [f\"f{i}\" for i in range(X_train.shape[1])]\n        X_train = dd.from_array(X_train.toarray(), columns=columns)\n        y_train = y_train.astype(np.int32)\n        qid_train = qid_train.astype(np.int32)\n\n        X_train[\"y\"] = 
dd.from_array(y_train)\n        X_train[\"qid\"] = dd.from_array(qid_train)\n        X_train.to_parquet(os.path.join(cache_path, \"train\"), engine=\"pyarrow\")\n\n        X_valid, y_valid, qid_valid = load_svmlight_file(\n            valid_path, query_id=True, dtype=np.float32\n        )\n        X_valid = dd.from_array(X_valid.toarray(), columns=columns)\n        y_valid = y_valid.astype(np.int32)\n        qid_valid = qid_valid.astype(np.int32)\n\n        X_valid[\"y\"] = dd.from_array(y_valid)\n        X_valid[\"qid\"] = dd.from_array(qid_valid)\n        X_valid.to_parquet(os.path.join(cache_path, \"valid\"), engine=\"pyarrow\")\n\n        X_test, y_test, qid_test = load_svmlight_file(\n            test_path, query_id=True, dtype=np.float32\n        )\n\n        X_test = dd.from_array(X_test.toarray(), columns=columns)\n        y_test = y_test.astype(np.int32)\n        qid_test = qid_test.astype(np.int32)\n\n        X_test[\"y\"] = dd.from_array(y_test)\n        X_test[\"qid\"] = dd.from_array(qid_test)\n        X_test.to_parquet(os.path.join(cache_path, \"test\"), engine=\"pyarrow\")\n\n    df_train = dd.read_parquet(\n        os.path.join(cache_path, \"train\"), calculate_divisions=True\n    )\n    df_valid = dd.read_parquet(\n        os.path.join(cache_path, \"valid\"), calculate_divisions=True\n    )\n    df_test = dd.read_parquet(\n        os.path.join(cache_path, \"test\"), calculate_divisions=True\n    )\n\n    return df_train, df_valid, df_test\n\n\ndef ranking_demo(client: Client, args: argparse.Namespace) -> None:\n    \"\"\"Learning to rank with data sorted locally.\"\"\"\n    df_tr, df_va, _ = load_mslr_10k(args.device, args.data, args.cache)\n\n    X_train: dd.DataFrame = df_tr[df_tr.columns.difference([\"y\", \"qid\"])]\n    y_train = df_tr[[\"y\", \"qid\"]]\n    Xy_train = dxgb.DaskQuantileDMatrix(client, X_train, y_train.y, qid=y_train.qid)\n\n    X_valid: dd.DataFrame = df_va[df_va.columns.difference([\"y\", \"qid\"])]\n    y_valid = 
df_va[[\"y\", \"qid\"]]\n    Xy_valid = dxgb.DaskQuantileDMatrix(\n        client, X_valid, y_valid.y, qid=y_valid.qid, ref=Xy_train\n    )\n    # Upon training, you will see a performance warning about sorting data based on\n    # query groups.\n    dxgb.train(\n        client,\n        {\"objective\": \"rank:ndcg\", \"device\": args.device},\n        Xy_train,\n        evals=[(Xy_train, \"Train\"), (Xy_valid, \"Valid\")],\n        num_boost_round=100,\n    )\n\n\ndef ranking_wo_split_demo(client: Client, args: argparse.Namespace) -> None:\n    \"\"\"Learning to rank with data partitioned according to query groups.\"\"\"\n    df_tr, df_va, df_te = load_mslr_10k(args.device, args.data, args.cache)\n\n    X_tr = df_tr[df_tr.columns.difference([\"y\", \"qid\"])]\n    X_va = df_va[df_va.columns.difference([\"y\", \"qid\"])]\n\n    # `allow_group_split=False` makes sure data is partitioned according to the query\n    # groups.\n    ltr = dxgb.DaskXGBRanker(allow_group_split=False, device=args.device)\n    ltr.client = client\n    ltr = ltr.fit(\n        X_tr,\n        df_tr.y,\n        qid=df_tr.qid,\n        eval_set=[(X_tr, df_tr.y), (X_va, df_va.y)],\n        eval_qid=[df_tr.qid, df_va.qid],\n        verbose=True,\n    )\n\n    df_te = df_te.persist()\n    wait([df_te])\n\n    X_te = df_te[df_te.columns.difference([\"y\", \"qid\"])]\n    predt = ltr.predict(X_te)\n    y = client.compute(df_te.y)\n    wait([predt, y])\n\n\n@contextmanager\ndef gen_client(device: str) -> Generator[Client, None, None]:\n    match device:\n        case \"cuda\":\n            from dask_cuda import LocalCUDACluster\n\n            with LocalCUDACluster() as cluster:\n                with Client(cluster) as client:\n                    with dask.config.set(\n                        {\n                            \"array.backend\": \"cupy\",\n                            \"dataframe.backend\": \"cudf\",\n                        }\n                    ):\n                        yield client\n 
       case \"cpu\":\n            with LocalCluster() as cluster:\n                with Client(cluster) as client:\n                    yield client\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Demonstration of learning to rank using XGBoost.\"\n    )\n    parser.add_argument(\n        \"--data\",\n        type=str,\n        help=\"Root directory of the MSLR-WEB10K data.\",\n        required=True,\n    )\n    parser.add_argument(\n        \"--cache\",\n        type=str,\n        help=\"Directory for caching processed data.\",\n        required=True,\n    )\n    parser.add_argument(\"--device\", choices=[\"cpu\", \"cuda\"], default=\"cpu\")\n    parser.add_argument(\n        \"--no-split\",\n        action=\"store_true\",\n        help=\"Flag to indicate query groups should not be split.\",\n    )\n    args = parser.parse_args()\n\n    with gen_client(args.device) as client:\n        if args.no_split:\n            ranking_wo_split_demo(client, args)\n        else:\n            ranking_demo(client, args)\n"
  },
  {
    "path": "demo/dask/forward_logging.py",
    "content": "\"\"\"\nExample of forwarding evaluation logs to the client\n===================================================\n\nThe example runs on GPU. Two classes are defined to show how to use Dask builtins to\nforward the logs to the client process.\n\n\"\"\"\n\nimport logging\n\nimport dask\nimport distributed\nfrom dask import array as da\nfrom dask_cuda import LocalCUDACluster\nfrom distributed import Client\n\nfrom xgboost import dask as dxgb\nfrom xgboost.callback import EvaluationMonitor\n\n\ndef _get_logger() -> logging.Logger:\n    logger = logging.getLogger(\"[xgboost.dask]\")\n    logger.setLevel(logging.INFO)\n    if not logger.hasHandlers():\n        handler = logging.StreamHandler()\n        logger.addHandler(handler)\n    return logger\n\n\nclass ForwardLoggingMonitor(EvaluationMonitor):\n    def __init__(\n        self,\n        client: Client,\n        rank: int = 0,\n        period: int = 1,\n    ) -> None:\n        \"\"\"Print the evaluation result at each iteration. The default monitor in the\n        native interface logs the result to the Dask scheduler process. This class can\n        be used to forward the logging to the client process. Important: see the\n        `client` parameter for more info.\n\n        Parameters\n        ----------\n        client :\n            Distributed client. This must be the top-level client. The class uses\n            :py:meth:`distributed.Client.forward_logging` in conjunction with the Python\n            :py:mod:`logging` module to forward the evaluation results to the client\n            process. It has undefined behaviour if called in a nested task. 
As a result,\n            client-side logging is not enabled by default.\n\n        \"\"\"\n        client.forward_logging(_get_logger().name)\n\n        super().__init__(\n            rank=rank,\n            period=period,\n            logger=lambda msg: _get_logger().info(msg.strip()),\n        )\n\n\nclass WorkerEventMonitor(EvaluationMonitor):\n    \"\"\"Use :py:meth:`distributed.print` to forward the log. A downside is that not only\n    all clients connected to the cluster can see the log, the logs are also printed on\n    the worker. If you use a local cluster, the log is duplicated.\n\n    \"\"\"\n\n    def __init__(self, rank: int = 0, period: int = 1) -> None:\n        super().__init__(\n            rank=rank, period=period, logger=lambda msg: distributed.print(msg.strip())\n        )\n\n\ndef hist_train(\n    client: Client, X: da.Array, y: da.Array, monitor: EvaluationMonitor\n) -> da.Array:\n    # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not\n    # be used for anything else other than as a training DMatrix, unless a reference is\n    # specified. See the `ref` argument of `DaskQuantileDMatrix`.\n    dtrain = dxgb.DaskQuantileDMatrix(client, X, y)\n    output = dxgb.train(\n        client,\n        # Make sure the device is set to CUDA.\n        {\"tree_method\": \"hist\", \"device\": \"cuda\"},\n        dtrain,\n        num_boost_round=4,\n        evals=[(dtrain, \"train\")],\n        # Use the monitor to forward the log.\n        callbacks=[monitor],\n        # Disable the internal logging and prefer the client-side `EvaluationMonitor`.\n        verbose_eval=False,\n    )\n    bst = output[\"booster\"]\n    history = output[\"history\"]\n\n    prediction = dxgb.predict(client, bst, X)\n    print(\"Evaluation history:\", history)\n    return prediction\n\n\nif __name__ == \"__main__\":\n    # `LocalCUDACluster` is used for assigning GPU to XGBoost processes.  
Here\n    # `n_workers` represents the number of GPUs since we use one GPU per worker process.\n    with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:\n        # Create client from cluster, set the backend to GPU array (cupy).\n        with Client(cluster) as client, dask.config.set({\"array.backend\": \"cupy\"}):\n            # Generate some random data for demonstration\n            rng = da.random.default_rng(1)\n\n            m = 2**18\n            n = 100\n            X = rng.uniform(size=(m, n), chunks=(128**2, -1))\n            y = X.sum(axis=1)\n\n            # Use forwarding, the client must be the top client.\n            monitor: EvaluationMonitor = ForwardLoggingMonitor(client)\n            hist_train(client, X, y, monitor).compute()\n\n            # Use distributed.print, the logs in this demo are duplicated as the same\n            # log is printed in all workers along with the client.\n            monitor = WorkerEventMonitor()\n            hist_train(client, X, y, monitor).compute()\n"
  },
  {
    "path": "demo/dask/gpu_training.py",
    "content": "\"\"\"\nExample of training with Dask on GPU\n====================================\n\"\"\"\n\nimport dask\nimport dask_cudf\nfrom dask import array as da\nfrom dask import dataframe as dd\nfrom dask.distributed import Client\nfrom dask_cuda import LocalCUDACluster\n\nfrom xgboost import dask as dxgb\nfrom xgboost.dask import DaskDMatrix\n\n\ndef using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:\n    # DaskDMatrix acts like normal DMatrix, works as a proxy for local DMatrix scatter\n    # around workers.\n    dtrain = DaskDMatrix(client, X, y)\n\n    # Use train method from xgboost.dask instead of xgboost.  This distributed version\n    # of train returns a dictionary containing the resulting booster and evaluation\n    # history obtained from evaluation metrics.\n    output = dxgb.train(\n        client,\n        # Make sure the device is set to CUDA.\n        {\"tree_method\": \"hist\", \"device\": \"cuda\"},\n        dtrain,\n        num_boost_round=4,\n        evals=[(dtrain, \"train\")],\n    )\n    bst = output[\"booster\"]\n    history = output[\"history\"]\n\n    # you can pass output directly into `predict` too.\n    prediction = dxgb.predict(client, bst, dtrain)\n    print(\"Evaluation history:\", history)\n    return prediction\n\n\ndef using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:\n    \"\"\"`DaskQuantileDMatrix` is a data type specialized for `hist` tree methods for\n     reducing memory usage.\n\n    .. versionadded:: 1.2.0\n\n    \"\"\"\n    # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not\n    # be used for anything else other than training unless a reference is specified. 
See\n    # the `ref` argument of `DaskQuantileDMatrix`.\n    dtrain = dxgb.DaskQuantileDMatrix(client, X, y)\n    output = dxgb.train(\n        client,\n        # Make sure the device is set to CUDA.\n        {\"tree_method\": \"hist\", \"device\": \"cuda\"},\n        dtrain,\n        num_boost_round=4,\n        evals=[(dtrain, \"train\")],\n    )\n\n    prediction = dxgb.predict(client, output, X)\n    return prediction\n\n\nif __name__ == \"__main__\":\n    # `LocalCUDACluster` is used for assigning GPU to XGBoost processes.  Here\n    # `n_workers` represents the number of GPUs since we use one GPU per worker process.\n    with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:\n        # Create client from cluster, set the backend to GPU array (cupy).\n        with Client(cluster) as client, dask.config.set({\"array.backend\": \"cupy\"}):\n            # Generate some random data for demonstration\n            rng = da.random.default_rng(1)\n\n            m = 2**18\n            n = 100\n            X = rng.uniform(size=(m, n), chunks=(128**2, -1))\n            y = X.sum(axis=1)\n\n            X = dd.from_dask_array(X)\n            y = dd.from_dask_array(y)\n            # XGBoost can take arrays. This is to show that DataFrame uses the GPU\n            # backend as well.\n            assert isinstance(X, dask_cudf.DataFrame)\n            assert isinstance(y, dask_cudf.Series)\n\n            print(\"Using DaskQuantileDMatrix\")\n            from_ddqdm = using_quantile_device_dmatrix(client, X, y).compute()\n            print(\"Using DMatrix\")\n            from_dmatrix = using_dask_matrix(client, X, y).compute()\n"
  },
  {
    "path": "demo/dask/sklearn_cpu_training.py",
    "content": "\"\"\"\nUse scikit-learn regressor interface with CPU histogram tree method\n===================================================================\n\"\"\"\n\nfrom dask import array as da\nfrom dask.distributed import Client, LocalCluster\n\nfrom xgboost import dask as dxgb\n\n\ndef main(client: Client) -> dxgb.Booster:\n    # generate some random data for demonstration\n    n = 100\n    m = 10000\n    partition_size = 100\n    X = da.random.random((m, n), partition_size)\n    y = da.random.random(m, partition_size)\n\n    regressor = dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)\n    regressor.set_params(tree_method=\"hist\")\n    # assigning client here is optional\n    regressor.client = client\n\n    regressor.fit(X, y, eval_set=[(X, y)])\n    prediction = regressor.predict(X)\n\n    bst = regressor.get_booster()\n    history = regressor.evals_result()\n\n    print(\"Evaluation history:\", history)\n    # returned prediction is always a dask array.\n    assert isinstance(prediction, da.Array)\n    return bst  # returning the trained model\n\n\nif __name__ == \"__main__\":\n    # or use other clusters for scaling\n    with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:\n        with Client(cluster) as client:\n            main(client)\n"
  },
  {
    "path": "demo/dask/sklearn_gpu_training.py",
    "content": "\"\"\"\nUse scikit-learn regressor interface with GPU histogram tree method\n===================================================================\n\"\"\"\n\nimport dask\nfrom dask import array as da\nfrom dask.distributed import Client\n\n# It's recommended to use dask_cuda for GPU assignment\nfrom dask_cuda import LocalCUDACluster\n\nfrom xgboost import dask as dxgb\n\n\ndef main(client: Client) -> dxgb.Booster:\n    # Generate some random data for demonstration\n    rng = da.random.default_rng(1)\n\n    m = 2**18\n    n = 100\n    X = rng.uniform(size=(m, n), chunks=(128**2, -1))\n    y = X.sum(axis=1)\n\n    regressor = dxgb.DaskXGBRegressor(verbosity=1)\n    # Set the device to CUDA\n    regressor.set_params(tree_method=\"hist\", device=\"cuda\")\n    # Assigning client here is optional\n    regressor.client = client\n\n    regressor.fit(X, y, eval_set=[(X, y)])\n    prediction = regressor.predict(X)\n\n    bst = regressor.get_booster()\n    history = regressor.evals_result()\n\n    print(\"Evaluation history:\", history)\n    # returned prediction is always a dask array.\n    assert isinstance(prediction, da.Array)\n    return bst  # returning the trained model\n\n\nif __name__ == \"__main__\":\n    # With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.\n    # `LocalCUDACluster` used here is only for demonstration purpose.\n    with LocalCUDACluster() as cluster:\n        # Create client from cluster, set the backend to GPU array (cupy).\n        with Client(cluster) as client, dask.config.set({\"array.backend\": \"cupy\"}):\n            main(client)\n"
  },
  {
    "path": "demo/data/README.md",
    "content": "This folder contains processed example datasets used by the demos.\nCopyright of each dataset belongs to its original copyright holder.\n"
  },
  {
    "path": "demo/data/gen_autoclaims.R",
    "content": "site <- 'http://cran.r-project.org'\nif (!require('dummies')) {\n    install.packages('dummies', repos = site)\n}\nif (!require('insuranceData')) {\n    install.packages('insuranceData', repos = site)\n}\n\nlibrary(dummies)\nlibrary(insuranceData)\n\ndata(AutoClaims)\ndata <- AutoClaims\n\ndata$STATE <- as.factor(data$STATE)\ndata$CLASS <- as.factor(data$CLASS)\ndata$GENDER <- as.factor(data$GENDER)\n\ndata.dummy <- dummy.data.frame(\n    data\n    , dummy.class = 'factor'\n    , omit.constants = TRUE\n)\nwrite.table(\n    data.dummy\n    , 'autoclaims.csv'\n    , sep = ','\n    , row.names = FALSE\n    , col.names = FALSE\n    , quote = FALSE\n)\n"
  },
  {
    "path": "demo/data/regression/README.md",
    "content": "Regression\n==========\n\nThe dataset is the [computer hardware dataset from the UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware).\n"
  },
  {
    "path": "demo/data/regression/machine.names",
    "content": "1. Title: Relative CPU Performance Data \n\n2. Source Information\n   -- Creators: Phillip Ein-Dor and Jacob Feldmesser\n     -- Ein-Dor: Faculty of Management; Tel Aviv University; Ramat-Aviv; \n        Tel Aviv, 69978; Israel\n   -- Donor: David W. Aha (aha@ics.uci.edu) (714) 856-8779   \n   -- Date: October, 1987\n \n3. Past Usage:\n    1. Ein-Dor and Feldmesser (CACM 4/87, pp 308-317)\n       -- Results: \n          -- linear regression prediction of relative cpu performance\n          -- Recorded 34% average deviation from actual values \n    2. Kibler,D. & Aha,D. (1988).  Instance-Based Prediction of\n       Real-Valued Attributes.  In Proceedings of the CSCSI (Canadian\n       AI) Conference.\n       -- Results:\n          -- instance-based prediction of relative cpu performance\n          -- similar results; no transformations required\n    - Predicted attribute: cpu relative performance (numeric)\n\n4. Relevant Information:\n   -- The estimated relative performance values were estimated by the authors\n      using a linear regression method.  See their article (pp 308-313) for\n      more details on how the relative performance values were set.\n\n5. Number of Instances: 209 \n\n6. Number of Attributes: 10 (6 predictive attributes, 2 non-predictive, \n                             1 goal field, and the linear regression's guess)\n\n7. Attribute Information:\n   1. vendor name: 30 \n      (adviser, amdahl,apollo, basf, bti, burroughs, c.r.d, cambex, cdc, dec, \n       dg, formation, four-phase, gould, honeywell, hp, ibm, ipl, magnuson, \n       microdata, nas, ncr, nixdorf, perkin-elmer, prime, siemens, sperry, \n       sratus, wang)\n   2. Model Name: many unique symbols\n   3. MYCT: machine cycle time in nanoseconds (integer)\n   4. MMIN: minimum main memory in kilobytes (integer)\n   5. MMAX: maximum main memory in kilobytes (integer)\n   6. CACH: cache memory in kilobytes (integer)\n   7. CHMIN: minimum channels in units (integer)\n   8. 
CHMAX: maximum channels in units (integer)\n   9. PRP: published relative performance (integer)\n  10. ERP: estimated relative performance from the original article (integer)\n\n8. Missing Attribute Values: None\n\n9. Class Distribution: the class value (PRP) is continuously valued.\n   PRP Value Range:   Number of Instances in Range:\n   0-20               31\n   21-100             121\n   101-200            27\n   201-300            13\n   301-400            7\n   401-500            4\n   501-600            2\n   above 600          4\n\nSummary Statistics:\n\t   Min  Max   Mean    SD      PRP Correlation\n   MYCT:   17   1500  203.8   260.3   -0.3071\n   MMIN:   64   32000 2868.0  3878.7   0.7949\n   MMAX:   64   64000 11796.1 11726.6  0.8630\n   CACH:   0    256   25.2    40.6     0.6626\n   CHMIN:  0    52    4.7     6.8      0.6089\n   CHMAX:  0    176   18.2    26.0     0.6052\n   PRP:    6    1150  105.6   160.8    1.0000\n   ERP:   15    1238  99.3    154.8    0.9665\n\n"
  },
  {
    "path": "demo/data/regression/mapfeat.py",
    "content": "#!/usr/bin/env python3\n\nfo = open('machine.txt', 'w')\ncnt = 6\nfmap = {}\nfor l in open('machine.data'):\n    arr = l.split(',')\n    fo.write(arr[8])\n    for i in range(0, 6):\n        fo.write(' %d:%s' % (i, arr[i + 2]))\n\n    if arr[0] not in fmap:\n        fmap[arr[0]] = cnt\n        cnt += 1\n\n    fo.write(' %d:1' % fmap[arr[0]])\n    fo.write('\\n')\n\nfo.close()\n\n# create feature map for machine data\nfo = open('featmap.txt', 'w')\n# list from machine.names\nnames = [\n    'vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP'\n]\n\nfor i in range(0, 6):\n    fo.write('%d\\t%s\\tint\\n' % (i, names[i + 1]))\n\nfor v, k in sorted(fmap.items(), key=lambda x: x[1]):\n    fo.write('%d\\tvendor=%s\\ti\\n' % (k, v))\nfo.close()\n"
  },
  {
    "path": "demo/data/regression/mknfold.py",
    "content": "#!/usr/bin/env python3\n\nimport random\nimport sys\n\nif len(sys.argv) < 2:\n    print('Usage:<filename> <k> [nfold = 5]')\n    exit(0)\n\nrandom.seed(10)\n\nk = int(sys.argv[2])\nif len(sys.argv) > 3:\n    nfold = int(sys.argv[3])\nelse:\n    nfold = 5\n\nfi = open(sys.argv[1], 'r')\nftr = open(sys.argv[1] + '.train', 'w')\nfte = open(sys.argv[1] + '.test', 'w')\nfor l in fi:\n    if random.randint(1, nfold) == k:\n        fte.write(l)\n    else:\n        ftr.write(l)\n\nfi.close()\nftr.close()\nfte.close()\n"
  },
  {
    "path": "demo/guide-python/README.rst",
    "content": "XGBoost Python Feature Walkthrough\n==================================\n\n\nThis is a collection of examples for using the XGBoost Python package.\n"
  },
  {
    "path": "demo/guide-python/basic_walkthrough.py",
    "content": "\"\"\"\nGetting started with XGBoost\n============================\n\nThis is a simple example of using the native XGBoost interface, there are other\ninterfaces in the Python package like scikit-learn interface and Dask interface.\n\n\nSee :doc:`/python/python_intro` and :doc:`/tutorials/index` for other references.\n\n\"\"\"\nimport os\nimport pickle\n\nimport numpy as np\nfrom sklearn.datasets import load_svmlight_file\n\nimport xgboost as xgb\n\n# Make sure the demo knows where to load the data.\nCURRENT_DIR = os.path.dirname(os.path.abspath(__file__))\nXGBOOST_ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))\nDEMO_DIR = os.path.join(XGBOOST_ROOT_DIR, \"demo\")\n\n# X is a scipy csr matrix, XGBoost supports many other input types,\nX, y = load_svmlight_file(os.path.join(DEMO_DIR, \"data\", \"agaricus.txt.train\"))\ndtrain = xgb.DMatrix(X, y)\n# validation set\nX_test, y_test = load_svmlight_file(os.path.join(DEMO_DIR, \"data\", \"agaricus.txt.test\"))\ndtest = xgb.DMatrix(X_test, y_test)\n\n# specify parameters via map, definition are same as c++ version\nparam = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n\n# specify validations set to watch performance\nwatchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n# number of boosting rounds\nnum_round = 2\nbst = xgb.train(param, dtrain, num_boost_round=num_round, evals=watchlist)\n\n# run prediction\npreds = bst.predict(dtest)\nlabels = dtest.get_label()\nprint(\n    \"error=%f\"\n    % (\n        sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i])\n        / float(len(preds))\n    )\n)\nbst.save_model(\"model-0.json\")\n# dump model\nbst.dump_model(\"dump.raw.txt\")\n# dump model with feature map\nbst.dump_model(\"dump.nice.txt\", os.path.join(DEMO_DIR, \"data/featmap.txt\"))\n\n# save dmatrix into binary buffer\ndtest.save_binary(\"dtest.dmatrix\")\n# save model\nbst.save_model(\"model-1.json\")\n# load model and data in\nbst2 = 
xgb.Booster(model_file=\"model-1.json\")\ndtest2 = xgb.DMatrix(\"dtest.dmatrix\")\npreds2 = bst2.predict(dtest2)\n# assert they are the same\nassert np.sum(np.abs(preds2 - preds)) == 0\n\n# alternatively, you can pickle the booster\npks = pickle.dumps(bst2)\n# load model and data in\nbst3 = pickle.loads(pks)\npreds3 = bst3.predict(dtest2)\n# assert they are the same\nassert np.sum(np.abs(preds3 - preds)) == 0\n"
  },
  {
    "path": "demo/guide-python/boost_from_prediction.py",
    "content": "\"\"\"\nDemo for boosting from prediction\n=================================\n\"\"\"\nimport os\n\nimport xgboost as xgb\n\nCURRENT_DIR = os.path.dirname(__file__)\ndtrain = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train?format=libsvm\")\n)\ndtest = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test?format=libsvm\")\n)\nwatchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n###\n# advanced: start from a initial base prediction\n#\nprint(\"start running example to start from a initial prediction\")\n# specify parameters via map, definition are same as c++ version\nparam = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n# train xgboost for 1 round\nbst = xgb.train(param, dtrain, 1, watchlist)\n# Note: we need the margin value instead of transformed prediction in\n# set_base_margin\n# do predict with output_margin=True, will always give you margin values\n# before logistic transformation\nptrain = bst.predict(dtrain, output_margin=True)\nptest = bst.predict(dtest, output_margin=True)\ndtrain.set_base_margin(ptrain)\ndtest.set_base_margin(ptest)\n\nprint(\"this is result of running from initial prediction\")\nbst = xgb.train(param, dtrain, 1, watchlist)\n"
  },
  {
    "path": "demo/guide-python/callbacks.py",
    "content": "\"\"\"\nDemo for using and defining callback functions\n==============================================\n\n    .. versionadded:: 1.3.0\n\"\"\"\n\nimport argparse\nimport os\nimport tempfile\nfrom typing import Dict\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom sklearn.datasets import load_breast_cancer\nfrom sklearn.model_selection import train_test_split\n\nimport xgboost as xgb\n\n\nclass Plotting(xgb.callback.TrainingCallback):\n    \"\"\"Plot evaluation result during training.  Only for demonstration purpose as it's\n    quite slow to draw using matplotlib.\n\n    \"\"\"\n\n    def __init__(self, rounds: int) -> None:\n        self.fig = plt.figure()\n        self.ax = self.fig.add_subplot(111)\n        self.rounds = rounds\n        self.lines: Dict[str, plt.Line2D] = {}\n        self.fig.show()\n        self.x = np.linspace(0, self.rounds, self.rounds)\n        plt.ion()\n\n    def _get_key(self, data: str, metric: str) -> str:\n        return f\"{data}-{metric}\"\n\n    def after_iteration(\n        self, model: xgb.Booster, epoch: int, evals_log: Dict[str, dict]\n    ) -> bool:\n        \"\"\"Update the plot.\"\"\"\n        if not self.lines:\n            for data, metric in evals_log.items():\n                for metric_name, log in metric.items():\n                    key = self._get_key(data, metric_name)\n                    expanded = log + [0] * (self.rounds - len(log))\n                    (self.lines[key],) = self.ax.plot(self.x, expanded, label=key)\n                    self.ax.legend()\n        else:\n            # https://pythonspot.com/matplotlib-update-plot/\n            for data, metric in evals_log.items():\n                for metric_name, log in metric.items():\n                    key = self._get_key(data, metric_name)\n                    expanded = log + [0] * (self.rounds - len(log))\n                    self.lines[key].set_ydata(expanded)\n            self.fig.canvas.draw()\n        # False to indicate 
training should not stop.\n        return False\n\n\ndef custom_callback() -> None:\n    \"\"\"Demo for defining a custom callback function that plots evaluation result during\n    training.\"\"\"\n    X, y = load_breast_cancer(return_X_y=True)\n    X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)\n\n    D_train = xgb.DMatrix(X_train, y_train)\n    D_valid = xgb.DMatrix(X_valid, y_valid)\n\n    num_boost_round = 100\n    plotting = Plotting(num_boost_round)\n\n    # Pass it to the `callbacks` parameter as a list.\n    xgb.train(\n        {\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"error\", \"rmse\"],\n            \"tree_method\": \"hist\",\n            \"device\": \"cuda\",\n        },\n        D_train,\n        evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n        num_boost_round=num_boost_round,\n        callbacks=[plotting],\n    )\n\n\ndef check_point_callback() -> None:\n    \"\"\"Demo for using the checkpoint callback. Custom logic for handling output is\n    usually required and users are encouraged to define their own callback for\n    checkpointing operations. 
The builtin one can be used as a starting point.\n\n    \"\"\"\n    # Only for demo, set a larger value (like 100) in practice as checkpointing is quite\n    # slow.\n    rounds = 2\n\n    def check(as_pickle: bool) -> None:\n        for i in range(0, 10, rounds):\n            if i == 0:\n                continue\n            if as_pickle:\n                path = os.path.join(tmpdir, \"model_\" + str(i) + \".pkl\")\n            else:\n                path = os.path.join(\n                    tmpdir,\n                    f\"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}\",\n                )\n            assert os.path.exists(path)\n\n    X, y = load_breast_cancer(return_X_y=True)\n    m = xgb.DMatrix(X, y)\n    # Check point to a temporary directory for demo\n    with tempfile.TemporaryDirectory() as tmpdir:\n        # Use callback class from xgboost.callback\n        # Feel free to subclass/customize it to suit your need.\n        check_point = xgb.callback.TrainingCheckPoint(\n            directory=tmpdir, interval=rounds, name=\"model\"\n        )\n        xgb.train(\n            {\"objective\": \"binary:logistic\"},\n            m,\n            num_boost_round=10,\n            verbose_eval=False,\n            callbacks=[check_point],\n        )\n        check(False)\n\n        # This version of checkpoint saves everything including parameters and\n        # model.  
See: doc/tutorials/saving_model.rst\n        check_point = xgb.callback.TrainingCheckPoint(\n            directory=tmpdir, interval=rounds, as_pickle=True, name=\"model\"\n        )\n        xgb.train(\n            {\"objective\": \"binary:logistic\"},\n            m,\n            num_boost_round=10,\n            verbose_eval=False,\n            callbacks=[check_point],\n        )\n        check(True)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--plot\", default=1, type=int)\n    args = parser.parse_args()\n\n    check_point_callback()\n\n    if args.plot:\n        custom_callback()\n"
  },
  {
    "path": "demo/guide-python/cat_in_the_dat.py",
    "content": "\"\"\"\nTrain XGBoost with cat_in_the_dat dataset\n=========================================\n\nA simple demo for categorical data support using dataset from Kaggle categorical data\ntutorial.\n\nThe excellent tutorial is at:\nhttps://www.kaggle.com/shahules/an-overview-of-encoding-techniques\n\nAnd the data can be found at:\nhttps://www.kaggle.com/shahules/an-overview-of-encoding-techniques/data\n\n  .. versionadded:: 1.6.0\n\nSee Also\n--------\n- :doc:`Tutorial </tutorials/categorical>`\n- :ref:`sphx_glr_python_examples_categorical.py`\n- :ref:`sphx_glr_python_examples_cat_pipeline.py`\n\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nfrom tempfile import TemporaryDirectory\nfrom time import time\n\nimport pandas as pd\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selection import train_test_split\n\nimport xgboost as xgb\n\n\ndef load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:\n    \"\"\"Assuming you have already downloaded the data into `input` directory.\"\"\"\n\n    df_train = pd.read_csv(\"./input/cat-in-the-dat/train.csv\")\n\n    print(\n        \"train data set has got {} rows and {} columns\".format(\n            df_train.shape[0], df_train.shape[1]\n        )\n    )\n    X = df_train.drop([\"target\"], axis=1)\n    y = df_train[\"target\"]\n\n    for i in range(0, 5):\n        X[\"bin_\" + str(i)] = X[\"bin_\" + str(i)].astype(\"category\")\n\n    for i in range(0, 5):\n        X[\"nom_\" + str(i)] = X[\"nom_\" + str(i)].astype(\"category\")\n\n    for i in range(5, 10):\n        X[\"nom_\" + str(i)] = X[\"nom_\" + str(i)].apply(int, base=16)\n\n    for i in range(0, 6):\n        X[\"ord_\" + str(i)] = X[\"ord_\" + str(i)].astype(\"category\")\n\n    print(\n        \"train data set has got {} rows and {} columns\".format(X.shape[0], X.shape[1])\n    )\n    return X, y\n\n\nparams = {\n    \"tree_method\": \"hist\",\n    \"device\": \"cuda\",\n    \"n_estimators\": 32,\n    \"colsample_bylevel\": 
0.7,\n}\n\n\ndef categorical_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:\n    \"\"\"Train using builtin categorical data support from XGBoost\"\"\"\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, random_state=1994, test_size=0.2\n    )\n    # Be aware that the encoding for X_train and X_test are the same here. In practice,\n    # we should try to use an encoder like (sklearn OrdinalEncoder) to obtain the\n    # categorical values.\n\n    # Specify `enable_categorical` to True.\n    clf = xgb.XGBClassifier(\n        **params,\n        eval_metric=\"auc\",\n        enable_categorical=True,\n        max_cat_to_onehot=1,  # We use optimal partitioning exclusively\n    )\n    clf.fit(X_train, y_train, eval_set=[(X_test, y_test), (X_train, y_train)])\n    clf.save_model(os.path.join(output_dir, \"categorical.json\"))\n\n    y_score = clf.predict_proba(X_test)[:, 1]  # proba of positive samples\n    auc = roc_auc_score(y_test, y_score)\n    print(\"AUC of using builtin categorical data support:\", auc)\n\n\ndef onehot_encoding_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:\n    \"\"\"Train using one-hot encoded data.\"\"\"\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, random_state=42, test_size=0.2\n    )\n    # Specify `enable_categorical` to False as we are using encoded data.\n    clf = xgb.XGBClassifier(**params, eval_metric=\"auc\", enable_categorical=False)\n    clf.fit(\n        X_train,\n        y_train,\n        eval_set=[(X_test, y_test), (X_train, y_train)],\n    )\n    clf.save_model(os.path.join(output_dir, \"one-hot.json\"))\n\n    y_score = clf.predict_proba(X_test)[:, 1]  # proba of positive samples\n    auc = roc_auc_score(y_test, y_score)\n    print(\"AUC of using onehot encoding:\", auc)\n\n\nif __name__ == \"__main__\":\n    X, y = load_cat_in_the_dat()\n\n    with TemporaryDirectory() as tmpdir:\n        start = time()\n        categorical_model(X, y, tmpdir)\n      
  end = time()\n        print(\"Duration:categorical\", end - start)\n\n        X = pd.get_dummies(X)\n        start = time()\n        onehot_encoding_model(X, y, tmpdir)\n        end = time()\n        print(\"Duration:onehot\", end - start)\n"
  },
  {
    "path": "demo/guide-python/cat_pipeline.py",
    "content": "\"\"\"\nFeature engineering pipeline for categorical data\n=================================================\n\nThe script showcases how to keep the categorical data encoding consistent across\ntraining and inference. There are many ways to attain the same goal, this script can be\nused as a starting point.\n\n.. versionchanged:: 3.1\n\n    Start with 3.1, users don't need this for most of the cases. See :ref:`cat-recode`\n    for more info.\n\nSee Also\n--------\n- :doc:`Tutorial </tutorials/categorical>`\n- :ref:`sphx_glr_python_examples_categorical.py`\n- :ref:`sphx_glr_python_examples_cat_in_the_dat.py`\n\n\"\"\"\n\nfrom typing import List, Tuple\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.compose import make_column_selector, make_column_transformer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import OrdinalEncoder\n\nimport xgboost as xgb\n\n\ndef make_example_data() -> Tuple[pd.DataFrame, pd.Series, List[str]]:\n    \"\"\"Generate data for demo.\"\"\"\n    n_samples = 2048\n    rng = np.random.default_rng(1994)\n\n    # We have three categorical features, while the rest are numerical.\n    categorical_features = [\"brand_id\", \"retailer_id\", \"category_id\"]\n\n    df = pd.DataFrame(\n        np.random.randint(32, 96, size=(n_samples, 3)),\n        columns=categorical_features,\n    )\n\n    df[\"price\"] = rng.integers(100, 200, size=(n_samples,))\n    df[\"stock_status\"] = rng.choice([True, False], n_samples)\n    df[\"on_sale\"] = rng.choice([True, False], n_samples)\n    df[\"label\"] = rng.normal(loc=0.0, scale=1.0, size=n_samples)\n\n    X = df.drop([\"label\"], axis=1)\n    y = df[\"label\"]\n\n    return X, y, categorical_features\n\n\ndef native() -> None:\n    \"\"\"Using the native XGBoost interface.\"\"\"\n    X, y, cat_feats = make_example_data()\n\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, 
random_state=1994, test_size=0.2\n    )\n\n    # Create an encoder based on training data.\n    enc = OrdinalEncoder(handle_unknown=\"use_encoded_value\", unknown_value=np.nan)\n    enc.set_output(transform=\"pandas\")\n    enc = enc.fit(X_train[cat_feats])\n\n    def enc_transform(X: pd.DataFrame) -> pd.DataFrame:\n        # don't make change inplace so that we can have demonstrations for encoding\n        X = X.copy()\n        cat_cols = enc.transform(X[cat_feats])\n        for i, name in enumerate(cat_feats):\n            # create pd.Series based on the encoder\n            cat_cols[name] = pd.Categorical.from_codes(\n                codes=cat_cols[name].astype(np.int32), categories=enc.categories_[i]\n            )\n        X[cat_feats] = cat_cols\n        return X\n\n    # Encode the data based on fitted encoder.\n    X_train_enc = enc_transform(X_train)\n    X_test_enc = enc_transform(X_test)\n    # Train XGBoost model using the native interface.\n    Xy_train = xgb.QuantileDMatrix(X_train_enc, y_train, enable_categorical=True)\n    Xy_test = xgb.QuantileDMatrix(\n        X_test_enc, y_test, enable_categorical=True, ref=Xy_train\n    )\n    booster = xgb.train({}, Xy_train)\n    booster.predict(Xy_test)\n\n    # Following shows that data are encoded consistently.\n\n    # We first obtain result from newly encoded data\n    predt0 = booster.inplace_predict(enc_transform(X_train.head(16)))\n    # then we obtain result from already encoded data from training.\n    predt1 = booster.inplace_predict(X_train_enc.head(16))\n\n    np.testing.assert_allclose(predt0, predt1)\n\n\ndef pipeline() -> None:\n    \"\"\"Using the sklearn pipeline.\"\"\"\n    X, y, cat_feats = make_example_data()\n\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, random_state=3, test_size=0.2\n    )\n\n    enc = make_column_transformer(\n        (\n            OrdinalEncoder(handle_unknown=\"use_encoded_value\", unknown_value=np.nan),\n            # all categorical 
feature names end with \"_id\"\n            make_column_selector(pattern=\".*_id\"),\n        ),\n        remainder=\"passthrough\",\n        verbose_feature_names_out=False,\n    )\n    # No need to set pandas output, we use `feature_types` to indicate the type of\n    # features.\n\n    # enc.set_output(transform=\"pandas\")\n\n    feature_types = [\"c\" if fn in cat_feats else \"q\" for fn in X_train.columns]\n    reg = xgb.XGBRegressor(\n        feature_types=feature_types, enable_categorical=True, n_estimators=10\n    )\n    p = make_pipeline(enc, reg)\n    p.fit(X_train, y_train)\n    # check XGBoost is using the feature type correctly.\n    model_types = reg.get_booster().feature_types\n    assert model_types is not None\n    for a, b in zip(model_types, feature_types):\n        assert a == b\n\n    # Following shows that data are encoded consistently.\n\n    # We first create a slice of data that doesn't contain all the categories\n    predt0 = p.predict(X_train.iloc[:16, :])\n    # Then we use the dataframe that contains all the categories\n    predt1 = p.predict(X_train)[:16]\n\n    # The resulting encoding is the same\n    np.testing.assert_allclose(predt0, predt1)\n\n\nif __name__ == \"__main__\":\n    pipeline()\n    native()\n"
  },
  {
    "path": "demo/guide-python/categorical.py",
    "content": "\"\"\"\nGetting started with categorical data\n=====================================\n\nExperimental support for categorical data.\n\nIn before, users need to run an encoder themselves before passing the data into XGBoost,\nwhich creates a sparse matrix and potentially increase memory usage.  This demo\nshowcases the experimental categorical data support, more advanced features are planned.\n\n  .. versionadded:: 1.5.0\n\nSee Also\n--------\n- :doc:`Tutorial </tutorials/categorical>`\n- :ref:`sphx_glr_python_examples_cat_in_the_dat.py`\n- :ref:`sphx_glr_python_examples_cat_pipeline.py`\n\n\"\"\"\n\nfrom typing import Tuple\n\nimport numpy as np\nimport pandas as pd\n\nimport xgboost as xgb\n\n\ndef make_categorical(\n    n_samples: int, n_features: int, n_categories: int, onehot: bool\n) -> Tuple[pd.DataFrame, pd.Series]:\n    \"\"\"Make some random data for demo.\"\"\"\n    rng = np.random.RandomState(1994)\n\n    pd_dict = {}\n    for i in range(n_features + 1):\n        c = rng.randint(low=0, high=n_categories, size=n_samples)\n        pd_dict[str(i)] = pd.Series(c, dtype=np.int64)\n\n    df = pd.DataFrame(pd_dict)\n    label = df.iloc[:, 0]\n    df = df.iloc[:, 1:]\n    for i in range(0, n_features):\n        label += df.iloc[:, i]\n    label += 1\n\n    df = df.astype(\"category\")\n    categories = np.arange(0, n_categories)\n    for col in df.columns:\n        df[col] = df[col].cat.set_categories(categories)\n\n    if onehot:\n        return pd.get_dummies(df), label\n    return df, label\n\n\ndef main() -> None:\n    # Use builtin categorical data support\n\n    # For scikit-learn interface, the input data should be pandas DataFrame or cudf\n    # DataFrame with categorical features. 
If an numpy/cupy array is used instead, the\n    # `feature_types` for `XGBRegressor` should be set accordingly.\n    X, y = make_categorical(100, 10, 4, False)\n    # Specify `enable_categorical` to True, also we use onehot-encoding-based split here\n    # for demonstration. For details see the document of `max_cat_to_onehot`.\n    reg = xgb.XGBRegressor(\n        tree_method=\"hist\", enable_categorical=True, max_cat_to_onehot=5, device=\"cuda\"\n    )\n    reg.fit(X, y, eval_set=[(X, y)])\n\n    # Pass in already encoded data\n    X_enc, y_enc = make_categorical(100, 10, 4, True)\n    reg_enc = xgb.XGBRegressor(tree_method=\"hist\", device=\"cuda\")\n    reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])\n\n    reg_results = np.array(reg.evals_result()[\"validation_0\"][\"rmse\"])\n    reg_enc_results = np.array(reg_enc.evals_result()[\"validation_0\"][\"rmse\"])\n\n    # Check that they have same results\n    np.testing.assert_allclose(reg_results, reg_enc_results)\n\n    # Convert to DMatrix for SHAP value\n    booster: xgb.Booster = reg.get_booster()\n    m = xgb.DMatrix(X, enable_categorical=True)  # specify categorical data support.\n    SHAP = booster.predict(m, pred_contribs=True)\n    margin = booster.predict(m, output_margin=True)\n    np.testing.assert_allclose(\n        np.sum(SHAP, axis=len(SHAP.shape) - 1), margin, rtol=1e-3\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/guide-python/continuation.py",
    "content": "\"\"\"\nDemo for training continuation\n==============================\n\"\"\"\n\nimport os\nimport pickle\nimport tempfile\n\nfrom sklearn.datasets import load_breast_cancer\n\nimport xgboost\n\n\ndef training_continuation(tmpdir: str, use_pickle: bool) -> None:\n    \"\"\"Basic training continuation.\"\"\"\n    # Train 128 iterations in 1 session\n    X, y = load_breast_cancer(return_X_y=True)\n    clf = xgboost.XGBClassifier(n_estimators=128, eval_metric=\"logloss\")\n    clf.fit(X, y, eval_set=[(X, y)])\n    print(\"Total boosted rounds:\", clf.get_booster().num_boosted_rounds())\n\n    # Train 128 iterations in 2 sessions, with the first one runs for 32 iterations and\n    # the second one runs for 96 iterations\n    clf = xgboost.XGBClassifier(n_estimators=32, eval_metric=\"logloss\")\n    clf.fit(X, y, eval_set=[(X, y)])\n    assert clf.get_booster().num_boosted_rounds() == 32\n\n    # load back the model, this could be a checkpoint\n    if use_pickle:\n        path = os.path.join(tmpdir, \"model-first-32.pkl\")\n        with open(path, \"wb\") as fd:\n            pickle.dump(clf, fd)\n        with open(path, \"rb\") as fd:\n            loaded = pickle.load(fd)\n    else:\n        path = os.path.join(tmpdir, \"model-first-32.json\")\n        clf.save_model(path)\n        loaded = xgboost.XGBClassifier()\n        loaded.load_model(path)\n\n    clf = xgboost.XGBClassifier(n_estimators=128 - 32, eval_metric=\"logloss\")\n    clf.fit(X, y, eval_set=[(X, y)], xgb_model=loaded)\n\n    print(\"Total boosted rounds:\", clf.get_booster().num_boosted_rounds())\n\n    assert clf.get_booster().num_boosted_rounds() == 128\n\n\ndef training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:\n    \"\"\"Training continuation with early stopping.\"\"\"\n    early_stopping_rounds = 5\n    early_stop = xgboost.callback.EarlyStopping(\n        rounds=early_stopping_rounds, save_best=True\n    )\n    n_estimators = 512\n\n    X, y = 
load_breast_cancer(return_X_y=True)\n    clf = xgboost.XGBClassifier(\n        n_estimators=n_estimators, eval_metric=\"logloss\", callbacks=[early_stop]\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    print(\"Total boosted rounds:\", clf.get_booster().num_boosted_rounds())\n    best = clf.best_iteration\n\n    # Train 512 iterations in 2 sessions, with the first one runs for 128 iterations and\n    # the second one runs until early stop.\n    clf = xgboost.XGBClassifier(\n        n_estimators=128, eval_metric=\"logloss\", callbacks=[early_stop]\n    )\n    # Reinitialize the early stop callback\n    early_stop = xgboost.callback.EarlyStopping(\n        rounds=early_stopping_rounds, save_best=True\n    )\n    clf.set_params(callbacks=[early_stop])\n    clf.fit(X, y, eval_set=[(X, y)])\n    assert clf.get_booster().num_boosted_rounds() == 128\n\n    # load back the model, this could be a checkpoint\n    if use_pickle:\n        path = os.path.join(tmpdir, \"model-first-128.pkl\")\n        with open(path, \"wb\") as fd:\n            pickle.dump(clf, fd)\n        with open(path, \"rb\") as fd:\n            loaded = pickle.load(fd)\n    else:\n        path = os.path.join(tmpdir, \"model-first-128.json\")\n        clf.save_model(path)\n        loaded = xgboost.XGBClassifier()\n        loaded.load_model(path)\n\n    early_stop = xgboost.callback.EarlyStopping(\n        rounds=early_stopping_rounds, save_best=True\n    )\n    clf = xgboost.XGBClassifier(\n        n_estimators=n_estimators - 128, eval_metric=\"logloss\", callbacks=[early_stop]\n    )\n    clf.fit(\n        X,\n        y,\n        eval_set=[(X, y)],\n        xgb_model=loaded,\n    )\n\n    print(\"Total boosted rounds:\", clf.get_booster().num_boosted_rounds())\n    assert clf.best_iteration == best\n\n\nif __name__ == \"__main__\":\n    with tempfile.TemporaryDirectory() as tmpdir:\n        training_continuation_early_stop(tmpdir, False)\n        training_continuation_early_stop(tmpdir, True)\n\n        
training_continuation(tmpdir, True)\n        training_continuation(tmpdir, False)\n"
  },
  {
    "path": "demo/guide-python/cover_type.py",
    "content": "\"\"\"\nUsing xgboost on GPU devices\n============================\n\nShows how to train a model on the `forest cover type\n<https://archive.ics.uci.edu/ml/datasets/covertype>`_ dataset using GPU\nacceleration. The forest cover type dataset has 581,012 rows and 54 features, making it\ntime consuming to process. We compare the run-time and accuracy of the GPU and CPU\nhistogram algorithms.\n\nIn addition, The demo showcases using GPU with other GPU-related libraries including\ncupy and cuml. These libraries are not strictly required.\n\n\"\"\"\nimport time\n\nimport cupy as cp\nfrom cuml.model_selection import train_test_split\nfrom sklearn.datasets import fetch_covtype\n\nimport xgboost as xgb\n\n# Fetch dataset using sklearn\nX, y = fetch_covtype(return_X_y=True)\nX = cp.array(X)\ny = cp.array(y)\ny -= y.min()\n\n# Create 0.75/0.25 train/test split\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=0.25, train_size=0.75, random_state=42\n)\n\n# Specify sufficient boosting iterations to reach a minimum\nnum_round = 3000\n\n# Leave most parameters as default\nclf = xgb.XGBClassifier(device=\"cuda\", n_estimators=num_round)\n# Train model\nstart = time.time()\nclf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\ngpu_res = clf.evals_result()\nprint(\"GPU Training Time: %s seconds\" % (str(time.time() - start)))\n\n# Repeat for CPU algorithm\nclf = xgb.XGBClassifier(device=\"cpu\", n_estimators=num_round)\nstart = time.time()\nclf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\ncpu_res = clf.evals_result()\nprint(\"CPU Training Time: %s seconds\" % (str(time.time() - start)))\n"
  },
  {
    "path": "demo/guide-python/cross_validation.py",
    "content": "\"\"\"\nDemo for using cross validation\n===============================\n\"\"\"\n\nimport os\nfrom typing import Any, Dict, Tuple\n\nimport numpy as np\n\nimport xgboost as xgb\n\n# load data in do training\nCURRENT_DIR = os.path.dirname(__file__)\ndtrain = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train?format=libsvm\")\n)\nparam = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\nnum_round = 2\n\nprint(\"running cross validation\")\n# do cross validation, this will print result out as\n# [iteration]  metric_name:mean_value+std_value\n# std_value is standard deviation of the metric\nxgb.cv(\n    param,\n    dtrain,\n    num_round,\n    nfold=5,\n    metrics={\"error\"},\n    seed=0,\n    callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)],\n)\n\nprint(\"running cross validation, disable standard deviation display\")\n# do cross validation, this will print result out as\n# [iteration]  metric_name:mean_value\nres = xgb.cv(\n    param,\n    dtrain,\n    num_boost_round=10,\n    nfold=5,\n    metrics={\"error\"},\n    seed=0,\n    callbacks=[\n        xgb.callback.EvaluationMonitor(show_stdv=False),\n        xgb.callback.EarlyStopping(3),\n    ],\n)\nprint(res)\nprint(\"running cross validation, with preprocessing function\")\n\n\n# define the preprocessing function\n# used to return the preprocessed training, test data, and parameter\n# we can use this to do weight rescale, etc.\n# as a example, we try to set scale_pos_weight\ndef fpreproc(\n    dtrain: xgb.DMatrix, dtest: xgb.DMatrix, param: Any\n) -> Tuple[xgb.DMatrix, xgb.DMatrix, Dict[str, Any]]:\n    label = dtrain.get_label()\n    ratio = float(np.sum(label == 0)) / np.sum(label == 1)\n    param[\"scale_pos_weight\"] = ratio\n    return (dtrain, dtest, param)\n\n\n# do cross validation, for each fold\n# the dtrain, dtest, param will be passed into fpreproc\n# then the return value of fpreproc will be used to generate\n# results of that 
fold\nxgb.cv(param, dtrain, num_round, nfold=5, metrics={\"auc\"}, seed=0, fpreproc=fpreproc)\n\n###\n# you can also do cross validation with customized loss function\n# See custom_objective.py\n##\nprint(\"running cross validation, with customized loss function\")\n\n\ndef logregobj(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n    labels = dtrain.get_label()\n    preds = 1.0 / (1.0 + np.exp(-preds))\n    grad = preds - labels\n    hess = preds * (1.0 - preds)\n    return grad, hess\n\n\ndef evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n    labels = dtrain.get_label()\n    preds = 1.0 / (1.0 + np.exp(-preds))\n    return \"error\", float(sum(labels != (preds > 0.0))) / len(labels)\n\n\nparam = {\"max_depth\": 2, \"eta\": 1}\n# train with customized objective\nxgb.cv(\n    param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, custom_metric=evalerror\n)\n"
  },
  {
    "path": "demo/guide-python/custom_rmsle.py",
    "content": "\"\"\"\nDemo for defining a custom regression objective and metric\n==========================================================\n\nDemo for defining customized metric and objective.  Notice that for simplicity reason\nweight is not used in following example. In this script, we implement the Squared Log\nError (SLE) objective and RMSLE metric as customized functions, then compare it with\nnative implementation in XGBoost.\n\nSee :doc:`/tutorials/custom_metric_obj` for a step by step walkthrough, with other\ndetails.\n\nThe `SLE` objective reduces impact of outliers in training dataset, hence here we also\ncompare its performance with standard squared error.\n\n\"\"\"\nimport argparse\nfrom time import time\nfrom typing import Dict, List, Tuple\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport xgboost as xgb\n\n# shape of generated data.\nkRows = 4096\nkCols = 16\n\nkOutlier = 10000                # mean of generated outliers\nkNumberOfOutliers = 64\n\nkRatio = 0.7\nkSeed = 1994\n\nkBoostRound = 20\n\nnp.random.seed(seed=kSeed)\n\n\ndef generate_data() -> Tuple[xgb.DMatrix, xgb.DMatrix]:\n    '''Generate data containing outliers.'''\n    x = np.random.randn(kRows, kCols)\n    y = np.random.randn(kRows)\n    y += np.abs(np.min(y))\n\n    # Create outliers\n    for i in range(0, kNumberOfOutliers):\n        ind = np.random.randint(0, len(y)-1)\n        y[ind] += np.random.randint(0, kOutlier)\n\n    train_portion = int(kRows * kRatio)\n\n    # rmsle requires all label be greater than -1.\n    assert np.all(y > -1.0)\n\n    train_x: np.ndarray = x[: train_portion]\n    train_y: np.ndarray = y[: train_portion]\n    dtrain = xgb.DMatrix(train_x, label=train_y)\n\n    test_x = x[train_portion:]\n    test_y = y[train_portion:]\n    dtest = xgb.DMatrix(test_x, label=test_y)\n    return dtrain, dtest\n\n\ndef native_rmse(dtrain: xgb.DMatrix,\n                dtest: xgb.DMatrix) -> Dict[str, Dict[str, List[float]]]:\n    '''Train using native 
implementation of Root Mean Squared Loss.'''\n    print('Squared Error')\n    squared_error = {\n        'objective': 'reg:squarederror',\n        'eval_metric': 'rmse',\n        'tree_method': 'hist',\n        'seed': kSeed\n    }\n    start = time()\n    results: Dict[str, Dict[str, List[float]]] = {}\n    xgb.train(squared_error,\n              dtrain=dtrain,\n              num_boost_round=kBoostRound,\n              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],\n              evals_result=results)\n    print('Finished Squared Error in:', time() - start, '\\n')\n    return results\n\n\ndef native_rmsle(dtrain: xgb.DMatrix,\n                 dtest: xgb.DMatrix) -> Dict[str, Dict[str, List[float]]]:\n    '''Train using native implementation of Squared Log Error.'''\n    print('Squared Log Error')\n    results: Dict[str, Dict[str, List[float]]] = {}\n    squared_log_error = {\n        'objective': 'reg:squaredlogerror',\n        'eval_metric': 'rmsle',\n        'tree_method': 'hist',\n        'seed': kSeed\n    }\n    start = time()\n    xgb.train(squared_log_error,\n              dtrain=dtrain,\n              num_boost_round=kBoostRound,\n              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],\n              evals_result=results)\n    print('Finished Squared Log Error in:', time() - start)\n    return results\n\n\ndef py_rmsle(dtrain: xgb.DMatrix, dtest: xgb.DMatrix) -> Dict:\n    '''Train using Python implementation of Squared Log Error.'''\n    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        '''Compute the gradient squared log error.'''\n        y = dtrain.get_label()\n        return (np.log1p(predt) - np.log1p(y)) / (predt + 1)\n\n    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        '''Compute the hessian for squared log error.'''\n        y = dtrain.get_label()\n        return ((-np.log1p(predt) + np.log1p(y) + 1) /\n                np.power(predt + 1, 2))\n\n    def squared_log(predt: np.ndarray,\n   
                 dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n        '''Squared Log Error objective. A simplified version for RMSLE used as\n        objective function.\n\n        :math:`\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2`\n\n        '''\n        predt[predt < -1] = -1 + 1e-6\n        grad = gradient(predt, dtrain)\n        hess = hessian(predt, dtrain)\n        return grad, hess\n\n    def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n        ''' Root mean squared log error metric.\n\n        :math:`\\\\sqrt{\\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}`\n        '''\n        y = dtrain.get_label()\n        predt[predt < -1] = -1 + 1e-6\n        elements = np.power(np.log1p(y) - np.log1p(predt), 2)\n        return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))\n\n    results: Dict[str, Dict[str, List[float]]] = {}\n    xgb.train({'tree_method': 'hist', 'seed': kSeed,\n               'disable_default_eval_metric': 1},\n              dtrain=dtrain,\n              num_boost_round=kBoostRound,\n              obj=squared_log,\n              custom_metric=rmsle,\n              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],\n              evals_result=results)\n\n    return results\n\n\ndef plot_history(\n    rmse_evals: Dict[str, Dict],\n    rmsle_evals: Dict[str, Dict],\n    py_rmsle_evals: Dict[str, Dict]\n) -> None:\n    fig, axs = plt.subplots(3, 1)\n    assert isinstance(axs, np.ndarray)\n    ax0 = axs[0]\n    ax1 = axs[1]\n    ax2 = axs[2]\n\n    x = np.arange(0, kBoostRound, 1)\n\n    ax0.plot(x, rmse_evals['dtrain']['rmse'], label='train-RMSE')\n    ax0.plot(x, rmse_evals['dtest']['rmse'], label='test-RMSE')\n    ax0.legend()\n\n    ax1.plot(x, rmsle_evals['dtrain']['rmsle'], label='train-native-RMSLE')\n    ax1.plot(x, rmsle_evals['dtest']['rmsle'], label='test-native-RMSLE')\n    ax1.legend()\n\n    ax2.plot(x, py_rmsle_evals['dtrain']['PyRMSLE'], label='train-PyRMSLE')\n    ax2.plot(x, 
py_rmsle_evals['dtest']['PyRMSLE'], label='test-PyRMSLE')\n    ax2.legend()\n\n\ndef main(args: argparse.Namespace) -> None:\n    dtrain, dtest = generate_data()\n    rmse_evals = native_rmse(dtrain, dtest)\n    rmsle_evals = native_rmsle(dtrain, dtest)\n    py_rmsle_evals = py_rmsle(dtrain, dtest)\n\n    if args.plot != 0:\n        plot_history(rmse_evals, rmsle_evals, py_rmsle_evals)\n        plt.show()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description='Arguments for custom RMSLE objective function demo.')\n    parser.add_argument(\n        '--plot',\n        type=int,\n        default=1,\n        help='Set to 0 to disable plotting the evaluation history.')\n    args = parser.parse_args()\n    main(args)\n"
  },
  {
    "path": "demo/guide-python/custom_softmax.py",
    "content": "\"\"\"\nDemo for creating customized multi-class objective function\n===========================================================\n\nThis demo is only applicable after (excluding) XGBoost 1.0.0, as before this version\nXGBoost returns transformed prediction for multi-class objective function.  More details\nin comments.\n\nSee :doc:`/tutorials/custom_metric_obj` and :doc:`/tutorials/advanced_custom_obj` for\ndetailed tutorial and notes.\n\n\"\"\"\n\n# pylint: disable=missing-function-docstring,redefined-outer-name,unused-variable\n\nimport argparse\nfrom typing import Dict, Tuple\n\nimport numpy as np\nimport xgboost as xgb\nfrom matplotlib import pyplot as plt\n\nnp.random.seed(1994)\n\nkRows = 100\nkCols = 10\nkClasses = 4  # number of classes\n\nkRounds = 10  # number of boosting rounds.\n\n# Generate some random data for demo.\nX = np.random.randn(kRows, kCols)\ny = np.random.randint(0, 4, size=kRows)\n\nm = xgb.DMatrix(X, y)\n\n\ndef softmax(x: np.ndarray) -> np.ndarray:\n    \"\"\"Softmax function with x as input vector.\"\"\"\n    e = np.exp(x)\n    return e / np.sum(e)\n\n\ndef softprob_obj(predt: np.ndarray, data: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Loss function. Computing the gradient and upper bound on the\n    Hessian with a diagonal structure for XGBoost (note that this is\n    not the true Hessian).\n    Reimplements the `multi:softprob` inside XGBoost.\n\n    \"\"\"\n    labels = data.get_label()\n    if data.get_weight().size == 0:\n        # Use 1 as weight if we don't have custom weight.\n        weights = np.ones(kRows, dtype=float)\n    else:\n        weights = data.get_weight()\n\n    # The prediction is of shape (rows, classes), each element in a row\n    # represents a raw prediction (leaf weight, hasn't gone through softmax\n    # yet).  
In XGBoost 1.0.0, the prediction is transformed by a softmax\n    # function, fixed in later versions.\n    assert predt.shape == (kRows, kClasses)\n\n    grad = np.zeros((kRows, kClasses), dtype=float)\n    hess = np.zeros((kRows, kClasses), dtype=float)\n\n    eps = 1e-6\n\n    # compute the gradient and hessian upper bound, slow iterations in Python, only\n    # suitable for demo.  Also the one in native XGBoost core is more robust to\n    # numeric overflow as we don't do anything to mitigate the `exp` in\n    # `softmax` here.\n    for r in range(predt.shape[0]):\n        target = int(labels[r])\n        weight = float(weights[r])\n        p = softmax(predt[r, :])\n        for c in range(predt.shape[1]):\n            assert 0 <= target < kClasses\n            pc = float(p[c])\n            g = pc - 1.0 if c == target else pc\n            g = g * weight\n            h = max(2.0 * pc * (1.0 - pc) * weight, eps)\n            grad[r, c] = g\n            hess[r, c] = h\n\n    # After 2.1.0, pass the gradient as it is.\n    return grad, hess\n\n\ndef predict(booster: xgb.Booster, X: xgb.DMatrix) -> np.ndarray:\n    \"\"\"A customized prediction function that converts raw prediction to\n    target class.\n\n    \"\"\"\n    # Output margin means we want to obtain the raw prediction obtained from\n    # tree leaf weight.\n    predt = booster.predict(X, output_margin=True)\n    out = np.zeros(kRows)\n    for r in range(predt.shape[0]):\n        # the class with maximum prob (not strictly prob as it haven't gone\n        # through softmax yet so it doesn't sum to 1, but result is the same\n        # for argmax).\n        i = np.argmax(predt[r])\n        out[r] = i\n    return out\n\n\ndef merror(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:\n    y = dtrain.get_label()\n    # Like custom objective, the predt is untransformed leaf weight when custom\n    # objective is provided.\n\n    # With the use of `custom_metric` parameter in train function, 
custom metric\n    # receives raw input only when custom objective is also being used.  Otherwise\n    # custom metric will receive transformed prediction.\n    assert predt.shape == (kRows, kClasses)\n    out = np.zeros(kRows)\n    for r in range(predt.shape[0]):\n        i = np.argmax(predt[r])\n        out[r] = i\n\n    assert y.shape == out.shape\n\n    errors = np.zeros(kRows)\n    errors[y != out] = 1.0\n    return \"PyMError\", np.sum(errors) / kRows\n\n\ndef plot_history(\n    custom_results: Dict[str, Dict], native_results: Dict[str, Dict]\n) -> None:\n    axs: np.ndarray\n    fig, axs = plt.subplots(2, 1)  # type: ignore\n    ax0 = axs[0]\n    ax1 = axs[1]\n\n    pymerror = custom_results[\"train\"][\"PyMError\"]\n    merror = native_results[\"train\"][\"merror\"]\n\n    x = np.arange(0, kRounds, 1)\n    ax0.plot(x, pymerror, label=\"Custom objective\")\n    ax0.legend()\n    ax1.plot(x, merror, label=\"multi:softmax\")\n    ax1.legend()\n\n    plt.show()\n\n\ndef main(args: argparse.Namespace) -> None:\n    # Since 3.1, XGBoost can estimate the base_score automatically for built-in\n    # multi-class objectives.\n    #\n    # We explicitly specify it here to disable the automatic estimation to have a proper\n    # comparison between the custom implementation and the built-in implementation.\n    intercept = np.full(shape=(kClasses,), fill_value=1 / kClasses)\n\n    custom_results: Dict[str, Dict] = {}\n    # Use our custom objective function\n    booster_custom = xgb.train(\n        {\n            \"num_class\": kClasses,\n            \"base_score\": intercept,\n            \"disable_default_eval_metric\": True,\n        },\n        m,\n        num_boost_round=kRounds,\n        obj=softprob_obj,\n        custom_metric=merror,\n        evals_result=custom_results,\n        evals=[(m, \"train\")],\n    )\n\n    predt_custom = predict(booster_custom, m)\n\n    native_results: Dict[str, Dict] = {}\n    # Use the same objective function defined in XGBoost.\n  
  booster_native = xgb.train(\n        {\n            \"num_class\": kClasses,\n            \"base_score\": intercept,\n            \"objective\": \"multi:softmax\",\n            \"eval_metric\": \"merror\",\n        },\n        m,\n        num_boost_round=kRounds,\n        evals_result=native_results,\n        evals=[(m, \"train\")],\n    )\n    predt_native = booster_native.predict(m)\n\n    # We are reimplementing the loss function in XGBoost, so it should\n    # be the same for normal cases.\n    assert np.all(predt_custom == predt_native)\n    np.testing.assert_allclose(\n        custom_results[\"train\"][\"PyMError\"], native_results[\"train\"][\"merror\"]\n    )\n\n    if args.plot != 0:\n        plot_history(custom_results, native_results)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Arguments for custom softmax objective function demo.\"\n    )\n    parser.add_argument(\n        \"--plot\",\n        type=int,\n        default=1,\n        help=\"Set to 0 to disable plotting the evaluation history.\",\n    )\n    args = parser.parse_args()\n    main(args)\n"
  },
  {
    "path": "demo/guide-python/distributed_extmem_basic.py",
    "content": "\"\"\"\nExperimental support for distributed training with external memory\n==================================================================\n\n    .. versionadded:: 3.0.0\n\nSee :doc:`the tutorial </tutorials/external_memory>` for more details. To run the\nexample, following packages in addition to XGBoost native dependencies are required:\n\n- scikit-learn\n- loky\n\nIf `device` is `cuda`, following are also needed:\n\n- cupy\n- cuda-python\n- pyhwloc\n\n\"\"\"\n\nimport argparse\nimport multiprocessing as mp\nimport os\nimport sys\nimport tempfile\nimport traceback\nfrom functools import partial, update_wrapper, wraps\nfrom typing import TYPE_CHECKING, Callable, List, ParamSpec, Tuple, TypeVar\n\nimport numpy as np\nimport xgboost\nfrom loky import get_reusable_executor\nfrom sklearn.datasets import make_regression\nfrom xgboost import collective as coll\nfrom xgboost.tracker import RabitTracker\n\nif TYPE_CHECKING:\n    from cuda.bindings.runtime import cudaError_t\n\n\ndef _checkcu(status: \"cudaError_t\") -> None:\n    import cuda.bindings.runtime as cudart\n\n    if status != cudart.cudaError_t.cudaSuccess:\n        raise RuntimeError(cudart.cudaGetErrorString(status))\n\n\ndef device_mem_total() -> int:\n    \"\"\"The total number of bytes of memory this GPU has.\"\"\"\n    import cuda.bindings.runtime as cudart\n\n    status, _, total = cudart.cudaMemGetInfo()\n    _checkcu(status)\n    return total\n\n\ndef make_batches(\n    n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str, rank: int\n) -> List[Tuple[str, str]]:\n    \"\"\"Create multiple batches of synthetic data and return their file paths.\"\"\"\n    files: List[Tuple[str, str]] = []\n    rng = np.random.RandomState(rank)\n    for i in range(n_batches):\n        X, y = make_regression(n_samples_per_batch, n_features, random_state=rng)\n        X_path = os.path.join(tmpdir, f\"X-r{rank}-{i}.npy\")\n        y_path = os.path.join(tmpdir, f\"y-r{rank}-{i}.npy\")\n   
     np.save(X_path, X)\n        np.save(y_path, y)\n        files.append((X_path, y_path))\n    return files\n\n\nclass Iterator(xgboost.DataIter):\n    \"\"\"A custom iterator for loading files in batches.\"\"\"\n\n    def __init__(self, device: str, file_paths: List[Tuple[str, str]]) -> None:\n        self.device = device\n\n        self._file_paths = file_paths\n        self._it = 0\n        # XGBoost will generate some cache files under the current directory with the\n        # prefix \"cache\"\n        super().__init__(cache_prefix=os.path.join(\".\", \"cache\"))\n\n    def load_file(self) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"Load a single batch of data.\"\"\"\n        X_path, y_path = self._file_paths[self._it]\n        # When the `ExtMemQuantileDMatrix` is used, the device must match. GPU cannot\n        # consume CPU input data and vice-versa.\n        if self.device == \"cpu\":\n            X = np.load(X_path)\n            y = np.load(y_path)\n        else:\n            import cupy as cp\n\n            X = cp.load(X_path)\n            y = cp.load(y_path)\n\n        assert X.shape[0] == y.shape[0]\n        return X, y\n\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Advance the iterator by 1 step and pass the data to XGBoost.  
This function\n        is called by XGBoost during the construction of ``DMatrix``\n\n        \"\"\"\n        if self._it == len(self._file_paths):\n            # return False to let XGBoost know this is the end of iteration\n            return False\n\n        # input_data is a keyword-only function passed in by XGBoost and has the similar\n        # signature to the ``DMatrix`` constructor.\n        X, y = self.load_file()\n        input_data(data=X, label=y)\n        self._it += 1\n        return True\n\n    def reset(self) -> None:\n        \"\"\"Reset the iterator to its beginning\"\"\"\n        self._it = 0\n\n\ndef setup_numa() -> None:\n    \"\"\"Set correct NUMA binding for GPU-based external memory training.\"\"\"\n    from pyhwloc import from_this_system\n    from pyhwloc.cuda_runtime import get_device\n    from pyhwloc.topology import MemBindFlags, MemBindPolicy, TypeFilter\n\n    devices = os.getenv(\"CUDA_VISIBLE_DEVICES\", None)\n    assert devices is not None, \"CUDA_VISIBLE_DEVICES must be set.\"\n\n    with from_this_system().set_io_types_filter(TypeFilter.KEEP_ALL) as topo:\n        # Get CPU affinity for this GPU. Device ordinal 0 is used because\n        # CUDA_VISIBLE_DEVICES has already reordered the devices.\n        dev = get_device(topo, device=0)\n        cpuset = dev.get_affinity()\n\n        # Set CPU binding\n        topo.set_cpubind(cpuset)\n        # Set memory binding with STRICT policy - ensures all memory allocations come\n        # from the local NUMA node. hwloc determines the NUMA nodes from cpuset.\n        topo.set_membind(cpuset, MemBindPolicy.BIND, MemBindFlags.STRICT)\n\n\ndef setup_async_pool() -> None:\n    \"\"\"Setup CUDA async pool. As an alternative, the RMM plugin can be used as well.\n    This is the same as using the `CudaAsyncMemoryResource` from RMM, but without the\n    RMM dependency.\n\n    .. 
versionadded:: 3.2.0\n\n    \"\"\"\n    import cuda.bindings.runtime as cudart\n    from cuda.bindings import driver\n    from cupy.cuda import MemoryAsyncPool\n\n    status, dft_pool = cudart.cudaDeviceGetDefaultMemPool(0)\n    _checkcu(status)\n\n    total = device_mem_total()\n\n    v = driver.cuuint64_t(int(total * 0.9))\n    (status,) = cudart.cudaMemPoolSetAttribute(\n        dft_pool,\n        cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,\n        v,\n    )\n    _checkcu(status)\n    # Set the allocator for cupy as well.\n    import cupy as cp\n\n    cp.cuda.set_allocator(MemoryAsyncPool().malloc)\n\n\nR = TypeVar(\"R\")\nP = ParamSpec(\"P\")\n\n\ndef try_run(fn: Callable[P, R]) -> Callable[P, R]:\n    \"\"\"Loky aborts the process without printing out any error message if there's an\n    exception.\n\n    \"\"\"\n\n    @wraps(fn)\n    def inner(*args: P.args, **kwargs: P.kwargs) -> R:\n        try:\n            return fn(*args, **kwargs)\n        except Exception as e:\n            print(traceback.format_exc(), file=sys.stderr)\n            raise RuntimeError(\"Running into exception in worker.\") from e\n\n    return inner\n\n\n@try_run\ndef hist_train(\n    worker_idx: int,\n    tmpdir: str,\n    device: str,\n    rabit_args: dict,\n) -> None:\n    \"\"\"The hist tree method can use a special data structure `ExtMemQuantileDMatrix` for\n    faster initialization and lower memory usage.\n\n    \"\"\"\n\n    # Make sure XGBoost is using the configured memory pool for all allocations.\n    with (\n        coll.CommunicatorContext(**rabit_args),\n        xgboost.config_context(\n            use_cuda_async_pool=device == \"cuda\",\n        ),\n    ):\n        print(\"Worker: \", worker_idx)\n        # Generate the data for demonstration. 
The synthetic data is sharded by workers.\n        files = make_batches(\n            n_samples_per_batch=4096,\n            n_features=16,\n            n_batches=17,\n            tmpdir=tmpdir,\n            rank=coll.get_rank(),\n        )\n        # Since we are running two workers on a single node, we should divide the number\n        # of threads between workers.\n        n_threads = os.cpu_count()\n        assert n_threads is not None\n        n_threads = max(n_threads // coll.get_world_size(), 1)\n        it = Iterator(device, files)\n        Xy = xgboost.ExtMemQuantileDMatrix(\n            it, missing=np.nan, enable_categorical=False, nthread=n_threads\n        )\n        # Check the device is correctly set.\n        if device == \"cuda\":\n            # Check the first device\n            assert (\n                int(os.environ[\"CUDA_VISIBLE_DEVICES\"].split(\",\")[0])\n                < coll.get_world_size()\n            )\n        booster = xgboost.train(\n            {\n                \"tree_method\": \"hist\",\n                \"max_depth\": 4,\n                \"device\": it.device,\n                \"nthread\": n_threads,\n            },\n            Xy,\n            evals=[(Xy, \"Train\")],\n            num_boost_round=10,\n        )\n        booster.predict(Xy)\n\n\ndef launch_workers(tmpdir: str, args: argparse.Namespace) -> None:\n    \"\"\"Client function to launch workers.\"\"\"\n    n_workers = 2\n\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n    tracker.start()\n    rabit_args = tracker.worker_args()\n\n    def initializer(device: str) -> None:\n        # Set CUDA device before launching child processes.\n        if device == \"cuda\":\n            # name: LokyProcess-1\n            _, sidx = mp.current_process().name.split(\"-\")\n            idx = int(sidx) - 1  # 1-based indexing from loky\n            # Assuming two workers for demo.\n            devices = \",\".join([str(idx), str((idx + 1) % n_workers)])\n  
          # P0: CUDA_VISIBLE_DEVICES=0,1\n            # P1: CUDA_VISIBLE_DEVICES=1,0\n            os.environ[\"CUDA_VISIBLE_DEVICES\"] = devices\n            setup_numa()\n            setup_async_pool()\n\n    with get_reusable_executor(\n        max_workers=n_workers,\n        initargs=(args.device,),\n        initializer=initializer,\n    ) as pool:\n        # Poor man's currying\n        fn = update_wrapper(\n            partial(\n                hist_train,\n                tmpdir=tmpdir,\n                device=args.device,\n                rabit_args=rabit_args,\n            ),\n            hist_train,\n        )\n        pool.map(fn, range(n_workers))\n\n\ndef main() -> None:\n    \"\"\"Demo for distributed training from scratch.\"\"\"\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--device\", choices=[\"cpu\", \"cuda\"], default=\"cpu\")\n    args = parser.parse_args()\n    with tempfile.TemporaryDirectory() as tmpdir:\n        launch_workers(tmpdir, args)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/guide-python/evals_result.py",
    "content": "\"\"\"\nThis script demonstrate how to access the eval metrics\n======================================================\n\"\"\"\nimport os\nfrom typing import Any, Dict\n\nimport xgboost as xgb\n\nCURRENT_DIR = os.path.dirname(__file__)\ndtrain = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train?format=libsvm\")\n)\ndtest = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test?format=libsvm\")\n)\n\nparam = [\n    (\"max_depth\", 2),\n    (\"objective\", \"binary:logistic\"),\n    (\"eval_metric\", \"logloss\"),\n    (\"eval_metric\", \"error\"),\n]\n\nnum_round = 2\nwatchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n\nevals_result: Dict[str, Any] = {}\nbst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result)\n\nprint(\"Access logloss metric directly from evals_result:\")\nprint(evals_result[\"eval\"][\"logloss\"])\n\nprint(\"\")\nprint(\"Access metrics through a loop:\")\nfor e_name, e_mtrs in evals_result.items():\n    print(\"- {}\".format(e_name))\n    for e_mtr_name, e_mtr_vals in e_mtrs.items():\n        print(\"   - {}\".format(e_mtr_name))\n        print(\"      - {}\".format(e_mtr_vals))\n\nprint(\"\")\nprint(\"Access complete dictionary:\")\nprint(evals_result)\n"
  },
  {
    "path": "demo/guide-python/external_memory.py",
    "content": "\"\"\"\nExperimental support for external memory\n========================================\n\nThis is similar to the one in `quantile_data_iterator.py`, but for external memory\ninstead of Quantile DMatrix.  The feature is not ready for production use yet.\n\n    .. versionadded:: 1.5.0\n\n\nSee :doc:`the tutorial </tutorials/external_memory>` for more details.\n\n    .. versionchanged:: 3.0.0\n\n        Added :py:class:`~xgboost.ExtMemQuantileDMatrix`.\n\nTo run the example, following packages in addition to XGBoost native dependencies are\nrequired:\n\n- scikit-learn\n\nIf `device` is `cuda`, following are also needed:\n\n- cupy\n- rmm\n- cuda-python\n\n.. seealso::\n\n  :ref:`sphx_glr_python_examples_distributed_extmem_basic.py`\n\nNot shown in this example, but you should pay attention to NUMA configuration as\ndiscussed in the tutorial.\n\n\"\"\"\n\nimport argparse\nimport os\nimport tempfile\nfrom typing import TYPE_CHECKING, Callable, List, Literal, Tuple\n\nimport numpy as np\nfrom sklearn.datasets import make_regression\n\nimport xgboost\n\nif TYPE_CHECKING:\n    from cuda.bindings.runtime import cudaError_t\n\n\ndef _checkcu(status: \"cudaError_t\") -> None:\n    import cuda.bindings.runtime as cudart\n\n    if status != cudart.cudaError_t.cudaSuccess:\n        raise RuntimeError(cudart.cudaGetErrorString(status))\n\n\ndef device_mem_total() -> int:\n    \"\"\"The total number of bytes of memory this GPU has.\"\"\"\n    import cuda.bindings.runtime as cudart\n\n    status, free, total = cudart.cudaMemGetInfo()\n    _checkcu(status)\n    return total\n\n\ndef make_batches(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    tmpdir: str,\n) -> List[Tuple[str, str]]:\n    files: List[Tuple[str, str]] = []\n    rng = np.random.RandomState(1994)\n    for i in range(n_batches):\n        X, y = make_regression(n_samples_per_batch, n_features, random_state=rng)\n        X_path = os.path.join(tmpdir, \"X-\" + str(i) + 
\".npy\")\n        y_path = os.path.join(tmpdir, \"y-\" + str(i) + \".npy\")\n        np.save(X_path, X)\n        np.save(y_path, y)\n        files.append((X_path, y_path))\n    return files\n\n\nclass Iterator(xgboost.DataIter):\n    \"\"\"A custom iterator for loading files in batches.\"\"\"\n\n    def __init__(\n        self, device: Literal[\"cpu\", \"cuda\"], file_paths: List[Tuple[str, str]]\n    ) -> None:\n        self.device = device\n\n        self._file_paths = file_paths\n        self._it = 0\n        # XGBoost will generate some cache files under the current directory with the\n        # prefix \"cache\"\n        super().__init__(cache_prefix=os.path.join(\".\", \"cache\"))\n\n    def load_file(self) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"Load a single batch of data.\"\"\"\n        X_path, y_path = self._file_paths[self._it]\n        # When the `ExtMemQuantileDMatrix` is used, the device must match. GPU cannot\n        # consume CPU input data and vice-versa.\n        if self.device == \"cpu\":\n            X = np.load(X_path)\n            y = np.load(y_path)\n        else:\n            X = cp.load(X_path)\n            y = cp.load(y_path)\n\n        assert X.shape[0] == y.shape[0]\n        return X, y\n\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Advance the iterator by 1 step and pass the data to XGBoost.  
This function\n        is called by XGBoost during the construction of ``DMatrix``\n\n        \"\"\"\n        if self._it == len(self._file_paths):\n            # return False to let XGBoost know this is the end of iteration\n            return False\n\n        # input_data is a keyword-only function passed in by XGBoost and has the similar\n        # signature to the ``DMatrix`` constructor.\n        X, y = self.load_file()\n        input_data(data=X, label=y)\n        self._it += 1\n        return True\n\n    def reset(self) -> None:\n        \"\"\"Reset the iterator to its beginning\"\"\"\n        self._it = 0\n\n\ndef hist_train(it: Iterator) -> None:\n    \"\"\"The hist tree method can use a special data structure `ExtMemQuantileDMatrix` for\n    faster initialization and lower memory usage (recommended).\n\n    .. versionadded:: 3.0.0\n\n    \"\"\"\n    # For non-data arguments, specify it here once instead of passing them by the `next`\n    # method.\n    Xy = xgboost.ExtMemQuantileDMatrix(it, missing=np.nan, enable_categorical=False)\n    booster = xgboost.train(\n        {\"tree_method\": \"hist\", \"max_depth\": 4, \"device\": it.device},\n        Xy,\n        evals=[(Xy, \"Train\")],\n        num_boost_round=10,\n    )\n    booster.predict(Xy)\n\n\ndef approx_train(it: Iterator) -> None:\n    \"\"\"The approx tree method uses the basic `DMatrix` (not recommended).\"\"\"\n\n    # For non-data arguments, specify it here once instead of passing them by the `next`\n    # method.\n    Xy = xgboost.DMatrix(it, missing=np.nan, enable_categorical=False)\n    # ``approx`` is also supported, but less efficient due to sketching. 
It's\n    # recommended to use `hist` instead.\n    booster = xgboost.train(\n        {\"tree_method\": \"approx\", \"max_depth\": 4, \"device\": it.device},\n        Xy,\n        evals=[(Xy, \"Train\")],\n        num_boost_round=10,\n    )\n    booster.predict(Xy)\n\n\ndef main(tmpdir: str, args: argparse.Namespace) -> None:\n    \"\"\"Entry point for training.\"\"\"\n\n    # generate some random data for demo\n    files = make_batches(\n        n_samples_per_batch=1024, n_features=17, n_batches=31, tmpdir=tmpdir\n    )\n    it = Iterator(args.device, files)\n\n    hist_train(it)\n    approx_train(it)\n\n\ndef setup_async_pool() -> None:\n    \"\"\"Setup CUDA async pool. As an alternative, the RMM plugin can be used as well. See\n    the `setup_rmm`. This is the same as using the `CudaAsyncMemoryResource` from RMM,\n    but without the RMM dependency.\n\n    .. versionadded:: 3.2.0\n\n    \"\"\"\n    import cuda.bindings.driver as driver\n    import cuda.bindings.runtime as cudart\n    from cupy.cuda import MemoryAsyncPool\n\n    status, dft_pool = cudart.cudaDeviceGetDefaultMemPool(0)\n    _checkcu(status)\n\n    total = device_mem_total()\n\n    v = driver.cuuint64_t(int(total * 0.9))\n    (status,) = cudart.cudaMemPoolSetAttribute(\n        dft_pool,\n        cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,\n        v,\n    )\n    _checkcu(status)\n    # Set the allocator for cupy as well.\n    cp.cuda.set_allocator(MemoryAsyncPool().malloc)\n\n\ndef setup_rmm() -> None:\n    \"\"\"Setup RMM for GPU-based external memory training.\n\n    It's important to use RMM with `CudaAsyncMemoryResource` or `ArenaMemoryResource`\n    for GPU-based external memory to improve performance. 
If XGBoost is not built with\n    RMM support, a warning is raised when constructing the `DMatrix`.\n\n    \"\"\"\n\n    import rmm\n    from rmm.allocators.cupy import rmm_cupy_allocator\n    from rmm.mr import ArenaMemoryResource\n\n    if not xgboost.build_info()[\"USE_RMM\"]:\n        return\n\n    total = device_mem_total()\n\n    mr = rmm.mr.CudaMemoryResource()\n    mr = ArenaMemoryResource(mr, arena_size=int(total * 0.9))\n\n    rmm.mr.set_current_device_resource(mr)\n    # Set the allocator for cupy as well.\n    cp.cuda.set_allocator(rmm_cupy_allocator)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--device\", choices=[\"cpu\", \"cuda\"], default=\"cpu\")\n    parser.add_argument(\n        \"--memory_pool\",\n        choices=[\"rmm\", \"cuda\"],\n        default=\"rmm\",\n        help=\"Use a memory pool for asynchronous memory allocation in XGBoost.\",\n    )\n    args = parser.parse_args()\n    if args.device == \"cuda\":\n        import cupy as cp\n\n        if args.memory_pool == \"rmm\":\n            setup_rmm()\n        elif args.memory_pool == \"cuda\":\n            setup_async_pool()\n        # Make sure XGBoost is using RMM for all allocations.\n        with xgboost.config_context(\n            use_rmm=args.memory_pool == \"rmm\",\n            use_cuda_async_pool=args.memory_pool == \"cuda\",\n        ):\n            with tempfile.TemporaryDirectory() as tmpdir:\n                main(tmpdir, args)\n    else:\n        with tempfile.TemporaryDirectory() as tmpdir:\n            main(tmpdir, args)\n"
  },
  {
    "path": "demo/guide-python/feature_weights.py",
    "content": "\"\"\"\nDemo for using feature weight to change column sampling\n=======================================================\n\n    .. versionadded:: 1.3.0\n\"\"\"\n\nimport argparse\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport xgboost\n\n\ndef main(args: argparse.Namespace) -> None:\n    rng = np.random.RandomState(1994)\n\n    kRows = 4196\n    kCols = 10\n\n    X = rng.randn(kRows, kCols)\n    y = rng.randn(kRows)\n    fw = np.ones(shape=(kCols,))\n    for i in range(kCols):\n        fw[i] *= float(i)\n\n    dtrain = xgboost.DMatrix(X, y)\n    dtrain.set_info(feature_weights=fw)\n\n    # Perform column sampling for each node split evaluation, the sampling process is\n    # weighted by feature weights.\n    bst = xgboost.train(\n        {\"tree_method\": \"hist\", \"colsample_bynode\": 0.2},\n        dtrain,\n        num_boost_round=10,\n        evals=[(dtrain, \"d\")],\n    )\n    feature_map = bst.get_fscore()\n\n    # feature zero has 0 weight\n    assert feature_map.get(\"f0\", None) is None\n    assert max(feature_map.values()) == feature_map.get(\"f9\")\n\n    if args.plot:\n        xgboost.plot_importance(bst)\n        plt.show()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--plot\",\n        type=int,\n        default=1,\n        help=\"Set to 0 to disable plotting the evaluation history.\",\n    )\n    args = parser.parse_args()\n    main(args)\n"
  },
  {
    "path": "demo/guide-python/gamma_regression.py",
    "content": "\"\"\"\nDemo for gamma regression\n=========================\n\"\"\"\nimport numpy as np\n\nimport xgboost as xgb\n\n#  this script demonstrates how to fit gamma regression model (with log link function)\n#  in xgboost, before running the demo you need to generate the autoclaims dataset\n#  by running gen_autoclaims.R located in xgboost/demo/data.\n\ndata = np.genfromtxt('../data/autoclaims.csv', delimiter=',')\ndtrain = xgb.DMatrix(data[0:4741, 0:34], data[0:4741, 34])\ndtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34])\n\n# for gamma regression, we need to set the objective to 'reg:gamma', it also suggests\n# to set the base_score to a value between 1 to 5 if the number of iteration is small\nparam = {'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}\n\n# the rest of settings are the same\nwatchlist = [(dtest, 'eval'), (dtrain, 'train')]\nnum_round = 30\n\n# training and evaluation\nbst = xgb.train(param, dtrain, num_round, watchlist)\npreds = bst.predict(dtest)\nlabels = dtest.get_label()\nprint('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))\n"
  },
  {
    "path": "demo/guide-python/generalized_linear_model.py",
    "content": "\"\"\"\nDemo for GLM\n============\n\"\"\"\nimport os\n\nimport xgboost as xgb\n\n##\n#  this script demonstrate how to fit generalized linear model in xgboost\n#  basically, we are using linear model, instead of tree for our boosters\n##\nCURRENT_DIR = os.path.dirname(__file__)\ndtrain = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train?format=libsvm\")\n)\ndtest = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test?format=libsvm\")\n)\n# change booster to gblinear, so that we are fitting a linear model\n# alpha is the L1 regularizer\n# lambda is the L2 regularizer\n# you can also set lambda_bias which is L2 regularizer on the bias term\nparam = {\n    \"objective\": \"binary:logistic\",\n    \"booster\": \"gblinear\",\n    \"alpha\": 0.0001,\n    \"lambda\": 1,\n}\n\n# normally, you do not need to set eta (step_size)\n# XGBoost uses a parallel coordinate descent algorithm (shotgun),\n# there could be affection on convergence with parallelization on certain cases\n# setting eta to be smaller value, e.g 0.5 can make the optimization more stable\n# param['eta'] = 1\n\n##\n# the rest of settings are the same\n##\nwatchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\nnum_round = 4\nbst = xgb.train(param, dtrain, num_round, watchlist)\npreds = bst.predict(dtest)\nlabels = dtest.get_label()\nprint(\n    \"error=%f\"\n    % (\n        sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i])\n        / float(len(preds))\n    )\n)\n"
  },
  {
    "path": "demo/guide-python/gpu_tree_shap.py",
    "content": "\"\"\"\nUse GPU to speedup SHAP value computation\n=========================================\n\nDemonstrates using GPU acceleration to compute SHAP values for feature importance.\n\n\"\"\"\nfrom urllib.error import HTTPError\n\nimport shap\nfrom sklearn.datasets import fetch_california_housing, make_regression\n\nimport xgboost as xgb\n\n# Fetch dataset using sklearn\ntry:\n    _data = fetch_california_housing(return_X_y=True)\n    X = _data.data\n    y = _data.target\n    feature_names = _data.feature_names\n    print(_data.DESCR)\nexcept HTTPError:\n    # Use a synthetic dataset instead if we couldn't\n    X, y = make_regression(n_samples=20640, n_features=8, random_state=1234)\n    feature_names = [f\"f{i}\" for i in range(8)]\n\nnum_round = 500\n\nparam = {\n    \"eta\": 0.05,\n    \"max_depth\": 10,\n    \"tree_method\": \"hist\",\n    \"device\": \"cuda\",\n}\n\n# GPU accelerated training\ndtrain = xgb.DMatrix(X, label=y, feature_names=feature_names)\nmodel = xgb.train(param, dtrain, num_round)\n\n# Compute shap values using GPU with xgboost\nmodel.set_param({\"device\": \"cuda\"})\nshap_values = model.predict(dtrain, pred_contribs=True)\n\n# Compute shap interaction values using GPU\nshap_interaction_values = model.predict(dtrain, pred_interactions=True)\n\n\n# shap will call the GPU accelerated version as long as the device parameter is set to\n# \"cuda\"\nexplainer = shap.TreeExplainer(model)\nshap_values = explainer.shap_values(X)\n\n# visualize the first prediction's explanation\nshap.force_plot(\n    explainer.expected_value,\n    shap_values[0, :],\n    X[0, :],\n    feature_names=feature_names,\n    matplotlib=True,\n)\n\n# Show a summary of feature importance\nshap.summary_plot(shap_values, X, plot_type=\"bar\", feature_names=feature_names)\n"
  },
  {
    "path": "demo/guide-python/individual_trees.py",
    "content": "\"\"\"\nDemo for prediction using individual trees and model slices\n===========================================================\n\"\"\"\n\nimport os\n\nimport numpy as np\nfrom scipy.special import logit\nfrom sklearn.datasets import load_svmlight_file\n\nimport xgboost as xgb\n\nCURRENT_DIR = os.path.dirname(__file__)\ntrain = os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train\")\ntest = os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test\")\n\n\ndef individual_tree() -> None:\n    \"\"\"Get prediction from each individual tree and combine them together.\"\"\"\n    X_train, y_train = load_svmlight_file(train)\n    X_test, y_test = load_svmlight_file(test)\n    Xy_train = xgb.QuantileDMatrix(X_train, y_train)\n\n    n_rounds = 4\n    # Specify the base score, otherwise xgboost will estimate one from the training\n    # data.\n    base_score = 0.5\n    params = {\n        \"max_depth\": 2,\n        \"eta\": 1,\n        \"objective\": \"reg:logistic\",\n        \"tree_method\": \"hist\",\n        \"base_score\": base_score,\n    }\n    booster = xgb.train(params, Xy_train, num_boost_round=n_rounds)\n\n    # Use logit to inverse the base score back to raw leaf value (margin)\n    scores = np.full((X_test.shape[0],), logit(base_score))\n    for i in range(n_rounds):\n        # - Use output_margin to get raw leaf values\n        # - Use iteration_range to get prediction for only one tree\n        # - Use previous prediction as base marign for the model\n        Xy_test = xgb.DMatrix(X_test, base_margin=scores)\n\n        if i == n_rounds - 1:\n            # last round, get the transformed prediction\n            scores = booster.predict(\n                Xy_test, iteration_range=(i, i + 1), output_margin=False\n            )\n        else:\n            # get raw leaf value for accumulation\n            scores = booster.predict(\n                Xy_test, iteration_range=(i, i + 1), output_margin=True\n            )\n\n    full = 
booster.predict(xgb.DMatrix(X_test), output_margin=False)\n    np.testing.assert_allclose(scores, full)\n\n\ndef model_slices() -> None:\n    \"\"\"Inference with each individual tree using model slices.\"\"\"\n    X_train, y_train = load_svmlight_file(train)\n    X_test, y_test = load_svmlight_file(test)\n    Xy_train = xgb.QuantileDMatrix(X_train, y_train)\n\n    n_rounds = 4\n    # Specify the base score, otherwise xgboost will estimate one from the training\n    # data.\n    base_score = 0.5\n    params = {\n        \"max_depth\": 2,\n        \"eta\": 1,\n        \"objective\": \"reg:logistic\",\n        \"tree_method\": \"hist\",\n        \"base_score\": base_score,\n    }\n    booster = xgb.train(params, Xy_train, num_boost_round=n_rounds)\n    trees = [booster[t] for t in range(n_rounds)]\n\n    # Use logit to inverse the base score back to raw leaf value (margin)\n    scores = np.full((X_test.shape[0],), logit(base_score))\n    for i, t in enumerate(trees):\n        # Feed previous scores into base margin.\n        Xy_test = xgb.DMatrix(X_test, base_margin=scores)\n\n        if i == n_rounds - 1:\n            # last round, get the transformed prediction\n            scores = t.predict(Xy_test, output_margin=False)\n        else:\n            # get raw leaf value for accumulation\n            scores = t.predict(Xy_test, output_margin=True)\n\n    full = booster.predict(xgb.DMatrix(X_test), output_margin=False)\n    np.testing.assert_allclose(scores, full)\n\n\nif __name__ == \"__main__\":\n    individual_tree()\n    model_slices()\n"
  },
  {
    "path": "demo/guide-python/learning_to_rank.py",
    "content": "\"\"\"\nGetting started with learning to rank\n=====================================\n\n  .. versionadded:: 2.0.0\n\nThis is a demonstration of using XGBoost for learning to rank tasks using the\nMSLR_10k_letor dataset. For more infomation about the dataset, please visit its\n`description page <https://www.microsoft.com/en-us/research/project/mslr/>`_.\n\nThis is a two-part demo, the first one contains a basic example of using XGBoost to\ntrain on relevance degree, and the second part simulates click data and enable the\nposition debiasing training.\n\nFor an overview of learning to rank in XGBoost, please see :doc:`Learning to Rank\n</tutorials/learning_to_rank>`.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport json\nimport os\nimport pickle as pkl\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.datasets import load_svmlight_file\n\nimport xgboost as xgb\nfrom xgboost.testing.data import RelDataCV, simulate_clicks, sort_ltr_samples\n\n\ndef load_mslr_10k(data_path: str, cache_path: str) -> RelDataCV:\n    \"\"\"Load the MSLR10k dataset from data_path and cache a pickle object in cache_path.\n\n    Returns\n    -------\n\n    A list of tuples [(X, y, qid), ...].\n\n    \"\"\"\n    root_path = os.path.expanduser(args.data)\n    cacheroot_path = os.path.expanduser(args.cache)\n    cache_path = os.path.join(cacheroot_path, \"MSLR_10K_LETOR.pkl\")\n\n    # Use only the Fold1 for demo:\n    # Train,      Valid, Test\n    # {S1,S2,S3}, S4,    S5\n    fold = 1\n\n    if not os.path.exists(cache_path):\n        fold_path = os.path.join(root_path, f\"Fold{fold}\")\n        train_path = os.path.join(fold_path, \"train.txt\")\n        valid_path = os.path.join(fold_path, \"vali.txt\")\n        test_path = os.path.join(fold_path, \"test.txt\")\n        X_train, y_train, qid_train = load_svmlight_file(\n            train_path, query_id=True, dtype=np.float32\n        )\n        y_train = y_train.astype(np.int32)\n        
qid_train = qid_train.astype(np.int32)\n\n        X_valid, y_valid, qid_valid = load_svmlight_file(\n            valid_path, query_id=True, dtype=np.float32\n        )\n        y_valid = y_valid.astype(np.int32)\n        qid_valid = qid_valid.astype(np.int32)\n\n        X_test, y_test, qid_test = load_svmlight_file(\n            test_path, query_id=True, dtype=np.float32\n        )\n        y_test = y_test.astype(np.int32)\n        qid_test = qid_test.astype(np.int32)\n\n        data = RelDataCV(\n            train=(X_train, y_train, qid_train),\n            test=(X_test, y_test, qid_test),\n            max_rel=4,\n        )\n\n        with open(cache_path, \"wb\") as fd:\n            pkl.dump(data, fd)\n\n    with open(cache_path, \"rb\") as fd:\n        data = pkl.load(fd)\n\n    return data\n\n\ndef ranking_demo(args: argparse.Namespace) -> None:\n    \"\"\"Demonstration for learning to rank with relevance degree.\"\"\"\n    data = load_mslr_10k(args.data, args.cache)\n\n    # Sort data according to query index\n    X_train, y_train, qid_train = data.train\n    sorted_idx = np.argsort(qid_train)\n    X_train = X_train[sorted_idx]\n    y_train = y_train[sorted_idx]\n    qid_train = qid_train[sorted_idx]\n\n    X_test, y_test, qid_test = data.test\n    sorted_idx = np.argsort(qid_test)\n    X_test = X_test[sorted_idx]\n    y_test = y_test[sorted_idx]\n    qid_test = qid_test[sorted_idx]\n\n    ranker = xgb.XGBRanker(\n        tree_method=\"hist\",\n        device=\"cuda\",\n        lambdarank_pair_method=\"topk\",\n        lambdarank_num_pair_per_sample=13,\n        eval_metric=[\"ndcg@1\", \"ndcg@8\"],\n    )\n    ranker.fit(\n        X_train,\n        y_train,\n        qid=qid_train,\n        eval_set=[(X_test, y_test)],\n        eval_qid=[qid_test],\n        verbose=True,\n    )\n\n\ndef click_data_demo(args: argparse.Namespace) -> None:\n    \"\"\"Demonstration for learning to rank with click data.\"\"\"\n    data = load_mslr_10k(args.data, args.cache)\n    
train, test = simulate_clicks(data)\n    assert test is not None\n\n    assert train.X.shape[0] == train.click.size\n    assert test.X.shape[0] == test.click.size\n    assert test.score.dtype == np.float32\n    assert test.click.dtype == np.int32\n\n    X_train, clicks_train, y_train, qid_train = sort_ltr_samples(\n        train.X,\n        train.y,\n        train.qid,\n        train.click,\n        train.pos,\n    )\n    X_test, clicks_test, y_test, qid_test = sort_ltr_samples(\n        test.X,\n        test.y,\n        test.qid,\n        test.click,\n        test.pos,\n    )\n\n    class ShowPosition(xgb.callback.TrainingCallback):\n        def after_iteration(\n            self,\n            model: xgb.Booster,\n            epoch: int,\n            evals_log: xgb.callback.TrainingCallback.EvalsLog,\n        ) -> bool:\n            config = json.loads(model.save_config())\n            ti_plus = np.array(config[\"learner\"][\"objective\"][\"ti+\"])\n            tj_minus = np.array(config[\"learner\"][\"objective\"][\"tj-\"])\n            df = pd.DataFrame({\"ti+\": ti_plus, \"tj-\": tj_minus})\n            print(df)\n            return False\n\n    ranker = xgb.XGBRanker(\n        n_estimators=512,\n        tree_method=\"hist\",\n        device=\"cuda\",\n        learning_rate=0.01,\n        reg_lambda=1.5,\n        subsample=0.8,\n        sampling_method=\"gradient_based\",\n        # LTR specific parameters\n        objective=\"rank:ndcg\",\n        # - Enable bias estimation\n        lambdarank_unbiased=True,\n        # - normalization (1 / (norm + 1))\n        lambdarank_bias_norm=1,\n        # - Focus on the top 12 documents\n        lambdarank_num_pair_per_sample=12,\n        lambdarank_pair_method=\"topk\",\n        ndcg_exp_gain=True,\n        eval_metric=[\"ndcg@1\", \"ndcg@3\", \"ndcg@5\", \"ndcg@10\"],\n        callbacks=[ShowPosition()],\n    )\n    ranker.fit(\n        X_train,\n        clicks_train,\n        qid=qid_train,\n        eval_set=[(X_test, 
y_test), (X_test, clicks_test)],\n        eval_qid=[qid_test, qid_test],\n        verbose=True,\n    )\n    ranker.predict(X_test)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Demonstration of learning to rank using XGBoost.\"\n    )\n    parser.add_argument(\n        \"--data\",\n        type=str,\n        help=\"Root directory of the MSLR-WEB10K data.\",\n        required=True,\n    )\n    parser.add_argument(\n        \"--cache\",\n        type=str,\n        help=\"Directory for caching processed data.\",\n        required=True,\n    )\n    args = parser.parse_args()\n\n    ranking_demo(args)\n    click_data_demo(args)\n"
  },
  {
    "path": "demo/guide-python/model_parser.py",
    "content": "\"\"\"\nDemonstration for parsing JSON/UBJSON tree model files\n======================================================\n\nSee :doc:`/tutorials/saving_model` for details about the model serialization.\n\n\"\"\"\n\nimport argparse\nimport json\nfrom dataclasses import dataclass\nfrom enum import IntEnum, unique\nfrom typing import Any, Dict, List, Sequence, Union\n\nimport numpy as np\n\ntry:\n    import ubjson\nexcept ImportError:\n    ubjson = None\n\n\nParamT = Dict[str, str]\n\n\ndef to_integers(data: Union[bytes, List[int]]) -> List[int]:\n    \"\"\"Convert a sequence of bytes to a list of Python integer\"\"\"\n    return [v for v in data]\n\n\n@unique\nclass SplitType(IntEnum):\n    numerical = 0\n    categorical = 1\n\n\n@dataclass\nclass Node:\n    # properties\n    left: int\n    right: int\n    parent: int\n    split_idx: int\n    split_cond: float\n    default_left: bool\n    split_type: SplitType\n    categories: List[int]\n    # statistic\n    base_weight: float\n    loss_chg: float\n    sum_hess: float\n\n\nclass Tree:\n    \"\"\"A tree built by XGBoost.\"\"\"\n\n    def __init__(self, tree_id: int, nodes: Sequence[Node]) -> None:\n        self.tree_id = tree_id\n        self.nodes = nodes\n\n    def loss_change(self, node_id: int) -> float:\n        \"\"\"Loss gain of a node.\"\"\"\n        return self.nodes[node_id].loss_chg\n\n    def sum_hessian(self, node_id: int) -> float:\n        \"\"\"Sum Hessian of a node.\"\"\"\n        return self.nodes[node_id].sum_hess\n\n    def base_weight(self, node_id: int) -> float:\n        \"\"\"Base weight of a node.\"\"\"\n        return self.nodes[node_id].base_weight\n\n    def split_index(self, node_id: int) -> int:\n        \"\"\"Split feature index of node.\"\"\"\n        return self.nodes[node_id].split_idx\n\n    def split_condition(self, node_id: int) -> float:\n        \"\"\"Split value of a node.\"\"\"\n        return self.nodes[node_id].split_cond\n\n    def split_categories(self, 
node_id: int) -> List[int]:\n        \"\"\"Categories in a node.\"\"\"\n        return self.nodes[node_id].categories\n\n    def is_categorical(self, node_id: int) -> bool:\n        \"\"\"Whether a node has categorical split.\"\"\"\n        return self.nodes[node_id].split_type == SplitType.categorical\n\n    def is_numerical(self, node_id: int) -> bool:\n        return not self.is_categorical(node_id)\n\n    def parent(self, node_id: int) -> int:\n        \"\"\"Parent ID of a node.\"\"\"\n        return self.nodes[node_id].parent\n\n    def left_child(self, node_id: int) -> int:\n        \"\"\"Left child ID of a node.\"\"\"\n        return self.nodes[node_id].left\n\n    def right_child(self, node_id: int) -> int:\n        \"\"\"Right child ID of a node.\"\"\"\n        return self.nodes[node_id].right\n\n    def is_leaf(self, node_id: int) -> bool:\n        \"\"\"Whether a node is leaf.\"\"\"\n        return self.nodes[node_id].left == -1\n\n    def is_deleted(self, node_id: int) -> bool:\n        \"\"\"Whether a node is deleted.\"\"\"\n        return self.split_index(node_id) == np.iinfo(np.uint32).max\n\n    def __str__(self) -> str:\n        stack = [0]\n        nodes = []\n        while stack:\n            node: Dict[str, Union[float, int, List[int]]] = {}\n            nid = stack.pop()\n\n            node[\"node id\"] = nid\n            node[\"gain\"] = self.loss_change(nid)\n            node[\"cover\"] = self.sum_hessian(nid)\n            nodes.append(node)\n\n            if not self.is_leaf(nid) and not self.is_deleted(nid):\n                left = self.left_child(nid)\n                right = self.right_child(nid)\n                stack.append(left)\n                stack.append(right)\n                categories = self.split_categories(nid)\n                if categories:\n                    assert self.is_categorical(nid)\n                    node[\"categories\"] = categories\n                else:\n                    assert self.is_numerical(nid)\n    
                node[\"condition\"] = self.split_condition(nid)\n            if self.is_leaf(nid):\n                node[\"weight\"] = self.split_condition(nid)\n\n        string = \"\\n\".join(map(lambda x: \"  \" + str(x), nodes))\n        return string\n\n\nclass Model:\n    \"\"\"Gradient boosted tree model.\"\"\"\n\n    def __init__(self, model: dict) -> None:\n        \"\"\"Construct the Model from a JSON object.\n\n        parameters\n        ----------\n         model : A dictionary loaded by json representing a XGBoost boosted tree model.\n        \"\"\"\n        # Basic properties of a model\n        self.learner_model_shape: ParamT = model[\"learner\"][\"learner_model_param\"]\n        self.num_output_group = int(self.learner_model_shape[\"num_class\"])\n        self.num_feature = int(self.learner_model_shape[\"num_feature\"])\n        self.base_score: List[float] = json.loads(\n            self.learner_model_shape[\"base_score\"]\n        )\n        # A field encoding which output group a tree belongs\n        self.tree_info = model[\"learner\"][\"gradient_booster\"][\"model\"][\"tree_info\"]\n\n        model_shape: ParamT = model[\"learner\"][\"gradient_booster\"][\"model\"][\n            \"gbtree_model_param\"\n        ]\n\n        # JSON representation of trees\n        j_trees = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"]\n\n        # Load the trees\n        self.num_trees = int(model_shape[\"num_trees\"])\n\n        trees: List[Tree] = []\n        for i in range(self.num_trees):\n            tree: Dict[str, Any] = j_trees[i]\n            tree_id = int(tree[\"id\"])\n            assert tree_id == i, (tree_id, i)\n            # - properties\n            left_children: List[int] = tree[\"left_children\"]\n            right_children: List[int] = tree[\"right_children\"]\n            parents: List[int] = tree[\"parents\"]\n            split_conditions: List[float] = tree[\"split_conditions\"]\n            split_indices: List[int] = 
tree[\"split_indices\"]\n            # when ubjson is used, this is a byte array with each element as uint8\n            default_left = to_integers(tree[\"default_left\"])\n\n            # - categorical features\n            # when ubjson is used, this is a byte array with each element as uint8\n            split_types = to_integers(tree[\"split_type\"])\n            # categories for each node is stored in a CSR style storage with segment as\n            # the begin ptr and the `categories' as values.\n            cat_segments: List[int] = tree[\"categories_segments\"]\n            cat_sizes: List[int] = tree[\"categories_sizes\"]\n            # node index for categorical nodes\n            cat_nodes: List[int] = tree[\"categories_nodes\"]\n            assert len(cat_segments) == len(cat_sizes) == len(cat_nodes)\n            cats = tree[\"categories\"]\n            assert len(left_children) == len(split_types)\n\n            # The storage for categories is only defined for categorical nodes to\n            # prevent unnecessary overhead for numerical splits, we track the\n            # categorical node that are processed using a counter.\n            cat_cnt = 0\n            if cat_nodes:\n                last_cat_node = cat_nodes[cat_cnt]\n            else:\n                last_cat_node = -1\n            node_categories: List[List[int]] = []\n            for node_id in range(len(left_children)):\n                if node_id == last_cat_node:\n                    beg = cat_segments[cat_cnt]\n                    size = cat_sizes[cat_cnt]\n                    end = beg + size\n                    node_cats = cats[beg:end]\n                    # categories are unique for each node\n                    assert len(set(node_cats)) == len(node_cats)\n                    cat_cnt += 1\n                    if cat_cnt == len(cat_nodes):\n                        last_cat_node = -1  # continue to process the rest of the nodes\n                    else:\n                        
last_cat_node = cat_nodes[cat_cnt]\n                    assert node_cats\n                    node_categories.append(node_cats)\n                else:\n                    # append an empty node, it's either a numerical node or a leaf.\n                    node_categories.append([])\n\n            # - stats\n            base_weights: List[float] = tree[\"base_weights\"]\n            loss_changes: List[float] = tree[\"loss_changes\"]\n            sum_hessian: List[float] = tree[\"sum_hessian\"]\n\n            # Construct a list of nodes that have complete information\n            nodes: List[Node] = [\n                Node(\n                    left_children[node_id],\n                    right_children[node_id],\n                    parents[node_id],\n                    split_indices[node_id],\n                    split_conditions[node_id],\n                    default_left[node_id] == 1,  # to boolean\n                    SplitType(split_types[node_id]),\n                    node_categories[node_id],\n                    base_weights[node_id],\n                    loss_changes[node_id],\n                    sum_hessian[node_id],\n                )\n                for node_id in range(len(left_children))\n            ]\n\n            pytree = Tree(tree_id, nodes)\n            trees.append(pytree)\n\n        self.trees = trees\n\n    def print_model(self) -> None:\n        for i, tree in enumerate(self.trees):\n            print(\"\\ntree_id:\", i)\n            print(tree)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Demonstration for loading XGBoost JSON/UBJSON model.\"\n    )\n    parser.add_argument(\n        \"--model\", type=str, required=True, help=\"Path to .json/.ubj model file.\"\n    )\n    args = parser.parse_args()\n    if args.model.endswith(\"json\"):\n        # use json format\n        with open(args.model, \"r\") as fd:\n            model = json.load(fd)\n    elif args.model.endswith(\"ubj\"):\n    
    if ubjson is None:\n            raise ImportError(\"ubjson is not installed.\")\n        # use ubjson format\n        with open(args.model, \"rb\") as bfd:\n            model = ubjson.load(bfd)\n    else:\n        raise ValueError(\n            \"Unexpected file extension. Supported file extension are json and ubj.\"\n        )\n    model = Model(model)\n    model.print_model()\n"
  },
  {
    "path": "demo/guide-python/multioutput_reduced_gradient.py",
    "content": "\"\"\"\nA demo for multi-output regression using reduced gradient\n=========================================================\n\nSee :doc:`/tutorials/multioutput` for more information.\n\n.. versionadded:: 3.2.0\n\n.. note::\n\n    The implementation is experimental and many features are missing.\n\n.. seealso:: :ref:`sphx_glr_python_examples_multioutput_regression.py`\n\n\"\"\"\n\nimport argparse\nfrom typing import Tuple\n\nimport numpy as np\nfrom sklearn.base import BaseEstimator\nfrom sklearn.datasets import make_regression\n\nimport xgboost as xgb\nfrom xgboost.objective import TreeObjective\n\n\nclass LsObjMean(TreeObjective):\n    \"\"\"Least squared error. Reduce the size of the gradient using mean value.\"\"\"\n\n    def __init__(self, device: str) -> None:\n        self.device = device\n\n    def __call__(\n        self, iteration: int, y_pred: np.ndarray, dtrain: xgb.DMatrix\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        y_true = dtrain.get_label()\n        grad = y_pred - y_true\n        if self.device == \"cpu\":\n            hess = np.ones(grad.shape)\n            return grad, hess\n        else:\n            import cupy as cp\n\n            hess = cp.ones(grad.shape)\n\n            return cp.array(grad), cp.array(hess)\n\n    def split_grad(\n        self, iteration: int, grad: np.ndarray, hess: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        if self.device == \"cpu\":\n            from numpy import mean\n        else:\n            from cupy import mean  # type: ignore[no-redef]\n\n        sgrad = mean(grad, axis=1)\n        shess = mean(hess, axis=1)\n        return sgrad, shess\n\n\ndef svd_class(device: str) -> BaseEstimator:\n    \"\"\"One of the methods in the sketch boost paper.\"\"\"\n    from sklearn.decomposition import TruncatedSVD\n\n    svd_params = {\"algorithm\": \"arpack\", \"n_components\": 2, \"n_iter\": 8}\n    svd = TruncatedSVD(**svd_params)\n    return svd\n\n\nclass LsObjSvd(LsObjMean):\n    
\"\"\"Reduce the size of the gradient using SVD.\"\"\"\n\n    def __init__(self, device: str) -> None:\n        super().__init__(device=device)\n\n    def split_grad(\n        self, iteration: int, grad: np.ndarray, hess: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        svd = svd_class(self.device)\n        if self.device == \"cuda\":\n            grad = grad.get()   # type: ignore\n            hess = hess.get()   # type: ignore\n\n        svd.fit(grad)\n        grad = svd.transform(grad)\n        hess = svd.transform(hess)\n        if self.device == \"cpu\":\n            hess = np.clip(hess, 0.01, None)\n        else:\n            import cupy as cp\n\n            hess = cp.clip(hess, 0.01, None)\n        return grad, hess\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--device\", choices=[\"cpu\", \"cuda\"], default=\"cpu\")\n    args = parser.parse_args()\n\n    X, y = make_regression(\n        n_samples=8192, n_features=32, n_targets=8, random_state=2026\n    )\n    Xy = xgb.QuantileDMatrix(X, y)\n\n    for obj in (LsObjMean(args.device), LsObjSvd(args.device)):\n        xgb.train(\n            {\n                \"device\": args.device,\n                \"multi_strategy\": \"multi_output_tree\",\n            },\n            Xy,\n            evals=[(Xy, \"Train\")],\n            obj=obj,\n            num_boost_round=16,\n        )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/guide-python/multioutput_regression.py",
    "content": "\"\"\"\nA demo for multi-output regression\n==================================\n\nThe demo is adopted from scikit-learn:\n\nhttps://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n\nSee :doc:`/tutorials/multioutput` for more information.\n\n.. note::\n\n    The feature is experimental. For the `multi_output_tree` strategy, many features are\n    missing.\n\n.. seealso:: :ref:`sphx_glr_python_examples_multioutput_reduced_gradient.py`\n\n\"\"\"\n\nimport argparse\nfrom typing import Dict, List, Optional, Tuple\n\nimport matplotlib\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport xgboost as xgb\n\n\ndef plot_predt(\n    y: np.ndarray, y_predt: np.ndarray, name: str, ax: matplotlib.axes.Axes\n) -> None:\n    s = 25\n    ax.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=name)\n    ax.scatter(y_predt[:, 0], y_predt[:, 1], c=\"cornflowerblue\", s=s, edgecolor=\"black\")\n    ax.legend()\n\n\ndef gen_circle() -> Tuple[np.ndarray, np.ndarray]:\n    \"Generate a sample dataset that y is a 2 dim circle.\"\n    rng = np.random.RandomState(1994)\n    X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)\n    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n    y[::5, :] += 0.5 - rng.rand(20, 2)\n    y = y - y.min()\n    y = y / y.max()\n    return X, y\n\n\ndef rmse_model(strategy: str, ax: Optional[matplotlib.axes.Axes]) -> None:\n    \"\"\"Draw a circle with 2-dim coordinate as target variables.\"\"\"\n    X, y = gen_circle()\n    # Train a regressor on it\n    reg = xgb.XGBRegressor(\n        tree_method=\"hist\",\n        n_estimators=128,\n        n_jobs=16,\n        max_depth=8,\n        multi_strategy=strategy,\n        subsample=0.6,\n    )\n    reg.fit(X, y, eval_set=[(X, y)])\n\n    y_predt = reg.predict(X)\n    if ax:\n        plot_predt(y, y_predt, 
f\"RMSE-{strategy}\", ax)\n\n\ndef custom_rmse_model(strategy: str, ax: Optional[matplotlib.axes.Axes]) -> None:\n    \"\"\"Train using Python implementation of Squared Error.\"\"\"\n\n    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        \"\"\"Compute the gradient squared error.\"\"\"\n        y = dtrain.get_label().reshape(predt.shape)\n        return predt - y\n\n    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        \"\"\"Compute the hessian for squared error.\"\"\"\n        return np.ones(predt.shape)\n\n    def squared_log(\n        predt: np.ndarray, dtrain: xgb.DMatrix\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        grad = gradient(predt, dtrain)\n        hess = hessian(predt, dtrain)\n        # both numpy.ndarray and cupy.ndarray works.\n        return grad, hess\n\n    def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n        y = dtrain.get_label().reshape(predt.shape)\n        v = np.sqrt(np.mean(np.power(y - predt, 2)))\n        return \"PyRMSE\", v\n\n    X, y = gen_circle()\n    Xy = xgb.DMatrix(X, y)\n    results: Dict[str, Dict[str, List[float]]] = {}\n    # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n    # When builtin objective is used, XGBoost can figure out the number of targets\n    # automatically.\n    booster = xgb.train(\n        {\n            \"tree_method\": \"hist\",\n            \"num_target\": y.shape[1],\n            \"multi_strategy\": strategy,\n        },\n        dtrain=Xy,\n        num_boost_round=128,\n        obj=squared_log,\n        evals=[(Xy, \"Train\")],\n        evals_result=results,\n        custom_metric=rmse,\n    )\n\n    y_predt = booster.inplace_predict(X)\n    if ax:\n        plot_predt(y, y_predt, f\"PyRMSE-{strategy}\", ax)\n\n    np.testing.assert_allclose(\n        results[\"Train\"][\"rmse\"], results[\"Train\"][\"PyRMSE\"], rtol=1e-2\n    )\n\n\nif __name__ == \"__main__\":\n    parser = 
argparse.ArgumentParser()\n    parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n    args = parser.parse_args()\n    if args.plot == 1:\n        _, axs = plt.subplots(2, 2)\n    else:\n        axs = np.full(shape=(2, 2), fill_value=None)\n    assert isinstance(axs, np.ndarray)\n\n    # Train with builtin RMSE objective\n    # - One model per output.\n    rmse_model(\"one_output_per_tree\", axs[0, 0])\n    # - One model for all outputs, this is still a work in progress, many features are\n    # missing.\n    rmse_model(\"multi_output_tree\", axs[0, 1])\n\n    # Train with custom objective.\n    # - One model per output.\n    custom_rmse_model(\"one_output_per_tree\", axs[1, 0])\n    # - One model for all outputs, this is still a work in progress, many features are\n    # missing.\n    custom_rmse_model(\"multi_output_tree\", axs[1, 1])\n    if args.plot == 1:\n        plt.show()\n"
  },
  {
    "path": "demo/guide-python/predict_first_ntree.py",
    "content": "\"\"\"\nDemo for prediction using number of trees\n=========================================\n\"\"\"\nimport os\n\nimport numpy as np\nfrom sklearn.datasets import load_svmlight_file\n\nimport xgboost as xgb\n\nCURRENT_DIR = os.path.dirname(__file__)\ntrain = os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train\")\ntest = os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test\")\n\n\ndef native_interface() -> None:\n    # load data in do training\n    dtrain = xgb.DMatrix(train + \"?format=libsvm\")\n    dtest = xgb.DMatrix(test + \"?format=libsvm\")\n    param = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n    num_round = 3\n    bst = xgb.train(param, dtrain, num_round, watchlist)\n\n    print(\"start testing prediction from first n trees\")\n    # predict using first 1 tree\n    label = dtest.get_label()\n    ypred1 = bst.predict(dtest, iteration_range=(0, 1))\n    # by default, we predict using all the trees\n    ypred2 = bst.predict(dtest)\n\n    print(\"error of ypred1=%f\" % (np.sum((ypred1 > 0.5) != label) / float(len(label))))\n    print(\"error of ypred2=%f\" % (np.sum((ypred2 > 0.5) != label) / float(len(label))))\n\n\ndef sklearn_interface() -> None:\n    X_train, y_train = load_svmlight_file(train)\n    X_test, y_test = load_svmlight_file(test)\n    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1)\n    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n    assert clf.n_classes_ == 2\n\n    print(\"start testing prediction from first n trees\")\n    # predict using first 1 tree\n    ypred1 = clf.predict(X_test, iteration_range=(0, 1))\n    # by default, we predict using all the trees\n    ypred2 = clf.predict(X_test)\n\n    print(\n        \"error of ypred1=%f\" % (np.sum((ypred1 > 0.5) != y_test) / float(len(y_test)))\n    )\n    print(\n        \"error of ypred2=%f\" % (np.sum((ypred2 > 0.5) != y_test) / float(len(y_test)))\n    )\n\n\nif 
__name__ == \"__main__\":\n    native_interface()\n    sklearn_interface()\n"
  },
  {
    "path": "demo/guide-python/predict_leaf_indices.py",
    "content": "\"\"\"\nDemo for obtaining leaf index\n=============================\n\"\"\"\nimport os\n\nimport xgboost as xgb\n\n# load data and do training\nCURRENT_DIR = os.path.dirname(__file__)\ndtrain = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.train?format=libsvm\")\n)\ndtest = xgb.DMatrix(\n    os.path.join(CURRENT_DIR, \"../data/agaricus.txt.test?format=libsvm\")\n)\nparam = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\nwatchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\nnum_round = 3\nbst = xgb.train(param, dtrain, num_round, watchlist)\n\nprint(\"start testing predict the leaf indices\")\n# predict using the first 2 trees\nleafindex = bst.predict(\n    dtest, iteration_range=(0, 2), pred_leaf=True, strict_shape=True\n)\nprint(leafindex.shape)\nprint(leafindex)\n# predict all trees\nleafindex = bst.predict(dtest, pred_leaf=True)\nprint(leafindex.shape)\n"
  },
  {
    "path": "demo/guide-python/quantile_data_iterator.py",
    "content": "\"\"\"\nDemo for using data iterator with Quantile DMatrix\n==================================================\n\n    .. versionadded:: 1.2.0\n\nThe demo defines a customized iterator for passing batches of data into\n:py:class:`xgboost.QuantileDMatrix` and uses this ``QuantileDMatrix`` for training.  The\nfeature is primarily designed to reduce the required GPU memory for training on\ndistributed environment.\n\nAfter going through the demo, one might ask why don't we use a more native Python\niterator?  That's because XGBoost requires a `reset` function, while using\n`itertools.tee` might incur significant memory usage according to:\n\n  https://docs.python.org/3/library/itertools.html#itertools.tee.\n\n.. seealso::\n\n  :ref:`sphx_glr_python_examples_external_memory.py`\n\n\"\"\"\n\nfrom typing import Callable\n\nimport cupy\nimport numpy\n\nimport xgboost\n\nCOLS = 64\nROWS_PER_BATCH = 1000  # data is split by rows\nBATCHES = 32\n\n\nclass IterForDMatrixDemo(xgboost.core.DataIter):\n    \"\"\"A data iterator for XGBoost DMatrix.\n\n    `reset` and `next` are required for any data iterator, other functions here\n    are utilities for demonstration purposes.\n\n    \"\"\"\n\n    def __init__(self) -> None:\n        \"\"\"Generate some random data for demonstration.\n\n        Actual data can be anything that is currently supported by XGBoost.\n        \"\"\"\n        self.rows = ROWS_PER_BATCH\n        self.cols = COLS\n        rng = cupy.random.RandomState(numpy.uint64(1994))\n        self._data = [rng.randn(self.rows, self.cols)] * BATCHES\n        self._labels = [rng.randn(self.rows)] * BATCHES\n        self._weights = [rng.uniform(size=self.rows)] * BATCHES\n\n        self.it = 0  # set iterator to 0\n        super().__init__()\n\n    def as_array(self) -> cupy.ndarray:\n        return cupy.concatenate(self._data)\n\n    def as_array_labels(self) -> cupy.ndarray:\n        return cupy.concatenate(self._labels)\n\n    def as_array_weights(self) -> cupy.ndarray:\n        return cupy.concatenate(self._weights)\n\n    def data(self) -> cupy.ndarray:\n        \"\"\"Utility function for obtaining current batch of data.\"\"\"\n        return self._data[self.it]\n\n    def labels(self) -> cupy.ndarray:\n        \"\"\"Utility function for obtaining current batch of labels.\"\"\"\n        return self._labels[self.it]\n\n    def weights(self) -> cupy.ndarray:\n        return self._weights[self.it]\n\n    def reset(self) -> None:\n        \"\"\"Reset the iterator\"\"\"\n        self.it = 0\n\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Yield the next batch of data.\"\"\"\n        if self.it == len(self._data):\n            # Return False to let XGBoost know this is the end of iteration\n            return False\n\n        # input_data is a keyword-only function passed in by XGBoost and has a similar\n        # signature to the ``DMatrix`` constructor.\n        input_data(data=self.data(), label=self.labels(), weight=self.weights())\n        self.it += 1\n        return True\n\n\ndef main() -> None:\n    rounds = 100\n    it = IterForDMatrixDemo()\n\n    # Use iterator, must be `QuantileDMatrix`.\n\n    # In this demo, the input batches are created using cupy, and the data processing\n    # (quantile sketching) will be performed on GPU. If data is loaded with CPU based\n    # data structures like numpy or pandas, then the processing step will be performed\n    # on CPU instead.\n    m_with_it = xgboost.QuantileDMatrix(it)\n\n    # Use regular DMatrix.\n    m = xgboost.DMatrix(\n        it.as_array(), it.as_array_labels(), weight=it.as_array_weights()\n    )\n\n    assert m_with_it.num_col() == m.num_col()\n    assert m_with_it.num_row() == m.num_row()\n    # Tree method must be `hist`.\n    reg_with_it = xgboost.train(\n        {\"tree_method\": \"hist\", \"device\": \"cuda\"},\n        m_with_it,\n        num_boost_round=rounds,\n        evals=[(m_with_it, \"Train\")],\n    )\n    predict_with_it = reg_with_it.predict(m_with_it)\n\n    reg = xgboost.train(\n        {\"tree_method\": \"hist\", \"device\": \"cuda\"},\n        m,\n        num_boost_round=rounds,\n        evals=[(m, \"Train\")],\n    )\n    predict = reg.predict(m)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/guide-python/quantile_regression.py",
    "content": "\"\"\"\nQuantile Regression\n===================\n\n    .. versionadded:: 2.0.0\n\nThe script is inspired by this awesome example in sklearn:\nhttps://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html\n\n.. note::\n\n    The feature is only supported using the Python, R, and C packages. In addition, quantile\n    crossing can happen due to limitation in the algorithm.\n\n\"\"\"\n\nimport argparse\nfrom typing import Dict\n\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\n\nimport xgboost as xgb\n\n\ndef f(x: np.ndarray) -> np.ndarray:\n    \"\"\"The function to predict.\"\"\"\n    return x * np.sin(x)\n\n\ndef quantile_loss(args: argparse.Namespace) -> None:\n    \"\"\"Train a quantile regression model.\"\"\"\n    rng = np.random.RandomState(1994)\n    # Generate a synthetic dataset for demo, the generate process is from the sklearn\n    # example.\n    X = np.atleast_2d(rng.uniform(0, 10.0, size=1000)).T\n    expected_y = f(X).ravel()\n\n    sigma = 0.5 + X.ravel() / 10.0\n    noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2.0 / 2.0)\n    y = expected_y + noise\n\n    # Train on 0.05 and 0.95 quantiles. 
The model is similar to multi-class and\n    # multi-target models.\n    alpha = np.array([0.05, 0.5, 0.95])\n    evals_result: Dict[str, Dict] = {}\n\n    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)\n    # We will be using the `hist` tree method, quantile DMatrix can be used to preserve\n    # memory (which has nothing to do with quantile regression itself, see its document\n    # for details).\n    # Do not use the `exact` tree method for quantile regression, otherwise the\n    # performance might drop.\n    Xy = xgb.QuantileDMatrix(X_train, y_train)\n    # use Xy as a reference\n    Xy_test = xgb.QuantileDMatrix(X_test, y_test, ref=Xy)\n\n    booster = xgb.train(\n        {\n            # Use the quantile objective function.\n            \"objective\": \"reg:quantileerror\",\n            \"tree_method\": \"hist\",\n            \"quantile_alpha\": alpha,\n            # Let's try not to overfit.\n            \"learning_rate\": 0.04,\n            \"max_depth\": 5,\n            \"multi_strategy\": args.multi_strategy,\n            \"device\": args.device,\n        },\n        Xy,\n        num_boost_round=32,\n        early_stopping_rounds=2,\n        # The evaluation result is a weighted average across multiple quantiles.\n        evals=[(Xy, \"Train\"), (Xy_test, \"Test\")],\n        evals_result=evals_result,\n    )\n    xx = np.atleast_2d(np.linspace(0, 10, 1000)).T\n    scores = booster.inplace_predict(xx)\n    # dim 1 is the quantiles\n    assert scores.shape[0] == xx.shape[0]\n    assert scores.shape[1] == alpha.shape[0]\n\n    y_lower = scores[:, 0]  # alpha=0.05\n    y_med = scores[:, 1]  # alpha=0.5, median\n    y_upper = scores[:, 2]  # alpha=0.95\n\n    # Train a mse model for comparison\n    booster = xgb.train(\n        {\n            \"objective\": \"reg:squarederror\",\n            \"tree_method\": \"hist\",\n            # Let's try not to overfit.\n            \"learning_rate\": 0.04,\n            \"max_depth\": 5,\n   
     },\n        Xy,\n        num_boost_round=32,\n        early_stopping_rounds=2,\n        evals=[(Xy, \"Train\"), (Xy_test, \"Test\")],\n        evals_result=evals_result,\n    )\n    xx = np.atleast_2d(np.linspace(0, 10, 1000)).T\n    y_pred = booster.inplace_predict(xx)\n\n    if args.plot:\n        from matplotlib import pyplot as plt\n\n        fig = plt.figure(figsize=(10, 10))\n        plt.plot(xx, f(xx), \"g:\", linewidth=3, label=r\"$f(x) = x\\,\\sin(x)$\")\n        plt.plot(X_test, y_test, \"b.\", markersize=10, label=\"Test observations\")\n        plt.plot(xx, y_med, \"r-\", label=\"Predicted median\")\n        plt.plot(xx, y_pred, \"m-\", label=\"Predicted mean\")\n        plt.plot(xx, y_upper, \"k-\")\n        plt.plot(xx, y_lower, \"k-\")\n        plt.fill_between(\n            xx.ravel(), y_lower, y_upper, alpha=0.4, label=\"Predicted 90% interval\"\n        )\n        plt.xlabel(\"$x$\")\n        plt.ylabel(\"$f(x)$\")\n        plt.ylim(-10, 25)\n        plt.legend(loc=\"upper left\")\n        plt.show()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--plot\",\n        action=\"store_true\",\n        help=\"Specify it to enable plotting the outputs.\",\n    )\n    parser.add_argument(\n        \"--multi_strategy\",\n        choices=[\"multi_output_tree\", \"one_output_per_tree\"],\n        default=\"one_output_per_tree\",\n        help=\"See the parameter `multi_strategy` for more info. (Experimental)\",\n    )\n    parser.add_argument(\"--device\", choices=[\"cpu\", \"cuda\"], default=\"cpu\")\n    args = parser.parse_args()\n    quantile_loss(args)\n"
  },
  {
    "path": "demo/guide-python/sklearn_evals_result.py",
    "content": "\"\"\"\nDemo for accessing the xgboost eval metrics by using sklearn interface\n======================================================================\n\"\"\"\n\nimport numpy as np\nfrom sklearn.datasets import make_hastie_10_2\n\nimport xgboost as xgb\n\nX, y = make_hastie_10_2(n_samples=2000, random_state=42)\n\n# Map labels from {-1, 1} to {0, 1}\nlabels, y = np.unique(y, return_inverse=True)\n\nX_train, X_test = X[:1600], X[1600:]\ny_train, y_test = y[:1600], y[1600:]\n\nparam_dist = {\"objective\": \"binary:logistic\", \"n_estimators\": 2}\n\nclf = xgb.XGBModel(\n    **param_dist,\n    eval_metric=\"logloss\",\n)\n# Or you can use: clf = xgb.XGBClassifier(**param_dist)\n\nclf.fit(\n    X_train,\n    y_train,\n    eval_set=[(X_train, y_train), (X_test, y_test)],\n    verbose=True,\n)\n\n# Load evals result by calling the evals_result() function\nevals_result = clf.evals_result()\n\nprint(\"Access logloss metric directly from validation_0:\")\nprint(evals_result[\"validation_0\"][\"logloss\"])\n\nprint(\"\")\nprint(\"Access metrics through a loop:\")\nfor e_name, e_mtrs in evals_result.items():\n    print(\"- {}\".format(e_name))\n    for e_mtr_name, e_mtr_vals in e_mtrs.items():\n        print(\"   - {}\".format(e_mtr_name))\n        print(\"      - {}\".format(e_mtr_vals))\n\nprint(\"\")\nprint(\"Access complete dict:\")\nprint(evals_result)\n"
  },
  {
    "path": "demo/guide-python/sklearn_examples.py",
    "content": "\"\"\"\nCollection of examples for using sklearn interface\n==================================================\n\nFor an introduction to XGBoost's scikit-learn estimator interface, see\n:doc:`/python/sklearn_estimator`.\n\nCreated on 1 Apr 2015\n\n@author: Jamie Hall\n\"\"\"\n\nimport pickle\nfrom urllib.error import HTTPError\n\nimport numpy as np\nfrom sklearn.datasets import (\n    fetch_california_housing,\n    load_digits,\n    load_iris,\n    make_regression,\n)\nfrom sklearn.metrics import confusion_matrix, mean_squared_error\nfrom sklearn.model_selection import GridSearchCV, KFold, train_test_split\n\nimport xgboost as xgb\n\nrng = np.random.RandomState(31337)\n\nprint(\"Zeros and Ones from the Digits dataset: binary classification\")\ndigits = load_digits(n_class=2)\ny = digits[\"target\"]\nX = digits[\"data\"]\nkf = KFold(n_splits=2, shuffle=True, random_state=rng)\nfor train_index, test_index in kf.split(X):\n    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])\n    predictions = xgb_model.predict(X[test_index])\n    actuals = y[test_index]\n    print(confusion_matrix(actuals, predictions))\n\nprint(\"Iris: multiclass classification\")\niris = load_iris()\ny = iris[\"target\"]\nX = iris[\"data\"]\nkf = KFold(n_splits=2, shuffle=True, random_state=rng)\nfor train_index, test_index in kf.split(X):\n    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])\n    predictions = xgb_model.predict(X[test_index])\n    actuals = y[test_index]\n    print(confusion_matrix(actuals, predictions))\n\nprint(\"California Housing: regression\")\n\ntry:\n    X, y = fetch_california_housing(return_X_y=True)\nexcept HTTPError:\n    # Use a synthetic dataset instead if we couldn't\n    X, y = make_regression(n_samples=20640, n_features=8, random_state=1234)\n\nkf = KFold(n_splits=2, shuffle=True, random_state=rng)\nfor train_index, test_index in kf.split(X):\n    xgb_model = 
xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])\n    predictions = xgb_model.predict(X[test_index])\n    actuals = y[test_index]\n    print(mean_squared_error(actuals, predictions))\n\nprint(\"Parameter optimization\")\nxgb_model = xgb.XGBRegressor(n_jobs=1)\nclf = GridSearchCV(\n    xgb_model,\n    {\"max_depth\": [2, 4], \"n_estimators\": [50, 100]},\n    verbose=1,\n    n_jobs=1,\n    cv=3,\n)\nclf.fit(X, y)\nprint(clf.best_score_)\nprint(clf.best_params_)\n\n# The sklearn API models are picklable\nprint(\"Pickling sklearn API models\")\n# must open in binary format to pickle\npickle.dump(clf, open(\"best_calif.pkl\", \"wb\"))\nclf2 = pickle.load(open(\"best_calif.pkl\", \"rb\"))\nprint(np.allclose(clf.predict(X), clf2.predict(X)))\n\n# Early-stopping\n\nX = digits[\"data\"]\ny = digits[\"target\"]\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\nclf = xgb.XGBClassifier(n_jobs=1, early_stopping_rounds=10, eval_metric=\"auc\")\nclf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n"
  },
  {
    "path": "demo/guide-python/sklearn_parallel.py",
    "content": "\"\"\"\nDemo for using xgboost with sklearn\n===================================\n\"\"\"\n\nimport multiprocessing\nfrom urllib.error import HTTPError\n\nfrom sklearn.datasets import fetch_california_housing, make_regression\nfrom sklearn.model_selection import GridSearchCV\n\nimport xgboost as xgb\n\nif __name__ == \"__main__\":\n    print(\"Parallel Parameter optimization\")\n    try:\n        X, y = fetch_california_housing(return_X_y=True)\n    except HTTPError:\n        # Use a synthetic dataset instead if we couldn't\n        X, y = make_regression(n_samples=20640, n_features=8, random_state=1234)\n    # Make sure the number of threads is balanced.\n    xgb_model = xgb.XGBRegressor(\n        n_jobs=multiprocessing.cpu_count() // 2, tree_method=\"hist\"\n    )\n    clf = GridSearchCV(\n        xgb_model,\n        {\"max_depth\": [2, 4, 6], \"n_estimators\": [50, 100, 200]},\n        verbose=1,\n        n_jobs=2,\n    )\n    clf.fit(X, y)\n    print(clf.best_score_)\n    print(clf.best_params_)\n"
  },
  {
    "path": "demo/guide-python/spark_estimator_examples.py",
    "content": "\"\"\"\nCollection of examples for using xgboost.spark estimator interface\n==================================================================\n\n@author: Weichen Xu\n\"\"\"\n\nimport numpy as np\nimport sklearn.datasets\nfrom pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator\nfrom pyspark.ml.linalg import Vectors\nfrom pyspark.sql import DataFrame, SparkSession\nfrom pyspark.sql.functions import rand\nfrom sklearn.model_selection import train_test_split\n\nfrom xgboost.spark import SparkXGBClassifier, SparkXGBRegressor\n\nspark = SparkSession.builder.master(\"local[*]\").getOrCreate()\n\n\ndef create_spark_df(X: np.ndarray, y: np.ndarray) -> DataFrame:\n    return spark.createDataFrame(\n        spark.sparkContext.parallelize(\n            [(Vectors.dense(features), float(label)) for features, label in zip(X, y)]\n        ),\n        [\"features\", \"label\"],\n    )\n\n\n# load diabetes dataset (regression dataset)\ndiabetes_X, diabetes_y = sklearn.datasets.load_diabetes(return_X_y=True)\ndiabetes_X_train, diabetes_X_test, diabetes_y_train, diabetes_y_test = train_test_split(\n    diabetes_X, diabetes_y, test_size=0.3, shuffle=True\n)\n\ndiabetes_train_spark_df = create_spark_df(diabetes_X_train, diabetes_y_train)\ndiabetes_test_spark_df = create_spark_df(diabetes_X_test, diabetes_y_test)\n\n# train xgboost regressor model\nxgb_regressor = SparkXGBRegressor(max_depth=5)\nxgb_regressor_model = xgb_regressor.fit(diabetes_train_spark_df)\n\ntransformed_diabetes_test_spark_df = xgb_regressor_model.transform(\n    diabetes_test_spark_df\n)\nregressor_evaluator = RegressionEvaluator(metricName=\"rmse\")\nprint(\n    f\"regressor rmse={regressor_evaluator.evaluate(transformed_diabetes_test_spark_df)}\"\n)\n\ndiabetes_train_spark_df2 = diabetes_train_spark_df.withColumn(\n    \"validationIndicatorCol\", rand(1) > 0.7\n)\n\n# train xgboost regressor model with validation dataset\nxgb_regressor2 = SparkXGBRegressor(\n    
max_depth=5, validation_indicator_col=\"validationIndicatorCol\"\n)\nxgb_regressor_model2 = xgb_regressor2.fit(diabetes_train_spark_df2)\ntransformed_diabetes_test_spark_df2 = xgb_regressor_model2.transform(\n    diabetes_test_spark_df\n)\nprint(\n    f\"regressor2 rmse={regressor_evaluator.evaluate(transformed_diabetes_test_spark_df2)}\"\n)\n\n\n# load iris dataset (classification dataset)\niris_X, iris_y = sklearn.datasets.load_iris(return_X_y=True)\niris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(\n    iris_X, iris_y, test_size=0.3, shuffle=True\n)\n\niris_train_spark_df = create_spark_df(iris_X_train, iris_y_train)\niris_test_spark_df = create_spark_df(iris_X_test, iris_y_test)\n\n# train xgboost classifier model\nxgb_classifier = SparkXGBClassifier(max_depth=5)\nxgb_classifier_model = xgb_classifier.fit(iris_train_spark_df)\n\ntransformed_iris_test_spark_df = xgb_classifier_model.transform(iris_test_spark_df)\nclassifier_evaluator = MulticlassClassificationEvaluator(metricName=\"f1\")\nprint(f\"classifier f1={classifier_evaluator.evaluate(transformed_iris_test_spark_df)}\")\n\niris_train_spark_df2 = iris_train_spark_df.withColumn(\n    \"validationIndicatorCol\", rand(1) > 0.7\n)\n\n# train xgboost classifier model with validation dataset\nxgb_classifier2 = SparkXGBClassifier(\n    max_depth=5, validation_indicator_col=\"validationIndicatorCol\"\n)\nxgb_classifier_model2 = xgb_classifier2.fit(iris_train_spark_df2)\ntransformed_iris_test_spark_df2 = xgb_classifier_model2.transform(iris_test_spark_df)\nprint(\n    f\"classifier2 f1={classifier_evaluator.evaluate(transformed_iris_test_spark_df2)}\"\n)\n\nspark.stop()\n"
  },
  {
    "path": "demo/guide-python/update_process.py",
    "content": "\"\"\"\nDemo for using `process_type` with `prune` and `refresh`\n========================================================\n\nModifying existing trees is not a well established use for XGBoost, so feel free to\nexperiment.\n\n\"\"\"\n\nfrom urllib.error import HTTPError\n\nimport numpy as np\nfrom sklearn.datasets import fetch_california_housing, make_regression\n\nimport xgboost as xgb\n\n\ndef main() -> None:\n    n_rounds = 32\n\n    try:\n        X, y = fetch_california_housing(return_X_y=True)\n    except HTTPError:\n        # Use a synthetic dataset instead if we couldn't\n        X, y = make_regression(n_samples=20640, n_features=8, random_state=1234)\n\n    # Train a model first\n    X_train = X[: X.shape[0] // 2]\n    y_train = y[: y.shape[0] // 2]\n    Xy = xgb.DMatrix(X_train, y_train)\n    evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}\n    booster = xgb.train(\n        {\"tree_method\": \"hist\", \"max_depth\": 6, \"device\": \"cuda\"},\n        Xy,\n        num_boost_round=n_rounds,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    SHAP = booster.predict(Xy, pred_contribs=True)\n\n    # Refresh the leaf value and tree statistic\n    X_refresh = X[X.shape[0] // 2 :]\n    y_refresh = y[y.shape[0] // 2 :]\n    Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)\n    # The model will adapt to other half of the data by changing leaf value (no change in\n    # split condition) with refresh_leaf set to True.\n    refresh_result: xgb.callback.EvaluationMonitor.EvalsLog = {}\n    refreshed = xgb.train(\n        {\"process_type\": \"update\", \"updater\": \"refresh\", \"refresh_leaf\": True},\n        Xy_refresh,\n        num_boost_round=n_rounds,\n        xgb_model=booster,\n        evals=[(Xy, \"Original\"), (Xy_refresh, \"Train\")],\n        evals_result=refresh_result,\n    )\n\n    # Refresh the model without changing the leaf value, but tree statistic including\n    # cover and weight are refreshed.\n 
   refresh_result = {}\n    refreshed = xgb.train(\n        {\"process_type\": \"update\", \"updater\": \"refresh\", \"refresh_leaf\": False},\n        Xy_refresh,\n        num_boost_round=n_rounds,\n        xgb_model=booster,\n        evals=[(Xy, \"Original\"), (Xy_refresh, \"Train\")],\n        evals_result=refresh_result,\n    )\n    # Without refreshing the leaf value, resulting trees should be the same with original\n    # model except for accumulated statistic.  The rtol is for floating point error in\n    # prediction.\n    np.testing.assert_allclose(\n        refresh_result[\"Original\"][\"rmse\"], evals_result[\"Train\"][\"rmse\"], rtol=1e-5\n    )\n    # But SHAP value is changed as cover in tree nodes are changed.\n    refreshed_SHAP = refreshed.predict(Xy, pred_contribs=True)\n    assert not np.allclose(SHAP, refreshed_SHAP, rtol=1e-3)\n\n    # Prune the trees with smaller max_depth\n    X_update = X_train\n    y_update = y_train\n    Xy_update = xgb.DMatrix(X_update, y_update)\n\n    prune_result: xgb.callback.EvaluationMonitor.EvalsLog = {}\n    pruned = xgb.train(\n        {\"process_type\": \"update\", \"updater\": \"prune\", \"max_depth\": 2},\n        Xy_update,\n        num_boost_round=n_rounds,\n        xgb_model=booster,\n        evals=[(Xy, \"Original\"), (Xy_update, \"Train\")],\n        evals_result=prune_result,\n    )\n    # Have a smaller model, but similar accuracy.\n    np.testing.assert_allclose(\n        np.array(prune_result[\"Original\"][\"rmse\"]),\n        np.array(prune_result[\"Train\"][\"rmse\"]),\n        atol=1e-5,\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/kaggle-higgs/README.md",
    "content": "Highlights\n=====\nThe Higgs challenge ended recently, and xgboost was used by many users. This list highlights the xgboost solutions of players:\n* Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)\n* The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml)\n\nGuide for Kaggle Higgs Challenge\n=====\n\nThis folder gives an example of how to use the XGBoost Python module to run the Kaggle Higgs competition.\n\nThis script will achieve about 3.600 AMS score on the public leaderboard. To get started, follow these steps:\n\n1. Compile the XGBoost python lib\n```bash\ncd ../..\nmake\n```\n\n2. Put training.csv and test.csv in the folder './data' (you can create a symbolic link)\n\n3. Run ./run.sh\n\nSpeed\n=====\nspeedtest.py compares xgboost's speed on this dataset with sklearn.GBM\n\n\nUsing R module\n=====\n* Alternatively, you can run using R, with higgs-train.R and higgs-pred.R.\n"
  },
  {
    "path": "demo/kaggle-higgs/higgs-cv.py",
    "content": "#!/usr/bin/python\nimport numpy as np\n\nimport xgboost as xgb\n\n### load data in do training\ntrain = np.loadtxt('./data/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s'.encode('utf-8')) } )\nlabel  = train[:,32]\ndata   = train[:,1:31]\nweight = train[:,31]\ndtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )\nparam = {'max_depth':6, 'eta':0.1, 'objective':'binary:logitraw', 'nthread':4}\nnum_round = 120\n\nprint ('running cross validation, with preprocessing function')\n# define the preprocessing function\n# used to return the preprocessed training, test data, and parameter\n# we can use this to do weight rescale, etc.\n# as a example, we try to set scale_pos_weight\ndef fpreproc(dtrain, dtest, param):\n    label = dtrain.get_label()\n    ratio = float(np.sum(label == 0)) / np.sum(label==1)\n    param['scale_pos_weight'] = ratio\n    wtrain = dtrain.get_weight()\n    wtest = dtest.get_weight()\n    sum_weight = sum(wtrain) + sum(wtest)\n    wtrain *= sum_weight / sum(wtrain)\n    wtest *= sum_weight / sum(wtest)\n    dtrain.set_weight(wtrain)\n    dtest.set_weight(wtest)\n    return (dtrain, dtest, param)\n\n# do cross validation, for each fold\n# the dtrain, dtest, param will be passed into fpreproc\n# then the return value of fpreproc will be used to generate\n# results of that fold\nxgb.cv(param, dtrain, num_round, nfold=5,\n       metrics={'ams@0.15', 'auc'}, seed = 0, fpreproc = fpreproc)\n"
  },
  {
    "path": "demo/kaggle-higgs/higgs-numpy.py",
    "content": "#!/usr/bin/python\n# this is the example script to use xgboost to train\nimport numpy as np\n\nimport xgboost as xgb\n\ntest_size = 550000\n\n# path to where the data lies\ndpath = 'data'\n\n# load in training data, directly use numpy\ndtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s'.encode('utf-8')) } )\nprint ('finish loading from csv ')\n\nlabel  = dtrain[:,32]\ndata   = dtrain[:,1:31]\n# rescale weight to make it same as test set\nweight = dtrain[:,31] * float(test_size) / len(label)\n\nsum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0  )\nsum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0  )\n\n# print weight statistics\nprint ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))\n\n# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value\nxgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )\n\n# setup parameters for xgboost\nparam = {}\n# use logistic regression loss, use raw prediction before logistic transformation\n# since we only need the rank\nparam['objective'] = 'binary:logitraw'\n# scale weight of positive examples\nparam['scale_pos_weight'] = sum_wneg/sum_wpos\nparam['eta'] = 0.1\nparam['max_depth'] = 6\nparam['eval_metric'] = 'auc'\nparam['nthread'] = 16\n\n# you can directly throw param in, though we want to watch multiple metrics here\nplst = list(param.items())+[('eval_metric', 'ams@0.15')]\n\nwatchlist = [ (xgmat,'train') ]\n# boost 120 trees\nnum_round = 120\nprint ('loading data end, start to boost trees')\nbst = xgb.train( plst, xgmat, num_round, watchlist );\n# save out model\nbst.save_model('higgs.model')\n\nprint ('finish training')\n"
  },
  {
    "path": "demo/kaggle-higgs/higgs-pred.R",
    "content": "# install xgboost package, see R-package in root folder\nrequire(xgboost)\nrequire(methods)\n\nmodelfile <- \"higgs.model\"\noutfile <- \"higgs.pred.csv\"\ndtest <- read.csv(\"data/test.csv\", header = TRUE)\ndata <- as.matrix(dtest[2:31])\nidx <- dtest[[1]]\n\nxgmat <- xgb.DMatrix(data, missing = -999.0)\nbst <- xgb.load(modelfile = modelfile)\nypred <- predict(bst, xgmat)\n\nrorder <- rank(ypred, ties.method = \"first\")\n\nthreshold <- 0.15\n# to be completed\nntop <- length(rorder) - as.integer(threshold * length(rorder))\nplabel <- ifelse(rorder > ntop, \"s\", \"b\")\noutdata <- list(\"EventId\" = idx,\n                \"RankOrder\" = rorder,\n                \"Class\" = plabel)\nwrite.csv(outdata, file = outfile, quote = FALSE, row.names = FALSE)\n"
  },
  {
    "path": "demo/kaggle-higgs/higgs-pred.py",
    "content": "#!/usr/bin/python\n# make prediction\nimport numpy as np\n\nimport xgboost as xgb\n\n# path to where the data lies\ndpath = 'data'\n\nmodelfile = 'higgs.model'\noutfile = 'higgs.pred.csv'\n# make top 15% as positive\nthreshold_ratio = 0.15\n\n# load in training data, directly use numpy\ndtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 )\ndata   = dtest[:,1:31]\nidx = dtest[:,0]\n\nprint ('finish loading from csv ')\nxgmat = xgb.DMatrix( data, missing = -999.0 )\nbst = xgb.Booster({'nthread':16}, model_file = modelfile)\nypred = bst.predict( xgmat )\n\nres  = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]\n\nrorder = {}\nfor k, v in sorted( res, key = lambda x:-x[1] ):\n    rorder[ k ] = len(rorder) + 1\n\n# write out predictions\nntop = int( threshold_ratio * len(rorder ) )\nfo = open(outfile, 'w')\nnhit = 0\nntot = 0\nfo.write('EventId,RankOrder,Class\\n')\nfor k, v in res:\n    if rorder[k] <= ntop:\n        lb = 's'\n        nhit += 1\n    else:\n        lb = 'b'\n    # change output rank order to follow Kaggle convention\n    fo.write('%s,%d,%s\\n' % ( k,  len(rorder)+1-rorder[k], lb ) )\n    ntot += 1\nfo.close()\n\nprint ('finished writing into prediction file')\n"
  },
  {
    "path": "demo/kaggle-higgs/higgs-train.R",
    "content": "# install xgboost package, see R-package in root folder\nrequire(xgboost)\nrequire(methods)\n\ntestsize <- 550000\n\ndtrain <- read.csv(\"data/training.csv\", header = TRUE)\ndtrain[33] <- dtrain[33] == \"s\"\nlabel <- as.numeric(dtrain[[33]])\ndata <- as.matrix(dtrain[2:31])\nweight <- as.numeric(dtrain[[32]]) * testsize / length(label)\n\nsumwpos <- sum(weight * (label == 1.0))\nsumwneg <- sum(weight * (label == 0.0))\nprint(paste(\"weight statistics: wpos=\", sumwpos, \"wneg=\", sumwneg, \"ratio=\", sumwneg / sumwpos))\n\nxgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0)\nparam <- list(\"objective\" = \"binary:logitraw\",\n              \"scale_pos_weight\" = sumwneg / sumwpos,\n              \"bst:eta\" = 0.1,\n              \"bst:max_depth\" = 6,\n              \"eval_metric\" = \"auc\",\n              \"eval_metric\" = \"ams@0.15\",\n              \"nthread\" = 16)\nwatchlist <- list(\"train\" = xgmat)\nnrounds <- 120\nprint(\"loading data end, start to boost trees\")\nbst <- xgb.train(param, xgmat, nrounds, watchlist)\n# save out model\nxgb.save(bst, \"higgs.model\")\nprint('finish training')\n"
  },
  {
    "path": "demo/kaggle-higgs/run.sh",
    "content": "#!/bin/bash\n\npython -u higgs-numpy.py\nret=$?\nif [[ $ret != 0 ]]; then\n    echo \"ERROR in higgs-numpy.py\"\n    exit $ret\nfi\npython -u higgs-pred.py\nret=$?\nif [[ $ret != 0 ]]; then\n    echo \"ERROR in higgs-pred.py\"\n    exit $ret\nfi\n"
  },
  {
    "path": "demo/kaggle-higgs/speedtest.R",
    "content": "# install xgboost package, see R-package in root folder\nrequire(xgboost)\nrequire(gbm)\nrequire(methods)\n\ntestsize <- 550000\n\ndtrain <- read.csv(\"data/training.csv\", header = TRUE, nrows = 350001)\ndtrain$Label <- as.numeric(dtrain$Label == 's')\n# gbm.time = system.time({\n#   gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120,\n#                    interaction.depth = 6, shrinkage = 0.1, bag.fraction = 1,\n#                    verbose = TRUE)\n# })\n# print(gbm.time)\n# Test result: 761.48 secs\n\n# dtrain[33] <- dtrain[33] == \"s\"\n# label <- as.numeric(dtrain[[33]])\ndata <- as.matrix(dtrain[2:31])\nweight <- as.numeric(dtrain[[32]]) * testsize / length(label)\n\nsumwpos <- sum(weight * (label == 1.0))\nsumwneg <- sum(weight * (label == 0.0))\nprint(paste(\"weight statistics: wpos=\", sumwpos, \"wneg=\", sumwneg, \"ratio=\", sumwneg / sumwpos))\n\nxgboost.time <- list()\nthreads <- c(1, 2, 4, 8, 16)\nfor (i in seq_along(threads)){\n  thread <- threads[i]\n  xgboost.time[[i]] <- system.time({\n    xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0)\n    param <- list(\"objective\" = \"binary:logitraw\",\n                  \"scale_pos_weight\" = sumwneg / sumwpos,\n                  \"bst:eta\" = 0.1,\n                  \"bst:max_depth\" = 6,\n                  \"eval_metric\" = \"auc\",\n                  \"eval_metric\" = \"ams@0.15\",\n                  \"nthread\" = thread)\n    watchlist <- list(\"train\" = xgmat)\n    nrounds <- 120\n    print(\"loading data end, start to boost trees\")\n    bst <- xgb.train(param, xgmat, nrounds, watchlist)\n    # save out model\n    xgb.save(bst, \"higgs.model\")\n    print('finish training')\n  })\n}\n\nxgboost.time\n# [[1]]\n# user  system elapsed\n# 99.015   0.051  98.982\n#\n# [[2]]\n# user  system elapsed\n# 100.268   0.317  55.473\n#\n# [[3]]\n# user  system elapsed\n# 111.682   0.777  35.963\n#\n# [[4]]\n# user  system elapsed\n# 149.396   1.851  
32.661\n#\n# [[5]]\n# user  system elapsed\n# 157.390   5.988  40.949\n"
  },
  {
    "path": "demo/kaggle-higgs/speedtest.py",
    "content": "#!/usr/bin/python\n# this is the example script to use xgboost to train\nimport time\n\nimport numpy as np\nfrom sklearn.ensemble import GradientBoostingClassifier\n\nimport xgboost as xgb\n\ntest_size = 550000\n\n# path to where the data lies\ndpath = 'data'\n\n# load in training data, directly use numpy\ndtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )\nprint ('finish loading from csv ')\n\nlabel  = dtrain[:,32]\ndata   = dtrain[:,1:31]\n# rescale weight to make it same as test set\nweight = dtrain[:,31] * float(test_size) / len(label)\n\nsum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0  )\nsum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0  )\n\n# print weight statistics\nprint ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))\n\n# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value\nxgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )\n\n# setup parameters for xgboost\nparam = {}\n# use logistic regression loss\nparam['objective'] = 'binary:logitraw'\n# scale weight of positive examples\nparam['scale_pos_weight'] = sum_wneg/sum_wpos\nparam['bst:eta'] = 0.1\nparam['bst:max_depth'] = 6\nparam['eval_metric'] = 'auc'\nparam['nthread'] = 4\n\nplst = param.items()+[('eval_metric', 'ams@0.15')]\n\nwatchlist = [ (xgmat,'train') ]\n# boost 10 trees\nnum_round = 10\nprint ('loading data end, start to boost trees')\nprint (\"training GBM from sklearn\")\ntmp = time.time()\ngbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)\ngbm.fit(data, label)\nprint (\"sklearn.GBM costs: %s seconds\" % str(time.time() - tmp))\n#raw_input()\nprint (\"training xgboost\")\nthreads = [1, 2, 4, 16]\nfor i in threads:\n    param['nthread'] = i\n    tmp = time.time()\n    plst = param.items()+[('eval_metric', 'ams@0.15')]\n    bst = xgb.train( plst, xgmat, 
num_round, watchlist );\n    print (\"XGBoost with %d thread costs: %s seconds\" % (i, str(time.time() - tmp)))\n\nprint ('finish training')\n"
  },
  {
    "path": "demo/kaggle-otto/README.MD",
    "content": "Benchmark for Otto Group Competition\n=========\n\nThis is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge).\n\n## Getting started\n\n1. Put `train.csv` and `test.csv` under the `data` folder\n2. Run the script\n3. Submit the `submission.csv`\n\nThe parameter `nthread` controls the number of cores to run on, please set it to suit your machine.\n\n## R-package\n\nTo install the R-package of xgboost, please run\n\n```r\ninstall.packages(\"xgboost\", repos = \"https://cran.r-project.org\")\n```\n\nWindows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.\n"
  },
  {
    "path": "demo/kaggle-otto/otto_train_pred.R",
    "content": "require(xgboost)\nrequire(methods)\n\ntrain <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE)\ntest <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE)\ntrain <- train[, -1]\ntest <- test[, -1]\n\ny <- train[, ncol(train)]\ny <- gsub('Class_', '', y, fixed = TRUE)\ny <- as.integer(y) - 1  # xgboost take features in [0,numOfClass)\n\nx <- rbind(train[, -ncol(train)], test)\nx <- as.matrix(x)\nx <- matrix(as.numeric(x), nrow(x), ncol(x))\ntrind <- seq_along(y)\nteind <- (nrow(train) + 1):nrow(x)\n\n# Set necessary parameter\nparam <- list(\"objective\" = \"multi:softprob\",\n              \"eval_metric\" = \"mlogloss\",\n              \"num_class\" = 9,\n              \"nthread\" = 8)\n\n# Run Cross Validation\ncv.nrounds <- 50\nbst.cv <- xgb.cv(\n    param = param\n    , data = x[trind, ]\n    , label = y\n    , nfold = 3\n    , nrounds = cv.nrounds\n)\n\n# Train the model\nnrounds <- 50\nbst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nrounds)\n\n# Make prediction\npred <- predict(bst, x[teind, ])\npred <- matrix(pred, 9, length(pred) / 9)\npred <- t(pred)\n\n# Output submission\npred <- format(pred, digits = 2, scientific = FALSE) # shrink the size of submission\npred <- data.frame(seq_len(nrow(pred)), pred)\nnames(pred) <- c('id', paste0('Class_', 1:9))\nwrite.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE)\n"
  },
  {
    "path": "demo/kaggle-otto/understandingXGBoostModel.Rmd",
    "content": "---\ntitle: \"Understanding XGBoost Model on Otto Dataset\"\nauthor: \"Michaël Benesty\"\noutput:\n  rmarkdown::html_vignette:\n    css: ../../R-package/vignettes/vignette.css\n    number_sections: yes\n    toc: yes\n---\n\nIntroduction\n============\n\n**XGBoost** is an implementation of the famous gradient boosting algorithm. This model is often described as a *blackbox*, meaning it works well but it is not trivial to understand how. Indeed, the model is made of hundreds (thousands?) of decision trees. You may wonder how possible a human would be able to have a general view of the model?\n\nWhile XGBoost is known for its fast speed and accurate predictive power, it also comes with various functions to help you understand the model.\nThe purpose of this RMarkdown document is to demonstrate how easily we can leverage the functions already implemented in **XGBoost R** package. Of course, everything showed below can be applied to the dataset you may have to manipulate at work or wherever!\n\nFirst we will prepare the **Otto** dataset and train a model, then we will generate two visualisations to get a clue of what is important to the model, finally, we will see how we can leverage these information.\n\nPreparation of the data\n=======================\n\nThis part is based on the **R** tutorial example by [Tong He](https://github.com/dmlc/xgboost/blob/master/demo/kaggle-otto/otto_train_pred.R)\n\nFirst, let's load the packages and the dataset.\n\n```{r loading}\nrequire(xgboost)\nrequire(methods)\nrequire(data.table)\nrequire(magrittr)\ntrain <- fread('data/train.csv', header = TRUE, stringsAsFactors = FALSE)\ntest <- fread('data/test.csv', header = TRUE, stringsAsFactors = FALSE)\n```\n> `magrittr` and `data.table` are here to make the code cleaner and much more rapid.\n\nLet's explore the dataset.\n\n```{r explore}\n# Train dataset dimensions\ndim(train)\n\n# Training content\ntrain[1:6, 1:5, with = FALSE]\n\n# Test dataset dimensions\ndim(test)\n\n# 
Test content\ntest[1:6, 1:5, with = FALSE]\n```\n> We only display the 6 first rows and 5 first columns for convenience\n\nEach *column* represents a feature measured by an `integer`. Each *row* is an **Otto** product.\n\nObviously the first column (`ID`) doesn't contain any useful information.\n\nTo let the algorithm focus on real stuff, we will delete it.\n\n```{r clean, results='hide'}\n# Delete ID column in training dataset\ntrain[, id := NULL]\n\n# Delete ID column in testing dataset\ntest[, id := NULL]\n```\n\nAccording to its description, the **Otto** challenge is a multi class classification challenge. We need to extract the labels (here the name of the different classes) from the dataset. We only have two files (test and training), it seems logical that the training file contains the class we are looking for. Usually the labels is in the first or the last column. We already know what is in the first column, let's check the content of the last one.\n\n```{r searchLabel}\n# Check the content of the last column\ntrain[1:6, ncol(train), with  = FALSE]\n# Save the name of the last column\nnameLastCol <- names(train)[ncol(train)]\n```\n\nThe classes are provided as character string in the `r ncol(train)`th column called `r nameLastCol`. As you may know, **XGBoost** doesn't support anything else than numbers. So we will convert classes to `integer`. 
Moreover, according to the documentation, it should start at `0`.\n\nFor that purpose, we will:\n\n* extract the target column\n* remove `Class_` from each class name\n* convert to `integer`\n* remove `1` to the new value\n\n```{r classToIntegers}\n# Convert from classes to numbers\ny <- train[, nameLastCol, with = FALSE][[1]] %>%\n    gsub('Class_', '', ., fixed = TRUE) %>%\n    as.integer %>%\n    subtract(., 1)\n\n# Display the first 5 levels\ny[1:5]\n```\n\nWe remove label column from training dataset, otherwise **XGBoost** would use it to guess the labels!\n\n```{r deleteCols, results='hide'}\ntrain[, nameLastCol := NULL, with = FALSE]\n```\n\n`data.table` is an awesome implementation of data.frame, unfortunately it is not a format supported natively by **XGBoost**. We need to convert both datasets (training and test) in `numeric` Matrix format.\n\n```{r convertToNumericMatrix}\ntrainMatrix <- train[, lapply(.SD, as.numeric)] %>% as.matrix\ntestMatrix <- test[, lapply(.SD, as.numeric)] %>% as.matrix\n```\n\nModel training\n==============\n\nBefore the learning we will use the cross validation to evaluate the our error rate.\n\nBasically **XGBoost** will divide the training data in `nfold` parts, then **XGBoost** will retain the first part to use it as the test data and perform a training. 
Then it will reintegrate the first part and retain the second part, do a training and so on...\n\nYou can look at the function documentation for more information.\n\n```{r crossValidation}\nnumberOfClasses <- max(y) + 1\n\nparam <- list(\"objective\" = \"multi:softprob\",\n              \"eval_metric\" = \"mlogloss\",\n              \"num_class\" = numberOfClasses)\n\ncv.nrounds <- 5\ncv.nfold <- 3\n\nbst.cv <- xgb.cv(param = param, data = trainMatrix, label = y,\n                nfold = cv.nfold, nrounds = cv.nrounds)\n```\n> As we can see the error rate is low on the test dataset (for a 5mn trained model).\n\nFinally, we are ready to train the real model!!!\n\n```{r modelTraining}\nnrounds <- 50\nbst <- xgboost(param = param, data = trainMatrix, label = y, nrounds = nrounds)\n```\n\nModel understanding\n===================\n\nFeature importance\n------------------\n\nSo far, we have built a model made of **`r nrounds`** trees.\n\nTo build a tree, the dataset is divided recursively several times. At the end of the process, you get groups of observations (here, these observations are properties regarding **Otto** products).\n\nEach division operation is called a *split*.\n\nEach group at each division level is called a branch and the deepest level is called a *leaf*.\n\nIn the final model, these *leafs* are supposed to be as pure as possible for each tree, meaning in our case that each *leaf* should be made of one class of **Otto** product only (of course it is not true, but that's what we try to achieve in a minimum of splits).\n\n**Not all *splits* are equally important**. Basically the first *split* of a tree will have more impact on the purity that, for instance, the deepest *split*. 
Intuitively, we understand that the first *split* makes most of the work, and the following *splits* focus on smaller parts of the dataset which have been misclassified by the first *tree*.\n\nIn the same way, in Boosting we try to optimize the misclassification at each round (it is called the *loss*). So the first *tree* will do the big work and the following trees will focus on the remaining, on the parts not correctly learned by the previous *trees*.\n\nThe improvement brought by each *split* can be measured, it is the *gain*.\n\nEach *split* is done on one feature only at one value.\n\nLet's see what the model looks like.\n\n```{r modelDump}\nmodel <- xgb.dump(bst, with.stats = TRUE)\nmodel[1:10]\n```\n> For convenience, we are displaying the first 10 lines of the model only.\n\nClearly, it is not easy to understand what it means.\n\nBasically each line represents a *branch*, there is the *tree* ID, the feature ID, the point where it *splits*, and information regarding the next *branches* (left, right, when the row for this feature is N/A).\n\nHopefully, **XGBoost** offers a better representation: **feature importance**.\n\nFeature importance is about averaging the *gain* of each feature for all *split* and all *trees*.\n\nThen we can use the function `xgb.plot.importance`.\n\n```{r importanceFeature, fig.align='center', fig.height=5, fig.width=10}\n# Get the feature real names\nnames <- dimnames(trainMatrix)[[2]]\n\n# Compute feature importance matrix\nimportance_matrix <- xgb.importance(names, model = bst)\n\n# Nice graph\nxgb.plot.importance(importance_matrix[1:10, ])\n```\n\n> To make it understandable we first extract the column names from the `Matrix`.\n\nInterpretation\n--------------\n\nIn the feature importance above, we can see the first 10 most important features.\n\nThis function gives a color to each bar. These colors represent groups of features. 
Basically a K-means clustering is applied to group each feature by importance.\n\nFrom here you can take several actions. For instance you can remove the less important features (feature selection process), or go deeper in the interaction between the most important features and labels.\n\nOr you can just reason about why these features are so important (in the **Otto** challenge we can't go this way because there is not enough information).\n\nTree graph\n----------\n\nFeature importance gives you feature weight information but not interaction between features.\n\nThe **XGBoost R** package has another useful function for that.\n\nPlease scroll to the right to see the tree.\n\n```{r treeGraph, dpi=1500, fig.align='left'}\nxgb.plot.tree(feature_names = names, model = bst, n_first_tree = 2)\n```\n\nWe are just displaying the first two trees here.\n\nOn simple models the first two trees may be enough. Here, it might not be the case. We can see from the size of the trees that the interaction between features is complicated.\nBesides, **XGBoost** generates `k` trees at each round for a `k`-classification problem. Therefore the two trees illustrated here are trying to classify data into different classes.\n\nGoing deeper\n============\n\nThere are 4 documents you may also be interested in:\n\n* [xgboostPresentation.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd): general presentation\n* [discoverYourData.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/discoverYourData.Rmd): explaining feature analysis\n* [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit): use case\n* [The Elements of Statistical Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/): very good book to have a good understanding of the model\n"
  },
  {
    "path": "demo/multiclass_classification/README.md",
    "content": "Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)\n\nMake sure you have built the xgboost Python module in ../../python\n\n1. Run runexp.sh\n```bash\n./runexp.sh\n```\n\nFor the **R version**, please see `train.R`.\n"
  },
  {
    "path": "demo/multiclass_classification/runexp.sh",
    "content": "#!/bin/bash\nif [ -f dermatology.data ]\nthen\n    echo \"use existing data to run multi class classification\"\nelse\n    echo \"getting data from uci, make sure you are connected to internet\"\n    wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data\nfi\npython train.py\n"
  },
  {
    "path": "demo/multiclass_classification/train.R",
    "content": "library(data.table)\nlibrary(xgboost)\n\nif (!file.exists(\"./dermatology.data\")) {\n  download.file(\n    \"https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data\",\n    \"dermatology.data\",\n    method = \"curl\"\n  )\n}\n\ndf <- fread(\"dermatology.data\", sep = \",\", header = FALSE)\n\ndf[, `:=`(V34 = as.integer(ifelse(V34 == \"?\", 0L, V34)),\n          V35 = V35 - 1L)]\n\nidx <- sample(nrow(df), size = round(0.7 * nrow(df)), replace = FALSE)\n\ntrain <- df[idx, ]\ntest <- df[-idx, ]\n\ntrain_x <- train[, 1:34]\ntrain_y <- train[, V35]\n\ntest_x <- test[, 1:34]\ntest_y <- test[, V35]\n\nxg_train <- xgb.DMatrix(data = as.matrix(train_x), label = train_y)\nxg_test <- xgb.DMatrix(as.matrix(test_x), label = test_y)\n\nparams <- list(\n  objective = 'multi:softmax',\n  num_class = 6,\n  max_depth = 6,\n  nthread = 4,\n  eta = 0.1\n)\n\nwatchlist <- list(train = xg_train, test = xg_test)\n\nbst <- xgb.train(\n  params = params,\n  data = xg_train,\n  watchlist = watchlist,\n  nrounds = 5\n)\n\npred <- predict(bst, xg_test)\nerror_rate <- sum(pred != test_y) / length(test_y)\nprint(paste(\"Test error using softmax =\", error_rate))\n\n# do the same thing again, but output probabilities\nparams$objective <- 'multi:softprob'\nbst <- xgb.train(params, xg_train, nrounds = 5, watchlist)\n\npred_prob <- predict(bst, xg_test)\n\npred_mat <- matrix(pred_prob, ncol = 6, byrow = TRUE)\n# validation\n# rowSums(pred_mat)\n\npred_label <- apply(pred_mat, 1, which.max) - 1L\nerror_rate <- sum(pred_label != test_y) / length(test_y)\nprint(paste(\"Test error using softprob =\", error_rate))\n"
  },
  {
    "path": "demo/multiclass_classification/train.py",
    "content": "#!/usr/bin/python\n\nfrom __future__ import division\n\nimport numpy as np\n\nimport xgboost as xgb\n\n# label need to be 0 to num_class -1\ndata = np.loadtxt('./dermatology.data', delimiter=',',\n        converters={33: lambda x:int(x == '?'), 34: lambda x:int(x) - 1})\nsz = data.shape\n\ntrain = data[:int(sz[0] * 0.7), :]\ntest = data[int(sz[0] * 0.7):, :]\n\ntrain_X = train[:, :33]\ntrain_Y = train[:, 34]\n\ntest_X = test[:, :33]\ntest_Y = test[:, 34]\n\nxg_train = xgb.DMatrix(train_X, label=train_Y)\nxg_test = xgb.DMatrix(test_X, label=test_Y)\n# setup parameters for xgboost\nparam = {}\n# use softmax multi-class classification\nparam['objective'] = 'multi:softmax'\n# step size shrinkage (learning rate)\nparam['eta'] = 0.1\nparam['max_depth'] = 6\nparam['nthread'] = 4\nparam['num_class'] = 6\n\nwatchlist = [(xg_train, 'train'), (xg_test, 'test')]\nnum_round = 5\nbst = xgb.train(param, xg_train, num_round, watchlist)\n# get prediction\npred = bst.predict(xg_test)\nerror_rate = np.sum(pred != test_Y) / test_Y.shape[0]\nprint('Test error using softmax = {}'.format(error_rate))\n\n# do the same thing again, but output probabilities\nparam['objective'] = 'multi:softprob'\nbst = xgb.train(param, xg_train, num_round, watchlist)\n# Note: this convention has been changed since xgboost-unity\n# get prediction, this is in 1D array, need reshape to (ndata, nclass)\npred_prob = bst.predict(xg_test).reshape(test_Y.shape[0], 6)\npred_label = np.argmax(pred_prob, axis=1)\nerror_rate = np.sum(pred_label != test_Y) / test_Y.shape[0]\nprint('Test error using softprob = {}'.format(error_rate))\n"
  },
  {
    "path": "demo/nvflare/.gitignore",
    "content": "!config\n"
  },
  {
    "path": "demo/nvflare/README.md",
    "content": "# Experimental Support of Federated XGBoost using NVFlare\n\nThis directory contains a demo of Federated Learning using\n[NVFlare](https://nvidia.github.io/NVFlare/).\n\n## Horizontal Federated XGBoost\n\nFor horizontal federated learning using XGBoost (data is split row-wise), check out the `horizontal` directory\n(see the [README](horizontal/README.md)).\n\n## Vertical Federated XGBoost\n\nFor vertical federated learning using XGBoost (data is split column-wise), check out the `vertical` directory\n(see the [README](vertical/README.md)).\n"
  },
  {
    "path": "demo/nvflare/config/config_fed_client.json",
    "content": "{\n  \"format_version\": 2,\n  \"executors\": [\n    {\n      \"tasks\": [\n        \"train\"\n      ],\n      \"executor\": {\n        \"path\": \"trainer.XGBoostTrainer\",\n        \"args\": {\n          \"server_address\": \"localhost:9091\",\n          \"world_size\": 2,\n          \"server_cert_path\": \"server-cert.pem\",\n          \"client_key_path\": \"client-key.pem\",\n          \"client_cert_path\": \"client-cert.pem\",\n          \"use_gpus\": false\n        }\n      }\n    }\n  ],\n  \"task_result_filters\": [],\n  \"task_data_filters\": []\n}\n"
  },
  {
    "path": "demo/nvflare/config/config_fed_server.json",
    "content": "{\n  \"format_version\": 2,\n  \"server\": {\n    \"heart_beat_timeout\": 600\n  },\n  \"task_data_filters\": [],\n  \"task_result_filters\": [],\n  \"workflows\": [\n    {\n      \"id\": \"server_workflow\",\n      \"path\": \"controller.XGBoostController\",\n      \"args\": {\n        \"port\": 9091,\n        \"world_size\": 2,\n        \"server_key_path\": \"server-key.pem\",\n        \"server_cert_path\": \"server-cert.pem\",\n        \"client_cert_path\": \"client-cert.pem\"\n      }\n    }\n  ],\n  \"components\": []\n}\n"
  },
  {
    "path": "demo/nvflare/horizontal/README.md",
    "content": "# Experimental Support of Horizontal Federated XGBoost using NVFlare\n\nThis directory contains a demo of Horizontal Federated Learning using\n[NVFlare](https://nvidia.github.io/NVFlare/).\n\n## Training with CPU only\n\nTo run the demo, first build XGBoost with the federated learning plugin enabled (see the\n[README](../../../plugin/federated/README.md)).\n\nInstall NVFlare:\n```shell\npip install nvflare\n```\n\nPrepare the data:\n```shell\n./prepare_data.sh\n```\n\nStart the NVFlare federated server:\n```shell\n/tmp/nvflare/poc/server/startup/start.sh\n```\n\nIn another terminal, start the first worker:\n```shell\n/tmp/nvflare/poc/site-1/startup/start.sh\n```\n\nAnd the second worker:\n```shell\n/tmp/nvflare/poc/site-2/startup/start.sh\n```\n\nThen start the admin CLI:\n```shell\n/tmp/nvflare/poc/admin/startup/fl_admin.sh\n```\n\nIn the admin CLI, run the following command:\n```shell\nsubmit_job horizontal-xgboost\n```\n\nMake a note of the job id:\n```console\nSubmitted job: 28309e77-a7c5-45e6-b2bc-c2e3655122d8\n```\n\nOn both workers, you should see train and eval losses printed:\n```console\n[10:45:41] [0]\teval-logloss:0.22646\ttrain-logloss:0.23316\n[10:45:41] [1]\teval-logloss:0.13776\ttrain-logloss:0.13654\n[10:45:41] [2]\teval-logloss:0.08036\ttrain-logloss:0.08243\n[10:45:41] [3]\teval-logloss:0.05830\ttrain-logloss:0.05645\n[10:45:41] [4]\teval-logloss:0.03825\ttrain-logloss:0.04148\n[10:45:41] [5]\teval-logloss:0.02660\ttrain-logloss:0.02958\n[10:45:41] [6]\teval-logloss:0.01386\ttrain-logloss:0.01918\n[10:45:41] [7]\teval-logloss:0.01018\ttrain-logloss:0.01331\n[10:45:41] [8]\teval-logloss:0.00847\ttrain-logloss:0.01112\n[10:45:41] [9]\teval-logloss:0.00691\ttrain-logloss:0.00662\n[10:45:41] [10]\teval-logloss:0.00543\ttrain-logloss:0.00503\n[10:45:41] [11]\teval-logloss:0.00445\ttrain-logloss:0.00420\n[10:45:41] [12]\teval-logloss:0.00336\ttrain-logloss:0.00355\n[10:45:41] [13]\teval-logloss:0.00277\ttrain-logloss:0.00280\n[10:45:41] 
[14]\teval-logloss:0.00252\ttrain-logloss:0.00244\n[10:45:41] [15]\teval-logloss:0.00177\ttrain-logloss:0.00193\n[10:45:41] [16]\teval-logloss:0.00156\ttrain-logloss:0.00161\n[10:45:41] [17]\teval-logloss:0.00135\ttrain-logloss:0.00142\n[10:45:41] [18]\teval-logloss:0.00123\ttrain-logloss:0.00125\n[10:45:41] [19]\teval-logloss:0.00106\ttrain-logloss:0.00107\n```\n\nOnce the training finishes, the model file should be written into\n`/tmp/nvflare/poc/site-1/${job_id}/test.model.json` and `/tmp/nvflare/poc/site-2/${job_id}/test.model.json`\nrespectively, where `job_id` is the UUID printed out when we ran `submit_job`.\n\nFinally, shut down everything from the admin CLI, using `admin` as password:\n```shell\nshutdown client\nshutdown server\n```\n\n## Training with GPUs\n\nTo demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs.\nBuild XGBoost with the federated learning plugin enabled along with CUDA\n(see the [README](../../../plugin/federated/README.md)).\n\nModify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps\nabove.\n"
  },
  {
    "path": "demo/nvflare/horizontal/custom/controller.py",
    "content": "\"\"\"\nExample of training controller with NVFlare\n===========================================\n\"\"\"\nimport multiprocessing\n\nfrom nvflare.apis.client import Client\nfrom nvflare.apis.fl_context import FLContext\nfrom nvflare.apis.impl.controller import Controller, Task\nfrom nvflare.apis.shareable import Shareable\nfrom nvflare.apis.signal import Signal\nfrom trainer import SupportedTasks\n\nimport xgboost.federated\n\n\nclass XGBoostController(Controller):\n    def __init__(self, port: int, world_size: int, server_key_path: str,\n                 server_cert_path: str, client_cert_path: str):\n        \"\"\"Controller for federated XGBoost.\n\n        Args:\n            port: the port for the gRPC server to listen on.\n            world_size: the number of sites.\n            server_key_path: the path to the server key file.\n            server_cert_path: the path to the server certificate file.\n            client_cert_path: the path to the client certificate file.\n        \"\"\"\n        super().__init__()\n        self._port = port\n        self._world_size = world_size\n        self._server_key_path = server_key_path\n        self._server_cert_path = server_cert_path\n        self._client_cert_path = client_cert_path\n        self._server = None\n\n    def start_controller(self, fl_ctx: FLContext):\n        self._server = multiprocessing.Process(\n            target=xgboost.federated.run_federated_server,\n            args=(self._port, self._world_size, self._server_key_path,\n                  self._server_cert_path, self._client_cert_path))\n        self._server.start()\n\n    def stop_controller(self, fl_ctx: FLContext):\n        if self._server:\n            self._server.terminate()\n\n    def process_result_of_unknown_task(self, client: Client, task_name: str,\n                                       client_task_id: str, result: Shareable,\n                                       fl_ctx: FLContext):\n        self.log_warning(fl_ctx, 
f\"Unknown task: {task_name} from client {client.name}.\")\n\n    def control_flow(self, abort_signal: Signal, fl_ctx: FLContext):\n        self.log_info(fl_ctx, \"XGBoost training control flow started.\")\n        if abort_signal.triggered:\n            return\n        task = Task(name=SupportedTasks.TRAIN, data=Shareable())\n        self.broadcast_and_wait(\n            task=task,\n            min_responses=self._world_size,\n            fl_ctx=fl_ctx,\n            wait_time_after_min_received=1,\n            abort_signal=abort_signal,\n        )\n        if abort_signal.triggered:\n            return\n\n        self.log_info(fl_ctx, \"XGBoost training control flow finished.\")\n"
  },
  {
    "path": "demo/nvflare/horizontal/custom/trainer.py",
    "content": "import os\n\nfrom nvflare.apis.executor import Executor\nfrom nvflare.apis.fl_constant import FLContextKey, ReturnCode\nfrom nvflare.apis.fl_context import FLContext\nfrom nvflare.apis.shareable import Shareable, make_reply\nfrom nvflare.apis.signal import Signal\n\nimport xgboost as xgb\nfrom xgboost import callback\n\n\nclass SupportedTasks(object):\n    TRAIN = \"train\"\n\n\nclass XGBoostTrainer(Executor):\n    def __init__(self, server_address: str, world_size: int, server_cert_path: str,\n                 client_key_path: str, client_cert_path: str, use_gpus: bool):\n        \"\"\"Trainer for federated XGBoost.\n\n        Args:\n            server_address: address for the gRPC server to connect to.\n            world_size: the number of sites.\n            server_cert_path: the path to the server certificate file.\n            client_key_path: the path to the client key file.\n            client_cert_path: the path to the client certificate file.\n        \"\"\"\n        super().__init__()\n        self._server_address = server_address\n        self._world_size = world_size\n        self._server_cert_path = server_cert_path\n        self._client_key_path = client_key_path\n        self._client_cert_path = client_cert_path\n        self._use_gpus = use_gpus\n\n    def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,\n                abort_signal: Signal) -> Shareable:\n        self.log_info(fl_ctx, f\"Executing {task_name}\")\n        try:\n            if task_name == SupportedTasks.TRAIN:\n                self._do_training(fl_ctx)\n                return make_reply(ReturnCode.OK)\n            else:\n                self.log_error(fl_ctx, f\"{task_name} is not a supported task.\")\n                return make_reply(ReturnCode.TASK_UNKNOWN)\n        except BaseException as e:\n            self.log_exception(fl_ctx,\n                               f\"Task {task_name} failed. 
Exception: {e.__str__()}\")\n            return make_reply(ReturnCode.EXECUTION_EXCEPTION)\n\n    def _do_training(self, fl_ctx: FLContext):\n        client_name = fl_ctx.get_prop(FLContextKey.CLIENT_NAME)\n        rank = int(client_name.split('-')[1]) - 1\n        communicator_env = {\n            'xgboost_communicator': 'federated',\n            'federated_server_address': self._server_address,\n            'federated_world_size': self._world_size,\n            'federated_rank': rank,\n            'federated_server_cert': self._server_cert_path,\n            'federated_client_key': self._client_key_path,\n            'federated_client_cert': self._client_cert_path\n        }\n        with xgb.collective.CommunicatorContext(**communicator_env):\n            # Load file, file will not be sharded in federated mode.\n            dtrain = xgb.DMatrix('agaricus.txt.train?format=libsvm')\n            dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')\n\n            # Specify parameters via map, definition are same as c++ version\n            param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}\n            if self._use_gpus:\n                self.log_info(fl_ctx, f'Training with GPU {rank}')\n                param['device'] = f\"cuda:{rank}\"\n\n            # Specify validations set to watch performance\n            watchlist = [(dtest, 'eval'), (dtrain, 'train')]\n            num_round = 20\n\n            # Run training, all the features in training API is available.\n            bst = xgb.train(param, dtrain, num_round, evals=watchlist,\n                            early_stopping_rounds=2, verbose_eval=False,\n                            callbacks=[callback.EvaluationMonitor(rank=rank)])\n\n            # Save the model.\n            workspace = fl_ctx.get_prop(FLContextKey.WORKSPACE_OBJECT)\n            run_number = fl_ctx.get_prop(FLContextKey.CURRENT_RUN)\n            run_dir = workspace.get_run_dir(run_number)\n            
bst.save_model(os.path.join(run_dir, \"test.model.json\"))\n            xgb.collective.communicator_print(\"Finished training\\n\")\n"
  },
  {
    "path": "demo/nvflare/horizontal/prepare_data.sh",
    "content": "#!/bin/bash\n\nset -e\n\nrm -fr ./agaricus* ./*.pem /tmp/nvflare\n\nworld_size=2\n\n# Generate server and client certificates.\nopenssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out server-cert.pem -subj \"/C=US/CN=localhost\"\nopenssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj \"/C=US/CN=localhost\"\n\n# Split train and test files manually to simulate a federated environment.\nsplit -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.train agaricus.txt.train-site-\nsplit -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test agaricus.txt.test-site-\n\nnvflare poc -n 2 --prepare\nmkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost\ncp -fr ../config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost\ncp server-*.pem client-cert.pem /tmp/nvflare/poc/server/\nfor (( site=1; site<=world_size; site++ )); do\n  cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-\"$site\"/\n  cp agaricus.txt.train-site-\"$site\" /tmp/nvflare/poc/site-\"$site\"/agaricus.txt.train\n  cp agaricus.txt.test-site-\"$site\" /tmp/nvflare/poc/site-\"$site\"/agaricus.txt.test\ndone\n"
  },
  {
    "path": "demo/nvflare/vertical/README.md",
    "content": "# Experimental Support of Vertical Federated XGBoost using NVFlare\n\nThis directory contains a demo of Vertical Federated Learning using\n[NVFlare](https://nvidia.github.io/NVFlare/).\n\n## Training with CPU only\n\nTo run the demo, first build XGBoost with the federated learning plugin enabled (see the\n[README](../../../plugin/federated/README.md)).\n\nInstall NVFlare:\n```shell\npip install nvflare\n```\n\nPrepare the data (note that this step will download the HIGGS dataset, which is 2.6GB compressed, and 7.5GB\nuncompressed, so make sure you have enough disk space and are on a fast internet connection):\n```shell\n./prepare_data.sh\n```\n\nStart the NVFlare federated server:\n```shell\n/tmp/nvflare/poc/server/startup/start.sh\n```\n\nIn another terminal, start the first worker:\n```shell\n/tmp/nvflare/poc/site-1/startup/start.sh\n```\n\nAnd the second worker:\n```shell\n/tmp/nvflare/poc/site-2/startup/start.sh\n```\n\nThen start the admin CLI:\n```shell\n/tmp/nvflare/poc/admin/startup/fl_admin.sh\n```\n\nIn the admin CLI, run the following command:\n```shell\nsubmit_job vertical-xgboost\n```\n\nOnce the training finishes, the model file should be written into\n`/tmp/nvflare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json`\nrespectively.\n\nFinally, shut down everything from the admin CLI, using `admin` as password:\n```shell\nshutdown client\nshutdown server\n```\n\n## Training with GPUs\n\nTo demo with Vertical Federated Learning using GPUs, make sure your machine has at least 2 GPUs.\nBuild XGBoost with the federated learning plugin enabled along with CUDA\n(see the [README](../../../plugin/federated/README.md)).\n\nModify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps\nabove.\n"
  },
  {
    "path": "demo/nvflare/vertical/custom/controller.py",
    "content": "\"\"\"\nExample of training controller with NVFlare\n===========================================\n\"\"\"\nimport multiprocessing\n\nfrom nvflare.apis.client import Client\nfrom nvflare.apis.fl_context import FLContext\nfrom nvflare.apis.impl.controller import Controller, Task\nfrom nvflare.apis.shareable import Shareable\nfrom nvflare.apis.signal import Signal\nfrom trainer import SupportedTasks\n\nimport xgboost.federated\n\n\nclass XGBoostController(Controller):\n    def __init__(self, port: int, world_size: int, server_key_path: str,\n                 server_cert_path: str, client_cert_path: str):\n        \"\"\"Controller for federated XGBoost.\n\n        Args:\n            port: the port for the gRPC server to listen on.\n            world_size: the number of sites.\n            server_key_path: the path to the server key file.\n            server_cert_path: the path to the server certificate file.\n            client_cert_path: the path to the client certificate file.\n        \"\"\"\n        super().__init__()\n        self._port = port\n        self._world_size = world_size\n        self._server_key_path = server_key_path\n        self._server_cert_path = server_cert_path\n        self._client_cert_path = client_cert_path\n        self._server = None\n\n    def start_controller(self, fl_ctx: FLContext):\n        self._server = multiprocessing.Process(\n            target=xgboost.federated.run_federated_server,\n            args=(self._port, self._world_size, self._server_key_path,\n                  self._server_cert_path, self._client_cert_path))\n        self._server.start()\n\n    def stop_controller(self, fl_ctx: FLContext):\n        if self._server:\n            self._server.terminate()\n\n    def process_result_of_unknown_task(self, client: Client, task_name: str,\n                                       client_task_id: str, result: Shareable,\n                                       fl_ctx: FLContext):\n        self.log_warning(fl_ctx, 
f\"Unknown task: {task_name} from client {client.name}.\")\n\n    def control_flow(self, abort_signal: Signal, fl_ctx: FLContext):\n        self.log_info(fl_ctx, \"XGBoost training control flow started.\")\n        if abort_signal.triggered:\n            return\n        task = Task(name=SupportedTasks.TRAIN, data=Shareable())\n        self.broadcast_and_wait(\n            task=task,\n            min_responses=self._world_size,\n            fl_ctx=fl_ctx,\n            wait_time_after_min_received=1,\n            abort_signal=abort_signal,\n        )\n        if abort_signal.triggered:\n            return\n\n        self.log_info(fl_ctx, \"XGBoost training control flow finished.\")\n"
  },
  {
    "path": "demo/nvflare/vertical/custom/trainer.py",
    "content": "import os\n\nfrom nvflare.apis.executor import Executor\nfrom nvflare.apis.fl_constant import FLContextKey, ReturnCode\nfrom nvflare.apis.fl_context import FLContext\nfrom nvflare.apis.shareable import Shareable, make_reply\nfrom nvflare.apis.signal import Signal\n\nimport xgboost as xgb\nfrom xgboost import callback\n\n\nclass SupportedTasks(object):\n    TRAIN = \"train\"\n\n\nclass XGBoostTrainer(Executor):\n    def __init__(self, server_address: str, world_size: int, server_cert_path: str,\n                 client_key_path: str, client_cert_path: str, use_gpus: bool):\n        \"\"\"Trainer for federated XGBoost.\n\n        Args:\n            server_address: address for the gRPC server to connect to.\n            world_size: the number of sites.\n            server_cert_path: the path to the server certificate file.\n            client_key_path: the path to the client key file.\n            client_cert_path: the path to the client certificate file.\n        \"\"\"\n        super().__init__()\n        self._server_address = server_address\n        self._world_size = world_size\n        self._server_cert_path = server_cert_path\n        self._client_key_path = client_key_path\n        self._client_cert_path = client_cert_path\n        self._use_gpus = use_gpus\n\n    def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,\n                abort_signal: Signal) -> Shareable:\n        self.log_info(fl_ctx, f\"Executing {task_name}\")\n        try:\n            if task_name == SupportedTasks.TRAIN:\n                self._do_training(fl_ctx)\n                return make_reply(ReturnCode.OK)\n            else:\n                self.log_error(fl_ctx, f\"{task_name} is not a supported task.\")\n                return make_reply(ReturnCode.TASK_UNKNOWN)\n        except BaseException as e:\n            self.log_exception(fl_ctx,\n                               f\"Task {task_name} failed. 
Exception: {e.__str__()}\")\n            return make_reply(ReturnCode.EXECUTION_EXCEPTION)\n\n    def _do_training(self, fl_ctx: FLContext):\n        client_name = fl_ctx.get_prop(FLContextKey.CLIENT_NAME)\n        rank = int(client_name.split('-')[1]) - 1\n        communicator_env = {\n            'xgboost_communicator': 'federated',\n            'federated_server_address': self._server_address,\n            'federated_world_size': self._world_size,\n            'federated_rank': rank,\n            'federated_server_cert': self._server_cert_path,\n            'federated_client_key': self._client_key_path,\n            'federated_client_cert': self._client_cert_path\n        }\n        with xgb.collective.CommunicatorContext(**communicator_env):\n            # Load file, file will not be sharded in federated mode.\n            if rank == 0:\n                label = '&label_column=0'\n            else:\n                label = ''\n            dtrain = xgb.DMatrix(f'higgs.train.csv?format=csv{label}', data_split_mode=1)\n            dtest = xgb.DMatrix(f'higgs.test.csv?format=csv{label}', data_split_mode=1)\n\n            # specify parameters via map\n            param = {\n                'validate_parameters': True,\n                'eta': 0.1,\n                'gamma': 1.0,\n                'max_depth': 8,\n                'min_child_weight': 100,\n                'tree_method': 'hist',\n                'grow_policy': 'depthwise',\n                'objective': 'binary:logistic',\n                'eval_metric': 'auc',\n            }\n            if self._use_gpus:\n                self.log_info(fl_ctx, f'Training with GPU {rank}')\n                param['device'] = f\"cuda:{rank}\"\n\n            # specify validations set to watch performance\n            watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n            # number of boosting rounds\n            num_round = 10\n\n            bst = xgb.train(param, dtrain, num_round, evals=watchlist, 
early_stopping_rounds=2)\n\n            # Save the model.\n            workspace = fl_ctx.get_prop(FLContextKey.WORKSPACE_OBJECT)\n            run_number = fl_ctx.get_prop(FLContextKey.CURRENT_RUN)\n            run_dir = workspace.get_run_dir(run_number)\n            bst.save_model(os.path.join(run_dir, \"higgs.model.federated.vertical.json\"))\n            xgb.collective.communicator_print(\"Finished training\\n\")\n"
  },
  {
    "path": "demo/nvflare/vertical/prepare_data.sh",
    "content": "#!/bin/bash\n\nset -e\n\nrm -fr ./*.pem /tmp/nvflare/poc\n\nworld_size=2\n\n# Generate server and client certificates.\nopenssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out server-cert.pem -subj \"/C=US/CN=localhost\"\nopenssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj \"/C=US/CN=localhost\"\n\n# Download HIGGS dataset.\nif [ -f \"HIGGS.csv\" ]; then\n  echo \"HIGGS.csv exists, skipping download.\"\nelse\n  echo \"Downloading HIGGS dataset.\"\n  wget https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\n  gunzip HIGGS.csv.gz\nfi\n\n# Split into train/test.\nif [[ -f higgs.train.csv && -f higgs.test.csv ]]; then\n  echo \"higgs.train.csv and higgs.test.csv exist, skipping split.\"\nelse\n  echo \"Splitting HIGGS dataset into train/test.\"\n  head -n 10450000 HIGGS.csv > higgs.train.csv\n  tail -n 550000 HIGGS.csv > higgs.test.csv\nfi\n\n# Split train and test files by column to simulate a federated environment.\nsite_files=(higgs.{train,test}.csv-site-*)\nif [ ${#site_files[@]} -eq $((world_size*2)) ]; then\n  echo \"Site files exist, skipping split.\"\nelse\n  echo \"Splitting train/test into site files.\"\n  total_cols=28  # plus label\n  cols=$((total_cols/world_size))\n  echo \"Columns per site: $cols\"\n  for (( site=1; site<=world_size; site++ )); do\n    if (( site == 1 )); then\n      start=$((cols*(site-1)+1))\n    else\n      start=$((cols*(site-1)+2))\n    fi\n    if (( site == world_size )); then\n      end=$((total_cols+1))\n    else\n      end=$((cols*site+1))\n    fi\n    echo \"Site $site, columns $start-$end\"\n    cut -d, -f${start}-${end} higgs.train.csv > higgs.train.csv-site-\"${site}\"\n    cut -d, -f${start}-${end} higgs.test.csv > higgs.test.csv-site-\"${site}\"\n  done\nfi\n\nnvflare poc -n 2 --prepare\nmkdir -p /tmp/nvflare/poc/admin/transfer/vertical-xgboost\ncp -fr ../config custom 
/tmp/nvflare/poc/admin/transfer/vertical-xgboost\ncp server-*.pem client-cert.pem /tmp/nvflare/poc/server/\nfor (( site=1; site<=world_size; site++ )); do\n  cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-\"${site}\"/\n  ln -s \"${PWD}\"/higgs.train.csv-site-\"${site}\" /tmp/nvflare/poc/site-\"${site}\"/higgs.train.csv\n  ln -s \"${PWD}\"/higgs.test.csv-site-\"${site}\" /tmp/nvflare/poc/site-\"${site}\"/higgs.test.csv\ndone\n"
  },
  {
    "path": "demo/rmm_plugin/README.rst",
    "content": "Using XGBoost with RAPIDS Memory Manager (RMM) plugin\n=====================================================\n\n`RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ library provides a\ncollection of efficient memory allocators for NVIDIA GPUs. It is now possible to use\nXGBoost with memory allocators provided by RMM, by enabling the RMM integration plugin.\n\nThe demos in this directory highlights one RMM allocator in particular: **the pool\nsub-allocator**.  This allocator addresses the slow speed of ``cudaMalloc()`` by\nallocating a large chunk of memory upfront. Subsequent allocations will draw from the pool\nof already allocated memory and thus avoid the overhead of calling ``cudaMalloc()``\ndirectly. See `this GTC talk slides\n<https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf>`_ for\nmore details.\n\nBefore running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To do this,\nrun CMake with option ``-DPLUGIN_RMM=ON`` (``-DUSE_CUDA=ON`` also required):\n\n.. code-block:: sh\n\n  cmake -B build -S . -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON\n  cmake --build build -j$(nproc)\n\nCMake will attempt to locate the RMM library in your build environment. You may choose to build\nRMM from the source, or install it using the Conda package manager. If CMake cannot find RMM, you\nshould specify the location of RMM with the CMake prefix:\n\n.. code-block:: sh\n\n  # If using Conda:\n  cmake -B build -S . -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX\n  # If using RMM installed with a custom location\n  cmake -B build -S . 
-DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm\n\n********************************\nInforming XGBoost about RMM pool\n********************************\n\nWhen XGBoost is compiled with RMM, most of the large size allocation will go through RMM\nallocators, but some small allocations in performance critical areas are using a different\ncaching allocator so that we can have better control over memory allocation behavior.\nUsers can override this behavior and force the use of rmm for all allocations by setting\nthe global configuration ``use_rmm``:\n\n.. code-block:: python\n\n  with xgb.config_context(use_rmm=True):\n    clf = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda\")\n\nDepending on the choice of memory pool size and the type of the allocator, this can add\nmore consistency to memory usage but with slightly degraded performance impact.\n\n*******************************\nNo Device Ordinal for Multi-GPU\n*******************************\n\nSince with RMM the memory pool is pre-allocated on a specific device, changing the CUDA\ndevice ordinal in XGBoost can result in memory error ``cudaErrorIllegalAddress``. Use the\n``CUDA_VISIBLE_DEVICES`` environment variable instead of the ``device=\"cuda:1\"`` parameter\nfor selecting device. For distributed training, the distributed computing frameworks like\n``dask-cuda`` are responsible for device management. For Scala-Spark, see\n:doc:`/jvm/xgboost4j_spark_gpu_tutorial` for more info.\n\n************************\nMemory Over-Subscription\n************************\n\n.. warning::\n\n   This feature is still experimental and is under active development.\n\nThe newer NVIDIA platforms like `Grace-Hopper\n<https://www.nvidia.com/en-us/data-center/grace-hopper-superchip/>`__ use `NVLink-C2C\n<https://www.nvidia.com/en-us/data-center/nvlink-c2c/>`__, which allows the CPU and GPU to\nhave a coherent memory model. 
Users can use the ``SamHeadroomMemoryResource`` in the latest\nRMM to utilize system memory for storing data. This can help XGBoost utilize memory from\nthe host for GPU computation, but it may reduce performance due to slower CPU memory speed\nand page migration overhead."
  },
  {
    "path": "demo/rmm_plugin/rmm_mgpu_with_dask.py",
    "content": "\"\"\"\nUsing rmm with Dask\n===================\n\"\"\"\n\nimport dask\nfrom dask.distributed import Client\nfrom dask_cuda import LocalCUDACluster\nfrom sklearn.datasets import make_classification\n\nimport xgboost as xgb\n\n\ndef main(client):\n    # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md\n    # xgb.set_config(use_rmm=True)\n\n    X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)\n    # In practice one should prefer loading the data with dask collections instead of\n    # using `from_array`.\n    X = dask.array.from_array(X)\n    y = dask.array.from_array(y)\n    dtrain = xgb.dask.DaskDMatrix(client, X, label=y)\n\n    params = {\n        \"max_depth\": 8,\n        \"eta\": 0.01,\n        \"objective\": \"multi:softprob\",\n        \"num_class\": 3,\n        \"tree_method\": \"hist\",\n        \"eval_metric\": \"merror\",\n        \"device\": \"cuda\",\n    }\n    output = xgb.dask.train(\n        client, params, dtrain, num_boost_round=100, evals=[(dtrain, \"train\")]\n    )\n    bst = output[\"booster\"]\n    history = output[\"history\"]\n    for i, e in enumerate(history[\"train\"][\"merror\"]):\n        print(f\"[{i}] train-merror: {e}\")\n\n\nif __name__ == \"__main__\":\n    # To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option\n    # to LocalCUDACluster constructor.\n    with LocalCUDACluster(rmm_pool_size=\"2GB\") as cluster:\n        with Client(cluster) as client:\n            main(client)\n"
  },
  {
    "path": "demo/rmm_plugin/rmm_singlegpu.py",
    "content": "\"\"\"\nUsing rmm on a single node device\n=================================\n\"\"\"\n\nimport rmm\nfrom sklearn.datasets import make_classification\n\nimport xgboost as xgb\n\n# Initialize RMM pool allocator\nrmm.reinitialize(pool_allocator=True)\n# Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md\n# xgb.set_config(use_rmm=True)\n\nX, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)\ndtrain = xgb.DMatrix(X, label=y)\n\nparams = {\n    \"max_depth\": 8,\n    \"eta\": 0.01,\n    \"objective\": \"multi:softprob\",\n    \"num_class\": 3,\n    \"tree_method\": \"hist\",\n    \"device\": \"cuda\",\n}\n# XGBoost will automatically use the RMM pool allocator\nbst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, \"train\")])\n"
  },
  {
    "path": "dev/prepare_jvm_release.py",
    "content": "\"\"\"\nHelper script to prepare for releasing XGBoost JVM packages to\nMaven Central.\n\n## Prerequisite\n\n1. You must have the right to upload artifacts to the Maven Central repo.\n   If you do not, contact Hyunsu Cho (chohyu01@cs.washington.edu) so that\n   he can contact Sonatype on your behalf in order to add you as a\n   \"producer\" user for the ml.dmlc namespace. See\n   https://central.sonatype.org/pages/support/#status to learn about\n   the process for adding or removing users who can publish to the project.\n\n2. Follow instructions in\n   https://central.sonatype.org/publish/publish-portal-maven/#credentials\n   to set up the authentication token in your machine.\n\n3. Set up GPG for signing artifacts:\n   https://central.sonatype.org/publish/requirements/gpg/\n\n## Making the release\nRun this script 4 times:\n\npython3 dev/prepare_jvm_release.py --scala-version 2.12 --variant cpu\npython3 dev/prepare_jvm_release.py --scala-version 2.12 --variant gpu\npython3 dev/prepare_jvm_release.py --scala-version 2.13 --variant cpu\npython3 dev/prepare_jvm_release.py --scala-version 2.13 --variant gpu\n\n\"\"\"\n\nimport argparse\nimport errno\nimport glob\nimport os\nimport re\nimport shutil\nimport subprocess\nimport sys\nimport tempfile\nimport zipfile\nfrom contextlib import contextmanager\nfrom urllib.request import urlretrieve\n\n\ndef normpath(path):\n    \"\"\"Normalize UNIX path to a native path.\"\"\"\n    normalized = os.path.join(*path.split(\"/\"))\n    if os.path.isabs(path):\n        return os.path.abspath(\"/\") + normalized\n    else:\n        return normalized\n\n\ndef cp(source, target):\n    source = normpath(source)\n    target = normpath(target)\n    print(\"cp {0} {1}\".format(source, target))\n    shutil.copy(source, target)\n\n\ndef maybe_makedirs(path):\n    path = normpath(path)\n    print(\"mkdir -p \" + path)\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n          
  raise\n\n\n@contextmanager\ndef cd(path):\n    path = normpath(path)\n    cwd = os.getcwd()\n    os.chdir(path)\n    print(\"cd \" + path)\n    try:\n        yield path\n    finally:\n        os.chdir(cwd)\n\n\ndef run(command, **kwargs):\n    print(command)\n    subprocess.run(command, shell=True, check=True, **kwargs)\n\n\ndef get_current_commit_hash():\n    out = subprocess.check_output([\"git\", \"rev-parse\", \"HEAD\"])\n    return out.decode().split(\"\\n\")[0]\n\n\ndef get_current_git_branch():\n    out = subprocess.check_output([\"git\", \"log\", \"-n\", \"1\", \"--pretty=%d\", \"HEAD\"])\n    m = re.search(r\"release_[0-9\\.]+\", out.decode())\n    if not m:\n        raise ValueError(\"Expected branch name of form release_xxx\")\n    return m.group(0)\n\n\ndef retrieve(url, filename=None):\n    print(f\"{url} -> {filename}\")\n    return urlretrieve(url, filename)\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        description=__doc__, formatter_class=argparse.RawTextHelpFormatter\n    )\n    parser.add_argument(\n        \"--release-version\",\n        type=str,\n        required=True,\n        help=\"Version of the release being prepared\",\n    )\n    parser.add_argument(\n        \"--scala-version\",\n        type=str,\n        required=True,\n        help=\"Version of Scala to use in the JVM packages\",\n        choices=[\"2.12\", \"2.13\"],\n    )\n    parser.add_argument(\n        \"--variant\",\n        type=str,\n        required=True,\n        choices=[\"cpu\", \"gpu\"],\n        help=\"JVM package variant to package and publish\",\n    )\n\n    args = parser.parse_args()\n    version = args.release_version\n    scala_version = args.scala_version\n    use_cuda = args.variant == \"gpu\"\n\n    commit_hash = get_current_commit_hash()\n    git_branch = get_current_git_branch()\n    print(f\"Using commit {commit_hash} of branch {git_branch}\")\n    print(f\"====Update pom.xml to use Scala {scala_version}====\")\n    run(\n        
f\"{sys.executable} ops/script/change_scala_version.py \"\n        f\"--scala-version {scala_version} --purge-artifacts\"\n    )\n\n    with cd(\"jvm-packages/\"):\n        print(\"====Copying resources for testing====\")\n        with cd(\"../demo/data/regression\"):\n            run(f\"{sys.executable} mapfeat.py\")\n            run(f\"{sys.executable} mknfold.py machine.txt 1\")\n        xgboost4j_spark = \"xgboost4j-spark-gpu\" if use_cuda else \"xgboost4j-spark\"\n        maybe_makedirs(\"xgboost4j/src/test/resources\")\n        maybe_makedirs(f\"{xgboost4j_spark}/src/test/resources\")\n        for file in glob.glob(\"../demo/data/agaricus.*\"):\n            cp(file, \"xgboost4j/src/test/resources\")\n            cp(file, f\"{xgboost4j_spark}/src/test/resources\")\n        for file in glob.glob(\"../demo/data/regression/machine.txt.t*\"):\n            cp(file, f\"{xgboost4j_spark}/src/test/resources\")\n\n        print(\"====Creating directories to hold native binaries====\")\n        if use_cuda:\n            # TODO(hcho3): Add GPU build for linux aarch64\n            matrix = [(\"linux\", \"x86_64\")]\n        else:\n            matrix = [\n                (\"linux\", \"x86_64\"),\n                (\"linux\", \"aarch64\"),\n                (\"windows\", \"x86_64\"),\n                (\"macos\", \"x86_64\"),\n                (\"macos\", \"aarch64\"),\n            ]\n        for os_ident, arch in matrix:\n            output_dir = f\"xgboost4j/src/main/resources/lib/{os_ident}/{arch}\"\n            maybe_makedirs(output_dir)\n\n        print(\"====Downloading native binaries from CI====\")\n        if use_cuda:\n            url_prefix = (\n                \"https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc\"\n            )\n            with tempfile.TemporaryDirectory() as tempdir:\n                # libxgboost4j.so for Linux x86_64, GPU support\n                zip_path = os.path.join(tempdir, \"xgboost4j-spark-gpu_2.12.jar\")\n             
   extract_dir = os.path.join(tempdir, \"xgboost4j-spark-gpu\")\n                retrieve(\n                    url=f\"{url_prefix}/xgboost4j-spark-gpu_2.12/{version}/\"\n                    f\"xgboost4j-spark-gpu_2.12-{version}.jar\",\n                    filename=zip_path,\n                )\n                os.mkdir(extract_dir)\n                with zipfile.ZipFile(zip_path, \"r\") as t:\n                    t.extractall(extract_dir)\n                cp(\n                    os.path.join(\n                        extract_dir, \"lib\", \"linux\", \"x86_64\", \"libxgboost4j.so\"\n                    ),\n                    \"xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so\",\n                )\n            run(\n                \"mvn --no-transfer-progress install -Pgpu \"\n                \"-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true\"\n            )\n            run(\n                \"mvn deploy -Pgpu,release -pl xgboost4j-spark-gpu \"\n                \"-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true\"\n            )\n        else:\n            url_prefix = \"https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds\"\n            for os_ident, arch, src_libname, dest_libname in [\n                (\"linux\", \"x86_64\", \"libxgboost4j_linux_x86_64.so\", \"libxgboost4j.so\"),\n                (\n                    \"linux\",\n                    \"aarch64\",\n                    \"libxgboost4j_linux_aarch64.so\",\n                    \"libxgboost4j.so\",\n                ),\n                (\"windows\", \"x86_64\", \"xgboost4j.dll\", \"xgboost4j.dll\"),\n                (\"macos\", \"x86_64\", \"libxgboost4j_intel.dylib\", \"libxgboost4j.dylib\"),\n                (\"macos\", \"aarch64\", \"libxgboost4j_m1.dylib\", \"libxgboost4j.dylib\"),\n            ]:\n                retrieve(\n                    url=f\"{url_prefix}/{git_branch}/{commit_hash}/{src_libname}\",\n                    filename=(\n                   
     \"xgboost4j/src/main/resources/lib/\"\n                        f\"{os_ident}/{arch}/{dest_libname}\"\n                    ),\n                )\n            run(\n                \"mvn --no-transfer-progress deploy -Pdefault,release \"\n                \"-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true\"\n            )\n\n    print(\"====Next Steps====\")\n    print(\n        \"Visit https://central.sonatype.com/publishing/deployments to verify the deployment. \"\n        \"You can either drop the deployment or publish it. Note: publishing is final.\"\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "dev/query_contributors.py",
    "content": "\"\"\"Query list of all contributors and reviewers in a release\"\"\"\n\nimport json\nimport re\nimport sys\n\nimport requests\nfrom sh.contrib import git\n\nif len(sys.argv) != 5:\n    print(f'Usage: {sys.argv[0]} [starting commit/tag] [ending commit/tag] [GitHub username] ' +\n           '[GitHub password]')\n    sys.exit(1)\n\nfrom_commit = sys.argv[1]\nto_commit = sys.argv[2]\nusername = sys.argv[3]\npassword = sys.argv[4]\n\ncontributors = set()\nreviewers = set()\n\ndef paginate_request(url, callback):\n    r = requests.get(url, auth=(username, password))\n    assert r.status_code == requests.codes.ok, f'Code: {r.status_code}, Text: {r.text}'\n    callback(json.loads(r.text))\n    while 'next' in r.links:\n        r = requests.get(r.links['next']['url'], auth=(username, password))\n        callback(json.loads(r.text))\n\nfor line in git.log(f'{from_commit}..{to_commit}', '--pretty=format:%s', '--reverse', '--first-parent'):\n    m = re.search('\\(#([0-9]+)\\)$', line.rstrip())\n    if m:\n        pr_id = m.group(1)\n        print(f'PR #{pr_id}')\n\n        def process_commit_list(commit_list):\n            try:\n                contributors.update([commit['author']['login'] for commit in commit_list])\n            except TypeError:\n                prompt = (f'Error fetching contributors for PR #{pr_id}. 
Enter it manually, ' +\n                          'as a space-separated list: ')\n                contributors.update(str(input(prompt)).split(' '))\n        def process_review_list(review_list):\n            reviewers.update([x['user']['login'] for x in review_list])\n        def process_comment_list(comment_list):\n            reviewers.update([x['user']['login'] for x in comment_list])\n\n        paginate_request(f'https://api.github.com/repos/dmlc/xgboost/pulls/{pr_id}/commits',\n                         process_commit_list)\n        paginate_request(f'https://api.github.com/repos/dmlc/xgboost/pulls/{pr_id}/reviews',\n                         process_review_list)\n        paginate_request(f'https://api.github.com/repos/dmlc/xgboost/issues/{pr_id}/comments',\n                         process_comment_list)\n\nprint('Contributors: ', end='')\nfor x in sorted(contributors):\n    r = requests.get(f'https://api.github.com/users/{x}', auth=(username, password))\n    assert r.status_code == requests.codes.ok, f'Code: {r.status_code}, Text: {r.text}'\n    user_info = json.loads(r.text)\n    if user_info['name'] is None:\n        print(f\"@{x}, \", end='')\n    else:\n        print(f\"{user_info['name']} (@{x}), \", end='')\n\nprint('\\nReviewers: ', end='')\nfor x in sorted(reviewers):\n    r = requests.get(f'https://api.github.com/users/{x}', auth=(username, password))\n    assert r.status_code == requests.codes.ok, f'Code: {r.status_code}, Text: {r.text}'\n    user_info = json.loads(r.text)\n    if user_info['name'] is None:\n        print(f\"@{x}, \", end='')\n    else:\n        print(f\"{user_info['name']} (@{x}), \", end='')\nprint('')\n"
  },
  {
    "path": "doc/.gitignore",
    "content": "html\nlatex\n*.sh\n_*\nsg_execution_times.rst\ndoxygen\nparser.py\n*.pyc\nweb-data\n# generated by doxygen\ntmp"
  },
  {
    "path": "doc/Doxyfile.in",
    "content": "# Doxyfile 1.9.1\n\n# This file describes the settings to be used by the documentation system\n# doxygen (www.doxygen.org) for a project.\n#\n# All text after a double hash (##) is considered a comment and is placed in\n# front of the TAG it is preceding.\n#\n# All text after a single hash (#) is considered a comment and will be ignored.\n# The format is:\n# TAG = value [value, ...]\n# For lists, items can also be appended using:\n# TAG += value [value, ...]\n# Values that contain spaces should be placed between quotes (\\\" \\\").\n\n#---------------------------------------------------------------------------\n# Project related configuration options\n#---------------------------------------------------------------------------\n\n# This tag specifies the encoding used for all characters in the configuration\n# file that follow. The default is UTF-8 which is also the encoding used for all\n# text before the first occurrence of this tag. Doxygen uses libiconv (or the\n# iconv built into libc) for the transcoding. See\n# https://www.gnu.org/software/libiconv/ for the list of possible encodings.\n# The default value is: UTF-8.\n\nDOXYFILE_ENCODING      = UTF-8\n\n# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by\n# double-quotes, unless you are using Doxywizard) that should identify the\n# project for which the documentation is generated. This name is used in the\n# title of most generated pages and in a few other places.\n# The default value is: My Project.\n\nPROJECT_NAME           = xgboost\n\n# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
This\n# could be handy for archiving the generated documentation or if some version\n# control system is used.\n\nPROJECT_NUMBER         = @XGBOOST_VERSION@\n\n# Using the PROJECT_BRIEF tag one can provide an optional one line description\n# for a project that appears at the top of each page and should give viewer a\n# quick idea about the purpose of the project. Keep the description short.\n\nPROJECT_BRIEF          =\n\n# With the PROJECT_LOGO tag one can specify a logo or an icon that is included\n# in the documentation. The maximum height of the logo should not exceed 55\n# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy\n# the logo to the output directory.\n\nPROJECT_LOGO           =\n\n# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path\n# into which the generated documentation will be written. If a relative path is\n# entered, it will be relative to the location where doxygen was started. If\n# left blank the current directory will be used.\n\nOUTPUT_DIRECTORY       = @PROJECT_BINARY_DIR@/doc_doxygen\n\n# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-\n# directories (in 2 levels) under the output directory of each output format and\n# will distribute the generated files over these directories. Enabling this\n# option can be useful when feeding doxygen a huge amount of source files, where\n# putting all generated files in the same directory would otherwise causes\n# performance problems for the file system.\n# The default value is: NO.\n\nCREATE_SUBDIRS         = NO\n\n# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII\n# characters to appear in the names of generated files. 
If set to NO, non-ASCII\n# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode\n# U+3044.\n# The default value is: NO.\n\nALLOW_UNICODE_NAMES    = NO\n\n# The OUTPUT_LANGUAGE tag is used to specify the language in which all\n# documentation generated by doxygen is written. Doxygen will use this\n# information to generate all constant output in the proper language.\n# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,\n# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),\n# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,\n# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),\n# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,\n# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,\n# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,\n# Ukrainian and Vietnamese.\n# The default value is: English.\n\nOUTPUT_LANGUAGE        = English\n\n# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all\n# documentation generated by doxygen is written. Doxygen will use this\n# information to generate all generated output in the proper direction.\n# Possible values are: None, LTR, RTL and Context.\n# The default value is: None.\n\nOUTPUT_TEXT_DIRECTION  = None\n\n# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member\n# descriptions after the members that are listed in the file and class\n# documentation (similar to Javadoc). 
Set to NO to disable this.\n# The default value is: YES.\n\nBRIEF_MEMBER_DESC      = YES\n\n# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief\n# description of a member or function before the detailed description\n#\n# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the\n# brief descriptions will be completely suppressed.\n# The default value is: YES.\n\nREPEAT_BRIEF           = YES\n\n# This tag implements a quasi-intelligent brief description abbreviator that is\n# used to form the text in various listings. Each string in this list, if found\n# as the leading text of the brief description, will be stripped from the text\n# and the result, after processing the whole list, is used as the annotated\n# text. Otherwise, the brief description is used as-is. If left blank, the\n# following values are used ($name is automatically replaced with the name of\n# the entity):The $name class, The $name widget, The $name file, is, provides,\n# specifies, contains, represents, a, an and the.\n\nABBREVIATE_BRIEF       =\n\n# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then\n# doxygen will generate a detailed section even if there is only a brief\n# description.\n# The default value is: NO.\n\nALWAYS_DETAILED_SEC    = NO\n\n# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all\n# inherited members of a class in the documentation of that class as if those\n# members were ordinary class members. Constructors, destructors and assignment\n# operators of the base classes will not be shown.\n# The default value is: NO.\n\nINLINE_INHERITED_MEMB  = NO\n\n# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path\n# before files name in the file list and in the header files. 
If set to NO the\n# shortest path that makes the file name unique will be used\n# The default value is: YES.\n\nFULL_PATH_NAMES        = YES\n\n# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.\n# Stripping is only done if one of the specified strings matches the left-hand\n# part of the path. The tag can be used to show relative paths in the file list.\n# If left blank the directory from which doxygen is run is used as the path to\n# strip.\n#\n# Note that you can specify absolute paths here, but also relative paths, which\n# will be relative from the directory where doxygen is started.\n# This tag requires that the tag FULL_PATH_NAMES is set to YES.\n\nSTRIP_FROM_PATH        =\n\n# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the\n# path mentioned in the documentation of a class, which tells the reader which\n# header file to include in order to use a class. If left blank only the name of\n# the header file containing the class definition is used. Otherwise one should\n# specify the list of include paths that are normally passed to the compiler\n# using the -I flag.\n\nSTRIP_FROM_INC_PATH    =\n\n# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but\n# less readable) file names. This can be useful is your file systems doesn't\n# support long names like on DOS, Mac, or CD-ROM.\n# The default value is: NO.\n\nSHORT_NAMES            = NO\n\n# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the\n# first line (until the first dot) of a Javadoc-style comment as the brief\n# description. 
If set to NO, the Javadoc-style will behave just like regular Qt-\n# style comments (thus requiring an explicit @brief command for a brief\n# description.)\n# The default value is: NO.\n\nJAVADOC_AUTOBRIEF      = NO\n\n# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line\n# such as\n# /***************\n# as being the beginning of a Javadoc-style comment \"banner\". If set to NO, the\n# Javadoc-style will behave just like regular comments and it will not be\n# interpreted by doxygen.\n# The default value is: NO.\n\nJAVADOC_BANNER         = NO\n\n# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first\n# line (until the first dot) of a Qt-style comment as the brief description. If\n# set to NO, the Qt-style will behave just like regular Qt-style comments (thus\n# requiring an explicit \\brief command for a brief description.)\n# The default value is: NO.\n\nQT_AUTOBRIEF           = NO\n\n# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a\n# multi-line C++ special comment block (i.e. a block of //! or /// comments) as\n# a brief description. This used to be the default behavior. The new default is\n# to treat a multi-line C++ comment block as a detailed description. Set this\n# tag to YES if you prefer the old behavior instead.\n#\n# Note that setting this tag to YES also means that rational rose comments are\n# not recognized any more.\n# The default value is: NO.\n\nMULTILINE_CPP_IS_BRIEF = NO\n\n# By default Python docstrings are displayed as preformatted text and doxygen's\n# special commands cannot be used. 
By setting PYTHON_DOCSTRING to NO the\n# doxygen's special commands can be used and the contents of the docstring\n# documentation blocks is shown as doxygen documentation.\n# The default value is: YES.\n\nPYTHON_DOCSTRING       = YES\n\n# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the\n# documentation from any documented member that it re-implements.\n# The default value is: YES.\n\nINHERIT_DOCS           = YES\n\n# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new\n# page for each member. If set to NO, the documentation of a member will be part\n# of the file/class/namespace that contains it.\n# The default value is: NO.\n\nSEPARATE_MEMBER_PAGES  = NO\n\n# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen\n# uses this value to replace tabs by spaces in code fragments.\n# Minimum value: 1, maximum value: 16, default value: 4.\n\nTAB_SIZE               = 8\n\n# This tag can be used to specify a number of aliases that act as commands in\n# the documentation. An alias has the form:\n# name=value\n# For example adding\n# \"sideeffect=@par Side Effects:\\n\"\n# will allow you to put the command \\sideeffect (or @sideeffect) in the\n# documentation, which will result in a user-defined paragraph with heading\n# \"Side Effects:\". You can put \\n's in the value part of an alias to insert\n# newlines (in the resulting output). You can put ^^ in the value part of an\n# alias to insert a newline as if a physical newline was in the original file.\n# When you need a literal { or } or , in the value part of an alias you have to\n# escape them by means of a backslash (\\), this can lead to conflicts with the\n# commands \\{ and \\} for these it is advised to use the version @{ and @} or use\n# a double escape (\\\\{ and \\\\})\n\nALIASES                =\n\n# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources\n# only. 
Doxygen will then generate output that is more tailored for C. For\n# instance, some of the names that are used will be different. The list of all\n# members will be omitted, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_FOR_C  = NO\n\n# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or\n# Python sources only. Doxygen will then generate output that is more tailored\n# for that language. For instance, namespaces will be presented as packages,\n# qualified scopes will look different, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_JAVA   = NO\n\n# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran\n# sources. Doxygen will then generate output that is tailored for Fortran.\n# The default value is: NO.\n\nOPTIMIZE_FOR_FORTRAN   = NO\n\n# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL\n# sources. Doxygen will then generate output that is tailored for VHDL.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_VHDL   = NO\n\n# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice\n# sources only. Doxygen will then generate output that is more tailored for that\n# language. For instance, namespaces will be presented as modules, types will be\n# separated into more groups, etc.\n# The default value is: NO.\n\nOPTIMIZE_OUTPUT_SLICE  = NO\n\n# Doxygen selects the parser to use depending on the extension of the files it\n# parses. With this tag you can assign which parser to use for a given\n# extension. Doxygen has a built-in mapping, but you can override or extend it\n# using this tag. The format is ext=language, where ext is a file extension, and\n# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,\n# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,\n# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:\n# FortranFree, unknown formatted Fortran: Fortran. 
In the later case the parser\n# tries to guess whether the code is fixed or free formatted code, this is the\n# default for Fortran type files). For instance to make doxygen treat .inc files\n# as Fortran files (default is PHP), and .f files as C (default is Fortran),\n# use: inc=Fortran f=C.\n#\n# Note: For files without extension you can use no_extension as a placeholder.\n#\n# Note that for custom extensions you also need to set FILE_PATTERNS otherwise\n# the files are not read by doxygen. When specifying no_extension you should add\n# * to the FILE_PATTERNS.\n#\n# Note see also the list of default file extension mappings.\n\nEXTENSION_MAPPING      =\n\n# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments\n# according to the Markdown format, which allows for more readable\n# documentation. See https://daringfireball.net/projects/markdown/ for details.\n# The output of markdown processing is further processed by doxygen, so you can\n# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in\n# case of backward compatibilities issues.\n# The default value is: YES.\n\nMARKDOWN_SUPPORT       = YES\n\n# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up\n# to that level are automatically included in the table of contents, even if\n# they do not have an id attribute.\n# Note: This feature currently applies only to Markdown headings.\n# Minimum value: 0, maximum value: 99, default value: 5.\n# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.\n\nTOC_INCLUDE_HEADINGS   = 5\n\n# When enabled doxygen tries to link words that correspond to documented\n# classes, or namespaces to their corresponding documentation. Such a link can\n# be prevented in individual cases by putting a % sign in front of the word or\n# globally by setting AUTOLINK_SUPPORT to NO.\n# The default value is: YES.\n\nAUTOLINK_SUPPORT       = YES\n\n# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want\n# to include (a tag file for) the STL sources as input, then you should set this\n# tag to YES in order to let doxygen match functions declarations and\n# definitions whose arguments contain STL classes (e.g. func(std::string);\n# versus func(std::string) {}). This also make the inheritance and collaboration\n# diagrams that involve STL classes more complete and accurate.\n# The default value is: NO.\n\nBUILTIN_STL_SUPPORT    = NO\n\n# If you use Microsoft's C++/CLI language, you should set this option to YES to\n# enable parsing support.\n# The default value is: NO.\n\nCPP_CLI_SUPPORT        = NO\n\n# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:\n# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen\n# will parse them like normal C++ but will assume all classes use public instead\n# of private inheritance when no explicit protection keyword is present.\n# The default value is: NO.\n\nSIP_SUPPORT            = NO\n\n# For Microsoft's IDL there are propget and propput attributes to indicate\n# getter and setter methods for a property. Setting this option to YES will make\n# doxygen to replace the get and set methods by a property in the documentation.\n# This will only work if the methods are indeed getting or setting a simple\n# type. If this is not the case, or you want to show the methods anyway, you\n# should set this option to NO.\n# The default value is: YES.\n\nIDL_PROPERTY_SUPPORT   = YES\n\n# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC\n# tag is set to YES then doxygen will reuse the documentation of the first\n# member in the group (if any) for the other members of the group. By default\n# all members of a group must be documented explicitly.\n# The default value is: NO.\n\nDISTRIBUTE_GROUP_DOC   = NO\n\n# If one adds a struct or class to a group and this option is enabled, then also\n# any nested class or struct is added to the same group. 
By default this option\n# is disabled and one has to add nested compounds explicitly via \\ingroup.\n# The default value is: NO.\n\nGROUP_NESTED_COMPOUNDS = NO\n\n# Set the SUBGROUPING tag to YES to allow class member groups of the same type\n# (for instance a group of public functions) to be put as a subgroup of that\n# type (e.g. under the Public Functions section). Set it to NO to prevent\n# subgrouping. Alternatively, this can be done per class using the\n# \\nosubgrouping command.\n# The default value is: YES.\n\nSUBGROUPING            = YES\n\n# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions\n# are shown inside the group in which they are included (e.g. using \\ingroup)\n# instead of on a separate page (for HTML and Man pages) or section (for LaTeX\n# and RTF).\n#\n# Note that this feature does not work in combination with\n# SEPARATE_MEMBER_PAGES.\n# The default value is: NO.\n\nINLINE_GROUPED_CLASSES = NO\n\n# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions\n# with only public data fields or simple typedef fields will be shown inline in\n# the documentation of the scope in which they are defined (i.e. file,\n# namespace, or group documentation), provided this scope is documented. If set\n# to NO, structs, classes, and unions are shown on a separate page (for HTML and\n# Man pages) or section (for LaTeX and RTF).\n# The default value is: NO.\n\nINLINE_SIMPLE_STRUCTS  = NO\n\n# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or\n# enum is documented as struct, union, or enum with the name of the typedef. So\n# typedef struct TypeS {} TypeT, will appear in the documentation as a struct\n# with name TypeT. When disabled the typedef will appear as a member of a file,\n# namespace, or class. And the struct will be named TypeS. 
This can typically be\n# useful for C code in case the coding convention dictates that all compound\n# types are typedef'ed and only the typedef is referenced, never the tag name.\n# The default value is: NO.\n\nTYPEDEF_HIDES_STRUCT   = NO\n\n# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This\n# cache is used to resolve symbols given their name and scope. Since this can be\n# an expensive process and often the same symbol appears multiple times in the\n# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small\n# doxygen will become slower. If the cache is too large, memory is wasted. The\n# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range\n# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536\n# symbols. At the end of a run doxygen will report the cache usage and suggest\n# the optimal cache size from a speed point of view.\n# Minimum value: 0, maximum value: 9, default value: 0.\n\nLOOKUP_CACHE_SIZE      = 0\n\n# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use\n# during processing. When set to 0 doxygen will base this on the number of\n# cores available in the system. You can set it explicitly to a value larger\n# than 0 to get more control over the balance between CPU load and processing\n# speed. At this moment only the input processing can be done using multiple\n# threads. Since this is still an experimental feature the default is set to 1,\n# which effectively disables parallel processing. Please report any issues you\n# encounter. 
Generating dot graphs in parallel is controlled by the\n# DOT_NUM_THREADS setting.\n# Minimum value: 0, maximum value: 32, default value: 1.\n\nNUM_PROC_THREADS       = 1\n\n#---------------------------------------------------------------------------\n# Build related configuration options\n#---------------------------------------------------------------------------\n\n# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in\n# documentation are documented, even if no documentation was available. Private\n# class members and static file members will be hidden unless the\n# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.\n# Note: This will also disable the warnings about undocumented members that are\n# normally produced when WARNINGS is set to YES.\n# The default value is: NO.\n\nEXTRACT_ALL            = YES\n\n# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will\n# be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PRIVATE        = NO\n\n# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual\n# methods of a class will be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PRIV_VIRTUAL   = NO\n\n# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal\n# scope will be included in the documentation.\n# The default value is: NO.\n\nEXTRACT_PACKAGE        = NO\n\n# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be\n# included in the documentation.\n# The default value is: NO.\n\nEXTRACT_STATIC         = NO\n\n# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined\n# locally in source files will be included in the documentation. If set to NO,\n# only classes defined in header files are included. Does not have any effect\n# for Java sources.\n# The default value is: YES.\n\nEXTRACT_LOCAL_CLASSES  = YES\n\n# This flag is only useful for Objective-C code. 
If set to YES, local methods,\n# which are defined in the implementation section but not in the interface are\n# included in the documentation. If set to NO, only methods in the interface are\n# included.\n# The default value is: NO.\n\nEXTRACT_LOCAL_METHODS  = NO\n\n# If this flag is set to YES, the members of anonymous namespaces will be\n# extracted and appear in the documentation as a namespace called\n# 'anonymous_namespace{file}', where file will be replaced with the base name of\n# the file that contains the anonymous namespace. By default anonymous namespace\n# are hidden.\n# The default value is: NO.\n\nEXTRACT_ANON_NSPACES   = NO\n\n# If this flag is set to YES, the name of an unnamed parameter in a declaration\n# will be determined by the corresponding definition. By default unnamed\n# parameters remain unnamed in the output.\n# The default value is: YES.\n\nRESOLVE_UNNAMED_PARAMS = YES\n\n# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all\n# undocumented members inside documented classes or files. If set to NO these\n# members will be included in the various overviews, but no documentation\n# section is generated. This option has no effect if EXTRACT_ALL is enabled.\n# The default value is: NO.\n\nHIDE_UNDOC_MEMBERS     = NO\n\n# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all\n# undocumented classes that are normally visible in the class hierarchy. If set\n# to NO, these classes will be included in the various overviews. This option\n# has no effect if EXTRACT_ALL is enabled.\n# The default value is: NO.\n\nHIDE_UNDOC_CLASSES     = NO\n\n# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend\n# declarations. If set to NO, these declarations will be included in the\n# documentation.\n# The default value is: NO.\n\nHIDE_FRIEND_COMPOUNDS  = NO\n\n# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any\n# documentation blocks found inside the body of a function. 
If set to NO, these\n# blocks will be appended to the function's detailed documentation block.\n# The default value is: NO.\n\nHIDE_IN_BODY_DOCS      = NO\n\n# The INTERNAL_DOCS tag determines if documentation that is typed after a\n# \\internal command is included. If the tag is set to NO then the documentation\n# will be excluded. Set it to YES to include the internal documentation.\n# The default value is: NO.\n\nINTERNAL_DOCS          = NO\n\n# With the correct setting of option CASE_SENSE_NAMES doxygen will better be\n# able to match the capabilities of the underlying filesystem. In case the\n# filesystem is case sensitive (i.e. it supports files in the same directory\n# whose names only differ in casing), the option must be set to YES to properly\n# deal with such files in case they appear in the input. For filesystems that\n# are not case sensitive the option should be set to NO to properly deal with\n# output files written for symbols that only differ in casing, such as for two\n# classes, one named CLASS and the other named Class, and to also support\n# references to files without having to specify the exact matching casing. On\n# Windows (including Cygwin) and MacOS, users should typically set this option\n# to NO, whereas on Linux or other Unix flavors it should typically be set to\n# YES.\n# The default value is: system dependent.\n\nCASE_SENSE_NAMES       = YES\n\n# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with\n# their full class and namespace scopes in the documentation. If set to YES, the\n# scope will be hidden.\n# The default value is: NO.\n\nHIDE_SCOPE_NAMES       = NO\n\n# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will\n# append additional text to a page's title, such as Class Reference. 
If set to\n# YES the compound reference will be hidden.\n# The default value is: NO.\n\nHIDE_COMPOUND_REFERENCE= NO\n\n# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of\n# the files that are included by a file in the documentation of that file.\n# The default value is: YES.\n\nSHOW_INCLUDE_FILES     = YES\n\n# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each\n# grouped member an include statement to the documentation, telling the reader\n# which file to include in order to use the member.\n# The default value is: NO.\n\nSHOW_GROUPED_MEMB_INC  = NO\n\n# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include\n# files with double quotes in the documentation rather than with sharp brackets.\n# The default value is: NO.\n\nFORCE_LOCAL_INCLUDES   = NO\n\n# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the\n# documentation for inline members.\n# The default value is: YES.\n\nINLINE_INFO            = YES\n\n# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the\n# (detailed) documentation of file and class members alphabetically by member\n# name. If set to NO, the members will appear in declaration order.\n# The default value is: YES.\n\nSORT_MEMBER_DOCS       = YES\n\n# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief\n# descriptions of file, namespace and class members alphabetically by member\n# name. If set to NO, the members will appear in declaration order. Note that\n# this will also influence the order of the classes in the class list.\n# The default value is: NO.\n\nSORT_BRIEF_DOCS        = NO\n\n# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the\n# (brief and detailed) documentation of class members so that constructors and\n# destructors are listed first. 
If set to NO the constructors will appear in the\n# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.\n# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief\n# member documentation.\n# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting\n# detailed member documentation.\n# The default value is: NO.\n\nSORT_MEMBERS_CTORS_1ST = NO\n\n# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy\n# of group names into alphabetical order. If set to NO the group names will\n# appear in their defined order.\n# The default value is: NO.\n\nSORT_GROUP_NAMES       = NO\n\n# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by\n# fully-qualified names, including namespaces. If set to NO, the class list will\n# be sorted only by class name, not including the namespace part.\n# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.\n# Note: This option applies only to the class list, not to the alphabetical\n# list.\n# The default value is: NO.\n\nSORT_BY_SCOPE_NAME     = NO\n\n# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper\n# type resolution of all parameters of a function it will reject a match between\n# the prototype and the implementation of a member function even if there is\n# only one candidate or it is obvious which candidate to choose by doing a\n# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still\n# accept a match between prototype and implementation in such cases.\n# The default value is: NO.\n\nSTRICT_PROTO_MATCHING  = NO\n\n# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo\n# list. This list is created by putting \\todo commands in the documentation.\n# The default value is: YES.\n\nGENERATE_TODOLIST      = YES\n\n# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test\n# list. 
This list is created by putting \\test commands in the documentation.\n# The default value is: YES.\n\nGENERATE_TESTLIST      = YES\n\n# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug\n# list. This list is created by putting \\bug commands in the documentation.\n# The default value is: YES.\n\nGENERATE_BUGLIST       = YES\n\n# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)\n# the deprecated list. This list is created by putting \\deprecated commands in\n# the documentation.\n# The default value is: YES.\n\nGENERATE_DEPRECATEDLIST= YES\n\n# The ENABLED_SECTIONS tag can be used to enable conditional documentation\n# sections, marked by \\if <section_label> ... \\endif and \\cond <section_label>\n# ... \\endcond blocks.\n\nENABLED_SECTIONS       =\n\n# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the\n# initial value of a variable or macro / define can have for it to appear in the\n# documentation. If the initializer consists of more lines than specified here\n# it will be hidden. Use a value of 0 to hide initializers completely. The\n# appearance of the value of individual variables and macros / defines can be\n# controlled using \\showinitializer or \\hideinitializer command in the\n# documentation regardless of this setting.\n# Minimum value: 0, maximum value: 10000, default value: 30.\n\nMAX_INITIALIZER_LINES  = 30\n\n# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at\n# the bottom of the documentation of classes and structs. If set to YES, the\n# list will mention the files that were used to generate the documentation.\n# The default value is: YES.\n\nSHOW_USED_FILES        = YES\n\n# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
This\n# will remove the Files entry from the Quick Index and from the Folder Tree View\n# (if specified).\n# The default value is: YES.\n\nSHOW_FILES             = YES\n\n# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces\n# page. This will remove the Namespaces entry from the Quick Index and from the\n# Folder Tree View (if specified).\n# The default value is: YES.\n\nSHOW_NAMESPACES        = YES\n\n# The FILE_VERSION_FILTER tag can be used to specify a program or script that\n# doxygen should invoke to get the current version for each file (typically from\n# the version control system). Doxygen will invoke the program by executing (via\n# popen()) the command command input-file, where command is the value of the\n# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided\n# by doxygen. Whatever the program writes to standard output is used as the file\n# version. For an example see the documentation.\n\nFILE_VERSION_FILTER    =\n\n# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed\n# by doxygen. The layout file controls the global structure of the generated\n# output files in an output format independent way. To create the layout file\n# that represents doxygen's defaults, run doxygen with the -l option. You can\n# optionally specify a file name after the option, if omitted DoxygenLayout.xml\n# will be used as the name of the layout file.\n#\n# Note that if you run doxygen from a directory containing a file called\n# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE\n# tag is left empty.\n\nLAYOUT_FILE            =\n\n# The CITE_BIB_FILES tag can be used to specify one or more bib files containing\n# the reference definitions. This must be a list of .bib files. The .bib\n# extension is automatically appended if omitted. This requires the bibtex tool\n# to be installed. 
See also https://en.wikipedia.org/wiki/BibTeX for more info.\n# For LaTeX the style of the bibliography can be controlled using\n# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the\n# search path. See also \\cite for info how to create references.\n\nCITE_BIB_FILES         =\n\n#---------------------------------------------------------------------------\n# Configuration options related to warning and progress messages\n#---------------------------------------------------------------------------\n\n# The QUIET tag can be used to turn on/off the messages that are generated to\n# standard output by doxygen. If QUIET is set to YES this implies that the\n# messages are off.\n# The default value is: NO.\n\nQUIET                  = NO\n\n# The WARNINGS tag can be used to turn on/off the warning messages that are\n# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES\n# this implies that the warnings are on.\n#\n# Tip: Turn warnings on while writing the documentation.\n# The default value is: YES.\n\nWARNINGS               = YES\n\n# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate\n# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag\n# will automatically be disabled.\n# The default value is: YES.\n\nWARN_IF_UNDOCUMENTED   = YES\n\n# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for\n# potential errors in the documentation, such as not documenting some parameters\n# in a documented function, or documenting parameters that don't exist or using\n# markup commands wrongly.\n# The default value is: YES.\n\nWARN_IF_DOC_ERROR      = YES\n\n# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that\n# are documented, but have no documentation for their parameters or return\n# value. If set to NO, doxygen will only warn about wrong or incomplete\n# parameter documentation, but not about the absence of documentation. 
If\n# EXTRACT_ALL is set to YES then this flag will automatically be disabled.\n# The default value is: NO.\n\nWARN_NO_PARAMDOC       = YES\n\n# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when\n# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS\n# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but\n# at the end of the doxygen process doxygen will return with a non-zero status.\n# Possible values are: NO, YES and FAIL_ON_WARNINGS.\n# The default value is: NO.\n\nWARN_AS_ERROR          = NO\n\n# The WARN_FORMAT tag determines the format of the warning messages that doxygen\n# can produce. The string should contain the $file, $line, and $text tags, which\n# will be replaced by the file and line number from which the warning originated\n# and the warning text. Optionally the format may contain $version, which will\n# be replaced by the version of the file (if it could be obtained via\n# FILE_VERSION_FILTER)\n# The default value is: $file:$line: $text.\n\nWARN_FORMAT            = \"$file:$line: $text\"\n\n# The WARN_LOGFILE tag can be used to specify a file to which warning and error\n# messages should be written. If left blank the output is written to standard\n# error (stderr).\n\nWARN_LOGFILE           =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the input files\n#---------------------------------------------------------------------------\n\n# The INPUT tag is used to specify the files and/or directories that contain\n# documented source files. You may enter file names like myfile.cpp or\n# directories like /usr/src/myproject. Separate the files or directories with\n# spaces. 
See also FILE_PATTERNS and EXTENSION_MAPPING\n# Note: If this tag is empty the current directory is searched.\n\nINPUT                  = @PROJECT_SOURCE_DIR@/include\n\n# This tag can be used to specify the character encoding of the source files\n# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses\n# libiconv (or the iconv built into libc) for the transcoding. See the libiconv\n# documentation (see:\n# https://www.gnu.org/software/libiconv/) for the list of possible encodings.\n# The default value is: UTF-8.\n\nINPUT_ENCODING         = UTF-8\n\n# If the value of the INPUT tag contains directories, you can use the\n# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and\n# *.h) to filter out the source-files in the directories.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# read by doxygen.\n#\n# Note the list of default checked file patterns might differ from the list of\n# default file extension mappings.\n#\n# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,\n# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,\n# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,\n# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),\n# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,\n# *.ucf, *.qsf and *.ice.\n\nFILE_PATTERNS          = *.h\n\n# The RECURSIVE tag can be used to specify whether or not subdirectories should\n# be searched for input files as well.\n# The default value is: NO.\n\nRECURSIVE              = YES\n\n# The EXCLUDE tag can be used to specify files and/or directories that should be\n# excluded from the INPUT source files. 
This way you can easily exclude a\n# subdirectory from a directory tree whose root is specified with the INPUT tag.\n#\n# Note that relative paths are relative to the directory from which doxygen is\n# run.\n\nEXCLUDE                =\n\n# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or\n# directories that are symbolic links (a Unix file system feature) are excluded\n# from the input.\n# The default value is: NO.\n\nEXCLUDE_SYMLINKS       = NO\n\n# If the value of the INPUT tag contains directories, you can use the\n# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude\n# certain files from those directories.\n#\n# Note that the wildcards are matched against the file with absolute path, so to\n# exclude all test directories for example use the pattern */test/*\n\nEXCLUDE_PATTERNS       = */test/* \\\n                         logging.h\n\n# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names\n# (namespaces, classes, functions, etc.) that should be excluded from the\n# output. The symbol name can be a fully qualified name, a word, or if the\n# wildcard * is used, a substring. Examples: ANamespace, AClass,\n# AClass::ANamespace, ANamespace::*Test\n#\n# Note that the wildcards are matched against the file with absolute path, so to\n# exclude all test directories use the pattern */test/*\n\nEXCLUDE_SYMBOLS        =\n\n# The EXAMPLE_PATH tag can be used to specify one or more files or directories\n# that contain example code fragments that are included (see the \\include\n# command).\n\nEXAMPLE_PATH           = @PROJECT_SOURCE_DIR@/demo/c-api/\n\n# If the value of the EXAMPLE_PATH tag contains directories, you can use the\n# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and\n# *.h) to filter out the source-files in the directories. 
If left blank all\n# files are included.\n\nEXAMPLE_PATTERNS       =\n\n# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be\n# searched for input files to be used with the \\include or \\dontinclude commands\n# irrespective of the value of the RECURSIVE tag.\n# The default value is: NO.\n\nEXAMPLE_RECURSIVE      = YES\n\n# The IMAGE_PATH tag can be used to specify one or more files or directories\n# that contain images that are to be included in the documentation (see the\n# \\image command).\n\nIMAGE_PATH             =\n\n# The INPUT_FILTER tag can be used to specify a program that doxygen should\n# invoke to filter for each input file. Doxygen will invoke the filter program\n# by executing (via popen()) the command:\n#\n# <filter> <input-file>\n#\n# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the\n# name of an input file. Doxygen will then use the output that the filter\n# program writes to standard output. If FILTER_PATTERNS is specified, this tag\n# will be ignored.\n#\n# Note that the filter must not add or remove lines; it is applied before the\n# code is scanned, but not when the output code is generated. If lines are added\n# or removed, the anchors will not be placed correctly.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# properly processed by doxygen.\n\nINPUT_FILTER           =\n\n# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern\n# basis. Doxygen will compare the file name with each pattern and apply the\n# filter if there is a match. The filters are a list of the form: pattern=filter\n# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how\n# filters are used. 
If the FILTER_PATTERNS tag is empty or if none of the\n# patterns match the file name, INPUT_FILTER is applied.\n#\n# Note that for custom extensions or not directly supported extensions you also\n# need to set EXTENSION_MAPPING for the extension otherwise the files are not\n# properly processed by doxygen.\n\nFILTER_PATTERNS        =\n\n# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using\n# INPUT_FILTER) will also be used to filter the input files that are used for\n# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).\n# The default value is: NO.\n\nFILTER_SOURCE_FILES    = NO\n\n# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file\n# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and\n# it is also possible to disable source filtering for a specific pattern using\n# *.ext= (so without naming a filter).\n# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.\n\nFILTER_SOURCE_PATTERNS =\n\n# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that\n# is part of the input, its contents will be placed on the main page\n# (index.html). This can be useful if you have a project on for instance GitHub\n# and want to reuse the introduction page also for the doxygen output.\n\nUSE_MDFILE_AS_MAINPAGE =\n\n#---------------------------------------------------------------------------\n# Configuration options related to source browsing\n#---------------------------------------------------------------------------\n\n# If the SOURCE_BROWSER tag is set to YES then a list of source files will be\n# generated. 
Documented entities will be cross-referenced with these sources.\n#\n# Note: To get rid of all source code in the generated output, make sure that\n# also VERBATIM_HEADERS is set to NO.\n# The default value is: NO.\n\nSOURCE_BROWSER         = NO\n\n# Setting the INLINE_SOURCES tag to YES will include the body of functions,\n# classes and enums directly into the documentation.\n# The default value is: NO.\n\nINLINE_SOURCES         = NO\n\n# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any\n# special comment blocks from generated source code fragments. Normal C, C++ and\n# Fortran comments will always remain visible.\n# The default value is: YES.\n\nSTRIP_CODE_COMMENTS    = YES\n\n# If the REFERENCED_BY_RELATION tag is set to YES then for each documented\n# entity all documented functions referencing it will be listed.\n# The default value is: NO.\n\nREFERENCED_BY_RELATION = NO\n\n# If the REFERENCES_RELATION tag is set to YES then for each documented function\n# all documented entities called/used by that function will be listed.\n# The default value is: NO.\n\nREFERENCES_RELATION    = NO\n\n# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set\n# to YES then the hyperlinks from functions in REFERENCES_RELATION and\n# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will\n# link to the documentation.\n# The default value is: YES.\n\nREFERENCES_LINK_SOURCE = YES\n\n# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the\n# source code will show a tooltip with additional information such as prototype,\n# brief description and links to the definition and documentation. 
Since this\n# will make the HTML file larger and loading of large files a bit slower, you\n# can opt to disable this feature.\n# The default value is: YES.\n# This tag requires that the tag SOURCE_BROWSER is set to YES.\n\nSOURCE_TOOLTIPS        = YES\n\n# If the USE_HTAGS tag is set to YES then the references to source code will\n# point to the HTML generated by the htags(1) tool instead of doxygen built-in\n# source browser. The htags tool is part of GNU's global source tagging system\n# (see https://www.gnu.org/software/global/global.html). You will need version\n# 4.8.6 or higher.\n#\n# To use it do the following:\n# - Install the latest version of global\n# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file\n# - Make sure the INPUT points to the root of the source tree\n# - Run doxygen as normal\n#\n# Doxygen will invoke htags (and that will in turn invoke gtags), so these\n# tools must be available from the command line (i.e. in the search path).\n#\n# The result: instead of the source browser generated by doxygen, the links to\n# source code will now point to the output of htags.\n# The default value is: NO.\n# This tag requires that the tag SOURCE_BROWSER is set to YES.\n\nUSE_HTAGS              = NO\n\n# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a\n# verbatim copy of the header file for each class for which an include is\n# specified. Set to NO to disable this.\n# See also: Section \\class.\n# The default value is: YES.\n\nVERBATIM_HEADERS       = YES\n\n# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the\n# clang parser (see:\n# http://clang.llvm.org/) for more accurate parsing at the cost of reduced\n# performance. 
This can be particularly helpful with template rich C++ code for\n# which doxygen's built-in parser lacks the necessary type information.\n# Note: The availability of this option depends on whether or not doxygen was\n# generated with the -Duse_libclang=ON option for CMake.\n# The default value is: NO.\n\nCLANG_ASSISTED_PARSING = NO\n\n# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to\n# YES then doxygen will add the directory of each input to the include path.\n# The default value is: YES.\n\nCLANG_ADD_INC_PATHS    = YES\n\n# If clang assisted parsing is enabled you can provide the compiler with command\n# line options that you would normally use when invoking the compiler. Note that\n# the include paths will already be set by doxygen for the files and directories\n# specified with INPUT and INCLUDE_PATH.\n# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.\n\nCLANG_OPTIONS          =\n\n# If clang assisted parsing is enabled you can provide the clang parser with the\n# path to the directory containing a file called compile_commands.json. This\n# file is the compilation database (see:\n# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the\n# options used when the source files were built. This is equivalent to\n# specifying the -p option to a clang tool, such as clang-check. These options\n# will then be passed to the parser. Any options specified with CLANG_OPTIONS\n# will be added as well.\n# Note: The availability of this option depends on whether or not doxygen was\n# generated with the -Duse_libclang=ON option for CMake.\n\nCLANG_DATABASE_PATH    =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the alphabetical class index\n#---------------------------------------------------------------------------\n\n# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all\n# compounds will be generated. 
Enable this if the project contains a lot of\n# classes, structs, unions or interfaces.\n# The default value is: YES.\n\nALPHABETICAL_INDEX     = YES\n\n# In case all classes in a project start with a common prefix, all classes will\n# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag\n# can be used to specify a prefix (or a list of prefixes) that should be ignored\n# while generating the index headers.\n# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.\n\nIGNORE_PREFIX          =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the HTML output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output\n# The default value is: YES.\n\nGENERATE_HTML          = YES\n\n# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: html.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_OUTPUT            = html\n\n# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each\n# generated HTML page (for example: .htm, .php, .asp).\n# The default value is: .html.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FILE_EXTENSION    = .html\n\n# The HTML_HEADER tag can be used to specify a user-defined HTML header file for\n# each generated HTML page. If the tag is left blank doxygen will generate a\n# standard header.\n#\n# To get valid HTML the header file that includes any scripts and style sheets\n# that doxygen needs, which is dependent on the configuration options used (e.g.\n# the setting GENERATE_TREEVIEW). 
It is highly recommended to start with a\n# default header using\n# doxygen -w html new_header.html new_footer.html new_stylesheet.css\n# YourConfigFile\n# and then modify the file new_header.html. See also section \"Doxygen usage\"\n# for information on how to generate the default header that doxygen normally\n# uses.\n# Note: The header is subject to change so you typically have to regenerate the\n# default header when upgrading to a newer version of doxygen. For a description\n# of the possible markers and block names see the documentation.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_HEADER            =\n\n# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each\n# generated HTML page. If the tag is left blank doxygen will generate a standard\n# footer. See HTML_HEADER for more information on how to generate a default\n# footer and what special commands can be used inside the footer. See also\n# section \"Doxygen usage\" for information on how to generate the default footer\n# that doxygen normally uses.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FOOTER            =\n\n# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style\n# sheet that is used by each HTML page. It can be used to fine-tune the look of\n# the HTML output. If left blank doxygen will generate a default style sheet.\n# See also section \"Doxygen usage\" for information on how to generate the style\n# sheet that doxygen normally uses.\n# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as\n# it is more robust and this tag (HTML_STYLESHEET) will in the future become\n# obsolete.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_STYLESHEET        =\n\n# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined\n# cascading style sheets that are included after the standard style sheets\n# created by doxygen. 
Using this option one can overrule certain style aspects.\n# This is preferred over using HTML_STYLESHEET since it does not replace the\n# standard style sheet and is therefore more robust against future updates.\n# Doxygen will copy the style sheet files to the output directory.\n# Note: The order of the extra style sheet files is of importance (e.g. the last\n# style sheet in the list overrules the setting of the previous ones in the\n# list). For an example see the documentation.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_EXTRA_STYLESHEET  =\n\n# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or\n# other source files which should be copied to the HTML output directory. Note\n# that these files will be copied to the base HTML output directory. Use the\n# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these\n# files. In the HTML_STYLESHEET file, use the file name only. Also note that the\n# files will be copied as-is; there are no commands or markers available.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_EXTRA_FILES       =\n\n# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen\n# will adjust the colors in the style sheet and background images according to\n# this color. Hue is specified as an angle on a colorwheel, see\n# https://en.wikipedia.org/wiki/Hue for more information. For instance the value\n# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300\n# purple, and 360 is red again.\n# Minimum value: 0, maximum value: 359, default value: 220.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_HUE    = 220\n\n# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors\n# in the HTML output. For a value of 0 the output will use grayscales only. 
A\n# value of 255 will produce the most vivid colors.\n# Minimum value: 0, maximum value: 255, default value: 100.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_SAT    = 100\n\n# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the\n# luminance component of the colors in the HTML output. Values below 100\n# gradually make the output lighter, whereas values above 100 make the output\n# darker. The value divided by 100 is the actual gamma applied, so 80 represents\n# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not\n# change the gamma.\n# Minimum value: 40, maximum value: 240, default value: 80.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_COLORSTYLE_GAMMA  = 80\n\n# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML\n# page will contain the date and time when the page was generated. Setting this\n# to YES can help to show when doxygen was last run and thus if the\n# documentation is up to date.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_TIMESTAMP         = YES\n\n# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML\n# documentation will contain a main index with vertical navigation menus that\n# are dynamically created via JavaScript. If disabled, the navigation index will\n# consist of multiple levels of tabs that are statically embedded in every HTML\n# page. 
Disable this option to support browsers that do not have JavaScript,\n# like the Qt help browser.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_DYNAMIC_MENUS     = YES\n\n# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML\n# documentation will contain sections that can be hidden and shown after the\n# page has loaded.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_DYNAMIC_SECTIONS  = NO\n\n# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries\n# shown in the various tree structured indices initially; the user can expand\n# and collapse entries dynamically later on. Doxygen will expand the tree to\n# such a level that at most the specified number of entries are visible (unless\n# a fully collapsed tree already exceeds this amount). So setting the number of\n# entries to 1 will produce a fully collapsed tree by default. 0 is a special\n# value representing an infinite number of entries and will result in a fully\n# expanded tree by default.\n# Minimum value: 0, maximum value: 9999, default value: 100.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_INDEX_NUM_ENTRIES = 100\n\n# If the GENERATE_DOCSET tag is set to YES, additional index files will be\n# generated that can be used as input for Apple's Xcode 3 integrated development\n# environment (see:\n# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To\n# create a documentation set, doxygen will generate a Makefile in the HTML\n# output directory. Running make will produce the docset in that directory and\n# running make install will install the docset in\n# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at\n# startup. 
See\n# https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html\n# for more information.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_DOCSET        = NO\n\n# This tag determines the name of the docset feed. A documentation feed provides\n# an umbrella under which multiple documentation sets from a single provider\n# (such as a company or product suite) can be grouped.\n# The default value is: Doxygen generated docs.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_FEEDNAME        = \"Doxygen generated docs\"\n\n# This tag specifies a string that should uniquely identify the documentation\n# set bundle. This should be a reverse domain-name style string, e.g.\n# com.mycompany.MyDocSet. Doxygen will append .docset to the name.\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_BUNDLE_ID       = org.doxygen.Project\n\n# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify\n# the documentation publisher. This should be a reverse domain-name style\n# string, e.g. com.mycompany.MyDocSet.documentation.\n# The default value is: org.doxygen.Publisher.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_PUBLISHER_ID    = org.doxygen.Publisher\n\n# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.\n# The default value is: Publisher.\n# This tag requires that the tag GENERATE_DOCSET is set to YES.\n\nDOCSET_PUBLISHER_NAME  = Publisher\n\n# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three\n# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The\n# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop\n# (see:\n# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.\n#\n# The HTML Help Workshop contains a compiler that can convert all HTML output\n# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML\n# files are now used as the Windows 98 help format, and will replace the old\n# Windows help format (.hlp) on all Windows platforms in the future. Compressed\n# HTML files also contain an index, a table of contents, and you can search for\n# words in the documentation. The HTML workshop also contains a viewer for\n# compressed HTML files.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_HTMLHELP      = NO\n\n# The CHM_FILE tag can be used to specify the file name of the resulting .chm\n# file. You can add a path in front of the file if the result should not be\n# written to the html output directory.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nCHM_FILE               =\n\n# The HHC_LOCATION tag can be used to specify the location (absolute path\n# including file name) of the HTML help compiler (hhc.exe). 
If non-empty,\n# doxygen will try to run the HTML help compiler on the generated index.hhp.\n# The file has to be specified with full path.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nHHC_LOCATION           =\n\n# The GENERATE_CHI flag controls if a separate .chi index file is generated\n# (YES) or that it should be included in the main .chm file (NO).\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nGENERATE_CHI           = NO\n\n# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)\n# and project file content.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nCHM_INDEX_ENCODING     =\n\n# The BINARY_TOC flag controls whether a binary table of contents is generated\n# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it\n# enables the Previous and Next buttons.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nBINARY_TOC             = NO\n\n# The TOC_EXPAND flag can be set to YES to add extra items for group members to\n# the table of contents of the HTML help documentation and to the tree view.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTMLHELP is set to YES.\n\nTOC_EXPAND             = NO\n\n# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and\n# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that\n# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help\n# (.qch) of the generated HTML documentation.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_QHP           = NO\n\n# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify\n# the file name of the resulting .qch file. 
The path specified is relative to\n# the HTML output folder.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQCH_FILE               =\n\n# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help\n# Project output. For more information please see Qt Help Project / Namespace\n# (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_NAMESPACE          = org.doxygen.Project\n\n# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt\n# Help Project output. For more information please see Qt Help Project / Virtual\n# Folders (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).\n# The default value is: doc.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_VIRTUAL_FOLDER     = doc\n\n# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom\n# filter to add. For more information please see Qt Help Project / Custom\n# Filters (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_CUST_FILTER_NAME   =\n\n# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the\n# custom filter to add. For more information please see Qt Help Project / Custom\n# Filters (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_CUST_FILTER_ATTRS  =\n\n# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this\n# project's filter section matches. 
Qt Help Project / Filter Attributes (see:\n# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHP_SECT_FILTER_ATTRS  =\n\n# The QHG_LOCATION tag can be used to specify the location (absolute path\n# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to\n# run qhelpgenerator on the generated .qhp file.\n# This tag requires that the tag GENERATE_QHP is set to YES.\n\nQHG_LOCATION           =\n\n# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be\n# generated, together with the HTML files, they form an Eclipse help plugin. To\n# install this plugin and make it available under the help contents menu in\n# Eclipse, the contents of the directory containing the HTML and XML files needs\n# to be copied into the plugins directory of eclipse. The name of the directory\n# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.\n# After copying Eclipse needs to be restarted before the help appears.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_ECLIPSEHELP   = NO\n\n# A unique identifier for the Eclipse help plugin. When installing the plugin\n# the directory name containing the HTML and XML files should also have this\n# name. Each documentation set should have its own identifier.\n# The default value is: org.doxygen.Project.\n# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.\n\nECLIPSE_DOC_ID         = org.doxygen.Project\n\n# If you want full control over the layout of the generated HTML pages it might\n# be necessary to disable the index and replace it with your own. The\n# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top\n# of each HTML page. A value of NO enables the index and the value YES disables\n# it. 
Since the tabs in the index contain the same information as the navigation\n# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nDISABLE_INDEX          = NO\n\n# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index\n# structure should be generated to display hierarchical information. If the tag\n# value is set to YES, a side panel will be generated containing a tree-like\n# index structure (just like the one that is generated for HTML Help). For this\n# to work a browser that supports JavaScript, DHTML, CSS and frames is required\n# (i.e. any modern browser). Windows users are probably better off using the\n# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can\n# further fine-tune the look of the index. As an example, the default style\n# sheet generated by doxygen has an example that shows how to put an image at\n# the root of the tree instead of the PROJECT_NAME. 
Since the tree basically has\n# the same information as the tab index, you could consider setting\n# DISABLE_INDEX to YES when enabling this option.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nGENERATE_TREEVIEW      = NO\n\n# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that\n# doxygen will group on one line in the generated HTML documentation.\n#\n# Note that a value of 0 will completely suppress the enum values from appearing\n# in the overview section.\n# Minimum value: 0, maximum value: 20, default value: 4.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nENUM_VALUES_PER_LINE   = 4\n\n# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used\n# to set the initial width (in pixels) of the frame in which the tree is shown.\n# Minimum value: 0, maximum value: 1500, default value: 250.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nTREEVIEW_WIDTH         = 250\n\n# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to\n# external symbols imported via tag files in a separate window.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nEXT_LINKS_IN_WINDOW    = NO\n\n# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg\n# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see\n# https://inkscape.org) to generate formulas as SVG images instead of PNGs for\n# the HTML output. These images will generally look nicer at scaled resolutions.\n# Possible values are: png (the default) and svg (looks nicer but requires the\n# pdf2svg or inkscape tool).\n# The default value is: png.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nHTML_FORMULA_FORMAT    = png\n\n# Use this tag to change the font size of LaTeX formulas included as images in\n# the HTML documentation. 
When you change the font size after a successful\n# doxygen run you need to manually remove any form_*.png images from the HTML\n# output directory to force them to be regenerated.\n# Minimum value: 8, maximum value: 50, default value: 10.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nFORMULA_FONTSIZE       = 10\n\n# Use the FORMULA_TRANSPARENT tag to determine whether or not the images\n# generated for formulas are transparent PNGs. Transparent PNGs are not\n# supported properly for IE 6.0, but are supported on all modern browsers.\n#\n# Note that when changing this option you need to delete any form_*.png files in\n# the HTML output directory before the changes take effect.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nFORMULA_TRANSPARENT    = YES\n\n# The FORMULA_MACROFILE can contain LaTeX \\newcommand and \\renewcommand commands\n# to create new LaTeX commands to be used in formulas as building blocks. See\n# the section \"Including formulas\" for details.\n\nFORMULA_MACROFILE      =\n\n# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see\n# https://www.mathjax.org) which uses client side JavaScript for the rendering\n# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX\n# installed or if you want formulas to look prettier in the HTML output. When\n# enabled you may also need to install MathJax separately and configure the path\n# to it using the MATHJAX_RELPATH option.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nUSE_MATHJAX            = NO\n\n# When MathJax is enabled you can set the default output format to be used for\n# the MathJax output. See the MathJax site (see:\n# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.\n# Possible values are: HTML-CSS (which is slower, but has the best\n# compatibility), NativeMML (i.e. 
MathML) and SVG.\n# The default value is: HTML-CSS.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_FORMAT         = HTML-CSS\n\n# When MathJax is enabled you need to specify the location relative to the HTML\n# output directory using the MATHJAX_RELPATH option. The destination directory\n# should contain the MathJax.js script. For instance, if the mathjax directory\n# is located at the same level as the HTML output directory, then\n# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax\n# Content Delivery Network so you can quickly see the result without installing\n# MathJax. However, it is strongly recommended to install a local copy of\n# MathJax from https://www.mathjax.org before deployment.\n# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_RELPATH        = http://www.mathjax.org/mathjax\n\n# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax\n# extension names that should be enabled during MathJax rendering. For example\n# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_EXTENSIONS     =\n\n# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces\n# of code that will be used on startup of the MathJax code. See the MathJax site\n# (see:\n# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an\n# example see the documentation.\n# This tag requires that the tag USE_MATHJAX is set to YES.\n\nMATHJAX_CODEFILE       =\n\n# When the SEARCHENGINE tag is enabled doxygen will generate a search box for\n# the HTML output. The underlying search engine uses javascript and DHTML and\n# should work on any modern browser. 
Note that when using HTML help\n# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)\n# there is already a search function so this one should typically be disabled.\n# For large projects the javascript based search engine can be slow, then\n# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to\n# search using the keyboard; to jump to the search box use <access key> + S\n# (what the <access key> is depends on the OS and browser, but it is typically\n# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down\n# key> to jump into the search results window, the results can be navigated\n# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel\n# the search. The filter options can be selected when the cursor is inside the\n# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>\n# to select a filter and <Enter> or <escape> to activate or cancel the filter\n# option.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_HTML is set to YES.\n\nSEARCHENGINE           = YES\n\n# When the SERVER_BASED_SEARCH tag is enabled the search engine will be\n# implemented using a web server instead of a web client using JavaScript. There\n# are two flavors of web server based searching depending on the EXTERNAL_SEARCH\n# setting. When disabled, doxygen will generate a PHP script for searching and\n# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing\n# and searching needs to be provided by external tools. See the section\n# \"External Indexing and Searching\" for details.\n# The default value is: NO.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSERVER_BASED_SEARCH    = NO\n\n# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP\n# script for searching. Instead the search results are written to an XML file\n# which needs to be processed by an external indexer. 
Doxygen will invoke an\n# external search engine pointed to by the SEARCHENGINE_URL option to obtain the\n# search results.\n#\n# Doxygen ships with an example indexer (doxyindexer) and search engine\n# (doxysearch.cgi) which are based on the open source search engine library\n# Xapian (see:\n# https://xapian.org/).\n#\n# See the section \"External Indexing and Searching\" for details.\n# The default value is: NO.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTERNAL_SEARCH        = NO\n\n# The SEARCHENGINE_URL should point to a search engine hosted by a web server\n# which will return the search results when EXTERNAL_SEARCH is enabled.\n#\n# Doxygen ships with an example indexer (doxyindexer) and search engine\n# (doxysearch.cgi) which are based on the open source search engine library\n# Xapian (see:\n# https://xapian.org/). See the section \"External Indexing and Searching\" for\n# details.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSEARCHENGINE_URL       =\n\n# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed\n# search data is written to a file for indexing by an external tool. With the\n# SEARCHDATA_FILE tag the name of this file can be specified.\n# The default file is: searchdata.xml.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nSEARCHDATA_FILE        = searchdata.xml\n\n# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the\n# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is\n# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple\n# projects and redirect the results back to the right project.\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTERNAL_SEARCH_ID     =\n\n# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen\n# projects other than the one defined by this configuration file, but that are\n# all added to the same external search index. 
Each project needs to have a\n# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps each id\n# to a relative location where the documentation can be found. The format is:\n# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...\n# This tag requires that the tag SEARCHENGINE is set to YES.\n\nEXTRA_SEARCH_MAPPINGS  =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the LaTeX output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.\n# The default value is: YES.\n\nGENERATE_LATEX         = YES\n\n# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: latex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_OUTPUT           = latex\n\n# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be\n# invoked.\n#\n# Note that when not enabling USE_PDFLATEX the default is latex; when enabling\n# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is\n# chosen this is overwritten by pdflatex. 
For specific output languages the\n# default can have been set differently, this depends on the implementation of\n# the output language.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_CMD_NAME         = latex\n\n# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate\n# index for LaTeX.\n# Note: This tag is used in the Makefile / make.bat.\n# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file\n# (.tex).\n# The default file is: makeindex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nMAKEINDEX_CMD_NAME     = makeindex\n\n# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to\n# generate index for LaTeX. In case there is no backslash (\\) as first character\n# it will be automatically added in the LaTeX code.\n# Note: This tag is used in the generated output file (.tex).\n# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.\n# The default value is: makeindex.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_MAKEINDEX_CMD    = makeindex\n\n# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX\n# documents. This may be useful for small projects and may help to save some\n# trees in general.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nCOMPACT_LATEX          = NO\n\n# The PAPER_TYPE tag can be used to set the paper type that is used by the\n# printer.\n# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x\n# 14 inches) and executive (7.25 x 10.5 inches).\n# The default value is: a4.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nPAPER_TYPE             = a4\n\n# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names\n# that should be included in the LaTeX output. 
The package can be specified just\n# by its name or with the correct syntax as to be used with the LaTeX\n# \\usepackage command. To get the times font for instance you can specify :\n# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}\n# To use the option intlimits with the amsmath package you can specify:\n# EXTRA_PACKAGES=[intlimits]{amsmath}\n# If left blank no extra packages will be included.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nEXTRA_PACKAGES         =\n\n# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the\n# generated LaTeX document. The header should contain everything until the first\n# chapter. If it is left blank doxygen will generate a standard header. See\n# section \"Doxygen usage\" for information on how to let doxygen write the\n# default header to a separate file.\n#\n# Note: Only use a user-defined header if you know what you are doing! The\n# following commands have a special meaning inside the header: $title,\n# $datetime, $date, $doxygenversion, $projectname, $projectnumber,\n# $projectbrief, $projectlogo. Doxygen will replace $title with the empty\n# string, for the replacement values of the other commands the user is referred\n# to HTML_HEADER.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_HEADER           =\n\n# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the\n# generated LaTeX document. The footer should contain everything after the last\n# chapter. If it is left blank doxygen will generate a standard footer. 
See\n# LATEX_HEADER for more information on how to generate a default footer and what\n# special commands can be used inside the footer.\n#\n# Note: Only use a user-defined footer if you know what you are doing!\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_FOOTER           =\n\n# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined\n# LaTeX style sheets that are included after the standard style sheets created\n# by doxygen. Using this option one can overrule certain style aspects. Doxygen\n# will copy the style sheet files to the output directory.\n# Note: The order of the extra style sheet files is of importance (e.g. the last\n# style sheet in the list overrules the setting of the previous ones in the\n# list).\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EXTRA_STYLESHEET =\n\n# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or\n# other source files which should be copied to the LATEX_OUTPUT output\n# directory. Note that the files will be copied as-is; there are no commands or\n# markers available.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EXTRA_FILES      =\n\n# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is\n# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will\n# contain links (just like the HTML output) instead of page references. This\n# makes the output suitable for online browsing using a PDF viewer.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nPDF_HYPERLINKS         = YES\n\n# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as\n# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX\n# files. 
Set this option to YES, to get a higher quality PDF documentation.\n#\n# See also section LATEX_CMD_NAME for selecting the engine.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nUSE_PDFLATEX           = YES\n\n# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode\n# command to the generated LaTeX files. This will instruct LaTeX to keep running\n# if errors occur, instead of asking the user for help. This option is also used\n# when generating formulas in HTML.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_BATCHMODE        = NO\n\n# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the\n# index chapters (such as File Index, Compound Index, etc.) in the output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_HIDE_INDICES     = NO\n\n# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source\n# code with syntax highlighting in the LaTeX output.\n#\n# Note that which sources are shown also depends on other settings such as\n# SOURCE_BROWSER.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_SOURCE_CODE      = NO\n\n# The LATEX_BIB_STYLE tag can be used to specify the style to use for the\n# bibliography, e.g. plainnat, or ieeetr. See\n# https://en.wikipedia.org/wiki/BibTeX and \\cite for more info.\n# The default value is: plain.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_BIB_STYLE        = plain\n\n# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated\n# page will contain the date and time when the page was generated. 
Setting this\n# to NO can help when comparing the output of multiple runs.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_TIMESTAMP        = NO\n\n# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)\n# path from which the emoji images will be read. If a relative path is entered,\n# it will be relative to the LATEX_OUTPUT directory. If left blank the\n# LATEX_OUTPUT directory will be used.\n# This tag requires that the tag GENERATE_LATEX is set to YES.\n\nLATEX_EMOJI_DIRECTORY  =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the RTF output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The\n# RTF output is optimized for Word 97 and may not look too pretty with other RTF\n# readers/editors.\n# The default value is: NO.\n\nGENERATE_RTF           = NO\n\n# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: rtf.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_OUTPUT             = rtf\n\n# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF\n# documents. This may be useful for small projects and may help to save some\n# trees in general.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nCOMPACT_RTF            = NO\n\n# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will\n# contain hyperlink fields. The RTF file will contain links (just like the HTML\n# output) instead of page references. 
This makes the output suitable for online\n# browsing using Word or some other Word compatible readers that support those\n# fields.\n#\n# Note: WordPad (write) and others do not support links.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_HYPERLINKS         = NO\n\n# Load stylesheet definitions from file. Syntax is similar to doxygen's\n# configuration file, i.e. a series of assignments. You only have to provide\n# replacements, missing definitions are set to their default value.\n#\n# See also section \"Doxygen usage\" for information on how to generate the\n# default style sheet that doxygen normally uses.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_STYLESHEET_FILE    =\n\n# Set optional variables used in the generation of an RTF document. Syntax is\n# similar to doxygen's configuration file. A template extensions file can be\n# generated using doxygen -e rtf extensionFile.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_EXTENSIONS_FILE    =\n\n# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code\n# with syntax highlighting in the RTF output.\n#\n# Note that which sources are shown also depends on other settings such as\n# SOURCE_BROWSER.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_RTF is set to YES.\n\nRTF_SOURCE_CODE        = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the man page output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for\n# classes and files.\n# The default value is: NO.\n\nGENERATE_MAN           = NO\n\n# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it. 
A directory man3 will be created inside the directory specified by\n# MAN_OUTPUT.\n# The default directory is: man.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_OUTPUT             = man\n\n# The MAN_EXTENSION tag determines the extension that is added to the generated\n# man pages. In case the manual section does not start with a number, the number\n# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is\n# optional.\n# The default value is: .3.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_EXTENSION          = .3\n\n# The MAN_SUBDIR tag determines the name of the directory created within\n# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by\n# MAN_EXTENSION with the initial . removed.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_SUBDIR             =\n\n# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it\n# will generate one additional man file for each entity documented in the real\n# man page(s). These additional files only source the real man page, but without\n# them the man command would be unable to find the correct page.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_MAN is set to YES.\n\nMAN_LINKS              = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the XML output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that\n# captures the structure of the code including all documentation.\n# The default value is: NO.\n\nGENERATE_XML           = YES\n\n# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
If a\n# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of\n# it.\n# The default directory is: xml.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_OUTPUT             = xml\n\n# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program\n# listings (including syntax highlighting and cross-referencing information) to\n# the XML output. Note that enabling this will significantly increase the size\n# of the XML output.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_PROGRAMLISTING     = YES\n\n# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include\n# namespace members in file scope as well, matching the HTML output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_XML is set to YES.\n\nXML_NS_MEMB_FILE_SCOPE = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the DOCBOOK output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files\n# that can be used to generate PDF.\n# The default value is: NO.\n\nGENERATE_DOCBOOK       = NO\n\n# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.\n# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in\n# front of it.\n# The default directory is: docbook.\n# This tag requires that the tag GENERATE_DOCBOOK is set to YES.\n\nDOCBOOK_OUTPUT         = docbook\n\n# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the\n# program listings (including syntax highlighting and cross-referencing\n# information) to the DOCBOOK output. 
Note that enabling this will significantly\n# increase the size of the DOCBOOK output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_DOCBOOK is set to YES.\n\nDOCBOOK_PROGRAMLISTING = NO\n\n#---------------------------------------------------------------------------\n# Configuration options for the AutoGen Definitions output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an\n# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures\n# the structure of the code including all documentation. Note that this feature\n# is still experimental and incomplete at the moment.\n# The default value is: NO.\n\nGENERATE_AUTOGEN_DEF   = NO\n\n#---------------------------------------------------------------------------\n# Configuration options related to the Perl module output\n#---------------------------------------------------------------------------\n\n# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module\n# file that captures the structure of the code including all documentation.\n#\n# Note that this feature is still experimental and incomplete at the moment.\n# The default value is: NO.\n\nGENERATE_PERLMOD       = NO\n\n# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary\n# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI\n# output from the Perl module output.\n# The default value is: NO.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_LATEX          = NO\n\n# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely\n# formatted so it can be parsed by a human reader. This is useful if you want to\n# understand what is going on. 
On the other hand, if this tag is set to NO, the\n# size of the Perl module output will be much smaller and Perl will parse it\n# just the same.\n# The default value is: YES.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_PRETTY         = YES\n\n# The names of the make variables in the generated doxyrules.make file are\n# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful\n# so different doxyrules.make files included by the same Makefile don't\n# overwrite each other's variables.\n# This tag requires that the tag GENERATE_PERLMOD is set to YES.\n\nPERLMOD_MAKEVAR_PREFIX =\n\n#---------------------------------------------------------------------------\n# Configuration options related to the preprocessor\n#---------------------------------------------------------------------------\n\n# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all\n# C-preprocessor directives found in the sources and include files.\n# The default value is: YES.\n\nENABLE_PREPROCESSING   = YES\n\n# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names\n# in the source code. If set to NO, only conditional compilation will be\n# performed. 
Macro expansion can be done in a controlled way by setting\n# EXPAND_ONLY_PREDEF to YES.\n# The default value is: NO.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nMACRO_EXPANSION        = YES\n\n# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then\n# the macro expansion is limited to the macros specified with the PREDEFINED and\n# EXPAND_AS_DEFINED tags.\n# The default value is: NO.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nEXPAND_ONLY_PREDEF     = YES\n\n# If the SEARCH_INCLUDES tag is set to YES, the include files in the\n# INCLUDE_PATH will be searched if a #include is found.\n# The default value is: YES.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nSEARCH_INCLUDES        = YES\n\n# The INCLUDE_PATH tag can be used to specify one or more directories that\n# contain include files that are not input files but should be processed by the\n# preprocessor.\n# This tag requires that the tag SEARCH_INCLUDES is set to YES.\n\nINCLUDE_PATH           =\n\n# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard\n# patterns (like *.h and *.hpp) to filter out the header-files in the\n# directories. If left blank, the patterns specified with FILE_PATTERNS will be\n# used.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nINCLUDE_FILE_PATTERNS  =\n\n# The PREDEFINED tag can be used to specify one or more macro names that are\n# defined before the preprocessor is started (similar to the -D option of e.g.\n# gcc). The argument of the tag is a list of macros of the form: name or\n# name=definition (no spaces). If the definition and the \"=\" are omitted, \"=1\"\n# is assumed. 
To prevent a macro definition from being undefined via #undef or\n# recursively expanded use the := operator instead of the = operator.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nPREDEFINED             = DMLC_USE_CXX11 \\\n                         XGB_DLL= \\\n                         XGB_EXTERN_C=\n\n# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this\n# tag can be used to specify a list of macro names that should be expanded. The\n# macro definition that is found in the sources will be used. Use the PREDEFINED\n# tag if you want to use a different macro definition that overrules the\n# definition found in the source code.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nEXPAND_AS_DEFINED      =\n\n# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will\n# remove all references to function-like macros that are alone on a line, have\n# an all uppercase name, and do not end with a semicolon. Such function macros\n# are typically used for boiler-plate code, and will confuse the parser if not\n# removed.\n# The default value is: YES.\n# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.\n\nSKIP_FUNCTION_MACROS   = YES\n\n#---------------------------------------------------------------------------\n# Configuration options related to external references\n#---------------------------------------------------------------------------\n\n# The TAGFILES tag can be used to specify one or more tag files. For each tag\n# file the location of the external documentation should be added. The format of\n# a tag file without this location is as follows:\n# TAGFILES = file1 file2 ...\n# Adding location for the tag files is done as follows:\n# TAGFILES = file1=loc1 \"file2 = loc2\" ...\n# where loc1 and loc2 can be relative or absolute paths or URLs. 
See the\n# section \"Linking to external documentation\" for more information about the use\n# of tag files.\n# Note: Each tag file must have a unique name (where the name does NOT include\n# the path). If a tag file is not located in the directory in which doxygen is\n# run, you must also specify the path to the tagfile here.\n\nTAGFILES               =\n\n# When a file name is specified after GENERATE_TAGFILE, doxygen will create a\n# tag file that is based on the input files it reads. See section \"Linking to\n# external documentation\" for more information about the usage of tag files.\n\nGENERATE_TAGFILE       =\n\n# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in\n# the class index. If set to NO, only the inherited external classes will be\n# listed.\n# The default value is: NO.\n\nALLEXTERNALS           = NO\n\n# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed\n# in the modules index. If set to NO, only the current project's groups will be\n# listed.\n# The default value is: YES.\n\nEXTERNAL_GROUPS        = YES\n\n# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in\n# the related pages index. If set to NO, only the current project's pages will\n# be listed.\n# The default value is: YES.\n\nEXTERNAL_PAGES         = YES\n\n#---------------------------------------------------------------------------\n# Configuration options related to the dot tool\n#---------------------------------------------------------------------------\n\n# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram\n# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to\n# NO turns the diagrams off. 
Note that this option also works with HAVE_DOT\n# disabled, but it is recommended to install and use dot, since it yields more\n# powerful graphs.\n# The default value is: YES.\n\nCLASS_DIAGRAMS         = YES\n\n# You can include diagrams made with dia in doxygen documentation. Doxygen will\n# then run dia to produce the diagram and insert it in the documentation. The\n# DIA_PATH tag allows you to specify the directory where the dia binary resides.\n# If left empty dia is assumed to be found in the default search path.\n\nDIA_PATH               =\n\n# If set to YES the inheritance and collaboration graphs will hide inheritance\n# and usage relations if the target is undocumented or is not a class.\n# The default value is: YES.\n\nHIDE_UNDOC_RELATIONS   = YES\n\n# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is\n# available from the path. This tool is part of Graphviz (see:\n# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent\n# Bell Labs. The other options in this section have no effect if this option is\n# set to NO\n# The default value is: YES.\n\nHAVE_DOT               = YES\n\n# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed\n# to run in parallel. When set to 0 doxygen will base this on the number of\n# processors available in the system. You can set it explicitly to a value\n# larger than 0 to get control over the balance between CPU load and processing\n# speed.\n# Minimum value: 0, maximum value: 32, default value: 0.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_NUM_THREADS        = 0\n\n# When you want a differently looking font in the dot files that doxygen\n# generates you can specify the font name using DOT_FONTNAME. 
You need to make\n# sure dot is able to find the font, which can be done by putting it in a\n# standard location or by setting the DOTFONTPATH environment variable or by\n# setting DOT_FONTPATH to the directory containing the font.\n# The default value is: Helvetica.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTNAME           = Helvetica\n\n# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of\n# dot graphs.\n# Minimum value: 4, maximum value: 24, default value: 10.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTSIZE           = 10\n\n# By default doxygen will tell dot to use the default font as specified with\n# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set\n# the path where dot can find it using this tag.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_FONTPATH           =\n\n# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for\n# each documented class showing the direct and indirect inheritance relations.\n# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCLASS_GRAPH            = YES\n\n# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a\n# graph for each documented class showing the direct and indirect implementation\n# dependencies (inheritance, containment, and class references variables) of the\n# class with other documented classes.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCOLLABORATION_GRAPH    = YES\n\n# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for\n# groups, showing the direct groups dependencies.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGROUP_GRAPHS           = YES\n\n# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and\n# 
collaboration diagrams in a style similar to the OMG's Unified Modeling\n# Language.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nUML_LOOK               = YES\n\n# If the UML_LOOK tag is enabled, the fields and methods are shown inside the\n# class node. If there are many fields or methods and many nodes the graph may\n# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the\n# number of items for each type to make the size more manageable. Set this to 0\n# for no limit. Note that the threshold may be exceeded by 50% before the limit\n# is enforced. So when you set the threshold to 10, up to 15 fields may appear,\n# but if the number exceeds 15, the total amount of fields shown is limited to\n# 10.\n# Minimum value: 0, maximum value: 100, default value: 10.\n# This tag requires that the tag UML_LOOK is set to YES.\n\nUML_LIMIT_NUM_FIELDS   = 10\n\n# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and\n# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS\n# tag is set to YES, doxygen will add type and arguments for attributes and\n# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen\n# will not generate fields with class member information in the UML graphs. The\n# class diagrams will look similar to the default class diagrams but using UML\n# notation for the relationships.\n# Possible values are: NO, YES and NONE.\n# The default value is: NO.\n# This tag requires that the tag UML_LOOK is set to YES.\n\nDOT_UML_DETAILS        = NO\n\n# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters\n# to display on a single line. If the actual line length exceeds this threshold\n# significantly it will be wrapped across multiple lines. 
Some heuristics are applied\n# to avoid ugly line breaks.\n# Minimum value: 0, maximum value: 1000, default value: 17.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_WRAP_THRESHOLD     = 17\n\n# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and\n# collaboration graphs will show the relations between templates and their\n# instances.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nTEMPLATE_RELATIONS     = NO\n\n# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to\n# YES then doxygen will generate a graph for each documented file showing the\n# direct and indirect include dependencies of the file with other documented\n# files.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINCLUDE_GRAPH          = YES\n\n# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are\n# set to YES then doxygen will generate a graph for each documented file showing\n# the direct and indirect include dependencies of the file with other documented\n# files.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINCLUDED_BY_GRAPH      = YES\n\n# If the CALL_GRAPH tag is set to YES then doxygen will generate a call\n# dependency graph for every global function or class method.\n#\n# Note that enabling this option will significantly increase the time of a run.\n# So in most cases it will be better to enable call graphs for selected\n# functions only using the \\callgraph command. 
Disabling a call graph can be\n# accomplished by means of the command \\hidecallgraph.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCALL_GRAPH             = NO\n\n# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller\n# dependency graph for every global function or class method.\n#\n# Note that enabling this option will significantly increase the time of a run.\n# So in most cases it will be better to enable caller graphs for selected\n# functions only using the \\callergraph command. Disabling a caller graph can be\n# accomplished by means of the command \\hidecallergraph.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nCALLER_GRAPH           = NO\n\n# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will show a graphical\n# hierarchy of all classes instead of a textual one.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGRAPHICAL_HIERARCHY    = YES\n\n# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the\n# dependencies a directory has on other directories in a graphical way. The\n# dependency relations are determined by the #include relations between the\n# files in the directories.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDIRECTORY_GRAPH        = YES\n\n# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images\n# generated by dot. 
For an explanation of the image formats see the section\n# output formats in the documentation of the dot tool (Graphviz (see:\n# http://www.graphviz.org/)).\n# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order\n# to make the SVG files visible in IE 9+ (other browsers do not have this\n# requirement).\n# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,\n# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,\n# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,\n# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and\n# png:gdiplus:gdiplus.\n# The default value is: png.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_IMAGE_FORMAT       = png\n\n# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to\n# enable generation of interactive SVG images that allow zooming and panning.\n#\n# Note that this requires a modern browser other than Internet Explorer. Tested\n# and working are Firefox, Chrome, Safari, and Opera.\n# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make\n# the SVG files visible. Older versions of IE do not have SVG support.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nINTERACTIVE_SVG        = NO\n\n# The DOT_PATH tag can be used to specify the path where the dot tool can be\n# found. 
If left blank, it is assumed the dot tool can be found in the path.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_PATH               =\n\n# The DOTFILE_DIRS tag can be used to specify one or more directories that\n# contain dot files that are included in the documentation (see the \\dotfile\n# command).\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOTFILE_DIRS           =\n\n# The MSCFILE_DIRS tag can be used to specify one or more directories that\n# contain msc files that are included in the documentation (see the \\mscfile\n# command).\n\nMSCFILE_DIRS           =\n\n# The DIAFILE_DIRS tag can be used to specify one or more directories that\n# contain dia files that are included in the documentation (see the \\diafile\n# command).\n\nDIAFILE_DIRS           =\n\n# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the\n# path where java can find the plantuml.jar file. If left blank, it is assumed\n# PlantUML is not used or called during a preprocessing step. Doxygen will\n# generate a warning when it encounters a \\startuml command in this case and\n# will not generate output for the diagram.\n\nPLANTUML_JAR_PATH      =\n\n# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a\n# configuration file for plantuml.\n\nPLANTUML_CFG_FILE      =\n\n# When using plantuml, the specified paths are searched for files specified by\n# the !include statement in a plantuml block.\n\nPLANTUML_INCLUDE_PATH  =\n\n# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes\n# that will be shown in the graph. If the number of nodes in a graph becomes\n# larger than this value, doxygen will truncate the graph, which is visualized\n# by representing a node as a red box. Note that if the number of direct\n# children of the root node in a graph is already larger than\n# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. 
Also note that\n# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.\n# Minimum value: 0, maximum value: 10000, default value: 50.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_GRAPH_MAX_NODES    = 50\n\n# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs\n# generated by dot. A depth value of 3 means that only nodes reachable from the\n# root by following a path via at most 3 edges will be shown. Nodes that lay\n# further from the root node will be omitted. Note that setting this option to 1\n# or 2 may greatly reduce the computation time needed for large code bases. Also\n# note that the size of a graph can be further restricted by\n# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.\n# Minimum value: 0, maximum value: 1000, default value: 0.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nMAX_DOT_GRAPH_DEPTH    = 0\n\n# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent\n# background. This is disabled by default, because dot on Windows does not seem\n# to support this out of the box.\n#\n# Warning: Depending on the platform used, enabling this option may lead to\n# badly anti-aliased labels on the edges of a graph (i.e. they become hard to\n# read).\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_TRANSPARENT        = NO\n\n# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output\n# files in one run (i.e. multiple -o and -T options on the command line). 
This\n# makes dot run faster, but since only newer versions of dot (>1.8.10) support\n# this, this feature is disabled by default.\n# The default value is: NO.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nDOT_MULTI_TARGETS      = YES\n\n# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page\n# explaining the meaning of the various boxes and arrows in the dot generated\n# graphs.\n# The default value is: YES.\n# This tag requires that the tag HAVE_DOT is set to YES.\n\nGENERATE_LEGEND        = YES\n\n# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate\n# files that are used to generate the various graphs.\n#\n# Note: This setting is not only used for dot files but also for msc and\n# plantuml temporary files.\n# The default value is: YES.\n\nDOT_CLEANUP            = YES\n"
  },
  {
    "path": "doc/Makefile",
    "content": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nPAPER         =\nBUILDDIR      = _build\n\n# User-friendly check for sphinx-build\nifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)\n$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)\nendif\n\n# Internal variables.\nPAPEROPT_a4     = -D latex_paper_size=a4\nPAPEROPT_letter = -D latex_paper_size=letter\nALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n# the i18n builder cannot share the environment and doctrees with the others\nI18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n\n.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext\n\nhelp:\n\t@echo \"Please use \\`make <target>' where <target> is one of\"\n\t@echo \"  html       to make standalone HTML files\"\n\t@echo \"  dirhtml    to make HTML files named index.html in directories\"\n\t@echo \"  singlehtml to make a single large HTML file\"\n\t@echo \"  pickle     to make pickle files\"\n\t@echo \"  json       to make JSON files\"\n\t@echo \"  htmlhelp   to make HTML files and a HTML help project\"\n\t@echo \"  qthelp     to make HTML files and a qthelp project\"\n\t@echo \"  applehelp  to make an Apple Help Book\"\n\t@echo \"  devhelp    to make HTML files and a Devhelp project\"\n\t@echo \"  epub       to make an epub\"\n\t@echo \"  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter\"\n\t@echo \"  latexpdf   to make LaTeX files and run them through pdflatex\"\n\t@echo \"  latexpdfja to make LaTeX 
files and run them through platex/dvipdfmx\"\n\t@echo \"  text       to make text files\"\n\t@echo \"  man        to make manual pages\"\n\t@echo \"  texinfo    to make Texinfo files\"\n\t@echo \"  info       to make Texinfo files and run them through makeinfo\"\n\t@echo \"  gettext    to make PO message catalogs\"\n\t@echo \"  changes    to make an overview of all changed/added/deprecated items\"\n\t@echo \"  xml        to make Docutils-native XML files\"\n\t@echo \"  pseudoxml  to make pseudoxml-XML files for display purposes\"\n\t@echo \"  linkcheck  to check all external links for integrity\"\n\t@echo \"  doctest    to run all doctests embedded in the documentation (if enabled)\"\n\t@echo \"  coverage   to run coverage check of the documentation (if enabled)\"\n\nclean:\n\trm -rf $(BUILDDIR)/*\n\nhtml:\n\t$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/html.\"\n\ndirhtml:\n\t$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/dirhtml.\"\n\nsinglehtml:\n\t$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml\n\t@echo\n\t@echo \"Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml.\"\n\npickle:\n\t$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle\n\t@echo\n\t@echo \"Build finished; now you can process the pickle files.\"\n\njson:\n\t$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json\n\t@echo\n\t@echo \"Build finished; now you can process the JSON files.\"\n\nhtmlhelp:\n\t$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp\n\t@echo\n\t@echo \"Build finished; now you can run HTML Help Workshop with the\" \\\n\t      \".hhp project file in $(BUILDDIR)/htmlhelp.\"\n\nqthelp:\n\t$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp\n\t@echo\n\t@echo \"Build finished; now you can run \"qcollectiongenerator\" with the\" \\\n\t      \".qhcp project file in $(BUILDDIR)/qthelp, like this:\"\n\t@echo \"# qcollectiongenerator $(BUILDDIR)/qthelp/rabit.qhcp\"\n\t@echo \"To view the help file:\"\n\t@echo \"# assistant -collectionFile $(BUILDDIR)/qthelp/rabit.qhc\"\n\napplehelp:\n\t$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp\n\t@echo\n\t@echo \"Build finished. The help book is in $(BUILDDIR)/applehelp.\"\n\t@echo \"N.B. You won't be able to view it unless you put it in\" \\\n\t      \"~/Library/Documentation/Help or install it in your application\" \\\n\t      \"bundle.\"\n\ndevhelp:\n\t$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp\n\t@echo\n\t@echo \"Build finished.\"\n\t@echo \"To view the help file:\"\n\t@echo \"# mkdir -p $$HOME/.local/share/devhelp/rabit\"\n\t@echo \"# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/rabit\"\n\t@echo \"# devhelp\"\n\nepub:\n\t$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub\n\t@echo\n\t@echo \"Build finished. 
The epub file is in $(BUILDDIR)/epub.\"\n\nlatex:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo\n\t@echo \"Build finished; the LaTeX files are in $(BUILDDIR)/latex.\"\n\t@echo \"Run \\`make' in that directory to run these through (pdf)latex\" \\\n\t      \"(use \\`make latexpdf' here to do that automatically).\"\n\nlatexpdf:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo \"Running LaTeX files through pdflatex...\"\n\t$(MAKE) -C $(BUILDDIR)/latex all-pdf\n\t@echo \"pdflatex finished; the PDF files are in $(BUILDDIR)/latex.\"\n\nlatexpdfja:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo \"Running LaTeX files through platex and dvipdfmx...\"\n\t$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja\n\t@echo \"pdflatex finished; the PDF files are in $(BUILDDIR)/latex.\"\n\ntext:\n\t$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text\n\t@echo\n\t@echo \"Build finished. The text files are in $(BUILDDIR)/text.\"\n\nman:\n\t$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man\n\t@echo\n\t@echo \"Build finished. The manual pages are in $(BUILDDIR)/man.\"\n\ntexinfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo\n\t@echo \"Build finished. The Texinfo files are in $(BUILDDIR)/texinfo.\"\n\t@echo \"Run \\`make' in that directory to run these through makeinfo\" \\\n\t      \"(use \\`make info' here to do that automatically).\"\n\ninfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo \"Running Texinfo files through makeinfo...\"\n\tmake -C $(BUILDDIR)/texinfo info\n\t@echo \"makeinfo finished; the Info files are in $(BUILDDIR)/texinfo.\"\n\ngettext:\n\t$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale\n\t@echo\n\t@echo \"Build finished. 
The message catalogs are in $(BUILDDIR)/locale.\"\n\nchanges:\n\t$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes\n\t@echo\n\t@echo \"The overview file is in $(BUILDDIR)/changes.\"\n\nlinkcheck:\n\t$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck\n\t@echo\n\t@echo \"Link check complete; look for any errors in the above output \" \\\n\t      \"or in $(BUILDDIR)/linkcheck/output.txt.\"\n\ndoctest:\n\t$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest\n\t@echo \"Testing of doctests in the sources finished, look at the \" \\\n\t      \"results in $(BUILDDIR)/doctest/output.txt.\"\n\ncoverage:\n\t$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage\n\t@echo \"Testing of coverage in the sources finished, look at the \" \\\n\t      \"results in $(BUILDDIR)/coverage/python.txt.\"\n\nxml:\n\t$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml\n\t@echo\n\t@echo \"Build finished. The XML files are in $(BUILDDIR)/xml.\"\n\npseudoxml:\n\t$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml\n\t@echo\n\t@echo \"Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml.\"\n"
  },
  {
    "path": "doc/R-package/.gitignore",
    "content": "*~\n*.md\n"
  },
  {
    "path": "doc/R-package/Makefile",
    "content": "# This is the makefile for compiling Rmarkdown files into the md file with results.\nPKGROOT=../../R-package\n\n# ADD The Markdown to be built here, with suffix md\nxgboostfromJSON.md: $(PKGROOT)/vignettes/xgboostfromJSON.Rmd\nxgboost_introduction.md: $(PKGROOT)/vignettes/xgboost_introduction.Rmd\n\nall: xgboostfromJSON.md xgboost_introduction.md\n\n# General Rules for build rmarkdowns, need knitr\n%.md:\n\tRscript -e \\\n\t\"require(methods);\"\\\n\t\"require(knitr);\"\\\n\t\"knitr::opts_knit\\$$set(root.dir=\\\".\\\");\"\\\n\t\"knitr::opts_chunk\\$$set(fig.path=\\\"../web-data/xgboost/knitr/$(basename $@)-\\\");\"\\\n\t\"knitr::knit(\\\"$+\\\")\"\n"
  },
  {
    "path": "doc/R-package/adding_parameters.rst",
    "content": "Developer guide: parameters from core library\n=============================================\n\nThe XGBoost core library accepts a long list of input parameters (e.g. ``max_depth`` for decision trees, regularization, ``device`` where compute happens, etc.). New parameters are constantly being added as XGBoost is developed further, and their language bindings should allow passing to the core library everything that it accepts.\n\nIn the case of R, these parameters are passed as an R ``list`` object to function ``xgb.train``, but the R interface aims at providing a better, more idiomatic user experience by offering a parameters constructor with full in-package documentation. This requires keeping the list of parameters and their documentation up to date **in the R package** too, in addition to the general online documentation for XGBoost.\n\nIn more detail, there is a function ``xgb.params`` which allows the user to construct such a ``list`` object to pass to ``xgb.train`` while getting full IDE autocompletion on it. This function should accept all possible XGBoost parameters as arguments, listing them in the same order as they appear in the online documentation.\n\nIn order to add a new parameter from the core library to ``xgb.params``:\n\n- Add the parameter at the right location, according to the order in which it appears in the .rst file listing the parameters for the core library. If the parameter appears more than once (e.g. because it applies to more than one type of booster), then add it in a position according to to the first occurrence.\n- Copy-paste the docs from the .rst file as another ``@param`` entry for ``xgb.train``. 
Some easy substitutions might be needed, such as changing double-backticks to single-backticks, enquoting variables that need to be passed as strings, and replacing ``:math:`` calls with their roxygen equivalent ``\\eqn{}``, among others.\n- If needed, make minimal modifications for the R interface - for example, since parameters are only listed once, should add at the beginning a note about which type of booster they apply to if they are only applicable for one type, or list default values by booster type if they are different.\n\nAfter adding the parameter to ``xgb.params``, it will also need to be added to the function ``xgboost`` if that function can use it. The function ``xgboost`` is not meant to support everything that the core library offers - currently parameters related to learning-to-rank are not listed there for example as they are unusable for it (but can be used for ``xgb.train``).\n\nIn order to add the parameter to ``xgboost``:\n\n- Add it to the function signature. The position here differs though: there are a few selected parameters whose positions have been moved closer to the top of the signature. New parameters should not be placed within those \"top\" positions - instead, place it after parameter ``tree_method``, in the most similar place among the remaining parameters according to how it was inserted in ``xgb.params``. Note that the rest of the parameters that come after ``tree_method`` are still meant to follow the same relative order as in ``xgb.params``.\n- If the parameter applies exactly in the same way as in ``xgb.train``, then no additional documentation is needed for ``xgboost``, because it inherits parameters from ``xgb.params`` by default. However, some parameters might need slight modifications - for example, not all objectives are supported by ``xgboost``, so modifications are needed for that parameter.\n- If the parameter allows aliases, use only one alias, and prefer the most descriptive nomenclature (e.g. 
\"learning_rate\" instead of \"eta\"). These also need a doc entry ``@param`` in ``xgboost``, as the one in ``xgb.params`` will have the unsupported alias.\n\nAs new objectives and evaluation metrics are added, be mindful that they need to be added to the docs of both ``xgb.params`` and ``xgboost``. Documentation for objectives in both functions was originally copied from the same .rst file for the core library, but for ``xgboost`` it undergoes additional modifications in order to list what is and isn't supported, and to refer only to the parameter aliases that are accepted by ``xgboost``.\n\nKeep in mind also that objectives that are a variant of one another but with a different prediction mode, are not meant to be allowed in ``xgboost`` as they'd break its intended interface - therefore, such objectives are not described in the docs for ``xgboost`` (but there is a list at the end of what isn't supported by it) and are checked against in function ``prescreen.objective``.\n"
  },
  {
    "path": "doc/R-package/index.rst",
    "content": "#################\nXGBoost R Package\n#################\n\n.. raw:: html\n\n  <a href=\"http://cran.r-project.org/web/packages/xgboost\"><img alt=\"CRAN Status Badge\" src=\"http://www.r-pkg.org/badges/version/xgboost\"></a>\n  <a href=\"http://cran.rstudio.com/web/packages/xgboost/index.html\"><img alt=\"CRAN Downloads\" src=\"http://cranlogs.r-pkg.org/badges/xgboost\"></a>\n\nYou have found the XGBoost R Package!\n\n.. toctree::\n  :maxdepth: 2\n  :titlesonly:\n\n***********\nGet Started\n***********\n\nSince XGBoost 3.0.0, the latest R package is available on `R-universe\n<https://dmlc.r-universe.dev/xgboost>`__ in addition to the CRAN package.\n\n* Check out the :doc:`Installation Guide </install>` for instructions on how to install\n  xgboost, and :doc:`Tutorials </tutorials/index>` for examples on how to use XGBoost for\n  various tasks.\n* Read the latest `API documentation <../r_docs/R-package/docs/reference/index.html>`__.\n* Read the `CRAN documentation <https://cran.r-project.org/web/packages/xgboost/xgboost.pdf>`__.\n\n*********\nVignettes\n*********\n\n.. toctree::\n\n  xgboost_introduction\n  xgboostfromJSON\n\n************\nOther topics\n************\n\n.. toctree::\n\n  Migrating code from previous XGBoost versions <migration_guide>\n  Handling of indexable elements <index_base>\n  Developer guide: parameters from core library <adding_parameters>\n"
  },
  {
    "path": "doc/R-package/index_base.rst",
    "content": ".. _index_base:\n\nHandling of indexable elements\n==============================\n\nThere are many functionalities in XGBoost which refer to indexable elements in a countable set, such as boosting rounds / iterations / trees in a model (which can be referred to by number), classes, categories / levels in categorical features, among others.\n\nXGBoost, being written in C++, uses base-0 indexing and considers ranges / sequences to be inclusive of the left end but not the right one - for example, a range (0, 3) would include the first three elements, numbered 0, 1, and 2.\n\nThe Python interface uses this same logic, since this is also the way that indexing in Python works, but other languages like R have different logic. In R, indexing is base-1 and ranges / sequences are inclusive of both ends - for example, to refer to the first three elements in a sequence, the interval would be written as (1, 3), and the elements numbered 1, 2, and 3.\n\nIn order to provide a more idiomatic R interface, XGBoost adjusts its user-facing R interface to follow this and similar R conventions, but internally, it needs to convert all these numbers to the format that the C interface uses. This is made more problematic by the fact that models are meant to be serializable and loadable in other interfaces, which will have different indexing logic.\n\nThe following adjustments are made in the R interface:\n\n- Slicing method for DMatrix, which takes an array of integers, is converted to base-0 indexing by subtracting 1 from each element. Note that this is done in the C-level wrapper function for R, unlike all other conversions which are done in R before being passed to C.\n- Slicing method for Booster takes a sequence defined by start, end, and step. 
The R interface is made to work the same way as R's ``seq`` from the user's POV, so it always adjusts the left end by subtracting one, and depending on whether the step size ends exactly or not at the right end, will also adjust the right end to be non-inclusive in C indexing.\n- Parameter ``iterationrange`` in ``predict`` is also made to behave the same way as R's ``seq``. Since it doesn't have a step size, just adjusting the left end by subtracting 1 suffices here.\n- ``best_iteration``, depending on the context, might be stored as both a C-level booster attribute, and as an R attribute. Since the C-level attributes are shared across interfaces and used in prediction methods, in order to improve compatibility, it leaves this C-level attribute in base-0 indexing, but the R attribute, if present, will be adjusted to base-1 indexing. Note that the ``predict`` method in R and other interfaces will look at the C-level attribute only.\n- Other references to iteration numbers or boosting rounds, such as when printing metrics or saving model snapshots, also follow base-1 indexing. These other references are coded entirely in R, as the C-level functions do not handle such functionalities.\n- Terminal leaf / node numbers are returned in base-0 indexing, just like they come from the C interface.\n- Tree numbers in plots follow base-1 indexing. Note that these are only displayed when producing these plots through the R interface's own handling of DiagrammeR objects, but not when using the C-level GraphViz 'dot' format generator for plots.\n- Feature numbers when producing feature importances, JSONs, trees-to-tables, and SHAP; are all following base-0 indexing.\n- Categorical features are defined in R as a ``factor`` type which encodes with base-1 indexing. 
When categorical features are passed as R ``factor`` types, the conversion is done automatically to base-0 indexing, but if the user wishes to manually supply categorical features as already-encoded integers, then those integers need to already be in base-0 encoding.\n- Categorical levels (categories) in outputs such as plots, JSONs, and trees-to-tables; are also referred to using base-0 indexing, regardless of whether they went into the model as integers or as ``factor``-typed columns.\n- Categorical labels for DMatrices do not undergo any extra processing - the user must supply base-0 encoded labels.\n- A function to retrieve class-specific coefficients when using the linear coefficients history callback takes a class index parameter, which also does not undergo any conversion (i.e. user must pass a base-0 index), in order to match with the label logic - that is, the same class index will refer to the class encoded with that number in the DMatrix ``label`` field.\n\nNew additions to the R interface that take on indexable elements should be mindful of these conventions and try to mimic R's behavior as much as possible.\n"
  },
  {
    "path": "doc/R-package/migration_guide.rst",
    "content": ".. _migation_guide:\n\nMigrating code from previous XGBoost versions\n=============================================\n\nXGBoost's R language bindings had large breaking changes between versions 1.x and 2.x. R code that was working with past XGBoost versions might require modifications to work with the newer versions. This guide outlines the main differences:\n\n- Function ``xgboost()``:\n    - Previously, this function accepted arguments 'data' and 'label', which have now been renamed to 'x' and 'y', in line with other popular R packages.\n    - Previously, the 'data' argument which is now 'x' had to be passed as either an XGBoost 'DMatrix' or as an R matrix. Now the argument allows R data.frames, matrices, and sparse matrices from the 'Matrix' package, but not XGBoost's own DMatrices. Categorical columns will be deduced from the types of the columns when passing a data.frame.\n    - Previously, the 'label' data which is now 'y' had to be passed to ``xgboost()`` encoded in the format used by the XGBoost core library - meaning: binary variables had to be encoded to 0/1, bounds for survival objectives had to be passed as different arguments, among others. In the newest versions, 'y' now doesn't need to be manually encoded beforehand: it should be passed as an R object of the corresponding class as regression functions from base R and core R packages for the corresponding XGBoost objective - e.g. classification problems should be passed a ``factor``, survival problems a ``Surv``, regression problems a numeric vector, and so on. Learning-to-rank is not supported by ``xgboost()``, but is supported by ``xgb.train``.\n    - Previously, ``xgboost()`` accepted both a ``params`` argument and named arguments under ``...``. Now all training parameters should be passed as named arguments, and all accepted parameters are explicit function arguments with in-package documentation. 
Some parameters are not allowed as they are determined automatically from the rest of the data, such as the number of classes for multi-class classification which is determined automatically from 'y'. As well, parameters that have synonyms or which are accepted under different possible arguments (e.g. \"eta\" and \"learning_rate\") now accept only their more descriptive form (so \"eta\" is not accepted, but \"learning_rate\" is).\n    - Models produced by this function ``xgboost()`` are now returned with a different class \"xgboost\", which is a subclass of \"xgb.Booster\" but with more metadata and a ``predict`` method with different defaults.\n    - This function ``xgboost()`` is now meant for interactive usage only. For package developers who wish to incorporate the XGBoost package, it is highly recommended to use ``xgb.train`` instead, which is a lower-level function that closely mimics the same function from the Python package and is meant to be less subject to breaking changes.\n\n- Function ``xgb.train()``:\n    - Previously, ``xgb.train()`` allowed arguments under both a \"params\" list and as named arguments under ``...``. Now, all training arguments should be passed under ``params``.\n    - In order to make it easier to discover and pass parameters, there is now a function ``xgb.params`` which can generate a list to pass to the ``params`` argument. ``xgb.params`` is simply a function with named arguments that lists everything accepted by ``xgb.train`` and offers in-package documentation for all of the arguments, returning a simple named list.\n    - Arguments that are meant to be consumed by the DMatrix constructor must be passed directly to ``xgb.DMatrix`` instead (e.g. argument for categorical features or for feature names).\n    - Some arguments have been renamed (e.g. previous 'watchlist' is now 'evals', in line with the Python package).\n    - The format of the callbacks to pass to ``xgb.train`` has largely been re-written. 
See the documentation of ``xgb.Callback`` for details.\n\n- Function ``xgb.DMatrix()``:\n    - This function now accepts 'data.frame' inputs and determines which features are categorical from their types - anything with type 'factor' or 'character' will be considered as categorical. Note that when passing data to the 'predict' method, the 'factor' variables must have the same encoding (i.e. same levels) as XGBoost will not re-encode them for you.\n    - Whereas previously some arguments such as the type of the features had to be passed as a list under argument 'info', they are all now direct function arguments to 'xgb.DMatrix' instead.\n    - There are now other varieties of DMatrix constructors that might better fit some use cases - for example, there is 'xgb.QuantileDMatrix' which will quantize the features straight away (therefore avoiding redundant copies and reducing memory consumption) for the histogram method in XGBoost (but note that quantized DMatrices are not usable with the 'exact' sorted-indices method).\n    - Note that data for 'label' still needs to be encoded in the format consumed by the core XGBoost library - e.g. classification objectives should receive 'label' data encoded as zeros and ones.\n    - Creation of DMatrices from text files has been deprecated.\n\n- Function ``xgb.cv()``:\n    - While previously this function accepted 'data' and 'label' similarly to the old ``xgboost()``, now it accepts only ``xgb.DMatrix`` objects.\n    - The function's scope has been expanded to support more functionalities offered by XGBoost, such as survival and learning-to-rank objectives.\n\n- Method ``predict``:\n    - There are now two predict methods with different default arguments according to whether the model was produced through ``xgboost()`` or through ``xgb.train()``. 
Function ``xgboost()`` is more geared towards interactive usage, and thus the defaults for the 'predict' method on such objects (class \"xgboost\") by default will perform more data validations such as checking that column names match and reordering them otherwise. The 'predict' method for models created through ``xgb.train()`` (class \"xgb.Booster\") has the same defaults as before, so for example it will not reorder columns to match names under the default behavior.\n    - The 'predict' method for objects of class \"xgboost\" (produced by ``xgboost()``, not by ``xgb.train()``) now can control the types of predictions to make through an argument ``type``, similarly as the 'predict' methods in the 'stats' module of base R - e.g. one can now do ``predict(model, type=\"class\")``; while the 'predict' method for \"xgb.Booster\" objects (produced by ``xgb.train()``), just like before, controls those through separate arguments such as ``outputmargin``.\n    - Previously, predictions using a subset of the trees were using base-0 indexing and range syntax mimicking Python's ranges, whereas now they use base-1 indexing as is common in R, and their behavior for ranges matches that of R's ``seq`` function. Note that the syntax for \"use all trees\" and \"use trees up to early-stopped criteria\" have changed (see documentation for details).\n\n- Booster objects:\n    - The structure of these objects has been modified - now they are represented as a simple R \"ALTLIST\" (a special kind of 'list' object) with additional attributes.\n    - These objects now cannot be modified by adding more fields to them, but metadata for them can be added as attributes.\n    - The objects distinguish between two types of attributes:\n\n        - R-side attributes (which can be accessed and modified through R function ``attributes(model)`` and ``attributes(model)$field <- val``), which allow arbitrary objects. 
Many attributes are automatically added by the model building functions, such as evaluation logs (a ``data.table`` with metrics calculated per iteration), which previously were model fields.\n        - C-level attributes, which allow only JSON-compliant data and which can be accessed and set through function ``xgb.attributes(model)``. These C-level attributes are shareable through serialized models in different XGBoost interfaces, while the R-level ones are specific to the R interface. Some attributes that are standard among language bindings of XGBoost, such as the best iteration, are kept as C attributes.\n    - Previously, models that were just de-serialized from an on-disk format required calling method 'xgb.Booster.complete' on them to finish the full de-serialization process before being usable, or would otherwise call this method on their own automatically at the first call to 'predict'. Serialization is now handled more gracefully, and there are no additional functions/methods involved - i.e. 
if one saves a model to disk with ``saveRDS()`` and then reads it back with ``readRDS()``, the model will be fully loaded straight away, without needing to call additional methods on it.\n\nOther recommendations\n---------------------\n\nBy default, XGBoost might recognize that some parameter has been removed or renamed from how it was in a previous version, and still accept the same function call as it used to do before with the renamed or removed arguments, but issuing a deprecation warning along the way that highlights the changes.\n\nThese behaviors will be removed in future versions, and function calls which currently return deprecation warnings will stop working in the future, so in order to make sure that code calling XGBoost will still keep working, it should be ensured that it doesn't issue deprecation warnings.\n\nOptionally, these deprecation warnings can be turned into errors (while still keeping other types of warnings as warnings) through an option \"xgboost.strict_mode\" - example:\n\n.. code-block:: r\n\n    options(\"xgboost.strict_mode\" = TRUE)\n\nIt can also be controlled through an environment variable `XGB_STRICT_MODE=1`, which takes precedence over the R option - e.g.:\n\n.. code-block:: r\n\n    Sys.setenv(\"XGB_STRICT_MODE\" = \"1\")\n\nIt is highly recommended for package developers to enable this option during their package checks to ensure better compatibility with XGBoost.\n"
  },
  {
    "path": "doc/R-package/r_docs/index.rst",
    "content": ":orphan:\n\n=============\nXGBoost R API\n=============\n"
  },
  {
    "path": "doc/README",
    "content": "The documentation of xgboost is generated with recommonmark and sphinx.\n\nYou can build it locally by typing \"make html\" in this folder.\n\nCheckout https://recommonmark.readthedocs.org for guide on how to write markdown with extensions used in this doc, such as math formulas and table of content.\n"
  },
  {
    "path": "doc/build.rst",
    "content": "####################\nBuilding From Source\n####################\n\nThis page gives instructions on how to build and install XGBoost from the source code on\nvarious systems.  If the instructions do not work for you, please feel free to ask\nquestions at `GitHub <https://github.com/dmlc/xgboost/issues>`__.\n\n.. note:: Pre-built binary is available: now with GPU support\n\n  Consider installing XGBoost from a pre-built binary, to avoid the trouble of building XGBoost from the source.  Checkout :doc:`Installation Guide </install>`.\n\n.. contents:: Contents\n  :local:\n\n.. _get_source:\n\n*************************\nObtaining the Source Code\n*************************\n\nTo obtain the development repository of XGBoost, one needs to use ``git``. XGBoost uses\nGit submodules to manage dependencies. So when you clone the repo, remember to specify\n``--recursive`` option:\n\n  .. code-block:: bash\n\n    git clone --recursive https://github.com/dmlc/xgboost\n\n.. _build_shared_lib:\n\n***************************\nBuilding the Shared Library\n***************************\n\nThis section describes the procedure to build the shared library and CLI interface\nindependently. For building language specific package, see corresponding sections in this\ndocument.\n\n- On Linux and other UNIX-like systems, the target library is ``libxgboost.so``\n- On MacOS, the target library is ``libxgboost.dylib``\n- On Windows the target library is ``xgboost.dll``\n\nThis shared library is used by different language bindings (with some additions depending\non the binding you choose).  The minimal building requirement is\n\n- A recent C++ compiler supporting C++17. We use gcc, clang, and MSVC for daily\n  testing. Mingw is only used for the R package and has limited features.\n- CMake 3.18 or higher.\n\nFor a list of CMake options like GPU support, see ``#-- Options`` in CMakeLists.txt on top\nlevel of source tree. 
We use ``ninja`` for build in this document, specified via the CMake\nflag ``-GNinja``. If you prefer other build tools like ``make`` or ``Visual Studio 17\n2022``, please change the corresponding CMake flags. Consult the `CMake generator\n<https://cmake.org/cmake/help/latest/manual/cmake-generators.7.html>`_ document when\nneeded.\n\n.. _running_cmake_and_build:\n\nRunning CMake and build\n=======================\n\nAfter obtaining the source code, one builds XGBoost by running CMake:\n\n.. code-block:: bash\n\n  cd xgboost\n  cmake -B build -S . -DCMAKE_BUILD_TYPE=RelWithDebInfo -GNinja\n  cd build && ninja\n\n\nThe same command applies for both Unix-like systems and Windows. After running the\nbuild, one should see a shared object under the ``xgboost/lib`` directory.\n\n- Building on MacOS\n\n  On MacOS, one needs to obtain ``libomp`` from `Homebrew <https://brew.sh/>`_ first:\n\n  .. code-block:: bash\n\n    brew install libomp\n\n- Visual Studio\n\n  The latest Visual Studio has builtin support for CMake projects. If you prefer using an\n  IDE over the command line, you can use the ``open with visual studio`` option in the\n  right-click menu under the ``xgboost`` source directory. Consult the VS `document\n  <https://learn.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=msvc-170>`__\n  for more info.\n\n.. _build_gpu_support:\n\n\nBuilding with GPU support\n=========================\n\nXGBoost can be built with GPU support for both Linux and Windows using CMake. See\n`Building R package with GPU support`_ for special instructions for R.\n\nAn up-to-date version of the CUDA toolkit is required.\n\n.. note:: Checking your compiler version\n\n    CUDA is really picky about supported compilers, a table for the compatible compilers\n    for the latest CUDA version on Linux can be seen `here\n    <https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html>`_.\n\nSome distros package a compatible ``gcc`` version with CUDA. 
If you run into compiler\nerrors with ``nvcc``, try specifying the correct compiler with\n``-DCMAKE_CXX_COMPILER=/path/to/correct/g++ -DCMAKE_C_COMPILER=/path/to/correct/gcc``. On\nArch Linux, for example, both binaries can be found under ``/opt/cuda/bin/``. In addition,\nthe ``CMAKE_CUDA_HOST_COMPILER`` parameter can be useful.\n\nFrom the command line on Linux starting from the XGBoost directory, add the ``USE_CUDA``\nflag:\n\n.. code-block:: bash\n\n  cmake -B build -S . -DUSE_CUDA=ON -GNinja\n  cd build && ninja\n\nTo speed up compilation, the compute version specific to your GPU could be passed to cmake\nas, e.g., ``-DCMAKE_CUDA_ARCHITECTURES=75``. A quick explanation and numbers for some\narchitectures can be found `in this page <https://developer.nvidia.com/cuda-gpus>`_.\n\n- Faster distributed GPU training with NCCL\n\n  By default, distributed GPU training is enabled with the option\n  ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link\n  <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines,\n  **Distributed GPU training is available only for Linux**.\n\n  .. code-block:: bash\n\n    cmake -B build -S . -DUSE_CUDA=ON -DUSE_NCCL=ON -DNCCL_ROOT=/path/to/nccl2 -GNinja\n    cd build && ninja\n\n  Some additional flags are available for NCCL, ``BUILD_WITH_SHARED_NCCL`` enables\n  building XGBoost with NCCL as a shared library, while ``USE_DLOPEN_NCCL`` enables\n  XGBoost to load NCCL at runtime using ``dlopen``.\n\nFederated Learning\n==================\n\nThe federated learning plugin requires ``grpc`` and ``protobuf``. To install grpc, refer\nto the `installation guide from the gRPC website\n<https://grpc.io/docs/languages/cpp/quickstart/>`_. Alternatively, one can use the\n``libgrpc`` and the ``protobuf`` package from conda forge if conda is available. After\nobtaining the required dependencies, enable the flag: ``-DPLUGIN_FEDERATED=ON`` when\nrunning CMake. 
Please note that only Linux is supported for the federated plugin.\n\n\n.. code-block:: bash\n\n  cmake -B build -S . -DPLUGIN_FEDERATED=ON -GNinja\n  cd build && ninja\n\n\n.. _build_python:\n\n***********************************\nBuilding Python Package from Source\n***********************************\n\nThe Python package is located at ``python-package/``.\n\nBuilding Python Package with Default Toolchains\n===============================================\nThere are several ways to build and install the package from source:\n\n1. Build C++ core with CMake first\n\n  You can first build C++ library using CMake as described in :ref:`build_shared_lib`.\n  After compilation, a shared library will appear in ``lib/`` directory.\n  On Linux distributions, the shared library is ``lib/libxgboost.so``.\n  The install script ``pip install .`` will reuse the shared library instead of compiling\n  it from scratch, making it quite fast to run.\n\n  .. code-block:: console\n\n    $ cd python-package/\n    $ pip install .  # Will re-use lib/libxgboost.so\n\n2. Install the Python package directly\n\n  If the shared object is not present, the Python project setup script will try to run the\n  CMake build command automatically. Navigate to the ``python-package/`` directory and\n  install the Python package by running:\n\n  .. code-block:: console\n\n    $ cd python-package/\n    $ pip install -v . # Builds the shared object automatically.\n\n  which will compile XGBoost's native (C++) code using default CMake flags.  To enable\n  additional compilation options, pass corresponding ``--config-settings``:\n\n  .. code-block:: console\n\n    $ pip install -v . --config-settings use_cuda=True --config-settings use_nccl=True\n\n  Use Pip 22.1 or later to use ``--config-settings`` option.\n\n  Here are the available options for ``--config-settings``:\n\n  .. 
literalinclude:: ../python-package/packager/build_config.py\n    :language: python\n    :start-at: @dataclasses.dataclass\n    :end-before: def _set_config_setting(\n\n  ``use_system_libxgboost`` is a special option. See Item 4 below for\n  detailed description.\n\n  .. note:: Verbose flag recommended\n\n    As ``pip install .`` will build C++ code, it will take a while to complete.\n    To ensure that the build is progressing successfully, we suggest that\n    you add the verbose flag (``-v``) when invoking ``pip install``.\n\n\n3. Editable installation\n\n  To further enable rapid development and iteration, we provide an **editable\n  installation**.  In an editable installation, the installed package is simply a symbolic\n  link to your working copy of the XGBoost source code. So every changes you make to your\n  source directory will be immediately visible to the Python interpreter. To install\n  XGBoost as editable installation, first build the shared library as previously described\n  in :ref:`running_cmake_and_build`, then install the Python package with the ``-e`` flag:\n\n  .. code-block:: bash\n\n    # Build shared library libxgboost.so\n    cmake -B build -S . -GNinja\n    cd build && ninja\n    # Install as editable installation\n    cd ../python-package\n    pip install -e .\n\n4. Reuse the ``libxgboost.so`` on system path.\n\n  This option is useful for package managers that wish to separately package\n  ``libxgboost.so`` and the XGBoost Python package. For example, Conda\n  publishes ``libxgboost`` (for the shared library) and ``py-xgboost``\n  (for the Python package).\n\n  To use this option, first make sure that ``libxgboost.so`` exists in the system library path:\n\n  .. code-block:: python\n\n    import sys\n    import pathlib\n    libpath = pathlib.Path(sys.base_prefix).joinpath(\"lib\", \"libxgboost.so\")\n    assert libpath.exists()\n\n  Then pass ``use_system_libxgboost=True`` option to ``pip install``:\n\n  .. 
code-block:: bash\n\n    cd python-package\n    pip install . --config-settings use_system_libxgboost=True\n\n\n.. note::\n\n  See :doc:`contrib/python_packaging` for instructions on packaging and distributing\n  XGBoost as Python distributions.\n\n\n******************************\nBuilding R Package From Source\n******************************\n\nBy default, the package installed by running ``install.packages`` is built from source\nusing the package from `CRAN <https://cran.r-project.org/>`__.  Here we list some other\noptions for installing the development version.\n\nInstalling the development version (Linux / Mac OSX)\n====================================================\n\nMake sure you have installed git and a recent C++ compiler supporting C++17 (See above\nsections for requirements of building C++ core).\n\nDue to the use of git-submodules, ``remotes::install_github()`` cannot be used to\ninstall the latest version of R package. Thus, one has to run git to check out the code\nfirst, see :ref:`get_source` on how to initialize the git repository for XGBoost. The\nsimplest way to install the R package after obtaining the source code is:\n\n.. code-block:: bash\n\n  cd R-package\n  R CMD INSTALL .\n\nUse the environment variable ``MAKEFLAGS=-j$(nproc)`` if you want to speed up the build. As\nan alternative, the package can also be loaded through ``devtools::load_all()`` from the\nsame subfolder ``R-package`` in the repository's root, and by extension, can be installed\nthrough RStudio's build panel if one adds that folder ``R-package`` as an R package\nproject in the RStudio IDE.\n\n.. code-block:: R\n\n  library(devtools)\n  devtools::load_all(path = \"/path/to/xgboost/R-package\")\n\nOn Linux, if you want to use the CMake build for greater flexibility around compile flags,\nthe earlier snippet can be replaced by:\n\n.. code-block:: bash\n\n  cmake -B build -S . -DR_LIB=ON -GNinja\n  cd build && ninja install\n\n.. 
warning::\n\n   MSVC is not supported for the R package as it has difficulty handling R C\n   headers. CMake build is not supported either.\n\nNote in this case that ``cmake`` will not take configurations from your regular\n``Makevars`` file (if you have such a file under ``~/.R/Makevars``) - instead, custom\nconfigurations such as compilers to use and flags need to be set through CMake variables\nlike ``-DCMAKE_CXX_COMPILER``.\n\n\n.. _r_gpu_support:\n\nBuilding R package with GPU support\n===================================\n\nThe procedure and requirements are similar to those in :ref:`build_gpu_support`, so make sure to read it first.\n\nOn Linux, starting from the XGBoost directory type:\n\n.. code-block:: bash\n\n  cmake -B build -S . -DUSE_CUDA=ON -DR_LIB=ON\n  cmake --build build --target install -j$(nproc)\n\nWhen default target is used, an R package shared library would be built in the ``build`` area.\nThe ``install`` target, in addition, assembles the package files with this shared library under ``build/R-package`` and runs ``R CMD INSTALL``.\n\n*********************\nBuilding JVM Packages\n*********************\n\nBuilding XGBoost4J using Maven requires Maven 3 or newer, Java 7+ and CMake 3.18+ for\ncompiling Java code as well as the Java Native Interface (JNI) bindings. In addition, a\nPython script is used during configuration, make sure the command ``python`` is available\non your system path (some distros use the name ``python3`` instead of ``python``).\n\nBefore you install XGBoost4J, you need to define environment variable ``JAVA_HOME`` as your JDK directory to ensure that your compiler can find ``jni.h`` correctly, since XGBoost4J relies on JNI to implement the interaction between the JVM and native libraries.\n\nAfter your ``JAVA_HOME`` is defined correctly, it is as simple as running ``mvn package`` under jvm-packages directory to install XGBoost4J. 
You can also skip the tests by running ``mvn -DskipTests=true package``, if you are sure about the correctness of your local setup.\n\nTo publish the artifacts to your local maven repository, run\n\n.. code-block:: bash\n\n  mvn install\n\nOr, if you would like to skip tests, run\n\n.. code-block:: bash\n\n  mvn -DskipTests install\n\nThis command will publish the xgboost binaries, the compiled java classes as well as the java sources to your local repository. Then you can use XGBoost4J in your Java projects by including the following dependency in ``pom.xml``:\n\n.. code-block:: xml\n\n  <dependency>\n    <groupId>ml.dmlc</groupId>\n    <artifactId>xgboost4j</artifactId>\n    <version>latest_source_version_num</version>\n  </dependency>\n\nFor sbt, please add the repository and dependency in build.sbt as follows:\n\n.. code-block:: scala\n\n  resolvers += \"Local Maven Repository\" at \"file://\"+Path.userHome.absolutePath+\"/.m2/repository\"\n\n  \"ml.dmlc\" % \"xgboost4j\" % \"latest_source_version_num\"\n\nIf you want to use XGBoost4J-Spark, replace ``xgboost4j`` with ``xgboost4j-spark``.\n\n.. note:: XGBoost4J-Spark requires Apache Spark 3.4+\n\n  XGBoost4J-Spark now requires **Apache Spark 3.4+**. Latest versions of XGBoost4J-Spark use facilities of ``org.apache.spark.ml.param.shared`` extensively to provide for a tight integration with Spark MLLIB framework, and these facilities are not fully available on earlier versions of Spark.\n\n  Also, make sure to install Spark directly from `Apache website <https://spark.apache.org/>`_. **Upstream XGBoost is not guaranteed to work with third-party distributions of Spark, such as Cloudera Spark.** Consult appropriate third parties to obtain their distribution of XGBoost.\n\nAdditional System-dependent Features\n====================================\n\n- OpenMP on MacOS: See :ref:`running_cmake_and_build` for installing ``openmp``. 
The flag\n  ``mvn -Duse.openmp=OFF`` can be used to disable OpenMP support.\n- GPU support can be enabled by passing an additional flag to maven ``mvn -Duse.cuda=ON\n  install``. See :ref:`build_gpu_support` for more info. In addition, ``-Dplugin.rmm=ON``\n  can enable the optional RMM support.\n\n**************************\nBuilding the Documentation\n**************************\n\nXGBoost uses `Sphinx <https://www.sphinx-doc.org/en/stable/>`_ for documentation.  To\nbuild it locally, you need an installed XGBoost with all its dependencies along with:\n\n* System dependencies\n\n  - git\n  - graphviz\n\n* Python dependencies\n\n  Check out the ``requirements.txt`` file under ``doc/``\n\nUnder ``xgboost/doc`` directory, run ``make <format>`` with ``<format>`` replaced by the\nformat you want.  For a list of supported formats, run ``make help`` under the same\ndirectory. This builds a partial document for Python but not other language bindings. To\nbuild the full document, see :doc:`/contrib/docs`.\n"
  },
  {
    "path": "doc/c++.rst",
    "content": "###############\nXGBoost C++ API\n###############\n\nStarting from the 1.0 release, CMake generates installation rules to export all C++ headers. But\nthe C++ interface is much closer to the internals of XGBoost than other language bindings.\nAs a result, it changes quite often and we don't maintain its stability.  Along with the\nplugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some\nexisting C++ headers to gain more access to the internals of XGBoost.\n\n* `C++ interface documentation (latest master branch) <./dev/files.html>`_\n* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_\n"
  },
  {
    "path": "doc/c.rst",
    "content": "#################\nXGBoost C Package\n#################\n\nXGBoost implements a set of C API designed for various bindings, we maintain its stability\nand the CMake/make build interface.  See :doc:`/tutorials/c_api_tutorial` for an\nintroduction and ``demo/c-api/`` for related examples.  Also one can generate doxygen\ndocument by providing ``-DBUILD_C_DOC=ON`` as parameter to ``CMake`` during build, or\nsimply look at function comments in ``include/xgboost/c_api.h``. The reference is exported\nto sphinx with the help of breathe, which doesn't contain links to examples but might be\neasier to read. For the original doxygen pages please visit:\n\n* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_\n* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_\n\n***************\nC API Reference\n***************\n\n.. contents::\n  :backlinks: none\n  :local:\n\nLibrary\n=======\n\n.. doxygengroup:: Library\n   :project: xgboost\n\nDMatrix\n=======\n\n.. doxygengroup:: DMatrix\n   :project: xgboost\n\n.. _c_streaming:\n\nStreaming\n---------\n\n.. doxygengroup:: Streaming\n   :project: xgboost\n\nBooster\n=======\n\n.. doxygengroup:: Booster\n   :project: xgboost\n\nPrediction\n----------\n\n.. doxygengroup:: Prediction\n   :project: xgboost\n\nSerialization\n-------------\n\n.. doxygengroup:: Serialization\n   :project: xgboost\n\nCollective\n==========\n\n.. doxygengroup:: Collective\n   :project: xgboost\n"
  },
  {
    "path": "doc/changes/index.rst",
    "content": "#############\nRelease Notes\n#############\n\nFor release notes prior to the 2.1 release, please see `news <https://github.com/dmlc/xgboost/blob/master/NEWS.md>`__ .\n\n.. toctree::\n  :maxdepth: 1\n  :caption: Contents:\n\n  v3.2.0\n  v3.1.0\n  v3.0.0\n  v2.1.0\n"
  },
  {
    "path": "doc/changes/v2.1.0.rst",
    "content": "################################\n2.1.4 Patch Release (2025 Feb 6)\n################################\n\nThe 2.1.4 patch release incorporates the following fixes on top of the 2.1.3 release:\n\n- XGBoost is now compatible with scikit-learn 1.6 (#11021, #11162)\n- Build wheels with CUDA 12.8 and enable Blackwell support (#11187, #11202)\n- Adapt to RMM 25.02 logger changes (#11153)\n\n#################################\n2.1.3 Patch Release (2024 Nov 26)\n#################################\n\nThe 2.1.3 patch release makes the following bug fixes:\n\n- [pyspark]  Support large model size (#10984).\n- Fix rng for the column sampler (#10998).\n- Handle `cudf.pandas` proxy objects properly (#11014).\n\n#################################\n2.1.2 Patch Release (2024 Oct 23)\n#################################\n\nThe 2.1.2 patch release makes the following bug fixes:\n\n- Clean up and modernize release-artifacts.py (#10818)\n- Fix ellpack categorical feature with missing values. (#10906)\n- Fix unbiased ltr with training continuation. (#10908)\n- Fix potential race in feature constraint. (#10719)\n- Fix boolean array for arrow-backed DF. (#10527)\n- Ensure that pip check does not fail due to a bad platform tag (#10755)\n- Check cub errors (#10721)\n- Limit the maximum number of threads. (#10872)\n- Fixes for large size clusters. 
(#10880)\n- POSIX compliant poll.h and mmap (#10767)\n\n#################################\n2.1.1 Patch Release (2024 Jul 31)\n#################################\n\nThe 2.1.1 patch release makes the following bug fixes:\n\n- [Dask] Disable broadcast in the scatter call so that predict function won't hang (#10632)\n- [Dask] Handle empty partitions correctly (#10559)\n- Fix federated learning for the encrypted GRPC backend (#10503)\n- Fix a race condition in column splitter (#10572)\n- Gracefully handle cases where system files like /sys/fs/cgroup/cpu.max are not readable by the user (#10623)\n- Fix build and C++ tests for FreeBSD (#10480)\n- Clarify the requirement Pandas 1.2+ (#10476)\n- More robust endianness detection in R package build (#10642)\n\nIn addition, it contains several enhancements:\n\n- Publish JVM packages targeting Linux ARM64 (#10487)\n- Publish a CPU-only wheel under name xgboost-cpu (#10603)\n- Support building with CUDA Toolkit 12.5 and latest CCCL (#10624, #10633, #10574)\n\n\n###################\n2.1.0 (2024 Jun 20)\n###################\n\nWe are thrilled to announce the XGBoost 2.1 release. This note will start by summarizing some general changes and then highlighting specific package updates. As we are working on a `new R interface <https://github.com/dmlc/xgboost/issues/9810>`_, this release will not include the R package. We'll update the R package as soon as it's ready. Stay tuned!\n\n.. contents::\n  :backlinks: none\n  :local:\n\n***********************\nNetworking Improvements\n***********************\n\nAn important ongoing work for XGBoost, which we've been collaborating on, is to support resilience for improved scaling and federated learning on various platforms. The existing networking library in XGBoost, adopted from the RABIT project, can no longer meet the feature demand. We've revamped the RABIT module in this release to pave the way for future development. 
The choice of using an in-house version instead of an existing library is due to the active development status with frequent new feature requests like loading extra plugins for federated learning. The new implementation features:\n\n- Both CPU and GPU communication (based on NCCL).\n- A reusable tracker for both the Python package and JVM packages. With the new release, the JVM packages no longer require Python as a runtime dependency.\n- Supports federated communication patterns for both CPU and GPU.\n- Supports timeout. The high-level interface parameter is currently hard-coded to 30 minutes, which we plan to improve.\n- Supports significantly more data types.\n- Supports thread-based workers.\n- Improved handling for worker errors, including better error messages when one of the peers dies during training.\n- Work with IPv6. Currently, this is only supported by the dask interface.\n- Built-in support for various operations like broadcast, allgatherV, allreduce, etc.\n\nRelated PRs (#9597, #9576, #9523, #9524, #9593, #9596, #9661, #10319, #10152, #10125, #10332, #10306, #10208, #10203, #10199, #9784, #9777, #9773, #9772, #9759, #9745, #9695, #9738, #9732, #9726, #9688, #9681, #9679, #9659, #9650, #9644, #9649, #9917, #9990, #10313, #10315, #10112, #9531, #10075, #9805, #10198, #10414).\n\nThe existing option of using ``MPI`` in RABIT is removed in the release. (#9525)\n\n*****************************\nNCCL is now fetched from PyPI\n*****************************\n\nIn the previous version, XGBoost statically linked NCCL, which significantly increased the binary size and led to hitting the PyPI repository limit. With the new release, we have made a significant improvement. The new release can now dynamically load NCCL from an external source, reducing the binary size. For the PyPI package, the ``nvidia-nccl-cu12`` package will be fetched during installation. 
With more downstream packages reusing NCCL, we expect the user environments to be slimmer in the future as well. (#9796, #9804, #10447)\n\n***************************************************\nParts of the Python package now require glibc 2.28+\n***************************************************\nStarting from 2.1.0, XGBoost Python package will be distributed in two variants:\n\n* ``manylinux_2_28``: for recent Linux distros with glibc 2.28 or newer. This variant comes with all features enabled.\n* ``manylinux2014``: for old Linux distros with glibc older than 2.28. This variant does not support GPU algorithms or federated learning.\n\nThe ``pip`` package manager will automatically choose the correct variant depending on your system.\n\nStarting from **May 31, 2025**, we will stop distributing the ``manylinux2014`` variant and exclusively\ndistribute the ``manylinux_2_28`` variant. We made this decision so that our CI/CD pipeline won't have to\ndepend on software components that reached end-of-life (such as CentOS 7). We strongly encourage\neveryone to migrate to recent Linux distros in order to use future versions of XGBoost.\n\nNote. If you want to use GPU algorithms or federated learning on an older Linux distro, you have\ntwo alternatives:\n\n1. Upgrade to a recent Linux distro with glibc 2.28+.  OR\n2. Build XGBoost from the source.\n\n************\nMulti-output\n************\n\nWe continue the work on multi-target and vector leaf in this release:\n\n- Revise the support for custom objectives with a new API, ``XGBoosterTrainOneIter``. This new function supports strided matrices and CUDA inputs. In addition, custom objectives now return the correct shape for prediction. (#9508)\n- The ``hinge`` objective now supports multi-target regression (#9850)\n- Fix the gain calculation with vector leaf (#9978)\n- Support graphviz plot for multi-target tree. (#10093)\n- Fix multi-output with alternating strategies. 
(#9933)\n\nPlease note that the feature is still in progress and not suitable for production use.\n\n******************\nFederated Learning\n******************\n\nProgress has been made on federated learning with improved support for column-split, including the following updates:\n\n- Column split work for both CPU and GPU. In addition, categorical data is now compatible with column split. (#9562, #9609, #9611, #9628, #9539, #9578, #9685, #9623, #9613, #9511, #9384, #9595)\n-  The use of UBJson to serialize split entries for column split has been implemented, aiding vector-leaf with column-based data split. (#10059, #10055, #9702)\n- Documentation and small fixes. (#9610, #9552, #9614, #9867)\n\n*****************************\nOngoing work for SYCL support\n*****************************\n\nXGBoost is developing a SYCL plugin for SYCL devices, starting with the ``hist`` tree method. (#10216, #9800, #10311, #9691, #10269, #10251, #10222, #10174, #10080, #10057, #10011, #10138, #10119, #10045, #9876, #9846, #9682) XGBoost now supports launchable inference on SYCL devices, and that work on adding SYCL support for training is ongoing.\n\nLooking ahead, we plan to complete the training in coming releases and then focus on improving test coverage for SYCL, particularly for Python tests.\n\n*************\nOptimizations\n*************\n\n- Implement column sampler in CUDA for GPU-based tree methods. This helps us get faster training time when column sampling is employed (#9785)\n- CMake LTO and CUDA arch (#9677)\n- Small optimization to external memory with a thread pool. This reduces the number of threads launched during iteration. (#9605, #10288, #10374)\n\n********************************\nDeprecation and breaking changes\n********************************\n\nPackage-specific breaking changes are outlined in respective sections. 
Here we list general breaking changes in this release:\n\n- The command line interface is deprecated due to the increasing complexity of the machine learning ecosystem. Building a machine learning model using a command shell is no longer feasible and could mislead newcomers. (#9485)\n- ``Universal binary JSON`` is now the default format for saving models (#9947, #9958, #9954, #9955). See https://github.com/dmlc/xgboost/issues/7547 for more info.\n- The ``XGBoosterGetModelRaw`` is now removed after deprecation in 1.6. (#9617)\n- Drop support for loading remote files. This feature lacks any test. Users are encouraged to use dedicated libraries to fetch remote content. (#9504)\n- Remove the dense libsvm parser plugin. This plugin is never tested or documented (#9799)\n- ``XGDMatrixSetDenseInfo`` and ``XGDMatrixSetUIntInfo`` are now deprecated. Use the array interface based alternatives instead.\n\n********\nFeatures\n********\n\nThis section lists some new features that are general to all language bindings. For package-specific changes, please visit respective sections.\n\n- Adopt a new XGBoost logo (#10270)\n- Now supports dataframe data format in native XGBoost. This improvement enhances performance and reduces memory usage when working with dataframe-based structures such as pandas, arrow, and R dataframe. (#9828, #9616, #9905)\n- Change default metric for gamma regression to ``deviance``. (#9757)\n- Normalization for learning to rank is now optional with the introduction of the new ``lambdarank_normalization`` parameter. (#10094)\n- Contribution prediction with ``QuantileDMatrix`` on CPU. (#10043)\n- XGBoost on macOS no longer bundles OpenMP runtime. Users can install the latest runtime from their dependency manager of choice. (#10440). Along with this, JVM packages on macOS are now built with OpenMP support (#10449).\n\n*********\nBug fixes\n*********\n\n- Fix training with categorical data from external memory. (#10433)\n- Fix compilation with CTK-12. 
(#10123)\n- Fix inconsistent runtime library on Windows. (#10404)\n- Fix default metric configuration. (#9575)\n- Fix feature names with special characters. (#9923)\n- Fix global configuration for external memory training. (#10173)\n- Disable column sample by node for the exact tree method. (#10083)\n- Fix the ``FieldEntry`` constructor specialization syntax error (#9980)\n- Fix pairwise objective with NDCG metric along with custom gain. (#10100)\n- Fix the default value for ``lambdarank_pair_method``. (#10098)\n- Fix UBJSON with boolean values. No existing code is affected by this fix. (#10054)\n- Be more lenient on floating point errors for AUC. This prevents the AUC > 1.0 error. (#10264)\n- Check support status for categorical features. This prevents ``gblinear`` from treating categorical features as numerical. (#9946)\n\n********\nDocument\n********\n\nHere is a list of documentation changes not specific to any XGBoost package.\n\n- A new coarse map for XGBoost features to assist development. (#10310)\n- New language binding consistency guideline. (#9755, #9866)\n- Fixes, cleanups, small updates (#9501, #9988, #10023, #10013, #10143, #9904, #10179, #9781, #10340, #9658, #10182, #9822)\n- Update document for parameters (#9900)\n- Brief introduction to ``base_score``. (#9882)\n- Mention data consistency for categorical features. (#9678)\n\n**************\nPython package\n**************\n\nDask\n----\nOther than the changes in networking, we have some optimizations and document updates in dask:\n\n- Filter models on workers instead of clients; this prevents an OOM error on the client machine. (#9518)\n- Users are now encouraged to use `from xgboost import dask`  instead of `import xgboost.dask` to avoid drawing in unnecessary dependencies for non-dask users. (#9742)\n- Add seed to demos. (#10009)\n- New document for using dask XGBoost with k8s. (#10271)\n- Workaround potentially unaligned pointer from an empty partition. 
(#10418)\n- Workaround a race condition in the latest dask. (#10419)\n- [doc] Add typing to dask demos. (#10207)\n\nPySpark\n-------\n\nPySpark has several new features along with some small fixes:\n\n- Support stage-level scheduling for training on various platforms, including yarn/k8s. (#9519, #10209, #9786, #9727)\n- Support GPU-based transform methods (#9542)\n- Avoid expensive repartition when appropriate. (#10408)\n- Refactor the logging and the GPU code path (#10077, #9724)\n- Sort workers by task ID. This helps the PySpark interface obtain deterministic results. (#10220)\n- Fix PySpark with ``verbosity=3``. (#10172)\n- Fix spark estimator doc. (#10066)\n- Rework transform for improved code reusing. (#9292)\n\nBreaking changes\n----------------\n\nFor the Python package, ``eval_metric``, ``early_stopping_rounds``, and ``callbacks`` are now removed from the ``fit`` method in the sklearn interface. They were deprecated in 1.6. Use the parameters with the same name in constructors instead. (#9986)\n\nFeatures\n--------\n\nFollowing is a list of new features in the Python package:\n\n- Support sample weight in sklearn custom objective. (#10050)\n- New supported data types, including ``cudf.pandas`` (#9602), ``torch.Tensor`` (#9971), and more scipy types (#9881).\n- Support pandas 2.2 and numpy 2.0. (#10266, #9557, #10252, #10175)\n- Support the latest rapids including rmm. (#10435)\n- Improved data cache option in data iterator. (#10286)\n- Accept numpy generators as ``random_state`` (#9743)\n- Support returning base score as intercept in the sklearn interface. (#9486)\n- Support arrow through pandas ext types. This is built on top of the new DataFrame API in XGBoost. See general features for more info. (#9612)\n- Handle np integer in model slice and prediction. (#10007)\n- Improved sklearn tags support. (#10230)\n- The base image for building Linux binary wheels is updated to rockylinux8. (#10399)\n- Improved handling for float128. 
(#10322)\n\nFixes\n-----\n\n- Fix ``DMatrix`` with ``None`` input. (#10052)\n- Fix native library discovery logic. (#9712, #9860)\n- Fix using categorical data with the score function for the ranker. (#9753)\n\nDocument\n--------\n\n- Clarify the effect of ``enable_categorical`` (#9877, #9884)\n- Update the Python introduction. (#10033)\n- Fixes. (#10058, #9991, #9573)\n\nMaintenance\n-----------\n\n- Use array interface in Python prediction return. (#9855)\n- Synthesize the AMES housing dataset for tests. (#9963)\n- linter, formatting, etc. (#10296, #10014)\n- Tests. (#9962, #10285, #9997, #9943, #9934)\n\n************\nJVM packages\n************\n\nHere is a list of JVM-specific changes. Like the PySpark package, the JVM package also gains stage-level scheduling.\n\nFeatures and related documents\n------------------------------\n\n- Support stage-level scheduling (#9775)\n- Allow JVM-Package to access inplace predict method (#9167)\n- Support JDK 17 for test (#9959)\n- Various dependency updates.(#10211, #10210, #10217, #10156, #10070, #9809, #9517, #10235, #10276, #9331, #10335, #10309, #10240, #10244, #10260, #9489, #9326, #10294, #10197, #10196, #10193, #10202, #10191, #10188, #9328, #9311, #9951, #10151, #9827, #9820, #10253)\n- Update and fixes for document. (#9752, #10385)\n- Remove rabit checkpoint. (#9599)\n\nBug Fixes\n---------\n\n- Fixes memory leak in error handling. (#10307)\n- Fixes group col for GPU packages (#10254)\n\n***********\nMaintenance\n***********\n\n- Add formatting and linting requirements to the CMake script. (#9653, #9641, #9637, #9728, #9674)\n- Refactors and cleanups (#10085, #10120, #10074, #9645, #9992, #9568, #9731, #9527).\n- Update nvtx. (#10227)\n- Tests. (#9499, #9553, #9737)\n- Throw error for 32-bit architectures (#10005)\n- Helpers. (#9505, #9572, #9750, #9541, #9983, #9714)\n- Fix mingw hanging on regex in context (#9729)\n- Linters. 
(#10010, #9634)\n\n**\nCI\n**\n\n- Meta info about the Python package is uploaded for easier parsing (#10295)\n- Various dependency updates (#10274, #10280, #10278, #10275, #10320, #10305, #10267, #9544, #10228, #10133, #10187, #9857, #10042, #10268, #9654, #9835)\n- GitHub Action fixes (#10067, #10134, #10064)\n- Improved support for Apple devices. (#10225, #9886, #9699, #9748, #9704, #9749)\n- Stop Windows pipeline upon a failing pytest (#10003)\n- Cancel GH Action job if a newer commit is published (#10088)\n- CI images. (#9666, #10201, #9932)\n- Test R package with CMake (#10087)\n- Test building for the 32-bit arch (#10021)\n- Test federated plugin using GitHub action. (#10336)\n"
  },
  {
    "path": "doc/changes/v3.0.0.rst",
    "content": "#################################\n3.0.3 Patch Release (Jul 30 2025)\n#################################\n\n- Fix NDCG metric with non-exp gain. (:pr:`11534`)\n- Avoid using mean intercept for ``rmsle``. (:pr:`11588`)\n- [jvm-packages] add ``setNumEarlyStoppingRounds`` API (:pr:`11571`)\n- Avoid implicit synchronization in GPU evaluation. (:pr:`11542`)\n- Remove CUDA check in the array interface handler (:pr:`11386`)\n- Fix check in GPU histogram. (:pr:`11574`)\n- Support Rapids 25.06 (:pr:`11504`)\n- Adding ``enable_categorical`` to the sklearn ``.apply`` method (:pr:`11550`)\n- Make xgboost.testing compatible with scikit-learn 1.7 (:pr:`11502`)\n- Add support for building xgboost wheels on Win-ARM64 (:pr:`11572`, :pr:`11597`, :pr:`11559`)\n\n#################################\n3.0.2 Patch Release (May 25 2025)\n#################################\n\n- Dask 2025.4.0 scheduler info compatibility fix (:pr:`11462`)\n- Fix CUDA virtual memory fallback logic on WSL2 (:pr:`11471`)\n\n#################################\n3.0.1 Patch Release (May 13 2025)\n#################################\n\n- Use ``nvidia-smi`` to detect the driver version and handle old drivers that don't support virtual memory. (:pr:`11391`)\n- Optimize deep trees for GPU external memory. (:pr:`11387`)\n- Small fix for page concatenation with external memory (:pr:`11338`)\n- Build xgboost-cpu for ``manylinux_2_28_x86_64`` (:pr:`11406`)\n- Workaround for different Dask versions (:pr:`11436`)\n- Output models now use denormal floating-point instead of ``nan``. (:pr:`11428`)\n- Fix aarch64 CI. (:pr:`11454`)\n\n\n###################\n3.0.0 (2025 Feb 27)\n###################\n\n3.0.0 is a milestone for XGBoost. This note will summarize some general changes and then\nlist package-specific updates. The bump in the major version is for a reworked R package\nalong with a significant update to the JVM packages.\n\n.. 
contents::\n  :backlinks: none\n  :local:\n\n***********************\nExternal Memory Support\n***********************\n\nThis release features a major update to the external memory implementation with improved\nperformance, a new :py:class:`~xgboost.ExtMemQuantileDMatrix` for more efficient data\ninitialization, new feature coverage including categorical data support and quantile\nregression support. Additionally, GPU-based external memory is reworked to support using\nCPU memory as a data cache. Last but not least, we worked on distributed training using\nexternal memory along with the spark package's initial support.\n\n- A new :py:class:`~xgboost.ExtMemQuantileDMatrix` class for fast data initialization with\n  the ``hist`` tree method. The new class supports both CPU and GPU training. (:pr:`10689`,\n  :pr:`10682`, :pr:`10886`, :pr:`10860`, :pr:`10762`, :pr:`10694`, :pr:`10876`)\n- External memory now supports distributed training (:pr:`10492`, :pr:`10861`). In addition, the\n  Spark package can use external memory (the host memory) when the device is GPU. The\n  default package on maven doesn't support RMM yet. For better performance, one needs\n  to compile XGBoost from the source for now. (:pr:`11186`, :pr:`11238`, :pr:`11219`)\n- Improved performance with new optimizations for both the ``hist``-specific training and\n  the ``approx`` (:py:class:`~xgboost.DMatrix`) method. (:pr:`10529`, :pr:`10980`, :pr:`10342`)\n- New demos and documents for external memory, including distributed training. (:pr:`11234`,\n  :pr:`10929`, :pr:`10916`, :pr:`10426`, :pr:`11113`)\n- Reduced binary cache size and memory allocation overhead by not writing the cut matrix. (:pr:`10444`)\n- More feature coverage, including categorical data and all objective functions, including\n  quantile regression. In addition, various prediction types like SHAP values are\n  supported. 
(:pr:`10918`, :pr:`10820`, :pr:`10751`, :pr:`10724`)\n\nSignificant updates for the GPU-based external memory training implementation. (:pr:`10924`,\n:pr:`10895`, :pr:`10766`, :pr:`10544`, :pr:`10677`, :pr:`10615`, :pr:`10927`, :pr:`10608`, :pr:`10711`)\n\n- GPU-based external memory supports both batch-based and sampling-based training. Before\n  the 3.0 release, XGBoost concatenates the data during training and stores the cache on\n  disk. In 3.0, XGBoost can now stage the data on the host and fetch them by\n  batch. (:pr:`10602`, :pr:`10595`, :pr:`10606`, :pr:`10549`, :pr:`10488`, :pr:`10766`,\n  :pr:`10765`, :pr:`10764`, :pr:`10760`, :pr:`10753`, :pr:`10734`, :pr:`10691`,\n  :pr:`10713`, :pr:`10826`, :pr:`10811`, :pr:`10810`, :pr:`10736`, :pr:`10538`,\n  :pr:`11333`)\n- XGBoost can now utilize `NVLink-C2C` for GPU-based external memory training and can\n  handle up to terabytes of data.\n- Support prediction cache (:pr:`10707`).\n- Automatic page concatenation for improved GPU utilization (:pr:`10887`).\n- Improved quantile sketching algorithm for batch-based inputs. See the section for\n  :ref:`new features <3_0_features>` for more info.\n- Optimization for nearly-dense input, see the section for :ref:`optimization\n  <3_0_optimization>` for more info.\n\nSee our latest document for details :doc:`/tutorials/external_memory`. The PyPI package\n(``pip install``) doesn't have ``RMM`` support, which is required by the GPU external\nmemory implementation. To experiment, you can compile XGBoost from source or wait for the\nRAPIDS conda package to be available.\n\n.. _3_0_networking:\n\n**********\nNetworking\n**********\n\nContinuing the work from the previous release, we updated the network module to improve\nreliability. 
(:pr:`10453`, :pr:`10756`, :pr:`11111`, :pr:`10914`, :pr:`10828`, :pr:`10735`, :pr:`10693`, :pr:`10676`, :pr:`10349`,\n:pr:`10397`, :pr:`10566`, :pr:`10526`, :pr:`10349`)\n\nThe timeout option is now supported for NCCL using the NCCL asynchronous mode (:pr:`10850`,\n:pr:`10934`, :pr:`10945`, :pr:`10930`).\n\nIn addition, a new :py:class:`~xgboost.collective.Config` class is added for users to\nspecify various options including timeout, tracker port, etc for distributed\ntraining. Both the Dask interface and the PySpark interface support the new\nconfiguration. (:pr:`11003`, :pr:`10281`, :pr:`10983`, :pr:`10973`)\n\n****\nSYCL\n****\n\nContinuing the work on the SYCL integration, there are significant improvements in the\nfeature coverage for this release from more training parameters and more objectives to\ndistributed training, along with various optimization (:pr:`10884`, :pr:`10883`).\n\nStarting with 3.0, the SYCL-plugin is close to feature-complete, users can start working\non SYCL devices for in-core training and inference. Newly introduced features include:\n\n- Dask support for distributed training (:pr:`10812`)\n\n- Various training procedures, including split evaluation (:pr:`10605`, :pr:`10636`), grow policy\n  (:pr:`10690`, :pr:`10681`), cached prediction (:pr:`10701`).\n\n- Updates for objective functions. (:pr:`11029`, :pr:`10931`, :pr:`11016`, :pr:`10993`, :pr:`11064`, :pr:`10325`)\n\n- On-going work for float32-only devices.  (:pr:`10702`)\n\nOther related PRs (:pr:`10842`, :pr:`10543`, :pr:`10806`, :pr:`10943`, :pr:`10987`, :pr:`10548`, :pr:`10922`, :pr:`10898`, :pr:`10576`)\n\n.. _3_0_features:\n\n********\nFeatures\n********\n\nThis section describes new features in the XGBoost core. For language-specific features,\nplease visit corresponding sections.\n\n- A new initialization method for objectives that are derived from GLM. The new method is\n  based on the mean value of the input labels. 
The new method changes the result of the\n  estimated ``base_score``. (:pr:`10298`, :pr:`11331`)\n\n- The :py:class:`xgboost.QuantileDMatrix` can be used with all prediction types for both\n  CPU and GPU.\n\n- In prior releases, XGBoost makes a copy for the booster to release memory held by\n  internal tree methods. We formalize the procedure into a new booster method\n  :py:meth:`~xgboost.Booster.reset` / :cpp:func:`XGBoosterReset`. (:pr:`11042`)\n\n- OpenMP thread setting is exposed to the XGBoost global configuration. Users can use it\n  to work around hardcoded OpenMP environment variables. (:pr:`11175`)\n\n- We improved learning to rank tasks for better hyper-parameter configuration and for\n  distributed training.\n\n  + In 3.0, all three distributed interfaces, including Dask, Spark, and PySpark, support\n    sorting the data based on query ID. The option for the\n    :py:class:`~xgboost.dask.DaskXGBRanker` is true by default and can be opted\n    out. (:pr:`11146`, :pr:`11007`, :pr:`11047`, :pr:`11012`, :pr:`10823`, :pr:`11023`)\n\n  + Also for learning to rank, a new parameter ``lambdarank_score_normalization`` is\n    introduced to make one of the normalizations optional. (:pr:`11272`)\n\n  + The ``lambdarank_normalization`` now uses the number of pairs when normalizing the\n    ``mean`` pair strategy. Previously, the gradient was used for both ``topk`` and\n    ``mean``. (:pr:`11322`)\n\n- We have improved GPU quantile sketching to reduce memory usage. The improvement helps\n  the construction of the :py:class:`~xgboost.QuantileDMatrix` and the new\n  :py:class:`~xgboost.ExtMemQuantileDMatrix`.\n\n  + A new multi-level sketching algorithm is employed to reduce the overall memory usage\n    with batched inputs.\n  + In addition to algorithmic changes, internal memory usage estimation and the quantile\n    container are also updated. 
(:pr:`10761`, :pr:`10843`)\n  + The change introduces two more parameters for the :py:class:`~xgboost.QuantileDMatrix`\n    and :py:class:`~xgboost.DataIter`, namely, ``max_quantile_batches`` and\n    ``min_cache_page_bytes``.\n\n- More work is needed to improve the support of categorical features. This release\n  supports plotting trees with stat for categorical nodes (:pr:`11053`). In addition, some\n  preparation work is ongoing for auto re-coding categories. (:pr:`11094`, :pr:`11114`,\n  :pr:`11089`) These are feature enhancements instead of blocking issues.\n- Implement weight-based feature importance for vector-leaf. (:pr:`10700`)\n- Reduced logging in the DMatrix construction. (:pr:`11080`)\n\n.. _3_0_optimization:\n\n************\nOptimization\n************\n\nIn addition to the external memory and quantile sketching improvements, we have a number\nof optimizations and performance fixes.\n\n- GPU tree methods now use significantly less memory for both dense inputs and near-dense\n  inputs. (:pr:`10821`, :pr:`10870`)\n- For near-dense inputs, GPU training is much faster for both ``hist`` (about 2x) and\n  ``approx``.\n- Quantile regression on CPU now can handle imbalance trees much more efficiently. (:pr:`11275`)\n- Small optimization for DMatrix construction to reduce latency. Also, C users can now\n  reuse the :cpp:func:`ProxyDMatrix <XGProxyDMatrixCreate()>` for multiple inference\n  calls. (:pr:`11273`)\n- CPU prediction performance for :py:class:`~xgboost.QuantileDMatrix` has been improved\n  (:pr:`11139`) and now is on par with normal ``DMatrix``.\n- Fixed a performance issue for running inference using CPU with extremely sparse\n  :py:class:`~xgboost.QuantileDMatrix` (:pr:`11250`).\n- Optimize CPU training memory allocation for improved performance. (:pr:`11112`)\n- Improved RMM (rapids memory manager) integration. Now, with the help of\n  :py:func:`~xgboost.config_context`, all memory allocated by XGBoost should be routed to\n  RMM. 
As a bonus, all ``thrust`` algorithms now use async policy. (:pr:`10873`, :pr:`11173`, :pr:`10712`,\n  :pr:`10712`, :pr:`10562`)\n- When used without RMM, XGBoost is more careful with its use of caching allocator to\n  avoid holding too much device memory. (:pr:`10582`)\n\n****************\nBreaking Changes\n****************\nThis section lists breaking changes that affect all packages.\n\n- Remove the deprecated ``DeviceQuantileDMatrix``. (:pr:`10974`, :pr:`10491`)\n- Support for saving the model in the deprecated binary format has been removed. Users can still\n  load old models in 3.0. (:pr:`10490`)\n- Support for the legacy (blocking) CUDA stream is removed (:pr:`10607`)\n- XGBoost now requires CUDA 12.0 or later.\n\n*********\nBug Fixes\n*********\n- Fix the quantile error metric (pinball loss) with multiple quantiles. (:pr:`11279`)\n- Fix potential access error when running prediction in multi-thread environment. (:pr:`11167`)\n- Check the correct dump format for the ``gblinear``. (:pr:`10831`)\n\n*************\nDocumentation\n*************\n- A new tutorial for advanced usage with custom objective functions. (:pr:`10283`, :pr:`10725`)\n- The new online document site now shows documents for all packages including Python, R,\n  and JVM-based packages. (:pr:`11240`, :pr:`11216`, :pr:`11166`)\n- Lots of enhancements. (:pr:`10822`, :pr:`11137`, :pr:`11138`, :pr:`11246`, :pr:`11266`, :pr:`11253`, :pr:`10731`, :pr:`11222`,\n  :pr:`10551`, :pr:`10533`)\n- Consistent use of cmake in documents. (:pr:`10717`)\n- Add a brief description for using the ``offset`` from the GLM setting (like\n  ``Poisson``). (:pr:`10996`)\n- Cleanup document for building from source. (:pr:`11145`)\n- Various fixes. (:pr:`10412`, :pr:`10405`, :pr:`10353`, :pr:`10464`, :pr:`10587`, :pr:`10350`, :pr:`11131`, :pr:`10815`)\n- Maintenance. 
(:pr:`11052`, :pr:`10380`)\n\n**************\nPython Package\n**************\n\n- The ``feature_weights`` parameter in the sklearn interface is now defined as\n  a scikit-learn parameter. (:pr:`9506`)\n- Initial support for polars, categorical feature is not yet supported. (:pr:`11126`, :pr:`11172`,\n  :pr:`11116`)\n- Reduce pandas dataframe overhead and overhead for various imports. (:pr:`11058`, :pr:`11068`)\n- Better xlabel in :py:func:`~xgboost.plot_importance` (:pr:`11009`)\n- Validate reference dataset for training. The :py:func:`~xgboost.train` function now\n  throws an error if a :py:class:`~xgboost.QuantileDMatrix` is used as a validation\n  dataset without a reference. (:pr:`11105`)\n- Fix misleading errors when feature names are missing during inference (:pr:`10814`)\n- Add Stacklevel to Python warning callback. The change helps improve the error message\n  for the Python package. (:pr:`10977`)\n- Remove circular reference in DataIter. It helps reduce memory usage. (:pr:`11177`)\n- Add checks for invalid inputs for `cv`. (:pr:`11255`)\n- Update Python project classifiers. (:pr:`10381`, :pr:`11028`)\n- Support doc link for the sklearn module. Users can now find links to documents in a\n  jupyter notebook. (:pr:`10287`)\n\n- Dask\n\n  + Prevent the training from hanging due to aborted workers. (:pr:`10985`) This helps\n    Dask XGBoost be robust against error. When a worker is killed, the training will fail\n    with an exception instead of hang.\n  + Optional support for client-side logging. (:pr:`10942`)\n  + Fix LTR with empty partition and NCCL error. (:pr:`11152`)\n  + Update to work with the latest Dask. (:pr:`11291`)\n  + See the :ref:`3_0_features` section for changes to ranking models.\n  + See the :ref:`3_0_networking` section for changes with the communication module.\n\n- PySpark\n\n  + Expose Training and Validation Metrics. (:pr:`11133`)\n  + Add barrier before initializing the communicator. 
(:pr:`10938`)\n  + Extend support for columnar input to CPU (GPU-only previously). (:pr:`11299`)\n  + See the :ref:`3_0_features` section for changes to ranking models.\n  + See the :ref:`3_0_networking` section for changes with the communication module.\n\n- Document updates (:pr:`11265`).\n- Maintenance. (:pr:`11071`, :pr:`11211`, :pr:`10837`, :pr:`10754`, :pr:`10347`, :pr:`10678`, :pr:`11002`, :pr:`10692`, :pr:`11006`,\n  :pr:`10972`, :pr:`10907`, :pr:`10659`, :pr:`10358`, :pr:`11149`, :pr:`11178`, :pr:`11248`)\n\n- Breaking changes\n\n  + Remove deprecated `feval`. (:pr:`11051`)\n  + Remove dask from the default import. (:pr:`10935`) Users are now required to import the\n    XGBoost Dask through:\n\n    .. code-block:: python\n\n       from xgboost import dask as dxgb\n\n    instead of:\n\n    .. code-block:: python\n\n       import xgboost as xgb\n       xgb.dask\n\n    The change helps avoid introducing dask into the default import set.\n\n  + Bump Python requirement to 3.10. (:pr:`10434`)\n  + Drop support for datatable. (:pr:`11070`)\n\n*********\nR Package\n*********\n\nWe have been reworking the R package for a few releases now. In 3.0, we will start\npublishing a new R package on R-universe, before moving toward a CRAN update. The new\npackage features a much more ergonomic interface, which is also more idiomatic to R\nspeakers. In addition, a range of new features are introduced to the package. To name a\nfew, the new package includes categorical feature support, ``QuantileDMatrix``, and an\ninitial implementation of the external memory training. To test the new package:\n\n.. code-block:: R\n\n  install.packages('xgboost', repos = c('https://dmlc.r-universe.dev', 'https://cloud.r-project.org'))\n\nAlso, we finally have an online documentation site for the R package featuring both\nvignettes and API references (:pr:`11166`, :pr:`11257`). A good starting point for the new interface\nis the new ``xgboost()`` function. 
We won't list all the feature gains here, as there are\ntoo many! Please visit the :doc:`/R-package/index` for more info. There's a migration\nguide (:pr:`11197`) there if you use a previous XGBoost R package version.\n\n- Support for the MSVC build was dropped due to incompatibility with R headers. (:pr:`10355`,\n  :pr:`11150`)\n- Maintenance (:pr:`11259`)\n- Related PRs. (:pr:`11171`, :pr:`11231`, :pr:`11223`, :pr:`11073`, :pr:`11224`, :pr:`11076`, :pr:`11084`, :pr:`11081`,\n  :pr:`11072`, :pr:`11170`, :pr:`11123`, :pr:`11168`, :pr:`11264`, :pr:`11140`, :pr:`11117`, :pr:`11104`, :pr:`11095`, :pr:`11125`, :pr:`11124`,\n  :pr:`11122`, :pr:`11108`, :pr:`11102`, :pr:`11101`, :pr:`11100`, :pr:`11077`, :pr:`11099`, :pr:`11074`, :pr:`11065`, :pr:`11092`, :pr:`11090`,\n  :pr:`11096`, :pr:`11148`, :pr:`11151`, :pr:`11159`, :pr:`11204`, :pr:`11254`, :pr:`11109`, :pr:`11141`, :pr:`10798`, :pr:`10743`, :pr:`10849`,\n  :pr:`10747`, :pr:`11022`, :pr:`10989`, :pr:`11026`, :pr:`11060`, :pr:`11059`, :pr:`11041`, :pr:`11043`, :pr:`11025`, :pr:`10674`, :pr:`10727`,\n  :pr:`10745`, :pr:`10733`, :pr:`10750`, :pr:`10749`, :pr:`10744`, :pr:`10794`, :pr:`10330`, :pr:`10698`, :pr:`10687`, :pr:`10688`, :pr:`10654`,\n  :pr:`10456`, :pr:`10556`, :pr:`10465`, :pr:`10337`)\n\n************\nJVM Packages\n************\n\nThe XGBoost 3.0 release features a significant update to the JVM packages, and in\nparticular, the Spark package. There are breaking changes in packaging and some\nparameters. Please visit the :doc:`migration guide </jvm/xgboost_spark_migration>` for\nrelated changes. The work brings new features and a more unified feature set between CPU\nand GPU implementation. (:pr:`10639`, :pr:`10833`, :pr:`10845`, :pr:`10847`, :pr:`10635`, :pr:`10630`, :pr:`11179`, :pr:`11184`)\n\n- Automatic partitioning for distributed learning to rank. 
See the :ref:`features\n  <3_0_features>` section above (:pr:`11023`).\n- Resolve spark compatibility issue (:pr:`10917`)\n- Support missing value when constructing dmatrix with iterator (:pr:`10628`)\n- Fix transform performance issue (:pr:`10925`)\n- Honor skip.native.build option in xgboost4j-gpu (:pr:`10496`)\n- Support array features type for CPU (:pr:`10937`)\n- Change default missing value to ``NaN`` for better alignment (:pr:`11225`)\n- Don't cast to float if it's already float (:pr:`10386`)\n- Maintenance. (:pr:`10982`, :pr:`10979`, :pr:`10978`, :pr:`10673`, :pr:`10660`, :pr:`10835`, :pr:`10836`, :pr:`10857`, :pr:`10618`,\n  :pr:`10627`)\n\n***********\nMaintenance\n***********\n\nCode maintenance includes both refactoring (:pr:`10531`, :pr:`10573`, :pr:`11069`), cleanups (:pr:`11129`,\n:pr:`10878`, :pr:`11244`, :pr:`10401`, :pr:`10502`, :pr:`11107`, :pr:`11097`, :pr:`11130`, :pr:`10758`, :pr:`10923`, :pr:`10541`, :pr:`10990`),\nand improvements for tests (:pr:`10611`, :pr:`10658`, :pr:`10583`, :pr:`11245`, :pr:`10708`), along with fixing\nvarious warnings in compilers and test dependencies (:pr:`10757`, :pr:`10641`, :pr:`11062`,\n:pr:`11226`). 
Also, miscellaneous updates, including some dev scripts and profiling annotations\n(:pr:`10485`, :pr:`10657`, :pr:`10854`, :pr:`10718`, :pr:`11158`, :pr:`10697`, :pr:`11276`).\n\nLastly, dependency updates (:pr:`10362`, :pr:`10363`, :pr:`10360`, :pr:`10373`, :pr:`10377`, :pr:`10368`, :pr:`10369`,\n:pr:`10366`, :pr:`11032`, :pr:`11037`, :pr:`11036`, :pr:`11035`, :pr:`11034`, :pr:`10518`, :pr:`10536`, :pr:`10586`, :pr:`10585`, :pr:`10458`,\n:pr:`10547`, :pr:`10429`, :pr:`10517`, :pr:`10497`, :pr:`10588`, :pr:`10975`, :pr:`10971`, :pr:`10970`, :pr:`10949`, :pr:`10947`, :pr:`10863`,\n:pr:`10953`, :pr:`10954`, :pr:`10951`, :pr:`10590`, :pr:`10600`, :pr:`10599`, :pr:`10535`, :pr:`10516`, :pr:`10786`, :pr:`10859`, :pr:`10785`,\n:pr:`10779`, :pr:`10790`, :pr:`10777`, :pr:`10855`, :pr:`10848`, :pr:`10778`, :pr:`10772`, :pr:`10771`, :pr:`10862`, :pr:`10952`, :pr:`10768`,\n:pr:`10770`, :pr:`10769`, :pr:`10664`, :pr:`10663`, :pr:`10892`, :pr:`10979`, :pr:`10978`).\n\n***\nCI\n***\n\n- The CI is reworked to use `RunsOn` to integrate custom CI pipelines with GitHub\n  action. The migration helps us reduce the maintenance burden and make the CI\n  configuration more accessible to others. (:pr:`11001`, :pr:`11079`, :pr:`10649`, :pr:`11196`, :pr:`11055`,\n  :pr:`10483`, :pr:`11078`, :pr:`11157`)\n\n- Other maintenance work includes various small fixes, enhancements, and tooling\n  updates. (:pr:`10877`, :pr:`10494`, :pr:`10351`, :pr:`10609`, :pr:`11192`, :pr:`11188`, :pr:`11142`, :pr:`10730`, :pr:`11066`,\n  :pr:`11063`, :pr:`10800`, :pr:`10995`, :pr:`10858`, :pr:`10685`, :pr:`10593`, :pr:`11061`)\n"
  },
  {
    "path": "doc/changes/v3.1.0.rst",
    "content": "#################################\n3.1.3 Patch Release (Jan 08 2026)\n#################################\n\n- Scikit-learn 1.8 compatibility fix (:pr:`11858`)\n- Add ARM CUDA wheels for PyPI. (:pr:`11827`) Add nccl as dep for aarch64. (:pr:`11753`)\n- [R] Fix off-by-one bug: nrounds=0 resulted in 2 iterations :pr:`11856`\n- [R] Fix mingw warnings, winbuilder check warnings, memory safety issues. (:pr:`11859`, :pr:`11847`, :pr:`11830`, :pr:`11906`)\n- Avoid overflow in rounding estimation. (:pr:`11910`)\n- Workaround compiler issue on Windows, affects the use of max_delta_step with CUDA. (:pr:`11916`)\n\n#################################\n3.1.2 Patch Release (Nov 20 2025)\n#################################\n\n- Fix loading nccl 2.28. (:pr:`11806`)\n- Fix ordering of Python callbacks. (:pr:`11812`)\n- Infer the ``enable_categorical`` during model load. (:pr:`11816`)\n\n#################################\n3.1.1 Patch Release (Oct 22 2025)\n#################################\n\n- Emit correct error when performing inplace-predict using a CPU-only version of XGBoost,\n  but with a GPU input. (:pr:`11761`)\n- Enhance the error message for loading the removed binary model format. (:pr:`11760`)\n- Use the correct group ID for SHAP when the intercept is a vector. (:pr:`11764`)\n\n###################\n3.1.0 (2025 Sep 22)\n###################\n\nWe are delighted to share the latest 3.1.0 update for XGBoost.\n\n********************\nCategorical Re-coder\n********************\n\nThis release features a major update to categorical data support by introducing a\nre-coder. This re-coder saves categories in the trained model and re-codes the data during\ninference, to keep the categorical encoding consistent. Aside from primitive types like\nintegers, it also supports string-based categories. The implementation works with all\nsupported Python DataFrame implementations. 
(:pr:`11609`, :pr:`11665`, :pr:`11605`,\n:pr:`11628`, :pr:`11598`, :pr:`11591`, :pr:`11568`, :pr:`11561`, :pr:`11650`, :pr:`11621`,\n:pr:`11611`, :pr:`11313`, :pr:`11311`, :pr:`11310`, :pr:`11315`, :pr:`11303`, :pr:`11612`,\n:pr:`11098`, :pr:`11347`) See :ref:`cat-recode` for more information. (:pr:`11297`)\n\nIn addition, categorical support for Polars data frames is now available (:pr:`11565`).\n\nLastly, we removed the experimental tag for categorical feature support in this\nrelease. (:pr:`11690`)\n\n***************\nExternal Memory\n***************\n\nWe continue the work on external memory support on 3.1. In this release, XGBoost features\nan adaptive cache for CUDA external memory. The improved cache can split the data between\nCPU memory and GPU memory according to the underlying hardware and data\nsize. (:pr:`11556`, :pr:`11465`, :pr:`11664`, :pr:`11594`, :pr:`11469`, :pr:`11547`,\n:pr:`11339`, :pr:`11477`, :pr:`11453`, :pr:`11446`, :pr:`11458`, :pr:`11426`, :pr:`11566`,\n:pr:`11497`)\n\nAlso, there's an optional support (opt-in) for using ``nvcomp`` and the GB200\ndecompression engine to handle sparse data (requires nvcomp as a plugin) (:pr:`11451`,\n:pr:`11464`, :pr:`11460`, :pr:`11512`, :pr:`11520`). We improved the memory usage of\nquantile sketching with external memory (:pr:`11641`) and optimized the predictor for\ntraining (:pr:`11548`). To help ensure the training performance, the latest XGBoost\nfeatures detection for NUMA (Non-Uniform Memory Access) node (:pr:`11538`, :pr:`11576`) for checking cross-socket data\naccess. We are working on additional tooling to enhance NUMA node performance. Aside from\nfeatures, we have also added various documentation improvements. (:pr:`11412`,\n:pr:`11631`)\n\nLastly, external memory support with text file input has been removed\n(:pr:`11562`). 
Moving forward, we will focus on iterator inputs.\n\n\n****************************\nMulti-Target/Class Intercept\n****************************\n\nStarting with 3.1, the base-score (intercept) is estimated and stored as a vector when the\nmodel has multiple outputs, be it multi-target regression or multi-class\nclassification. This change enhances the initial estimation for multi-output models and\nwill be the starting point for future work on vector-leaf. (:pr:`11277`, :pr:`11651`,\n:pr:`11625`, :pr:`11649`, :pr:`11630`, :pr:`11647`, :pr:`11656`, :pr:`11663`)\n\n********\nFeatures\n********\n\n- Support leaf prediction with QDM on CPU. (:pr:`11620`)\n- Improve seed with mean sampling for the first iteration. (:pr:`11639`)\n- Optionally include git hash in CMake build. (:pr:`11587`)\n\n****************************\nRemoving Deprecated Features\n****************************\n\nThis version removes some deprecated features, notably, the binary IO format, along with\nfeatures deprecated in 2.0.\n\n- Binary serialization format has been removed in 3.1. The format has been formally\n  deprecated in `1.6 <https://github.com/dmlc/xgboost/issues/7547>`__. (:pr:`11307`,\n  :pr:`11553`, :pr:`11552`, :pr:`11602`)\n\n- Removed old GPU-related parameters including ``use_gpu`` (pyspark), ``gpu_id``,\n  ``gpu_hist``, and ``gpu_coord_descent``. These parameters have been deprecated in\n  2.0. Use the ``device`` parameter instead. (:pr:`11395`, :pr:`11554`, :pr:`11549`,\n  :pr:`11543`, :pr:`11539`, :pr:`11402`)\n\n- Remove deprecated C functions: ``XGDMatrixCreateFromCSREx``,\n  ``XGDMatrixCreateFromCSCEx``. (:pr:`11514`, :pr:`11513`)\n\n- XGBoost now emits a warning for text inputs. (:pr:`11590`)\n\n\n*************\nOptimizations\n*************\n\n- Optimize CPU inference with Array-Based Tree Traversal (:pr:`11519`)\n- Specialize for GPU dense histogram. (:pr:`11443`)\n- [sycl] Improve L1 cache locality for histogram building. 
(:pr:`11555`)\n- [sycl] Reduce predictor memory consumption and improve L2 locality (:pr:`11603`)\n\n*****\nFixes\n*****\n\n- Fix static linking C++ libraries on macOS (:pr:`11522`)\n- Rename param.hh/cc to hist_param.hh/cc to fix xcode build (:pr:`11378`)\n- [sycl] Fix build with updated compiler (:pr:`11618`)\n- [sycl] Various fixes for fp32-only devices. (:pr:`11527`, :pr:`11524`)\n- Fix compilation on android older than API 26 (:pr:`11366`)\n- Fix loading Gamma model from 1.3. (:pr:`11377`)\n\n**************\nPython Package\n**************\n\n- Support mixing Python metrics and built-in metrics for the skl interface. (:pr:`11536`)\n- CUDA 13 Support for PyPI with the new ``xgboost-cu13`` package. (:pr:`11677`, :pr:`11662`)\n- Remove wheels for manylinux2014. (:pr:`11673`)\n- Initial support for building variant wheels (:pr:`11531`, :pr:`11645`, :pr:`11294`)\n- Minimum PySpark version is now set to 3.4 (:pr:`11364`). In addition, the PySpark\n  interface now checks the validation indicator column type and has a fix for None column\n  input. (:pr:`11535`, :pr:`11523`)\n- [dask] Small cleanup for the predict function. (:pr:`11423`)\n\n*********\nR Package\n*********\n\nNow that most of the deprecated features have been removed in this release, we will try to\nbring the latest R package back to CRAN.\n\n- Implement Booster reset. (:pr:`11357`)\n- Improvements for documentation, including having code examples in XGBoost's sphinx\n  documentation site, and notes for R-universe release. (:pr:`11369`, :pr:`11410`,\n  :pr:`11685`, :pr:`11316`)\n\n************\nJVM Packages\n************\n\n- Support columnar inputs for cpu pipeline (:pr:`11352`)\n- Rewrite the `LabeledPoint` as a Java class (:pr:`11545`)\n- Various fixes and document updates. (:pr:`11525`, :pr:`11508`, :pr:`11489`, :pr:`11682`)\n\n*********\nDocuments\n*********\n\nChanges for general documentation:\n\n- Update notes about GPU memory usage. (:pr:`11375`)\n- Various fixes and updates. 
(:pr:`11503`, :pr:`11532`, :pr:`11328`, :pr:`11344`, :pr:`11626`)\n\n\n******************\nCI and Maintenance\n******************\n\n- Code cleanups. (:pr:`11367`, :pr:`11342`, :pr:`11658`, :pr:`11528`, :pr:`11585`,\n  :pr:`11672`, :pr:`11642`, :pr:`11667`, :pr:`11495`, :pr:`11567`)\n- Various cleanup and fixes for tests. (:pr:`11405`, :pr:`11389`, :pr:`11396`, :pr:`11456`)\n- Support CMake 4.0 (:pr:`11382`)\n- Various CI updates and fixes (:pr:`11318`, :pr:`11349`, :pr:`11653`, :pr:`11637`,\n  :pr:`11683`, :pr:`11638`, :pr:`11644`, :pr:`11306`, :pr:`11560`, :pr:`11323`, :pr:`11617`,\n  :pr:`11341`, :pr:`11693`)\n"
  },
  {
    "path": "doc/changes/v3.2.0.rst",
    "content": "###################\n3.2.0 (2026 Feb 09)\n###################\n\nWe are excited to announce the XGBoost 3.2 release. This release features significant\nprogress on multi-target tree support with vector leaf, enhanced GPU external memory\ntraining, various optimizations, and the removal of the deprecated CLI.\n\n***************\nExternal Memory\n***************\n\nThe latest XGBoost release features enhanced support for external memory training with\nGPUs. XGBoost has experimental support for using the CUDA async memory pool, which users\ncan opt in to enable asynchronous memory management for efficient external memory\ntraining. Prior to 3.2, the RMM plugin was required. The feature is Linux-only at the\nmoment. (:pr:`11706`, :pr:`11715`, :pr:`11718`, :pr:`11931`, :pr:`11865`, :pr:`11959`,\n:pr:`11962`)\n\nThe adaptive cache is now used for all device types, including devices with full C2C\nbandwidth, like GH200 and DGX station. Users can continue to specify the\n``cache_host_ratio`` parameter in case of memory fragmentation. XGBoost now supports\ndevices with mixed GPU models for configuring the host cache (:pr:`11998`). As part of the\nwork for improved NUMA system support, we co-developed the ``pyhwloc`` project\n(:pr:`11992`).\n\nLastly, the old page-concat option for GPU external memory has been removed. XGBoost will\nuse the full dataset for training. (:pr:`11882`, :pr:`11897`)\n\n******************\nMulti-Target/Class\n******************\n\nThis release brings substantial progress on the vector-leaf-based multi-target tree model,\nbuilding on the multi-target intercept work from 3.1. The vector leaf tree stores a vector\nof weights in each leaf node, enabling the model to capture correlations across targets\nduring tree construction. In 3.2, we expanded the feature set to cover most of the\ncommonly used training configurations.\n\n.. warning::\n\n   The vector leaf is still a work in progress. 
Feedback is welcome.\n\nNew features for the multi-target tree include:\n\n- Reduced gradient (sketch boost) for the hist tree method, which avoids using the full\n  gradient matrix to find tree structures for improving scalability with the number of\n  targets. Users can use a custom objective to define the tree split gradient in addition\n  to the full leaf gradient. Built-in objectives are not yet supported.\n- Support for all regression objectives, including MAE and the quantile loss.\n- GPU ``hist`` tree method implementation has features on par with the CPU one.\n- Regularization parameters including L1/L2, ``min_split_loss``, and ``max_delta_step``.\n- Row subsampling with both uniform sampling and gradient-based sampling.\n- Column sampling (feature selection), including feature weights.\n- Feature importance variants (gain and coverage).\n- Model dump support for all formats (JSON, text, graphviz).\n- External memory.\n\nIn addition, intercept initialization for the multinomial logistic objective now adheres\nto GLM semantics.\n\nRelated PRs: :pr:`11950`, :pr:`11914`, :pr:`11913`, :pr:`11965`, :pr:`11941`, :pr:`11967`,\n:pr:`11940`, :pr:`11896`, :pr:`11894`, :pr:`11889`, :pr:`11917`, :pr:`11883`, :pr:`11786`,\n:pr:`11881`, :pr:`11862`, :pr:`11855`, :pr:`11829`, :pr:`11825`, :pr:`11820`, :pr:`11814`,\n:pr:`11729`, :pr:`11724`, :pr:`11747`, :pr:`11798`, :pr:`11791`, :pr:`11789`, :pr:`11781`,\n:pr:`11778`, :pr:`11777`, :pr:`11744`, :pr:`11922`, :pr:`11920`\n\nCurrently missing features for the ``hist`` tree method with vector leaf:\n\n- Distributed training\n- Categorical features\n- Feature interaction constraints\n- Monotone constraints, which are not defined when the output is a vector.\n- Shapley values\n\n********\nFeatures\n********\n\n- As part of the vector leaf work, CPU ``hist`` now supports gradient-based sampling.\n- The deprecated CLI (command line interface) has been removed. It was deprecated in\n  2.1. 
(:pr:`11720`)\n- Expose the categories container to the C API, allowing C users to access category\n  information from the trained model. (:pr:`11794`)\n- Upgrade to CUDA 12.9. (:pr:`11972`, :pr:`11968`)\n- Support oneapi 2026 release. (:pr:`11994`)\n- Compatibility fixes for the latest versions of nvcomp, RMM, and CCCL. (:pr:`11930`,\n  :pr:`11834`, :pr:`11871`, :pr:`11995`, :pr:`11861`, :pr:`11785`, :pr:`11997`). A nightly\n  CI pipeline was added to test XGBoost with the latest versions of CCCL and\n  RMM. (:pr:`11863`)\n\n*************\nOptimizations\n*************\n\n- Various optimizations for the GPU hist tree method, some of which were done as part of\n  the vector leaf work. (:pr:`11895`)\n- Enable multi-threaded data initialization for CPU. (:pr:`11974`)\n- Make the ``block_size`` of the CPU histogram building kernel adaptive based on model\n  parameters and CPU cache size, demonstrating up to 2x speedup for certain\n  workloads. (:pr:`11808`)\n- Small optimizations for some GPU kernels to use TMA. (:pr:`11841`, :pr:`11802`)\n- We now use device memory for storing the tree model, which eliminates data copies\n  between host and device during training and inference. (:pr:`11759`, :pr:`11735`, :pr:`11750`, :pr:`11741`,\n  :pr:`11752`)\n\n*****\nFixes\n*****\n\n- Fix logistic regression with constant labels. (:pr:`11973`)\n- Fix OpenMP configuration for macOS. (:pr:`11976`)\n- Fix SYCL build. (:pr:`11844`)\n\n**************\nPython Package\n**************\n\n- Fix memory leak with Python DataFrame inputs where temporary buffers were stored as\n  class variables instead of instance variables. (:pr:`11961`)\n- Pandas 3.0 support. (:pr:`11975`)\n- Add Python type hints for tests and demos, various type hint fixes. (:pr:`11795`, :pr:`11797`)\n- Add Python 3.14 classifier. (:pr:`11793`)\n- Maintenance (:pr:`11717`, :pr:`11783`)\n\n*********\nR Package\n*********\n\n- Fix RCHK warnings and memory safety issues. 
(:pr:`11938`, :pr:`11935`, :pr:`11847`)\n- Error out on factors passed to ``DMatrix`` with an informative message. (:pr:`11810`)\n- Remove calls to R's global RNG that are no longer needed. (:pr:`11848`, :pr:`11887`)\n- Various documentation fixes and updates. (:pr:`11773`, :pr:`11890`, :pr:`11732`, :pr:`11846`, :pr:`11981`, :pr:`11842`)\n\n************\nJVM Packages\n************\n\n- Remove ``synchronized`` from predict, as internal prediction is already thread-safe,\n  with a concurrency test added to verify. (:pr:`11746`)\n- Set GPU device ID explicitly at the beginning of training and avoid CUDA API guard for\n  the tracker process, allowing Spark executors to run in exclusive mode. (:pr:`11939`, :pr:`11929`)\n- Use ``inferBatchSizeParameter`` instead of a hardcoded value. (:pr:`11745`)\n- Documentation updates, maintenance. (:pr:`11691`, :pr:`11915`, :pr:`11743`)\n\n*********\nDocuments\n*********\n\n- Update references from XGBoost Operator to Kubeflow Trainer. (:pr:`11710`)\n- Document the categories container and add notes for handling unseen categories. (:pr:`11788`, :pr:`11868`, :pr:`11774`)\n- Add Intel as sponsor. (:pr:`11850`)\n\n******************\nCI and Maintenance\n******************\n\n- Support ``pre-commit`` for various linting and formatting tasks. ``clang-format`` is now\n  required by the CI. (:pr:`11984`, :pr:`11978`, :pr:`11980`, :pr:`11958`, :pr:`11953`, :pr:`11946`, :pr:`11993`)\n- We added sccache integration to XGBoost's CI workflows, which brings significant\n  speedup since a majority of the time is spent on compiling variants of XGBoost. In addition,\n  most of the workflows now use GHA container support. (:pr:`11956`, :pr:`11952`, :pr:`11949`, :pr:`11937`,\n  :pr:`11934`, :pr:`11927`, :pr:`11932`, :pr:`11924`, :pr:`11979`)\n- Plenty of optimizations for tests. (:pr:`11990`, :pr:`11975`, :pr:`11964`)\n- Various dependency updates, fixes, test refactoring, and cleanups. 
(:pr:`11955`, :pr:`11957`,\n  :pr:`11963`, :pr:`11945`, :pr:`11912`, :pr:`11909`, :pr:`11888`, :pr:`11898`, :pr:`11925`, :pr:`11877`, :pr:`11824`, :pr:`11748`, :pr:`11721`,\n  :pr:`11705`, :pr:`11699`, :pr:`11832`, :pr:`11796`, :pr:`11828`, :pr:`11852`, :pr:`11800`, :pr:`11999`, :pr:`11991`)\n"
  },
  {
    "path": "doc/conf.py",
    "content": "\"\"\"Sphinx configuration.\n\nSee `doc/contrib/docs.rst <https://xgboost.readthedocs.io/en/stable/contrib/docs.html>`__\nfor more info.\n\"\"\"\n\nimport os\nimport shutil\nimport subprocess\nimport sys\nimport tarfile\nimport urllib.request\nimport warnings\nfrom urllib.error import HTTPError\n\nCURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))\nPROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))\nTMP_DIR = os.path.join(CURR_PATH, \"tmp\")\nDOX_DIR = \"doxygen\"\n\n# Directly load the source module.\nsys.path.append(os.path.join(PROJECT_ROOT, \"python-package\"))\n# Tell xgboost to not load the libxgboost.so\nos.environ[\"XGBOOST_BUILD_DOC\"] = \"1\"\n\n# Version information.\nimport xgboost  # NOQA\n\nversion = xgboost.__version__\nrelease = xgboost.__version__\n\n\n# Document is uploaded to here by the CI builder.\nS3_BUCKET = \"https://xgboost-docs.s3.us-west-2.amazonaws.com\"\n\n\ndef run_doxygen() -> None:\n    \"\"\"Run the doxygen make command in the designated folder.\"\"\"\n    curdir = os.path.normpath(os.path.abspath(os.path.curdir))\n    if os.path.exists(TMP_DIR):\n        print(f\"Delete directory {TMP_DIR}\")\n        shutil.rmtree(TMP_DIR)\n    else:\n        print(f\"Create directory {TMP_DIR}\")\n        os.mkdir(TMP_DIR)\n    try:\n        os.chdir(PROJECT_ROOT)\n        if not os.path.exists(DOX_DIR):\n            os.mkdir(DOX_DIR)\n        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))\n        print(\n            \"Build doxygen at {}\".format(\n                os.path.join(PROJECT_ROOT, DOX_DIR, \"doc_doxygen\")\n            )\n        )\n        subprocess.check_call([\"cmake\", \"..\", \"-DBUILD_C_DOC=ON\", \"-GNinja\"])\n        subprocess.check_call([\"ninja\", \"doc_doxygen\"])\n\n        src = os.path.join(PROJECT_ROOT, DOX_DIR, \"doc_doxygen\", \"html\")\n        dest = os.path.join(TMP_DIR, \"dev\")\n        print(f\"Copy directory {src} -> {dest}\")\n        
shutil.copytree(src, dest)\n    except OSError as e:\n        sys.stderr.write(\"doxygen execution failed: %s\" % e)\n    finally:\n        os.chdir(curdir)\n\n\ndef get_branch() -> str:\n    \"\"\"Guess the git branch.\"\"\"\n    branch = os.getenv(\"READTHEDOCS_VERSION_NAME\", default=None)\n    print(f\"READTHEDOCS_VERSION_NAME = {branch}\")\n\n    def is_id():\n        try:\n            return str(int(branch)) == branch\n        except ValueError:\n            return False\n\n    if not branch:  # Not in RTD\n        branch = \"master\"  # use the master branch as the default.\n    elif branch == \"latest\":\n        branch = \"master\"\n    elif branch.startswith(\"release_\"):\n        pass  # release branch, like: release_2.1.0\n    elif branch == \"stable\":\n        # Avoid patch release branch.\n        v = xgboost.__version__.split(\".\")\n        branch = f\"release_{v[0]}.{v[1]}.0\"\n    elif is_id():\n        # Likely PR branch\n        branch = f\"PR-{branch}\"\n    else:  # other dmlc branches.\n        pass\n    print(f\"branch = {branch}\")\n    return branch\n\n\ndef get_sha(branch: str) -> str | None:\n    sha = os.getenv(\"READTHEDOCS_GIT_COMMIT_HASH\", default=None)\n    if sha is not None:\n        return sha\n\n    if branch == \"master\":\n        res = subprocess.run([\"git\", \"rev-parse\", \"master\"], stdout=subprocess.PIPE)\n    else:\n        res = subprocess.run([\"git\", \"rev-parse\", \"HEAD\"], stdout=subprocess.PIPE)\n    if res.returncode != 0:\n        return None\n    return res.stdout.decode(\"utf-8\").strip()\n\n\ndef download_jvm_docs() -> None:\n    \"\"\"Fetch docs for the JVM packages\"\"\"\n    print(\"Download JVM documents from S3.\")\n    branch = get_branch()\n    commit = get_sha(branch)\n    if commit is None:\n        print(\"Couldn't find commit to build jvm docs.\")\n        return\n\n    def try_fetch_jvm_doc(branch: str) -> bool:\n        \"\"\"\n        Attempt to fetch JVM docs for a given branch.\n        
Returns True if successful\n        \"\"\"\n        try:\n            local_jvm_docs = os.environ.get(\"XGBOOST_JVM_DOCS\", None)\n            url = f\"{S3_BUCKET}/{branch}/{commit}/{branch}.tar.bz2\"\n            if local_jvm_docs is not None:\n                local_jvm_docs = os.path.expanduser(local_jvm_docs)\n\n            if local_jvm_docs is not None and os.path.exists(local_jvm_docs):\n                # Reuse an existing tarball.\n                filename = local_jvm_docs\n            elif local_jvm_docs is not None:\n                # Download to local_jvm_docs for future reuse.\n                filename, _ = urllib.request.urlretrieve(url, filename=local_jvm_docs)\n                print(f\"Finished: {url} -> {filename}\")\n            else:\n                filename, _ = urllib.request.urlretrieve(url)\n                print(f\"Finished: {url} -> {filename}\")\n            if not os.path.exists(TMP_DIR):\n                os.mkdir(TMP_DIR)\n            jvm_doc_dir = os.path.join(TMP_DIR, \"jvm_docs\")\n            if os.path.exists(jvm_doc_dir):\n                shutil.rmtree(jvm_doc_dir)\n            os.mkdir(jvm_doc_dir)\n\n            with tarfile.open(filename, \"r:bz2\") as t:\n                t.extractall(jvm_doc_dir)\n            return True\n        except HTTPError:\n            print(f\"JVM doc not found at {url}. 
Skipping...\")\n            return False\n\n    if not try_fetch_jvm_doc(branch):\n        print(\"Falling back to the master branch.\")\n        try_fetch_jvm_doc(\"master\")\n\n\ndef download_r_docs() -> None:\n    \"\"\"Fetch R document from s3.\"\"\"\n    branch = get_branch()\n    commit = get_sha(branch)\n    print(\"Download R documents from S3.\")\n    if commit is None:\n        print(\"Couldn't find commit to build R docs.\")\n        return\n\n    def try_fetch_r_doc(branch: str) -> bool:\n        try:\n            local_r_docs = os.environ.get(\"XGBOOST_R_DOCS\", None)\n            url = f\"{S3_BUCKET}/{branch}/{commit}/r-docs-{branch}.tar.bz2\"\n            if local_r_docs is not None:\n                local_r_docs = os.path.expanduser(local_r_docs)\n\n            if local_r_docs is not None and os.path.exists(local_r_docs):\n                # Reuse an existing tarball.\n                filename = local_r_docs\n            elif local_r_docs is not None:\n                filename, _ = urllib.request.urlretrieve(url, filename=local_r_docs)\n                print(f\"Finished: {url} -> {filename}\")\n            else:\n                filename, _ = urllib.request.urlretrieve(url)\n                print(f\"Finished: {url} -> {filename}\")\n\n            if not os.path.exists(TMP_DIR):\n                os.mkdir(TMP_DIR)\n            r_doc_dir = os.path.join(TMP_DIR, \"r_docs\")\n            if os.path.exists(r_doc_dir):\n                shutil.rmtree(r_doc_dir)\n            os.mkdir(r_doc_dir)\n\n            with tarfile.open(filename, \"r:bz2\") as t:\n                t.extractall(r_doc_dir)\n\n            for root, subdir, files in os.walk(\n                os.path.join(r_doc_dir, \"doc\", \"R-package\")\n            ):\n                for f in files:\n                    assert f.endswith(\".md\")\n                    src = os.path.join(root, f)\n                    dst = os.path.join(PROJECT_ROOT, \"doc\", \"R-package\", f)\n                    
shutil.move(src, dst)\n            return True\n        except HTTPError:\n            print(f\"R doc not found at {url}. Falling back to the master branch.\")\n            return False\n\n    if not try_fetch_r_doc(branch):\n        try_fetch_r_doc(\"master\")\n\n\ndef is_readthedocs_build():\n    if os.environ.get(\"READTHEDOCS\", None) == \"True\":\n        return True\n    warnings.warn(\n        \"Skipping Doxygen build... You won't have documentation for C/C++ functions. \"\n        \"Set environment variable READTHEDOCS=True if you want to build Doxygen. \"\n        \"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler \"\n        \"on your system.)\"\n    )\n    return False\n\n\nif is_readthedocs_build():\n    run_doxygen()\n    download_jvm_docs()\n    download_r_docs()\n\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\nlibpath = os.path.join(PROJECT_ROOT, \"python-package/\")\nsys.path.insert(0, libpath)\nsys.path.insert(0, CURR_PATH)\n\n# -- General configuration ------------------------------------------------\n\n# General information about the project.\nproject = \"xgboost\"\nauthor = \"%s developers\" % project\ncopyright = \"2025, %s\" % author\ngithub_doc_root = \"https://github.com/dmlc/xgboost/tree/master/doc/\"\n\n# Add any Sphinx extension module names here, as strings. 
They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones\nextensions = [\n    \"matplotlib.sphinxext.plot_directive\",\n    \"sphinxcontrib.jquery\",\n    \"sphinx.ext.autodoc\",\n    \"sphinx.ext.napoleon\",\n    \"sphinx.ext.mathjax\",\n    \"sphinx.ext.intersphinx\",\n    \"sphinx_gallery.gen_gallery\",\n    \"sphinx_issues\",\n    \"sphinx_tabs.tabs\",\n    \"breathe\",\n    \"myst_parser\",\n]\n\nsphinx_gallery_conf = {\n    # path to your example scripts\n    \"examples_dirs\": [\n        \"../demo/guide-python\",\n        \"../demo/dask\",\n        \"../demo/aft_survival\",\n        \"../demo/rmm_plugin\",\n    ],\n    # path to where to save gallery generated output\n    \"gallery_dirs\": [\n        \"python/examples\",\n        \"python/dask-examples\",\n        \"python/survival-examples\",\n        \"python/rmm-examples\",\n    ],\n    \"matplotlib_animations\": True,\n}\n\n# Sphinx-issues configuration\n# Path to GitHub repo {group}/{project}  (note that `group` is the GitHub user or organization)\nissues_github_path = \"dmlc/xgboost\"\n\nautodoc_typehints = \"description\"\n\ngraphviz_output_format = \"png\"\nplot_formats = [(\"svg\", 300), (\"png\", 100), (\"hires.png\", 300)]\nplot_html_show_source_link = False\nplot_html_show_formats = False\n\n# Breathe extension variables\nbreathe_projects = {}\nif is_readthedocs_build():\n    breathe_projects = {\n        \"xgboost\": os.path.join(PROJECT_ROOT, DOX_DIR, \"doc_doxygen/xml\")\n    }\nbreathe_default_project = \"xgboost\"\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = [\"_templates\"]\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\nsource_suffix = [\".rst\", \".md\"]\n\n# The encoding of source files.\n# source_encoding = 'utf-8-sig'\n\n# The master toctree document.\nmaster_doc = \"index\"\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = \"en\"\n\nautoclass_content = \"both\"\n\n# There are two options for replacing |today|: either, you set today to some\n# non-false value, then it is used:\n# today = ''\n# Else, today_fmt is used as the format for a strftime call.\n# today_fmt = '%B %d, %Y'\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\nexclude_patterns = [\"_build\"]\nhtml_extra_path = []\nif is_readthedocs_build():\n    html_extra_path = [TMP_DIR]\n\n# The reST default role (used for this markup: `text`) to use for all\n# documents.\n# default_role = None\n\n# If true, '()' will be appended to :func: etc. cross-reference text.\n# add_function_parentheses = True\n\n# If true, the current module name will be prepended to all description\n# unit titles (such as .. function::).\n# add_module_names = True\n\n# If true, sectionauthor and moduleauthor directives will be shown in the\n# output. They are ignored by default.\n# show_authors = False\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = \"sphinx\"\n\n# A list of ignored prefixes for module index sorting.\n# modindex_common_prefix = []\n\n# If true, keep warnings as \"system message\" paragraphs in the built documents.\n# keep_warnings = False\n\n# If true, `todo` and `todoList` produce output, else they produce nothing.\ntodo_include_todos = False\n\n# -- Options for HTML output ----------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  
See the documentation for\n# a list of builtin themes.\nhtml_theme = \"sphinx_rtd_theme\"\nhtml_theme_options = {\"logo_only\": True}\n\n\nhtml_logo = \"https://xgboost.ai/images/logo/xgboost-logo.png\"\n\nhtml_css_files = [\"css/custom.css\"]\n\nhtml_sidebars = {\"**\": [\"logo-text.html\", \"globaltoc.html\", \"searchbox.html\"]}\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = [\"_static\"]\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = project + \"doc\"\n\n# -- Options for LaTeX output ---------------------------------------------\nlatex_elements = {}\n\n# Grouping the document tree into LaTeX files. List of tuples\n# (source start file, target name, title,\n#  author, documentclass [howto, manual, or own class]).\nlatex_documents = [\n    (master_doc, \"%s.tex\" % project, project, author, \"manual\"),\n]\n\nintersphinx_mapping = {\n    \"python\": (\"https://docs.python.org/3.10\", None),\n    \"numpy\": (\"https://numpy.org/doc/stable/\", None),\n    \"scipy\": (\"https://docs.scipy.org/doc/scipy/\", None),\n    \"pandas\": (\"https://pandas.pydata.org/pandas-docs/stable/\", None),\n    \"sklearn\": (\"https://scikit-learn.org/stable\", None),\n    \"dask\": (\"https://docs.dask.org/en/stable/\", None),\n    \"distributed\": (\"https://distributed.dask.org/en/stable/\", None),\n    \"pyspark\": (\"https://spark.apache.org/docs/latest/api/python/\", None),\n    \"rmm\": (\"https://docs.rapids.ai/api/rmm/nightly/\", None),\n}\n\n\ndef setup(app):\n    app.add_css_file(\"custom.css\")\n"
  },
  {
    "path": "doc/contrib/ci.rst",
    "content": "####################################\nAutomated testing in XGBoost project\n####################################\n\nThis document collects tips for using the Continuous Integration (CI) service of the XGBoost\nproject.\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n****************\nTips for testing\n****************\n\n=======\nR tests\n=======\n\n------------------------------------\nRunning R tests with ``noLD`` option\n------------------------------------\n\nYou can run R tests using a custom-built R with compilation flag\n``--disable-long-double``. See `this page <https://blog.r-hub.io/2019/05/21/nold/>`_ for more\ndetails about noLD. This is a requirement for keeping XGBoost on CRAN (the R package index).\nUnlike other tests, this test must be invoked manually. Simply add a review comment\n``/gha run r-nold-test`` to a pull request to kick off the test.\n(Ordinary comment won't work. It needs to be a review comment.)\n\n---------------------------------\nUsing container images from r-hub\n---------------------------------\n\nThe r-hub project `provides <https://github.com/r-hub/containers>`__ a list of container\n`images <https://r-hub.github.io/containers/>`__ for reproducing CRAN environments.\n\n\n===============================\nMaking changes to CI containers\n===============================\nMany of the CI pipelines use Docker containers to ensure consistent testing environment\nwith a variety of software packages. We have a separate repo,\n`dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_, to host the logic for\nbuilding and publishing CI containers.\n\nTo make changes to the CI container, carry out the following steps:\n\n1. Identify which container needs updating. Example:\n   ``492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main``\n2. Clone `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_ and make changes to the\n   corresponding Dockerfile. 
Example: ``containers/dockerfile/Dockerfile.gpu``.\n3. Locally build the container, to ensure that the container successfully builds.\n   Consult :ref:`build_run_docker_locally` for this step.\n4. Submit a pull request to `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_ with\n   the proposed changes to the Dockerfile. Make note of the pull request number. Example: ``#204``\n5. Clone `dmlc/xgboost <https://github.com/dmlc/xgboost>`_. Locate the file\n   ``ops/pipeline/get-image-tag.sh``, which should have a single line\n\n   .. code-block:: bash\n\n     IMAGE_TAG=main\n\n   To use the new container, revise the file as follows:\n\n   .. code-block:: bash\n\n     IMAGE_TAG=PR-XX\n\n   where ``XX`` is the pull request number. E.g. ``PR-204``.\n\n6. Now submit a pull request to `dmlc/xgboost <https://github.com/dmlc/xgboost>`_. The CI will\n   run tests using the new container. Verify that all tests pass.\n7. Merge the pull request in ``dmlc/xgboost-devops``. Wait until the CI completes on the ``main`` branch.\n8. Go back to the pull request for ``dmlc/xgboost`` and change ``ops/pipeline/get-image-tag.sh``\n   back to ``IMAGE_TAG=main``.\n9. Merge the pull request in ``dmlc/xgboost``.\n\n.. _build_run_docker_locally:\n\n===========================================\nReproducing CI testing environments locally\n===========================================\nYou can reproduce the same testing environment as the CI pipelines by building and running Docker\ncontainers locally.\n\n**Prerequisites**\n\n1. Install Docker: https://docs.docker.com/engine/install/ubuntu/\n2. 
Install NVIDIA Docker runtime:\n   https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html.\n   The runtime lets you access NVIDIA GPUs inside a Docker container.\n\n---------------------------\nTo build a Docker container\n---------------------------\nClone the repository `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_\nand invoke ``containers/docker_build.sh`` as follows:\n\n.. code-block:: bash\n\n  # The following env vars are only relevant for CI\n  # For local testing, set them to \"main\"\n  export GITHUB_SHA=\"main\"\n  export BRANCH_NAME=\"main\"\n  bash containers/docker_build.sh IMAGE_REPO\n\nwhere ``IMAGE_REPO`` is the name of the container image. The wrapper script will look up the\nYAML file ``containers/ci_container.yml``. For example, when ``IMAGE_REPO`` is set to\n``xgb-ci.gpu``, the script will use the corresponding entry from\n``containers/ci_container.yml``:\n\n.. code-block:: yaml\n\n  xgb-ci.gpu:\n    container_def: gpu\n    build_args:\n      CUDA_VERSION_ARG: \"12.4.1\"\n      NCCL_VERSION_ARG: \"2.23.4-1\"\n      RAPIDS_VERSION_ARG: \"24.10\"\n\nThe ``container_def`` entry indicates where the Dockerfile is located. The container\ndefinition will be fetched from ``containers/dockerfile/Dockerfile.CONTAINER_DEF`` where\n``CONTAINER_DEF`` is the value of ``container_def`` entry. In this example, the Dockerfile\nis ``containers/dockerfile/Dockerfile.gpu``.\n\nThe ``build_args`` entry lists all the build arguments for the Docker build. In this example,\nthe build arguments are:\n\n.. 
code-block::\n\n  --build-arg CUDA_VERSION_ARG=12.4.1 --build-arg NCCL_VERSION_ARG=2.23.4-1 \\\n    --build-arg RAPIDS_VERSION_ARG=24.10\n\nThe build arguments provide inputs to the ``ARG`` instructions in the Dockerfile.\n\nWhen ``containers/docker_build.sh`` completes, you will have access to the container with the\n(fully qualified) URI ``492475357299.dkr.ecr.us-west-2.amazonaws.com/[image_repo]:main``.\nThe prefix ``492475357299.dkr.ecr.us-west-2.amazonaws.com/`` was added so that\nthe container could later be uploaded to AWS Elastic Container Registry (ECR),\na private Docker registry.\n\n-----------------------------------------\nTo run commands within a Docker container\n-----------------------------------------\nInvoke ``ops/docker_run.py`` from the main ``dmlc/xgboost`` repo as follows:\n\n.. code-block:: bash\n\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/[image_repo]:[image_tag] \\\n    [--use-gpus] \\\n    -- \"command to run inside the container\"\n\nwhere ``--use-gpus`` should be specified to expose NVIDIA GPUs to the Docker container.\n\nFor example:\n\n.. code-block:: bash\n\n  # Run without GPU\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \\\n    -- bash ops/pipeline/build-cpu-impl.sh cpu\n\n  # Run with NVIDIA GPU\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    --use-gpus \\\n    -- bash ops/pipeline/test-python-wheel.sh gpu\n\nOptionally, you can specify ``--run-args`` to pass extra arguments to ``docker run``:\n\n.. 
code-block:: bash\n\n  # Allocate extra space in /dev/shm to enable NCCL\n  # Also run the container with elevated privileges\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    --use-gpus \\\n    --run-args='--shm-size=4g --privileged' \\\n    -- bash ops/pipeline/test-python-wheel.sh gpu\n\nSee :ref:`ci_container_infra` to read about how containers are built and managed in the CI pipelines.\n\n--------------------------------------------\nExamples: useful tasks for local development\n--------------------------------------------\n\n* Build XGBoost with GPU support + package it as a Python wheel\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8:main \\\n      -- ops/pipeline/build-cuda-impl.sh\n\n* Build XGBoost with GPU support on Linux ARM64\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8_aarch64:main \\\n      -- ops/pipeline/build-cuda-impl.sh\n\n* Run Python tests\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.cpu:main \\\n      -- ops/pipeline/test-python-wheel.sh cpu\n\n* Run Python tests with GPU algorithm\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu:main \\\n      --use-gpus \\\n      -- ops/pipeline/test-python-wheel.sh gpu\n\n* Run Python tests with GPU algorithm on Linux ARM64\n\n  .. 
code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_aarch64:main \\\n      --use-gpus \\\n      -- ops/pipeline/test-python-wheel.sh gpu-arm64\n\n* Run Python tests with GPU algorithm, with multiple GPUs\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu:main \\\n      --use-gpus \\\n      --run-args='--shm-size=4g' \\\n      -- ops/pipeline/test-python-wheel.sh mgpu\n      # --shm-size=4g is needed for multi-GPU algorithms to function\n\n* Build and test JVM packages\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    export SCALA_VERSION=2.12  # Specify Scala version (2.12 or 2.13)\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.jvm:main \\\n      --run-args \"-e SCALA_VERSION\" \\\n      -- ops/pipeline/build-test-jvm-packages-impl.sh\n\n* Build and test JVM packages, with GPU support\n\n  .. code-block:: bash\n\n    export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com\n    export SCALA_VERSION=2.12  # Specify Scala version (2.12 or 2.13)\n    export USE_CUDA=1\n    python3 ops/docker_run.py \\\n      --image-uri ${DOCKER_REGISTRY}/xgb-ci.jvm_gpu_build:main \\\n      --use-gpus \\\n      --run-args \"-e SCALA_VERSION -e USE_CUDA --shm-size=4g\" \\\n      -- ops/pipeline/build-test-jvm-packages-impl.sh\n      # --shm-size=4g is needed for multi-GPU algorithms to function\n\n*****************************\nTour of the CI infrastructure\n*****************************\n\n==============\nGitHub Actions\n==============\nWe make extensive use of `GitHub Actions <https://github.com/features/actions>`_ to host our\nCI pipelines. 
Most of the tests listed in the configuration files run automatically for every\nincoming pull request and every update to branches.\n\n===============================\nSelf-Hosted Runners with RunsOn\n===============================\n`RunsOn <https://runs-on.com/>`_ is a SaaS (Software as a Service) app that lets us easily create\nself-hosted runners to use with GitHub Actions pipelines. RunsOn uses\n`Amazon Web Services (AWS) <https://aws.amazon.com/>`_ under the hood to provision runners with\naccess to various amounts of CPUs, memory, and NVIDIA GPUs. Thanks to this app, we are able to test\nGPU-accelerated and distributed algorithms of XGBoost while using the familiar interface of\nGitHub Actions.\n\nIn GitHub Actions, jobs run on Microsoft-hosted runners by default.\nTo opt into self-hosted runners (enabled by RunsOn), we use the following special syntax:\n\n.. code-block:: yaml\n\n  runs-on:\n    - runs-on\n    - runner=runner-name\n    - run-id=${{ github.run_id }}\n    - tag=[unique tag that uniquely identifies the job in the GH Action workflow]\n\nwhere the runner is defined in ``.github/runs-on.yml``.\nFor CUDA-enabled ARM64 builds and tests we rely on the ``linux-arm64-gpu`` runner,\nwhich provisions a Graviton + NVIDIA GPU instance.\n\n===================================================================\nThe Lay of the Land: how CI pipelines are organized in the codebase\n===================================================================\nThe XGBoost project stores the configuration for its CI pipelines as part of the codebase.\nThe git repository therefore stores not only the change history for its source code but also\nthe change history for the CI pipelines.\n\nThe CI pipelines are organized into the following directories and files:\n\n* ``.github/workflows/``: Definition of CI pipelines, using the GitHub Actions syntax\n* ``.github/runs-on.yml``: Configuration for the RunsOn service. 
Specifies the spec for\n  the self-hosted CI runners.\n* ``ops/conda_env/``: Definitions for Conda environments\n* ``ops/patch/``: Patch files\n* ``ops/pipeline/``: Shell scripts defining CI/CD pipelines. Most of these scripts can be run\n  locally (to assist with development and debugging); a few must run in the CI.\n* ``ops/script/``: Various utility scripts useful for testing\n* ``ops/docker_run.py``: Wrapper script to run commands inside a container\n\nTo inspect a given CI pipeline, inspect files in the following order:\n\n.. plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph ci_graph {\n      graph [fontname = \"monospace\"];\n      node [fontname = \"monospace\"];\n      edge [fontname = \"monospace\"];\n      0 [label=<.github/workflows/*.yml>, shape=box];\n      1 [label=<ops/pipeline/*.sh>, shape=box];\n      2 [label=<ops/pipeline/*-impl.sh>, shape=box];\n      3 [label=<ops/script/*.sh>, shape=box];\n      0 -> 1 [xlabel=\"Calls\"];\n      1 -> 2 [xlabel=\"Calls,\\nvia docker_run.py\"];\n      2 -> 3 [xlabel=\"Calls\"];\n      1 -> 3 [xlabel=\"Calls\"];\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/ci_graph', view=False)\n  Source(source, format='svg').render('../_static/ci_graph', view=False)\n\n.. figure:: ../_static/ci_graph.svg\n   :align: center\n   :figwidth: 80 %\n\nMany of the CI pipelines use Docker containers to ensure consistent testing environment\nwith a variety of software packages. We have a separate repo,\n`dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_, that\nhosts the code for building the CI containers. The repository is organized as follows:\n\n* ``actions/``: Custom actions to be used with GitHub Actions. 
See :ref:`custom_actions`\n  for more details.\n* ``containers/dockerfile/``: Dockerfiles to define containers\n* ``containers/ci_container.yml``: Defines the mapping between Dockerfiles and containers.\n  Also specifies the build arguments to be used with each container.\n* ``containers/docker_build.{py,sh}``: Wrapper scripts to build and test CI containers.\n* ``vm_images/``: Defines bootstrap scripts to build VM images for Amazon EC2. See\n  :ref:`vm_images` to learn about how VM images relate to container images.\n\nSee :ref:`build_run_docker_locally` to learn about the utility scripts for building and\nusing containers.\n\n===========================================\nArtifact sharing between jobs via Amazon S3\n===========================================\n\nWe make artifacts from one workflow job available to another job, by uploading the\nartifacts to `Amazon S3 <https://aws.amazon.com/s3/>`_. In the CI, we utilize the\nscript ``ops/pipeline/manage-artifacts.py`` to coordinate artifact sharing.\n\n**To upload files to S3**: In the workflow YAML, add the following lines:\n\n.. code-block:: yaml\n\n  - name: Upload files to S3\n    run: |\n      REMOTE_PREFIX=\"remote directory to place the artifact(s)\"\n      python3 ops/pipeline/manage-artifacts.py upload \\\n        --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n        --prefix cache/${{ github.run_id }}/${REMOTE_PREFIX} \\\n        path/to/file\n\nThe ``--prefix`` argument specifies the remote directory in which the artifact(s)\nshould be placed. The artifact(s) will be placed in\n``s3://{RUNS_ON_S3_BUCKET_CACHE}/cache/{GITHUB_RUN_ID}/{REMOTE_PREFIX}/``\nwhere ``RUNS_ON_S3_BUCKET_CACHE`` and ``GITHUB_RUN_ID`` are set by the CI.\n\nYou can upload multiple files, possibly with wildcard globbing:\n\n.. 
code-block:: yaml\n\n  - name: Upload files to S3\n    run: |\n      python3 ops/pipeline/manage-artifacts.py upload \\\n        --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n        --prefix cache/${{ github.run_id }}/build-cuda \\\n        build/testxgboost python-package/dist/*.whl\n\n**To download files from S3**: In the workflow YAML, add the following lines:\n\n.. code-block:: yaml\n\n  - name: Download files from S3\n    run: |\n      REMOTE_PREFIX=\"remote directory where the artifact(s) were placed\"\n      python3 ops/pipeline/manage-artifacts.py download \\\n        --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n        --prefix cache/${{ github.run_id }}/${REMOTE_PREFIX} \\\n        --dest-dir path/to/destination_directory \\\n        artifacts\n\nYou can also use the wildcard globbing. The script will locate all artifacts\nunder the given prefix that matches the wildcard pattern.\n\n.. code-block:: yaml\n\n  - name: Download files from S3\n    run: |\n      # Locate all artifacts with name *.whl under prefix\n      # cache/${GITHUB_RUN_ID}/${REMOTE_PREFIX} and\n      # download them to wheelhouse/.\n      python3 ops/pipeline/manage-artifacts.py download \\\n        --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \\\n        --prefix cache/${{ github.run_id }}/${REMOTE_PREFIX} \\\n        --dest-dir wheelhouse/ \\\n        *.whl\n\n.. _custom_actions:\n\n=================================\nCustom actions for GitHub Actions\n=================================\n\nXGBoost implements a few custom\n`composite actions <https://docs.github.com/en/actions/sharing-automations/creating-actions/creating-a-composite-action>`_\nto reduce duplicated code within workflow YAML files. 
The custom actions are hosted in a separate repository,\n`dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_, to make it easy to test changes to the custom actions in\na pull request or a fork.\n\nIn a workflow file, we'd refer to ``dmlc/xgboost-devops/actions/{custom-action}@main``. For example:\n\n.. code-block:: yaml\n\n  - uses: dmlc/xgboost-devops/actions/miniforge-setup@main\n    with:\n      environment-name: cpp_test\n      environment-file: ops/conda_env/cpp_test.yml\n\nEach custom action consists of two components:\n\n* Main script (``dmlc/xgboost-devops/actions/{custom-action}/action.yml``): dispatches to a specific version\n  of the implementation script (see the next item). The main script clones ``xgboost-devops`` from\n  a specified fork at a particular ref, allowing us to easily test changes to the custom action.\n* Implementation script (``dmlc/xgboost-devops/actions/impls/{custom-action}/action.yml``): Implements the\n  custom script.\n\nThis design was inspired by Mike Sarahan's work in\n`rapidsai/shared-actions <https://github.com/rapidsai/shared-actions>`_.\n\n\n.. _ci_container_infra:\n\n=============================================================\nInfra for building and publishing CI containers and VM images\n=============================================================\n\n--------------------------\nNotes on Docker containers\n--------------------------\n**CI pipeline for containers**\n\nThe `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_ repo hosts a CI pipeline to build new\nDocker containers at a regular schedule. 
New containers are built in the following occasions:\n\n* New commits are added to the ``main`` branch of ``dmlc/xgboost-devops``.\n* New pull requests are submitted to ``dmlc/xgboost-devops``.\n* Every week, at a set day and hour.\n\nThis setup ensures that the CI containers remain up-to-date.\n\n**How wrapper scripts work**\n\nThe wrapper scripts ``docker_build.sh``, ``docker_build.py`` (in ``dmlc/xgboost-devops``) and ``docker_run.py``\n(in ``dmlc/xgboost``) are designed to transparently log what commands are being carried out under the hood.\nFor example, when you run ``bash containers/docker_build.sh xgb-ci.gpu``, the logs will show the following:\n\n.. code-block:: bash\n\n  # docker_build.sh calls docker_build.py...\n  python3 containers/docker_build.py --container-def gpu \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    --build-arg CUDA_VERSION_ARG=12.4.1 --build-arg NCCL_VERSION_ARG=2.23.4-1 \\\n    --build-arg RAPIDS_VERSION_ARG=24.10\n\n  ...\n\n  # .. and docker_build.py in turn calls \"docker build\"...\n  docker build --build-arg CUDA_VERSION_ARG=12.4.1 \\\n    --build-arg NCCL_VERSION_ARG=2.23.4-1 \\\n    --build-arg RAPIDS_VERSION_ARG=24.10 \\\n    --load --progress=plain \\\n    --ulimit nofile=1024000:1024000 \\\n    -t 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    -f containers/dockerfile/Dockerfile.gpu \\\n    containers/\n\nThe logs come in handy when debugging the container builds.\n\nHere is an example with ``docker_run.py``:\n\n.. 
code-block:: bash\n\n  # Run without GPU\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \\\n    -- bash ops/pipeline/build-cpu-impl.sh cpu\n\n  # Run with NVIDIA GPU\n  # Allocate extra space in /dev/shm to enable NCCL\n  # Also run the container with elevated privileges\n  python3 ops/docker_run.py \\\n    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    --use-gpus \\\n    --run-args='--shm-size=4g --privileged' \\\n    -- bash ops/pipeline/test-python-wheel.sh gpu\n\nwhich are translated to the following ``docker run`` invocations:\n\n.. code-block:: bash\n\n  docker run --rm --pid=host \\\n    -w /workspace -v /path/to/xgboost:/workspace \\\n    -e CI_BUILD_UID=<uid> -e CI_BUILD_USER=<user_name> \\\n    -e CI_BUILD_GID=<gid> -e CI_BUILD_GROUP=<group_name> \\\n    492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \\\n    bash ops/pipeline/build-cpu-impl.sh cpu\n\n  docker run --rm --pid=host --gpus all \\\n    -w /workspace -v /path/to/xgboost:/workspace \\\n    -e CI_BUILD_UID=<uid> -e CI_BUILD_USER=<user_name> \\\n    -e CI_BUILD_GID=<gid> -e CI_BUILD_GROUP=<group_name> \\\n    --shm-size=4g --privileged \\\n    492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \\\n    bash ops/pipeline/test-python-wheel.sh gpu\n\n\n.. _vm_images:\n\n------------------\nNotes on VM images\n------------------\n\nIn the ``vm_images/`` directory of `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_,\nwe define Packer scripts to build images for Virtual Machines (VM) on\n`Amazon EC2 <https://aws.amazon.com/ec2/>`_.\nThe VM image contains the minimal set of drivers and system software that are needed to\nrun the containers.\n\nWe update container images much more often than VM images. 
Whereas it takes only 10 minutes to\nbuild a new container image, it takes 1-2 hours to build a new VM image.\n\nTo enable a quick development iteration cycle, we place most of\nthe development environment in containers and keep VM images small.\nPackages needed for testing should be baked into containers, not VM images.\nDevelopers can make changes to containers and see the results of the changes quickly.\n\n.. note:: Special note for the Windows platform\n\n  We do not use containers when testing XGBoost on Windows. All software must be baked into\n  the VM image. Containers are not used because\n  `NVIDIA Container Toolkit <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html>`_\n  does not yet support Windows natively.\n\nThe `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_ repo hosts a CI pipeline to build new\nVM images at a regular schedule (currently monthly).\n"
  },
  {
    "path": "doc/contrib/coding_guide.rst",
    "content": "################\nCoding Guideline\n################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********************\nC++ Coding Guideline\n********************\n- Follow `Google style for C++ <https://google.github.io/styleguide/cppguide.html>`_, with two exceptions:\n\n  * Each line of text may contain up to 100 characters.\n  * The use of C++ exceptions is allowed.\n\n- Use C++17 features such as smart pointers, braced initializers, lambda functions, and ``std::thread``.\n- Use Doxygen to document all the interface code.\n- We have some comments around symbols imported by headers, some of those are hinted by `include-what-you-use <https://include-what-you-use.org>`_. It's not required.\n- We use clang-tidy. Its configuration lives in the root directory of the XGBoost source tree.\n- We have a series of automatic checks to ensure that all of our codebase complies with the Google style. Before submitting your pull request, you are encouraged to run the style checks on your machine. See :ref:`running_checks_locally`.\n\n***********************\nPython Coding Guideline\n***********************\n- Follow `PEP 8: Style Guide for Python Code <https://www.python.org/dev/peps/pep-0008/>`_. We use Pylint to automatically enforce PEP 8 style across our Python codebase. Before submitting your pull request, you are encouraged to run Pylint on your machine. See :ref:`running_checks_locally`.\n- Docstrings should be in `NumPy docstring format <https://numpydoc.readthedocs.io/en/latest/format.html>`_.\n\n.. 
_coding_guide_r:\n\n******************\nR Coding Guideline\n******************\n\nCode Style\n==========\n- We follow Google's C++ Style guide for C++ code.\n\n  - This is mainly to be consistent with the rest of the project.\n  - Another reason is we will be able to check style automatically with a linter.\n\n- When needed, you can disable the linter warning of certain line with ``// NOLINT(*)`` comments.\n- We use `roxygen <https://cran.r-project.org/web/packages/roxygen2/vignettes/roxygen2.html>`_ for documenting the R package.\n\nRmarkdown Vignettes\n===================\nRmarkdown vignettes are placed in `R-package/vignettes <https://github.com/dmlc/xgboost/tree/master/R-package/vignettes>`_.\nThese Rmarkdown files are not compiled. We host the compiled version on `doc/R-package <https://github.com/dmlc/xgboost/tree/master/doc/R-package>`_.\n\nThe following steps are followed to add a new Rmarkdown vignettes:\n\n- Add the original rmarkdown to ``R-package/vignettes``.\n- Modify ``doc/R-package/Makefile`` to add the markdown files to be build.\n- Clone the `dmlc/web-data <https://github.com/dmlc/web-data>`_ repo to folder ``doc``.\n- Now type the following command on ``doc/R-package``:\n\n  .. code-block:: bash\n\n    make the-markdown-to-make.md\n\n- This will generate the markdown, as well as the figures in ``doc/web-data/xgboost/knitr``.\n- Modify the ``doc/R-package/index.md`` to point to the generated markdown.\n- Add the generated figure to the ``dmlc/web-data`` repo.\n\n  - If you already cloned the repo to doc, this means ``git add``\n\n- Create PR for both the markdown and ``dmlc/web-data``.\n- You can also build the document locally by typing the following command at the ``doc`` directory:\n\n  .. 
code-block:: bash\n\n    make html\n\nThe reason we do this is to avoid exploded repo size due to generated images.\n\n\nR package versioning\n====================\nSee :ref:`release`.\n\nTesting R package with different compilers\n==========================================\n\nYou can change the default compiler of R by changing the configuration file in home\ndirectory. For instance, if you want to test XGBoost built with clang++ instead of g++ on\nLinux, put the following in your ``~/.R/Makevars`` file:\n\n.. code-block:: sh\n\n  CC=clang-15\n  CXX17=clang++-15\n\nBe aware that the variable name should match with the name used by ``R CMD``:\n\n.. code-block:: sh\n\n  R CMD config CXX17\n\nRegistering native routines in R\n================================\nAccording to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,\nit is good practice to register native routines and to disable symbol search. When any changes or additions are made to the\nC++ interface of the R package, please make corresponding changes in ``src/init.c`` as well.\n\nGenerating the Package and Running Tests\n========================================\n\nThe source layout of XGBoost is a bit unusual to normal R packages as XGBoost is primarily written in C++ with multiple language bindings in mind. As a result, some special cares need to be taken to generate a standard R tarball. Most of the tests are being run on CI, and as a result, the best way to see how things work is by looking at the CI configuration files (GitHub action, at the time of writing). 
There are helper scripts in ``ops/script`` and ``R-package/tests/helper_scripts`` for running various checks including linter and making the standard tarball.\n\n.. _running_checks_locally:\n\n*********************************\nRunning Formatting Checks Locally\n*********************************\n\nOnce you submit a pull request to `dmlc/xgboost <https://github.com/dmlc/xgboost>`_, we perform\ntwo automatic checks to enforce coding style conventions. To expedite the code review process, you are encouraged to run the checks locally on your machine prior to submitting your pull request.\n\nPre-commit\n==========\nWe provide a `pre-commit <https://pre-commit.com/>`_ configuration for basic formatting,\nlinting, and file-sanity checks. By default, pre-commit runs on files that are staged for commit,\nand the hooks in this repository are configured accordingly. To run on modified or untracked files,\nyou can use ``pre-commit run --files <path> [...]`` or ``pre-commit run --all-files``.\n\nTo enable it locally:\n\n.. code-block:: bash\n\n  python -m pip install pre-commit\n  pre-commit install\n\nTo run it on the files you have staged for commit:\n\n.. code-block:: bash\n\n  pre-commit run\n\nTo run it on a specific range of commits (e.g. in CI or for a local comparison):\n\n.. code-block:: bash\n\n  pre-commit run --from-ref <base> --to-ref <head>\n\nLinter\n======\nWe use a combination of linters to enforce style convention and find potential errors. Linting is especially useful for scripting languages like Python, as we can catch many errors that would have otherwise occurred at run-time.\n\nFor Python scripts, `pylint <https://github.com/PyCQA/pylint>`_, `black <https://github.com/psf/black>`__ and `isort <https://github.com/PyCQA/isort>`__ are used for providing guidance on coding style, and `mypy <https://github.com/python/mypy>`__ is required for type checking. The Python formatting and pylint checks are provided via the corresponding pre-commit hooks, which operate on changed files. 
For C++, `cpplint <https://github.com/cpplint/cpplint>`_ is used along with ``clang-tidy``. For R, ``lintr`` is used.\n\nTo run Python checks locally, install the checkers mentioned previously and run the pre-commit hooks for the files you changed:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  pre-commit run\n\nTo run checks for R:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  R CMD INSTALL R-package/\n  Rscript ops/script/lint_r.R $(pwd)\n\nTo run checks for cpplint locally:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  python ./ops/script/lint_cpp.py\n\n\nLastly, the linter for jvm-packages is integrated into the maven build process.\n\n\nClang-tidy\n==========\n`Clang-tidy <https://clang.llvm.org/extra/clang-tidy/>`_ is an advanced linter for C++ code, made by the LLVM team. We use it to conform our C++ codebase to modern C++ practices and conventions.\n\nTo run this check locally, run the following command from the top level source tree:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  python3 ops/script/run_clang_tidy.py\n\nAlso, the script accepts two optional integer arguments, namely ``--cpp`` and ``--cuda``. By default they are both set to 1, meaning that both C++ and CUDA code will be checked. If the CUDA toolkit is not installed on your machine, you'll encounter an error. To exclude CUDA source from linting, use:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  python3 ops/script/run_clang_tidy.py --cuda=0\n\nSimilarly, if you want to exclude C++ source from linting:\n\n.. code-block:: bash\n\n  cd /path/to/xgboost/\n  python3 ops/script/run_clang_tidy.py --cpp=0\n\n**********************************\nGuide for handling user input data\n**********************************\n\nThis is a non-comprehensive guide for handling user input data.  XGBoost has a wide variety\nof natively supported data structures, mostly coming from higher level language bindings. 
The\ninputs range from basic contiguous 1 dimension memory buffer to more sophisticated data\nstructures like columnar data with validity mask.  Raw input data can be used in 2 places,\nfirstly it's the construction of various ``DMatrix``, secondly it's the in-place\nprediction.  For plain memory buffer, there's not much to discuss since it's just a\npointer with a size. But for general n-dimension array and columnar data, there are many\nsubtleties.  XGBoost has 3 different data structures for handling optionally masked arrays\n(tensors), for consuming user inputs ``ArrayInterface`` should be chosen.  There are many\nexisting functions that accept only plain pointer due to legacy reasons (XGBoost started\nas a much simpler library and didn't care about memory usage that much back then).  The\n``ArrayInterface`` is an in-memory representation of the ``__array_interface__`` protocol\ndefined by numpy or the ``__cuda_array_interface__`` defined by numba.  Following is a\ncheck list of things to have in mind when accepting related user inputs:\n\n- [ ] Is it strided? (identified by the ``strides`` field)\n- [ ] If it's a vector, is it row vector or column vector? (Identified by both ``shape``\n  and ``strides``).\n- [ ] Is the data type supported? Half type and 128-bit integer types should be converted\n  before going into XGBoost.\n- [ ] Does it have higher than 1 dimension? (identified by ``shape`` field)\n- [ ] Are some of dimensions trivial? (shape[dim] <= 1)\n- [ ] Does it have mask? (identified by ``mask`` field)\n- [ ] Can the mask be broadcasted? (unsupported at the moment)\n- [ ] Is it on CUDA memory? (identified by ``data`` field, and optionally ``stream``)\n\nMost of the checks are handled by the ``ArrayInterface`` during construction, except for\nthe data type issue since it doesn't know how to cast such pointers with C builtin types.\nBut for safety reasons one should still try to write related tests for all the items. 
The\ndata type issue should be taken care of in the language binding for each of the specific data\ninput.  For single-chunk columnar format, it's just a masked array for each column so it\nshould be treated uniformly as normal array. For input predictor ``X``, we have adapters\nfor each type of input. Some are compositions of the others. For instance, CSR matrix has 3\npotentially strided arrays for ``indptr``, ``indices`` and ``values``. No assumption\nshould be made to these components (all the check boxes should be considered). Slicing row\nof CSR matrix should calculate the offset of each field based on respective strides.\n\nFor meta info like labels, which is growing both in size and complexity, we accept only\nmasked array at the moment (no specialized adapter).  One should be careful about the\ninput data shape. For base margin it can be 2 dim or higher if we have multiple targets in\nthe future.  The getters in ``DMatrix`` return only 1-dimensional flattened vectors at the\nmoment, which can be improved in the future when it's needed.\n"
  },
  {
    "path": "doc/contrib/community.rst",
    "content": ".. _community_guide:\n\nXGBoost Community Guideline\n===========================\n\nXGBoost adopts the Apache style model and governs by merit. We believe that it is important to create an inclusive community where everyone can use, contribute to, and influence the direction of the project. See `CONTRIBUTORS.md <https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md>`_ for the current list of contributors.\n\n\n\nGeneral Development Process\n---------------------------\nEveryone in the community is welcomed to send patches, documents, and propose new directions to the project. The key guideline here is to enable everyone in the community to get involved and participate the decision and development.  When major changes are proposed, an RFC should be sent to allow discussion by the community. We encourage public discussion, archivable channels such as issues and discuss forum, so that everyone in the community can participate and review the process later.\n\nCode reviews are one of the key ways to ensure the quality of the code. High-quality code reviews prevent technical debt for long-term and are crucial to the success of the project. A pull request needs to be reviewed before it gets merged. A committer who has the expertise of the corresponding area would moderate the pull request and then merge the code when it is ready. The corresponding committer could request multiple reviewers who are familiar with the area of the code. We encourage contributors to request code reviews themselves and help review each other's code -- remember everyone is volunteering their time to the community, high-quality code review itself costs as much as the actual code contribution, you could get your code quickly reviewed if you do others the same favor.\n\nThe community should strive to reach a consensus on technical decisions through discussion. 
We expect committers and PMCs to moderate technical discussions in a diplomatic way, and provide suggestions with clear technical reasoning when necessary.\n\n\n\nCommitters\n----------\nCommitters are individuals who are granted the write access to the project. A committer is usually responsible for a certain area or several areas of the code where they oversee the code review process. The area of contribution can take all forms, including code contributions and code reviews, documents, education, and outreach. Committers are essential for a high quality and healthy project. The community actively looks for new committers from contributors. Here is a list of useful traits that help the community to recognize potential committers:\n\n- Sustained contribution to the project, demonstrated by discussion over RFCs, code reviews and proposals of new features, and other development activities. Being familiar with, and being able to take ownership on one or several areas of the project.\n- Quality of contributions: High-quality, readable code contributions indicated by pull requests that can be merged without a substantial code review.  History of creating clean, maintainable code and including good test cases. Informative code reviews to help other contributors that adhere to a good standard.\n- Community involvement: active participation in the discussion forum, promote the projects via tutorials, talks and outreach. We encourage committers to collaborate broadly, e.g. do code reviews and discuss designs with community members that they do not interact with physically.\n\nThe Project Management Committee (PMC) consists of a group of active committers that moderate the discussion, manage the project release, and propose new committer/PMC members. Potential candidates are usually proposed via an internal discussion among PMCs, followed by a consensus approval, i.e. at least 3 +1 votes, and no vetoes. Any veto must be accompanied by reasoning. 
PMCs should serve the community by upholding the community practices and guidelines in order to make XGBoost a better community for everyone. PMCs should strive to only nominate new candidates outside of their own organization.\n\nThe PMC is in charge of the project's `continuous integration (CI) <https://en.wikipedia.org/wiki/Continuous_integration>`_ and testing infrastructure. Currently, we host our own Jenkins server at https://xgboost-ci.net. The PMC shall appoint committer(s) to manage the CI infrastructure. The PMC may accept 3rd-party donations and sponsorships that would defray the cost of the CI infrastructure. See :ref:`donation_policy`.\n\n\nReviewers\n---------\nReviewers are individuals who actively contributed to the project and are willing to participate in the code review of new contributions. We identify reviewers from active contributors. The committers should explicitly solicit reviews from reviewers.  High-quality code reviews prevent technical debt for the long-term and are crucial to the success of the project. A pull request to the project has to be reviewed by at least one reviewer in order to be merged.\n"
  },
  {
    "path": "doc/contrib/consistency.rst",
    "content": "#################################\nConsistency for Language Bindings\n#################################\n\nXGBoost has many different language bindings developed over the years, some are in the main repository while others live independently. Many features and interfaces are inconsistent with each others, this document aims to provide some guidelines and actionable items for language binding designers.\n\n*******************\nModel Serialization\n*******************\n\nXGBoost C API exposes a couple functions for serializing a model for persistence storage. These saved files are backward compatible, meaning one can load an older XGBoost model with a newer XGBoost version. If there's change in the model format, we have deprecation notice inside the C++ implementation and public issue for tracking the status. See :doc:`/tutorials/saving_model` for details.\n\nAs a result, these are considered to be stable and should work across language bindings. For instance, a model trained in R should be fully functioning in C or Python. Please don't pad anything to the output file or buffer.\n\nIf there are extra fields that must be saved:\n\n- First review whether the attribute can be retrieved from known properties of the model. For instance, there's a :py:attr:`~xgboost.XGBClassifier.classes_` attribute in the scikit-learn interface :py:class:`~xgboost.XGBClassifier`, which can be obtained through `numpy.arange(n_classes)` and doesn't need to be saved into the model. Preserving version compatibility is not a trivial task and we are still spending a significant amount of time to maintain it. Please don't make complication if it's not necessary.\n\n- Then please consider whether it's universal. 
For instance, we have added `feature_types` to the model serialization for categorical features (which is a new feature after 1.6), the attribute is useful or will be useful in the future regardless of the language binding.\n\n- If the field is small, we can save it as model attribute (which is a key-value structure). These attributes are ignored by all other language bindings and mostly an ad-hoc storage.\n\n- Lastly, we should use the UBJSON as the default output format when given a chance (not to be burdened by the old binary format).\n\n*********************\nTraining Continuation\n*********************\n\nThere are cases where we want to train a model based on the previous model, for boosting trees, it's either adding new trees or modifying the existing trees. This can be normal model update, error recovery, or other special cases we don't know of yet. When it happens, the training iteration should start from 0, not from the last boosted rounds of the model. 0 is a special iteration number, we perform some extra checks like whether the label is valid during that iteration. These checks can be expensive but necessary for eliminating silent errors. Keeping the iteration starts from zero allows us to perform these checks only once for each input data.\n\n*********\nInference\n*********\n\nThe inference function is quite inconsistent among language bindings at the time of writing due to historical reasons, but this makes more important for us to have consistency in mind in the future development.\n\n- Firstly, it's the output shape. There's a relatively new parameter called ``strict_shape`` in XGBoost and is rarely used. We want to make it as the default behavior but couldn't due to compatibility concerns. See :doc:`/prediction` for details. 
In short, if specified, the XGBoost C++ implementation can output the prediction with the correct shape, instead of letting the language binding handle it.\n- Policy around early stopping is at the moment inconsistent between various interfaces. Some consider the ``best_iteration`` attribute while others don't. We should formalize that all interfaces in the future should use the ``best_iteration`` during inference unless the user has explicitly specified the ``iteration_range`` parameter.\n\n****************\nParameter naming\n****************\n\nThere are many parameter naming conventions out there. Some XGBoost interfaces try to align with the larger communities. For example, the R package might support parameter naming like ``max.depth=3``, while the Spark package might support ``MaxDepth=3``. These are fine; it's better for the users to keep their pipeline consistent. However, while supporting naming variants, the normal, XGBoost way of naming should also be supported, meaning ``max_depth=3`` should be a valid parameter no matter what language one is using. If someone were to write duplicated parameters ``max.depth=3, max_depth=3``, a clear error should be preferred instead of prioritizing one over the other.\n\n******************\nDefault Parameters\n******************\n\nLike many other machine learning libraries, all parameters from XGBoost can either be inferred from the data or have default values. Bindings should not make copies of these default values and should instead let the XGBoost core decide. When the parameter key is not passed into the C++ core, XGBoost will pick the default accordingly. These defaults are not necessarily optimal, but they are there for consistency. If there's a new choice of default parameter, we can change it inside the core and it will be automatically propagated to all bindings. Given the same set of parameters and data, various bindings should strive to produce the same model. 
One exception is the `num_boost_round`, which exists only in high-level bindings and has various aliases like ``n_estimators``. Its default value is close to arbitrary at the moment; we haven't been able to get a good default yet.\n\n*******\nLogging\n*******\n\nXGBoost has a built-in default logger that can be a wrapper over a binding-specific logging facility. For instance, the Python binding registers a callback to use Python :py:mod:`warnings` and :py:func:`print` function to output logging. We want to keep logging native to the larger communities instead of using the ``std::cerr`` from C++.\n\n***********************************\nMinimum Amount of Data Manipulation\n***********************************\n\nXGBoost is mostly a machine learning library providing boosting algorithm implementation. Some other implementations might perform some sort of data manipulation implicitly like deciding the coding of the data, and transforming the data according to some heuristic before training. We prefer to keep these operations based on necessities instead of convenience to keep the scope of the project well-defined. Whenever possible, we should leave these features to third-party libraries and consider how a user can compose their pipeline. For instance, XGBoost itself should not perform ordinal encoding for categorical data; users will pick an encoder that fits their use cases (like out-of-core implementation, distributed implementation, known mapping, etc.). If some transformations are decided to be part of the algorithm, we can have them inside the core instead of the language binding. Examples would be target-encoding or sketching the response variables. If we were to support them, we could have them inside the core implementation as part of the ML algorithm. 
This aligns with the same principles of default parameters: various bindings should provide similar (if not the same) results given the same set of parameters and data.\n\n************\nFeature Info\n************\n\nXGBoost accepts data structures that contain meta info about predictors, including the names and types of features. Example inputs are :py:class:`pandas.DataFrame` and R `data.frame`. We have the following heuristics:\n\n- When the input data structure contains such information, we set the `feature_names` and `feature_types` for `DMatrix` accordingly.\n- When a user provides this information as explicit parameters, the user-provided version should override the one provided by the data structure.\n- When both sources are missing, the `DMatrix` class contains empty info."
  },
  {
    "path": "doc/contrib/docs.rst",
    "content": "##########################\nDocumentation and Examples\n##########################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n*************\nDocumentation\n*************\n* Python and C documentation is built using `Sphinx <http://www.sphinx-doc.org/en/master/>`_.\n* Each document is written in `reStructuredText <http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_.\n* The documentation is the ``doc/`` directory.\n* You can build it locally using ``make html`` command.\n\n  .. code-block:: bash\n\n    make html\n\n  Run ``make help`` to learn about the other commands.\n\nThe online document is hosted by `Read the Docs <https://readthedocs.org/>`__ where the imported project is managed by `Hyunsu Cho <https://github.com/hcho3>`__ and `Jiaming Yuan <https://github.com/trivialfis>`__.\n\n=========================================\nBuild the Python Docs using pip and Conda\n=========================================\n\n#. Create a conda environment.\n\n   .. code-block:: bash\n\n     conda create -n xgboost-docs --yes python=3.10\n\n   .. note:: Python 3.10 is required by `xgboost_ray <https://github.com/ray-project/xgboost_ray>`__ package.\n\n#. Activate the environment\n\n   .. code-block:: bash\n\n     conda activate xgboost-docs\n\n#. Install required packages (in the current environment) using ``pip`` command.\n\n   .. code-block:: bash\n\n     pip install -r requirements.txt\n\n   .. note::\n      It is currently not possible to install the required packages using ``conda``\n      due to ``xgboost_ray`` being unavailable in conda channels.\n\n      .. code-block:: bash\n\n        conda install --file requirements.txt --yes -c conda-forge\n\n\n#. (optional) Install `graphviz <https://www.graphviz.org/>`__\n\n   .. code-block:: bash\n\n     conda install graphviz --yes\n\n#. Eventually, build the docs.\n\n   .. 
code-block:: bash\n\n     make html\n\n  You should see the following messages in the console:\n\n  .. code-block:: console\n\n    $ make html\n    sphinx-build -b html -d _build/doctrees   . _build/html\n    Running Sphinx v6.2.1\n    ...\n    The HTML pages are in _build/html.\n\n    Build finished. The HTML pages are in _build/html.\n\n*************\nRead The Docs\n*************\n\n`Read the Docs <https://readthedocs.org/>`__ (RTD for short) is an online document hosting\nservice and hosts the `XGBoost document site\n<https://xgboost.readthedocs.io/en/stable/>`__. The document builder used by RTD is\nrelatively lightweight. However some of the packages like the R binding require a compiled\nXGBoost along with all the optional dependencies to render the document. As a result, both\njvm-based packages and the R package's document is built with an independent CI pipeline\nand fetched during online document build.\n\nThe sphinx configuration file ``xgboost/doc/conf.py`` acts as the fetcher. During build,\nthe fetched artifacts are stored in ``xgboost/doc/tmp/jvm_docs`` and\n``xgboost/doc/tmp/r_docs`` respectively. For the R package, there's a dummy index file in\n``xgboost/doc/R-package/r_docs`` . Jvm doc is similar. As for the C doc, it's generated\nusing doxygen and processed by breathe during build as it's relatively cheap. The\ngenerated xml files are stored in ``xgboost/doc/tmp/dev`` .\n\nThe ``xgboost/doc/tmp`` is part of the ``html_extra_path`` sphinx configuration specified\nin the ``conf.py`` file, which informs sphinx to copy the extracted html files to the\nbuild directory. Following is a list of environment variables used by the fetchers in\n``conf.py``:\n\n - ``READTHEDOCS``: Read the docs flag. Build the full documentation site including R, JVM and\n   C doc when set to ``True`` (case sensitive).\n - ``XGBOOST_R_DOCS``: Local path for pre-built R document, used for development. 
If it\n   points to a file that doesn't exist, the configuration script will download the\n   packaged document to that path for future reuse.\n - ``XGBOOST_JVM_DOCS``: Local path for pre-built JVM document, used for\n   development. Similar to the R docs environment variable when it points to a non-existent\n   file.\n\nAs of writing, RTD doesn't provide any facility to be embedded as a GitHub action but we\nneed a way to specify the dependency between the CI pipelines and the document build in\norder to fetch the correct artifact. The workaround is to use an extra GA step to notify\nRTD using its `REST API <https://docs.readthedocs.com/platform/stable/api/v3.html>`__.\n\n********\nExamples\n********\n* Use cases and examples are in `demo <https://github.com/dmlc/xgboost/tree/master/demo>`_ directory.\n* We are super excited to hear about your story. If you have blog posts,\n  tutorials, or code solutions using XGBoost, please tell us, and we will add\n  a link in the example pages.\n"
  },
  {
    "path": "doc/contrib/donate.rst",
    "content": ".. _donation_policy:\n\nDonations\n=========\n\n.. raw:: html\n\n  <a href=\"https://opencollective.com/xgboost\">Donate to dmlc/xgboost</a>\n\nMotivation\n----------\nDMLC/XGBoost has grown from a research project incubated in academia to one of the most widely used gradient boosting framework in production environment. On one side, with the growth of volume and variety of data in the production environment, users are putting accordingly growing expectation to XGBoost in terms of more functions, scalability and robustness. On the other side, as an open source project which develops in a fast pace, XGBoost has been receiving contributions from many individuals and organizations around the world. Given the high expectation from the users and the increasing channels of contribution to the project, delivering the high quality software presents a challenge to the project maintainers.\n\nA robust and efficient **continuous integration (CI)** infrastructure is one of the most critical solutions to address the above challenge. A CI service will monitor an open-source repository and run a suite of integration tests for every incoming contribution. This way, the CI ensures that every proposed change in the codebase is compatible with existing functionalities. Furthermore, XGBoost can enable more thorough tests with a powerful CI infrastructure to cover cases which are closer to the production environment.\n\nThere are several CI services available free to open source projects, such as Travis CI and AppVeyor. The XGBoost project already utilizes GitHub Actions. However, the XGBoost project has needs that these free services do not adequately address. In particular, the limited usage quota of resources such as CPU and memory leaves XGBoost developers unable to bring \"too-intensive\" tests. In addition, they do not offer test machines with GPUs for testing XGBoost-GPU code base which has been attracting more and more interest across many organizations. 
Consequently, the XGBoost project uses a cloud-hosted test farm. We use `Amazon Web Services (AWS) <https://aws.amazon.com/>`_ to host the test machines, along with `GitHub Actions <https://github.com/features/actions>`_ and `RunsOn <https://runs-on.com/>`_ (SaaS app) to organize the CI pipelines.\n\nThe cloud-hosted test farm has recurring operating expenses. RunsOn launches worker machines on AWS on demand to run the test suite on incoming contributions. To save cost, the worker machines are terminated when they are no longer needed.\n\nTo help defray the hosting cost, the XGBoost project seeks donations from third parties.\n\nDonations and Sponsorships\n--------------------------\nDonors may choose to make one-time donations or recurring donations on a monthly or yearly basis. Donors who commit to the Sponsor tier will have their logo displayed on the front page of the XGBoost project.\n\nFiscal host: Open Source Collective 501(c)(6)\n---------------------------------------------\nThe Project Management Committee (PMC) of the XGBoost project appointed `Open Source Collective <https://opencollective.com/opensource>`_ as their **fiscal host**. The platform is a 501(c)(6) registered entity and will manage the funds on behalf of the PMC so that PMC members will not have to manage the funds directly. The platform currently hosts several well-known JavaScript frameworks such as Babel, Vue, and Webpack.\n\nAll expenses incurred for hosting CI will be submitted to the fiscal host with receipts. Only the expenses in the following categories will be approved for reimbursement:\n\n* Cloud expenses for the cloud test farm\n* Cost of domain https://xgboost-ci.net\n* Annual subscription for RunsOn\n\nAdministration of cloud CI infrastructure\n-----------------------------------------\nThe PMC shall appoint committer(s) to administer the cloud CI infrastructure on their behalf. 
The current administrators are as follows:\n\n* Primary administrator: `Hyunsu Cho <https://github.com/hcho3>`_\n* Secondary administrator: `Jiaming Yuan <https://github.com/trivialfis>`_\n\nThe administrators shall make good-faith effort to keep the CI expenses under control. The expenses shall not exceed the available funds. The administrators should post regular updates on CI expenses.\n"
  },
  {
    "path": "doc/contrib/featuremap.rst",
    "content": "############################\nXGBoost Internal Feature Map\n############################\n\nThe following is a reference to the features supported by XGBoost.  It is not a beginner's guide, but rather a list meant to help those looking to add new features to XGBoost understand what needs to be covered.\n\n*************\nCore Features\n*************\nCore features are not dependent on language binding and any language binding can choose to support them.\n\n-------------\nData Storage\n-------------\nThe primary data structure in XGBoost for storing user inputs is ``DMatrix``; it's a container for all data that XGBoost can use. ``QuantileDMatrix`` is a variant specifically designed for the ``hist`` tree method. Both can take GPU-based inputs. They take an optional parameter ``missing`` to specify which input value should be ignored. For external memory support, please refer to :doc:`/tutorials/external_memory`.\n\n---------------------\nSingle Node Training\n---------------------\nThere are two different model types in XGBoost: the tree model, which we primarily focus on, and the linear model. For the tree model, we have various methods to build decision trees; please see the :doc:`/treemethod` for a complete reference. In addition to the tree method, we have many hyper-parameters for tuning the model and injecting prior knowledge into the training process. Two noteworthy examples are :doc:`monotonic constraints </tutorials/monotonic>` and :doc:`feature interaction constraints </tutorials/feature_interaction_constraint>`. These two constraints require special treatment during tree construction. Both the ``hist`` and the ``approx`` tree methods support GPU acceleration. Also, XGBoost GPU supports gradient-based sampling, which supports external-memory data as well.\n\nThe objective function plays an important role in training. It not only provides the gradient, but is also responsible for estimating a good starting point for Newton optimization. 
Please note that users can define custom objective functions for the task at hand.\nIn addition to numerical features, XGBoost also supports categorical features with two different algorithms, including one-hot encoding and optimal partitioning. For more information, refer to the :doc:`categorical feature tutorial </tutorials/categorical>`. The ``hist`` and the ``approx`` tree methods support categorical features for CPU and GPU.\n\nThere's work-in-progress support for vector leaves, which are decision tree leaves that contain multiple values. This type of tree is used to support efficient multi-class and multi-target models.\n\n----------\nInference\n----------\nBy inference, we specifically mean getting model prediction for the response variable. XGBoost supports two inference methods. The first one is the prediction on the ``DMatrix`` object (or ``QuantileDMatrix``, which is a subclass). Using a ``DMatrix`` object allows XGBoost to cache the prediction, hence getting faster performance when running prediction on the same data with new trees. The second method is ``inplace_predict``, which bypasses the construction of ``DMatrix``. It's more efficient but doesn't support cached prediction. In addition to returning the estimated response, we also support returning the leaf index, which can be used to analyse the model and as a feature to another model.\n\n----------\nModel IO\n----------\nWe have a set of methods for different model serialization methods, including complete serialization, saving to a file, and saving to a buffer. For more, refer to the :doc:`/tutorials/saving_model`.\n\n-------------------\nModel Explanation\n-------------------\nXGBoost includes features designed to improve understanding of the model. 
Here's a list:\n\n- Global feature importance.\n- SHAP value, including contribution and intervention.\n- Tree dump.\n- Tree visualization.\n- Tree as dataframe.\n\nFor GPU support, the SHAP value uses the `GPUTreeShap <https://github.com/rapidsai/gputreeshap/tree/main>`_ project in rapidsai. They all support categorical features, while vector-leaf is still in progress.\n\n----------\nEvaluation\n----------\nXGBoost has built-in support for a wide range of metrics, from basic regression to learning to rank and survival modeling. They can handle distributed training and GPU-based acceleration. Custom metrics are supported as well, please see :doc:`/tutorials/custom_metric_obj`.\n\n--------------------\nDistributed Training\n--------------------\nXGBoost has built-in support for three distributed frameworks, including ``Dask``, ``PySpark``, and ``Spark (Scala)``. In addition, there's ``flink`` support for the Java binding and the ``ray-xgboost`` project. Please see the respective tutorial on how to use them. By default, XGBoost uses sample-based parallelism for distributed training. The column-based split is still a work in progress and needs to be supported in these high-level framework integrations. On top of distributed training, we are also working on federated learning for both sample-based and column-based splits.\n\nDistributed training works with custom objective functions and metrics as well. XGBoost aggregates the evaluation result automatically during training.\n\nThe distributed training is enabled by a built-in implementation of a collective library. It's based on the RABIT project and has evolved significantly since its early adoption. The collective implementation supports GPU via NCCL, and has variants for handling federated learning and federated learning on GPU.\n\nInference normally doesn't require any special treatment since we are using sample-based split. 
However, with column-based data split, we need to initialize the communicator context as well.\n\n*****************\nLanguage Bindings\n*****************\nWe have a list of bindings for various languages. Inside the XGBoost repository, there's Python, R, Java, Scala, and C. All language bindings are built on top of the C version. Some others, like Julia and Rust, have their own repository. For guidelines on adding a new binding, please see :doc:`/contrib/consistency`."
  },
  {
    "path": "doc/contrib/git_guide.rst",
    "content": "###################\nGit Workflow Howtos\n###################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n***********************************\nHow to resolve conflict with master\n***********************************\n\n- First rebase to most recent master\n\n  .. code-block:: bash\n\n    # The first two steps can be skipped after you do it once.\n    git remote add upstream https://github.com/dmlc/xgboost\n    git fetch upstream\n    git rebase upstream/master\n\n- The git may show some conflicts it cannot merge, say ``conflicted.py``.\n\n  - Manually modify the file to resolve the conflict.\n  - After you resolved the conflict, mark it as resolved by\n\n    .. code-block:: bash\n\n      git add conflicted.py\n\n- Then you can continue rebase by\n\n  .. code-block:: bash\n\n    git rebase --continue\n\n- Finally push to your fork, you may need to force push here.\n\n  .. code-block:: bash\n\n    git push --force\n\n****************************************\nHow to combine multiple commits into one\n****************************************\nSometimes we want to combine multiple commits, especially when later commits are only fixes to previous ones,\nto create a PR with a set of meaningful commits. You can do it with the following steps.\n\n- Before doing so, configure the default editor of git if you haven't done so before.\n\n  .. code-block:: bash\n\n    git config core.editor the-editor-you-like\n\n- Assume we want to merge the last 3 commits, type the following commands\n\n  .. code-block:: bash\n\n    git rebase -i HEAD~3\n\n- It will pop up a text editor. Set the first commit as ``pick``, and change later ones to ``squash``.\n- After you saved the file, it will pop up another text editor to ask you to modify the combined commit message.\n- Push the changes to your fork, you need to force push.\n\n  .. 
code-block:: bash\n\n    git push --force\n\n*************************************\nWhat is the consequence of force push\n*************************************\nThe previous two tips require force push; this is because we altered the path of the commits.\nIt is fine to force push to your own fork, as long as the commits changed are only yours.\n\n"
  },
  {
    "path": "doc/contrib/index.rst",
    "content": "#####################\nContribute to XGBoost\n#####################\n\nXGBoost has been developed by community members. Everyone is welcome to contribute. We value all forms of contributions, including, but not limited to:\n\n* Code reviews for pull requests\n* Documentation and usage examples\n* Community participation in forums and issues\n* Code readability and developer guide\n\n  - We welcome contributions that add code comments to improve readability.\n  - We also welcome contributions to docs to explain the design choices of the XGBoost internals.\n\n* Test cases to make the codebase more robust.\n* Tutorials, blog posts, talks that promote the project.\n\nHere are guidelines for contributing to various aspects of the XGBoost project:\n\n.. toctree::\n  :maxdepth: 2\n\n  Community Guideline <community>\n  donate\n  coding_guide\n  consistency\n  python_packaging\n  unit_tests\n  Docs and Examples <docs>\n  featuremap\n  git_guide\n  release\n  ci\n"
  },
  {
    "path": "doc/contrib/python_packaging.rst",
    "content": "###########################################\nNotes on packaging XGBoost's Python package\n###########################################\n\n\n.. contents:: Contents\n  :local:\n\n.. _packaging_python_xgboost:\n\n***************************************************\nHow to build binary wheels and source distributions\n***************************************************\n\nWheels and source distributions (sdist for short) are the two main\nmechanisms for packaging and distributing Python packages.\n\n* A **source distribution** (sdist) is a tarball (``.tar.gz`` extension) that\n  contains the source code.\n* A **wheel** is a ZIP-compressed archive (with ``.whl`` extension)\n  representing a *built* distribution. Unlike an sdist, a wheel can contain\n  compiled components. The compiled components are compiled prior to distribution,\n  making it more convenient for end-users to install a wheel. Wheels containing\n  compiled components are referred to as **binary wheels**.\n\nSee `Python Packaging User Guide <https://packaging.python.org/en/latest/>`_\nto learn more about how Python packages in general are packaged and\ndistributed.\n\nFor the remainder of this document, we will focus on packaging and\ndistributing XGBoost.\n\nBuilding sdists\n===============\n\nIn the case of XGBoost, an sdist contains both the Python code as well as\nthe C++ code, so that the core part of XGBoost can be compiled into the\nshared library ``libxgboost.so`` [#shared_lib_name]_.\n\nYou can obtain an sdist as follows:\n\n.. code-block:: console\n\n  $ python -m build --sdist .\n\n(You'll need to install the ``build`` package first:\n``pip install build`` or ``conda install python-build``.)\n\nRunning ``pip install`` with an sdist will launch CMake and a C++ compiler\nto compile the bundled C++ code into ``libxgboost.so``:\n\n.. 
code-block:: console\n\n  $ pip install -v xgboost-2.0.0.tar.gz  # Add -v to show build progress\n\nBuilding binary wheels\n======================\n\nYou can also build a wheel as follows:\n\n.. code-block:: console\n\n   $ pip wheel --no-deps -v .\n\nNotably, the resulting wheel contains a copy of the shared library\n``libxgboost.so`` [#shared_lib_name]_. The wheel is a **binary wheel**,\nsince it contains a compiled binary.\n\n\nRunning ``pip install`` with the binary wheel will extract the content of\nthe wheel into the current Python environment. Since the wheel already\ncontains a pre-built copy of ``libxgboost.so``, it does not have to be\nbuilt at the time of install. So ``pip install`` with the binary wheel\ncompletes quickly:\n\n.. code-block:: console\n\n  $ pip install xgboost-2.0.0-py3-none-linux_x86_64.whl  # Completes quickly\n\n.. rubric:: Footnotes\n\n.. [#shared_lib_name] The name of the shared library file will differ\n   depending on the operating system in use. See :ref:`build_shared_lib`.\n"
  },
  {
    "path": "doc/contrib/release.rst",
    "content": ".. _release:\n\nXGBoost Release Policy\n=======================\n\nVersioning Policy\n-----------------\n\nStarting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE]\n\n* MAJOR: We guarantee the API compatibility across releases with the same major version number. We expect to have a 1+ years development period for a new MAJOR release version.\n* FEATURE: We ship new features, improvements and bug fixes through feature releases. The cycle length of a feature is decided by the size of feature roadmap. The roadmap is decided right after the previous release.\n* MAINTENANCE: Maintenance version only contains bug fixes. This type of release only occurs when we found significant correctness and/or performance bugs and barrier for users to upgrade to a new version of XGBoost smoothly.\n\n\nMaking a Release\n-----------------\n\n1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.\n2. Create a release branch if this is a major release. Bump release version. There's a helper script ``ops/script/change_version.py``.\n3. Commit the change, create a PR on GitHub on release branch.  Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.\n4. Create a tag on release branch, either on GitHub or locally.\n5. Make a release on GitHub tag page, which might be done with previous step if the tag is created on GitHub.\n6. 
Submit pip, R-universe, CRAN, and Maven packages.\n\n   There are helper scripts for automating the process in ``xgboost/dev/``.\n\n   + The pip package is maintained by `Hyunsu Cho <https://github.com/hcho3>`__ and `Jiaming Yuan <https://github.com/trivialfis>`__.\n\n   + The CRAN package and the R-universe packages are maintained by `Jiaming Yuan <https://github.com/trivialfis>`__.\n\n   + The Maven package is maintained by `Nan Zhu <https://github.com/CodingCat>`_ and `Hyunsu Cho <https://github.com/hcho3>`_.\n\n\nR Universe Packages\n-------------------\n\nSince XGBoost 3.0.0, we host the R package on `R-Universe\n<https://dmlc.r-universe.dev/xgboost>`__. To make a new release, change the\n``packages.json`` in `dmlc.r-universe.dev <https://github.com/dmlc/dmlc.r-universe.dev>`__\nwith a new release branch.\n\nR CRAN Package\n--------------\nBefore submitting a release, one should test the package on `R-hub <https://r-hub.github.io/rhub/>`__ and `win-builder <https://win-builder.r-project.org/>`__ first.  Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder.\n\nAccording to the `CRAN policy <https://cran.r-project.org/web/packages/policies.html>`__:\n\n    If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously.\n\nWe need to check the number of CPUs used in examples. Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation.\n\nRead The Docs\n-------------\n\nWe might need to manually activate the new release branch for `read the docs\n<https://xgboost.readthedocs.io/>`__ and set it as the default branch in the console `[2]\n<#references>`__. 
Please check the document build and make sure the correct branch is\nactivated and selected after making a new release.\n\nReferences\n----------\n\n[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html\n\n[2] https://github.com/readthedocs/readthedocs.org/issues/12073"
  },
  {
    "path": "doc/contrib/unit_tests.rst",
    "content": "########################\nAdding and running tests\n########################\n\nA high-quality suite of tests is crucial in ensuring correctness and robustness of the codebase. Here, we provide instructions how to run unit tests, and also how to add a new one.\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n**********************\nAdding a new unit test\n**********************\n\nPython package: pytest\n======================\nAdd your test under the directories\n\n- `tests/python/ <https://github.com/dmlc/xgboost/tree/master/tests/python>`_\n- `tests/python-gpu/ <https://github.com/dmlc/xgboost/tree/master/tests/python-gpu>`_ (if you are testing GPU code)\n- `tests/test_distributed <https://github.com/dmlc/xgboost/tree/master/tests/test_distributed>`_. (if a distributed framework is used)\n\nRefer to `the PyTest tutorial <https://docs.pytest.org/en/latest/getting-started.html>`_\nto learn how to write tests for Python code.\n\nYou may try running your test by following instructions in :ref:`this section <running_pytest>`.\n\nC++: Google Test\n================\nAdd your test under the directory `tests/cpp/ <https://github.com/dmlc/xgboost/tree/master/tests/cpp>`_. Refer to `this excellent tutorial on using Google Test <https://developer.ibm.com/articles/au-googletestingframework/>`_.\n\nYou may try running your test by following instructions in :ref:`this section <running_gtest>`. Note. Google Test version 1.8.1 or later is required.\n\nJVM packages: JUnit / scalatest\n===============================\nThe JVM packages for XGBoost (XGBoost4J / XGBoost4J-Spark) use `the Maven Standard Directory Layout <https://maven.apache.org/guides/introduction/introduction-to-the-standard-directory-layout.html>`_. 
Specifically, the tests for the JVM packages are located in the following locations:\n\n* `jvm-packages/xgboost4j/src/test/ <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j/src/test>`_\n* `jvm-packages/xgboost4j-spark/src/test/ <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-spark/src/test>`_\n\nTo write a test for Java code, see `JUnit 5 tutorial <https://junit.org/junit5/docs/current/user-guide/>`_.\nTo write a test for Scala, see `Scalatest tutorial <http://www.scalatest.org/user_guide/writing_your_first_test>`_.\n\nYou may try running your test by following instructions in :ref:`this section <running_jvm_tests>`.\n\nR package: testthat\n===================\nAdd your test under the directory `R-package/tests/testthat <https://github.com/dmlc/xgboost/tree/master/R-package/tests/testthat>`_. Refer to `this excellent tutorial on testthat <https://kbroman.org/pkg_primer/pages/tests.html>`_.\n\nYou may try running your test by following instructions in :ref:`this section <running_r_tests>`.\n\n**************************\nRunning Unit Tests Locally\n**************************\n\n.. _running_r_tests:\n\nR package\n=========\nRun\n\n.. code-block:: bash\n\n  python ./ops/script/test_r_package.py --task=check\n\nat the root of the project directory. The command builds and checks the XGBoost\nr-package. Alternatively, if you want to just run the tests, you can use the following\ncommands after installing XGBoost:\n\n.. code-block:: bash\n\n  cd R-package/tests/\n  Rscript testthat.R\n\n.. _running_jvm_tests:\n\nJVM packages\n============\nMaven is used\n\n.. code-block:: bash\n\n  mvn test\n\n.. _running_pytest:\n\nPython package: pytest\n======================\n\nTo run Python unit tests, first install `pytest <https://docs.pytest.org/en/latest/contents.html>`_ package:\n\n.. code:: bash\n\n  pip3 install pytest\n\nThen compile XGBoost according to instructions in :ref:`build_shared_lib`. 
Finally, invoke pytest at the project root directory:\n\n.. code:: bash\n\n  # Tell Python where to find XGBoost module\n  export PYTHONPATH=./python-package\n  pytest -v -s --fulltrace tests/python\n\nIn addition, to test CUDA code, run:\n\n.. code:: bash\n\n  # Tell Python where to find XGBoost module\n  export PYTHONPATH=./python-package\n  pytest -v -s --fulltrace tests/python-gpu\n\n(For this step, you should have compiled XGBoost with CUDA enabled.)\n\nFor testing with distributed frameworks like ``Dask`` and ``PySpark``:\n\n.. code:: bash\n\n  # Tell Python where to find XGBoost module\n  export PYTHONPATH=./python-package\n  pytest -v -s --fulltrace tests/test_distributed\n\n.. _running_gtest:\n\nC++: Google Test\n================\n\nTo build and run C++ unit tests enable tests while running CMake:\n\n.. code-block:: bash\n\n  cmake -B build -S . -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON\n  cmake --build build\n  cd ./build\n  ./testxgboost\n\nFlags like ``USE_CUDA``, ``USE_DMLC_GTEST`` are optional. For more info about how to build\nXGBoost from source, see :doc:`/build`. One can also run all unit tests using ctest tool\nwhich provides higher flexibility. For example:\n\n.. code-block:: bash\n\n  ctest --verbose\n\nIf you need to debug errors on Windows using the debugger from VS, you can append the gtest flags in `test_main.cc`:\n\n.. code-block::\n\n  ::testing::GTEST_FLAG(filter) = \"Suite.Test\";\n  ::testing::GTEST_FLAG(repeat) = 10;\n\n\n***********************************************\nSanitizers: Detect memory errors and data races\n***********************************************\n\nBy default, sanitizers are bundled in GCC and Clang/LLVM. 
One can enable sanitizers with\nGCC >= 4.8 or LLVM >= 3.1, but some distributions might package sanitizers separately.\nHere is a list of supported sanitizers with corresponding library names:\n\n- Address sanitizer: libasan\n- Undefined sanitizer: libubsan\n- Leak sanitizer:    liblsan\n- Thread sanitizer:  libtsan\n\nMemory sanitizer is exclusive to LLVM, hence not supported in XGBoost.  With latest\ncompilers like gcc-9, when sanitizer flags are specified, the compiler driver should be\nable to link the runtime libraries automatically.\n\nHow to build XGBoost with sanitizers\n====================================\nOne can build XGBoost with sanitizer support by specifying -DUSE_SANITIZER=ON.\nBy default, address sanitizer and leak sanitizer are used when you turn the\nUSE_SANITIZER flag on.  You can always change the default by providing a\nsemicolon separated list of sanitizers to ENABLED_SANITIZERS.  Note that thread\nsanitizer is not compatible with the other two sanitizers.\n\n.. code-block:: bash\n\n  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=\"address;undefined\" /path/to/xgboost\n\nBy default, CMake will search regular system paths for sanitizers; you can also\nsupply a specified SANITIZER_PATH.\n\n.. code-block:: bash\n\n  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=\"address;undefined\" \\\n  -DSANITIZER_PATH=/path/to/sanitizers /path/to/xgboost\n\nHow to use sanitizers with CUDA support\n=======================================\nRunning XGBoost on CUDA with address sanitizer (asan) will raise a memory error.\nTo use asan with CUDA correctly, you need to configure asan via the ASAN_OPTIONS\nenvironment variable:\n\n.. code-block:: bash\n\n  ASAN_OPTIONS=protect_shadow_gap=0 ${BUILD_DIR}/testxgboost\n\n\nOther sanitizer runtime options\n===============================\n\nBy default undefined sanitizer doesn't print out the backtrace. You can enable it by\nexporting the environment variable:\n\n.. 
code-block::\n\n  UBSAN_OPTIONS=print_stacktrace=1 ${BUILD_DIR}/testxgboost\n\nFor details, please consult `official documentation <https://github.com/google/sanitizers/wiki>`_ for sanitizers.\n"
  },
  {
    "path": "doc/dump.schema",
    "content": "{\n    \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n    \"definitions\": {\n        \"split_node\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"nodeid\": {\n                    \"type\": \"number\",\n                    \"minimum\": 0\n                },\n                \"depth\": {\n                    \"type\": \"number\",\n                    \"minimum\": 0\n                },\n                \"yes\": {\n                    \"type\": \"number\",\n                    \"minimum\": 0\n                },\n                \"no\": {\n                    \"type\": \"number\",\n                    \"minimum\": 0\n                },\n                \"split\": {\n                    \"type\": \"string\"\n                },\n                \"children\": {\n                    \"type\": \"array\",\n                    \"items\": {\n                        \"oneOf\": [\n                            {\"$ref\": \"#/definitions/split_node\"},\n                            {\"$ref\": \"#/definitions/leaf_node\"}\n                        ]\n                    },\n                    \"maxItems\": 2\n                }\n            },\n            \"required\": [\"nodeid\", \"depth\", \"yes\", \"no\", \"split\", \"children\"]\n        },\n        \"leaf_node\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"nodeid\": {\n                    \"type\": \"number\",\n                    \"minimum\": 0\n                },\n                \"leaf\": {\n                    \"type\": \"number\"\n                }\n            },\n            \"required\": [\"nodeid\", \"leaf\"]\n        }\n    },\n    \"type\": \"object\",\n    \"$ref\": \"#/definitions/split_node\"\n}\n"
  },
  {
    "path": "doc/faq.rst",
    "content": "##########################\nFrequently Asked Questions\n##########################\n\nThis document contains frequently asked questions about XGBoost.\n\n**********************\nHow to tune parameters\n**********************\nSee :doc:`Parameter Tuning Guide </tutorials/param_tuning>`.\n\n************************\nDescription of the model\n************************\nSee :doc:`Introduction to Boosted Trees </tutorials/model>`.\n\n********************\nI have a big dataset\n********************\nXGBoost is designed to be memory efficient. Usually it can handle problems as long as the data fits into your memory.\nThis usually means millions of instances.\n\nIf you are running out of memory, checkout the tutorial page for using :doc:`distributed training </tutorials/index>` with one of the many frameworks, or the :doc:`external memory version </tutorials/external_memory>` for using external memory.\n\n\n**********************************\nHow to handle categorical feature?\n**********************************\nVisit :doc:`this tutorial </tutorials/categorical>` for a walkthrough of categorical data handling and some worked examples.\n\n******************************************************************\nWhy not implement distributed XGBoost on top of X (Spark, Hadoop)?\n******************************************************************\nThe first fact we need to know is going distributed does not necessarily solve all the problems.\nInstead, it creates more problems such as more communication overhead and fault tolerance.\nThe ultimate question will still come back to how to push the limit of each computation node\nand use less resources to complete the task (thus with less communication and chance of failure).\n\nTo achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.\nThe demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set 
of APIs.\nSuch a design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.\nMost importantly, it pushes the limit of the computation resources we can use.\n\n****************************************\nHow can I port a model to my own system?\n****************************************\nThe model and data format of XGBoost are exchangeable,\nwhich means the model trained by one language can be loaded in another.\nThis means you can train the model using R, while running prediction using\nJava or C++, which are more common in production systems.\nYou can also train the model using distributed versions,\nand load them in from Python to do some interactive analysis. See :doc:`Model IO </tutorials/saving_model>` for more information.\n\n**************************\nDo you support LambdaMART?\n**************************\nYes, XGBoost implements LambdaMART. Check out the objective section in :doc:`parameters </parameter>`.\n\n*******************************\nHow to deal with missing values\n*******************************\nXGBoost supports missing values by default.\nIn tree algorithms, branch directions for missing values are learned during training.\nNote that the gblinear booster treats missing values as zeros.\n\nWhen the ``missing`` parameter is specified, values in the input predictor that are equal to\n``missing`` will be treated as missing and removed.  By default it's set to ``NaN``.\n\n**************************************\nSlightly different result between runs\n**************************************\nThis could happen, due to non-determinism in floating point summation order and multi-threading. Data partitioning changes made by the distributed framework can be an issue as well. 
However, the general accuracy will usually remain the same.\n\n**********************************************************\nWhy do I see different results with sparse and dense data?\n**********************************************************\n\n\"Sparse\" elements are treated as if they were \"missing\" by the tree booster, and as zeros by the linear booster. However, if we convert the sparse matrix back to a dense matrix, the conversion might fill the missing entries with 0, which is a valid value for XGBoost. In short, sparse matrix implementations like scipy treat 0 as missing, while 0 is a valid split value for XGBoost decision trees.\n"
  },
  {
    "path": "doc/get_started.rst",
    "content": "########################\nGet Started with XGBoost\n########################\n\nThis is a quick start tutorial showing snippets for you to quickly try out XGBoost\non the demo dataset on a binary classification task.\n\n********************************\nLinks to Other Helpful Resources\n********************************\n- See :doc:`Installation Guide </install>` on how to install XGBoost.\n- See :doc:`Text Input Format </tutorials/input_format>` on using text format for specifying training/testing data.\n- See :doc:`Tutorials </tutorials/index>` for tips and tutorials.\n- See `Learning to use XGBoost by Examples <https://github.com/dmlc/xgboost/tree/master/demo>`_ for more code examples.\n\n******\nPython\n******\n\n.. code-block:: python\n\n  from xgboost import XGBClassifier\n  # read data\n  from sklearn.datasets import load_iris\n  from sklearn.model_selection import train_test_split\n  data = load_iris()\n  X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'], test_size=.2)\n  # create model instance\n  bst = XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic')\n  # fit model\n  bst.fit(X_train, y_train)\n  # make predictions\n  preds = bst.predict(X_test)\n\n***\nR\n***\n\n.. code-block:: R\n\n  # load data\n  data(agaricus.train, package='xgboost')\n  data(agaricus.test, package='xgboost')\n  train <- agaricus.train\n  test <- agaricus.test\n  # fit model\n  bst <- xgboost(x = train$data, y = factor(train$label),\n                 max.depth = 2, eta = 1, nrounds = 2,\n                 nthread = 2, objective = \"binary:logistic\")\n  # predict\n  pred <- predict(bst, test$data)\n\n*****\nJulia\n*****\n\n.. 
code-block:: julia\n\n  using XGBoost\n  # read data\n  train_X, train_Y = readlibsvm(\"demo/data/agaricus.txt.train\", (6513, 126))\n  test_X, test_Y = readlibsvm(\"demo/data/agaricus.txt.test\", (1611, 126))\n  # fit model\n  num_round = 2\n  bst = xgboost(train_X, num_round, label=train_Y, eta=1, max_depth=2)\n  # predict\n  pred = predict(bst, test_X)\n\n*****\nScala\n*****\n\n.. code-block:: scala\n\n  import ml.dmlc.xgboost4j.scala.DMatrix\n  import ml.dmlc.xgboost4j.scala.XGBoost\n\n  object XGBoostScalaExample {\n    def main(args: Array[String]) {\n      // read training data, available at xgboost/demo/data\n      val trainData =\n        new DMatrix(\"/path/to/agaricus.txt.train\")\n      // define parameters\n      val paramMap = List(\n        \"eta\" -> 0.1,\n        \"max_depth\" -> 2,\n        \"objective\" -> \"binary:logistic\").toMap\n      // number of iterations\n      val round = 2\n      // train the model\n      val model = XGBoost.train(trainData, paramMap, round)\n      // run prediction\n      val predTrain = model.predict(trainData)\n      // save model to the file.\n      model.saveModel(\"/local/path/to/model\")\n    }\n  }\n"
  },
  {
    "path": "doc/gpu/index.rst",
    "content": "###################\nXGBoost GPU Support\n###################\n\nThis page contains information about GPU algorithms supported in XGBoost.\n\n.. note:: CUDA 12.0, Compute Capability 5.0 required (See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)\n\n*********************************************\nCUDA Accelerated Tree Construction Algorithms\n*********************************************\n\nMost of the algorithms in XGBoost including training, prediction and evaluation can be accelerated with CUDA-capable GPUs.\n\nUsage\n=====\n\nTo enable GPU acceleration, specify the ``device`` parameter as ``cuda``. In addition, the device ordinal (which GPU to use if you have multiple devices in the same node) can be specified using the ``cuda:<ordinal>`` syntax, where ``<ordinal>`` is an integer that represents the device ordinal. XGBoost defaults to 0 (the first device reported by CUDA runtime).\n\nThe GPU algorithms currently work with CLI, Python, R, and JVM packages. See :doc:`/install` for details.\n\n.. code-block:: python\n  :caption: Python example\n\n  params = dict()\n  params[\"device\"] = \"cuda\"\n  params[\"tree_method\"] = \"hist\"\n  Xy = xgboost.QuantileDMatrix(X, y)\n  xgboost.train(params, Xy)\n\n.. code-block:: python\n  :caption: With the Scikit-Learn interface\n\n  XGBRegressor(tree_method=\"hist\", device=\"cuda\")\n\nGPU-Accelerated SHAP values\n=============================\nXGBoost makes use of `GPUTreeShap <https://github.com/rapidsai/gputreeshap>`_ as a backend for computing shap values when the GPU is used.\n\n.. 
code-block:: python\n\n  booster.set_param({\"device\": \"cuda:0\"})\n  shap_values = booster.predict(dtrain, pred_contribs=True)\n  shap_interaction_values = booster.predict(dtrain, pred_interactions=True)\n\nSee :ref:`sphx_glr_python_examples_gpu_tree_shap.py` for a worked example.\n\nMulti-node Multi-GPU Training\n=============================\n\nXGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_, ``Spark`` and ``PySpark``. For getting started with Dask, see our tutorial :doc:`/tutorials/dask` and worked examples :doc:`/python/dask-examples/index`, as well as the Python documentation :ref:`dask_api` for a complete reference. For usage with ``Spark`` using Scala, see :doc:`/jvm/xgboost4j_spark_gpu_tutorial`. Lastly, for distributed GPU training with ``PySpark``, see :doc:`/tutorials/spark_estimator`.\n\nRMM integration\n===============\n\nXGBoost provides optional support for RMM integration. See :doc:`/python/rmm-examples/index` for more info.\n\n\nMemory usage\n============\nThe following are some guidelines on the device memory usage of the ``hist`` tree method on GPU.\n\nMemory inside xgboost training is generally allocated for two reasons - storing the dataset and working memory.\n\nThe dataset itself is stored on device in a compressed ELLPACK format. The ELLPACK format is a type of sparse matrix that stores elements with a constant row stride. This format is convenient for parallel computation when compared to CSR because the row index of each element is known directly from its address in memory. The disadvantage of the ELLPACK format is that it becomes less memory efficient if the maximum row length is significantly more than the average row length. Elements are quantised and stored as integers. These integers are compressed to a minimum bit length. Depending on the number of features, we usually don't need the full range of a 32 bit integer to store elements and so compress this down. 
The compressed, quantised ELLPACK format will commonly use 1/4 the space of a CSR matrix stored in floating point.\n\nWorking memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree; when a certain threshold of memory usage is passed we stop doing this to conserve memory at some performance loss.\n\nIf you are getting out-of-memory errors on a big dataset, try the\n:py:class:`xgboost.QuantileDMatrix` first. If you have access to NVLink-C2C devices, see\n:doc:`external memory version </tutorials/external_memory>`. In addition,\n:py:meth:`~xgboost.Booster.inplace_predict` should be preferred over ``predict`` when data\nis already on GPU. Both :py:class:`xgboost.QuantileDMatrix` and\n:py:meth:`~xgboost.Booster.inplace_predict` are automatically enabled if you are using the\nscikit-learn interface. Last but not least, using :py:class:`~xgboost.QuantileDMatrix`\nwith a data iterator as input is a great way to increase memory capacity, see\n:ref:`sphx_glr_python_examples_quantile_data_iterator.py`.\n\n\nCPU-GPU Interoperability\n========================\n\nThe model can be used on any device regardless of the one used to train it. For instance, a model trained using GPU can still work on a CPU-only machine and vice versa. For more information about model serialization, see :doc:`/tutorials/saving_model`.\n\n\nDeveloper notes\n===============\nThe application may be profiled with annotations by specifying ``USE_NVTX`` to cmake. Regions covered by the 'Monitor' class in CUDA code will automatically appear in the nsight profiler when ``verbosity`` is set to 3.\n\n**********\nReferences\n**********\n`Mitchell R, Frank E. (2017) Accelerating the XGBoost algorithm using GPU computing. 
PeerJ Computer Science 3:e127 https://doi.org/10.7717/peerj-cs.127 <https://peerj.com/articles/cs-127/>`_\n\n`NVIDIA Parallel Forall: Gradient Boosting, Decision Trees and XGBoost with CUDA <https://devblogs.nvidia.com/parallelforall/gradient-boosting-decision-trees-xgboost-cuda/>`_\n\n`Out-of-Core GPU Gradient Boosting <https://arxiv.org/abs/2005.09148>`_\n\nContributors\n============\nMany thanks to the following contributors (alphabetical order):\n\n* Andrey Adinets\n* Jiaming Yuan\n* Jonathan C. McKinney\n* Matthew Jones\n* Philip Cho\n* Rong Ou\n* Rory Mitchell\n* Shankara Rao Thejaswi Nanditale\n* Sriram Chandramouli\n* Vinay Deshpande\n\nPlease report bugs to the XGBoost `issues list <https://github.com/dmlc/xgboost/issues>`__.\n"
  },
  {
    "path": "doc/index.rst",
    "content": "#####################\nXGBoost Documentation\n#####################\n\n**XGBoost** is an optimized distributed gradient boosting library designed to be highly **efficient**, **flexible** and **portable**.\nIt implements machine learning algorithms under the `Gradient Boosting <https://en.wikipedia.org/wiki/Gradient_boosting>`_ framework.\nXGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solve many data science problems in a fast and accurate way.\nThe same code runs on major distributed environment (Hadoop, SGE, MPI) and can solve problems beyond billions of examples.\n\n********\nContents\n********\n\n.. toctree::\n  :maxdepth: 2\n  :titlesonly:\n\n  install\n  build\n  get_started\n  tutorials/index\n  faq\n  GPU Support <gpu/index>\n  parameter\n  prediction\n  treemethod\n  Python Package <python/index>\n  R Package <R-package/index>\n  JVM Package <jvm/index>\n  Ruby Package <https://github.com/ankane/xgboost-ruby>\n  Swift Package <https://github.com/kongzii/SwiftXGBoost>\n  Julia Package <julia>\n  C Package <c>\n  C++ Interface <c++>\n  contrib/index\n  changes/index\n"
  },
  {
    "path": "doc/install.rst",
    "content": "##################\nInstallation Guide\n##################\n\nXGBoost provides binary packages for some language bindings.  The binary packages support\nthe GPU algorithm (``device=cuda:0``) on machines with NVIDIA GPUs. Please note that\n**training with multiple GPUs is only supported for Linux platform**. See\n:doc:`gpu/index`.  Also we have both stable releases and nightly builds, see below for how\nto install them.  For building from source, visit :doc:`this page </build>`.\n\n.. contents:: Contents\n\nStable Release\n==============\n\nPython\n------\n\nPre-built binary wheels are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon).\n\n.. code-block:: bash\n\n  # Pip 21.3+ is required\n  pip install xgboost\n\n\nYou might need to run the command with ``--user`` flag or use ``virtualenv`` if you run\ninto permission errors.\n\n.. note:: Parts of the Python package now require glibc 2.28+\n\n  Starting from 2.1.0, XGBoost Python package will be distributed in two variants:\n\n  * ``manylinux_2_28``: for recent Linux distros with glibc 2.28 or newer. This variant comes with all features enabled.\n  * ``manylinux2014``: for old Linux distros with glibc older than 2.28. This variant does not support GPU algorithms or federated learning.\n\n  The ``pip`` package manager will automatically choose the correct variant depending on your system.\n\n  Starting from **May 31, 2025**, we will stop distributing the ``manylinux2014`` variant and exclusively\n  distribute the ``manylinux_2_28`` variant. We made this decision so that our CI/CD pipeline won't have\n  depend on software components that reached end-of-life (such as CentOS 7). We strongly encourage\n  everyone to migrate to recent Linux distros in order to use future versions of XGBoost.\n\n  Note. 
If you want to use GPU algorithms or federated learning on an older Linux distro, you have\n  two alternatives:\n\n  1. Upgrade to a recent Linux distro with glibc 2.28+.  OR\n  2. Build XGBoost from the source.\n\n.. note:: Windows users need to install Visual C++ Redistributable\n\n  XGBoost requires DLLs from `Visual C++ Redistributable\n  <https://www.microsoft.com/en-us/download/details.aspx?id=48145>`_\n  in order to function, so make sure to install it. Exception: If\n  you have Visual Studio installed, you already have access to\n  necessary libraries and thus don't need to install Visual C++\n  Redistributable.\n\n\nCapabilities of binary wheels for each platform:\n\n.. |tick| unicode:: U+2714\n.. |cross| unicode:: U+2718\n\n+---------------------+---------+----------------------+\n| Platform            | GPU     | Multi-Node-Multi-GPU |\n+=====================+=========+======================+\n| Linux x86_64        | |tick|  |  |tick|              |\n+---------------------+---------+----------------------+\n| Linux aarch64       | |tick|  |  |cross|             |\n+---------------------+---------+----------------------+\n| MacOS x86_64        | |cross| |  |cross|             |\n+---------------------+---------+----------------------+\n| MacOS Apple Silicon | |cross| |  |cross|             |\n+---------------------+---------+----------------------+\n| Windows             | |tick|  |  |cross|             |\n+---------------------+---------+----------------------+\n\nLinux aarch64 wheels now ship with CUDA support, so ``pip install xgboost`` on\nmodern Jetson or Graviton machines provides the same GPU functionality as the\nLinux x86_64 wheel. 
Multi-node and multi-GPU training remain experimental on\nARM64 at this time.\n\nMinimal installation (CPU-only)\n*******************************\nThe default installation with ``pip`` will install the full XGBoost package, including the support for the GPU algorithms and federated learning.\n\nYou may choose to reduce the size of the installed package and save the disk space, by opting to install ``xgboost-cpu`` instead:\n\n.. code-block:: bash\n\n  pip install xgboost-cpu\n\nThe ``xgboost-cpu`` variant will have drastically smaller disk footprint, but does not provide some features, such as the GPU algorithms and\nfederated learning.\n\nCurrently, ``xgboost-cpu`` package is provided for x86_64 (amd64) Linux and Windows platforms.\n\nConda\n*****\n\nYou may use the Conda packaging manager to install XGBoost:\n\n.. code-block:: bash\n\n   conda install -c conda-forge py-xgboost\n\nConda should be able to detect the existence of a GPU on your machine and install the correct variant of XGBoost. If you run into issues, try indicating the variant explicitly:\n\n.. code-block:: bash\n\n   # CPU variant\n   conda install -c conda-forge py-xgboost=*=cpu*\n   # GPU variant\n   conda install -c conda-forge py-xgboost=*=cuda*\n\nTo force the installation of the GPU variant on a machine that does not have an NVIDIA GPU, use environment variable ``CONDA_OVERRIDE_CUDA``,\nas described in `\"Managing Virtual Packages\" in the conda docs <https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html>`_.\n\n.. code-block:: bash\n\n  export CONDA_OVERRIDE_CUDA=\"12.8\"\n  conda install -c conda-forge py-xgboost=*=cuda*\n\nYou can install Conda from the following link: `Download the conda-forge Installer <https://conda-forge.org/download/>`_.\n\nR\n-\n\n* From R Universe\n\n.. code-block:: R\n\n    install.packages('xgboost', repos = c('https://dmlc.r-universe.dev', 'https://cloud.r-project.org'))\n\n.. 
note:: Using all CPU cores (threads) on Mac OSX\n\n   If you are using Mac OSX, you should first install OpenMP library (``libomp``) by running\n\n   .. code-block:: bash\n\n        brew install libomp\n\n   and then run ``install.packages(\"xgboost\")``. Without OpenMP, XGBoost will only use a\n   single CPU core, leading to suboptimal training speed.\n\n* We also provide **experimental** pre-built binary with GPU support. With this binary,\n  you will be able to use the GPU algorithm without building XGBoost from the source.\n  Download the binary package from the Releases page. The file name will be of the form\n  ``xgboost_r_gpu_[os]_[version].tar.gz``, where ``[os]`` is either ``linux`` or ``win64``.\n  (We build the binaries for 64-bit Linux and Windows.)\n  Then install XGBoost by running:\n\n  .. code-block:: bash\n\n    # Install dependencies\n    R -q -e \"install.packages(c('data.table', 'jsonlite'))\"\n    # Install XGBoost\n    R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz\n\n\n* From CRAN (outdated):\n\n.. warning::\n\n    We are working on bringing the CRAN version of XGBoost up-to-date, in the meantime,\n    please use packages from the R-universe.\n\n\n.. code-block:: R\n\n    install.packages(\"xgboost\")\n\n.. note:: Using all CPU cores (threads) on Mac OSX\n\n   If you are using Mac OSX, you should first install OpenMP library (``libomp``) by running\n\n   .. code-block:: bash\n\n        brew install libomp\n\n   and then run ``install.packages(\"xgboost\")``. Without OpenMP, XGBoost will only use a\n   single CPU core, leading to suboptimal training speed.\n\nJVM\n---\n\n* XGBoost4j-Spark\n\n.. 
code-block:: xml\n  :caption: Maven\n\n  <properties>\n    ...\n    <!-- Specify Scala version in package name -->\n    <scala.binary.version>2.12</scala.binary.version>\n  </properties>\n\n  <dependencies>\n    ...\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>\n        <version>latest_version_num</version>\n    </dependency>\n  </dependencies>\n\n.. code-block:: scala\n  :caption: sbt\n\n  libraryDependencies ++= Seq(\n    \"ml.dmlc\" %% \"xgboost4j-spark\" % \"latest_version_num\"\n  )\n\n* XGBoost4j-Spark-GPU\n\n.. code-block:: xml\n  :caption: Maven\n\n  <properties>\n    ...\n    <!-- Specify Scala version in package name -->\n    <scala.binary.version>2.12</scala.binary.version>\n  </properties>\n\n  <dependencies>\n    ...\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>\n        <version>latest_version_num</version>\n    </dependency>\n  </dependencies>\n\n.. code-block:: scala\n  :caption: sbt\n\n  libraryDependencies ++= Seq(\n    \"ml.dmlc\" %% \"xgboost4j-spark-gpu\" % \"latest_version_num\"\n  )\n\nThis will check out the latest stable version from the Maven Central.\n\nFor the latest release version number, please check `release page <https://github.com/dmlc/xgboost/releases>`_.\n\nTo enable the GPU algorithm (``device='cuda'``), use artifacts ``xgboost4j-spark-gpu_2.12`` instead (note the ``gpu`` suffix).\n\n\n.. note:: Windows not supported in the JVM package\n\n  Currently, XGBoost4J-Spark does not support Windows platform, as the distributed training algorithm is inoperational for Windows. Please use Linux or MacOS.\n\n\nNightly Build\n=============\n\n\nPython\n------\n\nNightly builds are available. You can go to `this page <https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_,\nfind the wheel with the commit ID you want and install it with pip:\n\n.. 
code-block:: bash\n\n  pip install <url to the wheel>\n\n\nThe capabilities of the Python pre-built wheels are the same as those of the stable release.\n\n\nR\n-\n\nOther than standard CRAN installation, we also provide *experimental* pre-built binaries\nwith GPU support.  You can go to `this page\n<https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_, find the commit\nID you want to install and then locate the file ``xgboost_r_gpu_[os]_[commit].tar.gz``,\nwhere ``[os]`` is either ``linux`` or ``win64``. (We build the binaries for 64-bit Linux\nand Windows.) Download it and run the following commands:\n\n.. code-block:: bash\n\n  # Install dependencies\n  R -q -e \"install.packages(c('data.table', 'jsonlite', 'remotes'))\"\n  # Install XGBoost\n  R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz\n\n\nJVM\n---\n\n* XGBoost4j/XGBoost4j-Spark\n\n.. code-block:: xml\n  :caption: Maven\n\n  <repository>\n    <id>XGBoost4J Snapshot Repo</id>\n    <name>XGBoost4J Snapshot Repo</name>\n    <url>https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/snapshot/</url>\n  </repository>\n\n.. code-block:: scala\n  :caption: sbt\n\n  resolvers += \"XGBoost4J Snapshot Repo\" at \"https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/snapshot/\"\n\nThen add XGBoost4J-Spark as a dependency:\n\n.. code-block:: xml\n  :caption: maven\n\n  <properties>\n    ...\n    <!-- Specify Scala version in package name -->\n    <scala.binary.version>2.12</scala.binary.version>\n  </properties>\n\n  <dependencies>\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>\n        <version>latest_version_num-SNAPSHOT</version>\n    </dependency>\n  </dependencies>\n\n.. code-block:: scala\n  :caption: sbt\n\n  libraryDependencies ++= Seq(\n    \"ml.dmlc\" %% \"xgboost4j-spark\" % \"latest_version_num-SNAPSHOT\"\n  )\n\n* XGBoost4j-Spark-GPU\n\n.. 
code-block:: xml\n  :caption: maven\n\n  <properties>\n    ...\n    <!-- Specify Scala version in package name -->\n    <scala.binary.version>2.12</scala.binary.version>\n  </properties>\n\n  <dependencies>\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>\n        <version>latest_version_num-SNAPSHOT</version>\n    </dependency>\n  </dependencies>\n\n.. code-block:: scala\n  :caption: sbt\n\n  libraryDependencies ++= Seq(\n    \"ml.dmlc\" %% \"xgboost4j-spark-gpu\" % \"latest_version_num-SNAPSHOT\"\n  )\n\n\nLook up the ``version`` field in `pom.xml <https://github.com/dmlc/xgboost/blob/master/jvm-packages/pom.xml>`_ to get the correct version number.\n\nThe SNAPSHOT JARs are hosted by the XGBoost project. Every commit in the ``master`` branch will automatically trigger generation of a new SNAPSHOT JAR. You can control how often Maven should upgrade your SNAPSHOT installation by specifying ``updatePolicy``. See `here <http://maven.apache.org/pom.html#Repositories>`_ for details.\n\nYou can browse the file listing of the Maven repository at https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html.\n\nTo enable the GPU algorithm (``device='cuda'``), use artifacts ``xgboost4j-gpu_2.12`` and ``xgboost4j-spark-gpu_2.12`` instead (note the ``gpu`` suffix).\n"
  },
  {
    "path": "doc/julia.rst",
    "content": "##########\nXGBoost.jl\n##########\n\nSee `XGBoost.jl Project page <https://github.com/dmlc/XGBoost.jl>`_.\n"
  },
  {
    "path": "doc/jvm/api.rst",
    "content": "#############################\nAPI Docs for the JVM packages\n#############################\n\n* `XGBoost4J Java API <../jvm_docs/javadocs/index.html>`_\n* `XGBoost4J Scala API <../jvm_docs/scaladocs/xgboost4j/index.html>`_\n* `XGBoost4J-Spark Scala API <../jvm_docs/scaladocs/xgboost4j-spark/index.html>`_\n* `XGBoost4J-Spark-GPU Scala API <../jvm_docs/scaladocs/xgboost4j-spark-gpu/index.html>`_\n* `XGBoost4J-Flink Scala API <../jvm_docs/scaladocs/xgboost4j-flink/index.html>`_\n"
  },
  {
    "path": "doc/jvm/index.rst",
    "content": "###################\nXGBoost JVM Package\n###################\n\n.. raw:: html\n\n  <a href=\"https://travis-ci.org/dmlc/xgboost\">\n  <img alt=\"Build Status\" src=\"https://travis-ci.org/dmlc/xgboost.svg?branch=master\">\n  </a>\n  <a href=\"https://github.com/dmlc/xgboost/blob/master/LICENSE\">\n  <img alt=\"GitHub license\" src=\"https://dmlc.github.io/img/apache2.svg\">\n  </a>\n\nYou have found the XGBoost JVM Package!\n\n.. _install_jvm_packages:\n\n************\nInstallation\n************\n\n.. contents::\n  :local:\n  :backlinks: none\n\nCheckout the :doc:`Installation Guide </install>` for how to install the jvm package, or\n:doc:`Building from Source </build>` on how to build it from the sources.\n\n********\nContents\n********\n\n.. toctree::\n  :maxdepth: 2\n\n  java_intro\n  XGBoost4J-Spark Tutorial <xgboost4j_spark_tutorial>\n  XGBoost4J-Spark-GPU Tutorial <xgboost4j_spark_gpu_tutorial>\n  Code Examples <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example>\n  API docs <api>\n  How to migrate to XGBoost-Spark jvm 3.x <xgboost_spark_migration>\n\n.. note::\n\n  Please note that the flink interface is still under construction.\n"
  },
  {
    "path": "doc/jvm/java_intro.rst",
    "content": "##############################\nGetting Started with XGBoost4J\n##############################\nThis tutorial introduces Java API for XGBoost.\n\n**************\nData Interface\n**************\nLike the XGBoost python module, XGBoost4J uses DMatrix to handle data.\nLIBSVM txt format file, sparse matrix in CSR/CSC format, and dense matrix are\nsupported.\n\n* The first step is to import DMatrix:\n\n  .. code-block:: java\n\n    import ml.dmlc.xgboost4j.java.DMatrix;\n\n* Use DMatrix constructor to load data from a libsvm text format file:\n\n  .. code-block:: java\n\n    DMatrix dmat = new DMatrix(\"train.svm.txt\");\n\n* Pass arrays to DMatrix constructor to load from sparse matrix.\n\n  Suppose we have a sparse matrix\n\n  .. code-block:: none\n\n    1 0 2 0\n    4 0 0 3\n    3 1 2 0\n\n  We can express the sparse matrix in `Compressed Sparse Row (CSR) <https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)>`_ format:\n\n  .. code-block:: java\n\n    long[] rowHeaders = new long[] {0,2,4,7};\n    float[] data = new float[] {1f,2f,4f,3f,3f,1f,2f};\n    int[] colIndex = new int[] {0,2,0,3,0,1,2};\n    int numColumn = 4;\n    DMatrix dmat = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR, numColumn);\n\n  ... or in `Compressed Sparse Column (CSC) <https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_(CSC_or_CCS)>`_ format:\n\n  .. code-block:: java\n\n    long[] colHeaders = new long[] {0,3,4,6,7};\n    float[] data = new float[] {1f,4f,3f,1f,2f,2f,3f};\n    int[] rowIndex = new int[] {0,1,2,2,0,2,1};\n    int numRow = 3;\n    DMatrix dmat = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC, numRow);\n\n* You may also load your data from a dense matrix. Let's assume we have a matrix of form\n\n  .. 
code-block:: none\n\n    1    2\n    3    4\n    5    6\n\n  Using `row-major layout <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_, we specify the dense matrix as follows:\n\n  .. code-block:: java\n\n    float[] data = new float[] {1f,2f,3f,4f,5f,6f};\n    int nrow = 3;\n    int ncol = 2;\n    float missing = 0.0f;\n    DMatrix dmat = new DMatrix(data, nrow, ncol, missing);\n\n* To set weight:\n\n  .. code-block:: java\n\n    float[] weights = new float[] {1f,2f,1f};\n    dmat.setWeight(weights);\n\n******************\nSetting Parameters\n******************\nParameters are specified as a Map:\n\n.. code-block:: java\n\n  Map<String, Object> params = new HashMap<String, Object>() {\n    {\n      put(\"eta\", 1.0);\n      put(\"max_depth\", 2);\n      put(\"objective\", \"binary:logistic\");\n      put(\"eval_metric\", \"logloss\");\n    }\n  };\n\n**************\nTraining Model\n**************\nWith parameters and data, you are able to train a booster model.\n\n* Import Booster and XGBoost:\n\n  .. code-block:: java\n\n    import ml.dmlc.xgboost4j.java.Booster;\n    import ml.dmlc.xgboost4j.java.XGBoost;\n\n* Training\n\n  .. code-block:: java\n\n    DMatrix trainMat = new DMatrix(\"train.svm.txt\");\n    DMatrix validMat = new DMatrix(\"valid.svm.txt\");\n    // Specify a watch list to see model accuracy on data sets\n    Map<String, DMatrix> watches = new HashMap<String, DMatrix>() {\n      {\n        put(\"train\", trainMat);\n        put(\"test\", validMat);\n      }\n    };\n    int nround = 2;\n    Booster booster = XGBoost.train(trainMat, params, nround, watches, null, null);\n\n* Saving model\n\n  After training, you can save the model and dump it out.\n\n  .. code-block:: java\n\n    booster.saveModel(\"model.json\");\n\n* Generating model dump with feature map\n\n  .. 
code-block:: java\n\n    // dump without feature map\n    String[] model_dump = booster.getModelDump(null, false);\n    // dump with feature map\n    String[] model_dump_with_feature_map = booster.getModelDump(\"featureMap.txt\", false);\n\n* Load a model\n\n  .. code-block:: java\n\n    Booster booster = XGBoost.loadModel(\"model.json\");\n\n**********\nPrediction\n**********\nAfter training and loading a model, you can use it to make prediction for other data. The result will be a two-dimension float array ``(nsample, nclass)``; for ``predictLeaf()``, the result would be of shape ``(nsample, nclass*ntrees)``.\n\n.. code-block:: java\n\n  DMatrix dtest = new DMatrix(\"test.svm.txt\");\n  // predict\n  float[][] predicts = booster.predict(dtest);\n  // predict leaf\n  float[][] leafPredicts = booster.predictLeaf(dtest, 0);\n"
  },
  {
    "path": "doc/jvm/javadocs/index.rst",
    "content": ":orphan:\n\n==================\nXGBoost4J Java API\n==================\n"
  },
  {
    "path": "doc/jvm/scaladocs/xgboost4j/index.rst",
    "content": ":orphan:\n\n===================\nXGBoost4J Scala API\n===================\n"
  },
  {
    "path": "doc/jvm/scaladocs/xgboost4j-flink/index.rst",
    "content": ":orphan:\n\n=========================\nXGBoost4J-Flink Scala API\n=========================\n"
  },
  {
    "path": "doc/jvm/scaladocs/xgboost4j-spark/index.rst",
    "content": ":orphan:\n\n=========================\nXGBoost4J-Spark Scala API\n=========================\n"
  },
  {
    "path": "doc/jvm/xgboost4j_spark_gpu_tutorial.rst",
    "content": "############################\nXGBoost4J-Spark-GPU Tutorial\n############################\n\n**XGBoost4J-Spark-GPU** is an open source library aiming to accelerate distributed XGBoost training on Apache Spark cluster from\nend to end with GPUs by leveraging the `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_ product.\n\nThis tutorial will show you how to use **XGBoost4J-Spark-GPU**.\n\n.. contents::\n  :backlinks: none\n  :local:\n\n************************************************\nBuild an ML Application with XGBoost4J-Spark-GPU\n************************************************\n\nAdd XGBoost to Your Project\n===========================\n\nPrior to delving into the tutorial on utilizing XGBoost4J-Spark-GPU, it is advisable to refer to\n:ref:`Installation from Maven repository <install_jvm_packages>` for instructions on adding XGBoost4J-Spark-GPU\nas a project dependency. We offer both stable releases and snapshots for your convenience.\n\nData Preparation\n================\n\nIn this section, we use the `Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset as an example to\nshowcase how we use Apache Spark to transform a raw dataset and make it fit the data interface of XGBoost.\n\nThe Iris dataset is shipped in CSV format. Each instance contains 4 features, \"sepal length\", \"sepal width\",\n\"petal length\" and \"petal width\". In addition, it contains the \"class\" column, which is essentially the\nlabel with three possible values: \"Iris Setosa\", \"Iris Versicolour\" and \"Iris Virginica\".\n\nRead Dataset with Spark's Built-In Reader\n-----------------------------------------\n\n.. 
code-block:: scala\n\n  import org.apache.spark.sql.SparkSession\n  import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}\n\n  val spark = SparkSession.builder().getOrCreate()\n\n  val labelName = \"class\"\n  val schema = new StructType(Array(\n      StructField(\"sepal length\", DoubleType, true),\n      StructField(\"sepal width\", DoubleType, true),\n      StructField(\"petal length\", DoubleType, true),\n      StructField(\"petal width\", DoubleType, true),\n      StructField(labelName, StringType, true)))\n\n  val xgbInput = spark.read.option(\"header\", \"false\")\n      .schema(schema)\n      .csv(dataPath)\n\nAt first, we create an instance of a `SparkSession <https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession>`_\nwhich is the entry point of any Spark application working with DataFrames. The ``schema`` variable\ndefines the schema of the DataFrame wrapping Iris data. With this explicitly set schema, we\ncan define the column names as well as their types; otherwise the column names would be\nthe default ones derived by Spark, such as ``_col0``, etc. Finally, we can use Spark's\nbuilt-in CSV reader to load the Iris CSV file as a DataFrame named ``xgbInput``.\n\nApache Spark also contains many built-in readers for other formats such as ORC, Parquet, Avro, JSON.\n\n\nTransform Raw Iris Dataset\n--------------------------\n\nTo make the Iris dataset recognizable to XGBoost, we need to encode the String-typed\nlabel, i.e. \"class\", to the Double-typed label.\n\nOne way to convert the String-typed label to Double is to use Spark's built-in feature transformer\n`StringIndexer <https://spark.apache.org/docs/latest/api/scala/org/apache/spark/ml/feature/StringIndexer.html>`_.\nBut this feature is not accelerated in RAPIDS Accelerator, which means it will fall back\nto CPU. Instead, we use an alternative way to achieve the same goal with the following code:\n\n.. 
code-block:: scala\n\n  import org.apache.spark.sql.expressions.Window\n  import org.apache.spark.sql.functions._\n\n  val spec = Window.orderBy(labelName)\n  val Array(train, test) = xgbInput\n      .withColumn(\"tmpClassName\", dense_rank().over(spec) - 1)\n      .drop(labelName)\n      .withColumnRenamed(\"tmpClassName\", labelName)\n      .randomSplit(Array(0.7, 0.3), seed = 1)\n\n  train.show(5)\n\n.. code-block:: none\n\n\t+------------+-----------+------------+-----------+-----+\n\t|sepal length|sepal width|petal length|petal width|class|\n\t+------------+-----------+------------+-----------+-----+\n\t|         4.3|        3.0|         1.1|        0.1|    0|\n\t|         4.4|        2.9|         1.4|        0.2|    0|\n\t|         4.4|        3.0|         1.3|        0.2|    0|\n\t|         4.4|        3.2|         1.3|        0.2|    0|\n\t|         4.6|        3.2|         1.4|        0.2|    0|\n\t+------------+-----------+------------+-----------+-----+\n\n\nWith window operations, we have mapped the string column of labels to label indices.\n\nTraining\n========\n\nXGBoost4j-Spark-Gpu supports regression, classification and ranking\nmodels. Although we use the Iris dataset in this tutorial to show how we use\n``XGBoost4J-Spark-GPU`` to resolve a multi-classes classification problem, the\nusage in Regression and Ranking is very similar to classification.\n\nTo train a XGBoost model for classification, we need to define a XGBoostClassifier first:\n\n.. 
code-block:: scala\n\n  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier\n  val xgbParam = Map(\n      \"objective\" -> \"multi:softprob\",\n      \"num_class\" -> 3,\n      \"num_round\" -> 100,\n      \"device\" -> \"cuda\",\n      \"num_workers\" -> 1)\n\n  val featuresNames = schema.fieldNames.filter(name => name != labelName)\n\n  val xgbClassifier = new XGBoostClassifier(xgbParam)\n      .setFeaturesCol(featuresNames)\n      .setLabelCol(labelName)\n\nThe ``device`` parameter is for informing XGBoost that CUDA devices should be used instead of CPU.\nUnlike the single-node mode, GPUs are managed by Spark instead of by XGBoost. Therefore,\nan explicitly specified device ordinal like ``cuda:1`` is not supported.\n\nThe available parameters for training a XGBoost model can be found in :doc:`here </parameter>`.\nSimilar to the XGBoost4J-Spark package, in addition to the default set of parameters,\nXGBoost4J-Spark-GPU also supports the camel-case variant of these parameters to be consistent with Spark's MLlib naming convention.\n\nSpecifically, each parameter in :doc:`this page </parameter>` has its equivalent form in\nXGBoost4J-Spark-GPU with camel case. For example, to set ``max_depth`` for each tree, you\ncan pass parameter just like what we did in the above code snippet (as ``max_depth``\nwrapped in a Map), or you can do it through setters in XGBoostClassifier:\n\n.. code-block:: scala\n\n  val xgbClassifier = new XGBoostClassifier(xgbParam)\n      .setFeaturesCol(featuresNames)\n      .setLabelCol(labelName)\n  xgbClassifier.setMaxDepth(2)\n\n.. 
note::\n\n  In contrast with XGBoost4j-Spark which accepts both a feature column with VectorUDT type and\n  an array of feature column names, XGBoost4j-Spark-GPU only accepts an array of feature\n  column names by ``setFeaturesCol(value: Array[String])``.\n\nAfter setting XGBoostClassifier parameters and feature/label columns, we can build a\ntransformer, XGBoostClassificationModel by fitting XGBoostClassifier with the input\nDataFrame. This ``fit`` operation is essentially the training process and the generated\nmodel can then be used in other tasks like prediction.\n\n.. code-block:: scala\n\n  val xgbClassificationModel = xgbClassifier.fit(train)\n\nPrediction\n==========\n\nWhen we get a model, a XGBoostClassificationModel or a XGBoostRegressionModel or a XGBoostRankerModel, it takes a DataFrame as an input,\nreads the column containing feature vectors, predicts for each feature vector, and outputs a new DataFrame\nwith the following columns by default:\n\n* XGBoostClassificationModel will output margins (``rawPredictionCol``), probabilities (``probabilityCol``) and the eventual prediction labels (``predictionCol``) for each possible label.\n* XGBoostRegressionModel will output a prediction label (``predictionCol``).\n* XGBoostRankerModel will output a prediction label (``predictionCol``).\n\n.. code-block:: scala\n\n  val xgbClassificationModel = xgbClassifier.fit(train)\n  val results = xgbClassificationModel.transform(test)\n  results.show()\n\nWith the above code snippet, we get a DataFrame as result, which contains the margin, probability for each class,\nand the prediction for each instance.\n\n.. 
code-block:: none\n\n\t+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+\n\t|sepal length|sepal width|      petal length|        petal width|class|       rawPrediction|         probability|prediction|\n\t+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+\n\t|         4.5|        2.3|               1.3|0.30000000000000004|    0|[3.16666603088378...|[0.98853939771652...|       0.0|\n\t|         4.6|        3.1|               1.5|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         4.8|        3.1|               1.6|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         4.8|        3.4|               1.6|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         4.8|        3.4|1.9000000000000001|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         4.9|        2.4|               3.3|                1.0|    1|[-2.1498908996582...|[0.00596602633595...|       1.0|\n\t|         4.9|        2.5|               4.5|                1.7|    2|[-2.1498908996582...|[0.00596602633595...|       1.0|\n\t|         5.0|        3.5|               1.3|0.30000000000000004|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.1|        2.5|               3.0|                1.1|    1|[3.16666603088378...|[0.98853939771652...|       0.0|\n\t|         5.1|        3.3|               1.7|                0.5|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.1|        3.5|               1.4|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.1|        3.8|               1.6|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.2|        3.4|               1.4|            
    0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.2|        3.5|               1.5|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.2|        4.1|               1.5|                0.1|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.4|        3.9|               1.7|                0.4|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.5|        2.4|               3.8|                1.1|    1|[-2.1498908996582...|[0.00596602633595...|       1.0|\n\t|         5.5|        4.2|               1.4|                0.2|    0|[3.25857257843017...|[0.98969423770904...|       0.0|\n\t|         5.7|        2.5|               5.0|                2.0|    2|[-2.1498908996582...|[0.00280966912396...|       2.0|\n\t|         5.7|        3.0|               4.2|                1.2|    1|[-2.1498908996582...|[0.00643939292058...|       1.0|\n\t+------------+-----------+------------------+-------------------+-----+--------------------+--------------------+----------+\n\n**********************\nSubmit the application\n**********************\n\nAssuming you have configured the Spark standalone cluster with GPU support. Otherwise,\nplease refer to `spark standalone configuration with GPU support\n<https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/on-premise.html>`__.\n\nStarting from XGBoost 2.1.0, stage-level scheduling is automatically enabled. Therefore,\nif you are using Spark standalone cluster version 3.4.0 or higher, we strongly recommend\nconfiguring the ``\"spark.task.resource.gpu.amount\"`` as a fractional value. This will\nenable running multiple tasks in parallel during the ETL phase. An example configuration\nwould be ``\"spark.task.resource.gpu.amount=1/spark.executor.cores\"``. 
However, if you are\nusing a XGBoost version earlier than 2.1.0 or a Spark standalone cluster version below 3.4.0,\nyou still need to set ``\"spark.task.resource.gpu.amount\"`` equal to ``\"spark.executor.resource.gpu.amount\"``.\n\nAssuming that the application main class is \"Iris\" and the application jar is \"iris-1.0.0.jar\",\nprovided below is an instance demonstrating how to submit the xgboost application to an Apache\nSpark Standalone cluster.\n\n.. code-block:: bash\n\n  rapids_version=24.08.0\n  xgboost_version=$LATEST_VERSION\n  main_class=Iris\n  app_jar=iris-1.0.0.jar\n\n  spark-submit \\\n    --master $master \\\n    --packages com.nvidia:rapids-4-spark_2.12:${rapids_version},ml.dmlc:xgboost4j-spark-gpu_2.12:${xgboost_version} \\\n    --conf spark.executor.cores=12 \\\n    --conf spark.task.cpus=1 \\\n    --conf spark.executor.resource.gpu.amount=1 \\\n    --conf spark.task.resource.gpu.amount=0.08 \\\n    --conf spark.rapids.sql.csv.read.double.enabled=true \\\n    --conf spark.rapids.sql.hasNans=false \\\n    --conf spark.plugins=com.nvidia.spark.SQLPlugin \\\n    --class ${main_class} \\\n     ${app_jar}\n\n* First, we need to specify the ``RAPIDS Accelerator, xgboost4j-spark-gpu`` packages by ``--packages``\n* Second, ``RAPIDS Accelerator`` is a Spark plugin, so we need to configure it by specifying ``spark.plugins=com.nvidia.spark.SQLPlugin``\n\nFor details about other ``RAPIDS Accelerator`` configurations, please refer to the `configuration <https://nvidia.github.io/spark-rapids/docs/configs.html>`_.\n\nFor ``RAPIDS Accelerator Frequently Asked Questions``, please refer to the\n`frequently-asked-questions <https://docs.nvidia.com/spark-rapids/user-guide/latest/faq.html>`_.\n\n***********\nRMM Support\n***********\n\n.. 
versionadded:: 3.0\n\nWhen compiled with the RMM plugin (see :doc:`/build`), the XGBoost Spark package can reuse\nthe RMM memory pool automatically based on ``spark.rapids.memory.gpu.pooling.enabled`` and\n``spark.rapids.memory.gpu.pool``. Please note that both submit options need to be set\naccordingly. In addition, XGBoost employs NCCL for GPU communication, which requires some\nGPU memory for communication buffers and one should not let RMM take all the available\nmemory. Example configuration related to memory pool:\n\n.. code-block:: bash\n\n  spark-submit \\\n    --master $master \\\n    --conf spark.rapids.memory.gpu.allocFraction=0.5 \\\n    --conf spark.rapids.memory.gpu.maxAllocFraction=0.8 \\\n    --conf spark.rapids.memory.gpu.pool=ARENA \\\n    --conf spark.rapids.memory.gpu.pooling.enabled=true \\\n    ...\n"
  },
  {
    "path": "doc/jvm/xgboost4j_spark_tutorial.rst",
    "content": "########################\nXGBoost4J-Spark Tutorial\n########################\n\n**XGBoost4J-Spark** is a project aiming to seamlessly integrate XGBoost and Apache Spark by fitting XGBoost to\nApache Spark's MLLIB framework. With the integration, user can not only uses the high-performant algorithm\nimplementation of XGBoost, but also leverages the powerful data processing engine of Spark for:\n\n* Feature Engineering: feature extraction, transformation, dimensionality reduction, and selection, etc.\n* Pipelines: constructing, evaluating, and tuning ML Pipelines\n* Persistence: persist and load machine learning models and even whole Pipelines\n\nThis tutorial is to cover the end-to-end process to build a machine learning pipeline with XGBoost4J-Spark. We will discuss\n\n* Using Spark to preprocess data to fit to XGBoost4J-Spark's data interface\n* Training a XGBoost model with XGBoost4J-Spark\n* Serving XGBoost model (prediction) with Spark\n* Building a Machine Learning Pipeline with XGBoost4J-Spark\n* Running XGBoost4J-Spark in Production\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********************************************\nBuild an ML Application with XGBoost4J-Spark\n********************************************\n\nRefer to XGBoost4J-Spark Dependency\n===================================\n\nBefore we go into the tour of how to use XGBoost4J-Spark, you should first consult :ref:`Installation from Maven repository <install_jvm_packages>`\nin order to add XGBoost4J-Spark as a dependency for your project. We provide both stable releases and snapshots.\n\n.. note:: XGBoost4J-Spark requires Apache Spark 3.0+\n\n  XGBoost4J-Spark now requires **Apache Spark 3.0+**. 
Latest versions of XGBoost4J-Spark uses facilities of `org.apache.spark.ml.param.shared`\n  extensively to provide for a tight integration with Spark MLLIB framework, and these facilities are not fully available on earlier versions of Spark.\n\n  Also, make sure to install Spark directly from `Apache website <https://spark.apache.org/>`_. **Upstream XGBoost is not guaranteed to\n  work with third-party distributions of Spark, such as Cloudera Spark.** Consult appropriate third parties to obtain their distribution of XGBoost.\n\nData Preparation\n================\n\nAs aforementioned, XGBoost4J-Spark seamlessly integrates Spark and XGBoost. The integration enables\nusers to apply various types of transformation over the training/test datasets with the convenient\nand powerful data processing framework: Spark.\n\nIn this section, we use `Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset as an example to\nshowcase how we use Spark to transform raw dataset and make it fit to the data interface of XGBoost.\n\nIris dataset is shipped in CSV format. Each instance contains 4 features, \"sepal length\", \"sepal width\",\n\"petal length\" and \"petal width\". In addition, it contains the \"class\" column, which is essentially the\nlabel with three possible values: \"Iris Setosa\", \"Iris Versicolour\" and \"Iris Virginica\".\n\nRead Dataset with Spark's Built-In Reader\n-----------------------------------------\n\nThe first thing in data transformation is to load the dataset as Spark's structured data abstraction, DataFrame.\n\n.. 
code-block:: scala\n\n  import org.apache.spark.sql.SparkSession\n  import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}\n\n  val spark = SparkSession.builder().getOrCreate()\n  val schema = new StructType(Array(\n    StructField(\"sepal length\", DoubleType, true),\n    StructField(\"sepal width\", DoubleType, true),\n    StructField(\"petal length\", DoubleType, true),\n    StructField(\"petal width\", DoubleType, true),\n    StructField(\"class\", StringType, true)))\n  val rawInput = spark.read.schema(schema).csv(\"input_path\")\n\nAt the first line, we create a instance of `SparkSession <https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession>`_\nwhich is the entry of any Spark program working with DataFrame. The ``schema`` variable defines the schema of DataFrame wrapping Iris data.\nWith this explicitly set schema, we can define the columns' name as well as their types; otherwise the column name would be the default ones\nderived by Spark, such as ``_col0``, etc. Finally, we can use Spark's built-in csv reader to load Iris csv file as a DataFrame named ``rawInput``.\n\nSpark also contains many built-in readers for other format. The latest version of Spark supports CSV, JSON, Parquet, and LIBSVM.\n\nTransform Raw Iris Dataset\n--------------------------\n\nTo make Iris dataset be recognizable to XGBoost, we need to\n\n1. Transform String-typed label, i.e. \"class\", to Double-typed label.\n2. Assemble the feature columns as a vector to fit to the data interface of Spark ML framework.\n\nTo convert String-typed label to Double, we can use Spark's built-in feature transformer\n`StringIndexer <https://spark.apache.org/docs/latest/api/scala/org/apache/spark/ml/feature/StringIndexer.html>`_.\n\n.. 
code-block:: scala\n\n  import org.apache.spark.ml.feature.StringIndexer\n  val stringIndexer = new StringIndexer().\n    setInputCol(\"class\").\n    setOutputCol(\"classIndex\").\n    fit(rawInput)\n  val labelTransformed = stringIndexer.transform(rawInput).drop(\"class\")\n\nWith a newly created StringIndexer instance:\n\n1. we set input column, i.e. the column containing String-typed label.\n2. we set output column, i.e. the column containing the Double-typed label.\n3. Then we ``fit`` StringIndexer with our input DataFrame ``rawInput``, so that Spark internals can get information like total number of distinct values, etc.\n\nNow we have a StringIndexer which is ready to be applied to our input DataFrame. To execute the transformation logic of StringIndexer,\nwe ``transform`` the input DataFrame ``rawInput`` and to keep a concise DataFrame,\nwe drop the column \"class\" and only keep the feature columns and the transformed Double-typed label column (in the last line of the above code snippet).\n\nThe ``fit`` and ``transform`` are two key operations in MLLIB. Basically, ``fit`` produces a \"transformer\", e.g. StringIndexer,\nand each transformer applies ``transform`` method on DataFrame to add new column(s) containing transformed features/labels or\nprediction results, etc. To understand more about ``fit`` and ``transform``, you can find more details\n`here <http://spark.apache.org/docs/latest/ml-pipeline.html#pipeline-components>`_.\n\nSimilarly, we can use another transformer, `VectorAssembler <https://spark.apache.org/docs/latest/api/scala/org/apache/spark/ml/feature/VectorAssembler.html>`_,\nto assemble feature columns \"sepal length\", \"sepal width\", \"petal length\" and \"petal width\" as a vector.\n\n.. 
code-block:: scala\n\n  import org.apache.spark.ml.feature.VectorAssembler\n  val vectorAssembler = new VectorAssembler().\n    setInputCols(Array(\"sepal length\", \"sepal width\", \"petal length\", \"petal width\")).\n    setOutputCol(\"features\")\n  val xgbInput = vectorAssembler.transform(labelTransformed).select(\"features\", \"classIndex\")\n\nNow, we have a DataFrame containing only two columns, \"features\" which contains vector-represented\n\"sepal length\", \"sepal width\", \"petal length\" and \"petal width\" and \"classIndex\" which has Double-typed\nlabels. A DataFrame like this (containing vector-represented features and numeric labels) can be fed to XGBoost4J-Spark's training engine directly.\n\nDealing with missing values\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nXGBoost supports missing values by default (`as described here <https://xgboost.readthedocs.io/en/latest/faq.html#how-to-deal-with-missing-values>`_).\nIf given a SparseVector, XGBoost will treat any values absent from the SparseVector as missing. You are also able to\nspecify to XGBoost to treat a specific value in your Dataset as if it was a missing value. By default XGBoost will treat NaN as the value representing missing.\n\nExample of setting a missing value (e.g. -999) to the \"missing\" parameter in XGBoostClassifier:\n\n.. code-block:: scala\n\n  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier\n  val xgbParam = Map(\"eta\" -> 0.1f,\n        \"missing\" -> -999,\n        \"objective\" -> \"multi:softprob\",\n        \"num_class\" -> 3,\n        \"num_round\" -> 100,\n        \"num_workers\" -> 2)\n  val xgbClassifier = new XGBoostClassifier(xgbParam).\n        setFeaturesCol(\"features\").\n        setLabelCol(\"classIndex\")\n\n.. 
note:: Missing values\n\n  If the feature is vector type, the single feature instance could be a SparseVector, where \"0\" will be treated as the missing value.\n  In order to get the correct model, XGBoost4j-Spark will convert the SparseVector to array by restoring the \"0\". However, we can't\n  assume 0 for missing values as it may be meaningful. So in this case, users need to specify the missing value explicitly\n  even though the missing value is set to ``Float.NaN`` by default in XGBoost4j-Spark.\n\nTraining\n========\n\nXGBoost supports regression, classification and ranking. While we use Iris dataset in this tutorial to show how we\nuse XGBoost4J-Spark to resolve a multi-classes classification problem, the usage in Regression and Ranking is very similar to classification.\n\nTo train a XGBoost model for classification, we need to create a XGBoostClassifier first:\n\n.. code-block:: scala\n\n  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier\n  val xgbParam = Map(\"eta\" -> 0.1f,\n        \"max_depth\" -> 2,\n        \"objective\" -> \"multi:softprob\",\n        \"num_class\" -> 3)\n  val xgbClassifier = new XGBoostClassifier(xgbParam).\n        setNumRound(100).\n        setNumWorkers(2).\n        setFeaturesCol(\"features\").\n        setLabelCol(\"classIndex\")\n\nThe available parameters for training a XGBoost model can be found in :doc:`here </parameter>`. In XGBoost4J-Spark, we support\nnot only the default set of parameters but also the camel-case variant of these parameters to keep consistent with Spark's MLLIB parameters.\n\nSpecifically, each parameter in :doc:`this page </parameter>` has its\nequivalent form in XGBoost4J-Spark with camel case. For example, to set ``max_depth`` for each tree, you can pass parameter just\nlike what we did in the above code snippet (as ``max_depth`` wrapped in a Map), or you can do it through setters in XGBoostClassifier:\n\n.. 
code-block:: scala\n\n  val xgbClassifier = new XGBoostClassifier().\n    setFeaturesCol(\"features\").\n    setLabelCol(\"classIndex\")\n  xgbClassifier.setMaxDepth(2)\n\nAfter we set XGBoostClassifier parameters and feature/label column, we can build a transformer, XGBoostClassificationModel by\nfitting XGBoostClassifier with the input DataFrame. This ``fit`` operation is essentially the training process and the generated\nmodel can then be used in prediction.\n\n.. code-block:: scala\n\n  val xgbClassificationModel = xgbClassifier.fit(xgbInput)\n\nEarly Stopping\n----------------\n\nEarly stopping is a feature to prevent the unnecessary training iterations. By specifying ``num_early_stopping_rounds`` or\ndirectly calling ``setNumEarlyStoppingRounds`` over a XGBoostClassifier or XGBoostRegressor, we can define the number of rounds\nthe evaluation metric may go without improving on the best iteration before training is stopped early.\n\nWhen it comes to custom eval metrics, in addition to ``num_early_stopping_rounds``, you also need to define ``maximize_evaluation_metrics``\nor call ``setMaximizeEvaluationMetrics`` to specify whether you want to maximize or minimize the metrics in training. For built-in eval metrics,\nXGBoost4J-Spark will automatically select the direction.\n\nFor example, we need to maximize the evaluation metrics (set ``maximize_evaluation_metrics`` with true), and set ``num_early_stopping_rounds``\nwith 5. The evaluation metric of 10th iteration is the maximum one until now. In the following iterations, if there is no evaluation metric\ngreater than the 10th iteration's (best one), the training would be early stopped at 15th iteration.\n\nTraining with Evaluation Dataset\n--------------------------------\n\nYou can also monitor the performance of the model during training with evaluation dataset. 
By calling ``setEvalDataset`` over a\nXGBoostClassifier, XGBoostRegressor or XGBoostRanker.\n\nPrediction\n==========\n\nXGBoost4j-Spark supports two ways for model serving: batch prediction and single instance prediction.\n\nBatch Prediction\n----------------\n\nWhen we get a model, either XGBoostClassificationModel, XGBoostRegressionModel or XGBoostRankerModel, it takes a DataFrame, read the column containing\nfeature vectors, predict for each feature vector, and output a new DataFrame with the following columns by default:\n\n* XGBoostClassificationModel will output margins (``rawPredictionCol``), probabilities(``probabilityCol``) and the eventual prediction labels (``predictionCol``) for each possible label.\n* XGBoostRegressionModel will output prediction label(``predictionCol``).\n* XGBoostRankerModel will output prediction label(``predictionCol``).\n\nBatch prediction expects the user to pass the testset in the form of a DataFrame. XGBoost4J-Spark starts a XGBoost worker\nfor each partition of DataFrame for parallel prediction and generates prediction results for the whole DataFrame in a batch.\n\n.. code-block:: scala\n\n  val xgbClassificationModel = xgbClassifier.fit(xgbInput)\n  val results = xgbClassificationModel.transform(testSet)\n\nWith the above code snippet, we get a result DataFrame, result containing margin, probability for each class and the prediction for each instance\n\n.. 
code-block:: none\n\n  +-----------------+----------+--------------------+--------------------+----------+\n  |         features|classIndex|       rawPrediction|         probability|prediction|\n  +-----------------+----------+--------------------+--------------------+----------+\n  |[5.1,3.5,1.4,0.2]|       0.0|[3.45569849014282...|[0.99579632282257...|       0.0|\n  |[4.9,3.0,1.4,0.2]|       0.0|[3.45569849014282...|[0.99618089199066...|       0.0|\n  |[4.7,3.2,1.3,0.2]|       0.0|[3.45569849014282...|[0.99643349647521...|       0.0|\n  |[4.6,3.1,1.5,0.2]|       0.0|[3.45569849014282...|[0.99636095762252...|       0.0|\n  |[5.0,3.6,1.4,0.2]|       0.0|[3.45569849014282...|[0.99579632282257...|       0.0|\n  |[5.4,3.9,1.7,0.4]|       0.0|[3.45569849014282...|[0.99428516626358...|       0.0|\n  |[4.6,3.4,1.4,0.3]|       0.0|[3.45569849014282...|[0.99643349647521...|       0.0|\n  |[5.0,3.4,1.5,0.2]|       0.0|[3.45569849014282...|[0.99579632282257...|       0.0|\n  |[4.4,2.9,1.4,0.2]|       0.0|[3.45569849014282...|[0.99618089199066...|       0.0|\n  |[4.9,3.1,1.5,0.1]|       0.0|[3.45569849014282...|[0.99636095762252...|       0.0|\n  |[5.4,3.7,1.5,0.2]|       0.0|[3.45569849014282...|[0.99428516626358...|       0.0|\n  |[4.8,3.4,1.6,0.2]|       0.0|[3.45569849014282...|[0.99643349647521...|       0.0|\n  |[4.8,3.0,1.4,0.1]|       0.0|[3.45569849014282...|[0.99618089199066...|       0.0|\n  |[4.3,3.0,1.1,0.1]|       0.0|[3.45569849014282...|[0.99618089199066...|       0.0|\n  |[5.8,4.0,1.2,0.2]|       0.0|[3.45569849014282...|[0.97809928655624...|       0.0|\n  |[5.7,4.4,1.5,0.4]|       0.0|[3.45569849014282...|[0.97809928655624...|       0.0|\n  |[5.4,3.9,1.3,0.4]|       0.0|[3.45569849014282...|[0.99428516626358...|       0.0|\n  |[5.1,3.5,1.4,0.3]|       0.0|[3.45569849014282...|[0.99579632282257...|       0.0|\n  |[5.7,3.8,1.7,0.3]|       0.0|[3.45569849014282...|[0.97809928655624...|       0.0|\n  |[5.1,3.8,1.5,0.3]|       
0.0|[3.45569849014282...|[0.99579632282257...|       0.0|\n  +-----------------+----------+--------------------+--------------------+----------+\n\nSingle instance prediction\n--------------------------\n\nXGBoostClassificationModel, XGBoostRegressionModel or XGBoostRankerModel supports making prediction on single instance as well.\nIt accepts a single Vector as feature, and output the prediction label.\n\nHowever, the overhead of single-instance prediction is high due to the internal overhead of XGBoost, use it carefully!\n\n.. code-block:: scala\n\n  val features = xgbInput.head().getAs[Vector](\"features\")\n  val result = xgbClassificationModel.predict(features)\n\nModel Persistence\n=================\n\nModel and pipeline persistence\n------------------------------\n\nA data scientist produces an ML model and hands it over to an engineering team for deployment in a production environment.\nReversely, a trained model may be used by data scientists, for example as a baseline, across the process of data exploration.\nSo it's important to support model persistence to make the models available across usage scenarios and programming languages.\n\nXGBoost4j-Spark supports saving and loading XGBoostClassifier/XGBoostClassificationModel and XGBoostRegressor/XGBoostRegressionModel\nand XGBoostRanker/XGBoostRankerModel to/from file system. It also supports saving and loading a ML pipeline which includes these\nestimators and models.\n\nWe can save the XGBoostClassificationModel to file system:\n\n.. code-block:: scala\n\n  val xgbClassificationModelPath = \"/tmp/xgbClassificationModel\"\n  xgbClassificationModel.write.overwrite().save(xgbClassificationModelPath)\n\nand then loading the model in another session:\n\n.. code-block:: scala\n\n  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel\n\n  val xgbClassificationModel2 = XGBoostClassificationModel.load(xgbClassificationModelPath)\n  xgbClassificationModel2.transform(xgbInput)\n\n.. 
note::\n\n  Besides dumping the model to raw format, users are able to dump the model to be json or ubj format.\n\n  .. code-block:: scala\n\n    val xgbClassificationModelPath = \"/tmp/xgbClassificationModel\"\n    xgbClassificationModel.write.overwrite().option(\"format\", \"json\").save(xgbClassificationModelPath)\n\n\nWith regard to ML pipeline save and load, please refer to the next section.\n\nInteract with Other Bindings of XGBoost\n---------------------------------------\nAfter we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving\nin single machine or integrate it with other single node libraries for further processing.\n\nAfter saving the model, we can load this model with single node Python XGBoost directly.\n\n.. code-block:: scala\n\n  val xgbClassificationModelPath = \"/tmp/xgbClassificationModel\"\n  xgbClassificationModel.write.overwrite().save(xgbClassificationModelPath)\n\n.. code-block:: python\n\n  import xgboost as xgb\n  bst = xgb.Booster({'nthread': 4})\n  bst.load_model(\"/tmp/xgbClassificationModel/data/model\")\n\n.. note:: Consistency issue between XGBoost4J-Spark and other bindings\n\n  There is a consistency issue between XGBoost4J-Spark and other language bindings of XGBoost.\n\n  When users use Spark to load training/test data in LIBSVM format with the following code snippet:\n\n  .. code-block:: scala\n\n    spark.read.format(\"libsvm\").load(\"trainingset_libsvm\")\n\n  Spark assumes that the dataset is using 1-based indexing (feature indices starting with 1). However,\n  when you do prediction with other bindings of XGBoost (e.g. Python API of XGBoost), XGBoost assumes\n  that the dataset is using 0-based indexing (feature indices starting with 0) by default. It creates a\n  pitfall for the users who train model with Spark but predict with the dataset in the same format in\n  other bindings of XGBoost. 
The solution is to transform the dataset to 0-based indexing before you\n  predict with, for example, Python API, or you append ``?indexing_mode=1`` to your file path when\n  loading with DMatrix. For example in Python:\n\n  .. code-block:: python\n\n    xgb.DMatrix('test.libsvm?indexing_mode=1')\n\n*******************************************\nBuilding a ML Pipeline with XGBoost4J-Spark\n*******************************************\n\nBasic ML Pipeline\n=================\n\nSpark ML pipeline can combine multiple algorithms or functions into a single pipeline.\nIt covers from feature extraction, transformation, selection to model training and prediction.\nXGBoost4j-Spark makes it feasible to embed XGBoost into such a pipeline seamlessly.\nThe following example shows how to build such a pipeline consisting of Spark MLlib feature transformer\nand XGBoostClassifier estimator.\n\nWe still use `Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset and the ``rawInput`` DataFrame.\nFirst we need to split the dataset into training and test dataset.\n\n.. code-block:: scala\n\n  val Array(training, test) = rawInput.randomSplit(Array(0.8, 0.2), 123)\n\nThen we build the ML pipeline which includes 4 stages:\n\n* Assemble all features into a single vector column.\n* From string label to indexed double label.\n* Use XGBoostClassifier to train classification model.\n* Convert indexed double label back to original string label.\n\nWe have shown the first three steps in the earlier sections, and the last step is finished with a new\ntransformer `IndexToString <https://spark.apache.org/docs/latest/api/scala/org/apache/spark/ml/feature/IndexToString.html>`_:\n\n.. code-block:: scala\n\n\tval labelConverter = new IndexToString()\n        .setInputCol(\"prediction\")\n        .setOutputCol(\"realLabel\")\n        .setLabels(stringIndexer.labels)\n\nWe need to organize these steps as a Pipeline in Spark ML framework and evaluate the whole pipeline to get a PipelineModel:\n\n.. 
code-block:: scala\n\n  import org.apache.spark.ml.feature._\n  import org.apache.spark.ml.Pipeline\n\n  val pipeline = new Pipeline()\n      .setStages(Array(assembler, stringIndexer, booster, labelConverter))\n  val model = pipeline.fit(training)\n\nAfter we get the PipelineModel, we can make prediction on the test dataset and evaluate the model accuracy.\n\n.. code-block:: scala\n\n  import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator\n\n  val prediction = model.transform(test)\n  val evaluator = new MulticlassClassificationEvaluator()\n  val accuracy = evaluator.evaluate(prediction)\n\nPipeline with Hyper-parameter Tuning\n=====================================\nThe most critical operation to maximize the power of XGBoost is to select the optimal parameters for the model.\nTuning parameters manually is a tedious and labor-consuming process. With the latest version of XGBoost4J-Spark,\nwe can utilize the Spark model selecting tool to automate this process.\n\nThe following example shows the code snippet utilizing CrossValidation and MulticlassClassificationEvaluator\nto search the optimal combination of two XGBoost parameters, ``max_depth`` and ``eta``. (See :doc:`/parameter`.)\nThe model producing the maximum accuracy defined by MulticlassClassificationEvaluator is selected and used to\ngenerate the prediction for the test set.\n\n.. 
code-block:: scala\n\n  import org.apache.spark.ml.tuning._\n  import org.apache.spark.ml.PipelineModel\n  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel\n\n  val paramGrid = new ParamGridBuilder()\n      .addGrid(booster.maxDepth, Array(3, 8))\n      .addGrid(booster.eta, Array(0.2, 0.6))\n      .build()\n  val cv = new CrossValidator()\n      .setEstimator(pipeline)\n      .setEvaluator(evaluator)\n      .setEstimatorParamMaps(paramGrid)\n      .setNumFolds(3)\n\n  val cvModel = cv.fit(training)\n\n  val bestModel = cvModel.bestModel.asInstanceOf[PipelineModel].stages(2)\n      .asInstanceOf[XGBoostClassificationModel]\n  bestModel.extractParamMap()\n\n*********************************\nRun XGBoost4J-Spark in Production\n*********************************\n\nXGBoost4J-Spark is one of the most important steps to bring XGBoost to production environment easier. In this section,\nwe introduce three key features to run XGBoost4J-Spark in production.\n\nParallel/Distributed Training\n=============================\nThe massive size of training dataset is one of the most significant characteristics in production environment. To ensure\nthat training in XGBoost scales with the data size, XGBoost4J-Spark bridges the distributed/parallel processing framework\nof Spark and the parallel/distributed training mechanism of XGBoost.\n\nIn XGBoost4J-Spark, each XGBoost worker is wrapped by a Spark task and the training dataset in Spark's memory space is\nfed to XGBoost workers in a transparent approach to the user.\n\nIn the code snippet where we build XGBoostClassifier, we set parameter ``num_workers`` (or ``numWorkers``).\nThis parameter controls how many parallel workers we want to have when training a XGBoostClassificationModel.\n\n.. note:: Regarding OpenMP optimization\n\n  By default, we allocate a core per each XGBoost worker. 
Therefore, the OpenMP optimization within each XGBoost worker does\n  not take effect and the parallelization of training is achieved by running multiple workers (i.e. Spark tasks) at the same time.\n\n  If you do want OpenMP optimization, you have to\n\n  1. set ``nthread`` to a value larger than 1 when creating XGBoostClassifier/XGBoostRegressor\n  2. set ``spark.task.cpus`` in Spark to the same value as ``nthread``\n\nGang Scheduling\n===============\nXGBoost uses the `AllReduce <http://mpitutorial.com/tutorials/mpi-reduce-and-allreduce/>`_\nalgorithm to synchronize the stats, e.g. histogram values, of each worker during training. Therefore XGBoost4J-Spark requires\nthat all of ``nthread * numWorkers`` cores should be available before the training runs.\n\nIn the production environment where many users share the same cluster, it's hard to guarantee that your XGBoost4J-Spark application\ncan get all requested resources for every run. By default, the communication layer in XGBoost will block the whole application when\nit requires more resources to be available. This process usually brings unnecessary resource waste as it keeps the ready resources\nand tries to claim more. Additionally, this usually happens silently and does not come to the attention of users.\n\nXGBoost4J-Spark allows the user to set up a timeout threshold for claiming resources from the cluster. If the application cannot get\nenough resources within this time period, the application would fail instead of wasting resources for hanging long. To enable this\nfeature, you can set with XGBoostClassifier/XGBoostRegressor/XGBoostRanker:\n\n.. code-block:: scala\n\n  xgbClassifier.setRabitTrackerTimeout(60000L)\n\nor pass in ``rabit_tracker_timeout`` in ``xgbParamMap`` when building XGBoostClassifier:\n\n.. 
code-block:: scala\n\n  val xgbParam = Map(\"eta\" -> 0.1f,\n     \"max_depth\" -> 2,\n     \"objective\" -> \"multi:softprob\",\n     \"num_class\" -> 3,\n     \"num_round\" -> 100,\n     \"num_workers\" -> 2,\n     \"rabit_tracker_timeout\" -> 60000L)\n  val xgbClassifier = new XGBoostClassifier(xgbParam).\n      setFeaturesCol(\"features\").\n      setLabelCol(\"classIndex\")\n\nIf XGBoost4J-Spark cannot get enough resources for running two XGBoost workers, the application would fail.\nUsers can have external mechanism to monitor the status of application and get notified for such case.\n\nCheckpoint During Training\n==========================\n\nTransient failures are also commonly seen in production environment. To simplify the design of XGBoost,\nwe stop training if any of the distributed workers fail. However, if the training fails after having been\nthrough a long time, it would be a great waste of resources.\n\nWe support creating checkpoint during training to facilitate more efficient recovery from failure. To enable this feature,\nyou can set how many iterations we build each checkpoint with ``setCheckpointInterval`` and the location of checkpoints\nwith ``setCheckpointPath``:\n\n.. code-block:: scala\n\n  xgbClassifier.setCheckpointInterval(2)\n  xgbClassifier.setCheckpointPath(\"/checkpoint_path\")\n\nAn equivalent way is to pass in parameters in XGBoostClassifier's constructor:\n\n.. 
code-block:: scala\n\n  val xgbParam = Map(\"eta\" -> 0.1f,\n     \"max_depth\" -> 2,\n     \"objective\" -> \"multi:softprob\",\n     \"num_class\" -> 3,\n     \"num_round\" -> 100,\n     \"num_workers\" -> 2,\n     \"checkpoint_path\" -> \"/checkpoints_path\",\n     \"checkpoint_interval\" -> 2)\n  val xgbClassifier = new XGBoostClassifier(xgbParam).\n      setFeaturesCol(\"features\").\n      setLabelCol(\"classIndex\")\n\nIf the training failed during these 100 rounds, the next run of training would start by reading the latest checkpoint\nfile in ``/checkpoints_path`` and start from the iteration when the checkpoint was built until the next failure or the specified 100 rounds.\n\n\n***************\nExternal Memory\n***************\n\n.. versionadded:: 3.0\n\n.. warning::\n\n   The feature is experimental.\n\nHere we refer to the iterator-based external memory instead of the one that uses special\nURL parameters. XGBoost-Spark has experimental support for GPU-based external memory\ntraining (:doc:`/jvm/xgboost4j_spark_gpu_tutorial`) since 3.0. When it's used in\ncombination with GPU-based training, data is first cached on disk and then staged on CPU\nmemory.  See :doc:`/tutorials/external_memory` for general concept and best practices for\nthe external memory training. In addition, see the doc string of the estimator parameter\n``useExternalMemory``. With Spark estimators:\n\n.. code-block:: scala\n\n  val xgbClassifier = new XGBoostClassifier(xgbParam)\n      .setFeaturesCol(featuresNames)\n      .setLabelCol(labelName)\n      .setUseExternalMemory(true)\n      .setDevice(\"cuda\")  // CPU is not yet supported\n"
  },
  {
    "path": "doc/jvm/xgboost_spark_migration.rst",
    "content": "##########################################################\nMigration Guide: How to migrate to XGBoost4j-Spark jvm 3.x\n##########################################################\n\nXGBoost4j-Spark jvm packages underwent significant modifications in version 3.0,\nwhich may cause compatibility issues with existing user code.\n\nThis guide will walk you through the process of updating your code to ensure\nit's compatible with XGBoost4j-Spark 3.0 and later versions.\n\n************************\nXGBoost4j Spark Packages\n************************\n\nXGBoost4j-Spark 3.0 has assembled xgboost4j package into xgboost4j-spark_2.12-3.0.0.jar, which means\nyou can now simply use `xgboost4j-spark` for your application.\n\n* For CPU\n\n  .. code-block:: xml\n\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>\n        <version>3.0.0</version>\n    </dependency>\n\n* For GPU\n\n  .. code-block:: xml\n\n    <dependency>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>\n        <version>3.0.0</version>\n    </dependency>\n\n\nWhen submitting the XGBoost application to the Spark cluster, you only need to specify the single `xgboost4j-spark` package.\n\n* For CPU\n\n  .. code-block:: bash\n\n    spark-submit \\\n      --jars xgboost4j-spark_2.12-3.0.0.jar \\\n      ... \\\n\n\n* For GPU\n\n  .. code-block:: bash\n\n    spark-submit \\\n      --jars xgboost4j-spark-gpu_2.12-3.0.0.jar \\\n      ... \\\n\n***************\nXGBoost Ranking\n***************\n\nLearning to rank using XGBoostRegressor has been replaced by a dedicated `XGBoostRanker`, which is specifically designed\nto support ranking algorithms.\n\n.. 
code-block:: scala\n\n  // before xgboost4j-spark 3.0\n  val regressor = new XGBoostRegressor().setObjective(\"rank:ndcg\")\n\n  // after xgboost4j-spark 3.0\n  val ranker = new XGBoostRanker()\n\n******************\nRemoved Parameters\n******************\n\nStarting from xgboost4j-spark 3.0, the parameters below are removed.\n\n- cacheTrainingSet\n\n  If you wish to cache the training dataset, you have the option to implement caching\n  in your code prior to fitting the data to an estimator.\n\n  .. code-block:: scala\n\n    val df = input.cache()\n    val model = new XGBoostClassifier().fit(df)\n\n- trainTestRatio\n\n  The following method can be employed to do the evaluation.\n\n  .. code-block:: scala\n\n    val Array(train, eval) = trainDf.randomSplit(Array(0.7, 0.3))\n    val classifier = new XGBoostClassifier().setEvalDataset(eval)\n    val model = classifier.fit(train)\n\n- tracker_conf\n\n  The following method can be used to configure RabitTracker.\n\n  .. code-block:: scala\n\n    val classifier = new XGBoostClassifier()\n      .setRabitTrackerTimeout(100)\n      .setRabitTrackerHostIp(\"192.168.0.2\")\n      .setRabitTrackerPort(19203)\n\n- rabitRingReduceThreshold\n- rabitTimeout\n- rabitConnectRetry\n- singlePrecisionHistogram\n- lambdaBias\n- objectiveType\n"
  },
  {
    "path": "doc/parameter.rst",
    "content": "..\n  IMPORTANT: When adding new entries to this file (e.g. a new parameter),\n  the parameter should also be added under file 'R-package/R/xgb.train.R'.\n\n##################\nXGBoost Parameters\n##################\nBefore running XGBoost, we must set three types of parameters: general parameters, booster parameters and task parameters.\n\n- **General parameters** relate to which booster we are using to do boosting, commonly tree or linear model\n- **Booster parameters** depend on which booster you have chosen\n- **Learning task parameters** decide on the learning scenario. For example, regression tasks may use different parameters with ranking tasks.\n\n.. note:: Parameters in R package\n\n  In R-package, you can use ``.`` (dot) to replace underscore in the parameters, for example, you can use ``max.depth`` to indicate ``max_depth``. The underscore parameters are also valid in R.\n\n.. contents::\n  :backlinks: none\n  :local:\n\n\n.. _global_config:\n\n********************\nGlobal Configuration\n********************\nThe following parameters can be set in the global scope, using :py:func:`xgboost.config_context()` (Python) or ``xgb.set.config()`` (R).\n\n* ``verbosity``: Verbosity of printing messages. Valid values of 0 (silent), 1 (warning), 2 (info), and 3 (debug).\n\n* ``use_rmm``: Whether to use RAPIDS Memory Manager (RMM) to allocate cache GPU\n  memory. The primary memory is always allocated on the RMM pool when XGBoost is built\n  (compiled) with the RMM plugin enabled. Valid values are ``true`` and ``false``. See\n  :doc:`/python/rmm-examples/index` for details.\n\n* ``use_cuda_async_pool`` [default=false]\n\n  Whether to use the device memory pool in the CUDA driver. This option is not available\n  if XGBoost is built with RMM support, as it is the same as using the RMM\n  `CudaAsyncMemoryResource` pool.\n\n  .. versionadded:: 3.2.0\n\n  .. warning:: This is an experimental feature and is subject to change without notice. 
Windows is not supported yet.\n\n* ``nthread``: Set the global number of threads for OpenMP. Use this only when you need to\n  override some OpenMP-related environment variables like ``OMP_NUM_THREADS``. Otherwise,\n  the ``nthread`` parameter from the Booster and the DMatrix should be preferred as the\n  former sets the global variable and might cause conflicts with other libraries.\n\n******************\nGeneral Parameters\n******************\n* ``booster`` [default= ``gbtree``]\n\n  - Which booster to use. Can be ``gbtree``, ``gblinear`` or ``dart``; ``gbtree`` and ``dart`` use tree based models while ``gblinear`` uses linear functions.\n  - Dropout parameters like ``rate_drop`` can be used directly with tree models. ``booster=dart`` remains supported for compatibility.\n\n  .. deprecated:: 3.3.0\n\n    ``booster=gblinear`` is deprecated and support will be removed in a future release.\n\n* ``device`` [default= ``cpu``]\n\n  .. versionadded:: 2.0.0\n\n  - Device for XGBoost to run. User can set it to one of the following values:\n\n    + ``cpu``: Use CPU.\n    + ``cuda``: Use a GPU (CUDA device).\n    + ``cuda:<ordinal>``: ``<ordinal>`` is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one devices).\n    + ``gpu``: Default GPU device selection from the list of available and supported devices. Only ``cuda`` devices are supported currently.\n    + ``gpu:<ordinal>``: Default GPU device selection from the list of available and supported devices. Only ``cuda`` devices are supported currently.\n\n    For more information about GPU acceleration, see :doc:`/gpu/index`. In distributed environments, ordinal selection is handled by distributed frameworks instead of XGBoost. As a result, using ``cuda:<ordinal>`` will result in an error. Use ``cuda`` instead.\n\n* ``verbosity`` [default=1]\n\n  - Verbosity of printing messages.  Valid values are 0 (silent), 1 (warning), 2 (info), 3\n    (debug).  
Sometimes XGBoost tries to change configurations based on heuristics, which\n    is displayed as warning message.  If there's unexpected behaviour, please try to\n    increase value of verbosity.\n\n* ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface]\n\n  - When set to True, XGBoost will perform validation of input parameters to check whether\n    a parameter is used or not. A warning is emitted when there's unknown parameter.\n\n* ``nthread`` [default to maximum number of threads available if not set]\n\n  - Number of parallel threads used to run XGBoost.  When choosing it, please keep thread\n    contention and hyperthreading in mind.\n\n* ``disable_default_eval_metric`` [default= ``false``]\n\n  - Flag to disable default metric. Set to 1 or ``true`` to disable.\n\nParameters for Tree Booster\n===========================\n* ``eta`` [default=0.3, alias: ``learning_rate``]\n\n  - Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative.\n  - range: [0,1]\n\n* ``gamma`` [default=0, alias: ``min_split_loss``]\n\n  - Minimum loss reduction required to make a further partition on a leaf node of the tree. The larger ``gamma`` is, the more conservative the algorithm will be. Note that a tree where no splits were made might still contain a single terminal node with a non-zero score. This is the same :math:`\\gamma` described in the :doc:`/tutorials/model`.\n  - range: [0,∞]\n\n* ``max_depth`` [default=6, type=int32]\n\n  - Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit. 0 indicates no limit on depth. Beware that XGBoost aggressively consumes memory when training a deep tree. 
``exact`` tree method requires non-zero value.\n  - range: [0,∞]\n\n* ``min_child_weight`` [default=1]\n\n  - Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than ``min_child_weight``, then the building process will give up further partitioning. In linear regression task, this simply corresponds to minimum number of instances needed to be in each node. The larger ``min_child_weight`` is, the more conservative the algorithm will be.\n  - range: [0,∞]\n\n* ``max_delta_step`` [default=0]\n\n  - Maximum delta step we allow each leaf output to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help making the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when class is extremely imbalanced. Setting it to a value of 1-10 might help control the update.\n  - range: [0,∞]\n\n* ``subsample`` [default=1]\n\n  - Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost would randomly sample half of the training data prior to growing trees, and this will prevent overfitting. Subsampling will occur once in every boosting iteration.\n  - range: (0,1]\n\n* ``sampling_method`` [default= ``uniform``]\n\n  .. versionchanged:: 3.2.0\n\n    XGBoost supports both CPU and GPU for gradient-based sampling.\n\n  - The method to use to sample the training instances.\n  - ``uniform``: each training instance has an equal probability of being selected. Typically set\n    ``subsample`` >= 0.5 for good results.\n  - ``gradient_based``: the selection probability for each training instance is proportional to the\n    *regularized absolute value* of gradients (more specifically, :math:`\\sqrt{g^2+\\lambda h^2}`).\n    ``subsample`` may be set to as low as 0.1 without loss of model accuracy. 
Note that this\n    sampling method is only supported when ``tree_method`` is set to ``hist``; other tree\n    methods only support ``uniform`` sampling.\n\n  .. note::\n\n     When working with reduced gradient for multi-target models, the accuracy of\n     gradient-based sampling might be sub-optimal. The sampling is performed using the\n     split gradient, which may not be optimal with the full gradient. Use uniform sampling\n     as an alternative.\n\n* ``colsample_bytree``, ``colsample_bylevel``, ``colsample_bynode`` [default=1]\n\n  - This is a family of parameters for subsampling of columns.\n  - All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled.\n  - ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.\n  - ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.\n  - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.\n  - ``colsample_by*`` parameters work cumulatively. For instance,\n    the combination ``{'colsample_bytree':0.5, 'colsample_bylevel':0.5,\n    'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at\n    each split.\n\n    Using the Python or the R package, one can set the ``feature_weights`` for DMatrix to\n    define the probability of each feature being selected when using column sampling.\n    There's a similar parameter for ``fit`` method in sklearn interface.\n\n* ``lambda`` [default=1, alias: ``reg_lambda``]\n\n  - L2 regularization term on weights. 
Increasing this value will make model more conservative. This is the :math:`\\lambda` described in the :doc:`/tutorials/model`.\n  - range: [0, :math:`\\infty`]\n\n* ``alpha`` [default=0, alias: ``reg_alpha``]\n\n  - L1 regularization term on weights. Increasing this value will make model more conservative.\n  - range: [0, :math:`\\infty`]\n\n* ``tree_method`` string [default= ``auto``]\n\n  - The tree construction algorithm used in XGBoost. See description in the `reference paper <https://arxiv.org/abs/1603.02754>`_ and :doc:`treemethod`.\n\n  - Choices: ``auto``, ``exact``, ``approx``, ``hist``, this is a combination of commonly\n    used updaters.  For other updaters like ``refresh``, set the parameter ``updater``\n    directly.\n\n    - ``auto``: Same as the ``hist`` tree method.\n    - ``exact``: Exact greedy algorithm.  Enumerates all split candidates.\n    - ``approx``: Approximate greedy algorithm using quantile sketch and gradient histogram.\n    - ``hist``: Faster histogram optimized approximate greedy algorithm.\n\n* ``scale_pos_weight`` [default=1]\n\n  - Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: ``sum(negative instances) / sum(positive instances)``. See :doc:`Parameters Tuning </tutorials/param_tuning>` for more discussion. Also, see Higgs Kaggle competition demo for examples: `R <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R>`_, `py1 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py>`_, `py2 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py>`_, `py3 <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py>`_.\n\n* ``updater``\n\n  - A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. 
However, it could be also set explicitly by a user. The following updaters exist:\n\n    - ``grow_colmaker``: non-distributed column-based construction of trees.\n    - ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.\n    - ``grow_quantile_histmaker``: Grow tree using quantized histogram.\n    - ``grow_gpu_hist``:  Enabled when ``tree_method`` is set to ``hist`` along with ``device=cuda``.\n    - ``grow_gpu_approx``: Enabled when ``tree_method`` is set to ``approx`` along with ``device=cuda``.\n    - ``sync``: synchronizes trees in all distributed nodes.\n    - ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.\n    - ``prune``: prunes the splits where loss < min_split_loss (or gamma) and nodes that have depth greater than ``max_depth``.\n\n* ``refresh_leaf`` [default=1]\n\n  - This is a parameter of the ``refresh`` updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. When it is 0, only node stats are updated.\n\n* ``process_type`` [default= ``default``]\n\n  - A type of boosting process to run.\n  - Choices: ``default``, ``update``\n\n    - ``default``: The normal boosting process which creates new trees.\n    - ``update``: Starts from an existing model and only updates its trees. In each boosting iteration, a tree from the initial model is taken, a specified sequence of updaters is run for that tree, and a modified tree is added to the new model. The new model would have either the same or smaller number of trees, depending on the number of boosting iterations performed. Currently, the following built-in updaters could be meaningfully used with this process type: ``refresh``, ``prune``. 
With ``process_type=update``, one cannot use updaters that create new trees.\n\n* ``grow_policy`` [default= ``depthwise``]\n\n  - Controls a way new nodes are added to the tree.\n  - Currently supported only if ``tree_method`` is set to ``hist`` or ``approx``.\n  - Choices: ``depthwise``, ``lossguide``\n\n    - ``depthwise``: split at nodes closest to the root.\n    - ``lossguide``: split at nodes with highest loss change.\n\n* ``max_leaves`` [default=0, type=int32]\n\n  - Maximum number of nodes to be added.  Not used by ``exact`` tree method.\n\n* ``max_bin``, [default=256, type=int32]\n\n  - Only used if ``tree_method`` is set to ``hist`` or ``approx``.\n  - Maximum number of discrete bins to bucket continuous features.\n  - Increasing this number improves the optimality of splits at the cost of higher computation time.\n\n* ``num_parallel_tree``, [default=1]\n\n  - Number of parallel trees constructed during each iteration. This option is used to support boosted random forest.\n\n* ``monotone_constraints``\n\n  - Constraint of variable monotonicity.  See :doc:`/tutorials/monotonic` for more information.\n\n* ``interaction_constraints``\n\n  - Constraints for interaction representing permitted interactions.  The constraints must\n    be specified in the form of a nest list, e.g. ``[[0, 1], [2, 3, 4]]``, where each inner\n    list is a group of indices of features that are allowed to interact with each other.\n    See :doc:`/tutorials/feature_interaction_constraint` for more information.\n\n* ``multi_strategy``, [default = ``one_output_per_tree``]\n\n  .. versionadded:: 2.0.0\n\n  .. note:: This parameter is working-in-progress.\n\n  - The strategy used for training multi-target models, including multi-target regression\n    and multi-class classification. 
See :doc:`/tutorials/multioutput` for more information.\n\n    - ``one_output_per_tree``: One model for each target.\n    - ``multi_output_tree``:  Use multi-target trees.\n\n\nParameters for Non-Exact Tree Methods\n=====================================\n\n* ``max_cached_hist_node``, [default = 65536]\n\n  Maximum number of cached nodes for histogram. This can be used with the ``hist`` and the\n  ``approx`` tree methods.\n\n  .. versionadded:: 2.0.0\n\n  - For most of the cases this parameter should not be set except for growing deep\n    trees. After 3.0, this parameter affects GPU algorithms as well.\n\n\n.. _cat-param:\n\nParameters for Categorical Feature\n==================================\n\nThese parameters are only used for training with categorical data. See\n:doc:`/tutorials/categorical` for more information.\n\n.. note:: The ``exact`` tree method is not supported for categorical features.\n\n\n* ``max_cat_to_onehot``\n\n  .. versionadded:: 1.6.0\n\n  - A threshold for deciding whether XGBoost should use one-hot encoding based split for\n    categorical data.  When number of categories is lesser than the threshold then one-hot\n    encoding is chosen, otherwise the categories will be partitioned into children nodes.\n\n* ``max_cat_threshold``\n\n  .. versionadded:: 1.7.0\n\n  - Maximum number of categories considered for each split. 
Used only by partition-based\n    splits for preventing over-fitting.\n\nAdditional dropout parameters for tree boosters\n================================================\n\n* ``sample_type`` [default= ``uniform``]\n\n  - Type of sampling algorithm.\n\n    - ``uniform``: dropped trees are selected uniformly.\n    - ``weighted``: dropped trees are selected in proportion to weight.\n\n* ``normalize_type`` [default= ``tree``]\n\n  - Type of normalization algorithm.\n\n    - ``tree``: new trees have the same weight of each of dropped trees.\n\n      - Weight of new trees are ``1 / (k + learning_rate)``.\n      - Dropped trees are scaled by a factor of ``k / (k + learning_rate)``.\n\n    - ``forest``: new trees have the same weight of sum of dropped trees (forest).\n\n      - Weight of new trees are ``1 / (1 + learning_rate)``.\n      - Dropped trees are scaled by a factor of ``1 / (1 + learning_rate)``.\n\n* ``rate_drop`` [default=0.0]\n\n  - Dropout rate (a fraction of previous trees to drop during the dropout).\n  - range: [0.0, 1.0]\n\n* ``one_drop`` [default=0]\n\n  - When this flag is enabled, at least one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout from the original DART paper).\n\n* ``skip_drop`` [default=0.0]\n\n  - Probability of skipping the dropout procedure during a boosting iteration.\n\n    - If a dropout is skipped, new trees are added in the same manner as ``gbtree``.\n    - Note that non-zero ``skip_drop`` has higher priority than ``rate_drop`` or ``one_drop``.\n\n  - range: [0.0, 1.0]\n\nParameters for Linear Booster (``booster=gblinear``)\n====================================================\n.. deprecated:: 3.3.0\n\n  ``booster=gblinear`` is deprecated and support will be removed in a future release.\n\n* ``lambda`` [default=0, alias: ``reg_lambda``]\n\n  - L2 regularization term on weights. Increasing this value will make model more conservative. 
Normalised to number of training examples.\n\n* ``alpha`` [default=0, alias: ``reg_alpha``]\n\n  - L1 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.\n\n* ``eta`` [default=0.5, alias: ``learning_rate``]\n\n  - Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative.\n  - range: [0,1]\n\n* ``updater`` [default= ``shotgun``]\n\n  - Choice of algorithm to fit linear model\n\n    - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.\n    - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant would be used.\n\n* ``feature_selector`` [default= ``cyclic``]\n\n  - Feature selection and ordering method\n\n    * ``cyclic``: Deterministic selection by cycling through features one at a time.\n    * ``shuffle``: Similar to ``cyclic`` but with random feature shuffling prior to each update.\n    * ``random``: A random (with replacement) coordinate selector.\n    * ``greedy``: Select coordinate with the greatest gradient magnitude.  It has ``O(num_feature^2)`` complexity. It is fully deterministic. It allows restricting the selection to ``top_k`` features per group with the largest magnitude of univariate weight change, by setting the ``top_k`` parameter. Doing so would reduce the complexity to ``O(num_feature*top_k)``.\n    * ``thrifty``: Thrifty, approximately-greedy feature selector. Prior to cyclic updates, reorders features in descending magnitude of their univariate weight changes. 
This operation is multithreaded and is a linear complexity approximation of the quadratic greedy selection. It allows restricting the selection to ``top_k`` features per group with the largest magnitude of univariate weight change, by setting the ``top_k`` parameter.\n\n* ``top_k`` [default=0]\n\n  - The number of top features to select in ``greedy`` and ``thrifty`` feature selector. The value of 0 means using all the features.\n\n************************\nLearning Task Parameters\n************************\nSpecify the learning task and the corresponding learning objective. The objective options are below:\n\n* ``objective`` [default=reg:squarederror]\n\n  - ``reg:squarederror``: regression with squared loss.\n  - ``reg:squaredlogerror``: regression with squared log loss :math:`\\frac{1}{2}[log(pred + 1) - log(label + 1)]^2`.  All input labels are required to be greater than -1.  Also, see metric ``rmsle`` for possible issue  with this objective.\n  - ``reg:logistic``: logistic regression, output probability\n  - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss.\n  - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal.\n\n    .. versionadded:: 1.7.0\n\n  - ``reg:quantileerror``: Quantile loss, also known as ``pinball loss``. See later sections for its parameter and :ref:`sphx_glr_python_examples_quantile_regression.py` for a worked example.\n\n    .. versionadded:: 2.0.0\n\n  - ``reg:expectileerror``: Expectile loss (asymmetric squared error). 
See later sections for its parameter.\n\n  - ``binary:logistic``: logistic regression for binary classification, output probability\n  - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation\n  - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.\n  - ``count:poisson``: Poisson regression for count data, output mean of Poisson distribution.\n\n    + ``max_delta_step`` is set to 0.7 by default in Poisson regression (used to safeguard optimization)\n\n  - ``survival:cox``: Cox regression for right censored survival time data (negative values are considered right censored).\n    Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function ``h(t) = h0(t) * HR``).\n  - ``survival:aft``: Accelerated failure time model for censored survival time data.\n    See :doc:`/tutorials/aft_survival_analysis` for details.\n  - ``multi:softmax``: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes)\n  - ``multi:softprob``: same as softmax, but output a vector of ``ndata * nclass``, which can be further reshaped to ``ndata * nclass`` matrix. The result contains predicted probability of each data point belonging to each class.\n  - ``rank:ndcg``: Use LambdaMART to perform pair-wise ranking where `Normalized Discounted Cumulative Gain (NDCG) <https://en.wikipedia.org/wiki/NDCG>`_ is maximized. This objective supports position debiasing for click data.\n  - ``rank:map``: Use LambdaMART to perform pair-wise ranking where `Mean Average Precision (MAP) <https://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_ is maximized\n  - ``rank:pairwise``: Use LambdaRank to perform pair-wise ranking using the `ranknet` objective.\n  - ``reg:gamma``: gamma regression with log-link. 
Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications>`_.\n  - ``reg:tweedie``: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be `Tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`_.\n\n* ``base_score``\n\n  The initial prediction score of all instances, also known as the global bias, or the intercept.\n\n  .. versionchanged:: 3.1.0\n\n    XGBoost is updated to use vector-valued intercept by default.\n\n  - The parameter is automatically estimated for selected objectives before training. To\n    disable the estimation, specify a real number argument, e.g. ``base_score = 0.5``.\n  - If ``base_margin`` is supplied, ``base_score`` will not be used.\n  - If we train the model with a sufficient number of iterations, changing this value does not offer significant benefit.\n\n  See :doc:`/tutorials/intercept` for more information, including different use cases.\n\n* ``eval_metric`` [default according to objective]\n\n  - Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, `mean average precision` for ``rank:map``, etc.)\n  - User can add multiple evaluation metrics. Python users: remember to pass the metrics in as list of parameters pairs instead of map, so that latter ``eval_metric`` won't override previous ones\n\n  - The choices are listed below:\n\n    - ``rmse``: `root mean square error <https://en.wikipedia.org/wiki/Root_mean_square_error>`_\n    - ``rmsle``: root mean square log error: :math:`\\sqrt{\\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}`. Default metric of ``reg:squaredlogerror`` objective. 
This metric reduces errors generated by outliers in dataset.  But because ``log`` function is employed, ``rmsle`` might output ``nan`` when prediction value is less than -1.  See ``reg:squaredlogerror`` for other requirements.\n    - ``mae``: `mean absolute error <https://en.wikipedia.org/wiki/Mean_absolute_error>`_\n    - ``mape``: `mean absolute percentage error <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`_\n    - ``mphe``: `mean Pseudo Huber error <https://en.wikipedia.org/wiki/Huber_loss>`_. Default metric of ``reg:pseudohubererror`` objective.\n    - ``expectile``: Expectile regression error (asymmetric squared error). Default metric of ``reg:expectileerror`` objective.\n    - ``logloss``: `negative log-likelihood <https://en.wikipedia.org/wiki/Log-likelihood>`_\n    - ``error``: Binary classification error rate. It is calculated as ``#(wrong cases)/#(all cases)``. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.\n    - ``error@t``: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'.\n    - ``merror``: Multiclass classification error rate. It is calculated as ``#(wrong cases)/#(all cases)``.\n    - ``mlogloss``: `Multiclass logloss <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html>`_.\n    - ``auc``: `Receiver Operating Characteristic Area under the Curve <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.\n      Available for classification and learning-to-rank tasks.\n\n      - When used with binary classification, the objective should be ``binary:logistic`` or similar functions that work on probability.\n      - When used with multi-class classification, objective should be ``multi:softprob`` instead of ``multi:softmax``, as the latter doesn't output probability.  
Also the AUC is calculated by 1-vs-rest with reference class weighted by class prevalence.\n      - When used with LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs.  This corresponds to pairwise learning to rank.  The implementation has some issues with average AUC around groups and distributed workers not being well-defined.\n      - On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.\n      - When input dataset contains only negative or positive samples, the output is `NaN`.  The behavior is implementation defined, for instance, ``scikit-learn`` returns :math:`0.5` instead.\n\n    - ``aucpr``: `Area under the PR curve <https://en.wikipedia.org/wiki/Precision_and_recall>`_.\n      Available for classification and learning-to-rank tasks.\n\n      After XGBoost 1.6, both of the requirements and restrictions for using ``aucpr`` in classification problem are similar to ``auc``.  For ranking task, only binary relevance label :math:`y \\in [0, 1]` is supported.  Different from ``map (mean average precision)``, ``aucpr`` calculates the *interpolated* area under precision recall curve using continuous interpolation.\n\n    - ``pre``: Precision at :math:`k`. Supports only learning to rank task.\n    - ``ndcg``: `Normalized Discounted Cumulative Gain <https://en.wikipedia.org/wiki/NDCG>`_\n    - ``map``: `Mean Average Precision <https://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_\n\n      The `average precision` is defined as:\n\n      .. 
math::\n\n         AP@l = \\frac{1}{min{(l, N)}}\\sum^l_{k=1}P@k \\cdot I_{(k)}\n\n      where :math:`I_{(k)}` is an indicator function that equals to :math:`1` when the document at :math:`k` is relevant and :math:`0` otherwise. The :math:`P@k` is the precision at :math:`k`, and :math:`N` is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries.\n\n    - ``ndcg@n``, ``map@n``, ``pre@n``: :math:`n` can be assigned as an integer to cut off the top positions in the lists for evaluation.\n    - ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as :math:`1`. By appending \"-\" to the evaluation metric name, we can ask XGBoost to evaluate these scores as :math:`0` to be consistent under some conditions.\n    - ``poisson-nloglik``: negative log-likelihood for Poisson regression\n    - ``gamma-nloglik``: negative log-likelihood for gamma regression\n    - ``cox-nloglik``: negative partial log-likelihood for Cox proportional hazards regression\n    - ``gamma-deviance``: residual deviance for gamma regression\n    - ``tweedie-nloglik``: negative log-likelihood for Tweedie regression (at a specified value of the ``tweedie_variance_power`` parameter)\n    - ``aft-nloglik``: Negative log likelihood of Accelerated Failure Time model.\n      See :doc:`/tutorials/aft_survival_analysis` for details.\n    - ``interval-regression-accuracy``: Fraction of data points whose predicted labels fall in the interval-censored labels.\n      Only applicable for interval-censored data.  See :doc:`/tutorials/aft_survival_analysis` for details.\n\n* ``seed`` [default=0]\n\n  - Random number seed.  
In the R package, if not specified, instead of defaulting to seed 'zero', will take a random seed through R's own RNG engine.\n\n* ``seed_per_iteration`` [default= ``false``]\n\n  - Seed PRNG deterministically via iteration number.\n\nParameters for Tweedie Regression (``objective=reg:tweedie``)\n=============================================================\n* ``tweedie_variance_power`` [default=1.5]\n\n  - Parameter that controls the variance of the Tweedie distribution ``var(y) ~ E(y)^tweedie_variance_power``\n  - range: (1,2)\n  - Set closer to 2 to shift towards a gamma distribution\n  - Set closer to 1 to shift towards a Poisson distribution.\n\nParameter for using Pseudo-Huber (``reg:pseudohubererror``)\n===========================================================\n\n* ``huber_slope`` : A parameter used for Pseudo-Huber loss to define the :math:`\\delta` term. [default = 1.0]\n\nParameter for using Quantile Loss (``reg:quantileerror``)\n=========================================================\n\n* ``quantile_alpha``: A scalar or a list of targeted quantiles.\n\n    .. versionadded:: 2.0.0\n\nParameter for using Expectile Loss (``reg:expectileerror``)\n===========================================================\n\n* ``expectile_alpha``: A scalar or a list of targeted expectiles. Range: [0, 1]. Required for\n  ``reg:expectileerror``.\n\n    .. versionadded:: 3.3.0\n\n    .. note:: Multi-target labels are not supported for expectile loss.\n\nParameter for using AFT Survival Loss (``survival:aft``) and Negative Log Likelihood of AFT metric (``aft-nloglik``)\n====================================================================================================================\n\n* ``aft_loss_distribution``: Probability Density Function for the AFT distribution; ``normal``, ``logistic``, or ``extreme``.\n* ``aft_loss_distribution_scale``: Scaling factor for the AFT distribution. Range: (0,∞)\n\n.. 
_ltr-param:\n\nParameters for learning to rank (``rank:ndcg``, ``rank:map``, ``rank:pairwise``)\n================================================================================\n\nThese are parameters specific to learning to rank task. See :doc:`Learning to Rank </tutorials/learning_to_rank>` for an in-depth explanation.\n\n* ``lambdarank_pair_method`` [default = ``topk``]\n\n  How to construct pairs for pair-wise learning.\n\n  - ``mean``: Sample ``lambdarank_num_pair_per_sample`` pairs for each document in the query list.\n  - ``topk``: Focus on top-``lambdarank_num_pair_per_sample`` documents. Construct :math:`|query|` pairs for each document at the top-``lambdarank_num_pair_per_sample`` ranked by the model.\n\n* ``lambdarank_num_pair_per_sample`` [range = :math:`[1, \\infty]`]\n\n  It specifies the number of pairs sampled for each document when pair method is ``mean``, or the truncation level for queries when the pair method is ``topk``. For example, to train with ``ndcg@6``, set ``lambdarank_num_pair_per_sample`` to :math:`6` and ``lambdarank_pair_method`` to ``topk``.\n\n* ``lambdarank_normalization`` [default = ``true``]\n\n  .. versionadded:: 2.1.0\n\n  Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress.\n\n  .. versionchanged:: 3.0.0\n\n  When the ``mean`` method is used, it's normalized by the ``lambdarank_num_pair_per_sample`` instead of gradient.\n\n* ``lambdarank_score_normalization`` [default = ``true``]\n\n  .. versionadded:: 3.0.0\n\n  Whether to normalize the delta metric by the difference of prediction scores. This can\n  sometimes stagnate the training progress. With pairwise ranking, we can normalize the\n  gradient using the difference between two samples in each pair to reduce influence from\n  the pairs that have large difference in ranking scores. This can help us regularize the\n  model to reduce bias and prevent overfitting. 
Similar to other regularization\n  techniques, this might prevent training from converging.\n\n  There was no normalization before 2.0. In 2.0 and later versions this is used by\n  default. In 3.0, we made this an option that users can disable.\n\n* ``lambdarank_unbiased`` [default = ``false``]\n\n  Specify whether we need to debias input click data.\n\n* ``lambdarank_bias_norm`` [default = 2.0]\n\n  :math:`L_p` normalization for position debiasing, default is :math:`L_2`. Only relevant when ``lambdarank_unbiased`` is set to true.\n\n* ``ndcg_exp_gain`` [default = ``true``]\n\n  Whether we should use exponential gain function for ``NDCG``. There are two forms of gain function for ``NDCG``, one is using relevance value directly while the other is using :math:`2^{rel} - 1` to emphasize retrieving relevant documents. When ``ndcg_exp_gain`` is true (the default), relevance degree cannot be greater than 31.\n"
  },
  {
    "path": "doc/prediction.rst",
    "content": ".. _predict_api:\n\n##########\nPrediction\n##########\n\nThere are a number of prediction functions in XGBoost with various parameters.  This\ndocument attempts to clarify some of confusions around prediction with a focus on the\nPython binding, R package is similar when ``strict_shape`` is specified (see below).\n\n******************\nPrediction Options\n******************\n\nThere are a number of different prediction options for the\n:py:meth:`xgboost.Booster.predict` method, ranging from ``pred_contribs`` to\n``pred_leaf``.  The output shape depends on types of prediction.  Also for multi-class\nclassification problem, XGBoost builds one tree for each class and the trees for each\nclass are called a \"group\" of trees, so output dimension may change due to used model.\nAfter 1.4 release, we added a new parameter called ``strict_shape``, one can set it to\n``True`` to indicate a more restricted output is desired.  Assuming you are using\n:py:obj:`xgboost.Booster`, here is a list of possible returns:\n\n- When using normal prediction with ``strict_shape`` set to ``True``:\n\n  Output is a 2-dim array with first dimension as rows and second as groups.  For\n  regression/survival/ranking/binary classification this is equivalent to a column vector\n  with ``shape[1] == 1``.  But for multi-class with ``multi:softprob`` the number of\n  columns equals to number of classes.  If strict_shape is set to False then XGBoost might\n  output 1 or 2 dim array.\n\n- When using ``output_margin`` to avoid transformation and ``strict_shape`` is set to ``True``:\n\n  Similar to the previous case, output is a 2-dim array, except for that ``multi:softmax``\n  has equivalent output shape of ``multi:softprob`` due to dropped transformation.  
If\n  strict shape is set to False then output can have 1 or 2 dim depending on used model.\n\n- When using ``pred_contribs`` with ``strict_shape`` set to ``True``:\n\n  Output is a 3-dim array, with ``(rows, groups, columns + 1)`` as shape.  Whether\n  ``approx_contribs`` is used does not change the output shape. If the strict shape\n  parameter is not set, it can be a 2 or 3 dimension array depending on whether\n  multi-class model is being used.\n\n- When using ``pred_interactions`` with ``strict_shape`` set to ``True``:\n\n  Output is a 4-dim array, with ``(rows, groups, columns + 1, columns + 1)`` as shape.\n  Like the predict contribution case, whether ``approx_contribs`` is used does not change\n  the output shape.  If strict shape is set to False, it can have 3 or 4 dims depending on\n  the underlying model.\n\n- When using ``pred_leaf`` with ``strict_shape`` set to ``True``:\n\n  Output is a 4-dim array with ``(n_samples, n_iterations, n_classes, n_trees_in_forest)``\n  as shape.  ``n_trees_in_forest`` is specified by the ``num_parallel_tree`` during\n  training.  When strict shape is set to False, output is a 2-dim array with last 3 dims\n  concatenated into 1.  Also the last dimension is dropped if it equals 1. When using\n  ``apply`` method in scikit learn interface, this is set to False by default.\n\n\nFor R package, when ``strict_shape`` is specified, an ``array`` is returned, with the same\nvalue as Python except R array is column-major while Python numpy array is row-major, so\nall the dimensions are reversed.  For example, a Python ``pred_leaf`` output obtained\nwith ``strict_shape=True`` has 4 dimensions: ``(n_samples, n_iterations,\nn_classes, n_trees_in_forest)``, while R with ``strict_shape=TRUE`` outputs\n``(n_trees_in_forest, n_classes, n_iterations, n_samples)``.\n\nOther than these prediction types, there's also a parameter called ``iteration_range``,\nwhich is similar to model slicing.  
But instead of actually splitting up the model into\nmultiple stacks, it simply returns the prediction formed by the trees within range.\nNumber of trees created in each iteration equals to :math:`trees_i = num\\_class \\times\nnum\\_parallel\\_tree`.  So if you are training a boosted random forest with size of 4, on\nthe 3-class classification dataset, and want to use the first 2 iterations of trees for\nprediction, you need to provide ``iteration_range=(0, 2)``.  Then the first :math:`2\n\\times 3 \\times 4` trees will be used in this prediction.\n\n**************\nEarly Stopping\n**************\n\nWhen a model is trained with early stopping, there is an inconsistent behavior between\nnative Python interface and sklearn/R interfaces.  By default on R and sklearn interfaces,\nthe ``best_iteration`` is automatically used so prediction comes from the best model.  But\nwith the native Python interface, :py:meth:`xgboost.Booster.predict` and\n:py:meth:`xgboost.Booster.inplace_predict` use the full model.  Users can use\n``best_iteration`` attribute with ``iteration_range`` parameter to achieve the same\nbehavior.  Also the ``save_best`` parameter from :py:obj:`xgboost.callback.EarlyStopping`\nmight be useful.\n\n\n***********\nBase Margin\n***********\n\nThere's a training parameter in XGBoost called ``base_score``, and a meta data for\n``DMatrix`` called ``base_margin`` (which can be set in ``fit`` method if you are using\nscikit-learn interface).  They specify the global bias for boosted model.  If the latter\nis supplied then former is ignored.  ``base_margin`` can be used to train XGBoost model\nbased on other models.  See demos on boosting from predictions.\n\n*****************\nStaged Prediction\n*****************\n\nUsing the native interface with ``DMatrix``, prediction can be staged (or cached).  For\nexample, one can first predict on the first 4 trees then run prediction on 8 trees.  
After\nrunning the first prediction, results from the first 4 trees are cached so when you run the\nprediction with 8 trees XGBoost can reuse the result from previous prediction.  The cache\nexpires automatically upon next prediction, train or evaluation if the cached ``DMatrix``\nobject is expired (like going out of scope and being collected by garbage collector in\nyour language environment).\n\n*******************\nIn-place Prediction\n*******************\n\nTraditionally XGBoost accepts only ``DMatrix`` for prediction, with wrappers like\nscikit-learn interface the construction happens internally.  We added support for in-place\npredict to bypass the construction of ``DMatrix``, which is slow and memory consuming.\nThe new predict function has limited features but is often sufficient for simple inference\ntasks.  It accepts some commonly found data types in Python like :py:obj:`numpy.ndarray`,\n:py:obj:`scipy.sparse.csr_matrix` and :py:obj:`cudf.DataFrame` instead of\n:py:obj:`xgboost.DMatrix`.  You can call :py:meth:`xgboost.Booster.inplace_predict` to use\nit.  Be aware that the output of in-place prediction depends on input data type, when\ninput is GPU data, output is :py:obj:`cupy.ndarray`, otherwise a :py:obj:`numpy.ndarray`\nis returned.\n\n*************\nThread Safety\n*************\n\nAfter 1.4 release, all prediction functions including normal ``predict`` with various\nparameters like shap value computation and ``inplace_predict`` are thread safe when\nunderlying booster is ``gbtree`` or ``dart``, which means as long as tree model is used,\nprediction itself should be thread safe.  But the safety is only guaranteed with prediction.\nIf one tries to train a model in one thread and provide prediction at the other using the\nsame model the behaviour is undefined.  This happens easier than one might expect, for\ninstance we might accidentally call ``clf.set_params()`` inside a predict function:\n\n.. 
code-block:: python\n\n    def predict_fn(clf: xgb.XGBClassifier, X):\n        X = preprocess(X)\n        clf.set_params(n_jobs=1)  # NOT safe!\n        return clf.predict_proba(X, iteration_range=(0, 10))\n\n    with ThreadPoolExecutor(max_workers=10) as e:\n        e.submit(predict_fn, ...)\n\n*****************************\nPrivacy-Preserving Prediction\n*****************************\n\n`Concrete ML`_ is a third-party open-source library developed by `Zama`_ that proposes gradient\nboosting classes similar to ours, but predicting directly over encrypted data, thanks to\nFully Homomorphic Encryption. A simple example would be as follows:\n\n.. code-block:: python\n\n    from sklearn.datasets import make_classification\n    from sklearn.model_selection import train_test_split\n    from concrete.ml.sklearn import XGBClassifier\n\n    x, y = make_classification(n_samples=100, class_sep=2, n_features=30, random_state=42)\n    X_train, X_test, y_train, y_test = train_test_split(\n        x, y, test_size=10, random_state=42\n    )\n\n    # Train in the clear and quantize the weights\n    model = XGBClassifier()\n    model.fit(X_train, y_train)\n\n    # Simulate the predictions in the clear\n    y_pred_clear = model.predict(X_test)\n\n    # Compile in FHE\n    model.compile(X_train)\n\n    # Generate keys\n    model.fhe_circuit.keygen()\n\n    # Run the inference on encrypted inputs!\n    y_pred_fhe = model.predict(X_test, fhe=\"execute\")\n\n    print(\"In clear  :\", y_pred_clear)\n    print(\"In FHE    :\", y_pred_fhe)\n    print(f\"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%\")\n\nMore information and examples are given in the `Concrete ML documentation`_.\n\n.. _Zama: https://www.zama.ai/\n.. _Concrete ML: https://github.com/zama-ai/concrete-ml\n.. _Concrete ML documentation: https://docs.zama.ai/concrete-ml\n"
  },
  {
    "path": "doc/python/.gitignore",
    "content": "examples\ndask-examples\nsurvival-examples\ngpu-examples\nrmm-examples"
  },
  {
    "path": "doc/python/callbacks.rst",
    "content": "##################\nCallback Functions\n##################\n\nThis document gives a basic walkthrough of :ref:`callback API <callback_api>` used in\nXGBoost Python package.  In XGBoost 1.3, a new callback interface is designed for Python\npackage, which provides the flexibility of designing various extension for training.\nAlso, XGBoost has a number of pre-defined callbacks for supporting early stopping,\ncheckpoints etc.\n\n\nUsing builtin callbacks\n-----------------------\n\nBy default, training methods in XGBoost have parameters like ``early_stopping_rounds`` and\n``verbose``/``verbose_eval``, when specified the training procedure will define the\ncorresponding callbacks internally.  For example, when ``early_stopping_rounds`` is\nspecified, :py:class:`EarlyStopping <xgboost.callback.EarlyStopping>` callback is invoked\ninside iteration loop.  You can also pass this callback function directly into XGBoost:\n\n.. code-block:: python\n\n    D_train = xgb.DMatrix(X_train, y_train)\n    D_valid = xgb.DMatrix(X_valid, y_valid)\n\n    # Define a custom evaluation metric used for early stopping.\n    def eval_error_metric(predt, dtrain: xgb.DMatrix):\n        label = dtrain.get_label()\n        r = np.zeros(predt.shape)\n        gt = predt > 0.5\n        r[gt] = 1 - label[gt]\n        le = predt <= 0.5\n        r[le] = label[le]\n        return 'CustomErr', np.sum(r)\n\n    # Specify which dataset and which metric should be used for early stopping.\n    early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,\n                                            metric_name='CustomErr',\n                                            data_name='Valid')\n\n    booster = xgb.train(\n        {'objective': 'binary:logistic',\n         'eval_metric': ['error', 'rmse'],\n         'tree_method': 'hist'}, D_train,\n        evals=[(D_train, 'Train'), (D_valid, 'Valid')],\n        feval=eval_error_metric,\n        num_boost_round=1000,\n        
callbacks=[early_stop],\n        verbose_eval=False)\n\n    dump = booster.get_dump(dump_format='json')\n    assert len(early_stop.stopping_history['Valid']['CustomErr']) == len(dump)\n\n\nDefining your own callback\n--------------------------\n\nXGBoost provides a callback interface class: :py:class:`TrainingCallback\n<xgboost.callback.TrainingCallback>`, user defined callbacks should inherit this class and\noverride corresponding methods.  There's a working example in\n:ref:`sphx_glr_python_examples_callbacks.py`.\n"
  },
  {
    "path": "doc/python/data_input.rst",
    "content": "################################\nSupported Python data structures\n################################\n\nThis page is a support matrix for various input types.\n\n.. _py-data:\n\n*******\nMarkers\n*******\n\n- T: Supported.\n- F: Not supported.\n- NE: Invalid type for the use case. For instance, :py:class:`pandas.Series` can not be multi-target label.\n- NPA: Support with the help of numpy array.\n- AT: Support with the help of arrow table.\n- CPA: Support with the help of cupy array.\n- SciCSR: Support with the help of scipy sparse CSR :py:class:`scipy.sparse.csr_matrix`. The conversion to scipy CSR may or may not be possible. Raise a type error if conversion fails.\n- FF: We can look forward to having its support in recent future if requested.\n- empty: To be filled in.\n\n************\nTable Header\n************\n- `X` means predictor matrix.\n- Meta info: label, weight, etc.\n- Multi Label: 2-dim label for multi-target.\n- Others: Anything else that we don't list here explicitly including formats like `lil`, `dia`, `bsr`. 
XGBoost will try to convert it into scipy csr.\n\n**************\nSupport Matrix\n**************\n\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| Name                    | DMatrix X | QuantileDMatrix X | Sklearn X | Meta Info | Inplace prediction | Multi Label |\n+=========================+===========+===================+===========+===========+====================+=============+\n| numpy.ndarray           | T         | T                 | T         | T         | T                  | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| scipy.sparse.csr        | T         | T                 | T         | NE        | T                  | F           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| scipy.sparse.csc        | T         | F                 | T         | NE        | F                  | F           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| scipy.sparse.coo        | SciCSR    | F                 | SciCSR    | NE        | F                  | F           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| uri                     | T         | F                 | F         | F         | NE                 | F           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| list                    | NPA       | NPA               | NPA       | NPA       | NPA                | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| tuple                   | NPA       | NPA               | NPA       | NPA       | NPA             
   | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| pandas.DataFrame        | NPA       | NPA               | NPA       | NPA       | NPA                | NPA         |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| pandas.Series           | NPA       | NPA               | NPA       | NPA       | NPA                | NE          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| cudf.DataFrame          | T         | T                 | T         | T         | T                  | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| cudf.Series             | T         | T                 | T         | T         | FF                 | NE          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| cupy.ndarray            | T         | T                 | T         | T         | T                  | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| torch.Tensor            | T         | T                 | T         | T         | T                  | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| dlpack                  | CPA       | CPA               |           | CPA       | FF                 | FF          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| modin.DataFrame         | NPA       | FF                | NPA       | NPA       | FF                 |             
|\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| modin.Series            | NPA       | FF                | NPA       | NPA       | FF                 |             |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| pyarrow.Table           | T         | T                 | T         | T         | T                  | T           |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| polars.DataFrame        | AT        | AT                | AT        | AT        | AT                 | AT          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| polars.LazyFrame (WARN) | AT        | AT                | AT        | AT        | AT                 | AT          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| polars.Series           | AT        | AT                | AT        | AT        | AT                 | NE          |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| _\\_array\\_\\_            | NPA       | F                 | NPA       | NPA       | H                  |             |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n| Others                  | SciCSR    | F                 |           | F         | F                  |             |\n+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+\n\nThe polars ``LazyFrame.collect`` supports many configurations, ranging from the choice of\nquery engine to type coercion. XGBoost simply uses the default parameter. 
Please run\n``collect`` to obtain the ``DataFrame`` before passing it into XGBoost for finer control\nover the behaviour."
  },
  {
    "path": "doc/python/index.rst",
    "content": "######################\nXGBoost Python Package\n######################\nThis page contains links to all the python related documents on python package.\nTo install the package, checkout :doc:`Installation Guide </install>`.\n\n********\nContents\n********\n\n.. toctree::\n  python_intro\n  sklearn_estimator\n  python_api\n  data_input\n  callbacks\n  examples/index\n  dask-examples/index\n  survival-examples/index\n  rmm-examples/index\n"
  },
  {
    "path": "doc/python/python_api.rst",
    "content": "Python API Reference\n====================\nThis page gives the Python API reference of xgboost, please also refer to Python Package Introduction for more information about the Python package.\n\n.. contents::\n  :backlinks: none\n  :local:\n\nGlobal Configuration\n--------------------\n.. autofunction:: xgboost.config_context\n\n.. autofunction:: xgboost.set_config\n\n.. autofunction:: xgboost.get_config\n\n.. autofunction:: xgboost.build_info\n\nCore Data Structure\n-------------------\n.. automodule:: xgboost.core\n\n.. autoclass:: xgboost.DMatrix\n    :members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.QuantileDMatrix\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.ExtMemQuantileDMatrix\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.Booster\n    :members:\n    :show-inheritance:\n    :special-members: __getitem__\n\n.. autoclass:: xgboost.DataIter\n    :members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.core.Categories\n\nLearning API\n------------\n.. automodule:: xgboost.training\n\n.. autofunction:: xgboost.train\n\n.. autofunction:: xgboost.cv\n\n\nScikit-Learn API\n----------------\n\n.. automodule:: xgboost.sklearn\n.. autoclass:: xgboost.XGBRegressor\n    :members:\n    :inherited-members:\n    :show-inheritance:\n.. autoclass:: xgboost.XGBClassifier\n    :members:\n    :inherited-members:\n    :show-inheritance:\n.. autoclass:: xgboost.XGBRanker\n    :members:\n    :inherited-members:\n    :show-inheritance:\n.. autoclass:: xgboost.XGBRFRegressor\n    :members:\n    :inherited-members:\n    :show-inheritance:\n.. autoclass:: xgboost.XGBRFClassifier\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\nPlotting API\n------------\n.. automodule:: xgboost.plotting\n\n.. autofunction:: xgboost.plot_importance\n\n.. autofunction:: xgboost.plot_tree\n\n.. autofunction:: xgboost.to_graphviz\n\n.. 
_callback_api:\n\nCallback API\n------------\n.. automodule:: xgboost.callback\n.. autoclass:: xgboost.callback.TrainingCallback\n    :members:\n\n.. autoclass:: xgboost.callback.EvaluationMonitor\n    :members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.callback.EarlyStopping\n    :members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.callback.LearningRateScheduler\n    :members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.callback.TrainingCheckPoint\n    :members:\n    :show-inheritance:\n\n.. _dask_api:\n\nDask API\n--------\n.. automodule:: xgboost.dask\n\n.. autoclass:: xgboost.dask.DaskDMatrix\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.dask.DaskQuantileDMatrix\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autofunction:: xgboost.dask.train\n\n.. autofunction:: xgboost.dask.predict\n\n.. autofunction:: xgboost.dask.inplace_predict\n\n.. autoclass:: xgboost.dask.DaskXGBClassifier\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.dask.DaskXGBRegressor\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.dask.DaskXGBRanker\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.dask.DaskXGBRFRegressor\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.dask.DaskXGBRFClassifier\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n\nPySpark API\n-----------\n\n.. automodule:: xgboost.spark\n\n.. autoclass:: xgboost.spark.SparkXGBClassifier\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.spark.SparkXGBClassifierModel\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.spark.SparkXGBRegressor\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. 
autoclass:: xgboost.spark.SparkXGBRegressorModel\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.spark.SparkXGBRanker\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n.. autoclass:: xgboost.spark.SparkXGBRankerModel\n    :members:\n    :inherited-members:\n    :show-inheritance:\n\n\nCollective\n----------\n\n.. automodule:: xgboost.collective\n\n.. autoclass:: xgboost.collective.Config\n\n.. autofunction:: xgboost.collective.init\n\n.. autofunction:: xgboost.collective.finalize\n\n.. autofunction:: xgboost.collective.get_rank\n\n.. autofunction:: xgboost.collective.get_world_size\n\n.. autoclass:: xgboost.collective.CommunicatorContext\n\n.. automodule:: xgboost.tracker\n\n.. autoclass:: xgboost.tracker.RabitTracker"
  },
  {
    "path": "doc/python/python_intro.rst",
    "content": "###########################\nPython Package Introduction\n###########################\n\nThis document gives a basic walkthrough of the xgboost package for Python.  The Python\npackage is consisted of 3 different interfaces, including native interface, scikit-learn\ninterface and dask interface.  For introduction to dask interface please see\n:doc:`/tutorials/dask`.\n\n**List of other Helpful Links**\n\n* :doc:`/python/examples/index`\n* :doc:`Python API Reference <python_api>`\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\nInstall XGBoost\n---------------\nTo install XGBoost, follow instructions in :doc:`/install`.\n\nTo verify your installation, run the following in Python:\n\n.. code-block:: python\n\n  import xgboost as xgb\n\n.. _python_data_interface:\n\nData Interface\n--------------\nThe XGBoost Python module is able to load data from many different types of data format including both CPU and GPU data structures. For a comprehensive list of supported data types, please reference the :doc:`/python/data_input`. For a detailed description of text input formats, please visit :doc:`/tutorials/input_format`.\n\nThe input data is stored in a :py:class:`DMatrix <xgboost.DMatrix>` object. For the sklearn estimator interface, a :py:class:`DMatrix` or a :py:class:`QuantileDMatrix` is created depending on the chosen algorithm and the input, see the sklearn API reference for details. We will illustrate some of the basic input types using the ``DMatrix`` here.\n\n* To load a NumPy array into :py:class:`DMatrix <xgboost.DMatrix>`:\n\n  .. code-block:: python\n\n    data = np.random.rand(5, 10)  # 5 entities, each contains 10 features\n    label = np.random.randint(2, size=5)  # binary target\n    dtrain = xgb.DMatrix(data, label=label)\n\n* To load a :py:mod:`scipy.sparse` array into :py:class:`DMatrix <xgboost.DMatrix>`:\n\n  .. 
code-block:: python\n\n    csr = scipy.sparse.csr_matrix((dat, (row, col)))\n    dtrain = xgb.DMatrix(csr)\n\n* To load a Pandas data frame into :py:class:`DMatrix <xgboost.DMatrix>`:\n\n  .. code-block:: python\n\n    data = pandas.DataFrame(np.arange(12).reshape((4,3)), columns=['a', 'b', 'c'])\n    label = pandas.DataFrame(np.random.randint(2, size=4))\n    dtrain = xgb.DMatrix(data, label=label)\n\n* Saving :py:class:`DMatrix <xgboost.DMatrix>` into an XGBoost binary file:\n\n  .. code-block:: python\n\n    data = np.random.rand(5, 10)  # 5 entities, each contains 10 features\n    label = np.random.randint(2, size=5)  # binary target\n    dtrain = xgb.DMatrix(data, label=label)\n    dtrain.save_binary('train.buffer')\n\n* Missing values can be replaced by a default value in the :py:class:`DMatrix <xgboost.DMatrix>` constructor:\n\n  .. code-block:: python\n\n    dtrain = xgb.DMatrix(data, label=label, missing=np.nan)\n\n* Weights can be set when needed:\n\n  .. code-block:: python\n\n    w = np.random.rand(5, 1)\n    dtrain = xgb.DMatrix(data, label=label, missing=np.nan, weight=w)\n\nSetting Parameters\n------------------\nXGBoost can use either a list of pairs or a dictionary to set :doc:`parameters </parameter>`. For instance:\n\n* Booster parameters\n\n  .. code-block:: python\n\n    param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}\n    param['nthread'] = 4\n    param['eval_metric'] = 'auc'\n\n* You can also specify multiple eval metrics:\n\n  .. code-block:: python\n\n    param['eval_metric'] = ['auc', 'ams@0']\n\n    # alternatively:\n    # plst = list(param.items())\n    # plst += [('eval_metric', 'ams@0')]\n\n* Specify validation sets to watch performance\n\n  .. code-block:: python\n\n    evallist = [(dtrain, 'train'), (dtest, 'eval')]\n\nTraining\n--------\n\nTraining a model requires a parameter list and data set.\n\n.. 
code-block:: python\n\n  num_round = 10\n  bst = xgb.train(param, dtrain, num_round, evallist)\n\nAfter training, the model can be saved into ``JSON`` or ``UBJSON``:\n\n.. code-block:: python\n\n  bst.save_model('model.ubj')\n\nThe model and its feature map can also be dumped to a text file.\n\n.. code-block:: python\n\n  # dump model\n  bst.dump_model('dump.raw.txt')\n  # dump model with feature map\n  bst.dump_model('dump.raw.txt', 'featmap.txt')\n\nA saved model can be loaded as follows:\n\n.. code-block:: python\n\n  bst = xgb.Booster({'nthread': 4})  # init model\n  bst.load_model('model.ubj')  # load model data\n\nMethods including `update` and `boost` from :py:class:`xgboost.Booster` are designed for\ninternal usage only.  The wrapper function :py:class:`xgboost.train` does some\npre-configuration including setting up caches and some other parameters.\n\nEarly Stopping\n--------------\nIf you have a validation set, you can use early stopping to find the optimal number of boosting rounds.\nEarly stopping requires at least one set in ``evals``. If there's more than one, it will use the last.\n\n.. code-block:: python\n\n  train(..., evals=evals, early_stopping_rounds=10)\n\nThe model will train until the validation score stops improving. Validation error needs to decrease at least every ``early_stopping_rounds`` to continue training.\n\nIf early stopping occurs, the model will have two additional fields: ``bst.best_score``, ``bst.best_iteration``.  Note that :py:meth:`xgboost.train` will return a model from the last iteration, not the best one.\n\nThis works with both metrics to minimize (RMSE, log loss, etc.) and to maximize (MAP, NDCG, AUC). Note that if you specify more than one evaluation metric the last one in ``param['eval_metric']`` is used for early stopping.\n\nPrediction\n----------\nA model that has been trained or loaded can perform predictions on data sets.\n\n.. 
code-block:: python\n\n  # 7 entities, each contains 10 features\n  data = np.random.rand(7, 10)\n  dtest = xgb.DMatrix(data)\n  ypred = bst.predict(dtest)\n\nIf early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:\n\n.. code-block:: python\n\n  ypred = bst.predict(dtest, iteration_range=(0, bst.best_iteration + 1))\n\nPlotting\n--------\n\nYou can use plotting module to plot importance and output tree.\n\nTo plot importance, use :py:meth:`xgboost.plot_importance`. This function requires ``matplotlib`` to be installed.\n\n.. code-block:: python\n\n  xgb.plot_importance(bst)\n\nTo plot the output tree via ``matplotlib``, use :py:meth:`xgboost.plot_tree`, specifying the ordinal number of the target tree. This function requires ``graphviz`` and ``matplotlib``.\n\n.. code-block:: python\n\n  xgb.plot_tree(bst, num_trees=2)\n\nWhen you use ``IPython``, you can use the :py:meth:`xgboost.to_graphviz` function, which converts the target tree to a ``graphviz`` instance. The ``graphviz`` instance is automatically rendered in ``IPython``.\n\n.. code-block:: python\n\n  xgb.to_graphviz(bst, num_trees=2)\n\n\nScikit-Learn interface\n----------------------\n\nXGBoost provides an easy to use scikit-learn interface for some pre-defined models\nincluding regression, classification and ranking. See :doc:`/python/sklearn_estimator`\nfor more info.\n\n.. code-block:: python\n\n  # Use \"hist\" for training the model.\n  reg = xgb.XGBRegressor(tree_method=\"hist\", device=\"cuda\")\n  # Fit the model using predictor X and response y.\n  reg.fit(X, y)\n  # Save model into JSON format.\n  reg.save_model(\"regressor.json\")\n\nUser can still access the underlying booster model when needed:\n\n.. code-block:: python\n\n   booster: xgb.Booster = reg.get_booster()\n"
  },
  {
    "path": "doc/python/sklearn_estimator.rst",
    "content": "##########################################\nUsing the Scikit-Learn Estimator Interface\n##########################################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********\nOverview\n********\n\nIn addition to the native interface, XGBoost features a sklearn estimator interface that\nconforms to `sklearn estimator guideline\n<https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator>`__. It\nsupports regression, classification, and learning to rank. Survival training for the\nsklearn estimator interface is still working in progress.\n\nYou can find some some quick start examples at\n:ref:`sphx_glr_python_examples_sklearn_examples.py`. The main advantage of using sklearn\ninterface is that it works with most of the utilities provided by sklearn like\n:py:func:`sklearn.model_selection.cross_validate`. Also, many other libraries recognize\nthe sklearn estimator interface thanks to its popularity.\n\nWith the sklearn estimator interface, we can train a classification model with only a\ncouple lines of Python code. Here's an example for training a classification model:\n\n.. code-block:: python\n\n    from sklearn.datasets import load_breast_cancer\n    from sklearn.model_selection import train_test_split\n\n    import xgboost as xgb\n\n    X, y = load_breast_cancer(return_X_y=True)\n    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=94)\n\n    # Use \"hist\" for constructing the trees, with early stopping enabled.\n    clf = xgb.XGBClassifier(tree_method=\"hist\", early_stopping_rounds=2)\n    # Fit the model, test sets are used for early stopping.\n    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n    # Save model into JSON format.\n    clf.save_model(\"clf.json\")\n\n\nThe ``tree_method`` parameter specifies the method to use for constructing the trees, and\nthe early_stopping_rounds parameter enables early stopping. 
Early stopping can help\nprevent overfitting and save time during training.\n\n**************\nEarly Stopping\n**************\n\nAs demonstrated in the previous example, early stopping can be enabled by the parameter\n``early_stopping_rounds``. Alternatively, there's a callback function that can be used\n:py:class:`xgboost.callback.EarlyStopping` to specify more details about the behavior of\nearly stopping, including whether XGBoost should return the best model instead of the full\nstack of trees:\n\n.. code-block:: python\n\n    early_stop = xgb.callback.EarlyStopping(\n        rounds=2, metric_name='logloss', data_name='validation_0', save_best=True\n    )\n    clf = xgb.XGBClassifier(tree_method=\"hist\", callbacks=[early_stop])\n    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n\nAt present, XGBoost doesn't implement data splitting logic within the estimator and relies\non the ``eval_set`` parameter of the :py:meth:`xgboost.XGBModel.fit` method. If you want\nto use early stopping to prevent overfitting, you'll need to manually split your data into\ntraining and testing sets using the :py:func:`sklearn.model_selection.train_test_split`\nfunction from the `sklearn` library. Some other machine learning algorithms, like those in\n`sklearn`, include early stopping as part of the estimator and may work with cross\nvalidation. However, using early stopping during cross validation may not be a perfect\napproach because it changes the model's number of trees for each validation fold, leading\nto a different model. A better approach is to retrain the model after cross validation using\nthe best hyperparameters along with early stopping. If you want to experiment with the idea of\nusing cross validation with early stopping, here is a snippet to begin with:\n\n.. 
code-block:: python\n\n    from sklearn.base import clone\n    from sklearn.datasets import load_breast_cancer\n    from sklearn.model_selection import StratifiedKFold, cross_validate\n\n    import xgboost as xgb\n\n    X, y = load_breast_cancer(return_X_y=True)\n\n\n    def fit_and_score(estimator, X_train, X_test, y_train, y_test):\n        \"\"\"Fit the estimator on the train set and score it on both sets\"\"\"\n        estimator.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n\n        train_score = estimator.score(X_train, y_train)\n        test_score = estimator.score(X_test, y_test)\n\n        return estimator, train_score, test_score\n\n\n    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=94)\n\n    clf = xgb.XGBClassifier(tree_method=\"hist\", early_stopping_rounds=3)\n\n    results = {}\n\n    for train, test in cv.split(X, y):\n        X_train = X[train]\n        X_test = X[test]\n        y_train = y[train]\n        y_test = y[test]\n        est, train_score, test_score = fit_and_score(\n            clone(clf), X_train, X_test, y_train, y_test\n        )\n        results[est] = (train_score, test_score)\n\n\n***********************************\nObtaining the native booster object\n***********************************\n\nThe sklearn estimator interface primarily facilitates training and doesn't implement all\nfeatures available in XGBoost. For instance, in order to have cached predictions,\n:py:class:`xgboost.DMatrix` needs to be used with :py:meth:`xgboost.Booster.predict`. One\ncan obtain the booster object from the sklearn interface using\n:py:meth:`xgboost.XGBModel.get_booster`:\n\n.. 
code-block:: python\n\n   booster = clf.get_booster()\n   print(booster.num_boosted_rounds())\n\n\n**********\nPrediction\n**********\n\nWhen early stopping is enabled, prediction functions including the\n:py:meth:`xgboost.XGBModel.predict`, :py:meth:`xgboost.XGBModel.score`, and\n:py:meth:`xgboost.XGBModel.apply` methods will use the best model automatically. Meaning\nthe :py:attr:`xgboost.XGBModel.best_iteration` is used to specify the range of trees used\nin prediction.\n\nTo have cached results for incremental prediction, please use the\n:py:meth:`xgboost.Booster.predict` method instead.\n\n\n**************************\nNumber of parallel threads\n**************************\n\nWhen working with XGBoost and other sklearn tools, you can specify how many threads you\nwant to use by using the ``n_jobs`` parameter. By default, XGBoost uses all the available\nthreads on your computer, which can lead to some interesting consequences when combined\nwith other sklearn functions like :py:func:`sklearn.model_selection.cross_validate`. If\nboth XGBoost and sklearn are set to use all threads, your computer may start to slow down\nsignificantly due to something called \"thread thrashing\". To avoid this, you can simply\nset the ``n_jobs`` parameter for XGBoost to `None` (which uses all threads) and the\n``n_jobs`` parameter for sklearn to `1`. This way, both programs will be able to work\ntogether smoothly without causing any unnecessary computer strain.\n"
  },
  {
    "path": "doc/requirements.txt",
    "content": "sphinx\nmock\nsphinx_rtd_theme>=1.0.0\nbreathe\nscikit-learn\nsh\nmatplotlib\ngraphviz\nnumpy\nscipy\nmyst-parser\nray[train]\nsphinx-gallery\nsphinx-issues\nsphinx-tabs\ndask\npyspark\ncloudpickle\nsetuptools\n"
  },
  {
    "path": "doc/sphinx_util.py",
    "content": "# -*- coding: utf-8 -*-\n\"\"\"Helper utility function for customization.\"\"\"\nimport os\nimport subprocess\nimport sys\n\nREADTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)\n\nif not os.path.exists('web-data'):\n  subprocess.call('rm -rf web-data;' +\n                  'git clone https://github.com/dmlc/web-data', shell = True)\nelse:\n  subprocess.call('cd web-data; git pull', shell=True)\n\nsys.stderr.write('READTHEDOCS=%s\\n' % (READTHEDOCS_BUILD))\n"
  },
  {
    "path": "doc/treemethod.rst",
    "content": "############\nTree Methods\n############\n\nFor training boosted tree models, there are 2 parameters used for choosing algorithms,\nnamely ``updater`` and ``tree_method``.  XGBoost has 3 builtin tree methods, namely\n``exact``, ``approx`` and ``hist``.  Along with these tree methods, there are also some\nfree standing updaters including ``refresh``, ``prune`` and ``sync``.  The parameter\n``updater`` is more primitive than ``tree_method`` as the latter is just a\npre-configuration of the former.  The difference is mostly due to historical reasons that\neach updater requires some specific configurations and might have missing features.  As we\nare moving forward, the gap between them is becoming more and more irrelevant.  We will\ncollectively document them under tree methods.\n\n**************\nExact Solution\n**************\n\nExact means XGBoost considers all candidates from data for tree splitting, but underlying\nthe objective is still interpreted as a Taylor expansion.\n\n1. ``exact``: The vanilla gradient boosting tree algorithm described in `reference paper\n   <http://arxiv.org/abs/1603.02754>`_.  During split-finding, it iterates over all\n   entries of input data.  It's more accurate (among other greedy methods) but\n   computationally slower in compared to other tree methods.  Further more, its feature\n   set is limited. Features like distributed training and external memory that require\n   approximated quantiles are not supported. This tree method can be used with the\n   parameter ``tree_method`` set to ``exact``.\n\n\n**********************\nApproximated Solutions\n**********************\n\nAs ``exact`` tree method is slow in computation performance and difficult to scale, we\noften employ approximated training algorithms.  These algorithms build a gradient\nhistogram for each node and iterate through the histogram instead of real dataset.  Here\nwe introduce the implementations in XGBoost.\n\n1. 
``approx`` tree method: An approximation tree method described in `reference paper\n   <http://arxiv.org/abs/1603.02754>`_.  It runs sketching before building each tree\n   using all the rows (rows belonging to the root). Hessian is used as weights during\n   sketch.  The algorithm can be accessed by setting ``tree_method`` to ``approx``.\n\n2. ``hist`` tree method: An approximation tree method used in LightGBM with slight\n   differences in implementation.  It runs sketching before training using only user\n   provided weights instead of hessian.  The subsequent per-node histogram is built upon\n   this global sketch.  This is the fastest algorithm as it runs sketching only once.  The\n   algorithm can be accessed by setting ``tree_method`` to ``hist``.\n\n************\nImplications\n************\n\nSome objectives like ``reg:squarederror`` have constant hessian.  In this case, the\n``hist`` should be preferred as weighted sketching doesn't make sense with constant\nweights.  When using non-constant hessian objectives, sometimes ``approx`` yields better\naccuracy, but with slower computation performance.  Most of the time using ``hist`` with\nhigher ``max_bin`` can achieve similar or even superior accuracy while maintaining good\nperformance.  However, as xgboost is largely driven by community effort, the actual\nimplementations have some differences than pure math description.  Result might be\nslightly different than expectation, which we are currently trying to overcome.\n\n**************\nOther Updaters\n**************\n\n1. ``Prune``: It prunes the existing trees.  ``prune`` is usually used as part of other\n   tree methods.  To use pruner independently, one needs to set the process type to update\n   by: ``{\"process_type\": \"update\", \"updater\": \"prune\"}``.  With this set of parameters,\n   during training, XGBoost will prune the existing trees according to 2 parameters\n   ``min_split_loss (gamma)`` and ``max_depth``.\n\n2. 
``Refresh``: Refresh the statistics of built trees on a new training dataset.  Like the\n   pruner, to use refresh independently, one needs to set the process type to update:\n   ``{\"process_type\": \"update\", \"updater\": \"refresh\"}``.  During training, the updater\n   will change statistics like ``cover`` and ``weight`` according to the new training\n   dataset.  When ``refresh_leaf`` is also set to true (default), XGBoost will update the\n   leaf value according to the new leaf weight, but the tree structure (split condition)\n   itself doesn't change.\n\n   There are examples on both training continuation (adding new trees) and using update\n   process on ``demo/guide-python``.  Also check out the ``process_type`` parameter in\n   :doc:`parameter`.\n\n3. ``Sync``: Synchronize the tree among workers when running distributed training.\n\n****************\nRemoved Updaters\n****************\n\n3 Updaters were removed during development due to maintainability.  We describe them here\nsolely for the interest of documentation.\n\n1. Distributed colmaker, which was a distributed version of exact tree method.  It\n   required specialization for column based splitting strategy and a different prediction\n   procedure.  As the exact tree method is slow by itself and scaling is even less\n   efficient, we removed it entirely.\n\n2. ``skmaker``.  Per-node weighted sketching employed by ``grow_local_histmaker`` is slow,\n   the ``skmaker`` was unmaintained and seems to be a workaround trying to eliminate the\n   histogram creation step and uses sketching values directly during split evaluation.  It\n   was never tested and contained some unknown bugs, we decided to remove it and focus our\n   resources on more promising algorithms instead.  For accuracy, most of the time\n   ``approx`` and ``hist`` are enough with some parameter tuning, so removing them doesn't\n   have any real practical impact.\n\n3. 
``grow_local_histmaker`` updater: An approximation tree method described in `reference\n   paper <http://arxiv.org/abs/1603.02754>`_.  This updater was rarely used in practice so\n   it was still an updater rather than tree method.  During split finding, it first runs a\n   weighted GK sketching for data points belong to current node to find split candidates,\n   using hessian as weights.  The histogram is built upon this per-node sketch.  It was\n   faster than ``exact`` in some applications, but still slow in computation.  It was\n   removed because it depended on Rabit's customized reduction function that handles all\n   the data structure that can be serialized/deserialized into fixed size buffer, which is\n   not directly supported by NCCL or federated learning gRPC, making it hard to refactor\n   into a common allreducer interface.\n\n**************\nFeature Matrix\n**************\n\nFollowing table summarizes some differences in supported features between 4 tree methods,\n`T` means supported while `F` means unsupported.\n\n+------------------+-----------+---------------------+------------------------+------------------------+\n|                  | Exact     | Approx              | Approx (GPU)           | Hist                   |\n+==================+===========+=====================+========================+========================+\n| grow_policy      | Depthwise | depthwise/lossguide | depthwise/lossguide    | depthwise/lossguide    |\n+------------------+-----------+---------------------+------------------------+------------------------+\n| max_leaves       | F         | T                   | T                      | T                      |\n+------------------+-----------+---------------------+------------------------+------------------------+\n| sampling method  | uniform   | uniform             | gradient_based/uniform | gradient_based/uniform 
|\n+------------------+-----------+---------------------+------------------------+------------------------+\n| categorical data | F         | T                   | T                      | T                      |\n+------------------+-----------+---------------------+------------------------+------------------------+\n| External memory  | F         | T                   | P                      | T                      |\n+------------------+-----------+---------------------+------------------------+------------------------+\n| Distributed      | F         | T                   | T                      | T                      |\n+------------------+-----------+---------------------+------------------------+------------------------+\n\nFeatures/parameters that are not mentioned here are universally supported for all 3 tree\nmethods (for instance, column sampling and constraints).  The `P` in external memory means\nspecial handling.  Please note that both categorical data and external memory are\nexperimental.\n"
  },
  {
    "path": "doc/tutorials/advanced_custom_obj.rst",
    "content": "###################################\nAdvanced Usage of Custom Objectives\n###################################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********\nOverview\n********\n\nXGBoost allows optimizing custom user-defined functions based on\ngradients and Hessians provided by the user for the desired objective function.\n\nIn order for a custom objective to work as intended:\n\n- The function to optimize must be smooth and twice differentiable.\n- The function must be additive with respect to rows / observations,\n  such as a likelihood function with i.i.d. assumptions.\n- The range of the scores for the function must be unbounded\n  (i.e. it should not work exclusively with positive numbers, for example).\n- The function must be convex. Note that, if the Hessian has negative\n  values, they will be clipped, which will likely result in a model\n  that does not fit the function well.\n- For multi-output objectives, there should not be dependencies between\n  different targets (i.e. Hessian should be diagonal for each row).\n\n\nSome of these limitations can nevertheless be worked around by foregoing\nthe true Hessian of the function, using something else instead such as an\napproximation with better properties - convergence might be slower when\nnot using the true Hessian of a function, but many theoretical guarantees\nshould still hold and result in usable models. For example, XGBoost's\ninternal implementation of multionomial logistic regression uses an upper\nbound on the Hessian with diagonal structure instead of the true Hessian\nwhich is a full square matrix for each row in the data.\n\nThis tutorial provides some suggestions for use-cases that do not perfectly\nfit the criteria outlined above, by showing how to solve a Dirichlet regression\nparameterized by concentrations.\n\nA Dirichlet regression model poses certain challenges for XGBoost:\n\n- Concentration parameters must be positive. 
An easy way to achieve this is\n  by applying an 'exp' transform on raw unbounded values, but in such a case\n  the objective becomes non-convex. Furthermore, note that this function is\n  not in the exponential family, unlike typical distributions used for GLM\n  models.\n- The Hessian has dependencies between targets - that is, for a Dirichlet\n  distribution with 'k' parameters, each row will have a full Hessian matrix\n  of dimensions ``[k, k]``.\n- An optimal intercept for this type of model would involve a vector of\n  values rather than the same value for every target.\n\nIn order to use this type of model as a custom objective:\n\n- It's possible to use the expected Hessian (a.k.a. the Fisher information\n  matrix or expected information) instead of the true Hessian. The expected\n  Hessian is always positive semi-definite for an additive likelihood, even\n  if the true Hessian isn't.\n- It's possible to use an upper bound on the expected Hessian with a diagonal\n  structure, such that a second-order approximation under this diagonal\n  bound would always yield greater or equal function values than under the\n  non-diagonal expected Hessian.\n- Since the ``base_score`` parameter that XGBoost uses for an intercept is\n  limited to a scalar, one can use the ``base_margin`` functionality instead,\n  but note that using it requires a bit more effort.\n\n*****************************\nDirichlet Regression Formulae\n*****************************\n\nThe Dirichlet distribution is a generalization of the Beta distribution to\nmultiple dimensions. It models proportions data in which the values sum to\n1, and is typically used as part of composite models (e.g. Dirichlet-multinomial)\nor as a prior in Bayesian models, but it also can be used on its own for\nproportions data for example.\n\nIts likelihood for a given observation with values ``y`` and a given prediction ``x``\nis given as follows:\n\n.. 
math::\n    L(\\mathbf{y} | \\mathbf{x}) = \\frac{1}{\\beta(\\mathbf{x})} \\prod_{i=1}^k y_i^{x_i - 1}\n\nWhere:\n\n.. math::\n  \\beta(\\mathbf{x}) = \\frac{ \\prod_{i=1}^k \\Gamma(x_i) }{\\Gamma( \\sum_{i=1}^k x_i )}\n\n\nIn this case, we want to optimize the negative of the log-likelihood summed across rows.\nThe resulting function, gradient and Hessian could be implemented as follows:\n\n.. tabs::\n    .. code-tab:: py\n\n        import numpy as np\n        from scipy.special import loggamma, psi as digamma, polygamma\n        trigamma = lambda x: polygamma(1, x)\n\n        def dirichlet_fun(pred: np.ndarray, Y: np.ndarray) -> float:\n            epred = np.exp(pred)\n            sum_epred = np.sum(epred, axis=1, keepdims=True)\n            return (\n                loggamma(epred).sum()\n                - loggamma(sum_epred).sum()\n                - np.sum(np.log(Y) * (epred - 1))\n            )\n        def dirichlet_grad(pred: np.ndarray, Y: np.ndarray) -> np.ndarray:\n            epred = np.exp(pred)\n            return epred * (\n                digamma(epred)\n                - digamma(np.sum(epred, axis=1, keepdims=True))\n                - np.log(Y)\n            )\n        def dirichlet_hess(pred: np.ndarray, Y: np.ndarray) -> np.ndarray:\n            epred = np.exp(pred)\n            grad = dirichlet_grad(pred, Y)\n            k = Y.shape[1]\n            H = np.empty((pred.shape[0], k, k))\n            for row in range(pred.shape[0]):\n                H[row, :, :] = (\n                    - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row])\n                    + np.diag(grad[row] + trigamma(epred[row]) * epred[row] ** 2)\n                )\n            return H\n\n    .. 
code-tab:: r R\n\n        softmax <- function(x) {\n            max.x <- max(x)\n            e <- exp(x - max.x)\n            return(e / sum(e))\n        }\n\n        dirichlet.fun <- function(pred, y) {\n            epred <- exp(pred)\n            sum_epred <- rowSums(epred)\n            return(\n                sum(lgamma(epred))\n                - sum(lgamma(sum_epred))\n                - sum(log(y) * (epred - 1))\n            )\n        }\n\n        dirichlet.grad <- function(pred, y) {\n            epred <- exp(pred)\n            return(\n                epred * (\n                    digamma(epred)\n                    - digamma(rowSums(epred))\n                    - log(y)\n                )\n            )\n        }\n\n        dirichlet.hess <- function(pred, y) {\n            epred <- exp(pred)\n            grad <- dirichlet.grad(pred, y)\n            k <- ncol(y)\n            H <- array(dim = c(nrow(y), k, k))\n            for (row in seq_len(nrow(y))) {\n                H[row, , ] <- (\n                    - trigamma(sum(epred[row,])) * tcrossprod(epred[row,])\n                    + diag(grad[row,] + trigamma(epred[row,]) * epred[row,]^2)\n                )\n            }\n            return(H)\n        }\n\n\nConvince yourself that the implementation is correct:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        from math import isclose\n        from scipy import stats\n        from scipy.optimize import check_grad\n        from scipy.special import softmax\n\n        def gen_random_dirichlet(rng: np.random.Generator, m: int, k: int):\n            alpha = np.exp(rng.standard_normal(size=k))\n            return rng.dirichlet(alpha, size=m)\n\n        def test_dirichlet_fun_grad_hess():\n            k = 3\n            m = 10\n            rng = np.random.default_rng(seed=123)\n            Y = gen_random_dirichlet(rng, m, k)\n            x0 = rng.standard_normal(size=k)\n            for row in range(Y.shape[0]):\n                fun_row = dirichlet_fun(x0.reshape((1,-1)), Y[[row]])\n                ref_logpdf = stats.dirichlet.logpdf(\n                    Y[row] / Y[row].sum(), # <- avoid roundoff error\n                    np.exp(x0),\n                )\n                assert isclose(fun_row, -ref_logpdf)\n\n                gdiff = check_grad(\n                    lambda pred: dirichlet_fun(pred.reshape((1,-1)), Y[[row]]),\n                    lambda pred: dirichlet_grad(pred.reshape((1,-1)), Y[[row]]),\n                    x0\n                )\n                assert gdiff <= 1e-6\n\n                H_numeric = np.empty((k,k))\n                eps = 1e-7\n                for ii in range(k):\n                    x0_plus_eps = x0.reshape((1,-1)).copy()\n                    x0_plus_eps[0,ii] += eps\n                    for jj in range(k):\n                        H_numeric[ii, jj] = (\n                            dirichlet_grad(x0_plus_eps, Y[[row]])[0][jj]\n                            - dirichlet_grad(x0.reshape((1,-1)), Y[[row]])[0][jj]\n                        ) / eps\n                H = dirichlet_hess(x0.reshape((1,-1)), Y[[row]])[0]\n                np.testing.assert_almost_equal(H, H_numeric, decimal=6)\n        test_dirichlet_fun_grad_hess()\n\n    .. 
code-tab:: r R\n\n        library(DirichletReg)\n        library(testthat)\n\n        test_that(\"dirichlet formulae\", {\n            k <- 3L\n            m <- 10L\n            set.seed(123)\n            alpha <- exp(rnorm(k))\n            y <- rdirichlet(m, alpha)\n            x0 <- rnorm(k)\n\n            for (row in seq_len(m)) {\n                logpdf <- dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F])\n                ref_logpdf <- ddirichlet(y[row,,drop=F], exp(x0), log = T)\n                expect_equal(logpdf, -ref_logpdf)\n\n                eps <- 1e-7\n                grad_num <- numeric(k)\n                for (col in seq_len(k)) {\n                    xplus <- x0\n                    xplus[col] <- x0[col] + eps\n                    grad_num[col] <- (\n                        dirichlet.fun(matrix(xplus, nrow=1), y[row,,drop=F])\n                        - dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F])\n                    ) / eps\n                }\n\n                grad <- dirichlet.grad(matrix(x0, nrow=1), y[row,,drop=F])\n                expect_equal(grad |> as.vector(), grad_num, tolerance=1e-6)\n\n                H_numeric <- array(dim=c(k, k))\n                for (ii in seq_len(k)) {\n                    xplus <- x0\n                    xplus[ii] <- x0[ii] + eps\n                    for (jj in seq_len(k)) {\n                        H_numeric[ii, jj] <- (\n                            dirichlet.grad(matrix(xplus, nrow=1), y[row,,drop=F])[1, jj]\n                            - grad[1L, jj]\n                        ) / eps\n                    }\n                }\n\n                H <- dirichlet.hess(matrix(xplus, nrow=1), y[row,,drop=F])\n                expect_equal(H[1,,], H_numeric, tolerance=1e-6)\n            }\n        })\n\n******************************************\nDirichlet Regression as Objective Function\n******************************************\n\nAs mentioned earlier, the Hessian of this function is problematic for\nXGBoost: it 
can have a negative determinant, and might even have negative\nvalues in the diagonal, which is problematic for optimization methods - in\nXGBoost, those values would be clipped and the resulting model might not\nend up producing sensible predictions.\n\nA potential workaround is to use the expected Hessian instead - that is,\nthe expected outer product of the gradient if the response variable were\ndistributed according to what is predicted. See the Wikipedia article\nfor more information:\n\n`<https://en.wikipedia.org/wiki/Fisher_information>`_\n\nIn general, for objective functions in the exponential family, this is easy\nto obtain from the gradient of the link function and the variance of the\nprobability distribution, but for other functions in general, it might\ninvolve other types of calculations (e.g. covariances and covariances of\nlogarithms for Dirichlet).\n\nIt nevertheless results in a form very similar to the Hessian. One can also\nsee from the differences here that, at an optimal point (gradient being zero),\nthe expected and true Hessian for Dirichlet will match, which is a nice\nproperty for optimization (i.e. the Hessian will be positive at a stationary\npoint, which means it will be a minimum rather than a maximum or saddle point).\n\n.. tabs::\n    .. 
code-tab:: py\n\n        def dirichlet_expected_hess(pred: np.ndarray) -> np.ndarray:\n            epred = np.exp(pred)\n            k = pred.shape[1]\n            Ehess = np.empty((pred.shape[0], k, k))\n            for row in range(pred.shape[0]):\n                Ehess[row, :, :] = (\n                    - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row])\n                    + np.diag(trigamma(epred[row]) * epred[row] ** 2)\n                )\n            return Ehess\n        def test_dirichlet_expected_hess():\n            k = 3\n            rng = np.random.default_rng(seed=123)\n            x0 = rng.standard_normal(size=k)\n            y_sample = rng.dirichlet(np.exp(x0), size=5_000_000)\n            x_broadcast = np.broadcast_to(x0, (y_sample.shape[0], k))\n            g_sample = dirichlet_grad(x_broadcast, y_sample)\n            ref = (g_sample.T @ g_sample) / y_sample.shape[0]\n            Ehess = dirichlet_expected_hess(x0.reshape((1,-1)))[0]\n            np.testing.assert_almost_equal(Ehess, ref, decimal=2)\n        test_dirichlet_expected_hess()\n\n    .. 
code-tab:: r R\n\n        dirichlet.expected.hess <- function(pred) {\n            epred <- exp(pred)\n            k <- ncol(pred)\n            H <- array(dim = c(nrow(pred), k, k))\n            for (row in seq_len(nrow(pred))) {\n                H[row, , ] <- (\n                    - trigamma(sum(epred[row,])) * tcrossprod(epred[row,])\n                    + diag(trigamma(epred[row,]) * epred[row,]^2)\n                )\n            }\n            return(H)\n        }\n\n        test_that(\"expected hess\", {\n            k <- 3L\n            set.seed(123)\n            x0 <- rnorm(k)\n            alpha <- exp(x0)\n            n.samples <- 5e6\n            y.samples <- rdirichlet(n.samples, alpha)\n\n            x.broadcast <- rep(x0, n.samples) |> matrix(ncol=k, byrow=T)\n            grad.samples <- dirichlet.grad(x.broadcast, y.samples)\n            ref <- crossprod(grad.samples) / n.samples\n            Ehess <- dirichlet.expected.hess(matrix(x0, nrow=1))\n            expect_equal(Ehess[1,,], ref, tolerance=1e-2)\n        })\n\nBut note that this is still not usable for XGBoost, since the expected\nHessian, just like the true Hessian, has shape ``[nrows, k, k]``, while\nXGBoost requires something with shape ``[nrows, k]``.\n\nOne may use the diagonal of the expected Hessian for each row, but it's\npossible to do better: one can use instead an upper bound with diagonal\nstructure, since it should lead to better convergence properties, just like\nfor other Hessian-based optimization methods.\n\nIn the absence of any obvious way of obtaining an upper bound, a possibility\nhere is to construct such a bound numerically based directly on the definition\nof a diagonally dominant matrix:\n\n`<https://en.wikipedia.org/wiki/Diagonally_dominant_matrix>`_\n\nThat is: take the absolute value of the expected Hessian for each row of the data,\nand sum by rows of the ``[k, k]``-shaped Hessian for that row in the data:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        def dirichlet_diag_upper_bound_expected_hess(\n            pred: np.ndarray, Y: np.ndarray\n        ) -> np.ndarray:\n            Ehess = dirichlet_expected_hess(pred)\n            diag_bound_Ehess = np.empty((pred.shape[0], Y.shape[1]))\n            for row in range(pred.shape[0]):\n                diag_bound_Ehess[row, :] = np.abs(Ehess[row, :, :]).sum(axis=1)\n            return diag_bound_Ehess\n\n    .. code-tab:: r R\n\n        dirichlet.diag.upper.bound.expected.hess <- function(pred, y) {\n            Ehess <- dirichlet.expected.hess(pred)\n            diag.bound.Ehess <- array(dim=dim(pred))\n            for (row in seq_len(nrow(pred))) {\n                diag.bound.Ehess[row,] <- abs(Ehess[row,,]) |> rowSums()\n            }\n            return(diag.bound.Ehess)\n        }\n\n(*note: the calculation can be made more efficiently than what is shown here\nby not calculating the full matrix, and in R, by making the rows be the last\ndimension and transposing after the fact*)\n\nWith all these pieces in place, one can now frame this model into the format\nrequired for XGBoost's custom objectives:\n\n.. tabs::\n    .. code-tab:: py\n\n        import xgboost as xgb\n        from typing import Tuple\n\n        def dirichlet_xgb_objective(\n            pred: np.ndarray, dtrain: xgb.DMatrix\n        ) -> Tuple[np.ndarray, np.ndarray]:\n            Y = dtrain.get_label().reshape(pred.shape)\n            return (\n                dirichlet_grad(pred, Y),\n                dirichlet_diag_upper_bound_expected_hess(pred, Y),\n            )\n\n    .. 
code-tab:: r R\n\n        library(xgboost)\n\n        dirichlet.xgb.objective <- function(pred, dtrain) {\n            y <- getinfo(dtrain, \"label\")\n            return(\n                list(\n                    grad = dirichlet.grad(pred, y),\n                    hess = dirichlet.diag.upper.bound.expected.hess(pred, y)\n                )\n            )\n        }\n\nAnd for an evaluation metric monitoring based on the Dirichlet log-likelihood:\n\n.. tabs::\n    .. code-tab:: py\n\n        def dirichlet_eval_metric(\n            pred: np.ndarray, dtrain: xgb.DMatrix\n        ) -> Tuple[str, float]:\n            Y = dtrain.get_label().reshape(pred.shape)\n            return \"dirichlet_ll\", dirichlet_fun(pred, Y)\n\n    .. code-tab:: r R\n\n        dirichlet.eval.metric <- function(pred, dtrain) {\n            y <- getinfo(dtrain, \"label\")\n            ll <- dirichlet.fun(pred, y)\n            return(\n                list(\n                    metric = \"dirichlet_ll\",\n                    value = ll\n                )\n            )\n        }\n\n*****************\nPractical Example\n*****************\n\nA good source for test datasets for proportions data is the R package ``DirichletReg``:\n\n`<https://cran.r-project.org/package=DirichletReg>`_\n\nFor this example, we'll now use the Arctic Lake dataset\n(Aitchison, J. (2003). The Statistical Analysis of Compositional Data. The Blackburn Press, Caldwell, NJ.),\ntaken from the ``DirichletReg`` R package, which consists of 39 rows with one predictor variable 'depth'\nand a three-valued response variable denoting the sediment composition of the measurements in this arctic\nlake (sand, silt, clay).\n\nThe data:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        # depth\n        X = np.array([\n            10.4,11.7,12.8,13,15.7,16.3,18,18.7,20.7,22.1,\n            22.4,24.4,25.8,32.5,33.6,36.8,37.8,36.9,42.2,47,\n            47.1,48.4,49.4,49.5,59.2,60.1,61.7,62.4,69.3,73.6,\n            74.4,78.5,82.9,87.7,88.1,90.4,90.6,97.7,103.7,\n        ]).reshape((-1,1))\n        # sand, silt, clay\n        Y = np.array([\n            [0.775,0.195,0.03], [0.719,0.249,0.032], [0.507,0.361,0.132],\n            [0.522,0.409,0.066], [0.7,0.265,0.035], [0.665,0.322,0.013],\n            [0.431,0.553,0.016], [0.534,0.368,0.098], [0.155,0.544,0.301],\n            [0.317,0.415,0.268], [0.657,0.278,0.065], [0.704,0.29,0.006],\n            [0.174,0.536,0.29], [0.106,0.698,0.196], [0.382,0.431,0.187],\n            [0.108,0.527,0.365], [0.184,0.507,0.309], [0.046,0.474,0.48],\n            [0.156,0.504,0.34], [0.319,0.451,0.23], [0.095,0.535,0.37],\n            [0.171,0.48,0.349], [0.105,0.554,0.341], [0.048,0.547,0.41],\n            [0.026,0.452,0.522], [0.114,0.527,0.359], [0.067,0.469,0.464],\n            [0.069,0.497,0.434], [0.04,0.449,0.511], [0.074,0.516,0.409],\n            [0.048,0.495,0.457], [0.045,0.485,0.47], [0.066,0.521,0.413],\n            [0.067,0.473,0.459], [0.074,0.456,0.469], [0.06,0.489,0.451],\n            [0.063,0.538,0.399], [0.025,0.48,0.495], [0.02,0.478,0.502],\n        ])\n\n    .. code-tab:: r R\n\n        data(\"ArcticLake\", package=\"DirichletReg\")\n        x <- ArcticLake[, c(\"depth\"), drop=F]\n        y <- ArcticLake[, c(\"sand\", \"silt\", \"clay\")] |> as.matrix()\n\nFitting an XGBoost model and making predictions:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        from typing import Dict, List\n\n        dtrain = xgb.DMatrix(X, label=Y)\n        results: Dict[str, Dict[str, List[float]]] = {}\n        booster = xgb.train(\n            params={\n                \"tree_method\": \"hist\",\n                \"num_target\": Y.shape[1],\n                \"base_score\": 0,\n                \"disable_default_eval_metric\": True,\n                \"max_depth\": 3,\n                \"seed\": 123,\n            },\n            dtrain=dtrain,\n            num_boost_round=10,\n            obj=dirichlet_xgb_objective,\n            evals=[(dtrain, \"Train\")],\n            evals_result=results,\n            custom_metric=dirichlet_eval_metric,\n        )\n        yhat = softmax(booster.inplace_predict(X), axis=1)\n\n    .. code-tab:: r R\n\n        dtrain <- xgb.DMatrix(x, y)\n        booster <- xgb.train(\n            params = list(\n                tree_method=\"hist\",\n                num_target=ncol(y),\n                base_score=0,\n                disable_default_eval_metric=TRUE,\n                max_depth=3,\n                seed=123\n            ),\n            data = dtrain,\n            nrounds = 10,\n            obj = dirichlet.xgb.objective,\n            evals = list(Train=dtrain),\n            eval_metric = dirichlet.eval.metric\n        )\n        raw.pred <- predict(booster, x, reshape=TRUE)\n        yhat <- apply(raw.pred, 1, softmax) |> t()\n\n\nShould produce an evaluation log as follows (note: the function is decreasing as\nexpected - but unlike other objectives, the minimum value here can reach below zero):\n\n.. 
code-block:: none\n\n    [0] Train-dirichlet_ll:-40.25009\n    [1] Train-dirichlet_ll:-47.69122\n    [2] Train-dirichlet_ll:-52.64620\n    [3] Train-dirichlet_ll:-56.36977\n    [4] Train-dirichlet_ll:-59.33048\n    [5] Train-dirichlet_ll:-61.93359\n    [6] Train-dirichlet_ll:-64.17280\n    [7] Train-dirichlet_ll:-66.29709\n    [8] Train-dirichlet_ll:-68.21001\n    [9] Train-dirichlet_ll:-70.03442\n\nOne can confirm that the obtained ``yhat`` resembles the actual concentrations\nto a large degree, beyond what would be expected from random predictions by a\nsimple look at both ``yhat`` and ``Y``.\n\nFor better results, one might want to add an intercept. XGBoost only\nallows using scalars for intercepts, but for a vector-valued model,\nthe optimal intercept should also have vector form.\n\nThis can be done by supplying ``base_margin`` instead - unlike the\nintercept, one must specifically supply values for every row here,\nand said ``base_margin`` must be supplied again at the moment of making\npredictions (i.e. does not get added automatically like ``base_score``\ndoes).\n\nFor the case of a Dirichlet model, the optimal intercept can be obtained\nefficiently using a general solver (e.g. SciPy's Newton solver) with\ndedicated likelihood, gradient and Hessian functions for just the intercept part.\nFurther, note that if one frames it instead as bounded optimization without\napplying 'exp' transform to the concentrations, it becomes instead a convex\nproblem, for which the true Hessian can be used without issues in other\nclasses of solvers.\n\nFor simplicity, this example will nevertheless reuse the same likelihood\nand gradient functions that were defined earlier alongside with SciPy's / R's\nL-BFGS solver to obtain the optimal vector-valued intercept:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        from scipy.optimize import minimize\n\n        def get_optimal_intercepts(Y: np.ndarray) -> np.ndarray:\n            k = Y.shape[1]\n            res = minimize(\n                fun=lambda pred: dirichlet_fun(\n                    np.broadcast_to(pred, (Y.shape[0], k)),\n                    Y\n                ),\n                x0=np.zeros(k),\n                jac=lambda pred: dirichlet_grad(\n                    np.broadcast_to(pred, (Y.shape[0], k)),\n                    Y\n                ).sum(axis=0)\n            )\n            return res[\"x\"]\n        intercepts = get_optimal_intercepts(Y)\n\n    .. code-tab:: r R\n\n        get.optimal.intercepts <- function(y) {\n            k <- ncol(y)\n            broadcast.vec <- function(x) rep(x, nrow(y)) |> matrix(ncol=k, byrow=T)\n            res <- optim(\n                par = numeric(k),\n                fn = function(x) dirichlet.fun(broadcast.vec(x), y),\n                gr = function(x) dirichlet.grad(broadcast.vec(x), y) |> colSums(),\n                method = \"L-BFGS-B\"\n            )\n            return(res$par)\n        }\n        intercepts <- get.optimal.intercepts(y)\n\n\nNow fitting a model again, this time with the intercept:\n\n.. tabs::\n    .. 
code-tab:: py\n\n        base_margin = np.broadcast_to(intercepts, Y.shape)\n        dtrain_w_intercept = xgb.DMatrix(X, label=Y, base_margin=base_margin)\n        results: Dict[str, Dict[str, List[float]]] = {}\n        booster = xgb.train(\n            params={\n                \"tree_method\": \"hist\",\n                \"num_target\": Y.shape[1],\n                \"base_score\": 0,\n                \"disable_default_eval_metric\": True,\n                \"max_depth\": 3,\n                \"seed\": 123,\n            },\n            dtrain=dtrain_w_intercept,\n            num_boost_round=10,\n            obj=dirichlet_xgb_objective,\n            evals=[(dtrain, \"Train\")],\n            evals_result=results,\n            custom_metric=dirichlet_eval_metric,\n        )\n        yhat = softmax(\n            booster.predict(\n                xgb.DMatrix(X, base_margin=base_margin)\n            ),\n            axis=1\n        )\n\n    .. code-tab:: r R\n\n        base.margin <- rep(intercepts, nrow(y)) |> matrix(nrow=nrow(y), byrow=T)\n        dtrain <- xgb.DMatrix(x, y, base_margin=base.margin)\n        booster <- xgb.train(\n            params = list(\n                tree_method=\"hist\",\n                num_target=ncol(y),\n                base_score=0,\n                disable_default_eval_metric=TRUE,\n                max_depth=3,\n                seed=123\n            ),\n            data = dtrain,\n            nrounds = 10,\n            obj = dirichlet.xgb.objective,\n            evals = list(Train=dtrain),\n            eval_metric = dirichlet.eval.metric\n        )\n        raw.pred <- predict(\n            booster,\n            x,\n            base_margin=base.margin,\n            reshape=TRUE\n        )\n        yhat <- apply(raw.pred, 1, softmax) |> t()\n\n.. 
code-block:: none\n\n    [0] Train-dirichlet_ll:-37.01861\n    [1] Train-dirichlet_ll:-42.86120\n    [2] Train-dirichlet_ll:-46.55133\n    [3] Train-dirichlet_ll:-49.15111\n    [4] Train-dirichlet_ll:-51.02638\n    [5] Train-dirichlet_ll:-52.53880\n    [6] Train-dirichlet_ll:-53.77409\n    [7] Train-dirichlet_ll:-54.88851\n    [8] Train-dirichlet_ll:-55.95961\n    [9] Train-dirichlet_ll:-56.95497\n\nFor this small example problem, predictions should be very similar between the\ntwo, and the version without intercepts achieved a lower objective function value on\nthe training data (for the Python version at least). For more serious usage with\nreal-world data, however, one is likely to observe better results when adding the\nintercepts.\n"
  },
  {
    "path": "doc/tutorials/aft_survival_analysis.rst",
    "content": "###############################################\nSurvival Analysis with Accelerated Failure Time\n###############################################\n\n.. contents::\n  :local:\n  :backlinks: none\n\n**************************\nWhat is survival analysis?\n**************************\n\n**Survival analysis (regression)** models **time to an event of interest**. Survival analysis is a special kind of regression and differs from the conventional regression task as follows:\n\n* The label is always positive, since you cannot wait a negative amount of time until the event occurs.\n* The label may not be fully known, or **censored**, because \"it takes time to measure time.\"\n\nThe second bullet point is crucial and we should dwell on it more. As you may have guessed from the name, one of the earliest applications of survival analysis is to model mortality of a given population. Let's take `NCCTG Lung Cancer Dataset <https://stat.ethz.ch/R-manual/R-devel/library/survival/html/lung.html>`_ as an example. The first 8 columns represent features and the last column, Time to death, represents the label.\n\n==== === === ======= ======== ========= ======== ======= ========================\nInst Age Sex ph.ecog ph.karno pat.karno meal.cal wt.loss **Time to death (days)**\n==== === === ======= ======== ========= ======== ======= ========================\n3    74  1   1       90       100       1175     N/A     306\n3    68  1   0       90       90        1225     15      455\n3    56  1   0       90       90        N/A      15      :math:`[1010, +\\infty)`\n5    57  1   1       90       60        1150     11      210\n1    60  1   0       100      90        N/A      0       883\n12   74  1   1       50       80        513      0       :math:`[1022, +\\infty)`\n7    68  2   2       70       60        384      10      310\n==== === === ======= ======== ========= ======== ======= ========================\n\nTake a close look at the label for the third patient. 
**His label is a range, not a single number.** The third patient's label is said to be **censored**, because for some reason the experimenters could not get a complete measurement for that label. One possible scenario: the patient survived the first 1010 days and walked out of the clinic on the 1011th day, so his death was not directly observed. Another possibility: The experiment was cut short (since you cannot run it forever) before his death could be observed. In any case, his label is :math:`[1010, +\\infty)`, meaning his time to death can be any number that's higher than 1010, e.g. 2000, 3000, or 10000.\n\nThere are four kinds of censoring:\n\n* **Uncensored**: the label is not censored and given as a single number.\n* **Right-censored**: the label is of form :math:`[a, +\\infty)`, where :math:`a` is the lower bound.\n* **Left-censored**: the label is of form :math:`[0, b]`, where :math:`b` is the upper bound.\n* **Interval-censored**: the label is of form :math:`[a, b]`, where :math:`a` and :math:`b` are the lower and upper bounds, respectively.\n\nRight-censoring is the most commonly used.\n\n******************************\nAccelerated Failure Time model\n******************************\n**Accelerated Failure Time (AFT)** model is one of the most commonly used models in survival analysis. The model is of the following form:\n\n.. math::\n\n  \\ln{Y} = \\langle \\mathbf{w}, \\mathbf{x} \\rangle + \\sigma Z\n\nwhere\n\n* :math:`\\mathbf{x}` is a vector in :math:`\\mathbb{R}^d` representing the features.\n* :math:`\\mathbf{w}` is a vector consisting of :math:`d` coefficients, each corresponding to a feature.\n* :math:`\\langle \\cdot, \\cdot \\rangle` is the usual dot product in :math:`\\mathbb{R}^d`.\n* :math:`\\ln{(\\cdot)}` is the natural logarithm.\n* :math:`Y` and :math:`Z` are random variables.\n\n  - :math:`Y` is the output label.\n  - :math:`Z` is a random variable of a known probability distribution. 
Common choices are the normal distribution, the logistic distribution, and the extreme distribution. Intuitively, :math:`Z` represents the \"noise\" that pulls the prediction :math:`\\langle \\mathbf{w}, \\mathbf{x} \\rangle` away from the true log label :math:`\\ln{Y}`.\n\n* :math:`\\sigma` is a parameter that scales the size of :math:`Z`.\n\nNote that this model is a generalized form of a linear regression model :math:`Y = \\langle \\mathbf{w}, \\mathbf{x} \\rangle`. In order to make AFT work with gradient boosting, we revise the model as follows:\n\n.. math::\n\n  \\ln{Y} = \\mathcal{T}(\\mathbf{x}) + \\sigma Z\n\nwhere :math:`\\mathcal{T}(\\mathbf{x})` represents the output from a decision tree ensemble, given input :math:`\\mathbf{x}`. Since :math:`Z` is a random variable, we have a likelihood defined for the expression :math:`\\ln{Y} = \\mathcal{T}(\\mathbf{x}) + \\sigma Z`. So the goal for XGBoost is to maximize the (log) likelihood by fitting a good tree ensemble :math:`\\mathcal{T}(\\mathbf{x})`.\n\n**********\nHow to use\n**********\nThe first step is to express the labels in the form of a range, so that **every data point has two numbers associated with it, namely the lower and upper bounds for the label.** For uncensored labels, use a degenerate interval of form :math:`[a, a]`.\n\n.. |tick| unicode:: U+2714\n.. |cross| unicode:: U+2718\n\n================= ==================== =================== ===================\nCensoring type    Interval form        Lower bound finite? 
Upper bound finite?\n================= ==================== =================== ===================\nUncensored        :math:`[a, a]`       |tick|              |tick|\nRight-censored    :math:`[a, +\\infty)` |tick|              |cross|\nLeft-censored     :math:`[0, b]`       |tick|              |tick|\nInterval-censored :math:`[a, b]`       |tick|              |tick|\n================= ==================== =================== ===================\n\nCollect the lower bound numbers in one array (let's call it ``y_lower_bound``) and the upper bound number in another array (call it ``y_upper_bound``). The ranged labels are associated with a data matrix object via calls to :meth:`xgboost.DMatrix.set_float_info`:\n\n.. code-block:: python\n  :caption: Python\n\n  import numpy as np\n  import xgboost as xgb\n\n  # 4-by-2 Data matrix\n  X = np.array([[1, -1], [-1, 1], [0, 1], [1, 0]])\n  dtrain = xgb.DMatrix(X)\n\n  # Associate ranged labels with the data matrix.\n  # This example shows each kind of censored labels.\n  #                         uncensored    right     left  interval\n  y_lower_bound = np.array([      2.0,     3.0,     0.0,     4.0])\n  y_upper_bound = np.array([      2.0, +np.inf,     4.0,     5.0])\n  dtrain.set_float_info('label_lower_bound', y_lower_bound)\n  dtrain.set_float_info('label_upper_bound', y_upper_bound)\n\n.. code-block:: r\n  :caption: R\n\n  library(xgboost)\n\n  # 4-by-2 Data matrix\n  X <- matrix(c(1., -1., -1., 1., 0., 1., 1., 0.),\n              nrow=4, ncol=2, byrow=TRUE)\n  dtrain <- xgb.DMatrix(X)\n\n  # Associate ranged labels with the data matrix.\n  # This example shows each kind of censored labels.\n  #                   uncensored  right  left  interval\n  y_lower_bound <- c(        2.,    3.,   0.,       4.)\n  y_upper_bound <- c(        2.,  +Inf,   4.,       5.)\n  setinfo(dtrain, 'label_lower_bound', y_lower_bound)\n  setinfo(dtrain, 'label_upper_bound', y_upper_bound)\n\nNow we are ready to invoke the training API:\n\n.. 
code-block:: python\n  :caption: Python\n\n  params = {'objective': 'survival:aft',\n            'eval_metric': 'aft-nloglik',\n            'aft_loss_distribution': 'normal',\n            'aft_loss_distribution_scale': 1.20,\n            'tree_method': 'hist', 'learning_rate': 0.05, 'max_depth': 2}\n  bst = xgb.train(params, dtrain, num_boost_round=5,\n                  evals=[(dtrain, 'train')])\n\n.. code-block:: r\n  :caption: R\n\n  params <- list(objective='survival:aft',\n                 eval_metric='aft-nloglik',\n                 aft_loss_distribution='normal',\n                 aft_loss_distribution_scale=1.20,\n                 tree_method='hist',\n                 learning_rate=0.05,\n                 max_depth=2)\n  watchlist <- list(train = dtrain)\n  bst <- xgb.train(params, dtrain, nrounds=5, watchlist)\n\nWe set ``objective`` parameter to ``survival:aft`` and ``eval_metric`` to ``aft-nloglik``, so that the log likelihood for the AFT model would be maximized. (XGBoost will actually minimize the negative log likelihood, hence the name ``aft-nloglik``.)\n\nThe parameter ``aft_loss_distribution`` corresponds to the distribution of the :math:`Z` term in the AFT model, and ``aft_loss_distribution_scale`` corresponds to the scaling factor :math:`\\sigma`.\n\nCurrently, you can choose from three probability distributions for ``aft_loss_distribution``:\n\n========================= ===========================================\n``aft_loss_distribution`` Probability Density Function (PDF)\n========================= ===========================================\n``normal``                :math:`\\dfrac{\\exp{(-z^2/2)}}{\\sqrt{2\\pi}}`\n``logistic``              :math:`\\dfrac{e^z}{(1+e^z)^2}`\n``extreme``               :math:`e^z e^{-\\exp{z}}`\n========================= ===========================================\n\nNote that it is not yet possible to set the ranged label using the scikit-learn interface (e.g. :class:`xgboost.XGBRegressor`). 
For now, you should use :func:`xgboost.train` with :class:`xgboost.DMatrix`. For a collection of Python examples, see :doc:`/python/survival-examples/index`.\n"
  },
  {
    "path": "doc/tutorials/c_api_tutorial.rst",
    "content": "##############\nC API Tutorial\n##############\n\nIn this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. Later on, we will see some useful tips for using C API and code snippets as examples to use various functions available in C API to perform basic task like loading, training model & predicting on test dataset. For API reference, please visit :doc:`/c`\n\n.. contents::\n  :backlinks: none\n  :local:\n\n************\nRequirements\n************\n\nInstall CMake - Follow the `cmake installation documentation <https://cmake.org/install/>`_ for instructions.\nInstall Conda - Follow the `conda installation  documentation <https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html>`_ for instructions\n\n*************************************\nInstall XGBoost on conda environment\n*************************************\n\nRun the following commands on your terminal. The below commands will install the XGBoost in your XGBoost folder of the repository cloned\n\n.. code-block:: bash\n\n    # clone the XGBoost repository & its submodules\n    git clone --recursive https://github.com/dmlc/xgboost\n    cd xgboost\n    # Activate the Conda environment, into which we'll install XGBoost\n    conda activate [env_name]\n    # Build the compiled version of XGBoost inside the build folder\n    cmake -B build -S . 
-DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX\n    # install XGBoost in your conda environment (usually under [your home directory]/miniconda3)\n    cmake --build build --target install\n\n**********************************************************************\nConfigure CMakeLists.txt file of your application to link with XGBoost\n**********************************************************************\n\nHere, we assume that your C++ application is using CMake for builds.\n\nUse ``find_package()`` and ``target_link_libraries()`` in your application's CMakeLists.txt to link with the XGBoost library:\n\n.. code-block:: cmake\n\n    cmake_minimum_required(VERSION 3.18)\n    project(your_project_name LANGUAGES C CXX VERSION your_project_version)\n    find_package(xgboost REQUIRED)\n    add_executable(your_project_name /path/to/project_file.c)\n    target_link_libraries(your_project_name xgboost::xgboost)\n\nTo ensure that CMake can locate the XGBoost library, supply the ``-DCMAKE_PREFIX_PATH=$CONDA_PREFIX`` argument when invoking CMake. This option instructs CMake to locate the XGBoost library in ``$CONDA_PREFIX``, which is where your Conda environment is located.\n\n.. code-block:: bash\n\n  # Activate the Conda environment where we previously installed XGBoost\n  conda activate [env_name]\n  # Invoke CMake with CMAKE_PREFIX_PATH\n  cmake -B build -S . -DCMAKE_PREFIX_PATH=$CONDA_PREFIX\n  # Build your application\n  cmake --build build\n\n************************\nUseful Tips To Remember\n************************\n\nBelow are some useful tips for using the C API:\n\n1. Error handling: Always check the return value of the C API functions.\n\na. In a C application: Use the following macro to guard all calls to XGBoost's C API functions. The macro prints any error/exception that occurred:\n\n.. highlight:: c\n   :linenothreshold: 5\n\n.. 
code-block:: c\n\n  #define safe_xgboost(call) {  \\\n    int err = (call); \\\n    if (err != 0) { \\\n      fprintf(stderr, \"%s:%d: error in %s: %s\\n\", __FILE__, __LINE__, #call, XGBGetLastError());  \\\n      exit(1); \\\n    } \\\n  }\n\nIn your application, wrap all C API function calls with the macro as follows:\n\n.. code-block:: c\n\n  DMatrixHandle train;\n  safe_xgboost(XGDMatrixCreateFromFile(\"/path/to/training/dataset/\", silent, &train));\n\nb. In a C++ application: modify the macro ``safe_xgboost`` to throw an exception upon an error.\n\n.. highlight:: cpp\n   :linenothreshold: 5\n\n.. code-block:: cpp\n\n  #define safe_xgboost(call) {  \\\n    int err = (call); \\\n    if (err != 0) { \\\n      throw std::runtime_error(std::string(__FILE__) + \":\" + std::to_string(__LINE__) + \\\n                          \": error in \" + #call + \":\" + XGBGetLastError());  \\\n    } \\\n  }\n\nc. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (false), then the expression, source code filename, and line number are sent to the standard error, and then abort() function is called. It can be used to test assumptions made by you in the code.\n\n.. code-block:: c\n\n  DMatrixHandle dmat;\n  assert( XGDMatrixCreateFromFile(\"training_data.libsvm\", 0, &dmat) == 0);\n\n\n2. Always remember to free the allocated space by BoosterHandle & DMatrixHandle appropriately:\n\n.. code-block:: c\n\n    #include <assert.h>\n    #include <stdio.h>\n    #include <stdlib.h>\n    #include <xgboost/c_api.h>\n\n    int main(int argc, char** argv) {\n      int silent = 0;\n\n      BoosterHandle booster;\n\n      // do something with booster\n\n      //free the memory\n      XGBoosterFree(booster);\n\n      DMatrixHandle DMatrixHandle_param;\n\n      // do something with DMatrixHandle_param\n\n      // free the memory\n      XGDMatrixFree(DMatrixHandle_param);\n\n      return 0;\n    }\n\n\n3. 
For tree models, it is important to use consistent data formats during training and scoring/predicting; otherwise, the model will produce wrong outputs.\n   For example, if your training data is in ``dense matrix`` format, then your prediction dataset should also be a ``dense matrix``; if you trained on data in ``libsvm`` format, then the dataset for prediction should also be in ``libsvm`` format.\n\n\n4. Always use strings for setting values to the parameters in the booster handle object. The parameter value can be of any data type (e.g. int, char, float, double, etc.), but it should always be encoded as a string.\n\n.. code-block:: c\n\n    BoosterHandle booster;\n    XGBoosterSetParam(booster, \"parameter_name\", \"0.1\");\n\n\n**************************************************************\nSample examples along with Code snippet to use C API functions\n**************************************************************\n\n1. If the dataset is available in a file, it can be loaded into a ``DMatrix`` object using the :cpp:func:`XGDMatrixCreateFromFile`\n\n.. code-block:: c\n\n  DMatrixHandle data; // handle to DMatrix\n  // Load the data from file & store it in data variable of DMatrixHandle datatype\n  safe_xgboost(XGDMatrixCreateFromFile(\"/path/to/file/filename\", silent, &data));\n\n\n2. You can also create a ``DMatrix`` object from a 2D Matrix using the :cpp:func:`XGDMatrixCreateFromMat`\n\n.. 
code-block:: c\n\n  // 1D matrix\n  const int data1[] = { 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 };\n\n  // 2D matrix\n  const int ROWS = 6, COLS = 3;\n  const int data2[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };\n  DMatrixHandle dmatrix1, dmatrix2;\n  // Pass the matrix, no of rows & columns contained in the matrix variable\n  // here '0' represents the missing value in the matrix dataset\n  // dmatrix variable will contain the created DMatrix using it\n  safe_xgboost(XGDMatrixCreateFromMat(data1, 1, 50, 0, &dmatrix));\n  // here -1 represents the missing value in the matrix dataset\n  safe_xgboost(XGDMatrixCreateFromMat(data2, ROWS, COLS, -1, &dmatrix2));\n\n\n3. Create a Booster object for training & testing on dataset using :cpp:func:`XGBoosterCreate`\n\n.. code-block:: c\n\n  BoosterHandle booster;\n  const int eval_dmats_size;\n  // We assume that training and test data have been loaded into 'train' and 'test'\n  DMatrixHandle eval_dmats[eval_dmats_size] = {train, test};\n  safe_xgboost(XGBoosterCreate(eval_dmats, eval_dmats_size, &booster));\n\n\n4. For each ``DMatrix`` object, set the labels using :cpp:func:`XGDMatrixSetFloatInfo`. Later you can access the label using :cpp:func:`XGDMatrixGetFloatInfo`.\n\n.. 
code-block:: c\n\n  const int ROWS=5, COLS=3;\n  const int data[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };\n  DMatrixHandle dmatrix;\n\n  safe_xgboost(XGDMatrixCreateFromMat(data, ROWS, COLS, -1, &dmatrix));\n\n  // variable to store labels for the dataset created from above matrix\n  float labels[ROWS];\n\n  for (int i = 0; i < ROWS; i++) {\n    labels[i] = i;\n  }\n\n  // Loading the labels\n  safe_xgboost(XGDMatrixSetFloatInfo(dmatrix, \"label\", labels, ROWS));\n\n  // reading the labels and store the length of the result\n  bst_ulong result_len;\n\n  // labels result\n  const float *result;\n\n  safe_xgboost(XGDMatrixGetFloatInfo(dmatrix, \"label\", &result_len, &result));\n\n  for(unsigned int i = 0; i < result_len; i++) {\n    printf(\"label[%i] = %f\\n\", i, result[i]);\n  }\n\n\n5. Set the parameters for the ``Booster`` object according to the requirement using :cpp:func:`XGBoosterSetParam` . Check out the full list of parameters available :doc:`here </parameter>` .\n\n.. code-block :: c\n\n    BoosterHandle booster;\n    safe_xgboost(XGBoosterSetParam(booster, \"booster\", \"gblinear\"));\n    // default max_depth =6\n    safe_xgboost(XGBoosterSetParam(booster, \"max_depth\", \"3\"));\n    // default eta  = 0.3\n    safe_xgboost(XGBoosterSetParam(booster, \"eta\", \"0.1\"));\n\n\n6. Train & evaluate the model using :cpp:func:`XGBoosterUpdateOneIter` and :cpp:func:`XGBoosterEvalOneIter` respectively.\n\n.. 
code-block:: c\n\n    int num_of_iterations = 20;\n    const char* eval_names[eval_dmats_size] = {\"train\", \"test\"};\n    const char* eval_result = NULL;\n\n    for (int i = 0; i < num_of_iterations; ++i) {\n      // Update the model performance for each iteration\n      safe_xgboost(XGBoosterUpdateOneIter(booster, i, train));\n\n      // Give the statistics for the learner for training & testing dataset in terms of error after each iteration\n      safe_xgboost(XGBoosterEvalOneIter(booster, i, eval_dmats, eval_names, eval_dmats_size, &eval_result));\n      printf(\"%s\\n\", eval_result);\n    }\n\n.. note:: For customized loss function, use :cpp:func:`XGBoosterBoostOneIter` instead and manually specify the gradient and 2nd order gradient.\n\n\n7.  Predict the result on a test set using :cpp:func:`XGBoosterPredictFromDMatrix`\n\n.. code-block:: c\n\n    char const config[] =\n        \"{\\\"training\\\": false, \\\"type\\\": 0, \"\n        \"\\\"iteration_begin\\\": 0, \\\"iteration_end\\\": 0, \\\"strict_shape\\\": false}\";\n    /* Shape of output prediction */\n    uint64_t const* out_shape;\n    /* Dimension of output prediction */\n    uint64_t out_dim;\n    /* Pointer to a thread local contiguous array, assigned in prediction function. */\n    float const* out_result = NULL;\n    safe_xgboost(\n        XGBoosterPredictFromDMatrix(booster, dmatrix, config, &out_shape, &out_dim, &out_result));\n\n    for (unsigned int i = 0; i < output_length; i++){\n      printf(\"prediction[%i] = %f \\n\", i, output_result[i]);\n    }\n\n\n8. Get the number of features in your dataset using :cpp:func:`XGBoosterGetNumFeature`.\n\n.. 
code-block:: c\n\n    bst_ulong num_of_features = 0;\n\n    // Assuming booster variable of type BoosterHandle is already declared\n    // and dataset is loaded and trained on booster\n    // storing the results in num_of_features variable\n    safe_xgboost(XGBoosterGetNumFeature(booster, &num_of_features));\n\n    // Printing number of features by type conversion of num_of_features variable from bst_ulong to unsigned long\n    printf(\"num_feature: %lu\\n\", (unsigned long)(num_of_features));\n\n\n\n9. Save the model using :cpp:func:`XGBoosterSaveModel`\n\n.. code-block:: c\n\n    BoosterHandle booster;\n    const char *model_path = \"/path/of/model.json\";\n    safe_xgboost(XGBoosterSaveModel(booster, model_path));\n\n\n10. Load the model using :cpp:func:`XGBoosterLoadModel`\n\n.. code-block:: c\n\n    BoosterHandle booster;\n    const char *model_path = \"/path/of/model.json\";\n\n    // create booster handle first\n    safe_xgboost(XGBoosterCreate(NULL, 0, &booster));\n\n    // set the model parameters here\n\n    // load model\n    safe_xgboost(XGBoosterLoadModel(booster, model_path));\n\n    // predict the model here\n\n\n11. Free all the internal structures used in your code using :cpp:func:`XGDMatrixFree` and :cpp:func:`XGBoosterFree`. This step is important to prevent memory leaks.\n\n.. code-block:: c\n\n  safe_xgboost(XGDMatrixFree(dmatrix));\n  safe_xgboost(XGBoosterFree(booster));\n"
  },
  {
    "path": "doc/tutorials/categorical.rst",
    "content": "################\nCategorical Data\n################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\nSince version 1.5, XGBoost has support for categorical data.  For numerical data, the\nsplit condition is defined as :math:`value < threshold`, while for categorical data the\nsplit is defined depending on whether partitioning or onehot encoding is used. For\npartition-based splits, the splits are specified as :math:`value \\in categories`, where\n``categories`` is the set of categories in one feature.  If onehot encoding is used\ninstead, then the split is defined as :math:`value == category`. More advanced categorical\nsplit strategy is planned for future releases and this tutorial details how to inform\nXGBoost about the data type.\n\n\n************************************\nTraining with scikit-learn Interface\n************************************\n\nThe easiest way to pass categorical data into XGBoost is using dataframe and the\n``scikit-learn`` interface like :class:`XGBClassifier <xgboost.XGBClassifier>`.  For\npreparing the data, users need to specify the data type of input predictor as\n``category``.  For ``pandas/cudf Dataframe``, this can be achieved by\n\n.. code:: python\n\n  X[\"cat_feature\"].astype(\"category\")\n\nfor all columns that represent categorical features.  After which, users can tell XGBoost\nto enable training with categorical data.  Assuming that you are using the\n:class:`XGBClassifier <xgboost.XGBClassifier>` for classification problem, specify the\nparameter ``enable_categorical``:\n\n.. 
code:: python\n\n  # Supported tree methods are `approx` and `hist`.\n  clf = xgb.XGBClassifier(tree_method=\"hist\", enable_categorical=True, device=\"cuda\")\n  # X is the dataframe we created in previous snippet\n  clf.fit(X, y)\n  # Must use JSON/UBJSON for serialization, otherwise the information is lost.\n  clf.save_model(\"categorical-model.json\")\n\n\nOnce training is finished, most other features can utilize the model.  For instance, one\ncan plot the model and calculate the global feature importance:\n\n\n.. code:: python\n\n  # Get a graph\n  graph = xgb.to_graphviz(clf, num_trees=1)\n  # Or get a matplotlib axis\n  ax = xgb.plot_tree(clf, num_trees=1)\n  # Get feature importances\n  clf.feature_importances_\n\n\nThe ``scikit-learn`` interface from dask is similar to the single-node version.  The basic\nidea is to create a dataframe with the category feature type, and tell XGBoost to use it by\nsetting the ``enable_categorical`` parameter.  See :ref:`sphx_glr_python_examples_categorical.py`\nfor a worked example of using categorical data with the ``scikit-learn`` interface with\none-hot encoding.  A comparison between using one-hot encoded data and XGBoost's\ncategorical data support can be found in :ref:`sphx_glr_python_examples_cat_in_the_dat.py`.\n\n.. versionadded:: 3.0\n\n   Support for the R package using ``factor``.\n\n********************\nOptimal Partitioning\n********************\n\n.. versionadded:: 1.6\n\nOptimal partitioning is a technique for partitioning the categorical predictors for each\nnode split, the proof of optimality for numerical output was first introduced by `[1]\n<#references>`__. The algorithm is used in decision trees `[2] <#references>`__, later\nLightGBM `[3] <#references>`__ brought it to the context of gradient boosting trees and\nnow is also adopted in XGBoost as an optional feature for handling categorical\nsplits. 
More specifically, the proof by Fisher `[1] <#references>`__ states that, when\ntrying to partition a set of discrete values into groups based on the distances between a\nmeasure of these values, one only needs to look at sorted partitions instead of\nenumerating all possible permutations. In the context of decision trees, the discrete\nvalues are categories, and the measure is the output leaf value.  Intuitively, we want to\ngroup the categories that output similar leaf values. During split finding, we first sort\nthe gradient histogram to prepare the contiguous partitions then enumerate the splits\naccording to these sorted values. One of the related parameters for XGBoost is\n``max_cat_to_onehot``, which controls whether one-hot encoding or partitioning should be\nused for each feature, see :ref:`cat-param` for details.\n\n\n**********************\nUsing native interface\n**********************\n\nThe ``scikit-learn`` interface is user friendly, but lacks some features that are only\navailable in native interface.  For instance users cannot compute SHAP value directly.\nAlso native interface supports more data types. To use the native interface with\ncategorical data, we need to pass the similar parameter to :class:`~xgboost.DMatrix` or\n:py:class:`~xgboost.QuantileDMatrix` and the :func:`train <xgboost.train>` function.  For\ndataframe input:\n\n.. code:: python\n\n  # X is a dataframe we created in previous snippet\n  Xy = xgb.DMatrix(X, y, enable_categorical=True)\n  booster = xgb.train({\"tree_method\": \"hist\", \"max_cat_to_onehot\": 5}, Xy)\n  # Must use JSON for serialization, otherwise the information is lost\n  booster.save_model(\"categorical-model.json\")\n\nSHAP value computation:\n\n.. 
code:: python\n\n  SHAP = booster.predict(Xy, pred_interactions=True)\n\n  # categorical features are listed as \"c\"\n  print(booster.feature_types)\n\nFor other types of input, like ``numpy array``, we can tell XGBoost about the feature\ntypes by using the ``feature_types`` parameter in :class:`DMatrix <xgboost.DMatrix>`:\n\n.. code:: python\n\n  # \"q\" is numerical feature, while \"c\" is categorical feature\n  ft = [\"q\", \"c\", \"c\"]\n  X: np.ndarray = load_my_data()\n  assert X.shape[1] == 3\n  Xy = xgb.DMatrix(X, y, feature_types=ft, enable_categorical=True)\n\nFor numerical data, the feature type can be ``\"q\"`` or ``\"float\"``, while for categorical\nfeature it's specified as ``\"c\"``.  The Dask module in XGBoost has the same interface so\n:class:`dask.Array <dask.Array>` can also be used for categorical data. Lastly, the\nsklearn interface :py:class:`~xgboost.XGBRegressor` has the same parameter.\n\n.. _cat-recode:\n\n********************************\nAuto-recoding (Data Consistency)\n********************************\n\n.. versionchanged:: 3.1\n\n  Starting with XGBoost 3.1, the **Python** interface can perform automatic re-coding for\n  new inputs.\n\nXGBoost accepts parameters to indicate which feature is considered categorical, either\nthrough the ``dtypes`` of a dataframe or through the ``feature_types`` parameter. However,\nexcept for the Python interface, XGBoost doesn't store the information about how\ncategories are encoded in the first place. For instance, given an encoding schema that\nmaps music genres to integer codes:\n\n.. code-block:: python\n\n  {\"acoustic\": 0, \"indie\": 1, \"blues\": 2, \"country\": 3}\n\nAside from the Python interface (R/Java/C, etc), XGBoost doesn't know this mapping from\nthe input and hence cannot store it in the model. The mapping usually happens in the\nusers' data engineering pipeline. 
To ensure the correct result from XGBoost, users need to\nkeep the pipeline for transforming data consistent across training and testing data.\n\nStarting with 3.1, the **Python** interface can remember the encoding and perform recoding\nduring inference and training continuation when the input is a dataframe (`pandas`,\n`cuDF`, `polars`, `pyarrow`, `modin`). The feature support focuses on basic usage. It has\nsome restrictions on the types of inputs that can be accepted. First, category names must\nhave one of the following types:\n\n- string\n- integer, from 8-bit to 64-bit, both signed and unsigned are supported.\n- 32-bit or 64-bit floating point\n\nOther category types are not supported. Second, the input types must be strictly\nconsistent. For example, XGBoost will raise an error if the categorical columns in the\ntraining set are unsigned integers whereas the test dataset has signed integer columns. If\nyou have categories that are not one of the supported types, you need to perform the\nre-coding using a pre-processing data transformer like the\n:py:class:`sklearn.preprocessing.OrdinalEncoder`. See\n:ref:`sphx_glr_python_examples_cat_pipeline.py` for a worked example using an ordinal\nencoder. To clarify, the type here refers to the type of the name of categories (called\n``Index`` in pandas):\n\n.. code-block:: python\n\n  # string type\n  {\"acoustic\": 0, \"indie\": 1, \"blues\": 2, \"country\": 3}\n  # integer type\n  {-1: 0, 1: 1, 3: 2, 7: 3}\n  # depending on the dataframe implementation, it can be signed or unsigned.\n  {5: 0, 1: 1, 3: 2, 7: 3}\n  # floating point type, both 32-bit and 64-bit are supported.\n  {-1.0: 0, 1.0: 1, 3.0: 2, 7.0: 3}\n\nInternally, XGBoost attempts to extract the categories from the dataframe inputs. For\ninference (predict), the re-coding happens on the fly and there's no data copy (barring\nsome internal transformations performed by the dataframe itself). 
For training\ncontinuation however, re-coding requires some extra steps if you are using the native\ninterface. The sklearn interface and the Dask interface can handle training continuation\nautomatically. Last, please note that using the re-coder with the native interface is\nstill experimental. It's ready for testing, but we want to observe the feature usage for a\nperiod of time and might make some breaking changes if needed. The following is a snippet\nof using the native interface:\n\n.. code-block:: python\n\n  import pandas as pd\n\n  X = pd.DataFrame()\n  Xy = xgboost.QuantileDMatrix(X, y, enable_categorical=True)\n  booster = xgboost.train({}, Xy)\n\n  # XGBoost can handle re-coding for inference without user intervention\n  X_new = pd.DataFrame()\n  booster.inplace_predict(X_new)\n\n  # Get categories saved in the model for training continuation\n  categories = booster.get_categories()\n  # Use saved categories as a reference for re-coding.\n  # Training continuation requires a re-coded DMatrix, pass the categories as feature_types\n  Xy_new = xgboost.QuantileDMatrix(\n    X_new, y_new, feature_types=categories, enable_categorical=True, ref=Xy\n  )\n  booster_1 = xgboost.train({}, Xy_new, xgb_model=booster)\n\n\nNo extra step is required for using the scikit-learn interface as long as the inputs are\ndataframes. During training continuation, XGBoost will either extract the categories from\nthe previous model or use the categories from the new training dataset if the input model\ndoesn't have the information. As a side note, users can inspect the content of the\ncategories by exporting it to arrow arrays. This interface is still experimental:\n\n.. 
code-block:: python\n\n  categories = booster.get_categories(export_to_arrow=True)\n  print(categories.to_arrow())\n\n\nIn addition to the notes above, there's a `blog post\n<https://developer.nvidia.com/blog/training-xgboost-models-with-gpu-accelerated-polars-dataframes/>`__\nabout using XGBoost with Polars for categorical features with various examples.\n\nThe re-coder handles missing categories at inference time. However, if there's a new\ncategory during inference that's unseen during training (missing during training), a\nre-coder doesn't help as it doesn't know what would be a valid code. There are various\nheuristics for handling unseen categories during inference. The best and simplest approach\nis to re-train the model since a new category represents a new type of data. The type of a\ncategorical feature is defined by the set of discrete values. If the set is changed, then\nthe type is considered to be different. In addition, one might add an \"unknown\" category\nduring training and synthesize some samples with this category as missing values. Lastly,\nyou might consider the new category similar to an existing one based on your domain\nknowledge, and map to that category during ETL.\n\nFor **R**, the auto-recoding is not yet supported as of 3.1. To provide an example:\n\n.. code-block:: R\n\n    > f0 = factor(c(\"a\", \"b\", \"c\"))\n    > as.numeric(f0)\n    [1] 1 2 3\n    > f0\n    [1] a b c\n    Levels: a b c\n\nIn the above snippet, we have the mapping: ``a -> 1, b -> 2, c -> 3``. Assuming the above\nis the training data, and the next snippet is the test data:\n\n.. code-block:: R\n\n    > f1 = factor(c(\"a\", \"c\"))\n    > as.numeric(f1)\n    [1] 1 2\n    > f1\n    [1] a c\n    Levels: a c\n\n\nNow, we have ``a -> 1, c -> 2`` because ``b`` is missing, and the R factor encodes the data\ndifferently, resulting in invalid test-time encoding. XGBoost cannot remember the original\nencoding for the R package. 
You will have to encode the data explicitly during inference:\n\n.. code-block:: R\n\n    > f1 = factor(c(\"a\", \"c\"), levels = c(\"a\", \"b\", \"c\"))\n    > f1\n    [1] a c\n    Levels: a b c\n    > as.numeric(f1)\n      [1] 1 3\n\n\n*************\nMiscellaneous\n*************\n\nBy default, XGBoost assumes input category codes are integers starting from 0 till the\nnumber of categories :math:`[0, n\\_categories)`. However, user might provide inputs with\ninvalid values due to mistakes or missing values in training dataset. It can be negative\nvalue, integer values that can not be accurately represented by 32-bit floating point, or\nvalues that are larger than actual number of unique categories.  During training this is\nvalidated but for prediction it's treated as the same as not-chosen category for\nperformance reasons.\n\n\n**********\nReferences\n**********\n\n[1] Walter D. Fisher. \"`On Grouping for Maximum Homogeneity`_\". Journal of the American Statistical Association. Vol. 53, No. 284 (Dec., 1958), pp. 789-798.\n\n[2] Trevor Hastie, Robert Tibshirani, Jerome Friedman. \"`The Elements of Statistical Learning`_\". Springer Series in Statistics Springer New York Inc. (2001).\n\n[3] Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, Tie-Yan Liu. \"`LightGBM\\: A Highly Efficient Gradient Boosting Decision Tree`_.\" Advances in Neural Information Processing Systems 30 (NIPS 2017), pp. 3149-3157.\n\n\n.. _On Grouping for Maximum Homogeneity: https://www.tandfonline.com/doi/abs/10.1080/01621459.1958.10501479\n\n.. _The Elements of Statistical Learning: https://link.springer.com/book/10.1007/978-0-387-84858-7\n\n.. _LightGBM\\: A Highly Efficient Gradient Boosting Decision Tree: https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree.pdf\n"
  },
  {
    "path": "doc/tutorials/custom_metric_obj.rst",
    "content": "######################################\nCustom Objective and Evaluation Metric\n######################################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********\nOverview\n********\n\nXGBoost is designed to be an extensible library.  One way to extend it is by providing our\nown objective function for training and corresponding metric for performance monitoring.\nThis document introduces implementing a customized elementwise evaluation metric and\nobjective for XGBoost. Although the introduction uses Python for demonstration, the\nconcepts should be readily applicable to other language bindings.\n\n.. note::\n\n   * The ranking task does not support customized functions.\n   * Breaking change was made in XGBoost 1.6.\n\nSee also the advanced usage example for more information about limitations and\nworkarounds for more complex objectives: :doc:`/tutorials/advanced_custom_obj`\n\nIn the following two sections, we will provide a step by step walk through of implementing\nthe ``Squared Log Error (SLE)`` objective function:\n\n.. math::\n   \\frac{1}{2}[\\log(pred + 1) - \\log(label + 1)]^2\n\nand its default metric ``Root Mean Squared Log Error(RMSLE)``:\n\n.. math::\n   \\sqrt{\\frac{1}{N}\\sum_{i=1}^{N}[\\log(pred_i + 1) - \\log(label_i + 1)]^2}\n\nAlthough XGBoost has native support for said functions, using it for demonstration\nprovides us the opportunity of comparing the result from our own implementation and the\none from XGBoost internal for learning purposes.  After finishing this tutorial, we should\nbe able to provide our own functions for rapid experiments.  And at the end, we will\nprovide some notes on non-identity link function along with examples of using custom metric\nand objective with the `scikit-learn` interface.\n\nIf we compute the gradient of said objective function:\n\n.. 
math::\n   g = \\frac{\\partial{objective}}{\\partial{pred}} = \\frac{\\log(pred + 1) - \\log(label + 1)}{pred + 1}\n\nAs well as the hessian (the second derivative of the objective):\n\n.. math::\n   h = \\frac{\\partial^2{objective}}{\\partial{pred}^2} = \\frac{ - \\log(pred + 1) + \\log(label + 1) + 1}{(pred + 1)^2}\n\n*****************************\nCustomized Objective Function\n*****************************\n\nDuring model training, the objective function plays an important role: provide gradient\ninformation, both first and second order gradient, based on model predictions and observed\ndata labels (or targets).  Therefore, a valid objective function should accept two inputs,\nnamely prediction and labels.  For implementing ``SLE``, we define:\n\n.. code-block:: python\n\n    import numpy as np\n    import xgboost as xgb\n    from typing import Tuple\n\n    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        '''Compute the gradient squared log error.'''\n        y = dtrain.get_label()\n        return (np.log1p(predt) - np.log1p(y)) / (predt + 1)\n\n    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n        '''Compute the hessian for squared log error.'''\n        y = dtrain.get_label()\n        return ((-np.log1p(predt) + np.log1p(y) + 1) /\n                np.power(predt + 1, 2))\n\n    def squared_log(predt: np.ndarray,\n                    dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n        '''Squared Log Error objective. A simplified version for RMSLE used as\n        objective function.\n        '''\n        predt[predt < -1] = -1 + 1e-6\n        grad = gradient(predt, dtrain)\n        hess = hessian(predt, dtrain)\n        return grad, hess\n\n\nIn the above code snippet, ``squared_log`` is the objective function we want.  It accepts a\nnumpy array ``predt`` as model prediction, and the training DMatrix for obtaining required\ninformation, including labels and weights (not used here).  
This objective is then used as\na callback function for XGBoost during training by passing it as an argument to\n``xgb.train``:\n\n.. code-block:: python\n\n   xgb.train({'tree_method': 'hist', 'seed': 1994},  # any other tree method is fine.\n              dtrain=dtrain,\n              num_boost_round=10,\n              obj=squared_log)\n\nNotice that in our definition of the objective, whether we subtract the labels from the\nprediction or the other way around is important.  If you find the training error goes up\ninstead of down, this might be the reason.\n\n\n**************************\nCustomized Metric Function\n**************************\n\nSo after having a customized objective, we might also need a corresponding metric to\nmonitor our model's performance.  As mentioned above, the default metric for ``SLE`` is\n``RMSLE``.  Similarly we define another callback like function as the new metric:\n\n.. code-block:: python\n\n    def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n        ''' Root mean squared log error metric.'''\n        y = dtrain.get_label()\n        predt[predt < -1] = -1 + 1e-6\n        elements = np.power(np.log1p(y) - np.log1p(predt), 2)\n        return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))\n\nSince we are demonstrating in Python, the metric or objective need not be a function, any\ncallable object should suffice.  Similar to the objective function, our metric also\naccepts ``predt`` and ``dtrain`` as inputs, but returns the name of the metric itself and\na floating point value as the result.  After passing it into XGBoost as argument of\n``custom_metric`` parameter:\n\n.. 
code-block:: python\n\n    xgb.train({'tree_method': 'hist', 'seed': 1994,\n               'disable_default_eval_metric': 1},\n              dtrain=dtrain,\n              num_boost_round=10,\n              obj=squared_log,\n              custom_metric=rmsle,\n              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],\n              evals_result=results)\n\nWe will be able to see XGBoost printing something like:\n\n.. code-block:: none\n\n    [0] dtrain-PyRMSLE:1.37153  dtest-PyRMSLE:1.31487\n    [1] dtrain-PyRMSLE:1.26619  dtest-PyRMSLE:1.20899\n    [2] dtrain-PyRMSLE:1.17508  dtest-PyRMSLE:1.11629\n    [3] dtrain-PyRMSLE:1.09836  dtest-PyRMSLE:1.03871\n    [4] dtrain-PyRMSLE:1.03557  dtest-PyRMSLE:0.977186\n    [5] dtrain-PyRMSLE:0.985783 dtest-PyRMSLE:0.93057\n    ...\n\nNotice that the parameter ``disable_default_eval_metric`` is used to suppress the default metric\nin XGBoost.\n\nFor fully reproducible source code and comparison plots, see\n:ref:`sphx_glr_python_examples_custom_rmsle.py`.\n\n*********************\nReverse Link Function\n*********************\n\nWhen using builtin objective, the raw prediction is transformed according to the objective\nfunction.  When a custom objective is provided XGBoost doesn't know its link function so the\nuser is responsible for making the transformation for both objective and custom evaluation\nmetric.  For objective with identity link like ``squared error`` this is trivial, but for\nother link functions like log link or inverse link the difference is significant.\n\nFor the Python package, the behaviour of prediction can be controlled by the\n``output_margin`` parameter in ``predict`` function.  When using the ``custom_metric``\nparameter without a custom objective, the metric function will receive transformed\nprediction since the objective is defined by XGBoost. However, when the custom objective is\nalso provided along with that metric, then both the objective and custom metric will\nreceive raw prediction.  
The following example provides a comparison between two different\nbehaviors with a multi-class classification model. Firstly we define 2 different Python\nmetric functions implementing the same underlying metric for comparison,\n`merror_with_transform` is used when custom objective is also used, otherwise the simpler\n`merror` is preferred since XGBoost can perform the transformation itself.\n\n.. code-block:: python\n\n    import xgboost as xgb\n    import numpy as np\n\n    def merror_with_transform(predt: np.ndarray, dtrain: xgb.DMatrix):\n        \"\"\"Used when custom objective is supplied.\"\"\"\n        y = dtrain.get_label()\n        n_classes = predt.size // y.shape[0]\n        # Like custom objective, the predt is untransformed leaf weight when custom objective\n        # is provided.\n\n        # With the use of `custom_metric` parameter in train function, custom metric receives\n        # raw input only when custom objective is also being used.  Otherwise custom metric\n        # will receive transformed prediction.\n        assert predt.shape == (dtrain.num_row(), n_classes)\n        out = np.zeros(dtrain.num_row())\n        for r in range(predt.shape[0]):\n            i = np.argmax(predt[r])\n            out[r] = i\n\n        assert y.shape == out.shape\n\n        errors = np.zeros(dtrain.num_row())\n        errors[y != out] = 1.0\n        return 'PyMError', np.sum(errors) / dtrain.num_row()\n\nThe above function is only needed when we want to use custom objective and XGBoost doesn't\nknow how to transform the prediction.  The normal implementation for multi-class error\nfunction is:\n\n.. 
code-block:: python\n\n    def merror(predt: np.ndarray, dtrain: xgb.DMatrix):\n        \"\"\"Used when there's no custom objective.\"\"\"\n        # No need to do transform, XGBoost handles it internally.\n        y = dtrain.get_label()\n        errors = np.zeros(dtrain.num_row())\n        errors[y != predt] = 1.0\n        return 'PyMError', np.sum(errors) / dtrain.num_row()\n\n\nNext we need the custom softprob objective:\n\n.. code-block:: python\n\n    def softprob_obj(predt: np.ndarray, data: xgb.DMatrix):\n        \"\"\"Loss function.  Computing the gradient and approximated hessian (diagonal).\n        Reimplements the `multi:softprob` inside XGBoost.\n        \"\"\"\n\n        # Full implementation is available in the Python demo script linked below\n        ...\n\n        return grad, hess\n\nLastly we can train the model using ``obj`` and ``custom_metric`` parameters:\n\n.. code-block:: python\n\n    m = xgb.DMatrix(X, y)\n    booster = xgb.train(\n        {\"num_class\": kClasses, \"disable_default_eval_metric\": True},\n        m,\n        num_boost_round=kRounds,\n        obj=softprob_obj,\n        custom_metric=merror_with_transform,\n        evals_result=custom_results,\n        evals=[(m, \"train\")],\n    )\n\nOr if you don't need the custom objective and just want to supply a metric that's not\navailable in XGBoost:\n\n.. code-block:: python\n\n    booster = xgb.train(\n        {\n            \"num_class\": kClasses,\n            \"disable_default_eval_metric\": True,\n            \"objective\": \"multi:softmax\",\n        },\n        m,\n        num_boost_round=kRounds,\n        # Use a simpler metric implementation.\n        custom_metric=merror,\n        evals_result=custom_results,\n        evals=[(m, \"train\")],\n    )\n\nWe use ``multi:softmax`` to illustrate the differences of transformed prediction.  With\n``softprob`` the output prediction array has shape ``(n_samples, n_classes)`` while for\n``softmax`` it's ``(n_samples, )``. 
A demo for multi-class objective function is also\navailable at :ref:`sphx_glr_python_examples_custom_softmax.py`. Also, see\n:doc:`/tutorials/intercept` for some more explanation.\n\n\n**********************\nScikit-Learn Interface\n**********************\n\nThe scikit-learn interface of XGBoost has some utilities to improve the integration with\nstandard scikit-learn functions.  For instance, after XGBoost 1.6.0 users can use the cost\nfunction (not scoring functions) from scikit-learn out of the box:\n\n.. code-block:: python\n\n    from sklearn.datasets import load_diabetes\n    from sklearn.metrics import mean_absolute_error\n    X, y = load_diabetes(return_X_y=True)\n    reg = xgb.XGBRegressor(\n        tree_method=\"hist\",\n        eval_metric=mean_absolute_error,\n    )\n    reg.fit(X, y, eval_set=[(X, y)])\n\nAlso, for custom objective function, users can define the objective without having to\naccess ``DMatrix``:\n\n.. code-block:: python\n\n    def softprob_obj(labels: np.ndarray, predt: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:\n        rows = labels.shape[0]\n        classes = predt.shape[1]\n        grad = np.zeros((rows, classes), dtype=float)\n        hess = np.zeros((rows, classes), dtype=float)\n        eps = 1e-6\n        for r in range(predt.shape[0]):\n            target = labels[r]\n            p = softmax(predt[r, :])\n            for c in range(predt.shape[1]):\n                g = p[c] - 1.0 if c == target else p[c]\n                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)\n                grad[r, c] = g\n                hess[r, c] = h\n\n        grad = grad.reshape((rows * classes, 1))\n        hess = hess.reshape((rows * classes, 1))\n        return grad, hess\n\n    clf = xgb.XGBClassifier(tree_method=\"hist\", objective=softprob_obj)\n"
  },
  {
    "path": "doc/tutorials/dart.rst",
    "content": "####\nDART\n####\nXGBoost mostly combines a huge number of regression trees with a small learning rate.\nIn this situation, trees added early are significant and trees added late are unimportant.\n\nVinayak and Gilad-Bachrach proposed a new method to add dropout techniques from the deep neural net community to boosted trees, and reported better results in some situations.\n\nThis is an introduction to the dropout mode for tree models. Dropout is controlled by\nparameters like ``rate_drop``. The legacy ``dart`` booster name remains available for\ncompatibility.\n\n**************\nOriginal paper\n**************\nRashmi Korlakai Vinayak, Ran Gilad-Bachrach. \"DART: Dropouts meet Multiple Additive Regression Trees.\" [`PMLR <http://proceedings.mlr.press/v38/korlakaivinayak15.pdf>`_, `arXiv <https://arxiv.org/abs/1505.01866>`_].\n\n********\nFeatures\n********\n- Drop trees in order to solve the over-fitting.\n\n  - Trivial trees (to correct trivial errors) may be prevented.\n\nBecause of the randomness introduced in the training, expect the following few differences:\n\n- Training can be slower than ``gbtree`` because the random dropout prevents usage of the prediction buffer.\n- The early stop might not be stable, due to the randomness.\n\n************\nHow it works\n************\n- In :math:`m`-th training round, suppose :math:`k` trees are selected to be dropped.\n- Let :math:`D = \\sum_{i \\in \\mathbf{K}} F_i` be the leaf scores of dropped trees and :math:`F_m = \\eta \\tilde{F}_m` be the leaf scores of a new tree.\n- The objective function is as follows:\n\n.. math::\n\n  \\mathrm{Obj}\n  = \\sum_{j=1}^n L \\left( y_j, \\hat{y}_j^{m-1} - D_j + \\tilde{F}_m \\right)\n  + \\Omega \\left( \\tilde{F}_m \\right).\n\n- :math:`D` and :math:`F_m` are overshooting, so using scale factor\n\n.. 
math::\n\n  \\hat{y}_j^m = \\sum_{i \\not\\in \\mathbf{K}} F_i + a \\left( \\sum_{i \\in \\mathbf{K}} F_i + b F_m \\right) .\n\n**********\nParameters\n**********\n\nDropout uses the same tree parameters as ``gbtree``, such as ``eta``, ``gamma``,\n``max_depth``, and others.\n\nAdditional parameters are noted below:\n\n* ``sample_type``: type of sampling algorithm.\n\n  - ``uniform``: (default) dropped trees are selected uniformly.\n  - ``weighted``: dropped trees are selected in proportion to weight.\n\n* ``normalize_type``: type of normalization algorithm.\n\n  - ``tree``: (default) New trees have the same weight of each of dropped trees.\n\n  .. math::\n\n    a \\left( \\sum_{i \\in \\mathbf{K}} F_i + \\frac{1}{k} F_m \\right)\n    &= a \\left( \\sum_{i \\in \\mathbf{K}} F_i + \\frac{\\eta}{k} \\tilde{F}_m \\right) \\\\\n    &\\sim a \\left( 1 + \\frac{\\eta}{k} \\right) D \\\\\n    &= a \\frac{k + \\eta}{k} D = D , \\\\\n    &\\quad a = \\frac{k}{k + \\eta}\n\n  - ``forest``: New trees have the same weight of sum of dropped trees (forest).\n\n  .. math::\n\n    a \\left( \\sum_{i \\in \\mathbf{K}} F_i + F_m \\right)\n    &= a \\left( \\sum_{i \\in \\mathbf{K}} F_i + \\eta \\tilde{F}_m \\right) \\\\\n    &\\sim a \\left( 1 + \\eta \\right) D \\\\\n    &= a (1 + \\eta) D = D , \\\\\n    &\\quad a = \\frac{1}{1 + \\eta} .\n\n* ``rate_drop``: dropout rate.\n\n  - range: [0.0, 1.0]\n\n* ``skip_drop``: probability of skipping dropout.\n\n  - If a dropout is skipped, new trees are added in the same manner as gbtree.\n  - range: [0.0, 1.0]\n\n*************\nSample Script\n*************\n\n.. 
code-block:: python\n\n  import xgboost as xgb\n  # read in data\n  dtrain = xgb.DMatrix('demo/data/agaricus.txt.train?format=libsvm')\n  dtest = xgb.DMatrix('demo/data/agaricus.txt.test?format=libsvm')\n  # specify parameters via map\n  param = {'max_depth': 5, 'learning_rate': 0.1,\n           'objective': 'binary:logistic',\n           'sample_type': 'uniform',\n           'normalize_type': 'tree',\n           'rate_drop': 0.1,\n           'skip_drop': 0.5}\n  num_round = 50\n  bst = xgb.train(param, dtrain, num_round)\n  preds = bst.predict(dtest)\n"
  },
  {
    "path": "doc/tutorials/dask.rst",
    "content": "#############################\nDistributed XGBoost with Dask\n#############################\n\n`Dask <https://dask.org>`_ is a parallel computing library built on Python. Dask allows\neasy management of distributed workers and excels at handling large distributed data\nscience workflows.  The implementation in XGBoost originates from `dask-xgboost\n<https://github.com/dask/dask-xgboost>`_ with some extended functionalities and a\ndifferent interface.  The tutorial here focuses on basic usage of dask with CPU tree\nalgorithms.  For an overview of GPU based training and internal workings, see `A New,\nOfficial Dask API for XGBoost\n<https://medium.com/rapids-ai/a-new-official-dask-api-for-xgboost-e8b10f3d1eb7>`_.\n\n.. note::\n\n  The integration is not tested with Windows.\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n************\nRequirements\n************\n\nDask can be installed using either pip or conda (see the dask `installation\ndocumentation <https://docs.dask.org/en/latest/install.html>`_ for more information).  For\naccelerating XGBoost with GPUs, `dask-cuda <https://github.com/rapidsai/dask-cuda>`__ is\nrecommended for creating GPU clusters.\n\n\n********\nOverview\n********\n\nA dask cluster consists of three different components: a centralized scheduler, one or\nmore workers, and one or more clients which act as the user-facing entry point for submitting\ntasks to the cluster.  When using XGBoost with dask, one needs to call the XGBoost dask interface\nfrom the client side.  Below is a small example which illustrates basic usage of running XGBoost\non a dask cluster:\n\n.. 
code-block:: python\n\n    from xgboost import dask as dxgb\n\n    import dask.array as da\n    import dask.distributed\n\n    if __name__ == \"__main__\":\n        cluster = dask.distributed.LocalCluster()\n        client = dask.distributed.Client(cluster)\n\n        # X and y must be Dask dataframes or arrays\n        num_obs = 100_000\n        num_features = 20\n        X = da.random.random(size=(num_obs, num_features), chunks=(1000, num_features))\n        y = da.random.random(size=(num_obs, 1), chunks=(1000, 1))\n\n        dtrain = dxgb.DaskDMatrix(client, X, y)\n        # or\n        # dtrain = dxgb.DaskQuantileDMatrix(client, X, y)\n\n        output = dxgb.train(\n            client,\n            {\"verbosity\": 2, \"tree_method\": \"hist\", \"objective\": \"reg:squarederror\"},\n            dtrain,\n            num_boost_round=4,\n            evals=[(dtrain, \"train\")],\n        )\n\nHere we first create a cluster in single-node mode with\n:py:class:`distributed.LocalCluster`, then connect a :py:class:`distributed.Client` to\nthis cluster, setting up an environment for later computation.  Notice that the cluster\nconstruction is guarded by ``__name__ == \"__main__\"``, which is necessary; otherwise there\nmight be obscure errors.\n\nWe then create a :py:class:`xgboost.dask.DaskDMatrix` object and pass it to\n:py:func:`xgboost.dask.train`, along with some other parameters, much like XGBoost's\nnormal, non-dask interface. Unlike that interface, ``data`` and ``label`` must be either\n:py:class:`Dask DataFrame <dask.dataframe.DataFrame>` or :py:class:`Dask Array\n<dask.array.Array>` instances.\n\nThe primary difference with XGBoost's dask interface is\nwe pass our dask client as an additional argument for carrying out the computation. Note that if\nclient is set to ``None``, XGBoost will use the default client returned by dask.\n\nThere are two sets of APIs implemented in XGBoost.  The first set is the functional API\nillustrated in the above example.  
Given the data and a set of parameters, the ``train`` function\nreturns a model and the computation history as a Python dictionary:\n\n.. code-block:: python\n\n  {\n    \"booster\": Booster,\n    \"history\": dict,\n  }\n\nFor prediction, pass the ``output`` returned by ``train`` into :py:func:`xgboost.dask.predict`:\n\n.. code-block:: python\n\n  prediction = dxgb.predict(client, output, dtrain)\n  # Or equivalently, pass ``output['booster']``:\n  prediction = dxgb.predict(client, output['booster'], dtrain)\n\nEliminating the construction of DaskDMatrix is also possible; this can make the\ncomputation a bit faster when meta information like ``base_margin`` is not needed:\n\n.. code-block:: python\n\n  prediction = dxgb.predict(client, output, X)\n  # Use inplace version.\n  prediction = dxgb.inplace_predict(client, output, X)\n\nHere ``prediction`` is a dask ``Array`` object containing predictions from model if input\nis a ``DaskDMatrix`` or ``da.Array``.  When putting dask collection directly into the\n``predict`` function or using :py:func:`xgboost.dask.inplace_predict`, the output type\ndepends on input data.  See next section for details.\n\nAlternatively, XGBoost also implements the Scikit-Learn interface with\n:py:class:`~xgboost.dask.DaskXGBClassifier`, :py:class:`~xgboost.dask.DaskXGBRegressor`,\n:py:class:`~xgboost.dask.DaskXGBRanker` and 2 random forest variants.  This wrapper is\nsimilar to the single node Scikit-Learn interface in xgboost, with dask collection as\ninputs and has an additional ``client`` attribute.  See following sections and\n:ref:`dask-examples` for more examples.\n\n\n******************\nRunning prediction\n******************\n\nIn previous example we used ``DaskDMatrix`` as input to ``predict`` function.  In\npractice, it's also possible to call ``predict`` function directly on dask collections\nlike ``Array`` and ``DataFrame`` and might have better prediction performance.  
When\n``DataFrame`` is used as prediction input, the result is a dask ``Series`` instead of\narray.  Also, there's in-place predict support on dask interface, which can help reduce\nboth memory usage and prediction time.\n\n.. code-block:: python\n\n  # dtrain is the DaskDMatrix defined above.\n  prediction = dxgb.predict(client, booster, dtrain)\n\nor equivalently:\n\n.. code-block:: python\n\n  # where X is a dask DataFrame or dask Array.\n  prediction = dxgb.predict(client, booster, X)\n\nAlso for inplace prediction:\n\n.. code-block:: python\n\n  # where X is a dask DataFrame or dask Array backed by cupy or cuDF.\n  booster.set_param({\"device\": \"cuda\"})\n  prediction = dxgb.inplace_predict(client, booster, X)\n\nWhen input is ``da.Array`` object, output is always ``da.Array``.  However, if the input\ntype is ``dd.DataFrame``, output can be ``dd.Series``, ``dd.DataFrame`` or ``da.Array``,\ndepending on output shape.  For example, when SHAP-based prediction is used, the return\nvalue can have 3 or 4 dimensions; in such cases an ``Array`` is always returned.\n\nThe performance of running prediction, either using ``predict`` or ``inplace_predict``, is\nsensitive to number of blocks.  Internally, it's implemented using ``da.map_blocks`` and\n``dd.map_partitions``.  When number of partitions is large and each of them has only a\nsmall amount of data, the overhead of calling predict becomes visible.  On the other hand,\nif not using GPU, the number of threads used for prediction on each block matters.  Right\nnow, xgboost uses single thread for each partition.  If the number of blocks on each\nworker is smaller than number of cores, then the CPU workers might not be fully utilized.\n\nOne simple optimization for running consecutive predictions is using\n:py:class:`distributed.Future`:\n\n.. 
code-block:: python\n\n    dataset = [X_0, X_1, X_2]\n    booster_f = client.scatter(booster, broadcast=True)\n    futures = []\n    for X in dataset:\n        # Here we pass in a future instead of concrete booster\n        shap_f = dxgb.predict(client, booster_f, X, pred_contribs=True)\n        futures.append(shap_f)\n\n    results = client.gather(futures)\n\n\nThis is only available on functional interface, as the Scikit-Learn wrapper doesn't know\nhow to maintain a valid future for booster.  To obtain the booster object from\nScikit-Learn wrapper object:\n\n.. code-block:: python\n\n    cls = dxgb.DaskXGBClassifier()\n    cls.fit(X, y)\n\n    booster = cls.get_booster()\n\n\n********************************\nScikit-Learn Estimator Interface\n********************************\n\nAs mentioned previously, there's another interface that mimics the scikit-learn estimators\nwith a higher level of abstraction.  The interface is easier to use compared to the\nfunctional interface but with more constraints.  It's worth mentioning that, although the\ninterface mimics scikit-learn estimators, it doesn't work with normal scikit-learn\nutilities like ``GridSearchCV`` as scikit-learn doesn't understand distributed dask data\ncollection.\n\n\n.. 
code-block:: python\n\n    from distributed import LocalCluster, Client\n    from xgboost import dask as dxgb\n\n\n    def main(client: Client) -> None:\n        X, y = load_data()\n        clf = dxgb.DaskXGBClassifier(n_estimators=100, tree_method=\"hist\")\n        clf.client = client  # assign the client\n        clf.fit(X, y, eval_set=[(X, y)])\n        proba = clf.predict_proba(X)\n\n\n    if __name__ == \"__main__\":\n        with LocalCluster() as cluster:\n            with Client(cluster) as client:\n                main(client)\n\n\n****************\nGPU acceleration\n****************\n\nFor most of the use cases with GPUs, the `Dask-CUDA <https://docs.rapids.ai/api/dask-cuda/stable/quickstart.html>`__ project should be used to create the cluster, which automatically configures the correct device ordinal for worker processes. As a result, users should NOT specify the ordinal (good: ``device=cuda``, bad: ``device=cuda:1``). See :ref:`sphx_glr_python_dask-examples_gpu_training.py` and :ref:`sphx_glr_python_dask-examples_sklearn_gpu_training.py` for worked examples.\n\n***************************\nWorking with other clusters\n***************************\n\nUsing Dask's ``LocalCluster`` is convenient for getting started quickly on a local machine. Once you're ready to scale your work, though, there are a number of ways to deploy Dask on a distributed cluster. You can use `Dask-CUDA <https://docs.rapids.ai/api/dask-cuda/stable/quickstart.html>`_, for example, for GPUs and you can use Dask Cloud Provider to `deploy Dask clusters in the cloud <https://docs.dask.org/en/stable/deploying.html#cloud>`_. See the `Dask documentation for a more comprehensive list <https://docs.dask.org/en/stable/deploying.html>`__.\n\nIn the example below, a ``KubeCluster`` is used for `deploying Dask on Kubernetes <https://docs.dask.org/en/stable/deploying-kubernetes.html>`_:\n\n.. 
code-block:: python\n\n  from dask_kubernetes.operator import KubeCluster  # Need to install the ``dask-kubernetes`` package\n  from dask_kubernetes.operator.kubecluster.kubecluster import CreateMode\n\n  from dask.distributed import Client\n  from xgboost import dask as dxgb\n  import dask.array as da\n\n\n  def main():\n      '''Connect to a remote kube cluster with GPU nodes and run training on it.'''\n      m = 1000\n      n = 10\n      kWorkers = 2                # assuming you have 2 GPU nodes on that cluster.\n      # You need to work out the worker-spec yourself.  See document in dask_kubernetes for\n      # its usage.  Here we just want to show that XGBoost works on various clusters.\n\n      # See notes below for why we use pre-allocated cluster.\n      with KubeCluster(\n          name=\"xgboost-test\",\n          image=\"my-image-name:latest\",\n          n_workers=kWorkers,\n          create_mode=CreateMode.CONNECT_ONLY,\n          shutdown_on_close=False,\n      ) as cluster:\n          with Client(cluster) as client:\n              X = da.random.random(size=(m, n), chunks=100)\n              y = X.sum(axis=1)\n\n              regressor = dxgb.DaskXGBRegressor(n_estimators=10, missing=0.0)\n              regressor.client = client\n              regressor.set_params(tree_method='hist', device=\"cuda\")\n              regressor.fit(X, y, eval_set=[(X, y)])\n\n\n  if __name__ == '__main__':\n      # Launch the kube cluster on somewhere like GKE, then run this as client process.\n      # main function will connect to that cluster and start training xgboost model.\n      main()\n\n\nDifferent cluster classes might have subtle differences like network configuration, or\nspecific cluster implementation might contain bugs that we are not aware of.  
Open an\nissue if such a case is found and there's no documentation on how to resolve it in that\ncluster implementation.\n\nAn interesting aspect of the Kubernetes cluster is that the pods may become available\nafter the Dask workflow has begun, which can cause issues with distributed XGBoost since\nXGBoost expects the nodes used by input data to remain unchanged during training. To use\nKubernetes clusters, it is necessary to wait for all the pods to be online before\nsubmitting XGBoost tasks. One can either create a wait function in Python or simply\npre-allocate a cluster with k8s tools (like ``kubectl``) before running dask workflows. To\npre-allocate a cluster, we can first generate the cluster spec using dask kubernetes:\n\n.. code-block:: python\n\n    import json\n\n    from dask_kubernetes.operator import make_cluster_spec\n\n    spec = make_cluster_spec(name=\"xgboost-test\", image=\"my-image-name:latest\", n_workers=16)\n    with open(\"cluster-spec.json\", \"w\") as fd:\n        json.dump(spec, fd, indent=2)\n\n.. code-block:: sh\n\n    kubectl apply -f ./cluster-spec.json\n\n\nCheck whether the pods are available:\n\n.. code-block:: sh\n\n    kubectl get pods\n\nOnce all pods have been initialized, the Dask XGBoost workflow can be run, as in the\nprevious example. It is important to ensure that the cluster sets the parameter\n``create_mode=CreateMode.CONNECT_ONLY`` and optionally ``shutdown_on_close=False`` if you\ndo not want to shut down the cluster after a single job.\n\n*******\nThreads\n*******\n\nXGBoost has built-in support for parallel computation through threads by setting the\n``nthread`` parameter (``n_jobs`` for scikit-learn).  If these parameters are set, they\nwill override the configuration in Dask.  For example:\n\n.. code-block:: python\n\n  with dask.distributed.LocalCluster(n_workers=7, threads_per_worker=4) as cluster:\n\nThere are 4 threads allocated for each dask worker.  
Then by default XGBoost will use 4\nthreads in each process for training.  But if ``nthread`` parameter is set:\n\n.. code-block:: python\n\n    output = dxgb.train(\n        client,\n        {\"verbosity\": 1, \"nthread\": 8, \"tree_method\": \"hist\"},\n        dtrain,\n        num_boost_round=4,\n        evals=[(dtrain, \"train\")],\n    )\n\nXGBoost will use 8 threads in each training process.\n\n********************\nWorking with asyncio\n********************\n\n.. versionadded:: 1.2.0\n\nXGBoost's dask interface supports the new :py:mod:`asyncio` in Python and can be\nintegrated into asynchronous workflows.  For using dask with asynchronous operations,\nplease refer to `this dask example\n<https://examples.dask.org/applications/async-await.html>`_ and document in `distributed\n<https://distributed.dask.org/en/latest/asynchronous.html>`_. To use XGBoost's Dask\ninterface asynchronously, the ``client`` which is passed as an argument for training and\nprediction must be operating in asynchronous mode by specifying ``asynchronous=True`` when\nthe ``client`` is created (example below). All functions (including ``DaskDMatrix``)\nprovided by the functional interface will then return coroutines which can then be awaited\nto retrieve their result. Please note that XGBoost is a compute-bounded application, where\nparallelism is more important than concurrency. The support for `asyncio` is more about\ncompatibility instead of performance gain.\n\nFunctional interface:\n\n.. 
code-block:: python\n\n    async with dask.distributed.Client(scheduler_address, asynchronous=True) as client:\n        X, y = generate_array()\n        m = await dxgb.DaskDMatrix(client, X, y)\n        output = await dxgb.train(client, {}, dtrain=m)\n\n        with_m = await dxgb.predict(client, output, m)\n        with_X = await dxgb.predict(client, output, X)\n        inplace = await dxgb.inplace_predict(client, output, X)\n\n        # Use ``client.compute`` instead of the ``compute`` method from dask collection\n        print(await client.compute(with_m))\n\n\nWhile for the Scikit-Learn interface, trivial methods like ``set_params`` and accessing class\nattributes like ``evals_result()`` do not require ``await``.  Other methods involving\nactual computation will return a coroutine and hence require awaiting:\n\n.. code-block:: python\n\n    async with dask.distributed.Client(scheduler_address, asynchronous=True) as client:\n        X, y = generate_array()\n        regressor = await dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)\n        regressor.set_params(tree_method='hist')  # trivial method, synchronous operation\n        regressor.client = client  #  accessing attribute, synchronous operation\n        regressor = await regressor.fit(X, y, eval_set=[(X, y)])\n        prediction = await regressor.predict(X)\n\n        # Use `client.compute` instead of the `compute` method from dask collection\n        print(await client.compute(prediction))\n\n*****************************\nEvaluation and Early Stopping\n*****************************\n\n.. versionadded:: 1.3.0\n\nThe Dask interface allows the use of validation sets that are stored in distributed collections (Dask DataFrame or Dask Array). These can be used for evaluation and early stopping.\n\nTo enable early stopping, pass one or more validation sets containing ``DaskDMatrix`` objects.\n\n.. 
code-block:: python\n\n    import dask.array as da\n    from xgboost import dask as dxgb\n\n    num_rows = 1e6\n    num_features = 100\n    num_partitions = 10\n    rows_per_chunk = num_rows / num_partitions\n\n    data = da.random.random(\n        size=(num_rows, num_features),\n        chunks=(rows_per_chunk, num_features)\n    )\n\n    labels = da.random.random(\n        size=(num_rows, 1),\n        chunks=(rows_per_chunk, 1)\n    )\n\n    X_eval = da.random.random(\n        size=(num_rows, num_features),\n        chunks=(rows_per_chunk, num_features)\n    )\n\n    y_eval = da.random.random(\n        size=(num_rows, 1),\n        chunks=(rows_per_chunk, 1)\n    )\n\n    dtrain = dxgb.DaskDMatrix(\n        client=client,\n        data=data,\n        label=labels\n    )\n\n    dvalid = dxgb.DaskDMatrix(\n        client=client,\n        data=X_eval,\n        label=y_eval\n    )\n\n    result = dxgb.train(\n        client=client,\n        params={\n            \"objective\": \"reg:squarederror\",\n        },\n        dtrain=dtrain,\n        num_boost_round=10,\n        evals=[(dvalid, \"valid1\")],\n        early_stopping_rounds=3\n    )\n\nWhen validation sets are provided to :py:func:`xgboost.dask.train` in this way, the model object returned by :py:func:`xgboost.dask.train` contains a history of evaluation metrics for each validation set, across all boosting rounds.\n\n.. code-block:: python\n\n    print(result[\"history\"])\n    # {'valid1': OrderedDict([('rmse', [0.28857, 0.28858, 0.288592, 0.288598])])}\n\nIf early stopping is enabled by also passing ``early_stopping_rounds``, you can check the best iteration in the returned booster.\n\n.. 
code-block:: python\n\n    booster = result[\"booster\"]\n    print(booster.best_iteration)\n    best_model = booster[: booster.best_iteration]\n\n\n*******************\nOther customization\n*******************\n\nXGBoost dask interface accepts other advanced features found in single node Python\ninterface, including callback functions, custom evaluation metric and objective:\n\n.. code-block:: python\n\n    def eval_error_metric(predt, dtrain: xgb.DMatrix):\n        label = dtrain.get_label()\n        r = np.zeros(predt.shape)\n        gt = predt > 0.5\n        r[gt] = 1 - label[gt]\n        le = predt <= 0.5\n        r[le] = label[le]\n        return 'CustomErr', np.sum(r)\n\n    # custom callback\n    early_stop = xgb.callback.EarlyStopping(\n        rounds=early_stopping_rounds,\n        metric_name=\"CustomErr\",\n        data_name=\"Train\",\n        save_best=True,\n    )\n\n    booster = dxgb.train(\n        client,\n        params={\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"error\", \"rmse\"],\n            \"tree_method\": \"hist\",\n        },\n        dtrain=D_train,\n        evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n        feval=eval_error_metric,  # custom evaluation metric\n        num_boost_round=100,\n        callbacks=[early_stop],\n    )\n\n**********************\nHyper-parameter tuning\n**********************\n\nSee https://github.com/coiled/dask-xgboost-nyctaxi for a set of examples of using XGBoost\nwith dask and optuna.\n\n\n.. _ltr-dask:\n\n****************\nLearning to Rank\n****************\n\n  .. versionadded:: 3.0.0\n\n  .. note::\n\n     Position debiasing is not yet supported.\n\nThere are two operation modes in the Dask learning to rank for performance reasons. The\ndifference is whether a distributed global sort is needed. Please see :ref:`ltr-dist` for\nhow ranking works with distributed training in general. 
Below we will discuss some of the\nDask-specific features.\n\nFirst, if you use the :py:class:`~xgboost.dask.DaskQuantileDMatrix` interface or the\n:py:class:`~xgboost.dask.DaskXGBRanker` with ``allow_group_split`` set to ``True``,\nXGBoost will try to sort and group the samples for each worker based on the query ID. This\nmode tries to skip the global sort and sort only worker-local data, and hence no\ninter-worker data shuffle. Please note that even worker-local sort is costly, particularly\nin terms of memory usage as there's no spilling when\n:py:meth:`~pandas.DataFrame.sort_values` is used, and we need to concatenate the\ndata. XGBoost first checks whether the QID is already sorted before actually performing\nthe sorting operation. One can choose this if the query groups are relatively consecutive,\nmeaning most of the samples within a query group are close to each other and are likely to\nreside on the same worker. Don't use this if you have performed a random shuffle on\nyour data.\n\nIf the input data is random, then there's no way we can guarantee that most of the data within the\nsame group is on the same worker. For large query groups, this might not be an\nissue. But for small query groups, it's possible that each worker gets only one or two\nsamples from their group for all groups, which can lead to disastrous performance. In that\ncase, we can partition the data according to query group, which is the default behavior of\nthe :py:class:`~xgboost.dask.DaskXGBRanker` unless the ``allow_group_split`` is set to\n``True``. This mode performs a sort and a groupby on the entire dataset in addition to an\nencoding operation for the query group IDs. Along with partition fragmentation, this\noption can lead to slow performance. See\n:ref:`sphx_glr_python_dask-examples_dask_learning_to_rank.py` for a worked example.\n\n.. 
_tracker-ip:\n\n***************\nTroubleshooting\n***************\n\n\n- In some environments XGBoost might fail to resolve the IP address of the scheduler, a\n  symptom is user receiving ``OSError: [Errno 99] Cannot assign requested address`` error\n  during training.  A quick workaround is to specify the address explicitly.  To do that\n  the collective :py:class:`~xgboost.collective.Config` is used:\n\n  .. versionadded:: 3.0.0\n\n.. code-block:: python\n\n    import dask\n    from distributed import Client\n    from xgboost import dask as dxgb\n    from xgboost.collective import Config\n\n    # let xgboost know the scheduler address\n    coll_cfg = Config(retry=1, timeout=20, tracker_host_ip=\"10.23.170.98\", tracker_port=0)\n\n    with Client(scheduler_file=\"sched.json\") as client:\n        reg = dxgb.DaskXGBRegressor(coll_cfg=coll_cfg)\n\n- Please note that XGBoost requires a different port than dask. By default, on a unix-like\n  system XGBoost uses the port 0 to find available ports, which may fail if a user is\n  running in a restricted docker environment. In this case, please open additional ports\n  in the container and specify it as in the above snippet.\n\n- If you encounter a NCCL system error while training with GPU enabled, which usually\n  includes the error message `NCCL failure: unhandled system error`, you can specify its\n  network configuration using one of the environment variables listed in the `NCCL\n  document <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html>`__ such as\n  the ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug\n  logs.\n\n- If NCCL fails to initialize in a container environment, it might be caused by limited\n  system shared memory. With docker, one can try the flag: `--shm-size=4g`.\n\n- MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message\n  that includes `Multiple processes within a communication group ...` upon initialization.\n\n.. 
_nccl-load:\n\n- Starting from version 2.1.0, to reduce the size of the binary wheel, the XGBoost package\n  (installed using pip) loads NCCL from the environment instead of bundling it\n  directly. This means that if you encounter an error message like\n  \"Failed to load nccl ...\", it indicates that NCCL is not installed or properly\n  configured in your environment.\n\n  To resolve this issue, you can install NCCL using pip:\n\n  .. code-block:: sh\n\n    pip install nvidia-nccl-cu12 # (or with any compatible CUDA version)\n\n  The default conda installation of XGBoost should not encounter this error. If you are\n  using a customized XGBoost, please make sure one of the following is true:\n\n  + XGBoost is NOT compiled with the `USE_DLOPEN_NCCL` flag.\n  + The `dmlc_nccl_path` parameter is set to the full NCCL path when initializing the collective.\n\n  Here are some additional tips for troubleshooting NCCL dependency issues:\n\n  + Check the NCCL installation path and verify that it's installed correctly. We try to\n    find NCCL by using ``from nvidia.nccl import lib`` in Python when XGBoost is installed\n    using pip.\n  + Ensure that you have the correct CUDA version installed. NCCL requires a compatible\n    CUDA version to function properly.\n  + If you are not using distributed training with XGBoost and yet see this error, please\n    open an issue on GitHub.\n  + If you continue to encounter NCCL dependency issues, please open an issue on GitHub.\n\n************\nIPv6 Support\n************\n\n.. versionadded:: 1.7.0\n\nXGBoost has initial IPv6 support for the dask interface on Linux. Because most\ncluster support for IPv6 is partial (dual stack instead of IPv6 only), we require\nadditional user configuration similar to :ref:`tracker-ip` to help XGBoost obtain the\ncorrect address information:\n\n.. 
code-block:: python\n\n    import dask\n    from distributed import Client\n    from xgboost import dask as dxgb\n    # let xgboost know the scheduler address, use the same bracket format as dask.\n    with dask.config.set({\"xgboost.scheduler_address\": \"[fd20:b6f:f759:9800::]\"}):\n        with Client(\"[fd20:b6f:f759:9800::]\") as client:\n            reg = dxgb.DaskXGBRegressor(tree_method=\"hist\")\n\n\nWhen GPU is used, XGBoost employs `NCCL <https://developer.nvidia.com/nccl>`_ as the\nunderlying communication framework, which may require some additional configuration via\nenvironment variable depending on the setting of the cluster. Please note that IPv6\nsupport is Unix only.\n\n\n******************************\nLogging the evaluation results\n******************************\n\nBy default, the Dask interface prints evaluation results in the scheduler process. This\nmakes it difficult for a user to monitor training progress. We can define custom\nevaluation monitors using callback functions. See\n:ref:`sphx_glr_python_dask-examples_forward_logging.py` for a worked example on how to\nforward the logs to the client process. In the example, there are two potential solutions\nusing Dask builtin methods, including :py:meth:`distributed.Client.forward_logging` and\n:py:func:`distributed.print`. Both of them have some caveats but can be a good starting\npoint for developing more sophisticated methods like writing to files.\n\n\n*****************************************************************************\nWhy is the initialization of ``DaskDMatrix``  so slow and throws weird errors\n*****************************************************************************\n\nThe dask API in XGBoost requires construction of ``DaskDMatrix``.  With the Scikit-Learn\ninterface, ``DaskDMatrix`` is implicitly constructed for all input data during the ``fit`` or\n``predict`` steps.  
You might have observed that ``DaskDMatrix`` construction can take large amounts of time,\nand sometimes throws errors that don't seem to be relevant to ``DaskDMatrix``.  Here is a\nbrief explanation for why.  By default most dask computations are `lazily evaluated\n<https://docs.dask.org/en/latest/user-interfaces.html#laziness-and-computing>`_, which\nmeans that computation is not carried out until you explicitly ask for a result by, for example,\ncalling ``compute()``.  See the previous link for details in dask, and `this wiki\n<https://en.wikipedia.org/wiki/Lazy_evaluation>`_ for information on the general concept of lazy evaluation.\nThe ``DaskDMatrix`` constructor forces lazy computations to be evaluated, which means it's\nwhere all your earlier computation is actually carried out, including operations like\n``dd.read_csv()``.  To isolate the computation in ``DaskDMatrix`` from other lazy\ncomputations, one can explicitly wait for results of input data before constructing a ``DaskDMatrix``.\nAlso dask's `diagnostics dashboard <https://distributed.dask.org/en/latest/web.html>`_ can be used to\nmonitor what operations are currently being performed.\n\n*******************\nReproducible Result\n*******************\n\nIn a single node mode, we can always expect the same training result between runs as long\nas the underlying platforms are the same. However, it's difficult to obtain reproducible\nresult in a distributed environment, since the tasks might get different machine\nallocation or have different amount of available resources during different\nsessions. There are heuristics and guidelines on how to achieve it but no proven method\nfor guaranteeing such deterministic behavior. The Dask interface in XGBoost tries to\nprovide reproducible result with best effort. This section highlights some known criteria\nand tries to share some insights into the issue.\n\nThere are primarily two different tasks for XGBoost to carry out, training and\ninference. 
Inference is reproducible given the same software and hardware along with the\nsame run-time configurations. The remainder of this section will focus on training.\n\nMany of the challenges come from the fact that we are using approximation algorithms. The\nsketching algorithm used to find histogram bins is an approximation to the exact quantile\nalgorithm, the `AUC` metric in a distributed environment is an approximation to the exact\n`AUC` score, and a floating-point number is an approximation to a real number. Floating-point\nis an issue as its summation is not associative, meaning :math:`(a + b) + c` does not\nnecessarily equal :math:`a + (b + c)`, even though this property holds true for real\nnumbers. As a result, whenever we change the order of a summation, the result can\ndiffer. This imposes the requirement that, in order to have reproducible output from\nXGBoost, the entire pipeline needs to be reproducible.\n\n- The software stack is the same for each run. This goes without saying. XGBoost might\n  generate different outputs between different versions. This is expected as we might\n  change the default value of hyper-parameter, or the parallel strategy that generates\n  different floating-point result. We guarantee the correctness of the algorithms, but there\n  is a lot of wiggle room for the final output. The situation is similar for many\n  dependencies, for instance, the random number generator might differ from platform to\n  platform.\n\n- The hardware stack is the same for each run. This includes the number of workers, and\n  the amount of available resources on each worker. XGBoost can generate different results\n  using different number of workers. This is caused by the approximation issue mentioned\n  previously.\n\n- Similar to the hardware constraint, the network topology is also a factor in final\n  output. 
If we change topology the workers might be ordered differently, leading to\n  different ordering of floating-point operations.\n\n- The random seed used in various places of the pipeline.\n\n- The partitioning of data needs to be reproducible. This is related to the available\n  resources on each worker. Dask might partition the data differently for each run\n  according to its own scheduling policy. For instance, if there are some additional tasks\n  in the cluster while you are running the second training session for XGBoost, some of\n  the workers might have constrained memory and Dask may not push the training data for\n  XGBoost to that worker. This change in data partitioning can lead to different output\n  models. If you are using a shared Dask cluster, then the result is likely to vary\n  between runs.\n\n- The operations performed on dataframes need to be reproducible. There are some\n  operations like `DataFrame.merge` that are not deterministic on parallel hardware like GPUs\n  where the order of the index might differ from run to run.\n\nIt's expected to have different results when training the model in a distributed\nenvironment than training the model using a single node due to aforementioned criteria.\n\n\n************\nMemory Usage\n************\n\nHere are some practices on reducing memory usage with dask and xgboost.\n\n- In a distributed workflow, data is best loaded by dask collections directly instead of\n  loaded by client process.  When loading with client process is unavoidable, use\n  ``client.scatter`` to distribute data from client process to workers.  See [2] for a\n  nice summary.\n\n- When using GPU input, like dataframe loaded by ``dask_cudf``, you can try\n  :py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop-in replacement for ``DaskDMatrix``\n  to reduce overall memory usage.  See\n  :ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example.\n\n- Use in-place prediction when possible.\n\nReferences:\n\n#. 
https://github.com/dask/dask/issues/6833\n#. https://stackoverflow.com/questions/45941528/how-to-efficiently-send-a-large-numpy-array-to-the-cluster-with-dask-array\n"
  },
  {
    "path": "doc/tutorials/external_memory.rst",
    "content": "#####################################\nUsing XGBoost External Memory Version\n#####################################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n\n********\nOverview\n********\n\nWhen working with large datasets, training XGBoost models can be challenging as the entire\ndataset needs to be loaded into the main memory. This can be costly and sometimes\ninfeasible.\n\nExternal memory training is sometimes called out-of-core training. It refers to the\ncapability that XGBoost can optionally cache data in a location external to the main\nprocessor, be it CPU or GPU. XGBoost doesn't support network file systems by itself. As a\nresult, for CPU, the external memory usually refers to a harddrive. And for GPU, it refers\nto either the host memory or a harddrive.\n\nUsers can define a custom iterator to load data in chunks for running XGBoost\nalgorithms. External memory can be used for training and prediction, but training is the\nprimary use case and it will be our focus in this tutorial. For prediction and evaluation,\nusers can iterate through the data themselves, whereas training requires the entire\ndataset to be loaded into the memory. During model training, XGBoost fetches the cache in\nbatches to construct the decision trees, hence avoiding loading the entire dataset into\nthe main memory and achieve better vertical scaling (scaling within the same node).\n\nSignificant progress was made in the 3.0 release for the GPU implementation. We will\nintroduce the difference between CPU and GPU in the following sections.\n\n.. note::\n\n   Training on data from external memory is not supported by the ``exact`` tree method. We\n   recommend using the default ``hist`` tree method for performance reasons.\n\n.. note::\n\n   The feature is considered experimental but ready for public testing in 3.0. Vector-leaf\n   is not yet supported.\n\nThe external memory support has undergone multiple development iterations. 
See below\nsections for a brief history.\n\n\n*************\nData Iterator\n*************\n\nTo start using the external memory, users need to define a data iterator. The data iterator\ninterface was added to the Python and C interfaces in 1.5, and to the R interface in\n3.0.0. Like the :py:class:`~xgboost.QuantileDMatrix` with :py:class:`~xgboost.DataIter`,\nXGBoost loads data batch-by-batch using the custom iterator supplied by the user. However,\nunlike the :py:class:`~xgboost.QuantileDMatrix`, external memory does not concatenate the\nbatches. Instead, it caches all batches in the external memory and fetches them\non-demand. Go to the end of the document to see a comparison between\n:py:class:`~xgboost.QuantileDMatrix` and the external memory version of\n:py:class:`~xgboost.ExtMemQuantileDMatrix`.\n\nSome examples are in the ``demo`` directory for a quick start. To enable external memory\ntraining, the custom data iterator needs to have two class methods: ``next`` and\n``reset``.\n\n.. code-block:: python\n\n    import os\n    from typing import Callable, List, Literal, Tuple\n\n    import numpy as np\n    import xgboost\n\n    class Iterator(xgboost.DataIter):\n        \"\"\"A custom iterator for loading files in batches.\"\"\"\n\n        def __init__(\n            self, device: Literal[\"cpu\", \"cuda\"], file_paths: List[Tuple[str, str]]\n        ) -> None:\n            self.device = device\n\n            self._file_paths = file_paths\n            self._it = 0\n            # XGBoost will generate some cache files under the current directory with the\n            # prefix \"cache\"\n            super().__init__(cache_prefix=os.path.join(\".\", \"cache\"))\n\n        def load_file(self) -> Tuple[np.ndarray, np.ndarray]:\n            \"\"\"Load a single batch of data.\"\"\"\n            X_path, y_path = self._file_paths[self._it]\n            # When the `ExtMemQuantileDMatrix` is used, the device must match. 
GPU cannot\n            # consume CPU input data and vice-versa.\n            if self.device == \"cpu\":\n                X = np.load(X_path)\n                y = np.load(y_path)\n            else:\n                import cupy as cp\n\n                X = cp.load(X_path)\n                y = cp.load(y_path)\n\n            assert X.shape[0] == y.shape[0]\n            return X, y\n\n        def next(self, input_data: Callable) -> bool:\n            \"\"\"Advance the iterator by 1 step and pass the data to XGBoost.  This function\n            is called by XGBoost during the construction of ``DMatrix``\n\n            \"\"\"\n            if self._it == len(self._file_paths):\n                # return False to let XGBoost know this is the end of iteration\n                return False\n\n            # input_data is a keyword-only function passed in by XGBoost and has the similar\n            # signature to the ``DMatrix`` constructor.\n            X, y = self.load_file()\n            input_data(data=X, label=y)\n            self._it += 1\n            return True\n\n        def reset(self) -> None:\n            \"\"\"Reset the iterator to its beginning\"\"\"\n            self._it = 0\n\nAfter defining the iterator, we can pass it into the :py:class:`~xgboost.DMatrix` or\nthe :py:class:`~xgboost.ExtMemQuantileDMatrix` constructor:\n\n.. 
code-block:: python\n\n  it = Iterator(device=\"cpu\", file_paths=[\"file_0.npy\", \"file_1.npy\", \"file_2.npy\"])\n\n  # Use the ``ExtMemQuantileDMatrix`` for the hist tree method, recommended.\n  Xy = xgboost.ExtMemQuantileDMatrix(it)\n  booster = xgboost.train({\"tree_method\": \"hist\"}, Xy)\n\n  # The ``approx`` tree method also works, but with lower performance and cannot be used\n  # with the quantile DMatrix.\n  Xy = xgboost.DMatrix(it)\n  booster = xgboost.train({\"tree_method\": \"approx\"}, Xy)\n\nThe above snippet is a simplified version of :ref:`sphx_glr_python_examples_external_memory.py`.\nFor an example in C, please see ``demo/c-api/external-memory/``. The iterator is the\ncommon interface for using external memory with XGBoost, you can pass the resulting\n:py:class:`~xgboost.DMatrix` object for training, prediction, and evaluation.\n\nThe :py:class:`~xgboost.ExtMemQuantileDMatrix` is an external memory version of the\n:py:class:`~xgboost.QuantileDMatrix`. These two classes are specifically designed for the\n``hist`` tree method for reduced memory usage and data loading overhead. See respective\nreferences for more info.\n\nIt is important to set the batch size based on the memory available. A good starting point\nfor CPU is to set the batch size to 10GB per batch if you have 64GB of memory. It is *not*\nrecommended to set small batch sizes like 32 samples per batch, as this can severely hurt\nperformance in gradient boosting. See below sections for information about the GPU version\nand other best practices.\n\n**********************************\nGPU Version (GPU Hist tree method)\n**********************************\n\nExternal memory is supported by GPU algorithms (i.e., when ``device`` is set to\n``cuda``). Starting with 3.0, the default GPU implementation is similar to what the CPU\nversion does. It also supports the use of :py:class:`~xgboost.ExtMemQuantileDMatrix` when\nthe ``hist`` tree method is employed (default). 
For a GPU device, the main memory is the\ndevice memory, whereas the external memory can be either a disk or the CPU memory. XGBoost\nstages the cache on CPU memory by default. Users can change the backing storage to disk by\nspecifying the ``on_host`` parameter in the :py:class:`~xgboost.DataIter`. However, using\nthe disk is not recommended as it's likely to make the GPU slower than the CPU. The option\nis here for experimentation purposes only. In addition,\n:py:class:`~xgboost.ExtMemQuantileDMatrix` parameters ``min_cache_page_bytes``, and\n``max_quantile_batches`` can help control the data placement and memory usage.\n\nInputs to the :py:class:`~xgboost.ExtMemQuantileDMatrix` (through the iterator) must be on\nthe GPU. It's crucial to use an asynchronous memory pool for all memory allocations when\ntraining with external memory. XGBoost relies on the asynchronous memory pool to reduce\nthe overhead of data fetching. There are two options for setting up the memory pool:\n\n- **CUDA Async Pool**: Uses the CUDA driver's built-in async memory pool. This option\n  doesn't require any additional dependencies. It's the same as using the\n  `CudaAsyncMemoryResource` from RMM (see below).\n- **RMM Pool**: Uses `RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__\n  with an asynchronous memory resource. This option requires RMM to be installed and\n  XGBoost to be built with RMM support.\n\nChoose the one that best fits your use case.\n\n=====================\nUsing CUDA Async Pool\n=====================\n\nThe CUDA async pool uses the driver's default memory pool with a configured release\nthreshold. See :ref:`global_config` for the parameter `use_cuda_async_pool`.\n\n  .. versionadded:: 3.2.0\n\n  .. warning:: This is an experimental feature and is subject to change without\n               notice. Windows is not supported yet.\n\n.. 
code-block:: python\n\n    import cupy as cp\n    import cuda.bindings.driver as driver\n    import cuda.bindings.runtime as cudart\n    from cupy.cuda import MemoryAsyncPool\n\n    # Get the default memory pool and configure the release threshold\n    status, dft_pool = cudart.cudaDeviceGetDefaultMemPool(0)\n    # Set the release threshold to 90% of total device memory\n    status, free, total = cudart.cudaMemGetInfo()\n    v = driver.cuuint64_t(int(total * 0.9))\n    cudart.cudaMemPoolSetAttribute(\n        dft_pool,\n        cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,\n        v,\n    )\n    # Set the allocator for cupy as well.\n    cp.cuda.set_allocator(MemoryAsyncPool().malloc)\n\n    # Make sure XGBoost is using the CUDA async pool for all allocations.\n    with xgboost.config_context(use_cuda_async_pool=True):\n        # Construct the iterators for ExtMemQuantileDMatrix\n        # ...\n        # Build the ExtMemQuantileDMatrix and start training\n        Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_bin=n_bins)\n        # Use the training DMatrix as a reference\n        Xy_valid = xgboost.ExtMemQuantileDMatrix(it_valid, max_bin=n_bins, ref=Xy_train)\n        booster = xgboost.train(\n            {\n                \"tree_method\": \"hist\",\n                \"max_bin\": n_bins,\n                \"device\": device,\n            },\n            Xy_train,\n            num_boost_round=n_rounds,\n            evals=[(Xy_train, \"Train\"), (Xy_valid, \"Valid\")]\n        )\n\n==============\nUsing RMM Pool\n==============\n\nAlternatively, you can use RMM with an asynchronous memory resource. If XGBoost is not\nbuilt with RMM support, a warning will be raised:\n\n.. 
code-block:: python\n\n    import cupy as cp\n    import rmm\n    from rmm.allocators.cupy import rmm_cupy_allocator\n\n    # We use the pool memory resource here for simplicity, you can also try the\n    # `ArenaMemoryResource` for improved memory fragmentation handling.\n    mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaAsyncMemoryResource())\n    rmm.mr.set_current_device_resource(mr)\n    # Set the allocator for cupy as well.\n    cp.cuda.set_allocator(rmm_cupy_allocator)\n\n    # Make sure XGBoost is using RMM for all allocations.\n    with xgboost.config_context(use_rmm=True):\n        # Construct the iterators for ExtMemQuantileDMatrix\n        # ...\n        # Build the ExtMemQuantileDMatrix and start training\n        Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_bin=n_bins)\n        # Use the training DMatrix as a reference\n        Xy_valid = xgboost.ExtMemQuantileDMatrix(it_valid, max_bin=n_bins, ref=Xy_train)\n        booster = xgboost.train(\n            {\n                \"tree_method\": \"hist\",\n                \"max_depth\": 6,\n                \"max_bin\": n_bins,\n                \"device\": device,\n            },\n            Xy_train,\n            num_boost_round=n_rounds,\n            evals=[(Xy_train, \"Train\"), (Xy_valid, \"Valid\")]\n        )\n\nIn addition, the open source `NVIDIA Linux driver\n<https://developer.nvidia.com/blog/nvidia-transitions-fully-towards-open-source-gpu-kernel-modules/>`__\nis required for ``Heterogeneous memory management (HMM)`` support. Usually, users need not\nto change :py:class:`~xgboost.ExtMemQuantileDMatrix` parameters like\n``min_cache_page_bytes``, they are automatically configured based on the device and don't\nchange model accuracy. However, the ``max_quantile_batches`` can be useful if\n:py:class:`~xgboost.ExtMemQuantileDMatrix` is running out of device memory during\nconstruction, see :py:class:`~xgboost.QuantileDMatrix` and the following sections for more\ninfo. 
Currently, we focus on devices with ``NVLink-C2C`` support for GPU-based external\nmemory support.\n\n==========\nNVLink-C2C\n==========\n\nThe newer NVIDIA platforms like `Grace-Hopper\n<https://www.nvidia.com/en-us/data-center/grace-hopper-superchip/>`__ use `NVLink-C2C\n<https://www.nvidia.com/en-us/data-center/nvlink-c2c/>`__, which facilitates a fast\ninterconnect between the CPU and the GPU. With the host memory serving as the data cache,\nXGBoost can retrieve data with significantly lower overhead. When the input data is dense,\nthere's minimal to no performance loss for training, except for the initial construction\nof the :py:class:`~xgboost.ExtMemQuantileDMatrix`.  The initial construction iterates\nthrough the input data twice; as a result, the most significant overhead compared to\nin-core training is one additional data read when the data is dense. Please note that\nthere are multiple variants of the platform and they come with different C2C\nbandwidths. During initial development of the feature, we used the LPDDR5 480G version,\nwhich has about 350GB/s bandwidth for host to device transfer. When choosing the variant\nfor training XGBoost models, one should pay extra attention to the C2C bandwidth.\n\nHere we provide a simple example as a starting point for training with external memory. We\nused this example for one of the benchmarks. To train a model with `2 ^ 29` 32-bit\nfloating point samples, `512` features (total 1TB) on a GH200 (a H200 GPU connected to a\nGrace CPU by a chip-to-chip link) system, one can start with:\n\n- Evenly divide the data into 128 batches with 8GB per batch.\n- Define a custom iterator as previously described.\n- Set the `max_quantile_batches` parameter of the :py:class:`~xgboost.ExtMemQuantileDMatrix` to 32 (256GB per sub-stream for quantization). 
Load the data.\n- Start training with ``device=cuda``.\n\nTo run experiments on these platforms, the open source `NVIDIA Linux driver\n<https://developer.nvidia.com/blog/nvidia-transitions-fully-towards-open-source-gpu-kernel-modules/>`__\nwith version ``>=565.47`` is required; it should come with CTK 12.7 and later\nversions. Lastly, there's a known issue with Linux 6.11 that can lead to CUDA host memory\nallocation failure with an ``invalid argument`` error.\n\n.. _extmem-adaptive-cache:\n\n==============\nAdaptive Cache\n==============\n\nStarting with 3.1, XGBoost introduces an adaptive cache for GPU-based external memory\ntraining. The feature helps split the data cache into a host cache and a device cache. By\nkeeping a portion of the cache on the GPU, we can reduce the amount of data transfer\nduring training when there's a sufficient amount of GPU memory. The feature can be\ncontrolled by the ``cache_host_ratio`` parameter of the\n:py:class:`xgboost.ExtMemQuantileDMatrix`. Unless explicitly specified, the ratio is\nautomatically estimated based on device memory size and the size of the dataset.\n\nHowever, this parameter increases memory fragmentation as XGBoost needs large memory pages\nwith irregular sizes. As a result, you might see an out-of-memory error after the\nconstruction of the ``DMatrix`` but before the actual training begins.\n\nFor reference, we tested the adaptive cache with a 128GB (512 features) dense 32-bit\nfloating-point dataset using an NVIDIA A6000 GPU, which comes with 48GB device memory. The\n``cache_host_ratio`` was estimated to be about 0.3, meaning about 30 percent of the\nquantized cache was on the host and the remaining 70 percent was actually in-core. Given this\nratio, the overhead is minimal. 
However, the estimated ratio increases as the data size\ngrows.\n\n================================\nNon-Uniform Memory Access (NUMA)\n================================\n\nOn multi-socket systems, `NUMA\n<https://en.wikipedia.org/wiki/Non-uniform_memory_access>`__ helps optimize data access by\nprioritizing memory that is local to each socket.  On these systems, it's essential to set\nthe correct affinity to reduce the overhead of cross-socket data access. Since the out of\ncore training stages the data cache on the host and trains the model using a GPU, the\ntraining performance is particularly sensitive to the data read bandwidth. To provide some\ncontext, on a GB200 machine, accessing the wrong NUMA node from a GPU can reduce the C2C\nbandwidth by half. Even if you are not using distributed training, you should still pay\nattention to NUMA control since there's no guarantee that your process will have the\ncorrect configuration.\n\nTo configure the NUMA binding from command line on Linux, one can use the ``numactl`` or\nthe ``hwloc-bind``:\n\n.. code-block:: sh\n\n    numactl --membind=${NODEID} --cpunodebind=${NODEID} ./myapp\n\n\nTo obtain the node ID, you can check the machine topology via ``nvidia-smi``:\n\n.. code-block:: sh\n\n    nvidia-smi topo -m\n\nThe column ``NUMA Affinity`` lists the NUMA node ID for each GPU. 
In the example output\nshown below, the `GPU0` is associated with the `0` node ID::\n\n            GPU0    GPU1    NIC0    NIC1    NIC2    NIC3    CPU Affinity    NUMA Affinity   GPU NUMA ID\n    GPU0     X      NV18    NODE    NODE    NODE    SYS     0-71            0               2\n    GPU1    NV18     X      SYS     SYS     SYS     NODE    72-143          1               10\n    NIC0    NODE    SYS      X      PIX     NODE    SYS\n    NIC1    NODE    SYS     PIX      X      NODE    SYS\n    NIC2    NODE    SYS     NODE    NODE     X      SYS\n    NIC3    SYS     NODE    SYS     SYS     SYS      X\n\nAlternatively, one can also use the ``hwloc`` command line interface, please make sure the\nstrict flag is used:\n\n.. code-block:: sh\n\n    hwloc-bind --strict --membind node:${NODEID} --cpubind node:${NODEID} ./myapp\n\n\nBoth projects provide a programming interface for configuring NUMA bindings within\napplications. See :ref:`sphx_glr_python_examples_distributed_extmem_basic.py` for a\ncomplete example of using ``pyhwloc`` in a distributed training setting.\n\n********************\nDistributed Training\n********************\n\nDistributed training is similar to in-core learning, but the work for framework\nintegration is still on-going. See :ref:`sphx_glr_python_examples_distributed_extmem_basic.py`\nfor an example for using the communicator to build a simple pipeline. Since users can\ndefine their custom data loader, it's unlikely that existing distributed frameworks\ninterface in XGBoost can meet all the use cases, the example can be a starting point for\nusers who have custom infrastructure.\n\n**************\nBest Practices\n**************\n\nIn previous sections, we demonstrated how to train a tree-based model with data residing\non an external memory. In addition, we made some recommendations for batch size and\nNUMA. Here are some other configurations we find useful. 
The external memory feature\ninvolves iterating through data batches stored in a cache during tree construction. For\noptimal performance, we recommend using the ``grow_policy=depthwise`` setting, which\nallows XGBoost to build an entire layer of tree nodes with only a few batch\niterations. Conversely, using the ``lossguide`` policy requires XGBoost to iterate over\nthe data set for each tree node, resulting in significantly slower performance (tree size\nis exponential to the depth).\n\nIn addition, the ``hist`` tree method should be preferred over the ``approx`` tree method\nas the former doesn't recreate the histogram bins for every iteration. Creating the\nhistogram bins requires loading the raw input data, which is prohibitively expensive. The\n:py:class:`~xgboost.ExtMemQuantileDMatrix` designed for the ``hist`` tree method can speed\nup the initial data construction and the evaluation significantly for external memory.\n\nSince the external memory implementation focuses on training where XGBoost needs to access\nthe entire dataset, only the ``X`` is divided into batches while everything else is\nconcatenated. As a result, it's recommended for users to define their own management code\nto iterate through the data for inference, especially for SHAP value computation. The size\nof SHAP matrix can be larger than the feature matrix ``X``, making external memory in\nXGBoost less effective.\n\nWhen external memory is used, the performance of CPU training is limited by disk IO\n(input/output) speed. This means that the disk IO speed primarily determines the training\nspeed. Similarly, PCIe bandwidth limits the GPU performance, assuming the CPU memory is\nused as a cache and address translation services (ATS) is unavailable. During development,\nwe observed that typical data transfer in XGBoost with PCIe4x16 has about 24GB/s bandwidth\nand about 42GB/s with PCIe5, which is significantly lower than the GPU processing\nperformance. 
Whereas with a C2C-enabled machine, the performance of data transfer and\nprocessing in training are close to each other.\n\nRunning inference is much less computation-intensive than training and, hence, much\nfaster. As a result, the performance bottleneck of inference is back to data transfer. For\nGPU, the time it takes to read the data from host to device completely determines the time\nit takes to run inference, even if a C2C link is available.\n\n.. code-block:: python\n\n    Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_bin=n_bins)\n    Xy_valid = xgboost.ExtMemQuantileDMatrix(it_valid, max_bin=n_bins, ref=Xy_train)\n\nIn addition, since the GPU implementation relies on asynchronous memory pool, memory\nfragmentation can occur regardless of whether you use the CUDA async pool or RMM.  You\nmight want to start the training with a fresh pool instead of starting training right\nafter the ETL process. If you run into out-of-memory errors and you are convinced that the\npool is not full yet (pool memory usage can be profiled with ``nsight-system``), consider\nusing the :py:class:`~rmm.mr.ArenaMemoryResource` memory resource with RMM, or using the\nCUDA asynchronous pool with the latest NVIDIA kernel driver.\n\nDuring CPU benchmarking, we used an NVMe connected to a PCIe-4 slot. Other types of\nstorage can be too slow for practical usage. However, your system will likely perform some\ncaching to reduce the overhead of the file read. See the following sections for remarks.\n\n.. _ext_remarks:\n\n*******\nRemarks\n*******\n\nWhen using external memory with XGBoost, data is divided into smaller chunks so that only\na fraction of it needs to be stored in memory at any given time. It's important to note\nthat this method only applies to the predictor data (``X``), while other data, like labels\nand internal runtime structures are concatenated. 
This means that memory reduction is most\neffective when dealing with wide datasets where ``X`` is significantly larger in size\ncompared to other data like ``y``, while it has little impact on slim datasets.\n\nAs one might expect, fetching data on demand puts significant pressure on the storage\ndevice. Today's computing devices can process way more data than storage devices can read\nin a single unit of time. The difference is orders of magnitude. A GPU is capable of\nprocessing hundreds of Gigabytes of floating-point data in a split second. On the other\nhand, a four-lane NVMe storage connected to a PCIe-4 slot usually has about 6GB/s of data\ntransfer rate. As a result, the training is likely to be severely bounded by your storage\ndevice. Before adopting the external memory solution, some back-of-the-envelope calculations\nmight help you determine its viability. For instance, suppose your NVMe drive can transfer 4GB\n(a reasonably practical number) of data per second, and you have 100GB of data in a\ncompressed XGBoost cache (corresponding to a dense float32 numpy array with 200GB, give or\ntake). A tree with depth 8 needs at least 16 iterations through the data when the\nparameter is optimal. You need about 14 minutes to train a single tree without accounting\nfor some other overheads and assuming the computation overlaps with the IO. If your dataset\nhappens to have a TB-level size, you might need thousands of trees to get a generalized\nmodel. These calculations can help you get an estimate of the expected training time.\n\nHowever, sometimes, we can ameliorate this limitation. One should also consider that the\nOS (mainly talking about the Linux kernel) can usually cache the data on host memory. It\nonly evicts pages when new data comes in and there's no room left. In practice, at least\nsome portion of the data can persist in the host memory throughout the entire training\nsession. We are aware of this cache when optimizing the external memory fetcher. 
The\ncompressed cache is usually smaller than the raw input data, especially when the input is\ndense without any missing value. If the host memory can fit a significant portion of this\ncompressed cache, the performance should be decent after initialization. Our development\nso far focuses on the following fronts of optimization for external memory:\n\n- Avoid iterating through the data whenever appropriate.\n- If the OS can cache the data, the performance should be close to in-core training.\n- For GPU, the actual computation should overlap with memory copy as much as possible.\n\nStarting with XGBoost 2.0, the CPU implementation of external memory uses ``mmap``. It has\nnot been tested against system errors like disconnected network devices (`SIGBUS`). In the\nface of a bus error, you will see a hard crash and need to clean up the cache files. If\nthe training session might take a long time and you use solutions like NVMe-oF, we\nrecommend checkpointing your model periodically. Also, it's worth noting that most tests\nhave been conducted on Linux distributions.\n\nAnother important point to keep in mind is that creating the initial cache for XGBoost may\ntake some time. The interface to external memory is through custom iterators, which we\ncannot assume to be thread-safe. Therefore, initialization is performed sequentially. Using\nthe :py:func:`~xgboost.config_context` with `verbosity=2` can give you some information on\nwhat XGBoost is doing during the wait if you don't mind the extra output.\n\n*******************************\nCompared to the QuantileDMatrix\n*******************************\n\nPassing an iterator to the :py:class:`~xgboost.QuantileDMatrix` enables direct\nconstruction of :py:class:`~xgboost.QuantileDMatrix` with data chunks. On the other hand,\nif it's passed to the :py:class:`~xgboost.DMatrix` or the\n:py:class:`~xgboost.ExtMemQuantileDMatrix`, it instead enables the external memory\nfeature. 
The :py:class:`~xgboost.QuantileDMatrix` concatenates the data in memory after\ncompression and doesn't fetch data during training. On the other hand, the external memory\n:py:class:`~xgboost.DMatrix` (:py:class:`~xgboost.ExtMemQuantileDMatrix`) fetches data\nbatches from external memory on demand. Use the :py:class:`~xgboost.QuantileDMatrix` (with\niterator if necessary) when you can fit most of your data in memory. For many platforms,\nthe training speed can be an order of magnitude faster than external memory.\n\n*************\nBrief History\n*************\n\nFor a long time, external memory support has been an experimental feature and has\nundergone multiple development iterations. Here's a brief summary of major changes:\n\n- Gradient-based sampling was introduced to the GPU hist in 1.1.\n- The iterator interface was introduced in 1.5, along with a major rewrite for the\n  internal framework.\n- 2.0 introduced the use of ``mmap``, along with optimization in XGBoost to enable\n  zero-copy data fetching.\n- 3.0 reworked the GPU implementation to support caching data on the host and disk,\n  introduced the :py:class:`~xgboost.ExtMemQuantileDMatrix` class, and added quantile-based\n  objectives support.\n- In addition, we began supporting distributed training in 3.0.\n- 3.1 added support for having divided cache pages. One can have part of a cache page in\n  the GPU and the rest of the cache in the host memory. In addition, XGBoost works with\n  the Grace Blackwell hardware decompression engine when data is sparse.\n- The text file cache format has been removed in 3.1.0.\n- The page concatenation option has been removed in 3.2.0.\n"
  },
  {
    "path": "doc/tutorials/feature_interaction_constraint.rst",
    "content": "###############################\nFeature Interaction Constraints\n###############################\n\nThe decision tree is a powerful tool to discover interaction among independent\nvariables (features). Variables that appear together in a traversal path\nare interacting with one another, since the condition of a child node is\npredicated on the condition of the parent node. For example, the highlighted\nred path in the diagram below contains three variables: :math:`x_1`, :math:`x_7`,\nand :math:`x_{10}`, so the highlighted prediction (at the highlighted leaf node)\nis the product of interaction between :math:`x_1`, :math:`x_7`, and\n:math:`x_{10}`.\n\n.. plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph feature_interaction_illustration1 {\n      graph [fontname = \"helvetica\"];\n      node [fontname = \"helvetica\"];\n      edge [fontname = \"helvetica\"];\n      0 [label=<x<SUB><FONT POINT-SIZE=\"11\">10</FONT></SUB> &lt; -1.5 ?>, shape=box, color=red, fontcolor=red];\n      1 [label=<x<SUB><FONT POINT-SIZE=\"11\">2</FONT></SUB> &lt; 2 ?>, shape=box];\n      2 [label=<x<SUB><FONT POINT-SIZE=\"11\">7</FONT></SUB> &lt; 0.3 ?>, shape=box, color=red, fontcolor=red];\n      3 [label=\"...\", shape=none];\n      4 [label=\"...\", shape=none];\n      5 [label=<x<SUB><FONT POINT-SIZE=\"11\">1</FONT></SUB> &lt; 0.5 ?>, shape=box, color=red, fontcolor=red];\n      6 [label=\"...\", shape=none];\n      7 [label=\"...\", shape=none];\n      8 [label=\"Predict +1.3\", color=red, fontcolor=red];\n      0 -> 1 [labeldistance=2.0, labelangle=45, headlabel=\"Yes/Missing           \"];\n      0 -> 2 [labeldistance=2.0, labelangle=-45,\n              headlabel=\"No\", color=red, fontcolor=red];\n      1 -> 3 [labeldistance=2.0, labelangle=45, headlabel=\"Yes\"];\n      1 -> 4 [labeldistance=2.0, labelangle=-45, headlabel=\"             No/Missing\"];\n      2 -> 5 [labeldistance=2.0, labelangle=-45, headlabel=\"Yes\",\n             
 color=red, fontcolor=red];\n      2 -> 6 [labeldistance=2.0, labelangle=-45, headlabel=\"           No/Missing\"];\n      5 -> 7;\n      5 -> 8 [color=red];\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/feature_interaction_illustration1', view=False)\n  Source(source, format='svg').render('../_static/feature_interaction_illustration1', view=False)\n\n.. figure:: ../_static/feature_interaction_illustration1.svg\n   :align: center\n   :figwidth: 80 %\n\nWhen the tree depth is larger than one, many variables interact on\nthe sole basis of minimizing training loss, and the resulting decision tree may\ncapture a spurious relationship (noise) rather than a legitimate relationship\nthat generalizes across different datasets. **Feature interaction constraints**\nallow users to decide which variables are allowed to interact and which are not.\n\nPotential benefits include:\n\n* Better predictive performance from focusing on interactions that work --\n  whether through domain specific knowledge or algorithms that rank interactions\n* Less noise in predictions; better generalization\n* More control to the user on what the model can fit. For example, the user may\n  want to exclude some interactions even if they perform well due to regulatory\n  constraints.\n\n****************\nA Simple Example\n****************\n\nFeature interaction constraints are expressed in terms of groups of variables\nthat are allowed to interact. For example, the constraint\n``[0, 1]`` indicates that variables :math:`x_0` and :math:`x_1` are allowed to\ninteract with each other but with no other variable. Similarly, ``[2, 3, 4]``\nindicates that :math:`x_2`, :math:`x_3`, and :math:`x_4` are allowed to\ninteract with one another but with no other variable. 
A set of feature\ninteraction constraints is expressed as a nested list, e.g.\n``[[0, 1], [2, 3, 4]]``, where each inner list is a group of indices of features\nthat are allowed to interact with each other.\n\nIn the following diagram, the left decision tree is in violation of the first\nconstraint (``[0, 1]``), whereas the right decision tree complies with both the\nfirst and second constraints (``[0, 1]``, ``[2, 3, 4]``).\n\n.. plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph feature_interaction_illustration2 {\n      graph [fontname = \"helvetica\"];\n      node [fontname = \"helvetica\"];\n      edge [fontname = \"helvetica\"];\n      0 [label=<x<SUB><FONT POINT-SIZE=\"11\">0</FONT></SUB> &lt; 5.0 ?>, shape=box];\n      1 [label=<x<SUB><FONT POINT-SIZE=\"11\">2</FONT></SUB> &lt; -3.0 ?>, shape=box];\n      2 [label=\"+0.6\"];\n      3 [label=\"-0.4\"];\n      4 [label=\"+1.2\"];\n      0 -> 1 [labeldistance=2.0, labelangle=45, headlabel=\"Yes/Missing           \"];\n      0 -> 2 [labeldistance=2.0, labelangle=-45, headlabel=\"No\"];\n      1 -> 3 [labeldistance=2.0, labelangle=45, headlabel=\"Yes\"];\n      1 -> 4 [labeldistance=2.0, labelangle=-45, headlabel=\"           No/Missing\"];\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/feature_interaction_illustration2', view=False)\n  Source(source, format='svg').render('../_static/feature_interaction_illustration2', view=False)\n\n.. 
plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph feature_interaction_illustration3 {\n      graph [fontname = \"helvetica\"];\n      node [fontname = \"helvetica\"];\n      edge [fontname = \"helvetica\"];\n      0 [label=<x<SUB><FONT POINT-SIZE=\"11\">3</FONT></SUB> &lt; 2.5 ?>, shape=box];\n      1 [label=\"+1.6\"];\n      2 [label=<x<SUB><FONT POINT-SIZE=\"11\">2</FONT></SUB> &lt; -1.2 ?>, shape=box];\n      3 [label=\"+0.1\"];\n      4 [label=\"-0.3\"];\n      0 -> 1 [labeldistance=2.0, labelangle=45, headlabel=\"Yes\"];\n      0 -> 2 [labeldistance=2.0, labelangle=-45, headlabel=\"           No/Missing\"];\n      2 -> 3 [labeldistance=2.0, labelangle=45, headlabel=\"Yes/Missing           \"];\n      2 -> 4 [labeldistance=2.0, labelangle=-45, headlabel=\"No\"];\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/feature_interaction_illustration3', view=False)\n  Source(source, format='svg').render('../_static/feature_interaction_illustration3', view=False)\n\n.. |fig1| image:: ../_static/feature_interaction_illustration2.svg\n   :scale: 7%\n   :align: middle\n\n.. |fig2| image:: ../_static/feature_interaction_illustration3.svg\n   :scale: 7%\n   :align: middle\n\n+-----------+---------+\n| |fig1|    | |fig2|  |\n+-----------+---------+\n| forbidden | allowed |\n+-----------+---------+\n\n\n****************************************************\nEnforcing Feature Interaction Constraints in XGBoost\n****************************************************\n\nIt is very simple to enforce feature interaction constraints in XGBoost.  Here we will\ngive an example using Python, but the same general idea generalizes to other\nplatforms.\n\nSuppose the following code fits your model without feature interaction constraints:\n\n.. 
code-block:: python\n\n  model_no_constraints = xgb.train(params, dtrain,\n                                   num_boost_round = 1000, evals = evallist,\n                                   early_stopping_rounds = 10)\n\nThen fitting with feature interaction constraints only requires adding a single\nparameter:\n\n.. code-block:: python\n\n  params_constrained = params.copy()\n  # Use nested list to define feature interaction constraints\n  params_constrained['interaction_constraints'] = '[[0, 2], [1, 3, 4], [5, 6]]'\n  # Features 0 and 2 are allowed to interact with each other but with no other feature\n  # Features 1, 3, 4 are allowed to interact with one another but with no other feature\n  # Features 5 and 6 are allowed to interact with each other but with no other feature\n\n  model_with_constraints = xgb.train(params_constrained, dtrain,\n                                     num_boost_round = 1000, evals = evallist,\n                                     early_stopping_rounds = 10)\n\n**************************\nUsing feature name instead\n**************************\n\nXGBoost's Python and R packages support using feature names instead of feature index for\nspecifying the constraints. Given a data frame with columns ``[\"f0\", \"f1\", \"f2\"]``, the\nfeature interaction constraint can be specified as ``[[\"f0\", \"f2\"]]`` (Python) or\n``list(c(\"f0\", \"f2\"))`` (R, when passing them to function ``xgboost()``).\n\n**************\nAdvanced topic\n**************\n\nThe intuition behind interaction constraints is simple.  Users may have prior knowledge about\nrelations between different features, and encode it as constraints during model\nconstruction.  But there are also some subtleties around specifying constraints.  Take\nthe constraint ``[[1, 2], [2, 3, 4]]`` as an example.  The second feature appears in two\ndifferent interaction sets, ``[1, 2]`` and ``[2, 3, 4]``.  So the union set of features\nallowed to interact with ``2`` is ``{1, 3, 4}``.  
In the following diagram, the root splits at\nfeature ``2``.  Because all its descendants should be able to interact with it, all 4 features\nare legitimate split candidates at the second layer. At first sight, this might look like\ndisregarding the specified constraint sets, but it is not.\n\n.. plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph feature_interaction_illustration4 {\n      graph [fontname = \"helvetica\"];\n      node [fontname = \"helvetica\"];\n      edge [fontname = \"helvetica\"];\n      0 [label=<x<SUB><FONT POINT-SIZE=\"11\">2</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      1 [label=<x<SUB><FONT POINT-SIZE=\"11\">{1, 2, 3, 4}</FONT></SUB>>, shape=box];\n      2 [label=<x<SUB><FONT POINT-SIZE=\"11\">{1, 2, 3, 4}</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      3 [label=\"...\", shape=none];\n      4 [label=\"...\", shape=none];\n      5 [label=\"...\", shape=none];\n      6 [label=\"...\", shape=none];\n      0 -> 1;\n      0 -> 2;\n      1 -> 3;\n      1 -> 4;\n      2 -> 5;\n      2 -> 6;\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/feature_interaction_illustration4', view=False)\n  Source(source, format='svg').render('../_static/feature_interaction_illustration5', view=False)\n\n.. figure:: ../_static/feature_interaction_illustration4.png\n   :align: center\n   :figwidth: 80 %\n\n   ``{1, 2, 3, 4}`` represents the sets of legitimate split features.\n\nThis leads to some interesting implications of feature interaction constraints.  Take\n``[[0, 1], [0, 1, 2], [1, 2]]`` as another example.  Assuming we have only 3 available\nfeatures in our training datasets for presentation purposes, careful readers might have\nfound out that the above constraint is the same as simply ``[[0, 1, 2]]``.  
Since no matter which\nfeature is chosen for split in the root node, all its descendants are allowed to include every\nfeature as legitimate split candidates without violating interaction constraints.\n\nFor one last example, we use ``[[0, 1], [1, 3, 4]]`` and choose feature ``0`` as split for\nthe root node.  At the second layer of the built tree, ``1`` is the only legitimate split\ncandidate except for ``0`` itself, since they belong to the same constraint set.\nFollowing the grow path of our example tree below, the node at the second layer splits at\nfeature ``1``.  But due to the fact that ``1`` also belongs to second constraint set ``[1,\n3, 4]``, at the third layer, we are allowed to include all features as split candidates and\nstill comply with the interaction constraints of its ascendants.\n\n.. plot::\n  :nofigs:\n\n  from graphviz import Source\n  source = r\"\"\"\n    digraph feature_interaction_illustration5 {\n      graph [fontname = \"helvetica\"];\n      node [fontname = \"helvetica\"];\n      edge [fontname = \"helvetica\"];\n      0 [label=<x<SUB><FONT POINT-SIZE=\"11\">0</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      1 [label=\"...\", shape=none];\n      2 [label=<x<SUB><FONT POINT-SIZE=\"11\">1</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      3 [label=<x<SUB><FONT POINT-SIZE=\"11\">{0, 1, 3, 4}</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      4 [label=<x<SUB><FONT POINT-SIZE=\"11\">{0, 1, 3, 4}</FONT></SUB>>, shape=box, color=black, fontcolor=black];\n      5 [label=\"...\", shape=none];\n      6 [label=\"...\", shape=none];\n      7 [label=\"...\", shape=none];\n      8 [label=\"...\", shape=none];\n      0 -> 1;\n      0 -> 2;\n      2 -> 3;\n      2 -> 4;\n      3 -> 5;\n      3 -> 6;\n      4 -> 7;\n      4 -> 8;\n    }\n  \"\"\"\n  Source(source, format='png').render('../_static/feature_interaction_illustration6', view=False)\n  Source(source, 
format='svg').render('../_static/feature_interaction_illustration7', view=False)\n\n\n.. figure:: ../_static/feature_interaction_illustration6.png\n   :align: center\n   :figwidth: 80 %\n\n   ``{0, 1, 3, 4}`` represents the sets of legitimate split features.\n"
  },
  {
    "path": "doc/tutorials/index.rst",
    "content": "#################\nXGBoost Tutorials\n#################\n\nThis section contains official tutorials inside XGBoost package.\nSee `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for more resources. Also, don't miss the feature introductions in each package.\n\n.. toctree::\n  :maxdepth: 1\n  :caption: Contents:\n\n  model\n  saving_model\n  slicing_model\n  learning_to_rank\n  dart\n  monotonic\n  feature_interaction_constraint\n  aft_survival_analysis\n  categorical\n  multioutput\n  rf\n  kubernetes\n  Distributed XGBoost with XGBoost4J-Spark <https://xgboost.readthedocs.io/en/latest/jvm/xgboost4j_spark_tutorial.html>\n  Distributed XGBoost with XGBoost4J-Spark-GPU <https://xgboost.readthedocs.io/en/latest/jvm/xgboost4j_spark_gpu_tutorial.html>\n  dask\n  spark_estimator\n  ray\n  external_memory\n  c_api_tutorial\n  input_format\n  param_tuning\n  custom_metric_obj\n  advanced_custom_obj\n  intercept\n  privacy_preserving"
  },
  {
    "path": "doc/tutorials/input_format.rst",
    "content": "############################\nText Input Format of DMatrix\n############################\n\n.. _basic_input_format:\n\nHere we will briefly describe the text input formats for XGBoost. However, for users with access to a supported language environment like Python or R, it's recommended to use data parsers from that ecosystem instead. For instance, :py:func:`sklearn.datasets.load_svmlight_file`.\n\n.. warning::\n\n   As stated above, users are encouraged to use third-party data parsers. The text parsers\n   in XGBoost have been deprecated.\n\n******************\nBasic Input Format\n******************\n\nXGBoost currently supports two text formats for ingesting data: LIBSVM and CSV. The rest of this document will describe the LIBSVM format. (See `this Wikipedia article <https://en.wikipedia.org/wiki/Comma-separated_values>`_ for a description of the CSV format.).  Please be careful that, XGBoost does **not** understand file extensions, nor try to guess the file format, as there is no universal agreement upon file extension of LIBSVM or CSV.  Instead it employs `URI <https://en.wikipedia.org/wiki/Uniform_Resource_Identifier>`_ format for specifying the precise input file type.  For example if you provide a `csv` file ``./data.train.csv`` as input, XGBoost will blindly use the default LIBSVM parser to digest it and generate a parser error.  Instead, users need to provide an URI in the form of ``train.csv?format=csv`` or ``train.csv?format=libsvm``.  For external memory input, the URI should of a form similar to ``train.csv?format=csv#dtrain.cache``.  See :ref:`python_data_interface` and :doc:`/tutorials/external_memory` also.\n\nFor training or predicting, XGBoost takes an instance file with the format as below:\n\n.. 
code-block:: none\n  :caption: ``train.txt``\n\n  1 101:1.2 102:0.03\n  0 1:2.1 10001:300 10002:400\n  0 0:1.3 1:0.3\n  1 0:0.01 1:0.3\n  0 0:0.2 1:0.3\n\nEach line represents a single instance, and in the first line '1' is the instance label, '101' and '102' are feature indices, '1.2' and '0.03' are feature values. In the binary classification case, '1' is used to indicate positive samples, and '0' is used to indicate negative samples. We also support probability values in [0,1] as label, to indicate the probability of the instance being positive.\n\n******************************************\nAuxiliary Files for Additional Information\n******************************************\n**Note: all information below is applicable only to single-node version of the package.** If you'd like to perform distributed training with multiple nodes, skip to the section `Embedding additional information inside LIBSVM file`_.\n\nGroup Input Format\n==================\nFor ranking task, XGBoost supports the group input format. In ranking task, instances are categorized into *query groups* in real world scenarios. For example, in the learning to rank web pages scenario, the web page instances are grouped by their queries. XGBoost requires a file that indicates the group information. For example, if the instance file is the ``train.txt`` shown above,  the group file should be named ``train.txt.group`` and be of the following format:\n\n.. code-block:: none\n  :caption: ``train.txt.group``\n\n  2\n  3\n\nThis means that the data set contains 5 instances, and the first two instances are in a group and the other three are in another group. The numbers in the group file are actually indicating the number of instances in each group in the instance file in order.\nAt the time of configuration, you do not have to indicate the path of the group file. 
If the instance file name is ``xxx``, XGBoost will check whether there is a file named ``xxx.group`` in the same directory.\n\nInstance Weight File\n====================\nInstances in the training data may be assigned weights to differentiate relative importance among them. For example, if we provide an instance weight file for the ``train.txt`` file in the example as below:\n\n.. code-block:: none\n  :caption: ``train.txt.weight``\n\n  1\n  0.5\n  0.5\n  1\n  0.5\n\nIt means that XGBoost will place more emphasis on the first and fourth instances (i.e. the positive instances) while training.\nThe configuration is similar to configuring the group information. If the instance file name is ``xxx``, XGBoost will look for a file named ``xxx.weight`` in the same directory. If the file exists, the instance weights will be extracted and used at the time of training.\n\n.. note:: Binary buffer format and instance weights\n\n  If you choose to save the training data as a binary buffer (using :py:meth:`save_binary() <xgboost.DMatrix.save_binary>`), keep in mind that the resulting binary buffer file will include the instance weights. To update the weights, use the :py:meth:`set_weight() <xgboost.DMatrix.set_weight>` function.\n\nInitial Margin File\n===================\nXGBoost supports providing each instance an initial margin prediction. For example, if we have an initial prediction using logistic regression for the ``train.txt`` file, we can create the following file:\n\n.. code-block:: none\n  :caption: ``train.txt.base_margin``\n\n  -0.4\n  1.0\n  3.4\n\nXGBoost will take these values as initial margin prediction and boost from that. An important note about base_margin is that it should be margin prediction before transformation, so if you are doing logistic loss, you will need to put in value before logistic transformation. 
If you are using XGBoost predictor, use ``pred_margin=1`` to output margin values.\n\n***************************************************\nEmbedding additional information inside LIBSVM file\n***************************************************\n**This section is applicable to both single- and multiple-node settings.**\n\nQuery ID Columns\n================\nThis is most useful for `ranking task <https://github.com/dmlc/xgboost/tree/master/demo/rank>`_, where the instances are grouped into query groups. You may embed query group ID for each instance in the LIBSVM file by adding a token of form ``qid:xx`` in each row:\n\n.. code-block:: none\n  :caption: ``train.txt``\n\n  1 qid:1 101:1.2 102:0.03\n  0 qid:1 1:2.1 10001:300 10002:400\n  0 qid:2 0:1.3 1:0.3\n  1 qid:2 0:0.01 1:0.3\n  0 qid:3 0:0.2 1:0.3\n  1 qid:3 3:-0.1 10:-0.3\n  0 qid:3 6:0.2 10:0.15\n\nKeep in mind the following restrictions:\n\n* You are not allowed to specify query IDs for some instances but not for others. Either every row is assigned a query ID or none at all.\n* The rows have to be sorted in ascending order by the query IDs. So, for instance, you may not have a row with a larger query ID than any of the rows that follow it.\n\nInstance weights\n================\nYou may specify instance weights in the LIBSVM file by appending each instance label with the corresponding weight in the form of ``[label]:[weight]``, as shown by the following example:\n\n.. code-block:: none\n  :caption: ``train.txt``\n\n  1:1.0 101:1.2 102:0.03\n  0:0.5 1:2.1 10001:300 10002:400\n  0:0.5 0:1.3 1:0.3\n  1:1.0 0:0.01 1:0.3\n  0:0.5 0:0.2 1:0.3\n\nwhere the negative instances are assigned half weights compared to the positive instances.\n"
  },
  {
    "path": "doc/tutorials/intercept.rst",
    "content": "#########\nIntercept\n#########\n\n.. versionadded:: 2.0.0\n\nSince 2.0.0, XGBoost supports estimating the model intercept (named ``base_score``)\nautomatically based on targets upon training. The behavior can be controlled by setting\n``base_score`` to a constant value. The following snippet disables the automatic\nestimation:\n\n.. tabs::\n    .. code-tab:: py\n\n        import xgboost as xgb\n\n        clf = xgb.XGBClassifier(n_estimators=10)\n        clf.set_params(base_score=0.5)\n\n    .. code-tab:: r R\n\n        library(xgboost)\n\n        # Load built-in dataset\n        data(agaricus.train, package = \"xgboost\")\n\n        # Set base_score parameter directly\n        model <- xgboost(\n          x = agaricus.train$data,\n          y = factor(agaricus.train$label),\n          base_score = 0.5,\n          nrounds = 10\n        )\n\nIn addition, here 0.5 represents the value after applying the inverse link function. See\nthe end of the document for a description.\n\nOther than the ``base_score``, users can also provide global bias via the data field\n``base_margin``, which is a vector or a matrix depending on the task. With multi-output\nand multi-class, the ``base_margin`` is a matrix with size ``(n_samples, n_targets)`` or\n``(n_samples, n_classes)``.\n\n.. tabs::\n    .. code-tab:: py\n\n        import xgboost as xgb\n        from sklearn.datasets import make_classification\n\n        X, y = make_classification()\n\n        clf = xgb.XGBClassifier()\n        clf.fit(X, y)\n        # Request for raw prediction\n        m = clf.predict(X, output_margin=True)\n\n        clf_1 = xgb.XGBClassifier()\n        # Feed the prediction into the next model\n        # Using base margin overrides the base score, see below sections.\n        clf_1.fit(X, y, base_margin=m)\n        clf_1.predict(X, base_margin=m)\n\n    .. 
code-tab:: r R\n\n        library(xgboost)\n\n        # Load built-in dataset\n        data(agaricus.train, package = \"xgboost\")\n\n        # Train first model\n        model_1 <- xgboost(\n          x = agaricus.train$data,\n          y = factor(agaricus.train$label),\n          nrounds = 10\n        )\n\n        # Request for raw prediction\n        m <- predict(model_1, agaricus.train$data, type = \"raw\")\n\n        # Feed the prediction into the next model using base_margin\n        # Using base margin overrides the base score, see below sections.\n        model_2 <- xgboost(\n          x = agaricus.train$data,\n          y = factor(agaricus.train$label),\n          base_margin = m,\n          nrounds = 10\n        )\n\n        # Make predictions with base_margin\n        pred <- predict(model_2, agaricus.train$data, base_margin = m)\n\n\nIt specifies the bias for each sample and can be used for stacking an XGBoost model on top\nof other models, see :ref:`sphx_glr_python_examples_boost_from_prediction.py` for a worked\nexample. When ``base_margin`` is specified, it automatically overrides the ``base_score``\nparameter. If you are stacking XGBoost models, then the usage should be relatively\nstraightforward, with the previous model providing raw prediction and a new model using\nthe prediction as bias. For more customized inputs, users need to take extra care of the\nlink function. Let :math:`F` be the model and :math:`g` be the link function, since\n``base_score`` is overridden when sample-specific ``base_margin`` is available, we will\nomit it here:\n\n.. math::\n\n   g(E[y_i]) = F(x_i)\n\n\nWhen base margin :math:`b` is provided, it's added to the raw model output :math:`F`:\n\n.. math::\n\n   g(E[y_i]) = F(x_i) + b_i\n\nand the output of the final model is:\n\n\n.. math::\n\n   g^{-1}(F(x_i) + b_i)\n\nUsing the gamma deviance objective ``reg:gamma`` as an example, which has a log link\nfunction, hence:\n\n.. 
math::\n\n   \\ln{(E[y_i])} = F(x_i) + b_i \\\\\n   E[y_i] = \\exp{(F(x_i) + b_i)}\n\nAs a result, if you are feeding outputs from models like GLM with a corresponding\nobjective function, make sure the outputs are not yet transformed by the inverse link\n(activation).\n\nIn the case of ``base_score`` (intercept), it can be accessed through\n:py:meth:`~xgboost.Booster.save_config` after estimation. Unlike the ``base_margin``, the\nreturned value represents a value after applying inverse link.  With logistic regression\nand the logit link function as an example, given the ``base_score`` as 0.5,\n:math:`g(intercept) = logit(0.5) = 0` is added to the raw model output:\n\n.. math::\n\n   E[y_i] = g^{-1}{(F(x_i) + g(intercept))}\n\nand 0.5 is the same as :math:`base\\_score = g^{-1}(0) = 0.5`. This is more intuitive if\nyou remove the model and consider only the intercept, which is estimated before the model\nis fitted:\n\n.. math::\n\n   E[y] = g^{-1}{(g(intercept))} \\\\\n   E[y] = intercept\n\nFor some objectives like MAE, there are closed-form solutions, while for others it's estimated\nwith a one-step Newton method.\n\n******\nOffset\n******\n\nThe ``base_margin`` is a form of ``offset`` in GLM. Using the Poisson objective as an\nexample, we might want to model the rate instead of the count:\n\n.. math::\n\n   rate = \\frac{count}{exposure}\n\nAnd the offset is defined as log link applied to the exposure variable:\n:math:`\\ln{exposure}`. Let :math:`c` be the count and :math:`\\gamma` be the exposure,\nsubstituting the response :math:`y` in our previous formulation of base margin:\n\n.. math::\n\n   g(\\frac{E[c_i]}{\\gamma_i}) = F(x_i)\n\nSubstitute :math:`g` with :math:`\\ln` for Poisson regression:\n\n.. math::\n\n   \\ln{\\frac{E[c_i]}{\\gamma_i}} = F(x_i)\n\nWe have:\n\n.. 
math::\n\n   E[c_i] &= \\exp{(F(x_i) + \\ln{\\gamma_i})} \\\\\n   E[c_i] &= g^{-1}(F(x_i) + g(\\gamma_i))\n\nAs you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs\n\n*******\nExample\n*******\n\nThe following example shows the relationship between ``base_score`` and ``base_margin``\nusing binary logistic with a `logit` link function:\n\n.. tabs::\n    .. code-tab:: py\n\n        import numpy as np\n        from scipy.special import logit\n        from sklearn.datasets import make_classification\n\n        import xgboost as xgb\n\n        X, y = make_classification(random_state=2025)\n\n    .. code-tab:: r R\n\n        library(xgboost)\n\n        # Load built-in dataset\n        data(agaricus.train, package = \"xgboost\")\n        X <- agaricus.train$data\n        y <- agaricus.train$label\n\nThe intercept is a valid probability (0.5). It's used as the initial estimation of the\nprobability of obtaining a positive sample.\n\n.. tabs::\n    .. code-tab:: py\n\n        intercept = 0.5\n\n    .. code-tab:: r R\n\n        intercept <- 0.5\n\nFirst we use the intercept to train a model:\n\n.. tabs::\n    .. code-tab:: py\n\n        booster = xgb.train(\n            {\"base_score\": intercept, \"objective\": \"binary:logistic\"},\n            dtrain=xgb.DMatrix(X, y),\n            num_boost_round=1,\n        )\n        predt_0 = booster.predict(xgb.DMatrix(X, y))\n\n    .. code-tab:: r R\n\n        # First model with base_score\n        model_0 <- xgboost(\n          x = X, y = factor(y),\n          base_score = intercept,\n          objective = \"binary:logistic\",\n          nrounds = 1\n        )\n        predt_0 <- predict(model_0, X)\n\nApply :py:func:`~scipy.special.logit` to obtain the \"margin\":\n\n.. tabs::\n    .. 
code-tab:: py\n\n        # Apply logit function to obtain the \"margin\"\n        margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)\n        Xy = xgb.DMatrix(X, y, base_margin=margin)\n        # Second model with base_margin\n        # 0.2 is a dummy value to show that `base_margin` overrides `base_score`.\n        booster = xgb.train(\n            {\"base_score\": 0.2, \"objective\": \"binary:logistic\"},\n            dtrain=Xy,\n            num_boost_round=1,\n        )\n        predt_1 = booster.predict(Xy)\n\n    .. code-tab:: r R\n\n        # Apply logit function to obtain the \"margin\"\n        logit_intercept <- log(intercept / (1 - intercept))\n        margin <- rep(logit_intercept, length(y))\n        # Second model with base_margin\n        # 0.2 is a dummy value to show that `base_margin` overrides `base_score`\n        model_1 <- xgboost(\n          x = X, y = factor(y),\n          base_margin = margin,\n          base_score = 0.2,\n          objective = \"binary:logistic\",\n          nrounds = 1\n        )\n        predt_1 <- predict(model_1, X, base_margin = margin)\n\nCompare the results:\n\n.. tabs::\n    .. code-tab:: py\n\n        np.testing.assert_allclose(predt_0, predt_1)\n\n    .. code-tab:: r R\n\n        all.equal(predt_0, predt_1, tolerance = 1e-6)\n"
  },
  {
    "path": "doc/tutorials/kubernetes.rst",
    "content": "###################################\nDistributed XGBoost on Kubernetes\n###################################\n\nDistributed XGBoost training on `Kubernetes <https://kubernetes.io/>`_ is supported\nvia `Kubeflow Trainer <https://github.com/kubeflow/trainer>`_. Kubeflow Trainer provides\na built-in XGBoost runtime that manages the scheduling, distributed coordination, and\nlifecycle of XGBoost training jobs on Kubernetes clusters.\n\nThis tutorial covers the end-to-end workflow: from setting up prerequisites, through\nwriting distributed training code, to launching and monitoring multi-node XGBoost jobs.\n\n.. contents::\n  :backlinks: none\n  :local:\n\n********\nOverview\n********\n\nXGBoost supports distributed training through the **Collective** communication\nprotocol (historically known as Rabit). In a distributed setting, multiple worker\nprocesses each operate on a shard of the data and synchronize histogram bin\nstatistics via AllReduce to agree on the best tree splits. Kubeflow Trainer's\nXGBoost runtime automates the orchestration of this process on Kubernetes by:\n\n- Deploying worker pods as a `JobSet <https://github.com/kubernetes-sigs/jobset>`_\n- Automatically injecting the ``DMLC_*`` environment variables required by XGBoost's\n  Collective communication layer\n- Providing the rank-0 pod with the tracker address so user code can start a\n  ``RabitTracker`` for worker coordination\n- Supporting both CPU and GPU training workloads\n\nArchitecture\n============\n\nThe distributed XGBoost training architecture on Kubernetes consists of the following\ncomponents:\n\n1. **TrainJob**: A Kubernetes custom resource that declares the training job configuration\n   (number of nodes, resources per node, training code).\n2. **ClusterTrainingRuntime**: A cluster-scoped resource that defines the XGBoost runtime\n   template (container image, ML policy, default settings). The built-in runtime is named\n   ``xgboost-distributed``.\n3. 
**Trainer Controller**: Resolves the ``TrainJob`` against the referenced runtime,\n   enforces the XGBoost ML policy (injects environment variables), and creates the\n   underlying ``JobSet``.\n4. **Worker Pods**: Each pod runs the same training script. The user's training code\n   on the rank-0 pod is responsible for starting a ``RabitTracker`` for coordination.\n\n.. code-block:: text\n\n   ┌─────────────────────────────────────────────────────────────────┐\n   │  User submits TrainJob (SDK or kubectl)                         │\n   └──────────────────────────┬──────────────────────────────────────┘\n                              │\n                              ▼\n   ┌─────────────────────────────────────────────────────────────────┐\n   │  Trainer Controller                                             │\n   │  • Resolves ClusterTrainingRuntime (xgboost-distributed)        │\n   │  • Enforces XGBoost MLPolicy (injects DMLC_* env vars)          │\n   │  • Creates JobSet with worker pods                              │\n   └──────────────────────────┬──────────────────────────────────────┘\n                              │\n                              ▼\n   ┌─────────────────────────────────────────────────────────────────┐\n   │  Kubernetes Cluster (Headless Service)                          │\n   │                                                                 │\n   │  ┌────────────────┐  ┌──────────┐  ┌──────────┐                 │\n   │  │ Pod: node-0-0  │  │ node-0-1 │  │ node-0-2 │  ...            
│\n   │  │ TASK_ID=0      │  │ TASK_ID=1│  │ TASK_ID=2│                 │\n   │  │ (Tracker)      │  │ (Worker) │  │ (Worker) │                 │\n   │  └───────┬────────┘  └────┬─────┘  └────┬─────┘                 │\n   │          │                │              │                      │\n   │          └──── Collective Protocol ───────┘                     │\n   └─────────────────────────────────────────────────────────────────┘\n\nEnvironment Variables\n=====================\n\nThe XGBoost runtime plugin automatically injects the following environment variables\ninto each worker pod. These are native to XGBoost's Collective protocol:\n\n.. list-table::\n   :header-rows: 1\n   :widths: 25 50 25\n\n   * - Variable\n     - Description\n     - Example Value\n   * - ``DMLC_TRACKER_URI``\n     - DNS address of the rank-0 pod running the tracker\n     - ``myjob-node-0-0.myjob``\n   * - ``DMLC_TRACKER_PORT``\n     - Port for tracker communication\n     - ``29500``\n   * - ``DMLC_TASK_ID``\n     - Worker rank (derived from pod completion index)\n     - ``0``, ``1``, ``2``, ...\n   * - ``DMLC_NUM_WORKER``\n     - Total number of workers across all nodes\n     - ``4``\n\nThese environment variables are **reserved** and cannot be manually set by the user in the\n``TrainJob`` spec. The runtime plugin validates this and rejects any ``TrainJob`` that\nattempts to override them.\n\nWorker Count Calculation\n========================\n\nThe total number of workers (``DMLC_NUM_WORKER``) is calculated as:\n\n.. code-block:: text\n\n   DMLC_NUM_WORKER = numNodes × workersPerNode\n\nWhere ``workersPerNode`` is determined by:\n\n- **CPU training**: 1 worker per node. XGBoost does **not** spawn multiple worker\n  processes for CPU training. Instead, a single worker process uses OpenMP to\n  parallelize tree building across all available CPU cores on the node. 
This means\n  if a pod has 8 CPU cores, 1 XGBoost worker will use all 8 cores for intra-process\n  parallelism (histogram construction, split evaluation, etc.).\n\n  The number of threads can be controlled with the ``nthread`` Booster parameter:\n\n  .. code-block:: python\n\n     # By default, XGBoost uses all available CPU cores.\n     # Set nthread to limit the number of OpenMP threads per worker.\n     params = {\n         \"objective\": \"binary:logistic\",\n         \"nthread\": 4,          # Use only 4 of the available cores\n         \"tree_method\": \"hist\",\n     }\n\n  The ``nthread`` parameter in the DMatrix constructor controls parallelism during\n  data loading, while ``nthread`` in the Booster parameters controls parallelism\n  during training. If not set, both default to the maximum number of threads\n  available on the machine.\n\n  .. tip::\n\n     When setting ``resourcesPerNode`` CPU requests in your ``TrainJob``, align the\n     ``nthread`` parameter with the CPU requests to avoid over-subscription. For\n     example, if you request ``cpu: \"4\"``, set ``\"nthread\": 4`` in your training\n     parameters.\n\n- **GPU training**: 1 worker per GPU. The GPU count is derived from the\n  ``resourcesPerNode`` limits in the ``TrainJob`` or runtime template.  In\n  distributed environments, use ``device=\"cuda\"`` (not ``\"cuda:<ordinal>\"``);\n  GPU ordinal selection is handled by the distributed framework, and specifying\n  an ordinal will result in an error.\n\n.. list-table::\n   :header-rows: 1\n   :widths: 30 15 20 20\n\n   * - Configuration\n     - numNodes\n     - workersPerNode\n     - DMLC_NUM_WORKER\n   * - 4 nodes, CPU-only\n     - 4\n     - 1\n     - 4\n   * - 2 nodes, 4 GPUs each\n     - 2\n     - 4\n     - 8\n   * - 1 node, 8 GPUs\n     - 1\n     - 8\n     - 8\n\n*************\nPrerequisites\n*************\n\nBefore running distributed XGBoost jobs on Kubernetes, ensure the following:\n\n1. 
**Kubernetes Cluster**: A running Kubernetes cluster (v1.27+). You can use\n   `kind <https://kind.sigs.k8s.io/>`_, `minikube <https://minikube.sigs.k8s.io/>`_,\n   or a managed Kubernetes service (GKE, EKS, AKS).\n\n2. **kubectl**: The Kubernetes CLI tool, configured to communicate with your cluster.\n   See the `kubectl installation guide <https://kubernetes.io/docs/tasks/tools/>`_.\n\n3. **Kubeflow Trainer**: Install Kubeflow Trainer and its dependencies (JobSet) on\n   your cluster. Follow the\n   `Kubeflow Trainer installation guide <https://www.kubeflow.org/docs/components/trainer/>`_:\n\n   .. code-block:: bash\n\n      # Install the Kubeflow Trainer control plane (includes JobSet).\n      kubectl apply --server-side -k \"github.com/kubeflow/trainer/manifests/overlays/standalone\"\n\n4. **Kubeflow Python SDK** (optional, for programmatic job submission):\n\n   .. code-block:: bash\n\n      pip install kubeflow\n\n5. **GPU Support** (optional, for GPU training): Ensure the\n   `NVIDIA GPU Operator <https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html>`_\n   or equivalent device plugin is installed on your cluster.\n\nVerify the Installation\n=======================\n\nAfter installing Kubeflow Trainer, verify that the XGBoost runtime is available:\n\n.. code-block:: bash\n\n   kubectl get clustertrainingruntime\n\nYou should see the ``xgboost-distributed`` runtime listed:\n\n.. code-block:: text\n\n   NAME                   AGE\n   xgboost-distributed    1m\n\n************************************\nXGBoost ClusterTrainingRuntime\n************************************\n\nThe ``xgboost-distributed`` ``ClusterTrainingRuntime`` is deployed as part of the\nKubeflow Trainer installation. It defines the default XGBoost runtime template:\n\n.. 
code-block:: yaml\n\n   apiVersion: trainer.kubeflow.org/v1alpha1\n   kind: ClusterTrainingRuntime\n   metadata:\n     name: xgboost-distributed\n     labels:\n       trainer.kubeflow.org/framework: xgboost\n   spec:\n     mlPolicy:\n       numNodes: 1\n       xgboost: {}\n     template:\n       spec:\n         replicatedJobs:\n           - name: node\n             template:\n               metadata:\n                 labels:\n                   trainer.kubeflow.org/trainjob-ancestor-step: trainer\n               spec:\n                 template:\n                   spec:\n                     containers:\n                       - name: node\n                         image: ghcr.io/kubeflow/trainer/xgboost-runtime:latest\n\nKey points:\n\n- ``mlPolicy.xgboost: {}`` activates the XGBoost runtime plugin, which handles\n  injection of ``DMLC_*`` environment variables.\n- ``numNodes`` defaults to ``1`` and can be overridden per ``TrainJob``.\n- The container image ``ghcr.io/kubeflow/trainer/xgboost-runtime:latest`` is based on\n  ``nvidia/cuda:12.4.0-runtime-ubuntu22.04`` and includes XGBoost 3.0.2 with CUDA 12\n  support, NumPy, and scikit-learn.\n\n***************************************\nExample: Distributed XGBoost Training\n***************************************\n\nThis section demonstrates two approaches for running distributed XGBoost training:\nusing the Python SDK (recommended for interactive use) and using ``kubectl`` with YAML\nmanifests.\n\nUsing the Python SDK\n====================\n\nThe Kubeflow Python SDK provides a ``TrainerClient`` that simplifies submitting and\nmanaging training jobs programmatically.\n\nStep 1: Define the Training Function\n-------------------------------------\n\nWrite the training function that will be serialized and executed on each worker node.\nThe ``DMLC_*`` environment variables are automatically injected by the runtime.\n\n.. 
code-block:: python\n\n   def xgboost_train_classification():\n       \"\"\"\n       Distributed XGBoost training function using the Collective API.\n\n       DMLC_* env vars are injected by the Kubeflow Trainer XGBoost plugin:\n         - DMLC_TRACKER_URI:  DNS name of the rank-0 pod running the tracker\n         - DMLC_TRACKER_PORT: Port for tracker communication (default: 29500)\n         - DMLC_TASK_ID:      Worker rank (0, 1, 2, ...)\n         - DMLC_NUM_WORKER:   Total number of workers\n       \"\"\"\n       import os\n       import xgboost as xgb\n       from xgboost import collective as coll\n       from xgboost.tracker import RabitTracker\n       from sklearn.datasets import make_classification\n       from sklearn.model_selection import train_test_split\n       from sklearn.metrics import accuracy_score\n\n       # Read injected environment variables.\n       rank = int(os.environ[\"DMLC_TASK_ID\"])\n       world_size = int(os.environ[\"DMLC_NUM_WORKER\"])\n       tracker_uri = os.environ[\"DMLC_TRACKER_URI\"]\n       tracker_port = int(os.environ[\"DMLC_TRACKER_PORT\"])\n\n       # Rank 0 starts the Rabit tracker (required for coordination).\n       tracker = None\n       if rank == 0:\n           tracker = RabitTracker(\n               host_ip=\"0.0.0.0\", n_workers=world_size, port=tracker_port\n           )\n           tracker.start()\n\n       # All workers connect to the tracker via the Collective communicator.\n       with coll.CommunicatorContext(\n           dmlc_tracker_uri=tracker_uri,\n           dmlc_tracker_port=tracker_port,\n           dmlc_task_id=str(rank),\n       ):\n           # Generate synthetic classification data.\n           # In practice, each worker would load its own data shard.\n           X, y = make_classification(\n               n_samples=10000, n_features=20, n_informative=10,\n               n_classes=2, random_state=42 + rank,\n           )\n           X_train, X_valid, y_train, y_valid = train_test_split(\n           
    X, y, test_size=0.2, random_state=42,\n           )\n\n           # NOTE: DMatrix construction MUST be inside the communicator context\n           # because it involves cross-worker synchronization for quantization.\n           #\n           # Use QuantileDMatrix instead of DMatrix for the hist tree method\n           # (the default). QuantileDMatrix quantizes data on-the-fly, avoiding\n           # an intermediate dense copy and significantly reducing memory usage.\n           dtrain = xgb.QuantileDMatrix(X_train, label=y_train)\n           # Validation QuantileDMatrix must reference the training matrix\n           # so that the same quantile bins are reused.\n           dvalid = xgb.QuantileDMatrix(X_valid, label=y_valid, ref=dtrain)\n\n           # Training parameters.\n           params = {\n               \"objective\": \"binary:logistic\",\n               \"max_depth\": 6,\n               \"eta\": 0.1,\n               \"eval_metric\": \"logloss\",\n           }\n\n           # Distributed training - workers synchronize histogram stats via collective ops.\n           # early_stopping_rounds activates early stopping based on the validation metric.\n           # verbose_eval=10 prints evaluation results every 10 rounds (rank 0 only).\n           model = xgb.train(\n               params, dtrain,\n               num_boost_round=100,\n               evals=[(dvalid, \"validation\")],\n               early_stopping_rounds=10,\n               verbose_eval=10,\n           )\n\n           # Note: early_stopping_rounds returns the *last* model, not the best.\n           # Use model.best_iteration to slice the model to the best round.\n           if hasattr(model, \"best_iteration\"):\n               model = model[: model.best_iteration + 1]\n\n           # Evaluate on validation set.\n           preds = model.predict(dvalid)\n           predictions = [1 if p > 0.5 else 0 for p in preds]\n           accuracy = accuracy_score(y_valid, predictions)\n\n           # Only 
perform logging and model saving from rank 0\n           # to avoid duplicate output and file write conflicts.\n           if coll.get_rank() == 0:\n               print(f\"Validation Accuracy: {accuracy:.4f}\")\n               model.save_model(\"/workspace/xgboost_model.json\")\n               print(\"Model saved to /workspace/xgboost_model.json\")\n\n       # Wait for tracker to finish (rank 0 only).\n       if tracker is not None:\n           tracker.wait_for()\n\nStep 2: Submit the Training Job\n-------------------------------\n\nUse the ``TrainerClient`` to submit the training function as a distributed job:\n\n.. code-block:: python\n\n   from kubeflow.trainer import CustomTrainer, TrainerClient\n\n   client = TrainerClient()\n\n   # Submit a distributed XGBoost training job on 3 nodes.\n   job_name = client.train(\n       trainer=CustomTrainer(\n           func=xgboost_train_classification,\n           num_nodes=3,\n           resources_per_node={\"cpu\": 3},\n       ),\n       runtime=\"xgboost-distributed\",\n   )\n\n   print(f\"TrainJob '{job_name}' submitted\")\n\nFor GPU training, include GPU resources:\n\n.. code-block:: python\n\n   job_name = client.train(\n       trainer=CustomTrainer(\n           func=xgboost_train_classification,\n           num_nodes=2,\n           resources_per_node={\n               \"cpu\": 4,\n               \"gpu\": 4,  # 4 GPUs per node → 8 total workers\n           },\n       ),\n       runtime=\"xgboost-distributed\",\n   )\n\n.. note::\n\n   For GPU training, add ``\"device\": \"cuda\"`` to the XGBoost ``params`` dictionary\n   in your training function.\n\nStep 3: Monitor the Training Job\n---------------------------------\n\nCheck the job status and view logs:\n\n.. 
code-block:: python\n\n   # Wait for the job to start running.\n   client.wait_for_job_status(name=job_name, status={\"Running\"})\n\n   # Check the steps (one per worker node).\n   for step in client.get_job(name=job_name).steps:\n       print(f\"Step: {step.name}, Status: {step.status}\")\n\n   # Stream logs from each worker node.\n   num_nodes = 3\n   for i in range(num_nodes):\n       logs = client.get_job_logs(name=job_name, follow=True, step=f\"node-{i}\")\n       print(f\"\\n=== Node {i} ===\")\n       print(\"\\n\".join(logs))\n\nStep 4: Clean Up\n----------------\n\nDelete the training job when it is finished:\n\n.. code-block:: python\n\n   client.delete_job(job_name)\n\nUsing kubectl with YAML\n========================\n\nYou can also create ``TrainJob`` resources directly using ``kubectl``.\n\nCPU Training Example\n---------------------\n\nThe following YAML creates a distributed XGBoost training job with 4 worker nodes:\n\n.. code-block:: yaml\n\n   apiVersion: trainer.kubeflow.org/v1alpha1\n   kind: TrainJob\n   metadata:\n     name: xgboost-cpu-example\n   spec:\n     runtimeRef:\n       name: xgboost-distributed\n     trainer:\n       image: ghcr.io/kubeflow/trainer/xgboost-runtime:latest\n       command:\n         - python\n         - train.py\n       numNodes: 4\n       resourcesPerNode:\n         requests:\n           cpu: \"4\"\n           memory: \"8Gi\"\n\nApply the manifest:\n\n.. code-block:: bash\n\n   kubectl apply -f xgboost-cpu-trainjob.yaml\n\nGPU Training Example\n---------------------\n\nFor multi-node GPU training, specify GPU resources via ``resourcesPerNode``:\n\n.. 
code-block:: yaml\n\n   apiVersion: trainer.kubeflow.org/v1alpha1\n   kind: TrainJob\n   metadata:\n     name: xgboost-gpu-example\n   spec:\n     runtimeRef:\n       name: xgboost-distributed\n     trainer:\n       image: ghcr.io/kubeflow/trainer/xgboost-runtime:latest\n       command:\n         - python\n         - train.py\n       numNodes: 2\n       resourcesPerNode:\n         limits:\n           nvidia.com/gpu: \"4\"\n         requests:\n           cpu: \"4\"\n           memory: \"16Gi\"\n\nWith this configuration, the runtime calculates ``DMLC_NUM_WORKER = 2 nodes × 4 GPUs = 8``.\nEach GPU runs one XGBoost worker process.\n\nMonitoring with kubectl\n------------------------\n\n.. code-block:: bash\n\n   # Check TrainJob status.\n   kubectl get trainjob xgboost-cpu-example\n\n   # View logs from a specific worker pod.\n   kubectl logs xgboost-cpu-example-node-0-0\n\n   # Delete the TrainJob.\n   kubectl delete trainjob xgboost-cpu-example\n\n*************\nHow It Works\n*************\n\nThis section provides additional implementation details for users who want to\nunderstand the runtime plugin internals.\n\nXGBoost Runtime Plugin\n======================\n\nThe XGBoost runtime is implemented as a Go plugin in the Kubeflow Trainer controller\n(see ``pkg/runtime/framework/plugins/xgboost/`` in the Trainer repository). It\nimplements two interfaces:\n\n- ``EnforceMLPolicyPlugin``: Injects the ``DMLC_*`` environment variables (described\n  in `Environment Variables`_) and exposes container port ``29500``.\n- ``CustomValidationPlugin``: Rejects any ``TrainJob`` that manually sets reserved\n  ``DMLC_*`` environment variables.\n\nTracker Discovery\n=================\n\nWorkers discover the ``RabitTracker`` on rank-0 via a Kubernetes headless service.\nThe ``DMLC_TRACKER_URI`` is constructed as:\n\n.. code-block:: text\n\n   <trainjob-name>-node-0-0.<trainjob-name>\n\nFor example, a ``TrainJob`` named ``myjob`` with 4 nodes creates pods:\n\n.. 
code-block:: text\n\n   myjob-node-0-0   DMLC_TASK_ID=0   (Tracker + Worker)\n   myjob-node-0-1   DMLC_TASK_ID=1   (Worker)\n   myjob-node-0-2   DMLC_TASK_ID=2   (Worker)\n   myjob-node-0-3   DMLC_TASK_ID=3   (Worker)\n\n.. note::\n\n   Starting the tracker is the **user's responsibility**. The runtime injects the\n   environment variables, but the training code on rank-0 must call\n   ``RabitTracker(...).start()`` before other workers can connect.\n\n***************\nBest Practices\n***************\n\nThis section covers practical tips for getting the most out of distributed XGBoost\non Kubernetes.\n\nUse QuantileDMatrix for Memory Efficiency\n=========================================\n\nThe default tree method is ``hist`` (``tree_method=\"auto\"`` resolves to ``hist``).\nWhen using ``hist``, prefer :py:class:`xgboost.QuantileDMatrix` over\n:py:class:`xgboost.DMatrix`. ``QuantileDMatrix`` generates quantilized data directly\nfrom input, skipping the intermediate dense representation and significantly\nreducing memory consumption:\n\n.. code-block:: python\n\n   # Standard DMatrix — loads data then quantizes (higher peak memory)\n   dtrain = xgb.DMatrix(X_train, label=y_train)\n\n   # QuantileDMatrix — quantizes on-the-fly (lower peak memory)\n   dtrain = xgb.QuantileDMatrix(X_train, label=y_train)\n\nWhen constructing a validation ``QuantileDMatrix``, always pass the training matrix\nas ``ref`` so XGBoost reuses the same quantile bins. Omitting ``ref`` for validation\ndata may lead to inconsistent quantization and degraded model quality:\n\n.. code-block:: python\n\n   dtrain = xgb.QuantileDMatrix(X_train, label=y_train)\n   dvalid = xgb.QuantileDMatrix(X_valid, label=y_valid, ref=dtrain)  # correct\n\n.. note::\n\n   ``QuantileDMatrix`` was added in XGBoost 1.7.0. 
No explicit ``tree_method``\n   parameter is needed — the default ``auto`` already uses ``hist``.\n\nEarly Stopping\n==============\n\nEarly stopping is activated by passing ``early_stopping_rounds`` to\n:py:func:`xgboost.train`. It requires at least one validation set in ``evals``.\nTraining stops if the validation metric does not improve for the specified number\nof consecutive rounds:\n\n.. code-block:: python\n\n   model = xgb.train(\n       params, dtrain,\n       num_boost_round=500,\n       evals=[(dvalid, \"validation\")],\n       early_stopping_rounds=10,\n   )\n\nEarly stopping works correctly in distributed mode — evaluation metrics are already\nsynchronized across workers via the collective protocol.\n\n**Important**: ``xgb.train`` with ``early_stopping_rounds`` returns the **last**\nmodel, not the best one. To get the best model, use model slicing:\n\n.. code-block:: python\n\n   # After training, slice to keep only rounds up to the best iteration.\n   if hasattr(model, \"best_iteration\"):\n       model = model[: model.best_iteration + 1]\n\nAlternatively, use the :py:class:`xgboost.callback.EarlyStopping` callback directly\nwith ``save_best=True`` to automatically keep only the best model:\n\n.. code-block:: python\n\n   from xgboost.callback import EarlyStopping\n\n   model = xgb.train(\n       params, dtrain,\n       num_boost_round=500,\n       evals=[(dvalid, \"validation\")],\n       callbacks=[EarlyStopping(rounds=10, save_best=True)],\n   )\n   # model now contains only the rounds up to the best iteration\n\nWhen multiple evaluation datasets are provided in ``evals``, the **last** entry\nis used for early stopping. When multiple ``eval_metric`` values are specified,\nthe **last** metric is used.\n\nLogging in Distributed Mode\n===========================\n\nIn distributed training, ``print()`` executes on every worker, producing duplicate\nlog lines. To log from a single worker, guard with a rank check:\n\n.. 
code-block:: python\n\n   from xgboost import collective as coll\n\n   with coll.CommunicatorContext(...):\n       # Print only from rank 0.\n       if coll.get_rank() == 0:\n           print(f\"Training complete, best score: {model.best_score}\")\n\n:py:func:`xgboost.collective.communicator_print` is an alternative that routes\nmessages through the tracker rather than stdout. Note that it does **not** filter\nby rank — any worker that calls it will have its message printed by the tracker.\nIt is primarily used internally (e.g., by ``verbose_eval``, which adds its own\nrank-0 guard via :py:class:`xgboost.callback.EvaluationMonitor`).\n\nSetting verbose_eval for Production\n===================================\n\nIn distributed Kubernetes jobs, set ``verbose_eval`` to an integer rather than\n``True`` to reduce log volume:\n\n.. code-block:: python\n\n   model = xgb.train(\n       params, dtrain,\n       num_boost_round=500,\n       evals=[(dvalid, \"validation\")],\n       verbose_eval=50,  # print every 50 rounds instead of every round\n   )\n\nCheckpointing\n=============\n\nXGBoost provides a :py:class:`xgboost.callback.TrainingCheckPoint` callback that\nperiodically saves model snapshots during training. The callback automatically\nsaves only from rank 0 to avoid multiple workers writing to the same path:\n\n.. code-block:: python\n\n   from xgboost.callback import TrainingCheckPoint\n\n   model = xgb.train(\n       params, dtrain,\n       num_boost_round=500,\n       evals=[(dvalid, \"validation\")],\n       callbacks=[\n           TrainingCheckPoint(\n               directory=\"/workspace/checkpoints\",\n               name=\"xgb_model\",\n               interval=50,  # save every 50 rounds\n           ),\n       ],\n   )\n\n.. warning::\n\n   XGBoost does not handle distributed file systems. 
The ``directory`` path must be\n   writable from the rank-0 pod — for example, a Kubernetes\n   `PersistentVolumeClaim <https://kubernetes.io/docs/concepts/storage/persistent-volumes/>`_\n   mounted into the pod.\n\nTo resume training from a checkpoint, pass the saved model file via ``xgb_model``:\n\n.. code-block:: python\n\n   model = xgb.train(\n       params, dtrain,\n       num_boost_round=500,\n       xgb_model=\"/workspace/checkpoints/xgb_model_200.ubj\",  # resume from round 200\n       evals=[(dvalid, \"validation\")],\n   )\n\nData Partitioning\n=================\n\nBy default, each worker in a distributed XGBoost job holds a different subset of\n**rows** (horizontal partitioning). This is controlled by the ``data_split_mode``\nparameter (default: ``DataSplitMode.ROW``). In this mode, each worker loads its\nown shard of the data:\n\n.. code-block:: python\n\n   with coll.CommunicatorContext(...):\n       # Each worker loads a different data shard based on its rank.\n       rank = coll.get_rank()\n       X_shard, y_shard = load_data_shard(rank)\n       dtrain = xgb.QuantileDMatrix(X_shard, label=y_shard)\n\nColumn-wise splitting (``DataSplitMode.COL``) is also supported, where each worker\nholds a different subset of features. This is typically used for vertical federated\nlearning scenarios and is not the common distributed training pattern.\n\nRank-Specific Logic\n===================\n\nUse :py:func:`xgboost.collective.get_rank` and\n:py:func:`xgboost.collective.get_world_size` for rank-specific operations inside\nthe communicator context:\n\n.. code-block:: python\n\n   with coll.CommunicatorContext(...):\n       if coll.get_rank() == 0:\n           model.save_model(\"/workspace/model.json\")\n\n       # Broadcast results to all workers if needed. This is a collective\n       # call, so every worker must invoke it (outside the rank-0 guard).\n       results = coll.broadcast(results, root=0)\n\n:py:func:`xgboost.collective.broadcast` can broadcast any picklable Python object\nfrom one worker to all others. 
This is useful for sharing preprocessed metadata\n(e.g., label encoders, feature name lists) computed on rank 0.\n\n********************************\nCommon Issues and Edge Cases\n********************************\n\nReserved Environment Variables\n==============================\n\nThe runtime plugin rejects any ``TrainJob`` that manually sets the reserved ``DMLC_*``\nenvironment variables (``DMLC_TRACKER_URI``, ``DMLC_TRACKER_PORT``, ``DMLC_TASK_ID``,\n``DMLC_NUM_WORKER``). If you set any of these in ``spec.trainer.env``, the webhook\nwill return a ``Forbidden`` error:\n\n.. code-block:: text\n\n   spec.trainer.env[0]: Forbidden: DMLC_TRACKER_URI is reserved for the XGBoost runtime\n\nRemove the reserved variables from your ``TrainJob`` spec and let the runtime inject\nthem automatically.\n\nNo Environment Injection When Trainer Is Nil\n============================================\n\nIf the ``TrainJob`` does not include a ``spec.trainer`` section, the XGBoost plugin\nskips environment variable injection entirely. The ``DMLC_*`` variables are only\ninjected when ``spec.trainer`` is present and the runtime can locate the ``node``\ncontainer in the pod template. Ensure your ``TrainJob`` includes the ``trainer``\nfield.\n\nResource Precedence: TrainJob Overrides Runtime\n===============================================\n\nWhen GPU resources are specified in both the ``ClusterTrainingRuntime`` template and\nthe ``TrainJob.spec.trainer.resourcesPerNode``, the **TrainJob value takes precedence**.\nThis affects the ``workersPerNode`` calculation:\n\n.. 
code-block:: text\n\n   Runtime template: nvidia.com/gpu: 1  →  workersPerNode = 1\n   TrainJob override: nvidia.com/gpu: 3  →  workersPerNode = 3  (this wins)\n\nIf neither specifies GPU resources, ``workersPerNode`` defaults to ``1`` (CPU mode).\n\nGPU Device Ordinal in Distributed Mode\n======================================\n\nIn distributed training, do **not** use ``device=\"cuda:0\"`` or any specific GPU ordinal\nin your XGBoost parameters. GPU device assignment is handled by the Kubernetes device\nplugin and the distributed framework. Use ``device=\"cuda\"`` instead:\n\n.. code-block:: python\n\n   # Correct\n   params = {\"device\": \"cuda\", \"tree_method\": \"hist\"}\n\n   # Wrong — will raise an error in distributed mode\n   params = {\"device\": \"cuda:0\", \"tree_method\": \"hist\"}\n\nData Matrices Must Be Inside CommunicatorContext\n=================================================\n\nConstructing ``xgb.DMatrix`` or ``xgb.QuantileDMatrix`` outside the\n``CommunicatorContext`` may appear to work with dense data, but the behavior is\nundefined. The constructor performs cross-worker synchronization for data shape\nvalidation and quantile sketching (needed by ``tree_method=\"hist\"``). Always\nconstruct data matrices inside the context:\n\n.. code-block:: python\n\n   # Wrong — data matrix outside context\n   dtrain = xgb.QuantileDMatrix(X_train, label=y_train)\n   with coll.CommunicatorContext(...):\n       model = xgb.train(params, dtrain, ...)  
# Undefined behavior\n\n   # Correct — data matrix inside context\n   with coll.CommunicatorContext(...):\n       dtrain = xgb.QuantileDMatrix(X_train, label=y_train)\n       model = xgb.train(params, dtrain, ...)\n\nSingle-Node Defaults\n====================\n\nIf ``numNodes`` is not specified in the ``TrainJob``, the runtime uses the default\nfrom the ``ClusterTrainingRuntime`` (``1`` for the ``xgboost-distributed`` runtime).\nA single-node job still goes through the full runtime pipeline — the ``RabitTracker``\nis started on rank-0 (which is the only pod), and ``DMLC_NUM_WORKER`` is set to ``1``.\nThis is useful for testing your training function locally before scaling up.\n\nCPU Over-Subscription\n=====================\n\nBy default, XGBoost uses all available CPU cores via OpenMP. In a Kubernetes pod,\n\"available cores\" is determined by cgroup limits set by the container runtime.\nIf your pod specifies only CPU **requests** (no **limits**), the cgroup may not\ncap CPU usage, and XGBoost may attempt to use all cores on the node, causing\ncontention with other pods.\n\nTo avoid this, either:\n\n- Set ``nthread`` in your XGBoost parameters to match your CPU request\n- Set CPU ``limits`` (not just ``requests``) in ``resourcesPerNode`` so the container\n  runtime enforces a cgroup ceiling\n\n.. code-block:: yaml\n\n   # Setting both requests and limits ensures XGBoost sees the correct core count\n   resourcesPerNode:\n     requests:\n       cpu: \"4\"\n     limits:\n       cpu: \"4\"\n\n*******\nSupport\n*******\n\n- For issues related to the Kubeflow Trainer XGBoost runtime, open an issue on the\n  `Kubeflow Trainer repository <https://github.com/kubeflow/trainer/issues>`_.\n- For XGBoost-specific questions, see the\n  `XGBoost documentation <https://xgboost.readthedocs.io/>`_.\n- The complete example notebook is available in the\n  `Kubeflow Trainer examples <https://github.com/kubeflow/trainer/tree/master/examples/xgboost/distributed-training>`_.\n"
  },
  {
    "path": "doc/tutorials/learning_to_rank.rst",
    "content": "################\nLearning to Rank\n################\n\n**Contents**\n\n.. contents::\n  :local:\n  :backlinks: none\n\n********\nOverview\n********\nOften in the context of information retrieval, learning-to-rank aims to train a model that arranges a set of query results into an ordered list `[1] <#references>`__. For supervised learning-to-rank, the predictors are sample documents encoded as a feature matrix, and the labels are relevance degree for each sample. Relevance degree can be multi-level (graded) or binary (relevant or not). The training samples are often grouped by their query index with each query group containing multiple query results.\n\nXGBoost implements learning to rank through a set of objective functions and performance metrics. The default objective is ``rank:ndcg`` based on the ``LambdaMART`` `[2] <#references>`__ algorithm, which in turn is an adaptation of the ``LambdaRank`` `[3] <#references>`__ framework to gradient boosting trees. For a history and a summary of the algorithm, see `[5] <#references>`__. The implementation in XGBoost features deterministic GPU computation, distributed training, position debiasing and two different pair construction strategies.\n\n************************************\nTraining with the Pairwise Objective\n************************************\n``LambdaMART`` is a pairwise ranking model, meaning that it compares the relevance degree for every pair of samples in a query group and calculates a proxy gradient for each pair. The default objective ``rank:ndcg`` is using the surrogate gradient derived from the ``ndcg`` metric. To train an XGBoost model, we need an additional sorted array called ``qid`` for specifying the query group of input samples. 
An example input would look like this:\n\n+-------+-----------+---------------+\n|   QID |   Label   |   Features    |\n+=======+===========+===============+\n|   1   |   0       |   :math:`x_1` |\n+-------+-----------+---------------+\n|   1   |   1       |   :math:`x_2` |\n+-------+-----------+---------------+\n|   1   |   0       |   :math:`x_3` |\n+-------+-----------+---------------+\n|   2   |   0       |   :math:`x_4` |\n+-------+-----------+---------------+\n|   2   |   1       |   :math:`x_5` |\n+-------+-----------+---------------+\n|   2   |   1       |   :math:`x_6` |\n+-------+-----------+---------------+\n|   2   |   1       |   :math:`x_7` |\n+-------+-----------+---------------+\n\nNotice that the samples are sorted based on their query index in a non-decreasing order. In the above example, the first three samples belong to the first query and the next four samples belong to the second. For the sake of simplicity, we will use a synthetic binary learning-to-rank dataset in the following code snippets, with binary labels representing whether the result is relevant or not, and randomly assign the query group index to each sample. For an example that uses a real world dataset, please see :ref:`sphx_glr_python_examples_learning_to_rank.py`.\n\n.. code-block:: python\n\n  from sklearn.datasets import make_classification\n  import numpy as np\n\n  import xgboost as xgb\n\n  # Make a synthetic ranking dataset for demonstration\n  seed = 1994\n  X, y = make_classification(random_state=seed)\n  rng = np.random.default_rng(seed)\n  n_query_groups = 3\n  qid = rng.integers(0, n_query_groups, size=X.shape[0])\n\n  # Sort the inputs based on query index\n  sorted_idx = np.argsort(qid)\n  X = X[sorted_idx, :]\n  y = y[sorted_idx]\n  qid = qid[sorted_idx]\n\nThe simplest way to train a ranking model is by using the scikit-learn estimator interface. Continuing the previous snippet, we can train a simple ranking model without tuning:\n\n.. 
code-block:: python\n\n  ranker = xgb.XGBRanker(tree_method=\"hist\", lambdarank_num_pair_per_sample=8, objective=\"rank:ndcg\", lambdarank_pair_method=\"topk\")\n  ranker.fit(X, y, qid=qid)\n\nPlease note that, as of writing, there's no learning-to-rank interface in scikit-learn. As a result, the :py:class:`xgboost.XGBRanker` class does not fully conform to the scikit-learn estimator guideline and cannot be directly used with some of its utility functions. For instance, the ``auc_score`` and ``ndcg_score`` in scikit-learn don't consider query group information nor the pairwise loss. Most of the metrics are implemented as part of XGBoost, but to use scikit-learn utilities like :py:func:`sklearn.model_selection.cross_validate`, we need to make some adjustments in order to pass the ``qid`` as an additional parameter for :py:meth:`xgboost.XGBRanker.score`. Given a data frame ``X`` (either pandas or cuDF), add the column ``qid`` as follows:\n\n.. code-block:: python\n\n  import pandas as pd\n\n  # `X`, `qid`, and `y` are from the previous snippet, they are all sorted by the `sorted_idx`.\n  df = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])\n  df[\"qid\"] = qid\n\n  ranker.fit(df, y)  # No need to pass qid as a separate argument\n\n  from sklearn.model_selection import StratifiedGroupKFold, cross_val_score\n  # Works with cv in scikit-learn, along with HPO utilities like GridSearchCV\n  kfold = StratifiedGroupKFold(shuffle=False)\n  cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)\n\nThe above snippets build a model using ``LambdaMART`` with the ``NDCG@8`` metric. The outputs of a ranker are relevance scores:\n\n.. code-block:: python\n\n  scores = ranker.predict(X)\n  sorted_idx = np.argsort(scores)[::-1]\n  # Sort the relevance scores from most relevant to least relevant\n  scores = scores[sorted_idx]\n\n\n*************\nPosition Bias\n*************\n\n.. versionadded:: 2.0.0\n\n.. note::\n\n   The feature is considered experimental. 
This is a heated research area, and your input is much appreciated!\n\nObtaining real relevance degrees for query results is expensive and strenuous, requiring human labelers to label all results one by one. When such a labeling task is infeasible, we might want to train the learning-to-rank model on user click data instead, as it is relatively easy to collect. Another advantage of using click data directly is that it can reflect the most up-to-date user preferences `[1] <#references>`__. However, user clicks are often biased, as users tend to choose results that are displayed in higher positions. User clicks are also noisy, where users might accidentally click on irrelevant documents. To ameliorate these issues, XGBoost implements the ``Unbiased LambdaMART`` `[4] <#references>`__ algorithm to debias the position-dependent click data. The feature can be enabled by the ``lambdarank_unbiased`` parameter; see :ref:`ltr-param` for related options and :ref:`sphx_glr_python_examples_learning_to_rank.py` for a worked example with simulated user clicks.\n\n****\nLoss\n****\n\nXGBoost implements different ``LambdaMART`` objectives based on different metrics. We list them here as a reference. Other than those used as objective function, XGBoost also implements metrics like ``pre`` (for precision) for evaluation. See :doc:`parameters </parameter>` for available options and the following sections for how to choose these objectives based on the amount of effective pairs.\n\n* NDCG\n\n`Normalized Discounted Cumulative Gain` ``NDCG`` can be used with both binary relevance and multi-level relevance. If you are not sure about your data, this metric can be used as the default. The name for the objective is ``rank:ndcg``.\n\n\n* MAP\n\n`Mean average precision` ``MAP`` is a binary measure. It can be used when the relevance label is 0 or 1. 
The name for the objective is ``rank:map``.\n\n\n* Pairwise\n\nThe `LambdaMART` algorithm scales the logistic loss with learning to rank metrics like ``NDCG`` in the hope of including ranking information into the loss function. The ``rank:pairwise`` loss is the original version of the pairwise loss, also known as the `RankNet loss` `[7] <#references>`__ or the `pairwise logistic loss`. Unlike the ``rank:map`` and the ``rank:ndcg``, no scaling is applied (:math:`|\\Delta Z_{ij}| = 1`).\n\nWhether scaling with a LTR metric is actually more effective is still up for debate; `[8] <#references>`__ provides a theoretical foundation for general lambda loss functions and some insights into the framework.\n\n******************\nConstructing Pairs\n******************\n\nThere are two implemented strategies for constructing document pairs for :math:`\\lambda`-gradient calculation. The first one is the ``mean`` method, another one is the ``topk`` method. The preferred strategy can be specified by the ``lambdarank_pair_method`` parameter.\n\nFor the ``mean`` strategy, XGBoost samples ``lambdarank_num_pair_per_sample`` pairs for each document in a query list. For example, given a list of 3 documents and ``lambdarank_num_pair_per_sample`` is set to 2, XGBoost will randomly sample 6 pairs, assuming the labels for these documents are different. On the other hand, if the pair method is set to ``topk``, XGBoost constructs about :math:`k \\times |query|` number of pairs with :math:`|query|` pairs for each sample at the top :math:`k = lambdarank\\_num\\_pair` position. The number of pairs counted here is an approximation since we skip pairs that have the same label.\n\n*********************\nObtaining Good Result\n*********************\n\nLearning to rank is a sophisticated task and an active research area. It's not trivial to train a model that generalizes well. There are multiple loss functions available in XGBoost along with a set of hyperparameters. 
This section contains some hints for how to choose hyperparameters as a starting point. One can further optimize the model by tuning these hyperparameters.\n\nThe first question would be how to choose an objective that matches the task at hand. If your input data has multi-level relevance degrees, then either ``rank:ndcg`` or ``rank:pairwise`` should be used. However, when the input has binary labels, we have multiple options based on the target metric. `[6] <#references>`__ provides some guidelines on this topic and users are encouraged to see the analysis done in their work. The choice should be based on the number of `effective pairs`, which refers to the number of pairs that can generate non-zero gradient and contribute to training. `LambdaMART` with ``MRR`` has the least amount of effective pairs as the :math:`\\lambda`-gradient is only non-zero when the pair contains a non-relevant document ranked higher than the top relevant document. As a result, it's not implemented in XGBoost. Since ``NDCG`` is a multi-level metric, it usually generates more effective pairs than ``MAP``.\n\nHowever, when there are sufficiently many effective pairs, it's shown in `[6] <#references>`__ that matching the target metric with the objective is of significance. When the target metric is ``MAP`` and you are using a large dataset that can provide a sufficient amount of effective pairs, ``rank:map`` can in theory yield a higher ``MAP`` value than ``rank:ndcg``.\n\nThe consideration of effective pairs also applies to the choice of pair method (``lambdarank_pair_method``) and the number of pairs for each sample (``lambdarank_num_pair_per_sample``). For example, the mean-``NDCG`` considers more pairs than ``NDCG@10``, so the former generates more effective pairs and provides more granularity than the latter. Also, using the ``mean`` strategy can help the model generalize with random sampling. 
However, one might want to focus the training on the top :math:`k` documents instead of using all pairs, to better fit their real-world application.\n\nWhen using the mean strategy for generating pairs, where the target metric (like ``NDCG``) is computed over the whole query list, users can specify how many pairs should be generated per each document, by setting the ``lambdarank_num_pair_per_sample``. XGBoost will randomly sample ``lambdarank_num_pair_per_sample`` pairs for each element in the query group (:math:`|pairs| = |query| \\times num\\_pairsample`). Often, setting it to 1 can produce reasonable results. In cases where performance is inadequate due to insufficient number of effective pairs being generated, set ``lambdarank_num_pair_per_sample`` to a higher value. As more document pairs are generated, more effective pairs will be generated as well.\n\nOn the other hand, if you are prioritizing the top :math:`k` documents, the ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result. Lastly, XGBoost employs additional regularization for learning to rank objectives, which can be disabled by setting the ``lambdarank_normalization`` to ``False``.\n\n\n**Summary** If you have large amount of training data:\n\n* Use the target-matching objective.\n* Choose the ``topk`` strategy for generating document pairs (if it's appropriate for your application).\n\nOn the other hand, if you have comparatively small amount of training data:\n\n* Select ``NDCG`` or the RankNet loss (``rank:pairwise``).\n* Choose the ``mean`` strategy for generating document pairs, to obtain more effective pairs.\n\nFor any method chosen, you can modify ``lambdarank_num_pair_per_sample`` to control the amount of pairs generated.\n\n.. 
_ltr-dist:\n\n********************\nDistributed Training\n********************\n\nXGBoost implements distributed learning-to-rank with integration of multiple frameworks\nincluding :doc:`Dask </tutorials/dask>`, :doc:`Spark </jvm/xgboost4j_spark_tutorial>`, and\n:doc:`PySpark </tutorials/spark_estimator>`. The interface is similar to the single-node\ncounterpart. Please refer to the documentation of the respective XGBoost interface for details.\n\n.. warning::\n\n   Position-debiasing is not yet supported for existing distributed interfaces.\n\nXGBoost works with collective operations, which means data is scattered to multiple workers. We can divide the data partitions by query group and ensure no query group is split among workers. However, this requires a costly sort and groupby operation and might only be necessary for selected use cases. Splitting and scattering a query group to multiple workers is theoretically sound but can affect the model's accuracy. If there are only a small number of groups sitting at the boundaries of workers, the small discrepancy is not an issue, as the amount of training data is usually large when distributed training is used.\n\nFor a longer explanation, assuming the pairwise ranking method is used, we calculate the gradient based on relevance degree by constructing pairs within a query group. If a single query group is split among workers and we use worker-local data for gradient calculation, then we are simply sampling pairs from a smaller group for each worker to calculate the gradient and the evaluation metric. The comparison between each pair doesn't change because a group is split into sub-groups; what changes is the number of total and effective pairs and normalizers like `IDCG`. One can generate more pairs from a large group than from two smaller subgroups. As a result, the obtained gradient is still valid from a theoretical standpoint but might not be optimal. 
As long as the data partitions within a worker are correctly sorted by query ID, XGBoost can aggregate sample gradients accordingly. Both the (Py)Spark interface and the Dask interface can sort the data according to query ID; please see the respective tutorials for more information.\n\nHowever, it's possible that a distributed framework shuffles the data during map reduce and splits every query group into multiple workers. In that case, the performance would be disastrous. As a result, it depends on the data and the framework for whether a sorted groupby is needed.\n\n**********************************\nComparing Results with Version 1.7\n**********************************\n\nThe learning to rank implementation has been significantly updated in 2.0 with added hyper-parameters and training strategies. To obtain results similar to the 1.7 :py:class:`xgboost.XGBRanker`, the following parameters should be used:\n\n.. code-block:: python\n\n    params = {\n        # 1.7 only supports sampling, while 2.0 and later use top-k as the default.\n        # See above sections for the trade-off.\n        \"lambdarank_pair_method\": \"mean\",\n        # 1.7 uses the ranknet loss while later versions use the NDCG weighted loss\n        \"objective\": \"rank:pairwise\",\n        # 1.7 doesn't have this normalization.\n        \"lambdarank_score_normalization\": False,\n        \"base_score\": 0.5,\n        # The default tree method has been changed from approx to hist.\n        \"tree_method\": \"approx\",\n        # The default for `mean` pair method is one pair each sample, which is the default in 1.7 as well.\n        # You can leave it as unset.\n        \"lambdarank_num_pair_per_sample\": 1,\n    }\n\nThe result still differs due to the change of random seed. 
But the overall training strategy would be the same for ``rank:pairwise``.\n\n*******************\nReproducible Result\n*******************\n\nAs with any other task, XGBoost should generate reproducible results given the same hardware and software environments (and data partitions, if a distributed interface is used). Even when the underlying environment has changed, the result should still be consistent. However, when the ``lambdarank_pair_method`` is set to ``mean``, XGBoost uses random sampling, and results may differ depending on the platform used. The random number generator used on Windows (Microsoft Visual C++) is different from the ones used on other platforms like Linux (GCC, Clang) [#f0]_, so the output varies significantly between these platforms.\n\n.. [#f0] `minstd_rand` implementation is different on MSVC. The implementations from GCC and Thrust produce the same output.\n\n**********\nReferences\n**********\n\n[1] Tie-Yan Liu. 2009. \"`Learning to Rank for Information Retrieval`_\". Found. Trends Inf. Retr. 3, 3 (March 2009), 225–331.\n\n[2] Christopher J. C. Burges, Robert Ragno, and Quoc Viet Le. 2006. \"`Learning to rank with nonsmooth cost functions`_\". In Proceedings of the 19th International Conference on Neural Information Processing Systems (NIPS'06). MIT Press, Cambridge, MA, USA, 193–200.\n\n[3] Wu, Q., Burges, C.J.C., Svore, K.M. et al. \"`Adapting boosting for information retrieval measures`_\". Inf Retrieval 13, 254–270 (2010).\n\n[4] Ziniu Hu, Yang Wang, Qu Peng, Hang Li. \"`Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm`_\". Proceedings of the 2019 World Wide Web Conference.\n\n[5] Burges, Chris J.C. \"`From RankNet to LambdaRank to LambdaMART: An Overview`_\". MSR-TR-2010-82\n\n[6] Pinar Donmez, Krysta M. Svore, and Christopher J.C. Burges. 2009. \"`On the local optimality of LambdaRank`_\". 
In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (SIGIR '09). Association for Computing Machinery, New York, NY, USA, 460–467.\n\n[7] Chris Burges, Tal Shaked, Erin Renshaw, Ari Lazier, Matt Deeds, Nicole Hamilton, and Greg Hullender. 2005. \"`Learning to rank using gradient descent`_\". In Proceedings of the 22nd international conference on Machine learning (ICML '05). Association for Computing Machinery, New York, NY, USA, 89–96.\n\n[8] Xuanhui Wang and Cheng Li and Nadav Golbandi and Mike Bendersky and Marc Najork. 2018. \"`The LambdaLoss Framework for Ranking Metric Optimization`_\". Proceedings of The 27th ACM International Conference on Information and Knowledge Management (CIKM '18).\n\n.. _`Learning to Rank for Information Retrieval`: https://doi.org/10.1561/1500000016\n.. _`Learning to rank with nonsmooth cost functions`: https://dl.acm.org/doi/10.5555/2976456.2976481\n.. _`Adapting boosting for information retrieval measures`: https://doi.org/10.1007/s10791-009-9112-1\n.. _`Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm`: https://dl.acm.org/doi/10.1145/3308558.3313447\n.. _`From RankNet to LambdaRank to LambdaMART: An Overview`: https://www.microsoft.com/en-us/research/publication/from-ranknet-to-lambdarank-to-lambdamart-an-overview/\n.. _`On the local optimality of LambdaRank`: https://doi.org/10.1145/1571941.1572021\n.. _`Learning to rank using gradient descent`:  https://doi.org/10.1145/1102351.1102363\n.. _`The LambdaLoss Framework for Ranking Metric Optimization`: https://dl.acm.org/doi/10.1145/3269206.3271784\n"
  },
  {
    "path": "doc/tutorials/model.rst",
    "content": "#############################\nIntroduction to Boosted Trees\n#############################\nXGBoost stands for \"Extreme Gradient Boosting\", where the term \"Gradient Boosting\" originates from the paper *Greedy Function Approximation: A Gradient Boosting Machine*, by Friedman.\n\nThe term **gradient boosted trees** has been around for a while, and there are a lot of materials on the topic.\nThis tutorial will explain boosted trees in a self-contained and principled way using the elements of supervised learning.\nWe think this explanation is cleaner, more formal, and motivates the model formulation used in XGBoost.\n\n*******************************\nElements of Supervised Learning\n*******************************\nXGBoost is used for supervised learning problems, where we use the training data (with multiple features) :math:`x_i` to predict a target variable :math:`y_i`.\nBefore we learn about trees specifically, let us start by reviewing the basic elements in supervised learning.\n\nModel and Parameters\n====================\nThe **model** in supervised learning usually refers to the mathematical structure by which the prediction :math:`y_i` is made from the input :math:`x_i`.\nA common example is a *linear model*, where the prediction is given as :math:`\\hat{y}_i = \\sum_j \\theta_j x_{ij}`, a linear combination of weighted input features.\nThe prediction value can have different interpretations, depending on the task, i.e., regression or classification.\nFor example, it can be logistic-transformed to get the probability of the positive class in logistic regression, and it can also be used as a ranking score when we want to rank the outputs.\n\nThe **parameters** are the undetermined part that we need to learn from data. 
In linear regression problems, the parameters are the coefficients :math:`\\theta`.\nUsually we will use :math:`\\theta` to denote the parameters (there are many parameters in a model, our definition here is sloppy).\n\nObjective Function: Training Loss + Regularization\n==================================================\nWith judicious choices for :math:`y_i`, we may express a variety of tasks, such as regression, classification, and ranking.\nThe task of **training** the model amounts to finding the parameters :math:`\\theta` that best fit the training data :math:`x_i` and labels :math:`y_i`. In order to train the model, we need to define the **objective function**\nto measure how well the model fits the training data.\n\nA salient characteristic of objective functions is that they consist of two parts: **training loss** and **regularization term**:\n\n.. math::\n\n  \\text{obj}(\\theta) = L(\\theta) + \\Omega(\\theta)\n\nwhere :math:`L` is the training loss function, and :math:`\\Omega` is the regularization term. The training loss measures how *predictive* our model is with respect to the training data.\nA common choice of :math:`L` is the *mean squared error*, which is given by\n\n.. math::\n\n  L(\\theta) = \\sum_i (y_i-\\hat{y}_i)^2\n\nAnother commonly used loss function is logistic loss, to be used for logistic regression:\n\n.. math::\n\n  L(\\theta) = \\sum_i[ y_i\\ln (1+e^{-\\hat{y}_i}) + (1-y_i)\\ln (1+e^{\\hat{y}_i})]\n\nThe **regularization term** is what people usually forget to add. The regularization term controls the complexity of the model, which helps us to avoid overfitting.\nThis sounds a bit abstract, so let us consider the problem in the following picture. You are asked to *fit* visually a step function given the input data points\nin the upper left corner of the image.\nWhich solution among the three do you think is the best fit?\n\n.. 
image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/step_fit.png\n  :alt: step functions to fit data points, illustrating bias-variance tradeoff\n\nThe correct answer is marked in red. Please consider if this visually seems a reasonable fit to you. The general principle is we want a model that is both *simple* and *predictive*.\nThe tradeoff between the two is also referred to as the **bias-variance tradeoff** in machine learning.\n\nWhy introduce the general principle?\n====================================\nThe elements introduced above form the basic elements of supervised learning, and they are natural building blocks of machine learning toolkits.\nFor example, you should be able to describe the differences and commonalities between gradient boosted trees and random forests.\nUnderstanding the process in a formalized way also helps us to understand the objective that we are learning and the reason behind the heuristics such as\npruning and smoothing.\n\n***********************\nDecision Tree Ensembles\n***********************\nNow that we have introduced the elements of supervised learning, let us get started with real trees.\nTo begin with, let us first learn about the model choice of XGBoost: **decision tree ensembles**.\nThe tree ensemble model consists of a set of classification and regression trees (CART). Here's a simple example of a CART that classifies whether someone will like a hypothetical computer game X.\n\n.. image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/cart.png\n  :width: 100%\n  :alt: a toy example for CART\n\nWe classify the members of a family into different leaves, and assign them the score on the corresponding leaf.\nA CART is a bit different from decision trees, in which the leaf only contains decision values. 
In CART, a real score\nis associated with each of the leaves, which gives us richer interpretations that go beyond classification.\nThis also allows for a principled, unified approach to optimization, as we will see in a later part of this tutorial.\n\nUsually, a single tree is not strong enough to be used in practice. What is actually used is the ensemble model,\nwhich sums the prediction of multiple trees together.\n\n.. image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/twocart.png\n  :width: 100%\n  :alt: a toy example for tree ensemble, consisting of two CARTs\n\nHere is an example of a tree ensemble of two trees. The prediction scores of each individual tree are summed up to get the final score.\nIf you look at the example, an important fact is that the two trees try to *complement* each other.\nMathematically, we can write our model in the form\n\n.. math::\n\n  \\hat{y}_i = \\sum_{k=1}^K f_k(x_i), f_k \\in \\mathcal{F}\n\nwhere :math:`K` is the number of trees, :math:`f_k` is a function in the functional space :math:`\\mathcal{F}`, and :math:`\\mathcal{F}` is the set of all possible CARTs. The objective function to be optimized is given by\n\n.. math::\n\n  \\text{obj}(\\theta) = \\sum_i^n l(y_i, \\hat{y}_i) + \\sum_{k=1}^K \\omega(f_k)\n\nwhere :math:`\\omega(f_k)` is the complexity of the tree :math:`f_k`, defined in detail later.\n\nNow here comes a trick question: what is the *model* used in random forests? Tree ensembles! So random forests and boosted trees are really the same models; the\ndifference arises from how we train them. This means that, if you write a predictive service for tree ensembles, you only need to write one and it should work\nfor both random forests and gradient boosted trees. (See `Treelite <https://treelite.readthedocs.io/en/latest/index.html>`_ for an actual example.) 
One example of why elements of supervised learning rock.\n\n*************\nTree Boosting\n*************\nNow that we have introduced the model, let us turn to training: How should we learn the trees?\nThe answer is, as always for all supervised learning models: *define an objective function and optimize it*!\n\nLet the following be the objective function (remember it always needs to contain training loss and regularization):\n\n.. math::\n\n  \\text{obj} = \\sum_{i=1}^n l(y_i, \\hat{y}_i^{(t)}) + \\sum_{k=1}^t\\omega(f_k)\n\nin which :math:`t` is the number of trees in our ensemble.\n(Each training step will add one new tree, so that at step :math:`t` the ensemble contains :math:`K=t` trees).\n\nAdditive Training\n=================\n\nThe first question we want to ask: what are the **parameters** of trees?\nYou can find that what we need to learn are those functions :math:`f_k`, each containing the structure\nof the tree and the leaf scores. Learning tree structure is much harder than a traditional optimization problem where you can simply take the gradient.\nIt is intractable to learn all the trees at once.\nInstead, we use an additive strategy: fix what we have learned, and add one new tree at a time.\nWe write the prediction value at step :math:`t` as :math:`\\hat{y}_i^{(t)}`. Then we have\n\n.. math::\n\n  \\hat{y}_i^{(0)} &= 0\\\\\n  \\hat{y}_i^{(1)} &= f_1(x_i) = \\hat{y}_i^{(0)} + f_1(x_i)\\\\\n  \\hat{y}_i^{(2)} &= f_1(x_i) + f_2(x_i)= \\hat{y}_i^{(1)} + f_2(x_i)\\\\\n  &\\dots\\\\\n  \\hat{y}_i^{(t)} &= \\sum_{k=1}^t f_k(x_i)= \\hat{y}_i^{(t-1)} + f_t(x_i)\n\nIt remains to ask: which tree do we want at each step?  A natural thing is to add the one that optimizes our objective.\n\n.. 
math::\n\n  \\text{obj}^{(t)} & = \\sum_{i=1}^n l(y_i, \\hat{y}_i^{(t)}) + \\sum_{k=1}^t\\omega(f_k) \\\\\n            & = \\sum_{i=1}^n l(y_i, \\hat{y}_i^{(t-1)} + f_t(x_i)) + \\omega(f_t) + \\mathrm{constant}\n\nIf we consider using mean squared error (MSE) as our loss function, the objective becomes\n\n.. math::\n\n  \\text{obj}^{(t)} & = \\sum_{i=1}^n (y_i - (\\hat{y}_i^{(t-1)} + f_t(x_i)))^2 + \\sum_{k=1}^t\\omega(f_k) \\\\\n            & = \\sum_{i=1}^n [2(\\hat{y}_i^{(t-1)} - y_i)f_t(x_i) + f_t(x_i)^2] + \\omega(f_t) + \\mathrm{constant}\n\nThe form of MSE is friendly, with a first order term (usually called the residual) and a quadratic term.\nFor other losses of interest (for example, logistic loss), it is not so easy to get such a nice form.\nSo in the general case, we take the *Taylor expansion of the loss function up to the second order*:\n\n.. math::\n\n  \\text{obj}^{(t)} = \\sum_{i=1}^n [l(y_i, \\hat{y}_i^{(t-1)}) + g_i f_t(x_i) + \\frac{1}{2} h_i f_t^2(x_i)] + \\omega(f_t) + \\mathrm{constant}\n\nwhere the :math:`g_i` and :math:`h_i` are defined as\n\n.. math::\n\n  g_i &= \\partial_{\\hat{y}_i^{(t-1)}} l(y_i, \\hat{y}_i^{(t-1)})\\\\\n  h_i &= \\partial_{\\hat{y}_i^{(t-1)}}^2 l(y_i, \\hat{y}_i^{(t-1)})\n\nAfter we remove all the constants, the specific objective at step :math:`t` becomes\n\n.. math::\n\n  \\sum_{i=1}^n [g_i f_t(x_i) + \\frac{1}{2} h_i f_t^2(x_i)] + \\omega(f_t)\n\nThis becomes our optimization goal for the new tree. One important advantage of this definition is that\nthe value of the objective function only depends on :math:`g_i` and :math:`h_i`. 
This is how XGBoost supports custom loss functions.\nWe can optimize every loss function, including logistic regression and pairwise ranking, using exactly\nthe same solver that takes :math:`g_i` and :math:`h_i` as input!\n\nModel Complexity\n================\nWe have introduced the training step, but wait, there is one important thing, the **regularization term**!\nWe need to define the complexity of the tree :math:`\\omega(f)`. In order to do so, let us first refine the definition of the tree :math:`f(x)` as\n\n.. math::\n\n  f_t(x) = w_{q(x)}, w \\in R^T, q:R^d\\rightarrow \\{1,2,\\cdots,T\\} .\n\nHere :math:`w` is the vector of scores on leaves, :math:`q` is a function assigning each data point to the corresponding leaf, and :math:`T` is the number of leaves.\nIn XGBoost, we define the complexity as\n\n.. math::\n\n  \\omega(f) = \\gamma T + \\frac{1}{2}\\lambda \\sum_{j=1}^T w_j^2\n\nOf course, there is more than one way to define the complexity, but this one works well in practice. The regularization is one part most tree packages treat\nless carefully, or simply ignore. This was because the traditional treatment of tree learning only emphasized improving impurity, while the complexity control was left to heuristics.\nBy defining it formally, we can get a better idea of what we are learning and obtain models that perform well in the wild.\n\nThe Structure Score\n===================\nHere is the magical part of the derivation. After re-formulating the tree model, we can write the objective value with the :math:`t`-th tree as:\n\n.. 
math::\n\n  \\text{obj}^{(t)} &\\approx \\sum_{i=1}^n [g_i w_{q(x_i)} + \\frac{1}{2} h_i w_{q(x_i)}^2] + \\gamma T + \\frac{1}{2}\\lambda \\sum_{j=1}^T w_j^2\\\\\n  &= \\sum^T_{j=1} [(\\sum_{i\\in I_j} g_i) w_j + \\frac{1}{2} (\\sum_{i\\in I_j} h_i + \\lambda) w_j^2 ] + \\gamma T\n\nwhere :math:`I_j = \\{i|q(x_i)=j\\}` is the set of indices of data points assigned to the :math:`j`-th leaf.\nNotice that in the second line we have changed the index of the summation because all the data points on the same leaf get the same score.\nWe could further compress the expression by defining :math:`G_j = \\sum_{i\\in I_j} g_i` and :math:`H_j = \\sum_{i\\in I_j} h_i`:\n\n.. math::\n\n  \\text{obj}^{(t)} = \\sum^T_{j=1} [G_jw_j + \\frac{1}{2} (H_j+\\lambda) w_j^2] +\\gamma T\n\nIn this equation, :math:`w_j` are independent with respect to each other, the form :math:`G_jw_j+\\frac{1}{2}(H_j+\\lambda)w_j^2` is quadratic and the best :math:`w_j` for a given structure :math:`q(x)` and the best objective reduction we can get is:\n\n.. math::\n\n  w_j^\\ast &= -\\frac{G_j}{H_j+\\lambda}\\\\\n  \\text{obj}^\\ast &= -\\frac{1}{2} \\sum_{j=1}^T \\frac{G_j^2}{H_j+\\lambda} + \\gamma T\n\nThe last equation measures *how good* a tree structure :math:`q(x)` is.\n\n.. 
image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/struct_score.png\n  :width: 100%\n  :alt: illustration of structure score (fitness)\n\nIf all this sounds a bit complicated, let's take a look at the picture, and see how the scores can be calculated.\nBasically, for a given tree structure, we push the statistics :math:`g_i` and :math:`h_i` to the leaves they belong to,\nsum the statistics together, and use the formula to calculate how good the tree is.\nThis score is like the impurity measure in a decision tree, except that it also takes the model complexity into account.\n\nLearn the tree structure\n========================\nNow that we have a way to measure how good a tree is, ideally we would enumerate all possible trees and pick the best one.\nIn practice this is intractable, so we will try to optimize one level of the tree at a time.\nSpecifically we try to split a leaf into two leaves, and the score it gains is\n\n.. math::\n  Gain = \\frac{1}{2} \\left[\\frac{G_L^2}{H_L+\\lambda}+\\frac{G_R^2}{H_R+\\lambda}-\\frac{(G_L+G_R)^2}{H_L+H_R+\\lambda}\\right] - \\gamma\n\nThis formula can be decomposed as 1) the score on the new left leaf 2) the score on the new right leaf 3) the score on the original leaf 4) regularization on the additional leaf.\nWe can see an important fact here: if the improvement in the structure score (the bracketed term, scaled by :math:`\\frac{1}{2}`) is smaller than :math:`\\gamma`, the gain is negative and we would do better not to add that branch. This is exactly the idea behind **pruning** techniques in tree-based\nmodels! By using the principles of supervised learning, we can naturally come up with the reason these techniques work :)\n\nFor real valued data, we usually want to search for an optimal split. To efficiently do so, we place all the instances in sorted order, like the following picture.\n\n.. 
image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/split_find.png\n  :width: 100%\n  :alt: Schematic of choosing the best split\n\nA left to right scan is sufficient to calculate the structure score of all possible split solutions, and we can find the best split efficiently.\n\n.. note:: Limitation of additive tree learning\n\n  Since it is intractable to enumerate all possible tree structures, we add one split at a time. This approach works well most of the time, but there are some edge cases that fail due to this approach. For those edge cases, training results in a degenerate model because we consider only one feature dimension at a time. See `Can Gradient Boosting Learn Simple Arithmetic? <http://mariofilho.com/can-gradient-boosting-learn-simple-arithmetic/>`_ for an example.\n\n**********************\nFinal words on XGBoost\n**********************\nNow that you understand what boosted trees are, you may ask, where is the introduction for XGBoost?\nXGBoost is exactly a tool motivated by the formal principle introduced in this tutorial!\nMore importantly, it is developed with both deep consideration in terms of **systems optimization** and **principles in machine learning**.\nThe goal of this library is to push the extreme of the computation limits of machines to provide a **scalable**, **portable** and **accurate** library.\nMake sure you try it out, and most importantly, contribute your piece of wisdom (code, examples, tutorials) to the community!\n"
  },
  {
    "path": "doc/tutorials/monotonic.rst",
    "content": "#####################\nMonotonic Constraints\n#####################\n\nIt is often the case in a modeling problem or project that the functional form of an acceptable model is constrained in some way. This may happen due to business considerations, or because of the type of scientific question being investigated.  In some cases, where there is a very strong prior belief that the true relationship has some quality, constraints can be used to improve the predictive performance of the model.\n\nA common type of constraint in this situation is that certain features bear a **monotonic** relationship to the predicted response:\n\n.. math::\n\n  f(x_1, x_2, \\ldots, x, \\ldots, x_{n-1}, x_n) \\leq f(x_1, x_2, \\ldots, x', \\ldots, x_{n-1}, x_n)\n\nwhenever :math:`x \\leq x'` is an **increasing constraint**; or\n\n.. math::\n\n  f(x_1, x_2, \\ldots, x, \\ldots, x_{n-1}, x_n) \\geq f(x_1, x_2, \\ldots, x', \\ldots, x_{n-1}, x_n)\n\nwhenever :math:`x \\leq x'` is a **decreasing constraint**.\n\nXGBoost has the ability to enforce monotonicity constraints on any features used in a boosted model.\n\n****************\nA Simple Example\n****************\n\nTo illustrate, let's create some simulated data with two features and a response according to the following scheme\n\n.. math::\n\n  y = 5 x_1 + \\sin(10 \\pi x_1) - 5 x_2 - \\cos(10 \\pi x_2) + N(0, 0.01)\n  x_1, x_2 \\in [0, 1]\n\nThe response generally increases with respect to the :math:`x_1` feature, but a sinusoidal variation has been superimposed, resulting in the true effect being non-monotonic.  For the :math:`x_2` feature the variation is decreasing with a sinusoidal variation.\n\n.. image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/monotonic/two.feature.sample.data.png\n  :alt: Data in sinusoidal fit\n\nLet's fit a boosted tree model to this data without imposing any monotonic constraints:\n\n.. 
image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/monotonic/two.feature.no.constraint.png\n  :alt: Fit of Model with No Constraint\n\nThe black curve shows the trend inferred from the model for each feature.  To make these plots the distinguished feature :math:`x_i` is fed to the model over a one-dimensional grid of values, while all the other features (in this case only one other feature) are set to their average values.  We see that the model does a good job of capturing the general trend with the oscillatory wave superimposed.\n\nHere is the same model, but fit with monotonicity constraints:\n\n.. image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/monotonic/two.feature.with.constraint.png\n  :alt: Fit of Model with Constraint\n\nWe see the effect of the constraint.  For each variable the general direction of the trend is still evident, but the oscillatory behaviour no longer remains as it would violate our imposed constraints.\n\n******************************************\nEnforcing Monotonic Constraints in XGBoost\n******************************************\n\nIt is very simple to enforce monotonicity constraints in XGBoost.  Here we will give an example using Python, but the same general idea generalizes to other platforms.\n\nSuppose the following code fits your model without monotonicity constraints\n\n.. code-block:: python\n\n  model_no_constraints = xgb.train(params, dtrain,\n                                   num_boost_round = 1000, evals = evallist,\n                                   early_stopping_rounds = 10)\n\nThen fitting with monotonicity constraints only requires adding a single parameter\n\n.. 
code-block:: python\n\n  params_constrained = params.copy()\n  params_constrained['monotone_constraints'] = (1,-1)\n\n  model_with_constraints = xgb.train(params_constrained, dtrain,\n                                     num_boost_round = 1000, evals = evallist,\n                                     early_stopping_rounds = 10)\n\nIn this example the training data ``X`` has two columns, and by using the parameter values ``(1,-1)`` we are telling XGBoost to impose an increasing constraint on the first predictor and a decreasing constraint on the second.\n\nSome other examples:\n\n- ``(1,0)``: An increasing constraint on the first predictor and no constraint on the second.\n- ``(0,-1)``: No constraint on the first predictor and a decreasing constraint on the second.\n\n\n.. note::\n\n   **Note for the 'hist' tree construction algorithm**.  If ``tree_method`` is set to\n   either ``hist`` or ``approx``, enabling monotonic constraints may produce unnecessarily\n   shallow trees. This is because the ``hist`` method reduces the number of candidate\n   splits to be considered at each split. Monotonic constraints may wipe out all available\n   split candidates, in which case no split is made. To reduce the effect, you may want to\n   increase the ``max_bin`` parameter to consider more split candidates.\n\n\n*******************\nUsing feature names\n*******************\n\nXGBoost's Python and R packages support using feature names instead of feature indices for\nspecifying the constraints. Given a data frame with columns ``[\"f0\", \"f1\", \"f2\"]``, the\nmonotonic constraint can be specified as ``{\"f0\": 1, \"f2\": -1}`` (Python) or as\n``list(f0=1, f2=-1)`` (R, when using 'xgboost()', but not 'xgb.train'), and ``\"f1\"`` will\ndefault to ``0`` (no constraint).\n"
  },
  {
    "path": "doc/tutorials/multioutput.rst",
    "content": "################\nMultiple Outputs\n################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\n\n.. versionadded:: 1.6\n\nStarting from version 1.6, XGBoost has experimental support for multi-output regression\nand multi-label classification with the Python package.  Multi-label classification usually\nrefers to targets that have multiple non-exclusive class labels.  For instance, a movie\ncan be simultaneously classified as both sci-fi and comedy.  For detailed explanation of\nterminologies related to different multi-output models please refer to the\n:doc:`scikit-learn user guide <sklearn:modules/multiclass>`.\n\n.. note::\n\n   As of XGBoost 3.0, the feature is experimental and has limited features. Only the\n   Python package is tested. In addition, ``gblinear`` is not supported.\n\n**********************************\nTraining with One-Model-Per-Target\n**********************************\n\nBy default, XGBoost builds one model for each target similar to sklearn meta estimators,\nwith the added benefit of reusing data and other integrated features like SHAP.  For a\nworked example of regression, see\n:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,\nthe binary relevance strategy is used.  Input ``y`` should be of shape ``(n_samples,\nn_classes)`` with each column having a value of 0 or 1 to specify whether the sample is\nlabeled as positive for the respective class. Given a sample with 3 output classes and 2\nlabels, the corresponding ``y`` should be encoded as ``[1, 0, 1]`` with the second class\nlabeled as negative and the rest labeled as positive. At the moment XGBoost supports only\ndense matrices for labels.\n\n.. 
code-block:: python\n\n    from sklearn.datasets import make_multilabel_classification\n    import numpy as np\n\n    X, y = make_multilabel_classification(\n        n_samples=32, n_classes=5, n_labels=3, random_state=0\n    )\n    clf = xgb.XGBClassifier(tree_method=\"hist\")\n    clf.fit(X, y)\n    np.testing.assert_allclose(clf.predict(X), y)\n\n\nThe feature is still under development with limited support from objectives and metrics.\n\n*************************\nTraining with Vector Leaf\n*************************\n\n.. versionadded:: 2.0.0\n\n.. note::\n\n   This is still a work in progress, and most features are missing.\n\nXGBoost can optionally build multi-output trees with the leaf size equal to the number\nof targets when the tree method ``hist`` is used. The behavior can be controlled by the\n``multi_strategy`` training parameter, which can take the value ``one_output_per_tree`` (the\ndefault) for building one model per-target or ``multi_output_tree`` for building\nmulti-output trees.\n\n.. code-block:: python\n\n  clf = xgb.XGBClassifier(tree_method=\"hist\", multi_strategy=\"multi_output_tree\")\n\nSee :ref:`sphx_glr_python_examples_multioutput_regression.py` for a worked example with\nregression.\n\n\n*************************************\nUsing Reduced Gradient (Sketch Boost)\n*************************************\n\n.. versionadded:: 3.2.0\n\n.. note::\n\n   This is still a work in progress, and most features are missing. It is documented here\n   for early testers to provide feedback. The related interface might change without notice.\n\nWhen the number of targets is large, training a gradient boosting tree model using the\nfull gradient matrix becomes challenging. The training procedure may run out of memory for\nstoring the histogram, or run extremely slowly due to the amount of computation needed. 
As\nan optimization, XGBoost implements an interface for using two types of gradients based on\nthe concepts from `Sketch Boost` `[1] <#references>`__.\n\nThe key insight is that we can use different gradients for two distinct purposes:\n\n- **Split gradient**: A reduced-dimension gradient used to determine the tree structure.\n- **Value gradient**: The full gradient used to calculate the final leaf values for\n  accurate predictions.\n\nThis separation allows the expensive histogram building and split finding to operate on a\nsmaller gradient matrix, while still producing valid predictions using the full loss\nfunction for leaf values. The `Sketch Boost` paper proposes using dimensionality reduction\non the gradient matrix. In practice, one can also define a different but related loss with\na small gradient matrix for finding the tree structure.\n\nTo access this feature, create a custom objective that inherits from ``TreeObjective`` and\nimplement the ``split_grad`` method.\n\n.. code-block:: python\n\n    from xgboost.objective import TreeObjective\n    from cuml.decomposition import TruncatedSVD\n\n    import cupy as cp\n\n    class LsObj(TreeObjective):\n        def __call__(self, iteration: int, y_pred, dtrain):\n            \"\"\"Least squared error.\"\"\"\n            y_true = dtrain.get_label()\n            grad = y_pred - y_true\n            hess = cp.ones(grad.shape)\n            return cp.array(grad), cp.array(hess)\n\n        def split_grad(self, iteration: int, grad, hess):\n            svd_params = {\"algorithm\": \"jacobi\", \"n_components\": 2, \"n_iter\": 8}\n            svd = TruncatedSVD(output_type=\"cupy\", **svd_params)\n            svd.fit(grad)\n            grad = svd.transform(grad)\n            hess = svd.transform(hess)\n            hess = cp.clip(hess, 0.01, None)\n\n            return grad, hess\n\nSee :ref:`sphx_glr_python_examples_multioutput_reduced_gradient.py` for a complete worked\nexample. 
The feature supports only the ``multi_strategy=multi_output_tree``.\n\n**********\nReferences\n**********\n\n[1] Leonid Iosipoi, Anton Vakhrushev. \"`Fast Gradient Boosted Decision Tree for Multioutput Problems`_\". NeurIPS 2022, pp 25422 - 25435.\n\n.. _Fast Gradient Boosted Decision Tree for Multioutput Problems: https://proceedings.neurips.cc/paper_files/paper/2022/file/a36c3dbe676fa8445715a31a90c66ab3-Paper-Conference.pdf\n"
  },
  {
    "path": "doc/tutorials/param_tuning.rst",
    "content": "#########################\nNotes on Parameter Tuning\n#########################\nParameter tuning is a dark art in machine learning; the optimal parameters\nof a model can depend on many scenarios. So it is impossible to create a\ncomprehensive guide for doing so.\n\nThis document tries to provide some guidelines for parameters in XGBoost.\n\n************************************\nUnderstanding Bias-Variance Tradeoff\n************************************\nIf you take a machine learning or statistics course, this is likely to be one\nof the most important concepts.\nWhen we allow the model to get more complicated (e.g. more depth), the model\nhas better ability to fit the training data, resulting in a less biased model.\nHowever, such a complicated model requires more data to fit.\n\nMost of the parameters in XGBoost are about the bias-variance tradeoff. The best model\nshould trade the model complexity with its predictive power carefully.\n:doc:`Parameters Documentation </parameter>` will tell you whether each parameter\nwill make the model more conservative or not. This can be used to help you\nturn the knob between a complicated model and a simple model.\n\n*******************\nControl Overfitting\n*******************\nWhen you observe high training accuracy, but low test accuracy, it is likely that you encountered an overfitting problem.\n\nThere are in general two ways that you can control overfitting in XGBoost:\n\n* The first way is to directly control model complexity.\n\n  - This includes ``max_depth``, ``min_child_weight``, ``gamma``, ``max_cat_threshold``\n    and other similar regularization parameters. See :doc:`/parameter` for a comprehensive\n    set of parameters.\n  - Set a constant ``base_score`` based on your own criteria. 
See\n    :doc:`/tutorials/intercept` for more info.\n\n* The second way is to add randomness to make training robust to noise.\n\n  - This includes ``subsample`` and ``colsample_bytree``, which may be used with boosting\n    RF ``num_parallel_tree``.\n  - You can also reduce stepsize ``eta``, possibly with a training callback. Remember to\n    increase ``num_round`` when you do so.\n\n\n*************************\nHandle Imbalanced Dataset\n*************************\nFor common cases such as ads clickthrough log, the dataset is extremely imbalanced.\nThis can affect the training of XGBoost model, and there are two ways to improve it.\n\n* If you care only about the overall performance metric (AUC) of your prediction\n\n  - Balance the positive and negative weights via ``scale_pos_weight``\n  - Use AUC for evaluation\n\n* If you care about predicting the right probability\n\n  - In such a case, you cannot re-balance the dataset\n  - Set parameter ``max_delta_step`` to a finite number (say 1) to help convergence\n\n\n*************************************************\nUse Hyper Parameter Optimization (HPO) Frameworks\n*************************************************\nTuning models is a sophisticated task and there are advanced frameworks to help you. For\nexample, some meta estimators in scikit-learn like\n:py:class:`sklearn.model_selection.HalvingGridSearchCV` can help guide the search\nprocess. Optuna is another great option and there are many more based on different\nbranches of statistics.\n\n**************\nKnow Your Data\n**************\nThe importance of understanding the data cannot be stressed enough; sometimes that's\nall it takes to get a good model. Many solutions use a simple XGBoost tree model without\nmuch tuning and emphasize the data pre-processing step. XGBoost can help with feature selection\nby providing both a global feature importance score and sample feature importance with\nSHAP values. 
Also, there are parameters specifically targeting categorical features, and\ntasks like survival and ranking. Feel free to explore them.\n\n*********************\nReducing Memory Usage\n*********************\n\nIf you are using an HPO library like :py:class:`sklearn.model_selection.GridSearchCV`,\nplease control the number of threads it can use. It's best to let XGBoost run in\nparallel instead of asking ``GridSearchCV`` to run multiple experiments at the same\ntime. For instance, creating a fold of data for cross validation can consume a significant\namount of memory:\n\n.. code-block:: python\n\n    # This creates a copy of dataset. X and X_train are both in memory at the same time.\n\n    # This happens for every thread at the same time if you run `GridSearchCV` with\n    # `n_jobs` larger than 1\n\n    X_train, X_test, y_train, y_test = train_test_split(X, y)\n\n.. code-block:: python\n\n    df = pd.DataFrame()\n    # This creates a new copy of the dataframe, even if you specify the inplace parameter\n    new_df = df.drop(...)\n\n.. code-block:: python\n\n    array = np.array(...)\n    # This may or may not make a copy of the data, depending on the type of the data\n    array.astype(np.float32)\n\n.. code-block::\n\n    # np by default uses double, do you actually need it?\n    array = np.array(...)\n\nYou can find some more specific memory reduction practices scattered through the documents.\nFor instance: :doc:`/tutorials/dask`, :doc:`/gpu/index`. However, before going into\nthese, being conscious about making data copies is a good starting point. It usually\nconsumes a lot more memory than people expect.\n"
  },
  {
    "path": "doc/tutorials/privacy_preserving.rst",
    "content": "#############################################\nPrivacy Preserving Inference with Concrete ML\n#############################################\n\n`Concrete ML`_ is a specialized library developed by Zama that allows the execution of machine learning models on encrypted data through `Fully Homomorphic Encryption (FHE) <https://www.youtube.com/watch?v=FFox2S4uqEo>`_, thereby preserving data privacy.\n\nTo use models such as XGBClassifier, use the following import:\n\n.. code:: python\n\n  from concrete.ml.sklearn import XGBClassifier\n\n***************************************\nPerforming Privacy Preserving Inference\n***************************************\n\nInitialization of an XGBClassifier can be done as follows:\n\n.. code:: python\n\n  classifier = XGBClassifier(n_bits=6, [other_hyperparameters])\n\n\nwhere ``n_bits`` determines the precision of the input features. Note that a higher value of ``n_bits`` increases the precision of the input features and possibly the final model accuracy but also ends up with longer FHE execution time.\n\nOther hyper-parameters that exist in the xgboost library can be used.\n\n******************************\nModel Training and Compilation\n******************************\n\nAs is common for scikit-learn-like models, it can be trained with the ``.fit()`` method.\n\n.. code:: python\n\n  classifier.fit(X_train, y_train)\n\nAfter training, the model can be compiled with a calibration dataset, potentially a subset of the training data:\n\n.. code:: python\n\n  classifier.compile(X_calibrate)\n\nThis calibration dataset, ``X_calibrate``, is used by Concrete ML to compute the precision (bit-width) of each intermediate value in the model. This is a necessary step to optimize the equivalent FHE circuit.\n\n****************************\nFHE Simulation and Execution\n****************************\n\nTo verify model accuracy in encrypted computations, you can run an FHE simulation:\n\n.. 
code:: python\n\n  predictions = classifier.predict(X_test, fhe=\"simulate\")\n\nThis simulation can be used to evaluate the model. The resulting accuracy of this simulation step is representative of the actual FHE execution without having to pay the cost of an actual FHE execution. \n\nWhen the model is ready, actual Fully Homomorphic Encryption execution can be performed:\n\n.. code:: python\n\n  predictions = classifier.predict(X_test, fhe=\"execute\")\n\n\nNote that using FHE=\"execute\" is a convenient way to assess the model in FHE, but for real deployment, functions to encrypt (on the client), run in FHE (on the server), and finally decrypt (on the client) have to be used for end-to-end privacy-preserving inferences.\n\nConcrete ML provides a deployment API to facilitate this process, ensuring end-to-end privacy.\n\nTo go further in the deployment API you can read:\n\n- the `deployment documentation <https://docs.zama.ai/concrete-ml/advanced-topics/client_server>`_\n- the `deployment notebook <https://github.com/zama-ai/concrete-ml/blob/17779ca571d20b001caff5792eb11e76fe2c19ba/docs/advanced_examples/ClientServer.ipynb>`_\n\n*******************************\nParameter Tuning in Concrete ML\n*******************************\n\nConcrete ML is compatible with standard scikit-learn pipelines such as GridSearchCV or any other hyper-parameter tuning techniques.\n\n******************\nExamples and Demos\n******************\n\n- `Sentiment analysis (based on transformers + xgboost) <https://huggingface.co/spaces/zama-fhe/encrypted_sentiment_analysis>`_\n- `XGBoost Classifier <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBClassifier.ipynb>`_\n- `XGBoost Regressor <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBRegressor.ipynb>`_\n\n**********\nConclusion\n**********\n\nConcrete ML provides a framework for executing privacy-preserving 
inferences by leveraging Fully Homomorphic Encryption, allowing secure and private computations on encrypted data.\n\nMore information and examples are given in the `Concrete ML documentation`_.\n\n.. _Concrete ML: https://github.com/zama-ai/concrete-ml\n.. _`Concrete ML documentation`: https://docs.zama.ai/concrete-ml"
  },
  {
    "path": "doc/tutorials/ray.rst",
    "content": "############################\nDistributed XGBoost with Ray\n############################\n\n`Ray <https://ray.io/>`_ is a general purpose distributed execution framework.\nRay can be used to scale computations from a single node to a cluster of hundreds\nof nodes without changing any code.\n\nThe Python bindings of Ray come with a collection of well maintained\nmachine learning libraries for hyperparameter optimization and model serving.\n\nThe `XGBoost-Ray <https://github.com/ray-project/xgboost_ray>`_ project provides\nan interface to run XGBoost training and prediction jobs on a Ray cluster. It allows\nto utilize distributed data representations, such as\n`Modin <https://modin.readthedocs.io/en/latest/>`_ dataframes,\nas well as distributed loading from cloud storage (e.g. Parquet files).\n\nXGBoost-Ray integrates well with hyperparameter optimization library Ray Tune, and\nimplements advanced fault tolerance handling mechanisms. With Ray you can scale\nyour training jobs to hundreds of nodes just by adding new\nnodes to a cluster. You can also use Ray to leverage multi GPU XGBoost training.\n\nInstalling and starting Ray\n===========================\nRay can be installed from PyPI like this:\n\n.. code-block:: bash\n\n    pip install ray\n\nIf you're using Ray on a single machine, you don't need to do anything else -\nXGBoost-Ray will automatically start a local Ray cluster when used.\n\nIf you want to use Ray on a cluster, you can use the\n`Ray cluster launcher <https://docs.ray.io/en/master/cluster/cloud.html>`_.\n\nInstalling XGBoost-Ray\n======================\nXGBoost-Ray is also available via PyPI:\n\n.. code-block:: bash\n\n    pip install xgboost_ray\n\nThis will install all dependencies needed to run XGBoost on Ray, including\nRay itself if it hasn't been installed before.\n\nUsing XGBoost-Ray for training and prediction\n=============================================\nXGBoost-Ray uses the same API as core XGBoost. 
There are only two differences:\n\n1. Instead of using a ``xgboost.DMatrix``, you'll use a ``xgboost_ray.RayDMatrix`` object\n2. There is an additional ``xgboost_ray.RayParams`` parameter that you can use to configure distributed training.\n\nSimple training example\n-----------------------\n\nTo run this simple example, you'll need to install\n`scikit-learn <https://scikit-learn.org/>`_ (with ``pip install scikit-learn``).\n\nIn this example, we will load the `breast cancer dataset <https://archive.ics.uci.edu/ml/datasets/breast+cancer>`_\nand train a binary classifier using two actors.\n\n.. code-block:: python\n\n    from xgboost_ray import RayDMatrix, RayParams, train\n    from sklearn.datasets import load_breast_cancer\n\n    train_x, train_y = load_breast_cancer(return_X_y=True)\n    train_set = RayDMatrix(train_x, train_y)\n\n    evals_result = {}\n    bst = train(\n        {\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"logloss\", \"error\"],\n        },\n        train_set,\n        evals_result=evals_result,\n        evals=[(train_set, \"train\")],\n        verbose_eval=False,\n        ray_params=RayParams(num_actors=2, cpus_per_actor=1))\n\n    bst.save_model(\"model.xgb\")\n    print(\"Final training error: {:.4f}\".format(\n        evals_result[\"train\"][\"error\"][-1]))\n\n\nThe only differences compared to the non-distributed API are\nthe import statement (``xgboost_ray`` instead of ``xgboost``), using the\n``RayDMatrix`` instead of the ``DMatrix``, and passing a ``xgboost_ray.RayParams`` object.\n\nThe return object is a regular ``xgboost.Booster`` instance.\n\n\nSimple prediction example\n-------------------------\n.. 
code-block:: python\n\n    from xgboost_ray import RayDMatrix, RayParams, predict\n    from sklearn.datasets import load_breast_cancer\n    import xgboost as xgb\n\n    data, labels = load_breast_cancer(return_X_y=True)\n\n    dpred = RayDMatrix(data, labels)\n\n    bst = xgb.Booster(model_file=\"model.xgb\")\n    pred_ray = predict(bst, dpred, ray_params=RayParams(num_actors=2))\n\n    print(pred_ray)\n\nIn this example, the data will be split across two actors. The result array\nwill integrate this data in the correct order.\n\nThe RayParams object\n========================\nThe ``RayParams`` object is used to configure various settings relating to the distributed\ntraining.\n\nMulti GPU training\n==================\nRay automatically detects GPUs on cluster nodes.\nIn order to start training on multiple GPUs, all you have to do is\nto set the ``gpus_per_actor`` parameter of the ``RayParams`` object, as well\nas the ``num_actors`` parameter for multiple GPUs:\n\n.. code-block:: python\n\n    ray_params = RayParams(\n        num_actors=4,\n        gpus_per_actor=1,\n    )\n\nThis will train on four GPUs in parallel.\n\nNote that it usually does not make sense to allocate more than one GPU per actor,\nas XGBoost relies on distributed libraries such as Dask or Ray to utilize multi\nGPU training.\n\nSetting the number of CPUs per actor\n====================================\nXGBoost natively utilizes multi threading to speed up computations. Thus if\nyou are training on CPUs only, there is likely no benefit in using more than\none actor per node. In that case, assuming you have a cluster of homogeneous nodes,\nset the number of CPUs per actor to the number of CPUs available on each node,\nand the number of actors to the number of nodes.\n\nIf you are using multi GPU training on a single node, divide the number of\navailable CPUs evenly across all actors. 
For instance, if you have 16 CPUs and\n4 GPUs available, each actor should access 1 GPU and 4 CPUs.\n\nIf you are using a cluster of heterogeneous nodes (with different amounts of CPUs),\nyou might just want to use the `greatest common divisor <https://en.wikipedia.org/wiki/Greatest_common_divisor>`_\nfor the number of CPUs per actor. E.g. if you have a cluster of three nodes with\n4, 8, and 12 CPUs, respectively, you'd start 6 actors with 4 CPUs each for maximum\nCPU utilization.\n\nFault tolerance\n===============\nXGBoost-Ray supports two fault tolerance modes. In **non-elastic training**, whenever\na training actor dies (e.g. because the node goes down), the training job will stop,\nXGBoost-Ray will wait for the actor (or its resources) to become available again\n(this might be on a different node), and then continue training once all actors are back.\n\nIn **elastic-training**, whenever a training actor dies, the rest of the actors\ncontinue training without the dead actor. If the actor comes back, it will be re-integrated\ninto training again.\n\nPlease note that in elastic-training this means that you will train on fewer data\nfor some time. The benefit is that you can continue training even if a node goes\naway for the remainder of the training run, and don't have to wait until it is back up again.\nIn practice this usually leads to a very minor decrease in accuracy but a much shorter\ntraining time compared to non-elastic training.\n\nBoth training modes can be configured using the respective ``xgboost_ray.RayParams``\nparameters.\n\nHyperparameter optimization\n===========================\nXGBoost-Ray integrates well with `hyperparameter optimization framework Ray Tune <http://tune.io>`_.\nRay Tune uses Ray to start multiple distributed trials with different hyperparameter configurations.\nIf used with XGBoost-Ray, these trials will then start their own distributed training\njobs.\n\nXGBoost-Ray automatically reports evaluation results back to Ray Tune. 
There are only\na few things you need to do:\n\n1. Put your XGBoost-Ray training call into a function accepting parameter configurations\n   (``train_model`` in the example below).\n2. Create a ``xgboost_ray.RayParams`` object (``ray_params`` in the example below).\n3. Define the parameter search space (``config`` dict in the example below).\n4. Call ``tune.run()``:\n    * The ``metric`` parameter should contain the metric you'd like to optimize.\n      Usually this consists of the prefix passed to the ``evals`` argument of\n      ``xgboost_ray.train()``, and an ``eval_metric`` passed in the\n      XGBoost parameters (``train-error`` in the example below).\n    * The ``mode`` should either be ``min`` or ``max``, depending on whether\n      you'd like to minimize or maximize the metric\n    * The ``resources_per_trial`` should be set using ``ray_params.get_tune_resources()``.\n      This will make sure that each trial has the necessary resources available to\n      start their distributed training jobs.\n\n.. 
code-block:: python\n\n    from xgboost_ray import RayDMatrix, RayParams, train\n    from sklearn.datasets import load_breast_cancer\n\n    num_actors = 4\n    num_cpus_per_actor = 1\n\n    ray_params = RayParams(\n        num_actors=num_actors, cpus_per_actor=num_cpus_per_actor)\n\n    def train_model(config):\n        train_x, train_y = load_breast_cancer(return_X_y=True)\n        train_set = RayDMatrix(train_x, train_y)\n\n        evals_result = {}\n        bst = train(\n            params=config,\n            dtrain=train_set,\n            evals_result=evals_result,\n            evals=[(train_set, \"train\")],\n            verbose_eval=False,\n            ray_params=ray_params)\n        bst.save_model(\"model.xgb\")\n\n    from ray import tune\n\n    # Specify the hyperparameter search space.\n    config = {\n        \"tree_method\": \"approx\",\n        \"objective\": \"binary:logistic\",\n        \"eval_metric\": [\"logloss\", \"error\"],\n        \"eta\": tune.loguniform(1e-4, 1e-1),\n        \"subsample\": tune.uniform(0.5, 1.0),\n        \"max_depth\": tune.randint(1, 9)\n    }\n\n    # Make sure to use the `get_tune_resources` method to set the `resources_per_trial`\n    analysis = tune.run(\n        train_model,\n        config=config,\n        metric=\"train-error\",\n        mode=\"min\",\n        num_samples=4,\n        resources_per_trial=ray_params.get_tune_resources())\n    print(\"Best hyperparameters\", analysis.best_config)\n\n\nRay Tune supports various\n`search algorithms and libraries (e.g. BayesOpt, Tree-Parzen estimators) <https://docs.ray.io/en/latest/tune/key-concepts.html#tune-search-algorithms>`_,\n`smart schedulers like successive halving <https://docs.ray.io/en/latest/tune/key-concepts.html#tune-schedulers>`_,\nand other features. 
Please refer to the `Ray Tune documentation <http://tune.io>`_\nfor more information.\n\nAdditional resources\n====================\n* `XGBoost-Ray repository <https://github.com/ray-project/xgboost_ray>`_\n* `XGBoost-Ray documentation <https://docs.ray.io/en/master/xgboost-ray.html>`_\n* `Ray core documentation <https://docs.ray.io/en/master/index.html>`_\n* `Ray Tune documentation <http://tune.io>`_\n"
  },
  {
    "path": "doc/tutorials/rf.rst",
    "content": "#############################\nRandom Forests(TM) in XGBoost\n#############################\n\nXGBoost is normally used to train gradient-boosted decision trees and other gradient\nboosted models. Random Forests use the same model representation and inference, as\ngradient-boosted decision trees, but a different training algorithm.  One can use XGBoost\nto train a standalone random forest or use random forest as a base model for gradient\nboosting.  Here we focus on training standalone random forest.\n\nWe have native APIs for training random forests since the early days, and a new\nScikit-Learn wrapper after 0.82 (not included in 0.82).  Please note that the new\nScikit-Learn wrapper is still **experimental**, which means we might change the interface\nwhenever needed.\n\n*****************************************\nStandalone Random Forest With XGBoost API\n*****************************************\n\nThe following parameters must be set to enable random forest training.\n\n* ``booster`` should be set to ``gbtree``, as we are training forests. Note that as this\n  is the default, this parameter needn't be set explicitly.\n* ``subsample`` must be set to a value less than 1 to enable random selection of training\n  cases (rows).\n* One of ``colsample_by*`` parameters must be set to a value less than 1 to enable random\n  selection of columns. Normally, ``colsample_bynode`` would be set to a value less than 1\n  to randomly sample columns at each tree split.\n* ``num_parallel_tree`` should be set to the size of the forest being trained.\n* ``num_boost_round`` should be set to 1 to prevent XGBoost from boosting multiple random\n  forests.  
Note that this is a keyword argument to ``train()``, and is not part of the\n  parameter dictionary.\n* ``eta`` (alias: ``learning_rate``) must be set to 1 when training random forest\n  regression.\n* ``random_state`` can be used to seed the random number generator.\n\n\nOther parameters should be set in a similar way they are set for gradient boosting. For\ninstance, ``objective`` will typically be ``reg:squarederror`` for regression and\n``binary:logistic`` for classification, ``lambda`` should be set according to a desired\nregularization weight, etc.\n\nIf both ``num_parallel_tree`` and ``num_boost_round`` are greater than 1, training will\nuse a combination of random forest and gradient boosting strategy. It will perform\n``num_boost_round`` rounds, boosting a random forest of ``num_parallel_tree`` trees at\neach round. If early stopping is not enabled, the final model will consist of\n``num_parallel_tree`` * ``num_boost_round`` trees.\n\nHere is a sample parameter dictionary for training a random forest on a GPU using\nxgboost::\n\n  params = {\n    \"colsample_bynode\": 0.8,\n    \"learning_rate\": 1,\n    \"max_depth\": 5,\n    \"num_parallel_tree\": 100,\n    \"objective\": \"binary:logistic\",\n    \"subsample\": 0.8,\n    \"tree_method\": \"hist\",\n    \"device\": \"cuda\",\n  }\n\nA random forest model can then be trained as follows::\n\n  bst = train(params, dmatrix, num_boost_round=1)\n\n\n***************************************************\nStandalone Random Forest With Scikit-Learn-Like API\n***************************************************\n\n``XGBRFClassifier`` and ``XGBRFRegressor`` are SKL-like classes that provide random forest\nfunctionality. They are basically versions of ``XGBClassifier`` and ``XGBRegressor`` that\ntrain random forest instead of gradient boosting, and have default values and meaning of\nsome of the parameters adjusted accordingly. 
In particular:\n\n* ``n_estimators`` specifies the size of the forest to be trained; it is converted to\n  ``num_parallel_tree``, instead of the number of boosting rounds\n* ``learning_rate`` is set to 1 by default\n* ``colsample_bynode`` and ``subsample`` are set to 0.8 by default\n* ``booster`` is always ``gbtree``\n\nFor a simple example, you can train a random forest regressor with::\n\n    from sklearn.model_selection import KFold\n\n    # Your code ...\n\n    kf = KFold(n_splits=2)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBRFRegressor(random_state=42).fit(\n\tX[train_index], y[train_index])\n\nNote that these classes have a smaller selection of parameters compared to using\n``train()``. In particular, it is impossible to combine random forests with gradient\nboosting using this API.\n\n\n*******\nCaveats\n*******\n\n* XGBoost uses 2nd order approximation to the objective function. This can lead to results\n  that differ from a random forest implementation that uses the exact value of the\n  objective function.\n* XGBoost does not perform replacement when subsampling training cases. Each training case\n  can occur in a subsampled set either 0 or 1 time.\n"
  },
  {
    "path": "doc/tutorials/saving_model.rst",
    "content": "########################\nIntroduction to Model IO\n########################\n\n**Contents**\n\n.. contents::\n  :backlinks: none\n  :local:\n\nSince 2.1.0, the default model format for XGBoost is the UBJSON format; the option is\nenabled for serializing models to file, serializing models to buffer, and for memory\nsnapshot (pickle and alike).\n\nJSON and UBJSON have the same document structure with different representations, and we\nwill refer to them collectively as the JSON format. This tutorial aims to share some basic\ninsights into the JSON serialisation method used in XGBoost.  Unless explicitly\nmentioned, the following sections assume you are using one of the 2 output formats,\nwhich can be enabled by providing the file name with ``.json`` (or ``.ubj`` for binary\nJSON) as file extension when saving/loading model: ``booster.save_model('model.json')``.\nMore details below.\n\nBefore we get started, XGBoost is a gradient boosting library with focus on tree models,\nwhich means inside XGBoost, there are 2 distinct parts:\n\n1. The model consisting of trees and\n2. Hyperparameters and configurations used for building the model.\n\nIf you come from the Deep Learning community, then it should be clear to you that there\nare differences between the neural network structures composed of weights with fixed\ntensor operations, and the optimizers (like RMSprop) used to train them.\n\nSo when one calls ``booster.save_model`` (``xgb.save`` in R), XGBoost saves the trees,\nsome model parameters like number of input columns in trained trees, and the objective\nfunction, which combine to represent the concept of \"model\" in XGBoost.  As for why\nwe are saving the objective as part of the model, that's because objective controls transformation\nof global bias (called ``base_score`` or the intercept in XGBoost) and task-specific\ninformation.  
Users can share this model with others for inference, evaluation or continue\nthe training with a different set of hyper-parameters etc.\n\nHowever, this is not the end of the story.  There are cases where we need to save something\nmore than just the model itself.  For example, in distributed training, XGBoost performs\ncheckpointing operation.  Or for some reason, your favorite distributed computing\nframework decides to copy the model from one worker to another and continue the training in\nthere. In such cases, the serialisation output is required to contain enough information\nto continue previous training without user providing any parameters again.  We consider\nsuch scenario as **memory snapshot** (or memory based serialisation method) and\ndistinguish it from normal model IO operation. Currently, memory snapshot is used in the\nfollowing places:\n\n* Python package: when the ``Booster`` object is pickled with the built-in ``pickle`` module.\n* R package: when the ``xgb.Booster`` object is persisted with the built-in functions ``saveRDS``\n  or ``save``.\n* JVM packages: when the ``Booster`` object is serialized with the built-in functions ``saveModel``.\n\nTo enable JSON format support for model IO (saving only the trees and objective), provide\na filename with ``.json`` or ``.ubj`` as file extension, the latter is the extension for\n`Universal Binary JSON <https://ubjson.org/>`__\n\n.. code-block:: python\n  :caption: Python\n\n  bst.save_model('model_file_name.json')\n\n.. code-block:: r\n  :caption: R\n\n  xgb.save(bst, 'model_file_name.json')\n\n.. code-block:: Scala\n  :caption: Scala\n\n  val format = \"json\"  // or val format = \"ubj\"\n  model.write.option(\"format\", format).save(\"model_directory_path\")\n\n.. note::\n\n  Only load models from JSON files that were produced by XGBoost. 
Attempting to load\n  JSON files that were produced by an external source may lead to undefined behaviors\n  and crashes.\n\nWhen loading the model back, XGBoost recognizes the file extensions ``.json`` and\n``.ubj``, and can dispatch accordingly. If the extension is not specified, XGBoost tries\nto guess the right one.\n\n***************************************************************\nA note on backward compatibility of models and memory snapshots\n***************************************************************\n\n**We guarantee backward compatibility for models but not for memory snapshots.**\n\nModels (trees and objective) use a stable representation, so that models produced in earlier\nversions of XGBoost are accessible in later versions of XGBoost. **If you'd like to store or archive\nyour model for long-term storage, use** ``save_model`` (Python) and ``xgb.save`` (R).\n\nOn the other hand, memory snapshot (serialisation) captures many details internal to XGBoost, and its\nformat is not stable and is subject to frequent changes. Therefore, memory snapshot is suitable for\ncheckpointing only, where you persist the complete snapshot of the training configurations so that\nyou can recover robustly from possible failures and resume the training process. Loading memory\nsnapshot generated by an earlier version of XGBoost may result in errors or undefined behaviors.\n**If a model is persisted with** ``pickle.dump`` (Python) or ``saveRDS`` (R), **then the model may\nnot be accessible in later versions of XGBoost.**\n\n.. _custom-obj-metric:\n\n***************************\nCustom objective and metric\n***************************\n\nXGBoost accepts user provided objective, metric, and callback functions as extensions.\nThese functions are not saved in model file as they are language dependent features. With\nPython, user can pickle the model to include these functions in saved binary. 
One drawback\nis that the output from pickle is not a stable serialization format and doesn't work on\ndifferent Python versions nor XGBoost versions, not to mention different language\nenvironments. Another way to workaround this limitation is to provide these functions\nagain after the model is loaded by separating the serialization between the XGBoost\nbuilt-in model and auxiliary methods. If the customized function is useful, please consider\nmaking a PR for implementing it inside XGBoost, this way we can have your functions\nworking with different language bindings. See the next section for more about pickling.\n\n**********************************\nLoading pickled files or RDS files\n**********************************\n\n- From a different XGBoost version\n\n  As noted, pickled model is neither portable nor stable, but in some cases the pickled\n  models are valuable.  One way to restore it in the future is to load it back with that\n  specific version of Python and XGBoost, and then export the model by calling\n  :py:meth:`xgboost.Booster.save_model` or :py:meth:`xgboost.XGBModel.save_model`.\n\n.. note:: Pickle is not secure\n\n  Only load pickled files from a trusted source. The ``pickle`` Python module is NOT\n  secure. And by extension, ``joblib``, ``cloudpickle`` are also not safe when loading\n  files from unknown sources. See https://docs.python.org/3/library/pickle.html for more\n  information.\n\nA similar procedure may be used to recover the model persisted in an old RDS file. In R,\nyou are able to install an older version of XGBoost using the ``remotes`` package:\n\n.. code-block:: r\n\n  library(remotes)\n  remotes::install_version(\"xgboost\", \"0.90.0.1\")  # Install version 0.90.0.1\n\nOnce the desired version is installed, you can load the RDS file with ``readRDS`` and\nrecover the ``xgb.Booster`` object. Then call ``xgb.save`` to export the model using the\nstable representation.  
Now you should be able to use the model in the latest version of\nXGBoost.\n\n********************************************************\nSaving and Loading the internal parameters configuration\n********************************************************\n\nXGBoost's ``C API``, ``Python API`` and ``R API`` support saving and loading the internal\nconfiguration directly as a JSON string.  In Python package:\n\n.. code-block:: python\n\n  bst = xgboost.train(...)\n  config = bst.save_config()\n  print(config)\n\n\nor in R:\n\n.. code-block:: R\n\n  config <- xgb.config(bst)\n  print(config)\n\nWill print out something similar to (not actual output as it's too long for demonstration):\n\n.. code-block:: javascript\n\n    {\n      \"Learner\": {\n        \"generic_parameter\": {\n          \"device\": \"cuda:0\",\n          \"gpu_page_size\": \"0\",\n          \"n_jobs\": \"0\",\n          \"random_state\": \"0\",\n          \"seed\": \"0\",\n          \"seed_per_iteration\": \"0\"\n        },\n        \"gradient_booster\": {\n          \"gbtree_train_param\": {\n            \"num_parallel_tree\": \"1\",\n            \"process_type\": \"default\",\n            \"tree_method\": \"hist\",\n            \"updater\": \"grow_gpu_hist\",\n            \"updater_seq\": \"grow_gpu_hist\"\n          },\n          \"name\": \"gbtree\",\n          \"updater\": {\n            \"grow_gpu_hist\": {\n              \"gpu_hist_train_param\": {\n                \"debug_synchronize\": \"0\",\n              },\n              \"train_param\": {\n                \"alpha\": \"0\",\n                \"cache_opt\": \"1\",\n                \"colsample_bylevel\": \"1\",\n                \"colsample_bynode\": \"1\",\n                \"colsample_bytree\": \"1\",\n                \"default_direction\": \"learn\",\n\n                ...\n\n                \"subsample\": \"1\"\n              }\n            }\n          }\n        },\n        \"learner_train_param\": {\n          \"booster\": \"gbtree\",\n   
       \"disable_default_eval_metric\": \"0\",\n          \"objective\": \"reg:squarederror\"\n        },\n        \"metrics\": [],\n        \"objective\": {\n          \"name\": \"reg:squarederror\",\n          \"reg_loss_param\": {\n            \"scale_pos_weight\": \"1\"\n          }\n        }\n      },\n      \"version\": [1, 0, 0]\n    }\n\n\nYou can load it back to the model generated by same version of XGBoost by:\n\n.. code-block:: python\n\n  bst.load_config(config)\n\nThis way users can study the internal representation more closely. Please note that some\nJSON generators make use of locale dependent floating point serialization methods, which\nis not supported by XGBoost.\n\n*************************************************\nDifference between saving model and dumping model\n*************************************************\n\nXGBoost has a function called ``dump_model`` in the Booster class, which lets you to\nexport the model in a readable format like ``text``, ``json`` or ``dot`` (graphviz).  The\nprimary use case for it is for model interpretation and visualization, and is not supposed\nto be loaded back to XGBoost.\n\n**********\nCategories\n**********\n\nSince 3.1, the categories encoding from a training dataframe is stored in the booster to\nprovide test-time re-coding support, see :ref:`cat-recode` for more info about how the\nre-coder works. We will briefly explain the JSON format for the serialized category index.\n\nThe categories are saved in a JSON object named \"cats\" under the gbtree model. It contains\nthree keys:\n\n- feature_segments\n\nThis is a CSR-like pointer that stores the number of categories for each feature. It\nstarts with zero and ends with the total number of categories from all features. For\nexample:\n\n.. code-block:: python\n\n    feature_segments = [0, 3, 3, 5]\n\nThe ``feature_segments`` list represents a dataset with two categorical features and one\nnumerical feature. 
The first feature contains three categories, the second feature is\nnumerical and thus has no categories, and the last feature includes two categories.\n\n- sorted_idx\n\nThis array stores the sorted indices (`argsort`) of categories across all features,\nsegmented by the ``feature_segments``. Given a feature with categories: ``[\"b\", \"c\",\n\"a\"]``, the sorted index is ``[2, 0, 1]``.\n\n- enc\n\nThis is an array with a length equal to the number of features, storing all the categories\nin the same order as the input dataframe. The storage schema depends on whether the\ncategories are strings (XGBoost also supports numerical categories, such as integers). For\nstring categories, we use a schema similar to the arrow format for a string array. The\ncategories of each feature are represented by two arrays, namely ``offsets`` and\n``values``. The format is also similar to a CSR-matrix. The ``values`` field is a\n``uint8`` array storing characters from all category names. Given a feature with three\ncategories: ``[\"bb\", \"c\", \"a\"]``, the ``values`` field is ``[98, 98, 99, 97]``. Then the\n``offsets`` segments the ``values`` array similar to a CSR pointer: ``[0, 2, 3, 4]``. We\nchose to not store the ``values`` as a JSON string to avoid handling special characters\nand string encoding. The string names are stored exactly as given by the dataframe.\n\nAs for numerical categories, the ``enc`` contains two keys: ``type`` and ``values``. The\n``type`` field is an integer ID that identifies the type of the categories, such as 64-bit\nintegers and 32-bit floating points (note that they are all f32 inside a decision\ntree). The exact mapping between the type to the integer ID is internal but stable. 
The\n``values`` is an array storing all categories in a feature.\n\n*************\nBrief History\n*************\n\n- The JSON format was introduced in 1.0, aiming to replace the now removed old binary\n  internal format with an open format that can be easily reused.\n- Later in XGBoost 1.6.0, additional support for Universal Binary JSON was introduced as\n  an optimization for more efficient model IO.\n- UBJSON became the default format in 2.1.\n- The old binary format was removed in 3.1.\n- The JSON schema file is no longer maintained and has been removed in 3.2. The underlying\n  schema of the model is not changed.\n"
  },
  {
    "path": "doc/tutorials/slicing_model.rst",
    "content": "##############\nSlicing Models\n##############\n\nSlice tree model\n----------------\n\nWhen ``booster`` is set to ``gbtree`` or ``dart``, XGBoost builds a tree model, which is a\nlist of trees and can be sliced into multiple sub-models.\n\n.. tabs::\n\n    .. code-tab:: py\n\n        import xgboost as xgb\n        from sklearn.datasets import make_classification\n        num_classes = 3\n        X, y = make_classification(n_samples=1000, n_informative=5,\n                                   n_classes=num_classes)\n        dtrain = xgb.DMatrix(data=X, label=y)\n        num_parallel_tree = 4\n        num_boost_round = 16\n        # total number of built trees is num_parallel_tree * num_classes * num_boost_round\n\n        # We build a boosted random forest for classification here.\n        booster = xgb.train({\n            'num_parallel_tree': 4, 'subsample': 0.5, 'num_class': 3},\n                            num_boost_round=num_boost_round, dtrain=dtrain)\n\n        # This is the sliced model, containing [3, 7) forests\n        # step is also supported with some limitations like negative step is invalid.\n        sliced: xgb.Booster = booster[3:7]\n\n        # Access individual tree layer\n        trees = [_ for _ in booster]\n        assert len(trees) == num_boost_round\n\n    .. code-tab:: r R\n\n        library(xgboost)\n        data(agaricus.train, package = \"xgboost\")\n        dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)\n\n        model <- xgb.train(\n          params = xgb.params(objective = \"binary:logistic\", max_depth = 4),\n          data = dm,\n          nrounds = 20\n        )\n        sliced <- model[seq(3, 7)]\n        ##### xgb.Booster\n        # of features: 126\n        # of rounds:  5\n\nThe sliced model is a copy of selected trees, that means the model itself is immutable\nduring slicing. This feature is the basis of ``save_best`` option in early stopping\ncallback. 
See :ref:`sphx_glr_python_examples_individual_trees.py` for a worked example on\nhow to combine prediction with sliced trees.\n\n.. note::\n\n   The returned model slice doesn't contain attributes like\n   :py:attr:`~xgboost.Booster.best_iteration` and\n   :py:attr:`~xgboost.Booster.best_score`.\n"
  },
  {
    "path": "doc/tutorials/spark_estimator.rst",
    "content": "################################\nDistributed XGBoost with PySpark\n################################\n\nStarting from version 1.7.0, xgboost supports pyspark estimator APIs.\n\n.. note::\n\n  The integration is only tested on Linux distributions.\n\n.. contents::\n  :backlinks: none\n  :local:\n\n*************************\nXGBoost PySpark Estimator\n*************************\n\nSparkXGBRegressor\n=================\n\nSparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression\nalgorithm based on the XGBoost python library, and it can be used in PySpark Pipeline\nand PySpark ML meta algorithms like CrossValidator/TrainValidationSplit/OneVsRest.\n\nWe can create a ``SparkXGBRegressor`` estimator like:\n\n.. code-block:: python\n\n  from xgboost.spark import SparkXGBRegressor\n  xgb_regressor = SparkXGBRegressor(\n    features_col=\"features\",\n    label_col=\"label\",\n    num_workers=2,\n  )\n\n\nThe above snippet creates a spark estimator which can fit on a spark dataset, and return a\nspark model that can transform a spark dataset and generate dataset with prediction\ncolumn. We can set almost all of xgboost sklearn estimator parameters as\n``SparkXGBRegressor`` parameters, but some parameters such as ``nthread`` are forbidden in\nspark estimator, and some parameters are replaced with pyspark specific parameters such as\n``weight_col``, ``validation_indicator_col``, for details please see ``SparkXGBRegressor``\ndoc.\n\nThe following code snippet shows how to train a spark xgboost regressor model,\nfirst we need to prepare a training dataset as a spark dataframe that contains\na \"label\" column and \"features\" column(s), the \"features\" column(s) must be ``pyspark.ml.linalg.Vector``\ntype or spark array type or a list of feature column names.\n\n\n.. 
code-block:: python\n\n  xgb_regressor_model = xgb_regressor.fit(train_spark_dataframe)\n\n\nThe following code snippet shows how to predict test data using a spark xgboost regressor model,\nfirst we need to prepare a test dataset as a spark dataframe contains\n\"features\" and \"label\" column, the \"features\" column must be ``pyspark.ml.linalg.Vector``\ntype or spark array type.\n\n.. code-block:: python\n\n  transformed_test_spark_dataframe = xgb_regressor_model.transform(test_spark_dataframe)\n\n\nThe above snippet code returns a ``transformed_test_spark_dataframe`` that contains the input\ndataset columns and an appended column \"prediction\" representing the prediction results.\n\nSparkXGBClassifier\n==================\n\n``SparkXGBClassifier`` estimator has similar API with ``SparkXGBRegressor``, but it has some\npyspark classifier specific params, e.g. ``raw_prediction_col`` and ``probability_col`` parameters.\nCorrespondingly, by default, ``SparkXGBClassifierModel`` transforming test dataset will\ngenerate result dataset with 3 new columns:\n\n- \"prediction\": represents the predicted label.\n- \"raw_prediction\": represents the output margin values.\n- \"probability\": represents the prediction probability on each label.\n\n\n***************************\nXGBoost PySpark GPU support\n***************************\n\nXGBoost PySpark fully supports GPU acceleration. Users are not only able to enable\nefficient training but also utilize their GPUs for the whole PySpark pipeline including\nETL and inference. In below sections, we will walk through an example of training on a\nSpark standalone cluster with GPU support. 
To get started, first we need to install some\nadditional packages, then we can set the ``device`` parameter to ``cuda`` or ``gpu``.\n\nPrepare the necessary packages\n==============================\n\nAside from the PySpark and XGBoost modules, we also need the `cuDF\n<https://docs.rapids.ai/api/cudf/stable/>`_ package for handling Spark dataframe. We\nrecommend using either Conda or Virtualenv to manage python dependencies for PySpark\njobs. Please refer to `How to Manage Python Dependencies in PySpark\n<https://www.databricks.com/blog/2020/12/22/how-to-manage-python-dependencies-in-pyspark.html>`_\nfor more details on PySpark dependency management.\n\nIn short, to create a Python environment that can be sent to a remote cluster using\nvirtualenv and pip:\n\n.. code-block:: bash\n\n  python -m venv xgboost_env\n  source xgboost_env/bin/activate\n  pip install pyarrow pandas venv-pack xgboost\n  # https://docs.rapids.ai/install#pip-install\n  pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com\n  venv-pack -o xgboost_env.tar.gz\n\nWith Conda:\n\n.. code-block:: bash\n\n  conda create -y -n xgboost_env -c conda-forge conda-pack python=3.9\n  conda activate xgboost_env\n  # use conda when the supported version of xgboost (1.7) is released on conda-forge\n  pip install xgboost\n  conda install cudf pyarrow pandas -c rapids -c nvidia -c conda-forge\n  conda pack -f -o xgboost_env.tar.gz\n\n\nWrite your PySpark application\n==============================\n\nBelow snippet is a small example for training xgboost model with PySpark. Notice that we are\nusing a list of feature names instead of vector type as the input. The parameter ``\"device=cuda\"``\nspecifically indicates that the training will be performed on a GPU.\n\n.. 
code-block:: python\n\n  from pyspark.sql import SparkSession\n  from xgboost.spark import SparkXGBRegressor\n  spark = SparkSession.builder.getOrCreate()\n\n  # read data into spark dataframe\n  train_data_path = \"xxxx/train\"\n  train_df = spark.read.parquet(train_data_path)\n\n  test_data_path = \"xxxx/test\"\n  test_df = spark.read.parquet(test_data_path)\n\n  # assume the label column is named \"class\"\n  label_name = \"class\"\n\n  # get a list with feature column names\n  feature_names = [x.name for x in train_df.schema if x.name != label_name]\n\n  # create a xgboost pyspark regressor estimator and set device=\"cuda\"\n  regressor = SparkXGBRegressor(\n    features_col=feature_names,\n    label_col=label_name,\n    num_workers=2,\n    device=\"cuda\",\n  )\n\n  # train and return the model\n  model = regressor.fit(train_df)\n\n  # predict on test data\n  predict_df = model.transform(test_df)\n  predict_df.show()\n\nLike other distributed interfaces, the ``device`` parameter doesn't support specifying ordinal as GPUs are managed by Spark instead of XGBoost (good: ``device=cuda``, bad: ``device=cuda:0``).\n\n.. _stage-level-scheduling:\n\nSubmit the PySpark application\n==============================\n\nThis section assumes that you have configured the Spark standalone cluster with GPU support. Otherwise, please\nrefer to `spark standalone configuration with GPU support <https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-on-prem.html#spark-standalone-cluster>`_.\n\nStarting from XGBoost 2.0.1, stage-level scheduling is automatically enabled. Therefore,\nif you are using Spark standalone cluster version 3.4.0 or higher, we strongly recommend\nconfiguring the ``\"spark.task.resource.gpu.amount\"`` as a fractional value. This will\nenable running multiple tasks in parallel during the ETL phase. An example configuration\nwould be ``\"spark.task.resource.gpu.amount=1/spark.executor.cores\"``. 
However, if you are\nusing an XGBoost version earlier than 2.0.1 or a Spark standalone cluster version below 3.4.0,\nyou still need to set ``\"spark.task.resource.gpu.amount\"`` equal to ``\"spark.executor.resource.gpu.amount\"``.\n\n.. note::\n\n  As of now, the stage-level scheduling feature in XGBoost is limited to the Spark standalone cluster mode.\n  However, we have plans to expand its compatibility to YARN and Kubernetes once Spark 3.5.1 is officially released.\n\n.. code-block:: bash\n\n  export PYSPARK_DRIVER_PYTHON=python\n  export PYSPARK_PYTHON=./environment/bin/python\n\n  spark-submit \\\n    --master spark://<master-ip>:7077 \\\n    --conf spark.executor.cores=12 \\\n    --conf spark.task.cpus=1 \\\n    --conf spark.executor.resource.gpu.amount=1 \\\n    --conf spark.task.resource.gpu.amount=0.08 \\\n    --archives xgboost_env.tar.gz#environment \\\n    xgboost_app.py\n\nThe above command submits the xgboost pyspark application with the python environment created by pip or conda,\nspecifying a request for 1 GPU and 12 CPUs per executor. As you can see, a total of 12 tasks per executor will be\nexecuted concurrently during the ETL phase.\n\nModel Persistence\n=================\n\nSimilar to standard PySpark ml estimators, one can persist and reuse the model with ``save``\nand ``load`` methods:\n\n.. code-block:: python\n\n  regressor = SparkXGBRegressor()\n  model = regressor.fit(train_df)\n  # save the model\n  model.save(\"/tmp/xgboost-pyspark-model\")\n  # load the model\n  model2 = SparkXGBRegressorModel.load(\"/tmp/xgboost-pyspark-model\")\n\nTo export the underlying booster model used by XGBoost:\n\n.. 
code-block:: python\n\n  regressor = SparkXGBRegressor()\n  model = regressor.fit(train_df)\n  # the same booster object returned by xgboost.train\n  booster: xgb.Booster = model.get_booster()\n  booster.predict(...)\n  booster.save_model(\"model.json\") # or model.ubj, depending on your choice of format.\n\nThis booster is not only shared by other Python interfaces but also used by all the\nXGBoost bindings including the C, Java, and the R package. Lastly, one can extract the\nbooster file directly from a saved spark estimator without going through the getter:\n\n.. code-block:: python\n\n  import xgboost as xgb\n  bst = xgb.Booster()\n  # Loading the model saved in the previous snippet\n  bst.load_model(\"/tmp/xgboost-pyspark-model/model/part-00000\")\n\n\nAccelerate the whole pipeline for xgboost pyspark\n=================================================\n\nWith `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_, you\ncan leverage GPUs to accelerate the whole pipeline (ETL, Train, Transform) for xgboost\npyspark without the need for any code modifications. Likewise, you have the option to configure\nthe ``\"spark.task.resource.gpu.amount\"`` setting as a fractional value, enabling a higher\nnumber of tasks to be executed in parallel during the ETL phase. Please refer to\n:ref:`stage-level-scheduling` for more details.\n\n\nAn example submit command is shown below with additional spark configurations and dependencies:\n\n.. 
code-block:: bash\n\n  export PYSPARK_DRIVER_PYTHON=python\n  export PYSPARK_PYTHON=./environment/bin/python\n\n  spark-submit \\\n    --master spark://<master-ip>:7077 \\\n    --conf spark.executor.cores=12 \\\n    --conf spark.task.cpus=1 \\\n    --conf spark.executor.resource.gpu.amount=1 \\\n    --conf spark.task.resource.gpu.amount=0.08 \\\n    --packages com.nvidia:rapids-4-spark_2.12:24.04.1 \\\n    --conf spark.plugins=com.nvidia.spark.SQLPlugin \\\n    --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \\\n    --archives xgboost_env.tar.gz#environment \\\n    xgboost_app.py\n\nWhen the rapids plugin is enabled, both the JVM rapids plugin and the cuDF Python package\nare required. More configuration options can be found in the RAPIDS link above along with\ndetails on the plugin.\n\nAdvanced Usage\n==============\n\nXGBoost needs to repartition the input dataset into num_workers partitions to ensure there will be\nnum_workers training tasks running at the same time. However, repartitioning is a costly operation.\n\nIf the data is read from the source and fitted directly to XGBoost\nwithout introducing a shuffle stage, users can avoid the need for repartitioning by setting\nthe Spark configuration parameters ``spark.sql.files.maxPartitionNum`` and\n``spark.sql.files.minPartitionNum`` to num_workers. This tells Spark to automatically partition\nthe dataset into the desired number of partitions.\n\nHowever, if the input dataset is skewed (i.e. the data is not evenly distributed), setting\nthe partition number to num_workers may not be efficient. In this case, users can set\nthe ``force_repartition=true`` option to explicitly force XGBoost to repartition the dataset,\neven if the partition number is already equal to num_workers. This ensures the data is evenly\ndistributed across the workers.\n"
  },
  {
    "path": "doc/xgboost_doc.yml",
    "content": "name: xgboost_docs\ndependencies:\n  - python=3.10\n  - pip\n  - pygraphviz\n  - sphinx\n  - sphinx-gallery\n  - recommonmark\n  - mock\n  - sh\n  - matplotlib\n  - numpy\n  - scipy\n  - scikit-learn\n  - myst-parser\n  - pyspark\n  - pip:\n    - breathe\n    - sphinx_rtd_theme\n    - pydot-ng\n    - graphviz\n    - ray[train]\n    - xgboost_ray\n"
  },
  {
    "path": "include/xgboost/base.h",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file base.h\n * \\brief Defines configuration macros and basic types for xgboost.\n */\n#ifndef XGBOOST_BASE_H_\n#define XGBOOST_BASE_H_\n\n#include <dmlc/omp.h>  // for omp_uint, omp_ulong\n// Put the windefs here to guard as many files as possible.\n#include <xgboost/windefs.h>\n\n#include <cstdint>  // for int32_t, uint64_t, int16_t\n#include <ostream>  // for ostream\n#include <string>   // for string\n#include <utility>  // for pair\n#include <vector>   // for vector\n\n/*!\n * \\brief string flag for R library, to leave hooks when needed.\n */\n#ifndef XGBOOST_STRICT_R_MODE\n#define XGBOOST_STRICT_R_MODE 0\n#endif  // XGBOOST_STRICT_R_MODE\n\n/*!\n * \\brief Whether always log console message with time.\n *  It will display like, with timestamp appended to head of the message.\n *  \"[21:47:50] 6513x126 matrix with 143286 entries loaded from\n * ../data/agaricus.txt.train\"\n */\n#ifndef XGBOOST_LOG_WITH_TIME\n#define XGBOOST_LOG_WITH_TIME 1\n#endif  // XGBOOST_LOG_WITH_TIME\n\n/*!\n * \\brief Check if alignas(*) keyword is supported. 
(g++ 4.8 or higher)\n */\n#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)\n#define XGBOOST_ALIGNAS(X) alignas(X)\n#else\n#define XGBOOST_ALIGNAS(X)\n#endif  // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)\n\n#if defined(__GNUC__)\n#define XGBOOST_EXPECT(cond, ret) __builtin_expect((cond), (ret))\n#else\n#define XGBOOST_EXPECT(cond, ret) (cond)\n#endif  // defined(__GNUC__)\n\n/*!\n * \\brief Tag function as usable by device\n */\n#if defined(__CUDA__) || defined(__NVCC__)\n#define XGBOOST_DEVICE __host__ __device__\n#else\n#define XGBOOST_DEVICE\n#endif  // defined (__CUDA__) || defined(__NVCC__)\n\n#if defined(__CUDA__) || defined(__CUDACC__)\n#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__\n#define XGBOOST_DEV_INLINE __device__ __forceinline__\n#else\n#define XGBOOST_HOST_DEV_INLINE\n#define XGBOOST_DEV_INLINE\n#endif  // defined(__CUDA__) || defined(__CUDACC__)\n\n// restrict\n#if defined(_MSC_VER)\n#define XGBOOST_RESTRICT __restrict\n#else\n#define XGBOOST_RESTRICT __restrict__\n#endif\n\n// These check are for Makefile.\n#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)\n/* default logic for software pre-fetching */\n#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))) || defined(__INTEL_COMPILER)\n// Enable _mm_prefetch for Intel compiler and MSVC+x86\n#define XGBOOST_MM_PREFETCH_PRESENT\n#define XGBOOST_BUILTIN_PREFETCH_PRESENT\n#elif defined(__GNUC__)\n// Enable __builtin_prefetch for GCC\n#define XGBOOST_BUILTIN_PREFETCH_PRESENT\n#endif  // GUARDS\n\n#endif  // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()\n\nnamespace xgboost {\n/*! \\brief unsigned integer type used for feature index. */\nusing bst_uint = std::uint32_t;  // NOLINT\n/*! \\brief unsigned long integers */\nusing bst_ulong = std::uint64_t;  // NOLINT\n/*! \\brief float type, used for storing statistics */\nusing bst_float = float;  // NOLINT\n/*! 
\\brief Categorical value type. */\nusing bst_cat_t = std::int32_t;  // NOLINT\n/*! \\brief Type for data column (feature) index. */\nusing bst_feature_t = std::uint32_t;  // NOLINT\n/**\n * @brief Type for histogram bin index.  We sometimes use -1 to indicate invalid bin.\n */\nusing bst_bin_t = std::int32_t;  // NOLINT\n/**\n * @brief Type for data row index (sample).\n */\nusing bst_idx_t = std::uint64_t;  // NOLINT\n/**\n * \\brief Type for tree node index and tree depth.\n */\nusing bst_node_t = std::int32_t;  // NOLINT\n/**\n * @brief Type for ranking group index.\n */\nusing bst_group_t = std::uint32_t;  // NOLINT\n/**\n * @brief Type for indexing into output targets.\n */\nusing bst_target_t = std::uint32_t;  // NOLINT\n/**\n * @brief Type for indexing boosted layers.\n */\nusing bst_layer_t = std::int32_t;  // NOLINT\n/**\n * @brief Type for indexing trees.\n */\nusing bst_tree_t = std::int32_t;  // NOLINT\n/**\n * @brief Ordinal of a CUDA device.\n */\nusing bst_d_ordinal_t = std::int16_t;  // NOLINT\n\nnamespace detail {\n/*! \\brief Implementation of gradient statistics pair. Template specialisation\n * may be used to overload different gradients types e.g. low precision, high\n * precision, integer, floating point. */\ntemplate <typename T>\nclass GradientPairInternal {\n  /*! \\brief gradient statistics */\n  T grad_{0};\n  /*! 
\\brief second order gradient statistics */\n  T hess_{0};\n\n  XGBOOST_DEVICE void SetGrad(T g) { grad_ = g; }\n  XGBOOST_DEVICE void SetHess(T h) { hess_ = h; }\n\n public:\n  using ValueT = T;\n\n  inline void Add(const ValueT &grad, const ValueT &hess) {\n    grad_ += grad;\n    hess_ += hess;\n  }\n\n  GradientPairInternal() = default;\n\n  XGBOOST_DEVICE GradientPairInternal(T grad, T hess) {\n    SetGrad(grad);\n    SetHess(hess);\n  }\n\n  // Copy constructor if of same value type, marked as default to be trivially_copyable\n  GradientPairInternal(GradientPairInternal const &g) = default;\n  GradientPairInternal(GradientPairInternal &&g) = default;\n  GradientPairInternal &operator=(GradientPairInternal const &that) = default;\n  GradientPairInternal &operator=(GradientPairInternal &&that) = default;\n\n  // Copy constructor if different value type - use getters and setters to\n  // perform conversion\n  template <typename T2>\n  XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {\n    SetGrad(g.GetGrad());\n    SetHess(g.GetHess());\n  }\n\n  XGBOOST_DEVICE T GetGrad() const { return grad_; }\n  XGBOOST_DEVICE T GetHess() const { return hess_; }\n\n  XGBOOST_DEVICE GradientPairInternal<T> &operator+=(const GradientPairInternal<T> &rhs) {\n    grad_ += rhs.grad_;\n    hess_ += rhs.hess_;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> operator+(const GradientPairInternal<T> &rhs) const {\n    GradientPairInternal<T> g;\n    g.grad_ = grad_ + rhs.grad_;\n    g.hess_ = hess_ + rhs.hess_;\n    return g;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> &operator-=(const GradientPairInternal<T> &rhs) {\n    grad_ -= rhs.grad_;\n    hess_ -= rhs.hess_;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> operator-(const GradientPairInternal<T> &rhs) const {\n    GradientPairInternal<T> g;\n    g.grad_ = grad_ - rhs.grad_;\n    g.hess_ = hess_ - rhs.hess_;\n    return g;\n  }\n\n  XGBOOST_DEVICE 
GradientPairInternal<T> &operator*=(float multiplier) {\n    grad_ *= multiplier;\n    hess_ *= multiplier;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> operator*(float multiplier) const {\n    GradientPairInternal<T> g;\n    g.grad_ = grad_ * multiplier;\n    g.hess_ = hess_ * multiplier;\n    return g;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> &operator/=(float divisor) {\n    grad_ /= divisor;\n    hess_ /= divisor;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInternal<T> operator/(float divisor) const {\n    GradientPairInternal<T> g;\n    g.grad_ = grad_ / divisor;\n    g.hess_ = hess_ / divisor;\n    return g;\n  }\n\n  XGBOOST_DEVICE bool operator==(const GradientPairInternal<T> &rhs) const {\n    return grad_ == rhs.grad_ && hess_ == rhs.hess_;\n  }\n\n  XGBOOST_DEVICE explicit GradientPairInternal(int value) {\n    *this = GradientPairInternal<T>(static_cast<float>(value), static_cast<float>(value));\n  }\n\n  friend std::ostream &operator<<(std::ostream &os, const GradientPairInternal<T> &g) {\n    os << g.GetGrad() << \"/\" << g.GetHess();\n    return os;\n  }\n};\n}  // namespace detail\n\n/*! \\brief gradient statistics pair usually needed in gradient boosting */\nusing GradientPair = detail::GradientPairInternal<float>;\n/*! \\brief High precision gradient statistics pair */\nusing GradientPairPrecise = detail::GradientPairInternal<double>;\n\n/*! \\brief Fixed point representation for high precision gradient pair. 
Has a different interface so\n * we don't accidentally use it in gain calculations.*/\nclass GradientPairInt64 {\n  using T = int64_t;\n  T grad_ = 0;\n  T hess_ = 0;\n\n public:\n  using ValueT = T;\n\n  XGBOOST_DEVICE GradientPairInt64(T grad, T hess) : grad_(grad), hess_(hess) {}\n  GradientPairInt64() = default;\n\n  // Copy constructor if of same value type, marked as default to be trivially_copyable\n  GradientPairInt64(GradientPairInt64 const &g) = default;\n  GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;\n\n  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }\n  [[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }\n\n  XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {\n    grad_ += rhs.grad_;\n    hess_ += rhs.hess_;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInt64 operator+(const GradientPairInt64 &rhs) const {\n    GradientPairInt64 g;\n    g.grad_ = grad_ + rhs.grad_;\n    g.hess_ = hess_ + rhs.hess_;\n    return g;\n  }\n\n  XGBOOST_DEVICE GradientPairInt64 &operator-=(const GradientPairInt64 &rhs) {\n    grad_ -= rhs.grad_;\n    hess_ -= rhs.hess_;\n    return *this;\n  }\n\n  XGBOOST_DEVICE GradientPairInt64 operator-(const GradientPairInt64 &rhs) const {\n    GradientPairInt64 g;\n    g.grad_ = grad_ - rhs.grad_;\n    g.hess_ = hess_ - rhs.hess_;\n    return g;\n  }\n\n  XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {\n    return grad_ == rhs.grad_ && hess_ == rhs.hess_;\n  }\n  friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {\n    os << g.GetQuantisedGrad() << \"/\" << g.GetQuantisedHess();\n    return os;\n  }\n};\n\nusing Args = std::vector<std::pair<std::string, std::string> >;\n\n/** @brief small eps gap for minimum split decision. */\nconstexpr inline float kRtEps = 1e-6f;\n\n/*! \\brief define unsigned long for openmp loop */\nusing omp_ulong = dmlc::omp_ulong;  // NOLINT\n/*! 
\\brief define unsigned int for openmp loop */\nusing bst_omp_uint = dmlc::omp_uint;  // NOLINT\n/*! \\brief Type used for representing version number in binary form.*/\nusing XGBoostVersionT = std::int32_t;\n}  // namespace xgboost\n\n#endif  // XGBOOST_BASE_H_\n"
  },
  {
    "path": "include/xgboost/byteswap.h",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <dmlc/endian.h>  // for ByteSwap\n#include <xgboost/base.h>\n#include <xgboost/windefs.h>\n\n#include <cstdint>\n\n#if defined(xgboost_IS_WIN)\n\n#include <cstdlib>  // for _byteswap_uint64, _byteswap_ulong, _byteswap_ushort\n\n#endif  // defined(xgboost_IS_WIN)\n\nnamespace xgboost {\n#if defined(__CUDA_ARCH__)\n// CUDA kernel version\ntemplate <typename T>\n[[nodiscard]] __device__ T ByteSwap(T v);\n\ntemplate <>\ninline __device__ std::uint16_t ByteSwap(std::uint16_t v) {\n  return __nv_bswap16(v);\n}\n\ntemplate <>\ninline __device__ std::uint32_t ByteSwap(std::uint32_t v) {\n  return __nv_bswap32(v);\n}\n\ntemplate <>\ninline __device__ std::uint64_t ByteSwap(std::uint64_t v) {\n  return __nv_bswap64(v);\n}\n\n#elif defined(__GLIBC__)\n// Host gcc/clang\ntemplate <typename T>\nT ByteSwap(T v);\n\ntemplate <>\ninline std::uint16_t ByteSwap(std::uint16_t v) {\n  return __builtin_bswap16(v);\n}\n\ntemplate <>\ninline std::uint32_t ByteSwap(std::uint32_t v) {\n  return __builtin_bswap32(v);\n}\n\ntemplate <>\ninline std::uint64_t ByteSwap(std::uint64_t v) {\n  return __builtin_bswap64(v);\n}\n\n#elif defined(xgboost_IS_WIN) && !defined(__MINGW32__)\n// MSVC\ntemplate <typename T>\nT ByteSwap(T v);\n\ntemplate <>\ninline std::uint16_t ByteSwap(std::uint16_t v) {\n  return _byteswap_ushort(v);\n}\n\ntemplate <>\ninline std::uint32_t ByteSwap(std::uint32_t v) {\n  return _byteswap_ulong(v);\n}\n\ntemplate <>\ninline std::uint64_t ByteSwap(std::uint64_t v) {\n  return _byteswap_uint64(v);\n}\n\n#else\n\ntemplate <typename T>\nT ByteSwap(T v) {\n  dmlc::ByteSwap(&v, sizeof(v), 1);\n  return v;\n}\n\n#endif  //  defined(__CUDA_ARCH__)\n}  // namespace xgboost\n"
  },
  {
    "path": "include/xgboost/c_api.h",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n *\n * @brief C API of XGBoost, used to interface with other high-level languages.\n */\n#ifndef XGBOOST_C_API_H_\n#define XGBOOST_C_API_H_\n\n#ifdef __cplusplus\n#define XGB_EXTERN_C extern \"C\"\n#include <cstddef>\n#include <cstdint>\n#include <cstdio>\n#else\n#define XGB_EXTERN_C\n#include <stddef.h>\n#include <stdint.h>\n#include <stdio.h>\n#endif  // __cplusplus\n\n#if defined(_MSC_VER) || defined(_WIN32)\n#define XGB_DLL XGB_EXTERN_C __declspec(dllexport)\n#else\n#define XGB_DLL XGB_EXTERN_C __attribute__((visibility(\"default\")))\n#endif  // defined(_MSC_VER) || defined(_WIN32)\n\n// manually define unsigned long\ntypedef uint64_t bst_ulong;  // NOLINT(*)\n\n/**\n * @mainpage\n *\n * @brief XGBoost C API reference.\n *\n * For the official document page see:\n * <a href=\"https://xgboost.readthedocs.io/en/stable/c.html\">XGBoost C Package</a>.\n */\n\n/**\n * @defgroup Library Library\n *\n * These functions are used to obtain general information about XGBoost including version,\n * build info and current global configuration.\n *\n * @{\n */\n\n/** @brief Handle to the DMatrix */\ntypedef void *DMatrixHandle;  // NOLINT(*)\n/** @brief Handle to the Booster */\ntypedef void *BoosterHandle;  // NOLINT(*)\n/**\n * @brief Handle to the categories container.\n *\n * @since 3.2.0\n */\ntypedef void *CategoriesHandle;  // NOLINT(*)\n\n/**\n * @brief Return the version of the XGBoost library.\n *\n *   The output variable is only written if it's not NULL.\n *\n * @param major Store the major version number.\n * @param minor Store the minor version number.\n * @param patch Store the patch (revision) number.\n */\nXGB_DLL void XGBoostVersion(int *major, int *minor, int *patch);\n\n/**\n * @brief Get compile information of the shared XGBoost library.\n *\n * @param out string encoded JSON object containing build flags and dependency versions.\n *\n * @return 0 when success, -1 when failure happens\n 
*/\nXGB_DLL int XGBuildInfo(char const **out);\n\n/**\n * @brief Get the string message of the last error.\n *\n *   Most functions in XGBoost returns 0 when success and non-zero when an error\n *   occurred. In the case of error, @ref XGBGetLastError can be used to retrieve the\n *   error message\n *\n *   This function is thread safe.\n *\n * @return The error message from the last error.\n */\nXGB_DLL const char *XGBGetLastError();\n\n/**\n * @brief register callback function for LOG(INFO) messages -- helpful messages\n *        that are not errors.\n *\n * @note This function can be called by multiple threads. The callback function\n *       will run on the thread that registered it.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBRegisterLogCallback(void (*callback)(const char *));\n\n/**\n * @brief Set global configuration (collection of parameters that apply globally). This function\n *        accepts the list of key-value pairs representing the global-scope parameters to be\n *        configured. The list of key-value pairs are passed in as a JSON string.\n * @param config a JSON string representing the list of key-value pairs. The JSON object shall\n *                 be flat: no value can be a JSON object or an array.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBSetGlobalConfig(char const *config);\n\n/**\n * @brief Get current global configuration (collection of parameters that apply globally).\n * @param out_config pointer to received returned global configuration, represented as a JSON string.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBGetGlobalConfig(char const **out_config);\n\n/**@}*/\n\n/**\n * @defgroup DMatrix DMatrix\n *\n * @brief DMatrix is the basic data storage for XGBoost used by all XGBoost algorithms\n *        including both training, prediction and explanation. 
There are a few variants of\n *        `DMatrix` including normal `DMatrix`, which is a CSR matrix, `QuantileDMatrix`,\n *        which is used by histogram-based tree methods for saving memory, and lastly the\n *        experimental external-memory-based DMatrix, which reads data in batches during\n *        training. For the last two variants, see the @ref Streaming group.\n *\n * @{\n */\n\n/**\n * @brief load a data matrix\n *\n * @deprecated since 2.0.0\n * @see XGDMatrixCreateFromURI()\n * @param fname the name of the file\n * @param silent whether print messages during loading\n * @param out a loaded data matrix\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out);\n\n/**\n * @brief load a data matrix\n *\n * @param config JSON encoded parameters for DMatrix construction.  Accepted fields are:\n *   - uri: The URI of the input file. The URI parameter `format` is required when loading text data.\n *          @verbatim embed:rst:leading-asterisk\n *            See :doc:`/tutorials/input_format` for more info.\n *          @endverbatim\n *   - silent (optional): Whether to print message during loading. Default to true.\n *   - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row.\n * @param out a loaded data matrix\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);\n\n/**\n * @brief Create a DMatrix from columnar data. (table)\n *\n * A special type of input to the `DMatrix` is the columnar format, which refers to\n * column-based dataframes. XGBoost can accept both numeric data types like integers and\n * floats, along with the categorical type, called dictionary in arrow's term. The\n * addition of categorical type is introduced in 3.1.0. 
The dataframe is represented by a\n * list of array interfaces with one object for each column.\n *\n * A categorical type is represented by 3 buffers, the validity mask, the names of the\n * categories (called index for most of the dataframe implementation), and the codes used\n * to represent the categories in the rows. XGBoost consumes a categorical column by\n * accepting two JSON-encoded arrow arrays in a list. The first item in the list is a JSON\n * object with `{\"offsets\": IntegerArray, \"values\": StringArray }` representing the string\n * names defined by the arrow columnar format. The second buffer is a masked integer\n * array that stores the categorical codes along with the validity mask:\n *\n * @code{javascript}\n * [\n *   // categorical column, represented as an array (list)\n *   [\n *     {\n *       'offsets':\n *       {\n *         'data': (129412626415808, True),\n *         'typestr': '<i4', 'version': 3, 'strides': None, 'shape': (3,), 'mask': None\n *       },\n *       'values':\n *       {\n *         'data': (129412626416000, True),\n *         'typestr': '<i1', 'version': 3, 'strides': None, 'shape': (7,), 'mask': None\n *       }\n *     },\n *     {\n *       'data': (106200854378448, True),\n *       'typestr': '<i1', 'version': 3, 'strides': None, 'shape': (2,), 'mask': None\n *     }\n *   ],\n *   // numeric column, represented as an object, same number of rows as the previous column (2)\n *   {\n *     'data': (106200854378448, True),\n *     'typestr': '<f4', 'version': 3, 'strides': None, 'shape': (2,), 'mask': None\n *   }\n * ]\n * @endcode\n *\n * As for numeric inputs, it's the same as dense array.\n *\n * @param data   A list of JSON-encoded array interfaces.\n * @param config See @ref XGDMatrixCreateFromDense for details.\n * @param out    The created DMatrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *config, DMatrixHandle 
*out);\n\n/**\n * @example c-api-demo.c\n */\n/**\n * @brief Create a DMatrix from CSR matrix.\n * @param indptr  JSON encoded __array_interface__ to row pointers in CSR.\n * @param indices JSON encoded __array_interface__ to column indices in CSR.\n * @param data    JSON encoded __array_interface__ to values in CSR.\n * @param ncol    The number of columns.\n * @param config  See @ref XGDMatrixCreateFromDense for details.\n * @param out     The created dmatrix\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data,\n                                   bst_ulong ncol, char const *config, DMatrixHandle *out);\n\n/**\n * @brief Create a DMatrix from dense array.\n *\n * The array interface is defined in https://numpy.org/doc/2.1/reference/arrays.interface.html\n * We encode the interface as a JSON object.\n *\n * @param data   JSON encoded __array_interface__ to array values.\n * @param config JSON encoded configuration.  Required values are:\n *   - missing: Which value to represent missing value.\n *   - nthread (optional): Number of threads used for initializing DMatrix.\n *   - data_split_mode (optional): Whether the data was split by row or column beforehand. 
Default to row.\n * @param out The created DMatrix\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);\n\n/**\n * @brief Create a DMatrix from a CSC matrix.\n *\n * @param indptr  JSON encoded __array_interface__ to column pointers in CSC.\n * @param indices JSON encoded __array_interface__ to row indices in CSC.\n * @param data    JSON encoded __array_interface__ to values in CSC.\n * @param nrow    The number of rows in the matrix.\n * @param config  See @ref XGDMatrixCreateFromDense for details.\n * @param out     The created dmatrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,\n                                   bst_ulong nrow, char const *config, DMatrixHandle *out);\n\n/**\n * @brief create matrix content from dense matrix\n * @param data pointer to the data space\n * @param nrow number of rows\n * @param ncol number columns\n * @param missing which value to represent missing value\n * @param out created dmatrix\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromMat(const float *data, bst_ulong nrow, bst_ulong ncol, float missing,\n                                   DMatrixHandle *out);\n/**\n * @brief create matrix content from dense matrix\n * @param data pointer to the data space\n * @param nrow number of rows\n * @param ncol number columns\n * @param missing which value to represent missing value\n * @param out created dmatrix\n * @param nthread number of threads (up to maximum cores available, if <=0 use all cores)\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,  // NOLINT\n                                       bst_ulong nrow, bst_ulong ncol, float missing,\n                                       DMatrixHandle *out, int 
nthread);\n\n/**\n * @brief Create DMatrix from CUDA columnar format. (cuDF)\n *\n * See @ref XGDMatrixCreateFromColumnar for a brief description of the columnar format.\n *\n * @param data   A list of JSON-encoded array interfaces.\n * @param config See @ref XGDMatrixCreateFromDense for details.\n * @param out    Created dmatrix\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config,\n                                            DMatrixHandle *out);\n\n/**\n * @brief Create DMatrix from CUDA array.\n * @param data JSON encoded __cuda_array_interface__ for array data.\n * @param config JSON encoded configuration.  Required values are:\n *   - missing: Which value to represent missing value.\n *   - nthread (optional): Number of threads used for initializing DMatrix.\n *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.\n * @param out created dmatrix\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const *config,\n                                                  DMatrixHandle *out);\n\n/**\n * @defgroup Streaming Streaming\n * @ingroup DMatrix\n *\n * @brief Quantile DMatrix and external memory DMatrix can be created from batches of\n *        data.\n *\n * There are 2 sets of data callbacks for DMatrix.  The first one is currently exclusively\n * used by JVM packages.  It uses `XGBoostBatchCSR` to accept batches for CSR formatted\n * input, and concatenate them into 1 final big CSR.  The related functions are:\n *\n * - @ref XGBCallbackSetData\n * - @ref XGBCallbackDataIterNext\n * - @ref XGDMatrixCreateFromDataIter\n *\n * Another set is used by external data iterator. It accepts foreign data iterators as\n * callbacks.  There are 2 different scenarios where users might want to pass in callbacks\n * instead of raw data.  
First it's the Quantile DMatrix used by the hist and GPU-based\n * hist tree method. For this case, the data is first compressed by quantile sketching\n * then merged.  This is particularly useful for distributed setting as it eliminates 2\n * copies of data. First one by a `concat` from external library to make the data into a\n * blob for normal DMatrix initialization, another one by the internal CSR copy of\n * DMatrix.\n *\n * The second use case is external memory support where users can pass a custom data\n * iterator into XGBoost for loading data in batches. For both cases, the iterator is only\n * used during the construction of the DMatrix and can be safely freed after construction\n * finishes. There are short notes on each of the use cases in the respective DMatrix factory\n * function.\n *\n * Related functions are:\n *\n * # Factory functions\n * - @ref XGDMatrixCreateFromCallback for external memory\n * - @ref XGQuantileDMatrixCreateFromCallback for quantile DMatrix\n * - @ref XGExtMemQuantileDMatrixCreateFromCallback for External memory Quantile DMatrix\n *\n * # Proxy that callers can use to pass data to XGBoost\n * - @ref XGProxyDMatrixCreate\n * - @ref XGDMatrixCallbackNext\n * - @ref DataIterResetCallback\n * - @ref XGProxyDMatrixSetDataCudaArrayInterface\n * - @ref XGProxyDMatrixSetDataColumnar\n * - @ref XGProxyDMatrixSetDataCudaColumnar\n * - @ref XGProxyDMatrixSetDataDense\n * - @ref XGProxyDMatrixSetDataCSR\n * - ... (data setters)\n *\n * @{\n */\n\n/*  ==== First set of callback functions, used exclusively by JVM packages. ==== */\n\n/** @brief handle to an external data iterator */\ntypedef void *DataIterHandle;  // NOLINT(*)\n/** @brief handle to an internal data holder. */\ntypedef void *DataHolderHandle;  // NOLINT(*)\n\n/** @brief Mini batch used in XGBoost Data Iteration */\ntypedef struct {  // NOLINT(*)\n  /** @brief number of rows in the minibatch */\n  size_t size;\n  /** @brief number of columns in the minibatch. 
*/\n  size_t columns;\n  /** @brief row pointer to the rows in the data */\n#ifdef __APPLE__\n  /* Necessary as Java on MacOS defines jlong as long int\n   * and gcc defines int64_t as long long int. */\n  long *offset;  // NOLINT(*)\n#else\n  int64_t *offset;  // NOLINT(*)\n#endif  // __APPLE__\n  /** @brief labels of each instance */\n  float *label;\n  /** @brief weight of each instance, can be NULL */\n  float *weight;\n  /** @brief feature index */\n  int *index;\n  /** @brief feature values */\n  float *value;\n} XGBoostBatchCSR;\n\n/**\n * @brief Callback to set the data to handle,\n * @param handle The handle to the callback.\n * @param batch The data content to be set.\n */\nXGB_EXTERN_C typedef int XGBCallbackSetData(  // NOLINT(*)\n    DataHolderHandle handle, XGBoostBatchCSR batch);\n\n/**\n * @brief The data reading callback function.\n *  The iterator will be able to give subset of batch in the data.\n *\n *  If there is data, the function will call set_function to set the data.\n *\n * @param data_handle The handle to the callback.\n * @param set_function The batch returned by the iterator\n * @param set_function_handle The handle to be passed to set function.\n * @return 0 if we are reaching the end and batch is not returned.\n */\nXGB_EXTERN_C typedef int XGBCallbackDataIterNext(  // NOLINT(*)\n    DataIterHandle data_handle, XGBCallbackSetData *set_function,\n    DataHolderHandle set_function_handle);\n\n/**\n * @brief Create a DMatrix from a data iterator.\n * @param data_handle The handle to the data.\n * @param callback The callback to get the data.\n * @param cache_info Additional information about cache file, can be null.\n * @param missing Which value to represent missing value.\n * @param out The created DMatrix\n * @return 0 when success, -1 when failure happens.\n */\nXGB_DLL int XGDMatrixCreateFromDataIter(DataIterHandle data_handle,\n                                        XGBCallbackDataIterNext *callback, const char *cache_info,\n     
                                   float missing, DMatrixHandle *out);\n\n/**\n * Second set of callback functions, used by constructing Quantile DMatrix or external\n * memory DMatrix using a custom iterator.\n */\n\n/**\n * @brief Create a DMatrix proxy for setting data, can be freed by @ref XGDMatrixFree.\n *\n * The DMatrix proxy is only a temporary reference (wrapper) to the actual user data. For\n * instance, if a dense matrix (like a numpy array) is passed into the proxy DMatrix via\n * the @ref XGProxyDMatrixSetDataDense method, then the proxy DMatrix holds only a\n * reference and the input array cannot be freed until the next iteration starts, signaled\n * by a call to the @ref XGDMatrixCallbackNext by XGBoost. It's called `ProxyDMatrix`\n * because it reuses the interface of the DMatrix class in XGBoost, but it's just a mid\n * interface for the @ref XGDMatrixCreateFromCallback and related constructors to consume\n * various user input types.\n *\n * @code{.unparsed}\n *   User inputs -> Proxy DMatrix (wrapper) -> Actual DMatrix\n * @endcode\n *\n * @param out The created Proxy DMatrix.\n *\n * @return 0 when success, -1 when failure happens.\n */\nXGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out);\n\n/**\n * @brief Callback function prototype for getting next batch of data.\n *\n * @param iter  A handler to the user defined iterator.\n *\n * @return 0 when success, -1 when failure happens.\n */\nXGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter);  // NOLINT(*)\n\n/**\n * @brief Callback function prototype for resetting the external iterator.\n */\nXGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle);  // NOLINT(*)\n\n/**\n * @brief Create an external memory DMatrix with data iterator.\n *\n * Short note for how to use second set of callback for external memory data support:\n *\n * - Step 0: Define a data iterator with 2 methods `reset`, and `next`.\n * - Step 1: Create a DMatrix proxy by \\ref XGProxyDMatrixCreate 
and hold the handle.\n * - Step 2: Pass the iterator handle, proxy handle and 2 methods into\n *           \\ref XGDMatrixCreateFromCallback, along with other parameters encoded as a JSON object.\n * - Step 3: Call appropriate data setters in `next` functions.\n *\n * @param iter    A handle to external data iterator.\n * @param proxy   A DMatrix proxy handle created by \\ref XGProxyDMatrixCreate.\n * @param reset   Callback function resetting the iterator state.\n * @param next    Callback function yielding the next batch of data.\n * @param config  JSON encoded parameters for DMatrix construction.  Accepted fields are:\n *   - missing:      Which value to represent missing value\n *   - cache_prefix: The path of cache file, caller must initialize all the directories in this path.\n *   - nthread (optional): Number of threads used for initializing DMatrix.\n * @param[out] out      The created external memory DMatrix\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                        DataIterResetCallback *reset, XGDMatrixCallbackNext *next,\n                                        char const *config, DMatrixHandle *out);\n/**\n * @example external_memory.c\n */\n\n/**\n * @brief Create a Quantile DMatrix with a data iterator.\n *\n * Short note for how to use the second set of callback for (GPU)Hist tree method:\n *\n * - Step 0: Define a data iterator with 2 methods `reset`, and `next`.\n * - Step 1: Create a DMatrix proxy by @ref XGProxyDMatrixCreate and hold the handle.\n * - Step 2: Pass the iterator handle, proxy handle and 2 methods into\n *           @ref XGQuantileDMatrixCreateFromCallback.\n * - Step 3: Call appropriate data setters in `next` functions.\n *\n * See test_iterative_dmatrix.cu or Python interface for examples.\n *\n * @param iter     A handle to external data iterator.\n * @param proxy    A DMatrix proxy handle created by @ref 
XGProxyDMatrixCreate.\n * @param ref      Reference DMatrix for providing quantile information.\n * @param reset    Callback function resetting the iterator state.\n * @param next     Callback function yielding the next batch of data.\n * @param config   JSON encoded parameters for DMatrix construction.  Accepted fields are:\n *   - missing:      Which value to represent missing value\n *   - nthread (optional): Number of threads used for initializing DMatrix.\n *   - max_bin (optional): Maximum number of bins for building histogram. Must be consistent with\n *                         the corresponding booster training parameter.\n *   - max_quantile_blocks (optional, deprecated): This parameter no longer has any effect and\n *       will be removed in a future release.\n * @param out      The created Quantile DMatrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                                DataIterHandle ref, DataIterResetCallback *reset,\n                                                XGDMatrixCallbackNext *next, char const *config,\n                                                DMatrixHandle *out);\n\n/**\n * @brief Create a Quantile DMatrix backed by external memory.\n *\n * @since 3.0.0\n *\n * @note This is experimental and subject to change.\n *\n * @verbatim embed:rst:leading-asterisk\n *    See :doc:`/tutorials/external_memory` for more info.\n * @endverbatim\n *\n * @param iter     A handle to external data iterator.\n * @param proxy    A DMatrix proxy handle created by @ref XGProxyDMatrixCreate.\n * @param ref      Reference DMatrix for providing quantile information.\n * @param reset    Callback function resetting the iterator state.\n * @param next     Callback function yielding the next batch of data.\n * @param config   JSON encoded parameters for DMatrix construction.  
Accepted fields are:\n *   - missing:      Which value to represent missing value\n *   - cache_prefix: The path of cache file, caller must initialize all the directories in this path.\n *   - nthread (optional): Number of threads used for initializing DMatrix.\n *   - max_bin (optional): Maximum number of bins for building histogram. Must be consistent with\n *                         the corresponding booster training parameter.\n *   - on_host (optional): Whether the data should be placed on host memory. Used by GPU inputs.\n *   - min_cache_page_bytes (optional): The minimum number of bytes for each internal GPU\n *      page. Set to 0 to disable page concatenation. Automatic configuration if the\n *      parameter is not provided or set to None.\n *   - max_quantile_blocks (optional, deprecated): This parameter no longer has any effect and\n *       will be removed in a future release.\n *   - cache_host_ratio (optional): For GPU-based inputs, XGBoost can split the cache into\n *      host and device portions to reduce the data transfer overhead. 
This parameter\n *      specifies the size of host cache compared to the size of the entire cache:\n *      `host / (host + device)`.\n * @param out The created Quantile DMatrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGExtMemQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                                      DataIterHandle ref,\n                                                      DataIterResetCallback *reset,\n                                                      XGDMatrixCallbackNext *next,\n                                                      char const *config, DMatrixHandle *out);\n\n/**\n * @brief Set data on a DMatrix proxy.\n *\n * @param handle  A DMatrix proxy created by @ref XGProxyDMatrixCreate\n * @param data    Null terminated JSON document string representation of CUDA\n *                array interface.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, const char *data);\n\n/**\n * @brief Set columnar (table) data on a DMatrix proxy.\n *\n * @param handle A DMatrix proxy created by @ref XGProxyDMatrixCreate\n * @param data   See @ref XGDMatrixCreateFromColumnar for details.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *data);\n\n/**\n * @brief Set CUDA-based columnar (table) data on a DMatrix proxy.\n *\n * @param handle A DMatrix proxy created by @ref XGProxyDMatrixCreate\n * @param data   See @ref XGDMatrixCreateFromColumnar for details.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, const char *data);\n\n/**\n * @brief Set data on a DMatrix proxy.\n *\n * @param handle  A DMatrix proxy created by @ref XGProxyDMatrixCreate\n * @param data    Null terminated JSON document string representation of array\n *     
           interface.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *data);\n\n/**\n * @brief Set data on a DMatrix proxy.\n *\n * @param handle        A DMatrix proxy created by \\ref XGProxyDMatrixCreate\n * @param indptr        JSON encoded __array_interface__ to row pointer in CSR.\n * @param indices       JSON encoded __array_interface__ to column indices in CSR.\n * @param data          JSON encoded __array_interface__ to values in CSR..\n * @param ncol          The number of columns of input CSR matrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, char const *indices,\n                                     char const *data, bst_ulong ncol);\n\n/** @} */  // End of Streaming\n\n/**\n * @brief create a new dmatrix from sliced content of existing matrix\n * @param handle instance of data matrix to be sliced\n * @param idxset index set\n * @param len length of index set\n * @param out a sliced new matrix\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int *idxset, bst_ulong len,\n                                  DMatrixHandle *out);\n/**\n * @brief create a new dmatrix from sliced content of existing matrix\n * @param handle instance of data matrix to be sliced\n * @param idxset index set\n * @param len length of index set\n * @param out a sliced new matrix\n * @param allow_groups allow slicing of an array with groups\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle, const int *idxset, bst_ulong len,\n                                    DMatrixHandle *out, int allow_groups);\n/**\n * @brief Free a DMatrix object.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixFree(DMatrixHandle handle);\n/**\n * @example 
c-api-demo.c inference.c external_memory.c\n */\n\n/**\n * @brief Save the DMatrix object into a file. `QuantileDMatrix` and external memory\n *        DMatrix are not supported.\n *\n * @param handle a instance of data matrix\n * @param fname File name\n * @param silent print statistics when saving\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char *fname, int silent);\n\n/**\n * @brief Set content in array interface to a content in info.\n *\n * @param handle An instance of data matrix\n * @param field  Field name.\n * @param data   JSON encoded __array_interface__ to values in the dense matrix/vector.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field,\n                                          char const *data);\n\n/**\n * @brief set float vector to a content in info\n * @param handle a instance of data matrix\n * @param field field name, can be label, weight\n * @param array pointer to float vector\n * @param len length of array\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,\n                                  bst_ulong len);\n/**\n * @deprecated since 2.1.0\n *\n * Use @ref XGDMatrixSetInfoFromInterface instead.\n */\nXGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,\n                                 bst_ulong len);\n\n/**\n * @brief Set string encoded information of all features.\n *\n * Accepted fields are:\n *   - feature_name\n *   - feature_type\n *\n * @param handle    An instance of data matrix\n * @param field     Field name\n * @param features  Pointer to array of strings.\n * @param size      Size of `features` pointer (number of strings passed in).\n *\n * @return 0 when success, -1 when failure happens\n *\n * @code{c}\n *\n *   
char const* feat_names [] {\"feat_0\", \"feat_1\"};\n *   XGDMatrixSetStrFeatureInfo(handle, \"feature_name\", feat_names, 2);\n *\n *   // i for integer, q for quantitative, c for categorical.  Similarly \"int\" and \"float\"\n *   // are also recognized.\n *   char const* feat_types [] {\"i\", \"q\"};\n *   XGDMatrixSetStrFeatureInfo(handle, \"feature_type\", feat_types, 2);\n *\n * @endcode\n */\nXGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field,\n                                       const char **features, const bst_ulong size);\n\n/**\n * @brief Get string encoded information of all features.\n *\n * Accepted fields are:\n *   - feature_name\n *   - feature_type\n *\n * Caller is responsible for copying out the data, before next call to any API function of\n * XGBoost.\n *\n * @param handle       An instance of data matrix\n * @param field        Field name\n * @param size         Size of output pointer `out_features` (number of strings returned).\n * @param out_features Address of a pointer to array of strings.  Result is stored in\n *                     thread local memory.\n *\n * @return 0 when success, -1 when failure happens\n *\n * @code{c}\n *\n *  char const **c_out_features = NULL;\n *  bst_ulong out_size = 0;\n *\n *  // Assuming the feature names are already set by `XGDMatrixSetStrFeatureInfo`.\n *  XGDMatrixGetStrFeatureInfo(handle, \"feature_name\", &out_size,\n *                             &c_out_features);\n *\n *  for (bst_ulong i = 0; i < out_size; ++i) {\n *    // Here we are simply printing the string.  
Copy it out if the feature name is\n *    // useful after printing.\n *    printf(\"feature %lu: %s\\n\", i, c_out_features[i]);\n *  }\n *\n * @endcode\n */\nXGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, bst_ulong *size,\n                                       const char ***out_features);\n\n/**\n * @brief Create an opaque handle to the internal category container.\n *\n * @since 3.2.0\n *\n * @note Experimental API, subject to change in the future.\n *\n * The container should be freed by @ref XGBCategoriesFree\n *\n * @param handle An instance of the data matrix.\n * @param config Unused, reserved for the future.\n * @param out    Created handle to the category container. Set to NULL if there's no category.\n *\n * @return 0 when success, -1 when failure happens.\n *\n * @code{c}\n *    DMatrixHandle fmat;\n *    // Create a DMatrix from categorical data\n *    // ...\n *    CategoriesHandle cats;\n *    int err = XGDMatrixGetCategories(fmat, NULL, &cats);\n *    if (err != 0) {\n *        exit(-1);\n *    }\n *    err = XGBCategoriesFree(cats);\n *    if (err != 0) {\n *        exit(-1);\n *    }\n * @endcode\n */\nXGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const *config, CategoriesHandle *out);\n\n/**\n * @brief Create an opaque handle to the internal container and export it to arrow.\n *\n * @since 3.2.0\n *\n * @note Experimental API, subject to change in the future.\n *\n * The container should be freed by @ref XGBCategoriesFree\n *\n * @param handle     An instance of the data matrix.\n * @param config     Unused, reserved for the future.\n * @param out        Created handle to the category container\n * @param export_out JSON encoded array of categories, with length equal to the number of features.\n *\n * @return 0 when success, -1 when failure happens.\n */\nXGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const *config,\n                                                
CategoriesHandle *out, char const **export_out);\n\n/**\n * @brief Free the opaque handle.\n *\n * @since 3.2.0\n *\n * @note Experimental API, subject to change in the future.\n *\n * @param handle An instance of the category container.\n *\n * @return 0 when success, -1 when failure happens.\n */\nXGB_DLL int XGBCategoriesFree(CategoriesHandle handle);\n\n/**\n * @deprecated since 2.1.0\n *\n * Use @ref XGDMatrixSetInfoFromInterface instead.\n */\nXGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,\n                                  bst_ulong size, int type);\n\n/**\n * @brief Get a reference to data like label or weight.\n *\n * This method replaces the existing @ref XGDMatrixGetFloatInfo and @ref\n * XGDMatrixGetUIntInfo to support non-vector (like a matrix) output. The output data\n * directly references the internal storage, as a result, it's read-only and user should\n * copy data before the next XGBoost call.\n *\n * @since 3.2.0\n *\n * @param handle    An instance of data matrix\n * @param field     Field name\n * @param out_array JSON encoded __(cuda)_array_interface__ to the output.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixGetInfoRef(DMatrixHandle handle, char const *field, char const **out_array);\n\n/**\n * @brief get float info vector from matrix.\n * @param handle a instance of data matrix\n * @param field field name\n * @param out_len used to set result length\n * @param out_dptr pointer to the result\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len,\n                                  const float **out_dptr);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @brief get uint32 info vector from matrix\n * @param handle a instance of data matrix\n * @param field field name\n * @param out_len The length of the field.\n * @param out_dptr pointer to the result\n * 
@return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len,\n                                 const unsigned **out_dptr);\n/**\n * @brief Get the number of rows from a DMatrix.\n *\n * @param handle the handle to the DMatrix\n * @param out The address to hold number of rows.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out);\n/**\n * @brief Get the number of columns from a DMatrix.\n *\n * @param handle the handle to the DMatrix\n * @param out The output of number of columns\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);\n\n/**\n * @brief Get number of valid values from a DMatrix.\n *\n * @param handle the handle to the DMatrix\n * @param out The output of number of non-missing values\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);\n\n/**\n * @brief Get the data split mode from DMatrix.\n *\n * @param handle the handle to the DMatrix\n * @param out The output of the data split mode\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixDataSplitMode(DMatrixHandle handle, bst_ulong *out);\n\n/**\n * @brief Get the predictors from DMatrix as CSR matrix for testing.  If this is a\n *        quantized DMatrix, quantized values are returned instead.\n *\n * Unlike most of XGBoost C functions, caller of `XGDMatrixGetDataAsCSR` is required to\n * allocate the memory for return buffer instead of using thread local memory from\n * XGBoost. This is to avoid allocating a huge memory buffer that can not be freed until\n * exiting the thread.\n *\n * @since 1.7.0\n *\n * @param handle the handle to the DMatrix\n * @param config JSON configuration string. 
At the moment it should be an empty document,\n *               preserved for future use.\n * @param out_indptr  indptr of output CSR matrix.\n * @param out_indices Column index of output CSR matrix.\n * @param out_data    Data value of CSR matrix.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,\n                                  bst_ulong *out_indptr, unsigned *out_indices, float *out_data);\n\n/**\n * @brief Export the quantile cuts used for training histogram-based models like `hist` and\n *        `approx`. Useful for model compression.\n *\n * @since 2.0.0\n *\n * @param handle the handle to the DMatrix\n * @param config JSON configuration string. At the moment it should be an empty document,\n *               preserved for future use.\n *\n * @param out_indptr indptr of output CSC matrix represented by a JSON encoded\n *                   __(cuda_)array_interface__.\n * @param out_data   Data value of CSC matrix represented by a JSON encoded\n *                   __(cuda_)array_interface__.\n */\nXGB_DLL int XGDMatrixGetQuantileCut(DMatrixHandle const handle, char const *config,\n                                    char const **out_indptr, char const **out_data);\n\n/** @} */  // End of DMatrix\n\n/**\n * @defgroup Booster Booster\n *\n * @brief The `Booster` class is the gradient-boosted model for XGBoost.\n *\n * During training, the booster object has many caches for improved performance. In\n * addition to gradient and prediction, it also includes runtime buffers like leaf\n * partitions. 
These buffers persist with the Booster object until either XGBoosterReset()\n * is called or the booster is deleted by the XGBoosterFree().\n *\n * @{\n */\n\n/**\n * @brief Create a XGBoost learner (booster)\n *\n * @param dmats matrices that are set to be cached by the booster.\n * @param len length of dmats\n * @param out handle to the result booster\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @brief Delete the booster.\n *\n * @param handle The handle to be freed.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterFree(BoosterHandle handle);\n/**\n * @example c-api-demo.c inference.c external_memory.c\n */\n\n/**\n * @brief Reset the booster object to release data caches used for training.\n *\n * @since 3.0.0\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterReset(BoosterHandle handle);\n\n/**\n * @brief Slice a model using boosting index. The slice m:n indicates taking all trees\n *        that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).\n *\n * @param handle Booster to be sliced.\n * @param begin_layer start of the slice\n * @param end_layer end of the slice; end_layer=0 is equivalent to\n *                  end_layer=num_boost_round\n * @param step step size of the slice\n * @param out Sliced booster.\n *\n * @return 0 when success, -1 when failure happens, -2 when index is out of bound.\n */\nXGB_DLL int XGBoosterSlice(BoosterHandle handle, int begin_layer, int end_layer, int step,\n                           BoosterHandle *out);\n\n/**\n * @brief Get number of boosted rounds from gradient booster.  
When process_type is\n *        update, this number might drop due to removed tree.\n * @param handle Handle to booster.\n * @param out Pointer to output integer.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterBoostedRounds(BoosterHandle handle, int *out);\n\n/**\n * @brief set parameters\n * @param handle handle\n * @param name  parameter name\n * @param value value of parameter\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @brief get number of features\n * @param handle Handle to booster.\n * @param out number of features\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @brief update the model in one round using dtrain\n * @param handle handle\n * @param iter current iteration rounds\n * @param dtrain training data\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @deprecated since 2.1.0\n */\nXGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad,\n                                  float *hess, bst_ulong len);\n\n/**\n * @brief Update a model with gradient and Hessian. This is used for training with a\n *        custom objective function.\n *\n * @since 2.0.0\n *\n * @param handle handle\n * @param dtrain The training data.\n * @param iter   The current iteration round. 
When training continuation is used, the count\n *               should restart.\n * @param grad   Json encoded __(cuda)_array_interface__ for gradient.\n * @param hess   Json encoded __(cuda)_array_interface__ for Hessian.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, int iter,\n                                  char const *grad, char const *hess);\n\n/**\n * @brief get evaluation statistics for xgboost\n * @param handle handle\n * @param iter current iteration rounds\n * @param dmats pointers to data to be evaluated\n * @param evnames pointers to names of each data\n * @param len length of dmats\n * @param out_result the string containing evaluation statistics\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[],\n                                 const char *evnames[], bst_ulong len, const char **out_result);\n/**\n * @example c-api-demo.c\n */\n\n/**\n * @defgroup Prediction Prediction\n * @ingroup Booster\n *\n * @brief These functions are used for running prediction and explanation algorithms.\n *\n * @{\n */\n\n/**\n * @brief make prediction based on dmat (deprecated, use \\ref XGBoosterPredictFromDMatrix instead)\n * \\deprecated\n * \\see XGBoosterPredictFromDMatrix()\n *\n * @param handle handle\n * @param dmat data matrix\n * @param option_mask bit-mask of options taken in prediction, possible values\n *          0:normal prediction\n *          1:output margin instead of transformed value\n *          2:output leaf index of trees instead of leaf value, note leaf index is unique per tree\n *          4:output feature contributions to individual predictions\n * @param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees\n *    when the parameter is set to 0, we will use all the trees\n * @param training Whether the prediction function 
is used as part of a training loop.\n *    Prediction can be run in 2 scenarios:\n *    1. Given data matrix X, obtain prediction y_pred from the model.\n *    2. Obtain the prediction for computing gradients. For example, DART booster performs dropout\n *       during training, and the prediction result will be different from the one obtained by normal\n *       inference step due to dropped trees.\n *    Set training=false for the first scenario. Set training=true for the second scenario.\n *    The second scenario applies when you are defining a custom objective function.\n * @param out_len used to store length of returning result\n * @param out_result used to set a pointer to array\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int option_mask,\n                             unsigned ntree_limit, int training, bst_ulong *out_len,\n                             const float **out_result);\n\n/**\n * @brief Make prediction from DMatrix, replacing \\ref XGBoosterPredict.\n *\n * @param handle Booster handle\n * @param dmat   DMatrix handle\n * @param config String encoded predict configuration in JSON format, with following\n *                      available fields in the JSON object:\n *\n *    \"type\": [0, 6]\n *      - 0: normal prediction\n *      - 1: output margin\n *      - 2: predict contribution\n *      - 3: predict approximated contribution\n *      - 4: predict feature interaction\n *      - 5: predict approximated feature interaction\n *      - 6: predict leaf\n *    \"training\": bool\n *      Whether the prediction function is used as part of a training loop.  **Not used\n *      for inplace prediction**.\n *\n *      Prediction can be run in 2 scenarios:\n *        1. Given data matrix X, obtain prediction y_pred from the model.\n *        2. Obtain the prediction for computing gradients. 
For example, DART booster performs dropout\n *           during training, and the prediction result will be different from the one obtained by normal\n *           inference step due to dropped trees.\n *      Set training=false for the first scenario. Set training=true for the second\n *      scenario.  The second scenario applies when you are defining a custom objective\n *      function.\n *    \"iteration_begin\": int\n *      Beginning iteration of prediction.\n *    \"iteration_end\": int\n *      End iteration of prediction.  Set to 0 this will become the size of tree model (all the trees).\n *    \"strict_shape\": bool\n *      Whether should we reshape the output with stricter rules.  If set to true,\n *      normal/margin/contrib/interaction predict will output consistent shape\n *      disregarding the use of multi-class model, and leaf prediction will output 4-dim\n *      array representing: (n_samples, n_iterations, n_classes, n_trees_in_forest)\n *\n *   Example JSON input for running a normal prediction with strict output shape, 2 dim\n *   for softprob , 1 dim for others.\n *   @code{javascript}\n *      {\n *         \"type\": 0,\n *         \"training\": false,\n *         \"iteration_begin\": 0,\n *         \"iteration_end\": 0,\n *         \"strict_shape\": true\n *      }\n *   @endcode\n *\n * @param out_shape Shape of output prediction (copy before use).\n * @param out_dim   Dimension of output prediction.\n * @param out_result Buffer storing prediction value (copy before use).\n *\n * @return 0 when success, -1 when failure happens\n *\n * @see XGBoosterPredictFromDense XGBoosterPredictFromCSR XGBoosterPredictFromCudaArray XGBoosterPredictFromCudaColumnar\n */\nXGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat,\n                                        char const *config, bst_ulong const **out_shape,\n                                        bst_ulong *out_dim, float const **out_result);\n/**\n * @example 
inference.c\n */\n\n/**\n * @brief Inplace prediction from CPU dense matrix.\n *\n * \\note If the booster is configured to run on a CUDA device, XGBoost falls back to run\n *       prediction with DMatrix with a performance warning.\n *\n * @param handle        Booster handle.\n * @param values        JSON encoded __array_interface__ to values.\n * @param config        See \\ref XGBoosterPredictFromDMatrix for more info.\n *   Additional fields for inplace prediction are:\n *     - \"missing\": float\n * @param m             An optional (NULL if not available) proxy DMatrix instance\n *                      storing meta info.\n *\n * @param out_shape     See \\ref XGBoosterPredictFromDMatrix for more info.\n * @param out_dim       See \\ref XGBoosterPredictFromDMatrix for more info.\n * @param out_result    See \\ref XGBoosterPredictFromDMatrix for more info.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values, char const *config,\n                                      DMatrixHandle m, bst_ulong const **out_shape,\n                                      bst_ulong *out_dim, const float **out_result);\n/**\n * @example inference.c\n */\n\n/**\n * @brief Inplace prediction from CPU columnar data. 
(Table)\n *\n * @note If the booster is configured to run on a CUDA device, XGBoost falls back to run\n *       prediction with DMatrix with a performance warning.\n *\n * @param handle        Booster handle.\n * @param values        See @ref XGDMatrixCreateFromColumnar for more info.\n * @param config        See @ref XGBoosterPredictFromDMatrix for more info.\n *   Additional fields for inplace prediction are:\n *     - \"missing\": float\n * @param m             An optional (NULL if not available) proxy DMatrix instance\n *                      storing meta info.\n *\n * @param out_shape     See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_dim       See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_result    See @ref XGBoosterPredictFromDMatrix for more info.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *values,\n                                         char const *config, DMatrixHandle m,\n                                         bst_ulong const **out_shape, bst_ulong *out_dim,\n                                         const float **out_result);\n\n/**\n * @brief Inplace prediction from CPU CSR matrix.\n *\n * \\note If the booster is configured to run on a CUDA device, XGBoost falls back to run\n *       prediction with DMatrix with a performance warning.\n *\n * @param handle        Booster handle.\n * @param indptr        JSON encoded __array_interface__ to row pointer in CSR.\n * @param indices       JSON encoded __array_interface__ to column indices in CSR.\n * @param values        JSON encoded __array_interface__ to values in CSR.\n * @param ncol          Number of features in data.\n * @param config        See \\ref XGBoosterPredictFromDMatrix for more info.\n *   Additional fields for inplace prediction are:\n *     - \"missing\": float\n * @param m             An optional (NULL if not available) proxy DMatrix instance\n *        
              storing meta info.\n *\n * @param out_shape     See \\ref XGBoosterPredictFromDMatrix for more info.\n * @param out_dim       See \\ref XGBoosterPredictFromDMatrix for more info.\n * @param out_result    See \\ref XGBoosterPredictFromDMatrix for more info.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,\n                                    char const *values, bst_ulong ncol, char const *config,\n                                    DMatrixHandle m, bst_ulong const **out_shape,\n                                    bst_ulong *out_dim, const float **out_result);\n\n/**\n * @brief Inplace prediction from CUDA Dense matrix (cupy in Python).\n *\n * @note If the booster is configured to run on a CPU, XGBoost falls back to run\n *       prediction with DMatrix with a performance warning.\n *\n * @param handle        Booster handle\n * @param values        JSON encoded __cuda_array_interface__ to values.\n * @param config        See @ref XGBoosterPredictFromDMatrix for more info.\n *   Additional fields for inplace prediction are:\n *     - \"missing\": float\n * @param proxy         An optional (NULL if not available) proxy DMatrix instance\n *                      storing meta info.\n * @param out_shape     See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_dim       See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_result    See @ref XGBoosterPredictFromDMatrix for more info.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *values,\n                                          char const *config, DMatrixHandle proxy,\n                                          bst_ulong const **out_shape, bst_ulong *out_dim,\n                                          const float **out_result);\n\n/**\n * @brief Inplace prediction from CUDA dense 
dataframe (cuDF in Python).\n *\n * @note If the booster is configured to run on a CPU, XGBoost falls back to run\n *       prediction with DMatrix with a performance warning.\n *\n * @param handle        Booster handle\n * @param data          See @ref XGDMatrixCreateFromColumnar for more info.\n * @param config        See @ref XGBoosterPredictFromDMatrix for more info.\n *   Additional fields for inplace prediction are:\n *     - \"missing\": float\n * @param proxy         An optional (NULL if not available) proxy DMatrix instance\n *                      storing meta info.\n * @param out_shape     See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_dim       See @ref XGBoosterPredictFromDMatrix for more info.\n * @param out_result    See @ref XGBoosterPredictFromDMatrix for more info.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *data,\n                                             char const *config, DMatrixHandle proxy,\n                                             bst_ulong const **out_shape, bst_ulong *out_dim,\n                                             const float **out_result);\n\n/**@}*/  // End of Prediction\n\n/**\n * @defgroup Serialization Serialization\n * @ingroup Booster\n *\n * @brief There are multiple ways to serialize a Booster object depending on the use case.\n *\n * Short note for serialization APIs.  There are 3 different sets of serialization API.\n *\n * - Functions with the term \"Model\" handles saving/loading XGBoost model like trees or\n *   linear weights.  Striping out parameters configuration like training algorithms or\n *   CUDA device ID.  These functions are designed to let users reuse the trained model\n *   for different tasks, examples are prediction, training continuation or model\n *   interpretation.\n *\n * - Functions with the term \"Config\" handles save/loading configuration.  
It helps user\n *   to study the internal of XGBoost.  Also user can use the load method for specifying\n *   parameters in a structured way.  These functions were introduced in 1.0.0.\n *\n * - Functions with the term \"Serialization\" are combination of above two.  They are used\n *   in situations like check-pointing, or continuing training task in a distributed\n *   environment.  In these cases the task must be carried out without any user\n *   intervention.\n *\n * @{\n */\n\n/**\n * @brief Load the model from an existing file\n *\n * @param handle handle\n * @param fname File name. The string must be UTF-8 encoded.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname);\n/**\n * @brief Save the model into an existing file\n *\n * @param handle handle\n * @param fname File name. The string must be UTF-8 encoded.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname);\n/**\n * @brief load model from in memory buffer\n *\n * @param handle handle\n * @param buf pointer to the buffer\n * @param len the length of the buffer\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len);\n\n/**\n * @brief Save model into raw bytes, return header of the array.  User must copy the\n *        result out, before next xgboost call\n *\n * @param handle handle\n * @param config JSON encoded string storing parameters for the function.  
Following\n *               keys are expected in the JSON document:\n *               - \"format\": str\n *                 - json: Output booster will be encoded as JSON.\n *                 - ubj:  Output booster will be encoded as Universal binary JSON.\n *                   this format except for compatibility reasons.\n * @param out_len  The argument to hold the output length\n * @param out_dptr The argument to hold the output data pointer\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,\n                                       char const **out_dptr);\n\n/**\n * @brief Memory snapshot based serialization method.  Saves everything states\n * into buffer.\n *\n * @param handle handle\n * @param out_len the argument to hold the output length\n * @param out_dptr the argument to hold the output data pointer\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,\n                                       const char **out_dptr);\n/**\n * @brief Memory snapshot based serialization method.  Loads the buffer returned\n *        from \\ref XGBoosterSerializeToBuffer.\n *\n * @param handle handle\n * @param buf pointer to the buffer\n * @param len the length of the buffer\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len);\n\n/**\n * @brief Save XGBoost's internal configuration into a JSON document.  Currently the\n *        support is experimental, function signature may change in the future without\n *        notice.\n *\n * @param handle handle to Booster object.\n * @param out_len length of output string\n * @param out_str A valid pointer to array of characters.  
The characters array is\n *                allocated and managed by XGBoost, while pointer to that array needs to\n *                be managed by caller.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len, char const **out_str);\n/**\n * @brief Load XGBoost's internal configuration from a JSON document.  Currently the\n *        support is experimental, function signature may change in the future without\n *        notice.\n *\n * @param handle handle to Booster object.\n * @param config string representation of a JSON document.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *config);\n/**@}*/  // End of Serialization\n\n/**\n * @brief dump model, return array of strings representing model dump\n * @param handle handle\n * @param fmap  name to fmap can be empty string\n * @param with_stats whether to dump with statistics\n * @param out_len length of output array\n * @param out_dump_array pointer to hold representing dump of each model\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterDumpModel(BoosterHandle handle, const char *fmap, int with_stats,\n                               bst_ulong *out_len, const char ***out_dump_array);\n\n/**\n * @brief dump model, return array of strings representing model dump\n * @param handle handle\n * @param fmap  name to fmap can be empty string\n * @param with_stats whether to dump with statistics\n * @param format the format to dump the model in\n * @param out_len length of output array\n * @param out_dump_array pointer to hold representing dump of each model\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, const char *fmap, int with_stats,\n                                 const char *format, bst_ulong *out_len,\n                                 const char 
***out_dump_array);\n\n/**\n * @brief dump model, return array of strings representing model dump\n * @param handle handle\n * @param fnum number of features\n * @param fname names of features\n * @param ftype types of features\n * @param with_stats whether to dump with statistics\n * @param out_len length of output array\n * @param out_models pointer to hold representing dump of each model\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle, int fnum, const char **fname,\n                                           const char **ftype, int with_stats, bst_ulong *out_len,\n                                           const char ***out_models);\n\n/**\n * @brief dump model, return array of strings representing model dump\n * @param handle handle\n * @param fnum number of features\n * @param fname names of features\n * @param ftype types of features\n * @param with_stats whether to dump with statistics\n * @param format the format to dump the model in\n * @param out_len length of output array\n * @param out_models pointer to hold representing dump of each model\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, int fnum, const char **fname,\n                                             const char **ftype, int with_stats, const char *format,\n                                             bst_ulong *out_len, const char ***out_models);\n\n/**\n * See @ref XGDMatrixGetCategories\n *\n * @since 3.2.0\n *\n * @note Experimental API, subject to change in the future.\n */\nXGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const *config, CategoriesHandle *out);\n\n/**\n * See @ref XGDMatrixGetCategoriesExportToArrow\n *\n * @since 3.2.0\n *\n * @note Experimental API, subject to change in the future.\n */\nXGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const *config,\n                                 
               CategoriesHandle *out, char const **export_out);\n\n/**\n * @brief Get string attribute from Booster.\n * @param handle handle\n * @param key The key of the attribute.\n * @param out The result attribute, can be NULL if the attribute do not exist.\n * @param success Whether the result is contained in out.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char *key, const char **out, int *success);\n/**\n * @brief Set or delete string attribute.\n *\n * @param handle handle\n * @param key The key of the attribute.\n * @param value The value to be saved.\n *              If nullptr, the attribute would be deleted.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char *key, const char *value);\n/**\n * @brief Get the names of all attribute from Booster.\n * @param handle handle\n * @param out_len the argument to hold the output length\n * @param out pointer to hold the output attribute stings\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, bst_ulong *out_len, const char ***out);\n\n/**\n * @brief Set string encoded feature info in Booster, similar to the feature\n *        info in DMatrix.\n *\n * Accepted fields are:\n *   - feature_name\n *   - feature_type\n *\n * @param handle    An instance of Booster\n * @param field     Field name\n * @param features  Pointer to array of strings.\n * @param size      Size of `features` pointer (number of strings passed in).\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,\n                                       const char **features, const bst_ulong size);\n\n/**\n * @brief Get string encoded feature info from Booster, similar to the feature info\n *        in DMatrix.\n *\n * Accepted field names are:\n *   - feature_name\n *   
- feature_type\n *\n * Caller is responsible for copying out the data, before the next call to any API\n * function of XGBoost.\n *\n * @param handle       An instance of Booster\n * @param field        Field name\n * @param len          Size of output pointer `features` (number of strings returned).\n * @param out_features Address of a pointer to array of strings. Result is stored in\n *        thread local memory.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field, bst_ulong *len,\n                                       const char ***out_features);\n\n/**\n * @brief Calculate feature scores for tree models.  When used on linear model, only the\n * `weight` importance type is defined, and output scores is a row major matrix with shape\n * [n_features, n_classes] for multi-class model.  For tree model, out_n_feature is always\n * equal to out_n_scores and has multiple definitions of importance type.\n *\n * @param handle          An instance of Booster\n * @param config          Parameters for computing scores encoded as JSON.  
Accepted JSON keys are:\n *   - importance_type: A JSON string with following possible values:\n *       * 'weight': the number of times a feature is used to split the data across all trees.\n *       * 'gain': the average gain across all splits the feature is used in.\n *       * 'cover': the average coverage across all splits the feature is used in.\n *       * 'total_gain': the total gain across all splits the feature is used in.\n *       * 'total_cover': the total coverage across all splits the feature is used in.\n *   - feature_map: An optional JSON string with URI or path to the feature map file.\n *   - feature_names: An optional JSON array with string names for each feature.\n *\n * @param out_n_features  Length of output feature names.\n * @param out_features    An array of string as feature names, ordered the same as output scores.\n * @param out_dim         Dimension of output feature scores.\n * @param out_shape       Shape of output feature scores with length of `out_dim`.\n * @param out_scores      An array of floating point as feature scores with shape of `out_shape`.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,\n                                  bst_ulong *out_n_features, char const ***out_features,\n                                  bst_ulong *out_dim, bst_ulong const **out_shape,\n                                  float const **out_scores);\n/**@}*/  // End of Booster\n\n/**\n * @defgroup Collective Collective\n *\n * @brief Experimental support for exposing internal communicator in XGBoost.\n *\n * @note This is still under development.\n *\n * The collective communicator in XGBoost evolved from the `rabit` project of dmlc but has\n * changed significantly since its adoption. It consists of a tracker and a set of\n * workers. The tracker is responsible for bootstrapping the communication group and\n * handling centralized tasks like logging. 
The workers are actual communicators\n * performing collective tasks like allreduce.\n *\n * To use the collective implementation, one needs to first create a tracker with\n * corresponding parameters, then get the arguments for workers using\n * XGTrackerWorkerArgs().  The obtained arguments can then be passed to the\n * XGCommunicatorInit() function. Call to XGCommunicatorInit() must be accompanied with a\n * XGCommunicatorFinalize() call for cleanups. Please note that the communicator uses\n * `std::thread` in C++, which has undefined behavior in a C++ destructor due to the\n * runtime shutdown sequence. It's preferable to call XGCommunicatorFinalize() before the\n * runtime is shutting down. This requirement is similar to a Python thread or socket,\n * which should not be relied upon in a `__del__` function.\n *\n * Since it's used as a part of XGBoost, errors will be returned when a XGBoost function\n * is called, for instance, training a booster might return a connection error.\n *\n * @{\n */\n\n/**\n * @brief Handle to the tracker.\n *\n *   There are currently two types of tracker in XGBoost, first one is `rabit`, while the\n *   other one is `federated`.  `rabit` is used for normal collective communication, while\n *   `federated` is used for federated learning.\n *\n */\ntypedef void *TrackerHandle; /* NOLINT */\n\n/**\n * @brief Create a new tracker.\n *\n * @param config JSON encoded parameters.\n *\n *   - dmlc_communicator: String, the type of tracker to create. Available options are\n *                        `rabit` and `federated`. See @ref TrackerHandle for more info.\n *   - n_workers: Integer, the number of workers.\n *   - port: (Optional) Integer, the port this tracker should listen to.\n *   - timeout: (Optional) Integer, timeout in seconds for various networking\n                 operations. 
Default is 300 seconds.\n *\n *   Some configurations are `rabit` specific:\n *\n *   - host: (Optional) String, Used by the `rabit` tracker to specify the address of the host.\n *           This can be useful when the communicator cannot reliably obtain the host address.\n *   - sortby: (Optional) Integer.\n *     + 0: Sort workers by their host name.\n *     + 1: Sort workers by task IDs.\n *\n *   Some `federated` specific configurations:\n *   - federated_secure: Boolean, whether this is a secure server. False for testing.\n *   - server_key_path: Path to the server key. Used only if this is a secure server.\n *   - server_cert_path: Path to the server certificate. Used only if this is a secure server.\n *   - client_cert_path: Path to the client certificate. Used only if this is a secure server.\n *\n * @param handle The handle to the created tracker.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle);\n\n/**\n * @brief Get the arguments needed for running workers. This should be called after\n *        XGTrackerRun().\n *\n * @param handle The handle to the tracker.\n * @param args The arguments returned as a JSON document.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGTrackerWorkerArgs(TrackerHandle handle, char const **args);\n\n/**\n * @brief Start the tracker. The tracker runs in the background and this function returns\n *        once the tracker is started.\n *\n * @param handle The handle to the tracker.\n * @param config Unused at the moment, preserved for the future.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGTrackerRun(TrackerHandle handle, char const *config);\n\n/**\n * @brief Wait for the tracker to finish, should be called after XGTrackerRun(). 
This\n *        function will block until the tracker task is finished or timeout is reached.\n *\n * @param handle The handle to the tracker.\n * @param config JSON encoded configuration. No argument is required yet, preserved for\n *        the future.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGTrackerWaitFor(TrackerHandle handle, char const *config);\n\n/**\n * @brief Free a tracker instance. This should be called after XGTrackerWaitFor(). If the\n *        tracker is not properly waited, this function will shutdown all connections with\n *        the tracker, potentially leading to undefined behavior.\n *\n * @param handle The handle to the tracker.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGTrackerFree(TrackerHandle handle);\n\n/**\n * @brief Initialize the collective communicator.\n *\n *  Currently the communicator API is experimental, function signatures may change in the future\n *  without notice.\n *\n *  Call this once in the worker process before using anything. Please make sure\n *  XGCommunicatorFinalize() is called after use. The initialized communicator is a global\n *  thread-local variable.\n *\n * @param config JSON encoded configuration. Accepted JSON keys are:\n *   - dmlc_communicator: The type of the communicator, this should match the tracker type.\n *     * rabit: Use Rabit. 
This is the default if the type is unspecified.\n *     * federated: Use the gRPC interface for Federated Learning.\n *\n * Only applicable to the `rabit` communicator:\n *   - dmlc_tracker_uri: Hostname or IP address of the tracker.\n *   - dmlc_tracker_port: Port number of the tracker.\n *   - dmlc_task_id: ID of the current task, can be used to obtain deterministic rank assignment.\n *   - dmlc_retry: The number of retries for connection failure.\n *   - dmlc_timeout: Timeout in seconds.\n *   - dmlc_nccl_path: Path to the nccl shared library `libnccl.so`.\n *\n * Only applicable to the `federated` communicator (use upper case for environment variables, use\n * lower case for runtime configuration):\n *   - federated_server_address: Address of the federated server.\n *   - federated_world_size: Number of federated workers.\n *   - federated_rank: Rank of the current worker.\n *   - federated_server_cert_path: Server certificate file path. Only needed for the SSL mode.\n *   - federated_client_key_path: Client key file path. Only needed for the SSL mode.\n *   - federated_client_cert_path: Client certificate file path. 
Only needed for the SSL mode.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorInit(char const *config);\n\n/**\n * @brief Finalize the collective communicator.\n *\n * Call this function after you have finished all jobs.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorFinalize(void);\n\n/**\n * @brief Get rank of the current process.\n *\n * @return Rank of the worker.\n */\nXGB_DLL int XGCommunicatorGetRank(void);\n\n/**\n * @brief Get the total number of processes.\n *\n * @return Total world size.\n */\nXGB_DLL int XGCommunicatorGetWorldSize(void);\n\n/**\n * @brief Get if the communicator is distributed.\n *\n * @return True if the communicator is distributed.\n */\nXGB_DLL int XGCommunicatorIsDistributed(void);\n\n/**\n * @brief Print the message to the tracker.\n *\n * This function can be used to communicate the information of the progress to the user\n * who monitors the tracker.\n *\n * @param message The message to be printed.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorPrint(char const *message);\n\n/**\n * @brief Get the name of the processor.\n *\n * @param name_str Pointer to received returned processor name.\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorGetProcessorName(const char **name_str);\n\n/**\n * @brief Broadcast a memory region to all others from root. This function is NOT\n *        thread-safe.\n *\n * Example:\n * @code\n *   int a = 1;\n *   Broadcast(&a, sizeof(a), root);\n * @endcode\n *\n * @param send_receive_buffer Pointer to the send or receive buffer.\n * @param size Size of the data in bytes.\n * @param root The process rank to broadcast from.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int root);\n\n/**\n * @brief Perform in-place allreduce. 
This function is NOT thread-safe.\n *\n * Example Usage: the following code gives sum of the result\n * @code\n *     enum class Op {\n *         kMax = 0, kMin = 1, kSum = 2, kBitwiseAND = 3, kBitwiseOR = 4, kBitwiseXOR = 5\n *     };\n *     std::vector<int> data(10);\n *     ...\n *     Allreduce(data.data(), data.size(), DataType::kInt32, Op::kSum);\n *     ...\n * @endcode\n *\n * @param send_receive_buffer Buffer for both sending and receiving data.\n * @param count Number of elements to be reduced.\n * @param data_type Enumeration of data type, see xgboost::collective::DataType in communicator.h.\n * @param op Enumeration of operation type, see xgboost::collective::Operation in communicator.h.\n *\n * @return 0 when success, -1 when failure happens\n */\nXGB_DLL int XGCommunicatorAllreduce(void *send_receive_buffer, size_t count, int data_type, int op);\n\n/**@}*/  // End of Collective\n#endif   // XGBOOST_C_API_H_\n"
  },
  {
    "path": "include/xgboost/cache.h",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#ifndef XGBOOST_CACHE_H_\n#define XGBOOST_CACHE_H_\n\n#include <xgboost/logging.h>  // for CHECK_EQ, CHECK\n\n#include <cstddef>            // for size_t\n#include <memory>             // for weak_ptr, shared_ptr, make_shared\n#include <mutex>              // for mutex, lock_guard\n#include <queue>              // for queue\n#include <thread>             // for thread\n#include <unordered_map>      // for unordered_map\n#include <utility>            // for move\n#include <vector>             // for vector\n\nnamespace xgboost {\nclass DMatrix;\n/**\n * \\brief Thread-aware FIFO cache for DMatrix related data.\n *\n * \\tparam CacheT The type that needs to be cached.\n */\ntemplate <typename CacheT>\nclass DMatrixCache {\n public:\n  struct Item {\n    // A weak pointer for checking whether the DMatrix object has expired.\n    std::weak_ptr<DMatrix> ref;\n    // The cached item\n    std::shared_ptr<CacheT> value;\n\n    CacheT const& Value() const { return *value; }\n    CacheT& Value() { return *value; }\n\n    Item(std::shared_ptr<DMatrix> m, std::shared_ptr<CacheT> v) : ref{m}, value{std::move(v)} {}\n  };\n\n  static constexpr std::size_t DefaultSize() { return 32; }\n\n private:\n  mutable std::mutex lock_;\n\n protected:\n  struct Key {\n    DMatrix const* ptr;\n    std::thread::id const thread_id;\n\n    bool operator==(Key const& that) const {\n      return ptr == that.ptr && thread_id == that.thread_id;\n    }\n  };\n  struct Hash {\n    std::size_t operator()(Key const& key) const noexcept {\n      std::size_t f = std::hash<DMatrix const*>()(key.ptr);\n      std::size_t s = std::hash<std::thread::id>()(key.thread_id);\n      if (f == s) {\n        return f;\n      }\n      return f ^ s;\n    }\n  };\n\n  std::unordered_map<Key, Item, Hash> container_;\n  std::queue<Key> queue_;\n  std::size_t max_size_;\n\n  void CheckConsistent() const { CHECK_EQ(queue_.size(), container_.size()); }\n\n  
void ClearExpired() {\n    // Clear expired entries\n    this->CheckConsistent();\n    std::vector<Key> expired;\n    std::queue<Key> remained;\n\n    while (!queue_.empty()) {\n      auto p_fmat = queue_.front();\n      auto it = container_.find(p_fmat);\n      CHECK(it != container_.cend());\n      if (it->second.ref.expired()) {\n        expired.push_back(it->first);\n      } else {\n        remained.push(it->first);\n      }\n      queue_.pop();\n    }\n    CHECK(queue_.empty());\n    CHECK_EQ(remained.size() + expired.size(), container_.size());\n\n    for (auto const& key : expired) {\n      container_.erase(key);\n    }\n    while (!remained.empty()) {\n      auto p_fmat = remained.front();\n      queue_.push(p_fmat);\n      remained.pop();\n    }\n    this->CheckConsistent();\n  }\n\n  void ClearExcess() {\n    this->CheckConsistent();\n    // clear half of the entries to prevent repeatingly clearing cache.\n    std::size_t half_size = max_size_ / 2;\n    while (queue_.size() >= half_size && !queue_.empty()) {\n      auto p_fmat = queue_.front();\n      queue_.pop();\n      container_.erase(p_fmat);\n    }\n    this->CheckConsistent();\n  }\n\n public:\n  /**\n   * \\param cache_size Maximum size of the cache.\n   */\n  explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {}\n\n  DMatrixCache& operator=(DMatrixCache&& that) {\n    CHECK(lock_.try_lock());\n    lock_.unlock();\n    CHECK(that.lock_.try_lock());\n    that.lock_.unlock();\n    std::swap(this->container_, that.container_);\n    std::swap(this->queue_, that.queue_);\n    std::swap(this->max_size_, that.max_size_);\n    return *this;\n  }\n\n  /**\n   * \\brief Cache a new DMatrix if it's not in the cache already.\n   *\n   *  Passing in a `shared_ptr` is critical here.  First to create a `weak_ptr` inside the\n   *  entry this shared pointer is necessary.  
More importantly, the life time of this\n   *  cache is tied to the shared pointer.\n   *\n   * \\param m    shared pointer to the DMatrix that needs to be cached.\n   * \\param args The arguments for constructing a new cache item, if needed.\n   *\n   * \\return The cache entry for passed in DMatrix, either an existing cache or newly\n   *         created.\n   */\n  template <typename... Args>\n  std::shared_ptr<CacheT> CacheItem(std::shared_ptr<DMatrix> m, Args const&... args) {\n    CHECK(m);\n    std::lock_guard<std::mutex> guard{lock_};\n\n    this->ClearExpired();\n    if (container_.size() >= max_size_) {\n      this->ClearExcess();\n    }\n    // after clear, cache size < max_size\n    CHECK_LT(container_.size(), max_size_);\n    auto key = Key{m.get(), std::this_thread::get_id()};\n    auto it = container_.find(key);\n    if (it == container_.cend()) {\n      // after the new DMatrix, cache size is at most max_size\n      container_.emplace(key, Item{m, std::make_shared<CacheT>(args...)});\n      queue_.emplace(key);\n    }\n    return container_.at(key).value;\n  }\n  /**\n   * \\brief Re-initialize the item in cache.\n   *\n   *   Since the shared_ptr is used to hold the item, any reference that lives outside of\n   *   the cache can no-longer be reached from the cache.\n   *\n   *   We use reset instead of erase to avoid walking through the whole cache for renewing\n   *   a single item. (the cache is FIFO, needs to maintain the order).\n   */\n  template <typename... Args>\n  std::shared_ptr<CacheT> ResetItem(std::shared_ptr<DMatrix> m, Args const&... args) {\n    std::lock_guard<std::mutex> guard{lock_};\n    CheckConsistent();\n    auto key = Key{m.get(), std::this_thread::get_id()};\n    auto it = container_.find(key);\n    CHECK(it != container_.cend());\n    it->second = {m, std::make_shared<CacheT>(args...)};\n    CheckConsistent();\n    return it->second.value;\n  }\n  /**\n   * \\brief Get a const reference to the underlying hash map.  
Clear expired caches before\n   *        returning.\n   */\n  decltype(container_) const& Container() {\n    std::lock_guard<std::mutex> guard{lock_};\n\n    this->ClearExpired();\n    return container_;\n  }\n\n  std::shared_ptr<CacheT> Entry(DMatrix const* m) const {\n    std::lock_guard<std::mutex> guard{lock_};\n    auto key = Key{m, std::this_thread::get_id()};\n    CHECK(container_.find(key) != container_.cend());\n    CHECK(!container_.at(key).ref.expired());\n    return container_.at(key).value;\n  }\n};\n}  // namespace xgboost\n#endif  // XGBOOST_CACHE_H_\n"
  },
  {
    "path": "include/xgboost/collective/poll_utils.h",
    "content": "/**\n *  Copyright 2014-2024, XGBoost Contributors\n * \\file socket.h\n * \\author Tianqi Chen\n */\n#pragma once\n#include <xgboost/collective/result.h>\n#include <xgboost/collective/socket.h>\n\n#if defined(_WIN32)\n#include <xgboost/windefs.h>\n// Socket API\n#include <winsock2.h>\n#include <ws2tcpip.h>\n#else\n\n#include <arpa/inet.h>\n#include <fcntl.h>\n#include <netdb.h>\n#include <netinet/in.h>\n#include <sys/ioctl.h>\n#include <sys/socket.h>\n#include <unistd.h>\n\n#include <cerrno>\n\n#endif  // defined(_WIN32)\n\n#include <chrono>\n#include <cstring>\n#include <string>\n#include <system_error>  // make_error_code, errc\n#include <unordered_map>\n#include <vector>\n\n#if !defined(_WIN32)\n\n#include <poll.h>\n\nusing SOCKET = int;\nusing sock_size_t = size_t;  // NOLINT\n#endif  // !defined(_WIN32)\n\n#define IS_MINGW() defined(__MINGW32__)\n\n#if IS_MINGW() && !defined(POLLRDNORM) && !defined(POLLRDBAND)\n/*\n * On later mingw versions poll should be supported (with bugs).  See:\n * https://stackoverflow.com/a/60623080\n *\n * But right now the mingw distributed with R 3.6 doesn't support it.\n * So we just give a warning and provide dummy implementation to get\n * compilation passed.  Otherwise we will have to provide a stub for\n * RABIT.\n *\n * Even on mingw version that has these structures and flags defined,\n * functions like `send` and `listen` might have unresolved linkage to\n * their implementation.  
So supporting mingw is quite difficult at\n * the time of writing.\n */\n#pragma message(\"Distributed training on mingw is not supported.\")\ntypedef struct pollfd {\n  SOCKET fd;\n  short  events;  // NOLINT\n  short  revents;  // NOLINT\n} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;\n\n// POLLRDNORM | POLLRDBAND\n#define POLLIN    (0x0100 | 0x0200)\n#define POLLPRI    0x0400\n// POLLWRNORM\n#define POLLOUT    0x0010\n\n#endif  // IS_MINGW() && !defined(POLLRDNORM) && !defined(POLLRDBAND)\n\nnamespace rabit {\nnamespace utils {\n\ntemplate <typename PollFD>\nint PollImpl(PollFD* pfd, int nfds, std::chrono::seconds timeout) noexcept(true) {\n  // For Windows and Linux, negative timeout means infinite timeout. For freebsd,\n  // INFTIM(-1) should be used instead.\n#if defined(_WIN32)\n\n#if IS_MINGW()\n  xgboost::MingWError();\n  return -1;\n#else\n  return WSAPoll(pfd, nfds, std::chrono::milliseconds(timeout).count());\n#endif  // IS_MINGW()\n\n#else\n  return poll(pfd, nfds, timeout.count() < 0 ? 
-1 : std::chrono::milliseconds(timeout).count());\n#endif  // IS_MINGW()\n}\n\ntemplate <typename E>\nstd::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E const& revents) {\n  if ((revents & POLLERR) != 0) {\n    auto err = errno;\n    auto str = strerror(err);\n    return xgboost::system::FailWithCode(std::string{\"Poll error condition:\"} +  // NOLINT\n                                         std::string{str} +                      // NOLINT\n                                         \" code:\" + std::to_string(err));\n  }\n  if ((revents & POLLNVAL) != 0) {\n    return xgboost::system::FailWithCode(\"Invalid polling request.\");\n  }\n  if ((revents & POLLHUP) != 0) {\n    // Excerpt from the Linux manual:\n    //\n    // Note that when reading from a channel such as a pipe or a stream socket, this event\n    // merely indicates that the peer closed its end of the channel.Subsequent reads from\n    // the channel will return 0 (end of file) only after all outstanding data in the\n    // channel has been consumed.\n    //\n    // We don't usually have a barrier for exiting workers, it's normal to have one end\n    // exit while the other still reading data.\n    return xgboost::collective::Success();\n  }\n#if defined(POLLRDHUP)\n  // Linux only flag\n  if ((revents & POLLRDHUP) != 0) {\n    return xgboost::system::FailWithCode(\"Poll hung up on the other end.\");\n  }\n#endif  // defined(POLLRDHUP)\n  return xgboost::collective::Success();\n}\n\n/*! 
\\brief helper data structure to perform poll */\nstruct PollHelper {\n public:\n  /*!\n   * \\brief add file descriptor to watch for read\n   * \\param fd file descriptor to be watched\n   */\n  inline void WatchRead(SOCKET fd) {\n    auto& pfd = fds[fd];\n    pfd.fd = fd;\n    pfd.events |= POLLIN;\n  }\n  void WatchRead(xgboost::collective::TCPSocket const &socket) { this->WatchRead(socket.Handle()); }\n\n  /*!\n   * \\brief add file descriptor to watch for write\n   * \\param fd file descriptor to be watched\n   */\n  inline void WatchWrite(SOCKET fd) {\n    auto& pfd = fds[fd];\n    pfd.fd = fd;\n    pfd.events |= POLLOUT;\n  }\n  void WatchWrite(xgboost::collective::TCPSocket const &socket) {\n    this->WatchWrite(socket.Handle());\n  }\n\n  /*!\n   * \\brief add file descriptor to watch for exception\n   * \\param fd file descriptor to be watched\n   */\n  inline void WatchException(SOCKET fd) {\n    auto& pfd = fds[fd];\n    pfd.fd = fd;\n    pfd.events |= POLLPRI;\n  }\n  void WatchException(xgboost::collective::TCPSocket const &socket) {\n    this->WatchException(socket.Handle());\n  }\n  /*!\n   * \\brief Check if the descriptor is ready for read\n   * \\param fd file descriptor to check status\n   */\n  [[nodiscard]] bool CheckRead(SOCKET fd) const {\n    const auto& pfd = fds.find(fd);\n    return pfd != fds.end() && ((pfd->second.events & POLLIN) != 0);\n  }\n  [[nodiscard]] bool CheckRead(xgboost::collective::TCPSocket const& socket) const {\n    return this->CheckRead(socket.Handle());\n  }\n\n  /*!\n   * \\brief Check if the descriptor is ready for write\n   * \\param fd file descriptor to check status\n   */\n  [[nodiscard]] bool CheckWrite(SOCKET fd) const {\n    const auto& pfd = fds.find(fd);\n    return pfd != fds.end() && ((pfd->second.events & POLLOUT) != 0);\n  }\n  [[nodiscard]] bool CheckWrite(xgboost::collective::TCPSocket const& socket) const {\n    return this->CheckWrite(socket.Handle());\n  }\n  /**\n   * @brief perform poll on the 
set defined, read, write, exception\n   *\n   * @param timeout specify timeout in seconds. Block if negative.\n   */\n  [[nodiscard]] xgboost::collective::Result Poll(std::chrono::seconds timeout,\n                                                 bool check_error = true) {\n    std::vector<pollfd> fdset;\n    fdset.reserve(fds.size());\n    for (auto kv : fds) {\n      fdset.push_back(kv.second);\n    }\n    std::int32_t ret = PollImpl(fdset.data(), fdset.size(), timeout);\n    if (ret == 0) {\n      return xgboost::collective::Fail(\n          \"Poll timeout:\" + std::to_string(timeout.count()) + \" seconds.\",\n          std::make_error_code(std::errc::timed_out));\n    } else if (ret < 0) {\n      return xgboost::system::FailWithCode(\"Poll failed, nfds:\" + std::to_string(fdset.size()));\n    }\n\n    for (auto& pfd : fdset) {\n      auto result = PollError(pfd.revents);\n      if (check_error && !result.OK()) {\n        return result;\n      }\n\n      auto revents = pfd.revents & pfd.events;\n      fds[pfd.fd].events = revents;\n    }\n    return xgboost::collective::Success();\n  }\n\n  std::unordered_map<SOCKET, pollfd> fds;\n};\n}  // namespace utils\n}  // namespace rabit\n\n#if IS_MINGW() && !defined(POLLRDNORM) && !defined(POLLRDBAND)\n#undef POLLIN\n#undef POLLPRI\n#undef POLLOUT\n#endif  // IS_MINGW()\n"
  },
  {
    "path": "include/xgboost/collective/result.h",
    "content": "/**\n *  Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n\n#include <cstdint>       // for int32_t\n#include <memory>        // for unique_ptr\n#include <string>        // for string\n#include <system_error>  // for error_code\n#include <utility>       // for move\n\nnamespace xgboost::collective {\nnamespace detail {\nstruct ResultImpl {\n  std::string message;\n  std::error_code errc{};  // optional for system error.\n\n  std::unique_ptr<ResultImpl> prev{nullptr};\n\n  ResultImpl() = delete;  // must initialize.\n  ResultImpl(ResultImpl const& that) = delete;\n  ResultImpl(ResultImpl&& that) = default;\n  ResultImpl& operator=(ResultImpl const& that) = delete;\n  ResultImpl& operator=(ResultImpl&& that) = default;\n\n  explicit ResultImpl(std::string msg) : message{std::move(msg)} {}\n  explicit ResultImpl(std::string msg, std::error_code errc)\n      : message{std::move(msg)}, errc{std::move(errc)} {}\n  explicit ResultImpl(std::string msg, std::unique_ptr<ResultImpl> prev)\n      : message{std::move(msg)}, prev{std::move(prev)} {}\n  explicit ResultImpl(std::string msg, std::error_code errc, std::unique_ptr<ResultImpl> prev)\n      : message{std::move(msg)}, errc{std::move(errc)}, prev{std::move(prev)} {}\n\n  [[nodiscard]] bool operator==(ResultImpl const& that) const noexcept(true) {\n    if ((prev && !that.prev) || (!prev && that.prev)) {\n      // one of them doesn't have prev\n      return false;\n    }\n\n    auto cur_eq = message == that.message && errc == that.errc;\n    if (prev && that.prev) {\n      // recursive comparison\n      auto prev_eq = *prev == *that.prev;\n      return cur_eq && prev_eq;\n    }\n    return cur_eq;\n  }\n\n  [[nodiscard]] std::string Report() const;\n  [[nodiscard]] std::error_code Code() const;\n\n  void Concat(std::unique_ptr<ResultImpl> rhs);\n};\n\n#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__MINGW32__)\n#define __builtin_FILE() nullptr\n#define __builtin_LINE() 
(-1)\n#endif\n\nstd::string MakeMsg(std::string&& msg, char const* file, std::int32_t line);\n}  // namespace detail\n\n/**\n * @brief An error type that's easier to handle than throwing dmlc exception. We can\n *        record and propagate the system error code.\n */\nstruct Result {\n private:\n  std::unique_ptr<detail::ResultImpl> impl_{nullptr};\n\n public:\n  Result() noexcept(true) = default;\n  explicit Result(std::string msg) : impl_{std::make_unique<detail::ResultImpl>(std::move(msg))} {}\n  explicit Result(std::string msg, std::error_code errc)\n      : impl_{std::make_unique<detail::ResultImpl>(std::move(msg), std::move(errc))} {}\n  Result(std::string msg, Result&& prev)\n      : impl_{std::make_unique<detail::ResultImpl>(std::move(msg), std::move(prev.impl_))} {}\n  Result(std::string msg, std::error_code errc, Result&& prev)\n      : impl_{std::make_unique<detail::ResultImpl>(std::move(msg), std::move(errc),\n                                                   std::move(prev.impl_))} {}\n\n  Result(Result const& that) = delete;\n  Result& operator=(Result const& that) = delete;\n  Result(Result&& that) = default;\n  Result& operator=(Result&& that) = default;\n\n  [[nodiscard]] bool OK() const noexcept(true) { return !impl_; }\n  [[nodiscard]] std::string Report() const { return OK() ? \"\" : impl_->Report(); }\n  /**\n   * @brief Return the root system error. This might return success if there's no system error.\n   */\n  [[nodiscard]] auto Code() const { return OK() ? 
std::error_code{} : impl_->Code(); }\n  [[nodiscard]] bool operator==(Result const& that) const noexcept(true) {\n    if (OK() && that.OK()) {\n      return true;\n    }\n    if ((OK() && !that.OK()) || (!OK() && that.OK())) {\n      return false;\n    }\n    return *impl_ == *that.impl_;\n  }\n\n  friend Result operator+(Result&& lhs, Result&& rhs);\n};\n\n[[nodiscard]] inline Result operator+(Result&& lhs, Result&& rhs) {\n  if (lhs.OK()) {\n    return std::forward<Result>(rhs);\n  }\n  if (rhs.OK()) {\n    return std::forward<Result>(lhs);\n  }\n  lhs.impl_->Concat(std::move(rhs.impl_));\n  return std::forward<Result>(lhs);\n}\n\n/**\n * @brief Return success.\n */\n[[nodiscard]] inline auto Success() noexcept(true) { return Result{}; }\n/**\n * @brief Return failure.\n */\n[[nodiscard]] inline auto Fail(std::string msg, char const* file = __builtin_FILE(),\n                               std::int32_t line = __builtin_LINE()) {\n  return Result{detail::MakeMsg(std::move(msg), file, line)};\n}\n/**\n * @brief Return failure with `errno`.\n */\n[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc,\n                               char const* file = __builtin_FILE(),\n                               std::int32_t line = __builtin_LINE()) {\n  return Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc)};\n}\n/**\n * @brief Return failure with a previous error.\n */\n[[nodiscard]] inline auto Fail(std::string msg, Result&& prev, char const* file = __builtin_FILE(),\n                               std::int32_t line = __builtin_LINE()) {\n  return Result{detail::MakeMsg(std::move(msg), file, line), std::forward<Result>(prev)};\n}\n/**\n * @brief Return failure with a previous error and a new `errno`.\n */\n[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev,\n                               char const* file = __builtin_FILE(),\n                               std::int32_t line = __builtin_LINE()) {\n  return 
Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc),\n                std::forward<Result>(prev)};\n}\n\n// We don't have monad, a simple helper would do.\ntemplate <typename Fn>\n[[nodiscard]] std::enable_if_t<std::is_invocable_v<Fn>, Result> operator<<(Result&& r, Fn&& fn) {\n  if (!r.OK()) {\n    return std::forward<Result>(r);\n  }\n  return fn();\n}\n\nvoid SafeColl(Result const& rc, char const* file = __builtin_FILE(),\n              std::int32_t line = __builtin_LINE());\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "include/xgboost/collective/socket.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <cerrno>        // errno, EINTR, EBADF\n#include <climits>       // HOST_NAME_MAX\n#include <cstddef>       // std::size_t\n#include <cstdint>       // std::int32_t, std::uint16_t\n#include <cstring>       // memset\n#include <string>        // std::string\n#include <system_error>  // std::error_code, std::system_category\n#include <utility>       // std::swap\n\n#if defined(__linux__)\n#include <sys/ioctl.h>  // for TIOCOUTQ, FIONREAD\n#endif                  // defined(__linux__)\n\n#if defined(_WIN32)\n// Guard the include.\n#include <xgboost/windefs.h>\n// Socket API\n#include <winsock2.h>\n#include <ws2tcpip.h>\n\nusing in_port_t = std::uint16_t;\n\n#ifdef _MSC_VER\n#pragma comment(lib, \"Ws2_32.lib\")\n#endif  // _MSC_VER\n\n#if !defined(xgboost_IS_MINGW)\nusing ssize_t = int;\n#endif  // !xgboost_IS_MINGW()\n\n#else  // UNIX\n\n#include <arpa/inet.h>    // inet_ntop\n#include <fcntl.h>        // fcntl, F_GETFL, O_NONBLOCK\n#include <netinet/in.h>   // sockaddr_in6, sockaddr_in, in_port_t, INET6_ADDRSTRLEN, INET_ADDRSTRLEN\n#include <netinet/in.h>   // IPPROTO_TCP\n#include <netinet/tcp.h>  // TCP_NODELAY\n#include <sys/socket.h>  // socket, SOL_SOCKET, SO_ERROR, MSG_WAITALL, recv, send, AF_INET6, AF_INET\n#include <unistd.h>      // close\n\n#if defined(__sun) || defined(sun)\n#include <sys/sockio.h>\n#endif                            // defined(__sun) || defined(sun)\n\n#endif                            // defined(_WIN32)\n\n#include \"xgboost/base.h\"               // XGBOOST_EXPECT\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/logging.h\"            // LOG\n#include \"xgboost/string_view.h\"        // StringView\n\n#if !defined(HOST_NAME_MAX)\n#define HOST_NAME_MAX 256  // macos\n#endif\n\nnamespace xgboost {\n\n#if defined(xgboost_IS_MINGW)\n// see the dummy implementation of `poll` in rabit for more info.\ninline void 
MingWError() { LOG(FATAL) << \"Distributed training on mingw is not supported.\"; }\n#endif  // defined(xgboost_IS_MINGW)\n\nnamespace system {\ninline std::int32_t LastError() {\n#if defined(_WIN32)\n  return WSAGetLastError();\n#else\n  int errsv = errno;\n  return errsv;\n#endif\n}\n\n[[nodiscard]] inline collective::Result FailWithCode(std::string msg) {\n  return collective::Fail(std::move(msg), std::error_code{LastError(), std::system_category()});\n}\n\n#if defined(__GLIBC__)\ninline auto ThrowAtError(StringView fn_name, std::int32_t errsv = LastError(),\n                         std::int32_t line = __builtin_LINE(),\n                         char const *file = __builtin_FILE()) {\n  auto err = std::error_code{errsv, std::system_category()};\n  LOG(FATAL) << \"\\n\"\n             << file << \"(\" << line << \"): Failed to call `\" << fn_name << \"`: \" << err.message()\n             << std::endl;\n}\n#else\ninline auto ThrowAtError(StringView fn_name, std::int32_t errsv = LastError()) {\n  auto err = std::error_code{errsv, std::system_category()};\n  LOG(FATAL) << \"Failed to call `\" << fn_name << \"`: \" << err.message() << std::endl;\n}\n#endif  // defined(__GLIBC__)\n\n#if defined(_WIN32)\nusing SocketT = SOCKET;\n#else\nusing SocketT = int;\n#define INVALID_SOCKET -1\n#endif  // defined(_WIN32)\n\n#if !defined(xgboost_CHECK_SYS_CALL)\n#define xgboost_CHECK_SYS_CALL(exp, expected)         \\\n  do {                                                \\\n    if (XGBOOST_EXPECT((exp) != (expected), false)) { \\\n      ::xgboost::system::ThrowAtError(#exp);          \\\n    }                                                 \\\n  } while (false)\n#endif  // !defined(xgboost_CHECK_SYS_CALL)\n\ninline std::int32_t CloseSocket(SocketT fd) {\n#if defined(_WIN32)\n  return closesocket(fd);\n#else\n  return close(fd);\n#endif\n}\n\ninline std::int32_t ShutdownSocket(SocketT fd) {\n#if defined(_WIN32)\n  auto rc = shutdown(fd, SD_BOTH);\n  if (rc != 0 && LastError() == 
WSANOTINITIALISED) {\n    return 0;\n  }\n#else\n  auto rc = shutdown(fd, SHUT_RDWR);\n  if (rc != 0 && LastError() == ENOTCONN) {\n    return 0;\n  }\n#endif\n  return rc;\n}\n\ninline bool ErrorWouldBlock(std::int32_t errsv) noexcept(true) {\n#ifdef _WIN32\n  return errsv == WSAEWOULDBLOCK;\n#else\n  return errsv == EAGAIN || errsv == EWOULDBLOCK || errsv == EINPROGRESS;\n#endif  // _WIN32\n}\n\ninline bool LastErrorWouldBlock() {\n  int errsv = LastError();\n  return ErrorWouldBlock(errsv);\n}\n\ninline void SocketStartup() {\n#if defined(_WIN32)\n  WSADATA wsa_data;\n  if (WSAStartup(MAKEWORD(2, 2), &wsa_data) == -1) {\n    ThrowAtError(\"WSAStartup\");\n  }\n  if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) {\n    WSACleanup();\n    LOG(FATAL) << \"Could not find a usable version of Winsock.dll\";\n  }\n#endif  // defined(_WIN32)\n}\n\ninline void SocketFinalize() {\n#if defined(_WIN32)\n  WSACleanup();\n#endif  // defined(_WIN32)\n}\n\n#if defined(_WIN32) && defined(xgboost_IS_MINGW)\n// dummy definition for old mysys32.\ninline const char *inet_ntop(int, const void *, char *, socklen_t) {  // NOLINT\n  MingWError();\n  return nullptr;\n}\n#else\nusing ::inet_ntop;\n#endif  // defined(_WIN32) && defined(xgboost_IS_MINGW)\n\n}  // namespace system\n\nnamespace collective {\nclass SockAddress;\n\nenum class SockDomain : std::int32_t { kV4 = AF_INET, kV6 = AF_INET6 };\n\n/**\n * \\brief Parse host address and return a SockAddress instance. 
Supports IPv4 and IPv6\n *        host.\n */\nSockAddress MakeSockAddress(StringView host, in_port_t port);\n\nclass SockAddrV6 {\n  sockaddr_in6 addr_;\n\n public:\n  explicit SockAddrV6(sockaddr_in6 addr) : addr_{addr} {}\n  SockAddrV6() { std::memset(&addr_, '\\0', sizeof(addr_)); }\n\n  static SockAddrV6 Loopback();\n  static SockAddrV6 InaddrAny();\n\n  in_port_t Port() const { return ntohs(addr_.sin6_port); }\n\n  std::string Addr() const {\n    char buf[INET6_ADDRSTRLEN];\n    auto const *s = system::inet_ntop(static_cast<std::int32_t>(SockDomain::kV6), &addr_.sin6_addr,\n                                      buf, INET6_ADDRSTRLEN);\n    if (s == nullptr) {\n      system::ThrowAtError(\"inet_ntop\");\n    }\n    return {buf};\n  }\n  sockaddr_in6 const &Handle() const { return addr_; }\n};\n\nclass SockAddrV4 {\n private:\n  sockaddr_in addr_;\n\n public:\n  explicit SockAddrV4(sockaddr_in addr) : addr_{addr} {}\n  SockAddrV4() { std::memset(&addr_, '\\0', sizeof(addr_)); }\n\n  static SockAddrV4 Loopback();\n  static SockAddrV4 InaddrAny();\n\n  [[nodiscard]] in_port_t Port() const { return ntohs(addr_.sin_port); }\n\n  [[nodiscard]] std::string Addr() const {\n    char buf[INET_ADDRSTRLEN];\n    auto const *s = system::inet_ntop(static_cast<std::int32_t>(SockDomain::kV4), &addr_.sin_addr,\n                                      buf, INET_ADDRSTRLEN);\n    if (s == nullptr) {\n      system::ThrowAtError(\"inet_ntop\");\n    }\n    return {buf};\n  }\n  [[nodiscard]] sockaddr_in const &Handle() const { return addr_; }\n};\n\n/**\n * \\brief Address for TCP socket, can be either IPv4 or IPv6.\n */\nclass SockAddress {\n private:\n  SockAddrV6 v6_;\n  SockAddrV4 v4_;\n  SockDomain domain_{SockDomain::kV4};\n\n public:\n  SockAddress() = default;\n  explicit SockAddress(SockAddrV6 const &addr) : v6_{addr}, domain_{SockDomain::kV6} {}\n  explicit SockAddress(SockAddrV4 const &addr) : v4_{addr} {}\n\n  [[nodiscard]] auto Domain() const { return domain_; }\n\n  
[[nodiscard]] bool IsV4() const { return Domain() == SockDomain::kV4; }\n  [[nodiscard]] bool IsV6() const { return !IsV4(); }\n\n  [[nodiscard]] auto const &V4() const { return v4_; }\n  [[nodiscard]] auto const &V6() const { return v6_; }\n};\n\n/**\n * \\brief TCP socket for simple communication.\n */\nclass TCPSocket {\n public:\n  using HandleT = system::SocketT;\n\n private:\n  HandleT handle_{InvalidSocket()};\n  bool non_blocking_{false};\n  // There's no reliable way to extract domain from a socket without first binding that\n  // socket on macos.\n#if defined(__APPLE__)\n  SockDomain domain_{SockDomain::kV4};\n#endif\n\n  constexpr static HandleT InvalidSocket() { return INVALID_SOCKET; }\n\n  explicit TCPSocket(HandleT newfd) : handle_{newfd} {}\n\n public:\n  TCPSocket() = default;\n  /**\n   * \\brief Return the socket domain.\n   */\n  [[nodiscard]] auto Domain() const -> SockDomain {\n    auto ret_iafamily = [](std::int32_t domain) {\n      switch (domain) {\n        case AF_INET:\n          return SockDomain::kV4;\n        case AF_INET6:\n          return SockDomain::kV6;\n        default: {\n          LOG(FATAL) << \"Unknown IA family.\";\n        }\n      }\n      return SockDomain::kV4;\n    };\n\n#if defined(_WIN32)\n    WSAPROTOCOL_INFOW info;\n    socklen_t len = sizeof(info);\n    xgboost_CHECK_SYS_CALL(\n        getsockopt(handle_, SOL_SOCKET, SO_PROTOCOL_INFO, reinterpret_cast<char *>(&info), &len),\n        0);\n    return ret_iafamily(info.iAddressFamily);\n#elif defined(__APPLE__)\n    return domain_;\n#elif defined(__unix__)\n#ifndef __PASE__\n    std::int32_t domain;\n    socklen_t len = sizeof(domain);\n    xgboost_CHECK_SYS_CALL(\n        getsockopt(this->Handle(), SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len),\n        0);\n    return ret_iafamily(domain);\n#else\n    struct sockaddr sa;\n    socklen_t sizeofsa = sizeof(sa);\n    xgboost_CHECK_SYS_CALL(getsockname(handle_, &sa, &sizeofsa), 0);\n    if (sizeofsa < 
sizeof(uchar_t) * 2) {\n      return ret_iafamily(AF_INET);\n    }\n    return ret_iafamily(sa.sa_family);\n#endif  // __PASE__\n#else\n    LOG(FATAL) << \"Unknown platform.\";\n    return ret_iafamily(AF_INET);\n#endif  // platforms\n  }\n\n  [[nodiscard]] bool IsClosed() const { return handle_ == InvalidSocket(); }\n\n  /** @brief get last error code if any */\n  [[nodiscard]] Result GetSockError() const {\n    std::int32_t optval = 0;\n    socklen_t len = sizeof(optval);\n    auto ret = getsockopt(handle_, SOL_SOCKET, SO_ERROR, reinterpret_cast<char *>(&optval), &len);\n    if (ret != 0) {\n      auto errc = std::error_code{system::LastError(), std::system_category()};\n      return Fail(\"Failed to retrieve socket error.\", std::move(errc));\n    }\n    if (optval != 0) {\n      auto errc = std::error_code{optval, std::system_category()};\n      return Fail(\"Socket error.\", std::move(errc));\n    }\n    return Success();\n  }\n\n  /** \\brief check if anything bad happens */\n  [[nodiscard]] bool BadSocket() const {\n    if (IsClosed()) {\n      return true;\n    }\n    auto err = GetSockError();\n    if (err.Code() == std::error_code{EBADF, std::system_category()} ||  // NOLINT\n        err.Code() == std::error_code{EINTR, std::system_category()}) {  // NOLINT\n      return true;\n    }\n    return false;\n  }\n\n  [[nodiscard]] Result NonBlocking(bool non_block) {\n#if defined(_WIN32)\n    u_long mode = non_block ? 
1 : 0;\n    if (ioctlsocket(handle_, FIONBIO, &mode) != NO_ERROR) {\n      return system::FailWithCode(\"Failed to set socket to non-blocking.\");\n    }\n#else\n    std::int32_t flag = fcntl(handle_, F_GETFL, 0);\n    auto rc = flag;\n    if (rc == -1) {\n      return system::FailWithCode(\"Failed to get socket flag.\");\n    }\n    if (non_block) {\n      flag |= O_NONBLOCK;\n    } else {\n      flag &= ~O_NONBLOCK;\n    }\n    rc = fcntl(handle_, F_SETFL, flag);\n    if (rc == -1) {\n      return system::FailWithCode(\"Failed to set socket to non-blocking.\");\n    }\n#endif  // _WIN32\n    non_blocking_ = non_block;\n    return Success();\n  }\n  [[nodiscard]] bool NonBlocking() const { return non_blocking_; }\n  [[nodiscard]] Result RecvTimeout(std::chrono::seconds timeout) {\n    // https://stackoverflow.com/questions/2876024/linux-is-there-a-read-or-recv-from-socket-with-timeout\n#if defined(_WIN32)\n    DWORD tv = timeout.count() * 1000;\n    auto rc =\n        setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv), sizeof(tv));\n#else\n    struct timeval tv;\n    tv.tv_sec = timeout.count();\n    tv.tv_usec = 0;\n    auto rc = setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char const *>(&tv),\n                         sizeof(tv));\n#endif\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to set timeout on recv.\");\n    }\n    return Success();\n  }\n\n  [[nodiscard]] Result SetBufSize(std::int32_t n_bytes) {\n    auto rc = setsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, reinterpret_cast<char *>(&n_bytes),\n                         sizeof(n_bytes));\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to set send buffer size.\");\n    }\n    rc = setsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, reinterpret_cast<char *>(&n_bytes),\n                    sizeof(n_bytes));\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to set recv buffer size.\");\n    }\n    return Success();\n  
}\n\n  [[nodiscard]] Result SendBufSize(std::int32_t *n_bytes) {\n    socklen_t optlen;\n    auto rc = getsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, reinterpret_cast<char *>(n_bytes),\n                         &optlen);\n    if (rc != 0 || optlen != sizeof(std::int32_t)) {\n      return system::FailWithCode(\"getsockopt\");\n    }\n    return Success();\n  }\n  [[nodiscard]] Result RecvBufSize(std::int32_t *n_bytes) {\n    socklen_t optlen;\n    auto rc = getsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, reinterpret_cast<char *>(n_bytes),\n                         &optlen);\n    if (rc != 0 || optlen != sizeof(std::int32_t)) {\n      return system::FailWithCode(\"getsockopt\");\n    }\n    return Success();\n  }\n#if defined(__linux__)\n  [[nodiscard]] Result PendingSendSize(std::int32_t *n_bytes) const {\n    return ioctl(this->Handle(), TIOCOUTQ, n_bytes) == 0 ? Success()\n                                                         : system::FailWithCode(\"ioctl\");\n  }\n  [[nodiscard]] Result PendingRecvSize(std::int32_t *n_bytes) const {\n    return ioctl(this->Handle(), FIONREAD, n_bytes) == 0 ? 
Success()\n                                                         : system::FailWithCode(\"ioctl\");\n  }\n#endif  // defined(__linux__)\n\n  [[nodiscard]] Result SetKeepAlive() {\n    std::int32_t keepalive = 1;\n    auto rc = setsockopt(handle_, SOL_SOCKET, SO_KEEPALIVE, reinterpret_cast<char *>(&keepalive),\n                         sizeof(keepalive));\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to set TCP keeaplive.\");\n    }\n    return Success();\n  }\n\n  [[nodiscard]] Result SetNoDelay(std::int32_t no_delay = 1) {\n    auto rc = setsockopt(handle_, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast<char *>(&no_delay),\n                         sizeof(no_delay));\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to set TCP no delay.\");\n    }\n    return Success();\n  }\n\n  /**\n   * \\brief Accept new connection, returns a new TCP socket for the new connection.\n   */\n  TCPSocket Accept() {\n    SockAddress addr;\n    TCPSocket newsock;\n    auto rc = this->Accept(&newsock, &addr);\n    SafeColl(rc);\n    return newsock;\n  }\n\n  [[nodiscard]] Result Accept(TCPSocket *out, SockAddress *addr) {\n#if defined(_WIN32)\n    auto interrupt = WSAEINTR;\n#else\n    auto interrupt = EINTR;\n#endif\n    if (this->Domain() == SockDomain::kV4) {\n      struct sockaddr_in caddr;\n      socklen_t caddr_len = sizeof(caddr);\n      HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);\n      if (newfd == InvalidSocket() && system::LastError() != interrupt) {\n        return system::FailWithCode(\"Failed to accept.\");\n      }\n      *addr = SockAddress{SockAddrV4{caddr}};\n      *out = TCPSocket{newfd};\n    } else {\n      struct sockaddr_in6 caddr;\n      socklen_t caddr_len = sizeof(caddr);\n      HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);\n      if (newfd == InvalidSocket() && system::LastError() != interrupt) {\n        return system::FailWithCode(\"Failed to 
accept.\");\n      }\n      *addr = SockAddress{SockAddrV6{caddr}};\n      *out = TCPSocket{newfd};\n    }\n    // On MacOS, this is automatically set to async socket if the parent socket is async\n    // We make sure all sockets are blocking by default.\n    //\n    // On Windows, a closed socket is returned during shutdown. We guard against it when\n    // setting non-blocking.\n    if (!out->IsClosed()) {\n      return out->NonBlocking(false);\n    }\n    return Success();\n  }\n\n  ~TCPSocket() {\n    if (!IsClosed()) {\n      auto rc = this->Close();\n      if (!rc.OK()) {\n        LOG(WARNING) << rc.Report();\n      }\n    }\n  }\n\n  TCPSocket(TCPSocket const &that) = delete;\n  TCPSocket(TCPSocket &&that) noexcept(true) { std::swap(this->handle_, that.handle_); }\n  TCPSocket &operator=(TCPSocket const &that) = delete;\n  TCPSocket &operator=(TCPSocket &&that) noexcept(true) {\n    std::swap(this->handle_, that.handle_);\n    return *this;\n  }\n  /**\n   * @brief Return the native socket file descriptor.\n   */\n  [[nodiscard]] HandleT const &Handle() const { return handle_; }\n  /**\n   * @brief Listen to incoming requests. Should be called after bind.\n   *\n   *   Both the default and minimum backlog are set to 256.\n   */\n  [[nodiscard]] Result Listen(std::int32_t backlog = 256);\n  /**\n   * @brief Bind socket to INADDR_ANY, return the port selected by the OS.\n   */\n  [[nodiscard]] Result BindHost(std::int32_t* p_out) {\n    // Use int32 instead of in_port_t for consistency. 
We take port as parameter from\n    // users using other languages, the port is usually stored and passed around as int.\n    if (Domain() == SockDomain::kV6) {\n      auto addr = SockAddrV6::InaddrAny();\n      auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());\n      if (bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)) != 0) {\n        return system::FailWithCode(\"bind failed.\");\n      }\n\n      sockaddr_in6 res_addr;\n      socklen_t addrlen = sizeof(res_addr);\n      if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {\n        return system::FailWithCode(\"getsockname failed.\");\n      }\n      *p_out = ntohs(res_addr.sin6_port);\n    } else {\n      auto addr = SockAddrV4::InaddrAny();\n      auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());\n      if (bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.Handle())>)) != 0) {\n        return system::FailWithCode(\"bind failed.\");\n      }\n\n      sockaddr_in res_addr;\n      socklen_t addrlen = sizeof(res_addr);\n      if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {\n        return system::FailWithCode(\"getsockname failed.\");\n      }\n      *p_out = ntohs(res_addr.sin_port);\n    }\n\n    return Success();\n  }\n\n  [[nodiscard]] auto Port() const {\n    if (this->Domain() == SockDomain::kV4) {\n      sockaddr_in res_addr;\n      socklen_t addrlen = sizeof(res_addr);\n      auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);\n      if (code != 0) {\n        return std::make_pair(system::FailWithCode(\"getsockname\"), std::int32_t{0});\n      }\n      return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin_port)});\n    } else {\n      sockaddr_in6 res_addr;\n      socklen_t addrlen = sizeof(res_addr);\n      auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);\n      if (code != 0) {\n   
     return std::make_pair(system::FailWithCode(\"getsockname\"), std::int32_t{0});\n      }\n      return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin6_port)});\n    }\n  }\n  /**\n   * @brief Bind the socket to the address.\n   *\n   * @param ip[in]        The IP address.\n   * @param port [in,out] Let the system choose a port if this parameter is set to 0.\n   */\n  [[nodiscard]] Result Bind(StringView ip, std::int32_t *port) {\n    // bind socket handle_ to ip\n    auto addr = MakeSockAddress(ip, *port);\n    std::int32_t errc{0};\n    if (addr.IsV4()) {\n      auto handle = reinterpret_cast<sockaddr const *>(&addr.V4().Handle());\n      errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V4().Handle())>));\n    } else {\n      auto handle = reinterpret_cast<sockaddr const *>(&addr.V6().Handle());\n      errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V6().Handle())>));\n    }\n    if (errc != 0) {\n      return system::FailWithCode(\"Failed to bind socket.\");\n    }\n    auto [rc, new_port] = this->Port();\n    if (!rc.OK()) {\n      return std::move(rc);\n    }\n    if (*port == 0) {\n      *port = new_port;\n      return Success();\n    }\n    if (*port != new_port) {\n      return Fail(\"Got an invalid port from bind.\");\n    }\n    return Success();\n  }\n\n  /**\n   * @brief Send data, without error then all data should be sent.\n   */\n  [[nodiscard]] Result SendAll(void const *buf, std::size_t len, std::size_t *n_sent) {\n    char const *_buf = reinterpret_cast<const char *>(buf);\n    std::size_t &ndone = *n_sent;\n    ndone = 0;\n    while (ndone < len) {\n      ssize_t ret = send(handle_, _buf, len - ndone, 0);\n      if (ret == -1) {\n        if (system::LastErrorWouldBlock()) {\n          return Success();\n        }\n        return system::FailWithCode(\"send\");\n      }\n      _buf += ret;\n      ndone += ret;\n    }\n    return Success();\n  }\n  /**\n   * @brief Receive data, 
without error then all data should be received.\n   */\n  [[nodiscard]] Result RecvAll(void *buf, std::size_t len, std::size_t *n_recv) {\n    char *_buf = reinterpret_cast<char *>(buf);\n    std::size_t &ndone = *n_recv;\n    ndone = 0;\n    while (ndone < len) {\n      ssize_t ret = recv(handle_, _buf, len - ndone, MSG_WAITALL);\n      if (ret == -1) {\n        if (system::LastErrorWouldBlock()) {\n          return Success();\n        }\n        return system::FailWithCode(\"recv\");\n      }\n      if (ret == 0) {\n        return Success();\n      }\n      _buf += ret;\n      ndone += ret;\n    }\n    return Success();\n  }\n  /**\n   * \\brief Send data using the socket\n   * \\param buf the pointer to the buffer\n   * \\param len the size of the buffer\n   * \\param flags extra flags\n   * \\return size of data actually sent return -1 if error occurs\n   */\n  auto Send(const void *buf_, std::size_t len, std::int32_t flags = 0) {\n    const char *buf = reinterpret_cast<const char *>(buf_);\n    return send(handle_, buf, len, flags);\n  }\n  /**\n   * \\brief receive data using the socket\n   * \\param buf the pointer to the buffer\n   * \\param len the size of the buffer\n   * \\param flags extra flags\n   * \\return size of data actually received return -1 if error occurs\n   */\n  auto Recv(void *buf, std::size_t len, std::int32_t flags = 0) {\n    char *_buf = static_cast<char *>(buf);\n    // See https://github.com/llvm/llvm-project/issues/104241 for skipped tidy analysis\n    // NOLINTBEGIN(clang-analyzer-unix.BlockInCriticalSection)\n    return recv(handle_, _buf, len, flags);\n    // NOLINTEND(clang-analyzer-unix.BlockInCriticalSection)\n  }\n  /**\n   * \\brief Send string, format is matched with the Python socket wrapper in RABIT.\n   */\n  std::size_t Send(StringView str);\n  /**\n   * @brief Receive string, format is matched with the Python socket wrapper in RABIT.\n   */\n  [[nodiscard]] Result Recv(std::string *p_str);\n  /**\n   * @brief Close 
the socket, called automatically in destructor if the socket is not closed.\n   */\n  [[nodiscard]] Result Close() {\n    if (InvalidSocket() != handle_) {\n      auto rc = system::CloseSocket(handle_);\n#if defined(_WIN32)\n      // it's possible that we close TCP sockets after finalizing WSA due to detached thread.\n      if (rc != 0 && system::LastError() != WSANOTINITIALISED) {\n        return system::FailWithCode(\"Failed to close the socket.\");\n      }\n#else\n      if (rc != 0) {\n        return system::FailWithCode(\"Failed to close the socket.\");\n      }\n#endif\n      handle_ = InvalidSocket();\n    }\n    return Success();\n  }\n  /**\n   * @brief Call shutdown on the socket.\n   */\n  [[nodiscard]] Result Shutdown() {\n    if (this->IsClosed()) {\n      return Success();\n    }\n    auto rc = system::ShutdownSocket(this->Handle());\n#if defined(_WIN32)\n    // Windows cannot shutdown a socket if it's not connected.\n    if (rc == -1 && system::LastError() == WSAENOTCONN) {\n      return Success();\n    }\n#endif\n    if (rc != 0) {\n      return system::FailWithCode(\"Failed to shutdown socket.\");\n    }\n    return Success();\n  }\n\n  /**\n   * \\brief Create a TCP socket on specified domain.\n   */\n  static TCPSocket Create(SockDomain domain) {\n#if defined(xgboost_IS_MINGW)\n    MingWError();\n    return {};\n#else\n    auto fd = socket(static_cast<std::int32_t>(domain), SOCK_STREAM, 0);\n    if (fd == InvalidSocket()) {\n      system::ThrowAtError(\"socket\");\n    }\n\n    TCPSocket socket{fd};\n#if defined(__APPLE__)\n    socket.domain_ = domain;\n#endif  // defined(__APPLE__)\n    return socket;\n#endif  // defined(xgboost_IS_MINGW)\n  }\n\n  static TCPSocket *CreatePtr(SockDomain domain) {\n#if defined(xgboost_IS_MINGW)\n    MingWError();\n    return nullptr;\n#else\n    auto fd = socket(static_cast<std::int32_t>(domain), SOCK_STREAM, 0);\n    if (fd == InvalidSocket()) {\n      system::ThrowAtError(\"socket\");\n    }\n    auto socket = 
new TCPSocket{fd};\n\n#if defined(__APPLE__)\n    socket->domain_ = domain;\n#endif  // defined(__APPLE__)\n    return socket;\n#endif  // defined(xgboost_IS_MINGW)\n  }\n};\n\n/**\n * @brief Connect to remote address, returns the error code if failed.\n *\n * @param host   Host IP address.\n * @param port   Connection port.\n * @param retry  Number of retries to attempt.\n * @param timeout  Timeout of each connection attempt.\n * @param out_conn Output socket if the connection is successful. Value is invalid and undefined if\n *                 the connection failed.\n *\n * @return Connection status.\n */\n[[nodiscard]] Result Connect(xgboost::StringView host, std::int32_t port, std::int32_t retry,\n                             std::chrono::seconds timeout,\n                             xgboost::collective::TCPSocket *out_conn);\n\n/**\n * @brief Get the local host name.\n */\n[[nodiscard]] Result GetHostName(std::string *p_out);\n\n/**\n * @brief inet_ntop\n */\ntemplate <typename H>\nResult INetNToP(H const &host, std::string *p_out) {\n  std::string &ip = *p_out;\n  switch (host->h_addrtype) {\n    case AF_INET: {\n      auto addr = reinterpret_cast<struct in_addr *>(host->h_addr_list[0]);\n      char str[INET_ADDRSTRLEN];\n      inet_ntop(AF_INET, addr, str, INET_ADDRSTRLEN);\n      ip = str;\n      break;\n    }\n    case AF_INET6: {\n      auto addr = reinterpret_cast<struct in6_addr *>(host->h_addr_list[0]);\n      char str[INET6_ADDRSTRLEN];\n      inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN);\n      ip = str;\n      break;\n    }\n    default: {\n      return Fail(\"Invalid address type.\");\n    }\n  }\n  return Success();\n}\n}  // namespace collective\n}  // namespace xgboost\n\n#undef xgboost_CHECK_SYS_CALL\n"
  },
  {
    "path": "include/xgboost/context.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file context.h\n */\n#ifndef XGBOOST_CONTEXT_H_\n#define XGBOOST_CONTEXT_H_\n\n#include <xgboost/base.h>       // for bst_d_ordinal_t\n#include <xgboost/logging.h>    // for CHECK_GE\n#include <xgboost/parameter.h>  // for XGBoostParameter\n\n#include <cstdint>      // for int16_t, int32_t, int64_t\n#include <memory>       // for shared_ptr\n#include <random>       // for mt19937\n#include <string>       // for string, to_string\n#include <type_traits>  // for invoke_result_t, is_same_v, underlying_type_t\n\nnamespace xgboost {\n\nclass Json;\nstruct CUDAContext;\n/**\n * @brief Define mt19937 as default type Random Engine.\n */\nusing RandomEngine = std::mt19937;\n\n// symbolic names\nstruct DeviceSym {\n  static auto constexpr CPU() { return \"cpu\"; }\n  static auto constexpr CUDA() { return \"cuda\"; }\n  static auto constexpr SyclDefault() { return \"sycl\"; }\n  static auto constexpr SyclCPU() { return \"sycl:cpu\"; }\n  static auto constexpr SyclGPU() { return \"sycl:gpu\"; }\n};\n\n/**\n * @brief A type for device ordinal. 
The type is packed into 32-bit for efficient use in\n *        viewing types like `linalg::TensorView`.\n */\nstruct DeviceOrd {\n  // Constant representing the device ID of CPU.\n  static bst_d_ordinal_t constexpr CPUOrdinal() { return -1; }\n  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }\n\n  enum Type : std::int16_t {\n    kCPU = 0,\n    kCUDA = 1,\n    kSyclDefault = 2,\n    kSyclCPU = 3,\n    kSyclGPU = 4\n  } device{kCPU};\n  // CUDA or Sycl device ordinal.\n  bst_d_ordinal_t ordinal{CPUOrdinal()};\n\n  [[nodiscard]] bool IsCUDA() const { return device == kCUDA; }\n  [[nodiscard]] bool IsCPU() const { return device == kCPU; }\n  [[nodiscard]] bool IsSyclDefault() const { return device == kSyclDefault; }\n  [[nodiscard]] bool IsSyclCPU() const { return device == kSyclCPU; }\n  [[nodiscard]] bool IsSyclGPU() const { return device == kSyclGPU; }\n  [[nodiscard]] bool IsSycl() const { return (IsSyclDefault() || IsSyclCPU() || IsSyclGPU()); }\n\n  constexpr DeviceOrd() = default;\n  constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}\n\n  constexpr DeviceOrd(DeviceOrd const& that) = default;\n  constexpr DeviceOrd& operator=(DeviceOrd const& that) = default;\n  constexpr DeviceOrd(DeviceOrd&& that) = default;\n  constexpr DeviceOrd& operator=(DeviceOrd&& that) = default;\n\n  /**\n   * @brief Constructor for CPU.\n   */\n  [[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, CPUOrdinal()}; }\n  /**\n   * @brief Constructor for CUDA device.\n   *\n   * @param ordinal CUDA device ordinal.\n   */\n  [[nodiscard]] static constexpr auto CUDA(bst_d_ordinal_t ordinal) {\n    return DeviceOrd{kCUDA, ordinal};\n  }\n  /**\n   * @brief Constructor for SYCL.\n   *\n   * @param ordinal SYCL device ordinal.\n   */\n  [[nodiscard]] constexpr static auto SyclDefault(bst_d_ordinal_t ordinal = -1) {\n    return DeviceOrd{kSyclDefault, ordinal};\n  }\n  /**\n   * @brief Constructor for SYCL CPU.\n   *\n   * @param 
ordinal SYCL CPU device ordinal.\n   */\n  [[nodiscard]] constexpr static auto SyclCPU(bst_d_ordinal_t ordinal = -1) {\n    return DeviceOrd{kSyclCPU, ordinal};\n  }\n\n  /**\n   * @brief Constructor for SYCL GPU.\n   *\n   * @param ordinal SYCL GPU device ordinal.\n   */\n  [[nodiscard]] constexpr static auto SyclGPU(bst_d_ordinal_t ordinal = -1) {\n    return DeviceOrd{kSyclGPU, ordinal};\n  }\n\n  [[nodiscard]] bool operator==(DeviceOrd const& that) const {\n    return device == that.device && ordinal == that.ordinal;\n  }\n  [[nodiscard]] bool operator!=(DeviceOrd const& that) const { return !(*this == that); }\n  /**\n   * @brief Get a string representation of the device and the ordinal.\n   */\n  [[nodiscard]] std::string Name() const {\n    switch (device) {\n      case DeviceOrd::kCPU:\n        return DeviceSym::CPU();\n      case DeviceOrd::kCUDA:\n        return DeviceSym::CUDA() + (':' + std::to_string(ordinal));\n      case DeviceOrd::kSyclDefault:\n        return DeviceSym::SyclDefault() + (':' + std::to_string(ordinal));\n      case DeviceOrd::kSyclCPU:\n        return DeviceSym::SyclCPU() + (':' + std::to_string(ordinal));\n      case DeviceOrd::kSyclGPU:\n        return DeviceSym::SyclGPU() + (':' + std::to_string(ordinal));\n      default: {\n        LOG(FATAL) << \"Unknown device.\";\n        return \"\";\n      }\n    }\n  }\n};\n\nstatic_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));\n\nstd::ostream& operator<<(std::ostream& os, DeviceOrd ord);\n\n/**\n * @brief Runtime context for XGBoost. 
Contains information like threads and device.\n */\nstruct Context : public XGBoostParameter<Context> {\n private:\n  // User interfacing parameter for device ordinal\n  std::string device{DeviceSym::CPU()};  // NOLINT\n  // The device ordinal set by user\n  DeviceOrd device_{DeviceOrd::CPU()};\n\n public:\n  static std::int64_t constexpr kDefaultSeed = 0;\n\n public:\n  Context();\n\n  void Init(Args const& kwargs);\n\n  template <typename Container>\n  Args UpdateAllowUnknown(Container const& kwargs) {\n    auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);\n    this->SetDeviceOrdinal(kwargs);\n    return args;\n  }\n\n  // The number of threads to use if OpenMP is enabled. If equals 0, use the system default.\n  std::int32_t nthread{0};  // NOLINT\n  // stored random seed\n  std::int64_t seed{kDefaultSeed};\n  // whether seed the PRNG each iteration\n  bool seed_per_iteration{false};\n  // fail when gpu_id is invalid\n  bool fail_on_invalid_gpu_id{false};\n  bool validate_parameters{false};\n\n  /**\n   * @brief Returns the automatically chosen number of threads based on the `nthread`\n   *        parameter and the system setting.\n   */\n  [[nodiscard]] std::int32_t Threads() const;\n  /**\n   * @brief Is XGBoost running on CPU?\n   */\n  [[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }\n  /**\n   * @brief Is XGBoost running on a CUDA device?\n   */\n  [[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }\n  /**\n   * @brief Is XGBoost running on the default SYCL device?\n   */\n  [[nodiscard]] bool IsSyclDefault() const { return Device().IsSyclDefault(); }\n  /**\n   * @brief Is XGBoost running on a SYCL CPU?\n   */\n  [[nodiscard]] bool IsSyclCPU() const { return Device().IsSyclCPU(); }\n  /**\n   * @brief Is XGBoost running on a SYCL GPU?\n   */\n  [[nodiscard]] bool IsSyclGPU() const { return Device().IsSyclGPU(); }\n  /**\n   * @brief Is XGBoost running on any SYCL device?\n   */\n  [[nodiscard]] bool IsSycl() const 
{ return IsSyclDefault() || IsSyclCPU() || IsSyclGPU(); }\n\n  /**\n   * @brief Get the current device and ordinal.\n   */\n  [[nodiscard]] DeviceOrd Device() const { return device_; }\n\n  /**\n   * @brief Get the current device and ordinal, if it supports fp64,\n            otherwise returns default CPU\n   */\n  [[nodiscard]] DeviceOrd DeviceFP64() const;\n\n  /**\n   * @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.\n   */\n  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }\n  /**\n   * @brief Name of the current device.\n   */\n  [[nodiscard]] std::string DeviceName() const { return Device().Name(); }\n  /**\n   * @brief Get a CUDA device context for allocator and stream.\n   */\n  [[nodiscard]] CUDAContext const* CUDACtx() const;\n  /**\n   * @brief Get the random engine.\n   */\n  [[nodiscard]] RandomEngine& Rng() const { return rng_; }\n\n  [[nodiscard]] Json ToJson() const;\n  void FromJson(Json const& in);\n\n  /**\n   * @brief Make a CUDA context based on the current context.\n   *\n   * @param ordinal The CUDA device ordinal.\n   */\n  [[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {\n    Context ctx = *this;\n    return ctx.SetDevice(DeviceOrd::CUDA(ordinal));\n  }\n  /**\n   * @brief Make a CPU context based on the current context.\n   */\n  [[nodiscard]] Context MakeCPU() const {\n    Context ctx = *this;\n    return ctx.SetDevice(DeviceOrd::CPU());\n  }\n\n  /**\n   * @brief Call function based on the current device.\n   */\n  template <typename CPUFn, typename CUDAFn>\n  decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn) const {\n    static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<CUDAFn>>);\n    switch (this->Device().device) {\n      case DeviceOrd::kCPU:\n        return cpu_fn();\n      case DeviceOrd::kCUDA:\n        return cuda_fn();\n      default:\n        // Do not use the device name as this is likely an internal error, the name\n  
      // wouldn't be valid.\n        if (this->Device().IsSycl()) {\n          LOG(WARNING) << \"The requested feature doesn't have SYCL specific implementation yet. \"\n                       << \"CPU implementation is used\";\n          return cpu_fn();\n        } else {\n          LOG(FATAL) << \"Unknown device type:\"\n                     << static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);\n          break;\n        }\n    }\n    return std::invoke_result_t<CPUFn>();\n  }\n\n  /**\n   * @brief Call function for sycl devices\n   */\n  template <typename CPUFn, typename CUDAFn, typename SYCLFn>\n  decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn, SYCLFn&& sycl_fn) const {\n    static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<SYCLFn>>);\n    if (this->Device().IsSycl()) {\n      return sycl_fn();\n    } else {\n      return DispatchDevice(cpu_fn, cuda_fn);\n    }\n  }\n\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(Context) {\n    DMLC_DECLARE_FIELD(seed)\n        .set_default(kDefaultSeed)\n        .describe(\"Random number seed during training.\");\n    DMLC_DECLARE_ALIAS(seed, random_state);\n    DMLC_DECLARE_FIELD(seed_per_iteration)\n        .set_default(false)\n        .describe(\"Seed PRNG determnisticly via iterator number.\");\n    DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe(\"Device ordinal.\");\n    DMLC_DECLARE_FIELD(nthread).set_default(0).describe(\"Number of threads to use.\");\n    DMLC_DECLARE_ALIAS(nthread, n_jobs);\n    DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)\n        .set_default(false)\n        .describe(\"Fail with error when gpu_id is invalid.\");\n    DMLC_DECLARE_FIELD(validate_parameters)\n        .set_default(false)\n        .describe(\"Enable checking whether parameters are used or not.\");\n  }\n\n private:\n  void SetDeviceOrdinal(Args const& kwargs);\n  Context& SetDevice(DeviceOrd d) {\n    this->device = (this->device_ = 
d).Name();\n    return *this;\n  }\n\n  // mutable for lazy cuda context initialization. This avoids initializing CUDA at load.\n  // shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define\n  // p_impl while trying to hide CUDA code from the host compiler.\n  mutable std::shared_ptr<CUDAContext> cuctx_;\n  mutable RandomEngine rng_;\n  // cached value for CFS CPU limit. (used in containerized env)\n  std::int32_t cfs_cpu_count_;  // NOLINT\n};\n}  // namespace xgboost\n\n#endif  // XGBOOST_CONTEXT_H_\n"
  },
  {
    "path": "include/xgboost/data.h",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file data.h\n * \\brief The input data structure of xgboost.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_DATA_H_\n#define XGBOOST_DATA_H_\n\n#include <dmlc/base.h>\n#include <dmlc/io.h>          // for Stream\n#include <dmlc/serializer.h>  // for Handler\n#include <xgboost/base.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/linalg.h>\n#include <xgboost/span.h>\n#include <xgboost/string_view.h>\n\n#include <algorithm>\n#include <array>    // for array\n#include <cstdint>  // for int32_t, uint8_t\n#include <limits>\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\n// forward declare dmatrix.\nclass DMatrix;\nstruct Context;\n\n/*! \\brief data type accepted by xgboost interface */\nenum class DataType : uint8_t { kFloat32 = 1, kDouble = 2, kUInt32 = 3, kUInt64 = 4, kStr = 5 };\n\nenum class FeatureType : uint8_t { kNumerical = 0, kCategorical = 1 };\n\nenum class DataSplitMode : int { kRow = 0, kCol = 1 };\n\n// Forward declaration of the container used by the meta info.\nclass CatContainer;\n\n/** @brief Used as a reference to a linalg::Matrix, or a vector */\nstruct TypedArrayRef {\n  using SizeType = linalg::VectorView<float>::SizeType;\n  // 2-dim is the maximum for return type, we can use larger ones if needed.\n  using Shape = std::array<SizeType, 2>;\n\n  DataType dtype{DataType::kFloat32};\n  Shape shape{0, 0};\n  SizeType ndim{0};\n  void const* data{nullptr};\n\n  [[nodiscard]] std::string ArrayInterfaceStr() const;\n  [[nodiscard]] SizeType Size() const {\n    if (ndim == 1) {\n      return shape[0];\n    } else {\n      return shape[0] * shape[1];\n    }\n  }\n};\n\n/**\n * @brief Meta information about dataset, always sit in memory.\n */\nclass MetaInfo {\n public:\n  /*! \\brief number of data fields in MetaInfo */\n  static constexpr uint64_t kNumField = 13;\n\n  /*! 
\\brief number of rows in the data */\n  bst_idx_t num_row_{0};  // NOLINT\n  /*! \\brief number of columns in the data */\n  uint64_t num_col_{0};  // NOLINT\n  /*! \\brief number of nonzero entries in the data */\n  uint64_t num_nonzero_{0};  // NOLINT\n  /*! \\brief label of each instance */\n  linalg::Tensor<float, 2> labels;\n  /*! \\brief data split mode */\n  DataSplitMode data_split_mode{DataSplitMode::kRow};\n  /*!\n   * \\brief the index of begin and end of a group\n   *  needed when the learning task is ranking.\n   */\n  std::vector<bst_group_t> group_ptr_;  // NOLINT\n  /*! \\brief weights of each instance, optional */\n  HostDeviceVector<bst_float> weights_;  // NOLINT\n  /*!\n   * \\brief initialized margins,\n   * if specified, xgboost will start from this init margin\n   * can be used to specify initial prediction to boost from.\n   */\n  linalg::Matrix<float> base_margin_;  // NOLINT\n  /*!\n   * \\brief lower bound of the label, to be used for survival analysis (censored regression)\n   */\n  HostDeviceVector<bst_float> labels_lower_bound_;  // NOLINT\n  /*!\n   * \\brief upper bound of the label, to be used for survival analysis (censored regression)\n   */\n  HostDeviceVector<bst_float> labels_upper_bound_;  // NOLINT\n\n  /*!\n   * \\brief Name of type for each feature provided by users. Eg. \"int\"/\"float\"/\"i\"/\"q\"\n   */\n  std::vector<std::string> feature_type_names;\n  /*!\n   * \\brief Name for each feature.\n   */\n  std::vector<std::string> feature_names;\n  /*\n   * \\brief Type of each feature.  
Automatically set when feature_type_names is specified.\n   */\n  HostDeviceVector<FeatureType> feature_types;\n  /*\n   * \\brief Weight of each feature, used to define the probability of each feature being\n   *        selected when using column sampling.\n   */\n  HostDeviceVector<float> feature_weights;\n\n  MetaInfo();\n  MetaInfo(MetaInfo&& that) = default;\n  MetaInfo(MetaInfo const& that) = delete;\n  MetaInfo& operator=(MetaInfo&& that) = default;\n  MetaInfo& operator=(MetaInfo const& that) = delete;\n\n  /**\n   * @brief Validate all metainfo.\n   */\n  void Validate(DeviceOrd device) const;\n  /**\n   * @brief Slice the meta info.\n   *\n   * The device of ridxs is specified by the ctx object.\n   *\n   * @param ridxs Index of selected rows.\n   * @param nnz   The number of non-missing values.\n   */\n  MetaInfo Slice(Context const* ctx, common::Span<bst_idx_t const> ridxs, bst_idx_t nnz) const;\n\n  MetaInfo Copy() const;\n  /**\n   * @brief Whether the matrix is dense.\n   */\n  bool IsDense() const { return num_col_ * num_row_ == num_nonzero_; }\n  /*!\n   * \\brief Get weight of each instance.\n   * \\param i Instance index.\n   * \\return The weight.\n   */\n  inline bst_float GetWeight(size_t i) const {\n    return weights_.Size() != 0 ? weights_.HostVector()[i] : 1.0f;\n  }\n  /*! \\brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */\n  const std::vector<size_t>& LabelAbsSort(Context const* ctx) const;\n  /*! 
\\brief clear all the information */\n  void Clear();\n  /*!\n   * \\brief Load the Meta info from binary stream.\n   * \\param fi The input stream\n   */\n  void LoadBinary(dmlc::Stream* fi);\n  /*!\n   * \\brief Save the Meta info to binary stream\n   * \\param fo The output stream.\n   */\n  void SaveBinary(dmlc::Stream* fo) const;\n  /**\n   * @brief Set information in the meta info with array interface.\n   *\n   * @param key The key of the information.\n   * @param in_array String representation of json format array interface.\n   */\n  void SetInfo(Context const& ctx, StringView key, StringView in_array);\n  /** @brief Return an array reference for a meta info. */\n  [[nodiscard]] TypedArrayRef GetInfo(Context const* ctx, StringView key) const;\n\n  void SetFeatureInfo(const char* key, const char** info, const bst_ulong size);\n  void GetFeatureInfo(const char* field, std::vector<std::string>* out_str_vecs) const;\n\n  /**\n   * @brief Extend with other MetaInfo.\n   *\n   * @param that The other MetaInfo object.\n   *\n   * @param accumulate_rows Whether rows need to be accumulated in this function.  If\n   *                        client code knows number of rows in advance, set this\n   *                        parameter to false.\n   * @param check_column Whether the extend method should check the consistency of\n   *                     columns.\n   */\n  void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);\n  /**\n   * @brief Synchronize the number of columns across all workers.\n   *\n   * Normally we just need to find the maximum number of columns across all workers, but\n   * in vertical federated learning, since each worker loads its own list of columns,\n   * we need to sum them.\n   */\n  void SynchronizeNumberOfColumns(Context const* ctx, DataSplitMode split_mode);\n\n  /** @brief Whether the data is split row-wise. 
*/\n  [[nodiscard]] bool IsRowSplit() const { return data_split_mode == DataSplitMode::kRow; }\n  /** @brief Whether the data is split column-wise. */\n  [[nodiscard]] bool IsColumnSplit() const { return data_split_mode == DataSplitMode::kCol; }\n  /** @brief Whether this is a learning to rank data. */\n  [[nodiscard]] bool IsRanking() const { return !group_ptr_.empty(); }\n\n  /**\n   * @brief A convenient method to check if we are doing vertical federated learning, which requires\n   * some special processing.\n   */\n  [[nodiscard]] bool IsVerticalFederated() const;\n\n  /*!\n   * \\brief A convenient method to check if the MetaInfo should contain labels.\n   *\n   * Normally we assume labels are available everywhere. The only exception is in vertical federated\n   * learning where labels are only available on worker 0.\n   */\n  bool ShouldHaveLabels() const;\n  /**\n   * @brief Flag for whether the DMatrix has categorical features.\n   */\n  bool HasCategorical() const { return has_categorical_; }\n  /**\n   * @brief Getters for categories.\n   */\n  [[nodiscard]] CatContainer const* Cats() const;\n  [[nodiscard]] CatContainer* Cats();\n  [[nodiscard]] std::shared_ptr<CatContainer const> CatsShared() const;\n  /**\n   * @brief Setter for categories.\n   */\n  void Cats(std::shared_ptr<CatContainer> cats);\n\n private:\n  void SetInfoFromHost(Context const* ctx, StringView key, Json arr);\n  void SetInfoFromCUDA(Context const* ctx, StringView key, Json arr);\n\n  /*! \\brief argsort of labels */\n  mutable std::vector<size_t> label_order_cache_;\n  bool has_categorical_{false};\n\n  std::shared_ptr<CatContainer> cats_;\n};\n\n/*! \\brief Element from a sparse vector */\nstruct Entry {\n  /*! \\brief feature index */\n  bst_feature_t index;\n  /*! \\brief feature value */\n  bst_float fvalue;\n  /*! 
\\brief default constructor */\n  Entry() = default;\n  /*!\n   * \\brief constructor with index and value\n   * \\param index The feature or row index.\n   * \\param fvalue The feature value.\n   */\n  XGBOOST_DEVICE Entry(bst_feature_t index, bst_float fvalue) : index(index), fvalue(fvalue) {}\n  /*! \\brief reversely compare feature values */\n  inline static bool CmpValue(const Entry& a, const Entry& b) { return a.fvalue < b.fvalue; }\n  static bool CmpIndex(Entry const& a, Entry const& b) { return a.index < b.index; }\n  inline bool operator==(const Entry& other) const {\n    return (this->index == other.index && this->fvalue == other.fvalue);\n  }\n};\n\n/**\n * @brief Parameters for constructing histogram index batches.\n */\nstruct BatchParam {\n  /**\n   * @brief Maximum number of bins per feature for histograms.\n   */\n  bst_bin_t max_bin{0};\n  /**\n   * @brief Hessian, used for sketching with future approx implementation.\n   */\n  common::Span<float const> hess;\n  /**\n   * @brief Whether should we force DMatrix to regenerate the batch.  Only used for\n   *        GHistIndex.\n   */\n  bool regen{false};\n  /**\n   * @brief Forbid regenerating the gradient index. Used for internal validation.\n   */\n  bool forbid_regen{false};\n  /**\n   * @brief Parameter used to generate column matrix for hist.\n   */\n  double sparse_thresh{std::numeric_limits<double>::quiet_NaN()};\n  /**\n   * @brief Used for GPU external memory. 
Whether to copy the data into device.\n   *\n   * This affects only the current round of iteration.\n   */\n  bool prefetch_copy{true};\n  /**\n   * @brief The number of batches to pre-fetch for external memory.\n   */\n  std::int32_t n_prefetch_batches{3};\n  /**\n   * @brief Exact or others that don't need histogram.\n   */\n  BatchParam() = default;\n  /**\n   * @brief Used by the hist tree method.\n   */\n  BatchParam(bst_bin_t max_bin, double sparse_thresh)\n      : max_bin{max_bin}, sparse_thresh{sparse_thresh} {}\n  /**\n   * @brief Used by the approx tree method.\n   *\n   *   Get batch with sketch weighted by hessian.  The batch will be regenerated if the\n   *   span is changed, so caller should keep the span for each iteration.\n   */\n  BatchParam(bst_bin_t max_bin, common::Span<float const> hessian, bool regenerate)\n      : max_bin{max_bin}, hess{hessian}, regen{regenerate} {}\n\n  [[nodiscard]] bool ParamNotEqual(BatchParam const& other) const {\n    // Check non-floating parameters.\n    bool cond = max_bin != other.max_bin;\n    // Check sparse thresh.\n    bool l_nan = std::isnan(sparse_thresh);\n    bool r_nan = std::isnan(other.sparse_thresh);\n    bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (sparse_thresh != other.sparse_thresh));\n    cond |= st_chg;\n\n    return cond;\n  }\n  [[nodiscard]] bool Initialized() const { return max_bin != 0; }\n  /**\n   * @brief Make a copy of self for DMatrix to describe how its existing index was generated.\n   */\n  [[nodiscard]] BatchParam MakeCache() const {\n    auto p = *this;\n    // These parameters have nothing to do with how the gradient index was generated in the\n    // first place.\n    p.regen = false;\n    p.forbid_regen = false;\n    return p;\n  }\n};\n\nstruct HostSparsePageView {\n  using Inst = common::Span<Entry const>;\n\n  common::Span<bst_idx_t const> offset;\n  common::Span<Entry const> data;\n\n  [[nodiscard]] Inst operator[](std::size_t i) const {\n    auto size = 
*(offset.data() + i + 1) - *(offset.data() + i);\n    return {data.data() + *(offset.data() + i), static_cast<Inst::index_type>(size)};\n  }\n\n  [[nodiscard]] size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; }\n};\n\n/*!\n * \\brief In-memory storage unit of sparse batch, stored in CSR format.\n */\nclass SparsePage {\n public:\n  // Offset for each row.\n  HostDeviceVector<bst_idx_t> offset;\n  /*! \\brief the data of the segments */\n  HostDeviceVector<Entry> data;\n\n  size_t base_rowid{0};\n\n  /*! \\brief an instance of sparse vector in the batch */\n  using Inst = common::Span<Entry const>;\n\n  [[nodiscard]] HostSparsePageView GetView() const {\n    return {offset.ConstHostSpan(), data.ConstHostSpan()};\n  }\n\n  /*! \\brief constructor */\n  SparsePage() { this->Clear(); }\n\n  SparsePage(SparsePage const& that) = delete;\n  SparsePage(SparsePage&& that) = default;\n  SparsePage& operator=(SparsePage const& that) = delete;\n  SparsePage& operator=(SparsePage&& that) = default;\n  virtual ~SparsePage() = default;\n\n  /*! \\return Number of instances in the page. */\n  [[nodiscard]] size_t Size() const { return offset.Size() == 0 ? 0 : offset.Size() - 1; }\n\n  /*! \\return estimation of memory cost of this page */\n  [[nodiscard]] size_t MemCostBytes() const {\n    return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);\n  }\n\n  /*! \\brief clear the page */\n  inline void Clear() {\n    base_rowid = 0;\n    auto& offset_vec = offset.HostVector();\n    offset_vec.clear();\n    offset_vec.push_back(0);\n    data.HostVector().clear();\n  }\n\n  /*! \\brief Set the base row id for this page. 
*/\n  inline void SetBaseRowId(size_t row_id) { base_rowid = row_id; }\n\n  [[nodiscard]] SparsePage GetTranspose(int num_columns, int32_t n_threads) const;\n\n  /**\n   * \\brief Sort the column index.\n   */\n  void SortIndices(int32_t n_threads);\n  /**\n   * \\brief Check whether the column index is sorted.\n   */\n  [[nodiscard]] bool IsIndicesSorted(int32_t n_threads) const;\n  /**\n   * \\brief Reindex the column index with an offset.\n   */\n  void Reindex(uint64_t feature_offset, int32_t n_threads);\n\n  void SortRows(int32_t n_threads);\n\n  /**\n   * \\brief Pushes external data batch onto this page\n   *\n   * \\tparam  AdapterBatchT\n   * \\param batch\n   * \\param missing\n   * \\param nthread\n   *\n   * \\return  The maximum number of columns encountered in this input batch. Useful when pushing many adapter batches to work out the total number of columns.\n   */\n  template <typename AdapterBatchT>\n  bst_idx_t Push(AdapterBatchT const& batch, float missing, std::int32_t nthread);\n\n  /*!\n   * \\brief Push a sparse page\n   * \\param batch the row page\n   */\n  void Push(const SparsePage& batch);\n  /*!\n   * \\brief Push a SparsePage stored in CSC format\n   * \\param batch The row batch to be pushed\n   */\n  void PushCSC(const SparsePage& batch);\n};\n\nclass CSCPage : public SparsePage {\n public:\n  CSCPage() : SparsePage() {}\n  explicit CSCPage(SparsePage page) : SparsePage(std::move(page)) {}\n};\n\n/**\n * \\brief Sparse page for exporting DMatrix. 
Same as SparsePage, just a different type to\n *        prevent being used internally.\n */\nclass ExtSparsePage {\n public:\n  std::shared_ptr<SparsePage const> page;\n  explicit ExtSparsePage(std::shared_ptr<SparsePage const> p) : page{std::move(p)} {}\n};\n\nclass SortedCSCPage : public SparsePage {\n public:\n  SortedCSCPage() : SparsePage() {}\n  explicit SortedCSCPage(SparsePage page) : SparsePage(std::move(page)) {}\n};\n\nclass EllpackPage;\nclass GHistIndexMatrix;\n\ntemplate <typename T>\nclass BatchIteratorImpl {\n public:\n  using iterator_category = std::forward_iterator_tag;  // NOLINT\n  virtual ~BatchIteratorImpl() = default;\n  virtual const T& operator*() const = 0;\n  virtual BatchIteratorImpl& operator++() = 0;\n  [[nodiscard]] virtual bool AtEnd() const = 0;\n  virtual std::shared_ptr<T const> Page() const = 0;\n};\n\ntemplate <typename T>\nclass BatchIterator {\n public:\n  using iterator_category = std::forward_iterator_tag;  // NOLINT\n  explicit BatchIterator(BatchIteratorImpl<T>* impl) { impl_.reset(impl); }\n  explicit BatchIterator(std::shared_ptr<BatchIteratorImpl<T>> impl) { impl_ = impl; }\n\n  BatchIterator& operator++() {\n    CHECK(impl_ != nullptr);\n    ++(*impl_);\n    return *this;\n  }\n\n  const T& operator*() const {\n    CHECK(impl_ != nullptr);\n    return *(*impl_);\n  }\n\n  [[nodiscard]] bool operator!=(const BatchIterator&) const { return !this->AtEnd(); }\n\n  [[nodiscard]] bool AtEnd() const {\n    CHECK(impl_ != nullptr);\n    return impl_->AtEnd();\n  }\n\n  [[nodiscard]] std::shared_ptr<T const> Page() const { return impl_->Page(); }\n\n private:\n  std::shared_ptr<BatchIteratorImpl<T>> impl_;\n};\n\ntemplate <typename T>\nclass BatchSet {\n public:\n  explicit BatchSet(BatchIterator<T> begin_iter) : begin_iter_(std::move(begin_iter)) {}\n  BatchIterator<T> begin() { return begin_iter_; }              // NOLINT\n  BatchIterator<T> end() { return BatchIterator<T>(nullptr); }  // NOLINT\n\n private:\n  
BatchIterator<T> begin_iter_;\n};\n\nstruct XGBAPIThreadLocalEntry;\n\n// Configuration for external memory DMatrix.\nstruct ExtMemConfig {\n  // Cache prefix, not used if the cache is in the host memory. (on_host is true)\n  std::string cache;\n  // Whether the ellpack page is stored in the host memory.\n  bool on_host;\n  // Host cache/Total cache for the GPU impl.\n  float cache_host_ratio;\n  // Minimum number of bytes for each ellpack page in cache. Only used for in-host\n  // ExtMemQdm.\n  std::int64_t min_cache_page_bytes;\n  // Missing value.\n  float missing;\n  // The number of CPU threads.\n  std::int32_t n_threads{0};\n  // The ratio of the cache that can be compressed. Used for testing.\n  float hw_decomp_ratio{std::numeric_limits<float>::quiet_NaN()};\n  // Fallback to using nvcomp. Used for testing.\n  bool allow_decomp_fallback{false};\n\n  ExtMemConfig() = delete;\n  ExtMemConfig(std::string cache, bool on_host, float h_ratio, std::int64_t min_cache,\n               float missing, std::int32_t n_threads)\n      : cache{std::move(cache)},\n        on_host{on_host},\n        cache_host_ratio{h_ratio},\n        min_cache_page_bytes{min_cache},\n        missing{missing},\n        n_threads{n_threads} {}\n\n  ExtMemConfig& SetParamsForTest(float _hw_decomp_ratio, bool _allow_decomp_fallback) {\n    this->hw_decomp_ratio = _hw_decomp_ratio;\n    this->allow_decomp_fallback = _allow_decomp_fallback;\n    return *this;\n  }\n};\n\n/**\n * @brief Internal data structure used by XGBoost to hold all external data.\n *\n *    There are multiple variants of the DMatrix class and can be accessed through the\n *    @ref Create() methods. The DMatrix itself holds the predictor `X`, and other data\n *    including labels and sample weights are stored in the @ref MetaInfo class.\n */\nclass DMatrix {\n public:\n  /*! 
\\brief default constructor */\n  DMatrix() = default;\n  /** @brief meta information of the dataset */\n  [[nodiscard]] virtual MetaInfo& Info() = 0;\n  virtual void SetInfo(const char* key, std::string const& interface_str) {\n    auto const& ctx = *this->Ctx();\n    this->Info().SetInfo(ctx, key, StringView{interface_str});\n  }\n  /** @brief meta information of the dataset */\n  [[nodiscard]] virtual const MetaInfo& Info() const = 0;\n\n  /*! \\brief Get thread local memory for returning data from DMatrix. */\n  [[nodiscard]] XGBAPIThreadLocalEntry& GetThreadLocal() const;\n  /**\n   * @brief Get the context object of this DMatrix.  The context is created during construction of\n   *        DMatrix with user specified `nthread` parameter.\n   */\n  [[nodiscard]] virtual Context const* Ctx() const = 0;\n\n  /**\n   * @brief Gets batches. Use range based for loop over BatchSet to access individual batches.\n   */\n  template <typename T>\n  BatchSet<T> GetBatches();\n  template <typename T>\n  BatchSet<T> GetBatches(Context const* ctx);\n  template <typename T>\n  BatchSet<T> GetBatches(Context const* ctx, const BatchParam& param);\n  template <typename T>\n  [[nodiscard]] bool PageExists() const;\n\n  /**\n   * @return Whether the contains a single batch.\n   *\n   * The naming is legacy.\n   */\n  [[nodiscard]] bool SingleColBlock() const { return this->NumBatches() == 1; }\n  [[nodiscard]] virtual std::int32_t NumBatches() const { return 1; }\n\n  virtual ~DMatrix();\n\n  /**\n   * @brief Whether the matrix is dense.\n   */\n  [[nodiscard]] bool IsDense() const { return this->Info().IsDense(); }\n\n  /**\n   * @brief Load DMatrix from URI.\n   *\n   * @param uri The URI of input.\n   * @param silent Whether print information during loading.\n   * @param data_split_mode Indicate how the data was split beforehand.\n   * @return The created DMatrix.\n   */\n  static DMatrix* Load(const std::string& uri, bool silent = true,\n                       DataSplitMode 
data_split_mode = DataSplitMode::kRow);\n\n  /**\n   * @brief Creates a new DMatrix from an external data adapter.\n   *\n   * @tparam  AdapterT  Type of the adapter.\n   * @param [in,out]  adapter         View onto an external data.\n   * @param           missing         Values to count as missing.\n   * @param           nthread         Number of threads for construction.\n   * @param           cache_prefix    (Optional) The cache prefix for external memory.\n   * @param           data_split_mode (Optional) Data split mode.\n   *\n   * @return  a Created DMatrix.\n   */\n  template <typename AdapterT>\n  static DMatrix* Create(AdapterT* adapter, float missing, int nthread,\n                         const std::string& cache_prefix = \"\",\n                         DataSplitMode data_split_mode = DataSplitMode::kRow);\n\n  /**\n   * @brief Create a new Quantile based DMatrix used for histogram based algorithm.\n   *\n   * @tparam DataIterHandle         External iterator type, defined in C API.\n   * @tparam DMatrixHandle          DMatrix handle, defined in C API.\n   * @tparam DataIterResetCallback  Callback for reset, prototype defined in C API.\n   * @tparam XGDMatrixCallbackNext  Callback for next, prototype defined in C API.\n   *\n   * @param iter    External data iterator\n   * @param proxy   A handle to ProxyDMatrix\n   * @param ref     Reference Quantile DMatrix.\n   * @param reset   Callback for reset\n   * @param next    Callback for next\n   * @param missing Value that should be treated as missing.\n   * @param nthread number of threads used for initialization.\n   * @param max_bin Maximum number of bins.\n   *\n   * @return A created quantile based DMatrix.\n   */\n  template <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n            typename XGDMatrixCallbackNext>\n  static DMatrix* Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,\n                         DataIterResetCallback* reset, 
XGDMatrixCallbackNext* next, float missing,\n                         std::int32_t nthread, bst_bin_t max_bin);\n\n  /**\n   * @brief Create an external memory DMatrix with callbacks.\n   *\n   * @tparam DataIterHandle         External iterator type, defined in C API.\n   * @tparam DMatrixHandle          DMatrix handle, defined in C API.\n   * @tparam DataIterResetCallback  Callback for reset, prototype defined in C API.\n   * @tparam XGDMatrixCallbackNext  Callback for next, prototype defined in C API.\n   *\n   * @param iter    External data iterator\n   * @param proxy   A handle to ProxyDMatrix\n   * @param reset   Callback for reset\n   * @param next    Callback for next\n   * @param config  Configuration for the cache.\n   *\n   * @return A created external memory DMatrix.\n   */\n  template <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n            typename XGDMatrixCallbackNext>\n  static DMatrix* Create(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback* reset,\n                         XGDMatrixCallbackNext* next, ExtMemConfig const& config);\n\n  /**\n   * @brief Create an external memory quantile DMatrix with callbacks.\n   *\n   *     Parameters are a combination of the external memory DMatrix and the quantile DMatrix.\n   *\n   * @return A created external memory quantile DMatrix.\n   */\n  template <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n            typename XGDMatrixCallbackNext>\n  static DMatrix* Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,\n                         DataIterResetCallback* reset, XGDMatrixCallbackNext* next,\n                         bst_bin_t max_bin, ExtMemConfig const& config);\n\n  virtual DMatrix* Slice(common::Span<int32_t const> ridxs) = 0;\n\n  /**\n   * @brief Slice a DMatrix by columns.\n   *\n   * @param num_slices Total number of slices\n   * @param slice_id Index of the current slice\n   * 
@return DMatrix containing the slice of columns\n   */\n  virtual DMatrix* SliceCol(int num_slices, int slice_id) = 0;\n  /**\n   * @brief Accessor for the string representation of the categories.\n   */\n  [[nodiscard]] CatContainer const* Cats() const { return this->CatsShared().get(); }\n  [[nodiscard]] std::shared_ptr<CatContainer const> CatsShared() const {\n    return this->Info().CatsShared();\n  }\n\n protected:\n  virtual BatchSet<SparsePage> GetRowBatches() = 0;\n  virtual BatchSet<CSCPage> GetColumnBatches(Context const* ctx) = 0;\n  virtual BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) = 0;\n  virtual BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, BatchParam const& param) = 0;\n  virtual BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,\n                                                      BatchParam const& param) = 0;\n  virtual BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) = 0;\n\n  [[nodiscard]] virtual bool EllpackExists() const = 0;\n  [[nodiscard]] virtual bool GHistIndexExists() const = 0;\n  [[nodiscard]] virtual bool SparsePageExists() const = 0;\n};\n\ntemplate <>\ninline BatchSet<SparsePage> DMatrix::GetBatches() {\n  return GetRowBatches();\n}\n\ntemplate <>\ninline bool DMatrix::PageExists<EllpackPage>() const {\n  return this->EllpackExists();\n}\n\ntemplate <>\ninline bool DMatrix::PageExists<GHistIndexMatrix>() const {\n  return this->GHistIndexExists();\n}\n\ntemplate <>\ninline bool DMatrix::PageExists<SparsePage>() const {\n  return this->SparsePageExists();\n}\n\ntemplate <>\ninline BatchSet<SparsePage> DMatrix::GetBatches(Context const*) {\n  return GetRowBatches();\n}\n\ntemplate <>\ninline BatchSet<CSCPage> DMatrix::GetBatches(Context const* ctx) {\n  return GetColumnBatches(ctx);\n}\n\ntemplate <>\ninline BatchSet<SortedCSCPage> DMatrix::GetBatches(Context const* ctx) {\n  return GetSortedColumnBatches(ctx);\n}\n\ntemplate <>\ninline 
BatchSet<EllpackPage> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {\n  return GetEllpackBatches(ctx, param);\n}\n\ntemplate <>\ninline BatchSet<GHistIndexMatrix> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {\n  return GetGradientIndex(ctx, param);\n}\n\ntemplate <>\ninline BatchSet<ExtSparsePage> DMatrix::GetBatches(Context const* ctx, BatchParam const& param) {\n  return GetExtBatches(ctx, param);\n}\n}  // namespace xgboost\n\nDECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);\n\nnamespace dmlc {\nDMLC_DECLARE_TRAITS(is_pod, xgboost::Entry, true);\n\nnamespace serializer {\n\ntemplate <>\nstruct Handler<xgboost::Entry> {\n  inline static void Write(Stream* strm, const xgboost::Entry& data) {\n    strm->Write(data.index);\n    strm->Write(data.fvalue);\n  }\n\n  inline static bool Read(Stream* strm, xgboost::Entry* data) {\n    return strm->Read(&data->index) && strm->Read(&data->fvalue);\n  }\n};\n\n}  // namespace serializer\n}  // namespace dmlc\n#endif  // XGBOOST_DATA_H_\n"
  },
  {
    "path": "include/xgboost/feature_map.h",
    "content": "/*!\n * Copyright 2014-2021 by Contributors\n * \\file feature_map.h\n * \\brief Feature map data structure to help visualization and model dump.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_FEATURE_MAP_H_\n#define XGBOOST_FEATURE_MAP_H_\n\n#include <xgboost/logging.h>\n\n#include <vector>\n#include <string>\n#include <cstring>\n#include <iostream>\n\nnamespace xgboost {\n/*!\n * \\brief Feature map data structure to help text model dump.\n * TODO(tqchen) consider make it even more lightweight.\n */\nclass FeatureMap {\n public:\n  /*! \\brief type of feature maps */\n  enum Type {\n    kIndicator = 0,\n    kQuantitive = 1,\n    kInteger = 2,\n    kFloat = 3,\n    kCategorical = 4\n  };\n  /*!\n   * \\brief load feature map from input stream\n   * \\param is Input text stream\n   */\n  inline void LoadText(std::istream& is) { // NOLINT(*)\n    int fid;\n    std::string fname, ftype;\n    while (is >> fid >> fname >> ftype) {\n      this->PushBack(fid, fname.c_str(), ftype.c_str());\n    }\n  }\n  /*!\n   * \\brief push back feature map.\n   * \\param fid The feature index.\n   * \\param fname The feature name.\n   * \\param ftype The feature type.\n   */\n  inline void PushBack(int fid, const char *fname, const char *ftype) {\n    CHECK_EQ(fid, static_cast<int>(names_.size()));\n    names_.emplace_back(fname);\n    types_.push_back(GetType(ftype));\n  }\n  /*! \\brief clear the feature map */\n  inline void Clear() {\n    names_.clear();\n    types_.clear();\n  }\n  /*! \\return number of known features */\n  inline size_t Size() const {\n    return names_.size();\n  }\n  /*! \\return name of specific feature */\n  inline const char* Name(size_t idx) const {\n    CHECK_LT(idx,  names_.size()) << \"FeatureMap feature index exceed bound\";\n    return names_[idx].c_str();\n  }\n  /*! 
\\return type of specific feature */\n  Type TypeOf(size_t idx) const {\n    CHECK_LT(idx, names_.size()) << \"FeatureMap feature index exceed bound\";\n    return types_[idx];\n  }\n\n private:\n  /*!\n   * \\return feature type enum given name.\n   * \\param tname The type name.\n   * \\return The translated type.\n   */\n  inline static Type GetType(const char* tname) {\n    using std::strcmp;\n    if (!strcmp(\"i\", tname)) return kIndicator;\n    if (!strcmp(\"q\", tname)) return kQuantitive;\n    if (!strcmp(\"int\", tname)) return kInteger;\n    if (!strcmp(\"float\", tname)) return kFloat;\n    if (!strcmp(\"c\", tname)) return kCategorical;\n    LOG(FATAL) << \"unknown feature type, use i for indicator and q for quantity\";\n    return kIndicator;\n  }\n  /*! \\brief name of the feature */\n  std::vector<std::string> names_;\n  /*! \\brief type of the feature */\n  std::vector<Type> types_;\n};\n}  // namespace xgboost\n#endif  // XGBOOST_FEATURE_MAP_H_\n"
  },
  {
    "path": "include/xgboost/gbm.h",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file gbm.h\n * \\brief Interface of gradient booster,\n *  that learns through gradient statistics.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_GBM_H_\n#define XGBOOST_GBM_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/gradient.h>  // for GradientContainer\n#include <xgboost/host_device_vector.h>\n#include <xgboost/model.h>\n\n#include <functional>\n#include <memory>\n#include <string>\n#include <vector>\n\nnamespace xgboost {\n\nclass Json;\nclass FeatureMap;\nclass ObjFunction;\nclass CatContainer;\n\nstruct Context;\nstruct LearnerModelParam;\nstruct PredictionCacheEntry;\n\n/*!\n * \\brief interface of gradient boosting model.\n */\nclass GradientBooster : public Model, public Configurable {\n protected:\n  Context const* ctx_;\n  explicit GradientBooster(Context const* ctx) : ctx_{ctx} {}\n\n public:\n  /*! \\brief virtual destructor */\n  ~GradientBooster() override = default;\n  /**\n   * @brief Set the configuration of gradient boosting.\n   *  User must call configure once before InitModel and Training.\n   *\n   * @param cfg configurations on both training and model parameters.\n   */\n  virtual void Configure(Args const& cfg) = 0;\n\n  /**\n   * \\brief Slice a model using boosting index. The slice m:n indicates taking all trees\n   *        that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).\n   * \\param begin Beginning of boosted tree layer used for prediction.\n   * \\param end   End of booster layer. 
0 means do not limit trees.\n   * \\param out   Output gradient booster\n   */\n  virtual void Slice(bst_layer_t /*begin*/, bst_layer_t /*end*/, bst_layer_t /*step*/,\n                     GradientBooster* /*out*/, bool* /*out_of_bound*/) const {\n    LOG(FATAL) << \"Slice is not supported by the current booster.\";\n  }\n  /**\n   * @brief Return number of boosted rounds.\n   */\n  [[nodiscard]] virtual std::int32_t BoostedRounds() const = 0;\n  /**\n   * \\brief Whether the model has already been trained. When tree booster is chosen, then\n   *        returns true when there are existing trees.\n   */\n  [[nodiscard]] virtual bool ModelFitted() const = 0;\n  /**\n   * @brief perform update to the model(boosting)\n   *\n   * @param p_fmat feature matrix that provide access to features\n   * @param in_gpair address of the gradient pair statistics of the data\n   * @param prediction The output prediction cache entry that needs to be updated.\n   *                   the booster may change content of gpair\n   * @param obj The objective function used for boosting.\n   */\n  virtual void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair,\n                       PredictionCacheEntry* prediction, ObjFunction const* obj) = 0;\n\n  /**\n   * \\brief Generate predictions for given feature matrix\n   *\n   * \\param dmat     The feature matrix.\n   * \\param out_preds output vector to hold the predictions\n   * \\param training Whether the prediction value is used for training.  For dart booster\n   *                 drop out is performed during training.\n   * \\param begin    Beginning of boosted tree layer used for prediction.\n   * \\param end      End of booster layer. 
0 means do not limit trees.\n   */\n  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds, bool training,\n                            bst_layer_t begin, bst_layer_t end) = 0;\n\n  /**\n   * \\brief Inplace prediction.\n   *\n   * \\param           p_fmat    A proxy DMatrix that contains the data and related.\n   * \\param           missing   Missing value in the data.\n   * \\param [in,out]  out_preds The output preds.\n   * \\param           begin     (Optional) Beginning of boosted tree layer used for prediction.\n   * \\param           end       (Optional) End of booster layer. 0 means do not limit trees.\n   */\n  virtual void InplacePredict(std::shared_ptr<DMatrix>, float, PredictionCacheEntry*, bst_layer_t,\n                              bst_layer_t) const {\n    LOG(FATAL) << \"Inplace predict is not supported by the current booster.\";\n  }\n  /*!\n   * \\brief predict the leaf index of each tree, the output will be nsample * ntree vector\n   *        this is only valid in gbtree predictor\n   * \\param dmat feature matrix\n   * \\param out_preds output vector to hold the predictions\n   * \\param layer_begin Beginning of boosted tree layer used for prediction.\n   * \\param layer_end   End of booster layer. 0 means do not limit trees.\n   */\n  virtual void PredictLeaf(DMatrix *dmat,\n                           HostDeviceVector<bst_float> *out_preds,\n                           unsigned layer_begin, unsigned layer_end) = 0;\n\n  /*!\n   * \\brief feature contributions to individual predictions; the output will be a vector\n   *         of length (nfeats + 1) * num_output_group * nsample, arranged in that order\n   * \\param dmat feature matrix\n   * \\param out_contribs output vector to hold the contributions\n   * \\param layer_begin Beginning of boosted tree layer used for prediction.\n   * \\param layer_end   End of booster layer. 
0 means do not limit trees.\n   * \\param approximate use a faster (inconsistent) approximation of SHAP values\n   */\n  virtual void PredictContribution(DMatrix* dmat, HostDeviceVector<float>* out_contribs,\n                                   bst_layer_t layer_begin, bst_layer_t layer_end,\n                                   bool approximate = false) = 0;\n\n  virtual void PredictInteractionContributions(DMatrix* dmat, HostDeviceVector<float>* out_contribs,\n                                               bst_layer_t layer_begin, bst_layer_t layer_end,\n                                               bool approximate) = 0;\n\n  /**\n   * @brief dump the model in the requested format\n   * @param fmap feature map that may help give interpretations of feature\n   * @param with_stats extra statistics while dumping model\n   * @param format the format to dump the model in\n   * @return a vector of dump for boosters.\n   */\n  [[nodiscard]] virtual std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,\n                                                           std::string format) const = 0;\n\n  virtual void FeatureScore(std::string const& importance_type,\n                            common::Span<int32_t const> trees,\n                            std::vector<bst_feature_t>* features,\n                            std::vector<float>* scores) const = 0;\n  /**\n   * @brief Getter for categories.\n   */\n  [[nodiscard]] virtual CatContainer const* Cats() const {\n    LOG(FATAL) << \"Retrieving categories is not supported by the current booster.\";\n    return nullptr;\n  }\n  /**\n   * @brief create a gradient booster from given name\n   * @param name name of gradient booster\n   * @param generic_param Pointer to runtime parameters\n   * @param learner_model_param pointer to global model parameters\n   * @return The created booster.\n   */\n  static GradientBooster* Create(const std::string& name, Context const* ctx,\n                                 
LearnerModelParam const* learner_model_param);\n};\n\n/*!\n * \\brief Registry entry for gradient booster.\n */\nstruct GradientBoosterReg\n    : public dmlc::FunctionRegEntryBase<\n          GradientBoosterReg,\n          std::function<GradientBooster*(LearnerModelParam const* learner_model_param,\n                                         Context const* ctx)> > {};\n\n/*!\n * \\brief Macro to register gradient booster.\n *\n * \\code\n * // example of registering the gbtree booster\n * XGBOOST_REGISTER_GBM(GBTree, \"gbtree\")\n * .describe(\"Boosting tree ensembles.\")\n * .set_body([]() {\n *     return new GradientBooster<TStats>();\n *   });\n * \\endcode\n */\n#define XGBOOST_REGISTER_GBM(UniqueId, Name)                            \\\n  static DMLC_ATTRIBUTE_UNUSED ::xgboost::GradientBoosterReg &          \\\n  __make_ ## GradientBoosterReg ## _ ## UniqueId ## __ =                \\\n      ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(Name)\n\n}  // namespace xgboost\n#endif  // XGBOOST_GBM_H_\n"
  },
  {
    "path": "include/xgboost/global_config.h",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n * \\file global_config.h\n * \\brief Global configuration for XGBoost\n * \\author Hyunsu Cho\n */\n#ifndef XGBOOST_GLOBAL_CONFIG_H_\n#define XGBOOST_GLOBAL_CONFIG_H_\n\n#include <dmlc/thread_local.h>  // for ThreadLocalStore\n#include <xgboost/parameter.h>  // for XGBoostParameter\n\n#include <cstdint>  // for int32_t\n\nnamespace xgboost {\nstruct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {\n  std::int32_t verbosity{1};\n  bool use_rmm{false};\n  bool use_cuda_async_pool{false};\n  // This is not a dmlc parameter to avoid conflict with the context class.\n  std::int32_t nthread{0};\n  DMLC_DECLARE_PARAMETER(GlobalConfiguration) {\n    DMLC_DECLARE_FIELD(verbosity)\n        .set_range(0, 3)\n        .set_default(1)  // shows only warning\n        .describe(\"Flag to print out detailed breakdown of runtime.\");\n    DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(\n        \"Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost\");\n    DMLC_DECLARE_FIELD(use_cuda_async_pool)\n        .set_default(false)\n        .describe(\"Whether to use the async memory pool in CUDA.\");\n  }\n};\n\nusing GlobalConfigThreadLocalStore = dmlc::ThreadLocalStore<GlobalConfiguration>;\n\nstruct InitNewThread {\n  GlobalConfiguration config;\n  std::int32_t device{-1};\n\n  void operator()() const;\n  InitNewThread();\n};\n}  // namespace xgboost\n\n#endif  // XGBOOST_GLOBAL_CONFIG_H_\n"
  },
  {
    "path": "include/xgboost/gradient.h",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#pragma once\n\n#include <xgboost/base.h>    // for GradientPair\n#include <xgboost/linalg.h>  // for Matrix\n#include <xgboost/logging.h>\n\n#include <cstddef>  // for size_t\n\nnamespace xgboost {\n/**\n * @brief Container for gradient produced by objective.\n */\nstruct GradientContainer {\n  /** @brief Gradient used for multi-target tree split and linear model. */\n  linalg::Matrix<GradientPair> gpair;\n  /** @brief Gradient used for tree leaf value, optional. */\n  linalg::Matrix<GradientPair> value_gpair;\n\n  [[nodiscard]] bool HasValueGrad() const noexcept { return !value_gpair.Empty(); }\n\n  [[nodiscard]] std::size_t NumSplitTargets() const noexcept { return gpair.Shape(1); }\n  [[nodiscard]] std::size_t NumTargets() const noexcept {\n    return HasValueGrad() ? value_gpair.Shape(1) : this->gpair.Shape(1);\n  }\n\n  linalg::MatrixView<GradientPair const> ValueGrad(Context const* ctx) const {\n    if (HasValueGrad()) {\n      return this->value_gpair.View(ctx->Device());\n    }\n    return this->gpair.View(ctx->Device());\n  }\n\n  [[nodiscard]] linalg::Matrix<GradientPair> const* Grad() const { return &gpair; }\n  [[nodiscard]] linalg::Matrix<GradientPair>* Grad() { return &gpair; }\n\n  [[nodiscard]] linalg::Matrix<GradientPair> const* FullGradOnly() const {\n    if (this->HasValueGrad()) {\n      LOG(FATAL) << \"Reduced gradient is not yet supported.\";\n    }\n    return this->Grad();\n  }\n  [[nodiscard]] linalg::Matrix<GradientPair>* FullGradOnly() {\n    if (this->HasValueGrad()) {\n      LOG(FATAL) << \"Reduced gradient is not yet supported.\";\n    }\n    return this->Grad();\n  }\n};\n}  // namespace xgboost\n"
  },
  {
    "path": "include/xgboost/host_device_vector.h",
    "content": "/*!\n * Copyright 2017-2019 XGBoost contributors\n */\n\n/**\n * @file host_device_vector.h\n * @brief A device-and-host vector abstraction layer.\n *\n * Why HostDeviceVector?<br/>\n * With CUDA, one has to explicitly manage memory through 'cudaMemcpy' calls.\n * This wrapper class hides this management from the users, thereby making it\n * easy to integrate GPU/CPU usage under a single interface.\n *\n * Initialization/Allocation:<br/>\n * One can choose to initialize the vector on CPU or GPU during constructor.\n * (use the 'devices' argument) Or, can choose to use the 'Resize' method to\n * allocate/resize memory explicitly, and use the 'SetDevice' method\n * to specify the device.\n *\n * Accessing underlying data:<br/>\n * Use 'HostVector' method to explicitly query for the underlying std::vector.\n * If you need the raw device pointer, use the 'DevicePointer' method. For perf\n * implications of these calls, see below.\n *\n * Accessing underling data and their perf implications:<br/>\n * There are 4 scenarios to be considered here:\n * HostVector and data on CPU --> no problems, std::vector returned immediately\n * HostVector but data on GPU --> this causes a cudaMemcpy to be issued internally.\n *                        subsequent calls to HostVector, will NOT incur this penalty.\n *                        (assuming 'DevicePointer' is not called in between)\n * DevicePointer but data on CPU  --> this causes a cudaMemcpy to be issued internally.\n *                        subsequent calls to DevicePointer, will NOT incur this penalty.\n *                        (assuming 'HostVector' is not called in between)\n * DevicePointer and data on GPU  --> no problems, the device ptr\n *                        will be returned immediately.\n *\n * What if xgboost is compiled without CUDA?<br/>\n * In that case, there's a special implementation which always falls-back to\n * working with std::vector. 
This logic can be found in host_device_vector.cc\n *\n * Why not consider CUDA unified memory?<br/>\n * We did consider. However, it poses complications if we need to support both\n * compiling with and without CUDA toolkit. It was easier to have\n * 'HostDeviceVector' with a special-case implementation in host_device_vector.cc\n *\n * @note: Size and Devices methods are thread-safe.\n */\n\n#ifndef XGBOOST_HOST_DEVICE_VECTOR_H_\n#define XGBOOST_HOST_DEVICE_VECTOR_H_\n\n#include <xgboost/context.h>  // for DeviceOrd\n#include <xgboost/span.h>     // for Span\n\n#include <initializer_list>\n#include <type_traits>\n#include <vector>\n\nnamespace xgboost {\n\n#ifdef __CUDACC__\n// Sets a function to call instead of cudaSetDevice();\n// only added for testing\nvoid SetCudaSetDeviceHandler(void (*handler)(int));\n#endif  // __CUDACC__\n\ntemplate <typename T> struct HostDeviceVectorImpl;\n\n/*!\n * \\brief Controls data access from the GPU.\n *\n * Since a `HostDeviceVector` can have data on both the host and device, access control needs to be\n * maintained to keep the data consistent.\n *\n * There are 3 scenarios supported:\n *   - Data is being manipulated on device. GPU has write access, host doesn't have access.\n *   - Data is read-only on both the host and device.\n *   - Data is being manipulated on the host. 
Host has write access, device doesn't have access.\n */\nenum GPUAccess {\n  kNone, kRead,\n  // write implies read\n  kWrite\n};\n\ntemplate <typename T>\nclass HostDeviceVector {\n  static_assert(std::is_standard_layout_v<T>, \"HostDeviceVector admits only POD types\");\n\n public:\n  explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());\n  HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());\n  explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());\n  ~HostDeviceVector();\n\n  HostDeviceVector(const HostDeviceVector<T>&) = delete;\n  HostDeviceVector(HostDeviceVector<T>&&);\n\n  HostDeviceVector<T>& operator=(const HostDeviceVector<T>&) = delete;\n  HostDeviceVector<T>& operator=(HostDeviceVector<T>&&);\n\n  [[nodiscard]] bool Empty() const { return Size() == 0; }\n  [[nodiscard]] std::size_t Size() const;\n  [[nodiscard]] std::size_t SizeBytes() const { return this->Size() * sizeof(T); }\n  [[nodiscard]] DeviceOrd Device() const;\n  common::Span<T> DeviceSpan();\n  common::Span<const T> ConstDeviceSpan() const;\n  common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }\n  T* DevicePointer();\n  const T* ConstDevicePointer() const;\n  const T* DevicePointer() const { return ConstDevicePointer(); }\n\n  T* HostPointer() { return HostVector().data(); }\n  common::Span<T> HostSpan() { return common::Span<T>{HostVector()}; }\n  common::Span<T const> HostSpan() const { return common::Span<T const>{HostVector()}; }\n  common::Span<T const> ConstHostSpan() const { return HostSpan(); }\n  const T* ConstHostPointer() const { return ConstHostVector().data(); }\n  const T* HostPointer() const { return ConstHostPointer(); }\n\n  void Fill(T v);\n  void Copy(const HostDeviceVector<T>& other);\n  void Copy(const std::vector<T>& other);\n  void Copy(std::initializer_list<T> other);\n\n  void Extend(const HostDeviceVector<T>& other);\n\n  std::vector<T>& 
HostVector();\n  const std::vector<T>& ConstHostVector() const;\n  const std::vector<T>& HostVector() const {return ConstHostVector(); }\n\n  [[nodiscard]] bool HostCanRead() const;\n  [[nodiscard]] bool HostCanWrite() const;\n  [[nodiscard]] bool DeviceCanRead() const;\n  [[nodiscard]] bool DeviceCanWrite() const;\n  [[nodiscard]] GPUAccess DeviceAccess() const;\n\n  void SetDevice(DeviceOrd device) const;\n\n  void Resize(std::size_t new_size);\n  /** @brief Resize and initialize the data if the new size is larger than the old size. */\n  void Resize(std::size_t new_size, T v);\n\n  using value_type = T;  // NOLINT\n\n private:\n  HostDeviceVectorImpl<T>* impl_;\n};\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_HOST_DEVICE_VECTOR_H_\n"
  },
  {
    "path": "include/xgboost/intrusive_ptr.h",
    "content": "/*!\n * Copyright (c) by Contributors 2020\n * \\file intrusive_ptr.h\n * \\brief Implementation of Intrusive Ptr.\n */\n#ifndef XGBOOST_INTRUSIVE_PTR_H_\n#define XGBOOST_INTRUSIVE_PTR_H_\n\n#include <atomic>\n#include <cinttypes>\n#include <functional>\n#include <ostream>\n\nnamespace xgboost {\n/*!\n * \\brief Helper class for embedding reference counting into client objects.  See\n *        https://www.boost.org/doc/libs/1_74_0/doc/html/atomic/usage_examples.html for\n *        discussions of memory order.\n */\nclass IntrusivePtrCell {\n private:\n  std::atomic<int32_t> count_ {0};\n  template <typename T> friend class IntrusivePtr;\n\n  std::int32_t IncRef() noexcept {\n    return count_.fetch_add(1, std::memory_order_relaxed);\n  }\n  std::int32_t DecRef() noexcept {\n    return count_.fetch_sub(1, std::memory_order_release);\n  }\n  bool IsZero() const { return Count() == 0; }\n\n public:\n  IntrusivePtrCell() noexcept = default;\n  int32_t Count() const { return count_.load(std::memory_order_relaxed); }\n};\n\n/*!\n * \\brief User defined function for returning embedded reference count.\n */\ntemplate <typename T> IntrusivePtrCell &IntrusivePtrRefCount(T const *ptr) noexcept;\n\n/*!\n * \\brief Implementation of Intrusive Pointer.  A smart pointer that points to an object\n *        with an embedded reference counter. The underlying object must implement a\n *        friend function IntrusivePtrRefCount() that returns the ref counter (of type\n *        IntrusivePtrCell). 
The intrusive pointer is faster than std::shared_ptr<>:\n *        std::shared_ptr<> makes an extra memory allocation for the ref counter whereas\n *        the intrusive pointer does not.\n *\n * \\code\n *\n *   class ForIntrusivePtrTest {\n *    public:\n *     mutable class IntrusivePtrCell ref;\n *     float data { 0 };\n *\n *     friend IntrusivePtrCell &\n *     IntrusivePtrRefCount(ForIntrusivePtrTest const *t) noexcept {  // NOLINT\n *       return t->ref;\n *     }\n *\n *     ForIntrusivePtrTest() = default;\n *     ForIntrusivePtrTest(float a, int32_t b) : data{a + static_cast<float>(b)} {}\n *\n *     explicit ForIntrusivePtrTest(NotCopyConstructible a) : data{a.data} {}\n *   };\n *\n *   IntrusivePtr<ForIntrusivePtrTest> ptr {new ForIntrusivePtrTest};\n *\n * \\endcode\n */\ntemplate <typename T> class IntrusivePtr {\n private:\n  void IncRef(T *ptr) {\n    if (ptr) {\n      IntrusivePtrRefCount(ptr).IncRef();\n    }\n  }\n  void DecRef(T *ptr) {\n    if (ptr) {\n      if (IntrusivePtrRefCount(ptr).DecRef() == 1) {\n        std::atomic_thread_fence(std::memory_order_acquire);\n        delete ptr;\n      }\n    }\n  }\n\n protected:\n  T *ptr_{nullptr};\n\n public:\n  using element_type = T;  // NOLINT\n  struct Hash {\n    std::size_t operator()(IntrusivePtr<element_type> const &ptr) const noexcept {\n      return std::hash<element_type *>()(ptr.get());\n    }\n  };\n  /*!\n   * \\brief Construct an IntrusivePtr from raw pointer. 
IntrusivePtr takes the ownership.\n   *\n   * \\param p Raw pointer to object\n   */\n  explicit IntrusivePtr(T *p) : ptr_{p} {\n    if (ptr_) {\n      IncRef(ptr_);\n    }\n  }\n\n  IntrusivePtr() noexcept = default;\n  IntrusivePtr(IntrusivePtr const &that) : ptr_{that.ptr_} { IncRef(ptr_); }\n  IntrusivePtr(IntrusivePtr &&that) noexcept : ptr_{that.ptr_} { that.ptr_ = nullptr; }\n\n  ~IntrusivePtr() { DecRef(ptr_); }\n\n  IntrusivePtr<T> &operator=(IntrusivePtr<T> const &that) {\n    IntrusivePtr<T>{that}.swap(*this);\n    return *this;\n  }\n  IntrusivePtr<T> &operator=(IntrusivePtr<T> &&that) noexcept {\n    std::swap(ptr_, that.ptr_);\n    return *this;\n  }\n\n  void reset() {  // NOLINT\n    DecRef(ptr_);\n    ptr_ = nullptr;\n  }\n  void reset(element_type *that) { IntrusivePtr{that}.swap(*this); }  // NOLINT\n  // clang-tidy might manufacture a null value, disable the check\n  element_type &operator*() const noexcept { return *ptr_; }  // NOLINT\n  element_type *operator->() const noexcept { return ptr_; }\n  element_type *get() const noexcept { return ptr_; }  // NOLINT\n\n  explicit operator bool() const noexcept { return static_cast<bool>(ptr_); }\n\n  int32_t use_count() noexcept {  // NOLINT\n    return ptr_ ? 
IntrusivePtrRefCount(ptr_).Count() : 0;\n  }\n\n  /*\n   * \\brief Helper function for swapping 2 pointers.\n   */\n  void swap(IntrusivePtr<T> &that) noexcept {  // NOLINT\n    std::swap(ptr_, that.ptr_);\n  }\n};\n\ntemplate <class T, class U>\nbool operator==(IntrusivePtr<T> const &x, IntrusivePtr<U> const &y) noexcept {\n  return x.get() == y.get();\n}\n\ntemplate <class T, class U>\nbool operator!=(IntrusivePtr<T> const &x, IntrusivePtr<U> const &y) noexcept {\n  return x.get() != y.get();\n}\n\ntemplate <class T, class U>\nbool operator==(IntrusivePtr<T> const &x, U *y) noexcept {\n  return x.get() == y;\n}\n\ntemplate <class T, class U>\nbool operator!=(IntrusivePtr<T> const &x, U *y) noexcept {\n  return x.get() != y;\n}\n\ntemplate <class T, class U>\nbool operator==(T *x, IntrusivePtr<U> const &y) noexcept {\n  return y == x;\n}\n\ntemplate <class T, class U>\nbool operator!=(T *x, IntrusivePtr<U> const &y) noexcept {\n  return y != x;\n}\n\ntemplate <class T>\nbool operator<(IntrusivePtr<T> const &x, IntrusivePtr<T> const &y) noexcept {\n  return std::less<T*>{}(x.get(), y.get());\n}\n\ntemplate <class T>\nbool operator<=(IntrusivePtr<T> const &x, IntrusivePtr<T> const &y) noexcept {\n  return std::less_equal<T*>{}(x.get(), y.get());\n}\n\ntemplate <class T>\nbool operator>(IntrusivePtr<T> const &x, IntrusivePtr<T> const &y) noexcept {\n  return !(x <= y);\n}\n\ntemplate <class T>\nbool operator>=(IntrusivePtr<T> const &x, IntrusivePtr<T> const &y) noexcept {\n  return !(x < y);\n}\n\ntemplate <class E, class T, class Y>\nstd::basic_ostream<E, T> &operator<<(std::basic_ostream<E, T> &os,\n                                     IntrusivePtr<Y> const &p) {\n  os << p.get();\n  return os;\n}\n}  // namespace xgboost\n\nnamespace std {\ntemplate <class T>\nvoid swap(xgboost::IntrusivePtr<T> &x,  // NOLINT\n          xgboost::IntrusivePtr<T> &y) noexcept {\n  x.swap(y);\n}\n\ntemplate <typename T>\nstruct hash<xgboost::IntrusivePtr<T>> : public 
xgboost::IntrusivePtr<T>::Hash {};\n}      // namespace std\n#endif  // XGBOOST_INTRUSIVE_PTR_H_\n"
  },
  {
    "path": "include/xgboost/json.h",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_JSON_H_\n#define XGBOOST_JSON_H_\n\n#include <xgboost/intrusive_ptr.h>\n#include <xgboost/logging.h>\n#include <xgboost/parameter.h>\n#include <xgboost/string_view.h>\n\n#include <functional>\n#include <map>\n#include <string>\n#include <type_traits>  // std::enable_if_t\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\n\nclass Json;\nclass JsonReader;\nclass JsonWriter;\n\nclass Value {\n private:\n  mutable class IntrusivePtrCell ref_;\n  friend IntrusivePtrCell& IntrusivePtrRefCount(xgboost::Value const* t) noexcept {\n    return t->ref_;\n  }\n\n public:\n  /**\n   * @brief Simplified implementation of LLVM RTTI.\n   *\n   * @note The integer ID must be kept stable.\n   */\n  enum class ValueKind : std::int64_t {\n    kString = 0,\n    kNumber = 1,\n    kInteger = 2,\n    kObject = 3,  // std::map\n    kArray = 4,   // std::vector\n    kBoolean = 5,\n    kNull = 6,\n    // typed array for ubjson\n    kF32Array = 7,\n    kF64Array = 8,\n    kI8Array = 9,\n    kU8Array = 10,\n    kI16Array = 11,\n    kU16Array = 12,\n    kI32Array = 13,\n    kU32Array = 14,\n    kI64Array = 15,\n    kU64Array = 16,\n  };\n\n  explicit Value(ValueKind _kind) : kind_{_kind} {}\n\n  ValueKind Type() const { return kind_; }\n  virtual ~Value() = default;\n\n  virtual void Save(JsonWriter* writer) const = 0;\n\n  virtual Json& operator[](std::string const& key);\n  virtual Json& operator[](int ind);\n\n  virtual bool operator==(Value const& rhs) const = 0;\n  virtual Value& operator=(Value const& rhs) = delete;\n\n  std::string TypeStr() const;\n\n private:\n  ValueKind kind_;\n};\n\ntemplate <typename T>\nbool IsA(Value const* value) {\n  return T::IsClassOf(value);\n}\n\ntemplate <typename T, typename U>\nT* Cast(U* value) {\n  if (IsA<T>(value)) {\n    return dynamic_cast<T*>(value);\n  } else {\n    LOG(FATAL) << \"Invalid cast, from \" + value->TypeStr() + \" to \" + 
T().TypeStr();\n  }\n  return dynamic_cast<T*>(value);  // suppress compiler warning.\n}\n\nclass JsonString : public Value {\n  std::string str_;\n\n public:\n  JsonString() : Value(ValueKind::kString) {}\n  JsonString(std::string const& str) :  // NOLINT\n      Value(ValueKind::kString), str_{str} {}\n  JsonString(std::string&& str) noexcept :  // NOLINT\n      Value(ValueKind::kString), str_{std::forward<std::string>(str)} {}\n  JsonString(JsonString&& str) noexcept : Value(ValueKind::kString) {  // NOLINT\n    std::swap(str.str_, this->str_);\n  }\n\n  void Save(JsonWriter* writer) const override;\n\n  std::string const& GetString() &&      { return str_; }\n  std::string const& GetString() const & { return str_; }\n  std::string&       GetString()       & { return str_; }\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kString;\n  }\n};\n\nclass JsonArray : public Value {\n  std::vector<Json> vec_;\n\n public:\n  JsonArray() : Value(ValueKind::kArray) {}\n  JsonArray(std::vector<Json>&& arr) noexcept  // NOLINT\n      : Value(ValueKind::kArray), vec_{std::forward<std::vector<Json>>(arr)} {}\n  JsonArray(std::vector<Json> const& arr) :  // NOLINT\n      Value(ValueKind::kArray), vec_{arr} {}\n  JsonArray(JsonArray const& that) = delete;\n  JsonArray(JsonArray && that) noexcept;\n\n  void Save(JsonWriter* writer) const override;\n\n  Json& operator[](int ind) override { return vec_.at(ind); }\n  // silent the partial oveeridden warning\n  Json& operator[](std::string const& key) override { return Value::operator[](key); }\n\n  std::vector<Json> const& GetArray() &&      { return vec_; }\n  std::vector<Json> const& GetArray() const & { return vec_; }\n  std::vector<Json>&       GetArray()       & { return vec_; }\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override 
= delete;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kArray;\n  }\n};\n\n/**\n * \\brief Typed array for Universal Binary JSON.\n *\n * \\tparam T The underlying primitive type.\n * \\tparam kind Value kind defined by JSON type.\n */\ntemplate <typename T, Value::ValueKind kind>\nclass JsonTypedArray : public Value {\n  std::vector<T> vec_;\n\n public:\n  using value_type = T;  // NOLINT\n\n  JsonTypedArray() : Value(kind) {}\n  explicit JsonTypedArray(std::size_t n) : Value(kind) { vec_.resize(n); }\n  JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  void Set(size_t i, T v) { vec_[i] = v; }\n  size_t Size() const { return vec_.size(); }\n\n  void Save(JsonWriter* writer) const override;\n\n  std::vector<T> const& GetArray() && { return vec_; }\n  std::vector<T> const& GetArray() const& { return vec_; }\n  std::vector<T>& GetArray() & { return vec_; }\n\n  static bool IsClassOf(Value const* value) { return value->Type() == kind; }\n};\n\n/**\n * @brief Typed UBJSON array for 32-bit floating point.\n */\nusing F32Array = JsonTypedArray<float, Value::ValueKind::kF32Array>;\n/**\n * @brief Typed UBJSON array for 64-bit floating point.\n */\nusing F64Array = JsonTypedArray<double, Value::ValueKind::kF64Array>;\n/**\n * @brief Typed UBJSON array for int8_t.\n */\nusing I8Array = JsonTypedArray<std::int8_t, Value::ValueKind::kI8Array>;\n/**\n * @brief Typed UBJSON array for uint8_t.\n */\nusing U8Array = JsonTypedArray<std::uint8_t, Value::ValueKind::kU8Array>;\n/**\n * @brief Typed UBJSON array for int16_t.\n */\nusing I16Array = JsonTypedArray<std::int16_t, Value::ValueKind::kI16Array>;\n/**\n * @brief Typed UBJSON array for uint16_t.\n */\nusing U16Array = JsonTypedArray<std::uint16_t, Value::ValueKind::kU16Array>;\n/**\n * @brief Typed UBJSON array for int32_t.\n 
*/\nusing I32Array = JsonTypedArray<std::int32_t, Value::ValueKind::kI32Array>;\n/**\n * @brief Typed UBJSON array for uint32_t.\n */\nusing U32Array = JsonTypedArray<std::uint32_t, Value::ValueKind::kU32Array>;\n/**\n * @brief Typed UBJSON array for int64_t.\n */\nusing I64Array = JsonTypedArray<std::int64_t, Value::ValueKind::kI64Array>;\n/**\n * @brief Typed UBJSON array for uint64_t.\n */\nusing U64Array = JsonTypedArray<std::uint64_t, Value::ValueKind::kU64Array>;\n\nclass JsonObject : public Value {\n public:\n  using Map = std::map<std::string, Json, std::less<>>;\n\n private:\n  Map object_;\n\n public:\n  JsonObject() : Value(ValueKind::kObject) {}\n  JsonObject(Map&& object) noexcept;  // NOLINT\n  JsonObject(JsonObject const& that) = delete;\n  JsonObject(JsonObject&& that) noexcept;\n\n  void Save(JsonWriter* writer) const override;\n\n  // silent the partial oveeridden warning\n  Json& operator[](int ind) override { return Value::operator[](ind); }\n  Json& operator[](std::string const& key) override { return object_[key]; }\n\n  Map const& GetObject() && { return object_; }\n  Map const& GetObject() const& { return object_; }\n  Map& GetObject() & { return object_; }\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  static bool IsClassOf(Value const* value) { return value->Type() == ValueKind::kObject; }\n  ~JsonObject() override = default;\n};\n\nnamespace detail {\ntemplate <typename T, typename U>\nusing IsSameT = std::enable_if_t<std::is_same_v<std::remove_cv_t<T>, std::remove_cv_t<U>>>;\n\ntemplate <typename T>\nusing IsF64T = std::enable_if_t<std::is_same_v<T, double>>;\n}  // namespace detail\n\nclass JsonNumber : public Value {\n public:\n  using Float = float;\n\n private:\n  Float number_ { 0 };\n\n public:\n  JsonNumber() : Value(ValueKind::kNumber) {}\n  template <typename FloatT, typename detail::IsSameT<FloatT, Float>* = nullptr>\n  JsonNumber(FloatT value) : 
Value(ValueKind::kNumber), number_{value} {}  // NOLINT\n  template <typename FloatT, typename detail::IsF64T<FloatT>* = nullptr>\n  JsonNumber(FloatT value)  // NOLINT\n      : Value{ValueKind::kNumber}, number_{static_cast<Float>(value)} {}\n  JsonNumber(JsonNumber const& that) = delete;\n  JsonNumber(JsonNumber&& that) noexcept : Value{ValueKind::kNumber}, number_{that.number_} {}\n\n  void Save(JsonWriter* writer) const override;\n\n  Float const& GetNumber() &&      { return number_; }\n  Float const& GetNumber() const & { return number_; }\n  Float&       GetNumber()       & { return number_; }\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kNumber;\n  }\n};\n\nnamespace detail {\ntemplate <typename IntT>\nusing Not32SizeT = std::enable_if_t<std::is_same_v<IntT, std::uint32_t> &&\n                                    !std::is_same_v<std::size_t, std::uint32_t>>;\n}\n\n\nclass JsonInteger : public Value {\n public:\n  using Int = int64_t;\n\n private:\n  Int integer_ {0};\n\n public:\n  JsonInteger() : Value(ValueKind::kInteger) {}  // NOLINT\n  template <typename IntT, typename detail::IsSameT<IntT, Int>* = nullptr>\n  JsonInteger(IntT value) : Value(ValueKind::kInteger), integer_{value} {}  // NOLINT\n  template <typename IntT, typename detail::IsSameT<IntT, std::size_t>* = nullptr>\n  JsonInteger(IntT value)  // NOLINT\n      : Value(ValueKind::kInteger), integer_{static_cast<Int>(value)} {}\n  template <typename IntT, typename detail::IsSameT<IntT, std::int32_t>* = nullptr>\n  JsonInteger(IntT value)  // NOLINT\n      : Value(ValueKind::kInteger), integer_{static_cast<Int>(value)} {}\n  template <typename IntT,\n            typename detail::Not32SizeT<IntT>* = nullptr>\n  JsonInteger(IntT value)  // NOLINT\n      : Value(ValueKind::kInteger), integer_{static_cast<Int>(value)} {}\n\n  
JsonInteger(JsonInteger &&that) noexcept\n      : Value{ValueKind::kInteger}, integer_{that.integer_} {}\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  Int const& GetInteger() &&      { return integer_; }\n  Int const& GetInteger() const & { return integer_; }\n  Int& GetInteger() &             { return integer_; }\n  void Save(JsonWriter* writer) const override;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kInteger;\n  }\n};\n\nclass JsonNull : public Value {\n public:\n  JsonNull() : Value(ValueKind::kNull) {}\n  JsonNull(std::nullptr_t) : Value(ValueKind::kNull) {}  // NOLINT\n  JsonNull(JsonNull&&) noexcept : Value(ValueKind::kNull) {}\n\n  void Save(JsonWriter* writer) const override;\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kNull;\n  }\n};\n\n/*! \\brief Describes both true and false. 
*/\nclass JsonBoolean : public Value {\n  bool boolean_ = false;\n\n public:\n  JsonBoolean() : Value(ValueKind::kBoolean) {}  // NOLINT\n  // Ambigious with JsonNumber.\n  template <typename Bool, typename detail::IsSameT<std::remove_cv_t<Bool>, bool>* = nullptr>\n  JsonBoolean(Bool value) : Value(ValueKind::kBoolean), boolean_{value} {}  // NOLINT\n  JsonBoolean(JsonBoolean&& value) noexcept:  // NOLINT\n      Value(ValueKind::kBoolean), boolean_{value.boolean_} {}\n\n  void Save(JsonWriter* writer) const override;\n\n  bool const& GetBoolean() &&      { return boolean_; }\n  bool const& GetBoolean() const & { return boolean_; }\n  bool&       GetBoolean()       & { return boolean_; }\n\n  bool operator==(Value const& rhs) const override;\n  Value& operator=(Value const& rhs) override = delete;\n\n  static bool IsClassOf(Value const* value) {\n    return value->Type() == ValueKind::kBoolean;\n  }\n};\n\n/*!\n * \\brief Data structure representing JSON format.\n *\n * Limitation:  UTF-8 is not properly supported.  Code points above ASCII are\n *              invalid.\n *\n * Examples:\n *\n * \\code\n *   // Create a JSON object.\n *   Json object { Object() };\n *   // Assign key \"key\" with a JSON string \"Value\";\n *   object[\"key\"] = String(\"Value\");\n *   // Assign key \"arr\" with a empty JSON Array;\n *   object[\"arr\"] = Array();\n * \\endcode\n */\nclass Json {\n public:\n  /**\n   *  \\brief Decode the JSON object.  Optional parameter mode for choosing between text\n   *         and binary (ubjson) input.\n   */\n  static Json Load(StringView str, std::ios::openmode mode = std::ios::in);\n  /*! \\brief Pass your own JsonReader. */\n  static Json Load(JsonReader* reader);\n  /**\n   *  \\brief Encode the JSON object.  
Optional parameter mode for choosing between text\n   *         and binary (ubjson) output.\n   */\n  static void Dump(Json json, std::string* out, std::ios::openmode mode = std::ios::out);\n  static void Dump(Json json, std::vector<char>* out, std::ios::openmode mode = std::ios::out);\n  /*! \\brief Use your own JsonWriter. */\n  static void Dump(Json json, JsonWriter* writer);\n\n  template <typename Container = std::string>\n  static Container Dump(Json json) {\n    if constexpr (std::is_same_v<Container, std::string>) {\n      std::string str;\n      Dump(json, &str);\n      return str;\n    } else {\n      std::vector<char> str;\n      Dump(json, &str);\n      return str;\n    }\n  }\n\n  Json() = default;\n\n  // number\n  explicit Json(JsonNumber number) : ptr_{new JsonNumber(std::move(number))} {}\n  Json& operator=(JsonNumber number) {\n    ptr_.reset(new JsonNumber(std::move(number)));\n    return *this;\n  }\n  // integer\n  explicit Json(JsonInteger integer) : ptr_{new JsonInteger(std::move(integer))} {}\n  Json& operator=(JsonInteger integer) {\n    ptr_.reset(new JsonInteger(std::move(integer)));\n    return *this;\n  }\n  // array\n  explicit Json(JsonArray&& list) : ptr_{new JsonArray(std::forward<JsonArray>(list))} {}\n  Json& operator=(JsonArray&& array) {\n    ptr_.reset(new JsonArray(std::forward<JsonArray>(array)));\n    return *this;\n  }\n  // typed array\n  template <typename T, Value::ValueKind kind>\n  explicit Json(JsonTypedArray<T, kind>&& list)\n      : ptr_{new JsonTypedArray<T, kind>(std::forward<JsonTypedArray<T, kind>>(list))} {}\n  template <typename T, Value::ValueKind kind>\n  Json& operator=(JsonTypedArray<T, kind>&& array) {\n    ptr_.reset(new JsonTypedArray<T, kind>(std::forward<JsonTypedArray<T, kind>>(array)));\n    return *this;\n  }\n  // object\n  explicit Json(JsonObject&& object) : ptr_{new JsonObject(std::forward<JsonObject>(object))} {}\n  Json& operator=(JsonObject&& object) {\n    ptr_.reset(new 
JsonObject(std::forward<JsonObject>(object)));\n    return *this;\n  }\n  // string\n  explicit Json(JsonString&& str) : ptr_{new JsonString(std::forward<JsonString>(str))} {}\n  Json& operator=(JsonString&& str) {\n    ptr_.reset(new JsonString(std::forward<JsonString>(str)));\n    return *this;\n  }\n  // bool\n  explicit Json(JsonBoolean boolean) :\n      ptr_{new JsonBoolean(std::move(boolean))} {}\n  Json& operator=(JsonBoolean boolean) {\n    ptr_.reset(new JsonBoolean(std::move(boolean)));\n    return *this;\n  }\n  // null\n  explicit Json(JsonNull null) :\n      ptr_{new JsonNull(std::move(null))} {}\n  Json& operator=(JsonNull null) {\n    ptr_.reset(new JsonNull(std::move(null)));\n    return *this;\n  }\n\n  // copy\n  Json(Json const& other) = default;\n  Json& operator=(Json const& other) = default;\n  // move\n  Json(Json &&other) noexcept { std::swap(this->ptr_, other.ptr_); }\n  Json &operator=(Json &&other) noexcept {\n    std::swap(this->ptr_, other.ptr_);\n    return *this;\n  }\n\n  /*! \\brief Index Json object with a std::string, used for Json Object. */\n  Json& operator[](std::string const & key) const { return (*ptr_)[key]; }\n  /*! \\brief Index Json object with int, used for Json Array. */\n  Json& operator[](int ind)                 const { return (*ptr_)[ind]; }\n\n  /*! \\brief Return the reference to stored Json value. 
*/\n  [[nodiscard]] Value const& GetValue() const& { return *ptr_; }\n  Value const& GetValue() && { return *ptr_; }\n  Value& GetValue() & { return *ptr_; }\n\n  bool operator==(Json const& rhs) const {\n    return *ptr_ == *(rhs.ptr_);\n  }\n\n  friend std::ostream& operator<<(std::ostream& os, Json const& j) {\n    std::string str;\n    Json::Dump(j, &str);\n    os << str;\n    return os;\n  }\n\n  [[nodiscard]] IntrusivePtr<Value> const& Ptr() const { return ptr_; }\n\n private:\n  IntrusivePtr<Value> ptr_{new JsonNull};\n};\n\n/**\n * \\brief Check whether a Json object has specific type.\n *\n * \\code\n *   Json json {Array{}};\n *   bool is_array = IsA<Array>(json);\n *   CHECK(is_array);\n * \\endcode\n */\ntemplate <typename T>\nbool IsA(Json const& j) {\n  auto const& v = j.GetValue();\n  return IsA<T>(&v);\n}\n\nnamespace detail {\n// Number\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonNumber>>* = nullptr>\nJsonNumber::Float& GetImpl(T& val) {  // NOLINT\n  return val.GetNumber();\n}\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonNumber const>>* = nullptr>\nJsonNumber::Float const& GetImpl(T& val) {  // NOLINT\n  return val.GetNumber();\n}\n\n// Integer\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonInteger>>* = nullptr>\nJsonInteger::Int& GetImpl(T& val) {  // NOLINT\n  return val.GetInteger();\n}\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonInteger const>>* = nullptr>\nJsonInteger::Int const& GetImpl(T& val) {  // NOLINT\n  return val.GetInteger();\n}\n\n// String\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonString>>* = nullptr>\nstd::string& GetImpl(T& val) {  // NOLINT\n  return val.GetString();\n}\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonString const>>* = nullptr>\nstd::string const& GetImpl(T& val) {  // NOLINT\n  return val.GetString();\n}\n\n// Boolean\ntemplate <typename T, typename 
std::enable_if_t<std::is_same_v<T, JsonBoolean>>* = nullptr>\nbool& GetImpl(T& val) {  // NOLINT\n  return val.GetBoolean();\n}\ntemplate <typename T,\n          typename std::enable_if_t<std::is_same_v<T, JsonBoolean const>>* = nullptr>\nbool const& GetImpl(T& val) {  // NOLINT\n  return val.GetBoolean();\n}\n\n// Array\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonArray>>* = nullptr>\nstd::vector<Json>& GetImpl(T& val) {  // NOLINT\n  return val.GetArray();\n}\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonArray const>>* = nullptr>\nstd::vector<Json> const& GetImpl(T& val) {  // NOLINT\n  return val.GetArray();\n}\n\n// Typed Array\ntemplate <typename T, Value::ValueKind kind>\nstd::vector<T>& GetImpl(JsonTypedArray<T, kind>& val) {  // NOLINT\n  return val.GetArray();\n}\ntemplate <typename T, Value::ValueKind kind>\nstd::vector<T> const& GetImpl(JsonTypedArray<T, kind> const& val) {\n  return val.GetArray();\n}\n\n// Object\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonObject>>* = nullptr>\nJsonObject::Map& GetImpl(T& val) {  // NOLINT\n  return val.GetObject();\n}\ntemplate <typename T, typename std::enable_if_t<std::is_same_v<T, JsonObject const>>* = nullptr>\nJsonObject::Map const& GetImpl(T& val) {  // NOLINT\n  return val.GetObject();\n}\n}  // namespace detail\n\n/*!\n * \\brief Get Json value.\n *\n * \\tparam T One of the Json value type.\n *\n * \\param json\n * \\return Value contained in Json object of type T.\n */\ntemplate <typename T, typename U>\nauto get(U& json) -> decltype(detail::GetImpl(*Cast<T>(&json.GetValue())))& { // NOLINT\n  auto& value = *Cast<T>(&json.GetValue());\n  return detail::GetImpl(value);\n}\n\nusing Object  = JsonObject;\nusing Array   = JsonArray;\nusing Number  = JsonNumber;\nusing Integer = JsonInteger;\nusing Boolean = JsonBoolean;\nusing String  = JsonString;\nusing Null    = JsonNull;\n\n/**\n * \\brief Convert XGBoost parameter to JSON 
object.\n *\n * \\tparam Parameter An instantiation of XGBoostParameter\n *\n * \\param param Input parameter\n *\n * \\return JSON object representing the input parameter\n */\ntemplate <typename Parameter>\nObject ToJson(Parameter const& param) {\n  Object obj;\n  for (auto const& kv : param.__DICT__()) {\n    obj[kv.first] = kv.second;\n  }\n  return obj;\n}\n\n/**\n * \\brief Load a XGBoost parameter from a JSON object.\n *\n * \\tparam Parameter An instantiation of XGBoostParameter\n *\n * \\param obj JSON object representing the parameter.\n * \\param param Output parameter.\n *\n * \\return Unknown arguments in the JSON object.\n */\ntemplate <typename Parameter>\nArgs FromJson(Json const& obj, Parameter* param) {\n  auto const& j_param = get<Object const>(obj);\n  Args args;\n  for (auto const& kv : j_param) {\n    args.emplace_back(kv.first, get<String const>(kv.second));\n  }\n  return param->UpdateAllowUnknown(args);\n}\n}  // namespace xgboost\n#endif  // XGBOOST_JSON_H_\n"
  },
  {
    "path": "include/xgboost/json_io.h",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_JSON_IO_H_\n#define XGBOOST_JSON_IO_H_\n\n#include <xgboost/base.h>\n#include <xgboost/byteswap.h>  // for ByteSwap\n#include <xgboost/json.h>\n\n#include <cstdint>  // for int8_t\n#include <limits>\n#include <string>\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\n/**\n * \\brief A json reader, currently error checking and utf-8 is not fully supported.\n */\nclass JsonReader {\n public:\n  using Char = std::int8_t;\n\n protected:\n  size_t constexpr static kMaxNumLength = std::numeric_limits<double>::max_digits10 + 1;\n\n  struct SourceLocation {\n   private:\n    std::size_t pos_{0};  // current position in raw_str_\n\n   public:\n    SourceLocation() = default;\n    size_t Pos() const { return pos_; }\n\n    void Forward() { pos_++; }\n    void Forward(uint32_t n) { pos_ += n; }\n  } cursor_;\n\n  StringView raw_str_;\n\n protected:\n  void SkipSpaces();\n\n  Char GetNextChar() {\n    if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {\n      return -1;\n    }\n    char ch = raw_str_[cursor_.Pos()];\n    cursor_.Forward();\n    return ch;\n  }\n\n  Char PeekNextChar() {\n    if (cursor_.Pos() == raw_str_.size()) {\n      return -1;\n    }\n    Char ch = raw_str_[cursor_.Pos()];\n    return ch;\n  }\n\n  /* \\brief Skip spaces and consume next character. 
*/\n  Char GetNextNonSpaceChar() {\n    SkipSpaces();\n    return GetNextChar();\n  }\n  /* \\brief Consume next character without first skipping empty space, throw when the next\n   *        character is not the expected one.\n   */\n  Char GetConsecutiveChar(char expected_char) {\n    Char result = GetNextChar();\n    if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); }\n    return result;\n  }\n\n  void Error(std::string msg) const;\n\n  // Report expected character\n  void Expect(Char c, Char got) {\n    std::string msg = \"Expecting: \\\"\";\n    msg += c;\n    msg += \"\\\", got: \\\"\";\n    if (got == EOF) {\n      msg += \"EOF\\\"\";\n    } else if (got == 0) {\n      msg += \"\\\\0\\\"\";\n    } else {\n      msg += std::to_string(got) + \" \\\"\";\n    }\n    Error(msg);\n  }\n\n  virtual Json ParseString();\n  virtual Json ParseObject();\n  virtual Json ParseArray();\n  virtual Json ParseNumber();\n  virtual Json ParseBoolean();\n  virtual Json ParseNull();\n\n  Json Parse();\n\n public:\n  explicit JsonReader(StringView str) :\n      raw_str_{str} {}\n\n  virtual ~JsonReader() = default;\n\n  virtual Json Load();\n};\n\nclass JsonWriter {\n  template <typename T, std::enable_if_t<!std::is_same_v<Json, T>>* = nullptr>\n  void Save(T const& v) {\n    this->Save(Json{v});\n  }\n  template <typename Array, typename Fn>\n  void WriteArray(Array const* arr, Fn&& fn) {\n    stream_->emplace_back('[');\n    auto const& vec = arr->GetArray();\n    size_t size = vec.size();\n    for (size_t i = 0; i < size; ++i) {\n      auto const& value = vec[i];\n      this->Save(fn(value));\n      if (i != size - 1) {\n        stream_->emplace_back(',');\n      }\n    }\n    stream_->emplace_back(']');\n  }\n\n protected:\n  std::vector<char>* stream_;\n\n public:\n  explicit JsonWriter(std::vector<char>* stream) : stream_{stream} {}\n\n  virtual ~JsonWriter() = default;\n\n  virtual void Save(Json json);\n\n  virtual void Visit(JsonArray  
const* arr);\n  virtual void Visit(F32Array  const* arr);\n  virtual void Visit(F64Array const*) { LOG(FATAL) << \"Only UBJSON format can handle f64 array.\"; }\n  virtual void Visit(I8Array  const* arr);\n  virtual void Visit(U8Array const* arr);\n  virtual void Visit(I16Array const* arr);\n  virtual void Visit(U16Array const* arr);\n  virtual void Visit(I32Array  const* arr);\n  virtual void Visit(U32Array  const* arr);\n  virtual void Visit(I64Array  const* arr);\n  virtual void Visit(U64Array  const* arr);\n  virtual void Visit(JsonObject const* obj);\n  virtual void Visit(JsonNumber const* num);\n  virtual void Visit(JsonInteger const* num);\n  virtual void Visit(JsonNull   const* null);\n  virtual void Visit(JsonString const* str);\n  virtual void Visit(JsonBoolean const* boolean);\n};\n\ntemplate <typename T, std::enable_if_t<sizeof(T) == 1>* = nullptr>\ninline T ToBigEndian(T v) {\n  return v;\n}\n\ntemplate <typename T, std::enable_if_t<sizeof(T) != 1>* = nullptr>\ninline T ToBigEndian(T v) {\n  static_assert(std::is_pod<T>::value, \"Only pod is supported.\");\n#if DMLC_LITTLE_ENDIAN\n  auto constexpr kS = sizeof(T);\n  std::conditional_t<kS == 2, uint16_t, std::conditional_t<kS == 4, uint32_t, uint64_t>> u;\n  std::memcpy(&u, &v, sizeof(u));\n  u = ByteSwap(u);\n  std::memcpy(&v, &u, sizeof(u));\n#endif  // DMLC_LITTLE_ENDIAN\n  return v;\n}\n\n/**\n * \\brief Reader for UBJSON https://ubjson.org/\n */\nclass UBJReader : public JsonReader {\n  Json Parse();\n\n  template <typename T>\n  T ReadStream() {\n    auto ptr = this->raw_str_.c_str() + cursor_.Pos();\n    T v{0};\n    std::memcpy(&v, ptr, sizeof(v));\n    cursor_.Forward(sizeof(v));\n    return v;\n  }\n\n  template <typename T>\n  T ReadPrimitive() {\n    auto v = ReadStream<T>();\n    v = ToBigEndian(v);\n    return v;\n  }\n\n  template <typename TypedArray>\n  auto ParseTypedArray(std::int64_t n) {\n    TypedArray results{static_cast<size_t>(n)};\n    for (int64_t i = 0; i < n; ++i) {\n      
auto v = this->ReadPrimitive<typename TypedArray::value_type>();\n      results.Set(i, v);\n    }\n    return Json{std::move(results)};\n  }\n\n  std::string DecodeStr();\n\n  Json ParseArray() override;\n  Json ParseObject() override;\n\n public:\n  using JsonReader::JsonReader;\n  Json Load() override;\n};\n\n/**\n * \\brief Writer for UBJSON https://ubjson.org/\n */\nclass UBJWriter : public JsonWriter {\n  void Visit(JsonArray const* arr) override;\n  void Visit(F32Array const* arr) override;\n  void Visit(F64Array const* arr) override;\n  void Visit(I8Array  const* arr) override;\n  void Visit(U8Array  const* arr) override;\n  void Visit(I16Array  const* arr) override;\n  void Visit(I32Array  const* arr) override;\n  void Visit(I64Array  const* arr) override;\n  void Visit(JsonObject const* obj) override;\n  void Visit(JsonNumber const* num) override;\n  void Visit(JsonInteger const* num) override;\n  void Visit(JsonNull const* null) override;\n  void Visit(JsonString const* str) override;\n  void Visit(JsonBoolean const* boolean) override;\n\n public:\n  using JsonWriter::JsonWriter;\n  void Save(Json json) override;\n};\n}      // namespace xgboost\n\n#endif  // XGBOOST_JSON_IO_H_\n"
  },
  {
    "path": "include/xgboost/learner.h",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n *\n * \\brief Learner interface that integrates objective, gbm and evaluation together.\n *  This is the user facing XGBoost training module.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_LEARNER_H_\n#define XGBOOST_LEARNER_H_\n\n#include <dmlc/io.h>           // for Serializable\n#include <xgboost/base.h>      // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, ..\n#include <xgboost/context.h>   // for Context\n#include <xgboost/gradient.h>  // for GradientContainer\n#include <xgboost/linalg.h>    // for Vector, VectorView\n#include <xgboost/metric.h>    // for Metric\n#include <xgboost/model.h>     // for Configurable, Model\n#include <xgboost/span.h>      // for Span\n#include <xgboost/task.h>      // for ObjInfo\n\n#include <algorithm>  // for max\n#include <cstdint>    // for int32_t, uint32_t, uint8_t\n#include <map>        // for map\n#include <memory>     // for shared_ptr, unique_ptr\n#include <string>     // for string\n#include <utility>    // for move\n#include <vector>     // for vector\n\nnamespace xgboost {\nclass FeatureMap;\nclass Metric;\nclass GradientBooster;\nclass ObjFunction;\nclass DMatrix;\nclass Json;\nstruct XGBAPIThreadLocalEntry;\ntemplate <typename T>\nclass HostDeviceVector;\nclass CatContainer;\n\nenum class PredictionType : std::uint8_t {  // NOLINT\n  kValue = 0,\n  kMargin = 1,\n  kContribution = 2,\n  kApproxContribution = 3,\n  kInteraction = 4,\n  kApproxInteraction = 5,\n  kLeaf = 6\n};\n\n/**\n * @brief Learner class that does training and prediction.\n *  This is the user facing module of xgboost training.\n *  The Load/Save function corresponds to the model used in python/R.\n *  @code\n *\n *  std::unique_ptr<Learner> learner{Learner::Create(cache_mats)};\n *  learner->Configure(configs);\n *\n *  for (int iter = 0; iter < max_iter; ++iter) {\n *    learner->UpdateOneIter(iter, train_mat);\n *    LOG(INFO) << learner->EvalOneIter(iter, 
data_sets, data_names);\n *  }\n *\n *  @endcode\n */\nclass Learner : public Model, public Configurable, public dmlc::Serializable {\n public:\n  ~Learner() override;\n  /*!\n   * \\brief Configure Learner based on set parameters.\n   */\n  virtual void Configure() = 0;\n  /*!\n   * \\brief update the model for one iteration\n   *  With the specified objective function.\n   * \\param iter current iteration number\n   * \\param train reference to the data matrix.\n   */\n  virtual void UpdateOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train) = 0;\n  /**\n   * @brief Do customized gradient boosting with in_gpair.\n   *\n   * @note in_gpair can be mutated after this call.\n   *\n   * @param iter current iteration number\n   * @param train reference to the data matrix.\n   * @param in_gpair The input gradient statistics.\n   */\n  virtual void BoostOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train,\n                            GradientContainer* in_gpair) = 0;\n  /*!\n   * \\brief evaluate the model for specific iteration using the configured metrics.\n   * \\param iter iteration number\n   * \\param data_sets datasets to be evaluated.\n   * \\param data_names name of each dataset\n   * \\return a string corresponding to the evaluation result\n   */\n  virtual std::string EvalOneIter(int iter,\n                                  const std::vector<std::shared_ptr<DMatrix>>& data_sets,\n                                  const std::vector<std::string>& data_names) = 0;\n  /*!\n   * \\brief get prediction given the model.\n   * \\param data input data\n   * \\param output_margin whether to only predict margin value instead of transformed prediction\n   * \\param out_preds output vector that stores the prediction\n   * \\param layer_begin Beginning of boosted tree layer used for prediction.\n   * \\param layer_end   End of booster layer. 
0 means do not limit trees.\n   * \\param training Whether the prediction result is used for training\n   * \\param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor\n   * \\param pred_contribs whether to only predict the feature contributions\n   * \\param approx_contribs whether to approximate the feature contributions for speed\n   * \\param pred_interactions whether to compute the feature pair contributions\n   */\n  virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,\n                       HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,\n                       bst_layer_t layer_end, bool training = false, bool pred_leaf = false,\n                       bool pred_contribs = false, bool approx_contribs = false,\n                       bool pred_interactions = false) = 0;\n\n  /*!\n   * \\brief Inplace prediction.\n   *\n   * \\param          p_fmat      A proxy DMatrix that contains the data and related meta info.\n   * \\param          type        Prediction type.\n   * \\param          missing     Missing value in the data.\n   * \\param [in,out] out_preds   Pointer to output prediction vector.\n   * \\param          layer_begin Beginning of boosted tree layer used for prediction.\n   * \\param          layer_end   End of booster layer. 0 means do not limit trees.\n   */\n  virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,\n                              HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,\n                              bst_layer_t layer_end) = 0;\n\n  /*!\n   * \\brief Calculate feature score.  
See doc in C API for outputs.\n   */\n  virtual void CalcFeatureScore(std::string const& importance_type,\n                                common::Span<int32_t const> trees,\n                                std::vector<bst_feature_t>* features,\n                                std::vector<float>* scores) = 0;\n\n  /*\n   * \\brief Get number of boosted rounds from gradient booster.\n   */\n  virtual int32_t BoostedRounds() const = 0;\n  /**\n   * \\brief Get the number of output groups from the model.\n   */\n  virtual std::uint32_t Groups() const = 0;\n\n  void LoadModel(Json const& in) override = 0;\n  void SaveModel(Json* out) const override = 0;\n\n  /*!\n   * \\brief Set multiple parameters at once.\n   *\n   * \\param args parameters.\n   */\n  virtual void SetParams(Args const& args) = 0;\n  /*!\n   * \\brief Set parameter for booster\n   *\n   *  The property will NOT be saved along with booster\n   *\n   * \\param key   The key of parameter\n   * \\param value The value of parameter\n   */\n  virtual void SetParam(const std::string& key, const std::string& value) = 0;\n\n  /**\n   * @brief Get the number of features of the booster.\n   * @return The number of features\n   */\n  virtual bst_feature_t GetNumFeature() const = 0;\n\n  /*!\n   * \\brief Set additional attribute to the Booster.\n   *\n   *  The property will be saved along the booster.\n   *\n   * \\param key The key of the property.\n   * \\param value The value of the property.\n   */\n  virtual void SetAttr(const std::string& key, const std::string& value) = 0;\n  /*!\n   * \\brief Get attribute from the booster.\n   *  The property will be saved along the booster.\n   * \\param key The key of the attribute.\n   * \\param out The output value.\n   * \\return Whether the key exists among booster's attributes.\n   */\n  virtual bool GetAttr(const std::string& key, std::string* out) const = 0;\n  /*!\n   * \\brief Delete an attribute from the booster.\n   * \\param key The key of the 
attribute.\n   * \\return Whether the key was found among booster's attributes.\n   */\n  virtual bool DelAttr(const std::string& key) = 0;\n  /*!\n   * \\brief Get a vector of attribute names from the booster.\n   * \\return vector of attribute name strings.\n   */\n  virtual std::vector<std::string> GetAttrNames() const = 0;\n  /*!\n   * \\brief Set the feature names for current booster.\n   * \\param fn Input feature names\n   */\n  virtual  void SetFeatureNames(std::vector<std::string> const& fn) = 0;\n  /*!\n   * \\brief Get the feature names for current booster.\n   * \\param fn Output feature names\n   */\n  virtual void GetFeatureNames(std::vector<std::string>* fn) const = 0;\n  /*!\n   * \\brief Set the feature types for current booster.\n   * \\param ft Input feature types.\n   */\n  virtual void SetFeatureTypes(std::vector<std::string> const& ft) = 0;\n  /*!\n   * \\brief Get the feature types for current booster.\n   * \\param ft Output feature types\n   */\n  virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;\n  /**\n   * @brief Getter for categories.\n   */\n  [[nodiscard]] virtual CatContainer const* Cats() const = 0;\n  /**\n   * @brief Slice the model.\n   *\n   * See InplacePredict for layer parameters.\n   *\n   * @param step step size between slice.\n   * @param out_of_bound Return true if end layer is out of bound.\n   *\n   * @return a sliced model.\n   */\n  virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step,\n                         bool* out_of_bound) = 0;\n  /*!\n   * \\brief dump the model in the requested format\n   * \\param fmap feature map that may help give interpretations of feature\n   * \\param with_stats extra statistics while dumping model\n   * \\param format the format to dump the model in\n   * \\return a vector of dump for boosters.\n   */\n  virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,\n                                             bool with_stats,\n          
                                   std::string format) = 0;\n\n  virtual XGBAPIThreadLocalEntry& GetThreadLocal() const = 0;\n  /**\n   * @brief Reset the booster object to release data caches used for training.\n   */\n  virtual void Reset() = 0;\n  /*!\n   * \\brief Create a new instance of learner.\n   * \\param cache_data The matrix to cache the prediction.\n   * \\return Created learner.\n   */\n  static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);\n  /**\n   * \\brief Return the context object of this Booster.\n   */\n  virtual Context const* Ctx() const = 0;\n  /*!\n   * \\brief Get configuration arguments currently stored by the learner\n   * \\return Key-value pairs representing configuration arguments\n   */\n  virtual const std::map<std::string, std::string>& GetConfigurationArguments() const = 0;\n\n protected:\n  /*! \\brief objective function */\n  std::unique_ptr<ObjFunction> obj_;\n  /*! \\brief The gradient booster used by the model*/\n  std::unique_ptr<GradientBooster> gbm_;\n  /*! \\brief The evaluation metrics used to evaluate the model. */\n  std::vector<std::unique_ptr<Metric> > metrics_;\n  /*! \\brief Training parameter. 
*/\n  Context ctx_;\n};\n\nstruct LearnerModelParamLegacy;\n\n/**\n * @brief Strategy for building multi-target models.\n */\nenum class MultiStrategy : std::int32_t {\n  kOneOutputPerTree = 0,\n  kMultiOutputTree = 1,\n};\n\n/**\n * @brief Basic model parameters, used to describe the booster.\n */\nstruct LearnerModelParam {\n private:\n  /**\n   * @brief Global bias, this is just a scalar value but can be extended to vector when we\n   *        support multi-class and multi-target.\n   *\n   * The value stored here is the value before applying the inverse link function, used\n   * for initializing the prediction matrix/vector.\n   */\n  linalg::Vector<float> base_score_;\n\n  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,\n                    MultiStrategy multi_strategy);\n\n public:\n  /**\n   * @brief The number of features.\n   */\n  bst_feature_t num_feature{0};\n  /**\n   * @brief The number of classes or targets.\n   */\n  std::uint32_t num_output_group{0};\n  /**\n   * @brief Current task, determined by objective.\n   */\n  ObjInfo task{ObjInfo::kRegression};\n  /**\n   * @brief Strategy for building multi-target models.\n   */\n  MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};\n\n  LearnerModelParam() = default;\n  LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,\n                    linalg::Vector<float> base_score, ObjInfo t, MultiStrategy multi_strategy);\n  // This ctor is only used by tests.\n  LearnerModelParam(bst_feature_t n_features, linalg::Vector<float> base_score,\n                    std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)\n      : base_score_{std::move(base_score)},\n        num_feature{n_features},\n        num_output_group{std::max(n_groups, n_targets)},\n        multi_strategy{multi_strategy} {}\n\n  linalg::VectorView<float const> BaseScore(Context const* ctx) const;\n  [[nodiscard]] linalg::VectorView<float const> 
BaseScore(DeviceOrd device) const;\n\n  void Copy(LearnerModelParam const& that);\n  [[nodiscard]] bool IsVectorLeaf() const noexcept {\n    return multi_strategy == MultiStrategy::kMultiOutputTree;\n  }\n  [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }\n  [[nodiscard]] bst_target_t LeafLength() const noexcept {\n    return this->IsVectorLeaf() ? this->OutputLength() : 1;\n  }\n\n  /* \\brief Whether this parameter is initialized with LearnerModelParamLegacy. */\n  [[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; }\n};\n\n}  // namespace xgboost\n#endif  // XGBOOST_LEARNER_H_\n"
  },
  {
    "path": "include/xgboost/linalg.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n *\n * @file linalg.h\n * @brief Linear algebra related utilities.\n */\n#ifndef XGBOOST_LINALG_H_\n#define XGBOOST_LINALG_H_\n\n#include <dmlc/endian.h>\n#include <xgboost/base.h>\n#include <xgboost/context.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/json.h>\n#include <xgboost/span.h>\n\n#include <algorithm>\n#include <cassert>\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n#include <limits>\n#include <string>\n#include <tuple>  // for make_tuple\n#include <type_traits>\n#include <utility>\n#include <vector>\n\n#if defined(_MSC_VER)\n#include <intrin.h>\n#endif  // defined(_MSC_VER)\n\n// decouple it from xgboost.\n#ifndef LINALG_HD\n#if defined(__CUDA__) || defined(__NVCC__)\n#define LINALG_HD __host__ __device__\n#else\n#define LINALG_HD\n#endif  // defined (__CUDA__) || defined(__NVCC__)\n#endif  // LINALG_HD\n\nnamespace xgboost::linalg {\nnamespace detail {\n\nstruct ArrayInterfaceHandler {\n  template <typename T>\n  static constexpr char TypeChar() {\n    return (std::is_floating_point_v<T>\n                ? 'f'\n                : (std::is_integral_v<T> ? (std::is_signed_v<T> ? 'i' : 'u') : '\\0'));\n  }\n};\n\ntemplate <size_t dim, typename S, typename Head, size_t D>\nconstexpr size_t Offset(S (&strides)[D], size_t n, Head head) {\n  static_assert(dim < D);\n  return n + head * strides[dim];\n}\n\ntemplate <size_t dim, typename S, size_t D, typename Head, typename... 
Tail>\nconstexpr std::enable_if_t<sizeof...(Tail) != 0, size_t> Offset(S (&strides)[D], size_t n,\n                                                                Head head, Tail &&...rest) {\n  static_assert(dim < D);\n  return Offset<dim + 1>(strides, n + (head * strides[dim]), std::forward<Tail>(rest)...);\n}\n\ntemplate <int32_t D, bool f_array = false>\nconstexpr void CalcStride(size_t const (&shape)[D], size_t (&stride)[D]) {\n  if (f_array) {\n    stride[0] = 1;\n    for (int32_t s = 1; s < D; ++s) {\n      stride[s] = shape[s - 1] * stride[s - 1];\n    }\n  } else {\n    stride[D - 1] = 1;\n    for (int32_t s = D - 2; s >= 0; --s) {\n      stride[s] = shape[s + 1] * stride[s + 1];\n    }\n  }\n}\n\nstruct AllTag {};\n\nstruct IntTag {};\n\ntemplate <typename I>\nstruct RangeTag {\n  I beg;\n  I end;\n  [[nodiscard]] constexpr size_t Size() const { return end - beg; }\n};\n\n/**\n * \\brief Calculate the dimension of sliced tensor.\n */\ntemplate <typename T>\nconstexpr int32_t CalcSliceDim() {\n  return std::is_same_v<T, IntTag> ? 0 : 1;\n}\n\ntemplate <typename T, typename... 
S>\nconstexpr std::enable_if_t<sizeof...(S) != 0, int32_t> CalcSliceDim() {\n  return CalcSliceDim<T>() + CalcSliceDim<S...>();\n}\n\ntemplate <int32_t D>\nconstexpr size_t CalcSize(size_t (&shape)[D]) {\n  size_t size = 1;\n  for (auto d : shape) {\n    size *= d;\n  }\n  return size;\n}\n\ntemplate <typename S>\nusing RemoveCRType = std::remove_const_t<std::remove_reference_t<S>>;\n\ntemplate <typename S>\nusing IndexToTag = std::conditional_t<std::is_integral_v<RemoveCRType<S>>, IntTag, S>;\n\ntemplate <int32_t n, typename Fn>\nLINALG_HD constexpr auto UnrollLoop(Fn fn) {\n#if defined __CUDA_ARCH__\n#pragma unroll n\n#endif  // defined __CUDA_ARCH__\n  for (int32_t i = 0; i < n; ++i) {\n    fn(i);\n  }\n}\n\ntemplate <typename T>\nint32_t NativePopc(T v) {\n  int c = 0;\n  for (; v != 0; v &= v - 1) c++;\n  return c;\n}\n\ninline LINALG_HD int Popc(uint32_t v) {\n#if defined(__CUDA_ARCH__)\n  return __popc(v);\n#elif defined(__GNUC__) || defined(__clang__)\n  return __builtin_popcount(v);\n#elif defined(_MSC_VER)\n  return __popcnt(v);\n#else\n  return NativePopc(v);\n#endif  // compiler\n}\n\ninline LINALG_HD int Popc(uint64_t v) {\n#if defined(__CUDA_ARCH__)\n  return __popcll(v);\n#elif defined(__GNUC__) || defined(__clang__)\n  return __builtin_popcountll(v);\n#elif defined(_MSC_VER) && defined(_M_X64)\n  return __popcnt64(v);\n#else\n  return NativePopc(v);\n#endif  // compiler\n}\n\ntemplate <std::size_t D, typename Head>\nLINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {\n  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, \"Invalid index type.\");\n  arr[D - 1] = head;\n}\n\n/**\n * \\brief Convert index from parameter pack to C-style array.\n */\ntemplate <std::size_t D, typename Head, typename... 
Rest>\nLINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {\n  static_assert(sizeof...(Rest) < D, \"Index overflow.\");\n  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, \"Invalid index type.\");\n  arr[D - sizeof...(Rest) - 1] = head;\n  IndexToArr(arr, std::forward<Rest>(index)...);\n}\n\ntemplate <class T, std::size_t N, std::size_t... Idx>\nconstexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {\n  return std::make_tuple(arr[Idx]...);\n}\n\n/**\n * \\brief Convert C-style array to std::tuple.\n */\ntemplate <class T, std::size_t N>\nconstexpr auto ArrToTuple(T (&arr)[N]) {\n  return ArrToTuple(arr, std::make_index_sequence<N>{});\n}\n\n// uint division optimization inspired by the CIndexer in cupy.  Division operation is\n// slow on both CPU and GPU, especially 64 bit integer.  So here we first try to avoid 64\n// bit when the index is smaller, then try to avoid division when it's exp of 2.\ntemplate <typename I, std::int32_t D>\nLINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {\n  std::size_t index[D]{0};\n  static_assert(std::is_signed_v<decltype(D)>,\n                \"Don't change the type without changing the for loop.\");\n  auto const sptr = shape.data();\n  for (int32_t dim = D; --dim > 0;) {\n    auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);\n    if (s & (s - 1)) {\n      auto t = idx / s;\n      index[dim] = idx - t * s;\n      idx = t;\n    } else {  // exp of 2\n      index[dim] = idx & (s - 1);\n      idx >>= Popc(s - 1);\n    }\n  }\n  index[0] = idx;\n  return ArrToTuple(index);\n}\n\ntemplate <size_t dim, typename I, int32_t D>\nvoid ReshapeImpl(size_t (&out_shape)[D], I s) {\n  static_assert(dim < D);\n  out_shape[dim] = s;\n}\n\ntemplate <size_t dim, int32_t D, typename... 
S, typename I,\n          std::enable_if_t<sizeof...(S) != 0> * = nullptr>\nvoid ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) {\n  static_assert(dim < D);\n  out_shape[dim] = s;\n  ReshapeImpl<dim + 1>(out_shape, std::forward<S>(rest)...);\n}\n\n/**\n * C++ 17 conjunction\n */\ntemplate <class...>\nstruct Conjunction : std::true_type {};\ntemplate <class B1>\nstruct Conjunction<B1> : B1 {};\ntemplate <class B1, class... Bn>\nstruct Conjunction<B1, Bn...>\n    : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};\n\ntemplate <typename... Index>\nusing IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;\n\ntemplate <typename... Index>\nusing EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;\n}  // namespace detail\n\n/**\n * \\brief Specify all elements in the axis for slicing.\n */\nconstexpr detail::AllTag All() { return {}; }\n/**\n * \\brief Specify a range of elements in the axis for slicing.\n */\ntemplate <typename I>\nconstexpr detail::RangeTag<I> Range(I beg, I end) {\n  return {beg, end};\n}\n\nenum Order : std::uint8_t {\n  kC,  // Row major\n  kF,  // Col major\n};\n\n/**\n * @brief A tensor view with static type and dimension. It implements indexing and slicing.\n *\n * Most of the algorithms in XGBoost are implemented for both CPU and GPU without using\n * much linear algebra routines, this class is a helper intended to ease some high level\n * operations like indexing into prediction tensor or gradient matrix.  
It can be passed\n * into CUDA kernel as normal argument for GPU algorithms.\n *\n * Ideally we should add a template parameter `bool on_host` so that the compiler can\n * prevent passing/accessing the wrong view, but inheritance is heavily used in XGBoost so\n * some functions expect data types that can be used in everywhere (update prediction\n * cache for example).\n */\ntemplate <typename T, std::int32_t kDim>\nclass TensorView {\n public:\n  using ShapeT = std::size_t[kDim];\n  using StrideT = ShapeT;\n  using SizeType = std::size_t;\n\n  using element_type = T;                  // NOLINT\n  using value_type = std::remove_cv_t<T>;  // NOLINT\n\n private:\n  StrideT stride_{1};\n  ShapeT shape_{0};\n  common::Span<T> data_;\n  T *ptr_{nullptr};  // pointer of data_ to avoid bound check.\n\n  SizeType size_{0};\n  DeviceOrd device_;\n\n  // Unlike `Tensor`, the data_ can have arbitrary size since this is just a view.\n  LINALG_HD void CalcSize() {\n    if (data_.empty()) {\n      size_ = 0;\n    } else {\n      size_ = detail::CalcSize(shape_);\n    }\n  }\n\n  template <size_t old_dim, size_t new_dim, std::int32_t D, typename I>\n  LINALG_HD SizeType MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],\n                                  detail::RangeTag<I> &&range) const {\n    static_assert(new_dim < D);\n    static_assert(old_dim < kDim);\n    new_stride[new_dim] = stride_[old_dim];\n    new_shape[new_dim] = range.Size();\n    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);\n\n    auto offset = stride_[old_dim] * range.beg;\n    return offset;\n  }\n  /**\n   * \\brief Slice dimension for Range tag.\n   */\n  template <size_t old_dim, size_t new_dim, int32_t D, typename I, typename... 
S>\n  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D],\n                                  detail::RangeTag<I> &&range, S &&...slices) const {\n    static_assert(new_dim < D);\n    static_assert(old_dim < kDim);\n    new_stride[new_dim] = stride_[old_dim];\n    new_shape[new_dim] = range.Size();\n    assert(static_cast<decltype(shape_[old_dim])>(range.end) <= shape_[old_dim]);\n\n    auto offset = stride_[old_dim] * range.beg;\n    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,\n                                                     std::forward<S>(slices)...) +\n           offset;\n  }\n\n  template <size_t old_dim, size_t new_dim, int32_t D>\n  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const {\n    static_assert(new_dim < D);\n    static_assert(old_dim < kDim);\n    new_stride[new_dim] = stride_[old_dim];\n    new_shape[new_dim] = shape_[old_dim];\n    return 0;\n  }\n  /**\n   * \\brief Slice dimension for All tag.\n   */\n  template <size_t old_dim, size_t new_dim, int32_t D, typename... 
S>\n  LINALG_HD SizeType MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag,\n                                  S &&...slices) const {\n    static_assert(new_dim < D);\n    static_assert(old_dim < kDim);\n    new_stride[new_dim] = stride_[old_dim];\n    new_shape[new_dim] = shape_[old_dim];\n    return MakeSliceDim<old_dim + 1, new_dim + 1, D>(new_shape, new_stride,\n                                                     std::forward<S>(slices)...);\n  }\n\n  template <size_t old_dim, size_t new_dim, int32_t D, typename Index>\n  LINALG_HD SizeType MakeSliceDim([[maybe_unused]] size_t new_shape[D],\n                                  [[maybe_unused]] size_t new_stride[D], Index i) const {\n    static_assert(old_dim < kDim);\n    return stride_[old_dim] * i;\n  }\n  /**\n   * \\brief Slice dimension for Index tag.\n   */\n  template <size_t old_dim, size_t new_dim, int32_t D, typename Index, typename... S>\n  LINALG_HD std::enable_if_t<std::is_integral_v<Index>, size_t> MakeSliceDim(size_t new_shape[D],\n                                                                             size_t new_stride[D],\n                                                                             Index i,\n                                                                             S &&...slices) const {\n    static_assert(old_dim < kDim);\n    auto offset = stride_[old_dim] * i;\n    auto res =\n        MakeSliceDim<old_dim + 1, new_dim, D>(new_shape, new_stride, std::forward<S>(slices)...);\n    return res + offset;\n  }\n\n public:\n  size_t constexpr static kValueSize = sizeof(T);\n  size_t constexpr static kDimension = kDim;\n\n public:\n  /**\n   * \\brief Create a tensor with data and shape.\n   *\n   * \\tparam I     Type of the shape array element.\n   * \\tparam D     Size of the shape array, can be lesser than or equal to tensor dimension.\n   *\n   * \\param data   Raw data input, can be const if this tensor has const type in its\n   *               
template parameter.\n   * \\param shape  shape of the tensor\n   * \\param device Device ordinal\n   */\n  template <typename I, std::int32_t D>\n  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)\n      : TensorView{data, shape, device, Order::kC} {}\n\n  template <typename I, int32_t D>\n  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device, Order order)\n      : data_{data}, ptr_{data_.data()}, device_{device} {\n    static_assert(D > 0 && D <= kDim, \"Invalid shape.\");\n    // shape\n    detail::UnrollLoop<D>([&](auto i) { shape_[i] = shape[i]; });\n    for (auto i = D; i < kDim; ++i) {\n      shape_[i] = 1;\n    }\n    // stride\n    switch (order) {\n      case Order::kC: {\n        detail::CalcStride(shape_, stride_);\n        break;\n      }\n      case Order::kF: {\n        detail::CalcStride<kDim, true>(shape_, stride_);\n        break;\n      }\n      default: {\n        SPAN_CHECK(false);\n      }\n    }\n    // size\n    this->CalcSize();\n  }\n\n  /**\n   * \\brief Create a tensor with data, shape and strides.  
Don't use this constructor if\n   *        stride can be calculated from shape.\n   */\n  template <typename I, std::int32_t D>\n  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],\n                       DeviceOrd device)\n      : data_{data}, ptr_{data_.data()}, device_{device} {\n    static_assert(D == kDim, \"Invalid shape & stride.\");\n    detail::UnrollLoop<D>([&](auto i) {\n      shape_[i] = shape[i];\n      stride_[i] = stride[i];\n    });\n    this->CalcSize();\n  }\n\n  template <\n      typename U,\n      std::enable_if_t<common::detail::IsAllowedElementTypeConversion<U, T>::value> * = nullptr>\n  LINALG_HD TensorView(TensorView<U, kDim> const &that)  // NOLINT\n      : data_{that.Values()}, ptr_{data_.data()}, size_{that.Size()}, device_{that.Device()} {\n    detail::UnrollLoop<kDim>([&](auto i) {\n      stride_[i] = that.Stride(i);\n      shape_[i] = that.Shape(i);\n    });\n  }\n\n  /**\n   * \\brief Index the tensor to obtain a scalar value.\n   *\n   * \\code\n   *\n   *   // Create a 3-dim tensor.\n   *   Tensor<float, 3> t {data, shape, 0};\n   *   float pi = 3.14159;\n   *   t(1, 2, 3) = pi;\n   *   ASSERT_EQ(t(1, 2, 3), pi);\n   *\n   * \\endcode\n   */\n  template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>\n  LINALG_HD T &operator()(Index &&...index) {\n    static_assert(sizeof...(index) <= kDim, \"Invalid index.\");\n    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);\n    assert(offset < data_.size() && \"Out of bound access.\");\n    return ptr_[offset];\n  }\n  /**\n   * \\brief Index the tensor to obtain a scalar value.\n   */\n  template <typename... 
Index, detail::EnableIfIntegral<Index...> * = nullptr>\n  LINALG_HD T const &operator()(Index &&...index) const {\n    static_assert(sizeof...(index) <= kDim, \"Invalid index.\");\n    size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);\n    assert(offset < data_.size() && \"Out of bound access.\");\n    return ptr_[offset];\n  }\n\n  /**\n   * \\brief Slice the tensor.  The returned tensor has inferred dim and shape.  Scalar\n   *        result is not supported.\n   *\n   * \\code\n   *\n   *   // Create a 3-dim tensor.\n   *   Tensor<float, 3> t {data, shape, 0};\n   *   // s has 2 dimensions (matrix)\n   *   auto s = t.Slice(1, All(), All());\n   *\n   * \\endcode\n   */\n  template <typename... S>\n  LINALG_HD auto Slice(S &&...slices) const {\n    static_assert(sizeof...(slices) <= kDim, \"Invalid slice.\");\n    int32_t constexpr kNewDim{detail::CalcSliceDim<detail::IndexToTag<S>...>()};\n    size_t new_shape[kNewDim];\n    size_t new_stride[kNewDim];\n    auto offset = MakeSliceDim<0, 0, kNewDim>(new_shape, new_stride, std::forward<S>(slices)...);\n    // ret is a different type due to changed dimension, so we can not access its private\n    // fields.\n    TensorView<T, kNewDim> ret{data_.subspan(data_.empty() ? 
0 : offset), new_shape, new_stride,\n                               device_};\n    return ret;\n  }\n\n  LINALG_HD auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }\n  /**\n   * Get the shape for i^th dimension\n   */\n  LINALG_HD auto Shape(size_t i) const { return shape_[i]; }\n  LINALG_HD auto Stride() const { return common::Span<size_t const, kDim>{stride_}; }\n  /**\n   * Get the stride for i^th dimension, stride is specified as number of items instead of bytes.\n   */\n  LINALG_HD auto Stride(size_t i) const { return stride_[i]; }\n\n  /**\n   * @brief Number of items in the tensor.\n   */\n  [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }\n  [[nodiscard]] bool Empty() const { return Size() == 0; }\n  /**\n   * \\brief Whether this is a contiguous array, both C and F contiguous returns true.\n   */\n  [[nodiscard]] LINALG_HD bool Contiguous() const {\n    return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();\n  }\n  /**\n   * \\brief Whether it's a c-contiguous array.\n   */\n  [[nodiscard]] LINALG_HD bool CContiguous() const {\n    StrideT stride;\n    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);\n    // It's contiguous if the stride can be calculated from shape.\n    detail::CalcStride(shape_, stride);\n    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};\n  }\n  /**\n   * \\brief Whether it's a f-contiguous array.\n   */\n  [[nodiscard]] LINALG_HD bool FContiguous() const {\n    StrideT stride;\n    static_assert(std::is_same_v<decltype(stride), decltype(stride_)>);\n    // It's contiguous if the stride can be calculated from shape.\n    detail::CalcStride<kDim, true>(shape_, stride);\n    return common::Span<size_t const, kDim>{stride_} == common::Span<size_t const, kDim>{stride};\n  }\n  /**\n   * \\brief Obtain a reference to the raw data.\n   */\n  LINALG_HD auto Values() const -> decltype(data_) const & { return 
data_; }\n  /**\n   * \\brief Obtain the CUDA device ordinal.\n   */\n  LINALG_HD auto Device() const { return device_; }\n};\n\n/**\n * \\brief Constructor for automatic type deduction.\n */\ntemplate <typename Container, typename... S,\n          std::enable_if_t<!common::detail::IsSpan<Container>::value &&\n                           !std::is_pointer_v<Container>> * = nullptr>\nauto MakeTensorView(Context const *ctx, Container &data, S &&...shape) {  // NOLINT\n  using T = std::conditional_t<std::is_const_v<Container>,\n                               std::add_const_t<typename Container::value_type>,\n                               typename Container::value_type>;\n  std::size_t in_shape[sizeof...(S)];\n  detail::IndexToArr(in_shape, std::forward<S>(shape)...);\n  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device()};\n}\n\ntemplate <typename T, decltype(common::dynamic_extent) ext, typename... S>\nLINALG_HD auto MakeTensorView(DeviceOrd device, common::Span<T, ext> data, S &&...shape) {\n  std::size_t in_shape[sizeof...(S)];\n  detail::IndexToArr(in_shape, std::forward<S>(shape)...);\n  return TensorView<T, sizeof...(S)>{data, in_shape, device};\n}\n\ntemplate <typename T, decltype(common::dynamic_extent) ext, typename... S>\nauto MakeTensorView(Context const *ctx, common::Span<T, ext> data, S &&...shape) {\n  return MakeTensorView(ctx->Device(), data, std::forward<S>(shape)...);\n}\n\ntemplate <typename T, decltype(common::dynamic_extent) ext, typename... S>\nauto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data, S &&...shape) {\n  std::size_t in_shape[sizeof...(S)];\n  detail::IndexToArr(in_shape, std::forward<S>(shape)...);\n  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->Device(), order};\n}\n\ntemplate <typename T, typename... S>\nauto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {\n  auto span = ctx->IsCPU() ? 
data->HostSpan() : data->DeviceSpan();\n  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);\n}\n\ntemplate <typename T, typename... S>\nauto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {\n  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();\n  return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);\n}\n\n/**\n * \\brief Turns linear index into multi-dimension index.  Similar to numpy unravel.\n */\ntemplate <size_t D>\nLINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {\n  if (idx > std::numeric_limits<uint32_t>::max()) {\n    return detail::UnravelImpl<uint64_t, D>(static_cast<uint64_t>(idx), shape);\n  } else {\n    return detail::UnravelImpl<uint32_t, D>(static_cast<uint32_t>(idx), shape);\n  }\n}\n\ntemplate <size_t D>\nLINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {\n  return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));\n}\n\ntemplate <typename... S>\nLINALG_HD auto UnravelIndex(std::size_t idx, S... 
shape) {\n  std::size_t s[sizeof...(S)];\n  detail::IndexToArr(s, shape...);\n  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));\n}\n\n/**\n * \\brief A view over a vector, specialization of Tensor\n *\n * \\tparam T data type of vector\n */\ntemplate <typename T>\nusing VectorView = TensorView<T, 1>;\n\n/**\n * \\brief Create a vector view from contiguous memory.\n *\n * \\param ptr Pointer to the contiguous memory.\n * \\param s   Size of the vector.\n * \\param device (optional) Device ordinal, default to be host.\n */\ntemplate <typename T>\nauto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {\n  return linalg::TensorView<T, 1>{{ptr, s}, {s}, device};\n}\n\ntemplate <typename T>\nauto MakeVec(DeviceOrd device, common::Span<T> s) {\n  return linalg::TensorView<T, 1>{s, {s.size()}, device};\n}\n\ntemplate <typename T>\nauto MakeVec(std::vector<T> const &v) {\n  return linalg::TensorView<std::add_const_t<T>, 1>{\n      {v.data(), v.size()}, {v.size()}, DeviceOrd::CPU()};\n}\n\ntemplate <typename T>\nauto MakeVec(HostDeviceVector<T> *data) {\n  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),\n                 data->Device());\n}\n\ntemplate <typename T>\nauto MakeVec(HostDeviceVector<T> const *data) {\n  return MakeVec(data->Device().IsCPU() ? 
data->ConstHostPointer() : data->ConstDevicePointer(),\n                 data->Size(), data->Device());\n}\n\n/**\n * \\brief A view over a matrix, specialization of Tensor.\n *\n * \\tparam T data type of matrix\n */\ntemplate <typename T>\nusing MatrixView = TensorView<T, 2>;\n\n/**\n * \\brief Array Interface defined by\n * <a href=\"https://numpy.org/doc/stable/reference/arrays.interface.html\">numpy</a>.\n *\n * `stream` is optionally included when data is on CUDA device.\n */\ntemplate <typename T, std::int32_t D>\nJson ArrayInterface(TensorView<T const, D> const &t) {\n  Json array_interface{Object{}};\n  array_interface[\"data\"] = std::vector<Json>(2);\n  array_interface[\"data\"][0] = Integer{reinterpret_cast<int64_t>(t.Values().data())};\n  array_interface[\"data\"][1] = Boolean{true};\n  if (t.Device().IsCUDA()) {\n    // Change this once we have different CUDA stream.\n    array_interface[\"stream\"] = Integer{2};\n  }\n  std::vector<Json> shape(t.Shape().size());\n  std::vector<Json> stride(t.Stride().size());\n  for (size_t i = 0; i < t.Shape().size(); ++i) {\n    shape[i] = Integer(t.Shape(i));\n    stride[i] = Integer(t.Stride(i) * sizeof(T));\n  }\n  array_interface[\"shape\"] = Array{shape};\n  array_interface[\"strides\"] = Array{stride};\n  array_interface[\"version\"] = 3;\n\n  char constexpr kT = detail::ArrayInterfaceHandler::TypeChar<T>();\n  static_assert(kT != '\\0');\n  if (DMLC_LITTLE_ENDIAN) {\n    array_interface[\"typestr\"] = String{\"<\" + (kT + std::to_string(sizeof(T)))};\n  } else {\n    array_interface[\"typestr\"] = String{\">\" + (kT + std::to_string(sizeof(T)))};\n  }\n  return array_interface;\n}\n\n/**\n * \\brief Same as const version, but returns non-readonly data pointer.\n */\ntemplate <typename T, int32_t D>\nJson ArrayInterface(TensorView<T, D> const &t) {\n  TensorView<T const, D> const &as_const = t;\n  auto res = ArrayInterface(as_const);\n  res[\"data\"][1] = Boolean{false};\n  return res;\n}\n\n/**\n * \\brief 
Return string representation of array interface.\n */\ntemplate <typename T, int32_t D>\nauto ArrayInterfaceStr(TensorView<T const, D> const &t) {\n  std::string str;\n  Json::Dump(ArrayInterface(t), &str);\n  return str;\n}\n\ntemplate <typename T, int32_t D>\nauto ArrayInterfaceStr(TensorView<T, D> const &t) {\n  std::string str;\n  Json::Dump(ArrayInterface(t), &str);\n  return str;\n}\n\ntemplate <typename T>\nauto Make1dInterface(T const *vec, std::size_t len) {\n  Context ctx;\n  auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);\n  auto str = linalg::ArrayInterfaceStr(t);\n  return str;\n}\n\n/**\n * \\brief A tensor storage. To use it for other functionality like slicing one needs to\n *        obtain a view first.  This way we can use it on both host and device.\n */\ntemplate <typename T, int32_t kDim = 5>\nclass Tensor {\n public:\n  using ShapeT = std::size_t[kDim];\n  using StrideT = ShapeT;\n\n private:\n  HostDeviceVector<T> data_;\n  ShapeT shape_{0};\n  Order order_{Order::kC};\n\n  template <typename I, std::int32_t D>\n  void Initialize(I const (&shape)[D], DeviceOrd device) {\n    static_assert(D <= kDim, \"Invalid shape.\");\n    std::copy(shape, shape + D, shape_);\n    for (auto i = D; i < kDim; ++i) {\n      shape_[i] = 1;\n    }\n    if (!device.IsCPU()) {\n      data_.SetDevice(device);\n      data_.ConstDevicePointer();  // Pull to device;\n    }\n    CHECK_EQ(data_.Size(), detail::CalcSize(shape_));\n  }\n\n public:\n  Tensor() = default;\n\n  /**\n   * \\brief Create a tensor with shape and device ordinal.  
The storage is initialized\n   *        automatically.\n   *\n   * See \\ref TensorView for parameters of this constructor.\n   */\n  template <typename I, int32_t D>\n  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)\n      : Tensor{common::Span<I const, D>{shape}, device, order} {}\n\n  template <typename I, size_t D>\n  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)\n      : order_{order} {\n    // No device unroll as this is a host only function.\n    std::copy(shape.data(), shape.data() + D, shape_);\n    for (auto i = D; i < kDim; ++i) {\n      shape_[i] = 1;\n    }\n    auto size = detail::CalcSize(shape_);\n    if (!device.IsCPU()) {\n      data_.SetDevice(device);\n    }\n    data_.Resize(size);\n    if (!device.IsCPU()) {\n      data_.DevicePointer();  // Pull to device\n    }\n  }\n  /**\n   * Initialize from 2 host iterators.\n   */\n  template <typename It, typename I, int32_t D>\n  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)\n      : order_{order} {\n    auto &h_vec = data_.HostVector();\n    h_vec.insert(h_vec.begin(), begin, end);\n    // shape\n    this->Initialize(shape, device);\n  }\n\n  template <typename I, int32_t D>\n  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,\n                  Order order = kC)\n      : order_{order} {\n    auto &h_vec = data_.HostVector();\n    h_vec = data;\n    // shape\n    this->Initialize(shape, device);\n  }\n  /**\n   * \\brief Index operator. Not thread safe, should not be used in performance critical\n   *        region. For more efficient indexing, consider getting a view first.\n   */\n  template <typename... Index>\n  T &operator()(Index &&...idx) {\n    return this->HostView()(std::forward<Index>(idx)...);\n  }\n  /**\n   * \\brief Index operator. Not thread safe, should not be used in performance critical\n   *        region. 
For more efficient indexing, consider getting a view first.\n   */\n  template <typename... Index>\n  T const &operator()(Index &&...idx) const {\n    return this->HostView()(std::forward<Index>(idx)...);\n  }\n\n  /**\n   * @brief Get a @ref TensorView for this tensor.\n   */\n  auto View(DeviceOrd device) {\n    if (device.IsCPU()) {\n      auto span = data_.HostSpan();\n      return TensorView<T, kDim>{span, shape_, device, order_};\n    } else {\n      data_.SetDevice(device);\n      auto span = data_.DeviceSpan();\n      return TensorView<T, kDim>{span, shape_, device, order_};\n    }\n  }\n  auto View(DeviceOrd device) const {\n    if (device.IsCPU()) {\n      auto span = data_.ConstHostSpan();\n      return TensorView<T const, kDim>{span, shape_, device, order_};\n    } else {\n      data_.SetDevice(device);\n      auto span = data_.ConstDeviceSpan();\n      return TensorView<T const, kDim>{span, shape_, device, order_};\n    }\n  }\n\n  auto HostView() { return this->View(DeviceOrd::CPU()); }\n  auto HostView() const { return this->View(DeviceOrd::CPU()); }\n\n  [[nodiscard]] std::size_t Size() const { return data_.Size(); }\n  [[nodiscard]] bool Empty() const { return Size() == 0; }\n\n  auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }\n  auto Shape(size_t i) const { return shape_[i]; }\n\n  HostDeviceVector<T> *Data() { return &data_; }\n  HostDeviceVector<T> const *Data() const { return &data_; }\n\n  /**\n   * \\brief Visitor function for modification that changes shape and data.\n   *\n   * \\tparam Fn function that takes a pointer to `HostDeviceVector` and a static sized\n   *         span as parameters.\n   */\n  template <typename Fn>\n  void ModifyInplace(Fn &&fn) {\n    fn(this->Data(), common::Span<size_t, kDim>{this->shape_});\n    CHECK_EQ(this->Data()->Size(), detail::CalcSize(this->shape_))\n        << \"Inconsistent size after modification.\";\n  }\n\n  /**\n   * \\brief Reshape the tensor.\n   *\n   *    If the total 
size is changed, then data in this tensor is no longer valid.\n   */\n  template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>\n  void Reshape(S &&...s) {\n    static_assert(sizeof...(S) <= kDim, \"Invalid shape.\");\n    detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);\n    auto constexpr kEnd = sizeof...(S);\n    static_assert(kEnd <= kDim, \"Invalid shape.\");\n    std::fill(shape_ + kEnd, shape_ + kDim, 1);\n    auto n = detail::CalcSize(shape_);\n    data_.Resize(n);\n  }\n\n  /**\n   * \\brief Reshape the tensor.\n   *\n   *    If the total size is changed, then data in this tensor is no longer valid.\n   */\n  template <size_t D>\n  void Reshape(common::Span<size_t const, D> shape) {\n    static_assert(D <= kDim, \"Invalid shape.\");\n    std::copy(shape.data(), shape.data() + D, this->shape_);\n    std::fill(shape_ + D, shape_ + kDim, 1);\n    auto n = detail::CalcSize(shape_);\n    data_.Resize(n);\n  }\n\n  template <size_t D>\n  void Reshape(size_t (&shape)[D]) {\n    this->Reshape(common::Span<size_t const, D>{shape});\n  }\n  /**\n   * \\brief Get a host view on the slice.\n   */\n  template <typename... S>\n  auto Slice(S &&...slices) const {\n    return this->HostView().Slice(std::forward<S>(slices)...);\n  }\n  /**\n   * \\brief Get a host view on the slice.\n   */\n  template <typename... S>\n  auto Slice(S &&...slices) {\n    return this->HostView().Slice(std::forward<S>(slices)...);\n  }\n\n  /**\n   * \\brief Set device ordinal for this tensor.\n   */\n  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }\n  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }\n};\n\ntemplate <typename T>\nusing Matrix = Tensor<T, 2>;\n\ntemplate <typename T>\nusing Vector = Tensor<T, 1>;\n\n/**\n * @brief Create an array without initialization.\n */\ntemplate <typename T, typename... 
Index>\nauto Empty(Context const *ctx, Index &&...index) {\n  Tensor<T, sizeof...(Index)> t;\n  t.SetDevice(ctx->Device());\n  t.Reshape(index...);\n  return t;\n}\n\n/**\n * @brief Create an array with the same shape and dtype as the input.\n */\ntemplate <typename T, std::int32_t kDim>\nauto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {\n  Tensor<T, kDim> t;\n  t.SetDevice(ctx->Device());\n  t.Reshape(in.Shape());\n  return t;\n}\n\n/**\n * @brief Create an array with value v.\n */\ntemplate <typename T, typename... Index>\nauto Constant(Context const *ctx, T v, Index &&...index) {\n  Tensor<T, sizeof...(Index)> t;\n  t.SetDevice(ctx->Device());\n  t.Reshape(index...);\n  t.Data()->Fill(std::move(v));\n  return t;\n}\n\n/**\n * @brief Like `np.zeros`, return a new array of given shape and type, filled with zeros.\n */\ntemplate <typename T, typename... Index>\nauto Zeros(Context const *ctx, Index &&...index) {\n  return Constant(ctx, static_cast<T>(0), index...);\n}\n\n// Only first axis is supported for now.\ntemplate <typename T, int32_t D>\nvoid Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {\n  if (r.Device().IsCUDA()) {\n    l->SetDevice(r.Device());\n  }\n  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {\n    for (size_t i = 1; i < D; ++i) {\n      if (shape[i] == 0) {\n        shape[i] = r.Shape(i);\n      } else {\n        CHECK_EQ(shape[i], r.Shape(i));\n      }\n    }\n    data->Extend(*r.Data());\n    shape[0] = l->Shape(0) + r.Shape(0);\n  });\n}\n\n/**\n * @brief Push an extra dim to the end.\n */\ntemplate <typename T>\nMatrixView<T> ExpandDim(VectorView<T> x) {\n  std::size_t shape[2]{x.Shape(0), 1};\n  std::size_t stride[2]{x.Stride(0), 1};\n  return MatrixView<T>{x.Values(), shape, stride, x.Device()};\n}\n}  // namespace xgboost::linalg\n\n#if defined(LINALG_HD)\n#undef LINALG_HD\n#endif  // defined(LINALG_HD)\n#endif  // XGBOOST_LINALG_H_\n"
  },
  {
    "path": "include/xgboost/linear_updater.h",
    "content": "/*\n * Copyright 2018 by Contributors\n */\n#pragma once\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/model.h>\n\n#include <functional>\n#include <string>\n#include <utility>\n#include <vector>\n\n\nnamespace xgboost {\n\nclass Json;\nstruct Context;\n\nnamespace gbm {\nclass GBLinearModel;\n}  // namespace gbm\n\n/*!\n * \\brief interface of linear updater\n */\nclass LinearUpdater : public Configurable {\n protected:\n  Context const* ctx_;\n\n public:\n  /*! \\brief virtual destructor */\n  ~LinearUpdater() override = default;\n  /*!\n   * \\brief Initialize the updater with given arguments.\n   * \\param args arguments to the objective function.\n   */\n  virtual void Configure(\n      const std::vector<std::pair<std::string, std::string> >& args) = 0;\n\n  /**\n   * \\brief Updates linear model given gradients.\n   *\n   * \\param in_gpair            The gradient pair statistics of the data.\n   * \\param data                Input data matrix.\n   * \\param model               Model to be updated.\n   * \\param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.\n   */\n  virtual void Update(linalg::Matrix<GradientPair>* in_gpair, DMatrix* data,\n                      gbm::GBLinearModel* model, double sum_instance_weight) = 0;\n\n  /*!\n   * \\brief Create a linear updater given name\n   * \\param name Name of the linear updater.\n   */\n  static LinearUpdater* Create(const std::string& name, Context const*);\n};\n\n/*!\n * \\brief Registry entry for linear updater.\n */\nstruct LinearUpdaterReg\n    : public dmlc::FunctionRegEntryBase<LinearUpdaterReg,\n                                        std::function<LinearUpdater*()> > {};\n\n/*!\n * \\brief Macro to register linear updater.\n */\n#define XGBOOST_REGISTER_LINEAR_UPDATER(UniqueId, Name)                        \\\n  static DMLC_ATTRIBUTE_UNUSED 
::xgboost::LinearUpdaterReg&                    \\\n      __make_##LinearUpdaterReg##_##UniqueId##__ =                             \\\n          ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->__REGISTER__( \\\n              Name)\n\n}  // namespace xgboost\n"
  },
  {
    "path": "include/xgboost/logging.h",
    "content": "/*!\n * Copyright (c) 2015-2019 by Contributors\n * \\file logging.h\n *\n * \\brief defines console logging options for xgboost.  Use to enforce unified print\n *  behavior.\n */\n#ifndef XGBOOST_LOGGING_H_\n#define XGBOOST_LOGGING_H_\n\n#include <dmlc/logging.h>\n#include <dmlc/thread_local.h>\n\n#include <xgboost/base.h>\n#include <xgboost/parameter.h>\n#include <xgboost/global_config.h>\n\n#include <sstream>\n#include <map>\n#include <string>\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\n\nclass BaseLogger {\n public:\n  BaseLogger() {\n#if XGBOOST_LOG_WITH_TIME\n    log_stream_ << \"[\" << dmlc::DateLogger().HumanDate() << \"] \";\n#endif  // XGBOOST_LOG_WITH_TIME\n  }\n  std::ostream& stream() { return log_stream_; }  // NOLINT\n\n protected:\n  std::ostringstream log_stream_;\n};\n\nclass ConsoleLogger : public BaseLogger {\n public:\n  enum class LogVerbosity {\n    kSilent = 0,\n    kWarning = 1,\n    kInfo = 2,   // information may interests users.\n    kDebug = 3,  // information only interesting to developers.\n    kIgnore = 4  // ignore global setting\n  };\n  using LV = LogVerbosity;\n\n private:\n  LogVerbosity cur_verbosity_;\n\n public:\n  static void Configure(Args const& args);\n\n  static LogVerbosity GlobalVerbosity();\n  static LogVerbosity DefaultVerbosity();\n  static bool ShouldLog(LogVerbosity verbosity);\n\n  ConsoleLogger() = delete;\n  explicit ConsoleLogger(LogVerbosity cur_verb);\n  ConsoleLogger(const std::string& file, int line, LogVerbosity cur_verb);\n  ~ConsoleLogger();\n};\n\nclass TrackerLogger : public BaseLogger {\n public:\n  ~TrackerLogger();\n};\n\n// custom logging callback; disabled for R wrapper\n#if !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0\nclass LogCallbackRegistry {\n public:\n  using Callback = void (*)(const char*);\n  LogCallbackRegistry()\n    : log_callback_([] (const char* msg) { std::cerr << msg << std::endl; }) {}\n  inline void Register(Callback 
log_callback) {\n    this->log_callback_ = log_callback;\n  }\n  inline Callback Get() const {\n    return log_callback_;\n  }\n private:\n  Callback log_callback_;\n};\n#else\nclass LogCallbackRegistry {\n public:\n  using Callback = void (*)(const char*);\n  LogCallbackRegistry() {}\n  inline void Register(Callback) {}\n  inline Callback Get() const { return nullptr; }\n};\n#endif  // !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0\n\nusing LogCallbackRegistryStore = dmlc::ThreadLocalStore<LogCallbackRegistry>;\n\n// Redefines LOG_WARNING for controling verbosity\n#if defined(LOG_WARNING)\n#undef  LOG_WARNING\n#endif  // defined(LOG_WARNING)\n#define LOG_WARNING                                                            \\\n  if (::xgboost::ConsoleLogger::ShouldLog(                                     \\\n          ::xgboost::ConsoleLogger::LV::kWarning))                             \\\n  ::xgboost::ConsoleLogger(__FILE__, __LINE__,                                 \\\n                           ::xgboost::ConsoleLogger::LogVerbosity::kWarning)\n\n// Redefines LOG_INFO for controling verbosity\n#if defined(LOG_INFO)\n#undef  LOG_INFO\n#endif  // defined(LOG_INFO)\n#define LOG_INFO                                                               \\\n  if (::xgboost::ConsoleLogger::ShouldLog(                                     \\\n          ::xgboost::ConsoleLogger::LV::kInfo))                                \\\n  ::xgboost::ConsoleLogger(__FILE__, __LINE__,                                 \\\n                           ::xgboost::ConsoleLogger::LogVerbosity::kInfo)\n\n#if defined(LOG_DEBUG)\n#undef LOG_DEBUG\n#endif  // defined(LOG_DEBUG)\n#define LOG_DEBUG                                                              \\\n  if (::xgboost::ConsoleLogger::ShouldLog(                                     \\\n          ::xgboost::ConsoleLogger::LV::kDebug))                               \\\n  ::xgboost::ConsoleLogger(__FILE__, __LINE__,                         
        \\\n                           ::xgboost::ConsoleLogger::LogVerbosity::kDebug)\n\n// redefines the logging macro if not existed\n#ifndef LOG\n#define LOG(severity) LOG_##severity.stream()\n#endif  // LOG\n\n// Enable LOG(CONSOLE) for print messages to console.\n#define LOG_CONSOLE ::xgboost::ConsoleLogger(           \\\n    ::xgboost::ConsoleLogger::LogVerbosity::kIgnore)\n// Enable LOG(TRACKER) for print messages to tracker\n#define LOG_TRACKER ::xgboost::TrackerLogger()\n\n#if defined(CHECK)\n#undef CHECK\n#define CHECK(cond)                                     \\\n  if (XGBOOST_EXPECT(!(cond), false))                   \\\n    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()  \\\n        << \"Check failed: \" #cond << \": \"\n#endif  // defined(CHECK)\n\n}  // namespace xgboost.\n#endif  // XGBOOST_LOGGING_H_\n"
  },
  {
    "path": "include/xgboost/metric.h",
    "content": "/**\n * Copyright 2014-2023 by XGBoost Contributors\n * \\file metric.h\n * \\brief interface of evaluation metric function supported in xgboost.\n * \\author Tianqi Chen, Kailong Chen\n */\n#ifndef XGBOOST_METRIC_H_\n#define XGBOOST_METRIC_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/model.h>\n\n#include <functional>\n#include <memory>  // shared_ptr\n#include <string>\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\nstruct Context;\n\n/*!\n * \\brief interface of evaluation metric used to evaluate model performance.\n *  This has nothing to do with training, but merely act as evaluation purpose.\n */\nclass Metric : public Configurable {\n protected:\n  Context const* ctx_{nullptr};\n\n public:\n  /*!\n   * \\brief Configure the Metric with the specified parameters.\n   * \\param args arguments to the objective function.\n   */\n  virtual void Configure(\n      const std::vector<std::pair<std::string, std::string> >&) {}\n  /*!\n   * \\brief Load configuration from JSON object\n   * By default, metric has no internal configuration;\n   * override this function to maintain internal configuration\n   * \\param in JSON object containing the configuration\n   */\n  void LoadConfig(Json const&) override {}\n  /*!\n   * \\brief Save configuration to JSON object\n   * By default, metric has no internal configuration;\n   * override this function to maintain internal configuration\n   * \\param out pointer to output JSON object\n   */\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(this->Name());\n  }\n\n  /**\n   * \\brief Evaluate a metric with DMatrix as input.\n   *\n   * \\param preds Prediction\n   * \\param p_fmat DMatrix that contains related information like labels.\n   */\n  virtual double Evaluate(HostDeviceVector<bst_float> const& preds,\n                          
std::shared_ptr<DMatrix> p_fmat) = 0;\n\n  /*! \\return name of metric */\n  virtual const char* Name() const = 0;\n  /*! \\brief virtual destructor */\n  ~Metric() override = default;\n  /*!\n   * \\brief create a metric according to name.\n   * \\param name name of the metric.\n   *        name can be in form metric[@]param and the name will be matched in the\n   *        registry.\n   * \\param ctx A global context\n   * \\return the created metric.\n   */\n  static Metric* Create(const std::string& name, Context const* ctx);\n};\n\n/*!\n * \\brief Registry entry for Metric factory functions.\n *  The additional parameter const char* param gives the value after @, can be null.\n *  For example, metric map@3, then: param == \"3\".\n */\nstruct MetricReg\n    : public dmlc::FunctionRegEntryBase<MetricReg,\n                                        std::function<Metric* (const char*)> > {\n};\n\n/*!\n * \\brief Macro to register metric.\n *\n * \\code\n * // example of registering a objective ndcg@k\n * XGBOOST_REGISTER_METRIC(RMSE, \"ndcg\")\n * .describe(\"Rooted mean square error.\")\n * .set_body([](const char* param) {\n *     int at_k = atoi(param);\n *     return new NDCG(at_k);\n *   });\n * \\endcode\n */\n#define XGBOOST_REGISTER_METRIC(UniqueId, Name)                         \\\n  ::xgboost::MetricReg&  __make_ ## MetricReg ## _ ## UniqueId ## __ =  \\\n      ::dmlc::Registry< ::xgboost::MetricReg>::Get()->__REGISTER__(Name)\n}  // namespace xgboost\n#endif  // XGBOOST_METRIC_H_\n"
  },
  {
    "path": "include/xgboost/model.h",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n *\n * @file model.h\n * @brief Defines the abstract interface for different components in XGBoost.\n */\n#ifndef XGBOOST_MODEL_H_\n#define XGBOOST_MODEL_H_\n\nnamespace xgboost {\n\nclass Json;\n\nstruct Model {\n  virtual ~Model() = default;\n  /*!\n   * \\brief load the model from a JSON object\n   * \\param in JSON object where to load the model from\n   */\n  virtual void LoadModel(Json const& in) = 0;\n  /*!\n   * \\brief save the model to a JSON object\n   * \\param out JSON container where to save the model to\n   */\n  virtual void SaveModel(Json* out) const = 0;\n};\n\nstruct Configurable {\n  virtual ~Configurable() = default;\n  /*!\n   * \\brief Load configuration from JSON object\n   * \\param in JSON object containing the configuration\n   */\n  virtual void LoadConfig(Json const& in) = 0;\n  /*!\n   * \\brief Save configuration to JSON object\n   * \\param out pointer to output JSON object\n   */\n  virtual void SaveConfig(Json* out) const = 0;\n};\n}  // namespace xgboost\n\n#endif  // XGBOOST_MODEL_H_\n"
  },
  {
    "path": "include/xgboost/multi_target_tree_model.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost contributors\n *\n * @brief Core data structure for multi-target trees.\n */\n#ifndef XGBOOST_MULTI_TARGET_TREE_MODEL_H_\n#define XGBOOST_MULTI_TARGET_TREE_MODEL_H_\n\n#include <xgboost/base.h>                // for bst_node_t, bst_target_t, bst_feature_t\n#include <xgboost/context.h>             // for Context\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/linalg.h>              // for VectorView, MatrixView\n#include <xgboost/model.h>               // for Model\n#include <xgboost/span.h>                // for Span\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for uint8_t\n#include <vector>   // for vector\n\nnamespace xgboost {\nnamespace tree {\nstruct MultiTargetTreeView;\n}\nstruct TreeParam;\n\n/**\n * @brief Tree structure for multi-target model.\n *\n * In order to support reduced gradient, the internal storage distinguishes weights\n * between base weights and leaf weights. The former is the weight calculated from split\n * gradient, and the later is the weight calculated from value gradient and used as\n * outputs. Every node has a base weight, but only leaves have leaf weights.\n *\n * To access the leaf weights, we re-use the right child to store leaf indices. For split\n * nodes, the `right_` member stores their right child node indices, for leaf nodes, the\n * `right_` member stores the corresponding leaf weight indices.\n */\nclass MultiTargetTree : public Model {\n public:\n  static bst_node_t constexpr InvalidNodeId() { return -1; }\n  friend struct tree::MultiTargetTreeView;\n\n private:\n  TreeParam const* param_;\n  // Mapping from node index to its left child. -1 for a leaf node.\n  HostDeviceVector<bst_node_t> left_;\n  // Mapping from node index to its right child. 
Maps to leaf weight for a leaf node.\n  HostDeviceVector<bst_node_t> right_;\n  // Mapping from node index to its parent.\n  HostDeviceVector<bst_node_t> parent_;\n  // Feature index for node split.\n  HostDeviceVector<bst_feature_t> split_index_;\n  // Whether the left child is the default node when split feature is missing.\n  HostDeviceVector<std::uint8_t> default_left_;\n  // Threshold for splitting a node.\n  HostDeviceVector<float> split_conds_;\n  // Internal base weights.\n  HostDeviceVector<float> weights_;\n  // Output weights.\n  HostDeviceVector<float> leaf_weights_;\n  // Loss change for each node.\n  HostDeviceVector<float> loss_chg_;\n  // Sum of hessians for each node (coverage).\n  HostDeviceVector<float> sum_hess_;\n\n  [[nodiscard]] linalg::VectorView<float const> NodeWeight(bst_node_t nidx) const {\n    auto beg = nidx * this->NumSplitTargets();\n    auto v = this->weights_.ConstHostSpan().subspan(beg, this->NumSplitTargets());\n    return linalg::MakeTensorView(DeviceOrd::CPU(), v, v.size());\n  }\n  // Unlike the const version, `NumSplitTargets` is not reliable if the tree can change.\n  [[nodiscard]] linalg::VectorView<float> NodeWeight(bst_node_t nidx,\n                                                     bst_target_t n_split_targets) {\n    auto beg = nidx * n_split_targets;\n    auto v = this->weights_.HostSpan().subspan(beg, n_split_targets);\n    return linalg::MakeTensorView(DeviceOrd::CPU(), v, v.size());\n  }\n  [[nodiscard]] bst_node_t LeafIdx(bst_node_t nidx) const { return this->RightChild(nidx); }\n\n public:\n  explicit MultiTargetTree(TreeParam const* param);\n  MultiTargetTree(MultiTargetTree const& that);\n  MultiTargetTree& operator=(MultiTargetTree const& that) = delete;\n  MultiTargetTree(MultiTargetTree&& that) = delete;\n  MultiTargetTree& operator=(MultiTargetTree&& that) = delete;\n\n  /**\n   * @brief Set the weight and statistics for the root.\n   *\n   * @param weight   The weight vector for the root node.\n   * 
@param sum_hess The sum of hessians for the root node (coverage).\n   */\n  void SetRoot(linalg::VectorView<float const> weight, float sum_hess);\n  /**\n   * @brief Expand a leaf into split node.\n   */\n  void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left,\n              linalg::VectorView<float const> base_weight,\n              linalg::VectorView<float const> left_weight,\n              linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,\n              float left_sum, float right_sum);\n  /** @see RegTree::SetLeaves */\n  void SetLeaves(std::vector<bst_node_t> leaves, common::Span<float const> weights);\n  /** @brief Copy base weight into leaf weight for a non-reduced multi-target tree. */\n  void SetLeaves();\n\n  [[nodiscard]] bool IsLeaf(bst_node_t nidx) const {\n    return left_.ConstHostVector()[nidx] == InvalidNodeId();\n  }\n  [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const {\n    return left_.ConstHostVector().at(nidx);\n  }\n  [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const {\n    return right_.ConstHostVector().at(nidx);\n  }\n  /**\n   * @brief Number of targets (size of a leaf).\n   */\n  [[nodiscard]] bst_target_t NumTargets() const;\n  /**\n   * @brief Number of reduced targets.\n   */\n  [[nodiscard]] bst_target_t NumSplitTargets() const;\n  [[nodiscard]] auto NumLeaves() const { return this->leaf_weights_.Size() / this->NumTargets(); }\n\n  [[nodiscard]] std::size_t Size() const;\n  [[nodiscard]] MultiTargetTree* Copy(TreeParam const* param) const;\n\n  common::Span<float const> LeafWeights(DeviceOrd device) const {\n    if (device.IsCPU()) {\n      return this->leaf_weights_.ConstHostSpan();\n    }\n    this->leaf_weights_.SetDevice(device);\n    return this->leaf_weights_.ConstDeviceSpan();\n  }\n\n  [[nodiscard]] linalg::VectorView<float const> LeafValue(bst_node_t nidx) const {\n    CHECK(IsLeaf(nidx));\n    auto n_targets = this->NumTargets();\n    auto 
h_leaf_mapping = this->right_.ConstHostSpan();\n    auto h_leaf_weights = this->leaf_weights_.ConstHostSpan();\n    auto lidx = h_leaf_mapping[nidx];\n    CHECK_NE(lidx, InvalidNodeId());\n    auto weight = h_leaf_weights.subspan(lidx * n_targets, n_targets);\n    return linalg::MakeVec(DeviceOrd::CPU(), weight);\n  }\n\n  void LoadModel(Json const& in) override;\n  void SaveModel(Json* out) const override;\n\n  [[nodiscard]] std::size_t MemCostBytes() const;\n};\n}  // namespace xgboost\n#endif  // XGBOOST_MULTI_TARGET_TREE_MODEL_H_\n"
  },
  {
    "path": "include/xgboost/objective.h",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n *\n * @brief interface of objective function used by xgboost.\n * @author Tianqi Chen, Kailong Chen\n */\n#ifndef XGBOOST_OBJECTIVE_H_\n#define XGBOOST_OBJECTIVE_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/linalg.h>  // for Vector\n#include <xgboost/model.h>\n#include <xgboost/task.h>\n\n#include <cstdint>  // for int32_t\n#include <functional>\n#include <string>  // for string\n\nnamespace xgboost {\n\nclass RegTree;\nstruct Context;\n\n/** @brief The interface of objective function */\nclass ObjFunction : public Configurable {\n protected:\n  Context const* ctx_{nullptr};\n\n public:\n  static constexpr float DefaultBaseScore() { return 0.5f; }\n\n public:\n  ~ObjFunction() override = default;\n  /**\n   * @brief Configure the objective with the specified parameters.\n   *\n   * @param args arguments to the objective function.\n   */\n  virtual void Configure(Args const& args) = 0;\n  /**\n   * @brief Get gradient over each of predictions, given existing information.\n   *\n   * @param preds Raw prediction (before applying the inverse link) of the current round.\n   * @param info information about labels, weights, groups in rank.\n   * @param iteration current iteration number.\n   * @param out_gpair output of get gradient, saves gradient and second order gradient in\n   */\n  virtual void GetGradient(HostDeviceVector<float> const& preds, MetaInfo const& info,\n                           std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) = 0;\n\n  /** @return the default evaluation metric for the objective */\n  [[nodiscard]] virtual const char* DefaultEvalMetric() const = 0;\n  /**\n   * @brief Return the configuration for the default metric.\n   */\n  [[nodiscard]] virtual Json DefaultMetricConfig() const { return Json{Null{}}; }\n  /**\n   * @brief Apply inverse link (activation) 
function to prediction values.\n   *\n   *   This is only called when Prediction is called\n   *\n   * @param [in,out] io_preds prediction values, saves to this vector as well.\n   */\n  virtual void PredTransform(HostDeviceVector<float>*) const {}\n  /**\n   * @brief Apply inverse link (activation) function to prediction values\n   *\n   *  This is only called when Eval is called, usually it redirects to PredTransform\n   *\n   * @param [in,out] io_preds prediction values, saves to this vector as well.\n   */\n  virtual void EvalTransform(HostDeviceVector<float>* io_preds) { this->PredTransform(io_preds); }\n  /**\n   * @brief Apply the link function to the intercept.\n   *\n   *   This is an inverse of `PredTransform` for most of the objectives (if there's a\n   *   valid inverse). It's used to transform user-set base_score back to margin used by\n   *   gradient boosting. The method converts objective-based valid outputs like\n   *   probability back to raw model outputs.\n   *\n   * @param [in,out] base_score The intercept to transform.\n   */\n  virtual void ProbToMargin(linalg::Vector<float>* /*base_score*/) const {}\n  /**\n   * @brief Obtain the initial estimation of prediction (intercept).\n   *\n   *   The output in `base_score` represents prediction after applying the inverse link function\n   *   (valid prediction instead of raw).\n   *\n   * @param info MetaInfo that contains label.\n   * @param base_score Output estimation.\n   */\n  virtual void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const;\n  /**\n   * @brief Return task of this objective.\n   */\n  [[nodiscard]] virtual struct ObjInfo Task() const = 0;\n  /**\n   * @brief Return number of targets for input matrix.  
Right now XGBoost supports only\n   *        multi-target regression.\n   */\n  [[nodiscard]] virtual bst_target_t Targets(MetaInfo const& info) const {\n    if (info.labels.Shape(1) > 1) {\n      LOG(FATAL) << \"multioutput is not supported by the current objective function\";\n    }\n    return 1;\n  }\n  /** @brief Getter of the context. */\n  [[nodiscard]] Context const* Ctx() const { return this->ctx_; }\n\n  /**\n   * @brief Update the leaf values after a tree is built. Needed for objectives with 0\n   *        hessian.\n   *\n   *   Note that the leaf update is not well defined for distributed training as XGBoost\n   *   computes only an average of quantile between workers. This breaks when some leaves\n   *   have no samples assigned in a local worker.\n   *\n   * @param position The leaf index for each row.\n   * @param info MetaInfo providing labels and weights.\n   * @param learning_rate The learning rate for current iteration.\n   * @param prediction Model prediction after transformation.\n   * @param group_idx The group index for this tree, 0 when it's not multi-target or multi-class.\n   * @param p_tree Tree that needs to be updated.\n   */\n  virtual void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& /*position*/,\n                              MetaInfo const& /*info*/, float /*learning_rate*/,\n                              HostDeviceVector<float> const& /*prediction*/,\n                              bst_target_t /*group_idx*/, RegTree* /*p_tree*/) const {}\n  /**\n   * @brief Create an objective function according to the name.\n   *\n   * @param name Name of the objective.\n   * @param ctx  Pointer to the context.\n   */\n  static ObjFunction* Create(const std::string& name, Context const* ctx);\n};\n\n/*!\n * \\brief Registry entry for objective factory functions.\n */\nstruct ObjFunctionReg\n    : public dmlc::FunctionRegEntryBase<ObjFunctionReg,\n                                        std::function<ObjFunction* ()> > {\n};\n\n/*!\n * \\brief 
Macro to register objective function.\n *\n * \\code\n * // example of registering an objective\n * XGBOOST_REGISTER_OBJECTIVE(LinearRegression, \"reg:squarederror\")\n * .describe(\"Linear regression objective\")\n * .set_body([]() {\n *     return new RegLossObj(LossType::kLinearSquare);\n *   });\n * \\endcode\n */\n#define XGBOOST_REGISTER_OBJECTIVE(UniqueId, Name)                      \\\n  static DMLC_ATTRIBUTE_UNUSED ::xgboost::ObjFunctionReg &              \\\n  __make_ ## ObjFunctionReg ## _ ## UniqueId ## __ =                    \\\n      ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->__REGISTER__(Name)\n}  // namespace xgboost\n#endif  // XGBOOST_OBJECTIVE_H_\n"
  },
  {
    "path": "include/xgboost/parameter.h",
    "content": "/*!\n * Copyright 2018 by Contributors\n * \\file parameter.h\n * \\brief macro for using C++11 enum class as DMLC parameter\n * \\author Hyunsu Philip Cho\n */\n\n#ifndef XGBOOST_PARAMETER_H_\n#define XGBOOST_PARAMETER_H_\n\n#include <dmlc/parameter.h>\n#include <xgboost/base.h>\n#include <string>\n#include <type_traits>\n\n/*!\n * \\brief Specialization of FieldEntry for enum class (backed by int)\n *\n * Use this macro to use C++11 enum class as DMLC parameters\n *\n * Usage:\n *\n * \\code{.cpp}\n *\n *   // enum class must inherit from int type\n *   enum class Foo : int {\n *     kBar = 0, kFrog = 1, kCat = 2, kDog = 3\n *   };\n *\n *   // This line is needed to prevent compilation error\n *   DECLARE_FIELD_ENUM_CLASS(Foo);\n *\n *   // Now define DMLC parameter as usual;\n *   //   enum classes can now be members.\n *   struct MyParam : dmlc::Parameter<MyParam> {\n *     Foo foo;\n *     DMLC_DECLARE_PARAMETER(MyParam) {\n *       DMLC_DECLARE_FIELD(foo)\n *         .set_default(Foo::kBar)\n *         .add_enum(\"bar\", Foo::kBar)\n *         .add_enum(\"frog\", Foo::kFrog)\n *         .add_enum(\"cat\", Foo::kCat)\n *         .add_enum(\"dog\", Foo::kDog);\n *     }\n *   };\n *\n *   DMLC_REGISTER_PARAMETER(MyParam);\n * \\endcode\n */\n#define DECLARE_FIELD_ENUM_CLASS(EnumClass) \\\nnamespace dmlc {  \\\nnamespace parameter {  \\\ntemplate <>  \\\nclass FieldEntry<EnumClass> : public FieldEntry<int> {  \\\n public:  \\\n  FieldEntry() {  \\\n    static_assert(  \\\n      std::is_same_v<int, typename std::underlying_type_t<EnumClass>>,  \\\n      \"enum class must be backed by int\");  \\\n    is_enum_ = true;  \\\n  }  \\\n  using Super = FieldEntry<int>;  \\\n  void Set(void *head, const std::string &value) const override {  \\\n    Super::Set(head, value);  \\\n  }  \\\n  inline FieldEntry<EnumClass>& add_enum(const std::string &key, EnumClass value) {  \\\n    Super::add_enum(key, static_cast<int>(value));  \\\n    return *this;  \\\n  
}  \\\n  inline FieldEntry<EnumClass>& set_default(const EnumClass& default_value) {  \\\n    default_value_ = static_cast<int>(default_value);  \\\n    has_default_ = true;  \\\n    return *this;  \\\n  }  \\\n  inline void Init(const std::string &key, void *head, EnumClass& ref) {  /* NOLINT */  \\\n    Super::Init(key, head, *reinterpret_cast<int*>(&ref));  \\\n  }  \\\n};  \\\n}  /* namespace parameter */  \\\n}  /* namespace dmlc */\n\nnamespace xgboost {\ntemplate <typename Type>\nstruct XGBoostParameter : public dmlc::Parameter<Type> {\n protected:\n  bool initialised_ {false};\n\n public:\n  template <typename Container>\n  Args UpdateAllowUnknown(Container const& kwargs) {\n    if (initialised_) {\n      return dmlc::Parameter<Type>::UpdateAllowUnknown(kwargs);\n    } else {\n      auto unknown = dmlc::Parameter<Type>::InitAllowUnknown(kwargs);\n      initialised_ = true;\n      return unknown;\n    }\n  }\n  bool GetInitialised() const { return static_cast<bool>(this->initialised_); }\n};\n}  // namespace xgboost\n\n#endif  // XGBOOST_PARAMETER_H_\n"
  },
  {
    "path": "include/xgboost/predictor.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\file predictor.h\n * \\brief Interface of predictor,\n *  performs predictions for a gradient booster.\n */\n#pragma once\n#include <dmlc/registry.h>    // for FunctionRegEntryBase\n#include <xgboost/base.h>     // for bst_tree_t\n#include <xgboost/cache.h>    // for DMatrixCache\n#include <xgboost/context.h>  // for Context\n#include <xgboost/context.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>\n\n#include <functional>  // for function\n#include <memory>      // for shared_ptr\n#include <string>\n#include <vector>\n\n// Forward declarations\nnamespace xgboost::gbm {\nstruct GBTreeModel;\n}  // namespace xgboost::gbm\n\nnamespace xgboost {\n/**\n * \\brief Contains pointer to input matrix and associated cached predictions.\n */\nstruct PredictionCacheEntry {\n  // A storage for caching prediction values\n  HostDeviceVector<float> predictions;\n  // The version of current cache, corresponding number of layers of trees\n  std::uint32_t version{0};\n\n  PredictionCacheEntry() = default;\n  /**\n   * \\brief Update the cache entry by number of versions.\n   *\n   * \\param v Added versions.\n   */\n  void Update(std::uint32_t v) { version += v; }\n  void Reset() { version = 0; }\n};\n\n/**\n * \\brief A container for managed prediction caches.\n */\nclass PredictionContainer : public DMatrixCache<PredictionCacheEntry> {\n  // We cache up to 64 DMatrix for all threads\n  std::size_t static constexpr DefaultSize() { return 64; }\n\n public:\n  PredictionContainer() : DMatrixCache<PredictionCacheEntry>{DefaultSize()} {}\n  std::shared_ptr<PredictionCacheEntry> Cache(std::shared_ptr<DMatrix> m, DeviceOrd device) {\n    auto p_cache = this->CacheItem(m);\n    if (!device.IsCPU()) {\n      p_cache->predictions.SetDevice(device);\n    }\n    return p_cache;\n  }\n};\n\n/**\n * \\class Predictor\n *\n * \\brief Performs prediction on individual training instances or batches of 
instances for\n *        GBTree. Prediction functions all take a GBTreeModel and a DMatrix as input and\n *        output a vector of predictions. The predictor does not modify any state of the\n *        model itself.\n */\nclass Predictor {\n protected:\n  Context const* ctx_;\n\n public:\n  explicit Predictor(Context const* ctx) : ctx_{ctx} {}\n\n  virtual ~Predictor() = default;\n\n  /**\n   * \\brief Configure and register input matrices in prediction cache.\n   *\n   * \\param cfg   The configuration.\n   */\n  virtual void Configure(Args const&);\n\n  /**\n   * \\brief Initialize output prediction\n   *\n   * \\param info Meta info for the DMatrix object used for prediction.\n   * \\param out_predt Prediction vector to be initialized.\n   * \\param model Tree model used for prediction.\n   */\n  virtual void InitOutPredictions(const MetaInfo& info, HostDeviceVector<float>* out_predt,\n                                  const gbm::GBTreeModel& model) const;\n\n  /**\n   * \\brief Generate batch predictions for a given feature matrix. 
May use\n   * cached predictions if available instead of calculating from scratch.\n   *\n   * \\param [in,out]  dmat        Feature matrix.\n   * \\param [in,out]  out_preds   The output preds.\n   * \\param           model       The model to predict from.\n   * \\param           tree_begin  The tree begin index.\n   * \\param           tree_end    The tree end index.\n   */\n  virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds,\n                            gbm::GBTreeModel const& model, bst_tree_t tree_begin,\n                            bst_tree_t tree_end = 0,\n                            std::vector<float> const* tree_weights = nullptr) const = 0;\n\n  /**\n   * \\brief Inplace prediction.\n   *\n   * \\param           p_fmat                 A proxy DMatrix that contains the data and related\n   *                                         meta info.\n   * \\param           model                  The model to predict from.\n   * \\param           missing                Missing value in the data.\n   * \\param [in,out]  out_preds              The output preds.\n   * \\param           tree_begin (Optional) Beginning of boosted trees used for prediction.\n   * \\param           tree_end   (Optional) End of booster trees. 
0 means do not limit trees.\n   *\n   * \\return True if the data can be handled by current predictor, false otherwise.\n   */\n  virtual bool InplacePredict(std::shared_ptr<DMatrix> p_fmat, const gbm::GBTreeModel& model,\n                              float missing, PredictionCacheEntry* out_preds,\n                              bst_tree_t tree_begin = 0, bst_tree_t tree_end = 0,\n                              std::vector<float> const* tree_weights = nullptr) const = 0;\n\n  /**\n   * \\brief predict the leaf index of each tree, the output will be nsample *\n   * ntree vector this is only valid in gbtree predictor.\n   *\n   * \\param [in,out]  dmat        The input feature matrix.\n   * \\param [in,out]  out_preds   The output preds.\n   * \\param           model       Model to make predictions from.\n   * \\param           tree_end    (Optional) The tree end index.\n   */\n\n  virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<float>* out_preds,\n                           gbm::GBTreeModel const& model, bst_tree_t tree_end = 0) const = 0;\n\n  /**\n   * \\brief feature contributions to individual predictions; the output will be\n   * a vector of length (nfeats + 1) * num_output_group * nsample, arranged in\n   * that order.\n   *\n   * \\param [in,out]  dmat               The input feature matrix.\n   * \\param [in,out]  out_contribs       The output feature contribs.\n   * \\param           model              Model to make predictions from.\n   * \\param           tree_end           The tree end index.\n   * \\param           tree_weights       (Optional) Weights to multiply each tree by.\n   * \\param           approximate        Use fast approximate algorithm.\n   * \\param           condition          Condition on the condition_feature (0=no, -1=cond off, 1=cond on).\n   * \\param           condition_feature  Feature to condition on (i.e. 
fix) during calculations.\n   */\n\n  virtual void PredictContribution(DMatrix* dmat, HostDeviceVector<float>* out_contribs,\n                                   gbm::GBTreeModel const& model, bst_tree_t tree_end = 0,\n                                   std::vector<float> const* tree_weights = nullptr,\n                                   bool approximate = false, int condition = 0,\n                                   unsigned condition_feature = 0) const = 0;\n\n  virtual void PredictInteractionContributions(DMatrix* dmat, HostDeviceVector<float>* out_contribs,\n                                               gbm::GBTreeModel const& model,\n                                               bst_tree_t tree_end = 0,\n                                               std::vector<float> const* tree_weights = nullptr,\n                                               bool approximate = false) const = 0;\n\n  /**\n   * \\brief Creates a new Predictor*.\n   *\n   * \\param name  Name of the predictor.\n   * \\param ctx   Pointer to runtime parameters.\n   */\n  static Predictor* Create(std::string const& name, Context const* ctx);\n};\n\n/*!\n * \\brief Registry entry for predictor.\n */\nstruct PredictorReg\n    : public dmlc::FunctionRegEntryBase<PredictorReg, std::function<Predictor*(Context const*)>> {};\n\n#define XGBOOST_REGISTER_PREDICTOR(UniqueId, Name)                                               \\\n  static DMLC_ATTRIBUTE_UNUSED ::xgboost::PredictorReg& __make_##PredictorReg##_##UniqueId##__ = \\\n      ::dmlc::Registry<::xgboost::PredictorReg>::Get()->__REGISTER__(Name)\n}  // namespace xgboost\n"
  },
  {
    "path": "include/xgboost/span.h",
    "content": "/**\n * Copyright 2018-2025, XGBoost contributors\n * \\brief span class based on ISO++20 span\n *\n * About NOLINTs in this file:\n *\n *   If we want Span to work with std interface, like range for loop, the\n *   naming must be consistent with std, not XGBoost. Also, the interface also\n *   conflicts with XGBoost coding style, specifically, the use of `explicit'\n *   keyword.\n *\n *\n * Some of the code is copied from Guidelines Support Library, here is the\n * license:\n *\n * Copyright (c) 2015 Microsoft Corporation. All rights reserved.\n *\n * This code is licensed under the MIT License (MIT).\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#ifndef XGBOOST_SPAN_H_\n#define XGBOOST_SPAN_H_\n\n#include <xgboost/base.h>\n\n#include <cstddef>  // size_t\n#include <cstdio>\n#include <iterator>\n#include <limits>  // numeric_limits\n#include <type_traits>\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#if defined(__CUDACC__)\n#include <cuda_runtime.h>\n#endif  // defined(__CUDACC__)\n\n/*!\n * The version number 1910 is picked up from GSL.\n *\n * We might want to use MOODYCAMEL_NOEXCEPT from dmlc/concurrentqueue.h. But\n * there are a lot more definitions in that file would cause warnings/troubles\n * in MSVC 2013. Currently we try to keep the closure of Span as minimal as\n * possible.\n *\n * There are other workarounds for MSVC, like _Unwrapped, _Verify_range ...\n * Some of these are hidden magics of MSVC and I tried to avoid them. 
Should any\n * of them become needed, please consult the source code of GSL, and possibly\n * some explanations from this thread:\n *\n *   https://github.com/Microsoft/GSL/pull/664\n *\n * TODO(trivialfis): Group these MSVC workarounds into a manageable place.\n */\n#if defined(_MSC_VER) && _MSC_VER < 1910\n\n#define __span_noexcept\n\n#pragma push_macro(\"constexpr\")\n#define constexpr /*constexpr*/\n\n#else\n\n#define __span_noexcept noexcept\n\n#endif  // defined(_MSC_VER) && _MSC_VER < 1910\n\nnamespace xgboost::common {\n\n#if defined(__CUDA_ARCH__)\n// Usual logging facility is not available inside device code.\n\n#if defined(_MSC_VER)\n\n// Windows CUDA doesn't have __assert_fail.\n#define CUDA_KERNEL_CHECK(cond)           \\\n  do {                                    \\\n    if (XGBOOST_EXPECT(!(cond), false)) { \\\n      asm(\"trap;\");                       \\\n    }                                     \\\n  } while (0)\n\n#else  // defined(_MSC_VER)\n\n#define __ASSERT_STR_HELPER(x) #x\n\n#define CUDA_KERNEL_CHECK(cond) \\\n  (XGBOOST_EXPECT((cond), true) \\\n       ? static_cast<void>(0)   \\\n       : __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))\n\n#endif  // defined(_MSC_VER)\n\n#define KERNEL_CHECK CUDA_KERNEL_CHECK\n\n#define SPAN_CHECK KERNEL_CHECK\n\n#else  // ------------------------------ not CUDA ----------------------------\n\n#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1\n\n#define KERNEL_CHECK(cond)\n\n#define SPAN_CHECK(cond) KERNEL_CHECK(cond)\n\n#else\n\n#if defined(__SYCL_DEVICE_ONLY__)\n\n// SYCL doesn't support termination\n#define SYCL_KERNEL_CHECK(cond)\n\n#define KERNEL_CHECK(cond) SYCL_KERNEL_CHECK(cond)\n\n#else  // defined(__SYCL_DEVICE_ONLY__)\n#define KERNEL_CHECK(cond) (XGBOOST_EXPECT((cond), true) ? 
static_cast<void>(0) : std::terminate())\n#endif  // defined(__SYCL_DEVICE_ONLY__)\n\n#define SPAN_CHECK(cond) KERNEL_CHECK(cond)\n\n#endif  // defined(XGBOOST_STRICT_R_MODE)\n\n#endif  // __CUDA_ARCH__\n\n#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))\n\nnamespace detail {\n/*!\n * By default, XGBoost uses uint32_t for indexing data. int64_t covers all\n *   values uint32_t can represent. Also, On x86-64 Linux, GCC uses long int to\n *   represent ptrdiff_t, which is just int64_t. So we make it deterministic\n *   here.\n */\nusing ptrdiff_t = typename std::conditional_t<  // NOLINT\n    std::is_same_v<std::ptrdiff_t, std::int64_t>, std::ptrdiff_t, std::int64_t>;\n}  // namespace detail\n\n#if defined(_MSC_VER) && _MSC_VER < 1910\nconstexpr const std::size_t\ndynamic_extent = std::numeric_limits<std::size_t>::max();  // NOLINT\n#else\nconstexpr std::size_t dynamic_extent = std::numeric_limits<std::size_t>::max();  // NOLINT\n#endif  // defined(_MSC_VER) && _MSC_VER < 1910\n\nenum class byte : unsigned char {};  // NOLINT\n\ntemplate <class ElementType, std::size_t Extent>\nclass Span;\n\nnamespace detail {\n\ntemplate <typename SpanType, bool IsConst>\nclass SpanIterator {\n  using ElementType = typename SpanType::element_type;\n\n public:\n  using iterator_category = std::random_access_iterator_tag;      // NOLINT\n  using value_type = typename SpanType::value_type;  // NOLINT\n  using difference_type = detail::ptrdiff_t;             // NOLINT\n\n  using reference = typename std::conditional<                    // NOLINT\n    IsConst, const ElementType, ElementType>::type&;\n  using pointer = typename std::add_pointer<reference>::type;     // NOLINT\n\n  constexpr SpanIterator() = default;\n\n  XGBOOST_DEVICE constexpr SpanIterator(\n      const SpanType* _span,\n      typename SpanType::index_type _idx) __span_noexcept :\n                                           span_(_span), index_(_idx) {}\n\n  friend SpanIterator<SpanType, true>;\n  template <bool B, 
typename std::enable_if_t<!B && IsConst>* = nullptr>\n  XGBOOST_DEVICE constexpr SpanIterator(  // NOLINT\n      const SpanIterator<SpanType, B>& other_) __span_noexcept\n      : SpanIterator(other_.span_, other_.index_) {}\n\n  XGBOOST_DEVICE reference operator*() const {\n    SPAN_CHECK(index_ < span_->size());\n    return *(span_->data() + index_);\n  }\n  XGBOOST_DEVICE reference operator[](difference_type n) const {\n    return *(*this + n);\n  }\n\n  XGBOOST_DEVICE pointer operator->() const {\n    SPAN_CHECK(index_ != span_->size());\n    return span_->data() + index_;\n  }\n\n  XGBOOST_DEVICE SpanIterator& operator++() {\n    SPAN_CHECK(index_ != span_->size());\n    index_++;\n    return *this;\n  }\n\n  XGBOOST_DEVICE SpanIterator operator++(int) {\n    auto ret = *this;\n    ++(*this);\n    return ret;\n  }\n\n  XGBOOST_DEVICE SpanIterator& operator--() {\n    SPAN_CHECK(index_ != 0 && index_ <= span_->size());\n    index_--;\n    return *this;\n  }\n\n  XGBOOST_DEVICE SpanIterator operator--(int) {\n    auto ret = *this;\n    --(*this);\n    return ret;\n  }\n\n  XGBOOST_DEVICE SpanIterator operator+(difference_type n) const {\n    auto ret = *this;\n    return ret += n;\n  }\n\n  XGBOOST_DEVICE SpanIterator& operator+=(difference_type n) {\n    SPAN_CHECK((index_ + n) <= span_->size());\n    index_ += n;\n    return *this;\n  }\n\n  XGBOOST_DEVICE difference_type operator-(SpanIterator rhs) const {\n    SPAN_CHECK(span_ == rhs.span_);\n    return index_ - rhs.index_;\n  }\n\n  XGBOOST_DEVICE SpanIterator operator-(difference_type n) const {\n    auto ret = *this;\n    return ret -= n;\n  }\n\n  XGBOOST_DEVICE SpanIterator& operator-=(difference_type n) {\n    return *this += -n;\n  }\n\n  // friends\n  XGBOOST_DEVICE constexpr friend bool operator==(\n      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return _lhs.span_ == _rhs.span_ && _lhs.index_ == _rhs.index_;\n  }\n\n  XGBOOST_DEVICE constexpr friend bool operator!=(\n      
SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return !(_lhs == _rhs);\n  }\n\n  XGBOOST_DEVICE constexpr friend bool operator<(\n      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return _lhs.index_ < _rhs.index_;\n  }\n\n  XGBOOST_DEVICE constexpr friend bool operator<=(\n      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return !(_rhs < _lhs);\n  }\n\n  XGBOOST_DEVICE constexpr friend bool operator>(\n      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return _rhs < _lhs;\n  }\n\n  XGBOOST_DEVICE constexpr friend bool operator>=(\n      SpanIterator _lhs, SpanIterator _rhs) __span_noexcept {\n    return !(_rhs > _lhs);\n  }\n\n protected:\n  const SpanType *span_ { nullptr };\n  typename SpanType::index_type index_ { 0 };\n};\n\n\n// It's tempting to use constexpr instead of structs to do the following meta\n// programming. But remember that we are supporting MSVC 2013 here.\n\n/*!\n * The extent E of the span returned by subspan is determined as follows:\n *\n *   - If Count is not dynamic_extent, Count;\n *   - Otherwise, if Extent is not dynamic_extent, Extent - Offset;\n *   - Otherwise, dynamic_extent.\n */\ntemplate <std::size_t Extent, std::size_t Offset, std::size_t Count>\nstruct ExtentValue : public std::integral_constant<\n  std::size_t, Count != dynamic_extent ?\n  Count : (Extent != dynamic_extent ? 
Extent - Offset : Extent)> {};\n\n/*!\n * If N is dynamic_extent, the extent of the returned span E is also\n * dynamic_extent; otherwise it is std::size_t(sizeof(T)) * N.\n */\ntemplate <typename T, std::size_t Extent>\nstruct ExtentAsBytesValue : public std::integral_constant<\n  std::size_t,\n  Extent == dynamic_extent ?\n  Extent : sizeof(T) * Extent> {};\n\ntemplate <std::size_t From, std::size_t To>\nstruct IsAllowedExtentConversion : public std::integral_constant<\n  bool, From == To || From == dynamic_extent || To == dynamic_extent> {};\n\ntemplate <class From, class To>\nstruct IsAllowedElementTypeConversion\n    : public std::integral_constant<bool, std::is_convertible_v<From (*)[], To (*)[]>> {}; // NOLINT\n\ntemplate <class T>\nstruct IsSpanOracle : std::false_type {};\n\ntemplate <class T, std::size_t Extent>\nstruct IsSpanOracle<Span<T, Extent>> : std::true_type {};\n\ntemplate <class T>\nstruct IsSpan : public IsSpanOracle<typename std::remove_cv_t<T>> {};\n\n// Re-implement std algorithms here to adopt CUDA.\ntemplate <typename T>\nstruct Less {\n  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {\n    return _x < _y;\n  }\n};\n\ntemplate <typename T>\nstruct Greater {\n  XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {\n    return _x > _y;\n  }\n};\n\ntemplate <class InputIt1, class InputIt2,\n          class Compare =\n          detail::Less<decltype(std::declval<InputIt1>().operator*())>>\nXGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1,\n                                            InputIt2 first2, InputIt2 last2) {\n  Compare comp;\n  for (; first1 != last1 && first2 != last2; ++first1, ++first2) {\n    if (comp(*first1, *first2)) {\n      return true;\n    }\n    if (comp(*first2, *first1)) {\n      return false;\n    }\n  }\n  return first1 == last1 && first2 != last2;\n}\n\n}  // namespace detail\n\ntemplate <typename T>\nXGBOOST_DEVICE 
std::enable_if_t<!std::is_reference_v<T>, std::size_t> SizeBytes(std::size_t n) {\n  return n * sizeof(T);\n}\n\n/*!\n * \\brief span class implementation, based on ISO++20 span<T>. The interface\n *      should be the same.\n *\n * What's different from span<T> in Guidelines Support Library (GSL)\n *\n *    Interface might be slightly different, we stick with ISO.\n *\n *    GSL uses C++14/17 features, which are not available here.\n *    GSL uses constexpr extensively, which is not possible with limitation\n *      of C++11.\n *    GSL doesn't concern about CUDA.\n *\n *    GSL is more thoroughly implemented and tested.\n *    GSL is more optimized, especially for static extent.\n *\n *    GSL uses __builtin_unreachable() when error, Span<T> uses dmlc LOG and\n *      customized CUDA logging.\n *\n *\n * What's different from span<T> in ISO++20 (ISO)\n *\n *    ISO uses functions/structs from std library, which might be not available\n *      in CUDA.\n *    Initializing from std::array is not supported.\n *\n *    ISO uses constexpr extensively, which is not possible with limitation\n *      of C++11.\n *    ISO uses C++14/17 features, which are not available here.\n *    ISO doesn't concern about CUDA.\n *\n *    ISO uses std::terminate(), Span<T> uses dmlc LOG and customized CUDA\n *      logging.\n *\n *\n * Limitations:\n *    With thrust:\n *       It's not advised to initialize Span with host_vector directly, since\n *         host_vector::data() is a host function.\n *       It's not possible to initialize Span with device_vector directly, since\n *         device_vector::data() returns a wrapped pointer.\n *       It's unclear what kind of thrust algorithm can be used without\n *         memory error. See the test case \"GPUSpan.WithTrust\"\n *\n *    Pass iterator to kernel:\n *       Not possible. Use subspan instead.\n *\n *       The underlying Span in SpanIterator is a pointer, but CUDA passes kernel\n *       parameters by value.  
If we were to hold a Span value instead of a\n *       pointer, the following snippet will crash, violating the safety\n *       purpose of Span:\n *\n *       \\code{.cpp}\n *       Span<float> span {arr_a};\n *       auto beg = span.begin();\n *\n *       Span<float> span_b = arr_b;\n *       span = span_b;\n *\n *       delete arr_a;\n *       beg++;                 // crash\n *       \\endcode\n *\n *       While holding a pointer or reference should avoid the problem, it's a\n *       compromise. Since we have subspan, it's acceptable not to support\n *       passing iterator.\n */\ntemplate <typename T,\n          std::size_t Extent = dynamic_extent>\nclass Span {\n public:\n  using element_type = T;                               // NOLINT\n  using value_type = typename std::remove_cv<T>::type;  // NOLINT\n  using index_type = std::size_t;                       // NOLINT\n  using difference_type = detail::ptrdiff_t;            // NOLINT\n  using pointer = T*;                                   // NOLINT\n  using reference = T&;                                 // NOLINT\n\n  using iterator = detail::SpanIterator<Span<T, Extent>, false>;               // NOLINT\n  using const_iterator = const detail::SpanIterator<Span<T, Extent>, true>;    // NOLINT\n  using reverse_iterator = std::reverse_iterator<iterator>;                    // NOLINT\n  using const_reverse_iterator = const std::reverse_iterator<const_iterator>;  // NOLINT\n\n  // constructors\n  constexpr Span() = default;\n\n  XGBOOST_DEVICE Span(pointer _ptr, index_type _count) :\n      size_(_count), data_(_ptr) {\n    SPAN_CHECK(!(Extent != dynamic_extent && _count != Extent));\n    SPAN_CHECK(_ptr || _count == 0);\n  }\n\n  XGBOOST_DEVICE Span(pointer _first, pointer _last) :\n      size_(_last - _first), data_(_first) {\n    SPAN_CHECK(data_ || size_ == 0);\n  }\n\n  template <std::size_t N>\n  XGBOOST_DEVICE constexpr Span(element_type (&arr)[N])  // NOLINT\n      __span_noexcept : size_(N), 
data_(&arr[0]) {}\n\n  template <class Container,\n            class = typename std::enable_if_t<\n                !std::is_const_v<element_type> && !detail::IsSpan<Container>::value &&\n                std::is_convertible_v<typename Container::pointer, pointer> &&\n                std::is_convertible_v<typename Container::pointer,\n                                      decltype(std::declval<Container>().data())>>>\n  Span(Container& _cont)  // NOLINT\n      : size_(_cont.size()), data_(_cont.data()) {\n    static_assert(!detail::IsSpan<Container>::value, \"Wrong constructor of Span is called.\");\n  }\n\n  template <class Container,\n            class = typename std::enable_if_t<\n                std::is_const_v<element_type> && !detail::IsSpan<Container>::value &&\n                std::is_convertible_v<typename Container::pointer, pointer> &&\n                std::is_convertible_v<typename Container::pointer,\n                                      decltype(std::declval<Container>().data())>>>\n  Span(const Container& _cont)  // NOLINT\n      : size_(_cont.size()), data_(_cont.data()) {\n    static_assert(!detail::IsSpan<Container>::value, \"Wrong constructor of Span is called.\");\n  }\n\n  template <class U, std::size_t OtherExtent,\n            class = typename std::enable_if_t<\n                detail::IsAllowedElementTypeConversion<U, T>::value &&\n                detail::IsAllowedExtentConversion<OtherExtent, Extent>::value>>\n  XGBOOST_DEVICE constexpr Span(const Span<U, OtherExtent>& _other)  // NOLINT\n      __span_noexcept : size_(_other.size()),\n                        data_(_other.data()) {}\n\n  constexpr Span(Span const& _other) noexcept(true) = default;\n  constexpr Span& operator=(Span const& _other) noexcept(true) = default;\n  constexpr Span(Span&& _other) noexcept(true) = default;\n  constexpr Span& operator=(Span&& _other) noexcept(true) = default;\n  ~Span() noexcept(true) = default;\n\n  XGBOOST_DEVICE constexpr iterator begin() const 
__span_noexcept {  // NOLINT\n    return {this, 0};\n  }\n\n  XGBOOST_DEVICE constexpr iterator end() const __span_noexcept {    // NOLINT\n    return {this, size()};\n  }\n\n  XGBOOST_DEVICE constexpr const_iterator cbegin() const __span_noexcept {  // NOLINT\n    return {this, 0};\n  }\n\n  XGBOOST_DEVICE constexpr const_iterator cend() const __span_noexcept {    // NOLINT\n    return {this, size()};\n  }\n\n  constexpr reverse_iterator rbegin() const __span_noexcept {  // NOLINT\n    return reverse_iterator{end()};\n  }\n\n  constexpr reverse_iterator rend() const __span_noexcept {  // NOLINT\n    return reverse_iterator{begin()};\n  }\n\n  XGBOOST_DEVICE constexpr const_reverse_iterator crbegin() const __span_noexcept {  // NOLINT\n    return const_reverse_iterator{cend()};\n  }\n\n  XGBOOST_DEVICE constexpr const_reverse_iterator crend() const __span_noexcept {    // NOLINT\n    return const_reverse_iterator{cbegin()};\n  }\n\n  // element access\n\n  XGBOOST_DEVICE reference front() const {  // NOLINT\n    return (*this)[0];\n  }\n\n  XGBOOST_DEVICE reference back() const {  // NOLINT\n    return (*this)[size() - 1];\n  }\n\n  XGBOOST_DEVICE reference operator[](index_type _idx) const {\n    SPAN_LT(_idx, size());\n    return data()[_idx];\n  }\n\n  XGBOOST_DEVICE reference operator()(index_type _idx) const {\n    return this->operator[](_idx);\n  }\n\n  XGBOOST_DEVICE constexpr pointer data() const __span_noexcept {   // NOLINT\n    return data_;\n  }\n\n  // Observers\n  XGBOOST_DEVICE constexpr index_type size() const __span_noexcept {  // NOLINT\n    return size_;\n  }\n  XGBOOST_DEVICE constexpr index_type size_bytes() const __span_noexcept {  // NOLINT\n    return SizeBytes<T>(size());\n  }\n\n  XGBOOST_DEVICE constexpr bool empty() const __span_noexcept {  // NOLINT\n    return size() == 0;\n  }\n\n  // Subviews\n  template <std::size_t Count>\n  XGBOOST_DEVICE Span<element_type, Count> first() const {  // NOLINT\n    SPAN_CHECK(Count <= size());\n    
return {data(), Count};\n  }\n\n  XGBOOST_DEVICE Span<element_type, dynamic_extent> first(  // NOLINT\n      std::size_t _count) const {\n    SPAN_CHECK(_count <= size());\n    return {data(), _count};\n  }\n\n  template <std::size_t Count>\n  XGBOOST_DEVICE Span<element_type, Count> last() const {  // NOLINT\n    SPAN_CHECK(Count <= size());\n    return {data() + size() - Count, Count};\n  }\n\n  XGBOOST_DEVICE Span<element_type, dynamic_extent> last(  // NOLINT\n      std::size_t _count) const {\n    SPAN_CHECK(_count <= size());\n    return subspan(size() - _count, _count);\n  }\n\n  /*!\n   * If Count is std::dynamic_extent, r.size() == this->size() - Offset;\n   * Otherwise r.size() == Count.\n   */\n  template <std::size_t Offset,\n            std::size_t Count = dynamic_extent>\n  XGBOOST_DEVICE auto subspan() const ->                   // NOLINT\n      Span<element_type,\n           detail::ExtentValue<Extent, Offset, Count>::value> {\n    SPAN_CHECK((Count == dynamic_extent) ?\n               (Offset <= size()) : (Offset + Count <= size()));\n    return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count};\n  }\n\n  XGBOOST_DEVICE Span<element_type, dynamic_extent> subspan(  // NOLINT\n      index_type _offset,\n      index_type _count = dynamic_extent) const {\n    SPAN_CHECK((_count == dynamic_extent) ?\n               (_offset <= size()) : (_offset + _count <= size()));\n    return {data() + _offset, _count ==\n            dynamic_extent ? 
size() - _offset : _count};\n  }\n\n private:\n  index_type size_ { 0 };\n  pointer data_ { nullptr };\n};\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE bool operator==(Span<T, X> l, Span<U, Y> r) {\n  if (l.size() != r.size()) {\n    return false;\n  }\n  for (auto l_beg = l.cbegin(), r_beg = r.cbegin(); l_beg != l.cend();\n       ++l_beg, ++r_beg) {\n    if (*l_beg != *r_beg) {\n      return false;\n    }\n  }\n  return true;\n}\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE constexpr bool operator!=(Span<T, X> l, Span<U, Y> r) {\n  return !(l == r);\n}\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE constexpr bool operator<(Span<T, X> l, Span<U, Y> r) {\n  return detail::LexicographicalCompare(l.begin(), l.end(),\n                                         r.begin(), r.end());\n}\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE constexpr bool operator<=(Span<T, X> l, Span<U, Y> r) {\n  return !(l > r);\n}\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE constexpr bool operator>(Span<T, X> l, Span<U, Y> r) {\n  return detail::LexicographicalCompare<\n    typename Span<T, X>::iterator, typename Span<U, Y>::iterator,\n    detail::Greater<typename Span<T, X>::element_type>>(l.begin(), l.end(),\n                                                        r.begin(), r.end());\n}\n\ntemplate <class T, std::size_t X, class U, std::size_t Y>\nXGBOOST_DEVICE constexpr bool operator>=(Span<T, X> l, Span<U, Y> r) {\n  return !(l < r);\n}\n\ntemplate <class T, std::size_t E>\nXGBOOST_DEVICE auto as_bytes(Span<T, E> s) __span_noexcept ->           // NOLINT\n    Span<const byte, detail::ExtentAsBytesValue<T, E>::value> {\n  return {reinterpret_cast<const byte*>(s.data()), s.size_bytes()};\n}\n\ntemplate <class T, std::size_t E>\nXGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept ->  // NOLINT\n    Span<byte, 
detail::ExtentAsBytesValue<T, E>::value> {\n  return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};\n}\n\n/**\n * \\brief A simple custom Span type that uses general iterator instead of pointer.\n */\ntemplate <typename It>\nclass IterSpan {\n public:\n  using value_type = typename std::iterator_traits<It>::value_type;  // NOLINT\n  using index_type = std::size_t;                                    // NOLINT\n  using iterator = It;                                               // NOLINT\n\n private:\n  It it_;\n  index_type size_{0};\n\n public:\n  IterSpan() = default;\n  XGBOOST_DEVICE IterSpan(It it, index_type size) : it_{std::move(it)}, size_{size} {}\n  XGBOOST_DEVICE explicit IterSpan(common::Span<It, dynamic_extent> span)\n      : it_{span.data()}, size_{span.size()} {}\n\n  [[nodiscard]] XGBOOST_DEVICE index_type size() const noexcept { return size_; }  // NOLINT\n  [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) const { return it_[i]; }\n  [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) { return it_[i]; }\n  [[nodiscard]] XGBOOST_DEVICE bool empty() const noexcept { return size() == 0; }  // NOLINT\n  [[nodiscard]] XGBOOST_DEVICE It data() const noexcept { return it_; }             // NOLINT\n  [[nodiscard]] XGBOOST_DEVICE IterSpan<It> subspan(                                // NOLINT\n      index_type _offset, index_type _count = dynamic_extent) const {\n    SPAN_CHECK((_count == dynamic_extent) ? (_offset <= size()) : (_offset + _count <= size()));\n    return {data() + _offset, _count == dynamic_extent ? 
size() - _offset : _count};\n  }\n  [[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept {  // NOLINT\n    return it_;\n  }\n  [[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept {  // NOLINT\n    return it_ + size();\n  }\n};\n\ntemplate <typename T>\nSpan(std::vector<T> const&) -> Span<T const>;\n\ntemplate <typename T>\nSpan(std::vector<T>&) -> Span<T>;\n}  // namespace xgboost::common\n\n\n#if defined(_MSC_VER) &&_MSC_VER < 1910\n#undef constexpr\n#pragma pop_macro(\"constexpr\")\n#undef __span_noexcept\n#endif  // _MSC_VER < 1910\n\n#endif  // XGBOOST_SPAN_H_\n"
  },
  {
    "path": "include/xgboost/string_view.h",
    "content": "/**\n * Copyright 2021-2023, XGBoost Contributors\n */\n#ifndef XGBOOST_STRING_VIEW_H_\n#define XGBOOST_STRING_VIEW_H_\n#include <xgboost/logging.h>  // CHECK_LT\n#include <xgboost/span.h>     // Span\n\n#include <algorithm>  // for equal, min\n#include <cstddef>    // for size_t\n#include <iterator>   // for reverse_iterator\n#include <ostream>    // for ostream\n#include <string>     // for char_traits, string\n\nnamespace xgboost {\nstruct StringView {\n private:\n  using CharT = char;\n  using Traits = std::char_traits<CharT>;\n  CharT const* str_{nullptr};\n  std::size_t size_{0};\n\n public:\n  using value_type = CharT;                                        // NOLINT\n  using iterator = const CharT*;                                   // NOLINT\n  using const_iterator = iterator;                                 // NOLINT\n  using reverse_iterator = std::reverse_iterator<const_iterator>;  // NOLINT\n  using const_reverse_iterator = reverse_iterator;                 // NOLINT\n\n public:\n  constexpr StringView() = default;\n  constexpr StringView(value_type const* str, std::size_t size) : str_{str}, size_{size} {}\n  StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}  // NOLINT\n  constexpr StringView(value_type const* str)                                   // NOLINT\n      : str_{str}, size_{str == nullptr ? 
0ul : Traits::length(str)} {}\n\n  [[nodiscard]] value_type const& operator[](std::size_t p) const { return str_[p]; }\n  [[nodiscard]] explicit operator std::string() const { return {this->c_str(), this->size()}; }\n  [[nodiscard]] value_type const& at(std::size_t p) const {  // NOLINT\n    CHECK_LT(p, size_);\n    return str_[p];\n  }\n  [[nodiscard]] constexpr std::size_t size() const { return size_; }       // NOLINT\n  [[nodiscard]] constexpr bool empty() const { return size() == 0; }       // NOLINT\n  [[nodiscard]] StringView substr(std::size_t beg, std::size_t n) const {  // NOLINT\n    CHECK_LE(beg, size_);\n    std::size_t len = std::min(n, size_ - beg);\n    return {str_ + beg, len};\n  }\n  [[nodiscard]] value_type const* c_str() const { return str_; }  // NOLINT\n\n  [[nodiscard]] constexpr const_iterator cbegin() const { return str_; }         // NOLINT\n  [[nodiscard]] constexpr const_iterator cend() const { return str_ + size(); }  // NOLINT\n  [[nodiscard]] constexpr iterator begin() const { return str_; }                // NOLINT\n  [[nodiscard]] constexpr iterator end() const { return str_ + size(); }         // NOLINT\n\n  [[nodiscard]] const_reverse_iterator rbegin() const noexcept {  // NOLINT\n    return const_reverse_iterator(this->end());\n  }\n  [[nodiscard]] const_reverse_iterator crbegin() const noexcept {  // NOLINT\n    return const_reverse_iterator(this->end());\n  }\n  [[nodiscard]] const_reverse_iterator rend() const noexcept {  // NOLINT\n    return const_reverse_iterator(this->begin());\n  }\n  [[nodiscard]] const_reverse_iterator crend() const noexcept {  // NOLINT\n    return const_reverse_iterator(this->begin());\n  }\n};\n\ninline std::ostream& operator<<(std::ostream& os, StringView const v) {\n  for (auto c : v) {\n    os.put(c);\n  }\n  return os;\n}\n\ninline bool operator==(StringView l, StringView r) {\n  if (l.size() != r.size()) {\n    return false;\n  }\n  return std::equal(l.cbegin(), l.cend(), 
r.cbegin());\n}\n\ninline bool operator!=(StringView l, StringView r) { return !(l == r); }\n\ninline bool operator<(StringView l, StringView r) {\n  return common::Span<StringView::value_type const>{l.c_str(), l.size()} <\n         common::Span<StringView::value_type const>{r.c_str(), r.size()};\n}\n\ninline bool operator<(std::string const& l, StringView r) { return StringView{l} < r; }\n\ninline bool operator<(StringView l, std::string const& r) { return l < StringView{r}; }\n}  // namespace xgboost\n#endif  // XGBOOST_STRING_VIEW_H_\n"
  },
  {
    "path": "include/xgboost/task.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_TASK_H_\n#define XGBOOST_TASK_H_\n\n#include <xgboost/base.h>\n\n#include <cstdint>  // for uint8_t\n\nnamespace xgboost {\n/*!\n * \\brief A struct returned by objective, which determines task at hand.  The struct is\n *        not used by any algorithm yet, only for future development like categorical\n *        split.\n *\n * The task field is useful for tree split finding, also for some metrics like auc.\n * Lastly, knowing whether hessian is constant can allow some optimizations like skipping\n * the quantile sketching.\n *\n * This struct should not be serialized since it can be recovered from objective function,\n * hence it doesn't need to be stable.\n */\nstruct ObjInfo {\n  // What kind of problem are we trying to solve\n  enum Task : std::uint8_t {\n    kRegression = 0,\n    kBinary = 1,\n    kClassification = 2,\n    kSurvival = 3,\n    kRanking = 4,\n    kOther = 5,\n  } task;\n  // Does the objective have constant hessian value?\n  bool const_hess{false};\n  bool zero_hess{false};\n\n  ObjInfo(Task t) : task{t} {}  // NOLINT\n  ObjInfo(Task t, bool khess, bool zhess) : task{t}, const_hess{khess}, zero_hess(zhess) {}\n\n  /**\n   * \\brief Use adaptive tree if the objective doesn't have valid hessian value.\n   */\n  XGBOOST_DEVICE bool UpdateTreeLeaf() const { return zero_hess; }\n};\n}  // namespace xgboost\n#endif  // XGBOOST_TASK_H_\n"
  },
  {
    "path": "include/xgboost/tree_model.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n *\n * @brief model structure for tree\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_TREE_MODEL_H_\n#define XGBOOST_TREE_MODEL_H_\n\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/feature_map.h>\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/linalg.h>              // for VectorView\n#include <xgboost/logging.h>\n#include <xgboost/model.h>\n#include <xgboost/multi_target_tree_model.h>  // for MultiTargetTree\n\n#include <algorithm>\n#include <cstring>\n#include <limits>  // for numeric_limits\n#include <memory>  // for unique_ptr\n#include <string>\n#include <type_traits>  // for is_signed_v\n#include <vector>\n\nnamespace xgboost {\n\nnamespace tree {\nstruct ScalarTreeView;\nstruct MultiTargetTreeView;\n}  // namespace tree\n\nclass Json;\n\n/** @brief meta parameters of the tree */\nstruct TreeParam {\n  /** @brief The number of nodes */\n  bst_node_t num_nodes{1};\n  /** @brief The number of deleted nodes */\n  bst_node_t num_deleted{0};\n  /** @brief The number of features used for tree construction */\n  bst_feature_t num_feature{0};\n  /** @brief leaf vector size. Used by the vector leaf. 
*/\n  bst_target_t size_leaf_vector{1};\n\n  bool operator==(const TreeParam& b) const {\n    return num_nodes == b.num_nodes && num_deleted == b.num_deleted &&\n           num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector;\n  }\n\n  void FromJson(Json const& in);\n  void ToJson(Json* p_out) const;\n};\n\n/** @brief node statistics used in regression tree */\nstruct RTreeNodeStat {\n  /** @brief loss change caused by current split */\n  float loss_chg;\n  /** @brief sum of hessian values, used to measure coverage of data */\n  float sum_hess;\n  /** @brief weight of current node */\n  float base_weight;\n  /** @brief number of child that is leaf node known up to now */\n  int leaf_child_cnt{0};\n\n  RTreeNodeStat() = default;\n  RTreeNodeStat(float loss_chg, float sum_hess, float weight)\n      : loss_chg{loss_chg}, sum_hess{sum_hess}, base_weight{weight} {}\n  bool operator==(const RTreeNodeStat& b) const {\n    return loss_chg == b.loss_chg && sum_hess == b.sum_hess && base_weight == b.base_weight &&\n           leaf_child_cnt == b.leaf_child_cnt;\n  }\n};\n\n/**\n * @brief define regression tree to be the most common tree model.\n *\n *  This is the data structure used in xgboost's major tree models.\n */\nclass RegTree : public Model {\n public:\n  using SplitCondT = float;\n  static constexpr bst_node_t kInvalidNodeId{MultiTargetTree::InvalidNodeId()};\n  static constexpr uint32_t kDeletedNodeMarker = std::numeric_limits<uint32_t>::max();\n  static constexpr bst_node_t kRoot{0};\n\n  /** @brief tree node */\n  class Node {\n   public:\n    XGBOOST_DEVICE Node() {\n      // assert compact alignment\n      static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info), \"Node: 64 bit align\");\n    }\n    Node(int32_t cleft, int32_t cright, int32_t parent, uint32_t split_ind, float split_cond,\n         bool default_left)\n        : parent_{parent}, cleft_{cleft}, cright_{cright} {\n      this->SetParent(parent_);\n      
this->SetSplit(split_ind, split_cond, default_left);\n    }\n\n    /*! \\brief index of left child */\n    [[nodiscard]] XGBOOST_DEVICE int LeftChild() const { return this->cleft_; }\n    /*! \\brief index of right child */\n    [[nodiscard]] XGBOOST_DEVICE int RightChild() const { return this->cright_; }\n    /*! \\brief index of default child when feature is missing */\n    [[nodiscard]] XGBOOST_DEVICE int DefaultChild() const {\n      return this->DefaultLeft() ? this->LeftChild() : this->RightChild();\n    }\n    /*! \\brief feature index of split condition */\n    [[nodiscard]] XGBOOST_DEVICE bst_feature_t SplitIndex() const {\n      static_assert(!std::is_signed_v<bst_feature_t>);\n      return sindex_ & ((1U << 31) - 1U);\n    }\n    /*! \\brief when feature is unknown, whether goes to left child */\n    [[nodiscard]] XGBOOST_DEVICE bool DefaultLeft() const { return (sindex_ >> 31) != 0; }\n    /*! \\brief whether current node is leaf node */\n    [[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return cleft_ == kInvalidNodeId; }\n    /*! \\return get leaf value of leaf node */\n    [[nodiscard]] XGBOOST_DEVICE float LeafValue() const { return (this->info_).leaf_value; }\n    /*! \\return get split condition of the node */\n    [[nodiscard]] XGBOOST_DEVICE SplitCondT SplitCond() const { return (this->info_).split_cond; }\n    /*! \\brief get parent of the node */\n    [[nodiscard]] XGBOOST_DEVICE int Parent() const { return parent_ & ((1U << 31) - 1); }\n    /*! \\brief whether current node is left child */\n    [[nodiscard]] XGBOOST_DEVICE bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; }\n    /*! \\brief whether this node is deleted */\n    [[nodiscard]] XGBOOST_DEVICE bool IsDeleted() const { return sindex_ == kDeletedNodeMarker; }\n    /*! 
\\brief whether current node is root */\n    [[nodiscard]] XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }\n    /*!\n     * \\brief set the left child\n     * \\param nid node id to left child\n     */\n    XGBOOST_DEVICE void SetLeftChild(int nid) { this->cleft_ = nid; }\n    /*!\n     * \\brief set the right child\n     * \\param nid node id to right child\n     */\n    XGBOOST_DEVICE void SetRightChild(int nid) { this->cright_ = nid; }\n    /*!\n     * \\brief set split condition of current node\n     * \\param split_index feature index to split\n     * \\param split_cond  split condition\n     * \\param default_left the default direction when feature is unknown\n     */\n    XGBOOST_DEVICE void SetSplit(unsigned split_index, SplitCondT split_cond,\n                                 bool default_left = false) {\n      if (default_left) split_index |= (1U << 31);\n      this->sindex_ = split_index;\n      (this->info_).split_cond = split_cond;\n    }\n    /*!\n     * \\brief set the leaf value of the node\n     * \\param value leaf value\n     * \\param right right index, could be used to store\n     *        additional information\n     */\n    XGBOOST_DEVICE void SetLeaf(bst_float value, int right = kInvalidNodeId) {\n      (this->info_).leaf_value = value;\n      this->cleft_ = kInvalidNodeId;\n      this->cright_ = right;\n    }\n    /*! \\brief mark that this node is deleted */\n    XGBOOST_DEVICE void MarkDelete() { this->sindex_ = kDeletedNodeMarker; }\n    /*! \\brief Reuse this deleted node. 
*/\n    XGBOOST_DEVICE void Reuse() { this->sindex_ = 0; }\n    // set parent\n    XGBOOST_DEVICE void SetParent(int pidx, bool is_left_child = true) {\n      if (is_left_child) pidx |= (1U << 31);\n      this->parent_ = pidx;\n    }\n    bool operator==(const Node& b) const {\n      return parent_ == b.parent_ && cleft_ == b.cleft_ && cright_ == b.cright_ &&\n             sindex_ == b.sindex_ && info_.leaf_value == b.info_.leaf_value;\n    }\n\n   private:\n    /*!\n     * \\brief in leaf node, we have weights, in non-leaf nodes,\n     *        we have split condition\n     */\n    union Info {\n      bst_float leaf_value;\n      SplitCondT split_cond;\n    };\n    // pointer to parent, highest bit is used to\n    // indicate whether it's a left child or not\n    int32_t parent_{kInvalidNodeId};\n    // pointer to left, right\n    int32_t cleft_{kInvalidNodeId}, cright_{kInvalidNodeId};\n    // split feature index, left split or right split depends on the highest bit\n    uint32_t sindex_{0};\n    // extra info\n    Info info_;\n  };\n\n  /**\n   * @brief Change a non leaf node to a leaf node, delete its children\n   *\n   * @param nidx Node id\n   * @param value The new leaf value\n   */\n  void ChangeToLeaf(bst_node_t nidx, float value) {\n    auto& h_nodes = nodes_.HostVector();\n    CHECK(h_nodes[h_nodes[nidx].LeftChild()].IsLeaf());\n    CHECK(h_nodes[h_nodes[nidx].RightChild()].IsLeaf());\n    this->DeleteNode(h_nodes[nidx].LeftChild());\n    this->DeleteNode(h_nodes[nidx].RightChild());\n    h_nodes[nidx].SetLeaf(value);\n  }\n  /**\n   * @brief Collapse a non leaf node to a leaf node, delete its children\n   *\n   * @param nidx Node id\n   * @param value The new leaf value\n   */\n  void CollapseToLeaf(bst_node_t nidx, float value) {\n    auto& h_nodes = nodes_.HostVector();\n    if (h_nodes[nidx].IsLeaf()) return;\n    if (!h_nodes[h_nodes[nidx].LeftChild()].IsLeaf()) {\n      CollapseToLeaf(h_nodes[nidx].LeftChild(), 0.0f);\n    }\n    if 
(!h_nodes[h_nodes[nidx].RightChild()].IsLeaf()) {\n      CollapseToLeaf(h_nodes[nidx].RightChild(), 0.0f);\n    }\n    this->ChangeToLeaf(nidx, value);\n  }\n\n  RegTree() {\n    nodes_.HostVector().resize(param_.num_nodes);\n    stats_.HostVector().resize(param_.num_nodes);\n    split_types_.HostVector().resize(param_.num_nodes, FeatureType::kNumerical);\n    split_categories_segments_.HostVector().resize(param_.num_nodes);\n    auto& h_nodes = nodes_.HostVector();\n    for (int i = 0; i < param_.num_nodes; i++) {\n      h_nodes[i].SetLeaf(0.0f);\n      h_nodes[i].SetParent(kInvalidNodeId);\n    }\n  }\n  /**\n   * \\brief Constructor that initializes the tree model with shape.\n   */\n  explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} {\n    param_.num_feature = n_features;\n    param_.size_leaf_vector = n_targets;\n    if (n_targets > 1) {\n      this->p_mt_tree_.reset(new MultiTargetTree{&param_});\n    }\n  }\n\n  /*! \\brief get node given nid */\n  Node& operator[](bst_node_t nidx) { return nodes_.HostVector()[nidx]; }\n\n public:\n  /** @brief Get const reference to nodes */\n  [[nodiscard]] common::Span<Node const> GetNodes(DeviceOrd device) const {\n    CHECK(!this->IsMultiTarget());\n    return device.IsCPU() ? nodes_.ConstHostSpan()\n                          : (nodes_.SetDevice(device), nodes_.ConstDeviceSpan());\n  }\n\n  /** @brief Get const reference to stats */\n  [[nodiscard]] common::Span<RTreeNodeStat const> GetStats(DeviceOrd device) const {\n    CHECK(!this->IsMultiTarget());\n    return device.IsCPU() ? stats_.ConstHostSpan()\n                          : (stats_.SetDevice(device), stats_.ConstDeviceSpan());\n  }\n\n  /*! 
\\brief get node statistics given nid */\n  RTreeNodeStat& Stat(int nid) { return stats_.HostVector()[nid]; }\n\n  void LoadModel(Json const& in) override;\n  void SaveModel(Json* out) const override;\n\n  bool operator==(const RegTree& b) const {\n    return nodes_.ConstHostVector() == b.nodes_.ConstHostVector() &&\n           stats_.ConstHostVector() == b.stats_.ConstHostVector() &&\n           deleted_nodes_ == b.deleted_nodes_ && param_ == b.param_;\n  }\n  /*!\n   * \\brief Compares whether 2 trees are equal from a user's perspective.  The equality\n   *        compares only non-deleted nodes.\n   *\n   * \\param b The other tree.\n   */\n  [[nodiscard]] bool Equal(const RegTree& b) const;\n\n  /**\n   * \\brief Expands a leaf node into two additional leaf nodes.\n   *\n   * \\param nid               The node index to expand.\n   * \\param split_index       Feature index of the split.\n   * \\param split_value       The split condition.\n   * \\param default_left      True to default left.\n   * \\param base_weight       The base weight, before learning rate.\n   * \\param left_leaf_weight  The left leaf weight for prediction, modified by learning rate.\n   * \\param right_leaf_weight The right leaf weight for prediction, modified by learning rate.\n   * \\param loss_change       The loss change.\n   * \\param sum_hess          The sum hess.\n   * \\param left_sum          The sum hess of left leaf.\n   * \\param right_sum         The sum hess of right leaf.\n   * \\param leaf_right_child  The right child index of leaf, by default kInvalidNodeId,\n   *                          some updaters use the right child index of leaf as a marker\n   */\n  void ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_value, bool default_left,\n                  bst_float base_weight, bst_float left_leaf_weight, bst_float right_leaf_weight,\n                  bst_float loss_change, float sum_hess, float left_sum, float right_sum,\n                  bst_node_t 
leaf_right_child = kInvalidNodeId);\n  /**\n   * @brief Expands a leaf node into two additional leaf nodes for a multi-target tree.\n   *\n   * @param gain      The gain (loss change) from this split.\n   * @param sum_hess  The sum of hessians for the parent node (coverage).\n   * @param left_sum  The sum of hessians for the left child (coverage).\n   * @param right_sum The sum of hessians for the right child (coverage).\n   */\n  void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,\n                  linalg::VectorView<float const> base_weight,\n                  linalg::VectorView<float const> left_weight,\n                  linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,\n                  float left_sum, float right_sum);\n  /**\n   * @brief Set all leaf weights for a multi-target tree.\n   *\n   * The leaf weight can be different from the internal weight stored by @ref ExpandNode\n   * This function is used to set the leaf at the end of tree construction.\n   *\n   * @param leaves  The node indices for all leaves. 
This must contain all the leaves in this tree.\n   * @param weights Row-major matrix for leaf weights, each row contains a leaf specified by the\n   *                leaves parameter.\n   */\n  void SetLeaves(std::vector<bst_node_t> leaves, common::Span<float const> weights);\n\n  /**\n   * \\brief Expands a leaf node with categories\n   *\n   * \\param nid               The node index to expand.\n   * \\param split_index       Feature index of the split.\n   * \\param split_cat         The bitset containing categories\n   * \\param default_left      True to default left.\n   * \\param base_weight       The base weight, before learning rate.\n   * \\param left_leaf_weight  The left leaf weight for prediction, modified by learning rate.\n   * \\param right_leaf_weight The right leaf weight for prediction, modified by learning rate.\n   * \\param loss_change       The loss change.\n   * \\param sum_hess          The sum hess.\n   * \\param left_sum          The sum hess of left leaf.\n   * \\param right_sum         The sum hess of right leaf.\n   */\n  void ExpandCategorical(bst_node_t nid, bst_feature_t split_index,\n                         common::Span<const uint32_t> split_cat, bool default_left,\n                         bst_float base_weight, bst_float left_leaf_weight,\n                         bst_float right_leaf_weight, bst_float loss_change, float sum_hess,\n                         float left_sum, float right_sum);\n  /**\n   * @brief Expands a leaf node with categories for a multi-target tree.\n   */\n  void ExpandCategorical(bst_node_t nidx, bst_feature_t split_index,\n                         common::Span<const uint32_t> split_cat, bool default_left,\n                         linalg::VectorView<float const> base_weight,\n                         linalg::VectorView<float const> left_weight,\n                         linalg::VectorView<float const> right_weight, float loss_chg,\n                         float sum_hess, float left_sum, float right_sum);\n 
 /**\n   * @brief Whether this tree has categorical split.\n   */\n  [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.Empty(); }\n  /**\n   * \\brief Whether this is a multi-target tree.\n   */\n  [[nodiscard]] bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }\n  /**\n   * \\brief The size of leaf weight.\n   */\n  [[nodiscard]] bst_target_t NumTargets() const { return param_.size_leaf_vector; }\n  /**\n   * \\brief Get the underlying implementation of multi-target tree.\n   */\n  [[nodiscard]] auto GetMultiTargetTree() const {\n    CHECK(IsMultiTarget());\n    return p_mt_tree_.get();\n  }\n  /**\n   * \\brief Get the number of features.\n   */\n  [[nodiscard]] bst_feature_t NumFeatures() const noexcept { return param_.num_feature; }\n  /**\n   * \\brief Get the total number of nodes including deleted ones in this tree.\n   */\n  [[nodiscard]] bst_node_t NumNodes() const noexcept { return param_.num_nodes; }\n  /**\n   * \\brief Get the total number of valid nodes in this tree.\n   */\n  [[nodiscard]] bst_node_t NumValidNodes() const noexcept {\n    return param_.num_nodes - param_.num_deleted;\n  }\n  /**\n   * \\brief number of extra nodes besides the root\n   */\n  [[nodiscard]] bst_node_t NumExtraNodes() const noexcept {\n    return param_.num_nodes - 1 - param_.num_deleted;\n  }\n  /* \\brief Count number of leaves in tree. 
*/\n  [[nodiscard]] bst_node_t GetNumLeaves() const;\n  [[nodiscard]] bst_node_t GetNumSplitNodes() const;\n\n  /**\n   * @brief Get the depth of a node.\n   */\n  [[nodiscard]] bst_node_t GetDepth(bst_node_t nidx) const;\n  /**\n   * @brief Set the root weight and statistics for a multi-target tree.\n   *\n   * @param weight   Internal split weight, with size equals to reduced targets.\n   * @param sum_hess The sum of hessians for the root node (coverage).\n   */\n  void SetRoot(linalg::VectorView<float const> weight, float sum_hess) {\n    CHECK(IsMultiTarget());\n    return this->p_mt_tree_->SetRoot(weight, sum_hess);\n  }\n  /**\n   * @brief Get the maximum depth.\n   */\n  [[nodiscard]] bst_node_t MaxDepth() const;\n\n  /*!\n   * \\brief dense feature vector that can be taken by RegTree\n   * and can be constructed from a sparse feature vector.\n   */\n  struct FVec {\n    /*!\n     * \\brief initialize the vector with the given size\n     * \\param size The size of the feature vector.\n     */\n    void Init(size_t size);\n    /*!\n     * \\brief fill the vector with sparse vector\n     * \\param inst The sparse instance to fill.\n     */\n    void Fill(SparsePage::Inst const& inst);\n\n    /*!\n     * \\brief drop the trace after fill, must be called after fill.\n     * Takes no arguments; re-initializes every entry to missing (NaN).\n     */\n    void Drop();\n    /*!\n     * \\brief returns the size of the feature vector\n     * \\return the size of the feature vector\n     */\n    [[nodiscard]] size_t Size() const;\n    /*!\n     * \\brief get ith value\n     * \\param i feature index.\n     * \\return the i-th feature value\n     */\n    [[nodiscard]] bst_float GetFvalue(size_t i) const;\n    /*!\n     * \\brief check whether i-th entry is missing\n     * \\param i feature index.\n     * \\return whether i-th value is missing.\n     */\n    [[nodiscard]] bool IsMissing(size_t i) const;\n    [[nodiscard]] bool HasMissing() const;\n    void HasMissing(bool has_missing) { 
this->has_missing_ = has_missing; }\n\n    [[nodiscard]] common::Span<float> Data() { return data_; }\n\n   private:\n    /**\n     * @brief A dense vector for a single sample.\n     *\n     * It's nan if the value is missing.\n     */\n    std::vector<float> data_;\n    bool has_missing_;\n  };\n\n  /*!\n   * \\brief dump the model in the requested format as a text string\n   * \\param fmap feature map that may help give interpretations of feature\n   * \\param with_stats whether dump out statistics as well\n   * \\param format the format to dump the model in\n   * \\return the string of dumped model\n   */\n  [[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats,\n                                      std::string format) const;\n  /**\n   * @brief Get split types for all nodes.\n   */\n  [[nodiscard]] common::Span<FeatureType const> GetSplitTypes(DeviceOrd device) const {\n    return device.IsCPU() ? split_types_.ConstHostSpan()\n                          : (split_types_.SetDevice(device), split_types_.ConstDeviceSpan());\n  }\n  [[nodiscard]] common::Span<uint32_t const> GetSplitCategories(DeviceOrd device) const {\n    return device.IsCPU()\n               ? 
split_categories_.ConstHostSpan()\n               : (split_categories_.SetDevice(device), split_categories_.ConstDeviceSpan());\n  }\n  [[nodiscard]] auto const& GetSplitCategoriesPtr() const {\n    return split_categories_segments_.ConstHostVector();\n  }\n\n  /**\n   * @brief CSR-like matrix for categorical splits.\n   *\n   * The fields of split_categories_segments_[i] are set such that the range\n   * node_ptr[beg:(beg+size)] stores the bitset for the matching categories for the\n   * i-th node.\n   */\n  struct CategoricalSplitMatrix {\n    struct Segment {\n      std::size_t beg{0};\n      std::size_t size{0};\n    };\n    common::Span<FeatureType const> split_type;\n    common::Span<uint32_t const> categories;\n    common::Span<Segment const> node_ptr;\n  };\n\n  [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix(DeviceOrd device) const {\n    CategoricalSplitMatrix view;\n    view.split_type = this->GetSplitTypes(device);\n    view.categories = this->GetSplitCategories(device);\n    if (device.IsCPU()) {\n      view.node_ptr = split_categories_segments_.ConstHostSpan();\n    } else {\n      split_categories_segments_.SetDevice(device);\n      view.node_ptr = split_categories_segments_.ConstDeviceSpan();\n    }\n    return view;\n  }\n\n  [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const {\n    if (IsMultiTarget()) {\n      return this->p_mt_tree_->LeftChild(nidx);\n    }\n    return nodes_.ConstHostVector()[nidx].LeftChild();\n  }\n  [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const {\n    if (IsMultiTarget()) {\n      return this->p_mt_tree_->RightChild(nidx);\n    }\n    return nodes_.ConstHostVector()[nidx].RightChild();\n  }\n  [[nodiscard]] bst_node_t Size() const {\n    if (IsMultiTarget()) {\n      return this->p_mt_tree_->Size();\n    }\n    return this->nodes_.Size();\n  }\n\n  [[nodiscard]] RegTree* Copy() const;\n  tree::ScalarTreeView HostScView() const;\n  tree::MultiTargetTreeView HostMtView() const;\n\n private:\n  
template <bool typed>\n  void LoadCategoricalSplit(Json const& in);\n  void SaveCategoricalSplit(Json* p_out) const;\n  /*! \\brief model parameter */\n  TreeParam param_;\n  // vector of nodes\n  HostDeviceVector<Node> nodes_;\n  // free node space, used during training process\n  std::vector<int> deleted_nodes_;\n  // stats of nodes\n  HostDeviceVector<RTreeNodeStat> stats_;\n  HostDeviceVector<FeatureType> split_types_;\n\n  // Categories for each internal node.\n  HostDeviceVector<uint32_t> split_categories_;\n  // Ptr to split categories of each node.\n  HostDeviceVector<CategoricalSplitMatrix::Segment> split_categories_segments_;\n  // ptr to multi-target tree with vector leaf.\n  std::unique_ptr<MultiTargetTree> p_mt_tree_;\n  // allocate a new node,\n  // !!!!!! NOTE: may cause BUG here, nodes.resize\n  bst_node_t AllocNode() {\n    if (param_.num_deleted != 0) {\n      int nid = deleted_nodes_.back();\n      deleted_nodes_.pop_back();\n      nodes_.HostVector()[nid].Reuse();\n      --param_.num_deleted;\n      return nid;\n    }\n    int nd = param_.num_nodes++;\n    CHECK_LT(param_.num_nodes, std::numeric_limits<int>::max())\n        << \"number of nodes in the tree exceed 2^31\";\n    nodes_.HostVector().resize(param_.num_nodes);\n    stats_.HostVector().resize(param_.num_nodes);\n    split_types_.HostVector().resize(param_.num_nodes, FeatureType::kNumerical);\n    split_categories_segments_.HostVector().resize(param_.num_nodes);\n    return nd;\n  }\n  // delete a tree node, keep the parent field to allow trace back\n  void DeleteNode(int nid) {\n    CHECK_GE(nid, 1);\n    auto pid = (*this)[nid].Parent();\n    if (nid == (*this)[pid].LeftChild()) {\n      (*this)[pid].SetLeftChild(kInvalidNodeId);\n    } else {\n      (*this)[pid].SetRightChild(kInvalidNodeId);\n    }\n\n    deleted_nodes_.push_back(nid);\n    nodes_.HostVector()[nid].MarkDelete();\n    ++param_.num_deleted;\n  }\n};\n\ninline void RegTree::FVec::Init(size_t size) {\n  
data_.resize(size);\n  std::fill(data_.begin(), data_.end(), std::numeric_limits<float>::quiet_NaN());\n  has_missing_ = true;\n}\n\ninline void RegTree::FVec::Fill(SparsePage::Inst const& inst) {\n  auto p_data = inst.data();\n  auto p_out = data_.data();\n\n  for (std::size_t i = 0, n = inst.size(); i < n; ++i) {\n    auto const& entry = p_data[i];\n    p_out[entry.index] = entry.fvalue;\n  }\n  has_missing_ = data_.size() != inst.size();\n}\n\ninline void RegTree::FVec::Drop() { this->Init(this->Size()); }\n\ninline size_t RegTree::FVec::Size() const { return data_.size(); }\n\ninline float RegTree::FVec::GetFvalue(size_t i) const { return data_[i]; }\n\ninline bool RegTree::FVec::IsMissing(size_t i) const { return std::isnan(data_[i]); }\n\ninline bool RegTree::FVec::HasMissing() const { return has_missing_; }\n\n// Multi-target tree not yet implemented error\ninline StringView MTNotImplemented() {\n  return \" support for multi-target tree is not yet implemented.\";\n}\n}  // namespace xgboost\n#endif  // XGBOOST_TREE_MODEL_H_\n"
  },
  {
    "path": "include/xgboost/tree_updater.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n *\n * @brief General primitive for tree learning,\n *   Updating a collection of trees given the information.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_TREE_UPDATER_H_\n#define XGBOOST_TREE_UPDATER_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>                // for Args, GradientPair\n#include <xgboost/data.h>                // for DMatrix\n#include <xgboost/gradient.h>            // for GradientContainer\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/linalg.h>              // for VectorView\n#include <xgboost/model.h>               // for Configurable\n#include <xgboost/span.h>                // for Span\n#include <xgboost/tree_model.h>          // for RegTree\n\n#include <functional>  // for function\n#include <string>      // for string\n#include <vector>      // for vector\n\nnamespace xgboost {\nnamespace tree {\nstruct TrainParam;\n}\n\nclass Json;\nstruct Context;\nstruct ObjInfo;\n\n/**\n * @brief interface of tree update module, that performs update of a tree.\n */\nclass TreeUpdater : public Configurable {\n protected:\n  Context const* ctx_ = nullptr;\n\n public:\n  explicit TreeUpdater(const Context* ctx) : ctx_(ctx) {}\n  ~TreeUpdater() override = default;\n  /**\n   * @brief Initialize the updater with given arguments.\n   * @param args arguments to the objective function.\n   */\n  virtual void Configure(const Args& args) = 0;\n  /**\n   * @brief Whether this updater can be used for updating existing trees.\n   *\n   *  Some updaters are used for building new trees (like `hist`), while some others are\n   *  used for modifying existing trees (like `prune`).  Return true if it can modify\n   *  existing trees.\n   */\n  [[nodiscard]] virtual bool CanModifyTree() const { return false; }\n  /**\n   * @brief Whether the out_position in `Update` is valid. 
This determines whether adaptive\n   *        tree can be used.\n   */\n  [[nodiscard]] virtual bool HasNodePosition() const { return false; }\n  /**\n   * @brief perform update to the tree models\n   *\n   * @param param  Hyper-parameter for constructing trees.\n   * @param gpair  The gradient pair statistics of the data\n   * @param p_fmat The data matrix passed to the updater.\n   * @param out_position The leaf index for each row.  The index is negated if that row is\n   *                     removed during sampling. So the 3rd node is ~3.\n   * @param out_trees references the trees to be updated, updater will change the content of trees\n   *\n   * @note All the trees in the vector are updated, with the same statistics, but maybe\n   *       different random seeds, usually one tree is passed in at a time, there can be\n   *       multiple trees when we train a random forest style model.\n   */\n  virtual void Update(tree::TrainParam const* param, GradientContainer* gpair, DMatrix* p_fmat,\n                      common::Span<HostDeviceVector<bst_node_t>> out_position,\n                      std::vector<RegTree*> const& out_trees) = 0;\n\n  /**\n   * @brief Determines whether updater has enough knowledge about a given dataset to\n   *        quickly update prediction cache for the training data and performs the update\n   *        if possible.\n   *\n   * @param data data matrix\n   * @param out_preds prediction cache to be updated\n   *\n   * @return boolean indicating whether updater has capability to update the prediction\n   *         cache. 
If true, the prediction cache will have been updated by the time this\n   *         function returns.\n   */\n  virtual bool UpdatePredictionCache(DMatrix const* /*data*/,\n                                     common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,\n                                     linalg::MatrixView<float> /*out_preds*/) {\n    return false;\n  }\n\n  [[nodiscard]] virtual char const* Name() const = 0;\n\n  /**\n   * @brief Create a tree updater given name\n   *\n   * @param name Name of the tree updater.\n   * @param ctx A global runtime parameter\n   * @param task Information about the objective.\n   */\n  static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo const* task);\n};\n\n/**\n * @brief Registry entry for tree updater.\n */\nstruct TreeUpdaterReg\n    : public dmlc::FunctionRegEntryBase<\n          TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo const* task)>> {};\n\n/*!\n * \\brief Macro to register tree updater.\n *\n * \\code\n * // example of registering the tree updater colmaker\n * XGBOOST_REGISTER_TREE_UPDATER(ColMaker, \"colmaker\")\n * .describe(\"Column based tree maker.\")\n * .set_body([]() {\n *     return new ColMaker<TStats>();\n *   });\n * \\endcode\n */\n#define XGBOOST_REGISTER_TREE_UPDATER(UniqueId, Name)                   \\\n  static DMLC_ATTRIBUTE_UNUSED ::xgboost::TreeUpdaterReg&               \\\n  __make_ ## TreeUpdaterReg ## _ ## UniqueId ## __ =                    \\\n      ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->__REGISTER__(Name)\n\n}  // namespace xgboost\n#endif  // XGBOOST_TREE_UPDATER_H_\n"
  },
  {
    "path": "include/xgboost/version_config.h",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n */\n#ifndef XGBOOST_VERSION_CONFIG_H_\n#define XGBOOST_VERSION_CONFIG_H_\n\n#define XGBOOST_VER_MAJOR 3 /* NOLINT */\n#define XGBOOST_VER_MINOR 3 /* NOLINT */\n#define XGBOOST_VER_PATCH 0 /* NOLINT */\n\n#endif  // XGBOOST_VERSION_CONFIG_H_\n"
  },
  {
    "path": "include/xgboost/windefs.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n *\n * @brief Macro for Windows.\n */\n#pragma once\n\n#if !defined(xgboost_IS_WIN)\n\n#if defined(_MSC_VER) || defined(__MINGW32__)\n#define xgboost_IS_WIN 1\n#endif  // defined(_MSC_VER) || defined(__MINGW32__)\n\n#endif  // !defined(xgboost_IS_WIN)\n\n#if defined(xgboost_IS_WIN)\n\n#if !defined(NOMINMAX)\n#define NOMINMAX\n#endif  // !defined(NOMINMAX)\n\n// A macro used inside `windows.h` to avoid conflicts with `winsock2.h`\n#if !defined(WIN32_LEAN_AND_MEAN)\n#define WIN32_LEAN_AND_MEAN\n#endif  // !defined(WIN32_LEAN_AND_MEAN)\n\n// Stop windows.h from including winsock.h\n// mingw uses _WINSOCKAPI_ to check whether windows.h is included, don't mess with it.\n#if !defined(_WINSOCKAPI_) && !defined(__MINGW32__)\n#define _WINSOCKAPI_\n#endif  // !defined(_WINSOCKAPI_) && !defined(__MINGW32__)\n\n#if !defined(xgboost_IS_MINGW)\n\n#if defined(__MINGW32__)\n#define xgboost_IS_MINGW 1\n#endif  // defined(__MINGW32__)\n\n#endif  // !defined(xgboost_IS_MINGW)\n\n#endif  // defined(xgboost_IS_WIN)\n"
  },
  {
    "path": "jvm-packages/.gitignore",
    "content": "build.sh\nxgboost4j-tester/pom.xml\nxgboost4j-tester/iris.csv\ndependency-reduced-pom.xml\n.factorypath\n"
  },
  {
    "path": "jvm-packages/CMakeLists.txt",
    "content": "find_package(JNI REQUIRED)\n\nlist(APPEND JVM_SOURCES\n  ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j.cpp\n  ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cpp)\n\nif(USE_CUDA)\n  list(APPEND JVM_SOURCES\n    ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu)\nendif()\n\nadd_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})\n\nif(ENABLE_ALL_WARNINGS)\n  target_compile_options(xgboost4j PUBLIC -Wall -Wextra)\nendif()\n\ntarget_link_libraries(xgboost4j PRIVATE objxgboost)\ntarget_include_directories(xgboost4j\n  PRIVATE\n  ${JNI_INCLUDE_DIRS}\n  ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native\n  ${PROJECT_SOURCE_DIR}/include\n  ${PROJECT_SOURCE_DIR}/dmlc-core/include)\n\nset_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)\n\n# MacOS: Patch libxgboost4j.dylib to use @rpath/libomp.dylib\nif(USE_OPENMP AND APPLE)\n  patch_openmp_path_macos(xgboost4j libxgboost4j)\nendif()\n"
  },
  {
    "path": "jvm-packages/README.md",
    "content": "# XGBoost4J: Distributed XGBoost for Scala/Java\n[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci)\n[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org/en/latest/jvm/index.html)\n[![GitHub license](http://dmlc.github.io/img/apache2.svg)](../LICENSE)\n\n[Documentation](https://xgboost.readthedocs.org/en/stable/jvm/index.html) |\n[Resources](../demo/README.md) |\n[Release Notes](../NEWS.md)\n\nXGBoost4J is the JVM package of XGBoost. It brings all the optimizations and power of XGBoost\ninto the JVM ecosystem.\n\n- Train XGBoost models in Scala and Java with easy customization.\n- Run distributed XGBoost natively on JVM frameworks such as Apache Flink and Apache\nSpark.\n\nYou can find more about XGBoost in the [Documentation](https://xgboost.readthedocs.org/en/stable/jvm/index.html) and on the [Resource Page](../demo/README.md)."
  },
  {
    "path": "jvm-packages/checkstyle-suppressions.xml",
    "content": "<!--\n  ~ Licensed to the Apache Software Foundation (ASF) under one or more\n  ~ contributor license agreements.  See the NOTICE file distributed with\n  ~ this work for additional information regarding copyright ownership.\n  ~ The ASF licenses this file to You under the Apache License, Version 2.0\n  ~ (the \"License\"); you may not use this file except in compliance with\n  ~ the License.  You may obtain a copy of the License at\n  ~\n  ~    http://www.apache.org/licenses/LICENSE-2.0\n  ~\n  ~ Unless required by applicable law or agreed to in writing, software\n  ~ distributed under the License is distributed on an \"AS IS\" BASIS,\n  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  ~ See the License for the specific language governing permissions and\n  ~ limitations under the License.\n  -->\n\n<!DOCTYPE suppressions PUBLIC\n\"-//Puppy Crawl//DTD Suppressions 1.1//EN\"\n\"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd\">\n\n<!--\n\n    This file contains suppression rules for Checkstyle checks.\n    Ideally only files that cannot be modified (e.g. third-party code)\n    should be added here. All other violations should be fixed.\n\n-->\n\n<suppressions>\n  <suppress checks=\".*\" files=\"XGBoostJNI.java\"/>\n</suppressions>\n"
  },
  {
    "path": "jvm-packages/checkstyle.xml",
    "content": "<!--\n  ~ Licensed to the Apache Software Foundation (ASF) under one or more\n  ~ contributor license agreements.  See the NOTICE file distributed with\n  ~ this work for additional information regarding copyright ownership.\n  ~ The ASF licenses this file to You under the Apache License, Version 2.0\n  ~ (the \"License\"); you may not use this file except in compliance with\n  ~ the License.  You may obtain a copy of the License at\n  ~\n  ~    http://www.apache.org/licenses/LICENSE-2.0\n  ~\n  ~ Unless required by applicable law or agreed to in writing, software\n  ~ distributed under the License is distributed on an \"AS IS\" BASIS,\n  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  ~ See the License for the specific language governing permissions and\n  ~ limitations under the License.\n  -->\n\n<!DOCTYPE module PUBLIC\n          \"-//Puppy Crawl//DTD Check Configuration 1.3//EN\"\n          \"http://www.puppycrawl.com/dtds/configuration_1_3.dtd\">\n\n<!--\n\n    Checkstyle configuration based on the Google coding conventions from:\n\n    -  Google Java Style\n       https://google-styleguide.googlecode.com/svn-history/r130/trunk/javaguide.html\n\n    with Spark-specific changes from:\n\n    https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide\n\n    Checkstyle is very configurable. 
Be sure to read the documentation at\n    http://checkstyle.sf.net (or in your downloaded distribution).\n\n    Most Checks are configurable, be sure to consult the documentation.\n\n    To completely disable a check, just comment it out or delete it from the file.\n\n    Authors: Max Vetrenko, Ruslan Diachenko, Roman Ivanov.\n\n -->\n\n<module name = \"Checker\">\n    <property name=\"charset\" value=\"UTF-8\"/>\n\n    <property name=\"severity\" value=\"error\"/>\n\n    <property name=\"fileExtensions\" value=\"java, properties, xml\"/>\n\n    <!-- Checks for whitespace                               -->\n    <!-- See http://checkstyle.sf.net/config_whitespace.html -->\n    <module name=\"FileTabCharacter\">\n        <property name=\"eachLine\" value=\"true\"/>\n    </module>\n\n    <module name=\"RegexpSingleline\">\n        <!-- \\s matches whitespace character, $ matches end of line. -->\n        <property name=\"format\" value=\"\\s+$\"/>\n        <property name=\"message\" value=\"No trailing whitespace allowed.\"/>\n    </module>\n\n    <module name=\"LineLength\">\n        <property name=\"max\" value=\"100\"/>\n        <property name=\"ignorePattern\" value=\"^package.*|^import.*|a href|href|http://|https://|ftp://\"/>\n    </module>\n\n    <module name=\"TreeWalker\">\n        <module name=\"OuterTypeFilename\"/>\n        <module name=\"IllegalTokenText\">\n            <property name=\"tokens\" value=\"STRING_LITERAL, CHAR_LITERAL\"/>\n            <property name=\"format\" value=\"\\\\u00(08|09|0(a|A)|0(c|C)|0(d|D)|22|27|5(C|c))|\\\\(0(10|11|12|14|15|42|47)|134)\"/>\n            <property name=\"message\" value=\"Avoid using corresponding octal or Unicode escape.\"/>\n        </module>\n        <module name=\"AvoidEscapedUnicodeCharacters\">\n            <property name=\"allowEscapesForControlCharacters\" value=\"true\"/>\n            <property name=\"allowByTailComment\" value=\"true\"/>\n            <property name=\"allowNonPrintableEscapes\" 
value=\"true\"/>\n        </module>\n        \n        <module name=\"NoLineWrap\"/>\n        <module name=\"EmptyBlock\">\n            <property name=\"option\" value=\"TEXT\"/>\n            <property name=\"tokens\" value=\"LITERAL_TRY, LITERAL_FINALLY, LITERAL_IF, LITERAL_ELSE, LITERAL_SWITCH\"/>\n        </module>\n        <module name=\"NeedBraces\">\n            <property name=\"allowSingleLineStatement\" value=\"true\"/>\n        </module>\n        <module name=\"OneStatementPerLine\"/>\n        <module name=\"ArrayTypeStyle\"/>\n        <module name=\"FallThrough\"/>\n        <module name=\"UpperEll\"/>\n        <module name=\"ModifierOrder\"/>\n        <module name=\"SeparatorWrap\">\n            <property name=\"tokens\" value=\"DOT\"/>\n            <property name=\"option\" value=\"nl\"/>\n        </module>\n        <module name=\"SeparatorWrap\">\n            <property name=\"tokens\" value=\"COMMA\"/>\n            <property name=\"option\" value=\"EOL\"/>\n        </module>\n        <module name=\"PackageName\">\n            <property name=\"format\" value=\"^[a-z]+(\\.[a-z][a-z0-9]*)*$\"/>\n            <message key=\"name.invalidPattern\"\n             value=\"Package name ''{0}'' must match pattern ''{1}''.\"/>\n        </module>\n        <module name=\"ClassTypeParameterName\">\n            <property name=\"format\" value=\"([A-Z][a-zA-Z0-9]*$)\"/>\n            <message key=\"name.invalidPattern\"\n             value=\"Class type name ''{0}'' must match pattern ''{1}''.\"/>\n        </module>\n        <module name=\"MethodTypeParameterName\">\n            <property name=\"format\" value=\"([A-Z][a-zA-Z0-9]*)\"/>\n            <message key=\"name.invalidPattern\"\n             value=\"Method type name ''{0}'' must match pattern ''{1}''.\"/>\n        </module>\n        <module name=\"GenericWhitespace\">\n            <message key=\"ws.followed\"\n             value=\"GenericWhitespace ''{0}'' is followed by whitespace.\"/>\n             <message 
key=\"ws.preceded\"\n             value=\"GenericWhitespace ''{0}'' is preceded with whitespace.\"/>\n             <message key=\"ws.illegalFollow\"\n             value=\"GenericWhitespace ''{0}'' should followed by whitespace.\"/>\n             <message key=\"ws.notPreceded\"\n             value=\"GenericWhitespace ''{0}'' is not preceded with whitespace.\"/>\n        </module>\n        <module name=\"Indentation\">\n            <property name=\"basicOffset\" value=\"2\"/>\n            <property name=\"braceAdjustment\" value=\"0\"/>\n            <property name=\"caseIndent\" value=\"2\"/>\n            <property name=\"throwsIndent\" value=\"4\"/>\n            <property name=\"lineWrappingIndentation\" value=\"4\"/>\n            <property name=\"arrayInitIndent\" value=\"2\"/>\n        </module>\n        <module name=\"ImportOrder\">\n            <property name=\"separated\" value=\"true\"/>\n            <property name=\"ordered\" value=\"true\"/>\n            <property name=\"groups\" value=\"/^javax?\\./,scala,*,ml.dmlc.xgboost4j\"/>\n        </module>\n        <module name=\"MethodParamPad\"/>\n        <module name=\"AnnotationLocation\">\n            <property name=\"tokens\" value=\"CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF\"/>\n        </module>\n        <module name=\"AnnotationLocation\">\n            <property name=\"tokens\" value=\"VARIABLE_DEF\"/>\n            <property name=\"allowSamelineMultipleAnnotations\" value=\"true\"/>\n        </module>\n        <module name=\"MethodName\">\n            <property name=\"format\" value=\"^[a-z][a-z0-9][a-zA-Z0-9_]*$\"/>\n            <message key=\"name.invalidPattern\"\n             value=\"Method name ''{0}'' must match pattern ''{1}''.\"/>\n        </module>\n        <module name=\"EmptyCatchBlock\">\n            <property name=\"exceptionVariableName\" value=\"expected\"/>\n        </module>\n        <module name=\"CommentsIndentation\"/>\n    </module>\n</module>\n"
  },
  {
    "path": "jvm-packages/create_jni.py",
    "content": "#!/usr/bin/env python\nimport argparse\nimport errno\nimport glob\nimport os\nimport platform\nimport shutil\nimport subprocess\nimport sys\nfrom contextlib import contextmanager\n\n# Monkey-patch the API inconsistency between Python2.X and 3.X.\nif sys.platform.startswith(\"linux\"):\n    sys.platform = \"linux\"\n\n\nCONFIG = {\n    \"USE_OPENMP\": \"ON\",\n    \"USE_CUDA\": \"OFF\",\n    \"USE_NCCL\": \"OFF\",\n    \"JVM_BINDINGS\": \"ON\",\n    \"LOG_CAPI_INVOCATION\": \"OFF\",\n    \"CMAKE_EXPORT_COMPILE_COMMANDS\": \"ON\",\n}\n\n\n@contextmanager\ndef cd(path):\n    path = normpath(path)\n    cwd = os.getcwd()\n    os.chdir(path)\n    print(\"cd \" + path, flush=True)\n    try:\n        yield path\n    finally:\n        os.chdir(cwd)\n\n\ndef maybe_makedirs(path):\n    path = normpath(path)\n    print(\"mkdir -p \" + path, flush=True)\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef run(command, **kwargs):\n    print(command, flush=True)\n    subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs)\n\n\ndef cp(source, target):\n    source = normpath(source)\n    target = normpath(target)\n    print(\"cp {0} {1}\".format(source, target), flush=True)\n    shutil.copy(source, target)\n\n\ndef normpath(path):\n    \"\"\"Normalize UNIX path to a native path.\"\"\"\n    normalized = os.path.join(*path.split(\"/\"))\n    if os.path.isabs(path):\n        return os.path.abspath(\"/\") + normalized\n    else:\n        return normalized\n\n\ndef native_build(cli_args: argparse.Namespace) -> None:\n    CONFIG[\"USE_OPENMP\"] = cli_args.use_openmp\n    if sys.platform == \"darwin\":\n        os.environ[\"JAVA_HOME\"] = (\n            subprocess.check_output(\"/usr/libexec/java_home\").strip().decode()\n        )\n    if cli_args.use_debug == \"ON\":\n        CONFIG[\"CMAKE_BUILD_TYPE\"] = \"Debug\"\n    CONFIG[\"USE_NVTX\"] = cli_args.use_nvtx\n    
CONFIG[\"PLUGIN_RMM\"] = cli_args.plugin_rmm\n\n    print(\"building Java wrapper\", flush=True)\n    with cd(\"..\"):\n        build_dir = \"build-gpu\" if cli_args.use_cuda == \"ON\" else \"build\"\n        maybe_makedirs(build_dir)\n\n        if sys.platform == \"linux\":\n            maybe_parallel_build = \" -- -j $(nproc)\"\n        elif sys.platform == \"win32\":\n            maybe_parallel_build = ' -- /m /nodeReuse:false \"/consoleloggerparameters:ShowCommandLine;Verbosity=minimal\"'\n        else:\n            maybe_parallel_build = \"\"\n\n        if cli_args.log_capi_invocation == \"ON\":\n            CONFIG[\"LOG_CAPI_INVOCATION\"] = \"ON\"\n\n        if cli_args.use_cuda == \"ON\":\n            CONFIG[\"USE_CUDA\"] = \"ON\"\n            CONFIG[\"USE_NCCL\"] = \"ON\"\n            CONFIG[\"USE_DLOPEN_NCCL\"] = \"OFF\"\n\n        args = [\"-D{0}:BOOL={1}\".format(k, v) for k, v in CONFIG.items()]\n        if sys.platform != \"win32\":\n            try:\n                subprocess.check_call([\"ninja\", \"--version\"])\n                args.append(\"-GNinja\")\n            except FileNotFoundError:\n                pass\n\n        # if enviorment set GPU_ARCH_FLAG\n        gpu_arch_flag = os.getenv(\"GPU_ARCH_FLAG\", None)\n        if gpu_arch_flag is not None:\n            args.append(\"-DCMAKE_CUDA_ARCHITECTURES=%s\" % gpu_arch_flag)\n\n        with cd(build_dir):\n            lib_dir = os.path.join(os.pardir, \"lib\")\n            if os.path.exists(lib_dir):\n                shutil.rmtree(lib_dir)\n\n            # Same trick as Python build, just test all possible generators.\n            if sys.platform == \"win32\":\n                supported_generators = (\n                    \"\",  # empty, decided by cmake\n                    '-G\"Visual Studio 17 2022\" -A x64',\n                    '-G\"Visual Studio 16 2019\" -A x64',\n                    '-G\"Visual Studio 15 2017\" -A x64',\n                )\n                for generator in 
supported_generators:\n                    try:\n                        run(\"cmake .. \" + \" \".join(args + [generator]))\n                        break\n                    except subprocess.CalledProcessError as e:\n                        print(f\"Failed to build with generator: {generator}\", e, flush=True)\n                        with cd(os.path.pardir):\n                            shutil.rmtree(build_dir)\n                            maybe_makedirs(build_dir)\n            else:\n                run(\"cmake .. \" + \" \".join(args))\n            run(\"cmake --build . --config Release\" + maybe_parallel_build)\n\n\n    print(\"copying native library\", flush=True)\n    library_name, os_folder = {\n        \"Windows\": (\"xgboost4j.dll\", \"windows\"),\n        \"Darwin\": (\"libxgboost4j.dylib\", \"macos\"),\n        \"Linux\": (\"libxgboost4j.so\", \"linux\"),\n        \"SunOS\": (\"libxgboost4j.so\", \"solaris\"),\n    }[platform.system()]\n    arch_folder = {\n        \"x86_64\": \"x86_64\",  # on Linux & macOS x86_64\n        \"amd64\": \"x86_64\",  # on Windows x86_64\n        \"i86pc\": \"x86_64\",  # on Solaris x86_64\n        \"sun4v\": \"sparc\",  # on Solaris sparc\n        \"arm64\": \"aarch64\",  # on macOS & Windows ARM 64-bit\n        \"aarch64\": \"aarch64\",\n    }[platform.machine().lower()]\n    output_folder = \"xgboost4j/src/main/resources/lib/{}/{}\".format(\n        os_folder, arch_folder\n    )\n    maybe_makedirs(output_folder)\n    cp(\"../lib/\" + library_name, output_folder)\n\n    print(\"copying train/test files\", flush=True)\n\n    # for xgboost4j\n    maybe_makedirs(\"xgboost4j/src/test/resources\")\n    for file in glob.glob(\"../demo/data/agaricus.*\"):\n        cp(file, \"xgboost4j/src/test/resources\")\n\n    # for xgboost4j-spark\n    maybe_makedirs(\"xgboost4j-spark/src/test/resources\")\n    with cd(\"../demo/data/regression\"):\n        run(f'\"{sys.executable}\" mapfeat.py')\n        run(f'\"{sys.executable}\" 
mknfold.py machine.txt 1')\n    for file in glob.glob(\"../demo/data/regression/machine.txt.t*\"):\n        cp(file, \"xgboost4j-spark/src/test/resources\")\n    for file in glob.glob(\"../demo/data/agaricus.*\"):\n        cp(file, \"xgboost4j-spark/src/test/resources\")\n\n    # for xgboost4j-spark-gpu\n    if cli_args.use_cuda == \"ON\":\n        maybe_makedirs(\"xgboost4j-spark-gpu/src/test/resources\")\n        for file in glob.glob(\"../demo/data/veterans_lung_cancer.csv\"):\n            cp(file, \"xgboost4j-spark-gpu/src/test/resources\")\n        cp(\"xgboost4j-spark/src/test/resources/rank.train.csv\", \"xgboost4j-spark-gpu/src/test/resources\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--log-capi-invocation\", type=str, choices=[\"ON\", \"OFF\"], default=\"OFF\"\n    )\n    parser.add_argument(\"--use-cuda\", type=str, choices=[\"ON\", \"OFF\"], default=\"OFF\")\n    parser.add_argument(\"--use-openmp\", type=str, choices=[\"ON\", \"OFF\"], default=\"ON\")\n    parser.add_argument(\"--use-debug\", type=str, choices=[\"ON\", \"OFF\"], default=\"OFF\")\n    parser.add_argument(\"--use-nvtx\", type=str, choices=[\"ON\", \"OFF\"], default=\"OFF\")\n    parser.add_argument(\"--plugin-rmm\", type=str, choices=[\"ON\", \"OFF\"], default=\"OFF\")\n    cli_args = parser.parse_args()\n    native_build(cli_args)\n"
  },
  {
    "path": "jvm-packages/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n\n    <groupId>ml.dmlc</groupId>\n    <artifactId>xgboost-jvm_2.12</artifactId>\n    <version>3.3.0-SNAPSHOT</version>\n    <packaging>pom</packaging>\n    <name>XGBoost JVM Package</name>\n    <description>JVM Package for XGBoost</description>\n    <url>https://github.com/dmlc/xgboost/tree/master/jvm-packages</url>\n    <licenses>\n        <license>\n            <name>The Apache License, Version 2.0</name>\n            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>\n        </license>\n    </licenses>\n    <developers>\n        <developer>\n            <name>Bobby Wang</name>\n            <email>wbo4958@gmail.com</email>\n        </developer>\n        <developer>\n            <name>Jiaming Yuan</name>\n            <email>jm.yuan@outlook.com</email>\n        </developer>\n        <developer>\n            <name>Hyunsu Cho</name>\n            <email>chohyu01@cs.washington.edu</email>\n        </developer>\n        <developer>\n            <name>CodingCat</name>\n            <email>codingcat@apache.org</email>\n        </developer>\n    </developers>\n    <scm>\n        <connection>scm:git:git:/github.com/dmlc/xgboost.git</connection>\n        <developerConnection>scm:git:ssh://github.com/dmlc/xgboost.git</developerConnection>\n        <url>https://github.com/dmlc/xgboost</url>\n    </scm>\n    <properties>\n        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>\n        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>\n        <maven.compiler.source>1.8</maven.compiler.source>\n        <maven.compiler.target>1.8</maven.compiler.target>\n        <flink.version>1.20.0</flink.version>\n   
     <junit.version>4.13.2</junit.version>\n        <spark.version>3.5.3</spark.version>\n        <spark.version.gpu>3.5.1</spark.version.gpu>\n        <fasterxml.jackson.version>2.15.0</fasterxml.jackson.version>\n        <scala.version>2.12.18</scala.version>\n        <scala.binary.version>2.12</scala.binary.version>\n        <hadoop.version>3.4.1</hadoop.version>\n        <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>\n        <log.capi.invocation>OFF</log.capi.invocation>\n        <use.cuda>OFF</use.cuda>\n        <use.openmp>ON</use.openmp>\n        <use.debug>OFF</use.debug>\n        <use.nvtx>OFF</use.nvtx>\n        <plugin.rmm>OFF</plugin.rmm>\n        <cudf.version>24.10.0</cudf.version>\n        <spark.rapids.version>24.10.0</spark.rapids.version>\n        <spark.rapids.classifier>cuda12</spark.rapids.classifier>\n        <scalatest.version>3.2.19</scalatest.version>\n        <scala-collection-compat.version>2.12.0</scala-collection-compat.version>\n        <skip.native.build>false</skip.native.build>\n\n        <!-- SPARK-36796 for JDK-17 test-->\n        <extraJavaTestArgs>\n          -XX:+IgnoreUnrecognizedVMOptions\n          --add-opens=java.base/java.lang=ALL-UNNAMED\n          --add-opens=java.base/java.lang.invoke=ALL-UNNAMED\n          --add-opens=java.base/java.io=ALL-UNNAMED\n          --add-opens=java.base/java.net=ALL-UNNAMED\n          --add-opens=java.base/java.nio=ALL-UNNAMED\n          --add-opens=java.base/java.util=ALL-UNNAMED\n          --add-opens=java.base/java.util.concurrent=ALL-UNNAMED\n          --add-opens=java.base/sun.nio.ch=ALL-UNNAMED\n          --add-opens=java.base/sun.nio.cs=ALL-UNNAMED\n          --add-opens=java.base/sun.security.action=ALL-UNNAMED\n          --add-opens=java.base/sun.util.calendar=ALL-UNNAMED\n        </extraJavaTestArgs>\n      </properties>\n    <repositories>\n        <repository>\n            <id>central_maven</id>\n            <name>central maven</name>\n            
<url>https://repo1.maven.org/maven2</url>\n        </repository>\n    </repositories>\n    <modules>\n    </modules>\n\n    <profiles>\n        <profile>\n            <!-- default active profile excluding gpu related test suites -->\n            <id>default</id>\n            <activation>\n                <activeByDefault>true</activeByDefault>\n            </activation>\n            <modules>\n                <module>xgboost4j</module>\n                <module>xgboost4j-example</module>\n                <module>xgboost4j-spark</module>\n                <module>xgboost4j-flink</module>\n            </modules>\n        </profile>\n\n        <profile>\n            <id>gpu</id>\n            <properties>\n               <use.cuda>ON</use.cuda>\n            </properties>\n            <modules>\n                <module>xgboost4j</module>\n                <module>xgboost4j-spark</module>\n                <module>xgboost4j-spark-gpu</module>\n            </modules>\n        </profile>\n\n        <profile>\n            <id>docs</id>\n            <properties>\n               <use.cuda>ON</use.cuda>\n               <skipTests>true</skipTests>\n               <maven.test.skip>true</maven.test.skip>\n               <skip.native.build>true</skip.native.build>\n            </properties>\n            <modules>\n                <module>xgboost4j</module>\n                <module>xgboost4j-spark</module>\n                <module>xgboost4j-spark-gpu</module>\n                <module>xgboost4j-flink</module>\n            </modules>\n        </profile>\n\n        <profile>\n            <id>release</id>\n            <build>\n                <plugins>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        <artifactId>maven-jar-plugin</artifactId>\n                        <version>3.4.2</version>\n                        <executions>\n                            <execution>\n                                
<id>empty-javadoc-jar</id>\n                                <phase>package</phase>\n                                <goals>\n                                    <goal>jar</goal>\n                                </goals>\n                                <configuration>\n                                    <classifier>javadoc</classifier>\n                                    <classesDirectory>${basedir}/javadoc</classesDirectory>\n                                </configuration>\n                            </execution>\n                        </executions>\n                    </plugin>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        <artifactId>maven-release-plugin</artifactId>\n                        <version>3.1.1</version>\n                        <configuration>\n                            <autoVersionSubmodules>true</autoVersionSubmodules>\n                            <useReleaseProfile>false</useReleaseProfile>\n                            <releaseProfiles>release</releaseProfiles>\n                            <goals>deploy</goals>\n                        </configuration>\n                    </plugin>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        <artifactId>maven-gpg-plugin</artifactId>\n                        <version>3.2.7</version>\n                        <executions>\n                            <execution>\n                                <id>sign-artifacts</id>\n                                <phase>verify</phase>\n                                <goals>\n                                    <goal>sign</goal>\n                                </goals>\n                            </execution>\n                        </executions>\n                    </plugin>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        
<artifactId>maven-source-plugin</artifactId>\n                        <version>3.3.1</version>\n                        <executions>\n                            <execution>\n                                <id>attach-sources</id>\n                                <goals>\n                                    <goal>jar-no-fork</goal>\n                                </goals>\n                            </execution>\n                        </executions>\n                    </plugin>\n                    <plugin>\n                        <groupId>org.sonatype.central</groupId>\n                        <artifactId>central-publishing-maven-plugin</artifactId>\n                        <version>0.7.0</version>\n                        <extensions>true</extensions>\n                        <configuration>\n                            <publishingServerId>central</publishingServerId>\n                        </configuration>\n                    </plugin>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        <artifactId>maven-surefire-plugin</artifactId>\n                        <configuration>\n                            <skipTests>true</skipTests>\n                        </configuration>\n                    </plugin>\n                </plugins>\n            </build>\n        </profile>\n        <profile>\n            <id>release-to-s3</id>\n            <distributionManagement>\n                <snapshotRepository>\n                    <id>maven-s3-snapshot-repo</id>\n                    <url>s3://xgboost-maven-repo/snapshot</url>\n                </snapshotRepository>\n                <repository>\n                    <id>maven-s3-release-repo</id>\n                    <url>s3://xgboost-maven-repo/release</url>\n                </repository>\n            </distributionManagement>\n            <repositories>\n                <repository>\n                    <id>maven-s3-snapshot-repo</id>\n          
          <url>https://s3.amazonaws.com/xgboost-maven-repo/snapshot</url>\n                </repository>\n                <repository>\n                    <id>maven-s3-release-repo</id>\n                    <url>https://s3.amazonaws.com/xgboost-maven-repo/release</url>\n                </repository>\n            </repositories>\n            <build>\n                <plugins>\n                    <plugin>\n                        <groupId>org.apache.maven.plugins</groupId>\n                        <artifactId>maven-surefire-plugin</artifactId>\n                        <configuration>\n                            <skipTests>true</skipTests>\n                        </configuration>\n                    </plugin>\n                </plugins>\n            </build>\n        </profile>\n    </profiles>\n    <distributionManagement>\n        <snapshotRepository>\n            <id>ossrh</id>\n            <url>https://oss.sonatype.org/content/repositories/snapshots</url>\n        </snapshotRepository>\n    </distributionManagement>\n    <build>\n        <resources>\n            <resource>\n                <directory>src/main/resources</directory>\n                <filtering>true</filtering>\n            </resource>\n        </resources>\n\n        <pluginManagement>\n          <plugins>\n            <plugin>\n              <groupId>org.scalatest</groupId>\n              <artifactId>scalatest-maven-plugin</artifactId>\n              <version>2.2.0</version>\n              <configuration>\n                <argLine>-ea -Xmx4g -Xss4m ${extraJavaTestArgs}</argLine>\n              </configuration>\n              <executions>\n                <execution>\n                  <id>test</id>\n                  <goals>\n                    <goal>test</goal>\n                  </goals>\n                </execution>\n              </executions>\n            </plugin>\n          </plugins>\n        </pluginManagement>\n\n        <plugins>\n            <plugin>\n                
<groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-site-plugin</artifactId>\n                <version>3.21.0</version>\n            </plugin>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-checkstyle-plugin</artifactId>\n                <version>3.6.0</version>\n                <configuration>\n                    <configLocation>checkstyle.xml</configLocation>\n                    <suppressionsLocation>checkstyle-suppressions.xml</suppressionsLocation>\n                    <suppressionsFileExpression>checkstyle.suppressions.file</suppressionsFileExpression>\n                    <failOnViolation>true</failOnViolation>\n                </configuration>\n                <executions>\n                    <execution>\n                        <id>checkstyle</id>\n                        <phase>validate</phase>\n                        <goals>\n                            <goal>check</goal>\n                        </goals>\n                    </execution>\n                </executions>\n            </plugin>\n            <plugin>\n                <groupId>net.alchim31.maven</groupId>\n                <artifactId>scala-maven-plugin</artifactId>\n                <version>4.9.2</version>\n                <executions>\n                    <execution>\n                        <id>compile</id>\n                        <goals>\n                            <goal>compile</goal>\n                        </goals>\n                        <phase>compile</phase>\n                    </execution>\n                    <execution>\n                        <id>test-compile</id>\n                        <goals>\n                            <goal>testCompile</goal>\n                        </goals>\n                        <phase>test-compile</phase>\n                    </execution>\n                    <execution>\n                        <phase>process-resources</phase>\n                 
       <goals>\n                            <goal>compile</goal>\n                        </goals>\n                    </execution>\n                    <execution>\n                        <id>scala-compile-first</id>\n                        <phase>process-resources</phase>\n                        <goals>\n                            <goal>compile</goal>\n                            <goal>add-source</goal>\n                        </goals>\n                    </execution>\n                </executions>\n            </plugin>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-surefire-plugin</artifactId>\n                <version>3.5.2</version>\n                <configuration>\n                    <skipTests>false</skipTests>\n                    <useSystemClassLoader>false</useSystemClassLoader>\n                </configuration>\n            </plugin>\n            <plugin>\n                <groupId>org.scalatest</groupId>\n                <artifactId>scalatest-maven-plugin</artifactId>\n                      <configuration>\n                        <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>\n                        <junitxml>.</junitxml>\n                        <filereports>XGBoostTestSuite.txt</filereports>\n                      </configuration>\n                      <executions>\n                        <execution>\n                          <id>test</id>\n                          <goals>\n                            <goal>test</goal>\n                          </goals>\n                        </execution>\n                      </executions>\n            </plugin>\n        </plugins>\n        <extensions>\n            <extension>\n                <groupId>com.github.seahen</groupId>\n                <artifactId>maven-s3-wagon</artifactId>\n                <version>1.3.3</version>\n            </extension>\n        </extensions>\n    </build>\n    
<reporting>\n        <plugins>\n            <plugin>\n                <artifactId>maven-project-info-reports-plugin</artifactId>\n                <version>3.8.0</version>\n            </plugin>\n            <plugin>\n                <groupId>net.alchim31.maven</groupId>\n                <artifactId>scala-maven-plugin</artifactId>\n                <version>4.9.2</version>\n                <configuration>\n                    <jvmArgs>\n                        <jvmArg>-Xms64m</jvmArg>\n                        <jvmArg>-Xmx1024m</jvmArg>\n                    </jvmArgs>\n                </configuration>\n            </plugin>\n        </plugins>\n    </reporting>\n    <dependencies>\n        <dependency>\n            <groupId>com.esotericsoftware</groupId>\n            <artifactId>kryo</artifactId>\n            <version>5.6.2</version>\n        </dependency>\n        <dependency>\n            <groupId>commons-logging</groupId>\n            <artifactId>commons-logging</artifactId>\n            <version>1.3.4</version>\n        </dependency>\n        <dependency>\n            <groupId>org.scalatest</groupId>\n            <artifactId>scalatest_${scala.binary.version}</artifactId>\n            <version>${scalatest.version}</version>\n            <scope>test</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.scalactic</groupId>\n            <artifactId>scalactic_${scala.binary.version}</artifactId>\n            <version>${scalatest.version}</version>\n            <scope>test</scope>\n        </dependency>\n    </dependencies>\n</project>\n"
  },
  {
    "path": "jvm-packages/scalastyle-config.xml",
    "content": "<!--\n  ~ Licensed to the Apache Software Foundation (ASF) under one or more\n  ~ contributor license agreements.  See the NOTICE file distributed with\n  ~ this work for additional information regarding copyright ownership.\n  ~ The ASF licenses this file to You under the Apache License, Version 2.0\n  ~ (the \"License\"); you may not use this file except in compliance with\n  ~ the License.  You may obtain a copy of the License at\n  ~\n  ~    http://www.apache.org/licenses/LICENSE-2.0\n  ~\n  ~ Unless required by applicable law or agreed to in writing, software\n  ~ distributed under the License is distributed on an \"AS IS\" BASIS,\n  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  ~ See the License for the specific language governing permissions and\n  ~ limitations under the License.\n  -->\n<!--\n\nIf you wish to turn off checking for a section of code, you can put a comment in the source\nbefore and after the section, with the following syntax:\n\n  // scalastyle:off\n  ...  
// stuff that breaks the styles\n  // scalastyle:on\n\nYou can also disable only one rule, by specifying its rule id, as specified in:\n  http://www.scalastyle.org/rules-0.7.0.html\n\n  // scalastyle:off no.finalize\n  override def finalize(): Unit = ...\n  // scalastyle:on no.finalize\n\nThis file is divided into 3 sections:\n (1) rules that we enforce.\n (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet\n     (or we need to make the scalastyle rule more configurable).\n (3) rules that we don't want to enforce.\n-->\n\n<scalastyle>\n  <name>Scalastyle standard configuration</name>\n\n  <!-- ================================================================================ -->\n  <!--                               rules we enforce                                   -->\n  <!-- ================================================================================ -->\n\n  <check level=\"error\" class=\"org.scalastyle.file.FileTabChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.file.HeaderMatchesChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">true</parameter>\n      <parameter name=\"header\"><![CDATA[/\\*\n Copyright \\(c\\) \\d{4}.* by Contributors\n\n Licensed under the Apache License, Version 2\\.0 \\(the \"License\"\\);\n you may not use this file except in compliance with the License\\.\n You may obtain a copy of the License at\n\n http://www\\.apache\\.org/licenses/LICENSE-2\\.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\\.\n See the License for the specific language governing permissions and\n limitations under the License\\.\n \\*/]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.SpacesAfterPlusChecker\" 
enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.SpacesBeforePlusChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.file.WhitespaceEndOfLineChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.file.FileLineLengthChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"maxLineLength\"><![CDATA[100]]></parameter>\n      <parameter name=\"tabSize\"><![CDATA[2]]></parameter>\n      <parameter name=\"ignoreImports\">true</parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ClassNamesChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\"><![CDATA[[A-Z][A-Za-z]*]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ObjectNamesChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\"><![CDATA[[A-Z][A-Za-z]*]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.PackageObjectNamesChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ParameterNumberChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"maxParameters\"><![CDATA[10]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NoFinalizeChecker\" enabled=\"false\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.CovariantEqualsChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.StructuralTypeChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.UppercaseLChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" 
class=\"org.scalastyle.scalariform.IfBraceChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"singleLineAllowed\"><![CDATA[true]]></parameter>\n      <parameter name=\"doubleLineAllowed\"><![CDATA[true]]></parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.PublicMethodsHaveTypeChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.file.NewLineAtEofChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NonASCIICharacterChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.SpaceAfterCommentStartChecker\" enabled=\"true\"></check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"tokens\">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"tokens\">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY,\n        LARROW, RARROW\n      </parameter>\n    </parameters>\n  </check>\n\n  <!-- ??? usually shouldn't be checked into the code base. -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NotImplementedErrorUsage\" enabled=\"true\"></check>\n\n  <check customId=\"visiblefortesting\" level=\"error\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">@VisibleForTesting</parameter>\n    </parameters>\n    <customMessage><![CDATA[\n      @VisibleForTesting causes classpath issues. 
Please note this in the java doc instead (SPARK-11615).\n    ]]></customMessage>\n  </check>\n\n  <check customId=\"runtimeaddshutdownhook\" level=\"error\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">Runtime\\.getRuntime\\.addShutdownHook</parameter>\n    </parameters>\n    <customMessage><![CDATA[\n      Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use\n      ShutdownHookManager.addShutdownHook instead.\n      If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with\n      // scalastyle:off runtimeaddshutdownhook\n      Runtime.getRuntime.addShutdownHook(...)\n      // scalastyle:on runtimeaddshutdownhook\n    ]]></customMessage>\n  </check>\n\n  <check customId=\"mutablesynchronizedbuffer\" level=\"error\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">mutable\\.SynchronizedBuffer</parameter>\n    </parameters>\n    <customMessage><![CDATA[\n      Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use\n      java.util.concurrent.ConcurrentLinkedQueue instead.\n      If you must use mutable.SynchronizedBuffer, wrap the code block with\n      // scalastyle:off mutablesynchronizedbuffer\n      mutable.SynchronizedBuffer[...]\n      // scalastyle:on mutablesynchronizedbuffer\n    ]]></customMessage>\n  </check>\n\n  <check customId=\"classforname\" level=\"error\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">Class\\.forName</parameter>\n    </parameters>\n    <customMessage><![CDATA[\n      Are you sure that you want to use Class.forName? 
In most cases, you should use Utils.classForName instead.\n      If you must use Class.forName, wrap the code block with\n      // scalastyle:off classforname\n      Class.forName(...)\n      // scalastyle:on classforname\n    ]]></customMessage>\n  </check>\n\n  <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->\n  <check customId=\"javaconversions\" level=\"error\" class=\"org.scalastyle.scalariform.TokenChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">JavaConversions</parameter>\n    </parameters>\n    <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import\n      scala.collection.JavaConverters._ and use .asScala / .asJava methods\n    </customMessage>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ImportOrderChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"groups\">java,scala,3rdParty,dmlc</parameter>\n      <parameter name=\"group.java\">javax?\\..*</parameter>\n      <parameter name=\"group.scala\">scala\\..*</parameter>\n      <parameter name=\"group.3rdParty\">(?!ml\\.dmlc\\.xgboost4j).*</parameter>\n      <parameter name=\"group.dmlc\">ml.dmlc.xgboost4j.*</parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"tokens\">COMMA</parameter>\n    </parameters>\n  </check>\n\n  <!-- ================================================================================ -->\n  <!--       rules we'd like to enforce, but haven't cleaned up the codebase yet        -->\n  <!-- ================================================================================ -->\n\n  <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->\n  <!-- Ideally the following two rules should be configurable to rule out string interpolation. 
-->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker\" enabled=\"false\"></check>\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker\" enabled=\"false\"></check>\n\n  <!-- This breaks symbolic method names so we don't turn it on. -->\n  <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.MethodNamesChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"regex\"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>\n    </parameters>\n  </check>\n\n  <!-- Should turn this on, but we have a few places that need to be fixed first -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.EqualsHashCodeChecker\" enabled=\"false\"></check>\n\n  <!-- ================================================================================ -->\n  <!--                               rules we don't want                                -->\n  <!-- ================================================================================ -->\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.IllegalImportsChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"illegalImports\"><![CDATA[sun._,java.awt._]]></parameter>\n    </parameters>\n  </check>\n\n  <!-- We want the opposite of this: NewLineAtEofChecker -->\n  <check level=\"error\" class=\"org.scalastyle.file.NoNewLineAtEofChecker\" enabled=\"false\"></check>\n\n  <!-- This one complains about all kinds of random things. Disable. 
-->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.SimplifyBooleanExpressionChecker\" enabled=\"false\"></check>\n\n  <!-- We use return quite a bit for control flows and guards -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ReturnChecker\" enabled=\"false\"></check>\n\n  <!-- We use null a lot in low level code and to interface with 3rd party code -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NullChecker\" enabled=\"false\"></check>\n\n  <!-- Doesn't seem super big deal here ... -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NoCloneChecker\" enabled=\"false\"></check>\n\n  <!-- Doesn't seem super big deal here ... -->\n  <check level=\"error\" class=\"org.scalastyle.file.FileLengthChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"maxFileLength\">800</parameter>\n    </parameters>\n  </check>\n\n  <!-- Doesn't seem super big deal here ... -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NumberOfTypesChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"maxTypes\">30</parameter>\n    </parameters>\n  </check>\n\n  <!-- Doesn't seem super big deal here ... -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.CyclomaticComplexityChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"maximum\">10</parameter>\n    </parameters>\n  </check>\n\n  <!-- Doesn't seem super big deal here ... -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.MethodLengthChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"maxLength\">50</parameter>\n    </parameters>\n  </check>\n\n  <!-- Not exactly feasible to enforce this right now. -->\n  <!-- It is also infrequent that somebody introduces a new class with a lot of methods. 
-->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.NumberOfMethodsInTypeChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"maxMethods\"><![CDATA[30]]></parameter>\n    </parameters>\n  </check>\n\n  <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->\n  <check level=\"error\" class=\"org.scalastyle.scalariform.MagicNumberChecker\" enabled=\"false\">\n    <parameters>\n      <parameter name=\"ignore\">-1,0,1,2,3</parameter>\n    </parameters>\n  </check>\n\n  <check level=\"error\" class=\"org.scalastyle.scalariform.IllegalImportsChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"illegalImports\"><![CDATA[scala.collection.Seq,scala.collection.IndexedSeq]]></parameter>\n    </parameters>\n    <customMessage><![CDATA[\n        Don't import scala.collection.Seq and scala.collection.IndexedSeq as it may cause issues with cross-build between Scala 2.12 and 2.13.\n\n        Please refer below page to see the details of changes around Seq / IndexedSeq.\n        https://docs.scala-lang.org/overviews/core/collections-migration-213.html\n\n        If you really need to use scala.collection.Seq or scala.collection.IndexedSeq, please use the fully-qualified name instead.\n        ]]></customMessage>\n  </check>\n  <check level=\"error\" class=\"org.scalastyle.scalariform.ProcedureDeclarationChecker\" enabled=\"true\">\n    <customMessage>procedure syntax is deprecated in Scala 2.13: add return type `: Unit` and `=`</customMessage>\n  </check>\n  <check level=\"error\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"true\">\n    <parameters>\n      <parameter name=\"regex\">ArrayBuilder.make\\[(.+)\\]\\(\\)</parameter>\n      <parameter name=\"line\">false</parameter>\n    </parameters>\n    <customMessage>ArrayBuilder.make does not accept parens anymore in Scala 2.13</customMessage>\n  </check>\n\n  <check level=\"warning\" class=\"org.scalastyle.file.RegexChecker\" enabled=\"false\">\n    
<parameters>\n      <parameter name=\"regex\">(: |\\[)(Indexed)?Seq\\[[A-Za-z0-9_]+\\]</parameter>\n      <parameter name=\"line\">false</parameter>\n    </parameters>\n    <customMessage><![CDATA[NOTE: Scala 2.12 defaults scala.(Indexed)Seq to scala.collection.(Indexed)Seq while Scala 2.13 defaults\n        scala.(Indexed)Seq to scala.collection.immutable.(Indexed)Seq\n\n        Please refer below page to see the details of changes around Seq / IndexedSeq.\n        https://docs.scala-lang.org/overviews/core/collections-migration-213.html\n        ]]></customMessage>\n  </check>\n</scalastyle>\n"
  },
  {
    "path": "jvm-packages/xgboost4j/LICENSE",
    "content": "/*\nCopyright (c) 2014 by Contributors \n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n    \n   http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/"
  },
  {
    "path": "jvm-packages/xgboost4j/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n    <parent>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost-jvm_2.12</artifactId>\n        <version>3.3.0-SNAPSHOT</version>\n    </parent>\n    <name>xgboost4j</name>\n    <artifactId>xgboost4j_2.12</artifactId>\n    <version>3.3.0-SNAPSHOT</version>\n    <packaging>jar</packaging>\n\n    <dependencies>\n      <dependency>\n          <groupId>org.scala-lang</groupId>\n          <artifactId>scala-compiler</artifactId>\n          <version>${scala.version}</version>\n      </dependency>\n      <dependency>\n            <groupId>org.scala-lang</groupId>\n            <artifactId>scala-library</artifactId>\n            <version>${scala.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.scala-lang.modules</groupId>\n            <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>\n            <version>${scala-collection-compat.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.hadoop</groupId>\n            <artifactId>hadoop-hdfs</artifactId>\n            <version>${hadoop.version}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.hadoop</groupId>\n            <artifactId>hadoop-common</artifactId>\n            <version>${hadoop.version}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>junit</groupId>\n            <artifactId>junit</artifactId>\n            <version>${junit.version}</version>\n            <scope>test</scope>\n        </dependency>\n        <dependency>\n          
<groupId>org.scalatest</groupId>\n          <artifactId>scalatest_${scala.binary.version}</artifactId>\n          <version>${scalatest.version}</version>\n          <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>com.fasterxml.jackson.core</groupId>\n            <artifactId>jackson-databind</artifactId>\n            <version>${fasterxml.jackson.version}</version>\n            <scope>provided</scope>\n        </dependency>\n    </dependencies>\n\n    <build>\n      <plugins>\n          <plugin>\n            <groupId>org.apache.maven.plugins</groupId>\n            <artifactId>maven-javadoc-plugin</artifactId>\n            <version>3.11.3</version>\n            <configuration>\n              <show>protected</show>\n              <nohelp>true</nohelp>\n            </configuration>\n          </plugin>\n          <plugin>\n              <groupId>org.apache.maven.plugins</groupId>\n              <artifactId>maven-assembly-plugin</artifactId>\n              <configuration>\n                  <skipAssembly>false</skipAssembly>\n              </configuration>\n          </plugin>\n          <plugin>\n              <artifactId>exec-maven-plugin</artifactId>\n              <groupId>org.codehaus.mojo</groupId>\n              <version>3.5.0</version>\n              <executions>\n                  <execution>\n                      <id>native</id>\n                      <phase>generate-sources</phase>\n                      <goals>\n                          <goal>exec</goal>\n                      </goals>\n                      <configuration>\n                          <executable>python</executable>\n                          <arguments>\n                            <argument>create_jni.py</argument>\n                            <argument>--log-capi-invocation</argument>\n                            <argument>${log.capi.invocation}</argument>\n                            <argument>--use-cuda</argument>\n                            
<argument>${use.cuda}</argument>\n                            <argument>--use-openmp</argument>\n                            <argument>${use.openmp}</argument>\n                            <argument>--use-debug</argument>\n                            <argument>${use.debug}</argument>\n                            <argument>--use-nvtx</argument>\n                            <argument>${use.nvtx}</argument>\n                            <argument>--plugin-rmm</argument>\n                            <argument>${plugin.rmm}</argument>\n                          </arguments>\n                          <workingDirectory>${user.dir}</workingDirectory>\n                          <skip>${skip.native.build}</skip>\n                      </configuration>\n                  </execution>\n              </executions>\n          </plugin>\n          <plugin>\n              <groupId>org.apache.maven.plugins</groupId>\n              <artifactId>maven-jar-plugin</artifactId>\n              <version>3.4.2</version>\n              <executions>\n                  <execution>\n                      <goals>\n                          <goal>test-jar</goal>\n                      </goals>\n                  </execution>\n              </executions>\n          </plugin>\n          <plugin>\n              <groupId>org.apache.maven.plugins</groupId>\n              <artifactId>maven-resources-plugin</artifactId>\n              <version>3.3.1</version>\n              <configuration>\n                  <nonFilteredFileExtensions>\n                      <nonFilteredFileExtension>dll</nonFilteredFileExtension>\n                      <nonFilteredFileExtension>dylib</nonFilteredFileExtension>\n                      <nonFilteredFileExtension>so</nonFilteredFileExtension>\n                  </nonFilteredFileExtensions>\n              </configuration>\n          </plugin>\n      </plugins>\n    </build>\n</project>\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/LabeledPoint.java",
    "content": "/*\n Copyright (c) 2014-2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j;\n\nimport java.io.Serializable;\nimport java.util.Arrays;\nimport java.util.Objects;\n\n/**\n * Labeled training data point.\n * TODO(hcho3): Migrate Record class when we upgrade to Java 14+, to reduce boilerplate.\n */\npublic final class LabeledPoint implements Serializable {\n  private final float label;\n  private final int size;\n  private final int[] indices;\n  private final float[] values;\n  private final float weight;\n  private final int group;\n  private final float baseMargin;\n\n  /**\n   * @param label Label of this point.\n   * @param size Feature dimensionality\n   * @param indices Feature indices of this point or `null` if the data is dense.\n   * @param values Feature values of this point.\n   * @param weight Weight of this point.\n   * @param group Group of this point (used for ranking) or -1.\n   * @param baseMargin Initial prediction on this point or `Float.NaN`\n   */\n  public LabeledPoint(\n      float label, int size, int[] indices, float[] values, float weight,\n      int group, float baseMargin\n  ) {\n    assert (indices == null || indices.length == values.length):\n      \"indices and values must have the same number of elements\";\n    assert (indices == null || size >= indices.length):\n      \"feature dimensionality must be greater equal than size of indices\";\n    this.label = label;\n    this.size = 
size;\n    this.indices = indices;\n    this.values = values;\n    this.weight = weight;\n    this.group = group;\n    this.baseMargin = baseMargin;\n  }\n\n  /**\n   * @param label Label of this point.\n   * @param size Feature dimensionality\n   * @param indices Feature indices of this point or `null` if the data is dense.\n   * @param values Feature values of this point.\n   */\n  public LabeledPoint(\n      float label, int size, int[] indices, float[] values\n  ) {\n    this(label, size, indices, values, 1.0f, -1, Float.NaN);\n  }\n\n  /**\n   * @param label Label of this point.\n   * @param size Feature dimensionality\n   * @param indices Feature indices of this point or `null` if the data is dense.\n   * @param values Feature values of this point.\n   * @param weight Weight of this point.\n   */\n  public LabeledPoint(\n      float label, int size, int[] indices, float[] values, float weight\n  ) {\n    this(label, size, indices, values, weight, -1, Float.NaN);\n  }\n\n  /**\n   * @param label Label of this point.\n   * @param size Feature dimensionality\n   * @param indices Feature indices of this point or `null` if the data is dense.\n   * @param values Feature values of this point.\n   * @param weight Weight of this point.\n   * @param group Group of this point (used for ranking) or -1.\n   */\n  public LabeledPoint(\n      float label, int size, int[] indices, float[] values, float weight,\n      int group\n  ) {\n    this(label, size, indices, values, weight, group, Float.NaN);\n  }\n\n\n  @Override\n  public int hashCode() {\n    return Objects.hash(this.label, this.size, Arrays.hashCode(this.indices),\n      Arrays.hashCode(this.values), this.weight, this.group, this.baseMargin);\n  }\n\n  @Override\n  public boolean equals(Object obj) {\n    if (this == obj) {\n      return true;\n    } else if (!(obj instanceof LabeledPoint)) {\n      return false;\n    } else {\n      LabeledPoint other = (LabeledPoint) obj;\n      return Objects.equals(label, 
other.label)\n        && Objects.equals(size, other.size)\n        && Arrays.equals(indices, other.indices)\n        && Arrays.equals(values, other.values)\n        && Objects.equals(weight, other.weight)\n        && Objects.equals(group, other.group)\n        && Objects.equals(baseMargin, other.baseMargin);\n    }\n  }\n\n  public float label() { return this.label; }\n  public int size() { return this.size; }\n  public int[] indices() { return this.indices; }\n  public float[] values() { return this.values; }\n  public float weight() { return this.weight; }\n  public int group() { return this.group; }\n  public float baseMargin() { return this.baseMargin; }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.*;\nimport java.util.Arrays;\nimport java.util.HashMap;\nimport java.util.HashSet;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Set;\n\nimport com.esotericsoftware.kryo.Kryo;\nimport com.esotericsoftware.kryo.KryoSerializable;\nimport com.esotericsoftware.kryo.io.Input;\nimport com.esotericsoftware.kryo.io.Output;\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\n/**\n * Booster for xgboost, this is a model API that support interactive build of an XGBoost Model\n */\npublic class Booster implements Serializable, KryoSerializable {\n  public static final String DEFAULT_FORMAT = \"ubj\";\n  private static final Log logger = LogFactory.getLog(Booster.class);\n  // handle to the booster.\n  private long handle = 0;\n  private int version = 0;\n\n  /**\n   * This enumeration defines the type of prediction to be made and is used for inplace predictions.\n   * Type of prediction, used for inplace_predict.\n   */\n  public enum PredictionType {\n    kValue(0),\n    kMargin(1);\n\n    private Integer ptype;\n    private PredictionType(final Integer ptype) {\n      this.ptype = ptype;\n    }\n    public Integer getPType() {\n      return ptype;\n    }\n  }\n\n  /**\n   * Create a new Booster with empty stage.\n   *\n   * @param params  Model parameters that are used 
to build the Booster\n   * @param cacheMats Cached DMatrix entries that help increase the speed of Booster prediction\n   *\n   * @throws XGBoostError native error\n   */\n  Booster(Map<String, Object> params, DMatrix[] cacheMats) throws XGBoostError {\n    init(cacheMats);\n    setParams(params);\n  }\n\n  /**\n   * Load a new Booster model from modelPath\n   * @param modelPath model path\n   * @return The created Booster.\n   * @throws XGBoostError\n   */\n  static Booster loadModel(String modelPath) throws XGBoostError {\n    if (modelPath == null) {\n      throw new NullPointerException(\"modelPath : null\");\n    }\n    Booster ret = new Booster(new HashMap<>(), new DMatrix[0]);\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterLoadModel(ret.handle, modelPath));\n    return ret;\n  }\n\n  /**\n   * Load a new Booster model from a byte array buffer.\n   * The assumption is the array only contains one XGBoost Model.\n   * This can be used to load existing booster models saved by other xgboost bindings.\n   *\n   * @param buffer The byte contents of the booster.\n   * @return The created booster.\n   * @throws XGBoostError\n   */\n  static Booster loadModel(byte[] buffer) throws XGBoostError {\n    Booster ret = new Booster(new HashMap<>(), new DMatrix[0]);\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterLoadModelFromBuffer(ret.handle, buffer));\n    return ret;\n  }\n\n  /**\n   * Set parameter to the Booster.\n   *\n   * @param key   param name\n   * @param value param value\n   * @throws XGBoostError native error\n   */\n  public final void setParam(String key, Object value) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSetParam(handle, key, value.toString()));\n  }\n\n  /**\n   * Set parameters to the Booster.\n   *\n   * @param params parameters key-value map\n   * @throws XGBoostError native error\n   */\n  public void setParams(Map<String, Object> params) throws XGBoostError {\n    if (params != null) {\n      for (Map.Entry<String, Object> 
entry : params.entrySet()) {\n        setParam(entry.getKey(), entry.getValue().toString());\n      }\n    }\n  }\n\n  /**\n   * Get attributes stored in the Booster as a Map.\n   *\n   * @return A map contain attribute pairs.\n   * @throws XGBoostError native error\n   */\n  public final Map<String, String> getAttrs() throws XGBoostError {\n    String[][] attrNames = new String[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetAttrNames(handle, attrNames));\n    Map<String, String> attrMap = new HashMap<>();\n    for (String name: attrNames[0]) {\n      attrMap.put(name, this.getAttr(name));\n    }\n    return attrMap;\n  }\n\n  /**\n   * Get attribute value from the Booster based on the key provided.\n   *\n   * @param key   attribute key\n   * @return attribute value\n   * @throws XGBoostError native error\n   */\n  public final String getAttr(String key) throws XGBoostError {\n    String[] attrValue = new String[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetAttr(handle, key, attrValue));\n    return attrValue[0];\n  }\n\n  /**\n   * Set an attribute key-value pair to the Booster.\n   *\n   * @param key   attribute key\n   * @param value attribute value\n   * @throws XGBoostError native error\n   */\n  public final void setAttr(String key, String value) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSetAttr(handle, key, value));\n  }\n\n  /**\n   * Set multiple attribute key-value pairs to the Booster.\n   *\n   * @param attrs attributes key-value map\n   * @throws XGBoostError native error\n   */\n  public void setAttrs(Map<String, String> attrs) throws XGBoostError {\n    if (attrs != null) {\n      for (Map.Entry<String, String> entry : attrs.entrySet()) {\n        setAttr(entry.getKey(), entry.getValue());\n      }\n    }\n  }\n\n  /**\n   * Get all the feature names from the Booster.\n   * @return An array of all the feature names.\n   * @throws XGBoostError\n   */\n  public final String[] getFeatureNames() throws 
XGBoostError {\n    int numFeature = (int) getNumFeature();\n    String[] out = new String[numFeature];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetStrFeatureInfo(handle, \"feature_name\", out));\n    return out;\n  }\n\n  /**\n   * Set feature names to the Booster.\n   *\n   * @param featureNames An array of all the feature names.\n   * @throws XGBoostError\n   */\n  public void setFeatureNames(String[] featureNames) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSetStrFeatureInfo(\n        handle, \"feature_name\", featureNames));\n  }\n\n  /**\n   * Get feature types from the Booster.\n   * @return An array of all the feature types.\n   * @throws XGBoostError\n   */\n  public final String[] getFeatureTypes() throws XGBoostError {\n    int numFeature = (int) getNumFeature();\n    String[] out = new String[numFeature];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetStrFeatureInfo(handle, \"feature_type\", out));\n    return out;\n  }\n\n  /**\n   * Set feature types to the Booster.\n   * @param featureTypes An array of all the feature types.\n   * @throws XGBoostError\n   */\n  public void setFeatureTypes(String[] featureTypes) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSetStrFeatureInfo(\n        handle, \"feature_type\", featureTypes));\n  }\n\n  /**\n   * Update the booster for one iteration.\n   *\n   * @param dtrain training data\n   * @param iter   current iteration number\n   * @throws XGBoostError native error\n   */\n  public void update(DMatrix dtrain, int iter) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle()));\n  }\n\n  @Deprecated\n  public void update(DMatrix dtrain, IObjective obj) throws XGBoostError {\n    float[][] predicts = this.predict(dtrain, true, 0, false, false);\n    List<float[]> gradients = obj.getGradient(predicts, dtrain);\n    this.boost(dtrain, gradients.get(0), gradients.get(1));\n  }\n\n  /**\n   * Update with 
customized objective function\n   *\n   * @param dtrain training data\n   * @param iter   The current training iteration.\n   * @param obj    customized objective class\n   * @throws XGBoostError native error\n   */\n  public void update(DMatrix dtrain, int iter, IObjective obj) throws XGBoostError {\n    float[][] predicts = this.predict(dtrain, true, 0, false, false);\n    List<float[]> gradients = obj.getGradient(predicts, dtrain);\n    this.boost(dtrain, iter, gradients.get(0), gradients.get(1));\n  }\n\n  @Deprecated\n  public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError {\n    this.boost(dtrain, 0, grad, hess);\n  }\n\n  /**\n   * Update with given grad and hess\n   *\n   * @param dtrain training data\n   * @param iter   The current training iteration.\n   * @param grad   first order of gradient\n   * @param hess   second order of gradient\n   * @throws XGBoostError native error\n   */\n  public void boost(DMatrix dtrain, int iter, float[] grad, float[] hess) throws XGBoostError {\n    if (grad.length != hess.length) {\n      throw new AssertionError(String.format(\"grad/hess length mismatch %s / %s\", grad.length,\n              hess.length));\n    }\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterTrainOneIter(handle,\n                                                          dtrain.getHandle(), iter, grad, hess));\n  }\n\n  /**\n   * Evaluate the Booster model with given dmatrixs.\n   *\n   * @param evalMatrixs dmatrixs for evaluation\n   * @param evalNames   name for eval dmatrixs, used for check results\n   * @param iter        current eval iteration\n   * @return eval Information containing the evaluation results\n   * @throws XGBoostError native error\n   */\n  public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError {\n    long[] handles = dmatrixsToHandles(evalMatrixs);\n    String[] evalInfo = new String[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterEvalOneIter(handle, iter, handles, 
evalNames,\n            evalInfo));\n    return evalInfo[0];\n  }\n\n  /**\n   * Evaluate the Booster model with given dmatrixs.\n   *\n   * @param evalMatrixs dmatrixs for evaluation\n   * @param evalNames   name for eval dmatrixs, used for check results\n   * @param iter        current eval iteration\n   * @param metricsOut  output array containing the evaluation metrics for each evalMatrix\n   * @return eval Information containing the evaluation results\n   * @throws XGBoostError native error\n   */\n  public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, float[] metricsOut)\n          throws XGBoostError {\n    String stringFormat = evalSet(evalMatrixs, evalNames, iter);\n    String[] metricPairs = stringFormat.split(\"\\t\");\n    for (int i = 1; i < metricPairs.length; i++) {\n      String value = metricPairs[i].split(\":\")[1];\n      if (value.equalsIgnoreCase(\"nan\")) {\n        metricsOut[i - 1] = Float.NaN;\n      } else if (value.equalsIgnoreCase(\"-nan\")) {\n        metricsOut[i - 1] = -Float.NaN;\n      } else {\n        metricsOut[i - 1] = Float.valueOf(value);\n      }\n    }\n    return stringFormat;\n  }\n\n  /**\n   * Evaluate the Booster model given customized Evaluation class\n   *\n   * @param evalMatrixs evaluation matrix\n   * @param evalNames   evaluation names\n   * @param eval        custom evaluator\n   * @return eval Information containing the evaluation results\n   * @throws XGBoostError native error\n   */\n  public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eval)\n          throws XGBoostError {\n    // Hopefully, a tiny redundant allocation wouldn't hurt.\n    return evalSet(evalMatrixs, evalNames, eval, new float[evalNames.length]);\n  }\n\n  public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eval,\n                        float[] metricsOut) throws XGBoostError {\n    String evalInfo = \"\";\n    for (int i = 0; i < evalNames.length; i++) {\n      String 
evalName = evalNames[i];\n      DMatrix evalMat = evalMatrixs[i];\n      float evalResult = eval.eval(predict(evalMat), evalMat);\n      String evalMetric = eval.getMetric();\n      evalInfo += String.format(\"\\t%s-%s:%f\", evalName, evalMetric, evalResult);\n      metricsOut[i] = evalResult;\n    }\n    return evalInfo;\n  }\n\n  /**\n   * An advanced prediction function with all the options.\n   *\n   * @param data         the test data for which predictions are to be made\n   * @param outputMargin output margin\n   * @param treeLimit    limit number of trees, 0 means all trees.\n   * @param predLeaf     prediction minimum to keep leafs\n   * @param predContribs prediction feature contributions\n   * @return predict two dimensional array of results, where each row corresponds to a prediction.\n   */\n  private float[][] predict(DMatrix data,\n                            boolean outputMargin,\n                            int treeLimit,\n                            boolean predLeaf,\n                            boolean predContribs) throws XGBoostError {\n    int optionMask = 0;\n    if (outputMargin) {\n      optionMask = 1;\n    }\n    if (predLeaf) {\n      optionMask = 2;\n    }\n    if (predContribs) {\n      optionMask = 4;\n    }\n    float[][] rawPredicts = new float[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterPredict(handle, data.getHandle(), optionMask,\n            treeLimit, rawPredicts));\n    int row = (int) data.rowNum();\n    int col = rawPredicts[0].length / row;\n    float[][] predicts = new float[row][col];\n    int r, c;\n    for (int i = 0; i < rawPredicts[0].length; i++) {\n      r = i / col;\n      c = i % col;\n      predicts[r][c] = rawPredicts[0][i];\n    }\n    return predicts;\n  }\n\n  /**\n   * Perform thread-safe prediction.\n   *\n   * @param data      Flattened input matrix of features for prediction\n   * @param nrow      The number of predictions to make (count of input matrix rows)\n   * @param ncol      The number of 
features in the model (count of input matrix columns)\n   * @param missing   Value indicating missing element in the <code>data</code> input matrix\n   *\n   * @return predict  Result matrix\n   */\n  public float[][] inplace_predict(float[] data,\n                                   int nrow,\n                                   int ncol,\n                                   float missing) throws XGBoostError {\n    int[] iteration_range = new int[2];\n    iteration_range[0] = 0;\n    iteration_range[1] = 0;\n    return this.inplace_predict(data, nrow, ncol,\n        missing, iteration_range, PredictionType.kValue, null);\n  }\n\n  /**\n   * Perform thread-safe prediction.\n   *\n   * @param data      Flattened input matrix of features for prediction\n   * @param nrow      The number of preditions to make (count of input matrix rows)\n   * @param ncol      The number of features in the model (count of input matrix columns)\n   * @param missing   Value indicating missing element in the <code>data</code> input matrix\n   * @param iteration_range Specifies which layer of trees are used in prediction.  
For\n   *                        example, if a random forest is trained with 100 rounds.\n   *                        Specifying `iteration_range=[10, 20)`, then only the forests\n   *                        built during [10, 20) (half open set) rounds are used in this\n   *                        prediction.\n   *\n   * @return predict  Result matrix\n   */\n  public float[][] inplace_predict(float[] data,\n                                   int nrow,\n                                   int ncol,\n                                   float missing, int[] iteration_range) throws XGBoostError {\n    return this.inplace_predict(data, nrow, ncol,\n        missing, iteration_range, PredictionType.kValue, null);\n  }\n\n\n  /**\n   * Perform thread-safe prediction.\n   *\n   * @param data            Flattened input matrix of features for prediction\n   * @param nrow            The number of preditions to make (count of input matrix rows)\n   * @param ncol            The number of features in the model (count of input matrix columns)\n   * @param missing         Value indicating missing element in the <code>data</code> input matrix\n   * @param iteration_range Specifies which layer of trees are used in prediction.  
For\n   *                        example, if a random forest is trained with 100 rounds.\n   *                        Specifying `iteration_range=[10, 20)`, then only the forests\n   *                        built during [10, 20) (half open set) rounds are used in this\n   *                        prediction.\n   * @param predict_type    What kind of prediction to run.\n   * @return predict       Result matrix\n   */\n  public float[][] inplace_predict(float[] data,\n                                   int nrow,\n                                   int ncol,\n                                   float missing,\n                                   int[] iteration_range,\n                                   PredictionType predict_type,\n                                   float[] base_margin) throws XGBoostError {\n    if (iteration_range.length != 2) {\n      throw new XGBoostError(new String(\"Iteration range is expected to be [begin, end).\"));\n    }\n    int ptype = predict_type.getPType();\n\n    int begin = iteration_range[0];\n    int end = iteration_range[1];\n\n    float[][] rawPredicts = new float[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterPredictFromDense(handle, data, nrow, ncol,\n        missing,\n        begin, end, ptype, base_margin, rawPredicts));\n\n    int col = rawPredicts[0].length / nrow;\n    float[][] predicts = new float[nrow][col];\n    int r, c;\n    for (int i = 0; i < rawPredicts[0].length; i++) {\n      r = i / col;\n      c = i % col;\n      predicts[r][c] = rawPredicts[0][i];\n    }\n    return predicts;\n  }\n\n  /**\n   * Predict leaf indices given the data\n   *\n   * @param data The input data.\n   * @param treeLimit Number of trees to include, 0 means all trees.\n   * @return The leaf indices of the instance.\n   * @throws XGBoostError\n   */\n  public float[][] predictLeaf(DMatrix data, int treeLimit) throws XGBoostError {\n    return this.predict(data, false, treeLimit, true, false);\n  }\n\n  /**\n   * Output feature 
contributions toward predictions of given data\n   *\n   * @param data The input data.\n   * @param treeLimit Number of trees to include, 0 means all trees.\n   * @return The feature contributions and bias.\n   * @throws XGBoostError\n   */\n  public float[][] predictContrib(DMatrix data, int treeLimit) throws XGBoostError {\n    return this.predict(data, false, treeLimit, true, true);\n  }\n\n  /**\n   * Make a prediction with test data in a DMatrix format.\n   *\n   * @param data dmatrix storing the test input on which predictions are to be made\n   * @return predict The results of the prediction, where each row corresponds to a prediction.\n   * @throws XGBoostError native error\n   */\n  public float[][] predict(DMatrix data) throws XGBoostError {\n    return this.predict(data, false, 0, false, false);\n  }\n\n  /**\n   * Make a prediction with test data in a DMatrix format and output margin.\n   *\n   * @param data  dmatrix storing the test input on which predictions are to be made\n   * @param outputMargin output margin\n   * @return predict The results of the prediction, where each row corresponds to a prediction.\n   */\n  public float[][] predict(DMatrix data, boolean outputMargin) throws XGBoostError {\n    return this.predict(data, outputMargin, 0, false, false);\n  }\n\n  /**\n   * Advanced predict function with all the options.\n   *\n   * @param data         matrix storing the test input on which predictions are to be made\n   * @param outputMargin output margin\n   * @param treeLimit    limit number of trees, 0 means all trees.\n   * @return predict The results of the prediction, where each row corresponds to a prediction.\n   */\n  public float[][] predict(DMatrix data, boolean outputMargin, int treeLimit) throws XGBoostError {\n    return this.predict(data, outputMargin, treeLimit, false, false);\n  }\n\n  /**\n   * Save model to modelPath\n   *\n   * @param modelPath model path\n   */\n  public void saveModel(String modelPath) throws 
XGBoostError{\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSaveModel(handle, modelPath));\n  }\n\n  /**\n   * Save the model to file opened as output stream.\n   * The model format is compatible with other xgboost bindings.\n   * The output stream can only save one xgboost model.\n   * This function will close the OutputStream after the save.\n   *\n   * @param out The output stream\n   */\n  public void saveModel(OutputStream out) throws XGBoostError, IOException {\n    saveModel(out, DEFAULT_FORMAT);\n  }\n\n  /**\n   * Save the model to file opened as output stream.\n   * The model format is compatible with other xgboost bindings.\n   * The output stream can only save one xgboost model.\n   * This function will close the OutputStream after the save.\n   *\n   * @param out The output stream\n   * @param format The model format (ubj, json, deprecated)\n   * @throws XGBoostError\n   * @throws IOException\n   */\n  public void saveModel(OutputStream out, String format) throws XGBoostError, IOException {\n    out.write(this.toByteArray(format));\n    out.close();\n  }\n\n  /**\n   * Get the dump of the model as a string array\n   *\n   * @param featureMap A string containing the path to a feature map.\n   * @param withStats Controls whether the split statistics are output.\n   * @return The dumped model information\n   * @throws XGBoostError native error\n   */\n  public String[] getModelDump(String featureMap, boolean withStats) throws XGBoostError {\n    return getModelDump(featureMap, withStats, \"text\");\n  }\n\n  /**\n   * Get the dump of the model as a string array with specified feature map, stats,\n   * and the specified format.\n   *\n   * @param featureMap A string containing the path to a feature map.\n   * @param withStats Controls whether the split statistics are output.\n   * @param format The format in which the model is dumped (text, json, ubj).\n   * @return The dumped model information\n   * @throws XGBoostError\n   */\n  public String[] 
getModelDump(String featureMap, boolean withStats, String format)\n         throws XGBoostError {\n    int statsFlag = 0;\n    if (featureMap == null) {\n      featureMap = \"\";\n    }\n    if (withStats) {\n      statsFlag = 1;\n    }\n    if (format == null) {\n      format = \"text\";\n    }\n    String[][] modelInfos = new String[1][];\n    XGBoostJNI.checkCall(\n            XGBoostJNI.XGBoosterDumpModelEx(handle, featureMap, statsFlag, format, modelInfos));\n    return modelInfos[0];\n  }\n\n  /**\n   * Get the dump of the model as a string array with specified feature names.\n   *\n   * @param featureNames Names of the features.\n   * @return dumped model information\n   * @throws XGBoostError\n   */\n  public String[] getModelDump(String[] featureNames, boolean withStats) throws XGBoostError {\n    return getModelDump(featureNames, withStats, \"text\");\n  }\n\n  /**\n   * Get the dump of the model as a string array with specified feature map, stats,\n   * and the specified format.\n   *\n   * @param featureNames An array of strings containing the feature names.\n   * @param withStats Controls whether the split statistics are output.\n   * @param format The format in which the model is dumped (text, json, ubj).\n   * @return The dumped model information\n   * @throws XGBoostError\n   */\n  public String[] getModelDump(String[] featureNames, boolean withStats, String format)\n      throws XGBoostError {\n    int statsFlag = 0;\n    if (withStats) {\n      statsFlag = 1;\n    }\n    if (format == null) {\n      format = \"text\";\n    }\n    String[][] modelInfos = new String[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterDumpModelExWithFeatures(\n        handle, featureNames, statsFlag, format, modelInfos));\n    return modelInfos[0];\n  }\n\n  /**\n   * Supported feature importance types\n   *\n   * WEIGHT = Number of nodes that a feature was used to determine a split\n   * GAIN = Average information gain per split for a feature\n   * COVER = Average 
cover per split for a feature\n   * TOTAL_GAIN = Total information gain over all splits of a feature\n   * TOTAL_COVER = Total cover over all splits of a feature\n   */\n  public static class FeatureImportanceType {\n    public static final String WEIGHT = \"weight\";\n    public static final String GAIN = \"gain\";\n    public static final String COVER = \"cover\";\n    public static final String TOTAL_GAIN = \"total_gain\";\n    public static final String TOTAL_COVER = \"total_cover\";\n    public static final Set<String> ACCEPTED_TYPES = new HashSet<>(\n            Arrays.asList(WEIGHT, GAIN, COVER, TOTAL_GAIN, TOTAL_COVER));\n  }\n\n  /**\n   * Get importance of each feature with specified feature names.\n   *\n   * @return featureScoreMap  key: feature name, value: feature importance score, can be nill.\n   * @throws XGBoostError native error\n   */\n  public Map<String, Integer> getFeatureScore(String[] featureNames) throws XGBoostError {\n    String[] modelInfos = getModelDump(featureNames, false);\n    return getFeatureWeightsFromModel(modelInfos);\n  }\n\n  /**\n   * Get importance of each feature\n   *\n   * @return featureScoreMap  key: feature index, value: feature importance score, can be nill\n   * @throws XGBoostError native error\n   */\n  public Map<String, Integer> getFeatureScore(String featureMap) throws XGBoostError {\n    String[] modelInfos = getModelDump(featureMap, false);\n    return getFeatureWeightsFromModel(modelInfos);\n  }\n\n  /**\n   * Get the importance of each feature based purely on weights (number of splits)\n   *\n   * @return featureScoreMap key: feature index,\n   * value: feature importance score based on weight\n   * @throws XGBoostError native error\n   */\n  private Map<String, Integer> getFeatureWeightsFromModel(String[] modelInfos) throws XGBoostError {\n    Map<String, Integer> featureScore = new HashMap<>();\n    for (String tree : modelInfos) {\n      for (String node : tree.split(\"\\n\")) {\n        String[] array 
= node.split(\"\\\\[\");\n        if (array.length == 1) {\n          continue;\n        }\n        String fid = array[1].split(\"\\\\]\")[0];\n        fid = fid.split(\"<\")[0];\n        if (featureScore.containsKey(fid)) {\n          featureScore.put(fid, 1 + featureScore.get(fid));\n        } else {\n          featureScore.put(fid, 1);\n        }\n      }\n    }\n    return featureScore;\n  }\n\n  /**\n   * Get the feature importances for gain or cover (average or total)\n   *\n   * @return featureImportanceMap key: feature index,\n   * values: feature importance score based on gain or cover\n   * @throws XGBoostError native error\n   */\n  public Map<String, Double> getScore(\n          String[] featureNames, String importanceType) throws XGBoostError {\n    String[] modelInfos = getModelDump(featureNames, true);\n    return getFeatureImportanceFromModel(modelInfos, importanceType);\n  }\n\n  /**\n   * Get the feature importances for gain or cover (average or total), with feature names\n   *\n   * @return featureImportanceMap key: feature name,\n   * values: feature importance score based on gain or cover\n   * @throws XGBoostError native error\n   */\n  public Map<String, Double> getScore(\n          String featureMap, String importanceType) throws XGBoostError {\n    String[] modelInfos = getModelDump(featureMap, true);\n    return getFeatureImportanceFromModel(modelInfos, importanceType);\n  }\n\n  /**\n   * Get the importance of each feature based on information gain or cover\n   *\n   * @return featureImportanceMap key: feature index, value: feature importance score\n   * based on information gain or cover\n   * @throws XGBoostError native error\n   */\n  private Map<String, Double> getFeatureImportanceFromModel(\n          String[] modelInfos, String importanceType) throws XGBoostError {\n    if (!FeatureImportanceType.ACCEPTED_TYPES.contains(importanceType)) {\n      throw new AssertionError(String.format(\"Importance type %s is not supported\",\n        
      importanceType));\n    }\n    Map<String, Double> importanceMap = new HashMap<>();\n    Map<String, Double> weightMap = new HashMap<>();\n    if (importanceType.equals(FeatureImportanceType.WEIGHT)) {\n      Map<String, Integer> importanceWeights = getFeatureWeightsFromModel(modelInfos);\n      for (String feature: importanceWeights.keySet()) {\n        importanceMap.put(feature, new Double(importanceWeights.get(feature)));\n      }\n      return importanceMap;\n    }\n    /* Each split in the tree has this text form:\n    \"0:[f28<-9.53674316e-07] yes=1,no=2,missing=1,gain=4000.53101,cover=1628.25\"\n    So the line has to be split according to whether cover or gain is desired */\n    String splitter = \"gain=\";\n    if (importanceType.equals(FeatureImportanceType.COVER)\n        || importanceType.equals(FeatureImportanceType.TOTAL_COVER)) {\n      splitter = \"cover=\";\n    }\n    for (String tree: modelInfos) {\n      for (String node: tree.split(\"\\n\")) {\n        String[] array = node.split(\"\\\\[\");\n        if (array.length == 1) {\n          continue;\n        }\n        String[] fidWithImportance = array[1].split(\"\\\\]\");\n        // Extract gain or cover from string after closing bracket\n        Double importance = Double.parseDouble(\n            fidWithImportance[1].split(splitter)[1].split(\",\")[0]\n        );\n        String fid = fidWithImportance[0].split(\"<\")[0];\n        if (importanceMap.containsKey(fid)) {\n          importanceMap.put(fid, importance + importanceMap.get(fid));\n          weightMap.put(fid, 1d + weightMap.get(fid));\n        } else {\n          importanceMap.put(fid, importance);\n          weightMap.put(fid, 1d);\n        }\n      }\n    }\n    /* By default we calculate total gain and total cover.\n    Divide by the number of nodes per feature to get gain / cover */\n    if (importanceType.equals(FeatureImportanceType.COVER)\n        || importanceType.equals(FeatureImportanceType.GAIN)) {\n      for (String 
fid: importanceMap.keySet()) {\n        importanceMap.put(fid, importanceMap.get(fid)/weightMap.get(fid));\n      }\n    }\n    return importanceMap;\n  }\n\n  /**\n   * Save model into raw byte array in the UBJSON (\"ubj\") format.\n   *\n   * @return the saved byte array\n   * @throws XGBoostError native error\n   */\n  public byte[] toByteArray() throws XGBoostError {\n    return this.toByteArray(DEFAULT_FORMAT);\n  }\n\n  /**\n   * Save model into raw byte array.\n   *\n   * @param format The output format.  Available options are \"json\", \"ubj\" and \"deprecated\".\n   *\n   * @return the saved byte array\n   * @throws XGBoostError native error\n   */\n  public byte[] toByteArray(String format) throws XGBoostError {\n    byte[][] bytes = new byte[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterSaveModelToBuffer(this.handle, format, bytes));\n    return bytes[0];\n  }\n\n  /**\n   * Get number of model features.\n   * @return the number of features.\n   * @throws XGBoostError\n   */\n  public long getNumFeature() throws XGBoostError {\n    long[] numFeature = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetNumFeature(this.handle, numFeature));\n    return numFeature[0];\n  }\n  public int getNumBoostedRound() throws XGBoostError {\n    int[] numRound = new int[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterGetNumBoostedRound(this.handle, numRound));\n    return numRound[0];\n  }\n\n  /**\n   * Internal initialization function.\n   * @param cacheMats The cached DMatrix.\n   * @throws XGBoostError\n   */\n  private void init(DMatrix[] cacheMats) throws XGBoostError {\n    long[] handles = null;\n    if (cacheMats != null) {\n      handles = dmatrixsToHandles(cacheMats);\n    }\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBoosterCreate(handles, out));\n\n    handle = out[0];\n  }\n\n  /**\n   * transfer DMatrix array to handle array (used for native functions)\n   *\n   * @param dmatrixs\n   * @return handle array for 
input dmatrixs\n   */\n  private static long[] dmatrixsToHandles(DMatrix[] dmatrixs) {\n    long[] handles = new long[dmatrixs.length];\n    for (int i = 0; i < dmatrixs.length; i++) {\n      handles[i] = dmatrixs[i].getHandle();\n    }\n    return handles;\n  }\n\n  // making Booster serializable\n  private void writeObject(java.io.ObjectOutputStream out) throws IOException {\n    try {\n      out.writeInt(version);\n      out.writeObject(this.toByteArray(\"ubj\"));\n    } catch (XGBoostError ex) {\n      ex.printStackTrace();\n      logger.error(ex.getMessage());\n    }\n  }\n\n  private void readObject(java.io.ObjectInputStream in)\n          throws IOException, ClassNotFoundException {\n    try {\n      this.init(null);\n      this.version = in.readInt();\n      byte[] bytes = (byte[])in.readObject();\n      XGBoostJNI.checkCall(XGBoostJNI.XGBoosterLoadModelFromBuffer(this.handle, bytes));\n    } catch (XGBoostError ex) {\n      ex.printStackTrace();\n      logger.error(ex.getMessage());\n    }\n  }\n\n  @Override\n  protected void finalize() throws Throwable {\n    super.finalize();\n    dispose();\n  }\n\n  public synchronized void dispose() {\n    if (handle != 0L) {\n      XGBoostJNI.XGBoosterFree(handle);\n      handle = 0;\n    }\n  }\n\n  @Override\n  public void write(Kryo kryo, Output output) {\n    try {\n      byte[] serObj = this.toByteArray(\"ubj\");\n      int serObjSize = serObj.length;\n      output.writeInt(serObjSize);\n      output.writeInt(version);\n      output.write(serObj);\n    } catch (XGBoostError ex) {\n      logger.error(ex.getMessage(), ex);\n    }\n  }\n\n  @Override\n  public void read(Kryo kryo, Input input) {\n    try {\n      this.init(null);\n      int serObjSize = input.readInt();\n      this.version = input.readInt();\n      byte[] bytes = new byte[serObjSize];\n      input.readBytes(bytes);\n      XGBoostJNI.checkCall(XGBoostJNI.XGBoosterLoadModelFromBuffer(this.handle, bytes));\n    } catch (XGBoostError ex) {\n      
logger.error(ex.getMessage(), ex);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Column.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\n/**\n * This Column abstraction provides an array interface JSON string, which is\n * used to reconstruct columnar data within the XGBoost library.\n */\npublic abstract class Column implements AutoCloseable {\n\n  /**\n   * Return array interface json string for this Column\n   */\n  public abstract String toJson();\n\n  @Override\n  public void close() throws Exception {\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ColumnBatch.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\n/**\n * This class wraps multiple Column and provides the array interface json\n * for all columns.\n */\npublic abstract class ColumnBatch extends Column {\n\n  /** Get features cuda array interface json string */\n  public abstract String toFeaturesJson();\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Communicator.java",
    "content": "package ml.dmlc.xgboost4j.java;\n\nimport java.io.Serializable;\nimport java.nio.ByteBuffer;\nimport java.nio.ByteOrder;\nimport java.util.Map;\n\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n/**\n * Collective communicator global class for synchronization.\n *\n * Currently the communicator API is experimental, function signatures may change in the future\n * without notice.\n */\npublic class Communicator {\n\n  public enum OpType implements Serializable {\n    MAX(0), MIN(1), SUM(2);\n\n    private int op;\n\n    public int getOperand() {\n      return this.op;\n    }\n\n    OpType(int op) {\n      this.op = op;\n    }\n  }\n\n  public enum DataType implements Serializable {\n    FLOAT16(0, 2), FLOAT32(1, 4), FLOAT64(2, 8),\n    INT8(4, 1), INT16(5, 2), INT32(6, 4), INT64(7, 8),\n    UINT8(8, 1), UINT16(9, 2), UINT32(10, 4), UINT64(11, 8);\n\n    private final int enumOp;\n    private final int size;\n\n    public int getEnumOp() {\n      return this.enumOp;\n    }\n\n    public int getSize() {\n      return this.size;\n    }\n\n    DataType(int enumOp, int size) {\n      this.enumOp = enumOp;\n      this.size = size;\n    }\n  }\n\n  private static void checkCall(int ret) throws XGBoostError {\n    if (ret != 0) {\n      throw new XGBoostError(XGBoostJNI.XGBGetLastError());\n    }\n  }\n\n  /**\n   * Initialize the collective communicator on current working thread.\n   *\n   * @param envs The additional environment variables to pass to the communicator.\n   * @throws XGBoostError\n   */\n  public static void init(Map<String, Object> envs) throws XGBoostError {\n    ObjectMapper mapper = new ObjectMapper();\n    try {\n      String jconfig = mapper.writeValueAsString(envs);\n      checkCall(XGBoostJNI.CommunicatorInit(jconfig));\n    } catch (JsonProcessingException ex) {\n      throw new XGBoostError(\"Failed to read arguments for the communicator.\", ex);\n    }\n  }\n\n  
/**\n   * Shutdown the communicator in current working thread, equals to finalize.\n   *\n   * @throws XGBoostError\n   */\n  public static void shutdown() throws XGBoostError {\n    checkCall(XGBoostJNI.CommunicatorFinalize());\n  }\n\n  /**\n   * Print the message via the communicator.\n   *\n   * @param msg\n   * @throws XGBoostError\n   */\n  public static void communicatorPrint(String msg) throws XGBoostError {\n    checkCall(XGBoostJNI.CommunicatorPrint(msg));\n  }\n\n  /**\n   * get rank of current thread.\n   *\n   * @return the rank.\n   * @throws XGBoostError\n   */\n  public static int getRank() throws XGBoostError {\n    int[] out = new int[1];\n    checkCall(XGBoostJNI.CommunicatorGetRank(out));\n    return out[0];\n  }\n\n  /**\n   * get world size of current job.\n   *\n   * @return the worldsize\n   * @throws XGBoostError\n   */\n  public static int getWorldSize() throws XGBoostError {\n    int[] out = new int[1];\n    checkCall(XGBoostJNI.CommunicatorGetWorldSize(out));\n    return out[0];\n  }\n\n  /**\n   * perform Allreduce on distributed float vectors using operator op.\n   *\n   * @param elements local elements on distributed workers.\n   * @param op       operator used for Allreduce.\n   * @return All-reduced float elements according to the given operator.\n   */\n  public static float[] allReduce(float[] elements, OpType op) {\n    DataType dataType = DataType.FLOAT32;\n    ByteBuffer buffer = ByteBuffer.allocateDirect(dataType.getSize() * elements.length)\n            .order(ByteOrder.nativeOrder());\n\n    for (float el : elements) {\n      buffer.putFloat(el);\n    }\n    buffer.flip();\n\n    XGBoostJNI.CommunicatorAllreduce(buffer, elements.length, dataType.getEnumOp(),\n            op.getOperand());\n    float[] results = new float[elements.length];\n    buffer.asFloatBuffer().get(results);\n\n    return results;\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ConfigContext.java",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.core.type.TypeReference;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n/**\n * Global configuration context for XGBoost.\n *\n * @version 3.0.0\n * <p>\n * See the parameter document for supported global configuration. 
The configuration is\n * restored upon close.\n */\npublic class ConfigContext implements AutoCloseable {\n  private final String initialConfiguration;\n\n  public ConfigContext() throws XGBoostError {\n    initialConfiguration = getGlobalConfig();\n  }\n\n  /* Set the parameters during initializing */\n  public ConfigContext(Map<String, Object> params) throws XGBoostError {\n    if (params != null && !params.isEmpty()) {\n      initialConfiguration = getGlobalConfig();\n      setConfigs(params);\n    } else {\n      initialConfiguration = null;\n    }\n  }\n\n  /**\n   * Get the global configuration\n   */\n  private String getGlobalConfig() throws XGBoostError {\n    String[] config = new String[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGBGetGlobalConfig(config));\n    return config[0];\n  }\n\n  public Object getConfig(String name) throws XGBoostError {\n    String jconfig = getGlobalConfig();\n    ObjectMapper mapper = new ObjectMapper();\n    try {\n      Map<String, Object> map = mapper.readValue(jconfig,\n        new TypeReference<Map<String, Object>>() {\n        });\n      return map.get(name);\n    } catch (JsonProcessingException ex) {\n      throw new XGBoostError(\"Failed to get the global config due to a decode error.\", ex);\n    }\n  }\n\n  /** Set one single configuration */\n  public void setConfig(String key, Object value) throws XGBoostError {\n    HashMap<String, Object> configs = new HashMap<>();\n    configs.put(key, value);\n    ObjectMapper mapper = new ObjectMapper();\n    try {\n      String config = mapper.writeValueAsString(configs);\n      XGBoostJNI.checkCall(XGBoostJNI.XGBSetGlobalConfig(config));\n    } catch (JsonProcessingException ex) {\n      throw new XGBoostError(\"Failed to set the global config due to an encode error.\", ex);\n    }\n  }\n\n  /** Set a bunch of configurations */\n  public void setConfigs(Map<String, Object> configs) throws XGBoostError {\n    ObjectMapper mapper = new ObjectMapper();\n    try {\n      String 
config = mapper.writeValueAsString(configs);\n      XGBoostJNI.checkCall(XGBoostJNI.XGBSetGlobalConfig(config));\n    } catch (JsonProcessingException ex) {\n      throw new XGBoostError(\"Failed to set the global config due to an encode error.\", ex);\n    }\n  }\n\n  @Override\n  public void close() throws XGBoostError {\n    if (initialConfiguration != null) {\n      XGBoostJNI.checkCall(XGBoostJNI.XGBSetGlobalConfig(initialConfiguration));\n    }\n  }\n};\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DMatrix.java",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.Iterator;\n\nimport ml.dmlc.xgboost4j.LabeledPoint;\nimport ml.dmlc.xgboost4j.java.util.BigDenseMatrix;\n\n/**\n * DMatrix for xgboost.\n *\n * @author hzx\n */\npublic class DMatrix {\n  protected long handle = 0;\n\n  /**\n   * Create DMatrix from iterator.\n   *\n   * @param iter      The data iterator of mini batch to provide the data.\n   * @param cacheInfo Cache path information, used for external memory setting, can be null.\n   * @throws XGBoostError\n   */\n  public DMatrix(Iterator<LabeledPoint> iter, String cacheInfo) throws XGBoostError {\n    this(iter, cacheInfo, Float.NaN);\n  }\n\n  /**\n   * Create DMatrix from iterator.\n   *\n   * @param iter      The data iterator of mini batch to provide the data.\n   * @param cacheInfo Cache path information, used for external memory setting, can be null.\n   * @param missing   the missing value\n   * @throws XGBoostError\n   */\n  public DMatrix(Iterator<LabeledPoint> iter,\n                 String cacheInfo,\n                 float missing) throws XGBoostError {\n    if (iter == null) {\n      throw new NullPointerException(\"iter: null\");\n    }\n    // 32k as batch size\n    int batchSize = 32 << 10;\n    Iterator<DataBatch> batchIter = new DataBatch.BatchIterator(iter, batchSize);\n    long[] out = new long[1];\n    
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromDataIter(\n        batchIter, cacheInfo, missing, out));\n    handle = out[0];\n  }\n\n  /**\n   * Create DMatrix by loading libsvm file from dataPath\n   *\n   * @param dataPath The path to the data.\n   * @throws XGBoostError\n   */\n  public DMatrix(String dataPath) throws XGBoostError {\n    if (dataPath == null) {\n      throw new NullPointerException(\"dataPath: null\");\n    }\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromFile(dataPath, 1, out));\n    handle = out[0];\n  }\n\n  /**\n   * Create DMatrix from Sparse matrix in CSR/CSC format.\n   *\n   * @param headers The row index of the matrix.\n   * @param indices The indices of presenting entries.\n   * @param data    The data content.\n   * @param st      Type of sparsity.\n   * @throws XGBoostError\n   */\n  @Deprecated\n  public DMatrix(long[] headers, int[] indices, float[] data,\n                 DMatrix.SparseType st) throws XGBoostError {\n    this(headers, indices, data, st, 0, Float.NaN, -1);\n  }\n\n  /**\n   * Create DMatrix from Sparse matrix in CSR/CSC format.\n   *\n   * @param headers    The row index of the matrix.\n   * @param indices    The indices of presenting entries.\n   * @param data       The data content.\n   * @param st         Type of sparsity.\n   * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as\n   *                   row number\n   * @throws XGBoostError\n   */\n  public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st,\n                 int shapeParam) throws XGBoostError {\n    this(headers, indices, data, st, shapeParam, Float.NaN, -1);\n  }\n\n  public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, int shapeParam,\n                 float missing, int nthread) throws XGBoostError {\n    long[] out = new long[1];\n    if (st == SparseType.CSR) {\n      
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSR(headers, indices, data,\n                                                             shapeParam, missing, nthread, out));\n    } else if (st == SparseType.CSC) {\n      XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSC(headers, indices, data,\n                                                             shapeParam, missing, nthread, out));\n    } else {\n      throw new UnknownError(\"unknow sparsetype\");\n    }\n    handle = out[0];\n  }\n\n  /**\n   * create DMatrix from dense matrix\n   *\n   * @param data data values\n   * @param nrow number of rows\n   * @param ncol number of columns\n   * @throws XGBoostError native error\n   * @deprecated Please specify the missing value explicitly using\n   * {@link DMatrix(float[], int, int, float)}\n   */\n  @Deprecated\n  public DMatrix(float[] data, int nrow, int ncol) throws XGBoostError {\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, Float.NaN, out));\n    handle = out[0];\n  }\n\n  /**\n   * create DMatrix from a BigDenseMatrix\n   *\n   * @param matrix instance of BigDenseMatrix\n   * @throws XGBoostError native error\n   */\n  public DMatrix(BigDenseMatrix matrix) throws XGBoostError {\n    this(matrix, Float.NaN);\n  }\n\n  /**\n   * create DMatrix from dense matrix\n   *\n   * @param data    data values\n   * @param nrow    number of rows\n   * @param ncol    number of columns\n   * @param missing the specified value to represent the missing value\n   */\n  public DMatrix(float[] data, int nrow, int ncol, float missing) throws XGBoostError {\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, missing, out));\n    handle = out[0];\n  }\n\n  /**\n   * create DMatrix from dense matrix\n   *\n   * @param matrix  instance of BigDenseMatrix\n   * @param missing the specified value to represent the missing value\n   */\n  public 
DMatrix(BigDenseMatrix matrix, float missing) throws XGBoostError {\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromMatRef(matrix.address, matrix.nrow,\n                                                              matrix.ncol, missing, out));\n    handle = out[0];\n  }\n\n  /**\n   * used for DMatrix slice\n   */\n  protected DMatrix(long handle) {\n    this.handle = handle;\n  }\n\n  /**\n   * Create the normal DMatrix from column array interface\n   *\n   * @param columnBatch the XGBoost ColumnBatch to provide the array interface\n   *                    of feature columns\n   * @param missing     missing value\n   * @param nthread     threads number\n   * @throws XGBoostError\n   */\n  public DMatrix(ColumnBatch columnBatch, float missing, int nthread) throws XGBoostError {\n    long[] out = new long[1];\n    String json = columnBatch.toFeaturesJson();\n    if (json == null || json.isEmpty()) {\n      throw new XGBoostError(\"Expecting non-empty feature columns' array interface\");\n    }\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromArrayInterfaceColumns(\n        json, missing, nthread, out));\n    handle = out[0];\n  }\n\n  /**\n   * flatten a mat to array\n   */\n  private static float[] flatten(float[][] mat) {\n    int size = 0;\n    for (float[] array : mat) size += array.length;\n    float[] result = new float[size];\n    int pos = 0;\n    for (float[] ar : mat) {\n      System.arraycopy(ar, 0, result, pos, ar.length);\n      pos += ar.length;\n    }\n\n    return result;\n  }\n\n  /**\n   * Set query id of DMatrix from array interface\n   *\n   * @param column the XGBoost Column to provide the array interface\n   *               of query id column\n   * @throws XGBoostError native error\n   */\n  public void setQueryId(Column column) throws XGBoostError {\n    setXGBDMatrixInfo(\"qid\", column.toJson());\n  }\n\n  private void setXGBDMatrixInfo(String type, String json) throws XGBoostError {\n    if 
(json == null || json.isEmpty()) {\n      throw new XGBoostError(\"Empty \" + type + \" columns' array interface\");\n    }\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetInfoFromInterface(handle, type, json));\n  }\n\n  private void setXGBDMatrixFeatureInfo(String type, String[] values) throws XGBoostError {\n    if (type == null || type.isEmpty()) {\n      throw new XGBoostError(\"Found empty type\");\n    }\n    if (values == null || values.length == 0) {\n      throw new XGBoostError(\"Found empty values\");\n    }\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetStrFeatureInfo(handle, type, values));\n  }\n\n  private String[] getXGBDMatrixFeatureInfo(String type) throws XGBoostError {\n    if (type == null || type.isEmpty()) {\n      throw new XGBoostError(\"Found empty type\");\n    }\n    long[] outLen = new long[1];\n    String[][] outValue = new String[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixGetStrFeatureInfo(handle, type, outLen, outValue));\n\n    if (outLen[0] != outValue[0].length) {\n      throw new RuntimeException(\"Failed to get \" + type);\n    }\n    return outValue[0];\n  }\n\n  /**\n   * Get feature names\n   *\n   * @return an array of feature names to be returned\n   * @throws XGBoostError\n   */\n  public String[] getFeatureNames() throws XGBoostError {\n    return getXGBDMatrixFeatureInfo(\"feature_name\");\n  }\n\n  /**\n   * Set feature names\n   *\n   * @param values feature names to be set\n   * @throws XGBoostError\n   */\n  public void setFeatureNames(String[] values) throws XGBoostError {\n    setXGBDMatrixFeatureInfo(\"feature_name\", values);\n  }\n\n  /**\n   * Get feature types\n   *\n   * @return an array of feature types to be returned\n   * @throws XGBoostError\n   */\n  public String[] getFeatureTypes() throws XGBoostError {\n    return getXGBDMatrixFeatureInfo(\"feature_type\");\n  }\n\n  /**\n   * Set feature types\n   *\n   * @param values feature types to be set\n   * @throws XGBoostError\n   */\n  public 
void setFeatureTypes(String[] values) throws XGBoostError {\n    setXGBDMatrixFeatureInfo(\"feature_type\", values);\n  }\n\n  /**\n   * Get group sizes of DMatrix\n   *\n   * @return group size as array\n   * @throws XGBoostError native error\n   */\n  public int[] getGroup() throws XGBoostError {\n    return getIntInfo(\"group_ptr\");\n  }\n\n  /**\n   * Set group sizes of DMatrix (used for ranking)\n   *\n   * @param group group size as array\n   * @throws XGBoostError native error\n   */\n  public void setGroup(int[] group) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetUIntInfo(handle, \"group\", group));\n  }\n\n  /**\n   * Set query ids (used for ranking)\n   *\n   * @param qid the query ids\n   * @throws XGBoostError native error\n   */\n  public void setQueryId(int[] qid) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetUIntInfo(handle, \"qid\", qid));\n  }\n\n  private float[] getFloatInfo(String field) throws XGBoostError {\n    float[][] infos = new float[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixGetFloatInfo(handle, field, infos));\n    return infos[0];\n  }\n\n  private int[] getIntInfo(String field) throws XGBoostError {\n    int[][] infos = new int[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixGetUIntInfo(handle, field, infos));\n    return infos[0];\n  }\n\n  /**\n   * get label values\n   *\n   * @return label\n   * @throws XGBoostError native error\n   */\n  public float[] getLabel() throws XGBoostError {\n    return getFloatInfo(\"label\");\n  }\n\n  /**\n   * Set label of DMatrix from array interface\n   *\n   * @param column the XGBoost Column to provide the array interface\n   *               of label column\n   * @throws XGBoostError native error\n   */\n  public void setLabel(Column column) throws XGBoostError {\n    setXGBDMatrixInfo(\"label\", column.toJson());\n  }\n\n  /**\n   * set label of dmatrix\n   *\n   * @param labels labels\n   * @throws XGBoostError native error\n   
*/\n  public void setLabel(float[] labels) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, \"label\", labels));\n  }\n\n  /**\n   * get weight of the DMatrix\n   *\n   * @return weights\n   * @throws XGBoostError native error\n   */\n  public float[] getWeight() throws XGBoostError {\n    return getFloatInfo(\"weight\");\n  }\n\n  /**\n   * Set weight of DMatrix from array interface\n   *\n   * @param column the XGBoost Column to provide the array interface\n   *               of weight column\n   * @throws XGBoostError native error\n   */\n  public void setWeight(Column column) throws XGBoostError {\n    setXGBDMatrixInfo(\"weight\", column.toJson());\n  }\n\n  /**\n   * set weight of each instance\n   *\n   * @param weights weights\n   * @throws XGBoostError native error\n   */\n  public void setWeight(float[] weights) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, \"weight\", weights));\n  }\n\n  /**\n   * Get base margin of the DMatrix.\n   */\n  public float[] getBaseMargin() throws XGBoostError {\n    return getFloatInfo(\"base_margin\");\n  }\n\n  /**\n   * Set base margin of DMatrix from array interface\n   *\n   * @param column the XGBoost Column to provide the array interface\n   *               of base margin column\n   * @throws XGBoostError native error\n   */\n  public void setBaseMargin(Column column) throws XGBoostError {\n    setXGBDMatrixInfo(\"base_margin\", column.toJson());\n  }\n\n  /**\n   * Set base margin (initial prediction).\n   * <p>\n   * The margin must have the same number of elements as the number of\n   * rows in this matrix.\n   */\n  public void setBaseMargin(float[] baseMargin) throws XGBoostError {\n    if (baseMargin.length != rowNum()) {\n      throw new IllegalArgumentException(String.format(\n        \"base margin must have exactly %s elements, got %s\",\n        rowNum(), baseMargin.length));\n    }\n\n    
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSetFloatInfo(handle, \"base_margin\", baseMargin));\n  }\n\n  /**\n   * Set base margin (initial prediction).\n   */\n  public void setBaseMargin(float[][] baseMargin) throws XGBoostError {\n    setBaseMargin(flatten(baseMargin));\n  }\n\n  /**\n   * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`.\n   *\n   * @param rowIndex row index\n   * @return sliced new DMatrix\n   * @throws XGBoostError native error\n   */\n  public DMatrix slice(int[] rowIndex) throws XGBoostError {\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out));\n    long sHandle = out[0];\n    DMatrix sMatrix = new DMatrix(sHandle);\n    return sMatrix;\n  }\n\n  /**\n   * get the row number of DMatrix\n   *\n   * @return number of rows\n   * @throws XGBoostError native error\n   */\n  public long rowNum() throws XGBoostError {\n    long[] rowNum = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixNumRow(handle, rowNum));\n    return rowNum[0];\n  }\n\n  /**\n   * Get the number of non-missing values of DMatrix.\n   *\n   * @return The number of non-missing values\n   * @throws XGBoostError native error\n   */\n  public long nonMissingNum() throws XGBoostError {\n    long[] n = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixNumNonMissing(handle, n));\n    return n[0];\n  }\n\n  /**\n   * save DMatrix to filePath\n   */\n  public void saveBinary(String filePath) {\n    XGBoostJNI.XGDMatrixSaveBinary(handle, filePath, 1);\n  }\n\n  /**\n   * Get the handle\n   */\n  public long getHandle() {\n    return handle;\n  }\n\n  @Override\n  protected void finalize() {\n    dispose();\n  }\n\n  public synchronized void dispose() {\n    if (handle != 0) {\n      XGBoostJNI.XGDMatrixFree(handle);\n      handle = 0;\n    }\n  }\n\n  /**\n   * sparse matrix type (CSR or CSC)\n   */\n  public enum SparseType {\n    CSR,\n    CSC\n  }\n\n  /**\n   * A class to hold the 
quantile information\n   */\n  public class QuantileCut {\n    // cut ptr\n    long[] indptr;\n    // cut values\n    float[] values;\n\n    QuantileCut(long[] indptr, float[] values) {\n      this.indptr = indptr;\n      this.values = values;\n    }\n\n    public long[] getIndptr() {\n      return indptr;\n    }\n\n    public float[] getValues() {\n      return values;\n    }\n  }\n\n  /**\n   * Get the Quantile Cut.\n   * @return QuantileCut\n   * @throws XGBoostError\n   */\n  public QuantileCut getQuantileCut() throws XGBoostError {\n    long[][] indptr = new long[1][];\n    float[][] values = new float[1][];\n    XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixGetQuantileCut(this.handle, indptr, values));\n    return new QuantileCut(indptr[0], values[0]);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/DataBatch.java",
    "content": "package ml.dmlc.xgboost4j.java;\n\nimport java.util.ArrayList;\nimport java.util.Iterator;\nimport java.util.List;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport ml.dmlc.xgboost4j.LabeledPoint;\n\n/**\n * A mini-batch of data that can be converted to DMatrix.\n * The data is in sparse matrix CSR format.\n *\n * This class is used to support advanced creation of DMatrix from Iterator of DataBatch,\n */\nclass DataBatch {\n  private static final Log logger = LogFactory.getLog(DataBatch.class);\n  /** The offset of each rows in the sparse matrix */\n  final long[] rowOffset;\n  /** weight of each data point, can be null */\n  final float[] weight;\n  /** label of each data point, can be null */\n  final float[] label;\n  /** index of each feature(column) in the sparse matrix */\n  final int[] featureIndex;\n  /** value of each non-missing entry in the sparse matrix */\n  final float[] featureValue ;\n  /** feature columns */\n  final int featureCols;\n\n  DataBatch(long[] rowOffset, float[] weight, float[] label, int[] featureIndex,\n            float[] featureValue, int featureCols) {\n    this.rowOffset = rowOffset;\n    this.weight = weight;\n    this.label = label;\n    this.featureIndex = featureIndex;\n    this.featureValue = featureValue;\n    this.featureCols = featureCols;\n  }\n\n  static class BatchIterator implements Iterator<DataBatch> {\n    private final Iterator<LabeledPoint> base;\n    private final int batchSize;\n\n    BatchIterator(Iterator<LabeledPoint> base, int batchSize) {\n      this.base = base;\n      this.batchSize = batchSize;\n    }\n\n    @Override\n    public boolean hasNext() {\n      return base.hasNext();\n    }\n\n    @Override\n    public DataBatch next() {\n      try {\n        int numRows = 0;\n        int numElem = 0;\n        int numCol  = -1;\n        List<LabeledPoint> batch = new ArrayList<>(batchSize);\n        while (base.hasNext() && batch.size() < 
batchSize) {\n          LabeledPoint labeledPoint = base.next();\n          if (numCol == -1) {\n            numCol = labeledPoint.size();\n          } else if (numCol != labeledPoint.size()) {\n            throw new RuntimeException(\"Feature size is not the same\");\n          }\n          batch.add(labeledPoint);\n          numElem += labeledPoint.values().length;\n          numRows++;\n        }\n\n        long[] rowOffset = new long[numRows + 1];\n        float[] label = new float[numRows];\n        int[] featureIndex = new int[numElem];\n        float[] featureValue = new float[numElem];\n        float[] weight = new float[numRows];\n\n        int offset = 0;\n        for (int i = 0; i < batch.size(); i++) {\n          LabeledPoint labeledPoint = batch.get(i);\n          rowOffset[i] = offset;\n          label[i] = labeledPoint.label();\n          weight[i] = labeledPoint.weight();\n          if (labeledPoint.indices() != null) {\n            System.arraycopy(labeledPoint.indices(), 0, featureIndex, offset,\n                    labeledPoint.indices().length);\n          } else {\n            for (int j = 0; j < labeledPoint.values().length; j++) {\n              featureIndex[offset + j] = j;\n            }\n          }\n\n          System.arraycopy(labeledPoint.values(), 0, featureValue, offset,\n                  labeledPoint.values().length);\n          offset += labeledPoint.values().length;\n        }\n\n        rowOffset[batch.size()] = offset;\n        return new DataBatch(rowOffset, weight, label, featureIndex, featureValue, numCol);\n      } catch (RuntimeException runtimeError) {\n        logger.error(runtimeError);\n        return null;\n      }\n    }\n\n    @Override\n    public void remove() {\n      throw new UnsupportedOperationException(\"DataBatch.BatchIterator.remove\");\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ExternalCheckpointManager.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.io.OutputStream;\nimport java.util.*;\nimport java.util.stream.Collectors;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\n\n/**\n * This class contains the methods that are required for managing the state of the training\n * process. The training state is stored in a distributed file system, that consists of\n * UBJ (Universal Binary JSON) model files.\n * The class provides methods for saving, loading and cleaning up checkpoints.\n */\npublic class ExternalCheckpointManager {\n\n  private Log logger = LogFactory.getLog(\"ExternalCheckpointManager\");\n  private String modelSuffix = \".ubj\";\n  private Path checkpointPath;  // directory for checkpoints\n  private FileSystem fs;\n\n  /**\n   * This constructor creates a new External Checkpoint Manager at the specified path in the\n   * specified file system.\n   *\n   * @param checkpointPath The directory path where checkpoints will be stored.\n   * @param fs The file system to use for storing checkpoints.\n   * @throws XGBoostError the error that is thrown if the checkpoint path is null or empty.\n   */\n  public ExternalCheckpointManager(String checkpointPath, FileSystem fs) throws XGBoostError {\n    if (checkpointPath == null || checkpointPath.isEmpty()) {\n      throw new XGBoostError(\"cannot create ExternalCheckpointManager with null or\" +\n              \" empty checkpoint path\");\n    }\n    this.checkpointPath = new Path(checkpointPath);\n    this.fs = fs;\n  }\n\n  private String getPath(int version) {\n    return checkpointPath.toUri().getPath() + \"/\" + version + modelSuffix;\n  }\n\n  private List<Integer> getExistingVersions() throws IOException {\n    if (!fs.exists(checkpointPath)) {\n      return new ArrayList<>();\n    } else {\n      // Get integer versions from a list of checkpoint files.\n      return Arrays.stream(fs.listStatus(checkpointPath))\n              .map(path -> path.getPath().getName())\n              .filter(fileName -> fileName.endsWith(modelSuffix))\n              .map(fileName -> Integer.valueOf(\n                      fileName.substring(0, fileName.length() - modelSuffix.length())))\n              .collect(Collectors.toList());\n    }\n  }\n\n  private Integer latest(List<Integer> versions) {\n    return versions.stream()\n        .max(Comparator.comparing(Integer::valueOf)).get();\n  }\n\n  /**\n   * This method cleans all the directories and files that are present in the checkpoint path.\n   * @throws IOException exception that is thrown when there is an error deleting the\n   * checkpoint path.\n   */\n  public void cleanPath() throws IOException {\n    fs.delete(checkpointPath, true);\n  }\n\n  /**\n   * Read the checkpoint from the checkpoint path. Once the checkpoint path is read, we get\n   * the latest version of the checkpoint from all the checkpoint versions and load it\n   * into the booster for the purpose of making predictions.\n   *\n   * @return The booster object that is used for making predictions.\n   * @throws IOException Any exception that occurs when reading the checkpoint path.\n   * @throws XGBoostError Any exception that occurs when loading the model into the booster.\n   */\n  public Booster loadCheckpointAsBooster() throws IOException, XGBoostError {\n    List<Integer> versions = getExistingVersions();\n    if (versions.size() > 0) {\n      int latestVersion = this.latest(versions);\n      String checkpointPath = getPath(latestVersion);\n      InputStream in = fs.open(new Path(checkpointPath));\n      logger.info(\"loaded checkpoint from \" + checkpointPath);\n      Booster booster = XGBoost.loadModel(in);\n      return booster;\n    } else {\n      return null;\n    }\n  }\n\n  /**\n   * This method updates the booster checkpoint to the latest or current\n   * version and deletes all the previous versions of the checkpoint.\n   * @param boosterToCheckpoint The booster object that is to be checkpointed and\n   *                            saved as a model file.\n   * @throws IOException Any exception that occurs when writing the model file to the\n   * checkpoint path.\n   * @throws XGBoostError Any exception that occurs when saving the model from the booster.\n   */\n  public void updateCheckpoint(Booster boosterToCheckpoint) throws IOException, XGBoostError {\n    List<String> prevModelPaths = getExistingVersions().stream()\n        .map(this::getPath).collect(Collectors.toList());\n    // checkpointing is done after update, so n_rounds - 1 is the current iteration\n    // accounting for training continuation.\n    Integer iter = boosterToCheckpoint.getNumBoostedRound() - 1;\n    String eventualPath = getPath(iter);\n    String tempPath = eventualPath + \"-\" + UUID.randomUUID();\n    try (OutputStream out = fs.create(new Path(tempPath), true)) {\n      boosterToCheckpoint.saveModel(out);\n      fs.rename(new Path(tempPath), new Path(eventualPath));\n      logger.info(\"saving checkpoint with version \" + iter);\n      prevModelPaths.stream().forEach(path -> {\n        try {\n          fs.delete(new Path(path), true);\n        } catch (IOException e) {\n          logger.error(\"failed to delete outdated checkpoint at \" + path, e);\n        }\n      });\n    }\n  }\n\n  /**\n   * This method cleans up all the checkpoint versions that are higher than the current round.\n   * This is useful when multiple training instances are running and we want to make sure that\n   * only the checkpoints from the current training instance are retained.\n   * @param currentRound The current round of training.\n   * @throws IOException Any exception that occurs when deleting the checkpoint files.\n   */\n  public void cleanUpHigherVersions(int currentRound) throws IOException {\n    getExistingVersions().stream().filter(v -> v > currentRound).forEach(v -> {\n      try {\n        fs.delete(new Path(getPath(v)), true);\n      } catch (IOException e) {\n        logger.error(\"failed to clean checkpoint from other training instance\", e);\n      }\n    });\n  }\n\n  /**\n   * Get a list of iterations that need checkpointing.\n   * @param firstRound The first round of training.\n   * @param checkpointInterval The interval at which checkpoints are to be saved.\n   * @param numOfRounds The number of rounds to be trained.\n   * @return A list of integer rounds that need checkpointing.\n   * @throws IOException Any exception that occurs when getting the list of rounds.\n   */\n  public List<Integer> getCheckpointRounds(\n      int firstRound, int checkpointInterval, int numOfRounds)\n      throws IOException {\n    int end = firstRound + numOfRounds; // exclusive\n    int lastRound = end - 1;\n    if (end - 1 < 0) {\n      throw new IllegalArgumentException(\"Inavlid `numOfRounds`.\");\n    }\n\n    List<Integer> arr = new ArrayList<>();\n    if (checkpointInterval > 0) {\n      for (int i = firstRound; i < end; i += checkpointInterval) {\n        arr.add(i);\n      }\n    }\n\n    if (!arr.contains(lastRound)) {\n      arr.add(lastRound);\n    }\n    return arr;\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IEvaluation.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.Serializable;\n\n/**\n * interface for customized evaluation\n *\n * @author hzx\n */\npublic interface IEvaluation extends Serializable {\n  /**\n   * get evaluate metric\n   *\n   * @return evalMetric\n   */\n  String getMetric();\n\n  /**\n   * evaluate with predicts and data\n   *\n   * @param predicts predictions as array\n   * @param dmat     data matrix to evaluate\n   * @return result of the metric\n   */\n  float eval(float[][] predicts, DMatrix dmat);\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IObjective.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.Serializable;\nimport java.util.List;\n\n/**\n * Interface for a customized objective function\n *\n * @author hzx\n */\npublic interface IObjective extends Serializable {\n  /**\n   * User-defined objective function; returns the gradient and the second-order gradient\n   *\n   * @param predicts untransformed margin predictions\n   * @param dtrain   training data\n   * @return List of two float arrays: the first-order gradient and the second-order gradient\n   */\n  List<float[]> getGradient(float[][] predicts, DMatrix dtrain);\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java",
    "content": "package ml.dmlc.xgboost4j.java;\n\nimport java.util.Map;\n\n/**\n * Interface for tracker implementations with three public methods:\n *\n *  - start(timeout): Start the tracker awaiting worker connections, with a given\n *  timeout value (in seconds).\n *  - getWorkerArgs(): Return the arguments needed to initialize Rabit clients.\n *  - waitFor(timeout): Wait for the task execution by the worker nodes for at most `timeout`\n *  milliseconds.\n *\n * Each implementation is expected to implement a callback function\n *\n *    public void uncaughtException(Thread t, Throwable e) { ... }\n *\n * to interrupt waitFor() in order to prevent the tracker from hanging indefinitely.\n *\n * The Rabit tracker handles connections from distributed workers, assigns ranks to workers, and\n * brokers connections between workers.\n */\npublic interface ITracker extends Thread.UncaughtExceptionHandler {\n\n  Map<String, Object> getWorkerArgs() throws XGBoostError;\n\n  boolean start() throws XGBoostError;\n\n  void stop() throws XGBoostError;\n\n  void waitFor(long taskExecutionTimeout) throws XGBoostError;\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/NativeLibLoader.java",
    "content": "/*\n Copyright (c) 2014, 2021 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.File;\nimport java.io.FileNotFoundException;\nimport java.io.FileOutputStream;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.io.OutputStream;\nimport java.util.Locale;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.LibraryPathProvider.getLibraryPathFor;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.LibraryPathProvider.getPropertyNameForLibrary;\n\n/**\n * class to load native library\n *\n * @author hzx\n */\nclass NativeLibLoader {\n  private static final Log logger = LogFactory.getLog(NativeLibLoader.class);\n\n  /**\n   * Supported OS enum.\n   */\n  enum OS {\n    WINDOWS(\"windows\"),\n    MACOS(\"macos\"),\n    LINUX(\"linux\"),\n    SOLARIS(\"solaris\");\n\n    final String name;\n\n    OS(String name) {\n      this.name = name;\n    }\n\n    /**\n     * Detects the OS using the system properties.\n     * Throws IllegalStateException if the OS is not recognized.\n     *\n     * @return The OS.\n     */\n    static OS detectOS() {\n      String os = System.getProperty(\"os.name\", \"generic\").toLowerCase(Locale.ENGLISH);\n      if (os.contains(\"mac\") || os.contains(\"darwin\")) {\n        return MACOS;\n      } else if (os.contains(\"win\")) {\n        return WINDOWS;\n      } 
else if (os.contains(\"nux\")) {\n        return LINUX;\n      } else if (os.contains(\"sunos\")) {\n        return SOLARIS;\n      } else {\n        throw new IllegalStateException(\"Unsupported OS:\" + os);\n      }\n    }\n\n  }\n\n  /**\n   * Supported architecture enum.\n   */\n  enum Arch {\n    X86_64(\"x86_64\"),\n    AARCH64(\"aarch64\"),\n    SPARC(\"sparc\");\n\n    final String name;\n\n    Arch(String name) {\n      this.name = name;\n    }\n\n    /**\n     * Detects the chip architecture using the system properties.\n     * Throws IllegalStateException if the architecture is not recognized.\n     * @return The architecture.\n     */\n    static Arch detectArch() {\n      String arch = System.getProperty(\"os.arch\", \"generic\").toLowerCase(Locale.ENGLISH);\n      if (arch.startsWith(\"amd64\") || arch.startsWith(\"x86_64\")) {\n        return X86_64;\n      } else if (arch.startsWith(\"aarch64\") || arch.startsWith(\"arm64\")) {\n        return AARCH64;\n      } else if (arch.startsWith(\"sparc\")) {\n        return SPARC;\n      } else {\n        throw new IllegalStateException(\"Unsupported architecture:\" + arch);\n      }\n    }\n  }\n\n  /**\n   * Utility class to determine the path of a native library.\n   */\n  static class LibraryPathProvider {\n\n    private static final String nativeResourcePath = \"/lib\";\n    private static final String customNativeLibraryPathPropertyPrefix = \"xgboostruntime.native.\";\n\n    static String getPropertyNameForLibrary(String libName) {\n      return customNativeLibraryPathPropertyPrefix + libName;\n    }\n\n    /**\n     * If a library-specific system property is set, this value is\n     * being used without further processing.\n     * Otherwise, the library path depends on the OS and architecture.\n     *\n     * @return path of the native library\n     */\n    static String getLibraryPathFor(OS os, Arch arch, String libName) {\n\n      String libraryPath = 
System.getProperty(getPropertyNameForLibrary(libName));\n\n      if (libraryPath == null) {\n        libraryPath = nativeResourcePath + \"/\" +\n                getPlatformFor(os, arch) + \"/\" +\n                System.mapLibraryName(libName);\n      }\n\n      logger.debug(\"Using path \" + libraryPath + \" for library with name \" + libName);\n\n      return libraryPath;\n    }\n\n  }\n\n  private static boolean initialized = false;\n  private static final String[] libNames = new String[]{\"xgboost4j\"};\n\n  /**\n   * Loads the XGBoost library.\n   * <p>\n   * Throws IllegalStateException if the architecture or OS is unsupported.\n   * <ul>\n   *   <li>Supported OS: macOS, Windows, Linux, Solaris.</li>\n   *   <li>Supported Architectures: x86_64, aarch64, sparc.</li>\n   * </ul>\n   * Throws UnsatisfiedLinkError if the library failed to load its dependencies.\n   * @throws IOException If the library could not be extracted from the jar.\n   */\n  static synchronized void initXGBoost() throws IOException {\n    if (!initialized) {\n      OS os = OS.detectOS();\n      Arch arch = Arch.detectArch();\n      for (String libName : libNames) {\n        try {\n          String libraryPathInJar = getLibraryPathFor(os, arch, libName);\n          loadLibraryFromJar(libraryPathInJar);\n        } catch (UnsatisfiedLinkError ule) {\n          String failureMessageIncludingOpenMPHint = \"Failed to load \" + libName + \" \" +\n              \"due to missing native dependencies for \" +\n              \"platform \" + getPlatformFor(os, arch) + \", \" +\n              \"this is likely due to a missing OpenMP dependency\";\n\n          switch (os) {\n            case WINDOWS:\n              logger.error(failureMessageIncludingOpenMPHint);\n              logger.error(\"You may need to install 'vcomp140.dll' or 'libgomp-1.dll'\");\n              break;\n            case MACOS:\n              logger.error(failureMessageIncludingOpenMPHint);\n              logger.error(\"You may need 
to install 'libomp.dylib', via `brew install libomp` \" +\n                  \"or similar\");\n              break;\n            case LINUX:\n              logger.error(failureMessageIncludingOpenMPHint);\n              logger.error(\"You may need to install 'libgomp.so' (or glibc) via your package \" +\n                  \"manager.\");\n              logger.error(\"Alternatively, if your Linux OS is musl-based, you should set \" +\n                      \"the path for the native library \" + libName + \" \" +\n                      \"via the system property \" + getPropertyNameForLibrary(libName));\n              break;\n            case SOLARIS:\n              logger.error(failureMessageIncludingOpenMPHint);\n              logger.error(\"You may need to install 'libgomp.so' (or glibc) via your package \" +\n                  \"manager.\");\n              break;\n          }\n          throw ule;\n        } catch (IOException ioe) {\n          logger.error(\"Failed to load \" + libName + \" library from jar for platform \" +\n                  getPlatformFor(os, arch));\n          throw ioe;\n        }\n      }\n      initialized = true;\n    }\n  }\n\n  /**\n   * Loads library from current JAR archive\n   * <p/>\n   * The file from JAR is copied into system temporary directory and then loaded.\n   * The temporary file is deleted after exiting.\n   * Method uses String as filename because the pathname is \"abstract\", not system-dependent.\n   * <p/>\n   * The restrictions of {@link File#createTempFile(java.lang.String, java.lang.String)} apply to\n   * {@code path}.\n   *\n   * @param path The filename inside JAR as absolute path (beginning with '/'),\n   *             e.g. 
/package/File.ext\n   * @throws IOException              If temporary file creation or read/write operation fails\n   * @throws IllegalArgumentException If source file (param path) does not exist\n   * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than\n   * three characters\n   */\n  private static void loadLibraryFromJar(String path) throws IOException, IllegalArgumentException {\n    String temp = createTempFileFromResource(path);\n    System.load(temp);\n  }\n\n  /**\n   * Create a temp file that copies the resource from current JAR archive\n   * <p/>\n   * The file from JAR is copied into system temp file.\n   * The temporary file is deleted after exiting.\n   * Method uses String as filename because the pathname is \"abstract\", not system-dependent.\n   * <p/>\n   * The restrictions of {@link File#createTempFile(java.lang.String, java.lang.String)} apply to\n   * {@code path}.\n   * @param path Path to the resources in the jar\n   * @return The created temp file.\n   * @throws IOException If it failed to read the file.\n   * @throws IllegalArgumentException If the filename is invalid.\n   */\n  static String createTempFileFromResource(String path) throws\n          IOException, IllegalArgumentException {\n    // Obtain filename from path\n    if (!path.startsWith(\"/\")) {\n      throw new IllegalArgumentException(\"The path has to be absolute (start with '/').\");\n    }\n\n    String[] parts = path.split(\"/\");\n    String filename = (parts.length > 1) ? parts[parts.length - 1] : null;\n\n    // Split filename to prefix and suffix (extension)\n    String prefix = \"\";\n    String suffix = null;\n    if (filename != null) {\n      parts = filename.split(\"\\\\.\", 2);\n      prefix = parts[0];\n      suffix = (parts.length > 1) ? \".\" + parts[parts.length - 1] : null; // Thanks, davs! 
:-)\n    }\n\n    // Check if the filename is okay\n    if (filename == null || prefix.length() < 3) {\n      throw new IllegalArgumentException(\"The filename has to be at least 3 characters long.\");\n    }\n    // Prepare temporary file\n    File temp = File.createTempFile(prefix, suffix);\n    temp.deleteOnExit();\n\n    if (!temp.exists()) {\n      throw new FileNotFoundException(\"File \" + temp.getAbsolutePath() + \" does not exist.\");\n    }\n\n    // Prepare buffer for data copying\n    byte[] buffer = new byte[1024];\n    int readBytes;\n\n    // Open and check input stream\n    try (InputStream is = NativeLibLoader.class.getResourceAsStream(path);\n         OutputStream os = new FileOutputStream(temp)) {\n      if (is == null) {\n        throw new FileNotFoundException(\"File \" + path + \" was not found inside JAR.\");\n      }\n\n      // Open output stream and copy data between source file in JAR and the temporary file\n      while ((readBytes = is.read(buffer)) != -1) {\n        os.write(buffer, 0, readBytes);\n      }\n    }\n\n    return temp.getAbsolutePath();\n  }\n\n  private static String getPlatformFor(OS os, Arch arch) {\n    return os.name + \"/\" + arch.name;\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.Map;\n\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.core.type.TypeReference;\nimport com.fasterxml.jackson.databind.ObjectMapper;\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\n/**\n * Java implementation of the Rabit tracker to coordinate distributed workers.\n */\npublic class RabitTracker implements ITracker {\n  // Maybe per tracker logger?\n  private static final Log logger = LogFactory.getLog(RabitTracker.class);\n  private long handle = 0;\n  private Thread trackerDaemon;\n\n  public RabitTracker(int numWorkers) throws XGBoostError {\n    this(numWorkers, \"\");\n  }\n\n  public RabitTracker(int numWorkers, String hostIp)\n      throws XGBoostError {\n    this(numWorkers, hostIp, 0, 300);\n  }\n  public RabitTracker(int numWorkers, String hostIp, int port, int timeout) throws XGBoostError {\n    if (numWorkers < 1) {\n      throw new XGBoostError(\"numWorkers must be greater equal to one\");\n    }\n\n    long[] out = new long[1];\n    XGBoostJNI.checkCall(XGBoostJNI.TrackerCreate(hostIp, numWorkers, port, 0, timeout, out));\n    this.handle = out[0];\n  }\n\n  public void uncaughtException(Thread t, Throwable e) {\n    logger.error(\"Uncaught exception thrown by worker:\", e);\n    try {\n      Thread.sleep(5000L);\n 
   } catch (InterruptedException ex) {\n      logger.error(ex);\n    } finally {\n      this.trackerDaemon.interrupt();\n    }\n  }\n\n  /**\n   * Get environments that can be used to pass to worker.\n   * @return The environment settings.\n   */\n  public Map<String, Object> getWorkerArgs() throws XGBoostError {\n    // fixme: timeout\n    String[] args = new String[1];\n    XGBoostJNI.checkCall(XGBoostJNI.TrackerWorkerArgs(this.handle, 0, args));\n    ObjectMapper mapper = new ObjectMapper();\n    Map<String, Object> config;\n    try {\n      config = mapper.readValue(args[0], new TypeReference<Map<String, Object>>() {});\n    } catch (JsonProcessingException ex) {\n      throw new XGBoostError(\"Failed to get worker arguments.\", ex);\n    }\n    return config;\n  }\n\n  public void stop() throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.TrackerFree(this.handle));\n  }\n\n  public boolean start() throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.TrackerRun(this.handle));\n    this.trackerDaemon = new Thread(() -> {\n      try {\n        waitFor(0);\n      } catch (Exception ex) {\n        logger.error(ex);\n        return; // exit the thread\n      }\n    });\n    this.trackerDaemon.setDaemon(true);\n    this.trackerDaemon.start();\n\n    return this.trackerDaemon.isAlive();\n  }\n\n  public void waitFor(long timeout) throws XGBoostError {\n    XGBoostJNI.checkCall(XGBoostJNI.TrackerWaitFor(this.handle, timeout));\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.*;\nimport java.util.*;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.fs.FileSystem;\n\n/**\n * trainer for xgboost\n *\n * @author hzx\n */\npublic class XGBoost {\n  private static final Log logger = LogFactory.getLog(XGBoost.class);\n\n  public static final String[] MAXIMIZ_METRICES = {\n    \"auc\", \"aucpr\", \"pre\", \"pre@\", \"map\", \"ndcg\",\n    \"auc@\", \"aucpr@\", \"map@\", \"ndcg@\",\n  };\n\n  /**\n   * load model from modelPath\n   *\n   * @param modelPath booster modelPath (model generated by booster.saveModel)\n   * @throws XGBoostError native error\n   */\n  public static Booster loadModel(String modelPath)\n          throws XGBoostError {\n    return Booster.loadModel(modelPath);\n  }\n\n  /**\n   * Load a new Booster model from a file opened as input stream.\n   * The assumption is the input stream only contains one XGBoost Model.\n   * This can be used to load existing booster models saved by other xgboost bindings.\n   *\n   * @param in The input stream of the file,\n   *           will be closed after this function call.\n   * @return The create boosted\n   * @throws XGBoostError\n   * @throws IOException\n   */\n  public static Booster loadModel(InputStream 
in) throws XGBoostError, IOException {\n    int size;\n    byte[] buf = new byte[1<<20];\n    ByteArrayOutputStream os = new ByteArrayOutputStream();\n    while ((size = in.read(buf)) != -1) {\n      os.write(buf, 0, size);\n    }\n    in.close();\n    return Booster.loadModel(os.toByteArray());\n  }\n\n  /**\n   * Load a new Booster model from a byte array buffer.\n   * The assumption is the array only contains one XGBoost Model.\n   * This can be used to load existing booster models saved by other xgboost bindings.\n   *\n   * @param buffer The byte contents of the booster.\n   * @return The create boosted\n   * @throws XGBoostError\n   */\n  public static Booster loadModel(byte[] buffer) throws XGBoostError, IOException {\n    return Booster.loadModel(buffer);\n  }\n\n  /**\n   * Train a booster given parameters.\n   *\n   * @param dtrain  Data to be trained.\n   * @param params  Parameters.\n   * @param round   Number of boosting iterations.\n   * @param watches a group of items to be evaluated during training, this allows user to watch\n   *                performance on the validation set.\n   * @param obj     customized objective\n   * @param eval    customized evaluation\n   * @return The trained booster.\n   */\n  public static Booster train(\n          DMatrix dtrain,\n          Map<String, Object> params,\n          int round,\n          Map<String, DMatrix> watches,\n          IObjective obj,\n          IEvaluation eval) throws XGBoostError {\n    return train(dtrain, params, round, watches, null, obj, eval, 0);\n  }\n\n  /**\n   * Train a booster given parameters.\n   *\n   * @param dtrain  Data to be trained.\n   * @param params  Parameters.\n   * @param round   Number of boosting iterations.\n   * @param watches a group of items to be evaluated during training, this allows user to watch\n   *                performance on the validation set.\n   * @param metrics array containing the evaluation metrics for each matrix in watches for each\n   *         
       iteration\n   * @param earlyStoppingRound if non-zero, training would be stopped\n   *                           after a specified number of consecutive\n   *                           increases in any evaluation metric.\n   * @param obj     customized objective\n   * @param eval    customized evaluation\n   * @return The trained booster.\n   */\n  public static Booster train(\n          DMatrix dtrain,\n          Map<String, Object> params,\n          int round,\n          Map<String, DMatrix> watches,\n          float[][] metrics,\n          IObjective obj,\n          IEvaluation eval,\n          int earlyStoppingRound) throws XGBoostError {\n    return train(dtrain, params, round, watches, metrics, obj, eval, earlyStoppingRound, null);\n  }\n  // save checkpoint if iter is in checkpointIterations\n  private static void saveCheckpoint(\n          Booster booster,\n          int iter,\n          Set<Integer> checkpointIterations,\n          ExternalCheckpointManager ecm) throws XGBoostError {\n    try {\n      if (checkpointIterations.contains(iter)) {\n        ecm.updateCheckpoint(booster);\n      }\n    } catch (Exception e) {\n      logger.error(\"failed to save checkpoint in XGBoost4J at iteration \" + iter, e);\n      throw new XGBoostError(\"failed to save checkpoint in XGBoost4J at iteration\" + iter, e);\n    }\n  }\n\n  public static Booster trainAndSaveCheckpoint(\n      DMatrix dtrain,\n      Map<String, Object> params,\n      int numRounds,\n      Map<String, DMatrix> watches,\n      float[][] metrics,\n      IObjective obj,\n      IEvaluation eval,\n      int earlyStoppingRounds,\n      Booster booster,\n      int checkpointInterval,\n      String checkpointPath,\n      FileSystem fs) throws XGBoostError, IOException {\n    //collect eval matrixs\n    String[] evalNames;\n    DMatrix[] evalMats;\n    float bestScore = 1;\n    int bestIteration;\n    List<String> names = new ArrayList<String>();\n    List<DMatrix> mats = new 
ArrayList<DMatrix>();\n    ExternalCheckpointManager ecm = null;\n    if (checkpointPath != null) {\n      ecm = new ExternalCheckpointManager(checkpointPath, fs);\n    }\n\n    for (Map.Entry<String, DMatrix> evalEntry : watches.entrySet()) {\n      names.add(evalEntry.getKey());\n      mats.add(evalEntry.getValue());\n    }\n\n    evalNames = names.toArray(new String[names.size()]);\n    evalMats = mats.toArray(new DMatrix[mats.size()]);\n\n    bestIteration = 0;\n    metrics = metrics == null ? new float[evalNames.length][numRounds] : metrics;\n\n    //collect all data matrixs\n    DMatrix[] allMats;\n    if (evalMats.length > 0) {\n      allMats = new DMatrix[evalMats.length + 1];\n      allMats[0] = dtrain;\n      System.arraycopy(evalMats, 0, allMats, 1, evalMats.length);\n    } else {\n      allMats = new DMatrix[1];\n      allMats[0] = dtrain;\n    }\n\n    //initialize booster\n    if (booster == null) {\n      // Start training on a new booster\n      booster = new Booster(params, allMats);\n      booster.setFeatureNames(dtrain.getFeatureNames());\n      booster.setFeatureTypes(dtrain.getFeatureTypes());\n    } else {\n      // Start training on an existing booster\n      booster.setParams(params);\n    }\n\n    Set<Integer> checkpointIterations = new HashSet<>();\n    if (ecm != null) {\n      checkpointIterations = new HashSet<>(\n          ecm.getCheckpointRounds(booster.getNumBoostedRound(), checkpointInterval, numRounds));\n    }\n\n    boolean initial_best_score_flag = false;\n    boolean max_direction = false;\n\n    // begin to train\n    for (int iter = 0; iter < numRounds; iter++) {\n      if (obj != null) {\n        booster.update(dtrain, iter, obj);\n      } else {\n        booster.update(dtrain, iter);\n      }\n      saveCheckpoint(booster, iter, checkpointIterations, ecm);\n\n      // evaluation\n      if (evalMats.length > 0) {\n        float[] metricsOut = new float[evalMats.length];\n        String evalInfo;\n        if (eval != null) 
{\n          evalInfo = booster.evalSet(evalMats, evalNames, eval, metricsOut);\n        } else {\n          evalInfo = booster.evalSet(evalMats, evalNames, iter, metricsOut);\n        }\n\n        if (!initial_best_score_flag) {\n          if (isMaximizeEvaluation(evalInfo, evalNames, params)) {\n            max_direction = true;\n            bestScore = -Float.MAX_VALUE;\n          } else {\n            max_direction = false;\n            bestScore = Float.MAX_VALUE;\n          }\n          initial_best_score_flag = true;\n        }\n\n        for (int i = 0; i < metricsOut.length; i++) {\n          metrics[i][iter] = metricsOut[i];\n        }\n\n        // If there is more than one evaluation datasets, the last one would be used\n        // to determinate early stop.\n        float score = metricsOut[metricsOut.length - 1];\n        if (max_direction) {\n          // Update best score if the current score is better (no update when equal)\n          if (score > bestScore) {\n            bestScore = score;\n            bestIteration = iter;\n            booster.setAttr(\"best_iteration\", String.valueOf(bestIteration));\n            booster.setAttr(\"best_score\", String.valueOf(bestScore));\n          }\n        } else {\n          if (score < bestScore) {\n            bestScore = score;\n            bestIteration = iter;\n            booster.setAttr(\"best_iteration\", String.valueOf(bestIteration));\n            booster.setAttr(\"best_score\", String.valueOf(bestScore));\n          }\n        }\n        if (shouldEarlyStop(earlyStoppingRounds, iter, bestIteration)) {\n          if (shouldPrint(params, iter)) {\n            Communicator.communicatorPrint(String.format(\n                \"early stopping after %d rounds away from the best iteration\",\n                earlyStoppingRounds\n            ));\n          }\n          break;\n        }\n        if (Communicator.getRank() == 0 && shouldPrint(params, iter)) {\n          
Communicator.communicatorPrint(evalInfo + '\\n');\n        }\n      }\n    }\n    return booster;\n  }\n\n  /**\n   * Train a booster given parameters.\n   *\n   * @param dtrain  Data to be trained.\n   * @param params  Parameters.\n   * @param round   Number of boosting iterations.\n   * @param watches a group of items to be evaluated during training, this allows user to watch\n   *                performance on the validation set.\n   * @param metrics array containing the evaluation metrics for each matrix in watches for each\n   *                iteration\n   * @param earlyStoppingRounds if non-zero, training would be stopped\n   *                           after a specified number of consecutive\n   *                           goes to the unexpected direction in any evaluation metric.\n   * @param obj     customized objective\n   * @param eval    customized evaluation\n   * @param booster train from scratch if set to null; train from an existing booster if not null.\n   * @return The trained booster.\n   */\n  public static Booster train(\n          DMatrix dtrain,\n          Map<String, Object> params,\n          int round,\n          Map<String, DMatrix> watches,\n          float[][] metrics,\n          IObjective obj,\n          IEvaluation eval,\n          int earlyStoppingRounds,\n          Booster booster) throws XGBoostError {\n    try {\n      return trainAndSaveCheckpoint(dtrain, params, round, watches, metrics, obj, eval,\n              earlyStoppingRounds, booster,\n              -1, null, null);\n    } catch (IOException e) {\n      logger.error(\"training failed in xgboost4j\", e);\n      throw new XGBoostError(\"training failed in xgboost4j \", e);\n    }\n  }\n\n  private static Integer tryGetIntFromObject(Object o) {\n    if (o instanceof Integer) {\n      return (int)o;\n    } else if (o instanceof String) {\n      try {\n        return Integer.parseInt((String)o);\n      } catch (NumberFormatException e) {\n        return null;\n      }\n    } 
else {\n      return null;\n    }\n  }\n\n  private static boolean shouldPrint(Map<String, Object> params, int iter) {\n    Object silent = params.get(\"silent\");\n    Integer silentInt = tryGetIntFromObject(silent);\n    if (silent != null) {\n      if (silent.equals(\"true\") || silent.equals(\"True\")\n              || (silentInt != null && silentInt != 0)) {\n        return false;  // \"silent\" will stop printing, otherwise go look at \"verbose_eval\"\n      }\n    }\n\n    Object verboseEval = params.get(\"verbose_eval\");\n    Integer verboseEvalInt = tryGetIntFromObject(verboseEval);\n    if (verboseEval == null) {\n      return true; // Default to printing evalInfo\n    } else if (verboseEval.equals(\"false\") || verboseEval.equals(\"False\")) {\n      return false;\n    } else if (verboseEvalInt != null) {\n      if (verboseEvalInt == 0) {\n        return false;\n      } else {\n        return iter % verboseEvalInt == 0;\n      }\n    } else {\n      return true; // Don't understand the option, default to printing\n    }\n  }\n\n  static boolean shouldEarlyStop(int earlyStoppingRounds, int iter, int bestIteration) {\n    if (earlyStoppingRounds <= 0) {\n      return false;\n    }\n    return iter - bestIteration >= earlyStoppingRounds;\n  }\n\n  private static String getMetricNameFromlog(String evalInfo, String[] evalNames) {\n    String regexPattern = Pattern.quote(evalNames[0]) + \"-(.*):\";\n    Pattern pattern = Pattern.compile(regexPattern);\n    Matcher matcher = pattern.matcher(evalInfo);\n\n    String metricName = null;\n    if (matcher.find()) {\n      metricName = matcher.group(1);\n      logger.debug(\"Got the metric name: \" + metricName);\n    }\n    return metricName;\n  }\n\n  // visiable for testing\n\n  /**\n   * Decides whether the evaluation metrics are to be maximized or not.\n   *\n   * @param evalInfo The evaluation log string from which the metric name is inferred.\n   * @param evalNames The names of the evaluation matrices.\n   * 
@param params The parameters that contain information regarding whether the\n   *  evaluation metrics are to be maximized or not.\n   * @return True if the evaluation metrics are to be maximized, false otherwise.\n   */\n  public static boolean isMaximizeEvaluation(String evalInfo,\n                                             String[] evalNames,\n                                             Map<String, Object> params) {\n\n    String metricName;\n\n    if (params.get(\"maximize_evaluation_metrics\") != null) {\n      // user has forced the direction no matter what is the metric name.\n      String maximize = String.valueOf(params.get(\"maximize_evaluation_metrics\"));\n      return Boolean.valueOf(maximize);\n    }\n\n    if (params.get(\"eval_metric\") != null) {\n      // user has special metric name\n      metricName = String.valueOf(params.get(\"eval_metric\"));\n    } else {\n      // infer the metric name from log\n      metricName = getMetricNameFromlog(evalInfo, evalNames);\n    }\n\n    assert metricName != null;\n\n    if (!\"mape\".equals(metricName)) {\n      for (String x : MAXIMIZ_METRICES) {\n        if (metricName.startsWith(x)) {\n          return true;\n        }\n      }\n    }\n    return false;\n  }\n\n  /**\n   * Cross-validation with given parameters.\n   *\n   * @param data    Data to be trained.\n   * @param params  Booster params.\n   * @param round   Number of boosting iterations.\n   * @param nfold   Number of folds in CV.\n   * @param metrics Evaluation metrics to be watched in CV.\n   * @param obj     customized objective (set to null if not used)\n   * @param eval    customized evaluation (set to null if not used)\n   * @return evaluation history\n   * @throws XGBoostError native error\n   */\n  public static String[] crossValidation(\n      DMatrix data,\n      Map<String, Object> params,\n      int round,\n      int nfold,\n      String[] metrics,\n      IObjective obj,\n      IEvaluation eval) throws XGBoostError {\n    CVPack[] 
cvPacks = makeNFold(data, nfold, params, metrics);\n    String[] evalHist = new String[round];\n    String[] results = new String[cvPacks.length];\n    for (int i = 0; i < round; i++) {\n      for (CVPack cvPack : cvPacks) {\n        if (obj != null) {\n          cvPack.update(obj);\n        } else {\n          cvPack.update(i);\n        }\n      }\n\n      for (int j = 0; j < cvPacks.length; j++) {\n        if (eval != null) {\n          results[j] = cvPacks[j].eval(eval);\n        } else {\n          results[j] = cvPacks[j].eval(i);\n        }\n      }\n\n      evalHist[i] = aggCVResults(results);\n      logger.info(evalHist[i]);\n    }\n    return evalHist;\n  }\n\n  /**\n   * make an n-fold array of CVPack from random indices\n   *\n   * @param data        original data\n   * @param nfold       num of folds\n   * @param params      booster parameters\n   * @param evalMetrics Evaluation metrics\n   * @return CV package array\n   * @throws XGBoostError native error\n   */\n  private static CVPack[] makeNFold(DMatrix data, int nfold, Map<String, Object> params,\n                                    String[] evalMetrics) throws XGBoostError {\n    List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum());\n    int step = samples.size() / nfold;\n    int[] testSlice = new int[step];\n    int[] trainSlice = new int[samples.size() - step];\n    int testid, trainid;\n    CVPack[] cvPacks = new CVPack[nfold];\n    for (int i = 0; i < nfold; i++) {\n      testid = 0;\n      trainid = 0;\n      for (int j = 0; j < samples.size(); j++) {\n        if (j > (i * step) && j < (i * step + step) && testid < step) {\n          testSlice[testid] = samples.get(j);\n          testid++;\n        } else {\n          if (trainid < samples.size() - step) {\n            trainSlice[trainid] = samples.get(j);\n            trainid++;\n          } else {\n            testSlice[testid] = samples.get(j);\n            testid++;\n          }\n        }\n      }\n\n      DMatrix 
dtrain = data.slice(trainSlice);\n      DMatrix dtest = data.slice(testSlice);\n      CVPack cvPack = new CVPack(dtrain, dtest, params);\n      //set eval types\n      if (evalMetrics != null) {\n        for (String type : evalMetrics) {\n          cvPack.booster.setParam(\"eval_metric\", type);\n        }\n      }\n      cvPacks[i] = cvPack;\n    }\n\n    return cvPacks;\n  }\n\n  private static List<Integer> genRandPermutationNums(int start, int end) {\n    List<Integer> samples = new ArrayList<Integer>();\n    for (int i = start; i < end; i++) {\n      samples.add(i);\n    }\n    Collections.shuffle(samples);\n    return samples;\n  }\n\n  /**\n   * Aggregate cross-validation results.\n   *\n   * @param results eval info from each data sample\n   * @return cross-validation eval info\n   */\n  private static String aggCVResults(String[] results) {\n    Map<String, List<Float>> cvMap = new HashMap<String, List<Float>>();\n    String aggResult = results[0].split(\"\\t\")[0];\n    for (String result : results) {\n      String[] items = result.split(\"\\t\");\n      for (int i = 1; i < items.length; i++) {\n        String[] tup = items[i].split(\":\");\n        String key = tup[0];\n        Float value = Float.valueOf(tup[1]);\n        if (!cvMap.containsKey(key)) {\n          cvMap.put(key, new ArrayList<Float>());\n        }\n        cvMap.get(key).add(value);\n      }\n    }\n\n    for (String key : cvMap.keySet()) {\n      float value = 0f;\n      for (Float tvalue : cvMap.get(key)) {\n        value += tvalue;\n      }\n      value /= cvMap.get(key).size();\n      aggResult += String.format(\"\\tcv-%s:%f\", key, value);\n    }\n\n    return aggResult;\n  }\n\n  /**\n   * cross validation package for xgb\n   *\n   * @author hzx\n   */\n  private static class CVPack {\n    DMatrix dtrain;\n    DMatrix dtest;\n    DMatrix[] dmats;\n    String[] names;\n    Booster booster;\n\n    /**\n     * create an cross validation package\n     *\n     * @param dtrain train 
data\n     * @param dtest  test data\n     * @param params parameters\n     * @throws XGBoostError native error\n     */\n    public CVPack(DMatrix dtrain, DMatrix dtest, Map<String, Object> params)\n            throws XGBoostError {\n      dmats = new DMatrix[]{dtrain, dtest};\n      booster = new Booster(params, dmats);\n      names = new String[]{\"train\", \"test\"};\n      this.dtrain = dtrain;\n      this.dtest = dtest;\n    }\n\n    /**\n     * update one iteration\n     *\n     * @param iter iteration num\n     * @throws XGBoostError native error\n     */\n    public void update(int iter) throws XGBoostError {\n      booster.update(dtrain, iter);\n    }\n\n    /**\n     * update one iteration\n     *\n     * @param obj  customized objective\n     * @throws XGBoostError native error\n     */\n    public void update(IObjective obj) throws XGBoostError {\n      booster.update(dtrain, obj);\n    }\n\n    /**\n     * evaluation\n     *\n     * @param iter iteration num\n     * @return evaluation\n     * @throws XGBoostError native error\n     */\n    public String eval(int iter) throws XGBoostError {\n      return booster.evalSet(dmats, names, iter);\n    }\n\n    /**\n     * evaluation\n     *\n     * @param eval customized eval\n     * @return evaluation\n     * @throws XGBoostError native error\n     */\n    public String eval(IEvaluation eval) throws XGBoostError {\n      return booster.evalSet(dmats, names, eval);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostError.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\n/**\n * custom error class for xgboost\n *\n * @author hzx\n */\npublic class XGBoostError extends Exception {\n  public XGBoostError(String message) {\n    super(message);\n  }\n\n  public XGBoostError(String message, Throwable cause) {\n    super(message, cause);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java",
    "content": "/*\n Copyright (c) 2014-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.nio.ByteBuffer;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\n/**\n * xgboost JNI functions\n * change 2015-7-6: *use a long[] (length=1) as container of handle to get the\n * output DMatrix or Booster\n *\n * @author hzx\n */\npublic class XGBoostJNI {\n  private static final Log logger = LogFactory.getLog(DMatrix.class);\n\n  static {\n    try {\n      NativeLibLoader.initXGBoost();\n    } catch (Exception ex) {\n      logger.error(\"Failed to load native library\", ex);\n      throw new RuntimeException(ex);\n    }\n  }\n\n  /**\n   * Check the return code of the JNI call.\n   *\n   * @throws XGBoostError if the call failed.\n   */\n  static void checkCall(int ret) throws XGBoostError {\n    if (ret != 0) {\n      throw new XGBoostError(XGBGetLastError());\n    }\n  }\n\n  public final static native String XGBGetLastError();\n\n  public final static native int XGDMatrixCreateFromFile(String fname, int silent, long[] out);\n\n  final static native int XGDMatrixCreateFromDataIter(java.util.Iterator<DataBatch> iter,\n      String cache_info, float missing, long[] out);\n\n  public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices,\n      float[] data, int shapeParam,\n      float missing, int nthread,\n      long[] out);\n\n  public final static 
native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices,\n      float[] data, int shapeParam,\n      float missing, int nthread,\n      long[] out);\n\n  public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol,\n      float missing, long[] out);\n\n  public final static native int XGDMatrixCreateFromMatRef(long dataRef, int nrow, int ncol,\n      float missing, long[] out);\n\n  public final static native int XGDMatrixSliceDMatrix(long handle, int[] idxset, long[] out);\n\n  public final static native int XGDMatrixFree(long handle);\n\n  public final static native int XGDMatrixSaveBinary(long handle, String fname, int silent);\n\n  public final static native int XGDMatrixSetFloatInfo(long handle, String field, float[] array);\n\n  public final static native int XGDMatrixSetUIntInfo(long handle, String field, int[] array);\n\n  public final static native int XGDMatrixGetFloatInfo(long handle, String field, float[][] info);\n\n  public final static native int XGDMatrixGetUIntInfo(long handle, String filed, int[][] info);\n\n  /**\n   * Set the feature information\n   * \n   * @param handle the DMatrix native address\n   * @param field  \"feature_names\" or \"feature_types\"\n   * @param values an array of string\n   * @return 0 when success, -1 when failure happens\n   */\n  public final static native int XGDMatrixSetStrFeatureInfo(long handle, String field,\n      String[] values);\n\n  public final static native int XGDMatrixGetStrFeatureInfo(long handle, String field,\n      long[] outLength, String[][] outValues);\n\n  public final static native int XGDMatrixNumRow(long handle, long[] row);\n\n  public final static native int XGDMatrixNumNonMissing(long handle, long[] nonMissings);\n\n  public final static native int XGBoosterCreate(long[] handles, long[] out);\n\n  public final static native int XGBoosterFree(long handle);\n\n  public final static native int XGBoosterSetParam(long handle, String name, String value);\n\n  
public final static native int XGBoosterUpdateOneIter(long handle, int iter, long dtrain);\n\n  public final static native int XGBoosterTrainOneIter(long handle, long dtrain, int iter, float[] grad,\n      float[] hess);\n\n  public final static native int XGBoosterEvalOneIter(long handle, int iter, long[] dmats,\n      String[] evnames, String[] eval_info);\n\n  public final static native int XGBoosterPredict(long handle, long dmat, int option_mask,\n      int ntree_limit, float[][] predicts);\n\n  public final static native int XGBoosterPredictFromDense(long handle, float[] data,\n      long nrow, long ncol, float missing, int iteration_begin, int iteration_end, int predict_type, float[] margin,\n      float[][] predicts);\n\n  public final static native int XGBoosterLoadModel(long handle, String fname);\n\n  public final static native int XGBoosterSaveModel(long handle, String fname);\n\n  public final static native int XGBoosterLoadModelFromBuffer(long handle, byte[] bytes);\n\n  public final static native int XGBoosterSaveModelToBuffer(long handle, String format, byte[][] out_bytes);\n\n  public final static native int XGBoosterDumpModelEx(long handle, String fmap, int with_stats,\n      String format, String[][] out_strings);\n\n  public final static native int XGBoosterDumpModelExWithFeatures(\n      long handle, String[] feature_names, int with_stats, String format, String[][] out_strings);\n\n  public final static native int XGBoosterGetAttrNames(long handle, String[][] out_strings);\n\n  public final static native int XGBoosterGetAttr(long handle, String key, String[] out_string);\n\n  public final static native int XGBoosterSetAttr(long handle, String key, String value);\n\n  public final static native int XGBoosterGetNumFeature(long handle, long[] feature);\n\n  public final static native int XGBoosterGetNumBoostedRound(long handle, int[] rounds);\n\n  // communicator functions\n  public final static native int CommunicatorInit(String args);\n\n  public 
final static native int CommunicatorFinalize();\n\n  public final static native int CommunicatorPrint(String msg);\n\n  public final static native int CommunicatorGetRank(int[] out);\n\n  public final static native int CommunicatorGetWorldSize(int[] out);\n\n  // Tracker functions\n  public final static native int TrackerCreate(String host, int nWorkers, int port, int sortby, long timeout,\n      long[] out);\n\n  public final static native int TrackerRun(long handle);\n\n  public final static native int TrackerWaitFor(long handle, long timeout);\n\n  public final static native int TrackerWorkerArgs(long handle, long timeout, String[] out);\n\n  public final static native int TrackerFree(long handle);\n\n  // Perform Allreduce operation on data in sendrecvbuf.\n  final static native int CommunicatorAllreduce(ByteBuffer sendrecvbuf, int count,\n      int enum_dtype, int enum_op);\n\n  public final static native int XGDMatrixSetInfoFromInterface(\n      long handle, String field, String json);\n\n  public final static native int XGQuantileDMatrixCreateFromCallback(\n      java.util.Iterator<ColumnBatch> iter, long[] ref, String config, long[] out);\n\n  public final static native int XGExtMemQuantileDMatrixCreateFromCallback(\n      java.util.Iterator<ColumnBatch> iter, long[] ref, String config, long[] out);\n\n  public final static native int XGDMatrixCreateFromArrayInterfaceColumns(\n      String featureJson, float missing, int nthread, long[] out);\n\n  public final static native int XGBoosterSetStrFeatureInfo(long handle, String field, String[] features);\n\n  public final static native int XGBoosterGetStrFeatureInfo(long handle, String field, String[] out);\n\n  public final static native int XGDMatrixGetQuantileCut(long handle, long[][] outIndptr, float[][] outValues);\n\n  public final static native int XGBSetGlobalConfig(String config);\n\n  public final static native int XGBGetGlobalConfig(String[] out);\n\n  // CUDA device management functions\n  public 
final static native int CudaSetDevice(int deviceId);\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/util/BigDenseMatrix.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.util;\n\n/**\n * Off-heap implementation of a Dense Matrix, matrix size is only limited by the\n * amount of the available memory and the matrix dimension cannot exceed\n * Integer.MAX_VALUE (this is consistent with XGBoost API restrictions on maximum\n * length of a response).\n */\npublic final class BigDenseMatrix {\n\n  private static final int FLOAT_BYTE_SIZE = 4;\n  public static final long MAX_MATRIX_SIZE = Long.MAX_VALUE / FLOAT_BYTE_SIZE;\n\n  public final int nrow;\n  public final int ncol;\n  public final long address;\n\n  public static void setDirect(long valAddress, float val) {\n    UtilUnsafe.UNSAFE.putFloat(valAddress, val);\n  }\n\n  public static float getDirect(long valAddress) {\n    return UtilUnsafe.UNSAFE.getFloat(valAddress);\n  }\n\n  public BigDenseMatrix(int nrow, int ncol) {\n    final long size = (long) nrow * ncol;\n    if (size > MAX_MATRIX_SIZE) {\n      throw new IllegalArgumentException(\"Matrix too large; matrix size cannot exceed \" +\n          MAX_MATRIX_SIZE);\n    }\n    this.nrow = nrow;\n    this.ncol = ncol;\n    this.address = UtilUnsafe.UNSAFE.allocateMemory(size * FLOAT_BYTE_SIZE);\n  }\n\n  public final void set(long idx, float val) {\n    setDirect(address + idx * FLOAT_BYTE_SIZE, val);\n  }\n\n  public final void set(int i, int j, float val) {\n    set(index(i, j), val);\n  }\n\n  public 
final float get(long idx) {\n    return getDirect(address + idx * FLOAT_BYTE_SIZE);\n  }\n\n  public final float get(int i, int j) {\n    return get(index(i, j));\n  }\n\n  public final void dispose() {\n    UtilUnsafe.UNSAFE.freeMemory(address);\n  }\n\n  private long index(int i, int j) {\n    return (long) i * ncol + j;\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/util/UtilUnsafe.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.util;\n\nimport java.lang.reflect.Field;\n\nimport sun.misc.Unsafe;\n\n/**\n * Simple class to obtain access to the {@link Unsafe} object. Use responsibly :)\n */\npublic final class UtilUnsafe {\n\n  static Unsafe UNSAFE = getUnsafe();\n\n  private UtilUnsafe() {\n  } // dummy private constructor\n\n  private static Unsafe getUnsafe() {\n    // Not on bootclasspath\n    if (UtilUnsafe.class.getClassLoader() == null) {\n      return Unsafe.getUnsafe();\n    }\n    try {\n      final Field fld = Unsafe.class.getDeclaredField(\"theUnsafe\");\n      fld.setAccessible(true);\n      return (Unsafe) fld.get(UtilUnsafe.class);\n    } catch (Exception e) {\n      throw new RuntimeException(\"Could not obtain access to sun.misc.Unsafe\", e);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/resources/xgboost4j-version.properties",
    "content": "version=${project.version}"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/Booster.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.JavaConverters._\nimport scala.collection.mutable\n\nimport com.esotericsoftware.kryo.{Kryo, KryoSerializable}\nimport com.esotericsoftware.kryo.io.{Input, Output}\n\nimport ml.dmlc.xgboost4j.java.{Booster => JBooster}\nimport ml.dmlc.xgboost4j.java.XGBoostError\n\n/**\n  * Booster for xgboost, this is a model API that support interactive build of a XGBoost Model\n  *\n  * DEVELOPER WARNING: A Java Booster must not be shared by more than one Scala Booster\n  * @param booster the java booster object.\n  */\nclass Booster private[xgboost4j](private[xgboost4j] var booster: JBooster)\n  extends Serializable  with KryoSerializable {\n\n  /**\n   * Get attributes stored in the Booster as a Map.\n   *\n   * @return A map contain attribute pairs.\n   */\n  @throws(classOf[XGBoostError])\n  def getAttrs: Map[String, String] = {\n    booster.getAttrs.asScala.toMap\n  }\n\n  /**\n   * Get attribute from the Booster.\n   *\n   * @param key   attr name\n   * @return attr value\n   */\n  @throws(classOf[XGBoostError])\n  def getAttr(key: String): String = {\n    booster.getAttr(key)\n  }\n\n  /**\n   * Set attribute to the Booster.\n   *\n   * @param key   attr name\n   * @param value attr value\n   */\n  @throws(classOf[XGBoostError])\n  def setAttr(key: String, value: String): Unit = {\n    booster.setAttr(key, 
value)\n  }\n\n  /**\n   * set attributes\n   *\n   * @param params attributes key-value map\n   */\n  @throws(classOf[XGBoostError])\n  def setAttrs(params: Map[String, String]): Unit = {\n    booster.setAttrs(params.asJava)\n  }\n\n  /**\n    * Set parameter to the Booster.\n    *\n    * @param key   param name\n    * @param value param value\n    */\n  @throws(classOf[XGBoostError])\n  def setParam(key: String, value: AnyRef): Unit = {\n    booster.setParam(key, value)\n  }\n\n  /**\n   * set parameters\n   *\n   * @param params parameters key-value map\n   */\n  @throws(classOf[XGBoostError])\n  def setParams(params: Map[String, AnyRef]): Unit = {\n    booster.setParams(params.asJava)\n  }\n\n  /**\n   * Update (one iteration)\n   *\n   * @param dtrain training data\n   * @param iter   current iteration number\n   */\n  @throws(classOf[XGBoostError])\n  def update(dtrain: DMatrix, iter: Int): Unit = {\n    booster.update(dtrain.jDMatrix, iter)\n  }\n\n  @throws(classOf[XGBoostError])\n  @deprecated\n  def update(dtrain: DMatrix, obj: ObjectiveTrait): Unit = {\n    booster.update(dtrain.jDMatrix, obj)\n  }\n\n  /**\n   * update with customize obj func\n   *\n   * @param dtrain training data\n   * @param iter   The current training iteration\n   * @param obj    customized objective class\n   */\n  @throws(classOf[XGBoostError])\n  def update(dtrain: DMatrix, iter: Int, obj: ObjectiveTrait): Unit = {\n    booster.update(dtrain.jDMatrix, iter, obj)\n  }\n\n  @throws(classOf[XGBoostError])\n  @deprecated\n  def boost(dtrain: DMatrix, grad: Array[Float], hess: Array[Float]): Unit = {\n    booster.boost(dtrain.jDMatrix, grad, hess)\n  }\n\n  /**\n   * update with give grad and hess\n   *\n   * @param dtrain training data\n   * @param iter   The current training iteration\n   * @param grad   first order of gradient\n   * @param hess   seconde order of gradient\n   */\n  @throws(classOf[XGBoostError])\n  def boost(dtrain: DMatrix, iter: Int, grad: Array[Float], hess: 
Array[Float]): Unit = {\n    booster.boost(dtrain.jDMatrix, iter, grad, hess)\n  }\n\n  /**\n   * evaluate with given dmatrixs.\n   *\n   * @param evalMatrixs dmatrixs for evaluation\n   * @param evalNames   name for eval dmatrixs, used for check results\n   * @param iter        current eval iteration\n   * @return eval information\n   */\n  @throws(classOf[XGBoostError])\n  def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], iter: Int)\n    : String = {\n    booster.evalSet(evalMatrixs.map(_.jDMatrix), evalNames, iter)\n  }\n\n  /**\n   * evaluate with given customized Evaluation class\n   *\n   * @param evalMatrixs evaluation matrix\n   * @param evalNames   evaluation names\n   * @param eval        custom evaluator\n   * @return eval information\n   */\n  @throws(classOf[XGBoostError])\n  def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], eval: EvalTrait)\n    : String = {\n    booster.evalSet(evalMatrixs.map(_.jDMatrix), evalNames, eval)\n  }\n\n\n  /**\n   * Predict with data\n   *\n   * @param data         dmatrix storing the input\n   * @param outPutMargin Whether to output the raw untransformed margin value.\n   * @param treeLimit    Limit number of trees in the prediction; defaults to 0 (use all trees).\n   * @return predict result\n   */\n  @throws(classOf[XGBoostError])\n  def predict(data: DMatrix, outPutMargin: Boolean = false, treeLimit: Int = 0):\n      Array[Array[Float]] = {\n    booster.predict(data.jDMatrix, outPutMargin, treeLimit)\n  }\n\n  /**\n   * Predict the leaf indices\n   *\n   * @param data      dmatrix storing the input\n   * @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).\n   * @return predict result\n   * @throws XGBoostError native error\n   */\n  @throws(classOf[XGBoostError])\n  def predictLeaf(data: DMatrix, treeLimit: Int = 0): Array[Array[Float]] = {\n    booster.predictLeaf(data.jDMatrix, treeLimit)\n  }\n\n  /**\n    * Output feature contributions toward 
predictions of given data\n    *\n    * @param data      dmatrix storing the input\n    * @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).\n    * @return The feature contributions and bias.\n    * @throws XGBoostError native error\n    */\n  @throws(classOf[XGBoostError])\n  def predictContrib(data: DMatrix, treeLimit: Int = 0) : Array[Array[Float]] = {\n    booster.predictContrib(data.jDMatrix, treeLimit)\n  }\n\n  /**\n   * save model to modelPath\n   *\n   * @param modelPath model path\n   */\n  @throws(classOf[XGBoostError])\n  def saveModel(modelPath: String): Unit = {\n    booster.saveModel(modelPath)\n  }\n\n  /**\n    * save model to Output stream\n    *\n    * @param out Output stream\n    */\n  @throws(classOf[XGBoostError])\n  def saveModel(out: java.io.OutputStream): Unit = {\n    booster.saveModel(out)\n  }\n\n  /**\n   * save model to Output stream\n   * @param out output stream\n   * @param format the supported model format, (json, ubj, deprecated)\n   * @throws ml.dmlc.xgboost4j.java.XGBoostError\n   */\n  @throws(classOf[XGBoostError])\n  def saveModel(out: java.io.OutputStream, format: String): Unit = {\n    booster.saveModel(out, format)\n  }\n\n  /**\n   * Dump model as Array of string\n   *\n   * @param featureMap featureMap file\n   * @param withStats  bool\n   *                   Controls whether the split statistics are output.\n   */\n  @throws(classOf[XGBoostError])\n  def getModelDump(featureMap: String = null, withStats: Boolean = false, format: String = \"text\")\n    : Array[String] = {\n    booster.getModelDump(featureMap, withStats, format)\n  }\n\n  /**\n    * Dump model as Array of string with specified feature names.\n    *\n    * @param featureNames Names of features.\n    */\n  @throws(classOf[XGBoostError])\n  def getModelDump(featureNames: Array[String]): Array[String] = {\n    booster.getModelDump(featureNames, false, \"text\")\n  }\n\n  def getModelDump(featureNames: Array[String], 
withStats: Boolean, format: String)\n    : Array[String] = {\n    booster.getModelDump(featureNames, withStats, format)\n  }\n\n\n  /**\n   * Get importance of each feature based on weight only (number of splits)\n   *\n   * @return featureScoreMap  key: feature index, value: feature importance score\n   */\n  @throws(classOf[XGBoostError])\n  def getFeatureScore(featureMap: String = null): mutable.Map[String, Integer] = {\n    booster.getFeatureScore(featureMap).asScala\n  }\n\n  /**\n    * Get importance of each feature based on weight only\n    * (number of splits), with specified feature names.\n    *\n    * @return featureScoreMap  key: feature name, value: feature importance score\n    */\n  @throws(classOf[XGBoostError])\n  def getFeatureScore(featureNames: Array[String]): mutable.Map[String, Integer] = {\n    booster.getFeatureScore(featureNames).asScala\n  }\n\n  /**\n    * Get importance of each feature based on information gain or cover\n    * Supported: [\"gain, \"cover\", \"total_gain\", \"total_cover\"]\n    *\n    * @return featureScoreMap  key: feature index, value: feature importance score\n    */\n  @throws(classOf[XGBoostError])\n  def getScore(featureMap: String, importanceType: String): Map[String, Double] = {\n    Map(booster.getScore(featureMap, importanceType)\n        .asScala.mapValues(_.doubleValue).toSeq: _*)\n  }\n\n  /**\n    * Get importance of each feature based on information gain or cover\n    * , with specified feature names.\n    * Supported: [\"gain, \"cover\", \"total_gain\", \"total_cover\"]\n    *\n    * @return featureScoreMap  key: feature name, value: feature importance score\n    */\n  @throws(classOf[XGBoostError])\n  def getScore(featureNames: Array[String], importanceType: String): Map[String, Double] = {\n    Map(booster.getScore(featureNames, importanceType)\n        .asScala.mapValues(_.doubleValue).toSeq: _*)\n  }\n\n  /**\n    * Get the number of model features.\n    *\n    * @return number of features\n    */\n  
@throws(classOf[XGBoostError])\n  def getNumFeature: Long = booster.getNumFeature\n\n  def getNumBoostedRound: Long = booster.getNumBoostedRound\n\n  /**\n    * Save model into a raw byte array.  Available options are \"json\", \"ubj\" and \"deprecated\".\n    */\n  @throws(classOf[XGBoostError])\n  def toByteArray(format: String): Array[Byte] = {\n    booster.toByteArray(format)\n  }\n\n  /**\n    * Save model into a raw byte array in the UBJSON (\"ubj\") format.\n    */\n  @throws(classOf[XGBoostError])\n  def toByteArray: Array[Byte] = {\n    booster.toByteArray()\n  }\n\n  /**\n    *  Dispose the booster when it is no longer needed\n    */\n  def dispose: Unit = {\n    booster.dispose()\n  }\n\n  override def finalize(): Unit = {\n    super.finalize()\n    dispose\n  }\n\n  override def write(kryo: Kryo, output: Output): Unit = {\n    kryo.writeObject(output, booster)\n  }\n\n  override def read(kryo: Kryo, input: Input): Unit = {\n    booster = kryo.readObject(input, classOf[JBooster])\n  }\n\n  // a flag to indicate if the device is set for the GPU transform\n  var deviceIsSet = false\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/DMatrix.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.JavaConverters._\n\nimport ml.dmlc.xgboost4j.LabeledPoint\nimport ml.dmlc.xgboost4j.java.{Column, ColumnBatch, DMatrix => JDMatrix, XGBoostError}\n\nclass DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {\n  /**\n   * init DMatrix from file (svmlight format)\n   *\n   * @param dataPath path of data file\n   * @throws XGBoostError native error\n   */\n  def this(dataPath: String) {\n    this(new JDMatrix(dataPath))\n  }\n\n  /**\n   * init DMatrix from Iterator of LabeledPoint\n   *\n   * @param dataIter  An iterator of LabeledPoint\n   * @param cacheInfo Cache path information, used for external memory setting, null by default.\n   * @param missing   Which value will be treated as the missing value\n   * @throws XGBoostError native error\n   */\n  def this(dataIter: Iterator[LabeledPoint],\n           cacheInfo: String = null,\n           missing: Float = Float.NaN) {\n    this(new JDMatrix(dataIter.asJava, cacheInfo, missing))\n  }\n\n  /**\n   * create DMatrix from sparse matrix\n   *\n   * @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)\n   * @param indices Indices (colIndexs for CSR or rowIndexs for CSC)\n   * @param data    non zero values (sequence by row for CSR or by col for CSC)\n   * @param st      sparse matrix type (CSR or CSC)\n   */\n  
@throws(classOf[XGBoostError])\n  @deprecated\n  def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType) {\n    this(new JDMatrix(headers, indices, data, st, 0, Float.NaN, -1))\n  }\n\n  /**\n   * create DMatrix from sparse matrix\n   *\n   * @param headers    index to headers (rowHeaders for CSR or colHeaders for CSC)\n   * @param indices    Indices (colIndexs for CSR or rowIndexs for CSC)\n   * @param data       non zero values (sequence by row for CSR or by col for CSC)\n   * @param st         sparse matrix type (CSR or CSC)\n   * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as\n   *                   row number\n   */\n  @throws(classOf[XGBoostError])\n  def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,\n           shapeParam: Int) {\n    this(new JDMatrix(headers, indices, data, st, shapeParam, Float.NaN, -1))\n  }\n\n  /**\n   * create DMatrix from sparse matrix\n   *\n   * @param headers    index to headers (rowHeaders for CSR or colHeaders for CSC)\n   * @param indices    Indices (colIndexs for CSR or rowIndexs for CSC)\n   * @param data       non zero values (sequence by row for CSR or by col for CSC)\n   * @param st         sparse matrix type (CSR or CSC)\n   * @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as\n   *                   row number\n   * @param missing    missing value\n   * @param nthread    The number of threads used for constructing DMatrix\n   */\n  @throws(classOf[XGBoostError])\n  def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,\n           shapeParam: Int, missing: Float, nthread: Int) {\n    this(new JDMatrix(headers, indices, data, st, shapeParam, missing, nthread))\n  }\n\n  /**\n   * Create the normal DMatrix from column array interface\n   *\n   * @param columnBatch the XGBoost ColumnBatch to provide the cuda array 
interface\n   *                    of feature columns\n   * @param missing     missing value\n   * @param nthread     The number of threads used for constructing DMatrix\n   */\n  @throws(classOf[XGBoostError])\n  def this(columnBatch: ColumnBatch, missing: Float, nthread: Int) {\n    this(new JDMatrix(columnBatch, missing, nthread))\n  }\n\n  /**\n   * create DMatrix from dense matrix\n   *\n   * @param data data values\n   * @param nrow number of rows\n   * @param ncol number of columns\n   */\n  @deprecated(\"Please specify the missing value explicitly\", \"XGBoost 1.5\")\n  @throws(classOf[XGBoostError])\n  def this(data: Array[Float], nrow: Int, ncol: Int) {\n    this(new JDMatrix(data, nrow, ncol))\n  }\n\n  /**\n   * create DMatrix from dense matrix\n   *\n   * @param data    data values\n   * @param nrow    number of rows\n   * @param ncol    number of columns\n   * @param missing the specified value to represent the missing value\n   */\n  @throws(classOf[XGBoostError])\n  def this(data: Array[Float], nrow: Int, ncol: Int, missing: Float) {\n    this(new JDMatrix(data, nrow, ncol, missing))\n  }\n\n  /**\n   * set label of dmatrix\n   *\n   * @param labels labels\n   */\n  @throws(classOf[XGBoostError])\n  def setLabel(labels: Array[Float]): Unit = {\n    jDMatrix.setLabel(labels)\n  }\n\n  /**\n   * set weight of each instance\n   *\n   * @param weights weights\n   */\n  @throws(classOf[XGBoostError])\n  def setWeight(weights: Array[Float]): Unit = {\n    jDMatrix.setWeight(weights)\n  }\n\n  /**\n   * if specified, xgboost will start from this init margin\n   * can be used to specify initial prediction to boost from\n   *\n   * @param baseMargin base margin\n   */\n  @throws(classOf[XGBoostError])\n  def setBaseMargin(baseMargin: Array[Float]): Unit = {\n    jDMatrix.setBaseMargin(baseMargin)\n  }\n\n  /**\n   * if specified, xgboost will start from this init margin\n   * can be used to specify initial prediction to boost from\n   *\n   * @param 
baseMargin base margin\n   */\n  @throws(classOf[XGBoostError])\n  def setBaseMargin(baseMargin: Array[Array[Float]]): Unit = {\n    jDMatrix.setBaseMargin(baseMargin)\n  }\n\n  /**\n   * Set group sizes of DMatrix (used for ranking)\n   *\n   * @param group group size as array\n   */\n  @throws(classOf[XGBoostError])\n  def setGroup(group: Array[Int]): Unit = {\n    jDMatrix.setGroup(group)\n  }\n\n  /**\n   * Set query ids (used for ranking)\n   *\n   * @param qid query ids\n   */\n  @throws(classOf[XGBoostError])\n  def setQueryId(qid: Array[Int]): Unit = {\n    jDMatrix.setQueryId(qid)\n  }\n\n  /**\n   * Set label of DMatrix from cuda array interface\n   */\n  @throws(classOf[XGBoostError])\n  def setLabel(column: Column): Unit = {\n    jDMatrix.setLabel(column)\n  }\n\n  /**\n   * set weight of dmatrix from column array interface\n   */\n  @throws(classOf[XGBoostError])\n  def setWeight(column: Column): Unit = {\n    jDMatrix.setWeight(column)\n  }\n\n  /**\n   * set base margin of dmatrix from column array interface\n   */\n  @throws(classOf[XGBoostError])\n  def setBaseMargin(column: Column): Unit = {\n    jDMatrix.setBaseMargin(column)\n  }\n\n  /**\n   * set query id of dmatrix from column array interface\n   */\n  @throws(classOf[XGBoostError])\n  def setQueryId(column: Column): Unit = {\n    jDMatrix.setQueryId(column)\n  }\n\n  /**\n   * set feature names\n   *\n   * @param values feature names\n   * @throws ml.dmlc.xgboost4j.java.XGBoostError\n   */\n  @throws(classOf[XGBoostError])\n  def setFeatureNames(values: Array[String]): Unit = {\n    jDMatrix.setFeatureNames(values)\n  }\n\n  /**\n   * set feature types\n   *\n   * @param values feature types\n   * @throws ml.dmlc.xgboost4j.java.XGBoostError\n   */\n  @throws(classOf[XGBoostError])\n  def setFeatureTypes(values: Array[String]): Unit = {\n    jDMatrix.setFeatureTypes(values)\n  }\n\n  /**\n   * Get group sizes of DMatrix (used for ranking)\n   */\n  @throws(classOf[XGBoostError])\n  def 
getGroup(): Array[Int] = {\n    jDMatrix.getGroup()\n  }\n\n  /**\n   * get label values\n   *\n   * @return label\n   */\n  @throws(classOf[XGBoostError])\n  def getLabel: Array[Float] = {\n    jDMatrix.getLabel\n  }\n\n  /**\n   * get weight of the DMatrix\n   *\n   * @return weights\n   */\n  @throws(classOf[XGBoostError])\n  def getWeight: Array[Float] = {\n    jDMatrix.getWeight\n  }\n\n  /**\n   * get base margin of the DMatrix\n   *\n   * @return base margin\n   */\n  @throws(classOf[XGBoostError])\n  def getBaseMargin: Array[Float] = {\n    jDMatrix.getBaseMargin\n  }\n\n  /**\n   * get feature names\n   *\n   * @throws ml.dmlc.xgboost4j.java.XGBoostError\n   * @return\n   */\n  @throws(classOf[XGBoostError])\n  def getFeatureNames: Array[String] = {\n    jDMatrix.getFeatureNames\n  }\n\n  /**\n   * get feature types\n   *\n   * @throws ml.dmlc.xgboost4j.java.XGBoostError\n   * @return\n   */\n  @throws(classOf[XGBoostError])\n  def getFeatureTypes: Array[String] = {\n    jDMatrix.getFeatureTypes\n  }\n\n  /**\n   * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`.\n   *\n   * @param rowIndex row index\n   * @return sliced new DMatrix\n   */\n  @throws(classOf[XGBoostError])\n  def slice(rowIndex: Array[Int]): DMatrix = {\n    new DMatrix(jDMatrix.slice(rowIndex))\n  }\n\n  /**\n   * get the row number of DMatrix\n   *\n   * @return number of rows\n   */\n  @throws(classOf[XGBoostError])\n  def rowNum: Long = {\n    jDMatrix.rowNum\n  }\n\n  /**\n   * Get the number of non-missing values of DMatrix.\n   *\n   * @return The number of non-missing values\n   */\n  @throws(classOf[XGBoostError])\n  def nonMissingNum: Long = {\n    jDMatrix.nonMissingNum\n  }\n\n  /**\n   * save DMatrix to filePath\n   *\n   * @param filePath file path\n   */\n  def saveBinary(filePath: String): Unit = {\n    jDMatrix.saveBinary(filePath)\n  }\n\n  def getHandle: Long = {\n    jDMatrix.getHandle\n  }\n\n  def delete(): Unit = {\n    jDMatrix.dispose()\n  
}\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport ml.dmlc.xgboost4j.java\nimport ml.dmlc.xgboost4j.java.IEvaluation\n\ntrait EvalTrait extends IEvaluation {\n\n  /**\n   * get evaluate metric\n   *\n   * @return evalMetric\n   */\n  def getMetric: String\n\n  /**\n   * evaluate with predicts and data\n   *\n   * @param predicts predictions as array\n   * @param dmat     data matrix to evaluate\n   * @return result of the metric\n   */\n  def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float\n\n  def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {\n    require(predicts.length == jdmat.getLabel.length, \"predicts size and label size must match \" +\n      s\" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}\")\n    eval(predicts, new DMatrix(jdmat))\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ExternalCheckpointManager.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport org.apache.hadoop.fs.FileSystem\n\nimport ml.dmlc.xgboost4j.java.{ExternalCheckpointManager => JavaECM}\n\nclass ExternalCheckpointManager(checkpointPath: String, fs: FileSystem)\n  extends JavaECM(checkpointPath, fs) {\n\n  def updateCheckpoint(booster: Booster): Unit = {\n    super.updateCheckpoint(booster.booster)\n  }\n\n  def loadCheckpointAsScalaBooster(): Booster = {\n    val loadedBooster = super.loadCheckpointAsBooster()\n    if (loadedBooster == null) {\n      null\n    } else {\n      new Booster(loadedBooster)\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.JavaConverters._\n\nimport ml.dmlc.xgboost4j.java.{DMatrix => JDMatrix}\nimport ml.dmlc.xgboost4j.java.IObjective\n\ntrait ObjectiveTrait extends IObjective {\n  /**\n   * user define objective function, return gradient and second order gradient\n   *\n   * @param predicts untransformed margin predicts\n   * @param dtrain   training data\n   * @return List with two float array, correspond to grad and hess\n   */\n  def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]\n\n  def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):\n    java.util.List[Array[Float]] = {\n    getGradient(predicts, new DMatrix(dtrain)).asJava\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport java.io.InputStream\n\nimport scala.jdk.CollectionConverters._\n\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.fs.Path\n\nimport ml.dmlc.xgboost4j.java.{XGBoost => JXGBoost, XGBoostError}\n\n/**\n  * XGBoost Scala Training function.\n  */\nobject XGBoost {\n\n  private[scala] def trainAndSaveCheckpoint(\n      dtrain: DMatrix,\n      params: Map[String, Any],\n      numRounds: Int,\n      watches: Map[String, DMatrix] = Map(),\n      metrics: Array[Array[Float]] = null,\n      obj: ObjectiveTrait = null,\n      eval: EvalTrait = null,\n      earlyStoppingRound: Int = 0,\n      prevBooster: Booster,\n      checkpointParams: Option[ExternalCheckpointParams]): Booster = {\n\n    // we have to filter null value for customized obj and eval\n    val jParams: java.util.Map[String, AnyRef] =\n      params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava\n\n    val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava\n    val jBooster = if (prevBooster == null) {\n      null\n    } else {\n      prevBooster.booster\n    }\n\n    val xgboostInJava = checkpointParams.\n      map(cp => {\n          JXGBoost.trainAndSaveCheckpoint(\n            dtrain.jDMatrix,\n            jParams,\n            numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,\n            
cp.checkpointInterval,\n            cp.checkpointPath,\n            new Path(cp.checkpointPath).getFileSystem(new Configuration()))\n        }).\n      getOrElse(\n        JXGBoost.train(\n          dtrain.jDMatrix,\n          jParams,\n          numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)\n      )\n    if (prevBooster == null) {\n      new Booster(xgboostInJava)\n    } else {\n      // Avoid creating a new SBooster with the same JBooster\n      prevBooster\n    }\n  }\n\n  /**\n    * Train a booster given parameters.\n    *\n    * @param dtrain  Data to be trained.\n    * @param params  Parameters.\n    * @param round   Number of boosting iterations.\n    * @param watches a group of items to be evaluated during training, this allows user to watch\n    *                performance on the validation set.\n    * @param metrics array containing the evaluation metrics for each matrix in watches for each\n    *                iteration\n    * @param earlyStoppingRound if non-zero, training would be stopped\n    *                           after a specified number of consecutive\n    *                           increases in any evaluation metric.\n    * @param obj     customized objective\n    * @param eval    customized evaluation\n    * @param booster train from scratch if set to null; train from an existing booster if not null.\n    * @return The trained booster.\n    */\n  @throws(classOf[XGBoostError])\n  def train(\n      dtrain: DMatrix,\n      params: Map[String, Any],\n      round: Int,\n      watches: Map[String, DMatrix] = Map(),\n      metrics: Array[Array[Float]] = null,\n      obj: ObjectiveTrait = null,\n      eval: EvalTrait = null,\n      earlyStoppingRound: Int = 0,\n      booster: Booster = null): Booster = {\n    trainAndSaveCheckpoint(dtrain, params, round, watches, metrics, obj, eval, earlyStoppingRound,\n      booster, None)\n  }\n\n  /**\n    * Cross-validation with given parameters.\n    *\n    * @param data    Data to 
be trained.\n    * @param params  Booster params.\n    * @param round   Number of boosting iterations.\n    * @param nfold   Number of folds in CV.\n    * @param metrics Evaluation metrics to be watched in CV.\n    * @param obj     customized objective\n    * @param eval    customized evaluation\n    * @return evaluation history\n    */\n  @throws(classOf[XGBoostError])\n  def crossValidation(\n      data: DMatrix,\n      params: Map[String, Any],\n      round: Int,\n      nfold: Int = 5,\n      metrics: Array[String] = null,\n      obj: ObjectiveTrait = null,\n      eval: EvalTrait = null): Array[String] = {\n    JXGBoost.crossValidation(\n      data.jDMatrix, params.map{ case (key: String, value) => (key, value.toString)}.\n        toMap[String, AnyRef].asJava,\n      round, nfold, metrics, obj, eval)\n  }\n\n  /**\n    * load model from modelPath\n    *\n    * @param modelPath booster modelPath\n    */\n  @throws(classOf[XGBoostError])\n  def loadModel(modelPath: String): Booster = {\n    val xgboostInJava = JXGBoost.loadModel(modelPath)\n    new Booster(xgboostInJava)\n  }\n\n  /**\n    * Load a new Booster model from a file opened as input stream.\n    * The assumption is the input stream only contains one XGBoost Model.\n    * This can be used to load existing booster models saved by other XGBoost bindings.\n    *\n    * @param in The input stream of the file.\n    * @return The create booster\n    */\n  @throws(classOf[XGBoostError])\n  def loadModel(in: InputStream): Booster = {\n    val xgboostInJava = JXGBoost.loadModel(in)\n    new Booster(xgboostInJava)\n  }\n}\n\nprivate[scala] case class ExternalCheckpointParams(\n    checkpointInterval: Int,\n    checkpointPath: String,\n    skipCleanCheckpoint: Boolean)\n\nprivate[scala] object ExternalCheckpointParams {\n\n  def extractParams(params: Map[String, Any]): Option[ExternalCheckpointParams] = {\n    val checkpointPath: String = params.get(\"checkpoint_path\") match {\n      case None | Some(null) | 
Some(\"\") => null\n      case Some(path: String) => path\n      case _ => throw new IllegalArgumentException(\"parameter \\\"checkpoint_path\\\" must be\" +\n        s\" an instance of String, but current value is ${params(\"checkpoint_path\")}\")\n    }\n\n    val checkpointInterval: Int = params.get(\"checkpoint_interval\") match {\n      case None => 0\n      case Some(freq: Int) => freq\n      case _ => throw new IllegalArgumentException(\"parameter \\\"checkpoint_interval\\\" must be\" +\n        \" an instance of Int.\")\n    }\n\n    val skipCleanCheckpointFile: Boolean = params.get(\"skip_clean_checkpoint\") match {\n      case None => false\n      case Some(skipCleanCheckpoint: Boolean) => skipCleanCheckpoint\n      case _ => throw new IllegalArgumentException(\"parameter \\\"skip_clean_checkpoint\\\" must be\" +\n        \" an instance of Boolean\")\n    }\n    if (checkpointPath == null || checkpointInterval == 0) {\n      None\n    } else {\n      Some(ExternalCheckpointParams(checkpointInterval, checkpointPath, skipCleanCheckpointFile))\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/native/jvm_utils.h",
    "content": "/**\n *  Copyright 2014-2025, XGBoost Contributors\n */\n#ifndef JVM_UTILS_H_\n#define JVM_UTILS_H_\n\n#include <jni.h>\n\n#include \"xgboost/logging.h\"  // for Check\n\n#define JVM_CHECK_CALL(__expr) \\\n  {                            \\\n    int __errcode = (__expr);  \\\n    if (__errcode != 0) {      \\\n      return __errcode;        \\\n    }                          \\\n  }\n\nJavaVM *&GlobalJvm();\nvoid setHandle(JNIEnv *jenv, jlongArray jhandle, void *handle);\n\ntemplate <typename T>\nT CheckJvmCall(T const &v, JNIEnv *jenv) {\n  if (!v) {\n    CHECK(jenv->ExceptionOccurred());\n    jenv->ExceptionDescribe();\n  }\n  return v;\n}\n\n#endif  // JVM_UTILS_H_\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cpp",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_USE_CUDA\n\n#include <jni.h>\n\n#include \"../../../../src/c_api/c_api_error.h\"\n#include \"../../../../src/common/common.h\"\n\nnamespace xgboost::jni {\nint QdmFromCallback(JNIEnv *, jobject, jlongArray, char const *, bool, jlongArray) {\n  API_BEGIN();\n  common::AssertGPUSupport();\n  API_END();\n}\n}  // namespace xgboost::jni\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include <jni.h>\n#include <xgboost/c_api.h>\n\n#include \"../../../../src/common/common.h\"\n#include \"../../../../src/common/cuda_pinned_allocator.h\"\n#include \"../../../../src/common/device_vector.cuh\"  // for device_vector\n#include \"../../../../src/data/array_interface.h\"\n#include \"jvm_utils.h\"  // for CheckJvmCall\n\nnamespace xgboost::jni {\ntemplate <typename T, typename Alloc>\nT const *RawPtr(std::vector<T, Alloc> const &data) {\n  return data.data();\n}\n\ntemplate <typename T, typename Alloc>\nT *RawPtr(std::vector<T, Alloc> &data) {\n  return data.data();\n}\n\ntemplate <typename T>\nT const *RawPtr(dh::device_vector<T> const &data) {\n  return data.data().get();\n}\n\ntemplate <typename T>\nT *RawPtr(dh::device_vector<T> &data) {\n  return data.data().get();\n}\n\ntemplate <typename VCont>\nvoid CopyColumnMask(xgboost::ArrayInterface<1> const &interface, std::vector<Json> const &columns,\n                    cudaMemcpyKind kind, size_t c, VCont *p_mask, Json *p_out,\n                    cudaStream_t stream) {\n  auto &mask = *p_mask;\n  auto &out = *p_out;\n  auto size = sizeof(typename VCont::value_type) * interface.n;\n  mask.resize(size);\n  CHECK(RawPtr(mask));\n  CHECK(size);\n  CHECK(interface.valid.Data());\n  dh::safe_cuda(cudaMemcpyAsync(RawPtr(mask), interface.valid.Data(), size, kind, stream));\n  auto const &mask_column = columns[c][\"mask\"];\n  out[\"mask\"] = Object();\n  std::vector<Json> mask_data{Json{reinterpret_cast<Integer::Int>(RawPtr(mask))},\n                              Json{get<Boolean const>(mask_column[\"data\"][1])}};\n  out[\"mask\"][\"data\"] = Array(std::move(mask_data));\n  if (get<Array const>(mask_column[\"shape\"]).size() == 2) {\n    std::vector<Json> mask_shape{Json{get<Integer const>(mask_column[\"shape\"][0])},\n                                 Json{get<Integer const>(mask_column[\"shape\"][1])}};\n    out[\"mask\"][\"shape\"] = 
Array(std::move(mask_shape));\n  } else if (get<Array const>(mask_column[\"shape\"]).size() == 1) {\n    std::vector<Json> mask_shape{Json{get<Integer const>(mask_column[\"shape\"][0])}};\n    out[\"mask\"][\"shape\"] = Array(std::move(mask_shape));\n  } else {\n    LOG(FATAL) << \"Invalid shape of mask\";\n  }\n  out[\"mask\"][\"typestr\"] = String(\"<t1\");\n  out[\"mask\"][\"version\"] = Integer{3};\n}\n\ntemplate <typename DCont, typename VCont>\nvoid CopyInterface(std::vector<xgboost::ArrayInterface<1>> &interface_arr,\n                   std::vector<Json> const &columns, cudaMemcpyKind kind,\n                   std::vector<DCont> *p_data, std::vector<VCont> *p_mask,\n                   std::vector<xgboost::Json> *p_out, cudaStream_t stream) {\n  p_data->resize(interface_arr.size());\n  p_mask->resize(interface_arr.size());\n  p_out->resize(interface_arr.size());\n  for (size_t c = 0; c < interface_arr.size(); ++c) {\n    auto &interface = interface_arr.at(c);\n    size_t element_size = interface.ElementSize();\n    size_t size = element_size * interface.n;\n\n    auto &data = (*p_data)[c];\n    auto &mask = (*p_mask)[c];\n    data.resize(size);\n    dh::safe_cuda(cudaMemcpyAsync(RawPtr(data), interface.data, size, kind, stream));\n\n    auto &out = (*p_out)[c];\n    out = Object();\n    std::vector<Json> j_data{Json{Integer(reinterpret_cast<Integer::Int>(RawPtr(data)))},\n                             Json{Boolean{false}}};\n\n    out[\"data\"] = Array(std::move(j_data));\n    out[\"shape\"] = Array(std::vector<Json>{Json(Integer(interface.Shape<0>()))});\n\n    if (interface.valid.Data()) {\n      CopyColumnMask(interface, columns, kind, c, &mask, &out, stream);\n    }\n    out[\"typestr\"] = String(\"<f4\");\n    out[\"version\"] = Integer(3);\n  }\n}\n\ntemplate <typename T>\nvoid CopyMetaInfo(Json *p_interface, dh::device_vector<T> *out, cudaStream_t stream) {\n  auto &j_interface = *p_interface;\n  CHECK_EQ(get<Array const>(j_interface).size(), 1);\n  
auto object = get<Object>(get<Array>(j_interface)[0]);\n  ArrayInterface<1> interface(object);\n  out->resize(interface.Shape<0>());\n  size_t element_size = interface.ElementSize();\n  size_t size = element_size * interface.n;\n  dh::safe_cuda(\n      cudaMemcpyAsync(RawPtr(*out), interface.data, size, cudaMemcpyDeviceToDevice, stream));\n  j_interface[0][\"data\"][0] = reinterpret_cast<Integer::Int>(RawPtr(*out));\n}\n\ntemplate <typename DCont, typename VCont>\nstruct DataFrame {\n  std::vector<DCont> data;\n  std::vector<VCont> valid;\n  std::vector<Json> interfaces;\n};\n\nnamespace {\n// constant names\nstruct Symbols {\n  static constexpr StringView kLabel{\"label\"};\n  static constexpr StringView kWeight{\"weight\"};\n  static constexpr StringView kBaseMargin{\"baseMargin\"};\n  static constexpr StringView kQid{\"qid\"};\n};\n\nclass JvmIter {\n  JNIEnv *jenv_;\n  jobject jiter_;\n  int jni_status_;\n  jobject last_batch_{nullptr};\n\n public:\n  explicit JvmIter(jobject jiter)\n      : jiter_{jiter},\n        jni_status_{GlobalJvm()->GetEnv(reinterpret_cast<void **>(&jenv_), JNI_VERSION_1_6)} {}\n\n  void CloseJvmBatch() {\n    if (last_batch_) {\n      jclass batch_class = CheckJvmCall(jenv_->GetObjectClass(last_batch_), jenv_);\n      jmethodID closeMethod = CheckJvmCall(jenv_->GetMethodID(batch_class, \"close\", \"()V\"), jenv_);\n      jenv_->CallVoidMethod(last_batch_, closeMethod);\n      last_batch_ = nullptr;\n    }\n  }\n\n  auto Status() const { return jni_status_; }\n\n  template <typename Fn>\n  bool PullIterFromJVM(Fn &&fn) {\n    this->CloseJvmBatch();\n    jclass iterClass = jenv_->FindClass(\"java/util/Iterator\");\n\n    jmethodID has_next = CheckJvmCall(jenv_->GetMethodID(iterClass, \"hasNext\", \"()Z\"), jenv_);\n    jmethodID next =\n        CheckJvmCall(jenv_->GetMethodID(iterClass, \"next\", \"()Ljava/lang/Object;\"), jenv_);\n\n    if (jenv_->CallBooleanMethod(jiter_, has_next)) {\n      // batch should be ColumnBatch from jvm\n     
 jobject batch = CheckJvmCall(jenv_->CallObjectMethod(jiter_, next), jenv_);\n      jclass batch_class = CheckJvmCall(jenv_->GetObjectClass(batch), jenv_);\n      jmethodID toJson =\n          CheckJvmCall(jenv_->GetMethodID(batch_class, \"toJson\", \"()Ljava/lang/String;\"), jenv_);\n\n      // Json array interface\n      auto jaif = static_cast<jstring>(jenv_->CallObjectMethod(batch, toJson));\n      CheckJvmCall(jaif, jenv_);\n      char const *cjaif = CheckJvmCall(jenv_->GetStringUTFChars(jaif, nullptr), jenv_);\n\n      fn(cjaif);\n\n      jenv_->ReleaseStringUTFChars(jaif, cjaif);\n\n      last_batch_ = batch;\n      return true;\n    } else {\n      return false;\n    }\n  }\n};\n\nclass DMatrixProxy {\n  DMatrixHandle proxy_;\n\n public:\n  DMatrixProxy() { CHECK_EQ(XGProxyDMatrixCreate(&proxy_), 0); }\n  ~DMatrixProxy() { CHECK_EQ(XGDMatrixFree(proxy_), 0); }\n  auto GetDMatrixHandle() const { return proxy_; }\n\n  void SetInfo(StringView name, Json jaif) {\n    std::string str;\n    Json::Dump(jaif, &str);\n    CHECK_EQ(XGDMatrixSetInfoFromInterface(proxy_, name.c_str(), str.c_str()), 0);\n  }\n  void SetData(Json jaif) {\n    std::string str;\n    Json::Dump(jaif, &str);\n    CHECK_EQ(XGProxyDMatrixSetDataCudaColumnar(proxy_, str.c_str()), 0);\n  }\n};\n\ntemplate <typename Map>\nJson GetLabel(Map const &jmap) {\n  auto it = jmap.find(Symbols::kLabel);\n  StringView msg{\"Must have a label field.\"};\n  CHECK(it != jmap.cend()) << msg;\n  Json label = it->second;\n  CHECK(!IsA<Null>(label)) << msg;\n  return label;\n}\n\nclass HostMemProxy {\n  DMatrixProxy proxy_;\n  JvmIter jiter_;\n\n  template <typename T>\n  using Alloc = xgboost::common::cuda_impl::PinnedAllocator<T>;\n  template <typename U>\n  using HostVector = std::vector<U, Alloc<U>>;\n\n  // This vector is created for staging device data on host to save GPU memory.\n  // When space is not of concern, we can stage them on device memory directly.\n  
std::vector<std::unique_ptr<DataFrame<HostVector<char>, HostVector<std::uint8_t>>>> host_columns_;\n\n  // Staging area for metainfo.\n  // TODO(Bobby): label_upper_bound, label_lower_bound.\n  std::vector<std::unique_ptr<dh::device_vector<float>>> labels_;\n  std::vector<std::unique_ptr<dh::device_vector<float>>> weights_;\n  std::vector<std::unique_ptr<dh::device_vector<float>>> base_margins_;\n  std::vector<std::unique_ptr<dh::device_vector<int>>> qids_;\n  std::vector<Json> label_interfaces_;\n  std::vector<Json> weight_interfaces_;\n  std::vector<Json> margin_interfaces_;\n  std::vector<Json> qid_interfaces_;\n\n  std::size_t it_{0};\n  std::size_t n_batches_{0};\n  bool initialized_{false};\n\n  // Temp buffer on device, each `dh::device_vector` represents a column\n  // from cudf.\n  std::vector<dh::device_vector<char>> staging_data_;\n  std::vector<dh::device_vector<std::uint8_t>> staging_mask_;\n\n  cudaStream_t copy_stream_;\n\n public:\n  explicit HostMemProxy(jobject jiter) : jiter_{jiter} {\n    this->Reset();\n    dh::safe_cuda(cudaStreamCreateWithFlags(&copy_stream_, cudaStreamNonBlocking));\n  }\n  ~HostMemProxy() { dh::safe_cuda(cudaStreamDestroy(copy_stream_)); }\n\n  DMatrixHandle GetDMatrixHandle() const { return proxy_.GetDMatrixHandle(); }\n\n  // Helper function for staging meta info.\n  void StageMetaInfo(Json jaif) {\n    CHECK(!IsA<Null>(jaif));\n    auto json_map = get<Object const>(jaif);\n    Json label = GetLabel(json_map);\n\n    labels_.emplace_back(std::make_unique<dh::device_vector<float>>());\n    CopyMetaInfo(&label, labels_.back().get(), copy_stream_);\n    label_interfaces_.emplace_back(label);\n    proxy_.SetInfo(Symbols::kLabel, label);\n\n    auto it = json_map.find(Symbols::kWeight);\n    if (it != json_map.cend()) {\n      Json weight = it->second;\n      CHECK(!IsA<Null>(weight));\n      weights_.emplace_back(new dh::device_vector<float>);\n      CopyMetaInfo(&weight, weights_.back().get(), copy_stream_);\n      
weight_interfaces_.emplace_back(weight);\n\n      proxy_.SetInfo(Symbols::kWeight, weight);\n    }\n\n    it = json_map.find(Symbols::kBaseMargin);\n    if (it != json_map.cend()) {\n      Json base_margin = it->second;\n      base_margins_.emplace_back(new dh::device_vector<float>);\n      CopyMetaInfo(&base_margin, base_margins_.back().get(), copy_stream_);\n      margin_interfaces_.emplace_back(base_margin);\n\n      proxy_.SetInfo(\"base_margin\", base_margin);\n    }\n\n    it = json_map.find(Symbols::kQid);\n    if (it != json_map.cend()) {\n      Json qid = it->second;\n      qids_.emplace_back(new dh::device_vector<int>);\n      CopyMetaInfo(&qid, qids_.back().get(), copy_stream_);\n      qid_interfaces_.emplace_back(qid);\n\n      proxy_.SetInfo(Symbols::kQid, qid);\n    }\n  }\n\n  void Reset() {\n    it_ = 0;\n    this->jiter_.CloseJvmBatch();\n  }\n\n  void StageData(std::string interface_str) {\n    ++n_batches_;\n    // DataFrame\n    using T = decltype(host_columns_)::value_type::element_type;\n    host_columns_.emplace_back(std::make_unique<T>());\n\n    // Stage the meta info, Json array interface.\n    auto jaif = Json::Load({interface_str.c_str(), interface_str.size()});\n    CHECK(!IsA<Null>(jaif));\n\n    StageMetaInfo(jaif);\n\n    Json features = jaif[\"features\"];\n    auto json_columns = get<Array const>(features);\n    std::vector<ArrayInterface<1>> interfaces;\n\n    // Stage the data\n    for (auto &json_col : json_columns) {\n      auto column = ArrayInterface<1>(get<Object const>(json_col));\n      interfaces.emplace_back(column);\n    }\n    Json::Dump(features, &interface_str);\n    CopyInterface(interfaces, json_columns, cudaMemcpyDeviceToHost, &host_columns_.back()->data,\n                  &host_columns_.back()->valid, &host_columns_.back()->interfaces, copy_stream_);\n\n    proxy_.SetData(features);\n    it_++;\n  }\n\n  int NextFirstLoop() {\n    try {\n      dh::safe_cuda(cudaStreamSynchronize(copy_stream_));\n      if 
(this->jiter_.PullIterFromJVM([this](char const *cjaif) { this->StageData(cjaif); })) {\n        return 1;\n      } else {\n        initialized_ = true;\n        return 0;\n      }\n    } catch (dmlc::Error const &e) {\n      if (jiter_.Status() == JNI_EDETACHED) {\n        GlobalJvm()->DetachCurrentThread();\n      }\n      LOG(FATAL) << e.what();\n    }\n    LOG(FATAL) << \"Unreachable\";\n    return 1;\n  }\n\n  int NextSecondLoop() {\n    std::string str;\n    // Meta\n    auto const &label = this->label_interfaces_.at(it_);\n    proxy_.SetInfo(Symbols::kLabel, label);\n\n    if (n_batches_ == this->weight_interfaces_.size()) {\n      auto const &weight = this->weight_interfaces_.at(it_);\n      proxy_.SetInfo(Symbols::kWeight, weight);\n    }\n\n    if (n_batches_ == this->margin_interfaces_.size()) {\n      auto const &base_margin = this->margin_interfaces_.at(it_);\n      proxy_.SetInfo(\"base_margin\", base_margin);\n    }\n\n    if (n_batches_ == this->qid_interfaces_.size()) {\n      auto const &qid = this->qid_interfaces_.at(it_);\n      proxy_.SetInfo(Symbols::kQid, qid);\n    }\n\n    // Data\n    auto const &json_interface = host_columns_.at(it_)->interfaces;\n\n    std::vector<ArrayInterface<1>> in;\n    for (auto interface : json_interface) {\n      auto column = ArrayInterface<1>(get<Object const>(interface));\n      in.emplace_back(column);\n    }\n    std::vector<Json> out;\n    CopyInterface(in, json_interface, cudaMemcpyHostToDevice, &staging_data_, &staging_mask_, &out,\n                  nullptr);\n\n    Json temp{Array(std::move(out))};\n    proxy_.SetData(temp);\n    it_++;\n    return 1;\n  }\n\n  int Next() {\n    if (!initialized_) {\n      return NextFirstLoop();\n    } else {\n      if (it_ == n_batches_) {\n        return 0;\n      }\n      return NextSecondLoop();\n    }\n  }\n};\n\n// An iterator proxy for external memory.\nclass ExtMemProxy {\n  JvmIter jiter_;\n  DMatrixProxy proxy_;\n\n public:\n  explicit ExtMemProxy(jobject 
jiter) : jiter_(jiter) {}\n\n  ~ExtMemProxy() = default;\n\n  DMatrixHandle GetDMatrixHandle() const { return proxy_.GetDMatrixHandle(); }\n\n  void SetArrayInterface(StringView aif) {\n    auto jaif = Json::Load(aif);\n    CHECK(!IsA<Null>(jaif));\n\n    Json features = jaif[\"features\"];\n    proxy_.SetData(features);\n\n    // set the meta info.\n    auto json_map = get<Object const>(jaif);\n    Json label = GetLabel(json_map);\n    proxy_.SetInfo(Symbols::kLabel, label);\n\n    auto it = json_map.find(Symbols::kWeight);\n    if (it != json_map.cend()) {\n      Json weight = it->second;\n      CHECK(!IsA<Null>(weight));\n      proxy_.SetInfo(Symbols::kWeight, weight);\n    }\n\n    it = json_map.find(Symbols::kBaseMargin);\n    if (it != json_map.cend()) {\n      Json basemargin = it->second;\n      proxy_.SetInfo(\"base_margin\", basemargin);\n    }\n\n    it = json_map.find(Symbols::kQid);\n    if (it != json_map.cend()) {\n      Json qid = it->second;\n      proxy_.SetInfo(Symbols::kQid, qid);\n    }\n  }\n\n  int Next() {\n    try {\n      if (this->jiter_.PullIterFromJVM(\n              [this](char const *cjaif) { this->SetArrayInterface(cjaif); })) {\n        return 1;\n      } else {\n        return 0;\n      }\n    } catch (dmlc::Error const &e) {\n      if (jiter_.Status() == JNI_EDETACHED) {\n        GlobalJvm()->DetachCurrentThread();\n      }\n      LOG(FATAL) << e.what();\n    }\n    return 0;\n  }\n\n  void Reset() { this->jiter_.CloseJvmBatch(); }\n};\n\ntemplate <typename T>\nusing Deleter = std::function<void(T *)>;\n}  // anonymous namespace\n\n/**\n * @brief Create QuantileDMatrix for both in-core version and the external memory version.\n */\nint QdmFromCallback(JNIEnv *jenv, jobject jdata_iter, jlongArray jref, char const *config,\n                    bool is_extmem, jlongArray jout) {\n  DMatrixHandle result;\n  DMatrixHandle ref{nullptr};\n\n  if (jref != nullptr) {\n    std::unique_ptr<jlong, Deleter<jlong>> 
refptr{jenv->GetLongArrayElements(jref, nullptr),\n                                                  [&](jlong *ptr) {\n                                                    jenv->ReleaseLongArrayElements(jref, ptr, 0);\n                                                    jenv->DeleteLocalRef(jref);\n                                                  }};\n    ref = reinterpret_cast<DMatrixHandle>(refptr.get()[0]);\n  }\n\n  int ret = 0;\n  if (is_extmem) {\n    xgboost::jni::ExtMemProxy proxy{jdata_iter};\n    ret = XGExtMemQuantileDMatrixCreateFromCallback(\n        &proxy, proxy.GetDMatrixHandle(), ref,\n        [](DataIterHandle self) { static_cast<xgboost::jni::ExtMemProxy *>(self)->Reset(); },\n        [](DataIterHandle self) { return static_cast<xgboost::jni::ExtMemProxy *>(self)->Next(); },\n        config, &result);\n  } else {\n    xgboost::jni::HostMemProxy proxy{jdata_iter};\n    ret = XGQuantileDMatrixCreateFromCallback(\n        &proxy, proxy.GetDMatrixHandle(), ref,\n        [](DataIterHandle self) { static_cast<xgboost::jni::HostMemProxy *>(self)->Reset(); },\n        [](DataIterHandle self) { return static_cast<xgboost::jni::HostMemProxy *>(self)->Next(); },\n        config, &result);\n  }\n\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n}  // namespace xgboost::jni\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/native/xgboost4j.cpp",
    "content": "/**\n *  Copyright 2014-2024, XGBoost Contributors\n *\n *  Licensed under the Apache License, Version 2.0 (the \"License\");\n *  you may not use this file except in compliance with the License.\n *  You may obtain a copy of the License at\n *\n *  http://www.apache.org/licenses/LICENSE-2.0\n *\n *  Unless required by applicable law or agreed to in writing, software\n *  distributed under the License is distributed on an \"AS IS\" BASIS,\n *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n *  See the License for the specific language governing permissions and\n *  limitations under the License.\n*/\n\n#include \"./xgboost4j.h\"\n\n#include <xgboost/base.h>\n#include <xgboost/c_api.h>\n#include <xgboost/json.h>\n#include <xgboost/logging.h>\n#include <xgboost/string_view.h>  // for StringView\n\n#include <algorithm>  // for copy_n\n#include <cstddef>\n#include <cstdint>\n#include <cstring>\n#include <memory>  // for unique_ptr\n#include <string>\n#include <type_traits>\n#include <vector>\n\n#include \"jvm_utils.h\"  // for JVM_CHECK_CALL\n#include \"../../../../src/c_api/c_api_error.h\"\n#include \"../../../../src/c_api/c_api_utils.h\"\n#include \"../../../../src/data/array_interface.h\"  // for ArrayInterface\n#include \"../../../../src/common/cuda_rt_utils.h\"  // for xgboost::curt::SetDevice\n\n// helper functions\n// set handle\nvoid setHandle(JNIEnv *jenv, jlongArray jhandle, void *handle) {\n#ifdef __APPLE__\n  jlong out = (long)handle;\n#else\n  int64_t out = (int64_t)handle;\n#endif\n  jenv->SetLongArrayRegion(jhandle, 0, 1, &out);\n}\n\nJavaVM*& GlobalJvm() {\n  static JavaVM* vm;\n  return vm;\n}\n\n// Global device id for CUDA, -1 means not set yet\nstd::int32_t& GlobalDeviceId() {\n  static std::int32_t device_id = -1;\n  return device_id;\n}\n\n// overrides JNI on load\njint JNI_OnLoad(JavaVM *vm, void *reserved) {\n  GlobalJvm() = vm;\n  return JNI_VERSION_1_6;\n}\n\nnamespace {\ntemplate <typename T>\nusing 
Deleter = std::function<void(T *)>;\n}  // anonymous namespace\n\nXGB_EXTERN_C int XGBoost4jCallbackDataIterNext(\n    DataIterHandle data_handle,\n    XGBCallbackSetData* set_function,\n    DataHolderHandle set_function_handle) {\n  jobject jiter = static_cast<jobject>(data_handle);\n  JNIEnv* jenv;\n  int jni_status = GlobalJvm()->GetEnv((void **)&jenv, JNI_VERSION_1_6);\n  if (jni_status == JNI_EDETACHED) {\n    GlobalJvm()->AttachCurrentThread(reinterpret_cast<void **>(&jenv), nullptr);\n  } else {\n    CHECK(jni_status == JNI_OK);\n  }\n  try {\n    jclass iterClass = jenv->FindClass(\"java/util/Iterator\");\n    jmethodID hasNext = jenv->GetMethodID(iterClass,\n                                          \"hasNext\", \"()Z\");\n    jmethodID next = jenv->GetMethodID(iterClass,\n                                       \"next\", \"()Ljava/lang/Object;\");\n    int ret_value;\n    if (jenv->CallBooleanMethod(jiter, hasNext)) {\n      ret_value = 1;\n      jobject batch = jenv->CallObjectMethod(jiter, next);\n      if (batch == nullptr) {\n        CHECK(jenv->ExceptionOccurred());\n        jenv->ExceptionDescribe();\n        return -1;\n      }\n\n      jclass batchClass = jenv->GetObjectClass(batch);\n      jlongArray joffset = (jlongArray)jenv->GetObjectField(\n          batch, jenv->GetFieldID(batchClass, \"rowOffset\", \"[J\"));\n      jfloatArray jlabel = (jfloatArray)jenv->GetObjectField(\n          batch, jenv->GetFieldID(batchClass, \"label\", \"[F\"));\n      jfloatArray jweight = (jfloatArray)jenv->GetObjectField(\n          batch, jenv->GetFieldID(batchClass, \"weight\", \"[F\"));\n      jintArray jindex = (jintArray)jenv->GetObjectField(\n          batch, jenv->GetFieldID(batchClass, \"featureIndex\", \"[I\"));\n      jfloatArray jvalue = (jfloatArray)jenv->GetObjectField(\n          batch, jenv->GetFieldID(batchClass, \"featureValue\", \"[F\"));\n      jint jcols = jenv->GetIntField(\n          batch, jenv->GetFieldID(batchClass, \"featureCols\", 
\"I\"));\n\n      std::unique_ptr<XGBoostBatchCSR, Deleter<XGBoostBatchCSR>> cbatch{\n          [&] {\n            auto ptr = new XGBoostBatchCSR;\n            auto &cbatch = *ptr;\n\n            // Init\n            cbatch.size = jenv->GetArrayLength(joffset) - 1;\n            cbatch.columns = jcols;\n            cbatch.offset = reinterpret_cast<jlong *>(jenv->GetLongArrayElements(joffset, nullptr));\n\n            if (jlabel != nullptr) {\n              cbatch.label = jenv->GetFloatArrayElements(jlabel, nullptr);\n              CHECK_EQ(jenv->GetArrayLength(jlabel), static_cast<long>(cbatch.size))\n                  << \"batch.label.length must equal batch.numRows()\";\n            } else {\n              cbatch.label = nullptr;\n            }\n\n            if (jweight != nullptr) {\n              cbatch.weight = jenv->GetFloatArrayElements(jweight, nullptr);\n              CHECK_EQ(jenv->GetArrayLength(jweight), static_cast<long>(cbatch.size))\n                  << \"batch.weight.length must equal batch.numRows()\";\n            } else {\n              cbatch.weight = nullptr;\n            }\n\n            auto max_elem = cbatch.offset[cbatch.size];\n            cbatch.index = (int *)jenv->GetIntArrayElements(jindex, nullptr);\n            cbatch.value = jenv->GetFloatArrayElements(jvalue, nullptr);\n            CHECK_EQ(jenv->GetArrayLength(jindex), max_elem)\n                << \"batch.index.length must equal batch.offset.back()\";\n            CHECK_EQ(jenv->GetArrayLength(jvalue), max_elem)\n                << \"batch.index.length must equal batch.offset.back()\";\n            return ptr;\n          }(),\n          [&](XGBoostBatchCSR *ptr) {\n            auto &cbatch = *ptr;\n            jenv->ReleaseLongArrayElements(joffset, reinterpret_cast<jlong *>(cbatch.offset), 0);\n            jenv->DeleteLocalRef(joffset);\n\n            if (jlabel) {\n              jenv->ReleaseFloatArrayElements(jlabel, cbatch.label, 0);\n              
jenv->DeleteLocalRef(jlabel);\n            }\n            if (jweight) {\n              jenv->ReleaseFloatArrayElements(jweight, cbatch.weight, 0);\n              jenv->DeleteLocalRef(jweight);\n            }\n\n            jenv->ReleaseIntArrayElements(jindex, (jint *)cbatch.index, 0);\n            jenv->DeleteLocalRef(jindex);\n\n            jenv->ReleaseFloatArrayElements(jvalue, cbatch.value, 0);\n            jenv->DeleteLocalRef(jvalue);\n\n            delete ptr;\n          }};\n\n      CHECK_EQ((*set_function)(set_function_handle, *cbatch), 0) << XGBGetLastError();\n\n      jenv->DeleteLocalRef(batch);\n      jenv->DeleteLocalRef(batchClass);\n\n      ret_value = 1;\n    } else {\n      ret_value = 0;\n    }\n    jenv->DeleteLocalRef(iterClass);\n    // only detach if it is an async call.\n    if (jni_status == JNI_EDETACHED) {\n      GlobalJvm()->DetachCurrentThread();\n    }\n    return ret_value;\n  } catch(dmlc::Error const& e) {\n    // only detach if it is an async call.\n    if (jni_status == JNI_EDETACHED) {\n      GlobalJvm()->DetachCurrentThread();\n    }\n    LOG(FATAL) << e.what();\n    return -1;\n  }\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBGetLastError\n * Signature: ()Ljava/lang/String;\n */\nJNIEXPORT jstring JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBGetLastError\n  (JNIEnv *jenv, jclass jcls) {\n  jstring jresult = 0;\n  const char* result = XGBGetLastError();\n  if (result) {\n    jresult = jenv->NewStringUTF(result);\n  }\n  return jresult;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromDataIter\n * Signature: (Ljava/util/Iterator;Ljava/lang/String;[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromDataIter\n  (JNIEnv *jenv, jclass jcls, jobject jiter, jstring jcache_info, jfloat jmissing, jlongArray jout) {\n  DMatrixHandle result;\n  std::unique_ptr<char const, Deleter<char const>> cache_info;\n  if (jcache_info 
!= nullptr) {\n    cache_info = {jenv->GetStringUTFChars(jcache_info, nullptr), [&](char const *ptr) {\n                    jenv->ReleaseStringUTFChars(jcache_info, ptr);\n                  }};\n  }\n  auto missing = static_cast<float>(jmissing);\n  int ret =\n      XGDMatrixCreateFromDataIter(jiter, XGBoost4jCallbackDataIterNext, cache_info.get(),\n                                  missing,&result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromFile\n * Signature: (Ljava/lang/String;I[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromFile(\n    JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent, jlongArray jout) {\n  std::unique_ptr<char const, Deleter<char const>> fname{jenv->GetStringUTFChars(jfname, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfname, ptr);\n                                                         }};\n  DMatrixHandle result;\n  int ret = XGDMatrixCreateFromFile(fname.get(), jsilent, &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\nnamespace {\nusing JavaIndT =\n    std::conditional_t<std::is_convertible<jint *, std::int32_t *>::value, std::int32_t, long>;\n/**\n * \\brief Create from sparse matrix.\n *\n * \\param maker Indirect call to XGBoost C function for creating CSC and CSR.\n *\n * \\return Status\n */\ntemplate <typename Fn>\njint MakeJVMSparseInput(JNIEnv *jenv, jlongArray jindptr, jintArray jindices, jfloatArray jdata,\n                        jfloat jmissing, jint jnthread, Fn &&maker, jlongArray jout) {\n  DMatrixHandle result;\n\n  std::unique_ptr<jlong, Deleter<jlong>> indptr{jenv->GetLongArrayElements(jindptr, nullptr),\n                                                [&](jlong *ptr) {\n          
                                        jenv->ReleaseLongArrayElements(jindptr, ptr, 0);\n                                                }};\n  std::unique_ptr<jint, Deleter<jint>> indices{jenv->GetIntArrayElements(jindices, nullptr),\n                                               [&](jint *ptr) {\n                                                 jenv->ReleaseIntArrayElements(jindices, ptr, 0);\n                                               }};\n  std::unique_ptr<jfloat, Deleter<jfloat>> data{jenv->GetFloatArrayElements(jdata, nullptr),\n                                                [&](jfloat *ptr) {\n                                                  jenv->ReleaseFloatArrayElements(jdata, ptr, 0);\n                                                }};\n\n  bst_ulong nindptr = static_cast<bst_ulong>(jenv->GetArrayLength(jindptr));\n  bst_ulong nelem = static_cast<bst_ulong>(jenv->GetArrayLength(jdata));\n\n  std::string sindptr, sindices, sdata;\n  CHECK_EQ(indptr.get()[nindptr - 1], nelem);\n  using IndPtrT = std::conditional_t<std::is_convertible<jlong *, long *>::value, long, long long>;\n  xgboost::detail::MakeSparseFromPtr(\n      static_cast<IndPtrT const *>(indptr.get()), static_cast<JavaIndT const *>(indices.get()),\n      static_cast<float const *>(data.get()), nindptr, &sindptr, &sindices, &sdata);\n\n  xgboost::Json jconfig{xgboost::Object{}};\n  auto missing = static_cast<float>(jmissing);\n  auto n_threads = static_cast<std::int32_t>(jnthread);\n  // Construct configuration\n  jconfig[\"nthread\"] = xgboost::Integer{n_threads};\n  jconfig[\"missing\"] = xgboost::Number{missing};\n  std::string config;\n  xgboost::Json::Dump(jconfig, &config);\n\n  jint ret = maker(sindptr.c_str(), sindices.c_str(), sdata.c_str(), config.c_str(), &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n}  // anonymous namespace\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromCSR\n * Signature: 
([J[I[FIFI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR(\n    JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jcol,\n    jfloat jmissing, jint jnthread, jlongArray jout) {\n  using CSTR = char const *;\n  return MakeJVMSparseInput(\n      jenv, jindptr, jindices, jdata, jmissing, jnthread,\n      [&](CSTR sindptr, CSTR sindices, CSTR sdata, CSTR sconfig, DMatrixHandle *result) {\n        return XGDMatrixCreateFromCSR(sindptr, sindices, sdata, static_cast<std::int32_t>(jcol),\n                                      sconfig, result);\n      },\n      jout);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromCSC\n * Signature: ([J[I[FIFI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC(\n    JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jrow,\n    jfloat jmissing, jint jnthread, jlongArray jout) {\n  using CSTR = char const *;\n  return MakeJVMSparseInput(\n      jenv, jindptr, jindices, jdata, jmissing, jnthread,\n      [&](CSTR sindptr, CSTR sindices, CSTR sdata, CSTR sconfig, DMatrixHandle *result) {\n        return XGDMatrixCreateFromCSC(sindptr, sindices, sdata, static_cast<bst_ulong>(jrow),\n                                      sconfig, result);\n      },\n      jout);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromMatRef\n * Signature: (JIIF)J\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromMatRef\n  (JNIEnv *jenv, jclass jcls, jlong jdataRef, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) {\n  DMatrixHandle result;\n  bst_ulong nrow = (bst_ulong)jnrow;\n  bst_ulong ncol = (bst_ulong)jncol;\n  jint ret = (jint) XGDMatrixCreateFromMat((float const *)jdataRef, nrow, ncol, jmiss, &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  
return ret;\n}\n\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromMat\n * Signature: ([FIIF)J\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromMat\n  (JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) {\n  DMatrixHandle result;\n  std::unique_ptr<jfloat, Deleter<jfloat>> data{jenv->GetFloatArrayElements(jdata, 0), [&](jfloat* ptr) {\n    jenv->ReleaseFloatArrayElements(jdata, ptr, 0);\n  }};\n\n  bst_ulong nrow = (bst_ulong)jnrow;\n  bst_ulong ncol = (bst_ulong)jncol;\n  jint ret =\n      XGDMatrixCreateFromMat(static_cast<float const *>(data.get()), nrow, ncol, jmiss, &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\nnamespace {\n// Workaround int is not the same as jint. For some reason, if constexpr couldn't dispatch\n// the following.\ntemplate <typename T>\nauto SliceDMatrixWinWar(DMatrixHandle handle, T *ptr, std::size_t len, DMatrixHandle *result) {\n  // default to not allowing slicing with group ID specified -- feel free to add if necessary\n  return XGDMatrixSliceDMatrixEx(handle, ptr, len, result, 0);\n}\n\ntemplate <>\nauto SliceDMatrixWinWar<long>(DMatrixHandle handle, long *ptr, std::size_t len, DMatrixHandle *result) {\n  std::vector<std::int32_t> copy(len);\n  std::copy_n(ptr, len, copy.begin());\n  // default to not allowing slicing with group ID specified -- feel free to add if necessary\n  return XGDMatrixSliceDMatrixEx(handle, copy.data(), len, result, 0);\n}\n}  // namespace\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSliceDMatrix\n * Signature: (J[I)J\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSliceDMatrix(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset, jlongArray jout) {\n  DMatrixHandle result;\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n\n  std::unique_ptr<jint, 
Deleter<jint>> indexset{jenv->GetIntArrayElements(jindexset, nullptr),\n                                                [&](jint *ptr) {\n                                                  jenv->ReleaseIntArrayElements(jindexset, ptr, 0);\n                                                }};\n  auto len = static_cast<bst_ulong>(jenv->GetArrayLength(jindexset));\n  auto ret = SliceDMatrixWinWar(handle, indexset.get(), len, &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixFree\n * Signature: (J)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixFree\n  (JNIEnv *jenv, jclass jcls, jlong jhandle) {\n  DMatrixHandle handle = (DMatrixHandle) jhandle;\n  int ret = XGDMatrixFree(handle);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSaveBinary\n * Signature: (JLjava/lang/String;I)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSaveBinary(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) {\n  DMatrixHandle handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> fname{\n      jenv->GetStringUTFChars(jfname, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfname, ptr);\n        }\n      }};\n  int ret = XGDMatrixSaveBinary(handle, fname.get(), jsilent);\n  JVM_CHECK_CALL(ret);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetFloatInfo\n * Signature: (JLjava/lang/String;[F)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatInfo(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{\n      
jenv->GetStringUTFChars(jfield, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfield, ptr);\n        }\n      }};\n  std::unique_ptr<jfloat, Deleter<jfloat>> array{jenv->GetFloatArrayElements(jarray, nullptr),\n                                                 [&](jfloat *ptr) {\n                                                   jenv->ReleaseFloatArrayElements(jarray, ptr, 0);\n                                                 }};\n\n  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);\n  auto str = xgboost::linalg::Make1dInterface(array.get(), len);\n  return XGDMatrixSetInfoFromInterface(handle, field.get(), str.c_str());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetUIntInfo\n * Signature: (JLjava/lang/String;[I)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntInfo\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jintArray jarray) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{\n      jenv->GetStringUTFChars(jfield, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfield, ptr);\n        }\n      }};\n  std::unique_ptr<jint, Deleter<jint>> array{jenv->GetIntArrayElements(jarray, nullptr),\n                                             [&](jint *ptr) {\n                                               jenv->ReleaseIntArrayElements(jarray, ptr, 0);\n                                             }};\n  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);\n  auto str = xgboost::linalg::Make1dInterface(array.get(), len);\n  return XGDMatrixSetInfoFromInterface(handle, field.get(), str.c_str());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetFloatInfo\n * Signature: (JLjava/lang/String;)[F\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetFloatInfo\n  
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{\n      jenv->GetStringUTFChars(jfield, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfield, ptr);\n        }\n      }};\n  bst_ulong len;\n  float *result;\n  int ret = XGDMatrixGetFloatInfo(handle, field.get(), &len, (const float**) &result);\n  JVM_CHECK_CALL(ret);\n\n  jsize jlen = (jsize) len;\n  jfloatArray jarray = jenv->NewFloatArray(jlen);\n  jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) result);\n  jenv->SetObjectArrayElement(jout, 0, (jobject) jarray);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetUIntInfo\n * Signature: (JLjava/lang/String;)[I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntInfo\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{\n      jenv->GetStringUTFChars(jfield, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfield, ptr);\n        }\n      }};\n  bst_ulong len;\n  unsigned int *result;\n  int ret = (jint)XGDMatrixGetUIntInfo(handle, field.get(), &len, (const unsigned int **)&result);\n  JVM_CHECK_CALL(ret);\n\n  jsize jlen = (jsize) len;\n  jintArray jarray = jenv->NewIntArray(jlen);\n  jenv->SetIntArrayRegion(jarray, 0, jlen, (jint *) result);\n  jenv->SetObjectArrayElement(jout, 0, jarray);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixNumRow\n * Signature: (J)J\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) {\n  auto handle = 
reinterpret_cast<DMatrixHandle>(jhandle);\n  bst_ulong result[1];\n  int ret = (jint) XGDMatrixNumRow(handle, result);\n  JVM_CHECK_CALL(ret);\n  jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) result);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixNumNonMissing\n * Signature: (J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing(\n    JNIEnv *jenv, jclass, jlong jhandle, jlongArray jout) {\n  DMatrixHandle handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  CHECK(handle);\n  bst_ulong result[1];\n  auto ret = static_cast<jint>(XGDMatrixNumNonMissing(handle, result));\n  jlong jresult[1]{static_cast<jlong>(result[0])};\n  jenv->SetLongArrayRegion(jout, 0, 1, jresult);\n  JVM_CHECK_CALL(ret);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterCreate\n * Signature: ([J)J\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterCreate\n  (JNIEnv *jenv, jclass jcls, jlongArray jhandles, jlongArray jout) {\n  std::vector<DMatrixHandle> handles;\n  if (jhandles != nullptr) {\n    size_t len = jenv->GetArrayLength(jhandles);\n    std::unique_ptr<jlong, Deleter<jlong>> cjhandles{\n        jenv->GetLongArrayElements(jhandles, nullptr), [&](jlong *ptr) {\n          jenv->ReleaseLongArrayElements(jhandles, ptr, 0);\n        }};\n    for (size_t i = 0; i < len; ++i) {\n      handles.push_back(reinterpret_cast<DMatrixHandle>(cjhandles.get()[i]));\n    }\n  }\n  BoosterHandle result;\n  int ret = XGBoosterCreate(dmlc::BeginPtr(handles), handles.size(), &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterFree\n * Signature: (J)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterFree(JNIEnv *jenv,\n                                                                            
jclass jcls,\n                                                                            jlong jhandle) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  return XGBoosterFree(handle);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetParam\n * Signature: (JLjava/lang/String;Ljava/lang/String;)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetParam(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> name{jenv->GetStringUTFChars(jname, nullptr),\n                                                        [&](char const *ptr) {\n                                                          if (ptr) {\n                                                            jenv->ReleaseStringUTFChars(jname, ptr);\n                                                          }\n                                                        }};\n  std::unique_ptr<char const, Deleter<char const>> value{\n      jenv->GetStringUTFChars(jvalue, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jvalue, ptr);\n        }\n      }};\n  int ret = XGBoosterSetParam(handle, name.get(), value.get());\n  JVM_CHECK_CALL(ret);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterUpdateOneIter\n * Signature: (JIJ)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterUpdateOneIter\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  auto dtrain = reinterpret_cast<DMatrixHandle>(jdtrain);\n  return XGBoosterUpdateOneIter(handle, jiter, dtrain);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterTrainOneIter\n * Signature: (JJI[F[F)I\n */\nJNIEXPORT jint JNICALL 
Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterTrainOneIter(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jint jiter, jfloatArray jgrad,\n    jfloatArray jhess) {\n  API_BEGIN();\n  auto handle = reinterpret_cast<BoosterHandle *>(jhandle);\n  auto dtrain = reinterpret_cast<DMatrixHandle *>(jdtrain);\n  CHECK(handle);\n  CHECK(dtrain);\n  bst_ulong n_samples{0};\n  JVM_CHECK_CALL(XGDMatrixNumRow(dtrain, &n_samples));\n\n  bst_ulong len = static_cast<bst_ulong>(jenv->GetArrayLength(jgrad));\n  std::unique_ptr<jfloat, Deleter<jfloat>> grad{jenv->GetFloatArrayElements(jgrad, nullptr),\n                                                [&](jfloat *ptr) {\n                                                  jenv->ReleaseFloatArrayElements(jgrad, ptr, 0);\n                                                }};\n  std::unique_ptr<jfloat, Deleter<jfloat>> hess{jenv->GetFloatArrayElements(jhess, nullptr),\n                                                [&](jfloat *ptr) {\n                                                  jenv->ReleaseFloatArrayElements(jhess, ptr, 0);\n                                                }};\n  CHECK(grad);\n  CHECK(hess);\n\n  xgboost::bst_target_t n_targets{1};\n  if (len != n_samples && n_samples != 0) {\n    CHECK_EQ(len % n_samples, 0) << \"Invalid size of gradient.\";\n    n_targets = len / n_samples;\n  }\n\n  auto ctx = xgboost::detail::BoosterCtx(handle);\n  auto [s_grad, s_hess] = xgboost::detail::MakeGradientInterface(\n      ctx, grad.get(), hess.get(), xgboost::linalg::kC, n_samples, n_targets);\n  return XGBoosterTrainOneIter(handle, dtrain, static_cast<std::int32_t>(jiter), s_grad.c_str(),\n                               s_hess.c_str());\n  API_END();\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterEvalOneIter\n * Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String;\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterEvalOneIter\n  (JNIEnv *jenv, jclass jcls, 
jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::vector<DMatrixHandle> dmats;\n  std::vector<std::string> evnames;\n  std::vector<const char*> evchars;\n\n  size_t len =  static_cast<size_t>(jenv->GetArrayLength(jdmats));\n  // put handle from jhandles to chandles\n  std::unique_ptr<jlong, Deleter<jlong>> cjdmats{\n      jenv->GetLongArrayElements(jdmats, nullptr), [&](jlong *ptr) {\n        jenv->ReleaseLongArrayElements(jdmats, ptr, 0);\n      }};\n  for (size_t i = 0; i < len; ++i) {\n    dmats.push_back(reinterpret_cast<DMatrixHandle>(cjdmats.get()[i]));\n    jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);\n    std::unique_ptr<char const, Deleter<char const>> s{jenv->GetStringUTFChars(jevname, nullptr),\n                                                       [&](char const *ptr) {\n                                                         jenv->ReleaseStringUTFChars(jevname, ptr);\n                                                       }};\n    evnames.emplace_back(s.get(), jenv->GetStringLength(jevname));\n  }\n\n  for (size_t i = 0; i < len; ++i) {\n    evchars.push_back(evnames[i].c_str());\n  }\n  const char *result;\n  int ret = XGBoosterEvalOneIter(handle, jiter, dmlc::BeginPtr(dmats), dmlc::BeginPtr(evchars), len,\n                                 &result);\n  JVM_CHECK_CALL(ret);\n  jstring jinfo = nullptr;\n  if (result != nullptr) {\n    jinfo = jenv->NewStringUTF(result);\n  }\n  jenv->SetObjectArrayElement(jout, 0, jinfo);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterPredict\n * Signature: (JJIJ)[F\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jint jntree_limit, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  auto dmat = 
reinterpret_cast<DMatrixHandle>(jdmat);\n  bst_ulong len;\n  float *result;\n  int ret = XGBoosterPredict(handle, dmat, joption_mask, (unsigned int) jntree_limit,\n                             /* training = */ 0,  // Currently this parameter is not supported by JVM\n                             &len, (const float **) &result);\n  JVM_CHECK_CALL(ret);\n  if (len) {\n    jsize jlen = (jsize) len;\n    jfloatArray jarray = jenv->NewFloatArray(jlen);\n    jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) result);\n    jenv->SetObjectArrayElement(jout, 0, jarray);\n  }\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterPredictFromDense\n * Signature: (J[FJJFIII[F[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFromDense(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jfloatArray jdata, jlong num_rows, jlong num_features,\n    jfloat missing, jint iteration_begin, jint iteration_end, jint predict_type,\n    jfloatArray jmargin, jobjectArray jout) {\n  API_BEGIN();\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n\n  /**\n   * Create array interface.\n   */\n  namespace linalg = xgboost::linalg;\n  jfloat *data = jenv->GetFloatArrayElements(jdata, nullptr);\n  xgboost::Context ctx;\n  auto t_data = linalg::MakeTensorView(\n      ctx.Device(),\n      xgboost::common::Span{data, static_cast<std::size_t>(num_rows * num_features)}, num_rows,\n      num_features);\n  auto s_array = linalg::ArrayInterfaceStr(t_data);\n\n  /**\n   * Create configuration object.\n   */\n  xgboost::Json config{xgboost::Object{}};\n  config[\"cache_id\"] = xgboost::Integer{};\n  config[\"type\"] = xgboost::Integer{static_cast<std::int32_t>(predict_type)};\n  config[\"iteration_begin\"] = xgboost::Integer{static_cast<xgboost::bst_layer_t>(iteration_begin)};\n  config[\"iteration_end\"] = xgboost::Integer{static_cast<xgboost::bst_layer_t>(iteration_end)};\n  config[\"missing\"] = 
xgboost::Number{static_cast<float>(missing)};\n  config[\"strict_shape\"] = xgboost::Boolean{true};\n  std::string s_config;\n  xgboost::Json::Dump(config, &s_config);\n\n  /**\n   * Handle base margin\n   */\n  BoosterHandle proxy{nullptr};\n\n  float *margin{nullptr};\n  if (jmargin) {\n    margin = jenv->GetFloatArrayElements(jmargin, nullptr);\n    JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy));\n    auto str = xgboost::linalg::Make1dInterface(margin, jenv->GetArrayLength(jmargin));\n    JVM_CHECK_CALL(XGDMatrixSetInfoFromInterface(proxy, \"base_margin\", str.c_str()));\n  }\n\n  bst_ulong const *out_shape;\n  bst_ulong out_dim;\n  float const *result;\n  auto ret = XGBoosterPredictFromDense(handle, s_array.c_str(), s_config.c_str(), proxy, &out_shape,\n                                       &out_dim, &result);\n\n  jenv->ReleaseFloatArrayElements(jdata, data, 0);\n  if (proxy) {\n    XGDMatrixFree(proxy);\n    jenv->ReleaseFloatArrayElements(jmargin, margin, 0);\n  }\n\n  if (ret != 0) {\n    return ret;\n  }\n\n  std::size_t n{1};\n  for (std::size_t i = 0; i < out_dim; ++i) {\n    n *= out_shape[i];\n  }\n\n  jfloatArray jarray = jenv->NewFloatArray(n);\n\n  jenv->SetFloatArrayRegion(jarray, 0, n, result);\n  jenv->SetObjectArrayElement(jout, 0, jarray);\n\n  API_END();\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterLoadModel\n * Signature: (JLjava/lang/String;)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterLoadModel(JNIEnv *jenv,\n                                                                                 jclass jcls,\n                                                                                 jlong jhandle,\n                                                                                 jstring jfname) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> fname{jenv->GetStringUTFChars(jfname, nullptr),\n                     
                                    [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfname, ptr);\n                                                         }};\n  return XGBoosterLoadModel(handle, fname.get());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSaveModel\n * Signature: (JLjava/lang/String;)V\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSaveModel(JNIEnv *jenv,\n                                                                                 jclass jcls,\n                                                                                 jlong jhandle,\n                                                                                 jstring jfname) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> fname{\n      jenv->GetStringUTFChars(jfname, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jfname, ptr);\n        }\n      }};\n  return XGBoosterSaveModel(handle, fname.get());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterLoadModelFromBuffer\n * Signature: (J[B)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterLoadModelFromBuffer(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jbyteArray jbytes) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<jbyte, Deleter<jbyte>> buffer{jenv->GetByteArrayElements(jbytes, nullptr),\n                                                [&](jbyte *ptr) {\n                                                  jenv->ReleaseByteArrayElements(jbytes, ptr, 0);\n                                                }};\n  return XGBoosterLoadModelFromBuffer(handle, buffer.get(), jenv->GetArrayLength(jbytes));\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSaveModelToBuffer\n * 
Signature: (JLjava/lang/String;[[B)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSaveModelToBuffer\n  (JNIEnv * jenv, jclass jcls, jlong jhandle, jstring jformat, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> format{\n      jenv->GetStringUTFChars(jformat, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jformat, ptr);\n        }\n      }};\n  bst_ulong len = 0;\n  const char *result{nullptr};\n  xgboost::Json config{xgboost::Object{}};\n  config[\"format\"] = std::string{format.get()};\n  std::string config_str;\n  xgboost::Json::Dump(config, &config_str);\n\n  int ret = XGBoosterSaveModelToBuffer(handle, config_str.c_str(), &len, &result);\n  JVM_CHECK_CALL(ret);\n  if (result) {\n    jbyteArray jarray = jenv->NewByteArray(len);\n    jenv->SetByteArrayRegion(jarray, 0, len, (jbyte *)result);\n    jenv->SetObjectArrayElement(jout, 0, jarray);\n  }\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterDumpModelEx\n * Signature: (JLjava/lang/String;ILjava/lang/String;[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterDumpModelEx\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats, jstring jformat, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> fmap{jenv->GetStringUTFChars(jfmap, nullptr),\n                                                        [&](char const *ptr) {\n                                                          if (ptr) {\n                                                            jenv->ReleaseStringUTFChars(jfmap, ptr);\n                                                          }\n                                                        }};\n  std::unique_ptr<char const, Deleter<char const>> 
format{\n      jenv->GetStringUTFChars(jformat, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jformat, ptr);\n        }\n      }};\n  bst_ulong len = 0;\n  char const **result;\n\n  int ret = XGBoosterDumpModelEx(handle, fmap.get(), jwith_stats, format.get(), &len, &result);\n  JVM_CHECK_CALL(ret);\n\n  jsize jlen = (jsize) len;\n  jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass(\"java/lang/String\"), jenv->NewStringUTF(\"\"));\n  for(int i=0 ; i<jlen; i++) {\n    jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[i]));\n  }\n  jenv->SetObjectArrayElement(jout, 0, jinfos);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterDumpModelExWithFeatures\n * Signature: (J[Ljava/lang/String;ILjava/lang/String;[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterDumpModelExWithFeatures\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jobjectArray jfeature_names, jint jwith_stats,\n    jstring jformat, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  bst_ulong feature_num = (bst_ulong)jenv->GetArrayLength(jfeature_names);\n\n  std::vector<std::string> feature_names;\n  std::vector<char const*> feature_names_char;\n\n  std::string feature_type_q = \"q\";\n  std::vector<char const *> feature_types_char;\n\n  for (bst_ulong i = 0; i < feature_num; ++i) {\n    jstring jfeature_name = (jstring)jenv->GetObjectArrayElement(jfeature_names, i);\n    std::unique_ptr<char const, Deleter<char const>> s{\n        jenv->GetStringUTFChars(jfeature_name, nullptr), [&](char const *ptr) {\n          if (ptr != nullptr) {\n            jenv->ReleaseStringUTFChars(jfeature_name, ptr);\n          }\n        }};\n    feature_names.emplace_back(s.get(), jenv->GetStringLength(jfeature_name));\n\n    if (feature_names.back().length() == 0) {\n      feature_names.pop_back();\n    }\n  
}\n\n  for (size_t i = 0; i < feature_names.size(); ++i) {\n    feature_names_char.push_back(feature_names[i].c_str());\n    feature_types_char.push_back(feature_type_q.c_str());\n  }\n\n  std::unique_ptr<char const, Deleter<char const>> format{\n      jenv->GetStringUTFChars(jformat, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(jformat, ptr);\n        }\n      }};\n  bst_ulong len = 0;\n  char **result;\n\n  int ret = XGBoosterDumpModelExWithFeatures(\n      handle, feature_num, (const char **)dmlc::BeginPtr(feature_names_char),\n      (const char **)dmlc::BeginPtr(feature_types_char), jwith_stats, format.get(), &len,\n      (const char ***)&result);\n  JVM_CHECK_CALL(ret);\n\n  jsize jlen = (jsize) len;\n  jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass(\"java/lang/String\"), jenv->NewStringUTF(\"\"));\n  for(int i=0 ; i<jlen; i++) {\n    jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[i]));\n  }\n  jenv->SetObjectArrayElement(jout, 0, jinfos);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetAttrNames\n * Signature: (J[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetAttrNames\n  (JNIEnv *jenv, jclass jcls, jlong jhandle, jobjectArray jout) {\n  BoosterHandle handle = (BoosterHandle) jhandle;\n  bst_ulong len = 0;\n  char **result;\n  int ret = XGBoosterGetAttrNames(handle, &len, (const char ***) &result);\n  JVM_CHECK_CALL(ret);\n\n  jsize jlen = (jsize) len;\n  jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass(\"java/lang/String\"), jenv->NewStringUTF(\"\"));\n  for(int i=0 ; i<jlen; i++) {\n    jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[i]));\n  }\n  jenv->SetObjectArrayElement(jout, 0, jinfos);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetAttr\n * 
Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetAttr(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jkey, jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> key{jenv->GetStringUTFChars(jkey, nullptr),\n                                                       [&](char const *ptr) {\n                                                         if (ptr) {\n                                                           jenv->ReleaseStringUTFChars(jkey, ptr);\n                                                         }\n                                                       }};\n\n  const char *result;\n  int success;\n  int ret = XGBoosterGetAttr(handle, key.get(), &result, &success);\n  JVM_CHECK_CALL(ret);\n\n  if (success > 0) {\n    jstring jret = jenv->NewStringUTF(result);\n    jenv->SetObjectArrayElement(jout, 0, jret);\n  }\n\n  return ret;\n};\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetAttr\n * Signature: (JLjava/lang/String;Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetAttr(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jkey, jstring jvalue) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> key{jenv->GetStringUTFChars(jkey, nullptr),\n                                                       [&](char const *ptr) {\n                                                         if (ptr) {\n                                                           jenv->ReleaseStringUTFChars(jkey, ptr);\n                                                         }\n                                                       }};\n  std::unique_ptr<char const, Deleter<char const>> value{\n      jenv->GetStringUTFChars(jvalue, nullptr), [&](char const *ptr) {\n        if 
(ptr) {\n          jenv->ReleaseStringUTFChars(jvalue, ptr);\n        }\n      }};\n  return XGBoosterSetAttr(handle, key.get(), value.get());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetNumFeature\n * Signature: (J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetNumFeature(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  bst_ulong num_feature;\n  int ret = XGBoosterGetNumFeature(handle, &num_feature);\n  JVM_CHECK_CALL(ret);\n  jlong jnum_feature = num_feature;\n  jenv->SetLongArrayRegion(jout, 0, 1, &jnum_feature);\n  return ret;\n}\n\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetNumBoostedRound(\n    JNIEnv *jenv, jclass, jlong jhandle, jintArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::int32_t n_rounds{0};\n  auto ret = XGBoosterBoostedRounds(handle, &n_rounds);\n  JVM_CHECK_CALL(ret);\n  jint jn_rounds = n_rounds;\n  jenv->SetIntArrayRegion(jout, 0, 1, &jn_rounds);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorInit\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorInit(JNIEnv *jenv,\n                                                                               jclass jcls,\n                                                                               jstring jargs) {\n  xgboost::Json config{xgboost::Object{}};\n  std::unique_ptr<char const, Deleter<char const>> args{jenv->GetStringUTFChars(jargs, nullptr),\n                                                        [&](char const *ptr) {\n                                                          if (ptr) {\n                                                            jenv->ReleaseStringUTFChars(jargs, ptr);\n                                                          }\n          
                                              }};\n  return XGCommunicatorInit(args.get());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerCreate\n * Signature: (Ljava/lang/String;IIIJ[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerCreate(\n    JNIEnv *jenv, jclass, jstring host, jint n_workers, jint port, jint sortby, jlong timeout,\n    jlongArray jout) {\n  using namespace xgboost;  // NOLINT\n\n  TrackerHandle handle;\n  Json config{Object{}};\n  std::unique_ptr<char const, Deleter<char const>> p_shost{jenv->GetStringUTFChars(host, nullptr),\n                                                           [&](char const *ptr) {\n                                                             jenv->ReleaseStringUTFChars(host, ptr);\n                                                           }};\n  std::string shost{p_shost.get(),\n                    static_cast<std::string::size_type>(jenv->GetStringLength(host))};\n  if (!shost.empty()) {\n    config[\"host\"] = shost;\n  }\n  config[\"port\"] = Integer{static_cast<Integer::Int>(port)};\n  config[\"n_workers\"] = Integer{static_cast<Integer::Int>(n_workers)};\n  config[\"timeout\"] = Integer{static_cast<Integer::Int>(timeout)};\n  config[\"sortby\"] = Integer{static_cast<Integer::Int>(sortby)};\n  config[\"dmlc_communicator\"] = String{\"rabit\"};\n  std::string sconfig = Json::Dump(config);\n  JVM_CHECK_CALL(XGTrackerCreate(sconfig.c_str(), &handle));\n  setHandle(jenv, jout, handle);\n\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerRun\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerRun(JNIEnv *, jclass,\n                                                                         jlong jhandle) {\n  auto handle = reinterpret_cast<TrackerHandle>(jhandle);\n  JVM_CHECK_CALL(XGTrackerRun(handle, nullptr));\n  return 0;\n}\n\n/*\n * Class:     
ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerWaitFor\n * Signature: (JJ)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerWaitFor(JNIEnv *, jclass,\n                                                                             jlong jhandle,\n                                                                             jlong timeout) {\n  using namespace xgboost;  // NOLINT\n\n  auto handle = reinterpret_cast<TrackerHandle>(jhandle);\n  Json config{Object{}};\n  config[\"timeout\"] = Integer{static_cast<Integer::Int>(timeout)};\n  std::string sconfig = Json::Dump(config);\n  JVM_CHECK_CALL(XGTrackerWaitFor(handle, sconfig.c_str()));\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerWorkerArgs\n * Signature: (JJ[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerWorkerArgs(\n    JNIEnv *jenv, jclass, jlong jhandle, jlong timeout, jobjectArray jout) {\n  using namespace xgboost;  // NOLINT\n  auto handle = reinterpret_cast<TrackerHandle>(jhandle);\n  char const *args;\n  JVM_CHECK_CALL(XGTrackerWorkerArgs(handle, &args));\n  auto jargs = Json::Load(StringView{args});\n\n  jstring jret = jenv->NewStringUTF(args);\n  jenv->SetObjectArrayElement(jout, 0, jret);\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerFree\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerFree(JNIEnv *, jclass,\n                                                                          jlong jhandle) {\n  auto handle = reinterpret_cast<TrackerHandle>(jhandle);\n  JVM_CHECK_CALL(XGTrackerFree(handle));\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorFinalize\n * Signature: ()I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorFinalize(JNIEnv *,\n                                                                  
                 jclass) {\n  JVM_CHECK_CALL(XGCommunicatorFinalize());\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorPrint\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorPrint(JNIEnv *jenv,\n                                                                                jclass jcls,\n                                                                                jstring jmsg) {\n  std::unique_ptr<char const, Deleter<char const>> msg{jenv->GetStringUTFChars(jmsg, nullptr),\n                                                       [&](char const *ptr) {\n                                                         if (ptr) {\n                                                           jenv->ReleaseStringUTFChars(jmsg, ptr);\n                                                         }\n                                                       }};\n  std::string str(msg.get(), jenv->GetStringLength(jmsg));\n  return XGCommunicatorPrint(str.c_str());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorGetRank\n * Signature: ([I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorGetRank\n  (JNIEnv *jenv, jclass jcls, jintArray jout) {\n  jint rank = XGCommunicatorGetRank();\n  jenv->SetIntArrayRegion(jout, 0, 1, &rank);\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorGetWorldSize\n * Signature: ([I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorGetWorldSize\n  (JNIEnv *jenv, jclass jcls, jintArray jout) {\n  jint out = XGCommunicatorGetWorldSize();\n  jenv->SetIntArrayRegion(jout, 0, 1, &out);\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorAllreduce\n * Signature: (Ljava/nio/ByteBuffer;III)I\n */\nJNIEXPORT jint JNICALL 
Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorAllreduce\n  (JNIEnv *jenv, jclass jcls, jobject jsendrecvbuf, jint jcount, jint jenum_dtype, jint jenum_op) {\n  void *ptr_sendrecvbuf = jenv->GetDirectBufferAddress(jsendrecvbuf);\n  JVM_CHECK_CALL(XGCommunicatorAllreduce(ptr_sendrecvbuf, (size_t) jcount, jenum_dtype, jenum_op));\n  return 0;\n}\n\nnamespace xgboost::jni {\nint QdmFromCallback(JNIEnv *jenv, jobject jdata_iter, jlongArray jref, char const *config,\n                    bool is_extmem, jlongArray jout);\n}  // namespace xgboost::jni\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGQuantileDMatrixCreateFromCallback\n * Signature: (Ljava/util/Iterator;[JLjava/lang/String;[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixCreateFromCallback(\n    JNIEnv *jenv, jclass, jobject jdata_iter, jlongArray jref, jstring jconf,\n    jlongArray jout) {\n  std::unique_ptr<char const, Deleter<char const>> conf{jenv->GetStringUTFChars(jconf, nullptr),\n                                                        [&](char const *ptr) {\n                                                          jenv->ReleaseStringUTFChars(jconf, ptr);\n                                                        }};\n  return xgboost::jni::QdmFromCallback(jenv, jdata_iter, jref, conf.get(), false, jout);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGExtMemQuantileDMatrixCreateFromCallback\n * Signature: (Ljava/util/Iterator;[JLjava/lang/String;[J)I\n */\nJNIEXPORT jint JNICALL\nJava_ml_dmlc_xgboost4j_java_XGBoostJNI_XGExtMemQuantileDMatrixCreateFromCallback(\n    JNIEnv *jenv, jclass jcls, jobject jdata_iter, jlongArray jref, jstring jconf,\n    jlongArray jout) {\n  std::unique_ptr<char const, Deleter<char const>> conf{jenv->GetStringUTFChars(jconf, nullptr),\n                                                        [&](char const *ptr) {\n                                                          
jenv->ReleaseStringUTFChars(jconf, ptr);\n                                                        }};\n  return xgboost::jni::QdmFromCallback(jenv, jdata_iter, jref, conf.get(), true, jout);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetInfoFromInterface\n * Signature: (JLjava/lang/String;Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetInfoFromInterface(\n    JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jstring jjson_columns) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{jenv->GetStringUTFChars(jfield, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfield, ptr);\n                                                         }};\n  std::unique_ptr<char const, Deleter<char const>> cjson_columns{\n      jenv->GetStringUTFChars(jjson_columns, nullptr), [&](char const *ptr) {\n        jenv->ReleaseStringUTFChars(jjson_columns, ptr);\n      }};\n\n  return XGDMatrixSetInfoFromInterface(handle, field.get(), cjson_columns.get());\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromArrayInterfaceColumns\n * Signature: (Ljava/lang/String;FI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromArrayInterfaceColumns\n  (JNIEnv *jenv, jclass jcls, jstring jjson_columns, jfloat jmissing, jint jnthread, jlongArray jout) {\n  DMatrixHandle result;\n  std::unique_ptr<char const, Deleter<char const>> cjson_columns{\n      jenv->GetStringUTFChars(jjson_columns, nullptr), [&](char const *ptr) {\n        jenv->ReleaseStringUTFChars(jjson_columns, ptr);\n      }};\n  xgboost::Json config{xgboost::Object{}};\n  auto missing = static_cast<float>(jmissing);\n  auto n_threads = static_cast<int32_t>(jnthread);\n  
config[\"missing\"] = xgboost::Number(missing);\n  config[\"nthread\"] = xgboost::Integer(n_threads);\n  std::string config_str;\n  xgboost::Json::Dump(config, &config_str);\n  int ret = XGDMatrixCreateFromCudaColumnar(cjson_columns.get(), config_str.c_str(), &result);\n  JVM_CHECK_CALL(ret);\n  setHandle(jenv, jout, result);\n  return ret;\n}\n\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo\n    (JNIEnv *jenv, jclass jclz, jlong jhandle, jstring jfield, jobjectArray jvalues) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{jenv->GetStringUTFChars(jfield, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfield, ptr);\n                                                         }};\n  int size = jenv->GetArrayLength(jvalues);\n\n  // tmp storage for java strings\n  std::vector<std::string> values;\n  for (int i = 0; i < size; i++) {\n    jstring jstr = (jstring)(jenv->GetObjectArrayElement(jvalues, i));\n    std::unique_ptr<char const, Deleter<char const>> value{jenv->GetStringUTFChars(jstr, nullptr),\n                                                           [&](char const *ptr) {\n                                                             jenv->ReleaseStringUTFChars(jstr, ptr);\n                                                           }};\n    values.emplace_back(value.get());\n  }\n\n  std::vector<char const *> c_values;\n  c_values.resize(size);\n  std::transform(values.cbegin(), values.cend(), c_values.begin(),\n                 [](auto const &str) { return str.c_str(); });\n\n  return XGDMatrixSetStrFeatureInfo(handle, field.get(), c_values.data(), size);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetStrFeatureInfo\n * Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I\n 
*/\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo(\n    JNIEnv *jenv, jclass jclz, jlong jhandle, jstring jfield, jlongArray joutLenArray,\n    jobjectArray joutValueArray) {\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{jenv->GetStringUTFChars(jfield, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfield, ptr);\n                                                         }};\n\n  bst_ulong out_len = 0;\n  char const **c_out_features;\n  int ret = XGDMatrixGetStrFeatureInfo(handle, field.get(), &out_len, &c_out_features);\n\n  jlong jlen = (jlong)out_len;\n  jenv->SetLongArrayRegion(joutLenArray, 0, 1, &jlen);\n\n  jobjectArray jinfos =\n      jenv->NewObjectArray(jlen, jenv->FindClass(\"java/lang/String\"), jenv->NewStringUTF(\"\"));\n  for (int i = 0; i < jlen; i++) {\n    jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF(c_out_features[i]));\n  }\n  jenv->SetObjectArrayElement(joutValueArray, 0, jinfos);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetStrFeatureInfo\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL\nJava_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetStrFeatureInfo(\n    JNIEnv *jenv, jclass jclz, jlong jhandle, jstring jfield,\n    jobjectArray jfeatures) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n\n  std::unique_ptr<char const, Deleter<char const>> field{jenv->GetStringUTFChars(jfield, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfield, ptr);\n                                                         }};\n  bst_ulong feature_num = 
(bst_ulong)jenv->GetArrayLength(jfeatures);\n\n  std::vector<std::string> features;\n  std::vector<char const*> features_char;\n\n  for (bst_ulong i = 0; i < feature_num; ++i) {\n    jstring jfeature = (jstring)jenv->GetObjectArrayElement(jfeatures, i);\n    std::unique_ptr<char const, Deleter<char const>> s{\n        jenv->GetStringUTFChars(jfeature, nullptr), [&](char const *ptr) {\n          if (ptr) {\n            jenv->ReleaseStringUTFChars(jfeature, ptr);\n          }\n        }};\n    features.emplace_back(s.get(), jenv->GetStringLength(jfeature));\n  }\n\n  for (size_t i = 0; i < features.size(); ++i) {\n    features_char.push_back(features[i].c_str());\n  }\n\n  return XGBoosterSetStrFeatureInfo(handle, field.get(), dmlc::BeginPtr(features_char),\n                                    feature_num);\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetStrFeatureInfo\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL\nJava_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetStrFeatureInfo(\n    JNIEnv *jenv, jclass jclz, jlong jhandle, jstring jfield,\n    jobjectArray jout) {\n  auto handle = reinterpret_cast<BoosterHandle>(jhandle);\n  std::unique_ptr<char const, Deleter<char const>> field{jenv->GetStringUTFChars(jfield, nullptr),\n                                                         [&](char const *ptr) {\n                                                           jenv->ReleaseStringUTFChars(jfield, ptr);\n                                                         }};\n\n  bst_ulong feature_num = (bst_ulong)jenv->GetArrayLength(jout);\n\n  const char **features;\n  std::vector<char *> features_char;\n\n  int ret =\n      XGBoosterGetStrFeatureInfo(handle, field.get(), &feature_num, (const char ***)&features);\n  JVM_CHECK_CALL(ret);\n\n  for (bst_ulong i = 0; i < feature_num; i++) {\n    jstring jfeature = jenv->NewStringUTF(features[i]);\n    jenv->SetObjectArrayElement(jout, i, jfeature);\n  
}\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetQuantileCut\n * Signature: (J[[J[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetQuantileCut(\n    JNIEnv *jenv, jclass, jlong jhandle, jobjectArray j_indptr, jobjectArray j_values) {\n  using namespace xgboost;  // NOLINT\n  auto handle = reinterpret_cast<DMatrixHandle>(jhandle);\n\n  char const *str_indptr;\n  char const *str_data;\n  Json config{Object{}};\n  auto str_config = Json::Dump(config);\n\n  auto ret = XGDMatrixGetQuantileCut(handle, str_config.c_str(), &str_indptr, &str_data);\n\n  ArrayInterface<1> indptr{StringView{str_indptr}};\n  ArrayInterface<1> data{StringView{str_data}};\n  CHECK_GE(indptr.Shape<0>(), 2);\n\n  // Cut ptr\n  auto j_indptr_array = jenv->NewLongArray(indptr.Shape<0>());\n  CHECK_EQ(indptr.type, ArrayInterfaceHandler::Type::kU8);\n  CHECK_LT(indptr(indptr.Shape<0>() - 1),\n           static_cast<std::uint64_t>(std::numeric_limits<std::int64_t>::max()));\n  static_assert(sizeof(jlong) == sizeof(std::uint64_t));\n  jenv->SetLongArrayRegion(j_indptr_array, 0, indptr.Shape<0>(),\n                           static_cast<jlong const *>(indptr.data));\n  jenv->SetObjectArrayElement(j_indptr, 0, j_indptr_array);\n\n  // Cut values\n  auto n_cuts = indptr(indptr.Shape<0>() - 1);\n  jfloatArray jcuts_array = jenv->NewFloatArray(n_cuts);\n  CHECK_EQ(data.type, ArrayInterfaceHandler::Type::kF4);\n  jenv->SetFloatArrayRegion(jcuts_array, 0, n_cuts, static_cast<float const *>(data.data));\n  jenv->SetObjectArrayElement(j_values, 0, jcuts_array);\n\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBSetGlobalConfig\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBSetGlobalConfig(JNIEnv *jenv,\n                                                                                 jclass,\n                               
                                                  jstring config) {\n  std::unique_ptr<char const, Deleter<char const>> args{\n      jenv->GetStringUTFChars(config, nullptr), [&](char const *ptr) {\n        if (ptr) {\n          jenv->ReleaseStringUTFChars(config, ptr);\n        }\n      }};\n  auto ret = XGBSetGlobalConfig(args.get());\n  JVM_CHECK_CALL(ret);\n  return ret;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBGetGlobalConfig\n * Signature: ([Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL\nJava_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBGetGlobalConfig(JNIEnv *jenv, jclass, jobjectArray jout) {\n  char const *args;\n  auto ret = XGBGetGlobalConfig(&args);\n  JVM_CHECK_CALL(ret);\n  jstring jret = jenv->NewStringUTF(args);\n  jenv->SetObjectArrayElement(jout, 0, jret);\n  return 0;\n}\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CudaSetDevice\n * Signature: (I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CudaSetDevice(JNIEnv *, jclass,\n                                                                            jint id) {\n  auto device_id = static_cast<std::int32_t>(id);\n  auto& global_device_id = GlobalDeviceId();\n  if (global_device_id != -1 && global_device_id != device_id) {\n    LOG(WARNING) << \"Device ID is already set to \" << global_device_id\n                 << \", but a different device ID \" << device_id << \" is requested.\";\n  }\n  global_device_id = device_id;\n  xgboost::curt::SetDevice(device_id);\n  return 0;\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/native/xgboost4j.h",
    "content": "/* DO NOT EDIT THIS FILE - it is machine generated */\n#include <jni.h>\n/* Header for class ml_dmlc_xgboost4j_java_XGBoostJNI */\n\n#ifndef _Included_ml_dmlc_xgboost4j_java_XGBoostJNI\n#define _Included_ml_dmlc_xgboost4j_java_XGBoostJNI\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBGetLastError\n * Signature: ()Ljava/lang/String;\n */\nJNIEXPORT jstring JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBGetLastError\n  (JNIEnv *, jclass);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromFile\n * Signature: (Ljava/lang/String;I[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromFile\n  (JNIEnv *, jclass, jstring, jint, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromDataIter\n * Signature: (Ljava/util/Iterator;Ljava/lang/String;F[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromDataIter\n  (JNIEnv *, jclass, jobject, jstring, jfloat, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromCSR\n * Signature: ([J[I[FIFI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR\n  (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromCSC\n * Signature: ([J[I[FIFI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC\n  (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromMat\n * Signature: ([FIIF[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromMat\n  (JNIEnv *, jclass, jfloatArray, jint, jint, jfloat, 
jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromMatRef\n * Signature: (JIIF[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromMatRef\n  (JNIEnv *, jclass, jlong, jint, jint, jfloat, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSliceDMatrix\n * Signature: (J[I[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSliceDMatrix\n  (JNIEnv *, jclass, jlong, jintArray, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixFree\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixFree\n  (JNIEnv *, jclass, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSaveBinary\n * Signature: (JLjava/lang/String;I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSaveBinary\n  (JNIEnv *, jclass, jlong, jstring, jint);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetFloatInfo\n * Signature: (JLjava/lang/String;[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatInfo\n  (JNIEnv *, jclass, jlong, jstring, jfloatArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetUIntInfo\n * Signature: (JLjava/lang/String;[I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntInfo\n  (JNIEnv *, jclass, jlong, jstring, jintArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetFloatInfo\n * Signature: (JLjava/lang/String;[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetFloatInfo\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetUIntInfo\n * Signature: (JLjava/lang/String;[[I)I\n */\nJNIEXPORT jint 
JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntInfo\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetStrFeatureInfo\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetStrFeatureInfo\n * Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo\n  (JNIEnv *, jclass, jlong, jstring, jlongArray, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixNumRow\n * Signature: (J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow\n  (JNIEnv *, jclass, jlong, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixNumNonMissing\n * Signature: (J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing\n  (JNIEnv *, jclass, jlong, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterCreate\n * Signature: ([J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterCreate\n  (JNIEnv *, jclass, jlongArray, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterFree\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterFree\n  (JNIEnv *, jclass, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetParam\n * Signature: (JLjava/lang/String;Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetParam\n  (JNIEnv *, jclass, jlong, jstring, jstring);\n\n/*\n * Class:     
ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterUpdateOneIter\n * Signature: (JIJ)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterUpdateOneIter\n  (JNIEnv *, jclass, jlong, jint, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterTrainOneIter\n * Signature: (JJI[F[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterTrainOneIter\n  (JNIEnv *, jclass, jlong, jlong, jint, jfloatArray, jfloatArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterEvalOneIter\n * Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterEvalOneIter\n  (JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterPredict\n * Signature: (JJII[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict\n  (JNIEnv *, jclass, jlong, jlong, jint, jint, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterPredictFromDense\n * Signature: (J[FJJFIII[F[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFromDense\n  (JNIEnv *, jclass, jlong, jfloatArray, jlong, jlong, jfloat, jint, jint, jint, jfloatArray, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterLoadModel\n * Signature: (JLjava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterLoadModel\n  (JNIEnv *, jclass, jlong, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSaveModel\n * Signature: (JLjava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSaveModel\n  (JNIEnv *, jclass, jlong, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    
XGBoosterLoadModelFromBuffer\n * Signature: (J[B)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterLoadModelFromBuffer\n  (JNIEnv *, jclass, jlong, jbyteArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSaveModelToBuffer\n * Signature: (JLjava/lang/String;[[B)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSaveModelToBuffer\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterDumpModelEx\n * Signature: (JLjava/lang/String;ILjava/lang/String;[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterDumpModelEx\n  (JNIEnv *, jclass, jlong, jstring, jint, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterDumpModelExWithFeatures\n * Signature: (J[Ljava/lang/String;ILjava/lang/String;[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterDumpModelExWithFeatures\n  (JNIEnv *, jclass, jlong, jobjectArray, jint, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetAttrNames\n * Signature: (J[[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetAttrNames\n  (JNIEnv *, jclass, jlong, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetAttr\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetAttr\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetAttr\n * Signature: (JLjava/lang/String;Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetAttr\n  (JNIEnv *, jclass, jlong, jstring, jstring);\n\n/*\n * Class:    
 ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetNumFeature\n * Signature: (J[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetNumFeature\n  (JNIEnv *, jclass, jlong, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetNumBoostedRound\n * Signature: (J[I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetNumBoostedRound\n  (JNIEnv *, jclass, jlong, jintArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorInit\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorInit\n  (JNIEnv *, jclass, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorFinalize\n * Signature: ()I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorFinalize\n  (JNIEnv *, jclass);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorPrint\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorPrint\n  (JNIEnv *, jclass, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorGetRank\n * Signature: ([I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorGetRank\n  (JNIEnv *, jclass, jintArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorGetWorldSize\n * Signature: ([I)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorGetWorldSize\n  (JNIEnv *, jclass, jintArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerCreate\n * Signature: (Ljava/lang/String;IIIJ[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerCreate\n  (JNIEnv *, jclass, jstring, jint, jint, jint, jlong, jlongArray);\n\n/*\n * Class:     
ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerRun\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerRun\n  (JNIEnv *, jclass, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerWaitFor\n * Signature: (JJ)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerWaitFor\n  (JNIEnv *, jclass, jlong, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerWorkerArgs\n * Signature: (JJ[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerWorkerArgs\n  (JNIEnv *, jclass, jlong, jlong, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    TrackerFree\n * Signature: (J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_TrackerFree\n  (JNIEnv *, jclass, jlong);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CommunicatorAllreduce\n * Signature: (Ljava/nio/ByteBuffer;III)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CommunicatorAllreduce\n  (JNIEnv *, jclass, jobject, jint, jint, jint);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixSetInfoFromInterface\n * Signature: (JLjava/lang/String;Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetInfoFromInterface\n  (JNIEnv *, jclass, jlong, jstring, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGQuantileDMatrixCreateFromCallback\n * Signature: (Ljava/util/Iterator;[JLjava/lang/String;[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixCreateFromCallback\n  (JNIEnv *, jclass, jobject, jlongArray, jstring, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGExtMemQuantileDMatrixCreateFromCallback\n * Signature: (Ljava/util/Iterator;[JLjava/lang/String;[J)I\n */\nJNIEXPORT jint 
JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGExtMemQuantileDMatrixCreateFromCallback\n  (JNIEnv *, jclass, jobject, jlongArray, jstring, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixCreateFromArrayInterfaceColumns\n * Signature: (Ljava/lang/String;FI[J)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromArrayInterfaceColumns\n  (JNIEnv *, jclass, jstring, jfloat, jint, jlongArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterSetStrFeatureInfo\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterSetStrFeatureInfo\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBoosterGetStrFeatureInfo\n * Signature: (JLjava/lang/String;[Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterGetStrFeatureInfo\n  (JNIEnv *, jclass, jlong, jstring, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGDMatrixGetQuantileCut\n * Signature: (J[[J[[F)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetQuantileCut\n  (JNIEnv *, jclass, jlong, jobjectArray, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBSetGlobalConfig\n * Signature: (Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBSetGlobalConfig\n  (JNIEnv *, jclass, jstring);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    XGBGetGlobalConfig\n * Signature: ([Ljava/lang/String;)I\n */\nJNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBGetGlobalConfig\n  (JNIEnv *, jclass, jobjectArray);\n\n/*\n * Class:     ml_dmlc_xgboost4j_java_XGBoostJNI\n * Method:    CudaSetDevice\n * Signature: (I)I\n */\nJNIEXPORT jint JNICALL 
Java_ml_dmlc_xgboost4j_java_XGBoostJNI_CudaSetDevice\n  (JNIEnv *, jclass, jint);\n\n#ifdef __cplusplus\n}\n#endif\n#endif\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/ArchDetectionTest.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport org.junit.Test;\nimport org.junit.experimental.runners.Enclosed;\nimport org.junit.runner.RunWith;\nimport org.junit.runners.Parameterized;\nimport org.junit.runners.Parameterized.Parameters;\n\nimport java.util.Collection;\n\nimport static java.util.Arrays.asList;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.Arch.X86_64;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.Arch.AARCH64;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.Arch.SPARC;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.Arch.detectArch;\nimport static org.junit.Assert.assertSame;\nimport static org.junit.Assert.assertThrows;\n\n/**\n * Test cases for {@link NativeLibLoader.Arch}.\n *\n * Every test overrides the {@code os.arch} system property and puts the previous value back\n * afterwards, so the override does not leak into other tests.\n */\n@RunWith(Enclosed.class)\npublic class ArchDetectionTest {\n\n  private static final String OS_ARCH_PROPERTY = \"os.arch\";\n\n  /**\n   * Checks that each listed {@code os.arch} value is detected as the expected architecture.\n   */\n  @RunWith(Parameterized.class)\n  public static class ParameterizedArchDetectionTest {\n\n    private final String osArchValue;\n    private final NativeLibLoader.Arch expectedArch;\n\n    public ParameterizedArchDetectionTest(String osArchValue, NativeLibLoader.Arch expectedArch) {\n      this.osArchValue = osArchValue;\n      this.expectedArch = expectedArch;\n    }\n\n    /**\n     * @return pairs of {@code os.arch} value and the architecture it should be detected as\n     */\n    @Parameters\n    public static Collection<Object[]> data() {\n      return asList(new Object[][]{\n        {\"x86_64\", X86_64},\n        {\"amd64\", X86_64},\n        {\"aarch64\", AARCH64},\n        {\"arm64\", AARCH64},\n        {\"sparc64\", SPARC}\n      });\n    }\n\n    @Test\n    public void testArch() {\n      executeAndRestoreProperty(() -> {\n        System.setProperty(OS_ARCH_PROPERTY, osArchValue);\n        // JUnit expects (expected, actual); this order keeps failure messages accurate.\n        assertSame(expectedArch, detectArch());\n      });\n    }\n  }\n\n  /**\n   * Checks that an unrecognised {@code os.arch} value makes detection throw\n   * {@link IllegalStateException}.\n   */\n  public static class UnsupportedArchDetectionTest {\n\n    @Test\n    public void testUnsupportedArch() {\n      executeAndRestoreProperty(() -> {\n        System.setProperty(OS_ARCH_PROPERTY, \"unsupported\");\n        assertThrows(IllegalStateException.class, NativeLibLoader.Arch::detectArch);\n      });\n    }\n  }\n\n  /**\n   * Runs {@code action} and then restores the {@code os.arch} system property to the value it\n   * had before the call, clearing it if it was not set. The restore happens even if\n   * {@code action} throws.\n   *\n   * @param action code that may modify the {@code os.arch} system property\n   */\n  private static void executeAndRestoreProperty(Runnable action) {\n    String oldValue = System.getProperty(OS_ARCH_PROPERTY);\n\n    try {\n      action.run();\n    } finally {\n      if (oldValue != null) {\n        System.setProperty(OS_ARCH_PROPERTY, oldValue);\n      } else {\n        System.clearProperty(OS_ARCH_PROPERTY);\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport junit.framework.TestCase;\nimport org.junit.Test;\n\nimport java.io.ByteArrayInputStream;\nimport java.io.ByteArrayOutputStream;\nimport java.io.File;\nimport java.io.IOException;\nimport java.util.*;\nimport java.util.concurrent.*;\n\nimport static org.junit.Assert.assertArrayEquals;\nimport static org.junit.Assert.fail;\n\n/**\n * test cases for Booster Inplace Predict\n *\n * @author hzx and Sovrn\n */\npublic class BoosterImplTest {\n  private String train_uri = \"../../demo/data/agaricus.txt.train?indexing_mode=1&format=libsvm\";\n  private String test_uri = \"../../demo/data/agaricus.txt.test?indexing_mode=1&format=libsvm\";\n\n  public static class EvalError implements IEvaluation {\n    @Override\n    public String getMetric() {\n      return \"custom_error\";\n    }\n\n    @Override\n    public float eval(float[][] predicts, DMatrix dmat) {\n      float error = 0f;\n      float[] labels;\n      try {\n        labels = dmat.getLabel();\n      } catch (XGBoostError ex) {\n        throw new RuntimeException(ex);\n      }\n      int nrow = predicts.length;\n      for (int i = 0; i < nrow; i++) {\n        if (labels[i] == 0f && predicts[i][0] > 0) {\n          error++;\n        } else if (labels[i] == 1f && predicts[i][0] <= 0) {\n          error++;\n        }\n      }\n\n      return error / labels.length;\n    }\n  
}\n\n  private Booster trainBooster(DMatrix trainMat, DMatrix testMat) throws XGBoostError {\n    //set params\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"eta\", 1.0);\n        put(\"max_depth\", 2);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n      }\n    };\n\n    //set watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    //set round\n    int round = 5;\n\n    //train a boost model\n    return XGBoost.train(trainMat, paramMap, round, watches, null, null);\n  }\n\n  @Test\n  public void testBoosterBasic() throws XGBoostError, IOException {\n\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n\n    //predict raw output\n    float[][] predicts = booster.predict(testMat, true, 0);\n\n    //eval\n    IEvaluation eval = new EvalError();\n    //error must be less than 0.1\n    TestCase.assertTrue(eval.eval(predicts, testMat) < 0.1f);\n  }\n\n  @Test\n  public void inplacePredictTest() throws XGBoostError {\n    /* Data Generation */\n    // Generate a training set.\n    int trainRows = 1000;\n    int features = 10;\n    int trainSize = trainRows * features;\n    float[] trainX = generateRandomDataSet(trainSize);\n    float[] trainY = generateRandomDataSet(trainRows);\n\n    DMatrix trainingMatrix = new DMatrix(trainX, trainRows, features, Float.NaN);\n    trainingMatrix.setLabel(trainY);\n\n    // Generate a testing set\n    int testRows = 10;\n    int testSize = testRows * features;\n    float[] testX = generateRandomDataSet(testSize);\n    float[] testY = generateRandomDataSet(testRows);\n\n    DMatrix testingMatrix = new DMatrix(testX, testRows, features, Float.NaN);\n    testingMatrix.setLabel(testY);\n\n    /* Training */\n\n    // Set parameters\n    
Map<String, Object> params = new HashMap<>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\",2);\n    params.put(\"silent\", 1);\n    params.put(\"tree_method\", \"hist\");\n\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"train\", trainingMatrix);\n    watches.put(\"test\", testingMatrix);\n\n    Booster booster = XGBoost.train(trainingMatrix, params, 10, watches, null, null);\n\n    /* Prediction */\n\n    // Standard prediction\n    float[][] predictions = booster.predict(testingMatrix);\n\n    // Inplace-prediction\n    float[][] inplacePredictions = booster.inplace_predict(testX, testRows, features, Float.NaN);\n\n    // Confirm that the two prediction results are identical\n    assertArrayEquals(predictions, inplacePredictions);\n  }\n\n  @Test\n  public void inplacePredictMultiPredictTest() throws InterruptedException {\n    // Multithreaded, multiple prediction\n    int trainRows = 1000;\n    int features = 10;\n    int trainSize = trainRows * features;\n\n    int testRows = 10;\n    int testSize = testRows * features;\n\n    //Simulate multiple predictions on multiple random data sets simultaneously.\n    ExecutorService executorService = Executors.newFixedThreadPool(5);\n    int predictsToPerform = 100;\n    for(int i = 0; i < predictsToPerform; i++) {\n      executorService.submit(() -> {\n        try {\n          float[] trainX = generateRandomDataSet(trainSize);\n          float[] trainY = generateRandomDataSet(trainRows);\n          DMatrix trainingMatrix = new DMatrix(trainX, trainRows, features, Float.NaN);\n          trainingMatrix.setLabel(trainY);\n\n          float[] testX = generateRandomDataSet(testSize);\n          float[] testY = generateRandomDataSet(testRows);\n          DMatrix testingMatrix = new DMatrix(testX, testRows, features, Float.NaN);\n          testingMatrix.setLabel(testY);\n\n          Map<String, Object> params = new HashMap<>();\n          params.put(\"eta\", 1.0);\n          
params.put(\"max_depth\", 2);\n          params.put(\"silent\", 1);\n          params.put(\"tree_method\", \"hist\");\n\n          Map<String, DMatrix> watches = new HashMap<>();\n          watches.put(\"train\", trainingMatrix);\n          watches.put(\"test\", testingMatrix);\n\n          Booster booster = XGBoost.train(trainingMatrix, params, 10, watches, null, null);\n\n          float[][] predictions = booster.predict(testingMatrix);\n          float[][] inplacePredictions = booster.inplace_predict(testX, testRows, features, Float.NaN);\n\n          assertArrayEquals(predictions, inplacePredictions);\n        } catch (XGBoostError xgBoostError) {\n          fail(xgBoostError.getMessage());\n        }\n      });\n    }\n    executorService.shutdown();\n    if(!executorService.awaitTermination(1, TimeUnit.MINUTES))\n      executorService.shutdownNow();\n  }\n\n  @Test\n  public void inplacePredictWithMarginTest() throws XGBoostError {\n    //Generate a training set\n    int trainRows = 1000;\n    int features = 10;\n    int trainSize = trainRows * features;\n    float[] trainX = generateRandomDataSet(trainSize);\n    float[] trainY = generateRandomDataSet(trainRows);\n\n    DMatrix trainingMatrix = new DMatrix(trainX, trainRows, features, Float.NaN);\n    trainingMatrix.setLabel(trainY);\n\n    // Generate a testing set\n    int testRows = 10;\n    int testSize = testRows * features;\n    float[] testX = generateRandomDataSet(testSize);\n    float[] testY = generateRandomDataSet(testRows);\n\n    DMatrix testingMatrix = new DMatrix(testX, testRows, features, Float.NaN);\n    testingMatrix.setLabel(testY);\n\n    // Set booster parameters\n    Map<String, Object> params = new HashMap<>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\",2);\n    params.put(\"tree_method\", \"hist\");\n    params.put(\"base_score\", 0.0);\n\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"train\", trainingMatrix);\n    watches.put(\"test\", 
testingMatrix);\n\n    // Train booster on training matrix.\n    Booster booster = XGBoost.train(trainingMatrix, params, 10, watches, null, null);\n\n    // Create a margin\n    float[] margin = new float[testRows];\n    Arrays.fill(margin, 0.5f);\n\n    // Define an iteration range to use all training iterations, this should match\n    // the without margin call\n    // which defines an iteration range of [0,0)\n    int[] iterationRange = new int[] { 0, 0 };\n\n    float[][] inplacePredictionsWithMargin = booster.inplace_predict(testX,\n        testRows,\n        features,\n        Float.NaN,\n        iterationRange,\n        Booster.PredictionType.kValue,\n        margin);\n    float[][] inplacePredictionsWithoutMargin = booster.inplace_predict(testX, testRows, features, Float.NaN);\n\n    for (int i = 0; i < inplacePredictionsWithoutMargin.length; i++) {\n      for (int j = 0; j < inplacePredictionsWithoutMargin[i].length; j++) {\n        inplacePredictionsWithoutMargin[i][j] += margin[j];\n      }\n    }\n    for (int i = 0; i < inplacePredictionsWithoutMargin.length; i++) {\n      assertArrayEquals(inplacePredictionsWithMargin[i], inplacePredictionsWithoutMargin[i], 1e-6f);\n    }\n  }\n\n  private float[] generateRandomDataSet(int size) {\n    float[] newSet = new float[size];\n    Random random = new Random();\n    for(int i = 0; i < size; i++) {\n      newSet[i] = random.nextFloat();\n    }\n    return newSet;\n  }\n\n  @Test\n  public void saveLoadModelWithPath() throws XGBoostError, IOException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    IEvaluation eval = new EvalError();\n\n    Booster booster = trainBooster(trainMat, testMat);\n    // save and load\n    File temp = File.createTempFile(\"temp\", \"model\");\n    temp.deleteOnExit();\n    booster.saveModel(temp.getAbsolutePath());\n\n    Booster bst2 = XGBoost.loadModel(temp.getAbsolutePath());\n    assert 
(Arrays.equals(bst2.toByteArray(\"ubj\"), booster.toByteArray(\"ubj\")));\n    assert (Arrays.equals(bst2.toByteArray(\"json\"), booster.toByteArray(\"json\")));\n    float[][] predicts2 = bst2.predict(testMat, true, 0);\n    TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f);\n  }\n\n  @Test\n  public void saveLoadModelWithFeaturesWithPath() throws XGBoostError, IOException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    IEvaluation eval = new EvalError();\n\n    String[] featureNames = new String[126];\n    String[] featureTypes = new String[126];\n    for(int i = 0; i < 126; i++) {\n      featureNames[i] = \"test_feature_name_\" + i;\n      featureTypes[i] = \"q\";\n    }\n    trainMat.setFeatureNames(featureNames);\n    testMat.setFeatureNames(featureNames);\n    trainMat.setFeatureTypes(featureTypes);\n    testMat.setFeatureTypes(featureTypes);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    // save and load, only json format save and load feature_name and feature_type\n    File temp = File.createTempFile(\"temp\", \".json\");\n    temp.deleteOnExit();\n    booster.saveModel(temp.getAbsolutePath());\n\n    String modelString = new String(booster.toByteArray(\"json\"));\n\n    Booster bst2 = XGBoost.loadModel(temp.getAbsolutePath());\n    assert (Arrays.equals(bst2.toByteArray(\"ubj\"), booster.toByteArray(\"ubj\")));\n    assert (Arrays.equals(bst2.toByteArray(\"json\"), booster.toByteArray(\"json\")));\n    float[][] predicts2 = bst2.predict(testMat, true, 0);\n    TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f);\n  }\n\n  @Test\n  public void saveLoadModelWithStream() throws XGBoostError, IOException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n\n    ByteArrayOutputStream output = new ByteArrayOutputStream();\n    booster.saveModel(output);\n    
IEvaluation eval = new EvalError();\n    Booster loadedBooster = XGBoost.loadModel(new ByteArrayInputStream(output.toByteArray()));\n    float originalPredictError = eval.eval(booster.predict(testMat, true), testMat);\n    TestCase.assertTrue(\"originalPredictErr:\" + originalPredictError,\n            originalPredictError < 0.1f);\n    float loadedPredictError = eval.eval(loadedBooster.predict(testMat, true), testMat);\n    TestCase.assertTrue(\"loadedPredictErr:\" + loadedPredictError, loadedPredictError < 0.1f);\n  }\n\n  private static class IncreasingEval implements IEvaluation {\n    private int value = 1;\n\n    @Override\n    public String getMetric() {\n      return \"inc\";\n    }\n\n    @Override\n    public float eval(float[][] predicts, DMatrix dmat) {\n      return value++;\n    }\n  }\n\n  @Test\n  public void testDescendMetricsWithBoundaryCondition() {\n    // maximize_evaluation_metrics = false\n    int totalIterations = 11;\n    int earlyStoppingRound = 10;\n    float[][] metrics = new float[1][totalIterations];\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = i;\n    }\n    int bestIteration = 0;\n\n    for (int itr = 0; itr < totalIterations; itr++) {\n      boolean es = XGBoost.shouldEarlyStop(earlyStoppingRound, itr, bestIteration);\n      if (itr == totalIterations - 1) {\n        TestCase.assertTrue(es);\n      } else {\n        TestCase.assertFalse(es);\n      }\n    }\n  }\n\n  @Test\n  public void testEarlyStoppingForMultipleMetrics() {\n    // maximize_evaluation_metrics = true\n    int earlyStoppingRound = 3;\n    int totalIterations = 5;\n    int numOfMetrics = 3;\n    float[][] metrics = new float[numOfMetrics][totalIterations];\n    // Only assign metric values to the first dataset, zeros for other datasets\n    for (int i = 0; i < numOfMetrics; i++) {\n      for (int j = 0; j < totalIterations; j++) {\n        metrics[0][j] = j;\n      }\n    }\n    int bestIteration;\n\n    for (int i = 0; i < 
totalIterations; i++) {\n      bestIteration = i;\n      boolean es = XGBoost.shouldEarlyStop(earlyStoppingRound, i, bestIteration);\n      TestCase.assertFalse(es);\n    }\n\n    // when we have multiple datasets, only the last one was used to determinate early stop\n    // Here we changed the metric of the first dataset, it doesn't have any effect to the final result\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = totalIterations - i;\n    }\n    for (int i = 0; i < totalIterations; i++) {\n      bestIteration = i;\n      boolean es = XGBoost.shouldEarlyStop(earlyStoppingRound, i, bestIteration);\n      TestCase.assertFalse(es);\n    }\n\n    // Now assign metric values to the last dataset.\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[2][i] = totalIterations - i;\n    }\n    bestIteration = 0;\n\n    for (int i = 0; i < totalIterations; i++) {\n      // if any metrics off, we need to stop\n      boolean es = XGBoost.shouldEarlyStop(earlyStoppingRound, i, bestIteration);\n      if (i >= earlyStoppingRound) {\n        TestCase.assertTrue(es);\n      } else {\n        TestCase.assertFalse(es);\n      }\n    }\n  }\n\n  @Test\n  public void testDescendMetrics() {\n    // maximize_evaluation_metrics = false\n    int totalIterations = 10;\n    int earlyStoppingRounds = 5;\n    float[][] metrics = new float[1][totalIterations];\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = i;\n    }\n    int bestIteration = 0;\n\n    boolean es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertTrue(es);\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = totalIterations - i;\n    }\n    bestIteration = totalIterations - 1;\n\n    es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertFalse(es);\n\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = totalIterations - i;\n    
}\n    metrics[0][4] = 1;\n    metrics[0][9] = 5;\n\n    bestIteration = 4;\n\n    es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertTrue(es);\n  }\n\n  @Test\n  public void testAscendMetricsWithBoundaryCondition() {\n    // maximize_evaluation_metrics = true\n    int totalIterations = 11;\n    int earlyStoppingRounds = 10;\n    float[][] metrics = new float[1][totalIterations];\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = totalIterations - i;\n    }\n    int bestIteration = 0;\n\n    for (int itr = 0; itr < totalIterations; itr++) {\n      boolean es = XGBoost.shouldEarlyStop(earlyStoppingRounds, itr, bestIteration);\n      if (itr == totalIterations - 1) {\n        TestCase.assertTrue(es);\n      } else {\n        TestCase.assertFalse(es);\n      }\n    }\n  }\n\n  @Test\n  public void testAscendMetrics() {\n    // maximize_evaluation_metrics = true\n    int totalIterations = 10;\n    int earlyStoppingRounds = 5;\n    float[][] metrics = new float[1][totalIterations];\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = totalIterations - i;\n    }\n    int bestIteration = 0;\n\n    boolean es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertTrue(es);\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = i;\n    }\n    bestIteration = totalIterations - 1;\n\n    es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertFalse(es);\n\n    for (int i = 0; i < totalIterations; i++) {\n      metrics[0][i] = i;\n    }\n    metrics[0][4] = 9;\n    metrics[0][9] = 4;\n\n    bestIteration = 4;\n\n    es = XGBoost.shouldEarlyStop(earlyStoppingRounds, totalIterations - 1, bestIteration);\n    TestCase.assertTrue(es);\n  }\n\n  @Test\n  public void testBoosterEarlyStop() throws XGBoostError, IOException {\n    DMatrix trainMat = new 
DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"maximize_evaluation_metrics\", \"false\");\n      }\n    };\n    Map<String, DMatrix> watches = new LinkedHashMap<>();\n    watches.put(\"training\", trainMat);\n    watches.put(\"test\", testMat);\n\n    final int round = 10;\n    int earlyStoppingRound = 2;\n    float[][] metrics = new float[watches.size()][round];\n    XGBoost.train(trainMat, paramMap, round, watches, metrics, null, new IncreasingEval(),\n            earlyStoppingRound);\n\n    // Make sure we've stopped early.\n    for (int w = 0; w < watches.size(); w++) {\n      for (int r = 0; r <= earlyStoppingRound; r++) {\n        TestCase.assertFalse(0.0f == metrics[w][r]);\n      }\n    }\n\n    for (int w = 0; w < watches.size(); w++) {\n      for (int r = earlyStoppingRound + 1; r < round; r++) {\n        TestCase.assertEquals(0.0f, metrics[w][r]);\n      }\n    }\n  }\n\n  @Test\n  public void testEarlyStoppingAttributes() throws XGBoostError, IOException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"objective\", \"binary:logistic\");\n        put(\"maximize_evaluation_metrics\", \"false\");\n      }\n    };\n    Map<String, DMatrix> watches = new LinkedHashMap<>();\n    watches.put(\"training\", trainMat);\n    watches.put(\"test\", testMat);\n\n    int round = 30;\n    int earlyStoppingRound = 4;\n    float[][] metrics = new float[watches.size()][round];\n\n    Booster booster = XGBoost.train(trainMat, paramMap, round,\n\t\t\t\t    watches, metrics, null, null, earlyStoppingRound);\n\n    int bestIter = 
Integer.valueOf(booster.getAttr(\"best_iteration\"));\n    float bestScore = Float.valueOf(booster.getAttr(\"best_score\"));\n    TestCase.assertEquals(bestIter, round - 1);\n    TestCase.assertEquals(bestScore, metrics[watches.size() - 1][round - 1]);\n  }\n\n  private void testWithQuantileHisto(DMatrix trainingSet, Map<String, DMatrix> watches, int round,\n                                      Map<String, Object> paramMap, float threshold) throws XGBoostError {\n    float[][] metrics = new float[watches.size()][round];\n    Booster booster = XGBoost.train(trainingSet, paramMap, round, watches,\n            metrics, null, null, 0);\n    for (int i = 0; i < metrics.length; i++)\n      for (int j = 1; j < metrics[i].length; j++) {\n        TestCase.assertTrue(metrics[i][j] >= metrics[i][j - 1] ||\n                Math.abs(metrics[i][j] - metrics[i][j - 1]) < 0.1);\n      }\n    for (int i = 0; i < metrics.length; i++)\n      for (int j = 0; j < metrics[i].length; j++) {\n        TestCase.assertTrue(metrics[i][j] >= threshold);\n      }\n    booster.dispose();\n  }\n\n  @Test\n  public void testQuantileHistoDepthWise() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"tree_method\", \"hist\");\n        put(\"grow_policy\", \"depthwise\");\n        put(\"eval_metric\", \"auc\");\n      }\n    };\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"training\", trainMat);\n    watches.put(\"test\", testMat);\n    testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);\n  }\n\n  @Test\n  public void testQuantileHistoLossGuide() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, 
Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"tree_method\", \"hist\");\n        put(\"grow_policy\", \"lossguide\");\n        put(\"max_leaves\", 8);\n        put(\"eval_metric\", \"auc\");\n      }\n    };\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"training\", trainMat);\n    watches.put(\"test\", testMat);\n    testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);\n  }\n\n  @Test\n  public void testQuantileHistoLossGuideMaxBin() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"tree_method\", \"hist\");\n        put(\"grow_policy\", \"lossguide\");\n        put(\"max_leaves\", 8);\n        put(\"max_bin\", 16);\n        put(\"eval_metric\", \"auc\");\n      }\n    };\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"training\", trainMat);\n    testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);\n  }\n\n  @Test\n  public void testDumpModelJson() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] dump = booster.getModelDump(\"\", false, \"json\");\n    TestCase.assertEquals(\"  { \\\"nodeid\\\":\", dump[0].substring(0, 13));\n\n    // test with specified feature names\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = \"test_feature_name_\" + i;\n    dump = booster.getModelDump(featureNames, false, \"json\");\n    TestCase.assertTrue(dump[0].contains(\"test_feature_name_\"));\n  }\n\n  @Test\n 
 public void testGetFeatureScore() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = \"test_feature_name_\" + i;\n    Map<String, Integer> scoreMap = booster.getFeatureScore(featureNames);\n    for (String fName: scoreMap.keySet()) TestCase.assertTrue(fName.startsWith(\"test_feature_name_\"));\n  }\n\n  @Test\n  public void testGetFeatureImportanceGain() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = \"test_feature_name_\" + i;\n    Map<String, Double> scoreMap = booster.getScore(featureNames, \"gain\");\n    for (String fName: scoreMap.keySet()) TestCase.assertTrue(fName.startsWith(\"test_feature_name_\"));\n  }\n\n  @Test\n  public void testGetFeatureImportanceTotalGain() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = \"test_feature_name_\" + i;\n    Map<String, Double> scoreMap = booster.getScore(featureNames, \"total_gain\");\n    for (String fName: scoreMap.keySet()) TestCase.assertTrue(fName.startsWith(\"test_feature_name_\"));\n  }\n\n  @Test\n  public void testGetFeatureImportanceCover() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = 
\"test_feature_name_\" + i;\n    Map<String, Double> scoreMap = booster.getScore(featureNames, \"cover\");\n    for (String fName: scoreMap.keySet()) TestCase.assertTrue(fName.startsWith(\"test_feature_name_\"));\n  }\n\n  @Test\n  public void testGetFeatureImportanceTotalCover() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    String[] featureNames = new String[126];\n    for(int i = 0; i < 126; i++) featureNames[i] = \"test_feature_name_\" + i;\n    Map<String, Double> scoreMap = booster.getScore(featureNames, \"total_cover\");\n    for (String fName: scoreMap.keySet()) TestCase.assertTrue(fName.startsWith(\"test_feature_name_\"));\n  }\n\n  @Test\n  public void testQuantileHistoDepthwiseMaxDepth() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"tree_method\", \"hist\");\n        put(\"grow_policy\", \"depthwise\");\n        put(\"eval_metric\", \"auc\");\n      }\n    };\n    Map<String, DMatrix> watches = new HashMap<>();\n    watches.put(\"training\", trainMat);\n    testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);\n  }\n\n  @Test\n  public void testQuantileHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n        put(\"tree_method\", \"hist\");\n        put(\"max_bin\", 2);\n        put(\"grow_policy\", \"depthwise\");\n        put(\"eval_metric\", \"auc\");\n      }\n    };\n    Map<String, 
DMatrix> watches = new HashMap<>();\n    watches.put(\"training\", trainMat);\n    testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);\n  }\n\n  /**\n   * test cross valiation\n   *\n   * @throws XGBoostError\n   */\n  @Test\n  public void testCV() throws XGBoostError {\n    //load train mat\n    DMatrix trainMat = new DMatrix(this.train_uri);\n\n    //set params\n    Map<String, Object> param = new HashMap<String, Object>() {\n      {\n        put(\"eta\", 1.0);\n        put(\"max_depth\", 3);\n        put(\"silent\", 1);\n        put(\"nthread\", 6);\n        put(\"objective\", \"binary:logistic\");\n        put(\"gamma\", 1.0);\n        put(\"eval_metric\", \"error\");\n      }\n    };\n\n    //do 5-fold cross validation\n    int round = 2;\n    int nfold = 5;\n    String[] evalHist = XGBoost.crossValidation(trainMat, param, round, nfold, null, null, null);\n  }\n\n  /**\n   * test train from existing model\n   *\n   * @throws XGBoostError\n   */\n  @Test\n  public void testTrainFromExistingModel() throws XGBoostError, IOException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    IEvaluation eval = new EvalError();\n\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"eta\", 1.0);\n        put(\"max_depth\", 2);\n        put(\"silent\", 1);\n        put(\"objective\", \"binary:logistic\");\n      }\n    };\n\n    //set watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    // Train without saving temp booster\n    int round = 4;\n    Booster booster1 = XGBoost.train(trainMat, paramMap, round, watches, null, null, null, 0);\n    float booster1error = eval.eval(booster1.predict(testMat, true, 0), testMat);\n\n    // Train with temp Booster\n    round = 2;\n    Booster tempBooster = XGBoost.train(trainMat, paramMap, round, watches, null, null, 
null, 0);\n    float tempBoosterError = eval.eval(tempBooster.predict(testMat, true, 0), testMat);\n\n    // Save tempBooster to bytestream and load back\n    ByteArrayInputStream in = new ByteArrayInputStream(tempBooster.toByteArray());\n    tempBooster = XGBoost.loadModel(in);\n    in.close();\n\n    // Continue training using tempBooster\n    round = 2;\n    Booster booster2 = XGBoost.train(trainMat, paramMap, round, watches, null, null, null, 0, tempBooster);\n    float booster2error = eval.eval(booster2.predict(testMat, true, 0), testMat);\n    TestCase.assertTrue(booster1error == booster2error);\n    TestCase.assertTrue(tempBoosterError > booster2error);\n  }\n\n  /**\n   * test set/get attributes to/from a booster\n   *\n   * @throws XGBoostError\n   */\n  @Test\n  public void testSetAndGetAttrs() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    booster.setAttr(\"testKey1\", \"testValue1\");\n    TestCase.assertEquals(booster.getAttr(\"testKey1\"), \"testValue1\");\n    booster.setAttr(\"testKey1\", \"testValue2\");\n    TestCase.assertEquals(booster.getAttr(\"testKey1\"), \"testValue2\");\n\n    booster.setAttrs(new HashMap<String, String>(){{\n      put(\"aa\", \"AA\");\n      put(\"bb\", \"BB\");\n      put(\"cc\", \"CC\");\n    }});\n\n    Map<String, String> attr = booster.getAttrs();\n    TestCase.assertEquals(attr.size(), 6);\n    TestCase.assertEquals(attr.get(\"testKey1\"), \"testValue2\");\n    TestCase.assertEquals(attr.get(\"aa\"), \"AA\");\n    TestCase.assertEquals(attr.get(\"bb\"), \"BB\");\n    TestCase.assertEquals(attr.get(\"cc\"), \"CC\");\n  }\n\n  /**\n   * test get number of features from a booster\n   *\n   * @throws XGBoostError\n   */\n  @Test\n  public void testGetNumFeature() throws XGBoostError {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new 
DMatrix(this.test_uri);\n\n    Booster booster = trainBooster(trainMat, testMat);\n    TestCase.assertEquals(booster.getNumFeature(), 126);\n  }\n\n  @Test\n  public void testConcurrentPredict() throws InterruptedException, XGBoostError, ExecutionException, TimeoutException {\n    DMatrix trainMat = new DMatrix(this.train_uri);\n    DMatrix testMat = new DMatrix(this.test_uri);\n    Booster booster = trainBooster(trainMat, testMat);\n\n    float[][] expectedPredictions = booster.predict(testMat);\n\n    ExecutorService executor = Executors.newFixedThreadPool(10);\n    List<CompletableFuture<Void>> futures = new ArrayList<>();\n\n    //10 threads - each calling predict 50 times\n    for (int t = 0; t < 10; t++) {\n      futures.add(CompletableFuture.runAsync(() -> {\n        try {\n          for (int i = 0; i < 50; i++) {\n            float[][] predictions = booster.predict(testMat);\n            assertArrayEquals(expectedPredictions, predictions);\n          }\n        } catch (XGBoostError e) {\n          throw new RuntimeException(e);\n        }\n      }, executor));\n    }\n\n    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))\n      .get(30, TimeUnit.SECONDS);\n    executor.shutdown();\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/ConfigContextTest.java",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport junit.framework.TestCase;\nimport org.junit.Test;\n\nimport java.util.HashMap;\n\n/**\n * Test cases for the config context.\n */\npublic class ConfigContextTest {\n  @Test\n  public void testBasic() throws XGBoostError {\n    try (ConfigContext ctx = new ConfigContext()) {\n      TestCase.assertEquals(1, ctx.getConfig(\"verbosity\"));\n\n      ctx.setConfig(\"verbosity\", 3);\n      TestCase.assertEquals(3, ctx.getConfig(\"verbosity\"));\n    }\n  }\n\n  @Test\n  public void testWriteMap() throws XGBoostError {\n    try (ConfigContext ctx = new ConfigContext()) {\n      TestCase.assertEquals(1, ctx.getConfig(\"verbosity\"));\n      TestCase.assertEquals(false, ctx.getConfig(\"use_rmm\"));\n    }\n\n    HashMap<String, Object> configs = new HashMap<>();\n    configs.put(\"verbosity\", 3);\n    configs.put(\"use_rmm\", true);\n    try (ConfigContext ctx = new ConfigContext(configs)) {\n      TestCase.assertEquals(3, ctx.getConfig(\"verbosity\"));\n      TestCase.assertEquals(true, ctx.getConfig(\"use_rmm\"));\n    }\n\n    try (ConfigContext ctx = new ConfigContext()) {\n      TestCase.assertEquals(1, ctx.getConfig(\"verbosity\"));\n      TestCase.assertEquals(false, ctx.getConfig(\"use_rmm\"));\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.File;\nimport java.io.FileOutputStream;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.util.Arrays;\nimport java.util.HashMap;\nimport java.util.Map;\nimport java.util.Random;\n\nimport junit.framework.TestCase;\nimport ml.dmlc.xgboost4j.LabeledPoint;\nimport ml.dmlc.xgboost4j.java.util.BigDenseMatrix;\nimport org.junit.Test;\n\nimport static org.junit.Assert.assertArrayEquals;\nimport static org.junit.Assert.assertEquals;\n\n/**\n * test cases for DMatrix\n *\n * @author hzx\n */\npublic class DMatrixTest {\n\n\n  @Test\n  public void testCreateFromDataIteratorWithMissingValue() throws XGBoostError {\n    //create DMatrix from DataIterator\n    java.util.List<LabeledPoint> blist = new java.util.LinkedList<>();\n    blist.add(new LabeledPoint(0.1f, 4, null, new float[]{1, 0, 0, 0}));\n    blist.add(new LabeledPoint(0.1f, 4, null, new float[]{Float.NaN, 13, 14, 15}));\n    blist.add(new LabeledPoint(0.1f, 4, null, new float[]{21, 23, 0, 25}));\n\n    // Default missing value: Float.NaN\n    DMatrix dmat = new DMatrix(blist.iterator(), null);\n    assert dmat.nonMissingNum() == 11;\n\n    // missing value 0\n    dmat = new DMatrix(blist.iterator(), null, 0.0f);\n    assert dmat.nonMissingNum() == 12 - 4 - 1;\n\n    // missing value 21\n    dmat = new DMatrix(blist.iterator(), null, 21.0f);\n   
 assert dmat.nonMissingNum() == 12 - 1 - 1;\n\n    // missing value 101010101010\n    dmat = new DMatrix(blist.iterator(), null, 101010101010.0f);\n    assert dmat.nonMissingNum() == 12 - 1;\n  }\n\n  @Test\n  public void testCreateFromDataIterator() throws XGBoostError {\n    //create DMatrix from DataIterator\n\n    java.util.ArrayList<Float> labelall = new java.util.ArrayList<Float>();\n    int nrep = 3000;\n    java.util.List<LabeledPoint> blist = new java.util.LinkedList<LabeledPoint>();\n    for (int i = 0; i < nrep; ++i) {\n      LabeledPoint p = new LabeledPoint(\n        0.1f + i, 4, new int[]{0, 2, 3}, new float[]{3, 4, 5});\n      blist.add(p);\n      labelall.add(p.label());\n    }\n    DMatrix dmat = new DMatrix(blist.iterator(), null);\n    // get label\n    float[] labels = dmat.getLabel();\n    for (int i = 0; i < labels.length; ++i) {\n      TestCase.assertTrue(labelall.get(i) == labels[i]);\n    }\n  }\n\n  @Test\n  public void testCreateFromDataIteratorWithDiffFeatureSize() throws XGBoostError {\n    //create DMatrix from DataIterator\n\n    java.util.ArrayList<Float> labelall = new java.util.ArrayList<Float>();\n    int nrep = 3000;\n    java.util.List<LabeledPoint> blist = new java.util.LinkedList<LabeledPoint>();\n    int featureSize = 4;\n    for (int i = 0; i < nrep; ++i) {\n      // set some rows with wrong feature size\n      if (i % 10 == 1) {\n        featureSize = 5;\n      }\n      LabeledPoint p = new LabeledPoint(\n        0.1f + i, featureSize, new int[]{0, 2, 3}, new float[]{3, 4, 5});\n      blist.add(p);\n      labelall.add(p.label());\n    }\n    boolean success = true;\n    try {\n      DMatrix dmat = new DMatrix(blist.iterator(), null);\n    } catch (XGBoostError e) {\n      success = false;\n    }\n    TestCase.assertTrue(success == false);\n  }\n\n  @Test\n  public void testCreateFromFile() throws XGBoostError {\n    //create DMatrix from file\n    String filePath = writeResourceIntoTempFile(\"/agaricus.txt.test\");\n    
DMatrix dmat = new DMatrix(filePath + \"?format=libsvm\");\n    //get label\n    float[] labels = dmat.getLabel();\n    //check length\n    TestCase.assertTrue(dmat.rowNum() == labels.length);\n    //set weights\n    float[] weights = Arrays.copyOf(labels, labels.length);\n    dmat.setWeight(weights);\n    float[] dweights = dmat.getWeight();\n    TestCase.assertTrue(Arrays.equals(weights, dweights));\n  }\n\n  @Test\n  public void testCreateFromCSR() throws XGBoostError {\n    //create Matrix from csr format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2 3 0\n     * 4 0 2 3 5\n     * 3 1 2 5 0\n     */\n    float[] data = new float[]{1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5};\n    int[] colIndex = new int[]{0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3};\n    long[] rowHeaders = new long[]{0, 3, 7, 11};\n    DMatrix dmat1 = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR);\n    //check row num\n    TestCase.assertTrue(dmat1.rowNum() == 3);\n    //test set label\n    float[] label1 = new float[]{1, 0, 1};\n    dmat1.setLabel(label1);\n    float[] label2 = dmat1.getLabel();\n    TestCase.assertTrue(Arrays.equals(label1, label2));\n  }\n\n  @Test\n  public void testCreateFromCSREx() throws XGBoostError {\n    //create Matrix from csr format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2 3 0\n     * 4 0 2 3 5\n     * 3 1 2 5 0\n     */\n    float[] data = new float[]{1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5};\n    int[] colIndex = new int[]{0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3};\n    long[] rowHeaders = new long[]{0, 3, 7, 11};\n    DMatrix dmat1 = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR, 5);\n    //check row num\n    TestCase.assertTrue(dmat1.rowNum() == 3);\n    //test set label\n    float[] label1 = new float[]{1, 0, 1};\n    dmat1.setLabel(label1);\n    float[] label2 = dmat1.getLabel();\n    TestCase.assertTrue(Arrays.equals(label1, label2));\n  }\n\n  @Test\n  public void testCreateFromCSC() throws XGBoostError {\n  
  //create Matrix from csc format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2\n     * 3 0 4\n     * 0 2 3\n     * 5 3 1\n     * 2 5 0\n     */\n    float[] data = new float[]{1, 3, 5, 2, 2, 3, 5, 2, 4, 3, 1};\n    int[] rowIndex = new int[]{0, 1, 3, 4, 2, 3, 4, 0, 1, 2, 3};\n    long[] colHeaders = new long[]{0, 4, 7, 11};\n    DMatrix dmat1 = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC);\n    //check row num\n    System.out.println(dmat1.rowNum());\n    TestCase.assertTrue(dmat1.rowNum() == 5);\n    //test set label\n    float[] label1 = new float[]{1, 0, 1, 1, 1};\n    dmat1.setLabel(label1);\n    float[] label2 = dmat1.getLabel();\n    TestCase.assertTrue(Arrays.equals(label1, label2));\n  }\n\n  @Test\n  public void testCreateFromCSCEx() throws XGBoostError {\n    //create Matrix from csc format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2\n     * 3 0 4\n     * 0 2 3\n     * 5 3 1\n     * 2 5 0\n     */\n    float[] data = new float[]{1, 3, 5, 2, 2, 3, 5, 2, 4, 3, 1};\n    int[] rowIndex = new int[]{0, 1, 3, 4, 2, 3, 4, 0, 1, 2, 3};\n    long[] colHeaders = new long[]{0, 4, 7, 11};\n    DMatrix dmat1 = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC, 5);\n    //check row num\n    System.out.println(dmat1.rowNum());\n    TestCase.assertTrue(dmat1.rowNum() == 5);\n    //test set label\n    float[] label1 = new float[]{1, 0, 1, 1, 1};\n    dmat1.setLabel(label1);\n    float[] label2 = dmat1.getLabel();\n    TestCase.assertTrue(Arrays.equals(label1, label2));\n  }\n\n  @Test\n  public void testCreateFromDenseMatrix() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    int nrow = 10;\n    int ncol = 5;\n    float[] data0 = new float[nrow * ncol];\n    //put random nums\n    Random random = new Random();\n    for (int i = 0; i < nrow * ncol; i++) {\n      data0[i] = random.nextFloat();\n    }\n\n    //create label\n    float[] label0 = new float[nrow];\n    for 
(int i = 0; i < nrow; i++) {\n      label0[i] = random.nextFloat();\n    }\n\n    DMatrix dmat0 = new DMatrix(data0, nrow, ncol, Float.NaN);\n    dmat0.setLabel(label0);\n\n    //check\n    TestCase.assertTrue(dmat0.rowNum() == 10);\n    TestCase.assertTrue(dmat0.getLabel().length == 10);\n\n    //set weights for each instance\n    float[] weights = new float[nrow];\n    for (int i = 0; i < nrow; i++) {\n      weights[i] = random.nextFloat();\n    }\n    dmat0.setWeight(weights);\n\n    TestCase.assertTrue(Arrays.equals(weights, dmat0.getWeight()));\n  }\n\n  private DMatrix createFromDenseMatrix() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    int nrow = 10;\n    int ncol = 5;\n    float[] data0 = new float[nrow * ncol];\n    //put random nums\n    Random random = new Random();\n    for (int i = 0; i < nrow * ncol; i++) {\n      if (i % 10 == 0) {\n        data0[i] = -0.1f;\n      } else {\n        data0[i] = random.nextFloat();\n      }\n    }\n\n    //create label\n    float[] label0 = new float[nrow];\n    for (int i = 0; i < nrow; i++) {\n      label0[i] = random.nextFloat();\n    }\n\n    DMatrix dm = new DMatrix(data0, nrow, ncol, -0.1f);\n    dm.setLabel(label0);\n    return dm;\n  }\n\n  @Test\n  public void testCreateFromDenseMatrixWithMissingValue() throws XGBoostError {\n    DMatrix dm = createFromDenseMatrix();\n    //check\n    TestCase.assertTrue(dm.rowNum() == 10);\n    TestCase.assertTrue(dm.getLabel().length == 10);\n  }\n\n  @Test\n  public void testCreateFromDenseMatrixRef() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    final int nrow = 10;\n    final int ncol = 5;\n\n    DMatrix dmat0 = null;\n    BigDenseMatrix data0 = null;\n    try {\n      data0 = new BigDenseMatrix(nrow, ncol);\n      //put random nums\n      Random random = new Random();\n      for (int i = 0; i < nrow * ncol; i++) {\n        data0.set(i, random.nextFloat());\n      }\n\n      //create label\n      float[] label0 = new 
float[nrow];\n      for (int i = 0; i < nrow; i++) {\n        label0[i] = random.nextFloat();\n      }\n\n      dmat0 = new DMatrix(data0, Float.NaN);\n      dmat0.setLabel(label0);\n\n      //check\n      TestCase.assertTrue(dmat0.rowNum() == 10);\n      TestCase.assertTrue(dmat0.getLabel().length == 10);\n    } finally {\n      if (dmat0 != null) {\n        dmat0.dispose();\n      } else if (data0 != null) {\n        data0.dispose();\n      }\n    }\n  }\n\n  @Test\n  public void testTrainWithDenseMatrixRef() throws XGBoostError {\n    Map<String, Object> rabitEnv = new HashMap<>();\n    rabitEnv.put(\"DMLC_TASK_ID\", \"0\");\n    Communicator.init(rabitEnv);\n    DMatrix trainMat = null;\n    BigDenseMatrix data0 = null;\n    try {\n      // trivial dataset with 3 rows and 2 columns\n      // (4,5) -> 1\n      // (3,1) -> 2\n      // (2,3) -> 3\n      float[][] data = new float[][]{\n        new float[]{4f, 5f},\n        new float[]{3f, 1f},\n        new float[]{2f, 3f}\n      };\n      data0 = new BigDenseMatrix(3, 2);\n      for (int i = 0; i < data0.nrow; i++)\n        for (int j = 0; j < data0.ncol; j++)\n          data0.set(i, j, data[i][j]);\n\n      trainMat = new DMatrix(data0, Float.NaN);\n      trainMat.setLabel(new float[]{1f, 2f, 3f});\n\n      HashMap<String, Object> params = new HashMap<>();\n      params.put(\"eta\", 1);\n      params.put(\"max_depth\", 5);\n      params.put(\"silent\", 1);\n      params.put(\"objective\", \"reg:linear\");\n      params.put(\"seed\", 123);\n\n      HashMap<String, DMatrix> watches = new HashMap<>();\n      watches.put(\"train\", trainMat);\n\n      Booster booster = XGBoost.train(trainMat, params, 10, watches, null, null);\n\n      // check overfitting\n      // (4,5) -> 1\n      // (3,1) -> 2\n      // (2,3) -> 3\n      for (int i = 0; i < 3; i++) {\n        float[][] preds = booster.predict(new DMatrix(data[i], 1, 2, Float.NaN));\n        assertEquals(1, preds.length);\n        assertArrayEquals(new 
float[]{(float) (i + 1)}, preds[0], 1e-2f);\n      }\n    } finally {\n      if (trainMat != null)\n        trainMat.dispose();\n      else if (data0 != null) {\n        data0.dispose();\n      }\n      Communicator.shutdown();\n    }\n  }\n\n  private String writeResourceIntoTempFile(String resource) {\n    InputStream input = getClass().getResourceAsStream(resource);\n    if (input == null) {\n      throw new IllegalArgumentException(\"Resource \" + resource + \" does not exist.\");\n    }\n    File tmp;\n    try {\n      tmp = File.createTempFile(\"junit\", \".test\");\n    } catch (IOException e) {\n      throw new RuntimeException(\"Unable to write to temp file.\", e);\n    }\n    byte[] buff = new byte[1024];\n    try (FileOutputStream output = new FileOutputStream(tmp)) {\n      int n;\n      while ((n = input.read(buff)) > 0) {\n        output.write(buff, 0, n);\n      }\n    } catch (IOException e) {\n      throw new RuntimeException(\"Unable to write to temp file.\", e);\n    }\n    return tmp.getAbsolutePath();\n  }\n\n  @Test\n  public void testSetAndGetGroup() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    int nrow = 10;\n    int ncol = 5;\n    float[] data0 = new float[nrow * ncol];\n    //put random nums\n    Random random = new Random();\n    for (int i = 0; i < nrow * ncol; i++) {\n      data0[i] = random.nextFloat();\n    }\n\n    //create label\n    float[] label0 = new float[nrow];\n    for (int i = 0; i < nrow; i++) {\n      label0[i] = random.nextFloat();\n    }\n\n    //create two groups\n    int[] groups = new int[]{5, 5};\n\n    DMatrix dmat0 = new DMatrix(data0, nrow, ncol, -0.1f);\n    dmat0.setLabel(label0);\n    dmat0.setGroup(groups);\n\n    //check\n    TestCase.assertTrue(Arrays.equals(new int[]{0, 5, 10}, dmat0.getGroup()));\n  }\n\n  @Test\n  public void testSetAndGetFeatureInfo() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    int nrow = 10;\n    int ncol = 5;\n    float[] data = 
new float[nrow * ncol];\n    //put random nums\n    Random random = new Random();\n    for (int i = 0; i < nrow * ncol; i++) {\n      data[i] = random.nextInt();\n    }\n\n    DMatrix dmat = new DMatrix(data, nrow, ncol, Float.NaN);\n\n    String[] featureNames = new String[]{\"f1\", \"f2\", \"f3\", \"f4\", \"f5\"};\n    dmat.setFeatureNames(featureNames);\n    String[] retFeatureNames = dmat.getFeatureNames();\n    assertArrayEquals(featureNames, retFeatureNames);\n\n    String[] featureTypes = new String[]{\"i\", \"q\", \"c\", \"i\", \"q\"};\n    dmat.setFeatureTypes(featureTypes);\n    String[] retFeatureTypes = dmat.getFeatureTypes();\n    assertArrayEquals(featureTypes, retFeatureTypes);\n  }\n\n  @Test\n  public void testSetAndGetQueryId() throws XGBoostError {\n    //create DMatrix from 10*5 dense matrix\n    int nrow = 10;\n    int ncol = 5;\n    float[] data0 = new float[nrow * ncol];\n    //put random nums\n    Random random = new Random();\n    for (int i = 0; i < nrow * ncol; i++) {\n      data0[i] = random.nextFloat();\n    }\n\n    //create label\n    float[] label0 = new float[nrow];\n    for (int i = 0; i < nrow; i++) {\n      label0[i] = random.nextFloat();\n    }\n\n    //create two groups\n    int[] qid = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n    int[] qidExpected = new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n\n    DMatrix dmat0 = new DMatrix(data0, nrow, ncol, -0.1f);\n    dmat0.setLabel(label0);\n    dmat0.setQueryId(qid);\n    //check\n    TestCase.assertTrue(Arrays.equals(qidExpected, dmat0.getGroup()));\n\n    //create two groups\n    int[] qid1 = new int[]{10, 10, 10, 20, 60, 60, 80, 80, 90, 100};\n    int[] qidExpected1 = new int[]{0, 3, 4, 6, 8, 9, 10};\n    dmat0.setQueryId(qid1);\n    TestCase.assertTrue(Arrays.equals(qidExpected1, dmat0.getGroup()));\n\n  }\n\n  @Test\n  public void getGetQuantileCut() throws XGBoostError {\n    DMatrix Xy = createFromDenseMatrix();\n    Map<String, Object> params = new HashMap<String, 
Object>();\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", Xy);\n    XGBoost.train(Xy, params, 1, watches, null, null); // Create the cuts\n    DMatrix.QuantileCut cuts = Xy.getQuantileCut();\n    TestCase.assertEquals(cuts.indptr.length, 6);\n    for (int i = 1; i < cuts.indptr.length; ++i) {\n      // Number of bins for each feature + min value.\n      TestCase.assertTrue(cuts.indptr[i] - cuts.indptr[i - 1] >= 5);\n      TestCase.assertTrue(cuts.indptr[i] - cuts.indptr[i - 1] <= Xy.rowNum() + 1);\n    }\n    TestCase.assertEquals(cuts.values.length, cuts.indptr[cuts.indptr.length - 1]);\n    for (int i = 1; i < cuts.indptr.length; ++i) {\n      long begin = cuts.indptr[i - 1];\n      long end = cuts.indptr[i];\n      for (long j = begin + 1; j < end; ++j) {\n        TestCase.assertTrue(cuts.values[(int) j] > cuts.values[(int) j - 1]);\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/LibraryPathProviderTest.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport org.junit.Test;\n\nimport static junit.framework.TestCase.assertEquals;\nimport static junit.framework.TestCase.assertTrue;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.Arch.X86_64;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.LibraryPathProvider.getLibraryPathFor;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.OS.LINUX;\n\npublic class LibraryPathProviderTest {\n\n  @Test\n  public void testLibraryPathProviderUsesOsAndArchToResolvePath() {\n    String libraryPath = getLibraryPathFor(LINUX, X86_64, \"someLibrary\");\n\n    assertTrue(libraryPath.startsWith(\"/lib/linux/x86_64/\"));\n  }\n\n  @Test\n  public void testLibraryPathProviderUsesPropertyValueForPathIfPresent() {\n    String propertyName = \"xgboostruntime.native.library\";\n\n    executeAndRestoreProperty(propertyName, () -> {\n      System.setProperty(propertyName, \"/my/custom/path/to/my/library\");\n      String libraryPath = getLibraryPathFor(LINUX, X86_64, \"library\");\n\n      assertEquals(\"/my/custom/path/to/my/library\", libraryPath);\n    });\n  }\n\n  private static void executeAndRestoreProperty(String propertyName, Runnable action) {\n    String oldValue = System.getProperty(propertyName);\n\n    try {\n      action.run();\n    } finally {\n      if (oldValue != null) {\n        System.setProperty(propertyName, oldValue);\n      
} else {\n        System.clearProperty(propertyName);\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/OsDetectionTest.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport ml.dmlc.xgboost4j.java.NativeLibLoader.OS;\nimport org.junit.Test;\nimport org.junit.experimental.runners.Enclosed;\nimport org.junit.runner.RunWith;\nimport org.junit.runners.Parameterized;\nimport org.junit.runners.Parameterized.Parameters;\n\nimport java.util.Collection;\n\nimport static java.util.Arrays.asList;\nimport static junit.framework.TestCase.assertSame;\nimport static ml.dmlc.xgboost4j.java.NativeLibLoader.OS.*;\nimport static org.junit.Assert.assertThrows;\n\n/**\n * Test cases for {@link OS}.\n */\n@RunWith(Enclosed.class)\npublic class OsDetectionTest {\n\n  private static final String OS_NAME_PROPERTY = \"os.name\";\n\n  @RunWith(Parameterized.class)\n  public static class SupportedOSDetectionTest {\n\n    private final String osNameValue;\n    private final OS expectedOS;\n\n    public SupportedOSDetectionTest(String osNameValue, OS expectedOS) {\n      this.osNameValue = osNameValue;\n      this.expectedOS = expectedOS;\n    }\n\n    @Parameters\n    public static Collection<Object[]> data() {\n      return asList(new Object[][]{\n        {\"windows\", WINDOWS},\n        {\"mac\", MACOS},\n        {\"darwin\", MACOS},\n        {\"linux\", LINUX},\n        {\"sunos\", SOLARIS}\n      });\n    }\n\n    @Test\n    public void getOS() {\n      executeAndRestoreProperty(() -> {\n        
System.setProperty(OS_NAME_PROPERTY, osNameValue);\n        assertSame(detectOS(), expectedOS);\n      });\n    }\n  }\n\n  public static class UnsupportedOSDetectionTest {\n\n    @Test\n    public void testUnsupportedOs() {\n      executeAndRestoreProperty(() -> {\n        System.setProperty(OS_NAME_PROPERTY, \"unsupported\");\n        assertThrows(IllegalStateException.class, OS::detectOS);\n      });\n    }\n  }\n\n  private static void executeAndRestoreProperty(Runnable action) {\n    String oldValue = System.getProperty(OS_NAME_PROPERTY);\n\n    try {\n      action.run();\n    } finally {\n      if (oldValue != null) {\n        System.setProperty(OS_NAME_PROPERTY, oldValue);\n      } else {\n        System.clearProperty(OS_NAME_PROPERTY);\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/XGBoostTest.java",
    "content": "/*\n Copyright (c) 2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport junit.framework.TestCase;\nimport ml.dmlc.xgboost4j.LabeledPoint;\nimport org.junit.Test;\n\nimport java.util.HashMap;\nimport java.util.Map;\nimport java.util.Random;\n\npublic class XGBoostTest {\n\n  private String composeEvalInfo(String metric, String evalName) {\n    return \"[0]\\t\" + evalName + \"-\" + metric + \":\" + \"\\ttest\";\n  }\n\n  @Test\n  public void testIsMaximizeEvaluation() {\n    String[] minimum_metrics = {\"mape\", \"logloss\", \"error\", \"others\"};\n    String[] evalNames = {\"set-abc\"};\n\n    HashMap<String, Object> params = new HashMap<>();\n\n    // test1, infer the metric from faked log\n    for (String x : XGBoost.MAXIMIZ_METRICES) {\n      String evalInfo = composeEvalInfo(x, evalNames[0]);\n      TestCase.assertTrue(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n    }\n\n    // test2, the direction for mape should be minimum\n    String evalInfo = composeEvalInfo(\"mape\", evalNames[0]);\n    TestCase.assertFalse(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n\n    // test3, force maximize_evaluation_metrics\n    params.clear();\n    params.put(\"maximize_evaluation_metrics\", true);\n    // auc should be max,\n    evalInfo = composeEvalInfo(\"auc\", evalNames[0]);\n    TestCase.assertTrue(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n\n    
params.clear();\n    params.put(\"maximize_evaluation_metrics\", false);\n    // auc should be min,\n    evalInfo = composeEvalInfo(\"auc\", evalNames[0]);\n    TestCase.assertFalse(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n\n    // test4, set the metric manually\n    for (String x : XGBoost.MAXIMIZ_METRICES) {\n      params.clear();\n      params.put(\"eval_metric\", x);\n      evalInfo = composeEvalInfo(x, evalNames[0]);\n      TestCase.assertTrue(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n    }\n\n    // test5, set the metric manually\n    for (String x : minimum_metrics) {\n      params.clear();\n      params.put(\"eval_metric\", x);\n      evalInfo = composeEvalInfo(x, evalNames[0]);\n      TestCase.assertFalse(XGBoost.isMaximizeEvaluation(evalInfo, evalNames, params));\n    }\n\n  }\n\n  @Test\n  public void testEarlyStop() throws XGBoostError {\n    Random random = new Random(1);\n\n    java.util.ArrayList<Float> labelall = new java.util.ArrayList<Float>();\n    int nrep = 3000;\n    java.util.List<LabeledPoint> blist = new java.util.LinkedList<LabeledPoint>();\n    for (int i = 0; i < nrep; ++i) {\n      LabeledPoint p = new LabeledPoint(\n        i % 2, 4,\n        new int[]{0, 1, 2, 3},\n        new float[]{random.nextFloat(), random.nextFloat(), random.nextFloat(), random.nextFloat()});\n      blist.add(p);\n      labelall.add(p.label());\n    }\n\n    DMatrix dmat = new DMatrix(blist.iterator(), null);\n\n    int round = 50;\n    int earlyStop = 2;\n\n    HashMap<String, Object> mapParams = new HashMap<>();\n    mapParams.put(\"eta\", 0.1);\n    mapParams.put(\"objective\", \"binary:logistic\");\n    mapParams.put(\"max_depth\", 3);\n    mapParams.put(\"eval_metric\", \"auc\");\n    mapParams.put(\"silent\", 0);\n\n    HashMap<String, DMatrix> mapWatches = new HashMap<>();\n    mapWatches.put(\"selTrain-*\", dmat);\n\n    try {\n      Booster booster = XGBoost.train(dmat, mapParams, round, mapWatches, null, null, 
null, earlyStop);\n      Map<String, String> attrs = booster.getAttrs();\n      TestCase.assertTrue(Integer.valueOf(attrs.get(\"best_iteration\")) < round - 1);\n    } catch (Exception e) {\n      TestCase.assertFalse(false);\n    }\n\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/DMatrixSuite.scala",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport java.util.Arrays\n\nimport scala.util.Random\n\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.java.{DMatrix => JDMatrix}\n\nclass DMatrixSuite extends AnyFunSuite {\n  test(\"create DMatrix from File\") {\n    val dmat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    // get label\n    val labels: Array[Float] = dmat.getLabel\n    // check length\n    assert(dmat.rowNum === labels.length)\n    // set weights\n    val weights: Array[Float] = Arrays.copyOf(labels, labels.length)\n    dmat.setWeight(weights)\n    val dweights: Array[Float] = dmat.getWeight\n    assert(weights === dweights)\n  }\n\n  test(\"create DMatrix from CSR\") {\n    // create Matrix from csr format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2 3 0\n     * 4 0 2 3 5\n     * 3 1 2 5 0\n     */\n    val data = List[Float](1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5).toArray\n    val colIndex = List(0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3).toArray\n    val rowHeaders = List[Long](0, 3, 7, 11).toArray\n    val dmat1 = new DMatrix(rowHeaders, colIndex, data, JDMatrix.SparseType.CSR)\n    assert(dmat1.rowNum === 3)\n    val label1 = List[Float](1, 0, 1).toArray\n    dmat1.setLabel(label1)\n    val label2 = dmat1.getLabel\n    assert(label2 === label1)\n\n    val dmat2 = new DMatrix(rowHeaders, colIndex, 
data, JDMatrix.SparseType.CSR, 5, 1.0f, -1)\n    assert(dmat2.nonMissingNum === 9);\n  }\n\n  test(\"create DMatrix from CSREx\") {\n    // create Matrix from csr format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2 3 0\n     * 4 0 2 3 5\n     * 3 1 2 5 0\n     */\n    val data = List[Float](1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5).toArray\n    val colIndex = List(0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3).toArray\n    val rowHeaders = List[Long](0, 3, 7, 11).toArray\n    val dmat1 = new DMatrix(rowHeaders, colIndex, data, JDMatrix.SparseType.CSR, 5)\n    assert(dmat1.rowNum === 3)\n    val label1 = List[Float](1, 0, 1).toArray\n    dmat1.setLabel(label1)\n    val label2 = dmat1.getLabel\n    assert(label2 === label1)\n  }\n\n  test(\"create DMatrix from CSC\") {\n    // create Matrix from csc format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2\n     * 3 0 4\n     * 0 2 3\n     * 5 3 1\n     * 2 5 0\n     */\n    val data = List[Float](1, 3, 5, 2, 2, 3, 5, 2, 4, 3, 1).toArray\n    val rowIndex = List(0, 1, 3, 4, 2, 3, 4, 0, 1, 2, 3).toArray\n    val colHeaders = List[Long](0, 4, 7, 11).toArray\n    val dmat1 = new DMatrix(colHeaders, rowIndex, data, JDMatrix.SparseType.CSC)\n    assert(dmat1.rowNum === 5)\n    val label1 = List[Float](1, 0, 1, 1, 1).toArray\n    dmat1.setLabel(label1)\n    val label2 = dmat1.getLabel\n    assert(label2 === label1)\n\n    val dmat2 = new DMatrix(colHeaders, rowIndex, data, JDMatrix.SparseType.CSC, 5, 1.0f, -1)\n    assert(dmat2.nonMissingNum === 9);\n  }\n\n  test(\"create DMatrix from CSCEx\") {\n    // create Matrix from csc format sparse Matrix and labels\n    /**\n     * sparse matrix\n     * 1 0 2\n     * 3 0 4\n     * 0 2 3\n     * 5 3 1\n     * 2 5 0\n     */\n    val data = List[Float](1, 3, 5, 2, 2, 3, 5, 2, 4, 3, 1).toArray\n    val rowIndex = List(0, 1, 3, 4, 2, 3, 4, 0, 1, 2, 3).toArray\n    val colHeaders = List[Long](0, 4, 7, 11).toArray\n    val dmat1 = new DMatrix(colHeaders, rowIndex, 
data, JDMatrix.SparseType.CSC, 5)\n    assert(dmat1.rowNum === 5)\n    val label1 = List[Float](1, 0, 1, 1, 1).toArray\n    dmat1.setLabel(label1)\n    val label2 = dmat1.getLabel\n    assert(label2 === label1)\n  }\n\n  test(\"create DMatrix from DenseMatrix\") {\n    val nrow = 10\n    val ncol = 5\n    val data0 = new Array[Float](nrow * ncol)\n    // put random nums\n    for (i <- data0.indices) {\n      data0(i) = Random.nextFloat()\n    }\n    // create label\n    val label0 = new Array[Float](nrow)\n    for (i <- label0.indices) {\n      label0(i) = Random.nextFloat()\n    }\n    val dmat0 = new DMatrix(data0, nrow, ncol, Float.NaN)\n    dmat0.setLabel(label0)\n    // check\n    assert(dmat0.rowNum === 10)\n    assert(dmat0.getLabel.length === 10)\n    // set weights for each instance\n    val weights = new Array[Float](nrow)\n    for (i <- weights.indices) {\n      weights(i) = Random.nextFloat()\n    }\n    dmat0.setWeight(weights)\n    assert(weights === dmat0.getWeight)\n  }\n\n  test(\"create DMatrix from DenseMatrix with missing value\") {\n    val nrow = 10\n    val ncol = 5\n    val data0 = new Array[Float](nrow * ncol)\n    // put random nums\n    for (i <- data0.indices) {\n      if (i % 10 == 0) {\n        data0(i) = -0.1f\n      } else {\n        data0(i) = Random.nextFloat()\n      }\n    }\n    // create label\n    val label0 = new Array[Float](nrow)\n    for (i <- label0.indices) {\n      label0(i) = Random.nextFloat()\n    }\n    val dmat0 = new DMatrix(data0, nrow, ncol, -0.1f)\n    dmat0.setLabel(label0)\n    // check\n    assert(dmat0.rowNum === 10)\n    assert(dmat0.getLabel.length === 10)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport java.io.{File, FileInputStream, FileOutputStream}\n\nimport junit.framework.TestCase\nimport org.apache.commons.logging.LogFactory\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.java.XGBoostError\n\nclass ScalaBoosterImplSuite extends AnyFunSuite {\n\n  private class EvalError extends EvalTrait {\n\n    val logger = LogFactory.getLog(classOf[EvalError])\n\n    private[xgboost4j] var evalMetric: String = \"custom_error\"\n\n    /**\n     * get evaluate metric\n     *\n     * @return evalMetric\n     */\n    override def getMetric: String = evalMetric\n\n    /**\n     * evaluate with predicts and data\n     *\n     * @param predicts predictions as array\n     * @param dmat     data matrix to evaluate\n     * @return result of the metric\n     */\n    override def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float = {\n      var error: Float = 0f\n      var labels: Array[Float] = null\n      try {\n        labels = dmat.getLabel\n      } catch {\n        case ex: XGBoostError =>\n          logger.error(ex)\n          return -1f\n      }\n      val nrow: Int = predicts.length\n      for (i <- 0 until nrow) {\n        if (labels(i) == 0.0 && predicts(i)(0) > 0) {\n          error += 1\n        } else if (labels(i) == 1.0 && predicts(i)(0) <= 0) {\n          error += 1\n        }\n      }\n      
error / labels.length\n    }\n  }\n\n  private def trainBooster(trainMat: DMatrix, testMat: DMatrix): Booster = {\n    val paramMap = List(\"eta\" -> \"1\", \"max_depth\" -> \"2\", \"silent\" -> \"1\",\n      \"objective\" -> \"binary:logistic\").toMap\n    val watches = List(\"train\" -> trainMat, \"test\" -> testMat).toMap\n\n    val round = 2\n    XGBoost.train(trainMat, paramMap, round, watches)\n  }\n\n  private def trainBoosterWithQuantileHisto(\n      trainMat: DMatrix,\n      watches: Map[String, DMatrix],\n      round: Int,\n      paramMap: Map[String, String],\n      threshold: Float): Booster = {\n    val metrics = Array.fill(watches.size, round)(0.0f)\n    val booster = XGBoost.train(trainMat, paramMap, round, watches, metrics)\n    for (i <- 0 until watches.size; j <- 1 until metrics(i).length) {\n      assert(metrics(i)(j) >= metrics(i)(j - 1))\n    }\n    for (metricsArray <- metrics; m <- metricsArray) {\n      assert(m >= threshold)\n    }\n    booster\n  }\n\n  test(\"basic operation of booster\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n\n    val booster = trainBooster(trainMat, testMat)\n    val predicts = booster.predict(testMat, true)\n    val eval = new EvalError\n    assert(eval.eval(predicts, testMat) < 0.1)\n  }\n\n  test(\"save/load model with path\") {\n\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val eval = new EvalError\n    val booster = trainBooster(trainMat, testMat)\n    // save and load\n    val temp: File = File.createTempFile(\"temp\", \"model\")\n    temp.deleteOnExit()\n    booster.saveModel(temp.getAbsolutePath)\n\n    val bst2: Booster = XGBoost.loadModel(temp.getAbsolutePath)\n    assert(java.util.Arrays.equals(bst2.toByteArray, booster.toByteArray))\n    
assert(java.util.Arrays.equals(bst2.toByteArray(\"ubj\"), booster.toByteArray(\"ubj\")))\n    val predicts2: Array[Array[Float]] = bst2.predict(testMat, true, 0)\n    TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f)\n  }\n\n  test(\"save/load model with stream\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val eval = new EvalError\n    val booster = trainBooster(trainMat, testMat)\n    // save and load\n    val temp: File = File.createTempFile(\"temp\", \"model\")\n    temp.deleteOnExit()\n    booster.saveModel(new FileOutputStream(temp.getAbsolutePath))\n\n    val bst2: Booster = XGBoost.loadModel(new FileInputStream(temp.getAbsolutePath))\n    assert(java.util.Arrays.equals(bst2.toByteArray, booster.toByteArray))\n    val predicts2: Array[Array[Float]] = bst2.predict(testMat, true, 0)\n    TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f)\n  }\n\n  test(\"cross validation\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val params = List(\"eta\" -> \"1.0\", \"max_depth\" -> \"3\", \"silent\" -> \"1\", \"nthread\" -> \"6\",\n      \"objective\" -> \"binary:logistic\", \"gamma\" -> \"1.0\", \"eval_metric\" -> \"error\").toMap\n    val round = 2\n    val nfold = 5\n    XGBoost.crossValidation(trainMat, params, round, nfold)\n  }\n\n  test(\"test with quantile histo depthwise\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"3\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", \"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"depthwise\", \"eval_metric\" -> \"auc\").toMap\n    trainBoosterWithQuantileHisto(trainMat, Map(\"training\" -> trainMat, \"test\" -> testMat),\n      round = 
10, paramMap, 0.95f)\n  }\n\n  test(\"test with quantile histo lossguide\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"3\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", \"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"lossguide\", \"max_leaves\" -> \"8\", \"eval_metric\" -> \"auc\").toMap\n    trainBoosterWithQuantileHisto(trainMat, Map(\"training\" -> trainMat, \"test\" -> testMat),\n      round = 10, paramMap, 0.95f)\n  }\n\n  test(\"test with quantile histo lossguide with max bin\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"3\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", \"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"lossguide\", \"max_leaves\" -> \"8\", \"max_bin\" -> \"16\",\n      \"eval_metric\" -> \"auc\").toMap\n    trainBoosterWithQuantileHisto(trainMat, Map(\"training\" -> trainMat),\n      round = 10, paramMap, 0.95f)\n  }\n\n  test(\"test with quantile histo depthwidth with max depth\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"0\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", \"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"depthwise\", \"max_leaves\" -> \"8\", \"max_depth\" -> \"2\",\n      \"eval_metric\" -> \"auc\").toMap\n    trainBoosterWithQuantileHisto(trainMat, Map(\"training\" -> trainMat),\n      round = 10, paramMap, 0.95f)\n  }\n\n  test(\"test with quantile histo depthwidth with max depth and max bin\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"0\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", 
\"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"depthwise\", \"max_depth\" -> \"2\", \"max_bin\" -> \"2\",\n      \"eval_metric\" -> \"auc\").toMap\n    trainBoosterWithQuantileHisto(trainMat, Map(\"training\" -> trainMat),\n      round = 10, paramMap, 0.95f)\n  }\n\n  test(\"test training from existing model in scala\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val paramMap = List(\"max_depth\" -> \"0\", \"silent\" -> \"0\",\n      \"objective\" -> \"binary:logistic\", \"tree_method\" -> \"hist\",\n      \"grow_policy\" -> \"depthwise\", \"max_depth\" -> \"2\", \"max_bin\" -> \"2\",\n      \"eval_metric\" -> \"auc\").toMap\n\n    val prevBooster = XGBoost.train(trainMat, paramMap, round = 2)\n    val nextBooster = XGBoost.train(trainMat, paramMap, round = 4, booster = prevBooster)\n    assert(prevBooster == nextBooster)\n  }\n\n  test(\"test getting number of features from a booster\") {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val booster = trainBooster(trainMat, testMat)\n\n    TestCase.assertEquals(booster.getNumFeature, 127)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/LICENSE",
    "content": "/*\nCopyright (c) 2014 by Contributors \n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n    \n   http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/"
  },
  {
    "path": "jvm-packages/xgboost4j-example/README.md",
    "content": "XGBoost4J Code Examples\n=======================\n\n## Java API\n* [Basic walkthrough of wrappers](src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java)\n* [Customize loss function and evaluation metric](src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java)\n* [Boosting from existing prediction](src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java)\n* [Predicting using first n trees](src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java)\n* [Generalized Linear Model](src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java)\n* [Cross validation](src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java)\n* [Predicting leaf indices](src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java)\n* [Early Stopping](src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java)\n\n## Scala API\n\n* [Basic walkthrough of wrappers](src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala)\n* [Customize loss function and evaluation metric](src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala)\n* [Boosting from existing prediction](src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala)\n* [Predicting using first n trees](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala)\n* [Generalized Linear Model](src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala)\n* [Cross validation](src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala)\n* [Predicting leaf indices](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala)\n\n## Spark API\n* [Distributed Training with Spark](src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala)\n\n## Flink API\n* [Distributed Training with Flink](src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala)\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n    <parent>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost-jvm_2.12</artifactId>\n        <version>3.3.0-SNAPSHOT</version>\n    </parent>\n    <name>xgboost4j-example</name>\n    <artifactId>xgboost4j-example_2.12</artifactId>\n    <version>3.3.0-SNAPSHOT</version>\n    <packaging>jar</packaging>\n    <build>\n        <plugins>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-assembly-plugin</artifactId>\n                <configuration>\n                    <skipAssembly>false</skipAssembly>\n                </configuration>\n            </plugin>\n        </plugins>\n    </build>\n    <dependencies>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            <artifactId>xgboost4j-spark_2.12</artifactId>\n            <version>${project.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-mllib_${scala.binary.version}</artifactId>\n            <version>${spark.version}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            <artifactId>xgboost4j-flink_2.12</artifactId>\n            <version>${project.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>junit</groupId>\n            <artifactId>junit</artifactId>\n            <version>${junit.version}</version>\n            <scope>test</scope>\n        </dependency>\n    </dependencies>\n</project>\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.util.Arrays;\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\nimport ml.dmlc.xgboost4j.java.example.util.DataLoader;\n\n/**\n * a simple example of java wrapper for xgboost\n *\n * @author hzx\n */\npublic class BasicWalkThrough {\n  public static boolean checkPredicts(float[][] fPredicts, float[][] sPredicts) {\n    if (fPredicts.length != sPredicts.length) {\n      return false;\n    }\n\n    for (int i = 0; i < fPredicts.length; i++) {\n      if (!Arrays.equals(fPredicts[i], sPredicts[i])) {\n        return false;\n      }\n    }\n\n    return true;\n  }\n\n  public static void saveDumpModel(String modelPath, String[] modelInfos) throws IOException {\n    try{\n      PrintWriter writer = new PrintWriter(modelPath, \"UTF-8\");\n      for(int i = 0; i < modelInfos.length; ++ i) {\n        writer.print(\"booster[\" + i + \"]:\\n\");\n        writer.print(modelInfos[i]);\n      }\n      writer.close();\n    } catch (Exception e) {\n      e.printStackTrace();\n    }\n  }\n\n  public static void main(String[] args) throws IOException, XGBoostError {\n    // load file from text file, also 
binary buffer generated by xgboost4j\n    DMatrix trainMat = new DMatrix(\n        \"../../demo/data/agaricus.txt.train?format=libsvm&indexing_mode=1\");\n    DMatrix testMat = new DMatrix(\n        \"../../demo/data/agaricus.txt.test?format=libsvm&indexing_mode=1\");\n\n    HashMap<String, Object> params = new HashMap<String, Object>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 2);\n    params.put(\"silent\", 1);\n    params.put(\"objective\", \"binary:logistic\");\n\n\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    //set round\n    int round = 2;\n\n    //train a boost model\n    Booster booster = XGBoost.train(trainMat, params, round, watches, null, null);\n\n    //predict\n    float[][] predicts = booster.predict(testMat);\n\n    //save model to modelPath\n    File file = new File(\"./model\");\n    if (!file.exists()) {\n      file.mkdirs();\n    }\n\n    String modelPath = \"./model/xgb.model\";\n    booster.saveModel(modelPath);\n\n    //dump model with feature map\n    String[] modelInfos = booster.getModelDump(\"../../demo/data/featmap.txt\", false);\n    saveDumpModel(\"./model/dump.raw.txt\", modelInfos);\n\n    //save dmatrix into binary buffer\n    testMat.saveBinary(\"./model/dtest.buffer\");\n\n    //reload model and data\n    Booster booster2 = XGBoost.loadModel(\"./model/xgb.model\");\n    DMatrix testMat2 = new DMatrix(\"./model/dtest.buffer\");\n    float[][] predicts2 = booster2.predict(testMat2);\n\n\n    //check the two predicts\n    System.out.println(checkPredicts(predicts, predicts2));\n\n    System.out.println(\"start build dmatrix from csr sparse data ...\");\n    //build dmatrix from CSR Sparse Matrix\n    DataLoader.CSRSparseData spData =\n        DataLoader.loadSVMFile(\"../../demo/data/agaricus.txt.train\");\n\n    DMatrix trainMat2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data,\n               
                     DMatrix.SparseType.CSR, 127);\n    trainMat2.setLabel(spData.labels);\n\n    //specify watchList\n    HashMap<String, DMatrix> watches2 = new HashMap<String, DMatrix>();\n    watches2.put(\"train\", trainMat2);\n    watches2.put(\"test\", testMat2);\n    Booster booster3 = XGBoost.train(trainMat2, params, round, watches2, null, null);\n    float[][] predicts3 = booster3.predict(testMat2);\n\n    //check predicts\n    System.out.println(checkPredicts(predicts, predicts3));\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n/**\n * example for start from a initial base prediction\n *\n * @author hzx\n */\npublic class BoostFromPrediction {\n  public static void main(String[] args) throws XGBoostError {\n    System.out.println(\"start running example to start from a initial prediction\");\n\n    // load file from text file, also binary buffer generated by xgboost4j\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n    DMatrix testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\");\n\n    //specify parameters\n    HashMap<String, Object> params = new HashMap<String, Object>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 2);\n    params.put(\"silent\", 1);\n    params.put(\"objective\", \"binary:logistic\");\n\n    //specify watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    //train xgboost for 1 round\n    Booster booster = XGBoost.train(trainMat, params, 1, watches, null, null);\n\n    float[][] trainPred = booster.predict(trainMat, true);\n    float[][] testPred = 
booster.predict(testMat, true);\n\n    trainMat.setBaseMargin(trainPred);\n    testMat.setBaseMargin(testPred);\n\n    System.out.println(\"result of running from initial prediction\");\n    Booster booster2 = XGBoost.train(trainMat, params, 1, watches, null, null);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.io.IOException;\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n/**\n * an example of cross validation\n *\n * @author hzx\n */\npublic class CrossValidation {\n  public static void main(String[] args) throws IOException, XGBoostError {\n    //load train mat\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n\n    //set params\n    HashMap<String, Object> params = new HashMap<String, Object>();\n\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 3);\n    params.put(\"silent\", 1);\n    params.put(\"nthread\", 6);\n    params.put(\"objective\", \"binary:logistic\");\n    params.put(\"gamma\", 1.0);\n    params.put(\"eval_metric\", \"error\");\n\n    //do 5-fold cross validation\n    int round = 2;\n    int nfold = 5;\n    //set additional eval_metrics\n    String[] metrics = null;\n\n    String[] evalHist = XGBoost.crossValidation(trainMat, params, round, nfold, metrics, null,\n            null);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport ml.dmlc.xgboost4j.java.*;\n\n/**\n * an example user define objective and eval\n * NOTE: when you do customized loss function, the default prediction value is margin\n * this may make buildin evalution metric not function properly\n * for example, we are doing logistic loss, the prediction is score before logistic transformation\n * he buildin evaluation error assumes input is after logistic transformation\n * Take this in mind when you use the customization, and maybe you need write customized evaluation\n * function\n *\n * @author hzx\n */\npublic class CustomObjective {\n  /**\n   * loglikelihoode loss obj function\n   */\n  public static class LogRegObj implements IObjective {\n    private static final Log logger = LogFactory.getLog(LogRegObj.class);\n\n    /**\n     * simple sigmoid func\n     *\n     * @param input Sigmod(x)\n     * @return Note: this func is not concern about numerical stability, only used as example\n     */\n    public float sigmoid(float input) {\n      float val = (float) (1 / (1 + Math.exp(-input)));\n      return val;\n    }\n\n    public float[][] transform(float[][] predicts) {\n      int nrow = predicts.length;\n      
float[][] transPredicts = new float[nrow][1];\n\n      for (int i = 0; i < nrow; i++) {\n        transPredicts[i][0] = sigmoid(predicts[i][0]);\n      }\n\n      return transPredicts;\n    }\n\n    @Override\n    public List<float[]> getGradient(float[][] predicts, DMatrix dtrain) {\n      int nrow = predicts.length;\n      List<float[]> gradients = new ArrayList<float[]>();\n      float[] labels;\n      try {\n        labels = dtrain.getLabel();\n      } catch (XGBoostError ex) {\n        logger.error(ex);\n        return null;\n      }\n      float[] grad = new float[nrow];\n      float[] hess = new float[nrow];\n\n      float[][] transPredicts = transform(predicts);\n\n      for (int i = 0; i < nrow; i++) {\n        float predict = transPredicts[i][0];\n        grad[i] = predict - labels[i];\n        hess[i] = predict * (1 - predict);\n      }\n\n      gradients.add(grad);\n      gradients.add(hess);\n      return gradients;\n    }\n  }\n\n  /**\n   * user defined eval function.\n   * NOTE: when you do customized loss function, the default prediction value is margin\n   * this may make buildin evalution metric not function properly\n   * for example, we are doing logistic loss, the prediction is score before logistic transformation\n   * the buildin evaluation error assumes input is after logistic transformation\n   * Take this in mind when you use the customization, and maybe you need write customized\n   * evaluation function\n   */\n  public static class EvalError implements IEvaluation {\n    private static final Log logger = LogFactory.getLog(EvalError.class);\n\n    String evalMetric = \"custom_error\";\n\n    public EvalError() {\n    }\n\n    @Override\n    public String getMetric() {\n      return evalMetric;\n    }\n\n    @Override\n    public float eval(float[][] predicts, DMatrix dmat) {\n      float error = 0f;\n      float[] labels;\n      try {\n        labels = dmat.getLabel();\n      } catch (XGBoostError ex) {\n        logger.error(ex);\n       
 return -1f;\n      }\n      int nrow = predicts.length;\n      for (int i = 0; i < nrow; i++) {\n        if (labels[i] == 0f && predicts[i][0] > 0) {\n          error++;\n        } else if (labels[i] == 1f && predicts[i][0] <= 0) {\n          error++;\n        }\n      }\n\n      return error / labels.length;\n    }\n  }\n\n  public static void main(String[] args) throws XGBoostError {\n    //load train mat (svmlight format)\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n    //load valid mat (svmlight format)\n    DMatrix testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\");\n\n    HashMap<String, Object> params = new HashMap<String, Object>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 2);\n    params.put(\"silent\", 1);\n\n\n    //set round\n    int round = 2;\n\n    //specify watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    //user define obj and eval\n    IObjective obj = new LogRegObj();\n    IEvaluation eval = new EvalError();\n\n    //train a booster\n    System.out.println(\"begin to train the booster model\");\n    Booster booster = XGBoost.train(trainMat, params, round, watches, obj, eval);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java",
    "content": "/*\n Copyright (c) 2021 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.io.IOException;\nimport java.util.HashMap;\nimport java.util.LinkedHashMap;\nimport java.util.Map;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\nimport ml.dmlc.xgboost4j.java.example.util.DataLoader;\n\npublic class EarlyStopping {\n  public static void main(String[] args) throws IOException, XGBoostError {\n    DataLoader.CSRSparseData trainCSR =\n        DataLoader.loadSVMFile(\"../../demo/data/agaricus.txt.train\");\n    DataLoader.CSRSparseData testCSR =\n        DataLoader.loadSVMFile(\"../../demo/data/agaricus.txt.test\");\n\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 3);\n        put(\"objective\", \"binary:logistic\");\n        put(\"maximize_evaluation_metrics\", \"false\");\n      }\n    };\n\n    DMatrix trainXy = new DMatrix(trainCSR.rowHeaders, trainCSR.colIndex, trainCSR.data,\n                                  DMatrix.SparseType.CSR, 127);\n    trainXy.setLabel(trainCSR.labels);\n    DMatrix testXy = new DMatrix(testCSR.rowHeaders, testCSR.colIndex, testCSR.data,\n                                 DMatrix.SparseType.CSR, 127);\n    testXy.setLabel(testCSR.labels);\n\n    int nRounds = 128;\n    int nEarlyStoppingRounds = 
4;\n\n    Map<String, DMatrix> watches = new LinkedHashMap<>();\n    watches.put(\"training\", trainXy);\n    watches.put(\"test\", testXy);\n\n    float[][] metrics = new float[watches.size()][nRounds];\n    Booster booster = XGBoost.train(trainXy, paramMap, nRounds,\n                                    watches, metrics, null, null, nEarlyStoppingRounds);\n\n    int bestIter = Integer.valueOf(booster.getAttr(\"best_iteration\"));\n    float bestScore = Float.valueOf(booster.getAttr(\"best_score\"));\n\n    System.out.printf(\"Best iter: %d, Best score: %f\\n\", bestIter, bestScore);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\nimport ml.dmlc.xgboost4j.java.example.util.CustomEval;\n\n/**\n * this is an example of fit generalized linear model in xgboost\n * basically, we are using linear model, instead of tree for our boosters\n *\n * @author hzx\n */\npublic class GeneralizedLinearModel {\n  public static void main(String[] args) throws XGBoostError {\n    // load file from text file, also binary buffer generated by xgboost4j\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n    DMatrix testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\");\n\n    //specify parameters\n    //change booster to gblinear, so that we are fitting a linear model\n    // alpha is the L1 regularizer\n    //lambda is the L2 regularizer\n    //you can also set lambda_bias which is L2 regularizer on the bias term\n    HashMap<String, Object> params = new HashMap<String, Object>();\n    params.put(\"alpha\", 0.0001);\n    params.put(\"silent\", 1);\n    params.put(\"objective\", \"binary:logistic\");\n    params.put(\"booster\", \"gblinear\");\n\n    //normally, you do not need to set eta (step_size)\n    //XGBoost uses a 
parallel coordinate descent algorithm (shotgun),\n    //parallelization can affect convergence in certain cases\n    //setting eta to a smaller value, e.g. 0.5, can make the optimization more stable\n    //params.put(\"eta\", \"0.5\");\n\n\n    //specify watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n    //train a booster\n    int round = 4;\n    Booster booster = XGBoost.train(trainMat, params, round, watches, null, null);\n\n    float[][] predicts = booster.predict(testMat);\n\n    CustomEval eval = new CustomEval();\n    System.out.println(\"error=\" + eval.eval(predicts, testMat));\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\nimport ml.dmlc.xgboost4j.java.example.util.CustomEval;\n\n/**\n * predict first ntree\n *\n * @author hzx\n */\npublic class PredictFirstNtree {\n  public static void main(String[] args) throws XGBoostError {\n    // load file from text file, also binary buffer generated by xgboost4j\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n    DMatrix testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\");\n\n    //specify parameters\n    HashMap<String, Object> params = new HashMap<String, Object>();\n\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 2);\n    params.put(\"silent\", 1);\n    params.put(\"objective\", \"binary:logistic\");\n\n\n    //specify watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n\n    //train a booster\n    int round = 3;\n    Booster booster = XGBoost.train(trainMat, params, round, watches, null, null);\n\n    //predict use 1 tree\n    float[][] predicts1 = booster.predict(testMat, false, 1);\n    //by default all trees are used to 
do predict\n    float[][] predicts2 = booster.predict(testMat);\n\n    //use a simple evaluation class to check error result\n    CustomEval eval = new CustomEval();\n    System.out.println(\"error of predicts1: \" + eval.eval(predicts1, testMat));\n    System.out.println(\"error of predicts2: \" + eval.eval(predicts2, testMat));\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java",
    "content": "/*\n Copyright (c) 2014-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.util.HashMap;\n\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoost;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n/**\n * predict leaf indices\n *\n * @author hzx\n */\npublic class PredictLeafIndices {\n  public static void main(String[] args) throws XGBoostError {\n    // load file from text file, also binary buffer generated by xgboost4j\n    DMatrix trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\");\n    DMatrix testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\");\n\n    //specify parameters\n    HashMap<String, Object> params = new HashMap<String, Object>();\n    params.put(\"eta\", 1.0);\n    params.put(\"max_depth\", 2);\n    params.put(\"silent\", 1);\n    params.put(\"objective\", \"binary:logistic\");\n\n    //specify watchList\n    HashMap<String, DMatrix> watches = new HashMap<String, DMatrix>();\n    watches.put(\"train\", trainMat);\n    watches.put(\"test\", testMat);\n\n\n    //train a booster\n    int round = 3;\n    Booster booster = XGBoost.train(trainMat, params, round, watches, null, null);\n\n    //predict using first 2 tree\n    float[][] leafindex = booster.predictLeaf(testMat, 2);\n    if (leafindex.length > 0 && leafindex[0].length > 1) {\n      
System.out.println(leafindex[0][0] + \", \" + leafindex[0][1]);\n    }\n\n    //predict all trees\n    leafindex = booster.predictLeaf(testMat, 0);\n    if (leafindex.length > 0 && leafindex[0].length > 1) {\n      System.out.println(leafindex[0][0] + \", \" + leafindex[0][1]);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExample.java",
    "content": "/*\n Copyright (c) 2014-2021 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example.flink;\n\nimport java.nio.file.Path;\nimport java.util.Arrays;\nimport java.util.HashMap;\nimport java.util.List;\n\nimport org.apache.flink.api.common.typeinfo.TypeHint;\nimport org.apache.flink.api.common.typeinfo.TypeInformation;\nimport org.apache.flink.api.java.DataSet;\nimport org.apache.flink.api.java.ExecutionEnvironment;\nimport org.apache.flink.api.java.operators.MapOperator;\nimport org.apache.flink.api.java.tuple.Tuple13;\nimport org.apache.flink.api.java.tuple.Tuple2;\nimport org.apache.flink.api.java.utils.DataSetUtils;\nimport org.apache.flink.ml.linalg.DenseVector;\nimport org.apache.flink.ml.linalg.Vector;\nimport org.apache.flink.ml.linalg.Vectors;\n\nimport ml.dmlc.xgboost4j.java.flink.XGBoost;\nimport ml.dmlc.xgboost4j.java.flink.XGBoostModel;\n\n\npublic class DistTrainWithFlinkExample {\n\n  static Tuple2<XGBoostModel, DataSet<Float[]>> runPrediction(\n      ExecutionEnvironment env,\n      java.nio.file.Path trainPath,\n      int percentage) throws Exception {\n    // reading data\n    final DataSet<Tuple2<Long, Tuple2<Vector, Double>>> data =\n        DataSetUtils.zipWithIndex(parseCsv(env, trainPath));\n    final long size = data.count();\n    final long trainCount = Math.round(size * 0.01 * percentage);\n    final DataSet<Tuple2<Vector, Double>> trainData =\n        data\n          .filter(item -> 
item.f0 < trainCount)\n          .map(t -> t.f1)\n          .returns(TypeInformation.of(new TypeHint<Tuple2<Vector, Double>>(){}));\n    final DataSet<Vector> testData =\n        data\n          .filter(tuple -> tuple.f0 >= trainCount)\n          .map(t -> t.f1.f0)\n          .returns(TypeInformation.of(new TypeHint<Vector>(){}));\n\n    // define parameters\n    HashMap<String, Object> paramMap = new HashMap<String, Object>(3);\n    paramMap.put(\"eta\", 0.1);\n    paramMap.put(\"max_depth\", 2);\n    paramMap.put(\"objective\", \"binary:logistic\");\n\n    // number of iterations\n    final int round = 2;\n    // train the model\n    XGBoostModel model = XGBoost.train(trainData, paramMap, round);\n    DataSet<Float[]> predTest = model.predict(testData);\n    return new Tuple2<XGBoostModel, DataSet<Float[]>>(model, predTest);\n  }\n\n  private static MapOperator<Tuple13<Double, String, Double, Double, Double, Integer, Integer,\n      Integer, Integer, Integer, Integer, Integer, Integer>,\n      Tuple2<Vector, Double>> parseCsv(ExecutionEnvironment env, Path trainPath) {\n    return env.readCsvFile(trainPath.toString())\n      .ignoreFirstLine()\n      .types(Double.class, String.class, Double.class, Double.class, Double.class,\n        Integer.class, Integer.class, Integer.class, Integer.class, Integer.class,\n        Integer.class, Integer.class, Integer.class)\n      .map(DistTrainWithFlinkExample::mapFunction);\n  }\n\n  private static Tuple2<Vector, Double> mapFunction(Tuple13<Double, String, Double, Double, Double,\n      Integer, Integer, Integer, Integer, Integer, Integer, Integer, Integer> tuple) {\n    final DenseVector dense = Vectors.dense(tuple.f2, tuple.f3, tuple.f4, tuple.f5, tuple.f6,\n        tuple.f7, tuple.f8, tuple.f9, tuple.f10, tuple.f11, tuple.f12);\n    if (tuple.f1.contains(\"inf\")) {\n      return new Tuple2<Vector, Double>(dense, 1.0);\n    } else {\n      return new Tuple2<Vector, Double>(dense, 0.0);\n    }\n  }\n\n  public static void 
main(String[] args) throws Exception {\n    final java.nio.file.Path parentPath = java.nio.file.Paths.get(Arrays.stream(args)\n        .findFirst().orElse(\".\"));\n    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();\n    Tuple2<XGBoostModel, DataSet<Float[]>> tuple2 = runPrediction(\n        env, parentPath.resolve(\"veterans_lung_cancer.csv\"), 70\n    );\n    List<Float[]> list = tuple2.f1.collect();\n    System.out.println(list.size());\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/CustomEval.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example.util;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.IEvaluation;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n/**\n * a util evaluation class for examples\n *\n * @author hzx\n */\npublic class CustomEval implements IEvaluation {\n  private static final Log logger = LogFactory.getLog(CustomEval.class);\n\n  String evalMetric = \"custom_error\";\n\n  @Override\n  public String getMetric() {\n    return evalMetric;\n  }\n\n  @Override\n  public float eval(float[][] predicts, DMatrix dmat) {\n    float error = 0f;\n    float[] labels;\n    try {\n      labels = dmat.getLabel();\n    } catch (XGBoostError ex) {\n      logger.error(ex);\n      return -1f;\n    }\n    int nrow = predicts.length;\n    for (int i = 0; i < nrow; i++) {\n      if (labels[i] == 0f && predicts[i][0] > 0.5) {\n        error++;\n      } else if (labels[i] == 1f && predicts[i][0] <= 0.5) {\n        error++;\n      }\n    }\n\n    return error / labels.length;\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/DataLoader.java",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example.util;\n\nimport java.io.*;\nimport java.util.ArrayList;\nimport java.util.List;\n\nimport org.apache.commons.lang3.ArrayUtils;\n\n/**\n * util class for loading data\n *\n * @author hzx\n */\npublic class DataLoader {\n  public static class DenseData {\n    public float[] labels;\n    public float[] data;\n    public int nrow;\n    public int ncol;\n  }\n\n  public static class CSRSparseData {\n    public float[] labels;\n    public float[] data;\n    public long[] rowHeaders;\n    public int[] colIndex;\n  }\n\n  public static DenseData loadCSVFile(String filePath) throws IOException {\n    DenseData denseData = new DenseData();\n\n    File f = new File(filePath);\n    FileInputStream in = new FileInputStream(f);\n    BufferedReader reader = new BufferedReader(new InputStreamReader(in, \"UTF-8\"));\n\n    denseData.nrow = 0;\n    denseData.ncol = -1;\n    String line;\n    List<Float> tlabels = new ArrayList<>();\n    List<Float> tdata = new ArrayList<>();\n\n    while ((line = reader.readLine()) != null) {\n      String[] items = line.trim().split(\",\");\n      if (items.length == 0) {\n        continue;\n      }\n      denseData.nrow++;\n      if (denseData.ncol == -1) {\n        denseData.ncol = items.length - 1;\n      }\n\n      tlabels.add(Float.valueOf(items[items.length - 1]));\n      for (int i = 0; i < items.length - 
1; i++) {\n        tdata.add(Float.valueOf(items[i]));\n      }\n    }\n\n    reader.close();\n    in.close();\n\n    denseData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));\n    denseData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));\n\n    return denseData;\n  }\n\n  public static CSRSparseData loadSVMFile(String filePath) throws IOException {\n    CSRSparseData spData = new CSRSparseData();\n\n    List<Float> tlabels = new ArrayList<>();\n    List<Float> tdata = new ArrayList<>();\n    List<Long> theaders = new ArrayList<>();\n    List<Integer> tindex = new ArrayList<>();\n\n    File f = new File(filePath);\n    FileInputStream in = new FileInputStream(f);\n    BufferedReader reader = new BufferedReader(new InputStreamReader(in, \"UTF-8\"));\n\n    String line;\n    long rowheader = 0;\n    theaders.add(rowheader);\n    while ((line = reader.readLine()) != null) {\n      String[] items = line.trim().split(\" \");\n      if (items.length == 0) {\n        continue;\n      }\n\n      rowheader += items.length - 1;\n      theaders.add(rowheader);\n      tlabels.add(Float.valueOf(items[0]));\n\n      for (int i = 1; i < items.length; i++) {\n        String[] tup = items[i].split(\":\");\n        assert tup.length == 2;\n\n        tdata.add(Float.valueOf(tup[1]));\n        tindex.add(Integer.valueOf(tup[0]));\n      }\n    }\n\n    spData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));\n    spData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));\n    spData.colIndex = ArrayUtils.toPrimitive(tindex.toArray(new Integer[tindex.size()]));\n    spData.rowHeaders = ArrayUtils.toPrimitive(theaders.toArray(new Long[theaders.size()]));\n\n    return spData;\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.example\n\nimport java.io.File\nimport java.io.PrintWriter\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.java.{DMatrix => JDMatrix}\nimport ml.dmlc.xgboost4j.java.example.util.DataLoader\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\n\nobject BasicWalkThrough {\n  def saveDumpModel(modelPath: String, modelInfos: Array[String]): Unit = {\n    val writer = new PrintWriter(modelPath, \"UTF-8\")\n    for (i <- 0 until modelInfos.length) {\n      writer.print(s\"booster[$i]:\\n\")\n      writer.print(modelInfos(i))\n    }\n    writer.close()\n  }\n\n  def main(args: Array[String]): Unit = {\n    val trainMax = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm&indexing_mode=1\")\n    val testMax = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm&indexing_mode=1\")\n\n    val params = new mutable.HashMap[String, Any]()\n    params += \"eta\" -> 1.0\n    params += \"max_depth\" -> 2\n    params += \"silent\" -> 1\n    params += \"objective\" -> \"binary:logistic\"\n\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMax\n    watches += \"test\" -> testMax\n\n    val round = 2\n    // train a model\n    val booster = XGBoost.train(trainMax, params.toMap, round, watches.toMap)\n    // predict\n    val predicts = booster.predict(testMax)\n    // save model 
to model path\n    val file = new File(\"./model\")\n    if (!file.exists()) {\n      file.mkdirs()\n    }\n    booster.saveModel(file.getAbsolutePath + \"/xgb.model\")\n    // dump model with feature map\n    val modelInfos = booster.getModelDump(\"../../demo/data/featmap.txt\", false)\n    saveDumpModel(file.getAbsolutePath + \"/dump.raw.txt\", modelInfos)\n    // save dmatrix into binary buffer\n    testMax.saveBinary(file.getAbsolutePath + \"/dtest.buffer\")\n\n    // reload model and data\n    val booster2 = XGBoost.loadModel(file.getAbsolutePath + \"/xgb.model\")\n    val testMax2 = new DMatrix(file.getAbsolutePath + \"/dtest.buffer\")\n    val predicts2 = booster2.predict(testMax2)\n\n    // check predicts\n    println(checkPredicts(predicts, predicts2))\n\n    // build dmatrix from CSR Sparse Matrix\n    println(\"start build dmatrix from csr sparse data ...\")\n    val spData = DataLoader.loadSVMFile(\"../../demo/data/agaricus.txt.train\")\n    val trainMax2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data,\n      JDMatrix.SparseType.CSR, 127)\n    trainMax2.setLabel(spData.labels)\n\n    // specify watchList\n    val watches2 = new mutable.HashMap[String, DMatrix]\n    watches2 += \"train\" -> trainMax2\n    watches2 += \"test\" -> testMax2\n    val booster3 = XGBoost.train(trainMax2, params.toMap, round, watches2.toMap)\n    val predicts3 = booster3.predict(testMax2)\n    println(checkPredicts(predicts, predicts3))\n  }\n\n  def checkPredicts(fPredicts: Array[Array[Float]], sPredicts: Array[Array[Float]]): Boolean = {\n    require(fPredicts.length == sPredicts.length, \"the comparing predicts must be with the same \" +\n      \"length\")\n    for (i <- fPredicts.indices) {\n      if (!java.util.Arrays.equals(fPredicts(i), sPredicts(i))) {\n        return false\n      }\n    }\n    true\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\n\n\nobject BoostFromPrediction {\n  def main(args: Array[String]): Unit = {\n    println(\"start running example to start from a initial prediction\")\n\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n\n    val params = new mutable.HashMap[String, Any]()\n    params += \"eta\" -> 1.0\n    params += \"max_depth\" -> 2\n    params += \"silent\" -> 1\n    params += \"objective\" -> \"binary:logistic\"\n\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMat\n    watches += \"test\" -> testMat\n\n    val round = 2\n    // train a model\n    val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap)\n\n    val trainPred = booster.predict(trainMat, true)\n    val testPred = booster.predict(testMat, true)\n\n    trainMat.setBaseMargin(trainPred)\n    testMat.setBaseMargin(testPred)\n\n    System.out.println(\"result of running from initial prediction\")\n    XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\n\nobject CrossValidation {\n  def main(args: Array[String]): Unit = {\n    val trainMat: DMatrix = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n\n    // set params\n    val params = new mutable.HashMap[String, Any]\n\n    params.put(\"eta\", 1.0)\n    params.put(\"max_depth\", 3)\n    params.put(\"silent\", 1)\n    params.put(\"nthread\", 6)\n    params.put(\"objective\", \"binary:logistic\")\n    params.put(\"gamma\", 1.0)\n    params.put(\"eval_metric\", \"error\")\n\n    // do 5-fold cross validation\n    val round: Int = 2\n    val nfold: Int = 5\n    // set additional eval_metrics\n    val metrics: Array[String] = null\n\n    XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\nimport scala.collection.mutable.ListBuffer\n\nimport org.apache.commons.logging.{Log, LogFactory}\n\nimport ml.dmlc.xgboost4j.java.XGBoostError\nimport ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait, ObjectiveTrait, XGBoost}\n\n/**\n * an example of a user-defined objective and eval\n * NOTE: when you do customized loss function, the default prediction value is margin\n * this may make the built-in evaluation metric not function properly\n * for example, we are doing logistic loss, the prediction is score before logistic transformation\n * the built-in evaluation error assumes input is after logistic transformation\n * Keep this in mind when you use the customization; you may need to write a customized evaluation\n * function\n *\n */\nobject CustomObjective {\n\n  /**\n   * log-likelihood loss obj function\n   */\n  class LogRegObj extends ObjectiveTrait {\n    private val logger: Log = LogFactory.getLog(classOf[LogRegObj])\n    /**\n     * user-defined objective function, return gradient and second order gradient\n     *\n     * @param predicts untransformed margin predicts\n     * @param dtrain   training data\n     * @return List with two float arrays, corresponding to first order grad and second order grad\n     */\n    override def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix)\n        : 
List[Array[Float]] = {\n      val nrow = predicts.length\n      val gradients = new ListBuffer[Array[Float]]\n      var labels: Array[Float] = null\n      try {\n        labels = dtrain.getLabel\n      } catch {\n        case e: XGBoostError =>\n          logger.error(e)\n          null\n        case _: Throwable =>\n          null\n      }\n      val grad = new Array[Float](nrow)\n      val hess = new Array[Float](nrow)\n      val transPredicts = transform(predicts)\n\n      for (i <- 0 until nrow) {\n        val predict = transPredicts(i)(0)\n        grad(i) = predict - labels(i)\n        hess(i) = predict * (1 - predict)\n      }\n      gradients += grad\n      gradients += hess\n      gradients.toList\n    }\n\n    /**\n     * simple sigmoid func\n     *\n     * @param input\n     * @return Note: this func is not concern about numerical stability, only used as example\n     */\n    def sigmoid(input: Float): Float = {\n      (1 / (1 + Math.exp(-input))).toFloat\n    }\n\n    def transform(predicts: Array[Array[Float]]): Array[Array[Float]] = {\n      val nrow = predicts.length\n      val transPredicts = Array.fill[Float](nrow, 1)(0)\n      for (i <- 0 until nrow) {\n        transPredicts(i)(0) = sigmoid(predicts(i)(0))\n      }\n      transPredicts\n    }\n\n  }\n\n  class EvalError extends EvalTrait {\n\n    val logger = LogFactory.getLog(classOf[EvalError])\n\n    private[xgboost4j] var evalMetric: String = \"custom_error\"\n\n    /**\n     * get evaluate metric\n     *\n     * @return evalMetric\n     */\n    override def getMetric: String = evalMetric\n\n    /**\n     * evaluate with predicts and data\n     *\n     * @param predicts predictions as array\n     * @param dmat     data matrix to evaluate\n     * @return result of the metric\n     */\n    override def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float = {\n      var error: Float = 0f\n      var labels: Array[Float] = null\n      try {\n        labels = dmat.getLabel\n      } catch {\n     
   case ex: XGBoostError =>\n          logger.error(ex)\n          return -1f\n      }\n      val nrow: Int = predicts.length\n      for (i <- 0 until nrow) {\n        if (labels(i) == 0.0 && predicts(i)(0) > 0) {\n          error += 1\n        } else if (labels(i) == 1.0 && predicts(i)(0) <= 0) {\n          error += 1\n        }\n      }\n      error / labels.length\n    }\n  }\n\n  def main(args: Array[String]): Unit = {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n    val params = new mutable.HashMap[String, Any]()\n    params += \"eta\" -> 1.0\n    params += \"max_depth\" -> 2\n    params += \"silent\" -> 1\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMat\n    watches += \"test\" -> testMat\n\n    val round = 2\n    // train a model\n    XGBoost.train(trainMat, params.toMap, round, watches.toMap)\n    XGBoost.train(trainMat, params.toMap, round, watches.toMap,\n      obj = new LogRegObj, eval = new EvalError)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\nimport ml.dmlc.xgboost4j.scala.example.util.CustomEval\n\n\n/**\n * this is an example of fitting a generalized linear model in xgboost\n * basically, we are using linear model, instead of tree for our boosters\n */\nobject GeneralizedLinearModel {\n  def main(args: Array[String]): Unit = {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n\n    // specify parameters\n    // change booster to gblinear, so that we are fitting a linear model\n    // alpha is the L1 regularizer\n    // lambda is the L2 regularizer\n    // you can also set lambda_bias which is L2 regularizer on the bias term\n    val params = new mutable.HashMap[String, Any]()\n    params += \"alpha\" -> 0.0001\n    params += \"booster\" -> \"gblinear\"\n    params += \"silent\" -> 1\n    params += \"objective\" -> \"binary:logistic\"\n\n    // normally, you do not need to set eta (step_size)\n    // XGBoost uses a parallel coordinate descent algorithm (shotgun),\n    // there could be an effect on convergence with parallelization in certain cases\n    // setting eta to a smaller value, e.g. 0.5, can make the optimization more stable\n    // param.put(\"eta\", 
\"0.5\");\n\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMat\n    watches += \"test\" -> testMat\n\n    val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap)\n    val predicts = booster.predict(testMat)\n    val eval = new CustomEval\n    println(s\"error=${eval.eval(predicts, testMat)}\")\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\nimport ml.dmlc.xgboost4j.scala.example.util.CustomEval\n\nobject PredictFirstNTree {\n\n  def main(args: Array[String]): Unit = {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n\n    val params = new mutable.HashMap[String, Any]()\n    params += \"eta\" -> 1.0\n    params += \"max_depth\" -> 2\n    params += \"silent\" -> 1\n    params += \"objective\" -> \"binary:logistic\"\n\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMat\n    watches += \"test\" -> testMat\n\n    val round = 3\n    // train a model\n    val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap)\n\n    // predict use 1 tree\n    val predicts1 = booster.predict(testMat, false, 1)\n    // by default all trees are used to do predict\n    val predicts2 = booster.predict(testMat)\n\n    val eval = new CustomEval\n    println(\"error of predicts1: \" + eval.eval(predicts1, testMat))\n    println(\"error of predicts2: \" + eval.eval(predicts2, testMat))\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala",
    "content": "/*\n Copyright (c) 2014-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.example\n\nimport scala.collection.mutable\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}\n\nobject PredictLeafIndices {\n\n  def main(args: Array[String]): Unit = {\n    val trainMat = new DMatrix(\"../../demo/data/agaricus.txt.train?format=libsvm\")\n    val testMat = new DMatrix(\"../../demo/data/agaricus.txt.test?format=libsvm\")\n\n    val params = new mutable.HashMap[String, Any]()\n    params += \"eta\" -> 1.0\n    params += \"max_depth\" -> 2\n    params += \"silent\" -> 1\n    params += \"objective\" -> \"binary:logistic\"\n\n    val watches = new mutable.HashMap[String, DMatrix]\n    watches += \"train\" -> trainMat\n    watches += \"test\" -> testMat\n\n    val round = 3\n    val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap)\n\n    // predict using first 2 tree\n    val leafIndex = booster.predictLeaf(testMat, 2)\n    if (leafIndex.length > 0 && leafIndex(0).length > 1) {\n      println(s\"${leafIndex(0)(0)}, ${leafIndex(0)(1)}\")\n    }\n\n    // predict all trees\n    val leafIndex2 = booster.predictLeaf(testMat, 0)\n    if (leafIndex2.length > 0 && leafIndex2(0).length > 1) {\n      println(s\"${leafIndex2(0)(0)}, ${leafIndex2(0)(1)}\")\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala",
    "content": "/*\n Copyright (c) 2014 - 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example.flink\n\nimport java.lang.{Double => JDouble, Long => JLong}\nimport java.nio.file.{Path, Paths}\n\nimport org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}\nimport org.apache.flink.api.java.{DataSet, ExecutionEnvironment}\nimport org.apache.flink.api.java.tuple.{Tuple13, Tuple2}\nimport org.apache.flink.api.java.utils.DataSetUtils\nimport org.apache.flink.ml.linalg.{Vector, Vectors}\n\nimport ml.dmlc.xgboost4j.java.flink.{XGBoost, XGBoostModel}\n\n\nobject DistTrainWithFlink {\n  import scala.jdk.CollectionConverters._\n  private val rowTypeHint = TypeInformation.of(new TypeHint[Tuple2[Vector, JDouble]]{})\n  private val testDataTypeHint = TypeInformation.of(classOf[Vector])\n\n  private[flink] def parseCsv(trainPath: Path)(implicit env: ExecutionEnvironment):\n      DataSet[Tuple2[JLong, Tuple2[Vector, JDouble]]] = {\n    DataSetUtils.zipWithIndex(\n    env\n      .readCsvFile(trainPath.toString)\n      .ignoreFirstLine\n      .types(\n        classOf[Double], classOf[String], classOf[Double], classOf[Double], classOf[Double],\n        classOf[Integer], classOf[Integer], classOf[Integer], classOf[Integer],\n        classOf[Integer], classOf[Integer], classOf[Integer], classOf[Integer]\n      )\n      .map((row: Tuple13[Double, String, Double, Double, Double,\n        Integer, Integer, Integer, Integer, 
Integer, Integer, Integer, Integer]) => {\n        val dense = Vectors.dense(row.f2, row.f3, row.f4,\n          row.f5.toDouble, row.f6.toDouble, row.f7.toDouble, row.f8.toDouble,\n          row.f9.toDouble, row.f10.toDouble, row.f11.toDouble, row.f12.toDouble)\n        val label = if (row.f1.contains(\"inf\")) {\n          JDouble.valueOf(1.0)\n        } else {\n          JDouble.valueOf(0.0)\n        }\n        new Tuple2[Vector, JDouble](dense, label)\n      })\n      .returns(rowTypeHint)\n    )\n  }\n\n  private[flink] def runPrediction(trainPath: Path, percentage: Int)\n                                  (implicit env: ExecutionEnvironment):\n    (XGBoostModel, DataSet[Array[Float]]) = {\n    // read training data\n    val data: DataSet[Tuple2[JLong, Tuple2[Vector, JDouble]]] = parseCsv(trainPath)\n    val trainSize = Math.round(0.01 * percentage * data.count())\n    val trainData: DataSet[Tuple2[Vector, JDouble]] =\n      data.filter(d => d.f0 < trainSize).map(_.f1).returns(rowTypeHint)\n\n\n    val testData: DataSet[Vector] =\n        data\n          .filter(d => d.f0 >= trainSize)\n          .map(_.f1.f0)\n          .returns(testDataTypeHint)\n\n    val paramMap = Map(\n        (\"eta\", \"0.1\".asInstanceOf[AnyRef]),\n        (\"max_depth\", \"2\"),\n        (\"objective\", \"binary:logistic\"),\n        (\"verbosity\", \"1\")\n      )\n      .asJava\n\n    // number of iterations\n    val round = 2\n    // train the model\n    val model = XGBoost.train(trainData, paramMap, round)\n    val result = model.predict(testData).map(prediction => prediction.map(Float.unbox))\n    (model, result)\n  }\n\n  def main(args: Array[String]): Unit = {\n    implicit val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment\n    val parentPath = Paths.get(args.headOption.getOrElse(\".\"))\n    val (_, predTest) = runPrediction(parentPath.resolve(\"veterans_lung_cancer.csv\"), 70)\n    val list = predTest.collect().asScala\n    println(list.length)\n  
}\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.example.spark\n\nimport org.apache.spark.ml.{Pipeline, PipelineModel}\nimport org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator\nimport org.apache.spark.ml.feature._\nimport org.apache.spark.ml.tuning._\nimport org.apache.spark.sql.{DataFrame, SparkSession}\nimport org.apache.spark.sql.types._\n\nimport ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}\n\n// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)\n\nobject SparkMLlibPipeline {\n\n  def main(args: Array[String]): Unit = {\n\n    if (args.length != 3 && args.length != 4) {\n      println(\"Usage: SparkMLlibPipeline input_path native_model_path pipeline_model_path \" +\n        \"[cpu|gpu]\")\n      sys.exit(1)\n    }\n\n    val inputPath = args(0)\n    val nativeModelPath = args(1)\n    val pipelineModelPath = args(2)\n\n    val (device, numWorkers) = if (args.length == 4 && args(3) == \"gpu\") {\n      (\"cuda\", 1)\n    } else (\"cpu\", 2)\n\n    val spark = SparkSession\n      .builder()\n      .appName(\"XGBoost4J-Spark Pipeline Example\")\n      .getOrCreate()\n\n    run(spark, inputPath, nativeModelPath, pipelineModelPath, device, numWorkers)\n      .show(false)\n  }\n  private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,\n                         
pipelineModelPath: String, device: String,\n                         numWorkers: Int): DataFrame = {\n\n    // Load dataset\n    val schema = new StructType(Array(\n      StructField(\"sepal length\", DoubleType, true),\n      StructField(\"sepal width\", DoubleType, true),\n      StructField(\"petal length\", DoubleType, true),\n      StructField(\"petal width\", DoubleType, true),\n      StructField(\"class\", StringType, true)))\n\n    val rawInput = spark.read.schema(schema).csv(inputPath)\n\n    // Split training and test dataset\n    val Array(training, test) = rawInput.randomSplit(Array(0.8, 0.2), 123)\n\n    // Build ML pipeline, it includes 4 stages:\n    // 1, Assemble all features into a single vector column.\n    // 2, From string label to indexed double label.\n    // 3, Use XGBoostClassifier to train classification model.\n    // 4, Convert indexed double label back to original string label.\n    val assembler = new VectorAssembler()\n      .setInputCols(Array(\"sepal length\", \"sepal width\", \"petal length\", \"petal width\"))\n      .setOutputCol(\"features\")\n    val labelIndexer = new StringIndexer()\n      .setInputCol(\"class\")\n      .setOutputCol(\"classIndex\")\n      .fit(training)\n    val booster = new XGBoostClassifier(\n      Map(\n        \"eta\" -> 0.1f,\n        \"max_depth\" -> 2,\n        \"objective\" -> \"multi:softprob\",\n        \"num_class\" -> 3,\n        \"device\" -> device\n      )\n    ).setNumRound(10).setNumWorkers(numWorkers)\n    booster.setFeaturesCol(\"features\")\n    booster.setLabelCol(\"classIndex\")\n    val labelConverter = new IndexToString()\n      .setInputCol(\"prediction\")\n      .setOutputCol(\"realLabel\")\n      .setLabels(labelIndexer.labelsArray(0))\n\n    val pipeline = new Pipeline()\n      .setStages(Array(assembler, labelIndexer, booster, labelConverter))\n    val model: PipelineModel = pipeline.fit(training)\n\n    // Batch prediction\n    val prediction = model.transform(test)\n    
prediction.show(false)\n\n    // Model evaluation\n    val evaluator = new MulticlassClassificationEvaluator()\n    evaluator.setLabelCol(\"classIndex\")\n    evaluator.setPredictionCol(\"prediction\")\n    val accuracy = evaluator.evaluate(prediction)\n    println(\"The model accuracy is : \" + accuracy)\n\n    // Tune model using cross validation\n    val paramGrid = new ParamGridBuilder()\n      .addGrid(booster.maxDepth, Array(3, 8))\n      .addGrid(booster.eta, Array(0.2, 0.6))\n      .build()\n    val cv = new CrossValidator()\n      .setEstimator(pipeline)\n      .setEvaluator(evaluator)\n      .setEstimatorParamMaps(paramGrid)\n      .setNumFolds(3)\n\n    val cvModel = cv.fit(training)\n\n    val bestModel = cvModel.bestModel.asInstanceOf[PipelineModel].stages(2)\n      .asInstanceOf[XGBoostClassificationModel]\n    println(\"The params of best XGBoostClassification model : \" +\n      bestModel.extractParamMap())\n    println(\"The training summary of best XGBoostClassificationModel : \" +\n      bestModel.summary)\n\n    // Export the XGBoostClassificationModel as local XGBoost model,\n    // then you can load it back in local Python environment.\n    bestModel.nativeBooster.saveModel(nativeModelPath)\n\n    // ML pipeline persistence\n    model.write.overwrite().save(pipelineModelPath)\n\n    // Load a saved model and serving\n    val model2 = PipelineModel.load(pipelineModelPath)\n    model2.transform(test)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala",
    "content": "/*\n Copyright (c) 2014-2022 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.example.spark\n\nimport org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}\nimport org.apache.spark.sql.{DataFrame, SparkSession}\nimport org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}\n\nimport ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier\n\n\n// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)\nobject SparkTraining {\n\n  def main(args: Array[String]): Unit = {\n    if (args.length < 1) {\n      // scalastyle:off\n      println(\"Usage: program input_path [cpu|gpu]\")\n      sys.exit(1)\n    }\n\n    val (device, numWorkers) = if (args.length == 2 && args(1) == \"gpu\") {\n      (\"cuda\", 1)\n    } else (\"cpu\", 2)\n\n    val spark = SparkSession.builder().getOrCreate()\n    val inputPath = args(0)\n    val results: DataFrame = run(spark, inputPath, device, numWorkers)\n    results.show()\n  }\n\nprivate[spark] def run(spark: SparkSession, inputPath: String,\n                       device: String, numWorkers: Int): DataFrame =  {\n    val schema = new StructType(Array(\n      StructField(\"sepal length\", DoubleType, true),\n      StructField(\"sepal width\", DoubleType, true),\n      StructField(\"petal length\", DoubleType, true),\n      StructField(\"petal width\", DoubleType, true),\n      StructField(\"class\", StringType, true)))\n    val 
rawInput = spark.read.schema(schema).csv(inputPath)\n\n    // transform class to index to make xgboost happy\n    val stringIndexer = new StringIndexer()\n      .setInputCol(\"class\")\n      .setOutputCol(\"classIndex\")\n      .fit(rawInput)\n    val labelTransformed = stringIndexer.transform(rawInput).drop(\"class\")\n    // compose all feature columns as vector\n    val vectorAssembler = new VectorAssembler().\n      setInputCols(Array(\"sepal length\", \"sepal width\", \"petal length\", \"petal width\")).\n      setOutputCol(\"features\")\n    val xgbInput = vectorAssembler.transform(labelTransformed).select(\"features\",\n      \"classIndex\")\n\n    val Array(train, eval1, eval2, test) = xgbInput.randomSplit(Array(0.6, 0.2, 0.1, 0.1))\n\n    /**\n     * setup spark.scheduler.barrier.maxConcurrentTasksCheck.interval and\n     * spark.scheduler.barrier.maxConcurrentTasksCheck.maxFailures to make this application\n     * fail if it cannot get enough resources to get 2 workers within interval * maxFailures s\n     *\n     * setup \"checkpoint_path\" -> \"/checkpoints\" and \"checkpoint_interval\" -> 2 to save\n     * checkpoint for every two iterations\n     */\n    val xgbParam = Map(\"eta\" -> 0.1f,\n      \"max_depth\" -> 2,\n      \"objective\" -> \"multi:softprob\",\n      \"num_class\" -> 3,\n      \"eval_sets\" -> Map(\"eval1\" -> eval1, \"eval2\" -> eval2),\n      \"device\" -> device)\n    val xgbClassifier = new XGBoostClassifier(xgbParam).\n      setFeaturesCol(\"features\").\n      setLabelCol(\"classIndex\")\n      .setNumWorkers(numWorkers)\n      .setNumRound(10)\n    val xgbClassificationModel = xgbClassifier.fit(train)\n    xgbClassificationModel.transform(test)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/util/CustomEval.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example.util\n\nimport org.apache.commons.logging.{Log, LogFactory}\n\nimport ml.dmlc.xgboost4j.java.XGBoostError\nimport ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait}\n\nclass CustomEval extends EvalTrait {\n  private val logger: Log = LogFactory.getLog(classOf[CustomEval])\n  /**\n   * get evaluate metric\n   *\n   * @return evalMetric\n   */\n  override def getMetric: String = {\n    \"custom_error\"\n  }\n\n  /**\n   * evaluate with predicts and data\n   *\n   * @param predicts predictions as array\n   * @param dmat     data matrix to evaluate\n   * @return result of the metric\n   */\n  override def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float = {\n    var error: Float = 0f\n    var labels: Array[Float] = null\n    try {\n      labels = dmat.getLabel\n    } catch {\n      case ex: XGBoostError =>\n        logger.error(ex)\n        return -1f\n    }\n    val nrow: Int = predicts.length\n    for (i <- 0 until nrow) {\n      if (labels(i) == 0.0 && predicts(i)(0) > 0.5) {\n        error += 1\n      } else if (labels(i) == 1.0 && predicts(i)(0) <= 0.5) {\n        error += 1\n      }\n    }\n    error / labels.length\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/test/java/ml/dmlc/xgboost4j/java/example/JavaExamplesTest.java",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example;\n\nimport java.io.IOException;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\nimport org.junit.Test;\n\n\npublic class JavaExamplesTest {\n\n  @Test\n  public void testExamples() throws XGBoostError, IOException {\n    String[] args = {\"\"};\n    System.out.println(\"BasicWalkThrough\");\n    BasicWalkThrough.main(args);\n    System.out.println(\"BoostFromPrediction\");\n    BoostFromPrediction.main(args);\n    System.out.println(\"CrossValidation\");\n    CrossValidation.main(args);\n    System.out.println(\"CustomObjective\");\n    CustomObjective.main(args);\n    System.out.println(\"EarlyStopping\");\n    EarlyStopping.main(args);\n    System.out.println(\"GeneralizedLinearModel\");\n    GeneralizedLinearModel.main(args);\n    System.out.println(\"PredictFirstNtree\");\n    PredictFirstNtree.main(args);\n    System.out.println(\"PredictLeafIndices\");\n    PredictLeafIndices.main(args);\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java.example.flink\n\nimport java.nio.file.Paths\n\nimport org.apache.flink.api.java.ExecutionEnvironment\nimport org.scalatest.Inspectors._\nimport org.scalatest.funsuite.AnyFunSuite\nimport org.scalatest.matchers.should.Matchers._\n\n\nclass DistTrainWithFlinkExampleTest extends AnyFunSuite {\n  private val parentPath = Paths.get(\"../../\").resolve(\"demo\").resolve(\"data\")\n  private val data = parentPath.resolve(\"veterans_lung_cancer.csv\")\n\n  test(\"Smoke test for scala flink example\") {\n    val env = ExecutionEnvironment.createLocalEnvironment(1)\n    val tuple2 = DistTrainWithFlinkExample.runPrediction(env, data, 70)\n    val results = tuple2.f1.collect()\n    results should have size 41\n    forEvery(results)(item => item should have size 1)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/ScalaExamplesTest.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example\n\nimport org.scalatest.funsuite.AnyFunSuite\n\nclass ScalaExamplesTest extends AnyFunSuite {\n  test(\"Smoke test for Scala examples\") {\n    val args = Array(\"\")\n    println(\"BasicWalkThrough\")\n    BasicWalkThrough.main(args)\n    println(\"BoostFromPrediction\")\n    BoostFromPrediction.main(args)\n    println(\"CrossValidation\")\n    CrossValidation.main(args)\n    println(\"CustomObjective\")\n    CustomObjective.main(args)\n    println(\"GeneralizedLinearModel\")\n    GeneralizedLinearModel.main(args)\n    println(\"PredictFirstNTree\")\n    PredictFirstNTree.main(args)\n    println(\"PredictLeafIndices\")\n    PredictLeafIndices.main(args)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example.flink\n\nimport java.nio.file.Paths\n\nimport scala.jdk.CollectionConverters._\n\nimport org.apache.flink.api.java.ExecutionEnvironment\nimport org.scalatest.Inspectors._\nimport org.scalatest.funsuite.AnyFunSuite\nimport org.scalatest.matchers.should.Matchers._\n\nclass DistTrainWithFlinkSuite extends AnyFunSuite {\n  private val parentPath = Paths.get(\"../../\").resolve(\"demo\").resolve(\"data\")\n  private val data = parentPath.resolve(\"veterans_lung_cancer.csv\")\n\n  test(\"Smoke test for scala flink example\") {\n    implicit val env: ExecutionEnvironment = ExecutionEnvironment.createLocalEnvironment(1)\n    val (_, result) = DistTrainWithFlink.runPrediction(data, 70)\n    val results = result.collect().asScala\n    results should have size 41\n    forEvery(results)(item => item should have size 1)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.example.spark\n\n\nimport java.io.File\nimport java.nio.file.{Files, StandardOpenOption}\n\nimport scala.jdk.CollectionConverters._\nimport scala.util.{Random, Try}\n\nimport org.apache.spark.sql.SparkSession\nimport org.scalatest.BeforeAndAfterAll\nimport org.scalatest.funsuite.AnyFunSuite\nimport org.slf4j.LoggerFactory\n\nclass SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {\n  private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])\n  private val random = new Random(42)\n  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)\n\n  private val pathToTestDataset = Files.createTempFile(\"\", \"iris.csv\").toAbsolutePath\n  private var spark: SparkSession = _\n\n  override def beforeAll(): Unit = {\n\n    def generateLine(i: Int): String = {\n      val getIrisName = (int: Int) => {\n        int % 3 match {\n          case 0 => \"Iris-versicolor\"\n          case 1 => \"Iris-virginica\"\n          case 2 => \"Iris-setosa\"\n        }\n      }\n      val generateValue = () => Math.abs(random.nextInt(99) * 0.1)\n      val sepalLength = generateValue()\n      val sepalWidth = generateValue()\n      val petalLength = generateValue()\n      val petalWidth = generateValue()\n      val irisName = getIrisName(Math.abs(random.nextInt()) + i)\n      
s\"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName\"\n    }\n\n    if (spark == null) {\n      spark = SparkSession\n        .builder()\n        .appName(\"XGBoost4J-Spark Pipeline Example\")\n        .master(s\"local[${numWorkers}]\")\n        .config(\"spark.ui.enabled\", value = false)\n        .config(\"spark.driver.memory\", \"512m\")\n        .config(\"spark.barrier.sync.timeout\", 10)\n        .config(\"spark.task.cpus\", 1)\n        .getOrCreate()\n      spark.sparkContext.setLogLevel(\"ERROR\")\n    }\n    val data = (0 until 150)\n      .map(i => generateLine(i))\n      .toList\n      .asJava\n    Files.write(pathToTestDataset,\n      data,\n      StandardOpenOption.CREATE,\n      StandardOpenOption.WRITE,\n      StandardOpenOption.TRUNCATE_EXISTING)\n    logger.info(s\"${new String(Files.readAllBytes(pathToTestDataset))}\")\n\n  }\n\n  override def afterAll(): Unit = {\n    if (spark != null) {\n      spark.stop()\n      cleanExternalCache(spark.sparkContext.appName)\n      spark = null\n    }\n\n    Try(Files.deleteIfExists(pathToTestDataset))\n      .recover {\n        case e =>\n          logger.warn(\n            s\"Could not delete temporary file $pathToTestDataset. 
Please, remove it manually\",\n            e\n          )\n          true\n      }\n  }\n\n  private def cleanExternalCache(prefix: String): Unit = {\n    val dir = new File(\".\")\n    for (file <- dir.listFiles() if file.getName.startsWith(prefix)) {\n      file.delete()\n    }\n  }\n\n  test(\"Smoke test for SparkMLlibPipeline example\") {\n    SparkMLlibPipeline.run(spark, pathToTestDataset.toString, \"target/native-model\",\n      \"target/pipeline-model\", \"cpu\", 2)\n  }\n\n  test(\"Smoke test for SparkTraining example\") {\n    val spark = SparkSession\n      .builder()\n      .appName(\"XGBoost4J-Spark Pipeline Example\")\n      .master(s\"local[${numWorkers}]\")\n      .config(\"spark.ui.enabled\", value = false)\n      .config(\"spark.driver.memory\", \"512m\")\n      .config(\"spark.barrier.sync.timeout\", 10)\n      .config(\"spark.task.cpus\", 1)\n      .getOrCreate()\n\n    SparkTraining.run(spark, pathToTestDataset.toString, \"cpu\", 2)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-flink/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n    <parent>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost-jvm_2.12</artifactId>\n        <version>3.3.0-SNAPSHOT</version>\n    </parent>\n\n    <name>xgboost4j-flink</name>\n    <artifactId>xgboost4j-flink_2.12</artifactId>\n    <version>3.3.0-SNAPSHOT</version>\n    <properties>\n      <flink-ml.version>2.2.0</flink-ml.version>\n    </properties>\n    <build>\n        <plugins>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-assembly-plugin</artifactId>\n                <configuration>\n                    <skipAssembly>false</skipAssembly>\n                </configuration>\n            </plugin>\n        </plugins>\n    </build>\n    <packaging>jar</packaging>\n    <dependencies>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            <artifactId>xgboost4j_2.12</artifactId>\n            <version>${project.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.flink</groupId>\n            <artifactId>flink-clients</artifactId>\n            <version>${flink.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.flink</groupId>\n            <artifactId>flink-ml-servable-core</artifactId>\n            <version>${flink-ml.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.hadoop</groupId>\n            <artifactId>hadoop-common</artifactId>\n            <version>${hadoop.version}</version>\n        </dependency>\n    </dependencies>\n\n</project>\n"
  },
  {
    "path": "jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java.flink;\n\n\nimport java.util.HashMap;\nimport java.util.Iterator;\nimport java.util.Map;\nimport java.util.Optional;\nimport java.util.function.Function;\nimport java.util.stream.Collectors;\nimport java.util.stream.StreamSupport;\n\nimport org.apache.flink.api.common.functions.RichMapPartitionFunction;\nimport org.apache.flink.api.java.DataSet;\nimport org.apache.flink.api.java.tuple.Tuple2;\nimport org.apache.flink.ml.linalg.SparseVector;\nimport org.apache.flink.ml.linalg.Vector;\nimport org.apache.flink.util.Collector;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.FSDataInputStream;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\n\nimport ml.dmlc.xgboost4j.LabeledPoint;\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.Communicator;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.RabitTracker;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n\npublic class XGBoost {\n  private static final Logger logger = LoggerFactory.getLogger(XGBoost.class);\n\n  private static class MapFunction\n      extends RichMapPartitionFunction<Tuple2<Vector, Double>, XGBoostModel> {\n\n    private final Map<String, Object> params;\n    private final int round;\n    private final Map<String, 
Object> workerEnvs;\n\n    public MapFunction(Map<String, Object> params, int round, Map<String, Object> workerEnvs) {\n      this.params = params;\n      this.round = round;\n      this.workerEnvs = workerEnvs;\n    }\n\n    /**\n     * Trains the XGBoost model based on the data elements in the given partition.\n     *\n     * @param it The iterable object consisting of Tuple2 instances where the first field (f0)\n     *           is a Vector representing the features and the second field (f1) is a Double\n     *           representing the label.\n     * @param collector The collector object that is used to emit the trained XGBoost model.\n     * @throws XGBoostError Error thrown during training.\n     */\n    public void mapPartition(java.lang.Iterable<Tuple2<Vector, Double>> it,\n                             Collector<XGBoostModel> collector) throws XGBoostError {\n      workerEnvs.put(\n          \"DMLC_TASK_ID\",\n          String.valueOf(this.getRuntimeContext().getIndexOfThisSubtask())\n      );\n\n      if (logger.isInfoEnabled()) {\n        logger.info(\"start with env: {}\", workerEnvs.entrySet().stream()\n            .map(e -> String.format(\"\\\"%s\\\": \\\"%s\\\"\", e.getKey(), e.getValue()))\n            .collect(Collectors.joining(\", \"))\n        );\n      }\n\n      final Iterator<LabeledPoint> dataIter =\n          StreamSupport\n            .stream(it.spliterator(), false)\n            .map(VectorToPointMapper.INSTANCE)\n            .iterator();\n\n      if (dataIter.hasNext()) {\n        final DMatrix trainMat = new DMatrix(dataIter, null);\n        int numEarlyStoppingRounds =\n            Optional.ofNullable(params.get(\"numEarlyStoppingRounds\"))\n              .map(x -> Integer.parseInt(x.toString()))\n              .orElse(0);\n\n        final Booster booster = trainBooster(trainMat, numEarlyStoppingRounds);\n        collector.collect(new XGBoostModel(booster));\n      } else {\n        logger.warn(\"Nothing to train with.\");\n      }\n   
 }\n\n    private Booster trainBooster(DMatrix trainMat,\n                                 int numEarlyStoppingRounds) throws XGBoostError {\n      Booster booster;\n      final Map<String, DMatrix> watches =\n          new HashMap<String, DMatrix>() {{ put(\"train\", trainMat); }};\n      try {\n        Communicator.init(workerEnvs);\n        booster = ml.dmlc.xgboost4j.java.XGBoost\n          .train(\n            trainMat,\n            params,\n            round,\n            watches,\n            null,\n            null,\n            null,\n            numEarlyStoppingRounds);\n      } catch (XGBoostError xgbException) {\n        final String identifier = String.valueOf(this.getRuntimeContext().getIndexOfThisSubtask());\n        logger.warn(\n            String.format(\"XGBooster worker %s has failed due to\", identifier),\n            xgbException\n        );\n        throw xgbException;\n      } finally {\n        Communicator.shutdown();\n      }\n      return booster;\n    }\n\n    private static class VectorToPointMapper\n        implements Function<Tuple2<Vector, Double>, LabeledPoint> {\n      public static VectorToPointMapper INSTANCE = new VectorToPointMapper();\n      @Override\n      public LabeledPoint apply(Tuple2<Vector, Double> tuple) {\n        final SparseVector vector = tuple.f0.toSparse();\n        final double[] values = vector.values;\n        final int size = values.length;\n        final float[] array = new float[size];\n        for (int i = 0; i < size; i++) {\n          array[i] = (float) values[i];\n        }\n        return new LabeledPoint(\n          tuple.f1.floatValue(),\n          vector.size(),\n          vector.indices,\n          array);\n      }\n    }\n  }\n\n  /**\n   * Load XGBoost model from path, using Hadoop Filesystem API.\n   *\n   * @param modelPath The path that is accessible by hadoop filesystem API.\n   * @return The loaded model\n   */\n  public static XGBoostModel loadModelFromHadoopFile(final String modelPath) 
throws Exception {\n    final FileSystem fileSystem = FileSystem.get(new Configuration());\n    final Path f = new Path(modelPath);\n\n    try (FSDataInputStream opened = fileSystem.open(f)) {\n      return new XGBoostModel(ml.dmlc.xgboost4j.java.XGBoost.loadModel(opened));\n    }\n  }\n\n  /**\n   * Train an XGBoost model with Flink.\n   *\n   * @param dtrain The training data.\n   * @param params XGBoost parameters.\n   * @param numBoostRound  Number of rounds to train.\n   */\n  public static XGBoostModel train(DataSet<Tuple2<Vector, Double>> dtrain,\n                                   Map<String, Object> params,\n                                   int numBoostRound) throws Exception {\n    final RabitTracker tracker =\n        new RabitTracker(dtrain.getExecutionEnvironment().getParallelism());\n    if (tracker.start()) {\n      return dtrain\n        .mapPartition(new MapFunction(params, numBoostRound, tracker.getWorkerArgs()))\n        .reduce((x, y) -> x)\n        .collect()\n        .get(0);\n    } else {\n      throw new Error(\"Tracker cannot be started\");\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoostModel.java",
    "content": "/*\n Copyright (c) 2014-2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java.flink;\nimport java.io.IOException;\nimport java.io.Serializable;\nimport java.util.Arrays;\nimport java.util.Iterator;\nimport java.util.stream.StreamSupport;\n\nimport org.apache.commons.lang3.ArrayUtils;\nimport org.apache.flink.api.common.functions.MapPartitionFunction;\nimport org.apache.flink.api.java.DataSet;\nimport org.apache.flink.ml.linalg.SparseVector;\nimport org.apache.flink.ml.linalg.Vector;\nimport org.apache.flink.util.Collector;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\n\nimport ml.dmlc.xgboost4j.LabeledPoint;\nimport ml.dmlc.xgboost4j.java.Booster;\nimport ml.dmlc.xgboost4j.java.DMatrix;\nimport ml.dmlc.xgboost4j.java.XGBoostError;\n\n\npublic class XGBoostModel implements Serializable {\n  private static final org.slf4j.Logger logger =\n      org.slf4j.LoggerFactory.getLogger(XGBoostModel.class);\n\n  private final Booster booster;\n  private final PredictorFunction predictorFunction;\n\n\n  public XGBoostModel(Booster booster) {\n    this.booster = booster;\n    this.predictorFunction = new PredictorFunction(booster);\n  }\n\n  /**\n   * Save the model as a Hadoop filesystem file.\n   *\n   * @param modelPath The model path as in Hadoop path.\n   */\n  public void saveModelAsHadoopFile(String modelPath) throws IOException, 
XGBoostError {\n    booster.saveModel(FileSystem.get(new Configuration()).create(new Path(modelPath)));\n  }\n\n  public byte[] toByteArray(String format) throws XGBoostError {\n    return booster.toByteArray(format);\n  }\n\n  /**\n   * Save the model as a Hadoop filesystem file.\n   *\n   * @param modelPath The model path as in Hadoop path.\n   * @param format The model format (ubj, json, deprecated)\n   * @throws XGBoostError internal error\n   * @throws IOException save error\n   */\n  public void saveModelAsHadoopFile(String modelPath, String format)\n      throws IOException, XGBoostError {\n    booster.saveModel(FileSystem.get(new Configuration()).create(new Path(modelPath)), format);\n  }\n\n  /**\n   * predict with the given DMatrix\n   *\n   * @param testSet the local test set represented as DMatrix\n   * @return prediction result\n   */\n  public float[][] predict(DMatrix testSet) throws XGBoostError {\n    return booster.predict(testSet, true, 0);\n  }\n\n  /**\n   * Predict given vector dataset.\n   *\n   * @param data The dataset to be predicted.\n   * @return The prediction result.\n   */\n  public DataSet<Float[]> predict(DataSet<Vector> data) {\n    return data.mapPartition(predictorFunction);\n  }\n\n\n  private static class PredictorFunction implements MapPartitionFunction<Vector, Float[]> {\n\n    private final Booster booster;\n\n    public PredictorFunction(Booster booster) {\n      this.booster = booster;\n    }\n\n    @Override\n    public void mapPartition(Iterable<Vector> it, Collector<Float[]> out) throws Exception {\n      final Iterator<LabeledPoint> dataIter =\n          StreamSupport.stream(it.spliterator(), false)\n            .map(Vector::toSparse)\n            .map(PredictorFunction::fromVector)\n            .iterator();\n\n      if (dataIter.hasNext()) {\n        final DMatrix data = new DMatrix(dataIter, null);\n        float[][] predictions = booster.predict(data, true, 2);\n        
Arrays.stream(predictions).map(ArrayUtils::toObject).forEach(out::collect);\n      } else {\n        logger.debug(\"Empty partition\");\n      }\n    }\n\n    private static LabeledPoint fromVector(SparseVector vector) {\n      final int[] index = vector.indices;\n      final double[] value = vector.values;\n      int size = value.length;\n      final float[] values = new float[size];\n      for (int i = 0; i < size; i++) {\n        values[i] = (float) value[i];\n      }\n      return new LabeledPoint(0.0f, vector.size(), index, values);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n    <parent>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost-jvm_2.12</artifactId>\n        <version>3.3.0-SNAPSHOT</version>\n    </parent>\n    <name>xgboost4j-spark</name>\n    <artifactId>xgboost4j-spark_2.12</artifactId>\n    <build>\n        <plugins>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-assembly-plugin</artifactId>\n                <configuration>\n                    <skipAssembly>false</skipAssembly>\n                </configuration>\n            </plugin>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-shade-plugin</artifactId>\n                <configuration>\n                  <createDependencyReducedPom>true</createDependencyReducedPom>\n                  <useDependencyReducedPomInJar>true</useDependencyReducedPomInJar>\n                  <shadedArtifactAttached>false</shadedArtifactAttached>\n                  <artifactSet>\n                    <includes>\n                      <include>ml.dmlc:xgboost4j_${scala.binary.version}</include>\n                    </includes>\n                  </artifactSet>\n                </configuration>\n                <executions>\n                  <execution>\n                    <phase>package</phase>\n                    <goals>\n                      <goal>shade</goal>\n                    </goals>\n                  </execution>\n                </executions>\n              </plugin>\n        </plugins>\n    </build>\n    <dependencies>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            
<artifactId>xgboost4j_2.12</artifactId>\n            <version>${project.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-core_${scala.binary.version}</artifactId>\n            <version>${spark.version}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-sql_${scala.binary.version}</artifactId>\n            <version>${spark.version}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-mllib_${scala.binary.version}</artifactId>\n            <version>${spark.version}</version>\n            <scope>provided</scope>\n        </dependency>\n    </dependencies>\n</project>\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.apache.spark.ml.feature.{LabeledPoint => MLLabeledPoint}\nimport org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}\nimport org.json4s.{DefaultFormats, FullTypeHints, JField, JValue, NoTypeHints, TypeHints}\n\nimport ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}\n\nprivate[scala] object Utils {\n\n  private[spark] implicit class XGBLabeledPointFeatures(\n      val labeledPoint: XGBLabeledPoint\n  ) extends AnyVal {\n    /** Converts the point to [[MLLabeledPoint]]. 
*/\n    private[spark] def asML: MLLabeledPoint = {\n      MLLabeledPoint(labeledPoint.label, labeledPoint.features)\n    }\n\n    /**\n     * Returns feature of the point as [[org.apache.spark.ml.linalg.Vector]].\n     */\n    def features: Vector = if (labeledPoint.indices == null) {\n      Vectors.dense(labeledPoint.values.map(_.toDouble))\n    } else {\n      Vectors.sparse(labeledPoint.size, labeledPoint.indices, labeledPoint.values.map(_.toDouble))\n    }\n  }\n\n  private[spark] implicit class MLVectorToXGBLabeledPoint(val v: Vector) extends AnyVal {\n    /**\n     * Converts a [[Vector]] to a data point with a dummy label.\n     *\n     * This is needed for constructing a [[ml.dmlc.xgboost4j.scala.DMatrix]]\n     * for prediction.\n     */\n    // TODO support sparsevector\n    def asXGB: XGBLabeledPoint = v match {\n      case v: DenseVector =>\n        new XGBLabeledPoint(0.0f, v.size, null, v.values.map(_.toFloat))\n      case v: SparseVector =>\n        new XGBLabeledPoint(0.0f, v.size, v.indices, v.toDense.values.map(_.toFloat))\n    }\n  }\n\n  def getSparkClassLoader: ClassLoader = getClass.getClassLoader\n\n  def getContextOrSparkClassLoader: ClassLoader =\n    Option(Thread.currentThread().getContextClassLoader).getOrElse(getSparkClassLoader)\n\n  // scalastyle:off classforname\n\n  /** Preferred alternative to Class.forName(className) */\n  def classForName(className: String): Class[_] = {\n    Class.forName(className, true, getContextOrSparkClassLoader)\n    // scalastyle:on classforname\n  }\n\n  /**\n   * Get the TypeHints according to the value\n   *\n   * @param value the instance of class to be serialized\n   * @return if value is null,\n   *         return NoTypeHints\n   *         else return the FullTypeHints.\n   *\n   *         The FullTypeHints will save the full class name into the \"jsonClass\" of the json,\n   *         so we can find the jsonClass and turn it to FullTypeHints when deserializing.\n   */\n  def 
getTypeHintsFromClass(value: Any): TypeHints = {\n    if (value == null) { // XGBoost will save the default value (null)\n      NoTypeHints\n    } else {\n      FullTypeHints(List(value.getClass))\n    }\n  }\n\n  /**\n   * Get the TypeHints according to the saved jsonClass field\n   *\n   * @param json the JSON value searched for a \"jsonClass\" field\n   * @return FullTypeHints for the class named by that field,\n   *         or NoTypeHints if the field is absent\n   */\n  def getTypeHintsFromJsonClass(json: JValue): TypeHints = {\n    val jsonClassField = json findField {\n      case JField(\"jsonClass\", _) => true\n      case _ => false\n    }\n\n    jsonClassField.map { field =>\n      implicit val formats = DefaultFormats\n      val className = field._2.extract[String]\n      FullTypeHints(List(Utils.classForName(className)))\n    }.getOrElse(NoTypeHints)\n  }\n\n  val TRAIN_NAME = \"train\"\n  val VALIDATION_NAME = \"eval\"\n\n  val TMP_FEATURE_ARRAY_NAME = \"xgboost_eGdib29zdC1qdm0K_jvm\"\n\n  /** Executes the provided code block and then closes the resource */\n  def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = {\n    try {\n      block(r)\n    } finally {\n      r.close()\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala",
    "content": "/*\n Copyright (c) 2014-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\n\nimport scala.jdk.CollectionConverters._\n\nimport org.apache.commons.io.FileUtils\nimport org.apache.commons.logging.LogFactory\nimport org.apache.spark.{SparkConf, SparkContext, TaskContext}\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.resource.{ResourceProfileBuilder, TaskResourceRequests}\n\nimport ml.dmlc.xgboost4j.java.{Communicator, ConfigContext, RabitTracker, XGBoostJNI}\nimport ml.dmlc.xgboost4j.scala.{XGBoost => SXGBoost, _}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\n\nprivate[spark] case class RuntimeParams(\n    numWorkers: Int,\n    numRounds: Int,\n    trackerConf: TrackerConf,\n    earlyStoppingRounds: Int,\n    device: String,\n    isLocal: Boolean,\n    runOnGpu: Boolean,\n    obj: Option[ObjectiveTrait] = None,\n    eval: Option[EvalTrait] = None,\n    configs: Map[String, AnyRef] = Map.empty)\n\n/**\n * A trait to manage stage-level scheduling\n */\nprivate[spark] trait StageLevelScheduling extends Serializable {\n  private val logger = LogFactory.getLog(\"XGBoostSpark\")\n\n  private[spark] def isStandaloneOrLocalCluster(conf: SparkConf): Boolean = {\n    val master = conf.get(\"spark.master\")\n    master != null && (master.startsWith(\"spark://\") || master.startsWith(\"local-cluster\"))\n  }\n\n  /**\n   * To determine if stage-level scheduling 
should be skipped according to the spark version\n   * and spark configurations\n   *\n   * @param sparkVersion spark version\n   * @param runOnGpu     if xgboost training run on GPUs\n   * @param conf         spark configurations\n   * @return Boolean to skip stage-level scheduling or not\n   */\n  private[spark] def skipStageLevelScheduling(sparkVersion: String,\n                                              runOnGpu: Boolean,\n                                              conf: SparkConf): Boolean = {\n    if (runOnGpu) {\n      if (sparkVersion < \"3.4.0\") {\n        logger.info(\"Stage-level scheduling in xgboost requires spark version 3.4.0+\")\n        return true\n      }\n\n      if (!isStandaloneOrLocalCluster(conf)) {\n        logger.info(\"Stage-level scheduling in xgboost requires spark standalone or \" +\n          \"local-cluster mode\")\n        return true\n      }\n\n      val executorCores = conf.getInt(\"spark.executor.cores\", -1)\n      val executorGpus = conf.getInt(\"spark.executor.resource.gpu.amount\", -1)\n      if (executorCores == -1 || executorGpus == -1) {\n        logger.info(\"Stage-level scheduling in xgboost requires spark.executor.cores, \" +\n          \"spark.executor.resource.gpu.amount to be set.\")\n        return true\n      }\n\n      if (executorCores == 1) {\n        logger.info(\"Stage-level scheduling in xgboost requires spark.executor.cores > 1\")\n        return true\n      }\n\n      if (executorGpus > 1) {\n        logger.info(\"Stage-level scheduling in xgboost will not work \" +\n          \"when spark.executor.resource.gpu.amount > 1\")\n        return true\n      }\n\n      val taskGpuAmount = conf.getDouble(\"spark.task.resource.gpu.amount\", -1.0).toFloat\n\n      if (taskGpuAmount == -1.0) {\n        // The ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,\n        // but with stage-level scheduling, we can make training task grab the gpu.\n        return false\n      }\n\n      
if (taskGpuAmount == executorGpus.toFloat) {\n        // spark.executor.resource.gpu.amount = spark.task.resource.gpu.amount\n        // results in only 1 task running at a time, which may cause perf issue.\n        return true\n      }\n      // We can enable stage-level scheduling\n      false\n    } else true // Skip stage-level scheduling for cpu training.\n  }\n\n  /**\n   * Attempt to modify the task resources so that only one task can be executed\n   * on a single executor simultaneously.\n   *\n   * @param sc  the spark context\n   * @param rdd the rdd to be applied with new resource profile\n   * @return the original rdd or the modified rdd\n   */\n  private[spark] def tryStageLevelScheduling[T](sc: SparkContext,\n                                                xgbExecParams: RuntimeParams,\n                                                rdd: RDD[T]\n                                               ): RDD[T] = {\n\n    val conf = sc.getConf\n    if (skipStageLevelScheduling(sc.version, xgbExecParams.runOnGpu, conf)) {\n      return rdd\n    }\n\n    // Ensure executor_cores is not None\n    val executor_cores = conf.getInt(\"spark.executor.cores\", -1)\n    if (executor_cores == -1) {\n      throw new RuntimeException(\"Wrong spark.executor.cores\")\n    }\n\n    // Spark-rapids is a GPU-acceleration project for Spark SQL.\n    // When spark-rapids is enabled, we prevent concurrent execution of other ETL tasks\n    // that utilize GPUs alongside training tasks in order to avoid GPU out-of-memory errors.\n    val spark_plugins = conf.get(\"spark.plugins\", \" \")\n    val spark_rapids_sql_enabled = conf.get(\"spark.rapids.sql.enabled\", \"true\")\n\n    // Determine the number of cores required for each task.\n    val task_cores = if (spark_plugins.contains(\"com.nvidia.spark.SQLPlugin\") &&\n      spark_rapids_sql_enabled.toLowerCase == \"true\") {\n      executor_cores\n    } else {\n      (executor_cores / 2) + 1\n    }\n\n    // Each training task 
requires cpu cores > total executor cores//2 + 1 to\n    // ensure tasks are sent to different executors.\n    // Note: We cannot use GPUs to limit concurrent tasks\n    // due to https://issues.apache.org/jira/browse/SPARK-45527.\n    val task_gpus = 1.0\n    val treqs = new TaskResourceRequests().cpus(task_cores).resource(\"gpu\", task_gpus)\n    val rp = new ResourceProfileBuilder().require(treqs).build()\n\n    logger.info(s\"XGBoost training tasks require the resource(cores=$task_cores, gpu=$task_gpus).\")\n    rdd.withResources(rp)\n  }\n}\n\nprivate[spark] object XGBoost extends StageLevelScheduling {\n  private val logger = LogFactory.getLog(\"XGBoostSpark\")\n\n  def getGPUAddrFromResources: Int = {\n    val tc = TaskContext.get()\n    if (tc == null) {\n      throw new RuntimeException(\"Something wrong for task context\")\n    }\n    val resources = tc.resources()\n    if (resources.contains(\"gpu\")) {\n      val addrs = resources(\"gpu\").addresses\n      if (addrs.size > 1) {\n        // TODO should we throw exception ?\n        logger.warn(\"XGBoost only supports 1 gpu per worker\")\n      }\n      // take the first one\n      addrs.head.toInt\n    } else {\n      throw new RuntimeException(\"gpu is not allocated by spark, \" +\n        \"please check if gpu scheduling is enabled\")\n    }\n  }\n\n\n  /**\n   * Train a XGBoost Boost on the dataset in the Watches\n   *\n   * @param watches       holds the dataset to be trained\n   * @param runtimeParams XGBoost runtime parameters\n   * @param xgboostParams XGBoost library parameters\n   * @return a booster and the metrics\n   */\n  private def trainBooster(watches: Watches,\n                           runtimeParams: RuntimeParams,\n                           xgboostParams: Map[String, Any]\n                          ): (Booster, Array[Array[Float]]) = {\n\n    val numEarlyStoppingRounds = runtimeParams.earlyStoppingRounds\n    val metrics = Array.tabulate(watches.size)(_ =>\n      
Array.ofDim[Float](runtimeParams.numRounds))\n\n    val booster = SXGBoost.train(watches.toMap(\"train\"), xgboostParams, runtimeParams.numRounds,\n      watches.toMap, metrics, runtimeParams.obj.orNull,\n      runtimeParams.eval.orNull, earlyStoppingRound = numEarlyStoppingRounds)\n    (booster, metrics)\n  }\n\n  /**\n   * Sets the CUDA device for current process.\n   *\n   * Note: Process exclusive mode is not required because we rely on Spark's resource\n   * scheduler to properly assign GPU resources and prevent multiple executors from\n   * using the same GPU simultaneously.\n   *\n   * @param addr The GPU device address/ID to set and acquire\n   * @return The same GPU device address that was passed in\n   */\n  private def setGpuDeviceAndAcquire(addr: Int): Int = {\n    XGBoostJNI.CudaSetDevice(addr.toInt)\n    addr\n  }\n\n  /**\n   * Train a XGBoost booster with parameters on the dataset\n   *\n   * @param input         the input dataset for training\n   * @param runtimeParams the runtime parameters for jvm\n   * @param xgboostParams the xgboost parameters to pass to xgboost library\n   * @return the booster and the metrics\n   */\n  def train(input: RDD[Watches],\n            runtimeParams: RuntimeParams,\n            xgboostParams: Map[String, Any]): (Booster, Map[String, Array[Float]]) = {\n\n    val sc = input.sparkContext\n    logger.info(s\"Running XGBoost ${spark.VERSION} with parameters: $xgboostParams\")\n\n    // TODO Rabit tracker exception handling.\n    val trackerConf = runtimeParams.trackerConf\n\n    val tracker = new RabitTracker(runtimeParams.numWorkers,\n      trackerConf.hostIp, trackerConf.port, trackerConf.timeout)\n    require(tracker.start(), \"FAULT: Failed to start tracker\")\n\n    try {\n      val rabitEnv = tracker.getWorkerArgs\n\n      val boostersAndMetrics = input.barrier().mapPartitions { iter =>\n        val partitionId = TaskContext.getPartitionId()\n\n        var params = xgboostParams\n        // Set GPU device ID if 
possible\n        if (runtimeParams.runOnGpu) {\n          val gpuId = if (runtimeParams.isLocal) {\n            partitionId % runtimeParams.numWorkers\n          } else {\n            getGPUAddrFromResources\n          }\n          logger.info(\"Leveraging gpu device \" + gpuId + \" to train\")\n          setGpuDeviceAndAcquire(gpuId)\n          params = params + (\"device\" -> s\"cuda:$gpuId\")\n        }\n\n        rabitEnv.put(\"DMLC_TASK_ID\", partitionId.toString)\n        try {\n          Communicator.init(rabitEnv)\n          require(iter.hasNext, \"Failed to create DMatrix\")\n\n          withResource(new ConfigContext(runtimeParams.configs.asJava)) { _ =>\n            val watches = iter.next()\n            try {\n              val (booster, metrics) = trainBooster(watches, runtimeParams, params)\n              if (partitionId == 0) {\n                Iterator(booster -> watches.toMap.keys.zip(metrics).toMap)\n              } else {\n                Iterator.empty\n              }\n            } finally {\n              if (watches != null) {\n                watches.delete()\n              }\n            }\n          }\n        } finally {\n          // If shutdown throws exception, then the real exception for\n          // training will be swallowed,\n          try {\n            Communicator.shutdown()\n          } catch {\n            case e: Throwable =>\n              logger.error(\"Communicator.shutdown error: \", e)\n          }\n        }\n      }\n\n      val rdd = tryStageLevelScheduling(sc, runtimeParams, boostersAndMetrics)\n      // The repartition step is to make training stage as ShuffleMapStage, so that when one\n      // of the training task fails the training stage can retry. 
ResultStage won't retry when\n      // it fails.\n      val (booster, metrics) = rdd.repartition(1).collect()(0)\n      (booster, metrics)\n    } catch {\n      case t: Throwable =>\n        // if the job was aborted due to an exception\n        logger.error(\"XGBoost job was aborted due to \", t)\n        throw t\n    } finally {\n      try {\n        tracker.stop()\n      } catch {\n        case t: Throwable => logger.error(t)\n      }\n    }\n  }\n}\n\nclass Watches private[scala](val datasets: Array[DMatrix],\n                             val names: Array[String],\n                             val cacheDirName: Option[String]) {\n\n  def toMap: Map[String, DMatrix] = {\n    names.zip(datasets).toMap.filter { case (_, matrix) => matrix.rowNum > 0 }\n  }\n\n  def size: Int = toMap.size\n\n  def delete(): Unit = {\n    toMap.values.foreach(_.delete())\n    cacheDirName.foreach { name =>\n      FileUtils.deleteDirectory(new File(name))\n    }\n  }\n\n  override def toString: String = toMap.toString\n}\n\n/**\n * Rabit tracker configurations.\n *\n * @param timeout The number of seconds before timeout waiting for workers to connect. and\n *                for the tracker to shutdown.\n * @param hostIp  The Rabit Tracker host IP address.\n *                This is only needed if the host IP cannot be automatically guessed.\n * @param port    The port number for the tracker to listen to. Use a system allocated one by\n *                default.\n */\nprivate[spark] case class TrackerConf(timeout: Int = 0, hostIp: String = \"\", port: Int = 0)\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifier.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.collection.mutable\n\nimport org.apache.spark.ml.classification.{ProbabilisticClassificationModel, ProbabilisticClassifier}\nimport org.apache.spark.ml.linalg.{Vector, Vectors}\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader}\nimport org.apache.spark.ml.xgboost.{SparkUtils, XGBProbabilisticClassifierParams}\nimport org.apache.spark.sql.Dataset\nimport org.apache.spark.sql.functions.{col, udf}\nimport org.json4s.DefaultFormats\n\nimport ml.dmlc.xgboost4j.scala.Booster\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.{BINARY_CLASSIFICATION_OBJS, MULTICLASSIFICATION_OBJS}\n\nclass XGBoostClassifier(override val uid: String,\n                        private[spark] val xgboostParams: Map[String, Any])\n  extends ProbabilisticClassifier[Vector, XGBoostClassifier, XGBoostClassificationModel]\n    with XGBoostEstimator[XGBoostClassifier, XGBoostClassificationModel]\n    with XGBProbabilisticClassifierParams[XGBoostClassifier] {\n\n  def this() = this(XGBoostClassifier._uid, Map.empty)\n\n  def this(uid: String) = this(uid, Map.empty)\n\n  def this(xgboostParams: Map[String, Any]) = this(XGBoostClassifier._uid, xgboostParams)\n\n  xgboost2SparkParams(xgboostParams)\n\n  private var numberClasses = 0\n\n  private def 
validateObjective(dataset: Dataset[_]): Unit = {\n    // If the objective is set explicitly, it must be in BINARY_CLASSIFICATION_OBJS and\n    // MULTICLASSIFICATION_OBJS\n    val obj = if (isSet(objective)) {\n      val tmpObj = getObjective\n      val supportedObjs = BINARY_CLASSIFICATION_OBJS.toSeq ++ MULTICLASSIFICATION_OBJS.toSeq\n      require(supportedObjs.contains(tmpObj),\n        s\"Wrong objective for XGBoostClassifier, supported objs: ${supportedObjs.mkString(\",\")}\")\n      Some(tmpObj)\n    } else {\n      None\n    }\n\n    def inferNumClasses: Int = {\n      var num = getNumClass\n      // Infer num class if num class is not set explicitly.\n      // Note that user sets the num classes explicitly, we're not checking that.\n      if (num == 0) {\n        num = SparkUtils.getNumClasses(dataset, getLabelCol)\n      }\n      require(num > 0)\n      num\n    }\n\n    // objective is set explicitly.\n    if (obj.isDefined) {\n      if (MULTICLASSIFICATION_OBJS.contains(getObjective)) {\n        numberClasses = inferNumClasses\n        setNumClass(numberClasses)\n      } else {\n        numberClasses = 2\n        // binary classification doesn't require num_class be set\n        require(!isSet(numClass), \"num_class is not allowed for binary classification\")\n      }\n    } else {\n      // infer the objective according to the num_class\n      numberClasses = inferNumClasses\n      if (numberClasses <= 2) {\n        setObjective(\"binary:logistic\")\n        logger.warn(\"Inferred for binary classification, set the objective to binary:logistic\")\n        require(!isSet(numClass), \"num_class is not allowed for binary classification\")\n      } else {\n        logger.warn(\"Inferred for multi classification, set the objective to multi:softprob\")\n        setObjective(\"multi:softprob\")\n        setNumClass(numberClasses)\n      }\n    }\n  }\n\n  /**\n   * Validate the parameters before training, throw exception if possible\n   */\n  override 
protected[spark] def validate(dataset: Dataset[_]): Unit = {\n    super.validate(dataset)\n    validateObjective(dataset)\n  }\n\n  override protected def createModel(booster: Booster, summary: XGBoostTrainingSummary):\n  XGBoostClassificationModel = {\n    new XGBoostClassificationModel(uid, numberClasses, booster, Option(summary))\n  }\n\n}\n\nobject XGBoostClassifier extends DefaultParamsReadable[XGBoostClassifier] {\n  private val _uid = Identifiable.randomUID(\"xgbc\")\n}\n\nclass XGBoostClassificationModel private[ml](\n    val uid: String,\n    val numClasses: Int,\n    val nativeBooster: Booster,\n    val summary: Option[XGBoostTrainingSummary] = None\n) extends ProbabilisticClassificationModel[Vector, XGBoostClassificationModel]\n  with XGBoostModel[XGBoostClassificationModel]\n  with XGBProbabilisticClassifierParams[XGBoostClassificationModel] {\n\n  def this(uid: String) = this(uid, 0, null)\n\n  override protected[spark] def postTransform(dataset: Dataset[_],\n                                              pred: PredictedColumns): Dataset[_] = {\n    var output = super.postTransform(dataset, pred)\n\n    // Always use probability col to get the prediction\n\n    if (isDefinedNonEmpty(predictionCol) && pred.predTmp) {\n      if (getObjective == \"multi:softmax\") {\n        // For objective=multi:softmax scenario, there is no probability predicted from xgboost.\n        // Instead, the probability column will be filled with real prediction\n        val predictUDF = udf { probability: mutable.WrappedArray[Float] =>\n          probability(0)\n        }\n        output = output.withColumn(getPredictionCol, predictUDF(col(TMP_TRANSFORMED_COL)))\n      } else {\n        val predCol = udf { probability: mutable.WrappedArray[Float] =>\n          val prob = probability.map(_.toDouble).toArray\n          val probabilities = if (numClasses == 2) Array(1.0 - prob(0), prob(0)) else prob\n          probability2prediction(Vectors.dense(probabilities))\n        }\n      
  output = output.withColumn(getPredictionCol, predCol(col(TMP_TRANSFORMED_COL)))\n      }\n    }\n\n    if (isDefinedNonEmpty(probabilityCol) && pred.predTmp) {\n      val probabilityUDF = udf { probability: mutable.WrappedArray[Float] =>\n        val prob = probability.map(_.toDouble).toArray\n        val probabilities = if (numClasses == 2) Array(1.0 - prob(0), prob(0)) else prob\n        Vectors.dense(probabilities)\n      }\n      output = output.withColumn(TMP_TRANSFORMED_COL,\n          probabilityUDF(output.col(TMP_TRANSFORMED_COL)))\n        .withColumnRenamed(TMP_TRANSFORMED_COL, getProbabilityCol)\n    }\n\n    if (pred.predRaw) {\n      val rawPredictionUDF = udf { raw: mutable.WrappedArray[Float] =>\n        val rawF = raw.map(_.toDouble).toArray\n        val rawPredictions = if (numClasses == 2) Array(-rawF(0), rawF(0)) else rawF\n        Vectors.dense(rawPredictions)\n      }\n      output = output.withColumn(getRawPredictionCol,\n        rawPredictionUDF(output.col(getRawPredictionCol)))\n    }\n\n    output.drop(TMP_TRANSFORMED_COL)\n  }\n\n  override def copy(extra: ParamMap): XGBoostClassificationModel = {\n    val newModel = copyValues(new XGBoostClassificationModel(uid, numClasses,\n      nativeBooster, summary), extra)\n    newModel.setParent(parent)\n  }\n\n  override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = {\n    throw new Exception(\"XGBoost-Spark does not support \\'raw2probabilityInPlace\\'\")\n  }\n\n  override def predictRaw(features: Vector): Vector =\n    throw new Exception(\"XGBoost-Spark does not support \\'predictRaw\\'\")\n\n}\n\nobject XGBoostClassificationModel extends MLReadable[XGBoostClassificationModel] {\n\n  override def read: MLReader[XGBoostClassificationModel] = new ModelReader\n\n  private class ModelReader extends XGBoostModelReader[XGBoostClassificationModel] {\n    override def load(path: String): XGBoostClassificationModel = {\n      val xgbModel = loadBooster(path)\n      val meta = 
SparkUtils.loadMetadata(path, sc)\n      implicit val format = DefaultFormats\n      val numClasses = (meta.params \\ \"numClass\").extractOpt[Int].getOrElse(2)\n      val model = new XGBoostClassificationModel(meta.uid, numClasses, xgbModel)\n      meta.getAndSetParams(model)\n      model\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimator.scala",
    "content": "/*\n Copyright (c) 2024-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.util.ServiceLoader\n\nimport scala.collection.mutable\nimport scala.collection.mutable.ArrayBuffer\nimport scala.jdk.CollectionConverters._\n\nimport org.apache.commons.logging.LogFactory\nimport org.apache.hadoop.fs.Path\nimport org.apache.spark.ml.{Estimator, Model}\nimport org.apache.spark.ml.functions.array_to_vector\nimport org.apache.spark.ml.linalg.{SparseVector, Vector}\nimport org.apache.spark.ml.param.{Param, ParamMap}\nimport org.apache.spark.ml.util.{DefaultParamsWritable, MLReader, MLWritable, MLWriter}\nimport org.apache.spark.ml.xgboost.{SparkUtils, XGBPredictorParams, XGBProbabilisticClassifierParams}\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.sql._\nimport org.apache.spark.sql.functions.{array, col, udf}\nimport org.apache.spark.sql.types._\n\nimport ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}\nimport ml.dmlc.xgboost4j.java.{Booster => JBooster}\nimport ml.dmlc.xgboost4j.scala.{Booster, DMatrix, XGBoost => SXGBoost}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.MLVectorToXGBLabeledPoint\nimport ml.dmlc.xgboost4j.scala.spark.params._\n\n/**\n * Hold the column index\n */\nprivate[scala] case class ColumnIndices(\n    labelId: Int,\n    featureId: Option[Int], // the feature type is VectorUDT or Array\n    featureIds: Option[Seq[Int]], // the feature type is columnar\n    
weightId: Option[Int],\n    marginId: Option[Int],\n    groupId: Option[Int])\n\nprivate[spark] trait NonParamVariables[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]] {\n\n  private var dataset: Option[Dataset[_]] = None\n\n  def setEvalDataset(ds: Dataset[_]): T = {\n    this.dataset = Some(ds)\n    this.asInstanceOf[T]\n  }\n\n  def getEvalDataset(): Option[Dataset[_]] = {\n    this.dataset\n  }\n}\n\nprivate[spark] object PluginUtils {\n  // Find the XGBoostPlugin by ServiceLoader\n  private val plugin: Option[XGBoostPlugin] = {\n    val classLoader = Option(Thread.currentThread().getContextClassLoader)\n      .getOrElse(getClass.getClassLoader)\n\n    val serviceLoader = ServiceLoader.load(classOf[XGBoostPlugin], classLoader)\n\n    // For now, we only trust GpuXGBoostPlugin.\n    serviceLoader.asScala.filter(x => x.getClass.getName.equals(\n      \"ml.dmlc.xgboost4j.scala.spark.GpuXGBoostPlugin\")).toList match {\n      case Nil => None\n      case head :: Nil =>\n        Some(head)\n      case _ => None\n    }\n  }\n\n  /** Visible for testing */\n  def getPlugin: Option[XGBoostPlugin] = plugin\n\n  def isPluginEnabled(dataset: Dataset[_]): Boolean = {\n    plugin.map(_.isEnabled(dataset)).getOrElse(false)\n  }\n}\n\nprivate[spark] trait XGBoostEstimator[\n  Learner <: XGBoostEstimator[Learner, M], M <: XGBoostModel[M]] extends Estimator[M]\n  with XGBoostParams[Learner] with SparkParams[Learner] with ParamUtils[Learner]\n  with NonParamVariables[Learner, M] with ParamMapConversion with DefaultParamsWritable {\n\n  protected val logger = LogFactory.getLog(\"XGBoostSpark\")\n\n  /**\n   * Cast the field in schema to the desired data type.\n   *\n   * @param schema     the schema of the input dataset\n   * @param name       the column to cast to the target type if needed\n   * @param targetType the target data type\n   * @return the column, cast to the target type when its current type differs\n   */\n  private[spark] def castIfNeeded(schema: StructType,\n                                  name: String,\n                      
            targetType: DataType = FloatType): Column = {\n    if (!(schema(name).dataType == targetType)) {\n      val meta = schema(name).metadata\n      col(name).as(name, meta).cast(targetType)\n    } else {\n      col(name)\n    }\n  }\n\n  /**\n   * Repartition the dataset to the numWorkers if needed.\n   *\n   * @param dataset to be repartition\n   * @return the repartitioned dataset\n   */\n  private[spark] def repartitionIfNeeded(dataset: Dataset[_]): Dataset[_] = {\n    val numPartitions = dataset.rdd.getNumPartitions\n    if (getForceRepartition || getNumWorkers != numPartitions) {\n      dataset.repartition(getNumWorkers)\n    } else {\n      dataset\n    }\n  }\n\n  /**\n   * Sort partition for Ranker issue.\n   *\n   * @param dataset\n   * @return\n   */\n  private[spark] def sortPartitionIfNeeded(dataset: Dataset[_]): Dataset[_] = {\n    dataset\n  }\n\n  /**\n   * Build the columns indices.\n   */\n  private[spark] def buildColumnIndices(schema: StructType): ColumnIndices = {\n    // Get feature id(s)\n    val (featureIds: Option[Seq[Int]], featureId: Option[Int]) =\n      if (getFeaturesCols.length != 0) {\n        // Columnars has been converted to array\n        if (schema.names.contains(Utils.TMP_FEATURE_ARRAY_NAME)) {\n          (None, Some(schema.fieldIndex(Utils.TMP_FEATURE_ARRAY_NAME)))\n        } else {\n          (Some(getFeaturesCols.map(schema.fieldIndex).toSeq), None)\n        }\n      } else {\n        (None, Some(schema.fieldIndex(getFeaturesCol)))\n      }\n\n    // function to get the column id according to the parameter\n    def columnId(param: Param[String]): Option[Int] = {\n      if (isDefinedNonEmpty(param)) {\n        Some(schema.fieldIndex($(param)))\n      } else {\n        None\n      }\n    }\n\n    // Special handle for group\n    val groupId: Option[Int] = this match {\n      case p: HasGroupCol => columnId(p.groupCol)\n      case _ => None\n    }\n\n    ColumnIndices(\n      labelId = columnId(labelCol).get,\n      
featureId = featureId,\n      featureIds = featureIds,\n      columnId(weightCol),\n      columnId(baseMarginCol),\n      groupId)\n  }\n\n  /**\n   * Preprocess the dataset to meet the xgboost input requirement\n   *\n   * @param dataset\n   * @return\n   */\n  private[spark] def preprocess(dataset: Dataset[_]): (Dataset[_], ColumnIndices) = {\n    val schema = dataset.schema\n    validateFeatureType(schema)\n\n    // Columns to be selected for XGBoost training\n    val selectedCols: ArrayBuffer[Column] = ArrayBuffer.empty\n\n    def selectCol(c: Param[String], targetType: DataType) = {\n      if (isDefinedNonEmpty(c)) {\n          selectedCols.append(castIfNeeded(schema, $(c), targetType))\n      }\n    }\n\n    Seq(labelCol, weightCol, baseMarginCol).foreach(p => selectCol(p, FloatType))\n    this match {\n      case p: HasGroupCol => selectCol(p.groupCol, IntegerType)\n      case _ =>\n    }\n\n    val featureCol = if (isSet(featuresCols)) {\n      // Make columnar to array\n      array(getFeaturesCols.map(col): _*)\n        .cast(ArrayType(FloatType))\n        .alias(Utils.TMP_FEATURE_ARRAY_NAME)\n    } else {\n      if (featureIsArrayType(schema)) {\n        col($(featuresCol)).cast(ArrayType(FloatType))\n      } else {\n        col($(featuresCol))\n      }\n    }\n    selectedCols.append(featureCol)\n\n    val repartitioned = repartitionIfNeeded(dataset.select(selectedCols.toArray: _*))\n    val sorted = sortPartitionIfNeeded(repartitioned)\n    val columnIndices = buildColumnIndices(sorted.schema)\n    (sorted, columnIndices)\n  }\n\n  /** visible for testing */\n  private[spark] def toXGBLabeledPoint(dataset: Dataset[_],\n                                       columnIndexes: ColumnIndices): RDD[XGBLabeledPoint] = {\n    val isSetMissing = isSet(missing)\n    dataset.toDF().rdd.map { row =>\n      val label = row.getFloat(columnIndexes.labelId)\n      val weight = columnIndexes.weightId.map(row.getFloat).getOrElse(1.0f)\n      val baseMargin = 
columnIndexes.marginId.map(row.getFloat).getOrElse(Float.NaN)\n      val group = columnIndexes.groupId.map(row.getInt).getOrElse(-1)\n\n      val values = row.schema(columnIndexes.featureId.get).dataType match {\n        case ArrayType(_, _) =>\n          // The driver has casted the array(*) to array(float), so it's safe to\n          // specify it as WrappedArray[Float] directly\n          row.getAs[mutable.WrappedArray[Float]](columnIndexes.featureId.get).toArray\n        case other =>\n          if (!SparkUtils.isVectorType(other)) {\n            throw new IllegalArgumentException(\"Feature must be array or vector type\")\n          }\n          val features = row.getAs[Vector](columnIndexes.featureId.get)\n          features match {\n            case _: SparseVector => if (!isSetMissing) {\n              throw new IllegalArgumentException(\"We've detected sparse vectors in the dataset \" +\n                \"that need conversion to dense format. However, we can't assume 0 for missing \" +\n                \"values as it may be meaningful. 
Please specify the missing value explicitly to \" +\n                \"ensure accurate data representation for analysis.\")\n            }\n            case _ => // DenseVector\n          }\n          // To make \"0\" meaningful, we convert sparse vector if possible to dense.\n          features.toArray.map(_.toFloat)\n      }\n      new XGBLabeledPoint(label, values.length, null, values, weight, group, baseMargin)\n    }\n  }\n\n  /**\n   * Convert the dataframe to RDD, visible for testing\n   *\n   * @param dataset\n   * @param columnIndices the indices of columns including weight/group/base margin ...\n   * @return RDD[Watches]\n   */\n  private[spark] def toRdd(dataset: Dataset[_],\n                           columnIndices: ColumnIndices): RDD[Watches] = {\n    val trainRDD = toXGBLabeledPoint(dataset, columnIndices)\n\n    val featureNames = if (getFeatureNames.isEmpty) None else Some(getFeatureNames)\n    val featureTypes = if (getFeatureTypes.isEmpty) None else Some(getFeatureTypes)\n\n    val missing = getMissing\n\n    // Transform the labeledpoint to get margins/groups and build DMatrix\n    // TODO support basemargin for multiclassification\n    // TODO and optimization, move it into JNI.\n    def buildDMatrix(iter: Iterator[XGBLabeledPoint]) = {\n      val dmatrix = if (columnIndices.marginId.isDefined || columnIndices.groupId.isDefined) {\n        val margins = new mutable.ArrayBuilder.ofFloat\n        val groups = new mutable.ArrayBuilder.ofInt\n        val groupWeights = new mutable.ArrayBuilder.ofFloat\n        var prevGroup = -101010\n        var prevWeight = -1.0f\n        var groupSize = 0\n        val transformedIter = iter.map { labeledPoint =>\n          if (columnIndices.marginId.isDefined) {\n            margins += labeledPoint.baseMargin\n          }\n          if (columnIndices.groupId.isDefined) {\n            if (prevGroup != labeledPoint.group) {\n              // starting with new group\n              if (prevGroup != -101010) {\n           
     // write the previous group\n                groups += groupSize\n                groupWeights += prevWeight\n              }\n              groupSize = 1\n              prevWeight = labeledPoint.weight\n              prevGroup = labeledPoint.group\n            } else {\n              // for the same group\n              if (prevWeight != labeledPoint.weight) {\n                throw new IllegalArgumentException(\"the instances in the same group have to be\" +\n                  s\" assigned with the same weight (unexpected weight ${labeledPoint.weight})\")\n              }\n              groupSize = groupSize + 1\n            }\n          }\n          labeledPoint\n        }\n        val dm = new DMatrix(transformedIter, null, missing)\n        columnIndices.marginId.foreach(_ => dm.setBaseMargin(margins.result()))\n        if (columnIndices.groupId.isDefined) {\n          if (prevGroup != -101010) {\n            // write the last group\n            groups += groupSize\n            groupWeights += prevWeight\n          }\n          dm.setGroup(groups.result())\n          // The new DMatrix() will set the weights for each instance. 
But ranking requires\n          // 1 weight for each group, so need to reset the weight.\n          // This is definitely optimized by moving setting group/base margin into JNI.\n          dm.setWeight(groupWeights.result())\n        }\n        dm\n      } else {\n        new DMatrix(iter, null, missing)\n      }\n      featureTypes.foreach(dmatrix.setFeatureTypes)\n      featureNames.foreach(dmatrix.setFeatureNames)\n      dmatrix\n    }\n\n    getEvalDataset().map { eval =>\n      val (evalDf, _) = preprocess(eval)\n      val evalRDD = toXGBLabeledPoint(evalDf, columnIndices)\n      trainRDD.zipPartitions(evalRDD) { (left, right) =>\n        new Iterator[Watches] {\n          override def hasNext: Boolean = left.hasNext\n\n          override def next(): Watches = {\n            val trainDMatrix = buildDMatrix(left)\n            val evalDMatrix = buildDMatrix(right)\n            new Watches(Array(trainDMatrix, evalDMatrix),\n              Array(Utils.TRAIN_NAME, Utils.VALIDATION_NAME), None)\n          }\n        }\n      }\n    }.getOrElse(\n      trainRDD.mapPartitions { iter =>\n        new Iterator[Watches] {\n          override def hasNext: Boolean = iter.hasNext\n\n          override def next(): Watches = {\n            val dm = buildDMatrix(iter)\n            new Watches(Array(dm), Array(Utils.TRAIN_NAME), None)\n          }\n        }\n      }\n    )\n  }\n\n  protected def createModel(booster: Booster, summary: XGBoostTrainingSummary): M\n\n  private[spark] def getRuntimeParameters(isLocal: Boolean,\n      configs: Map[String, AnyRef] = Map.empty): RuntimeParams = {\n    val runOnGpu = if (getDevice != \"cpu\") true else false\n    RuntimeParams(\n      getNumWorkers,\n      getNumRound,\n      TrackerConf(getRabitTrackerTimeout, getRabitTrackerHostIp, getRabitTrackerPort),\n      getNumEarlyStoppingRounds,\n      getDevice,\n      isLocal,\n      runOnGpu,\n      Option(getCustomObj),\n      Option(getCustomEval),\n      configs\n    )\n  }\n\n  /**\n   
* Check to see if Spark expects SSL encryption (`spark.ssl.enabled` set to true).\n   * If so, throw an exception unless this safety measure has been explicitly overridden\n   * via conf `xgboost.spark.ignoreSsl`.\n   */\n  private def validateSparkSslConf(spark: SparkSession): Unit = {\n\n    val sparkSslEnabled = spark.conf.getOption(\"spark.ssl.enabled\").getOrElse(\"false\").toBoolean\n    val xgbIgnoreSsl = spark.conf.getOption(\"xgboost.spark.ignoreSsl\").getOrElse(\"false\").toBoolean\n\n    if (sparkSslEnabled) {\n      if (xgbIgnoreSsl) {\n        logger.warn(s\"spark-xgboost is being run without encrypting data in transit!  \" +\n          s\"Spark Conf spark.ssl.enabled=true was overridden with xgboost.spark.ignoreSsl=true.\")\n      } else {\n        throw new Exception(\"xgboost-spark found spark.ssl.enabled=true to encrypt data \" +\n          \"in transit, but xgboost-spark sends non-encrypted data over the wire for efficiency. \" +\n          \"To override this protection and still use xgboost-spark at your own risk, \" +\n          \"you can set the SparkSession conf to use xgboost.spark.ignoreSsl=true.\")\n      }\n    }\n  }\n\n  /**\n   * Validate the parameters before training, throw exception if possible\n   */\n  protected[spark] def validate(dataset: Dataset[_]): Unit = {\n    validateSparkSslConf(dataset.sparkSession)\n    val schema = dataset.schema\n    SparkUtils.checkNumericType(schema, $(labelCol))\n    if (isDefinedNonEmpty(weightCol)) {\n      SparkUtils.checkNumericType(schema, $(weightCol))\n    }\n\n    if (isDefinedNonEmpty(baseMarginCol)) {\n      SparkUtils.checkNumericType(schema, $(baseMarginCol))\n    }\n\n    if (isDefined(useExternalMemory) && getUseExternalMemory) {\n      require(getDevice == \"cuda\" || getDevice == \"gpu\",\n        \"The `useExternalMemory` is only supported for GPU at the moment.\")\n    }\n\n    val taskCpus = dataset.sparkSession.sparkContext.getConf.getInt(\"spark.task.cpus\", 1)\n    if 
(isDefined(nthread)) {\n      require(getNthread <= taskCpus,\n        s\"the nthread configuration ($getNthread) must be no larger than \" +\n          s\"spark.task.cpus ($taskCpus)\")\n    } else {\n      setNthread(taskCpus)\n    }\n  }\n\n  protected def train(dataset: Dataset[_]): M = {\n    validate(dataset)\n\n    val (rdd, configs) = if (PluginUtils.isPluginEnabled(dataset)) {\n      PluginUtils.getPlugin.get.buildRddWatches(this, dataset)\n    } else {\n      val (input, columnIndexes) = preprocess(dataset)\n      (toRdd(input, columnIndexes), Map.empty[String, AnyRef])\n    }\n\n    val runtimeParams = getRuntimeParameters(dataset.sparkSession.sparkContext.isLocal, configs)\n\n    val (booster, metrics) = XGBoost.train(rdd, runtimeParams, getXGBoostParams)\n\n    val summary = XGBoostTrainingSummary(metrics)\n    copyValues(createModel(booster, summary))\n  }\n\n  override def copy(extra: ParamMap): Learner = defaultCopy(extra).asInstanceOf[Learner]\n}\n\n/**\n * Indicate what to be predicted\n *\n * @param predLeaf    predicate leaf\n * @param predContrib predicate contribution\n * @param predRaw     predicate raw\n * @param predTmp     predicate probability for classification, and raw for regression\n */\nprivate[spark] case class PredictedColumns(\n    predLeaf: Boolean,\n    predContrib: Boolean,\n    predRaw: Boolean,\n    predTmp: Boolean)\n\n/**\n * XGBoost base model\n */\nprivate[spark] trait XGBoostModel[M <: XGBoostModel[M]] extends Model[M] with MLWritable\n  with XGBoostParams[M] with SparkParams[M] with ParamUtils[M] {\n\n  protected val TMP_TRANSFORMED_COL = \"_tmp_xgb_transformed_col\"\n\n  override def copy(extra: ParamMap): M = defaultCopy(extra).asInstanceOf[M]\n\n  /**\n   * Get the native XGBoost Booster\n   *\n   * @return\n   */\n  def nativeBooster: Booster\n\n  def summary: Option[XGBoostTrainingSummary]\n\n  protected[spark] def postTransform(dataset: Dataset[_], pred: PredictedColumns): Dataset[_] = {\n    var output = 
dataset\n    // Convert leaf/contrib to the vector from array\n    if (pred.predLeaf) {\n      output = output.withColumn(getLeafPredictionCol,\n        array_to_vector(output.col(getLeafPredictionCol)))\n    }\n\n    if (pred.predContrib) {\n      output = output.withColumn(getContribPredictionCol,\n        array_to_vector(output.col(getContribPredictionCol)))\n    }\n    output\n  }\n\n  /**\n   * Preprocess the schema before transforming.\n   *\n   * @return the transformed schema and the\n   */\n  private[spark] def preprocess(dataset: Dataset[_]): (StructType, PredictedColumns) = {\n    // Be careful about the order of columns\n    var schema = dataset.schema\n\n    /** If the parameter is defined, add it to schema and turn true */\n    def addToSchema(param: Param[String], colName: Option[String] = None): Boolean = {\n      if (isDefinedNonEmpty(param)) {\n        val name = colName.getOrElse($(param))\n        schema = schema.add(StructField(name, ArrayType(FloatType)))\n        true\n      } else {\n        false\n      }\n    }\n\n    val predLeaf = addToSchema(leafPredictionCol)\n    val predContrib = addToSchema(contribPredictionCol)\n\n    var predRaw = false\n    // For classification case, the transformed col is probability,\n    // while for others, it's the prediction value.\n    var predTmp = false\n    this match {\n      case p: XGBProbabilisticClassifierParams[_] => // classification case\n        predRaw = addToSchema(p.rawPredictionCol)\n        predTmp = addToSchema(p.probabilityCol, Some(TMP_TRANSFORMED_COL))\n\n        if (isDefinedNonEmpty(predictionCol)) {\n          // Let's use transformed col to calculate the prediction\n          if (!predTmp) {\n            // Add the transformed col for prediction\n            schema = schema.add(\n              StructField(TMP_TRANSFORMED_COL, ArrayType(FloatType)))\n            predTmp = true\n          }\n        }\n      case _ =>\n        // Rename TMP_TRANSFORMED_COL to prediction in the 
postTransform.\n        predTmp = addToSchema(predictionCol, Some(TMP_TRANSFORMED_COL))\n    }\n    (schema, PredictedColumns(predLeaf, predContrib, predRaw, predTmp))\n  }\n\n  /** Predict */\n  private[spark] def predictInternal(booster: Booster, dm: DMatrix, pred: PredictedColumns,\n                                     originalRowIter: Iterator[Row]): Iterator[Row] = {\n    val tmpIters: ArrayBuffer[Iterator[Row]] = ArrayBuffer.empty\n    if (pred.predLeaf) {\n      tmpIters += booster.predictLeaf(dm).map(Row(_)).iterator\n    }\n    if (pred.predContrib) {\n      tmpIters += booster.predictContrib(dm).map(Row(_)).iterator\n    }\n    if (pred.predRaw) {\n      tmpIters += booster.predict(dm, outPutMargin = true).map(Row(_)).iterator\n    }\n    if (pred.predTmp) {\n      tmpIters += booster.predict(dm, outPutMargin = false).map(Row(_)).iterator\n    }\n\n    // This is not so efficient considering that toSeq from first iterators will be called\n    // many times.\n    //    tmpIters.foldLeft(originalRowIter) { case (accIter, nextIter) =>\n    //      // Zip the accumulated iterator with the next iterator\n    //      accIter.zip(nextIter).map { case (a: Row, b: Row) =>\n    //        Row.fromSeq(a.toSeq ++ b.toSeq)\n    //      }\n    //    }\n\n    tmpIters.size match {\n      case 4 =>\n        originalRowIter.zip(tmpIters(0)).zip(tmpIters(1)).zip(tmpIters(2)).zip(tmpIters(3)).map {\n          case ((((a: Row, b: Row), c: Row), d: Row), e: Row) =>\n            Row.fromSeq(a.toSeq ++ b.toSeq ++ c.toSeq ++ d.toSeq ++ e.toSeq)\n        }\n      case 3 =>\n        originalRowIter.zip(tmpIters(0)).zip(tmpIters(1)).zip(tmpIters(2)).map {\n          case (((a: Row, b: Row), c: Row), d: Row) =>\n            Row.fromSeq(a.toSeq ++ b.toSeq ++ c.toSeq ++ d.toSeq)\n        }\n      case 2 =>\n        originalRowIter.zip(tmpIters(0)).zip(tmpIters(1)).map {\n          case ((a: Row, b: Row), c: Row) =>\n            Row.fromSeq(a.toSeq ++ b.toSeq ++ c.toSeq)\n        }\n    
  case 1 =>\n        originalRowIter.zip(tmpIters(0)).map {\n          case (a: Row, b: Row) =>\n            Row.fromSeq(a.toSeq ++ b.toSeq)\n        }\n      case 0 => originalRowIter\n      case _ => throw new RuntimeException(\"Unexpected array size\") // never reach here\n    }\n  }\n\n  override def transform(dataset: Dataset[_]): DataFrame = {\n    if (PluginUtils.isPluginEnabled(dataset)) {\n      return PluginUtils.getPlugin.get.transform(this, dataset)\n    }\n    val (schema, pred) = preprocess(dataset)\n    // Model could be trained with columnar, and the transform df could be array or vector\n    val (input, featureName, featureIsArray) = if (isSet(featuresCols) &&\n      getFeaturesCols.length > 0 &&\n      getFeaturesCols.forall(schema.names.contains)) {\n      (dataset.withColumn(Utils.TMP_FEATURE_ARRAY_NAME,\n        array(getFeaturesCols.map(col): _*).cast(ArrayType(FloatType))),\n        Utils.TMP_FEATURE_ARRAY_NAME,\n        true)\n    } else {\n      (dataset, getFeaturesCol, featureIsArrayType(dataset.schema))\n    }\n\n    // Broadcast the booster to each executor.\n    val bBooster = input.sparkSession.sparkContext.broadcast(nativeBooster)\n    val inferBatchSize = getInferBatchSize\n    val missing = getMissing\n\n    // Here, we use RDD instead of DF to avoid different encoders for different\n    // spark versions for the compatibility issue.\n    // 3.5+, Encoders.row(schema)\n    // 3.5-, RowEncoder(schema)\n    val outRDD = input.asInstanceOf[Dataset[Row]].rdd.mapPartitions { rowIter =>\n      rowIter.grouped(inferBatchSize).flatMap { batchRow =>\n        val features = batchRow.iterator.map(row => {\n          if (!featureIsArray) {\n            // Vector type\n            row.getAs[Vector](row.fieldIndex(featureName)).asXGB\n          } else {\n            // Array type\n            val values: Array[Float] = row.get(row.fieldIndex(featureName)) match {\n              case v: mutable.WrappedArray[_] =>\n                v.array match 
{\n                  case f: Array[java.lang.Float] => f.map(_.toFloat)\n                  case d: Array[java.lang.Double] => d.map(_.toFloat)\n                  case _ => throw new RuntimeException(\"Unsupported feature array type\")\n                }\n              case _ => throw new RuntimeException(\"Unsupported feature type\")\n            }\n            new XGBLabeledPoint(0.0f, values.size, null, values)\n          }\n        })\n        // DMatrix used to prediction\n        val dm = new DMatrix(features, null, missing)\n        try {\n          predictInternal(bBooster.value, dm, pred, batchRow.toIterator)\n        } finally {\n          dm.delete()\n        }\n      }\n    }\n    val output = input.sparkSession.createDataFrame(outRDD, schema)\n      .drop(Utils.TMP_FEATURE_ARRAY_NAME)\n\n    bBooster.unpersist(blocking = false)\n    postTransform(output, pred).toDF()\n  }\n\n  override def write: MLWriter = new XGBoostModelWriter(this)\n\n  protected def predictSingleInstance(features: Vector): Array[Float] = {\n    if (nativeBooster == null) {\n      throw new IllegalArgumentException(\"The model has not been trained\")\n    }\n    val dm = new DMatrix(Iterator(features.asXGB), null, getMissing)\n    nativeBooster.predict(data = dm)(0)\n  }\n}\n\n/**\n * Class to write the model\n *\n * @param instance model to be written\n */\nprivate[spark] class XGBoostModelWriter(instance: XGBoostModel[_]) extends MLWriter {\n\n  override protected def saveImpl(path: String): Unit = {\n    if (Option(instance.nativeBooster).isEmpty) {\n      throw new RuntimeException(\"The XGBoost model has not been trained\")\n    }\n    SparkUtils.saveMetadata(instance, path, sc)\n\n    // Save model data\n    val dataPath = new Path(path, \"data\").toString\n    val internalPath = new Path(dataPath, \"model\")\n    val outputStream = internalPath.getFileSystem(sc.hadoopConfiguration).create(internalPath)\n    val format = optionMap.getOrElse(\"format\", 
JBooster.DEFAULT_FORMAT)\n    try {\n      instance.nativeBooster.saveModel(outputStream, format)\n    } finally {\n      outputStream.close()\n    }\n  }\n}\n\nprivate[spark] abstract class XGBoostModelReader[M <: XGBoostModel[M]] extends MLReader[M] {\n\n  protected def loadBooster(path: String): Booster = {\n    val dataPath = new Path(path, \"data\").toString\n    val internalPath = new Path(dataPath, \"model\")\n    val dataInStream = internalPath.getFileSystem(sc.hadoopConfiguration).open(internalPath)\n    try {\n      SXGBoost.loadModel(dataInStream)\n    } finally {\n      dataInStream.close()\n    }\n  }\n}\n\n// Trait for Ranker and Regressor Model\nprivate[spark] trait RankerRegressorBaseModel[M <: XGBoostModel[M]] extends XGBoostModel[M]\n  with XGBPredictorParams[M] {\n\n  override protected[spark] def postTransform(dataset: Dataset[_],\n                                              pred: PredictedColumns): Dataset[_] = {\n    var output = super.postTransform(dataset, pred)\n    if (isDefinedNonEmpty(predictionCol) && pred.predTmp) {\n      val predictUDF = udf { (originalPrediction: mutable.WrappedArray[Float]) =>\n        originalPrediction(0).toDouble\n      }\n      output = output\n        .withColumn($(predictionCol), predictUDF(col(TMP_TRANSFORMED_COL)))\n        .drop(TMP_TRANSFORMED_COL)\n    }\n    output\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostPlugin.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.Serializable\n\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.sql.{DataFrame, Dataset}\n\ntrait XGBoostPlugin extends Serializable {\n  /**\n   * Whether the plugin is enabled or not, if not enabled, fallback\n   * to the regular CPU pipeline\n   *\n   * @param dataset the input dataset\n   * @return Boolean\n   */\n  def isEnabled(dataset: Dataset[_]): Boolean\n\n  /**\n   * Convert Dataset to RDD[Watches] which will be fed into XGBoost\n   *\n   * @param estimator which estimator to be handled.\n   * @param dataset   to be converted.\n   * @return RDD[Watches]\n   */\n  def buildRddWatches[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]](\n      estimator: XGBoostEstimator[T, M],\n      dataset: Dataset[_]): (RDD[Watches], Map[String, AnyRef])\n\n  /**\n   * Transform the dataset\n   */\n  def transform[M <: XGBoostModel[M]](model: XGBoostModel[M], dataset: Dataset[_]): DataFrame\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRanker.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.apache.spark.ml.{PredictionModel, Predictor}\nimport org.apache.spark.ml.linalg.Vector\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader}\nimport org.apache.spark.ml.xgboost.SparkUtils\nimport org.apache.spark.sql.Dataset\nimport org.apache.spark.sql.functions.col\nimport org.apache.spark.sql.types.{DataType, DoubleType, StructType}\n\nimport ml.dmlc.xgboost4j.scala.Booster\nimport ml.dmlc.xgboost4j.scala.spark.XGBoostRanker._uid\nimport ml.dmlc.xgboost4j.scala.spark.params.HasGroupCol\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.RANKER_OBJS\n\nclass XGBoostRanker(override val uid: String,\n                    private val xgboostParams: Map[String, Any])\n  extends Predictor[Vector, XGBoostRanker, XGBoostRankerModel]\n    with XGBoostEstimator[XGBoostRanker, XGBoostRankerModel] with HasGroupCol {\n\n  def this() = this(_uid, Map[String, Any]())\n\n  def this(uid: String) = this(uid, Map[String, Any]())\n\n  def this(xgboostParams: Map[String, Any]) = this(_uid, xgboostParams)\n\n  def setGroupCol(value: String): XGBoostRanker = set(groupCol, value)\n\n  xgboost2SparkParams(xgboostParams)\n\n  /**\n   * Validate the parameters before training, throw exception if possible\n   */\n  override protected[spark] def 
validate(dataset: Dataset[_]): Unit = {\n    super.validate(dataset)\n\n    require(isDefinedNonEmpty(groupCol), \"groupCol needs to be set\")\n\n    // If the objective is set explicitly, it must be in RANKER_OBJS\n    if (isSet(objective)) {\n      val tmpObj = getObjective\n      require(RANKER_OBJS.contains(tmpObj),\n        s\"Wrong objective for XGBoostRanker, supported objs: ${RANKER_OBJS.mkString(\",\")}\")\n    } else {\n      setObjective(\"rank:ndcg\")\n    }\n  }\n\n  /**\n   * Repartition the dataset to the numWorkers if needed.\n   *\n   * @param dataset to be repartition\n   * @return the repartitioned dataset\n   */\n  override private[spark] def repartitionIfNeeded(dataset: Dataset[_]) = {\n    val numPartitions = dataset.rdd.getNumPartitions\n    if (getForceRepartition || getNumWorkers != numPartitions) {\n      // Please note that the output of repartitionByRange is not deterministic\n      dataset.repartitionByRange(getNumWorkers, col(getGroupCol))\n    } else {\n      dataset\n    }\n  }\n\n  /**\n   * Sort partition for Ranker issue.\n   *\n   * @param dataset\n   * @return\n   */\n  override private[spark] def sortPartitionIfNeeded(dataset: Dataset[_]) = {\n    dataset.sortWithinPartitions(getGroupCol)\n  }\n\n  override protected def createModel(\n      booster: Booster,\n      summary: XGBoostTrainingSummary): XGBoostRankerModel = {\n    new XGBoostRankerModel(uid, booster, Option(summary))\n  }\n\n  override protected def validateAndTransformSchema(\n      schema: StructType,\n      fitting: Boolean,\n      featuresDataType: DataType): StructType =\n    SparkUtils.appendColumn(schema, $(predictionCol), DoubleType)\n}\n\nobject XGBoostRanker extends DefaultParamsReadable[XGBoostRanker] {\n  private val _uid = Identifiable.randomUID(\"xgbranker\")\n}\n\nclass XGBoostRankerModel private[ml](val uid: String,\n                                     val nativeBooster: Booster,\n                                     val summary: 
Option[XGBoostTrainingSummary] = None)\n  extends PredictionModel[Vector, XGBoostRankerModel]\n    with RankerRegressorBaseModel[XGBoostRankerModel] with HasGroupCol {\n\n  def this(uid: String) = this(uid, null)\n\n  def setGroupCol(value: String): XGBoostRankerModel = set(groupCol, value)\n\n  override def copy(extra: ParamMap): XGBoostRankerModel = {\n    val newModel = copyValues(new XGBoostRankerModel(uid, nativeBooster, summary), extra)\n    newModel.setParent(parent)\n  }\n\n  override def predict(features: Vector): Double = {\n    val values = predictSingleInstance(features)\n    values(0)\n  }\n}\n\nobject XGBoostRankerModel extends MLReadable[XGBoostRankerModel] {\n  override def read: MLReader[XGBoostRankerModel] = new ModelReader\n\n  private class ModelReader extends XGBoostModelReader[XGBoostRankerModel] {\n    override def load(path: String): XGBoostRankerModel = {\n      val xgbModel = loadBooster(path)\n      val meta = SparkUtils.loadMetadata(path, sc)\n      val model = new XGBoostRankerModel(meta.uid, xgbModel, None)\n      meta.getAndSetParams(model)\n      model\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.apache.spark.ml.{PredictionModel, Predictor}\nimport org.apache.spark.ml.linalg.Vector\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader}\nimport org.apache.spark.ml.xgboost.SparkUtils\nimport org.apache.spark.sql.Dataset\nimport org.apache.spark.sql.types.{DataType, DoubleType, StructType}\n\nimport ml.dmlc.xgboost4j.scala.Booster\nimport ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor._uid\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.REGRESSION_OBJS\n\nclass XGBoostRegressor(override val uid: String,\n                       private val xgboostParams: Map[String, Any])\n  extends Predictor[Vector, XGBoostRegressor, XGBoostRegressionModel]\n    with XGBoostEstimator[XGBoostRegressor, XGBoostRegressionModel] {\n\n  def this() = this(_uid, Map[String, Any]())\n\n  def this(uid: String) = this(uid, Map[String, Any]())\n\n  def this(xgboostParams: Map[String, Any]) = this(_uid, xgboostParams)\n\n  xgboost2SparkParams(xgboostParams)\n\n  /**\n   * Validate the parameters before training, throw exception if possible\n   */\n  override protected[spark] def validate(dataset: Dataset[_]): Unit = {\n    super.validate(dataset)\n\n    // If the objective is set explicitly, it must be in REGRESSION_OBJS\n    if 
(isSet(objective)) {\n      val tmpObj = getObjective\n      require(REGRESSION_OBJS.contains(tmpObj),\n        s\"Wrong objective for XGBoostRegressor, supported objs: ${REGRESSION_OBJS.mkString(\",\")}\")\n    }\n  }\n\n  override protected def createModel(\n      booster: Booster,\n      summary: XGBoostTrainingSummary): XGBoostRegressionModel = {\n    new XGBoostRegressionModel(uid, booster, Option(summary))\n  }\n\n  override protected def validateAndTransformSchema(\n      schema: StructType,\n      fitting: Boolean,\n      featuresDataType: DataType): StructType =\n    SparkUtils.appendColumn(schema, $(predictionCol), DoubleType)\n}\n\nobject XGBoostRegressor extends DefaultParamsReadable[XGBoostRegressor] {\n  private val _uid = Identifiable.randomUID(\"xgbr\")\n}\n\nclass XGBoostRegressionModel private[ml](val uid: String,\n                                         val nativeBooster: Booster,\n                                         val summary: Option[XGBoostTrainingSummary] = None)\n  extends PredictionModel[Vector, XGBoostRegressionModel]\n    with RankerRegressorBaseModel[XGBoostRegressionModel] {\n\n  def this(uid: String) = this(uid, null)\n\n  override def copy(extra: ParamMap): XGBoostRegressionModel = {\n    val newModel = copyValues(new XGBoostRegressionModel(uid, nativeBooster, summary), extra)\n    newModel.setParent(parent)\n  }\n\n  override def predict(features: Vector): Double = {\n    val values = predictSingleInstance(features)\n    values(0)\n  }\n}\n\nobject XGBoostRegressionModel extends MLReadable[XGBoostRegressionModel] {\n  override def read: MLReader[XGBoostRegressionModel] = new ModelReader\n\n  private class ModelReader extends XGBoostModelReader[XGBoostRegressionModel] {\n    override def load(path: String): XGBoostRegressionModel = {\n      val xgbModel = loadBooster(path)\n      val meta = SparkUtils.loadMetadata(path, sc)\n      val model = new XGBoostRegressionModel(meta.uid, xgbModel, None)\n      
meta.getAndSetParams(model)\n      model\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostTrainingSummary.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nclass XGBoostTrainingSummary private(\n    val trainObjectiveHistory: Array[Float],\n    val validationObjectiveHistory: (String, Array[Float])*) extends Serializable {\n\n  override def toString: String = {\n    val train = trainObjectiveHistory.mkString(\",\")\n    val validationObjectiveHistoryString = {\n      validationObjectiveHistory.map {\n        case (name, metrics) =>\n          s\"${name}ObjectiveHistory=${metrics.mkString(\",\")}\"\n      }.mkString(\";\")\n    }\n    s\"XGBoostTrainingSummary(trainObjectiveHistory=$train; $validationObjectiveHistoryString)\"\n  }\n}\n\nprivate[spark] object XGBoostTrainingSummary {\n  def apply(metrics: Map[String, Array[Float]]): XGBoostTrainingSummary = {\n    new XGBoostTrainingSummary(\n      trainObjectiveHistory = metrics(\"train\"),\n      metrics.filter(_._1 != \"train\").toSeq: _*)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/package.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport java.util.Properties\n\nimport org.apache.spark.SparkException\n\npackage object spark {\n  private def loadVersionInfo(): String = {\n    val versionResourceFile = Thread.currentThread().getContextClassLoader.getResourceAsStream(\n      \"xgboost4j-version.properties\")\n    try {\n      val unknownProp = \"<unknown>\"\n      val props = new Properties()\n      props.load(versionResourceFile)\n      props.getProperty(\"version\", unknownProp)\n    } catch {\n      case e: Exception =>\n        throw new SparkException(\"Error loading properties from xgboost4j-version.properties\", e)\n    } finally {\n      if (versionResourceFile != null) {\n        try {\n          versionResourceFile.close()\n        } catch {\n          case e: Exception =>\n            throw new SparkException(\"Error closing xgboost4j version resource stream\", e)\n        }\n      }\n    }\n  }\n\n  val VERSION: String = loadVersionInfo()\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/CustomParams.scala",
    "content": "/*\n Copyright (c) 2014-2022 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport org.apache.spark.ml.param.{Param, ParamPair, Params}\nimport org.json4s.{DefaultFormats, Extraction}\nimport org.json4s.jackson.JsonMethods.{compact, parse, render}\nimport org.json4s.jackson.Serialization\n\nimport ml.dmlc.xgboost4j.scala.{EvalTrait, ObjectiveTrait}\nimport ml.dmlc.xgboost4j.scala.spark.Utils\n\n/**\n * General spark parameter that includes TypeHints for (de)serialization using json4s.\n */\nclass CustomGeneralParam[T: Manifest](parent: Params,\n                                      name: String,\n                                      doc: String) extends Param[T](parent, name, doc) {\n\n  /** Creates a param pair with the given value (for Java). 
*/\n  override def w(value: T): ParamPair[T] = super.w(value)\n\n  override def jsonEncode(value: T): String = {\n    implicit val format = Serialization.formats(Utils.getTypeHintsFromClass(value))\n    compact(render(Extraction.decompose(value)))\n  }\n\n  override def jsonDecode(json: String): T = {\n    jsonDecodeT(json)\n  }\n\n  private def jsonDecodeT[T](jsonString: String)(implicit m: Manifest[T]): T = {\n    val json = parse(jsonString)\n    implicit val formats = DefaultFormats.withHints(Utils.getTypeHintsFromJsonClass(json))\n    json.extract[T]\n  }\n}\n\nclass CustomEvalParam(parent: Params,\n                      name: String,\n                      doc: String) extends CustomGeneralParam[EvalTrait](parent, name, doc)\n\nclass CustomObjParam(parent: Params,\n                     name: String,\n                     doc: String) extends CustomGeneralParam[ObjectiveTrait](parent, name, doc)\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/DartBoosterParams.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport org.apache.spark.ml.param._\n\n/**\n * Dart booster parameters, more details can be found at\n * https://xgboost.readthedocs.io/en/stable/parameter.html#\n * additional-parameters-for-dart-booster-booster-dart\n */\nprivate[spark] trait DartBoosterParams extends Params {\n\n  final val sampleType = new Param[String](this, \"sample_type\", \"Type of sampling algorithm, \" +\n    \"options: {'uniform', 'weighted'}\", ParamValidators.inArray(Array(\"uniform\", \"weighted\")))\n\n  final def getSampleType: String = $(sampleType)\n\n  final val normalizeType = new Param[String](this, \"normalize_type\", \"type of normalization\" +\n    \" algorithm, options: {'tree', 'forest'}\",\n    ParamValidators.inArray(Array(\"tree\", \"forest\")))\n\n  final def getNormalizeType: String = $(normalizeType)\n\n  final val rateDrop = new DoubleParam(this, \"rate_drop\", \"Dropout rate (a fraction of previous \" +\n    \"trees to drop during the dropout)\",\n    ParamValidators.inRange(0, 1, true, true))\n\n  final def getRateDrop: Double = $(rateDrop)\n\n  final val oneDrop = new BooleanParam(this, \"one_drop\", \"When this flag is enabled, at least \" +\n    \"one tree is always dropped during the dropout (allows Binomial-plus-one or epsilon-dropout \" +\n    \"from the original DART paper)\")\n\n  final def getOneDrop: Boolean = 
$(oneDrop)\n\n  final val skipDrop = new DoubleParam(this, \"skip_drop\", \"Probability of skipping the dropout \" +\n    \"procedure during a boosting iteration.\\nIf a dropout is skipped, new trees are added \" +\n    \"in the same manner as gbtree.\\nNote that non-zero skip_drop has higher priority than \" +\n    \"rate_drop or one_drop.\",\n    ParamValidators.inRange(0, 1, true, true))\n\n  final def getSkipDrop: Double = $(skipDrop)\n\n  setDefault(sampleType -> \"uniform\", normalizeType -> \"tree\", rateDrop -> 0, skipDrop -> 0)\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GeneralParams.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport org.apache.spark.ml.param._\n\n/**\n * General xgboost parameters, more details can be found\n * at https://xgboost.readthedocs.io/en/stable/parameter.html#general-parameters\n */\nprivate[spark] trait GeneralParams extends Params {\n\n  final val booster = new Param[String](this, \"booster\", \"Which booster to use. Can be gbtree, \" +\n    \"gblinear or dart; gbtree and dart use tree based models while gblinear uses linear \" +\n    \"functions.\", ParamValidators.inArray(Array(\"gbtree\", \"dart\")))\n\n  final def getBooster: String = $(booster)\n\n  final val device = new Param[String](this, \"device\", \"Device for XGBoost to run. User can \" +\n    \"set it to one of the following values: {cpu, cuda, gpu}\",\n    ParamValidators.inArray(Array(\"cpu\", \"cuda\", \"gpu\")))\n\n  final def getDevice: String = $(device)\n\n  final val verbosity = new IntParam(this, \"verbosity\", \"Verbosity of printing messages. Valid \" +\n    \"values are 0 (silent), 1 (warning), 2 (info), 3 (debug). Sometimes XGBoost tries to change \" +\n    \"configurations based on heuristics, which is displayed as warning message. 
If there's \" +\n    \"unexpected behaviour, please try to increase value of verbosity.\",\n    ParamValidators.inRange(0, 3, true, true))\n\n  final def getVerbosity: Int = $(verbosity)\n\n  final val validateParameters = new BooleanParam(this, \"validate_parameters\", \"When set to \" +\n    \"True, XGBoost will perform validation of input parameters to check whether a parameter \" +\n    \"is used or not. A warning is emitted when there's unknown parameter.\")\n\n  final def getValidateParameters: Boolean = $(validateParameters)\n\n  final val nthread = new IntParam(this, \"nthread\", \"Number of threads used by per worker\",\n    ParamValidators.gtEq(0))\n\n  final def getNthread: Int = $(nthread)\n\n  setDefault(booster -> \"gbtree\", device -> \"cpu\", verbosity -> 1, validateParameters -> false,\n    nthread -> 0)\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/LearningTaskParams.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport scala.collection.immutable.HashSet\n\nimport org.apache.spark.ml.param._\n\n/**\n * Specify the learning task and the corresponding learning objective.\n * More details can be found at\n * https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters\n */\nprivate[spark] trait LearningTaskParams extends Params {\n\n  final val objective = new Param[String](this, \"objective\",\n    \"Objective function used for training\",\n    ParamValidators.inArray(LearningTaskParams.SUPPORTED_OBJECTIVES.toArray))\n\n  final def getObjective: String = $(objective)\n\n  final val numClass = new IntParam(this, \"num_class\", \"Number of classes, used by \" +\n    \"multi:softmax and multi:softprob objectives\", ParamValidators.gtEq(0))\n\n  final def getNumClass: Int = $(numClass)\n\n  final val baseScore = new DoubleParam(this, \"base_score\", \"The initial prediction score of \" +\n    \"all instances, global bias. The parameter is automatically estimated for selected \" +\n    \"objectives before training. To disable the estimation, specify a real number argument. 
\" +\n    \"For sufficient number of iterations, changing this value will not have too much effect.\")\n\n  final def getBaseScore: Double = $(baseScore)\n\n  final val evalMetric = new Param[String](this, \"eval_metric\", \"Evaluation metrics for \" +\n    \"validation data, a default metric will be assigned according to objective (rmse for \" +\n    \"regression, and logloss for classification, mean average precision for rank:map, etc.)\" +\n    \"User can add multiple evaluation metrics. Python users: remember to pass the metrics in \" +\n    \"as list of parameters pairs instead of map, so that latter eval_metric won't override \" +\n    \"previous ones\", ParamValidators.inArray(LearningTaskParams.SUPPORTED_EVAL_METRICS.toArray))\n\n  final def getEvalMetric: String = $(evalMetric)\n\n  final val seed = new LongParam(this, \"seed\", \"Random number seed.\")\n\n  final def getSeed: Long = $(seed)\n\n  final val seedPerIteration = new BooleanParam(this, \"seed_per_iteration\", \"Seed PRNG \" +\n    \"determnisticly via iterator number..\")\n\n  final def getSeedPerIteration: Boolean = $(seedPerIteration)\n\n  // Parameters for Tweedie Regression (objective=reg:tweedie)\n  final val tweedieVariancePower = new DoubleParam(this, \"tweedie_variance_power\", \"Parameter \" +\n    \"that controls the variance of the Tweedie distribution var(y) ~ E(y)^tweedie_variance_power.\",\n    ParamValidators.inRange(1, 2, false, false))\n\n  final def getTweedieVariancePower: Double = $(tweedieVariancePower)\n\n  // Parameter for using Pseudo-Huber (reg:pseudohubererror)\n  final val huberSlope = new DoubleParam(this, \"huber_slope\", \"A parameter used for Pseudo-Huber \" +\n    \"loss to define the (delta) term.\")\n\n  final def getHuberSlope: Double = $(huberSlope)\n\n  // Parameter for using Quantile Loss (reg:quantileerror) TODO\n  // Parameter for using Expectile Loss (reg:expectileerror)\n  final val expectileAlpha = new Param[String](this, \"expectile_alpha\", \"List of 
expectiles for \" +\n    \"expectile loss.\")\n\n  final def getExpectileAlpha: String = $(expectileAlpha)\n\n  // Parameter for using AFT Survival Loss (survival:aft) and Negative\n  // Log Likelihood of AFT metric (aft-nloglik)\n  final val aftLossDistribution = new Param[String](this, \"aft_loss_distribution\", \"Probability \" +\n    \"Density Function\",\n    ParamValidators.inArray(Array(\"normal\", \"logistic\", \"extreme\")))\n\n  final def getAftLossDistribution: String = $(aftLossDistribution)\n\n  // Parameters for learning to rank (rank:ndcg, rank:map, rank:pairwise)\n  final val lambdarankPairMethod = new Param[String](this, \"lambdarank_pair_method\", \"pairs for \" +\n    \"pair-wise learning\",\n    ParamValidators.inArray(Array(\"mean\", \"topk\")))\n\n  final def getLambdarankPairMethod: String = $(lambdarankPairMethod)\n\n  final val lambdarankNumPairPerSample = new IntParam(this, \"lambdarank_num_pair_per_sample\",\n    \"It specifies the number of pairs sampled for each document when pair method is mean, or\" +\n      \" the truncation level for queries when the pair method is topk. For example, to train \" +\n      \"with ndcg@6, set lambdarank_num_pair_per_sample to 6 and lambdarank_pair_method to topk\",\n    ParamValidators.gtEq(1))\n\n  final def getLambdarankNumPairPerSample: Int = $(lambdarankNumPairPerSample)\n\n  final val lambdarankUnbiased = new BooleanParam(this, \"lambdarank_unbiased\", \"Specify \" +\n    \"whether do we need to debias input click data.\")\n\n  final def getLambdarankUnbiased: Boolean = $(lambdarankUnbiased)\n\n  final val lambdarankBiasNorm = new DoubleParam(this, \"lambdarank_bias_norm\", \"Lp \" +\n    \"normalization for position debiasing, default is L2. 
Only relevant when \" +\n    \"lambdarankUnbiased is set to true.\")\n\n  final def getLambdarankBiasNorm: Double = $(lambdarankBiasNorm)\n\n  final val ndcgExpGain = new BooleanParam(this, \"ndcg_exp_gain\", \"Whether we should \" +\n    \"use exponential gain function for NDCG.\")\n\n  final def getNdcgExpGain: Boolean = $(ndcgExpGain)\n\n  setDefault(objective -> \"reg:squarederror\", numClass -> 0, seed -> 0, seedPerIteration -> false,\n    tweedieVariancePower -> 1.5, huberSlope -> 1, lambdarankPairMethod -> \"mean\",\n    lambdarankUnbiased -> false, lambdarankBiasNorm -> 2, ndcgExpGain -> true)\n}\n\nprivate[spark] object LearningTaskParams {\n  val SUPPORTED_OBJECTIVES = HashSet(\"reg:squarederror\", \"reg:squaredlogerror\", \"reg:logistic\",\n    \"reg:pseudohubererror\", \"reg:absoluteerror\", \"reg:quantileerror\", \"reg:expectileerror\",\n    \"binary:logistic\",\n    \"binary:logitraw\", \"binary:hinge\", \"count:poisson\", \"survival:cox\", \"survival:aft\",\n    \"multi:softmax\", \"multi:softprob\", \"rank:ndcg\", \"rank:map\", \"rank:pairwise\", \"reg:gamma\",\n    \"reg:tweedie\")\n\n  val BINARY_CLASSIFICATION_OBJS = HashSet(\"binary:logistic\", \"binary:hinge\", \"binary:logitraw\")\n  val MULTICLASSIFICATION_OBJS = HashSet(\"multi:softmax\", \"multi:softprob\")\n  val RANKER_OBJS = HashSet(\"rank:ndcg\", \"rank:map\", \"rank:pairwise\")\n  val REGRESSION_OBJS = SUPPORTED_OBJECTIVES -- BINARY_CLASSIFICATION_OBJS --\n    MULTICLASSIFICATION_OBJS -- RANKER_OBJS\n\n  val SUPPORTED_EVAL_METRICS = HashSet(\"rmse\", \"rmsle\", \"mae\", \"mape\", \"mphe\", \"logloss\", \"error\",\n    \"error@t\", \"merror\", \"mlogloss\", \"auc\", \"aucpr\", \"pre\", \"ndcg\", \"map\", \"ndcg@n\", \"map@n\",\n    \"pre@n\", \"ndcg-\", \"map-\", \"ndcg@n-\", \"map@n-\", \"poisson-nloglik\", \"gamma-nloglik\",\n    \"cox-nloglik\", \"gamma-deviance\", \"tweedie-nloglik\", \"aft-nloglik\", \"expectile\",\n    \"interval-regression-accuracy\")\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/ParamMapConversion.scala",
    "content": "/*\n Copyright (c) 2014-2022 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport scala.collection.mutable\n\nimport com.google.common.base.CaseFormat\nimport org.apache.spark.ml.param._\n\nprivate[spark] trait ParamMapConversion extends NonXGBoostParams {\n\n  /**\n   * Convert XGBoost parameters to Spark Parameters\n   *\n   * @param xgboostParams XGBoost style parameters\n   */\n  def xgboost2SparkParams(xgboostParams: Map[String, Any]): Unit = {\n    for ((paramName, paramValue) <- xgboostParams) {\n      val lowerCamelName = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, paramName)\n      val lowerName = CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, paramName)\n      val qualifiedNames = mutable.Set(paramName, lowerName, lowerCamelName)\n      params.find(p => qualifiedNames.contains(p.name)) foreach {\n        case p: DoubleParam =>\n          set(p.name, paramValue.toString.toDouble)\n        case p: BooleanParam =>\n          set(p.name, paramValue.toString.toBoolean)\n        case p: IntParam =>\n          set(p.name, paramValue.toString.toInt)\n        case p: FloatParam =>\n          set(p.name, paramValue.toString.toFloat)\n        case p: LongParam =>\n          set(p.name, paramValue.toString.toLong)\n        case p: Param[_] =>\n          set(p.name, paramValue)\n      }\n    }\n  }\n\n  /**\n   * Convert the user-supplied parameters to the XGBoost 
parameters.\n   *\n   * Note that this doesn't contain jvm-specific parameters.\n   */\n  def getXGBoostParams: Map[String, Any] = {\n    val xgboostParams = new mutable.HashMap[String, Any]()\n\n    // Only pass user-supplied parameters to xgboost.\n    for (param <- params) {\n      if (isSet(param) && !nonXGBoostParams.contains(param.name)) {\n        xgboostParams += param.name -> $(param)\n      }\n    }\n    xgboostParams.toMap\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/RabitParams.scala",
    "content": "/*\n Copyright (c) 2014-2022 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport org.apache.spark.ml.param._\n\nprivate[spark] trait RabitParams extends Params with NonXGBoostParams {\n\n  final val rabitTrackerTimeout = new IntParam(this, \"rabitTrackerTimeout\", \"The number of \" +\n    \"seconds before timeout waiting for workers to connect, and for the tracker to shut down.\",\n    ParamValidators.gtEq(0))\n\n  final def getRabitTrackerTimeout: Int = $(rabitTrackerTimeout)\n\n  final val rabitTrackerHostIp = new Param[String](this, \"rabitTrackerHostIp\", \"The Rabit \" +\n    \"Tracker host IP address. This is only needed if the host IP cannot be automatically \" +\n    \"guessed.\")\n\n  final def getRabitTrackerHostIp: String = $(rabitTrackerHostIp)\n\n  final val rabitTrackerPort = new IntParam(this, \"rabitTrackerPort\", \"The port number for the \" +\n    \"tracker to listen to. Use a system allocated one by default.\",\n    ParamValidators.gtEq(0))\n\n  final def getRabitTrackerPort: Int = $(rabitTrackerPort)\n\n  setDefault(rabitTrackerTimeout -> 0, rabitTrackerHostIp -> \"\", rabitTrackerPort -> 0)\n\n  addNonXGBoostParam(rabitTrackerTimeout, rabitTrackerHostIp, rabitTrackerPort)\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport scala.collection.immutable.HashSet\n\nimport org.apache.spark.ml.param._\n\n/**\n * TreeBoosterParams defines the XGBoost TreeBooster parameters for Spark\n *\n * The details can be found at\n * https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\n */\nprivate[spark] trait TreeBoosterParams extends Params {\n\n  final val eta = new DoubleParam(this, \"eta\", \"Step size shrinkage used in update to prevents \" +\n    \"overfitting. After each boosting step, we can directly get the weights of new features, \" +\n    \"and eta shrinks the feature weights to make the boosting process more conservative.\",\n    ParamValidators.inRange(0, 1, lowerInclusive = true, upperInclusive = true))\n\n  final def getEta: Double = $(eta)\n\n  final val gamma = new DoubleParam(this, \"gamma\", \"Minimum loss reduction required to make a \" +\n    \"further partition on a leaf node of the tree. The larger gamma is, the more conservative \" +\n    \"the algorithm will be.\",\n    ParamValidators.gtEq(0))\n\n  final def getGamma: Double = $(gamma)\n\n  final val maxDepth = new IntParam(this, \"max_depth\", \"Maximum depth of a tree. Increasing this \" +\n    \"value will make the model more complex and more likely to overfit. 0 indicates no limit \" +\n    \"on depth. 
Beware that XGBoost aggressively consumes memory when training a deep tree. \" +\n    \"exact tree method requires non-zero value.\",\n    ParamValidators.gtEq(0))\n\n  final def getMaxDepth: Int = $(maxDepth)\n\n  final val minChildWeight = new DoubleParam(this, \"min_child_weight\", \"Minimum sum of instance \" +\n    \"weight (hessian) needed in a child. If the tree partition step results in a leaf node \" +\n    \"with the sum of instance weight less than min_child_weight, then the building process \" +\n    \"will give up further partitioning. In linear regression task, this simply corresponds \" +\n    \"to minimum number of instances needed to be in each node. The larger min_child_weight \" +\n    \"is, the more conservative the algorithm will be.\",\n    ParamValidators.gtEq(0))\n\n  final def getMinChildWeight: Double = $(minChildWeight)\n\n  final val maxDeltaStep = new DoubleParam(this, \"max_delta_step\", \"Maximum delta step we allow \" +\n    \"each leaf output to be. If the value is set to 0, it means there is no constraint. If it \" +\n    \"is set to a positive value, it can help making the update step more conservative. Usually \" +\n    \"this parameter is not needed, but it might help in logistic regression when class is \" +\n    \"extremely imbalanced. Set it to value of 1-10 might help control the update.\",\n    ParamValidators.gtEq(0))\n\n  final def getMaxDeltaStep: Double = $(maxDeltaStep)\n\n  final val subsample = new DoubleParam(this, \"subsample\", \"Subsample ratio of the training \" +\n    \"instances. Setting it to 0.5 means that XGBoost would randomly sample half of the \" +\n    \"training data prior to growing trees. and this will prevent overfitting. 
Subsampling \" +\n    \"will occur once in every boosting iteration.\",\n    ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))\n\n  final def getSubsample: Double = $(subsample)\n\n  final val samplingMethod = new Param[String](this, \"sampling_method\", \"The method to use to \" +\n    \"sample the training instances. The supported sampling methods\" +\n    \"uniform: each training instance has an equal probability of being selected. Typically set \" +\n    \"subsample >= 0.5 for good results.\\n\" +\n    \"gradient_based: the selection probability for each training instance is proportional to \" +\n    \"the regularized absolute value of gradients. subsample may be set to as low as 0.1 \" +\n    \"without loss of model accuracy. Note that this sampling method is only supported when \" +\n    \"tree_method is set to hist and the device is cuda; other tree methods only support \" +\n    \"uniform sampling.\",\n    ParamValidators.inArray(Array(\"uniform\", \"gradient_based\")))\n\n  final def getSamplingMethod: String = $(samplingMethod)\n\n  final val colsampleBytree = new DoubleParam(this, \"colsample_bytree\", \"Subsample ratio of \" +\n    \"columns when constructing each tree. Subsampling occurs once for every tree constructed.\",\n    ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))\n\n  final def getColsampleBytree: Double = $(colsampleBytree)\n\n\n  final val colsampleBylevel = new DoubleParam(this, \"colsample_bylevel\", \"Subsample ratio of \" +\n    \"columns for each level. Subsampling occurs once for every new depth level reached in a \" +\n    \"tree. 
Columns are subsampled from the set of columns chosen for the current tree.\",\n    ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))\n\n  final def getColsampleBylevel: Double = $(colsampleBylevel)\n\n\n  final val colsampleBynode = new DoubleParam(this, \"colsample_bynode\", \"Subsample ratio of \" +\n    \"columns for each node (split). Subsampling occurs once every time a new split is \" +\n    \"evaluated. Columns are subsampled from the set of columns chosen for the current level.\",\n    ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))\n\n  final def getColsampleBynode: Double = $(colsampleBynode)\n\n\n  /**\n   * L2 regularization term on weights, increase this value will make model more conservative.\n   * [default=1]\n   */\n  final val lambda = new DoubleParam(this, \"lambda\", \"L2 regularization term on weights. \" +\n    \"Increasing this value will make model more conservative.\", ParamValidators.gtEq(0))\n\n  final def getLambda: Double = $(lambda)\n\n  final val alpha = new DoubleParam(this, \"alpha\", \"L1 regularization term on weights. \" +\n    \"Increasing this value will make model more conservative.\", ParamValidators.gtEq(0))\n\n  final def getAlpha: Double = $(alpha)\n\n  final val treeMethod = new Param[String](this, \"tree_method\", \"The tree construction \" +\n    \"algorithm used in XGBoost, options: {'auto', 'exact', 'approx', 'hist', 'gpu_hist'}\",\n    ParamValidators.inArray(BoosterParams.supportedTreeMethods.toArray))\n\n  final def getTreeMethod: String = $(treeMethod)\n\n  final val scalePosWeight = new DoubleParam(this, \"scale_pos_weight\", \"Control the balance of \" +\n    \"positive and negative weights, useful for unbalanced classes. 
A typical value to consider: \" +\n    \"sum(negative instances) / sum(positive instances)\")\n\n  final def getScalePosWeight: Double = $(scalePosWeight)\n\n  final val updater = new Param[String](this, \"updater\", \"A comma separated string defining the \" +\n    \"sequence of tree updaters to run, providing a modular way to construct and to modify the \" +\n    \"trees. This is an advanced parameter that is usually set automatically, depending on some \" +\n    \"other parameters. However, it could be also set explicitly by a user. \" +\n    \"The following updaters exist:\\n\" +\n    \"grow_colmaker: non-distributed column-based construction of trees.\\n\" +\n    \"grow_histmaker: distributed tree construction with row-based data splitting based on \" +\n    \"global proposal of histogram counting.\\n\" +\n    \"grow_quantile_histmaker: Grow tree using quantized histogram.\\n\" +\n    \"grow_gpu_hist: Enabled when tree_method is set to hist along with device=cuda.\\n\" +\n    \"grow_gpu_approx: Enabled when tree_method is set to approx along with device=cuda.\\n\" +\n    \"sync: synchronizes trees in all distributed nodes.\\n\" +\n    \"refresh: refreshes tree's statistics and or leaf values based on the current data. Note \" +\n    \"that no random subsampling of data rows is performed.\\n\" +\n    \"prune: prunes the splits where loss < min_split_loss (or gamma) and nodes that have depth \" +\n    \"greater than max_depth.\",\n    (value: String) => value.split(\",\").forall(\n      ParamValidators.inArray(BoosterParams.supportedUpdaters.toArray)))\n\n  final def getUpdater: String = $(updater)\n\n  final val refreshLeaf = new BooleanParam(this, \"refresh_leaf\", \"This is a parameter of the \" +\n    \"refresh updater. When this flag is 1, tree leafs as well as tree nodes' stats are updated. 
\" +\n    \"When it is 0, only node stats are updated.\")\n\n  final def getRefreshLeaf: Boolean = $(refreshLeaf)\n\n  // TODO set updater/refreshLeaf defaul value\n  final val processType = new Param[String](this, \"process_type\", \"A type of boosting process to \" +\n    \"run. options: {default, update}\",\n    ParamValidators.inArray(Array(\"default\", \"update\")))\n\n  final def getProcessType: String = $(processType)\n\n  final val growPolicy = new Param[String](this, \"grow_policy\", \"Controls a way new nodes are \" +\n    \"added to the tree. Currently supported only if tree_method is set to hist or approx. \" +\n    \"Choices: depthwise, lossguide. depthwise: split at nodes closest to the root. \" +\n    \"lossguide: split at nodes with highest loss change.\",\n    ParamValidators.inArray(Array(\"depthwise\", \"lossguide\")))\n\n  final def getGrowPolicy: String = $(growPolicy)\n\n\n  final val maxLeaves = new IntParam(this, \"max_leaves\", \"Maximum number of nodes to be added. \" +\n    \"Not used by exact tree method\", ParamValidators.gtEq(0))\n\n  final def getMaxLeaves: Int = $(maxLeaves)\n\n  final val maxBins = new IntParam(this, \"max_bin\", \"Maximum number of discrete bins to bucket \" +\n    \"continuous features. Increasing this number improves the optimality of splits at the cost \" +\n    \"of higher computation time. Only used if tree_method is set to hist or approx.\",\n    ParamValidators.gt(0))\n\n  final def getMaxBins: Int = $(maxBins)\n\n  final val numParallelTree = new IntParam(this, \"num_parallel_tree\", \"Number of parallel trees \" +\n    \"constructed during each iteration. 
This option is used to support boosted random forest.\",\n    ParamValidators.gt(0))\n\n  final def getNumParallelTree: Int = $(numParallelTree)\n\n  final val monotoneConstraints = new IntArrayParam(this, \"monotone_constraints\", \"Constraint of \" +\n    \"variable monotonicity.\")\n\n  final def getMonotoneConstraints: Array[Int] = $(monotoneConstraints)\n\n  final val interactionConstraints = new Param[String](this,\n    name = \"interaction_constraints\",\n    doc = \"Constraints for interaction representing permitted interactions. The constraints\" +\n      \" must be specified in the form of a nest list, e.g. [[0, 1], [2, 3, 4]],\" +\n      \" where each inner list is a group of indices of features that are allowed to interact\" +\n      \" with each other. See tutorial for more information\")\n\n  final def getInteractionConstraints: String = $(interactionConstraints)\n\n\n  final val maxCachedHistNode = new IntParam(this, \"max_cached_hist_node\", \"Maximum number of \" +\n    \"cached nodes for CPU histogram.\",\n    ParamValidators.gt(0))\n\n  final def getMaxCachedHistNode: Int = $(maxCachedHistNode)\n\n  setDefault(eta -> 0.3, gamma -> 0, maxDepth -> 6, minChildWeight -> 1, maxDeltaStep -> 0,\n    subsample -> 1, samplingMethod -> \"uniform\", colsampleBytree -> 1, colsampleBylevel -> 1,\n    colsampleBynode -> 1, lambda -> 1, alpha -> 0, treeMethod -> \"auto\", scalePosWeight -> 1,\n    processType -> \"default\", growPolicy -> \"depthwise\", maxLeaves -> 0, maxBins -> 256,\n    numParallelTree -> 1, maxCachedHistNode -> 65536)\n\n}\n\nprivate[spark] object BoosterParams {\n\n  val supportedTreeMethods = HashSet(\"auto\", \"exact\", \"approx\", \"hist\")\n\n  val supportedUpdaters = HashSet(\"grow_colmaker\", \"grow_histmaker\", \"grow_quantile_histmaker\",\n    \"grow_gpu_hist\", \"grow_gpu_approx\", \"sync\", \"refresh\", \"prune\")\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark.params\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport org.apache.spark.ml.param._\nimport org.apache.spark.ml.param.shared._\nimport org.apache.spark.ml.xgboost.SparkUtils\nimport org.apache.spark.sql.types.{ArrayType, StructType}\n\nimport ml.dmlc.xgboost4j.scala.{EvalTrait, ObjectiveTrait}\n\ntrait HasLeafPredictionCol extends Params {\n  /**\n   * Param for leaf prediction column name.\n   *\n   * @group param\n   */\n  final val leafPredictionCol: Param[String] = new Param[String](this, \"leafPredictionCol\",\n    \"name of the predictLeaf results\")\n\n  /** @group getParam */\n  final def getLeafPredictionCol: String = $(leafPredictionCol)\n}\n\ntrait HasContribPredictionCol extends Params {\n  /**\n   * Param for contribution prediction column name.\n   *\n   * @group param\n   */\n  final val contribPredictionCol: Param[String] = new Param[String](this, \"contribPredictionCol\",\n    \"name of the predictContrib results\")\n\n  /** @group getParam */\n  final def getContribPredictionCol: String = $(contribPredictionCol)\n}\n\ntrait HasBaseMarginCol extends Params {\n\n  /**\n   * Param for initial prediction (aka base margin) column name.\n   *\n   * @group param\n   */\n  final val baseMarginCol: Param[String] = new Param[String](this, \"baseMarginCol\",\n    \"Initial prediction (aka base margin) column 
name.\")\n\n  /** @group getParam */\n  final def getBaseMarginCol: String = $(baseMarginCol)\n\n}\n\ntrait HasGroupCol extends Params {\n\n  final val groupCol: Param[String] = new Param[String](this, \"groupCol\", \"group column name.\")\n\n  /** @group getParam */\n  final def getGroupCol: String = $(groupCol)\n}\n\n/**\n * Trait for shared param featuresCols.\n */\ntrait HasFeaturesCols extends Params {\n  /**\n   * Param for the names of feature columns.\n   *\n   * @group param\n   */\n  final val featuresCols: StringArrayParam = new StringArrayParam(this, \"featuresCols\",\n    \"An array of feature column names.\")\n\n  /** @group getParam */\n  final def getFeaturesCols: Array[String] = $(featuresCols)\n\n  /** Check if featuresCols is valid */\n  def isFeaturesColsValid: Boolean = {\n    isDefined(featuresCols) && $(featuresCols) != Array.empty\n  }\n}\n\n/**\n * A trait to hold non-xgboost parameters\n */\ntrait NonXGBoostParams extends Params {\n  private val paramNames: ArrayBuffer[String] = ArrayBuffer.empty\n\n  protected def addNonXGBoostParam(ps: Param[_]*): Unit = {\n    ps.foreach(p => paramNames.append(p.name))\n  }\n\n  protected lazy val nonXGBoostParams: Array[String] = paramNames.toSet.toArray\n}\n\n/**\n * XGBoost spark-specific parameters which should not be passed\n * into the xgboost library\n *\n * @tparam T should be the XGBoost estimators or models\n */\nprivate[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFeaturesCol\n  with HasLabelCol with HasBaseMarginCol with HasWeightCol with HasPredictionCol\n  with HasLeafPredictionCol with HasContribPredictionCol\n  with RabitParams with NonXGBoostParams with SchemaValidationTrait {\n\n  final val numWorkers = new IntParam(this, \"numWorkers\", \"Number of workers used to train xgboost\",\n    ParamValidators.gtEq(1))\n\n  final def getNumWorkers: Int = $(numWorkers)\n\n  final val forceRepartition = new BooleanParam(this, \"forceRepartition\", \"If the partition \" 
+\n    \"is equal to numWorkers, xgboost won't repartition the dataset. Set forceRepartition to \" +\n    \"true to force repartition.\")\n\n  final def getForceRepartition: Boolean = $(forceRepartition)\n\n  final val numRound = new IntParam(this, \"numRound\", \"The number of rounds for boosting\",\n    ParamValidators.gtEq(1))\n\n  final def getNumRound: Int = $(numRound)\n\n  final val numEarlyStoppingRounds = new IntParam(this, \"numEarlyStoppingRounds\", \"Stop training \" +\n    \"Number of rounds of decreasing eval metric to tolerate before stopping training\",\n    ParamValidators.gtEq(0))\n\n  final def getNumEarlyStoppingRounds: Int = $(numEarlyStoppingRounds)\n\n  final val inferBatchSize = new IntParam(this, \"inferBatchSize\", \"batch size in rows \" +\n    \"to be grouped for inference\",\n    ParamValidators.gtEq(1))\n\n  /** @group getParam */\n  final def getInferBatchSize: Int = $(inferBatchSize)\n\n  /**\n   * the value treated as missing. default: Float.NaN\n   */\n  final val missing = new FloatParam(this, \"missing\", \"The value treated as missing\")\n\n  final def getMissing: Float = $(missing)\n\n  final val customObj = new CustomObjParam(this, \"customObj\", \"customized objective function \" +\n    \"provided by user\")\n\n  final def getCustomObj: ObjectiveTrait = $(customObj)\n\n  final val customEval = new CustomEvalParam(this, \"customEval\",\n    \"customized evaluation function provided by user\")\n\n  final def getCustomEval: EvalTrait = $(customEval)\n\n  /** Feature's name, it will be set to DMatrix and Booster, and in the final native json model.\n   * In native code, the parameter name is feature_name.\n   * */\n  final val featureNames = new StringArrayParam(this, \"feature_names\",\n    \"an array of feature names\")\n\n  final def getFeatureNames: Array[String] = $(featureNames)\n\n  /** Feature types, q is numeric and c is categorical.\n   * In native code, the parameter name is feature_type\n   * */\n  final val 
featureTypes = new StringArrayParam(this, \"feature_types\",\n    \"an array of feature types\")\n\n  final def getFeatureTypes: Array[String] = $(featureTypes)\n\n  final val useExternalMemory = new BooleanParam(this, \"useExternalMemory\", \"Whether to use \" +\n    \"the external memory or not when building QuantileDMatrix. Please note that \" +\n    \"useExternalMemory is useful only when `device` is set to `cuda` or `gpu`. When \" +\n    \"useExternalMemory is enabled, the directory specified by spark.local.dir if set will be \" +\n    \"used to cache the temporary files, if spark.local.dir is not set, the /tmp directory \" +\n    \"will be used.\")\n\n  final def getUseExternalMemory: Boolean = $(useExternalMemory)\n\n  final val maxQuantileBatches = new IntParam(this, \"maxQuantileBatches\", \"Maximum quantile \" +\n    \"batches\")\n\n  final def getMaxQuantileBatches: Int = $(maxQuantileBatches)\n\n  final val minCachePageBytes = new LongParam(this, \"minCachePageBytes\", \"Minimum number of \" +\n    \"bytes for each ellpack page in cache. Only used for in-host\")\n\n  final def getMinCachePageBytes: Long = $(minCachePageBytes)\n\n  final val cacheHostRatio = new FloatParam(this, \"cacheHostRatio\",\n    \"Used by the GPU implementation. For GPU-based inputs, XGBoost can split the cache into \" +\n      \"host and device caches to reduce the data transfer overhead. 
This parameter specifies \" +\n      \"the size of host cache compared to the size of the entire cache: host / (host + device)\",\n    ParamValidators.inRange(0.0, 1.0))\n\n  final def getCacheHostRatio: Float = $(cacheHostRatio)\n\n  setDefault(numRound -> 100, numWorkers -> 1, inferBatchSize -> (32 << 10),\n    numEarlyStoppingRounds -> 0, forceRepartition -> false, missing -> Float.NaN,\n    featuresCols -> Array.empty, customObj -> null, customEval -> null,\n    featureNames -> Array.empty, featureTypes -> Array.empty, useExternalMemory -> false,\n    maxQuantileBatches -> -1, minCachePageBytes -> -1)\n\n  addNonXGBoostParam(numWorkers, numRound, numEarlyStoppingRounds, inferBatchSize, featuresCol,\n    labelCol, baseMarginCol, weightCol, predictionCol, leafPredictionCol, contribPredictionCol,\n    forceRepartition, featuresCols, customEval, customObj, featureTypes, featureNames)\n\n  def setNumWorkers(value: Int): T = set(numWorkers, value).asInstanceOf[T]\n\n  def setForceRepartition(value: Boolean): T = set(forceRepartition, value).asInstanceOf[T]\n\n  def setNumRound(value: Int): T = set(numRound, value).asInstanceOf[T]\n\n  def setNumEarlyStoppingRounds(value: Int): T = set(numEarlyStoppingRounds, value).asInstanceOf[T]\n\n  def setFeaturesCol(value: Array[String]): T = set(featuresCols, value).asInstanceOf[T]\n\n  def setBaseMarginCol(value: String): T = set(baseMarginCol, value).asInstanceOf[T]\n\n  def setWeightCol(value: String): T = set(weightCol, value).asInstanceOf[T]\n\n  def setLeafPredictionCol(value: String): T = set(leafPredictionCol, value).asInstanceOf[T]\n\n  def setContribPredictionCol(value: String): T = set(contribPredictionCol, value).asInstanceOf[T]\n\n  def setInferBatchSize(value: Int): T = set(inferBatchSize, value).asInstanceOf[T]\n\n  def setMissing(value: Float): T = set(missing, value).asInstanceOf[T]\n\n  def setCustomObj(value: ObjectiveTrait): T = set(customObj, value).asInstanceOf[T]\n\n  def setCustomEval(value: EvalTrait): 
T = set(customEval, value).asInstanceOf[T]\n\n  def setRabitTrackerTimeout(value: Int): T = set(rabitTrackerTimeout, value).asInstanceOf[T]\n\n  def setRabitTrackerHostIp(value: String): T = set(rabitTrackerHostIp, value).asInstanceOf[T]\n\n  def setRabitTrackerPort(value: Int): T = set(rabitTrackerPort, value).asInstanceOf[T]\n\n  def setFeatureNames(value: Array[String]): T = set(featureNames, value).asInstanceOf[T]\n\n  def setFeatureTypes(value: Array[String]): T = set(featureTypes, value).asInstanceOf[T]\n\n  def setUseExternalMemory(value: Boolean): T = set(useExternalMemory, value).asInstanceOf[T]\n\n  def setMaxQuantileBatches(value: Int): T = set(maxQuantileBatches, value).asInstanceOf[T]\n\n  def setMinCachePageBytes(value: Long): T = set(minCachePageBytes, value).asInstanceOf[T]\n\n  def setCacheHostRatio(value: Float): T = set(cacheHostRatio, value)\n    .asInstanceOf[T]\n\n  protected[spark] def featureIsArrayType(schema: StructType): Boolean =\n    schema(getFeaturesCol).dataType.isInstanceOf[ArrayType]\n\n  protected[spark] def validateFeatureType(schema: StructType): Unit = {\n    // If featuresCols is not set, need to check featuresCol which must be Vector or Array\n    if (!isSet(featuresCols)) {\n      // Features cols must be Vector or Array.\n      val featureDataType = schema(getFeaturesCol).dataType\n\n      // Features column must be either ArrayType or VectorType.\n      if (!featureDataType.isInstanceOf[ArrayType] && !SparkUtils.isVectorType(featureDataType)) {\n        throw new IllegalArgumentException(\"Feature type must be either ArrayType or VectorType\")\n      }\n    } else {\n      // To check columns must be numeric type\n      require(getFeaturesCols.length > 0)\n      for (c <- getFeaturesCols) {\n        SparkUtils.checkNumericType(schema, c)\n      }\n    }\n  }\n}\n\nprivate[spark] trait SchemaValidationTrait {\n\n  def validateAndTransformSchema(schema: StructType,\n                                 fitting: Boolean): 
StructType = schema\n}\n\n/**\n * XGBoost ranking spark-specific parameters\n *\n * @tparam T should be XGBoostRanker or XGBoostRankingModel\n */\nprivate[spark] trait RankerParams[T <: Params] extends HasGroupCol with NonXGBoostParams {\n  def setGroupCol(value: String): T = set(groupCol, value).asInstanceOf[T]\n\n  addNonXGBoostParam(groupCol)\n}\n\n/**\n * XGBoost-specific parameters to pass into xgboost libraray\n *\n * @tparam T should be the XGBoost estimators or models\n */\nprivate[spark] trait XGBoostParams[T <: Params] extends TreeBoosterParams\n  with LearningTaskParams with GeneralParams with DartBoosterParams {\n\n  // Setters for TreeBoosterParams\n  def setEta(value: Double): T = set(eta, value).asInstanceOf[T]\n\n  def setGamma(value: Double): T = set(gamma, value).asInstanceOf[T]\n\n  def setMaxDepth(value: Int): T = set(maxDepth, value).asInstanceOf[T]\n\n  def setMinChildWeight(value: Double): T = set(minChildWeight, value).asInstanceOf[T]\n\n  def setMaxDeltaStep(value: Double): T = set(maxDeltaStep, value).asInstanceOf[T]\n\n  def setSubsample(value: Double): T = set(subsample, value).asInstanceOf[T]\n\n  def setSamplingMethod(value: String): T = set(samplingMethod, value).asInstanceOf[T]\n\n  def setColsampleBytree(value: Double): T = set(colsampleBytree, value).asInstanceOf[T]\n\n  def setColsampleBylevel(value: Double): T = set(colsampleBylevel, value).asInstanceOf[T]\n\n  def setColsampleBynode(value: Double): T = set(colsampleBynode, value).asInstanceOf[T]\n\n  def setLambda(value: Double): T = set(lambda, value).asInstanceOf[T]\n\n  def setAlpha(value: Double): T = set(alpha, value).asInstanceOf[T]\n\n  def setTreeMethod(value: String): T = set(treeMethod, value).asInstanceOf[T]\n\n  def setScalePosWeight(value: Double): T = set(scalePosWeight, value).asInstanceOf[T]\n\n  def setUpdater(value: String): T = set(updater, value).asInstanceOf[T]\n\n  def setRefreshLeaf(value: Boolean): T = set(refreshLeaf, value).asInstanceOf[T]\n\n  def 
setProcessType(value: String): T = set(processType, value).asInstanceOf[T]\n\n  def setGrowPolicy(value: String): T = set(growPolicy, value).asInstanceOf[T]\n\n  def setMaxLeaves(value: Int): T = set(maxLeaves, value).asInstanceOf[T]\n\n  def setMaxBins(value: Int): T = set(maxBins, value).asInstanceOf[T]\n\n  def setNumParallelTree(value: Int): T = set(numParallelTree, value).asInstanceOf[T]\n\n  def setInteractionConstraints(value: String): T =\n    set(interactionConstraints, value).asInstanceOf[T]\n\n  def setMaxCachedHistNode(value: Int): T = set(maxCachedHistNode, value).asInstanceOf[T]\n\n  // Setters for LearningTaskParams\n\n  def setObjective(value: String): T = set(objective, value).asInstanceOf[T]\n\n  def setNumClass(value: Int): T = set(numClass, value).asInstanceOf[T]\n\n  def setBaseScore(value: Double): T = set(baseScore, value).asInstanceOf[T]\n\n  def setEvalMetric(value: String): T = set(evalMetric, value).asInstanceOf[T]\n\n  def setSeed(value: Long): T = set(seed, value).asInstanceOf[T]\n\n  def setSeedPerIteration(value: Boolean): T = set(seedPerIteration, value).asInstanceOf[T]\n\n  def setTweedieVariancePower(value: Double): T = set(tweedieVariancePower, value).asInstanceOf[T]\n\n  def setHuberSlope(value: Double): T = set(huberSlope, value).asInstanceOf[T]\n\n  def setAftLossDistribution(value: String): T = set(aftLossDistribution, value).asInstanceOf[T]\n\n  def setLambdarankPairMethod(value: String): T = set(lambdarankPairMethod, value).asInstanceOf[T]\n\n  def setLambdarankNumPairPerSample(value: Int): T =\n    set(lambdarankNumPairPerSample, value).asInstanceOf[T]\n\n  def setLambdarankUnbiased(value: Boolean): T = set(lambdarankUnbiased, value).asInstanceOf[T]\n\n  def setLambdarankBiasNorm(value: Double): T = set(lambdarankBiasNorm, value).asInstanceOf[T]\n\n  def setNdcgExpGain(value: Boolean): T = set(ndcgExpGain, value).asInstanceOf[T]\n\n  // Setters for Dart\n  def setSampleType(value: String): T = set(sampleType, 
value).asInstanceOf[T]\n\n  def setNormalizeType(value: String): T = set(normalizeType, value).asInstanceOf[T]\n\n  def setRateDrop(value: Double): T = set(rateDrop, value).asInstanceOf[T]\n\n  def setOneDrop(value: Boolean): T = set(oneDrop, value).asInstanceOf[T]\n\n  def setSkipDrop(value: Double): T = set(skipDrop, value).asInstanceOf[T]\n\n  // Setters for GeneralParams\n  def setBooster(value: String): T = set(booster, value).asInstanceOf[T]\n\n  def setDevice(value: String): T = set(device, value).asInstanceOf[T]\n\n  def setVerbosity(value: Int): T = set(verbosity, value).asInstanceOf[T]\n\n  def setValidateParameters(value: Boolean): T = set(validateParameters, value).asInstanceOf[T]\n\n  def setNthread(value: Int): T = set(nthread, value).asInstanceOf[T]\n}\n\nprivate[spark] trait ParamUtils[T <: Params] extends Params {\n\n  def isDefinedNonEmpty(param: Param[String]): Boolean = {\n    isDefined(param) && $(param).nonEmpty\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/xgboost/SparkUtils.scala",
    "content": "/*\n Copyright (c) 2024-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage org.apache.spark.ml.xgboost\n\nimport org.apache.spark.{SparkContext, SparkException}\nimport org.apache.spark.ml.PredictorParams\nimport org.apache.spark.ml.classification.ProbabilisticClassifierParams\nimport org.apache.spark.ml.linalg.VectorUDT\nimport org.apache.spark.ml.param.Params\nimport org.apache.spark.ml.util.{DefaultParamsReader, DefaultParamsWriter, MetadataUtils, SchemaUtils}\nimport org.apache.spark.ml.util.DefaultParamsReader.Metadata\nimport org.apache.spark.sql.{Column, Dataset, Row}\nimport org.apache.spark.sql.functions._\nimport org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, StructType}\nimport org.json4s.{JObject, JValue}\n\nimport ml.dmlc.xgboost4j.scala.spark.params.NonXGBoostParams\n\n/**\n * XGBoost classification spark-specific parameters which should not be passed\n * into the xgboost library\n *\n * @tparam T should be XGBoostClassifier or XGBoostClassificationModel\n */\ntrait XGBProbabilisticClassifierParams[T <: Params]\n  extends ProbabilisticClassifierParams with NonXGBoostParams {\n\n  /**\n   * XGBoost doesn't use validateAndTransformSchema since spark validateAndTransformSchema\n   * needs to ensure the feature is vector type\n   */\n  override protected def validateAndTransformSchema(\n      schema: StructType,\n      fitting: Boolean,\n      featuresDataType: DataType): StructType = {\n    var 
outputSchema = SparkUtils.appendColumn(schema, $(predictionCol), DoubleType)\n    outputSchema = SparkUtils.appendVectorUDTColumn(outputSchema, $(rawPredictionCol))\n    outputSchema = SparkUtils.appendVectorUDTColumn(outputSchema, $(probabilityCol))\n    outputSchema\n  }\n\n  addNonXGBoostParam(rawPredictionCol, probabilityCol, thresholds)\n}\n\ntrait XGBPredictorParams[T <: Params] extends PredictorParams {\n\n  /**\n   * XGBoost doesn't use validateAndTransformSchema since spark validateAndTransformSchema\n   * needs to ensure the feature is vector type\n   */\n  override protected def validateAndTransformSchema(schema: StructType,\n                                                    fitting: Boolean,\n                                                    featuresDataType: DataType): StructType = {\n    SparkUtils.appendColumn(schema, $(predictionCol), DoubleType)\n  }\n\n}\n\n/** Utils to access the spark internal functions */\nobject SparkUtils {\n\n  private def checkClassificationLabels(\n      labelCol: String,\n      numClasses: Option[Int]): Column = {\n    val casted = col(labelCol).cast(DoubleType)\n    numClasses match {\n      case Some(2) =>\n        when(casted.isNull || casted.isNaN, raise_error(lit(\"Labels MUST NOT be Null or NaN\")))\n          .when(casted =!= 0 && casted =!= 1,\n            raise_error(concat(lit(\"Labels MUST be in {0, 1}, but got \"), casted)))\n          .otherwise(casted)\n\n      case _ =>\n        val n = numClasses.getOrElse(Int.MaxValue)\n        require(0 < n && n <= Int.MaxValue)\n        when(casted.isNull || casted.isNaN, raise_error(lit(\"Labels MUST NOT be Null or NaN\")))\n          .when(casted < 0 || casted >= n,\n            raise_error(concat(lit(s\"Labels MUST be in [0, $n), but got \"), casted)))\n          .when(casted =!= casted.cast(IntegerType),\n            raise_error(concat(lit(\"Labels MUST be Integers, but got \"), casted)))\n          .otherwise(casted)\n    }\n  }\n\n  // Copied from DatasetUtils 
of Spark to compatible with spark below 3.4\n  def getNumClasses(dataset: Dataset[_], labelCol: String, maxNumClasses: Int = 100): Int = {\n    MetadataUtils.getNumClasses(dataset.schema(labelCol)) match {\n      case Some(n: Int) => n\n      case None =>\n        // Get number of classes from dataset itself.\n        val maxLabelRow: Array[Row] = dataset\n          .select(max(checkClassificationLabels(labelCol, Some(maxNumClasses))))\n          .take(1)\n        if (maxLabelRow.isEmpty || maxLabelRow(0).get(0) == null) {\n          throw new SparkException(\"ML algorithm was given empty dataset.\")\n        }\n        val maxDoubleLabel: Double = maxLabelRow.head.getDouble(0)\n        require((maxDoubleLabel + 1).isValidInt, s\"Classifier found max label value =\" +\n          s\" $maxDoubleLabel but requires integers in range [0, ... ${Int.MaxValue})\")\n        val numClasses = maxDoubleLabel.toInt + 1\n        require(numClasses <= maxNumClasses, s\"Classifier inferred $numClasses from label values\" +\n          s\" in column $labelCol, but this exceeded the max numClasses ($maxNumClasses) allowed\" +\n          s\" to be inferred from values.  
To avoid this error for labels with > $maxNumClasses\" +\n          s\" classes, specify numClasses explicitly in the metadata; this can be done by applying\" +\n          s\" StringIndexer to the label column.\")\n        numClasses\n    }\n  }\n\n  def checkNumericType(schema: StructType, colName: String, msg: String = \"\"): Unit = {\n    SchemaUtils.checkNumericType(schema, colName, msg)\n  }\n\n  def saveMetadata(instance: Params,\n                   path: String,\n                   sc: SparkContext,\n                   extraMetadata: Option[JObject] = None,\n                   paramMap: Option[JValue] = None): Unit = {\n    DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, paramMap)\n  }\n\n  def loadMetadata(path: String, sc: SparkContext, expectedClassName: String = \"\"): Metadata = {\n    DefaultParamsReader.loadMetadata(path, sc, expectedClassName)\n  }\n\n  def appendColumn(schema: StructType,\n                   colName: String,\n                   dataType: DataType,\n                   nullable: Boolean = false): StructType = {\n    SchemaUtils.appendColumn(schema, colName, dataType, nullable)\n  }\n\n  def appendVectorUDTColumn(schema: StructType,\n                            colName: String,\n                            dataType: DataType = new VectorUDT,\n                            nullable: Boolean = false): StructType = {\n    SchemaUtils.appendColumn(schema, colName, dataType, nullable)\n  }\n\n  def isVectorType(dataType: DataType): Boolean = dataType.isInstanceOf[VectorUDT]\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/resources/log4j.properties",
    "content": "log4j.logger.org.apache.spark=ERROR\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/metadata/_SUCCESS",
    "content": ""
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/metadata/part-00000",
    "content": "{\"class\":\"ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel\",\"timestamp\":1555350539033,\"sparkVersion\":\"2.3.2-uber-109\",\"uid\":\"xgbc_5e7bec215a4c\",\"paramMap\":{\"useExternalMemory\":false,\"trainTestRatio\":1.0,\"alpha\":0.0,\"seed\":0,\"numWorkers\":100,\"skipDrop\":0.0,\"treeLimit\":0,\"silent\":0,\"trackerConf\":{\"workerConnectionTimeout\":0,\"trackerImpl\":\"python\"},\"missing\":\"NaN\",\"colsampleBylevel\":1.0,\"probabilityCol\":\"probability\",\"checkpointPath\":\"\",\"lambda\":1.0,\"rawPredictionCol\":\"rawPrediction\",\"eta\":0.3,\"numEarlyStoppingRounds\":0,\"growPolicy\":\"depthwise\",\"gamma\":0.0,\"sampleType\":\"uniform\",\"maxDepth\":6,\"rateDrop\":0.0,\"objective\":\"reg:linear\",\"customObj\":null,\"lambdaBias\":0.0,\"baseScore\":0.5,\"labelCol\":\"label\",\"minChildWeight\":1.0,\"customEval\":null,\"normalizeType\":\"tree\",\"maxBin\":16,\"nthread\":4,\"numRound\":20,\"colsampleBytree\":1.0,\"predictionCol\":\"prediction\",\"subsample\":1.0,\"timeoutRequestWorkers\":1800000,\"featuresCol\":\"features\",\"evalMetric\":\"error\",\"sketchEps\":0.03,\"scalePosWeight\":1.0,\"checkpointInterval\":-1,\"maxDeltaStep\":0.0,\"treeMethod\":\"approx\"}}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/resources/rank.test.txt",
    "content": "0 1:10.0229017899 2:7.30178495562 3:0.118115020017\n0 1:9.93639621859 2:9.93102159291 3:0.0435030004396\n0 1:10.1301737265 2:0.00411765220572 3:2.4165878053\n1 1:9.87828587087 2:0.608588414992 3:0.111262590883\n0 1:10.1373430048 2:0.47764012225 3:0.991553052194\n0 1:10.0523814718 2:4.72152505167 3:0.672978832666\n0 1:10.0449715742 2:8.40373928536 3:0.384457573667\n1 1:996.398498791 2:941.976309154 3:0.230269231292\n0 1:1005.11269468 2:900.093680877 3:0.265031528873\n0 1:997.160349441 2:891.331101688 3:2.19362017313\n0 1:993.754139031 2:44.8000165317 3:1.03868009875\n1 1:994.831299184 2:241.959208453 3:0.667631827024\n0 1:995.948333283 2:7.94326917112 3:0.750490877118\n0 1:989.733981273 2:7.52077625436 3:0.0126335967282\n0 1:1003.54086516 2:6.48177510564 3:1.19441696788\n0 1:996.56177804 2:9.71959812613 3:1.33082465111\n0 1:1005.61382467 2:0.234339369309 3:1.17987797356\n1 1:980.215758708 2:6.85554542926 3:2.63965085259\n1 1:987.776408872 2:2.23354609991 3:0.841885278028\n0 1:1006.54260396 2:8.12142049834 3:2.26639471174\n0 1:1009.87927639 2:6.40028519044 3:0.775155669615\n0 1:9.95006244393 2:928.76896718 3:234.948458244\n1 1:10.0749152258 2:255.294574476 3:62.9728604166\n1 1:10.1916541988 2:312.682867085 3:92.299413677\n0 1:9.95646724484 2:742.263188416 3:53.3310473654\n0 1:9.86211293222 2:996.237023866 3:2.00760301168\n1 1:9.91801019468 2:303.971783709 3:50.3147230679\n0 1:996.983996934 2:9.52188222766 3:1.33588120981\n0 1:995.704388126 2:9.49260524915 3:0.908498516541\n0 1:987.86480767 2:0.0870786716821 3:0.108859297837\n0 1:1000.99561307 2:2.85272694575 3:0.171134518956\n0 1:1011.05508066 2:7.55336771768 3:1.04950084825\n1 1:985.52199365 2:0.763305780608 3:1.7402424375\n0 1:10.0430321467 2:813.185427181 3:4.97728254185\n0 1:10.0812334228 2:258.297288417 3:0.127477670549\n0 1:9.84210504292 2:887.205815261 3:0.991689193955\n1 1:9.94625332613 2:0.298622762132 3:0.147881353231\n0 1:9.97800659954 2:727.619819757 3:0.0718361141866\n1 1:9.8037938472 
2:957.385549617 3:0.0618862028941\n0 1:10.0880634741 2:185.024638577 3:1.7028095095\n0 1:9.98630799154 2:109.10631473 3:0.681117359751\n0 1:9.91671416638 2:166.248076588 3:122.538291094\n0 1:10.1206910464 2:88.1539468531 3:141.189859069\n1 1:10.1767160518 2:1.02960996847 3:172.02256237\n0 1:9.93025147233 2:391.196641942 3:58.040338247\n0 1:9.84850936037 2:474.63346537 3:17.5627875397\n1 1:9.8162731343 2:61.9199554213 3:30.6740972851\n0 1:10.0403482984 2:987.50416929 3:73.0472906209\n1 1:997.019228359 2:133.294717663 3:0.0572254083186\n0 1:973.303999107 2:1.79080888849 3:0.100478717048\n0 1:1008.28808825 2:342.282350685 3:0.409806485495\n0 1:1014.55621524 2:0.680510407082 3:0.929530602495\n1 1:1012.74370325 2:823.105266455 3:0.0894693730585\n0 1:1003.63554038 2:727.334432075 3:0.58206275756\n0 1:10.1560432436 2:740.35938307 3:11.6823378533\n0 1:9.83949099701 2:512.828227154 3:138.206666681\n1 1:10.1837395682 2:179.287126088 3:185.479062365\n1 1:9.9761881495 2:12.1093388336 3:9.1264604171\n1 1:9.77402180766 2:318.561317743 3:80.6005221355\n0 1:1011.15705381 2:0.215825852155 3:1.34429667906\n0 1:1005.60353229 2:727.202346126 3:1.47146041005\n1 1:1013.93702961 2:58.7312725205 3:0.421041560754\n0 1:1004.86813074 2:757.693204258 3:0.566055205344\n0 1:999.996324692 2:813.12386828 3:0.864428279513\n0 1:996.55255931 2:918.760056995 3:0.43365051974\n1 1:1004.1394132 2:464.371823646 3:0.312492288321\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.java.{Communicator, RabitTracker}\n\nclass CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {\n\n  test(\"test Java RabitTracker wrapper's exception handling: it should not hang forever.\") {\n    /*\n      Deliberately create new instances of SparkContext in each unit test to avoid reusing the\n      same thread pool spawned by the local mode of Spark. As these tests simulate worker crashes\n      by throwing exceptions, the crashed worker thread never calls Rabit.shutdown, and therefore\n      corrupts the internal state of the native Rabit C++ code. Calling Rabit.init() in subsequent\n      tests on a reentrant thread will crash the entire Spark application, an undesired side-effect\n      that should be avoided.\n     */\n    val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache()\n\n    val tracker = new RabitTracker(numWorkers)\n    tracker.start()\n    val trackerEnvs = tracker.getWorkerArgs\n\n    val workerCount: Int = numWorkers\n    /*\n       Simulate worker crash events by creating dummy Rabit workers, and throw exceptions in the\n       last created worker. 
A cascading event chain will be triggered once the RuntimeException is\n       thrown: the thread running the dummy spark job (sparkThread) catches the exception and\n       delegates it to the UnCaughtExceptionHandler, which is the Rabit tracker itself.\n\n       To prevent unit tests from crashing, deterministic delays were introduced to make sure that\n       the exception is thrown at last, ideally after all worker connections have been established.\n     */\n    val dummyTasks = rdd.mapPartitions { iter =>\n      Communicator.init(trackerEnvs)\n      val index = iter.next()\n      Thread.sleep(100 + index * 10)\n      if (index == workerCount) {\n        // kill the worker by throwing an exception\n        throw new RuntimeException(\"Worker exception.\")\n      }\n      Communicator.shutdown()\n      Iterator(index)\n    }.cache()\n\n    val sparkThread = new Thread() {\n      override def run(): Unit = {\n        // forces a Spark job.\n        dummyTasks.foreachPartition(() => _)\n      }\n    }\n\n    sparkThread.setUncaughtExceptionHandler(tracker)\n    sparkThread.start()\n  }\n\n  test(\"Communicator allreduce works.\") {\n    val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache()\n    val tracker = new RabitTracker(numWorkers)\n    tracker.start()\n    val trackerEnvs = tracker.getWorkerArgs\n\n    val workerCount: Int = numWorkers\n\n    rdd.mapPartitions { iter =>\n      val index = iter.next()\n      Communicator.init(trackerEnvs)\n      val a = Array(1.0f, 2.0f, 3.0f)\n      System.out.println(a.mkString(\", \"))\n      val b = Communicator.allReduce(a, Communicator.OpType.SUM)\n      for (i <- 0 to 2) {\n        assert(a(i) * workerCount == b(i))\n      }\n      val c = Communicator.allReduce(a, Communicator.OpType.MIN);\n      for (i <- 0 to 2) {\n        assert(a(i) == c(i))\n      }\n      Communicator.shutdown()\n      Iterator(index)\n    }.collect()\n  }\n\n  test(\"should allow the dataframe containing communicator calls to be 
partially evaluated for\" +\n    \" multiple times (ISSUE-4406)\") {\n    val paramMap = Map(\n      \"eta\" -> \"1\",\n      \"max_depth\" -> \"6\",\n      \"silent\" -> \"1\",\n      \"objective\" -> \"binary:logistic\")\n    val trainingDF = smallBinaryClassificationVector\n    val model = new XGBoostClassifier(paramMap)\n      .setNumWorkers(numWorkers)\n      .setNumRound(10)\n      .fit(trainingDF)\n    val prediction = model.transform(trainingDF)\n    // a partial evaluation of dataframe will cause rabit initialized but not shutdown in some\n    // threads\n    prediction.show()\n    // a full evaluation here will re-run init and shutdown all rabit proxy\n    // expecting no error\n    prediction.collect()\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CustomObj.scala",
    "content": "/*\n Copyright (c) 2021 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.collection.mutable.ListBuffer\n\nimport org.apache.commons.logging.LogFactory\n\nimport ml.dmlc.xgboost4j.java.XGBoostError\nimport ml.dmlc.xgboost4j.scala.{DMatrix, ObjectiveTrait}\n\n\n/**\n * loglikelihood loss obj function\n */\nclass CustomObj(val customParameter: Int = 0) extends ObjectiveTrait {\n\n  val logger = LogFactory.getLog(classOf[CustomObj])\n\n  /**\n   * user define objective function, return gradient and second order gradient\n   *\n   * @param predicts untransformed margin predicts\n   * @param dtrain   training data\n   * @return List with two float array, correspond to first order grad and second order grad\n   */\n  override def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix)\n  : List[Array[Float]] = {\n    val nrow = predicts.length\n    val gradients = new ListBuffer[Array[Float]]\n    var labels: Array[Float] = null\n    try {\n      labels = dtrain.getLabel\n    } catch {\n      case e: XGBoostError =>\n        logger.error(e)\n        throw e\n      case e: Throwable => throw e\n    }\n    val grad = new Array[Float](nrow)\n    val hess = new Array[Float](nrow)\n    val transPredicts = transform(predicts)\n\n    for (i <- 0 until nrow) {\n      val predict = transPredicts(i)(0)\n      grad(i) = predict - labels(i)\n      hess(i) = predict * (1 - predict)\n    }\n    
gradients += grad\n    gradients += hess\n    gradients.toList\n  }\n\n  /**\n   * Simple sigmoid function.\n   *\n   * Note: this function is not concerned with numerical stability; it is only used as an example.\n   *\n   * @param input value to transform\n   * @return sigmoid of the input\n   */\n  def sigmoid(input: Float): Float = {\n    (1 / (1 + Math.exp(-input))).toFloat\n  }\n\n  def transform(predicts: Array[Array[Float]]): Array[Array[Float]] = {\n    val nrow = predicts.length\n    val transPredicts = Array.fill[Float](nrow, 1)(0)\n    for (i <- 0 until nrow) {\n      transPredicts(i)(0) = sigmoid(predicts(i)(0))\n    }\n    transPredicts\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/EvalError.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.apache.commons.logging.LogFactory\n\nimport ml.dmlc.xgboost4j.java.XGBoostError\nimport ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait}\n\nclass EvalError extends EvalTrait {\n\n  val logger = LogFactory.getLog(classOf[EvalError])\n\n  private[xgboost4j] var evalMetric: String = \"custom_error\"\n\n  /**\n   * get evaluate metric\n   *\n   * @return evalMetric\n   */\n  override def getMetric: String = evalMetric\n\n  /**\n   * evaluate with predicts and data\n   *\n   * @param predicts predictions as array\n   * @param dmat     data matrix to evaluate\n   * @return result of the metric\n   */\n  override def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float = {\n    var error: Float = 0f\n    var labels: Array[Float] = null\n    try {\n      labels = dmat.getLabel\n    } catch {\n      case ex: XGBoostError =>\n        logger.error(ex)\n        return -1f\n    }\n    require(predicts.length == labels.length, s\"predicts length ${predicts.length} has to be\" +\n      s\" equal with label length ${labels.length}\")\n    val nrow: Int = predicts.length\n    for (i <- 0 until nrow) {\n      if (labels(i) == 0.0 && predicts(i)(0) > 0) {\n        error += 1\n      } else if (labels(i) == 1.0 && predicts(i)(0) <= 0) {\n        error += 1\n      }\n    }\n    error / labels.length\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.{File, FileInputStream}\n\nimport org.apache.commons.io.IOUtils\nimport org.apache.spark.SparkContext\nimport org.apache.spark.ml.linalg.Vectors\nimport org.apache.spark.sql._\nimport org.scalatest.BeforeAndAfterEach\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.{withResource, XGBLabeledPointFeatures}\n\ntrait PerTest extends BeforeAndAfterEach {\n  self: AnyFunSuite =>\n\n  protected val numWorkers: Int = 4\n\n  @transient private var currentSession: SparkSession = _\n\n  def ss: SparkSession = getOrCreateSession\n\n  implicit def sc: SparkContext = ss.sparkContext\n\n  protected def sparkSessionBuilder: SparkSession.Builder = SparkSession.builder()\n    .master(s\"local[${numWorkers}]\")\n    .appName(\"XGBoostSuite\")\n    .config(\"spark.ui.enabled\", false)\n    .config(\"spark.driver.memory\", \"512m\")\n    .config(\"spark.barrier.sync.timeout\", 10)\n    .config(\"spark.task.cpus\", 1)\n    .config(\"spark.stage.maxConsecutiveAttempts\", 1)\n\n  override def beforeEach(): Unit = getOrCreateSession\n\n  override def afterEach(): Unit = {\n    if (currentSession != null) {\n      currentSession.stop()\n      cleanExternalCache(currentSession.sparkContext.appName)\n      currentSession = null\n    }\n 
 }\n\n  private def getOrCreateSession = synchronized {\n    if (currentSession == null) {\n      currentSession = sparkSessionBuilder.getOrCreate()\n      currentSession.sparkContext.setLogLevel(\"ERROR\")\n    }\n    currentSession\n  }\n\n  private def cleanExternalCache(prefix: String): Unit = {\n    val dir = new File(\".\")\n    for (file <- dir.listFiles() if file.getName.startsWith(prefix)) {\n      file.delete()\n    }\n  }\n\n  protected def buildDataFrame(\n      labeledPoints: Seq[XGBLabeledPoint],\n      numPartitions: Int = numWorkers): DataFrame = {\n    val it = labeledPoints.iterator.zipWithIndex\n      .map { case (labeledPoint: XGBLabeledPoint, id: Int) =>\n        (id, labeledPoint.label, labeledPoint.features, labeledPoint.weight)\n      }\n    ss.createDataFrame(sc.parallelize(it.toList, numPartitions))\n      .toDF(\"id\", \"label\", \"features\", \"weight\")\n  }\n\n  protected def buildDataFrameWithGroup(\n      labeledPoints: Seq[XGBLabeledPoint],\n      numPartitions: Int = numWorkers): DataFrame = {\n    val it = labeledPoints.iterator.zipWithIndex\n      .map { case (labeledPoint: XGBLabeledPoint, id: Int) =>\n        (id, labeledPoint.label, labeledPoint.features, labeledPoint.group, labeledPoint.weight)\n      }\n    ss.createDataFrame(sc.parallelize(it.toList, numPartitions))\n      .toDF(\"id\", \"label\", \"features\", \"group\", \"weight\")\n  }\n\n  protected def compareTwoFiles(lhs: String, rhs: String): Boolean = {\n    withResource(new FileInputStream(lhs)) { lfis =>\n      withResource(new FileInputStream(rhs)) { rfis =>\n        IOUtils.contentEquals(lfis, rfis)\n      }\n    }\n  }\n\n  def smallBinaryClassificationColumnar: DataFrame = ss.createDataFrame(sc.parallelize(Seq(\n    (1.0, 2.0, 3.0, 1.0),\n    (0.0, 0.0, 0.0, 0.0),\n    (0.0, 3.0, 0.0, 0.0),\n    (2.0, 0.0, 4.0, 1.0),\n    (0.2, 1.2, 2.0, 0.0),\n    (0.5, 2.2, 1.7, 1.0)\n  ))).toDF(\"c1\", \"c2\", \"c3\", \"label\")\n\n  def smallBinaryClassificationVector: 
DataFrame = ss.createDataFrame(sc.parallelize(Seq(\n    (1.0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0)),\n    (0.0, 0.4, -3.0, Vectors.dense(0.0, 0.0, 0.0)),\n    (0.0, 0.3, 1.0, Vectors.dense(0.0, 3.0, 0.0)),\n    (1.0, 1.2, 0.2, Vectors.dense(2.0, 0.0, 4.0)),\n    (0.0, -0.5, 0.0, Vectors.dense(0.2, 1.2, 2.0)),\n    (1.0, -0.4, -2.1, Vectors.dense(0.5, 2.2, 1.7))\n  ))).toDF(\"label\", \"margin\", \"weight\", \"features\")\n\n  def smallBinaryClassificationArray: DataFrame = ss.createDataFrame(sc.parallelize(Seq(\n    (1.0, 0.5, 1.0, Seq(1.0, 2.0, 3.0)),\n    (0.0, 0.4, -3.0, Seq(0.0, 0.0, 0.0)),\n    (0.0, 0.3, 1.0, Seq(0.0, 3.0, 0.0)),\n    (1.0, 1.2, 0.2, Seq(2.0, 0.0, 4.0)),\n    (0.0, -0.5, 0.0, Seq(0.2, 1.2, 2.0)),\n    (1.0, -0.4, -2.1, Seq(0.5, 2.2, 1.7))\n  ))).toDF(\"label\", \"margin\", \"weight\", \"features\")\n\n  def smallMultiClassificationVector: DataFrame = ss.createDataFrame(sc.parallelize(Seq(\n    (1.0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0)),\n    (0.0, 0.4, -3.0, Vectors.dense(0.0, 0.0, 0.0)),\n    (2.0, 0.3, 1.0, Vectors.dense(0.0, 3.0, 0.0)),\n    (1.0, 1.2, 0.2, Vectors.dense(2.0, 0.0, 4.0)),\n    (0.0, -0.5, 0.0, Vectors.dense(0.2, 1.2, 2.0)),\n    (2.0, -0.4, -2.1, Vectors.dense(0.5, 2.2, 1.7))\n  ))).toDF(\"label\", \"margin\", \"weight\", \"features\")\n\n  def smallGroupVector: DataFrame = ss.createDataFrame(sc.parallelize(Seq(\n    (1.0, 0, 0.5, 2.0, Vectors.dense(1.0, 2.0, 3.0)),\n    (0.0, 1, 0.4, 1.0, Vectors.dense(0.0, 0.0, 0.0)),\n    (0.0, 1, 0.3, 1.0, Vectors.dense(0.0, 3.0, 0.0)),\n    (1.0, 0, 1.2, 2.0, Vectors.dense(2.0, 0.0, 4.0)),\n    (1.0, 2, -0.5, 3.0, Vectors.dense(0.2, 1.2, 2.0)),\n    (0.0, 2, -0.4, 3.0, Vectors.dense(0.5, 2.2, 1.7))\n  ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\")\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TmpFolderPerSuite.scala",
    "content": "/*\n Copyright (c) 2014 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.nio.file.{Files, Path}\n\nimport org.apache.spark.network.util.JavaUtils\nimport org.scalatest.BeforeAndAfterAll\nimport org.scalatest.funsuite.AnyFunSuite\n\ntrait TmpFolderPerSuite extends BeforeAndAfterAll { self: AnyFunSuite =>\n  protected var tempDir: Path = _\n\n  override def beforeAll(): Unit = {\n    super.beforeAll()\n\n    tempDir = Files.createTempDirectory(getClass.getName)\n  }\n\n  override def afterAll(): Unit = {\n    JavaUtils.deleteRecursively(tempDir.toFile)\n    super.afterAll()\n  }\n\n  protected def createTmpFolder(prefix: String): Path = {\n    Files.createTempDirectory(tempDir, prefix)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.io.Source\nimport scala.util.Random\n\nimport ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}\n\ntrait TrainTestData {\n  protected def getResourceLines(resource: String): Iterator[String] = {\n    require(resource.startsWith(\"/\"), \"resource must start with /\")\n    val is = getClass.getResourceAsStream(resource)\n    if (is == null) {\n      sys.error(s\"failed to resolve resource $resource\")\n    }\n\n    Source.fromInputStream(is).getLines()\n  }\n\n  protected def getLabeledPoints(resource: String, featureSize: Int,\n                                 zeroBased: Boolean): Seq[XGBLabeledPoint] = {\n    getResourceLines(resource).map { line =>\n      val labelAndFeatures = line.split(\" \")\n      val label = labelAndFeatures.head.toFloat\n      val values = new Array[Float](featureSize)\n      for (feature <- labelAndFeatures.tail) {\n        val idAndValue = feature.split(\":\")\n        if (!zeroBased) {\n          values(idAndValue(0).toInt - 1) = idAndValue(1).toFloat\n        } else {\n          values(idAndValue(0).toInt) = idAndValue(1).toFloat\n        }\n      }\n\n      new XGBLabeledPoint(label, featureSize, null, values)\n    }.toList\n  }\n\n  protected def getLabeledPointsWithGroup(resource: String): Seq[XGBLabeledPoint] = {\n    getResourceLines(resource).map { line =>\n      val 
original = line.split(\",\")\n      val length = original.length\n      val label = original.head.toFloat\n      val group = original.last.toInt\n      val values = original.slice(1, length - 1).map(_.toFloat)\n      new XGBLabeledPoint(label, values.size, null, values, 1f, group, Float.NaN)\n    }.toList\n  }\n}\n\nobject Classification extends TrainTestData {\n  val train: Seq[XGBLabeledPoint] = getLabeledPoints(\"/agaricus.txt.train\", 126, zeroBased = false)\n  val test: Seq[XGBLabeledPoint] = getLabeledPoints(\"/agaricus.txt.test\", 126, zeroBased = false)\n\n  Random.setSeed(10)\n  val randomWeights = Array.fill(train.length)(Random.nextFloat())\n  val trainWithWeight = train.zipWithIndex.map { case (v, index) =>\n    new XGBLabeledPoint(v.label, v.size, v.indices, v.values,\n      randomWeights(index), v.group, v.baseMargin)\n  }\n}\n\nobject MultiClassification extends TrainTestData {\n\n  private def split(): (Seq[XGBLabeledPoint], Seq[XGBLabeledPoint]) = {\n    val tmp: Seq[XGBLabeledPoint] = getLabeledPoints(\"/dermatology.data\")\n    Random.setSeed(100)\n    val randomizedTmp = Random.shuffle(tmp)\n    val splitIndex = (randomizedTmp.length * 0.8).toInt\n    (randomizedTmp.take(splitIndex), randomizedTmp.drop(splitIndex))\n  }\n\n  val (train, test) = split()\n  Random.setSeed(10)\n  val randomWeights = Array.fill(train.length)(Random.nextFloat())\n  val trainWithWeight = train.zipWithIndex.map { case (v, index) =>\n    new XGBLabeledPoint(v.label, v.size, v.indices, v.values,\n      randomWeights(index), v.group, v.baseMargin)\n  }\n\n  private def getLabeledPoints(resource: String): Seq[XGBLabeledPoint] = {\n    getResourceLines(resource).map { line =>\n      val featuresAndLabel = line.split(\",\")\n      val label = featuresAndLabel.last.toFloat - 1\n      val values = new Array[Float](featuresAndLabel.length - 1)\n      values(values.length - 1) =\n        if (featuresAndLabel(featuresAndLabel.length - 2) == \"?\") 1 else 0\n      for (i <- 0 
until values.length - 2) {\n        values(i) = featuresAndLabel(i).toFloat\n      }\n\n      new XGBLabeledPoint(label, values.length - 1, null, values.take(values.length - 1))\n    }.toList\n  }\n}\n\nobject Regression extends TrainTestData {\n  val MACHINE_COL_NUM = 36\n  val train: Seq[XGBLabeledPoint] = getLabeledPoints(\n    \"/machine.txt.train\", MACHINE_COL_NUM, zeroBased = true)\n  val test: Seq[XGBLabeledPoint] = getLabeledPoints(\n    \"/machine.txt.test\", MACHINE_COL_NUM, zeroBased = true)\n\n  Random.setSeed(10)\n  val randomWeights = Array.fill(train.length)(Random.nextFloat())\n  val trainWithWeight = train.zipWithIndex.map { case (v, index) =>\n    new XGBLabeledPoint(v.label, v.size, v.indices, v.values,\n      randomWeights(index), v.group, v.baseMargin)\n  }\n\n  object Ranking extends TrainTestData {\n    val RANK_COL_NUM = 3\n    val train: Seq[XGBLabeledPoint] = getLabeledPointsWithGroup(\"/rank.train.csv\")\n    // use the group as the weight\n    val trainWithWeight = train.map { labelPoint =>\n      new XGBLabeledPoint(labelPoint.label, labelPoint.size, labelPoint.indices, labelPoint.values,\n        labelPoint.group, labelPoint.group, labelPoint.baseMargin)\n    }\n    val trainGroups = train.map(_.group)\n    val test: Seq[XGBLabeledPoint] = getLabeledPoints(\n      \"/rank.test.txt\", RANK_COL_NUM, zeroBased = false)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala",
    "content": "/*\n Copyright (c) 2014-2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\n\nimport org.apache.spark.ml.linalg.DenseVector\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.sql.DataFrame\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => ScalaXGBoost}\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.{BINARY_CLASSIFICATION_OBJS, MULTICLASSIFICATION_OBJS}\nimport ml.dmlc.xgboost4j.scala.spark.params.XGBoostParams\n\nclass XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite {\n  test(\"XGBoostClassifier extmem\") {\n    val df = smallMultiClassificationVector\n    var classifier = new XGBoostClassifier()\n    classifier.setNumClass(3).setUseExternalMemory(true)\n    intercept[IllegalArgumentException](\n      classifier.validate(df)\n    )\n  }\n\n  test(\"XGBoostClassifier copy\") {\n    val classifier = new XGBoostClassifier().setNthread(2).setNumWorkers(10)\n    val classifierCopied = classifier.copy(ParamMap.empty)\n\n    assert(classifier.uid === classifierCopied.uid)\n    assert(classifier.getNthread === classifierCopied.getNthread)\n    assert(classifier.getNumWorkers === classifier.getNumWorkers)\n  }\n\n  test(\"XGBoostClassification copy\") {\n    val model = new XGBoostClassificationModel(\"hello\").setNthread(2).setNumWorkers(10)\n    val modelCopied = 
model.copy(ParamMap.empty)\n    assert(model.uid === modelCopied.uid)\n    assert(model.getNthread === modelCopied.getNthread)\n    assert(model.getNumWorkers === modelCopied.getNumWorkers)\n  }\n\n  test(\"read/write\") {\n    val trainDf = smallBinaryClassificationVector\n    val xgbParams: Map[String, Any] = Map(\n      \"max_depth\" -> 5,\n      \"eta\" -> 0.2,\n      \"objective\" -> \"binary:logistic\"\n    )\n\n    def check(xgboostParams: XGBoostParams[_]): Unit = {\n      assert(xgboostParams.getMaxDepth === 5)\n      assert(xgboostParams.getEta === 0.2)\n      assert(xgboostParams.getObjective === \"binary:logistic\")\n    }\n\n    val classifierPath = new File(tempDir.toFile, \"classifier\").getPath\n    val classifier = new XGBoostClassifier(xgbParams).setNumRound(2)\n    check(classifier)\n\n    classifier.write.overwrite().save(classifierPath)\n    val loadedClassifier = XGBoostClassifier.load(classifierPath)\n    check(loadedClassifier)\n\n    val model = loadedClassifier.fit(trainDf)\n    check(model)\n    assert(model.numClasses === 2)\n\n    val modelPath = new File(tempDir.toFile, \"model\").getPath\n    model.write.overwrite().save(modelPath)\n    val modelLoaded = XGBoostClassificationModel.load(modelPath)\n    assert(modelLoaded.numClasses === 2)\n    check(modelLoaded)\n  }\n\n  test(\"XGBoostClassificationModel transformed schema\") {\n    val trainDf = smallBinaryClassificationVector\n    val classifier = new XGBoostClassifier().setNumRound(1)\n    val model = classifier.fit(trainDf)\n    var out = model.transform(trainDf)\n\n    // Transform should not discard the other columns of the transforming dataframe\n    Seq(\"label\", \"margin\", \"weight\", \"features\").foreach { v =>\n      assert(out.schema.names.contains(v))\n    }\n\n    // Transform needs to add extra columns\n    Seq(\"rawPrediction\", \"probability\", \"prediction\").foreach { v =>\n      assert(out.schema.names.contains(v))\n    }\n\n    assert(out.schema.names.length 
=== 7)\n\n    model.setRawPredictionCol(\"\").setProbabilityCol(\"\")\n    out = model.transform(trainDf)\n\n    // rawPrediction=\"\", probability=\"\"\n    Seq(\"rawPrediction\", \"probability\").foreach { v =>\n      assert(!out.schema.names.contains(v))\n    }\n\n    assert(out.schema.names.contains(\"prediction\"))\n\n    model.setLeafPredictionCol(\"leaf\").setContribPredictionCol(\"contrib\")\n    out = model.transform(trainDf)\n\n    assert(out.schema.names.contains(\"leaf\"))\n    assert(out.schema.names.contains(\"contrib\"))\n\n    val out1 = classifier.setLeafPredictionCol(\"leaf1\")\n      .setContribPredictionCol(\"contrib1\")\n      .fit(trainDf).transform(trainDf)\n\n    assert(out1.schema.names.contains(\"leaf1\"))\n    assert(out1.schema.names.contains(\"contrib1\"))\n  }\n\n  test(\"Supported objectives\") {\n    val classifier = new XGBoostClassifier()\n    val df = smallMultiClassificationVector\n    (BINARY_CLASSIFICATION_OBJS.toSeq ++ MULTICLASSIFICATION_OBJS.toSeq).foreach { obj =>\n      classifier.setObjective(obj)\n      classifier.validate(df)\n    }\n\n    classifier.setObjective(\"reg:squaredlogerror\")\n    intercept[IllegalArgumentException](\n      classifier.validate(df)\n    )\n  }\n\n  test(\"BinaryClassification infer objective and num_class\") {\n    val trainDf = smallBinaryClassificationVector\n    var classifier = new XGBoostClassifier()\n    assert(classifier.getObjective === \"reg:squarederror\")\n    assert(classifier.getNumClass === 0)\n    classifier.validate(trainDf)\n    assert(classifier.getObjective === \"binary:logistic\")\n    assert(!classifier.isSet(classifier.numClass))\n\n    // Explicitly setting num class to 2 on binary data is rejected by validate\n    classifier = new XGBoostClassifier()\n    classifier.setNumClass(2)\n    intercept[IllegalArgumentException](\n      classifier.validate(trainDf)\n    )\n\n    // Set only the objective: it is kept and num class is left unset\n    classifier = new XGBoostClassifier()\n    
classifier.setObjective(\"binary:logistic\")\n    classifier.validate(trainDf)\n    assert(classifier.getObjective === \"binary:logistic\")\n    assert(!classifier.isSet(classifier.numClass))\n  }\n\n  test(\"MultiClassification infer objective and num_class\") {\n    val trainDf = smallMultiClassificationVector\n    var classifier = new XGBoostClassifier()\n    assert(classifier.getObjective === \"reg:squarederror\")\n    assert(classifier.getNumClass === 0)\n    classifier.validate(trainDf)\n    assert(classifier.getObjective === \"multi:softprob\")\n    assert(classifier.getNumClass === 3)\n\n    // Infer to objective according to num class\n    classifier = new XGBoostClassifier()\n    classifier.setNumClass(3)\n    classifier.validate(trainDf)\n    assert(classifier.getObjective === \"multi:softprob\")\n    assert(classifier.getNumClass === 3)\n\n    // Infer to num class according to objective\n    classifier = new XGBoostClassifier()\n    classifier.setObjective(\"multi:softmax\")\n    classifier.validate(trainDf)\n    assert(classifier.getObjective === \"multi:softmax\")\n    assert(classifier.getNumClass === 3)\n  }\n\n  test(\"XGBoost-Spark binary classification output should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Classification.train.iterator)\n    val testDM = new DMatrix(Classification.test.iterator)\n    val trainingDF = buildDataFrame(Classification.train)\n    val testDF = buildDataFrame(Classification.test)\n    val paramMap = Map(\"objective\" -> \"binary:logistic\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, 5, paramMap)\n  }\n\n  test(\"XGBoost-Spark binary classification output with weight should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Classification.trainWithWeight.iterator)\n    trainingDM.setWeight(Classification.randomWeights)\n    val testDM = new DMatrix(Classification.test.iterator)\n    val trainingDF = buildDataFrame(Classification.trainWithWeight)\n    val testDF = 
buildDataFrame(Classification.test)\n    val paramMap = Map(\"objective\" -> \"binary:logistic\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF,\n      5, paramMap, Some(\"weight\"))\n  }\n\n  Seq(\"multi:softprob\", \"multi:softmax\").foreach { objective =>\n    test(s\"XGBoost-Spark multi classification with $objective output should match XGBoost4j\") {\n      val trainingDM = new DMatrix(MultiClassification.train.iterator)\n      val testDM = new DMatrix(MultiClassification.test.iterator)\n      val trainingDF = buildDataFrame(MultiClassification.train)\n      val testDF = buildDataFrame(MultiClassification.test)\n      val paramMap = Map(\"objective\" -> \"multi:softprob\", \"num_class\" -> 6)\n      checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, 5, paramMap)\n    }\n  }\n\n  test(\"XGBoost-Spark multi classification output with weight should match XGBoost4j\") {\n    val trainingDM = new DMatrix(MultiClassification.trainWithWeight.iterator)\n    trainingDM.setWeight(MultiClassification.randomWeights)\n    val testDM = new DMatrix(MultiClassification.test.iterator)\n    val trainingDF = buildDataFrame(MultiClassification.trainWithWeight)\n    val testDF = buildDataFrame(MultiClassification.test)\n    val paramMap = Map(\"objective\" -> \"multi:softprob\", \"num_class\" -> 6)\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, 5, paramMap, Some(\"weight\"))\n  }\n\n  private def checkResultsWithXGBoost4j(\n      trainingDM: DMatrix,\n      testDM: DMatrix,\n      trainingDF: DataFrame,\n      testDF: DataFrame,\n      round: Int = 5,\n      xgbParams: Map[String, Any] = Map.empty,\n      weightCol: Option[String] = None): Unit = {\n    val paramMap = Map(\n      \"eta\" -> \"1\",\n      \"max_depth\" -> \"6\",\n      \"base_score\" -> 0.5,\n      \"max_bin\" -> 16) ++ xgbParams\n    val xgb4jModel = ScalaXGBoost.train(trainingDM, paramMap, round)\n\n    val classifier = new 
XGBoostClassifier(paramMap)\n      .setNumRound(round)\n      .setNumWorkers(numWorkers)\n      .setLeafPredictionCol(\"leaf\")\n      .setContribPredictionCol(\"contrib\")\n    weightCol.foreach(weight => classifier.setWeightCol(weight))\n\n    def checkEqual(left: Array[Array[Float]], right: Map[Int, Array[Float]]) = {\n      assert(left.size === right.size)\n      left.zipWithIndex.foreach { case (leftValue, index) =>\n        assert(leftValue.sameElements(right(index)))\n      }\n    }\n\n    val xgbSparkModel = classifier.fit(trainingDF)\n    val rows = xgbSparkModel.transform(testDF).collect()\n\n    // Check Leaf\n    val xgb4jLeaf = xgb4jModel.predictLeaf(testDM)\n    val xgbSparkLeaf = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n    // Check contrib\n    val xgb4jContrib = xgb4jModel.predictContrib(testDM)\n    val xgbSparkContrib = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jContrib, xgbSparkContrib)\n\n    def checkEqualForBinary(left: Array[Array[Float]], right: Map[Int, Array[Float]]) = {\n      assert(left.size === right.size)\n      left.zipWithIndex.foreach { case (leftValue, index) =>\n        assert(leftValue.length === 1)\n        assert(leftValue.length === right(index).length - 1)\n        assert(leftValue(0) === right(index)(1))\n      }\n    }\n\n    // Check probability\n    val xgb4jProb = xgb4jModel.predict(testDM)\n    val xgbSparkProb = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"probability\").toArray.map(_.toFloat))).toMap\n    if (BINARY_CLASSIFICATION_OBJS.contains(classifier.getObjective)) {\n      checkEqualForBinary(xgb4jProb, xgbSparkProb)\n    } else {\n      checkEqual(xgb4jProb, xgbSparkProb)\n    }\n\n    // Check rawPrediction\n    val xgb4jRawPred = xgb4jModel.predict(testDM, outPutMargin = 
true)\n    val xgbSparkRawPred = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"rawPrediction\").toArray.map(_.toFloat))).toMap\n    if (BINARY_CLASSIFICATION_OBJS.contains(classifier.getObjective)) {\n      checkEqualForBinary(xgb4jRawPred, xgbSparkRawPred)\n    } else {\n      checkEqual(xgb4jRawPred, xgbSparkRawPred)\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala",
    "content": "/*\n Copyright (c) 2024-2026 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\nimport java.util.Arrays\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport org.apache.spark.SparkException\nimport org.apache.spark.ml.Pipeline\nimport org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors}\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.ml.xgboost.SparkUtils\nimport org.apache.spark.sql.functions.col\nimport org.apache.spark.sql.types.{ArrayType, DoubleType, FloatType}\nimport org.json4s.{DefaultFormats, Formats}\nimport org.json4s.jackson.parseJson\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.scala.DMatrix\nimport ml.dmlc.xgboost4j.scala.spark.Utils.TRAIN_NAME\n\nclass XGBoostEstimatorSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite {\n\n  test(\"params\") {\n    val df = smallBinaryClassificationVector\n    val xgbParams: Map[String, Any] = Map(\n      \"max_depth\" -> 5,\n      \"eta\" -> 0.2,\n      \"objective\" -> \"binary:logistic\"\n    )\n    val estimator = new XGBoostClassifier(xgbParams)\n      .setFeaturesCol(\"features\")\n      .setMissing(0.2f)\n      .setAlpha(0.97)\n      .setLeafPredictionCol(\"leaf\")\n      .setContribPredictionCol(\"contrib\")\n      .setNumRound(1)\n\n    assert(estimator.getMaxDepth === 5)\n    assert(estimator.getEta === 0.2)\n    assert(estimator.getObjective === 
\"binary:logistic\")\n    assert(estimator.getFeaturesCol === \"features\")\n    assert(estimator.getMissing === 0.2f)\n    assert(estimator.getAlpha === 0.97)\n\n    estimator.setEta(0.66).setMaxDepth(7)\n    assert(estimator.getMaxDepth === 7)\n    assert(estimator.getEta === 0.66)\n\n    val model = estimator.fit(df)\n    assert(model.getMaxDepth === 7)\n    assert(model.getEta === 0.66)\n    assert(model.getObjective === \"binary:logistic\")\n    assert(model.getFeaturesCol === \"features\")\n    assert(model.getMissing === 0.2f)\n    assert(model.getAlpha === 0.97)\n    assert(model.getLeafPredictionCol === \"leaf\")\n    assert(model.getContribPredictionCol === \"contrib\")\n  }\n\n  test(\"camel case parameters\") {\n    val xgbParams: Map[String, Any] = Map(\n      \"max_depth\" -> 5,\n      \"featuresCol\" -> \"abc\",\n      \"num_workers\" -> 2,\n      \"numRound\" -> 11\n    )\n    val estimator = new XGBoostClassifier(xgbParams)\n    assert(estimator.getFeaturesCol === \"abc\")\n    assert(estimator.getNumWorkers === 2)\n    assert(estimator.getNumRound === 11)\n    assert(estimator.getMaxDepth === 5)\n\n    val xgbParams1: Map[String, Any] = Map(\n      \"maxDepth\" -> 5,\n      \"features_col\" -> \"abc\",\n      \"numWorkers\" -> 2,\n      \"num_round\" -> 11\n    )\n    val estimator1 = new XGBoostClassifier(xgbParams1)\n    assert(estimator1.getFeaturesCol === \"abc\")\n    assert(estimator1.getNumWorkers === 2)\n    assert(estimator1.getNumRound === 11)\n    assert(estimator1.getMaxDepth === 5)\n  }\n\n  test(\"get xgboost parameters\") {\n    val params: Map[String, Any] = Map(\n      \"max_depth\" -> 5,\n      \"featuresCol\" -> \"abc\",\n      \"label\" -> \"class\",\n      \"num_workers\" -> 2,\n      \"tree_method\" -> \"hist\",\n      \"numRound\" -> 11,\n      \"not_exist_parameters\" -> \"hello\"\n    )\n    val estimator = new XGBoostClassifier(params)\n    val xgbParams = estimator.getXGBoostParams\n    assert(xgbParams.size === 2)\n    
assert(xgbParams.contains(\"max_depth\") && xgbParams.contains(\"tree_method\"))\n  }\n\n  test(\"nthread\") {\n    val classifier = new XGBoostClassifier().setNthread(100)\n\n    intercept[IllegalArgumentException](\n      classifier.validate(smallBinaryClassificationVector)\n    )\n  }\n\n  test(\"RuntimeParameter\") {\n    var runtimeParams = new XGBoostClassifier(\n      Map(\"device\" -> \"cpu\"))\n      .getRuntimeParameters(true)\n    assert(!runtimeParams.runOnGpu)\n\n    runtimeParams = new XGBoostClassifier(\n      Map(\"device\" -> \"cuda\")).setNumWorkers(1).setNumRound(1)\n      .getRuntimeParameters(true)\n    assert(runtimeParams.runOnGpu)\n  }\n\n  test(\"missing value exception for sparse vector\") {\n    val sparse1 = Vectors.dense(0.0, 0.0, 0.0).toSparse\n    assert(sparse1.isInstanceOf[SparseVector])\n    val sparse2 = Vectors.dense(0.5, 2.2, 1.7).toSparse\n    assert(sparse2.isInstanceOf[SparseVector])\n\n    val sparseInput = ss.createDataFrame(sc.parallelize(Seq(\n      (1.0, sparse1),\n      (2.0, sparse2)\n    ))).toDF(\"label\", \"features\")\n\n    val classifier = new XGBoostClassifier()\n    val (input, columnIndexes) = classifier.preprocess(sparseInput)\n    val rdd = classifier.toXGBLabeledPoint(input, columnIndexes)\n\n    val exception = intercept[SparkException] {\n      rdd.collect()\n    }\n    assert(exception.getMessage.contains(\"We've detected sparse vectors in the dataset \" +\n      \"that need conversion to dense format\"))\n\n    // explicitly set missing value, no exception\n    classifier.setMissing(Float.NaN)\n    val rdd1 = classifier.toXGBLabeledPoint(input, columnIndexes)\n    rdd1.collect()\n  }\n\n  test(\"missing value for dense vector no need to set missing explicitly\") {\n    val dense1 = Vectors.dense(0.0, 0.0, 0.0)\n    assert(dense1.isInstanceOf[DenseVector])\n    val dense2 = Vectors.dense(0.5, 2.2, 1.7)\n    assert(dense2.isInstanceOf[DenseVector])\n\n    val sparseInput = 
ss.createDataFrame(sc.parallelize(Seq(\n      (1.0, dense1),\n      (2.0, dense2)\n    ))).toDF(\"label\", \"features\")\n\n    val classifier = new XGBoostClassifier()\n    val (input, columnIndexes) = classifier.preprocess(sparseInput)\n    val rdd = classifier.toXGBLabeledPoint(input, columnIndexes)\n    rdd.collect()\n  }\n\n  test(\"test persistence of XGBoostClassifier and XGBoostClassificationModel \" +\n    \"using custom Eval and Obj\") {\n    val trainingDF = buildDataFrame(Classification.train)\n    val testDM = new DMatrix(Classification.test.iterator)\n\n    val paramMap = Map(\"eta\" -> \"0.1\", \"max_depth\" -> \"6\",\n      \"verbosity\" -> \"1\", \"objective\" -> \"binary:logistic\")\n\n    val xgbc = new XGBoostClassifier(paramMap)\n      .setCustomObj(new CustomObj(1))\n      .setCustomEval(new EvalError)\n      .setNumRound(10)\n      .setNumWorkers(numWorkers)\n\n    val xgbcPath = new File(tempDir.toFile, \"xgbc\").getPath\n    xgbc.write.overwrite().save(xgbcPath)\n    val xgbc2 = XGBoostClassifier.load(xgbcPath)\n\n    assert(xgbc.getCustomObj.asInstanceOf[CustomObj].customParameter === 1)\n    assert(xgbc2.getCustomObj.asInstanceOf[CustomObj].customParameter === 1)\n\n    val eval = new EvalError()\n\n    val model = xgbc.fit(trainingDF)\n    val evalResults = eval.eval(model.nativeBooster.predict(testDM, outPutMargin = true), testDM)\n    assert(evalResults < 0.1)\n    val xgbcModelPath = new File(tempDir.toFile, \"xgbcModel\").getPath\n    model.write.overwrite.save(xgbcModelPath)\n    val model2 = XGBoostClassificationModel.load(xgbcModelPath)\n    assert(Arrays.equals(model.nativeBooster.toByteArray, model2.nativeBooster.toByteArray))\n\n    assert(model.getEta === model2.getEta)\n    assert(model.getNumRound === model2.getNumRound)\n    assert(model.getRawPredictionCol === model2.getRawPredictionCol)\n    val evalResults2 = eval.eval(model2.nativeBooster.predict(testDM, outPutMargin = true), testDM)\n    assert(evalResults === 
evalResults2)\n  }\n\n  test(\"Check for Spark encryption over-the-wire\") {\n    val originalSslConfOpt = ss.conf.getOption(\"spark.ssl.enabled\")\n    ss.conf.set(\"spark.ssl.enabled\", true)\n\n    val paramMap = Map(\"eta\" -> \"1\", \"max_depth\" -> \"2\", \"verbosity\" -> \"1\",\n      \"objective\" -> \"binary:logistic\")\n    val training = smallBinaryClassificationVector\n\n    withClue(\"xgboost-spark should throw an exception when spark.ssl.enabled = true but \" +\n      \"xgboost.spark.ignoreSsl != true\") {\n      val thrown = intercept[Exception] {\n        new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training)\n      }\n      assert(thrown.getMessage.contains(\"xgboost.spark.ignoreSsl\") &&\n        thrown.getMessage.contains(\"spark.ssl.enabled\"))\n    }\n\n    // Confirm that this check can be overridden.\n    ss.conf.set(\"xgboost.spark.ignoreSsl\", true)\n    new XGBoostClassifier(paramMap).setNumRound(2).setNumWorkers(numWorkers).fit(training)\n\n    originalSslConfOpt match {\n      case None =>\n        ss.conf.unset(\"spark.ssl.enabled\")\n      case Some(originalSslConf) =>\n        ss.conf.set(\"spark.ssl.enabled\", originalSslConf)\n    }\n    ss.conf.unset(\"xgboost.spark.ignoreSsl\")\n  }\n\n  test(\"nthread configuration must be no larger than spark.task.cpus\") {\n    val training = smallBinaryClassificationVector\n    val paramMap = Map(\"eta\" -> \"1\", \"max_depth\" -> \"2\", \"verbosity\" -> \"1\",\n      \"objective\" -> \"binary:logistic\")\n    intercept[IllegalArgumentException] {\n      new XGBoostClassifier(paramMap)\n        .setNumWorkers(numWorkers)\n        .setNumRound(2)\n        .setNthread(sc.getConf.getInt(\"spark.task.cpus\", 1) + 1)\n        .fit(training)\n    }\n  }\n\n  test(\"preprocess dataset\") {\n    val dataset = ss.createDataFrame(sc.parallelize(Seq(\n      (1.0, 0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0), \"a\"),\n      (0.0, 2, -0.5, 0.0, Vectors.dense(0.2, 1.2, 2.0), 
\"b\"),\n      (2.0, 2, -0.4, -2.1, Vectors.dense(0.5, 2.2, 1.7), \"c\")\n    ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\", \"other\")\n\n    val classifier = new XGBoostClassifier()\n      .setLabelCol(\"label\")\n      .setFeaturesCol(\"features\")\n      .setBaseMarginCol(\"margin\")\n      .setWeightCol(\"weight\")\n\n    val (df, indices) = classifier.preprocess(dataset)\n    var schema = df.schema\n    assert(!schema.names.contains(\"group\") && !schema.names.contains(\"other\"))\n    assert(indices.labelId == schema.fieldIndex(\"label\") &&\n      indices.groupId.isEmpty &&\n      indices.marginId.get == schema.fieldIndex(\"margin\") &&\n      indices.weightId.get == schema.fieldIndex(\"weight\") &&\n      indices.featureId.get == schema.fieldIndex(\"features\") &&\n      indices.featureIds.isEmpty)\n\n    classifier.setWeightCol(\"\")\n    val (df1, indices1) = classifier.preprocess(dataset)\n    schema = df1.schema\n    Seq(\"weight\", \"group\", \"other\").foreach(v => assert(!schema.names.contains(v)))\n    assert(indices1.labelId == schema.fieldIndex(\"label\") &&\n      indices1.groupId.isEmpty &&\n      indices1.marginId.get == schema.fieldIndex(\"margin\") &&\n      indices1.weightId.isEmpty &&\n      indices1.featureId.get == schema.fieldIndex(\"features\") &&\n      indices1.featureIds.isEmpty)\n  }\n\n  test(\"to XGBoostLabeledPoint RDD\") {\n    val data = Array(\n      Array(1.0, 2.0, 3.0, 4.0, 5.0),\n      Array(0.0, 0.0, 0.0, 0.0, 2.0),\n      Array(12.0, 13.0, 14.0, 14.0, 15.0),\n      Array(20.5, 21.2, 0.0, 0.0, 2.0)\n    )\n    val dataset = ss.createDataFrame(sc.parallelize(Seq(\n      (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), \"a\"),\n      (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, \"b\"),\n      (3.0, 2, -0.5, 0.0, Vectors.dense(data(2)), \"b\"),\n      (4.0, 2, -0.4, -2.1, Vectors.dense(data(3)), \"c\")\n    ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\", \"other\")\n\n    val 
classifier = new XGBoostClassifier()\n      .setLabelCol(\"label\")\n      .setFeaturesCol(\"features\")\n      .setWeightCol(\"weight\")\n      .setNumWorkers(2)\n      .setMissing(Float.NaN)\n\n    val (df, indices) = classifier.preprocess(dataset)\n    val rdd = classifier.toXGBLabeledPoint(df, indices)\n    val result = rdd.collect().sortBy(x => x.label)\n\n    assert(result.length == data.length)\n\n    def toArray(index: Int): Array[Float] = {\n      val labelPoint = result(index)\n      if (labelPoint.indices != null) {\n        Vectors.sparse(labelPoint.size,\n          labelPoint.indices,\n          labelPoint.values.map(_.toDouble)).toArray.map(_.toFloat)\n      } else {\n        labelPoint.values\n      }\n    }\n\n    assert(result(0).label === 1.0f && result(0).baseMargin.isNaN &&\n      result(0).weight === 1.0f && toArray(0) === data(0).map(_.toFloat))\n    assert(result(1).label == 2.0f && result(1).baseMargin.isNaN &&\n      result(1).weight === 0.0f && toArray(1) === data(1).map(_.toFloat))\n    assert(result(2).label === 3.0f && result(2).baseMargin.isNaN &&\n      result(2).weight == 0.0f && toArray(2) === data(2).map(_.toFloat))\n    assert(result(3).label === 4.0f && result(3).baseMargin.isNaN &&\n      result(3).weight === -2.1f && toArray(3) === data(3).map(_.toFloat))\n  }\n\n  Seq((Float.NaN, 2), (0.0f, 7 + 2), (15.0f, 1 + 2), (10101011.0f, 0 + 2)).foreach {\n    case (missing, expectedMissingValue) =>\n      test(s\"to RDD watches with missing $missing\") {\n        val data = Array(\n          Array(1.0, 2.0, 3.0, 4.0, 5.0),\n          Array(1.0, Float.NaN, 0.0, 0.0, 2.0),\n          Array(12.0, 13.0, Float.NaN, 14.0, 15.0),\n          Array(0.0, 0.0, 0.0, 0.0, 0.0)\n        )\n        val dataset = ss.createDataFrame(sc.parallelize(Seq(\n          (1.0, 0, 0.5, 1.0, Vectors.dense(data(0)), \"a\"),\n          (2.0, 2, -0.5, 0.0, Vectors.dense(data(1)).toSparse, \"b\"),\n          (3.0, 3, -0.5, 0.0, Vectors.dense(data(2)), \"b\"),\n      
    (4.0, 4, -0.4, -2.1, Vectors.dense(data(3)), \"c\")\n        ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\", \"other\")\n\n        val classifier = new XGBoostClassifier()\n          .setLabelCol(\"label\")\n          .setFeaturesCol(\"features\")\n          .setWeightCol(\"weight\")\n          .setBaseMarginCol(\"margin\")\n          .setMissing(missing)\n          .setNumWorkers(2)\n\n        val (df, indices) = classifier.preprocess(dataset)\n        val rdd = classifier.toRdd(df, indices)\n        val result = rdd.mapPartitions { iter =>\n          if (iter.hasNext) {\n            val watches = iter.next()\n            val size = watches.size\n            val trainDM = watches.toMap(TRAIN_NAME)\n            val rowNum = trainDM.rowNum\n            val labels = trainDM.getLabel\n            val weight = trainDM.getWeight\n            val margins = trainDM.getBaseMargin\n            val nonMissing = trainDM.nonMissingNum\n            watches.delete()\n            Iterator.single((size, rowNum, labels, weight, margins, nonMissing))\n          } else {\n            Iterator.empty\n          }\n        }.collect()\n\n        val labels: ArrayBuffer[Float] = ArrayBuffer.empty\n        val weight: ArrayBuffer[Float] = ArrayBuffer.empty\n        val margins: ArrayBuffer[Float] = ArrayBuffer.empty\n        var nonMissingValues = 0L\n        var totalRows = 0L\n\n        for (row <- result) {\n          assert(row._1 === 1)\n          totalRows = totalRows + row._2\n          labels.append(row._3: _*)\n          weight.append(row._4: _*)\n          margins.append(row._5: _*)\n          nonMissingValues = nonMissingValues + row._6\n        }\n        assert(totalRows === 4)\n        assert(nonMissingValues === data.size * data(0).length - expectedMissingValue)\n        assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted)\n        assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted)\n        
assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted)\n      }\n  }\n\n  test(\"to RDD watches with eval\") {\n    val trainData = Array(\n      Array(-1.0, -2.0, -3.0, -4.0, -5.0),\n      Array(2.0, 2.0, 2.0, 3.0, -2.0),\n      Array(-12.0, -13.0, -14.0, -14.0, -15.0),\n      Array(-20.5, -21.2, 0.0, 0.0, 2.0)\n    )\n    val trainDataset = ss.createDataFrame(sc.parallelize(Seq(\n      (11.0, 0, 0.15, 11.0, Vectors.dense(trainData(0)), \"a\"),\n      (12.0, 12, -0.15, 10.0, Vectors.dense(trainData(1)).toSparse, \"b\"),\n      (13.0, 12, -0.15, 10.0, Vectors.dense(trainData(2)), \"b\"),\n      (14.0, 12, -0.14, -12.1, Vectors.dense(trainData(3)), \"c\")\n    ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\", \"other\")\n    val evalData = Array(\n      Array(1.0, 2.0, 3.0, 4.0, 5.0),\n      Array(0.0, 0.0, 0.0, 0.0, 2.0),\n      Array(12.0, 13.0, 14.0, 14.0, 15.0),\n      Array(20.5, 21.2, 0.0, 0.0, 2.0)\n    )\n    val evalDataset = ss.createDataFrame(sc.parallelize(Seq(\n      (1.0, 0, 0.5, 1.0, Vectors.dense(evalData(0)), \"a\"),\n      (2.0, 2, -0.5, 0.0, Vectors.dense(evalData(1)).toSparse, \"b\"),\n      (3.0, 2, -0.5, 0.0, Vectors.dense(evalData(2)), \"b\"),\n      (4.0, 2, -0.4, -2.1, Vectors.dense(evalData(3)), \"c\")\n    ))).toDF(\"label\", \"group\", \"margin\", \"weight\", \"features\", \"other\")\n\n    val classifier = new XGBoostClassifier()\n      .setLabelCol(\"label\")\n      .setFeaturesCol(\"features\")\n      .setWeightCol(\"weight\")\n      .setBaseMarginCol(\"margin\")\n      .setEvalDataset(evalDataset)\n      .setNumWorkers(2)\n      .setMissing(Float.NaN)\n\n    val (df, indices) = classifier.preprocess(trainDataset)\n    val rdd = classifier.toRdd(df, indices)\n    val result = rdd.mapPartitions { iter =>\n      if (iter.hasNext) {\n        val watches = iter.next()\n        val size = watches.size\n        val evalDM = watches.toMap(Utils.VALIDATION_NAME)\n        val rowNum = evalDM.rowNum\n       
 val labels = evalDM.getLabel\n        val weight = evalDM.getWeight\n        val margins = evalDM.getBaseMargin\n        watches.delete()\n        Iterator.single((size, rowNum, labels, weight, margins))\n      } else {\n        Iterator.empty\n      }\n    }.collect()\n\n    val labels: ArrayBuffer[Float] = ArrayBuffer.empty\n    val weight: ArrayBuffer[Float] = ArrayBuffer.empty\n    val margins: ArrayBuffer[Float] = ArrayBuffer.empty\n\n    var totalRows = 0L\n    for (row <- result) {\n      assert(row._1 === 2)\n      totalRows = totalRows + row._2\n      labels.append(row._3: _*)\n      weight.append(row._4: _*)\n      margins.append(row._5: _*)\n    }\n    assert(totalRows === 4)\n    assert(labels.toArray.sorted === Array(1.0f, 2.0f, 3.0f, 4.0f).sorted)\n    assert(weight.toArray.sorted === Array(0.0f, 0.0f, 1.0f, -2.1f).sorted)\n    assert(margins.toArray.sorted === Array(-0.5f, -0.5f, -0.4f, 0.5f).sorted)\n  }\n\n  test(\"XGBoost-Spark model format should match xgboost4j\") {\n    val trainingDF = buildDataFrame(MultiClassification.train)\n\n    Seq(new XGBoostClassifier()).foreach { est =>\n      est.setNumRound(5)\n      val model = est.fit(trainingDF)\n\n      // test json\n      val modelPath = new File(tempDir.toFile, \"xgbc\").getPath\n      model.write.overwrite().option(\"format\", \"json\").save(modelPath)\n      val nativeJsonModelPath = new File(tempDir.toFile, \"nativeModel.json\").getPath\n      model.nativeBooster.saveModel(nativeJsonModelPath)\n      assert(compareTwoFiles(new File(modelPath, \"data/model\").getPath,\n        nativeJsonModelPath))\n\n      // test ubj\n      val modelUbjPath = new File(tempDir.toFile, \"xgbcUbj\").getPath\n      model.write.overwrite().save(modelUbjPath)\n      val nativeUbjModelPath = new File(tempDir.toFile, \"nativeModel.ubj\").getPath\n      model.nativeBooster.saveModel(nativeUbjModelPath)\n      assert(compareTwoFiles(new File(modelUbjPath, \"data/model\").getPath,\n        nativeUbjModelPath))\n\n   
   // json file should be indifferent with ubj file\n      val modelJsonPath = new File(tempDir.toFile, \"xgbcJson\").getPath\n      model.write.overwrite().option(\"format\", \"json\").save(modelJsonPath)\n      val nativeUbjModelPath1 = new File(tempDir.toFile, \"nativeModel1.ubj\").getPath\n      model.nativeBooster.saveModel(nativeUbjModelPath1)\n      assert(!compareTwoFiles(new File(modelJsonPath, \"data/model\").getPath,\n        nativeUbjModelPath1))\n    }\n  }\n\n  test(\"native json model file should store feature_name and feature_type\") {\n    val featureNames = (1 to 33).map(idx => s\"feature_${idx}\").toArray\n    val featureTypes = (1 to 33).map(idx => \"q\").toArray\n    val trainingDF = buildDataFrame(MultiClassification.train)\n    val xgb = new XGBoostClassifier()\n      .setNumWorkers(numWorkers)\n      .setFeatureNames(featureNames)\n      .setFeatureTypes(featureTypes)\n      .setNumRound(2)\n    val model = xgb.fit(trainingDF)\n    val modelStr = new String(model.nativeBooster.toByteArray(\"json\"))\n    val jsonModel = parseJson(modelStr)\n    implicit val formats: Formats = DefaultFormats\n    val featureNamesInModel = (jsonModel \\ \"learner\" \\ \"feature_names\").extract[List[String]]\n    val featureTypesInModel = (jsonModel \\ \"learner\" \\ \"feature_types\").extract[List[String]]\n    assert(featureNamesInModel.length == 33)\n    assert(featureTypesInModel.length == 33)\n    assert(featureNames sameElements featureNamesInModel)\n    assert(featureTypes sameElements featureTypesInModel)\n  }\n\n  test(\"Exception with clear message\") {\n    val df = smallMultiClassificationVector\n    val classifier = new XGBoostClassifier()\n      .setNumRound(2)\n      .setObjective(\"multi:softprob\")\n      .setNumClass(2)\n\n    val exception = intercept[SparkException] {\n      classifier.fit(df)\n    }\n\n    exception.getMessage.contains(\"SoftmaxMultiClassObj: label must be in [0, num_class).\")\n  }\n\n  test(\"Model trained on vector can 
transform on array/columnar input\") {\n    val vectorDf = smallBinaryClassificationVector\n    val classifier = new XGBoostClassifier().setNumRound(2)\n\n    // The model is trained with vector as the input\n    val model = classifier.fit(vectorDf)\n\n    val columnarDf = smallBinaryClassificationColumnar\n\n    // Model is trained with vector input, it doesn't have columnar input information\n    val thrown = intercept[IllegalArgumentException] {\n      model.transform(columnarDf).collect()\n    }\n    assert(thrown.getMessage.contains(\"features does not exist\"))\n\n    // Transform on columnar input\n    model.copy(ParamMap.empty)\n      .setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n      .transform(columnarDf)\n      .collect()\n\n    // Transform on array input\n    val arrayDf = smallBinaryClassificationArray\n    model.copy(ParamMap.empty).transform(arrayDf).collect()\n  }\n\n  test(\"Model trained on array can transform on vector/columnar input\") {\n    val arrayDf = smallBinaryClassificationArray\n    val classifier = new XGBoostClassifier().setNumRound(2)\n\n    // The model is trained with vector as the input\n    val model = classifier.fit(arrayDf)\n    val columnarDf = smallBinaryClassificationColumnar\n\n    // Model is trained with vector input, it doesn't have columnar input information\n    val thrown = intercept[IllegalArgumentException] {\n      model.transform(columnarDf).collect()\n    }\n    assert(thrown.getMessage.contains(\"features does not exist\"))\n\n    // Transform on columnar input\n    model.copy(ParamMap.empty)\n      .setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n      .transform(columnarDf)\n      .collect()\n\n    // Transform on vector input\n    val vectorDf = smallBinaryClassificationVector\n    model.copy(ParamMap.empty).transform(vectorDf).collect()\n  }\n\n  test(\"Model trained on columnar can transform on array/vector input\") {\n    val columnarDf = smallBinaryClassificationColumnar\n    val features = 
Array(\"c1\", \"c2\", \"c3\")\n    val classifier = new XGBoostClassifier().setNumRound(2).setFeaturesCol(features)\n    // The model is trained with vector as the input\n    val model = classifier.fit(columnarDf)\n\n    // Transform on vector df\n    val vectorDf = smallBinaryClassificationVector\n    model.transform(vectorDf).collect()\n\n    // Transform on array df\n    val arrayDf = smallBinaryClassificationArray\n    model.transform(arrayDf).collect()\n  }\n\n  test(\"Fit and transform with columnar input\") {\n    val df = smallBinaryClassificationColumnar\n\n    val estimator = new XGBoostClassifier()\n      .setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n      .setNumRound(1)\n\n    // without any issue\n    val model = estimator.fit(df)\n    assert(model.getFeaturesCols sameElements Array(\"c1\", \"c2\", \"c3\"))\n\n    val transformedDF = model.transform(df)\n    assert(transformedDF.schema.names.contains(\"c1\"))\n    assert(transformedDF.schema.names.contains(\"c2\"))\n    assert(transformedDF.schema.names.contains(\"c3\"))\n    assert(!transformedDF.schema.names.contains(Utils.TMP_FEATURE_ARRAY_NAME))\n  }\n\n  test(\"Support columnar\") {\n    val df = smallBinaryClassificationColumnar\n\n    val classifier = new XGBoostClassifier().setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n    assert(classifier.getFeaturesCols sameElements Array(\"c1\", \"c2\", \"c3\"))\n\n    val (processed, _) = classifier.preprocess(df)\n    assert(!processed.schema.contains(\"c1\"))\n    assert(!processed.schema.contains(\"c2\"))\n    assert(!processed.schema.contains(\"c3\"))\n\n    val matched = processed.schema(Utils.TMP_FEATURE_ARRAY_NAME).dataType match {\n      case ArrayType(FloatType, _) => true\n      case _ => false\n    }\n    assert(matched)\n  }\n\n  test(\"Support array(float)\") {\n    val df = smallBinaryClassificationArray\n    val matched = df.schema(\"features\").dataType match {\n      case ArrayType(DoubleType, _) => true\n      case _ => false\n    }\n 
   assert(matched)\n\n    val newDf = df.withColumn(\"features\", col(\"features\").cast(ArrayType(FloatType)))\n    val matched1 = newDf.schema(\"features\").dataType match {\n      case ArrayType(FloatType, _) => true\n      case _ => false\n    }\n    assert(matched1)\n\n    val classifier = new XGBoostClassifier()\n    assert(classifier.featureIsArrayType(df.schema))\n\n    val (processed, _) = classifier.preprocess(df)\n    val matched2 = processed.schema(\"features\").dataType match {\n      case ArrayType(FloatType, _) => true\n      case _ => false\n    }\n    assert(matched2)\n  }\n\n  test(\"Support array(double)\") {\n    val df = smallBinaryClassificationArray\n    val matched = df.schema(\"features\").dataType match {\n      case ArrayType(DoubleType, _) => true\n      case _ => false\n    }\n    assert(matched)\n\n    val classifier = new XGBoostClassifier()\n    assert(classifier.featureIsArrayType(df.schema))\n\n    val (processed, _) = classifier.preprocess(df)\n    val matched1 = processed.schema(\"features\").dataType match {\n      case ArrayType(FloatType, _) => true\n      case _ => false\n    }\n    assert(matched1)\n  }\n\n  test(\"Fit and transform with array type\") {\n    val df = smallBinaryClassificationArray\n    val classifier = new XGBoostClassifier().setNumRound(2)\n    val transformedDf = classifier.fit(df).transform(df)\n\n    // transform shouldn't change the features type\n    val matched = transformedDf.schema(\"features\").dataType match {\n      case ArrayType(DoubleType, _) => true\n      case _ => false\n    }\n    assert(matched)\n\n    // No exception happened\n    transformedDf.collect()\n  }\n\n  test(\"Fit with array and transform with vector type\") {\n    val df = smallBinaryClassificationArray\n    val classifier = new XGBoostClassifier().setNumRound(2)\n    val model = classifier.fit(df)\n\n    val vectorDf = smallBinaryClassificationVector\n    
assert(SparkUtils.isVectorType(vectorDf.schema(\"features\").dataType))\n\n    val transformedDf = model.transform(vectorDf)\n    assert(SparkUtils.isVectorType(transformedDf.schema(\"features\").dataType))\n\n    // No exception\n    transformedDf.collect()\n  }\n\n  test(\"Fit with vector and transform with array type\") {\n    val vectorDf = smallBinaryClassificationVector\n\n    val classifier = new XGBoostClassifier().setNumRound(2)\n    val model = classifier.fit(vectorDf)\n\n    val arrayDf = smallBinaryClassificationArray\n    assert(classifier.featureIsArrayType(arrayDf.schema))\n\n    val transformedDf = model.transform(arrayDf)\n    assert(classifier.featureIsArrayType(transformedDf.schema))\n\n    // No exception\n    transformedDf.collect()\n  }\n\n  test(\"Pipeline with columnar input for Regressor and Ranker\") {\n    val df = ss.createDataFrame(sc.parallelize(Seq(\n      (0.0, 1, 2, 1.0, 2.0, 3.0),\n      (1.0, 0, 5, 1.0, 2.0, 3.0),\n      (2.0, 2, 7, 1.0, 2.0, 3.0)\n    ))).toDF(\"label\", \"group\", \"weight\", \"c1\", \"c2\", \"c3\")\n\n    // XGBoostRegressor with columnar features\n    val regressor = new XGBoostRegressor()\n      .setNumRound(1)\n      .setNumWorkers(1)\n      .setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n\n    val regressorPipeline = new Pipeline().setStages(Array(regressor))\n    val regressorFit = regressorPipeline.fit(df)\n    val regressorPredictions = regressorFit.transform(df)\n    assert(regressorPredictions.count() === 3)\n\n    // XGBoostRanker with columnar features (requires group column)\n    val ranker = new XGBoostRanker()\n      .setNumRound(1)\n      .setNumWorkers(1)\n      .setGroupCol(\"group\")\n      .setFeaturesCol(Array(\"c1\", \"c2\", \"c3\"))\n\n    val rankerPipeline = new Pipeline().setStages(Array(ranker))\n    val rankerFit = rankerPipeline.fit(df)\n    val rankerPredictions = rankerFit.transform(df)\n    assert(rankerPredictions.count() === 3)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostParamsSuite.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.util.Try\n\nimport org.scalatest.funsuite.AnyFunSuite\n\n\nclass XGBoostParamsSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite {\n\n  test(\"invalid parameters\") {\n    val estimator = new XGBoostClassifier()\n\n    // We didn't set it by default\n    var thrown = intercept[RuntimeException] {\n      estimator.getCacheHostRatio\n    }\n    assert(thrown.getMessage.contains(\"Failed to find a default value for cacheHostRatio\"))\n\n    val v = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN)\n    assert(v.equals(Float.NaN))\n\n    // We didn't set it by default\n    thrown = intercept[RuntimeException] {\n      estimator.setCacheHostRatio(-1.0f)\n    }\n    assert(thrown.getMessage.contains(\"parameter cacheHostRatio given invalid value -1.0\"))\n\n    Seq(0.0f, 0.2f, 1.0f).forall(v => {\n      estimator.setCacheHostRatio(v)\n      estimator.getCacheHostRatio == v\n    })\n\n    estimator.setCacheHostRatio(0.66f)\n    val v1 = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN)\n    assert(v1 == 0.66f)\n  }\n\n  test(\"setNumEarlyStoppingRounds\") {\n    val estimator = new XGBoostClassifier()\n    assert(estimator.getNumEarlyStoppingRounds == 0)\n    estimator.setNumEarlyStoppingRounds(10)\n    assert(estimator.getNumEarlyStoppingRounds == 10)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRankerSuite.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport org.apache.spark.ml.linalg.{DenseVector, Vectors}\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.sql.{DataFrame, Dataset, Row}\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => ScalaXGBoost}\nimport ml.dmlc.xgboost4j.scala.spark.Regression.Ranking\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.RANKER_OBJS\nimport ml.dmlc.xgboost4j.scala.spark.params.XGBoostParams\n\nclass XGBoostRankerSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite {\n\n  test(\"XGBoostRanker copy\") {\n    val ranker = new XGBoostRanker().setNthread(2).setNumWorkers(10)\n    val rankertCopied = ranker.copy(ParamMap.empty)\n\n    assert(ranker.uid === rankertCopied.uid)\n    assert(ranker.getNthread === rankertCopied.getNthread)\n    assert(ranker.getNumWorkers === ranker.getNumWorkers)\n  }\n\n  test(\"XGBoostRankerModel copy\") {\n    val model = new XGBoostRankerModel(\"hello\").setNthread(2).setNumWorkers(10)\n    val modelCopied = model.copy(ParamMap.empty)\n    assert(model.uid === modelCopied.uid)\n    assert(model.getNthread === modelCopied.getNthread)\n    assert(model.getNumWorkers === modelCopied.getNumWorkers)\n  }\n\n  test(\"read/write\") {\n    val trainDf = 
smallGroupVector\n    val xgbParams: Map[String, Any] = Map(\n      \"max_depth\" -> 5,\n      \"eta\" -> 0.2,\n      \"objective\" -> \"rank:ndcg\"\n    )\n\n    def check(xgboostParams: XGBoostParams[_]): Unit = {\n      assert(xgboostParams.getMaxDepth === 5)\n      assert(xgboostParams.getEta === 0.2)\n      assert(xgboostParams.getObjective === \"rank:ndcg\")\n    }\n\n    val rankerPath = new File(tempDir.toFile, \"ranker\").getPath\n    val ranker = new XGBoostRanker(xgbParams).setNumRound(1).setGroupCol(\"group\")\n    check(ranker)\n    assert(ranker.getGroupCol === \"group\")\n\n    ranker.write.overwrite().save(rankerPath)\n    val loadedRanker = XGBoostRanker.load(rankerPath)\n    check(loadedRanker)\n    assert(loadedRanker.getGroupCol === \"group\")\n\n    val model = loadedRanker.fit(trainDf)\n    check(model)\n    assert(model.getGroupCol === \"group\")\n\n    val modelPath = new File(tempDir.toFile, \"model\").getPath\n    model.write.overwrite().save(modelPath)\n    val modelLoaded = XGBoostRankerModel.load(modelPath)\n    check(modelLoaded)\n    assert(modelLoaded.getGroupCol === \"group\")\n  }\n\n  test(\"validate\") {\n    val trainDf = smallGroupVector\n    val ranker = new XGBoostRanker()\n    // must define group column\n    intercept[IllegalArgumentException](\n      ranker.validate(trainDf)\n    )\n    val ranker1 = new XGBoostRanker().setGroupCol(\"group\")\n    ranker1.validate(trainDf)\n    assert(ranker1.getObjective === \"rank:ndcg\")\n  }\n\n  test(\"XGBoostRankerModel transformed schema\") {\n    val trainDf = smallGroupVector\n    val ranker = new XGBoostRanker().setGroupCol(\"group\").setNumRound(1)\n    val model = ranker.fit(trainDf)\n    var out = model.transform(trainDf)\n    // Transform should not discard the other columns of the transforming dataframe\n    Seq(\"label\", \"group\", \"margin\", \"weight\", \"features\").foreach { v =>\n      assert(out.schema.names.contains(v))\n    }\n    // Ranker does not have extra 
columns\n    Seq(\"rawPrediction\", \"probability\").foreach { v =>\n      assert(!out.schema.names.contains(v))\n    }\n    assert(out.schema.names.contains(\"prediction\"))\n    assert(out.schema.names.length === 6)\n    model.setLeafPredictionCol(\"leaf\").setContribPredictionCol(\"contrib\")\n    out = model.transform(trainDf)\n    assert(out.schema.names.contains(\"leaf\"))\n    assert(out.schema.names.contains(\"contrib\"))\n  }\n\n  test(\"Supported objectives\") {\n    val ranker = new XGBoostRanker().setGroupCol(\"group\")\n    val df = smallGroupVector\n    RANKER_OBJS.foreach { obj =>\n      ranker.setObjective(obj)\n      ranker.validate(df)\n    }\n\n    ranker.setObjective(\"binary:logistic\")\n    intercept[IllegalArgumentException](\n      ranker.validate(df)\n    )\n  }\n\n  test(\"The group col should be sorted in each partition\") {\n    val trainingDF = buildDataFrameWithGroup(Ranking.train)\n\n    val ranker = new XGBoostRanker()\n      .setNumRound(1)\n      .setNumWorkers(numWorkers)\n      .setGroupCol(\"group\")\n\n    val (df, _) = ranker.preprocess(trainingDF)\n    val groupId = df.schema.fieldIndex(\"group\")\n    df.rdd.foreachPartition { iter => {\n      var prevGroup = Int.MinValue\n      while (iter.hasNext) {\n        val curr = iter.next()\n        val group = curr.asInstanceOf[Row].getAs[Int](groupId)\n        assert(prevGroup <= group)\n        prevGroup = group\n      }\n    }}\n  }\n\n  test(\"Same group must be in the same partition\") {\n    val spark = ss\n    import spark.implicits._\n    val num_workers = 3\n    val df = ss.createDataFrame(sc.parallelize(Seq(\n      (0.1, Vectors.dense(1.0, 2.0, 3.0), 0),\n      (0.1, Vectors.dense(0.0, 0.0, 0.0), 0),\n      (0.1, Vectors.dense(0.0, 3.0, 0.0), 0),\n      (0.1, Vectors.dense(2.0, 0.0, 4.0), 1),\n      (0.1, Vectors.dense(0.2, 1.2, 2.0), 1),\n      (0.1, Vectors.dense(0.5, 2.2, 1.7), 1),\n      (0.1, Vectors.dense(0.5, 2.2, 1.7), 2),\n      (0.1, Vectors.dense(0.5, 2.2, 
1.7), 2),\n      (0.1, Vectors.dense(0.5, 2.2, 1.7), 2)), 1)).toDF(\"label\", \"features\", \"group\")\n\n    // The original pattern will repartition df in a RoundRobin manner\n    val oriRows = df.repartition(num_workers)\n      .sortWithinPartitions(df.col(\"group\"))\n      .select(\"group\")\n      .mapPartitions { case iter =>\n        val tmp: ArrayBuffer[Int] = ArrayBuffer.empty\n        while (iter.hasNext) {\n          val r = iter.next()\n          tmp.append(r.getInt(0))\n        }\n        Iterator.single(tmp.mkString(\",\"))\n      }.collect()\n    assert(oriRows.length == 3)\n    assert(oriRows.contains(\"0,1,2\"))\n\n    // The fix has replaced repartition with repartitionByRange which will put the\n    // instances with same group into the same partition\n    val ranker = new XGBoostRanker().setGroupCol(\"group\").setNumWorkers(num_workers)\n    val (processedDf, _) = ranker.preprocess(df)\n    val rows = processedDf\n      .select(\"group\")\n      .mapPartitions { case iter =>\n        val tmp: ArrayBuffer[Int] = ArrayBuffer.empty\n        while (iter.hasNext) {\n          val r = iter.next()\n          tmp.append(r.getInt(0))\n        }\n        Iterator.single(tmp.mkString(\",\"))\n      }.collect()\n\n    rows.forall(Seq(\"0,0,0\", \"1,1,1\", \"2,2,2\").contains)\n  }\n\n  private def runLengthEncode(input: Seq[Int]): Seq[Int] = {\n    if (input.isEmpty) return Seq(0)\n\n    input.indices\n      .filter(i => i == 0 || input(i) != input(i - 1)) :+ input.length\n  }\n\n  private def runRanker(ranker: XGBoostRanker, dataset: Dataset[_]): (Array[Float], Array[Int]) = {\n    val (df, indices) = ranker.preprocess(dataset)\n    val rdd = ranker.toRdd(df, indices)\n    val result = rdd.mapPartitions { iter =>\n      if (iter.hasNext) {\n        val watches = iter.next()\n        val dm = watches.toMap(Utils.TRAIN_NAME)\n        val weight = dm.getWeight\n        val group = dm.getGroup\n        watches.delete()\n        Iterator.single((weight, 
group))\n      } else {\n        Iterator.empty\n      }\n    }.collect()\n\n    val weight: ArrayBuffer[Float] = ArrayBuffer.empty\n    val group: ArrayBuffer[Int] = ArrayBuffer.empty\n\n    for (row <- result) {\n      weight.append(row._1: _*)\n      group.append(row._2: _*)\n    }\n    (weight.toArray, group.toArray)\n  }\n\n  Seq(None, Some(\"weight\")).foreach { weightCol => {\n    val msg = weightCol.map(_ => \"with weight\").getOrElse(\"without weight\")\n    test(s\"to RDD watches with group $msg\") {\n      // One instance without setting weight\n      var df = ss.createDataFrame(sc.parallelize(Seq(\n        (1.0, 0, 10, Vectors.dense(Array(1.0, 2.0, 3.0)))\n      ))).toDF(\"label\", \"group\", \"weight\", \"features\")\n\n      val ranker = new XGBoostRanker()\n        .setLabelCol(\"label\")\n        .setFeaturesCol(\"features\")\n        .setGroupCol(\"group\")\n        .setNumWorkers(1)\n\n      weightCol.foreach(ranker.setWeightCol)\n\n      val (weights, groupSize) = runRanker(ranker, df)\n      val expectedWeight = weightCol.map(_ => Array(10.0f)).getOrElse(Array(1.0f))\n      assert(weights === expectedWeight)\n      assert(groupSize === runLengthEncode(Seq(0)))\n\n      df = ss.createDataFrame(sc.parallelize(Seq(\n        (1.0, 1, 2, Vectors.dense(Array(1.0, 2.0, 3.0))),\n        (2.0, 1, 2, Vectors.dense(Array(1.0, 2.0, 3.0))),\n        (1.0, 0, 5, Vectors.dense(Array(1.0, 2.0, 3.0))),\n        (0.0, 1, 2, Vectors.dense(Array(1.0, 2.0, 3.0))),\n        (1.0, 0, 5, Vectors.dense(Array(1.0, 2.0, 3.0))),\n        (2.0, 2, 7, Vectors.dense(Array(1.0, 2.0, 3.0)))\n      ))).toDF(\"label\", \"group\", \"weight\", \"features\")\n\n      val groups = Array(1, 1, 0, 1, 0, 2).sorted\n      val (weights1, groupSize1) = runRanker(ranker, df)\n      val expectedWeight1 = weightCol.map(_ => Array(5.0f, 2.0f, 7.0f))\n        .getOrElse(groups.distinct.map(_ => 1.0f))\n\n      assert(groupSize1 === runLengthEncode(groups))\n      assert(weights1 === 
expectedWeight1)\n    }\n  }\n  }\n\n  test(\"XGBoost-Spark output should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Ranking.train.iterator)\n    val weights = Ranking.trainGroups.distinct.map(_ => 1.0f).toArray\n    trainingDM.setQueryId(Ranking.trainGroups.toArray)\n    trainingDM.setWeight(weights)\n\n    val testDM = new DMatrix(Ranking.test.iterator)\n    val trainingDF = buildDataFrameWithGroup(Ranking.train)\n    val testDF = buildDataFrameWithGroup(Ranking.test)\n    val paramMap = Map(\"objective\" -> \"rank:ndcg\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, 5, paramMap)\n  }\n\n  test(\"XGBoost-Spark output with weight should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Ranking.trainWithWeight.iterator)\n    trainingDM.setQueryId(Ranking.trainGroups.toArray)\n    trainingDM.setWeight(Ranking.trainGroups.distinct.map(_.toFloat).toArray)\n\n    val testDM = new DMatrix(Ranking.test.iterator)\n    val trainingDF = buildDataFrameWithGroup(Ranking.trainWithWeight)\n    val testDF = buildDataFrameWithGroup(Ranking.test)\n    val paramMap = Map(\"objective\" -> \"rank:ndcg\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF,\n      5, paramMap, Some(\"weight\"))\n  }\n\n  private def checkResultsWithXGBoost4j(\n      trainingDM: DMatrix,\n      testDM: DMatrix,\n      trainingDF: DataFrame,\n      testDF: DataFrame,\n      round: Int = 5,\n      xgbParams: Map[String, Any] = Map.empty,\n      weightCol: Option[String] = None): Unit = {\n    val paramMap = Map(\n      \"eta\" -> \"1\",\n      \"max_depth\" -> \"6\",\n      \"base_score\" -> 0.5,\n      \"max_bin\" -> 16) ++ xgbParams\n    val xgb4jModel = ScalaXGBoost.train(trainingDM, paramMap, round)\n\n    val ranker = new XGBoostRanker(paramMap)\n      .setNumRound(round)\n      // If we use multi workers to train the ranking, the result probably will be different\n      .setNumWorkers(1)\n      .setLeafPredictionCol(\"leaf\")\n      
.setContribPredictionCol(\"contrib\")\n      .setGroupCol(\"group\")\n    weightCol.foreach(weight => ranker.setWeightCol(weight))\n\n    def checkEqual(left: Array[Array[Float]], right: Map[Int, Array[Float]]) = {\n      assert(left.size === right.size)\n      left.zipWithIndex.foreach { case (leftValue, index) =>\n        assert(leftValue.sameElements(right(index)))\n      }\n    }\n\n    val xgbSparkModel = ranker.fit(trainingDF)\n    val rows = xgbSparkModel.transform(testDF).collect()\n\n    // Check Leaf\n    val xgb4jLeaf = xgb4jModel.predictLeaf(testDM)\n    val xgbSparkLeaf = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n    // Check contrib\n    val xgb4jContrib = xgb4jModel.predictContrib(testDM)\n    val xgbSparkContrib = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jContrib, xgbSparkContrib)\n\n    // Check prediction\n    val xgb4jPred = xgb4jModel.predict(testDM)\n    val xgbSparkPred = rows.map(row => {\n      val pred = row.getAs[Double](\"prediction\").toFloat\n      (row.getAs[Int](\"id\"), Array(pred))\n    }).toMap\n    checkEqual(xgb4jPred, xgbSparkPred)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\n\nimport org.apache.spark.ml.linalg.DenseVector\nimport org.apache.spark.ml.param.ParamMap\nimport org.apache.spark.sql.DataFrame\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => ScalaXGBoost}\nimport ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.REGRESSION_OBJS\nimport ml.dmlc.xgboost4j.scala.spark.params.XGBoostParams\n\nclass XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite {\n  test(\"XGBoostRegressor copy\") {\n    val regressor = new XGBoostRegressor().setNthread(2).setNumWorkers(10)\n    val regressortCopied = regressor.copy(ParamMap.empty)\n\n    assert(regressor.uid === regressortCopied.uid)\n    assert(regressor.getNthread === regressortCopied.getNthread)\n    assert(regressor.getNumWorkers === regressor.getNumWorkers)\n  }\n\n  test(\"XGBoostRegressionModel copy\") {\n    val model = new XGBoostRegressionModel(\"hello\").setNthread(2).setNumWorkers(10)\n    val modelCopied = model.copy(ParamMap.empty)\n    assert(model.uid === modelCopied.uid)\n    assert(model.getNthread === modelCopied.getNthread)\n    assert(model.getNumWorkers === modelCopied.getNumWorkers)\n  }\n\n  test(\"read/write\") {\n    val trainDf = smallBinaryClassificationVector\n    val xgbParams: Map[String, Any] = Map(\n      
\"max_depth\" -> 5,\n      \"eta\" -> 0.2\n    )\n\n    def check(xgboostParams: XGBoostParams[_]): Unit = {\n      assert(xgboostParams.getMaxDepth === 5)\n      assert(xgboostParams.getEta === 0.2)\n      assert(xgboostParams.getObjective === \"reg:squarederror\")\n    }\n\n    val regressorPath = new File(tempDir.toFile, \"regressor\").getPath\n    val regressor = new XGBoostRegressor(xgbParams).setNumRound(1)\n    check(regressor)\n\n    regressor.write.overwrite().save(regressorPath)\n    val loadedRegressor = XGBoostRegressor.load(regressorPath)\n    check(loadedRegressor)\n\n    val model = loadedRegressor.fit(trainDf)\n    check(model)\n\n    val modelPath = new File(tempDir.toFile, \"model\").getPath\n    model.write.overwrite().save(modelPath)\n    val modelLoaded = XGBoostRegressionModel.load(modelPath)\n    check(modelLoaded)\n  }\n\n  test(\"XGBoostRegressionModel transformed schema\") {\n    val trainDf = smallBinaryClassificationVector\n    val regressor = new XGBoostRegressor().setNumRound(1)\n    val model = regressor.fit(trainDf)\n    var out = model.transform(trainDf)\n    // Transform should not discard the other columns of the transforming dataframe\n    Seq(\"label\", \"margin\", \"weight\", \"features\").foreach { v =>\n      assert(out.schema.names.contains(v))\n    }\n    // Regressor does not have extra columns\n    Seq(\"rawPrediction\", \"probability\").foreach { v =>\n      assert(!out.schema.names.contains(v))\n    }\n    assert(out.schema.names.contains(\"prediction\"))\n    assert(out.schema.names.length === 5)\n    model.setLeafPredictionCol(\"leaf\").setContribPredictionCol(\"contrib\")\n    out = model.transform(trainDf)\n    assert(out.schema.names.contains(\"leaf\"))\n    assert(out.schema.names.contains(\"contrib\"))\n  }\n\n  test(\"Supported objectives\") {\n    val regressor = new XGBoostRegressor()\n    val df = smallMultiClassificationVector\n    REGRESSION_OBJS.foreach { obj =>\n      regressor.setObjective(obj)\n      
regressor.validate(df)\n    }\n\n    regressor.setObjective(\"binary:logistic\")\n    intercept[IllegalArgumentException](\n      regressor.validate(df)\n    )\n  }\n\n  test(\"XGBoost-Spark output should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Regression.train.iterator)\n    val testDM = new DMatrix(Regression.test.iterator)\n    val trainingDF = buildDataFrame(Regression.train)\n    val testDF = buildDataFrame(Regression.test)\n    val paramMap = Map(\"objective\" -> \"reg:squarederror\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, 5, paramMap)\n  }\n\n  test(\"XGBoost-Spark output with weight should match XGBoost4j\") {\n    val trainingDM = new DMatrix(Regression.trainWithWeight.iterator)\n    trainingDM.setWeight(Regression.randomWeights)\n    val testDM = new DMatrix(Regression.test.iterator)\n    val trainingDF = buildDataFrame(Regression.trainWithWeight)\n    val testDF = buildDataFrame(Regression.test)\n    val paramMap = Map(\"objective\" -> \"reg:squarederror\")\n    checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF,\n      5, paramMap, Some(\"weight\"))\n  }\n\n  private def checkResultsWithXGBoost4j(\n      trainingDM: DMatrix,\n      testDM: DMatrix,\n      trainingDF: DataFrame,\n      testDF: DataFrame,\n      round: Int = 5,\n      xgbParams: Map[String, Any] = Map.empty,\n      weightCol: Option[String] = None): Unit = {\n    val paramMap = Map(\n      \"eta\" -> \"1\",\n      \"max_depth\" -> \"6\",\n      \"base_score\" -> 0.5,\n      \"max_bin\" -> 16) ++ xgbParams\n    val xgb4jModel = ScalaXGBoost.train(trainingDM, paramMap, round)\n\n    val regressor = new XGBoostRegressor(paramMap)\n      .setNumRound(round)\n      .setNumWorkers(numWorkers)\n      .setLeafPredictionCol(\"leaf\")\n      .setContribPredictionCol(\"contrib\")\n    weightCol.foreach(weight => regressor.setWeightCol(weight))\n\n    def checkEqual(left: Array[Array[Float]], right: Map[Int, Array[Float]]) = {\n      
assert(left.size === right.size)\n      left.zipWithIndex.foreach { case (leftValue, index) =>\n        assert(leftValue.sameElements(right(index)))\n      }\n    }\n\n    val xgbSparkModel = regressor.fit(trainingDF)\n    val rows = xgbSparkModel.transform(testDF).collect()\n\n    // Check Leaf\n    val xgb4jLeaf = xgb4jModel.predictLeaf(testDM)\n    val xgbSparkLeaf = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n    // Check contrib\n    val xgb4jContrib = xgb4jModel.predictContrib(testDM)\n    val xgbSparkContrib = rows.map(row =>\n      (row.getAs[Int](\"id\"), row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))).toMap\n    checkEqual(xgb4jContrib, xgbSparkContrib)\n\n    // Check prediction\n    val xgb4jPred = xgb4jModel.predict(testDM)\n    val xgbSparkPred = rows.map(row => {\n      val pred = row.getAs[Double](\"prediction\").toFloat\n      (row.getAs[Int](\"id\"), Array(pred))}).toMap\n    checkEqual(xgb4jPred, xgbSparkPred)\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostSuite.scala",
    "content": "/*\n Copyright (c) 2023-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport org.apache.spark.SparkConf\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.sql.SparkSession\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.scala.Booster\n\nclass XGBoostSuite extends AnyFunSuite with PerTest {\n\n  // Do not create spark context\n  override def beforeEach(): Unit = {}\n\n  test(\"skip stage-level scheduling\") {\n    val conf = new SparkConf()\n      .setMaster(\"spark://foo\")\n      .set(\"spark.executor.cores\", \"12\")\n      .set(\"spark.task.cpus\", \"1\")\n      .set(\"spark.executor.resource.gpu.amount\", \"1\")\n      .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n\n    // the correct configurations should not skip stage-level scheduling\n    assert(!XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, conf))\n\n    // spark version < 3.4.0\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.3.0\", runOnGpu = true, conf))\n\n    // not run on GPU\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = false, conf))\n\n    // spark.executor.cores is not set\n    var badConf = conf.clone().remove(\"spark.executor.cores\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // spark.executor.cores=1\n    badConf = 
conf.clone().set(\"spark.executor.cores\", \"1\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // spark.executor.resource.gpu.amount is not set\n    badConf = conf.clone().remove(\"spark.executor.resource.gpu.amount\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // spark.executor.resource.gpu.amount>1\n    badConf = conf.clone().set(\"spark.executor.resource.gpu.amount\", \"2\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // spark.task.resource.gpu.amount is not set\n    badConf = conf.clone().remove(\"spark.task.resource.gpu.amount\")\n    assert(!XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // spark.task.resource.gpu.amount=1\n    badConf = conf.clone().set(\"spark.task.resource.gpu.amount\", \"1\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // yarn\n    badConf = conf.clone().setMaster(\"yarn\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n\n    // k8s\n    badConf = conf.clone().setMaster(\"k8s://\")\n    assert(XGBoost.skipStageLevelScheduling(sparkVersion = \"3.4.0\", runOnGpu = true, badConf))\n  }\n\n\n  object FakedXGBoost extends StageLevelScheduling {\n\n    // Do not skip stage-level scheduling for testing purposes.\n    override private[spark] def skipStageLevelScheduling(\n        sparkVersion: String,\n        runOnGpu: Boolean,\n        conf: SparkConf) = false\n  }\n\n  test(\"try stage-level scheduling without spark-rapids\") {\n\n    val builder = SparkSession.builder()\n      .master(s\"local-cluster[1, 4, 1024]\")\n      .appName(\"XGBoostSuite\")\n      .config(\"spark.ui.enabled\", false)\n      .config(\"spark.driver.memory\", \"512m\")\n      .config(\"spark.barrier.sync.timeout\", 10)\n      
.config(\"spark.task.cpus\", 1)\n      .config(\"spark.executor.cores\", 4)\n      .config(\"spark.executor.resource.gpu.amount\", 1)\n      .config(\"spark.task.resource.gpu.amount\", 0.25)\n    val ss = builder.getOrCreate()\n    if (ss.version < \"3.4.1\") {\n      // Pass\n      ss.stop()\n    } else {\n      try {\n        val df = ss.range(1, 10)\n        val rdd = df.rdd\n\n        val runtimeParams = new XGBoostClassifier(\n          Map(\"device\" -> \"cuda\")).setNumWorkers(1).setNumRound(1)\n          .getRuntimeParameters(true)\n        assert(runtimeParams.runOnGpu)\n\n        val finalRDD = FakedXGBoost.tryStageLevelScheduling(ss.sparkContext, runtimeParams,\n          rdd.asInstanceOf[RDD[(Booster, Map[String, Array[Float]])]])\n\n        val taskResources = finalRDD.getResourceProfile().taskResources\n        assert(taskResources.contains(\"cpus\"))\n        assert(taskResources.get(\"cpus\").get.amount == 3)\n\n        assert(taskResources.contains(\"gpu\"))\n        assert(taskResources.get(\"gpu\").get.amount == 1.0)\n      } finally {\n        ss.stop()\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n         xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n    <modelVersion>4.0.0</modelVersion>\n    <parent>\n        <groupId>ml.dmlc</groupId>\n        <artifactId>xgboost-jvm_2.12</artifactId>\n        <version>3.3.0-SNAPSHOT</version>\n    </parent>\n    <name>xgboost4j-spark-gpu</name>\n    <groupId>ml.dmlc</groupId>\n    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>\n    <version>3.3.0-SNAPSHOT</version>\n    <description>JVM Package for XGBoost</description>\n    <url>https://github.com/dmlc/xgboost/tree/master/jvm-packages</url>\n    <licenses>\n        <license>\n            <name>The Apache License, Version 2.0</name>\n            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>\n        </license>\n    </licenses>\n    <developers>\n        <developer>\n            <name>Bobby Wang</name>\n            <email>wbo4958@gmail.com</email>\n        </developer>\n        <developer>\n            <name>Jiaming Yuan</name>\n            <email>jm.yuan@outlook.com</email>\n        </developer>\n        <developer>\n            <name>Hyunsu Cho</name>\n            <email>chohyu01@cs.washington.edu</email>\n        </developer>\n        <developer>\n            <name>CodingCat</name>\n            <email>codingcat@apache.org</email>\n        </developer>\n    </developers>\n    <scm>\n        <connection>scm:git:git:/github.com/dmlc/xgboost.git</connection>\n        <developerConnection>scm:git:ssh://github.com/dmlc/xgboost.git</developerConnection>\n        <url>https://github.com/dmlc/xgboost</url>\n    </scm>\n    <build>\n        <plugins>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-assembly-plugin</artifactId>\n                <configuration>\n   
                 <skipAssembly>false</skipAssembly>\n                </configuration>\n            </plugin>\n            <plugin>\n                <groupId>org.apache.maven.plugins</groupId>\n                <artifactId>maven-shade-plugin</artifactId>\n                <configuration>\n                  <createDependencyReducedPom>true</createDependencyReducedPom>\n                  <useDependencyReducedPomInJar>true</useDependencyReducedPomInJar>\n                  <shadedArtifactAttached>false</shadedArtifactAttached>\n                  <artifactSet>\n                    <includes>\n                      <include>ml.dmlc:xgboost4j_${scala.binary.version}</include>\n                      <include>ml.dmlc:xgboost4j-spark_${scala.binary.version}</include>\n                    </includes>\n                  </artifactSet>\n                </configuration>\n                <executions>\n                  <execution>\n                    <phase>package</phase>\n                    <goals>\n                      <goal>shade</goal>\n                    </goals>\n                  </execution>\n                </executions>\n              </plugin>\n        </plugins>\n    </build>\n    <dependencies>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            <artifactId>xgboost4j_2.12</artifactId>\n            <version>${project.version}</version>\n        </dependency>\n        <dependency>\n            <groupId>ml.dmlc</groupId>\n            <artifactId>xgboost4j-spark_2.12</artifactId>\n            <version>${project.version}</version>\n            <exclusions>\n              <exclusion>\n                  <groupId>ml.dmlc</groupId>\n                  <artifactId>xgboost4j_2.12</artifactId>\n              </exclusion>\n            </exclusions>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-core_${scala.binary.version}</artifactId>\n            
<version>${spark.version.gpu}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-sql_${scala.binary.version}</artifactId>\n            <version>${spark.version.gpu}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n            <groupId>org.apache.spark</groupId>\n            <artifactId>spark-mllib_${scala.binary.version}</artifactId>\n            <version>${spark.version.gpu}</version>\n            <scope>provided</scope>\n        </dependency>\n        <dependency>\n          <groupId>com.nvidia</groupId>\n          <artifactId>rapids-4-spark_${scala.binary.version}</artifactId>\n          <version>${spark.rapids.version}</version>\n          <classifier>${spark.rapids.classifier}</classifier>\n          <scope>provided</scope>\n        </dependency>\n        <dependency>\n          <groupId>com.fasterxml.jackson.core</groupId>\n          <artifactId>jackson-databind</artifactId>\n          <version>${fasterxml.jackson.version}</version>\n          <scope>provided</scope>\n        </dependency>\n        <dependency>\n          <groupId>junit</groupId>\n          <artifactId>junit</artifactId>\n          <version>${junit.version}</version>\n          <scope>test</scope>\n        </dependency>\n    </dependencies>\n</project>\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumn.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.ArrayList;\nimport java.util.List;\n\nimport ai.rapids.cudf.BaseDeviceMemoryBuffer;\nimport ai.rapids.cudf.ColumnVector;\nimport ai.rapids.cudf.DType;\nimport com.fasterxml.jackson.annotation.JsonInclude;\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n/**\n * CudfColumn is the CUDF column representing, providing the cuda array interface\n */\n@JsonInclude(JsonInclude.Include.NON_NULL)\npublic class CudfColumn extends Column {\n  private List<Long> shape = new ArrayList<>();   // row count\n  private List<Object> data = new ArrayList<>(); //  gpu data buffer address\n  private String typestr;\n  private int version = 1;\n  private CudfColumn mask = null;\n\n  public CudfColumn(long shape, long data, String typestr, int version) {\n    this.shape.add(shape);\n    this.data.add(data);\n    this.data.add(false);\n    this.typestr = typestr;\n    this.version = version;\n  }\n\n  /**\n   * Create CudfColumn according to ColumnVector\n   */\n  public static CudfColumn from(ColumnVector cv) {\n    BaseDeviceMemoryBuffer dataBuffer = cv.getData();\n    assert dataBuffer != null;\n\n    DType dType = cv.getType();\n    String typeStr = \"\";\n    if (dType == DType.FLOAT32 || dType == DType.FLOAT64 ||\n        dType == DType.TIMESTAMP_DAYS || dType == 
DType.TIMESTAMP_MICROSECONDS ||\n        dType == DType.TIMESTAMP_MILLISECONDS || dType == DType.TIMESTAMP_NANOSECONDS ||\n        dType == DType.TIMESTAMP_SECONDS) {\n      typeStr = \"<f\" + dType.getSizeInBytes();\n    } else if (dType == DType.BOOL8 || dType == DType.INT8 || dType == DType.INT16 ||\n        dType == DType.INT32 || dType == DType.INT64) {\n      typeStr = \"<i\" + dType.getSizeInBytes();\n    } else {\n      // Unsupported type.\n      throw new IllegalArgumentException(\"Unsupported data type: \" + dType);\n    }\n\n    CudfColumn data = new CudfColumn(cv.getRowCount(), dataBuffer.getAddress(), typeStr, 1);\n\n    BaseDeviceMemoryBuffer validBuffer = cv.getValid();\n    if (validBuffer != null && cv.getNullCount() != 0) {\n      CudfColumn mask = new CudfColumn(cv.getRowCount(), validBuffer.getAddress(), \"<t1\", 1);\n      data.setMask(mask);\n    }\n    return data;\n  }\n\n  public List<Long> getShape() {\n    return shape;\n  }\n\n  public List<Object> getData() {\n    return data;\n  }\n\n  public String getTypestr() {\n    return typestr;\n  }\n\n  public int getVersion() {\n    return version;\n  }\n\n  public CudfColumn getMask() {\n    return mask;\n  }\n\n  public void setMask(CudfColumn mask) {\n    this.mask = mask;\n  }\n\n  @Override\n  public String toJson() {\n    ObjectMapper mapper = new ObjectMapper();\n    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);\n    try {\n      List<CudfColumn> objects = new ArrayList<>(1);\n      objects.add(this);\n      return mapper.writeValueAsString(objects);\n    } catch (JsonProcessingException e) {\n      throw new RuntimeException(e);\n    }\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumnBatch.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.List;\nimport java.util.stream.Collectors;\nimport java.util.stream.IntStream;\n\nimport ai.rapids.cudf.Table;\nimport com.fasterxml.jackson.annotation.JsonIgnore;\nimport com.fasterxml.jackson.annotation.JsonInclude;\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n/**\n * CudfColumnBatch wraps multiple CudfColumns to provide the cuda\n * array interface json string for all columns.\n */\npublic class CudfColumnBatch extends ColumnBatch {\n  @JsonIgnore\n  private final Table featureTable;\n  @JsonIgnore\n  private final Table labelTable;\n  @JsonIgnore\n  private final Table weightTable;\n  @JsonIgnore\n  private final Table baseMarginTable;\n  @JsonIgnore\n  private final Table qidTable;\n\n  private List<CudfColumn> features;\n  private List<CudfColumn> label;\n  private List<CudfColumn> weight;\n  private List<CudfColumn> baseMargin;\n  private List<CudfColumn> qid;\n\n  public CudfColumnBatch(Table featureTable, Table labelTable, Table weightTable,\n                         Table baseMarginTable, Table qidTable) {\n    this.featureTable = featureTable;\n    this.labelTable = labelTable;\n    this.weightTable = weightTable;\n    this.baseMarginTable = baseMarginTable;\n    this.qidTable = qidTable;\n\n    features = 
initializeCudfColumns(featureTable);\n    if (labelTable != null) {\n      assert labelTable.getNumberOfColumns() == 1;\n      label = initializeCudfColumns(labelTable);\n    }\n\n    if (weightTable != null) {\n      assert weightTable.getNumberOfColumns() == 1;\n      weight = initializeCudfColumns(weightTable);\n    }\n\n    if (baseMarginTable != null) {\n      baseMargin = initializeCudfColumns(baseMarginTable);\n    }\n\n    if (qidTable != null) {\n      qid = initializeCudfColumns(qidTable);\n    }\n\n  }\n\n  private List<CudfColumn> initializeCudfColumns(Table table) {\n    assert table != null && table.getNumberOfColumns() > 0;\n\n    return IntStream.range(0, table.getNumberOfColumns())\n      .mapToObj(table::getColumn)\n      .map(CudfColumn::from)\n      .collect(Collectors.toList());\n  }\n\n  // visible for testing\n  public Table getFeatureTable() {\n    return featureTable;\n  }\n\n  // visible for testing\n  public Table getLabelTable() {\n    return labelTable;\n  }\n\n\n  public List<CudfColumn> getFeatures() {\n    return features;\n  }\n\n  public List<CudfColumn> getLabel() {\n    return label;\n  }\n\n  public List<CudfColumn> getWeight() {\n    return weight;\n  }\n\n  public List<CudfColumn> getBaseMargin() {\n    return baseMargin;\n  }\n\n  public List<CudfColumn> getQid() {\n    return qid;\n  }\n\n  public String toJson() {\n    ObjectMapper mapper = new ObjectMapper();\n    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);\n    try {\n      return mapper.writeValueAsString(this);\n    } catch (JsonProcessingException e) {\n      throw new RuntimeException(e);\n    }\n  }\n\n  @Override\n  public String toFeaturesJson() {\n    ObjectMapper mapper = new ObjectMapper();\n    try {\n      return mapper.writeValueAsString(features);\n    } catch (JsonProcessingException e) {\n      throw new RuntimeException(e);\n    }\n  }\n\n  @Override\n  public void close() {\n    if (featureTable != null) featureTable.close();\n    if 
(labelTable != null) labelTable.close();\n    if (weightTable != null) weightTable.close();\n    if (baseMarginTable != null) baseMarginTable.close();\n    if (qidTable != null) qidTable.close();\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/ExtMemQuantileDMatrix.java",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.Iterator;\nimport java.util.Map;\n\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.databind.ObjectMapper;\nimport com.fasterxml.jackson.databind.module.SimpleModule;\n\npublic class ExtMemQuantileDMatrix extends QuantileDMatrix {\n  // on_host is set to true by default as we only support GPU at the moment\n  // cache_prefix is not used yet since we have on_host=true.\n  public ExtMemQuantileDMatrix(Iterator<ColumnBatch> iter,\n      float missing,\n      int maxBin,\n      DMatrix ref,\n      int nthread,\n      int maxQuantileBatches,\n      long minCachePageBytes,\n      float cacheHostRatio) throws XGBoostError {\n    long[] out = new long[1];\n    long[] refHandle = null;\n    if (ref != null) {\n      refHandle = new long[1];\n      refHandle[0] = ref.getHandle();\n    }\n    String conf = this.getConfig(missing, maxBin, nthread,\n                                 maxQuantileBatches, minCachePageBytes, cacheHostRatio);\n    XGBoostJNI.checkCall(XGBoostJNI.XGExtMemQuantileDMatrixCreateFromCallback(\n        iter, refHandle, conf, out));\n    handle = out[0];\n  }\n\n  public ExtMemQuantileDMatrix(\n      Iterator<ColumnBatch> iter,\n      float missing,\n      int maxBin,\n      DMatrix ref) throws XGBoostError {\n    this(iter, missing, maxBin, ref, 0, -1, -1, 
Float.NaN);\n  }\n\n  public ExtMemQuantileDMatrix(\n      Iterator<ColumnBatch> iter,\n      float missing,\n      int maxBin) throws XGBoostError {\n    this(iter, missing, maxBin, null);\n  }\n\n  private String getConfig(float missing, int maxBin, int nthread,\n                           int maxQuantileBatches, long minCachePageBytes, float cacheHostRatio) {\n    Map<String, Object> conf = new java.util.HashMap<>();\n    conf.put(\"missing\", missing);\n    conf.put(\"max_bin\", maxBin);\n    conf.put(\"nthread\", nthread);\n\n    if (maxQuantileBatches > 0) {\n      conf.put(\"max_quantile_blocks\", maxQuantileBatches);\n    }\n    if (minCachePageBytes > 0) {\n      conf.put(\"min_cache_page_bytes\", minCachePageBytes);\n    }\n\n    if (cacheHostRatio >= 0.0 && cacheHostRatio <= 1.0) {\n      conf.put(\"cache_host_ratio\", cacheHostRatio);\n    }\n\n    conf.put(\"on_host\", true);\n    conf.put(\"cache_prefix\", \".\");\n    ObjectMapper mapper = new ObjectMapper();\n\n    // Handle NaN values. Jackson by default serializes NaN values into strings.\n    SimpleModule module = new SimpleModule();\n    module.addSerializer(Double.class, new F64NaNSerializer());\n    module.addSerializer(Float.class, new F32NaNSerializer());\n    mapper.registerModule(module);\n\n    try {\n      return mapper.writeValueAsString(conf);\n    } catch (JsonProcessingException e) {\n      throw new RuntimeException(\"Failed to serialize configuration\", e);\n    }\n  }\n};\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/QuantileDMatrix.java",
    "content": "/*\n Copyright (c) 2021-2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.IOException;\nimport java.util.Iterator;\nimport java.util.Map;\n\nimport com.fasterxml.jackson.core.JsonGenerator;\nimport com.fasterxml.jackson.core.JsonProcessingException;\nimport com.fasterxml.jackson.databind.JsonSerializer;\nimport com.fasterxml.jackson.databind.ObjectMapper;\nimport com.fasterxml.jackson.databind.SerializerProvider;\nimport com.fasterxml.jackson.databind.module.SimpleModule;\n\nclass F64NaNSerializer extends JsonSerializer<Double> {\n  @Override\n  public void serialize(Double value, JsonGenerator gen,\n                        SerializerProvider serializers) throws IOException {\n    if (value.isNaN()) {\n      gen.writeRawValue(\"NaN\"); // Write NaN without quotes\n    } else {\n      gen.writeNumber(value);\n    }\n  }\n}\n\nclass F32NaNSerializer extends JsonSerializer<Float> {\n  @Override\n  public void serialize(Float value, JsonGenerator gen,\n                        SerializerProvider serializers) throws IOException {\n    if (value.isNaN()) {\n      gen.writeRawValue(\"NaN\"); // Write NaN without quotes\n    } else {\n      gen.writeNumber(value);\n    }\n  }\n}\n\n/**\n * QuantileDMatrix will only be used to train\n */\npublic class QuantileDMatrix extends DMatrix {\n  // implicit constructor for the ext mem version of the QDM.\n  protected QuantileDMatrix() {\n    super(0);\n  
}\n\n  /**\n   * Create QuantileDMatrix from iterator based on the cuda array interface\n   *\n   * @param iter    the XGBoost ColumnBatch batch to provide the corresponding cuda array interface\n   * @param missing the missing value\n   * @param maxBin  the max bin\n   * @param nthread the parallelism\n   * @throws XGBoostError\n   */\n  public QuantileDMatrix(\n      Iterator<ColumnBatch> iter,\n      float missing,\n      int maxBin,\n      int nthread) throws XGBoostError {\n    this(iter, null, missing, maxBin, nthread);\n  }\n\n  /**\n   * Create QuantileDMatrix from iterator based on the cuda array interface\n   *\n   * @param iter       the XGBoost ColumnBatch batch to provide the corresponding cuda array\n   *                   interface\n   * @param refDMatrix The reference QuantileDMatrix that provides quantile information, needed\n   *                   when creating validation/test dataset with QuantileDMatrix. Supplying the\n   *                   training DMatrix as a reference means that the same quantisation\n   *                   applied to the training data is applied to the validation/test data\n   * @param missing    the missing value\n   * @param maxBin     the max bin\n   * @param nthread    the parallelism\n   * @throws XGBoostError\n   */\n  public QuantileDMatrix(\n      Iterator<ColumnBatch> iter,\n      QuantileDMatrix refDMatrix,\n      float missing,\n      int maxBin,\n      int nthread) throws XGBoostError {\n    super(0);\n    long[] out = new long[1];\n    String conf = getConfig(missing, maxBin, nthread);\n    long[] ref = null;\n    if (refDMatrix != null) {\n      ref = new long[1];\n      ref[0] = refDMatrix.getHandle();\n    }\n    XGBoostJNI.checkCall(XGBoostJNI.XGQuantileDMatrixCreateFromCallback(\n        iter, ref, conf, out));\n    handle = out[0];\n  }\n\n  @Override\n  public void setLabel(Column column) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setLabel.\");\n  }\n\n  
@Override\n  public void setWeight(Column column) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setWeight.\");\n  }\n\n  @Override\n  public void setBaseMargin(Column column) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\");\n  }\n\n  @Override\n  public void setLabel(float[] labels) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setLabel.\");\n  }\n\n  @Override\n  public void setWeight(float[] weights) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setWeight.\");\n  }\n\n  @Override\n  public void setBaseMargin(float[] baseMargin) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\");\n  }\n\n  @Override\n  public void setBaseMargin(float[][] baseMargin) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\");\n  }\n\n  @Override\n  public void setGroup(int[] group) throws XGBoostError {\n    throw new XGBoostError(\"QuantileDMatrix does not support setGroup.\");\n  }\n\n  private String getConfig(float missing, int maxBin, int nthread) {\n    Map<String, Object> conf = new java.util.HashMap<>();\n    conf.put(\"missing\", missing);\n    conf.put(\"max_bin\", maxBin);\n    conf.put(\"nthread\", nthread);\n    ObjectMapper mapper = new ObjectMapper();\n\n    // Handle NaN values. Jackson by default serializes NaN values into strings.\n    SimpleModule module = new SimpleModule();\n    module.addSerializer(Double.class, new F64NaNSerializer());\n    module.addSerializer(Float.class, new F32NaNSerializer());\n    mapper.registerModule(module);\n\n    try {\n      return mapper.writeValueAsString(conf);\n    } catch (JsonProcessingException e) {\n      throw new RuntimeException(\"Failed to serialize configuration\", e);\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/resources/META-INF/services/ml.dmlc.xgboost4j.scala.spark.XGBoostPlugin",
    "content": "ml.dmlc.xgboost4j.scala.spark.GpuXGBoostPlugin\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrix.scala",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.JavaConverters._\n\nimport ml.dmlc.xgboost4j.java.{ColumnBatch, ExtMemQuantileDMatrix => jExtMemQuantileDMatrix}\n\nclass ExtMemQuantileDMatrix private[scala](\n  private[scala] override val jDMatrix: jExtMemQuantileDMatrix) extends QuantileDMatrix(jDMatrix) {\n\n  def this(iter: Iterator[ColumnBatch],\n           missing: Float,\n           maxBin: Int,\n           ref: Option[QuantileDMatrix],\n           nthread: Int,\n           maxQuantileBatches: Int,\n           minCachePageBytes: Long,\n           cacheHostRatio: Float) {\n    this(new jExtMemQuantileDMatrix(iter.asJava, missing, maxBin,\n      ref.map(_.jDMatrix).orNull,\n      nthread, maxQuantileBatches, minCachePageBytes, cacheHostRatio))\n  }\n\n  def this(iter: Iterator[ColumnBatch], missing: Float, maxBin: Int) {\n    this(new jExtMemQuantileDMatrix(iter.asJava, missing, maxBin))\n  }\n\n  def this(\n    iter: Iterator[ColumnBatch],\n    ref: ExtMemQuantileDMatrix,\n    missing: Float,\n    maxBin: Int\n  ) {\n    this(new jExtMemQuantileDMatrix(iter.asJava, missing, maxBin, ref.jDMatrix))\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrix.scala",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.JavaConverters._\n\nimport ml.dmlc.xgboost4j.java.{Column, ColumnBatch, QuantileDMatrix => JQuantileDMatrix, XGBoostError}\n\nclass QuantileDMatrix private[scala](\n    private[scala] override val jDMatrix: JQuantileDMatrix) extends DMatrix(jDMatrix) {\n\n  /**\n   * Create QuantileDMatrix from iterator based on the array interface\n   *\n   * @param iter    the XGBoost ColumnBatch batch to provide the corresponding array interface\n   * @param missing the missing value\n   * @param maxBin  the max bin\n   * @param nthread the parallelism\n   * @throws XGBoostError\n   */\n  def this(iter: Iterator[ColumnBatch], missing: Float, maxBin: Int, nthread: Int) {\n    this(new JQuantileDMatrix(iter.asJava, missing, maxBin, nthread))\n  }\n\n  /**\n   * Create QuantileDMatrix from iterator based on the array interface\n   *\n   * @param iter    the XGBoost ColumnBatch batch to provide the corresponding array interface\n   * @param ref     The reference QuantileDMatrix that provides quantile information, needed\n   *                when creating validation/test dataset with QuantileDMatrix. 
Supplying the\n   *                training DMatrix as a reference means that the same quantisation applied\n   *                to the training data is applied to the validation/test data\n   * @param missing the missing value\n   * @param maxBin  the max bin\n   * @param nthread the parallelism\n   * @throws XGBoostError\n   */\n  def this(iter: Iterator[ColumnBatch],\n           ref: Option[QuantileDMatrix],\n           missing: Float,\n           maxBin: Int,\n           nthread: Int) {\n    this(new JQuantileDMatrix(iter.asJava, ref.map(_.jDMatrix).orNull, missing, maxBin, nthread))\n  }\n\n  /**\n   * set label of dmatrix\n   *\n   * @param labels labels\n   */\n  @throws(classOf[XGBoostError])\n  override def setLabel(labels: Array[Float]): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setLabel.\")\n\n  /**\n   * set weight of each instance\n   *\n   * @param weights weights\n   */\n  @throws(classOf[XGBoostError])\n  override def setWeight(weights: Array[Float]): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setWeight.\")\n\n  /**\n   * if specified, xgboost will start from this init margin\n   * can be used to specify initial prediction to boost from\n   *\n   * @param baseMargin base margin\n   */\n  @throws(classOf[XGBoostError])\n  override def setBaseMargin(baseMargin: Array[Float]): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\")\n\n  /**\n   * if specified, xgboost will start from this init margin\n   * can be used to specify initial prediction to boost from\n   *\n   * @param baseMargin base margin\n   */\n  @throws(classOf[XGBoostError])\n  override def setBaseMargin(baseMargin: Array[Array[Float]]): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\")\n\n  /**\n   * Set group sizes of DMatrix (used for ranking)\n   *\n   * @param group group size as array\n   */\n  @throws(classOf[XGBoostError])\n  override def setGroup(group: 
Array[Int]): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setGroup.\")\n\n  /**\n   * Set label of DMatrix from array interface\n   */\n  @throws(classOf[XGBoostError])\n  override def setLabel(column: Column): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setLabel.\")\n\n  /**\n   * set weight of dmatrix from column array interface\n   */\n  @throws(classOf[XGBoostError])\n  override def setWeight(column: Column): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setWeight.\")\n\n  /**\n   * set base margin of dmatrix from column array interface\n   */\n  @throws(classOf[XGBoostError])\n  override def setBaseMargin(column: Column): Unit =\n    throw new XGBoostError(\"QuantileDMatrix does not support setBaseMargin.\")\n\n  @throws(classOf[XGBoostError])\n  override def setQueryId(column: Column): Unit = {\n    throw new XGBoostError(\"QuantileDMatrix does not support setQueryId.\")\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/ExternalMemory.scala",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\nimport java.nio.file.{Files, Paths}\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport ai.rapids.cudf._\n\nimport ml.dmlc.xgboost4j.java.{ColumnBatch, CudfColumnBatch}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\nprivate[spark] trait ExternalMemory[T] extends Iterator[Table] with AutoCloseable {\n\n  protected val buffers = ArrayBuffer.empty[T]\n  private lazy val buffersIterator = buffers.toIterator\n\n  /**\n   * Convert the table to T which will be cached\n   *\n   * @param table to be converted\n   * @return the content\n   */\n  def convertTable(table: Table): T\n\n  /**\n   * Load the content to the Table\n   *\n   * @param content to be loaded\n   * @return Table\n   */\n  def loadTable(content: T): Table\n\n  // Cache the table\n  def cacheTable(table: Table): Unit = {\n    val content = convertTable(table)\n    buffers.append(content)\n  }\n\n  override def hasNext: Boolean = buffersIterator.hasNext\n\n  override def next(): Table = loadTable(buffersIterator.next())\n\n  override def close(): Unit = {}\n}\n\n// The data will be cached into disk.\nprivate[spark] class DiskExternalMemoryIterator(val path: String) extends ExternalMemory[String] {\n\n  private lazy val root = {\n    val tmp = path + \"/xgboost\"\n    createDirectory(tmp)\n    tmp\n  }\n\n  private var counter = 
0\n\n  private def createDirectory(dirPath: String): Unit = {\n    val path = Paths.get(dirPath)\n    if (!Files.exists(path)) {\n      Files.createDirectories(path)\n    }\n  }\n\n  /**\n   * Convert the table to file path which will be cached\n   *\n   * @param table to be converted\n   * @return the content\n   */\n  override def convertTable(table: Table): String = {\n    val names = (1 to table.getNumberOfColumns).map(_.toString)\n    val options = ArrowIPCWriterOptions.builder().withColumnNames(names: _*).build()\n    val path = root + \"/table_\" + counter + \"_\" + System.nanoTime();\n    counter += 1\n    withResource(Table.writeArrowIPCChunked(options, new File(path))) { writer =>\n      writer.write(table)\n    }\n    path\n  }\n\n  private def closeOnExcept[T <: AutoCloseable, V](r: ArrayBuffer[T])\n                                                  (block: ArrayBuffer[T] => V): V = {\n    try {\n      block(r)\n    } catch {\n      case t: Throwable =>\n        r.foreach(_.close())\n        throw t\n    }\n  }\n\n  /**\n   * Load the path from disk to the Table\n   *\n   * @param name to be loaded\n   * @return Table\n   */\n  override def loadTable(name: String): Table = {\n    val file = new File(name)\n    if (!file.exists()) {\n      throw new RuntimeException(s\"The cache file ${name} doesn't exist\" )\n    }\n    try {\n      withResource(Table.readArrowIPCChunked(file)) { reader =>\n        val tables = ArrayBuffer.empty[Table]\n        closeOnExcept(tables) { tables =>\n          var table = Option(reader.getNextIfAvailable())\n          while (table.isDefined) {\n            tables.append(table.get)\n            table = Option(reader.getNextIfAvailable())\n          }\n        }\n        if (tables.size > 1) {\n          closeOnExcept(tables) { tables =>\n            Table.concatenate(tables.toArray: _*)\n          }\n        } else {\n          tables(0)\n        }\n      }\n    } catch {\n      case e: Throwable =>\n        close()\n        
throw e\n    } finally {\n      if (file.exists()) {\n        file.delete()\n      }\n    }\n  }\n\n  override def close(): Unit = {\n    buffers.foreach { path =>\n      val file = new File(path)\n      if (file.exists()) {\n        file.delete()\n      }\n    }\n    buffers.clear()\n  }\n}\n\nprivate[spark] object ExternalMemory {\n  def apply(path: Option[String] = None): ExternalMemory[_] = {\n    path.map(new DiskExternalMemoryIterator(_))\n      .getOrElse(throw new RuntimeException(\"No disk path provided\"))\n  }\n}\n\n/**\n * ExternalMemoryIterator supports iterating the data twice if the `swap` is called.\n *\n * The first round iteration gets the input batch that will be\n *   1. cached in the external memory\n *      2. fed in QuantilDmatrix\n *      The second round iteration returns the cached batch got from external memory.\n *\n * @param input   the spark input iterator\n * @param indices column index\n */\nprivate[scala] class ExternalMemoryIterator(val input: Iterator[Table],\n                                            val indices: ColumnIndices,\n                                            val path: Option[String] = None)\n  extends Iterator[ColumnBatch] {\n\n  private var iter = input\n\n  // Flag to indicate the input has been consumed.\n  private var inputIsConsumed = false\n  // Flag to indicate the input.next has been called which is valid\n  private var inputNextIsCalled = false\n\n  // visible for testing\n  private[spark] val externalMemory = ExternalMemory(path)\n\n  override def hasNext: Boolean = {\n    val value = iter.hasNext\n    if (!value && inputIsConsumed && inputNextIsCalled) {\n      externalMemory.close()\n    }\n    if (!inputIsConsumed && !value && inputNextIsCalled) {\n      inputIsConsumed = true\n      iter = externalMemory\n    }\n    value\n  }\n\n  override def next(): ColumnBatch = {\n    inputNextIsCalled = true\n    withResource(new GpuColumnBatch(iter.next())) { batch =>\n      if (iter.eq(input)) {\n        
externalMemory.cacheTable(batch.table)\n      }\n      new CudfColumnBatch(\n        batch.select(indices.featureIds.get),\n        batch.select(indices.labelId),\n        batch.select(indices.weightId.getOrElse(-1)),\n        batch.select(indices.marginId.getOrElse(-1)),\n        batch.select(indices.groupId.getOrElse(-1)));\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala",
    "content": "/*\n Copyright (c) 2024-2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.collection.mutable.ArrayBuffer\nimport scala.jdk.CollectionConverters._\nimport scala.util.Try\n\nimport ai.rapids.cudf.Table\nimport com.nvidia.spark.rapids.{ColumnarRdd, GpuColumnVectorUtils}\nimport org.apache.commons.logging.LogFactory\nimport org.apache.spark.TaskContext\nimport org.apache.spark.ml.param.Param\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.sql.{Column, DataFrame, Dataset, Row}\nimport org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}\nimport org.apache.spark.sql.catalyst.expressions.UnsafeProjection\nimport org.apache.spark.sql.types.{DataType, FloatType, IntegerType}\nimport org.apache.spark.sql.vectorized.ColumnarBatch\n\nimport ml.dmlc.xgboost4j.java.CudfColumnBatch\nimport ml.dmlc.xgboost4j.scala.{DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\nimport ml.dmlc.xgboost4j.scala.spark.params.HasGroupCol\n\n/**\n * GpuXGBoostPlugin is the XGBoost plugin which leverages spark-rapids\n * to accelerate the XGBoost from ETL to train.\n */\nclass GpuXGBoostPlugin extends XGBoostPlugin {\n\n  private val logger = LogFactory.getLog(\"XGBoostSparkGpuPlugin\")\n\n  /**\n   * Whether the plugin is enabled or not, if not enabled, fallback\n   * to the regular CPU pipeline\n   *\n   * @param dataset the input 
dataset\n   * @return Boolean\n   */\n  override def isEnabled(dataset: Dataset[_]): Boolean = {\n    val conf = dataset.sparkSession.conf\n    val hasRapidsPlugin = conf.get(\"spark.plugins\", \"\").split(\",\").contains(\n      \"com.nvidia.spark.SQLPlugin\")\n    val rapidsEnabled = try {\n      conf.get(\"spark.rapids.sql.enabled\").toBoolean\n    } catch {\n      // Rapids plugin has default \"spark.rapids.sql.enabled\" to true\n      case _: NoSuchElementException => true\n      case _: Throwable => false // Any exception will return false\n    }\n    hasRapidsPlugin && rapidsEnabled\n  }\n\n  // TODO, support numeric type\n  private[spark] def preprocess[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]](\n      estimator: XGBoostEstimator[T, M], dataset: Dataset[_]): Dataset[_] = {\n\n    // Columns to be selected for XGBoost training\n    val selectedCols: ArrayBuffer[Column] = ArrayBuffer.empty\n    val schema = dataset.schema\n\n    def selectCol(c: Param[String], targetType: DataType = FloatType) = {\n      // TODO support numeric types\n      if (estimator.isDefinedNonEmpty(c)) {\n        selectedCols.append(estimator.castIfNeeded(schema, estimator.getOrDefault(c), targetType))\n      }\n    }\n\n    Seq(estimator.labelCol, estimator.weightCol, estimator.baseMarginCol)\n      .foreach(p => selectCol(p))\n    estimator match {\n      case p: HasGroupCol => selectCol(p.groupCol, IntegerType)\n      case _ =>\n    }\n\n    // TODO support array/vector feature\n    estimator.getFeaturesCols.foreach { name =>\n      val col = estimator.castIfNeeded(dataset.schema, name)\n      selectedCols.append(col)\n    }\n    val input = dataset.select(selectedCols.toArray: _*)\n    val repartitioned = estimator.repartitionIfNeeded(input)\n    estimator.sortPartitionIfNeeded(repartitioned)\n  }\n\n  // visible for testing\n  private[spark] def validate[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]](\n      estimator: XGBoostEstimator[T, M],\n      dataset: 
Dataset[_]): Unit = {\n    require(estimator.getDevice != \"cpu\",\n      \"Using Spark-Rapids to accelerate XGBoost must set device=cuda\")\n  }\n\n  /**\n   * Convert Dataset to RDD[Watches] which will be fed into XGBoost\n   *\n   * @param estimator which estimator to be handled.\n   * @param dataset   to be converted.\n   * @return RDD[Watches]\n   */\n  override def buildRddWatches[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]](\n      estimator: XGBoostEstimator[T, M],\n      dataset: Dataset[_]): (RDD[Watches], Map[String, AnyRef]) = {\n\n    validate(estimator, dataset)\n\n    val train = preprocess(estimator, dataset)\n    val schema = train.schema\n\n    val indices = estimator.buildColumnIndices(schema)\n\n    val maxBin = estimator.getMaxBins\n    val nthread = estimator.getNthread\n    val missing = estimator.getMissing\n\n    val useExtMem = estimator.getUseExternalMemory\n    val extMemPath = if (useExtMem) {\n      Some(dataset.sparkSession.conf.get(\"spark.local.dir\", \"/tmp\"))\n    } else None\n\n    val maxQuantileBatches = estimator.getMaxQuantileBatches\n    val minCachePageBytes = estimator.getMinCachePageBytes\n    val cacheHostRatio = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN)\n\n    /** build QuantileDMatrix on the executor side */\n    def buildQuantileDMatrix(input: Iterator[Table],\n                             ref: Option[QuantileDMatrix] = None): QuantileDMatrix = {\n\n      extMemPath match {\n        case Some(_) =>\n          val itr = new ExternalMemoryIterator(input, indices, extMemPath)\n          new ExtMemQuantileDMatrix(itr, missing, maxBin, ref, nthread,\n            maxQuantileBatches, minCachePageBytes, cacheHostRatio)\n\n        case None =>\n          val itr = input.map { table =>\n            withResource(new GpuColumnBatch(table)) { batch =>\n              new CudfColumnBatch(\n                batch.select(indices.featureIds.get),\n                batch.select(indices.labelId),\n                
batch.select(indices.weightId.getOrElse(-1)),\n                batch.select(indices.marginId.getOrElse(-1)),\n                batch.select(indices.groupId.getOrElse(-1)));\n            }\n          }\n          new QuantileDMatrix(itr, ref, missing, maxBin, nthread)\n      }\n    }\n\n    val rdd = estimator.getEvalDataset().map { evalDs =>\n      val evalProcessed = preprocess(estimator, evalDs)\n      ColumnarRdd(train.toDF()).zipPartitions(ColumnarRdd(evalProcessed.toDF())) {\n        (trainIter, evalIter) =>\n          new Iterator[Watches] {\n            override def hasNext: Boolean = trainIter.hasNext\n            override def next(): Watches = {\n              val trainDM = buildQuantileDMatrix(trainIter)\n              val evalDM = buildQuantileDMatrix(evalIter, Some(trainDM))\n              new Watches(Array(trainDM, evalDM),\n                Array(Utils.TRAIN_NAME, Utils.VALIDATION_NAME), None)\n            }\n          }\n      }\n    }.getOrElse(\n      ColumnarRdd(train.toDF()).mapPartitions { iter =>\n        new Iterator[Watches] {\n          override def hasNext: Boolean = iter.hasNext\n          override def next(): Watches = {\n            val dm = buildQuantileDMatrix(iter)\n            new Watches(Array(dm), Array(Utils.TRAIN_NAME), None)\n          }\n        }\n      }\n    )\n\n    val sconf = dataset.sparkSession.conf\n    val rmmEnabled: Boolean = try {\n      sconf.get(\"spark.rapids.memory.gpu.pool\").trim.toLowerCase != \"none\"\n    } catch {\n      case _: Throwable => false // Any exception will return false\n    }\n    val configs = if (rmmEnabled) {\n      Map(\"use_rmm\" -> rmmEnabled).asInstanceOf[Map[String, AnyRef]]\n    } else {\n      Map.empty[String, AnyRef]\n    }\n    (rdd, configs)\n  }\n\n  override def transform[M <: XGBoostModel[M]](model: XGBoostModel[M],\n                                               dataset: Dataset[_]): DataFrame = {\n    val sc = dataset.sparkSession.sparkContext\n\n    val (transformedSchema, 
pred) = model.preprocess(dataset)\n    val bBooster = sc.broadcast(model.nativeBooster)\n    val bOriginalSchema = sc.broadcast(dataset.schema)\n\n    val featureIds = model.getFeaturesCols.distinct.map(dataset.schema.fieldIndex).toList\n    val isLocal = sc.isLocal\n    val missing = model.getMissing\n    val nThread = model.getNthread\n\n    val rdd = ColumnarRdd(dataset.asInstanceOf[DataFrame]).mapPartitions { tableIters =>\n      // booster is visible for all spark tasks in the same executor\n      val booster = bBooster.value\n      val originalSchema = bOriginalSchema.value\n\n      // UnsafeProjection is not serializable so do it on the executor side\n      val toUnsafe = UnsafeProjection.create(originalSchema)\n\n      if (!booster.deviceIsSet) {\n        booster.deviceIsSet.synchronized {\n          if (!booster.deviceIsSet) {\n            booster.deviceIsSet = true\n            val gpuId = if (!isLocal) XGBoost.getGPUAddrFromResources else 0\n            booster.setParam(\"device\", s\"cuda:$gpuId\")\n            logger.info(\"GPU transform on GPU device: cuda:\" + gpuId)\n          }\n        }\n      }\n\n      // Iterator on Row\n      new Iterator[Row] {\n        // Convert InternalRow to Row\n        private val converter: InternalRow => Row = CatalystTypeConverters\n          .createToScalaConverter(originalSchema)\n          .asInstanceOf[InternalRow => Row]\n\n        // GPU batches read in must be closed by the receiver\n        @transient var currentBatch: ColumnarBatch = null\n\n        // Iterator on Row\n        var iter: Iterator[Row] = null\n\n        TaskContext.get().addTaskCompletionListener[Unit](_ => {\n          closeCurrentBatch() // close the last ColumnarBatch\n        })\n\n        private def closeCurrentBatch(): Unit = {\n          if (currentBatch != null) {\n            currentBatch.close()\n            currentBatch = null\n          }\n        }\n\n        def loadNextBatch(): Unit = {\n          closeCurrentBatch()\n         
 if (tableIters.hasNext) {\n            val dataTypes = originalSchema.fields.map(x => x.dataType)\n            iter = withResource(tableIters.next()) { table =>\n              // Create DMatrix\n              val featureTable = new GpuColumnBatch(table).select(featureIds)\n              if (featureTable == null) {\n                val msg = featureIds.mkString(\",\")\n                throw new RuntimeException(s\"Couldn't create feature table for the \" +\n                  s\"feature indices $msg\")\n              }\n              try {\n                val cudfColumnBatch = new CudfColumnBatch(featureTable, null, null, null, null)\n                val dm = new DMatrix(cudfColumnBatch, missing, nThread)\n                if (dm == null) {\n                  Iterator.empty\n                } else {\n                  try {\n                    currentBatch = new ColumnarBatch(\n                      GpuColumnVectorUtils.extractHostColumns(table, dataTypes),\n                      table.getRowCount().toInt)\n                    val rowIterator = currentBatch.rowIterator().asScala.map(toUnsafe)\n                      .map(converter(_))\n                    model.predictInternal(booster, dm, pred, rowIterator).toIterator\n                  } finally {\n                    dm.delete()\n                  }\n                }\n              } finally {\n                featureTable.close()\n              }\n            }\n          } else {\n            iter = null\n          }\n        }\n\n        override def hasNext: Boolean = {\n          val itHasNext = iter != null && iter.hasNext\n          if (!itHasNext) { // Don't have extra Row for current ColumnarBatch\n            loadNextBatch()\n            iter != null && iter.hasNext\n          } else {\n            itHasNext\n          }\n        }\n\n        override def next(): Row = {\n          if (iter == null || !iter.hasNext) {\n            loadNextBatch()\n          }\n          if (iter == null) {\n            
throw new NoSuchElementException()\n          }\n          iter.next()\n        }\n      }\n    }\n    bBooster.unpersist(false)\n    bOriginalSchema.unpersist(false)\n\n    val output = dataset.sparkSession.createDataFrame(rdd, transformedSchema)\n    model.postTransform(output, pred).toDF()\n  }\n}\n\nprivate[scala] class GpuColumnBatch(val table: Table) extends AutoCloseable {\n\n  def select(index: Int): Table = {\n    select(Seq(index))\n  }\n\n  def select(indices: Seq[Int]): Table = {\n    if (!indices.forall(index => index < table.getNumberOfColumns && index >= 0)) {\n      return null;\n    }\n    new Table(indices.map(table.getColumn): _*)\n  }\n\n  override def close(): Unit = Option(table).foreach(_.close())\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/BoosterTest.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport java.io.File;\nimport java.util.HashMap;\nimport java.util.LinkedList;\nimport java.util.List;\nimport java.util.Map;\n\nimport ai.rapids.cudf.*;\nimport junit.framework.TestCase;\nimport org.junit.Test;\n\n/**\n * Tests the BoosterTest trained by DMatrix\n *\n * @throws XGBoostError\n */\npublic class BoosterTest {\n\n  @Test\n  public void testBooster() throws XGBoostError {\n    String trainingDataPath = getClass().getClassLoader()\n      .getResource(\"veterans_lung_cancer.csv\").getPath();\n    Schema schema = Schema.builder()\n      .column(DType.FLOAT32, \"A\")\n      .column(DType.FLOAT32, \"B\")\n      .column(DType.FLOAT32, \"C\")\n      .column(DType.FLOAT32, \"D\")\n\n      .column(DType.FLOAT32, \"E\")\n      .column(DType.FLOAT32, \"F\")\n      .column(DType.FLOAT32, \"G\")\n      .column(DType.FLOAT32, \"H\")\n\n      .column(DType.FLOAT32, \"I\")\n      .column(DType.FLOAT32, \"J\")\n      .column(DType.FLOAT32, \"K\")\n      .column(DType.FLOAT32, \"L\")\n\n      .column(DType.FLOAT32, \"label\")\n      .build();\n    CSVOptions opts = CSVOptions.builder()\n      .hasHeader().build();\n\n    int maxBin = 16;\n    int round = 10;\n    //set params\n    Map<String, Object> paramMap = new HashMap<String, Object>() {\n      {\n        put(\"max_depth\", 2);\n        put(\"objective\", \"binary:logistic\");\n 
       put(\"num_round\", round);\n        put(\"num_workers\", 1);\n        put(\"tree_method\", \"hist\");\n        put(\"device\", \"cuda\");\n        put(\"max_bin\", maxBin);\n      }\n    };\n\n    try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) {\n      ColumnVector[] df = new ColumnVector[10];\n      // exclude the first two columns, they are label bounds and contain inf.\n      for (int i = 2; i < 12; ++i) {\n        df[i - 2] = tmpTable.getColumn(i);\n      }\n      try (Table X = new Table(df);) {\n        ColumnVector[] labels = new ColumnVector[1];\n        labels[0] = tmpTable.getColumn(12);\n\n        try (Table y = new Table(labels);) {\n\n          CudfColumnBatch batch = new CudfColumnBatch(X, y, null, null, null);\n          CudfColumn labelColumn = CudfColumn.from(tmpTable.getColumn(12));\n\n          //set watchList\n          HashMap<String, DMatrix> watches = new HashMap<>();\n\n          DMatrix dMatrix1 = new DMatrix(batch, Float.NaN, 1);\n          dMatrix1.setLabel(labelColumn);\n          watches.put(\"train\", dMatrix1);\n          Booster model1 = XGBoost.train(dMatrix1, paramMap, round, watches, null, null);\n\n          List<ColumnBatch> tables = new LinkedList<>();\n          tables.add(batch);\n          DMatrix incrementalDMatrix = new QuantileDMatrix(tables.iterator(), Float.NaN, maxBin, 1);\n          //set watchList\n          HashMap<String, DMatrix> watches1 = new HashMap<>();\n          watches1.put(\"train\", incrementalDMatrix);\n          Booster model2 = XGBoost.train(incrementalDMatrix, paramMap, round, watches1, null, null);\n\n          float[][] predicat1 = model1.predict(dMatrix1);\n          float[][] predicat2 = model2.predict(dMatrix1);\n\n          for (int i = 0; i < tmpTable.getRowCount(); i++) {\n            TestCase.assertTrue(predicat1[i][0] - predicat2[i][0] < 1e-6);\n          }\n        }\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.java;\n\nimport java.util.*;\n\nimport ai.rapids.cudf.Table;\nimport junit.framework.TestCase;\nimport org.junit.Test;\n\nimport static org.junit.Assert.assertArrayEquals;\n\n/**\n * Test suite for DMatrix based on GPU\n */\npublic class DMatrixTest {\n\n  @Test\n  public void testCreateFromArrayInterfaceColumns() {\n    Float[] labelFloats = new Float[]{2f, 4f, 6f, 8f, 10f};\n    Integer[] groups = new Integer[]{1, 1, 7, 7, 19, 26};\n    int[] expectedGroup = new int[]{0, 2, 4, 5, 6};\n\n    Throwable ex = null;\n    try (\n      Table X = new Table.TestBuilder().column(1.f, null, 5.f, 7.f, 9.f).build();\n      Table y = new Table.TestBuilder().column(labelFloats).build();\n      Table w = new Table.TestBuilder().column(labelFloats).build();\n      Table q = new Table.TestBuilder().column(groups).build();\n      Table margin = new Table.TestBuilder().column(labelFloats).build();) {\n\n      CudfColumnBatch cudfDataFrame = new CudfColumnBatch(X, y, w, null, null);\n\n      CudfColumn labelColumn = CudfColumn.from(y.getColumn(0));\n      CudfColumn weightColumn = CudfColumn.from(w.getColumn(0));\n      CudfColumn baseMarginColumn = CudfColumn.from(margin.getColumn(0));\n      CudfColumn qidColumn = CudfColumn.from(q.getColumn(0));\n\n      DMatrix dMatrix = new DMatrix(cudfDataFrame, 0, 1);\n      dMatrix.setLabel(labelColumn);\n      
dMatrix.setWeight(weightColumn);\n      dMatrix.setBaseMargin(baseMarginColumn);\n      dMatrix.setQueryId(qidColumn);\n\n      String[] featureNames = new String[]{\"f1\"};\n      dMatrix.setFeatureNames(featureNames);\n      String[] retFeatureNames = dMatrix.getFeatureNames();\n      assertArrayEquals(featureNames, retFeatureNames);\n\n      String[] featureTypes = new String[]{\"i\"};\n      dMatrix.setFeatureTypes(featureTypes);\n      String[] retFeatureTypes = dMatrix.getFeatureTypes();\n      assertArrayEquals(featureTypes, retFeatureTypes);\n\n      float[] anchor = convertFloatTofloat(labelFloats);\n      float[] label = dMatrix.getLabel();\n      float[] weight = dMatrix.getWeight();\n      float[] baseMargin = dMatrix.getBaseMargin();\n      int[] group = dMatrix.getGroup();\n\n      TestCase.assertTrue(Arrays.equals(anchor, label));\n      TestCase.assertTrue(Arrays.equals(anchor, weight));\n      TestCase.assertTrue(Arrays.equals(anchor, baseMargin));\n      TestCase.assertTrue(Arrays.equals(expectedGroup, group));\n    } catch (Throwable e) {\n      ex = e;\n      e.printStackTrace();\n    }\n    TestCase.assertNull(ex);\n  }\n\n  @Test\n  public void testCreateFromColumnDataIterator() throws XGBoostError {\n\n    Float[] label1 = {25f, 21f, 22f, 20f, 24f};\n    Float[] weight1 = {1.3f, 2.31f, 0.32f, 3.3f, 1.34f};\n    Float[] baseMargin1 = {1.2f, 0.2f, 1.3f, 2.4f, 3.5f};\n    Integer[] groups1 = new Integer[]{1, 1, 7, 7, 19, 26};\n\n    Float[] label2 = {9f, 5f, 4f, 10f, 12f};\n    Float[] weight2 = {3.0f, 1.3f, 3.2f, 0.3f, 1.34f};\n    Float[] baseMargin2 = {0.2f, 2.5f, 3.1f, 4.4f, 2.2f};\n    Integer[] groups2 = new Integer[]{30, 30, 30, 40, 40};\n\n    int[] expectedGroup = new int[]{0, 2, 4, 5, 6, 9, 11};\n\n    try (\n      Table X_0 = new Table.TestBuilder()\n        .column(1.2f, null, 5.2f, 7.2f, 9.2f)\n        .column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f)\n        .build();\n      Table y_0 = new Table.TestBuilder().column(label1).build();\n      
Table w_0 = new Table.TestBuilder().column(weight1).build();\n      Table m_0 = new Table.TestBuilder().column(baseMargin1).build();\n      Table q_0 = new Table.TestBuilder().column(groups1).build();\n\n      Table X_1 = new Table.TestBuilder().column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f)\n        .column(1.2f, 1.4f, null, 12.6f, 10.10f).build();\n      Table y_1 = new Table.TestBuilder().column(label2).build();\n      Table w_1 = new Table.TestBuilder().column(weight2).build();\n      Table m_1 = new Table.TestBuilder().column(baseMargin2).build();) {\n      Table q_1 = new Table.TestBuilder().column(groups2).build();\n\n      List<ColumnBatch> tables = new LinkedList<>();\n\n      tables.add(new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0));\n      tables.add(new CudfColumnBatch(X_1, y_1, w_1, m_1, q_1));\n\n      QuantileDMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 256, 1);\n      float[] anchorLabel = convertFloatTofloat(label1, label2);\n      float[] anchorWeight = convertFloatTofloat(weight1, weight2);\n      float[] anchorBaseMargin = convertFloatTofloat(baseMargin1, baseMargin2);\n\n      TestCase.assertTrue(Arrays.equals(anchorLabel, dmat.getLabel()));\n      TestCase.assertTrue(Arrays.equals(anchorWeight, dmat.getWeight()));\n      TestCase.assertTrue(Arrays.equals(anchorBaseMargin, dmat.getBaseMargin()));\n      TestCase.assertTrue(Arrays.equals(expectedGroup, dmat.getGroup()));\n    }\n  }\n\n  private Float[] generateFloatArray(int size, long seed) {\n    Float[] array = new Float[size];\n    Random random = new Random(seed);\n    for (int i = 0; i < size; i++) {\n      array[i] = random.nextFloat();\n    }\n    return array;\n  }\n\n   @Test\n  public void testGetQuantileCut() throws XGBoostError {\n\n    int rows = 100;\n    try (\n      Table X_0 = new Table.TestBuilder()\n        .column(generateFloatArray(rows, 1l))\n        .column(generateFloatArray(rows, 2l))\n        .column(generateFloatArray(rows, 3l))\n        
.column(generateFloatArray(rows, 4l))\n        .column(generateFloatArray(rows, 5l))\n        .build();\n      Table y_0 = new Table.TestBuilder().column(generateFloatArray(rows, 6l)).build();\n\n      Table X_1 = new Table.TestBuilder()\n        .column(generateFloatArray(rows, 11l))\n        .column(generateFloatArray(rows, 12l))\n        .column(generateFloatArray(rows, 13l))\n        .column(generateFloatArray(rows, 14l))\n        .column(generateFloatArray(rows, 15l))\n        .build();\n      Table y_1 = new Table.TestBuilder().column(generateFloatArray(rows, 16l)).build();\n    ) {\n      List<ColumnBatch> tables = new LinkedList<>();\n      tables.add(new CudfColumnBatch(X_0, y_0, null, null, null));\n      QuantileDMatrix train = new QuantileDMatrix(tables.iterator(), 0.0f, 256, 1);\n\n      tables.clear();\n      tables.add(new CudfColumnBatch(X_1, y_1, null, null, null));\n      QuantileDMatrix eval = new QuantileDMatrix(tables.iterator(),  train, 0.0f, 256, 1);\n\n      DMatrix.QuantileCut trainCut = train.getQuantileCut();\n      DMatrix.QuantileCut evalCut = eval.getQuantileCut();\n\n      TestCase.assertTrue(trainCut.getIndptr().length == evalCut.getIndptr().length);\n      TestCase.assertTrue(Arrays.equals(trainCut.getIndptr(), evalCut.getIndptr()));\n\n      TestCase.assertTrue(trainCut.getValues().length == evalCut.getValues().length);\n      TestCase.assertTrue(Arrays.equals(trainCut.getValues(), evalCut.getValues()));\n    }\n  }\n\n  private float[] convertFloatTofloat(Float[]... datas) {\n    int totalLength = 0;\n    for (Float[] data : datas) {\n      totalLength += data.length;\n    }\n    float[] floatArray = new float[totalLength];\n    int index = 0;\n    for (Float[] data : datas) {\n      for (int i = 0; i < data.length; i++) {\n        floatArray[i + index] = data[i];\n      }\n      index += data.length;\n    }\n    return floatArray;\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/resources/log4j.properties",
    "content": "log4j.logger.org.apache.spark=INFO\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrixSuite.scala",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport java.io.File\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport ai.rapids.cudf.Table\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.java.{ColumnBatch, CudfColumnBatch}\nimport ml.dmlc.xgboost4j.scala.rapids.spark.TmpFolderSuite\nimport ml.dmlc.xgboost4j.scala.spark.{ColumnIndices, ExternalMemoryIterator, GpuColumnBatch}\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\nclass ExtMemQuantileDMatrixSuite extends AnyFunSuite with TmpFolderSuite {\n\n  private def runTest(buildIterator: (Iterator[Table], ColumnIndices) => Iterator[ColumnBatch]) = {\n    val label1 = Array[java.lang.Float](25f, 21f, 22f, 20f, 24f)\n    val weight1 = Array[java.lang.Float](1.3f, 2.31f, 0.32f, 3.3f, 1.34f)\n    val baseMargin1 = Array[java.lang.Float](1.2f, 0.2f, 1.3f, 2.4f, 3.5f)\n    val group1 = Array[java.lang.Integer](1, 1, 7, 7, 19, 26)\n\n    val label2 = Array[java.lang.Float](9f, 5f, 4f, 10f, 12f)\n    val weight2 = Array[java.lang.Float](3.0f, 1.3f, 3.2f, 0.3f, 1.34f)\n    val baseMargin2 = Array[java.lang.Float](0.2f, 2.5f, 3.1f, 4.4f, 2.2f)\n    val group2 = Array[java.lang.Integer](30, 30, 30, 40, 40)\n\n    val expectedGroup = Array(0, 2, 4, 5, 6, 9, 11)\n\n    withResource(new Table.TestBuilder()\n      .column(1.2f, null.asInstanceOf[java.lang.Float], 5.2f, 7.2f, 9.2f)\n      .column(0.2f, 0.4f, 
0.6f, 2.6f, 0.10f.asInstanceOf[java.lang.Float])\n      .build) { X_0 =>\n      withResource(new Table.TestBuilder().column(label1: _*).build) { y_0 =>\n        withResource(new Table.TestBuilder().column(weight1: _*).build) { w_0 =>\n          withResource(new Table.TestBuilder().column(baseMargin1: _*).build) { m_0 =>\n            withResource(new Table.TestBuilder().column(group1: _*).build) { q_0 =>\n              withResource(new Table.TestBuilder()\n                .column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float])\n                .column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) {\n                X_1 =>\n                  withResource(new Table.TestBuilder().column(label2: _*).build) { y_1 =>\n                    withResource(new Table.TestBuilder().column(weight2: _*).build) { w_1 =>\n                      withResource(new Table.TestBuilder().column(baseMargin2: _*).build) { m_1 =>\n                        withResource(new Table.TestBuilder().column(group2: _*).build) { q_2 =>\n                          val tables = new ArrayBuffer[Table]()\n                          tables += new Table(X_0.getColumn(0), X_0.getColumn(1), y_0.getColumn(0),\n                            w_0.getColumn(0), m_0.getColumn(0))\n                          tables += new Table(X_1.getColumn(0), X_1.getColumn(1), y_1.getColumn(0),\n                            w_1.getColumn(0), m_1.getColumn(0))\n\n                          val indices = ColumnIndices(\n                            labelId = 2,\n                            featureId = None,\n                            featureIds = Option(Seq(0, 1)),\n                            weightId = Option(3),\n                            marginId = Option(4),\n                            groupId = Option(5)\n                          )\n                          val iter = buildIterator(tables.toIterator, indices);\n                          val dmatrix = new ExtMemQuantileDMatrix(iter, 0.0f, 
8)\n\n                          def check(dm: ExtMemQuantileDMatrix) = {\n                            assert(dm.getLabel.sameElements(label1 ++ label2))\n                            assert(dm.getWeight.sameElements(weight1 ++ weight2))\n                            assert(dm.getBaseMargin.sameElements(baseMargin1 ++ baseMargin2))\n                          }\n                          check(dmatrix)\n                        }\n                      }\n                    }\n                  }\n              }\n            }\n          }\n        }\n      }\n    }\n  }\n\n  test(\"ExtMemQuantileDMatrix test\") {\n    val buildIter = (input: Iterator[Table], indices: ColumnIndices) =>\n    new ExternalMemoryIterator(\n      input, indices, Option(new File(tempDir.toFile, \"xgboost\").getPath)\n    )\n    runTest(buildIter)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport ai.rapids.cudf.Table\nimport org.scalatest.funsuite.AnyFunSuite\n\nimport ml.dmlc.xgboost4j.java.CudfColumnBatch\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\nclass QuantileDMatrixSuite extends AnyFunSuite {\n\n  test(\"QuantileDMatrix test\") {\n\n    val label1 = Array[java.lang.Float](25f, 21f, 22f, 20f, 24f)\n    val weight1 = Array[java.lang.Float](1.3f, 2.31f, 0.32f, 3.3f, 1.34f)\n    val baseMargin1 = Array[java.lang.Float](1.2f, 0.2f, 1.3f, 2.4f, 3.5f)\n    val group1 = Array[java.lang.Integer](1, 1, 7, 7, 19, 26)\n\n    val label2 = Array[java.lang.Float](9f, 5f, 4f, 10f, 12f)\n    val weight2 = Array[java.lang.Float](3.0f, 1.3f, 3.2f, 0.3f, 1.34f)\n    val baseMargin2 = Array[java.lang.Float](0.2f, 2.5f, 3.1f, 4.4f, 2.2f)\n    val group2 = Array[java.lang.Integer](30, 30, 30, 40, 40)\n\n    val expectedGroup = Array(0, 2, 4, 5, 6, 9, 11)\n\n    withResource(new Table.TestBuilder()\n      .column(1.2f, null.asInstanceOf[java.lang.Float], 5.2f, 7.2f, 9.2f)\n      .column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f.asInstanceOf[java.lang.Float])\n      .build) { X_0 =>\n      withResource(new Table.TestBuilder().column(label1: _*).build) { y_0 =>\n        withResource(new Table.TestBuilder().column(weight1: _*).build) { w_0 =>\n          withResource(new 
Table.TestBuilder().column(baseMargin1: _*).build) { m_0 =>\n            withResource(new Table.TestBuilder().column(group1: _*).build) { q_0 =>\n              withResource(new Table.TestBuilder()\n                .column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float])\n                .column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) {\n                X_1 =>\n                  withResource(new Table.TestBuilder().column(label2: _*).build) { y_1 =>\n                    withResource(new Table.TestBuilder().column(weight2: _*).build) { w_1 =>\n                      withResource(new Table.TestBuilder().column(baseMargin2: _*).build) { m_1 =>\n                        withResource(new Table.TestBuilder().column(group2: _*).build) { q_2 =>\n                          val batches = new ArrayBuffer[CudfColumnBatch]()\n                          batches += new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0)\n                          batches += new CudfColumnBatch(X_1, y_1, w_1, m_1, q_2)\n                          val dmatrix = new QuantileDMatrix(batches.toIterator, 0.0f, 8, 1)\n                          assert(dmatrix.getLabel.sameElements(label1 ++ label2))\n                          assert(dmatrix.getWeight.sameElements(weight1 ++ weight2))\n                          assert(dmatrix.getBaseMargin.sameElements(baseMargin1 ++ baseMargin2))\n                          assert(dmatrix.getGroup().sameElements(expectedGroup))\n                        }\n                      }\n                    }\n                  }\n              }\n            }\n          }\n        }\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ExternalMemorySuite.scala",
    "content": "/*\n Copyright (c) 2025 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport ai.rapids.cudf.Table\n\nimport ml.dmlc.xgboost4j.java.CudfColumnBatch\nimport ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\nclass ExternalMemorySuite extends GpuTestSuite {\n\n  private def assertColumnBatchEqual(lhs: Array[CudfColumnBatch],\n                                     rhs: Array[CudfColumnBatch]): Unit = {\n    def assertTwoTable(lhsTable: Table, rhsTable: Table): Unit = {\n      assert(lhsTable.getNumberOfColumns === rhsTable.getNumberOfColumns)\n      for (i <- 0 until lhsTable.getNumberOfColumns) {\n        val lColumn = lhsTable.getColumn(i)\n        val rColumn = rhsTable.getColumn(i)\n\n        val lHost = lColumn.copyToHost()\n        val rHost = rColumn.copyToHost()\n\n        assert(lHost.getRowCount === rHost.getRowCount)\n        for (j <- 0 until lHost.getRowCount.toInt) {\n          assert(lHost.getFloat(j) === rHost.getFloat(j))\n        }\n      }\n    }\n\n    assert(lhs.length === rhs.length)\n    for ((l, r) <- lhs.zip(rhs)) {\n      assertTwoTable(l.getFeatureTable, r.getFeatureTable)\n      assertTwoTable(l.getLabelTable, r.getLabelTable)\n    }\n  }\n\n  def runExternalMemoryTest(buildExternalMemory: (Iterator[Table], ColumnIndices) =>\n    ExternalMemoryIterator): Unit = {\n\n   
 withResource(new Table.TestBuilder()\n      .column(1.0f, 2.0f, 3.0f.asInstanceOf[java.lang.Float])\n      .column(4.0f, 5.0f, 6.0f.asInstanceOf[java.lang.Float])\n      .column(7.0f, 8.0f, 9.0f.asInstanceOf[java.lang.Float])\n      .build) { table1 =>\n\n      withResource(new Table.TestBuilder()\n        .column(11.0f, 12.0f, 13.0f.asInstanceOf[java.lang.Float])\n        .column(14.0f, 15.0f, 16.0f.asInstanceOf[java.lang.Float])\n        .column(17.0f, 18.0f, 19.0f.asInstanceOf[java.lang.Float])\n        .build) { table2 =>\n\n        val tables = Seq(table1, table2)\n\n        val indices = ColumnIndices(labelId = 0, featureIds = Some(Seq(1, 2)), featureId = None,\n          weightId = None, marginId = None, groupId = None)\n        val extMemIter = buildExternalMemory(tables.toIterator, indices)\n        val expectTables = ArrayBuffer.empty[CudfColumnBatch]\n        while (extMemIter.hasNext) {\n          val table = extMemIter.next().asInstanceOf[CudfColumnBatch]\n          expectTables.append(table)\n        }\n        // The hasNext has swap the iterator internally, so we can still get the\n        // value for the next round of iteration\n\n        val targetTables = ArrayBuffer.empty[CudfColumnBatch]\n        while (extMemIter.hasNext) {\n          val table = extMemIter.next().asInstanceOf[CudfColumnBatch]\n          targetTables.append(table)\n        }\n\n        assertColumnBatchEqual(expectTables.toArray, targetTables.toArray)\n      }\n    }\n  }\n\n  test(\"DiskExternalMemory\") {\n    val buildIterator = (input: Iterator[Table], indices: ColumnIndices) => {\n      val iter = new ExternalMemoryIterator(input, indices, Some(\"/tmp/\"))\n      assert(iter.externalMemory.isInstanceOf[DiskExternalMemoryIterator])\n      iter\n    }\n    runExternalMemoryTest(buildIterator)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuTestSuite.scala",
    "content": "/*\n Copyright (c) 2021-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.rapids.spark\n\nimport java.nio.file.{Files, Path}\nimport java.util.{Locale, TimeZone}\n\nimport org.apache.spark.{GpuTestUtils, SparkConf}\nimport org.apache.spark.internal.Logging\nimport org.apache.spark.network.util.JavaUtils\nimport org.apache.spark.sql.SparkSession\nimport org.scalatest.BeforeAndAfterAll\nimport org.scalatest.funsuite.AnyFunSuite\n\ntrait GpuTestSuite extends AnyFunSuite with TmpFolderSuite {\n\n  import SparkSessionHolder.withSparkSession\n\n  protected def getResourcePath(resource: String): String = {\n    require(resource.startsWith(\"/\"), \"resource must start with /\")\n    getClass.getResource(resource).getPath\n  }\n\n  def enableCsvConf(): SparkConf = {\n    new SparkConf()\n      .set(\"spark.rapids.sql.csv.read.float.enabled\", \"true\")\n      .set(\"spark.rapids.sql.csv.read.double.enabled\", \"true\")\n  }\n\n  def withGpuSparkSession[U](conf: SparkConf = new SparkConf())(f: SparkSession => U): U = {\n    // set \"spark.rapids.sql.explain\" to \"ALL\" to check if the operators\n    // can be replaced by GPU\n    val c = conf.clone()\n      .set(\"spark.rapids.sql.enabled\", \"true\")\n    withSparkSession(c, f)\n  }\n\n  def withCpuSparkSession[U](conf: SparkConf = new SparkConf())(f: SparkSession => U): U = {\n    val c = conf.clone()\n      .set(\"spark.rapids.sql.enabled\", \"false\") // 
Just to be sure\n    withSparkSession(c, f)\n  }\n}\n\ntrait TmpFolderSuite extends BeforeAndAfterAll {\n  self: AnyFunSuite =>\n  protected var tempDir: Path = _\n\n  override def beforeAll(): Unit = {\n    super.beforeAll()\n    tempDir = Files.createTempDirectory(getClass.getName)\n  }\n\n  override def afterAll(): Unit = {\n    JavaUtils.deleteRecursively(tempDir.toFile)\n    super.afterAll()\n  }\n\n  protected def createTmpFolder(prefix: String): Path = {\n    Files.createTempDirectory(tempDir, prefix)\n  }\n}\n\nobject SparkSessionHolder extends Logging {\n\n  private var spark = createSparkSession()\n  private var origConf = spark.conf.getAll\n  private var origConfKeys = origConf.keys.toSet\n\n  private def setAllConfs(confs: Array[(String, String)]): Unit = confs.foreach {\n    case (key, value) if spark.conf.get(key, null) != value =>\n      spark.conf.set(key, value)\n    case _ => // No need to modify it\n  }\n\n  private def createSparkSession(): SparkSession = {\n    GpuTestUtils.cleanupAnyExistingSession()\n\n    // Timezone is fixed to UTC to allow timestamps to work by default\n    TimeZone.setDefault(TimeZone.getTimeZone(\"UTC\"))\n    // Add Locale setting\n    Locale.setDefault(Locale.US)\n\n    val builder = SparkSession.builder()\n      .master(\"local[2]\")\n      .config(\"spark.sql.adaptive.enabled\", \"false\")\n      .config(\"spark.rapids.sql.test.enabled\", \"false\")\n      .config(\"spark.stage.maxConsecutiveAttempts\", \"1\")\n      .config(\"spark.plugins\", \"com.nvidia.spark.SQLPlugin\")\n      .config(\"spark.rapids.memory.gpu.pooling.enabled\", \"false\") // Disable RMM for unit tests.\n      .config(\"spark.sql.files.maxPartitionBytes\", \"1000\")\n      .appName(\"XGBoost4j-Spark-Gpu unit test\")\n\n    builder.getOrCreate()\n  }\n\n  private def reinitSession(): Unit = {\n    spark = createSparkSession()\n    origConf = spark.conf.getAll\n    origConfKeys = origConf.keys.toSet\n  }\n\n  def sparkSession: SparkSession = {\n   
 if (SparkSession.getActiveSession.isEmpty) {\n      reinitSession()\n    }\n    spark\n  }\n\n  def resetSparkSessionConf(): Unit = {\n    if (SparkSession.getActiveSession.isEmpty) {\n      reinitSession()\n    } else {\n      setAllConfs(origConf.toArray)\n      val currentKeys = spark.conf.getAll.keys.toSet\n      val toRemove = currentKeys -- origConfKeys\n      toRemove.foreach(spark.conf.unset)\n    }\n    logDebug(s\"RESET CONF TO: ${spark.conf.getAll}\")\n  }\n\n  def withSparkSession[U](conf: SparkConf, f: SparkSession => U): U = {\n    resetSparkSessionConf\n    logDebug(s\"SETTING  CONF: ${conf.getAll.toMap}\")\n    setAllConfs(conf.getAll)\n    logDebug(s\"RUN WITH CONF: ${spark.conf.getAll}\\n\")\n    spark.sparkContext.setLogLevel(\"WARN\")\n    f(spark)\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala",
    "content": "/*\n Copyright (c) 2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport java.io.File\n\nimport scala.collection.mutable.ArrayBuffer\n\nimport ai.rapids.cudf.{OrderByArg, Table}\nimport org.apache.spark.SparkConf\nimport org.apache.spark.ml.linalg.DenseVector\nimport org.apache.spark.sql.{Dataset, Row, SparkSession}\n\nimport ml.dmlc.xgboost4j.java.CudfColumnBatch\nimport ml.dmlc.xgboost4j.scala.{DMatrix, QuantileDMatrix, XGBoost => ScalaXGBoost}\nimport ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite\nimport ml.dmlc.xgboost4j.scala.rapids.spark.SparkSessionHolder.withSparkSession\nimport ml.dmlc.xgboost4j.scala.spark.Utils.withResource\n\nclass GpuXGBoostPluginSuite extends GpuTestSuite {\n  test(\"params\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val df = Seq((1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n        (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n        (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f),\n        (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n        (5.0f, 6.0f, 7.0f, 8.0f, 0.0f, 0.1f)\n      ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n      val xgbParams: Map[String, Any] = Map(\n        \"max_depth\" -> 5,\n        \"eta\" -> 0.2,\n        \"objective\" -> \"binary:logistic\"\n      )\n      val features = Array(\"c1\", \"c2\")\n      val estimator = new XGBoostClassifier(xgbParams)\n        .setFeaturesCol(features)\n        
.setMissing(0.2f)\n        .setAlpha(0.97)\n        .setLeafPredictionCol(\"leaf\")\n        .setContribPredictionCol(\"contrib\")\n        .setNumRound(3)\n        .setDevice(\"cuda\")\n\n      assert(estimator.getMaxDepth === 5)\n      assert(estimator.getEta === 0.2)\n      assert(estimator.getObjective === \"binary:logistic\")\n      assert(estimator.getFeaturesCols === features)\n      assert(estimator.getMissing === 0.2f)\n      assert(estimator.getAlpha === 0.97)\n      assert(estimator.getDevice === \"cuda\")\n      assert(estimator.getNumRound === 3)\n\n      estimator.setEta(0.66).setMaxDepth(7)\n      assert(estimator.getMaxDepth === 7)\n      assert(estimator.getEta === 0.66)\n\n      val model = estimator.fit(df)\n      assert(model.getMaxDepth === 7)\n      assert(model.getEta === 0.66)\n      assert(model.getObjective === \"binary:logistic\")\n      assert(model.getFeaturesCols === features)\n      assert(model.getMissing === 0.2f)\n      assert(model.getAlpha === 0.97)\n      assert(model.getLeafPredictionCol === \"leaf\")\n      assert(model.getContribPredictionCol === \"contrib\")\n      assert(model.getDevice === \"cuda\")\n      assert(model.getNumRound === 3)\n    }\n  }\n\n  test(\"isEnabled\") {\n    def checkIsEnabled(spark: SparkSession, expected: Boolean): Unit = {\n      import spark.implicits._\n      val df = Seq((1.0f, 2.0f, 0.0f),\n        (2.0f, 3.0f, 1.0f)\n      ).toDF(\"c1\", \"c2\", \"label\")\n      assert(PluginUtils.getPlugin.isDefined)\n      assert(PluginUtils.getPlugin.get.isEnabled(df) === expected)\n    }\n\n    // spark.rapids.sql.enabled is not set explicitly, default to true\n    withSparkSession(new SparkConf(), spark => {\n      checkIsEnabled(spark, expected = true)\n    })\n\n    // set spark.rapids.sql.enabled to false\n    withCpuSparkSession() { spark =>\n      checkIsEnabled(spark, expected = false)\n    }\n\n    // set spark.rapids.sql.enabled to true\n    withGpuSparkSession() { spark =>\n      
checkIsEnabled(spark, expected = true)\n    }\n  }\n\n  test(\"parameter validation\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val df = Seq((1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n        (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n        (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f),\n        (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n        (5.0f, 6.0f, 7.0f, 8.0f, 0.0f, 0.1f)\n      ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n      val classifier = new XGBoostClassifier()\n\n      val plugin = PluginUtils.getPlugin.get.asInstanceOf[GpuXGBoostPlugin]\n      intercept[IllegalArgumentException] {\n        plugin.validate(classifier, df)\n      }\n      classifier.setDevice(\"cuda\")\n      plugin.validate(classifier, df)\n\n      classifier.setDevice(\"gpu\")\n      plugin.validate(classifier, df)\n    }\n  }\n\n  test(\"preprocess\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val df = Seq((1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n        (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n        (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f),\n        (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n        (5.0f, 6.0f, 7.0f, 8.0f, 0.0f, 0.1f)\n      ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n        .repartition(5)\n\n      assert(df.schema.names.contains(\"other\"))\n      assert(df.rdd.getNumPartitions === 5)\n\n      val features = Array(\"c1\", \"c2\")\n      var classifier = new XGBoostClassifier()\n        .setNumWorkers(3)\n        .setFeaturesCol(features)\n      assert(PluginUtils.getPlugin.isDefined)\n      assert(PluginUtils.getPlugin.get.isInstanceOf[GpuXGBoostPlugin])\n      var out = PluginUtils.getPlugin.get.asInstanceOf[GpuXGBoostPlugin]\n        .preprocess(classifier, df)\n\n      assert(out.schema.names.contains(\"c1\") && out.schema.names.contains(\"c2\"))\n      assert(out.schema.names.contains(classifier.getLabelCol))\n      
assert(!out.schema.names.contains(\"weight\") && !out.schema.names.contains(\"margin\"))\n      assert(out.rdd.getNumPartitions === 3)\n\n      classifier = new XGBoostClassifier()\n        .setNumWorkers(4)\n        .setFeaturesCol(features)\n        .setWeightCol(\"weight\")\n        .setBaseMarginCol(\"margin\")\n        .setDevice(\"cuda\")\n      out = PluginUtils.getPlugin.get.asInstanceOf[GpuXGBoostPlugin]\n        .preprocess(classifier, df)\n\n      assert(out.schema.names.contains(\"c1\") && out.schema.names.contains(\"c2\"))\n      assert(out.schema.names.contains(classifier.getLabelCol))\n      assert(out.schema.names.contains(\"weight\") && out.schema.names.contains(\"margin\"))\n      assert(out.rdd.getNumPartitions === 4)\n    }\n  }\n\n  test(\"global configs\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n\n      val df = Seq(\n        (1.0f, 2.0f, 0),\n        (5.0f, 6.0f, 1)\n      ).toDF(\"c1\", \"c2\", \"label\")\n      val features = Array(\"c1\", \"c2\")\n      val classifier = new XGBoostClassifier().setDevice(\"cuda\").setFeaturesCol(features)\n      val (_, configs) = PluginUtils.getPlugin.get.buildRddWatches(classifier, df)\n      assert(configs.isEmpty)\n    }\n\n    val conf = new SparkConf().set(\"spark.rapids.memory.gpu.pooling.enabled\", \"true\")\n      .set(\"spark.rapids.memory.gpu.pool\", \"ASYNC\")\n    withGpuSparkSession(conf) { spark =>\n      import spark.implicits._\n\n      val df = Seq(\n        (1.0f, 2.0f, 0),\n        (5.0f, 6.0f, 1)\n      ).toDF(\"c1\", \"c2\", \"label\")\n      val features = Array(\"c1\", \"c2\")\n      val classifier = new XGBoostClassifier()\n        .setDevice(\"cuda\")\n        .setFeaturesCol(features)\n        .setNumRound(2)\n      val (_, configs) = PluginUtils.getPlugin.get.buildRddWatches(classifier, df)\n      assert(configs(\"use_rmm\") == true)\n\n      // No exception\n      classifier.fit(df)\n    }\n  }\n\n  // test distributed\n  test(\"build RDD 
Watches\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n\n      // dataPoint -> (missing, rowNum, nonMissing)\n      Map(0.0f -> (0.0f, 5, 9), Float.NaN -> (0.0f, 5, 9)).foreach {\n        case (data, (missing, expectedRowNum, expectedNonMissing)) =>\n          val df = Seq(\n            (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n            (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n            (3.0f, data, 5.0f, 6.0f, 0.0f, 0.1f),\n            (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n            (5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 0.1f)\n          ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n\n          val features = Array(\"c1\", \"c2\")\n          val classifier = new XGBoostClassifier()\n            .setNumWorkers(2)\n            .setWeightCol(\"weight\")\n            .setBaseMarginCol(\"margin\")\n            .setFeaturesCol(features)\n            .setDevice(\"cuda\")\n            .setMissing(missing)\n\n          val (rdd, _) = PluginUtils.getPlugin.get.buildRddWatches(classifier, df)\n          val result = rdd.mapPartitions { iter =>\n            val watches = iter.next()\n            val size = watches.size\n            val labels = watches.datasets(0).getLabel\n            val weight = watches.datasets(0).getWeight\n            val margins = watches.datasets(0).getBaseMargin\n            val rowNumber = watches.datasets(0).rowNum\n            val nonMissing = watches.datasets(0).nonMissingNum\n            Iterator.single(size, rowNumber, nonMissing, labels, weight, margins)\n          }.collect()\n\n          val labels: ArrayBuffer[Float] = ArrayBuffer.empty\n          val weight: ArrayBuffer[Float] = ArrayBuffer.empty\n          val margins: ArrayBuffer[Float] = ArrayBuffer.empty\n          val rowNumber: ArrayBuffer[Long] = ArrayBuffer.empty\n          val nonMissing: ArrayBuffer[Long] = ArrayBuffer.empty\n\n          for (row <- result) {\n            assert(row._1 === 1)\n            rowNumber.append(row._2)\n  
          nonMissing.append(row._3)\n            labels.append(row._4: _*)\n            weight.append(row._5: _*)\n            margins.append(row._6: _*)\n          }\n          assert(labels.sorted === Array(0.0f, 1.0f, 0.0f, 0.0f, 1.0f).sorted)\n          assert(weight.sorted === Array(1.0f, 2.0f, 5.0f, 6.0f, 7.0f).sorted)\n          assert(margins.sorted === Array(2.0f, 3.0f, 6.0f, 7.0f, 8.0f).sorted)\n          assert(rowNumber.sum === expectedRowNum)\n          assert(nonMissing.sum === expectedNonMissing)\n      }\n    }\n  }\n\n  test(\"build RDD Watches with Eval\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val train = Seq(\n        (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n        (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f)\n      ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n\n      // dataPoint -> (missing, rowNum, nonMissing)\n      Map(0.0f -> (0.0f, 5, 9), Float.NaN -> (0.0f, 5, 9)).foreach {\n        case (data, (missing, expectedRowNum, expectedNonMissing)) =>\n          val eval = Seq(\n            (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n            (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n            (3.0f, data, 5.0f, 6.0f, 0.0f, 0.1f),\n            (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n            (5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 0.1f)\n          ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n\n          val features = Array(\"c1\", \"c2\")\n          val classifier = new XGBoostClassifier()\n            .setNumWorkers(2)\n            .setWeightCol(\"weight\")\n            .setBaseMarginCol(\"margin\")\n            .setFeaturesCol(features)\n            .setDevice(\"cuda\")\n            .setMissing(missing)\n            .setEvalDataset(eval)\n\n          val (rdd, _) = PluginUtils.getPlugin.get.buildRddWatches(classifier, train)\n          val result = rdd.mapPartitions { iter =>\n            val watches = iter.next()\n            val size = watches.size\n            val 
labels = watches.datasets(1).getLabel\n            val weight = watches.datasets(1).getWeight\n            val margins = watches.datasets(1).getBaseMargin\n            val rowNumber = watches.datasets(1).rowNum\n            val nonMissing = watches.datasets(1).nonMissingNum\n            Iterator.single(size, rowNumber, nonMissing, labels, weight, margins)\n          }.collect()\n\n          val labels: ArrayBuffer[Float] = ArrayBuffer.empty\n          val weight: ArrayBuffer[Float] = ArrayBuffer.empty\n          val margins: ArrayBuffer[Float] = ArrayBuffer.empty\n          val rowNumber: ArrayBuffer[Long] = ArrayBuffer.empty\n          val nonMissing: ArrayBuffer[Long] = ArrayBuffer.empty\n\n          for (row <- result) {\n            assert(row._1 === 2)\n            rowNumber.append(row._2)\n            nonMissing.append(row._3)\n            labels.append(row._4: _*)\n            weight.append(row._5: _*)\n            margins.append(row._6: _*)\n          }\n          assert(labels.sorted === Array(0.0f, 1.0f, 0.0f, 0.0f, 1.0f).sorted)\n          assert(weight.sorted === Array(1.0f, 2.0f, 5.0f, 6.0f, 7.0f).sorted)\n          assert(margins.sorted === Array(2.0f, 3.0f, 6.0f, 7.0f, 8.0f).sorted)\n          assert(rowNumber.sum === expectedRowNum)\n          assert(nonMissing.sum === expectedNonMissing)\n      }\n    }\n  }\n\n  test(\"transformed schema\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val df = Seq(\n        (1.0f, 2.0f, 1.0f, 2.0f, 0.0f, 0.0f),\n        (2.0f, 3.0f, 2.0f, 3.0f, 1.0f, 0.1f),\n        (3.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.1f),\n        (4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.1f),\n        (5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 0.1f)\n      ).toDF(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\")\n\n      val estimator = new XGBoostClassifier()\n        .setNumWorkers(1)\n        .setNumRound(2)\n        .setFeaturesCol(Array(\"c1\", \"c2\"))\n        .setLabelCol(\"label\")\n        .setDevice(\"cuda\")\n\n 
     assert(PluginUtils.getPlugin.isDefined && PluginUtils.getPlugin.get.isEnabled(df))\n\n      val out = estimator.fit(df).transform(df)\n      // Transform should not discard the other columns of the transforming dataframe\n      Seq(\"c1\", \"c2\", \"weight\", \"margin\", \"label\", \"other\").foreach { v =>\n        assert(out.schema.names.contains(v))\n      }\n\n      // Transform for XGBoostClassifier needs to add extra columns\n      Seq(\"rawPrediction\", \"probability\", \"prediction\").foreach { v =>\n        assert(out.schema.names.contains(v))\n      }\n      assert(out.schema.names.length === 9)\n\n      val out1 = estimator.setLeafPredictionCol(\"leaf\").setContribPredictionCol(\"contrib\")\n        .fit(df)\n        .transform(df)\n      Seq(\"leaf\", \"contrib\").foreach { v =>\n        assert(out1.schema.names.contains(v))\n      }\n    }\n  }\n\n  private def checkEqual(left: Array[Array[Float]],\n                         right: Array[Array[Float]],\n                         epsilon: Float = 1e-4f): Unit = {\n    assert(left.size === right.size)\n    left.zip(right).foreach { case (leftValue, rightValue) =>\n      leftValue.zip(rightValue).foreach { case (l, r) =>\n        assert(math.abs(l - r) < epsilon)\n      }\n    }\n  }\n\n  Seq(false, true).foreach { useExtMem =>\n    Seq(\"binary:logistic\", \"multi:softprob\").foreach { objective =>\n      test(s\"$objective: XGBoost-Spark should match xgboost4j with useExtMem: ${useExtMem}\") {\n        withGpuSparkSession() { spark =>\n          import spark.implicits._\n\n          val numRound = 100\n          var xgboostParams: Map[String, Any] = Map(\n            \"objective\" -> objective,\n            \"device\" -> \"cuda\"\n          )\n\n          val (trainPath, testPath) = if (objective == \"binary:logistic\") {\n            (writeFile(Classification.train.toDF(\"label\", \"weight\", \"c1\", \"c2\", \"c3\")),\n              writeFile(Classification.test.toDF(\"label\", \"weight\", \"c1\", 
\"c2\", \"c3\")))\n          } else {\n            xgboostParams = xgboostParams ++ Map(\"num_class\" -> 6)\n            (writeFile(MultiClassification.train.toDF(\"label\", \"weight\", \"c1\", \"c2\", \"c3\")),\n              writeFile(MultiClassification.test.toDF(\"label\", \"weight\", \"c1\", \"c2\", \"c3\")))\n          }\n\n          val df = spark.read.parquet(trainPath)\n          val testdf = spark.read.parquet(testPath)\n\n          val features = Array(\"c1\", \"c2\", \"c3\")\n          val featuresIndices = features.map(df.schema.fieldIndex)\n          val label = \"label\"\n\n          val classifier = new XGBoostClassifier(xgboostParams)\n            .setFeaturesCol(features)\n            .setLabelCol(label)\n            .setNumRound(numRound)\n            .setLeafPredictionCol(\"leaf\")\n            .setContribPredictionCol(\"contrib\")\n            .setDevice(\"cuda\")\n            .setUseExternalMemory(useExtMem)\n\n          val xgb4jModel = withResource(new GpuColumnBatch(\n            Table.readParquet(new File(trainPath)))) { batch =>\n            val cb = new CudfColumnBatch(batch.select(featuresIndices),\n              batch.select(df.schema.fieldIndex(label)), null, null, null\n            )\n            val qdm = new QuantileDMatrix(Seq(cb).iterator, classifier.getMissing,\n              classifier.getMaxBins, classifier.getNthread)\n            ScalaXGBoost.train(qdm, xgboostParams, numRound)\n          }\n\n          val (xgb4jLeaf, xgb4jContrib, xgb4jProb, xgb4jRaw) = withResource(new GpuColumnBatch(\n            Table.readParquet(new File(testPath)))) { batch =>\n            val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null\n            )\n            val qdm = new DMatrix(cb, classifier.getMissing, classifier.getNthread)\n            (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm),\n              xgb4jModel.predict(qdm), xgb4jModel.predict(qdm, outPutMargin = true))\n          }\n\n         
 val rows = classifier.fit(df).transform(testdf).collect()\n\n          // Check Leaf\n          val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))\n          checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n          // Check contrib\n          val xgbSparkContrib = rows.map(row =>\n            row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))\n          checkEqual(xgb4jContrib, xgbSparkContrib)\n\n          // Check probability\n          var xgbSparkProb = rows.map(row =>\n            row.getAs[DenseVector](\"probability\").toArray.map(_.toFloat))\n          if (objective == \"binary:logistic\") {\n            xgbSparkProb = xgbSparkProb.map(v => Array(v(1)))\n          }\n          checkEqual(xgb4jProb, xgbSparkProb)\n\n          // Check raw\n          var xgbSparkRaw = rows.map(row =>\n            row.getAs[DenseVector](\"rawPrediction\").toArray.map(_.toFloat))\n          if (objective == \"binary:logistic\") {\n            xgbSparkRaw = xgbSparkRaw.map(v => Array(v(1)))\n          }\n          checkEqual(xgb4jRaw, xgbSparkRaw)\n\n        }\n      }\n    }\n  }\n\n  Seq(false, true).foreach { useExtMem =>\n    test(s\"Regression: XGBoost-Spark should match xgboost4j with useExtMem: ${useExtMem}\") {\n      withGpuSparkSession() { spark =>\n        import spark.implicits._\n\n        val trainPath = writeFile(Regression.train.toDF(\"label\", \"weight\", \"c1\", \"c2\", \"c3\"))\n        val testPath = writeFile(Regression.test.toDF(\"label\", \"weight\", \"c1\", \"c2\", \"c3\"))\n\n        val df = spark.read.parquet(trainPath)\n        val testdf = spark.read.parquet(testPath)\n\n        val features = Array(\"c1\", \"c2\", \"c3\")\n        val featuresIndices = features.map(df.schema.fieldIndex)\n        val label = \"label\"\n\n        val numRound = 100\n        val xgboostParams: Map[String, Any] = Map(\n          \"device\" -> \"cuda\"\n        )\n\n        val regressor = new XGBoostRegressor(xgboostParams)\n    
      .setFeaturesCol(features)\n          .setLabelCol(label)\n          .setNumRound(numRound)\n          .setLeafPredictionCol(\"leaf\")\n          .setContribPredictionCol(\"contrib\")\n          .setDevice(\"cuda\")\n          .setUseExternalMemory(useExtMem)\n\n        val xgb4jModel = withResource(new GpuColumnBatch(\n          Table.readParquet(new File(trainPath)))) { batch =>\n          val cb = new CudfColumnBatch(batch.select(featuresIndices),\n            batch.select(df.schema.fieldIndex(label)), null, null, null\n          )\n          val qdm = new QuantileDMatrix(Seq(cb).iterator, regressor.getMissing,\n            regressor.getMaxBins, regressor.getNthread)\n          ScalaXGBoost.train(qdm, xgboostParams, numRound)\n        }\n\n        val (xgb4jLeaf, xgb4jContrib, xgb4jPred) = withResource(new GpuColumnBatch(\n          Table.readParquet(new File(testPath)))) { batch =>\n          val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null\n          )\n          val qdm = new DMatrix(cb, regressor.getMissing, regressor.getNthread)\n          (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm),\n            xgb4jModel.predict(qdm))\n        }\n\n        val rows = regressor.fit(df).transform(testdf).collect()\n\n        // Check Leaf\n        val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))\n        checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n        // Check contrib\n        val xgbSparkContrib = rows.map(row =>\n          row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))\n        checkEqual(xgb4jContrib, xgbSparkContrib)\n\n        // Check prediction\n        val xgbSparkPred = rows.map(row =>\n          Array(row.getAs[Double](\"prediction\").toFloat))\n        checkEqual(xgb4jPred, xgbSparkPred)\n      }\n    }\n  }\n\n  test(\"The group col should be sorted in each partition\") {\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val 
df = Ranking.train.toDF(\"label\", \"weight\", \"group\", \"c1\", \"c2\", \"c3\")\n\n      val xgboostParams: Map[String, Any] = Map(\n        \"device\" -> \"cuda\",\n        \"objective\" -> \"rank:ndcg\"\n      )\n      val features = Array(\"c1\", \"c2\", \"c3\")\n      val label = \"label\"\n      val group = \"group\"\n\n      val ranker = new XGBoostRanker(xgboostParams)\n        .setFeaturesCol(features)\n        .setLabelCol(label)\n        .setNumWorkers(1)\n        .setNumRound(1)\n        .setGroupCol(group)\n        .setDevice(\"cuda\")\n\n      val processedDf = PluginUtils.getPlugin.get.asInstanceOf[GpuXGBoostPlugin]\n        .preprocess(ranker, df)\n      processedDf.rdd.foreachPartition { iter => {\n        var prevGroup = Int.MinValue\n        while (iter.hasNext) {\n          val curr = iter.next()\n          val group = curr.asInstanceOf[Row].getAs[Int](1)\n          assert(prevGroup <= group)\n          prevGroup = group\n        }\n      }\n      }\n    }\n  }\n\n  test(\"Same group must be in the same partition\") {\n    val num_workers = 3\n    withGpuSparkSession() { spark =>\n      import spark.implicits._\n      val df = spark.createDataFrame(spark.sparkContext.parallelize(Seq(\n        (0.1, 1, 0),\n        (0.1, 1, 0),\n        (0.1, 1, 0),\n        (0.1, 1, 1),\n        (0.1, 1, 1),\n        (0.1, 1, 1),\n        (0.1, 1, 2),\n        (0.1, 1, 2),\n        (0.1, 1, 2)), 1)).toDF(\"label\", \"f1\", \"group\")\n\n      // The original pattern will repartition df in a RoundRobin manner\n      val oriRows = df.repartition(num_workers)\n        .sortWithinPartitions(df.col(\"group\"))\n        .select(\"group\")\n        .mapPartitions { case iter =>\n          val tmp: ArrayBuffer[Int] = ArrayBuffer.empty\n          while (iter.hasNext) {\n            val r = iter.next()\n            tmp.append(r.getInt(0))\n          }\n          Iterator.single(tmp.mkString(\",\"))\n        }.collect()\n      assert(oriRows.length == 3)\n      
assert(oriRows.contains(\"0,1,2\"))\n\n      // The fix has replaced repartition with repartitionByRange which will put the\n      // instances with same group into the same partition\n      val ranker = new XGBoostRanker().setGroupCol(\"group\").setNumWorkers(num_workers)\n      val processedDf = PluginUtils.getPlugin.get.asInstanceOf[GpuXGBoostPlugin]\n        .preprocess(ranker, df)\n      val rows = processedDf\n        .select(\"group\")\n        .mapPartitions { case iter =>\n          val tmp: ArrayBuffer[Int] = ArrayBuffer.empty\n          while (iter.hasNext) {\n            val r = iter.next()\n            tmp.append(r.getInt(0))\n          }\n          Iterator.single(tmp.mkString(\",\"))\n        }.collect()\n\n      rows.forall(Seq(\"0,0,0\", \"1,1,1\", \"2,2,2\").contains)\n    }\n  }\n\n  Seq(false, true).foreach { useExtMem =>\n    test(s\"Ranker: XGBoost-Spark should match xgboost4j with useExtMem=$useExtMem\") {\n      withGpuSparkSession() { spark =>\n        import spark.implicits._\n\n        val trainPath = writeFile(Ranking.train.toDF(\"label\", \"weight\", \"group\", \"c1\", \"c2\", \"c3\"))\n        val testPath = writeFile(Ranking.test.toDF(\"label\", \"weight\", \"group\", \"c1\", \"c2\", \"c3\"))\n\n        val df = spark.read.parquet(trainPath)\n        val testdf = spark.read.parquet(testPath)\n\n        val features = Array(\"c1\", \"c2\", \"c3\")\n        val featuresIndices = features.map(df.schema.fieldIndex)\n        val label = \"label\"\n        val group = \"group\"\n\n        val numRound = 100\n        val xgboostParams: Map[String, Any] = Map(\n          \"device\" -> \"cuda\",\n          \"objective\" -> \"rank:ndcg\"\n        )\n\n        val ranker = new XGBoostRanker(xgboostParams)\n          .setFeaturesCol(features)\n          .setLabelCol(label)\n          .setNumRound(numRound)\n          .setLeafPredictionCol(\"leaf\")\n          .setContribPredictionCol(\"contrib\")\n          .setGroupCol(group)\n          
.setDevice(\"cuda\")\n          .setUseExternalMemory(useExtMem)\n\n        val xgb4jModel = withResource(new GpuColumnBatch(\n          Table.readParquet(new File(trainPath)\n          ).orderBy(OrderByArg.asc(df.schema.fieldIndex(group))))) { batch =>\n          val cb = new CudfColumnBatch(batch.select(featuresIndices),\n            batch.select(df.schema.fieldIndex(label)), null, null,\n            batch.select(df.schema.fieldIndex(group)))\n          val qdm = new QuantileDMatrix(Seq(cb).iterator, ranker.getMissing,\n            ranker.getMaxBins, ranker.getNthread)\n          ScalaXGBoost.train(qdm, xgboostParams, numRound)\n        }\n\n        val (xgb4jLeaf, xgb4jContrib, xgb4jPred) = withResource(new GpuColumnBatch(\n          Table.readParquet(new File(testPath)))) { batch =>\n          val cb = new CudfColumnBatch(batch.select(featuresIndices), null, null, null, null\n          )\n          val qdm = new DMatrix(cb, ranker.getMissing, ranker.getNthread)\n          (xgb4jModel.predictLeaf(qdm), xgb4jModel.predictContrib(qdm),\n            xgb4jModel.predict(qdm))\n        }\n\n        val rows = ranker.fit(df).transform(testdf).collect()\n\n        // Check Leaf\n        val xgbSparkLeaf = rows.map(row => row.getAs[DenseVector](\"leaf\").toArray.map(_.toFloat))\n        checkEqual(xgb4jLeaf, xgbSparkLeaf)\n\n        // Check contrib\n        val xgbSparkContrib = rows.map(row =>\n          row.getAs[DenseVector](\"contrib\").toArray.map(_.toFloat))\n        checkEqual(xgb4jContrib, xgbSparkContrib)\n\n        // Check prediction\n        val xgbSparkPred = rows.map(row =>\n          Array(row.getAs[Double](\"prediction\").toFloat))\n        checkEqual(xgb4jPred, xgbSparkPred)\n      }\n    }\n  }\n\n  def writeFile(df: Dataset[_]): String = {\n    def listFiles(directory: String): Array[String] = {\n      val dir = new File(directory)\n      if (dir.exists && dir.isDirectory) {\n        dir.listFiles.filter(f => f.isFile && 
f.getName.startsWith(\"part-\")).map(_.getName)\n      } else {\n        Array.empty[String]\n      }\n    }\n\n    val dir = createTmpFolder(\"gpu_\").toAbsolutePath.toString\n    df.coalesce(1).write.parquet(s\"$dir/data\")\n\n    val file = listFiles(s\"$dir/data\")(0)\n    s\"$dir/data/$file\"\n  }\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala",
    "content": "/*\n Copyright (c) 2014-2024 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage ml.dmlc.xgboost4j.scala.spark\n\nimport scala.util.Random\n\ntrait TrainTestData {\n\n  protected def generateClassificationDataset(\n      numRows: Int,\n      numClass: Int,\n      seed: Int = 1): Seq[(Int, Float, Float, Float, Float)] = {\n    val random = new Random()\n    random.setSeed(seed)\n    (1 to numRows).map { _ =>\n      val label = random.nextInt(numClass)\n      // label, weight, c1, c2, c3\n      (label, random.nextFloat().abs, random.nextGaussian().toFloat, random.nextGaussian().toFloat,\n        random.nextGaussian().toFloat)\n    }\n  }\n\n  protected def generateRegressionDataset(\n      numRows: Int,\n      seed: Int = 11): Seq[(Float, Float, Float, Float, Float)] = {\n    val random = new Random()\n    random.setSeed(seed)\n    (1 to numRows).map { _ =>\n      // label, weight, c1, c2, c3\n      (random.nextFloat(), random.nextFloat().abs, random.nextGaussian().toFloat,\n        random.nextGaussian().toFloat,\n        random.nextGaussian().toFloat)\n    }\n  }\n\n  protected def generateRankDataset(\n      numRows: Int,\n      numClass: Int,\n      maxGroup: Int = 12,\n      seed: Int = 99): Seq[(Int, Float, Int, Float, Float, Float)] = {\n    val random = new Random()\n    random.setSeed(seed)\n    (1 to numRows).map { _ =>\n      val group = random.nextInt(maxGroup)\n      // label, weight, group, c1, c2, c3\n      
(random.nextInt(numClass), group.toFloat, group,\n        random.nextGaussian().toFloat,\n        random.nextGaussian().toFloat,\n        random.nextGaussian().toFloat)\n    }\n  }\n}\n\nobject Classification extends TrainTestData {\n  val train = generateClassificationDataset(300, 2, 3)\n  val test = generateClassificationDataset(150, 2, 5)\n}\n\nobject MultiClassification extends TrainTestData {\n  val train = generateClassificationDataset(300, 4, 11)\n  val test = generateClassificationDataset(150, 4, 12)\n}\n\nobject Regression extends TrainTestData {\n  val train = generateRegressionDataset(300, 222)\n  val test = generateRegressionDataset(150, 223)\n}\n\nobject Ranking extends TrainTestData {\n  val train = generateRankDataset(300, 10, 12, 555)\n  val test = generateRankDataset(150, 10, 12, 556)\n}\n"
  },
  {
    "path": "jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala",
    "content": "/*\n Copyright (c) 2023 by Contributors\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n */\n\npackage org.apache.spark\n\nimport org.apache.spark.sql.SparkSession\n\nobject GpuTestUtils {\n\n  def cleanupAnyExistingSession(): Unit = {\n    SparkSession.cleanupAnyExistingSession()\n  }\n\n}\n"
  },
  {
    "path": "ops/conda_env/aarch64_test.yml",
    "content": "name: aarch64_test\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- pip\n- wheel\n- pytest\n- pytest-cov\n- numpy\n- scipy\n- scikit-learn\n- pandas\n- matplotlib\n- dask\n- distributed\n- hypothesis\n- graphviz\n- python-graphviz\n- codecov\n- cmake\n- ninja\n- boto3\n- awscli\n- numba\n- llvmlite\n- loky>=3.5.1\n- pyarrow\n- pyspark>=3.4.0\n- cloudpickle\n- pip:\n  - awscli\n  - auditwheel\n"
  },
  {
    "path": "ops/conda_env/cpp_test.yml",
    "content": "# conda environment for CPP test on Linux distributions\nname: cpp_test\nchannels:\n- conda-forge\ndependencies:\n- cmake\n- ninja\n- c-compiler\n- cxx-compiler\n- gtest\n- protobuf\n- libgrpc\n"
  },
  {
    "path": "ops/conda_env/linux_cpu_test.yml",
    "content": "name: linux_cpu_test\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- cmake>=3.26.4\n- c-compiler\n- cxx-compiler\n- ninja\n- pip\n- wheel\n- pyyaml\n- cpplint\n- pylint\n- numpy\n- scipy\n- scikit-learn>=1.4.1\n- pandas\n- polars\n- matplotlib\n- dask<=2024.10.0\n- distributed<=2024.10.0\n- python-graphviz\n- hypothesis>=6.46\n- astroid\n- sh\n- mock\n- pytest\n- pytest-timeout\n- pytest-cov\n- python-kubernetes\n- urllib3\n- boto3\n- awscli\n- py-ubjson\n- loky>=3.5.1\n- pyarrow\n- protobuf\n- cloudpickle\n- modin\n- pyspark>=3.4.0\n"
  },
  {
    "path": "ops/conda_env/linux_sycl_test.yml",
    "content": "name: linux_sycl_test\nchannels:\n- conda-forge\n- https://software.repos.intel.com/python/conda/\ndependencies:\n- python=3.10\n- cmake>=3.26.4\n- c-compiler\n- cxx-compiler\n- gtest\n- pip\n- wheel\n- numpy\n- scipy\n- scikit-learn\n- pandas\n- hypothesis>=6.46\n- pytest\n- pytest-timeout\n- pytest-cov\n- dask=2024.11\n- ninja\n- dpcpp_linux-64>=2024.2.1\n- onedpl-devel\n- intel-openmp\n"
  },
  {
    "path": "ops/conda_env/macos_cpu_test.yml",
    "content": "name: macos_test\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- pip\n- wheel\n- pyyaml\n- numpy\n- scipy\n- llvm-openmp\n- scikit-learn>=1.4.1\n- pandas\n- matplotlib\n- dask\n- distributed\n- graphviz\n- python-graphviz\n- hypothesis\n- astroid\n- sh\n- pytest\n- pytest-cov\n- pytest-timeout\n- python-kubernetes\n- urllib3\n- boto3\n- awscli\n- loky>=3.5.1\n- pyarrow\n- cloudpickle\n"
  },
  {
    "path": "ops/conda_env/minimal.yml",
    "content": "name: minimal\nchannels:\n- conda-forge\ndependencies:\n- python=3.11\n- awscli\n"
  },
  {
    "path": "ops/conda_env/python_lint.yml",
    "content": "name: python_lint\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- pylint\n- wheel\n- setuptools\n- mypy\n- numpy\n- scipy\n- pandas\n- pyarrow\n- scikit-learn\n- dask\n- distributed\n- black\n- isort\n- cloudpickle\n- pytest\n- hypothesis\n- hatchling\n- pyspark>=3.4.0\n"
  },
  {
    "path": "ops/conda_env/sdist_test.yml",
    "content": "# conda environment for source distribution test.\nname: sdist_test\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- pip\n- wheel\n- cmake\n- ninja\n- python-build\n"
  },
  {
    "path": "ops/conda_env/win64_test.yml",
    "content": "name: win64_env\nchannels:\n- conda-forge\ndependencies:\n- python=3.10\n- numpy\n- scipy\n- matplotlib\n- scikit-learn\n- pandas\n- pytest\n- boto3\n- hypothesis\n- cupy>=13.2,<14\n- python-graphviz\n- pip\n- py-ubjson\n- loky>=3.5.1\n- pyarrow\n"
  },
  {
    "path": "ops/docker_run.py",
    "content": "\"\"\"\nWrapper script to run a command inside a Docker container\n\"\"\"\n\nimport argparse\nimport grp\nimport itertools\nimport os\nimport pathlib\nimport pwd\nimport subprocess\nimport sys\nimport textwrap\n\nOPS_DIR = pathlib.Path(__file__).expanduser().resolve().parent\nPROJECT_ROOT_DIR = OPS_DIR.parent\nLINEWIDTH = 88\nTEXT_WRAPPER = textwrap.TextWrapper(\n    width=LINEWIDTH,\n    initial_indent=\"\",\n    subsequent_indent=\"    \",\n    break_long_words=False,\n    break_on_hyphens=False,\n)\n\n\ndef parse_run_args(*, raw_run_args: str) -> list[str]:\n    return [x for x in raw_run_args.split() if x]\n\n\ndef get_user_ids() -> dict[str, str]:\n    uid = os.getuid()\n    gid = os.getgid()\n    return {\n        \"CI_BUILD_UID\": str(uid),\n        \"CI_BUILD_USER\": pwd.getpwuid(uid).pw_name,\n        \"CI_BUILD_GID\": str(gid),\n        \"CI_BUILD_GROUP\": grp.getgrgid(gid).gr_name,\n    }\n\n\ndef fancy_print_cli_args(*, cli_args: list[str]) -> None:\n    print(\n        \"=\" * LINEWIDTH\n        + \"\\n\"\n        + \"  \\\\\\n\".join(TEXT_WRAPPER.wrap(\" \".join(cli_args)))\n        + \"\\n\"\n        + \"=\" * LINEWIDTH\n        + \"\\n\",\n        flush=True,\n    )\n\n\ndef docker_run(\n    *,\n    image_uri: str,\n    command_args: list[str],\n    use_gpus: bool,\n    workdir: pathlib.Path,\n    user_ids: dict[str, str],\n    extra_args: list[str],\n) -> None:\n    # Command-line arguments to be passed to `docker run`\n    docker_run_cli_args = [\"--rm\", \"--pid=host\"]\n\n    if use_gpus:\n        docker_run_cli_args.extend([\"--gpus\", \"all\"])\n\n    docker_run_cli_args.extend([\"-v\", f\"{workdir}:/workspace\", \"-w\", \"/workspace\"])\n    docker_run_cli_args.extend(\n        itertools.chain.from_iterable([[\"-e\", f\"{k}={v}\"] for k, v in user_ids.items()])\n    )\n    docker_run_cli_args.extend(extra_args)\n    docker_run_cli_args.append(image_uri)\n    docker_run_cli_args.extend(command_args)\n\n    cli_args = 
[\"docker\", \"run\"] + docker_run_cli_args\n    fancy_print_cli_args(cli_args=cli_args)\n    subprocess.run(cli_args, check=True, encoding=\"utf-8\")\n\n\ndef main(*, args: argparse.Namespace) -> None:\n    run_args = parse_run_args(raw_run_args=args.run_args)\n    user_ids = get_user_ids()\n\n    if args.use_gpus:\n        print(\"Using NVIDIA GPUs for `docker run`\")\n    if args.interactive:\n        print(\"Using interactive mode for `docker run`\")\n        run_args.append(\"-it\")\n\n    docker_run(\n        image_uri=args.image_uri,\n        command_args=args.command_args,\n        use_gpus=args.use_gpus,\n        workdir=args.workdir,\n        user_ids=user_ids,\n        extra_args=run_args,\n    )\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        usage=(\n            f\"{sys.argv[0]} --image-uri IMAGE_URI [--use-gpus] [--interactive] \"\n            \"[--workdir WORKDIR] [--run-args RUN_ARGS] -- COMMAND_ARG \"\n            \"[COMMAND_ARG ...]\"\n        ),\n        description=\"Run tasks inside a Docker container\",\n    )\n    parser.add_argument(\n        \"--image-uri\",\n        type=str,\n        required=True,\n        help=(\n            \"Fully qualified image URI to identify the container, e.g. \"\n            \"492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main\"\n        ),\n    )\n    parser.add_argument(\n        \"--use-gpus\",\n        action=\"store_true\",\n        help=(\n            \"Grant the container access to NVIDIA GPUs; requires the NVIDIA \"\n            \"Container Toolkit.\"\n        ),\n    )\n    parser.add_argument(\n        \"--interactive\",\n        action=\"store_true\",\n        help=(\n            \"Run the container in the interactive mode; requires an interactive shell \"\n            \"(TTY). 
With this flag, you can use Ctrl-C to interrupt a long-running \"\n            \"command.\"\n        ),\n    )\n    parser.add_argument(\n        \"--workdir\",\n        type=lambda p: pathlib.Path(p).expanduser().resolve(),\n        default=PROJECT_ROOT_DIR,\n        help=\"Path to working directory; if unset, use the project's root\",\n    )\n    parser.add_argument(\n        \"--run-args\",\n        type=str,\n        default=\"\",\n        help=(\n            \"Argument(s) to be passed to `docker run`. When passing multiple \"\n            \"arguments, use single quotes to wrap them. Example: \"\n            \"--run-args '--cap-add SYS_PTRACE --shm-size=4g'\"\n        ),\n    )\n    parser.add_argument(\n        \"command_args\",\n        metavar=\"COMMAND_ARG\",\n        type=str,\n        nargs=\"+\",\n        help=(\n            \"Argument(s) for the command to execute. NOTE. Make sure to specify \"\n            \"double-dash (--) to clearly distinguish between the command and the \"\n            \"preceding parameters. Example: --run-args '--cap-add SYS_PTRACE \"\n            \"--shm-size=4g' -- ./myprog\"\n        ),\n    )\n\n    if len(sys.argv) == 1:\n        parser.print_help()\n        sys.exit(1)\n\n    parsed_args = parser.parse_args()\n    main(args=parsed_args)\n"
  },
  {
    "path": "ops/pipeline/audit-cuda-wheel.sh",
    "content": "#!/bin/bash\n## Audit XGBoost CUDA wheel for manylinux compliance\n## This script runs inside the manylinux container (via GitHub Actions container support)\n\nset -euo pipefail\n\nif [[ -z \"${GITHUB_SHA:-}\" ]]\nthen\n  echo \"Make sure to set environment variable GITHUB_SHA\"\n  exit 1\nfi\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 {x86_64,aarch64} [--cuda-variant {cuda,cuda13}]\"\n  exit 2\nfi\narch=\"$1\"\ncuda_variant=\"cuda\"  # default\n\n# Parse optional arguments\nshift\nwhile [[ $# -gt 0 ]]; do\n  case $1 in\n    --cuda-variant)\n      cuda_variant=\"$2\"\n      shift 2\n      ;;\n    *)\n      echo \"Unknown option: $1\"\n      exit 2\n      ;;\n  esac\ndone\n\nsource ops/pipeline/classify-git-branch.sh\n\nWHEEL_TAG=manylinux_2_28_${arch}\n\nset -x\n\necho \"--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard\"\nauditwheel repair --only-plat --plat ${WHEEL_TAG} python-package/dist/*.whl\npython3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \\\n  wheelhouse/*.whl\nmv -v wheelhouse/*.whl python-package/dist/\n\nif ! 
unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then\n  echo \"error: libgomp.so was not vendored in the wheel\"\n  exit -1\nfi\n\n# Check size of wheel\npydistcheck --config python-package/pyproject.toml python-package/dist/*.whl\n\n# Generate meta.json only for the main CUDA variant (not cuda13)\nif [[ $cuda_variant == \"cuda\" && $arch == \"x86_64\" ]]\nthen\n  # Generate the meta info which includes xgboost version and the commit info\n  # TODO(hcho3): Generate meta.json that contains both x86_64 and aarch64 wheels\n  echo \"--- Generate meta info\"\n  python3 ops/script/format_wheel_meta.py \\\n    --wheel-path python-package/dist/*.whl  \\\n    --commit-hash ${GITHUB_SHA}  \\\n    --platform-tag ${WHEEL_TAG}  \\\n    --meta-path python-package/dist/\nfi\n\necho \"--- Upload Python wheel\"\nif [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]\nthen\n  python3 ops/pipeline/manage-artifacts.py upload \\\n    --s3-bucket xgboost-nightly-builds \\\n    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \\\n    python-package/dist/*.whl\n\n  if [[ $cuda_variant == \"cuda\" && $arch == \"x86_64\" ]]\n  then\n    python3 ops/pipeline/manage-artifacts.py upload \\\n      --s3-bucket xgboost-nightly-builds \\\n      --prefix ${BRANCH_NAME} --make-public \\\n      python-package/dist/meta.json\n  fi\nfi\n"
  },
  {
    "path": "ops/pipeline/build-cpu.sh",
    "content": "#!/bin/bash\n## Build and test XGBoost with CPU\n\nset -euox pipefail\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 {cpu,cpu-nonomp,cpu-sanitizer,i386}\"\n  exit 1\nfi\nsuite=\"$1\"\n\nmkdir -p build\npushd build\n\ncase \"${suite}\" in\n  cpu)\n    echo \"--- Build libxgboost from the source\"\n    cmake .. \\\n      -GNinja \\\n      -DHIDE_CXX_SYMBOLS=ON \\\n      -DGOOGLE_TEST=ON \\\n      -DUSE_DMLC_GTEST=ON \\\n      -DENABLE_ALL_WARNINGS=ON \\\n      -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \\\n      -DCMAKE_PREFIX_PATH='/opt/grpc' \\\n      -DPLUGIN_FEDERATED=ON\n    time ninja -v\n    echo \"--- Run Google Test\"\n    ctest --extra-verbose\n    ;;\n  cpu-nonomp)\n    echo \"--- Build and test XGBoost with OpenMP disabled\"\n    cmake .. \\\n      -GNinja \\\n      -DUSE_OPENMP=OFF \\\n      -DHIDE_CXX_SYMBOLS=ON \\\n      -DGOOGLE_TEST=ON \\\n      -DENABLE_ALL_WARNINGS=ON \\\n      -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF\n    time ninja -v\n    ctest --extra-verbose\n    ;;\n  cpu-sanitizer)\n    echo \"--- Build and test XGBoost with sanitizer\"\n    cmake .. 
\\\n      -GNinja \\\n      -DHIDE_CXX_SYMBOLS=ON \\\n      -DGOOGLE_TEST=ON \\\n      -DUSE_DMLC_GTEST=ON \\\n      -DENABLE_ALL_WARNINGS=ON \\\n      -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n      -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \\\n      -DUSE_SANITIZER=ON \\\n      -DENABLED_SANITIZERS=\"address;leak;undefined\" \\\n      -DCMAKE_BUILD_TYPE=Debug \\\n      -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/\n    time ninja -v\n    ./testxgboost --gtest_filter=-*DeathTest*\n    ;;\n  i386)\n    echo \"--- Build and test XGBoost for i386 (32-bit)\"\n    export CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move -Wno-narrowing'\n    cmake .. \\\n      -GNinja \\\n      -DGOOGLE_TEST=ON \\\n      -DUSE_DMLC_GTEST=ON \\\n      -DENABLE_ALL_WARNINGS=ON \\\n      -DCMAKE_COMPILE_WARNING_AS_ERROR=ON\n    time ninja -v\n    # TODO(hcho3): Run gtest for i386\n    # ./testxgboost\n    ;;\n  *)\n    echo \"Unrecognized argument: $suite\"\n    exit 1\n    ;;\nesac\n\npopd\n"
  },
  {
    "path": "ops/pipeline/build-cuda-impl.sh",
    "content": "#!/bin/bash\n## Build XGBoost with CUDA\n## Companion script for ops/pipeline/build-cuda.sh\n\nset -euox pipefail\n\nif [[ \"${BUILD_ONLY_SM75:-}\" == 1 ]]\nthen\n  cmake_args='-DGPU_COMPUTE_VER=75'\nelse\n  cmake_args=''\nfi\n\nif [[ \"${USE_FEDERATED:-0}\" == 1 ]]\nthen\n  cmake_args=\"${cmake_args} -DPLUGIN_FEDERATED=ON\"\nelse\n  cmake_args=\"${cmake_args} -DPLUGIN_FEDERATED=OFF\"\nfi\n\nif [[ \"${USE_RMM:-0}\" == 1 ]]\nthen\n  cmake_prefix_path='/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake'\n  cmake_args=\"${cmake_args} -DPLUGIN_RMM=ON\"\nelse\n  cmake_prefix_path='/opt/grpc'\nfi\n\n# Disable CMAKE_COMPILE_WARNING_AS_ERROR option temporarily until\n# https://github.com/dmlc/xgboost/issues/10400 is fixed\necho \"--- Build libxgboost from the source\"\nmkdir -p build\npushd build\ncmake .. \\\n  -GNinja \\\n  -DCMAKE_PREFIX_PATH=\"${cmake_prefix_path}\" \\\n  -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache \\\n  -DUSE_CUDA=ON \\\n  -DUSE_OPENMP=ON \\\n  -DHIDE_CXX_SYMBOLS=ON \\\n  -DUSE_NCCL=ON \\\n  -DUSE_NCCL_LIB_PATH=ON \\\n  -DNCCL_INCLUDE_DIR=/usr/include \\\n  -DUSE_DLOPEN_NCCL=ON \\\n  -DGOOGLE_TEST=ON \\\n  -DUSE_DMLC_GTEST=ON \\\n  -DENABLE_ALL_WARNINGS=ON \\\n  -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \\\n  ${cmake_args}\ntime ninja -v\npopd\n\necho \"--- Build binary wheel\"\npushd python-package\nrm -rfv dist/*\npip wheel --no-deps -v . --wheel-dir dist/\npopd\n"
  },
  {
    "path": "ops/pipeline/build-cuda.sh",
    "content": "#!/bin/bash\n## Build XGBoost with CUDA\n## This script runs inside a container (via GitHub Actions container support)\n##\n## Usage:\n##   build-cuda.sh --cuda-version <12|13> --use-rmm <0|1> --use-federated <0|1>\n##\n## All parameters are required (no defaults).\n##\n## Examples:\n##   # CUDA 12 standard build\n##   build-cuda.sh --cuda-version 12 --use-rmm 0 --use-federated 1\n##\n##   # CUDA 12 with RMM\n##   build-cuda.sh --cuda-version 12 --use-rmm 1 --use-federated 1\n##\n##   # CUDA 13\n##   build-cuda.sh --cuda-version 13 --use-rmm 0 --use-federated 0\n##\n##   # Variant wheels (CUDA 12 without federated)\n##   build-cuda.sh --cuda-version 12 --use-rmm 0 --use-federated 0\n\nset -euo pipefail\n\n# All parameters are required - no defaults\ncuda_version=\"\"\nuse_rmm=\"\"\nuse_federated=\"\"\n\n# Parse arguments\nwhile [[ $# -gt 0 ]]; do\n  case \"$1\" in\n    --cuda-version)\n      cuda_version=\"$2\"\n      shift 2\n      ;;\n    --use-rmm)\n      use_rmm=\"$2\"\n      shift 2\n      ;;\n    --use-federated)\n      use_federated=\"$2\"\n      shift 2\n      ;;\n    *)\n      echo \"Unrecognized argument: $1\"\n      echo \"Usage: $0 --cuda-version <12|13> --use-rmm <0|1> --use-federated <0|1>\"\n      exit 1\n      ;;\n  esac\ndone\n\n# Validate all required parameters are provided\nif [[ -z \"${cuda_version}\" ]]; then\n  echo \"Error: --cuda-version is required (12 or 13)\"\n  exit 1\nfi\n\nif [[ -z \"${use_rmm}\" ]]; then\n  echo \"Error: --use-rmm is required (0 or 1)\"\n  exit 1\nfi\n\nif [[ -z \"${use_federated}\" ]]; then\n  echo \"Error: --use-federated is required (0 or 1)\"\n  exit 1\nfi\n\n# Validate parameter values\ncase \"${cuda_version}\" in\n  12|13)\n    ;;\n  *)\n    echo \"Error: --cuda-version must be 12 or 13, got '${cuda_version}'\"\n    exit 1\n    ;;\nesac\n\ncase \"${use_rmm}\" in\n  0|1)\n    ;;\n  *)\n    echo \"Error: --use-rmm must be 0 or 1, got '${use_rmm}'\"\n    exit 1\n    ;;\nesac\n\ncase 
\"${use_federated}\" in\n  0|1)\n    ;;\n  *)\n    echo \"Error: --use-federated must be 0 or 1, got '${use_federated}'\"\n    exit 1\n    ;;\nesac\n\n# Validate CUDA 13 constraints\nif [[ \"${cuda_version}\" == \"13\" ]]; then\n  if [[ \"${use_rmm}\" == \"1\" ]]; then\n    echo \"Error: RMM is not supported for CUDA 13 (--use-rmm must be 0)\"\n    exit 1\n  fi\n  if [[ \"${use_federated}\" == \"1\" ]]; then\n    echo \"Error: Federated plugin is not supported for CUDA 13 (--use-federated must be 0)\"\n    exit 1\n  fi\nfi\n\n# Export validated values\nexport USE_RMM=\"${use_rmm}\"\nexport USE_FEDERATED=\"${use_federated}\"\n\nsource ops/pipeline/classify-git-branch.sh\n\necho \"--- Build with CUDA ${cuda_version}\"\n\nif [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]; then\n  export BUILD_ONLY_SM75=1\nelse\n  export BUILD_ONLY_SM75=0\nfi\n\nset -x\n\n# Configure PyPI variant for CUDA 13\nif [[ \"${cuda_version}\" == \"13\" ]]; then\n  python3 ops/script/pypi_variants.py --use-suffix=cu13 --require-nccl-dep=cu13\nfi\n\n# Run the build implementation directly (we're already inside the container)\nops/pipeline/build-cuda-impl.sh\n"
  },
  {
    "path": "ops/pipeline/build-gpu-rpkg.sh",
    "content": "#!/bin/bash\n## Build XGBoost R package with GPU support and package it in a tarball.\n## Users will be able to install it without having CTK installed\n## (only a compatible NVIDIA driver is needed).\n## This script runs inside the container (via GitHub Actions container support).\n\nset -euox pipefail\n\npython3 ops/script/test_r_package.py --task=pack\nmv xgboost/ xgboost_rpack/\n\nmkdir build\ncd build\ncmake .. -GNinja \\\n  -DUSE_CUDA=ON \\\n  -DR_LIB=ON \\\n  -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache\nninja -v\ncd ..\n\n# This super wacky hack is found in cmake/RPackageInstall.cmake.in and\n# cmake/RPackageInstallTargetSetup.cmake. This hack lets us bypass the normal build process of R\n# and have R use xgboost.so that we've already built.\nrm -v xgboost_rpack/configure\nrm -rfv xgboost_rpack/src\nmkdir -p xgboost_rpack/src\ncp -v lib/xgboost.so xgboost_rpack/src/\necho 'all:' > xgboost_rpack/src/Makefile\necho 'all:' > xgboost_rpack/src/Makefile.win\nmv xgboost_rpack/ xgboost/\ntar cvzf xgboost_r_gpu_linux.tar.gz xgboost/\n"
  },
  {
    "path": "ops/pipeline/build-jvm-doc.sh",
    "content": "#!/bin/bash\n## Build docs for the JVM packages and package it in a tarball.\n## Note: this script assumes that the user has already built libxgboost4j.so\n## and placed it in the lib/ directory.\n\nset -euo pipefail\n\nif [[ -z ${BRANCH_NAME:-} ]]; then\n  echo \"Make sure to define environment variable BRANCH_NAME.\"\n  exit 1\nfi\n\nif [[ ! -f lib/libxgboost4j.so ]]; then\n  echo \"Must place libxgboost4j.so in lib/ first\"\n  exit 2\nfi\n\necho \"--- Build JVM packages doc\"\nset -x\n\n# Copy in libxgboost4j.so\nmkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/\ncp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/\n\ncd jvm-packages/\n# Install JVM packages in local Maven repository\nmvn --no-transfer-progress install -Pdocs\n# Build Scaladocs\nmvn --no-transfer-progress scala:doc -Pdocs\n# Build Javadocs\nmvn --no-transfer-progress javadoc:javadoc -Pdocs\n\n# Package JVM docs in a tarball\nmkdir -p tmp/scaladocs\ncp -rv xgboost4j/target/reports/apidocs/ ./tmp/javadocs/\ncp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/\ncp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/\ncp -rv xgboost4j-spark-gpu/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark-gpu/\ncp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/\n\ncd tmp\ntar cvjf ${BRANCH_NAME}.tar.bz2 javadocs/ scaladocs/\nmv ${BRANCH_NAME}.tar.bz2 ..\ncd ..\nrm -rfv tmp/\n"
  },
  {
    "path": "ops/pipeline/build-jvm-gpu.sh",
    "content": "#!/bin/bash\n## Build libxgboost4j.so with CUDA\n## This script runs inside the container (via GitHub Actions container support).\n\nset -euox pipefail\n\nsource ops/pipeline/classify-git-branch.sh\n\necho \"--- Build libxgboost4j.so with CUDA\"\n\nif [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]; then\n  arch_flag=\"-DGPU_COMPUTE_VER=75\"\nelse\n  arch_flag=\"\"\nfi\n\nmkdir -p build-gpu\ncd build-gpu\n\ncmake .. -GNinja \\\n  -DUSE_CUDA=ON \\\n  -DUSE_NCCL=ON \\\n  -DJVM_BINDINGS=ON \\\n  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \\\n  -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache \\\n  ${arch_flag}\n\ntime ninja -v\n"
  },
  {
    "path": "ops/pipeline/build-jvm-macos.sh",
    "content": "#!/bin/bash\n## Build libxgboost4j.dylib for MacOS (Apple Silicon or Intel)\n\nset -euox pipefail\n\n# Display system info\necho \"--- Display system information\"\nsystem_profiler SPSoftwareDataType\nsysctl -n machdep.cpu.brand_string\nuname -m\n\nbrew install ninja libomp\n\n# Build XGBoost4J binary\necho \"--- Build libxgboost4j.dylib\"\nmkdir build\npushd build\nexport JAVA_HOME=$(/usr/libexec/java_home)\ncmake .. -GNinja \\\n  -DJVM_BINDINGS=ON \\\n  -DUSE_OPENMP=ON \\\n  -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 \\\n  -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache\nninja -v\npopd\n\nrm -rf build\notool -L lib/libxgboost.dylib\n"
  },
  {
    "path": "ops/pipeline/build-jvm-manylinux2014.sh",
    "content": "#!/bin/bash\n## Build libxgboost4j.so targeting glibc 2.17 systems\n\nset -euo pipefail\n\nif [[ $# -ne 1 ]]\nthen\n  echo \"Usage: $0 {x86_64,aarch64}\"\n  exit 1\nfi\n\narch=$1\nimage_repo=\"xgb-ci.manylinux2014_${arch}\"\n\nsource ops/pipeline/classify-git-branch.sh\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nIMAGE_URI=\"${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}\"\n\n# Build XGBoost4J binary\necho \"--- Build libxgboost4j.so (targeting glibc 2.17)\"\nset -x\nmkdir build\npython3 ops/docker_run.py \\\n  --image-uri \"${IMAGE_URI}\" \\\n  -- bash -c \\\n  \"cd build && cmake .. -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && make -j$(nproc)\"\nldd lib/libxgboost4j.so\nobjdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\\([.0-9]*\\).*/\\1/g' | sort -Vu\n\nif [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]\nthen\n  libname=lib/libxgboost4j_linux_${arch}.so\n  mv -v lib/libxgboost4j.so ${libname}\n  python3 ops/pipeline/manage-artifacts.py upload \\\n    --s3-bucket xgboost-nightly-builds \\\n    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \\\n    ${libname}\nfi\n"
  },
  {
    "path": "ops/pipeline/build-python-wheels-arm64-impl.sh",
    "content": "#!/bin/bash\n## Build and test XGBoost with ARM64 CPU\n## Companion script for ops/pipeline/build-cpu-arm64.sh\n\nset -euox pipefail\n\nsource activate aarch64_test\n\necho \"--- Build libxgboost from the source\"\nmkdir -p build\npushd build\n\ncmake .. \\\n  -GNinja \\\n  -DCMAKE_PREFIX_PATH=\"${CONDA_PREFIX}\" \\\n  -DUSE_OPENMP=ON \\\n  -DHIDE_CXX_SYMBOLS=ON \\\n  -DGOOGLE_TEST=ON \\\n  -DUSE_DMLC_GTEST=ON \\\n  -DENABLE_ALL_WARNINGS=ON \\\n  -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF\ntime ninja -v\n\necho \"--- Run Google Test\"\nctest --extra-verbose\npopd\n\necho \"--- Build binary wheel\"\npushd python-package\nrm -rfv dist/*\npip wheel --no-deps -v . --wheel-dir dist/\npopd\n"
  },
  {
    "path": "ops/pipeline/build-python-wheels-cpu.sh",
    "content": "#!/bin/bash\n# Build Python wheels, CPU variant (no federated learning)\n\nset -euo pipefail\n\nif [[ -z \"${GITHUB_SHA:-}\" ]]\nthen\n  echo \"Make sure to set environment variable GITHUB_SHA\"\n  exit 1\nfi\n\nif [[ \"$#\" -lt 2 ]]\nthen\n  echo \"Usage: $0 {manylinux2014,manylinux_2_28} {x86_64,aarch64}\"\n  exit 1\nfi\n\nmanylinux_target=\"$1\"\narch=\"$2\"\n\nsource ops/pipeline/classify-git-branch.sh\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nWHEEL_TAG=\"${manylinux_target}_${arch}\"\nIMAGE_REPO=\"xgb-ci.${WHEEL_TAG}\"\nIMAGE_URI=\"${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}\"\nPYTHON_BIN=\"/opt/python/cp310-cp310/bin/python\"\n\necho \"--- Build binary wheel for ${WHEEL_TAG} (CPU only)\"\nset -x\n\n# Patch to rename pkg to xgboost-cpu\npython3 ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na\npython3 ops/docker_run.py \\\n  --image-uri \"${IMAGE_URI}\" \\\n  -- bash -c \\\n  \"cd python-package && ${PYTHON_BIN} -m pip wheel --no-deps -v . --wheel-dir dist/\"\n\npython3 ops/docker_run.py \\\n  --image-uri \"${IMAGE_URI}\" \\\n  -- auditwheel repair --only-plat \\\n  --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl\npython3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \\\n  wheelhouse/xgboost_cpu-*.whl\nrm -v python-package/dist/xgboost_cpu-*.whl\nmv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/\n\nif ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then\n  echo \"error: libgomp.so was not vendored in the wheel\"\n  exit -1\nfi\n\n# Check size of wheel\npydistcheck --config python-package/pyproject.toml python-package/dist/*.whl\n\nif [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]\nthen\n  python3 ops/pipeline/manage-artifacts.py upload \\\n    --s3-bucket xgboost-nightly-builds \\\n    --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \\\n    python-package/dist/*.whl\nfi\n"
  },
  {
    "path": "ops/pipeline/build-python-wheels-macos.sh",
    "content": "#!/bin/bash\n# Build Python wheels targeting MacOS (no federated learning)\n\nset -euox pipefail\n\nif [[ $# -ne 2 ]]; then\n  echo \"Usage: $0 [platform_id] [commit ID]\"\n  exit 1\nfi\n\nplatform_id=$1\ncommit_id=$2\n\nif [[ \"$platform_id\" == macosx_* ]]; then\n    if [[ \"$platform_id\" == macosx_arm64 ]]; then\n        # MacOS, Apple Silicon\n        cpython_ver=310\n        cibw_archs=arm64\n        export MACOSX_DEPLOYMENT_TARGET=12.0\n    elif [[ \"$platform_id\" == macosx_x86_64 ]]; then\n        # MacOS, Intel\n        cpython_ver=310\n        cibw_archs=x86_64\n        export MACOSX_DEPLOYMENT_TARGET=10.15\n    else\n        echo \"Platform not supported: $platform_id\"\n        exit 3\n    fi\n    # Set up environment variables to configure cibuildwheel\n    export CIBW_BUILD=cp${cpython_ver}-${platform_id}\n    export CIBW_ARCHS=${cibw_archs}\n    export CIBW_TEST_SKIP='*-macosx_arm64'\n    export CIBW_BUILD_VERBOSITY=3\nelse\n    echo \"Platform not supported: $platform_id\"\n    exit 2\nfi\n\n# Tell delocate-wheel to not vendor libomp.dylib into the wheel\nexport CIBW_REPAIR_WHEEL_COMMAND_MACOS=\"delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --exclude libomp.dylib\"\n\nbrew unlink llvm@18 || true\n\npython -m pip install cibuildwheel\npython -m cibuildwheel python-package --output-dir wheelhouse\n"
  },
  {
    "path": "ops/pipeline/build-r-docs.sh",
    "content": "#!/bin/bash\n## Build docs for the R package and package it in a tarball.\n\nset -euo pipefail\n\nif [[ -z ${BRANCH_NAME:-} ]]; then\n  echo \"Make sure to define environment variable BRANCH_NAME.\"\n  exit 1\nfi\n\nif [[ -z \"${R_LIBS_USER:-}\" ]]; then\n  export R_LIBS_USER=/tmp/rtmpdir\nfi\n\necho \"--- Build R package doc\"\necho \"R_LIBS_USER: ${R_LIBS_USER}\"\nset -x\n\nif [[ ! -d ${R_LIBS_USER} ]]; then\n  echo \"Make ${R_LIBS_USER} for installing temporary R packages.\"\n  mkdir ${R_LIBS_USER}\nfi\n\n# Used only in container environment\nif command -v gosu 2>&1 >/dev/null; then\n  gosu root chown -R $UID:$GROUPS ${R_LIBS_USER}\nfi\n\ncd R-package\n\nMAKEFLAGS=-j$(nproc) Rscript ./tests/helper_scripts/install_deps.R\n# Some examples are failing\nMAKEFLAGS=-j$(nproc) Rscript -e \"pkgdown::build_site(examples=FALSE)\"\n# Install the package for vignettes\nMAKEFLAGS=-j$(nproc) R CMD INSTALL .\n\ncd -\n\ncd doc/R-package\nmake -j$(nproc) all\n\ncd ../../  # back to project root\n\ntar cvjf r-docs-${BRANCH_NAME}.tar.bz2 R-package/docs doc/R-package/xgboost_introduction.md doc/R-package/xgboostfromJSON.md\n"
  },
  {
    "path": "ops/pipeline/build-test-jvm-packages.sh",
    "content": "#!/bin/bash\n## Build and test JVM packages.\n## This script runs inside the container (via GitHub Actions container support).\n##\n## Note. This script takes in all inputs via environment variables.\n\nINPUT_DOC=$(\ncat <<-EOF\nInputs\n  - SCALA_VERSION:     Scala version, either 2.12 or 2.13 (Required)\n  - USE_CUDA:          Set to 1 to enable CUDA\n  - SKIP_NATIVE_BUILD: Set to 1 to have the JVM packages use an externally provided\n                       libxgboost4j.so. (Usually Maven will invoke create_jni.py to\n                       build it from scratch.) When using this option, make sure to\n                       place libxgboost4j.so in lib/ directory.\nEOF\n)\n\nset -euo pipefail\n\nfor arg in \"SCALA_VERSION\"\ndo\n  if [[ -z \"${!arg:-}\" ]]\n  then\n    echo -e \"Error: $arg must be set.\\n${INPUT_DOC}\"\n    exit 1\n  fi\ndone\n\nset -x\n\n# Set Scala version\nif [[ \"${SCALA_VERSION}\" == \"2.12\" || \"${SCALA_VERSION}\" == \"2.13\" ]]\nthen\n  python ops/script/change_scala_version.py --scala-version ${SCALA_VERSION} --purge-artifacts\nelse\n  echo \"Error: SCALA_VERSION must be either 2.12 or 2.13\"\n  exit 2\nfi\n\n# If SKIP_NATIVE_BUILD is set, copy in libxgboost4j.so from lib/\n# Also copy in other files needed for testing. 
(Usually create_jni.py would perform this\n# step, but we need to do it manually here.)\nif [[ \"${SKIP_NATIVE_BUILD:-}\" == \"1\" ]]\nthen\n  bash ops/script/inject_jvm_lib.sh\nfi\n\ncd jvm-packages/\n\n# Ensure that XGBoost4J-Spark is compatible with multiple versions of Spark\nif [[ \"${USE_CUDA:-}\" != \"1\" && \"${SCALA_VERSION}\" == \"2.12\" ]]\nthen\n  for spark_version in 3.1.3 3.2.4 3.3.4 3.4.3\n  do\n    mvn --no-transfer-progress clean package -Dspark.version=${spark_version} \\\n      -pl xgboost4j,xgboost4j-spark\n  done\nfi\n\nset +x\nmvn_options=\"\"\nif [[ \"${USE_CUDA:-}\" == \"1\" ]]\nthen\n  mvn_options=\"${mvn_options} -Pgpu\"\nfi\nif [[ \"${SKIP_NATIVE_BUILD:-}\" == \"1\" ]]\nthen\n  mvn_options=\"${mvn_options} -Dskip.native.build=true\"\nfi\nset -x\n\nmvn --no-transfer-progress clean install ${mvn_options}\n\n# Integration tests\nif [[ \"${USE_CUDA:-}\" != \"1\" ]]\nthen\n  mvn --no-transfer-progress test -pl xgboost4j-example\nfi\n"
  },
  {
    "path": "ops/pipeline/build-test-sycl.sh",
    "content": "#!/bin/bash\n## Build and test oneAPI\n\nset -euox pipefail\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 {gtest,pytest}\"\n  exit 1\nfi\n\nsuite=\"$1\"\n\nmkdir build\npushd build\ncmake .. -DGOOGLE_TEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \\\n  -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \\\n  -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -GNinja\nninja\npopd\n\ncase \"$suite\" in\n  gtest)\n    ./build/testxgboost\n    ;;\n  pytest)\n    cd python-package\n    python --version\n    pip install -v .\n    cd ..\n    pytest -s -v -rxXs --durations=0 ./tests/python-sycl/\n    ;;\nesac\n"
  },
  {
    "path": "ops/pipeline/build-variant-wheels-impl.sh",
    "content": "#!/bin/bash\n## Build Python wheels using Wheel Variant prototype (WheelNext)\n## Companion script for ops/pipeline/build-variant-wheels.sh\n\nset -eo pipefail\n\nset -x\ngosu root chown -R $(id -u):$(id -g) /opt/miniforge/envs /opt/miniforge/pkgs/cache\ngosu root chown $(id -u):$(id -g) /opt/miniforge/pkgs\nset +x\n\nmamba create -y -n wheelnext python=3.13 python-build\n\nsource activate wheelnext\n\n# Cannot set -u before Conda env activation\nset -xu\n\npython -m pip install \"variantlib[cli] @ git+https://github.com/wheelnext/variantlib.git@main\"\npython -m pip install \"nvidia-variant-provider @ git+https://github.com/wheelnext/nvidia-variant-provider.git@master\"\nvariantlib make-variant --no-isolation -f python-package/dist/xgboost-*.whl \\\n  -p \"nvidia :: cuda_version_lower_bound :: 12.0\" \\\n  -p \"nvidia :: cuda_version_upper_bound :: 13\" \\\n  -o . --pyproject-toml python-package/pyproject.toml\n"
  },
  {
    "path": "ops/pipeline/build-variant-wheels.sh",
    "content": "#!/bin/bash\n## Build Python wheels using Wheel Variant prototype (WheelNext)\n## This script runs inside a container (via GitHub Actions container support)\n##\n## Uses CUDA 12 build settings with RMM and federated disabled.\n## Does not modify the package name (keeps default 'xgboost').\n\nset -euo pipefail\n\n# Use the unified build script with explicit parameters:\n# - CUDA 12\n# - RMM disabled\n# - Federated disabled (for variant wheel compatibility)\nexec bash ops/pipeline/build-cuda.sh \\\n  --cuda-version 12 \\\n  --use-rmm 0 \\\n  --use-federated 0\n"
  },
  {
    "path": "ops/pipeline/build-win64.ps1",
    "content": "## Build XGBoost on Windows (CPU or GPU)\n##\n## Usage:\n##   build-win64.ps1 -variant <cpu|gpu>\n##\n## Parameters:\n##   -variant cpu  - Build CPU-only version (creates xgboost-cpu wheel)\n##   -variant gpu  - Build with CUDA support (creates default xgboost wheel, includes gtest)\n##\n## Examples:\n##   # Build CPU wheel (xgboost-cpu)\n##   ops/pipeline/build-win64.ps1 -variant cpu\n##\n##   # Build GPU wheel with CUDA\n##   ops/pipeline/build-win64.ps1 -variant gpu\n\nparam(\n  [Parameter(Mandatory=$true)]\n  [ValidateSet(\"cpu\", \"gpu\")]\n  [string]$variant\n)\n\n$ErrorActionPreference = \"Stop\"\n\n. ops/pipeline/enforce-ci.ps1\n\n# Build common CMake arguments\n$cmake_args = @(\n  \"-G\", \"Ninja\",\n  \"-DCMAKE_BUILD_TYPE=Release\",\n  \"-DCMAKE_C_COMPILER_LAUNCHER=sccache\",\n  \"-DCMAKE_CXX_COMPILER_LAUNCHER=sccache\"\n)\n\nif ($variant -eq \"gpu\") {\n  Write-Host \"--- Build libxgboost on Windows with CUDA\"\n  nvcc --version\n  if ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\n  # Add CUDA-specific flags\n  $cmake_args += @(\n    \"-DUSE_CUDA=ON\",\n    \"-DGOOGLE_TEST=ON\",\n    \"-DUSE_DMLC_GTEST=ON\"\n  )\n\n  # Only build SM75 for non-release branches (faster CI)\n  if ($is_release_branch -eq 0) {\n    $cmake_args += \"-DGPU_COMPUTE_VER=75\"\n  }\n} else {\n  Write-Host \"--- Build libxgboost on Windows (CPU, minimal)\"\n}\n\n# Run CMake configure\nmkdir build\ncd build\n& cmake .. @cmake_args\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\n# Build\ncmake --build . -v\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\nWrite-Host \"--- Build binary wheel\"\ncd ..\n\n# For CPU variant, rename package to xgboost-cpu\nif ($variant -eq \"cpu\") {\n  conda activate\n  python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na\n  if ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n}\n\ncd python-package\nconda activate\n& pip wheel --no-deps -v . 
--wheel-dir dist/\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\npython -m wheel tags --python-tag py3 --abi-tag none `\n  --platform win_amd64 --remove `\n  (Get-ChildItem dist/*.whl | Select-Object -Expand FullName)\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\nWrite-Host \"--- Upload Python wheel\"\ncd ..\nif ($is_release_branch -eq 1) {\n  python ops/pipeline/manage-artifacts.py upload `\n    --s3-bucket 'xgboost-nightly-builds' `\n    --prefix \"$Env:BRANCH_NAME/$Env:GITHUB_SHA\" --make-public `\n    (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)\n  if ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n}\n"
  },
  {
    "path": "ops/pipeline/classify-git-branch.sh",
    "content": "#!/bin/bash\n## Detect whether the current git branch is a pull request or a release branch\n\nset -euo pipefail\n\nif [[ -n ${GITHUB_BASE_REF:-} ]]\nthen\n  is_pull_request=1\nelse\n  is_pull_request=0\nfi\n\nif [[ ${BRANCH_NAME:-} == \"master\" || ${BRANCH_NAME:-} == \"release_\"* || ${BRANCH_NAME:-} == \"federated-secure\" ]]\nthen\n  is_release_branch=1\n  enforce_daily_budget=0\nelse\n  is_release_branch=0\n  enforce_daily_budget=1\nfi\n\nif [[ -n ${DISABLE_RELEASE:-} ]]\nthen\n  is_release_branch=0\nfi\n"
  },
  {
    "path": "ops/pipeline/deploy-jvm-packages.sh",
    "content": "#!/bin/bash\n## Deploy JVM packages to S3 bucket\n\nset -euox pipefail\n\nif [[ \"$#\" -lt 2 ]]\nthen\n  echo \"Usage: $0 {cpu,gpu} scala_version\"\n  exit 1\nfi\n\nvariant=\"$1\"\nscala_version=\"$2\"\nmaven_options=\"-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true\"\n\ncase \"$variant\" in\n  cpu)\n    # CPU variant\n    python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts\n    bash ops/script/inject_jvm_lib.sh\n    pushd jvm-packages\n    mvn --no-transfer-progress deploy -Pdefault,release-to-s3 ${maven_options}\n    popd\n    ;;\n  gpu)\n    # GPU variant\n    python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts\n    bash ops/script/inject_jvm_lib.sh\n    pushd jvm-packages\n    mvn --no-transfer-progress install -Pgpu ${maven_options}\n    mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu ${maven_options}\n    popd\n    ;;\n  *)\n    echo \"Unrecognized argument: $variant\"\n    exit 2\n    ;;\nesac\n"
  },
  {
    "path": "ops/pipeline/enforce-ci.ps1",
    "content": "## Ensure that a script is running inside the CI.\n## Usage: . ops/pipeline/enforce-ci.ps1\n\nif ( -Not $Env:GITHUB_ACTIONS ) {\n  $script_name = (Split-Path -Path $PSCommandPath -Leaf)\n  Write-Host \"$script_name is not meant to run locally; it should run inside GitHub Actions.\"\n  Write-Host \"Please inspect the content of $script_name and locate the desired command manually.\"\n  exit 1\n}\n\nif ( -Not $Env:BRANCH_NAME ) {\n  Write-Host \"Make sure to define environment variable BRANCH_NAME.\"\n  exit 2\n}\n\nif ( $Env:GITHUB_BASE_REF ) {\n  $is_pull_request = 1\n} else {\n  $is_pull_request = 0\n}\n\nif ( ($Env:BRANCH_NAME -eq \"master\") -or ($Env:BRANCH_NAME -match \"release_.+\") ) {\n  $is_release_branch = 1\n  $enforce_daily_budget = 0\n} else {\n  $is_release_branch = 0\n  $enforce_daily_budget = 1\n}\n"
  },
  {
    "path": "ops/pipeline/enforce-ci.sh",
    "content": "#!/bin/bash\n\n## Ensure that a script is running inside the CI.\n## Usage: source ops/pipeline/enforce-ci.sh\n\nset -euo pipefail\n\nif [[ -z ${GITHUB_ACTIONS:-} ]]\nthen\n  echo \"$0 is not meant to run locally; it should run inside GitHub Actions.\"\n  echo \"Please inspect the content of $0 and locate the desired command manually.\"\n  exit 1\nfi\n\nif [[ -z ${BRANCH_NAME:-} ]]\nthen\n  echo \"Make sure to define environment variable BRANCH_NAME.\"\n  exit 2\nfi\n\nsource ops/pipeline/classify-git-branch.sh\n"
  },
  {
    "path": "ops/pipeline/get-docker-registry-details.sh",
    "content": "## Get details for AWS ECR (Elastic Container Registry) in environment variables\n\nECR_AWS_ACCOUNT_ID=\"492475357299\"\nECR_AWS_REGION=\"us-west-2\"\nDOCKER_REGISTRY_URL=\"${ECR_AWS_ACCOUNT_ID}.dkr.ecr.${ECR_AWS_REGION}.amazonaws.com\"\n"
  },
  {
    "path": "ops/pipeline/get-image-tag.sh",
    "content": "#!/usr/bin/env bash\n\n## Update the following line to test changes to CI images\n## See https://xgboost.readthedocs.io/en/latest/contrib/ci.html#making-changes-to-ci-containers\n\nIMAGE_TAG=main\n"
  },
  {
    "path": "ops/pipeline/login-docker-registry.sh",
    "content": "## Log into AWS ECR (Elastic Container Registry) to be able to pull containers from it\n## Note. Requires valid AWS credentials\n\nset -euo pipefail\n\nsource ops/pipeline/get-docker-registry-details.sh\n\necho \"aws ecr get-login-password --region ${ECR_AWS_REGION} |\" \\\n    \"docker login --username AWS --password-stdin ${DOCKER_REGISTRY_URL}\"\naws ecr get-login-password --region ${ECR_AWS_REGION} \\\n  | docker login --username AWS --password-stdin ${DOCKER_REGISTRY_URL}\n"
  },
  {
    "path": "ops/pipeline/manage-artifacts.py",
    "content": "\"\"\"\nUpload an artifact to an S3 bucket for later use\nNote. This script takes in all inputs via environment variables\n      except the path to the artifact(s).\n\"\"\"\n\nimport argparse\nimport os\nimport subprocess\nimport sys\nfrom pathlib import Path\nfrom urllib.parse import SplitResult, urlsplit, urlunsplit\n\n\ndef resolve(x: Path) -> Path:\n    return x.expanduser().resolve()\n\n\ndef path_equals(a: Path, b: Path) -> bool:\n    return resolve(a) == resolve(b)\n\n\ndef compute_s3_url(*, s3_bucket: str, prefix: str, artifact: str) -> str:\n    if prefix == \"\":\n        return f\"s3://{s3_bucket}/{artifact}\"\n    return f\"s3://{s3_bucket}/{prefix}/{artifact}\"\n\n\ndef aws_s3_upload(*, src: Path, dest: str, make_public: bool) -> None:\n    cli_args = [\"aws\", \"s3\", \"cp\", \"--no-progress\", str(src), dest]\n    if make_public:\n        cli_args.extend([\"--acl\", \"public-read\"])\n    print(\" \".join(cli_args))\n    subprocess.run(\n        cli_args,\n        check=True,\n        encoding=\"utf-8\",\n    )\n\n\ndef aws_s3_download(*, src: str, dest_dir: Path) -> None:\n    cli_args = [\"aws\", \"s3\", \"cp\", \"--no-progress\", src, str(dest_dir)]\n    print(\" \".join(cli_args))\n    subprocess.run(\n        cli_args,\n        check=True,\n        encoding=\"utf-8\",\n    )\n\n\ndef aws_s3_download_with_wildcard(*, src: str, dest_dir: Path) -> None:\n    parsed_src = urlsplit(src)\n    src_dir = urlunsplit(\n        SplitResult(\n            scheme=\"s3\",\n            netloc=parsed_src.netloc,\n            path=os.path.dirname(parsed_src.path),\n            query=\"\",\n            fragment=\"\",\n        )\n    )\n    src_glob = os.path.basename(parsed_src.path)\n    cli_args = [\n        \"aws\",\n        \"s3\",\n        \"cp\",\n        \"--recursive\",\n        \"--no-progress\",\n        \"--exclude\",\n        \"'*'\",\n        \"--include\",\n        src_glob,\n        src_dir,\n        str(dest_dir),\n    ]\n    
print(\" \".join(cli_args))\n    subprocess.run(\n        cli_args,\n        check=True,\n        encoding=\"utf-8\",\n    )\n\n\ndef upload(*, args: argparse.Namespace) -> None:\n    print(f\"Uploading artifacts to prefix {args.prefix}...\")\n    for artifact in args.artifacts:\n        artifact_path = Path(artifact)\n        s3_url = compute_s3_url(\n            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact_path.name\n        )\n        aws_s3_upload(src=artifact_path, dest=s3_url, make_public=args.make_public)\n\n\ndef download(*, args: argparse.Namespace) -> None:\n    print(f\"Downloading artifacts from prefix {args.prefix}...\")\n    dest_dir = Path(args.dest_dir)\n    print(f\"mkdir -p {str(dest_dir)}\")\n    dest_dir.mkdir(parents=True, exist_ok=True)\n    for artifact in args.artifacts:\n        s3_url = compute_s3_url(\n            s3_bucket=args.s3_bucket, prefix=args.prefix, artifact=artifact\n        )\n        if \"*\" in artifact:\n            aws_s3_download_with_wildcard(src=s3_url, dest_dir=dest_dir)\n        else:\n            aws_s3_download(src=s3_url, dest_dir=dest_dir)\n\n\nif __name__ == \"__main__\":\n    # Ensure that the current working directory is the project root\n    if not (Path.cwd() / \"ops\").is_dir() or not path_equals(\n        Path(__file__).parent.parent, Path.cwd() / \"ops\"\n    ):\n        x = Path(__file__).name\n        raise RuntimeError(f\"Script {x} must be run at the project's root directory\")\n\n    root_parser = argparse.ArgumentParser()\n    subparser_factory = root_parser.add_subparsers(required=True, dest=\"command\")\n    parsers = {}\n    for command in [\"upload\", \"download\"]:\n        parsers[command] = subparser_factory.add_parser(command)\n        parsers[command].add_argument(\n            \"--s3-bucket\",\n            type=str,\n            required=True,\n            help=\"Name of the S3 bucket to store the artifact\",\n        )\n        parsers[command].add_argument(\n            
\"--prefix\",\n            type=str,\n            required=True,\n            help=(\n                \"Where the artifact(s) would be stored. The artifact(s) will be stored at \"\n                \"s3://[s3-bucket]/[prefix]/[filename].\"\n            ),\n        )\n        parsers[command].add_argument(\n            \"artifacts\",\n            type=str,\n            nargs=\"+\",\n            metavar=\"artifact\",\n            help=f\"Artifact(s) to {command}\",\n        )\n\n    parsers[\"upload\"].add_argument(\n        \"--make-public\", action=\"store_true\", help=\"Make artifact publicly accessible\"\n    )\n    parsers[\"download\"].add_argument(\n        \"--dest-dir\", type=str, required=True, help=\"Where to download artifact(s)\"\n    )\n\n    if len(sys.argv) == 1:\n        print(\"1. Upload artifact(s)\")\n        parsers[\"upload\"].print_help()\n        print(\"\\n2. Download artifact(s)\")\n        parsers[\"download\"].print_help()\n        sys.exit(1)\n\n    parsed_args = root_parser.parse_args()\n    if parsed_args.command == \"upload\":\n        upload(args=parsed_args)\n    elif parsed_args.command == \"download\":\n        download(args=parsed_args)\n"
  },
  {
    "path": "ops/pipeline/nightly-test-cccl-impl.sh",
    "content": "#!/bin/bash\n## Companion script for ops/pipeline/nightly-test-cccl.sh\n\nset -eo pipefail\n# Cannot set -u before Conda env activation\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 [cccl_version]\"\n  exit 1\nfi\ncccl_version=\"$1\"\n\n# Set up Conda env\ngosu root chown -R $(id -u):$(id -g) /opt/miniforge/envs /opt/miniforge/pkgs/cache\ngosu root chown $(id -u):$(id -g) /opt/miniforge/pkgs\nmamba create -y -n cccl_test -c conda-forge python=3.13 \\\n  cuda-version=13.0 cxx-compiler cuda-cudart-dev cuda-nvcc gcc_linux-64=14.* ninja \\\n  gtest nccl\n\nsource activate cccl_test\n\nset -xu\ngit clone https://github.com/NVIDIA/cccl.git -b \"${cccl_version}\" --depth 1\ncd cccl\ncmake . -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} -GNinja\nninja install\n\nif [[ \"${BUILD_ONLY_SM75:-}\" == 1 ]]\nthen\n  cmake_args='-DGPU_COMPUTE_VER=75'\nelse\n  cmake_args=''\nfi\n\ncd ..\nmkdir -p build\ncd build\ncmake .. \\\n  -GNinja \\\n  -DCMAKE_PREFIX_PATH=\"${CONDA_PREFIX}\" \\\n  -DUSE_CUDA=ON \\\n  -DUSE_OPENMP=ON \\\n  -DHIDE_CXX_SYMBOLS=ON \\\n  -DUSE_NCCL=ON \\\n  -DUSE_DLOPEN_NCCL=ON \\\n  -DGOOGLE_TEST=ON \\\n  -DENABLE_ALL_WARNINGS=ON \\\n  -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \\\n  ${cmake_args}\nninja -v\n"
  },
  {
    "path": "ops/pipeline/nightly-test-cccl.sh",
    "content": "#!/bin/bash\n## Test XGBoost against latest CCCL\n\nset -euo pipefail\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 [cccl_version]\"\n  exit 1\nfi\ncccl_version=\"$1\"\n\nsource ops/pipeline/classify-git-branch.sh\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nIMAGE_REPO=\"xgb-ci.gpu_build_cuda13_rockylinux8\"\nBUILD_IMAGE_URI=\"${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}\"\n\necho \"--- Build XGBoost with CCCL ${cccl_version}\"\n\nif [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]\nthen\n  export BUILD_ONLY_SM75=1\nelse\n  export BUILD_ONLY_SM75=0\nfi\necho \"BUILD_ONLY_SM75=${BUILD_ONLY_SM75}\"\n\nset -x\n\npython3 ops/docker_run.py \\\n  --image-uri ${BUILD_IMAGE_URI} \\\n  --run-args='-e BUILD_ONLY_SM75' \\\n  -- ops/pipeline/nightly-test-cccl-impl.sh \"${cccl_version}\"\n"
  },
  {
    "path": "ops/pipeline/nightly-test-rmm-impl.sh",
    "content": "#!/bin/bash\n## Companion script for ops/pipeline/nightly-test-rmm.sh\n\nset -eo pipefail\n# Cannot set -u before Conda env activation\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 [rmm_version]\"\n  exit 1\nfi\nrmm_version=\"$1\"\n\n# Set up Conda env\ngosu root chown -R $(id -u):$(id -g) /opt/miniforge/envs /opt/miniforge/pkgs/cache\ngosu root chown $(id -u):$(id -g) /opt/miniforge/pkgs\nmamba create -y -n rmm_test -c conda-forge -c rapidsai-nightly python=3.13 \\\n  cuda-version=13.0 cxx-compiler cuda-cudart-dev cuda-nvcc gcc_linux-64=14.* ninja \\\n  gtest nccl \"rmm=${rmm_version%.*}.*,>=0.0.0a0\"\n\nsource activate rmm_test\n\nif [[ \"${BUILD_ONLY_SM75:-}\" == 1 ]]\nthen\n  cmake_args='-DGPU_COMPUTE_VER=75'\nelse\n  cmake_args=''\nfi\n\nmkdir -p build\ncd build\ncmake .. \\\n  -GNinja \\\n  -DCMAKE_PREFIX_PATH=\"${CONDA_PREFIX}\" \\\n  -DUSE_CUDA=ON \\\n  -DUSE_OPENMP=ON \\\n  -DHIDE_CXX_SYMBOLS=ON \\\n  -DUSE_NCCL=ON \\\n  -DUSE_DLOPEN_NCCL=ON \\\n  -DGOOGLE_TEST=ON \\\n  -DENABLE_ALL_WARNINGS=ON \\\n  -DPLUGIN_RMM=ON \\\n  -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \\\n  ${cmake_args}\nninja -v\n"
  },
  {
    "path": "ops/pipeline/nightly-test-rmm.sh",
    "content": "#!/bin/bash\n## Test XGBoost against latest RMM\n\nset -euo pipefail\n\nif [[ \"$#\" -lt 1 ]]\nthen\n  echo \"Usage: $0 [rmm_version]\"\n  exit 1\nfi\nrmm_version=\"$1\"\n\nsource ops/pipeline/classify-git-branch.sh\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nIMAGE_REPO=\"xgb-ci.gpu_build_cuda13_rockylinux8\"\nBUILD_IMAGE_URI=\"${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}\"\n\necho \"--- Build XGBoost with RMM ${rmm_version}\"\n\nif [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]\nthen\n  export BUILD_ONLY_SM75=1\nelse\n  export BUILD_ONLY_SM75=0\nfi\n\nset -x\n\npython3 ops/docker_run.py \\\n  --image-uri ${BUILD_IMAGE_URI} \\\n  --run-args='-e BUILD_ONLY_SM75' \\\n  -- ops/pipeline/nightly-test-rmm-impl.sh \"${rmm_version}\"\n"
  },
  {
    "path": "ops/pipeline/query-latest-cccl.sh",
    "content": "#!/bin/bash\n## Query latest version of CCCL using GitHub CLI\n## Note: RC version may be selected if available\n\nset -euo pipefail\n\ntmpfile=\"$(mktemp /tmp/abc-script.XXXXXX)\"\ncat >\"$tmpfile\" <<EOL\nimport fileinput\n\nfrom packaging.version import InvalidVersion, Version\n\nversions = []\nfor e in fileinput.input():\n    try:\n        tag = e.strip()\n        versions.append((tag, Version(tag)))\n    except InvalidVersion:\n        pass\nprint(max(versions, key=lambda x : x[1])[0])\nEOL\nexport CCCL_VERSION=$(\n  gh api repos/NVIDIA/cccl/tags --paginate --jq '.[].name' | python3 \"$tmpfile\"\n)\necho \"--- Latest CCCL version: ${CCCL_VERSION}\"\nrm \"$tmpfile\"\n"
  },
  {
    "path": "ops/pipeline/query-latest-rmm.sh",
    "content": "#!/bin/bash\n## Query latest version of RMM using GitHub CLI\n\nset -euo pipefail\n\ntmpfile=\"$(mktemp /tmp/abc-script.XXXXXX)\"\ncat >\"$tmpfile\" <<EOL\nimport fileinput\nimport re\n\nfrom packaging.version import InvalidVersion, Version\n\nversions = []\nfor e in fileinput.input():\n    try:\n        tag = e.strip()\n        versions.append((tag, Version(tag)))\n    except InvalidVersion:\n        pass\nlatest_tag = max(versions, key=lambda x : x[1])[0]\n\nm = re.search(r\"v([0-9]{2}.[0-9]{2})\", latest_tag)\nprint(m.group(1))\nEOL\nexport RMM_VERSION=$(\n  gh api repos/rapidsai/rmm/tags --paginate --jq '.[].name' | python3 \"$tmpfile\"\n)\necho \"--- Latest RMM version: ${RMM_VERSION}\"\nrm \"$tmpfile\"\n"
  },
  {
    "path": "ops/pipeline/run-clang-tidy.sh",
    "content": "#!/bin/bash\n\nset -euo pipefail\n\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nIMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.clang_tidy:${IMAGE_TAG}\n\necho \"--- Run clang-tidy\"\nset -x\npython3 ops/docker_run.py \\\n  --image-uri ${IMAGE_URI} \\\n  -- python3 ops/script/run_clang_tidy.py --cuda-archs 75\n"
  },
  {
    "path": "ops/pipeline/test-c-api-demo.sh",
    "content": "#!/bin/bash\n## Test C API demos\n\nset -euox pipefail\n\n# Build and install XGBoost static library (libxgboost.a)\nmkdir build\npushd build\n\ncmake .. \\\n  -GNinja \\\n  -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \\\n  -DCMAKE_C_COMPILER_LAUNCHER=sccache \\\n  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \\\n  -DBUILD_STATIC_LIB=ON\n\nninja -v install\npopd\n\n# Build and run C API demo with static library\npushd demo/c-api/\nmkdir build-c-api-demo\npushd build-c-api-demo\ncmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX\nninja -v\nctest\npopd\nrm -rf ./build-c-api-demo\npopd\n\n# Build and install XGBoost shared library (libxgboost.so)\npushd build\ncmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja \\\n  -DPLUGIN_FEDERATED=ON\nninja -v install\npopd\n\n# Build and run C API demo with shared library\nmkdir demo/c-api/build-c-api-demo\npushd demo/c-api/build-c-api-demo\ncmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX\nninja -v\nctest\npopd\n./ops/script/verify_link.sh ./demo/c-api/build-c-api-demo/basic/api-demo\n./ops/script/verify_link.sh ./demo/c-api/build-c-api-demo/external-memory/external-memory-demo\n"
  },
  {
    "path": "ops/pipeline/test-cpp-i386.sh",
    "content": "#!/bin/bash\n## Run C++ tests for i386\n\nset -euo pipefail\n\nsource ops/pipeline/get-docker-registry-details.sh\nsource ops/pipeline/get-image-tag.sh\n\nIMAGE_URI=\"${DOCKER_REGISTRY_URL}/xgb-ci.i386:${IMAGE_TAG}\"\n\nset -x\npython3 ops/docker_run.py \\\n  --image-uri ${IMAGE_URI} \\\n  -- bash ops/pipeline/build-cpu.sh i386\n"
  },
  {
    "path": "ops/pipeline/test-freebsd.sh",
    "content": "#!/bin/bash\n## Run tests on FreeBSD\n\nset -euox pipefail\n\nmkdir build\ncd build\ncmake .. -GNinja -DGOOGLE_TEST=ON\nninja -v\n./testxgboost\n"
  },
  {
    "path": "ops/pipeline/test-python-macos.sh",
    "content": "#!/bin/bash\n## Test XGBoost Python wheel on MacOS\n\nset -euox pipefail\n\nbrew install ninja\n\nmkdir build\npushd build\n# Set prefix, to use OpenMP library from Conda env\n# See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228\n# to learn why we don't use libomp from Homebrew.\ncmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX\nninja\npopd\n\ncd python-package\npython --version\npip install -v .\n\ncd ..\npytest -s -v -rxXs --durations=0 ./tests/python\npytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask\n"
  },
  {
    "path": "ops/pipeline/test-python-sdist.sh",
    "content": "#!/bin/bash\n## Test installing Python XGBoost from source distribution\n\nset -euox pipefail\n\ncd python-package\npython --version\npython -m build --sdist\npip install -v ./dist/xgboost-*.tar.gz\ncd ..\npython -c 'import xgboost'\n"
  },
  {
    "path": "ops/pipeline/test-python-wheel.sh",
    "content": "#!/bin/bash\n## Script to test Python wheels, to be called from CI workflow\n##\n## Usage:\n##   ops/pipeline/test-python-wheel.sh --suite <suite> [--cuda-version <12|13>]\n##\n## --suite is required. --cuda-version is required for GPU suites but optional for CPU suites.\n\nset -eo pipefail\n\nsuite=\"\"\ncuda_version=\"\"\n\n# Parse arguments\nwhile [[ $# -gt 0 ]]; do\n  case \"$1\" in\n    --suite)\n      suite=\"$2\"\n      shift 2\n      ;;\n    --cuda-version)\n      cuda_version=\"$2\"\n      shift 2\n      ;;\n    *)\n      echo \"Unrecognized argument: $1\"\n      echo \"Usage: $0 --suite {gpu|mgpu|gpu-arm64|cpu|cpu-arm64} [--cuda-version {12|13}]\"\n      exit 1\n      ;;\n  esac\ndone\n\n# Validate required parameters\nif [[ -z \"${suite}\" ]]; then\n  echo \"Error: --suite is required (gpu, mgpu, gpu-arm64, cpu, or cpu-arm64)\"\n  exit 1\nfi\n\n# Validate parameter values\ncase \"${suite}\" in\n  gpu|mgpu|gpu-arm64|cpu|cpu-arm64)\n    ;;\n  *)\n    echo \"Error: --suite must be one of: gpu, mgpu, gpu-arm64, cpu, cpu-arm64. 
Got '${suite}'\"\n    exit 1\n    ;;\nesac\n\n# Validate --cuda-version is provided for GPU suites\ncase \"${suite}\" in\n  gpu|mgpu|gpu-arm64)\n    if [[ -z \"${cuda_version}\" ]]; then\n      echo \"Error: --cuda-version is required for GPU suites (12 or 13)\"\n      exit 1\n    fi\n    ;;\nesac\n\n# Validate --cuda-version value if provided\nif [[ -n \"${cuda_version}\" ]]; then\n  case \"${cuda_version}\" in\n    12|13)\n      ;;\n    *)\n      echo \"Error: --cuda-version must be 12 or 13, got '${cuda_version}'\"\n      exit 1\n      ;;\n  esac\nfi\n\n# Set up conda environment based on CUDA version and suite\n# Cannot set -u before Conda env activation\ncase \"$suite\" in\n  gpu|mgpu|gpu-arm64)\n    if [[ \"${cuda_version}\" == \"13\" ]]; then\n      # CUDA 13: Create conda environment on-the-fly\n      # Fix permissions for conda directories\n      gosu root chown -R \"$(id -u):$(id -g)\" /opt/miniforge/envs /opt/miniforge/pkgs/cache\n      gosu root chown \"$(id -u):$(id -g)\" /opt/miniforge/pkgs\n      mamba create -y -n gpu_test python=3.12 pytest cupy scipy numpy pandas scikit-learn joblib hypothesis\n    fi\n    source activate gpu_test\n    ;;\n  cpu|cpu-arm64)\n    source activate linux_cpu_test\n    ;;\nesac\n\nset -xu\n\nexport PYSPARK_DRIVER_PYTHON=$(which python)\nexport PYSPARK_PYTHON=$(which python)\n# This variable enables a special test mode in spark, but it's never publicly documented\n# as of writing.\nexport SPARK_TESTING=1\n\npip install -v ./wheelhouse/*.whl\n\ncase \"$suite\" in\n  gpu|gpu-arm64)\n    echo \"-- Run Python tests, using a single GPU\"\n    python -c 'from cupy.cuda import jitify; jitify._init_module()'\n    pytest -v -s -rxXs --durations=0 -m 'not mgpu' tests/python-gpu\n    ;;\n  mgpu)\n    echo \"-- Run Python tests, using multiple GPUs\"\n    python -c 'from cupy.cuda import jitify; jitify._init_module()'\n    pytest -v -s -rxXs --durations=0 -m 'mgpu' tests/python-gpu\n    pytest -v -s -rxXs --durations=0 
tests/test_distributed/test_gpu_with_dask\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_with_spark/test_data.py -k dmatrix_ctor_gpu\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_with_spark/test_spark.py -k local_cluster_gpu\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_gpu_federated\n    ;;\n  cpu)\n    echo \"-- Run Python tests (CPU)\"\n    export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1\n    pytest -v -s -rxXs --durations=0 tests/python\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_with_dask\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_with_spark\n    pytest -v -s -rxXs --durations=0 tests/test_distributed/test_federated\n    ;;\n  cpu-arm64)\n    echo \"-- Run Python tests (CPU, ARM64)\"\n    pytest -v -s -rxXs --durations=0 \\\n      tests/python/test_basic.py tests/python/test_basic_models.py \\\n      tests/python/test_model_compatibility.py\n    ;;\n  *)\n    echo \"Unrecognized suite: $suite\"\n    exit 1\n    ;;\nesac\n"
  },
  {
    "path": "ops/pipeline/test-python-with-sysprefix.sh",
    "content": "#!/bin/bash\n## Test if Python XGBoost can be configured to use libxgboost.so from the system prefix\n\nset -euox pipefail\n\nsudo apt-get update && sudo apt-get install -y ninja-build\n\nmkdir build\npushd build\ncmake .. -GNinja\nninja\npopd\n\n# Copy libxgboost.so to system prefix\ncp -v lib/* \"$(python -c 'import sys; print(sys.base_prefix)')/lib\"\n\n# Now configure Python XGBoost to use libxgboost.so from the system prefix\ncd python-package\npip install virtualenv\nvirtualenv venv\nsource venv/bin/activate && \\\n  pip install -v . --config-settings use_system_libxgboost=True && \\\n  python -c 'import xgboost'\n"
  },
  {
    "path": "ops/pipeline/test-win64-gpu.ps1",
    "content": "$ErrorActionPreference = \"Stop\"\n\nWrite-Host \"--- Test XGBoost on Windows with CUDA\"\n\nnvcc --version\n\nWrite-Host \"--- Run Google Tests\"\nbuild/testxgboost.exe\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\nWrite-Host \"--- Set up Python env\"\nconda activate\n$env_name = -join(\"win64_\", (New-Guid).ToString().replace(\"-\", \"\"))\nmamba env create -n ${env_name} --file=ops/conda_env/win64_test.yml\nconda activate ${env_name}\npython -m pip install `\n  (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n\nWrite-Host \"--- Run Python tests\"\npython -X faulthandler -m pytest -v -s -rxXs tests/python\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\nWrite-Host \"--- Run Python tests with GPU\"\npython -X faulthandler -m pytest -v -s -rxXs -m \"(not slow) and (not mgpu)\"`\n  tests/python-gpu\nif ($LASTEXITCODE -ne 0) { throw \"Last command failed\" }\n"
  },
  {
    "path": "ops/pipeline/trigger-rtd-impl.py",
    "content": "\"\"\"Helper script for triggering Read the docs build.\n\nSee `doc/contrib/docs.rst <https://xgboost.readthedocs.io/en/stable/contrib/docs.html>`__\nfor more info.\n\n\"\"\"\n\nimport json\nimport os\nimport pprint\nfrom http.client import responses as http_responses\n\nimport requests  # type: ignore\n\n\ndef trigger_build(token: str) -> None:\n    \"\"\"Trigger RTD build.\"\"\"\n\n    event_path = os.environ[\"GITHUB_EVENT_PATH\"]\n    with open(event_path, \"r\") as fd:\n        event: dict = json.load(fd)\n\n    if event.get(\"pull_request\", None) is None:\n        # refs/heads/branch-name\n        branch = event[\"ref\"].split(\"/\")[-1]\n    else:\n        branch = event[\"pull_request\"][\"number\"]\n\n    if branch == \"master\":\n        # Use the `latest` tag, otherwise RTD wouldn't update the rendered doc.\n        branch = \"latest\"\n\n    URL = f\"https://readthedocs.org/api/v3/projects/xgboost/versions/{branch}/builds/\"\n    HEADERS = {\"Authorization\": f\"token {token}\"}\n    response = requests.post(URL, headers=HEADERS)\n    # 202 means the build is successfully triggered.\n    if response.status_code != 202:\n        status_text = http_responses[response.status_code]\n        raise RuntimeError(\n            \"ReadTheDocs returned an unexpected response: \"\n            f\"{response.status_code} {status_text}, reason: {response.reason}\"\n        )\n    pprint.pprint(response.json(), indent=4)\n\n\ndef main() -> None:\n    token = os.getenv(\"RTD_AUTH_TOKEN\")\n    # GA redacts the secret by default, but we should still be really careful to not log\n    # (expose) the token in the CI.\n    if token is None:\n        raise RuntimeError(\n            \"The RTD_AUTH_TOKEN environment variable must be set to a valid auth token for the\"\n            \"ReadTheDocs service.\"\n        )\n    if len(token) == 0:\n        print(\"Document build is not triggered.\")\n        return\n\n    if not isinstance(token, str) or len(token) != 
40:\n        raise ValueError(f\"Invalid token.\")\n\n    trigger_build(token)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "ops/pipeline/trigger-rtd.sh",
    "content": "#!/bin/bash\n## Trigger a new build on ReadTheDocs service.\n\nset -euo pipefail\n\nif [[ -z ${BRANCH_NAME:-} ]]\nthen\n  echo \"Make sure to define environment variable BRANCH_NAME.\"\n  exit 1\nfi\n\necho \"Branch name: ${BRANCH_NAME}\"\nexport RTD_AUTH_TOKEN=$(aws secretsmanager get-secret-value \\\n  --secret-id runs-on/readthedocs-auth-token --output text \\\n  --region us-west-2 --query SecretString || echo -n '')\npython3 ops/pipeline/trigger-rtd-impl.py\n"
  },
  {
    "path": "ops/script/change_scala_version.py",
    "content": "import argparse\nimport pathlib\nimport re\nimport shutil\n\n\ndef main(args: argparse.Namespace) -> None:\n    if args.scala_version == \"2.12\":\n        scala_ver = \"2.12\"\n        scala_patchver = \"2.12.18\"\n    elif args.scala_version == \"2.13\":\n        scala_ver = \"2.13\"\n        scala_patchver = \"2.13.11\"\n    else:\n        raise ValueError(f\"Unsupported Scala version: {args.scala_version}\")\n\n    # Clean artifacts\n    if args.purge_artifacts:\n        for target in pathlib.Path(\"jvm-packages/\").glob(\"**/target\"):\n            if target.is_dir():\n                print(f\"Removing {target}...\")\n                shutil.rmtree(target)\n        for ext in [\"so\", \"dll\", \"dylib\"]:\n            for target in pathlib.Path(\"jvm-packages/\").glob(f\"**/*.{ext}\"):\n                print(f\"Removing {target}...\")\n                target.unlink()\n\n    # Update pom.xml\n    for pom in pathlib.Path(\"jvm-packages/\").glob(\"**/pom.xml\"):\n        print(f\"Updating {pom}...\")\n        with open(pom, \"r\", encoding=\"utf-8\") as f:\n            lines = f.readlines()\n        with open(pom, \"w\", encoding=\"utf-8\") as f:\n            replaced_scalaver = False\n            replaced_scala_binver = False\n            for line in lines:\n                for artifact in [\n                    \"xgboost-jvm\",\n                    \"xgboost4j\",\n                    \"xgboost4j-spark\",\n                    \"xgboost4j-spark-gpu\",\n                    \"xgboost4j-flink\",\n                    \"xgboost4j-example\",\n                ]:\n                    line = re.sub(\n                        f\"<artifactId>{artifact}_[0-9\\\\.]*\",\n                        f\"<artifactId>{artifact}_{scala_ver}\",\n                        line,\n                    )\n                # Only replace the first occurrence of scala.version\n                if not replaced_scalaver:\n                    line, nsubs = re.subn(\n                     
   r\"<scala.version>[0-9\\.]*\",\n                        f\"<scala.version>{scala_patchver}\",\n                        line,\n                    )\n                    if nsubs > 0:\n                        replaced_scalaver = True\n                # Only replace the first occurrence of scala.binary.version\n                if not replaced_scala_binver:\n                    line, nsubs = re.subn(\n                        r\"<scala.binary.version>[0-9\\.]*\",\n                        f\"<scala.binary.version>{scala_ver}\",\n                        line,\n                    )\n                    if nsubs > 0:\n                        replaced_scala_binver = True\n                f.write(line)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--purge-artifacts\", action=\"store_true\")\n    parser.add_argument(\n        \"--scala-version\",\n        type=str,\n        required=True,\n        help=\"Version of Scala to use in the JVM packages\",\n        choices=[\"2.12\", \"2.13\"],\n    )\n    parsed_args = parser.parse_args()\n    main(parsed_args)\n"
  },
  {
    "path": "ops/script/change_version.py",
    "content": "\"\"\"\n1. Modify ``CMakeLists.txt`` in source tree and ``python-package/xgboost/VERSION`` if\nneeded, run CMake .\n    If this is a RC release, the Python version has the form <major>.<minor>.<patch>rc1\n2. Modify ``DESCRIPTION`` and ``configure.ac`` in R-package. Run ``autoreconf``.\n3. Run ``mvn`` in ``jvm-packages``\n    If this is a RC release, the version for JVM packages has the form\n    <major>.<minor>.<patch>-RC1\n\"\"\"\n\nimport argparse\nimport datetime\nimport os\nimport re\nimport subprocess\nimport sys\nimport tempfile\n\nfrom pypi_variants import make_pyproject\nfrom test_utils import JVM_PACKAGES, PY_PACKAGE, R_PACKAGE, ROOT, cd\n\n\n@cd(ROOT)\ndef cmake(major: int, minor: int, patch: int) -> None:\n    version = f\"{major}.{minor}.{patch}\"\n    with open(\"CMakeLists.txt\", \"r\") as fd:\n        cmakelist = fd.read()\n    pattern = r\"project\\(xgboost LANGUAGES .* VERSION ([0-9]+\\.[0-9]+\\.[0-9]+)\\)\"\n    matched = re.search(pattern, cmakelist)\n    assert matched, \"Couldn't find the version string in CMakeLists.txt.\"\n    print(matched.start(1), matched.end(1))\n    cmakelist = cmakelist[: matched.start(1)] + version + cmakelist[matched.end(1) :]\n    with open(\"CMakeLists.txt\", \"w\") as fd:\n        fd.write(cmakelist)\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        subprocess.call([\"cmake\", \"-S\", \".\", \"-B\", tmpdir])\n\n\n@cd(PY_PACKAGE)\ndef pypkg(\n    major: int, minor: int, patch: int, rc: int, is_rc: bool, is_dev: bool\n) -> None:\n    version = f\"{major}.{minor}.{patch}\"\n    pyver = version\n    if is_rc:\n        pyver = pyver + f\"rc{rc}\"\n    if is_dev:\n        pyver = pyver + \"-dev\"\n\n    pyver_path = os.path.join(\"xgboost\", \"VERSION\")\n    with open(pyver_path, \"w\") as fd:\n        fd.write(pyver + \"\\n\")\n\n    for pyprj_file in [\"pyproject.toml.in\", \"pyproject.toml.stub.in\"]:\n        pyprj_path = os.path.join(pyprj_file)\n        with open(pyprj_path, \"r\") as 
fd:\n            pyprj = fd.read()\n        matched = re.search('version = \"' + r\"([0-9]+\\.[0-9]+\\.[0-9]+.*)\" + '\"', pyprj)\n        assert matched, \"Couldn't find version string in pyproject.toml.\"\n        pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :]\n        with open(pyprj_path, \"w\") as fd:\n            fd.write(pyprj)\n\n    make_pyproject(use_suffix=\"na\", require_nccl_dep=\"cu12\")\n\n\n@cd(R_PACKAGE)\ndef rpkg(major: int, minor: int, patch: int, is_dev: bool) -> None:\n    if is_dev:\n        version = f\"{major}.{minor}.{patch}.0\"\n    else:\n        version = f\"{major}.{minor}.{patch}.1\"\n    # Version: 2.0.0.1\n    desc_path = \"DESCRIPTION\"\n    with open(desc_path, \"r\") as fd:\n        description = fd.read()\n        pattern = r\"Version:\\ ([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)\"\n        matched = re.search(pattern, description)\n        assert matched, \"Couldn't find version string in DESCRIPTION.\"\n        description = (\n            description[: matched.start(1)] + version + description[matched.end(1) :]\n        )\n        pattern = r\"Date:\\ ([0-9]+\\-[0-9]+\\-[0-9]+)\"\n        today = datetime.date.today()\n        matched = re.search(pattern, description)\n        assert matched, \"Couldn't find date string in DESCRIPTION.\"\n        description = (\n            description[: matched.start(1)] + str(today) + description[matched.end(1) :]\n        )\n    with open(desc_path, \"w\") as fd:\n        fd.write(description)\n\n    config_path = \"configure.ac\"\n    # AC_INIT([xgboost],[2.0.0],[],[xgboost],[])\n    version = f\"{major}.{minor}.{patch}\"\n    with open(config_path, \"r\") as fd:\n        config = fd.read()\n        pattern = (\n            r\"AC_INIT\\(\\[xgboost\\],\\[([0-9]+\\.[0-9]+\\.[0-9]+)\\],\\[\\],\\[xgboost\\],\\[\\]\\)\"\n        )\n        matched = re.search(pattern, config)\n        assert matched, \"Couldn't find version string in configure.ac\"\n        config = config[: 
matched.start(1)] + version + config[matched.end(1) :]\n\n    with open(config_path, \"w\") as fd:\n        fd.write(config)\n\n    subprocess.check_call([\"autoreconf\"])\n\n\n@cd(JVM_PACKAGES)\ndef jvmpkgs(\n    major: int, minor: int, patch: int, rc: int, is_rc: bool, is_dev: bool\n) -> None:\n    version = f\"{major}.{minor}.{patch}\"\n    if is_dev:\n        version += \"-SNAPSHOT\"\n    if is_rc:\n        version += f\"-RC{rc}\"\n    subprocess.check_call([\"mvn\", \"versions:set\", f\"-DnewVersion={version}\"])\n\n\n@cd(ROOT)\ndef main(args: argparse.Namespace) -> None:\n    major = args.major\n    minor = args.minor\n    patch = args.patch\n    rc = args.rc\n    is_rc = args.is_rc\n    is_dev = args.is_dev\n    if is_rc and is_dev:\n        raise ValueError(\"It cannot be both a rc and a dev branch.\")\n    if is_rc:\n        assert rc >= 1, \"RC version starts from 1.\"\n    else:\n        assert rc == 0, \"RC is not used.\"\n\n    cmake(major, minor, patch)\n    pypkg(major, minor, patch, rc, is_rc, is_dev)\n    rpkg(major, minor, patch, is_dev=is_dev)\n    jvmpkgs(major, minor, patch, rc, is_rc, is_dev)\n\n    print(\"\"\"\n\nPlease examine the changes and commit. Be aware that mvn might leave backup files in the\nsource tree.\n\n\"\"\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--major\", type=int, required=True)\n    parser.add_argument(\"--minor\", type=int, required=True)\n    parser.add_argument(\"--patch\", type=int, required=True)\n    parser.add_argument(\"--rc\", type=int, default=0)\n    parser.add_argument(\"--is-rc\", action=\"store_true\")\n    parser.add_argument(\"--is-dev\", action=\"store_true\")\n    args = parser.parse_args()\n    try:\n        main(args)\n    except Exception as e:\n        print(\"Error:\", e, file=sys.stderr)\n        exit(-1)\n"
  },
  {
    "path": "ops/script/changelog.py",
    "content": "\"\"\"Helper script for creating links to PRs for changelog. This should be used with the\n`sphinx-issues` extension.\n\n\"\"\"\n\nimport argparse\nimport os\nimport re\n\nfrom test_utils import ROOT\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--version\",\n        type=str,\n        required=True,\n        help=\"Major version of the changelog, e.g., 3.0.0 .\",\n    )\n    args = parser.parse_args()\n    version = args.version\n\n    fname = os.path.join(ROOT, f\"doc/changes/v{version}.rst\")\n\n    with open(fname) as fd:\n        note = fd.read()\n\n    # E.g. #11285 -> :pr:`11285`.\n    regex = re.compile(r\"(#)(\\d+)\")\n    note = re.sub(regex, r\":pr:`\\2`\", note)\n    with open(fname, \"w\") as fd:\n        fd.write(note)\n"
  },
  {
    "path": "ops/script/format_wheel_meta.py",
    "content": "\"\"\"\nScript to generate meta.json to store metadata for a nightly build of\nXGBoost Python package.\n\"\"\"\n\nimport argparse\nimport json\nimport pathlib\n\n\ndef main(args: argparse.Namespace) -> None:\n    wheel_path = pathlib.Path(args.wheel_path).expanduser().resolve()\n    if not wheel_path.exists():\n        raise ValueError(f\"Wheel cannot be found at path {wheel_path}\")\n    if not wheel_path.is_file():\n        raise ValueError(f\"Path {wheel_path} is not a valid file\")\n    wheel_name = wheel_path.name\n\n    meta_path = pathlib.Path(args.meta_path)\n    if not meta_path.exists():\n        raise ValueError(f\"Path {meta_path} does not exist\")\n    if not meta_path.is_dir():\n        raise ValueError(f\"Path {meta_path} is not a valid directory\")\n\n    tokens = wheel_name.split(\"-\")\n    assert len(tokens) == 5\n    version = tokens[1].split(\"+\")[0]\n\n    meta_info = {\n        \"wheel_path\": f\"{args.commit_hash}/{wheel_name}\",\n        \"wheel_name\": wheel_name,\n        \"platform_tag\": args.platform_tag,\n        \"version\": version,\n        \"commit_id\": args.commit_hash,\n    }\n    with open(meta_path / \"meta.json\", \"w\") as f:\n        json.dump(meta_info, f, indent=4)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Format meta.json encoding the latest nightly version of the Python wheel\"\n    )\n    parser.add_argument(\n        \"--wheel-path\", type=str, required=True, help=\"Path to the wheel\"\n    )\n    parser.add_argument(\n        \"--commit-hash\", type=str, required=True, help=\"Git commit hash\"\n    )\n    parser.add_argument(\n        \"--platform-tag\",\n        type=str,\n        required=True,\n        help=\"Platform tag (e.g. manylinux_2_28_x86_64)\",\n    )\n    parser.add_argument(\n        \"--meta-path\", type=str, required=True, help=\"Directory to place meta.json\"\n    )\n    parsed_args = parser.parse_args()\n    main(parsed_args)\n"
  },
  {
    "path": "ops/script/inject_jvm_lib.sh",
    "content": "#!/bin/bash\n# Inject lib/libxgboost4j.so into JVM packages.\n# This script is useful when the user opts to set skip.native.build=true\n# option in the JVM package build. When this option is set, the JVM package\n# build will not build libxgboost4j.so; instead it will expect to find the\n# library in jvm-packages/xgboost4j/src/main/resources/lib/{os}/{arch}/.\n# This script will ensure that libxgboost4j.so is copied to the correct\n# location.\n\nset -euox pipefail\n\necho \"Using externally provided libxgboost4j.so. Locating one from lib/...\"\nmkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/\ncp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/\nmkdir -p jvm-packages/xgboost4j/src/test/resources\nmkdir -p jvm-packages/xgboost4j-spark/src/test/resources\nmkdir -p jvm-packages/xgboost4j-spark-gpu/src/test/resources\n\n# Generate machine.txt.* files from the regression demo\npushd demo/data/regression\npython3 mapfeat.py\npython3 mknfold.py machine.txt 1\npopd\n\ncp -v demo/data/agaricus.* \\\n  jvm-packages/xgboost4j/src/test/resources\ncp -v demo/data/regression/machine.txt.t* demo/data/agaricus.* \\\n  jvm-packages/xgboost4j-spark/src/test/resources\ncp -v demo/data/veterans_lung_cancer.csv \\\n  jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv \\\n  jvm-packages/xgboost4j-spark-gpu/src/test/resources\n"
  },
  {
    "path": "ops/script/lint_cpp.py",
    "content": "import argparse\nimport os\nimport re\nimport sys\nfrom typing import TextIO\n\nimport cpplint\nfrom cpplint import _cpplint_state\n\nCXX_SUFFIX = set([\"cc\", \"c\", \"cpp\", \"h\", \"cu\", \"hpp\"])\n\n\ndef filepath_enumerate(paths: list[str]) -> list[str]:\n    \"\"\"Enumerate the file paths of all subfiles of the list of paths\"\"\"\n    out = []\n    for path in paths:\n        if os.path.isfile(path):\n            out.append(path)\n        else:\n            for root, dirs, files in os.walk(path):\n                for name in files:\n                    out.append(os.path.normpath(os.path.join(root, name)))\n    return out\n\n\ndef get_header_guard_dmlc(filename: str) -> str:\n    \"\"\"Get Header Guard Convention for DMLC Projects.\n\n    For headers in include, directly use the path\n    For headers in src, use project name plus path\n\n    Examples: with project-name = dmlc\n        include/dmlc/timer.h -> DMLC_TIMTER_H_\n        src/io/libsvm_parser.h -> DMLC_IO_LIBSVM_PARSER_H_\n    \"\"\"\n    fileinfo = cpplint.FileInfo(filename)\n    file_path_from_root = fileinfo.RepositoryName()\n    inc_list = [\"include\", \"api\", \"wrapper\", \"contrib\"]\n    if os.name == \"nt\":\n        inc_list.append(\"mshadow\")\n\n    if file_path_from_root.find(\"src/\") != -1 and _HELPER.project_name is not None:\n        idx = file_path_from_root.find(\"src/\")\n        file_path_from_root = _HELPER.project_name + file_path_from_root[idx + 3 :]\n    else:\n        idx = file_path_from_root.find(\"include/\")\n        if idx != -1:\n            file_path_from_root = file_path_from_root[idx + 8 :]\n        for spath in inc_list:\n            prefix = spath + \"/\"\n            if file_path_from_root.startswith(prefix):\n                file_path_from_root = re.sub(\"^\" + prefix, \"\", file_path_from_root)\n                break\n    return re.sub(r\"[-./\\s]\", \"_\", file_path_from_root).upper() + \"_\"\n\n\nclass Lint:\n    def __init__(self) -> 
None:\n        self.project_name = \"xgboost\"\n        self.cpp_header_map: dict[str, dict[str, int]] = {}\n        self.cpp_src_map: dict[str, dict[str, int]] = {}\n\n        self.pylint_cats = set([\"error\", \"warning\", \"convention\", \"refactor\"])\n        # setup cpp lint\n        cpplint_args = [\"--quiet\", \"--extensions=\" + (\",\".join(CXX_SUFFIX)), \".\"]\n        _ = cpplint.ParseArguments(cpplint_args)\n        cpplint._SetFilters(\n            \",\".join(\n                [\n                    \"-build/c++11\",\n                    \"-build/include,\",\n                    \"+build/namespaces\",\n                ]\n            )\n        )\n        cpplint._SetCountingStyle(\"toplevel\")\n        cpplint._line_length = 100\n\n    def process_cpp(self, path: str, suffix: str) -> None:\n        \"\"\"Process a cpp file.\"\"\"\n        _cpplint_state.ResetErrorCounts()\n        cpplint.ProcessFile(str(path), _cpplint_state.verbose_level)\n        _cpplint_state.PrintErrorCounts()\n        errors = _cpplint_state.errors_by_category.copy()\n\n        if suffix == \"h\":\n            self.cpp_header_map[str(path)] = errors\n        else:\n            self.cpp_src_map[str(path)] = errors\n\n    @staticmethod\n    def _print_summary_map(\n        strm: TextIO, result_map: dict[str, dict[str, int]], ftype: str\n    ) -> int:\n        \"\"\"Print summary of certain result map.\"\"\"\n        if len(result_map) == 0:\n            return 0\n        npass = sum(1 for x in result_map.values() if len(x) == 0)\n        strm.write(f\"====={npass}/{len(result_map)} {ftype} files passed check=====\\n\")\n        for fname, emap in result_map.items():\n            if len(emap) == 0:\n                continue\n            strm.write(\n                f\"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}\\n\"\n            )\n        return len(result_map) - npass\n\n    def print_summary(self, strm: TextIO) -> int:\n        \"\"\"Print 
summary of lint.\"\"\"\n        nerr = 0\n        nerr += Lint._print_summary_map(strm, self.cpp_header_map, \"cpp-header\")\n        nerr += Lint._print_summary_map(strm, self.cpp_src_map, \"cpp-source\")\n        if nerr == 0:\n            strm.write(\"All passed!\\n\")\n        else:\n            strm.write(f\"{nerr} files failed lint\\n\")\n        return nerr\n\n\n_HELPER = Lint()\n\ncpplint.GetHeaderGuardCPPVariable = get_header_guard_dmlc\n\n\ndef process(fname: str, allow_type: list[str]) -> None:\n    \"\"\"Process a file.\"\"\"\n    fname = str(fname)\n    arr = fname.rsplit(\".\", 1)\n    if fname.find(\"#\") != -1 or arr[-1] not in allow_type:\n        return\n    if arr[-1] in CXX_SUFFIX:\n        _HELPER.process_cpp(fname, arr[-1])\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"run cpp lint\")\n    parser.add_argument(\n        \"path\",\n        nargs=\"*\",\n        help=\"Path to traverse\",\n        default=[\n            \"src\",\n            \"include\",\n            os.path.join(\"R-package\", \"src\"),\n            \"python-package\",\n            \"plugin/sycl\",\n        ],\n    )\n    parser.add_argument(\n        \"--exclude_path\",\n        nargs=\"+\",\n        default=[],\n        help=\"exclude this path, and all subfolders if path is a folder\",\n    )\n    args = parser.parse_args()\n    excluded_paths = filepath_enumerate(args.exclude_path)\n\n    allow_type: list[str] = []\n    allow_type += CXX_SUFFIX\n\n    for path in args.path:\n        if not os.path.exists(path):\n            raise ValueError(f\"Unknown path: {path}\")\n        if os.path.isfile(path):\n            normpath = os.path.normpath(path)\n            if normpath not in excluded_paths:\n                process(path, allow_type)\n        else:\n            for root, dirs, files in os.walk(path):\n                for name in files:\n                    file_path = os.path.normpath(os.path.join(root, name))\n                    if 
file_path not in excluded_paths:\n                        process(file_path, allow_type)\n    nerr = _HELPER.print_summary(sys.stderr)\n    sys.exit(nerr > 0)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "ops/script/lint_r.R",
    "content": "library(lintr)\n\nargs <- commandArgs(\n    trailingOnly = TRUE\n)\nSOURCE_DIR <- args[[1L]]\n\nFILES_TO_LINT <- list.files(\n    path = SOURCE_DIR\n    , pattern = \"\\\\.r$|\\\\.rmd$\"\n    , all.files = TRUE\n    , ignore.case = TRUE\n    , full.names = TRUE\n    , recursive = TRUE\n    , include.dirs = FALSE\n)\n\nmy_linters <- list(\n  absolute_path_linter = lintr::absolute_path_linter(),\n  any_duplicated = lintr::any_duplicated_linter(),\n  any_is_na = lintr::any_is_na_linter(),\n  assignment_linter = lintr::assignment_linter(),\n  boolean_arithmetic = lintr::boolean_arithmetic_linter(),\n  brace_linter = lintr::brace_linter(),\n  class_equals = lintr::class_equals_linter(),\n  commas_linter = lintr::commas_linter(),\n  empty_assignment = lintr::empty_assignment_linter(),\n  equals_na = lintr::equals_na_linter(),\n  fixed_regex = lintr::fixed_regex_linter(),\n  for_loop_index = lintr::for_loop_index_linter(),\n  function_left_parentheses = lintr::function_left_parentheses_linter(),\n  function_return = lintr::function_return_linter(),\n  infix_spaces_linter = lintr::infix_spaces_linter(),\n  is_numeric = lintr::is_numeric_linter(),\n  line_length_linter = lintr::line_length_linter(length = 150L),\n  lengths = lintr::lengths_linter(),\n  matrix = lintr::matrix_apply_linter(),\n  object_usage_linter = lintr::object_usage_linter(),\n  object_length_linter = lintr::object_length_linter(),\n  routine_registration = lintr::routine_registration_linter(),\n  semicolon = lintr::semicolon_linter(),\n  seq = lintr::seq_linter(),\n  spaces_inside_linter = lintr::spaces_inside_linter(),\n  spaces_left_parentheses_linter = lintr::spaces_left_parentheses_linter(),\n  sprintf = lintr::sprintf_linter(),\n  string_boundary = lintr::string_boundary_linter(),\n  trailing_blank_lines_linter = lintr::trailing_blank_lines_linter(),\n  trailing_whitespace_linter = lintr::trailing_whitespace_linter(),\n  true_false = lintr::T_and_F_symbol_linter(),\n  
unnecessary_concatenation = lintr::unnecessary_concatenation_linter(),\n  unreachable_code = lintr::unreachable_code_linter(),\n  vector_logic = lintr::vector_logic_linter(),\n  whitespace = lintr::whitespace_linter()\n)\n\nnoquote(paste0(length(FILES_TO_LINT), \" R files need linting\"))\n\nresults <- NULL\n\nfor (r_file in FILES_TO_LINT) {\n\n    this_result <- lintr::lint(\n        filename = r_file\n        , linters = my_linters\n        , cache = FALSE\n    )\n\n    print(\n        sprintf(\n            \"Found %i linting errors in %s\"\n            , length(this_result)\n            , r_file\n        )\n        , quote = FALSE\n    )\n\n    results <- c(results, this_result)\n\n}\n\nissues_found <- length(results)\n\nnoquote(paste0(\"Total linting issues found: \", issues_found))\n\nif (issues_found > 0L) {\n    print(results)\n    quit(save = \"no\", status = 1L)\n}\n"
  },
  {
    "path": "ops/script/pypi_variants.py",
    "content": "\"\"\"Create Package variants for PyPI distribution.\"\"\"\n\nimport argparse\nimport os\nimport tomllib\n\nfrom packaging.version import Version\nfrom test_utils import PY_PACKAGE\n\nIN_PATH = os.path.join(PY_PACKAGE, \"pyproject.toml.in\")\nSTUB_IN_PATH = os.path.join(PY_PACKAGE, \"pyproject.toml.stub.in\")\nOUT_PATH = os.path.join(PY_PACKAGE, \"pyproject.toml\")\n\nNCCL_WHL = \"\"\"    \\\"nvidia-nccl-{0} ; platform_system == 'Linux'\\\",\"\"\"\n\nNAME = \"{{ name }}\"\nNCCL = \"{{ nccl }}\"\nVERSION = \"{{ version }}\"\nCUDA_VARIANTS = [\"cu12\", \"cu13\"]\n\n\ndef copyfile(src: str, dst: str) -> None:\n    with open(src, \"rb\") as fd:\n        content = fd.read()\n    with open(dst, \"wb\") as fd:\n        fd.write(content)\n\n\ndef make_pyproject(\n    *, use_suffix: str, require_nccl_dep: str, create_stub: bool = False\n) -> None:\n    if use_suffix == \"cpu\" and require_nccl_dep != \"na\":\n        raise ValueError(\n            \"xgboost-cpu cannot require NCCL dependency. \"\n            \"When setting --use-suffix='cpu', you must also set --require-nccl-dep='na'.\"\n        )\n    if (\n        use_suffix in CUDA_VARIANTS\n        and require_nccl_dep in CUDA_VARIANTS\n        and use_suffix != require_nccl_dep\n    ):\n        raise ValueError(\n            \"Inconsistent choices for --use-suffix and --require-nccl-dep. 
\"\n            \"When --use-suffix is set to one of {{{0}}}, --require-nccl-dep must be \"\n            \"set to identical value as --use-suffix.\".format(\",\".join(CUDA_VARIANTS))\n        )\n    if create_stub:\n        if use_suffix == \"na\":\n            raise ValueError(\"To create a stub package, --use-suffix must not be 'na'\")\n        if require_nccl_dep != \"na\":\n            raise ValueError(\n                \"To create a stub package, --require-nccl-dep must be 'na'\"\n            )\n\n    with open(STUB_IN_PATH if create_stub else IN_PATH) as fd:\n        pyproject = fd.read()\n\n    readme_dft = os.path.join(PY_PACKAGE, \"README.dft.rst\")\n    readme_cpu = os.path.join(PY_PACKAGE, \"README.cpu.rst\")\n    readme_stub = os.path.join(PY_PACKAGE, \"README.stub.rst\")\n    readme = os.path.join(PY_PACKAGE, \"README.rst\")\n    pyproject = pyproject.replace(\n        NAME, f\"xgboost-{use_suffix}\" if use_suffix != \"na\" else \"xgboost\"\n    )\n    if create_stub:\n        copyfile(readme_stub, readme)\n        pyproject_parsed = tomllib.loads(pyproject)\n        pyproject = pyproject.replace(\n            VERSION, str(Version(pyproject_parsed[\"project\"][\"version\"]))\n        )\n    elif use_suffix == \"cpu\":\n        copyfile(readme_cpu, readme)\n    else:\n        copyfile(readme_dft, readme)\n    pyproject = pyproject.replace(\n        NCCL, NCCL_WHL.format(require_nccl_dep) if require_nccl_dep != \"na\" else \"\"\n    )\n    pyproject = (\n        f\"# Generated by `{os.path.basename(__file__)}`, don't edit: \"\n        f\"'--use-suffix={use_suffix} --require-nccl-dep={require_nccl_dep} \"\n        f\"--create-stub={create_stub}'\\n\" + pyproject\n    )\n\n    with open(OUT_PATH, \"w\") as fd:\n        fd.write(pyproject)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--use-suffix\",\n        type=str,\n        choices=[\"na\", \"cpu\"] + CUDA_VARIANTS,\n        
default=\"na\",\n        help=(\n            \"When using this option, rename the package name to xgboost-[suffix]. \"\n            \"Set to 'na' to disable\"\n        ),\n    )\n    parser.add_argument(\n        \"--require-nccl-dep\",\n        type=str,\n        choices=[\"na\"] + CUDA_VARIANTS,\n        required=True,\n        help=\"Which NCCL dependency to use; select 'na' to remove NCCL dependency\",\n    )\n    parser.add_argument(\n        \"--create-stub\",\n        action=\"store_true\",\n        help=\"Create a stub package that redirects users to install `xgboost`\",\n    )\n    args = parser.parse_args()\n    make_pyproject(\n        use_suffix=args.use_suffix,\n        require_nccl_dep=args.require_nccl_dep,\n        create_stub=args.create_stub,\n    )\n"
  },
  {
    "path": "ops/script/release_artifacts.py",
    "content": "\"\"\"\nSimple script for managing Python, R, and source release packages.\n\ntqdm, sh, and build are required to run this script.\n\"\"\"\n\nimport argparse\nimport shutil\nimport subprocess\nimport tarfile\nimport tempfile\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, Tuple\nfrom urllib.request import urlretrieve\n\nimport tqdm\nfrom packaging import version\nfrom pypi_variants import make_pyproject\nfrom sh.contrib import git\nfrom test_utils import PY_PACKAGE\nfrom test_utils import ROOT as root_path\nfrom test_utils import DirectoryExcursion\n\n# S3 bucket hosting the release artifacts\nS3_BUCKET_URL = \"https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds\"\nDIST = Path(PY_PACKAGE) / \"dist\"\nROOT = Path(root_path)\n\npbar = None\n\n\ndef show_progress(block_num: int, block_size: int, total_size: int) -> None:\n    \"\"\"Show file download progress.\"\"\"\n    global pbar\n    if pbar is None:\n        pbar = tqdm.tqdm(total=total_size / 1024, unit=\"kB\")\n\n    downloaded = block_num * block_size\n    if downloaded < total_size:\n        pbar.update(min(block_size / 1024, (total_size - downloaded) / 1024))\n    else:\n        pbar.close()\n        pbar = None\n\n\ndef retrieve(url: str, filename: Optional[Path] = None) -> str:\n    \"\"\"Retrieve a file from a URL with progress indication.\"\"\"\n    print(f\"Downloading {url} -> {filename}\")\n    return urlretrieve(url, filename, reporthook=show_progress)[0]\n\n\ndef latest_hash() -> str:\n    \"\"\"Get latest commit hash.\"\"\"\n    try:\n        result = subprocess.run(\n            [\"git\", \"rev-parse\", \"HEAD\"],\n            check=True,\n            capture_output=True,\n            text=True,\n            encoding=\"utf-8\",\n        )\n        return result.stdout.strip()\n    except subprocess.CalledProcessError as e:\n        raise RuntimeError(\"Failed to get latest commit hash.\") from e\n\n\ndef _download_python_wheels(\n    platforms: 
List[str],\n    dir_url: str,\n    src_filename_prefix: str,\n    target_filename_prefix: str,\n    outdir: Path,\n) -> List[Path]:\n    \"\"\"Download all Python binary wheels for a given set of platforms\"\"\"\n    wheel_paths = []\n    dist_dir = outdir / \"dist\"\n    dist_dir.mkdir(exist_ok=True)\n\n    for platform in platforms:\n        src_wheel = f\"{src_filename_prefix}{platform}.whl\"\n        url = f\"{dir_url}{src_wheel}\"\n        target_wheel = f\"{target_filename_prefix}{platform}.whl\"\n        wheel_path = dist_dir / target_wheel\n        wheel_paths.append(wheel_path)\n\n        retrieve(url=url, filename=wheel_path)\n\n        try:\n            result = subprocess.run(\n                [\"twine\", \"check\", str(wheel_path)],\n                check=True,\n                capture_output=True,\n                text=True,\n                encoding=\"utf-8\",\n            )\n            if \"warning\" in result.stderr or \"warning\" in result.stdout:\n                raise RuntimeError(\n                    f\"Unresolved warnings:\\n{result.stderr}\\n{result.stdout}\"\n                )\n        except subprocess.CalledProcessError as e:\n            raise RuntimeError(\"Failed twine check\") from e\n    return wheel_paths\n\n\ndef make_python_sdist(\n    release: str, rc: Optional[str], rc_ver: Optional[int], outdir: Path\n) -> None:\n    \"\"\"Make Python source distribution.\"\"\"\n    dist_dir = outdir / \"dist\"\n    dist_dir.mkdir(exist_ok=True)\n\n    # Build sdist for `xgboost-cpu`, `xgboost`.\n    for suffix, nccl_dep in [(\"cpu\", \"na\"), (\"na\", \"na\")]:\n        with DirectoryExcursion(ROOT):\n            make_pyproject(use_suffix=suffix, require_nccl_dep=nccl_dep)\n        with DirectoryExcursion(ROOT / \"python-package\"):\n            subprocess.run([\"python\", \"-m\", \"build\", \"--sdist\"], check=True)\n            pkg_name = \"xgboost\" if suffix == \"na\" else f\"xgboost_{suffix}\"\n            sdist_name = (\n                
f\"{pkg_name}-{release}{rc}{rc_ver}.tar.gz\"\n                if rc\n                else f\"{pkg_name}-{release}.tar.gz\"\n            )\n            src = DIST / sdist_name\n            subprocess.run([\"twine\", \"check\", str(src)], check=True)\n            dest = dist_dir / sdist_name\n            shutil.move(src, dest)\n\n    # Build stub package `xgboost-cu12`.\n    with DirectoryExcursion(ROOT):\n        make_pyproject(use_suffix=\"cu12\", require_nccl_dep=\"na\", create_stub=True)\n\n    with DirectoryExcursion(ROOT / \"python-package\"):\n        subprocess.run([\"python\", \"-m\", \"build\", \"--sdist\"], check=True)\n        sdist_name = (\n            f\"xgboost_cu12-{release}{rc}{rc_ver}.tar.gz\"\n            if rc\n            else f\"xgboost_cu12-{release}.tar.gz\"\n        )\n        src = DIST / sdist_name\n        subprocess.run([\"twine\", \"check\", str(src)], check=True)\n        dest = dist_dir / sdist_name\n        shutil.move(src, dest)\n\n\ndef download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None:\n    \"\"\"Download all Python binary wheels for the specified branch.\"\"\"\n    full_platforms = [\n        \"win_amd64\",\n        \"manylinux_2_28_x86_64\",\n        \"manylinux_2_28_aarch64\",\n        \"macosx_10_15_x86_64\",\n        \"macosx_12_0_arm64\",\n    ]\n    cu13_platforms = [\n        \"manylinux_2_28_x86_64\",\n        \"manylinux_2_28_aarch64\",\n    ]\n    minimal_platforms = [\n        \"win_amd64\",\n        \"win_arm64\",\n        \"manylinux_2_28_x86_64\",\n        \"manylinux_2_28_aarch64\",\n    ]\n\n    # https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_3.0.0/4bfd4bf60d32e2d62426cc4070ccb5a5ba1ed078/xgboost-3.0.0rc1-py3-none-manylinux_2_28_x86_64.whl\n    dir_url = f\"{S3_BUCKET_URL}/{branch}/{commit_hash}/\"\n    wheels = []\n    for pkg_name, platforms in [\n        (\"xgboost\", full_platforms),\n        (\"xgboost_cpu\", minimal_platforms),\n        (\"xgboost_cu13\", 
cu13_platforms),\n    ]:\n        src_filename_prefix = f\"{pkg_name}-{args.release}-py3-none-\"\n        target_filename_prefix = f\"{pkg_name}-{args.release}-py3-none-\"\n        wheels.extend(\n            _download_python_wheels(\n                platforms, dir_url, src_filename_prefix, target_filename_prefix, outdir\n            )\n        )\n    print(f\"List of downloaded wheels: {wheels}\")\n    print(\"\"\"\nFollowing steps should be done manually:\n- Upload pypi package by `python3 -m twine upload dist/<Package Name>` for all wheels.\n- Check the uploaded files on `https://pypi.org/project/xgboost/<VERSION>/#files` and\n  `pip install xgboost==<VERSION>` \"\"\")\n\n\ndef download_r_artifacts(\n    release: str, branch: str, commit: str, outdir: Path\n) -> Tuple[Dict[str, str], List[str]]:\n    \"\"\"Download R package artifacts for the specified release and branch.\"\"\"\n    platforms = [\"linux\"]\n    rpkg_dir = outdir / \"r-packages\"\n    rpkg_dir.mkdir(exist_ok=True)\n\n    artifacts = []\n    urls = {}\n\n    # https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_3.0.0/4bfd4bf60d32e2d62426cc4070ccb5a5ba1ed078/xgboost_r_gpu_linux.tar.gz\n    for plat in platforms:\n        url = f\"{S3_BUCKET_URL}/{branch}/{commit}/xgboost_r_gpu_{plat}.tar.gz\"\n        artifact_name = f\"xgboost_r_gpu_{plat}.tar.gz\"\n        artifact_path = rpkg_dir / artifact_name\n        retrieve(url=url, filename=artifact_path)\n        artifacts.append(artifact_path)\n        urls[plat] = url\n\n    print(f\"Finished downloading R package artifacts: {artifacts}\")\n    hashes = []\n    with DirectoryExcursion(rpkg_dir):\n        for f in artifacts:\n            result = subprocess.run(\n                [\"sha256sum\", f.name],\n                check=True,\n                capture_output=True,\n                text=True,\n                encoding=\"utf-8\",\n            )\n            hashes.append(result.stdout.strip())\n    return urls, hashes\n\n\ndef 
check_path() -> None:\n    \"\"\"Ensure the script is run from the project root directory.\"\"\"\n    current_dir = Path.cwd().resolve()\n    if current_dir.name != \"xgboost\":\n        raise RuntimeError(\"Must be run from the project root directory.\")\n\n\ndef make_src_tarball(release: str, outdir: Path) -> Tuple[str, str]:\n    tarball_name = f\"xgboost-src-{release}.tar.gz\"\n    tarball_path = outdir / tarball_name\n    if tarball_path.exists():\n        tarball_path.unlink()\n\n    with tempfile.TemporaryDirectory() as tmpdir_str:\n        tmpdir = Path(tmpdir_str)\n        shutil.copytree(Path.cwd(), tmpdir / \"xgboost\")\n        with DirectoryExcursion(tmpdir / \"xgboost\"):\n            result = subprocess.run(\n                [\"git\", \"submodule\", \"foreach\", \"--quiet\", \"echo $sm_path\"],\n                check=True,\n                capture_output=True,\n                text=True,\n                encoding=\"utf-8\",\n            )\n            submodules = result.stdout.strip().split()\n            for mod in submodules:\n                mod_path = Path.cwd().resolve() / mod / \".git\"\n                mod_path.unlink()\n            shutil.rmtree(\".git\")\n            with tarfile.open(tarball_path, \"x:gz\") as tar:\n                tar.add(tmpdir / \"xgboost\", arcname=\"xgboost\")\n\n    with DirectoryExcursion(tarball_path.parent):\n        result = subprocess.run(\n            [\"sha256sum\", tarball_name],\n            check=True,\n            capture_output=True,\n            text=True,\n            encoding=\"utf-8\",\n        )\n        sha256sum = result.stdout.strip()\n    return tarball_name, sha256sum\n\n\ndef release_note(\n    release: str,\n    artifact_hashes: List[str],\n    r_urls: Dict[str, str],\n    tarball_name: str,\n    outdir: Path,\n) -> None:\n    \"\"\"Generate a note for GitHub release description.\"\"\"\n    r_gpu_linux_url = r_urls[\"linux\"]\n    src_tarball = (\n        
f\"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarball_name}\"\n    )\n    hash_note = \"\\n\".join(artifact_hashes)\n\n    end_note = f\"\"\"\n### Additional artifacts:\n\nYou can verify the downloaded packages by running the following command on your Unix shell:\n\n``` sh\necho \"<hash> <artifact>\" | shasum -a 256 --check\n```\n\n```\n{hash_note}\n```\n\n**Experimental binary packages for R with CUDA enabled**\n* xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})\n\n**Source tarball**\n* {tarball_name}: [Download]({src_tarball})\"\"\"\n    print(end_note)\n    with open(outdir / \"end_note.md\", \"w\") as f:\n        f.write(end_note)\n\n\ndef main(args: argparse.Namespace) -> None:\n    check_path()\n\n    release_parsed: version.Version = version.parse(args.release)\n    print(f\"Release: {release_parsed}\")\n\n    major = release_parsed.major\n    minor = release_parsed.minor\n    patch = release_parsed.micro\n    if not release_parsed.is_prerelease:\n        # Major release\n        rc: Optional[str] = None\n        rc_ver: Optional[int] = None\n    else:\n        # RC release\n        assert release_parsed.pre is not None\n        rc, rc_ver = release_parsed.pre\n        if rc != \"rc\":\n            raise ValueError(\n                \"Only supports release candidates with 'rc' in the version string\"\n            )\n\n    # Release string with only major, minor, patch components\n    release = f\"{major}.{minor}.{patch}\"\n    if args.branch is not None:\n        branch = args.branch\n    else:\n        branch = f\"release_{major}.{minor}.0\"\n\n    git.clean(\"-xdf\")\n    git.checkout(branch)\n    git.pull(\"origin\", branch)\n    git.submodule(\"update\")\n    commit_hash = latest_hash()\n\n    outdir = Path(args.outdir).resolve()\n    if ROOT in outdir.parents:\n        raise ValueError(\"Output directory must be outside of the source tree.\")\n    outdir.mkdir(exist_ok=True)\n\n    artifact_hashes: List[str] = 
[]\n\n    # Source tarball\n    tarball_name, hash = make_src_tarball(release, outdir)\n    artifact_hashes.append(hash)\n\n    # CUDA R packages\n    urls, hashes = download_r_artifacts(\n        release,\n        branch,\n        commit_hash,\n        outdir,\n    )\n    artifact_hashes.extend(hashes)\n\n    # Python source wheel\n    make_python_sdist(release, rc, rc_ver, outdir)\n\n    # Python binary wheels\n    download_python_wheels(branch, commit_hash, outdir)\n\n    # Write end note\n    release_note(release, artifact_hashes, urls, tarball_name, outdir)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--release\",\n        type=str,\n        required=True,\n        help=\"Version tag, e.g. '1.3.2', or '1.5.0rc1'\",\n    )\n    parser.add_argument(\n        \"--branch\",\n        type=str,\n        default=None,\n        help=(\n            \"Optional branch. Usually patch releases reuse the same branch of the\"\n            \" major release, but there can be exception.\"\n        ),\n    )\n    parser.add_argument(\n        \"--outdir\",\n        type=str,\n        default=None,\n        required=True,\n        help=\"Directory to store the generated packages.\",\n    )\n    args = parser.parse_args()\n    main(args)\n"
  },
  {
    "path": "ops/script/run_clang_tidy.py",
    "content": "#!/usr/bin/env python\nfrom __future__ import annotations\n\nimport argparse\nimport json\nimport os\nimport re\nimport shutil\nimport subprocess\nimport sys\nfrom multiprocessing import Pool, cpu_count\nfrom time import time\n\n\ndef call(args: list[str]) -> tuple[int, int, str, list[str]]:\n    \"\"\"Subprocess run wrapper.\"\"\"\n    completed = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n    error_msg = completed.stdout.decode(\"utf-8\")\n    # `workspace` is a name used in the CI container.  Normally we should keep the dir\n    # as `xgboost`.\n    matched = re.search(\n        \"(workspace|xgboost)/.*(ops|src|tests|include)/.*warning:\",\n        error_msg,\n        re.MULTILINE,\n    )\n\n    if matched is None:\n        return_code = 0\n    else:\n        return_code = 1\n    return (completed.returncode, return_code, error_msg, args)\n\n\nclass ClangTidy:\n    \"\"\"clang tidy wrapper.\n    Args:\n      args:  Command line arguments.\n          cpp_lint: Run linter on C++ source code.\n          cuda_lint: Run linter on CUDA source code.\n          use_dmlc_gtest: Whether to use gtest bundled in dmlc-core.\n    \"\"\"\n\n    def __init__(self, args: argparse.Namespace) -> None:\n        self.cpp_lint = args.cpp\n        self.cuda_lint = args.cuda\n        self.use_dmlc_gtest: bool = args.use_dmlc_gtest\n        self.cuda_archs = args.cuda_archs.copy() if args.cuda_archs else []\n\n        if args.tidy_version:\n            self.exe = \"clang-tidy-\" + str(args.tidy_version)\n        else:\n            self.exe = \"clang-tidy\"\n\n        print(\"Run linter on CUDA: \", self.cuda_lint)\n        print(\"Run linter on C++:\", self.cpp_lint)\n        print(\"Use dmlc gtest:\", self.use_dmlc_gtest)\n        print(\"CUDA archs:\", \" \".join(self.cuda_archs))\n\n        if not self.cpp_lint and not self.cuda_lint:\n            raise ValueError(\"Both --cpp and --cuda are set to 0.\")\n        self.root_path = 
os.path.abspath(os.path.curdir)\n        print(\"Project root:\", self.root_path)\n        self.cdb_path = os.path.join(self.root_path, \"cdb\")\n\n    def __enter__(self) -> \"ClangTidy\":\n        self.start = time()\n        if os.path.exists(self.cdb_path):\n            shutil.rmtree(self.cdb_path)\n        self._generate_cdb()\n        return self\n\n    def __exit__(self, *args: list) -> None:\n        if os.path.exists(self.cdb_path):\n            shutil.rmtree(self.cdb_path)\n        self.end = time()\n        print(\"Finish running clang-tidy:\", self.end - self.start)\n\n    def _generate_cdb(self) -> None:\n        \"\"\"Run CMake to generate compilation database.\"\"\"\n        os.mkdir(self.cdb_path)\n        os.chdir(self.cdb_path)\n        cmake_args = [\n            \"cmake\",\n            self.root_path,\n            \"-GNinja\",  # prevents cmake from using --option-files for include path.\n            \"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON\",\n            \"-DGOOGLE_TEST=ON\",\n            \"-DCMAKE_CXX_FLAGS='-Wno-clang-diagnostic-deprecated-declarations'\",\n        ]\n        if self.use_dmlc_gtest:\n            cmake_args.append(\"-DUSE_DMLC_GTEST=ON\")\n        else:\n            cmake_args.append(\"-DUSE_DMLC_GTEST=OFF\")\n\n        if self.cuda_lint:\n            cmake_args.extend([\"-DUSE_CUDA=ON\", \"-DUSE_NCCL=ON\"])\n            if self.cuda_archs:\n                arch_list = \";\".join(self.cuda_archs)\n                cmake_args.append(f\"-DCMAKE_CUDA_ARCHITECTURES={arch_list}\")\n        subprocess.run(cmake_args)\n        os.chdir(self.root_path)\n\n    def convert_nvcc_command_to_clang(self, command: str) -> str:\n        \"\"\"Convert nvcc flags to corresponding clang flags.\"\"\"\n        components = command.split()\n        compiler: str = components[0]\n        if compiler.find(\"nvcc\") != -1:\n            compiler = \"clang++\"\n            components[0] = compiler\n        # check each component in a command\n        
converted_components = [compiler]\n\n        for i in range(1, len(components)):\n            if components[i] == \"-lineinfo\":\n                continue\n            elif components[i] == \"-fuse-ld=gold\":\n                continue\n            elif components[i] == \"-fuse-ld=lld\":\n                continue\n            elif components[i].find(\"--default-stream\") != -1:\n                continue\n            elif components[i] == \"-rdynamic\":\n                continue\n            elif components[i] == \"-Xfatbin=-compress-all\":\n                continue\n            elif components[i] == \"-forward-unknown-to-host-compiler\":\n                continue\n            elif components[i] == \"-x\" and components[i + 1] == \"cu\":\n                # -x cu -> -x cuda\n                converted_components.append(\"-x\")\n                converted_components.append(\"cuda\")\n                components[i + 1] = \"\"\n                continue\n            elif components[i].find(\"-Xcompiler\") != -1:\n                continue\n            elif components[i].find(\"--expt-\") != -1:\n                continue\n            elif components[i].find(\"-ccbin\") != -1:\n                continue\n            elif components[i].find(\"--generate-code\") != -1:\n                keyword = \"code=sm\"\n                pos = components[i].find(keyword)\n                capability = components[i][\n                    pos + len(keyword) + 1 : pos + len(keyword) + 3\n                ]\n                if pos != -1:\n                    converted_components.append(\"--cuda-gpu-arch=sm_\" + capability)\n            elif components[i].find(\"--std=c++14\") != -1:\n                converted_components.append(\"-std=c++14\")\n            elif components[i].startswith(\"-isystem=\"):\n                converted_components.extend(components[i].split(\"=\"))\n            else:\n                converted_components.append(components[i])\n\n        converted_components.append(\"-isystem 
/usr/local/cuda/include/\")\n\n        command = \"\"\n        for c in converted_components:\n            command = command + \" \" + c\n        command = command.strip()\n        return command\n\n    def _configure_flags(self, path: str, command: str) -> list[list[str]]:\n        src = os.path.join(self.root_path, \"src\").replace(\"/\", \"\\\\/\")\n        include = os.path.join(self.root_path, \"include\").replace(\"/\", \"\\\\/\")\n\n        header_filter = \"(\" + src + \"|\" + include + \")\"\n        common_args = [\n            self.exe,\n            path,\n            \"--header-filter=\" + header_filter,\n            \"--config-file=\" + self.tidy_file,\n        ]\n        common_args.append(\"--\")\n        command = self.convert_nvcc_command_to_clang(command)\n\n        command_split = command.split()[1:]  # remove clang/c++/g++\n        if \"-c\" in command_split:\n            index = command_split.index(\"-c\")\n            del command_split[index + 1]\n            command_split.remove(\"-c\")\n        if \"-o\" in command_split:\n            index = command_split.index(\"-o\")\n            del command_split[index + 1]\n            command_split.remove(\"-o\")\n\n        common_args.extend(command_split)\n\n        # Two passes, one for device code another for host code.\n        if path.endswith(\"cu\"):\n            args = [common_args.copy(), common_args.copy()]\n            args[0].append(\"--cuda-host-only\")\n            args[1].append(\"--cuda-device-only\")\n        else:\n            args = [common_args.copy()]\n        for a in args:\n            a.append(\"-Wno-unused-command-line-argument\")\n        return args\n\n    def _configure(self) -> list[list[str]]:\n        \"\"\"Load and configure compile_commands and clang_tidy.\"\"\"\n\n        def should_lint(path: str) -> bool:\n            if not self.cpp_lint and path.endswith(\".cc\"):\n                return False\n            isxgb = path.find(\"dmlc-core\") == -1\n            isxgb 
= isxgb and (not path.startswith(self.cdb_path))\n            if isxgb:\n                print(path)\n                return True\n            return False\n\n        cdb_file = os.path.join(self.cdb_path, \"compile_commands.json\")\n        with open(cdb_file, \"r\") as fd:\n            self.compile_commands = json.load(fd)\n\n        self.tidy_file = os.path.join(self.root_path, \".clang-tidy\")\n        all_files = []\n        for entry in self.compile_commands:\n            path = entry[\"file\"]\n            if should_lint(path):\n                args = self._configure_flags(path, entry[\"command\"])\n                all_files.extend(args)\n        return all_files\n\n    def run(self) -> bool:\n        \"\"\"Run clang-tidy.\"\"\"\n        all_files = self._configure()\n        passed = True\n        BAR = \"-\" * 32\n        with Pool(cpu_count()) as pool:\n            results = pool.map(call, all_files)\n            for i, (process_status, tidy_status, msg, args) in enumerate(results):\n                # Don't enforce clang-tidy to pass for now due to namespace\n                # for cub in thrust is not correct.\n                if tidy_status == 1:\n                    passed = False\n                    print(\n                        BAR,\n                        \"\\n\" \"Command args:\",\n                        \" \".join(args),\n                        \", \",\n                        \"Process return code:\",\n                        process_status,\n                        \", \",\n                        \"Tidy result code:\",\n                        tidy_status,\n                        \", \",\n                        \"Message:\\n\",\n                        msg,\n                        BAR,\n                        \"\\n\",\n                    )\n        if not passed:\n            print(\n                \"Errors in `thrust` namespace can be safely ignored.\",\n                \"Please address rest of the clang-tidy warnings.\",\n          
  )\n        return passed\n\n\ndef test_tidy(args: argparse.Namespace) -> None:\n    \"\"\"See if clang-tidy and our regex is working correctly.  There are many subtleties\n    we need to be careful. Tests here are not thorough, at least we want to guarantee\n    tidy is not missing anything on the CI.\n\n    \"\"\"\n    root_path = os.path.abspath(os.path.curdir)\n    tidy_file = os.path.join(root_path, \".clang-tidy\")\n    test_file_path = os.path.join(root_path, \"ops\", \"script\", \"test_tidy.cc\")\n\n    tidy_config = \"--config-file=\" + tidy_file\n    if not args.tidy_version:\n        tidy = \"clang-tidy\"\n    else:\n        tidy = \"clang-tidy-\" + str(args.tidy_version)\n    cmd = [tidy, tidy_config, test_file_path]\n    proc_code, tidy_status, error_msg, _ = call(cmd)\n    if proc_code != 0 or tidy_status != 1:\n        raise RuntimeError(error_msg)\n    print(\"clang-tidy is working.\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Run clang-tidy.\")\n    parser.add_argument(\"--cpp\", type=int, default=1)\n    parser.add_argument(\n        \"--tidy-version\",\n        type=int,\n        default=None,\n        help=\"Specify the version of preferred clang-tidy.\",\n    )\n    parser.add_argument(\"--cuda\", type=int, default=1)\n    parser.add_argument(\n        \"--use-dmlc-gtest\",\n        action=\"store_true\",\n        help=\"Whether to use gtest bundled in dmlc-core.\",\n    )\n    parser.add_argument(\n        \"--cuda-archs\", action=\"append\", help=\"List of CUDA archs to build\"\n    )\n    args = parser.parse_args()\n\n    test_tidy(args)\n\n    with ClangTidy(args) as linter:\n        passed = linter.run()\n    if not passed:\n        sys.exit(1)\n"
  },
  {
    "path": "ops/script/setup_r_sccache.sh",
    "content": "#!/bin/bash\n# Configure R to use sccache for compiling packages.\n# This script creates ~/.R/Makevars with sccache compiler wrappers.\n\nset -euo pipefail\n\nif [ -f ~/.R/Makevars ]; then\n    echo \"Error: ~/.R/Makevars already exists. Aborting to avoid overwriting.\"\n    exit 1\nfi\n\nmkdir -p ~/.R\ncat > ~/.R/Makevars << 'EOF'\nCC = sccache gcc\nCXX = sccache g++\nCXX11 = sccache g++\nCXX14 = sccache g++\nCXX17 = sccache g++\nCXX20 = sccache g++\nEOF\n\necho \"Configured R to use sccache via ~/.R/Makevars\"\n"
  },
  {
    "path": "ops/script/test_r_package.py",
    "content": "\"\"\"Utilities for packaging R code and running tests.\"\"\"\n\nimport argparse\nimport os\nimport shutil\nimport subprocess\nfrom io import StringIO\nfrom pathlib import Path\nfrom platform import system\n\ntry:\n    import pandas as pd\nexcept ImportError:\n    pd = None\n\nfrom test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time\n\n\ndef get_mingw_bin() -> str:\n    return os.path.join(\"c:/rtools40/mingw64/\", \"bin\")\n\n\n@cd(ROOT)\n@record_time\ndef pack_rpackage() -> Path:\n    \"\"\"Compose the directory used for creating R package tar ball.\"\"\"\n    dest = Path(\"xgboost\")\n\n    def pkgroot(path: str) -> None:\n        \"\"\"Change makefiles according to the package layout.\"\"\"\n        with open(Path(\"R-package\") / \"src\" / path, \"r\") as fd:\n            makefile = fd.read()\n            makefile = makefile.replace(\"PKGROOT=../../\", \"PKGROOT=.\", 1)\n        with open(dest / \"src\" / path, \"w\") as fd:\n            fd.write(makefile)\n\n    output = subprocess.run([\"git\", \"clean\", \"-xdf\", \"--dry-run\"], capture_output=True)\n    if output.returncode != 0:\n        raise ValueError(\"Failed to check git repository status.\", output)\n    if len(output.stdout) == 0:\n        would_remove = None\n    else:\n        would_remove = output.stdout.decode(\"utf-8\").strip().split(\"\\n\")\n\n    if would_remove and not all(f.find(\"ops\") != -1 for f in would_remove):\n        raise ValueError(\n            \"\\n\".join(would_remove) + \"\\nPlease cleanup the working git repository.\"\n        )\n\n    shutil.copytree(\"R-package\", dest)\n    os.remove(dest / \"bootstrap.R\")\n    # core\n    shutil.copytree(\"src\", dest / \"src\" / \"src\")\n    shutil.copytree(\"include\", dest / \"src\" / \"include\")\n    shutil.copytree(\"amalgamation\", dest / \"src\" / \"amalgamation\")\n    # dmlc-core\n    dmlc_core = Path(\"dmlc-core\")\n    os.mkdir(dest / \"src\" / dmlc_core)\n    
shutil.copytree(dmlc_core / \"include\", dest / \"src\" / \"dmlc-core\" / \"include\")\n    shutil.copytree(dmlc_core / \"src\", dest / \"src\" / \"dmlc-core\" / \"src\")\n    # makefile & license\n    shutil.copyfile(\"LICENSE\", dest / \"LICENSE\")\n    osxmakef = dest / \"src\" / \"Makevars.win-e\"\n    if os.path.exists(osxmakef):\n        os.remove(osxmakef)\n    pkgroot(\"Makevars.in\")\n    pkgroot(\"Makevars.win.in\")\n    # misc\n    rwsp = Path(\"R-package\") / \"remove_warning_suppression_pragma.sh\"\n    if system() != \"Windows\":\n        subprocess.check_call(rwsp)\n    rwsp = dest / \"remove_warning_suppression_pragma.sh\"\n    if system() != \"Windows\":\n        subprocess.check_call(rwsp)\n    os.remove(rwsp)\n    os.remove(dest / \"CMakeLists.txt\")\n    shutil.rmtree(dest / \"tests\" / \"helper_scripts\")\n    return dest\n\n\n@cd(ROOT)\n@record_time\ndef build_rpackage(path: str) -> str:\n    def find_tarball() -> str:\n        found = []\n        for root, subdir, files in os.walk(\".\"):\n            for f in files:\n                if f.endswith(\".tar.gz\") and f.startswith(\"xgboost\"):\n                    found.append(os.path.join(root, f))\n        if not found:\n            raise ValueError(\"Failed to find output tar ball.\")\n        if len(found) > 1:\n            raise ValueError(\"Found more than one packages:\", found)\n        return found[0]\n\n    env = os.environ.copy()\n    print(\"Ncpus:\", f\"{os.cpu_count()}\")\n    env.update({\"MAKEFLAGS\": f\"-j{os.cpu_count()}\"})\n    subprocess.check_call([R, \"CMD\", \"build\", path], env=env)\n\n    tarball = find_tarball()\n    return tarball\n\n\ndef check_example_timing(rcheck_dir: Path, threshold: float) -> None:\n    with open(rcheck_dir / \"xgboost-Ex.timings\", \"r\") as fd:\n        timings = fd.readlines()\n        newlines = []\n        for line in timings:\n            line = line.strip()\n            newlines.append(line)\n        con_timings = 
\"\\n\".join(newlines)\n        df = pd.read_csv(StringIO(con_timings), delimiter=\"\\t\")\n        ratio_n = \"user/elapsed\"\n        df[ratio_n] = df[\"user\"] / df[\"elapsed\"]\n        offending = df[df[ratio_n] > threshold]\n\n    try:\n        # requires the tabulate package\n        df.to_markdown(\"timings.md\")\n        offending.to_markdown(\"offending.md\")\n    except ImportError:\n        print(\"failed to export markdown files.\")\n        pass\n\n    if offending.shape[0] == 0:\n        return\n\n    print(offending)\n    raise ValueError(\"There are examples using too many threads\")\n\n\n@cd(ROOT)\n@record_time\ndef check_rpackage(path: str) -> None:\n    env = os.environ.copy()\n    print(\"Ncpus:\", f\"{os.cpu_count()}\")\n    threshold = 2.5\n    env.update(\n        {\n            \"MAKEFLAGS\": f\"-j{os.cpu_count()}\",\n            # cran specific environment variables\n            \"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_\": str(threshold),\n            \"_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_\": str(threshold),\n            \"_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_\": str(threshold),\n        }\n    )\n\n    # Actually we don't run this check on windows due to dependency issue.\n    if system() == \"Windows\":\n        # make sure compiler from rtools is used.\n        mingw_bin = get_mingw_bin()\n        CXX = os.path.join(mingw_bin, \"g++.exe\")\n        CC = os.path.join(mingw_bin, \"gcc.exe\")\n        env.update({\"CC\": CC, \"CXX\": CXX})\n\n    status = subprocess.run(\n        [R, \"CMD\", \"check\", \"--as-cran\", \"--timings\", path], env=env\n    )\n    rcheck_dir = Path(\"xgboost.Rcheck\")\n    with open(rcheck_dir / \"00check.log\", \"r\") as fd:\n        check_log = fd.read()\n\n    with open(rcheck_dir / \"00install.out\", \"r\") as fd:\n        install_log = fd.read()\n\n    msg = f\"\"\"\n----------------------- Install ----------------------\n{install_log}\n\n-----------------------  Check 
-----------------------\n{check_log}\n\n    \"\"\"\n\n    if status.returncode != 0:\n        print(msg)\n        raise ValueError(\"Failed r package check.\")\n\n    if check_log.find(\"WARNING\") != -1:\n        print(msg)\n        raise ValueError(\"Has unresolved warnings.\")\n    if check_log.find(\"Examples with CPU time\") != -1:\n        print(msg)\n        raise ValueError(\"Suspicious NOTE.\")\n    if pd is not None:\n        check_example_timing(rcheck_dir, threshold)\n\n\n@cd(R_PACKAGE)\n@record_time\ndef check_rmarkdown() -> None:\n    assert system() != \"Windows\", \"Document test doesn't support Windows.\"\n    env = os.environ.copy()\n    env.update({\"MAKEFLAGS\": f\"-j{os.cpu_count()}\"})\n    print(\"Checking R documentation.\")\n    bin_dir = os.path.dirname(R)\n    rscript = os.path.join(bin_dir, \"Rscript\")\n    subprocess.check_call([rscript, \"-e\", \"roxygen2::roxygenize()\"], env=env)\n    output = subprocess.run([\"git\", \"diff\", \"--name-only\"], capture_output=True)\n    if len(output.stdout.decode(\"utf-8\").strip()) != 0:\n        output = subprocess.run([\"git\", \"diff\"], capture_output=True)\n        raise ValueError(\n            \"Please run `roxygen2::roxygenize()`. Diff:\\n\",\n            output.stdout.decode(\"utf-8\"),\n        )\n\n\n@cd(R_PACKAGE)\n@record_time\ndef test_with_autotools() -> None:\n    \"\"\"Windows only test. No `--as-cran` check, only unittests. 
We don't want to manage\n    the dependencies on Windows machine.\n\n    \"\"\"\n    assert system() == \"Windows\"\n    mingw_bin = get_mingw_bin()\n    CXX = os.path.join(mingw_bin, \"g++.exe\")\n    CC = os.path.join(mingw_bin, \"gcc.exe\")\n    cmd = [R, \"CMD\", \"INSTALL\", str(os.path.curdir)]\n    env = os.environ.copy()\n    env.update({\"CC\": CC, \"CXX\": CXX, \"MAKEFLAGS\": f\"-j{os.cpu_count()}\"})\n    subprocess.check_call(cmd, env=env)\n    subprocess.check_call(\n        [\"R.exe\", \"-q\", \"-e\", \"library(testthat); setwd('tests'); source('testthat.R')\"]\n    )\n\n\n@record_time\ndef test_with_cmake(args: argparse.Namespace) -> None:\n    os.mkdir(\"build\")\n    with DirectoryExcursion(\"build\"):\n        if args.compiler == \"mingw\":\n            mingw_bin = get_mingw_bin()\n            CXX = os.path.join(mingw_bin, \"g++.exe\")\n            CC = os.path.join(mingw_bin, \"gcc.exe\")\n            env = os.environ.copy()\n            env.update({\"CC\": CC, \"CXX\": CXX})\n            subprocess.check_call(\n                [\n                    \"cmake\",\n                    os.path.pardir,\n                    \"-DUSE_OPENMP=ON\",\n                    \"-DR_LIB=ON\",\n                    \"-DCMAKE_CONFIGURATION_TYPES=Release\",\n                    \"-G\",\n                    \"Unix Makefiles\",\n                ],\n                env=env,\n            )\n            subprocess.check_call([\"make\", \"-j\", \"install\"])\n        elif args.compiler == \"msvc\":\n            subprocess.check_call(\n                [\n                    \"cmake\",\n                    os.path.pardir,\n                    \"-DUSE_OPENMP=ON\",\n                    \"-DR_LIB=ON\",\n                    \"-DCMAKE_CONFIGURATION_TYPES=Release\",\n                    \"-A\",\n                    \"x64\",\n                    \"-G\",\n                    \"Visual Studio 17 2022\",\n                ]\n            )\n            subprocess.check_call(\n             
   [\n                    \"cmake\",\n                    \"--build\",\n                    os.path.curdir,\n                    \"--target\",\n                    \"install\",\n                    \"--config\",\n                    \"Release\",\n                ]\n            )\n        elif args.compiler == \"none\":\n            subprocess.check_call(\n                [\n                    \"cmake\",\n                    os.path.pardir,\n                    \"-DUSE_OPENMP=ON\",\n                    \"-DR_LIB=ON\",\n                    \"-DCMAKE_CONFIGURATION_TYPES=Release\",\n                    \"-G\",\n                    \"Unix Makefiles\",\n                ]\n            )\n            subprocess.check_call([\"make\", \"-j\", \"install\"])\n        else:\n            raise ValueError(\"Wrong compiler\")\n    with DirectoryExcursion(R_PACKAGE):\n        subprocess.check_call(\n            [\n                R,\n                \"-q\",\n                \"-e\",\n                \"library(testthat); setwd('tests'); source('testthat.R')\",\n            ]\n        )\n\n\n@record_time\ndef test_with_rchk() -> None:\n    \"\"\"Test with rchk, which is one of the additional checks in CRAN.\n\n    See https://github.com/kalibera/rchk/blob/master/doc/DOCKER.md for reference.\n\n    \"\"\"\n    results_dir = os.path.join(ROOT, \"rchk_results\")\n    if os.path.exists(results_dir):\n        raise ValueError(f\"{results_dir} exists, please remove it first.\")\n    src_dir = pack_rpackage()\n    tarball = build_rpackage(src_dir)\n\n    os.mkdir(results_dir)\n    shutil.copyfile(tarball, os.path.join(results_dir, tarball))\n\n    tarball = os.path.basename(tarball)\n    pkgpath = os.path.join(\"/rchk/packages/\", tarball)\n    image = \"kalibera/rchk:latest\"\n    cmd = [\n        \"docker\",\n        \"run\",\n        \"--rm\",\n        \"--mount\",\n        f\"type=bind,src={results_dir},dst=/rchk/packages\",\n        image,\n        pkgpath,\n    ]\n    
subprocess.check_call(cmd)\n\n\n@record_time\ndef main(args: argparse.Namespace) -> None:\n    match args.task:\n        case \"pack\":\n            pack_rpackage()\n        case \"build\":\n            src_dir = pack_rpackage()\n            build_rpackage(src_dir)\n        case \"doc\":\n            check_rmarkdown()\n        case \"check\":\n            if args.build_tool == \"autotools\" and system() != \"Windows\":\n                src_dir = pack_rpackage()\n                tarball = build_rpackage(src_dir)\n                check_rpackage(tarball)\n            elif args.build_tool == \"autotools\":\n                test_with_autotools()\n            else:\n                test_with_cmake(args)\n        case \"rchk\":\n            test_with_rchk()\n        case \"timings\":\n            check_example_timing(Path(\"xgboost.Rcheck\"), 2.5)\n        case _:\n            raise ValueError(\"Unexpected task.\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=(\n            \"Helper script for making R package and running R tests on CI. There are\"\n            \" also other helper scripts in the R tests directory for installing\"\n            \" dependencies and running linter.\"\n        )\n    )\n    parser.add_argument(\n        \"--task\",\n        type=str,\n        choices=[\"pack\", \"build\", \"check\", \"doc\", \"timings\", \"rchk\"],\n        default=\"check\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--compiler\",\n        type=str,\n        choices=[\"mingw\", \"msvc\", \"none\"],\n        help=\"Compiler used for compiling CXX code. 
Only relevant for windows build\",\n        default=\"none\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--build-tool\",\n        type=str,\n        choices=[\"cmake\", \"autotools\"],\n        help=\"Build tool for compiling CXX code and install R package.\",\n        default=\"autotools\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--r\",\n        type=str,\n        default=\"R\" if system() != \"Windows\" else \"R.exe\",\n        help=\"Path to the R executable.\",\n    )\n    args = parser.parse_args()\n    R = args.r\n\n    try:\n        main(args)\n    finally:\n        print_time()\n"
  },
  {
    "path": "ops/script/test_tidy.cc",
    "content": "#include <iostream>\n#include <vector>\n\nstruct Foo {\n  int bar_;\n};\n\nint main() {\n  std::vector<Foo> values;\n  values.push_back(Foo());\n}\n"
  },
  {
    "path": "ops/script/test_utils.py",
    "content": "\"\"\"Utilities for the CI.\"\"\"\n\nimport os\nfrom datetime import datetime, timedelta\nfrom functools import wraps\nfrom typing import Any, Callable, Dict, TypedDict, TypeVar, Union\n\n\nclass DirectoryExcursion:\n    def __init__(self, path: Union[os.PathLike, str]) -> None:\n        self.path = path\n        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))\n\n    def __enter__(self) -> None:\n        os.chdir(self.path)\n\n    def __exit__(self, *args: Any) -> None:\n        os.chdir(self.curdir)\n\n\nR = TypeVar(\"R\")\n\n\ndef cd(path: Union[os.PathLike, str]) -> Callable:\n    \"\"\"Decorator for changing directory temporarily.\"\"\"\n\n    def chdir(func: Callable[..., R]) -> Callable[..., R]:\n        @wraps(func)\n        def inner(*args: Any, **kwargs: Any) -> R:\n            with DirectoryExcursion(path):\n                return func(*args, **kwargs)\n\n        return inner\n\n    return chdir\n\n\nRecord = TypedDict(\"Record\", {\"count\": int, \"total\": timedelta})\ntimer: Dict[str, Record] = {}\n\n\ndef record_time(func: Callable[..., R]) -> Callable[..., R]:\n    \"\"\"Decorator for recording function runtime.\"\"\"\n    global timer\n\n    @wraps(func)\n    def inner(*args: Any, **kwargs: Any) -> R:\n        if func.__name__ not in timer:\n            timer[func.__name__] = {\"count\": 0, \"total\": timedelta(0)}\n        s = datetime.now()\n        try:\n            r = func(*args, **kwargs)\n        finally:\n            e = datetime.now()\n            timer[func.__name__][\"count\"] += 1\n            timer[func.__name__][\"total\"] += e - s\n        return r\n\n    return inner\n\n\ndef print_time() -> None:\n    \"\"\"Print all recorded items by :py:func:`record_time`.\"\"\"\n    global timer\n    for k, v in timer.items():\n        print(\n            \"Name:\",\n            k,\n            \"Called:\",\n            v[\"count\"],\n            \"Elapsed:\",\n            f\"{v['total'].seconds} secs\",\n        
)\n\n\nROOT = os.path.normpath(\n    os.path.join(\n        os.path.dirname(os.path.abspath(__file__)), os.path.pardir, os.path.pardir\n    )\n)\nR_PACKAGE = os.path.join(ROOT, \"R-package\")\nJVM_PACKAGES = os.path.join(ROOT, \"jvm-packages\")\nPY_PACKAGE = os.path.join(ROOT, \"python-package\")\n"
  },
  {
    "path": "ops/script/type_check_python.py",
    "content": "import os\nimport subprocess\nimport sys\nfrom typing import List\n\nfrom test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time\n\n\nclass TypeCheckPaths:\n    \"\"\"The paths mypy runs on.\"\"\"\n\n    MYPY = (\n        # core\n        \"python-package/\",\n        # tests\n        \"tests/python/generate_models.py\",\n        \"tests/python/test_model_compatibility.py\",\n        \"tests/python/test_collective.py\",\n        \"tests/python/test_demos.py\",\n        \"tests/python/test_data_iterator.py\",\n        \"tests/python/test_multi_target.py\",\n        \"tests/python/test_intercept.py\",\n        \"tests/python/test_model_io.py\",\n        \"tests/python/test_ordinal.py\",\n        \"tests/python/test_interaction_constraints.py\",\n        \"tests/python-gpu/\",\n        \"tests/test_distributed/test_federated/\",\n        \"tests/test_distributed/test_gpu_federated/\",\n        \"tests/test_distributed/test_with_dask/\",\n        \"tests/test_distributed/test_with_spark/test_data.py\",\n        \"tests/test_distributed/test_gpu_with_dask/\",\n        # demo\n        \"demo/dask/\",\n        \"demo/guide-python/\",\n        \"demo/aft_survival/aft_survival_viz_demo.py\",\n        # CI\n        \"ops/\",\n    )\n\n\ndef check_cmd_print_failure_assistance(cmd: List[str]) -> bool:\n    if subprocess.run(cmd).returncode == 0:\n        return True\n\n    subprocess.run([cmd[0], \"--version\"])\n    msg = \"\"\"\nPlease run the following command on your machine to address the error:\n\n    \"\"\"\n    msg += \" \".join(cmd)\n    print(msg, file=sys.stderr)\n    return False\n\n\n@record_time\n@cd(PY_PACKAGE)\ndef run_mypy(rel_path: str) -> bool:\n    cmd = [\"mypy\", os.path.join(ROOT, rel_path)]\n    return check_cmd_print_failure_assistance(cmd)\n\n\n@record_time\ndef main() -> None:\n    mypy_results = [run_mypy(path) for path in TypeCheckPaths.MYPY]\n    if not all(mypy_results):\n        sys.exit(-1)\n\n\nif __name__ == 
\"__main__\":\n    try:\n        main()\n    finally:\n        print_time()\n"
  },
  {
    "path": "ops/script/verify_link.sh",
    "content": "# Make sure the dependencies of XGBoost don't appear in directly downstream project.\n# Pass the executable as argument for this script\n\nif readelf -d $1 | grep \"omp\";\nthen\n    echo \"Found openmp in direct dependency\"\n    exit -1\nelse\n    exit 0\nfi\n\nif readelf -d $1 | grep \"pthread\";\nthen\n    echo \"Found pthread in direct dependency\"\n    exit -1\nelse\n    exit 0\nfi\n"
  },
  {
    "path": "plugin/CMakeLists.txt",
    "content": "if(PLUGIN_SYCL)\n  set(CMAKE_CXX_COMPILER \"icpx\")\n  file(GLOB_RECURSE SYCL_SOURCES \"sycl/*.cc\")\n    list(APPEND SYCL_SOURCES\n    ${xgboost_SOURCE_DIR}/src/objective/regression_obj.cc\n    ${xgboost_SOURCE_DIR}/src/objective/hinge.cc\n    ${xgboost_SOURCE_DIR}/src/objective/quantile_obj.cc\n    ${xgboost_SOURCE_DIR}/src/objective/multiclass_obj.cc)\n  add_library(plugin_sycl OBJECT ${SYCL_SOURCES})\n  target_include_directories(plugin_sycl\n    PRIVATE\n    ${xgboost_SOURCE_DIR}/include\n    ${xgboost_SOURCE_DIR}/dmlc-core/include\n    ${xgboost_SOURCE_DIR}/rabit/include)\n    target_compile_definitions(plugin_sycl PUBLIC -DXGBOOST_USE_SYCL=1)\n    target_link_libraries(plugin_sycl PUBLIC -fsycl)\n    set_target_properties(plugin_sycl PROPERTIES\n    COMPILE_FLAGS \"-fsycl -fno-sycl-id-queries-fit-in-int\"\n    CXX_STANDARD 17\n    CXX_STANDARD_REQUIRED ON\n    POSITION_INDEPENDENT_CODE ON)\n  if(USE_OPENMP)\n    find_package(OpenMP REQUIRED)\n    set_target_properties(plugin_sycl PROPERTIES\n    COMPILE_FLAGS \"-fsycl -fno-sycl-id-queries-fit-in-int -qopenmp\")\n  endif()\n  # Get compilation and link flags of plugin_sycl and propagate to objxgboost\n  target_link_libraries(objxgboost PUBLIC plugin_sycl)\n  # Add all objects of plugin_sycl to objxgboost\n  target_sources(objxgboost INTERFACE $<TARGET_OBJECTS:plugin_sycl>)\nendif()\n\n# Add the Federate Learning plugin if enabled.\nif(PLUGIN_FEDERATED)\n  add_subdirectory(federated)\nendif()\n"
  },
  {
    "path": "plugin/README.md",
    "content": "XGBoost Plugins Modules\n=======================\n\nThis folder contains plugin modules to xgboost that can be optionally installed.  The\nplugin system helps us to extend xgboost with additional features, and add experimental\nfeatures that may not yet be ready to be included in the main project.\n\nTo include a certain plugin, say ```plugin_a```, you only need to add the following line\nto `xgboost/plugin/CMakeLists.txt`\n``` cmake\nset(PLUGIN_SOURCES ${PLUGIN_SOURCES}\n    ${xgboost_SOURCE_DIR}/plugin/plugin_a.cc PARENT_SCOPE)\n```\nalong with specified source file `plugin_a.cc`.\n\nThen rebuild XGBoost with CMake.\n\nWrite Your Own Plugin\n---------------------\nYou can plugin your own modules to xgboost by adding code to this folder,\nwithout modification to the main code repo.\nThe [example](example) folder provides an example to write a plugin.\n\nList of register functions\n--------------------------\nA plugin has to register a new functionality to xgboost to be able to use it.\nThe register macros available to plugin writers are:\n\n - XGBOOST_REGISTER_METRIC - Register an evaluation metric\n - XGBOOST_REGISTER_GBM - Register a new gradient booster that learns through\n   gradient statistics\n - XGBOOST_REGISTER_OBJECTIVE - Register a new objective function used by xgboost\n - XGBOOST_REGISTER_TREE_UPDATER - Register a new tree-updater which updates\n   the tree given the gradient information\n\nAnd from dmlc-core:\n\n - DMLC_REGISTER_PARAMETER - Register a set of parameter for a specific usecase\n"
  },
  {
    "path": "plugin/example/README.md",
    "content": "XGBoost Plugin Example\n======================\nThis folder provides an example of implementing xgboost plugin.\n\nThere are three steps you need to do to add a plugin to xgboost\n- Create your source .cc file, implement a new extension\n  - In this example [custom_obj.cc](custom_obj.cc)\n- Register this extension to xgboost via a registration macro\n  - In this example ```XGBOOST_REGISTER_OBJECTIVE``` in [this line](custom_obj.cc#L78)\n- Add a line to `xgboost/plugin/CMakeLists.txt`:\n```\ntarget_sources(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/example/custom_obj.cc)\n```\n\nThen you can test this plugin by using ```objective=mylogistic``` parameter.\n\n<!--  LocalWords:  XGBoost\n -->\n"
  },
  {
    "path": "plugin/example/custom_obj.cc",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file custom_metric.cc\n * \\brief This is an example to define plugin of xgboost.\n *  This plugin defines the additional metric function.\n */\n#include <xgboost/base.h>\n#include <xgboost/json.h>\n#include <xgboost/linalg.h>  // for Vector\n#include <xgboost/objective.h>\n#include <xgboost/parameter.h>\n\nnamespace xgboost::obj {\n// This is a helpful data structure to define parameters\n// You do not have to use it.\n// see http://dmlc-core.readthedocs.org/en/latest/parameter.html\n// for introduction of this module.\nstruct MyLogisticParam : public XGBoostParameter<MyLogisticParam> {\n  float scale_neg_weight;\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(MyLogisticParam) {\n    DMLC_DECLARE_FIELD(scale_neg_weight).set_default(1.0f).set_lower_bound(0.0f)\n        .describe(\"Scale the weight of negative examples by this factor\");\n  }\n};\n\nDMLC_REGISTER_PARAMETER(MyLogisticParam);\n\n// Define a customized logistic regression objective in C++.\n// Implement the interface.\nclass MyLogistic : public ObjFunction {\n public:\n  void Configure(const Args& args) override { param_.UpdateAllowUnknown(args); }\n\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  void GetGradient(const HostDeviceVector<float>& preds, MetaInfo const& info,\n                   std::int32_t /*iter*/, linalg::Matrix<GradientPair>* out_gpair) override {\n    out_gpair->Reshape(info.num_row_, 1);\n    const std::vector<float>& preds_h = preds.HostVector();\n    auto out_gpair_h = out_gpair->HostView();\n    auto const labels_h = info.labels.HostView();\n    for (size_t i = 0; i < preds_h.size(); ++i) {\n      float w = info.GetWeight(i);\n      // scale the negative examples!\n      if (labels_h(i) == 0.0f) w *= param_.scale_neg_weight;\n      // logistic transformation\n      float p = 1.0f / (1.0f + std::exp(-preds_h[i]));\n      // this is the gradient\n      float grad = (p - 
labels_h(i)) * w;\n      // this is the second order gradient\n      float hess = p * (1.0f - p) * w;\n      out_gpair_h(i) = GradientPair(grad, hess);\n    }\n  }\n  [[nodiscard]] const char* DefaultEvalMetric() const override {\n    return \"logloss\";\n  }\n  void PredTransform(HostDeviceVector<float> *io_preds) const override {\n    // transform margin value to probability.\n    std::vector<float> &preds = io_preds->HostVector();\n    for (auto& pred : preds) {\n      pred = 1.0f / (1.0f + std::exp(-pred));\n    }\n  }\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    // transform probability to margin value\n    auto h_intercept = base_score->HostView();\n    for (std::size_t i = 0, n = h_intercept.Size(); i < n; ++i) {\n      h_intercept(i) = -std::log(1.0f / h_intercept(i) - 1.0f);\n    }\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"mylogistic\");\n    out[\"my_logistic_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override {\n    FromJson(in[\"my_logistic_param\"], &param_);\n  }\n\n private:\n  MyLogisticParam param_;\n};\n\n// Finally register the objective function.\n// After it succeeds you can try use xgboost with objective=mylogistic\nXGBOOST_REGISTER_OBJECTIVE(MyLogistic, \"mylogistic\")\n.describe(\"User defined logistic regression plugin\")\n.set_body([]() { return new MyLogistic(); });\n\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "plugin/federated/CMakeLists.txt",
    "content": "# gRPC needs to be installed first. See README.md.\nset(protobuf_MODULE_COMPATIBLE TRUE)\nset(protobuf_BUILD_SHARED_LIBS TRUE)\n\nfind_package(Protobuf CONFIG)\nif(NOT Protobuf_FOUND)\n  find_package(Protobuf)\nendif()\nif(NOT Protobuf_FOUND)\n  # let CMake emit error\n  find_package(Protobuf CONFIG REQUIRED)\nendif()\n\nfind_package(gRPC CONFIG REQUIRED)\nmessage(STATUS \"Found gRPC: ${gRPC_CONFIG}\")\n\n# Generated code from the protobuf definition.\nadd_library(federated_proto STATIC federated.proto)\ntarget_link_libraries(federated_proto PUBLIC protobuf::libprotobuf gRPC::grpc gRPC::grpc++)\ntarget_include_directories(federated_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})\nxgboost_target_properties(federated_proto)\n\nprotobuf_generate(\n    TARGET federated_proto\n    LANGUAGE cpp\n    PROTOC_OUT_DIR \"${PROTO_BINARY_DIR}\")\nprotobuf_generate(\n    TARGET federated_proto\n    LANGUAGE grpc\n    GENERATE_EXTENSIONS .grpc.pb.h .grpc.pb.cc\n    PLUGIN \"protoc-gen-grpc=\\$<TARGET_FILE:gRPC::grpc_cpp_plugin>\"\n    PROTOC_OUT_DIR \"${PROTO_BINARY_DIR}\")\n\n# Wrapper for the gRPC client.\nadd_library(federated_client INTERFACE)\ntarget_link_libraries(federated_client INTERFACE federated_proto)\n\n# Rabit engine for Federated Learning.\ntarget_sources(\n  objxgboost PRIVATE federated_tracker.cc federated_comm.cc federated_coll.cc\n)\nif(USE_CUDA)\n  target_sources(objxgboost PRIVATE federated_comm.cu federated_coll.cu)\nendif()\n\ntarget_link_libraries(objxgboost PRIVATE federated_client \"-Wl,--exclude-libs,ALL\")\ntarget_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_FEDERATED=1)\n"
  },
  {
    "path": "plugin/federated/README.md",
    "content": "XGBoost Plugin for Federated Learning\n=====================================\n\nThis folder contains the plugin for federated learning.\n\nSee [build instruction](../../doc/build.rst) for how to build the plugin.\n\n\nTest Federated XGBoost\n----------------------\n```shell\n# Under xgboost source tree.\ncd tests/distributed/test_federated\n# This tests both CPU training (`hist`) and GPU training (`gpu_hist`).\n./runtests-federated.sh\n```\n"
  },
  {
    "path": "plugin/federated/federated.proto",
    "content": "/*!\n * Copyright 2022-2023 XGBoost contributors\n */\nsyntax = \"proto3\";\n\npackage xgboost.collective.federated;\n\nservice Federated {\n  rpc Allgather(AllgatherRequest) returns (AllgatherReply) {}\n  rpc AllgatherV(AllgatherVRequest) returns (AllgatherVReply) {}\n  rpc Allreduce(AllreduceRequest) returns (AllreduceReply) {}\n  rpc Broadcast(BroadcastRequest) returns (BroadcastReply) {}\n}\n\nenum DataType {\n  HALF = 0;\n  FLOAT = 1;\n  DOUBLE = 2;\n  LONG_DOUBLE = 3;\n  INT8 = 4;\n  INT16 = 5;\n  INT32 = 6;\n  INT64 = 7;\n  UINT8 = 8;\n  UINT16 = 9;\n  UINT32 = 10;\n  UINT64 = 11;\n}\n\nenum ReduceOperation {\n  MAX = 0;\n  MIN = 1;\n  SUM = 2;\n  BITWISE_AND = 3;\n  BITWISE_OR = 4;\n  BITWISE_XOR = 5;\n}\n\nmessage AllgatherRequest {\n  // An incrementing counter that is unique to each round to operations.\n  uint64 sequence_number = 1;\n  int32 rank = 2;\n  bytes send_buffer = 3;\n}\n\nmessage AllgatherReply {\n  bytes receive_buffer = 1;\n}\n\nmessage AllgatherVRequest {\n  // An incrementing counter that is unique to each round to operations.\n  uint64 sequence_number = 1;\n  int32 rank = 2;\n  bytes send_buffer = 3;\n}\n\nmessage AllgatherVReply {\n  bytes receive_buffer = 1;\n}\n\nmessage AllreduceRequest {\n  // An incrementing counter that is unique to each round to operations.\n  uint64 sequence_number = 1;\n  int32 rank = 2;\n  bytes send_buffer = 3;\n  DataType data_type = 4;\n  ReduceOperation reduce_operation = 5;\n}\n\nmessage AllreduceReply {\n  bytes receive_buffer = 1;\n}\n\nmessage BroadcastRequest {\n  // An incrementing counter that is unique to each round to operations.\n  uint64 sequence_number = 1;\n  int32 rank = 2;\n  bytes send_buffer = 3;\n  // The root rank to broadcast from.\n  int32 root = 4;\n}\n\nmessage BroadcastReply {\n  bytes receive_buffer = 1;\n}\n"
  },
  {
    "path": "plugin/federated/federated_coll.cc",
    "content": "/**\n * Copyright 2023, XGBoost contributors\n */\n#include \"federated_coll.h\"\n\n#include <federated.grpc.pb.h>\n#include <federated.pb.h>\n\n#include <algorithm>  // for copy_n\n\n#include \"../../src/collective/allgather.h\"\n#include \"../../src/common/common.h\"    // for AssertGPUSupport\n#include \"federated_comm.h\"             // for FederatedComm\n#include \"xgboost/collective/result.h\"  // for Result\n\nnamespace xgboost::collective {\nnamespace {\n[[nodiscard]] Result GetGRPCResult(std::string const &name, grpc::Status const &status) {\n  return Fail(name + \" RPC failed. \" + std::to_string(status.error_code()) + \": \" +\n              status.error_message());\n}\n\n[[nodiscard]] Result BroadcastImpl(Comm const &comm, std::uint64_t *sequence_number,\n                                   common::Span<std::int8_t> data, std::int32_t root) {\n  using namespace federated;  // NOLINT\n\n  auto fed = dynamic_cast<FederatedComm const *>(&comm);\n  CHECK(fed);\n  auto stub = fed->Handle();\n\n  BroadcastRequest request;\n  request.set_sequence_number((*sequence_number)++);\n  request.set_rank(comm.Rank());\n  if (comm.Rank() != root) {\n    request.set_send_buffer(nullptr, 0);\n  } else {\n    request.set_send_buffer(data.data(), data.size());\n  }\n  request.set_root(root);\n\n  BroadcastReply reply;\n  grpc::ClientContext context;\n  context.set_wait_for_ready(true);\n  grpc::Status status = stub->Broadcast(&context, request, &reply);\n  if (!status.ok()) {\n    return GetGRPCResult(\"Broadcast\", status);\n  }\n  if (comm.Rank() != root) {\n    auto const &r = reply.receive_buffer();\n    std::copy_n(r.cbegin(), r.size(), data.data());\n  }\n\n  return Success();\n}\n}  // namespace\n\n#if !defined(XGBOOST_USE_CUDA)\nColl *FederatedColl::MakeCUDAVar() {\n  common::AssertGPUSupport();\n  return nullptr;\n}\n#endif\n\n[[nodiscard]] Result FederatedColl::Allreduce(Comm const &comm, common::Span<std::int8_t> data,\n                             
                 ArrayInterfaceHandler::Type type, Op op) {\n  using namespace federated;  // NOLINT\n  auto fed = dynamic_cast<FederatedComm const *>(&comm);\n  CHECK(fed);\n  auto stub = fed->Handle();\n\n  AllreduceRequest request;\n  request.set_sequence_number(sequence_number_++);\n  request.set_rank(comm.Rank());\n  request.set_send_buffer(data.data(), data.size());\n  request.set_data_type(static_cast<::xgboost::collective::federated::DataType>(type));\n  request.set_reduce_operation(static_cast<::xgboost::collective::federated::ReduceOperation>(op));\n\n  AllreduceReply reply;\n  grpc::ClientContext context;\n  context.set_wait_for_ready(true);\n  grpc::Status status = stub->Allreduce(&context, request, &reply);\n  if (!status.ok()) {\n    return GetGRPCResult(\"Allreduce\", status);\n  }\n  auto const &r = reply.receive_buffer();\n  std::copy_n(r.cbegin(), r.size(), data.data());\n  return Success();\n}\n\n[[nodiscard]] Result FederatedColl::Broadcast(Comm const &comm, common::Span<std::int8_t> data,\n                                              std::int32_t root) {\n  return BroadcastImpl(comm, &this->sequence_number_, data, root);\n}\n\n[[nodiscard]] Result FederatedColl::Allgather(Comm const &comm, common::Span<std::int8_t> data) {\n  using namespace federated;  // NOLINT\n  auto fed = dynamic_cast<FederatedComm const *>(&comm);\n  CHECK(fed);\n  auto stub = fed->Handle();\n  auto size = data.size_bytes() / comm.World();\n\n  auto offset = comm.Rank() * size;\n  auto segment = data.subspan(offset, size);\n\n  AllgatherRequest request;\n  request.set_sequence_number(sequence_number_++);\n  request.set_rank(comm.Rank());\n  request.set_send_buffer(segment.data(), segment.size());\n\n  AllgatherReply reply;\n  grpc::ClientContext context;\n  context.set_wait_for_ready(true);\n  grpc::Status status = stub->Allgather(&context, request, &reply);\n\n  if (!status.ok()) {\n    return GetGRPCResult(\"Allgather\", status);\n  }\n  auto const &r = 
reply.receive_buffer();\n  std::copy_n(r.cbegin(), r.size(), data.begin());\n  return Success();\n}\n\n[[nodiscard]] Result FederatedColl::AllgatherV(Comm const &comm,\n                                               common::Span<std::int8_t const> data,\n                                               common::Span<std::int64_t const>,\n                                               common::Span<std::int64_t>,\n                                               common::Span<std::int8_t> recv, AllgatherVAlgo) {\n  using namespace federated;  // NOLINT\n\n  auto fed = dynamic_cast<FederatedComm const *>(&comm);\n  CHECK(fed);\n  auto stub = fed->Handle();\n\n  AllgatherVRequest request;\n  request.set_sequence_number(sequence_number_++);\n  request.set_rank(comm.Rank());\n  request.set_send_buffer(data.data(), data.size());\n\n  AllgatherVReply reply;\n  grpc::ClientContext context;\n  context.set_wait_for_ready(true);\n  grpc::Status status = stub->AllgatherV(&context, request, &reply);\n  if (!status.ok()) {\n    return GetGRPCResult(\"AllgatherV\", status);\n  }\n  std::string const &r = reply.receive_buffer();\n  CHECK_EQ(r.size(), recv.size());\n  std::copy_n(r.cbegin(), r.size(), recv.begin());\n  return Success();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_coll.cu",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <cstdint>  // for int8_t, int32_t\n#include <memory>   // for dynamic_pointer_cast\n#include <vector>   // for vector\n\n#include \"../../src/collective/comm.cuh\"\n#include \"../../src/common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../src/data/array_interface.h\"   // for ArrayInterfaceHandler::Type\n#include \"federated_coll.cuh\"\n#include \"federated_comm.cuh\"\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nColl *FederatedColl::MakeCUDAVar() {\n  return new CUDAFederatedColl{std::dynamic_pointer_cast<FederatedColl>(this->shared_from_this())};\n}\n\n[[nodiscard]] Result CUDAFederatedColl::Allreduce(Comm const &comm, common::Span<std::int8_t> data,\n                                                  ArrayInterfaceHandler::Type type, Op op) {\n  auto cufed = dynamic_cast<CUDAFederatedComm const *>(&comm);\n  CHECK(cufed);\n\n  std::vector<std::int8_t> h_data(data.size());\n\n  return Success() << [&] {\n    return GetCUDAResult(\n        cudaMemcpy(h_data.data(), data.data(), data.size(), cudaMemcpyDeviceToHost));\n  } << [&] {\n    return p_impl_->Allreduce(comm, common::Span{h_data.data(), h_data.size()}, type, op);\n  } << [&] {\n    return GetCUDAResult(cudaMemcpyAsync(data.data(), h_data.data(), data.size(),\n                                         cudaMemcpyHostToDevice, cufed->Stream()));\n  };\n}\n\n[[nodiscard]] Result CUDAFederatedColl::Broadcast(Comm const &comm, common::Span<std::int8_t> data,\n                                                  std::int32_t root) {\n  auto cufed = dynamic_cast<CUDAFederatedComm const *>(&comm);\n  CHECK(cufed);\n  std::vector<std::int8_t> h_data(data.size());\n\n  return Success() << [&] {\n    return GetCUDAResult(\n        cudaMemcpy(h_data.data(), data.data(), data.size(), cudaMemcpyDeviceToHost));\n  } << [&] {\n    return 
p_impl_->Broadcast(comm, common::Span{h_data.data(), h_data.size()}, root);\n  } << [&] {\n    return GetCUDAResult(cudaMemcpyAsync(data.data(), h_data.data(), data.size(),\n                                         cudaMemcpyHostToDevice, cufed->Stream()));\n  };\n}\n\n[[nodiscard]] Result CUDAFederatedColl::Allgather(Comm const &comm, common::Span<std::int8_t> data) {\n  auto cufed = dynamic_cast<CUDAFederatedComm const *>(&comm);\n  CHECK(cufed);\n  std::vector<std::int8_t> h_data(data.size());\n\n  return Success() << [&] {\n    return GetCUDAResult(\n        cudaMemcpy(h_data.data(), data.data(), data.size(), cudaMemcpyDeviceToHost));\n  } << [&] {\n    return p_impl_->Allgather(comm, common::Span{h_data.data(), h_data.size()});\n  } << [&] {\n    return GetCUDAResult(cudaMemcpyAsync(data.data(), h_data.data(), data.size(),\n                                         cudaMemcpyHostToDevice, cufed->Stream()));\n  };\n}\n\n[[nodiscard]] Result CUDAFederatedColl::AllgatherV(\n    Comm const &comm, common::Span<std::int8_t const> data, common::Span<std::int64_t const> sizes,\n    common::Span<std::int64_t> recv_segments, common::Span<std::int8_t> recv, AllgatherVAlgo algo) {\n  auto cufed = dynamic_cast<CUDAFederatedComm const *>(&comm);\n  CHECK(cufed);\n\n  std::vector<std::int8_t> h_data(data.size());\n  std::vector<std::int8_t> h_recv(recv.size());\n\n  return Success() << [&] {\n    return GetCUDAResult(\n        cudaMemcpy(h_data.data(), data.data(), data.size(), cudaMemcpyDeviceToHost));\n  } << [&] {\n    return this->p_impl_->AllgatherV(comm, h_data, sizes, recv_segments, h_recv, algo);\n  } << [&] {\n    return GetCUDAResult(cudaMemcpyAsync(recv.data(), h_recv.data(), h_recv.size(),\n                                         cudaMemcpyHostToDevice, cufed->Stream()));\n  };\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_coll.cuh",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n */\n#include \"../../src/collective/comm.h\"  // for Comm, Coll\n#include \"federated_coll.h\"             // for FederatedColl\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nclass CUDAFederatedColl : public Coll {\n  std::shared_ptr<FederatedColl> p_impl_;\n\n public:\n  explicit CUDAFederatedColl(std::shared_ptr<FederatedColl> pimpl) : p_impl_{std::move(pimpl)} {}\n  [[nodiscard]] Result Allreduce(Comm const &comm, common::Span<std::int8_t> data,\n                                 ArrayInterfaceHandler::Type type, Op op) override;\n  [[nodiscard]] Result Broadcast(Comm const &comm, common::Span<std::int8_t> data,\n                                 std::int32_t root) override;\n  [[nodiscard]] Result Allgather(Comm const &, common::Span<std::int8_t> data) override;\n  [[nodiscard]] Result AllgatherV(Comm const &comm, common::Span<std::int8_t const> data,\n                                  common::Span<std::int64_t const> sizes,\n                                  common::Span<std::int64_t> recv_segments,\n                                  common::Span<std::int8_t> recv, AllgatherVAlgo algo) override;\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_coll.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n */\n#pragma once\n#include \"../../src/collective/coll.h\"    // for Coll\n#include \"../../src/collective/comm.h\"    // for Comm\n\nnamespace xgboost::collective {\nclass FederatedColl : public Coll {\n private:\n  std::uint64_t sequence_number_{0};\n\n public:\n  Coll *MakeCUDAVar() override;\n\n  [[nodiscard]] Result Allreduce(Comm const &, common::Span<std::int8_t> data,\n                                 ArrayInterfaceHandler::Type type, Op op) override;\n  [[nodiscard]] Result Broadcast(Comm const &comm, common::Span<std::int8_t> data,\n                                 std::int32_t root) override;\n  [[nodiscard]] Result Allgather(Comm const &, common::Span<std::int8_t> data) override;\n  [[nodiscard]] Result AllgatherV(Comm const &comm, common::Span<std::int8_t const> data,\n                                  common::Span<std::int64_t const> sizes,\n                                  common::Span<std::int64_t> recv_segments,\n                                  common::Span<std::int8_t> recv, AllgatherVAlgo algo) override;\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_comm.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n */\n#include \"federated_comm.h\"\n\n#include <grpcpp/grpcpp.h>\n\n#include <cstdint>  // for int32_t\n#include <cstdlib>  // for getenv\n#include <limits>   // for numeric_limits\n#include <string>   // for string, stoi\n\n#include \"../../src/common/common.h\"      // for Split\n#include \"../../src/common/io.h\"          // for ReadAll\n#include \"../../src/common/json_utils.h\"  // for OptionalArg\n#include \"xgboost/json.h\"                 // for Json\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::collective {\nvoid FederatedComm::Init(std::string const& host, std::int32_t port, std::int32_t world,\n                         std::int32_t rank, std::string const& server_cert,\n                         std::string const& client_key, std::string const& client_cert) {\n  this->rank_ = rank;\n  this->world_ = world;\n\n  this->tracker_.host = host;\n  this->tracker_.port = port;\n  this->tracker_.rank = rank;\n\n  CHECK_GE(world, 1) << \"Invalid world size.\";\n  CHECK_GE(rank, 0) << \"Invalid worker rank.\";\n  CHECK_LT(rank, world) << \"Invalid worker rank.\";\n\n  auto certs = {server_cert, client_cert, client_cert};\n  auto is_empty = [](auto const& s) { return s.empty(); };\n  bool valid = std::all_of(certs.begin(), certs.end(), is_empty) ||\n               std::none_of(certs.begin(), certs.end(), is_empty);\n  CHECK(valid) << \"Invalid arguments for certificates.\";\n\n  if (server_cert.empty()) {\n    stub_ = [&] {\n      grpc::ChannelArguments args;\n      args.SetMaxReceiveMessageSize(std::numeric_limits<std::int32_t>::max());\n      return federated::Federated::NewStub(grpc::CreateCustomChannel(\n          host + \":\" + std::to_string(port), grpc::InsecureChannelCredentials(), args));\n    }();\n  } else {\n    stub_ = [&] {\n      grpc::SslCredentialsOptions options;\n      options.pem_root_certs = common::ReadAll(server_cert);\n      options.pem_private_key = 
common::ReadAll(client_key);\n      options.pem_cert_chain = common::ReadAll(client_cert);\n      grpc::ChannelArguments args;\n      args.SetMaxReceiveMessageSize(std::numeric_limits<std::int32_t>::max());\n      auto channel = grpc::CreateCustomChannel(host + \":\" + std::to_string(port),\n                                               grpc::SslCredentials(options), args);\n      channel->WaitForConnected(gpr_time_add(\n          gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(DefaultTimeoutSec(), GPR_TIMESPAN)));\n      return federated::Federated::NewStub(channel);\n    }();\n  }\n}\n\nFederatedComm::FederatedComm(std::int32_t retry, std::chrono::seconds timeout, std::string task_id,\n                             Json const& config) {\n  /**\n   * Topology\n   */\n  std::string server_address{};\n  std::int32_t world_size{0};\n  std::int32_t rank{-1};\n  // Parse environment variables first.\n  auto* value = std::getenv(\"FEDERATED_SERVER_ADDRESS\");\n  if (value != nullptr) {\n    server_address = value;\n  }\n  value = std::getenv(\"FEDERATED_WORLD_SIZE\");\n  if (value != nullptr) {\n    world_size = std::stoi(value);\n  }\n  value = std::getenv(\"FEDERATED_RANK\");\n  if (value != nullptr) {\n    rank = std::stoi(value);\n  }\n\n  server_address = OptionalArg<String>(config, \"federated_server_address\", server_address);\n  world_size =\n      OptionalArg<Integer>(config, \"federated_world_size\", static_cast<Integer::Int>(world_size));\n  rank = OptionalArg<Integer>(config, \"federated_rank\", static_cast<Integer::Int>(rank));\n\n  auto parsed = common::Split(server_address, ':');\n  CHECK_EQ(parsed.size(), 2) << \"Invalid server address:\" << server_address;\n\n  CHECK(!server_address.empty()) << \"Parameter `federated_server_address` is required.\";\n\n  /**\n   * Basic config\n   */\n  this->retry_ = retry;\n  this->timeout_ = timeout;\n  this->task_id_ = task_id;\n\n  /**\n   * Certificates\n   */\n  std::string server_cert{};\n  std::string 
client_key{};\n  std::string client_cert{};\n  value = getenv(\"FEDERATED_SERVER_CERT_PATH\");\n  if (value != nullptr) {\n    server_cert = value;\n  }\n  value = getenv(\"FEDERATED_CLIENT_KEY_PATH\");\n  if (value != nullptr) {\n    client_key = value;\n  }\n  value = getenv(\"FEDERATED_CLIENT_CERT_PATH\");\n  if (value != nullptr) {\n    client_cert = value;\n  }\n\n  server_cert = OptionalArg<String>(config, \"federated_server_cert_path\", server_cert);\n  client_key = OptionalArg<String>(config, \"federated_client_key_path\", client_key);\n  client_cert = OptionalArg<String>(config, \"federated_client_cert_path\", client_cert);\n\n  this->Init(parsed[0], std::stoi(parsed[1]), world_size, rank, server_cert, client_key,\n             client_cert);\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nComm* FederatedComm::MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const {\n  common::AssertGPUSupport();\n  return nullptr;\n}\n#endif  //  !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_comm.cu",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <memory>  // for shared_ptr\n\n#include \"../../src/common/cuda_context.cuh\"\n#include \"federated_comm.cuh\"\n#include \"xgboost/context.h\"  // for Context\n\nnamespace xgboost::collective {\nCUDAFederatedComm::CUDAFederatedComm(Context const* ctx, std::shared_ptr<FederatedComm const> impl)\n    : FederatedComm{impl}, stream_{ctx->CUDACtx()->Stream()} {\n  CHECK(impl);\n  CHECK(ctx->IsCUDA());\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n}\n\nComm* FederatedComm::MakeCUDAVar(Context const* ctx, std::shared_ptr<Coll>) const {\n  return new CUDAFederatedComm{\n      ctx, std::dynamic_pointer_cast<FederatedComm const>(this->shared_from_this())};\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_comm.cuh",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <memory>  // for shared_ptr\n\n#include \"../../src/collective/coll.h\"          // for Coll\n#include \"../../src/common/cuda_stream.h\"       // for StreamRef\n#include \"federated_comm.h\"                     // for FederatedComm\n#include \"xgboost/context.h\"                    // for Context\n\nnamespace xgboost::collective {\nclass CUDAFederatedComm : public FederatedComm {\n  curt::StreamRef stream_;\n\n public:\n  explicit CUDAFederatedComm(Context const* ctx, std::shared_ptr<FederatedComm const> impl);\n  [[nodiscard]] auto Stream() const { return stream_; }\n  Comm* MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const override {\n    LOG(FATAL) << \"[Internal Error]: Invalid request for CUDA variant.\";\n    return nullptr;\n  }\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_comm.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n */\n#pragma once\n\n#include <federated.grpc.pb.h>\n#include <federated.pb.h>\n\n#include <chrono>   // for seconds\n#include <cstdint>  // for int32_t\n#include <memory>   // for shared_ptr\n#include <string>   // for string\n\n#include \"../../src/collective/comm.h\"    // for HostComm\n#include \"xgboost/json.h\"\n\nnamespace xgboost::collective {\nclass FederatedComm : public HostComm {\n  std::shared_ptr<federated::Federated::Stub> stub_;\n\n  void Init(std::string const& host, std::int32_t port, std::int32_t world, std::int32_t rank,\n            std::string const& server_cert, std::string const& client_key,\n            std::string const& client_cert);\n\n protected:\n  explicit FederatedComm(std::shared_ptr<FederatedComm const> that) : stub_{that->stub_} {\n    this->rank_ = that->Rank();\n    this->world_ = that->World();\n\n    this->retry_ = that->Retry();\n    this->timeout_ = that->Timeout();\n    this->task_id_ = that->TaskID();\n\n    this->tracker_ = that->TrackerInfo();\n  }\n\n public:\n  /**\n   * @param config\n   *\n   * - federated_server_address: Tracker address\n   * - federated_world_size: The number of workers\n   * - federated_rank: Rank of federated worker\n   * - federated_server_cert_path\n   * - federated_client_key_path\n   * - federated_client_cert_path\n   */\n  explicit FederatedComm(std::int32_t retry, std::chrono::seconds timeout, std::string task_id,\n                         Json const& config);\n  [[nodiscard]] Result Shutdown() final {\n    this->ResetState();\n    return Success();\n  }\n  ~FederatedComm() override { stub_.reset(); }\n\n  [[nodiscard]] std::shared_ptr<Channel> Chan(std::int32_t) const override {\n    LOG(FATAL) << \"peer to peer communication is not allowed for federated learning.\";\n    return nullptr;\n  }\n  [[nodiscard]] Result LogTracker(std::string msg) const override {\n    LOG(CONSOLE) << msg;\n    return Success();\n  }\n  
[[nodiscard]] bool IsFederated() const override { return true; }\n  [[nodiscard]] federated::Federated::Stub* Handle() const { return stub_.get(); }\n\n  [[nodiscard]] Comm* MakeCUDAVar(Context const* ctx, std::shared_ptr<Coll> pimpl) const override;\n  /**\n   * @brief Get a string ID for the current process.\n   */\n  [[nodiscard]] Result ProcessorName(std::string* out) const final {\n    auto rank = this->Rank();\n    *out = \"rank:\" + std::to_string(rank);\n    return Success();\n  };\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_tracker.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors\n */\n#include \"federated_tracker.h\"\n\n#include <grpcpp/security/server_credentials.h>  // for InsecureServerCredentials, ...\n#include <grpcpp/server_builder.h>               // for ServerBuilder\n\n#include <cstdint>    // for int32_t\n#include <exception>  // for exception\n#include <future>     // for future, async\n#include <limits>     // for numeric_limits\n#include <string>     // for string\n\n#include \"../../src/common/io.h\"          // for ReadAll\n#include \"../../src/common/json_utils.h\"  // for RequiredArg\n\nnamespace xgboost::collective {\nnamespace federated {\ngrpc::Status FederatedService::Allgather(grpc::ServerContext*, AllgatherRequest const* request,\n                                         AllgatherReply* reply) {\n  handler_.Allgather(request->send_buffer().data(), request->send_buffer().size(),\n                     reply->mutable_receive_buffer(), request->sequence_number(), request->rank());\n  return grpc::Status::OK;\n}\n\ngrpc::Status FederatedService::AllgatherV(grpc::ServerContext*, AllgatherVRequest const* request,\n                                          AllgatherVReply* reply) {\n  handler_.AllgatherV(request->send_buffer().data(), request->send_buffer().size(),\n                      reply->mutable_receive_buffer(), request->sequence_number(), request->rank());\n  return grpc::Status::OK;\n}\n\ngrpc::Status FederatedService::Allreduce(grpc::ServerContext*, AllreduceRequest const* request,\n                                         AllreduceReply* reply) {\n  handler_.Allreduce(request->send_buffer().data(), request->send_buffer().size(),\n                     reply->mutable_receive_buffer(), request->sequence_number(), request->rank(),\n                     static_cast<xgboost::ArrayInterfaceHandler::Type>(request->data_type()),\n                     static_cast<xgboost::collective::Op>(request->reduce_operation()));\n  return 
grpc::Status::OK;\n}\n\ngrpc::Status FederatedService::Broadcast(grpc::ServerContext*, BroadcastRequest const* request,\n                                         BroadcastReply* reply) {\n  handler_.Broadcast(request->send_buffer().data(), request->send_buffer().size(),\n                     reply->mutable_receive_buffer(), request->sequence_number(), request->rank(),\n                     request->root());\n  return grpc::Status::OK;\n}\n}  // namespace federated\n\nFederatedTracker::FederatedTracker(Json const& config) : Tracker{config} {\n  auto is_secure = RequiredArg<Boolean const>(config, \"federated_secure\", __func__);\n  if (is_secure) {\n    StringView msg{\"Empty certificate path.\"};\n    server_key_path_ = RequiredArg<String const>(config, \"server_key_path\", __func__);\n    CHECK(!server_key_path_.empty()) << msg;\n    server_cert_file_ = RequiredArg<String const>(config, \"server_cert_path\", __func__);\n    CHECK(!server_cert_file_.empty()) << msg;\n    client_cert_file_ = RequiredArg<String const>(config, \"client_cert_path\", __func__);\n    CHECK(!client_cert_file_.empty()) << msg;\n  }\n}\n\nstd::future<Result> FederatedTracker::Run() {\n  return std::async(std::launch::async, [this]() {\n    std::string const server_address = \"0.0.0.0:\" + std::to_string(this->port_);\n    xgboost::collective::federated::FederatedService service{\n        static_cast<std::int32_t>(this->n_workers_)};\n    grpc::ServerBuilder builder;\n\n    if (this->server_cert_file_.empty()) {\n      builder.SetMaxReceiveMessageSize(std::numeric_limits<std::int32_t>::max());\n      if (this->port_ == 0) {\n        builder.AddListeningPort(server_address, grpc::InsecureServerCredentials(), &port_);\n      } else {\n        builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());\n      }\n      builder.RegisterService(&service);\n      LOG(CONSOLE) << \"Insecure federated server listening on \" << server_address << \", world size \"\n                   << 
this->n_workers_;\n    } else {\n      auto options = grpc::SslServerCredentialsOptions(\n          GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_AND_VERIFY);\n      options.pem_root_certs = xgboost::common::ReadAll(client_cert_file_);\n      auto key = grpc::SslServerCredentialsOptions::PemKeyCertPair();\n      key.private_key = xgboost::common::ReadAll(server_key_path_);\n      key.cert_chain = xgboost::common::ReadAll(server_cert_file_);\n      options.pem_key_cert_pairs.push_back(key);\n      builder.SetMaxReceiveMessageSize(std::numeric_limits<std::int32_t>::max());\n      if (this->port_ == 0) {\n        builder.AddListeningPort(server_address, grpc::SslServerCredentials(options), &port_);\n      } else {\n        builder.AddListeningPort(server_address, grpc::SslServerCredentials(options));\n      }\n      builder.RegisterService(&service);\n      LOG(CONSOLE) << \"Federated server listening on \" << server_address << \", world size \"\n                   << n_workers_;\n    }\n\n    try {\n      server_ = builder.BuildAndStart();\n      ready_ = true;\n      server_->Wait();\n    } catch (std::exception const& e) {\n      return collective::Fail(std::string{e.what()});\n    }\n\n    ready_ = false;\n    return collective::Success();\n  });\n}\n\nFederatedTracker::~FederatedTracker() = default;\n\nResult FederatedTracker::Shutdown() {\n  auto rc = this->WaitUntilReady();\n  SafeColl(rc);\n\n  try {\n    server_->Shutdown();\n  } catch (std::exception const& e) {\n    return Fail(\"Failed to shutdown:\" + std::string{e.what()});\n  }\n\n  return Success();\n}\n\n[[nodiscard]] Json FederatedTracker::WorkerArgs() const {\n  auto rc = this->WaitUntilReady();\n  SafeColl(rc);\n\n  std::string host;\n  rc = GetHostAddress(&host);\n  SafeColl(rc);\n  Json args{Object{}};\n  args[\"dmlc_tracker_uri\"] = String{host};\n  args[\"dmlc_tracker_port\"] = this->Port();\n  return args;\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/federated/federated_tracker.h",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n#pragma once\n#include <federated.grpc.pb.h>  // for Server\n\n#include <future>  // for future\n#include <memory>  // for unique_ptr\n#include <string>  // for string\n\n#include \"../../src/collective/in_memory_handler.h\"\n#include \"../../src/collective/tracker.h\"  // for Tracker\n#include \"xgboost/collective/result.h\"     // for Result\n#include \"xgboost/json.h\"                  // for Json\n\nnamespace xgboost::collective {\nnamespace federated {\nclass FederatedService final : public Federated::Service {\n public:\n  explicit FederatedService(std::int32_t world_size) : handler_{world_size} {}\n\n  grpc::Status Allgather(grpc::ServerContext* context, AllgatherRequest const* request,\n                         AllgatherReply* reply) override;\n\n  grpc::Status AllgatherV(grpc::ServerContext* context, AllgatherVRequest const* request,\n                          AllgatherVReply* reply) override;\n\n  grpc::Status Allreduce(grpc::ServerContext* context, AllreduceRequest const* request,\n                         AllreduceReply* reply) override;\n\n  grpc::Status Broadcast(grpc::ServerContext* context, BroadcastRequest const* request,\n                         BroadcastReply* reply) override;\n\n private:\n  xgboost::collective::InMemoryHandler handler_;\n};\n};  // namespace federated\n\nclass FederatedTracker : public collective::Tracker {\n  std::unique_ptr<grpc::Server> server_;\n  std::string server_key_path_;\n  std::string server_cert_file_;\n  std::string client_cert_file_;\n\n public:\n  /**\n   * @brief CTOR\n   *\n   * @param config Configuration, other than the base configuration from Tracker, we have:\n   *\n   * - federated_secure: bool whether this is a secure server.\n   * - server_key_path: path to the key.\n   * - server_cert_path: certificate path.\n   * - client_cert_path: certificate path for client.\n   */\n  explicit FederatedTracker(Json const& config);\n  
~FederatedTracker() override;\n  std::future<Result> Run() override;\n\n  [[nodiscard]] Json WorkerArgs() const override;\n  [[nodiscard]] Result Shutdown();\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "plugin/sycl/README.md",
    "content": "<!--\n******************************************************************************\n* Copyright by Contributors 2017-2023\n*******************************************************************************/-->\n\n# SYCL-based Algorithm for Tree Construction\nThis plugin adds support for the SYCL programming model for prediction algorithms to XGBoost.\n\n## Usage\nSpecify the 'device' parameter as described in the table below to offload model training and inference to a SYCL device.\n\n### Algorithms\n| device | Description |\n| --- | --- |\nsycl | use default sycl device  |\nsycl:gpu | use default sycl gpu  |\nsycl:cpu | use default sycl cpu  |\nsycl:gpu:N | use sycl gpu number N |\nsycl:cpu:N | use sycl cpu number N |\n\nPython example:\n```python\nparam['device'] = 'sycl:gpu:0'\n```\nNote: 'sycl:cpu' devices have full functional support but can't provide good enough performance. We recommend using 'sycl:cpu' devices only for test purposes.\nNote: if the device is specified as 'sycl', the device type will be chosen automatically. If the system has both a SYCL GPU and a SYCL CPU, the GPU will be used.\n\n## Dependencies\nTo build and use the plugin, install [Intel® oneAPI DPC++/C++ Compiler](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html).\nSee also [Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2024-0/overview.html).\n\n## Build\nFrom the ``xgboost`` directory, run:\n\n```bash\n$ cmake -B build -S . -DPLUGIN_SYCL=ON\n$ cmake --build build -j\n```\n"
  },
  {
    "path": "plugin/sycl/common/hist_util.cc",
    "content": "/*!\n * Copyright 2017-2023 by Contributors\n * \\file hist_util.cc\n */\n#include <vector>\n#include <limits>\n#include <algorithm>\n\n#include \"../data/gradient_index.h\"\n#include \"../tree/hist_dispatcher.h\"\n#include \"hist_util.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\n/*!\n * \\brief Fill histogram with zeroes\n */\ntemplate<typename GradientSumT>\nvoid InitHist(::sycl::queue* qu, GHistRow<GradientSumT, MemoryType::on_device>* hist,\n              size_t size, ::sycl::event* event) {\n  *event = qu->fill(hist->Begin(),\n                   xgboost::detail::GradientPairInternal<GradientSumT>(), size, *event);\n}\ntemplate void InitHist(::sycl::queue* qu,\n                       GHistRow<float,  MemoryType::on_device>* hist,\n                       size_t size, ::sycl::event* event);\ntemplate void InitHist(::sycl::queue* qu,\n                       GHistRow<double, MemoryType::on_device>* hist,\n                       size_t size, ::sycl::event* event);\n\n/*!\n * \\brief Copy histogram from src to dst\n */\ntemplate<typename GradientSumT>\nvoid CopyHist(::sycl::queue* qu,\n              GHistRow<GradientSumT, MemoryType::on_device>* dst,\n              const GHistRow<GradientSumT, MemoryType::on_device>& src,\n              size_t size) {\n  GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst->Data());\n  const GradientSumT* psrc = reinterpret_cast<const GradientSumT*>(src.DataConst());\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(2 * size), [=](::sycl::item<1> pid) {\n      const size_t i = pid.get_id(0);\n      pdst[i] = psrc[i];\n    });\n  }).wait();\n}\ntemplate void CopyHist(::sycl::queue* qu,\n                       GHistRow<float, MemoryType::on_device>* dst,\n                       const GHistRow<float, MemoryType::on_device>& src,\n                       size_t size);\ntemplate void CopyHist(::sycl::queue* qu,\n                       
GHistRow<double, MemoryType::on_device>* dst,\n                       const GHistRow<double, MemoryType::on_device>& src,\n                       size_t size);\n\n/*!\n * \\brief Compute Subtraction: dst = src1 - src2\n */\ntemplate<typename GradientSumT>\n::sycl::event SubtractionHist(::sycl::queue* qu,\n                            GHistRow<GradientSumT, MemoryType::on_device>* dst,\n                            const GHistRow<GradientSumT, MemoryType::on_device>& src1,\n                            const GHistRow<GradientSumT, MemoryType::on_device>& src2,\n                            size_t size, ::sycl::event event_priv) {\n  GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst->Data());\n  const GradientSumT* psrc1 = reinterpret_cast<const GradientSumT*>(src1.DataConst());\n  const GradientSumT* psrc2 = reinterpret_cast<const GradientSumT*>(src2.DataConst());\n\n  auto event_final = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event_priv);\n    cgh.parallel_for<>(::sycl::range<1>(2 * size), [pdst, psrc1, psrc2](::sycl::item<1> pid) {\n      const size_t i = pid.get_id(0);\n      pdst[i] = psrc1[i] - psrc2[i];\n    });\n  });\n  return event_final;\n}\ntemplate ::sycl::event SubtractionHist(::sycl::queue* qu,\n                              GHistRow<float, MemoryType::on_device>* dst,\n                              const GHistRow<float, MemoryType::on_device>& src1,\n                              const GHistRow<float, MemoryType::on_device>& src2,\n                              size_t size, ::sycl::event event_priv);\ntemplate ::sycl::event SubtractionHist(::sycl::queue* qu,\n                              GHistRow<double, MemoryType::on_device>* dst,\n                              const GHistRow<double, MemoryType::on_device>& src1,\n                              const GHistRow<double, MemoryType::on_device>& src2,\n                              size_t size, ::sycl::event event_priv);\n\ntemplate <typename GradientPairT>\n::sycl::event 
ReduceHist(::sycl::queue* qu, GradientPairT* hist_data,\n                         GradientPairT* hist_buffer_data,\n                         size_t  nblocks, size_t nbins,\n                         const ::sycl::event& event_main) {\n  auto event_save = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event_main);\n    cgh.parallel_for<>(::sycl::range<1>(nbins), [=](::sycl::item<1> pid) {\n      size_t idx_bin = pid.get_id(0);\n\n      GradientPairT gpair = {0, 0};\n\n      for (size_t j = 0; j < nblocks; ++j) {\n        gpair += hist_buffer_data[j * nbins + idx_bin];\n      }\n\n      hist_data[idx_bin] = gpair;\n    });\n  });\n\n  return event_save;\n}\n\n// Kernel with buffer using\ntemplate<typename FPType, typename BinIdxType, bool isDense>\n::sycl::event BuildHistKernel(::sycl::queue* qu,\n                            const HostDeviceVector<GradientPair>& gpair,\n                            const RowSetCollection::Elem& row_indices,\n                            const GHistIndexMatrix& gmat,\n                            GHistRow<FPType, MemoryType::on_device>* hist,\n                            GHistRow<FPType, MemoryType::on_device>* hist_buffer,\n                            const tree::HistDispatcher<FPType>& dispatcher,\n                            ::sycl::event event_priv) {\n  using GradientPairT = xgboost::detail::GradientPairInternal<FPType>;\n  const size_t size = row_indices.Size();\n  const size_t* rid = row_indices.begin;\n  const size_t n_columns = isDense ? 
gmat.nfeatures : gmat.row_stride;\n  const auto* pgh = gpair.ConstDevicePointer();\n  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();\n  const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer();\n  const size_t nbins = gmat.nbins;\n\n  const size_t work_group_size = dispatcher.work_group_size;\n  const size_t block_size = dispatcher.block.size;\n  const size_t nblocks = dispatcher.block.nblocks;\n\n  GradientPairT* hist_buffer_data = hist_buffer->Data();\n  auto event_fill = qu->fill(hist_buffer_data, GradientPairT(0, 0),\n                             nblocks * nbins, event_priv);\n  auto event_main = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event_fill);\n    cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(nblocks, work_group_size),\n                                           ::sycl::range<2>(1, work_group_size)),\n                       [=](::sycl::nd_item<2> pid) {\n      size_t block = pid.get_global_id(0);\n      size_t feat = pid.get_global_id(1);\n\n      GradientPairT* hist_local = hist_buffer_data + block * nbins;\n      for (size_t idx = 0; idx < block_size; ++idx) {\n        size_t i = block * block_size + idx;\n        if (i < size) {\n          const size_t icol_start = n_columns * rid[i];\n          const size_t idx_gh = rid[i];\n\n          const GradientPairT pgh_row = {pgh[idx_gh].GetGrad(), pgh[idx_gh].GetHess()};\n          pid.barrier(::sycl::access::fence_space::local_space);\n          const BinIdxType* gr_index_local = gradient_index + icol_start;\n\n          for (size_t j = feat; j < n_columns; j += work_group_size) {\n            uint32_t idx_bin = static_cast<uint32_t>(gr_index_local[j]);\n            if constexpr (isDense) {\n              idx_bin += offsets[j];\n            }\n            if (idx_bin < nbins) {\n              hist_local[idx_bin] += pgh_row;\n            }\n          }\n        }\n      }\n    });\n  });\n\n  GradientPairT* hist_data = hist->Data();\n  auto event_save = 
ReduceHist(qu, hist_data, hist_buffer_data, nblocks,\n                               nbins, event_main);\n\n  return event_save;\n}\n\n// Kernel with buffer and local hist using\ntemplate<typename FPType, typename BinIdxType>\n::sycl::event BuildHistKernelLocal(::sycl::queue* qu,\n                            const HostDeviceVector<GradientPair>& gpair,\n                            const RowSetCollection::Elem& row_indices,\n                            const GHistIndexMatrix& gmat,\n                            GHistRow<FPType, MemoryType::on_device>* hist,\n                            GHistRow<FPType, MemoryType::on_device>* hist_buffer,\n                            const tree::HistDispatcher<FPType>& dispatcher,\n                            ::sycl::event event_priv) {\n  constexpr int kMaxNumBins = tree::HistDispatcher<FPType>::KMaxNumBins;\n  using GradientPairT = xgboost::detail::GradientPairInternal<FPType>;\n  const size_t size = row_indices.Size();\n  const size_t* rid = row_indices.begin;\n  const size_t n_columns = gmat.nfeatures;\n  const auto* pgh = gpair.ConstDevicePointer();\n  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();\n  const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer();\n  const size_t nbins = gmat.nbins;\n\n  const size_t work_group_size = dispatcher.work_group_size;\n  const size_t block_size = dispatcher.block.size;\n  const size_t nblocks = dispatcher.block.nblocks;\n\n  GradientPairT* hist_buffer_data = hist_buffer->Data();\n\n  auto event_main = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event_priv);\n    cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(nblocks, work_group_size),\n                                           ::sycl::range<2>(1, work_group_size)),\n                       [=](::sycl::nd_item<2> pid) {\n      size_t block = pid.get_global_id(0);\n      size_t feat = pid.get_global_id(1);\n\n      // This buffer will be keeped in L1/registers\n      GradientPairT 
hist_fast[kMaxNumBins];\n\n      GradientPairT* hist_local = hist_buffer_data + block * nbins;\n      for (size_t fid = feat; fid < n_columns; fid += work_group_size) {\n        size_t n_bins_feature = offsets[fid+1] - offsets[fid];\n\n        // Not all elements of hist_fast are actually used: n_bins_feature <= kMaxNumBins\n        // We initililize only the requared elements to prevent the unused go to cache.\n        for (int bin = 0; bin < n_bins_feature; ++bin) {\n          hist_fast[bin] = {0, 0};\n        }\n\n        for (size_t idx = 0; idx < block_size; ++idx) {\n          size_t i = block * block_size + idx;\n          if (i < size) {\n            size_t row_id = rid[i];\n\n            const size_t icol_start = n_columns * row_id;\n            const GradientPairT pgh_row(pgh[row_id].GetGrad(),\n                                        pgh[row_id].GetHess());\n\n            const BinIdxType* gr_index_local = gradient_index + icol_start;\n            uint32_t idx_bin = gr_index_local[fid];\n\n            hist_fast[idx_bin] += pgh_row;\n          }\n        }\n        for (int bin = 0 ; bin < n_bins_feature; ++bin) {\n          hist_local[bin + offsets[fid]] = hist_fast[bin];\n        }\n      }\n    });\n  });\n\n  GradientPairT* hist_data = hist->Data();\n  auto event_save = ReduceHist(qu, hist_data, hist_buffer_data, nblocks,\n                               nbins, event_main);\n  return event_save;\n}\n\n// Kernel with atomic using\ntemplate<typename FPType, typename BinIdxType, bool isDense>\n::sycl::event BuildHistKernel(::sycl::queue* qu,\n                            const HostDeviceVector<GradientPair>& gpair,\n                            const RowSetCollection::Elem& row_indices,\n                            const GHistIndexMatrix& gmat,\n                            GHistRow<FPType, MemoryType::on_device>* hist,\n                            const tree::HistDispatcher<FPType>& dispatcher,\n                            ::sycl::event event_priv) {\n  
const size_t size = row_indices.Size();\n  const size_t* rid = row_indices.begin;\n  const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride;\n  const GradientPair::ValueT* pgh =\n    reinterpret_cast<const GradientPair::ValueT*>(gpair.ConstDevicePointer());\n  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();\n  const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer();\n  FPType* hist_data = reinterpret_cast<FPType*>(hist->Data());\n  const size_t nbins = gmat.nbins;\n\n  size_t work_group_size = dispatcher.work_group_size;\n  const size_t n_work_groups = n_columns / work_group_size + (n_columns % work_group_size > 0);\n\n  auto event_fill = qu->fill(hist_data, FPType(0), nbins * 2, event_priv);\n  auto event_main = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event_fill);\n    cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(size, n_work_groups * work_group_size),\n                                           ::sycl::range<2>(1, work_group_size)),\n                       [=](::sycl::nd_item<2> pid) {\n      const int i = pid.get_global_id(0);\n      auto group  = pid.get_group();\n\n      const size_t icol_start = n_columns * rid[i];\n      const size_t idx_gh = rid[i];\n      const FPType pgh_row[2] = {pgh[2 * idx_gh], pgh[2 * idx_gh + 1]};\n      const BinIdxType* gr_index_local = gradient_index + icol_start;\n\n      const size_t group_id = group.get_group_id()[1];\n      const size_t local_id = group.get_local_id()[1];\n      const size_t j = group_id * work_group_size + local_id;\n      if (j < n_columns) {\n        uint32_t idx_bin = static_cast<uint32_t>(gr_index_local[j]);\n        if constexpr (isDense) {\n          idx_bin += offsets[j];\n        }\n        if (idx_bin < nbins) {\n          AtomicRef<FPType> gsum(hist_data[2 * idx_bin]);\n          AtomicRef<FPType> hsum(hist_data[2 * idx_bin + 1]);\n          gsum += pgh_row[0];\n          hsum += pgh_row[1];\n        }\n      }\n    });\n  });\n  
return event_main;\n}\n\ntemplate<typename FPType, typename BinIdxType>\n::sycl::event BuildHistDispatchKernel(\n                ::sycl::queue* qu,\n                const HostDeviceVector<GradientPair>& gpair,\n                const RowSetCollection::Elem& row_indices,\n                const GHistIndexMatrix& gmat,\n                GHistRow<FPType, MemoryType::on_device>* hist,\n                bool isDense,\n                GHistRow<FPType, MemoryType::on_device>* hist_buffer,\n                const DeviceProperties& device_prop,\n                ::sycl::event events_priv,\n                bool force_atomic_use) {\n  const size_t size = row_indices.Size();\n  const size_t n_columns = isDense ? gmat.nfeatures : gmat.row_stride;\n  const size_t nbins = gmat.nbins;\n  const size_t max_num_bins = gmat.max_num_bins;\n  const size_t min_num_bins = gmat.min_num_bins;\n\n  size_t max_n_blocks = hist_buffer->Size() / nbins;\n  auto dispatcher = tree::HistDispatcher<FPType>\n                       (device_prop, isDense, size, max_n_blocks, nbins,\n                        n_columns, max_num_bins, min_num_bins);\n\n  // force_atomic_use flag is used only for testing\n  bool use_atomic = dispatcher.use_atomics || force_atomic_use;\n  if (!use_atomic) {\n    if (isDense) {\n      if (dispatcher.use_local_hist) {\n        return BuildHistKernelLocal<FPType, BinIdxType>(qu, gpair, row_indices,\n                                                        gmat, hist, hist_buffer,\n                                                        dispatcher, events_priv);\n      } else {\n        return BuildHistKernel<FPType, BinIdxType, true>(qu, gpair, row_indices,\n                                                         gmat, hist, hist_buffer,\n                                                         dispatcher, events_priv);\n      }\n    } else {\n      return BuildHistKernel<FPType, uint32_t, false>(qu, gpair, row_indices,\n                                                      gmat, 
hist, hist_buffer,\n                                                      dispatcher, events_priv);\n    }\n  } else {\n    if (isDense) {\n      return BuildHistKernel<FPType, BinIdxType, true>(qu, gpair, row_indices,\n                                                       gmat, hist,\n                                                       dispatcher, events_priv);\n    } else {\n      return BuildHistKernel<FPType, uint32_t, false>(qu, gpair, row_indices,\n                                                      gmat, hist,\n                                                      dispatcher, events_priv);\n    }\n  }\n}\n\ntemplate<typename FPType>\n::sycl::event BuildHistKernel(::sycl::queue* qu,\n                            const HostDeviceVector<GradientPair>& gpair,\n                            const RowSetCollection::Elem& row_indices,\n                            const GHistIndexMatrix& gmat, const bool isDense,\n                            GHistRow<FPType, MemoryType::on_device>* hist,\n                            GHistRow<FPType, MemoryType::on_device>* hist_buffer,\n                            const DeviceProperties& device_prop,\n                            ::sycl::event event_priv,\n                            bool force_atomic_use) {\n  const bool is_dense = isDense;\n  switch (gmat.index.GetBinTypeSize()) {\n    case BinTypeSize::kUint8BinsTypeSize:\n      return BuildHistDispatchKernel<FPType, uint8_t>(qu, gpair, row_indices,\n                                                      gmat, hist, is_dense, hist_buffer,\n                                                      device_prop,\n                                                      event_priv, force_atomic_use);\n      break;\n    case BinTypeSize::kUint16BinsTypeSize:\n      return BuildHistDispatchKernel<FPType, uint16_t>(qu, gpair, row_indices,\n                                                       gmat, hist, is_dense, hist_buffer,\n                                                       
device_prop,\n                                                       event_priv, force_atomic_use);\n      break;\n    case BinTypeSize::kUint32BinsTypeSize:\n      return BuildHistDispatchKernel<FPType, uint32_t>(qu, gpair, row_indices,\n                                                       gmat, hist, is_dense, hist_buffer,\n                                                       device_prop,\n                                                       event_priv, force_atomic_use);\n      break;\n    default:\n      CHECK(false);  // no default behavior\n  }\n}\n\ntemplate <typename GradientSumT>\n::sycl::event GHistBuilder<GradientSumT>::BuildHist(\n              const HostDeviceVector<GradientPair>& gpair,\n              const RowSetCollection::Elem& row_indices,\n              const GHistIndexMatrix &gmat,\n              GHistRowT<MemoryType::on_device>* hist,\n              bool isDense,\n              GHistRowT<MemoryType::on_device>* hist_buffer,\n              const DeviceProperties& device_prop,\n              ::sycl::event event_priv,\n              bool force_atomic_use) {\n  return BuildHistKernel<GradientSumT>(qu_, gpair, row_indices, gmat,\n                                       isDense, hist, hist_buffer,\n                                       device_prop, event_priv,\n                                       force_atomic_use);\n}\n\ntemplate\n::sycl::event GHistBuilder<float>::BuildHist(\n              const HostDeviceVector<GradientPair>& gpair,\n              const RowSetCollection::Elem& row_indices,\n              const GHistIndexMatrix& gmat,\n              GHistRow<float, MemoryType::on_device>* hist,\n              bool isDense,\n              GHistRow<float, MemoryType::on_device>* hist_buffer,\n              const DeviceProperties& device_prop,\n              ::sycl::event event_priv,\n              bool force_atomic_use);\ntemplate\n::sycl::event GHistBuilder<double>::BuildHist(\n              const HostDeviceVector<GradientPair>& gpair,\n     
         const RowSetCollection::Elem& row_indices,\n              const GHistIndexMatrix& gmat,\n              GHistRow<double, MemoryType::on_device>* hist,\n              bool isDense,\n              GHistRow<double, MemoryType::on_device>* hist_buffer,\n              const DeviceProperties& device_prop,\n              ::sycl::event event_priv,\n              bool force_atomic_use);\n\ntemplate<typename GradientSumT>\nvoid GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT<MemoryType::on_device>* self,\n                                                  const GHistRowT<MemoryType::on_device>& sibling,\n                                                  const GHistRowT<MemoryType::on_device>& parent) {\n  const size_t size = self->Size();\n  CHECK_EQ(sibling.Size(), size);\n  CHECK_EQ(parent.Size(), size);\n\n  SubtractionHist(qu_, self, parent, sibling, size, ::sycl::event());\n}\ntemplate\nvoid GHistBuilder<float>::SubtractionTrick(GHistRow<float, MemoryType::on_device>* self,\n                                           const GHistRow<float, MemoryType::on_device>& sibling,\n                                           const GHistRow<float, MemoryType::on_device>& parent);\ntemplate\nvoid GHistBuilder<double>::SubtractionTrick(GHistRow<double, MemoryType::on_device>* self,\n                                            const GHistRow<double, MemoryType::on_device>& sibling,\n                                            const GHistRow<double, MemoryType::on_device>& parent);\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/common/hist_util.h",
    "content": "/*!\n * Copyright 2017-2023 by Contributors\n * \\file hist_util.h\n */\n#ifndef PLUGIN_SYCL_COMMON_HIST_UTIL_H_\n#define PLUGIN_SYCL_COMMON_HIST_UTIL_H_\n\n#include <vector>\n#include <unordered_map>\n#include <memory>\n\n#include \"../data.h\"\n#include \"row_set.h\"\n\n#include \"../../src/common/hist_util.h\"\n#include \"../data/gradient_index.h\"\n#include \"../tree/hist_dispatcher.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\ntemplate<typename GradientSumT, MemoryType memory_type = MemoryType::shared>\nusing GHistRow = USMVector<xgboost::detail::GradientPairInternal<GradientSumT>, memory_type>;\n\nusing BinTypeSize = ::xgboost::common::BinTypeSize;\n\nclass ColumnMatrix;\n\n/*!\n * \\brief Fill histogram with zeroes\n */\ntemplate<typename GradientSumT>\nvoid InitHist(::sycl::queue* qu,\n              GHistRow<GradientSumT, MemoryType::on_device>* hist,\n              size_t size, ::sycl::event* event);\n\n/*!\n * \\brief Copy histogram from src to dst\n */\ntemplate<typename GradientSumT>\nvoid CopyHist(::sycl::queue* qu,\n              GHistRow<GradientSumT, MemoryType::on_device>* dst,\n              const GHistRow<GradientSumT, MemoryType::on_device>& src,\n              size_t size);\n\n/*!\n * \\brief Compute subtraction: dst = src1 - src2\n */\ntemplate<typename GradientSumT>\n::sycl::event SubtractionHist(::sycl::queue* qu,\n                              GHistRow<GradientSumT, MemoryType::on_device>* dst,\n                              const GHistRow<GradientSumT, MemoryType::on_device>& src1,\n                              const GHistRow<GradientSumT, MemoryType::on_device>& src2,\n                              size_t size, ::sycl::event event_priv);\n\n/*!\n * \\brief Histograms of gradient statistics for multiple nodes\n */\ntemplate<typename GradientSumT, MemoryType memory_type = MemoryType::shared>\nclass HistCollection {\n public:\n  using GHistRowT = GHistRow<GradientSumT, 
memory_type>;\n\n  // Access histogram for i-th node\n  GHistRowT& operator[](bst_uint nid) {\n    return *(data_.at(nid));\n  }\n\n  const GHistRowT& operator[](bst_uint nid) const {\n    return *(data_.at(nid));\n  }\n\n  // Initialize histogram collection\n  void Init(::sycl::queue* qu, uint32_t nbins) {\n    qu_ = qu;\n    if (nbins_ != nbins) {\n      nbins_ = nbins;\n      data_.clear();\n    }\n  }\n\n  // Create an empty histogram for i-th node\n  ::sycl::event AddHistRow(bst_uint nid) {\n    ::sycl::event event;\n    if (data_.count(nid) == 0) {\n      data_[nid] =\n        std::make_shared<GHistRowT>(qu_, nbins_,\n                                    xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),\n                                    &event);\n    } else {\n      data_[nid]->Resize(qu_, nbins_,\n                         xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),\n                         &event);\n    }\n    return event;\n  }\n\n private:\n  /*! \\brief Number of all bins over all features */\n  uint32_t nbins_ = 0;\n\n  std::unordered_map<uint32_t, std::shared_ptr<GHistRowT>> data_;\n\n  ::sycl::queue* qu_;\n};\n\n/*!\n * \\brief Stores temporary histograms to compute them in parallel\n */\ntemplate<typename GradientSumT>\nclass ParallelGHistBuilder {\n public:\n  using GHistRowT = GHistRow<GradientSumT, MemoryType::on_device>;\n\n  void Init(::sycl::queue* qu, size_t nbins) {\n    qu_ = qu;\n    if (nbins != nbins_) {\n      hist_buffer_.Init(qu_, nbins);\n      nbins_ = nbins;\n    }\n  }\n\n  void Reset(size_t nblocks) {\n    hist_device_buffer_.Resize(qu_, nblocks * nbins_);\n  }\n\n  GHistRowT& GetDeviceBuffer() {\n    return hist_device_buffer_;\n  }\n\n protected:\n  /*! \\brief Number of bins in each histogram */\n  size_t nbins_ = 0;\n  /*! \\brief Buffers for histograms for all nodes processed */\n  HistCollection<GradientSumT> hist_buffer_;\n\n  /*! 
\\brief Buffer for additional histograms for Parallel processing  */\n  GHistRowT hist_device_buffer_;\n\n  ::sycl::queue* qu_;\n};\n\n/*!\n * \\brief Builder for histograms of gradient statistics\n */\ntemplate<typename GradientSumT>\nclass GHistBuilder {\n public:\n  template<MemoryType memory_type = MemoryType::shared>\n  using GHistRowT = GHistRow<GradientSumT, memory_type>;\n\n  GHistBuilder() = default;\n  GHistBuilder(::sycl::queue* qu, uint32_t nbins) : qu_{qu}, nbins_{nbins} {}\n\n  // Construct a histogram via histogram aggregation\n  ::sycl::event BuildHist(const HostDeviceVector<GradientPair>& gpair,\n                          const RowSetCollection::Elem& row_indices,\n                          const GHistIndexMatrix& gmat,\n                          GHistRowT<MemoryType::on_device>* HistCollection,\n                          bool isDense,\n                          GHistRowT<MemoryType::on_device>* hist_buffer,\n                          const DeviceProperties& device_prop,\n                          ::sycl::event event,\n                          bool force_atomic_use = false);\n\n  // Construct a histogram via subtraction trick\n  void SubtractionTrick(GHistRowT<MemoryType::on_device>* self,\n                        const GHistRowT<MemoryType::on_device>& sibling,\n                        const GHistRowT<MemoryType::on_device>& parent);\n\n  uint32_t GetNumBins() const {\n      return nbins_;\n  }\n\n private:\n  /*! \\brief Number of all bins over all features */\n  uint32_t nbins_ { 0 };\n\n  ::sycl::queue* qu_;\n};\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_COMMON_HIST_UTIL_H_\n"
  },
  {
    "path": "plugin/sycl/common/host_device_vector.cc",
    "content": "/**\n * Copyright 2017-2024 by XGBoost contributors\n */\n\n#ifdef XGBOOST_USE_SYCL\n\n// implementation of HostDeviceVector with sycl support\n\n#include <memory>\n#include <utility>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"xgboost/host_device_vector.h\"\n#pragma GCC diagnostic pop\n\n#include \"../device_manager.h\"\n#include \"../data.h\"\n#include \"../predictor/node.h\"\n\nnamespace xgboost {\ntemplate <typename T>\nclass HostDeviceVectorImpl {\n  using DeviceStorage = sycl::USMVector<T, sycl::MemoryType::on_device>;\n\n public:\n  explicit HostDeviceVectorImpl(size_t size, T v, DeviceOrd device) : device_(device) {\n    if (device.IsSycl()) {\n      device_access_ = GPUAccess::kWrite;\n      SetDevice();\n      data_d_->Resize(qu_, size, v);\n    } else {\n      data_h_.resize(size, v);\n    }\n  }\n\n  template <class Initializer>\n  HostDeviceVectorImpl(const Initializer& init, DeviceOrd device) : device_(device) {\n    if (device.IsSycl()) {\n      device_access_ = GPUAccess::kWrite;\n\n      ResizeDevice(init.size());\n      Copy(init);\n    } else {\n      data_h_ = init;\n    }\n  }\n\n  HostDeviceVectorImpl(HostDeviceVectorImpl<T>&& that) : device_{that.device_},\n                                                         data_h_{std::move(that.data_h_)},\n                                                         data_d_{std::move(that.data_d_)},\n                                                         device_access_{that.device_access_} {}\n\n  std::vector<T>& HostVector() {\n    SyncHost(GPUAccess::kNone);\n    return data_h_;\n  }\n\n  const std::vector<T>& ConstHostVector() {\n    SyncHost(GPUAccess::kRead);\n    return data_h_;\n  }\n\n  void SetDevice(DeviceOrd device) {\n    if (device_ == device) { return; }\n    if (device_.IsSycl()) {\n      SyncHost(GPUAccess::kNone);\n    }\n\n    if (device_.IsSycl() && 
device.IsSycl()) {\n      CHECK_EQ(device_, device)\n          << \"New device is different from previous one.\";\n    }\n    device_ = device;\n    if (device_.IsSycl()) {\n      ResizeDevice(data_h_.size());\n    }\n  }\n\n  template <typename... U>\n  void Resize(size_t new_size, U&&... args) {\n    if (new_size == Size()) {\n      return;\n    }\n    if ((Size() == 0 && device_.IsSycl()) || (DeviceCanWrite() && device_.IsSycl())) {\n      // fast on-device resize\n      device_access_ = GPUAccess::kWrite;\n      SetDevice();\n      auto old_size = data_d_->Size();\n      data_d_->Resize(qu_, new_size, std::forward<U>(args)...);\n    } else {\n      // resize on host\n      SyncHost(GPUAccess::kNone);\n      auto old_size = data_h_.size();\n      data_h_.resize(new_size, std::forward<U>(args)...);\n    }\n  }\n\n  void SyncHost(GPUAccess access) {\n    if (HostCanAccess(access)) { return; }\n    if (HostCanRead()) {\n      // data is present, just need to deny access to the device\n      device_access_ = access;\n      return;\n    }\n    device_access_ = access;\n    if (data_h_.size() != data_d_->Size()) { data_h_.resize(data_d_->Size()); }\n    SetDevice();\n    qu_->memcpy(data_h_.data(), data_d_->Data(), data_d_->Size() * sizeof(T)).wait();\n  }\n\n  void SyncDevice(GPUAccess access) {\n    if (DeviceCanAccess(access)) { return; }\n    if (DeviceCanRead()) {\n      device_access_ = access;\n      return;\n    }\n    // data is on the host\n    ResizeDevice(data_h_.size());\n    SetDevice();\n    qu_->memcpy(data_d_->Data(), data_h_.data(), data_d_->Size() * sizeof(T)).wait();\n    device_access_ = access;\n  }\n\n  bool HostCanAccess(GPUAccess access) const { return device_access_ <= access; }\n  bool HostCanRead() const { return HostCanAccess(GPUAccess::kRead); }\n  bool HostCanWrite() const { return HostCanAccess(GPUAccess::kNone); }\n  bool DeviceCanAccess(GPUAccess access) const { return device_access_ >= access; }\n  bool DeviceCanRead() const { return 
DeviceCanAccess(GPUAccess::kRead); }\n  bool DeviceCanWrite() const { return DeviceCanAccess(GPUAccess::kWrite); }\n  GPUAccess Access() const { return device_access_; }\n\n  size_t Size() const {\n    return HostCanRead() ? data_h_.size() : data_d_ ? data_d_->Size() : 0;\n  }\n\n  DeviceOrd Device() const { return device_; }\n\n  T* DevicePointer() {\n    SyncDevice(GPUAccess::kWrite);\n    return data_d_->Data();\n  }\n\n  const T* ConstDevicePointer() {\n    SyncDevice(GPUAccess::kRead);\n    return data_d_->DataConst();\n  }\n\n  common::Span<T> DeviceSpan() {\n    SyncDevice(GPUAccess::kWrite);\n    return {this->DevicePointer(), Size()};\n  }\n\n  common::Span<const T> ConstDeviceSpan() {\n    SyncDevice(GPUAccess::kRead);\n    return {this->ConstDevicePointer(), Size()};\n  }\n\n  void Fill(T v) {\n    if (HostCanWrite()) {\n      std::fill(data_h_.begin(), data_h_.end(), v);\n    } else {\n      device_access_ = GPUAccess::kWrite;\n      SetDevice();\n      qu_->fill(data_d_->Data(), v, data_d_->Size()).wait();\n    }\n  }\n\n  void Copy(HostDeviceVectorImpl<T>* other) {\n    CHECK_EQ(Size(), other->Size());\n    SetDevice(other->device_);\n    // Data is on host.\n    if (HostCanWrite() && other->HostCanWrite()) {\n      std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin());\n      return;\n    }\n    SetDevice();\n    CopyToDevice(other);\n  }\n\n  void Copy(const std::vector<T>& other) {\n    CHECK_EQ(Size(), other.size());\n    if (HostCanWrite()) {\n      std::copy(other.begin(), other.end(), data_h_.begin());\n    } else {\n      CopyToDevice(other.data());\n    }\n  }\n\n  void Copy(std::initializer_list<T> other) {\n    CHECK_EQ(Size(), other.size());\n    if (HostCanWrite()) {\n      std::copy(other.begin(), other.end(), data_h_.begin());\n    } else {\n      CopyToDevice(other.begin());\n    }\n  }\n\n  void Extend(HostDeviceVectorImpl* other) {\n    auto ori_size = this->Size();\n    this->Resize(ori_size + other->Size(), 
T{});\n    if (HostCanWrite() && other->HostCanRead()) {\n      auto& h_vec = this->HostVector();\n      auto& other_vec = other->HostVector();\n      CHECK_EQ(h_vec.size(), ori_size + other->Size());\n      std::copy(other_vec.cbegin(), other_vec.cend(), h_vec.begin() + ori_size);\n    } else {\n      auto ptr = other->ConstDevicePointer();\n      SetDevice();\n      CHECK_EQ(this->Device(), other->Device());\n      qu_->memcpy(this->DevicePointer() + ori_size, ptr, other->Size() * sizeof(T)).wait();\n    }\n  }\n\n private:\n  void ResizeDevice(size_t new_size) {\n    if (data_d_ && new_size == data_d_->Size()) { return; }\n    SetDevice();\n    data_d_->Resize(qu_, new_size);\n  }\n\n  void SetDevice() {\n    if (!qu_) {\n      qu_ = device_manager_.GetQueue(device_);\n    }\n    if (!data_d_) {\n      data_d_.reset(new DeviceStorage());\n    }\n  }\n\n  void CopyToDevice(HostDeviceVectorImpl* other) {\n    if (other->HostCanWrite()) {\n      CopyToDevice(other->data_h_.data());\n    } else {\n      ResizeDevice(Size());\n      device_access_ = GPUAccess::kWrite;\n      SetDevice();\n      qu_->memcpy(data_d_->Data(), other->data_d_->Data(), data_d_->Size() * sizeof(T)).wait();\n    }\n  }\n\n  void CopyToDevice(const T* begin) {\n    data_d_->ResizeNoCopy(qu_, Size());\n    qu_->memcpy(data_d_->Data(), begin, data_d_->Size() * sizeof(T)).wait();\n    device_access_ = GPUAccess::kWrite;\n  }\n\n  sycl::DeviceManager device_manager_;\n  ::sycl::queue* qu_ = nullptr;\n  DeviceOrd device_{DeviceOrd::CPU()};\n  std::vector<T> data_h_{};\n  std::unique_ptr<DeviceStorage> data_d_{};\n  GPUAccess device_access_{GPUAccess::kNone};\n};\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(size_t size, T v, DeviceOrd device)\n  : impl_(nullptr) {\n  impl_ = new HostDeviceVectorImpl<T>(size, v, device);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, DeviceOrd device)\n  : impl_(nullptr) {\n  impl_ = new 
HostDeviceVectorImpl<T>(init, device);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, DeviceOrd device)\n  : impl_(nullptr) {\n  impl_ = new HostDeviceVectorImpl<T>(init, device);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(HostDeviceVector<T>&& that) {\n  impl_ = new HostDeviceVectorImpl<T>(std::move(*that.impl_));\n}\n\ntemplate <typename T>\nHostDeviceVector<T>& HostDeviceVector<T>::operator=(HostDeviceVector<T>&& that) {\n  if (this == &that) { return *this; }\n\n  std::unique_ptr<HostDeviceVectorImpl<T>> new_impl(\n      new HostDeviceVectorImpl<T>(std::move(*that.impl_)));\n  delete impl_;\n  impl_ = new_impl.release();\n  return *this;\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::~HostDeviceVector() {\n  delete impl_;\n  impl_ = nullptr;\n}\n\ntemplate <typename T>\nsize_t HostDeviceVector<T>::Size() const { return impl_->Size(); }\n\ntemplate <typename T>\nDeviceOrd HostDeviceVector<T>::Device() const {\n  return impl_->Device();\n}\n\ntemplate <typename T>\nT* HostDeviceVector<T>::DevicePointer() {\n  return impl_->DevicePointer();\n}\n\ntemplate <typename T>\nconst T* HostDeviceVector<T>::ConstDevicePointer() const {\n  return impl_->ConstDevicePointer();\n}\n\ntemplate <typename T>\ncommon::Span<T> HostDeviceVector<T>::DeviceSpan() {\n  return impl_->DeviceSpan();\n}\n\ntemplate <typename T>\ncommon::Span<const T> HostDeviceVector<T>::ConstDeviceSpan() const {\n  return impl_->ConstDeviceSpan();\n}\n\ntemplate <typename T>\nstd::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->HostVector(); }\n\ntemplate <typename T>\nconst std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {\n  return impl_->ConstHostVector();\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(size_t new_size, T v) {\n  impl_->Resize(new_size, v);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(size_t new_size) {\n  impl_->Resize(new_size);\n}\n\ntemplate <typename 
T>\nvoid HostDeviceVector<T>::Fill(T v) {\n  impl_->Fill(v);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const HostDeviceVector<T>& other) {\n  impl_->Copy(other.impl_);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const std::vector<T>& other) {\n  impl_->Copy(other);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(std::initializer_list<T> other) {\n  impl_->Copy(other);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Extend(HostDeviceVector const& other) {\n  impl_->Extend(other.impl_);\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanRead() const {\n  return impl_->HostCanRead();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanWrite() const {\n  return impl_->HostCanWrite();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanRead() const {\n  return impl_->DeviceCanRead();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanWrite() const {\n  return impl_->DeviceCanWrite();\n}\n\ntemplate <typename T>\nGPUAccess HostDeviceVector<T>::DeviceAccess() const {\n  return impl_->Access();\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::SetDevice(DeviceOrd device) const {\n  impl_->SetDevice(device);\n}\n\n// explicit instantiations are required, as HostDeviceVector isn't header-only\ntemplate class HostDeviceVector<bst_float>;\ntemplate class HostDeviceVector<double>;\ntemplate class HostDeviceVector<GradientPair>;\ntemplate class HostDeviceVector<GradientPairPrecise>;\ntemplate class HostDeviceVector<std::int32_t>;   // bst_node_t\ntemplate class HostDeviceVector<std::uint8_t>;\ntemplate class HostDeviceVector<std::int8_t>;\ntemplate class HostDeviceVector<FeatureType>;\ntemplate class HostDeviceVector<Entry>;\ntemplate class HostDeviceVector<bst_idx_t>;\ntemplate class HostDeviceVector<std::uint32_t>;  // bst_feature_t\ntemplate class HostDeviceVector<RegTree::Node>;\ntemplate class HostDeviceVector<sycl::predictor::Node>;\ntemplate class 
HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;\ntemplate class HostDeviceVector<RTreeNodeStat>;\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_USE_SYCL\n"
  },
  {
    "path": "plugin/sycl/common/linalg_op.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n * \\file linalg_op.cc\n */\n\n#include \"../data.h\"\n#include \"../device_manager.h\"\n\n#include \"../../../src/common/optional_weight.h\"  // for OptionalWeights\n#include \"xgboost/context.h\"  // for Context\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost::sycl::linalg {\nvoid SmallHistogram(Context const* ctx, xgboost::linalg::MatrixView<float const> indices,\n                    xgboost::common::OptionalWeights const& weights,\n                    xgboost::linalg::VectorView<float> bins) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(ctx->Device());\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(indices.Size()),\n                       [=](::sycl::id<1> pid) {\n      const size_t i = pid[0];\n      auto y = indices(i);\n      auto w = weights[i];\n      AtomicRef<float> bin_val(const_cast<float&>(bins(static_cast<std::size_t>(y))));\n      bin_val += w;\n    });\n  }).wait();\n}\n}  // namespace xgboost::sycl::linalg\n"
  },
  {
    "path": "plugin/sycl/common/linalg_op.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n * \\file linalg_op.h\n */\n#ifndef PLUGIN_SYCL_COMMON_LINALG_OP_H_\n#define PLUGIN_SYCL_COMMON_LINALG_OP_H_\n\n#include <vector>\n#include <utility>\n\n#include \"../data.h\"\n#include \"../device_manager.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace linalg {\n\ntemplate<typename T, std::int32_t D>\nusing TensorView = xgboost::linalg::TensorView<T, D>;\n\nstruct WorkGroupsParams {\n  size_t n_workgroups;\n  size_t workgroup_size;\n};\n\ntemplate <typename Fn>\n::sycl::event GroupWiseKernel(::sycl::queue* qu, int* flag_ptr,\n                              const std::vector<::sycl::event>& events,\n                              const WorkGroupsParams& wg, Fn &&fn) {\n  ::sycl::buffer<int, 1> flag_buf(flag_ptr, 1);\n  auto event = qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(events);\n    auto flag  = flag_buf.get_access<::sycl::access::mode::write>(cgh);\n    cgh.parallel_for_work_group<>(::sycl::range<1>(wg.n_workgroups),\n                                  ::sycl::range<1>(wg.workgroup_size),\n                                  [=](::sycl::group<1> group) {\n      group.parallel_for_work_item([&](::sycl::h_item<1> item) {\n        const size_t idx = item.get_global_id()[0];\n        fn(idx, flag);\n      });\n    });\n  });\n  return event;\n}\n\ntemplate<typename Fn, typename TupleType, size_t ... 
I>\nauto call(Fn&& fn, TupleType t, std::index_sequence<I ...>) {\n     return fn(std::get<I>(t) ...);\n}\n\ntemplate<typename Fn, typename TupleType>\nauto call(Fn&& fn, TupleType t) {\n    static constexpr auto size = std::tuple_size<TupleType>::value;\n    return call(fn, t, std::make_index_sequence<size>{});\n}\n\ntemplate <typename T, int32_t D, typename Fn>\nvoid ElementWiseKernel(TensorView<T, D> t, Fn&& fn) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(t.Device());\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(t.Size()),\n                       [=](::sycl::id<1> pid) {\n      const size_t idx = pid[0];\n      call(const_cast<Fn&&>(fn), xgboost::linalg::UnravelIndex(idx, t.Shape()));\n    });\n  }).wait_and_throw();\n}\n\ntemplate <typename T, int32_t D, typename Fn>\nbool Validate(DeviceOrd device, TensorView<T, D> t, Fn&& fn) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(t.Device());\n\n  int flag = 0;\n  {\n    ::sycl::buffer<int, 1> flag_buf(&flag, 1);\n    qu->submit([&](::sycl::handler& cgh) {\n      auto flag_acc  = flag_buf.get_access<::sycl::access::mode::write>(cgh);\n      cgh.parallel_for<>(::sycl::range<1>(t.Size()),\n                         [=](::sycl::id<1> pid) {\n        const size_t idx = pid[0];\n        const T& value = call(t, xgboost::linalg::UnravelIndex(idx, t.Shape()));\n        bool is_valid = const_cast<Fn&&>(fn)(value);\n        if (!is_valid) {\n          AtomicRef<int> flag_ref(flag_acc[0]);\n          flag_ref = 1;\n        }\n      });\n    });\n  }\n  qu->wait_and_throw();\n  return (flag == 0);\n}\n\n}  // namespace linalg\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_COMMON_LINALG_OP_H_\n"
  },
  {
    "path": "plugin/sycl/common/optional_weight.cc",
    "content": "/*!\n * Copyright by Contributors 2017-2025\n */\n#include <sycl/sycl.hpp>\n\n#include \"../../../src/common/optional_weight.h\"\n\n#include \"../device_manager.h\"\n\nnamespace xgboost::common::sycl_impl {\ndouble SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(ctx->Device());\n\n  const auto* data = weights.Data();\n  double result = 0;\n  {\n    ::sycl::buffer<double> buff(&result, 1);\n    qu->submit([&](::sycl::handler& cgh) {\n      auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>());\n      cgh.parallel_for<>(::sycl::range<1>(weights.Size()), reduction,\n                        [=](::sycl::id<1> pid, auto& sum) {\n        size_t i = pid[0];\n        sum += data[i];\n      });\n    }).wait_and_throw();\n  }\n\n  return result;\n}\n}  // namespace xgboost::common::sycl_impl\n"
  },
  {
    "path": "plugin/sycl/common/partition_builder.h",
    "content": "/*!\n * Copyright 2017-2024 XGBoost contributors\n */\n#ifndef PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_\n#define PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/data.h>\n#pragma GCC diagnostic pop\n#include <xgboost/tree_model.h>\n\n#include <algorithm>\n#include <vector>\n#include <utility>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"../../../src/common/column_matrix.h\"\n#pragma GCC diagnostic pop\n\n#include \"../data.h\"\n#include \"row_set.h\"\n#include \"../data/gradient_index.h\"\n#include \"../tree/expand_entry.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\n// split row indexes (rid_span) to 2 parts (both stored in rid_buf) depending\n// on comparison of indexes values (idx_span) and split point (split_cond)\n// Handle dense columns\ntemplate <bool default_left, typename BinIdxType>\ninline ::sycl::event PartitionDenseKernel(\n                                 ::sycl::queue* qu,\n                                 const GHistIndexMatrix& gmat,\n                                 const RowSetCollection::Elem& rid_span,\n                                 const size_t fid,\n                                 const int32_t split_cond,\n                                 xgboost::common::Span<size_t>* rid_buf,\n                                 size_t* parts_size,\n                                 ::sycl::event event) {\n  const size_t row_stride = gmat.row_stride;\n  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();\n  const size_t* rid = rid_span.begin;\n  const size_t range_size = rid_span.Size();\n  const size_t offset = gmat.cut.Ptrs()[fid];\n\n  size_t* p_rid_buf = rid_buf->data();\n\n  return qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event);\n 
   cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {\n      const size_t id = rid[nid.get_id(0)];\n      const int32_t value = static_cast<int32_t>(gradient_index[id * row_stride + fid] + offset);\n      const bool is_left = value <= split_cond;\n      if (is_left) {\n        AtomicRef<size_t> n_left(parts_size[0]);\n        p_rid_buf[n_left.fetch_add(1)] = id;\n      } else {\n        AtomicRef<size_t> n_right(parts_size[1]);\n        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;\n      }\n    });\n  });\n}\n\n// split row indexes (rid_span) to 2 parts (both stored in rid_buf) depending\n// on comparison of indexes values (idx_span) and split point (split_cond)\n// Handle sparce columns\ntemplate <bool default_left, typename BinIdxType>\ninline ::sycl::event PartitionSparseKernel(::sycl::queue* qu,\n                                  const GHistIndexMatrix& gmat,\n                                  const RowSetCollection::Elem& rid_span,\n                                  const size_t fid,\n                                  const int32_t split_cond,\n                                  xgboost::common::Span<size_t>* rid_buf,\n                                  size_t* parts_size,\n                                  ::sycl::event event) {\n  const size_t row_stride = gmat.row_stride;\n  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();\n  const size_t* rid = rid_span.begin;\n  const size_t range_size = rid_span.Size();\n  const uint32_t* cut_ptrs = gmat.cut.cut_ptrs_.ConstDevicePointer();\n\n  size_t* p_rid_buf = rid_buf->data();\n  return qu->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event);\n    cgh.parallel_for<>(::sycl::range<1>(range_size), [=](::sycl::item<1> nid) {\n      const size_t id = rid[nid.get_id(0)];\n\n      const BinIdxType* gr_index_local = gradient_index + row_stride * id;\n      const int32_t fid_local = std::lower_bound(gr_index_local,\n                                                 
gr_index_local + row_stride,\n                                                 cut_ptrs[fid]) - gr_index_local;\n      const bool is_left = (fid_local >= row_stride ||\n                            gr_index_local[fid_local] >= cut_ptrs[fid + 1]) ?\n                              default_left :\n                              gr_index_local[fid_local] <= split_cond;\n      if (is_left) {\n        AtomicRef<size_t> n_left(parts_size[0]);\n        p_rid_buf[n_left.fetch_add(1)] = id;\n      } else {\n        AtomicRef<size_t> n_right(parts_size[1]);\n        p_rid_buf[range_size - n_right.fetch_add(1) - 1] = id;\n      }\n    });\n  });\n}\n\n// The builder is required for samples partition to left and rights children for set of nodes\nclass PartitionBuilder {\n public:\n  template<typename Func>\n  void Init(::sycl::queue* qu, size_t n_nodes, Func funcNTaks) {\n    qu_ = qu;\n    nodes_offsets_.resize(n_nodes+1);\n    result_rows_.resize(2 * n_nodes);\n    n_nodes_ = n_nodes;\n\n\n    nodes_offsets_[0] = 0;\n    for (size_t i = 1; i < n_nodes+1; ++i) {\n      nodes_offsets_[i] = nodes_offsets_[i-1] + funcNTaks(i-1);\n    }\n\n    if (data_.Size() < nodes_offsets_[n_nodes]) {\n      data_.Resize(qu, nodes_offsets_[n_nodes]);\n    }\n  }\n\n  size_t GetNLeftElems(int nid) const {\n    return result_rows_[2 * nid];\n  }\n\n  size_t GetNRightElems(int nid) const {\n    return result_rows_[2 * nid + 1];\n  }\n\n  // For test purposes only\n  void SetNLeftElems(int nid, size_t val) {\n    result_rows_[2 * nid] = val;\n  }\n\n  // For test purposes only\n  void SetNRightElems(int nid, size_t val) {\n    result_rows_[2 * nid + 1] = val;\n  }\n\n  xgboost::common::Span<size_t> GetData(int nid) {\n    return { data_.Data() + nodes_offsets_[nid], nodes_offsets_[nid + 1] - nodes_offsets_[nid] };\n  }\n\n  template <typename BinIdxType>\n  ::sycl::event Partition(const int32_t split_cond,\n                        const GHistIndexMatrix& gmat,\n                        const 
RowSetCollection::Elem& rid_span,\n                        const xgboost::RegTree::Node& node,\n                        xgboost::common::Span<size_t>* rid_buf,\n                        size_t* parts_size,\n                        ::sycl::event event) {\n    const bst_uint fid = node.SplitIndex();\n    const bool default_left = node.DefaultLeft();\n\n    if (gmat.IsDense()) {\n      if (default_left) {\n        return PartitionDenseKernel<true, BinIdxType>(qu_, gmat, rid_span, fid,\n                                                      split_cond, rid_buf, parts_size, event);\n      } else {\n        return PartitionDenseKernel<false, BinIdxType>(qu_, gmat, rid_span, fid,\n                                                      split_cond, rid_buf, parts_size, event);\n      }\n    } else {\n      if (default_left) {\n        return PartitionSparseKernel<true, BinIdxType>(qu_, gmat, rid_span, fid,\n                                                      split_cond, rid_buf, parts_size, event);\n      } else {\n        return PartitionSparseKernel<false, BinIdxType>(qu_, gmat, rid_span, fid,\n                                                        split_cond, rid_buf, parts_size, event);\n      }\n    }\n  }\n\n  // Entry point for Partition\n  void Partition(const GHistIndexMatrix& gmat,\n                 const std::vector<tree::ExpandEntry> nodes,\n                 const RowSetCollection& row_set_collection,\n                 const std::vector<int32_t>& split_conditions,\n                 RegTree* p_tree,\n                 ::sycl::event* general_event) {\n    nodes_events_.resize(n_nodes_);\n\n    parts_size_.ResizeAndFill(qu_, 2 * n_nodes_, 0, general_event);\n\n    for (size_t node_in_set = 0; node_in_set < n_nodes_; node_in_set++) {\n      const int32_t nid = nodes[node_in_set].nid;\n      ::sycl::event& node_event = nodes_events_[node_in_set];\n      const auto& rid_span = row_set_collection[nid];\n      if (rid_span.Size() > 0) {\n        const RegTree::Node& node 
= (*p_tree)[nid];\n        xgboost::common::Span<size_t> rid_buf = GetData(node_in_set);\n        size_t* part_size = parts_size_.Data() + 2 * node_in_set;\n        int32_t split_condition = split_conditions[node_in_set];\n        switch (gmat.index.GetBinTypeSize()) {\n          case common::BinTypeSize::kUint8BinsTypeSize:\n            node_event = Partition<uint8_t>(split_condition, gmat, rid_span, node,\n                                            &rid_buf, part_size, *general_event);\n            break;\n          case common::BinTypeSize::kUint16BinsTypeSize:\n            node_event = Partition<uint16_t>(split_condition, gmat, rid_span, node,\n                                            &rid_buf, part_size, *general_event);\n            break;\n          case common::BinTypeSize::kUint32BinsTypeSize:\n            node_event = Partition<uint32_t>(split_condition, gmat, rid_span, node,\n                                            &rid_buf, part_size, *general_event);\n            break;\n          default:\n            CHECK(false);  // no default behavior\n        }\n      } else {\n        node_event = ::sycl::event();\n      }\n    }\n\n    *general_event = qu_->memcpy(result_rows_.data(),\n                                 parts_size_.DataConst(),\n                                 sizeof(size_t) * 2 * n_nodes_,\n                                 nodes_events_);\n  }\n\n  void MergeToArray(size_t nid,\n                    size_t* data_result,\n                    ::sycl::event* event) {\n    size_t n_nodes_total = GetNLeftElems(nid) + GetNRightElems(nid);\n    if (n_nodes_total > 0) {\n      const size_t* data = data_.Data() + nodes_offsets_[nid];\n      qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, *event);\n    }\n  }\n\n protected:\n  std::vector<size_t> nodes_offsets_;\n  std::vector<size_t> result_rows_;\n  std::vector<::sycl::event> nodes_events_;\n  size_t n_nodes_;\n\n  USMVector<size_t, MemoryType::on_device> parts_size_;\n  
USMVector<size_t, MemoryType::on_device> data_;\n\n  ::sycl::queue* qu_;\n};\n\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n\n\n#endif  // PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_\n"
  },
  {
    "path": "plugin/sycl/common/row_set.h",
    "content": "/*!\n * Copyright 2017-2023 XGBoost contributors\n */\n#ifndef PLUGIN_SYCL_COMMON_ROW_SET_H_\n#define PLUGIN_SYCL_COMMON_ROW_SET_H_\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/data.h>\n#pragma GCC diagnostic pop\n#include <algorithm>\n#include <vector>\n#include <utility>\n\n#include \"../data.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\n\n/*! \\brief Collection of rowsets stored on device in USM memory */\nclass RowSetCollection {\n public:\n  /*! \\brief data structure to store an instance set, a subset of\n   *  rows (instances) associated with a particular node in a decision\n   *  tree. */\n  struct Elem {\n    const size_t* begin{nullptr};\n    const size_t* end{nullptr};\n    bst_node_t node_id{-1};  // id of node associated with this instance set; -1 means uninitialized\n    Elem()\n         = default;\n    Elem(const size_t* begin,\n         const size_t* end,\n         bst_node_t node_id = -1)\n        : begin(begin), end(end), node_id(node_id) {}\n\n\n    inline size_t Size() const {\n      return end - begin;\n    }\n  };\n\n  inline size_t Size() const {\n    return elem_of_each_node_.size();\n  }\n\n  /*! \\brief return corresponding element set given the node_id */\n  inline const Elem& operator[](unsigned node_id) const {\n    const Elem& e = elem_of_each_node_[node_id];\n    CHECK(e.begin != nullptr)\n        << \"access element that is not in the set\";\n    return e;\n  }\n\n  /*! 
\\brief return corresponding element set given the node_id */\n  inline Elem& operator[](unsigned node_id) {\n    Elem& e = elem_of_each_node_[node_id];\n    return e;\n  }\n\n  // clear up things\n  inline void Clear() {\n    elem_of_each_node_.clear();\n  }\n  // initialize node id 0->everything\n  inline void Init() {\n    CHECK_EQ(elem_of_each_node_.size(), 0U);\n\n    const size_t* begin = row_indices_.Begin();\n    const size_t* end = row_indices_.End();\n    elem_of_each_node_.emplace_back(Elem(begin, end, 0));\n  }\n\n  auto& Data() { return row_indices_; }\n\n  // split rowset into two\n  inline void AddSplit(unsigned node_id,\n                       unsigned left_node_id,\n                       unsigned right_node_id,\n                       size_t n_left,\n                       size_t n_right) {\n    const Elem e = elem_of_each_node_[node_id];\n    CHECK(e.begin != nullptr);\n    size_t* all_begin = row_indices_.Begin();\n    size_t* begin = all_begin + (e.begin - all_begin);\n\n\n    CHECK_EQ(n_left + n_right, e.Size());\n    CHECK_LE(begin + n_left, e.end);\n    CHECK_EQ(begin + n_left + n_right, e.end);\n\n\n    if (left_node_id >= elem_of_each_node_.size()) {\n      elem_of_each_node_.resize(left_node_id + 1, Elem(nullptr, nullptr, -1));\n    }\n    if (right_node_id >= elem_of_each_node_.size()) {\n      elem_of_each_node_.resize(right_node_id + 1, Elem(nullptr, nullptr, -1));\n    }\n\n\n    elem_of_each_node_[left_node_id] = Elem(begin, begin + n_left, left_node_id);\n    elem_of_each_node_[right_node_id] = Elem(begin + n_left, e.end, right_node_id);\n    elem_of_each_node_[node_id] = Elem(nullptr, nullptr, -1);\n  }\n\n private:\n  // stores the row indexes in the set\n  USMVector<size_t, MemoryType::on_device> row_indices_;\n  // vector: node_id -> elements\n  std::vector<Elem> elem_of_each_node_;\n};\n\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n\n\n#endif  // PLUGIN_SYCL_COMMON_ROW_SET_H_\n"
  },
  {
    "path": "plugin/sycl/common/stats.cc",
    "content": "/*!\n * Copyright by Contributors 2017-2026\n */\n\n#include \"../../../src/common/stats.h\"\n\n#include <sycl/sycl.hpp>\n\n#include \"../device_manager.h\"\n\nnamespace xgboost::common::sycl_impl {\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(ctx->Device());\n\n  qu->submit([&](::sycl::handler& cgh) {\n      auto reduction = ::sycl::reduction(&(out(0)), 0.0f, ::sycl::plus<float>(),\n                                         ::sycl::property::reduction::initialize_to_identity());\n      cgh.parallel_for<>(::sycl::range<1>(v.Size()), reduction, [=](::sycl::id<1> pid, auto& sum) {\n        size_t i = pid[0];\n        sum += v(i);\n      });\n    }).wait_and_throw();\n}\n}  // namespace xgboost::common::sycl_impl\n"
  },
  {
    "path": "plugin/sycl/common/transform.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n * \\file transform.h\n */\n#ifndef PLUGIN_SYCL_COMMON_TRANSFORM_H_\n#define PLUGIN_SYCL_COMMON_TRANSFORM_H_\n\n#include \"../device_manager.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\ntemplate <typename Functor, typename... SpanType>\nvoid LaunchSyclKernel(DeviceOrd device, Functor&& _func, xgboost::common::Range _range,\n                      SpanType... _spans) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(device);\n\n  size_t size = *(_range.end());\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(size),\n                       [=](::sycl::id<1> pid) {\n      const size_t idx = pid[0];\n      const_cast<Functor&&>(_func)(idx, _spans...);\n    });\n  }).wait();\n}\n\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_COMMON_TRANSFORM_H_\n"
  },
  {
    "path": "plugin/sycl/context_helper.cc",
    "content": "/*!\n * Copyright 2017-2025 by Contributors\n * \\file context_helper.cc\n */\n\n#include <sycl/sycl.hpp>\n\n\n#include \"device_manager.h\"\n#include \"context_helper.h\"\n\nnamespace xgboost {\nnamespace sycl {\n\nDeviceOrd DeviceFP64(const DeviceOrd& device) {\n  DeviceManager device_manager;\n  bool support_fp64 = device_manager.GetQueue(device)->get_device().has(::sycl::aspect::fp64);\n  if (support_fp64) {\n    return device;\n  } else {\n    LOG(WARNING) << \"Current device doesn't support fp64\";\n    return DeviceOrd::CPU();\n  }\n}\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/context_helper.h",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n * \\file context_helper.h\n */\n#ifndef PLUGIN_SYCL_CONTEXT_HELPER_H_\n#define PLUGIN_SYCL_CONTEXT_HELPER_H_\n\n#include <xgboost/context.h>\n\nnamespace xgboost {\nnamespace sycl {\n\nDeviceOrd DeviceFP64(const DeviceOrd& device);\n\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_CONTEXT_HELPER_H_\n"
  },
  {
    "path": "plugin/sycl/data/gradient_index.cc",
    "content": "/*!\n * Copyright 2017-2024 by Contributors\n * \\file gradient_index.cc\n */\n#include <vector>\n#include <limits>\n#include <algorithm>\n\n#include \"gradient_index.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\nuint32_t SearchBin(const bst_float* cut_values, const uint32_t* cut_ptrs, Entry const& e) {\n  auto beg = cut_ptrs[e.index];\n  auto end = cut_ptrs[e.index + 1];\n  auto it = std::upper_bound(cut_values + beg, cut_values + end, e.fvalue);\n  uint32_t idx = it - cut_values;\n  if (idx == end) {\n    idx -= 1;\n  }\n  return idx;\n}\n\ntemplate <typename BinIdxType>\nvoid mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) {\n  const size_t total_len = end - begin;\n  for (size_t block_len = 1; block_len < total_len; block_len <<= 1) {\n    for (size_t cur_block = 0; cur_block + block_len < total_len; cur_block += 2 * block_len) {\n      size_t start = cur_block;\n      size_t mid = start + block_len;\n      size_t finish = mid + block_len < total_len ? 
mid + block_len : total_len;\n      size_t left_pos = start;\n      size_t right_pos = mid;\n      size_t pos = start;\n      while (left_pos < mid || right_pos < finish) {\n        if (left_pos < mid && (right_pos == finish || begin[left_pos] < begin[right_pos])) {\n          buf[pos++] = begin[left_pos++];\n        } else {\n          buf[pos++] = begin[right_pos++];\n        }\n      }\n      for (size_t i = start; i < finish; i++) begin[i] = buf[i];\n    }\n  }\n}\n\ntemplate <typename BinIdxType, bool isDense>\nvoid GHistIndexMatrix::SetIndexData(::sycl::queue* qu,\n                                    Context const * ctx,\n                                    BinIdxType* index_data,\n                                    DMatrix *dmat) {\n  if (nbins == 0) return;\n  const bst_float* cut_values = cut.cut_values_.ConstDevicePointer();\n  const uint32_t* cut_ptrs = cut.cut_ptrs_.ConstDevicePointer();\n  size_t* hit_count_ptr = hit_count.DevicePointer();\n\n  BinIdxType* sort_data = reinterpret_cast<BinIdxType*>(sort_buff.Data());\n\n  for (auto &batch : dmat->GetBatches<SparsePage>()) {\n    batch.data.SetDevice(ctx->Device());\n    batch.offset.SetDevice(ctx->Device());\n    const xgboost::Entry *data_ptr = batch.data.ConstDevicePointer();\n    const bst_idx_t *offset_vec = batch.offset.ConstDevicePointer();\n    size_t batch_size = batch.Size();\n    if (batch_size > 0) {\n      const auto base_rowid = batch.base_rowid;\n      size_t row_stride = this->row_stride;\n      size_t nbins = this->nbins;\n      qu->submit([&](::sycl::handler& cgh) {\n        cgh.parallel_for<>(::sycl::range<1>(batch_size), [=](::sycl::item<1> pid) {\n          const size_t i = pid.get_id(0);\n          const size_t ibegin = offset_vec[i];\n          const size_t iend = offset_vec[i + 1];\n          const size_t size = iend - ibegin;\n          const size_t start = (i + base_rowid) * row_stride;\n          for (bst_uint j = 0; j < size; ++j) {\n            uint32_t idx = 
SearchBin(cut_values, cut_ptrs, data_ptr[ibegin + j]);\n            index_data[start + j] = isDense ? idx - cut_ptrs[j] : idx;\n            AtomicRef<size_t> hit_count_ref(hit_count_ptr[idx]);\n            hit_count_ref.fetch_add(1);\n          }\n          if constexpr (!isDense) {\n            // Sparse case only\n            mergeSort<BinIdxType>(index_data + start, index_data + start + size, sort_data + start);\n            for (bst_uint j = size; j < row_stride; ++j) {\n              index_data[start + j] = nbins;\n            }\n          }\n        });\n      });\n    }\n  }\n  qu->wait();\n}\n\nvoid GHistIndexMatrix::ResizeIndex(::sycl::queue* qu, size_t n_index) {\n  if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense_) {\n    index.SetBinTypeSize(BinTypeSize::kUint8BinsTypeSize);\n    index.Resize(qu, (sizeof(uint8_t)) * n_index);\n  } else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max())  &&\n    max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense_) {\n    index.SetBinTypeSize(BinTypeSize::kUint16BinsTypeSize);\n    index.Resize(qu, (sizeof(uint16_t)) * n_index);\n  } else {\n    index.SetBinTypeSize(BinTypeSize::kUint32BinsTypeSize);\n    index.Resize(qu, (sizeof(uint32_t)) * n_index);\n  }\n}\n\nvoid GHistIndexMatrix::Init(::sycl::queue* qu,\n                            Context const * ctx,\n                            DMatrix *dmat,\n                            int max_bins) {\n  nfeatures = dmat->Info().num_col_;\n\n  cut = xgboost::common::SketchOnDMatrix(ctx, dmat, max_bins);\n  cut.SetDevice(ctx->Device());\n\n  max_num_bins = max_bins;\n  nbins = cut.Ptrs().back();\n\n  min_num_bins = nbins;\n  const size_t n_offsets = cut.cut_ptrs_.Size() - 1;\n  for (unsigned fid = 0; fid < n_offsets; ++fid) {\n    auto ibegin = cut.cut_ptrs_.ConstHostVector()[fid];\n    auto iend = cut.cut_ptrs_.ConstHostVector()[fid + 1];\n    min_num_bins = 
std::min<size_t>(min_num_bins, iend - ibegin);\n  }\n\n  hit_count.SetDevice(ctx->Device());\n  hit_count.Resize(nbins, 0);\n\n  const bool isDense = dmat->IsDense();\n  this->isDense_ = isDense;\n\n  row_stride = 0;\n  size_t n_rows = 0;\n  if (!isDense) {\n    for (const auto& batch : dmat->GetBatches<SparsePage>()) {\n      const auto& row_offset = batch.offset.ConstHostVector();\n      n_rows += batch.Size();\n      for (auto i = 1ull; i < row_offset.size(); i++) {\n        row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1]));\n      }\n    }\n  } else {\n    row_stride = nfeatures;\n    n_rows = dmat->Info().num_row_;\n  }\n\n  const size_t n_index = n_rows * row_stride;\n  ResizeIndex(qu, n_index);\n\n  CHECK_GT(cut.cut_values_.Size(), 0U);\n\n  if (isDense) {\n    BinTypeSize curent_bin_size = index.GetBinTypeSize();\n    if (curent_bin_size == BinTypeSize::kUint8BinsTypeSize) {\n      SetIndexData<uint8_t, true>(qu, ctx, index.data<uint8_t>(), dmat);\n\n    } else if (curent_bin_size == BinTypeSize::kUint16BinsTypeSize) {\n      SetIndexData<uint16_t, true>(qu, ctx, index.data<uint16_t>(), dmat);\n    } else {\n      CHECK_EQ(curent_bin_size, BinTypeSize::kUint32BinsTypeSize);\n      SetIndexData<uint32_t, true>(qu, ctx, index.data<uint32_t>(), dmat);\n    }\n  /* For sparse DMatrix we have to store index of feature for each bin\n     in index field to chose right offset. So offset is nullptr and index is not reduced */\n  } else {\n    sort_buff.Resize(qu, n_rows * row_stride * sizeof(uint32_t));\n    SetIndexData<uint32_t, false>(qu, ctx, index.data<uint32_t>(), dmat);\n  }\n}\n\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/data/gradient_index.h",
    "content": "/*!\n * Copyright 2017-2024 by Contributors\n * \\file gradient_index.h\n */\n#ifndef PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_\n#define PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_\n\n#include <sycl/sycl.hpp>\n#include <vector>\n\n#include \"../../src/common/hist_util.h\"\n#include \"../data.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace common {\n\nusing BinTypeSize = ::xgboost::common::BinTypeSize;\n\n/*!\n * \\brief Index data and offsets stored in USM buffers to provide access from device kernels\n */\nstruct Index {\n  Index() { SetBinTypeSize(binTypeSize_); }\n  Index(const Index& i) = delete;\n  Index& operator=(Index i) = delete;\n  Index(Index&& i) = delete;\n  Index& operator=(Index&& i) = delete;\n  void SetBinTypeSize(BinTypeSize binTypeSize) {\n    binTypeSize_ = binTypeSize;\n    CHECK(binTypeSize == BinTypeSize::kUint8BinsTypeSize ||\n          binTypeSize == BinTypeSize::kUint16BinsTypeSize ||\n          binTypeSize == BinTypeSize::kUint32BinsTypeSize);\n  }\n  BinTypeSize GetBinTypeSize() const { return binTypeSize_; }\n\n  template <typename T>\n  T* data() {\n    return reinterpret_cast<T*>(data_.Data());\n  }\n\n  template <typename T>\n  const T* data() const {\n    return reinterpret_cast<const T*>(data_.DataConst());\n  }\n\n  size_t Size() const { return data_.Size() / (binTypeSize_); }\n\n  void Resize(::sycl::queue* qu, const size_t nBytesData) { data_.Resize(qu, nBytesData); }\n\n  uint8_t* begin() const { return data_.Begin(); }\n\n  uint8_t* end() const { return data_.End(); }\n\n private:\n  USMVector<uint8_t, MemoryType::on_device> data_;\n  BinTypeSize binTypeSize_{BinTypeSize::kUint8BinsTypeSize};\n};\n\n/*!\n * \\brief Preprocessed global index matrix, in CSR format, stored in USM buffers\n *\n *  Transform floating values to integer index in histogram\n */\nstruct GHistIndexMatrix {\n  /*! \\brief row pointer to rows by element position */\n  /*! \\brief The index data */\n  Index index;\n  /*! 
\\brief hit count of each index */\n  HostDeviceVector<size_t> hit_count;\n\n  USMVector<uint8_t, MemoryType::on_device> sort_buff;\n  /*! \\brief The corresponding cuts */\n  xgboost::common::HistogramCuts cut{0};\n  size_t max_num_bins;\n  size_t min_num_bins;\n  size_t nbins;\n  size_t nfeatures;\n  size_t row_stride;\n\n  // Create a global histogram matrix based on a given DMatrix device wrapper\n  void Init(::sycl::queue* qu, Context const* ctx, DMatrix* dmat, int max_num_bins);\n\n  template <typename BinIdxType, bool isDense>\n  void SetIndexData(::sycl::queue* qu, Context const* ctx, BinIdxType* index_data, DMatrix* dmat);\n\n  void ResizeIndex(::sycl::queue* qu, size_t n_index);\n\n  inline void GetFeatureCounts(size_t* counts) const {\n    auto nfeature = cut.cut_ptrs_.Size() - 1;\n    for (unsigned fid = 0; fid < nfeature; ++fid) {\n      auto ibegin = cut.cut_ptrs_.ConstHostVector()[fid];\n      auto iend = cut.cut_ptrs_.ConstHostVector()[fid + 1];\n      for (auto i = ibegin; i < iend; ++i) {\n        *(counts + fid) += hit_count.ConstHostVector()[i];\n      }\n    }\n  }\n  inline bool IsDense() const { return isDense_; }\n\n private:\n  bool isDense_;\n};\n\n}  // namespace common\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_DATA_GRADIENT_INDEX_H_\n"
  },
  {
    "path": "plugin/sycl/data.h",
    "content": "/*!\n * Copyright by Contributors 2017-2023\n */\n#ifndef PLUGIN_SYCL_DATA_H_\n#define PLUGIN_SYCL_DATA_H_\n\n#include <algorithm>\n#include <cstddef>\n#include <limits>\n#include <memory>\n#include <mutex>\n#include <vector>\n\n#include \"xgboost/base.h\"\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"xgboost/data.h\"\n#pragma GCC diagnostic pop\n#include <sycl/sycl.hpp>\n\n#include \"../../src/common/threading_utils.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\nnamespace sycl {\ntemplate <typename T>\nusing AtomicRef = ::sycl::atomic_ref<T, ::sycl::memory_order::relaxed, ::sycl::memory_scope::device,\n                                     ::sycl::access::address_space::global_space>;\n\nenum class MemoryType { shared, on_device };\n\ntemplate <typename T>\nclass USMDeleter {\n public:\n  explicit USMDeleter(::sycl::queue* qu) : qu_(qu) {}\n\n  void operator()(T* data) const { ::sycl::free(data, *qu_); }\n\n private:\n  ::sycl::queue* qu_;\n};\n\ntemplate <typename T, MemoryType memory_type = MemoryType::shared>\nclass USMVector {\n  static_assert(std::is_standard_layout<T>::value, \"USMVector admits only POD types\");\n\n  std::shared_ptr<T> allocate_memory_(::sycl::queue* qu, size_t size) {\n    if constexpr (memory_type == MemoryType::shared) {\n      return std::shared_ptr<T>(::sycl::malloc_shared<T>(size_, *qu), USMDeleter<T>(qu));\n    } else {\n      return std::shared_ptr<T>(::sycl::malloc_device<T>(size_, *qu), USMDeleter<T>(qu));\n    }\n  }\n\n  void copy_vector_to_memory_(::sycl::queue* qu, const std::vector<T>& vec) {\n    if constexpr (memory_type == MemoryType::shared) {\n      std::copy(vec.begin(), vec.end(), data_.get());\n    } else {\n      qu->memcpy(data_.get(), vec.data(), size_ * sizeof(T));\n    }\n  }\n\n public:\n  USMVector() : size_(0), capacity_(0), 
data_(nullptr) {}\n\n  USMVector(::sycl::queue* qu, size_t size) : size_(size), capacity_(size) {\n    data_ = allocate_memory_(qu, size_);\n  }\n\n  USMVector(::sycl::queue* qu, size_t size, T v) : size_(size), capacity_(size) {\n    data_ = allocate_memory_(qu, size_);\n    qu->fill(data_.get(), v, size_).wait();\n  }\n\n  USMVector(::sycl::queue* qu, size_t size, T v, ::sycl::event* event)\n      : size_(size), capacity_(size) {\n    data_ = allocate_memory_(qu, size_);\n    *event = qu->fill(data_.get(), v, size_, *event);\n  }\n\n  USMVector(::sycl::queue* qu, const std::vector<T>& vec) {\n    size_ = vec.size();\n    capacity_ = size_;\n    data_ = allocate_memory_(qu, size_);\n    copy_vector_to_memory_(qu, vec);\n  }\n\n  ~USMVector() {}\n\n  USMVector<T>& operator=(const USMVector<T>& other) {\n    size_ = other.size_;\n    capacity_ = other.capacity_;\n    data_ = other.data_;\n    return *this;\n  }\n\n  T* Data() { return data_.get(); }\n  const T* DataConst() const { return data_.get(); }\n\n  size_t Size() const { return size_; }\n\n  size_t Capacity() const { return capacity_; }\n\n  T& operator[](size_t i) { return data_.get()[i]; }\n  const T& operator[](size_t i) const { return data_.get()[i]; }\n\n  T* Begin() const { return data_.get(); }\n  T* End() const { return data_.get() + size_; }\n\n  bool Empty() const { return (size_ == 0); }\n\n  void Clear() {\n    data_.reset();\n    size_ = 0;\n    capacity_ = 0;\n  }\n\n  void Resize(::sycl::queue* qu, size_t size_new) {\n    if (size_new <= capacity_) {\n      size_ = size_new;\n    } else {\n      size_t size_old = size_;\n      auto data_old = data_;\n      size_ = size_new;\n      capacity_ = size_new;\n      data_ = allocate_memory_(qu, size_);\n      if (size_old > 0) {\n        qu->memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();\n      }\n    }\n  }\n\n  /* Resize without keeping the data*/\n  void ResizeNoCopy(::sycl::queue* qu, size_t size_new) {\n    if (size_new <= 
capacity_) {\n      size_ = size_new;\n    } else {\n      size_ = size_new;\n      capacity_ = size_new;\n      data_ = allocate_memory_(qu, size_);\n    }\n  }\n\n  void Resize(::sycl::queue* qu, size_t size_new, T v) {\n    if (size_new <= size_) {\n      size_ = size_new;\n    } else if (size_new <= capacity_) {\n      qu->fill(data_.get() + size_, v, size_new - size_).wait();\n      size_ = size_new;\n    } else {\n      size_t size_old = size_;\n      auto data_old = data_;\n      size_ = size_new;\n      capacity_ = size_new;\n      data_ = allocate_memory_(qu, size_);\n      if (size_old > 0) {\n        qu->memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();\n      }\n      qu->fill(data_.get() + size_old, v, size_new - size_old).wait();\n    }\n  }\n\n  void Resize(::sycl::queue* qu, size_t size_new, T v, ::sycl::event* event) {\n    if (size_new <= size_) {\n      size_ = size_new;\n    } else if (size_new <= capacity_) {\n      auto event = qu->fill(data_.get() + size_, v, size_new - size_);\n      size_ = size_new;\n    } else {\n      size_t size_old = size_;\n      auto data_old = data_;\n      size_ = size_new;\n      capacity_ = size_new;\n      data_ = allocate_memory_(qu, size_);\n      if (size_old > 0) {\n        *event = qu->memcpy(data_.get(), data_old.get(), sizeof(T) * size_old, *event);\n      }\n      *event = qu->fill(data_.get() + size_old, v, size_new - size_old, *event);\n    }\n  }\n\n  void ResizeAndFill(::sycl::queue* qu, size_t size_new, int v, ::sycl::event* event) {\n    if (size_new <= size_) {\n      size_ = size_new;\n      *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);\n    } else if (size_new <= capacity_) {\n      size_ = size_new;\n      *event = qu->memset(data_.get(), v, size_new * sizeof(T), *event);\n    } else {\n      size_t size_old = size_;\n      auto data_old = data_;\n      size_ = size_new;\n      capacity_ = size_new;\n      data_ = allocate_memory_(qu, size_);\n      *event = 
qu->memset(data_.get(), v, size_new * sizeof(T), *event);\n    }\n  }\n\n  ::sycl::event Fill(::sycl::queue* qu, T v) { return qu->fill(data_.get(), v, size_); }\n\n  void Init(::sycl::queue* qu, const std::vector<T>& vec) {\n    size_ = vec.size();\n    capacity_ = size_;\n    data_ = allocate_memory_(qu, size_);\n    copy_vector_to_memory_(qu, vec);\n  }\n\n  using value_type = T;  // NOLINT\n\n private:\n  size_t size_;\n  size_t capacity_;\n  std::shared_ptr<T> data_;\n};\n\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_DATA_H_\n"
  },
  {
    "path": "plugin/sycl/device_manager.cc",
    "content": "/*!\n * Copyright 2017-2023 by Contributors\n * \\file device_manager.cc\n */\n#include \"../sycl/device_manager.h\"\n\n#include \"../../src/collective/communicator-inl.h\"\n\nnamespace xgboost {\nnamespace sycl {\n\n::sycl::queue* DeviceManager::GetQueue(const DeviceOrd& device_spec) const {\n    if (!device_spec.IsSycl()) {\n        LOG(WARNING) << \"Sycl kernel is executed with non-sycl context: \"\n                     << device_spec.Name() << \". \"\n                     << \"Default sycl device_selector will be used.\";\n    }\n\n    size_t queue_idx;\n    bool not_use_default_selector = (device_spec.ordinal != kDefaultOrdinal) ||\n                                    (collective::IsDistributed());\n    DeviceRegister& device_register = GetDevicesRegister();\n    if (not_use_default_selector) {\n        const int device_idx =\n            collective::IsDistributed() ? collective::GetRank() : device_spec.ordinal;\n        if (device_spec.IsSyclDefault()) {\n            auto& devices = device_register.devices;\n            CHECK_LT(device_idx, devices.size());\n            queue_idx = device_idx;\n        } else if (device_spec.IsSyclCPU()) {\n            auto& cpu_devices_idxes = device_register.cpu_devices_idxes;\n            CHECK_LT(device_idx, cpu_devices_idxes.size());\n            queue_idx = cpu_devices_idxes[device_idx];\n        } else if (device_spec.IsSyclGPU()) {\n            auto& gpu_devices_idxes = device_register.gpu_devices_idxes;\n            CHECK_LT(device_idx, gpu_devices_idxes.size());\n            queue_idx = gpu_devices_idxes[device_idx];\n        } else {\n            LOG(WARNING) << device_spec << \" is not sycl, sycl:cpu or sycl:gpu\";\n            auto device = ::sycl::queue(::sycl::default_selector_v).get_device();\n            queue_idx = device_register.devices.at(device);\n        }\n    } else {\n        if (device_spec.IsSyclCPU()) {\n            auto device = 
::sycl::queue(::sycl::cpu_selector_v).get_device();\n            queue_idx = device_register.devices.at(device);\n        } else if (device_spec.IsSyclGPU()) {\n            auto device = ::sycl::queue(::sycl::gpu_selector_v).get_device();\n            queue_idx = device_register.devices.at(device);\n        } else {\n            auto device = ::sycl::queue(::sycl::default_selector_v).get_device();\n            queue_idx = device_register.devices.at(device);\n        }\n    }\n    return &(device_register.queues[queue_idx]);\n}\n\nDeviceManager::DeviceRegister& DeviceManager::GetDevicesRegister() const {\n    static DeviceRegister device_register;\n\n    if (device_register.devices.size() == 0) {\n        std::lock_guard<std::mutex> guard(device_registering_mutex);\n        std::vector<::sycl::device> devices = ::sycl::device::get_devices();\n        for (size_t i = 0; i < devices.size(); i++) {\n            LOG(INFO) << \"device_index = \" << i << \", name = \"\n                      << devices[i].get_info<::sycl::info::device::name>();\n        }\n\n        for (size_t i = 0; i < devices.size(); i++) {\n            device_register.devices[devices[i]] = i;\n            device_register.queues.push_back(::sycl::queue(devices[i]));\n            if (devices[i].is_cpu()) {\n                device_register.cpu_devices_idxes.push_back(i);\n            } else if (devices[i].is_gpu()) {\n                device_register.gpu_devices_idxes.push_back(i);\n            }\n        }\n    }\n    return device_register;\n}\n\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/device_manager.h",
    "content": "/*!\n * Copyright 2017-2023 by Contributors\n * \\file device_manager.h\n */\n#ifndef PLUGIN_SYCL_DEVICE_MANAGER_H_\n#define PLUGIN_SYCL_DEVICE_MANAGER_H_\n\n#include <vector>\n#include <mutex>\n#include <string>\n#include <unordered_map>\n\n#include <sycl/sycl.hpp>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"xgboost/context.h\"\n#pragma GCC diagnostic pop\n\nnamespace xgboost {\nnamespace sycl {\n\nclass DeviceManager {\n public:\n  ::sycl::queue* GetQueue(const DeviceOrd& device_spec) const;\n\n private:\n  constexpr static int kDefaultOrdinal = -1;\n\n  struct DeviceRegister {\n    std::vector<::sycl::queue> queues;\n    std::unordered_map<::sycl::device, size_t> devices;\n    std::vector<size_t> cpu_devices_idxes;\n    std::vector<size_t> gpu_devices_idxes;\n  };\n\n  DeviceRegister& GetDevicesRegister() const;\n\n  mutable std::mutex device_registering_mutex;\n};\n\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_DEVICE_MANAGER_H_\n"
  },
  {
    "path": "plugin/sycl/device_properties.h",
    "content": "/*!\n * Copyright 2017-2025 by Contributors\n * \\file device_properties.h\n */\n#ifndef PLUGIN_SYCL_DEVICE_PROPERTIES_H_\n#define PLUGIN_SYCL_DEVICE_PROPERTIES_H_\n\n#include <sycl/sycl.hpp>\n#include <sycl/ext/oneapi/experimental/device_architecture.hpp>\n#include \"../../src/common/common.h\"               // for HumanMemUnit\n\nnamespace xgboost {\nnamespace sycl {\n\nclass DeviceProperties {\n  void GetL2Size(const ::sycl::device& device) {\n    l2_size = device.get_info<::sycl::info::device::global_mem_cache_size>();\n    LOG(INFO) << \"Detected L2 Size = \" << ::xgboost::common::HumanMemUnit(l2_size);\n    l2_size_per_eu = static_cast<float>(l2_size) / max_compute_units;\n  }\n\n  void GetSRAMSize(const ::sycl::device& device) {\n    auto arch =\n      device.get_info<::sycl::ext::oneapi::experimental::info::device::architecture>();\n    size_t eu_per_core =\n      device.get_info<::sycl::ext::intel::info::device::gpu_eu_count_per_subslice>();\n    switch (arch) {\n      case ::sycl::ext::oneapi::experimental::architecture::intel_gpu_pvc: {\n        LOG(INFO) << \"Xe-HPC (Ponte Vecchio) Architecture. 
L1 friendly optimization enabled.\";\n        size_t l1_size = 512 * 1024;\n        size_t registers_size = 64 * 1024;\n        sram_size_per_eu = l1_size  / eu_per_core + registers_size;\n        break;\n      }\n      default:\n        sram_size_per_eu = 0;\n    }\n  }\n\n public:\n  bool is_gpu;\n  bool usm_host_allocations;\n  size_t max_compute_units;\n  size_t max_work_group_size;\n  size_t sub_group_size;\n  float sram_size_per_eu = 0;\n  size_t l2_size = 0;\n  float l2_size_per_eu = 0;\n\n  DeviceProperties():\n    is_gpu(false) {}\n\n  explicit DeviceProperties(const ::sycl::device& device):\n    is_gpu(device.is_gpu()),\n    usm_host_allocations(device.has(::sycl::aspect::usm_host_allocations)),\n    max_compute_units(device.get_info<::sycl::info::device::max_compute_units>()),\n    max_work_group_size(device.get_info<::sycl::info::device::max_work_group_size>()),\n    sub_group_size(device.get_info<::sycl::info::device::sub_group_sizes>().back()) {\n      GetL2Size(device);\n      if (is_gpu) {\n        GetSRAMSize(device);\n      }\n    }\n};\n\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_DEVICE_PROPERTIES_H_\n"
  },
  {
    "path": "plugin/sycl/predictor/node.h",
    "content": "/*!\n * Copyright by Contributors 2017-2025\n * \\file node.h\n */\n#ifndef PLUGIN_SYCL_PREDICTOR_NODE_H_\n#define PLUGIN_SYCL_PREDICTOR_NODE_H_\n\n#include \"../../src/gbm/gbtree_model.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace predictor {\n\nunion NodeValue {\n  float leaf_weight;\n  float fvalue;\n};\n\nclass Node {\n  int fidx;\n  int left_child_idx;\n  int right_child_idx;\n  NodeValue val;\n\n public:\n  Node() = default;\n\n  explicit Node(const RegTree::Node& n) {\n    left_child_idx = n.LeftChild();\n    right_child_idx = n.RightChild();\n    fidx = n.SplitIndex();\n    if (n.DefaultLeft()) {\n      fidx |= (1U << 31);\n    }\n\n    if (n.IsLeaf()) {\n      val.leaf_weight = n.LeafValue();\n    } else {\n      val.fvalue = n.SplitCond();\n    }\n  }\n\n  int LeftChildIdx() const {return left_child_idx; }\n\n  int RightChildIdx() const {return right_child_idx; }\n\n  bool IsLeaf() const { return left_child_idx == -1; }\n\n  int GetFidx() const { return fidx & ((1U << 31) - 1U); }\n\n  bool MissingLeft() const { return (fidx >> 31) != 0; }\n\n  int MissingIdx() const {\n    if (MissingLeft()) {\n      return left_child_idx;\n    } else {\n      return right_child_idx;\n    }\n  }\n\n  float GetFvalue() const { return val.fvalue; }\n\n  float GetWeight() const { return val.leaf_weight; }\n};\n\n}  // namespace predictor\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_PREDICTOR_NODE_H_\n"
  },
  {
    "path": "plugin/sycl/predictor/predictor.cc",
    "content": "/*!\n * Copyright by Contributors 2017-2025\n */\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#pragma GCC diagnostic pop\n\n#include \"xgboost/predictor.h\"\n\n#include <cstddef>\n#include <limits>\n#include <mutex>\n#include <sycl/sycl.hpp>\n\n#include \"../../../src/common/timer.h\"\n#include \"../data.h\"\n#include \"dmlc/registry.h\"\n#include \"xgboost/tree_model.h\"\n#include \"xgboost/tree_updater.h\"\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"../../src/data/adapter.h\"\n#pragma GCC diagnostic pop\n#include \"../../src/common/math.h\"\n#include \"../../src/gbm/gbtree_model.h\"\n#include \"../device_manager.h\"\n#include \"../device_properties.h\"\n#include \"node.h\"\n\nnamespace xgboost::sycl_impl {\nvoid InitOutPredictions(Context const* ctx, linalg::VectorView<float const> base_score,\n                        linalg::MatrixView<float> predt) {\n  sycl::DeviceManager device_manager;\n  auto* qu = device_manager.GetQueue(predt.Device());\n  qu->submit([&](::sycl::handler& cgh) {\n      cgh.parallel_for<>(::sycl::range<1>(predt.Size()), [=](::sycl::id<1> pid) {\n        size_t k = pid[0];\n        auto [i, j] = xgboost::linalg::UnravelIndex(k, predt.Shape());\n        const_cast<float&>(predt(i, j)) = base_score(j);\n      });\n    }).wait_and_throw();\n}\n}  // namespace xgboost::sycl_impl\n\nnamespace xgboost {\nnamespace sycl {\nnamespace predictor {\n\nDMLC_REGISTRY_FILE_TAG(predictor_sycl);\n\nclass DeviceModel {\n public:\n  HostDeviceVector<Node> nodes;\n  HostDeviceVector<size_t> first_node_position;\n  HostDeviceVector<int> tree_group;\n\n  void SetDevice(DeviceOrd device) {\n    nodes.SetDevice(device);\n    first_node_position.SetDevice(device);\n    tree_group.SetDevice(device);\n  }\n\n  void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t 
tree_end) {\n    int n_nodes = 0;\n    first_node_position.Resize((tree_end - tree_begin) + 1);\n    auto& first_node_position_host = first_node_position.HostVector();\n    first_node_position_host[0] = n_nodes;\n    for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n      if (model.trees[tree_idx]->HasCategoricalSplit()) {\n        LOG(FATAL) << \"Categorical features are not yet supported by sycl\";\n      }\n      n_nodes += model.trees[tree_idx]->Size();\n      first_node_position_host[tree_idx - tree_begin + 1] = n_nodes;\n    }\n\n    nodes.Resize(n_nodes);\n    for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n      auto const& src_nodes = model.trees[tree_idx]->GetNodes(DeviceOrd::CPU());\n      size_t n_nodes_shift = first_node_position_host[tree_idx - tree_begin];\n      for (size_t node_idx = 0; node_idx < src_nodes.size(); node_idx++) {\n        nodes.HostVector()[node_idx + n_nodes_shift] = static_cast<Node>(src_nodes[node_idx]);\n      }\n    }\n\n    int num_group = model.learner_model_param->num_output_group;\n    if (num_group > 1) {\n      tree_group.Resize(model.tree_info.Size());\n      auto& tree_group_host = tree_group.HostVector();\n      auto const& tree_group_in = model.tree_info.ConstHostVector();\n      for (size_t tree_idx = 0; tree_idx < tree_group_in.size(); tree_idx++)\n        tree_group_host[tree_idx] = tree_group_in[tree_idx];\n    }\n  }\n};\n\n// Binary search\nfloat BinarySearch(const Entry* begin_ptr, const Entry* end_ptr, size_t col_idx,\n                   size_t num_features) {\n  const size_t n_elems = end_ptr - begin_ptr;\n  if (n_elems == num_features) {\n    return (begin_ptr + col_idx)->fvalue;\n  }\n\n  // Since indexes are in range [0: num_features),\n  // we can squeeze the search window from [0: n_elems) to [offset_left: offset_right)\n  const size_t shift = (num_features - 1) - col_idx;\n  const size_t offset_left = shift > n_elems - 1 ? 
0 : std::max<size_t>(0, (n_elems - 1) - shift);\n  const size_t offset_right = std::min<size_t>(col_idx + 1, n_elems);\n\n  end_ptr = begin_ptr + offset_right;\n  begin_ptr += offset_left;\n  const Entry* previous_middle = nullptr;\n  while (end_ptr != begin_ptr) {\n    const Entry* middle = begin_ptr + (end_ptr - begin_ptr) / 2;\n    if (middle == previous_middle) {\n      break;\n    } else {\n      previous_middle = middle;\n    }\n    if (middle->index == col_idx) {\n      return middle->fvalue;\n    } else if (middle->index < col_idx) {\n      begin_ptr = middle + 1;\n    } else {\n      end_ptr = middle;\n    }\n  }\n  return std::numeric_limits<float>::quiet_NaN();\n}\n\nsize_t NextNodeIdx(float fvalue, const Node& node) {\n  if (std::isnan(fvalue)) {\n    return node.MissingIdx();\n  } else {\n    if (fvalue < node.GetFvalue()) {\n      return node.LeftChildIdx();\n    } else {\n      return node.RightChildIdx();\n    }\n  }\n}\n\nfloat GetLeafWeight(const Node* nodes, const Entry* first_entry, const Entry* last_entry,\n                    size_t num_features) {\n  size_t is_dense = (last_entry - first_entry == num_features);\n\n  const Node* node = nodes;\n  while (!node->IsLeaf()) {\n    const float fvalue = is_dense\n                             ? 
(first_entry + node->GetFidx())->fvalue\n                             : BinarySearch(first_entry, last_entry, node->GetFidx(), num_features);\n    node = nodes + NextNodeIdx(fvalue, *node);\n  }\n  return node->GetWeight();\n}\n\nfloat GetLeafWeight(const Node* nodes, const float* fval_buff) {\n  const Node* node = nodes;\n  while (!node->IsLeaf()) {\n    const float fvalue = fval_buff[node->GetFidx()];\n    node = nodes + NextNodeIdx(fvalue, *node);\n  }\n  return node->GetWeight();\n}\n\nclass Predictor : public xgboost::Predictor {\n public:\n  explicit Predictor(Context const* context)\n      : xgboost::Predictor::Predictor{context},\n        cpu_predictor(xgboost::Predictor::Create(\"cpu_predictor\", context)) {}\n\n  void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts, const gbm::GBTreeModel& model,\n                    bst_tree_t tree_begin, bst_tree_t tree_end = 0,\n                    std::vector<float> const* tree_weights = nullptr) const override {\n    if (tree_weights != nullptr) {\n      LOG(WARNING) << \"Weighted batch prediction is not yet implemented for SYCL. 
CPU Predictor \"\n                      \"is used.\";\n      return cpu_predictor->PredictBatch(dmat, predts, model, tree_begin, tree_end, tree_weights);\n    }\n\n    auto* out_preds = &predts->predictions;\n    device_model.SetDevice(ctx_->Device());\n    qu_ = device_manager.GetQueue(ctx_->Device());\n    if (device_ != ctx_->Device()) {\n      device_ = ctx_->Device();\n      device_prop_ = DeviceProperties(qu_->get_device());\n    }\n\n    out_preds->SetDevice(ctx_->Device());\n    if (tree_end == 0) {\n      tree_end = model.trees.size();\n    }\n\n    if (tree_begin < tree_end) {\n      const bool any_missing = !(dmat->IsDense());\n      if (any_missing) {\n        DevicePredictInternal<true>(dmat, out_preds, model, tree_begin, tree_end);\n      } else {\n        DevicePredictInternal<false>(dmat, out_preds, model, tree_begin, tree_end);\n      }\n    }\n  }\n\n  bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel& model, float missing,\n                      PredictionCacheEntry* out_preds, bst_tree_t tree_begin, bst_tree_t tree_end,\n                      std::vector<float> const* tree_weights = nullptr) const override {\n    LOG(WARNING) << \"InplacePredict is not yet implemented for SYCL. CPU Predictor is used.\";\n    return cpu_predictor->InplacePredict(p_m, model, missing, out_preds, tree_begin, tree_end,\n                                         tree_weights);\n  }\n\n  void PredictLeaf(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_preds,\n                   const gbm::GBTreeModel& model, bst_tree_t ntree_limit) const override {\n    LOG(WARNING) << \"PredictLeaf is not yet implemented for SYCL. 
CPU Predictor is used.\";\n    cpu_predictor->PredictLeaf(p_fmat, out_preds, model, ntree_limit);\n  }\n\n  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                           const gbm::GBTreeModel& model, bst_tree_t ntree_limit,\n                           const std::vector<bst_float>* tree_weights, bool approximate,\n                           int condition, unsigned condition_feature) const override {\n    LOG(WARNING) << \"PredictContribution is not yet implemented for SYCL. CPU Predictor is used.\";\n    cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit, tree_weights,\n                                       approximate, condition, condition_feature);\n  }\n\n  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,\n                                       const gbm::GBTreeModel& model, bst_tree_t ntree_limit,\n                                       const std::vector<bst_float>* tree_weights,\n                                       bool approximate) const override {\n    LOG(WARNING) << \"PredictInteractionContributions is not yet implemented for SYCL. 
\"\n                 << \"CPU Predictor is used.\";\n    cpu_predictor->PredictInteractionContributions(p_fmat, out_contribs, model, ntree_limit,\n                                                   tree_weights, approximate);\n  }\n\n private:\n  // 8KB fits EU registers\n  static constexpr int kMaxFeatureBufferSize = 2048;\n\n  // Relative cost of reading and writing for discrete and integrated devices.\n  static constexpr float kCostCalibrationIntegrated = 64;\n  static constexpr float kCostCalibrationDescrete = 4;\n\n  template <bool any_missing, int kFeatureBufferSize = 8>\n  void PredictKernelBufferDispatch(::sycl::event* event, const Entry* data, float* out_predictions,\n                                   const size_t* row_ptr, size_t num_rows, size_t num_features,\n                                   size_t num_group, size_t tree_begin, size_t tree_end,\n                                   float sparsity) const {\n    if constexpr (kFeatureBufferSize > kMaxFeatureBufferSize) {\n      LOG(FATAL) << \"Unreachable\";\n    } else {\n      if (num_features > kFeatureBufferSize) {\n        PredictKernelBufferDispatch<any_missing, 2 * kFeatureBufferSize>(\n            event, data, out_predictions, row_ptr, num_rows, num_features, num_group, tree_begin,\n            tree_end, sparsity);\n      } else {\n        PredictKernelBuffer<any_missing, kFeatureBufferSize>(event, data, out_predictions, row_ptr,\n                                                             num_rows, num_features, num_group,\n                                                             tree_begin, tree_end, sparsity);\n      }\n    }\n  }\n\n  size_t GetBlockSize(size_t n_nodes, size_t num_features, size_t num_rows, float sparsity) const {\n    size_t max_compute_units = device_prop_.max_compute_units;\n    size_t l2_size = device_prop_.l2_size;\n    size_t sub_group_size = device_prop_.sub_group_size;\n    size_t nodes_bytes = n_nodes * sizeof(Node);\n    bool nodes_fit_l2 = l2_size > 2 * 
nodes_bytes;\n    size_t block_size =\n        nodes_fit_l2\n            // nodes and data fit L2\n            ? 0.8 * (l2_size - nodes_bytes) / (sparsity * num_features * sizeof(Entry))\n            // only data fit L2\n            : 0.8 * (l2_size) / (sparsity * num_features * sizeof(Entry));\n    block_size = (block_size / sub_group_size) * sub_group_size;\n    if (block_size < max_compute_units * sub_group_size) {\n      block_size = max_compute_units * sub_group_size;\n    }\n\n    if (block_size > num_rows) block_size = num_rows;\n    return block_size;\n  }\n\n  template <bool any_missing, int kFeatureBufferSize>\n  void PredictKernelBuffer(::sycl::event* event, const Entry* data, float* out_predictions,\n                           const size_t* row_ptr, size_t num_rows, size_t num_features,\n                           size_t num_group, size_t tree_begin, size_t tree_end,\n                           float sparsity) const {\n    const Node* nodes = device_model.nodes.ConstDevicePointer();\n    const size_t* first_node_position = device_model.first_node_position.ConstDevicePointer();\n    const int* tree_group = device_model.tree_group.ConstDevicePointer();\n\n    size_t block_size = GetBlockSize(device_model.nodes.Size(), num_features, num_rows, sparsity);\n    size_t n_blocks = num_rows / block_size + (num_rows % block_size > 0);\n\n    for (size_t block = 0; block < n_blocks; ++block) {\n      *event = qu_->submit([&](::sycl::handler& cgh) {\n        cgh.depends_on(*event);\n        cgh.parallel_for<>(::sycl::range<1>(block_size), [=](::sycl::id<1> pid) {\n          int row_idx = block * block_size + pid[0];\n          if (row_idx < num_rows) {\n            const Entry* first_entry = data + row_ptr[row_idx];\n            const Entry* last_entry = data + row_ptr[row_idx + 1];\n\n            float fvalues[kFeatureBufferSize];\n            if constexpr (any_missing) {\n              for (size_t fid = 0; fid < num_features; ++fid) {\n                
fvalues[fid] = std::numeric_limits<float>::quiet_NaN();\n              }\n            }\n\n            for (const Entry* entry = first_entry; entry < last_entry; entry += 1) {\n              fvalues[entry->index] = entry->fvalue;\n            }\n            if (num_group == 1) {\n              float& sum = out_predictions[row_idx];\n              for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n                const Node* first_node = nodes + first_node_position[tree_idx - tree_begin];\n                sum += GetLeafWeight(first_node, fvalues);\n              }\n            } else {\n              for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n                const Node* first_node = nodes + first_node_position[tree_idx - tree_begin];\n                int out_prediction_idx = row_idx * num_group + tree_group[tree_idx];\n                out_predictions[out_prediction_idx] += GetLeafWeight(first_node, fvalues);\n              }\n            }\n          }\n        });\n      });\n    }\n  }\n\n  void PredictKernel(::sycl::event* event, const Entry* data, float* out_predictions,\n                     const size_t* row_ptr, size_t num_rows, size_t num_features, size_t num_group,\n                     size_t tree_begin, size_t tree_end, float sparsity) const {\n    const Node* nodes = device_model.nodes.ConstDevicePointer();\n    const size_t* first_node_position = device_model.first_node_position.ConstDevicePointer();\n    const int* tree_group = device_model.tree_group.ConstDevicePointer();\n\n    size_t block_size = GetBlockSize(device_model.nodes.Size(), num_features, num_rows, sparsity);\n    size_t n_blocks = num_rows / block_size + (num_rows % block_size > 0);\n\n    for (size_t block = 0; block < n_blocks; ++block) {\n      *event = qu_->submit([&](::sycl::handler& cgh) {\n        cgh.depends_on(*event);\n        cgh.parallel_for<>(::sycl::range<1>(block_size), [=](::sycl::id<1> pid) {\n          int row_idx = block * 
block_size + pid[0];\n          if (row_idx < num_rows) {\n            const Entry* first_entry = data + row_ptr[row_idx];\n            const Entry* last_entry = data + row_ptr[row_idx + 1];\n\n            if (num_group == 1) {\n              float& sum = out_predictions[row_idx];\n              for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n                const Node* first_node = nodes + first_node_position[tree_idx - tree_begin];\n                sum += GetLeafWeight(first_node, first_entry, last_entry, num_features);\n              }\n            } else {\n              for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n                const Node* first_node = nodes + first_node_position[tree_idx - tree_begin];\n                int out_prediction_idx = row_idx * num_group + tree_group[tree_idx];\n                out_predictions[out_prediction_idx] +=\n                    GetLeafWeight(first_node, first_entry, last_entry, num_features);\n              }\n            }\n          }\n        });\n      });\n    }\n  }\n\n  template <bool any_missing>\n  bool UseFvalueBuffer(size_t tree_begin, size_t tree_end, int num_features) const {\n    size_t n_nodes = device_model.nodes.Size();\n    size_t n_trees = tree_end - tree_begin;\n    float av_depth = std::log2(static_cast<float>(n_nodes) / n_trees);\n    // the last one is leaf\n    float av_nodes_per_traversal = av_depth - 1;\n    // number of reads in case of no-bufer\n    float n_reads = av_nodes_per_traversal * n_trees;\n    if (any_missing) {\n      // we use binary search for sparse\n      n_reads *= std::log2(static_cast<float>(num_features));\n    }\n\n    float cost_callibration =\n        device_prop_.usm_host_allocations ? 
kCostCalibrationIntegrated : kCostCalibrationDescrete;\n\n    // number of writes in local memory.\n    float n_writes = num_features;\n    bool use_fvalue_buffer =\n        (num_features <= kMaxFeatureBufferSize) && (n_reads > cost_callibration * n_writes);\n    return use_fvalue_buffer;\n  }\n\n  template <bool any_missing>\n  void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<float>* out_preds,\n                             const gbm::GBTreeModel& model, size_t tree_begin,\n                             size_t tree_end) const {\n    if (tree_end - tree_begin == 0) return;\n    if (out_preds->Size() == 0) return;\n\n    device_model.Init(model, tree_begin, tree_end);\n\n    int num_group = model.learner_model_param->num_output_group;\n    int num_features = dmat->Info().num_col_;\n\n    float* out_predictions = out_preds->DevicePointer();\n    ::sycl::event event;\n    for (auto& batch : dmat->GetBatches<SparsePage>()) {\n      batch.data.SetDevice(ctx_->Device());\n      batch.offset.SetDevice(ctx_->Device());\n      const Entry* data = batch.data.ConstDevicePointer();\n      const size_t* row_ptr = batch.offset.ConstDevicePointer();\n      size_t batch_size = batch.Size();\n      if (batch_size > 0) {\n        const auto base_rowid = batch.base_rowid;\n\n        float sparsity = static_cast<float>(batch.data.Size()) / (batch_size * num_features);\n\n        if (UseFvalueBuffer<any_missing>(tree_begin, tree_end, num_features)) {\n          PredictKernelBufferDispatch<any_missing>(\n              &event, data, out_predictions + base_rowid * num_group, row_ptr, batch_size,\n              num_features, num_group, tree_begin, tree_end, sparsity);\n        } else {\n          PredictKernel(&event, data, out_predictions + base_rowid * num_group, row_ptr, batch_size,\n                        num_features, num_group, tree_begin, tree_end, sparsity);\n        }\n      }\n    }\n    qu_->wait();\n  }\n\n  mutable xgboost::DeviceOrd device_;\n  mutable DeviceModel 
device_model;\n  DeviceManager device_manager;\n\n  mutable ::sycl::queue* qu_ = nullptr;\n  mutable DeviceProperties device_prop_;\n\n  std::unique_ptr<xgboost::Predictor> cpu_predictor;\n};\n\nXGBOOST_REGISTER_PREDICTOR(Predictor, \"sycl_predictor\")\n    .describe(\"Make predictions using SYCL.\")\n    .set_body([](Context const* ctx) { return new Predictor(ctx); });\n\n}  // namespace predictor\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/tree/expand_entry.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#ifndef PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_\n#define PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"../../src/tree/constraints.h\"\n#pragma GCC diagnostic pop\n#include \"../../src/tree/hist/expand_entry.h\"\n#include \"../../src/tree/tree_view.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n/* tree growing policies */\nstruct ExpandEntry : public xgboost::tree::ExpandEntryImpl<ExpandEntry> {\n  static constexpr bst_node_t kRootNid  = 0;\n\n  xgboost::tree::SplitEntry split;\n\n  ExpandEntry(int nid, int depth) : ExpandEntryImpl{nid, depth} {}\n\n  bst_node_t GetSiblingId(::xgboost::tree::ScalarTreeView const& tree) const {\n    CHECK_EQ(tree.IsRoot(nid), false);\n    const size_t parent_id = tree.Parent(nid);\n    return GetSiblingId(tree, parent_id);\n  }\n\n  bst_node_t GetSiblingId(::xgboost::tree::ScalarTreeView const& tree, size_t parent_id) const {\n    return tree.IsLeftChild(nid) ? tree.RightChild(parent_id) : tree.LeftChild(parent_id);\n  }\n\n  bool IsValidImpl(xgboost::tree::TrainParam const &param, int32_t num_leaves) const {\n    if (split.loss_chg <= kRtEps) return false;\n    if (split.loss_chg < param.min_split_loss) return false;\n    if (param.max_depth > 0 && depth == param.max_depth) return false;\n    if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false;\n\n    return true;\n  }\n};\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_\n"
  },
  {
    "path": "plugin/sycl/tree/hist_dispatcher.h",
    "content": "/*!\n * Copyright 2017-2025 by Contributors\n * \\file hist_dispatcher.h\n */\n#ifndef PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_\n#define PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_\n\n#include <algorithm>\n#include <sycl/sycl.hpp>\n\n#include \"../device_properties.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\nstruct BlockParams { size_t size, nblocks; };\n\ntemplate <typename FPType>\nclass HistDispatcher {\n public:\n  // Max n_blocks/max_compute_units ration.\n  // Higher -> better GPU utilisation with higer memory overhead.\n  constexpr static int kMaxGPUUtilisation = 4;\n  // Minimal value of block size for buffer-based hist building\n  constexpr static size_t KMinBlockSize = 32;\n  // Maximal value of block size, when increasing can affect performance\n  constexpr static size_t KMaxEffectiveBlockSize = 1u << 11;\n  // Maximal number of bins acceptable for local histograms\n  constexpr static size_t KMaxNumBins = 256;\n  // Amount of sram for local-histogram kernel launch\n  constexpr static float KLocalHistSRAM = 32. 
* 1024;\n  // Max workgroups size, used by atomic-based hist-building\n  constexpr static size_t kMaxWorkGroupSizeAtomic = 32;\n  // Max workgroups size, used for local histograms\n  constexpr static size_t kMaxWorkGroupSizeLocal = 256;\n  // Atomic efficency normalization\n  constexpr static float kAtomicEfficiencyNormalization = 16 * 1024;\n  // Block kernel launch penalty normalization\n  constexpr static float kBlockPenaltyNormalization = 32 * 1024;\n  // Relative weight of quadratic term in atomic penalty model\n  constexpr static float kAtomicQuadraticWeight = 1.0 / 8.0;\n  // Minimal value of threshold GPU load\n  constexpr static float kMinTh = 1.0 / 16.0;\n\n  bool use_local_hist = false;\n  bool use_atomics = false;\n  size_t work_group_size;\n  BlockParams block;\n\n  inline BlockParams GetBlocksParameters(size_t size, size_t max_nblocks,\n                                         size_t max_compute_units) const {\n    if (max_nblocks == 0) return {0, 0};\n    size_t nblocks = max_compute_units;\n\n    size_t block_size = size / nblocks + !!(size % nblocks);\n    while (block_size > (1u << 11)) {\n      nblocks *= 2;\n      if (nblocks >= max_nblocks) {\n        nblocks = max_nblocks;\n        block_size = size / nblocks + !!(size % nblocks);\n        break;\n      }\n      block_size = size / nblocks + !!(size % nblocks);\n    }\n\n    if (block_size < KMinBlockSize) {\n      block_size = KMinBlockSize;\n      nblocks = size / block_size + !!(size % block_size);\n    }\n\n    return {block_size, nblocks};\n  }\n\n  HistDispatcher(const DeviceProperties& device_prop, bool isDense, size_t size,\n                 size_t max_nblocks, size_t nbins, size_t ncolumns,\n                 size_t max_num_bins, size_t min_num_bins) {\n    block = GetBlocksParameters(size, max_nblocks, device_prop.max_compute_units);\n    work_group_size = std::min(ncolumns, device_prop.max_work_group_size);\n    if (!device_prop.is_gpu) return;\n\n    using GradientPairT = 
xgboost::detail::GradientPairInternal<FPType>;\n    /* If local histogram is possible and beneficial */\n    const int buff_size = nbins * sizeof(GradientPairT);\n    /* block_size writes into array of size max_num_bins are made,\n    * if (block_size < max_num_bins)\n    * most part of buffer isn't used and perf suffers.\n    */\n    const size_t th_block_size = max_num_bins;\n    use_local_hist = (buff_size < device_prop.sram_size_per_eu - KLocalHistSRAM)\n                      && isDense\n                      && (max_num_bins <= KMaxNumBins)\n                      && (block.size >= th_block_size);\n\n    /* Predict penalty from atomic usage and compare with one from block-based build with buffer */\n    // EUs processing different columns do not trigger conflicts.\n    float wg_per_columns = std::max(1.0f, static_cast<float>(ncolumns) / kMaxWorkGroupSizeAtomic);\n    /* Rows are processed per execution unit.\n    * Some EUs process different columns, and don't triiger conflicts.\n    * We use a worse case scenario, i.e. 
use the minimal number of bins per feature\n    */\n    float conflicts_per_bin = (device_prop.max_compute_units / wg_per_columns) / min_num_bins;\n\n    // Atomics resolve conflicts between EUs, so L2 size can be a proxy for atomic efficiency.\n    float atomic_efficency = device_prop.l2_size_per_eu / kAtomicEfficiencyNormalization;\n    // We use simple quadratic model to predict atomic penalty\n    float atomic_penalty = conflicts_per_bin\n                        + kAtomicQuadraticWeight * (conflicts_per_bin * conflicts_per_bin);\n\n    // Block-based builder operates with buffer of type FPType, placed in L2.\n    float base_block_penalty = kBlockPenaltyNormalization /\n                                device_prop.l2_size_per_eu * (sizeof(FPType) / 4);\n\n    if (block.nblocks >= device_prop.max_compute_units) {\n      // if GPU is fully loaded, we can simply compare penaltys.\n      use_atomics = base_block_penalty > atomic_penalty / atomic_efficency;\n    } else {\n      float blocks_per_eu = static_cast<float>(block.nblocks) / device_prop.max_compute_units;\n      /* The GPU is not 100% loaded. We need to take this into account in our model:\n      * block_penalty = base_block_penalty + base_time * (1 - blocks_per_eu);\n      *\n      * atomics should be used, if:\n      * block_penalty > atomic_penalty\n      *\n      * The normalization is chosen so that: base_time = 1\n      * base_block_penalty + 1 - blocks_per_eu > atomic_penalty / atomic_efficency\n      *\n      * blocks_per_eu < 1 + base_block_penalty - atomic_penalty / atomic_efficency\n      */\n      float th_block_per_eu = 1 + base_block_penalty - atomic_penalty / atomic_efficency;\n\n      /* We can't trust the decision of the approximate performance model\n      * if penalties are close to each other\n      * i.e. 
(1 + base_block_penalty) ~ (atomic_penalty / atomic_efficency)\n      * We manually limit the minimal value of th_block_per_eu,\n      * to determine the behaviour in this region.\n      */\n      th_block_per_eu = std::max<float>(kMinTh, th_block_per_eu);\n\n      use_atomics = (blocks_per_eu < th_block_per_eu);\n    }\n\n    if (use_atomics) {\n      work_group_size = std::min(kMaxWorkGroupSizeAtomic,\n                                 work_group_size);\n    } else if (use_local_hist) {\n      work_group_size = std::min(kMaxWorkGroupSizeLocal,\n                                 work_group_size);\n    }\n  }\n};\n\n// For some datasets buffer is not used, we estimate if it is the case.\ntemplate<typename FPType>\nsize_t GetRequiredBufferSize(const DeviceProperties& device_prop, size_t max_n_rows, size_t nbins,\n                             size_t ncolumns, size_t max_num_bins, size_t min_num_bins) {\n  size_t max_nblocks = HistDispatcher<FPType>::kMaxGPUUtilisation * device_prop.max_compute_units;\n  // Buffer size doesn't depend on isDense flag.\n  auto build_params = HistDispatcher<FPType>\n                      (device_prop, true, max_n_rows, max_nblocks, nbins,\n                       ncolumns, max_num_bins, min_num_bins);\n\n  return build_params.use_atomics ? 0 : build_params.block.nblocks;\n}\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_\n"
  },
  {
    "path": "plugin/sycl/tree/hist_row_adder.h",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#ifndef PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_\n#define PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_\n\n#include <algorithm>\n#include <vector>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\ntemplate <typename GradientSumT>\nclass HistUpdater;\n\ntemplate <typename GradientSumT>\nclass HistRowsAdder {\n public:\n  virtual void AddHistRows(HistUpdater<GradientSumT>* builder, std::vector<int>* sync_ids,\n                           RegTree* p_tree) = 0;\n  virtual ~HistRowsAdder() = default;\n};\n\ntemplate <typename GradientSumT>\nclass BatchHistRowsAdder : public HistRowsAdder<GradientSumT> {\n public:\n  void AddHistRows(HistUpdater<GradientSumT>* builder, std::vector<int>* sync_ids,\n                   RegTree* p_tree) override {\n    builder->builder_monitor_.Start(\"AddHistRows\");\n\n    for (auto const& entry : builder->nodes_for_explicit_hist_build_) {\n      int nid = entry.nid;\n      auto event = builder->hist_.AddHistRow(nid);\n    }\n    for (auto const& node : builder->nodes_for_subtraction_trick_) {\n      auto event = builder->hist_.AddHistRow(node.nid);\n    }\n\n    builder->builder_monitor_.Stop(\"AddHistRows\");\n  }\n};\n\ntemplate <typename GradientSumT>\nclass DistributedHistRowsAdder : public HistRowsAdder<GradientSumT> {\n public:\n  void AddHistRows(HistUpdater<GradientSumT>* builder, std::vector<int>* sync_ids,\n                   RegTree* p_tree) override {\n    builder->builder_monitor_.Start(\"AddHistRows\");\n    const size_t explicit_size = builder->nodes_for_explicit_hist_build_.size();\n    const size_t subtaction_size = builder->nodes_for_subtraction_trick_.size();\n    std::vector<int> merged_node_ids(explicit_size + subtaction_size);\n    for (size_t i = 0; i < explicit_size; ++i) {\n      merged_node_ids[i] = builder->nodes_for_explicit_hist_build_[i].nid;\n    }\n    for (size_t i = 0; i < subtaction_size; ++i) {\n      merged_node_ids[explicit_size + i] = 
builder->nodes_for_subtraction_trick_[i].nid;\n    }\n    std::sort(merged_node_ids.begin(), merged_node_ids.end());\n    sync_ids->clear();\n    for (auto const& nid : merged_node_ids) {\n      if ((*p_tree)[nid].IsLeftChild()) {\n        builder->hist_.AddHistRow(nid);\n        builder->hist_local_worker_.AddHistRow(nid);\n        sync_ids->push_back(nid);\n      }\n    }\n    for (auto const& nid : merged_node_ids) {\n      if (!((*p_tree)[nid].IsLeftChild())) {\n        builder->hist_.AddHistRow(nid);\n        builder->hist_local_worker_.AddHistRow(nid);\n      }\n    }\n    builder->builder_monitor_.Stop(\"AddHistRows\");\n  }\n};\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_\n"
  },
  {
    "path": "plugin/sycl/tree/hist_synchronizer.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_\n#define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_\n\n#include <vector>\n\n#include \"../../src/tree/tree_view.h\"\n#include \"../common/hist_util.h\"\n#include \"expand_entry.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\ntemplate <typename GradientSumT>\nclass HistUpdater;\n\ntemplate <typename GradientSumT>\nclass HistSynchronizer {\n public:\n  virtual void SyncHistograms(HistUpdater<GradientSumT>* builder, const std::vector<int>& sync_ids,\n                              RegTree const* p_tree) = 0;\n  virtual ~HistSynchronizer() = default;\n};\n\ntemplate <typename GradientSumT>\nclass BatchHistSynchronizer: public HistSynchronizer<GradientSumT> {\n public:\n  void SyncHistograms(HistUpdater<GradientSumT>* builder, const std::vector<int>& sync_ids,\n                      RegTree const* p_tree) override {\n    auto tree = p_tree->HostScView();\n    builder->builder_monitor_.Start(\"SyncHistograms\");\n    const size_t nbins = builder->hist_builder_.GetNumBins();\n\n    hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());\n    for (int i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {\n      const auto entry = builder->nodes_for_explicit_hist_build_[i];\n      auto& this_hist = builder->hist_[entry.nid];\n\n      if (!(tree).IsRoot(entry.nid)) {\n        const size_t parent_id = tree.Parent(entry.nid);\n        auto& parent_hist = builder->hist_[parent_id];\n        auto& sibling_hist = builder->hist_[entry.GetSiblingId(tree, parent_id)];\n        hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,\n                                                       this_hist, nbins, ::sycl::event());\n      }\n    }\n    builder->qu_->wait_and_throw();\n\n    builder->builder_monitor_.Stop(\"SyncHistograms\");\n  }\n\n  std::vector<::sycl::event> GetEvents() const {\n    
return hist_sync_events_;\n  }\n\n private:\n  std::vector<::sycl::event> hist_sync_events_;\n};\n\ntemplate <typename GradientSumT>\nclass DistributedHistSynchronizer: public HistSynchronizer<GradientSumT> {\n public:\n  void SyncHistograms(HistUpdater<GradientSumT>* builder, const std::vector<int>& sync_ids,\n                      RegTree const* p_tree) override {\n    auto tree = p_tree->HostScView();\n    builder->builder_monitor_.Start(\"SyncHistograms\");\n    const size_t nbins = builder->hist_builder_.GetNumBins();\n    for (int node = 0; node < builder->nodes_for_explicit_hist_build_.size(); node++) {\n      const auto entry = builder->nodes_for_explicit_hist_build_[node];\n      auto& this_hist = builder->hist_[entry.nid];\n      // Store possible parent node\n      auto& this_local = builder->hist_local_worker_[entry.nid];\n      common::CopyHist(builder->qu_, &this_local, this_hist, nbins);\n\n      if (!tree.IsRoot(entry.nid)) {\n        const size_t parent_id = tree.Parent(entry.nid);\n        auto sibling_nid = entry.GetSiblingId(tree, parent_id);\n        auto& parent_hist = builder->hist_local_worker_[parent_id];\n\n        auto& sibling_hist = builder->hist_[sibling_nid];\n        common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,\n                                this_hist, nbins, ::sycl::event());\n        builder->qu_->wait_and_throw();\n        // Store possible parent node\n        auto& sibling_local = builder->hist_local_worker_[sibling_nid];\n        common::CopyHist(builder->qu_, &sibling_local, sibling_hist, nbins);\n      }\n    }\n    builder->ReduceHists(sync_ids, nbins);\n\n    ParallelSubtractionHist(builder, builder->nodes_for_explicit_hist_build_, p_tree);\n    ParallelSubtractionHist(builder, builder->nodes_for_subtraction_trick_, p_tree);\n\n    builder->builder_monitor_.Stop(\"SyncHistograms\");\n  }\n\n  void ParallelSubtractionHist(HistUpdater<GradientSumT>* builder,\n                               const 
std::vector<ExpandEntry>& nodes,\n                               const RegTree * p_tree) {\n    const size_t nbins = builder->hist_builder_.GetNumBins();\n    auto tree = p_tree->HostScView();\n    for (int node = 0; node < nodes.size(); node++) {\n      const auto entry = nodes[node];\n      if (!(tree.IsLeftChild(entry.nid))) {\n        auto& this_hist = builder->hist_[entry.nid];\n\n        if (!tree.IsRoot(entry.nid)) {\n          const size_t parent_id = tree.Parent(entry.nid);\n          auto& parent_hist = builder->hist_[parent_id];\n          auto& sibling_hist = builder->hist_[entry.GetSiblingId(tree, parent_id)];\n          common::SubtractionHist(builder->qu_, &this_hist, parent_hist,\n                                  sibling_hist, nbins, ::sycl::event());\n          builder->qu_->wait_and_throw();\n        }\n      }\n    }\n  }\n\n private:\n  std::vector<::sycl::event> hist_sync_events_;\n};\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_\n"
  },
  {
    "path": "plugin/sycl/tree/hist_updater.cc",
    "content": "/*!\n * Copyright 2017-2026, XGBoost Contributors\n * \\file hist_updater.cc\n */\n\n#include \"hist_updater.h\"\n\n#include <functional>\n#include <oneapi/dpl/random>\n\n#include \"../../src/collective/allreduce.h\"\n#include \"../../src/tree/common_row_partitioner.h\"\n#include \"../common/hist_util.h\"\n#include \"xgboost/linalg.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\nusing ::sycl::ext::oneapi::maximum;\nusing ::sycl::ext::oneapi::minimum;\nusing ::sycl::ext::oneapi::plus;\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::ReduceHists(const std::vector<int>& sync_ids, size_t nbins) {\n  if (reduce_buffer_.size() < sync_ids.size() * nbins) {\n    reduce_buffer_.resize(sync_ids.size() * nbins);\n  }\n  for (size_t i = 0; i < sync_ids.size(); i++) {\n    auto& this_hist = hist_[sync_ids[i]];\n    const GradientPairT* psrc = reinterpret_cast<const GradientPairT*>(this_hist.DataConst());\n    qu_->memcpy(reduce_buffer_.data() + i * nbins, psrc, nbins * sizeof(GradientPairT)).wait();\n  }\n\n  auto buffer_vec = ::xgboost::linalg::MakeVec(\n      reinterpret_cast<GradientSumT*>(reduce_buffer_.data()), 2 * nbins * sync_ids.size());\n  auto rc = collective::Allreduce(ctx_, buffer_vec, collective::Op::kSum);\n  SafeColl(rc);\n\n  for (size_t i = 0; i < sync_ids.size(); i++) {\n    auto& this_hist = hist_[sync_ids[i]];\n    GradientPairT* psrc = reinterpret_cast<GradientPairT*>(this_hist.Data());\n    qu_->memcpy(psrc, reduce_buffer_.data() + i * nbins, nbins * sizeof(GradientPairT)).wait();\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::SetHistSynchronizer(HistSynchronizer<GradientSumT>* sync) {\n  hist_synchronizer_.reset(sync);\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::SetHistRowsAdder(HistRowsAdder<GradientSumT>* adder) {\n  hist_rows_adder_.reset(adder);\n}\n\ntemplate <typename GradientSumT>\nvoid 
HistUpdater<GradientSumT>::BuildHistogramsLossGuide(\n    ExpandEntry entry, const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n    const HostDeviceVector<GradientPair>& gpair) {\n  nodes_for_explicit_hist_build_.clear();\n  nodes_for_subtraction_trick_.clear();\n  nodes_for_explicit_hist_build_.push_back(entry);\n  auto tree = p_tree->HostScView();\n\n  if (!tree.IsRoot(entry.nid)) {\n    auto sibling_id = entry.GetSiblingId(tree);\n    nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id));\n  }\n\n  std::vector<int> sync_ids;\n  hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);\n  qu_->wait_and_throw();\n  BuildLocalHistograms(gmat, p_tree, gpair);\n  hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::BuildLocalHistograms(const common::GHistIndexMatrix& gmat,\n                                                     RegTree* p_tree,\n                                                     const HostDeviceVector<GradientPair>& gpair) {\n  builder_monitor_.Start(\"BuildLocalHistograms\");\n  const size_t n_nodes = nodes_for_explicit_hist_build_.size();\n  ::sycl::event event;\n\n  for (size_t i = 0; i < n_nodes; i++) {\n    const int32_t nid = nodes_for_explicit_hist_build_[i].nid;\n\n    if (row_set_collection_[nid].Size() > 0) {\n      event = BuildHist(gpair, row_set_collection_[nid], gmat, &(hist_[nid]),\n                        &(hist_buffer_.GetDeviceBuffer()), event);\n    } else {\n      common::InitHist(qu_, &(hist_[nid]), hist_[nid].Size(), &event);\n    }\n  }\n  qu_->wait_and_throw();\n  builder_monitor_.Stop(\"BuildLocalHistograms\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::BuildNodeStats(const common::GHistIndexMatrix& gmat,\n                                               RegTree* p_tree,\n                                               const HostDeviceVector<GradientPair>& gpair) {\n  
builder_monitor_.Start(\"BuildNodeStats\");\n  for (auto const& entry : qexpand_depth_wise_) {\n    int nid = entry.nid;\n    this->InitNewNode(nid, gmat, gpair, *p_tree);\n    // add constraints\n    if (!(*p_tree)[nid].IsLeftChild() && !(*p_tree)[nid].IsRoot()) {\n      // it's a right child\n      auto parent_id = (*p_tree)[nid].Parent();\n      auto left_sibling_id = (*p_tree)[parent_id].LeftChild();\n      auto parent_split_feature_id = snode_host_[parent_id].best.SplitIndex();\n      tree_evaluator_.AddSplit(parent_id, left_sibling_id, nid, parent_split_feature_id,\n                               snode_host_[left_sibling_id].weight, snode_host_[nid].weight);\n      interaction_constraints_.Split(parent_id, parent_split_feature_id, left_sibling_id, nid);\n    }\n  }\n  builder_monitor_.Stop(\"BuildNodeStats\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::AddSplitsToTree(const common::GHistIndexMatrix& gmat,\n                                                RegTree* p_tree, int* num_leaves, int depth,\n                                                std::vector<ExpandEntry>* nodes_for_apply_split,\n                                                std::vector<ExpandEntry>* temp_qexpand_depth) {\n  builder_monitor_.Start(\"AddSplitsToTree\");\n  auto evaluator = tree_evaluator_.GetEvaluator();\n  for (auto const& entry : qexpand_depth_wise_) {\n    const auto lr = param_.learning_rate;\n    int nid = entry.nid;\n\n    if (snode_host_[nid].best.loss_chg < kRtEps ||\n        (param_.max_depth > 0 && depth == param_.max_depth) ||\n        (param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {\n      (*p_tree)[nid].SetLeaf(snode_host_[nid].weight * lr);\n    } else {\n      nodes_for_apply_split->push_back(entry);\n\n      NodeEntry<GradientSumT>& e = snode_host_[nid];\n      bst_float left_leaf_weight =\n          evaluator.CalcWeight(nid, GradStats<GradientSumT>{e.best.left_sum}) * lr;\n      bst_float right_leaf_weight =\n       
   evaluator.CalcWeight(nid, GradStats<GradientSumT>{e.best.right_sum}) * lr;\n      p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft(),\n                         e.weight, left_leaf_weight, right_leaf_weight, e.best.loss_chg,\n                         e.stats.GetHess(), e.best.left_sum.GetHess(), e.best.right_sum.GetHess());\n\n      int left_id = (*p_tree)[nid].LeftChild();\n      int right_id = (*p_tree)[nid].RightChild();\n      temp_qexpand_depth->push_back(ExpandEntry(left_id, p_tree->GetDepth(left_id)));\n      temp_qexpand_depth->push_back(ExpandEntry(right_id, p_tree->GetDepth(right_id)));\n      // - 1 parent + 2 new children\n      (*num_leaves)++;\n    }\n  }\n  builder_monitor_.Stop(\"AddSplitsToTree\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::EvaluateAndApplySplits(\n    const common::GHistIndexMatrix& gmat, RegTree* p_tree, int* num_leaves, int depth,\n    std::vector<ExpandEntry>* temp_qexpand_depth) {\n  EvaluateSplits(qexpand_depth_wise_, gmat, *p_tree);\n\n  std::vector<ExpandEntry> nodes_for_apply_split;\n  AddSplitsToTree(gmat, p_tree, num_leaves, depth, &nodes_for_apply_split, temp_qexpand_depth);\n  ApplySplit(nodes_for_apply_split, gmat, p_tree);\n}\n\n// Split nodes to 2 sets depending on amount of rows in each node\n// Histograms for small nodes will be built explicitly\n// Histograms for big nodes will be built by 'Subtraction Trick'\n// Exception: in distributed setting, we always build the histogram for the left child node\n//    and use 'Subtraction Trick' to built the histogram for the right child node.\n//    This ensures that the workers operate on the same set of tree nodes.\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::SplitSiblings(const std::vector<ExpandEntry>& nodes,\n                                              std::vector<ExpandEntry>* small_siblings,\n                                              std::vector<ExpandEntry>* 
big_siblings,\n                                              RegTree* p_tree) {\n  builder_monitor_.Start(\"SplitSiblings\");\n  for (auto const& entry : nodes) {\n    int nid = entry.nid;\n    RegTree::Node& node = (*p_tree)[nid];\n    if (node.IsRoot()) {\n      small_siblings->push_back(entry);\n    } else {\n      const int32_t left_id = (*p_tree)[node.Parent()].LeftChild();\n      const int32_t right_id = (*p_tree)[node.Parent()].RightChild();\n\n      if (nid == left_id &&\n          row_set_collection_[left_id].Size() < row_set_collection_[right_id].Size()) {\n        small_siblings->push_back(entry);\n      } else if (nid == right_id &&\n                 row_set_collection_[right_id].Size() <= row_set_collection_[left_id].Size()) {\n        small_siblings->push_back(entry);\n      } else {\n        big_siblings->push_back(entry);\n      }\n    }\n  }\n  builder_monitor_.Stop(\"SplitSiblings\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::ExpandWithDepthWise(const common::GHistIndexMatrix& gmat,\n                                                    RegTree* p_tree,\n                                                    const HostDeviceVector<GradientPair>& gpair) {\n  int num_leaves = 0;\n\n  // in depth_wise growing, we feed loss_chg with 0.0 since it is not used anyway\n  qexpand_depth_wise_.emplace_back(ExpandEntry::kRootNid, p_tree->GetDepth(ExpandEntry::kRootNid));\n  ++num_leaves;\n  for (int depth = 0; depth < param_.max_depth + 1; depth++) {\n    std::vector<int> sync_ids;\n    std::vector<ExpandEntry> temp_qexpand_depth;\n    SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_,\n                  &nodes_for_subtraction_trick_, p_tree);\n    hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);\n    BuildLocalHistograms(gmat, p_tree, gpair);\n    hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);\n    BuildNodeStats(gmat, p_tree, gpair);\n\n    EvaluateAndApplySplits(gmat, p_tree, &num_leaves, depth, 
&temp_qexpand_depth);\n\n    // clean up\n    qexpand_depth_wise_.clear();\n    nodes_for_subtraction_trick_.clear();\n    nodes_for_explicit_hist_build_.clear();\n    if (temp_qexpand_depth.empty()) {\n      break;\n    } else {\n      qexpand_depth_wise_ = temp_qexpand_depth;\n      temp_qexpand_depth.clear();\n    }\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::ExpandWithLossGuide(const common::GHistIndexMatrix& gmat,\n                                                    RegTree* p_tree,\n                                                    const HostDeviceVector<GradientPair>& gpair) {\n  builder_monitor_.Start(\"ExpandWithLossGuide\");\n  int num_leaves = 0;\n  const auto lr = param_.learning_rate;\n\n  ExpandEntry node(ExpandEntry::kRootNid, p_tree->GetDepth(ExpandEntry::kRootNid));\n  BuildHistogramsLossGuide(node, gmat, p_tree, gpair);\n\n  this->InitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_tree);\n\n  this->EvaluateSplits({node}, gmat, *p_tree);\n  node.split.loss_chg = snode_host_[ExpandEntry::kRootNid].best.loss_chg;\n\n  qexpand_loss_guided_->push(node);\n  ++num_leaves;\n\n  while (!qexpand_loss_guided_->empty()) {\n    const ExpandEntry candidate = qexpand_loss_guided_->top();\n    const int nid = candidate.nid;\n    qexpand_loss_guided_->pop();\n    if (!candidate.IsValid(param_, num_leaves)) {\n      (*p_tree)[nid].SetLeaf(snode_host_[nid].weight * lr);\n    } else {\n      auto evaluator = tree_evaluator_.GetEvaluator();\n      NodeEntry<GradientSumT>& e = snode_host_[nid];\n      bst_float left_leaf_weight =\n          evaluator.CalcWeight(nid, GradStats<GradientSumT>{e.best.left_sum}) * lr;\n      bst_float right_leaf_weight =\n          evaluator.CalcWeight(nid, GradStats<GradientSumT>{e.best.right_sum}) * lr;\n      p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft(),\n                         e.weight, left_leaf_weight, right_leaf_weight, e.best.loss_chg,\n                    
     e.stats.GetHess(), e.best.left_sum.GetHess(), e.best.right_sum.GetHess());\n\n      this->ApplySplit({candidate}, gmat, p_tree);\n\n      const int cleft = (*p_tree)[nid].LeftChild();\n      const int cright = (*p_tree)[nid].RightChild();\n\n      ExpandEntry left_node(cleft, p_tree->GetDepth(cleft));\n      ExpandEntry right_node(cright, p_tree->GetDepth(cright));\n\n      if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) {\n        BuildHistogramsLossGuide(left_node, gmat, p_tree, gpair);\n      } else {\n        BuildHistogramsLossGuide(right_node, gmat, p_tree, gpair);\n      }\n\n      this->InitNewNode(cleft, gmat, gpair, *p_tree);\n      this->InitNewNode(cright, gmat, gpair, *p_tree);\n      bst_uint featureid = snode_host_[nid].best.SplitIndex();\n      tree_evaluator_.AddSplit(nid, cleft, cright, featureid, snode_host_[cleft].weight,\n                               snode_host_[cright].weight);\n      interaction_constraints_.Split(nid, featureid, cleft, cright);\n\n      this->EvaluateSplits({left_node, right_node}, gmat, *p_tree);\n      left_node.split.loss_chg = snode_host_[cleft].best.loss_chg;\n      right_node.split.loss_chg = snode_host_[cright].best.loss_chg;\n\n      qexpand_loss_guided_->push(left_node);\n      qexpand_loss_guided_->push(right_node);\n\n      ++num_leaves;  // give two and take one, as parent is no longer a leaf\n    }\n  }\n  builder_monitor_.Stop(\"ExpandWithLossGuide\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::Update(\n    xgboost::tree::TrainParam const* param, const common::GHistIndexMatrix& gmat,\n    const HostDeviceVector<GradientPair>& gpair, DMatrix* p_fmat,\n    xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position, RegTree* p_tree) {\n  builder_monitor_.Start(\"Update\");\n\n  tree_evaluator_.Reset(qu_, param_, p_fmat->Info().num_col_);\n  interaction_constraints_.Reset();\n\n  this->InitData(gmat, gpair, *p_fmat, *p_tree);\n  if 
(param_.grow_policy == xgboost::tree::TrainParam::kLossGuide) {\n    ExpandWithLossGuide(gmat, p_tree, gpair);\n  } else {\n    ExpandWithDepthWise(gmat, p_tree, gpair);\n  }\n\n  for (int nid = 0; nid < p_tree->NumNodes(); ++nid) {\n    p_tree->Stat(nid).loss_chg = snode_host_[nid].best.loss_chg;\n    p_tree->Stat(nid).base_weight = snode_host_[nid].weight;\n    p_tree->Stat(nid).sum_hess = static_cast<float>(snode_host_[nid].stats.GetHess());\n  }\n\n  builder_monitor_.Stop(\"Update\");\n}\n\ntemplate <typename GradientSumT>\nbool HistUpdater<GradientSumT>::UpdatePredictionCache(\n    const DMatrix* data, ::xgboost::linalg::MatrixView<float> out_preds) {\n  CHECK(out_preds.Device().IsSycl());\n  // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in\n  // conjunction with Update().\n  if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {\n    return false;\n  }\n  builder_monitor_.Start(\"UpdatePredictionCache\");\n  CHECK_GT(out_preds.Size(), 0U);\n\n  size_t n_nodes = row_set_collection_.Size();\n  std::vector<::sycl::event> events(n_nodes);\n  auto tree = p_last_tree_->HostScView();\n  for (size_t node = 0; node < n_nodes; node++) {\n    const common::RowSetCollection::Elem& rowset = row_set_collection_[node];\n    if (rowset.begin != nullptr && rowset.end != nullptr && rowset.Size() != 0) {\n      int nid = rowset.node_id;\n      // if a node is marked as deleted by the pruner, traverse upward to locate\n      // a non-deleted leaf.\n      if (tree.IsDeleted(nid)) {\n        while (tree.IsDeleted(nid)) {\n          nid = tree.Parent(nid);\n        }\n        CHECK(tree.IsLeaf(nid));\n      }\n      bst_float leaf_value = tree.LeafValue(nid);\n      const size_t* rid = rowset.begin;\n      const size_t num_rows = rowset.Size();\n\n      events[node] = qu_->submit([&](::sycl::handler& cgh) {\n        cgh.parallel_for<>(::sycl::range<1>(num_rows), [=](::sycl::item<1> pid) {\n          size_t row_id = rid[pid.get_id(0)];\n      
    float& val = const_cast<float&>(out_preds(row_id));\n          val += leaf_value;\n        });\n      });\n    }\n  }\n  qu_->wait();\n\n  builder_monitor_.Stop(\"UpdatePredictionCache\");\n  return true;\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::InitSampling(\n    const HostDeviceVector<GradientPair>& gpair,\n    USMVector<size_t, MemoryType::on_device>* row_indices) {\n  const size_t num_rows = row_indices->Size();\n  auto* row_idx = row_indices->Data();\n  const auto* gpair_ptr = gpair.ConstDevicePointer();\n  uint64_t num_samples = 0;\n  const auto subsample = param_.subsample;\n  ::sycl::event event;\n\n  {\n    ::sycl::buffer<uint64_t, 1> flag_buf(&num_samples, 1);\n    uint64_t seed = seed_;\n    seed_ += num_rows;\n\n    /*\n    * oneDPL bernoulli_distribution implicitly uses double.\n    * If the device doesn't have fp64 support,\n    * we generate Bernoulli-distributed random values from a uniform distribution\n    */\n    if (has_fp64_support_) {\n      // Use oneDPL bernoulli_distribution for better perf\n      event = qu_->submit([&](::sycl::handler& cgh) {\n        auto flag_buf_acc = flag_buf.get_access<::sycl::access::mode::read_write>(cgh);\n        cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(num_rows)), [=](::sycl::item<1> pid) {\n          uint64_t i = pid.get_id(0);\n          // Create minstd_rand engine\n          oneapi::dpl::minstd_rand engine(seed, i);\n          oneapi::dpl::bernoulli_distribution coin_flip(subsample);\n          auto bernoulli_rnd = coin_flip(engine);\n\n          if (gpair_ptr[i].GetHess() >= 0.0f && bernoulli_rnd) {\n            AtomicRef<uint64_t> num_samples_ref(flag_buf_acc[0]);\n            row_idx[num_samples_ref++] = i;\n          }\n        });\n      });\n    } else {\n      // Use oneDPL uniform, because bernoulli_distribution uses fp64\n      event = qu_->submit([&](::sycl::handler& cgh) {\n        auto flag_buf_acc = 
flag_buf.get_access<::sycl::access::mode::read_write>(cgh);\n        cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(num_rows)), [=](::sycl::item<1> pid) {\n          uint64_t i = pid.get_id(0);\n          oneapi::dpl::minstd_rand engine(seed, i);\n          oneapi::dpl::uniform_real_distribution<float> distr;\n          const float rnd = distr(engine);\n          const bool bernoulli_rnd = rnd < subsample ? 1 : 0;\n\n          if (gpair_ptr[i].GetHess() >= 0.0f && bernoulli_rnd) {\n            AtomicRef<uint64_t> num_samples_ref(flag_buf_acc[0]);\n            row_idx[num_samples_ref++] = i;\n          }\n        });\n      });\n    }\n    /* When flag_buf is destroyed, its content is copied back to num_samples */\n  }\n\n  row_indices->Resize(qu_, num_samples, 0, &event);\n  qu_->wait();\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::InitData(const common::GHistIndexMatrix& gmat,\n                                         const HostDeviceVector<GradientPair>& gpair,\n                                         const DMatrix& fmat, const RegTree& tree) {\n  CHECK((param_.max_depth > 0 || param_.max_leaves > 0))\n      << \"max_depth or max_leaves cannot be both 0 (unlimited); \"\n      << \"at least one should be a positive quantity.\";\n  if (param_.grow_policy == xgboost::tree::TrainParam::kDepthWise) {\n    CHECK(param_.max_depth > 0) << \"max_depth cannot be 0 (unlimited) \"\n                                << \"when grow_policy is depthwise.\";\n  }\n  builder_monitor_.Start(\"InitData\");\n  const auto& info = fmat.Info();\n\n  // initialize the row set\n  {\n    row_set_collection_.Clear();\n\n    // initialize histogram collection\n    uint32_t nbins = gmat.cut.Ptrs().back();\n    hist_.Init(qu_, nbins);\n    hist_local_worker_.Init(qu_, nbins);\n\n    // initialize histogram builder\n    hist_builder_ = common::GHistBuilder<GradientSumT>(qu_, nbins);\n\n    USMVector<size_t, MemoryType::on_device>* row_indices = 
&(row_set_collection_.Data());\n    row_indices->Resize(qu_, info.num_row_);\n    size_t* p_row_indices = row_indices->Data();\n    // mark subsample and build list of member rows\n    if (param_.subsample < 1.0f) {\n      CHECK_EQ(param_.sampling_method, xgboost::tree::TrainParam::kUniform)\n          << \"Only uniform sampling is supported, \"\n          << \"gradient-based sampling is only support by GPU Hist.\";\n      InitSampling(gpair, row_indices);\n    } else {\n      int has_neg_hess = 0;\n      const GradientPair* gpair_ptr = gpair.ConstDevicePointer();\n      ::sycl::event event;\n      {\n        ::sycl::buffer<int, 1> flag_buf(&has_neg_hess, 1);\n        event = qu_->submit([&](::sycl::handler& cgh) {\n          auto flag_buf_acc = flag_buf.get_access<::sycl::access::mode::read_write>(cgh);\n          cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(info.num_row_)),\n                             [=](::sycl::item<1> pid) {\n                               const size_t idx = pid.get_id(0);\n                               p_row_indices[idx] = idx;\n                               if (gpair_ptr[idx].GetHess() < 0.0f) {\n                                 AtomicRef<int> has_neg_hess_ref(flag_buf_acc[0]);\n                                 has_neg_hess_ref.fetch_max(1);\n                               }\n                             });\n        });\n      }\n\n      if (has_neg_hess) {\n        size_t max_idx = 0;\n        {\n          ::sycl::buffer<size_t, 1> flag_buf(&max_idx, 1);\n          event = qu_->submit([&](::sycl::handler& cgh) {\n            cgh.depends_on(event);\n            auto flag_buf_acc = flag_buf.get_access<::sycl::access::mode::read_write>(cgh);\n            cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(info.num_row_)),\n                               [=](::sycl::item<1> pid) {\n                                 const size_t idx = pid.get_id(0);\n                                 if (gpair_ptr[idx].GetHess() >= 0.0f) {\n          
                         AtomicRef<size_t> max_idx_ref(flag_buf_acc[0]);\n                                   p_row_indices[max_idx_ref++] = idx;\n                                 }\n                               });\n          });\n        }\n        row_indices->Resize(qu_, max_idx, 0, &event);\n      }\n      qu_->wait_and_throw();\n    }\n  }\n  row_set_collection_.Init();\n\n  {\n    /* determine layout of data */\n    const size_t nrow = info.num_row_;\n    const size_t ncol = info.num_col_;\n    const size_t nnz = info.num_nonzero_;\n    // number of discrete bins for feature 0\n    const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];\n    if (nrow * ncol == nnz) {\n      // dense data with zero-based indexing\n      data_layout_ = kDenseDataZeroBased;\n    } else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {\n      // dense data with one-based indexing\n      data_layout_ = kDenseDataOneBased;\n    } else {\n      // sparse data\n      data_layout_ = kSparseData;\n    }\n  }\n\n  // store a pointer to the tree\n  p_last_tree_ = &tree;\n  column_sampler_->Init(ctx_, info.num_col_, info.feature_weights, param_.colsample_bynode,\n                        param_.colsample_bylevel, param_.colsample_bytree);\n  if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {\n    /* specialized code for dense data:\n       choose the column that has a least positive number of discrete bins.\n       For dense data (with no missing value),\n       the sum of gradient histogram is equal to snode[nid] */\n    const std::vector<uint32_t>& row_ptr = gmat.cut.Ptrs();\n    const auto nfeature = static_cast<bst_uint>(row_ptr.size() - 1);\n    uint32_t min_nbins_per_feature = 0;\n    for (bst_uint i = 0; i < nfeature; ++i) {\n      const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];\n      if (nbins > 0) {\n        if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) {\n          min_nbins_per_feature = nbins;\n          
fid_least_bins_ = i;\n        }\n      }\n    }\n    CHECK_GT(min_nbins_per_feature, 0U);\n  }\n\n  std::fill(snode_host_.begin(), snode_host_.end(), NodeEntry<GradientSumT>(param_));\n\n  {\n    if (param_.grow_policy == xgboost::tree::TrainParam::kLossGuide) {\n      qexpand_loss_guided_.reset(new ExpandQueue(LossGuide));\n    } else {\n      qexpand_depth_wise_.clear();\n    }\n  }\n\n  {\n    uint32_t nbins = gmat.cut.Ptrs().back();\n    hist_buffer_.Init(qu_, nbins);\n    bool isDense = data_layout_ != kSparseData;\n    const size_t ncolumns = isDense ? gmat.nfeatures : gmat.row_stride;\n    size_t buffer_size = GetRequiredBufferSize<GradientSumT>(\n        device_properties_, info.num_row_, nbins, ncolumns, gmat.max_num_bins, gmat.min_num_bins);\n    hist_buffer_.Reset(buffer_size);\n  }\n\n  builder_monitor_.Stop(\"InitData\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes,\n                                                  RegTree* p_tree) {\n  const size_t n_nodes = nodes.size();\n  for (size_t i = 0; i < n_nodes; ++i) {\n    const int32_t nid = nodes[i].nid;\n    const size_t n_left = partition_builder_.GetNLeftElems(i);\n    const size_t n_right = partition_builder_.GetNRightElems(i);\n\n    row_set_collection_.AddSplit(nid, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild(),\n                                 n_left, n_right);\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::ApplySplit(const std::vector<ExpandEntry> nodes,\n                                           const common::GHistIndexMatrix& gmat, RegTree* p_tree) {\n  using CommonRowPartitioner = xgboost::tree::CommonRowPartitioner;\n  builder_monitor_.Start(\"ApplySplit\");\n\n  const size_t n_nodes = nodes.size();\n  std::vector<int32_t> split_conditions(n_nodes);\n  auto tree = p_tree->HostScView();\n  CommonRowPartitioner::FindSplitConditions(nodes, tree, gmat, 
&split_conditions);\n\n  partition_builder_.Init(qu_, n_nodes, [&](size_t node_in_set) {\n    const int32_t nid = nodes[node_in_set].nid;\n    return row_set_collection_[nid].Size();\n  });\n\n  ::sycl::event event;\n  partition_builder_.Partition(gmat, nodes, row_set_collection_, split_conditions, p_tree, &event);\n  qu_->wait_and_throw();\n\n  for (size_t node_in_set = 0; node_in_set < n_nodes; node_in_set++) {\n    const int32_t nid = nodes[node_in_set].nid;\n    size_t* data_result = const_cast<size_t*>(row_set_collection_[nid].begin);\n    partition_builder_.MergeToArray(node_in_set, data_result, &event);\n  }\n  qu_->wait_and_throw();\n\n  AddSplitsToRowSet(nodes, p_tree);\n\n  builder_monitor_.Stop(\"ApplySplit\");\n}\n\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::InitNewNode(int nid, const common::GHistIndexMatrix& gmat,\n                                            const HostDeviceVector<GradientPair>& gpair,\n                                            const RegTree& tree) {\n  builder_monitor_.Start(\"InitNewNode\");\n\n  snode_host_.resize(tree.NumNodes(), NodeEntry<GradientSumT>(param_));\n  auto sc_tree = tree.HostScView();\n  {\n    if (sc_tree.IsRoot(nid)) {\n      GradStats<GradientSumT> grad_stat;\n      if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {\n        const std::vector<uint32_t>& row_ptr = gmat.cut.Ptrs();\n        const uint32_t ibegin = row_ptr[fid_least_bins_];\n        const uint32_t iend = row_ptr[fid_least_bins_ + 1];\n        const auto* hist = reinterpret_cast<GradStats<GradientSumT>*>(hist_[nid].Data());\n\n        std::vector<GradStats<GradientSumT>> ets(iend - ibegin);\n        qu_->memcpy(ets.data(), hist + ibegin, (iend - ibegin) * sizeof(GradStats<GradientSumT>))\n            .wait_and_throw();\n        for (const auto& et : ets) {\n          grad_stat += et;\n        }\n      } else {\n        const common::RowSetCollection::Elem e = row_set_collection_[nid];\n        
const size_t* row_idxs = e.begin;\n        const size_t size = e.Size();\n        const GradientPair* gpair_ptr = gpair.ConstDevicePointer();\n\n        ::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);\n        qu_->submit([&](::sycl::handler& cgh) {\n             auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>());\n             cgh.parallel_for<>(\n                 ::sycl::range<1>(size), reduction, [=](::sycl::item<1> pid, auto& sum) {\n                   size_t i = pid.get_id(0);\n                   size_t row_idx = row_idxs[i];\n                   if constexpr (std::is_same<GradientPair::ValueT, GradientSumT>::value) {\n                     sum += gpair_ptr[row_idx];\n                   } else {\n                     sum += GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(),\n                                                    gpair_ptr[row_idx].GetHess());\n                   }\n                 });\n           })\n            .wait_and_throw();\n      }\n      auto rc = collective::Allreduce(\n          ctx_, ::xgboost::linalg::MakeVec(reinterpret_cast<GradientSumT*>(&grad_stat), 2),\n          collective::Op::kSum);\n      SafeColl(rc);\n      snode_host_[nid].stats = grad_stat;\n    } else {\n      int parent_id = sc_tree.Parent(nid);\n      if (sc_tree.IsLeftChild(nid)) {\n        snode_host_[nid].stats = snode_host_[parent_id].best.left_sum;\n      } else {\n        snode_host_[nid].stats = snode_host_[parent_id].best.right_sum;\n      }\n    }\n  }\n\n  // calculating the weights\n  {\n    auto evaluator = tree_evaluator_.GetEvaluator();\n    bst_uint parentid = sc_tree.Parent(nid);\n    snode_host_[nid].weight = evaluator.CalcWeight(parentid, snode_host_[nid].stats);\n    snode_host_[nid].root_gain = evaluator.CalcGain(parentid, snode_host_[nid].stats);\n  }\n  builder_monitor_.Stop(\"InitNewNode\");\n}\n\n// nodes_set - set of nodes to be processed in parallel\ntemplate <typename GradientSumT>\nvoid 
HistUpdater<GradientSumT>::EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,\n                                               const common::GHistIndexMatrix& gmat,\n                                               const RegTree& tree) {\n  builder_monitor_.Start(\"EvaluateSplits\");\n\n  const size_t n_nodes_in_set = nodes_set.size();\n\n  using FeatureSetType = std::shared_ptr<HostDeviceVector<bst_feature_t>>;\n\n  // Generate feature set for each tree node\n  size_t pos = 0;\n  for (size_t nid_in_set = 0; nid_in_set < n_nodes_in_set; ++nid_in_set) {\n    const bst_node_t nid = nodes_set[nid_in_set].nid;\n    FeatureSetType features_set = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(nid));\n    for (size_t idx = 0; idx < features_set->Size(); idx++) {\n      const size_t fid = features_set->ConstHostVector()[idx];\n      if (interaction_constraints_.Query(nid, fid)) {\n        auto this_hist = hist_[nid].DataConst();\n        if (pos < split_queries_host_.size()) {\n          split_queries_host_[pos] = SplitQuery{nid, fid, this_hist};\n        } else {\n          split_queries_host_.push_back({nid, fid, this_hist});\n        }\n        ++pos;\n      }\n    }\n  }\n  const size_t total_features = pos;\n\n  split_queries_device_.Resize(qu_, total_features);\n  auto event = qu_->memcpy(split_queries_device_.Data(), split_queries_host_.data(),\n                           total_features * sizeof(SplitQuery));\n\n  auto evaluator = tree_evaluator_.GetEvaluator();\n  SplitQuery* split_queries_device = split_queries_device_.Data();\n  const uint32_t* cut_ptr = gmat.cut.cut_ptrs_.ConstDevicePointer();\n  const bst_float* cut_val = gmat.cut.cut_values_.ConstDevicePointer();\n\n  snode_device_.ResizeNoCopy(qu_, snode_host_.size());\n  event = qu_->memcpy(snode_device_.Data(), snode_host_.data(),\n                      snode_host_.size() * sizeof(NodeEntry<GradientSumT>), event);\n  const NodeEntry<GradientSumT>* snode = snode_device_.Data();\n\n  const float 
min_child_weight = param_.min_child_weight;\n\n  best_splits_device_.ResizeNoCopy(qu_, total_features);\n  if (best_splits_host_.size() < total_features) best_splits_host_.resize(total_features);\n  SplitEntry<GradientSumT>* best_splits = best_splits_device_.Data();\n\n  event = qu_->submit([&](::sycl::handler& cgh) {\n    cgh.depends_on(event);\n    cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(total_features, sub_group_size_),\n                                           ::sycl::range<2>(1, sub_group_size_)),\n                       [=](::sycl::nd_item<2> pid) {\n                         int i = pid.get_global_id(0);\n                         auto sg = pid.get_sub_group();\n                         int nid = split_queries_device[i].nid;\n                         int fid = split_queries_device[i].fid;\n                         const GradientPairT* hist_data = split_queries_device[i].hist;\n\n                         best_splits[i] = snode[nid].best;\n                         EnumerateSplit(sg, cut_ptr, cut_val, hist_data, snode[nid],\n                                        &(best_splits[i]), fid, nid, evaluator, min_child_weight);\n                       });\n  });\n  event = qu_->memcpy(best_splits_host_.data(), best_splits,\n                      total_features * sizeof(SplitEntry<GradientSumT>), event);\n\n  qu_->wait();\n  for (size_t i = 0; i < total_features; i++) {\n    int nid = split_queries_host_[i].nid;\n    snode_host_[nid].best.Update(best_splits_host_[i]);\n  }\n\n  builder_monitor_.Stop(\"EvaluateSplits\");\n}\n\n// Enumerate the split values of specific feature.\n// Returns the sum of gradients corresponding to the data points that contains a non-missing value\n// for the particular feature fid.\ntemplate <typename GradientSumT>\nvoid HistUpdater<GradientSumT>::EnumerateSplit(\n    const ::sycl::sub_group& sg, const uint32_t* cut_ptr, const bst_float* cut_val,\n    const GradientPairT* hist_data, const NodeEntry<GradientSumT>& snode,\n    
SplitEntry<GradientSumT>* p_best, bst_uint fid, bst_uint nodeID,\n    typename TreeEvaluator<GradientSumT>::SplitEvaluator const& evaluator, float min_child_weight) {\n  SplitEntry<GradientSumT> best;\n\n  int32_t ibegin = static_cast<int32_t>(cut_ptr[fid]);\n  int32_t iend = static_cast<int32_t>(cut_ptr[fid + 1]);\n\n  GradStats<GradientSumT> sum(0, 0);\n\n  int32_t sub_group_size = sg.get_local_range().size();\n  const size_t local_id = sg.get_local_id()[0];\n\n  /* TODO(razdoburdin)\n   * Currently the first additions are fast and the last are slow.\n   * Maybe calculating of reduce overgroup in seprate kernel and reusing it here can be faster\n   */\n  for (int32_t i = ibegin + local_id; i < iend; i += sub_group_size) {\n    sum.Add(::sycl::inclusive_scan_over_group(sg, hist_data[i].GetGrad(), std::plus<>()),\n            ::sycl::inclusive_scan_over_group(sg, hist_data[i].GetHess(), std::plus<>()));\n\n    if (sum.GetHess() >= min_child_weight) {\n      GradStats<GradientSumT> c = snode.stats - sum;\n      if (c.GetHess() >= min_child_weight) {\n        bst_float loss_chg = evaluator.CalcSplitGain(nodeID, fid, sum, c) - snode.root_gain;\n        bst_float split_pt = cut_val[i];\n        best.Update(loss_chg, fid, split_pt, false, sum, c);\n      }\n    }\n\n    const bool last_iter = i + sub_group_size >= iend;\n    if (!last_iter) {\n      size_t end = i - local_id + sub_group_size;\n      if (end > iend) end = iend;\n      for (size_t j = i + 1; j < end; ++j) {\n        sum.Add(hist_data[j].GetGrad(), hist_data[j].GetHess());\n      }\n    }\n  }\n\n  bst_float total_loss_chg = ::sycl::reduce_over_group(sg, best.loss_chg, maximum<>());\n  bst_feature_t total_split_index = ::sycl::reduce_over_group(\n      sg, best.loss_chg == total_loss_chg ? 
best.SplitIndex() : (1U << 31) - 1U, minimum<>());\n  if (best.loss_chg == total_loss_chg && best.SplitIndex() == total_split_index)\n    p_best->Update(best);\n}\n\ntemplate class HistUpdater<float>;\ntemplate class HistUpdater<double>;\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/tree/hist_updater.h",
    "content": "/*!\n * Copyright 2017-2026, XGBoost Contributors\n * \\file hist_updater.h\n */\n#ifndef PLUGIN_SYCL_TREE_HIST_UPDATER_H_\n#define PLUGIN_SYCL_TREE_HIST_UPDATER_H_\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/linalg.h>  // for MatrixView\n#include <xgboost/tree_updater.h>\n#pragma GCC diagnostic pop\n\n#include <memory>\n#include <queue>\n#include <utility>\n#include <vector>\n\n#include \"../../src/common/random.h\"\n#include \"../common/partition_builder.h\"\n#include \"../data.h\"\n#include \"hist_dispatcher.h\"\n#include \"hist_row_adder.h\"\n#include \"hist_synchronizer.h\"\n#include \"split_evaluator.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\n// data structure\ntemplate <typename GradType>\nstruct NodeEntry {\n  /*! \\brief statics for node entry */\n  GradStats<GradType> stats;\n  /*! \\brief loss of this node, without split */\n  GradType root_gain;\n  /*! \\brief weight calculated related to current data */\n  GradType weight;\n  /*! 
\\brief current best solution */\n  SplitEntry<GradType> best;\n  // constructor\n  explicit NodeEntry(const xgboost::tree::TrainParam& param) : root_gain(0.0f), weight(0.0f) {}\n};\n\ntemplate <typename GradientSumT>\nclass HistUpdater {\n public:\n  template <MemoryType memory_type = MemoryType::shared>\n  using GHistRowT = common::GHistRow<GradientSumT, memory_type>;\n  using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;\n\n  explicit HistUpdater(const Context* ctx, ::sycl::queue* qu,\n                       const xgboost::tree::TrainParam& param,\n                       FeatureInteractionConstraintHost int_constraints_, DMatrix const* fmat)\n      : ctx_(ctx),\n        qu_(qu),\n        device_properties_(qu->get_device()),\n        param_(param),\n        column_sampler_{std::make_shared<xgboost::common::ColumnSampler>()},\n        tree_evaluator_(qu, param, fmat->Info().num_col_),\n        interaction_constraints_{std::move(int_constraints_)},\n        p_last_tree_(nullptr),\n        p_last_fmat_(fmat) {\n    builder_monitor_.Init(\"SYCL::Quantile::HistUpdater\");\n    kernel_monitor_.Init(\"SYCL::Quantile::HistUpdater\");\n    if (param.max_depth > 0) {\n      snode_device_.Resize(qu, 1u << (param.max_depth + 1));\n    }\n    has_fp64_support_ = qu_->get_device().has(::sycl::aspect::fp64);\n    const auto sub_group_sizes =\n        qu_->get_device().get_info<::sycl::info::device::sub_group_sizes>();\n    sub_group_size_ = sub_group_sizes.back();\n  }\n\n  // update one tree, growing\n  void Update(xgboost::tree::TrainParam const* param, const common::GHistIndexMatrix& gmat,\n              const HostDeviceVector<GradientPair>& gpair, DMatrix* p_fmat,\n              xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position, RegTree* p_tree);\n\n  bool UpdatePredictionCache(const DMatrix* data, ::xgboost::linalg::MatrixView<float> p_out_preds);\n\n  void SetHistSynchronizer(HistSynchronizer<GradientSumT>* sync);\n  void 
SetHistRowsAdder(HistRowsAdder<GradientSumT>* adder);\n\n protected:\n  friend class BatchHistSynchronizer<GradientSumT>;\n  friend class DistributedHistSynchronizer<GradientSumT>;\n\n  friend class BatchHistRowsAdder<GradientSumT>;\n  friend class DistributedHistRowsAdder<GradientSumT>;\n\n  struct SplitQuery {\n    bst_node_t nid;\n    size_t fid;\n    const GradientPairT* hist;\n  };\n\n  void InitSampling(const HostDeviceVector<GradientPair>& gpair,\n                    USMVector<size_t, MemoryType::on_device>* row_indices);\n\n  void EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,\n                      const common::GHistIndexMatrix& gmat, const RegTree& tree);\n\n  // Enumerate the split values of specific feature\n  // Returns the sum of gradients corresponding to the data points that contains a non-missing\n  // value for the particular feature fid.\n  static void EnumerateSplit(const ::sycl::sub_group& sg, const uint32_t* cut_ptr,\n                             const bst_float* cut_val, const GradientPairT* hist_data,\n                             const NodeEntry<GradientSumT>& snode, SplitEntry<GradientSumT>* p_best,\n                             bst_uint fid, bst_uint nodeID,\n                             typename TreeEvaluator<GradientSumT>::SplitEvaluator const& evaluator,\n                             float min_child_weight);\n\n  void ApplySplit(std::vector<ExpandEntry> nodes, const common::GHistIndexMatrix& gmat,\n                  RegTree* p_tree);\n\n  void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree* p_tree);\n\n  void InitData(const common::GHistIndexMatrix& gmat, const HostDeviceVector<GradientPair>& gpair,\n                const DMatrix& fmat, const RegTree& tree);\n\n  inline ::sycl::event BuildHist(const HostDeviceVector<GradientPair>& gpair,\n                                 const common::RowSetCollection::Elem row_indices,\n                                 const common::GHistIndexMatrix& gmat,\n               
                  GHistRowT<MemoryType::on_device>* hist,\n                                 GHistRowT<MemoryType::on_device>* hist_buffer,\n                                 ::sycl::event event_priv) {\n    return hist_builder_.BuildHist(gpair, row_indices, gmat, hist, data_layout_ != kSparseData,\n                                   hist_buffer, device_properties_, event_priv);\n  }\n\n  void InitNewNode(int nid, const common::GHistIndexMatrix& gmat,\n                   const HostDeviceVector<GradientPair>& gpair, const RegTree& tree);\n\n  // Split nodes to 2 sets depending on amount of rows in each node\n  // Histograms for small nodes will be built explicitly\n  // Histograms for big nodes will be built by 'Subtraction Trick'\n  void SplitSiblings(const std::vector<ExpandEntry>& nodes,\n                     std::vector<ExpandEntry>* small_siblings,\n                     std::vector<ExpandEntry>* big_siblings, RegTree* p_tree);\n\n  void BuildNodeStats(const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n                      const HostDeviceVector<GradientPair>& gpair);\n\n  void EvaluateAndApplySplits(const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n                              int* num_leaves, int depth,\n                              std::vector<ExpandEntry>* temp_qexpand_depth);\n\n  void AddSplitsToTree(const common::GHistIndexMatrix& gmat, RegTree* p_tree, int* num_leaves,\n                       int depth, std::vector<ExpandEntry>* nodes_for_apply_split,\n                       std::vector<ExpandEntry>* temp_qexpand_depth);\n\n  void ExpandWithDepthWise(const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n                           const HostDeviceVector<GradientPair>& gpair);\n\n  void BuildLocalHistograms(const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n                            const HostDeviceVector<GradientPair>& gpair);\n\n  void BuildHistogramsLossGuide(ExpandEntry entry, const common::GHistIndexMatrix& gmat,\n                 
               RegTree* p_tree, const HostDeviceVector<GradientPair>& gpair);\n\n  void ExpandWithLossGuide(const common::GHistIndexMatrix& gmat, RegTree* p_tree,\n                           const HostDeviceVector<GradientPair>& gpair);\n\n  void ReduceHists(const std::vector<int>& sync_ids, size_t nbins);\n\n  inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {\n    if (lhs.GetLossChange() == rhs.GetLossChange()) {\n      return lhs.GetNodeId() > rhs.GetNodeId();  // favor small timestamp\n    } else {\n      return lhs.GetLossChange() < rhs.GetLossChange();  // favor large loss_chg\n    }\n  }\n\n  //  --data fields--\n  const Context* ctx_;\n  ::sycl::queue* qu_;\n  bool has_fp64_support_;\n  size_t sub_group_size_;\n\n  DeviceProperties device_properties_;\n\n  // the internal row sets\n  common::RowSetCollection row_set_collection_;\n\n  const xgboost::tree::TrainParam& param_;\n  std::shared_ptr<xgboost::common::ColumnSampler> column_sampler_;\n\n  std::vector<SplitQuery> split_queries_host_;\n  USMVector<SplitQuery, MemoryType::on_device> split_queries_device_;\n\n  USMVector<SplitEntry<GradientSumT>, MemoryType::on_device> best_splits_device_;\n  std::vector<SplitEntry<GradientSumT>> best_splits_host_;\n\n  TreeEvaluator<GradientSumT> tree_evaluator_;\n  FeatureInteractionConstraintHost interaction_constraints_;\n\n  // back pointers to tree and data matrix\n  const RegTree* p_last_tree_;\n  DMatrix const* const p_last_fmat_;\n\n  using ExpandQueue = std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,\n                                          std::function<bool(ExpandEntry, ExpandEntry)>>;\n\n  std::unique_ptr<ExpandQueue> qexpand_loss_guided_;\n  std::vector<ExpandEntry> qexpand_depth_wise_;\n\n  enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };\n  DataLayout data_layout_;\n\n  common::GHistBuilder<GradientSumT> hist_builder_;\n  common::ParallelGHistBuilder<GradientSumT> hist_buffer_;\n  /*! 
\\brief cumulative histogram of gradients. */\n  common::HistCollection<GradientSumT, MemoryType::on_device> hist_;\n  /*! \\brief cumulative local parent histogram of gradients. */\n  common::HistCollection<GradientSumT, MemoryType::on_device> hist_local_worker_;\n\n  /*! \\brief TreeNode Data: statistics for each constructed node */\n  std::vector<NodeEntry<GradientSumT>> snode_host_;\n  USMVector<NodeEntry<GradientSumT>, MemoryType::on_device> snode_device_;\n\n  xgboost::common::Monitor builder_monitor_;\n  xgboost::common::Monitor kernel_monitor_;\n\n  /*! \\brief feature with least # of bins. to be used for dense specialization\n              of InitNewNode() */\n  uint32_t fid_least_bins_;\n\n  uint64_t seed_ = 0;\n\n  common::PartitionBuilder partition_builder_;\n\n  // key is the node id which should be calculated by Subtraction Trick, value is the node which\n  // provides the evidence for subtraction\n  std::vector<ExpandEntry> nodes_for_subtraction_trick_;\n  // list of nodes whose histograms would be built explicitly.\n  std::vector<ExpandEntry> nodes_for_explicit_hist_build_;\n\n  std::unique_ptr<HistSynchronizer<GradientSumT>> hist_synchronizer_;\n  std::unique_ptr<HistRowsAdder<GradientSumT>> hist_rows_adder_;\n\n  std::vector<GradientPairT> reduce_buffer_;\n};\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_HIST_UPDATER_H_\n"
  },
  {
    "path": "plugin/sycl/tree/param.h",
    "content": "/*!\n * Copyright 2014-2024 by Contributors\n */\n#ifndef PLUGIN_SYCL_TREE_PARAM_H_\n#define PLUGIN_SYCL_TREE_PARAM_H_\n\n\n#include <cmath>\n#include <cstring>\n#include <limits>\n#include <string>\n#include <vector>\n\n\n#include \"xgboost/parameter.h\"\n#include \"xgboost/data.h\"\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"../src/tree/param.h\"\n#pragma GCC diagnostic pop\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\n\n/*! \\brief Wrapper for necessary training parameters for regression tree to access on device */\n/* The original structure xgboost::tree::TrainParam can't be used,\n * since std::vector are not copyable on sycl-devices.\n */\nstruct TrainParam {\n  float min_child_weight;\n  float reg_lambda;\n  float reg_alpha;\n  float max_delta_step;\n\n  TrainParam() {}\n\n  explicit TrainParam(const xgboost::tree::TrainParam& param) {\n    reg_lambda = param.reg_lambda;\n    reg_alpha = param.reg_alpha;\n    min_child_weight = param.min_child_weight;\n    max_delta_step = param.max_delta_step;\n  }\n};\n\ntemplate <typename GradType>\nusing GradStats = xgboost::detail::GradientPairInternal<GradType>;\n\n/*!\n * \\brief SYCL implementation of SplitEntryContainer for device compilation.\n *        Original structure cannot be used due 'cat_bits' field of type std::vector<uint32_t>,\n *        which is not device-copyable\n */\ntemplate<typename GradientT>\nstruct SplitEntryContainer {\n  /*! \\brief loss change after split this node */\n  bst_float loss_chg {0.0f};\n  /*! 
\\brief split index */\n  bst_feature_t sindex{0};\n  bst_float split_value{0.0f};\n\n\n  GradientT left_sum;\n  GradientT right_sum;\n\n\n  SplitEntryContainer() = default;\n\n\n  friend std::ostream& operator<<(std::ostream& os, SplitEntryContainer const& s) {\n    os << \"loss_chg: \" << s.loss_chg << \", \"\n       << \"split index: \" << s.SplitIndex() << \", \"\n       << \"split value: \" << s.split_value << \", \"\n       << \"left_sum: \" << s.left_sum << \", \"\n       << \"right_sum: \" << s.right_sum;\n    return os;\n  }\n  /*!\\return feature index to split on */\n  bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }\n  /*!\\return whether missing value goes to left branch */\n  bool DefaultLeft() const { return (sindex >> 31) != 0; }\n  /*!\n   * \\brief decides whether we can replace current entry with the given statistics\n   *\n   *   This function gives better priority to lower index when loss_chg == new_loss_chg.\n   *   Not the best way, but helps to give consistent result during multi-thread\n   *   execution.\n   *\n   * \\param new_loss_chg the loss reduction get through the split\n   * \\param split_index the feature index where the split is on\n   */\n  inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {\n    if (::sycl::isinf(new_loss_chg)) {  // in some cases new_loss_chg can be NaN or Inf,\n                                      // for example when lambda = 0 & min_child_weight = 0\n                                      // skip value in this case\n      return false;\n    } else if (this->SplitIndex() <= split_index) {\n      return new_loss_chg > this->loss_chg;\n    } else {\n      return !(this->loss_chg > new_loss_chg);\n    }\n  }\n  /*!\n   * \\brief update the split entry, replace it if e is better\n   * \\param e candidate split solution\n   * \\return whether the proposed split is better and can replace current split\n   */\n  inline bool Update(const SplitEntryContainer &e) {\n    
if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {\n      this->loss_chg = e.loss_chg;\n      this->sindex = e.sindex;\n      this->split_value = e.split_value;\n      this->left_sum = e.left_sum;\n      this->right_sum = e.right_sum;\n      return true;\n    } else {\n      return false;\n    }\n  }\n  /*!\n   * \\brief update the split entry, replace it if e is better\n   * \\param new_loss_chg loss reduction of new candidate\n   * \\param split_index feature index to split on\n   * \\param new_split_value the split point\n   * \\param default_left whether the missing value goes to left\n   * \\return whether the proposed split is better and can replace current split\n   */\n  bool Update(bst_float new_loss_chg, unsigned split_index,\n              bst_float new_split_value, bool default_left,\n              const GradientT &left_sum,\n              const GradientT &right_sum) {\n    if (this->NeedReplace(new_loss_chg, split_index)) {\n      this->loss_chg = new_loss_chg;\n      if (default_left) {\n        split_index |= (1U << 31);\n      }\n      this->sindex = split_index;\n      this->split_value = new_split_value;\n      this->left_sum = left_sum;\n      this->right_sum = right_sum;\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n\n  /*! \\brief same as update, used by AllReduce*/\n  inline static void Reduce(SplitEntryContainer &dst,         // NOLINT(*)\n                            const SplitEntryContainer &src) { // NOLINT(*)\n    dst.Update(src);\n  }\n};\n\ntemplate<typename GradType>\nusing SplitEntry = SplitEntryContainer<GradStats<GradType>>;\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n#endif  // PLUGIN_SYCL_TREE_PARAM_H_\n"
  },
  {
    "path": "plugin/sycl/tree/split_evaluator.h",
    "content": "/*!\n * Copyright 2018-2024 by Contributors\n */\n\n#ifndef PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_\n#define PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n#include <utility>\n#include <vector>\n#include <limits>\n\n#include \"param.h\"\n#include \"../data.h\"\n\n#include \"xgboost/tree_model.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/context.h\"\n#include \"../../src/common/transform.h\"\n#include \"../../src/common/math.h\"\n#include \"../../src/tree/param.h\"\n\n#include <sycl/sycl.hpp>\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\n/*! \\brief SYCL implementation of TreeEvaluator, with USM memory for temporary buffer to access on device.\n *         It also contains own implementation of SplitEvaluator for device compilation, because some of the\n           functions from the original SplitEvaluator are currently not supported\n */\n\ntemplate<typename GradType>\nclass TreeEvaluator {\n  // hist and exact use parent id to calculate constraints.\n  static constexpr bst_node_t kRootParentId =\n      (-1 & static_cast<bst_node_t>((1U << 31) - 1));\n\n  USMVector<GradType> lower_bounds_;\n  USMVector<GradType> upper_bounds_;\n  USMVector<int> monotone_;\n  TrainParam param_;\n  ::sycl::queue* qu_;\n  bool has_constraint_;\n\n public:\n  void Reset(::sycl::queue* qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {\n    qu_ = qu;\n\n    has_constraint_ = false;\n    for (const auto& constraint : p.monotone_constraints) {\n      if (constraint != 0) {\n        has_constraint_ = true;\n        break;\n      }\n    }\n\n    if (has_constraint_) {\n      monotone_.Resize(qu_, n_features, 0);\n      qu_->memcpy(monotone_.Data(), p.monotone_constraints.data(),\n                 sizeof(int) * p.monotone_constraints.size());\n      qu_->wait();\n\n      lower_bounds_.Resize(qu_, p.MaxNodes(), std::numeric_limits<GradType>::lowest());\n      
upper_bounds_.Resize(qu_, p.MaxNodes(), std::numeric_limits<GradType>::max());\n    }\n    param_ = TrainParam(p);\n  }\n\n  bool HasConstraint() const {\n    return has_constraint_;\n  }\n\n  TreeEvaluator(::sycl::queue* qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {\n    Reset(qu, p, n_features);\n  }\n\n  struct SplitEvaluator {\n    const int* constraints;\n    const GradType* lower;\n    const GradType* upper;\n    bool has_constraint;\n    TrainParam param;\n\n    GradType CalcSplitGain(bst_node_t nidx,\n                        bst_feature_t fidx,\n                        const GradStats<GradType>& left,\n                        const GradStats<GradType>& right) const {\n      const GradType negative_infinity = -std::numeric_limits<GradType>::infinity();\n      GradType wleft = this->CalcWeight(nidx, left);\n      GradType wright = this->CalcWeight(nidx, right);\n\n      GradType gain = this->CalcGainGivenWeight(nidx, left,  wleft) +\n                      this->CalcGainGivenWeight(nidx, right, wright);\n      if (!has_constraint) {\n        return gain;\n      }\n\n      int constraint = constraints[fidx];\n      if (constraint == 0) {\n        return gain;\n      } else if (constraint > 0) {\n        return wleft <= wright ? gain : negative_infinity;\n      } else {\n        return wleft >= wright ? 
gain : negative_infinity;\n      }\n    }\n\n    inline static GradType ThresholdL1(GradType w, float alpha) {\n      if (w > + alpha) {\n        return w - alpha;\n      }\n      if (w < - alpha) {\n        return w + alpha;\n      }\n      return 0.0;\n    }\n\n    inline GradType CalcWeight(GradType sum_grad, GradType sum_hess) const {\n      if (sum_hess < param.min_child_weight || sum_hess <= 0.0) {\n        return 0.0;\n      }\n      GradType dw = -this->ThresholdL1(sum_grad, param.reg_alpha) / (sum_hess + param.reg_lambda);\n      if (param.max_delta_step != 0.0f && std::abs(dw) > param.max_delta_step) {\n        dw = ::sycl::copysign((GradType)param.max_delta_step, dw);\n      }\n      return dw;\n    }\n\n    inline GradType CalcWeight(bst_node_t nodeid, const GradStats<GradType>& stats) const {\n      GradType w = this->CalcWeight(stats.GetGrad(), stats.GetHess());\n      if (!has_constraint) {\n        return w;\n      }\n\n      if (nodeid == kRootParentId) {\n        return w;\n      } else if (w < lower[nodeid]) {\n        return lower[nodeid];\n      } else if (w > upper[nodeid]) {\n        return upper[nodeid];\n      } else {\n        return w;\n      }\n    }\n\n    inline GradType CalcGainGivenWeight(GradType sum_grad, GradType sum_hess, GradType w) const {\n      return -(2.0f * sum_grad * w + (sum_hess + param.reg_lambda) * xgboost::common::Sqr(w));\n    }\n\n    inline GradType CalcGainGivenWeight(bst_node_t nid, const GradStats<GradType>& stats,\n                                        GradType w) const {\n      if (stats.GetHess() <= 0) {\n        return .0f;\n      }\n      // Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.\n      if (param.max_delta_step == 0.0f && has_constraint == false) {\n        return xgboost::common::Sqr(this->ThresholdL1(stats.GetGrad(), param.reg_alpha)) /\n               (stats.GetHess() + param.reg_lambda);\n      }\n      return this->CalcGainGivenWeight(stats.GetGrad(), 
stats.GetHess(), w);\n    }\n\n    GradType CalcGain(bst_node_t nid, const GradStats<GradType>& stats) const {\n      return this->CalcGainGivenWeight(nid, stats, this->CalcWeight(nid, stats));\n    }\n  };\n\n public:\n  /* Get a view to the evaluator that can be passed down to device. */\n  auto GetEvaluator() const {\n    return SplitEvaluator{monotone_.DataConst(),\n                          lower_bounds_.DataConst(),\n                          upper_bounds_.DataConst(),\n                          has_constraint_,\n                          param_};\n  }\n\n  void AddSplit(bst_node_t nodeid, bst_node_t leftid, bst_node_t rightid,\n                bst_feature_t f, GradType left_weight, GradType right_weight) {\n    if (!has_constraint_) {\n      return;\n    }\n\n    lower_bounds_[leftid] = lower_bounds_[nodeid];\n    upper_bounds_[leftid] = upper_bounds_[nodeid];\n\n    lower_bounds_[rightid] = lower_bounds_[nodeid];\n    upper_bounds_[rightid] = upper_bounds_[nodeid];\n    int32_t c = monotone_[f];\n    GradType mid = (left_weight + right_weight) / 2;\n\n    if (c < 0) {\n      lower_bounds_[leftid] = mid;\n      upper_bounds_[rightid] = mid;\n    } else if (c > 0) {\n      upper_bounds_[leftid] = mid;\n      lower_bounds_[rightid] = mid;\n    }\n  }\n};\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_\n"
  },
  {
    "path": "plugin/sycl/tree/updater_quantile_hist.cc",
    "content": "/*!\n * Copyright 2017-2024 by Contributors\n * \\file updater_quantile_hist.cc\n */\n#include <vector>\n#include <memory>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"xgboost/gradient.h\"  // for GradientContainer\n#include \"xgboost/tree_updater.h\"\n#pragma GCC diagnostic pop\n\n#include \"../../src/collective/communicator-inl.h\"  // for IsDistributed\n#include \"../data.h\"\n#include \"updater_quantile_hist.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_quantile_hist_sycl);\n\nDMLC_REGISTER_PARAMETER(HistMakerTrainParam);\n\nvoid QuantileHistMaker::Configure(const Args& args) {\n  const DeviceOrd device_spec = ctx_->Device();\n  qu_ = device_manager.GetQueue(device_spec);\n\n  param_.UpdateAllowUnknown(args);\n  hist_maker_param_.UpdateAllowUnknown(args);\n\n  bool has_fp64_support = qu_->get_device().has(::sycl::aspect::fp64);\n  if (hist_maker_param_.single_precision_histogram || !has_fp64_support) {\n    if (!hist_maker_param_.single_precision_histogram) {\n      LOG(WARNING) << \"Target device doesn't support fp64, using single_precision_histogram=True\";\n    }\n    hist_precision_ = HistPrecision::fp32;\n  } else {\n    hist_precision_ = HistPrecision::fp64;\n  }\n}\n\ntemplate<typename GradientSumT>\nvoid QuantileHistMaker::SetPimpl(std::unique_ptr<HistUpdater<GradientSumT>>* pimpl,\n                                 DMatrix *dmat) {\n  pimpl->reset(new HistUpdater<GradientSumT>(\n                ctx_,\n                qu_,\n                param_,\n                int_constraint_, dmat));\n  if (collective::IsDistributed()) {\n    (*pimpl)->SetHistSynchronizer(new DistributedHistSynchronizer<GradientSumT>());\n    (*pimpl)->SetHistRowsAdder(new DistributedHistRowsAdder<GradientSumT>());\n  } else {\n    (*pimpl)->SetHistSynchronizer(new 
BatchHistSynchronizer<GradientSumT>());\n    (*pimpl)->SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid QuantileHistMaker::CallUpdate(const std::unique_ptr<HistUpdater<GradientSumT>> &pimpl,\n                                   xgboost::tree::TrainParam const *param,\n                                   ::xgboost::linalg::Matrix<GradientPair> *gpair, DMatrix *dmat,\n                                   xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position,\n                                   const std::vector<RegTree *> &trees) {\n  for (auto tree : trees) {\n    pimpl->Update(param, gmat_, *(gpair->Data()), dmat, out_position, tree);\n  }\n}\n\nvoid QuantileHistMaker::Update(xgboost::tree::TrainParam const *param, GradientContainer *in_gpair,\n                               DMatrix *dmat,\n                               xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position,\n                               const std::vector<RegTree *> &trees) {\n  auto gpair = in_gpair->FullGradOnly();\n  gpair->Data()->SetDevice(ctx_->Device());\n  if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) {\n    updater_monitor_.Start(\"GmatInitialization\");\n    gmat_.Init(qu_, ctx_, dmat, static_cast<uint32_t>(param_.max_bin));\n    updater_monitor_.Stop(\"GmatInitialization\");\n    is_gmat_initialized_ = true;\n  }\n  // rescale learning rate according to size of trees\n  float lr = param_.learning_rate;\n  param_.learning_rate = lr / trees.size();\n  int_constraint_.Configure(param_, dmat->Info().num_col_);\n  // build tree\n  if (hist_precision_ == HistPrecision::fp32) {\n    if (!pimpl_fp32) {\n      SetPimpl(&pimpl_fp32, dmat);\n    }\n    CallUpdate(pimpl_fp32, param, gpair, dmat, out_position, trees);\n  } else {\n    if (!pimpl_fp64) {\n      SetPimpl(&pimpl_fp64, dmat);\n    }\n    CallUpdate(pimpl_fp64, param, gpair, dmat, out_position, trees);\n  }\n\n  param_.learning_rate = lr;\n\n  
p_last_dmat_ = dmat;\n}\n\nbool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,\n                                              xgboost::common::Span<HostDeviceVector<bst_node_t>>,\n                                              ::xgboost::linalg::MatrixView<float> out_preds) {\n  if (param_.subsample < 1.0f) return false;\n\n  if (hist_precision_ == HistPrecision::fp32) {\n    if (pimpl_fp32) {\n      return pimpl_fp32->UpdatePredictionCache(data, out_preds);\n    } else {\n      return false;\n    }\n  } else {\n    if (pimpl_fp64) {\n      return pimpl_fp64->UpdatePredictionCache(data, out_preds);\n    } else {\n      return false;\n    }\n  }\n}\n\nXGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, \"grow_quantile_histmaker_sycl\")\n.describe(\"Grow tree using quantized histogram with SYCL.\")\n.set_body(\n    [](Context const* ctx, ObjInfo const * task) {\n      return new QuantileHistMaker(ctx, task);\n    });\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n"
  },
  {
    "path": "plugin/sycl/tree/updater_quantile_hist.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\file updater_quantile_hist.h\n */\n#ifndef PLUGIN_SYCL_TREE_UPDATER_QUANTILE_HIST_H_\n#define PLUGIN_SYCL_TREE_UPDATER_QUANTILE_HIST_H_\n\n#include <dmlc/timer.h>\n#include <xgboost/tree_updater.h>\n\n#include <memory>\n#include <vector>\n\n#include \"../../src/common/random.h\"\n#include \"../../src/tree/constraints.h\"\n#include \"../common/hist_util.h\"\n#include \"../common/partition_builder.h\"\n#include \"../common/row_set.h\"\n#include \"../data/gradient_index.h\"\n#include \"../device_manager.h\"\n#include \"hist_updater.h\"\n#include \"split_evaluator.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/gradient.h\"  // for GradientContainer\n#include \"xgboost/json.h\"\n\nnamespace xgboost {\nnamespace sycl {\nnamespace tree {\n\n// training parameters specific to this algorithm\nstruct HistMakerTrainParam\n    : public XGBoostParameter<HistMakerTrainParam> {\n  bool single_precision_histogram = false;\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(HistMakerTrainParam) {\n    DMLC_DECLARE_FIELD(single_precision_histogram).set_default(false).describe(\n        \"Use single precision to build histograms.\");\n  }\n};\n\n/*! 
\\brief construct a tree using quantized feature values with SYCL backend*/\nclass QuantileHistMaker: public TreeUpdater {\n public:\n  QuantileHistMaker(Context const* ctx, ObjInfo const * task) :\n                             TreeUpdater(ctx), task_{task} {\n    updater_monitor_.Init(\"SYCLQuantileHistMaker\");\n  }\n  void Configure(const Args& args) override;\n\n  void Update(xgboost::tree::TrainParam const* param, GradientContainer* in_gpair, DMatrix* dmat,\n              xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position,\n              const std::vector<RegTree*>& trees) override;\n\n  bool UpdatePredictionCache(const DMatrix* data,\n                             xgboost::common::Span<HostDeviceVector<bst_node_t>>,\n                             ::xgboost::linalg::MatrixView<float> out_preds) override;\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"train_param\"), &this->param_);\n    FromJson(config.at(\"sycl_hist_train_param\"), &this->hist_maker_param_);\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"train_param\"] = ToJson(param_);\n    out[\"sycl_hist_train_param\"] = ToJson(hist_maker_param_);\n  }\n\n  char const* Name() const override {\n    return \"grow_quantile_histmaker_sycl\";\n  }\n\n protected:\n  HistMakerTrainParam hist_maker_param_;\n  // training parameter\n  xgboost::tree::TrainParam param_;\n  // quantized data matrix\n  common::GHistIndexMatrix gmat_;\n  // (optional) data matrix with feature grouping\n  // column accessor\n  DMatrix const* p_last_dmat_ {nullptr};\n  bool is_gmat_initialized_ {false};\n\n  xgboost::common::Monitor updater_monitor_;\n\n  template<typename GradientSumT>\n  void SetPimpl(std::unique_ptr<HistUpdater<GradientSumT>>*, DMatrix *dmat);\n\n  template<typename GradientSumT>\n  void CallUpdate(const std::unique_ptr<HistUpdater<GradientSumT>>& builder,\n                  
xgboost::tree::TrainParam const *param,\n                  ::xgboost::linalg::Matrix<GradientPair> *gpair,\n                  DMatrix *dmat,\n                  xgboost::common::Span<HostDeviceVector<bst_node_t>> out_position,\n                  const std::vector<RegTree *> &trees);\n\n  enum class HistPrecision {fp32, fp64};\n  HistPrecision hist_precision_;\n\n  std::unique_ptr<HistUpdater<float>> pimpl_fp32;\n  std::unique_ptr<HistUpdater<double>> pimpl_fp64;\n\n  FeatureInteractionConstraintHost int_constraint_;\n\n  ::sycl::queue* qu_;\n  DeviceManager device_manager;\n  ObjInfo const *task_{nullptr};\n};\n\n\n}  // namespace tree\n}  // namespace sycl\n}  // namespace xgboost\n\n#endif  // PLUGIN_SYCL_TREE_UPDATER_QUANTILE_HIST_H_\n"
  },
  {
    "path": "plugin/updater_gpu/README.md",
    "content": "# XGBoost GPU algorithms\n\nGPU algorithms are no longer a plugin and are included in official releases. [See documentation for more details](https://xgboost.readthedocs.io/en/latest/gpu/).\n"
  },
  {
    "path": "python-package/.gitignore",
    "content": "build\ndist\n*.egg*"
  },
  {
    "path": "python-package/README.cpu.rst",
    "content": "=================================\nXGBoost Python Package (CPU only)\n=================================\n\n|PyPI version|\n\nThe ``xgboost-cpu`` package provides for a minimal installation, with no support for the\nGPU algorithms or federated learning. It is provided to allow XGBoost to be installed in a\nspace-constrained environment.\n\nNote. ``xgboost-cpu`` package is only provided for x86_64 (amd64) Linux and Windows\nplatforms.  For other platforms, please install ``xgboost`` from\nhttps://pypi.org/project/xgboost/.\n\nNote. ``xgboost-cpu`` does not provide an sdist (source distribution). You may install\nsdist from https://pypi.org/project/xgboost/.\n\nInstallation\n============\n\nFrom `PyPI <https://pypi.python.org/pypi/xgboost-cpu>`_\n-------------------------------------------------------\n\nFor a stable version, install using ``pip``::\n\n    pip install xgboost-cpu\n\n.. |PyPI version| image:: https://badge.fury.io/py/xgboost-cpu.svg\n   :target: https://badge.fury.io/py/xgboost-cpu\n\nFor building from source, see `build <https://xgboost.readthedocs.io/en/latest/build.html>`_.\n"
  },
  {
    "path": "python-package/README.dft.rst",
    "content": "======================\nXGBoost Python Package\n======================\n\n|PyPI version|\n\nInstallation\n============\n\nFrom `PyPI <https://pypi.python.org/pypi/xgboost>`_\n---------------------------------------------------\n\nFor a stable version, install using ``pip``::\n\n    pip install xgboost\n\n.. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg\n   :target: http://badge.fury.io/py/xgboost\n\nFor building from source, see `build <https://xgboost.readthedocs.io/en/latest/build.html>`_.\n"
  },
  {
    "path": "python-package/README.rst",
    "content": "======================\nXGBoost Python Package\n======================\n\n|PyPI version|\n\nInstallation\n============\n\nFrom `PyPI <https://pypi.python.org/pypi/xgboost>`_\n---------------------------------------------------\n\nFor a stable version, install using ``pip``::\n\n    pip install xgboost\n\n.. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg\n   :target: http://badge.fury.io/py/xgboost\n\nFor building from source, see `build <https://xgboost.readthedocs.io/en/latest/build.html>`_.\n"
  },
  {
    "path": "python-package/README.stub.rst",
    "content": "======================================\nPlaceholder for XGBoost Python Package\n======================================\n\nThis package is a placeholder for the `xgboost` package.\n"
  },
  {
    "path": "python-package/hatch_build.py",
    "content": "\"\"\"\nCustom hook to customize the behavior of Hatchling.\nHere, we customize the tag of the generated wheels.\n\"\"\"\n\nfrom typing import Any, Dict\n\nfrom hatchling.builders.hooks.plugin.interface import BuildHookInterface\nfrom packaging.tags import platform_tags\n\n\ndef get_tag() -> str:\n    \"\"\"Get appropriate wheel tag according to system\"\"\"\n    platform_tag = next(platform_tags())\n    return f\"py3-none-{platform_tag}\"\n\n\nclass CustomBuildHook(BuildHookInterface):\n    \"\"\"A custom build hook\"\"\"\n\n    # pylint: disable=unused-argument\n    def initialize(self, version: str, build_data: Dict[str, Any]) -> None:\n        \"\"\"This step ccurs immediately before each build.\"\"\"\n        build_data[\"tag\"] = get_tag()\n"
  },
  {
    "path": "python-package/packager/__init__.py",
    "content": ""
  },
  {
    "path": "python-package/packager/build_config.py",
    "content": "\"\"\"Build configuration\"\"\"\n\nimport dataclasses\nfrom typing import Any, Dict, List, Optional\n\n\n@dataclasses.dataclass\nclass BuildConfiguration:  # pylint: disable=R0902\n    \"\"\"Configurations use when building libxgboost\"\"\"\n\n    # Whether to hide C++ symbols in libxgboost.so\n    hide_cxx_symbols: bool = True\n    # Whether to enable OpenMP\n    use_openmp: bool = True\n    # Whether to enable CUDA\n    use_cuda: bool = False\n    # Whether to enable NCCL\n    use_nccl: bool = False\n    # Whether to load nccl dynamically\n    use_dlopen_nccl: bool = False\n    # Whether to enable federated learning\n    plugin_federated: bool = False\n    # Whether to enable rmm support\n    plugin_rmm: bool = False\n    # Special option: See explanation below\n    use_system_libxgboost: bool = False\n\n    def _set_config_setting(self, config_settings: Dict[str, Any]) -> None:\n        for field_name in config_settings:\n            setattr(\n                self,\n                field_name,\n                (config_settings[field_name].lower() in [\"true\", \"1\", \"on\"]),\n            )\n\n    def update(self, config_settings: Optional[Dict[str, Any]]) -> None:\n        \"\"\"Parse config_settings from Pip (or other PEP 517 frontend)\"\"\"\n        if config_settings is not None:\n            self._set_config_setting(config_settings)\n\n    def get_cmake_args(self) -> List[str]:\n        \"\"\"Convert build configuration to CMake args\"\"\"\n        cmake_args = []\n        for field_name in [x.name for x in dataclasses.fields(self)]:\n            if field_name in [\"use_system_libxgboost\"]:\n                continue\n            cmake_option = field_name.upper()\n            cmake_value = \"ON\" if getattr(self, field_name) is True else \"OFF\"\n            cmake_args.append(f\"-D{cmake_option}={cmake_value}\")\n        return cmake_args\n"
  },
  {
    "path": "python-package/packager/nativelib.py",
    "content": "\"\"\"\nFunctions for building libxgboost\n\"\"\"\n\nimport logging\nimport os\nimport pathlib\nimport shutil\nimport subprocess\nimport sys\nfrom platform import system\nfrom typing import Optional\n\nfrom .build_config import BuildConfiguration\n\n\ndef _lib_name() -> str:\n    \"\"\"Return platform dependent shared object name.\"\"\"\n    if system() in [\"Linux\", \"OS400\"] or system().upper().endswith(\"BSD\"):\n        name = \"libxgboost.so\"\n    elif system() == \"Darwin\":\n        name = \"libxgboost.dylib\"\n    elif system() == \"Windows\":\n        name = \"xgboost.dll\"\n    else:\n        raise NotImplementedError(f\"System {system()} not supported\")\n    return name\n\n\ndef build_libxgboost(\n    cpp_src_dir: pathlib.Path,\n    build_dir: pathlib.Path,\n    build_config: BuildConfiguration,\n) -> pathlib.Path:\n    \"\"\"Build libxgboost in a temporary directory and obtain the path to built\n    libxgboost.\n\n    \"\"\"\n    logger = logging.getLogger(\"xgboost.packager.build_libxgboost\")\n\n    if not cpp_src_dir.is_dir():\n        raise RuntimeError(f\"Expected {cpp_src_dir} to be a directory\")\n    logger.info(\n        \"Building %s from the C++ source files in %s...\", _lib_name(), str(cpp_src_dir)\n    )\n\n    def _build(*, generator: str) -> None:\n        cmake_cmd = [\n            \"cmake\",\n            str(cpp_src_dir),\n            generator,\n            \"-DKEEP_BUILD_ARTIFACTS_IN_BINARY_DIR=ON\",\n        ]\n        cmake_cmd.extend(build_config.get_cmake_args())\n\n        logger.info(\"CMake args: %s\", str(cmake_cmd))\n        subprocess.check_call(cmake_cmd, cwd=build_dir)\n\n        if system() == \"Windows\":\n            subprocess.check_call(\n                [\"cmake\", \"--build\", \".\", \"--config\", \"Release\"], cwd=build_dir\n            )\n        else:\n            nproc = os.cpu_count()\n            assert build_tool is not None\n            subprocess.check_call([build_tool, f\"-j{nproc}\"], 
cwd=build_dir)\n\n    if system() == \"Windows\":\n        supported_generators = (\n            \"-GVisual Studio 17 2022\",\n            \"-GVisual Studio 16 2019\",\n            \"-GVisual Studio 15 2017\",\n            \"-GMinGW Makefiles\",\n        )\n        for generator in supported_generators:\n            try:\n                _build(generator=generator)\n                logger.info(\n                    \"Successfully built %s using generator %s\", _lib_name(), generator\n                )\n                break\n            except subprocess.CalledProcessError as e:\n                logger.info(\n                    \"Tried building with generator %s but failed with exception %s\",\n                    generator,\n                    str(e),\n                )\n                # Empty build directory\n                shutil.rmtree(build_dir)\n                build_dir.mkdir()\n        else:\n            raise RuntimeError(\n                \"None of the supported generators produced a successful build!\"\n                f\"Supported generators: {supported_generators}\"\n            )\n    else:\n        build_tool = \"ninja\" if shutil.which(\"ninja\") else \"make\"\n        generator = \"-GNinja\" if build_tool == \"ninja\" else \"-GUnix Makefiles\"\n        try:\n            _build(generator=generator)\n        except subprocess.CalledProcessError as e:\n            logger.info(\"Failed to build with OpenMP. 
Exception: %s\", str(e))\n            build_config.use_openmp = False\n            _build(generator=generator)\n\n    return build_dir / \"lib\" / _lib_name()\n\n\ndef locate_local_libxgboost(\n    toplevel_dir: pathlib.Path,\n    logger: logging.Logger,\n) -> Optional[pathlib.Path]:\n    \"\"\"\n    Locate libxgboost from the local project directory's lib/ subdirectory.\n    \"\"\"\n    libxgboost = toplevel_dir.parent / \"lib\" / _lib_name()\n    if libxgboost.exists():\n        logger.info(\"Found %s at %s\", libxgboost.name, str(libxgboost.parent))\n        return libxgboost\n    return None\n\n\ndef locate_or_build_libxgboost(\n    toplevel_dir: pathlib.Path,\n    build_dir: pathlib.Path,\n    build_config: BuildConfiguration,\n) -> pathlib.Path:\n    \"\"\"Locate libxgboost; if not exist, build it\"\"\"\n    logger = logging.getLogger(\"xgboost.packager.locate_or_build_libxgboost\")\n\n    if build_config.use_system_libxgboost:\n        # Find libxgboost from system prefix\n        sys_prefix = pathlib.Path(sys.base_prefix)\n        sys_prefix_candidates = [\n            sys_prefix / \"lib\",\n            # Paths possibly used on Windows\n            sys_prefix / \"bin\",\n            sys_prefix / \"Library\",\n            sys_prefix / \"Library\" / \"bin\",\n            sys_prefix / \"Library\" / \"lib\",\n            sys_prefix / \"Library\" / \"mingw-w64\",\n            sys_prefix / \"Library\" / \"mingw-w64\" / \"bin\",\n            sys_prefix / \"Library\" / \"mingw-w64\" / \"lib\",\n        ]\n        sys_prefix_candidates = [\n            p.expanduser().resolve() for p in sys_prefix_candidates\n        ]\n        for candidate_dir in sys_prefix_candidates:\n            libxgboost_sys = candidate_dir / _lib_name()\n            if libxgboost_sys.exists():\n                logger.info(\"Using system XGBoost: %s\", str(libxgboost_sys))\n                return libxgboost_sys\n        raise RuntimeError(\n            f\"use_system_libxgboost was specified 
but {_lib_name()} is \"\n            f\"not found. Paths searched (in order): \\n\"\n            + \"\\n\".join([f\"* {str(p)}\" for p in sys_prefix_candidates])\n        )\n\n    libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)\n    if libxgboost is not None:\n        return libxgboost\n\n    if toplevel_dir.joinpath(\"cpp_src\").exists():\n        # Source distribution; all C++ source files to be found in cpp_src/\n        cpp_src_dir = toplevel_dir.joinpath(\"cpp_src\")\n    else:\n        # Probably running \"pip install .\" from python-package/\n        cpp_src_dir = toplevel_dir.parent\n        if not cpp_src_dir.joinpath(\"CMakeLists.txt\").exists():\n            raise RuntimeError(f\"Did not find CMakeLists.txt from {cpp_src_dir}\")\n    return build_libxgboost(cpp_src_dir, build_dir=build_dir, build_config=build_config)\n"
  },
  {
    "path": "python-package/packager/pep517.py",
    "content": "\"\"\"\nCustom build backend for XGBoost Python package.\nBuilds source distribution and binary wheels, following PEP 517 / PEP 660.\nReuses components of Hatchling (https://github.com/pypa/hatch/tree/master/backend) for the sake\nof brevity.\n\"\"\"\n\nimport dataclasses\nimport logging\nimport os\nimport pathlib\nimport tempfile\nfrom contextlib import contextmanager\nfrom typing import Any, Dict, Iterator, Optional, Union\n\nimport hatchling.build\n\nfrom .build_config import BuildConfiguration\nfrom .nativelib import locate_local_libxgboost, locate_or_build_libxgboost\nfrom .sdist import copy_cpp_src_tree\nfrom .util import copy_with_logging, copytree_with_logging\n\n\n@contextmanager\ndef cd(path: Union[str, pathlib.Path]) -> Iterator[str]:  # pylint: disable=C0103\n    \"\"\"\n    Temporarily change working directory.\n    TODO(hcho3): Remove this once we adopt Python 3.11, which implements contextlib.chdir.\n    \"\"\"\n    path = str(path)\n    path = os.path.realpath(path)\n    cwd = os.getcwd()\n    os.chdir(path)\n    try:\n        yield path\n    finally:\n        os.chdir(cwd)\n\n\nTOPLEVEL_DIR = pathlib.Path(__file__).parent.parent.absolute().resolve()\nlogging.basicConfig(level=logging.INFO)\n\n\n# Aliases\nget_requires_for_build_sdist = hatchling.build.get_requires_for_build_sdist\nget_requires_for_build_wheel = hatchling.build.get_requires_for_build_wheel\nget_requires_for_build_editable = hatchling.build.get_requires_for_build_editable\n\n\ndef build_wheel(\n    wheel_directory: str,\n    config_settings: Optional[Dict[str, Any]] = None,\n    metadata_directory: Optional[str] = None,\n) -> str:\n    \"\"\"Build a wheel\"\"\"\n    logger = logging.getLogger(\"xgboost.packager.build_wheel\")\n\n    build_config = BuildConfiguration()\n    build_config.update(config_settings)\n    logger.info(\"Parsed build configuration: %s\", dataclasses.asdict(build_config))\n\n    # Create tempdir with Python package + libxgboost\n    with 
tempfile.TemporaryDirectory() as td:\n        td_path = pathlib.Path(td)\n        build_dir = td_path / \"libbuild\"\n        build_dir.mkdir()\n\n        workspace = td_path / \"whl_workspace\"\n        workspace.mkdir()\n        logger.info(\"Copying project files to temporary directory %s\", str(workspace))\n\n        copy_with_logging(TOPLEVEL_DIR / \"pyproject.toml\", workspace, logger=logger)\n        copy_with_logging(TOPLEVEL_DIR / \"hatch_build.py\", workspace, logger=logger)\n        copy_with_logging(TOPLEVEL_DIR / \"README.rst\", workspace, logger=logger)\n\n        pkg_path = workspace / \"xgboost\"\n        copytree_with_logging(TOPLEVEL_DIR / \"xgboost\", pkg_path, logger=logger)\n        lib_path = pkg_path / \"lib\"\n        lib_path.mkdir()\n        libxgboost = locate_or_build_libxgboost(\n            TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config\n        )\n        if not build_config.use_system_libxgboost:\n            copy_with_logging(libxgboost, lib_path, logger=logger)\n\n        with cd(workspace):\n            wheel_name = hatchling.build.build_wheel(\n                wheel_directory, config_settings, metadata_directory\n            )\n    return wheel_name\n\n\ndef build_sdist(\n    sdist_directory: str,\n    config_settings: Optional[Dict[str, Any]] = None,\n) -> str:\n    \"\"\"Build a source distribution\"\"\"\n    logger = logging.getLogger(\"xgboost.packager.build_sdist\")\n\n    if config_settings:\n        raise NotImplementedError(\n            \"XGBoost's custom build backend doesn't support config_settings option \"\n            f\"when building sdist. 
{config_settings=}\"\n        )\n\n    cpp_src_dir = TOPLEVEL_DIR.parent\n    if not cpp_src_dir.joinpath(\"CMakeLists.txt\").exists():\n        raise RuntimeError(f\"Did not find CMakeLists.txt from {cpp_src_dir}\")\n\n    # Create tempdir with Python package + C++ sources\n    with tempfile.TemporaryDirectory() as td:\n        td_path = pathlib.Path(td)\n\n        workspace = td_path / \"sdist_workspace\"\n        workspace.mkdir()\n        logger.info(\"Copying project files to temporary directory %s\", str(workspace))\n\n        copy_with_logging(TOPLEVEL_DIR / \"pyproject.toml\", workspace, logger=logger)\n        copy_with_logging(TOPLEVEL_DIR / \"hatch_build.py\", workspace, logger=logger)\n        copy_with_logging(TOPLEVEL_DIR / \"README.rst\", workspace, logger=logger)\n\n        copytree_with_logging(\n            TOPLEVEL_DIR / \"xgboost\", workspace / \"xgboost\", logger=logger\n        )\n        copytree_with_logging(\n            TOPLEVEL_DIR / \"packager\", workspace / \"packager\", logger=logger\n        )\n\n        temp_cpp_src_dir = workspace / \"cpp_src\"\n        copy_cpp_src_tree(cpp_src_dir, target_dir=temp_cpp_src_dir, logger=logger)\n\n        with cd(workspace):\n            sdist_name = hatchling.build.build_sdist(sdist_directory, config_settings)\n    return sdist_name\n\n\ndef build_editable(\n    wheel_directory: str,\n    config_settings: Optional[Dict[str, Any]] = None,\n    metadata_directory: Optional[str] = None,\n) -> str:\n    \"\"\"Build an editable installation. We mostly delegate to Hatchling.\"\"\"\n    logger = logging.getLogger(\"xgboost.packager.build_editable\")\n\n    if config_settings:\n        raise NotImplementedError(\n            \"XGBoost's custom build backend doesn't support config_settings option \"\n            f\"when building editable installation. 
{config_settings=}\"\n        )\n\n    if locate_local_libxgboost(TOPLEVEL_DIR, logger=logger) is None:\n        raise RuntimeError(\n            \"To use the editable installation, first build libxgboost with CMake. \"\n            \"See https://xgboost.readthedocs.io/en/latest/build.html for detailed instructions.\"\n        )\n\n    return hatchling.build.build_editable(\n        wheel_directory, config_settings, metadata_directory\n    )\n"
  },
  {
    "path": "python-package/packager/sdist.py",
    "content": "\"\"\"\nFunctions for building sdist\n\"\"\"\n\nimport logging\nimport pathlib\n\nfrom .util import copy_with_logging, copytree_with_logging\n\n\ndef copy_cpp_src_tree(\n    cpp_src_dir: pathlib.Path, target_dir: pathlib.Path, logger: logging.Logger\n) -> None:\n    \"\"\"Copy C++ source tree into build directory\"\"\"\n\n    for subdir in [\n        \"src\",\n        \"include\",\n        \"dmlc-core\",\n        \"gputreeshap\",\n        \"cmake\",\n        \"plugin\",\n    ]:\n        copytree_with_logging(cpp_src_dir / subdir, target_dir / subdir, logger=logger)\n\n    for filename in [\"CMakeLists.txt\", \"LICENSE\"]:\n        copy_with_logging(cpp_src_dir.joinpath(filename), target_dir, logger=logger)\n"
  },
  {
    "path": "python-package/packager/util.py",
    "content": "\"\"\"\nUtility functions for implementing PEP 517 backend\n\"\"\"\n\nimport logging\nimport pathlib\nimport shutil\n\n\ndef copytree_with_logging(\n    src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger\n) -> None:\n    \"\"\"Call shutil.copytree() with logging\"\"\"\n    logger.info(\"Copying %s -> %s\", str(src), str(dest))\n    shutil.copytree(src, dest)\n\n\ndef copy_with_logging(\n    src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger\n) -> None:\n    \"\"\"Call shutil.copy() with logging\"\"\"\n    if dest.is_dir():\n        logger.info(\"Copying %s -> %s\", str(src), str(dest / src.name))\n    else:\n        logger.info(\"Copying %s -> %s\", str(src), str(dest))\n    shutil.copy(src, dest)\n"
  },
  {
    "path": "python-package/pyproject.toml",
    "content": "# Generated by `pypi_variants.py`, don't edit: '--use-suffix=na --require-nccl-dep=cu12 --create-stub=False'\n[build-system]\nrequires = [\n    \"hatchling>=1.12.1\",\n    \"packaging>=21.3\",\n]\nbackend-path = [\".\"]\nbuild-backend = \"packager.pep517\"\n\n[project]\nname = \"xgboost\"\ndescription = \"XGBoost Python Package\"\nreadme = { file = \"README.rst\", content-type = \"text/x-rst\" }\nauthors = [\n    { name = \"Hyunsu Cho\", email = \"chohyu01@cs.washington.edu\" },\n    { name = \"Jiaming Yuan\", email = \"jm.yuan@outlook.com\" }\n]\nversion = \"3.3.0-dev\"\nrequires-python = \">=3.10\"\nlicense = { text = \"Apache-2.0\" }\nclassifiers = [\n    \"License :: OSI Approved :: Apache Software License\",\n    \"Development Status :: 5 - Production/Stable\",\n    \"Operating System :: OS Independent\",\n    \"Typing :: Typed\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n]\ndependencies = [\n    \"numpy\",\n    \"scipy\",\n    \"nvidia-nccl-cu12 ; platform_system == 'Linux'\",\n]\n\n[project.urls]\ndocumentation = \"https://xgboost.readthedocs.io/en/stable/\"\nrepository = \"https://github.com/dmlc/xgboost\"\n\n[project.optional-dependencies]\npandas = [\"pandas>=1.2\"]\nscikit-learn = [\"scikit-learn\"]\ndask = [\"dask\", \"pandas\", \"distributed\"]\nplotting = [\"graphviz\", \"matplotlib\"]\npyspark = [\"pyspark>=3.4\", \"scikit-learn\", \"cloudpickle\"]\n\n[tool.hatch.build.targets.wheel.hooks.custom]\n\n[tool.isort]\nprofile = \"black\"\n\n[tool.ruff.lint.isort]\nknown-first-party = [\"xgboost\"]\n\n[tool.mypy]\nignore_missing_imports = true\ndisallow_untyped_defs = true\nfollow_imports = \"silent\"\n\n[tool.pylint.main]\nignore = 
[\"tests\"]\nextension-pkg-whitelist = [\"numpy\", \"cuda\"]\ndisable = [\n    \"import-error\",\n    \"invalid-name\",\n    \"attribute-defined-outside-init\",\n    \"import-outside-toplevel\",\n    \"too-few-public-methods\",\n    \"too-many-ancestors\",\n    \"too-many-nested-blocks\",\n    \"unsubscriptable-object\",\n    \"useless-object-inheritance\",\n    \"wrong-import-order\",\n]\ndummy-variables-rgx = \"(unused|)_.*\"\nreports = false\n\n[tool.hatch.build.targets.wheel]\npackages = [\"xgboost/\"]\n\n[tool.pylint.basic]\n# Enforce naming convention\nconst-naming-style = \"UPPER_CASE\"\nclass-naming-style = \"PascalCase\"\nfunction-naming-style = \"snake_case\"\nmethod-naming-style = \"snake_case\"\nattr-naming-style = \"snake_case\"\nargument-naming-style = \"snake_case\"\nvariable-naming-style = \"snake_case\"\nclass-attribute-naming-style = \"snake_case\"\n\n# Allow single-letter variables\nvariable-rgx = \"[a-zA-Z_][a-z0-9_]{0,30}$\"\n\n[tool.pydistcheck]\ninspect = true\nignore = [\"compiled-objects-have-debug-symbols\"]\nmax_allowed_size_compressed = '300M'\nmax_allowed_size_uncompressed = '500M'\n\n[variant.default-priorities]\nnamespace = [\"nvidia\"]\n\n[variant.providers.nvidia]\nrequires = [\"nvidia-variant-provider>=0.0.1,<1.0.0\"]\nplugin-api = \"nvidia_variant_provider.plugin:NvidiaVariantPlugin\"\n"
  },
  {
    "path": "python-package/pyproject.toml.in",
    "content": "[build-system]\nrequires = [\n    \"hatchling>=1.12.1\",\n    \"packaging>=21.3\",\n]\nbackend-path = [\".\"]\nbuild-backend = \"packager.pep517\"\n\n[project]\nname = \"{{ name }}\"\ndescription = \"XGBoost Python Package\"\nreadme = { file = \"README.rst\", content-type = \"text/x-rst\" }\nauthors = [\n    { name = \"Hyunsu Cho\", email = \"chohyu01@cs.washington.edu\" },\n    { name = \"Jiaming Yuan\", email = \"jm.yuan@outlook.com\" }\n]\nversion = \"3.3.0-dev\"\nrequires-python = \">=3.10\"\nlicense = { text = \"Apache-2.0\" }\nclassifiers = [\n    \"License :: OSI Approved :: Apache Software License\",\n    \"Development Status :: 5 - Production/Stable\",\n    \"Operating System :: OS Independent\",\n    \"Typing :: Typed\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n]\ndependencies = [\n    \"numpy\",\n    \"scipy\",\n{{ nccl }}\n]\n\n[project.urls]\ndocumentation = \"https://xgboost.readthedocs.io/en/stable/\"\nrepository = \"https://github.com/dmlc/xgboost\"\n\n[project.optional-dependencies]\npandas = [\"pandas>=1.2\"]\nscikit-learn = [\"scikit-learn\"]\ndask = [\"dask\", \"pandas\", \"distributed\"]\nplotting = [\"graphviz\", \"matplotlib\"]\npyspark = [\"pyspark>=3.4\", \"scikit-learn\", \"cloudpickle\"]\n\n[tool.hatch.build.targets.wheel.hooks.custom]\n\n[tool.isort]\nprofile = \"black\"\n\n[tool.ruff.lint.isort]\nknown-first-party = [\"xgboost\"]\n\n[tool.mypy]\nignore_missing_imports = true\ndisallow_untyped_defs = true\nfollow_imports = \"silent\"\n\n[tool.pylint.main]\nignore = [\"tests\"]\nextension-pkg-whitelist = [\"numpy\", \"cuda\"]\ndisable = [\n    \"import-error\",\n    \"invalid-name\",\n    \"attribute-defined-outside-init\",\n    
\"import-outside-toplevel\",\n    \"too-few-public-methods\",\n    \"too-many-ancestors\",\n    \"too-many-nested-blocks\",\n    \"unsubscriptable-object\",\n    \"useless-object-inheritance\",\n    \"wrong-import-order\",\n]\ndummy-variables-rgx = \"(unused|)_.*\"\nreports = false\n\n[tool.hatch.build.targets.wheel]\npackages = [\"xgboost/\"]\n\n[tool.pylint.basic]\n# Enforce naming convention\nconst-naming-style = \"UPPER_CASE\"\nclass-naming-style = \"PascalCase\"\nfunction-naming-style = \"snake_case\"\nmethod-naming-style = \"snake_case\"\nattr-naming-style = \"snake_case\"\nargument-naming-style = \"snake_case\"\nvariable-naming-style = \"snake_case\"\nclass-attribute-naming-style = \"snake_case\"\n\n# Allow single-letter variables\nvariable-rgx = \"[a-zA-Z_][a-z0-9_]{0,30}$\"\n\n[tool.pydistcheck]\ninspect = true\nignore = [\"compiled-objects-have-debug-symbols\"]\nmax_allowed_size_compressed = '300M'\nmax_allowed_size_uncompressed = '500M'\n\n[variant.default-priorities]\nnamespace = [\"nvidia\"]\n\n[variant.providers.nvidia]\nrequires = [\"nvidia-variant-provider>=0.0.1,<1.0.0\"]\nplugin-api = \"nvidia_variant_provider.plugin:NvidiaVariantPlugin\"\n"
  },
  {
    "path": "python-package/pyproject.toml.stub.in",
    "content": "[build-system]\nrequires = [\n    \"hatchling>=1.12.1\",\n]\nbuild-backend = \"hatchling.build\"\n\n[project]\nname = \"{{ name }}\"\ndescription = \"XGBoost Python Package\"\nreadme = { file = \"README.rst\", content-type = \"text/x-rst\" }\nauthors = [\n    { name = \"Hyunsu Cho\", email = \"chohyu01@cs.washington.edu\" },\n    { name = \"Jiaming Yuan\", email = \"jm.yuan@outlook.com\" }\n]\nversion = \"3.3.0-dev\"\nrequires-python = \">=3.10\"\nlicense = { text = \"Apache-2.0\" }\nclassifiers = [\n    \"License :: OSI Approved :: Apache Software License\",\n    \"Development Status :: 5 - Production/Stable\",\n    \"Operating System :: OS Independent\",\n    \"Typing :: Typed\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n]\ndependencies = [\n    \"xgboost=={{ version }}\",\n]\n\n[tool.hatch.build.targets.sdist]\nonly-include = [\"pyproject.toml\"]\n\n[tool.hatch.build.targets.wheel]\nonly-include = [\"pyproject.toml\"]\n"
  },
  {
    "path": "python-package/xgboost/VERSION",
    "content": "3.3.0-dev\n"
  },
  {
    "path": "python-package/xgboost/__init__.py",
    "content": "\"\"\"XGBoost: eXtreme Gradient Boosting library.\n\nContributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md\n\"\"\"\n\nfrom . import tracker  # noqa\nfrom . import collective\nfrom ._c_api import _py_version\nfrom .core import (\n    Booster,\n    DataIter,\n    DMatrix,\n    ExtMemQuantileDMatrix,\n    QuantileDMatrix,\n    build_info,\n)\nfrom .tracker import RabitTracker  # noqa\nfrom .training import cv, train\n\ntry:\n    from .config import config_context, get_config, set_config\n    from .plotting import plot_importance, plot_tree, to_graphviz\n    from .sklearn import (\n        XGBClassifier,\n        XGBModel,\n        XGBRanker,\n        XGBRegressor,\n        XGBRFClassifier,\n        XGBRFRegressor,\n    )\nexcept ImportError:\n    pass\n\n\n__version__ = _py_version()\n\n\n__all__ = [\n    # core\n    \"DMatrix\",\n    \"QuantileDMatrix\",\n    \"ExtMemQuantileDMatrix\",\n    \"Booster\",\n    \"DataIter\",\n    \"train\",\n    \"cv\",\n    # utilities\n    \"RabitTracker\",\n    \"build_info\",\n    \"plot_importance\",\n    \"plot_tree\",\n    \"to_graphviz\",\n    \"set_config\",\n    \"get_config\",\n    \"config_context\",\n    # sklearn\n    \"XGBModel\",\n    \"XGBClassifier\",\n    \"XGBRegressor\",\n    \"XGBRanker\",\n    \"XGBRFClassifier\",\n    \"XGBRFRegressor\",\n    # collective\n    \"collective\",\n]\n"
  },
  {
    "path": "python-package/xgboost/_c_api.py",
    "content": "\"\"\"Low-level ctypes bridge for the XGBoost C API.\"\"\"\n\nimport ctypes\nimport json\nimport os\nimport warnings\nfrom typing import Any, Callable, List, Tuple, Union, cast, overload\n\nfrom ._typing import CStrPptr, c_bst_ulong\nfrom .compat import py_str\nfrom .libpath import find_lib_path\n\n\nclass XGBoostError(ValueError):\n    \"\"\"Error thrown by xgboost trainer.\"\"\"\n\n\n@overload\ndef from_pystr_to_cstr(data: str) -> bytes: ...\n\n\n@overload\ndef from_pystr_to_cstr(data: List[str]) -> ctypes.Array: ...\n\n\ndef from_pystr_to_cstr(data: Union[str, List[str]]) -> Union[bytes, ctypes.Array]:\n    \"\"\"Convert a Python str or list of Python str to C pointer.\"\"\"\n    if isinstance(data, str):\n        return bytes(data, \"utf-8\")\n    if isinstance(data, list):\n        data_as_bytes: List[bytes] = [bytes(d, \"utf-8\") for d in data]\n        pointers: ctypes.Array[ctypes.c_char_p] = (\n            ctypes.c_char_p * len(data_as_bytes)\n        )(*data_as_bytes)\n        return pointers\n    raise TypeError()\n\n\ndef from_cstr_to_pystr(data: CStrPptr, length: c_bst_ulong) -> List[str]:\n    \"\"\"Revert C pointer to Python str.\"\"\"\n    res = []\n    for i in range(length.value):\n        try:\n            res.append(str(cast(bytes, data[i]).decode(\"ascii\")))\n        except UnicodeDecodeError:\n            res.append(str(cast(bytes, data[i]).decode(\"utf-8\")))\n    return res\n\n\ndef make_jcargs(**kwargs: Any) -> bytes:\n    \"\"\"Make JSON-based arguments for C functions.\"\"\"\n    return from_pystr_to_cstr(json.dumps(kwargs))\n\n\ndef _log_callback(msg: bytes) -> None:\n    \"\"\"Redirect logs from native library into Python console.\"\"\"\n    smsg = py_str(msg)\n    if smsg.find(\"WARNING:\") != -1:\n        # Stacklevel:\n        # 1: This line\n        # 2: XGBoost C functions like `_LIB.XGBoosterTrainOneIter`.\n        # 3: The Python function that calls the C function.\n        warnings.warn(smsg, UserWarning, 
stacklevel=3)\n        return\n    print(smsg)\n\n\ndef _get_log_callback_func() -> Callable:\n    \"\"\"Wrap log_callback() method in ctypes callback type.\"\"\"\n    c_callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p)\n    return c_callback(_log_callback)\n\n\ndef _lib_version(lib: ctypes.CDLL) -> Tuple[int, int, int]:\n    \"\"\"Get the XGBoost version from native shared object.\"\"\"\n    major = ctypes.c_int()\n    minor = ctypes.c_int()\n    patch = ctypes.c_int()\n    lib.XGBoostVersion(ctypes.byref(major), ctypes.byref(minor), ctypes.byref(patch))\n    return major.value, minor.value, patch.value\n\n\ndef _py_version() -> str:\n    \"\"\"Get the XGBoost version from Python version file.\"\"\"\n    version_file = os.path.join(os.path.dirname(__file__), \"VERSION\")\n    with open(version_file, encoding=\"ascii\") as f:\n        return f.read().strip()\n\n\ndef _register_log_callback(lib: ctypes.CDLL) -> None:\n    lib.XGBGetLastError.restype = ctypes.c_char_p\n    lib.callback = _get_log_callback_func()  # type: ignore[attr-defined]\n    if lib.XGBRegisterLogCallback(lib.callback) != 0:\n        raise XGBoostError(lib.XGBGetLastError())\n\n\ndef _parse_version(ver: str) -> Tuple[Tuple[int, int, int], str]:\n    \"\"\"Avoid dependency on packaging (PEP 440).\"\"\"\n    # 2.0.0-dev, 2.0.0, 2.0.0.post1, or 2.0.0rc1\n    if ver.find(\"post\") != -1:\n        major, minor, patch = ver.split(\".\")[:-1]\n        postfix = ver.split(\".\")[-1]\n    elif \"-dev\" in ver:\n        major, minor, patch = ver.split(\"-\")[0].split(\".\")\n        postfix = \"dev\"\n    else:\n        major, minor, patch = ver.split(\".\")\n        rc = patch.find(\"rc\")\n        if rc != -1:\n            postfix = patch[rc:]\n            patch = patch[:rc]\n        else:\n            postfix = \"\"\n\n    return (int(major), int(minor), int(patch)), postfix\n\n\ndef _load_lib() -> ctypes.CDLL:\n    \"\"\"Load xgboost library.\"\"\"\n    lib_paths = find_lib_path()\n    if not 
lib_paths:\n        # This happens only when building document.\n        return None  # type: ignore[return-value]\n    try:\n        path_backup = os.environ[\"PATH\"].split(os.pathsep)\n    except KeyError:\n        path_backup = []\n    lib_success = False\n    os_error_list = []\n    for lib_path in lib_paths:\n        try:\n            # needed when the lib is linked with non-system-available\n            # dependencies\n            os.environ[\"PATH\"] = os.pathsep.join(\n                path_backup + [os.path.dirname(lib_path)]\n            )\n            lib = ctypes.cdll.LoadLibrary(lib_path)\n            setattr(lib, \"path\", os.path.normpath(lib_path))\n            lib_success = True\n            break\n        except OSError as e:\n            os_error_list.append(str(e))\n            continue\n        finally:\n            os.environ[\"PATH\"] = os.pathsep.join(path_backup)\n    if not lib_success:\n        libname = os.path.basename(lib_paths[0])\n        raise XGBoostError(f\"\"\"\nXGBoost Library ({libname}) could not be loaded.\nLikely causes:\n  * OpenMP runtime is not installed\n    - vcomp140.dll or libgomp-1.dll for Windows\n    - libomp.dylib for Mac OSX\n    - libgomp.so for Linux and other UNIX-like OSes\n    Mac OSX users: Run `brew install libomp` to install OpenMP runtime.\n\n  * You are running 32-bit Python on a 64-bit OS\n\nError message(s): {os_error_list}\n\"\"\")\n    _register_log_callback(lib)\n\n    libver = _lib_version(lib)\n    pyver, _ = _parse_version(_py_version())\n\n    # verify that we are loading the correct binary.\n    if pyver != libver:\n        pyver_str = \".\".join((str(v) for v in pyver))\n        libver_str = \".\".join((str(v) for v in libver))\n        msg = (\n            \"Mismatched version between the Python package and the native shared \"\n            f\"\"\"object.  Python package version: {pyver_str}. Shared object \"\"\"\n            f\"\"\"version: {libver_str}. 
Shared object is loaded from: {lib.path}.\nLikely cause:\n  * XGBoost is first installed with anaconda then upgraded with pip. To fix it \"\"\"\n            \"please remove one of the installations.\"\n        )\n        raise ValueError(msg)\n\n    return lib\n\n\n# load the XGBoost library globally\n_LIB = _load_lib()\n\n\ndef _check_call(ret: int) -> None:\n    \"\"\"Check the return value of C API call.\"\"\"\n    if ret != 0:\n        raise XGBoostError(py_str(_LIB.XGBGetLastError()))\n\n\ndef c_str(string: str) -> ctypes.c_char_p:\n    \"\"\"Convert a python string to cstring.\"\"\"\n    return ctypes.c_char_p(string.encode(\"utf-8\"))\n"
  },
  {
    "path": "python-package/xgboost/_data_utils.py",
    "content": "\"\"\"Helpers for interfacing array like objects.\"\"\"\n\nimport copy\nimport ctypes\nimport json\nfrom abc import ABC, abstractmethod\nfrom functools import cache as fcache\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    Dict,\n    List,\n    Literal,\n    Optional,\n    Protocol,\n    Tuple,\n    Type,\n    TypeAlias,\n    TypedDict,\n    TypeGuard,\n    Union,\n    cast,\n    overload,\n)\n\nimport numpy as np\n\nfrom ._typing import (\n    ArrowCatList,\n    CNumericPtr,\n    DataType,\n    FeatureTypes,\n    NumpyDType,\n    NumpyOrCupy,\n)\nfrom .compat import import_cupy, import_pyarrow, lazy_isinstance\n\nif TYPE_CHECKING:\n    import pandas as pd\n    import pyarrow as pa\n\n\n# Used for accepting inputs for numpy and cupy arrays\nclass _ArrayLikeArg(Protocol):\n    @property\n    def __array_interface__(self) -> \"ArrayInf\": ...\n\n\nclass _CudaArrayLikeArg(Protocol):\n    @property\n    def __cuda_array_interface__(self) -> \"CudaArrayInf\": ...\n\n\nArrayInf = TypedDict(\n    \"ArrayInf\",\n    {\n        \"data\": Tuple[int, bool],\n        \"typestr\": str,\n        \"version\": Literal[3],\n        \"strides\": Optional[Tuple[int, ...]],\n        \"shape\": Tuple[int, ...],\n        \"mask\": Union[\"ArrayInf\", None, _ArrayLikeArg],\n    },\n)\n\nCudaArrayInf = TypedDict(\n    \"CudaArrayInf\",\n    {\n        \"data\": Tuple[int, bool],\n        \"typestr\": str,\n        \"version\": Literal[3],\n        \"strides\": Optional[Tuple[int, ...]],\n        \"shape\": Tuple[int, ...],\n        \"mask\": Union[\"ArrayInf\", None, _ArrayLikeArg],\n        \"stream\": int,\n    },\n)\n\nStringArray = TypedDict(\"StringArray\", {\"offsets\": ArrayInf, \"values\": ArrayInf})\nCudaStringArray = TypedDict(\n    \"CudaStringArray\", {\"offsets\": CudaArrayInf, \"values\": CudaArrayInf}\n)\n\n\ndef array_hasobject(data: DataType) -> bool:\n    \"\"\"Whether the numpy array has object dtype.\"\"\"\n    return (\n        
hasattr(data, \"dtype\")\n        and hasattr(data.dtype, \"hasobject\")\n        and data.dtype.hasobject\n    )\n\n\ndef cuda_array_interface_dict(data: _CudaArrayLikeArg) -> CudaArrayInf:\n    \"\"\"Returns a dictionary storing the CUDA array interface.\"\"\"\n    if array_hasobject(data):\n        raise ValueError(\"Input data contains `object` dtype.  Expecting numeric data.\")\n    ainf = data.__cuda_array_interface__\n    if \"mask\" in ainf:\n        ainf[\"mask\"] = ainf[\"mask\"].__cuda_array_interface__  # type: ignore[union-attr]\n    return ainf\n\n\ndef cuda_array_interface(data: _CudaArrayLikeArg) -> bytes:\n    \"\"\"Make cuda array interface str.\"\"\"\n    interface = cuda_array_interface_dict(data)\n    interface_str = bytes(json.dumps(interface), \"utf-8\")\n    return interface_str\n\n\ndef from_array_interface(interface: ArrayInf, zero_copy: bool = False) -> NumpyOrCupy:\n    \"\"\"Convert array interface to numpy or cupy array\"\"\"\n\n    class Array:\n        \"\"\"Wrapper type for communicating with numpy and cupy.\"\"\"\n\n        _interface: Optional[ArrayInf] = None\n\n        @property\n        def __array_interface__(self) -> Optional[ArrayInf]:\n            return self._interface\n\n        @__array_interface__.setter\n        def __array_interface__(self, interface: ArrayInf) -> None:\n            self._interface = copy.copy(interface)\n            # Convert some fields to tuple as required by numpy\n            self._interface[\"shape\"] = tuple(self._interface[\"shape\"])\n            self._interface[\"data\"] = (\n                self._interface[\"data\"][0],\n                self._interface[\"data\"][1],\n            )\n            strides = self._interface.get(\"strides\", None)\n            if strides is not None:\n                self._interface[\"strides\"] = tuple(strides)\n\n        @property\n        def __cuda_array_interface__(self) -> Optional[ArrayInf]:\n            return self.__array_interface__\n\n        
@__cuda_array_interface__.setter\n        def __cuda_array_interface__(self, interface: ArrayInf) -> None:\n            self.__array_interface__ = interface\n\n        @property\n        def shape(self) -> Tuple[int, ...]:\n            \"\"\"Shape of the input array.\"\"\"\n            aif = self.__array_interface__\n            assert aif is not None\n            return aif[\"shape\"]\n\n        @property\n        def size(self) -> np.signedinteger:\n            \"\"\"Total size of the input array.\"\"\"\n            return np.prod(self.shape)\n\n    arr = Array()\n\n    # Cupy and numpy might run into issue when constructing an empty array from an array\n    # interface. we explicitly check for emptiness.\n    if \"stream\" in interface:\n        # CUDA stream is presented, this is a __cuda_array_interface__.\n        arr.__cuda_array_interface__ = interface\n        cp = import_cupy()\n        if arr.size == 0:\n            return cp.empty(shape=arr.shape, dtype=np.dtype(interface[\"typestr\"]))\n        out = cp.array(arr, copy=not zero_copy)\n    else:\n        arr.__array_interface__ = interface\n        if arr.size == 0:\n            return np.empty(shape=arr.shape, dtype=np.dtype(interface[\"typestr\"]))\n        out = np.array(arr, copy=not zero_copy)\n\n    return out\n\n\n# Default constant value for CUDA per-thread stream.\nSTREAM_PER_THREAD = 2\n\n\n# Typing is not strict as there are subtle differences between CUDA array interface and\n# array interface. 
We handle them uniformly for now.\ndef make_array_interface(\n    ptr: Union[CNumericPtr, int],\n    shape: Tuple[int, ...],\n    dtype: Type[np.number],\n    is_cuda: bool,\n) -> ArrayInf:\n    \"\"\"Make an __(cuda)_array_interface__ from a pointer.\"\"\"\n    # Use an empty array to handle typestr and descr\n    if is_cuda:\n        empty = import_cupy().empty(shape=(0,), dtype=dtype)\n        array = empty.__cuda_array_interface__  # pylint: disable=no-member\n    else:\n        empty = np.empty(shape=(0,), dtype=dtype)\n        array = empty.__array_interface__  # pylint: disable=no-member\n\n    if not isinstance(ptr, int):\n        addr = ctypes.cast(ptr, ctypes.c_void_p).value\n    else:\n        addr = ptr\n    length = int(np.prod(shape))\n    # Handle empty dataset.\n    assert addr is not None or length == 0\n\n    if addr is None:\n        return array\n\n    array[\"data\"] = (addr, True)\n    if is_cuda and \"stream\" not in array:\n        array[\"stream\"] = STREAM_PER_THREAD\n    array[\"shape\"] = shape\n    array[\"strides\"] = None\n    return array\n\n\ndef is_arrow_dict(data: Any) -> TypeGuard[\"pa.DictionaryArray\"]:\n    \"\"\"Is this an arrow dictionary array?\"\"\"\n    return lazy_isinstance(data, \"pyarrow.lib\", \"DictionaryArray\")\n\n\nclass DfCatAccessor(Protocol):\n    \"\"\"Protocol for pandas cat accessor.\"\"\"\n\n    @property\n    def categories(  # pylint: disable=missing-function-docstring\n        self,\n    ) -> \"pd.Index\": ...\n\n    @property\n    def codes(self) -> \"pd.Series\": ...  # pylint: disable=missing-function-docstring\n\n    @property\n    def dtype(self) -> np.dtype: ...  # pylint: disable=missing-function-docstring\n\n    @property\n    def values(self) -> np.ndarray: ...  
# pylint: disable=missing-function-docstring\n\n    def to_arrow(  # pylint: disable=missing-function-docstring\n        self,\n    ) -> Union[\"pa.StringArray\", \"pa.IntegerArray\"]: ...\n\n    @property\n    def __cuda_array_interface__(self) -> CudaArrayInf: ...\n\n    @property\n    def _column(self) -> Any: ...\n\n\ndef _is_df_cat(data: Any) -> TypeGuard[DfCatAccessor]:\n    # Test pd.Series.cat, not pd.Series\n    return hasattr(data, \"categories\") and hasattr(data, \"codes\")\n\n\n@fcache\ndef _arrow_npdtype() -> Dict[Any, Type[np.number]]:\n    import pyarrow as pa\n\n    mapping: Dict[Any, Type[np.number]] = {\n        pa.int8(): np.int8,\n        pa.int16(): np.int16,\n        pa.int32(): np.int32,\n        pa.int64(): np.int64,\n        pa.uint8(): np.uint8,\n        pa.uint16(): np.uint16,\n        pa.uint32(): np.uint32,\n        pa.uint64(): np.uint64,\n        pa.float16(): np.float16,\n        pa.float32(): np.float32,\n        pa.float64(): np.float64,\n    }\n\n    return mapping\n\n\n@overload\ndef _arrow_buf_inf(address: int, typestr: str, size: int, stream: None) -> ArrayInf: ...\n\n\n@overload\ndef _arrow_buf_inf(\n    address: int, typestr: str, size: int, stream: int\n) -> CudaArrayInf: ...\n\n\ndef _arrow_buf_inf(\n    address: int, typestr: str, size: int, stream: Optional[int]\n) -> Union[ArrayInf, CudaArrayInf]:\n    if stream is not None:\n        jcuaif: CudaArrayInf = {\n            \"data\": (address, True),\n            \"typestr\": typestr,\n            \"version\": 3,\n            \"strides\": None,\n            \"shape\": (size,),\n            \"mask\": None,\n            \"stream\": stream,\n        }\n        return jcuaif\n\n    jaif: ArrayInf = {\n        \"data\": (address, True),\n        \"typestr\": typestr,\n        \"version\": 3,\n        \"strides\": None,\n        \"shape\": (size,),\n        \"mask\": None,\n    }\n    return jaif\n\n\ndef _arrow_cat_names_inf(cats: \"pa.StringArray\") -> Tuple[StringArray, 
Any]:\n    if not TYPE_CHECKING:\n        pa = import_pyarrow()\n\n    # FIXME(jiamingy): Account for offset, need to find an implementation that returns\n    # offset > 0\n    assert cats.offset == 0\n    buffers: List[pa.Buffer] = cats.buffers()\n    mask, offset, data = buffers\n    assert offset.is_cpu\n\n    off_len = len(cats) + 1\n\n    def get_n_bytes(typ: Type) -> int:\n        return off_len * (np.iinfo(typ).bits // 8)\n\n    if offset.size == get_n_bytes(np.int64):\n        if not isinstance(cats, pa.LargeStringArray):\n            arrow_str_error = \"Expecting a `pyarrow.Array`.\"\n            raise TypeError(arrow_str_error + f\" Got: {type(cats)}.\")\n        # Convert to 32bit integer, arrow recommends against the use of i64. Also,\n        # XGBoost cannot handle large number of categories (> 2**31).\n        i32cats = cats.cast(pa.string())\n        mask, offset, data = i32cats.buffers()\n\n    if offset.size != get_n_bytes(np.int32):\n        raise TypeError(\n            \"Arrow dictionary type offsets is required to be 32-bit integer.\"\n        )\n\n    joffset = _arrow_buf_inf(offset.address, \"<i4\", off_len, None)\n    jdata = _arrow_buf_inf(data.address, \"|i1\", data.size, None)\n    # Categories should not have missing values.\n    assert mask is None\n\n    jnames: StringArray = {\"offsets\": joffset, \"values\": jdata}\n    return jnames, (mask, offset, data)\n\n\ndef _arrow_array_inf(\n    array: \"pa.Array\",\n) -> ArrayInf:\n    \"\"\"Helper for handling categorical codes.\"\"\"\n    if not TYPE_CHECKING:\n        pa = import_pyarrow()\n    if not isinstance(array, pa.Array):  # pylint: disable=E0606\n        raise TypeError(f\"Invalid input type: {type(array)}\")\n\n    mask, data = array.buffers()\n    jdata = make_array_interface(\n        data.address,\n        shape=(len(array),),\n        dtype=_arrow_npdtype()[array.type],\n        is_cuda=not data.is_cpu,\n    )\n\n    if mask is not None:\n        jmask: Optional[ArrayInf] = 
{\n            \"data\": (mask.address, True),\n            \"typestr\": \"<t1\",\n            \"version\": 3,\n            \"strides\": None,\n            \"shape\": (len(array),),\n            \"mask\": None,\n        }\n        if not mask.is_cpu:\n            jmask[\"stream\"] = STREAM_PER_THREAD  # type: ignore[index, typeddict-unknown-key]\n    else:\n        jmask = None\n\n    jdata[\"mask\"] = jmask\n    return jdata\n\n\ndef arrow_cat_inf(  # pylint: disable=too-many-locals\n    cats: \"pa.StringArray\",\n    codes: Union[_ArrayLikeArg, _CudaArrayLikeArg, \"pa.IntegerArray\"],\n) -> Tuple[StringArray, ArrayInf, Tuple]:\n    \"\"\"Get the array interface representation of a string-based category array.\"\"\"\n    jnames, cats_tmp = _arrow_cat_names_inf(cats)\n    jcodes = _arrow_array_inf(codes)\n\n    return jnames, jcodes, (cats_tmp, None)\n\n\ndef _ensure_np_dtype(\n    data: DataType, dtype: Optional[NumpyDType]\n) -> Tuple[np.ndarray, Optional[NumpyDType]]:\n    \"\"\"Ensure the np array has correct type and is contiguous.\"\"\"\n    if array_hasobject(data) or data.dtype in [np.float16, np.bool_]:\n        dtype = np.float32\n        data = data.astype(dtype, copy=False)\n    if not data.flags.aligned:\n        data = np.require(data, requirements=\"A\")\n    return data, dtype\n\n\ndef _is_flatten(array: NumpyOrCupy) -> bool:\n    return len(array.shape) == 1 or array.shape[1] == 1\n\n\ndef array_interface_dict(data: np.ndarray) -> ArrayInf:\n    \"\"\"Returns an array interface from the input.\"\"\"\n    if array_hasobject(data):\n        raise ValueError(\"Input data contains `object` dtype.  
Expecting numeric data.\")\n    ainf = data.__array_interface__\n    if \"mask\" in ainf:\n        ainf[\"mask\"] = ainf[\"mask\"].__array_interface__\n    return cast(ArrayInf, ainf)\n\n\ndef pd_cat_inf(  # pylint: disable=too-many-locals\n    cats: DfCatAccessor, codes: \"pd.Series\"\n) -> Tuple[Union[StringArray, ArrayInf], ArrayInf, Tuple]:\n    \"\"\"Get the array interface representation of pandas category accessor.\"\"\"\n    # pandas uses -1 to represent missing values for categorical features\n    codes = codes.replace(-1, np.nan)\n\n    def is_prim() -> bool:\n        dtype = cats.dtype\n        try:\n            return np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.integer)\n        except TypeError:\n            return False\n\n    if is_prim():\n        # Numeric index type\n        name_values_num = cats.values\n        jarr_values = array_interface_dict(name_values_num)\n        code_values = codes.values\n        jarr_codes = array_interface_dict(code_values)\n        return jarr_values, jarr_codes, (name_values_num, code_values)\n\n    def npstr_to_arrow_strarr(strarr: Any) -> Tuple[np.ndarray, str]:\n        \"\"\"Convert a string-like array to an arrow string array.\"\"\"\n        if not isinstance(strarr, np.ndarray):\n            if hasattr(strarr, \"to_numpy\"):\n                strarr = strarr.to_numpy(dtype=object)\n            else:\n                strarr = np.asarray(strarr, dtype=object)\n\n        lenarr = np.vectorize(len)\n        offsets = np.cumsum(\n            np.concatenate([np.array([0], dtype=np.int64), lenarr(strarr)])\n        )\n        if strarr.dtype.kind == \"S\":\n            str_list = [s.decode(\"utf-8\") for s in strarr.tolist()]\n        else:\n            str_list = [str(s) for s in strarr.tolist()]\n        values = \"\".join(str_list)\n        assert \"\\0\" not in values  # arrow string array doesn't need null terminal\n        return offsets.astype(np.int32), values\n\n    # String index type\n    
name_offsets, name_values = npstr_to_arrow_strarr(cats.values)\n    name_offsets, _ = _ensure_np_dtype(name_offsets, np.int32)\n    joffsets = array_interface_dict(name_offsets)\n    bvalues = name_values.encode(\"utf-8\")\n\n    ptr = ctypes.c_void_p.from_buffer(ctypes.c_char_p(bvalues)).value\n    assert ptr is not None\n\n    jvalues: ArrayInf = {\n        \"data\": (ptr, True),\n        \"typestr\": \"|i1\",\n        \"shape\": (len(name_values),),\n        \"strides\": None,\n        \"version\": 3,\n        \"mask\": None,\n    }\n    jnames: StringArray = {\"offsets\": joffsets, \"values\": jvalues}\n\n    code_values = codes.values\n    jcodes = array_interface_dict(code_values)\n\n    buf = (\n        name_offsets,\n        name_values,\n        bvalues,\n        code_values,\n    )  # store temporary values\n    return jnames, jcodes, buf\n\n\ndef array_interface(data: np.ndarray) -> bytes:\n    \"\"\"Make array interface str.\"\"\"\n    interface = array_interface_dict(data)\n    interface_str = bytes(json.dumps(interface), \"utf-8\")\n    return interface_str\n\n\ndef check_cudf_meta(data: _CudaArrayLikeArg, field: str) -> None:\n    \"Make sure no missing value in meta data.\"\n    if (\n        \"mask\" in data.__cuda_array_interface__\n        and data.__cuda_array_interface__[\"mask\"] is not None\n    ):\n        raise ValueError(f\"Missing value is not allowed for: {field}\")\n\n\nclass ArrowSchema(ctypes.Structure):\n    \"\"\"The Schema type from arrow C array.\"\"\"\n\n    _fields_ = [\n        (\"format\", ctypes.c_char_p),\n        (\"name\", ctypes.c_char_p),\n        (\"metadata\", ctypes.c_char_p),\n        (\"flags\", ctypes.c_int64),\n        (\"n_children\", ctypes.c_int64),\n        (\"children\", ctypes.POINTER(ctypes.c_void_p)),\n        (\"dictionary\", ctypes.c_void_p),\n        (\"release\", ctypes.c_void_p),\n        (\"private_data\", ctypes.c_void_p),\n    ]\n\n\nclass ArrowArray(ctypes.Structure):\n    \"\"\"The Array type 
from arrow C array.\"\"\"\n\n\nArrowArray._fields_ = [  # pylint: disable=protected-access\n    (\"length\", ctypes.c_int64),\n    (\"null_count\", ctypes.c_int64),\n    (\"offset\", ctypes.c_int64),\n    (\"n_buffers\", ctypes.c_int64),\n    (\"n_children\", ctypes.c_int64),\n    (\"buffers\", ctypes.POINTER(ctypes.c_void_p)),\n    (\"children\", ctypes.POINTER(ctypes.POINTER(ArrowArray))),\n    (\"dictionary\", ctypes.POINTER(ArrowArray)),\n    (\"release\", ctypes.c_void_p),\n    (\"private_data\", ctypes.c_void_p),\n]\n\n\nclass ArrowDeviceArray(ctypes.Structure):\n    \"\"\"The Array type from arrow C device array.\"\"\"\n\n    _fields_ = [\n        (\"array\", ArrowArray),\n        (\"device_id\", ctypes.c_int64),\n        (\"device_type\", ctypes.c_int32),\n        (\"sync_event\", ctypes.c_void_p),\n        (\"reserved\", ctypes.c_int64 * 3),\n    ]\n\n\nPyCapsule_GetName = ctypes.pythonapi.PyCapsule_GetName\nPyCapsule_GetName.restype = ctypes.c_char_p\nPyCapsule_GetName.argtypes = [ctypes.py_object]\n\n\nPyCapsule_GetPointer = ctypes.pythonapi.PyCapsule_GetPointer\nPyCapsule_GetPointer.restype = ctypes.c_void_p\nPyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_char_p]\n\n\ndef wait_event(event_hdl: int) -> None:\n    \"\"\"Wait for CUDA event exported by arrow.\"\"\"\n    # cuda-python is a dependency of cuDF.\n    from cuda.bindings import runtime as cudart\n\n    event = ctypes.cast(event_hdl, ctypes.POINTER(ctypes.c_int64))\n    (status,) = cudart.cudaStreamWaitEvent(\n        STREAM_PER_THREAD,\n        event.contents.value,\n        cudart.cudaEventWaitDefault,\n    )\n    if status != cudart.cudaError_t.cudaSuccess:\n        _, msg = cudart.cudaGetErrorString(status)\n        raise ValueError(msg)\n\n\ndef cudf_cat_inf(  # pylint: disable=too-many-locals\n    cats: DfCatAccessor, codes: \"pd.Series\"\n) -> Tuple[Union[CudaArrayInf, CudaStringArray], ArrayInf, Tuple]:\n    \"\"\"Obtain the cuda array interface for cuDF categories.\"\"\"\n   
 cp = import_cupy()\n    is_num_idx = cp.issubdtype(cats.dtype, cp.floating) or cp.issubdtype(\n        cats.dtype, cp.integer\n    )\n    if is_num_idx:\n        cats_ainf = cuda_array_interface_dict(cats)\n        codes_ainf = cuda_array_interface_dict(codes)\n        return cats_ainf, codes_ainf, (cats, codes)\n\n    # pylint: disable=protected-access\n    arrow_col = cats._column.to_pylibcudf(mode=\"read\")\n    # Tuple[types.CapsuleType, types.CapsuleType]\n    schema, array = arrow_col.__arrow_c_device_array__()\n\n    array_ptr = PyCapsule_GetPointer(array, PyCapsule_GetName(array))\n    schema_ptr = PyCapsule_GetPointer(schema, PyCapsule_GetName(schema))\n\n    # Cast to arrow array\n    arrow_device_array = ctypes.cast(\n        array_ptr, ctypes.POINTER(ArrowDeviceArray)\n    ).contents\n    wait_event(arrow_device_array.sync_event)\n    assert arrow_device_array.device_type == 2  # 2 is CUDA\n\n    arrow_array = arrow_device_array.array\n    mask, offset, data = (\n        arrow_array.buffers[0],\n        arrow_array.buffers[1],\n        arrow_array.buffers[2],\n    )\n    # Categories should not have missing values.\n    assert mask is None\n    assert arrow_array.n_children == 0\n    assert arrow_array.n_buffers == 3\n    assert arrow_array.offset == 0\n\n    # Cast to ArrowSchema\n    arrow_schema = ctypes.cast(schema_ptr, ctypes.POINTER(ArrowSchema)).contents\n    assert arrow_schema.format in (b\"u\", b\"U\", b\"vu\")  # utf8, large utf8\n    if arrow_schema.format in (b\"u\", b\"vu\"):\n        joffset: CudaArrayInf = _arrow_buf_inf(\n            offset, \"<i4\", arrow_array.length + 1, STREAM_PER_THREAD\n        )\n    elif arrow_schema.format == b\"U\":\n        raise TypeError(\"Large string for category index (names) is not supported.\")\n    else:\n        raise TypeError(\n            \"Unexpected type for category index. 
It's neither numeric nor string.\"\n        )\n    # 0 size for unknown\n    jdata: CudaArrayInf = _arrow_buf_inf(data, \"|i1\", 0, STREAM_PER_THREAD)\n    jnames: CudaStringArray = {\n        \"offsets\": joffset,\n        \"values\": jdata,\n    }\n\n    jcodes = cuda_array_interface_dict(codes)\n    return jnames, jcodes, (arrow_col,)\n\n\nclass Categories:\n    \"\"\"An internal storage class for categories returned by the DMatrix and the\n    Booster. This class is designed to be opaque. It is intended to be used exclusively\n    by XGBoost as an intermediate storage for re-coding categorical data.\n\n    The categories are saved along with the booster object. As a result, users don't\n    need to preserve this class for re-coding. Use the booster model IO instead if you\n    want to preserve the categories in a stable format.\n\n    .. versionadded:: 3.1.0\n\n    .. warning::\n\n        This class is internal.\n\n    .. code-block:: python\n\n        Xy = xgboost.QuantileDMatrix(X, y, enable_categorical=True)\n        booster = xgboost.train({}, Xy)\n\n        categories = booster.get_categories() # Get categories\n\n        # Use categories as a reference for re-coding\n        Xy_new = xgboost.QuantileDMatrix(\n            X_new, y_new, feature_types=categories, enable_categorical=True, ref=Xy\n        )\n\n        # Categories will be part of the `model.json`.\n        booster.save_model(\"model.json\")\n\n    \"\"\"\n\n    def __init__(\n        self,\n        handle: Tuple[ctypes.c_void_p, Callable[[], None]],\n        arrow_arrays: Optional[ArrowCatList],\n    ) -> None:\n        # The handle type is a bundle of the handle and the free call. 
Otherwise, we\n        # will have to import the `_lib` and the `_check_call` from the core module\n        # inside the __del__ method to avoid cyclic module dependency.\n        # Importing modules in __del__ can result in Python abort if __del__ is called\n        # during exception handling (interpreter is shutting down).\n        self._handle, self._free = handle\n        self._arrow_arrays = arrow_arrays\n\n    def to_arrow(self) -> ArrowCatList:\n        \"\"\"Get the categories in the dataset. The results are stored in a list of\n        (feature name, arrow array) pairs, with one array for each categorical\n        feature. If a feature is numerical, then the corresponding column in the list is\n        None. A value error will be raised if this container was created without the\n        `export_to_arrow` option.\n\n        \"\"\"\n        if self._arrow_arrays is None:\n            raise ValueError(\n                \"The `export_to_arrow` option of the `get_categories` method\"\n                \" is required.\"\n            )\n        return self._arrow_arrays\n\n    def empty(self) -> bool:\n        \"\"\"Returns True if there's no category.\"\"\"\n        return self._handle.value is None\n\n    def get_handle(self) -> int:\n        \"\"\"Internal method for retrieving the handle.\"\"\"\n        assert self._handle.value\n        return self._handle.value\n\n    def __del__(self) -> None:\n        if self._handle.value is None:\n            return\n        self._free()\n\n\ndef get_ref_categories(\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n) -> Tuple[Optional[FeatureTypes], Optional[Categories]]:\n    \"\"\"Get the optional reference categories from the `feature_types`. 
This is used by\n    various `DMatrix` where the `feature_types` is reused for specifying the reference\n    categories.\n\n    \"\"\"\n    if isinstance(feature_types, Categories):\n        ref_categories = feature_types\n        feature_types = None\n    else:\n        ref_categories = None\n    return feature_types, ref_categories\n\n\n# Type schema for storing JSON-encoded array interface\nAifType: TypeAlias = List[\n    Union[\n        # numeric column\n        Union[ArrayInf, CudaArrayInf],\n        # categorical column\n        Tuple[\n            # (cuda) numeric index | (cuda) string index\n            Union[ArrayInf, CudaArrayInf, StringArray, CudaStringArray],\n            Union[ArrayInf, CudaArrayInf],  # codes\n        ],\n    ]\n]\n\n\nclass TransformedDf(ABC):\n    \"\"\"Internal class for storing transformed dataframe.\n\n    Parameters\n    ----------\n    ref_categories :\n        Optional reference categories used for re-coding.\n\n    aitfs :\n        Array interface for each column.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        ref_categories: Optional[Categories],\n        aitfs: AifType,\n        temporary_buffers: List[Tuple],\n    ) -> None:\n        self.ref_categories = ref_categories\n        if ref_categories is not None and ref_categories.get_handle() is not None:\n            aif = ref_categories.get_handle()\n            self.ref_aif: Optional[int] = aif\n        else:\n            self.ref_aif = None\n\n        self.aitfs = aitfs\n        self.temporary_buffers = temporary_buffers\n\n    def array_interface(self) -> bytes:\n        \"\"\"Return a byte string for JSON encoded array interface.\"\"\"\n        if self.ref_categories is not None:\n            ref_inf: dict = {\"ref_categories\": self.ref_aif, \"columns\": self.aitfs}\n            inf = bytes(json.dumps(ref_inf), \"utf-8\")\n        else:\n            inf = bytes(json.dumps(self.aitfs), \"utf-8\")\n        return inf\n\n    @property\n    @abstractmethod\n    
def shape(self) -> Tuple[int, int]:\n        \"\"\"Return the shape of the dataframe.\"\"\"\n"
  },
  {
    "path": "python-package/xgboost/_typing.py",
    "content": "# pylint: disable=protected-access\n\"\"\"Shared typing definition.\"\"\"\n\nimport ctypes\nimport os\nfrom enum import IntEnum, unique\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    AnyStr,\n    Callable,\n    Dict,\n    List,\n    Optional,\n    Sequence,\n    Tuple,\n    Type,\n    TypeAlias,\n    TypeVar,\n    Union,\n)\n\nimport numpy as np\n\nDataType = Any\n\nFeatureInfo = Sequence[str]\nFeatureNames = FeatureInfo\nFeatureTypes = FeatureInfo\nBoosterParam = Union[List, Dict[str, Any]]  # better be sequence\n\nArrayLike = Any\nif TYPE_CHECKING:\n    import pyarrow as pa\n\n    PathLike = Union[str, os.PathLike[str]]\nelse:\n    PathLike = Union[str, os.PathLike]\n\nArrowCatCol: TypeAlias = Optional[Union[\"pa.StringArray\", \"pa.NumericArray\"]]\nArrowCatList: TypeAlias = List[Tuple[str, Optional[ArrowCatCol]]]\n\nCupyT = ArrayLike  # maybe need a stub for cupy arrays\nNumpyOrCupy = Union[np.ndarray, Any]\nNumpyDType = Union[str, Type[np.number]]\nPandasDType = Any  # real type is pandas.core.dtypes.base.ExtensionDtype\n\nFloatCompatible = Union[float, np.float32, np.float64]\n\n# typing.SupportsInt is not suitable here since floating point values are convertible to\n# integers as well.\nInteger = Union[int, np.integer]\nIterationRange = Tuple[Integer, Integer]\n\n# callables\nFPreProcCallable = Callable\n\n# ctypes\n# c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h\nc_bst_ulong = ctypes.c_uint64  # pylint: disable=C0103\n\nModelIn = Union[os.PathLike[AnyStr], bytearray, str]\n\nCTypeT = TypeVar(\n    \"CTypeT\",\n    ctypes.c_void_p,\n    ctypes.c_char_p,\n    ctypes.c_int,\n    ctypes.c_float,\n    ctypes.c_uint,\n    ctypes.c_size_t,\n)\n\n# supported numeric types\nCNumeric = Union[\n    ctypes.c_float,\n    ctypes.c_double,\n    ctypes.c_uint,\n    ctypes.c_uint64,\n    ctypes.c_int32,\n    ctypes.c_int64,\n]\n\n# c pointer types\nif TYPE_CHECKING:\n    CStrPtr = ctypes._Pointer[ctypes.c_char]\n\n    CStrPptr = 
ctypes._Pointer[ctypes.c_char_p]\n\n    CFloatPtr = ctypes._Pointer[ctypes.c_float]\n\n    CNumericPtr = Union[\n        ctypes._Pointer[ctypes.c_float],\n        ctypes._Pointer[ctypes.c_double],\n        ctypes._Pointer[ctypes.c_uint],\n        ctypes._Pointer[ctypes.c_uint64],\n        ctypes._Pointer[ctypes.c_int32],\n        ctypes._Pointer[ctypes.c_int64],\n    ]\nelse:\n    CStrPtr = ctypes._Pointer\n\n    CStrPptr = ctypes._Pointer\n\n    CFloatPtr = ctypes._Pointer\n\n    CNumericPtr = Union[\n        ctypes._Pointer,\n        ctypes._Pointer,\n        ctypes._Pointer,\n        ctypes._Pointer,\n        ctypes._Pointer,\n        ctypes._Pointer,\n    ]\n\n# The second arg is actually Optional[List[cudf.Series]], skipped for easier type check.\n# The cudf Series is the obtained cat codes, preserved in the `DataIter` to prevent it\n# being freed.\nTransformedData = Tuple[Any, Optional[FeatureNames], Optional[FeatureTypes]]\n\n# template parameter\n_T = TypeVar(\"_T\")\n_F = TypeVar(\"_F\", bound=Callable[..., Any])\n\n_ScoreList = Union[List[float], List[Tuple[float, float]]]\nEvalsLog: TypeAlias = Dict[str, Dict[str, _ScoreList]]\n\n\n@unique\nclass DataSplitMode(IntEnum):\n    \"\"\"Supported data split mode for DMatrix.\"\"\"\n\n    ROW = 0\n    COL = 1\n"
  },
  {
    "path": "python-package/xgboost/callback.py",
    "content": "\"\"\"Callback library containing training routines.  See :doc:`Callback Functions\n</python/callbacks>` for a quick introduction.\n\n\"\"\"\n\nimport collections\nimport os\nimport pickle\nfrom abc import ABC\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    List,\n    Optional,\n    Sequence,\n    Tuple,\n    TypeAlias,\n    TypeVar,\n    Union,\n    cast,\n)\n\nimport numpy\n\nfrom . import collective\nfrom ._typing import EvalsLog, _ScoreList\nfrom .core import (\n    Booster,\n    DMatrix,\n    XGBoostError,\n    _deprecate_positional_args,\n    _parse_eval_str,\n)\n\n__all__ = [\n    \"TrainingCallback\",\n    \"LearningRateScheduler\",\n    \"EarlyStopping\",\n    \"EvaluationMonitor\",\n    \"TrainingCheckPoint\",\n    \"CallbackContainer\",\n]\n\n_Score = Union[float, Tuple[float, float]]\n\n_Model = Any  # real type is Union[Booster, CVPack]; need more work\n\n\n# pylint: disable=unused-argument\nclass TrainingCallback(ABC):\n    \"\"\"Interface for training callback.\n\n    .. versionadded:: 1.3.0\n\n    \"\"\"\n\n    EvalsLog: TypeAlias = EvalsLog\n\n    def __init__(self) -> None:\n        pass\n\n    def before_training(self, model: _Model) -> _Model:\n        \"\"\"Run before training starts.\"\"\"\n        return model\n\n    def after_training(self, model: _Model) -> _Model:\n        \"\"\"Run after training is finished.\"\"\"\n        return model\n\n    def before_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        \"\"\"Run before each iteration.  Returns True when training should stop. See\n        :py:meth:`after_iteration` for details.\n\n        \"\"\"\n        return False\n\n    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        \"\"\"Run after each iteration.  
Returns `True` when training should stop.\n\n        Parameters\n        ----------\n\n        model :\n            Either a :py:class:`~xgboost.Booster` object or a CVPack if the cv function\n            in xgboost is being used.\n        epoch :\n            The current training iteration.\n        evals_log :\n            A dictionary containing the evaluation history:\n\n            .. code-block:: python\n\n                {\"data_name\": {\"metric_name\": [0.5, ...]}}\n\n        \"\"\"\n        return False\n\n\ndef _aggcv(rlist: List[str]) -> List[Tuple[str, float, float]]:\n    # pylint: disable=invalid-name, too-many-locals\n    \"\"\"Aggregate cross-validation results.\"\"\"\n    cvmap: Dict[Tuple[int, str], List[float]] = {}\n    idx = rlist[0].split()[0]\n    for line in rlist:\n        arr: List[str] = line.split()\n        assert idx == arr[0]\n        for metric_idx, it in enumerate(arr[1:]):\n            if not isinstance(it, str):\n                it = it.decode()\n            k, v = it.split(\":\")\n            if (metric_idx, k) not in cvmap:\n                cvmap[(metric_idx, k)] = []\n            cvmap[(metric_idx, k)].append(float(v))\n    msg = idx\n    results = []\n    for (_, name), s in sorted(cvmap.items(), key=lambda x: x[0][0]):\n        as_arr = numpy.array(s)\n        if not isinstance(msg, str):\n            msg = msg.decode()\n        mean, std = numpy.mean(as_arr), numpy.std(as_arr)\n        results.extend([(name, mean, std)])\n    return results\n\n\n# allreduce type\n_ART = TypeVar(\"_ART\")\n\n\ndef _allreduce_metric(score: _ART) -> _ART:\n    \"\"\"Helper function for computing customized metric in distributed\n    environment.  
Not strictly correct as many functions don't use mean value\n    as final result.\n\n    \"\"\"\n    world = collective.get_world_size()\n    assert world != 0\n    if world == 1:\n        return score\n    if isinstance(score, tuple):  # has mean and stdv\n        raise ValueError(\n            \"xgboost.cv function should not be used in distributed environment.\"\n        )\n    arr = numpy.array([score])\n    arr = collective.allreduce(arr, collective.Op.SUM) / world\n    return arr[0]\n\n\nclass CallbackContainer:\n    \"\"\"A special internal callback for invoking a list of other callbacks.\n\n    .. versionadded:: 1.3.0\n\n    \"\"\"\n\n    def __init__(\n        self,\n        callbacks: Sequence[TrainingCallback],\n        metric: Optional[Callable] = None,\n        output_margin: bool = True,\n        is_cv: bool = False,\n    ) -> None:\n        self.callbacks = list(dict.fromkeys(callbacks))\n        for cb in callbacks:\n            if not isinstance(cb, TrainingCallback):\n                raise TypeError(\"callback must be an instance of `TrainingCallback`.\")\n\n        msg = (\n            \"metric must be callable object for monitoring.  
For builtin metrics\"\n            \", passing them in training parameter invokes monitor automatically.\"\n        )\n        if metric is not None and not callable(metric):\n            raise TypeError(msg)\n\n        self.metric = metric\n        self.history: EvalsLog = collections.OrderedDict()\n        self._output_margin = output_margin\n        self.is_cv = is_cv\n\n        if self.is_cv:\n            self.aggregated_cv: Optional[list[tuple[str, float, float]]] = None\n\n    def before_training(self, model: _Model) -> _Model:\n        \"\"\"Function called before training.\"\"\"\n        for c in self.callbacks:\n            model = c.before_training(model=model)\n            msg = \"before_training should return the model\"\n            if self.is_cv:\n                assert isinstance(model.cvfolds, list), msg\n            else:\n                assert isinstance(model, Booster), msg\n        return model\n\n    def after_training(self, model: _Model) -> _Model:\n        \"\"\"Function called after training.\"\"\"\n        for c in self.callbacks:\n            model = c.after_training(model=model)\n            msg = \"after_training should return the model\"\n            if self.is_cv:\n                assert isinstance(model.cvfolds, list), msg\n            else:\n                assert isinstance(model, Booster), msg\n\n        return model\n\n    def before_iteration(\n        self,\n        model: _Model,\n        epoch: int,\n        dtrain: DMatrix,\n        evals: Optional[List[Tuple[DMatrix, str]]],\n    ) -> bool:\n        \"\"\"Function called before training iteration.\"\"\"\n        return any(\n            c.before_iteration(model, epoch, self.history) for c in self.callbacks\n        )\n\n    def _update_history(\n        self,\n        score: Union[List[Tuple[str, float]], List[Tuple[str, float, float]]],\n        epoch: int,\n    ) -> None:\n        for d in score:\n            name: str = d[0]\n            s: float = d[1]\n            if 
self.is_cv:\n                std = float(cast(Tuple[str, float, float], d)[2])\n                x: _Score = (s, std)\n            else:\n                x = s\n            splited_names = name.split(\"-\")\n            data_name = splited_names[0]\n            metric_name = \"-\".join(splited_names[1:])\n            x = _allreduce_metric(x)\n            if data_name not in self.history:\n                self.history[data_name] = collections.OrderedDict()\n            data_history = self.history[data_name]\n            if metric_name not in data_history:\n                data_history[metric_name] = cast(_ScoreList, [])\n            metric_history = data_history[metric_name]\n            if self.is_cv:\n                cast(List[Tuple[float, float]], metric_history).append(\n                    cast(Tuple[float, float], x)\n                )\n            else:\n                cast(List[float], metric_history).append(cast(float, x))\n\n    def after_iteration(\n        self,\n        model: _Model,\n        epoch: int,\n        dtrain: DMatrix,\n        evals: Optional[List[Tuple[DMatrix, str]]],\n    ) -> bool:\n        \"\"\"Function called after training iteration.\"\"\"\n        if self.is_cv:\n            scores = model.eval(epoch, self.metric, self._output_margin)\n            scores = _aggcv(scores)\n            self.aggregated_cv = scores\n            self._update_history(scores, epoch)\n        else:\n            evals = [] if evals is None else evals\n            for _, name in evals:\n                assert name.find(\"-\") == -1, \"Dataset name should not contain `-`\"\n            score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)\n            metric_score = _parse_eval_str(score)\n            self._update_history(metric_score, epoch)\n        ret = any(c.after_iteration(model, epoch, self.history) for c in self.callbacks)\n        return ret\n\n\nclass LearningRateScheduler(TrainingCallback):\n    \"\"\"Callback function for 
scheduling learning rate.\n\n    .. versionadded:: 1.3.0\n\n    Parameters\n    ----------\n\n    learning_rates :\n        If it's a callable object, then it should accept an integer parameter\n        `epoch` and returns the corresponding learning rate.  Otherwise it\n        should be a sequence like list or tuple with the same size of boosting\n        rounds.\n\n    \"\"\"\n\n    def __init__(\n        self, learning_rates: Union[Callable[[int], float], Sequence[float]]\n    ) -> None:\n        if not callable(learning_rates) and not isinstance(\n            learning_rates, collections.abc.Sequence\n        ):\n            raise TypeError(\n                \"Invalid learning rates, expecting callable or sequence, got: \"\n                f\"{type(learning_rates)}\"\n            )\n\n        if callable(learning_rates):\n            self.learning_rates = learning_rates\n        else:\n            self.learning_rates = lambda epoch: cast(Sequence, learning_rates)[epoch]\n        super().__init__()\n\n    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        model.set_param(\"learning_rate\", self.learning_rates(epoch))\n        return False\n\n\n# pylint: disable=too-many-instance-attributes\nclass EarlyStopping(TrainingCallback):\n    \"\"\"Callback function for early stopping\n\n    .. versionadded:: 1.3.0\n\n    Parameters\n    ----------\n    rounds :\n        Early stopping rounds.\n    metric_name :\n        Name of metric that is used for early stopping.\n    data_name :\n        Name of dataset that is used for early stopping.\n    maximize :\n        Whether to maximize evaluation metric.  None means auto (discouraged).\n    save_best :\n        Whether training should return the best model or the last model. If set to\n        `True`, it will only keep the boosting rounds up to the detected best iteration,\n        discarding the ones that come after. 
This is only supported with tree methods\n        (not `gblinear`). Also, the `cv` function doesn't return a model, the parameter\n        is not applicable.\n    min_delta :\n\n        .. versionadded:: 1.5.0\n\n        Minimum absolute change in score to be qualified as an improvement.\n\n    Examples\n    --------\n\n    .. code-block:: python\n\n        es = xgboost.callback.EarlyStopping(\n            rounds=2,\n            min_delta=1e-3,\n            save_best=True,\n            maximize=False,\n            data_name=\"validation_0\",\n            metric_name=\"mlogloss\",\n        )\n        clf = xgboost.XGBClassifier(tree_method=\"hist\", device=\"cuda\", callbacks=[es])\n\n        X, y = load_digits(return_X_y=True)\n        clf.fit(X, y, eval_set=[(X, y)])\n    \"\"\"\n\n    # pylint: disable=too-many-arguments\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        rounds: int,\n        metric_name: Optional[str] = None,\n        data_name: Optional[str] = None,\n        maximize: Optional[bool] = None,\n        save_best: Optional[bool] = False,\n        min_delta: float = 0.0,\n    ) -> None:\n        self.data = data_name\n        self.metric_name = metric_name\n        self.rounds = rounds\n        self.save_best = save_best\n        self.maximize = maximize\n        self.stopping_history: EvalsLog = {}\n        self._min_delta = min_delta\n        if self._min_delta < 0:\n            raise ValueError(\"min_delta must be greater or equal to 0.\")\n\n        self.current_rounds: int = 0\n        self.best_scores: dict = {}\n        self.starting_round: int = 0\n        super().__init__()\n\n    def before_training(self, model: _Model) -> _Model:\n        self.starting_round = model.num_boosted_rounds()\n        if not isinstance(model, Booster) and self.save_best:\n            raise ValueError(\n                \"`save_best` is not applicable to the `cv` function as it doesn't\"\n                \" return a model.\"\n 
           )\n        return model\n\n    def _update_rounds(\n        self, *, score: _Score, name: str, metric: str, model: _Model, epoch: int\n    ) -> bool:\n        def get_s(value: _Score) -> float:\n            \"\"\"get score if it's cross validation history.\"\"\"\n            return value[0] if isinstance(value, tuple) else value\n\n        def maximize(new: _Score, best: _Score) -> bool:\n            \"\"\"New score should be greater than the old one.\"\"\"\n            return numpy.greater(get_s(new) - self._min_delta, get_s(best))\n\n        def minimize(new: _Score, best: _Score) -> bool:\n            \"\"\"New score should be lesser than the old one.\"\"\"\n            return numpy.greater(get_s(best) - self._min_delta, get_s(new))\n\n        if self.maximize is None:\n            # Just to be compatibility with old behavior before 1.3.  We should let\n            # user to decide.\n            maximize_metrics = (\n                \"auc\",\n                \"aucpr\",\n                \"pre\",\n                \"pre@\",\n                \"map\",\n                \"ndcg\",\n                \"auc@\",\n                \"aucpr@\",\n                \"map@\",\n                \"ndcg@\",\n            )\n            if metric != \"mape\" and any(metric.startswith(x) for x in maximize_metrics):\n                self.maximize = True\n            else:\n                self.maximize = False\n\n        if self.maximize:\n            improve_op = maximize\n        else:\n            improve_op = minimize\n\n        if not self.stopping_history:  # First round\n            self.current_rounds = 0\n            self.stopping_history[name] = {}\n            self.stopping_history[name][metric] = cast(_ScoreList, [score])\n            self.best_scores[name] = {}\n            self.best_scores[name][metric] = [score]\n            model.set_attr(best_score=str(get_s(score)), best_iteration=str(epoch))\n        elif not improve_op(score, 
self.best_scores[name][metric][-1]):\n            # Not improved\n            self.stopping_history[name][metric].append(score)  # type: ignore[arg-type]\n            self.current_rounds += 1\n        else:  # Improved\n            self.stopping_history[name][metric].append(score)  # type: ignore[arg-type]\n            self.best_scores[name][metric].append(score)\n            record = self.stopping_history[name][metric][-1]\n            model.set_attr(best_score=str(get_s(record)), best_iteration=str(epoch))\n            self.current_rounds = 0  # reset\n\n        if self.current_rounds >= self.rounds:\n            # Should stop\n            return True\n        return False\n\n    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        epoch += self.starting_round  # training continuation\n        msg = \"Must have at least 1 validation dataset for early stopping.\"\n        if len(evals_log.keys()) < 1:\n            raise ValueError(msg)\n\n        # Get data name\n        if self.data:\n            data_name = self.data\n        else:\n            # Use the last one as default.\n            data_name = list(evals_log.keys())[-1]\n        if data_name not in evals_log:\n            raise ValueError(f\"No dataset named: {data_name}\")\n\n        if not isinstance(data_name, str):\n            raise TypeError(\n                f\"The name of the dataset should be a string. 
Got: {type(data_name)}\"\n            )\n        data_log = evals_log[data_name]\n\n        # Get metric name\n        if self.metric_name:\n            metric_name = self.metric_name\n        else:\n            # Use last metric by default.\n            metric_name = list(data_log.keys())[-1]\n        if metric_name not in data_log:\n            raise ValueError(f\"No metric named: {metric_name}\")\n\n        # The latest score\n        score = data_log[metric_name][-1]\n        return self._update_rounds(\n            score=score, name=data_name, metric=metric_name, model=model, epoch=epoch\n        )\n\n    def after_training(self, model: _Model) -> _Model:\n        if not self.save_best:\n            return model\n\n        try:\n            best_iteration = model.best_iteration\n            best_score = model.best_score\n            assert best_iteration is not None and best_score is not None\n            model = model[: best_iteration + 1]\n            model.best_iteration = best_iteration\n            model.best_score = best_score\n        except XGBoostError as e:\n            raise XGBoostError(\n                \"`save_best` is not applicable to the current booster\"\n            ) from e\n\n        return model\n\n\nclass EvaluationMonitor(TrainingCallback):\n    \"\"\"Print the evaluation result at each iteration.\n\n    .. versionadded:: 1.3.0\n\n    Parameters\n    ----------\n\n    rank :\n        Which worker should be used for printing the result.\n    period :\n        How many epochs between printing.\n    show_stdv :\n        Used in cv to show standard deviation.  
Users should not specify it.\n    logger :\n        A callable used for logging evaluation result.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        rank: int = 0,\n        period: int = 1,\n        show_stdv: bool = False,\n        logger: Callable[[str], None] = collective.communicator_print,\n    ):\n        self.printer_rank = rank\n        self.show_stdv = show_stdv\n        self.period = period\n        self._logger = logger\n        assert period > 0\n        # last error message, useful when early stopping and period are used together.\n        self._latest: Optional[str] = None\n        super().__init__()\n\n    def _fmt_metric(\n        self, data: str, metric: str, score: float, std: Optional[float]\n    ) -> str:\n        if std is not None and self.show_stdv:\n            msg = f\"\\t{data + '-' + metric}:{score:.5f}+{std:.5f}\"\n        else:\n            msg = f\"\\t{data + '-' + metric}:{score:.5f}\"\n        return msg\n\n    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        if not evals_log:\n            return False\n\n        msg: str = f\"[{epoch}]\"\n        if collective.get_rank() == self.printer_rank:\n            for data, metric in evals_log.items():\n                for metric_name, log in metric.items():\n                    stdv: Optional[float] = None\n                    if isinstance(log[-1], tuple):\n                        score = log[-1][0]\n                        stdv = log[-1][1]\n                    else:\n                        score = log[-1]\n                    msg += self._fmt_metric(data, metric_name, score, stdv)\n            msg += \"\\n\"\n\n            if (epoch % self.period) == 0 or self.period == 1:\n                self._logger(msg)\n                self._latest = None\n            else:\n                # There is skipped message\n                self._latest = msg\n        return False\n\n    def after_training(self, model: _Model) -> _Model:\n        if 
collective.get_rank() == self.printer_rank and self._latest is not None:\n            self._logger(self._latest)\n        return model\n\n\nclass TrainingCheckPoint(TrainingCallback):\n    \"\"\"Checkpointing operation. Users are encouraged to create their own callbacks for\n    checkpoint as XGBoost doesn't handle distributed file systems. When checkpointing on\n    distributed systems, be sure to know the rank of the worker to avoid multiple\n    workers checkpointing to the same place.\n\n    .. versionadded:: 1.3.0\n\n    Since XGBoost 2.1.0, the default format is changed to UBJSON.\n\n    Parameters\n    ----------\n\n    directory :\n        Output model directory.\n    name :\n        pattern of output model file.  Models will be saved as name_0.ubj, name_1.ubj,\n        name_2.ubj ....\n    as_pickle :\n        When set to True, all training parameters will be saved in pickle format,\n        instead of saving only the model.\n    interval :\n        Interval of checkpointing.  Checkpointing is slow so setting a larger number can\n        reduce performance hit.\n\n    \"\"\"\n\n    default_format = \"ubj\"\n\n    def __init__(\n        self,\n        directory: Union[str, os.PathLike],\n        name: str = \"model\",\n        as_pickle: bool = False,\n        interval: int = 100,\n    ) -> None:\n        self._path = os.fspath(directory)\n        self._name = name\n        self._as_pickle = as_pickle\n        self._iterations = interval\n        self._epoch = 0  # counter for interval\n        self._start = 0  # beginning iteration\n        super().__init__()\n\n    def before_training(self, model: _Model) -> _Model:\n        self._start = model.num_boosted_rounds()\n        return model\n\n    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:\n        if self._epoch == self._iterations:\n            path = os.path.join(\n                self._path,\n                self._name\n                + \"_\"\n                + 
(str(epoch + self._start))\n                + (\".pkl\" if self._as_pickle else f\".{self.default_format}\"),\n            )\n            self._epoch = 0  # reset counter\n            if collective.get_rank() == 0:\n                # checkpoint using the first worker\n                if self._as_pickle:\n                    with open(path, \"wb\") as fd:\n                        pickle.dump(model, fd)\n                else:\n                    model.save_model(path)\n        self._epoch += 1\n        return False\n"
  },
  {
    "path": "python-package/xgboost/collective.py",
    "content": "\"\"\"XGBoost collective communication related API.\"\"\"\n\nimport ctypes\nimport logging\nimport os\nimport pickle\nfrom dataclasses import dataclass\nfrom enum import IntEnum, unique\nfrom typing import Any, Callable, Dict, Optional, TypeAlias, Union\n\nimport numpy as np\n\nfrom ._typing import _T\nfrom .core import _LIB, _check_call, build_info, c_str, make_jcargs, py_str\n\nLOGGER = logging.getLogger(\"[xgboost.collective]\")\n\n\n_Conf: TypeAlias = Dict[str, Union[int, str]]\n_ArgVals: TypeAlias = Optional[Union[int, str]]\n_Args: TypeAlias = Dict[str, _ArgVals]\n\n\n@dataclass\nclass Config:\n    \"\"\"User configuration for the communicator context. This is used for easier\n    integration with distributed frameworks. Users of the collective module can pass the\n    parameters directly into tracker and the communicator.\n\n    .. versionadded:: 3.0\n\n    Attributes\n    ----------\n    retry : See `dmlc_retry` in :py:meth:`init`.\n\n    timeout :\n        See `dmlc_timeout` in :py:meth:`init`. This is only used for communicators, not\n        the tracker. They are different parameters since the timeout for tracker limits\n        only the time for starting and finalizing the communication group, whereas the\n        timeout for communicators limits the time used for collective operations, like\n        :py:meth:`allreduce`.\n\n    tracker_host_ip : See :py:class:`~xgboost.tracker.RabitTracker`.\n\n    tracker_port : See :py:class:`~xgboost.tracker.RabitTracker`.\n\n    tracker_timeout : See :py:class:`~xgboost.tracker.RabitTracker`.\n\n    worker_port :\n\n        The port each worker listens to for peer-to-peer connections. By default,\n        workers use an available port assigned by the OS. 
This option can be used in\n        restricted network environments where only specific ports are open.\n\n        This can be an integer for a fixed port used by all workers, or a callback\n        function that takes no arguments and returns a port number. The callback is\n        invoked per-worker at the worker side.\n\n        .. note::\n\n            The option does not affect the NCCL communicator group, which must be\n            configured via NCCL's own environment variables.\n\n    \"\"\"\n\n    retry: Optional[int] = None\n    timeout: Optional[int] = None\n\n    tracker_host_ip: Optional[str] = None\n    tracker_port: Optional[int] = None\n    tracker_timeout: Optional[int] = None\n\n    worker_port: Optional[Union[Callable[[], int], int]] = None\n\n    def update_worker_args(self, args: _Conf) -> _Conf:\n        \"\"\"Worker side arguments resolution.\"\"\"\n        if self.worker_port is None:\n            return args\n        if callable(self.worker_port):\n            args[\"dmlc_worker_port\"] = self.worker_port()\n        else:\n            args[\"dmlc_worker_port\"] = self.worker_port\n        return args\n\n    def get_comm_config(self, args: _Conf) -> _Conf:\n        \"\"\"Update the arguments for the communicator.\"\"\"\n        if self.retry is not None:\n            args[\"dmlc_retry\"] = self.retry\n        if self.timeout is not None:\n            args[\"dmlc_timeout\"] = self.timeout\n        return args\n\n\ndef init(**args: _ArgVals) -> None:\n    \"\"\"Initialize the collective library with arguments.\n\n    Parameters\n    ----------\n    args :\n        Keyword arguments representing the parameters and their values.\n\n        Accepted parameters:\n          - dmlc_communicator: The type of the communicator.\n            * rabit: Use Rabit. 
This is the default if the type is unspecified.\n            * federated: Use the gRPC interface for Federated Learning.\n\n        Only applicable to the Rabit communicator:\n          - dmlc_tracker_uri: Hostname of the tracker.\n          - dmlc_tracker_port: Port number of the tracker.\n          - dmlc_task_id: ID of the current task, can be used to obtain deterministic\n            rank assignment.\n          - dmlc_retry: The number of retries when handling network errors.\n          - dmlc_timeout: Timeout in seconds.\n          - dmlc_nccl_path: Path to load (dlopen) nccl for GPU-based communication.\n\n        Only applicable to the Federated communicator:\n          - federated_server_address: Address of the federated server.\n          - federated_world_size: Number of federated workers.\n          - federated_rank: Rank of the current worker.\n          - federated_server_cert: Server certificate file path. Only needed for the SSL\n            mode.\n          - federated_client_key: Client key file path. Only needed for the SSL mode.\n          - federated_client_cert: Client certificate file path. 
Only needed for the SSL\n            mode.\n\n        Use upper case for environment variables, use lower case for runtime\n        configuration.\n\n    \"\"\"\n    _check_call(_LIB.XGCommunicatorInit(make_jcargs(**args)))\n\n\ndef finalize() -> None:\n    \"\"\"Finalize the communicator.\"\"\"\n    _check_call(_LIB.XGCommunicatorFinalize())\n\n\ndef get_rank() -> int:\n    \"\"\"Get rank of current process.\n\n    Returns\n    -------\n    rank : int\n        Rank of current process.\n    \"\"\"\n    ret = _LIB.XGCommunicatorGetRank()\n    return ret\n\n\ndef get_world_size() -> int:\n    \"\"\"Get the total number of workers.\n\n    Returns\n    -------\n    n :\n        Total number of processes.\n    \"\"\"\n    ret = _LIB.XGCommunicatorGetWorldSize()\n    return ret\n\n\ndef is_distributed() -> bool:\n    \"\"\"If the collective communicator is distributed.\"\"\"\n    is_dist = _LIB.XGCommunicatorIsDistributed()\n    return bool(is_dist)\n\n\ndef communicator_print(msg: Any) -> None:\n    \"\"\"Print message to the communicator.\n\n    This function can be used to communicate the information of\n    the progress to the communicator.\n\n    Parameters\n    ----------\n    msg : str\n        The message to be printed to the communicator.\n    \"\"\"\n    if not isinstance(msg, str):\n        msg = str(msg)\n    is_dist = _LIB.XGCommunicatorIsDistributed()\n    if is_dist != 0:\n        _check_call(_LIB.XGCommunicatorPrint(c_str(msg.strip())))\n    else:\n        print(msg.strip(), flush=True)\n\n\ndef get_processor_name() -> str:\n    \"\"\"Get the processor name.\n\n    Returns\n    -------\n    name :\n        The name of processor(host)\n    \"\"\"\n    name_str = ctypes.c_char_p()\n    _check_call(_LIB.XGCommunicatorGetProcessorName(ctypes.byref(name_str)))\n    value = name_str.value\n    return py_str(value)\n\n\ndef broadcast(data: _T, root: int) -> _T:\n    \"\"\"Broadcast object from one node to all other nodes.\n\n    Parameters\n    ----------\n    data : any 
type that can be pickled\n        Input data, if current rank does not equal root, this can be None\n    root : int\n        Rank of the node to broadcast data from.\n\n    Returns\n    -------\n    object :\n        The result of broadcast, same type as the input data.\n    \"\"\"\n    rank = get_rank()\n    length = ctypes.c_ulong()\n    if root == rank:\n        assert data is not None, \"need to pass in data when broadcasting\"\n        s = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)\n        length.value = len(s)\n    # Run first broadcast\n    _check_call(\n        _LIB.XGCommunicatorBroadcast(\n            ctypes.byref(length), ctypes.sizeof(ctypes.c_ulong), root\n        )\n    )\n    if root != rank:\n        dptr = (ctypes.c_char * length.value)()\n        # run second\n        _check_call(\n            _LIB.XGCommunicatorBroadcast(\n                ctypes.cast(dptr, ctypes.c_void_p), length.value, root\n            )\n        )\n        data = pickle.loads(dptr.raw)\n        del dptr\n    else:\n        _check_call(\n            _LIB.XGCommunicatorBroadcast(\n                ctypes.cast(ctypes.c_char_p(s), ctypes.c_void_p), length.value, root\n            )\n        )\n        del s\n    return data\n\n\n# enumeration of dtypes\ndef _map_dtype(dtype: np.dtype) -> int:\n    dtype_map = {\n        np.dtype(\"float16\"): 0,\n        np.dtype(\"float32\"): 1,\n        np.dtype(\"float64\"): 2,\n        np.dtype(\"int8\"): 4,\n        np.dtype(\"int16\"): 5,\n        np.dtype(\"int32\"): 6,\n        np.dtype(\"int64\"): 7,\n        np.dtype(\"uint8\"): 8,\n        np.dtype(\"uint16\"): 9,\n        np.dtype(\"uint32\"): 10,\n        np.dtype(\"uint64\"): 11,\n    }\n    try:\n        dtype_map.update({np.dtype(\"float128\"): 3})\n    except TypeError:  # float128 doesn't exist on the system\n        pass\n\n    if dtype not in dtype_map:\n        raise TypeError(f\"data type {dtype} is not supported on the current platform.\")\n\n    return 
dtype_map[dtype]\n\n\n@unique\nclass Op(IntEnum):\n    \"\"\"Supported operations for allreduce.\"\"\"\n\n    MAX = 0\n    MIN = 1\n    SUM = 2\n    BITWISE_AND = 3\n    BITWISE_OR = 4\n    BITWISE_XOR = 5\n\n\ndef allreduce(data: np.ndarray, op: Op) -> np.ndarray:\n    \"\"\"Perform allreduce, return the result.\n\n    Parameters\n    ----------\n    data :\n        Input data.\n    op :\n        Reduction operator.\n\n    Returns\n    -------\n    result :\n        The result of allreduce, have same shape as data\n\n    Notes\n    -----\n    This function is not thread-safe.\n    \"\"\"\n    if not isinstance(data, np.ndarray):\n        raise TypeError(\"allreduce only takes in numpy.ndarray\")\n    buf = data.ravel().copy()\n    _check_call(\n        _LIB.XGCommunicatorAllreduce(\n            buf.ctypes.data_as(ctypes.c_void_p),\n            buf.size,\n            _map_dtype(buf.dtype),\n            int(op),\n        )\n    )\n    return buf\n\n\ndef signal_error() -> None:\n    \"\"\"Kill the process.\"\"\"\n    _check_call(_LIB.XGCommunicatorSignalError())\n\n\ndef _find_nccl() -> Optional[str]:\n    from nvidia.nccl import lib\n\n    # There are two versions of nvidia-nccl, one is from PyPI, another one from\n    # nvidia-pyindex. 
We support only the first one as the second one is too old (2.9.8\n    # as of writing).\n    #\n    # nccl 2.28 doesn't have the __file__ attribute, we use the namespace path instead.\n    if lib.__file__ is not None:\n        dirname: Optional[str] = os.path.dirname(lib.__file__)\n    elif hasattr(lib, \"__path__\") and len(lib.__path__) > 0:\n        dirname = lib.__path__[0]\n    else:\n        dirname = None\n    if not dirname:\n        return None\n\n    # Find the first shared object in the lib directory.\n    files = os.listdir(dirname)\n    if not files:\n        return None\n\n    libname: Optional[str] = None\n    for name in files:\n        if name.startswith(\"libnccl.so\"):\n            libname = name\n            break\n\n    if libname is not None:\n        path = os.path.join(dirname, libname)\n        return path\n    return None\n\n\nclass CommunicatorContext:\n    \"\"\"A context controlling collective communicator initialization and finalization.\"\"\"\n\n    def __init__(self, **args: _ArgVals) -> None:\n        self.args = args\n        key = \"dmlc_nccl_path\"\n        if args.get(key, None) is not None:\n            return\n\n        binfo = build_info()\n        if not binfo[\"USE_DLOPEN_NCCL\"]:\n            return\n\n        try:\n            # PyPI package of NCCL.\n            path = _find_nccl()\n            if path:\n                self.args[key] = path\n        except ImportError:\n            pass\n\n    def __enter__(self) -> _Args:\n        init(**self.args)\n        assert is_distributed()\n        LOGGER.debug(\"-------------- communicator say hello ------------------\")\n        return self.args\n\n    def __exit__(self, *args: Any) -> None:\n        finalize()\n        LOGGER.debug(\"--------------- communicator say bye ------------------\")\n"
  },
  {
    "path": "python-package/xgboost/compat.py",
    "content": "# pylint: disable=unused-import\n\"\"\"For compatibility and optional dependencies.\"\"\"\n\nimport functools\nimport importlib.util\nimport logging\nimport sys\nimport types\nfrom typing import TYPE_CHECKING, Any, Sequence, TypeGuard, cast\n\nimport numpy as np\n\nfrom ._typing import _T, DataType\n\nif TYPE_CHECKING:\n    import pandas as pd\n    import pyarrow as pa\n\nassert sys.version_info[0] == 3, \"Python 2 is no longer supported.\"\n\n\ndef py_str(x: bytes | None) -> str:\n    \"\"\"convert c string back to python string\"\"\"\n    assert x is not None  # ctypes might return None\n    return x.decode(\"utf-8\")  # type: ignore[union-attr]\n\n\ndef lazy_isinstance(instance: Any, module: str, name: str) -> bool:\n    \"\"\"Use string representation to identify a type.\"\"\"\n\n    # Notice, we use .__class__ as opposed to type() in order\n    # to support object proxies such as weakref.proxy\n    cls = instance.__class__\n    is_same_module = cls.__module__ == module\n    has_same_name = cls.__name__ == name\n    return is_same_module and has_same_name\n\n\n# sklearn\ntry:\n    from sklearn import __version__ as _sklearn_version\n    from sklearn.base import BaseEstimator as XGBModelBase\n    from sklearn.base import ClassifierMixin as XGBClassifierBase\n    from sklearn.base import RegressorMixin as XGBRegressorBase\n\n    try:\n        from sklearn.model_selection import StratifiedKFold as XGBStratifiedKFold\n    except ImportError:\n        from sklearn.cross_validation import StratifiedKFold as XGBStratifiedKFold\n\n    # sklearn.utils Tags types can be imported unconditionally once\n    # xgboost's minimum scikit-learn version is 1.6 or higher\n    try:\n        from sklearn.utils import Tags as _sklearn_Tags\n    except ImportError:\n        _sklearn_Tags = object\n\n    SKLEARN_INSTALLED = True\n\nexcept ImportError:\n    SKLEARN_INSTALLED = False\n\n    # used for compatibility without sklearn\n    class XGBModelBase:  # type: 
ignore[no-redef]\n        \"\"\"Dummy class for sklearn.base.BaseEstimator.\"\"\"\n\n    class XGBClassifierBase:  # type: ignore[no-redef]\n        \"\"\"Dummy class for sklearn.base.ClassifierMixin.\"\"\"\n\n    class XGBRegressorBase:  # type: ignore[no-redef]\n        \"\"\"Dummy class for sklearn.base.RegressorMixin.\"\"\"\n\n    XGBStratifiedKFold = None\n\n    _sklearn_Tags = object\n    _sklearn_version = object\n\n\n_logger = logging.getLogger(__name__)\n\n\n@functools.cache\ndef is_cudf_available() -> bool:\n    \"\"\"Check cuDF package available or not\"\"\"\n    if importlib.util.find_spec(\"cudf\") is None:\n        return False\n    try:\n        import cudf\n\n        return True\n    except ImportError:\n        _logger.exception(\"Importing cuDF failed, use DMatrix instead of QDM\")\n        return False\n\n\n@functools.cache\ndef is_cupy_available() -> bool:\n    \"\"\"Check cupy package available or not\"\"\"\n    if importlib.util.find_spec(\"cupy\") is None:\n        return False\n    try:\n        import cupy\n\n        return True\n    except ImportError:\n        return False\n\n\n@functools.cache\ndef import_cupy() -> types.ModuleType:\n    \"\"\"Import cupy.\"\"\"\n    if not is_cupy_available():\n        raise ImportError(\"`cupy` is required for handling CUDA buffer.\")\n\n    import cupy\n\n    return cupy\n\n\n@functools.cache\ndef is_pyarrow_available() -> bool:\n    \"\"\"Check pyarrow package available or not\"\"\"\n    if importlib.util.find_spec(\"pyarrow\") is None:\n        return False\n    return True\n\n\n@functools.cache\ndef import_pyarrow() -> types.ModuleType:\n    \"\"\"Import pyarrow with memory cache.\"\"\"\n    import pyarrow as pa\n\n    return pa\n\n\n@functools.cache\ndef import_pandas() -> types.ModuleType:\n    \"\"\"Import pandas with memory cache.\"\"\"\n    import pandas as pd\n\n    return pd\n\n\n@functools.cache\ndef import_polars() -> types.ModuleType:\n    \"\"\"Import polars with memory cache.\"\"\"\n    
import polars as pl\n\n    return pl\n\n\n@functools.cache\ndef is_pandas_available() -> bool:\n    \"\"\"Check the pandas package is available or not.\"\"\"\n    if importlib.util.find_spec(\"pandas\") is None:\n        return False\n    return True\n\n\ntry:\n    import scipy.sparse as scipy_sparse\n    from scipy.sparse import csr_matrix as scipy_csr\nexcept ImportError:\n    scipy_sparse = False\n    scipy_csr = object\n\n\ndef _is_polars_lazyframe(data: DataType) -> bool:\n    return lazy_isinstance(data, \"polars.lazyframe.frame\", \"LazyFrame\")\n\n\ndef _is_polars_series(data: DataType) -> bool:\n    return lazy_isinstance(data, \"polars.series.series\", \"Series\")\n\n\ndef _is_polars(data: DataType) -> bool:\n    lf = _is_polars_lazyframe(data)\n    df = lazy_isinstance(data, \"polars.dataframe.frame\", \"DataFrame\")\n    return lf or df\n\n\ndef _is_arrow(data: DataType) -> TypeGuard[\"pa.Table\"]:\n    return lazy_isinstance(data, \"pyarrow.lib\", \"Table\")\n\n\ndef _is_cudf_df(data: DataType) -> bool:\n    return lazy_isinstance(data, \"cudf.core.dataframe\", \"DataFrame\")\n\n\ndef _is_cudf_ser(data: DataType) -> bool:\n    return lazy_isinstance(data, \"cudf.core.series\", \"Series\")\n\n\ndef _is_cudf_pandas(data: DataType) -> bool:\n    \"\"\"Must go before both pandas and cudf checks.\"\"\"\n    return (_is_pandas_df(data) or _is_pandas_series(data)) and lazy_isinstance(\n        type(data), \"cudf.pandas.fast_slow_proxy\", \"_FastSlowProxyMeta\"\n    )\n\n\ndef _is_pandas_df(data: DataType) -> TypeGuard[\"pd.DataFrame\"]:\n    return lazy_isinstance(data, \"pandas.core.frame\", \"DataFrame\") or lazy_isinstance(\n        data, \"pandas\", \"DataFrame\"\n    )\n\n\ndef _is_pandas_series(data: DataType) -> TypeGuard[\"pd.Series\"]:\n    return lazy_isinstance(data, \"pandas.core.series\", \"Series\") or lazy_isinstance(\n        data, \"pandas\", \"Series\"\n    )\n\n\ndef _is_modin_df(data: DataType) -> bool:\n    return lazy_isinstance(data, 
\"modin.pandas.dataframe\", \"DataFrame\")\n\n\ndef _is_modin_series(data: DataType) -> bool:\n    return lazy_isinstance(data, \"modin.pandas.series\", \"Series\")\n\n\ndef is_dataframe(data: DataType) -> bool:\n    \"\"\"Whether the input is a dataframe. Currently supported dataframes:\n\n    - pandas\n    - cudf\n    - cudf.pandas\n    - polars\n    - pyarrow\n    - modin\n\n\n    \"\"\"\n    return any(\n        p(data)\n        for p in (\n            _is_polars,\n            _is_polars_series,\n            _is_arrow,\n            _is_cudf_df,\n            _is_cudf_ser,\n            _is_cudf_pandas,\n            _is_pandas_df,\n            _is_pandas_series,\n            _is_modin_df,\n            _is_modin_series,\n        )\n    )\n\n\ndef _is_cupy_alike(data: DataType) -> bool:\n    return hasattr(data, \"__cuda_array_interface__\")\n\n\ndef concat(value: Sequence[_T]) -> _T:  # pylint: disable=too-many-return-statements\n    \"\"\"Concatenate row-wise.\"\"\"\n    if isinstance(value[0], np.ndarray):\n        value_arr = cast(Sequence[np.ndarray], value)\n        return np.concatenate(value_arr, axis=0)\n    if scipy_sparse and isinstance(value[0], scipy_sparse.csr_matrix):\n        return scipy_sparse.vstack(value, format=\"csr\")\n    if scipy_sparse and isinstance(value[0], scipy_sparse.csc_matrix):\n        return scipy_sparse.vstack(value, format=\"csc\")\n    if scipy_sparse and isinstance(value[0], scipy_sparse.spmatrix):\n        # other sparse format will be converted to CSR.\n        return scipy_sparse.vstack(value, format=\"csr\")\n    if _is_pandas_df(value[0]) or _is_pandas_series(value[0]):\n        from pandas import concat as pd_concat\n\n        return pd_concat(value, axis=0)\n    if lazy_isinstance(value[0], \"cudf.core.dataframe\", \"DataFrame\") or lazy_isinstance(\n        value[0], \"cudf.core.series\", \"Series\"\n    ):\n        from cudf import concat as CUDF_concat\n\n        return CUDF_concat(value, axis=0)\n    if 
_is_cupy_alike(value[0]):\n        import cupy\n\n        # pylint: disable=c-extension-no-member,no-member\n        d = cupy.cuda.runtime.getDevice()\n        for v in value:\n            arr = cast(cupy.ndarray, v)\n            d_v = arr.device.id\n            assert d_v == d, \"Concatenating arrays on different devices.\"\n        return cupy.concatenate(value, axis=0)\n    raise TypeError(f\"Unknown type: {type(value[0])}\")\n"
  },
  {
    "path": "python-package/xgboost/config.py",
    "content": "# pylint: disable=missing-function-docstring\n\"\"\"Global configuration for XGBoost\"\"\"\n\nimport ctypes\nimport json\nfrom contextlib import contextmanager\nfrom functools import wraps\nfrom typing import Any, Callable, Dict, Iterator, Optional, cast\n\nfrom ._typing import _F\nfrom .core import _LIB, _check_call, c_str, py_str\n\n\ndef config_doc(\n    *,\n    header: Optional[str] = None,\n    extra_note: Optional[str] = None,\n    parameters: Optional[str] = None,\n    returns: Optional[str] = None,\n    see_also: Optional[str] = None,\n) -> Callable[[_F], _F]:\n    \"\"\"Decorator to format docstring for config functions.\n\n    Parameters\n    ----------\n    header: str\n        An introducion to the function\n    extra_note: str\n        Additional notes\n    parameters: str\n        Parameters of the function\n    returns: str\n        Return value\n    see_also: str\n        Related functions\n    \"\"\"\n\n    doc_template = \"\"\"\n    {header}\n\n    Global configuration consists of a collection of parameters that can be applied in the\n    global scope. See :ref:`global_config` for the full list of parameters supported in\n    the global configuration.\n\n    {extra_note}\n\n    .. versionadded:: 1.4.0\n    \"\"\"\n\n    common_example = \"\"\"\n    Example\n    -------\n\n    .. 
code-block:: python\n\n        import xgboost as xgb\n\n        # Show all messages, including ones pertaining to debugging\n        xgb.set_config(verbosity=2)\n\n        # Get current value of global configuration\n        # This is a dict containing all parameters in the global configuration,\n        # including 'verbosity'\n        config = xgb.get_config()\n        assert config['verbosity'] == 2\n\n        # Example of using the context manager xgb.config_context().\n        # The context manager will restore the previous value of the global\n        # configuration upon exiting.\n        with xgb.config_context(verbosity=0):\n            # Suppress warning caused by model generated with XGBoost version < 1.0.0\n            bst = xgb.Booster(model_file='./old_model.bin')\n        assert xgb.get_config()['verbosity'] == 2  # old value restored\n\n    Nested configuration context is also supported:\n\n    Example\n    -------\n\n    .. code-block:: python\n\n        with xgb.config_context(verbosity=3):\n            assert xgb.get_config()[\"verbosity\"] == 3\n            with xgb.config_context(verbosity=2):\n                assert xgb.get_config()[\"verbosity\"] == 2\n\n        xgb.set_config(verbosity=2)\n        assert xgb.get_config()[\"verbosity\"] == 2\n        with xgb.config_context(verbosity=3):\n            assert xgb.get_config()[\"verbosity\"] == 3\n    \"\"\"\n\n    def none_to_str(value: Optional[str]) -> str:\n        return \"\" if value is None else value\n\n    def config_doc_decorator(func: _F) -> _F:\n        func.__doc__ = (\n            doc_template.format(\n                header=none_to_str(header), extra_note=none_to_str(extra_note)\n            )\n            + none_to_str(parameters)\n            + none_to_str(returns)\n            + none_to_str(common_example)\n            + none_to_str(see_also)\n        )\n\n        @wraps(func)\n        def wrap(*args: Any, **kwargs: Any) -> Any:\n            return func(*args, **kwargs)\n\n     
   return cast(_F, wrap)\n\n    return config_doc_decorator\n\n\n@config_doc(\n    header=\"\"\"\n    Set global configuration.\n    \"\"\",\n    parameters=\"\"\"\n    Parameters\n    ----------\n    new_config: Dict[str, Any]\n        Keyword arguments representing the parameters and their values\n            \"\"\",\n)\ndef set_config(**new_config: Any) -> None:\n    not_none = {}\n    for k, v in new_config.items():\n        if v is not None:\n            not_none[k] = v\n    config = json.dumps(not_none)\n    _check_call(_LIB.XGBSetGlobalConfig(c_str(config)))\n\n\n@config_doc(\n    header=\"\"\"\n    Get current values of the global configuration.\n    \"\"\",\n    returns=\"\"\"\n    Returns\n    -------\n    args: Dict[str, Any]\n        The list of global parameters and their values\n            \"\"\",\n)\ndef get_config() -> Dict[str, Any]:\n    config_str = ctypes.c_char_p()\n    _check_call(_LIB.XGBGetGlobalConfig(ctypes.byref(config_str)))\n    value = config_str.value\n    assert value\n    config = json.loads(py_str(value))\n    return config\n\n\n@contextmanager\n@config_doc(\n    header=\"\"\"\n    Context manager for global XGBoost configuration.\n    \"\"\",\n    parameters=\"\"\"\n    Parameters\n    ----------\n    new_config: Dict[str, Any]\n        Keyword arguments representing the parameters and their values\n            \"\"\",\n    extra_note=\"\"\"\n    .. note::\n\n        All settings, not just those presently modified, will be returned to their\n        previous values when the context manager is exited. This is not thread-safe.\n            \"\"\",\n    see_also=\"\"\"\n    See Also\n    --------\n    set_config: Set global XGBoost configuration\n    get_config: Get current values of the global configuration\n            \"\"\",\n)\ndef config_context(**new_config: Any) -> Iterator[None]:\n    old_config = get_config().copy()\n    set_config(**new_config)\n\n    try:\n        yield\n    finally:\n        set_config(**old_config)\n"
  },
  {
    "path": "python-package/xgboost/core.py",
    "content": "# pylint: disable=too-many-arguments, too-many-branches\n# pylint: disable=too-many-lines, too-many-locals\n\"\"\"Core XGBoost Library.\"\"\"\n\nimport copy\nimport ctypes\nimport json\nimport os\nimport re\nimport sys\nimport warnings\nimport weakref\nfrom abc import ABC, abstractmethod\nfrom collections.abc import Mapping\nfrom functools import wraps\nfrom inspect import Parameter, signature\nfrom types import EllipsisType\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    Dict,\n    Generator,\n    Iterable,\n    List,\n    Optional,\n    Sequence,\n    Tuple,\n    Type,\n    TypeGuard,\n    TypeVar,\n    Union,\n    cast,\n)\n\nimport numpy as np\nimport scipy.sparse\n\nfrom ._c_api import (\n    _LIB,\n    _check_call,\n    c_str,\n    from_cstr_to_pystr,\n    from_pystr_to_cstr,\n    make_jcargs,\n)\nfrom ._c_api import (\n    XGBoostError as _XGBoostError,\n)\nfrom ._data_utils import (\n    Categories,\n    TransformedDf,\n    _ensure_np_dtype,\n    array_interface,\n    cuda_array_interface,\n    from_array_interface,\n    make_array_interface,\n)\nfrom ._typing import (\n    _T,\n    ArrayLike,\n    ArrowCatList,\n    BoosterParam,\n    CFloatPtr,\n    CNumeric,\n    CNumericPtr,\n    CStrPtr,\n    CTypeT,\n    DataSplitMode,\n    DataType,\n    FeatureInfo,\n    FeatureNames,\n    FeatureTypes,\n    Integer,\n    IterationRange,\n    ModelIn,\n    NumpyOrCupy,\n    PathLike,\n    TransformedData,\n    c_bst_ulong,\n)\nfrom .compat import (\n    _is_cupy_alike,\n    import_polars,\n    import_pyarrow,\n    is_pandas_available,\n    is_pyarrow_available,\n    py_str,\n)\nfrom .objective import Objective, TreeObjective, _grad_arrinf\n\nif TYPE_CHECKING:\n    from pandas import DataFrame as PdDataFrame\n\nXGBoostError = _XGBoostError\n\n\ndef _parse_eval_str(result: str) -> List[Tuple[str, float]]:\n    \"\"\"Parse an eval result string from the booster.\"\"\"\n    splited = result.split()[1:]\n    # split up 
`test-error:0.1234`\n    metric_score_str = [tuple(s.split(\":\")) for s in splited]\n    # convert to float\n    metric_score = [(n, float(s)) for n, s in metric_score_str]\n    return metric_score\n\n\nIterRange = TypeVar(\"IterRange\", Optional[Tuple[int, int]], Tuple[int, int])\n\n\ndef _expect(expectations: Sequence[Type], got: Type) -> str:\n    \"\"\"Translate input error into string.\n\n    Parameters\n    ----------\n    expectations :\n        a list of expected value.\n    got :\n        actual input\n\n    Returns\n    -------\n    msg: str\n    \"\"\"\n    msg = \"Expecting \"\n    for t in range(len(expectations) - 1):\n        msg += str(expectations[t])\n        msg += \" or \"\n    msg += str(expectations[-1])\n    msg += \".  Got \" + str(got)\n    return msg\n\n\ndef _check_distributed_params(kwargs: Dict[str, Any]) -> None:\n    \"\"\"Validate parameters in distributed environments.\"\"\"\n    device = kwargs.get(\"device\", None)\n    if device and not isinstance(device, str):\n        msg = \"Invalid type for the `device` parameter\"\n        msg += _expect((str,), type(device))\n        raise TypeError(msg)\n\n    if device and device.find(\":\") != -1:\n        if device != \"sycl:gpu\":\n            raise ValueError(\n                \"Distributed training doesn't support selecting device ordinal as GPUs\"\n                \"  are managed by the distributed frameworks. 
use `device=cuda` or\"\n                \"  `device=gpu` instead.\"\n            )\n\n    if kwargs.get(\"booster\", None) == \"gblinear\":\n        raise NotImplementedError(\n            f\"booster `{kwargs['booster']}` is not supported for distributed training.\"\n        )\n\n\ndef _validate_feature_info(\n    feature_info: Sequence[str], n_features: int, is_column_split: bool, name: str\n) -> List[str]:\n    if not isinstance(feature_info, (str, Sequence, Categories)):\n        raise TypeError(\n            f\"Expecting a sequence of strings for {name}, got: {type(feature_info)}\"\n        )\n    feature_info = list(feature_info)\n    if len(feature_info) != n_features and n_features != 0 and not is_column_split:\n        msg = (\n            f\"{name} must have the same length as the number of data columns, \",\n            f\"expected {n_features}, got {len(feature_info)}\",\n        )\n        raise ValueError(msg)\n    return feature_info\n\n\ndef build_info() -> dict:\n    \"\"\"Build information of XGBoost.  The returned value format is not stable. Also,\n    please note that build time dependency is not the same as runtime dependency. For\n    instance, it's possible to build XGBoost with older CUDA version but run it with the\n    latest one.\n\n      .. 
versionadded:: 1.6.0\n\n    \"\"\"\n    j_info = ctypes.c_char_p()\n    _check_call(_LIB.XGBuildInfo(ctypes.byref(j_info)))\n    assert j_info.value is not None\n    res = json.loads(j_info.value.decode())  # pylint: disable=no-member\n    res[\"libxgboost\"] = _LIB.path\n    return res\n\n\ndef _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:\n    _NUMPY_TO_CTYPES_MAPPING: Dict[Type[np.number], Type[CNumeric]] = {\n        np.float32: ctypes.c_float,\n        np.float64: ctypes.c_double,\n        np.uint32: ctypes.c_uint,\n        np.uint64: ctypes.c_uint64,\n        np.int32: ctypes.c_int32,\n        np.int64: ctypes.c_int64,\n    }\n    if np.intc is not np.int32:  # Windows\n        _NUMPY_TO_CTYPES_MAPPING[np.intc] = _NUMPY_TO_CTYPES_MAPPING[np.int32]\n    if dtype not in _NUMPY_TO_CTYPES_MAPPING:\n        raise TypeError(\n            f\"Supported types: {_NUMPY_TO_CTYPES_MAPPING.keys()}, got: {dtype}\"\n        )\n    return _NUMPY_TO_CTYPES_MAPPING[dtype]\n\n\ndef ctypes2numpy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> np.ndarray:\n    \"\"\"Convert a ctypes pointer array to a numpy array.\"\"\"\n    ctype: Type[CNumeric] = _numpy2ctypes_type(dtype)\n    if not isinstance(cptr, ctypes.POINTER(ctype)):\n        raise RuntimeError(f\"expected {ctype} pointer\")\n    res = np.zeros(length, dtype=dtype)\n    if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):\n        raise RuntimeError(\"memmove failed\")\n    return res\n\n\ndef ctypes2buffer(cptr: CStrPtr, length: int) -> bytearray:\n    \"\"\"Convert ctypes pointer to buffer type.\"\"\"\n    if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)):\n        raise RuntimeError(\"expected char pointer\")\n    res = bytearray(length)\n    rptr = (ctypes.c_char * length).from_buffer(res)\n    if not ctypes.memmove(rptr, cptr, length):\n        raise RuntimeError(\"memmove failed\")\n    return res\n\n\ndef c_array(\n    ctype: Type[CTypeT], values: ArrayLike\n) -> 
Union[ctypes.Array, ctypes._Pointer]:\n    \"\"\"Convert a python array to c array.\"\"\"\n    if isinstance(values, np.ndarray) and values.dtype.itemsize == ctypes.sizeof(ctype):\n        return values.ctypes.data_as(ctypes.POINTER(ctype))\n    return (ctype * len(values))(*values)\n\n\ndef _prediction_output(\n    shape: CNumericPtr, dims: c_bst_ulong, predts: CFloatPtr, is_cuda: bool\n) -> NumpyOrCupy:\n    arr_shape = tuple(ctypes2numpy(shape, dims.value, np.uint64).flatten())\n    array = from_array_interface(\n        make_array_interface(predts, arr_shape, np.float32, is_cuda)\n    )\n    return array\n\n\nclass DataIter(ABC):  # pylint: disable=too-many-instance-attributes\n    \"\"\"The interface for user defined data iterator. The iterator facilitates\n    distributed training, :py:class:`QuantileDMatrix`, and external memory support using\n    :py:class:`DMatrix` or :py:class:`ExtMemQuantileDMatrix`. Most of the time, users don't\n    need to interact with this class directly.\n\n    .. note::\n\n        The class caches some intermediate results using the `data` input (predictor\n        `X`) as key. Don't repeat the `X` for multiple batches with different meta data\n        (like `label`), make a copy if necessary.\n\n    .. note::\n\n        When the input for each batch is a DataFrame, we assume categories are\n        consistently encoded for all batches. For example, given two dataframes for two\n        batches, this is invalid:\n\n        .. code-block::\n\n            import pandas as pd\n\n            x0 = pd.DataFrame({\"a\": [0, 1]}, dtype=\"category\")\n            x1 = pd.DataFrame({\"a\": [1, 2]}, dtype=\"category\")\n\n        This is invalid because the `x0` has `[0, 1]` as categories while `x1` has `[1,\n        2]`. They should share the same set of categories and encoding:\n\n        .. 
code-block::\n\n            import numpy as np\n\n            categories = np.array([0, 1, 2])\n            x0[\"a\"] = pd.Categorical.from_codes(\n                codes=np.array([0, 1]), categories=categories\n            )\n            x1[\"a\"] = pd.Categorical.from_codes(\n                codes=np.array([1, 2]), categories=categories\n            )\n\n        You can ensure consistent encoding in your preprocessing step; be careful\n        that the data is stored in formats that preserve the encoding when chunking the\n        data.\n\n    Parameters\n    ----------\n    cache_prefix :\n        Prefix to the cache files, only used in external memory.\n\n        Note that using this class for external memory **will cache data\n        on disk** under the path passed here.\n\n    release_data :\n        Whether the iterator should release the data during iteration. Set it to True if\n        the data transformation (converting data to np.float32 type) is memory\n        intensive. Otherwise, if the transformation is computation intensive then we can\n        keep the cache.\n\n    on_host :\n        Whether the data should be cached on the host memory instead of the file system\n        when using GPU with external memory. When set to true (the default), the\n        \"external memory\" is the CPU (host) memory. See\n        :doc:`/tutorials/external_memory` for more info.\n\n        .. versionadded:: 3.0.0\n\n        .. warning::\n\n            This is an experimental parameter and subject to change.\n\n    min_cache_page_bytes :\n        The minimum number of bytes of each cached page. Only used for on-host cache\n        with GPU-based :py:class:`ExtMemQuantileDMatrix`. When using GPU-based external\n        memory with the data cached in the host memory, XGBoost can concatenate the\n        pages internally to increase the batch size for the GPU. The default page size\n        is about 1/16 of the total device memory. 
Users can manually set the value based\n        on the actual hardware and datasets. Set this to 0 to disable page\n        concatenation.\n\n        .. versionadded:: 3.0.0\n\n        .. warning::\n\n            This is an experimental parameter and subject to change.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        cache_prefix: Optional[str] = None,\n        release_data: bool = True,\n        *,\n        on_host: bool = True,\n        min_cache_page_bytes: Optional[int] = None,\n    ) -> None:\n        self.cache_prefix = cache_prefix\n        self.on_host = on_host\n        self.min_cache_page_bytes = min_cache_page_bytes\n\n        self._handle = _ProxyDMatrix()\n        self._exception: Optional[Exception] = None\n        self._enable_categorical = False\n        self._release = release_data\n        # Stage data in Python until reset or next is called to avoid data being free.\n        self._temporary_data: Optional[TransformedData] = None\n        self._data_ref: Optional[weakref.ReferenceType] = None\n\n    def get_callbacks(self, enable_categorical: bool) -> Tuple[Callable, Callable]:\n        \"\"\"Get callback functions for iterating in C. 
This is an internal function.\"\"\"\n        assert hasattr(self, \"cache_prefix\"), \"__init__ is not called.\"\n        reset_callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p)(self._reset_wrapper)\n        next_callback = ctypes.CFUNCTYPE(\n            ctypes.c_int,\n            ctypes.c_void_p,\n        )(self._next_wrapper)\n        self._enable_categorical = enable_categorical\n        return reset_callback, next_callback\n\n    @property\n    def proxy(self) -> \"_ProxyDMatrix\":\n        \"\"\"Handle of DMatrix proxy.\"\"\"\n        return self._handle\n\n    def _handle_exception(self, fn: Callable, dft_ret: _T) -> _T:\n        if self._exception is not None:\n            return dft_ret\n\n        try:\n            return fn()\n        except Exception as e:  # pylint: disable=broad-except\n            # Defer the exception in order to return 0 and stop the iteration.\n            # Exception inside a ctype callback function has no effect except\n            # for printing to stderr (doesn't stop the execution).\n            tb = sys.exc_info()[2]\n            # On dask, the worker is restarted and somehow the information is\n            # lost.\n            self._exception = e.with_traceback(tb)\n        return dft_ret\n\n    def reraise(self) -> None:\n        \"\"\"Reraise the exception thrown during iteration.\"\"\"\n        self._temporary_data = None\n        if self._exception is not None:\n            #  pylint 2.7.0 believes `self._exception` can be None even with `assert\n            #  isinstace`\n            exc = self._exception\n            self._exception = None\n            raise exc  # pylint: disable=raising-bad-type\n\n    def __del__(self) -> None:\n        assert self._temporary_data is None\n        assert self._exception is None\n\n    def _reset_wrapper(self, this: None) -> None:  # pylint: disable=unused-argument\n        \"\"\"A wrapper for user defined `reset` function.\"\"\"\n        # free the data\n        if self._release:\n  
          self._temporary_data = None\n        self._handle_exception(self.reset, None)\n\n    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument\n        \"\"\"A wrapper for user defined `next` function.\n\n        `this` is not used in Python.  ctypes can handle `self` of a Python\n        member function automatically when converting it to c function\n        pointer.\n\n        \"\"\"\n\n        @require_keyword_args(True)\n        def input_data(\n            *,\n            data: Any,\n            feature_names: Optional[FeatureNames] = None,\n            feature_types: Optional[FeatureTypes] = None,\n            **kwargs: Any,\n        ) -> None:\n            from .data import _proxy_transform, dispatch_proxy_set_data\n\n            # Reduce the amount of transformation that's needed for QuantileDMatrix.\n            #\n            # To construct the QDM, one needs 4 iterations on CPU, or 2 iterations on\n            # GPU. If the QDM has only one batch of input (most of the cases), we can\n            # avoid transforming the data repeatly.\n            try:\n                ref = weakref.ref(data)\n            except TypeError:\n                ref = None\n            if (\n                self._temporary_data is not None\n                and ref is not None\n                and ref is self._data_ref\n            ):\n                new, feature_names, feature_types = self._temporary_data\n            else:\n                new, feature_names, feature_types = _proxy_transform(\n                    data,\n                    feature_names,\n                    feature_types,\n                    self._enable_categorical,\n                )\n            # Stage the data, meta info are copied inside C++ MetaInfo.\n            self._temporary_data = (new, feature_names, feature_types)\n            dispatch_proxy_set_data(self.proxy, new)\n            self.proxy.set_info(\n                feature_names=feature_names,\n                
feature_types=feature_types,\n                **kwargs,\n            )\n            self._data_ref = ref\n\n        # Release the data before next batch is loaded.\n        if self._release:\n            self._temporary_data = None\n        # pylint: disable=not-callable\n        return self._handle_exception(lambda: int(self.next(input_data)), 0)\n\n    @abstractmethod\n    def reset(self) -> None:\n        \"\"\"Reset the data iterator.  Prototype for user defined function.\"\"\"\n        raise NotImplementedError()\n\n    @abstractmethod\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Set the next batch of data.\n\n        Parameters\n        ----------\n\n        input_data:\n            A function with same data fields like `data`, `label` with\n            `xgboost.DMatrix`.\n\n        Returns\n        -------\n        False if there's no more batch, otherwise True.\n\n        \"\"\"\n        raise NotImplementedError()\n\n\n# Notice for `require_keyword_args`\n# Authors: Olivier Grisel\n#          Gael Varoquaux\n#          Andreas Mueller\n#          Lars Buitinck\n#          Alexandre Gramfort\n#          Nicolas Tresegnie\n#          Sylvain Marie\n# License: BSD 3 clause\ndef require_keyword_args(\n    error: bool,\n) -> Callable[[Callable[..., _T]], Callable[..., _T]]:\n    \"\"\"Decorator for methods that issues warnings for positional arguments\n\n    Using the keyword-only argument syntax in pep 3102, arguments after the\n    * will issue a warning or error when passed as a positional argument.\n\n    Modified from sklearn utils.validation.\n\n    Parameters\n    ----------\n    error :\n        Whether to throw an error or raise a warning.\n    \"\"\"\n\n    def throw_if(func: Callable[..., _T]) -> Callable[..., _T]:\n        \"\"\"Throw an error/warning if there are positional arguments after the asterisk.\n\n        Parameters\n        ----------\n        f :\n            function to check arguments on.\n\n        \"\"\"\n        
sig = signature(func)\n        kwonly_args = []\n        all_args = []\n\n        for name, param in sig.parameters.items():\n            if param.kind == Parameter.POSITIONAL_OR_KEYWORD:\n                all_args.append(name)\n            elif param.kind == Parameter.KEYWORD_ONLY:\n                kwonly_args.append(name)\n\n        @wraps(func)\n        def inner_f(*args: Any, **kwargs: Any) -> _T:\n            extra_args = len(args) - len(all_args)\n            if not all_args and extra_args > 0:  # keyword argument only\n                raise TypeError(\"Keyword argument is required.\")\n\n            if extra_args > 0:\n                # ignore first 'self' argument for instance methods\n                args_msg = [\n                    f\"{name}\"\n                    for name, _ in zip(kwonly_args[:extra_args], args[-extra_args:])\n                ]\n                # pylint: disable=consider-using-f-string\n                msg = \"Pass `{}` as keyword args.\".format(\", \".join(args_msg))\n                if error:\n                    raise TypeError(msg)\n                warnings.warn(msg, FutureWarning)\n            for k, arg in zip(sig.parameters, args):\n                kwargs[k] = arg\n            return func(**kwargs)\n\n        return inner_f\n\n    return throw_if\n\n\n_deprecate_positional_args = require_keyword_args(False)\n\n\ndef _get_categories(\n    cfn: Callable[[ctypes.c_char_p], int],\n    feature_names: FeatureNames,\n    n_features: int,\n) -> ArrowCatList:\n    if not is_pyarrow_available():\n        raise ImportError(\n            \"`pyarrow` is required for exporting categories to arrow arrays.\"\n        )\n\n    if not TYPE_CHECKING:\n        pa = import_pyarrow()\n    else:\n        import pyarrow as pa\n\n    results: ArrowCatList = []\n\n    ret = ctypes.c_char_p()\n    _check_call(cfn(ret))\n    if ret.value is None:\n        results = [(feature_names[i], None) for i in range(n_features)]\n        return results\n\n    retstr = 
ret.value.decode()  # pylint: disable=no-member\n    jcats = json.loads(retstr)\n    assert isinstance(jcats, list) and len(jcats) == n_features\n\n    for fidx in range(n_features):\n        f_jcats = jcats[fidx]\n        if f_jcats is None:\n            # Numeric data\n            results.append((feature_names[fidx], None))\n            continue\n\n        if \"offsets\" not in f_jcats:\n            values = from_array_interface(f_jcats)\n            pa_values = pa.Array.from_pandas(values)\n            results.append((feature_names[fidx], pa_values))\n            continue\n\n        joffsets = f_jcats[\"offsets\"]\n        jvalues = f_jcats[\"values\"]\n        offsets = from_array_interface(joffsets)\n        values = from_array_interface(jvalues)\n        pa_offsets = pa.array(offsets).buffers()\n        pa_values = pa.array(values).buffers()\n        assert pa_offsets[0] is None and pa_values[0] is None, (\n            \"Should not have null mask.\"\n        )\n        pa_dict = pa.StringArray.from_buffers(\n            len(offsets) - 1, pa_offsets[1], pa_values[1]\n        )\n        results.append((feature_names[fidx], pa_dict))\n\n    return results\n\n\ndef _is_iter(data: DataType) -> TypeGuard[DataIter]:\n    return isinstance(data, DataIter)\n\n\nclass SingleBatchInternalIter(DataIter):  # pylint: disable=R0902\n    \"\"\"An iterator for single batch data to help creating device DMatrix.\n    Transforming input directly to histogram with normal single batch data API\n    can not access weight for sketching.  
So this iterator acts as a staging\n    area for meta info.\n    \"\"\"\n\n    def __init__(self, **kwargs: Any) -> None:\n        self.kwargs = kwargs\n        self.it = 0\n\n        # This does not necessarily increase memory usage as the data transformation\n        # might use memory.\n        super().__init__(release_data=False)\n\n    def next(self, input_data: Callable) -> bool:\n        if self.it == 1:\n            return False\n        self.it += 1\n        input_data(**self.kwargs)\n        return True\n\n    def reset(self) -> None:\n        self.it = 0\n\n\nclass DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-methods\n    \"\"\"Data Matrix used in XGBoost.\n\n    DMatrix is an internal data structure that is used by XGBoost, which is optimized\n    for both memory efficiency and training speed.  You can construct DMatrix from\n    multiple different sources of data.\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        data: DataType,\n        label: Optional[ArrayLike] = None,\n        *,\n        weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        missing: Optional[float] = None,\n        silent: bool = False,\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[Union[FeatureTypes, Categories]] = None,\n        nthread: Optional[int] = None,\n        group: Optional[ArrayLike] = None,\n        qid: Optional[ArrayLike] = None,\n        label_lower_bound: Optional[ArrayLike] = None,\n        label_upper_bound: Optional[ArrayLike] = None,\n        feature_weights: Optional[ArrayLike] = None,\n        enable_categorical: bool = True,\n        data_split_mode: DataSplitMode = DataSplitMode.ROW,\n    ) -> None:\n        \"\"\"Parameters\n        ----------\n        data :\n            Data source of DMatrix. 
See :ref:`py-data` for a list of supported input\n            types.\n\n            Note that, if passing an iterator, it **will cache data on disk**, and note\n            that fields like ``label`` will be concatenated in-memory from multiple\n            calls to the iterator.\n        label :\n            Label of the training data.\n        weight :\n            Weight for each instance.\n\n             .. note::\n\n                 For ranking task, weights are per-group.  In ranking task, one weight\n                 is assigned to each group (not each data point). This is because we\n                 only care about the relative ordering of data points within each group,\n                 so it doesn't make sense to assign weights to individual data points.\n\n        base_margin :\n            Global bias for each instance. See :doc:`/tutorials/intercept` for details.\n        missing :\n            Value in the input data which needs to be present as a missing value. If\n            None, defaults to np.nan.\n        silent :\n            Whether to print messages during construction\n        feature_names :\n            Set names for features.\n        feature_types :\n\n            Set types for features. 
If `data` is a DataFrame type and passing\n            `enable_categorical=True`, the types will be deduced automatically from the\n            column types.\n\n            Otherwise, one can pass a list-like input with the same length as number of\n            columns in `data`, with the following possible values:\n\n            - \"c\", which represents categorical columns.\n            - \"q\", which represents numeric columns.\n            - \"int\", which represents integer columns.\n            - \"i\", which represents boolean columns.\n\n            Note that, while categorical types are treated differently from the rest for\n            model fitting purposes, the other types do not influence the generated\n            model, but have effects in other functionalities such as feature\n            importances.\n\n            For categorical features, the input is assumed to be preprocessed and\n            encoded by the users. The encoding can be done via\n            :py:class:`sklearn.preprocessing.OrdinalEncoder` or pandas dataframe\n            `.cat.codes` method. This is useful when users want to specify categorical\n            features without having to construct a dataframe as input.\n\n            .. versionadded:: 3.1.0\n\n            Alternatively, user can pass a :py:class:`~xgboost.core.Categories` object\n            returned from previous training as a reference for re-coding. One can obtain\n            the reference with the :py:meth:`.get_categories` from the previous training\n            DMatrix or the Booster. This feature is experimental.\n\n        nthread :\n            Number of threads to use for loading data when parallelization is\n            applicable. 
If -1, uses maximum threads available on the system.\n        group :\n            Group size for all ranking group.\n        qid :\n            Query ID for data samples, used for ranking.\n        label_lower_bound :\n            Lower bound for survival training.\n        label_upper_bound :\n            Upper bound for survival training.\n        feature_weights :\n            Set feature weights for column sampling.\n        enable_categorical :\n\n            .. versionadded:: 1.3.0\n\n            .. note:: This parameter is experimental\n\n            Experimental support of specializing for categorical features. See\n            :doc:`/tutorials/categorical` for more info.\n\n            If passing `True` and `data` is a data frame (from supported libraries such\n            as Pandas, Modin, polars, and cuDF), The DMatrix recognizes categorical\n            columns and automatically set the `feature_types` parameter. If `data` is\n            not a data frame, this argument is ignored.\n\n            If passing `False` and `data` is a data frame with categorical columns, it\n            will result in an error.\n\n            See notes in the :py:class:`DataIter` for consistency requirement when the\n            input is an iterator.\n\n            .. 
versionchanged:: 3.1.0\n\n            XGBoost can remember the encoding of categories when the input is a\n            dataframe.\n\n        \"\"\"\n        if group is not None and qid is not None:\n            raise ValueError(\"Either one of `group` or `qid` should be None.\")\n\n        self.missing = missing if missing is not None else np.nan\n        self.nthread = nthread if nthread is not None else -1\n        self.silent = silent\n\n        if isinstance(data, ctypes.c_void_p):\n            # Used for constructing DMatrix slice.\n            self.handle = data\n            return\n\n        from .data import dispatch_data_backend\n\n        if _is_iter(data):\n            self._init_from_iter(data, enable_categorical)\n            assert self.handle is not None\n            return\n\n        handle, feature_names, feature_types = dispatch_data_backend(\n            data=data,\n            missing=self.missing,\n            threads=self.nthread,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            enable_categorical=enable_categorical,\n            data_split_mode=data_split_mode,\n        )\n        assert handle is not None\n        self.handle = handle\n\n        self.set_info(\n            label=label,\n            weight=weight,\n            base_margin=base_margin,\n            group=group,\n            qid=qid,\n            label_lower_bound=label_lower_bound,\n            label_upper_bound=label_upper_bound,\n            feature_weights=feature_weights,\n        )\n\n        if feature_names is not None:\n            self.feature_names = feature_names\n        if feature_types is not None:\n            self.feature_types = feature_types\n\n    def _init_from_iter(self, it: DataIter, enable_categorical: bool) -> None:\n        args = make_jcargs(\n            missing=self.missing,\n            nthread=self.nthread,\n            cache_prefix=it.cache_prefix if it.cache_prefix else \"\",\n            
on_host=it.on_host,\n            min_cache_page_bytes=it.min_cache_page_bytes,\n        )\n        handle = ctypes.c_void_p()\n        reset_callback, next_callback = it.get_callbacks(enable_categorical)\n        ret = _LIB.XGDMatrixCreateFromCallback(\n            None,\n            it.proxy.handle,\n            reset_callback,\n            next_callback,\n            args,\n            ctypes.byref(handle),\n        )\n        it.reraise()\n        # delay check_call to throw intermediate exception first\n        _check_call(ret)\n        self.handle = handle\n\n    def __del__(self) -> None:\n        if hasattr(self, \"handle\"):\n            assert self.handle is not None\n            _check_call(_LIB.XGDMatrixFree(self.handle))\n            del self.handle\n\n    @_deprecate_positional_args\n    def set_info(\n        self,\n        *,\n        label: Optional[ArrayLike] = None,\n        weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        group: Optional[ArrayLike] = None,\n        qid: Optional[ArrayLike] = None,\n        label_lower_bound: Optional[ArrayLike] = None,\n        label_upper_bound: Optional[ArrayLike] = None,\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[FeatureTypes] = None,\n        feature_weights: Optional[ArrayLike] = None,\n    ) -> None:\n        \"\"\"Set meta info for DMatrix.  
See doc string for :py:obj:`xgboost.DMatrix`.\"\"\"\n        from .data import dispatch_meta_backend\n\n        if label is not None:\n            self.set_label(label)\n        if weight is not None:\n            self.set_weight(weight)\n        if base_margin is not None:\n            self.set_base_margin(base_margin)\n        if group is not None:\n            self.set_group(group)\n        if qid is not None:\n            self.set_uint_info(\"qid\", qid)\n        if label_lower_bound is not None:\n            self.set_float_info(\"label_lower_bound\", label_lower_bound)\n        if label_upper_bound is not None:\n            self.set_float_info(\"label_upper_bound\", label_upper_bound)\n        if feature_names is not None:\n            self.feature_names = feature_names\n        if feature_types is not None:\n            self.feature_types = feature_types\n        if feature_weights is not None:\n            dispatch_meta_backend(\n                matrix=self, data=feature_weights, name=\"feature_weights\"\n            )\n\n    def _get_info(self, field: str) -> NumpyOrCupy:\n        \"\"\"Get meta info.\"\"\"\n        c_sdata = ctypes.c_char_p()\n        _check_call(\n            _LIB.XGDMatrixGetInfoRef(self.handle, c_str(field), ctypes.byref(c_sdata))\n        )\n        assert c_sdata.value is not None\n        idata = json.loads(c_sdata.value)\n        data = from_array_interface(idata)\n        return data\n\n    def get_float_info(self, field: str) -> NumpyOrCupy:\n        \"\"\"Get float property from the DMatrix.\n\n        Parameters\n        ----------\n        field: str\n            The field name of the information.\n\n        \"\"\"\n        return self._get_info(field)\n\n    def get_uint_info(self, field: str) -> NumpyOrCupy:\n        \"\"\"Get unsigned integer property from the DMatrix.\n\n        Parameters\n        ----------\n        field: str\n            The field name of the information.\n\n        \"\"\"\n        return 
self._get_info(field)\n\n    def set_float_info(self, field: str, data: ArrayLike) -> None:\n        \"\"\"Set float type property into the DMatrix.\n\n        Parameters\n        ----------\n        field: str\n            The field name of the information\n\n        data: numpy array\n            The array of data to be set\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, data, field, \"float\")\n\n    def set_float_info_npy2d(self, field: str, data: ArrayLike) -> None:\n        \"\"\"Set float type property into the DMatrix\n           for numpy 2d array input\n\n        Parameters\n        ----------\n        field: str\n            The field name of the information\n\n        data: numpy array\n            The array of data to be set\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, data, field, \"float\")\n\n    def set_uint_info(self, field: str, data: ArrayLike) -> None:\n        \"\"\"Set uint type property into the DMatrix.\n\n        Parameters\n        ----------\n        field: str\n            The field name of the information\n\n        data: numpy array\n            The array of data to be set\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, data, field, \"uint32\")\n\n    def save_binary(self, fname: PathLike, silent: bool = True) -> None:\n        \"\"\"Save DMatrix to an XGBoost buffer.  
Saved binary can be later loaded\n        by providing the path to :py:func:`xgboost.DMatrix` as input.\n\n        Parameters\n        ----------\n        fname : string or os.PathLike\n            Name of the output buffer file.\n        silent : bool (optional; default: True)\n            If set, the output is suppressed.\n        \"\"\"\n        fname = os.fspath(os.path.expanduser(fname))\n        _check_call(\n            _LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), ctypes.c_int(silent))\n        )\n\n    def set_label(self, label: ArrayLike) -> None:\n        \"\"\"Set label of dmatrix\n\n        Parameters\n        ----------\n        label: array like\n            The label information to be set into DMatrix\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, label, \"label\", \"float\")\n\n    def set_weight(self, weight: ArrayLike) -> None:\n        \"\"\"Set weight of each instance.\n\n        Parameters\n        ----------\n        weight : array like\n            Weight for each data point\n\n            .. note:: For ranking task, weights are per-group.\n\n                In ranking task, one weight is assigned to each group (not each\n                data point). This is because we only care about the relative\n                ordering of data points within each group, so it doesn't make\n                sense to assign weights to individual data points.\n\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, weight, \"weight\", \"float\")\n\n    def set_base_margin(self, margin: ArrayLike) -> None:\n        \"\"\"Set base margin of booster to start from.\n\n        This can be used to specify a prediction value of existing model to be\n        base_margin However, remember margin is needed, instead of transformed\n        prediction e.g. 
for logistic regression: need to put in value before\n        logistic transformation see also example/demo.py\n\n        Parameters\n        ----------\n        margin: array like\n            Prediction margin of each datapoint\n\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, margin, \"base_margin\", \"float\")\n\n    def set_group(self, group: ArrayLike) -> None:\n        \"\"\"Set group size of DMatrix (used for ranking).\n\n        Parameters\n        ----------\n        group : array like\n            Group size of each group\n        \"\"\"\n        from .data import dispatch_meta_backend\n\n        dispatch_meta_backend(self, group, \"group\", \"uint32\")\n\n    def get_label(self) -> NumpyOrCupy:\n        \"\"\"Get the label of the DMatrix.\"\"\"\n        return self._get_info(\"label\")\n\n    def get_weight(self) -> NumpyOrCupy:\n        \"\"\"Get the weight of the DMatrix.\"\"\"\n        return self._get_info(\"weight\")\n\n    def get_base_margin(self) -> NumpyOrCupy:\n        \"\"\"Get the base margin of the DMatrix.\"\"\"\n        return self._get_info(\"base_margin\")\n\n    def get_group(self) -> np.ndarray:\n        \"\"\"Get the group of the DMatrix.\n\n        Returns\n        -------\n        group\n        \"\"\"\n        group_ptr = self._get_info(\"group_ptr\")\n        return np.diff(group_ptr)\n\n    def get_data(self) -> scipy.sparse.csr_matrix:\n        \"\"\"Get the predictors from DMatrix as a CSR matrix. This getter is mostly for\n        testing purposes. If this is a quantized DMatrix then quantized values are\n        returned instead of input values.\n\n        .. 
versionadded:: 1.7.0\n\n        \"\"\"\n        indptr = np.empty(self.num_row() + 1, dtype=np.uint64)\n        indices = np.empty(self.num_nonmissing(), dtype=np.uint32)\n        data = np.empty(self.num_nonmissing(), dtype=np.float32)\n\n        c_indptr = indptr.ctypes.data_as(ctypes.POINTER(c_bst_ulong))\n        c_indices = indices.ctypes.data_as(ctypes.POINTER(ctypes.c_uint32))\n        c_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float))\n        config = from_pystr_to_cstr(json.dumps({}))\n\n        _check_call(\n            _LIB.XGDMatrixGetDataAsCSR(self.handle, config, c_indptr, c_indices, c_data)\n        )\n        ret = scipy.sparse.csr_matrix(\n            (data, indices, indptr), shape=(self.num_row(), self.num_col())\n        )\n        return ret\n\n    def get_quantile_cut(self) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"Get quantile cuts for quantization.\n\n        .. versionadded:: 2.0.0\n\n        \"\"\"\n        n_features = self.num_col()\n\n        c_sindptr = ctypes.c_char_p()\n        c_sdata = ctypes.c_char_p()\n        config = make_jcargs()\n        _check_call(\n            _LIB.XGDMatrixGetQuantileCut(\n                self.handle, config, ctypes.byref(c_sindptr), ctypes.byref(c_sdata)\n            )\n        )\n        assert c_sindptr.value is not None\n        assert c_sdata.value is not None\n\n        i_indptr = json.loads(c_sindptr.value)\n        indptr = from_array_interface(i_indptr)\n        assert indptr.size == n_features + 1\n        assert indptr.dtype == np.uint64\n\n        i_data = json.loads(c_sdata.value)\n        data = from_array_interface(i_data)\n        assert data.size == indptr[-1]\n        assert data.dtype == np.float32\n        return indptr, data\n\n    def get_categories(self, export_to_arrow: bool = False) -> Categories:\n        \"\"\"Get the categories in the dataset.\n\n        .. versionadded:: 3.1.0\n\n        .. 
warning::\n\n            This function is experimental.\n\n        Parameters\n        ----------\n        export_to_arrow :\n            The returned container will contain a list of ``pyarrow`` arrays for the\n            categories. See the :py:meth:`~Categories.to_arrow` for more info.\n\n        \"\"\"\n        fnames = self.feature_names\n        n_features = self.num_col()\n        if fnames is None:\n            fnames = [str(i) for i in range(n_features)]\n\n        hdl = ctypes.c_void_p()\n        if export_to_arrow:\n            arrow_arrays = _get_categories(\n                lambda ret: _LIB.XGDMatrixGetCategoriesExportToArrow(\n                    self.handle, None, ctypes.byref(hdl), ctypes.byref(ret)\n                ),\n                fnames,\n                n_features,\n            )\n        else:\n            arrow_arrays = None\n            _check_call(\n                _LIB.XGDMatrixGetCategories(self.handle, None, ctypes.byref(hdl))\n            )\n\n        return Categories(\n            (hdl, lambda: _check_call(_LIB.XGBCategoriesFree(hdl))), arrow_arrays\n        )\n\n    def num_row(self) -> int:\n        \"\"\"Get the number of rows in the DMatrix.\"\"\"\n        ret = c_bst_ulong()\n        _check_call(_LIB.XGDMatrixNumRow(self.handle, ctypes.byref(ret)))\n        return ret.value\n\n    def num_col(self) -> int:\n        \"\"\"Get the number of columns (features) in the DMatrix.\"\"\"\n        ret = c_bst_ulong()\n        _check_call(_LIB.XGDMatrixNumCol(self.handle, ctypes.byref(ret)))\n        return ret.value\n\n    def num_nonmissing(self) -> int:\n        \"\"\"Get the number of non-missing values in the DMatrix.\n\n        .. versionadded:: 1.7.0\n\n        \"\"\"\n        ret = c_bst_ulong()\n        _check_call(_LIB.XGDMatrixNumNonMissing(self.handle, ctypes.byref(ret)))\n        return ret.value\n\n    def data_split_mode(self) -> DataSplitMode:\n        \"\"\"Get the data split mode of the DMatrix.\n\n        .. 
versionadded:: 2.1.0\n\n        \"\"\"\n        ret = c_bst_ulong()\n        _check_call(_LIB.XGDMatrixDataSplitMode(self.handle, ctypes.byref(ret)))\n        return DataSplitMode(ret.value)\n\n    def slice(\n        self, rindex: Union[List[int], np.ndarray], allow_groups: bool = False\n    ) -> \"DMatrix\":\n        \"\"\"Slice the DMatrix and return a new DMatrix that only contains `rindex`.\n\n        Parameters\n        ----------\n        rindex\n            List of indices to be selected.\n        allow_groups\n            Allow slicing of a matrix with a groups attribute\n\n        Returns\n        -------\n        res\n            A new DMatrix containing only selected indices.\n        \"\"\"\n        from .data import _maybe_np_slice\n\n        handle = ctypes.c_void_p()\n\n        rindex = _maybe_np_slice(rindex, dtype=np.int32)\n        _check_call(\n            _LIB.XGDMatrixSliceDMatrixEx(\n                self.handle,\n                c_array(ctypes.c_int, rindex),\n                c_bst_ulong(len(rindex)),\n                ctypes.byref(handle),\n                ctypes.c_int(1 if allow_groups else 0),\n            )\n        )\n        return DMatrix(handle)\n\n    @property\n    def feature_names(self) -> Optional[FeatureNames]:\n        \"\"\"Labels for features (column labels).\n\n        Setting it to ``None`` resets existing feature names.\n\n        \"\"\"\n        length = c_bst_ulong()\n        sarr = ctypes.POINTER(ctypes.c_char_p)()\n        _check_call(\n            _LIB.XGDMatrixGetStrFeatureInfo(\n                self.handle,\n                c_str(\"feature_name\"),\n                ctypes.byref(length),\n                ctypes.byref(sarr),\n            )\n        )\n        feature_names = from_cstr_to_pystr(sarr, length)\n        if not feature_names:\n            return None\n        return feature_names\n\n    @feature_names.setter\n    def feature_names(self, feature_names: Optional[FeatureNames]) -> None:\n        if 
feature_names is None:\n            _check_call(\n                _LIB.XGDMatrixSetStrFeatureInfo(\n                    self.handle, c_str(\"feature_name\"), None, c_bst_ulong(0)\n                )\n            )\n            return\n\n        # validate feature name\n        feature_names = _validate_feature_info(\n            feature_names,\n            self.num_col(),\n            self.data_split_mode() == DataSplitMode.COL,\n            \"feature names\",\n        )\n        if len(feature_names) != len(set(feature_names)):\n            values, counts = np.unique(\n                feature_names,\n                return_index=False,\n                return_inverse=False,\n                return_counts=True,\n            )\n            duplicates = [name for name, cnt in zip(values, counts) if cnt > 1]\n            raise ValueError(\n                f\"feature_names must be unique. Duplicates found: {duplicates}\"\n            )\n\n        # prohibit the use symbols that may affect parsing. e.g. []<\n        if not all(\n            isinstance(f, str) and not any(x in f for x in [\"[\", \"]\", \"<\"])\n            for f in feature_names\n        ):\n            raise ValueError(\n                \"feature_names must be string, and may not contain [, ] or <\"\n            )\n\n        feature_names_bytes = [bytes(f, encoding=\"utf-8\") for f in feature_names]\n        c_feature_names = (ctypes.c_char_p * len(feature_names_bytes))(\n            *feature_names_bytes\n        )\n        _check_call(\n            _LIB.XGDMatrixSetStrFeatureInfo(\n                self.handle,\n                c_str(\"feature_name\"),\n                c_feature_names,\n                c_bst_ulong(len(feature_names)),\n            )\n        )\n\n    @property\n    def feature_types(self) -> Optional[FeatureTypes]:\n        \"\"\"Type of features (column types).\n\n        This is for displaying the results and categorical data support. 
See\n        :py:class:`DMatrix` for details.\n\n        Setting it to ``None`` resets existing feature types.\n\n        \"\"\"\n        length = c_bst_ulong()\n        sarr = ctypes.POINTER(ctypes.c_char_p)()\n        _check_call(\n            _LIB.XGDMatrixGetStrFeatureInfo(\n                self.handle,\n                c_str(\"feature_type\"),\n                ctypes.byref(length),\n                ctypes.byref(sarr),\n            )\n        )\n        res = from_cstr_to_pystr(sarr, length)\n        if not res:\n            return None\n        return res\n\n    @feature_types.setter\n    def feature_types(self, feature_types: Optional[FeatureTypes]) -> None:\n        if feature_types is None:\n            # Reset\n            _check_call(\n                _LIB.XGDMatrixSetStrFeatureInfo(\n                    self.handle, c_str(\"feature_type\"), None, c_bst_ulong(0)\n                )\n            )\n            return\n\n        feature_types = _validate_feature_info(\n            feature_types,\n            self.num_col(),\n            self.data_split_mode() == DataSplitMode.COL,\n            \"feature types\",\n        )\n\n        feature_types_bytes = [bytes(f, encoding=\"utf-8\") for f in feature_types]\n        c_feature_types = (ctypes.c_char_p * len(feature_types_bytes))(\n            *feature_types_bytes\n        )\n        _check_call(\n            _LIB.XGDMatrixSetStrFeatureInfo(\n                self.handle,\n                c_str(\"feature_type\"),\n                c_feature_types,\n                c_bst_ulong(len(feature_types)),\n            )\n        )\n\n\nclass _ProxyDMatrix(DMatrix):\n    \"\"\"A placeholder class when DMatrix cannot be constructed (QuantileDMatrix,\n    inplace_predict).\n\n    \"\"\"\n\n    def __init__(self) -> None:  # pylint: disable=super-init-not-called\n        self.handle = ctypes.c_void_p()\n        _check_call(_LIB.XGProxyDMatrixCreate(ctypes.byref(self.handle)))\n\n    def _ref_data_from_cuda_interface(self, 
data: DataType) -> None:\n        \"\"\"Reference data from CUDA array interface.\"\"\"\n        arrinf = cuda_array_interface(data)\n        _check_call(_LIB.XGProxyDMatrixSetDataCudaArrayInterface(self.handle, arrinf))\n\n    def _ref_data_from_cuda_columnar(self, data: TransformedDf) -> None:\n        \"\"\"Reference data from CUDA columnar format.\"\"\"\n        _check_call(\n            _LIB.XGProxyDMatrixSetDataCudaColumnar(self.handle, data.array_interface())\n        )\n\n    def _ref_data_from_array(self, data: np.ndarray) -> None:\n        \"\"\"Reference data from numpy array.\"\"\"\n        _check_call(_LIB.XGProxyDMatrixSetDataDense(self.handle, array_interface(data)))\n\n    def _ref_data_from_columnar(self, data: TransformedDf) -> None:\n        \"\"\"Reference data from a CPU DataFrame.\"\"\"\n        _check_call(\n            _LIB.XGProxyDMatrixSetDataColumnar(self.handle, data.array_interface())\n        )\n\n    def _ref_data_from_csr(self, csr: scipy.sparse.csr_matrix) -> None:\n        \"\"\"Reference data from scipy csr.\"\"\"\n        _LIB.XGProxyDMatrixSetDataCSR(\n            self.handle,\n            array_interface(csr.indptr),\n            array_interface(csr.indices),\n            array_interface(csr.data),\n            ctypes.c_size_t(csr.shape[1]),\n        )\n\n\nclass _RefMixIn:\n    @property\n    def ref(self) -> Optional[weakref.ReferenceType]:\n        \"\"\"Internal method for retrieving a reference to the training DMatrix.\"\"\"\n        if hasattr(self, \"_ref\"):\n            return self._ref\n        return None\n\n    @ref.setter\n    def ref(self, ref: weakref.ReferenceType) -> None:\n        self._ref = ref\n\n\nclass QuantileDMatrix(DMatrix, _RefMixIn):\n    \"\"\"A DMatrix variant that generates quantilized data directly from input for the\n    ``hist`` tree method. This DMatrix is primarily designed to save memory in training\n    by avoiding intermediate storage. 
Set ``max_bin`` to control the number of bins\n    during quantisation, which should be consistent with the training parameter\n    ``max_bin``. When ``QuantileDMatrix`` is used for validation/test dataset, ``ref``\n    should be another ``QuantileDMatrix`` (or ``DMatrix``, but not recommended as it\n    defeats the purpose of saving memory) constructed from training dataset.  See\n    :py:obj:`xgboost.DMatrix` for documents on meta info.\n\n    .. note::\n\n        Do not use ``QuantileDMatrix`` as validation/test dataset without supplying a\n        reference (the training dataset) ``QuantileDMatrix`` using ``ref`` as some\n        information may be lost in quantisation.\n\n    .. versionadded:: 1.7.0\n\n    Examples\n    --------\n\n    .. code-block::\n\n        from sklearn.datasets import make_regression\n        from sklearn.model_selection import train_test_split\n\n        X, y = make_regression()\n        X_train, X_test, y_train, y_test = train_test_split(X, y)\n        Xy_train = xgb.QuantileDMatrix(X_train, y_train)\n        # It's necessary to have the training DMatrix as a reference for valid\n        # quantiles.\n        Xy_test = xgb.QuantileDMatrix(X_test, y_test, ref=Xy_train)\n\n    Parameters\n    ----------\n    max_bin :\n        The number of histogram bins, should be consistent with the training parameter\n        ``max_bin``.\n\n    ref :\n        The training dataset that provides quantile information, needed when creating\n        validation/test dataset with ``QuantileDMatrix``. Supplying the training DMatrix\n        as a reference means that the same quantisation applied to the training data is\n        applied to the validation/test data.\n\n    max_quantile_batches :\n        Deprecated. This parameter no longer has any effect and will be removed in a\n        future release.\n\n        .. versionadded:: 3.0.0\n\n        .. 
deprecated:: 3.3.0\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(  # pylint: disable=super-init-not-called\n        self,\n        data: DataType,\n        label: Optional[ArrayLike] = None,\n        *,\n        weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        missing: Optional[float] = None,\n        silent: bool = False,\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[FeatureTypes] = None,\n        nthread: Optional[int] = None,\n        max_bin: Optional[int] = None,\n        ref: Optional[DMatrix] = None,\n        group: Optional[ArrayLike] = None,\n        qid: Optional[ArrayLike] = None,\n        label_lower_bound: Optional[ArrayLike] = None,\n        label_upper_bound: Optional[ArrayLike] = None,\n        feature_weights: Optional[ArrayLike] = None,\n        enable_categorical: bool = True,\n        max_quantile_batches: Optional[int] = None,\n        data_split_mode: DataSplitMode = DataSplitMode.ROW,\n    ) -> None:\n        self.max_bin = max_bin\n        self.missing = missing if missing is not None else np.nan\n        self.nthread = nthread if nthread is not None else -1\n        self._silent = silent  # unused, kept for compatibility\n\n        if isinstance(data, ctypes.c_void_p):\n            self.handle = data\n            return\n\n        if qid is not None and group is not None:\n            raise ValueError(\n                \"Only one of the eval_qid or eval_group for each evaluation \"\n                \"dataset should be provided.\"\n            )\n        if isinstance(data, DataIter):\n            if any(\n                info is not None\n                for info in (\n                    label,\n                    weight,\n                    base_margin,\n                    feature_names,\n                    feature_types,\n                    group,\n                    qid,\n                    label_lower_bound,\n             
       label_upper_bound,\n                    feature_weights,\n                )\n            ):\n                raise ValueError(\n                    \"If data iterator is used as input, data like label should be \"\n                    \"specified as batch argument.\"\n                )\n\n        self._init(\n            data,\n            ref=ref,\n            label=label,\n            weight=weight,\n            base_margin=base_margin,\n            group=group,\n            qid=qid,\n            label_lower_bound=label_lower_bound,\n            label_upper_bound=label_upper_bound,\n            feature_weights=feature_weights,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            enable_categorical=enable_categorical,\n            max_quantile_blocks=max_quantile_batches,\n        )\n\n    def _init(\n        self,\n        data: DataType,\n        ref: Optional[DMatrix],\n        enable_categorical: bool,\n        max_quantile_blocks: Optional[int],\n        **meta: Any,\n    ) -> None:\n        from .data import _is_dlpack, _transform_dlpack\n\n        if _is_dlpack(data):\n            # We specialize for dlpack because cupy will take the memory from it so\n            # it can't be transformed twice.\n            data = _transform_dlpack(data)\n        if _is_iter(data):\n            it = data\n        else:\n            it = SingleBatchInternalIter(data=data, **meta)\n\n        handle = ctypes.c_void_p()\n        reset_callback, next_callback = it.get_callbacks(enable_categorical)\n        if it.cache_prefix is not None:\n            raise ValueError(\n                \"QuantileDMatrix doesn't cache data, remove the cache_prefix \"\n                \"in iterator to fix this error.\"\n            )\n\n        config = make_jcargs(\n            nthread=self.nthread,\n            missing=self.missing,\n            max_bin=self.max_bin,\n            max_quantile_blocks=max_quantile_blocks,\n        )\n        ret 
= _LIB.XGQuantileDMatrixCreateFromCallback(\n            None,\n            it.proxy.handle,\n            ref.handle if ref is not None else ref,\n            reset_callback,\n            next_callback,\n            config,\n            ctypes.byref(handle),\n        )\n        it.reraise()\n        # delay check_call to throw intermediate exception first\n        _check_call(ret)\n        self.handle = handle\n\n        if ref is not None:\n            self.ref = weakref.ref(ref)\n\n\nclass ExtMemQuantileDMatrix(DMatrix, _RefMixIn):\n    \"\"\"The external memory version of the :py:class:`QuantileDMatrix`.\n\n    See :doc:`/tutorials/external_memory` for explanation and usage examples, and\n    :py:class:`QuantileDMatrix` for parameter document.\n\n    .. warning::\n\n        This is an experimental feature and subject to change.\n\n    .. versionadded:: 3.0.0\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(  # pylint: disable=super-init-not-called\n        self,\n        data: DataIter,\n        *,\n        missing: Optional[float] = None,\n        nthread: Optional[int] = None,\n        max_bin: Optional[int] = None,\n        ref: Optional[DMatrix] = None,\n        enable_categorical: bool = True,\n        max_quantile_batches: Optional[int] = None,\n        cache_host_ratio: Optional[float] = None,\n    ) -> None:\n        \"\"\"\n        Parameters\n        ----------\n        data :\n            A user-defined :py:class:`DataIter` for loading data.\n\n        max_quantile_batches :\n            Deprecated. See :py:class:`QuantileDMatrix`.\n\n        cache_host_ratio :\n\n            .. versionadded:: 3.1.0\n\n            Used by the GPU implementation. For GPU-based inputs, XGBoost can split the\n            cache into host and device caches to reduce the data transfer overhead. 
This\n            parameter specifies the size of host cache compared to the size of the\n            entire cache: :math:`host / (host + device)`.\n\n            See :ref:`extmem-adaptive-cache` for more info.\n\n        \"\"\"\n        self.max_bin = max_bin\n        self.missing = missing if missing is not None else np.nan\n        self.nthread = nthread if nthread is not None else -1\n\n        self._init(\n            data,\n            ref,\n            enable_categorical=enable_categorical,\n            max_quantile_blocks=max_quantile_batches,\n            cache_host_ratio=(\n                None if cache_host_ratio is None else float(cache_host_ratio)\n            ),\n        )\n        assert self.handle is not None\n\n    def _init(\n        self,\n        it: DataIter,\n        ref: Optional[DMatrix],\n        *,\n        enable_categorical: bool,\n        max_quantile_blocks: Optional[int] = None,\n        cache_host_ratio: Optional[float] = None,\n    ) -> None:\n        args = make_jcargs(\n            missing=self.missing,\n            nthread=self.nthread,\n            cache_prefix=it.cache_prefix if it.cache_prefix else \"\",\n            on_host=it.on_host,\n            max_bin=self.max_bin,\n            min_cache_page_bytes=it.min_cache_page_bytes,\n            # It's called blocks internally due to block-based quantile sketching.\n            max_quantile_blocks=max_quantile_blocks,\n            cache_host_ratio=cache_host_ratio,\n        )\n        handle = ctypes.c_void_p()\n        reset_callback, next_callback = it.get_callbacks(enable_categorical)\n        # We don't need the iter handle (hence None) in Python as reset,next callbacks\n        # are member functions, and ctypes can handle the `self` parameter\n        # automatically.\n        ret = _LIB.XGExtMemQuantileDMatrixCreateFromCallback(\n            None,  # iter\n            it.proxy.handle,  # proxy\n            ref.handle if ref is not None else ref,  # ref\n            
reset_callback,  # reset\n            next_callback,  # next\n            args,  # config\n            ctypes.byref(handle),  # out\n        )\n        it.reraise()\n        # delay check_call to throw intermediate exception first\n        _check_call(ret)\n        self.handle = handle\n\n        if ref is not None:\n            self.ref = weakref.ref(ref)\n\n\nPlainObj = Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]\nMetric = Callable[[np.ndarray, DMatrix], Tuple[str, float]]\n\n\ndef _configure_metrics(params: BoosterParam) -> BoosterParam:\n    if (\n        isinstance(params, dict)\n        and \"eval_metric\" in params\n        and isinstance(params[\"eval_metric\"], list)\n    ):\n        eval_metrics = params[\"eval_metric\"]\n        params.pop(\"eval_metric\", None)\n        params_list = list(params.items())\n        for eval_metric in eval_metrics:\n            params_list += [(\"eval_metric\", eval_metric)]\n        return params_list\n    return params\n\n\nclass Booster:\n    # pylint: disable=too-many-public-methods\n    \"\"\"A Booster of XGBoost.\n\n    Booster is the model of xgboost, that contains low level routines for\n    training, prediction and evaluation.\n    \"\"\"\n\n    def __init__(\n        self,\n        params: Optional[BoosterParam] = None,\n        cache: Optional[Sequence[DMatrix]] = None,\n        model_file: Optional[Union[\"Booster\", bytearray, os.PathLike, str]] = None,\n    ) -> None:\n        \"\"\"\n        Parameters\n        ----------\n        params :\n            Parameters for boosters.\n        cache :\n            List of cache items.\n        model_file :\n            Path to the model file if it's string or PathLike.\n        \"\"\"\n        cache = cache if cache is not None else []\n        for d in cache:\n            if not isinstance(d, DMatrix):\n                raise TypeError(f\"Invalid cache item: {type(d).__name__}\", cache)\n\n        dmats = c_array(ctypes.c_void_p, [d.handle for d 
in cache])\n        self.handle: Optional[ctypes.c_void_p] = ctypes.c_void_p()\n        _check_call(\n            _LIB.XGBoosterCreate(\n                dmats, c_bst_ulong(len(cache)), ctypes.byref(self.handle)\n            )\n        )\n        for d in cache:\n            # Validate feature only after the feature names are saved into booster.\n            self._assign_dmatrix_features(d)\n\n        if isinstance(model_file, Booster):\n            assert self.handle is not None\n            # We use the pickle interface for getting memory snapshot from\n            # another model, and load the snapshot with this booster.\n            state = model_file.__getstate__()\n            handle = state[\"handle\"]\n            del state[\"handle\"]\n            ptr = (ctypes.c_char * len(handle)).from_buffer(handle)\n            length = c_bst_ulong(len(handle))\n            _check_call(_LIB.XGBoosterUnserializeFromBuffer(self.handle, ptr, length))\n            self.__dict__.update(state)\n        elif isinstance(model_file, (str, os.PathLike, bytearray)):\n            self.load_model(model_file)\n        elif model_file is None:\n            pass\n        else:\n            raise TypeError(\"Unknown type:\", model_file)\n\n        params = params or {}\n        params_processed = _configure_metrics(params.copy())\n        params_processed = self._configure_constraints(params_processed)\n        if isinstance(params_processed, list):\n            params_processed.append((\"validate_parameters\", True))\n        else:\n            params_processed[\"validate_parameters\"] = True\n\n        self.set_param(params_processed or {})\n\n    def _transform_monotone_constrains(\n        self, value: Union[Dict[str, int], str, Tuple[int, ...]]\n    ) -> Union[Tuple[int, ...], str]:\n        if isinstance(value, str):\n            return value\n        if isinstance(value, tuple):\n            return value\n\n        constrained_features = set(value.keys())\n        feature_names = 
self.feature_names or []\n        if not constrained_features.issubset(set(feature_names)):\n            raise ValueError(\n                \"Constrained features are not a subset of training data feature names\"\n            )\n\n        return tuple(value.get(name, 0) for name in feature_names)\n\n    def _transform_interaction_constraints(\n        self, value: Union[Sequence[Sequence[str]], str]\n    ) -> Union[str, List[List[int]]]:\n        if isinstance(value, str):\n            return value\n        feature_idx_mapping = {\n            name: idx for idx, name in enumerate(self.feature_names or [])\n        }\n\n        try:\n            result = []\n            for constraint in value:\n                result.append(\n                    [feature_idx_mapping[feature_name] for feature_name in constraint]\n                )\n            return result\n        except KeyError as e:\n            raise ValueError(\n                \"Constrained features are not a subset of training data feature names\"\n            ) from e\n\n    def _configure_constraints(self, params: BoosterParam) -> BoosterParam:\n        if isinstance(params, dict):\n            # we must use list in the internal code as there can be multiple metrics\n            # with the same parameter name `eval_metric` (same key for dictionary).\n            params = list(params.items())\n        for idx, param in enumerate(params):\n            name, value = param\n            if value is None:\n                continue\n\n            if name == \"monotone_constraints\":\n                params[idx] = (name, self._transform_monotone_constrains(value))\n            elif name == \"interaction_constraints\":\n                params[idx] = (name, self._transform_interaction_constraints(value))\n\n        return params\n\n    def __del__(self) -> None:\n        if hasattr(self, \"handle\") and self.handle is not None:\n            _check_call(_LIB.XGBoosterFree(self.handle))\n            self.handle = 
None\n\n    def __getstate__(self) -> Dict:\n        # can't pickle ctypes pointers, put model content in a bytearray\n        this = self.__dict__.copy()\n        handle = this[\"handle\"]\n        if handle is not None:\n            length = c_bst_ulong()\n            cptr = ctypes.POINTER(ctypes.c_char)()\n            _check_call(\n                _LIB.XGBoosterSerializeToBuffer(\n                    self.handle, ctypes.byref(length), ctypes.byref(cptr)\n                )\n            )\n            buf = ctypes2buffer(cptr, length.value)\n            this[\"handle\"] = buf\n        return this\n\n    def __setstate__(self, state: Dict) -> None:\n        # reconstruct the handle from raw data\n        handle = state[\"handle\"]\n        if handle is not None:\n            buf = handle\n            dmats = c_array(ctypes.c_void_p, [])\n            handle = ctypes.c_void_p()\n            _check_call(\n                _LIB.XGBoosterCreate(dmats, c_bst_ulong(0), ctypes.byref(handle))\n            )\n            length = c_bst_ulong(len(buf))\n            ptr = (ctypes.c_char * len(buf)).from_buffer(buf)\n            _check_call(_LIB.XGBoosterUnserializeFromBuffer(handle, ptr, length))\n            state[\"handle\"] = handle\n        self.__dict__.update(state)\n\n    def __getitem__(self, val: Union[Integer, tuple, slice, EllipsisType]) -> \"Booster\":\n        \"\"\"Get a slice of the tree-based model. Attributes like `best_iteration` and\n        `best_score` are removed in the resulting booster.\n\n        .. 
versionadded:: 1.3.0\n\n        \"\"\"\n        # convert to slice for all other types\n        if isinstance(val, (np.integer, int)):\n            val = slice(int(val), int(val + 1))\n        if isinstance(val, EllipsisType):\n            val = slice(0, 0)\n        if isinstance(val, tuple):\n            raise ValueError(\"Only supports slicing through 1 dimension.\")\n        # All supported types are now slice\n        if not isinstance(val, slice):\n            msg = _expect((int, slice, np.integer, EllipsisType), type(val))\n            raise TypeError(msg)\n\n        if isinstance(val.start, EllipsisType) or val.start is None:\n            start = 0\n        else:\n            start = val.start\n        if isinstance(val.stop, EllipsisType) or val.stop is None:\n            stop = 0\n        else:\n            stop = val.stop\n            if stop < start:\n                raise ValueError(\"Invalid slice\", val)\n\n        step = val.step if val.step is not None else 1\n\n        c_start = ctypes.c_int(start)\n        c_stop = ctypes.c_int(stop)\n        c_step = ctypes.c_int(step)\n\n        sliced_handle = ctypes.c_void_p()\n        status = _LIB.XGBoosterSlice(\n            self.handle, c_start, c_stop, c_step, ctypes.byref(sliced_handle)\n        )\n        if status == -2:\n            raise IndexError(\"Layer index out of range\")\n        _check_call(status)\n\n        sliced = Booster()\n        _check_call(_LIB.XGBoosterFree(sliced.handle))\n        sliced.handle = sliced_handle\n        return sliced\n\n    def __iter__(self) -> Generator[\"Booster\", None, None]:\n        \"\"\"Iterator method for getting individual trees.\n\n        .. versionadded:: 2.0.0\n\n        \"\"\"\n        for i in range(0, self.num_boosted_rounds()):\n            yield self[i]\n\n    def save_config(self) -> str:\n        \"\"\"Output internal parameter configuration of Booster as a JSON\n        string.\n\n        .. 
versionadded:: 1.0.0\n\n        \"\"\"\n        json_string = ctypes.c_char_p()\n        length = c_bst_ulong()\n        _check_call(\n            _LIB.XGBoosterSaveJsonConfig(\n                self.handle, ctypes.byref(length), ctypes.byref(json_string)\n            )\n        )\n        assert json_string.value is not None\n        result = json_string.value.decode()  # pylint: disable=no-member\n        return result\n\n    def load_config(self, config: str) -> None:\n        \"\"\"Load configuration returned by `save_config`.\n\n        .. versionadded:: 1.0.0\n        \"\"\"\n        assert isinstance(config, str)\n        _check_call(_LIB.XGBoosterLoadJsonConfig(self.handle, c_str(config)))\n\n    def __copy__(self) -> \"Booster\":\n        return self.__deepcopy__(None)\n\n    def __deepcopy__(self, _: Any) -> \"Booster\":\n        \"\"\"Return a copy of booster.\"\"\"\n        return Booster(model_file=self)\n\n    def copy(self) -> \"Booster\":\n        \"\"\"Copy the booster object.\n\n        Returns\n        -------\n        booster :\n            A copied booster model\n        \"\"\"\n        return copy.copy(self)\n\n    def reset(self) -> \"Booster\":\n        \"\"\"Reset the booster object to release data caches used for training.\n\n        .. 
versionadded:: 3.0.0\n\n        \"\"\"\n        _check_call(_LIB.XGBoosterReset(self.handle))\n        return self\n\n    def attr(self, key: str) -> Optional[str]:\n        \"\"\"Get attribute string from the Booster.\n\n        Parameters\n        ----------\n        key :\n            The key to get attribute from.\n\n        Returns\n        -------\n        value :\n            The attribute value of the key, returns None if attribute do not exist.\n        \"\"\"\n        ret = ctypes.c_char_p()\n        success = ctypes.c_int()\n        _check_call(\n            _LIB.XGBoosterGetAttr(\n                self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)\n            )\n        )\n        if success.value != 0:\n            value = ret.value\n            assert value\n            return py_str(value)\n        return None\n\n    def attributes(self) -> Dict[str, Optional[str]]:\n        \"\"\"Get attributes stored in the Booster as a dictionary.\n\n        Returns\n        -------\n        result : dictionary of  attribute_name: attribute_value pairs of strings.\n            Returns an empty dict if there's no attributes.\n        \"\"\"\n        length = c_bst_ulong()\n        sarr = ctypes.POINTER(ctypes.c_char_p)()\n        _check_call(\n            _LIB.XGBoosterGetAttrNames(\n                self.handle, ctypes.byref(length), ctypes.byref(sarr)\n            )\n        )\n        attr_names = from_cstr_to_pystr(sarr, length)\n        return {n: self.attr(n) for n in attr_names}\n\n    def set_attr(self, **kwargs: Optional[Any]) -> None:\n        \"\"\"Set the attribute of the Booster.\n\n        Parameters\n        ----------\n        **kwargs\n            The attributes to set. 
Setting a value to None deletes an attribute.\n        \"\"\"\n        for key, value in kwargs.items():\n            c_value = None\n            if value is not None:\n                c_value = c_str(str(value))\n            _check_call(_LIB.XGBoosterSetAttr(self.handle, c_str(key), c_value))\n\n    def _get_feature_info(self, field: str) -> Optional[FeatureInfo]:\n        length = c_bst_ulong()\n        sarr = ctypes.POINTER(ctypes.c_char_p)()\n        if not hasattr(self, \"handle\") or self.handle is None:\n            return None\n        _check_call(\n            _LIB.XGBoosterGetStrFeatureInfo(\n                self.handle,\n                c_str(field),\n                ctypes.byref(length),\n                ctypes.byref(sarr),\n            )\n        )\n        feature_info = from_cstr_to_pystr(sarr, length)\n        return feature_info if feature_info else None\n\n    def _set_feature_info(self, features: Optional[FeatureInfo], field: str) -> None:\n        if features is not None:\n            assert isinstance(features, list)\n            feature_info_bytes = [bytes(f, encoding=\"utf-8\") for f in features]\n            c_feature_info = (ctypes.c_char_p * len(feature_info_bytes))(\n                *feature_info_bytes\n            )\n            _check_call(\n                _LIB.XGBoosterSetStrFeatureInfo(\n                    self.handle,\n                    c_str(field),\n                    c_feature_info,\n                    c_bst_ulong(len(features)),\n                )\n            )\n        else:\n            _check_call(\n                _LIB.XGBoosterSetStrFeatureInfo(\n                    self.handle, c_str(field), None, c_bst_ulong(0)\n                )\n            )\n\n    @property\n    def feature_types(self) -> Optional[FeatureTypes]:\n        \"\"\"Feature types for this booster.  Can be directly set by input data or by\n        assignment.  
See :py:class:`DMatrix` for details.\n\n        \"\"\"\n        return self._get_feature_info(\"feature_type\")\n\n    @feature_types.setter\n    def feature_types(self, features: Optional[FeatureTypes]) -> None:\n        self._set_feature_info(features, \"feature_type\")\n\n    @property\n    def feature_names(self) -> Optional[FeatureNames]:\n        \"\"\"Feature names for this booster.  Can be directly set by input data or by\n        assignment.\n\n        \"\"\"\n        return self._get_feature_info(\"feature_name\")\n\n    @feature_names.setter\n    def feature_names(self, features: Optional[FeatureNames]) -> None:\n        self._set_feature_info(features, \"feature_name\")\n\n    def get_categories(self, export_to_arrow: bool = False) -> Categories:\n        \"\"\"Same method as :py:meth:`DMatrix.get_categories`.\"\"\"\n\n        fnames = self.feature_names\n        n_features = self.num_features()\n        if fnames is None:\n            fnames = [str(i) for i in range(n_features)]\n\n        hdl = ctypes.c_void_p()\n        if export_to_arrow:\n            arrow_arrays = _get_categories(\n                lambda ret: _LIB.XGBoosterGetCategoriesExportToArrow(\n                    self.handle, None, ctypes.byref(hdl), ctypes.byref(ret)\n                ),\n                fnames,\n                n_features,\n            )\n        else:\n            arrow_arrays = None\n            _check_call(\n                _LIB.XGBoosterGetCategories(self.handle, None, ctypes.byref(hdl))\n            )\n\n        return Categories(\n            (hdl, lambda: _check_call(_LIB.XGBCategoriesFree(hdl))), arrow_arrays\n        )\n\n    def set_param(\n        self,\n        params: Union[Dict, Iterable[Tuple[str, Any]], str],\n        value: Optional[str] = None,\n    ) -> None:\n        \"\"\"Set parameters into the Booster.\n\n        Parameters\n        ----------\n        params :\n           list of key,value pairs, dict of key to value or simply str key\n        
value :\n           value of the specified parameter, when params is str key\n        \"\"\"\n        if isinstance(params, Mapping):\n            params = params.items()\n        elif isinstance(params, str) and value is not None:\n            params = [(params, value)]\n        for key, val in cast(Iterable[Tuple[str, str]], params):\n            if isinstance(val, np.ndarray):\n                val = val.tolist()\n            elif hasattr(val, \"__cuda_array_interface__\") and hasattr(val, \"tolist\"):\n                val = val.tolist()\n            if val is not None:\n                _check_call(\n                    _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))\n                )\n\n    def update(\n        self,\n        dtrain: DMatrix,\n        iteration: int,\n        fobj: Optional[PlainObj] = None,\n    ) -> None:\n        \"\"\"Update for one iteration, with objective function calculated\n        internally.\n\n        .. warning::\n\n            This function should not be called directly by users.\n\n        Parameters\n        ----------\n        dtrain :\n            Training data.\n        iteration :\n            The current training iteration.\n        fobj :\n            Custom objective function.\n\n        \"\"\"\n        if not isinstance(dtrain, DMatrix):\n            raise TypeError(f\"Invalid training matrix: {type(dtrain).__name__}\")\n        self._assign_dmatrix_features(dtrain)\n\n        if fobj is None:\n            _check_call(\n                _LIB.XGBoosterUpdateOneIter(\n                    self.handle, ctypes.c_int(iteration), dtrain.handle\n                )\n            )\n            return\n\n        # Forward the gradient calculation to the boost method.\n        self.boost(\n            dtrain,\n            iteration=iteration,\n            fobj=fobj,\n        )\n\n    def boost(\n        self,\n        dtrain: DMatrix,\n        iteration: int,\n        *,\n        grad: Optional[NumpyOrCupy] = None,\n  
      hess: Optional[NumpyOrCupy] = None,\n        fobj: Optional[PlainObj] = None,\n    ) -> None:\n        \"\"\"Boost the booster for one iteration with customized gradient statistics.\n\n        .. warning::\n\n            Like :py:meth:`.update`, this function should not be called directly by\n            users.\n\n        Parameters\n        ----------\n        dtrain :\n            The training DMatrix.\n        iteration :\n            The current training iteration.\n        grad :\n            The first order of gradient.\n        hess :\n            The second order of gradient.\n        fobj :\n            A custom objective function. If gradient is None, then an objective function\n            is required.\n\n        \"\"\"\n        self._assign_dmatrix_features(dtrain)\n\n        if all(arg is not None for arg in (grad, hess, fobj)):\n            raise ValueError(\n                \"Provide either the objective, or the gradient and hessian, not both.\"\n            )\n        n_samples = dtrain.num_row()\n\n        def train_one_iter(grad: NumpyOrCupy, hess: NumpyOrCupy) -> None:\n            _check_call(\n                _LIB.XGBoosterTrainOneIter(\n                    self.handle,\n                    dtrain.handle,\n                    iteration,\n                    _grad_arrinf(grad, n_samples),\n                    _grad_arrinf(hess, n_samples),\n                )\n            )\n\n        if grad is not None or hess is not None:\n            # Handle the case where gradient is directly provided for compatibility with\n            # XGBoost < 3.2\n            train_one_iter(grad, hess)\n            return\n\n        if fobj is None:\n            raise ValueError(\n                \"Invalid input for the boost function. 
Either the gradient or \"\n                \"the objective should have a valid value.\"\n            )\n\n        y_pred = self.predict(dtrain, output_margin=True, training=True)\n\n        vgrad: Optional[ArrayLike]\n        vhess: Optional[ArrayLike]\n\n        if isinstance(fobj, TreeObjective):\n            # full gradient for leaf values\n            vgrad, vhess = fobj(iteration, y_pred, dtrain)\n            # Reduced gradient for split nodes\n            split_grad = fobj.split_grad(iteration, vgrad, vhess)\n            # Switch the role of gradient if there's no split gradient but the tree\n            # objective is used.\n            if split_grad is not None:\n                sgrad, shess = split_grad\n            else:\n                sgrad, shess = vgrad, vhess\n                vgrad, vhess = None, None\n        elif isinstance(fobj, Objective):\n            sgrad, shess = fobj(iteration, y_pred, dtrain)\n            vgrad, vhess = None, None\n        else:\n            # Plain callable\n            sgrad, shess = fobj(y_pred, dtrain)\n            vgrad, vhess = None, None\n\n        if vgrad is None:\n            train_one_iter(sgrad, shess)\n            return\n\n        _check_call(\n            _LIB.XGBoosterTrainOneIterWithSplitGrad(\n                self.handle,\n                dtrain.handle,\n                iteration,\n                _grad_arrinf(sgrad, n_samples),\n                _grad_arrinf(shess, n_samples),\n                _grad_arrinf(vgrad, n_samples),\n                _grad_arrinf(vhess, n_samples),\n            )\n        )\n\n    def eval_set(\n        self,\n        evals: Sequence[Tuple[DMatrix, str]],\n        iteration: int = 0,\n        feval: Optional[Metric] = None,\n        output_margin: bool = True,\n    ) -> str:\n        \"\"\"Evaluate a set of data.\n\n        Parameters\n        ----------\n        evals :\n            List of items to be evaluated.\n        iteration :\n            Current iteration.\n        feval 
:\n            Custom evaluation function.\n\n        Returns\n        -------\n        result: str\n            Evaluation result string.\n        \"\"\"\n        for d in evals:\n            if not isinstance(d[0], DMatrix):\n                raise TypeError(f\"expected DMatrix, got {type(d[0]).__name__}\")\n            if not isinstance(d[1], str):\n                raise TypeError(f\"expected string, got {type(d[1]).__name__}\")\n            self._assign_dmatrix_features(d[0])\n\n        dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals])\n        evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals])\n        msg = ctypes.c_char_p()\n        _check_call(\n            _LIB.XGBoosterEvalOneIter(\n                self.handle,\n                ctypes.c_int(iteration),\n                dmats,\n                evnames,\n                c_bst_ulong(len(evals)),\n                ctypes.byref(msg),\n            )\n        )\n        assert msg.value is not None\n        res = msg.value.decode()  # pylint: disable=no-member\n        if feval is not None:\n            for dmat, evname in evals:\n                feval_ret = feval(\n                    self.predict(dmat, training=False, output_margin=output_margin),\n                    dmat,\n                )\n                if isinstance(feval_ret, list):\n                    for name, val in feval_ret:\n                        # pylint: disable=consider-using-f-string\n                        res += \"\\t%s-%s:%f\" % (evname, name, val)\n                else:\n                    name, val = feval_ret\n                    # pylint: disable=consider-using-f-string\n                    res += \"\\t%s-%s:%f\" % (evname, name, val)\n        return res\n\n    def eval(self, data: DMatrix, name: str = \"eval\", iteration: int = 0) -> str:\n        \"\"\"Evaluate the model on mat.\n\n        Parameters\n        ----------\n        data :\n            The dmatrix storing the input.\n\n        name :\n       
     The name of the dataset.\n\n        iteration :\n            The current iteration number.\n\n        Returns\n        -------\n        result: str\n            Evaluation result string.\n        \"\"\"\n        self._assign_dmatrix_features(data)\n        return self.eval_set([(data, name)], iteration)\n\n    # pylint: disable=too-many-function-args\n    @_deprecate_positional_args\n    def predict(\n        self,\n        data: DMatrix,\n        *,\n        output_margin: bool = False,\n        pred_leaf: bool = False,\n        pred_contribs: bool = False,\n        approx_contribs: bool = False,\n        pred_interactions: bool = False,\n        validate_features: bool = True,\n        training: bool = False,\n        iteration_range: IterationRange = (0, 0),\n        strict_shape: bool = False,\n    ) -> np.ndarray:\n        \"\"\"Predict with data.  The full model will be used unless `iteration_range` is\n        specified, meaning users have to either slice the model or use the\n        ``best_iteration`` attribute to get prediction from best model returned from\n        early stopping.\n\n        .. note::\n\n            See :doc:`Prediction </prediction>` for issues like thread safety and a\n            summary of outputs from this function.\n\n        Parameters\n        ----------\n        data :\n            The dmatrix storing the input.\n\n        output_margin :\n            Whether to output the raw untransformed margin value.\n\n        pred_leaf :\n            When this option is on, the output will be a matrix of (nsample,\n            ntrees) with each record indicating the predicted leaf index of\n            each sample in each tree.  
Note that the leaf index of a tree is\n            unique per tree, so you may find leaf 1 in both tree 1 and tree 0.\n\n        pred_contribs :\n            When this is True the output will be a matrix of size (nsample,\n            nfeats + 1) with each record indicating the feature contributions\n            (SHAP values) for that prediction. The sum of all feature\n            contributions is equal to the raw untransformed margin value of the\n            prediction. Note the final column is the bias term.\n\n        approx_contribs :\n            Approximate the contributions of each feature.  Used when ``pred_contribs``\n            or ``pred_interactions`` is set to True.  Changing the default of this\n            parameter (False) is not recommended.\n\n        pred_interactions :\n            When this is True the output will be a matrix of size (nsample,\n            nfeats + 1, nfeats + 1) indicating the SHAP interaction values for\n            each pair of features. The sum of each row (or column) of the\n            interaction values equals the corresponding SHAP value (from\n            pred_contribs), and the sum of the entire matrix equals the raw\n            untransformed margin value of the prediction. Note the last row and\n            column correspond to the bias term.\n\n        validate_features :\n            When this is True, validate that the Booster's and data's\n            feature_names are identical.  Otherwise, it is assumed that the\n            feature_names are the same.\n\n        training :\n            Whether the prediction value is used for training.  This can effect `dart`\n            booster, which performs dropouts during training iterations but use all\n            trees for inference. If you want to obtain result with dropouts, set this\n            parameter to `True`.  Also, the parameter is set to true when obtaining\n            prediction for custom objective function.\n\n            .. 
versionadded:: 1.0.0\n\n        iteration_range :\n            Specifies which layer of trees are used in prediction.  For example, if a\n            random forest is trained with 100 rounds.  Specifying `iteration_range=(10,\n            20)`, then only the forests built during [10, 20) (half open set) rounds are\n            used in this prediction.\n\n            .. versionadded:: 1.4.0\n\n        strict_shape :\n            When set to True, output shape is invariant to whether classification is\n            used.  For both value and margin prediction, the output shape is (n_samples,\n            n_groups), n_groups == 1 when multi-class is not used.  Default to False, in\n            which case the output shape can be (n_samples, ) if multi-class is not used.\n\n            .. versionadded:: 1.4.0\n\n        Returns\n        -------\n        prediction : numpy array\n\n        \"\"\"\n        if not isinstance(data, DMatrix):\n            raise TypeError(\"Expecting data to be a DMatrix object, got: \", type(data))\n        if validate_features:\n            fn = data.feature_names\n            self._validate_features(fn)\n        args = {\n            \"type\": 0,\n            \"training\": training,\n            \"iteration_begin\": int(iteration_range[0]),\n            \"iteration_end\": int(iteration_range[1]),\n            \"strict_shape\": strict_shape,\n        }\n\n        def assign_type(t: int) -> None:\n            if args[\"type\"] != 0:\n                raise ValueError(\"One type of prediction at a time.\")\n            args[\"type\"] = t\n\n        if output_margin:\n            assign_type(1)\n        if pred_contribs:\n            assign_type(2 if not approx_contribs else 3)\n        if pred_interactions:\n            assign_type(4 if not approx_contribs else 5)\n        if pred_leaf:\n            assign_type(6)\n        preds = ctypes.POINTER(ctypes.c_float)()\n        shape = ctypes.POINTER(c_bst_ulong)()\n        dims = c_bst_ulong()\n      
  _check_call(\n            _LIB.XGBoosterPredictFromDMatrix(\n                self.handle,\n                data.handle,\n                from_pystr_to_cstr(json.dumps(args)),\n                ctypes.byref(shape),\n                ctypes.byref(dims),\n                ctypes.byref(preds),\n            )\n        )\n        return _prediction_output(shape, dims, preds, False)\n\n    # pylint: disable=too-many-statements\n    @_deprecate_positional_args\n    def inplace_predict(\n        self,\n        data: DataType,\n        *,\n        iteration_range: IterationRange = (0, 0),\n        predict_type: str = \"value\",\n        missing: float = np.nan,\n        validate_features: bool = True,\n        base_margin: Any = None,\n        strict_shape: bool = False,\n    ) -> NumpyOrCupy:\n        \"\"\"Run prediction in-place when possible, Unlike :py:meth:`predict` method,\n        inplace prediction does not cache the prediction result.\n\n        Calling only ``inplace_predict`` in multiple threads is safe and lock\n        free.  But the safety does not hold when used in conjunction with other\n        methods. E.g. you can't train the booster in one thread and perform\n        prediction in the other.\n\n        .. note::\n\n            If the device ordinal of the input data doesn't match the one configured for\n            the booster, data will be copied to the booster device.\n\n        .. code-block:: python\n\n            booster.set_param({\"device\": \"cuda:0\"})\n            booster.inplace_predict(cupy_array)\n\n            booster.set_param({\"device\": \"cpu\"})\n            booster.inplace_predict(numpy_array)\n\n        .. 
versionadded:: 1.1.0\n\n        Parameters\n        ----------\n        data :\n            The input data.\n        iteration_range :\n            See :py:meth:`predict` for details.\n        predict_type :\n            * `value` Output model prediction values.\n            * `margin` Output the raw untransformed margin value.\n        missing :\n            See :py:obj:`xgboost.DMatrix` for details.\n        validate_features:\n            See :py:meth:`xgboost.Booster.predict` for details.\n        base_margin:\n            See :py:obj:`xgboost.DMatrix` for details.\n\n            .. versionadded:: 1.4.0\n\n        strict_shape:\n            See :py:meth:`xgboost.Booster.predict` for details.\n\n            .. versionadded:: 1.4.0\n\n        Returns\n        -------\n        prediction : numpy.ndarray/cupy.ndarray\n            The prediction result.  When input data is on GPU, prediction result is\n            stored in a cupy array.\n\n        \"\"\"\n        preds = ctypes.POINTER(ctypes.c_float)()\n\n        # once caching is supported, we can pass id(data) as cache id.\n        args = make_jcargs(\n            type=1 if predict_type == \"margin\" else 0,\n            training=False,\n            iteration_begin=int(iteration_range[0]),\n            iteration_end=int(iteration_range[1]),\n            missing=missing,\n            strict_shape=strict_shape,\n            cache_id=0,\n        )\n        shape = ctypes.POINTER(c_bst_ulong)()\n        dims = c_bst_ulong()\n\n        if base_margin is not None:\n            proxy: Optional[_ProxyDMatrix] = _ProxyDMatrix()\n            assert proxy is not None\n            proxy.set_info(base_margin=base_margin)\n            p_handle = proxy.handle\n        else:\n            proxy = None\n            p_handle = ctypes.c_void_p()\n        assert proxy is None or isinstance(proxy, _ProxyDMatrix)\n\n        from .data import (\n            ArrowTransformed,\n            PandasTransformed,\n            _is_arrow,\n     
       _is_cudf_df,\n            _is_cudf_pandas,\n            _is_list,\n            _is_np_array_like,\n            _is_pandas_df,\n            _is_pandas_series,\n            _is_polars,\n            _is_polars_series,\n            _is_tuple,\n            _transform_arrow_table,\n            _transform_pandas_df,\n            _transform_polars_df,\n        )\n\n        if _is_cudf_pandas(data):\n            data = data._fsproxy_fast  # pylint: disable=protected-access\n\n        enable_categorical = True\n        if _is_arrow(data):\n            data, fns, _ = _transform_arrow_table(data, enable_categorical, None, None)\n        if _is_polars_series(data):\n            pl = import_polars()\n            data = pl.DataFrame({data.name: data})\n        if _is_polars(data):\n            data, fns, _ = _transform_polars_df(data, enable_categorical, None, None)\n        if _is_pandas_series(data):\n            import pandas as pd\n\n            data = pd.DataFrame(data)\n        if _is_pandas_df(data):\n            data, fns, _ = _transform_pandas_df(data, enable_categorical)\n            if validate_features:\n                self._validate_features(fns)\n        if _is_list(data) or _is_tuple(data):\n            data = np.array(data)\n\n        if validate_features:\n            if not hasattr(data, \"shape\"):\n                raise TypeError(\n                    \"`shape` attribute is required when `validate_features` is True\"\n                    f\", got: {type(data)}\"\n                )\n            if len(data.shape) != 1 and self.num_features() != data.shape[1]:\n                raise ValueError(\n                    f\"Feature shape mismatch, expected: {self.num_features()}, \"\n                    f\"got {data.shape[1]}\"\n                )\n\n        if _is_np_array_like(data):\n            data, _ = _ensure_np_dtype(data, data.dtype)\n            _check_call(\n                _LIB.XGBoosterPredictFromDense(\n                    self.handle,\n           
         array_interface(data),\n                    args,\n                    p_handle,\n                    ctypes.byref(shape),\n                    ctypes.byref(dims),\n                    ctypes.byref(preds),\n                )\n            )\n            return _prediction_output(shape, dims, preds, False)\n        if isinstance(data, (ArrowTransformed, PandasTransformed)):\n            _check_call(\n                _LIB.XGBoosterPredictFromColumnar(\n                    self.handle,\n                    data.array_interface(),\n                    args,\n                    p_handle,\n                    ctypes.byref(shape),\n                    ctypes.byref(dims),\n                    ctypes.byref(preds),\n                )\n            )\n            return _prediction_output(shape, dims, preds, False)\n        if isinstance(data, scipy.sparse.csr_matrix):\n            from .data import transform_scipy_sparse\n\n            data = transform_scipy_sparse(data, True)\n            _check_call(\n                _LIB.XGBoosterPredictFromCSR(\n                    self.handle,\n                    array_interface(data.indptr),\n                    array_interface(data.indices),\n                    array_interface(data.data),\n                    c_bst_ulong(data.shape[1]),\n                    args,\n                    p_handle,\n                    ctypes.byref(shape),\n                    ctypes.byref(dims),\n                    ctypes.byref(preds),\n                )\n            )\n            return _prediction_output(shape, dims, preds, False)\n        if _is_cupy_alike(data):\n            from .data import _transform_cupy_array\n\n            data = _transform_cupy_array(data)\n            interface_str = cuda_array_interface(data)\n            _check_call(\n                _LIB.XGBoosterPredictFromCudaArray(\n                    self.handle,\n                    interface_str,\n                    args,\n                    p_handle,\n                  
  ctypes.byref(shape),\n                    ctypes.byref(dims),\n                    ctypes.byref(preds),\n                )\n            )\n            return _prediction_output(shape, dims, preds, True)\n        if _is_cudf_df(data):\n            from .data import _transform_cudf_df\n\n            df, fns, _ = _transform_cudf_df(data, None, None, enable_categorical)\n            if validate_features:\n                self._validate_features(fns)\n            _check_call(\n                _LIB.XGBoosterPredictFromCudaColumnar(\n                    self.handle,\n                    df.array_interface(),\n                    args,\n                    p_handle,\n                    ctypes.byref(shape),\n                    ctypes.byref(dims),\n                    ctypes.byref(preds),\n                )\n            )\n            return _prediction_output(shape, dims, preds, True)\n\n        raise TypeError(\n            \"Data type:\" + str(type(data)) + \" not supported by inplace prediction.\"\n        )\n\n    def save_model(self, fname: PathLike) -> None:\n        \"\"\"Save the model to a file.\n\n        The model is saved in an XGBoost internal format which is universal among the\n        various XGBoost interfaces. Auxiliary attributes of the Python Booster object\n        (such as feature_names) are only saved when using JSON or UBJSON (default)\n        format. Also, parameters that are not part of the model (like metrics,\n        `max_depth`, etc) are not saved, see :doc:`Model IO </tutorials/saving_model>`\n        for more info.\n\n        .. 
code-block:: python\n\n          model.save_model(\"model.json\")\n          # or\n          model.save_model(\"model.ubj\")\n\n        Parameters\n        ----------\n        fname :\n            Output file name\n\n        \"\"\"\n        if isinstance(fname, (str, os.PathLike)):  # assume file name\n            fname = os.fspath(os.path.expanduser(fname))\n            _check_call(_LIB.XGBoosterSaveModel(self.handle, c_str(fname)))\n        else:\n            raise TypeError(\"fname must be a string or os PathLike\")\n\n    def save_raw(self, raw_format: str = \"ubj\") -> bytearray:\n        \"\"\"Save the model to a in memory buffer representation instead of file.\n\n        The model is saved in an XGBoost internal format which is universal among the\n        various XGBoost interfaces. Auxiliary attributes of the Python Booster object\n        (such as feature_names) are only saved when using JSON or UBJSON (default)\n        format. Also, parameters that are not part of the model (like metrics,\n        `max_depth`, etc) are not saved, see :doc:`Model IO </tutorials/saving_model>`\n        for more info.\n\n        Parameters\n        ----------\n        raw_format :\n            Format of output buffer. Can be `json` or `ubj`.\n\n        Returns\n        -------\n        An in memory buffer representation of the model\n\n        \"\"\"\n        length = c_bst_ulong()\n        cptr = ctypes.POINTER(ctypes.c_char)()\n        config = make_jcargs(format=raw_format)\n        _check_call(\n            _LIB.XGBoosterSaveModelToBuffer(\n                self.handle, config, ctypes.byref(length), ctypes.byref(cptr)\n            )\n        )\n        return ctypes2buffer(cptr, length.value)\n\n    def load_model(self, fname: ModelIn) -> None:\n        \"\"\"Load the model from a file or a bytearray.\n\n        The model is saved in an XGBoost internal format which is universal among the\n        various XGBoost interfaces. 
Auxiliary attributes of the Python Booster object\n        (such as feature_names) are only saved when using JSON or UBJSON (default)\n        format. Also, parameters that are not part of the model (like metrics,\n        `max_depth`, etc) are not saved, see :doc:`Model IO </tutorials/saving_model>`\n        for more info.\n\n        .. code-block:: python\n\n          model.save_model(\"model.json\")\n          model.load_model(\"model.json\")\n\n          # or\n          model.save_model(\"model.ubj\")\n          model.load_model(\"model.ubj\")\n\n          # or\n          buf = model.save_raw()\n          model.load_model(buf)\n\n        Parameters\n        ----------\n        fname :\n            Input file name or memory buffer(see also save_raw)\n\n        \"\"\"\n\n        def is_pathlike(path: ModelIn) -> TypeGuard[os.PathLike[str]]:\n            return isinstance(path, os.PathLike)\n\n        if isinstance(fname, str) or is_pathlike(fname):\n            # assume file name, cannot use os.path.exist to check, file can be from URL.\n            fname = os.fspath(os.path.expanduser(fname))\n            _check_call(_LIB.XGBoosterLoadModel(self.handle, c_str(fname)))\n        elif isinstance(fname, bytearray):\n            buf = fname\n            length = c_bst_ulong(len(buf))\n            ptr = (ctypes.c_char * len(buf)).from_buffer(buf)\n            _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length))\n        else:\n            raise TypeError(\"Unknown file type: \", fname)\n\n    @property\n    def best_iteration(self) -> int:\n        \"\"\"The best iteration during training.\"\"\"\n        best = self.attr(\"best_iteration\")\n        if best is not None:\n            return int(best)\n\n        raise AttributeError(\n            \"`best_iteration` is only defined when early stopping is used.\"\n        )\n\n    @best_iteration.setter\n    def best_iteration(self, iteration: int) -> None:\n        
self.set_attr(best_iteration=iteration)\n\n    @property\n    def best_score(self) -> float:\n        \"\"\"The best evaluation score during training.\"\"\"\n        best = self.attr(\"best_score\")\n        if best is not None:\n            return float(best)\n\n        raise AttributeError(\n            \"`best_score` is only defined when early stopping is used.\"\n        )\n\n    @best_score.setter\n    def best_score(self, score: int) -> None:\n        self.set_attr(best_score=score)\n\n    def num_boosted_rounds(self) -> int:\n        \"\"\"Get number of boosted rounds.  For gblinear this is reset to 0 after\n        serializing the model.\n\n        \"\"\"\n        rounds = ctypes.c_int()\n        assert self.handle is not None\n        _check_call(_LIB.XGBoosterBoostedRounds(self.handle, ctypes.byref(rounds)))\n        return rounds.value\n\n    def num_features(self) -> int:\n        \"\"\"Number of features in booster.\"\"\"\n        features = c_bst_ulong()\n        assert self.handle is not None\n        _check_call(_LIB.XGBoosterGetNumFeature(self.handle, ctypes.byref(features)))\n        return features.value\n\n    def dump_model(\n        self,\n        fout: PathLike,\n        fmap: PathLike = \"\",\n        with_stats: bool = False,\n        dump_format: str = \"text\",\n    ) -> None:\n        \"\"\"Dump model into a text or JSON file.  Unlike :py:meth:`save_model`, the\n        output format is primarily used for visualization or interpretation,\n        hence it's more human readable but cannot be loaded back to XGBoost.\n\n        Parameters\n        ----------\n        fout :\n            Output file name.\n        fmap :\n            Name of the file containing feature map names.\n        with_stats :\n            Controls whether the split statistics are output.\n        dump_format :\n            Format of model dump file. 
Can be 'text' or 'json'.\n        \"\"\"\n        if isinstance(fout, (str, os.PathLike)):\n            fout = os.fspath(os.path.expanduser(fout))\n            # pylint: disable=consider-using-with\n            fout_obj = open(fout, \"w\", encoding=\"utf-8\")\n            need_close = True\n        else:\n            fout_obj = fout\n            need_close = False\n        ret = self.get_dump(fmap, with_stats, dump_format)\n        if dump_format == \"json\":\n            fout_obj.write(\"[\\n\")\n            for i, val in enumerate(ret):\n                fout_obj.write(val)\n                if i < len(ret) - 1:\n                    fout_obj.write(\",\\n\")\n            fout_obj.write(\"\\n]\")\n        else:\n            for i, val in enumerate(ret):\n                fout_obj.write(f\"booster[{i}]:\\n\")\n                fout_obj.write(val)\n        if need_close:\n            fout_obj.close()\n\n    def get_dump(\n        self,\n        fmap: PathLike = \"\",\n        with_stats: bool = False,\n        dump_format: str = \"text\",\n    ) -> List[str]:\n        \"\"\"Returns the model dump as a list of strings.  Unlike :py:meth:`save_model`,\n        the output format is primarily used for visualization or interpretation, hence\n        it's more human readable but cannot be loaded back to XGBoost.\n\n        Parameters\n        ----------\n        fmap :\n            Name of the file containing feature map names.\n        with_stats :\n            Controls whether the split statistics should be included.\n        dump_format :\n            Format of model dump. 
Can be 'text', 'json' or 'dot'.\n\n        \"\"\"\n        fmap = os.fspath(os.path.expanduser(fmap))\n        length = c_bst_ulong()\n        sarr = ctypes.POINTER(ctypes.c_char_p)()\n        _check_call(\n            _LIB.XGBoosterDumpModelEx(\n                self.handle,\n                c_str(fmap),\n                ctypes.c_int(with_stats),\n                c_str(dump_format),\n                ctypes.byref(length),\n                ctypes.byref(sarr),\n            )\n        )\n        res = from_cstr_to_pystr(sarr, length)\n        return res\n\n    def get_fscore(self, fmap: PathLike = \"\") -> Dict[str, Union[float, List[float]]]:\n        \"\"\"Get feature importance of each feature.\n\n        .. note:: Zero-importance features will not be included\n\n           Keep in mind that this function does not include zero-importance feature,\n           i.e.  those features that have not been used in any split conditions.\n\n        Parameters\n        ----------\n        fmap :\n           The name of feature map file\n        \"\"\"\n\n        return self.get_score(fmap, importance_type=\"weight\")\n\n    def get_score(\n        self, fmap: PathLike = \"\", importance_type: str = \"weight\"\n    ) -> Dict[str, Union[float, List[float]]]:\n        \"\"\"Get feature importance of each feature.\n        For tree model Importance type can be defined as:\n\n        * 'weight': the number of times a feature is used to split the data across all\n           trees.\n        * 'gain': the average gain across all splits the feature is used in.\n        * 'cover': the average coverage across all splits the feature is used in.\n        * 'total_gain': the total gain across all splits the feature is used in.\n        * 'total_cover': the total coverage across all splits the feature is used in.\n\n        .. note::\n\n           For linear model, only \"weight\" is defined and it's the normalized\n           coefficients without bias.\n\n        .. 
note:: Zero-importance features will not be included\n\n           Keep in mind that this function does not include zero-importance feature,\n           i.e.  those features that have not been used in any split conditions.\n\n        Parameters\n        ----------\n        fmap :\n           The name of feature map file.\n        importance_type :\n            One of the importance types defined above.\n\n        Returns\n        -------\n        A map between feature names and their scores.  When `gblinear` is used for\n        multi-class classification the scores for each feature is a list with length\n        `n_classes`, otherwise they're scalars.\n        \"\"\"\n        fmap = os.fspath(os.path.expanduser(fmap))\n        features = ctypes.POINTER(ctypes.c_char_p)()\n        scores = ctypes.POINTER(ctypes.c_float)()\n        n_out_features = c_bst_ulong()\n        out_dim = c_bst_ulong()\n        shape = ctypes.POINTER(c_bst_ulong)()\n\n        _check_call(\n            _LIB.XGBoosterFeatureScore(\n                self.handle,\n                make_jcargs(importance_type=importance_type, feature_map=fmap),\n                ctypes.byref(n_out_features),\n                ctypes.byref(features),\n                ctypes.byref(out_dim),\n                ctypes.byref(shape),\n                ctypes.byref(scores),\n            )\n        )\n        features_arr = from_cstr_to_pystr(features, n_out_features)\n        scores_arr = _prediction_output(shape, out_dim, scores, False)\n\n        results: Dict[str, Union[float, List[float]]] = {}\n        if len(scores_arr.shape) > 1 and scores_arr.shape[1] > 1:\n            for feat, score in zip(features_arr, scores_arr):\n                results[feat] = [float(s) for s in score]\n        else:\n            for feat, score in zip(features_arr, scores_arr):\n                results[feat] = float(score)\n        return results\n\n    # pylint: disable=too-many-statements\n    def trees_to_dataframe(self, fmap: PathLike = 
\"\") -> \"PdDataFrame\":\n        \"\"\"Parse a boosted tree model text dump into a pandas DataFrame structure.\n\n        This feature is only defined when the decision tree model is chosen as base\n        learner (`booster in {gbtree, dart}`). It is not defined for other base learner\n        types, such as linear learners (`booster=gblinear`).\n\n        Parameters\n        ----------\n        fmap :\n           The name of feature map file.\n        \"\"\"\n        # pylint: disable=too-many-locals\n        from pandas import DataFrame\n\n        fmap = os.fspath(os.path.expanduser(fmap))\n        if not is_pandas_available():\n            raise ImportError(\n                (\n                    \"pandas must be available to use this method.\"\n                    \"Install pandas before calling again.\"\n                )\n            )\n        booster = json.loads(self.save_config())[\"learner\"][\"gradient_booster\"][\"name\"]\n        if booster not in {\"gbtree\", \"dart\"}:\n            raise ValueError(f\"This method is not defined for Booster type {booster}\")\n\n        tree_ids = []\n        node_ids = []\n        fids = []\n        splits: List[Union[float, str]] = []\n        categories: List[Union[Optional[float], List[str]]] = []\n        y_directs: List[Union[float, str]] = []\n        n_directs: List[Union[float, str]] = []\n        missings: List[Union[float, str]] = []\n        gains = []\n        covers = []\n\n        trees = self.get_dump(fmap, with_stats=True)\n        for i, tree in enumerate(trees):\n            for line in tree.split(\"\\n\"):\n                arr = line.split(\"[\")\n                # Leaf node\n                if len(arr) == 1:\n                    # Last element of line.split is an empty string\n                    if arr == [\"\"]:\n                        continue\n                    # parse string\n                    parse = arr[0].split(\":\")\n                    stats = re.split(\"=|,\", parse[1])\n\n    
                # append to lists\n                    tree_ids.append(i)\n                    node_ids.append(int(re.findall(r\"\\b\\d+\\b\", parse[0])[0]))\n                    fids.append(\"Leaf\")\n                    splits.append(float(\"NAN\"))\n                    categories.append(float(\"NAN\"))\n                    y_directs.append(float(\"NAN\"))\n                    n_directs.append(float(\"NAN\"))\n                    missings.append(float(\"NAN\"))\n                    gains.append(float(stats[1]))\n                    covers.append(float(stats[3]))\n                # Not a Leaf Node\n                else:\n                    # parse string\n                    fid = arr[1].split(\"]\")\n                    if fid[0].find(\"<\") != -1:\n                        # numerical\n                        parse = fid[0].split(\"<\")\n                        splits.append(float(parse[1]))\n                        categories.append(None)\n                    elif fid[0].find(\":{\") != -1:\n                        # categorical\n                        parse = fid[0].split(\":\")\n                        cats = parse[1][1:-1]  # strip the {}\n                        cats_split = cats.split(\",\")\n                        splits.append(float(\"NAN\"))\n                        categories.append(cats_split if cats_split else None)\n                    else:\n                        # indicator (boolean) feature: format is\n                        #   {nid}:[{fname}] yes={yes},no={no}\n                        # No split threshold or missing direction.\n                        bracket_expr = fid[0]\n                        remainder = fid[1] if len(fid) > 1 else \"\"\n                        if (\n                            \"<\" in bracket_expr\n                            or \":{\" in bracket_expr\n                            or \"yes=\" not in remainder\n                            or \"no=\" not in remainder\n                        ):\n                        
    raise ValueError(\n                                f\"Unrecognized split format: [{bracket_expr}]{remainder}\"\n                            )\n                        parse = [bracket_expr]\n                        splits.append(float(\"NAN\"))\n                        categories.append(None)\n                    stats = re.split(\"=|,\", fid[1])\n\n                    # append to lists\n                    tree_ids.append(i)\n                    node_ids.append(int(re.findall(r\"\\b\\d+\\b\", arr[0])[0]))\n                    fids.append(parse[0])\n                    str_i = str(i)\n                    y_directs.append(str_i + \"-\" + stats[1])\n                    n_directs.append(str_i + \"-\" + stats[3])\n                    # Indicator nodes have no explicit missing= field;\n                    # the default (missing) child is the \"no\" direction.\n                    if len(stats) > 5 and stats[4] == \"missing\":\n                        missings.append(str_i + \"-\" + stats[5])\n                        gains.append(float(stats[7]))\n                        covers.append(float(stats[9]))\n                    else:\n                        missings.append(str_i + \"-\" + stats[3])\n                        gains.append(float(stats[5]))\n                        covers.append(float(stats[7]))\n\n        ids = [str(t_id) + \"-\" + str(n_id) for t_id, n_id in zip(tree_ids, node_ids)]\n        df = DataFrame(\n            {\n                \"Tree\": tree_ids,\n                \"Node\": node_ids,\n                \"ID\": ids,\n                \"Feature\": fids,\n                \"Split\": splits,\n                \"Yes\": y_directs,\n                \"No\": n_directs,\n                \"Missing\": missings,\n                \"Gain\": gains,\n                \"Cover\": covers,\n                \"Category\": categories,\n            }\n        )\n\n        return df.sort_values([\"Tree\", \"Node\"]).reset_index(drop=True)\n\n    def 
_assign_dmatrix_features(self, data: DMatrix) -> None:\n        if data.num_row() == 0:\n            return\n\n        fn = data.feature_names\n        ft = data.feature_types\n\n        if self.feature_names is None:\n            self.feature_names = fn\n        if self.feature_types is None:\n            self.feature_types = ft\n\n        self._validate_features(fn)\n\n    def _validate_features(self, feature_names: Optional[FeatureNames]) -> None:\n        if self.feature_names is None:\n            return\n\n        if feature_names is None and self.feature_names is not None:\n            raise ValueError(\n                \"data did not contain feature names, but the following fields are expected: \"\n                + \", \".join(self.feature_names)\n            )\n\n        if self.feature_names != feature_names:\n            dat_missing = set(cast(FeatureNames, self.feature_names)) - set(\n                cast(FeatureNames, feature_names)\n            )\n            my_missing = set(cast(FeatureNames, feature_names)) - set(\n                cast(FeatureNames, self.feature_names)\n            )\n\n            msg = \"feature_names mismatch: {0} {1}\"\n\n            if dat_missing:\n                msg += (\n                    \"\\nexpected \"\n                    + \", \".join(str(s) for s in dat_missing)\n                    + \" in input data\"\n                )\n\n            if my_missing:\n                msg += (\n                    \"\\ntraining data did not have the following fields: \"\n                    + \", \".join(str(s) for s in my_missing)\n                )\n\n            raise ValueError(msg.format(self.feature_names, feature_names))\n\n    def get_split_value_histogram(\n        self,\n        feature: str,\n        fmap: PathLike = \"\",\n        bins: Optional[int] = None,\n        as_pandas: bool = True,\n    ) -> Union[np.ndarray, \"PdDataFrame\"]:\n        \"\"\"Get split value histogram of a feature\n\n        Parameters\n        
----------\n        feature :\n            The name of the feature.\n        fmap :\n            The name of feature map file.\n        bins :\n            The maximum number of bins.\n            Number of bins equals number of unique split values n_unique,\n            if bins == None or bins > n_unique.\n        as_pandas :\n            Return pd.DataFrame when pandas is installed.\n            If False or pandas is not installed, return numpy ndarray.\n\n        Returns\n        -------\n        a histogram of used splitting values for the specified feature\n        either as numpy array or pandas DataFrame.\n        \"\"\"\n        from .data import CAT_T\n\n        xgdump = self.get_dump(fmap=fmap)\n        values = []\n        # pylint: disable=consider-using-f-string\n        regexp = re.compile(r\"\\[{0}<([\\d.Ee+-]+)\\]\".format(feature))\n        for val in xgdump:\n            m = re.findall(regexp, val)\n            values.extend([float(x) for x in m])\n\n        n_unique = len(np.unique(values))\n        bins = max(min(n_unique, bins) if bins is not None else n_unique, 1)\n\n        nph = np.histogram(values, bins=bins)\n        nph_stacked = np.column_stack((nph[1][1:], nph[0]))\n        nph_stacked = nph_stacked[nph_stacked[:, 1] > 0]\n\n        if nph_stacked.size == 0:\n            ft = self.feature_types\n            fn = self.feature_names\n            if fn is None:\n                # Let xgboost generate the feature names.\n                fn = [f\"f{i}\" for i in range(self.num_features())]\n            try:\n                index = fn.index(feature)\n                feature_t: Optional[str] = cast(List[str], ft)[index]\n            except (ValueError, AttributeError, TypeError):\n                # None.index: attr err, None[0]: type err, fn.index(-1): value err\n                feature_t = None\n            if feature_t == CAT_T:  # categorical\n                raise ValueError(\n                    \"Split value historgam doesn't support 
categorical split.\"\n                )\n\n        if as_pandas and is_pandas_available():\n            from pandas import DataFrame\n\n            return DataFrame(nph_stacked, columns=[\"SplitValue\", \"Count\"])\n        if as_pandas and not is_pandas_available():\n            warnings.warn(\n                \"Returning histogram as ndarray\"\n                \" (as_pandas == True, but pandas is not installed).\",\n                UserWarning,\n            )\n        return nph_stacked\n"
  },
  {
    "path": "python-package/xgboost/dask/__init__.py",
    "content": "# pylint: disable=too-many-arguments, too-many-locals\n# pylint: disable=missing-class-docstring\n# pylint: disable=too-many-lines\n# pylint: disable=duplicate-code\n\"\"\"\nDask extensions for distributed training\n----------------------------------------\n\nSee :doc:`Distributed XGBoost with Dask </tutorials/dask>` for simple tutorial.  Also\n:doc:`/python/dask-examples/index` for some examples.\n\nThere are two sets of APIs in this module, one is the functional API including\n``train`` and ``predict`` methods.  Another is stateful Scikit-Learner wrapper\ninherited from single-node Scikit-Learn interface.\n\nThe implementation is heavily influenced by dask_xgboost:\nhttps://github.com/dask/dask-xgboost\n\nOptional dask configuration\n===========================\n\n- **coll_cfg**:\n    Specify the scheduler address along with communicator configurations. This can be\n    used as a replacement of the existing global Dask configuration\n    `xgboost.scheduler_address` (see below). See :ref:`tracker-ip` for more info. The\n    `tracker_host_ip` should specify the IP address of the Dask scheduler node.\n\n  .. versionadded:: 3.0.0\n\n  .. code-block:: python\n\n    from xgboost import dask as dxgb\n    from xgboost.collective import Config\n\n    coll_cfg = Config(\n        retry=1, timeout=20, tracker_host_ip=\"10.23.170.98\", tracker_port=0\n    )\n\n    clf = dxgb.DaskXGBClassifier(coll_cfg=coll_cfg)\n    # or\n    dxgb.train(client, {}, Xy, num_boost_round=10, coll_cfg=coll_cfg)\n\n- **xgboost.scheduler_address**: Specify the scheduler address\n\n  .. versionadded:: 1.6.0\n\n  .. deprecated:: 3.0.0\n\n  .. 
code-block:: python\n\n      dask.config.set({\"xgboost.scheduler_address\": \"192.0.0.100\"})\n      # We can also specify the port.\n      dask.config.set({\"xgboost.scheduler_address\": \"192.0.0.100:12345\"})\n\n\"\"\"\n\nimport logging\nfrom collections import defaultdict\nfrom contextlib import contextmanager\nfrom functools import partial, update_wrapper\nfrom threading import Thread\nfrom typing import (\n    Any,\n    Awaitable,\n    Callable,\n    Dict,\n    Generator,\n    Iterable,\n    List,\n    Optional,\n    ParamSpec,\n    Sequence,\n    Set,\n    Tuple,\n    TypeAlias,\n    TypedDict,\n    TypeGuard,\n    TypeVar,\n    Union,\n)\n\nimport dask\nimport distributed\nimport numpy\nfrom dask import array as da\nfrom dask import bag as db\nfrom dask import dataframe as dd\nfrom dask.delayed import Delayed\nfrom distributed import Future\n\nfrom .. import collective, config\nfrom .._data_utils import Categories\nfrom .._typing import FeatureNames, FeatureTypes, IterationRange\nfrom ..callback import TrainingCallback\nfrom ..collective import Config as CollConfig\nfrom ..collective import _Args as CollArgs\nfrom ..collective import _ArgVals as CollArgsVals\nfrom ..compat import _is_cudf_df, _is_cudf_ser, _is_cupy_alike\nfrom ..core import (\n    Booster,\n    DMatrix,\n    Metric,\n    PlainObj,\n    XGBoostError,\n    _check_distributed_params,\n    _deprecate_positional_args,\n    _expect,\n)\nfrom ..sklearn import (\n    XGBClassifier,\n    XGBClassifierBase,\n    XGBModel,\n    XGBRanker,\n    XGBRankerMixIn,\n    XGBRegressorBase,\n    _can_use_qdm,\n    _check_rf_callback,\n    _cls_predict_proba,\n    _objective_decorator,\n    _wrap_evaluation_matrices,\n    xgboost_model_doc,\n)\nfrom ..tracker import RabitTracker\nfrom ..training import train as worker_train\nfrom .data import _get_dmatrices, no_group_split\nfrom .utils import _DASK_2024_12_1, _DASK_2025_3_0, get_address_from_user, get_n_threads\n\n_DaskCollection: TypeAlias = Union[da.Array, 
dd.DataFrame, dd.Series]\n_DataT: TypeAlias = Union[da.Array, dd.DataFrame]  # do not use series as predictor\nTrainReturnT = TypedDict(\n    \"TrainReturnT\",\n    {\n        \"booster\": Booster,\n        \"history\": Dict,\n    },\n)\n\n__all__ = [\n    \"CommunicatorContext\",\n    \"DaskDMatrix\",\n    \"DaskQuantileDMatrix\",\n    \"DaskXGBRegressor\",\n    \"DaskXGBClassifier\",\n    \"DaskXGBRanker\",\n    \"DaskXGBRFRegressor\",\n    \"DaskXGBRFClassifier\",\n    \"train\",\n    \"predict\",\n    \"inplace_predict\",\n]\n\n# TODOs:\n#   - CV\n#\n# Note for developers:\n#\n#   As of writing asyncio is still a new feature of Python and in depth documentation is\n#   rare.  Best examples of various asyncio tricks are in dask (luckily).  Classes like\n#   Client, Worker are awaitable.  Some general rules for the implementation here:\n#\n#     - Synchronous world is different from asynchronous one, and they don't mix well.\n#     - Write everything with async, then use distributed Client sync function to do the\n#       switch.\n#     - Use Any for type hint when the return value can be union of Awaitable and plain\n#       value.  This is caused by Client.sync can return both types depending on\n#       context.  
Right now there's no good way to silence:\n#\n#         await train(...)\n#\n#       if train returns a Union type.\n\n\nLOGGER = logging.getLogger(\"[xgboost.dask]\")\n\n\ndef _try_start_tracker(\n    n_workers: int,\n    addrs: List[Union[Optional[str], Optional[Tuple[str, int]]]],\n    timeout: Optional[int],\n) -> CollArgs:\n    env: CollArgs = {}\n    try:\n        if isinstance(addrs[0], tuple):\n            host_ip = addrs[0][0]\n            port = addrs[0][1]\n            rabit_tracker = RabitTracker(\n                n_workers=n_workers,\n                host_ip=host_ip,\n                port=port,\n                sortby=\"task\",\n                timeout=0 if timeout is None else timeout,\n            )\n        else:\n            addr = addrs[0]\n            assert isinstance(addr, str) or addr is None\n            rabit_tracker = RabitTracker(\n                n_workers=n_workers,\n                host_ip=addr,\n                sortby=\"task\",\n                timeout=0 if timeout is None else timeout,\n            )\n\n        rabit_tracker.start()\n        # No timeout since we don't want to abort the training\n        thread = Thread(target=rabit_tracker.wait_for)\n        thread.daemon = True\n        thread.start()\n        env.update(rabit_tracker.worker_args())\n\n    except XGBoostError as e:\n        if len(addrs) < 2:\n            raise\n        LOGGER.warning(\n            \"Failed to bind address '%s', trying to use '%s' instead. 
Error:\\n %s\",\n            str(addrs[0]),\n            str(addrs[1]),\n            str(e),\n        )\n        env = _try_start_tracker(n_workers, addrs[1:], timeout)\n\n    return env\n\n\ndef _start_tracker(\n    n_workers: int,\n    addr_from_dask: Optional[str],\n    addr_from_user: Optional[Tuple[str, int]],\n    timeout: Optional[int],\n) -> CollArgs:\n    \"\"\"Start Rabit tracker, recurse to try different addresses.\"\"\"\n    env = _try_start_tracker(n_workers, [addr_from_user, addr_from_dask], timeout)\n    return env\n\n\nclass CommunicatorContext(collective.CommunicatorContext):\n    \"\"\"A context controlling collective communicator initialization and finalization.\"\"\"\n\n    def __init__(self, **args: CollArgsVals) -> None:\n        super().__init__(**args)\n\n        worker = distributed.get_worker()\n        # We use task ID for rank assignment which makes the RABIT rank consistent (but\n        # not the same as task ID is string and \"10\" is sorted before \"2\") with dask\n        # worker name. This outsources the rank assignment to dask and prevents\n        # non-deterministic issue.\n        self.args[\"DMLC_TASK_ID\"] = f\"[xgboost.dask-{worker.name}]:{worker.address}\"\n\n\ndef _get_client(client: Optional[\"distributed.Client\"]) -> \"distributed.Client\":\n    \"\"\"Simple wrapper around testing None.\"\"\"\n    if not isinstance(client, (type(distributed.get_client()), type(None))):\n        raise TypeError(\n            _expect([type(distributed.get_client()), type(None)], type(client))\n        )\n    ret = distributed.get_client() if client is None else client\n    return ret\n\n\n# From the implementation point of view, DaskDMatrix complicates a lot of\n# things.  A large portion of the code base is about syncing and extracting\n# stuff from DaskDMatrix.  
But having an independent data structure gives us a\n# chance to perform some specialized optimizations, like building histogram\n# index directly.\n\n\nclass DaskDMatrix:\n    # pylint: disable=too-many-instance-attributes\n    \"\"\"DMatrix holding on references to Dask DataFrame or Dask Array.  Constructing a\n    `DaskDMatrix` forces all lazy computation to be carried out.  Wait for the input\n    data explicitly if you want to see actual computation of constructing `DaskDMatrix`.\n\n    See doc for :py:obj:`xgboost.DMatrix` constructor for other parameters.  DaskDMatrix\n    accepts only dask collection.\n\n    .. note::\n\n        `DaskDMatrix` does not repartition or move data between workers.  It's the\n        caller's responsibility to balance the data.\n\n    .. note::\n\n        For aligning partitions with ranking query groups, use the\n        :py:class:`DaskXGBRanker` and its ``allow_group_split`` option.\n\n    .. versionadded:: 1.0.0\n\n    Parameters\n    ----------\n    client :\n        Specify the dask client used for training.  
Use default client returned from\n        dask if it's set to None.\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        client: Optional[\"distributed.Client\"],\n        data: _DataT,\n        label: Optional[_DaskCollection] = None,\n        *,\n        weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        missing: Optional[float] = None,\n        silent: bool = False,  # pylint: disable=unused-argument\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[FeatureTypes] = None,\n        group: Optional[_DaskCollection] = None,\n        qid: Optional[_DaskCollection] = None,\n        label_lower_bound: Optional[_DaskCollection] = None,\n        label_upper_bound: Optional[_DaskCollection] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n        enable_categorical: bool = True,\n    ) -> None:\n        client = _get_client(client)\n\n        self.feature_names = feature_names\n        self.feature_types = feature_types\n        if isinstance(feature_types, Categories):\n            raise TypeError(\n                \"The Dask interface can handle categories from DataFrame automatically.\"\n            )\n        self.missing = missing if missing is not None else numpy.nan\n        self.enable_categorical = enable_categorical\n\n        if qid is not None and weight is not None:\n            raise NotImplementedError(\"per-group weight is not implemented.\")\n        if group is not None:\n            raise NotImplementedError(\n                \"group structure is not implemented, use qid instead.\"\n            )\n\n        if len(data.shape) != 2:\n            raise ValueError(f\"Expecting 2 dimensional input, got: {data.shape}\")\n\n        if not isinstance(data, (dd.DataFrame, da.Array)):\n            raise TypeError(_expect((dd.DataFrame, da.Array), type(data)))\n        if not isinstance(label, (dd.DataFrame, 
da.Array, dd.Series, type(None))):\n            raise TypeError(_expect((dd.DataFrame, da.Array, dd.Series), type(label)))\n\n        self._n_cols = data.shape[1]\n        assert isinstance(self._n_cols, int)\n        self.worker_map: Dict[str, List[Future]] = defaultdict(list)\n        self.is_quantile: bool = False\n\n        self._init = client.sync(\n            self._map_local_data,\n            client=client,\n            data=data,\n            label=label,\n            weights=weight,\n            base_margin=base_margin,\n            qid=qid,\n            feature_weights=feature_weights,\n            label_lower_bound=label_lower_bound,\n            label_upper_bound=label_upper_bound,\n        )\n\n    def __await__(self) -> Generator[None, None, \"DaskDMatrix\"]:\n        return self._init.__await__()\n\n    async def _map_local_data(\n        self,\n        *,\n        client: \"distributed.Client\",\n        data: _DataT,\n        label: Optional[_DaskCollection] = None,\n        weights: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        qid: Optional[_DaskCollection] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n        label_lower_bound: Optional[_DaskCollection] = None,\n        label_upper_bound: Optional[_DaskCollection] = None,\n    ) -> \"DaskDMatrix\":\n        \"\"\"Obtain references to local data.\"\"\"\n\n        def inconsistent(\n            left: List[Any], left_name: str, right: List[Any], right_name: str\n        ) -> str:\n            msg = (\n                f\"Partitions between {left_name} and {right_name} are not \"\n                f\"consistent: {len(left)} != {len(right)}.  
\"\n                f\"Please try to repartition/rechunk your data.\"\n            )\n            return msg\n\n        def to_futures(d: _DaskCollection) -> List[Future]:\n            \"\"\"Breaking data into partitions.\"\"\"\n            d = client.persist(d)\n            if (\n                hasattr(d.partitions, \"shape\")\n                and len(d.partitions.shape) > 1\n                and d.partitions.shape[1] > 1\n            ):\n                raise ValueError(\n                    \"Data should be\"\n                    \" partitioned by row. To avoid this specify the number\"\n                    \" of columns for your dask Array explicitly. e.g.\"\n                    \" chunks=(partition_size, -1])\"\n                )\n            return client.futures_of(d)\n\n        def flatten_meta(meta: Optional[_DaskCollection]) -> Optional[List[Future]]:\n            if meta is not None:\n                meta_parts: List[Future] = to_futures(meta)\n                return meta_parts\n            return None\n\n        X_parts = to_futures(data)\n        y_parts = flatten_meta(label)\n        w_parts = flatten_meta(weights)\n        margin_parts = flatten_meta(base_margin)\n        qid_parts = flatten_meta(qid)\n        ll_parts = flatten_meta(label_lower_bound)\n        lu_parts = flatten_meta(label_upper_bound)\n\n        parts: Dict[str, List[Future]] = {\"data\": X_parts}\n\n        def append_meta(m_parts: Optional[List[Future]], name: str) -> None:\n            if m_parts is not None:\n                assert len(X_parts) == len(m_parts), inconsistent(\n                    X_parts, \"X\", m_parts, name\n                )\n                parts[name] = m_parts\n\n        append_meta(y_parts, \"label\")\n        append_meta(w_parts, \"weight\")\n        append_meta(margin_parts, \"base_margin\")\n        append_meta(qid_parts, \"qid\")\n        append_meta(ll_parts, \"label_lower_bound\")\n        append_meta(lu_parts, \"label_upper_bound\")\n        # At 
this point, `parts` looks like:\n        # [(x0, x1, ..), (y0, y1, ..), ..] in future form\n\n        # turn into list of dictionaries.\n        packed_parts: List[Dict[str, Future]] = []\n        for i in range(len(X_parts)):\n            part_dict: Dict[str, Future] = {}\n            for key, value in parts.items():\n                part_dict[key] = value[i]\n            packed_parts.append(part_dict)\n\n        # delay the zipped result\n        # pylint: disable=no-member\n        delayed_parts: List[Delayed] = list(map(dask.delayed, packed_parts))\n        # At this point, the mental model should look like:\n        # [{\"data\": x0, \"label\": y0, ..}, {\"data\": x1, \"label\": y1, ..}, ..]\n\n        # Convert delayed objects into futures and make sure they are realized\n        #\n        # This also makes partitions to align (co-locate) on workers (X_0, y_0 should be\n        # on the same worker).\n        fut_parts: List[Future] = client.compute(delayed_parts)\n        await distributed.wait(fut_parts)  # async wait for parts to be computed\n\n        for part in fut_parts:\n            # Each part is [{\"data\": x0, \"label\": y0, ..}, ...] 
in future form.\n            assert part.status == \"finished\", part.status\n\n        # Preserving the partition order for prediction.\n        self.partition_order = {}\n        for i, part in enumerate(fut_parts):\n            self.partition_order[part.key] = i\n\n        key_to_partition = {part.key: part for part in fut_parts}\n        who_has: Dict[str, Tuple[str, ...]] = await client.scheduler.who_has(\n            keys=[part.key for part in fut_parts]\n        )\n\n        worker_map: Dict[str, List[Future]] = defaultdict(list)\n\n        for key, workers in who_has.items():\n            worker_map[next(iter(workers))].append(key_to_partition[key])\n\n        self.worker_map = worker_map\n\n        if feature_weights is None:\n            self.feature_weights = None\n        else:\n            self.feature_weights = await client.compute(feature_weights).result()\n\n        return self\n\n    def _create_fn_args(self, worker_addr: str) -> Dict[str, Any]:\n        \"\"\"Create a dictionary of objects that can be pickled for function\n        arguments.\n\n        \"\"\"\n        return {\n            \"feature_names\": self.feature_names,\n            \"feature_types\": self.feature_types,\n            \"feature_weights\": self.feature_weights,\n            \"missing\": self.missing,\n            \"enable_categorical\": self.enable_categorical,\n            \"parts\": self.worker_map.get(worker_addr, None),\n            \"is_quantile\": self.is_quantile,\n        }\n\n    def num_col(self) -> int:\n        \"\"\"Get the number of columns (features) in the DMatrix.\n\n        Returns\n        -------\n        number of columns\n        \"\"\"\n        return self._n_cols\n\n\n_MapRetT = TypeVar(\"_MapRetT\")\n_P = ParamSpec(\"_P\")\n\n\nasync def map_worker_partitions(\n    client: Optional[\"distributed.Client\"],\n    func: Callable[_P, _MapRetT],\n    *refs: Any,\n    workers: Sequence[str],\n) -> _MapRetT:\n    \"\"\"Map a function onto partitions of each 
worker.\"\"\"\n    # Note for function purity:\n    # XGBoost is sensitive to data partition and uses random number generator.\n    client = _get_client(client)\n    futures = []\n    for addr in workers:\n        args = []\n        for ref in refs:\n            if isinstance(ref, DaskDMatrix):\n                # pylint: disable=protected-access\n                args.append(ref._create_fn_args(addr))\n            else:\n                args.append(ref)\n\n        def fn(_address: str, *args: _P.args, **kwargs: _P.kwargs) -> List[_MapRetT]:\n            worker = distributed.get_worker()\n\n            if worker.address != _address:\n                raise ValueError(\n                    f\"Invalid worker address: {worker.address}, expecting {_address}. \"\n                    \"This is likely caused by one of the workers died and Dask \"\n                    \"re-scheduled a different one. Resilience is not yet supported.\"\n                )\n            # Turn result into a list for bag construction\n            return [func(*args, **kwargs)]\n\n        # XGBoost requires all workers running training tasks to be unique. Meaning, we\n        # can't run 2 training jobs on the same node. This at best leads to an error\n        # (NCCL unique check), at worst leads to extremely slow training performance\n        # without any warning.\n        #\n        # See disitributed.scheduler.decide_worker for `allow_other_workers`. In\n        # summary, the scheduler chooses a worker from the valid set that has the task\n        # dependencies. Each XGBoost's training task has all dependencies in a single\n        # worker. As a result, the right worker should be picked by the scheduler even\n        # if `allow_other_workers` is set to True.\n        #\n        # In addition, the scheduler only discards the valid set (the `workers` arg) if\n        # there's no candidate can be found. This is likely caused by killed workers. 
In\n        # that case, the check in `fn` should be able to stop the task. If we don't\n        # relax the constraint and prevent Dask from choosing an invalid worker, the\n        # task will simply hangs. We prefer a quick error here.\n        #\n        fut = client.submit(\n            update_wrapper(partial(fn, addr), fn),\n            *args,\n            pure=False,\n            workers=[addr],\n            allow_other_workers=True,\n        )\n        futures.append(fut)\n\n    def first_valid(results: Iterable[Optional[_MapRetT]]) -> Optional[_MapRetT]:\n        for v in results:\n            if v is not None:\n                return v\n        return None\n\n    bag = db.from_delayed(futures)\n    fut = await bag.reduction(first_valid, first_valid)\n    result = await client.compute(fut).result()\n\n    return result\n\n\nclass DaskQuantileDMatrix(DaskDMatrix):\n    \"\"\"A dask version of :py:class:`QuantileDMatrix`. See :py:class:`DaskDMatrix` for\n    parameter documents.\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        client: Optional[\"distributed.Client\"],\n        data: _DataT,\n        label: Optional[_DaskCollection] = None,\n        *,\n        weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        missing: Optional[float] = None,\n        silent: bool = False,  # disable=unused-argument\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[Union[Any, List[Any]]] = None,\n        max_bin: Optional[int] = None,\n        ref: Optional[DaskDMatrix] = None,\n        group: Optional[_DaskCollection] = None,\n        qid: Optional[_DaskCollection] = None,\n        label_lower_bound: Optional[_DaskCollection] = None,\n        label_upper_bound: Optional[_DaskCollection] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n        enable_categorical: bool = True,\n        max_quantile_batches: Optional[int] 
= None,\n    ) -> None:\n        super().__init__(\n            client=client,\n            data=data,\n            label=label,\n            weight=weight,\n            base_margin=base_margin,\n            group=group,\n            qid=qid,\n            label_lower_bound=label_lower_bound,\n            label_upper_bound=label_upper_bound,\n            missing=missing,\n            silent=silent,\n            feature_weights=feature_weights,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            enable_categorical=enable_categorical,\n        )\n        self.max_bin = max_bin\n        self.max_quantile_batches = max_quantile_batches\n        self.is_quantile = True\n        self._ref: Optional[int] = id(ref) if ref is not None else None\n\n    def _create_fn_args(self, worker_addr: str) -> Dict[str, Any]:\n        args = super()._create_fn_args(worker_addr)\n        args[\"max_bin\"] = self.max_bin\n        args[\"max_quantile_batches\"] = self.max_quantile_batches\n        if self._ref is not None:\n            args[\"ref\"] = self._ref\n        return args\n\n\nasync def _get_rabit_args(\n    client: \"distributed.Client\",\n    n_workers: int,\n    dconfig: Optional[Dict[str, Any]] = None,\n    coll_cfg: Optional[CollConfig] = None,\n) -> Dict[str, Union[str, int]]:\n    \"\"\"Get rabit context arguments from data distribution in DaskDMatrix.\"\"\"\n    # There are 3 possible different addresses:\n    # 1. Provided by user via dask.config\n    # 2. Guessed by xgboost `get_host_ip` function\n    # 3. 
From dask scheduler\n    # We try 1 and 3 if 1 is available, otherwise 2 and 3.\n\n    # See if user config is available\n    coll_cfg = CollConfig() if coll_cfg is None else coll_cfg\n    host_ip: Optional[str] = None\n    port: int = 0\n    host_ip, port = get_address_from_user(dconfig, coll_cfg)\n\n    if host_ip is not None:\n        user_addr = (host_ip, port)\n    else:\n        user_addr = None\n\n    # Try address from dask scheduler, this might not work, see\n    # https://github.com/dask/dask-xgboost/pull/40\n    try:\n        sched_addr = distributed.comm.get_address_host(client.scheduler.address)\n        sched_addr = sched_addr.strip(\"/:\")\n    except Exception:  # pylint: disable=broad-except\n        sched_addr = None\n\n    # We assume the scheduler is a fair process and run the tracker there.\n    env = await client.run_on_scheduler(\n        _start_tracker, n_workers, sched_addr, user_addr, coll_cfg.tracker_timeout\n    )\n    env = coll_cfg.get_comm_config(env)\n    assert env is not None\n    return env\n\n\ndef _get_dask_config() -> Optional[Dict[str, Any]]:\n    return dask.config.get(\"xgboost\", default=None)\n\n\n# train and predict methods are supposed to be \"functional\", which meets the\n# dask paradigm.  But as a side effect, the `evals_result` in single-node API\n# is no longer supported since it mutates the input parameter, and it's not\n# intuitive to sync the mutation result.  
Therefore, a dictionary containing\n# evaluation history is instead returned.\n\n\ndef _get_workers_from_data(\n    dtrain: DaskDMatrix, evals: Optional[Sequence[Tuple[DaskDMatrix, str]]]\n) -> List[str]:\n    X_worker_map: Set[str] = set(dtrain.worker_map.keys())\n    if evals:\n        for e in evals:\n            assert len(e) == 2\n            assert isinstance(e[0], DaskDMatrix) and isinstance(e[1], str)\n            if e[0] is dtrain:\n                continue\n            worker_map = set(e[0].worker_map.keys())\n            X_worker_map = X_worker_map.union(worker_map)\n    return list(X_worker_map)\n\n\nasync def _check_workers_are_alive(\n    workers: List[str], client: \"distributed.Client\"\n) -> None:\n    info = await client.scheduler.identity()\n    current_workers = info[\"workers\"].keys()\n    missing_workers = set(workers) - current_workers\n    if missing_workers:\n        raise RuntimeError(f\"Missing required workers: {missing_workers}\")\n\n\nasync def _train_async(\n    *,\n    client: \"distributed.Client\",\n    global_config: Dict[str, Any],\n    dconfig: Optional[Dict[str, Any]],\n    params: Dict[str, Any],\n    dtrain: DaskDMatrix,\n    num_boost_round: int,\n    evals: Optional[Sequence[Tuple[DaskDMatrix, str]]],\n    obj: Optional[PlainObj],\n    early_stopping_rounds: Optional[int],\n    verbose_eval: Union[int, bool],\n    xgb_model: Optional[Booster],\n    callbacks: Optional[Sequence[TrainingCallback]],\n    custom_metric: Optional[Metric],\n    coll_cfg: Optional[CollConfig],\n) -> Optional[TrainReturnT]:\n    workers = _get_workers_from_data(dtrain, evals)\n    await _check_workers_are_alive(workers, client)\n    coll_args = await _get_rabit_args(\n        client, len(workers), dconfig=dconfig, coll_cfg=coll_cfg\n    )\n    _check_distributed_params(params)\n\n    # This function name is displayed in the Dask dashboard task status, let's make it\n    # clear that it's XGBoost training.\n    def do_train(  # pylint: 
disable=too-many-positional-arguments\n        parameters: Dict,\n        coll_args: Dict[str, Union[str, int]],\n        train_id: int,\n        evals_name: List[str],\n        evals_id: List[int],\n        train_ref: dict,\n        *refs: dict,\n    ) -> Optional[TrainReturnT]:\n        worker = distributed.get_worker()\n        local_param = parameters.copy()\n        n_threads = get_n_threads(local_param, worker)\n        local_param.update({\"nthread\": n_threads, \"n_jobs\": n_threads})\n\n        local_history: TrainingCallback.EvalsLog = {}\n        global_config.update({\"nthread\": n_threads})\n\n        if coll_cfg is not None:\n            coll_args = coll_cfg.update_worker_args(coll_args)\n\n        with CommunicatorContext(**coll_args), config.config_context(**global_config):\n            Xy, evals = _get_dmatrices(\n                train_ref,\n                train_id,\n                *refs,\n                evals_id=evals_id,\n                evals_name=evals_name,\n                n_threads=n_threads,\n                # We need the model for reference categories.\n                model=xgb_model,\n            )\n\n            booster = worker_train(\n                params=local_param,\n                dtrain=Xy,\n                num_boost_round=num_boost_round,\n                evals_result=local_history,\n                evals=evals if len(evals) != 0 else None,\n                obj=obj,\n                custom_metric=custom_metric,\n                early_stopping_rounds=early_stopping_rounds,\n                verbose_eval=verbose_eval,\n                xgb_model=xgb_model,\n                callbacks=callbacks,\n            )\n        # Don't return the boosters from empty workers. 
It's quite difficult to\n        # guarantee everything is in sync in the present of empty workers, especially\n        # with complex objectives like quantile.\n        if Xy.num_row() != 0:\n            ret: Optional[TrainReturnT] = {\n                \"booster\": booster,\n                \"history\": local_history,\n            }\n        else:\n            ret = None\n        return ret\n\n    async with distributed.MultiLock(workers, client):\n        if evals is not None:\n            evals_data = [d for d, n in evals]\n            evals_name = [n for d, n in evals]\n            evals_id = [id(d) for d in evals_data]\n        else:\n            evals_data = []\n            evals_name = []\n            evals_id = []\n\n        result = await map_worker_partitions(\n            client,\n            do_train,\n            # extra function parameters\n            params,\n            coll_args,\n            id(dtrain),\n            evals_name,\n            evals_id,\n            *([dtrain] + evals_data),\n            # workers to be used for training\n            workers=workers,\n        )\n        return result\n\n\n@_deprecate_positional_args\ndef train(  # pylint: disable=unused-argument\n    client: \"distributed.Client\",\n    params: Dict[str, Any],\n    dtrain: DaskDMatrix,\n    num_boost_round: int = 10,\n    *,\n    evals: Optional[Sequence[Tuple[DaskDMatrix, str]]] = None,\n    obj: Optional[PlainObj] = None,\n    early_stopping_rounds: Optional[int] = None,\n    xgb_model: Optional[Booster] = None,\n    verbose_eval: Union[int, bool] = True,\n    callbacks: Optional[Sequence[TrainingCallback]] = None,\n    custom_metric: Optional[Metric] = None,\n    coll_cfg: Optional[CollConfig] = None,\n) -> Any:\n    \"\"\"Train XGBoost model.\n\n    .. versionadded:: 1.0.0\n\n    .. 
note::\n\n        Other parameters are the same as :py:func:`xgboost.train` except for\n        `evals_result`, which is returned as part of function return value instead of\n        argument.\n\n    Parameters\n    ----------\n    client :\n        Specify the dask client used for training.  Use default client returned from\n        dask if it's set to None.\n\n    coll_cfg :\n        Configuration for the communicator used during training. See\n        :py:class:`~xgboost.collective.Config`.\n\n    Returns\n    -------\n    results: dict\n        A dictionary containing trained booster and evaluation history.  `history` field\n        is the same as `eval_result` from `xgboost.train`.\n\n        .. code-block:: python\n\n            {'booster': xgboost.Booster,\n             'history': {'train': {'logloss': ['0.48253', '0.35953']},\n                         'eval': {'logloss': ['0.480385', '0.357756']}}}\n\n    \"\"\"\n    client = _get_client(client)\n    return client.sync(\n        _train_async,\n        global_config=config.get_config(),\n        dconfig=_get_dask_config(),\n        **locals(),\n    )\n\n\ndef _can_output_df(is_df: bool, output_shape: Tuple) -> bool:\n    return is_df and len(output_shape) <= 2\n\n\ndef _maybe_dataframe(\n    data: Any, prediction: Any, columns: List[int], is_df: bool\n) -> Any:\n    \"\"\"Return dataframe for prediction when applicable.\"\"\"\n    if _can_output_df(is_df, prediction.shape):\n        # Need to preserve the index for dataframe.\n        # See issue: https://github.com/dmlc/xgboost/issues/6939\n        # In older versions of dask, the partition is actually a numpy array when input\n        # is dataframe.\n        index = getattr(data, \"index\", None)\n        if _is_cudf_df(data):\n            import cudf\n\n            if prediction.size == 0:\n                return cudf.DataFrame({}, columns=columns, dtype=numpy.float32)\n\n            prediction = cudf.DataFrame(\n                prediction, 
columns=columns, dtype=numpy.float32, index=index\n            )\n        else:\n            import pandas as pd\n\n            if prediction.size == 0:\n                return pd.DataFrame(\n                    {}, columns=columns, dtype=numpy.float32, index=index\n                )\n\n            prediction = pd.DataFrame(\n                prediction, columns=columns, dtype=numpy.float32, index=index\n            )\n    return prediction\n\n\nasync def _direct_predict_impl(  # pylint: disable=too-many-branches\n    *,\n    mapped_predict: Callable,\n    booster: \"distributed.Future\",\n    data: _DataT,\n    base_margin: Optional[_DaskCollection],\n    output_shape: Tuple[int, ...],\n    meta: Dict[int, str],\n) -> _DaskCollection:\n    columns = tuple(meta.keys())\n    if len(output_shape) >= 3 and isinstance(data, dd.DataFrame):\n        # Without this check, dask will finish the prediction silently even if output\n        # dimension is greater than 3.  But during map_partitions, dask passes a\n        # `dd.DataFrame` as local input to xgboost, which is converted to csr_matrix by\n        # `_convert_unknown_data` since dd.DataFrame is not known to xgboost native\n        # binding.\n        raise ValueError(\n            \"Use `da.Array` or `DaskDMatrix` when output has more than 2 dimensions.\"\n        )\n    if _can_output_df(isinstance(data, dd.DataFrame), output_shape):\n        if base_margin is not None and isinstance(base_margin, da.Array):\n            # Easier for map_partitions\n            base_margin_df: Optional[Union[dd.DataFrame, dd.Series]] = (\n                base_margin.to_dask_dataframe()\n            )\n        else:\n            base_margin_df = base_margin\n        predictions = dd.map_partitions(\n            mapped_predict,\n            booster,\n            data,\n            True,\n            columns,\n            base_margin_df,\n            meta=dd.utils.make_meta(meta),\n        )\n        # classification can return a 
dataframe, drop 1 dim when it's reg/binary\n        if len(output_shape) == 1:\n            predictions = predictions.iloc[:, 0]\n    else:\n        if base_margin is not None and isinstance(\n            base_margin, (dd.Series, dd.DataFrame)\n        ):\n            # Easier for map_blocks\n            base_margin_array: Optional[da.Array] = base_margin.to_dask_array()\n        else:\n            base_margin_array = base_margin\n        # Input data is 2-dim array, output can be 1(reg, binary)/2(multi-class,\n        # contrib)/3(contrib, interaction)/4(interaction) dims.\n        if len(output_shape) == 1:\n            drop_axis: Union[int, List[int]] = [1]  # drop from 2 to 1 dim.\n            new_axis: Union[int, List[int]] = []\n        else:\n            drop_axis = []\n            if isinstance(data, dd.DataFrame):\n                new_axis = list(range(len(output_shape) - 2))\n            else:\n                new_axis = [i + 2 for i in range(len(output_shape) - 2)]\n        if len(output_shape) == 2:\n            # Somehow dask fail to infer output shape change for 2-dim prediction, and\n            #  `chunks = (None, output_shape[1])` doesn't work due to None is not\n            #  supported in map_blocks.\n\n            # data must be an array here as dataframe + 2-dim output predict will return\n            # a dataframe instead.\n            chunks: Optional[List[Tuple]] = list(data.chunks)\n            assert isinstance(chunks, list)\n            chunks[1] = (output_shape[1],)\n        else:\n            chunks = None\n        predictions = da.map_blocks(\n            mapped_predict,\n            booster,\n            data,\n            False,\n            columns,\n            base_margin_array,\n            chunks=chunks,\n            drop_axis=drop_axis,\n            new_axis=new_axis,\n            dtype=numpy.float32,\n        )\n    return predictions\n\n\ndef _infer_predict_output(\n    booster: Booster, features: int, is_df: bool, inplace: 
bool, **kwargs: Any\n) -> Tuple[Tuple[int, ...], Dict[int, str]]:\n    \"\"\"Create a dummy test sample to infer output shape for prediction.\"\"\"\n    assert isinstance(features, int)\n    rng = numpy.random.RandomState(1994)\n    test_sample = rng.randn(1, features)\n    if inplace:\n        kwargs = kwargs.copy()\n        if kwargs.pop(\"predict_type\") == \"margin\":\n            kwargs[\"output_margin\"] = True\n    m = DMatrix(test_sample, enable_categorical=True)\n    # generated DMatrix doesn't have feature name, so no validation.\n    test_predt = booster.predict(m, validate_features=False, **kwargs)\n    n_columns = test_predt.shape[1] if len(test_predt.shape) > 1 else 1\n    meta: Dict[int, str] = {}\n    if _can_output_df(is_df, test_predt.shape):\n        for i in range(n_columns):\n            meta[i] = \"f4\"\n    return test_predt.shape, meta\n\n\nasync def _get_model_future(\n    client: \"distributed.Client\", model: Union[Booster, Dict, \"distributed.Future\"]\n) -> \"distributed.Future\":\n    # See https://github.com/dask/dask/issues/11179#issuecomment-2168094529 for the use\n    # of hash.\n    # https://github.com/dask/distributed/pull/8796 Don't use broadcast in the `scatter`\n    # call, otherwise, the predict function might hang.\n    if isinstance(model, Booster):\n        booster = await client.scatter(model, hash=False)\n    elif isinstance(model, dict):\n        booster = await client.scatter(model[\"booster\"], hash=False)\n    elif isinstance(model, distributed.Future):\n        booster = model\n        t = booster.type\n        if t is not Booster:\n            raise TypeError(\n                f\"Underlying type of model future should be `Booster`, got {t}\"\n            )\n    else:\n        raise TypeError(_expect([Booster, dict, distributed.Future], type(model)))\n    return booster\n\n\n# pylint: disable=too-many-statements\nasync def _predict_async(\n    client: \"distributed.Client\",\n    global_config: Dict[str, Any],\n    
model: Union[Booster, Dict, \"distributed.Future\"],\n    data: _DataT,\n    *,\n    output_margin: bool,\n    missing: float,\n    pred_leaf: bool,\n    pred_contribs: bool,\n    approx_contribs: bool,\n    pred_interactions: bool,\n    validate_features: bool,\n    iteration_range: IterationRange,\n    strict_shape: bool,\n) -> _DaskCollection:\n    _booster = await _get_model_future(client, model)\n    if not isinstance(data, (DaskDMatrix, da.Array, dd.DataFrame)):\n        raise TypeError(_expect([DaskDMatrix, da.Array, dd.DataFrame], type(data)))\n\n    def mapped_predict(\n        booster: Booster, partition: Any, is_df: bool, columns: List[int], _: Any\n    ) -> Any:\n        with config.config_context(**global_config):\n            m = DMatrix(\n                data=partition,\n                missing=missing,\n                enable_categorical=True,\n            )\n            predt = booster.predict(\n                data=m,\n                output_margin=output_margin,\n                pred_leaf=pred_leaf,\n                pred_contribs=pred_contribs,\n                approx_contribs=approx_contribs,\n                pred_interactions=pred_interactions,\n                validate_features=validate_features,\n                iteration_range=iteration_range,\n                strict_shape=strict_shape,\n            )\n            predt = _maybe_dataframe(partition, predt, columns, is_df)\n            return predt\n\n    # Predict on dask collection directly.\n    if isinstance(data, (da.Array, dd.DataFrame)):\n        _output_shape, meta = await client.compute(\n            client.submit(\n                _infer_predict_output,\n                _booster,\n                features=data.shape[1],\n                is_df=isinstance(data, dd.DataFrame),\n                inplace=False,\n                output_margin=output_margin,\n                pred_leaf=pred_leaf,\n                pred_contribs=pred_contribs,\n                
approx_contribs=approx_contribs,\n                pred_interactions=pred_interactions,\n                strict_shape=strict_shape,\n            )\n        )\n        return await _direct_predict_impl(\n            mapped_predict=mapped_predict,\n            booster=_booster,\n            data=data,\n            base_margin=None,\n            output_shape=_output_shape,\n            meta=meta,\n        )\n\n    output_shape, _ = await client.compute(\n        client.submit(\n            _infer_predict_output,\n            booster=_booster,\n            features=data.num_col(),\n            is_df=False,\n            inplace=False,\n            output_margin=output_margin,\n            pred_leaf=pred_leaf,\n            pred_contribs=pred_contribs,\n            approx_contribs=approx_contribs,\n            pred_interactions=pred_interactions,\n            strict_shape=strict_shape,\n        )\n    )\n    # Prediction on dask DMatrix.\n    partition_order = data.partition_order\n    feature_names = data.feature_names\n    feature_types = data.feature_types\n    missing = data.missing\n\n    def dispatched_predict(booster: Booster, part: Dict[str, Any]) -> numpy.ndarray:\n        data = part[\"data\"]\n        base_margin = part.get(\"base_margin\", None)\n        with config.config_context(**global_config):\n            m = DMatrix(\n                data,\n                missing=missing,\n                base_margin=base_margin,\n                feature_names=feature_names,\n                feature_types=feature_types,\n                enable_categorical=True,\n            )\n            predt = booster.predict(\n                m,\n                output_margin=output_margin,\n                pred_leaf=pred_leaf,\n                pred_contribs=pred_contribs,\n                approx_contribs=approx_contribs,\n                pred_interactions=pred_interactions,\n                validate_features=validate_features,\n                iteration_range=iteration_range,\n     
           strict_shape=strict_shape,\n            )\n            return predt\n\n    all_parts = []\n    all_orders = []\n    all_shapes = []\n    all_workers: List[str] = []\n    workers_address = list(data.worker_map.keys())\n    for worker_addr in workers_address:\n        list_of_parts = data.worker_map[worker_addr]\n        all_parts.extend(list_of_parts)\n        all_workers.extend(len(list_of_parts) * [worker_addr])\n        all_orders.extend([partition_order[part.key] for part in list_of_parts])\n    for w, part in zip(all_workers, all_parts):\n        s = client.submit(lambda part: part[\"data\"].shape[0], part, workers=[w])\n        all_shapes.append(s)\n\n    parts_with_order = list(zip(all_parts, all_shapes, all_orders, all_workers))\n    parts_with_order = sorted(parts_with_order, key=lambda p: p[2])\n    all_parts = [part for part, shape, order, w in parts_with_order]\n    all_shapes = [shape for part, shape, order, w in parts_with_order]\n    all_workers = [w for part, shape, order, w in parts_with_order]\n\n    futures = []\n    for w, part in zip(all_workers, all_parts):\n        f = client.submit(dispatched_predict, _booster, part, workers=[w])\n        futures.append(f)\n\n    # Constructing a dask array from list of numpy arrays\n    # See https://docs.dask.org/en/latest/array-creation.html\n    arrays = []\n    all_shapes = await client.gather(all_shapes)\n    for i, rows in enumerate(all_shapes):\n        arrays.append(\n            da.from_delayed(\n                futures[i], shape=(rows,) + output_shape[1:], dtype=numpy.float32\n            )\n        )\n    predictions = da.concatenate(arrays, axis=0)\n    return predictions\n\n\n@_deprecate_positional_args\ndef predict(  # pylint: disable=unused-argument\n    client: Optional[\"distributed.Client\"],\n    model: Union[TrainReturnT, Booster, \"distributed.Future\"],\n    data: Union[DaskDMatrix, _DataT],\n    *,\n    output_margin: bool = False,\n    missing: float = numpy.nan,\n    
pred_leaf: bool = False,\n    pred_contribs: bool = False,\n    approx_contribs: bool = False,\n    pred_interactions: bool = False,\n    validate_features: bool = True,\n    iteration_range: IterationRange = (0, 0),\n    strict_shape: bool = False,\n) -> Any:\n    \"\"\"Run prediction with a trained booster.\n\n    .. note::\n\n        Using ``inplace_predict`` might be faster when some features are not needed.\n        See :py:meth:`xgboost.Booster.predict` for details on various parameters.  When\n        output has more than 2 dimensions (shap value, leaf with strict_shape), input\n        should be ``da.Array`` or ``DaskDMatrix``.\n\n    .. versionadded:: 1.0.0\n\n    Parameters\n    ----------\n    client:\n        Specify the dask client used for training.  Use default client\n        returned from dask if it's set to None.\n    model:\n        The trained model.  It can be a distributed.Future so user can\n        pre-scatter it onto all workers.\n    data:\n        Input data used for prediction.  When input is a dataframe object,\n        prediction output is a series.\n    missing:\n        Used when input data is not DaskDMatrix.  
Specify the value\n        considered as missing.\n\n    Returns\n    -------\n    prediction: dask.array.Array/dask.dataframe.Series\n        When input data is ``dask.array.Array`` or ``DaskDMatrix``, the return value is\n        an array, when input data is ``dask.dataframe.DataFrame``, return value can be\n        ``dask.dataframe.Series``, ``dask.dataframe.DataFrame``, depending on the output\n        shape.\n\n    \"\"\"\n    client = _get_client(client)\n    return client.sync(_predict_async, global_config=config.get_config(), **locals())\n\n\nasync def _inplace_predict_async(  # pylint: disable=too-many-branches\n    *,\n    client: \"distributed.Client\",\n    global_config: Dict[str, Any],\n    model: Union[Booster, Dict, \"distributed.Future\"],\n    data: _DataT,\n    iteration_range: IterationRange,\n    predict_type: str,\n    missing: float,\n    validate_features: bool,\n    base_margin: Optional[_DaskCollection],\n    strict_shape: bool,\n) -> _DaskCollection:\n    client = _get_client(client)\n    booster = await _get_model_future(client, model)\n    if not isinstance(data, (da.Array, dd.DataFrame)):\n        raise TypeError(_expect([da.Array, dd.DataFrame], type(data)))\n    if base_margin is not None and not isinstance(\n        data, (da.Array, dd.DataFrame, dd.Series)\n    ):\n        raise TypeError(_expect([da.Array, dd.DataFrame, dd.Series], type(base_margin)))\n\n    def mapped_predict(\n        booster: Booster,\n        partition: Any,\n        is_df: bool,\n        columns: List[int],\n        base_margin: Any,\n    ) -> Any:\n        with config.config_context(**global_config):\n            prediction = booster.inplace_predict(\n                partition,\n                iteration_range=iteration_range,\n                predict_type=predict_type,\n                missing=missing,\n                base_margin=base_margin,\n                validate_features=validate_features,\n                strict_shape=strict_shape,\n            )\n  
      prediction = _maybe_dataframe(partition, prediction, columns, is_df)\n        return prediction\n\n    # await turns future into value.\n    shape, meta = await client.compute(\n        client.submit(\n            _infer_predict_output,\n            booster,\n            features=data.shape[1],\n            is_df=isinstance(data, dd.DataFrame),\n            inplace=True,\n            predict_type=predict_type,\n            iteration_range=iteration_range,\n            strict_shape=strict_shape,\n        )\n    )\n    return await _direct_predict_impl(\n        mapped_predict=mapped_predict,\n        booster=booster,\n        data=data,\n        base_margin=base_margin,\n        output_shape=shape,\n        meta=meta,\n    )\n\n\n@_deprecate_positional_args\ndef inplace_predict(  # pylint: disable=unused-argument\n    client: Optional[\"distributed.Client\"],\n    model: Union[TrainReturnT, Booster, \"distributed.Future\"],\n    data: _DataT,\n    *,\n    iteration_range: IterationRange = (0, 0),\n    predict_type: str = \"value\",\n    missing: float = numpy.nan,\n    validate_features: bool = True,\n    base_margin: Optional[_DaskCollection] = None,\n    strict_shape: bool = False,\n) -> Any:\n    \"\"\"Inplace prediction. See doc in :py:meth:`xgboost.Booster.inplace_predict` for\n    details.\n\n    .. versionadded:: 1.1.0\n\n    Parameters\n    ----------\n    client:\n        Specify the dask client used for training.  Use default client\n        returned from dask if it's set to None.\n    model:\n        See :py:func:`xgboost.dask.predict` for details.\n    data :\n        dask collection.\n    iteration_range:\n        See :py:meth:`xgboost.Booster.predict` for details.\n    predict_type:\n        See :py:meth:`xgboost.Booster.inplace_predict` for details.\n    missing:\n        Value in the input data which needs to be present as a missing\n        value. 
If None, defaults to np.nan.\n    base_margin:\n        See :py:obj:`xgboost.DMatrix` for details.\n\n        .. versionadded:: 1.4.0\n\n    strict_shape:\n        See :py:meth:`xgboost.Booster.predict` for details.\n\n        .. versionadded:: 1.4.0\n\n    Returns\n    -------\n    prediction :\n        When input data is ``dask.array.Array``, the return value is an array, when\n        input data is ``dask.dataframe.DataFrame``, return value can be\n        ``dask.dataframe.Series``, ``dask.dataframe.DataFrame``, depending on the output\n        shape.\n\n    \"\"\"\n    client = _get_client(client)\n    # When used in asynchronous environment, the `client` object should have\n    # `asynchronous` attribute as True.  When invoked by the skl interface, it's\n    # responsible for setting up the client.\n    return client.sync(\n        _inplace_predict_async, global_config=config.get_config(), **locals()\n    )\n\n\nasync def _async_wrap_evaluation_matrices(\n    client: Optional[\"distributed.Client\"],\n    device: Optional[str],\n    tree_method: Optional[str],\n    max_bin: Optional[int],\n    **kwargs: Any,\n) -> Tuple[DaskDMatrix, Optional[List[Tuple[DaskDMatrix, str]]]]:\n    \"\"\"A switch function for async environment.\"\"\"\n\n    def _dispatch(ref: Optional[DaskDMatrix], **kwargs: Any) -> DaskDMatrix:\n        if _can_use_qdm(tree_method, device):\n            return DaskQuantileDMatrix(\n                client=client, ref=ref, max_bin=max_bin, **kwargs\n            )\n        return DaskDMatrix(client=client, **kwargs)\n\n    train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_dispatch, **kwargs)\n    train_dmatrix = await train_dmatrix\n    if evals is None:\n        return train_dmatrix, evals\n    awaited = []\n    for e in evals:\n        if e[0] is train_dmatrix:  # already awaited\n            awaited.append(e)\n            continue\n        awaited.append((await e[0], e[1]))\n    return train_dmatrix, 
awaited\n\n\n@contextmanager\ndef _set_worker_client(\n    model: \"DaskScikitLearnBase\", client: \"distributed.Client\"\n) -> Generator:\n    \"\"\"Temporarily set the client for sklearn model.\"\"\"\n    try:\n        model.client = client\n        yield model\n    finally:\n        model.client = None  # type: ignore[assignment]\n\n\nclass DaskScikitLearnBase(XGBModel):\n    \"\"\"Base class for implementing scikit-learn interface with Dask\"\"\"\n\n    _client = None\n\n    def __init__(self, *, coll_cfg: Optional[CollConfig] = None, **kwargs: Any) -> None:\n        super().__init__(**kwargs)\n\n        self.coll_cfg = coll_cfg\n\n    async def _predict_async(\n        self,\n        data: _DataT,\n        *,\n        output_margin: bool,\n        validate_features: bool,\n        base_margin: Optional[_DaskCollection],\n        iteration_range: Optional[IterationRange],\n    ) -> Any:\n        iteration_range = self._get_iteration_range(iteration_range)\n        # Dask doesn't support gblinear and accepts only Dask collection types (array\n        # and dataframe). 
We can perform inplace predict.\n        assert self._can_use_inplace_predict()\n        predts = await inplace_predict(\n            client=self.client,\n            model=self.get_booster(),\n            data=data,\n            iteration_range=iteration_range,\n            predict_type=\"margin\" if output_margin else \"value\",\n            missing=self.missing,\n            base_margin=base_margin,\n            validate_features=validate_features,\n        )\n        if isinstance(predts, dd.DataFrame):\n            predts = predts.to_dask_array()\n            # Make sure the booster is part of the task graph implicitly\n            # only needed for certain versions of dask.\n            if _DASK_2024_12_1() and not _DASK_2025_3_0():\n                # Fixes this issue for dask>=2024.1.1,<2025.3.0\n                # Dask==2025.3.0 fails with:\n                #     RuntimeError: Attempting to use an asynchronous\n                #     Client in a synchronous context of `dask.compute`\n                #\n                # Dask==2025.4.0 fails with:\n                #     TypeError: Value type is not supported for data\n                #     iterator:<class 'distributed.client.Future'>\n                predts = predts.persist()\n        return predts\n\n    @_deprecate_positional_args\n    def predict(\n        self,\n        X: _DataT,\n        *,\n        output_margin: bool = False,\n        validate_features: bool = True,\n        base_margin: Optional[_DaskCollection] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> Any:\n        return self.client.sync(\n            self._predict_async,\n            X,\n            output_margin=output_margin,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n\n    async def _apply_async(\n        self,\n        X: _DataT,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> Any:\n     
   iteration_range = self._get_iteration_range(iteration_range)\n        test_dmatrix: DaskDMatrix = await DaskDMatrix(\n            self.client,\n            data=X,\n            missing=self.missing,\n            feature_types=self.feature_types,\n        )\n        predts = await predict(\n            self.client,\n            model=self.get_booster(),\n            data=test_dmatrix,\n            pred_leaf=True,\n            iteration_range=iteration_range,\n        )\n        return predts\n\n    def apply(\n        self,\n        X: _DataT,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> Any:\n        return self.client.sync(self._apply_async, X, iteration_range=iteration_range)\n\n    def __await__(self) -> Awaitable[Any]:\n        # Generate a coroutine wrapper to make this class awaitable.\n        async def _() -> Awaitable[Any]:\n            return self\n\n        return self._client_sync(_).__await__()\n\n    def __getstate__(self) -> Dict:\n        this = self.__dict__.copy()\n        if \"_client\" in this:\n            del this[\"_client\"]\n        return this\n\n    @property\n    def client(self) -> \"distributed.Client\":\n        \"\"\"The dask client used in this model.  
The `Client` object can not be\n        serialized for transmission, so if task is launched from a worker instead of\n        directly from the client process, this attribute needs to be set at that worker.\n\n        \"\"\"\n\n        client = _get_client(self._client)\n        return client\n\n    @client.setter\n    def client(self, clt: \"distributed.Client\") -> None:\n        # calling `worker_client' doesn't return the correct `asynchronous` attribute,\n        # so we have to pass it ourselves.\n        self._asynchronous = clt.asynchronous if clt is not None else False\n        self._client = clt\n\n    def _client_sync(self, func: Callable, **kwargs: Any) -> Any:\n        \"\"\"Get the correct client, when method is invoked inside a worker we\n        should use `worker_client' instead of default client.\n\n        \"\"\"\n\n        if self._client is None:\n            asynchronous = getattr(self, \"_asynchronous\", False)\n            try:\n                distributed.get_worker()\n                in_worker = True\n            except ValueError:\n                in_worker = False\n            if in_worker:\n                with distributed.worker_client() as client:\n                    with _set_worker_client(self, client) as this:\n                        ret = this.client.sync(\n                            func, **kwargs, asynchronous=asynchronous\n                        )\n                        return ret\n                    return ret\n\n        return self.client.sync(func, **kwargs, asynchronous=self.client.asynchronous)\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost.\"\"\", [\"estimators\", \"model\"]\n)\nclass DaskXGBRegressor(XGBRegressorBase, DaskScikitLearnBase):\n    \"\"\"dummy doc string to workaround pylint, replaced by the decorator.\"\"\"\n\n    async def _fit_async(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: 
Optional[_DaskCollection],\n        base_margin: Optional[_DaskCollection],\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]],\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]],\n        verbose: Union[int, bool],\n        xgb_model: Optional[Union[Booster, XGBModel]],\n        feature_weights: Optional[_DaskCollection],\n    ) -> _DaskCollection:\n        params = self.get_xgb_params()\n        model, metric, params, feature_weights = self._configure_fit(\n            xgb_model, params, feature_weights\n        )\n\n        dtrain, evals = await _async_wrap_evaluation_matrices(\n            client=self.client,\n            device=self.device,\n            tree_method=self.tree_method,\n            max_bin=self.max_bin,\n            X=X,\n            y=y,\n            group=None,\n            qid=None,\n            sample_weight=sample_weight,\n            base_margin=base_margin,\n            feature_weights=feature_weights,\n            eval_set=eval_set,\n            sample_weight_eval_set=sample_weight_eval_set,\n            base_margin_eval_set=base_margin_eval_set,\n            eval_group=None,\n            eval_qid=None,\n            missing=self.missing,\n            enable_categorical=self.enable_categorical,\n            feature_types=self.feature_types,\n        )\n\n        if callable(self.objective):\n            obj: Optional[Callable] = _objective_decorator(self.objective)\n        else:\n            obj = None\n        results = await self.client.sync(\n            _train_async,\n            asynchronous=True,\n            client=self.client,\n            global_config=config.get_config(),\n            dconfig=_get_dask_config(),\n            params=params,\n            dtrain=dtrain,\n            num_boost_round=self.get_num_boosting_rounds(),\n            evals=evals,\n            obj=obj,\n            custom_metric=metric,\n        
    verbose_eval=verbose,\n            early_stopping_rounds=self.early_stopping_rounds,\n            callbacks=self.callbacks,\n            coll_cfg=self.coll_cfg,\n            xgb_model=model,\n        )\n        self._Booster = results[\"booster\"]\n        self._set_evaluation_result(results[\"history\"])\n        return self\n\n    # pylint: disable=missing-docstring, disable=unused-argument\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,\n        verbose: Optional[Union[int, bool]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n    ) -> \"DaskXGBRegressor\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        return self._client_sync(self._fit_async, **args)\n\n\n@xgboost_model_doc(\n    \"Implementation of the scikit-learn API for XGBoost classification.\",\n    [\"estimators\", \"model\"],\n)\nclass DaskXGBClassifier(XGBClassifierBase, DaskScikitLearnBase):\n    # pylint: disable=missing-class-docstring\n    async def _fit_async(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: Optional[_DaskCollection],\n        base_margin: Optional[_DaskCollection],\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]],\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]],\n        verbose: Union[int, bool],\n        xgb_model: Optional[Union[Booster, 
XGBModel]],\n        feature_weights: Optional[_DaskCollection],\n    ) -> \"DaskXGBClassifier\":\n        params = self.get_xgb_params()\n        model, metric, params, feature_weights = self._configure_fit(\n            xgb_model, params, feature_weights\n        )\n\n        dtrain, evals = await _async_wrap_evaluation_matrices(\n            self.client,\n            device=self.device,\n            tree_method=self.tree_method,\n            max_bin=self.max_bin,\n            X=X,\n            y=y,\n            group=None,\n            qid=None,\n            sample_weight=sample_weight,\n            base_margin=base_margin,\n            feature_weights=feature_weights,\n            eval_set=eval_set,\n            sample_weight_eval_set=sample_weight_eval_set,\n            base_margin_eval_set=base_margin_eval_set,\n            eval_group=None,\n            eval_qid=None,\n            missing=self.missing,\n            enable_categorical=self.enable_categorical,\n            feature_types=self.feature_types,\n        )\n\n        # pylint: disable=attribute-defined-outside-init\n        if isinstance(y, da.Array):\n            self.classes_ = await self.client.compute(da.unique(y))\n        else:\n            self.classes_ = await self.client.compute(y.drop_duplicates())\n        if _is_cudf_ser(self.classes_):\n            self.classes_ = self.classes_.to_cupy()\n        if _is_cupy_alike(self.classes_):\n            self.classes_ = self.classes_.get()\n        self.classes_ = numpy.array(self.classes_)\n        self.n_classes_ = len(self.classes_)\n\n        if self.n_classes_ > 2:\n            params[\"objective\"] = \"multi:softprob\"\n            params[\"num_class\"] = self.n_classes_\n        else:\n            params[\"objective\"] = \"binary:logistic\"\n\n        if callable(self.objective):\n            obj: Optional[Callable] = _objective_decorator(self.objective)\n        else:\n            obj = None\n        results = await self.client.sync(\n       
     _train_async,\n            asynchronous=True,\n            client=self.client,\n            global_config=config.get_config(),\n            dconfig=_get_dask_config(),\n            params=params,\n            dtrain=dtrain,\n            num_boost_round=self.get_num_boosting_rounds(),\n            evals=evals,\n            obj=obj,\n            custom_metric=metric,\n            verbose_eval=verbose,\n            early_stopping_rounds=self.early_stopping_rounds,\n            callbacks=self.callbacks,\n            coll_cfg=self.coll_cfg,\n            xgb_model=model,\n        )\n        self._Booster = results[\"booster\"]\n        if not callable(self.objective):\n            self.objective = params[\"objective\"]\n        self._set_evaluation_result(results[\"history\"])\n        return self\n\n    # pylint: disable=unused-argument\n    def fit(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,\n        verbose: Optional[Union[int, bool]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n    ) -> \"DaskXGBClassifier\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        return self._client_sync(self._fit_async, **args)\n\n    async def _predict_proba_async(\n        self,\n        X: _DataT,\n        validate_features: bool,\n        base_margin: Optional[_DaskCollection],\n        iteration_range: Optional[IterationRange],\n    ) -> _DaskCollection:\n        if self.objective == \"multi:softmax\":\n            raise ValueError(\n                
\"multi:softmax doesn't support `predict_proba`.  \"\n                \"Switch to `multi:softproba` instead\"\n            )\n        predts = await super()._predict_async(\n            data=X,\n            output_margin=False,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n        vstack = update_wrapper(\n            partial(da.vstack, allow_unknown_chunksizes=True), da.vstack\n        )\n        return _cls_predict_proba(getattr(self, \"n_classes_\", 0), predts, vstack)\n\n    # pylint: disable=missing-function-docstring\n    def predict_proba(\n        self,\n        X: _DaskCollection,\n        validate_features: bool = True,\n        base_margin: Optional[_DaskCollection] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> Any:\n        return self._client_sync(\n            self._predict_proba_async,\n            X=X,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n\n    if XGBClassifier.predict_proba.__doc__ is not None:\n        predict_proba.__doc__ = XGBClassifier.predict_proba.__doc__\n\n    async def _predict_async(\n        self,\n        data: _DataT,\n        *,\n        output_margin: bool,\n        validate_features: bool,\n        base_margin: Optional[_DaskCollection],\n        iteration_range: Optional[IterationRange],\n    ) -> _DaskCollection:\n        pred_probs = await super()._predict_async(\n            data,\n            output_margin=output_margin,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n        if output_margin:\n            return pred_probs\n\n        if len(pred_probs.shape) == 1:\n            preds = (pred_probs > 0.5).astype(int)\n        else:\n            assert len(pred_probs.shape) == 2\n            
assert isinstance(pred_probs, da.Array)\n            # when using da.argmax directly, dask will construct a numpy based return\n            # array, which runs into error when computing GPU based prediction.\n\n            def _argmax(x: Any) -> Any:\n                return x.argmax(axis=1)\n\n            preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)\n        return preds\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost Ranking.\n\n    .. versionadded:: 1.4.0\n\n\"\"\",\n    [\"estimators\", \"model\"],\n    extra_parameters=\"\"\"\n    allow_group_split :\n\n        .. versionadded:: 3.0.0\n\n        Whether a query group can be split among multiple workers. When set to `False`,\n        inputs must be Dask dataframes or series. If you have many small query groups,\n        this can significantly increase the fragmentation of the data, and the internal\n        DMatrix construction can take longer.\n\n\"\"\",\n    end_note=\"\"\"\n        .. 
note::\n\n            For the dask implementation, group is not supported, use qid instead.\n\"\"\",\n)\nclass DaskXGBRanker(XGBRankerMixIn, DaskScikitLearnBase):\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        objective: str = \"rank:ndcg\",\n        allow_group_split: bool = False,\n        coll_cfg: Optional[CollConfig] = None,\n        **kwargs: Any,\n    ) -> None:\n        if callable(objective):\n            raise ValueError(\"Custom objective function not supported by XGBRanker.\")\n        self.allow_group_split = allow_group_split\n        super().__init__(objective=objective, coll_cfg=coll_cfg, **kwargs)\n\n    def _wrapper_params(self) -> Set[str]:\n        params = super()._wrapper_params()\n        params.add(\"allow_group_split\")\n        return params\n\n    async def _fit_async(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        qid: Optional[_DaskCollection],\n        sample_weight: Optional[_DaskCollection],\n        base_margin: Optional[_DaskCollection],\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]],\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]],\n        eval_qid: Optional[Sequence[_DaskCollection]],\n        verbose: Union[int, bool],\n        xgb_model: Optional[Union[XGBModel, Booster]],\n        feature_weights: Optional[_DaskCollection],\n    ) -> \"DaskXGBRanker\":\n        params = self.get_xgb_params()\n        model, metric, params, feature_weights = self._configure_fit(\n            xgb_model, params, feature_weights\n        )\n        dtrain, evals = await _async_wrap_evaluation_matrices(\n            self.client,\n            device=self.device,\n            tree_method=self.tree_method,\n            max_bin=self.max_bin,\n            X=X,\n            y=y,\n            group=None,\n            qid=qid,\n            
sample_weight=sample_weight,\n            base_margin=base_margin,\n            feature_weights=feature_weights,\n            eval_set=eval_set,\n            sample_weight_eval_set=sample_weight_eval_set,\n            base_margin_eval_set=base_margin_eval_set,\n            eval_group=None,\n            eval_qid=eval_qid,\n            missing=self.missing,\n            enable_categorical=self.enable_categorical,\n            feature_types=self.feature_types,\n        )\n        results = await self.client.sync(\n            _train_async,\n            asynchronous=True,\n            client=self.client,\n            global_config=config.get_config(),\n            dconfig=_get_dask_config(),\n            params=params,\n            dtrain=dtrain,\n            num_boost_round=self.get_num_boosting_rounds(),\n            evals=evals,\n            obj=None,\n            custom_metric=metric,\n            verbose_eval=verbose,\n            early_stopping_rounds=self.early_stopping_rounds,\n            callbacks=self.callbacks,\n            xgb_model=model,\n            coll_cfg=self.coll_cfg,\n        )\n        self._Booster = results[\"booster\"]\n        self.evals_result_ = results[\"history\"]\n        return self\n\n    # pylint: disable=unused-argument, arguments-differ\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        group: Optional[_DaskCollection] = None,\n        qid: Optional[_DaskCollection] = None,\n        sample_weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,\n        eval_group: Optional[Sequence[_DaskCollection]] = None,\n        eval_qid: Optional[Sequence[_DaskCollection]] = None,\n        verbose: Optional[Union[int, bool]] = False,\n        xgb_model: Optional[Union[XGBModel, str, Booster]] = None,\n        sample_weight_eval_set: 
Optional[Sequence[_DaskCollection]] = None,\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n    ) -> \"DaskXGBRanker\":\n        msg = \"Use the `qid` instead of the `group` with the dask interface.\"\n        if not (group is None and eval_group is None):\n            raise ValueError(msg)\n        if qid is None:\n            raise ValueError(\"`qid` is required for ranking.\")\n\n        def check_df(X: _DaskCollection) -> TypeGuard[dd.DataFrame]:\n            if not isinstance(X, dd.DataFrame):\n                raise TypeError(\n                    \"When `allow_group_split` is set to False, X is required to be\"\n                    \" a dataframe.\"\n                )\n            return True\n\n        def check_ser(\n            qid: Optional[_DaskCollection], name: str\n        ) -> TypeGuard[Optional[dd.Series]]:\n            if not isinstance(qid, dd.Series) and qid is not None:\n                raise TypeError(\n                    f\"When `allow_group_split` is set to False, {name} is required to \"\n                    \"be a series.\"\n                )\n            return True\n\n        if not self.allow_group_split:\n            assert (\n                check_df(X)\n                and check_ser(qid, \"qid\")\n                and check_ser(y, \"y\")\n                and check_ser(sample_weight, \"sample_weight\")\n                and check_ser(base_margin, \"base_margin\")\n            )\n            assert qid is not None and y is not None\n            X_id = id(X)\n            X, qid, y, sample_weight, base_margin = no_group_split(\n                self.device,\n                X,\n                qid,\n                y=y,\n                sample_weight=sample_weight,\n                base_margin=base_margin,\n            )\n\n            if eval_set is not None:\n                new_eval_set = []\n                new_eval_qid = []\n                
new_sample_weight_eval_set = []\n                new_base_margin_eval_set = []\n                assert eval_qid\n                for i, (Xe, ye) in enumerate(eval_set):\n                    we = sample_weight_eval_set[i] if sample_weight_eval_set else None\n                    be = base_margin_eval_set[i] if base_margin_eval_set else None\n                    assert check_df(Xe)\n                    assert eval_qid\n                    qe = eval_qid[i]\n                    assert (\n                        eval_qid\n                        and check_ser(qe, \"qid\")\n                        and check_ser(ye, \"y\")\n                        and check_ser(we, \"sample_weight\")\n                        and check_ser(be, \"base_margin\")\n                    )\n                    assert qe is not None and ye is not None\n                    if id(Xe) != X_id:\n                        Xe, qe, ye, we, be = no_group_split(\n                            self.device, Xe, qe, ye, we, be\n                        )\n                    else:\n                        Xe, qe, ye, we, be = X, qid, y, sample_weight, base_margin\n\n                    new_eval_set.append((Xe, ye))\n                    new_eval_qid.append(qe)\n\n                    if we is not None:\n                        new_sample_weight_eval_set.append(we)\n                    if be is not None:\n                        new_base_margin_eval_set.append(be)\n\n                eval_set = new_eval_set\n                eval_qid = new_eval_qid\n                sample_weight_eval_set = (\n                    new_sample_weight_eval_set if new_sample_weight_eval_set else None\n                )\n                base_margin_eval_set = (\n                    new_base_margin_eval_set if new_base_margin_eval_set else None\n                )\n\n        return self._client_sync(\n            self._fit_async,\n            X=X,\n            y=y,\n            qid=qid,\n            sample_weight=sample_weight,\n            
base_margin=base_margin,\n            eval_set=eval_set,\n            eval_qid=eval_qid,\n            verbose=verbose,\n            xgb_model=xgb_model,\n            sample_weight_eval_set=sample_weight_eval_set,\n            base_margin_eval_set=base_margin_eval_set,\n            feature_weights=feature_weights,\n        )\n\n    # FIXME(trivialfis): arguments differ due to additional parameters like group and\n    # qid.\n    if XGBRanker.fit.__doc__ is not None:\n        fit.__doc__ = XGBRanker.fit.__doc__\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost Random Forest Regressor.\n\n    .. versionadded:: 1.4.0\n\n\"\"\",\n    [\"model\", \"objective\"],\n    extra_parameters=\"\"\"\n    n_estimators : int\n        Number of trees in random forest to fit.\n\"\"\",\n)\nclass DaskXGBRFRegressor(DaskXGBRegressor):\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        learning_rate: Optional[float] = 1,\n        subsample: Optional[float] = 0.8,\n        colsample_bynode: Optional[float] = 0.8,\n        reg_lambda: Optional[float] = 1e-5,\n        coll_cfg: Optional[CollConfig] = None,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(\n            learning_rate=learning_rate,\n            subsample=subsample,\n            colsample_bynode=colsample_bynode,\n            reg_lambda=reg_lambda,\n            coll_cfg=coll_cfg,\n            **kwargs,\n        )\n\n    def get_xgb_params(self) -> Dict[str, Any]:\n        params = super().get_xgb_params()\n        params[\"num_parallel_tree\"] = self.n_estimators\n        return params\n\n    def get_num_boosting_rounds(self) -> int:\n        return 1\n\n    # pylint: disable=unused-argument\n    def fit(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        eval_set: 
Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,\n        verbose: Optional[Union[int, bool]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n    ) -> \"DaskXGBRFRegressor\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n        super().fit(**args)\n        return self\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost Random Forest Classifier.\n\n    .. versionadded:: 1.4.0\n\n\"\"\",\n    [\"model\", \"objective\"],\n    extra_parameters=\"\"\"\n    n_estimators : int\n        Number of trees in random forest to fit.\n\"\"\",\n)\nclass DaskXGBRFClassifier(DaskXGBClassifier):\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        learning_rate: Optional[float] = 1,\n        subsample: Optional[float] = 0.8,\n        colsample_bynode: Optional[float] = 0.8,\n        reg_lambda: Optional[float] = 1e-5,\n        coll_cfg: Optional[CollConfig] = None,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(\n            learning_rate=learning_rate,\n            subsample=subsample,\n            colsample_bynode=colsample_bynode,\n            reg_lambda=reg_lambda,\n            coll_cfg=coll_cfg,\n            **kwargs,\n        )\n\n    def get_xgb_params(self) -> Dict[str, Any]:\n        params = super().get_xgb_params()\n        params[\"num_parallel_tree\"] = self.n_estimators\n        return params\n\n    def get_num_boosting_rounds(self) -> int:\n        return 1\n\n    # pylint: disable=unused-argument\n    def fit(\n        self,\n        X: _DataT,\n        y: _DaskCollection,\n        *,\n        sample_weight: 
Optional[_DaskCollection] = None,\n        base_margin: Optional[_DaskCollection] = None,\n        eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]] = None,\n        verbose: Optional[Union[int, bool]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        base_margin_eval_set: Optional[Sequence[_DaskCollection]] = None,\n        feature_weights: Optional[_DaskCollection] = None,\n    ) -> \"DaskXGBRFClassifier\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n        super().fit(**args)\n        return self\n"
  },
  {
    "path": "python-package/xgboost/dask/data.py",
    "content": "# pylint: disable=too-many-arguments\n\"\"\"Copyright 2019-2025, XGBoost contributors\"\"\"\n\nimport logging\nfrom collections.abc import Sequence\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    List,\n    Optional,\n    Tuple,\n    TypeVar,\n    Union,\n    cast,\n    overload,\n)\n\nimport dask\nimport distributed\nimport numpy as np\nimport pandas as pd\nfrom dask import dataframe as dd\n\nfrom .. import collective as coll\nfrom .._data_utils import Categories\nfrom .._typing import FeatureNames, FeatureTypes\nfrom ..compat import concat, import_cupy\nfrom ..core import Booster, DataIter, DMatrix, QuantileDMatrix\nfrom ..data import is_on_cuda\nfrom ..sklearn import get_model_categories, pick_ref_categories\nfrom ..training import _RefError\n\nLOGGER = logging.getLogger(\"[xgboost.dask]\")\n\n_DataParts = List[Dict[str, Any]]\n\n\nmeta = [\n    \"label\",\n    \"weight\",\n    \"base_margin\",\n    \"qid\",\n    \"label_lower_bound\",\n    \"label_upper_bound\",\n]\n\n\nclass DaskPartitionIter(DataIter):  # pylint: disable=R0902\n    \"\"\"A data iterator for the `DaskQuantileDMatrix`.\"\"\"\n\n    def __init__(\n        self,\n        data: List[Any],\n        feature_names: Optional[FeatureNames] = None,\n        feature_types: Optional[Union[FeatureTypes, Categories]] = None,\n        feature_weights: Optional[Any] = None,\n        **kwargs: Optional[List[Any]],\n    ) -> None:\n        types = (Sequence, type(None))\n        # Samples\n        self._data = data\n        for k in meta:\n            setattr(self, k, kwargs.get(k, None))\n            assert isinstance(getattr(self, k), types)\n\n        # Feature info\n        self._feature_names = feature_names\n        self._feature_types = feature_types\n        self._feature_weights = feature_weights\n\n        assert isinstance(self._data, Sequence)\n\n        self._iter = 0  # set iterator to 0\n        super().__init__(release_data=True)\n\n    def _get(self, attr: str) -> 
Optional[Any]:\n        if getattr(self, attr) is not None:\n            return getattr(self, attr)[self._iter]\n        return None\n\n    def data(self) -> Any:\n        \"\"\"Utility function for obtaining current batch of data.\"\"\"\n        return self._data[self._iter]\n\n    def reset(self) -> None:\n        \"\"\"Reset the iterator\"\"\"\n        self._iter = 0\n\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Yield next batch of data\"\"\"\n        if self._iter == len(self._data):\n            # Return False when there's no more batch.\n            return False\n\n        kwargs = {k: self._get(k) for k in meta}\n        input_data(\n            data=self.data(),\n            group=None,\n            feature_names=self._feature_names,\n            feature_types=self._feature_types,\n            feature_weights=self._feature_weights,\n            **kwargs,\n        )\n        self._iter += 1\n        return True\n\n\n@overload\ndef _add_column(df: dd.DataFrame, col: dd.Series) -> Tuple[dd.DataFrame, str]: ...\n\n\n@overload\ndef _add_column(df: dd.DataFrame, col: None) -> Tuple[dd.DataFrame, None]: ...\n\n\ndef _add_column(\n    df: dd.DataFrame, col: Optional[dd.Series]\n) -> Tuple[dd.DataFrame, Optional[str]]:\n    if col is None:\n        return df, col\n\n    trails = 0\n    uid = f\"{col.name}_{trails}\"\n    while uid in df.columns:\n        trails += 1\n        uid = f\"{col.name}_{trails}\"\n\n    df = df.assign(**{uid: col})\n    return df, uid\n\n\ndef no_group_split(  # pylint: disable=too-many-positional-arguments\n    device: str | None,\n    df: dd.DataFrame,\n    qid: dd.Series,\n    y: dd.Series,\n    sample_weight: Optional[dd.Series],\n    base_margin: Optional[dd.Series],\n) -> Tuple[\n    dd.DataFrame, dd.Series, dd.Series, Optional[dd.Series], Optional[dd.Series]\n]:\n    \"\"\"A function to prevent query group from being scattered to different\n    workers. 
Please see the tutorial in the document for the implication for not having\n    partition boundary based on query groups.\n\n    \"\"\"\n\n    df, qid_uid = _add_column(df, qid)\n    df, y_uid = _add_column(df, y)\n    df, w_uid = _add_column(df, sample_weight)\n    df, bm_uid = _add_column(df, base_margin)\n\n    # `tasks` shuffle is required as of rapids 24.12\n    shuffle = \"p2p\" if device is None or device == \"cpu\" else \"tasks\"\n    with dask.config.set({\"dataframe.shuffle.method\": shuffle}):\n        df = df.persist()\n        # Encode the QID to make it dense.\n        df[qid_uid] = df[qid_uid].astype(\"category\").cat.as_known().cat.codes\n        # The shuffle here is costly.\n        df = df.sort_values(by=qid_uid)\n        cnt = df.groupby(qid_uid)[qid_uid].count()\n        div = cnt.index.compute().values.tolist()\n        div = sorted(div)\n        div = tuple(div + [div[-1] + 1])\n\n        df = df.set_index(\n            qid_uid,\n            drop=False,\n            divisions=div,\n        ).persist()\n\n    qid = df[qid_uid]\n    y = df[y_uid]\n    sample_weight, base_margin = (\n        cast(dd.Series, df[uid]) if uid is not None else None for uid in (w_uid, bm_uid)\n    )\n\n    uids = [uid for uid in [qid_uid, y_uid, w_uid, bm_uid] if uid is not None]\n    df = df.drop(uids, axis=1).persist()\n    return df, qid, y, sample_weight, base_margin\n\n\ndef sort_data_by_qid(**kwargs: List[Any]) -> Dict[str, List[Any]]:\n    \"\"\"Sort worker-local data by query ID for learning to rank tasks.\"\"\"\n    data_parts = kwargs.get(\"data\")\n    assert data_parts is not None\n    n_parts = len(data_parts)\n\n    if is_on_cuda(data_parts[0]):\n        from cudf import DataFrame\n    else:\n        from pandas import DataFrame\n\n    def get_dict(i: int) -> Dict[str, list]:\n        \"\"\"Return a dictionary containing all the meta info and all partitions.\"\"\"\n\n        def _get(attr: Optional[List[Any]]) -> Optional[list]:\n            if attr is 
not None:\n                return attr[i]\n            return None\n\n        data_opt = {name: _get(kwargs.get(name, None)) for name in meta}\n        # Filter out None values.\n        data = {k: v for k, v in data_opt.items() if v is not None}\n        return data\n\n    def map_fn(i: int) -> pd.DataFrame:\n        data = get_dict(i)\n        return DataFrame(data)\n\n    meta_parts = [map_fn(i) for i in range(n_parts)]\n    dfq = concat(meta_parts)\n    if dfq.qid.is_monotonic_increasing:\n        return kwargs\n\n    LOGGER.warning(\n        \"[r%d]: Sorting data with %d partitions for ranking. \"\n        \"This is a costly operation and will increase the memory usage significantly. \"\n        \"To avoid this warning, sort the data based on qid before passing it into \"\n        \"XGBoost. Alternatively, you can use set the `allow_group_split` to False.\",\n        coll.get_rank(),\n        n_parts,\n    )\n    # I tried to construct a new dask DF to perform the sort, but it's quite difficult\n    # to get the partition alignment right. Along with the still maturing shuffle\n    # implementation and GPU compatibility, a simple concat is used.\n    #\n    # In case it might become useful one day, I managed to get a CPU version working,\n    # albeit qutie slow (much slower than concatenated sort). The implementation merges\n    # everything into a single Dask DF and runs `DF.sort_values`, then retrieve the\n    # individual X,y,qid, ... from calculated partition values `client.compute([p for p\n    # in df.partitions])`. 
It was to avoid creating mismatched partitions.\n    dfx = concat(data_parts)\n\n    if is_on_cuda(dfq):\n        cp = import_cupy()\n        sorted_idx = cp.argsort(dfq.qid)\n    else:\n        sorted_idx = np.argsort(dfq.qid)\n    dfq = dfq.iloc[sorted_idx, :]\n\n    if hasattr(dfx, \"iloc\"):\n        dfx = dfx.iloc[sorted_idx, :]\n    else:\n        dfx = dfx[sorted_idx, :]\n\n    kwargs.update({\"data\": [dfx]})\n    for i, c in enumerate(dfq.columns):\n        assert c in kwargs\n        kwargs.update({c: [dfq[c]]})\n\n    return kwargs\n\n\ndef _get_worker_parts(list_of_parts: _DataParts) -> Dict[str, List[Any]]:\n    \"\"\"Convert list of dictionaries into a dictionary of lists.\"\"\"\n    assert isinstance(list_of_parts, list)\n    result: Dict[str, List[Any]] = {}\n\n    def append(i: int, name: str) -> None:\n        if name in list_of_parts[i]:\n            part = list_of_parts[i][name]\n        else:\n            part = None\n        if part is not None:\n            if name not in result:\n                result[name] = []\n            result[name].append(part)\n\n    for i, _ in enumerate(list_of_parts):\n        append(i, \"data\")\n        for k in meta:\n            append(i, k)\n\n    qid = result.get(\"qid\", None)\n    if qid is not None:\n        result = sort_data_by_qid(**result)\n    return result\n\n\ndef _extract_data(\n    parts: _DataParts,\n    model: Optional[Booster],\n    feature_types: Optional[FeatureTypes],\n    xy_cats: Optional[Categories],\n) -> Tuple[Dict[str, List[Any]], Optional[Union[FeatureTypes, Categories]]]:\n    unzipped_dict = _get_worker_parts(parts)\n    X = unzipped_dict[\"data\"][0]\n    _, model_cats = get_model_categories(X, model, feature_types)\n    model_cats = pick_ref_categories(X, model_cats, xy_cats)\n    return unzipped_dict, model_cats\n\n\ndef _get_is_cuda(parts: Optional[_DataParts]) -> bool:\n    if parts is not None:\n        is_cuda = is_on_cuda(parts[0].get(\"data\"))\n    else:\n        is_cuda 
= False\n\n    is_cuda = bool(coll.allreduce(np.array([is_cuda], dtype=np.int32), coll.Op.MAX)[0])\n    return is_cuda\n\n\ndef _make_empty(is_cuda: bool) -> np.ndarray:\n    if is_cuda:\n        cp = import_cupy()\n        empty = cp.empty((0, 0))\n    else:\n        empty = np.empty((0, 0))\n    return empty\n\n\ndef _warn_empty() -> None:\n    worker = distributed.get_worker()\n    LOGGER.warning(\"Worker %s has an empty DMatrix.\", worker.address)\n\n\ndef _create_quantile_dmatrix(\n    *,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    feature_weights: Optional[Any],\n    missing: float,\n    nthread: int,\n    parts: Optional[_DataParts],\n    max_bin: int,\n    enable_categorical: bool,\n    max_quantile_batches: Optional[int],\n    ref: Optional[DMatrix] = None,\n    model: Optional[Booster],\n    Xy_cats: Optional[Categories],\n) -> QuantileDMatrix:\n    is_cuda = _get_is_cuda(parts)\n    if parts is None:\n        _warn_empty()\n        return QuantileDMatrix(\n            _make_empty(is_cuda),\n            feature_names=feature_names,\n            feature_types=feature_types,\n            max_bin=max_bin,\n            ref=ref,\n            enable_categorical=enable_categorical,\n            max_quantile_batches=max_quantile_batches,\n        )\n\n    unzipped_dict, model_cats = _extract_data(parts, model, feature_types, Xy_cats)\n\n    return QuantileDMatrix(\n        DaskPartitionIter(\n            **unzipped_dict,\n            feature_types=model_cats,\n            feature_names=feature_names,\n            feature_weights=feature_weights,\n        ),\n        missing=missing,\n        nthread=nthread,\n        max_bin=max_bin,\n        ref=ref,\n        enable_categorical=enable_categorical,\n        max_quantile_batches=max_quantile_batches,\n    )\n\n\ndef _create_dmatrix(  # pylint: disable=too-many-locals\n    *,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    
feature_weights: Optional[Any],\n    missing: float,\n    nthread: int,\n    enable_categorical: bool,\n    parts: Optional[_DataParts],\n    model: Optional[Booster],\n    Xy_cats: Optional[Categories],\n) -> DMatrix:\n    \"\"\"Get data that local to worker from DaskDMatrix.\n\n    Returns\n    -------\n    A DMatrix object.\n\n    \"\"\"\n    is_cuda = _get_is_cuda(parts)\n    if parts is None:\n        _warn_empty()\n        return DMatrix(\n            _make_empty(is_cuda),\n            feature_names=feature_names,\n            feature_types=feature_types,\n            enable_categorical=enable_categorical,\n        )\n\n    T = TypeVar(\"T\")\n\n    def concat_or_none(data: Sequence[Optional[T]]) -> Optional[T]:\n        if any(part is None for part in data):\n            return None\n        return concat(data)\n\n    unzipped_dict, model_cats = _extract_data(parts, model, feature_types, Xy_cats)\n\n    concated_dict: Dict[str, Any] = {}\n    for key, value in unzipped_dict.items():\n        v = concat_or_none(value)\n        concated_dict[key] = v\n\n    return DMatrix(\n        **concated_dict,\n        missing=missing,\n        feature_names=feature_names,\n        feature_types=model_cats,\n        nthread=nthread,\n        enable_categorical=enable_categorical,\n        feature_weights=feature_weights,\n    )\n\n\ndef _dmatrix_from_list_of_parts(is_quantile: bool, **kwargs: Any) -> DMatrix:\n    if is_quantile:\n        return _create_quantile_dmatrix(**kwargs)\n    return _create_dmatrix(**kwargs)\n\n\ndef _get_dmatrices(\n    train_ref: dict,\n    train_id: int,\n    *refs: dict,\n    evals_id: Sequence[int],\n    evals_name: Sequence[str],\n    n_threads: int,\n    model: Optional[Booster],\n) -> Tuple[DMatrix, List[Tuple[DMatrix, str]]]:\n    # Create the training DMatrix\n    Xy = _dmatrix_from_list_of_parts(\n        **train_ref, nthread=n_threads, model=model, Xy_cats=None\n    )\n\n    # Create evaluation DMatrices\n    evals: 
List[Tuple[DMatrix, str]] = []\n    Xy_cats = Xy.get_categories()\n\n    for i, ref in enumerate(refs):\n        # Same DMatrix as the training\n        if evals_id[i] == train_id:\n            evals.append((Xy, evals_name[i]))\n            continue\n        # Check whether the training DMatrix has been used as a reference.\n        if ref.get(\"ref\", None) is not None:\n            if ref[\"ref\"] != train_id:\n                raise ValueError(_RefError)\n            del ref[\"ref\"]  # Avoid duplicated parameter in the next fn call.\n            eval_xy = _dmatrix_from_list_of_parts(\n                **ref, nthread=n_threads, ref=Xy, Xy_cats=Xy_cats, model=model\n            )\n        else:\n            eval_xy = _dmatrix_from_list_of_parts(\n                **ref, nthread=n_threads, Xy_cats=Xy_cats, model=model\n            )\n        evals.append((eval_xy, evals_name[i]))\n    return Xy, evals\n"
  },
  {
    "path": "python-package/xgboost/dask/utils.py",
    "content": "\"\"\"Utilities for the XGBoost Dask interface.\"\"\"\n\nimport logging\nimport warnings\nfrom functools import cache as fcache\nfrom typing import Any, Dict, Optional, Tuple\n\nimport dask\nimport distributed\nfrom packaging.version import Version\nfrom packaging.version import parse as parse_version\n\nfrom ..collective import Config\n\nLOGGER = logging.getLogger(\"[xgboost.dask]\")\n\n\ndef get_n_threads(local_param: Dict[str, Any], worker: \"distributed.Worker\") -> int:\n    \"\"\"Get the number of threads from a worker and the user-supplied parameters.\"\"\"\n    # dask worker nthreads, \"state\" is available in 2022.6.1\n    dwnt = worker.state.nthreads if hasattr(worker, \"state\") else worker.nthreads\n    n_threads = None\n    for p in [\"nthread\", \"n_jobs\"]:\n        if local_param.get(p, None) is not None and local_param.get(p, dwnt) != dwnt:\n            LOGGER.info(\"Overriding `nthreads` defined in dask worker.\")\n            n_threads = local_param[p]\n            break\n    if n_threads == 0 or n_threads is None:\n        n_threads = dwnt\n    return n_threads\n\n\ndef get_address_from_user(\n    dconfig: Optional[Dict[str, Any]], coll_cfg: Config\n) -> Tuple[Optional[str], int]:\n    \"\"\"Get the tracker address from the optional user configuration.\n\n    Parameters\n    ----------\n    dconfig :\n        Dask global configuration.\n\n    coll_cfg :\n        Collective configuration.\n\n    Returns\n    -------\n    The IP address along with the port number.\n\n    \"\"\"\n\n    valid_config = [\"scheduler_address\"]\n\n    host_ip = None\n    port = 0\n\n    if dconfig is not None:\n        for k in dconfig:\n            if k not in valid_config:\n                raise ValueError(f\"Unknown configuration: {k}\")\n            warnings.warn(\n                (\n                    \"Use `coll_cfg` instead of the Dask global configuration store\"\n                    f\" for the XGBoost tracker configuration: {k}.\"\n           
     ),\n                FutureWarning,\n            )\n    else:\n        dconfig = {}\n\n    host_ip = dconfig.get(\"scheduler_address\", None)\n    if host_ip is not None and host_ip.startswith(\"[\") and host_ip.endswith(\"]\"):\n        # convert dask bracket format to proper IPv6 address.\n        host_ip = host_ip[1:-1]\n    if host_ip is not None:\n        try:\n            host_ip, port = distributed.comm.get_address_host_port(host_ip)\n        except ValueError:\n            pass\n\n    if coll_cfg is None:\n        coll_cfg = Config()\n    if coll_cfg.tracker_host_ip is not None:\n        if host_ip is not None and coll_cfg.tracker_host_ip != host_ip:\n            raise ValueError(\n                \"Conflicting host IP addresses from the dask configuration and the \"\n                f\"collective configuration: {host_ip} v.s. {coll_cfg.tracker_host_ip}.\"\n            )\n        host_ip = coll_cfg.tracker_host_ip\n    if coll_cfg.tracker_port is not None:\n        if (\n            port != 0\n            and port is not None\n            and coll_cfg.tracker_port != 0\n            and port != coll_cfg.tracker_port\n        ):\n            raise ValueError(\n                \"Conflicting ports from the dask configuration and the \"\n                f\"collective configuration: {port} v.s. {coll_cfg.tracker_port}.\"\n            )\n        port = coll_cfg.tracker_port\n\n    return host_ip, port\n\n\n@fcache\ndef _DASK_VERSION() -> Version:\n    return parse_version(dask.__version__)\n\n\n@fcache\ndef _DASK_2024_12_1() -> bool:\n    return _DASK_VERSION() >= parse_version(\"2024.12.1\")\n\n\n@fcache\ndef _DASK_2025_3_0() -> bool:\n    return _DASK_VERSION() >= parse_version(\"2025.3.0\")\n"
  },
  {
    "path": "python-package/xgboost/data.py",
    "content": "# pylint: disable=too-many-arguments, too-many-branches, too-many-lines\n# pylint: disable=too-many-return-statements\n\"\"\"Data dispatching for DMatrix.\"\"\"\n\nimport ctypes\nimport functools\nimport json\nimport os\nimport warnings\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    Dict,\n    List,\n    Optional,\n    Sequence,\n    Tuple,\n    TypeAlias,\n    TypeGuard,\n    Union,\n)\n\nimport numpy as np\n\nfrom ._c_api import _LIB, _check_call, c_str, make_jcargs\nfrom ._data_utils import (\n    AifType,\n    Categories,\n    DfCatAccessor,\n    TransformedDf,\n    _arrow_array_inf,\n    _ensure_np_dtype,\n    _is_df_cat,\n    array_hasobject,\n    array_interface,\n    array_interface_dict,\n    arrow_cat_inf,\n    check_cudf_meta,\n    cuda_array_interface,\n    cuda_array_interface_dict,\n    cudf_cat_inf,\n    get_ref_categories,\n    is_arrow_dict,\n    pd_cat_inf,\n)\nfrom ._typing import (\n    CupyT,\n    DataSplitMode,\n    DataType,\n    FeatureNames,\n    FeatureTypes,\n    FloatCompatible,\n    NumpyDType,\n    PandasDType,\n    PathLike,\n    TransformedData,\n    c_bst_ulong,\n)\nfrom .compat import (\n    _is_arrow,\n    _is_cudf_df,\n    _is_cudf_pandas,\n    _is_cudf_ser,\n    _is_cupy_alike,\n    _is_modin_df,\n    _is_modin_series,\n    _is_pandas_df,\n    _is_pandas_series,\n    _is_polars,\n    _is_polars_lazyframe,\n    _is_polars_series,\n    import_pandas,\n    import_polars,\n    import_pyarrow,\n    is_pyarrow_available,\n    lazy_isinstance,\n)\n\nif TYPE_CHECKING:\n    import pyarrow as pa\n    from pandas import DataFrame as PdDataFrame\n    from pandas import Series as PdSeries\n\n    from .core import DMatrix, _ProxyDMatrix\n\n\nDispatchedDataBackendReturnType: TypeAlias = Tuple[\n    ctypes.c_void_p, Optional[FeatureNames], Optional[FeatureTypes]\n]\n\nCAT_T = \"c\"\n\n# meta info that can be a matrix instead of vector.\n_matrix_meta = {\"base_margin\", \"label\"}\n\n\ndef 
_warn_unused_missing(data: DataType, missing: Optional[FloatCompatible]) -> None:\n    if (missing is not None) and (not np.isnan(missing)):\n        warnings.warn(\n            \"`missing` is not used for current input data type:\" + str(type(data)),\n            UserWarning,\n        )\n\n\ndef _check_data_shape(data: DataType) -> None:\n    if hasattr(data, \"shape\") and len(data.shape) != 2:\n        raise ValueError(\"Please reshape the input data into 2-dimensional matrix.\")\n\n\ndef is_scipy_csr(data: DataType) -> bool:\n    \"\"\"Predicate for scipy CSR input.\"\"\"\n    is_array = False\n    is_matrix = False\n    try:\n        from scipy.sparse import csr_array\n\n        is_array = isinstance(data, csr_array)\n    except ImportError:\n        pass\n    try:\n        from scipy.sparse import csr_matrix\n\n        is_matrix = isinstance(data, csr_matrix)\n    except ImportError:\n        pass\n    return is_array or is_matrix\n\n\ndef transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:\n    \"\"\"Ensure correct data alignment and data type for scipy sparse inputs. 
Input should\n    be either csr or csc matrix.\n\n    \"\"\"\n    from scipy.sparse import csc_matrix, csr_matrix\n\n    if len(data.indices) != len(data.data):\n        raise ValueError(f\"length mismatch: {len(data.indices)} vs {len(data.data)}\")\n\n    indptr, _ = _ensure_np_dtype(data.indptr, data.indptr.dtype)\n    indices, _ = _ensure_np_dtype(data.indices, data.indices.dtype)\n    values, _ = _ensure_np_dtype(data.data, data.data.dtype)\n    if (\n        indptr is not data.indptr\n        or indices is not data.indices\n        or values is not data.data\n    ):\n        if is_csr:\n            data = csr_matrix((values, indices, indptr), shape=data.shape)\n        else:\n            data = csc_matrix((values, indices, indptr), shape=data.shape)\n    return data\n\n\ndef _from_scipy_csr(\n    *,\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    \"\"\"Initialize data from a CSR matrix.\"\"\"\n\n    handle = ctypes.c_void_p()\n    data = transform_scipy_sparse(data, True)\n    _check_call(\n        _LIB.XGDMatrixCreateFromCSR(\n            array_interface(data.indptr),\n            array_interface(data.indices),\n            array_interface(data.data),\n            c_bst_ulong(data.shape[1]),\n            make_jcargs(\n                missing=float(missing),\n                nthread=int(nthread),\n                data_split_mode=int(data_split_mode),\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\ndef is_scipy_csc(data: DataType) -> bool:\n    \"\"\"Predicate for scipy CSC input.\"\"\"\n    is_array = False\n    is_matrix = False\n    try:\n        from scipy.sparse import csc_array\n\n        is_array = isinstance(data, csc_array)\n    except ImportError:\n        pass\n    
try:\n        from scipy.sparse import csc_matrix\n\n        is_matrix = isinstance(data, csc_matrix)\n    except ImportError:\n        pass\n    return is_array or is_matrix\n\n\ndef _from_scipy_csc(\n    *,\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    \"\"\"Initialize data from a CSC matrix.\"\"\"\n    handle = ctypes.c_void_p()\n    transform_scipy_sparse(data, False)\n    _check_call(\n        _LIB.XGDMatrixCreateFromCSC(\n            array_interface(data.indptr),\n            array_interface(data.indices),\n            array_interface(data.data),\n            c_bst_ulong(data.shape[0]),\n            make_jcargs(\n                missing=float(missing),\n                nthread=int(nthread),\n                data_split_mode=int(data_split_mode),\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\ndef is_scipy_coo(data: DataType) -> bool:\n    \"\"\"Predicate for scipy COO input.\"\"\"\n    is_array = False\n    is_matrix = False\n    try:\n        from scipy.sparse import coo_array\n\n        is_array = isinstance(data, coo_array)\n    except ImportError:\n        pass\n    try:\n        from scipy.sparse import coo_matrix\n\n        is_matrix = isinstance(data, coo_matrix)\n    except ImportError:\n        pass\n    return is_array or is_matrix\n\n\ndef _is_np_array_like(data: DataType) -> TypeGuard[np.ndarray]:\n    return hasattr(data, \"__array_interface__\")\n\n\ndef _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:\n    \"\"\"Handle numpy slice.  
This can be removed if we use __array_interface__.\"\"\"\n    try:\n        if not data.flags.c_contiguous:\n            data = np.array(data, copy=True, dtype=dtype)\n        else:\n            data = np.asarray(data, dtype=dtype)\n    except AttributeError:\n        data = np.asarray(data, dtype=dtype)\n    data, dtype = _ensure_np_dtype(data, dtype)\n    return data\n\n\ndef _from_numpy_array(\n    *,\n    data: np.ndarray,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    \"\"\"Initialize data from a 2-D numpy matrix.\"\"\"\n    _check_data_shape(data)\n    data, _ = _ensure_np_dtype(data, data.dtype)\n    handle = ctypes.c_void_p()\n    _check_call(\n        _LIB.XGDMatrixCreateFromDense(\n            array_interface(data),\n            make_jcargs(\n                missing=float(missing),\n                nthread=int(nthread),\n                data_split_mode=int(data_split_mode),\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\n_pandas_dtype_mapper = {\n    \"int8\": \"int\",\n    \"int16\": \"int\",\n    \"int32\": \"int\",\n    \"int64\": \"int\",\n    \"uint8\": \"int\",\n    \"uint16\": \"int\",\n    \"uint32\": \"int\",\n    \"uint64\": \"int\",\n    \"float16\": \"float\",\n    \"float32\": \"float\",\n    \"float64\": \"float\",\n    \"bool\": \"i\",\n}\n\n# nullable types\npandas_nullable_mapper = {\n    \"Int8\": \"int\",\n    \"Int16\": \"int\",\n    \"Int32\": \"int\",\n    \"Int64\": \"int\",\n    \"UInt8\": \"int\",\n    \"UInt16\": \"int\",\n    \"UInt32\": \"int\",\n    \"UInt64\": \"int\",\n    \"Float32\": \"float\",\n    \"Float64\": \"float\",\n    \"boolean\": \"i\",\n}\n\npandas_pyarrow_mapper = {\n    \"int8[pyarrow]\": \"int\",\n    \"int16[pyarrow]\": \"int\",\n    
\"int32[pyarrow]\": \"int\",\n    \"int64[pyarrow]\": \"int\",\n    \"uint8[pyarrow]\": \"int\",\n    \"uint16[pyarrow]\": \"int\",\n    \"uint32[pyarrow]\": \"int\",\n    \"uint64[pyarrow]\": \"int\",\n    \"float[pyarrow]\": \"float\",\n    \"float32[pyarrow]\": \"float\",\n    \"double[pyarrow]\": \"float\",\n    \"float64[pyarrow]\": \"float\",\n    \"bool[pyarrow]\": \"i\",\n}\n\n_pandas_dtype_mapper.update(pandas_nullable_mapper)\n_pandas_dtype_mapper.update(pandas_pyarrow_mapper)\n\n\n_ENABLE_CAT_ERR = (\n    \"When categorical type is supplied, the experimental DMatrix parameter\"\n    \"`enable_categorical` must be set to `True`.\"\n)\n\n\ndef _invalid_dataframe_dtype(data: DataType) -> None:\n    # pandas series has `dtypes` but it's just a single object\n    # cudf series doesn't have `dtypes`.\n    if hasattr(data, \"dtypes\") and hasattr(data.dtypes, \"__iter__\"):\n        bad_fields = [\n            f\"{data.columns[i]}: {dtype}\"\n            for i, dtype in enumerate(data.dtypes)\n            if dtype.name not in _pandas_dtype_mapper\n        ]\n        err = \" Invalid columns:\" + \", \".join(bad_fields)\n    else:\n        err = \"\"\n\n    type_err = \"DataFrame.dtypes for data must be int, float, bool or category.\"\n    msg = f\"\"\"{type_err} {_ENABLE_CAT_ERR} {err}\"\"\"\n    raise ValueError(msg)\n\n\ndef pandas_feature_info(\n    data: \"PdDataFrame\",\n    meta: Optional[str],\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    enable_categorical: bool,\n) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:\n    \"\"\"Handle feature info for pandas dataframe.\"\"\"\n    pd = import_pandas()\n\n    # handle feature names\n    if feature_names is None and meta is None:\n        if isinstance(data.columns, pd.MultiIndex):\n            feature_names = [\" \".join([str(x) for x in i]) for i in data.columns]\n        else:\n            feature_names = list(data.columns.map(str))\n\n    # handle 
feature types and dtype validation\n    new_feature_types = []\n    need_sparse_extension_warn = True\n    for dtype in data.dtypes:\n        if is_pd_sparse_dtype(dtype):\n            new_feature_types.append(_pandas_dtype_mapper[dtype.subtype.name])\n            if need_sparse_extension_warn:\n                warnings.warn(\"Sparse arrays from pandas are converted into dense.\")\n                need_sparse_extension_warn = False\n        elif (\n            is_pd_cat_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)\n        ) and enable_categorical:\n            new_feature_types.append(CAT_T)\n        else:\n            try:\n                new_feature_types.append(_pandas_dtype_mapper[dtype.name])\n            except KeyError:\n                _invalid_dataframe_dtype(data)\n\n    if feature_types is None and meta is None:\n        feature_types = new_feature_types\n\n    return feature_names, feature_types\n\n\ndef is_nullable_dtype(dtype: PandasDType) -> bool:\n    \"\"\"Whether dtype is a pandas nullable type.\"\"\"\n\n    from pandas.api.extensions import ExtensionDtype\n\n    if not isinstance(dtype, ExtensionDtype):\n        return False\n\n    from pandas.api.types import is_bool_dtype, is_float_dtype, is_integer_dtype\n\n    is_int = is_integer_dtype(dtype) and dtype.name in pandas_nullable_mapper\n    # np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.\n    is_bool = is_bool_dtype(dtype) and dtype.name == \"boolean\"\n    is_float = is_float_dtype(dtype) and dtype.name in pandas_nullable_mapper\n    return is_int or is_bool or is_float or is_pd_cat_dtype(dtype)\n\n\ndef is_pa_ext_dtype(dtype: Any) -> bool:\n    \"\"\"Return whether dtype is a pyarrow extension type for pandas\"\"\"\n    return hasattr(dtype, \"pyarrow_dtype\")\n\n\ndef is_pa_ext_categorical_dtype(dtype: Any) -> bool:\n    \"\"\"Check whether dtype is a dictionary type.\"\"\"\n    return lazy_isinstance(\n        getattr(dtype, \"pyarrow_dtype\", None), \"pyarrow.lib\", 
\"DictionaryType\"\n    )\n\n\n@functools.cache\ndef _lazy_load_pd_is_cat() -> Callable[[PandasDType], bool]:\n    pd = import_pandas()\n\n    if hasattr(pd.util, \"version\") and hasattr(pd.util.version, \"Version\"):\n        Version = pd.util.version.Version\n        if Version(pd.__version__) >= Version(\"2.1.0\"):\n            from pandas import CategoricalDtype\n\n            def pd_is_cat_210(dtype: PandasDType) -> bool:\n                return isinstance(dtype, CategoricalDtype)\n\n            return pd_is_cat_210\n    from pandas.api.types import is_categorical_dtype  # type: ignore[attr-defined]\n\n    return is_categorical_dtype\n\n\ndef is_pd_cat_dtype(dtype: PandasDType) -> bool:\n    \"\"\"Wrapper for testing pandas category type.\"\"\"\n    is_cat = _lazy_load_pd_is_cat()\n    return is_cat(dtype)\n\n\n@functools.cache\ndef _lazy_load_pd_is_sparse() -> Callable[[PandasDType], bool]:\n    pd = import_pandas()\n\n    if hasattr(pd.util, \"version\") and hasattr(pd.util.version, \"Version\"):\n        Version = pd.util.version.Version\n        if Version(pd.__version__) >= Version(\"2.1.0\"):\n            from pandas import SparseDtype\n\n            def pd_is_sparse_210(dtype: PandasDType) -> bool:\n                return isinstance(dtype, SparseDtype)\n\n            return pd_is_sparse_210\n\n    from pandas.api.types import is_sparse  # type: ignore[attr-defined]\n\n    return is_sparse\n\n\ndef is_pd_sparse_dtype(dtype: PandasDType) -> bool:\n    \"\"\"Wrapper for testing pandas sparse type.\"\"\"\n    is_sparse = _lazy_load_pd_is_sparse()\n\n    return is_sparse(dtype)\n\n\ndef pandas_pa_type(ser: Any) -> np.ndarray:\n    \"\"\"Handle pandas pyarrow extention.\"\"\"\n    pd = import_pandas()\n\n    if TYPE_CHECKING:\n        import pyarrow as pa\n    else:\n        pa = import_pyarrow()\n\n    # No copy, callstack:\n    # pandas.core.internals.managers.SingleBlockManager.array_values()\n    # pandas.core.internals.blocks.EABackedBlock.values\n    
d_array: pd.arrays.ArrowExtensionArray = ser.array  # type: ignore[name-defined]\n    # no copy in __arrow_array__\n    # ArrowExtensionArray._data is a chunked array\n    aa: \"pa.ChunkedArray\" = d_array.__arrow_array__()\n    # combine_chunks takes the most significant amount of time\n    chunk: \"pa.Array\" = aa.combine_chunks()\n    # When there's null value, we have to use copy\n    zero_copy = chunk.null_count == 0 and not pa.types.is_boolean(chunk.type)\n    # Alternately, we can use chunk.buffers(), which returns a list of buffers and\n    # we need to concatenate them ourselves.\n    # FIXME(jiamingy): Is there a better way to access the arrow buffer along with\n    # its mask?\n    # Buffers from chunk.buffers() have the address attribute, but don't expose the\n    # mask.\n    arr: np.ndarray = chunk.to_numpy(zero_copy_only=zero_copy, writable=False)\n    arr, _ = _ensure_np_dtype(arr, arr.dtype)\n    return arr\n\n\n@functools.cache\ndef _lazy_has_npdtypes() -> bool:\n    return np.lib.NumpyVersion(np.__version__) > np.lib.NumpyVersion(\"1.25.0\")\n\n\n@functools.cache\ndef _lazy_load_pd_floats() -> tuple:\n    from pandas import Float32Dtype, Float64Dtype\n\n    return Float32Dtype, Float64Dtype\n\n\ndef pandas_transform_data(\n    data: \"PdDataFrame\",\n) -> List[Union[np.ndarray, DfCatAccessor]]:\n    \"\"\"Handle categorical dtype and extension types from pandas.\"\"\"\n    Float32Dtype, Float64Dtype = _lazy_load_pd_floats()\n\n    result: List[Union[np.ndarray, DfCatAccessor]] = []\n    np_dtypes = _lazy_has_npdtypes()\n\n    def cat_codes(ser: \"PdSeries\") -> DfCatAccessor:\n        return ser.cat\n\n    def nu_type(ser: \"PdSeries\") -> np.ndarray:\n        # Avoid conversion when possible\n        if isinstance(dtype, Float32Dtype):\n            res_dtype: NumpyDType = np.float32\n        elif isinstance(dtype, Float64Dtype):\n            res_dtype = np.float64\n        else:\n            res_dtype = np.float32\n        return 
_ensure_np_dtype(\n            ser.to_numpy(dtype=res_dtype, na_value=np.nan), res_dtype\n        )[0]\n\n    def oth_type(ser: \"PdSeries\") -> np.ndarray:\n        # The dtypes module is added in 1.25.\n        npdtypes = np_dtypes and isinstance(\n            ser.dtype,\n            (\n                # pylint: disable=no-member\n                np.dtypes.Float32DType,  # type: ignore[attr-defined]\n                # pylint: disable=no-member\n                np.dtypes.Float64DType,  # type: ignore[attr-defined]\n            ),\n        )\n\n        if npdtypes or dtype in {np.float32, np.float64}:\n            array = ser.to_numpy()\n        else:\n            # Specifying the dtype can significantly slow down the conversion (about\n            # 15% slow down for dense inplace-predict)\n            array = ser.to_numpy(dtype=np.float32, na_value=np.nan)\n        return _ensure_np_dtype(array, array.dtype)[0]\n\n    for col, dtype in zip(data.columns, data.dtypes):\n        if is_pa_ext_categorical_dtype(dtype):\n            raise ValueError(\n                \"pyarrow dictionary type is not supported. 
Use pandas category instead.\"\n            )\n        if is_pd_cat_dtype(dtype):\n            result.append(cat_codes(data[col]))\n        elif is_pa_ext_dtype(dtype):\n            result.append(pandas_pa_type(data[col]))\n        elif is_nullable_dtype(dtype):\n            result.append(nu_type(data[col]))\n        elif is_pd_sparse_dtype(dtype):\n            arr = data[col].values\n            arr = arr.to_dense()\n            if _is_np_array_like(arr):\n                arr, _ = _ensure_np_dtype(arr, arr.dtype)\n            result.append(arr)\n        else:\n            result.append(oth_type(data[col]))\n\n    # FIXME(jiamingy): Investigate the possibility of using dataframe protocol or arrow\n    # IPC format for pandas so that we can apply the data transformation inside XGBoost\n    # for better memory efficiency.\n    return result\n\n\nclass PandasTransformed(TransformedDf):\n    \"\"\"A storage class for transformed pandas DataFrame.\"\"\"\n\n    def __init__(\n        self,\n        columns: List[Union[np.ndarray, DfCatAccessor]],\n        ref_categories: Optional[Categories],\n    ) -> None:\n        self.columns = columns\n\n        aitfs: AifType = []\n        temporary_buffers = []\n\n        # Get the array interface representation for each column.\n        for col in self.columns:\n            if _is_df_cat(col):\n                # Categorical column\n                jnames, jcodes, buf = pd_cat_inf(col.categories, col.codes)\n                temporary_buffers.append(buf)\n                aitfs.append((jnames, jcodes))\n            else:\n                assert isinstance(col, np.ndarray)\n                inf = array_interface_dict(col)\n                # Numeric column\n                aitfs.append(inf)\n\n        super().__init__(\n            ref_categories=ref_categories,\n            aitfs=aitfs,\n            temporary_buffers=temporary_buffers,\n        )\n\n    @property\n    def shape(self) -> Tuple[int, int]:\n        \"\"\"Return shape of 
the transformed DataFrame.\"\"\"\n        if is_arrow_dict(self.columns[0]):\n            # When input is arrow.\n            n_samples = len(self.columns[0].indices)\n        elif _is_df_cat(self.columns[0]):\n            # When input is pandas.\n            n_samples = self.columns[0].codes.shape[0]\n        else:\n            # Anything else, TypeGuard is ignored by mypy 1.15.0 for some reason\n            n_samples = self.columns[0].shape[0]  # type: ignore[union-attr]\n        return n_samples, len(self.columns)\n\n\ndef _transform_pandas_df(\n    data: \"PdDataFrame\",\n    enable_categorical: bool,\n    feature_names: Optional[FeatureNames] = None,\n    feature_types: Optional[Union[FeatureTypes, Categories]] = None,\n    meta: Optional[str] = None,\n) -> Tuple[PandasTransformed, Optional[FeatureNames], Optional[FeatureTypes]]:\n    if meta and len(data.columns) > 1 and meta not in _matrix_meta:\n        raise ValueError(f\"DataFrame for {meta} cannot have multiple columns\")\n\n    feature_types, ref_categories = get_ref_categories(feature_types)\n    feature_names, feature_types = pandas_feature_info(\n        data, meta, feature_names, feature_types, enable_categorical\n    )\n\n    arrays = pandas_transform_data(data)\n    return (\n        PandasTransformed(arrays, ref_categories=ref_categories),\n        feature_names,\n        feature_types,\n    )\n\n\ndef _meta_from_pandas_df(\n    data: DataType,\n    name: str,\n    dtype: Optional[NumpyDType],\n    handle: ctypes.c_void_p,\n) -> None:\n    data, _, _ = _transform_pandas_df(data, False, meta=name)\n    if len(data.columns) == 1:\n        array = data.columns[0]\n    else:\n        array = np.stack(data.columns).T\n\n    array, dtype = _ensure_np_dtype(array, dtype)\n    _meta_from_numpy(array, name, dtype, handle)\n\n\ndef _from_pandas_df(\n    *,\n    data: \"PdDataFrame\",\n    enable_categorical: bool,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: 
Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    df, feature_names, feature_types = _transform_pandas_df(\n        data, enable_categorical, feature_names, feature_types\n    )\n\n    handle = ctypes.c_void_p()\n    _check_call(\n        _LIB.XGDMatrixCreateFromColumnar(\n            df.array_interface(),\n            make_jcargs(\n                nthread=nthread, missing=missing, data_split_mode=data_split_mode\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\ndef _meta_from_pandas_series(\n    data: DataType, name: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p\n) -> None:\n    \"\"\"Help transform pandas series for meta data like labels\"\"\"\n    if is_pd_sparse_dtype(data.dtype):\n        data = data.values.to_dense().astype(np.float32)\n    elif is_pa_ext_dtype(data.dtype):\n        data = pandas_pa_type(data)\n    else:\n        data = data.to_numpy(np.float32, na_value=np.nan)\n\n    if is_pd_sparse_dtype(getattr(data, \"dtype\", data)):\n        data = data.to_dense()  # type: ignore[union-attr]\n    assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1\n    _meta_from_numpy(data, name, dtype, handle)\n\n\ndef _from_pandas_series(\n    *,\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    enable_categorical: bool,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n) -> DispatchedDataBackendReturnType:\n    if (data.dtype.name not in _pandas_dtype_mapper) and not (\n        is_pd_cat_dtype(data.dtype) and enable_categorical\n    ):\n        _invalid_dataframe_dtype(data)\n    if enable_categorical and is_pd_cat_dtype(data.dtype):\n        data = data.cat.codes\n    return _from_numpy_array(\n        data=data.values.reshape(data.shape[0], 
1).astype(\"float\"),\n        missing=missing,\n        nthread=nthread,\n        feature_names=feature_names,\n        feature_types=feature_types,\n    )\n\n\nclass ArrowTransformed(TransformedDf):\n    \"\"\"A storage class for transformed arrow table.\"\"\"\n\n    def __init__(\n        self,\n        columns: List[Union[\"pa.NumericArray\", \"pa.DictionaryArray\"]],\n        ref_categories: Optional[Categories] = None,\n    ) -> None:\n        self.columns = columns\n\n        if TYPE_CHECKING:\n            import pyarrow as pa\n        else:\n            pa = import_pyarrow()\n\n        aitfs: AifType = []\n        temporary_buffers = []\n\n        def push_series(col: Union[\"pa.NumericArray\", \"pa.DictionaryArray\"]) -> None:\n            if isinstance(col, pa.DictionaryArray):\n                cats = col.dictionary\n                codes = col.indices\n                if not isinstance(cats, (pa.StringArray, pa.LargeStringArray)):\n                    raise TypeError(\n                        \"Only string-based categorical index is supported for arrow.\"\n                    )\n                jnames, jcodes, buf = arrow_cat_inf(cats, codes)\n                temporary_buffers.append(buf)\n                aitfs.append((jnames, jcodes))\n            else:\n                jdata = _arrow_array_inf(col)\n                aitfs.append(jdata)\n\n        for col in self.columns:\n            push_series(col)\n\n        super().__init__(\n            ref_categories=ref_categories,\n            aitfs=aitfs,\n            temporary_buffers=temporary_buffers,\n        )\n\n    @property\n    def shape(self) -> Tuple[int, int]:\n        \"\"\"Return shape of the transformed DataFrame.\"\"\"\n        return len(self.columns[0]), len(self.columns)\n\n\ndef _transform_arrow_table(\n    data: \"pa.Table\",\n    enable_categorical: bool,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n) -> Tuple[ArrowTransformed, 
Optional[FeatureNames], Optional[FeatureTypes]]:\n    if TYPE_CHECKING:\n        import pyarrow as pa\n    else:\n        pa = import_pyarrow()\n\n    t_names, t_types = _arrow_feature_info(data)\n    feature_types, ref_categories = get_ref_categories(feature_types)\n\n    if feature_names is None:\n        feature_names = t_names\n    if feature_types is None:\n        feature_types = t_types\n\n    columns = []\n    for cname in feature_names:\n        col0 = data.column(cname)\n        col: Union[\"pa.NumericArray\", \"pa.DictionaryArray\"] = col0.combine_chunks()\n        if isinstance(col, pa.BooleanArray):\n            col = col.cast(pa.int8())  # bit-compressed array, not supported.\n        if is_arrow_dict(col) and not enable_categorical:\n            # None because the function doesn't know how to get the type info from arrow\n            # table.\n            _invalid_dataframe_dtype(None)\n        columns.append(col)\n\n    df_t = ArrowTransformed(columns, ref_categories=ref_categories)\n    return df_t, feature_names, feature_types\n\n\ndef _from_arrow_table(  # pylint: disable=too-many-positional-arguments\n    data: DataType,\n    enable_categorical: bool,\n    missing: FloatCompatible,\n    n_threads: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    df_t, feature_names, feature_types = _transform_arrow_table(\n        data, enable_categorical, feature_names, feature_types\n    )\n    handle = ctypes.c_void_p()\n    _check_call(\n        _LIB.XGDMatrixCreateFromColumnar(\n            df_t.array_interface(),\n            make_jcargs(\n                nthread=n_threads, missing=missing, data_split_mode=data_split_mode\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\n@functools.cache\ndef _arrow_dtype() -> 
Dict[DataType, str]:\n    import pyarrow as pa\n\n    mapping = {\n        pa.int8(): \"int\",\n        pa.int16(): \"int\",\n        pa.int32(): \"int\",\n        pa.int64(): \"int\",\n        pa.uint8(): \"int\",\n        pa.uint16(): \"int\",\n        pa.uint32(): \"int\",\n        pa.uint64(): \"int\",\n        pa.float16(): \"float\",\n        pa.float32(): \"float\",\n        pa.float64(): \"float\",\n        pa.bool_(): \"i\",\n    }\n\n    return mapping\n\n\ndef _arrow_feature_info(data: DataType) -> Tuple[List[str], List]:\n    if TYPE_CHECKING:\n        import pyarrow as pa\n    else:\n        pa = import_pyarrow()\n\n    table: \"pa.Table\" = data\n    names = table.column_names\n\n    def map_type(name: str) -> str:\n        col = table.column(name)\n        if isinstance(col.type, pa.DictionaryType):\n            return CAT_T  # pylint: disable=unreachable\n\n        return _arrow_dtype()[col.type]\n\n    types = list(map(map_type, names))\n    return names, types\n\n\ndef _meta_from_arrow_table(\n    data: DataType,\n    name: str,\n    dtype: Optional[NumpyDType],\n    handle: ctypes.c_void_p,\n) -> None:\n    table: \"pa.Table\" = data\n    _meta_from_pandas_df(table.to_pandas(), name=name, dtype=dtype, handle=handle)\n\n\ndef _check_pyarrow_for_polars() -> None:\n    if not is_pyarrow_available():\n        raise ImportError(\"`pyarrow` is required for polars.\")\n\n\ndef _transform_polars_df(\n    data: DataType,\n    enable_categorical: bool,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n) -> Tuple[ArrowTransformed, Optional[FeatureNames], Optional[FeatureTypes]]:\n    if _is_polars_lazyframe(data):\n        df = data.collect()\n        warnings.warn(\n            \"Using the default parameters for the polars `LazyFrame.collect`. 
Consider\"\n            \" passing a realized `DataFrame` or `Series` instead.\",\n            UserWarning,\n        )\n    else:\n        df = data\n\n    _check_pyarrow_for_polars()\n    table = df.to_arrow()\n    return _transform_arrow_table(\n        table, enable_categorical, feature_names, feature_types\n    )\n\n\ndef _from_polars_df(  # pylint: disable=too-many-positional-arguments\n    data: DataType,\n    enable_categorical: bool,\n    missing: FloatCompatible,\n    n_threads: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    df_t, feature_names, feature_types = _transform_polars_df(\n        data, enable_categorical, feature_names, feature_types\n    )\n    handle = ctypes.c_void_p()\n    _check_call(\n        _LIB.XGDMatrixCreateFromColumnar(\n            df_t.array_interface(),\n            make_jcargs(\n                nthread=n_threads, missing=missing, data_split_mode=data_split_mode\n            ),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\n@functools.cache\ndef _lazy_load_cudf_is_cat() -> Callable[[Any], bool]:\n    try:\n        from cudf import CategoricalDtype\n\n        def is_categorical_dtype(dtype: Any) -> bool:\n            return isinstance(dtype, CategoricalDtype)\n\n    except ImportError:\n        try:\n            from cudf.api.types import (  # type: ignore[no-redef]\n                is_categorical_dtype,\n            )\n        except ImportError:\n            from cudf.utils.dtypes import (  # type: ignore[no-redef]\n                is_categorical_dtype,\n            )\n\n    return is_categorical_dtype\n\n\n@functools.cache\ndef _lazy_load_cudf_is_bool() -> Callable[[Any], bool]:\n    from cudf.api.types import is_bool_dtype\n\n    return is_bool_dtype\n\n\nclass CudfTransformed(TransformedDf):\n    \"\"\"A 
storage class for transformed cuDF dataframe.\"\"\"\n\n    def __init__(\n        self,\n        columns: List[Union[\"PdSeries\", DfCatAccessor]],\n        ref_categories: Optional[Categories],\n    ) -> None:\n        self.columns = columns\n        # Buffers for temporary data that cannot be freed until the data is consumed by\n        # the DMatrix or the booster.\n\n        aitfs: AifType = []\n        temporary_buffers = []\n\n        def push_series(ser: Any) -> None:\n            if _is_df_cat(ser):\n                cats, codes = ser.categories, ser.codes\n                cats_ainf, codes_ainf, buf = cudf_cat_inf(cats, codes)\n                temporary_buffers.append(buf)\n                aitfs.append((cats_ainf, codes_ainf))\n            else:\n                # numeric column\n                ainf = cuda_array_interface_dict(ser)\n                aitfs.append(ainf)\n\n        for col in self.columns:\n            push_series(col)\n\n        super().__init__(\n            ref_categories=ref_categories,\n            aitfs=aitfs,\n            temporary_buffers=temporary_buffers,\n        )\n\n    @property\n    def shape(self) -> Tuple[int, int]:\n        \"\"\"Return shape of the transformed DataFrame.\"\"\"\n        if _is_df_cat(self.columns[0]):\n            n_samples = self.columns[0].codes.shape[0]\n        else:\n            n_samples = self.columns[0].shape[0]  # type: ignore[union-attr]\n        return n_samples, len(self.columns)\n\n\ndef _transform_cudf_df(\n    data: DataType,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    enable_categorical: bool,\n) -> Tuple[\n    CudfTransformed,\n    Optional[FeatureNames],\n    Optional[FeatureTypes],\n]:\n    is_bool_dtype = _lazy_load_cudf_is_bool()\n\n    is_categorical_dtype = _lazy_load_cudf_is_cat()\n    # Work around https://github.com/dmlc/xgboost/issues/10181\n    if _is_cudf_ser(data):\n        if is_bool_dtype(data.dtype):\n          
  data = data.astype(np.uint8)\n        dtypes = [data.dtype]\n    else:\n        data = data.astype(\n            {col: np.uint8 for col in data.select_dtypes(include=\"bool\")}\n        )\n        dtypes = data.dtypes\n\n    if not all(\n        dtype.name in _pandas_dtype_mapper\n        or (is_categorical_dtype(dtype) and enable_categorical)\n        for dtype in dtypes\n    ):\n        _invalid_dataframe_dtype(data)\n\n    # handle feature names\n    if feature_names is None:\n        if _is_cudf_ser(data):\n            feature_names = [data.name]\n        elif lazy_isinstance(data.columns, \"cudf.core.multiindex\", \"MultiIndex\"):\n            feature_names = [\" \".join([str(x) for x in i]) for i in data.columns]\n        else:\n            feature_names = list(data.columns.map(str))\n\n    # handle feature types\n    feature_types, ref_categories = get_ref_categories(feature_types)\n    if feature_types is None:\n        feature_types = []\n        for dtype in dtypes:\n            if is_categorical_dtype(dtype) and enable_categorical:\n                feature_types.append(CAT_T)\n            else:\n                feature_types.append(_pandas_dtype_mapper[dtype.name])\n\n    # handle categorical data\n    result = []\n    if _is_cudf_ser(data):\n        # unlike pandas, cuDF uses NA for missing data.\n        if is_categorical_dtype(data.dtype) and enable_categorical:\n            result.append(data.cat)\n        elif is_categorical_dtype(data.dtype) and not enable_categorical:\n            raise ValueError(_ENABLE_CAT_ERR)\n        else:\n            result.append(data)\n    else:\n        for col, dtype in zip(data.columns, data.dtypes):\n            series = data[col]\n            if is_categorical_dtype(dtype) and enable_categorical:\n                result.append(series.cat)\n            elif is_categorical_dtype(dtype):\n                raise ValueError(_ENABLE_CAT_ERR)\n            else:\n                result.append(series)\n\n    return (\n      
  CudfTransformed(result, ref_categories=ref_categories),\n        feature_names,\n        feature_types,\n    )\n\n\ndef _from_cudf_df(\n    *,\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    enable_categorical: bool,\n) -> DispatchedDataBackendReturnType:\n    df, feature_names, feature_types = _transform_cudf_df(\n        data, feature_names, feature_types, enable_categorical\n    )\n    handle = ctypes.c_void_p()\n    _check_call(\n        _LIB.XGDMatrixCreateFromCudaColumnar(\n            df.array_interface(),\n            make_jcargs(nthread=nthread, missing=missing),\n            ctypes.byref(handle),\n        )\n    )\n    return handle, feature_names, feature_types\n\n\ndef _transform_cupy_array(data: DataType) -> CupyT:\n    import cupy\n\n    if not hasattr(data, \"__cuda_array_interface__\") and hasattr(data, \"__array__\"):\n        data = cupy.array(data, copy=False)\n    if array_hasobject(data) or data.dtype in [cupy.bool_]:\n        data = data.astype(cupy.float32, copy=False)\n    return data\n\n\ndef _from_cupy_array(\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n) -> DispatchedDataBackendReturnType:\n    \"\"\"Initialize DMatrix from cupy ndarray.\"\"\"\n    data = _transform_cupy_array(data)\n    interface_str = cuda_array_interface(data)\n    handle = ctypes.c_void_p()\n    config = bytes(json.dumps({\"missing\": missing, \"nthread\": nthread}), \"utf-8\")\n    _check_call(\n        _LIB.XGDMatrixCreateFromCudaArrayInterface(\n            interface_str, config, ctypes.byref(handle)\n        )\n    )\n    return handle, feature_names, feature_types\n\n\ndef _is_cupy_csr(data: DataType) -> bool:\n    try:\n        import cupyx\n    except ImportError:\n        return False\n    return isinstance(data, 
cupyx.scipy.sparse.csr_matrix)\n\n\ndef _is_cupy_csc(data: DataType) -> bool:\n    try:\n        import cupyx\n    except ImportError:\n        return False\n    return isinstance(data, cupyx.scipy.sparse.csc_matrix)\n\n\ndef _is_dlpack(data: DataType) -> bool:\n    return \"PyCapsule\" in str(type(data)) and \"dltensor\" in str(data)\n\n\ndef _transform_dlpack(data: DataType) -> bool:\n    from cupy import from_dlpack  # pylint: disable=E0401\n\n    assert \"used_dltensor\" not in str(data)\n    data = from_dlpack(data)\n    return data\n\n\ndef _from_dlpack(\n    data: DataType,\n    missing: FloatCompatible,\n    nthread: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n) -> DispatchedDataBackendReturnType:\n    data = _transform_dlpack(data)\n    return _from_cupy_array(data, missing, nthread, feature_names, feature_types)\n\n\ndef _is_uri(data: DataType) -> TypeGuard[PathLike]:\n    return isinstance(data, (str, os.PathLike))\n\n\ndef _from_uri(\n    data: PathLike,\n    missing: Optional[FloatCompatible],\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    _warn_unused_missing(data, missing)\n    handle = ctypes.c_void_p()\n    data = os.fspath(os.path.expanduser(data))\n    config = make_jcargs(uri=str(data), data_split_mode=int(data_split_mode))\n    _check_call(_LIB.XGDMatrixCreateFromURI(config, ctypes.byref(handle)))\n    return handle, feature_names, feature_types\n\n\ndef _is_list(data: DataType) -> TypeGuard[list]:\n    return isinstance(data, list)\n\n\ndef _from_list(\n    *,\n    data: Sequence,\n    missing: FloatCompatible,\n    n_threads: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    array = np.array(data)\n    
_check_data_shape(data)\n    return _from_numpy_array(\n        data=array,\n        missing=missing,\n        nthread=n_threads,\n        feature_names=feature_names,\n        feature_types=feature_types,\n        data_split_mode=data_split_mode,\n    )\n\n\ndef _is_tuple(data: DataType) -> TypeGuard[tuple]:\n    return isinstance(data, tuple)\n\n\ndef _from_tuple(\n    *,\n    data: Sequence,\n    missing: FloatCompatible,\n    n_threads: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    return _from_list(\n        data=data,\n        missing=missing,\n        n_threads=n_threads,\n        feature_names=feature_names,\n        feature_types=feature_types,\n        data_split_mode=data_split_mode,\n    )\n\n\ndef _has_array_protocol(data: DataType) -> bool:\n    return hasattr(data, \"__array__\")\n\n\ndef _convert_unknown_data(data: DataType) -> DataType:\n    warnings.warn(\n        f\"Unknown data type: {type(data)}, trying to convert it to csr_matrix\",\n        UserWarning,\n    )\n    try:\n        import scipy.sparse\n    except ImportError:\n        return None\n\n    try:\n        data = scipy.sparse.csr_matrix(data)\n    except Exception:  # pylint: disable=broad-except\n        return None\n\n    return data\n\n\ndef dispatch_data_backend(\n    *,\n    data: DataType,\n    missing: FloatCompatible,  # Or Optional[Float]\n    threads: int,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n    enable_categorical: bool = False,\n    data_split_mode: DataSplitMode = DataSplitMode.ROW,\n) -> DispatchedDataBackendReturnType:\n    \"\"\"Dispatch data for DMatrix.\"\"\"\n\n    def check_cats(\n        feature_types: Optional[Union[FeatureTypes, Categories]],\n    ) -> TypeGuard[Optional[FeatureTypes]]:\n        if isinstance(feature_types, Categories):\n          
  raise ValueError(\n                \"Reference category is only supported by DataFrame inputs.\"\n            )\n        return True\n\n    if (\n        not _is_cudf_ser(data)\n        and not _is_pandas_series(data)\n        and not _is_polars_series(data)\n    ):\n        _check_data_shape(data)\n    if is_scipy_csr(data):\n        assert check_cats(feature_types)\n        return _from_scipy_csr(\n            data=data,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if is_scipy_csc(data):\n        assert check_cats(feature_types)\n        return _from_scipy_csc(\n            data=data,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if is_scipy_coo(data):\n        assert check_cats(feature_types)\n        return _from_scipy_csr(\n            data=data.tocsr(),\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_np_array_like(data):\n        assert check_cats(feature_types)\n        return _from_numpy_array(\n            data=data,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_uri(data):\n        assert check_cats(feature_types)\n        return _from_uri(data, missing, feature_names, feature_types, data_split_mode)\n    if _is_list(data):\n        assert check_cats(feature_types)\n        return _from_list(\n            data=data,\n            missing=missing,\n            n_threads=threads,\n            
feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_tuple(data):\n        assert check_cats(feature_types)\n        return _from_tuple(\n            data=data,\n            missing=missing,\n            n_threads=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_polars_series(data):\n        pl = import_polars()\n\n        data = pl.DataFrame({data.name: data})\n    if _is_polars(data):\n        return _from_polars_df(\n            data,\n            enable_categorical,\n            missing=missing,\n            n_threads=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_arrow(data):\n        return _from_arrow_table(\n            data,\n            enable_categorical,\n            missing=missing,\n            n_threads=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_cudf_pandas(data):\n        data = data._fsproxy_fast  # pylint: disable=protected-access\n    if _is_pandas_series(data):\n        pd = import_pandas()\n\n        data = pd.DataFrame(data)\n    if _is_pandas_df(data):\n        return _from_pandas_df(\n            data=data,\n            enable_categorical=enable_categorical,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            data_split_mode=data_split_mode,\n        )\n    if _is_cudf_df(data) or _is_cudf_ser(data):\n        return _from_cudf_df(\n            data=data,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n            
enable_categorical=enable_categorical,\n        )\n    if _is_cupy_alike(data):\n        assert check_cats(feature_types)\n        return _from_cupy_array(data, missing, threads, feature_names, feature_types)\n    if _is_cupy_csr(data):\n        raise TypeError(\"cupyx CSR is not supported yet.\")\n    if _is_cupy_csc(data):\n        raise TypeError(\"cupyx CSC is not supported yet.\")\n    if _is_dlpack(data):\n        assert check_cats(feature_types)\n        return _from_dlpack(data, missing, threads, feature_names, feature_types)\n    if _is_modin_series(data):\n        pd = import_pandas()\n\n        data = pd.DataFrame(data)\n    if _is_modin_df(data):\n        return _from_pandas_df(\n            data=data,\n            enable_categorical=enable_categorical,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n        )\n\n    if _has_array_protocol(data):\n        assert check_cats(feature_types)\n        array = np.asarray(data)\n        return _from_numpy_array(\n            data=array,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n        )\n\n    converted = _convert_unknown_data(data)\n    if converted is not None:\n        assert check_cats(feature_types)\n        return _from_scipy_csr(\n            data=converted,\n            missing=missing,\n            nthread=threads,\n            feature_names=feature_names,\n            feature_types=feature_types,\n        )\n\n    raise TypeError(\"Not supported type for data.\" + str(type(data)))\n\n\ndef _validate_meta_shape(data: DataType, name: str) -> None:\n    if hasattr(data, \"shape\"):\n        msg = f\"Invalid shape: {data.shape} for {name}\"\n        if name in _matrix_meta:\n            if len(data.shape) > 2:\n                raise ValueError(msg)\n            return\n\n        if len(data.shape) > 2 or 
(\n            len(data.shape) == 2 and (data.shape[1] != 0 and data.shape[1] != 1)\n        ):\n            raise ValueError(f\"Invalid shape: {data.shape} for {name}\")\n\n\ndef _meta_from_numpy(\n    data: np.ndarray,\n    field: str,\n    dtype: Optional[NumpyDType],\n    handle: ctypes.c_void_p,\n) -> None:\n    data, dtype = _ensure_np_dtype(data, dtype)\n    interface = data.__array_interface__\n    if interface.get(\"mask\", None) is not None:\n        raise ValueError(\"Masked array is not supported.\")\n    interface_str = array_interface(data)\n    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))\n\n\ndef _meta_from_list(\n    data: Sequence, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p\n) -> None:\n    data_np = np.array(data)\n    _meta_from_numpy(data_np, field, dtype, handle)\n\n\ndef _meta_from_tuple(\n    data: Sequence, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p\n) -> None:\n    return _meta_from_list(data, field, dtype, handle)\n\n\ndef _meta_from_cudf_df(data: DataType, field: str, handle: ctypes.c_void_p) -> None:\n    if field not in _matrix_meta:\n        _meta_from_cudf_series(data.iloc[:, 0], field, handle)\n    else:\n        data = data.values\n        interface = cuda_array_interface(data)\n        _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))\n\n\ndef _meta_from_cudf_series(data: DataType, field: str, handle: ctypes.c_void_p) -> None:\n    check_cudf_meta(data, field)\n    inf = cuda_array_interface(data)\n    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), inf))\n\n\ndef _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -> None:\n    data = _transform_cupy_array(data)\n    inf = cuda_array_interface(data)\n    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), inf))\n\n\ndef dispatch_meta_backend(\n    matrix: \"DMatrix\", data: DataType, name: str, dtype: 
Optional[NumpyDType] = None\n) -> None:\n    \"\"\"Dispatch for meta info.\"\"\"\n    handle = matrix.handle\n    assert handle is not None\n    _validate_meta_shape(data, name)\n    if data is None:\n        return\n    if _is_list(data):\n        _meta_from_list(data, name, dtype, handle)\n        return\n    if _is_tuple(data):\n        _meta_from_tuple(data, name, dtype, handle)\n        return\n    if _is_np_array_like(data):\n        _meta_from_numpy(data, name, dtype, handle)\n        return\n    if _is_arrow(data):\n        _meta_from_arrow_table(data, name, dtype, handle)\n        return\n    if _is_cudf_pandas(data):\n        data = data._fsproxy_fast  # pylint: disable=protected-access\n    if _is_polars(data):\n        if _is_polars_lazyframe(data):\n            data = data.collect()\n        _check_pyarrow_for_polars()\n        _meta_from_arrow_table(data.to_arrow(), name, dtype, handle)\n        return\n    if _is_pandas_df(data):\n        _meta_from_pandas_df(data, name, dtype=dtype, handle=handle)\n        return\n    if _is_pandas_series(data):\n        _meta_from_pandas_series(data, name, dtype, handle)\n        return\n    if _is_dlpack(data):\n        data = _transform_dlpack(data)\n        _meta_from_cupy_array(data, name, handle)\n        return\n    if _is_cudf_ser(data):\n        _meta_from_cudf_series(data, name, handle)\n        return\n    if _is_cudf_df(data):\n        _meta_from_cudf_df(data, name, handle)\n        return\n    if _is_cupy_alike(data):\n        _meta_from_cupy_array(data, name, handle)\n        return\n    if _is_modin_df(data):\n        _meta_from_pandas_df(data, name, dtype=dtype, handle=handle)\n        return\n    if _is_modin_series(data):\n        data = data.values.astype(\"float\")\n        assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1\n        _meta_from_numpy(data, name, dtype, handle)\n        return\n    if _has_array_protocol(data):\n        # pyarrow goes here.\n        array = 
np.asarray(data)\n        _meta_from_numpy(array, name, dtype, handle)\n        return\n    raise TypeError(\"Unsupported type for \" + name, str(type(data)))\n\n\ndef _proxy_transform(\n    data: DataType,\n    feature_names: Optional[FeatureNames],\n    feature_types: Optional[FeatureTypes],\n    enable_categorical: bool,\n) -> TransformedData:\n    if _is_cudf_pandas(data):\n        data = data._fsproxy_fast  # pylint: disable=protected-access\n    if _is_cudf_df(data) or _is_cudf_ser(data):\n        return _transform_cudf_df(\n            data, feature_names, feature_types, enable_categorical\n        )\n    if _is_cupy_alike(data):\n        data = _transform_cupy_array(data)\n        return data, feature_names, feature_types\n    if _is_dlpack(data):\n        return _transform_dlpack(data), feature_names, feature_types\n    if _is_list(data) or _is_tuple(data):\n        data = np.array(data)\n    if _is_np_array_like(data):\n        data, _ = _ensure_np_dtype(data, data.dtype)\n        return data, feature_names, feature_types\n    if is_scipy_csr(data):\n        data = transform_scipy_sparse(data, True)\n        return data, feature_names, feature_types\n    if is_scipy_csc(data):\n        data = transform_scipy_sparse(data.tocsr(), True)\n        return data, feature_names, feature_types\n    if is_scipy_coo(data):\n        data = transform_scipy_sparse(data.tocsr(), True)\n        return data, feature_names, feature_types\n    if _is_polars(data):\n        df_pl, feature_names, feature_types = _transform_polars_df(\n            data, enable_categorical, feature_names, feature_types\n        )\n        return df_pl, feature_names, feature_types\n    if _is_pandas_series(data):\n        pd = import_pandas()\n\n        data = pd.DataFrame(data)\n    if _is_arrow(data):\n        df_pa, feature_names, feature_types = _transform_arrow_table(\n            data, enable_categorical, feature_names, feature_types\n        )\n        return df_pa, feature_names, 
feature_types\n    if _is_pandas_df(data):\n        df, feature_names, feature_types = _transform_pandas_df(\n            data, enable_categorical, feature_names, feature_types\n        )\n        return df, feature_names, feature_types\n    raise TypeError(\"Value type is not supported for data iterator:\" + str(type(data)))\n\n\ndef is_on_cuda(data: Any) -> bool:\n    \"\"\"Whether the data is a CUDA-based data structure.\"\"\"\n    return any(\n        p(data)\n        for p in (\n            _is_cudf_df,\n            _is_cudf_ser,\n            _is_cudf_pandas,\n            _is_cupy_alike,\n            _is_dlpack,\n        )\n    )\n\n\ndef dispatch_proxy_set_data(\n    proxy: \"_ProxyDMatrix\",\n    data: DataType,\n) -> None:\n    \"\"\"Dispatch for QuantileDMatrix.\"\"\"\n    if (\n        not _is_cudf_ser(data)\n        and not _is_pandas_series(data)\n        and not _is_polars_series(data)\n    ):\n        _check_data_shape(data)\n\n    if isinstance(data, CudfTransformed):\n        # pylint: disable=W0212\n        proxy._ref_data_from_cuda_columnar(data)\n        return\n    if _is_cupy_alike(data):\n        proxy._ref_data_from_cuda_interface(data)  # pylint: disable=W0212\n        return\n    if _is_dlpack(data):\n        data = _transform_dlpack(data)\n        proxy._ref_data_from_cuda_interface(data)  # pylint: disable=W0212\n        return\n    # Host\n    if isinstance(data, (ArrowTransformed, PandasTransformed)):\n        proxy._ref_data_from_columnar(data)  # pylint: disable=W0212\n        return\n    if _is_np_array_like(data):\n        _check_data_shape(data)\n        proxy._ref_data_from_array(data)  # pylint: disable=W0212\n        return\n    if is_scipy_csr(data):\n        proxy._ref_data_from_csr(data)  # pylint: disable=W0212\n        return\n\n    err = TypeError(\"Value type is not supported for data iterator:\" + str(type(data)))\n    raise err\n"
  },
  {
    "path": "python-package/xgboost/federated.py",
    "content": "\"\"\"XGBoost Experimental Federated Learning related API.\"\"\"\n\nimport ctypes\nfrom threading import Thread\nfrom typing import Any, Dict, Optional\n\nfrom .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs\nfrom .tracker import RabitTracker\n\n\nclass FederatedTracker(RabitTracker):\n    \"\"\"Tracker for federated training.\n\n    Parameters\n    ----------\n    n_workers :\n        The number of federated workers.\n\n    port :\n        The port to listen on.\n\n    secure :\n        Whether this is a secure instance. If True, then the following arguments for SSL\n        must be provided.\n\n    server_key_path :\n        Path to the server private key file.\n\n    server_cert_path :\n        Path to the server certificate file.\n\n    client_cert_path :\n        Path to the client certificate file.\n\n    \"\"\"\n\n    @_deprecate_positional_args\n    def __init__(  # pylint: disable=R0913, W0231\n        self,\n        n_workers: int,\n        port: int,\n        *,\n        secure: bool,\n        server_key_path: Optional[str] = None,\n        server_cert_path: Optional[str] = None,\n        client_cert_path: Optional[str] = None,\n        timeout: int = 300,\n    ) -> None:\n        handle = ctypes.c_void_p()\n        args = make_jcargs(\n            n_workers=n_workers,\n            port=port,\n            dmlc_communicator=\"federated\",\n            federated_secure=secure,\n            server_key_path=server_key_path,\n            server_cert_path=server_cert_path,\n            client_cert_path=client_cert_path,\n            timeout=int(timeout),\n        )\n        _check_call(_LIB.XGTrackerCreate(args, ctypes.byref(handle)))\n        self.handle = handle\n\n\n@_deprecate_positional_args\ndef run_federated_server(  # pylint: disable=too-many-arguments\n    n_workers: int,\n    port: int,\n    *,\n    server_key_path: Optional[str] = None,\n    server_cert_path: Optional[str] = None,\n    client_cert_path: 
Optional[str] = None,\n    blocking: bool = True,\n    timeout: int = 300,\n) -> Optional[Dict[str, Any]]:\n    \"\"\"See :py:class:`~xgboost.federated.FederatedTracker` for more info.\n\n    Parameters\n    ----------\n    blocking :\n        Block the server until the training is finished. If set to False, the function\n        launches an additional thread and returns the worker arguments. The default is\n        True and a higher level framework is responsible for setting worker parameters.\n\n    \"\"\"\n    args: Dict[str, Any] = {\"n_workers\": n_workers}\n    secure = all(\n        path is not None\n        for path in [server_key_path, server_cert_path, client_cert_path]\n    )\n    tracker = FederatedTracker(\n        n_workers=n_workers,\n        port=port,\n        secure=secure,\n        timeout=timeout,\n        server_key_path=server_key_path,\n        server_cert_path=server_cert_path,\n        client_cert_path=client_cert_path,\n    )\n    tracker.start()\n\n    if blocking:\n        tracker.wait_for()\n        return None\n\n    thread = Thread(target=tracker.wait_for)\n    thread.daemon = True\n    thread.start()\n    args.update(tracker.worker_args())\n    return args\n"
  },
  {
    "path": "python-package/xgboost/libpath.py",
    "content": "# coding: utf-8\n\"\"\"Find the path to xgboost dynamic library files.\"\"\"\n\nimport os\nimport platform\nimport sys\nfrom typing import List\n\n\nclass XGBoostLibraryNotFound(Exception):\n    \"\"\"Error thrown by when xgboost is not found\"\"\"\n\n\ndef is_sphinx_build() -> bool:\n    \"\"\"`XGBOOST_BUILD_DOC` is used by the sphinx conf.py to skip building the C++ code.\"\"\"\n    return bool(os.environ.get(\"XGBOOST_BUILD_DOC\", False))\n\n\ndef find_lib_path() -> List[str]:\n    \"\"\"Find the path to xgboost dynamic library files.\n\n    Returns\n    -------\n    lib_path\n       List of all found library path to xgboost\n    \"\"\"\n    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))\n    dll_path = [\n        # normal, after installation `lib` is copied into Python package tree.\n        os.path.join(curr_path, \"lib\"),\n        # editable installation, no copying is performed.\n        os.path.join(curr_path, os.path.pardir, os.path.pardir, \"lib\"),\n        # use libxgboost from a system prefix, if available.  
This should be the last\n        # option.\n        os.path.join(sys.base_prefix, \"lib\"),\n    ]\n\n    if sys.platform == \"win32\":\n        # On Windows, Conda may install libs in different paths\n        dll_path.extend(\n            [\n                os.path.join(sys.base_prefix, \"bin\"),\n                os.path.join(sys.base_prefix, \"Library\"),\n                os.path.join(sys.base_prefix, \"Library\", \"bin\"),\n                os.path.join(sys.base_prefix, \"Library\", \"lib\"),\n                os.path.join(sys.base_prefix, \"Library\", \"mingw-w64\"),\n                os.path.join(sys.base_prefix, \"Library\", \"mingw-w64\", \"bin\"),\n                os.path.join(sys.base_prefix, \"Library\", \"mingw-w64\", \"lib\"),\n            ]\n        )\n        dll_path = [os.path.join(p, \"xgboost.dll\") for p in dll_path]\n    elif sys.platform.startswith((\"linux\", \"freebsd\", \"emscripten\")):\n        dll_path = [os.path.join(p, \"libxgboost.so\") for p in dll_path]\n    elif sys.platform == \"darwin\":\n        dll_path = [os.path.join(p, \"libxgboost.dylib\") for p in dll_path]\n    elif sys.platform == \"cygwin\":\n        dll_path = [os.path.join(p, \"cygxgboost.dll\") for p in dll_path]\n    if platform.system() == \"OS400\":\n        dll_path = [os.path.join(p, \"libxgboost.so\") for p in dll_path]\n\n    lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]\n\n    if not lib_path and not is_sphinx_build():\n        link = \"https://xgboost.readthedocs.io/en/stable/install.html\"\n        msg = (\n            \"Cannot find XGBoost Library in the candidate path.  
\"\n            + \"List of candidates:\\n- \"\n            + (\"\\n- \".join(dll_path))\n            + \"\\nXGBoost Python package path: \"\n            + curr_path\n            + \"\\nsys.base_prefix: \"\n            + sys.base_prefix\n            + \"\\nSee: \"\n            + link\n            + \" for installing XGBoost.\"\n        )\n        raise XGBoostLibraryNotFound(msg)\n    return lib_path\n"
  },
  {
    "path": "python-package/xgboost/objective.py",
    "content": "\"\"\"Experimental support for a new objective interface with target dimension\nreduction.\n\n.. warning::\n\n  Do not use this module unless you want to participate in development.\n\n.. versionadded:: 3.2.0\n\n\"\"\"\n\nimport warnings\nfrom abc import ABC, abstractmethod\nfrom typing import TYPE_CHECKING, Tuple\n\nimport numpy as np\n\nfrom ._data_utils import (\n    _ensure_np_dtype,\n    _is_flatten,\n    array_interface,\n    cuda_array_interface,\n)\nfrom ._typing import ArrayLike, NumpyOrCupy\nfrom .compat import _is_cupy_alike\n\nif TYPE_CHECKING:\n    from .core import DMatrix\n\n\nclass Objective(ABC):\n    \"\"\"Base class for custom objective functions.\n\n    .. warning::\n\n        Do not use this class unless you want to participate in development.\n\n    .. versionadded:: 3.2.0\n\n    \"\"\"\n\n    @abstractmethod\n    def __call__(\n        self, iteration: int, y_pred: ArrayLike, dtrain: \"DMatrix\"\n    ) -> Tuple[ArrayLike, ArrayLike]: ...\n\n\nclass TreeObjective(Objective):\n    \"\"\"Base class for tree-specific custom objective functions.\n\n    .. warning::\n\n        Do not use this class unless you want to participate in development.\n\n    .. 
versionadded:: 3.2.0\n\n    \"\"\"\n\n    # pylint: disable=unused-argument\n    def split_grad(\n        self, iteration: int, grad: ArrayLike, hess: ArrayLike\n    ) -> Tuple[ArrayLike, ArrayLike] | None:\n        \"\"\"Provide a different gradient type for finding tree structures.\"\"\"\n        return None\n\n\ndef _grad_arrinf(array: NumpyOrCupy, n_samples: int) -> bytes:\n    # Can we check for __array_interface__ instead of a specific type?\n    msg = (\n        \"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian.\"\n        f\" Got: {type(array)}\"\n    )\n    if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):\n        raise TypeError(msg)\n\n    if array.shape[0] != n_samples and _is_flatten(array):\n        warnings.warn(\n            \"Since 2.1.0, the shape of the gradient and hessian is required to\"\n            \" be (n_samples, n_targets) or (n_samples, n_classes).\",\n            FutureWarning,\n        )\n        array = array.reshape(n_samples, array.size // n_samples)\n\n    if isinstance(array, np.ndarray):\n        array, _ = _ensure_np_dtype(array, array.dtype)\n        interface = array_interface(array)\n    elif _is_cupy_alike(array):\n        interface = cuda_array_interface(array)\n    else:\n        raise TypeError(msg)\n\n    return interface\n"
  },
  {
    "path": "python-package/xgboost/plotting.py",
    "content": "# pylint: disable=too-many-locals, too-many-arguments\n# pylint: disable=too-many-branches\n\"\"\"Plotting Library.\"\"\"\n\nimport json\nimport warnings\nfrom io import BytesIO\nfrom typing import Any, Optional, Union\n\nimport numpy as np\n\nfrom ._typing import PathLike\nfrom .core import Booster, _deprecate_positional_args\nfrom .sklearn import XGBModel\n\nAxes = Any  # real type is matplotlib.axes.Axes\nGraphvizSource = Any  # real type is graphviz.Source\n\n\n@_deprecate_positional_args\ndef plot_importance(\n    booster: Union[XGBModel, Booster, dict],\n    *,\n    ax: Optional[Axes] = None,\n    height: float = 0.2,\n    xlim: Optional[tuple] = None,\n    ylim: Optional[tuple] = None,\n    title: str = \"Feature importance\",\n    xlabel: str = \"Importance score\",\n    ylabel: str = \"Features\",\n    fmap: PathLike = \"\",\n    importance_type: str = \"weight\",\n    max_num_features: Optional[int] = None,\n    grid: bool = True,\n    show_values: bool = True,\n    values_format: str = \"{v}\",\n    **kwargs: Any,\n) -> Axes:\n    \"\"\"Plot importance based on fitted trees.\n\n    Parameters\n    ----------\n    booster :\n        Booster or XGBModel instance, or dict taken by Booster.get_fscore()\n    ax : matplotlib Axes\n        Target axes instance. If None, new figure and axes will be created.\n    grid :\n        Turn the axes grids on or off.  Default is True (On).\n    importance_type :\n        How the importance is calculated: either \"weight\", \"gain\", or \"cover\"\n\n        * \"weight\" is the number of times a feature appears in a tree\n        * \"gain\" is the average gain of splits which use the feature\n        * \"cover\" is the average coverage of splits which use the feature\n          where coverage is defined as the number of samples affected by the split\n    max_num_features :\n        Maximum number of top features displayed on plot. 
If None, all features will be\n        displayed.\n    height :\n        Bar height, passed to ax.barh()\n    xlim :\n        Tuple passed to axes.xlim()\n    ylim :\n        Tuple passed to axes.ylim()\n    title :\n        Axes title. To disable, pass None.\n    xlabel :\n        X axis title label. To disable, pass None.\n    ylabel :\n        Y axis title label. To disable, pass None.\n    fmap :\n        The name of feature map file.\n    show_values :\n        Show values on plot. To disable, pass False.\n    values_format :\n        Format string for values. \"v\" will be replaced by the value of the feature\n        importance.  e.g. Pass \"{v:.2f}\" in order to limit the number of digits after\n        the decimal point to two, for each value printed on the graph.\n    kwargs :\n        Other keywords passed to ax.barh()\n\n    Returns\n    -------\n    ax : matplotlib Axes\n    \"\"\"\n    try:\n        import matplotlib.pyplot as plt\n    except ImportError as e:\n        raise ImportError(\"You must install matplotlib to plot importance\") from e\n\n    if isinstance(booster, XGBModel):\n        importance = booster.get_booster().get_score(\n            importance_type=importance_type, fmap=fmap\n        )\n    elif isinstance(booster, Booster):\n        importance = booster.get_score(importance_type=importance_type, fmap=fmap)\n    elif isinstance(booster, dict):\n        importance = booster\n    else:\n        raise ValueError(\"tree must be Booster, XGBModel or dict instance\")\n\n    if not importance:\n        raise ValueError(\n            \"Booster.get_score() results in empty.  
\"\n            + \"This maybe caused by having all trees as decision dumps.\"\n        )\n\n    tuples = [(k, importance[k]) for k in importance]\n    if max_num_features is not None:\n        # pylint: disable=invalid-unary-operand-type\n        tuples = sorted(tuples, key=lambda _x: _x[1])[-max_num_features:]\n    else:\n        tuples = sorted(tuples, key=lambda _x: _x[1])\n    labels, values = zip(*tuples)\n\n    if ax is None:\n        _, ax = plt.subplots(1, 1)\n\n    ylocs = np.arange(len(values))\n    ax.barh(ylocs, values, align=\"center\", height=height, **kwargs)\n\n    if show_values is True:\n        for x, y in zip(values, ylocs):\n            ax.text(x + 1, float(y), values_format.format(v=x), va=\"center\")\n\n    ax.set_yticks(ylocs)\n    ax.set_yticklabels(labels)\n\n    if xlim is not None:\n        if not isinstance(xlim, tuple) or len(xlim) != 2:\n            raise ValueError(\"xlim must be a tuple of 2 elements\")\n    else:\n        xlim = (0, max(values) * 1.1)\n    ax.set_xlim(xlim)\n\n    if ylim is not None:\n        if not isinstance(ylim, tuple) or len(ylim) != 2:\n            raise ValueError(\"ylim must be a tuple of 2 elements\")\n    else:\n        ylim = (-1, len(values))\n    ax.set_ylim(ylim)\n\n    if title is not None:\n        ax.set_title(title)\n    if xlabel is not None:\n        ax.set_xlabel(xlabel)\n    if ylabel is not None:\n        ax.set_ylabel(ylabel)\n    ax.grid(grid)\n    return ax\n\n\n@_deprecate_positional_args\ndef to_graphviz(\n    booster: Union[Booster, XGBModel],\n    *,\n    fmap: PathLike = \"\",\n    num_trees: Optional[int] = None,\n    rankdir: Optional[str] = None,\n    yes_color: Optional[str] = None,\n    no_color: Optional[str] = None,\n    condition_node_params: Optional[dict] = None,\n    leaf_node_params: Optional[dict] = None,\n    with_stats: bool = False,\n    tree_idx: int = 0,\n    **kwargs: Any,\n) -> GraphvizSource:\n    \"\"\"Convert specified tree to graphviz instance. 
IPython can automatically plot\n    the returned graphviz instance. Otherwise, you should call .render() method\n    of the returned graphviz instance.\n\n    Parameters\n    ----------\n    booster :\n        Booster or XGBModel instance\n    fmap :\n       The name of feature map file\n    num_trees :\n\n        .. deprecated:: 3.0\n\n        Specify the ordinal number of target tree\n\n    rankdir :\n        Passed to graphviz via graph_attr\n    yes_color :\n        Edge color when meets the node condition.\n    no_color :\n        Edge color when doesn't meet the node condition.\n    condition_node_params :\n        Condition node configuration for graphviz.  Example:\n\n        .. code-block:: python\n\n            {'shape': 'box',\n             'style': 'filled,rounded',\n             'fillcolor': '#78bceb'}\n\n    leaf_node_params :\n        Leaf node configuration for graphviz. Example:\n\n        .. code-block:: python\n\n            {'shape': 'box',\n             'style': 'filled',\n             'fillcolor': '#e48038'}\n\n    with_stats :\n\n        .. versionadded:: 3.0\n\n        Controls whether the split statistics should be included.\n\n    tree_idx :\n\n        .. versionadded:: 3.0\n\n        Specify the ordinal index of target tree.\n\n    kwargs :\n        Other keywords passed to graphviz graph_attr, e.g. 
``graph [ {key} = {value} ]``\n\n    Returns\n    -------\n    graph: graphviz.Source\n\n    \"\"\"\n    try:\n        from graphviz import Source\n    except ImportError as e:\n        raise ImportError(\"You must install graphviz to plot tree\") from e\n    if isinstance(booster, XGBModel):\n        booster = booster.get_booster()\n\n    # squash everything back into kwargs again for compatibility\n    parameters = \"dot\"\n    extra = {}\n    for key, value in kwargs.items():\n        extra[key] = value\n\n    if rankdir is not None:\n        kwargs[\"graph_attrs\"] = {}\n        kwargs[\"graph_attrs\"][\"rankdir\"] = rankdir\n    for key, value in extra.items():\n        if kwargs.get(\"graph_attrs\", None) is not None:\n            kwargs[\"graph_attrs\"][key] = value\n        else:\n            kwargs[\"graph_attrs\"] = {}\n        del kwargs[key]\n\n    if yes_color is not None or no_color is not None:\n        kwargs[\"edge\"] = {}\n    if yes_color is not None:\n        kwargs[\"edge\"][\"yes_color\"] = yes_color\n    if no_color is not None:\n        kwargs[\"edge\"][\"no_color\"] = no_color\n\n    if condition_node_params is not None:\n        kwargs[\"condition_node_params\"] = condition_node_params\n    if leaf_node_params is not None:\n        kwargs[\"leaf_node_params\"] = leaf_node_params\n\n    if kwargs:\n        parameters += \":\"\n        parameters += json.dumps(kwargs)\n\n    if num_trees is not None:\n        warnings.warn(\n            \"The `num_trees` parameter is deprecated, use `tree_idx` insetad. 
\",\n            FutureWarning,\n        )\n        if tree_idx not in (0, num_trees):\n            raise ValueError(\n                \"Both `num_trees` and `tree_idx` are used, prefer `tree_idx` instead.\"\n            )\n        tree_idx = num_trees\n\n    tree = booster.get_dump(fmap=fmap, dump_format=parameters, with_stats=with_stats)[\n        tree_idx\n    ]\n    g = Source(tree)\n    return g\n\n\n@_deprecate_positional_args\ndef plot_tree(\n    booster: Union[Booster, XGBModel],\n    *,\n    fmap: PathLike = \"\",\n    num_trees: Optional[int] = None,\n    rankdir: Optional[str] = None,\n    ax: Optional[Axes] = None,\n    with_stats: bool = False,\n    tree_idx: int = 0,\n    **kwargs: Any,\n) -> Axes:\n    \"\"\"Plot specified tree.\n\n    Parameters\n    ----------\n    booster :\n        Booster or XGBModel instance\n    fmap: str (optional)\n       The name of feature map file\n    num_trees :\n\n        .. deprecated:: 3.0\n\n    rankdir : str, default \"TB\"\n        Passed to graphviz via graph_attr\n    ax : matplotlib Axes, default None\n        Target axes instance. If None, new figure and axes will be created.\n\n    with_stats :\n\n        .. versionadded:: 3.0\n\n        See :py:func:`to_graphviz`.\n\n    tree_idx :\n\n        .. 
versionadded:: 3.0\n\n        See :py:func:`to_graphviz`.\n\n    kwargs :\n        Other keywords passed to :py:func:`to_graphviz`\n\n    Returns\n    -------\n    ax : matplotlib Axes\n\n    \"\"\"\n    try:\n        from matplotlib import image\n        from matplotlib import pyplot as plt\n    except ImportError as e:\n        raise ImportError(\"You must install matplotlib to plot tree\") from e\n\n    if ax is None:\n        _, ax = plt.subplots(1, 1)\n\n    g = to_graphviz(\n        booster,\n        fmap=fmap,\n        num_trees=num_trees,\n        rankdir=rankdir,\n        with_stats=with_stats,\n        tree_idx=tree_idx,\n        **kwargs,\n    )\n\n    s = BytesIO()\n    s.write(g.pipe(format=\"png\"))\n    s.seek(0)\n    img = image.imread(s)\n\n    ax.imshow(img)\n    ax.axis(\"off\")\n    return ax\n"
  },
  {
    "path": "python-package/xgboost/py.typed",
    "content": ""
  },
  {
    "path": "python-package/xgboost/sklearn.py",
    "content": "# pylint: disable=too-many-arguments, too-many-locals, fixme, too-many-lines\n# pylint: disable=duplicate-code\n\"\"\"Scikit-Learn Wrapper interface for XGBoost.\"\"\"\n\nimport collections\nimport copy\nimport json\nimport os\nimport warnings\nfrom concurrent.futures import ThreadPoolExecutor\nfrom inspect import signature\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    List,\n    Optional,\n    Protocol,\n    Sequence,\n    Set,\n    Tuple,\n    Type,\n    TypeVar,\n    Union,\n    cast,\n)\n\nimport numpy as np\nfrom scipy.special import softmax\n\nfrom ._c_api import _parse_version, _py_version\nfrom ._data_utils import Categories\nfrom ._typing import (\n    ArrayLike,\n    EvalsLog,\n    FeatureNames,\n    FeatureTypes,\n    IterationRange,\n    ModelIn,\n)\nfrom .callback import TrainingCallback\n\n# Do not use class names on scikit-learn directly.  Re-define the classes on\n# .compat to guarantee the behavior without scikit-learn\nfrom .compat import (\n    SKLEARN_INSTALLED,\n    XGBClassifierBase,\n    XGBModelBase,\n    XGBRegressorBase,\n    _sklearn_Tags,\n    _sklearn_version,\n    import_cupy,\n    is_dataframe,\n)\nfrom .config import config_context\nfrom .core import (\n    Booster,\n    DMatrix,\n    Metric,\n    PlainObj,\n    QuantileDMatrix,\n    XGBoostError,\n    _deprecate_positional_args,\n    _parse_eval_str,\n)\nfrom .data import (\n    CAT_T,\n    _is_cudf_df,\n    _is_cudf_ser,\n    _is_cupy_alike,\n    _is_pandas_df,\n    _is_polars_lazyframe,\n)\nfrom .training import train\n\n\nclass XGBRankerMixIn:\n    \"\"\"MixIn for ranking, defines the _estimator_type usually defined in scikit-learn\n    base classes.\n\n    \"\"\"\n\n    _estimator_type = \"ranker\"\n\n\ndef _check_rf_callback(\n    early_stopping_rounds: Optional[int],\n    callbacks: Optional[Sequence[TrainingCallback]],\n) -> None:\n    if early_stopping_rounds is not None or callbacks is not None:\n        raise NotImplementedError(\n          
  \"`early_stopping_rounds` and `callbacks` are not implemented for\"\n            \" the sklearn random forest estimator interface.\"\n        )\n\n\ndef _can_use_qdm(tree_method: Optional[str], device: Optional[str]) -> bool:\n    not_sycl = (device is None) or (not device.startswith(\"sycl\"))\n    return tree_method in (\"hist\", None, \"auto\") and not_sycl\n\n\nclass _SklObjWProto(Protocol):\n    def __call__(\n        self,\n        y_true: ArrayLike,\n        y_pred: ArrayLike,\n        sample_weight: Optional[ArrayLike] = None,\n    ) -> Tuple[ArrayLike, ArrayLike]: ...\n\n\n_SklObjProto = Callable[[ArrayLike, ArrayLike], Tuple[np.ndarray, np.ndarray]]\nSklObjective = Optional[Union[str, _SklObjWProto, _SklObjProto]]\n\n\ndef _objective_decorator(func: Union[_SklObjWProto, _SklObjProto]) -> PlainObj:\n    \"\"\"Decorate an objective function\n\n    Converts an objective function using the typical sklearn metrics\n    signature so that it is usable with ``xgboost.training.train``\n\n    Parameters\n    ----------\n    func:\n        Expects a callable with signature ``func(y_true, y_pred)``:\n\n        y_true: array_like of shape [n_samples]\n            The target values\n        y_pred: array_like of shape [n_samples]\n            The predicted values\n        sample_weight :\n            Optional sample weight, None or a ndarray.\n\n    Returns\n    -------\n    new_func:\n        The new objective function as expected by ``xgboost.training.train``.\n        The signature is ``new_func(preds, dmatrix)``:\n\n        preds: array_like, shape [n_samples]\n            The predicted values\n        dmatrix: ``DMatrix``\n            The training set from which the labels will be extracted using\n            ``dmatrix.get_label()``\n    \"\"\"\n\n    parameters = signature(func).parameters\n    supports_sw = \"sample_weight\" in parameters\n\n    def inner(preds: np.ndarray, dmatrix: DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"Internal 
function.\"\"\"\n        sample_weight = dmatrix.get_weight()\n        labels = dmatrix.get_label()\n\n        if sample_weight.size > 0 and not supports_sw:\n            raise ValueError(\n                \"Custom objective doesn't have the `sample_weight` parameter while\"\n                \" sample_weight is used.\"\n            )\n        if sample_weight.size > 0:\n            fnw = cast(_SklObjWProto, func)\n            return fnw(labels, preds, sample_weight=sample_weight)\n\n        fn = cast(_SklObjProto, func)\n        return fn(labels, preds)\n\n    return inner\n\n\ndef _metric_decorator(func: Callable) -> Metric:\n    \"\"\"Decorate a metric function from sklearn.\n\n    Converts a metric function that uses the typical sklearn metric signature so that\n    it is compatible with :py:func:`train`\n\n    \"\"\"\n\n    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:\n        y_true = dmatrix.get_label()\n        weight = dmatrix.get_weight()\n        if weight.size == 0:\n            return func.__name__, func(y_true, y_score)\n        return func.__name__, func(y_true, y_score, sample_weight=weight)\n\n    return inner\n\n\ndef ltr_metric_decorator(func: Callable, n_jobs: Optional[int]) -> Metric:\n    \"\"\"Decorate a learning to rank metric.\"\"\"\n\n    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:\n        y_true = dmatrix.get_label()\n        group_ptr = dmatrix.get_uint_info(\"group_ptr\")\n        if group_ptr.size < 2:\n            raise ValueError(\n                \"Invalid `group_ptr`. 
Likely caused by invalid qid or group.\"\n            )\n        scores = np.empty(group_ptr.size - 1)\n        futures = []\n        weight = dmatrix.get_group()\n        no_weight = weight.size == 0\n\n        def task(i: int) -> float:\n            begin = group_ptr[i - 1]\n            end = group_ptr[i]\n            gy = y_true[begin:end]\n            gp = y_score[begin:end]\n            if gy.size == 1:\n                # Maybe there's a better default? 1.0 because many ranking score\n                # functions have output in range [0, 1].\n                return 1.0\n            return func(gy, gp)\n\n        workers = n_jobs if n_jobs is not None else os.cpu_count()\n        with ThreadPoolExecutor(max_workers=workers) as executor:\n            for i in range(1, group_ptr.size):\n                f = executor.submit(task, i)\n                futures.append(f)\n\n            for i, f in enumerate(futures):\n                scores[i] = f.result()\n\n        if no_weight:\n            return func.__name__, scores.mean()\n\n        return func.__name__, np.average(scores, weights=weight)\n\n    return inner\n\n\n__estimator_doc = f\"\"\"\n    n_estimators : {Optional[int]}\n        Number of gradient boosted trees.  Equivalent to number of boosting\n        rounds.\n\"\"\"\n\n__model_doc = f\"\"\"\n    max_depth :  {Optional[int]}\n\n        Maximum tree depth for base learners.\n\n    max_leaves : {Optional[int]}\n\n        Maximum number of leaves; 0 indicates no limit.\n\n    max_bin : {Optional[int]}\n\n        If using histogram-based algorithm, maximum number of bins per feature\n\n    grow_policy : {Optional[str]}\n\n        Tree growing policy.\n\n        - depthwise: Favors splitting at nodes closest to the root,\n        - lossguide: Favors splitting at nodes with highest loss change.\n\n    learning_rate : {Optional[float]}\n\n        Boosting learning rate (xgb's \"eta\")\n\n    verbosity : {Optional[int]}\n\n        The degree of verbosity. 
Valid values are 0 (silent) - 3 (debug).\n\n    objective : {SklObjective}\n\n        Specify the learning task and the corresponding learning objective or a custom\n        objective function to be used.\n\n        For custom objective, see :doc:`/tutorials/custom_metric_obj` and\n        :ref:`custom-obj-metric` for more information, along with the end note for\n        function signatures.\n\n    booster: {Optional[str]}\n\n        Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.\n\n        .. deprecated:: 3.3.0\n\n            ``gblinear`` is deprecated and support will be removed in a future release.\n\n    tree_method : {Optional[str]}\n\n        Specify which tree method to use.  Default to auto.  If this parameter is set to\n        default, XGBoost will choose the most conservative option available.  It's\n        recommended to study this option from the parameters document :doc:`tree method\n        </treemethod>`\n\n    n_jobs : {Optional[int]}\n\n        Number of parallel threads used to run xgboost.  When used with other\n        Scikit-Learn algorithms like grid search, you may choose which algorithm to\n        parallelize and balance the threads.  Creating thread contention will\n        significantly slow down both algorithms.\n\n    gamma : {Optional[float]}\n\n        (min_split_loss) Minimum loss reduction required to make a further partition on\n        a leaf node of the tree.\n\n    min_child_weight : {Optional[float]}\n\n        Minimum sum of instance weight(hessian) needed in a child.\n\n    max_delta_step : {Optional[float]}\n\n        Maximum delta step we allow each tree's weight estimation to be.\n\n    subsample : {Optional[float]}\n\n        Subsample ratio of the training instance.\n\n    sampling_method : {Optional[str]}\n\n        Sampling method. 
Used only by the GPU version of ``hist`` tree method.\n\n        - ``uniform``: Select random training instances uniformly.\n        - ``gradient_based``: Select random training instances with higher probability\n            when the gradient and hessian are larger. (cf. CatBoost)\n\n    colsample_bytree : {Optional[float]}\n\n        Subsample ratio of columns when constructing each tree.\n\n    colsample_bylevel : {Optional[float]}\n\n        Subsample ratio of columns for each level.\n\n    colsample_bynode : {Optional[float]}\n\n        Subsample ratio of columns for each split.\n\n    reg_alpha : {Optional[float]}\n\n        L1 regularization term on weights (xgb's alpha).\n\n    reg_lambda : {Optional[float]}\n\n        L2 regularization term on weights (xgb's lambda).\n\n    scale_pos_weight : {Optional[float]}\n        Balancing of positive and negative weights.\n\n    base_score : {Optional[Union[float, List[float]]]}\n\n        The initial prediction score of all instances, global bias.\n\n    random_state : {Optional[Union[np.random.RandomState, np.random.Generator, int]]}\n\n        Random number seed.\n\n        .. note::\n\n           Using gblinear booster with shotgun updater is nondeterministic as\n           it uses Hogwild algorithm.\n\n    missing : float\n\n        Value in the data which needs to be present as a missing value. Default to\n        :py:data:`numpy.nan`.\n\n    num_parallel_tree: {Optional[int]}\n\n        Used for boosting random forest.\n\n    monotone_constraints : {Optional[Union[Dict[str, int], str]]}\n\n        Constraint of variable monotonicity.  See :doc:`tutorial </tutorials/monotonic>`\n        for more information.\n\n    interaction_constraints : {Optional[Union[str, List[Tuple[str]]]]}\n\n        Constraints for interaction representing permitted interactions.  The\n        constraints must be specified in the form of a nested list, e.g. 
``[[0, 1], [2,\n        3, 4]]``, where each inner list is a group of indices of features that are\n        allowed to interact with each other.  See :doc:`tutorial\n        </tutorials/feature_interaction_constraint>` for more information\n\n    importance_type: {Optional[str]}\n\n        The feature importance type for the feature_importances\\\\_ property:\n\n        * For tree model, it's either \"gain\", \"weight\", \"cover\", \"total_gain\" or\n          \"total_cover\".\n        * For linear model, only \"weight\" is defined and it's the normalized\n          coefficients without bias.\n\n    device : {Optional[str]}\n\n        .. versionadded:: 2.0.0\n\n        Device ordinal, available options are `cpu`, `cuda`, and `gpu`.\n\n    validate_parameters : {Optional[bool]}\n\n        Give warnings for unknown parameter.\n\n    enable_categorical : bool\n\n        See the same parameter of :py:class:`DMatrix` for details.\n\n    feature_types : {Optional[FeatureTypes]}\n\n        .. versionadded:: 1.7.0\n\n        Used for specifying feature types without constructing a dataframe. See\n        the :py:class:`DMatrix` for details.\n\n    feature_weights : Optional[ArrayLike]\n\n        Weight for each feature, defines the probability of each feature being selected\n        when colsample is being used.  All values must be greater than 0, otherwise a\n        `ValueError` is thrown.\n\n    max_cat_to_onehot : Optional[int]\n\n        .. versionadded:: 1.6.0\n\n        .. note:: This parameter is experimental\n\n        A threshold for deciding whether XGBoost should use one-hot encoding based split\n        for categorical data.  When number of categories is less than the threshold\n        then one-hot encoding is chosen, otherwise the categories will be partitioned\n        into children nodes. Also, `enable_categorical` needs to be set to have\n        categorical feature support. 
See :doc:`Categorical Data\n        </tutorials/categorical>` and :ref:`cat-param` for details.\n\n    max_cat_threshold : {Optional[int]}\n\n        .. versionadded:: 1.7.0\n\n        .. note:: This parameter is experimental\n\n        Maximum number of categories considered for each split. Used only by\n        partition-based splits for preventing over-fitting. Also, `enable_categorical`\n        needs to be set to have categorical feature support. See :doc:`Categorical Data\n        </tutorials/categorical>` and :ref:`cat-param` for details.\n\n    multi_strategy : {Optional[str]}\n\n        .. versionadded:: 2.0.0\n\n        .. note:: This parameter is a work in progress.\n\n        The strategy used for training multi-target models, including multi-target\n        regression and multi-class classification. See :doc:`/tutorials/multioutput` for\n        more information.\n\n        - ``one_output_per_tree``: One model for each target.\n        - ``multi_output_tree``:  Use multi-target trees.\n\n    eval_metric : {Optional[Union[str, List[Union[str, Callable]], Callable]]}\n\n        .. versionadded:: 1.6.0\n\n        Metric used for monitoring the training result and early stopping.  
It can be a\n        string or list of strings as names of predefined metric in XGBoost (See\n        :doc:`/parameter`), one of the metrics in :py:mod:`sklearn.metrics`, or any\n        other user defined metric that looks like `sklearn.metrics`.\n\n        If custom objective is also provided, then custom metric should implement the\n        corresponding reverse link function.\n\n        Unlike the `scoring` parameter commonly used in scikit-learn, when a callable\n        object is provided, it's assumed to be a cost function and by default XGBoost\n        will minimize the result during early stopping.\n\n        For advanced usage on Early stopping like directly choosing to maximize instead\n        of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.\n\n        See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more\n        information.\n\n        .. code-block:: python\n\n            from sklearn.datasets import load_diabetes\n            from sklearn.metrics import mean_absolute_error\n            X, y = load_diabetes(return_X_y=True)\n            reg = xgb.XGBRegressor(\n                tree_method=\"hist\",\n                eval_metric=mean_absolute_error,\n            )\n            reg.fit(X, y, eval_set=[(X, y)])\n\n    early_stopping_rounds : {Optional[int]}\n\n        .. versionadded:: 1.6.0\n\n        - Activates early stopping. Validation metric needs to improve at least once in\n          every **early_stopping_rounds** round(s) to continue training.  Requires at\n          least one item in **eval_set** in :py:meth:`fit`.\n\n        - If early stopping occurs, the model will have two additional attributes:\n          :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the\n          :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal\n          number of trees during inference. 
If users want to access the full model\n          (including trees built after early stopping), they can specify the\n          `iteration_range` in these inference methods. In addition, other utilities\n          like model plotting can also use the entire model.\n\n        - If you prefer to discard the trees after `best_iteration`, consider using the\n          callback function :py:class:`xgboost.callback.EarlyStopping`.\n\n        - If there's more than one item in **eval_set**, the last entry will be used for\n          early stopping.  If there's more than one metric in **eval_metric**, the last\n          metric will be used for early stopping.\n\n    callbacks : {Optional[List[TrainingCallback]]}\n\n        List of callback functions that are applied at end of each iteration.\n        It is possible to use predefined callbacks by using\n        :ref:`Callback API <callback_api>`.\n\n        .. note::\n\n           States in callback are not preserved during training, which means callback\n           objects can not be reused for multiple training sessions without\n           reinitialization or deepcopy.\n\n        .. code-block:: python\n\n            for params in parameters_grid:\n                # be sure to (re)initialize the callbacks before each run\n                callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]\n                reg = xgboost.XGBRegressor(**params, callbacks=callbacks)\n                reg.fit(X, y)\n\n    kwargs : {Optional[Any]}\n\n        Keyword arguments for XGBoost Booster object.  Full documentation of parameters\n        can be found :doc:`here </parameter>`.\n        Attempting to set a parameter via the constructor args and \\\\*\\\\*kwargs\n        dict simultaneously will result in a TypeError.\n\n        .. note:: \\\\*\\\\*kwargs unsupported by scikit-learn\n\n            \\\\*\\\\*kwargs is unsupported by scikit-learn.  
We do not guarantee\n            that parameters passed via this argument will interact properly\n            with scikit-learn.\n\"\"\"\n\n__custom_obj_note = \"\"\"\n        .. note::  Custom objective function\n\n            A custom objective function can be provided for the ``objective``\n            parameter. In this case, it should have the signature ``objective(y_true,\n            y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)\n            -> [grad, hess]``:\n\n            y_true: array_like of shape [n_samples]\n                The target values\n            y_pred: array_like of shape [n_samples]\n                The predicted values\n            sample_weight :\n                Optional sample weights.\n\n            grad: array_like of shape [n_samples]\n                The value of the gradient for each sample point.\n            hess: array_like of shape [n_samples]\n                The value of the second derivative for each sample point\n\n            Note that, if the custom objective produces negative values for\n            the Hessian, these will be clipped. If the objective is non-convex,\n            one might also consider using the expected Hessian (Fisher\n            information) instead.\n\"\"\"\n\nTDoc = TypeVar(\"TDoc\", bound=Type)\n\n\ndef xgboost_model_doc(\n    header: str,\n    items: List[str],\n    extra_parameters: Optional[str] = None,\n    end_note: Optional[str] = None,\n) -> Callable[[TDoc], TDoc]:\n    \"\"\"Obtain documentation for Scikit-Learn wrappers\n\n    Parameters\n    ----------\n    header: str\n       An introduction to the class.\n    items : list\n       A list of common doc items.  
Available items are:\n         - estimators: the meaning of n_estimators\n         - model: All the other parameters\n         - objective: note for customized objective\n    extra_parameters: str\n       Document for class specific parameters, placed at the head.\n    end_note: str\n       Extra notes put to the end.\"\"\"\n\n    def get_doc(item: str) -> str:\n        \"\"\"Return selected item\"\"\"\n        __doc = {\n            \"estimators\": __estimator_doc,\n            \"model\": __model_doc,\n            \"objective\": __custom_obj_note,\n        }\n        return __doc[item]\n\n    def adddoc(cls: TDoc) -> TDoc:\n        doc = [\n            \"\"\"\nParameters\n----------\n\"\"\"\n        ]\n        if extra_parameters:\n            doc.append(extra_parameters)\n        doc.extend([get_doc(i) for i in items])\n        if end_note:\n            doc.append(end_note)\n        full_doc = [\n            header + \"\\nSee :doc:`/python/sklearn_estimator` for more information.\\n\"\n        ]\n        full_doc.extend(doc)\n        cls.__doc__ = \"\".join(full_doc)\n        return cls\n\n    return adddoc\n\n\ndef get_model_categories(\n    X: ArrayLike,\n    model: Optional[Union[Booster, str]],\n    feature_types: Optional[FeatureTypes],\n) -> Tuple[Optional[Union[Booster, str]], Optional[Union[FeatureTypes, Categories]]]:\n    \"\"\"Extract the optional reference categories from the booster. Used for training\n    continuation. The result should be passed to the :py:func:`pick_ref_categories`.\n\n    \"\"\"\n    # Skip if it's not a dataframe as there's no new encoding to be recoded.\n    #\n    # This function helps override the `feature_types` parameter. 
The `feature_types`\n    # from user is not useful when input is a dataframe as the real feature type should\n    # be encoded into the DF.\n    if model is None or not is_dataframe(X):\n        return model, feature_types\n\n    if isinstance(model, str):\n        model = Booster(model_file=model)\n\n    categories = model.get_categories()\n    if not categories.empty():\n        # override the `feature_types`.\n        return model, categories\n    # Convert empty into None.\n    return model, feature_types\n\n\ndef pick_ref_categories(\n    X: Any,\n    model_cats: Optional[Union[FeatureTypes, Categories]],\n    Xy_cats: Optional[Categories],\n) -> Optional[Union[FeatureTypes, Categories]]:\n    \"\"\"Use the reference categories from the model. If none, then use the reference\n    categories from the training DMatrix.\n\n    Parameters\n    ----------\n    X :\n        Input feature matrix.\n\n    model_cats :\n        Optional categories stored in the previous model (training continuation). This\n        should come from the :py:func:`get_model_categories`.\n\n    Xy_cats :\n        Optional categories from the training DMatrix. 
Used for re-coding the validation\n        dataset.\n\n    \"\"\"\n    categories: Optional[Categories] = None\n    if not isinstance(model_cats, Categories) and is_dataframe(X):\n        categories = Xy_cats\n    if categories is not None and not categories.empty():\n        model_cats = categories\n\n    return model_cats\n\n\ndef _wrap_evaluation_matrices(\n    *,\n    missing: float,\n    X: Any,\n    y: Any,\n    group: Optional[Any],\n    qid: Optional[Any],\n    sample_weight: Optional[Any],\n    base_margin: Optional[Any],\n    feature_weights: Optional[ArrayLike],\n    eval_set: Optional[Sequence[Tuple[Any, Any]]],\n    sample_weight_eval_set: Optional[Sequence[Any]],\n    base_margin_eval_set: Optional[Sequence[Any]],\n    eval_group: Optional[Sequence[Any]],\n    eval_qid: Optional[Sequence[Any]],\n    create_dmatrix: Callable,\n    enable_categorical: bool,\n    feature_types: Optional[Union[FeatureTypes, Categories]],\n) -> Tuple[Any, List[Tuple[Any, str]]]:\n    \"\"\"Convert array_like evaluation matrices into DMatrix. 
Perform sanity checks on the\n    way.\n\n    \"\"\"\n    # Feature_types contains the optional reference categories from the booster object.\n    train_dmatrix = create_dmatrix(\n        data=X,\n        label=y,\n        group=group,\n        qid=qid,\n        weight=sample_weight,\n        base_margin=base_margin,\n        feature_weights=feature_weights,\n        missing=missing,\n        enable_categorical=enable_categorical,\n        feature_types=feature_types,\n        ref=None,\n    )\n\n    n_validation = 0 if eval_set is None else len(eval_set)\n    if hasattr(train_dmatrix, \"get_categories\"):\n        Xy_cats = train_dmatrix.get_categories()\n    else:\n        Xy_cats = None\n\n    def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:\n        if meta is None:\n            return [None] * n_validation\n        if len(meta) != n_validation:\n            raise ValueError(\n                f\"{name}'s length does not equal `eval_set`'s length, \"\n                + f\"expecting {n_validation}, got {len(meta)}\"\n            )\n        return meta\n\n    if eval_set is not None:\n        sample_weight_eval_set = validate_or_none(\n            sample_weight_eval_set, \"sample_weight_eval_set\"\n        )\n        base_margin_eval_set = validate_or_none(\n            base_margin_eval_set, \"base_margin_eval_set\"\n        )\n        eval_group = validate_or_none(eval_group, \"eval_group\")\n        eval_qid = validate_or_none(eval_qid, \"eval_qid\")\n\n        evals = []\n        for i, (valid_X, valid_y) in enumerate(eval_set):\n            # Skip the entry if it's the training DMatrix.\n            if all(\n                (\n                    valid_X is X,\n                    valid_y is y,\n                    sample_weight_eval_set[i] is sample_weight,\n                    base_margin_eval_set[i] is base_margin,\n                    eval_group[i] is group,\n                    eval_qid[i] is qid,\n                )\n            ):\n 
               evals.append(train_dmatrix)\n                continue\n\n            feature_types = pick_ref_categories(valid_X, feature_types, Xy_cats)\n            m = create_dmatrix(\n                data=valid_X,\n                label=valid_y,\n                weight=sample_weight_eval_set[i],\n                group=eval_group[i],\n                qid=eval_qid[i],\n                base_margin=base_margin_eval_set[i],\n                missing=missing,\n                enable_categorical=enable_categorical,\n                feature_types=feature_types,\n                ref=train_dmatrix,\n            )\n            evals.append(m)\n\n        nevals = len(evals)\n        eval_names = [f\"validation_{i}\" for i in range(nevals)]\n        evals = list(zip(evals, eval_names))\n    else:\n        if any(\n            meta is not None\n            for meta in [\n                sample_weight_eval_set,\n                base_margin_eval_set,\n                eval_group,\n                eval_qid,\n            ]\n        ):\n            raise ValueError(\n                \"`eval_set` is not set but one of the other evaluation meta info is \"\n                \"not None.\"\n            )\n        evals = []\n\n    return train_dmatrix, evals\n\n\nDEFAULT_N_ESTIMATORS = 100\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost.\"\"\",\n    [\"estimators\", \"model\", \"objective\"],\n)\nclass XGBModel(XGBModelBase):\n    # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        max_depth: Optional[int] = None,\n        max_leaves: Optional[int] = None,\n        max_bin: Optional[int] = None,\n        grow_policy: Optional[str] = None,\n        learning_rate: Optional[float] = None,\n        n_estimators: Optional[int] = None,\n        verbosity: Optional[int] = None,\n        objective: SklObjective = None,\n        booster: 
Optional[str] = None,\n        tree_method: Optional[str] = None,\n        n_jobs: Optional[int] = None,\n        gamma: Optional[float] = None,\n        min_child_weight: Optional[float] = None,\n        max_delta_step: Optional[float] = None,\n        subsample: Optional[float] = None,\n        sampling_method: Optional[str] = None,\n        colsample_bytree: Optional[float] = None,\n        colsample_bylevel: Optional[float] = None,\n        colsample_bynode: Optional[float] = None,\n        reg_alpha: Optional[float] = None,\n        reg_lambda: Optional[float] = None,\n        scale_pos_weight: Optional[float] = None,\n        base_score: Optional[Union[float, List[float]]] = None,\n        random_state: Optional[\n            Union[np.random.RandomState, np.random.Generator, int]\n        ] = None,\n        missing: float = np.nan,\n        num_parallel_tree: Optional[int] = None,\n        monotone_constraints: Optional[Union[Dict[str, int], str]] = None,\n        interaction_constraints: Optional[Union[str, Sequence[Sequence[str]]]] = None,\n        importance_type: Optional[str] = None,\n        device: Optional[str] = None,\n        validate_parameters: Optional[bool] = None,\n        enable_categorical: bool = True,\n        feature_types: Optional[FeatureTypes] = None,\n        feature_weights: Optional[ArrayLike] = None,\n        max_cat_to_onehot: Optional[int] = None,\n        max_cat_threshold: Optional[int] = None,\n        multi_strategy: Optional[str] = None,\n        eval_metric: Optional[Union[str, List[Union[str, Callable]], Callable]] = None,\n        early_stopping_rounds: Optional[int] = None,\n        callbacks: Optional[List[TrainingCallback]] = None,\n        **kwargs: Any,\n    ) -> None:\n        if not SKLEARN_INSTALLED:\n            raise ImportError(\n                \"sklearn needs to be installed in order to use this module\"\n            )\n        self.n_estimators = n_estimators\n        self.objective = objective\n\n        
self.max_depth = max_depth\n        self.max_leaves = max_leaves\n        self.max_bin = max_bin\n        self.grow_policy = grow_policy\n        self.learning_rate = learning_rate\n        self.verbosity = verbosity\n        self.booster = booster\n        self.tree_method = tree_method\n        self.gamma = gamma\n        self.min_child_weight = min_child_weight\n        self.max_delta_step = max_delta_step\n        self.subsample = subsample\n        self.sampling_method = sampling_method\n        self.colsample_bytree = colsample_bytree\n        self.colsample_bylevel = colsample_bylevel\n        self.colsample_bynode = colsample_bynode\n        self.reg_alpha = reg_alpha\n        self.reg_lambda = reg_lambda\n        self.scale_pos_weight = scale_pos_weight\n        self.base_score = base_score\n        self.missing = missing\n        self.num_parallel_tree = num_parallel_tree\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n        self.monotone_constraints = monotone_constraints\n        self.interaction_constraints = interaction_constraints\n        self.importance_type = importance_type\n        self.device = device\n        self.validate_parameters = validate_parameters\n        self.enable_categorical = enable_categorical\n        self.feature_types = feature_types\n        if isinstance(self.feature_types, Categories):\n            raise TypeError(\n                \"If you are training with a prior model (training continuation), \"\n                \"The scikit-learn interface can automatically reuse the categories from\"\n                \" that model.\"\n            )\n        self.feature_weights = feature_weights\n        self.max_cat_to_onehot = max_cat_to_onehot\n        self.max_cat_threshold = max_cat_threshold\n        self.multi_strategy = multi_strategy\n        self.eval_metric = eval_metric\n        self.early_stopping_rounds = early_stopping_rounds\n        self.callbacks = callbacks\n        if kwargs:\n           
 self.kwargs = kwargs\n\n    def _more_tags(self) -> Dict[str, bool]:\n        \"\"\"Tags used for scikit-learn data validation.\"\"\"\n        tags = {\"allow_nan\": True, \"no_validation\": True, \"sparse\": True}\n        if hasattr(self, \"kwargs\") and self.kwargs.get(\"updater\") == \"shotgun\":\n            tags[\"non_deterministic\"] = True\n\n        tags[\"categorical\"] = self.enable_categorical\n        tags[\"string\"] = self.enable_categorical\n        return tags\n\n    @staticmethod\n    def _update_sklearn_tags_from_dict(\n        *,\n        tags: _sklearn_Tags,\n        tags_dict: Dict[str, bool],\n    ) -> _sklearn_Tags:\n        \"\"\"Update ``sklearn.utils.Tags`` inherited from ``scikit-learn`` base classes.\n\n        ``scikit-learn`` 1.6 introduced a dataclass-based interface for estimator tags.\n        ref: https://github.com/scikit-learn/scikit-learn/pull/29677\n\n        This method handles updating that instance based on the values in\n        ``self._more_tags()``.\n\n        \"\"\"\n        tags.non_deterministic = tags_dict.get(\"non_deterministic\", False)\n        tags.no_validation = tags_dict[\"no_validation\"]\n        tags.input_tags.allow_nan = tags_dict[\"allow_nan\"]\n        tags.input_tags.sparse = tags_dict[\"sparse\"]\n        tags.input_tags.categorical = tags_dict[\"categorical\"]\n        return tags\n\n    def __sklearn_tags__(self) -> _sklearn_Tags:\n        # XGBModelBase.__sklearn_tags__() cannot be called unconditionally,\n        # because that method isn't defined for scikit-learn<1.6\n        if not hasattr(XGBModelBase, \"__sklearn_tags__\"):\n            err_msg = (\n                \"__sklearn_tags__() should not be called when using scikit-learn<1.6. 
\"\n                f\"Detected version: {_sklearn_version}\"\n            )\n            raise AttributeError(err_msg)\n\n        # take whatever tags are provided by BaseEstimator, then modify\n        # them with XGBoost-specific values\n        return self._update_sklearn_tags_from_dict(\n            tags=super().__sklearn_tags__(),  # pylint: disable=no-member\n            tags_dict=self._more_tags(),\n        )\n\n    def __sklearn_is_fitted__(self) -> bool:\n        return hasattr(self, \"_Booster\")\n\n    @property\n    def _doc_link_module(self) -> str:\n        return \"xgboost\"\n\n    @property\n    def _doc_link_template(self) -> str:\n        ver = _py_version()\n        (major, minor, _), post = _parse_version(ver)\n\n        if post == \"dev\":\n            rel = \"latest\"\n        else:\n            # RTD tracks the release branch. We don't have independent branches for\n            # patch releases.\n            rel = f\"release_{major}.{minor}.0\"\n\n        module = self.__class__.__module__\n        # All sklearn estimators are forwarded to the top level module in both source\n        # code and sphinx api doc.\n        if module == \"xgboost.sklearn\":\n            module = module.split(\".\")[0]\n        name = self.__class__.__name__\n\n        base = \"https://xgboost.readthedocs.io/en\"\n        return f\"{base}/{rel}/python/python_api.html#{module}.{name}\"\n\n    def _wrapper_params(self) -> Set[str]:\n        wrapper_specific = {\n            \"importance_type\",\n            \"kwargs\",\n            \"missing\",\n            \"n_estimators\",\n            \"enable_categorical\",\n            \"early_stopping_rounds\",\n            \"callbacks\",\n            \"feature_types\",\n            \"feature_weights\",\n        }\n        return wrapper_specific\n\n    def get_booster(self) -> Booster:\n        \"\"\"Get the underlying xgboost Booster of this model.\n\n        This will raise an exception when fit was not called\n\n        
Returns\n        -------\n        booster : a xgboost booster of underlying model\n        \"\"\"\n        if not self.__sklearn_is_fitted__():\n            from sklearn.exceptions import NotFittedError\n\n            raise NotFittedError(\"need to call fit or load_model beforehand\")\n        return self._Booster\n\n    def set_params(self, **params: Any) -> \"XGBModel\":\n        \"\"\"Set the parameters of this estimator.  Modification of the sklearn method to\n        allow unknown kwargs. This allows using the full range of xgboost\n        parameters that are not defined as member variables in sklearn grid\n        search.\n\n        Returns\n        -------\n        self\n\n        \"\"\"\n        if not params:\n            # Simple optimization to gain speed (inspect is slow)\n            return self\n\n        # this concatenates kwargs into parameters, enabling `get_params` for\n        # obtaining parameters from keyword parameters.\n        for key, value in params.items():\n            if hasattr(self, key):\n                setattr(self, key, value)\n            else:\n                if not hasattr(self, \"kwargs\"):\n                    self.kwargs = {}\n                self.kwargs[key] = value\n\n        if self.__sklearn_is_fitted__():\n            parameters = self.get_xgb_params()\n            self.get_booster().set_param(parameters)\n\n        return self\n\n    def get_params(self, deep: bool = True) -> Dict[str, Any]:\n        # pylint: disable=attribute-defined-outside-init\n        \"\"\"Get parameters.\"\"\"\n        # Based on: https://stackoverflow.com/questions/59248211\n        # The basic flow in `get_params` is:\n        # 0. Return parameters in subclass (self.__class__) first, by using inspect.\n        # 1. Return parameters in all parent classes (especially `XGBModel`).\n        # 2. Return whatever in `**kwargs`.\n        # 3. 
Merge them.\n        #\n        # This needs to accommodate being called recursively in the following\n        # inheritance graphs (and similar for classification and ranking):\n        #\n        #   XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator\n        #                     XGBRegressor -> XGBModel -> BaseEstimator\n        #                                     XGBModel -> BaseEstimator\n        #\n        params = super().get_params(deep)  # pylint: disable=no-member\n        cp = copy.copy(self)\n        # If the immediate parent defines get_params(), use that.\n        if callable(getattr(cp.__class__.__bases__[0], \"get_params\", None)):\n            cp.__class__ = cp.__class__.__bases__[0]\n        # Otherwise, skip it and assume the next class will have it.\n        # This is here primarily for cases where the first class in MRO is a scikit-learn mixin.\n        else:\n            cp.__class__ = cp.__class__.__bases__[1]\n        params.update(cp.__class__.get_params(cp, deep))\n        # if kwargs is a dict, update params accordingly\n        if hasattr(self, \"kwargs\") and isinstance(self.kwargs, dict):\n            params.update(self.kwargs)\n        if isinstance(params[\"random_state\"], np.random.RandomState):\n            params[\"random_state\"] = params[\"random_state\"].randint(\n                np.iinfo(np.int32).max\n            )\n        elif isinstance(params[\"random_state\"], np.random.Generator):\n            params[\"random_state\"] = int(\n                params[\"random_state\"].integers(np.iinfo(np.int32).max)\n            )\n\n        return params\n\n    def get_xgb_params(self) -> Dict[str, Any]:\n        \"\"\"Get xgboost specific parameters.\"\"\"\n        params: Dict[str, Any] = self.get_params()\n\n        # Parameters that should not go into native learner.\n        wrapper_specific = self._wrapper_params()\n        filtered = {}\n        for k, v in params.items():\n            if k not in wrapper_specific 
and not callable(v):\n                filtered[k] = v\n\n        return filtered\n\n    def get_num_boosting_rounds(self) -> int:\n        \"\"\"Gets the number of xgboost boosting rounds.\"\"\"\n        return DEFAULT_N_ESTIMATORS if self.n_estimators is None else self.n_estimators\n\n    def _get_type(self) -> str:\n        if hasattr(self, \"_estimator_type\"):  # scikit-learn <1.8\n            return self._estimator_type  # pylint: disable=no-member\n        if hasattr(XGBModelBase, \"__sklearn_tags__\"):  # scikit-learn 1.8+\n            return self.__sklearn_tags__().estimator_type\n        raise TypeError(\n            \"`_estimator_type` undefined.  \"\n            \"Please use appropriate mixin to define estimator type.\"\n        )\n\n    def save_model(self, fname: Union[str, os.PathLike]) -> None:\n        meta: Dict[str, Any] = {}\n        # For validation.\n        meta[\"_estimator_type\"] = self._get_type()\n        meta_str = json.dumps(meta)\n        self.get_booster().set_attr(scikit_learn=meta_str)\n        self.get_booster().save_model(fname)\n        self.get_booster().set_attr(scikit_learn=None)\n\n    if Booster.save_model.__doc__ is not None:\n        save_model.__doc__ = f\"\"\"{Booster.save_model.__doc__}\"\"\"\n\n    def load_model(self, fname: ModelIn) -> None:\n        # pylint: disable=attribute-defined-outside-init\n        if not self.__sklearn_is_fitted__():\n            self._Booster = Booster({\"n_jobs\": self.n_jobs})\n        self.get_booster().load_model(fname)\n\n        meta_str = self.get_booster().attr(\"scikit_learn\")\n        if meta_str is not None:\n            meta = json.loads(meta_str)\n            t = meta.get(\"_estimator_type\", None)\n            if t is not None and t != self._get_type():\n                raise TypeError(\n                    \"Loading an estimator with different type. 
Expecting: \"\n                    f\"{self._get_type()}, got: {t}\"\n                )\n\n        self.get_booster().set_attr(scikit_learn=None)\n        config = json.loads(self.get_booster().save_config())\n        self._load_model_attributes(config)\n\n    if Booster.load_model.__doc__ is not None:\n        load_model.__doc__ = f\"\"\"{Booster.load_model.__doc__}\"\"\"\n\n    def _load_model_attributes(self, config: dict) -> None:\n        \"\"\"Load model attributes without hyper-parameters.\"\"\"\n        from sklearn.base import is_classifier\n\n        booster = self.get_booster()\n\n        self.objective = config[\"learner\"][\"objective\"][\"name\"]\n        self.booster = config[\"learner\"][\"gradient_booster\"][\"name\"]\n        self.base_score = json.loads(\n            config[\"learner\"][\"learner_model_param\"][\"base_score\"]\n        )\n        self.feature_types = booster.feature_types\n        self.enable_categorical = self.feature_types is not None and any(\n            ft == CAT_T for ft in self.feature_types\n        )\n\n        if is_classifier(self):\n            self.n_classes_ = int(config[\"learner\"][\"learner_model_param\"][\"num_class\"])\n            # binary classification is treated as regression in XGBoost.\n            self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_\n\n    # pylint: disable=too-many-branches\n    def _configure_fit(\n        self,\n        booster: Optional[Union[Booster, \"XGBModel\", str]],\n        params: Dict[str, Any],\n        feature_weights: Optional[ArrayLike],\n    ) -> Tuple[\n        Optional[Union[Booster, str, \"XGBModel\"]],\n        Optional[Metric],\n        Dict[str, Any],\n        Optional[ArrayLike],\n    ]:\n        \"\"\"Configure parameters for :py:meth:`fit`.\"\"\"\n        if isinstance(booster, XGBModel):\n            model: Optional[Union[Booster, str]] = booster.get_booster()\n        else:\n            model = booster\n\n        def _deprecated(parameter: str) -> 
None:\n            warnings.warn(\n                f\"`{parameter}` in `fit` method is deprecated for better compatibility \"\n                f\"with scikit-learn, use `{parameter}` in constructor or`set_params` \"\n                \"instead.\",\n                UserWarning,\n            )\n\n        def _duplicated(parameter: str) -> None:\n            raise ValueError(\n                f\"2 different `{parameter}` are provided.  Use the one in constructor \"\n                \"or `set_params` instead.\"\n            )\n\n        # - configure callable evaluation metric\n        metric: Optional[Metric] = None\n\n        def custom_metric(m: Callable) -> Metric:\n            if self._get_type() == \"ranker\":\n                wrapped = ltr_metric_decorator(m, self.n_jobs)\n            else:\n                wrapped = _metric_decorator(m)\n            return wrapped\n\n        def invalid_type(m: Any) -> None:\n            msg = f\"Invalid type for the `eval_metric`: {type(m)}\"\n            raise TypeError(msg)\n\n        if self.eval_metric is not None:\n            if callable(self.eval_metric):\n                metric = custom_metric(self.eval_metric)\n            elif isinstance(self.eval_metric, str):\n                params.update({\"eval_metric\": self.eval_metric})\n            else:\n                # A sequence of metrics\n                if not isinstance(self.eval_metric, collections.abc.Sequence):\n                    invalid_type(self.eval_metric)\n                # Could be a list of strings or callables\n                builtin_metrics: List[str] = []\n                for m in self.eval_metric:\n                    if callable(m):\n                        if metric is not None:\n                            raise NotImplementedError(\n                                \"Using multiple custom metrics is not yet supported.\"\n                            )\n                        metric = custom_metric(m)\n                    elif isinstance(m, str):\n 
                       builtin_metrics.append(m)\n                    else:\n                        invalid_type(m)\n                if builtin_metrics:\n                    params.update({\"eval_metric\": builtin_metrics})\n\n        if feature_weights is not None:\n            _deprecated(\"feature_weights\")\n        if feature_weights is not None and self.feature_weights is not None:\n            _duplicated(\"feature_weights\")\n        feature_weights = (\n            self.feature_weights\n            if self.feature_weights is not None\n            else feature_weights\n        )\n        return model, metric, params, feature_weights\n\n    def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:\n        # Use `QuantileDMatrix` to save memory.\n        if _can_use_qdm(self.tree_method, self.device) and self.booster != \"gblinear\":\n            try:\n                return QuantileDMatrix(\n                    **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin\n                )\n            except TypeError:  # `QuantileDMatrix` supports lesser types than DMatrix\n                pass\n        return DMatrix(**kwargs, nthread=self.n_jobs)\n\n    def _set_evaluation_result(self, evals_result: EvalsLog) -> None:\n        if evals_result:\n            self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result)\n\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: ArrayLike,\n        y: ArrayLike,\n        *,\n        sample_weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,\n        verbose: Optional[Union[bool, int]] = True,\n        xgb_model: Optional[Union[Booster, str, \"XGBModel\"]] = None,\n        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,\n        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,\n        feature_weights: Optional[ArrayLike] = 
None,\n    ) -> \"XGBModel\":\n        # pylint: disable=attribute-defined-outside-init\n        \"\"\"Fit gradient boosting model.\n\n        Note that calling ``fit()`` multiple times will cause the model object to be\n        re-fit from scratch. To resume training from a previous checkpoint, explicitly\n        pass ``xgb_model`` argument.\n\n        Parameters\n        ----------\n        X :\n            Input feature matrix. See :ref:`py-data` for a list of supported types.\n\n            When the ``tree_method`` is set to ``hist``, internally, the\n            :py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`\n            for conserving memory. However, this has performance implications when the\n            device of input data is not matched with algorithm. For instance, if the\n            input is a numpy array on CPU but ``cuda`` is used for training, then the\n            data is first processed on CPU then transferred to GPU.\n        y :\n            Labels\n        sample_weight :\n            instance weights\n        base_margin :\n            Global bias for each instance. See :doc:`/tutorials/intercept` for details.\n        eval_set :\n            A list of (X, y) tuple pairs to use as validation sets, for which\n            metrics will be computed.\n            Validation metrics will help us track the performance of the model.\n\n        verbose :\n            If `verbose` is True and an evaluation set is used, the evaluation metric\n            measured on the validation set is printed to stdout at each boosting stage.\n            If `verbose` is an integer, the evaluation metric is printed at each\n            `verbose` boosting stage. 
The last boosting stage / the boosting stage found\n            by using `early_stopping_rounds` is also printed.\n        xgb_model :\n            file name of stored XGBoost model or 'Booster' instance XGBoost model to be\n            loaded before training (allows training continuation).\n        sample_weight_eval_set :\n            A list of the form [L_1, L_2, ..., L_n], where each L_i is an array like\n            object storing instance weights for the i-th validation set.\n        base_margin_eval_set :\n            A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like\n            object storing base margin for the i-th validation set.\n        feature_weights :\n\n            .. deprecated:: 3.0.0\n\n            Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`\n            instead.\n\n        \"\"\"\n        with config_context(verbosity=self.verbosity):\n            params = self.get_xgb_params()\n            model, metric, params, feature_weights = self._configure_fit(\n                xgb_model, params, feature_weights\n            )\n            model, feature_types = get_model_categories(X, model, self.feature_types)\n\n            evals_result: EvalsLog = {}\n            train_dmatrix, evals = _wrap_evaluation_matrices(\n                missing=self.missing,\n                X=X,\n                y=y,\n                group=None,\n                qid=None,\n                sample_weight=sample_weight,\n                base_margin=base_margin,\n                feature_weights=feature_weights,\n                eval_set=eval_set,\n                sample_weight_eval_set=sample_weight_eval_set,\n                base_margin_eval_set=base_margin_eval_set,\n                eval_group=None,\n                eval_qid=None,\n                create_dmatrix=self._create_dmatrix,\n                enable_categorical=self.enable_categorical,\n                feature_types=feature_types,\n            )\n\n            if 
callable(self.objective):\n                obj: Optional[PlainObj] = _objective_decorator(self.objective)\n                params[\"objective\"] = \"reg:squarederror\"\n            else:\n                obj = None\n\n            self._Booster = train(\n                params,\n                train_dmatrix,\n                self.get_num_boosting_rounds(),\n                evals=evals,\n                early_stopping_rounds=self.early_stopping_rounds,\n                evals_result=evals_result,\n                obj=obj,\n                custom_metric=metric,\n                verbose_eval=verbose,\n                xgb_model=model,\n                callbacks=self.callbacks,\n            )\n\n            self._set_evaluation_result(evals_result)\n            return self\n\n    def _can_use_inplace_predict(self) -> bool:\n        return self.booster != \"gblinear\"\n\n    def _get_iteration_range(\n        self, iteration_range: Optional[IterationRange]\n    ) -> IterationRange:\n        if iteration_range is None or iteration_range[1] == 0:\n            # Use best_iteration if defined.\n            try:\n                iteration_range = (0, self.best_iteration + 1)\n            except AttributeError:\n                iteration_range = (0, 0)\n        if self.booster == \"gblinear\":\n            iteration_range = (0, 0)\n        return iteration_range\n\n    @_deprecate_positional_args\n    def predict(\n        self,\n        X: ArrayLike,\n        *,\n        output_margin: bool = False,\n        validate_features: bool = True,\n        base_margin: Optional[ArrayLike] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> ArrayLike:\n        \"\"\"Predict with `X`.  If the model is trained with early stopping, then\n        :py:attr:`best_iteration` is used automatically. 
The estimator uses\n        `inplace_predict` by default and falls back to using :py:class:`DMatrix` if\n        devices between the data and the estimator don't match.\n\n        .. note:: This function is only thread safe for `gbtree` and `dart`.\n\n        Parameters\n        ----------\n        X :\n            Data to predict with. See :ref:`py-data` for a list of supported types.\n        output_margin :\n            Whether to output the raw untransformed margin value.\n        validate_features :\n            When this is True, validate that the Booster's and data's feature_names are\n            identical.  Otherwise, it is assumed that the feature_names are the same.\n        base_margin :\n            Global bias for each instance. See :doc:`/tutorials/intercept` for details.\n        iteration_range :\n            Specifies which layer of trees are used in prediction.  For example, if a\n            random forest is trained with 100 rounds.  Specifying ``iteration_range=(10,\n            20)``, then only the forests built during [10, 20) (half open set) rounds\n            are used in this prediction.\n\n            .. 
versionadded:: 1.4.0\n\n        Returns\n        -------\n        prediction\n\n        \"\"\"\n        with config_context(verbosity=self.verbosity):\n            iteration_range = self._get_iteration_range(iteration_range)\n            if self._can_use_inplace_predict():\n                try:\n                    predts = self.get_booster().inplace_predict(\n                        data=X,\n                        iteration_range=iteration_range,\n                        predict_type=\"margin\" if output_margin else \"value\",\n                        missing=self.missing,\n                        base_margin=base_margin,\n                        validate_features=validate_features,\n                    )\n                    if _is_cupy_alike(predts):\n                        cp = import_cupy()\n\n                        predts = cp.asnumpy(predts)  # ensure numpy array is used.\n                    return predts\n                except TypeError:\n                    # coo, csc, dt\n                    pass\n\n            test = DMatrix(\n                X,\n                base_margin=base_margin,\n                missing=self.missing,\n                nthread=self.n_jobs,\n                feature_types=self.feature_types,\n                enable_categorical=self.enable_categorical,\n            )\n            return self.get_booster().predict(\n                data=test,\n                iteration_range=iteration_range,\n                output_margin=output_margin,\n                validate_features=validate_features,\n            )\n\n    def apply(\n        self,\n        X: ArrayLike,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> np.ndarray:\n        \"\"\"Return the predicted leaf every tree for each sample. If the model is trained\n        with early stopping, then :py:attr:`best_iteration` is used automatically.\n\n        Parameters\n        ----------\n        X :\n            Input features matrix. 
See :ref:`py-data` for a list of supported types.\n\n        iteration_range :\n            See :py:meth:`predict`.\n\n        Returns\n        -------\n        X_leaves : array_like, shape=[n_samples, n_trees]\n            For each datapoint x in X and for each tree, return the index of the\n            leaf x ends up in. Leaves are numbered within\n            ``[0; 2**(self.max_depth+1))``, possibly with gaps in the numbering.\n\n        \"\"\"\n        with config_context(verbosity=self.verbosity):\n            iteration_range = self._get_iteration_range(iteration_range)\n            test_dmatrix = DMatrix(\n                X,\n                missing=self.missing,\n                feature_types=self.feature_types,\n                nthread=self.n_jobs,\n                enable_categorical=self.enable_categorical,\n            )\n            return self.get_booster().predict(\n                test_dmatrix, pred_leaf=True, iteration_range=iteration_range\n            )\n\n    def evals_result(self) -> Dict[str, Dict[str, List[float]]]:\n        \"\"\"Return the evaluation results.\n\n        If **eval_set** is passed to the :py:meth:`fit` function, you can call\n        ``evals_result()`` to get evaluation results for all passed **eval_sets**.  When\n        **eval_metric** is also passed to the :py:meth:`fit` function, the\n        **evals_result** will contain the **eval_metrics** passed to the :py:meth:`fit`\n        function.\n\n        The returned evaluation result is a dictionary:\n\n        .. 
code-block:: python\n\n            {'validation_0': {'logloss': ['0.604835', '0.531479']},\n             'validation_1': {'logloss': ['0.41965', '0.17686']}}\n\n        Returns\n        -------\n        evals_result\n\n        \"\"\"\n        if getattr(self, \"evals_result_\", None) is not None:\n            evals_result = self.evals_result_\n        else:\n            raise XGBoostError(\n                \"No evaluation result, `eval_set` is not used during training.\"\n            )\n\n        return evals_result\n\n    @property\n    def n_features_in_(self) -> int:\n        \"\"\"Number of features seen during :py:meth:`fit`.\"\"\"\n        booster = self.get_booster()\n        return booster.num_features()\n\n    @property\n    def feature_names_in_(self) -> np.ndarray:\n        \"\"\"Names of features seen during :py:meth:`fit`.  Defined only when `X` has\n        feature names that are all strings.\n\n        \"\"\"\n        feature_names = self.get_booster().feature_names\n        if feature_names is None:\n            raise AttributeError(\n                \"`feature_names_in_` is defined only when `X` has feature names that \"\n                \"are all strings.\"\n            )\n        return np.array(feature_names)\n\n    @property\n    def best_score(self) -> float:\n        \"\"\"The best score obtained by early stopping.\"\"\"\n        return self.get_booster().best_score\n\n    @property\n    def best_iteration(self) -> int:\n        \"\"\"The best iteration obtained by early stopping.  This attribute is 0-based,\n        for instance if the best iteration is the first round, then best_iteration is 0.\n\n        \"\"\"\n        return self.get_booster().best_iteration\n\n    @property\n    def feature_importances_(self) -> np.ndarray:\n        \"\"\"Feature importances property, return depends on `importance_type`\n        parameter. 
When model trained with multi-class/multi-label/multi-target dataset,\n        the feature importance is \"averaged\" over all targets. The \"average\" is defined\n        based on the importance type. For instance, if the importance type is\n        \"total_gain\", then the score is sum of loss change for each split from all\n        trees.\n\n        Returns\n        -------\n        feature_importances_ : array of shape ``[n_features]`` except for multi-class\n        linear model, which returns an array with shape `(n_features, n_classes)`\n\n        \"\"\"\n        b: Booster = self.get_booster()\n\n        def dft() -> str:\n            return \"weight\" if self.booster == \"gblinear\" else \"gain\"\n\n        score = b.get_score(\n            importance_type=self.importance_type if self.importance_type else dft()\n        )\n        if b.feature_names is None:\n            feature_names: FeatureNames = [f\"f{i}\" for i in range(self.n_features_in_)]\n        else:\n            feature_names = b.feature_names\n        # gblinear returns all features so the `get` in next line is only for gbtree.\n        all_features = [score.get(f, 0.0) for f in feature_names]\n        all_features_arr = np.array(all_features, dtype=np.float32)\n        total = all_features_arr.sum()\n        if total == 0:\n            return all_features_arr\n        return all_features_arr / total\n\n    @property\n    def coef_(self) -> np.ndarray:\n        \"\"\"\n        Coefficients property\n\n        .. note:: Coefficients are defined only for linear learners\n\n            Coefficients are only defined when the linear model is chosen as\n            base learner (`booster=gblinear`). 
It is not defined for other base\n            learner types, such as tree learners (`booster=gbtree`).\n\n        Returns\n        -------\n        coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``\n        \"\"\"\n        if self.get_xgb_params()[\"booster\"] != \"gblinear\":\n            raise AttributeError(\n                f\"Coefficients are not defined for Booster type {self.booster}\"\n            )\n        b = self.get_booster()\n        coef = np.array(json.loads(b.get_dump(dump_format=\"json\")[0])[\"weight\"])\n        # Logic for multiclass classification\n        n_classes = getattr(self, \"n_classes_\", None)\n        if n_classes is not None:\n            if n_classes > 2:\n                assert len(coef.shape) == 1\n                assert coef.shape[0] % n_classes == 0\n                coef = coef.reshape((n_classes, -1))\n        return coef\n\n    @property\n    def intercept_(self) -> np.ndarray:\n        \"\"\"Intercept (bias) property\n\n        For tree-based model, the returned value is the `base_score`.\n\n        Returns\n        -------\n        intercept_ : array of shape ``(1,)`` or ``[n_classes]``\n\n        \"\"\"\n        booster_config = self.get_xgb_params()[\"booster\"]\n        b = self.get_booster()\n        if booster_config != \"gblinear\":  # gbtree, dart\n            config = json.loads(b.save_config())\n            intercept = json.loads(\n                config[\"learner\"][\"learner_model_param\"][\"base_score\"]\n            )\n            return np.array(intercept, dtype=np.float32)\n\n        return np.array(\n            json.loads(b.get_dump(dump_format=\"json\")[0])[\"bias\"], dtype=np.float32\n        )\n\n\nPredtT = TypeVar(\"PredtT\", bound=np.ndarray)\n\n\ndef _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> PredtT:\n    assert len(prediction.shape) <= 2\n    if len(prediction.shape) == 2 and prediction.shape[1] == n_classes:\n        # multi-class\n        
return prediction\n    if (\n        len(prediction.shape) == 2\n        and n_classes == 2\n        and prediction.shape[1] >= n_classes\n    ):\n        # multi-label\n        return prediction\n    # binary logistic function\n    classone_probs = prediction\n    classzero_probs = 1.0 - classone_probs\n    return vstack((classzero_probs, classone_probs)).transpose()\n\n\n@xgboost_model_doc(\n    \"Implementation of the scikit-learn API for XGBoost classification.\",\n    [\"model\", \"objective\"],\n    extra_parameters=\"\"\"\n    n_estimators : Optional[int]\n        Number of boosting rounds.\n\"\"\",\n)\nclass XGBClassifier(XGBClassifierBase, XGBModel):\n    # pylint: disable=missing-docstring,too-many-instance-attributes\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        objective: SklObjective = \"binary:logistic\",\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(objective=objective, **kwargs)\n\n    def _more_tags(self) -> Dict[str, bool]:\n        tags = super()._more_tags()\n        tags[\"multilabel\"] = True\n        return tags\n\n    def __sklearn_tags__(self) -> _sklearn_Tags:\n        tags = super().__sklearn_tags__()\n        tags_dict = self._more_tags()\n        tags.classifier_tags.multi_label = tags_dict[\"multilabel\"]\n        return tags\n\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: ArrayLike,\n        y: ArrayLike,\n        *,\n        sample_weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,\n        verbose: Optional[Union[bool, int]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,\n        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,\n        feature_weights: Optional[ArrayLike] = None,\n    ) -> \"XGBClassifier\":\n        # pylint: 
disable = attribute-defined-outside-init,too-many-statements\n        with config_context(verbosity=self.verbosity):\n            # We keep the n_classes_ as a simple member instead of loading it from\n            # booster in a Python property. This way we can have efficient and\n            # thread-safe prediction.\n            if _is_polars_lazyframe(y):\n                y = y.collect()\n            if _is_cudf_df(y) or _is_cudf_ser(y):\n                cp = import_cupy()\n\n                classes = cp.unique(y.values)\n                self.n_classes_ = len(classes)\n                expected_classes = cp.array(self.classes_)\n            elif _is_cupy_alike(y):\n                cp = import_cupy()\n\n                classes = cp.unique(y)\n                self.n_classes_ = len(classes)\n                expected_classes = cp.array(self.classes_)\n            else:\n                classes = np.unique(np.asarray(y))\n                self.n_classes_ = len(classes)\n                expected_classes = self.classes_\n            if (\n                classes.shape != expected_classes.shape\n                or not (classes == expected_classes).all()\n            ):\n                raise ValueError(\n                    f\"Invalid classes inferred from unique values of `y`.  \"\n                    f\"Expected: {expected_classes}, got {classes}\"\n                )\n\n            params = self.get_xgb_params()\n\n            if callable(self.objective):\n                obj: Optional[PlainObj] = _objective_decorator(self.objective)\n                # Use default value. 
Is it really not used ?\n                params[\"objective\"] = \"binary:logistic\"\n            else:\n                obj = None\n\n            if self.n_classes_ > 2:\n                # Switch to using a multiclass objective in the underlying XGB instance\n                if params.get(\"objective\", None) != \"multi:softmax\":\n                    params[\"objective\"] = \"multi:softprob\"\n                params[\"num_class\"] = self.n_classes_\n\n            model, metric, params, feature_weights = self._configure_fit(\n                xgb_model, params, feature_weights\n            )\n            model, feature_types = get_model_categories(X, model, self.feature_types)\n\n            evals_result: EvalsLog = {}\n            train_dmatrix, evals = _wrap_evaluation_matrices(\n                missing=self.missing,\n                X=X,\n                y=y,\n                group=None,\n                qid=None,\n                sample_weight=sample_weight,\n                base_margin=base_margin,\n                feature_weights=feature_weights,\n                eval_set=eval_set,\n                sample_weight_eval_set=sample_weight_eval_set,\n                base_margin_eval_set=base_margin_eval_set,\n                eval_group=None,\n                eval_qid=None,\n                create_dmatrix=self._create_dmatrix,\n                enable_categorical=self.enable_categorical,\n                feature_types=feature_types,\n            )\n\n            self._Booster = train(\n                params,\n                train_dmatrix,\n                self.get_num_boosting_rounds(),\n                evals=evals,\n                early_stopping_rounds=self.early_stopping_rounds,\n                evals_result=evals_result,\n                obj=obj,\n                custom_metric=metric,\n                verbose_eval=verbose,\n                xgb_model=model,\n                callbacks=self.callbacks,\n            )\n\n            if not 
callable(self.objective):\n                self.objective = params[\"objective\"]\n\n            self._set_evaluation_result(evals_result)\n            return self\n\n    if XGBModel.fit.__doc__ is not None:\n        fit.__doc__ = XGBModel.fit.__doc__.replace(\n            \"Fit gradient boosting model\", \"Fit gradient boosting classifier\", 1\n        )\n\n    @_deprecate_positional_args\n    def predict(\n        self,\n        X: ArrayLike,\n        *,\n        output_margin: bool = False,\n        validate_features: bool = True,\n        base_margin: Optional[ArrayLike] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> ArrayLike:\n        with config_context(verbosity=self.verbosity):\n            class_probs = super().predict(\n                X=X,\n                output_margin=output_margin,\n                validate_features=validate_features,\n                base_margin=base_margin,\n                iteration_range=iteration_range,\n            )\n            if output_margin:\n                # If output_margin is active, simply return the scores\n                return class_probs\n\n            if len(class_probs.shape) > 1 and self.n_classes_ != 2:\n                # multi-class, turns softprob into softmax\n                column_indexes: np.ndarray = np.argmax(class_probs, axis=1)\n            elif len(class_probs.shape) > 1 and class_probs.shape[1] != 1:\n                # multi-label\n                column_indexes = np.zeros(class_probs.shape)\n                column_indexes[class_probs > 0.5] = 1\n            elif self.objective == \"multi:softmax\":\n                return class_probs.astype(np.int32)\n            else:\n                # turns soft logit into class label\n                column_indexes = np.repeat(0, class_probs.shape[0])\n                column_indexes[class_probs > 0.5] = 1\n\n            return column_indexes\n\n    def predict_proba(\n        self,\n        X: ArrayLike,\n        
validate_features: bool = True,\n        base_margin: Optional[ArrayLike] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> np.ndarray:\n        \"\"\"Predict the probability of each `X` example being of a given class. If the\n        model is trained with early stopping, then :py:attr:`best_iteration` is used\n        automatically. The estimator uses `inplace_predict` by default and falls back to\n        using :py:class:`DMatrix` if devices between the data and the estimator don't\n        match.\n\n        .. note:: This function is only thread safe for `gbtree` and `dart`.\n\n        Parameters\n        ----------\n        X :\n            Feature matrix. See :ref:`py-data` for a list of supported types.\n        validate_features :\n            When this is True, validate that the Booster's and data's feature_names are\n            identical.  Otherwise, it is assumed that the feature_names are the same.\n        base_margin :\n            Global bias for each instance. See :doc:`/tutorials/intercept` for details.\n        iteration_range :\n            Specifies which layer of trees are used in prediction.  For example, if a\n            random forest is trained with 100 rounds.  
Specifying `iteration_range=(10,\n            20)`, then only the forests built during [10, 20) (half open set) rounds are\n            used in this prediction.\n\n        Returns\n        -------\n        prediction :\n            a numpy array of shape array-like of shape (n_samples, n_classes) with the\n            probability of each data example being of a given class.\n\n        \"\"\"\n        # custom obj:      Do nothing as we don't know what to do.\n        # softprob:        Do nothing, output is proba.\n        # softmax:         Use softmax from scipy\n        # binary:logistic: Expand the prob vector into 2-class matrix after predict.\n        # binary:logitraw: Unsupported by predict_proba()\n        if self.objective == \"multi:softmax\":\n            raw_predt = super().predict(\n                X=X,\n                validate_features=validate_features,\n                base_margin=base_margin,\n                iteration_range=iteration_range,\n                output_margin=True,\n            )\n            class_prob = softmax(raw_predt, axis=1)\n            return class_prob\n        class_probs = super().predict(\n            X=X,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n        return _cls_predict_proba(self.n_classes_, class_probs, np.vstack)\n\n    @property\n    def classes_(self) -> np.ndarray:\n        return np.arange(self.n_classes_)\n\n\n@xgboost_model_doc(\n    \"scikit-learn API for XGBoost random forest classification.\",\n    [\"model\", \"objective\"],\n    extra_parameters=\"\"\"\n    n_estimators : Optional[int]\n        Number of trees in random forest to fit.\n\"\"\",\n)\nclass XGBRFClassifier(XGBClassifier):\n    # pylint: disable=missing-docstring\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        learning_rate: float = 1.0,\n        subsample: float = 0.8,\n        colsample_bynode: 
float = 0.8,\n        reg_lambda: float = 1e-5,\n        **kwargs: Any,\n    ):\n        super().__init__(\n            learning_rate=learning_rate,\n            subsample=subsample,\n            colsample_bynode=colsample_bynode,\n            reg_lambda=reg_lambda,\n            **kwargs,\n        )\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n\n    def get_xgb_params(self) -> Dict[str, Any]:\n        params = super().get_xgb_params()\n        params[\"num_parallel_tree\"] = super().get_num_boosting_rounds()\n        return params\n\n    def get_num_boosting_rounds(self) -> int:\n        return 1\n\n    # pylint: disable=unused-argument\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: ArrayLike,\n        y: ArrayLike,\n        *,\n        sample_weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,\n        verbose: Optional[Union[bool, int]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,\n        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,\n        feature_weights: Optional[ArrayLike] = None,\n    ) -> \"XGBRFClassifier\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n        super().fit(**args)\n        return self\n\n\n@xgboost_model_doc(\n    \"Implementation of the scikit-learn API for XGBoost regression.\",\n    [\"estimators\", \"model\", \"objective\"],\n)\nclass XGBRegressor(XGBRegressorBase, XGBModel):\n    # pylint: disable=missing-docstring\n    @_deprecate_positional_args\n    def __init__(\n        self, *, objective: SklObjective = \"reg:squarederror\", **kwargs: Any\n    ) -> None:\n        super().__init__(objective=objective, **kwargs)\n\n    def 
_more_tags(self) -> Dict[str, bool]:\n        tags = super()._more_tags()\n        tags[\"multioutput\"] = True\n        tags[\"multioutput_only\"] = False\n        return tags\n\n    def __sklearn_tags__(self) -> _sklearn_Tags:\n        tags = super().__sklearn_tags__()\n        tags_dict = self._more_tags()\n        tags.target_tags.multi_output = tags_dict[\"multioutput\"]\n        tags.target_tags.single_output = not tags_dict[\"multioutput_only\"]\n        return tags\n\n\n@xgboost_model_doc(\n    \"scikit-learn API for XGBoost random forest regression.\",\n    [\"model\", \"objective\"],\n    extra_parameters=\"\"\"\n    n_estimators : Optional[int]\n        Number of trees in random forest to fit.\n\"\"\",\n)\nclass XGBRFRegressor(XGBRegressor):\n    # pylint: disable=missing-docstring\n    @_deprecate_positional_args\n    def __init__(\n        self,\n        *,\n        learning_rate: float = 1.0,\n        subsample: float = 0.8,\n        colsample_bynode: float = 0.8,\n        reg_lambda: float = 1e-5,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(\n            learning_rate=learning_rate,\n            subsample=subsample,\n            colsample_bynode=colsample_bynode,\n            reg_lambda=reg_lambda,\n            **kwargs,\n        )\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n\n    def get_xgb_params(self) -> Dict[str, Any]:\n        params = super().get_xgb_params()\n        params[\"num_parallel_tree\"] = super().get_num_boosting_rounds()\n        return params\n\n    def get_num_boosting_rounds(self) -> int:\n        return 1\n\n    # pylint: disable=unused-argument\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: ArrayLike,\n        y: ArrayLike,\n        *,\n        sample_weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,\n        verbose: Optional[Union[bool, 
int]] = True,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,\n        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,\n        feature_weights: Optional[ArrayLike] = None,\n    ) -> \"XGBRFRegressor\":\n        args = {k: v for k, v in locals().items() if k not in (\"self\", \"__class__\")}\n        _check_rf_callback(self.early_stopping_rounds, self.callbacks)\n        super().fit(**args)\n        return self\n\n\ndef _get_qid(\n    X: ArrayLike, qid: Optional[ArrayLike]\n) -> Tuple[ArrayLike, Optional[ArrayLike]]:\n    \"\"\"Get the special qid column from X if exists.\"\"\"\n    if (_is_pandas_df(X) or _is_cudf_df(X)) and hasattr(X, \"qid\"):\n        if qid is not None:\n            raise ValueError(\n                \"Found both the special column `qid` in `X` and the `qid` from the\"\n                \"`fit` method. Please remove one of them.\"\n            )\n        q_x = X.qid\n        X = X.drop(\"qid\", axis=1)\n        return X, q_x\n    return X, qid\n\n\n@xgboost_model_doc(\n    \"\"\"Implementation of the Scikit-Learn API for XGBoost Ranking.\n\nSee :doc:`Learning to Rank </tutorials/learning_to_rank>` for an introduction.\n\n    \"\"\",\n    [\"estimators\", \"model\"],\n    end_note=\"\"\"\n        .. note::\n\n            A custom objective function is currently not supported by XGBRanker.\n\n        .. note::\n\n            Query group information is only required for ranking training but not\n            prediction. Multiple groups can be predicted on a single call to\n            :py:meth:`predict`.\n\n        When fitting the model with the `group` parameter, your data need to be sorted\n        by the query group first. 
`group` is an array that contains the size of each\n        query group.\n\n        Similarly, when fitting the model with the `qid` parameter, the data should be\n        sorted according to query index and `qid` is an array that contains the query\n        index for each training sample.\n\n        For example, if your original data look like:\n\n        +-------+-----------+---------------+\n        |   qid |   label   |   features    |\n        +-------+-----------+---------------+\n        |   1   |   0       |   x_1         |\n        +-------+-----------+---------------+\n        |   1   |   1       |   x_2         |\n        +-------+-----------+---------------+\n        |   1   |   0       |   x_3         |\n        +-------+-----------+---------------+\n        |   2   |   0       |   x_4         |\n        +-------+-----------+---------------+\n        |   2   |   1       |   x_5         |\n        +-------+-----------+---------------+\n        |   2   |   1       |   x_6         |\n        +-------+-----------+---------------+\n        |   2   |   1       |   x_7         |\n        +-------+-----------+---------------+\n\n        then :py:meth:`fit` method can be called with either `group` array as ``[3, 4]``\n        or with `qid` as ``[1, 1, 1, 2, 2, 2, 2]``, that is the qid column.  
Also, the\n        `qid` can be a special column of input `X` instead of a separated parameter, see\n        :py:meth:`fit` for more info.\"\"\",\n)\nclass XGBRanker(XGBRankerMixIn, XGBModel):\n    # pylint: disable=missing-docstring,too-many-arguments\n    @_deprecate_positional_args\n    def __init__(self, *, objective: str = \"rank:ndcg\", **kwargs: Any):\n        super().__init__(objective=objective, **kwargs)\n        if callable(self.objective):\n            raise ValueError(\"custom objective function not supported by XGBRanker\")\n        if \"rank:\" not in objective:\n            raise ValueError(\"please use XGBRanker for ranking task\")\n\n    def _create_ltr_dmatrix(\n        self, ref: Optional[DMatrix], data: ArrayLike, qid: ArrayLike, **kwargs: Any\n    ) -> DMatrix:\n        data, qid = _get_qid(data, qid)\n\n        if kwargs.get(\"group\", None) is None and qid is None:\n            raise ValueError(\"Either `group` or `qid` is required for ranking task\")\n\n        return super()._create_dmatrix(ref=ref, data=data, qid=qid, **kwargs)\n\n    @_deprecate_positional_args\n    def fit(\n        self,\n        X: ArrayLike,\n        y: ArrayLike,\n        *,\n        group: Optional[ArrayLike] = None,\n        qid: Optional[ArrayLike] = None,\n        sample_weight: Optional[ArrayLike] = None,\n        base_margin: Optional[ArrayLike] = None,\n        eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,\n        eval_group: Optional[Sequence[ArrayLike]] = None,\n        eval_qid: Optional[Sequence[ArrayLike]] = None,\n        verbose: Optional[Union[bool, int]] = False,\n        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,\n        sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,\n        base_margin_eval_set: Optional[Sequence[ArrayLike]] = None,\n        feature_weights: Optional[ArrayLike] = None,\n    ) -> \"XGBRanker\":\n        # pylint: disable = attribute-defined-outside-init,arguments-differ\n        
\"\"\"Fit gradient boosting ranker\n\n        Note that calling ``fit()`` multiple times will cause the model object to be\n        re-fit from scratch. To resume training from a previous checkpoint, explicitly\n        pass ``xgb_model`` argument.\n\n        Parameters\n        ----------\n        X :\n            Feature matrix. See :ref:`py-data` for a list of supported types.\n\n            When this is a :py:class:`pandas.DataFrame` or a :py:class:`cudf.DataFrame`,\n            it may contain a special column called ``qid`` for specifying the query\n            index. Using a special column is the same as using the `qid` parameter,\n            except for being compatible with sklearn utility functions like\n            :py:func:`sklearn.model_selection.cross_validation`. The same convention\n            applies to the :py:meth:`XGBRanker.score` and :py:meth:`XGBRanker.predict`.\n\n            +-----+----------------+----------------+\n            | qid | feat_0         | feat_1         |\n            +-----+----------------+----------------+\n            | 0   | :math:`x_{00}` | :math:`x_{01}` |\n            +-----+----------------+----------------+\n            | 1   | :math:`x_{10}` | :math:`x_{11}` |\n            +-----+----------------+----------------+\n            | 1   | :math:`x_{20}` | :math:`x_{21}` |\n            +-----+----------------+----------------+\n\n            When the ``tree_method`` is set to ``hist``, internally, the\n            :py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`\n            for conserving memory. However, this has performance implications when the\n            device of input data is not matched with algorithm. For instance, if the\n            input is a numpy array on CPU but ``cuda`` is used for training, then the\n            data is first processed on CPU then transferred to GPU.\n        y :\n            Labels\n        group :\n            Size of each query group of training data. 
Should have as many elements as\n            the query groups in the training data.  If this is set to None, then user\n            must provide qid.\n        qid :\n            Query ID for each training sample.  Should have the size of n_samples.  If\n            this is set to None, then user must provide group or a special column in X.\n        sample_weight :\n            Query group weights\n\n            .. note:: Weights are per-group for ranking tasks\n\n                In ranking task, one weight is assigned to each query group/id (not each\n                data point). This is because we only care about the relative ordering of\n                data points within each group, so it doesn't make sense to assign\n                weights to individual data points.\n\n        base_margin :\n            Global bias for each instance. See :doc:`/tutorials/intercept` for details.\n        eval_set :\n            A list of (X, y) tuple pairs to use as validation sets, for which\n            metrics will be computed.\n            Validation metrics will help us track the performance of the model.\n        eval_group :\n            A list in which ``eval_group[i]`` is the list containing the sizes of all\n            query groups in the ``i``-th pair in **eval_set**.\n        eval_qid :\n            A list in which ``eval_qid[i]`` is the array containing query ID of ``i``-th\n            pair in **eval_set**. The special column convention in `X` applies to\n            validation datasets as well.\n\n        verbose :\n            If `verbose` is True and an evaluation set is used, the evaluation metric\n            measured on the validation set is printed to stdout at each boosting stage.\n            If `verbose` is an integer, the evaluation metric is printed at each\n            `verbose` boosting stage. 
The last boosting stage / the boosting stage found\n            by using `early_stopping_rounds` is also printed.\n        xgb_model :\n            file name of stored XGBoost model or 'Booster' instance XGBoost model to be\n            loaded before training (allows training continuation).\n        sample_weight_eval_set :\n            A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of\n            group weights on the i-th validation set.\n\n            .. note:: Weights are per-group for ranking tasks\n\n                In ranking task, one weight is assigned to each query group (not each\n                data point). This is because we only care about the relative ordering of\n                data points within each group, so it doesn't make sense to assign\n                weights to individual data points.\n        base_margin_eval_set :\n            A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like\n            object storing base margin for the i-th validation set.\n        feature_weights :\n            Weight for each feature, defines the probability of each feature being\n            selected when colsample is being used.  
All values must be greater than 0,\n            otherwise a `ValueError` is thrown.\n\n        \"\"\"\n        with config_context(verbosity=self.verbosity):\n            params = self.get_xgb_params()\n\n            model, metric, params, feature_weights = self._configure_fit(\n                xgb_model, params, feature_weights\n            )\n            model, feature_types = get_model_categories(X, model, self.feature_types)\n\n            evals_result: EvalsLog = {}\n            train_dmatrix, evals = _wrap_evaluation_matrices(\n                missing=self.missing,\n                X=X,\n                y=y,\n                group=group,\n                qid=qid,\n                sample_weight=sample_weight,\n                base_margin=base_margin,\n                feature_weights=feature_weights,\n                eval_set=eval_set,\n                sample_weight_eval_set=sample_weight_eval_set,\n                base_margin_eval_set=base_margin_eval_set,\n                eval_group=eval_group,\n                eval_qid=eval_qid,\n                create_dmatrix=self._create_ltr_dmatrix,\n                enable_categorical=self.enable_categorical,\n                feature_types=feature_types,\n            )\n\n            self._Booster = train(\n                params,\n                train_dmatrix,\n                num_boost_round=self.get_num_boosting_rounds(),\n                early_stopping_rounds=self.early_stopping_rounds,\n                evals=evals,\n                evals_result=evals_result,\n                custom_metric=metric,\n                verbose_eval=verbose,\n                xgb_model=model,\n                callbacks=self.callbacks,\n            )\n\n            self.objective = params[\"objective\"]\n\n            self._set_evaluation_result(evals_result)\n            return self\n\n    @_deprecate_positional_args\n    def predict(\n        self,\n        X: ArrayLike,\n        *,\n        output_margin: bool = False,\n        
validate_features: bool = True,\n        base_margin: Optional[ArrayLike] = None,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> ArrayLike:\n        X, _ = _get_qid(X, None)\n        return super().predict(\n            X,\n            output_margin=output_margin,\n            validate_features=validate_features,\n            base_margin=base_margin,\n            iteration_range=iteration_range,\n        )\n\n    def apply(\n        self,\n        X: ArrayLike,\n        iteration_range: Optional[IterationRange] = None,\n    ) -> ArrayLike:\n        X, _ = _get_qid(X, None)\n        return super().apply(X, iteration_range)\n\n    def score(self, X: ArrayLike, y: ArrayLike) -> float:\n        \"\"\"Evaluate score for data using the last evaluation metric. If the model is\n        trained with early stopping, then :py:attr:`best_iteration` is used\n        automatically.\n\n        Parameters\n        ----------\n        X : Union[pd.DataFrame, cudf.DataFrame]\n          Feature matrix. A DataFrame with a special `qid` column.\n\n        y :\n          Labels\n\n        Returns\n        -------\n        score :\n          The result of the last evaluation metric for the ranker.\n\n        \"\"\"\n        X, qid = _get_qid(X, None)\n        # fixme(jiamingy): base margin and group weight is not yet supported. 
We might\n        # need to make extra special fields in the dataframe.\n        Xyq = DMatrix(\n            X,\n            y,\n            qid=qid,\n            missing=self.missing,\n            enable_categorical=self.enable_categorical,\n            nthread=self.n_jobs,\n            feature_types=self.feature_types,\n        )\n        if callable(self.eval_metric):\n            metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)\n            result_str = self.get_booster().eval_set([(Xyq, \"eval\")], feval=metric)\n        else:\n            result_str = self.get_booster().eval(Xyq)\n\n        metric_score = _parse_eval_str(result_str)\n        return metric_score[-1][1]\n"
  },
  {
    "path": "python-package/xgboost/spark/__init__.py",
    "content": "\"\"\"PySpark XGBoost integration interface\"\"\"\n\ntry:\n    import pyspark\nexcept ImportError as e:\n    raise ImportError(\"pyspark package needs to be installed to use this module\") from e\n\nfrom .estimator import (\n    SparkXGBClassifier,\n    SparkXGBClassifierModel,\n    SparkXGBRanker,\n    SparkXGBRankerModel,\n    SparkXGBRegressor,\n    SparkXGBRegressorModel,\n)\n\n__all__ = [\n    \"SparkXGBClassifier\",\n    \"SparkXGBClassifierModel\",\n    \"SparkXGBRegressor\",\n    \"SparkXGBRegressorModel\",\n    \"SparkXGBRanker\",\n    \"SparkXGBRankerModel\",\n]\n"
  },
  {
    "path": "python-package/xgboost/spark/core.py",
    "content": "\"\"\"XGBoost pyspark integration submodule for core code.\"\"\"\n\nimport base64\n\n# pylint: disable=fixme, protected-access, no-member\n# pylint: disable=too-many-lines, too-many-branches\nimport json\nimport logging\nimport os\nfrom collections import namedtuple\nfrom dataclasses import asdict\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    Iterator,\n    List,\n    Optional,\n    Tuple,\n    Type,\n    Union,\n    cast,\n)\n\nimport numpy as np\nimport pandas as pd\nfrom pyspark import RDD, SparkConf, SparkContext, cloudpickle\nfrom pyspark.ml import Estimator, Model\nfrom pyspark.ml.functions import array_to_vector, vector_to_array\nfrom pyspark.ml.linalg import VectorUDT\nfrom pyspark.ml.param import Param, Params, TypeConverters\nfrom pyspark.ml.param.shared import (\n    HasFeaturesCol,\n    HasLabelCol,\n    HasPredictionCol,\n    HasProbabilityCol,\n    HasRawPredictionCol,\n    HasValidationIndicatorCol,\n    HasWeightCol,\n)\nfrom pyspark.ml.util import (\n    DefaultParamsReader,\n    DefaultParamsWriter,\n    MLReadable,\n    MLReader,\n    MLWritable,\n    MLWriter,\n)\nfrom pyspark.resource import ResourceProfileBuilder, TaskResourceRequests\nfrom pyspark.sql import Column, DataFrame\nfrom pyspark.sql.functions import col, countDistinct, pandas_udf, rand, struct\nfrom pyspark.sql.types import (\n    ArrayType,\n    BooleanType,\n    DoubleType,\n    FloatType,\n    IntegerType,\n    IntegralType,\n    LongType,\n    ShortType,\n)\nfrom scipy.special import expit, softmax  # pylint: disable=no-name-in-module\n\nfrom .._c_api import _py_version\nfrom .._typing import ArrayLike\nfrom ..collective import Config\nfrom ..compat import import_cupy, is_cudf_available, is_cupy_available\nfrom ..config import config_context, get_config\nfrom ..core import Booster, _check_distributed_params\nfrom ..sklearn import DEFAULT_N_ESTIMATORS, XGBClassifier, XGBModel, _can_use_qdm\nfrom ..training import train as worker_train\nfrom .data 
import (\n    _read_csr_matrix_from_unwrapped_spark_vec,\n    alias,\n    create_dmatrix_from_partitions,\n    pred_contribs,\n    stack_series,\n)\nfrom .params import (\n    HasArbitraryParamsDict,\n    HasBaseMarginCol,\n    HasContribPredictionCol,\n    HasEnableSparseDataOptim,\n    HasFeaturesCols,\n    HasQueryIdCol,\n)\nfrom .summary import XGBoostTrainingSummary\nfrom .utils import (\n    CommunicatorContext,\n    _get_default_params_from_func,\n    _get_gpu_id,\n    _get_host_ip,\n    _get_max_num_concurrent_tasks,\n    _get_rabit_args,\n    _get_spark_session,\n    _is_local,\n    _is_standalone_or_localcluster,\n    deserialize_booster,\n    deserialize_xgb_model,\n    get_class_name,\n    get_logger,\n    get_logger_level,\n    serialize_booster,\n    use_cuda,\n)\n\n# Put pyspark specific params here, they won't be passed to XGBoost.\n# like `validationIndicatorCol`, `base_margin_col`\n_pyspark_specific_params = [\n    \"featuresCol\",\n    \"labelCol\",\n    \"weightCol\",\n    \"rawPredictionCol\",\n    \"predictionCol\",\n    \"probabilityCol\",\n    \"validationIndicatorCol\",\n    \"base_margin_col\",\n    \"arbitrary_params_dict\",\n    \"force_repartition\",\n    \"num_workers\",\n    \"feature_names\",\n    \"features_cols\",\n    \"enable_sparse_data_optim\",\n    \"qid_col\",\n    \"repartition_random_shuffle\",\n    \"pred_contrib_col\",\n    \"launch_tracker_on_driver\",\n    \"coll_cfg\",\n]\n\n_non_booster_params = [\"missing\", \"n_estimators\", \"feature_types\", \"feature_weights\"]\n\n_pyspark_param_alias_map = {\n    \"features_col\": \"featuresCol\",\n    \"label_col\": \"labelCol\",\n    \"weight_col\": \"weightCol\",\n    \"raw_prediction_col\": \"rawPredictionCol\",\n    \"prediction_col\": \"predictionCol\",\n    \"probability_col\": \"probabilityCol\",\n    \"validation_indicator_col\": \"validationIndicatorCol\",\n}\n\n_inverse_pyspark_param_alias_map = {v: k for k, v in 
_pyspark_param_alias_map.items()}\n\n_unsupported_xgb_params = [\n    \"enable_categorical\",  # Use feature_types param to specify categorical feature instead\n    \"n_jobs\",  # Do not allow user to set it, will use `spark.task.cpus` value instead.\n    \"nthread\",  # Ditto\n]\n\n_unsupported_fit_params = {\n    \"sample_weight\",  # Supported by spark param weightCol\n    \"eval_set\",  # Supported by spark param validation_indicator_col\n    \"sample_weight_eval_set\",  # Supported by spark param weight_col + validation_indicator_col\n    \"base_margin\",  # Supported by spark param base_margin_col\n    \"base_margin_eval_set\",  # Supported by spark param base_margin_col + validation_indicator_col\n    \"group\",  # Use spark param `qid_col` instead\n    \"qid\",  # Use spark param `qid_col` instead\n    \"eval_group\",  # Use spark param `qid_col` instead\n    \"eval_qid\",  # Use spark param `qid_col` instead\n}\n\n_unsupported_train_params = {\n    \"evals\",  # Supported by spark param validation_indicator_col\n    \"evals_result\",  # Won't support yet+\n}\n\n_unsupported_predict_params = {\n    # for classification, we can use rawPrediction as margin\n    \"output_margin\",\n    \"validate_features\",  # TODO\n    \"base_margin\",  # Use pyspark base_margin_col param instead.\n}\n\n# TODO: supply hint message for all other unsupported params.\n_unsupported_params_hint_message = {\n    \"enable_categorical\": \"`xgboost.spark` estimators do not have 'enable_categorical' param, \"\n    \"but you can set `feature_types` param and mark categorical features with 'c' string.\"\n}\n\n# Global prediction names\nPred = namedtuple(\n    \"Pred\", (\"prediction\", \"raw_prediction\", \"probability\", \"pred_contrib\")\n)\npred = Pred(\"prediction\", \"rawPrediction\", \"probability\", \"predContrib\")\n\n_INIT_BOOSTER_SAVE_PATH = \"init_booster.json\"\n\n_LOG_TAG = \"XGBoost-PySpark\"\n\n\nclass _SparkXGBParams(\n    HasFeaturesCol,\n    HasLabelCol,\n    
HasWeightCol,\n    HasPredictionCol,\n    HasValidationIndicatorCol,\n    HasArbitraryParamsDict,\n    HasBaseMarginCol,\n    HasFeaturesCols,\n    HasEnableSparseDataOptim,\n    HasQueryIdCol,\n    HasContribPredictionCol,\n):\n    num_workers = Param(\n        Params._dummy(),\n        \"num_workers\",\n        \"The number of XGBoost workers. Each XGBoost worker corresponds to one spark task.\",\n        TypeConverters.toInt,\n    )\n    device = Param(\n        Params._dummy(),\n        \"device\",\n        (\n            \"The device type for XGBoost executors. Available options are `cpu`,`cuda`\"\n            \" and `gpu`. Set `device` to `cuda` or `gpu` if the executors are running \"\n            \"on GPU instances. Currently, only one GPU per task is supported.\"\n        ),\n        TypeConverters.toString,\n    )\n    force_repartition = Param(\n        Params._dummy(),\n        \"force_repartition\",\n        \"A boolean variable. Set force_repartition=true if you \"\n        + \"want to force the input dataset to be repartitioned before XGBoost training.\"\n        + \"Note: The auto repartitioning judgement is not fully accurate, so it is recommended\"\n        + \"to have force_repartition be True.\",\n        TypeConverters.toBoolean,\n    )\n    repartition_random_shuffle = Param(\n        Params._dummy(),\n        \"repartition_random_shuffle\",\n        \"A boolean variable. Set repartition_random_shuffle=true if you want to random shuffle \"\n        \"dataset when repartitioning is required. By default is True.\",\n        TypeConverters.toBoolean,\n    )\n    feature_names = Param(\n        Params._dummy(),\n        \"feature_names\",\n        \"A list of str to specify feature names.\",\n        TypeConverters.toList,\n    )\n    launch_tracker_on_driver = Param(\n        Params._dummy(),\n        \"launch_tracker_on_driver\",\n        \"A boolean variable. 
Set launch_tracker_on_driver to true if you want the tracker to be \"\n        \"launched on the driver side; otherwise, it will be launched on the executor side.\",\n        TypeConverters.toBoolean,\n    )\n    coll_cfg = Param(\n        Params._dummy(),\n        \"coll_cfg\",\n        \"xgboost.collective.Config. The collective configuration.\",\n        TypeConverters.identity,\n    )\n\n    def set_coll_cfg(self, value: Config) -> \"_SparkXGBParams\":\n        \"\"\"Set collective configuration\"\"\"\n        assert isinstance(value, Config)\n        self.set(self.coll_cfg, value)\n        return self\n\n    def set_device(self, value: str) -> \"_SparkXGBParams\":\n        \"\"\"Set device, optional value: cpu, cuda, gpu\"\"\"\n        _check_distributed_params({\"device\": value})\n        assert value in (\"cpu\", \"cuda\", \"gpu\")\n        self.set(self.device, value)\n        return self\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBModel]:\n        \"\"\"\n        Subclasses should override this method and\n        returns an xgboost.XGBModel subclass\n        \"\"\"\n        raise NotImplementedError()\n\n    # Parameters for xgboost.XGBModel()\n    @classmethod\n    def _get_xgb_params_default(cls) -> Dict[str, Any]:\n        \"\"\"Get the xgboost.sklearn.XGBModel default parameters and filter out some\"\"\"\n        xgb_model_default = cls._xgb_cls()()\n        params_dict = xgb_model_default.get_params()\n        filtered_params_dict = {\n            k: params_dict[k] for k in params_dict if k not in _unsupported_xgb_params\n        }\n        filtered_params_dict[\"n_estimators\"] = DEFAULT_N_ESTIMATORS\n        return filtered_params_dict\n\n    def _set_xgb_params_default(self) -> None:\n        \"\"\"Set xgboost parameters into spark parameters\"\"\"\n        filtered_params_dict = self._get_xgb_params_default()\n        self._setDefault(**filtered_params_dict)\n\n    def _gen_xgb_params_dict(\n        self, 
gen_xgb_sklearn_estimator_param: bool = False\n    ) -> Dict[str, Any]:\n        \"\"\"Generate the xgboost parameters which will be passed into xgboost library\"\"\"\n        xgb_params = {}\n        non_xgb_params = (\n            set(_pyspark_specific_params)\n            | self._get_fit_params_default().keys()\n            | self._get_predict_params_default().keys()\n        )\n        if not gen_xgb_sklearn_estimator_param:\n            non_xgb_params |= set(_non_booster_params)\n        for param in self.extractParamMap():\n            if param.name not in non_xgb_params:\n                xgb_params[param.name] = self.getOrDefault(param)\n\n        arbitrary_params_dict = self.getOrDefault(\n            self.getParam(\"arbitrary_params_dict\")\n        )\n        xgb_params.update(arbitrary_params_dict)\n        return xgb_params\n\n    # Parameters for xgboost.XGBModel().fit()\n    @classmethod\n    def _get_fit_params_default(cls) -> Dict[str, Any]:\n        \"\"\"Get the xgboost.XGBModel().fit() parameters\"\"\"\n        fit_params = _get_default_params_from_func(\n            cls._xgb_cls().fit, _unsupported_fit_params\n        )\n        return fit_params\n\n    def _set_fit_params_default(self) -> None:\n        \"\"\"Get the xgboost.XGBModel().fit() parameters and set them to spark parameters\"\"\"\n        filtered_params_dict = self._get_fit_params_default()\n        self._setDefault(**filtered_params_dict)\n\n    def _gen_fit_params_dict(self) -> Dict[str, Any]:\n        \"\"\"Generate the fit parameters which will be passed into fit function\"\"\"\n        fit_params_keys = self._get_fit_params_default().keys()\n        fit_params = {}\n        for param in self.extractParamMap():\n            if param.name in fit_params_keys:\n                fit_params[param.name] = self.getOrDefault(param)\n        return fit_params\n\n    @classmethod\n    def _get_predict_params_default(cls) -> Dict[str, Any]:\n        \"\"\"Get the parameters from 
xgboost.XGBModel().predict()\"\"\"\n        predict_params = _get_default_params_from_func(\n            cls._xgb_cls().predict, _unsupported_predict_params\n        )\n        return predict_params\n\n    def _set_predict_params_default(self) -> None:\n        \"\"\"Get the parameters from xgboost.XGBModel().predict() and\n        set them into spark parameters\"\"\"\n        filtered_params_dict = self._get_predict_params_default()\n        self._setDefault(**filtered_params_dict)\n\n    def _gen_predict_params_dict(self) -> Dict[str, Any]:\n        \"\"\"Generate predict parameters which will be passed into xgboost.XGBModel().predict()\"\"\"\n        predict_params_keys = self._get_predict_params_default().keys()\n        predict_params = {}\n        for param in self.extractParamMap():\n            if param.name in predict_params_keys:\n                predict_params[param.name] = self.getOrDefault(param)\n        return predict_params\n\n    def _validate_gpu_params(\n        self, spark_version: str, conf: SparkConf, is_local: bool = False\n    ) -> None:\n        \"\"\"Validate the gpu parameters and gpu configurations\"\"\"\n\n        if self._run_on_gpu():\n            if is_local:\n                # Supporting GPU training in Spark local mode is just for debugging\n                # purposes, so it's okay for printing the below warning instead of\n                # checking the real gpu numbers and raising the exception.\n                get_logger(self.__class__.__name__).warning(\n                    \"You have enabled GPU in spark local mode. 
Please make sure your\"\n                    \" local node has at least %d GPUs\",\n                    self.getOrDefault(self.num_workers),\n                )\n            else:\n                executor_gpus = conf.get(\"spark.executor.resource.gpu.amount\")\n                if executor_gpus is None:\n                    raise ValueError(\n                        \"The `spark.executor.resource.gpu.amount` is required for training\"\n                        \" on GPU.\"\n                    )\n                gpu_per_task = conf.get(\"spark.task.resource.gpu.amount\")\n                if gpu_per_task is not None and float(gpu_per_task) > 1.0:\n                    get_logger(self.__class__.__name__).warning(\n                        \"The configuration assigns %s GPUs to each Spark task, but each \"\n                        \"XGBoost training task only utilizes 1 GPU, which will lead to \"\n                        \"unnecessary GPU waste\",\n                        gpu_per_task,\n                    )\n                # For 3.5.1+, Spark supports task stage-level scheduling for\n                #                          Yarn/K8s/Standalone/Local cluster\n                # From 3.4.0 ~ 3.5.0, Spark only supports task stage-level scheduling for\n                #                           Standalone/Local cluster\n                # For spark below 3.4.0, Task stage-level scheduling is not supported.\n                #\n                # With stage-level scheduling, spark.task.resource.gpu.amount is not required\n                # to be set explicitly. 
Or else, spark.task.resource.gpu.amount is a must-have and\n                # must be set to 1.0\n                if spark_version < \"3.4.0\" or (\n                    \"3.4.0\" <= spark_version < \"3.5.1\"\n                    and not _is_standalone_or_localcluster(conf)\n                ):\n                    if gpu_per_task is not None:\n                        if float(gpu_per_task) < 1.0:\n                            raise ValueError(\n                                \"XGBoost doesn't support GPU fractional configurations. Please set \"\n                                \"`spark.task.resource.gpu.amount=spark.executor.resource.gpu.\"\n                                \"amount`. To enable GPU fractional configurations, you can try \"\n                                \"standalone/localcluster with spark 3.4.0+ and\"\n                                \"YARN/K8S with spark 3.5.1+\"\n                            )\n                    else:\n                        raise ValueError(\n                            \"The `spark.task.resource.gpu.amount` is required for training\"\n                            \" on GPU.\"\n                        )\n\n    def _validate_params(self) -> None:\n        # pylint: disable=too-many-branches\n        init_model = self.getOrDefault(\"xgb_model\")\n        if init_model is not None and not isinstance(init_model, Booster):\n            raise ValueError(\n                \"The xgb_model param must be set with a `xgboost.core.Booster` \"\n                \"instance.\"\n            )\n\n        if self.getOrDefault(self.num_workers) < 1:\n            raise ValueError(\n                f\"Number of workers was {self.getOrDefault(self.num_workers)}.\"\n                f\"It cannot be less than 1 [Default is 1]\"\n            )\n\n        tree_method = self.getOrDefault(self.getParam(\"tree_method\"))\n        if tree_method == \"exact\":\n            raise ValueError(\n                \"The `exact` tree method is not supported for 
distributed systems.\"\n            )\n\n        if self.getOrDefault(\"objective\") is not None:\n            if not isinstance(self.getOrDefault(\"objective\"), str):\n                raise ValueError(\"Only string type 'objective' param is allowed.\")\n\n        eval_metric = \"eval_metric\"\n        if self.getOrDefault(eval_metric) is not None:\n            if not (\n                isinstance(self.getOrDefault(eval_metric), str)\n                or (\n                    isinstance(self.getOrDefault(eval_metric), List)\n                    and all(\n                        isinstance(metric, str)\n                        for metric in self.getOrDefault(eval_metric)\n                    )\n                )\n            ):\n                raise ValueError(\n                    \"Only string type or list of string type 'eval_metric' param is allowed.\"\n                )\n\n        if self.getOrDefault(\"early_stopping_rounds\") is not None:\n            if not self._col_is_defined_not_empty(self.validationIndicatorCol):\n                raise ValueError(\n                    \"If 'early_stopping_rounds' param is set, you need to set \"\n                    \"'validation_indicator_col' param as well.\"\n                )\n\n        if self.getOrDefault(self.enable_sparse_data_optim):\n            if self.getOrDefault(\"missing\") != 0.0:\n                # If DMatrix is constructed from csr / csc matrix, then inactive elements\n                # in csr / csc matrix are regarded as missing value, but, in pyspark, it\n                # is hard to control whether elements are active or inactive in a sparse vector column,\n                # some spark transformers such as VectorAssembler might compress vectors\n                # to be dense or sparse format automatically, and when a spark ML vector object\n                # is compressed to sparse vector, then all zero value elements become inactive.\n                # So we force setting missing param to be 0 when 
enable_sparse_data_optim config\n                # is True.\n                raise ValueError(\n                    \"If enable_sparse_data_optim is True, missing param != 0 is not supported.\"\n                )\n            if self.getOrDefault(self.features_cols):\n                raise ValueError(\n                    \"If enable_sparse_data_optim is True, you cannot set multiple feature columns \"\n                    \"but you should set one feature column with values of \"\n                    \"`pyspark.ml.linalg.Vector` type.\"\n                )\n\n        ss = _get_spark_session()\n        sc = ss.sparkContext\n        self._validate_gpu_params(ss.version, sc.getConf(), _is_local(sc))\n\n    def _run_on_gpu(self) -> bool:\n        \"\"\"If train or transform on the gpu according to the parameters\"\"\"\n\n        return use_cuda(self.getOrDefault(self.device))\n\n    def _col_is_defined_not_empty(self, param: \"Param[str]\") -> bool:\n        return self.isDefined(param) and self.getOrDefault(param) not in (None, \"\")\n\n\ndef _validate_and_convert_feature_col_as_float_col_list(\n    dataset: DataFrame, features_col_names: List[str]\n) -> List[Column]:\n    \"\"\"Values in feature columns must be integral types or float/double types\"\"\"\n    feature_cols = []\n    for c in features_col_names:\n        if isinstance(dataset.schema[c].dataType, DoubleType):\n            feature_cols.append(col(c).cast(FloatType()).alias(c))\n        elif isinstance(dataset.schema[c].dataType, (FloatType, IntegralType)):\n            feature_cols.append(col(c))\n        else:\n            raise ValueError(\n                \"Values in feature columns must be integral types or float/double types.\"\n            )\n    return feature_cols\n\n\ndef _validate_and_convert_feature_col_as_array_col(\n    dataset: DataFrame, features_col_name: str\n) -> Column:\n    \"\"\"It handles\n    1. Convert vector type to array type\n    2. 
Cast to Array(Float32)\"\"\"\n    features_col_datatype = dataset.schema[features_col_name].dataType\n    features_col = col(features_col_name)\n    if isinstance(features_col_datatype, ArrayType):\n        if not isinstance(\n            features_col_datatype.elementType,\n            (DoubleType, FloatType, LongType, IntegerType, ShortType),\n        ):\n            raise ValueError(\n                \"If feature column is array type, its elements must be number type, \"\n                f\"got {features_col_datatype.elementType}.\"\n            )\n        features_array_col = features_col.cast(ArrayType(FloatType())).alias(alias.data)\n    elif isinstance(features_col_datatype, VectorUDT):\n        features_array_col = vector_to_array(features_col, dtype=\"float32\").alias(\n            alias.data\n        )\n    else:\n        raise ValueError(\n            \"feature column must be array type or `pyspark.ml.linalg.Vector` type, \"\n            \"if you want to use multiple numetric columns as features, please use \"\n            \"`pyspark.ml.transform.VectorAssembler` to assemble them into a vector \"\n            \"type column first.\"\n        )\n    return features_array_col\n\n\ndef _get_unwrap_udt_fn() -> Callable[[Union[Column, str]], Column]:\n    try:\n        from pyspark.sql.functions import unwrap_udt\n\n        return unwrap_udt\n    except ImportError:\n        pass\n\n    try:\n        from pyspark.databricks.sql.functions import unwrap_udt as databricks_unwrap_udt\n\n        return databricks_unwrap_udt\n    except ImportError as exc:\n        raise RuntimeError(\n            \"Cannot import pyspark `unwrap_udt` function. 
Please install pyspark>=3.4 \"\n            \"or run on Databricks Runtime.\"\n        ) from exc\n\n\ndef _get_unwrapped_vec_cols(feature_col: Column) -> List[Column]:\n    unwrap_udt = _get_unwrap_udt_fn()\n    features_unwrapped_vec_col = unwrap_udt(feature_col)\n\n    # After a `pyspark.ml.linalg.VectorUDT` type column being unwrapped, it becomes\n    # a pyspark struct type column, the struct fields are:\n    #  - `type`: byte\n    #  - `size`: int\n    #  - `indices`: array<int>\n    #  - `values`: array<double>\n    # For sparse vector, `type` field is 0, `size` field means vector length,\n    # `indices` field is the array of active element indices, `values` field\n    # is the array of active element values.\n    # For dense vector, `type` field is 1, `size` and `indices` fields are None,\n    # `values` field is the array of the vector element values.\n    return [\n        features_unwrapped_vec_col.type.alias(\"featureVectorType\"),\n        features_unwrapped_vec_col.size.alias(\"featureVectorSize\"),\n        features_unwrapped_vec_col.indices.alias(\"featureVectorIndices\"),\n        # Note: the value field is double array type, cast it to float32 array type\n        # for speedup following repartitioning.\n        features_unwrapped_vec_col.values.cast(ArrayType(FloatType())).alias(\n            \"featureVectorValues\"\n        ),\n    ]\n\n\nFeatureProp = namedtuple(\n    \"FeatureProp\",\n    (\"enable_sparse_data_optim\", \"has_validation_col\", \"features_cols_names\"),\n)\n\n_MODEL_CHUNK_SIZE = 4096 * 1024\n\n\nclass _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):\n    _input_kwargs: Dict[str, Any]\n\n    def __init__(self) -> None:\n        super().__init__()\n        self._set_xgb_params_default()\n        self._set_fit_params_default()\n        self._set_predict_params_default()\n        # Note: The default value for arbitrary_params_dict must always be empty dict.\n        #  For additional settings added into 
\"arbitrary_params_dict\" by default,\n        #  they are added in `setParams`.\n        self._setDefault(\n            num_workers=1,\n            device=\"cpu\",\n            force_repartition=False,\n            repartition_random_shuffle=False,\n            feature_names=None,\n            feature_types=None,\n            feature_weights=None,\n            arbitrary_params_dict={},\n            launch_tracker_on_driver=True,\n        )\n\n        self.logger = get_logger(self.__class__.__name__)\n\n    def setParams(self, **kwargs: Any) -> None:\n        \"\"\"\n        Set params for the estimator.\n        \"\"\"\n        _extra_params = {}\n        if \"arbitrary_params_dict\" in kwargs:\n            raise ValueError(\"Invalid param name: 'arbitrary_params_dict'.\")\n\n        for k, v in kwargs.items():\n            # We're not allowing user use features_cols directly.\n            if k == self.features_cols.name:\n                raise ValueError(\n                    f\"Unsupported param '{k}' please use features_col instead.\"\n                )\n            if k in _inverse_pyspark_param_alias_map:\n                raise ValueError(\n                    f\"Please use param name {_inverse_pyspark_param_alias_map[k]} instead.\"\n                )\n            if k in _pyspark_param_alias_map:\n                if k == _inverse_pyspark_param_alias_map[\n                    self.featuresCol.name\n                ] and isinstance(v, list):\n                    real_k = self.features_cols.name\n                    k = real_k\n                else:\n                    real_k = _pyspark_param_alias_map[k]\n                    k = real_k\n\n            if self.hasParam(k):\n                if k == \"features_col\" and isinstance(v, list):\n                    self._set(**{\"features_cols\": v})\n                else:\n                    self._set(**{str(k): v})\n            else:\n                if (\n                    k in _unsupported_xgb_params\n         
           or k in _unsupported_fit_params\n                    or k in _unsupported_predict_params\n                    or k in _unsupported_train_params\n                ):\n                    err_msg = _unsupported_params_hint_message.get(\n                        k, f\"Unsupported param '{k}'.\"\n                    )\n                    raise ValueError(err_msg)\n                _extra_params[k] = v\n\n        _check_distributed_params(kwargs)\n        _existing_extra_params = self.getOrDefault(self.arbitrary_params_dict)\n        self._set(arbitrary_params_dict={**_existing_extra_params, **_extra_params})\n\n    @classmethod\n    def _pyspark_model_cls(cls) -> Type[\"_SparkXGBModel\"]:\n        \"\"\"\n        Subclasses should override this method and\n        returns a _SparkXGBModel subclass\n        \"\"\"\n        raise NotImplementedError()\n\n    def _create_pyspark_model(\n        self, xgb_model: XGBModel, training_summary: XGBoostTrainingSummary\n    ) -> \"_SparkXGBModel\":\n        return self._pyspark_model_cls()(xgb_model, training_summary)\n\n    def _convert_to_sklearn_model(self, booster: bytearray, config: str) -> XGBModel:\n        xgb_sklearn_params = self._gen_xgb_params_dict(\n            gen_xgb_sklearn_estimator_param=True\n        )\n        sklearn_model = self._xgb_cls()(**xgb_sklearn_params)\n        sklearn_model.load_model(booster)\n        sklearn_model._Booster.load_config(config)\n        return sklearn_model\n\n    def _repartition_needed(self, dataset: DataFrame) -> bool:\n        \"\"\"\n        We repartition the dataset if the number of workers is not equal to the number of\n        partitions.\"\"\"\n        if self.getOrDefault(self.force_repartition):\n            return True\n        num_workers = self.getOrDefault(self.num_workers)\n        num_partitions = dataset.rdd.getNumPartitions()\n        return not num_workers == num_partitions\n\n    def _get_distributed_train_params(self, dataset: DataFrame) -> Dict[str, 
Any]:\n        \"\"\"\n        This just gets the configuration params for distributed xgboost\n        \"\"\"\n        params = self._gen_xgb_params_dict()\n        fit_params = self._gen_fit_params_dict()\n        verbose_eval = fit_params.pop(\"verbose\", None)\n\n        params.update(fit_params)\n        params[\"verbose_eval\"] = verbose_eval\n        classification = self._xgb_cls() == XGBClassifier\n        if classification:\n            num_classes = int(\n                dataset.select(countDistinct(alias.label)).collect()[0][0]\n            )\n            if num_classes <= 2:\n                params[\"objective\"] = \"binary:logistic\"\n            else:\n                params[\"objective\"] = \"multi:softprob\"\n                params[\"num_class\"] = num_classes\n        else:\n            # use user specified objective or default objective.\n            # e.g., the default objective for Regressor is 'reg:squarederror'\n            params[\"objective\"] = self.getOrDefault(\"objective\")\n\n        # TODO: support \"num_parallel_tree\" for random forest\n        params[\"num_boost_round\"] = self.getOrDefault(\"n_estimators\")\n\n        return params\n\n    @classmethod\n    def _get_xgb_train_call_args(\n        cls, train_params: Dict[str, Any]\n    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:\n        xgb_train_default_args = _get_default_params_from_func(\n            worker_train, _unsupported_train_params\n        )\n        booster_params, kwargs_params = {}, {}\n        for key, value in train_params.items():\n            if key in xgb_train_default_args:\n                kwargs_params[key] = value\n            else:\n                booster_params[key] = value\n\n        booster_params = {\n            k: v for k, v in booster_params.items() if k not in _non_booster_params\n        }\n        return booster_params, kwargs_params\n\n    def _prepare_input_columns_and_feature_prop(\n        self, dataset: DataFrame\n    ) -> 
Tuple[List[Column], FeatureProp]:\n        label_col = col(self.getOrDefault(self.labelCol)).alias(alias.label)\n\n        select_cols = [label_col]\n        features_cols_names = None\n        enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)\n        if enable_sparse_data_optim:\n            features_col_name = self.getOrDefault(self.featuresCol)\n            features_col_datatype = dataset.schema[features_col_name].dataType\n            if not isinstance(features_col_datatype, VectorUDT):\n                raise ValueError(\n                    \"If enable_sparse_data_optim is True, the feature column values must be \"\n                    \"`pyspark.ml.linalg.Vector` type.\"\n                )\n            select_cols.extend(_get_unwrapped_vec_cols(col(features_col_name)))\n        else:\n            if self.getOrDefault(self.features_cols):\n                features_cols_names = self.getOrDefault(self.features_cols)\n                features_cols = _validate_and_convert_feature_col_as_float_col_list(\n                    dataset, features_cols_names\n                )\n                select_cols.extend(features_cols)\n            else:\n                features_array_col = _validate_and_convert_feature_col_as_array_col(\n                    dataset, self.getOrDefault(self.featuresCol)\n                )\n                select_cols.append(features_array_col)\n\n        if self._col_is_defined_not_empty(self.weightCol):\n            select_cols.append(\n                col(self.getOrDefault(self.weightCol)).alias(alias.weight)\n            )\n\n        has_validation_col = False\n        if self._col_is_defined_not_empty(self.validationIndicatorCol):\n            select_cols.append(\n                col(self.getOrDefault(self.validationIndicatorCol)).alias(alias.valid)\n            )\n            # In some cases, see https://issues.apache.org/jira/browse/SPARK-40407,\n            # the df.repartition can result in some reducer 
partitions without data,\n            # which will cause exception or hanging issue when creating DMatrix.\n            has_validation_col = True\n\n        if self._col_is_defined_not_empty(self.base_margin_col):\n            select_cols.append(\n                col(self.getOrDefault(self.base_margin_col)).alias(alias.margin)\n            )\n\n        if self._col_is_defined_not_empty(self.qid_col):\n            select_cols.append(col(self.getOrDefault(self.qid_col)).alias(alias.qid))\n\n        feature_prop = FeatureProp(\n            enable_sparse_data_optim, has_validation_col, features_cols_names\n        )\n        return select_cols, feature_prop\n\n    def _prepare_input(self, dataset: DataFrame) -> Tuple[DataFrame, FeatureProp]:\n        \"\"\"Prepare the input including column pruning, repartition and so on\"\"\"\n\n        select_cols, feature_prop = self._prepare_input_columns_and_feature_prop(\n            dataset\n        )\n\n        dataset = dataset.select(*select_cols)\n\n        num_workers = self.getOrDefault(self.num_workers)\n        sc = _get_spark_session().sparkContext\n        max_concurrent_tasks = _get_max_num_concurrent_tasks(sc)\n\n        if feature_prop.has_validation_col:\n            dtype = dataset.schema[alias.valid].dataType\n            if not isinstance(dtype, BooleanType):\n                raise TypeError(\"The validation indicator must be boolean type.\")\n\n        if num_workers > max_concurrent_tasks:\n            get_logger(self.__class__.__name__).warning(\n                \"The num_workers %s set for xgboost distributed \"\n                \"training is greater than current max number of concurrent \"\n                \"spark task slots, you need wait until more task slots available \"\n                \"or you need increase spark cluster workers.\",\n                num_workers,\n            )\n\n        if self._repartition_needed(dataset):\n            if self._col_is_defined_not_empty(self.qid_col):\n               
 # For ranking problem, we need to try best the put the instances with\n                # same group into the same partition\n                dataset = dataset.repartitionByRange(num_workers, alias.qid)\n            else:\n                # If validationIndicatorCol defined, and if user unionise train and validation\n                # dataset, users must set force_repartition to true to force repartition.\n                # Or else some partitions might contain only train or validation dataset.\n                if self.getOrDefault(self.repartition_random_shuffle):\n                    # In some cases, spark round-robin repartition might cause data skew\n                    # use random shuffle can address it.\n                    dataset = dataset.repartition(num_workers, rand(1))\n                else:\n                    dataset = dataset.repartition(num_workers)\n\n        if self._col_is_defined_not_empty(self.qid_col):\n            # XGBoost requires qid to be sorted for each partition\n            dataset = dataset.sortWithinPartitions(alias.qid, ascending=True)\n\n        return dataset, feature_prop\n\n    def _get_xgb_parameters(\n        self, dataset: DataFrame\n    ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:\n        train_params = self._get_distributed_train_params(dataset)\n        booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(\n            train_params\n        )\n        cpu_per_task = int(\n            _get_spark_session().sparkContext.getConf().get(\"spark.task.cpus\", \"1\")\n        )\n\n        dmatrix_kwargs = {\n            \"nthread\": cpu_per_task,\n            \"feature_types\": self.getOrDefault(\"feature_types\"),\n            \"feature_names\": self.getOrDefault(\"feature_names\"),\n            \"feature_weights\": self.getOrDefault(\"feature_weights\"),\n            \"missing\": float(self.getOrDefault(\"missing\")),\n        }\n        if dmatrix_kwargs[\"feature_types\"] is not None:\n     
       dmatrix_kwargs[\"enable_categorical\"] = True\n        booster_params[\"nthread\"] = cpu_per_task\n\n        # Remove the parameters whose value is None\n        booster_params = {k: v for k, v in booster_params.items() if v is not None}\n        train_call_kwargs_params = {\n            k: v for k, v in train_call_kwargs_params.items() if v is not None\n        }\n        dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None}\n\n        return booster_params, train_call_kwargs_params, dmatrix_kwargs\n\n    def _skip_stage_level_scheduling(self, spark_version: str, conf: SparkConf) -> bool:\n        # pylint: disable=too-many-return-statements\n        \"\"\"Check if stage-level scheduling is not needed,\n        return true to skip stage-level scheduling\"\"\"\n\n        if self._run_on_gpu():\n            if spark_version < \"3.4.0\":\n                self.logger.info(\n                    \"Stage-level scheduling in xgboost requires spark version 3.4.0+\"\n                )\n                return True\n\n            if (\n                \"3.4.0\" <= spark_version < \"3.5.1\"\n                and not _is_standalone_or_localcluster(conf)\n            ):\n                self.logger.info(\n                    \"For %s, Stage-level scheduling in xgboost requires spark standalone \"\n                    \"or local-cluster mode\",\n                    spark_version,\n                )\n                return True\n\n            executor_cores = conf.get(\"spark.executor.cores\")\n            executor_gpus = conf.get(\"spark.executor.resource.gpu.amount\")\n            if executor_cores is None or executor_gpus is None:\n                self.logger.info(\n                    \"Stage-level scheduling in xgboost requires spark.executor.cores, \"\n                    \"spark.executor.resource.gpu.amount to be set.\"\n                )\n                return True\n\n            if int(executor_cores) == 1:\n                # there will be 
only 1 task running at any time.\n                self.logger.info(\n                    \"Stage-level scheduling in xgboost requires spark.executor.cores > 1 \"\n                )\n                return True\n\n            if int(executor_gpus) > 1:\n                # For spark.executor.resource.gpu.amount > 1, we suppose user knows how to configure\n                # to make xgboost run successfully.\n                #\n                self.logger.info(\n                    \"Stage-level scheduling in xgboost will not work \"\n                    \"when spark.executor.resource.gpu.amount>1\"\n                )\n                return True\n\n            task_gpu_amount = conf.get(\"spark.task.resource.gpu.amount\")\n\n            if task_gpu_amount is None:\n                # The ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,\n                # but with stage-level scheduling, we can make training task grab the gpu.\n                return False\n\n            if float(task_gpu_amount) == float(executor_gpus):\n                # spark.executor.resource.gpu.amount=spark.task.resource.gpu.amount \"\n                # results in only 1 task running at a time, which may cause perf issue.\n                return True\n\n            # We can enable stage-level scheduling\n            return False\n\n        # CPU training doesn't require stage-level scheduling\n        return True\n\n    def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:\n        \"\"\"Try to enable stage-level scheduling\"\"\"\n        ss = _get_spark_session()\n        conf = ss.sparkContext.getConf()\n        if _is_local(ss.sparkContext) or self._skip_stage_level_scheduling(\n            ss.version, conf\n        ):\n            return rdd\n\n        # executor_cores will not be None\n        executor_cores = conf.get(\"spark.executor.cores\")\n        assert executor_cores is not None\n\n        # Spark-rapids is a project to leverage GPUs to accelerate spark 
SQL.\n        # If spark-rapids is enabled, to avoid GPU OOM, we don't allow other\n        # ETL gpu tasks running alongside training tasks.\n        spark_plugins = ss.conf.get(\"spark.plugins\", \" \")\n        assert spark_plugins is not None\n        spark_rapids_sql_enabled = ss.conf.get(\"spark.rapids.sql.enabled\", \"true\")\n        assert spark_rapids_sql_enabled is not None\n\n        task_cores = (\n            int(executor_cores)\n            if \"com.nvidia.spark.SQLPlugin\" in spark_plugins\n            and \"true\" == spark_rapids_sql_enabled.lower()\n            else (int(executor_cores) // 2) + 1\n        )\n\n        # Each training task requires cpu cores > total executor cores//2 + 1 which can\n        # make sure the tasks be sent to different executors.\n        #\n        # Please note that we can't use GPU to limit the concurrent tasks because of\n        # https://issues.apache.org/jira/browse/SPARK-45527.\n\n        task_gpus = 1.0\n        treqs = TaskResourceRequests().cpus(task_cores).resource(\"gpu\", task_gpus)\n        rp = ResourceProfileBuilder().require(treqs).build\n\n        self.logger.info(\n            \"XGBoost training tasks require the resource(cores=%s, gpu=%s).\",\n            task_cores,\n            task_gpus,\n        )\n        return rdd.withResources(rp)\n\n    def _get_tracker_args(self) -> Tuple[bool, Dict[str, Any]]:\n        \"\"\"Start the tracker and return the tracker envs on the driver side\"\"\"\n        launch_tracker_on_driver = self.getOrDefault(self.launch_tracker_on_driver)\n        rabit_args = {}\n        if launch_tracker_on_driver:\n            conf = Config()\n            if self.isDefined(self.coll_cfg):\n                conf = self.getOrDefault(self.coll_cfg)\n                assert isinstance(conf, Config)\n\n            if conf.tracker_host_ip is None:\n                conf.tracker_host_ip = (\n                    _get_spark_session().sparkContext.getConf().get(\"spark.driver.host\")\n       
         )\n            num_workers = self.getOrDefault(self.num_workers)\n            rabit_args.update(_get_rabit_args(conf, num_workers))\n        else:\n            if self.isDefined(self.coll_cfg):\n                conf = self.getOrDefault(self.coll_cfg)\n                assert isinstance(conf, Config)\n                if conf.tracker_host_ip is not None:\n                    raise ValueError(\n                        f\"You must enable launch_tracker_on_driver to use \"\n                        f\"tracker host: {conf.tracker_host_ip}\"\n                    )\n        return launch_tracker_on_driver, rabit_args\n\n    def _fit(self, dataset: DataFrame) -> \"_SparkXGBModel\":\n        # pylint: disable=too-many-statements, too-many-locals\n        self._validate_params()\n\n        dataset, feature_prop = self._prepare_input(dataset)\n\n        (\n            booster_params,\n            train_call_kwargs_params,\n            dmatrix_kwargs,\n        ) = self._get_xgb_parameters(dataset)\n\n        run_on_gpu = self._run_on_gpu()\n\n        is_local = _is_local(_get_spark_session().sparkContext)\n\n        num_workers = self.getOrDefault(self.num_workers)\n\n        launch_tracker_on_driver, rabit_args = self._get_tracker_args()\n        conf: Optional[Config] = (\n            self.getOrDefault(self.coll_cfg) if self.isSet(self.coll_cfg) else None\n        )\n\n        log_level = get_logger_level(_LOG_TAG)\n\n        use_rmm = get_config()[\"use_rmm\"]\n\n        def _train_booster(\n            pandas_df_iter: Iterator[pd.DataFrame],\n        ) -> Iterator[pd.DataFrame]:\n            \"\"\"Takes in an RDD partition and outputs a booster for that partition after\n            going through the Rabit Ring protocol\n\n            \"\"\"\n            from pyspark import BarrierTaskContext\n\n            context = BarrierTaskContext.get()\n            context.barrier()\n\n            dev_ordinal = None\n            use_qdm = _can_use_qdm(\n                
booster_params.get(\"tree_method\", None),\n                booster_params.get(\"device\", None),\n            )\n            verbosity = booster_params.get(\"verbosity\", 1)\n            msg = \"Training on CPUs\"\n            if run_on_gpu:\n                dev_ordinal = (\n                    context.partitionId() if is_local else _get_gpu_id(context)\n                )\n                booster_params[\"device\"] = \"cuda:\" + str(dev_ordinal)\n                # If cuDF is not installed, then using DMatrix instead of QDM,\n                # because without cuDF, DMatrix performs better than QDM.\n                # Note: Checking `is_cudf_available` in spark worker side because\n                # spark worker might has different python environment with driver side.\n                use_qdm = use_qdm and is_cudf_available()\n                msg = (\n                    f\"Leveraging {booster_params['device']} to train with \"\n                    f\"QDM: {'on' if use_qdm else 'off'}\"\n                )\n\n            if use_qdm and (booster_params.get(\"max_bin\", None) is not None):\n                dmatrix_kwargs[\"max_bin\"] = booster_params[\"max_bin\"]\n            _rabit_args = rabit_args\n            if context.partitionId() == 0:\n                if not launch_tracker_on_driver:\n                    _conf = conf if conf is not None else Config()\n                    _conf.tracker_host_ip = _get_host_ip(context)\n                    _rabit_args = _get_rabit_args(_conf, num_workers)\n                get_logger(_LOG_TAG, log_level).info(msg)\n\n            worker_message: Dict[str, Any] = {\n                \"use_qdm\": use_qdm,\n            }\n\n            if not launch_tracker_on_driver:\n                worker_message[\"rabit_msg\"] = _rabit_args\n\n            messages = context.allGather(message=json.dumps(worker_message))\n            if len(set(json.loads(x)[\"use_qdm\"] for x in messages)) != 1:\n                raise RuntimeError(\"The workers' 
cudf environments are in-consistent \")\n\n            if not launch_tracker_on_driver:\n                _rabit_args = json.loads(messages[0])[\"rabit_msg\"]\n\n            if conf is not None:\n                _rabit_args = conf.update_worker_args(_rabit_args)\n\n            evals_result: Dict[str, Any] = {}\n            with (\n                config_context(verbosity=verbosity, use_rmm=use_rmm),\n                CommunicatorContext(context, **_rabit_args),\n            ):\n                dtrain, dvalid = create_dmatrix_from_partitions(\n                    iterator=pandas_df_iter,\n                    feature_cols=feature_prop.features_cols_names,\n                    dev_ordinal=dev_ordinal,\n                    use_qdm=use_qdm,\n                    kwargs=dmatrix_kwargs,\n                    enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,\n                    has_validation_col=feature_prop.has_validation_col,\n                )\n                if dvalid is not None:\n                    dval = [(dtrain, \"training\"), (dvalid, \"validation\")]\n                else:\n                    dval = [(dtrain, \"training\")]\n                booster = worker_train(\n                    params=booster_params,\n                    dtrain=dtrain,\n                    evals=dval,\n                    evals_result=evals_result,\n                    **train_call_kwargs_params,\n                )\n            context.barrier()\n\n            if context.partitionId() == 0:\n                yield pd.DataFrame({\"data\": [json.dumps(dict(evals_result))]})\n                config = booster.save_config()\n                yield pd.DataFrame({\"data\": [config]})\n                booster_json = booster.save_raw(\"json\").decode(\"utf-8\")\n\n                for offset in range(0, len(booster_json), _MODEL_CHUNK_SIZE):\n                    booster_chunk = booster_json[offset : offset + _MODEL_CHUNK_SIZE]\n                    yield pd.DataFrame({\"data\": 
[booster_chunk]})\n\n        def _run_job() -> Tuple[str, str, str]:\n            rdd = (\n                dataset.mapInPandas(\n                    _train_booster,  # type: ignore[arg-type]\n                    schema=\"data string\",\n                )\n                .rdd.barrier()\n                .mapPartitions(lambda x: x)\n            )\n            rdd_with_resource = self._try_stage_level_scheduling(rdd)\n            ret = rdd_with_resource.collect()\n            data = [v[0] for v in ret]\n            return data[0], data[1], \"\".join(data[2:])\n\n        get_logger(_LOG_TAG).info(\n            \"Running xgboost-%s on %s workers with\"\n            \"\\n\\tbooster params: %s\"\n            \"\\n\\ttrain_call_kwargs_params: %s\"\n            \"\\n\\tdmatrix_kwargs: %s\",\n            _py_version(),\n            num_workers,\n            booster_params,\n            train_call_kwargs_params,\n            dmatrix_kwargs,\n        )\n        evals_result, config, booster = _run_job()\n        get_logger(_LOG_TAG).info(\"Finished xgboost training!\")\n\n        result_xgb_model = self._convert_to_sklearn_model(\n            bytearray(booster, \"utf-8\"), config\n        )\n        training_summary = XGBoostTrainingSummary.from_metrics(json.loads(evals_result))\n        spark_model = self._create_pyspark_model(result_xgb_model, training_summary)\n        # According to pyspark ML convention, the model uid should be the same\n        # with estimator uid.\n        spark_model._resetUid(self.uid)\n        return self._copyValues(spark_model)\n\n    def write(self) -> \"SparkXGBWriter\":\n        \"\"\"\n        Return the writer for saving the estimator.\n        \"\"\"\n        return SparkXGBWriter(self)\n\n    @classmethod\n    def read(cls) -> \"SparkXGBReader\":\n        \"\"\"\n        Return the reader for loading the estimator.\n        \"\"\"\n        return SparkXGBReader(cls)\n\n\nclass _SparkXGBModel(Model, _SparkXGBParams, MLReadable, 
MLWritable):\n    def __init__(\n        self,\n        xgb_sklearn_model: Optional[XGBModel] = None,\n        training_summary: Optional[XGBoostTrainingSummary] = None,\n    ) -> None:\n        super().__init__()\n        self._xgb_sklearn_model = xgb_sklearn_model\n        self.training_summary = training_summary\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBModel]:\n        raise NotImplementedError()\n\n    def get_booster(self) -> Booster:\n        \"\"\"\n        Return the `xgboost.core.Booster` instance.\n        \"\"\"\n        assert self._xgb_sklearn_model is not None\n        return self._xgb_sklearn_model.get_booster()\n\n    def get_feature_importances(\n        self, importance_type: str = \"weight\"\n    ) -> Dict[str, Union[float, List[float]]]:\n        \"\"\"Get feature importance of each feature.\n        Importance type can be defined as:\n\n        * 'weight': the number of times a feature is used to split the data across all trees.\n        * 'gain': the average gain across all splits the feature is used in.\n        * 'cover': the average coverage across all splits the feature is used in.\n        * 'total_gain': the total gain across all splits the feature is used in.\n        * 'total_cover': the total coverage across all splits the feature is used in.\n\n        Parameters\n        ----------\n        importance_type: str, default 'weight'\n            One of the importance types defined above.\n        \"\"\"\n        return self.get_booster().get_score(importance_type=importance_type)\n\n    def write(self) -> \"SparkXGBModelWriter\":\n        \"\"\"\n        Return the writer for saving the model.\n        \"\"\"\n        return SparkXGBModelWriter(self)\n\n    @classmethod\n    def read(cls) -> \"SparkXGBModelReader\":\n        \"\"\"\n        Return the reader for loading the model.\n        \"\"\"\n        return SparkXGBModelReader(cls)\n\n    def _get_feature_col(\n        self, dataset: DataFrame\n    ) -> 
Tuple[List[Column], Optional[List[str]]]:\n        \"\"\"XGBoost model trained with features_cols parameter can also predict\n        vector or array feature type. But first we need to check features_cols\n        and then featuresCol\n        \"\"\"\n        if self.getOrDefault(self.enable_sparse_data_optim):\n            feature_col_names = None\n            features_col = _get_unwrapped_vec_cols(\n                col(self.getOrDefault(self.featuresCol))\n            )\n            return features_col, feature_col_names\n\n        feature_col_names = self.getOrDefault(self.features_cols)\n        features_col = []\n        if feature_col_names and set(feature_col_names).issubset(set(dataset.columns)):\n            # The model is trained with features_cols and the predicted dataset\n            # also contains all the columns specified by features_cols.\n            features_col = _validate_and_convert_feature_col_as_float_col_list(\n                dataset, feature_col_names\n            )\n        else:\n            # 1. The model was trained by features_cols, but the dataset doesn't contain\n            #       all the columns specified by features_cols, so we need to check if\n            #       the dataframe has the featuresCol\n            # 2. 
The model was trained by featuresCol, and the predicted dataset must contain\n            #       featuresCol column.\n            feature_col_names = None\n            features_col.append(\n                _validate_and_convert_feature_col_as_array_col(\n                    dataset, self.getOrDefault(self.featuresCol)\n                )\n            )\n        return features_col, feature_col_names\n\n    def _get_pred_contrib_col_name(self) -> Optional[str]:\n        \"\"\"Return the pred_contrib_col col name\"\"\"\n        pred_contrib_col_name = None\n        if self._col_is_defined_not_empty(self.pred_contrib_col):\n            pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)\n\n        return pred_contrib_col_name\n\n    def _out_schema(self) -> Tuple[bool, str]:\n        \"\"\"Return the bool to indicate if it's a single prediction, true is single prediction,\n        and the returned type of the user-defined function. The value must\n        be a DDL-formatted type string.\"\"\"\n\n        if self._get_pred_contrib_col_name() is not None:\n            return False, f\"{pred.prediction} double, {pred.pred_contrib} array<double>\"\n\n        return True, \"double\"\n\n    def _get_predict_func(self) -> Callable:\n        \"\"\"Return the true prediction function which will be running on the executor side\"\"\"\n\n        predict_params = self._gen_predict_params_dict()\n        pred_contrib_col_name = self._get_pred_contrib_col_name()\n\n        def _predict(\n            model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]\n        ) -> Union[pd.DataFrame, pd.Series]:\n            data = {}\n            preds = model.predict(\n                X,\n                base_margin=base_margin,\n                validate_features=False,\n                **predict_params,\n            )\n            data[pred.prediction] = pd.Series(preds)\n\n            if pred_contrib_col_name is not None:\n                contribs = pred_contribs(model, 
X, base_margin)\n                data[pred.pred_contrib] = pd.Series(list(contribs))\n                return pd.DataFrame(data=data)\n\n            return data[pred.prediction]\n\n        return _predict\n\n    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:\n        \"\"\"Post process of transform\"\"\"\n        prediction_col_name = self.getOrDefault(self.predictionCol)\n        single_pred, _ = self._out_schema()\n\n        if single_pred:\n            if prediction_col_name:\n                dataset = dataset.withColumn(prediction_col_name, pred_col)\n        else:\n            pred_struct_col = \"_prediction_struct\"\n            dataset = dataset.withColumn(pred_struct_col, pred_col)\n\n            if prediction_col_name:\n                dataset = dataset.withColumn(\n                    prediction_col_name, getattr(col(pred_struct_col), pred.prediction)\n                )\n\n            pred_contrib_col_name = self._get_pred_contrib_col_name()\n            if pred_contrib_col_name is not None:\n                dataset = dataset.withColumn(\n                    pred_contrib_col_name,\n                    array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),\n                )\n\n            dataset = dataset.drop(pred_struct_col)\n        return dataset\n\n    def _run_on_gpu(self) -> bool:\n        \"\"\"If gpu is used to do the prediction according to the parameters\n        and spark configurations\"\"\"\n\n        use_gpu_by_params = super()._run_on_gpu()\n\n        if _is_local(_get_spark_session().sparkContext):\n            # if it's local model, no need to check the spark configurations\n            return use_gpu_by_params\n\n        gpu_per_task = (\n            _get_spark_session()\n            .sparkContext.getConf()\n            .get(\"spark.task.resource.gpu.amount\")\n        )\n\n        # User don't set gpu configurations, just use cpu\n        if gpu_per_task is None:\n            if 
use_gpu_by_params:\n                get_logger(_LOG_TAG).warning(\n                    \"Do the prediction on the CPUs since no gpu configurations are set\"\n                )\n            return False\n\n        # User already sets the gpu configurations.\n        return use_gpu_by_params\n\n    def _transform(self, dataset: DataFrame) -> DataFrame:\n        # pylint: disable=too-many-statements, too-many-locals\n        # Save xgb_sklearn_model and predict_params to be local variable\n        # to avoid the `self` object to be pickled to remote.\n        xgb_sklearn_model = self._xgb_sklearn_model\n\n        base_margin_col = None\n        if self._col_is_defined_not_empty(self.base_margin_col):\n            base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(\n                alias.margin\n            )\n        has_base_margin = base_margin_col is not None\n\n        features_col, feature_col_names = self._get_feature_col(dataset)\n        enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)\n\n        predict_func = self._get_predict_func()\n\n        _, schema = self._out_schema()\n\n        is_local = _is_local(_get_spark_session().sparkContext)\n        run_on_gpu = self._run_on_gpu()\n\n        log_level = get_logger_level(_LOG_TAG)\n\n        @pandas_udf(schema)  # type: ignore[call-overload]\n        def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:\n            assert xgb_sklearn_model is not None\n            model = xgb_sklearn_model\n\n            from pyspark import TaskContext\n\n            context = TaskContext.get()\n            assert context is not None\n\n            dev_ordinal = -1\n\n            msg = \"Do the inference on the CPUs\"\n            if run_on_gpu:\n                if is_cudf_available() and is_cupy_available():\n                    if is_local:\n                        cp = import_cupy()\n\n                        total_gpus = cp.cuda.runtime.getDeviceCount()\n   
                     if total_gpus > 0:\n                            partition_id = context.partitionId()\n                            # For transform local mode, default the dev_ordinal to\n                            # (partition id) % gpus.\n                            dev_ordinal = partition_id % total_gpus\n                    else:\n                        dev_ordinal = _get_gpu_id(context)\n\n                    if dev_ordinal >= 0:\n                        device = \"cuda:\" + str(dev_ordinal)\n                        msg = \"Do the inference with device: \" + device\n                        model.set_params(device=device)\n                    else:\n                        msg = \"Couldn't get the correct gpu id, fallback the inference on the CPUs\"\n                else:\n                    msg = \"CUDF or Cupy is unavailable, fallback the inference on the CPUs\"\n\n            if context.partitionId() == 0:\n                get_logger(_LOG_TAG, log_level).info(msg)\n\n            def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:\n                \"\"\"Move the data to gpu if possible\"\"\"\n                if dev_ordinal >= 0:\n                    import cudf\n                    import cupy as cp\n\n                    # We must set the device after import cudf, which will change the device id to 0\n                    # See https://github.com/rapidsai/cudf/issues/11386\n                    cp.cuda.runtime.setDevice(dev_ordinal)  # pylint: disable=I1101\n                    df = cudf.DataFrame(data)\n                    del data\n                    return df\n                return data\n\n            for data in iterator:\n                if enable_sparse_data_optim:\n                    X = _read_csr_matrix_from_unwrapped_spark_vec(data)\n                else:\n                    if feature_col_names is not None:\n                        tmp: ArrayLike = data[feature_col_names]\n                    else:\n                        tmp = 
stack_series(data[alias.data])\n                    X = to_gpu_if_possible(tmp)\n\n                if has_base_margin:\n                    base_margin = to_gpu_if_possible(data[alias.margin])\n                else:\n                    base_margin = None\n\n                yield predict_func(model, X, base_margin)\n\n        if has_base_margin:\n            assert base_margin_col is not None\n            pred_col = predict_udf(struct(*features_col, base_margin_col))\n        else:\n            pred_col = predict_udf(struct(*features_col))\n\n        return self._post_transform(dataset, pred_col)\n\n\nclass _ClassificationModel(  # pylint: disable=abstract-method\n    _SparkXGBModel, HasProbabilityCol, HasRawPredictionCol, HasContribPredictionCol\n):\n    \"\"\"\n    The model returned by :func:`xgboost.spark.SparkXGBClassifier.fit`\n\n    .. Note:: This API is experimental.\n    \"\"\"\n\n    def _out_schema(self) -> Tuple[bool, str]:\n        schema = (\n            f\"{pred.raw_prediction} array<double>, {pred.prediction} double,\"\n            f\" {pred.probability} array<double>\"\n        )\n        if self._get_pred_contrib_col_name() is not None:\n            # We will force setting strict_shape to True when predicting contribs,\n            # So, it will also output 3-D shape result.\n            schema = f\"{schema}, {pred.pred_contrib} array<array<double>>\"\n\n        return False, schema\n\n    def _get_predict_func(self) -> Callable:\n        predict_params = self._gen_predict_params_dict()\n        pred_contrib_col_name = self._get_pred_contrib_col_name()\n\n        def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:\n            if margins.ndim == 1:\n                # binomial case\n                classone_probs = expit(margins)\n                classzero_probs = 1.0 - classone_probs\n                raw_preds = np.vstack((-margins, margins)).transpose()\n                class_probs = np.vstack((classzero_probs, 
classone_probs)).transpose()\n            else:\n                # multinomial case\n                raw_preds = margins\n                class_probs = softmax(raw_preds, axis=1)\n            return raw_preds, class_probs\n\n        def _predict(\n            model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]\n        ) -> Union[pd.DataFrame, pd.Series]:\n            margins = model.predict(\n                X,\n                base_margin=base_margin,\n                output_margin=True,\n                validate_features=False,\n                **predict_params,\n            )\n            raw_preds, class_probs = transform_margin(margins)\n\n            # It seems that they use argmax of class probs,\n            # not of margin to get the prediction (Note: scala implementation)\n            preds = np.argmax(class_probs, axis=1)\n            result: Dict[str, pd.Series] = {\n                pred.raw_prediction: pd.Series(list(raw_preds)),\n                pred.prediction: pd.Series(preds),\n                pred.probability: pd.Series(list(class_probs)),\n            }\n\n            if pred_contrib_col_name is not None:\n                contribs = pred_contribs(model, X, base_margin, strict_shape=True)\n                result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))\n\n            return pd.DataFrame(data=result)\n\n        return _predict\n\n    def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:\n        pred_struct_col = \"_prediction_struct\"\n        dataset = dataset.withColumn(pred_struct_col, pred_col)\n\n        raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)\n        if raw_prediction_col_name:\n            dataset = dataset.withColumn(\n                raw_prediction_col_name,\n                array_to_vector(getattr(col(pred_struct_col), pred.raw_prediction)),\n            )\n\n        prediction_col_name = self.getOrDefault(self.predictionCol)\n        if 
prediction_col_name:\n            dataset = dataset.withColumn(\n                prediction_col_name, getattr(col(pred_struct_col), pred.prediction)\n            )\n\n        probability_col_name = self.getOrDefault(self.probabilityCol)\n        if probability_col_name:\n            dataset = dataset.withColumn(\n                probability_col_name,\n                array_to_vector(getattr(col(pred_struct_col), pred.probability)),\n            )\n\n        pred_contrib_col_name = self._get_pred_contrib_col_name()\n        if pred_contrib_col_name is not None:\n            dataset = dataset.withColumn(\n                pred_contrib_col_name,\n                getattr(col(pred_struct_col), pred.pred_contrib),\n            )\n\n        return dataset.drop(pred_struct_col)\n\n\nclass _SparkXGBSharedReadWrite:\n    @staticmethod\n    def saveMetadata(\n        instance: Union[_SparkXGBEstimator, _SparkXGBModel],\n        path: str,\n        sc: SparkContext,\n        logger: logging.Logger,\n        extraMetadata: Optional[Dict[str, Any]] = None,\n    ) -> None:\n        \"\"\"\n        Save the metadata of an xgboost.spark._SparkXGBEstimator or\n        xgboost.spark._SparkXGBModel.\n        \"\"\"\n        instance._validate_params()\n        skipParams = [\"callbacks\", \"xgb_model\", \"coll_cfg\"]\n        jsonParams = {}\n        for p, v in instance._paramMap.items():  # pylint: disable=protected-access\n            if p.name not in skipParams:\n                jsonParams[p.name] = v\n\n        extraMetadata = extraMetadata or {}\n        callbacks = instance.getOrDefault(\"callbacks\")\n        if callbacks is not None:\n            logger.warning(\n                \"The callbacks parameter is saved using cloudpickle and it \"\n                \"is not a fully self-contained format. 
It may fail to load \"\n                \"with different versions of dependencies.\"\n            )\n            serialized_callbacks = base64.encodebytes(\n                cloudpickle.dumps(callbacks)\n            ).decode(\"ascii\")\n            extraMetadata[\"serialized_callbacks\"] = serialized_callbacks\n        init_booster = instance.getOrDefault(\"xgb_model\")\n        if init_booster is not None:\n            extraMetadata[\"init_booster\"] = _INIT_BOOSTER_SAVE_PATH\n\n        if instance.isDefined(\"coll_cfg\"):\n            conf: Config = instance.getOrDefault(\"coll_cfg\")\n            if conf is not None:\n                extraMetadata[\"coll_cfg\"] = {\n                    k: v for k, v in asdict(conf).items() if not callable(v)\n                }\n            if callable(conf.worker_port):\n                logger.warning(\"The `worker_port` is not serialized.\")\n\n        DefaultParamsWriter.saveMetadata(\n            instance, path, sc, extraMetadata=extraMetadata, paramMap=jsonParams\n        )\n        if init_booster is not None:\n            ser_init_booster = serialize_booster(init_booster)\n            save_path = os.path.join(path, _INIT_BOOSTER_SAVE_PATH)\n            _get_spark_session().createDataFrame(\n                [(ser_init_booster,)], [\"init_booster\"]\n            ).write.parquet(save_path)\n\n    @staticmethod\n    def loadMetadataAndInstance(\n        pyspark_xgb_cls: Union[Type[_SparkXGBEstimator], Type[_SparkXGBModel]],\n        path: str,\n        sc: SparkContext,\n        logger: logging.Logger,\n    ) -> Tuple[Dict[str, Any], Union[_SparkXGBEstimator, _SparkXGBModel]]:\n        \"\"\"\n        Load the metadata and the instance of an xgboost.spark._SparkXGBEstimator or\n        xgboost.spark._SparkXGBModel.\n\n        :return: a tuple of (metadata, instance)\n        \"\"\"\n        metadata = DefaultParamsReader.loadMetadata(\n            path, sc, expectedClassName=get_class_name(pyspark_xgb_cls)\n        )\n        
pyspark_xgb = pyspark_xgb_cls()\n        DefaultParamsReader.getAndSetParams(pyspark_xgb, metadata)\n\n        if \"serialized_callbacks\" in metadata:\n            serialized_callbacks = metadata[\"serialized_callbacks\"]\n            try:\n                callbacks = cloudpickle.loads(\n                    base64.decodebytes(serialized_callbacks.encode(\"ascii\"))\n                )\n                pyspark_xgb.set(pyspark_xgb.callbacks, callbacks)  # type: ignore[union-attr]\n            except Exception as e:  # pylint: disable=W0703\n                logger.warning(\n                    f\"Fails to load the callbacks param due to {e}. Please set the \"\n                    \"callbacks param manually for the loaded estimator.\"\n                )\n        if \"coll_cfg\" in metadata:\n            pyspark_xgb.set_coll_cfg(Config(**metadata[\"coll_cfg\"]))\n\n        if \"init_booster\" in metadata:\n            load_path = os.path.join(path, metadata[\"init_booster\"])\n            ser_init_booster = (\n                _get_spark_session().read.parquet(load_path).collect()[0].init_booster\n            )\n            init_booster = deserialize_booster(ser_init_booster)\n            pyspark_xgb.set(pyspark_xgb.xgb_model, init_booster)  # type: ignore[union-attr]\n\n        pyspark_xgb._resetUid(metadata[\"uid\"])  # pylint: disable=protected-access\n        return metadata, pyspark_xgb\n\n\nclass SparkXGBWriter(MLWriter):\n    \"\"\"\n    Spark Xgboost estimator writer.\n    \"\"\"\n\n    def __init__(self, instance: \"_SparkXGBEstimator\") -> None:\n        super().__init__()\n        self.instance = instance\n        self.logger = get_logger(self.__class__.__name__, level=\"WARN\")\n\n    def saveImpl(self, path: str) -> None:\n        \"\"\"\n        save model.\n        \"\"\"\n        _SparkXGBSharedReadWrite.saveMetadata(self.instance, path, self.sc, self.logger)\n\n\nclass SparkXGBReader(MLReader):\n    \"\"\"\n    Spark Xgboost estimator reader.\n    
\"\"\"\n\n    def __init__(self, cls: Type[\"_SparkXGBEstimator\"]) -> None:\n        super().__init__()\n        self.cls = cls\n        self.logger = get_logger(self.__class__.__name__, level=\"WARN\")\n\n    def load(self, path: str) -> \"_SparkXGBEstimator\":\n        \"\"\"\n        load model.\n        \"\"\"\n        _, pyspark_xgb = _SparkXGBSharedReadWrite.loadMetadataAndInstance(\n            self.cls, path, self.sc, self.logger\n        )\n        return cast(\"_SparkXGBEstimator\", pyspark_xgb)\n\n\nclass SparkXGBModelWriter(MLWriter):\n    \"\"\"\n    Spark Xgboost model writer.\n    \"\"\"\n\n    def __init__(self, instance: _SparkXGBModel) -> None:\n        super().__init__()\n        self.instance = instance\n        self.logger = get_logger(self.__class__.__name__, level=\"WARN\")\n\n    def saveImpl(self, path: str) -> None:\n        \"\"\"\n        Save metadata and model for a :py:class:`_SparkXGBModel`\n        - save metadata to path/metadata\n        - save model to path/model.json\n        \"\"\"\n        xgb_model = self.instance._xgb_sklearn_model\n        assert xgb_model is not None\n        _SparkXGBSharedReadWrite.saveMetadata(self.instance, path, self.sc, self.logger)\n        model_save_path = os.path.join(path, \"model\")\n        booster = xgb_model.get_booster().save_raw(\"json\").decode(\"utf-8\")\n        booster_chunks = []\n\n        for offset in range(0, len(booster), _MODEL_CHUNK_SIZE):\n            booster_chunks.append(booster[offset : offset + _MODEL_CHUNK_SIZE])\n\n        _get_spark_session().sparkContext.parallelize(booster_chunks, 1).saveAsTextFile(\n            model_save_path\n        )\n\n\nclass SparkXGBModelReader(MLReader):\n    \"\"\"\n    Spark Xgboost model reader.\n    \"\"\"\n\n    def __init__(self, cls: Type[\"_SparkXGBModel\"]) -> None:\n        super().__init__()\n        self.cls = cls\n        self.logger = get_logger(self.__class__.__name__, level=\"WARN\")\n\n    def load(self, path: str) -> 
\"_SparkXGBModel\":\n        \"\"\"\n        Load metadata and model for a :py:class:`_SparkXGBModel`\n\n        :return: SparkXGBRegressorModel or SparkXGBClassifierModel instance\n        \"\"\"\n        _, py_model = _SparkXGBSharedReadWrite.loadMetadataAndInstance(\n            self.cls, path, self.sc, self.logger\n        )\n        py_model = cast(\"_SparkXGBModel\", py_model)\n\n        xgb_sklearn_params = py_model._gen_xgb_params_dict(\n            gen_xgb_sklearn_estimator_param=True\n        )\n        model_load_path = os.path.join(path, \"model\")\n\n        ser_xgb_model = \"\".join(\n            _get_spark_session().sparkContext.textFile(model_load_path).collect()\n        )\n\n        def create_xgb_model() -> \"XGBModel\":\n            return self.cls._xgb_cls()(**xgb_sklearn_params)\n\n        xgb_model = deserialize_xgb_model(ser_xgb_model, create_xgb_model)\n        py_model._xgb_sklearn_model = xgb_model\n        return py_model\n"
  },
  {
    "path": "python-package/xgboost/spark/data.py",
    "content": "# pylint: disable=protected-access\n\"\"\"Utilities for processing spark partitions.\"\"\"\n\nfrom collections import defaultdict, namedtuple\nfrom typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union\n\nimport numpy as np\nimport pandas as pd\nfrom scipy.sparse import csr_matrix\n\nfrom .._typing import ArrayLike\nfrom ..compat import concat\nfrom ..core import DataIter, DMatrix, QuantileDMatrix\nfrom ..sklearn import XGBModel\nfrom .utils import get_logger\n\n\ndef stack_series(series: pd.Series) -> np.ndarray:\n    \"\"\"Stack a series of arrays.\"\"\"\n    array = series.to_numpy(copy=False)\n    array = np.stack(array)  # type: ignore[arg-type]\n    return array\n\n\n# Global constant for defining column alias shared between estimator and data\n# processing procedures.\nAlias = namedtuple(\"Alias\", (\"data\", \"label\", \"weight\", \"margin\", \"valid\", \"qid\"))\nalias = Alias(\"values\", \"label\", \"weight\", \"baseMargin\", \"validationIndicator\", \"qid\")\n\n\ndef concat_or_none(seq: Optional[Sequence[np.ndarray]]) -> Optional[np.ndarray]:\n    \"\"\"Concatenate the data if it's not None.\"\"\"\n    if seq:\n        return concat(seq)\n    return None\n\n\ndef cache_partitions(\n    iterator: Iterator[pd.DataFrame], append: Callable[[pd.DataFrame, str, bool], None]\n) -> None:\n    \"\"\"Extract partitions from pyspark iterator. 
`append` is a user defined function for\n    accepting new partition.\"\"\"\n\n    def make_blob(part: pd.DataFrame, is_valid: bool) -> None:\n        append(part, alias.data, is_valid)\n        append(part, alias.label, is_valid)\n        append(part, alias.weight, is_valid)\n        append(part, alias.margin, is_valid)\n        append(part, alias.qid, is_valid)\n\n    has_validation: Optional[bool] = None\n\n    for part in iterator:\n        if has_validation is None:\n            has_validation = alias.valid in part.columns\n        if has_validation is True:\n            assert alias.valid in part.columns\n\n        if has_validation:\n            train = part.loc[~part[alias.valid], :]\n            valid = part.loc[part[alias.valid], :]\n        else:\n            train, valid = part, None\n\n        make_blob(train, False)\n        if valid is not None:\n            make_blob(valid, True)\n\n\nclass PartIter(DataIter):\n    \"\"\"Iterator for creating Quantile DMatrix from partitions.\"\"\"\n\n    def __init__(\n        self, data: Dict[str, List], device_id: Optional[int], **kwargs: Any\n    ) -> None:\n        self._iter = 0\n        self._device_id = device_id\n        self._data = data\n        self._kwargs = kwargs\n\n        super().__init__(release_data=True)\n\n    def _fetch(self, data: Optional[Sequence[pd.DataFrame]]) -> Optional[pd.DataFrame]:\n        if not data:\n            return None\n\n        if self._device_id is not None:\n            import cudf\n            import cupy as cp\n\n            # We must set the device after import cudf, which will change the device id to 0\n            # See https://github.com/rapidsai/cudf/issues/11386\n            cp.cuda.runtime.setDevice(self._device_id)  # pylint: disable=I1101\n            return cudf.DataFrame(data[self._iter])\n\n        return data[self._iter]\n\n    def next(self, input_data: Callable) -> bool:\n        if self._iter == len(self._data[alias.data]):\n            return False\n    
    input_data(\n            data=self._fetch(self._data[alias.data]),\n            label=self._fetch(self._data.get(alias.label, None)),\n            weight=self._fetch(self._data.get(alias.weight, None)),\n            base_margin=self._fetch(self._data.get(alias.margin, None)),\n            qid=self._fetch(self._data.get(alias.qid, None)),\n            **self._kwargs,\n        )\n        self._iter += 1\n        return True\n\n    def reset(self) -> None:\n        self._iter = 0\n\n\ndef _read_csr_matrix_from_unwrapped_spark_vec(part: pd.DataFrame) -> csr_matrix:\n    # variables for constructing csr_matrix\n    csr_indices_list, csr_indptr_list, csr_values_list = [], [0], []\n\n    n_features = 0\n\n    for vec_type, vec_size_, vec_indices, vec_values in zip(\n        part.featureVectorType,\n        part.featureVectorSize,\n        part.featureVectorIndices,\n        part.featureVectorValues,\n    ):\n        if vec_type == 0:\n            # sparse vector\n            vec_size = int(vec_size_)\n            csr_indices = vec_indices\n            csr_values = vec_values\n        else:\n            # dense vector\n            # Note: According to spark ML VectorUDT format,\n            # when type field is 1, the size field is also empty.\n            # we need to check the values field to get vector length.\n            vec_size = len(vec_values)\n            csr_indices = np.arange(vec_size, dtype=np.int32)\n            csr_values = vec_values\n\n        if n_features == 0:\n            n_features = vec_size\n        assert n_features == vec_size\n\n        csr_indices_list.append(csr_indices)\n        csr_indptr_list.append(csr_indptr_list[-1] + len(csr_indices))\n        csr_values_list.append(csr_values)\n\n    csr_indptr_arr = np.array(csr_indptr_list)\n    csr_indices_arr = np.concatenate(csr_indices_list)\n    csr_values_arr = np.concatenate(csr_values_list)\n\n    return csr_matrix(\n        (csr_values_arr, csr_indices_arr, csr_indptr_arr), 
shape=(len(part), n_features)\n    )\n\n\ndef make_qdm(\n    data: Dict[str, List[np.ndarray]],\n    dev_ordinal: Optional[int],\n    meta: Dict[str, Any],\n    ref: Optional[DMatrix],\n    params: Dict[str, Any],\n) -> DMatrix:\n    \"\"\"Handle empty partition for QuantileDMatrix.\"\"\"\n    if not data:\n        return QuantileDMatrix(np.empty((0, 0)), ref=ref)\n    it = PartIter(data, dev_ordinal, **meta)\n    m = QuantileDMatrix(it, **params, ref=ref)\n    return m\n\n\ndef create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments\n    *,\n    iterator: Iterator[pd.DataFrame],\n    feature_cols: Optional[Sequence[str]],\n    dev_ordinal: Optional[int],\n    use_qdm: bool,\n    kwargs: Dict[str, Any],  # use dict to make sure this parameter is passed.\n    enable_sparse_data_optim: bool,\n    has_validation_col: bool,\n) -> Tuple[DMatrix, Optional[DMatrix]]:\n    \"\"\"Create DMatrix from spark data partitions.\n\n    Parameters\n    ----------\n    iterator :\n        Pyspark partition iterator.\n    feature_cols:\n        A sequence of feature names, used only when rapids plugin is enabled.\n    dev_ordinal:\n        Device ordinal, used when GPU is enabled.\n    use_qdm :\n        Whether QuantileDMatrix should be used instead of DMatrix.\n    kwargs :\n        Metainfo for DMatrix.\n    enable_sparse_data_optim :\n        Whether sparse data should be unwrapped.\n    has_validation_col :\n        Whether there's validation data.\n\n    Returns\n    -------\n    Training DMatrix and an optional validation DMatrix.\n    \"\"\"\n    # pylint: disable=too-many-locals, too-many-statements\n    train_data: Dict[str, List[np.ndarray]] = defaultdict(list)\n    valid_data: Dict[str, List[np.ndarray]] = defaultdict(list)\n\n    n_features: int = 0\n\n    def append_m(part: pd.DataFrame, name: str, is_valid: bool) -> None:\n        nonlocal n_features\n        if name == alias.data or name in part.columns:\n            if (\n                name == alias.data\n  
              and feature_cols is not None\n                and part[feature_cols].shape[0] > 0  # guard against empty partition\n            ):\n                array: Optional[np.ndarray] = part[feature_cols]\n            elif part[name].shape[0] > 0:\n                array = part[name]\n                if name == alias.data:\n                    # For the array/vector typed case.\n                    array = stack_series(array)\n            else:\n                array = None\n\n            if name == alias.data and array is not None:\n                if n_features == 0:\n                    n_features = array.shape[1]\n                assert n_features == array.shape[1]\n\n            if array is None:\n                return\n\n            if is_valid:\n                valid_data[name].append(array)\n            else:\n                train_data[name].append(array)\n\n    def append_m_sparse(part: pd.DataFrame, name: str, is_valid: bool) -> None:\n        nonlocal n_features\n\n        if name == alias.data or name in part.columns:\n            if name == alias.data:\n                array = _read_csr_matrix_from_unwrapped_spark_vec(part)\n                if n_features == 0:\n                    n_features = array.shape[1]\n                assert n_features == array.shape[1]\n            else:\n                array = part[name]\n\n            if is_valid:\n                valid_data[name].append(array)\n            else:\n                train_data[name].append(array)\n\n    def make(values: Dict[str, List[np.ndarray]], kwargs: Dict[str, Any]) -> DMatrix:\n        if len(values) == 0:\n            get_logger(\"XGBoostPySpark\").warning(\n                \"Detected an empty partition in the training data. 
Consider to enable\"\n                \" repartition_random_shuffle\"\n            )\n            # We must construct an empty DMatrix to bypass the AllReduce\n            return DMatrix(data=np.empty((0, 0)), **kwargs)\n\n        data = concat_or_none(values[alias.data])\n        label = concat_or_none(values.get(alias.label, None))\n        weight = concat_or_none(values.get(alias.weight, None))\n        margin = concat_or_none(values.get(alias.margin, None))\n        qid = concat_or_none(values.get(alias.qid, None))\n        return DMatrix(\n            data=data, label=label, weight=weight, base_margin=margin, qid=qid, **kwargs\n        )\n\n    if enable_sparse_data_optim:\n        append_fn = append_m_sparse\n        assert \"missing\" in kwargs and kwargs[\"missing\"] == 0.0\n    else:\n        append_fn = append_m\n\n    def split_params() -> Tuple[Dict[str, Any], Dict[str, Union[int, float, bool]]]:\n        # FIXME(jiamingy): we really need a better way to bridge distributed frameworks\n        # to XGBoost native interface and prevent scattering parameters like this.\n\n        # parameters that are not related to data.\n        non_data_keys = (\n            \"max_bin\",\n            \"missing\",\n            \"silent\",\n            \"nthread\",\n            \"enable_categorical\",\n        )\n        non_data_params = {}\n        meta = {}\n        for k, v in kwargs.items():\n            if k in non_data_keys:\n                non_data_params[k] = v\n            else:\n                meta[k] = v\n        return meta, non_data_params\n\n    meta, params = split_params()\n\n    if feature_cols is not None and use_qdm:\n        cache_partitions(iterator, append_fn)\n        dtrain: DMatrix = make_qdm(train_data, dev_ordinal, meta, None, params)\n    elif feature_cols is not None and not use_qdm:\n        cache_partitions(iterator, append_fn)\n        dtrain = make(train_data, kwargs)\n    elif feature_cols is None and use_qdm:\n        
cache_partitions(iterator, append_fn)\n        dtrain = make_qdm(train_data, dev_ordinal, meta, None, params)\n    else:\n        cache_partitions(iterator, append_fn)\n        dtrain = make(train_data, kwargs)\n\n    # Use has_validation_col to indicate whether there is a validation col\n    # instead of getting it from the iterator, since the iterator may be empty\n    # in some special cases. That is to say, we must ensure every worker\n    # constructs a DMatrix even when there is no data, since every worker\n    # has to do the AllReduce when constructing the DMatrix, or else it may\n    # hang forever.\n    if has_validation_col:\n        if use_qdm:\n            dvalid: Optional[DMatrix] = make_qdm(\n                valid_data, dev_ordinal, meta, dtrain, params\n            )\n        else:\n            dvalid = make(valid_data, kwargs) if has_validation_col else None\n    else:\n        dvalid = None\n\n    if dvalid is not None:\n        assert dvalid.num_col() == dtrain.num_col()\n\n    return dtrain, dvalid\n\n\ndef pred_contribs(\n    model: XGBModel,\n    data: ArrayLike,\n    base_margin: Optional[ArrayLike] = None,\n    strict_shape: bool = False,\n) -> np.ndarray:\n    \"\"\"Predict contributions with data with the full model.\"\"\"\n    iteration_range = model._get_iteration_range(None)\n    data_dmatrix = DMatrix(\n        data,\n        base_margin=base_margin,\n        missing=model.missing,\n        nthread=model.n_jobs,\n        feature_types=model.feature_types,\n        feature_weights=model.feature_weights,\n        enable_categorical=model.enable_categorical,\n    )\n    return model.get_booster().predict(\n        data_dmatrix,\n        pred_contribs=True,\n        validate_features=False,\n        iteration_range=iteration_range,\n        strict_shape=strict_shape,\n    )\n"
  },
  {
    "path": "python-package/xgboost/spark/estimator.py",
    "content": "\"\"\"Xgboost pyspark integration submodule for estimator API.\"\"\"\n\n# pylint: disable=protected-access, no-member\n# pylint: disable=unused-argument, too-many-locals\n\nfrom typing import Any, List, Optional, Type, Union\n\nimport numpy as np\nfrom pyspark import keyword_only\nfrom pyspark.ml.param import Param, Params\nfrom pyspark.ml.param.shared import HasProbabilityCol, HasRawPredictionCol\n\nfrom ..collective import Config\nfrom ..sklearn import XGBClassifier, XGBRanker, XGBRegressor\nfrom .core import (  # type: ignore[attr-defined]\n    _ClassificationModel,\n    _SparkXGBEstimator,\n    _SparkXGBModel,\n)\nfrom .utils import get_class_name\n\n\ndef _set_pyspark_xgb_cls_param_attrs(\n    estimator: Type[_SparkXGBEstimator], model: Type[_SparkXGBModel]\n) -> None:\n    \"\"\"This function automatically infer to xgboost parameters and set them\n    into corresponding pyspark estimators and models\"\"\"\n    params_dict = estimator._get_xgb_params_default()\n\n    def param_value_converter(v: Any) -> Any:\n        if isinstance(v, np.generic):\n            # convert numpy scalar values to corresponding python scalar values\n            return np.array(v).item()\n        if isinstance(v, dict):\n            return {k: param_value_converter(nv) for k, nv in v.items()}\n        if isinstance(v, list):\n            return [param_value_converter(nv) for nv in v]\n        return v\n\n    def set_param_attrs(attr_name: str, param: Param) -> None:\n        param.typeConverter = param_value_converter\n        setattr(estimator, attr_name, param)\n        setattr(model, attr_name, param)\n\n    for name in params_dict.keys():\n        doc = (\n            f\"Refer to XGBoost doc of \"\n            f\"{get_class_name(estimator._xgb_cls())} for this param {name}\"\n        )\n\n        param_obj: Param = Param(Params._dummy(), name=name, doc=doc)\n        set_param_attrs(name, param_obj)\n\n    fit_params_dict = estimator._get_fit_params_default()\n    
for name in fit_params_dict.keys():\n        doc = (\n            f\"Refer to XGBoost doc of {get_class_name(estimator._xgb_cls())}\"\n            f\".fit() for this param {name}\"\n        )\n        if name == \"callbacks\":\n            doc += (\n                \"The callbacks can be arbitrary functions. It is saved using cloudpickle \"\n                \"which is not a fully self-contained format. It may fail to load with \"\n                \"different versions of dependencies.\"\n            )\n        param_obj = Param(Params._dummy(), name=name, doc=doc)\n        set_param_attrs(name, param_obj)\n\n    predict_params_dict = estimator._get_predict_params_default()\n    for name in predict_params_dict.keys():\n        doc = (\n            f\"Refer to XGBoost doc of {get_class_name(estimator._xgb_cls())}\"\n            f\".predict() for this param {name}\"\n        )\n        param_obj = Param(Params._dummy(), name=name, doc=doc)\n        set_param_attrs(name, param_obj)\n\n\nclass SparkXGBRegressor(_SparkXGBEstimator):\n    \"\"\"SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression\n    algorithm based on XGBoost python library, and it can be used in PySpark Pipeline\n    and PySpark ML meta algorithms like\n    - :py:class:`~pyspark.ml.tuning.CrossValidator`/\n    - :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/\n    - :py:class:`~pyspark.ml.classification.OneVsRest`\n\n    SparkXGBRegressor automatically supports most of the parameters in\n    :py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in\n    :py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict`\n    method.\n\n    To enable GPU support, set `device` to `cuda` or `gpu`.\n\n    SparkXGBRegressor doesn't support setting `base_margin` explicitly as well, but\n    support another param called `base_margin_col`. 
see doc below for more details.\n\n    SparkXGBRegressor doesn't support `validate_features` and `output_margin` param.\n\n    SparkXGBRegressor doesn't support setting `nthread` xgboost param, instead, the\n    `nthread` param for each xgboost worker will be set equal to `spark.task.cpus`\n    config value.\n\n\n    Parameters\n    ----------\n\n    features_col:\n        When the value is string, it requires the features column name to be vector type.\n        When the value is a list of string, it requires all the feature columns to be numeric types.\n    label_col:\n        Label column name. Default to \"label\".\n    prediction_col:\n        Prediction column name. Default to \"prediction\"\n    pred_contrib_col:\n        Contribution prediction column name.\n    validation_indicator_col:\n        For params related to `xgboost.XGBRegressor` training with\n        evaluation dataset's supervision,\n        set :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col`\n        parameter instead of setting the `eval_set` parameter in `xgboost.XGBRegressor`\n        fit method.\n    weight_col:\n        To specify the weight of the training and validation dataset, set\n        :py:attr:`xgboost.spark.SparkXGBRegressor.weight_col` parameter instead of setting\n        `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRegressor`\n        fit method.\n    base_margin_col:\n        To specify the base margins of the training and validation\n        dataset, set :py:attr:`xgboost.spark.SparkXGBRegressor.base_margin_col` parameter\n        instead of setting `base_margin` and `base_margin_eval_set` in the\n        `xgboost.XGBRegressor` fit method.\n\n    num_workers:\n        How many XGBoost workers to be used to train.\n        Each XGBoost worker corresponds to one spark task.\n    device:\n\n        .. 
versionadded:: 2.0.0\n\n        Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.\n\n    force_repartition:\n        Boolean value to specify if forcing the input dataset to be repartitioned\n        before XGBoost training.\n    repartition_random_shuffle:\n        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.\n    enable_sparse_data_optim:\n        Boolean value to specify if enabling sparse data optimization, if True,\n        Xgboost DMatrix object will be constructed from sparse matrix instead of\n        dense matrix.\n    launch_tracker_on_driver:\n        Boolean value to indicate whether the tracker should be launched on the driver side or\n        the executor side.\n    coll_cfg:\n        The collective configuration. See :py:class:`~xgboost.collective.Config`\n\n    kwargs:\n        A dictionary of xgboost parameters, please refer to\n        https://xgboost.readthedocs.io/en/stable/parameter.html\n\n    Note\n    ----\n\n    The Parameters chart above contains parameters that need special handling.\n    For a full list of parameters, see entries with `Param(parent=...` below.\n\n    This API is experimental.\n\n\n    Examples\n    --------\n\n    >>> from xgboost.spark import SparkXGBRegressor\n    >>> from pyspark.ml.linalg import Vectors\n    >>> df_train = spark.createDataFrame([\n    ...     (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0),\n    ...     (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0),\n    ...     (Vectors.dense(4.0, 5.0, 6.0), 2, True, 1.0),\n    ...     (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 3, True, 2.0),\n    ... ], [\"features\", \"label\", \"isVal\", \"weight\"])\n    >>> df_test = spark.createDataFrame([\n    ...     (Vectors.dense(1.0, 2.0, 3.0), ),\n    ...     (Vectors.sparse(3, {1: 1.0, 2: 5.5}), )\n    ... ], [\"features\"])\n    >>> xgb_regressor = SparkXGBRegressor(max_depth=5, missing=0.0,\n    ... 
validation_indicator_col='isVal', weight_col='weight',\n    ... early_stopping_rounds=1, eval_metric='rmse')\n    >>> xgb_reg_model = xgb_regressor.fit(df_train)\n    >>> xgb_reg_model.transform(df_test)\n\n    \"\"\"\n\n    @keyword_only\n    def __init__(  # pylint:disable=too-many-arguments\n        self,\n        *,\n        features_col: Union[str, List[str]] = \"features\",\n        label_col: str = \"label\",\n        prediction_col: str = \"prediction\",\n        pred_contrib_col: Optional[str] = None,\n        validation_indicator_col: Optional[str] = None,\n        weight_col: Optional[str] = None,\n        base_margin_col: Optional[str] = None,\n        num_workers: int = 1,\n        device: Optional[str] = None,\n        force_repartition: bool = False,\n        repartition_random_shuffle: bool = False,\n        enable_sparse_data_optim: bool = False,\n        launch_tracker_on_driver: bool = True,\n        coll_cfg: Optional[Config] = None,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__()\n        input_kwargs = self._input_kwargs\n        self.setParams(**input_kwargs)\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBRegressor]:\n        return XGBRegressor\n\n    @classmethod\n    def _pyspark_model_cls(cls) -> Type[\"SparkXGBRegressorModel\"]:\n        return SparkXGBRegressorModel\n\n    def _validate_params(self) -> None:\n        super()._validate_params()\n        if self.isDefined(self.qid_col):\n            raise ValueError(\n                \"Spark Xgboost regressor estimator does not support `qid_col` param.\"\n            )\n\n\nclass SparkXGBRegressorModel(_SparkXGBModel):\n    \"\"\"\n    The model returned by :func:`xgboost.spark.SparkXGBRegressor.fit`\n\n    .. 
Note:: This API is experimental.\n    \"\"\"\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBRegressor]:\n        return XGBRegressor\n\n\n_set_pyspark_xgb_cls_param_attrs(SparkXGBRegressor, SparkXGBRegressorModel)\n\n\nclass SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPredictionCol):\n    \"\"\"SparkXGBClassifier is a PySpark ML estimator. It implements the XGBoost\n    classification algorithm based on XGBoost python library, and it can be used in\n    PySpark Pipeline and PySpark ML meta algorithms like\n    - :py:class:`~pyspark.ml.tuning.CrossValidator`/\n    - :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/\n    - :py:class:`~pyspark.ml.classification.OneVsRest`\n\n    SparkXGBClassifier automatically supports most of the parameters in\n    :py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in\n    :py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict`\n    method.\n\n    To enable GPU support, set `device` to `cuda` or `gpu`.\n\n    SparkXGBClassifier doesn't support setting `base_margin` explicitly as well, but\n    support another param called `base_margin_col`. see doc below for more details.\n\n    SparkXGBClassifier doesn't support setting `output_margin`, but we can get output\n    margin from the raw prediction column. See `raw_prediction_col` param doc below for\n    more details.\n\n    SparkXGBClassifier doesn't support `validate_features` and `output_margin` param.\n\n    SparkXGBClassifier doesn't support setting `nthread` xgboost param, instead, the\n    `nthread` param for each xgboost worker will be set equal to `spark.task.cpus`\n    config value.\n\n\n    Parameters\n    ----------\n\n    features_col:\n        When the value is string, it requires the features column name to be vector type.\n        When the value is a list of string, it requires all the feature columns to be numeric types.\n    label_col:\n        Label column name. 
Default to \"label\".\n    prediction_col:\n        Prediction column name. Default to \"prediction\"\n    probability_col:\n        Column name for predicted class conditional probabilities. Default to probabilityCol\n    raw_prediction_col:\n        The `output_margin=True` is implicitly supported by the\n        `rawPredictionCol` output column, which is always returned with the predicted margin\n        values.\n    pred_contrib_col:\n        Contribution prediction column name.\n    validation_indicator_col:\n        For params related to `xgboost.XGBClassifier` training with\n        evaluation dataset's supervision,\n        set :py:attr:`xgboost.spark.SparkXGBClassifier.validation_indicator_col`\n        parameter instead of setting the `eval_set` parameter in `xgboost.XGBClassifier`\n        fit method.\n    weight_col:\n        To specify the weight of the training and validation dataset, set\n        :py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting\n        `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier`\n        fit method.\n    base_margin_col:\n        To specify the base margins of the training and validation\n        dataset, set :py:attr:`xgboost.spark.SparkXGBClassifier.base_margin_col` parameter\n        instead of setting `base_margin` and `base_margin_eval_set` in the\n        `xgboost.XGBClassifier` fit method.\n\n    num_workers:\n        How many XGBoost workers to be used to train.\n        Each XGBoost worker corresponds to one spark task.\n    device:\n\n        .. 
versionadded:: 2.0.0\n\n        Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.\n\n    force_repartition:\n        Boolean value to specify if forcing the input dataset to be repartitioned\n        before XGBoost training.\n    repartition_random_shuffle:\n        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.\n    enable_sparse_data_optim:\n        Boolean value to specify if enabling sparse data optimization, if True,\n        Xgboost DMatrix object will be constructed from sparse matrix instead of\n        dense matrix.\n    launch_tracker_on_driver:\n        Boolean value to indicate whether the tracker should be launched on the driver side or\n        the executor side.\n    coll_cfg:\n        The collective configuration. See :py:class:`~xgboost.collective.Config`\n\n    kwargs:\n        A dictionary of xgboost parameters, please refer to\n        https://xgboost.readthedocs.io/en/stable/parameter.html\n\n    Note\n    ----\n\n    The Parameters chart above contains parameters that need special handling.\n    For a full list of parameters, see entries with `Param(parent=...` below.\n\n    This API is experimental.\n\n    Examples\n    --------\n\n    >>> from xgboost.spark import SparkXGBClassifier\n    >>> from pyspark.ml.linalg import Vectors\n    >>> df_train = spark.createDataFrame([\n    ...     (Vectors.dense(1.0, 2.0, 3.0), 0, False, 1.0),\n    ...     (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, False, 2.0),\n    ...     (Vectors.dense(4.0, 5.0, 6.0), 0, True, 1.0),\n    ...     (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, True, 2.0),\n    ... ], [\"features\", \"label\", \"isVal\", \"weight\"])\n    >>> df_test = spark.createDataFrame([\n    ...     (Vectors.dense(1.0, 2.0, 3.0), ),\n    ... ], [\"features\"])\n    >>> xgb_classifier = SparkXGBClassifier(max_depth=5, missing=0.0,\n    ...     validation_indicator_col='isVal', weight_col='weight',\n    ...     
early_stopping_rounds=1, eval_metric='logloss')\n    >>> xgb_clf_model = xgb_classifier.fit(df_train)\n    >>> xgb_clf_model.transform(df_test).show()\n\n    \"\"\"\n\n    @keyword_only\n    def __init__(  # pylint:disable=too-many-arguments\n        self,\n        *,\n        features_col: Union[str, List[str]] = \"features\",\n        label_col: str = \"label\",\n        prediction_col: str = \"prediction\",\n        probability_col: str = \"probability\",\n        raw_prediction_col: str = \"rawPrediction\",\n        pred_contrib_col: Optional[str] = None,\n        validation_indicator_col: Optional[str] = None,\n        weight_col: Optional[str] = None,\n        base_margin_col: Optional[str] = None,\n        num_workers: int = 1,\n        device: Optional[str] = None,\n        force_repartition: bool = False,\n        repartition_random_shuffle: bool = False,\n        enable_sparse_data_optim: bool = False,\n        launch_tracker_on_driver: bool = True,\n        coll_cfg: Optional[Config] = None,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__()\n        # The default 'objective' param value comes from sklearn `XGBClassifier` ctor,\n        # but in pyspark we will automatically set objective param depending on\n        # binary or multinomial input dataset, and we need to remove the fixed default\n        # param value as well to avoid causing ambiguity.\n        input_kwargs = self._input_kwargs\n        self.setParams(**input_kwargs)\n        self._setDefault(objective=None)\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBClassifier]:\n        return XGBClassifier\n\n    @classmethod\n    def _pyspark_model_cls(cls) -> Type[\"SparkXGBClassifierModel\"]:\n        return SparkXGBClassifierModel\n\n    def _validate_params(self) -> None:\n        super()._validate_params()\n        if self.isDefined(self.qid_col):\n            raise ValueError(\n                \"Spark Xgboost classifier estimator does not support `qid_col` param.\"\n  
          )\n        if self.getOrDefault(\"objective\"):  # pylint: disable=no-member\n            raise ValueError(\n                \"Setting custom 'objective' param is not allowed in 'SparkXGBClassifier'.\"\n            )\n\n\nclass SparkXGBClassifierModel(_ClassificationModel):\n    \"\"\"\n    The model returned by :func:`xgboost.spark.SparkXGBClassifier.fit`\n\n    .. Note:: This API is experimental.\n    \"\"\"\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBClassifier]:\n        return XGBClassifier\n\n\n_set_pyspark_xgb_cls_param_attrs(SparkXGBClassifier, SparkXGBClassifierModel)\n\n\nclass SparkXGBRanker(_SparkXGBEstimator):\n    \"\"\"SparkXGBRanker is a PySpark ML estimator. It implements the XGBoost\n    ranking algorithm based on XGBoost python library, and it can be used in\n    PySpark Pipeline and PySpark ML meta algorithms like\n    :py:class:`~pyspark.ml.tuning.CrossValidator`/\n    :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/\n    :py:class:`~pyspark.ml.classification.OneVsRest`\n\n    SparkXGBRanker automatically supports most of the parameters in\n    :py:class:`xgboost.XGBRanker` constructor and most of the parameters used in\n    :py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method.\n\n    To enable GPU support, set `device` to `cuda` or `gpu`.\n\n    SparkXGBRanker doesn't support setting `base_margin` explicitly as well, but support\n    another param called `base_margin_col`. see doc below for more details.\n\n    SparkXGBRanker doesn't support setting `output_margin`, but we can get output margin\n    from the raw prediction column. 
See `raw_prediction_col` param doc below for more\n    details.\n\n    SparkXGBRanker doesn't support `validate_features` and `output_margin` param.\n\n    SparkXGBRanker doesn't support setting `nthread` xgboost param, instead, the\n    `nthread` param for each xgboost worker will be set equal to `spark.task.cpus`\n    config value.\n\n\n    Parameters\n    ----------\n\n    features_col:\n        When the value is string, it requires the features column name to be vector type.\n        When the value is a list of string, it requires all the feature columns to be numeric types.\n    label_col:\n        Label column name. Default to \"label\".\n    prediction_col:\n        Prediction column name. Default to \"prediction\"\n    pred_contrib_col:\n        Contribution prediction column name.\n    validation_indicator_col:\n        For params related to `xgboost.XGBRanker` training with\n        evaluation dataset's supervision,\n        set :py:attr:`xgboost.spark.SparkXGBRanker.validation_indicator_col`\n        parameter instead of setting the `eval_set` parameter in :py:class:`xgboost.XGBRanker`\n        fit method.\n    weight_col:\n        To specify the weight of the training and validation dataset, set\n        :py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting\n        `sample_weight` and `sample_weight_eval_set` parameter in :py:class:`xgboost.XGBRanker`\n        fit method.\n    base_margin_col:\n        To specify the base margins of the training and validation\n        dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.base_margin_col` parameter\n        instead of setting `base_margin` and `base_margin_eval_set` in the\n        :py:class:`xgboost.XGBRanker` fit method.\n    qid_col:\n        Query id column name.\n    num_workers:\n        How many XGBoost workers to be used to train.\n        Each XGBoost worker corresponds to one spark task.\n    device:\n\n        .. 
versionadded:: 2.0.0\n\n        Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.\n\n    force_repartition:\n        Boolean value to specify if forcing the input dataset to be repartitioned\n        before XGBoost training.\n    repartition_random_shuffle:\n        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.\n    enable_sparse_data_optim:\n        Boolean value to specify if enabling sparse data optimization, if True,\n        Xgboost DMatrix object will be constructed from sparse matrix instead of\n        dense matrix.\n    launch_tracker_on_driver:\n        Boolean value to indicate whether the tracker should be launched on the driver side or\n        the executor side.\n    coll_cfg:\n        The collective configuration. See :py:class:`~xgboost.collective.Config`\n\n    kwargs:\n        A dictionary of xgboost parameters, please refer to\n        https://xgboost.readthedocs.io/en/stable/parameter.html\n\n    .. Note:: The Parameters chart above contains parameters that need special handling.\n        For a full list of parameters, see entries with `Param(parent=...` below.\n\n    .. Note:: This API is experimental.\n\n    Examples\n    --------\n\n    >>> from xgboost.spark import SparkXGBRanker\n    >>> from pyspark.ml.linalg import Vectors\n    >>> ranker = SparkXGBRanker(qid_col=\"qid\")\n    >>> df_train = spark.createDataFrame(\n    ...     [\n    ...         (Vectors.dense(1.0, 2.0, 3.0), 0, 0),\n    ...         (Vectors.dense(4.0, 5.0, 6.0), 1, 0),\n    ...         (Vectors.dense(9.0, 4.0, 8.0), 2, 0),\n    ...         (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),\n    ...         (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),\n    ...         (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),\n    ...     ],\n    ...     [\"features\", \"label\", \"qid\"],\n    ... )\n    >>> df_test = spark.createDataFrame(\n    ...     [\n    ...         (Vectors.dense(1.5, 2.0, 3.0), 0),\n    ...      
   (Vectors.dense(4.5, 5.0, 6.0), 0),\n    ...         (Vectors.dense(9.0, 4.5, 8.0), 0),\n    ...         (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1),\n    ...         (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1),\n    ...         (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1),\n    ...     ],\n    ...     [\"features\", \"qid\"],\n    ... )\n    >>> model = ranker.fit(df_train)\n    >>> model.transform(df_test).show()\n    \"\"\"\n\n    @keyword_only\n    def __init__(  # pylint:disable=too-many-arguments\n        self,\n        *,\n        features_col: Union[str, List[str]] = \"features\",\n        label_col: str = \"label\",\n        prediction_col: str = \"prediction\",\n        pred_contrib_col: Optional[str] = None,\n        validation_indicator_col: Optional[str] = None,\n        weight_col: Optional[str] = None,\n        base_margin_col: Optional[str] = None,\n        qid_col: Optional[str] = None,\n        num_workers: int = 1,\n        device: Optional[str] = None,\n        force_repartition: bool = False,\n        repartition_random_shuffle: bool = False,\n        enable_sparse_data_optim: bool = False,\n        launch_tracker_on_driver: bool = True,\n        coll_cfg: Optional[Config] = None,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__()\n        input_kwargs = self._input_kwargs\n        self.setParams(**input_kwargs)\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBRanker]:\n        return XGBRanker\n\n    @classmethod\n    def _pyspark_model_cls(cls) -> Type[\"SparkXGBRankerModel\"]:\n        return SparkXGBRankerModel\n\n    def _validate_params(self) -> None:\n        super()._validate_params()\n        if not self.isDefined(self.qid_col):\n            raise ValueError(\n                \"Spark Xgboost ranker estimator requires setting `qid_col` param.\"\n            )\n\n\nclass SparkXGBRankerModel(_SparkXGBModel):\n    \"\"\"\n    The model returned by :func:`xgboost.spark.SparkXGBRanker.fit`\n\n    .. 
Note:: This API is experimental.\n    \"\"\"\n\n    @classmethod\n    def _xgb_cls(cls) -> Type[XGBRanker]:\n        return XGBRanker\n\n\n_set_pyspark_xgb_cls_param_attrs(SparkXGBRanker, SparkXGBRankerModel)\n"
  },
  {
    "path": "python-package/xgboost/spark/params.py",
    "content": "\"\"\"Xgboost pyspark integration submodule for params.\"\"\"\n\nfrom typing import Dict\n\nfrom pyspark.ml.param import TypeConverters\nfrom pyspark.ml.param.shared import Param, Params\n\n\nclass HasArbitraryParamsDict(Params):\n    \"\"\"\n    This is a Params based class that is extended by _SparkXGBParams\n    and holds the variable to store the **kwargs parts of the XGBoost\n    input.\n    \"\"\"\n\n    arbitrary_params_dict: \"Param[Dict]\" = Param(\n        Params._dummy(),\n        \"arbitrary_params_dict\",\n        \"arbitrary_params_dict This parameter holds all of the additional parameters which are \"\n        \"not exposed as the XGBoost Spark estimator params but can be recognized by \"\n        \"underlying XGBoost library. It is stored as a dictionary.\",\n    )\n\n\nclass HasBaseMarginCol(Params):\n    \"\"\"\n    This is a Params based class that is extended by _SparkXGBParams\n    and holds the variable to store the base margin column part of XGboost.\n    \"\"\"\n\n    base_margin_col = Param(\n        Params._dummy(),\n        \"base_margin_col\",\n        \"This stores the name for the column of the base margin\",\n        typeConverter=TypeConverters.toString,\n    )\n\n\nclass HasFeaturesCols(Params):\n    \"\"\"\n    Mixin for param features_cols: a list of feature column names.\n    This parameter is taken effect only when GPU is enabled.\n    \"\"\"\n\n    features_cols = Param(\n        Params._dummy(),\n        \"features_cols\",\n        \"feature column names.\",\n        typeConverter=TypeConverters.toListString,\n    )\n\n    def __init__(self) -> None:\n        super().__init__()\n        self._setDefault(features_cols=[])\n\n\nclass HasEnableSparseDataOptim(Params):\n    \"\"\"\n    This is a Params based class that is extended by _SparkXGBParams\n    and holds the variable to store the boolean config of enabling sparse data optimization.\n    \"\"\"\n\n    enable_sparse_data_optim = Param(\n        
Params._dummy(),\n        \"enable_sparse_data_optim\",\n        \"This stores the boolean config of enabling sparse data optimization, if enabled, \"\n        \"Xgboost DMatrix object will be constructed from sparse matrix instead of \"\n        \"dense matrix. This config is disabled by default. If most of examples in your \"\n        \"training dataset contains sparse features, we suggest to enable this config.\",\n        typeConverter=TypeConverters.toBoolean,\n    )\n\n    def __init__(self) -> None:\n        super().__init__()\n        self._setDefault(enable_sparse_data_optim=False)\n\n\nclass HasQueryIdCol(Params):\n    \"\"\"\n    Mixin for param qid_col: query id column name.\n    \"\"\"\n\n    qid_col = Param(\n        Params._dummy(),\n        \"qid_col\",\n        \"query id column name\",\n        typeConverter=TypeConverters.toString,\n    )\n\n\nclass HasContribPredictionCol(Params):\n    \"\"\"\n    Mixin for param pred_contrib_col: contribution prediction column name.\n\n    Output is a 3-dim array, with (rows, groups, columns + 1) for classification case.\n    Else, it can be a 2 dimension for regression case.\n    \"\"\"\n\n    pred_contrib_col: \"Param[str]\" = Param(\n        Params._dummy(),\n        \"pred_contrib_col\",\n        \"feature contributions to individual predictions.\",\n        typeConverter=TypeConverters.toString,\n    )\n"
  },
  {
    "path": "python-package/xgboost/spark/summary.py",
    "content": "\"\"\"Xgboost training summary integration submodule.\"\"\"\n\nfrom dataclasses import dataclass, field\nfrom typing import Dict, List\n\n\n@dataclass\nclass XGBoostTrainingSummary:\n    \"\"\"\n    A class that holds the training and validation objective history\n    of an XGBoost model during its training process.\n    \"\"\"\n\n    train_objective_history: Dict[str, List[float]] = field(default_factory=dict)\n    validation_objective_history: Dict[str, List[float]] = field(default_factory=dict)\n\n    @staticmethod\n    def from_metrics(\n        metrics: Dict[str, Dict[str, List[float]]],\n    ) -> \"XGBoostTrainingSummary\":\n        \"\"\"\n        Create an XGBoostTrainingSummary instance from a nested dictionary of metrics.\n\n        Parameters\n        ----------\n        metrics : dict of str to dict of str to list of float\n            A dictionary containing training and validation metrics.\n            Example format:\n                {\n                    \"training\": {\"logloss\": [0.1, 0.08]},\n                    \"validation\": {\"logloss\": [0.12, 0.1]}\n                }\n\n        Returns\n        -------\n        A new instance of XGBoostTrainingSummary.\n\n        \"\"\"\n        train_objective_history = metrics.get(\"training\", {})\n        validation_objective_history = metrics.get(\"validation\", {})\n        return XGBoostTrainingSummary(\n            train_objective_history, validation_objective_history\n        )\n"
  },
  {
    "path": "python-package/xgboost/spark/utils.py",
    "content": "\"\"\"Xgboost pyspark integration submodule for helper functions.\"\"\"\n\n# pylint: disable=fixme\n\nimport inspect\nimport logging\nimport os\nimport sys\nimport uuid\nfrom threading import Thread\nfrom typing import Any, Callable, Dict, Optional, Set, Type, Union\n\nimport pyspark\nfrom pyspark import BarrierTaskContext, SparkConf, SparkContext, SparkFiles, TaskContext\nfrom pyspark.sql.session import SparkSession\n\nfrom ..collective import CommunicatorContext as CCtx\nfrom ..collective import Config\nfrom ..collective import _Args as CollArgs\nfrom ..collective import _ArgVals as CollArgsVals\nfrom ..core import Booster\nfrom ..sklearn import XGBModel\nfrom ..tracker import RabitTracker\n\n\ndef get_class_name(cls: Type) -> str:\n    \"\"\"Return the class name.\"\"\"\n    return f\"{cls.__module__}.{cls.__name__}\"\n\n\ndef _get_default_params_from_func(\n    func: Callable, unsupported_set: Set[str]\n) -> Dict[str, Any]:\n    \"\"\"Returns a dictionary of parameters and their default value of function fn.  
Only\n    the parameters with a default value will be included.\n\n    \"\"\"\n    sig = inspect.signature(func)\n    filtered_params_dict = {}\n    for parameter in sig.parameters.values():\n        # Remove parameters without a default value and those in the unsupported_set\n        if (\n            parameter.default is not parameter.empty\n            and parameter.name not in unsupported_set\n        ):\n            filtered_params_dict[parameter.name] = parameter.default\n    return filtered_params_dict\n\n\nclass CommunicatorContext(CCtx):\n    \"\"\"Context with PySpark specific task ID.\"\"\"\n\n    def __init__(self, context: BarrierTaskContext, **args: CollArgsVals) -> None:\n        args[\"dmlc_task_id\"] = str(context.partitionId())\n        super().__init__(**args)\n\n\ndef _start_tracker(host: str, n_workers: int, port: int = 0) -> CollArgs:\n    \"\"\"Start Rabit tracker with n_workers\"\"\"\n    args: CollArgs = {\"n_workers\": n_workers}\n    tracker = RabitTracker(n_workers=n_workers, host_ip=host, sortby=\"task\", port=port)\n    tracker.start()\n    thread = Thread(target=tracker.wait_for)\n    thread.daemon = True\n    thread.start()\n    args.update(tracker.worker_args())\n    return args\n\n\ndef _get_rabit_args(conf: Config, n_workers: int) -> CollArgs:\n    \"\"\"Get rabit context arguments to send to each worker.\"\"\"\n    assert conf.tracker_host_ip is not None\n    port = 0 if conf.tracker_port is None else conf.tracker_port\n    env = _start_tracker(conf.tracker_host_ip, n_workers, port)\n    return env\n\n\ndef _get_host_ip(context: BarrierTaskContext) -> str:\n    \"\"\"Gets the hostIP for Spark. This essentially gets the IP of the first worker.\"\"\"\n    task_ip_list = [info.address.split(\":\")[0] for info in context.getTaskInfos()]\n    return task_ip_list[0]\n\n\ndef _get_spark_session() -> SparkSession:\n    \"\"\"Get or create spark session. 
Note: This function can only be invoked from driver\n    side.\n\n    \"\"\"\n    if pyspark.TaskContext.get() is not None:\n        # This is a safety check.\n        raise RuntimeError(\n            \"_get_spark_session should not be invoked from executor side.\"\n        )\n    return SparkSession.builder.getOrCreate()\n\n\ndef get_logger(name: str, level: Optional[Union[str, int]] = None) -> logging.Logger:\n    \"\"\"Gets a logger by name, or creates and configures it for the first time.\"\"\"\n    logger = logging.getLogger(name)\n    if level is not None:\n        logger.setLevel(level)\n    else:\n        # Default to info if not set.\n        if logger.level == logging.NOTSET:\n            logger.setLevel(logging.INFO)\n    # If the logger is configured, skip the configure\n    if not logger.handlers and not logging.getLogger().handlers:\n        handler = logging.StreamHandler(sys.stderr)\n        formatter = logging.Formatter(\n            \"%(asctime)s %(levelname)s %(name)s: %(funcName)s %(message)s\"\n        )\n        handler.setFormatter(formatter)\n        logger.addHandler(handler)\n    return logger\n\n\ndef get_logger_level(name: str) -> Optional[int]:\n    \"\"\"Get the logger level for the given log name\"\"\"\n    logger = logging.getLogger(name)\n    return None if logger.level == logging.NOTSET else logger.level\n\n\ndef _get_max_num_concurrent_tasks(spark_context: SparkContext) -> int:\n    \"\"\"Gets the current max number of concurrent tasks.\"\"\"\n    # pylint: disable=protected-access\n    # spark 3.1 and above has a different API for fetching max concurrent tasks\n    if spark_context._jsc.sc().version() >= \"3.1\":\n        return spark_context._jsc.sc().maxNumConcurrentTasks(\n            spark_context._jsc.sc().resourceProfileManager().resourceProfileFromId(0)\n        )\n    return spark_context._jsc.sc().maxNumConcurrentTasks()\n\n\ndef _is_local(spark_context: SparkContext) -> bool:\n    \"\"\"Whether it is Spark local 
mode\"\"\"\n    # pylint: disable=protected-access\n    return spark_context._jsc.sc().isLocal()\n\n\ndef _is_standalone_or_localcluster(conf: SparkConf) -> bool:\n    master = conf.get(\"spark.master\")\n    return master is not None and (\n        master.startswith(\"spark://\") or master.startswith(\"local-cluster\")\n    )\n\n\ndef _get_gpu_id(task_context: TaskContext) -> int:\n    \"\"\"Get the gpu id from the task resources\"\"\"\n    if task_context is None:\n        # This is a safety check.\n        raise RuntimeError(\"_get_gpu_id should not be invoked from driver side.\")\n    resources = task_context.resources()\n    if \"gpu\" not in resources:\n        raise RuntimeError(\n            \"Couldn't get the gpu id, Please check the GPU resource configuration\"\n        )\n    # return the first gpu id.\n    return int(resources[\"gpu\"].addresses[0].strip())\n\n\ndef _get_or_create_tmp_dir() -> str:\n    root_dir = SparkFiles.getRootDirectory()\n    xgb_tmp_dir = os.path.join(root_dir, \"xgboost-tmp\")\n    if not os.path.exists(xgb_tmp_dir):\n        os.makedirs(xgb_tmp_dir)\n    return xgb_tmp_dir\n\n\ndef deserialize_xgb_model(\n    model: str, xgb_model_creator: Callable[[], XGBModel]\n) -> XGBModel:\n    \"\"\"\n    Deserialize an xgboost.XGBModel instance from the input model.\n    \"\"\"\n    xgb_model = xgb_model_creator()\n    xgb_model.load_model(bytearray(model.encode(\"utf-8\")))\n    return xgb_model\n\n\ndef serialize_booster(booster: Booster) -> str:\n    \"\"\"\n    Serialize the input booster to a string.\n\n    Parameters\n    ----------\n    booster:\n        an xgboost.core.Booster instance\n    \"\"\"\n    # TODO: change to use string io\n    tmp_file_name = os.path.join(_get_or_create_tmp_dir(), f\"{uuid.uuid4()}.json\")\n    booster.save_model(tmp_file_name)\n    with open(tmp_file_name, encoding=\"utf-8\") as f:\n        ser_model_string = f.read()\n    return ser_model_string\n\n\ndef deserialize_booster(model: str) -> Booster:\n 
   \"\"\"\n    Deserialize an xgboost.core.Booster from the input ser_model_string.\n    \"\"\"\n    booster = Booster()\n    # TODO: change to use string io\n    tmp_file_name = os.path.join(_get_or_create_tmp_dir(), f\"{uuid.uuid4()}.json\")\n    with open(tmp_file_name, \"w\", encoding=\"utf-8\") as f:\n        f.write(model)\n    booster.load_model(tmp_file_name)\n    return booster\n\n\ndef use_cuda(device: Optional[str]) -> bool:\n    \"\"\"Whether xgboost is using CUDA workers.\"\"\"\n    return device in (\"cuda\", \"gpu\")\n"
  },
  {
    "path": "python-package/xgboost/testing/__init__.py",
    "content": "\"\"\"Utilities for defining Python tests. The module is private and subject to frequent\nchange without notice.\n\n\"\"\"\n\n# pylint: disable=missing-function-docstring\nimport importlib.util\nimport os\nimport platform\nimport queue\nimport socket\nimport sys\nimport threading\nfrom contextlib import contextmanager\nfrom io import StringIO\nfrom platform import system\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    Generator,\n    List,\n    Optional,\n    Set,\n    Tuple,\n    TypedDict,\n    TypeVar,\n    Union,\n)\n\nimport numpy as np\nimport pytest\nfrom scipy import sparse\n\nimport xgboost as xgb\nfrom xgboost import RabitTracker\nfrom xgboost.core import ArrayLike\nfrom xgboost.sklearn import SklObjective\n\nfrom .._typing import PathLike\nfrom .data import (\n    IteratorForTest,\n    get_california_housing,\n    get_cancer,\n    get_digits,\n    get_sparse,\n    make_batches,\n    make_categorical,\n    make_sparse_regression,\n)\n\n# Used to be defined in this top level module.\nfrom .utils import non_decreasing, non_increasing  # NOLINT\n\nhypothesis = pytest.importorskip(\"hypothesis\")\n\n# pylint:disable=wrong-import-position,wrong-import-order\nfrom hypothesis import strategies\nfrom hypothesis.extra.numpy import arrays\n\ndatasets = pytest.importorskip(\"sklearn.datasets\")\n\nPytestSkip = TypedDict(\"PytestSkip\", {\"condition\": bool, \"reason\": str})\n\n\ndef has_ipv6() -> bool:\n    \"\"\"Check whether IPv6 is enabled on this host.\"\"\"\n    # connection error in macos, still need some fixes.\n    if system() not in (\"Linux\", \"Windows\"):\n        return False\n\n    if socket.has_ipv6:\n        try:\n            with (\n                socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as server,\n                socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client,\n            ):\n                server.bind((\"::1\", 0))\n                port = server.getsockname()[1]\n                
server.listen()\n\n                client.connect((\"::1\", port))\n                conn, _ = server.accept()\n\n                client.sendall(\"abc\".encode())\n                msg = conn.recv(3).decode()\n                # if the code can be executed to this point, the message should be\n                # correct.\n                assert msg == \"abc\"\n            return True\n        except OSError:\n            pass\n    return False\n\n\ndef no_mod(name: str) -> PytestSkip:\n    spec = importlib.util.find_spec(name)\n    return {\"condition\": spec is None, \"reason\": f\"{name} is not installed.\"}\n\n\ndef no_ipv6() -> PytestSkip:\n    \"\"\"PyTest skip mark for IPv6.\"\"\"\n    return {\"condition\": not has_ipv6(), \"reason\": \"IPv6 is required to be enabled.\"}\n\n\ndef not_linux() -> PytestSkip:\n    return {\"condition\": system() != \"Linux\", \"reason\": \"Linux is required.\"}\n\n\ndef no_ubjson() -> PytestSkip:\n    return no_mod(\"ubjson\")\n\n\ndef no_sklearn() -> PytestSkip:\n    return no_mod(\"sklearn\")\n\n\ndef no_dask() -> PytestSkip:\n    return no_mod(\"dask\")\n\n\ndef no_loky() -> PytestSkip:\n    return no_mod(\"loky\")\n\n\ndef no_dask_ml() -> PytestSkip:\n    if sys.platform.startswith(\"win\"):\n        return {\"reason\": \"Unsupported platform.\", \"condition\": True}\n    return no_mod(\"dask_ml\")\n\n\ndef no_spark() -> PytestSkip:\n    if sys.platform.startswith(\"win\") or sys.platform.startswith(\"darwin\"):\n        return {\"reason\": \"Unsupported platform.\", \"condition\": True}\n    return no_mod(\"pyspark\")\n\n\ndef no_pandas() -> PytestSkip:\n    return no_mod(\"pandas\")\n\n\ndef no_arrow() -> PytestSkip:\n    return no_mod(\"pyarrow\")\n\n\ndef no_polars() -> PytestSkip:\n    return no_mod(\"polars\")\n\n\ndef no_modin() -> PytestSkip:\n    try:\n        import modin.pandas as md\n\n        md.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=[\"a\", \"b\", \"c\"])\n\n    except ImportError:\n        return 
{\"reason\": \"Failed import modin.\", \"condition\": True}\n    return {\"reason\": \"Failed import modin.\", \"condition\": True}\n\n\ndef no_matplotlib() -> PytestSkip:\n    reason = \"Matplotlib is not installed.\"\n    try:\n        import matplotlib.pyplot as _  # noqa\n\n        return {\"condition\": False, \"reason\": reason}\n    except ImportError:\n        return {\"condition\": True, \"reason\": reason}\n\n\ndef no_dask_cuda() -> PytestSkip:\n    return no_mod(\"dask_cuda\")\n\n\ndef no_cudf() -> PytestSkip:\n    return no_mod(\"cudf\")\n\n\ndef no_cupy() -> PytestSkip:\n    skip_cupy = no_mod(\"cupy\")\n    if not skip_cupy[\"condition\"] and system() == \"Windows\":\n        import cupy as cp\n\n        # Cupy might run into issue on Windows due to missing compiler\n        try:\n            cp.array([1, 2, 3]).sum()\n        except Exception:  # pylint: disable=broad-except\n            skip_cupy[\"condition\"] = True\n    return skip_cupy\n\n\ndef no_dask_cudf() -> PytestSkip:\n    return no_mod(\"dask_cudf\")\n\n\ndef no_graphviz() -> PytestSkip:\n    return no_mod(\"graphviz\")\n\n\ndef no_rmm() -> PytestSkip:\n    return no_mod(\"rmm\")\n\n\ndef no_multiple(*args: Any) -> PytestSkip:\n    condition = False\n    reason = \"\"\n    for arg in args:\n        condition = condition or arg[\"condition\"]\n        if arg[\"condition\"]:\n            reason = arg[\"reason\"]\n            break\n    return {\"condition\": condition, \"reason\": reason}\n\n\ndef skip_win() -> PytestSkip:\n    return {\"reason\": \"Unsupported platform.\", \"condition\": is_windows()}\n\n\ndef make_regression(\n    n_samples: int, n_features: int, use_cupy: bool\n) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:\n    \"\"\"Make a simple regression dataset.\"\"\"\n    X, y, w = make_batches(n_samples, n_features, 1, use_cupy)\n    return X[0], y[0], w[0]\n\n\ndef make_batches_sparse(\n    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float\n) -> 
Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:\n    X = []\n    y = []\n    w = []\n    rng = np.random.RandomState(1994)\n    for _ in range(n_batches):\n        _X = sparse.random(\n            n_samples_per_batch,\n            n_features,\n            1.0 - sparsity,\n            format=\"csr\",\n            dtype=np.float32,\n            random_state=rng,\n        )\n        _y = rng.randn(n_samples_per_batch)\n        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)\n        X.append(_X)\n        y.append(_y)\n        w.append(_w)\n    return X, y, w\n\n\nclass TestDataset:\n    \"\"\"Contains a dataset in numpy format as well as the relevant objective and metric.\"\"\"\n\n    def __init__(\n        self, name: str, get_dataset: Callable, objective: str, metric: str\n    ) -> None:\n        self.name = name\n        self.objective = objective\n        self.metric = metric\n        self.X, self.y = get_dataset()\n        self.w: Optional[np.ndarray] = None\n        self.margin: Optional[np.ndarray] = None\n\n    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:\n        params_in[\"objective\"] = self.objective\n        params_in[\"eval_metric\"] = self.metric\n        if self.objective == \"multi:softmax\":\n            params_in[\"num_class\"] = int(np.max(self.y) + 1)\n        return params_in\n\n    def get_dmat(self) -> xgb.DMatrix:\n        return xgb.DMatrix(\n            self.X,\n            self.y,\n            weight=self.w,\n            base_margin=self.margin,\n            enable_categorical=True,\n        )\n\n    def get_device_dmat(self, max_bin: Optional[int]) -> xgb.QuantileDMatrix:\n        import cupy as cp\n\n        w = None if self.w is None else cp.array(self.w)\n        X = cp.array(self.X, dtype=np.float32)\n        y = cp.array(self.y, dtype=np.float32)\n        return xgb.QuantileDMatrix(\n            X, y, weight=w, base_margin=self.margin, max_bin=max_bin\n        )\n\n    def 
get_external_dmat(self) -> xgb.DMatrix:\n        n_samples = self.X.shape[0]\n        n_batches = 10\n        per_batch = n_samples // n_batches + 1\n\n        predictor = []\n        response = []\n        weight = []\n        for i in range(n_batches):\n            beg = i * per_batch\n            end = min((i + 1) * per_batch, n_samples)\n            assert end != beg\n            X = self.X[beg:end, ...]\n            y = self.y[beg:end]\n            w = self.w[beg:end] if self.w is not None else None\n            predictor.append(X)\n            response.append(y)\n            if w is not None:\n                weight.append(w)\n\n        it = IteratorForTest(\n            predictor,\n            response,\n            weight if weight else None,\n            cache=\"cache\",\n            on_host=False,\n        )\n        return xgb.DMatrix(it)\n\n    def __repr__(self) -> str:\n        return self.name\n\n\ndef make_ltr(\n    n_samples: int,\n    n_features: int,\n    n_query_groups: int,\n    max_rel: int,\n    sort_qid: bool = True,\n) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:\n    \"\"\"Make a dataset for testing LTR.\"\"\"\n    rng = np.random.default_rng(1994)\n    X = rng.normal(0, 1.0, size=n_samples * n_features).reshape(n_samples, n_features)\n    y = np.sum(X, axis=1)\n    y -= y.min()\n    y = np.round(y / y.max() * max_rel).astype(np.int32)\n\n    qid = rng.integers(0, n_query_groups, size=n_samples, dtype=np.int32)\n    w = rng.normal(0, 1.0, size=n_query_groups)\n    w -= np.min(w)\n    w /= np.max(w)\n    if sort_qid:\n        qid = np.sort(qid)\n    return X, y, qid, w\n\n\ndef _cat_sampled_from() -> strategies.SearchStrategy:\n    @strategies.composite\n    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:\n        n_samples = draw(strategies.integers(2, 512))\n        n_features = draw(strategies.integers(1, 4))\n        n_cats = draw(strategies.integers(1, 128))\n        sparsity = draw(\n            
strategies.floats(\n                min_value=0,\n                max_value=1,\n                allow_nan=False,\n                allow_infinity=False,\n                allow_subnormal=False,\n            )\n        )\n        return n_samples, n_features, n_cats, sparsity\n\n    def _build(args: Tuple[int, int, int, float]) -> TestDataset:\n        n_samples = args[0]\n        n_features = args[1]\n        n_cats = args[2]\n        sparsity = args[3]\n        return TestDataset(\n            f\"{n_samples}x{n_features}-{n_cats}-{sparsity}\",\n            lambda: make_categorical(\n                n_samples, n_features, n_cats, onehot=False, sparsity=sparsity\n            ),\n            \"reg:squarederror\",\n            \"rmse\",\n        )\n\n    return _make_cat().map(_build)  # pylint: disable=no-member\n\n\ncategorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()\n\nsparse_datasets_strategy = strategies.sampled_from(\n    [\n        TestDataset(\n            \"1e5x8-0.95-csr\",\n            lambda: make_sparse_regression(int(1e5), 8, 0.95, False),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n        TestDataset(\n            \"1e5x8-0.5-csr\",\n            lambda: make_sparse_regression(int(1e5), 8, 0.5, False),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n        TestDataset(\n            \"1e5x8-0.5-dense\",\n            lambda: make_sparse_regression(int(1e5), 8, 0.5, True),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n        TestDataset(\n            \"1e5x8-0.05-csr\",\n            lambda: make_sparse_regression(int(1e5), 8, 0.05, False),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n        TestDataset(\n            \"1e5x8-0.05-dense\",\n            lambda: make_sparse_regression(int(1e5), 8, 0.05, True),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n    ]\n)\n\n\ndef make_datasets_with_margin(\n    
unweighted_strategy: strategies.SearchStrategy,\n) -> Callable[[], strategies.SearchStrategy[TestDataset]]:\n    \"\"\"Factory function for creating strategies that generates datasets with weight and\n    base margin.\n\n    \"\"\"\n\n    @strategies.composite\n    def weight_margin(draw: Callable) -> TestDataset:\n        data: TestDataset = draw(unweighted_strategy)\n        if draw(strategies.booleans()):\n            data.w = draw(\n                arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))\n            )\n        if draw(strategies.booleans()):\n            num_class = 1\n            if data.objective == \"multi:softmax\":\n                num_class = int(np.max(data.y) + 1)\n            elif data.name.startswith(\"mtreg\"):\n                num_class = data.y.shape[1]\n\n            data.margin = draw(\n                arrays(\n                    np.float64,\n                    (data.y.shape[0] * num_class),\n                    elements=strategies.floats(0.5, 1.0),\n                )\n            )\n            assert data.margin is not None\n            if num_class != 1:\n                data.margin = data.margin.reshape(data.y.shape[0], num_class)\n\n        return data\n\n    return weight_margin\n\n\n# A strategy for drawing from a set of example datasets. 
May add random weights to the\n# dataset\ndef make_dataset_strategy() -> strategies.SearchStrategy[TestDataset]:\n    _unweighted_datasets_strategy = strategies.sampled_from(\n        [\n            TestDataset(\n                \"calif_housing\", get_california_housing, \"reg:squarederror\", \"rmse\"\n            ),\n            TestDataset(\n                \"calif_housing-l1\", get_california_housing, \"reg:absoluteerror\", \"mae\"\n            ),\n            TestDataset(\"cancer\", get_cancer, \"binary:logistic\", \"logloss\"),\n            TestDataset(\"sparse\", get_sparse, \"reg:squarederror\", \"rmse\"),\n            TestDataset(\"sparse-l1\", get_sparse, \"reg:absoluteerror\", \"mae\"),\n            TestDataset(\n                \"empty\",\n                lambda: (np.empty((0, 100)), np.empty(0)),\n                \"reg:squarederror\",\n                \"rmse\",\n            ),\n        ]\n    )\n    return make_datasets_with_margin(_unweighted_datasets_strategy)()\n\n\n_unweighted_multi_datasets_strategy = strategies.sampled_from(\n    [\n        TestDataset(\"digits\", get_digits, \"multi:softmax\", \"mlogloss\"),\n        TestDataset(\n            \"mtreg\",\n            lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),\n            \"reg:squarederror\",\n            \"rmse\",\n        ),\n        TestDataset(\n            \"mtreg-l1\",\n            lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),\n            \"reg:absoluteerror\",\n            \"mae\",\n        ),\n    ]\n)\n\n# A strategy for drawing from a set of multi-target/multi-class datasets.\nmulti_dataset_strategy = make_datasets_with_margin(\n    _unweighted_multi_datasets_strategy\n)()\n\n\nM = TypeVar(\"M\", xgb.Booster, xgb.XGBModel)\n\n\ndef logregobj(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Binary regression custom objective.\"\"\"\n    labels = dtrain.get_label()\n    preds = 1.0 / (1.0 + 
np.exp(-preds))\n    grad = preds - labels\n    hess = preds * (1.0 - preds)\n    return grad, hess\n\n\ndef eval_error_metric(\n    predt: np.ndarray, dtrain: xgb.DMatrix, rev_link: bool\n) -> Tuple[str, np.float64]:\n    \"\"\"Evaluation metric for xgb.train.\n\n    Parameters\n    ----------\n    rev_link : Whether the metric needs to apply the reverse link function (activation).\n\n    \"\"\"\n    label = dtrain.get_label()\n    if rev_link:\n        predt = 1.0 / (1.0 + np.exp(-predt))\n    assert (0.0 <= predt).all() and (predt <= 1.0).all()\n    r = np.zeros(predt.shape)\n    gt = predt > 0.5\n    if predt.size == 0:\n        return \"CustomErr\", np.float64(0.0)\n    r[gt] = 1 - label[gt]\n    le = predt <= 0.5\n    r[le] = label[le]\n    return \"CustomErr\", np.sum(r)\n\n\ndef eval_error_metric_skl(\n    y_true: np.ndarray, y_score: np.ndarray, rev_link: bool = False\n) -> np.float64:\n    \"\"\"Evaluation metric that looks like metrics provided by sklearn.\"\"\"\n\n    if rev_link:\n        y_score = 1.0 / (1.0 + np.exp(-y_score))\n    assert (0.0 <= y_score).all() and (y_score <= 1.0).all()\n\n    r = np.zeros(y_score.shape)\n    gt = y_score > 0.5\n    r[gt] = 1 - y_true[gt]\n    le = y_score <= 0.5\n    r[le] = y_true[le]\n    return np.sum(r)\n\n\ndef root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:\n    err = y_score - y_true\n    rmse = np.sqrt(np.dot(err, err) / y_score.size)\n    return rmse\n\n\ndef softmax(x: np.ndarray) -> np.ndarray:\n    e = np.exp(x)\n    return e / np.sum(e)\n\n\ndef softprob_obj(\n    classes: int, use_cupy: bool = False, order: str = \"C\", gdtype: str = \"float32\"\n) -> SklObjective:\n    \"\"\"Custom softprob objective for testing.\n\n    Parameters\n    ----------\n    use_cupy :\n        Whether the objective should return cupy arrays.\n    order :\n        The order of gradient matrices. \"C\" or \"F\".\n    gdtype :\n        DType for gradient. Hessian is not set. 
This is for testing asymmetric types.\n    \"\"\"\n    if use_cupy:\n        import cupy as backend\n    else:\n        backend = np\n\n    def objective(\n        labels: backend.ndarray, predt: backend.ndarray\n    ) -> Tuple[backend.ndarray, backend.ndarray]:\n        rows = labels.shape[0]\n        grad = backend.zeros((rows, classes), dtype=np.float32)\n        hess = backend.zeros((rows, classes), dtype=np.float32)\n        eps = 1e-6\n        for r in range(predt.shape[0]):\n            target = labels[r]\n            p = softmax(predt[r, :])\n            for c in range(predt.shape[1]):\n                assert target >= 0 or target <= classes\n                g = p[c] - 1.0 if c == target else p[c]\n                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)\n                grad[r, c] = g\n                hess[r, c] = h\n\n        grad = grad.reshape((rows, classes))\n        hess = hess.reshape((rows, classes))\n        grad = backend.require(grad, requirements=order, dtype=gdtype)\n        hess = backend.require(hess, requirements=order)\n        return grad, hess\n\n    return objective\n\n\ndef ls_obj(\n    y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None\n) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Least squared error.\"\"\"\n    grad = y_pred - y_true\n    hess = np.ones(grad.shape)\n    if sample_weight is not None:\n        grad *= sample_weight\n        hess *= sample_weight\n    return grad, hess\n\n\nclass DirectoryExcursion:\n    \"\"\"Change directory.  
Change back and optionally cleaning up the directory when\n    exit.\n\n    \"\"\"\n\n    def __init__(self, path: PathLike, cleanup: bool = False):\n        self.path = path\n        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))\n        self.cleanup = cleanup\n        self.files: Set[str] = set()\n\n    def __enter__(self) -> None:\n        os.chdir(self.path)\n        if self.cleanup:\n            self.files = {\n                os.path.join(root, f)\n                for root, subdir, files in os.walk(os.path.expanduser(self.path))\n                for f in files\n            }\n\n    def __exit__(self, *args: Any) -> None:\n        os.chdir(self.curdir)\n        if self.cleanup:\n            files = {\n                os.path.join(root, f)\n                for root, subdir, files in os.walk(os.path.expanduser(self.path))\n                for f in files\n            }\n            diff = files.difference(self.files)\n            for f in diff:\n                os.remove(f)\n\n\n@contextmanager\ndef captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:\n    \"\"\"Reassign stdout temporarily in order to test printed statements\n    Taken from:\n    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python\n\n    Also works for pytest.\n\n    \"\"\"\n    new_out, new_err = StringIO(), StringIO()\n    old_out, old_err = sys.stdout, sys.stderr\n    try:\n        sys.stdout, sys.stderr = new_out, new_err\n        yield sys.stdout, sys.stderr\n    finally:\n        sys.stdout, sys.stderr = old_out, old_err\n\n\ndef timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:\n    \"\"\"Make a pytest mark for the `pytest-timeout` package.\n\n    Parameters\n    ----------\n    sec :\n        Timeout seconds.\n    enable :\n        Control whether timeout should be applied, used for debugging.\n\n    Returns\n    -------\n    pytest.mark.timeout\n    \"\"\"\n\n    if enable:\n        
return pytest.mark.timeout(sec, *args, **kwargs)\n    return pytest.mark.timeout(None, *args, **kwargs)\n\n\ndef setup_rmm_pool(_: Any, pytestconfig: pytest.Config) -> None:\n    if pytestconfig.getoption(\"--use-rmm-pool\"):\n        if no_rmm()[\"condition\"]:\n            raise ImportError(\"The --use-rmm-pool option requires the RMM package\")\n        if no_dask_cuda()[\"condition\"]:\n            raise ImportError(\n                \"The --use-rmm-pool option requires the dask_cuda package\"\n            )\n        import rmm\n        from dask_cuda.utils import get_n_gpus\n\n        rmm.reinitialize(\n            pool_allocator=True,\n            initial_pool_size=1024 * 1024 * 1024,\n            devices=list(range(get_n_gpus())),\n        )\n\n\ndef demo_dir(path: str) -> str:\n    \"\"\"Look for the demo directory based on the test file name.\"\"\"\n    path = normpath(os.path.dirname(path))\n    while True:\n        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]\n        subdirs = [os.path.basename(d) for d in subdirs]\n        if \"demo\" in subdirs:\n            return os.path.join(path, \"demo\")\n        new_path = normpath(os.path.join(path, os.path.pardir))\n        assert new_path != path\n        path = new_path\n\n\ndef normpath(path: str) -> str:\n    return os.path.normpath(os.path.abspath(path))\n\n\ndef data_dir(path: str) -> str:\n    return os.path.join(demo_dir(path), \"data\")\n\n\ndef load_agaricus(path: str) -> Tuple[xgb.DMatrix, xgb.DMatrix]:\n    dpath = data_dir(path)\n    dtrain = xgb.DMatrix(os.path.join(dpath, \"agaricus.txt.train?format=libsvm\"))\n    dtest = xgb.DMatrix(os.path.join(dpath, \"agaricus.txt.test?format=libsvm\"))\n    return dtrain, dtest\n\n\ndef project_root(path: str) -> str:\n    return normpath(os.path.join(demo_dir(path), os.path.pardir))\n\n\ndef run_with_rabit(\n    world_size: int, test_fn: Callable[..., Any], *args: Any, **kwargs: Any\n) -> None:\n    exception_queue: queue.Queue = 
queue.Queue()\n\n    def run_worker(rabit_env: Dict[str, Union[str, int]]) -> None:\n        try:\n            with xgb.collective.CommunicatorContext(**rabit_env):\n                test_fn(*args, **kwargs)\n        except Exception as e:  # pylint: disable=broad-except\n            exception_queue.put(e)\n\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=world_size)\n    tracker.start()\n\n    workers = []\n    for _ in range(world_size):\n        worker = threading.Thread(target=run_worker, args=(tracker.worker_args(),))\n        workers.append(worker)\n        worker.start()\n    for worker in workers:\n        worker.join()\n        assert exception_queue.empty(), f\"Worker failed: {exception_queue.get()}\"\n\n    tracker.wait_for()\n\n\ndef column_split_feature_names(\n    feature_names: List[Union[str, int]], world_size: int\n) -> List[str]:\n    \"\"\"Get the global list of feature names from the local feature names.\"\"\"\n    return [\n        f\"{rank}.{feature}\" for rank in range(world_size) for feature in feature_names\n    ]\n\n\ndef is_windows() -> bool:\n    \"\"\"Check if the current platform is Windows.\"\"\"\n    return platform.system() == \"Windows\"\n"
  },
  {
    "path": "python-package/xgboost/testing/basic_models.py",
    "content": "\"\"\"Tests for basic features of the Booster.\"\"\"\n\nfrom typing import Tuple\n\nimport numpy as np\n\nfrom xgboost import testing as tm\n\nfrom ..core import Booster, DMatrix\nfrom ..training import cv, train\nfrom .utils import Device\n\n\ndef run_custom_objective(  # pylint: disable=too-many-locals\n    tree_method: str,\n    device: Device,\n    dtrain: DMatrix,\n    dtest: DMatrix,\n) -> None:\n    \"\"\"Tests custom objective and metric functions.\"\"\"\n    param = {\n        \"max_depth\": 2,\n        \"eta\": 1,\n        \"objective\": \"reg:logistic\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n    num_round = 10\n\n    def evalerror(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, np.float64]:\n        return tm.eval_error_metric(preds, dtrain, rev_link=True)\n\n    # test custom_objective in training\n    bst = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        obj=tm.logregobj,\n        custom_metric=evalerror,\n    )\n    assert isinstance(bst, Booster)\n    preds = bst.predict(dtest)\n    labels = dtest.get_label()\n    err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(\n        len(preds)\n    )\n    assert err < 0.1\n\n    # test custom_objective in cross-validation\n    cv(\n        param,\n        dtrain,\n        num_round,\n        nfold=5,\n        seed=0,\n        obj=tm.logregobj,\n        custom_metric=evalerror,\n    )\n\n    # test maximize parameter\n    def neg_evalerror(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, float]:\n        labels = dtrain.get_label()\n        preds = 1.0 / (1.0 + np.exp(-preds))\n        return \"error\", float(sum(labels == (preds > 0.0))) / len(labels)\n\n    bst2 = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        obj=tm.logregobj,\n        custom_metric=neg_evalerror,\n    
    maximize=True,\n    )\n    preds2 = bst2.predict(dtest)\n    err2 = sum(\n        1 for i in range(len(preds2)) if int(preds2[i] > 0.5) != labels[i]\n    ) / float(len(preds2))\n    assert err == err2\n"
  },
  {
    "path": "python-package/xgboost/testing/callbacks.py",
    "content": "# pylint: disable=too-many-locals\n\"\"\"Tests for callback functions.\"\"\"\n\nimport json\nfrom itertools import product\nfrom typing import Dict, List, Tuple\n\nfrom ..callback import LearningRateScheduler\nfrom ..core import Booster, DMatrix\nfrom ..training import cv, train\nfrom .utils import Device\n\n\ndef run_eta_decay(\n    tree_method: str, dtrain: DMatrix, dtest: DMatrix, device: Device\n) -> None:\n    \"\"\"Test learning rate scheduler, used by both CPU and GPU tests.\"\"\"\n    scheduler = LearningRateScheduler\n\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n    num_round = 4\n\n    # learning_rates as a list\n    # init eta with 0 to check whether learning_rates work\n    param = {\n        \"max_depth\": 2,\n        \"eta\": 0,\n        \"objective\": \"binary:logistic\",\n        \"eval_metric\": \"error\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    evals_result: Dict[str, Dict] = {}\n    bst = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],\n        evals_result=evals_result,\n    )\n    eval_errors_0 = list(map(float, evals_result[\"eval\"][\"error\"]))\n    assert isinstance(bst, Booster)\n    # validation error should decrease, if eta > 0\n    assert eval_errors_0[0] > eval_errors_0[-1]\n\n    # init learning_rate with 0 to check whether learning_rates work\n    param = {\n        \"max_depth\": 2,\n        \"learning_rate\": 0,\n        \"objective\": \"binary:logistic\",\n        \"eval_metric\": \"error\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    evals_result = {}\n\n    bst = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],\n        evals_result=evals_result,\n    )\n    eval_errors_1 = list(map(float, evals_result[\"eval\"][\"error\"]))\n    assert 
isinstance(bst, Booster)\n    # validation error should decrease, if learning_rate > 0\n    assert eval_errors_1[0] > eval_errors_1[-1]\n\n    # check if learning_rates override default value of eta/learning_rate\n    param = {\n        \"max_depth\": 2,\n        \"objective\": \"binary:logistic\",\n        \"eval_metric\": \"error\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    evals_result = {}\n    bst = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler([0, 0, 0, 0])],\n        evals_result=evals_result,\n    )\n    eval_errors_2 = list(map(float, evals_result[\"eval\"][\"error\"]))\n    assert isinstance(bst, Booster)\n    # validation error should not decrease, if eta/learning_rate = 0\n    assert eval_errors_2[0] == eval_errors_2[-1]\n\n    # learning_rates as a customized decay function\n    def eta_decay(ithround: int, num_boost_round: int = num_round) -> float:\n        return num_boost_round / (ithround + 1)\n\n    evals_result = {}\n    bst = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler(eta_decay)],\n        evals_result=evals_result,\n    )\n    eval_errors_3 = list(map(float, evals_result[\"eval\"][\"error\"]))\n\n    assert isinstance(bst, Booster)\n\n    assert eval_errors_3[0] == eval_errors_2[0]\n\n    for i in range(1, len(eval_errors_0)):\n        assert eval_errors_3[i] != eval_errors_2[i]\n\n    cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])\n\n\ndef tree_methods_objs() -> List[Tuple[str, str]]:\n    \"\"\"Test parameters for the leaf output test.\"\"\"\n    return list(\n        product(\n            [\"approx\", \"hist\"],\n            [\n                \"binary:logistic\",\n                \"reg:absoluteerror\",\n                \"reg:quantileerror\",\n            ],\n        )\n    )\n\n\ndef run_eta_decay_leaf_output(\n    tree_method: str, objective: 
str, dtrain: DMatrix, dtest: DMatrix, device: Device\n) -> None:\n    \"\"\"check decay has effect on leaf output.\"\"\"\n    num_round = 4\n    scheduler = LearningRateScheduler\n\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n\n    param = {\n        \"max_depth\": 2,\n        \"objective\": objective,\n        \"eval_metric\": \"error\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    if objective == \"reg:quantileerror\":\n        param[\"quantile_alpha\"] = 0.3\n\n    def eta_decay_0(i: int) -> float:\n        return num_round / (i + 1)\n\n    bst0 = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler(eta_decay_0)],\n    )\n\n    def eta_decay_1(i: int) -> float:\n        if i > 1:\n            return 5.0\n        return num_round / (i + 1)\n\n    bst1 = train(\n        param,\n        dtrain,\n        num_round,\n        evals=watchlist,\n        callbacks=[scheduler(eta_decay_1)],\n    )\n    bst_json0 = bst0.save_raw(raw_format=\"json\")\n    bst_json1 = bst1.save_raw(raw_format=\"json\")\n\n    j0 = json.loads(bst_json0)\n    j1 = json.loads(bst_json1)\n\n    tree_2th_0 = j0[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][2]\n    tree_2th_1 = j1[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][2]\n    assert tree_2th_0[\"base_weights\"] == tree_2th_1[\"base_weights\"]\n    assert tree_2th_0[\"split_conditions\"] == tree_2th_1[\"split_conditions\"]\n\n    tree_3th_0 = j0[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][3]\n    tree_3th_1 = j1[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][3]\n    assert tree_3th_0[\"base_weights\"] != tree_3th_1[\"base_weights\"]\n    assert tree_3th_0[\"split_conditions\"] != tree_3th_1[\"split_conditions\"]\n"
  },
  {
    "path": "python-package/xgboost/testing/collective.py",
    "content": "\"\"\"Collective module related utilities.\"\"\"\n\nimport socket\n\n\ndef get_avail_port() -> int:\n    \"\"\"Returns a port that's available during the function call. It doesn't prevent the\n    port from being used after the function returns as we can't reserve the port. The\n    utility makes a test more likely to pass.\n\n    \"\"\"\n    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:\n        server.bind((\"127.0.0.1\", 0))\n        port = server.getsockname()[1]\n    return port\n"
  },
  {
    "path": "python-package/xgboost/testing/continuation.py",
    "content": "\"\"\"Tests for training continuation.\"\"\"\n\nimport json\nfrom typing import Any, Dict, TypeVar\n\nimport numpy as np\nimport pytest\nfrom hypothesis import strategies\n\nimport xgboost as xgb\n\n\n# pylint: disable=too-many-locals\ndef run_training_continuation_model_output(device: str, tree_method: str) -> None:\n    \"\"\"Run training continuation test.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n    n_samples = 64\n    n_features = 32\n    X, y = datasets.make_regression(n_samples, n_features, random_state=1)\n\n    dtrain = xgb.DMatrix(X, y)\n    params = {\n        \"tree_method\": tree_method,\n        \"max_depth\": \"2\",\n        \"gamma\": \"0.1\",\n        \"alpha\": \"0.01\",\n        \"device\": device,\n    }\n    bst_0 = xgb.train(params, dtrain, num_boost_round=64)\n    dump_0 = bst_0.get_dump(dump_format=\"json\")\n\n    bst_1 = xgb.train(params, dtrain, num_boost_round=32)\n    bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)\n    dump_1 = bst_1.get_dump(dump_format=\"json\")\n\n    T = TypeVar(\"T\", Dict[str, Any], float, str, int, list)\n\n    def recursive_compare(obj_0: T, obj_1: T) -> None:\n        if isinstance(obj_0, float):\n            assert np.isclose(obj_0, obj_1, atol=1e-6)\n        elif isinstance(obj_0, str):\n            assert obj_0 == obj_1\n        elif isinstance(obj_0, int):\n            assert obj_0 == obj_1\n        elif isinstance(obj_0, dict):\n            for i in range(len(obj_0.items())):\n                assert list(obj_0.keys())[i] == list(obj_1.keys())[i]\n                if list(obj_0.keys())[i] != \"missing\":\n                    recursive_compare(list(obj_0.values()), list(obj_1.values()))\n        else:\n            for i, lhs in enumerate(obj_0):\n                rhs = obj_1[i]\n                recursive_compare(lhs, rhs)\n\n    assert len(dump_0) == len(dump_1)\n\n    for i, lhs in enumerate(dump_0):\n        obj_0 = json.loads(lhs)\n        
obj_1 = json.loads(dump_1[i])\n        recursive_compare(obj_0, obj_1)\n\n\n# pylint: disable=too-many-arguments\ndef run_training_continuation_determinism(\n    *,\n    device: str,\n    tree_method: str,\n    subsample: float,\n    sampling_method: str,\n    rate_drop: float,\n    colsample_bylevel: float,\n    num_class: int,\n    seed_per_iteration: bool,\n) -> None:\n    \"\"\"Check that 2-session training (4+4 iters) equals single-session (8 iters).\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n\n    n_samples = 128\n    n_features = 16\n    total_rounds = 8\n    split_at = 4\n\n    if num_class > 1:\n        X, y = datasets.make_classification(\n            n_samples=n_samples,\n            n_features=n_features,\n            n_informative=6,\n            n_classes=num_class,\n            random_state=42,\n        )\n        objective = \"multi:softprob\"\n    else:\n        X, y = datasets.make_regression(\n            n_samples=n_samples, n_features=n_features, random_state=42\n        )\n        objective = \"reg:squarederror\"\n\n    dtrain = xgb.DMatrix(X, y)\n\n    params: Dict[str, Any] = {\n        \"device\": device,\n        \"tree_method\": tree_method,\n        \"max_depth\": 4,\n        \"objective\": objective,\n        \"subsample\": subsample,\n        \"sampling_method\": sampling_method,\n        \"rate_drop\": rate_drop,\n        \"colsample_bylevel\": colsample_bylevel,\n        \"seed_per_iteration\": seed_per_iteration,\n    }\n    if num_class > 1:\n        params[\"num_class\"] = num_class\n\n    bst_single = xgb.train(params, dtrain, num_boost_round=total_rounds)\n\n    bst_first = xgb.train(params, dtrain, num_boost_round=split_at)\n    bst_continued = xgb.train(\n        params, dtrain, num_boost_round=total_rounds - split_at, xgb_model=bst_first\n    )\n\n    config_single = json.loads(bst_single.save_config())\n    config_cont = json.loads(bst_continued.save_config())\n\n    rng_single = 
config_single[\"learner\"][\"generic_param\"][\"rng_state\"]\n    rng_cont = config_cont[\"learner\"][\"generic_param\"][\"rng_state\"]\n    assert rng_single == rng_cont, \"RNG states diverged between single and continued.\"\n\n    pred_single = bst_single.predict(dtrain)\n    pred_cont = bst_continued.predict(dtrain)\n    np.testing.assert_allclose(pred_single, pred_cont)\n\n\ndef make_determinism_strategy(tree_methods: list[str]) -> \"strategies.SearchStrategy\":\n    \"\"\"Hypothesis strategy for testing training continuation with sampling.\"\"\"\n    strategy = strategies.fixed_dictionaries(\n        {\n            \"subsample\": strategies.sampled_from([0.5, 1.0]),\n            \"sampling_method\": strategies.sampled_from([\"uniform\", \"gradient_based\"]),\n            \"rate_drop\": strategies.sampled_from([0.0, 0.5]),\n            \"colsample_bylevel\": strategies.sampled_from([0.5, 1.0]),\n            \"tree_method\": strategies.sampled_from(tree_methods),\n            \"num_class\": strategies.sampled_from([1, 3]),\n            \"seed_per_iteration\": strategies.booleans(),\n        }\n    ).filter(\n        lambda x: (\n            not (\n                x[\"sampling_method\"] == \"gradient_based\" and x[\"tree_method\"] != \"hist\"\n            )\n        )\n    )\n    return strategy\n"
  },
  {
    "path": "python-package/xgboost/testing/dask.py",
    "content": "\"\"\"Tests for dask shared by different test modules.\"\"\"\n\nfrom typing import Any, List, Literal, Tuple, Type, Union, cast, overload\n\nimport numpy as np\nimport pandas as pd\nfrom dask import array as da\nfrom dask import dataframe as dd\nfrom distributed import Client, get_worker\nfrom packaging.version import parse as parse_version\nfrom sklearn.datasets import make_classification\n\nimport xgboost as xgb\nimport xgboost.testing as tm\nfrom xgboost.compat import concat\nfrom xgboost.testing.updater import get_basescore\n\nfrom .. import dask as dxgb\nfrom .._typing import EvalsLog\nfrom ..dask import _get_rabit_args\nfrom ..dask.utils import _DASK_VERSION\nfrom .data import make_batches\nfrom .data import make_categorical as make_cat_local\nfrom .ordinal import make_recoded\nfrom .utils import Device, assert_allclose\n\n\ndef check_init_estimation_clf(\n    tree_method: str, device: Literal[\"cpu\", \"cuda\"], client: Client\n) -> None:\n    \"\"\"Test init estimation for classsifier.\"\"\"\n    X, y = make_classification(n_samples=4096 * 2, n_features=32, random_state=1994)\n    clf = xgb.XGBClassifier(\n        n_estimators=1, max_depth=1, tree_method=tree_method, device=device\n    )\n    clf.fit(X, y)\n    base_score = get_basescore(clf)\n\n    dx = da.from_array(X).rechunk(chunks=(32, None))\n    dy = da.from_array(y).rechunk(chunks=(32,))\n    dclf = dxgb.DaskXGBClassifier(\n        n_estimators=1,\n        max_depth=1,\n        tree_method=tree_method,\n        device=device,\n    )\n    dclf.client = client\n    dclf.fit(dx, dy)\n    dbase_score = get_basescore(dclf)\n    np.testing.assert_allclose(base_score, dbase_score)\n\n\ndef check_init_estimation_reg(\n    tree_method: str, device: Literal[\"cpu\", \"cuda\"], client: Client\n) -> None:\n    \"\"\"Test init estimation for regressor.\"\"\"\n    from sklearn.datasets import make_regression\n\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = 
make_regression(n_samples=4096 * 2, n_features=32, random_state=1994)\n    reg = xgb.XGBRegressor(\n        n_estimators=1, max_depth=1, tree_method=tree_method, device=device\n    )\n    reg.fit(X, y)\n    base_score = get_basescore(reg)\n\n    dx = da.from_array(X).rechunk(chunks=(32, None))\n    dy = da.from_array(y).rechunk(chunks=(32,))\n    dreg = dxgb.DaskXGBRegressor(\n        n_estimators=1, max_depth=1, tree_method=tree_method, device=device\n    )\n    dreg.client = client\n    dreg.fit(dx, dy)\n    dbase_score = get_basescore(dreg)\n    np.testing.assert_allclose(base_score, dbase_score)\n\n\ndef check_init_estimation(\n    tree_method: str, device: Literal[\"cpu\", \"cuda\"], client: Client\n) -> None:\n    \"\"\"Test init estimation.\"\"\"\n    check_init_estimation_reg(tree_method, device, client)\n    check_init_estimation_clf(tree_method, device, client)\n\n\ndef check_uneven_nan(\n    client: Client, tree_method: str, device: Literal[\"cpu\", \"cuda\"], n_workers: int\n) -> None:\n    \"\"\"Issue #9271, not every worker has missing value.\"\"\"\n    assert n_workers >= 2\n\n    with client.as_current():\n        clf = dxgb.DaskXGBClassifier(tree_method=tree_method, device=device)\n        X = pd.DataFrame({\"a\": range(10000), \"b\": range(10000, 0, -1)})\n        y = pd.Series([*[0] * 5000, *[1] * 5000])\n\n        X.loc[:3000:1000, \"a\"] = np.nan\n\n        client.wait_for_workers(n_workers=n_workers)\n\n        clf.fit(\n            dd.from_pandas(X, npartitions=n_workers),\n            dd.from_pandas(y, npartitions=n_workers),\n        )\n\n\ndef check_external_memory(  # pylint: disable=too-many-locals\n    worker_id: int,\n    n_workers: int,\n    device: str,\n    comm_args: dict,\n    is_qdm: bool,\n) -> None:\n    \"\"\"Basic checks for distributed external memory.\"\"\"\n    n_samples_per_batch = 32\n    n_features = 4\n    n_batches = 16\n    use_cupy = device != \"cpu\"\n\n    n_threads = get_worker().state.nthreads\n    with 
xgb.collective.CommunicatorContext(dmlc_communicator=\"rabit\", **comm_args):\n        it = tm.IteratorForTest(\n            *make_batches(\n                n_samples_per_batch,\n                n_features,\n                n_batches,\n                use_cupy=use_cupy,\n                random_state=worker_id,\n            ),\n            cache=\"cache\",\n        )\n        if is_qdm:\n            Xy: xgb.DMatrix = xgb.ExtMemQuantileDMatrix(it, nthread=n_threads)\n        else:\n            Xy = xgb.DMatrix(it, nthread=n_threads)\n        results: EvalsLog = {}\n        xgb.train(\n            {\"tree_method\": \"hist\", \"nthread\": n_threads, \"device\": device},\n            Xy,\n            evals=[(Xy, \"Train\")],\n            num_boost_round=32,\n            evals_result=results,\n        )\n        assert tm.non_increasing(cast(List[float], results[\"Train\"][\"rmse\"]))\n\n    lx, ly, lw = [], [], []\n    for i in range(n_workers):\n        x, y, w = make_batches(\n            n_samples_per_batch,\n            n_features,\n            n_batches,\n            use_cupy=use_cupy,\n            random_state=i,\n        )\n        lx.extend(x)\n        ly.extend(y)\n        lw.extend(w)\n\n    X = concat(lx)\n    yconcat = concat(ly)\n    wconcat = concat(lw)\n    if is_qdm:\n        Xy = xgb.QuantileDMatrix(X, yconcat, weight=wconcat, nthread=n_threads)\n    else:\n        Xy = xgb.DMatrix(X, yconcat, weight=wconcat, nthread=n_threads)\n\n    results_local: EvalsLog = {}\n    xgb.train(\n        {\"tree_method\": \"hist\", \"nthread\": n_threads, \"device\": device},\n        Xy,\n        evals=[(Xy, \"Train\")],\n        num_boost_round=32,\n        evals_result=results_local,\n    )\n    np.testing.assert_allclose(\n        results[\"Train\"][\"rmse\"], results_local[\"Train\"][\"rmse\"], rtol=1e-4\n    )\n\n\ndef get_rabit_args(client: Client, n_workers: int) -> Any:\n    \"\"\"Get RABIT collective communicator arguments for tests.\"\"\"\n    return 
client.sync(_get_rabit_args, client, n_workers)\n\n\ndef get_client_workers(client: Client) -> List[str]:\n    \"Get workers from a dask client.\"\n    kwargs = {\"n_workers\": -1} if _DASK_VERSION() >= parse_version(\"2025.4.0\") else {}\n    workers = client.scheduler_info(**kwargs)[\"workers\"]\n    return list(workers.keys())\n\n\ndef make_ltr(  # pylint: disable=too-many-locals,too-many-arguments\n    client: Client,\n    n_samples: int,\n    n_features: int,\n    *,\n    n_query_groups: int,\n    max_rel: int,\n    device: str,\n) -> Tuple[dd.DataFrame, dd.Series, dd.Series]:\n    \"\"\"Synthetic dataset for learning to rank.\"\"\"\n    workers = get_client_workers(client)\n    n_samples_per_worker = n_samples // len(workers)\n\n    if device == \"cpu\":\n        from pandas import DataFrame as DF\n    else:\n        from cudf import DataFrame as DF\n\n    def make(n: int, seed: int) -> pd.DataFrame:\n        rng = np.random.default_rng(seed)\n        X, y = make_classification(\n            n,\n            n_features,\n            n_informative=n_features,\n            n_redundant=0,\n            n_classes=max_rel,\n            random_state=seed,\n        )\n        qid = rng.integers(size=(n,), low=0, high=n_query_groups)\n        df = DF(X, columns=[f\"f{i}\" for i in range(n_features)])\n        df[\"qid\"] = qid\n        df[\"y\"] = y\n        return df\n\n    futures = []\n    i = 0\n    for k in range(0, n_samples, n_samples_per_worker):\n        fut = client.submit(\n            make, n=n_samples_per_worker, seed=k, workers=[workers[i % len(workers)]]\n        )\n        futures.append(fut)\n        i += 1\n\n    last = n_samples - (n_samples_per_worker * len(workers))\n    if last != 0:\n        fut = client.submit(make, n=last, seed=n_samples_per_worker * len(workers))\n        futures.append(fut)\n\n    meta = make(1, 0)\n    df = dd.from_delayed(futures, meta=meta)\n    assert isinstance(df, dd.DataFrame)\n    return df.drop([\"qid\", \"y\"], 
axis=1), df.y, df.qid\n\n\ndef check_no_group_split(client: Client, device: str) -> None:\n    \"\"\"Test for the allow_group_split parameter.\"\"\"\n    X_tr, q_tr, y_tr = make_ltr(\n        client, 4096, 128, n_query_groups=4, max_rel=5, device=device\n    )\n    X_va, q_va, y_va = make_ltr(\n        client, 1024, 128, n_query_groups=4, max_rel=5, device=device\n    )\n\n    ltr = dxgb.DaskXGBRanker(\n        allow_group_split=False,\n        n_estimators=36,\n        device=device,\n        objective=\"rank:pairwise\",\n    )\n    ltr.fit(\n        X_tr,\n        y_tr,\n        qid=q_tr,\n        eval_set=[(X_tr, y_tr), (X_va, y_va)],\n        eval_qid=[q_tr, q_va],\n        verbose=True,\n    )\n\n    assert ltr.n_features_in_ == 128\n    assert X_tr.shape[1] == ltr.n_features_in_  # no change\n    ndcg = ltr.evals_result()[\"validation_0\"][\"ndcg@32\"]\n    assert tm.non_decreasing(ndcg[:16], tolerance=1e-2), ndcg\n    np.testing.assert_allclose(ndcg[-1], 1.0, rtol=1e-2)\n\n\n@overload\ndef make_categorical(  # pylint: disable=too-many-locals, too-many-arguments\n    client: Client,\n    n_samples: int,\n    n_features: int,\n    n_categories: int,\n    *,\n    onehot: bool = ...,\n    n_targets: Literal[1] = ...,\n    cat_dtype: np.typing.DTypeLike = ...,\n) -> Tuple[dd.DataFrame, dd.Series]: ...\n\n\n@overload\ndef make_categorical(  # pylint: disable=too-many-locals, too-many-arguments\n    client: Client,\n    n_samples: int,\n    n_features: int,\n    n_categories: int,\n    *,\n    onehot: bool = ...,\n    n_targets: int,\n    cat_dtype: np.typing.DTypeLike = ...,\n) -> Tuple[dd.DataFrame, Union[dd.Series, dd.DataFrame]]: ...\n\n\ndef make_categorical(  # pylint: disable=too-many-locals, too-many-arguments\n    client: Client,\n    n_samples: int,\n    n_features: int,\n    n_categories: int,\n    *,\n    onehot: bool = False,\n    n_targets: int = 1,\n    cat_dtype: np.typing.DTypeLike = np.int64,\n) -> Tuple[dd.DataFrame, Union[dd.Series, 
dd.DataFrame]]:\n    \"\"\"Synthesize categorical data with dask.\"\"\"\n    workers = get_client_workers(client)\n    n_workers = len(workers)\n    dfs = []\n\n    label_cols = (\n        [f\"label_{i}\" for i in range(n_targets)] if n_targets > 1 else [\"label\"]\n    )\n\n    def pack(**kwargs: Any) -> dd.DataFrame:\n        X, y = make_cat_local(**kwargs)\n        if y.ndim == 2:\n            for i in range(y.shape[1]):\n                X[f\"label_{i}\"] = y[:, i]\n        else:\n            X[\"label\"] = y\n        return X\n\n    meta = pack(\n        n_samples=1,\n        n_features=n_features,\n        n_categories=n_categories,\n        onehot=False,\n        n_targets=n_targets,\n        cat_dtype=cat_dtype,\n    )\n\n    for i, worker in enumerate(workers):\n        l_n_samples = min(\n            n_samples // n_workers, n_samples - i * (n_samples // n_workers)\n        )\n        # make sure there's at least one sample for testing empty DMatrix\n        if n_samples == 1 and i == 0:\n            l_n_samples = 1\n        future = client.submit(\n            pack,\n            n_samples=l_n_samples,\n            n_features=n_features,\n            n_categories=n_categories,\n            n_targets=n_targets,\n            cat_dtype=cat_dtype,\n            onehot=False,\n            workers=[worker],\n        )\n        dfs.append(future)\n\n    df: dd.DataFrame = cast(dd.DataFrame, dd.from_delayed(dfs, meta=meta))\n    y = df[label_cols]\n    if n_targets == 1:\n        y = y[label_cols[0]]\n    X = df[df.columns.difference(label_cols)]\n\n    if onehot:\n        return dd.get_dummies(X), y\n    return X, y\n\n\n# pylint: disable=too-many-locals\ndef run_recode(client: Client, device: Device) -> None:\n    \"\"\"Run re-coding test with the Dask interface.\"\"\"\n\n    def create_dmatrix(\n        DMatrixT: Type[dxgb.DaskDMatrix], *args: Any, **kwargs: Any\n    ) -> dxgb.DaskDMatrix:\n        if DMatrixT is dxgb.DaskQuantileDMatrix:\n            ref = 
kwargs.pop(\"ref\", None)\n            return DMatrixT(*args, ref=ref, **kwargs)\n\n        kwargs.pop(\"ref\", None)\n        return DMatrixT(*args, **kwargs)\n\n    def run(DMatrixT: Type[dxgb.DaskDMatrix]) -> None:\n        enc, reenc, y, _, _ = make_recoded(device, n_features=96)\n        to = get_client_workers(client)\n\n        denc, dreenc, dy = (\n            dd.from_pandas(enc, npartitions=8).persist(workers=to),\n            dd.from_pandas(reenc, npartitions=8).persist(workers=to),\n            da.from_array(y, chunks=(y.shape[0] // 8,)).persist(workers=to),\n        )\n\n        Xy = create_dmatrix(DMatrixT, client, denc, dy, enable_categorical=True)\n        Xy_valid = create_dmatrix(\n            DMatrixT, client, dreenc, dy, enable_categorical=True, ref=Xy\n        )\n        # Base model\n        results = dxgb.train(\n            client, {\"device\": device}, Xy, evals=[(Xy_valid, \"Valid\")]\n        )\n\n        # Training continuation\n        Xy = create_dmatrix(DMatrixT, client, denc, dy, enable_categorical=True)\n        Xy_valid = create_dmatrix(\n            DMatrixT, client, dreenc, dy, enable_categorical=True, ref=Xy\n        )\n        results_1 = dxgb.train(\n            client,\n            {\"device\": device},\n            Xy,\n            evals=[(Xy_valid, \"Valid\")],\n            xgb_model=results[\"booster\"],\n        )\n\n        # Reversed training continuation\n        Xy = create_dmatrix(DMatrixT, client, dreenc, dy, enable_categorical=True)\n        Xy_valid = create_dmatrix(\n            DMatrixT, client, denc, dy, enable_categorical=True, ref=Xy\n        )\n        results_2 = dxgb.train(\n            client,\n            {\"device\": device},\n            Xy,\n            evals=[(Xy_valid, \"Valid\")],\n            xgb_model=results[\"booster\"],\n        )\n        np.testing.assert_allclose(\n            results_1[\"history\"][\"Valid\"][\"rmse\"], results_2[\"history\"][\"Valid\"][\"rmse\"]\n        )\n\n        
predt_0 = dxgb.inplace_predict(client, results, denc).compute()\n        predt_1 = dxgb.inplace_predict(client, results, dreenc).compute()\n        assert_allclose(device, predt_0, predt_1)\n\n        predt_0 = dxgb.predict(client, results, Xy).compute()\n        predt_1 = dxgb.predict(client, results, Xy_valid).compute()\n        assert_allclose(device, predt_0, predt_1)\n\n    for DMatrixT in [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]:\n        run(DMatrixT)\n"
  },
  {
    "path": "python-package/xgboost/testing/data.py",
    "content": "# pylint: disable=too-many-lines\n\"\"\"Utilities for data generation.\"\"\"\n\nimport gc\nimport multiprocessing\nimport os\nimport string\nimport zipfile\nfrom concurrent.futures import ThreadPoolExecutor\nfrom dataclasses import dataclass\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    Dict,\n    Generator,\n    List,\n    NamedTuple,\n    Optional,\n    Sequence,\n    Set,\n    Tuple,\n    Type,\n    Union,\n)\nfrom urllib import request\n\nimport numpy as np\nimport pytest\nfrom numpy import typing as npt\nfrom numpy.random import Generator as RNG\nfrom scipy import sparse\n\nfrom ..compat import concat\nfrom ..core import DataIter, DMatrix, QuantileDMatrix\nfrom ..data import is_pd_cat_dtype, pandas_pyarrow_mapper\nfrom ..sklearn import ArrayLike, XGBRanker\nfrom ..training import train as train_fn\n\nif TYPE_CHECKING:\n    from pandas import DataFrame as DataFrameT\nelse:\n    DataFrameT = Any\n\njoblib = pytest.importorskip(\"joblib\")\nmemory = joblib.Memory(\"./cachedir\", verbose=0)\n\n\ndef np_dtypes(\n    n_samples: int, n_features: int\n) -> Generator[Union[Tuple[np.ndarray, np.ndarray], Tuple[list, list]], None, None]:\n    \"\"\"Enumerate all supported dtypes from numpy.\"\"\"\n    pd = pytest.importorskip(\"pandas\")\n\n    rng = np.random.RandomState(1994)\n    # Integer and float.\n    orig = rng.randint(low=0, high=127, size=n_samples * n_features).reshape(\n        n_samples, n_features\n    )\n    dtypes = [\n        np.int32,\n        np.int64,\n        np.byte,\n        np.short,\n        np.intc,\n        np.int_,\n        np.longlong,\n        np.uint32,\n        np.uint64,\n        np.ubyte,\n        np.ushort,\n        np.uintc,\n        np.uint,\n        np.ulonglong,\n        np.float16,\n        np.float32,\n        np.float64,\n        np.half,\n        np.single,\n        np.double,\n    ]\n    for dtype in dtypes:\n        X = np.array(orig, dtype=dtype)\n        yield orig, X\n        yield 
orig.tolist(), X.tolist()\n\n    for dtype in dtypes:\n        X = np.array(orig, dtype=dtype)\n        df_orig = pd.DataFrame(orig)\n        df = pd.DataFrame(X)\n        yield df_orig, df\n\n    # Boolean\n    orig = rng.binomial(1, 0.5, size=n_samples * n_features).reshape(\n        n_samples, n_features\n    )\n    for dtype1 in [np.bool_, bool]:\n        X = np.array(orig, dtype=dtype1)\n        yield orig, X\n\n    for dtype2 in [np.bool_, bool]:\n        X = np.array(orig, dtype=dtype2)\n        df_orig = pd.DataFrame(orig)\n        df = pd.DataFrame(X)\n        yield df_orig, df\n\n\ndef pd_dtypes() -> Generator:\n    \"\"\"Enumerate all supported pandas extension types.\"\"\"\n    pd = pytest.importorskip(\"pandas\")\n\n    # Integer\n    dtypes = [\n        pd.UInt8Dtype(),\n        pd.UInt16Dtype(),\n        pd.UInt32Dtype(),\n        pd.UInt64Dtype(),\n        pd.Int8Dtype(),\n        pd.Int16Dtype(),\n        pd.Int32Dtype(),\n        pd.Int64Dtype(),\n    ]\n\n    Null: Union[float, None, Any] = np.nan\n    orig = pd.DataFrame(\n        {\"f0\": [1, 2, Null, 3], \"f1\": [4, 3, Null, 1]}, dtype=np.float32\n    )\n    for Null in (np.nan, None, pd.NA):\n        for dtype in dtypes:\n            df = pd.DataFrame(\n                {\"f0\": [1, 2, Null, 3], \"f1\": [4, 3, Null, 1]}, dtype=dtype\n            )\n            yield orig, df\n\n    # Float\n    Null = np.nan\n    dtypes = [pd.Float32Dtype(), pd.Float64Dtype()]\n    orig = pd.DataFrame(\n        {\"f0\": [1.0, 2.0, Null, 3.0], \"f1\": [3.0, 2.0, Null, 1.0]}, dtype=np.float32\n    )\n    for Null in (np.nan, None, pd.NA):\n        for dtype in dtypes:\n            df = pd.DataFrame(\n                {\"f0\": [1.0, 2.0, Null, 3.0], \"f1\": [3.0, 2.0, Null, 1.0]}, dtype=dtype\n            )\n            yield orig, df\n            ser_orig = orig[\"f0\"]\n            ser = df[\"f0\"]\n            assert isinstance(ser, pd.Series)\n            assert isinstance(ser_orig, pd.Series)\n            
yield ser_orig, ser\n\n    # Categorical\n    orig = orig.astype(\"category\")\n    for c in orig.columns:\n        orig[c] = orig[c].cat.rename_categories(int)\n    for Null in (np.nan, None, pd.NA):\n        df = pd.DataFrame(\n            {\"f0\": [1, 2, Null, 3], \"f1\": [3, 2, Null, 1]},\n            dtype=pd.CategoricalDtype(),\n        )\n        yield orig, df\n\n    # Boolean\n    for Null in [None, pd.NA]:\n        data = {\"f0\": [True, False, Null, True], \"f1\": [False, True, Null, True]}\n        # pd.NA is not convertible to bool.\n        orig = pd.DataFrame(data, dtype=np.bool_ if Null is None else pd.BooleanDtype())\n        df = pd.DataFrame(data, dtype=pd.BooleanDtype())\n        yield orig, df\n\n\ndef pd_arrow_dtypes() -> Generator:\n    \"\"\"Pandas DataFrame with pyarrow backed type.\"\"\"\n    pd = pytest.importorskip(\"pandas\")\n    pa = pytest.importorskip(\"pyarrow\")\n\n    # Integer\n    dtypes = pandas_pyarrow_mapper\n    # Create a dictionary-backed dataframe, enable this when the roundtrip is\n    # implemented in pandas/pyarrow\n    #\n    # category = pd.ArrowDtype(pa.dictionary(pa.int32(), pa.int32(), ordered=True))\n    # df = pd.DataFrame({\"f0\": [0, 2, Null, 3], \"f1\": [4, 3, Null, 1]}, dtype=category)\n\n    # Error:\n    # >>> df.astype(\"category\")\n    #   Function 'dictionary_encode' has no kernel matching input types\n    #   (array[dictionary<values=int32, indices=int32, ordered=0>])\n\n    # Error:\n    # pd_cat_df = pd.DataFrame(\n    #     {\"f0\": [0, 2, Null, 3], \"f1\": [4, 3, Null, 1]},\n    #     dtype=\"category\"\n    # )\n    # pa_catcodes = (\n    #     df[\"f1\"].array.__arrow_array__().combine_chunks().to_pandas().cat.codes\n    # )\n    # pd_catcodes = pd_cat_df[\"f1\"].cat.codes\n    # assert pd_catcodes.equals(pa_catcodes)\n\n    for Null in (None, pd.NA, 0):\n        for dtype in dtypes:\n            if dtype.startswith(\"float16\") or dtype.startswith(\"bool\"):\n                continue\n         
   # Use np.nan as a baseline\n            orig_null = Null if not pd.isna(Null) and Null == 0 else np.nan\n            orig = pd.DataFrame(\n                {\"f0\": [1, 2, orig_null, 3], \"f1\": [4, 3, orig_null, 1]},\n                dtype=np.float32,\n            )\n\n            df = pd.DataFrame(\n                {\"f0\": [1, 2, Null, 3], \"f1\": [4, 3, Null, 1]}, dtype=dtype\n            )\n            yield orig, df\n\n    # If Null is `False`, then there's no missing value.\n    for Null in (pd.NA, False):\n        orig = pd.DataFrame(\n            {\"f0\": [True, False, Null, True], \"f1\": [False, True, Null, True]},\n            dtype=pd.BooleanDtype(),\n        )\n        df = pd.DataFrame(\n            {\"f0\": [True, False, Null, True], \"f1\": [False, True, Null, True]},\n            dtype=pd.ArrowDtype(pa.bool_()),\n        )\n        yield orig, df\n\n\ndef check_inf(rng: RNG) -> None:\n    \"\"\"Validate there's no inf in X.\"\"\"\n    X = rng.random(size=32).reshape(8, 4)\n    y = rng.random(size=8)\n    X[5, 2] = np.inf\n\n    with pytest.raises(ValueError, match=\"Input data contains `inf`\"):\n        QuantileDMatrix(X, y)\n\n    with pytest.raises(ValueError, match=\"Input data contains `inf`\"):\n        DMatrix(X, y)\n\n\n@memory.cache\ndef get_california_housing() -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Synthesize a dataset similar to the sklearn California housing dataset.\n\n    The real one can be obtained via:\n\n    .. 
code-block::\n\n        import sklearn.datasets\n\n        X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)\n\n    \"\"\"\n    n_samples = 20640\n    rng = np.random.default_rng(2025)\n\n    pd = pytest.importorskip(\"pandas\")\n\n    def mixture_2comp(\n        means: List[float], sigmas: List[float], weights: List[float]\n    ) -> np.ndarray:\n        l0 = rng.normal(\n            size=(int(n_samples * weights[0])), loc=means[0], scale=sigmas[0]\n        )\n        l1 = rng.normal(size=(n_samples - l0.shape[0]), loc=means[1], scale=sigmas[1])\n        return np.concatenate([l0, l1], axis=0)\n\n    def norm(mean: float, std: float) -> np.ndarray:\n        return rng.normal(loc=mean, scale=std, size=(n_samples,))\n\n    df = pd.DataFrame(\n        {\n            \"Longitude\": mixture_2comp(\n                [-118.0703597, -121.85682825],\n                [0.7897320650373969, 0.7248398629412008],\n                [0.60402556, 0.39597444],\n            ),\n            \"Latitude\": mixture_2comp(\n                [37.84266317, 33.86030848],\n                [1.0643911549736087, 0.5049274656834589],\n                [0.44485062, 0.55514938],\n            ),\n            \"MedInc\": norm(mean=3.8706710029069766, std=1.8997756945748738),\n            \"HouseAge\": norm(mean=28.639486434108527, std=12.585252725724606),\n            \"AveRooms\": norm(mean=5.428999742190376, std=2.474113202333516),\n            \"AveBedrms\": norm(mean=1.096675149606208, std=0.47389937625774475),\n            \"Population\": norm(mean=1425.4767441860465, std=1132.434687757615),\n            \"AveOccup\": norm(mean=3.0706551594363742, std=10.385797959128219),\n            \"MedHouseVal\": norm(mean=2.068558169089147, std=1.1539282040412253),\n        }\n    )\n    X = df[df.columns.difference([\"MedHouseVal\"])].to_numpy()\n    y = df[\"MedHouseVal\"].to_numpy()\n    return X, y\n\n\n@memory.cache\ndef get_digits() -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Fetch the 
digits dataset from sklearn.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n    data = datasets.load_digits()\n    return data.data, data.target\n\n\n@memory.cache\ndef get_cancer() -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Fetch the breast cancer dataset from sklearn.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n    return datasets.load_breast_cancer(return_X_y=True)\n\n\n@memory.cache\ndef get_sparse() -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Generate a sparse dataset.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n    rng = np.random.RandomState(199)\n    n = 2000\n    sparsity = 0.75\n    X, y = datasets.make_regression(n, random_state=rng)\n    flag = rng.binomial(1, sparsity, X.shape)\n    for i in range(X.shape[0]):\n        for j in range(X.shape[1]):\n            if flag[i, j]:\n                X[i, j] = np.nan\n    return X, y\n\n\n# pylint: disable=too-many-statements\n@memory.cache\ndef get_ames_housing() -> Tuple[DataFrameT, np.ndarray]:\n    \"\"\"Get a synthetic version of the Ames housing dataset.\n\n    The real one can be obtained via:\n\n    .. 
code-block::\n\n        from sklearn import datasets\n\n        datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)\n\n    Number of samples: 1460\n    Number of features: 20\n    Number of categorical features: 10\n    Number of numerical features: 10\n    \"\"\"\n    if TYPE_CHECKING:\n        import pandas as pd\n    else:\n        pd = pytest.importorskip(\"pandas\")\n\n    rng = np.random.default_rng(1994)\n    n_samples = 1460\n    df = pd.DataFrame()\n\n    def synth_cat(\n        name_proba: Dict[Union[str, float], float], density: float\n    ) -> pd.Series:\n        n_nulls = int(n_samples * (1 - density))\n        has_nan = np.abs(1.0 - density) > 1e-6 and n_nulls > 0\n        if has_nan:\n            sparsity = 1.0 - density\n            name_proba[np.nan] = sparsity\n\n        keys = list(name_proba.keys())\n        p = list(name_proba.values())\n        p[-1] += 1.0 - np.sum(p)  # Fix floating point error\n        x = rng.choice(keys, size=n_samples, p=p)\n\n        series = pd.Series(\n            x,\n            dtype=pd.CategoricalDtype(\n                # not NA\n                filter(lambda x: isinstance(x, str), keys)\n            ),\n        )\n        return series\n\n    df[\"BldgType\"] = synth_cat(\n        {\n            \"1Fam\": 0.835616,\n            \"2fmCon\": 0.078082,\n            \"Duplex\": 0.035616,\n            \"Twnhs\": 0.029452,\n            \"TwnhsE\": 0.021233,\n        },\n        1.0,\n    )\n    df[\"GarageFinish\"] = synth_cat(\n        {\"Unf\": 0.414384, \"RFn\": 0.289041, \"Fin\": 0.241096}, 0.94452\n    )\n    df[\"LotConfig\"] = synth_cat(\n        {\n            \"Corner\": 0.180137,\n            \"CulDSac\": 0.064384,\n            \"FR2\": 0.032192,\n            \"FR3\": 0.002740,\n        },\n        1.0,\n    )\n    df[\"Functional\"] = synth_cat(\n        {\n            \"Typ\": 0.931506,\n            \"Min2\": 0.023287,\n            \"Min1\": 0.021232,\n            \"Mod\": 0.010273,\n        
    \"Maj1\": 0.009589,\n            \"Maj2\": 0.003424,\n            \"Sev\": 0.000684,\n        },\n        1.0,\n    )\n    df[\"MasVnrType\"] = synth_cat(\n        {\n            \"None\": 0.591780,\n            \"BrkFace\": 0.304794,\n            \"Stone\": 0.087671,\n            \"BrkCmn\": 0.010273,\n        },\n        0.99452,\n    )\n    df[\"HouseStyle\"] = synth_cat(\n        {\n            \"1Story\": 0.497260,\n            \"2Story\": 0.304794,\n            \"1.5Fin\": 0.105479,\n            \"SLvl\": 0.044520,\n            \"SFoyer\": 0.025342,\n            \"1.5Unf\": 0.009589,\n            \"2.5Unf\": 0.007534,\n            \"2.5Fin\": 0.005479,\n        },\n        1.0,\n    )\n    df[\"FireplaceQu\"] = synth_cat(\n        {\n            \"Gd\": 0.260273,\n            \"TA\": 0.214383,\n            \"Fa\": 0.022602,\n            \"Ex\": 0.016438,\n            \"Po\": 0.013698,\n        },\n        0.527397,\n    )\n    df[\"ExterCond\"] = synth_cat(\n        {\n            \"TA\": 0.878082,\n            \"Gd\": 0.1,\n            \"Fa\": 0.019178,\n            \"Ex\": 0.002054,\n            \"Po\": 0.000684,\n        },\n        1.0,\n    )\n    df[\"ExterQual\"] = synth_cat(\n        {\n            \"TA\": 0.620547,\n            \"Gd\": 0.334246,\n            \"Ex\": 0.035616,\n            \"Fa\": 0.009589,\n        },\n        1.0,\n    )\n    df[\"PoolQC\"] = synth_cat(\n        {\n            \"Gd\": 0.002054,\n            \"Ex\": 0.001369,\n            \"Fa\": 0.001369,\n        },\n        0.004794,\n    )\n\n    # We focus on the cateogircal values here, for numerical features, simple normal\n    # distribution is used, which doesn't match the original data.\n    def synth_num(loc: float, std: float, density: float) -> pd.Series:\n        x = rng.normal(loc=loc, scale=std, size=n_samples)\n        n_nulls = int(n_samples * (1 - density))\n        if np.abs(1.0 - density) > 1e-6 and n_nulls > 0:\n            null_idx = rng.choice(n_samples, 
size=n_nulls, replace=False)\n            x[null_idx] = np.nan\n        return pd.Series(x, dtype=np.float64)\n\n    df[\"3SsnPorch\"] = synth_num(3.4095890410958902, 29.31733055678188, 1.0)\n    df[\"Fireplaces\"] = synth_num(0.613013698630137, 0.6446663863122295, 1.0)\n    df[\"BsmtHalfBath\"] = synth_num(0.057534246575342465, 0.23875264627921178, 1.0)\n    df[\"HalfBath\"] = synth_num(0.38287671232876713, 0.5028853810928914, 1.0)\n    df[\"GarageCars\"] = synth_num(1.7671232876712328, 0.7473150101111095, 1.0)\n    df[\"TotRmsAbvGrd\"] = synth_num(6.517808219178082, 1.6253932905840505, 1.0)\n    df[\"BsmtFinSF1\"] = synth_num(443.6397260273973, 456.0980908409277, 1.0)\n    df[\"BsmtFinSF2\"] = synth_num(46.54931506849315, 161.31927280654173, 1.0)\n    df[\"GrLivArea\"] = synth_num(1515.463698630137, 525.4803834232025, 1.0)\n    df[\"ScreenPorch\"] = synth_num(15.060958904109588, 55.757415281874174, 1.0)\n\n    columns = list(df.columns)\n    rng.shuffle(columns)\n    df = df[columns]\n\n    # linear interaction for testing purposes.\n    y = np.zeros(shape=(n_samples,))\n    for c in df.columns:\n        if isinstance(df[c].dtype, pd.CategoricalDtype):\n            y += df[c].cat.codes.astype(np.float64)\n        else:\n            y += df[c].values\n\n    # Shift and scale to match the original y.\n    y *= 79442.50288288662 / y.std()\n    y += 180921.19589041095 - y.mean()\n\n    return df, y\n\n\n@memory.cache\ndef get_mq2008(\n    dpath: str,\n) -> Tuple[\n    sparse.csr_matrix,\n    np.ndarray,\n    np.ndarray,\n    sparse.csr_matrix,\n    np.ndarray,\n    np.ndarray,\n    sparse.csr_matrix,\n    np.ndarray,\n    np.ndarray,\n]:\n    \"\"\"Fetch the mq2008 dataset.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n    src = \"https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip\"\n    target = os.path.join(dpath, \"MQ2008.zip\")\n    if not os.path.exists(target):\n        request.urlretrieve(url=src, filename=target)\n\n    with 
zipfile.ZipFile(target, \"r\") as f:\n        f.extractall(path=dpath)\n\n    (\n        x_train,\n        y_train,\n        qid_train,\n        x_test,\n        y_test,\n        qid_test,\n        x_valid,\n        y_valid,\n        qid_valid,\n    ) = datasets.load_svmlight_files(\n        (\n            os.path.join(dpath, \"MQ2008/Fold1/train.txt\"),\n            os.path.join(dpath, \"MQ2008/Fold1/test.txt\"),\n            os.path.join(dpath, \"MQ2008/Fold1/vali.txt\"),\n        ),\n        query_id=True,\n        zero_based=False,\n    )\n\n    return (\n        x_train,\n        y_train,\n        qid_train,\n        x_test,\n        y_test,\n        qid_test,\n        x_valid,\n        y_valid,\n        qid_valid,\n    )\n\n\ndef make_batches(  # pylint: disable=too-many-arguments,too-many-locals\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    use_cupy: bool = False,\n    *,\n    vary_size: bool = False,\n    random_state: int = 1994,\n) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:\n    \"\"\"Make batches of dense data.\"\"\"\n    X = []\n    y = []\n    w = []\n    if use_cupy:\n        import cupy\n\n        rng = cupy.random.RandomState(np.uint64(random_state))\n    else:\n        rng = np.random.RandomState(random_state)\n    for i in range(n_batches):\n        n_samples = n_samples_per_batch + i * 10 if vary_size else n_samples_per_batch\n        _X = rng.randn(n_samples, n_features)\n        _y = rng.randn(n_samples)\n        _w = rng.uniform(low=0, high=1, size=n_samples)\n        X.append(_X)\n        y.append(_y)\n        w.append(_w)\n    return X, y, w\n\n\nRelData = Tuple[sparse.csr_matrix, npt.NDArray[np.int32], npt.NDArray[np.int32]]\n\n\n@dataclass\nclass ClickFold:\n    \"\"\"A structure containing information about generated user-click data.\"\"\"\n\n    X: sparse.csr_matrix\n    y: npt.NDArray[np.int32]\n    qid: npt.NDArray[np.int32]\n    score: npt.NDArray[np.float32]\n    click: 
npt.NDArray[np.int32]\n    pos: npt.NDArray[np.int64]\n\n\nclass RelDataCV(NamedTuple):\n    \"\"\"Simple data struct for holding a train-test split of a learning to rank dataset.\"\"\"\n\n    train: RelData\n    test: RelData\n    max_rel: int\n\n    def is_binary(self) -> bool:\n        \"\"\"Whether the label consists of binary relevance degree.\"\"\"\n        return self.max_rel == 1\n\n\nclass PBM:\n    \"\"\"Simulate click data with position bias model. There are other models available in\n    `ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.\n\n    References\n    ----------\n    Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm\n\n    \"\"\"\n\n    def __init__(self, eta: float) -> None:\n        # click probability for each relevance degree. (from 0 to 4)\n        self.click_prob = np.array([0.1, 0.16, 0.28, 0.52, 1.0])\n        exam_prob = np.array(\n            [0.68, 0.61, 0.48, 0.34, 0.28, 0.20, 0.11, 0.10, 0.08, 0.06]\n        )\n        # Observation probability, encoding positional bias for each position\n        self.exam_prob = np.power(exam_prob, eta)\n\n    def sample_clicks_for_query(\n        self, labels: npt.NDArray[np.int32], position: npt.NDArray[np.int64]\n    ) -> npt.NDArray[np.int32]:\n        \"\"\"Sample clicks for one query based on input relevance degree and position.\n\n        Parameters\n        ----------\n\n        labels :\n            relevance_degree\n\n        \"\"\"\n        labels = np.array(labels, copy=True)\n\n        click_prob = np.zeros(labels.shape)\n        # minimum\n        labels[labels < 0] = 0\n        # maximum\n        labels[labels >= len(self.click_prob)] = -1\n        click_prob = self.click_prob[labels]\n\n        exam_prob = np.zeros(labels.shape)\n        assert position.size == labels.size\n        ranks = np.array(position, copy=True)\n        # maximum\n        ranks[ranks >= self.exam_prob.size] = -1\n        exam_prob = self.exam_prob[ranks]\n\n  
      rng = np.random.default_rng(1994)\n        prob = rng.random(size=labels.shape[0], dtype=np.float32)\n\n        clicks: npt.NDArray[np.int32] = np.zeros(labels.shape, dtype=np.int32)\n        clicks[prob < exam_prob * click_prob] = 1\n        return clicks\n\n\ndef rlencode(x: npt.NDArray[np.int32]) -> Tuple[npt.NDArray, npt.NDArray, npt.NDArray]:\n    \"\"\"Run length encoding using numpy, modified from:\n    https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66\n\n    \"\"\"\n    x = np.asarray(x)\n    n = x.size\n    starts = np.r_[0, np.flatnonzero(~np.isclose(x[1:], x[:-1], equal_nan=True)) + 1]\n    lengths = np.diff(np.r_[starts, n])\n    values = x[starts]\n    indptr = np.append(starts, np.array([x.size]))\n\n    return indptr, lengths, values\n\n\ndef init_rank_score(\n    X: sparse.csr_matrix,\n    y: npt.NDArray[np.int32],\n    qid: npt.NDArray[np.int32],\n    sample_rate: float = 0.1,\n) -> npt.NDArray[np.float32]:\n    \"\"\"We use XGBoost to generate the initial score instead of SVMRank for\n    simplicity. 
Sample rate is set to 0.1 by default so that we can test with small\n    datasets.\n\n    \"\"\"\n    # random sample\n    rng = np.random.default_rng(1994)\n    n_samples = int(X.shape[0] * sample_rate)\n    index: npt.NDArray = np.arange(0, X.shape[0], dtype=np.uint64)\n    rng.shuffle(index)\n    index = index[:n_samples]\n\n    X_train = X[index]\n    y_train = y[index]\n    qid_train = qid[index]\n\n    # Sort training data based on query id, required by XGBoost.\n    sorted_idx = np.argsort(qid_train)\n    X_train = X_train[sorted_idx]\n    y_train = y_train[sorted_idx]\n    qid_train = qid_train[sorted_idx]\n\n    ltr = XGBRanker(objective=\"rank:ndcg\", tree_method=\"hist\")\n    ltr.fit(X_train, y_train, qid=qid_train)\n\n    # Use the original order of the data.\n    scores = ltr.predict(X)\n    return scores\n\n\ndef simulate_one_fold(\n    fold: Tuple[sparse.csr_matrix, npt.NDArray[np.int32], npt.NDArray[np.int32]],\n    scores_fold: npt.NDArray[np.float32],\n) -> ClickFold:\n    \"\"\"Simulate clicks for one fold.\"\"\"\n    X_fold, y_fold, qid_fold = fold\n    assert qid_fold.dtype == np.int32\n\n    qids = np.unique(qid_fold)\n\n    position = np.empty((y_fold.size,), dtype=np.int64)\n    clicks = np.empty((y_fold.size,), dtype=np.int32)\n    pbm = PBM(eta=1.0)\n\n    # Avoid grouping by qid as we want to preserve the original data partition by\n    # the dataset authors.\n    for q in qids:\n        qid_mask = q == qid_fold\n        qid_mask = qid_mask.reshape(qid_mask.shape[0])\n        query_scores = scores_fold[qid_mask]\n        # Initial rank list, scores sorted to decreasing order\n        query_position = np.argsort(query_scores)[::-1]\n        position[qid_mask] = query_position\n        # get labels\n        relevance_degrees = y_fold[qid_mask]\n        query_clicks = pbm.sample_clicks_for_query(relevance_degrees, query_position)\n        clicks[qid_mask] = query_clicks\n\n    assert X_fold.shape[0] == qid_fold.shape[0], (X_fold.shape, 
qid_fold.shape)\n    assert X_fold.shape[0] == clicks.shape[0], (X_fold.shape, clicks.shape)\n\n    return ClickFold(X_fold, y_fold, qid_fold, scores_fold, clicks, position)\n\n\n# pylint: disable=too-many-locals\ndef simulate_clicks(cv_data: RelDataCV) -> Tuple[ClickFold, Optional[ClickFold]]:\n    \"\"\"Simulate click data using position biased model (PBM).\"\"\"\n    X, y, qid = list(zip(cv_data.train, cv_data.test))\n\n    # ptr to train-test split\n    indptr = np.array([0] + [v.shape[0] for v in X])\n    indptr = np.cumsum(indptr)\n\n    assert len(indptr) == 2 + 1  # train, test\n    X_full = sparse.vstack(X)\n    y_full = np.concatenate(y)\n    qid_full = np.concatenate(qid)\n\n    # Obtain initial relevance score for click simulation\n    scores_full = init_rank_score(X_full, y_full, qid_full)\n    # partition it back to (train, test) tuple\n    scores = [scores_full[indptr[i - 1] : indptr[i]] for i in range(1, indptr.size)]\n\n    X_lst, y_lst, q_lst, s_lst, c_lst, p_lst = [], [], [], [], [], []\n    for i in range(indptr.size - 1):\n        fold = simulate_one_fold((X[i], y[i], qid[i]), scores[i])\n        X_lst.append(fold.X)\n        y_lst.append(fold.y)\n        q_lst.append(fold.qid)\n        s_lst.append(fold.score)\n        c_lst.append(fold.click)\n        p_lst.append(fold.pos)\n\n    scores_check_1 = [s_lst[i] for i in range(indptr.size - 1)]\n    for i in range(2):\n        assert (scores_check_1[i] == scores[i]).all()\n\n    if len(X_lst) == 1:\n        train = ClickFold(X_lst[0], y_lst[0], q_lst[0], s_lst[0], c_lst[0], p_lst[0])\n        test = None\n    else:\n        train, test = (\n            ClickFold(X_lst[i], y_lst[i], q_lst[i], s_lst[i], c_lst[i], p_lst[i])\n            for i in range(len(X_lst))\n        )\n    return train, test\n\n\ndef sort_ltr_samples(\n    X: sparse.csr_matrix,\n    y: npt.NDArray[np.int32],\n    qid: npt.NDArray[np.int32],\n    clicks: npt.NDArray[np.int32],\n    pos: npt.NDArray[np.int64],\n) -> Tuple[\n    
sparse.csr_matrix,\n    npt.NDArray[np.int32],\n    npt.NDArray[np.int32],\n    npt.NDArray[np.int32],\n]:\n    \"\"\"Sort data based on query index and position.\"\"\"\n    sorted_idx = np.argsort(qid)\n    X = X[sorted_idx]\n    clicks = clicks[sorted_idx]\n    qid = qid[sorted_idx]\n    pos = pos[sorted_idx]\n\n    indptr, _, _ = rlencode(qid)\n\n    for i in range(1, indptr.size):\n        beg = indptr[i - 1]\n        end = indptr[i]\n\n        assert beg < end, (beg, end)\n        assert np.unique(qid[beg:end]).size == 1, (beg, end)\n\n        query_pos = pos[beg:end]\n        assert query_pos.min() == 0, query_pos.min()\n        assert query_pos.max() >= query_pos.size - 1, (\n            query_pos.max(),\n            query_pos.size,\n            i,\n            np.unique(qid[beg:end]),\n        )\n        sorted_idx = np.argsort(query_pos)\n\n        X[beg:end] = X[beg:end][sorted_idx]\n        clicks[beg:end] = clicks[beg:end][sorted_idx]\n        y[beg:end] = y[beg:end][sorted_idx]\n        # not necessary\n        qid[beg:end] = qid[beg:end][sorted_idx]\n\n    data = X, clicks, y, qid\n\n    return data\n\n\ndef run_base_margin_info(DType: Callable, DMatrixT: Type[DMatrix], device: str) -> None:\n    \"\"\"Run tests for base margin.\"\"\"\n    rng = np.random.default_rng()\n    X = DType(rng.normal(0, 1.0, size=100).astype(np.float32).reshape(50, 2))\n    if hasattr(X, \"iloc\"):\n        y = X.iloc[:, 0]\n    else:\n        y = X[:, 0]\n    base_margin = X\n    # no error at set\n    Xy = DMatrixT(X, y, base_margin=base_margin)\n    # Error at train, caused by check in predictor.\n    with pytest.raises(ValueError, match=r\".*base_margin.*\"):\n        train_fn({\"tree_method\": \"hist\", \"device\": device}, Xy)\n\n    if not hasattr(X, \"iloc\"):\n        # column major matrix\n        got = DType(Xy.get_base_margin().reshape(50, 2))\n        assert (got == base_margin).all()\n\n        assert base_margin.T.flags.c_contiguous is False\n        assert 
base_margin.T.flags.f_contiguous is True\n        Xy.set_info(base_margin=base_margin.T)\n        got = DType(Xy.get_base_margin().reshape(2, 50))\n        assert (got == base_margin.T).all()\n\n        # Row vs col vec.\n        base_margin = y\n        Xy.set_base_margin(base_margin)\n        bm_col = Xy.get_base_margin()\n        Xy.set_base_margin(base_margin.reshape(1, base_margin.size))\n        bm_row = Xy.get_base_margin()\n        assert (bm_row == bm_col).all()\n\n        # type\n        base_margin = base_margin.astype(np.float64)\n        Xy.set_base_margin(base_margin)\n        bm_f64 = Xy.get_base_margin()\n        assert (bm_f64 == bm_col).all()\n\n        # too many dimensions\n        base_margin = X.reshape(2, 5, 2, 5)\n        with pytest.raises(ValueError, match=r\".*base_margin.*\"):\n            Xy.set_base_margin(base_margin)\n\n\n# pylint: disable=too-many-locals\n@memory.cache\ndef make_sparse_regression(\n    n_samples: int, n_features: int, sparsity: float, as_dense: bool\n) -> Tuple[Union[sparse.csr_matrix], np.ndarray]:\n    \"\"\"Make sparse matrix.\n\n    Parameters\n    ----------\n\n    as_dense:\n\n      Return the matrix as np.ndarray with missing values filled by NaN\n\n    \"\"\"\n    if not hasattr(np.random, \"default_rng\"):\n        rng = np.random.RandomState(1994)\n        X = sparse.random(\n            m=n_samples,\n            n=n_features,\n            density=1.0 - sparsity,\n            random_state=rng,\n            format=\"csr\",\n        )\n        y = rng.normal(loc=0.0, scale=1.0, size=n_samples)\n        return X, y\n\n    # Use multi-thread to speed up the generation, convenient if you use this function\n    # for benchmarking.\n    n_threads = min(multiprocessing.cpu_count(), n_features)\n\n    def random_csc(t_id: int) -> sparse.csc_matrix:\n        rng = np.random.default_rng(1994 * t_id)\n        thread_size = n_features // n_threads\n        if t_id == n_threads - 1:\n            n_features_tloc = 
n_features - t_id * thread_size\n        else:\n            n_features_tloc = thread_size\n\n        X = sparse.random(\n            m=n_samples,\n            n=n_features_tloc,\n            density=1.0 - sparsity,\n            random_state=rng,\n        ).tocsc()\n        y = np.zeros((n_samples, 1))\n\n        for i in range(X.shape[1]):\n            size = X.indptr[i + 1] - X.indptr[i]\n            if size != 0:\n                y += X[:, i].toarray() * rng.random((n_samples, 1)) * 0.2\n\n        return X, y\n\n    futures = []\n    with ThreadPoolExecutor(max_workers=n_threads) as executor:\n        for i in range(n_threads):\n            futures.append(executor.submit(random_csc, i))\n\n    X_results = []\n    y_results = []\n    for f in futures:\n        X, y = f.result()\n        X_results.append(X)\n        y_results.append(y)\n\n    assert len(y_results) == n_threads\n\n    csr: sparse.csr_matrix = sparse.hstack(X_results, format=\"csr\")\n    y = np.asarray(y_results)\n    y = y.reshape((y.shape[0], y.shape[1])).T\n    y = np.sum(y, axis=1)\n\n    assert csr.shape[0] == n_samples\n    assert csr.shape[1] == n_features\n    assert y.shape[0] == n_samples\n\n    if as_dense:\n        arr = csr.toarray()\n        assert arr.shape[0] == n_samples\n        assert arr.shape[1] == n_features\n        arr[arr == 0] = np.nan\n        return arr, y\n\n    return csr, y\n\n\ndef unique_random_strings(n_strings: int, seed: int) -> List[str]:\n    \"\"\"Generate n unique strings.\"\"\"\n    name_len = 8  # hardcoded, should be more than enough\n    unique_strings: Set[str] = set()\n    rng = np.random.default_rng(seed)\n\n    while len(unique_strings) < n_strings:\n        random_str = \"\".join(\n            rng.choice(list(string.ascii_letters), size=name_len, replace=True)\n        )\n        unique_strings.add(random_str)\n\n    return list(unique_strings)\n\n\n# pylint: disable=too-many-arguments,too-many-locals,too-many-branches\ndef make_categorical(\n    
n_samples: int,\n    n_features: int,\n    n_categories: int,\n    *,\n    onehot: bool,\n    n_targets: int = 1,\n    sparsity: float = 0.0,\n    cat_ratio: float = 1.0,\n    shuffle: bool = False,\n    random_state: int = 1994,\n    cat_dtype: np.typing.DTypeLike = np.int64,\n    device: str = \"cpu\",\n) -> Tuple[ArrayLike, np.ndarray]:\n    \"\"\"Generate categorical features for test.\n\n    Parameters\n    ----------\n    n_categories:\n        Number of categories for categorical features.\n    onehot:\n        Should we apply one-hot encoding to the data?\n    n_targets:\n        Number of targets. When greater than 1, the label is a 2D array with shape\n        ``(n_samples, n_targets)``.\n    sparsity:\n        The ratio of the amount of missing values over the number of all entries.\n    cat_ratio:\n        The ratio of features that are categorical.\n    shuffle:\n        Whether we should shuffle the columns.\n    cat_dtype :\n        The dtype for categorical features, might be string or numeric.\n\n    Returns\n    -------\n    X, y\n    \"\"\"\n    pd = pytest.importorskip(\"pandas\")\n\n    # Use different rngs for column and rows. 
We can change the `n_samples` without\n    # changing the column type.\n    rng = np.random.RandomState(random_state)\n    row_rng = np.random.RandomState(random_state + 1)\n\n    df = pd.DataFrame()\n    for i in range(n_features):\n        choice = rng.binomial(1, cat_ratio, size=1)[0]\n        if choice == 1:\n            if np.issubdtype(cat_dtype, np.str_):\n                # we rely on using the feature index as the seed to generate the same\n                # categories for multiple calls to `make_categorical`.\n                categories = np.array(unique_random_strings(n_categories, i))\n                c = row_rng.choice(categories, size=n_samples, replace=True)\n            else:\n                categories = np.arange(0, n_categories)\n                c = row_rng.randint(low=0, high=n_categories, size=n_samples)\n\n            df[str(i)] = pd.Series(c, dtype=\"category\")\n            df[str(i)] = df[str(i)].cat.set_categories(categories)\n        else:\n            num = row_rng.randint(low=0, high=n_categories, size=n_samples)\n            df[str(i)] = pd.Series(num, dtype=num.dtype)\n\n    target_rng = np.random.RandomState(random_state + 2)\n    label: np.ndarray = np.ones((n_samples, n_targets))\n    for col in df.columns:\n        if isinstance(df[col].dtype, pd.CategoricalDtype):\n            codes = df[col].cat.codes.values\n            effects = target_rng.normal(size=(len(df[col].cat.categories), n_targets))\n            label += effects[codes]\n        else:\n            w = target_rng.uniform(low=0.5, high=1.5, size=n_targets)\n            label += np.outer(df[col].values, w)\n    if n_targets == 1:\n        label = label.squeeze(axis=1)\n\n    if sparsity > 0.0:\n        for i in range(n_features):\n            index = row_rng.randint(\n                low=0, high=n_samples - 1, size=int(n_samples * sparsity)\n            )\n            df.iloc[index, i] = np.nan\n            if is_pd_cat_dtype(df.dtypes.iloc[i]):\n                assert 
n_categories == np.unique(df.dtypes.iloc[i].categories).size\n\n    assert df.shape[1] == n_features\n    if onehot:\n        df = pd.get_dummies(df)\n\n    if shuffle:\n        columns = list(df.columns)\n        row_rng.shuffle(columns)\n        df = df[columns]\n\n    if device != \"cpu\":\n        assert device in [\"cuda\", \"gpu\"]\n        import cudf\n        import cupy\n\n        df = cudf.from_pandas(df)\n        label = cupy.array(label)\n    return df, label\n\n\nclass IteratorForTest(DataIter):\n    \"\"\"Iterator for testing streaming DMatrix. (external memory, quantile)\"\"\"\n\n    def __init__(  # pylint: disable=too-many-arguments\n        self,\n        X: Sequence,\n        y: Sequence,\n        w: Optional[Sequence],\n        *,\n        cache: Optional[str],\n        on_host: bool = False,\n        min_cache_page_bytes: Optional[int] = None,\n    ) -> None:\n        assert len(X) == len(y)\n        self.X = X\n        self.y = y\n        self.w = w\n        self.it = 0\n        super().__init__(\n            cache_prefix=cache,\n            on_host=on_host,\n            min_cache_page_bytes=min_cache_page_bytes,\n        )\n\n    def next(self, input_data: Callable) -> bool:\n        if self.it == len(self.X):\n            return False\n\n        with pytest.raises(TypeError, match=\"Keyword argument\"):\n            input_data(self.X[self.it], self.y[self.it], None)\n\n        # Use copy to make sure the iterator doesn't hold a reference to the data.\n        input_data(\n            data=self.X[self.it].copy(),\n            label=self.y[self.it].copy(),\n            weight=self.w[self.it].copy() if self.w else None,\n        )\n        gc.collect()  # clear up the copy, see if XGBoost access freed memory.\n        self.it += 1\n        return True\n\n    def reset(self) -> None:\n        self.it = 0\n\n    def as_arrays(\n        self,\n    ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], ArrayLike, Optional[ArrayLike]]:\n        
\"\"\"Return concatenated arrays.\"\"\"\n        X = concat(self.X)\n        y = concat(self.y)\n        if self.w:\n            w = np.concatenate(self.w, axis=0)\n        else:\n            w = None\n        return X, y, w\n"
  },
  {
    "path": "python-package/xgboost/testing/data_iter.py",
    "content": "\"\"\"Tests related to the `DataIter` interface.\"\"\"\n\nfrom typing import Callable, Optional\n\nimport numpy as np\n\nfrom xgboost import testing as tm\n\nfrom ..compat import import_cupy\nfrom ..core import DataIter, DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix\nfrom .utils import predictor_equal\n\n\ndef run_mixed_sparsity(device: str) -> None:\n    \"\"\"Check QDM with mixed batches.\"\"\"\n    X_0, y_0, _ = tm.make_regression(128, 16, False)\n    if device.startswith(\"cuda\"):\n        X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)\n    else:\n        X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, False)\n    X_2, y_2 = tm.make_sparse_regression(512, 16, 0.9, True)\n    X = [X_0, X_1, X_2]\n    y = [y_0, y_1, y_2]\n\n    if device.startswith(\"cuda\"):\n        cp = import_cupy()\n\n        X = [cp.array(batch) for batch in X]\n\n    it = tm.IteratorForTest(X, y, None, cache=None, on_host=False)\n    Xy_0 = QuantileDMatrix(it)\n\n    X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)\n    X = [X_0, X_1, X_2]\n    y = [y_0, y_1, y_2]\n    X_arr = np.concatenate(X, axis=0)\n    y_arr = np.concatenate(y, axis=0)\n    Xy_1 = QuantileDMatrix(X_arr, y_arr)\n\n    assert predictor_equal(Xy_0, Xy_1)\n\n\ndef check_invalid_cat_batches(device: str) -> None:\n    \"\"\"Check error message for inconsistent feature types.\"\"\"\n\n    class _InvalidCatIter(DataIter):\n        def __init__(self) -> None:\n            super().__init__(cache_prefix=None)\n            self._it = 0\n\n        def next(self, input_data: Callable) -> bool:\n            if self._it == 2:\n                return False\n            X, y = tm.make_categorical(\n                64,\n                12,\n                4,\n                onehot=False,\n                sparsity=0.5,\n                cat_ratio=1.0 if self._it == 0 else 0.5,\n            )\n            if device == \"cuda\":\n                import cudf\n                import cupy\n\n           
     X = cudf.DataFrame(X)\n                y = cupy.array(y)\n\n            input_data(data=X, label=y)\n            self._it += 1\n            return True\n\n        def reset(self) -> None:\n            self._it = 0\n\n    it = _InvalidCatIter()\n    import pytest\n\n    with pytest.raises(ValueError, match=\"Inconsistent number of categories between\"):\n        ExtMemQuantileDMatrix(it, enable_categorical=True)\n\n    with pytest.raises(ValueError, match=\"Inconsistent number of categories between\"):\n        QuantileDMatrix(it, enable_categorical=True)\n\n    with pytest.raises(ValueError, match=\"Inconsistent feature types\"):\n        DMatrix(it, enable_categorical=True)\n\n\ndef check_uneven_sizes(device: str) -> None:\n    \"\"\"Tests for having irregular data shapes.\"\"\"\n    batches = [\n        tm.make_regression(n_samples, 16, use_cupy=device == \"cuda\")\n        for n_samples in [512, 256, 1024]\n    ]\n    unzip = list(zip(*batches))\n    it = tm.IteratorForTest(unzip[0], unzip[1], None, cache=\"cache\", on_host=True)\n\n    Xy = DMatrix(it)\n    assert Xy.num_col() == 16\n    assert Xy.num_row() == sum(x.shape[0] for x in unzip[0])\n\n    Xy = ExtMemQuantileDMatrix(it)\n    assert Xy.num_col() == 16\n    assert Xy.num_row() == sum(x.shape[0] for x in unzip[0])\n\n\nclass CatIter(DataIter):  # pylint: disable=too-many-instance-attributes\n    \"\"\"An iterator for testing categorical features.\"\"\"\n\n    def __init__(  # pylint: disable=too-many-arguments,too-many-locals\n        self,\n        n_samples_per_batch: int,\n        n_features: int,\n        *,\n        n_batches: int,\n        n_cats: int,\n        sparsity: float,\n        cat_ratio: float,\n        onehot: bool,\n        device: str,\n        cache: Optional[str],\n        n_targets: int = 1,\n    ) -> None:\n        super().__init__(cache_prefix=cache)\n        self.n_batches = n_batches\n        self.device = device\n\n        n_samples = n_samples_per_batch * n_batches\n     
   cat, y = tm.make_categorical(\n            n_samples,\n            n_features,\n            n_categories=n_cats,\n            onehot=onehot,\n            cat_ratio=cat_ratio,\n            sparsity=sparsity,\n            n_targets=n_targets,\n        )\n        xs, ys = [], []\n\n        prev = 0\n        for _ in range(n_batches):\n            n = min(n_samples_per_batch, n_samples - prev)\n            X = cat.iloc[prev : prev + n, :]\n            xs.append(X)\n            ys.append(y[prev : prev + n])\n            prev += n_samples_per_batch\n\n        self.xs = xs\n        self.ys = ys\n\n        self.x = cat\n        self.y = y\n\n        self._it = 0\n\n    def xy(self) -> tuple:\n        \"\"\"Return the concatenated data.\"\"\"\n        return self.x, self.y\n\n    def next(self, input_data: Callable) -> bool:\n        if self._it == self.n_batches:\n            return False\n\n        X, y = self.xs[self._it], self.ys[self._it]\n        if self.device == \"cuda\":\n            import cudf\n            import cupy\n\n            X = cudf.DataFrame(X)\n            y = cupy.array(y)\n        input_data(data=X, label=y)\n        self._it += 1\n        return True\n\n    def reset(self) -> None:\n        self._it = 0\n"
  },
  {
    "path": "python-package/xgboost/testing/federated.py",
    "content": "# pylint: disable=unbalanced-tuple-unpacking, too-many-locals\n\"\"\"Tests for federated learning.\"\"\"\n\nimport multiprocessing\nimport os\nimport subprocess\nimport tempfile\nimport time\nfrom typing import List, cast\n\nfrom sklearn.datasets import dump_svmlight_file, load_svmlight_file\nfrom sklearn.model_selection import train_test_split\n\nimport xgboost as xgb\nimport xgboost.federated\nfrom xgboost import testing as tm\n\nfrom .._typing import EvalsLog\nfrom ..collective import _Args as CollArgs\n\nSERVER_KEY = \"server-key.pem\"\nSERVER_CERT = \"server-cert.pem\"\nCLIENT_KEY = \"client-key.pem\"\nCLIENT_CERT = \"client-cert.pem\"\n\n\ndef run_server(port: int, world_size: int, with_ssl: bool) -> None:\n    \"\"\"Run federated server for test.\"\"\"\n    if with_ssl:\n        xgboost.federated.run_federated_server(\n            world_size,\n            port,\n            server_key_path=SERVER_KEY,\n            server_cert_path=SERVER_CERT,\n            client_cert_path=CLIENT_CERT,\n        )\n    else:\n        xgboost.federated.run_federated_server(world_size, port)\n\n\ndef run_worker(\n    port: int, world_size: int, rank: int, with_ssl: bool, device: str\n) -> None:\n    \"\"\"Run federated client worker for test.\"\"\"\n    comm_env: CollArgs = {\n        \"dmlc_communicator\": \"federated\",\n        \"federated_server_address\": f\"localhost:{port}\",\n        \"federated_world_size\": world_size,\n        \"federated_rank\": rank,\n    }\n    if with_ssl:\n        comm_env[\"federated_server_cert_path\"] = SERVER_CERT\n        comm_env[\"federated_client_key_path\"] = CLIENT_KEY\n        comm_env[\"federated_client_cert_path\"] = CLIENT_CERT\n\n    cpu_count = os.cpu_count()\n    assert cpu_count is not None\n    n_threads = cpu_count // world_size\n\n    # Always call this before using distributed module\n    with xgb.collective.CommunicatorContext(**comm_env):\n        # Load file, file will not be sharded in federated mode.\n  
      X, y = load_svmlight_file(f\"agaricus.txt-{rank}.train\")\n        dtrain = xgb.DMatrix(X, y)\n        X, y = load_svmlight_file(f\"agaricus.txt-{rank}.test\")\n        dtest = xgb.DMatrix(X, y)\n\n        # Specify parameters via map, definition are same as c++ version\n        param = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"nthread\": n_threads,\n            \"tree_method\": \"hist\",\n            \"device\": device,\n        }\n\n        # Specify validations set to watch performance\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 20\n\n        # Run training, all the features in training API is available.\n        results: EvalsLog = {}\n        bst = xgb.train(\n            param,\n            dtrain,\n            num_round,\n            evals=watchlist,\n            early_stopping_rounds=2,\n            evals_result=results,\n        )\n        assert tm.non_increasing(cast(List[float], results[\"train\"][\"logloss\"]))\n        assert tm.non_increasing(cast(List[float], results[\"eval\"][\"logloss\"]))\n\n        # save the model, only ask process 0 to save the model.\n        if xgb.collective.get_rank() == 0:\n            with tempfile.TemporaryDirectory() as tmpdir:\n                bst.save_model(os.path.join(tmpdir, \"model.json\"))\n            xgb.collective.communicator_print(\"Finished training\\n\")\n\n\ndef run_federated(world_size: int, with_ssl: bool, use_gpu: bool) -> None:\n    \"\"\"Launcher for clients and the server.\"\"\"\n    port = 9091\n\n    server = multiprocessing.Process(\n        target=run_server, args=(port, world_size, with_ssl)\n    )\n    server.start()\n    time.sleep(1)\n    if not server.is_alive():\n        raise ValueError(\"Error starting Federated Learning server\")\n\n    workers = []\n    for rank in range(world_size):\n        device = f\"cuda:{rank}\" if use_gpu else \"cpu\"\n        worker = 
multiprocessing.Process(\n            target=run_worker, args=(port, world_size, rank, with_ssl, device)\n        )\n        workers.append(worker)\n        worker.start()\n    for worker in workers:\n        worker.join()\n    server.terminate()\n\n\ndef run_federated_learning(with_ssl: bool, use_gpu: bool, test_path: str) -> None:\n    \"\"\"Run federated learning tests.\"\"\"\n    n_workers = 2\n\n    if with_ssl:\n        command = \"openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout {part}-key.pem -out {part}-cert.pem -subj /C=US/CN=localhost\"  # pylint: disable=line-too-long\n        server_key = command.format(part=\"server\").split()\n        subprocess.check_call(server_key)\n        client_key = command.format(part=\"client\").split()\n        subprocess.check_call(client_key)\n\n    train_path = os.path.join(tm.data_dir(test_path), \"agaricus.txt.train\")\n    test_path = os.path.join(tm.data_dir(test_path), \"agaricus.txt.test\")\n\n    X_train, y_train = load_svmlight_file(train_path)\n    X_test, y_test = load_svmlight_file(test_path)\n\n    X0, X1, y0, y1 = train_test_split(X_train, y_train, test_size=0.5)\n    X0_valid, X1_valid, y0_valid, y1_valid = train_test_split(\n        X_test, y_test, test_size=0.5\n    )\n\n    dump_svmlight_file(X0, y0, \"agaricus.txt-0.train\")\n    dump_svmlight_file(X0_valid, y0_valid, \"agaricus.txt-0.test\")\n\n    dump_svmlight_file(X1, y1, \"agaricus.txt-1.train\")\n    dump_svmlight_file(X1_valid, y1_valid, \"agaricus.txt-1.test\")\n\n    run_federated(world_size=n_workers, with_ssl=with_ssl, use_gpu=use_gpu)\n"
  },
  {
    "path": "python-package/xgboost/testing/interaction_constraints.py",
    "content": "\"\"\"Tests for interaction constraints.\"\"\"\n\nfrom typing import Optional, Sequence, Union\n\nimport numpy as np\n\nfrom .._typing import FeatureNames\nfrom ..core import DMatrix\nfrom ..training import train\nfrom .utils import Device\n\n\ndef run_interaction_constraints(  # pylint: disable=too-many-locals\n    tree_method: str,\n    device: Device,\n    feature_names: Optional[FeatureNames] = None,\n    interaction_constraints: Union[str, Sequence] = \"[[0, 1]]\",\n) -> None:\n    \"\"\"Tests interaction constraints on a synthetic dataset.\"\"\"\n    x1 = np.random.normal(loc=1.0, scale=1.0, size=1000)\n    x2 = np.random.normal(loc=1.0, scale=1.0, size=1000)\n    x3 = np.random.choice([1, 2, 3], size=1000, replace=True)\n    y = (\n        x1\n        + x2\n        + x3\n        + x1 * x2 * x3\n        + np.random.normal(loc=0.001, scale=1.0, size=1000)\n        + 3 * np.sin(x1)\n    )\n    X = np.column_stack((x1, x2, x3))\n    dtrain = DMatrix(X, label=y, feature_names=feature_names)\n\n    params = {\n        \"max_depth\": 3,\n        \"eta\": 0.1,\n        \"nthread\": 2,\n        \"interaction_constraints\": interaction_constraints,\n        \"tree_method\": tree_method,\n        \"device\": device,\n    }\n    num_boost_round = 12\n    # Fit a model that only allows interaction between x1 and x2\n    bst = train(params, dtrain, num_boost_round, evals=[(dtrain, \"train\")])\n\n    # Set all observations to have the same x3 values then increment by the same amount\n    def f(x: int) -> np.ndarray:\n        tmat = DMatrix(\n            np.column_stack((x1, x2, np.repeat(x, 1000))), feature_names=feature_names\n        )\n        return bst.predict(tmat)\n\n    preds = [f(x) for x in [1, 2, 3]]\n\n    # Check incrementing x3 has the same effect on all observations\n    #   since x3 is constrained to be independent of x1 and x2\n    #   and all observations start off from the same x3 value\n    diff1 = preds[1] - preds[0]\n    assert 
np.all(np.abs(diff1 - diff1[0]) < 1e-4)\n    diff2 = preds[2] - preds[1]\n    assert np.all(np.abs(diff2 - diff2[0]) < 1e-4)\n\n\ndef training_accuracy(tree_method: str, dpath: str, device: Device) -> None:\n    \"\"\"Test accuracy, reused by GPU tests.\"\"\"\n    from sklearn.metrics import accuracy_score\n\n    dtrain = DMatrix(dpath + \"agaricus.txt.train?indexing_mode=1&format=libsvm\")\n    dtest = DMatrix(dpath + \"agaricus.txt.test?indexing_mode=1&format=libsvm\")\n    params = {\n        \"eta\": 1,\n        \"max_depth\": 6,\n        \"objective\": \"binary:logistic\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n        \"interaction_constraints\": \"[[1,2], [2,3,4]]\",\n    }\n    num_boost_round = 5\n\n    params[\"grow_policy\"] = \"lossguide\"\n    bst = train(params, dtrain, num_boost_round)\n    pred_dtest = bst.predict(dtest) < 0.5\n    assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1\n\n    params[\"grow_policy\"] = \"depthwise\"\n    bst = train(params, dtrain, num_boost_round)\n    pred_dtest = bst.predict(dtest) < 0.5\n    assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1\n"
  },
  {
    "path": "python-package/xgboost/testing/intercept.py",
    "content": "\"\"\"Tests for estimating the intercept.\"\"\"\n\nimport json\nfrom typing import Dict, List, Optional\n\nimport numpy as np\nfrom scipy.special import softmax\nfrom sklearn.datasets import (\n    make_classification,\n    make_multilabel_classification,\n    make_regression,\n)\n\nfrom ..core import Booster, DMatrix, QuantileDMatrix\nfrom ..sklearn import XGBClassifier, XGBRegressor\nfrom ..training import train\nfrom .updater import get_basescore\nfrom .utils import Device, non_increasing\n\n\n# pylint: disable=too-many-statements\ndef run_init_estimation(tree_method: str, device: Device) -> None:\n    \"\"\"Test for init estimation.\"\"\"\n\n    def run_reg(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-name\n        reg = XGBRegressor(\n            tree_method=tree_method, max_depth=1, n_estimators=1, device=device\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n        base_score_0 = get_basescore(reg)\n        score_0 = reg.evals_result()[\"validation_0\"][\"rmse\"][0]\n\n        n_targets = 1 if y.ndim == 1 else y.shape[1]\n        intercept = np.full(shape=(n_targets,), fill_value=0.5, dtype=np.float32)\n        reg = XGBRegressor(\n            tree_method=tree_method,\n            device=device,\n            max_depth=1,\n            n_estimators=1,\n            base_score=intercept,\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n        base_score_1 = get_basescore(reg)\n        score_1 = reg.evals_result()[\"validation_0\"][\"rmse\"][0]\n        assert not np.isclose(base_score_0, base_score_1).any()\n        assert score_0 < score_1  # should be better\n\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_regression(n_samples=4096, random_state=17)\n    run_reg(X, y)\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_regression(n_samples=4096, n_targets=3, random_state=17)\n    run_reg(X, y)\n\n    # pylint: disable=invalid-name\n    def run_clf(\n        X: np.ndarray, y: 
np.ndarray, w: Optional[np.ndarray] = None\n    ) -> List[float]:\n        clf = XGBClassifier(\n            tree_method=tree_method, max_depth=1, n_estimators=1, device=device\n        )\n        if w is not None:\n            clf.fit(\n                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]\n            )\n        else:\n            clf.fit(X, y, eval_set=[(X, y)])\n        base_score_0 = get_basescore(clf)\n        if clf.n_classes_ == 2:\n            score_0 = clf.evals_result()[\"validation_0\"][\"logloss\"][0]\n        else:\n            score_0 = clf.evals_result()[\"validation_0\"][\"mlogloss\"][0]\n\n        n_targets = 1 if y.ndim == 1 else y.shape[1]\n        intercept = np.full(shape=(n_targets,), fill_value=0.5, dtype=np.float32)\n        clf = XGBClassifier(\n            tree_method=tree_method,\n            max_depth=1,\n            n_estimators=1,\n            device=device,\n            base_score=intercept,\n        )\n        if w is not None:\n            clf.fit(\n                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]\n            )\n        else:\n            clf.fit(X, y, eval_set=[(X, y)])\n        base_score_1 = get_basescore(clf)\n        if clf.n_classes_ == 2:\n            score_1 = clf.evals_result()[\"validation_0\"][\"logloss\"][0]\n        else:\n            score_1 = clf.evals_result()[\"validation_0\"][\"mlogloss\"][0]\n        assert not np.isclose(base_score_0, base_score_1).any()\n        assert score_0 < score_1 + 1e-4  # should be better\n\n        return base_score_0\n\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_classification(n_samples=4096, random_state=17)\n    run_clf(X, y)\n    X, y = make_multilabel_classification(\n        n_samples=4096, n_labels=3, n_classes=5, random_state=17\n    )\n    run_clf(X, y)\n\n    # Extra tests for the classifier.\n    X, y = make_classification(\n        n_samples=4096, random_state=17, n_classes=5, 
n_informative=20, n_redundant=0\n    )\n    intercept = run_clf(X, y)\n    # un-transformed intercept sums to 0, as a convention.\n    np.testing.assert_allclose(np.sum(softmax(intercept)), 1.0)\n    np.testing.assert_allclose(np.sum(intercept), 0.0, atol=1e-6)\n\n    assert np.all(softmax(intercept) > 0)\n    np_int = (\n        np.histogram(\n            y, bins=np.concatenate([np.unique(y), np.array([np.finfo(np.float32).max])])\n        )[0]\n        / y.shape[0]\n    )\n    np.testing.assert_allclose(softmax(intercept), np_int, rtol=1e-6)\n\n    rng = np.random.default_rng(1994)\n    w = rng.uniform(low=0, high=1, size=(y.shape[0],))\n    intercept = run_clf(X, y, w)\n    np.testing.assert_allclose(np.sum(softmax(intercept)), 1.0)\n    assert np.all(softmax(intercept) > 0)\n\n\n# pylint: disable=too-many-locals\ndef run_adaptive(tree_method: str, weighted: bool, device: Device) -> None:\n    \"\"\"Test for adaptive trees.\"\"\"\n    rng = np.random.RandomState(1994)\n    from sklearn.utils import stats\n\n    n_samples = 256\n    X, y = make_regression(  # pylint: disable=unbalanced-tuple-unpacking\n        n_samples, 16, random_state=rng\n    )\n    if weighted:\n        w = rng.normal(size=n_samples)\n        w -= w.min()\n        Xy = DMatrix(X, y, weight=w)\n\n        kwargs = {\"percentile_rank\": 50}\n        base_score = stats._weighted_percentile(  # pylint: disable=protected-access\n            y, w, **kwargs\n        )\n    else:\n        Xy = DMatrix(X, y)\n        base_score = np.median(y)\n\n    # Check the base score is expected.\n    booster_0 = train(\n        {\n            \"tree_method\": tree_method,\n            \"base_score\": base_score,\n            \"objective\": \"reg:absoluteerror\",\n            \"device\": device,\n        },\n        Xy,\n        num_boost_round=1,\n    )\n    booster_1 = train(\n        {\n            \"tree_method\": tree_method,\n            \"objective\": \"reg:absoluteerror\",\n            \"device\": 
device,\n        },\n        Xy,\n        num_boost_round=1,\n    )\n    config_0 = json.loads(booster_0.save_config())\n    config_1 = json.loads(booster_1.save_config())\n\n    assert get_basescore(config_0) == get_basescore(config_1)\n\n    # check the base score is correctly serialized.\n    raw_booster = booster_1.save_raw(raw_format=\"ubj\")\n    booster_2 = Booster(model_file=raw_booster)\n    config_2 = json.loads(booster_2.save_config())\n    assert get_basescore(config_1) == get_basescore(config_2)\n\n    # check we can override the base score.\n    booster_0 = train(\n        {\n            \"tree_method\": tree_method,\n            \"base_score\": base_score + 1.0,\n            \"objective\": \"reg:absoluteerror\",\n            \"device\": device,\n        },\n        Xy,\n        num_boost_round=1,\n    )\n    config_0 = json.loads(booster_0.save_config())\n    np.testing.assert_allclose(\n        get_basescore(config_0), np.asarray(get_basescore(config_1)) + 1\n    )\n\n    # check we can use subsampling.\n    evals_result: Dict[str, Dict[str, list]] = {}\n    train(\n        {\n            \"tree_method\": tree_method,\n            \"device\": device,\n            \"objective\": \"reg:absoluteerror\",\n            \"subsample\": 0.8,\n            \"eta\": 1.0,\n        },\n        Xy,\n        num_boost_round=10,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    mae = evals_result[\"Train\"][\"mae\"]\n    assert mae[-1] < 20.0\n    assert non_increasing(mae)\n\n\ndef run_exp_family(device: Device) -> None:\n    \"\"\"Exp family has a closed solution.\"\"\"\n    X, y = make_classification(n_samples=128, n_classes=2, weights=[0.8, 0.2])\n    Xy = QuantileDMatrix(X, y)\n    clf = train(\n        {\"objective\": \"binary:logistic\", \"device\": device}, Xy, num_boost_round=1\n    )\n    reg = train({\"objective\": \"reg:logistic\", \"device\": device}, Xy, num_boost_round=1)\n    clf1 = train(\n        {\"objective\": 
\"binary:logitraw\", \"device\": device}, Xy, num_boost_round=1\n    )\n    # The base score stored in the booster model is un-transformed\n    np.testing.assert_allclose([get_basescore(m) for m in (reg, clf, clf1)], y.mean())\n\n    X, y = make_classification(weights=[0.8, 0.2], random_state=2025)\n    clf = train(\n        {\"objective\": \"binary:logistic\", \"scale_pos_weight\": 4.0, \"device\": device},\n        QuantileDMatrix(X, y),\n        num_boost_round=1,\n    )\n    score = get_basescore(clf)\n    np.testing.assert_allclose(score, 0.5, rtol=1e-3)\n\n\ndef run_logistic_degenerate(device: Device) -> None:\n    \"\"\"Test https://github.com/dmlc/xgboost/issues/11499 .\"\"\"\n\n    def run(v: float) -> None:\n        dtrain = DMatrix(np.asarray([[1.0], [1.0]]), label=[v, v])\n        bst = train(\n            {\"objective\": \"binary:logistic\", \"device\": device},\n            dtrain,\n            1,\n        )\n        intercept = get_basescore(bst)\n        assert intercept[0] == v\n\n    run(0.0)\n    run(1.0)\n"
  },
  {
    "path": "python-package/xgboost/testing/metrics.py",
    "content": "\"\"\"Tests for evaluation metrics.\"\"\"\n\nfrom typing import Dict, List, Optional\n\nimport numpy as np\nimport pytest\n\nfrom ..compat import concat\nfrom ..core import DMatrix, QuantileDMatrix, _parse_eval_str\nfrom ..sklearn import XGBClassifier, XGBRanker\nfrom ..training import train\nfrom .utils import Device\n\n\ndef check_precision_score(  # pylint: disable=too-many-locals\n    tree_method: str, device: Device\n) -> None:\n    \"\"\"Test for precision with ranking and classification.\"\"\"\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n\n    X, y = datasets.make_classification(\n        n_samples=1024, n_features=4, n_classes=2, random_state=2023\n    )\n    qid = np.zeros(shape=y.shape)  # same group\n\n    ltr = XGBRanker(n_estimators=2, tree_method=tree_method, device=device)\n    ltr.fit(X, y, qid=qid)\n\n    # re-generate so that XGBoost doesn't evaluate the result to 1.0\n    X, y = datasets.make_classification(\n        n_samples=512, n_features=4, n_classes=2, random_state=1994\n    )\n\n    ltr.set_params(eval_metric=\"pre@32\")\n    result = _parse_eval_str(ltr.get_booster().eval_set(evals=[(DMatrix(X, y), \"Xy\")]))\n    score_0 = result[1][1]\n\n    X_list = []\n    y_list = []\n    n_query_groups = 3\n    q_list: List[np.ndarray] = []\n    for i in range(n_query_groups):\n        # same for all groups\n        X, y = datasets.make_classification(\n            n_samples=512, n_features=4, n_classes=2, random_state=1994\n        )\n        X_list.append(X)\n        y_list.append(y)\n        q = np.full(shape=y.shape, fill_value=i, dtype=np.uint64)\n        q_list.append(q)\n\n    qid = concat(q_list)\n    X = concat(X_list)\n    y = concat(y_list)\n\n    result = _parse_eval_str(\n        ltr.get_booster().eval_set(evals=[(DMatrix(X, y, qid=qid), \"Xy\")])\n    )\n    assert result[1][0].endswith(\"pre@32\")\n    score_1 = result[1][1]\n    assert score_1 == score_0\n\n\ndef check_quantile_error(tree_method: str, 
device: Device) -> None:\n    \"\"\"Test for the `quantile` loss.\"\"\"\n    from sklearn.datasets import make_regression\n    from sklearn.metrics import mean_pinball_loss\n\n    rng = np.random.RandomState(19)\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_regression(128, 3, random_state=rng)\n    Xy = QuantileDMatrix(X, y)\n    evals_result: Dict[str, Dict] = {}\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"quantile\",\n            \"quantile_alpha\": 0.3,\n            \"device\": device,\n        },\n        Xy,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    predt = booster.inplace_predict(X)\n    loss = mean_pinball_loss(y, predt, alpha=0.3)\n    np.testing.assert_allclose(evals_result[\"Train\"][\"quantile\"][-1], loss)\n\n    alpha = [0.25, 0.5, 0.75]\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"quantile\",\n            \"quantile_alpha\": alpha,\n            \"objective\": \"reg:quantileerror\",\n            \"device\": device,\n        },\n        Xy,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    predt = booster.inplace_predict(X)\n    loss = np.mean(\n        [mean_pinball_loss(y, predt[:, i], alpha=alpha[i]) for i in range(3)]\n    )\n    np.testing.assert_allclose(evals_result[\"Train\"][\"quantile\"][-1], loss)\n\n\ndef _expectile_loss(\n    y: np.ndarray, predt: np.ndarray, alpha: float, weight: Optional[np.ndarray]\n) -> float:\n    diff = predt - y\n    weight_scale = np.where(diff >= 0.0, 1.0 - alpha, alpha)\n    loss = weight_scale * diff * diff\n    if weight is None:\n        return float(np.mean(loss))\n    return float(np.sum(loss * weight) / np.sum(weight))\n\n\ndef _expectile_loss_multi(\n    y: np.ndarray, predt: np.ndarray, alpha: np.ndarray, weight: Optional[np.ndarray]\n) -> float:\n    diff = predt - y[:, None]\n    
weight_scale = np.where(diff >= 0.0, 1.0 - alpha, alpha)\n    loss = weight_scale * diff * diff\n    if weight is None:\n        return float(np.mean(loss))\n    return float(np.sum(loss * weight[:, None]) / (np.sum(weight) * alpha.size))\n\n\ndef check_expectile_error(tree_method: str, device: Device) -> None:\n    \"\"\"Test for the `expectile` loss.\"\"\"\n    from sklearn.datasets import make_regression\n\n    rng = np.random.RandomState(23)\n    X, y = make_regression(128, 3, random_state=rng)\n    Xy = DMatrix(X, y)\n    evals_result: Dict[str, Dict] = {}\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"expectile\",\n            \"expectile_alpha\": 0.3,\n            \"device\": device,\n        },\n        Xy,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    predt = booster.inplace_predict(X)\n    loss = _expectile_loss(y, predt, 0.3, None)\n    np.testing.assert_allclose(evals_result[\"Train\"][\"expectile\"][-1], loss)\n\n    alpha = np.array([0.25, 0.5, 0.75])\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"expectile\",\n            \"expectile_alpha\": alpha,\n            \"objective\": \"reg:expectileerror\",\n            \"device\": device,\n        },\n        Xy,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    predt = booster.inplace_predict(X)\n    loss = _expectile_loss_multi(y, predt, alpha, None)\n    np.testing.assert_allclose(evals_result[\"Train\"][\"expectile\"][-1], loss)\n\n    weights = rng.uniform(0.1, 1.0, size=y.shape[0])\n    Xy_w = DMatrix(X, y, weight=weights)\n    evals_result_w: Dict[str, Dict] = {}\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"expectile\",\n            \"expectile_alpha\": alpha,\n            \"objective\": \"reg:expectileerror\",\n            \"device\": device,\n       
 },\n        Xy_w,\n        evals=[(Xy_w, \"Train\")],\n        evals_result=evals_result_w,\n    )\n    predt = booster.inplace_predict(X)\n    loss = _expectile_loss_multi(y, predt, alpha, weights)\n    np.testing.assert_allclose(evals_result_w[\"Train\"][\"expectile\"][-1], loss)\n\n\ndef run_roc_auc_binary(tree_method: str, n_samples: int, device: Device) -> None:\n    \"\"\"TestROC AUC metric on a binary classification problem.\"\"\"\n    from sklearn.datasets import make_classification\n    from sklearn.metrics import roc_auc_score\n\n    rng = np.random.RandomState(1994)\n    n_features = 10\n\n    X, y = make_classification(\n        n_samples,\n        n_features,\n        n_informative=n_features,\n        n_redundant=0,\n        random_state=rng,\n    )\n    Xy = DMatrix(X, y)\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"device\": device,\n            \"eval_metric\": \"auc\",\n            \"objective\": \"binary:logistic\",\n        },\n        Xy,\n        num_boost_round=1,\n    )\n    score = booster.predict(Xy)\n    skl_auc = roc_auc_score(y, score)\n    auc = float(booster.eval(Xy).split(\":\")[1])\n    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)\n\n    X = rng.randn(*X.shape)\n    score = booster.predict(DMatrix(X))\n    skl_auc = roc_auc_score(y, score)\n    auc = float(booster.eval(DMatrix(X, y)).split(\":\")[1])\n    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)\n\n\ndef run_pr_auc_multi(tree_method: str, device: Device) -> None:\n    \"\"\"Test for PR AUC metric on a multi-class classification problem.\"\"\"\n    from sklearn.datasets import make_classification\n\n    X, y = make_classification(64, 16, n_informative=8, n_classes=3, random_state=1994)\n    clf = XGBClassifier(\n        tree_method=tree_method, n_estimators=1, eval_metric=\"aucpr\", device=device\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    evals_result = clf.evals_result()[\"validation_0\"][\"aucpr\"][-1]\n    # 
No available implementation for comparison, just check that XGBoost converges\n    # to 1.0\n    clf = XGBClassifier(\n        tree_method=tree_method, n_estimators=10, eval_metric=\"aucpr\", device=device\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    evals_result = clf.evals_result()[\"validation_0\"][\"aucpr\"][-1]\n    np.testing.assert_allclose(1.0, evals_result, rtol=1e-2)\n\n\ndef run_roc_auc_multi(  # pylint: disable=too-many-locals\n    tree_method: str, n_samples: int, weighted: bool, device: Device\n) -> None:\n    \"\"\"Test for ROC AUC metric on a multi-class classification problem.\"\"\"\n    from sklearn.datasets import make_classification\n    from sklearn.metrics import roc_auc_score\n\n    rng = np.random.RandomState(1994)\n    n_features = 10\n    n_classes = 4\n\n    X, y = make_classification(\n        n_samples,\n        n_features,\n        n_informative=n_features,\n        n_redundant=0,\n        n_classes=n_classes,\n        random_state=rng,\n    )\n    if weighted:\n        weights = rng.randn(n_samples)\n        weights -= weights.min()\n        weights /= weights.max()\n    else:\n        weights = None\n\n    Xy = DMatrix(X, y, weight=weights)\n    booster = train(\n        {\n            \"tree_method\": tree_method,\n            \"eval_metric\": \"auc\",\n            \"objective\": \"multi:softprob\",\n            \"num_class\": n_classes,\n            \"device\": device,\n        },\n        Xy,\n        num_boost_round=1,\n    )\n    score = booster.predict(Xy)\n    skl_auc = roc_auc_score(\n        y, score, average=\"weighted\", sample_weight=weights, multi_class=\"ovr\"\n    )\n    auc = float(booster.eval(Xy).split(\":\")[1])\n    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)\n\n    X = rng.randn(*X.shape)\n\n    score = booster.predict(DMatrix(X, weight=weights))\n    skl_auc = roc_auc_score(\n        y, score, average=\"weighted\", sample_weight=weights, multi_class=\"ovr\"\n    )\n    auc = 
float(booster.eval(DMatrix(X, y, weight=weights)).split(\":\")[1])\n    np.testing.assert_allclose(skl_auc, auc, rtol=1e-5)\n\n\ndef run_pr_auc_ltr(tree_method: str, device: Device) -> None:\n    \"\"\"Test for PR AUC metric on a ranking problem.\"\"\"\n    from sklearn.datasets import make_classification\n\n    X, y = make_classification(128, 4, n_classes=2, random_state=1994)\n    ltr = XGBRanker(\n        tree_method=tree_method,\n        n_estimators=16,\n        objective=\"rank:pairwise\",\n        eval_metric=\"aucpr\",\n        device=device,\n    )\n    groups = np.array([32, 32, 64])\n    ltr.fit(\n        X,\n        y,\n        group=groups,\n        eval_set=[(X, y)],\n        eval_group=[groups],\n    )\n    results = ltr.evals_result()[\"validation_0\"][\"aucpr\"]\n    assert results[-1] >= 0.99\n\n\ndef run_pr_auc_binary(tree_method: str, device: Device) -> None:\n    \"\"\"Test for PR AUC metric on a binary classification problem.\"\"\"\n    from sklearn.datasets import make_classification\n    from sklearn.metrics import auc, precision_recall_curve\n\n    X, y = make_classification(128, 4, n_classes=2, random_state=1994)\n    clf = XGBClassifier(\n        tree_method=tree_method, n_estimators=1, eval_metric=\"aucpr\", device=device\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    evals_result = clf.evals_result()[\"validation_0\"][\"aucpr\"][-1]\n\n    y_score = clf.predict_proba(X)[:, 1]  # get the positive column\n    precision, recall, _ = precision_recall_curve(y, y_score)\n    prauc = auc(recall, precision)\n    # Interpolation results are slightly different from sklearn, but overall should\n    # be similar.\n    np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)\n\n    clf = XGBClassifier(\n        tree_method=tree_method, n_estimators=10, eval_metric=\"aucpr\", device=device\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    evals_result = clf.evals_result()[\"validation_0\"][\"aucpr\"][-1]\n    np.testing.assert_allclose(0.99, 
evals_result, rtol=1e-2)\n"
  },
  {
    "path": "python-package/xgboost/testing/monotone_constraints.py",
    "content": "\"\"\"Helpers for testing monotone constraints.\"\"\"\n\nfrom typing import Optional\n\nimport numpy as np\n\nfrom .._typing import FeatureNames\nfrom ..core import Booster, DMatrix\n\n\ndef is_increasing(v: np.ndarray) -> bool:\n    \"\"\"Whether is v increasing.\"\"\"\n    return np.count_nonzero(np.diff(v) < 0.0) == 0\n\n\ndef is_decreasing(v: np.ndarray) -> bool:\n    \"\"\"Whether is v decreasing.\"\"\"\n    return np.count_nonzero(np.diff(v) > 0.0) == 0\n\n\ndef is_correctly_constrained(\n    learner: Booster, feature_names: Optional[FeatureNames] = None\n) -> bool:\n    \"\"\"Whether the monotone constraint is correctly applied.\"\"\"\n    n = 100\n    variable_x = np.linspace(0, 1, n).reshape((n, 1))\n    fixed_xs_values = np.linspace(0, 1, n)\n\n    for i in range(n):\n        fixed_x = fixed_xs_values[i] * np.ones((n, 1))\n        monotonically_increasing_x = np.column_stack((variable_x, fixed_x))\n        monotonically_increasing_dset = DMatrix(\n            monotonically_increasing_x, feature_names=feature_names\n        )\n        monotonically_increasing_y = learner.predict(monotonically_increasing_dset)\n\n        monotonically_decreasing_x = np.column_stack((fixed_x, variable_x))\n        monotonically_decreasing_dset = DMatrix(\n            monotonically_decreasing_x, feature_names=feature_names\n        )\n        monotonically_decreasing_y = learner.predict(monotonically_decreasing_dset)\n\n        if not (\n            is_increasing(monotonically_increasing_y)\n            and is_decreasing(monotonically_decreasing_y)\n        ):\n            return False\n\n    return True\n\n\nNUMBER_OF_DPOINTS = 1000\nx1_positively_correlated_with_y = np.random.random(size=NUMBER_OF_DPOINTS)\nx2_negatively_correlated_with_y = np.random.random(size=NUMBER_OF_DPOINTS)\n\nx = np.column_stack((x1_positively_correlated_with_y, x2_negatively_correlated_with_y))\nzs = np.random.normal(loc=0.0, scale=0.01, size=NUMBER_OF_DPOINTS)\ny = (\n    5 * 
x1_positively_correlated_with_y\n    + np.sin(10 * np.pi * x1_positively_correlated_with_y)\n    - 5 * x2_negatively_correlated_with_y\n    - np.cos(10 * np.pi * x2_negatively_correlated_with_y)\n    + zs\n)\ntraining_dset = DMatrix(x, label=y)\n"
  },
  {
    "path": "python-package/xgboost/testing/multi_target.py",
    "content": "\"\"\"Tests for multi-target training.\"\"\"\n\n# pylint: disable=unbalanced-tuple-unpacking\nfrom types import ModuleType\nfrom typing import Any, Callable, Dict, List, Optional, Tuple\n\nimport numpy as np\nimport pytest\nfrom sklearn.datasets import (\n    make_classification,\n    make_multilabel_classification,\n    make_regression,\n)\nfrom sklearn.metrics.pairwise import cosine_similarity\n\nimport xgboost.testing as tm\n\nfrom .._typing import ArrayLike\nfrom ..compat import import_cupy\nfrom ..core import Booster, DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix, build_info\nfrom ..objective import Objective, TreeObjective\nfrom ..sklearn import XGBClassifier\nfrom ..training import train\nfrom .data import IteratorForTest\nfrom .updater import ResetStrategy, train_result\nfrom .utils import Device, assert_allclose, non_increasing\n\n\ndef run_multiclass(device: Device, learning_rate: Optional[float]) -> None:\n    \"\"\"Use vector leaf for multi-class models.\"\"\"\n    X, y = make_classification(\n        128, n_features=12, n_informative=10, n_classes=4, random_state=2025\n    )\n    clf = XGBClassifier(\n        debug_synchronize=True,\n        multi_strategy=\"multi_output_tree\",\n        callbacks=[ResetStrategy()],\n        n_estimators=10,\n        device=device,\n        learning_rate=learning_rate,\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    assert clf.objective == \"multi:softprob\"\n    assert non_increasing(clf.evals_result()[\"validation_0\"][\"mlogloss\"])\n    if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5:\n        assert clf.evals_result()[\"validation_0\"][\"mlogloss\"][-1] < 0.045\n\n    proba = clf.predict_proba(X)\n    assert proba.shape == (y.shape[0], 4)\n\n\ndef run_multilabel(device: Device, learning_rate: Optional[float]) -> None:\n    \"\"\"Use vector leaf for multi-label classification models.\"\"\"\n    X, y = make_multilabel_classification(128, random_state=2025)\n    clf = 
XGBClassifier(\n        debug_synchronize=True,\n        multi_strategy=\"multi_output_tree\",\n        callbacks=[ResetStrategy()],\n        n_estimators=10,\n        device=device,\n        learning_rate=learning_rate,\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    assert clf.objective == \"binary:logistic\"\n    assert non_increasing(clf.evals_result()[\"validation_0\"][\"logloss\"])\n    if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5:\n        assert clf.evals_result()[\"validation_0\"][\"logloss\"][-1] < 0.065\n\n    proba = clf.predict_proba(X)\n    assert proba.shape == y.shape\n\n\ndef run_quantile_loss(device: Device, weighted: bool) -> None:\n    \"\"\"Check quantile regression for vector leaf.\"\"\"\n    params = {\n        \"objective\": \"reg:quantileerror\",\n        \"device\": device,\n        \"quantile_alpha\": [0.45, 0.5, 0.55],\n        \"multi_strategy\": \"multi_output_tree\",\n    }\n    n_samples = 2048\n    X, y = make_regression(n_samples=n_samples, n_features=16, random_state=2026)\n\n    def no_crossing_first_tree(weight: Optional[np.ndarray]) -> None:\n        \"\"\"The first tree should not generate quantile crossing given sufficient amount\n        of samples for quantile interpolation.\n\n        \"\"\"\n        Xy = QuantileDMatrix(X, y, weight=weight)\n        booster = train(params, Xy, evals=[(Xy, \"Train\")], num_boost_round=1)\n        y_predt = booster.predict(Xy)\n        assert y_predt.shape == (n_samples, 3)\n        assert (y_predt[:, 0] <= y_predt[:, 1]).all()\n        assert (y_predt[:, 1] <= y_predt[:, 2]).all()\n\n    if not weighted:\n        weight = None\n    else:\n        # Test with weights.\n        rng = np.random.default_rng(2026)\n        weight = rng.uniform(0.0, 1.0, size=n_samples)\n\n    no_crossing_first_tree(weight)\n\n    Xy = QuantileDMatrix(X, y, weight=weight)\n    evals_result = train_result(params, Xy, num_rounds=10)\n    assert 
non_increasing(evals_result[\"train\"][\"quantile\"])\n\n\ndef run_absolute_error(device: Device) -> None:\n    \"\"\"Test mean absolute error with vector leaf.\"\"\"\n    params = {\n        \"objective\": \"reg:absoluteerror\",\n        \"device\": device,\n        \"multi_strategy\": \"multi_output_tree\",\n    }\n    n_samples = 1024\n    X, y = make_regression(\n        n_samples=n_samples, n_features=16, n_targets=3, random_state=2026\n    )\n    Xy = QuantileDMatrix(X, y)\n    evals_result: Dict[str, Dict] = {}\n    booster = train(\n        params,\n        Xy,\n        evals=[(Xy, \"Train\")],\n        verbose_eval=False,\n        evals_result=evals_result,\n        num_boost_round=16,\n    )\n    predt = booster.predict(Xy)\n    # make sure different targets are used\n    assert np.abs((predt[:, 2] - predt[:, 1]).sum()) > 1000\n    assert np.abs((predt[:, 1] - predt[:, 0]).sum()) > 1000\n    assert non_increasing(evals_result[\"Train\"][\"mae\"])\n    assert evals_result[\"Train\"][\"mae\"][-1] < 30.0\n\n\ndef _array_impl(device: Device) -> ModuleType:\n    if device == \"cuda\":\n        nda = import_cupy()\n    else:\n        nda = np\n    return nda\n\n\nclass LsObj0(TreeObjective):\n    \"\"\"Split grad is the same as value grad.\"\"\"\n\n    def __init__(self, device: Device) -> None:\n        self.device = device\n\n    def __call__(\n        self, iteration: int, y_pred: ArrayLike, dtrain: DMatrix\n    ) -> Tuple[ArrayLike, ArrayLike]:\n        nda = _array_impl(self.device)\n\n        y_true = dtrain.get_label()\n        grad, hess = tm.ls_obj(y_true, y_pred, None)\n        return nda.array(grad), nda.array(hess)\n\n    def split_grad(\n        self, iteration: int, grad: ArrayLike, hess: ArrayLike\n    ) -> Tuple[ArrayLike, ArrayLike]:\n        nda = _array_impl(self.device)\n        return nda.array(grad), nda.array(hess)\n\n\nclass LsObj1(Objective):\n    \"\"\"No split grad.\"\"\"\n\n    def __init__(self, device: Device) -> None:\n        
self.device = device\n\n    def __call__(\n        self, iteration: int, y_pred: ArrayLike, dtrain: DMatrix\n    ) -> Tuple[ArrayLike, ArrayLike]:\n        nda = _array_impl(self.device)\n\n        y_true = dtrain.get_label()\n        grad, hess = tm.ls_obj(y_true, y_pred, None)\n        return nda.array(grad), nda.array(hess)\n\n\n# Use mean gradient, should still converge.\nclass LsObj2(LsObj0):\n    \"\"\"Use mean as split grad.\"\"\"\n\n    def __init__(self, device: Device, check_used: bool):\n        self._chk = check_used\n        super().__init__(device=device)\n\n    def split_grad(\n        self, iteration: int, grad: ArrayLike, hess: ArrayLike\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        nda = _array_impl(self.device)\n\n        if self._chk:\n            assert False\n        sgrad = nda.mean(grad, axis=1)\n        shess = nda.mean(hess, axis=1)\n        return sgrad, shess\n\n\ndef run_reduced_grad(device: Device) -> None:\n    \"\"\"Basic test for using reduced gradient for tree splits.\"\"\"\n    X, y = make_regression(\n        n_samples=1024, n_features=16, random_state=1994, n_targets=5\n    )\n    Xy = QuantileDMatrix(X, y)\n\n    def run_test(\n        obj: Optional[Objective], base_score: Optional[list[float]] = None\n    ) -> Booster:\n        evals_result: Dict[str, Dict] = {}\n        booster = train(\n            {\n                \"debug_synchronize\": True,\n                \"device\": device,\n                \"multi_strategy\": \"multi_output_tree\",\n                \"learning_rate\": 1,\n                \"base_score\": base_score,\n            },\n            Xy,\n            evals=[(Xy, \"Train\")],\n            obj=obj,\n            num_boost_round=8,\n            evals_result=evals_result,\n        )\n        assert non_increasing(evals_result[\"Train\"][\"rmse\"])\n        return booster\n\n    booster_0 = run_test(LsObj0(device))\n    booster_1 = run_test(LsObj1(device))\n    np.testing.assert_allclose(\n        
booster_0.inplace_predict(X), booster_1.inplace_predict(X)\n    )\n\n    booster_2 = run_test(LsObj0(device), [0.5] * y.shape[1])\n    booster_3 = run_test(None, [0.5] * y.shape[1])\n    np.testing.assert_allclose(\n        booster_2.inplace_predict(X), booster_3.inplace_predict(X)\n    )\n\n    run_test(LsObj2(device, False))\n    with pytest.raises(AssertionError):\n        run_test(LsObj2(device, True))\n\n\ndef run_with_iter(device: Device) -> None:  # pylint: disable=too-many-locals\n    \"\"\"Test vector leaf with external memory.\"\"\"\n    nda = _array_impl(device)\n\n    n_batches = 4\n    n_rounds = 8\n    n_targets = 3\n    intercept = [0.5] * n_targets\n\n    params = {\n        \"device\": device,\n        \"multi_strategy\": \"multi_output_tree\",\n        \"learning_rate\": 1.0,\n        \"base_score\": intercept,\n        \"debug_synchronize\": True,\n    }\n\n    Xs = []\n    ys = []\n    for i in range(n_batches):\n        X_i, y_i = make_regression(\n            n_samples=4096, n_features=8, random_state=(i + 1), n_targets=n_targets\n        )\n        Xs.append(nda.asarray(X_i))\n        ys.append(nda.asarray(y_i))\n    it = IteratorForTest(Xs, ys, None, cache=\"cache\", on_host=True)\n    Xy: DMatrix = ExtMemQuantileDMatrix(\n        it, cache_host_ratio=1.0 if device == \"cuda\" else None\n    )\n\n    evals_result_0: Dict[str, Dict] = {}\n    booster_0 = train(\n        params,\n        Xy,\n        num_boost_round=n_rounds,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result_0,\n    )\n\n    it = IteratorForTest(Xs, ys, None, cache=None)\n    Xy = QuantileDMatrix(it)\n    evals_result_1: Dict[str, Dict] = {}\n    booster_1 = train(\n        params,\n        Xy,\n        num_boost_round=n_rounds,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result_1,\n    )\n    np.testing.assert_allclose(\n        evals_result_0[\"Train\"][\"rmse\"], evals_result_1[\"Train\"][\"rmse\"]\n    )\n    assert 
non_increasing(evals_result_0[\"Train\"][\"rmse\"])\n    X, _, _ = it.as_arrays()\n    assert_allclose(device, booster_0.inplace_predict(X), booster_1.inplace_predict(X))\n\n    binfo = build_info()\n    tv = \"THRUST_VERSION\"\n    if device == \"cuda\" and tv in binfo and binfo[tv][0] < 3:\n        pytest.xfail(\"CCCL version too old.\")\n\n    it = IteratorForTest(\n        Xs,\n        ys,\n        None,\n        cache=\"cache\",\n        on_host=True,\n        min_cache_page_bytes=X.shape[0] // n_batches * X.shape[1],\n    )\n    Xy = ExtMemQuantileDMatrix(it, cache_host_ratio=1.0 if device == \"cuda\" else None)\n\n    evals_result_2: Dict[str, Dict] = {}\n    booster_2 = train(\n        params,\n        Xy,\n        evals=[(Xy, \"Train\")],\n        obj=LsObj0(device),\n        num_boost_round=n_rounds,\n        evals_result=evals_result_2,\n    )\n    np.testing.assert_allclose(\n        evals_result_0[\"Train\"][\"rmse\"], evals_result_2[\"Train\"][\"rmse\"]\n    )\n    assert_allclose(device, booster_0.inplace_predict(X), booster_2.inplace_predict(X))\n\n\ndef run_eta(device: Device) -> None:\n    \"\"\"Test for learning rate.\"\"\"\n    X, y = make_regression(512, 16, random_state=2025, n_targets=3)\n\n    def run(obj: Optional[Objective]) -> None:\n        params = {\n            \"device\": device,\n            \"multi_strategy\": \"multi_output_tree\",\n            \"learning_rate\": 1.0,\n            \"debug_synchronize\": True,\n            \"base_score\": 0.0,\n        }\n        Xy = QuantileDMatrix(X, y)\n        booster_0 = train(params, Xy, num_boost_round=1, obj=obj)\n        params[\"learning_rate\"] = 0.1\n        booster_1 = train(params, Xy, num_boost_round=1, obj=obj)\n        params[\"learning_rate\"] = 2.0\n        booster_2 = train(params, Xy, num_boost_round=1, obj=obj)\n\n        predt_0 = booster_0.predict(Xy)\n        predt_1 = booster_1.predict(Xy)\n        predt_2 = booster_2.predict(Xy)\n\n        
np.testing.assert_allclose(predt_0, predt_1 * 10, rtol=1e-6)\n        np.testing.assert_allclose(predt_0 * 2, predt_2, rtol=1e-6)\n\n    run(None)\n    run(LsObj0(device))\n\n\ndef run_deterministic(device: Device) -> None:\n    \"\"\"Check the vector leaf implementation is deterministic.\"\"\"\n    X, y = make_regression(\n        n_samples=int(2**16), n_features=64, random_state=1994, n_targets=5\n    )\n\n    def run() -> Booster:\n        Xy = QuantileDMatrix(X, y)\n        params = {\n            \"device\": device,\n            \"multi_strategy\": \"multi_output_tree\",\n            \"debug_synchronize\": True,\n        }\n        return train(params, Xy, num_boost_round=16)\n\n    booster_0 = run()\n    booster_1 = run()\n    raw_0 = booster_0.save_raw()\n    raw_1 = booster_1.save_raw()\n    assert raw_0 == raw_1\n\n\ndef run_column_sampling(device: Device) -> None:\n    \"\"\"Test column sampling with feature importance for multi-target trees.\"\"\"\n    n_features = 32\n    X, y = make_regression(\n        n_samples=1024, n_features=n_features, random_state=1994, n_targets=3\n    )\n    # First half of features have weight, second half has 0 weight (not sampled).\n    feature_weights = np.zeros(shape=(n_features, 1), dtype=np.float32)\n    feature_weights[: n_features // 2] = 1.0 / (n_features / 2)\n    Xy = QuantileDMatrix(X, y, feature_weights=feature_weights)\n\n    params = {\n        \"device\": device,\n        \"multi_strategy\": \"multi_output_tree\",\n        \"debug_synchronize\": True,\n        \"colsample_bynode\": 0.4,\n    }\n    booster = train(params, Xy, num_boost_round=16)\n\n    # Test all importance types\n    for importance_type in [\"weight\", \"gain\", \"total_gain\", \"cover\", \"total_cover\"]:\n        scores: dict = booster.get_score(importance_type=importance_type)\n        assert len(scores) > 0, f\"No scores for {importance_type}\"\n\n        # Sampled features (first half) should be in scores\n        for f in range(0, 
n_features // 2):\n            assert f\"f{f}\" in scores, f\"f{f} not in {importance_type} scores\"\n\n        # Non-sampled features (second half) should NOT be in scores\n        for f in range(n_features // 2, n_features):\n            assert f\"f{f}\" not in scores\n\n        for score in scores.values():\n            assert isinstance(score, float)\n            assert score >= 0\n\n    # sklearn Coef\n    X, y = make_multilabel_classification(random_state=1994)\n    clf = XGBClassifier(\n        multi_strategy=\"multi_output_tree\",\n        importance_type=\"weight\",\n        device=device,\n        colsample_bynode=0.2,\n    )\n    clf.fit(X, y, feature_weights=np.arange(0, X.shape[1]))\n    fi = clf.feature_importances_\n    assert fi[0] == 0.0\n    assert fi[-1] > fi[1] * 5\n\n    w = np.polynomial.Polynomial.fit(np.arange(0, X.shape[1]), fi, deg=1)\n    assert w.coef[1] > 0.03\n\n\ndef run_grow_policy(device: Device, grow_policy: str) -> None:\n    \"\"\"Test grow policy (depthwise and lossguide) for vector leaf.\"\"\"\n    X, y = make_regression(\n        n_samples=1024, n_features=16, random_state=1994, n_targets=3\n    )\n    Xy = QuantileDMatrix(X, y)\n\n    params = {\n        \"device\": device,\n        \"multi_strategy\": \"multi_output_tree\",\n        \"debug_synchronize\": True,\n        \"grow_policy\": grow_policy,\n    }\n\n    evals_result = train_result(params, Xy, num_rounds=10)\n    assert non_increasing(evals_result[\"train\"][\"rmse\"])\n\n\ndef run_mixed_strategy(device: Device) -> None:\n    \"\"\"Test mixed multi_strategy with ResetStrategy callback.\"\"\"\n    X, y = make_classification(\n        n_samples=1024, n_informative=8, n_classes=3, random_state=1994\n    )\n    Xy = DMatrix(data=X, label=y)\n\n    booster = train(\n        {\n            \"num_parallel_tree\": 4,\n            \"num_class\": 3,\n            \"objective\": \"multi:softprob\",\n            \"multi_strategy\": \"multi_output_tree\",\n            \"device\": 
device,\n            \"debug_synchronize\": True,\n            \"base_score\": 0,\n        },\n        num_boost_round=16,\n        dtrain=Xy,\n        callbacks=[ResetStrategy()],\n    )\n\n    # Test model slicing - each boosting round should be iterable\n    assert len(list(booster)) == 16\n\n    # Test that sliced predictions sum to full prediction\n    predt = booster.predict(Xy, output_margin=True)\n    predt_sum = np.zeros(predt.shape)\n    for t in booster:\n        predt_sum += t.predict(Xy, output_margin=True)\n    np.testing.assert_allclose(predt, predt_sum, atol=1e-5)\n\n    # Test feature importance works with mixed trees\n    for importance_type in [\"weight\", \"gain\", \"total_gain\", \"cover\", \"total_cover\"]:\n        scores = booster.get_score(importance_type=importance_type)\n        assert len(scores) > 0\n        for score in scores.values():\n            assert isinstance(score, float)\n            assert score >= 0\n\n\ndef run_feature_importance_strategy_compare(device: Device) -> None:\n    \"\"\"Different strategies produce similar feature importance ratios.\"\"\"\n    n_features = 16\n    X, y = make_classification(\n        n_samples=2048,\n        n_features=n_features,\n        n_informative=10,\n        n_classes=4,\n        random_state=1994,\n    )\n    Xy = DMatrix(data=X, label=y)\n\n    base_params: Dict[str, Any] = {\n        \"num_class\": 4,\n        \"objective\": \"multi:softprob\",\n        \"device\": device,\n        \"debug_synchronize\": True,\n        \"max_depth\": 5,\n    }\n\n    # Train models with different strategies\n    boosters = [\n        train(\n            {**base_params, \"multi_strategy\": \"multi_output_tree\"},\n            Xy,\n            num_boost_round=32,\n        ),\n        train(\n            {**base_params, \"multi_strategy\": \"one_output_per_tree\"},\n            Xy,\n            num_boost_round=32,\n        ),\n        train(\n            {**base_params, \"multi_strategy\": 
\"multi_output_tree\"},\n            Xy,\n            num_boost_round=32,\n            callbacks=[ResetStrategy()],\n        ),\n    ]\n\n    def get_normalized_importance(booster: Booster, importance_type: str) -> np.ndarray:\n        \"\"\"Get feature importance as normalized array (sums to 1).\"\"\"\n        scores = booster.get_score(importance_type=importance_type)\n        arr = np.array([scores.get(f\"f{i}\", 0.0) for i in range(n_features)])\n        return arr / arr.sum() if arr.sum() > 0 else arr\n\n    for importance_type in [\"weight\", \"gain\", \"total_gain\", \"cover\", \"total_cover\"]:\n        imps = [get_normalized_importance(b, importance_type) for b in boosters]\n\n        # Check that importances are not exactly the same (different strategies)\n        assert not np.allclose(imps[0], imps[1])\n        assert not np.allclose(imps[0], imps[2])\n\n        # Check that normalized importances are similar (correlated)\n        # All strategies should have reasonably similar importance patterns\n        assert cosine_similarity([imps[0]], [imps[1]])[0, 0] > 0.9\n        assert cosine_similarity([imps[0]], [imps[2]])[0, 0] > 0.9\n        assert cosine_similarity([imps[1]], [imps[2]])[0, 0] > 0.9\n\n\n# pylint: disable=too-many-arguments, too-many-locals\ndef _run_regression_objective_test(\n    device: Device,\n    objective: str,\n    metric: str,\n    X: np.ndarray,\n    y: np.ndarray,\n    *,\n    extra_params: Optional[Dict[str, Any]] = None,\n    check_pred_positive: bool = False,\n    check_pred_probability: bool = False,\n    check_pred_binary: bool = False,\n    strictly_non_increasing: bool = True,\n) -> None:\n    params: Dict[str, Any] = {\n        \"objective\": objective,\n        \"device\": device,\n        \"multi_strategy\": \"multi_output_tree\",\n    }\n    if extra_params:\n        params.update(extra_params)\n\n    n_samples = X.shape[0]\n    n_targets = y.shape[1] if y.ndim > 1 else 1\n\n    Xy = DMatrix(X, y)\n    evals_result: 
Dict[str, Dict] = {}\n    booster = train(\n        params,\n        Xy,\n        evals=[(Xy, \"Train\")],\n        verbose_eval=False,\n        evals_result=evals_result,\n        num_boost_round=16,\n    )\n    predt = booster.predict(Xy)\n    assert predt.shape == (n_samples, n_targets)\n\n    if check_pred_positive:\n        assert (predt > 0).all()\n    if check_pred_probability:\n        assert (predt > 0).all() and (predt < 1).all()\n    if check_pred_binary:\n        assert set(np.unique(predt)).issubset({0.0, 1.0})\n\n    metric_vals = evals_result[\"Train\"][metric]\n    if strictly_non_increasing:\n        assert non_increasing(metric_vals)\n    else:\n        assert metric_vals[-1] < metric_vals[0]\n\n\ndef run_reg_squarederror(device: Device) -> None:\n    \"\"\"Test squared error regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    X, y = make_regression(\n        n_samples=n_samples, n_features=16, n_targets=n_targets, random_state=2026\n    )\n    _run_regression_objective_test(device, \"reg:squarederror\", \"rmse\", X, y)\n\n\ndef run_reg_logistic(device: Device) -> None:\n    \"\"\"Test logistic regression for probability with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    rng = np.random.default_rng(2026)\n    X = rng.standard_normal((n_samples, 16))\n    y = rng.uniform(0.0, 1.0, (n_samples, n_targets))  # Labels in [0, 1]\n    _run_regression_objective_test(\n        device, \"reg:logistic\", \"rmse\", X, y, check_pred_probability=True\n    )\n\n\ndef run_reg_gamma(device: Device) -> None:\n    \"\"\"Test gamma regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    rng = np.random.default_rng(2026)\n    X = rng.standard_normal((n_samples, 16))\n    y = rng.gamma(2.0, 2.0, (n_samples, n_targets))  # Labels must be positive\n    _run_regression_objective_test(\n        device, \"reg:gamma\", \"gamma-deviance\", X, y, check_pred_positive=True\n    )\n\n\ndef run_reg_squaredlogerror(device: 
Device) -> None:\n    \"\"\"Test squared log error regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    rng = np.random.default_rng(2026)\n    X = rng.standard_normal((n_samples, 16))\n    y = np.abs(rng.standard_normal((n_samples, n_targets))) + 0.1  # Labels > -1\n    _run_regression_objective_test(device, \"reg:squaredlogerror\", \"rmsle\", X, y)\n\n\ndef run_reg_pseudohubererror(device: Device) -> None:\n    \"\"\"Test pseudo huber error regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    X, y = make_regression(\n        n_samples=n_samples, n_features=16, n_targets=n_targets, random_state=2026\n    )\n    _run_regression_objective_test(\n        device, \"reg:pseudohubererror\", \"mphe\", X, y, extra_params={\"huber_slope\": 1.0}\n    )\n\n\ndef run_binary_logitraw(device: Device) -> None:\n    \"\"\"Test binary logitraw with vector leaf (multi-label classification).\"\"\"\n    n_samples = 1024\n    X, y = make_multilabel_classification(n_samples, random_state=2026)\n    _run_regression_objective_test(\n        device, \"binary:logitraw\", \"logloss\", X, y, strictly_non_increasing=False\n    )\n\n\ndef run_binary_hinge(device: Device) -> None:\n    \"\"\"Test binary hinge loss with vector leaf (multi-label classification).\"\"\"\n    n_samples = 1024\n    X, y = make_multilabel_classification(n_samples, random_state=2026)\n    _run_regression_objective_test(\n        device,\n        \"binary:hinge\",\n        \"error\",\n        X,\n        y,\n        check_pred_binary=True,\n        strictly_non_increasing=False,\n    )\n\n\ndef run_count_poisson(device: Device) -> None:\n    \"\"\"Test Poisson regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    rng = np.random.default_rng(2026)\n    X = rng.standard_normal((n_samples, 16))\n    y = rng.poisson(5, (n_samples, n_targets)).astype(np.float32)  # Labels >= 0\n    _run_regression_objective_test(\n        device, \"count:poisson\", 
\"poisson-nloglik\", X, y, check_pred_positive=True\n    )\n\n\ndef run_reg_tweedie(device: Device) -> None:\n    \"\"\"Test Tweedie regression with vector leaf.\"\"\"\n    n_samples, n_targets = 1024, 3\n    rng = np.random.default_rng(2026)\n    X = rng.standard_normal((n_samples, 16))\n    y = rng.gamma(2.0, 2.0, (n_samples, n_targets))  # Labels >= 0\n    _run_regression_objective_test(\n        device, \"reg:tweedie\", \"tweedie-nloglik@1.5\", X, y, check_pred_positive=True\n    )\n\n\ndef all_reg_objectives() -> List[Callable[[Device], None]]:\n    \"\"\"List of obj tests.\"\"\"\n    objs: List[Callable[[Device], None]] = [\n        run_reg_squarederror,\n        run_reg_logistic,\n        run_reg_gamma,\n        run_reg_squaredlogerror,\n        run_reg_pseudohubererror,\n        run_binary_logitraw,\n        run_binary_hinge,\n        run_count_poisson,\n        run_reg_tweedie,\n    ]\n    return objs\n\n\ndef _make_subsample_params(device: Device, sampling_method: str) -> dict:\n    params = {\n        \"device\": device,\n        \"tree_method\": \"hist\",\n        \"multi_strategy\": \"multi_output_tree\",\n        \"subsample\": 0.5,\n        \"sampling_method\": sampling_method,\n        \"max_depth\": 6,\n        \"debug_synchronize\": True,\n        \"seed\": 2026,\n    }\n    return params\n\n\ndef run_subsample(device: Device, sampling_method: str) -> None:\n    \"\"\"Test row subsampling.\"\"\"\n    n_samples = 2048\n    X, y = make_regression(\n        n_samples=n_samples, n_features=16, n_targets=3, random_state=2026\n    )\n    Xy = QuantileDMatrix(X, y)\n\n    params = _make_subsample_params(device, sampling_method)\n\n    evals_result = train_result(params, Xy, num_rounds=16)\n    # Training should converge with subsampling\n    assert non_increasing(evals_result[\"train\"][\"rmse\"], tolerance=0.01)\n\n    # Test with quantile regression\n    params = _make_subsample_params(device, sampling_method)\n    params[\"objective\"] = 
\"reg:quantileerror\"\n    params[\"quantile_alpha\"] = [0.25, 0.5, 0.75]\n    Xy_single = QuantileDMatrix(X, y[:, 0])\n    evals_result_q = train_result(params, Xy_single, num_rounds=16)\n    assert non_increasing(evals_result_q[\"train\"][\"quantile\"], tolerance=0.01)\n\n\ndef run_gradient_based_sampling_accuracy(device: Device) -> None:\n    \"\"\"Test that gradient-based sampling provides better accuracy.\"\"\"\n    n_samples = 4096\n    X, y = make_regression(\n        n_samples=n_samples, n_features=16, n_targets=3, random_state=2026\n    )\n    Xy = QuantileDMatrix(X, y)\n\n    params_uniform = _make_subsample_params(device, \"uniform\")\n\n    def run(obj: Callable | None) -> None:\n        # Train with uniform sampling\n        evals_uniform: Dict[str, Dict] = {}\n        train(\n            params_uniform,\n            Xy,\n            num_boost_round=32,\n            evals=[(Xy, \"train\")],\n            obj=obj,\n            verbose_eval=False,\n            evals_result=evals_uniform,\n        )\n\n        # Train with gradient-based sampling\n        params_grad = _make_subsample_params(device, \"gradient_based\")\n        evals_grad: Dict[str, Dict] = {}\n        train(\n            params_grad,\n            Xy,\n            num_boost_round=32,\n            evals=[(Xy, \"train\")],\n            obj=obj,\n            verbose_eval=False,\n            evals_result=evals_grad,\n        )\n\n        uniform_final = evals_uniform[\"train\"][\"rmse\"][-1]\n        grad_final = evals_grad[\"train\"][\"rmse\"][-1]\n        assert non_increasing(evals_uniform[\"train\"][\"rmse\"])\n        assert non_increasing(evals_grad[\"train\"][\"rmse\"])\n        if obj is None:\n            assert grad_final < uniform_final\n\n    run(None)\n    run(LsObj2(device, False))\n"
  },
  {
    "path": "python-package/xgboost/testing/ordinal.py",
    "content": "\"\"\"Tests for the ordinal re-coder.\"\"\"\n\nimport itertools\nimport os\nimport tempfile\nfrom concurrent.futures import ThreadPoolExecutor\nfrom functools import cache as fcache\nfrom typing import Any, Tuple, Type, TypeVar\n\nimport numpy as np\nimport pytest\n\nfrom .._typing import EvalsLog\nfrom ..core import DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix\nfrom ..data import _lazy_load_cudf_is_cat\nfrom ..training import train\nfrom .data import (\n    IteratorForTest,\n    is_pd_cat_dtype,\n    make_batches,\n    make_categorical,\n    memory,\n)\nfrom .updater import get_basescore\nfrom .utils import Device, assert_allclose, predictor_equal\n\n\n@fcache\ndef get_df_impl(device: Device) -> Tuple[Type, Type]:\n    \"\"\"Get data frame implementation based on the device.\"\"\"\n    if device == \"cpu\":\n        import pandas as pd\n\n        Df = pd.DataFrame\n        Ser = pd.Series\n    else:\n        import cudf\n\n        Df = cudf.DataFrame\n        Ser = cudf.Series\n    return Df, Ser\n\n\ndef asarray(device: Device, data: Any) -> np.ndarray:\n    \"\"\"Wrapper to get an array.\"\"\"\n    if device == \"cpu\":\n        return np.asarray(data)\n    import cupy as cp\n\n    return cp.asarray(data)\n\n\ndef comp_booster(device: Device, Xy: DMatrix, booster: str) -> None:\n    \"\"\"Compare the results from DMatrix and Booster.\"\"\"\n    cats_dm = Xy.get_categories(export_to_arrow=True).to_arrow()\n    assert cats_dm is not None\n\n    rng = np.random.default_rng(2025)\n    Xy.set_label(rng.normal(size=Xy.num_row()))\n    bst = train({\"booster\": booster, \"device\": device}, Xy, 1)\n    cats_bst = bst.get_categories(export_to_arrow=True).to_arrow()\n    assert cats_bst is not None\n    assert cats_dm == cats_bst\n\n\ndef run_cat_container(device: Device) -> None:\n    \"\"\"Basic tests for the container class used by the DMatrix.\"\"\"\n\n    def run_dispatch(device: Device, DMatrixT: Type) -> None:\n        Df, _ = 
get_df_impl(device)\n        # Basic test with a single feature\n        df = Df({\"c\": [\"cdef\", \"abc\"]}, dtype=\"category\")\n        categories = df.c.cat.categories\n\n        Xy = DMatrixT(df, enable_categorical=True)\n        assert Xy.feature_names == [\"c\"]\n        assert Xy.feature_types == [\"c\"]\n        results = Xy.get_categories(export_to_arrow=True).to_arrow()\n        assert results is not None\n        results_di = dict(results)\n        assert len(results_di[\"c\"]) == len(categories)\n        for i in range(len(results_di[\"c\"])):\n            assert str(results_di[\"c\"][i]) == str(categories[i]), (\n                results_di[\"c\"][i],\n                categories[i],\n            )\n\n        # Test with missing values.\n        df = Df({\"c\": [\"cdef\", None, \"abc\", \"abc\"]}, dtype=\"category\")\n        Xy = DMatrixT(df, enable_categorical=True)\n\n        cats = Xy.get_categories(export_to_arrow=True).to_arrow()\n        assert cats is not None\n        cats_id = dict(cats)\n        ser = cats_id[\"c\"].to_pandas()\n        assert ser.iloc[0] == \"abc\"\n        assert ser.iloc[1] == \"cdef\"\n        assert ser.size == 2\n\n        csr = Xy.get_data()\n        assert csr.data.size == 3\n        assert_allclose(device, csr.data, np.array([1.0, 0.0, 0.0]))\n        assert_allclose(device, csr.indptr, np.array([0, 1, 1, 2, 3]))\n        assert_allclose(device, csr.indices, np.array([0, 0, 0]))\n\n        comp_booster(device, Xy, \"gbtree\")\n        comp_booster(device, Xy, \"dart\")\n\n        # Test with explicit null-terminated strings.\n        df = Df({\"c\": [\"cdef\", None, \"abc\", \"abc\\0\"]}, dtype=\"category\")\n        Xy = DMatrixT(df, enable_categorical=True)\n\n        comp_booster(device, Xy, \"gbtree\")\n        comp_booster(device, Xy, \"dart\")\n\n        with pytest.raises(ValueError, match=\"export_to_arrow\"):\n            Xy.get_categories(export_to_arrow=False).to_arrow()\n\n    for dm in (DMatrix, 
QuantileDMatrix):\n        run_dispatch(device, dm)\n\n\n# pylint: disable=too-many-statements\ndef run_cat_container_mixed(device: Device) -> None:\n    \"\"\"Run checks with mixed types.\"\"\"\n    import pandas as pd\n\n    try:\n        is_cudf_cat = _lazy_load_cudf_is_cat()\n    except ImportError:\n\n        def is_cudf_cat(_: Any) -> bool:\n            return False\n\n    n_samples = int(2**10)\n\n    def check(Xy: DMatrix, X: pd.DataFrame) -> None:\n        cats = Xy.get_categories(export_to_arrow=True).to_arrow()\n        assert cats is not None\n        cats_di = dict(cats)\n\n        for fname in X.columns:\n            if is_pd_cat_dtype(X[fname].dtype) or is_cudf_cat(X[fname].dtype):\n                vf = cats_di[fname]\n                assert vf is not None\n                aw_list = sorted(vf.to_pylist())\n                if is_cudf_cat(X[fname].dtype):\n                    pd_list: list = X[fname].unique().to_arrow().to_pylist()\n                else:\n                    pd_list = X[fname].unique().tolist()\n                if np.nan in pd_list:  # pandas\n                    pd_list.remove(np.nan)\n                if None in pd_list:  # cudf\n                    pd_list.remove(None)\n                pd_list = sorted(pd_list)\n                assert aw_list == pd_list\n            else:\n                assert cats_di[fname] is None\n\n        if not hasattr(Xy, \"ref\"):  # not quantile DMatrix.\n            assert not isinstance(Xy, QuantileDMatrix)\n            with tempfile.TemporaryDirectory() as tmpdir:\n                fname = os.path.join(tmpdir, \"DMatrix.binary\")\n                Xy.save_binary(fname)\n\n                Xy_1 = DMatrix(fname)\n                cats_1 = Xy_1.get_categories(export_to_arrow=True).to_arrow()\n                assert cats_1 is not None\n                cats_1_di = dict(cats_1)\n\n                for k, v_0 in cats_di.items():\n                    v_1 = cats_1_di[k]\n                    if v_0 is None:\n          
              assert v_1 is None\n                    else:\n                        assert v_1 is not None\n                        assert v_0.to_pylist() == v_1.to_pylist()\n\n        comp_booster(device, Xy, \"gbtree\")\n        comp_booster(device, Xy, \"dart\")\n\n    def run_dispatch(DMatrixT: Type) -> None:\n        # full str type\n        X, y = make_categorical(\n            n_samples, 16, 7, onehot=False, cat_dtype=np.str_, device=device\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n        # str type, mixed with numerical features\n        X, y = make_categorical(\n            n_samples,\n            16,\n            7,\n            onehot=False,\n            cat_ratio=0.5,\n            cat_dtype=np.str_,\n            device=device,\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n        # str type, mixed with numerical features and missing values\n        X, y = make_categorical(\n            n_samples,\n            16,\n            7,\n            onehot=False,\n            cat_ratio=0.5,\n            sparsity=0.5,\n            cat_dtype=np.str_,\n            device=device,\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n        # int type\n        X, y = make_categorical(\n            n_samples, 16, 7, onehot=False, cat_dtype=np.int64, device=device\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n        # int type, mixed with numerical features\n        X, y = make_categorical(\n            n_samples,\n            16,\n            7,\n            onehot=False,\n            cat_ratio=0.5,\n            cat_dtype=np.int64,\n            device=device,\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n        # int type, mixed with numerical features and missing values\n        X, y = make_categorical(\n            n_samples,\n            16,\n     
       7,\n            onehot=False,\n            cat_ratio=0.5,\n            sparsity=0.5,\n            cat_dtype=np.int64,\n            device=device,\n        )\n        Xy = DMatrixT(X, y, enable_categorical=True)\n        check(Xy, X)\n\n    for dm in (DMatrix, QuantileDMatrix):\n        run_dispatch(dm)\n\n    # No category\n    batches = make_batches(\n        n_samples_per_batch=128, n_features=4, n_batches=1, use_cupy=device == \"cuda\"\n    )\n    X, y, w = map(lambda x: x[0], batches)\n\n    for DMatrixT in (DMatrix, QuantileDMatrix):\n        Xy = DMatrixT(X, y, weight=w)\n        all_num = Xy.get_categories(export_to_arrow=True).to_arrow()\n        assert all_num is not None\n        for _, v in all_num:\n            assert v is None\n\n        with pytest.raises(ValueError, match=\"export_to_arrow\"):\n            Xy.get_categories(export_to_arrow=False).to_arrow()\n\n\ndef run_cat_container_iter(device: Device) -> None:\n    \"\"\"Test the categories container for iterator-based inputs.\"\"\"\n    n_batches = 4\n    n_features = 8\n    n_samples_per_batch = 64\n    n_cats = 5\n\n    X, y = [], []\n    for _ in range(n_batches):\n        X_i, y_i = make_categorical(\n            n_samples_per_batch,\n            n_features,\n            n_cats,\n            onehot=False,\n            sparsity=0.5,\n            cat_dtype=np.int64,\n            device=device,\n        )\n        X.append(X_i)\n        y.append(y_i)\n\n    it = IteratorForTest(X, y, None, cache=\"cache\", on_host=device == \"cuda\")\n\n    Xy = ExtMemQuantileDMatrix(it, enable_categorical=True)\n    cats = Xy.get_categories(export_to_arrow=True).to_arrow()\n    assert cats is not None and len(cats) == n_features\n    cats_di = dict(cats)\n    for _, v in cats_di.items():\n        assert v is not None\n        assert v.null_count == 0\n        assert len(v) == n_cats\n\n\ndef _basic_example(device: Device) -> Tuple[Any, Any, np.ndarray, np.ndarray]:\n    Df, _ = get_df_impl(device)\n\n    
enc = Df({\"c\": [\"cdef\", \"abc\", \"def\"]}, dtype=\"category\")\n    codes = enc.c.cat.codes  # 1, 0, 2\n    assert_allclose(device, asarray(device, codes), np.array([1, 0, 2]))\n    encoded = np.array([codes.iloc[2], codes.iloc[1]])  # def, abc\n    np.testing.assert_allclose(encoded, [2, 0])\n\n    reenc = Df({\"c\": [\"def\", \"abc\"]}, dtype=\"category\")  # same as `encoded`\n    codes = reenc.c.cat.codes\n    assert_allclose(device, codes, np.array([1, 0]))\n\n    y = np.array([0, 1, 2])\n\n    return enc, reenc, encoded, y\n\n\ndef run_basic_predict(DMatrixT: Type, device: Device, tdevice: Device) -> None:\n    \"\"\"Enable tests with mixed devices.\"\"\"\n    enc, reenc, encoded, y = _basic_example(device)\n\n    Xy = DMatrixT(enc, y, enable_categorical=True)\n    booster = train({\"device\": tdevice}, Xy, num_boost_round=4)\n\n    predt0 = booster.inplace_predict(reenc)\n    predt1 = booster.inplace_predict(encoded)\n    assert_allclose(device, predt0, predt1)\n\n    fmat = DMatrixT(reenc, enable_categorical=True)\n    predt2 = booster.predict(fmat)\n    assert_allclose(device, predt0, predt2)\n\n\ndef run_cat_predict(device: Device) -> None:\n    \"\"\"Basic tests for re-coding during prediction.\"\"\"\n    Df, _ = get_df_impl(device)\n\n    for dm in (DMatrix, QuantileDMatrix):\n        run_basic_predict(dm, device, device)\n\n    def run_mixed(DMatrixT: Type) -> None:\n        df = Df({\"b\": [2, 1, 3], \"c\": [\"cdef\", \"abc\", \"def\"]}, dtype=\"category\")\n        y = np.array([0, 1, 2])\n\n        # used with the next df\n        b_codes = df.b.cat.codes\n        assert_allclose(device, asarray(device, b_codes), np.array([1, 0, 2]))\n        # pick codes of 3, 1\n        b_encoded = np.array([b_codes.iloc[2], b_codes.iloc[1]])\n\n        c_codes = df.c.cat.codes\n        assert_allclose(device, asarray(device, c_codes), np.array([1, 0, 2]))\n        # pick codes of \"def\", \"abc\"\n        c_encoded = np.array([c_codes.iloc[2], 
c_codes.iloc[1]])\n        encoded = np.stack([b_encoded, c_encoded], axis=1)\n\n        Xy = DMatrixT(df, y, enable_categorical=True)\n        booster = train({\"device\": device}, Xy, num_boost_round=4)\n\n        df = Df({\"b\": [3, 1], \"c\": [\"def\", \"abc\"]}, dtype=\"category\")\n        predt0 = booster.inplace_predict(df)\n        predt1 = booster.inplace_predict(encoded)\n        assert_allclose(device, predt0, predt1)\n\n        fmat = DMatrixT(df, enable_categorical=True)\n        predt2 = booster.predict(fmat)\n        assert_allclose(device, predt0, predt2)\n\n    for dm in (DMatrix, QuantileDMatrix):\n        run_mixed(dm)\n\n\ndef run_cat_invalid(device: Device) -> None:\n    \"\"\"Basic tests for invalid inputs.\"\"\"\n    Df, Ser = get_df_impl(device)\n    y = np.array([0, 1, 2])\n\n    def run_invalid(DMatrixT: Type) -> None:\n        df = Df({\"b\": [2, 1, 3], \"c\": [\"cdef\", \"abc\", \"def\"]}, dtype=\"category\")\n\n        Xy = DMatrixT(df, y, enable_categorical=True)\n        booster = train({\"device\": device}, Xy, num_boost_round=4)\n        df[\"b\"] = df[\"b\"].astype(np.int64)\n        with pytest.raises(ValueError, match=\"The data type doesn't match\"):\n            booster.inplace_predict(df)\n\n        Xy = DMatrixT(df, y, enable_categorical=True)\n        with pytest.raises(ValueError, match=\"The data type doesn't match\"):\n            booster.predict(Xy)\n\n        df = Df(\n            {\"b\": [2, 1, 3, 4], \"c\": [\"cdef\", \"abc\", \"def\", \"bbc\"]}, dtype=\"category\"\n        )\n        with pytest.raises(ValueError, match=\"Found a category not in the training\"):\n            booster.inplace_predict(df)\n\n    for dm in (DMatrix, QuantileDMatrix):\n        run_invalid(dm)\n\n    df = Df({\"b\": [2, 1, 3], \"c\": [\"cdef\", \"abc\", \"def\"]}, dtype=\"category\")\n    Xy = DMatrix(df, y, enable_categorical=True)\n    booster = train({\"device\": device}, Xy, num_boost_round=4)\n    df[\"c\"] = Ser(asarray(device, [0, 
1, 1]), dtype=\"category\")\n\n    msg = \"index type must match between the training and test set\"\n\n    with pytest.raises(ValueError, match=msg):\n        booster.inplace_predict(df)\n\n    with pytest.raises(ValueError, match=msg):\n        DMatrix(df, enable_categorical=True, feature_types=booster.get_categories())\n\n    with pytest.raises(ValueError, match=msg):\n        QuantileDMatrix(\n            df, enable_categorical=True, feature_types=booster.get_categories()\n        )\n\n\ndef run_cat_thread_safety(device: Device) -> None:\n    \"\"\"Basic tests for thread safety.\"\"\"\n    X, y = make_categorical(2048, 16, 112, onehot=False, cat_ratio=0.5, device=device)\n    Xy = QuantileDMatrix(X, y, enable_categorical=True)\n    booster = train({\"device\": device}, Xy, num_boost_round=10)\n\n    def run_thread_safety(DMatrixT: Type) -> bool:\n        Xy = DMatrixT(X, enable_categorical=True)\n        predt0 = booster.predict(Xy)\n        predt1 = booster.inplace_predict(X)\n        assert_allclose(device, predt0, predt1)\n        return True\n\n    futures = []\n    n_cpus = os.cpu_count()\n    assert n_cpus is not None\n    for dm in (DMatrix, QuantileDMatrix):\n        with ThreadPoolExecutor(max_workers=max(n_cpus, 10)) as e:\n            for _ in range(32):\n                fut = e.submit(run_thread_safety, dm)\n                futures.append(fut)\n\n    for f in futures:\n        assert f.result()\n\n\nU = TypeVar(\"U\", DMatrix, QuantileDMatrix)\n\n\ndef _make_dm(DMatrixT: Type[U], ref: DMatrix, *args: Any, **kwargs: Any) -> U:\n    if DMatrixT is QuantileDMatrix:\n        return DMatrixT(*args, ref=ref, enable_categorical=True, **kwargs)\n    return DMatrixT(*args, enable_categorical=True, **kwargs)\n\n\ndef _run_predt(\n    device: Device,\n    DMatrixT: Type,\n    pred_contribs: bool,\n    pred_interactions: bool,\n    pred_leaf: bool,\n) -> None:\n    enc, reenc, encoded, y = _basic_example(device)\n\n    Xy = DMatrixT(enc, y, 
enable_categorical=True)\n    booster = train({\"device\": device}, Xy, num_boost_round=4)\n\n    predt_0 = booster.predict(\n        _make_dm(DMatrixT, ref=Xy, data=reenc),\n        pred_contribs=pred_contribs,\n        pred_interactions=pred_interactions,\n        pred_leaf=pred_leaf,\n    )\n    predt_1 = booster.predict(\n        _make_dm(DMatrixT, ref=Xy, data=encoded.reshape(2, 1), feature_names=[\"c\"]),\n        pred_contribs=pred_contribs,\n        pred_interactions=pred_interactions,\n        pred_leaf=pred_leaf,\n    )\n    assert_allclose(device, predt_0, predt_1)\n\n\ndef run_cat_shap(device: Device) -> None:\n    \"\"\"Basic tests for SHAP values.\"\"\"\n\n    for dm in (DMatrix, QuantileDMatrix):\n        _run_predt(\n            device, dm, pred_contribs=True, pred_interactions=False, pred_leaf=False\n        )\n\n    for dm in (DMatrix, QuantileDMatrix):\n        _run_predt(\n            device, dm, pred_contribs=False, pred_interactions=True, pred_leaf=False\n        )\n\n\ndef run_cat_leaf(device: Device) -> None:\n    \"\"\"Basic tests for leaf prediction.\"\"\"\n    # QuantileDMatrix is not supported by leaf.\n    _run_predt(\n        device, DMatrix, pred_contribs=False, pred_interactions=False, pred_leaf=True\n    )\n\n\n# pylint: disable=too-many-locals\n@memory.cache\ndef make_recoded(device: Device, *, n_features: int = 4096) -> Tuple:\n    \"\"\"Synthesize a test dataset with changed encoding.\"\"\"\n    Df, _ = get_df_impl(device)\n\n    import pandas as pd\n\n    # Test large column numbers. 
XGBoost makes some specializations for slim datasets,\n    # make sure we cover all the cases.\n    n_samples = 1024\n\n    # Same between old and new, with 0 (\"a\") and 1 (\"b\") exchanged their position.\n    old_cats = [\"a\", \"b\", \"c\", \"d\"]\n    new_cats = [\"b\", \"a\", \"c\", \"d\"]\n    mapping = {0: 1, 1: 0}\n\n    rng = np.random.default_rng(2025)\n\n    col_numeric = rng.uniform(0, 1, size=(n_samples, n_features // 2))\n    col_categorical = rng.integers(\n        low=0, high=4, size=(n_samples, n_features // 2), dtype=np.int32\n    )\n\n    df = {}  # avoid fragmentation warning from pandas\n    for c in range(n_features):\n        if c % 2 == 0:\n            col = col_numeric[:, c // 2]\n        else:\n            codes = col_categorical[:, c // 2]\n            col = pd.Categorical.from_codes(\n                categories=old_cats,\n                codes=codes,\n            )\n        df[f\"f{c}\"] = col\n\n    enc = Df(df)\n    y = rng.normal(size=n_samples)\n\n    reenc = enc.copy()\n    for c in range(n_features):\n        if c % 2 == 0:\n            continue\n\n        name = f\"f{c}\"\n        codes_ser = reenc[name].cat.codes\n        if hasattr(codes_ser, \"to_pandas\"):  # cudf\n            codes_ser = codes_ser.to_pandas()\n        new_codes = codes_ser.replace(mapping)\n        reenc[name] = pd.Categorical.from_codes(categories=new_cats, codes=new_codes)\n    reenc = Df(reenc)\n    assert (reenc.iloc[:, 1].cat.codes != enc.iloc[:, 1].cat.codes).any()\n    return enc, reenc, y, col_numeric, col_categorical\n\n\ndef run_specified_cat(  # pylint: disable=too-many-locals\n    device: Device,\n) -> None:\n    \"\"\"Run with manually specified category encoding.\"\"\"\n    import pandas as pd\n\n    # Same between old and new, with 0 (\"a\") and 1 (\"b\") exchanged their position.\n    old_cats = [\"a\", \"b\", \"c\", \"d\"]\n    new_cats = [\"b\", \"a\", \"c\", \"d\"]\n\n    col0 = np.arange(0, 9)\n    col1 = pd.Categorical.from_codes(\n      
  # b, b, c, d, a, c, c, d, a\n        categories=old_cats,\n        codes=[1, 1, 2, 3, 0, 2, 2, 3, 0],\n    )\n    df = pd.DataFrame({\"f0\": col0, \"f1\": col1})\n    Df, _ = get_df_impl(device)\n    df = Df(df)\n    rng = np.random.default_rng(2025)\n    y = rng.uniform(size=df.shape[0])\n\n    for dm in (DMatrix, QuantileDMatrix):\n        Xy = dm(df, y, enable_categorical=True)\n        booster = train({\"device\": device}, Xy)\n        predt0 = booster.predict(Xy)\n        predt1 = booster.inplace_predict(df)\n        assert_allclose(device, predt0, predt1)\n\n        col1 = pd.Categorical.from_codes(\n            # b, b, c, d, a, c, c, d, a\n            categories=new_cats,\n            codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],\n        )\n        df1 = Df({\"f0\": col0, \"f1\": col1})\n        predt2 = booster.inplace_predict(df1)\n        assert_allclose(device, predt0, predt2)\n\n    enc, reenc, y, col_numeric, col_categorical = make_recoded(device)\n\n    Xy = DMatrix(enc, y, enable_categorical=True)\n    booster = train({\"device\": device}, Xy)\n\n    predt0 = booster.predict(Xy)\n    predt1 = booster.inplace_predict(enc)\n    assert_allclose(device, predt0, predt1)\n\n    Xy = DMatrix(reenc, y, enable_categorical=True)\n    predt2 = booster.predict(Xy)\n    assert_allclose(device, predt0, predt2)\n\n    array = np.empty(shape=(reenc.shape[0], reenc.shape[1]))\n\n    array[:, enc.dtypes == \"category\"] = col_categorical\n    array[:, enc.dtypes != \"category\"] = col_numeric\n\n    if device == \"cuda\":\n        import cupy as cp\n\n        array = cp.array(array)\n\n    predt3 = booster.inplace_predict(array)\n    assert_allclose(device, predt0, predt3)\n\n\ndef run_validation(device: Device) -> None:\n    \"\"\"Check the validation dataset is using the correct encoding.\"\"\"\n    enc, reenc, y, _, _ = make_recoded(device)\n\n    Xy = DMatrix(enc, y, enable_categorical=True)\n    Xy_valid = DMatrix(reenc, y, enable_categorical=True)\n\n    evals_result: 
EvalsLog = {}\n    train(\n        {\"device\": device},\n        Xy,\n        evals=[(Xy, \"Train\"), (Xy_valid, \"Valid\")],\n        evals_result=evals_result,\n    )\n\n    # Evaluation dataset should have the exact same performance as the training dataset.\n    assert_allclose(\n        device, evals_result[\"Train\"][\"rmse\"], evals_result[\"Valid\"][\"rmse\"]\n    )\n\n\ndef run_recode_dmatrix(device: Device) -> None:\n    \"\"\"Test re-coding input for DMatrix.\"\"\"\n    import pandas as pd\n\n    Df, _ = get_df_impl(device)\n\n    # String index\n    old_cats = [\"a\", \"b\", \"c\", \"d\"]\n    new_cats = [\"b\", \"a\", \"c\", \"d\"]\n\n    col0 = np.arange(0, 9)\n    col1 = pd.Categorical.from_codes(\n        # b, b, c, d, a, c, c, d, a\n        categories=old_cats,\n        codes=[1, 1, 2, 3, 0, 2, 2, 3, 0],\n    )\n    df = Df({\"f0\": col0, \"f1\": col1})\n\n    Xy = DMatrix(df, enable_categorical=True)\n    cats_0 = Xy.get_categories(export_to_arrow=True)\n    assert Xy.feature_types == [\"int\", \"c\"]\n\n    col1 = pd.Categorical.from_codes(\n        # b, b, c, d, a, c, c, d, a\n        categories=new_cats,\n        codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],\n    )\n    df = Df({\"f0\": col0, \"f1\": col1})\n    Xy = DMatrix(df, enable_categorical=True, feature_types=cats_0)\n    # feature_types is still correct\n    assert Xy.feature_names == [\"f0\", \"f1\"]\n    assert Xy.feature_types == [\"int\", \"c\"]\n    cats_1 = Xy.get_categories(export_to_arrow=True)\n    assert cats_0.to_arrow() == cats_1.to_arrow()\n\n    # Numeric index\n    col0 = pd.Categorical.from_codes(\n        categories=[5, 6, 7, 8],\n        codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],\n    )\n    Df, _ = get_df_impl(device)\n    df = Df({\"cat\": col0})\n    for DMatrixT in (DMatrix, QuantileDMatrix):\n        Xy = DMatrixT(df, enable_categorical=True)\n        cats_0 = Xy.get_categories(export_to_arrow=True)\n        assert cats_0 is not None\n\n        Xy = DMatrixT(df, 
enable_categorical=True, feature_types=cats_0)\n        cats_1 = Xy.get_categories(export_to_arrow=True)\n        assert cats_1 is not None\n\n        assert cats_0.to_arrow() == cats_1.to_arrow()\n\n    # Recode\n    for DMatrixT in (DMatrix, QuantileDMatrix):\n        enc, reenc, y, _, _ = make_recoded(device)\n        Xy_0 = DMatrixT(enc, y, enable_categorical=True)\n        cats_0 = Xy_0.get_categories(export_to_arrow=True)\n\n        assert cats_0 is not None\n\n        Xy_1 = DMatrixT(reenc, y, feature_types=cats_0, enable_categorical=True)\n        cats_1 = Xy_1.get_categories(export_to_arrow=True)\n        assert cats_1 is not None\n\n        assert cats_0.to_arrow() == cats_1.to_arrow()\n        assert predictor_equal(Xy_0, Xy_1)\n\n\ndef run_training_continuation(device: Device) -> None:\n    \"\"\"Test re-coding for training continuation.\"\"\"\n    enc, reenc, y, _, _ = make_recoded(device)\n\n    def check(Xy_0: DMatrix, Xy_1: DMatrix) -> None:\n        params = {\"device\": device}\n\n        r = 2\n        evals_result_0: EvalsLog = {}\n        booster_0 = train(\n            params,\n            Xy_0,\n            evals=[(Xy_1, \"Valid\")],\n            num_boost_round=r,\n            evals_result=evals_result_0,\n        )\n        evals_result_1: EvalsLog = {}\n        booster_1 = train(\n            params,\n            Xy_1,\n            evals=[(Xy_1, \"Valid\")],\n            xgb_model=booster_0,\n            num_boost_round=r,\n            evals_result=evals_result_1,\n        )\n        assert get_basescore(booster_0) == get_basescore(booster_1)\n\n        evals_result_2: EvalsLog = {}\n        booster_2 = train(\n            params,\n            Xy_0,\n            evals=[(Xy_1, \"Valid\")],\n            num_boost_round=r * 2,\n            evals_result=evals_result_2,\n        )\n        # Check evaluation results\n        eval_concat = evals_result_0[\"Valid\"][\"rmse\"] + evals_result_1[\"Valid\"][\"rmse\"]\n        eval_full = 
evals_result_2[\"Valid\"][\"rmse\"]\n        np.testing.assert_allclose(eval_full, eval_concat)\n\n        # Test inference\n        for a, b in itertools.product([enc, reenc], [enc, reenc]):\n            predt_0 = booster_1.inplace_predict(a)\n            predt_1 = booster_2.inplace_predict(b)\n            assert_allclose(device, predt_0, predt_1, rtol=1e-5)\n\n        # With DMatrix\n        for a, b in itertools.product([Xy_0, Xy_1], [Xy_0, Xy_1]):\n            predt_0 = booster_1.predict(a)\n            predt_1 = booster_2.predict(b)\n            assert_allclose(device, predt_0, predt_1, rtol=1e-5)\n\n    for Train, Valid in itertools.product(\n        [DMatrix, QuantileDMatrix], [DMatrix, QuantileDMatrix]\n    ):\n        Xy_0 = Train(enc, y, enable_categorical=True)\n        if Valid is QuantileDMatrix:\n            Xy_1 = Valid(\n                reenc,\n                y,\n                enable_categorical=True,\n                feature_types=Xy_0.get_categories(),\n                ref=Xy_0,\n            )\n        else:\n            Xy_1 = Valid(\n                reenc, y, enable_categorical=True, feature_types=Xy_0.get_categories()\n            )\n        check(Xy_0, Xy_1)\n\n\ndef run_update(device: Device) -> None:\n    \"\"\"Test with individual updaters.\"\"\"\n    enc, reenc, y, _, _ = make_recoded(device)\n    Xy = DMatrix(enc, y, enable_categorical=True)\n    booster_0 = train({\"device\": device}, Xy, num_boost_round=4)\n    model_0 = booster_0.save_raw()\n    cats_0 = booster_0.get_categories()\n\n    Xy_1 = DMatrix(reenc, y, feature_types=cats_0, enable_categorical=True)\n\n    booster_1 = train(\n        {\n            \"device\": device,\n            \"updater\": \"prune\",\n            \"process_type\": \"update\",\n        },\n        Xy_1,\n        num_boost_round=4,\n        xgb_model=booster_0,\n    )\n    model_1 = booster_1.save_raw()\n\n    assert model_0 == model_1  # also compares the cat container inside\n\n\ndef 
run_recode_dmatrix_predict(device: Device) -> None:\n    \"\"\"Run prediction with re-coded DMatrix.\"\"\"\n    enc, reenc, y, _, _ = make_recoded(device)\n\n    for DMatrixT in (DMatrix, QuantileDMatrix):\n        Xy = DMatrix(enc, y, enable_categorical=True)\n        booster = train({\"device\": device}, Xy, num_boost_round=4)\n        cats_0 = booster.get_categories()\n\n        Xy_1 = _make_dm(DMatrixT, Xy, reenc, y, feature_types=cats_0)\n        Xy_2 = _make_dm(DMatrixT, Xy, reenc, y)\n\n        predt_0 = booster.predict(Xy)\n        predt_1 = booster.predict(Xy_1)\n        predt_2 = booster.predict(Xy_2)\n        predt_3 = booster.inplace_predict(enc)\n\n        for predt in (predt_1, predt_2, predt_3):\n            assert_allclose(device, predt_0, predt)\n"
  },
  {
    "path": "python-package/xgboost/testing/params.py",
    "content": "\"\"\"Strategies for updater tests.\"\"\"\n\nfrom typing import cast\n\nimport pytest\n\nstrategies = pytest.importorskip(\"hypothesis.strategies\")\n\n\nexact_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"nthread\": strategies.integers(1, 4),\n        \"max_depth\": strategies.integers(1, 11),\n        \"min_child_weight\": strategies.floats(0.5, 2.0),\n        \"alpha\": strategies.floats(1e-5, 2.0),\n        \"lambda\": strategies.floats(1e-5, 2.0),\n        \"eta\": strategies.floats(0.01, 0.5),\n        \"gamma\": strategies.floats(1e-5, 2.0),\n        \"seed\": strategies.integers(0, 10),\n        # We cannot enable subsampling as the training loss can increase\n        # 'subsample': strategies.floats(0.5, 1.0),\n        \"colsample_bytree\": strategies.floats(0.5, 1.0),\n        \"colsample_bylevel\": strategies.floats(0.5, 1.0),\n    }\n)\n\nhist_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(1, 11),\n        \"max_leaves\": strategies.integers(0, 1024),\n        \"max_bin\": strategies.integers(2, 512),\n        \"grow_policy\": strategies.sampled_from([\"lossguide\", \"depthwise\"]),\n        \"min_child_weight\": strategies.floats(0.5, 2.0),\n        # We cannot enable subsampling as the training loss can increase\n        # 'subsample': strategies.floats(0.5, 1.0),\n        \"colsample_bytree\": strategies.floats(0.5, 1.0),\n        \"colsample_bylevel\": strategies.floats(0.5, 1.0),\n    }\n).filter(\n    lambda x: (cast(int, x[\"max_depth\"]) > 0 or cast(int, x[\"max_leaves\"]) > 0)\n    and (cast(int, x[\"max_depth\"]) > 0 or x[\"grow_policy\"] == \"lossguide\")\n)\n\nhist_cache_strategy = strategies.fixed_dictionaries(\n    {\"max_cached_hist_node\": strategies.sampled_from([1, 4, 1024, 2**31])}\n)\n\nhist_multi_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(1, 11),\n        \"max_leaves\": 
strategies.integers(0, 1024),\n        \"max_bin\": strategies.integers(2, 512),\n        \"multi_strategy\": strategies.sampled_from(\n            [\"multi_output_tree\", \"one_output_per_tree\"]\n        ),\n        \"grow_policy\": strategies.sampled_from([\"lossguide\", \"depthwise\"]),\n        \"min_child_weight\": strategies.floats(0.5, 2.0),\n        # We cannot enable subsampling as the training loss can increase\n        # 'subsample': strategies.floats(0.5, 1.0),\n        \"colsample_bytree\": strategies.floats(0.5, 1.0),\n        \"colsample_bylevel\": strategies.floats(0.5, 1.0),\n    }\n).filter(\n    lambda x: (cast(int, x[\"max_depth\"]) > 0 or cast(int, x[\"max_leaves\"]) > 0)\n    and (cast(int, x[\"max_depth\"]) > 0 or x[\"grow_policy\"] == \"lossguide\")\n)\n\ncat_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_cat_to_onehot\": strategies.integers(1, 128),\n        \"max_cat_threshold\": strategies.integers(1, 128),\n    }\n)\n\nlambdarank_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"lambdarank_unbiased\": strategies.sampled_from([True, False]),\n        \"lambdarank_pair_method\": strategies.sampled_from([\"topk\", \"mean\"]),\n        \"lambdarank_num_pair_per_sample\": strategies.integers(1, 8),\n        \"lambdarank_bias_norm\": strategies.floats(0.5, 2.0),\n        \"objective\": strategies.sampled_from(\n            [\"rank:ndcg\", \"rank:map\", \"rank:pairwise\"]\n        ),\n    }\n).filter(\n    lambda x: not (x[\"lambdarank_unbiased\"] and x[\"lambdarank_pair_method\"] == \"mean\")\n)\n"
  },
  {
    "path": "python-package/xgboost/testing/parse_tree.py",
    "content": "\"\"\"Tests for parsing trees.\"\"\"\n\nimport pytest\n\nfrom ..core import DMatrix\nfrom ..sklearn import XGBRegressor\nfrom ..training import train\nfrom .data import make_categorical\nfrom .utils import Device\n\n\ndef run_tree_to_df_categorical(tree_method: str, device: Device) -> None:\n    \"\"\"Tests tree_to_df with categorical features.\"\"\"\n    X, y = make_categorical(100, 10, 31, onehot=False)\n    Xy = DMatrix(X, y, enable_categorical=True)\n    booster = train(\n        {\"tree_method\": tree_method, \"device\": device}, Xy, num_boost_round=10\n    )\n    df = booster.trees_to_dataframe()\n    for _, x in df.iterrows():\n        if x[\"Feature\"] != \"Leaf\":\n            assert len(x[\"Category\"]) >= 1\n\n\ndef run_split_value_histograms(tree_method: str, device: Device) -> None:\n    \"\"\"Tests split_value_histograms with categorical features.\"\"\"\n    X, y = make_categorical(1000, 10, 13, onehot=False)\n    reg = XGBRegressor(tree_method=tree_method, enable_categorical=True, device=device)\n    reg.fit(X, y)\n\n    with pytest.raises(ValueError, match=\"doesn't\"):\n        reg.get_booster().get_split_value_histogram(\"3\", bins=5)\n"
  },
  {
    "path": "python-package/xgboost/testing/plotting.py",
    "content": "\"\"\"Test plotting functions for XGBoost.\"\"\"\n\nimport json\n\nfrom graphviz import Source\nfrom matplotlib.axes import Axes\n\nfrom ..plotting import plot_tree, to_graphviz\nfrom ..sklearn import XGBRegressor\nfrom .data import make_categorical\nfrom .utils import Device\n\n\ndef run_categorical(tree_method: str, device: Device) -> None:\n    \"\"\"Tests plotting functions for categorical features.\"\"\"\n    X, y = make_categorical(1000, 31, 19, onehot=False)\n    reg = XGBRegressor(\n        enable_categorical=True, n_estimators=10, tree_method=tree_method, device=device\n    )\n    reg.fit(X, y)\n    trees = reg.get_booster().get_dump(dump_format=\"json\")\n    for tree in trees:\n        j_tree = json.loads(tree)\n        assert \"leaf\" in j_tree.keys() or isinstance(j_tree[\"split_condition\"], list)\n\n    graph = to_graphviz(reg, tree_idx=len(j_tree) - 1)\n    assert isinstance(graph, Source)\n    ax = plot_tree(reg, tree_idx=len(j_tree) - 1)\n    assert isinstance(ax, Axes)\n"
  },
  {
    "path": "python-package/xgboost/testing/predict.py",
    "content": "\"\"\"Tests for inference.\"\"\"\n\nfrom typing import Type\n\nimport numpy as np\nfrom scipy.special import logit  # pylint: disable=no-name-in-module\n\nfrom ..core import DMatrix\nfrom ..training import train\nfrom .shared import validate_leaf_output\nfrom .updater import get_basescore\nfrom .utils import Device\n\n\n# pylint: disable=too-many-locals\ndef run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray:\n    \"\"\"Run tests for leaf index prediction.\"\"\"\n    rows = 100\n    cols = 4\n    classes = 5\n    num_parallel_tree = 4\n    num_boost_round = 10\n    rng = np.random.RandomState(1994)\n    X = rng.randn(rows, cols)\n    y = rng.randint(low=0, high=classes, size=rows)\n\n    m = DMatrixT(X, y)\n    booster = train(\n        {\n            \"num_parallel_tree\": num_parallel_tree,\n            \"num_class\": classes,\n            \"tree_method\": \"hist\",\n        },\n        m,\n        num_boost_round=num_boost_round,\n    )\n\n    booster.set_param({\"device\": device})\n    empty = DMatrixT(np.ones(shape=(0, cols)))\n    empty_leaf = booster.predict(empty, pred_leaf=True)\n    assert empty_leaf.shape[0] == 0\n\n    leaf = booster.predict(m, pred_leaf=True, strict_shape=True)\n    assert leaf.shape[0] == rows\n    assert leaf.shape[1] == num_boost_round\n    assert leaf.shape[2] == classes\n    assert leaf.shape[3] == num_parallel_tree\n\n    validate_leaf_output(leaf, num_parallel_tree)\n\n    n_iters = np.int32(2)\n    sliced = booster.predict(\n        m,\n        pred_leaf=True,\n        iteration_range=(0, n_iters),\n        strict_shape=True,\n    )\n    first = sliced[0, ...]\n\n    assert np.prod(first.shape) == classes * num_parallel_tree * n_iters\n\n    # When there's only 1 tree, the output is a 1 dim vector\n    booster = train({\"tree_method\": \"hist\"}, num_boost_round=1, dtrain=m)\n    booster.set_param({\"device\": device})\n    assert booster.predict(m, pred_leaf=True).shape == (rows,)\n\n    
return leaf\n\n\ndef run_base_margin_vs_base_score(device: Device) -> None:\n    \"\"\"Test for the relation between score and margin.\"\"\"\n    from sklearn.datasets import make_classification\n\n    intercept = 0.5\n\n    X, y = make_classification(random_state=2025)\n    booster = train(\n        {\"base_score\": intercept, \"objective\": \"binary:logistic\", \"device\": device},\n        dtrain=DMatrix(X, y),\n        num_boost_round=1,\n    )\n    np.testing.assert_allclose(get_basescore(booster), intercept)\n    predt_0 = booster.predict(DMatrix(X, y))\n\n    margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)\n    Xy = DMatrix(X, y, base_margin=margin)\n    # 0.2 is a dummy value\n    booster = train(\n        {\"base_score\": 0.2, \"objective\": \"binary:logistic\", \"device\": device},\n        dtrain=Xy,\n        num_boost_round=1,\n    )\n    np.testing.assert_allclose(get_basescore(booster), 0.2)\n    predt_1 = booster.predict(Xy)\n\n    np.testing.assert_allclose(predt_0, predt_1)\n"
  },
  {
    "path": "python-package/xgboost/testing/quantile_dmatrix.py",
    "content": "\"\"\"QuantileDMatrix related tests.\"\"\"\n\nimport numpy as np\nimport pytest\nfrom sklearn.model_selection import train_test_split\n\nimport xgboost as xgb\n\nfrom .data import make_batches, make_categorical\n\n\ndef check_ref_quantile_cut(device: str) -> None:\n    \"\"\"Check obtaining the same cut values given a reference.\"\"\"\n    X, y, _ = (\n        data[0]\n        for data in make_batches(\n            n_samples_per_batch=8192,\n            n_features=16,\n            n_batches=1,\n            use_cupy=device.startswith(\"cuda\"),\n        )\n    )\n\n    X_train, X_valid, y_train, y_valid = train_test_split(X, y)\n    Xy_train = xgb.QuantileDMatrix(X_train, y_train)\n    Xy_valid = xgb.QuantileDMatrix(X_valid, y_valid, ref=Xy_train)\n\n    cut_train = Xy_train.get_quantile_cut()\n    cut_valid = Xy_valid.get_quantile_cut()\n\n    np.testing.assert_allclose(cut_train[0], cut_valid[0])\n    np.testing.assert_allclose(cut_train[1], cut_valid[1])\n\n    Xy_valid = xgb.QuantileDMatrix(X_valid, y_valid)\n    cut_valid = Xy_valid.get_quantile_cut()\n    assert not np.allclose(cut_train[1], cut_valid[1])\n\n\ndef check_categorical_strings(device: str) -> None:\n    \"\"\"Check string inputs.\"\"\"\n    if device == \"cpu\":\n        pd = pytest.importorskip(\"pandas\")\n    else:\n        pd = pytest.importorskip(\"cudf\")\n\n    n_categories = 32\n    X, y = make_categorical(\n        1024,\n        8,\n        n_categories,\n        onehot=False,\n        cat_dtype=np.str_,\n        cat_ratio=0.5,\n        shuffle=True,\n    )\n    X = pd.DataFrame(X)\n\n    Xy = xgb.QuantileDMatrix(X, y, enable_categorical=True)\n    assert Xy.num_col() == 8\n    cuts = Xy.get_quantile_cut()\n    indptr = cuts[0]\n    values = cuts[1]\n    for i in range(1, len(indptr)):\n        f_idx = i - 1\n        if isinstance(X[X.columns[f_idx]].dtype, pd.CategoricalDtype):\n            beg, end = indptr[f_idx], indptr[i]\n            col = values[beg:end]\n          
  np.testing.assert_allclose(col, np.arange(0, n_categories))\n"
  },
  {
    "path": "python-package/xgboost/testing/ranking.py",
    "content": "# pylint: disable=too-many-locals\n\"\"\"Tests for learning to rank.\"\"\"\n\nfrom types import ModuleType\nfrom typing import Any\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\nfrom .utils import Device\n\n\ndef run_ranking_qid_df(impl: ModuleType, tree_method: str, device: Device) -> None:\n    \"\"\"Test ranking with qid packed into X.\"\"\"\n    import scipy.sparse\n    from sklearn.metrics import mean_squared_error\n    from sklearn.model_selection import StratifiedGroupKFold, cross_val_score\n\n    X, y, q, _ = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3)\n\n    # pack qid into x using dataframe\n    df = impl.DataFrame(X)\n    df[\"qid\"] = q\n    ranker = xgb.XGBRanker(\n        n_estimators=3, eval_metric=\"ndcg\", tree_method=tree_method, device=device\n    )\n    ranker.fit(df, y)\n    s = ranker.score(df, y)\n    assert s > 0.7\n\n    # works with validation datasets as well\n    valid_df = df.copy()\n    valid_df.iloc[0, 0] = 3.0\n    ranker.fit(df, y, eval_set=[(valid_df, y)])\n\n    # same as passing qid directly\n    ranker = xgb.XGBRanker(\n        n_estimators=3, eval_metric=\"ndcg\", tree_method=tree_method, device=device\n    )\n    ranker.fit(X, y, qid=q)\n    s1 = ranker.score(df, y)\n    assert np.isclose(s, s1)\n\n    # Works with standard sklearn cv\n    if device == \"cpu\":\n        # we need cuML for this.\n        kfold = StratifiedGroupKFold(shuffle=False)\n        results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)\n        assert len(results) == 5\n\n    # Works with custom metric\n    def neg_mse(y_true: Any, y_pred: Any, **kwargs: Any) -> float:\n        if hasattr(y_true, \"get\"):\n            y_true = y_true.get()\n        return -float(mean_squared_error(y_true, y_pred, **kwargs))\n\n    ranker = xgb.XGBRanker(\n        n_estimators=3,\n        eval_metric=neg_mse,\n        tree_method=tree_method,\n        
device=device,\n        disable_default_eval_metric=True,\n    )\n    ranker.fit(df, y, eval_set=[(valid_df, y)])\n    score = ranker.score(valid_df, y)\n    assert np.isclose(score, ranker.evals_result()[\"validation_0\"][\"neg_mse\"][-1])\n\n    # Works with sparse data\n    if device == \"cpu\":\n        # no sparse with cuDF\n        X_csr = scipy.sparse.csr_matrix(X)\n        df = impl.DataFrame.sparse.from_spmatrix(\n            X_csr, columns=[str(i) for i in range(X.shape[1])]\n        )\n        df[\"qid\"] = q\n        ranker = xgb.XGBRanker(\n            n_estimators=3,\n            eval_metric=\"ndcg\",\n            tree_method=tree_method,\n            device=device,\n        )\n        ranker.fit(df, y)\n        s2 = ranker.score(df, y)\n        assert np.isclose(s2, s)\n\n    with pytest.raises(ValueError, match=\"Either `group` or `qid`.\"):\n        ranker.fit(df, y, eval_set=[(X, y)])\n\n\ndef run_ranking_categorical(device: str) -> None:\n    \"\"\"Test LTR with categorical features.\"\"\"\n    from sklearn.model_selection import cross_val_score\n\n    X, y = tm.make_categorical(\n        n_samples=512, n_features=10, n_categories=3, onehot=False\n    )\n    # NDCG requires non-negative integer relevance labels.\n    y = np.clip(np.round(y - y.min()).astype(int), 0, None)\n    rng = np.random.default_rng(1994)\n    qid = rng.choice(3, size=y.shape[0])\n    qid = np.sort(qid)\n    X[\"qid\"] = qid\n\n    ltr = xgb.XGBRanker(enable_categorical=True, device=device)\n    ltr.fit(X, y)\n    score = ltr.score(X, y)\n    assert score > 0.9\n\n    ltr = xgb.XGBRanker(enable_categorical=True, device=device)\n\n    # test using the score function inside sklearn.\n    scores = cross_val_score(ltr, X, y)\n    for s in scores:\n        assert s > 0.7\n\n\ndef run_normalization(device: str) -> None:\n    \"\"\"Test normalization.\"\"\"\n    X, y, qid, _ = tm.make_ltr(2048, 4, 64, 3)\n    # top-k\n    ltr = xgb.XGBRanker(objective=\"rank:pairwise\", 
n_estimators=4, device=device)\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e0 = ltr.evals_result()\n\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_normalization=False,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e1 = ltr.evals_result()\n    assert e1[\"validation_0\"][\"ndcg@32\"][-1] > e0[\"validation_0\"][\"ndcg@32\"][-1]\n\n    # mean\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_pair_method=\"mean\",\n        lambdarank_normalization=True,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e0 = ltr.evals_result()\n\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_pair_method=\"mean\",\n        lambdarank_normalization=False,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e1 = ltr.evals_result()\n    # no normalization since the number of pairs is 1.\n    assert e1[\"validation_0\"][\"ndcg\"][-1] == e0[\"validation_0\"][\"ndcg\"][-1]\n\n    # mean\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_pair_method=\"mean\",\n        lambdarank_normalization=True,\n        lambdarank_num_pair_per_sample=4,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e0 = ltr.evals_result()\n\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_pair_method=\"mean\",\n        lambdarank_normalization=False,\n        lambdarank_num_pair_per_sample=4,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e1 = ltr.evals_result()\n    assert e1[\"validation_0\"][\"ndcg\"][-1] != e0[\"validation_0\"][\"ndcg\"][-1]\n\n\ndef 
run_score_normalization(device: str, objective: str) -> None:\n    \"\"\"Test normalization by score differences.\"\"\"\n    if objective == \"rank:map\":\n        # Binary relevance\n        X, y, qid, _ = tm.make_ltr(4096, 4, 64, max_rel=1)\n    else:\n        X, y, qid, _ = tm.make_ltr(4096, 4, 64, 3)\n    ltr = xgb.XGBRanker(objective=objective, n_estimators=4, device=device)\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e0 = ltr.evals_result()\n\n    ltr = xgb.XGBRanker(\n        objective=\"rank:pairwise\",\n        n_estimators=4,\n        device=device,\n        lambdarank_score_normalization=False,\n    )\n    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])\n    e1 = ltr.evals_result()\n\n    m0, m1 = (\n        list(e0[\"validation_0\"].values())[-1][-1],\n        list(e1[\"validation_0\"].values())[-1][-1],\n    )\n    assert m0 != m1\n"
  },
  {
    "path": "python-package/xgboost/testing/shared.py",
    "content": "\"\"\"Testing code shared by other tests.\"\"\"\n\nimport collections\nimport importlib.util\nimport json\nimport os\nimport tempfile\nfrom typing import Any, Callable, Dict, Type\n\nimport numpy as np\n\nimport xgboost as xgb\nfrom xgboost._typing import ArrayLike\n\n\ndef validate_leaf_output(leaf: np.ndarray, num_parallel_tree: int) -> None:\n    \"\"\"Validate output for predict leaf tests.\"\"\"\n    for i in range(leaf.shape[0]):  # n_samples\n        for j in range(leaf.shape[1]):  # n_rounds\n            for k in range(leaf.shape[2]):  # n_classes\n                tree_group = leaf[i, j, k, :]\n                assert tree_group.shape[0] == num_parallel_tree\n                # No sampling, all trees within forest are the same\n                assert np.all(tree_group == tree_group[0])\n\n\ndef validate_data_initialization(\n    dmatrix: Type, model: Type[xgb.XGBModel], X: ArrayLike, y: ArrayLike\n) -> None:\n    \"\"\"Assert that we don't create duplicated DMatrix.\"\"\"\n\n    old_init = dmatrix.__init__\n    count = [0]\n\n    def new_init(self: Any, **kwargs: Any) -> Callable:\n        count[0] += 1\n        return old_init(self, **kwargs)\n\n    dmatrix.__init__ = new_init\n    model(n_estimators=1).fit(X, y, eval_set=[(X, y)])\n\n    assert count[0] == 1\n    count[0] = 0  # only 1 DMatrix is created.\n\n    y_copy = y.copy()\n    model(n_estimators=1).fit(X, y, eval_set=[(X, y_copy)])\n    assert count[0] == 2  # a different Python object is considered different\n\n    dmatrix.__init__ = old_init\n\n\n# pylint: disable=too-many-arguments,too-many-locals\ndef get_feature_weights(\n    *,\n    X: ArrayLike,\n    y: ArrayLike,\n    fw: np.ndarray,\n    parser_path: str,\n    tree_method: str,\n    model: Type[xgb.XGBModel] = xgb.XGBRegressor,\n) -> np.ndarray:\n    \"\"\"Get feature weights using the demo parser.\"\"\"\n    with tempfile.TemporaryDirectory() as tmpdir:\n        colsample_bynode = 0.5\n        reg = model(\n            
tree_method=tree_method,\n            colsample_bynode=colsample_bynode,\n            feature_weights=fw,\n        )\n\n        reg.fit(X, y)\n        model_path = os.path.join(tmpdir, \"model.json\")\n        reg.save_model(model_path)\n        with open(model_path, \"r\", encoding=\"utf-8\") as fd:\n            model = json.load(fd)\n\n        spec = importlib.util.spec_from_file_location(\"JsonParser\", parser_path)\n        assert spec is not None\n        jsonm = importlib.util.module_from_spec(spec)\n        assert spec.loader is not None\n        spec.loader.exec_module(jsonm)\n        model = jsonm.Model(model)\n        splits: Dict[int, int] = {}\n        total_nodes = 0\n        for tree in model.trees:\n            n_nodes = len(tree.nodes)\n            total_nodes += n_nodes\n            for n in range(n_nodes):\n                if tree.is_leaf(n):\n                    continue\n                if splits.get(tree.split_index(n), None) is None:\n                    splits[tree.split_index(n)] = 1\n                else:\n                    splits[tree.split_index(n)] += 1\n\n        od = collections.OrderedDict(sorted(splits.items()))\n        tuples = list(od.items())\n        k, v = list(zip(*tuples))\n        w = np.polyfit(k, v, deg=1)\n        return w\n"
  },
  {
    "path": "python-package/xgboost/testing/updater.py",
    "content": "\"\"\"Tests for updaters.\"\"\"\n\nimport json\nfrom functools import partial, update_wrapper\nfrom string import ascii_lowercase\nfrom typing import Any, Dict, List, Union, overload\n\nimport numpy as np\nimport pytest\nfrom sklearn.datasets import make_regression\n\nimport xgboost.testing as tm\n\nfrom ..callback import TrainingCallback\nfrom ..compat import import_cupy\nfrom ..core import (\n    Booster,\n    DataIter,\n    DMatrix,\n    ExtMemQuantileDMatrix,\n    QuantileDMatrix,\n)\nfrom ..data import is_pd_cat_dtype\nfrom ..sklearn import XGBModel, XGBRegressor\nfrom ..training import train\nfrom .data import IteratorForTest, make_batches, make_categorical\nfrom .data_iter import CatIter\nfrom .utils import Device, assert_allclose, non_increasing\n\n\n@overload\ndef get_basescore(model: XGBModel) -> List[float]: ...\n\n\n@overload\ndef get_basescore(model: Booster) -> List[float]: ...\n\n\n@overload\ndef get_basescore(model: Dict[str, Any]) -> List[float]: ...\n\n\ndef get_basescore(\n    model: Union[XGBModel, Booster, Dict],\n) -> List[float]:\n    \"\"\"Get base score from an XGBoost sklearn estimator.\"\"\"\n    if isinstance(model, XGBModel):\n        model = model.get_booster()\n\n    if isinstance(model, dict):\n        jintercept = model[\"learner\"][\"learner_model_param\"][\"base_score\"]\n    else:\n        jintercept = json.loads(model.save_config())[\"learner\"][\"learner_model_param\"][\n            \"base_score\"\n        ]\n    return json.loads(jintercept)\n\n\n# pylint: disable=too-many-locals\ndef check_quantile_loss(tree_method: str, weighted: bool, device: Device) -> None:\n    \"\"\"Test for quantile loss.\"\"\"\n    from sklearn.metrics import mean_pinball_loss\n\n    from xgboost.sklearn import _metric_decorator\n\n    n_samples = 4096\n    n_features = 8\n    n_estimators = 8\n\n    rng = np.random.RandomState(1994)\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_regression(\n        
n_samples=n_samples,\n        n_features=n_features,\n        random_state=rng,\n    )\n    if weighted:\n        weight = rng.random(size=n_samples)\n    else:\n        weight = None\n\n    Xy = QuantileDMatrix(X, y, weight=weight)\n\n    alpha = np.array([0.1, 0.5])\n    # non-zero base score can cause floating point difference with GPU predictor.\n    # multi-class has small difference than single target in the prediction kernel\n    base_score = np.zeros(shape=alpha.shape, dtype=np.float32)\n    evals_result: Dict[str, Dict] = {}\n    booster_multi = train(\n        {\n            \"objective\": \"reg:quantileerror\",\n            \"tree_method\": tree_method,\n            \"device\": device,\n            \"quantile_alpha\": alpha,\n            \"base_score\": base_score,\n        },\n        Xy,\n        num_boost_round=n_estimators,\n        evals=[(Xy, \"Train\")],\n        evals_result=evals_result,\n    )\n    predt_multi = booster_multi.predict(Xy, strict_shape=True)\n\n    assert non_increasing(evals_result[\"Train\"][\"quantile\"])\n    assert evals_result[\"Train\"][\"quantile\"][-1] < 20.0\n    # check that there's a way to use custom metric and compare the results.\n    metrics = [\n        _metric_decorator(\n            update_wrapper(\n                partial(mean_pinball_loss, sample_weight=weight, alpha=alpha[i]),\n                mean_pinball_loss,\n            )\n        )\n        for i in range(alpha.size)\n    ]\n\n    predts = np.empty(predt_multi.shape)\n    for i in range(alpha.shape[0]):\n        a = alpha[i]\n\n        booster_i = train(\n            {\n                \"objective\": \"reg:quantileerror\",\n                \"tree_method\": tree_method,\n                \"device\": device,\n                \"quantile_alpha\": a,\n                \"base_score\": base_score[i],\n            },\n            Xy,\n            num_boost_round=n_estimators,\n            evals=[(Xy, \"Train\")],\n            custom_metric=metrics[i],\n          
  evals_result=evals_result,\n        )\n        assert non_increasing(evals_result[\"Train\"][\"quantile\"])\n        assert evals_result[\"Train\"][\"quantile\"][-1] < 30.0\n        np.testing.assert_allclose(\n            np.array(evals_result[\"Train\"][\"quantile\"]),\n            np.array(evals_result[\"Train\"][\"mean_pinball_loss\"]),\n            atol=1e-6,\n            rtol=1e-6,\n        )\n        predts[:, i] = booster_i.predict(Xy)\n\n    for i in range(alpha.shape[0]):\n        np.testing.assert_allclose(predts[:, i], predt_multi[:, i])\n\n\ndef check_quantile_loss_rf(\n    device: Device, tree_method: str, multi_strategy: str\n) -> None:\n    \"\"\"Test quantile loss with boosting random forest.\"\"\"\n    # pylint: disable=unbalanced-tuple-unpacking\n    X, y = make_regression(n_samples=2048, n_features=16, random_state=2026)\n    Xy = DMatrix(X, y)\n\n    def run(params: Dict[str, Any], metric: str) -> None:\n        evals_result_0: Dict[str, Dict] = {}\n        params[\"num_parallel_tree\"] = 2\n        train(\n            params,\n            Xy,\n            num_boost_round=8,\n            evals=[(Xy, \"Train\")],\n            evals_result=evals_result_0,\n        )\n\n        evals_result_1: Dict[str, Dict] = {}\n        params[\"num_parallel_tree\"] = 1\n        train(\n            params,\n            Xy,\n            num_boost_round=8,\n            evals=[(Xy, \"Train\")],\n            evals_result=evals_result_1,\n        )\n        # Without subsample, the result should be the same (barring floating point\n        # errors).\n        np.testing.assert_allclose(\n            evals_result_0[\"Train\"][metric], evals_result_1[\"Train\"][metric]\n        )\n        assert non_increasing(evals_result_0[\"Train\"][metric])\n\n    alpha = np.array([0.1, 0.5, 0.9])\n    params = {\n        \"objective\": \"reg:quantileerror\",\n        \"tree_method\": tree_method,\n        \"device\": device,\n        \"quantile_alpha\": alpha,\n        
\"multi_strategy\": multi_strategy,\n    }\n    run(params, \"quantile\")\n\n    # Now test with MAE\n    params.pop(\"quantile_alpha\")\n    params[\"objective\"] = \"reg:absoluteerror\"\n    run(params, \"mae\")\n\n\ndef check_quantile_loss_extmem(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    tree_method: str,\n    device: str,\n) -> None:\n    \"\"\"Check external memory with the quantile objective.\"\"\"\n    it = IteratorForTest(\n        *make_batches(n_samples_per_batch, n_features, n_batches, device != \"cpu\"),\n        cache=\"cache\",\n        on_host=False,\n    )\n    Xy_it = DMatrix(it)\n    params = {\n        \"tree_method\": tree_method,\n        \"objective\": \"reg:quantileerror\",\n        \"device\": device,\n        \"quantile_alpha\": [0.2, 0.8],\n    }\n    booster_it = train(params, Xy_it)\n    X, y, w = it.as_arrays()\n    Xy = DMatrix(X, y, weight=w)\n    booster = train(params, Xy)\n\n    predt_it = booster_it.predict(Xy_it)\n    predt = booster.predict(Xy)\n\n    np.testing.assert_allclose(predt, predt_it)\n\n\ndef check_extmem_qdm(  # pylint: disable=too-many-arguments\n    n_samples_per_batch: int,\n    n_features: int,\n    *,\n    n_batches: int,\n    n_bins: int,\n    device: str,\n    on_host: bool,\n    is_cat: bool,\n) -> None:\n    \"\"\"Basic test for the `ExtMemQuantileDMatrix`.\"\"\"\n\n    if is_cat:\n        it: DataIter = CatIter(\n            n_samples_per_batch=n_samples_per_batch,\n            n_features=n_features,\n            n_batches=n_batches,\n            n_cats=5,\n            sparsity=0.0,\n            cat_ratio=0.5,\n            onehot=False,\n            device=device,\n            cache=\"cache\",\n        )\n    else:\n        it = IteratorForTest(\n            *make_batches(\n                n_samples_per_batch, n_features, n_batches, use_cupy=device != \"cpu\"\n            ),\n            cache=\"cache\",\n            on_host=on_host,\n        )\n\n    Xy_it = 
ExtMemQuantileDMatrix(it, max_bin=n_bins, enable_categorical=is_cat)\n    with pytest.raises(ValueError, match=\"Only the `hist`\"):\n        booster_it = train(\n            {\"device\": device, \"tree_method\": \"approx\", \"max_bin\": n_bins},\n            Xy_it,\n            num_boost_round=8,\n        )\n\n    booster_it = train({\"device\": device, \"max_bin\": n_bins}, Xy_it, num_boost_round=8)\n    if is_cat:\n        it = CatIter(\n            n_samples_per_batch=n_samples_per_batch,\n            n_features=n_features,\n            n_batches=n_batches,\n            n_cats=5,\n            sparsity=0.0,\n            cat_ratio=0.5,\n            onehot=False,\n            device=device,\n            cache=None,\n        )\n    else:\n        it = IteratorForTest(\n            *make_batches(\n                n_samples_per_batch, n_features, n_batches, use_cupy=device != \"cpu\"\n            ),\n            cache=None,\n        )\n    Xy = QuantileDMatrix(it, max_bin=n_bins, enable_categorical=is_cat)\n    booster = train({\"device\": device, \"max_bin\": n_bins}, Xy, num_boost_round=8)\n\n    cut_it = Xy_it.get_quantile_cut()\n    cut = Xy.get_quantile_cut()\n    np.testing.assert_allclose(cut_it[0], cut[0])\n    np.testing.assert_allclose(cut_it[1], cut[1])\n\n    predt_it = booster_it.predict(Xy_it)\n    predt = booster.predict(Xy)\n    np.testing.assert_allclose(predt_it, predt)\n\n\ndef check_cut(\n    n_entries: int, indptr: np.ndarray, data: np.ndarray, dtypes: Any\n) -> None:\n    \"\"\"Check the cut values.\"\"\"\n    assert data.shape[0] == indptr[-1]\n    assert data.shape[0] == n_entries\n\n    assert indptr.dtype == np.uint64\n    for i in range(1, indptr.size):\n        beg = int(indptr[i - 1])\n        end = int(indptr[i])\n        for j in range(beg + 1, end):\n            assert data[j] > data[j - 1]\n            if is_pd_cat_dtype(dtypes.iloc[i - 1]):\n                assert data[j] == data[j - 1] + 1\n\n\ndef 
check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:\n    \"\"\"Check with optional cupy.\"\"\"\n    import pandas as pd\n\n    n_samples = 1024\n    n_features = 14\n    max_bin = 16\n    dtypes = pd.Series([np.float32] * n_features)\n\n    # numerical\n    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=use_cupy)\n    # - qdm\n    Xyw: DMatrix = QuantileDMatrix(X, y, weight=w, max_bin=max_bin)\n    indptr, data = Xyw.get_quantile_cut()\n    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)\n    # - dm\n    Xyw = DMatrix(X, y, weight=w)\n    train({\"tree_method\": tree_method, \"max_bin\": max_bin}, Xyw)\n    indptr, data = Xyw.get_quantile_cut()\n    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)\n    # - ext mem\n    n_batches = 3\n    n_samples_per_batch = 256\n    it = IteratorForTest(\n        *make_batches(n_samples_per_batch, n_features, n_batches, use_cupy),\n        cache=\"cache\",\n        on_host=False,\n    )\n    Xy: DMatrix = DMatrix(it)\n    train({\"tree_method\": tree_method, \"max_bin\": max_bin}, Xyw)\n    indptr, data = Xyw.get_quantile_cut()\n    check_cut((max_bin + 1) * n_features, indptr, data, dtypes)\n\n    # categorical\n    n_categories = 32\n    X, y = make_categorical(\n        n_samples, n_features, n_categories, onehot=False, sparsity=0.8\n    )\n    if use_cupy:\n        import cudf\n\n        cp = import_cupy()\n\n        X = cudf.from_pandas(X)\n        y = cp.array(y)\n    # - qdm\n    Xy = QuantileDMatrix(X, y, max_bin=max_bin, enable_categorical=True)\n    indptr, data = Xy.get_quantile_cut()\n    check_cut(n_categories * n_features, indptr, data, X.dtypes)\n    # - dm\n    Xy = DMatrix(X, y, enable_categorical=True)\n    train({\"tree_method\": tree_method, \"max_bin\": max_bin}, Xy)\n    indptr, data = Xy.get_quantile_cut()\n    check_cut(n_categories * n_features, indptr, data, X.dtypes)\n\n    # mixed\n    X, y = make_categorical(\n        n_samples, n_features, 
n_categories, onehot=False, sparsity=0.8, cat_ratio=0.5\n    )\n    n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)])\n    n_num_features = n_features - n_cat_features\n    n_entries = n_categories * n_cat_features + (max_bin + 1) * n_num_features\n    # - qdm\n    Xy = QuantileDMatrix(X, y, max_bin=max_bin, enable_categorical=True)\n    indptr, data = Xy.get_quantile_cut()\n    check_cut(n_entries, indptr, data, X.dtypes)\n    # - dm\n    Xy = DMatrix(X, y, enable_categorical=True)\n    train({\"tree_method\": tree_method, \"max_bin\": max_bin}, Xy)\n    indptr, data = Xy.get_quantile_cut()\n    check_cut(n_entries, indptr, data, X.dtypes)\n\n\ndef check_get_quantile_cut(tree_method: str, device: str) -> None:\n    \"\"\"Check the quantile cut getter.\"\"\"\n\n    use_cupy = device.startswith(\"cuda\")\n    check_get_quantile_cut_device(tree_method, False)\n    if use_cupy:\n        check_get_quantile_cut_device(tree_method, True)\n\n\nUSE_ONEHOT = np.iinfo(np.int32).max\nUSE_PART = 1\n\n\ndef _create_dmatrix(  # pylint: disable=too-many-arguments\n    n_samples: int,\n    n_features: int,\n    *,\n    n_cats: int,\n    device: str,\n    sparsity: float,\n    tree_method: str,\n    onehot: bool,\n    extmem: bool,\n    enable_categorical: bool,\n) -> DMatrix:\n    n_batches = max(min(2, n_samples), 1)\n    it = CatIter(\n        n_samples // n_batches,\n        n_features,\n        n_batches=n_batches,\n        sparsity=sparsity,\n        cat_ratio=1.0,\n        n_cats=n_cats,\n        onehot=onehot,\n        device=device,\n        cache=\"cache\" if extmem else None,\n    )\n    if extmem:\n        if tree_method == \"hist\":\n            Xy: DMatrix = ExtMemQuantileDMatrix(\n                it, enable_categorical=enable_categorical\n            )\n        elif tree_method == \"approx\":\n            Xy = DMatrix(it, enable_categorical=enable_categorical)\n        else:\n            raise ValueError(f\"tree_method {tree_method} not 
supported.\")\n    else:\n        cat, label = it.xy()\n        Xy = DMatrix(cat, label, enable_categorical=enable_categorical)\n    return Xy\n\n\ndef check_categorical_ohe(  # pylint: disable=too-many-arguments\n    *,\n    rows: int,\n    cols: int,\n    rounds: int,\n    cats: int,\n    device: str,\n    tree_method: str,\n    extmem: bool = False,\n    multi_target: bool = False,\n) -> None:\n    \"Test for one-hot encoding with categorical data.\"\n    pd = pytest.importorskip(\"pandas\")\n\n    by_etl_results: Dict[str, Dict[str, List[float]]] = {}\n    by_builtin_results: Dict[str, Dict[str, List[float]]] = {}\n\n    parameters: Dict[str, Any] = {\n        \"tree_method\": tree_method,\n        # Use one-hot exclusively\n        \"max_cat_to_onehot\": USE_ONEHOT,\n        \"device\": device,\n    }\n\n    if multi_target:\n        n_targets = 3\n        parameters[\"multi_strategy\"] = \"multi_output_tree\"\n\n        cat, label = make_categorical(\n            rows,\n            cols,\n            n_categories=cats,\n            onehot=False,\n            sparsity=0.0,\n            n_targets=n_targets,\n        )\n\n        Xy_onehot = DMatrix(pd.get_dummies(cat), label)\n        Xy_cat = DMatrix(cat, label, enable_categorical=True)\n    else:\n        Xy_onehot = _create_dmatrix(\n            rows,\n            cols,\n            n_cats=cats,\n            device=device,\n            sparsity=0.0,\n            onehot=True,\n            tree_method=tree_method,\n            extmem=extmem,\n            enable_categorical=False,\n        )\n        Xy_cat = _create_dmatrix(\n            rows,\n            cols,\n            n_cats=cats,\n            device=device,\n            sparsity=0.0,\n            tree_method=tree_method,\n            onehot=False,\n            extmem=extmem,\n            enable_categorical=True,\n        )\n\n    train(\n        parameters,\n        Xy_onehot,\n        num_boost_round=rounds,\n        evals=[(Xy_onehot, \"Train\")],\n  
      evals_result=by_etl_results,\n    )\n    train(\n        parameters,\n        Xy_cat,\n        num_boost_round=rounds,\n        evals=[(Xy_cat, \"Train\")],\n        evals_result=by_builtin_results,\n    )\n\n    # There are guidelines on how to specify tolerance based on considering output\n    # as random variables. But in here the tree construction is extremely sensitive\n    # to floating point errors. An 1e-5 error in a histogram bin can lead to an\n    # entirely different tree. So even though the test is quite lenient, hypothesis\n    # can still pick up falsifying examples from time to time.\n    np.testing.assert_allclose(\n        np.array(by_etl_results[\"Train\"][\"rmse\"]),\n        np.array(by_builtin_results[\"Train\"][\"rmse\"]),\n        rtol=1e-3,\n    )\n    assert non_increasing(by_builtin_results[\"Train\"][\"rmse\"])\n\n    if not multi_target:\n        by_grouping: Dict[str, Dict[str, List[float]]] = {}\n        # switch to partition-based splits\n        parameters[\"max_cat_to_onehot\"] = USE_PART\n        parameters[\"reg_lambda\"] = 0\n        train(\n            parameters,\n            Xy_cat,\n            num_boost_round=rounds,\n            evals=[(Xy_cat, \"Train\")],\n            evals_result=by_grouping,\n        )\n        rmse_oh = by_builtin_results[\"Train\"][\"rmse\"]\n        rmse_group = by_grouping[\"Train\"][\"rmse\"]\n        # always better or equal to onehot when there's no regularization.\n        for a, b in zip(rmse_oh, rmse_group):\n            assert a >= b\n\n        parameters[\"reg_lambda\"] = 1.0\n        by_grouping = {}\n        train(\n            parameters,\n            Xy_cat,\n            num_boost_round=32,\n            evals=[(Xy_cat, \"Train\")],\n            evals_result=by_grouping,\n        )\n        assert non_increasing(by_grouping[\"Train\"][\"rmse\"]), by_grouping\n\n\ndef check_categorical_missing(  # pylint: disable=too-many-arguments\n    rows: int,\n    cols: int,\n    cats: int,\n   
 *,\n    device: Device,\n    tree_method: str,\n    extmem: bool,\n) -> None:\n    \"\"\"Check categorical data with missing values.\"\"\"\n    parameters: Dict[str, Any] = {\"tree_method\": tree_method, \"device\": device}\n    Xy = _create_dmatrix(\n        rows,\n        cols,\n        n_cats=cats,\n        sparsity=0.5,\n        device=device,\n        tree_method=tree_method,\n        onehot=False,\n        extmem=extmem,\n        enable_categorical=True,\n    )\n    label = Xy.get_label()\n\n    def run(max_cat_to_onehot: int) -> None:\n        # Test with onehot splits\n        parameters[\"max_cat_to_onehot\"] = max_cat_to_onehot\n\n        evals_result: Dict[str, Dict] = {}\n        booster = train(\n            parameters,\n            Xy,\n            num_boost_round=8,\n            evals=[(Xy, \"Train\")],\n            evals_result=evals_result,\n        )\n        assert non_increasing(evals_result[\"Train\"][\"rmse\"])\n        y_predt = booster.predict(Xy)\n        rmse = tm.root_mean_square(label, y_predt)\n        assert_allclose(device, rmse, evals_result[\"Train\"][\"rmse\"][-1], rtol=2e-5)\n\n    # Test with OHE split\n    run(USE_ONEHOT)\n\n    # Test with partition-based split\n    run(USE_PART)\n\n\ndef run_max_cat(tree_method: str, device: Device) -> None:\n    \"\"\"Test data with size smaller than number of categories.\"\"\"\n    import pandas as pd\n\n    rng = np.random.default_rng(0)\n    n_cat = 100\n    n = 5\n\n    X = pd.Series(\n        [\"\".join(rng.choice(list(ascii_lowercase), size=3)) for i in range(n_cat)],\n        dtype=\"category\",\n    )[:n].to_frame()\n\n    reg = XGBRegressor(\n        enable_categorical=True,\n        tree_method=tree_method,\n        device=device,\n        n_estimators=10,\n    )\n    y = pd.Series(range(n))\n    reg.fit(X=X, y=y, eval_set=[(X, y)])\n    assert non_increasing(reg.evals_result()[\"validation_0\"][\"rmse\"])\n\n\ndef run_invalid_category(tree_method: str, device: Device) -> None:\n   
 \"\"\"Test with invalid categorical inputs.\"\"\"\n    rng = np.random.default_rng()\n    # too large\n    X = rng.integers(low=0, high=4, size=1000).reshape(100, 10)\n    y = rng.normal(loc=0, scale=1, size=100)\n    X[13, 7] = np.iinfo(np.int32).max + 1\n\n    # Check is performed during sketching.\n    Xy = DMatrix(X, y, feature_types=[\"c\"] * 10)\n    with pytest.raises(ValueError):\n        train({\"tree_method\": tree_method, \"device\": device}, Xy)\n\n    X[13, 7] = 16777216\n    Xy = DMatrix(X, y, feature_types=[\"c\"] * 10)\n    with pytest.raises(ValueError):\n        train({\"tree_method\": tree_method, \"device\": device}, Xy)\n\n    # mixed positive and negative values\n    X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)  # type: ignore[assignment]\n    y = rng.normal(loc=0, scale=1, size=100)\n\n    Xy = DMatrix(X, y, feature_types=[\"c\"] * 10)\n    with pytest.raises(ValueError):\n        train({\"tree_method\": tree_method, \"device\": device}, Xy)\n\n    if device == \"cuda\":\n        import cupy as cp\n\n        X, y = cp.array(X), cp.array(y)\n        with pytest.raises(ValueError):\n            QuantileDMatrix(X, y, feature_types=[\"c\"] * 10)\n\n\ndef train_result(\n    param: Dict[str, Any], dmat: DMatrix, num_rounds: int\n) -> Dict[str, Any]:\n    \"\"\"Get training result from parameters and data.\"\"\"\n    result: Dict[str, Any] = {}\n    booster = train(\n        param,\n        dmat,\n        num_rounds,\n        evals=[(dmat, \"train\")],\n        verbose_eval=False,\n        evals_result=result,\n    )\n    assert booster.num_features() == dmat.num_col()\n    assert booster.num_boosted_rounds() == num_rounds\n    assert booster.feature_names == dmat.feature_names\n    assert booster.feature_types == dmat.feature_types\n\n    return result\n\n\nclass ResetStrategy(TrainingCallback):\n    \"\"\"Callback for testing multi-output.\"\"\"\n\n    def after_iteration(self, model: Booster, epoch: int, evals_log: dict) -> bool:\n 
       if epoch % 2 == 0:\n            model.set_param({\"multi_strategy\": \"multi_output_tree\"})\n        else:\n            model.set_param({\"multi_strategy\": \"one_output_per_tree\"})\n        return False\n"
  },
  {
    "path": "python-package/xgboost/testing/utils.py",
    "content": "\"\"\"Helpers for test code.\"\"\"\n\nfrom typing import Any, Literal, Sequence, TypeAlias\n\nimport numpy as np\n\nfrom ..compat import _is_cupy_alike, import_cupy\nfrom ..core import DMatrix\n\nDevice: TypeAlias = Literal[\"cpu\", \"cuda\"]\n\n\ndef assert_allclose(\n    device: Device, a: Any, b: Any, *, rtol: float = 1e-7, atol: float = 0\n) -> None:\n    \"\"\"Dispatch the assert_allclose for devices.\"\"\"\n    if device == \"cpu\" and not _is_cupy_alike(a) and not _is_cupy_alike(b):\n        np.testing.assert_allclose(a, b, atol=atol, rtol=rtol)\n    else:\n        cp = import_cupy()\n        cp.testing.assert_allclose(a, b, atol=atol, rtol=rtol)\n\n\ndef predictor_equal(lhs: DMatrix, rhs: DMatrix) -> bool:\n    \"\"\"Assert whether two DMatrices contain the same predictors.\"\"\"\n    lcsr = lhs.get_data()\n    rcsr = rhs.get_data()\n    return all(\n        (\n            np.array_equal(lcsr.data, rcsr.data),\n            np.array_equal(lcsr.indices, rcsr.indices),\n            np.array_equal(lcsr.indptr, rcsr.indptr),\n        )\n    )\n\n\ndef non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:\n    \"\"\"Values in the sequence are not increasing.\"\"\"\n    return all((y - x) < tolerance for x, y in zip(L, L[1:]))\n\n\ndef non_decreasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:\n    \"\"\"Values in the sequence are not decreasing.\"\"\"\n    return all((y - x) >= -tolerance for x, y in zip(L, L[1:]))\n"
  },
  {
    "path": "python-package/xgboost/testing/with_skl.py",
    "content": "# pylint: disable=too-many-arguments, too-many-positional-arguments\n\"\"\"Tests for compatiblity with sklearn.\"\"\"\n\nfrom typing import Callable, Optional, Type\n\nimport numpy as np\nimport pytest\nfrom scipy.special import softmax\n\nfrom ..core import DMatrix\nfrom ..sklearn import XGBClassifier, XGBRegressor, XGBRFRegressor\nfrom .data import get_california_housing, make_batches\nfrom .ordinal import make_recoded\nfrom .utils import Device, assert_allclose\n\n\ndef run_boost_from_prediction_binary(\n    tree_method: str,\n    device: Device,\n    X: np.ndarray,\n    y: np.ndarray,\n    as_frame: Optional[Callable],\n) -> None:\n    \"\"\"\n    Parameters\n    ----------\n\n    as_frame: A callable function to convert margin into DataFrame, useful for different\n    df implementations.\n    \"\"\"\n\n    model_0 = XGBClassifier(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=4,\n        tree_method=tree_method,\n        device=device,\n    )\n    model_0.fit(X=X, y=y)\n    margin = model_0.predict(X, output_margin=True)\n    if as_frame is not None:\n        margin = as_frame(margin)\n\n    model_1 = XGBClassifier(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=4,\n        tree_method=tree_method,\n        device=device,\n    )\n    model_1.fit(X=X, y=y, base_margin=margin)\n    predictions_1 = model_1.predict(X, base_margin=margin)\n\n    cls_2 = XGBClassifier(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=8,\n        tree_method=tree_method,\n        device=device,\n    )\n    cls_2.fit(X=X, y=y)\n    predictions_2 = cls_2.predict(X)\n    np.testing.assert_allclose(predictions_1, predictions_2)\n\n\ndef run_boost_from_prediction_multi_clasas(\n    estimator: Type,\n    tree_method: str,\n    device: Device,\n    X: np.ndarray,\n    y: np.ndarray,\n    as_frame: Optional[Callable],\n) -> None:\n    \"\"\"Boosting from prediction with multi-class clf.\"\"\"\n 
   # Multi-class\n    model_0 = estimator(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=4,\n        tree_method=tree_method,\n        device=device,\n    )\n    model_0.fit(X=X, y=y)\n    margin = model_0.get_booster().inplace_predict(X, predict_type=\"margin\")\n    if as_frame is not None:\n        margin = as_frame(margin)\n\n    model_1 = estimator(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=4,\n        tree_method=tree_method,\n        device=device,\n    )\n    model_1.fit(X=X, y=y, base_margin=margin)\n    predictions_1 = model_1.get_booster().predict(\n        DMatrix(X, base_margin=margin), output_margin=True\n    )\n\n    model_2 = estimator(\n        learning_rate=0.3,\n        random_state=0,\n        n_estimators=8,\n        tree_method=tree_method,\n        device=device,\n    )\n    model_2.fit(X=X, y=y)\n    predictions_2 = model_2.get_booster().inplace_predict(X, predict_type=\"margin\")\n\n    if hasattr(predictions_1, \"get\"):\n        predictions_1 = predictions_1.get()\n    if hasattr(predictions_2, \"get\"):\n        predictions_2 = predictions_2.get()\n    np.testing.assert_allclose(predictions_1, predictions_2, atol=1e-6)\n\n\ndef run_housing_rf_regression(tree_method: str, device: Device) -> None:\n    \"\"\"Testwith the cali housing dataset.\"\"\"\n    from sklearn.metrics import mean_squared_error\n    from sklearn.model_selection import KFold\n\n    X, y = get_california_housing()\n    rng = np.random.RandomState(1994)\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = XGBRFRegressor(\n            random_state=42, tree_method=tree_method, device=device\n        ).fit(X[train_index], y[train_index])\n        preds = xgb_model.predict(X[test_index])\n        labels = y[test_index]\n        assert mean_squared_error(preds, labels) < 35\n\n    rfreg = XGBRFRegressor(device=device)\n    with 
pytest.raises(NotImplementedError):\n        rfreg.set_params(early_stopping_rounds=10)\n        rfreg.fit(X, y)\n\n\ndef run_recoding(device: Device) -> None:\n    \"\"\"Test re-coding for training continuation.\"\"\"\n    enc, reenc, y, _, _ = make_recoded(device, n_features=16)\n    reg = XGBRegressor(enable_categorical=True, n_estimators=2, device=device)\n    reg.fit(enc, y, eval_set=[(reenc, y)])\n    results_0 = reg.evals_result()\n\n    booster = reg.get_booster()\n    assert not booster.get_categories().empty()\n\n    reg = XGBRegressor(enable_categorical=True, n_estimators=2, device=device)\n    reg.fit(reenc, y, xgb_model=booster, eval_set=[(enc, y)])\n    results_1 = reg.evals_result()\n\n    booster = reg.get_booster()\n    assert booster.num_boosted_rounds() == 4\n    assert not booster.get_categories().empty()\n\n    reg = XGBRegressor(enable_categorical=True, n_estimators=4, device=device)\n    reg.fit(enc, y, eval_set=[(reenc, y)])\n    results_2 = reg.evals_result()\n\n    np.testing.assert_allclose(\n        results_2[\"validation_0\"][\"rmse\"],\n        results_0[\"validation_0\"][\"rmse\"] + results_1[\"validation_0\"][\"rmse\"],\n    )\n\n    np.testing.assert_allclose(reg.predict(reenc), reg.predict(enc))\n    np.testing.assert_allclose(reg.apply(reenc), reg.apply(enc))\n\n\ndef run_intercept(device: Device) -> None:\n    \"\"\"Tests for the intercept.\"\"\"\n    from sklearn.datasets import make_classification, make_multilabel_classification\n\n    X, y, w = [v[0] for v in make_batches(256, 3, 1, use_cupy=False)]\n    reg = XGBRegressor(device=device)\n    reg.fit(X, y, sample_weight=w)\n    result = reg.intercept_\n    assert result.dtype == np.float32\n    assert result[0] < 0.5\n\n    reg = XGBRegressor(booster=\"gblinear\", device=device)\n    reg.fit(X, y, sample_weight=w)\n    result = reg.intercept_\n    assert isinstance(result, np.ndarray)\n    assert result.dtype == np.float32\n    assert result[0] < 0.5\n\n    n_classes = 4\n    
X, y = make_classification(\n        random_state=1994,\n        n_samples=128,\n        n_features=16,\n        n_classes=n_classes,\n        n_informative=16,\n        n_redundant=0,\n    )\n\n    clf = XGBClassifier(booster=\"gbtree\", objective=\"multi:softprob\", device=device)\n    clf.fit(X, y)\n    result = clf.intercept_\n    assert isinstance(result, np.ndarray)\n    assert len(result) == 4\n\n    assert (softmax(result) >= 0.0).all()\n    np.testing.assert_allclose(sum(result), 0.0, atol=1e-6)\n    np.testing.assert_allclose(sum(softmax(result)), 1.0)\n\n    # Tests for user input\n    # Multi-class\n    intercept = np.ones(shape=(n_classes), dtype=np.float32) / n_classes\n    if device == \"cuda\":\n        import cupy as cp\n\n        intercept = cp.array(intercept)\n\n    clf = XGBClassifier(objective=\"multi:softprob\", base_score=intercept)\n    clf.fit(X, y)\n    assert_allclose(device, intercept, clf.intercept_)\n\n    X, y = make_multilabel_classification(  # pylint: disable=unbalanced-tuple-unpacking\n        random_state=1994, n_samples=128, n_features=16, n_classes=n_classes\n    )\n\n    # Multi-label\n    intercept = np.ones(shape=(n_classes), dtype=np.float32) / 2\n    if device == \"cuda\":\n        import cupy as cp\n\n        intercept = cp.array(intercept)\n\n    clf = XGBClassifier(base_score=intercept)\n    clf.fit(X, y)\n    assert_allclose(device, intercept, clf.intercept_)\n    assert clf.objective == \"binary:logistic\"\n"
  },
  {
    "path": "python-package/xgboost/tracker.py",
    "content": "\"\"\"Tracker for XGBoost collective.\"\"\"\n\nimport ctypes\nimport json\nimport socket\nfrom enum import IntEnum, unique\nfrom typing import Dict, Optional, Union\n\nfrom .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs\n\n\ndef get_family(addr: str) -> int:\n    \"\"\"Get network family from address.\"\"\"\n    return socket.getaddrinfo(addr, None)[0][0]\n\n\nclass RabitTracker:\n    \"\"\"Tracker for the collective used in XGBoost, acting as a coordinator between\n    workers.\n\n    Parameters\n    ----------\n\n    n_workers:\n\n        The total number of workers in the communication group.\n\n    host_ip:\n\n        The IP address of the tracker node. XGBoost can try to guess one by probing with\n        sockets. But it's best to explicitly pass an address.\n\n    port:\n\n        The port this tracker should listen to. XGBoost can query an available port from\n        the OS, this configuration is useful for restricted network environments.\n\n    sortby:\n\n        How to sort the workers for rank assignment. The default is host, but users can\n        set the `DMLC_TASK_ID` via arguments of :py:meth:`~xgboost.collective.init` and\n        obtain deterministic rank assignment through sorting by task name. Available\n        options are:\n\n          - host\n          - task\n\n    timeout :\n\n        Timeout for constructing (bootstrap) and shutting down the communication group,\n        doesn't apply to communication when the group is up and running.\n\n        The timeout value should take the time of data loading and pre-processing into\n        account, due to potential lazy execution. By default the Tracker doesn't have\n        any timeout to avoid pre-mature aborting.\n\n        The :py:meth:`.wait_for` method has a different timeout parameter that can stop\n        the tracker even if the tracker is still being used. 
A value error is raised\n        when timeout is reached.\n\n    Examples\n    --------\n\n    .. code-block:: python\n\n        from xgboost.tracker import RabitTracker\n        from xgboost import collective as coll\n\n        tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=2)\n        tracker.start()\n\n        with coll.CommunicatorContext(**tracker.worker_args()):\n            ret = coll.broadcast(\"msg\", 0)\n            assert str(ret) == \"msg\"\n\n    \"\"\"\n\n    @unique\n    class _SortBy(IntEnum):\n        HOST = 0\n        TASK = 1\n\n    @_deprecate_positional_args\n    def __init__(  # pylint: disable=too-many-arguments\n        self,\n        n_workers: int,\n        host_ip: Optional[str],\n        port: int = 0,\n        *,\n        sortby: str = \"host\",\n        timeout: int = 0,\n    ) -> None:\n\n        handle = ctypes.c_void_p()\n        if sortby not in (\"host\", \"task\"):\n            raise ValueError(\"Expecting either 'host' or 'task' for sortby.\")\n        if host_ip is not None:\n            get_family(host_ip)  # use python socket to stop early for invalid address\n        args = make_jcargs(\n            host=host_ip,\n            n_workers=n_workers,\n            port=port,\n            dmlc_communicator=\"rabit\",\n            sortby=self._SortBy.HOST if sortby == \"host\" else self._SortBy.TASK,\n            timeout=int(timeout),\n        )\n        _check_call(_LIB.XGTrackerCreate(args, ctypes.byref(handle)))\n        self.handle = handle\n\n    def free(self) -> None:\n        \"\"\"Internal function for testing.\"\"\"\n        if hasattr(self, \"handle\"):\n            handle = self.handle\n            del self.handle\n            _check_call(_LIB.XGTrackerFree(handle))\n\n    def __del__(self) -> None:\n        self.free()\n\n    def start(self) -> None:\n        \"\"\"Start the tracker. 
Once started, the client still needs to call the\n        :py:meth:`wait_for` method in order to wait for it to finish (think of it as a\n        thread).\n\n        \"\"\"\n        _check_call(_LIB.XGTrackerRun(self.handle, make_jcargs()))\n\n    def wait_for(self, timeout: Optional[int] = None) -> None:\n        \"\"\"Wait for the tracker to finish all the work and shut down. When timeout is\n        reached, a value error is raised. By default we don't have timeout since we\n        don't know how long it takes for the model to finish training.\n\n        \"\"\"\n        _check_call(_LIB.XGTrackerWaitFor(self.handle, make_jcargs(timeout=timeout)))\n\n    def worker_args(self) -> Dict[str, Union[str, int]]:\n        \"\"\"Get arguments for workers.\"\"\"\n        c_env = ctypes.c_char_p()\n        _check_call(_LIB.XGTrackerWorkerArgs(self.handle, ctypes.byref(c_env)))\n        assert c_env.value is not None\n        env = json.loads(c_env.value)\n        return env\n"
  },
  {
    "path": "python-package/xgboost/training.py",
    "content": "# pylint: disable=too-many-locals, too-many-arguments\n# pylint: disable=too-many-branches, too-many-statements\n\"\"\"Training Library containing training routines.\"\"\"\n\nimport copy\nimport os\nimport weakref\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Dict,\n    Iterable,\n    List,\n    Optional,\n    Sequence,\n    Tuple,\n    Union,\n    cast,\n)\n\nimport numpy as np\n\nfrom ._typing import BoosterParam, Callable, FPreProcCallable\nfrom .callback import (\n    CallbackContainer,\n    EarlyStopping,\n    EvaluationMonitor,\n    TrainingCallback,\n)\nfrom .compat import SKLEARN_INSTALLED, XGBStratifiedKFold\nfrom .core import (\n    Booster,\n    DMatrix,\n    Metric,\n    PlainObj,\n    XGBoostError,\n    _deprecate_positional_args,\n    _RefMixIn,\n)\n\nif TYPE_CHECKING:\n    from pandas import DataFrame as PdDataFrame\n\n_CVFolds = Sequence[\"CVPack\"]\n\n_RefError = (\n    \"Training dataset should be used as a reference when constructing the \"\n    \"`QuantileDMatrix` for evaluation.\",\n)\n\n\n@_deprecate_positional_args\ndef train(\n    params: Dict[str, Any],\n    dtrain: DMatrix,\n    num_boost_round: int = 10,\n    *,\n    evals: Optional[Sequence[Tuple[DMatrix, str]]] = None,\n    obj: Optional[PlainObj] = None,\n    maximize: Optional[bool] = None,\n    early_stopping_rounds: Optional[int] = None,\n    evals_result: Optional[TrainingCallback.EvalsLog] = None,\n    verbose_eval: Optional[Union[bool, int]] = True,\n    xgb_model: Optional[Union[str, os.PathLike, Booster, bytearray]] = None,\n    callbacks: Optional[Sequence[TrainingCallback]] = None,\n    custom_metric: Optional[Metric] = None,\n) -> Booster:\n    \"\"\"Train a booster with given parameters.\n\n    Parameters\n    ----------\n    params :\n        Booster params.\n    dtrain :\n        Data to be trained.\n    num_boost_round :\n        Number of boosting iterations.\n    evals :\n        List of validation sets for which metrics will evaluated during 
training.\n        Validation metrics will help us track the performance of the model.\n    obj\n        Custom objective function.  See :doc:`Custom Objective\n        </tutorials/custom_metric_obj>` for details.\n    maximize :\n        Whether to maximize custom_metric.\n\n    early_stopping_rounds :\n\n        Activates early stopping. Validation metric needs to improve at least once in\n        every **early_stopping_rounds** round(s) to continue training.\n\n        Requires at least one item in **evals**.\n\n        The method returns the model from the last iteration (not the best one).  Use\n        custom callback :py:class:`~xgboost.callback.EarlyStopping` or :py:meth:`model\n        slicing <xgboost.Booster.__getitem__>` if the best model is desired.  If there's\n        more than one item in **evals**, the last entry will be used for early stopping.\n\n        If there's more than one metric in the **eval_metric** parameter given in\n        **params**, the last metric will be used for early stopping.\n\n        If early stopping occurs, the model will have two additional fields:\n        ``bst.best_score``, ``bst.best_iteration``.\n\n    evals_result :\n        This dictionary stores the evaluation results of all the items in watchlist.\n\n        Example: with a watchlist containing\n        ``[(dtest,'eval'), (dtrain,'train')]`` and\n        a parameter containing ``('eval_metric': 'logloss')``,\n        the **evals_result** returns\n\n        .. code-block:: python\n\n            {'train': {'logloss': ['0.48253', '0.35953']},\n             'eval': {'logloss': ['0.480385', '0.357756']}}\n\n    verbose_eval :\n        Requires at least one item in **evals**.\n\n        If **verbose_eval** is True then the evaluation metric on the validation set is\n        printed at each boosting stage.\n\n        If **verbose_eval** is an integer then the evaluation metric on the validation\n        set is printed at every given **verbose_eval** boosting stage. 
The last boosting\n        stage / the boosting stage found by using **early_stopping_rounds** is also\n        printed.\n\n        Example: with ``verbose_eval=4`` and at least one item in **evals**, an\n        evaluation metric is printed every 4 boosting stages, instead of every boosting\n        stage.\n\n    xgb_model :\n        Xgb model to be loaded before training (allows training continuation).\n\n    callbacks :\n        List of callback functions that are applied at end of each iteration.\n        It is possible to use predefined callbacks by using\n        :ref:`Callback API <callback_api>`.\n\n        .. note::\n\n           States in callback are not preserved during training, which means callback\n           objects can not be reused for multiple training sessions without\n           reinitialization or deepcopy.\n\n        .. code-block:: python\n\n            for params in parameters_grid:\n                # be sure to (re)initialize the callbacks before each run\n                callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]\n                xgboost.train(params, Xy, callbacks=callbacks)\n\n    custom_metric:\n\n        .. versionadded 1.6.0\n\n        Custom metric function.  See :doc:`Custom Metric </tutorials/custom_metric_obj>`\n        for details. 
The metric receives transformed prediction (after applying the\n        reverse link function) when using a builtin objective, and raw output when using\n        a custom objective.\n\n    Returns\n    -------\n    Booster : a trained booster model\n\n    \"\"\"\n\n    callbacks = [] if callbacks is None else copy.copy(list(callbacks))\n    evals = list(evals) if evals else []\n\n    for va, _ in evals:\n        if not isinstance(va, DMatrix):\n            raise TypeError(\"Invalid type for the `evals`.\")\n\n        if (\n            isinstance(va, _RefMixIn)\n            and va.ref is not weakref.ref(dtrain)\n            and va is not dtrain\n        ):\n            raise ValueError(_RefError)\n\n    bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model)\n    start_iteration = 0\n\n    if verbose_eval:\n        verbose_eval = 1 if verbose_eval is True else verbose_eval\n        callbacks.append(EvaluationMonitor(period=verbose_eval))\n    if early_stopping_rounds:\n        callbacks.append(EarlyStopping(rounds=early_stopping_rounds, maximize=maximize))\n    cb_container = CallbackContainer(\n        callbacks, metric=custom_metric, output_margin=callable(obj)\n    )\n\n    bst = cb_container.before_training(bst)\n\n    for i in range(start_iteration, num_boost_round):\n        if cb_container.before_iteration(bst, i, dtrain, evals):\n            break\n        bst.update(dtrain, iteration=i, fobj=obj)\n        if cb_container.after_iteration(bst, i, dtrain, evals):\n            break\n\n    bst = cb_container.after_training(bst)\n\n    if evals_result is not None:\n        evals_result.update(cb_container.history)\n\n    return bst.reset()\n\n\nclass CVPack:\n    \"\"\" \"Auxiliary datastruct to hold one fold of CV.\"\"\"\n\n    def __init__(\n        self, dtrain: DMatrix, dtest: DMatrix, param: Optional[Union[Dict, List]]\n    ) -> None:\n        \"\"\"Initialize the CVPack.\"\"\"\n        self.dtrain = dtrain\n        self.dtest = 
dtest\n        self.watchlist = [(dtrain, \"train\"), (dtest, \"test\")]\n        self.bst = Booster(param, [dtrain, dtest])\n\n    def __getattr__(self, name: str) -> Callable:\n        def _inner(*args: Any, **kwargs: Any) -> Any:\n            return getattr(self.bst, name)(*args, **kwargs)\n\n        return _inner\n\n    def update(self, iteration: int, fobj: Optional[PlainObj]) -> None:\n        \"\"\" \"Update the boosters for one iteration\"\"\"\n        self.bst.update(self.dtrain, iteration, fobj)\n\n    def eval(self, iteration: int, feval: Optional[Metric], output_margin: bool) -> str:\n        \"\"\" \"Evaluate the CVPack for one iteration.\"\"\"\n        return self.bst.eval_set(self.watchlist, iteration, feval, output_margin)\n\n\nclass _PackedBooster:\n    def __init__(self, cvfolds: _CVFolds) -> None:\n        self.cvfolds = cvfolds\n\n    def update(self, iteration: int, obj: Optional[PlainObj]) -> None:\n        \"\"\"Iterate through folds for update\"\"\"\n        for fold in self.cvfolds:\n            fold.update(iteration, obj)\n\n    def eval(\n        self, iteration: int, feval: Optional[Metric], output_margin: bool\n    ) -> List[str]:\n        \"\"\"Iterate through folds for eval\"\"\"\n        result = [f.eval(iteration, feval, output_margin) for f in self.cvfolds]\n        return result\n\n    def set_attr(self, **kwargs: Optional[Any]) -> Any:\n        \"\"\"Iterate through folds for setting attributes\"\"\"\n        for f in self.cvfolds:\n            f.bst.set_attr(**kwargs)\n\n    def attr(self, key: str) -> Optional[str]:\n        \"\"\"Redirect to booster attr.\"\"\"\n        return self.cvfolds[0].bst.attr(key)\n\n    def set_param(\n        self,\n        params: Union[Dict, Iterable[Tuple[str, Any]], str],\n        value: Optional[str] = None,\n    ) -> None:\n        \"\"\"Iterate through folds for set_param\"\"\"\n        for f in self.cvfolds:\n            f.bst.set_param(params, value)\n\n    def num_boosted_rounds(self) -> 
int:\n        \"\"\"Number of boosted rounds.\"\"\"\n        return self.cvfolds[0].num_boosted_rounds()\n\n    @property\n    def best_iteration(self) -> int:\n        \"\"\"Get best_iteration\"\"\"\n        return int(cast(int, self.cvfolds[0].bst.attr(\"best_iteration\")))\n\n    @best_iteration.setter\n    def best_iteration(self, iteration: int) -> None:\n        \"\"\"Get best_iteration\"\"\"\n        self.set_attr(best_iteration=iteration)\n\n    @property\n    def best_score(self) -> float:\n        \"\"\"Get best_score.\"\"\"\n        return float(cast(float, self.cvfolds[0].bst.attr(\"best_score\")))\n\n    @best_score.setter\n    def best_score(self, score: float) -> None:\n        self.set_attr(best_score=score)\n\n\ndef groups_to_rows(groups: np.ndarray, boundaries: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Given group row boundaries, convert ground indexes to row indexes\n    :param groups: list of groups for testing\n    :param boundaries: rows index limits of each group\n    :return: row in group\n    \"\"\"\n    return np.concatenate([np.arange(boundaries[g], boundaries[g + 1]) for g in groups])\n\n\ndef mkgroupfold(\n    *,\n    dall: DMatrix,\n    nfold: int,\n    param: BoosterParam,\n    evals: Sequence[str] = (),\n    fpreproc: Optional[FPreProcCallable] = None,\n    shuffle: bool = True,\n) -> List[CVPack]:\n    \"\"\"\n    Make n folds for cross-validation maintaining groups\n    :return: cross-validation folds\n    \"\"\"\n    # we have groups for pairwise ranking... 
get a list of the group indexes\n    group_boundaries = dall.get_uint_info(\"group_ptr\")\n    group_sizes = np.diff(group_boundaries)\n\n    if shuffle is True:\n        idx = np.random.permutation(len(group_sizes))\n    else:\n        idx = np.arange(len(group_sizes))\n    # list by fold of test group indexes\n    out_group_idset = np.array_split(idx, nfold)\n    # list by fold of train group indexes\n    in_group_idset = [\n        np.concatenate([out_group_idset[i] for i in range(nfold) if k != i])\n        for k in range(nfold)\n    ]\n    # from the group indexes, convert them to row indexes\n    in_idset = [\n        groups_to_rows(in_groups, group_boundaries) for in_groups in in_group_idset\n    ]\n    out_idset = [\n        groups_to_rows(out_groups, group_boundaries) for out_groups in out_group_idset\n    ]\n\n    # build the folds by taking the appropriate slices\n    ret = []\n    for k in range(nfold):\n        # perform the slicing using the indexes determined by the above methods\n        dtrain = dall.slice(in_idset[k], allow_groups=True)\n        dtrain.set_group(group_sizes[in_group_idset[k]])\n        dtest = dall.slice(out_idset[k], allow_groups=True)\n        dtest.set_group(group_sizes[out_group_idset[k]])\n        # run preprocessing on the data set if needed\n        if fpreproc is not None:\n            dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy())\n        else:\n            tparam = param\n        plst = list(tparam.items()) + [(\"eval_metric\", itm) for itm in evals]\n        ret.append(CVPack(dtrain, dtest, plst))\n    return ret\n\n\ndef mknfold(\n    *,\n    dall: DMatrix,\n    nfold: int,\n    param: BoosterParam,\n    seed: int,\n    evals: Sequence[str] = (),\n    fpreproc: Optional[FPreProcCallable] = None,\n    stratified: Optional[bool] = False,\n    folds: Optional[XGBStratifiedKFold] = None,\n    shuffle: bool = True,\n) -> List[CVPack]:\n    \"\"\"\n    Make an n-fold list of CVPack from random indices.\n    
\"\"\"\n    evals = list(evals)\n    np.random.seed(seed)\n\n    if stratified is False and folds is None:\n        # Do standard k-fold cross validation. Automatically determine the folds.\n        if len(dall.get_uint_info(\"group_ptr\")) > 1:\n            return mkgroupfold(\n                dall=dall,\n                nfold=nfold,\n                param=param,\n                evals=evals,\n                fpreproc=fpreproc,\n                shuffle=shuffle,\n            )\n\n        if shuffle is True:\n            idx = np.random.permutation(dall.num_row())\n        else:\n            idx = np.arange(dall.num_row())\n        out_idset = np.array_split(idx, nfold)\n        in_idset = [\n            np.concatenate([out_idset[i] for i in range(nfold) if k != i])\n            for k in range(nfold)\n        ]\n    elif folds is not None:\n        # Use user specified custom split using indices\n        try:\n            in_idset = [x[0] for x in folds]\n            out_idset = [x[1] for x in folds]\n        except TypeError:\n            # Custom stratification using Sklearn KFoldSplit object\n            splits = list(folds.split(X=dall.get_label(), y=dall.get_label()))\n            in_idset = [x[0] for x in splits]\n            out_idset = [x[1] for x in splits]\n        nfold = len(out_idset)\n    else:\n        # Do standard stratefied shuffle k-fold split\n        sfk = XGBStratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed)\n        splits = list(sfk.split(X=dall.get_label(), y=dall.get_label()))\n        in_idset = [x[0] for x in splits]\n        out_idset = [x[1] for x in splits]\n        nfold = len(out_idset)\n\n    ret = []\n    for k in range(nfold):\n        # perform the slicing using the indexes determined by the above methods\n        dtrain = dall.slice(in_idset[k])\n        dtest = dall.slice(out_idset[k])\n        # run preprocessing on the data set if needed\n        if fpreproc is not None:\n            dtrain, dtest, tparam = 
fpreproc(dtrain, dtest, param.copy())\n        else:\n            tparam = param\n        plst = list(tparam.items()) + [(\"eval_metric\", itm) for itm in evals]\n        ret.append(CVPack(dtrain, dtest, plst))\n    return ret\n\n\n@_deprecate_positional_args\ndef cv(\n    params: BoosterParam,\n    dtrain: DMatrix,\n    num_boost_round: int = 10,\n    *,\n    nfold: int = 3,\n    stratified: bool = False,\n    folds: XGBStratifiedKFold = None,\n    metrics: Sequence[str] = (),\n    obj: Optional[PlainObj] = None,\n    maximize: Optional[bool] = None,\n    early_stopping_rounds: Optional[int] = None,\n    fpreproc: Optional[FPreProcCallable] = None,\n    as_pandas: bool = True,\n    verbose_eval: Optional[Union[int, bool]] = None,\n    show_stdv: bool = True,\n    seed: int = 0,\n    callbacks: Optional[Sequence[TrainingCallback]] = None,\n    shuffle: bool = True,\n    custom_metric: Optional[Metric] = None,\n) -> Union[Dict[str, float], \"PdDataFrame\"]:\n    \"\"\"Cross-validation with given parameters.\n\n    Parameters\n    ----------\n    params : dict\n        Booster params.\n    dtrain :\n        Data to be trained. 
Only the :py:class:`DMatrix` without external memory is\n        supported.\n    num_boost_round :\n        Number of boosting iterations.\n    nfold : int\n        Number of folds in CV.\n    stratified : bool\n        Perform stratified sampling.\n    folds : a KFold or StratifiedKFold instance or list of fold indices\n        Sklearn KFolds or StratifiedKFolds object.\n        Alternatively may explicitly pass sample indices for each fold.\n        For ``n`` folds, **folds** should be a length ``n`` list of tuples.\n        Each tuple is ``(in,out)`` where ``in`` is a list of indices to be used\n        as the training samples for the ``n`` th fold and ``out`` is a list of\n        indices to be used as the testing samples for the ``n`` th fold.\n    metrics : string or list of strings\n        Evaluation metrics to be watched in CV.\n    obj :\n\n        Custom objective function.  See :doc:`Custom Objective\n        </tutorials/custom_metric_obj>` for details.\n\n    maximize : bool\n        Whether to maximize the evaluataion metric (score or error).\n\n    early_stopping_rounds: int\n        Activates early stopping. Cross-Validation metric (average of validation\n        metric computed over CV folds) needs to improve at least once in\n        every **early_stopping_rounds** round(s) to continue training.\n        The last entry in the evaluation history will represent the best iteration.\n        If there's more than one metric in the **eval_metric** parameter given in\n        **params**, the last metric will be used for early stopping.\n    fpreproc : function\n        Preprocessing function that takes (dtrain, dtest, param) and returns\n        transformed versions of those.\n    as_pandas : bool, default True\n        Return pd.DataFrame when pandas is installed.\n        If False or pandas is not installed, return np.ndarray\n    verbose_eval : bool, int, or None, default None\n        Whether to display the progress. 
If None, progress will be displayed\n        when np.ndarray is returned. If True, progress will be displayed at\n        boosting stage. If an integer is given, progress will be displayed\n        at every given `verbose_eval` boosting stage.\n    show_stdv : bool, default True\n        Whether to display the standard deviation in progress.\n        Results are not affected, and always contains std.\n    seed : int\n        Seed used to generate the folds (passed to numpy.random.seed).\n    callbacks :\n        List of callback functions that are applied at end of each iteration.\n        It is possible to use predefined callbacks by using\n        :ref:`Callback API <callback_api>`.\n\n        .. note::\n\n           States in callback are not preserved during training, which means callback\n           objects can not be reused for multiple training sessions without\n           reinitialization or deepcopy.\n\n        .. code-block:: python\n\n            for params in parameters_grid:\n                # be sure to (re)initialize the callbacks before each run\n                callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]\n                xgboost.train(params, Xy, callbacks=callbacks)\n\n    shuffle : bool\n        Shuffle data before creating folds.\n    custom_metric :\n\n        .. versionadded 1.6.0\n\n        Custom metric function.  
See :doc:`Custom Metric </tutorials/custom_metric_obj>`\n        for details.\n\n    Returns\n    -------\n    evaluation history : list(string)\n    \"\"\"\n    if stratified is True and not SKLEARN_INSTALLED:\n        raise XGBoostError(\n            \"sklearn needs to be installed in order to use stratified cv\"\n        )\n    if isinstance(metrics, str):\n        metrics = [metrics]\n    if isinstance(dtrain, _RefMixIn):\n        raise ValueError(\"`QuantileDMatrix` is not yet supported.\")\n\n    params = params.copy()\n    if isinstance(params, list):\n        _metrics = [x[1] for x in params if x[0] == \"eval_metric\"]\n        params = dict(params)\n        if \"eval_metric\" in params:\n            params[\"eval_metric\"] = _metrics\n\n    if (not metrics) and \"eval_metric\" in params:\n        if isinstance(params[\"eval_metric\"], list):\n            metrics = params[\"eval_metric\"]\n        else:\n            metrics = [params[\"eval_metric\"]]\n\n    params.pop(\"eval_metric\", None)\n\n    results: Dict[str, List[float]] = {}\n    cvfolds = mknfold(\n        dall=dtrain,\n        nfold=nfold,\n        param=params,\n        seed=seed,\n        evals=metrics,\n        fpreproc=fpreproc,\n        stratified=stratified,\n        folds=folds,\n        shuffle=shuffle,\n    )\n\n    # setup callbacks\n    callbacks = [] if callbacks is None else copy.copy(list(callbacks))\n\n    if verbose_eval:\n        verbose_eval = 1 if verbose_eval is True else verbose_eval\n        callbacks.append(EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv))\n    if early_stopping_rounds:\n        callbacks.append(EarlyStopping(rounds=early_stopping_rounds, maximize=maximize))\n    callbacks_container = CallbackContainer(\n        callbacks, metric=custom_metric, is_cv=True, output_margin=callable(obj)\n    )\n\n    booster = _PackedBooster(cvfolds)\n    callbacks_container.before_training(booster)\n\n    for i in range(num_boost_round):\n        if 
callbacks_container.before_iteration(booster, i, dtrain, None):\n            break\n        booster.update(i, obj)\n\n        should_break = callbacks_container.after_iteration(booster, i, dtrain, None)\n        res = callbacks_container.aggregated_cv\n        for key, mean, std in cast(List[Tuple[str, float, float]], res):\n            if key + \"-mean\" not in results:\n                results[key + \"-mean\"] = []\n            if key + \"-std\" not in results:\n                results[key + \"-std\"] = []\n            results[key + \"-mean\"].append(mean)\n            results[key + \"-std\"].append(std)\n\n        if should_break:\n            for k in results.keys():  # pylint: disable=consider-iterating-dictionary\n                results[k] = results[k][: (booster.best_iteration + 1)]\n            break\n    if as_pandas:\n        try:\n            import pandas as pd\n\n            results = pd.DataFrame.from_dict(results)\n        except ImportError:\n            pass\n\n    callbacks_container.after_training(booster)\n\n    return results\n"
  },
  {
    "path": "src/CMakeLists.txt",
    "content": "file(GLOB_RECURSE CPU_SOURCES *.cc *.h)\nlist(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/cli_main.cc)\n\nif(PLUGIN_SYCL)\n  list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/regression_obj.cc)\n  list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/hinge.cc)\n  list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/quantile_obj.cc)\n  list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/multiclass_obj.cc)\nendif()\n\n#-- Object library\n# Object library is necessary for jvm-package, which creates its own shared library.\nadd_library(objxgboost OBJECT)\ntarget_sources(objxgboost PRIVATE ${CPU_SOURCES})\n# Skip files with factory object\nset_source_files_properties(\n  predictor/predictor.cc gbm/gbm.cc tree/tree_updater.cc metric/metric.cc objective/objective.cc\n  PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON)\n\nif(USE_CUDA)\n  file(GLOB_RECURSE CUDA_SOURCES *.cu)\n  target_sources(objxgboost PRIVATE ${CUDA_SOURCES})\nendif()\n\nif(PLUGIN_SYCL)\n  target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_SYCL=1)\nendif()\n\ntarget_include_directories(objxgboost\n  PRIVATE\n  ${xgboost_SOURCE_DIR}/include\n  ${xgboost_SOURCE_DIR}/dmlc-core/include)\n\nif(LOG_CAPI_INVOCATION)\n  target_compile_definitions(objxgboost PRIVATE -DLOG_CAPI_INVOCATION=1)\nendif()\n\n# This grouping organises source files nicely in visual studio\nauto_source_group(\"${CUDA_SOURCES}\")\nauto_source_group(\"${CPU_SOURCES}\")\n\n#-- End object library\n"
  },
  {
    "path": "src/c_api/c_api.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n */\n#include \"xgboost/c_api.h\"\n\n#include <algorithm>     // for copy, transform\n#include <cinttypes>     // for strtoimax\n#include <cmath>         // for nan\n#include <cstring>       // for strcmp\n#include <limits>        // for numeric_limits\n#include <map>           // for operator!=, _Rb_tree_const_iterator, _Rb_tre...\n#include <memory>        // for shared_ptr, allocator, __shared_ptr_access\n#include <string>        // for char_traits, basic_string, operator==, string\n#include <system_error>  // for errc\n#include <utility>       // for pair\n#include <vector>        // for vector\n\n#include \"../common/api_entry.h\"         // for XGBAPIThreadLocalEntry\n#include \"../common/charconv.h\"          // for from_chars, to_chars, NumericLimits, from_ch...\n#include \"../common/cuda_rt_utils.h\"     // for MemoryPoolsSupported\n#include \"../common/error_msg.h\"         // for NoFederated\n#include \"../common/hist_util.h\"         // for HistogramCuts\n#include \"../common/io.h\"                // for FileExtension, LoadSequentialFile, MemoryBuf...\n#include \"../common/threading_utils.h\"   // for OmpGetNumThreads, ParallelFor\n#include \"../data/adapter.h\"             // for ArrayAdapter, DenseAdapter\n#include \"../data/batch_utils.h\"         // for MatchingPageBytes, CachePageRatio\n#include \"../data/cat_container.h\"       // for CatContainer\n#include \"../data/ellpack_page.h\"        // for EllpackPage\n#include \"../data/metainfo.h\"            // for DispatchDType\n#include \"../data/proxy_dmatrix.h\"       // for DMatrixProxy\n#include \"../data/simple_dmatrix.h\"      // for SimpleDMatrix\n#include \"../encoder/types.h\"            // for Overloaded\n#include \"c_api_error.h\"                 // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN\n#include \"c_api_utils.h\"                 // for RequiredArg, OptionalArg, GetMissing, CastDM...\n#include \"dmlc/base.h\"        
           // for BeginPtr\n#include \"dmlc/io.h\"                     // for Stream\n#include \"dmlc/parameter.h\"              // for FieldAccessEntry, FieldEntry, ParamManager\n#include \"dmlc/thread_local.h\"           // for ThreadLocalStore\n#include \"xgboost/base.h\"                // for bst_ulong, bst_float, GradientPair, bst_feat...\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for DMatrix, MetaInfo, DataType, ExtSparsePage\n#include \"xgboost/feature_map.h\"         // for FeatureMap\n#include \"xgboost/global_config.h\"       // for GlobalConfiguration, GlobalConfigThreadLocal...\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json, get, Integer, IsA, Boolean, String\n#include \"xgboost/learner.h\"             // for Learner, PredictionType\n#include \"xgboost/logging.h\"             // for LOG_FATAL, LogMessageFatal, CHECK, LogCheck_EQ\n#include \"xgboost/predictor.h\"           // for PredictionCacheEntry\n#include \"xgboost/span.h\"                // for Span\n#include \"xgboost/string_view.h\"         // for StringView, operator<<\n#include \"xgboost/version_config.h\"      // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS...\n#include \"xgboost/windefs.h\"             // for xgboost_IS_WIN\n\nusing namespace xgboost;  // NOLINT(*);\n\nXGB_DLL void XGBoostVersion(int *major, int *minor, int *patch) {\n  if (major) {\n    *major = XGBOOST_VER_MAJOR;\n  }\n  if (minor) {\n    *minor = XGBOOST_VER_MINOR;\n  }\n  if (patch) {\n    *patch = XGBOOST_VER_PATCH;\n  }\n}\n\nstatic_assert(DMLC_CXX11_THREAD_LOCAL, \"XGBoost depends on thread-local storage.\");\nusing GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;\n\n#if !defined(XGBOOST_USE_CUDA)\nnamespace xgboost {\nvoid XGBBuildInfoDevice(Json *p_info) {\n  auto &info = *p_info;\n  info[\"USE_CUDA\"] = Boolean{false};\n  info[\"USE_NCCL\"] = 
Boolean{false};\n  info[\"USE_RMM\"] = Boolean{false};\n  info[\"USE_DLOPEN_NCCL\"] = Boolean{false};\n}\n}  // namespace xgboost\n#endif\n\nXGB_DLL int XGBuildInfo(char const **out) {\n  API_BEGIN_UNGUARD()\n  xgboost_CHECK_C_ARG_PTR(out);\n  Json info{Object{}};\n\n#if defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)\n  info[\"BUILTIN_PREFETCH_PRESENT\"] = Boolean{true};\n#else\n  info[\"BUILTIN_PREFETCH_PRESENT\"] = Boolean{false};\n#endif\n\n#if defined(XGBOOST_MM_PREFETCH_PRESENT)\n  info[\"MM_PREFETCH_PRESENT\"] = Boolean{true};\n#else\n  info[\"MM_PREFETCH_PRESENT\"] = Boolean{false};\n#endif\n\n#if defined(_OPENMP)\n  info[\"USE_OPENMP\"] = Boolean{true};\n#else\n  info[\"USE_OPENMP\"] = Boolean{false};\n#endif\n\n#if defined(__GNUC__) && !defined(__clang__)\n  info[\"GCC_VERSION\"] = std::vector<Json>{Json{Integer{__GNUC__}}, Json{Integer{__GNUC_MINOR__}},\n                                          Json{Integer{__GNUC_PATCHLEVEL__}}};\n#endif\n\n#if defined(__GLIBC__)\n  info[\"GLIBC_VERSION\"] = std::vector<Json>{Json{__GLIBC__}, Json{__GLIBC_MINOR__}};\n#endif  // defined(__GLIBC__)\n\n#if defined(__clang__)\n  info[\"CLANG_VERSION\"] =\n      std::vector<Json>{Json{Integer{__clang_major__}}, Json{Integer{__clang_minor__}},\n                        Json{Integer{__clang_patchlevel__}}};\n#endif\n\n#if !defined(NDEBUG)\n  info[\"DEBUG\"] = Boolean{true};\n#else\n  info[\"DEBUG\"] = Boolean{false};\n#endif\n\n#if defined(XGBOOST_USE_FEDERATED)\n  info[\"USE_FEDERATED\"] = Boolean{true};\n#else\n  info[\"USE_FEDERATED\"] = Boolean{false};\n#endif\n\n#if defined(XGBOOST_GIT_HASH)\n  char const *git_hash = XGBOOST_GIT_HASH;\n  info[\"GIT_HASH\"] = String{git_hash};\n#endif\n\n  XGBBuildInfoDevice(&info);\n\n  auto &out_str = GlobalConfigAPIThreadLocalStore::Get()->ret_str;\n  Json::Dump(info, &out_str);\n  *out = out_str.c_str();\n\n  API_END();\n}\n\nXGB_DLL int XGBRegisterLogCallback(void (*callback)(const char *)) {\n  API_BEGIN_UNGUARD()\n  LogCallbackRegistry 
*registry = LogCallbackRegistryStore::Get();\n  registry->Register(callback);\n  API_END();\n}\n\nXGB_DLL int XGBSetGlobalConfig(const char *json_str) {\n  API_BEGIN_UNGUARD()\n\n  xgboost_CHECK_C_ARG_PTR(json_str);\n  Json config{Json::Load(StringView{json_str})};\n\n  // handle nthread, it's not a dmlc parameter.\n  auto &obj = get<Object>(config);\n  auto it = obj.find(\"nthread\");\n  if (it != obj.cend()) {\n    auto nthread = OptionalArg<Integer>(config, \"nthread\", Integer::Int{0});\n    if (nthread > 0) {\n      omp_set_num_threads(nthread);\n      GlobalConfigThreadLocalStore::Get()->nthread = nthread;\n    }\n    get<Object>(config).erase(\"nthread\");\n  }\n\n  for (auto &items : obj) {\n    switch (items.second.GetValue().Type()) {\n      case xgboost::Value::ValueKind::kInteger: {\n        items.second = String{std::to_string(get<Integer const>(items.second))};\n        break;\n      }\n      case xgboost::Value::ValueKind::kBoolean: {\n        if (get<Boolean const>(items.second)) {\n          items.second = String{\"true\"};\n        } else {\n          items.second = String{\"false\"};\n        }\n        break;\n      }\n      case xgboost::Value::ValueKind::kNumber: {\n        auto n = get<Number const>(items.second);\n        char chars[NumericLimits<float>::kToCharsSize];\n        auto ec = to_chars(chars, chars + sizeof(chars), n).ec;\n        CHECK(ec == std::errc());\n        items.second = String{chars};\n        break;\n      }\n      default:\n        break;\n    }\n  }\n  auto unknown = FromJson(config, GlobalConfigThreadLocalStore::Get());\n  if (!unknown.empty()) {\n    std::stringstream ss;\n    ss << \"Unknown global parameters: { \";\n    size_t i = 0;\n    for (auto const &item : unknown) {\n      ss << item.first;\n      i++;\n      if (i != unknown.size()) {\n        ss << \", \";\n      }\n    }\n    LOG(FATAL) << ss.str() << \" }\";\n  }\n\n  // Check configuration is valid.\n  bool use_async_pool = 
GlobalConfigThreadLocalStore::Get()->use_cuda_async_pool;\n#if defined(XGBOOST_USE_RMM)\n  CHECK(!use_async_pool) << \"Cannot enable `use_cuda_async_pool` when compiled with RMM.\";\n#endif  // defined(XGBOOST_USE_RMM)\n#if defined(xgboost_IS_WIN)\n  CHECK(!use_async_pool) << \"Cannot enable `use_cuda_async_pool` on Windows.\";\n#endif  // defined(XGBOOST_USE_RMM)\n  if (use_async_pool && !curt::MemoryPoolsSupported(xgboost::curt::CurrentDevice())) {\n    LOG(FATAL) << \"CUDA async memory pool is not available for the current device.\";\n  }\n\n  API_END();\n}\n\nXGB_DLL int XGBGetGlobalConfig(const char **json_str) {\n  API_BEGIN_UNGUARD()\n  auto const &global_config = *GlobalConfigThreadLocalStore::Get();\n  Json config{ToJson(global_config)};\n  auto const *mgr = global_config.__MANAGER__();\n\n  for (auto &item : get<Object>(config)) {\n    auto const &str = get<String const>(item.second);\n    auto const &name = item.first;\n    auto e = mgr->Find(name);\n    CHECK(e);\n\n    if (dynamic_cast<dmlc::parameter::FieldEntry<int32_t> const *>(e) ||\n        dynamic_cast<dmlc::parameter::FieldEntry<int64_t> const *>(e) ||\n        dynamic_cast<dmlc::parameter::FieldEntry<uint32_t> const *>(e) ||\n        dynamic_cast<dmlc::parameter::FieldEntry<uint64_t> const *>(e)) {\n      auto i = std::strtoimax(str.data(), nullptr, 10);\n      CHECK_LE(i, static_cast<intmax_t>(std::numeric_limits<int64_t>::max()));\n      item.second = Integer(static_cast<int64_t>(i));\n    } else if (dynamic_cast<dmlc::parameter::FieldEntry<float> const *>(e) ||\n               dynamic_cast<dmlc::parameter::FieldEntry<double> const *>(e)) {\n      float f;\n      auto ec = from_chars(str.data(), str.data() + str.size(), f).ec;\n      CHECK(ec == std::errc());\n      item.second = Number(f);\n    } else if (dynamic_cast<dmlc::parameter::FieldEntry<bool> const *>(e)) {\n      item.second = Boolean(str != \"0\");\n    }\n  }\n\n  config[\"nthread\"] = 
GlobalConfigThreadLocalStore::Get()->nthread;\n  auto &local = *GlobalConfigAPIThreadLocalStore::Get();\n  Json::Dump(config, &local.ret_str);\n\n  xgboost_CHECK_C_ARG_PTR(json_str);\n  *json_str = local.ret_str.c_str();\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out) {\n  xgboost_CHECK_C_ARG_PTR(fname);\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  LOG(WARNING) << error::DeprecatedFunc(__func__, \"2.0.0\", \"XGDMatrixCreateFromURI\");\n\n  Json config{Object()};\n  config[\"uri\"] = std::string{fname};\n  config[\"silent\"] = silent;\n  std::string config_str;\n  Json::Dump(config, &config_str);\n  return XGDMatrixCreateFromURI(config_str.c_str(), out);\n}\n\nXGB_DLL int XGDMatrixCreateFromURI(const char *config, DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(config);\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  auto jconfig = Json::Load(StringView{config});\n  std::string uri = RequiredArg<String>(jconfig, \"uri\", __func__);\n  auto silent = static_cast<bool>(OptionalArg<Integer, int64_t>(jconfig, \"silent\", 1));\n  auto data_split_mode =\n      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(jconfig, \"data_split_mode\", 0));\n\n  *out = new std::shared_ptr<DMatrix>(DMatrix::Load(uri, silent, data_split_mode));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromDataIter(\n    void *data_handle,                  // a Java iterator\n    XGBCallbackDataIterNext *callback,  // C++ callback defined in xgboost4j.cpp\n    const char *cache_info, float missing, DMatrixHandle *out) {\n  API_BEGIN();\n\n  std::string scache;\n  if (cache_info != nullptr) {\n    scache = cache_info;\n  }\n  xgboost::data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR> adapter(\n      data_handle, callback);\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>{DMatrix::Create(&adapter, missing, 1, scache)};\n  API_END();\n}\n\n#ifndef XGBOOST_USE_CUDA\nXGB_DLL int 
XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {\n  API_BEGIN();\n  common::AssertGPUSupport();\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *, char const *, DMatrixHandle *) {\n  API_BEGIN();\n  common::AssertGPUSupport();\n  API_END();\n}\n\n#endif\n\n// Create from data iterator\nXGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                        DataIterResetCallback *reset, XGDMatrixCallbackNext *next,\n                                        char const *config, DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(config);\n\n  auto jconfig = Json::Load(StringView{config});\n  auto missing = GetMissing(jconfig);\n  std::string cache = RequiredArg<String>(jconfig, \"cache_prefix\", __func__);\n  std::int32_t n_threads = OptionalArg<Integer, std::int64_t>(jconfig, \"nthread\", 0);\n  auto on_host = OptionalArg<Boolean>(jconfig, \"on_host\", false);\n  auto min_cache_page_bytes = OptionalArg<Integer, std::int64_t>(jconfig, \"min_cache_page_bytes\",\n                                                                 cuda_impl::MatchingPageBytes());\n  CHECK_EQ(min_cache_page_bytes, cuda_impl::MatchingPageBytes())\n      << \"Page concatenation is not supported by the DMatrix yet.\";\n  auto cache_host_ratio =\n      OptionalArg<Number, float>(jconfig, \"cache_host_ratio\", cuda_impl::AutoHostRatio());\n\n  xgboost_CHECK_C_ARG_PTR(next);\n  xgboost_CHECK_C_ARG_PTR(reset);\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  auto config =\n      ExtMemConfig{cache, on_host, cache_host_ratio, min_cache_page_bytes, missing, n_threads};\n  *out = new std::shared_ptr<xgboost::DMatrix>{\n      xgboost::DMatrix::Create(iter, proxy, reset, next, config)};\n  API_END();\n}\n\nnamespace {\nstd::shared_ptr<DMatrix> GetRefDMatrix(DataIterHandle ref) {\n  std::shared_ptr<DMatrix> _ref{nullptr};\n  if (ref) {\n    auto pp_ref = 
static_cast<std::shared_ptr<xgboost::DMatrix> *>(ref);\n    StringView err{\"Invalid handle to ref.\"};\n    CHECK(pp_ref) << err;\n    _ref = *pp_ref;\n    CHECK(_ref) << err;\n  }\n  return _ref;\n}\n\nvoid WarnDeprecatedMaxQuantileBlocks(Json const &config) {\n  auto const &obj = get<Object const>(config);\n  auto it = obj.find(\"max_quantile_blocks\");\n  if (it != obj.cend() && !IsA<Null>(it->second)) {\n    LOG(WARNING) << \"`max_quantile_blocks` is deprecated and has no effect. \"\n                    \"The parameter will be removed in a future release.\";\n  }\n}\n}  // namespace\n\nXGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                                DataIterHandle ref, DataIterResetCallback *reset,\n                                                XGDMatrixCallbackNext *next, char const *config,\n                                                DMatrixHandle *out) {\n  API_BEGIN();\n  std::shared_ptr<DMatrix> p_ref{GetRefDMatrix(ref)};\n\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto jconfig = Json::Load(StringView{config});\n  WarnDeprecatedMaxQuantileBlocks(jconfig);\n  auto missing = GetMissing(jconfig);\n  auto n_threads = OptionalArg<Integer, int64_t>(jconfig, \"nthread\", 0);\n  auto max_bin = OptionalArg<Integer, int64_t>(jconfig, \"max_bin\", 256);\n\n  xgboost_CHECK_C_ARG_PTR(next);\n  xgboost_CHECK_C_ARG_PTR(reset);\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  *out = new std::shared_ptr<xgboost::DMatrix>{\n      xgboost::DMatrix::Create(iter, proxy, p_ref, reset, next, missing, n_threads, max_bin)};\n  API_END();\n}\n\nXGB_DLL int XGExtMemQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,\n                                                      DataIterHandle ref,\n                                                      DataIterResetCallback *reset,\n                                                      XGDMatrixCallbackNext *next,\n                                     
                 char const *config, DMatrixHandle *out) {\n  API_BEGIN();\n  std::shared_ptr<DMatrix> p_ref{GetRefDMatrix(ref)};\n\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto jconfig = Json::Load(StringView{config});\n  WarnDeprecatedMaxQuantileBlocks(jconfig);\n  auto missing = GetMissing(jconfig);\n  std::int32_t n_threads = OptionalArg<Integer, std::int64_t>(jconfig, \"nthread\", 0);\n  auto max_bin = OptionalArg<Integer, std::int64_t>(jconfig, \"max_bin\", 256);\n  auto on_host = OptionalArg<Boolean>(jconfig, \"on_host\", false);\n  std::string cache = RequiredArg<String>(jconfig, \"cache_prefix\", __func__);\n  auto min_cache_page_bytes = OptionalArg<Integer, std::int64_t>(jconfig, \"min_cache_page_bytes\",\n                                                                 cuda_impl::AutoCachePageBytes());\n  auto cache_host_ratio =\n      OptionalArg<Number, float>(jconfig, \"cache_host_ratio\", cuda_impl::AutoHostRatio());\n\n  xgboost_CHECK_C_ARG_PTR(next);\n  xgboost_CHECK_C_ARG_PTR(reset);\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  auto config =\n      ExtMemConfig{cache, on_host, cache_host_ratio, min_cache_page_bytes, missing, n_threads};\n  *out = new std::shared_ptr<xgboost::DMatrix>{\n      xgboost::DMatrix::Create(iter, proxy, p_ref, reset, next, max_bin, config)};\n  API_END();\n}\n\nXGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);\n  API_END();\n}\n\nnamespace {\n[[nodiscard]] xgboost::data::DMatrixProxy *GetDMatrixProxy(DMatrixHandle handle) {\n  auto p_m = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);\n  CHECK(p_m);\n  auto m = static_cast<xgboost::data::DMatrixProxy *>(p_m->get());\n  CHECK(m) << \"Current DMatrix type does not support set data.\";\n  return m;\n}\n}  // namespace\n\nXGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, char const *data) {\n  API_BEGIN();\n  
CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(data);\n  GetDMatrixProxy(handle)->SetCudaArray(data);\n  API_END();\n}\n\nXGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, char const *data) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(data);\n  GetDMatrixProxy(handle)->SetCudaColumnar(data);\n  API_END();\n}\n\nXGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *data) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(data);\n  GetDMatrixProxy(handle)->SetColumnar(data);\n  API_END();\n}\n\nXGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *data) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(data);\n  GetDMatrixProxy(handle)->SetArray(data);\n  API_END();\n}\n\nXGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, char const *indices,\n                                     char const *data, xgboost::bst_ulong ncol) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(indptr);\n  xgboost_CHECK_C_ARG_PTR(indices);\n  xgboost_CHECK_C_ARG_PTR(data);\n  GetDMatrixProxy(handle)->SetCsr(indptr, indices, data, ncol, true);\n  API_END();\n}\n\n// End Create from data iterator\n\nXGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *c_json_config,\n                                        DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  xgboost_CHECK_C_ARG_PTR(data);\n\n  auto config = Json::Load(c_json_config);\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, std::int64_t>(config, \"nthread\", 0);\n  auto data_split_mode =\n      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, \"data_split_mode\", 0));\n\n  data::ColumnarAdapter adapter{data};\n  *out = new std::shared_ptr<DMatrix>(\n      DMatrix::Create(&adapter, missing, n_threads, \"\", data_split_mode));\n\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, 
char const *indices, char const *data,\n                                   xgboost::bst_ulong ncol, char const *c_json_config,\n                                   DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(indptr);\n  xgboost_CHECK_C_ARG_PTR(indices);\n  xgboost_CHECK_C_ARG_PTR(data);\n  data::CSRArrayAdapter adapter(StringView{indptr}, StringView{indices}, StringView{data}, ncol);\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  auto config = Json::Load(StringView{c_json_config});\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, int64_t>(config, \"nthread\", 0);\n  auto data_split_mode =\n      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, \"data_split_mode\", 0));\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>(\n      DMatrix::Create(&adapter, missing, n_threads, \"\", data_split_mode));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *c_json_config,\n                                     DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(data);\n  xgboost::data::ArrayAdapter adapter{xgboost::data::ArrayAdapter(StringView{data})};\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  auto config = Json::Load(StringView{c_json_config});\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, int64_t>(config, \"nthread\", 0);\n  auto data_split_mode =\n      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, \"data_split_mode\", 0));\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>(\n      DMatrix::Create(&adapter, missing, n_threads, \"\", data_split_mode));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,\n                                   xgboost::bst_ulong nrow, char const *c_json_config,\n                                   DMatrixHandle *out) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(indptr);\n  
xgboost_CHECK_C_ARG_PTR(indices);\n  xgboost_CHECK_C_ARG_PTR(data);\n  data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data},\n                                static_cast<std::size_t>(nrow)};\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  auto config = Json::Load(StringView{c_json_config});\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, int64_t>(config, \"nthread\", common::OmpGetNumThreads(0));\n  auto data_split_mode =\n      static_cast<DataSplitMode>(OptionalArg<Integer, int64_t>(config, \"data_split_mode\", 0));\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>(\n      DMatrix::Create(&adapter, missing, n_threads, \"\", data_split_mode));\n\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromMat(const bst_float *data, xgboost::bst_ulong nrow,\n                                   xgboost::bst_ulong ncol, bst_float missing, DMatrixHandle *out) {\n  API_BEGIN();\n  data::DenseAdapter adapter(data, nrow, ncol);\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, 1));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float *data,  // NOLINT\n                                       xgboost::bst_ulong nrow, xgboost::bst_ulong ncol,\n                                       bst_float missing, DMatrixHandle *out, int nthread) {\n  API_BEGIN();\n  data::DenseAdapter adapter(data, nrow, ncol);\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int *idxset, xgboost::bst_ulong len,\n                                  DMatrixHandle *out) {\n  xgboost_CHECK_C_ARG_PTR(out);\n  return XGDMatrixSliceDMatrixEx(handle, idxset, len, out, 0);\n}\n\nXGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle, const int *idxset, xgboost::bst_ulong len,\n                                   
 DMatrixHandle *out, int allow_groups) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  if (!allow_groups) {\n    CHECK_EQ(static_cast<std::shared_ptr<DMatrix> *>(handle)->get()->Info().group_ptr_.size(), 0U)\n        << \"slice does not support group structure\";\n  }\n  DMatrix *dmat = static_cast<std::shared_ptr<DMatrix> *>(handle)->get();\n  *out = new std::shared_ptr<DMatrix>(dmat->Slice({idxset, static_cast<std::size_t>(len)}));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixFree(DMatrixHandle handle) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  delete static_cast<std::shared_ptr<DMatrix> *>(handle);\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char *fname, int) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto dmat = static_cast<std::shared_ptr<DMatrix> *>(handle)->get();\n  xgboost_CHECK_C_ARG_PTR(fname);\n  if (data::SimpleDMatrix *derived = dynamic_cast<data::SimpleDMatrix *>(dmat)) {\n    derived->SaveToLocalFile(fname);\n  } else {\n    LOG(FATAL) << \"binary saving only supported by SimpleDMatrix\";\n  }\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const bst_float *info,\n                                  xgboost::bst_ulong len) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(field);\n  auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field,\n                                          char const *interface_c_str) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(field);\n  auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  p_fmat->SetInfo(field, interface_c_str);\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *info,\n                                 xgboost::bst_ulong len) {\n  API_BEGIN();\n  
CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(field);\n  LOG(WARNING) << error::DeprecatedFunc(__func__, \"2.1.0\", \"XGDMatrixSetInfoFromInterface\");\n  auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field, const char **c_info,\n                                       const xgboost::bst_ulong size) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto &info = static_cast<std::shared_ptr<DMatrix> *>(handle)->get()->Info();\n  xgboost_CHECK_C_ARG_PTR(field);\n  info.SetFeatureInfo(field, c_info, size);\n  API_END();\n}\n\nXGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,\n                                       xgboost::bst_ulong *len, const char ***out_features) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto m = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  auto &info = static_cast<std::shared_ptr<DMatrix> *>(handle)->get()->Info();\n\n  std::vector<const char *> &charp_vecs = m->GetThreadLocal().ret_vec_charp;\n  std::vector<std::string> &str_vecs = m->GetThreadLocal().ret_vec_str;\n\n  xgboost_CHECK_C_ARG_PTR(field);\n  info.GetFeatureInfo(field, &str_vecs);\n\n  charp_vecs.resize(str_vecs.size());\n  for (size_t i = 0; i < str_vecs.size(); ++i) {\n    charp_vecs[i] = str_vecs[i].c_str();\n  }\n  xgboost_CHECK_C_ARG_PTR(out_features);\n  xgboost_CHECK_C_ARG_PTR(len);\n  *out_features = dmlc::BeginPtr(charp_vecs);\n  *len = static_cast<xgboost::bst_ulong>(charp_vecs.size());\n  API_END();\n}\n\nnamespace {\ntemplate <typename FidxT>\nvoid GetCategoriesImpl(enc::HostColumnsView const &cats, FidxT n_features,\n                       std::string *p_out_storage, char const **out) {\n  auto &ret_str = *p_out_storage;\n  ret_str.clear();\n\n  // We can directly use the storage in the cat container instead of allocating temporary storage.\n  Json jout{Array{}};\n  for 
(decltype(n_features) f_idx = 0; f_idx < n_features; ++f_idx) {\n    if (cats.Empty()) {\n      get<Array>(jout).emplace_back();\n      continue;\n    }\n    auto const &col = cats[f_idx];\n    if (std::visit([](auto &&arg) { return arg.empty(); }, col)) {\n      get<Array>(jout).emplace_back();\n      continue;\n    }\n    std::visit(enc::Overloaded{[&](enc::CatStrArrayView const &str) {\n                                 auto const &offsets = str.offsets;\n                                 auto ovec = linalg::MakeVec(offsets.data(), offsets.size());\n                                 auto jovec = linalg::ArrayInterface(ovec);\n\n                                 auto const &values = str.values;\n                                 auto dvec = linalg::MakeVec(values.data(), values.size());\n                                 auto jdvec = linalg::ArrayInterface(dvec);\n\n                                 get<Array>(jout).emplace_back(Object{});\n                                 get<Array>(jout).back()[\"offsets\"] = std::move(jovec);\n                                 get<Array>(jout).back()[\"values\"] = std::move(jdvec);\n                               },\n                               [&](auto &&values) {\n                                 auto vec = linalg::MakeVec(values.data(), values.size());\n                                 auto jvec = linalg::ArrayInterface(vec);\n                                 get<Array>(jout).emplace_back(std::move(jvec));\n                               }},\n               col);\n  }\n  auto str = Json::Dump(jout);\n  ret_str = std::move(str);\n\n  *out = ret_str.c_str();\n}\n\nCatContainer *CopyCatContainer(Context const *ctx, CatContainer const *cats,\n                               bst_feature_t n_features) {\n  CatContainer *new_cats = new CatContainer{};\n  new_cats->Copy(ctx, *cats);\n  CHECK_EQ(new_cats->Empty(), cats->Empty());\n  if (!new_cats->Empty()) {\n    CHECK_EQ(new_cats->NumFeatures(), n_features);\n    
CHECK_EQ(new_cats->NumFeatures(), cats->NumFeatures());\n  }\n  return new_cats;\n}\n}  // anonymous namespace\n\n/**\n * No actual container method is exposed through the C API. It's just an opaque handle at\n * the moment. This way we get to reuse the methods and the context from the DMatrix and\n * Booster.\n */\nXGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const * /*config*/,\n                                   CategoriesHandle *out) {\n  API_BEGIN()\n  CHECK_HANDLE()\n\n  auto const p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  auto const cats = p_fmat->Cats();\n  xgboost_CHECK_C_ARG_PTR(out);\n  if (cats->Empty()) {\n    out = nullptr;\n  } else {\n    auto new_cats = CopyCatContainer(p_fmat->Ctx(), cats, p_fmat->Info().num_col_);\n    *out = new_cats;\n  }\n\n  API_END()\n}\n\nXGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const * /*config*/,\n                                                CategoriesHandle *out, char const **export_out) {\n  API_BEGIN();\n  CHECK_HANDLE()\n\n  auto const p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);\n  auto const cats = p_fmat->Cats();\n  auto n_features = p_fmat->Info().num_col_;\n\n  xgboost_CHECK_C_ARG_PTR(out);\n  xgboost_CHECK_C_ARG_PTR(export_out);\n\n  if (cats->Empty()) {\n    *out = nullptr;\n    *export_out = nullptr;\n  } else {\n    // Create a new container\n    auto new_cats = CopyCatContainer(p_fmat->Ctx(), cats, n_features);\n    *out = new_cats;\n    // Export to arrow\n    auto &ret_str = p_fmat->GetThreadLocal().ret_str;\n    GetCategoriesImpl(new_cats->HostView(), n_features, &ret_str, export_out);\n  }\n\n  API_END();\n}\n\nXGB_DLL int XGBCategoriesFree(CategoriesHandle handle) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(handle);\n  auto p_cats = static_cast<CatContainer *>(handle);\n  CHECK(p_cats);\n  delete p_cats;\n  API_END();\n}\n\nXGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const 
*data,\n                                  xgboost::bst_ulong size, int type) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  LOG(WARNING) << error::DeprecatedFunc(__func__, \"2.1.0\", \"XGDMatrixSetInfoFromInterface\");\n  CHECK(type >= 1 && type <= 4);\n  xgboost_CHECK_C_ARG_PTR(field);\n\n  Context ctx;\n  auto dtype = static_cast<DataType>(type);\n  auto p_fmat = CastDMatrixHandle(handle);\n\n  // Legacy code using XGBoost dtype, which is a small subset of array interface types.\n  data::DispatchDType(dtype, [&](auto dtype) {\n    using DType = decltype(dtype);\n    auto cast_d_ptr = reinterpret_cast<const DType *>(data);\n    auto t = linalg::TensorView<DType const, 1>(\n        common::Span<DType const>{cast_d_ptr,\n                                  static_cast<typename common::Span<DType>::index_type>(size)},\n        {size}, DeviceOrd::CPU());\n    CHECK(t.CContiguous());\n    Json iface{linalg::ArrayInterface(t)};\n    CHECK(ArrayInterface<1>{iface}.is_contiguous);\n    std::string str = Json::Dump(iface);\n    p_fmat->Info().SetInfo(ctx, field, StringView{str});\n  });\n  API_END();\n}\n\nXGB_DLL int XGDMatrixGetInfoRef(DMatrixHandle handle, char const *field, char const **out_array) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(field);\n  xgboost_CHECK_C_ARG_PTR(out_array);\n\n  auto p_fmat = CastDMatrixHandle(handle);\n  MetaInfo const &info = p_fmat->Info();\n  auto aif = info.GetInfo(p_fmat->Ctx(), StringView{field});\n\n  auto &res = p_fmat->GetThreadLocal().ret_str;\n  res = aif.ArrayInterfaceStr();\n  *out_array = res.c_str();\n\n  API_END();\n}\n\nnamespace {\ntemplate <typename T>\nint OldGetInfoImpl(const DMatrixHandle handle, const char *field, xgboost::bst_ulong *out_len,\n                   const T **out_dptr, DataType dtype) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto p_fmat = CastDMatrixHandle(handle);\n  const MetaInfo &info = p_fmat->Info();\n\n  xgboost_CHECK_C_ARG_PTR(field);\n  xgboost_CHECK_C_ARG_PTR(out_len);\n  
xgboost_CHECK_C_ARG_PTR(out_dptr);\n\n  auto aif = info.GetInfo(p_fmat->Ctx(), StringView{field});\n  CHECK(aif.dtype == dtype) << \"Invalid dtype for the requested field: `\" << field << \"`\";\n  *out_len = aif.Size();\n  *out_dptr = static_cast<T const *>(aif.data);\n  API_END();\n}\n}  // namespace\n\nXGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field,\n                                  xgboost::bst_ulong *out_len, const float **out_dptr) {\n  return OldGetInfoImpl(handle, field, out_len, out_dptr, DataType::kFloat32);\n}\n\nXGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, const char *field,\n                                 xgboost::bst_ulong *out_len, const unsigned **out_dptr) {\n  return OldGetInfoImpl(handle, field, out_len, out_dptr, DataType::kUInt32);\n}\n\nnamespace {\n// out is using xgboost::bst_ulong to make sure the defs of bst_ulong match.\ntemplate <typename Fn>\nstd::enable_if_t<std::is_integral_v<std::invoke_result_t<Fn, DMatrix const *>>, int>\nGetDMatrixIntegralInfo(DMatrixHandle handle, xgboost::bst_ulong *out, Fn &&fn) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto p_m = CastDMatrixHandle(handle);\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = fn(p_m.get());\n  API_END();\n}\n}  // namespace\n\nXGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out) {\n  return GetDMatrixIntegralInfo(handle, out, [](DMatrix const *p_fmat) {\n    return static_cast<bst_ulong>(p_fmat->Info().num_row_);\n  });\n}\n\nXGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out) {\n  return GetDMatrixIntegralInfo(handle, out, [](DMatrix const *p_fmat) {\n    return static_cast<bst_ulong>(p_fmat->Info().num_col_);\n  });\n}\n\n// We name the function non-missing instead of non-zero since zero is perfectly valid for XGBoost.\nXGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out) {\n  return GetDMatrixIntegralInfo(handle, out, [](DMatrix const *p_fmat) {\n    return 
static_cast<bst_ulong>(p_fmat->Info().num_nonzero_);\n  });\n}\n\nXGB_DLL int XGDMatrixDataSplitMode(DMatrixHandle handle, bst_ulong *out) {\n  return GetDMatrixIntegralInfo(handle, out, [](DMatrix const *p_fmat) {\n    return static_cast<bst_ulong>(p_fmat->Info().data_split_mode);\n  });\n}\n\nXGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,\n                                  xgboost::bst_ulong *out_indptr, unsigned *out_indices,\n                                  float *out_data) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto jconfig = Json::Load(StringView{config});\n\n  auto p_m = CastDMatrixHandle(handle);\n\n  xgboost_CHECK_C_ARG_PTR(out_indptr);\n  xgboost_CHECK_C_ARG_PTR(out_indices);\n  xgboost_CHECK_C_ARG_PTR(out_data);\n\n  CHECK_LE(p_m->Info().num_col_, std::numeric_limits<unsigned>::max());\n\n  for (auto const &page : p_m->GetBatches<ExtSparsePage>(p_m->Ctx(), BatchParam{})) {\n    CHECK(page.page);\n    auto const &h_offset = page.page->offset.ConstHostVector();\n    std::copy(h_offset.cbegin(), h_offset.cend(), out_indptr);\n    auto pv = page.page->GetView();\n    common::ParallelFor(page.page->data.Size(), p_m->Ctx()->Threads(), [&](std::size_t i) {\n      auto fvalue = pv.data[i].fvalue;\n      auto findex = pv.data[i].index;\n      out_data[i] = fvalue;\n      out_indices[i] = findex;\n    });\n  }\n\n  API_END();\n}\n\nnamespace {\ntemplate <typename Page>\nvoid GetCutImpl(Context const *ctx, std::shared_ptr<DMatrix> p_m,\n                std::vector<std::uint64_t> *p_indptr, std::vector<float> *p_data) {\n  auto &indptr = *p_indptr;\n  auto &data = *p_data;\n  for (auto const &page : p_m->GetBatches<Page>(ctx, {})) {\n    auto const &cut = page.Cuts();\n    auto const &ptrs = cut.Ptrs();\n    auto const &vals = cut.Values();\n    auto ft = p_m->Info().feature_types.ConstHostSpan();\n\n    indptr.resize(ptrs.size());\n    data.clear();\n\n    for (bst_feature_t fidx = 0; fidx 
< p_m->Info().num_col_; ++fidx) {\n      indptr[fidx] = data.size();\n\n      if (!common::IsCat(ft, fidx)) {\n        data.push_back(common::HistogramCuts::NumericBinLowerBound(ptrs, vals, fidx, ptrs[fidx]));\n      }\n\n      auto beg = ptrs[fidx];\n      auto end = ptrs[fidx + 1];\n      data.insert(data.end(), vals.cbegin() + beg, vals.cbegin() + end);\n    }\n    indptr.back() = data.size();\n    break;\n  }\n}\n}  // namespace\n\nXGB_DLL int XGDMatrixGetQuantileCut(DMatrixHandle const handle, char const *config,\n                                    char const **out_indptr, char const **out_data) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  auto p_m = CastDMatrixHandle(handle);\n\n  xgboost_CHECK_C_ARG_PTR(config);\n  xgboost_CHECK_C_ARG_PTR(out_indptr);\n  xgboost_CHECK_C_ARG_PTR(out_data);\n\n  auto jconfig = Json::Load(StringView{config});\n\n  if (!p_m->PageExists<GHistIndexMatrix>() && !p_m->PageExists<EllpackPage>()) {\n    LOG(FATAL) << \"The quantile cut hasn't been generated yet. Unless this is a `QuantileDMatrix`, \"\n                  \"quantile cut is generated during training.\";\n  }\n  // Get return buffer\n  auto &data = p_m->GetThreadLocal().ret_vec_float;\n  auto &indptr = p_m->GetThreadLocal().ret_vec_u64;\n\n  if (p_m->PageExists<GHistIndexMatrix>()) {\n    auto ctx = p_m->Ctx()->IsCPU() ? *p_m->Ctx() : p_m->Ctx()->MakeCPU();\n    GetCutImpl<GHistIndexMatrix>(&ctx, p_m, &indptr, &data);\n  } else {\n    auto ctx = p_m->Ctx()->IsCUDA() ? 
*p_m->Ctx() : p_m->Ctx()->MakeCUDA(0);\n    GetCutImpl<EllpackPage>(&ctx, p_m, &indptr, &data);\n  }\n\n  // Create a CPU context\n  Context ctx;\n  // Get return buffer\n  auto &ret_vec_str = p_m->GetThreadLocal().ret_vec_str;\n  ret_vec_str.clear();\n\n  ret_vec_str.emplace_back(linalg::ArrayInterfaceStr(\n      linalg::MakeTensorView(&ctx, common::Span{indptr.data(), indptr.size()}, indptr.size())));\n  ret_vec_str.emplace_back(linalg::ArrayInterfaceStr(\n      linalg::MakeTensorView(&ctx, common::Span{data.data(), data.size()}, data.size())));\n\n  auto &charp_vecs = p_m->GetThreadLocal().ret_vec_charp;\n  charp_vecs.resize(ret_vec_str.size());\n  std::transform(ret_vec_str.cbegin(), ret_vec_str.cend(), charp_vecs.begin(),\n                 [](auto const &str) { return str.c_str(); });\n\n  *out_indptr = charp_vecs[0];\n  *out_data = charp_vecs[1];\n  API_END();\n}\n\n// xgboost implementation\nXGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], xgboost::bst_ulong len,\n                            BoosterHandle *out) {\n  API_BEGIN();\n  std::vector<std::shared_ptr<DMatrix>> mats;\n  for (xgboost::bst_ulong i = 0; i < len; ++i) {\n    xgboost_CHECK_C_ARG_PTR(dmats);\n    mats.push_back(*static_cast<std::shared_ptr<DMatrix> *>(dmats[i]));\n  }\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = Learner::Create(mats);\n  API_END();\n}\n\nXGB_DLL int XGBoosterFree(BoosterHandle handle) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  delete static_cast<Learner *>(handle);\n  API_END();\n}\n\nXGB_DLL int XGBoosterReset(BoosterHandle handle) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  static_cast<Learner *>(handle)->Reset();\n  API_END();\n}\n\nXGB_DLL int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  static_cast<Learner *>(handle)->SetParam(name, value);\n  API_END();\n}\n\nXGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, xgboost::bst_ulong *out) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  
static_cast<Learner *>(handle)->Configure();\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = static_cast<Learner *>(handle)->GetNumFeature();\n  API_END();\n}\n\nXGB_DLL int XGBoosterBoostedRounds(BoosterHandle handle, int *out) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  static_cast<Learner *>(handle)->Configure();\n  xgboost_CHECK_C_ARG_PTR(out);\n  *out = static_cast<Learner *>(handle)->BoostedRounds();\n  API_END();\n}\n\nXGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *json_parameters) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(json_parameters);\n  Json config{Json::Load(StringView{json_parameters})};\n  static_cast<Learner *>(handle)->LoadConfig(config);\n  API_END();\n}\n\nXGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, xgboost::bst_ulong *out_len,\n                                    char const **out_str) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  Json config{Object()};\n  auto *learner = static_cast<Learner *>(handle);\n  learner->Configure();\n  learner->SaveConfig(&config);\n  std::string &raw_str = learner->GetThreadLocal().ret_str;\n  Json::Dump(config, &raw_str);\n\n  xgboost_CHECK_C_ARG_PTR(out_str);\n  xgboost_CHECK_C_ARG_PTR(out_len);\n\n  *out_str = raw_str.c_str();\n  *out_len = static_cast<xgboost::bst_ulong>(raw_str.length());\n  API_END();\n}\n\nXGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *bst = static_cast<Learner *>(handle);\n  xgboost_CHECK_C_ARG_PTR(dtrain);\n  auto *dtr = static_cast<std::shared_ptr<DMatrix> *>(dtrain);\n  CHECK(dtr);\n  bst->UpdateOneIter(iter, *dtr);\n  API_END();\n}\n\nXGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bst_float *grad,\n                                  bst_float *hess, xgboost::bst_ulong len) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  LOG(WARNING) << error::DeprecatedFunc(__func__, \"2.1.0\", \"XGBoosterTrainOneIter\");\n  auto *learner = 
static_cast<Learner *>(handle);\n  auto ctx = learner->Ctx()->MakeCPU();\n\n  auto t_grad = linalg::MakeTensorView(&ctx, common::Span{grad, static_cast<size_t>(len)}, len);\n  auto t_hess = linalg::MakeTensorView(&ctx, common::Span{hess, static_cast<size_t>(len)}, len);\n\n  auto s_grad = linalg::ArrayInterfaceStr(t_grad);\n  auto s_hess = linalg::ArrayInterfaceStr(t_hess);\n\n  return XGBoosterTrainOneIter(handle, dtrain, 0, s_grad.c_str(), s_hess.c_str());\n  API_END();\n}\n\nnamespace xgboost {\n// copy user-supplied CUDA gradient arrays\nvoid CopyGradientFromCudaArrays(Context const *, ArrayInterface<2, false> const &,\n                                ArrayInterface<2, false> const &, linalg::Matrix<GradientPair> *)\n#if !defined(XGBOOST_USE_CUDA)\n{\n  common::AssertGPUSupport();\n}\n#else\n    ;  // NOLINT\n#endif\n\n// Helper function to copy gradient from array interface to linalg::Matrix\nvoid CopyGradientFromArrays(Context const *ctx, ArrayInterface<2, false> const &i_grad,\n                            ArrayInterface<2, false> const &i_hess,\n                            linalg::Matrix<GradientPair> *out_gpair) {\n  auto grad_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_grad.data);\n  auto hess_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_hess.data);\n  CHECK_EQ(grad_is_cuda, hess_is_cuda) << \"gradient and hessian should be on the same device.\";\n\n  if (!grad_is_cuda) {\n    out_gpair->Reshape(i_grad.Shape<0>(), i_grad.Shape<1>());\n    auto h_gpair = out_gpair->HostView();\n    DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) {\n      DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) {\n        common::ParallelFor(h_gpair.Size(), ctx->Threads(),\n                            detail::CustomGradHessOp{t_grad, t_hess, h_gpair});\n      });\n    });\n  } else {\n    CopyGradientFromCudaArrays(ctx, i_grad, i_hess, out_gpair);\n  }\n}\n}  // namespace xgboost\n\nXGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, 
int iter,\n                                  char const *grad, char const *hess) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(grad);\n  xgboost_CHECK_C_ARG_PTR(hess);\n  auto p_fmat = CastDMatrixHandle(dtrain);\n  ArrayInterface<2, false> i_grad{StringView{grad}};\n  ArrayInterface<2, false> i_hess{StringView{hess}};\n  StringView msg{\"Mismatched shape between the gradient and hessian.\"};\n  CHECK_EQ(i_grad.Shape<0>(), i_hess.Shape<0>()) << msg;\n  CHECK_EQ(i_grad.Shape<1>(), i_hess.Shape<1>()) << msg;\n  CHECK_EQ(i_grad.Shape<0>(), p_fmat->Info().num_row_)\n      << \"Mismatched size between the gradient and training data.\";\n  auto *learner = static_cast<Learner *>(handle);\n  auto ctx = learner->Ctx();\n  GradientContainer gpair;\n  CopyGradientFromArrays(ctx, i_grad, i_hess, &gpair.gpair);\n  learner->BoostOneIter(iter, p_fmat, &gpair);\n  API_END();\n}\n\ntypedef char const *JArrayStr;  // NOLINT\n\n// Hidden, working-in-progress support for reduced gradient.\n/**\n * @brief Use a different type of gradient for tree split.\n *\n * @param split_grad Gradient for finding tree splits.\n * @param split_hess Hessian for finding tree splits.\n * @param value_grad Gradient for calculating tree leaf weights.\n * @param value_hess Hessian for calculating tree leaf weights.\n */\nXGB_DLL int XGBoosterTrainOneIterWithSplitGrad(BoosterHandle handle, DMatrixHandle dtrain, int iter,\n                                               JArrayStr split_grad, JArrayStr split_hess,\n                                               JArrayStr value_grad, JArrayStr value_hess) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *learner = static_cast<Learner *>(handle);\n  GradientContainer gpair;\n  auto ctx = learner->Ctx();\n  {\n    ArrayInterface<2, false> i_grad{StringView{split_grad}};\n    ArrayInterface<2, false> i_hess{StringView{split_hess}};\n    StringView msg{\"Mismatched shape between the gradient and hessian.\"};\n    CHECK_EQ(i_grad.Shape<0>(), 
i_hess.Shape<0>()) << msg;\n    CHECK_EQ(i_grad.Shape<1>(), i_hess.Shape<1>()) << msg;\n    CopyGradientFromArrays(ctx, i_grad, i_hess, &gpair.gpair);\n  }\n  {\n    ArrayInterface<2, false> i_grad{StringView{value_grad}};\n    ArrayInterface<2, false> i_hess{StringView{value_hess}};\n    StringView msg{\"Mismatched shape between the gradient and hessian.\"};\n    CHECK_EQ(i_grad.Shape<0>(), i_hess.Shape<0>()) << msg;\n    CHECK_EQ(i_grad.Shape<1>(), i_hess.Shape<1>()) << msg;\n    CopyGradientFromArrays(ctx, i_grad, i_hess, &gpair.value_gpair);\n  }\n\n  auto p_fmat = CastDMatrixHandle(dtrain);\n  learner->BoostOneIter(iter, p_fmat, &gpair);\n\n  API_END();\n}\n\nXGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[],\n                                 const char *evnames[], xgboost::bst_ulong len,\n                                 const char **out_str) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *bst = static_cast<Learner *>(handle);\n  std::string &eval_str = bst->GetThreadLocal().ret_str;\n\n  std::vector<std::shared_ptr<DMatrix>> data_sets;\n  std::vector<std::string> data_names;\n\n  for (xgboost::bst_ulong i = 0; i < len; ++i) {\n    xgboost_CHECK_C_ARG_PTR(dmats);\n    data_sets.push_back(*static_cast<std::shared_ptr<DMatrix> *>(dmats[i]));\n    xgboost_CHECK_C_ARG_PTR(evnames);\n    data_names.emplace_back(evnames[i]);\n  }\n\n  eval_str = bst->EvalOneIter(iter, data_sets, data_names);\n  xgboost_CHECK_C_ARG_PTR(out_str);\n  *out_str = eval_str.c_str();\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int option_mask,\n                             unsigned ntree_limit, int training, xgboost::bst_ulong *len,\n                             const bst_float **out_result) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *learner = static_cast<Learner *>(handle);\n  auto &entry = learner->GetThreadLocal().prediction_entry;\n  auto iteration_end = GetIterationFromTreeLimit(ntree_limit, 
learner);\n  learner->Predict(*static_cast<std::shared_ptr<DMatrix> *>(dmat), (option_mask & 1) != 0,\n                   &entry.predictions, 0, iteration_end, static_cast<bool>(training),\n                   (option_mask & 2) != 0, (option_mask & 4) != 0, (option_mask & 8) != 0,\n                   (option_mask & 16) != 0);\n\n  xgboost_CHECK_C_ARG_PTR(len);\n  xgboost_CHECK_C_ARG_PTR(out_result);\n\n  *out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());\n  *len = static_cast<xgboost::bst_ulong>(entry.predictions.Size());\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat,\n                                        char const *c_json_config,\n                                        xgboost::bst_ulong const **out_shape,\n                                        xgboost::bst_ulong *out_dim, bst_float const **out_result) {\n  API_BEGIN();\n  if (handle == nullptr) {\n    LOG(FATAL) << \"Booster has not been initialized or has already been disposed.\";\n  }\n  if (dmat == nullptr) {\n    LOG(FATAL) << \"DMatrix has not been initialized or has already been disposed.\";\n  }\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  auto config = Json::Load(StringView{c_json_config});\n\n  auto *learner = static_cast<Learner *>(handle);\n  auto &entry = learner->GetThreadLocal().prediction_entry;\n  auto p_m = *static_cast<std::shared_ptr<DMatrix> *>(dmat);\n\n  auto type = PredictionType(RequiredArg<Integer>(config, \"type\", __func__));\n  auto iteration_begin = RequiredArg<Integer>(config, \"iteration_begin\", __func__);\n  auto iteration_end = RequiredArg<Integer>(config, \"iteration_end\", __func__);\n\n  auto const &j_config = get<Object const>(config);\n  auto ntree_limit_it = j_config.find(\"ntree_limit\");\n  if (ntree_limit_it != j_config.cend() && !IsA<Null>(ntree_limit_it->second) &&\n      get<Integer const>(ntree_limit_it->second) != 0) {\n    CHECK(iteration_end == 0)\n        << \"Only one of the 
`ntree_limit` or `iteration_range` can be specified.\";\n    LOG(WARNING) << \"`ntree_limit` is deprecated, use `iteration_range` instead.\";\n    iteration_end = GetIterationFromTreeLimit(get<Integer const>(ntree_limit_it->second), learner);\n  }\n\n  bool approximate =\n      type == PredictionType::kApproxContribution || type == PredictionType::kApproxInteraction;\n  bool contribs =\n      type == PredictionType::kContribution || type == PredictionType::kApproxContribution;\n  bool interactions =\n      type == PredictionType::kInteraction || type == PredictionType::kApproxInteraction;\n  bool training = RequiredArg<Boolean>(config, \"training\", __func__);\n  learner->Predict(p_m, type == PredictionType::kMargin, &entry.predictions, iteration_begin,\n                   iteration_end, training, type == PredictionType::kLeaf, contribs, approximate,\n                   interactions);\n\n  xgboost_CHECK_C_ARG_PTR(out_result);\n  *out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());\n\n  auto &shape = learner->GetThreadLocal().prediction_shape;\n  auto chunksize = p_m->Info().num_row_ == 0 ? 0 : entry.predictions.Size() / p_m->Info().num_row_;\n  auto rounds = iteration_end - iteration_begin;\n  rounds = rounds == 0 ? 
learner->BoostedRounds() : rounds;\n  // Determine shape\n  bool strict_shape = RequiredArg<Boolean>(config, \"strict_shape\", __func__);\n\n  xgboost_CHECK_C_ARG_PTR(out_dim);\n  xgboost_CHECK_C_ARG_PTR(out_shape);\n\n  CalcPredictShape(strict_shape, type, p_m->Info().num_row_, p_m->Info().num_col_, chunksize,\n                   learner->Groups(), rounds, &shape, out_dim);\n  *out_shape = dmlc::BeginPtr(shape);\n  API_END();\n}\n\nvoid InplacePredictImpl(std::shared_ptr<DMatrix> p_m, char const *c_json_config, Learner *learner,\n                        xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,\n                        const float **out_result) {\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  auto config = Json::Load(StringView{c_json_config});\n\n  HostDeviceVector<float> *p_predt{nullptr};\n  auto type = PredictionType(RequiredArg<Integer>(config, \"type\", __func__));\n  float missing = GetMissing(config);\n  learner->InplacePredict(p_m, type, missing, &p_predt,\n                          RequiredArg<Integer>(config, \"iteration_begin\", __func__),\n                          RequiredArg<Integer>(config, \"iteration_end\", __func__));\n  CHECK(p_predt);\n  auto &shape = learner->GetThreadLocal().prediction_shape;\n  auto const &info = p_m->Info();\n  auto n_samples = info.num_row_;\n  auto n_features = info.num_col_;\n  auto chunksize = n_samples == 0 ? 
0 : p_predt->Size() / n_samples;\n  bool strict_shape = RequiredArg<Boolean>(config, \"strict_shape\", __func__);\n\n  xgboost_CHECK_C_ARG_PTR(out_dim);\n  CalcPredictShape(strict_shape, type, n_samples, n_features, chunksize, learner->Groups(),\n                   learner->BoostedRounds(), &shape, out_dim);\n  CHECK_GE(p_predt->Size(), n_samples);\n\n  xgboost_CHECK_C_ARG_PTR(out_result);\n  xgboost_CHECK_C_ARG_PTR(out_shape);\n\n  *out_result = dmlc::BeginPtr(p_predt->HostVector());\n  *out_shape = dmlc::BeginPtr(shape);\n}\n\nXGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *data,\n                                      char const *c_json_config, DMatrixHandle m,\n                                      xgboost::bst_ulong const **out_shape,\n                                      xgboost::bst_ulong *out_dim, const float **out_result) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  std::shared_ptr<DMatrix> p_m{nullptr};\n  if (!m) {\n    p_m.reset(new data::DMatrixProxy);\n  } else {\n    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);\n  }\n  auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());\n  CHECK(proxy) << \"Invalid input type for inplace predict.\";\n  xgboost_CHECK_C_ARG_PTR(data);\n  proxy->SetArray(data);\n  auto *learner = static_cast<xgboost::Learner *>(handle);\n  InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,\n                                         char const *c_json_config, DMatrixHandle m,\n                                         xgboost::bst_ulong const **out_shape,\n                                         xgboost::bst_ulong *out_dim, const float **out_result) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  std::shared_ptr<DMatrix> p_m{nullptr};\n  if (!m) {\n    p_m.reset(new data::DMatrixProxy);\n  } else {\n    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);\n  }\n  auto proxy = 
dynamic_cast<data::DMatrixProxy *>(p_m.get());\n  CHECK(proxy) << \"Invalid input type for inplace predict.\";\n  xgboost_CHECK_C_ARG_PTR(array_interface);\n  proxy->SetColumnar(array_interface);\n  auto *learner = static_cast<xgboost::Learner *>(handle);\n  InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,\n                                    char const *data, xgboost::bst_ulong cols,\n                                    char const *c_json_config, DMatrixHandle m,\n                                    xgboost::bst_ulong const **out_shape,\n                                    xgboost::bst_ulong *out_dim, const float **out_result) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  std::shared_ptr<DMatrix> p_m{nullptr};\n  if (!m) {\n    p_m.reset(new data::DMatrixProxy);\n  } else {\n    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);\n  }\n  auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());\n  CHECK(proxy) << \"Invalid input type for inplace predict.\";\n  xgboost_CHECK_C_ARG_PTR(indptr);\n  proxy->SetCsr(indptr, indices, data, cols, true);\n  auto *learner = static_cast<xgboost::Learner *>(handle);\n  InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);\n  API_END();\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nXGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *, char const *,\n                                          DMatrixHandle, xgboost::bst_ulong const **,\n                                          xgboost::bst_ulong *, const float **) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  common::AssertGPUSupport();\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *, char const *,\n                                             DMatrixHandle, xgboost::bst_ulong const **,\n                                             
xgboost::bst_ulong *, const float **) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  common::AssertGPUSupport();\n  API_END();\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nnamespace {\ntemplate <typename Buffer, typename Iter = typename Buffer::const_iterator>\nJson DispatchModelType(Buffer const &buffer, StringView ext, bool warn) {\n  auto first_non_space = [&](Iter beg, Iter end) {\n    for (auto i = beg; i != end; ++i) {\n      if (!std::isspace(*i)) {\n        return i;\n      }\n    }\n    return end;\n  };\n\n  Json model;\n  auto it = first_non_space(buffer.cbegin() + 1, buffer.cend());\n  if (it != buffer.cend() && *it == '\"') {\n    if (warn) {\n      LOG(WARNING) << \"Unknown file format: `\" << ext << \"`. Using JSON (`json`) as a guess.\";\n    }\n    model = Json::Load(StringView{buffer.data(), buffer.size()});\n  } else if (it != buffer.cend() && std::isalpha(*it)) {\n    if (warn) {\n      LOG(WARNING) << \"Unknown file format: `\" << ext << \"`. Using UBJSON (`ubj`) as a guess.\";\n    }\n    model = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);\n  } else {\n    LOG(FATAL) << \"Invalid model format. 
Expecting UBJSON (`ubj`) or JSON (`json`), got `\" << ext\n               << \"`\";\n  }\n  return model;\n}\n}  // namespace\n\nXGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(fname);\n  auto read_file = [&]() {\n    auto str = common::LoadSequentialFile(fname);\n    // \"{}\"\n    CHECK_GE(str.size(), 2) << error::InvalidModel(fname);\n    // The old binary format has the starting bytes \"binf\".\n    if (str.size() >= 4 && StringView{str.data(), 4} == \"binf\") {  // NOLINT\n      LOG(FATAL) << error::OldBinaryModel(fname);\n    }\n    CHECK_EQ(str[0], '{') << error::InvalidModel(fname);\n    return str;\n  };\n  auto ext = common::FileExtension(fname);\n  if (ext == \"json\") {\n    auto buffer = read_file();\n    Json in{Json::Load(StringView{buffer.data(), buffer.size()})};\n    static_cast<Learner *>(handle)->LoadModel(in);\n  } else if (ext == \"ubj\") {\n    auto buffer = read_file();\n    Json in = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);\n    static_cast<Learner *>(handle)->LoadModel(in);\n  } else {\n    auto buffer = read_file();\n    auto in = DispatchModelType(buffer, ext, true);\n    static_cast<Learner *>(handle)->LoadModel(in);\n  }\n  API_END();\n}\n\nXGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(fname);\n\n  std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, \"w\"));\n  auto *learner = static_cast<Learner *>(handle);\n  learner->Configure();\n  auto save_json = [&](std::ios::openmode mode) {\n    Json out{Object()};\n    learner->SaveModel(&out);\n    std::vector<char> str;\n    Json::Dump(out, &str, mode);\n    fo->Write(str.data(), str.size());\n  };\n  auto ext = common::FileExtension(fname);\n  if (ext == \"json\") {\n    save_json(std::ios::out);\n  } else if (ext == \"ubj\") {\n    save_json(std::ios::binary);\n  } else {\n  
  LOG(WARNING) << \"Saving model in the UBJSON format as default.  You can use a file extension:\"\n                    \" `json` or `ubj` to choose between formats.\";\n    save_json(std::ios::binary);\n  }\n  API_END();\n}\n\nXGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf,\n                                         xgboost::bst_ulong len) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(buf);\n  using CharT = std::add_const_t<char>;\n  using IdxType = common::Span<CharT>::index_type;\n  auto buffer = common::Span{static_cast<CharT *>(buf), static_cast<IdxType>(len)};\n  // Don't warn, we have to guess the format with buffer input.\n  auto in = DispatchModelType(buffer, \"\", false);\n  common::MemoryFixSizeBuffer fs((void *)buf, len);  // NOLINT(*)\n  static_cast<Learner *>(handle)->LoadModel(in);\n  API_END();\n}\n\nXGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_config,\n                                       xgboost::bst_ulong *out_len, char const **out_dptr) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  xgboost_CHECK_C_ARG_PTR(json_config);\n  xgboost_CHECK_C_ARG_PTR(out_dptr);\n  xgboost_CHECK_C_ARG_PTR(out_len);\n\n  auto config = Json::Load(StringView{json_config});\n  auto format = RequiredArg<String>(config, \"format\", __func__);\n\n  auto *learner = static_cast<Learner *>(handle);\n  learner->Configure();\n\n  auto save_json = [&](std::ios::openmode mode) {\n    std::vector<char> &raw_char_vec = learner->GetThreadLocal().ret_char_vec;\n    Json out{Object{}};\n    learner->SaveModel(&out);\n    Json::Dump(out, &raw_char_vec, mode);\n    *out_dptr = dmlc::BeginPtr(raw_char_vec);\n    *out_len = static_cast<xgboost::bst_ulong>(raw_char_vec.size());\n  };\n\n  Json out{Object{}};\n  if (format == \"json\") {\n    save_json(std::ios::out);\n  } else if (format == \"ubj\") {\n    save_json(std::ios::binary);\n  } else {\n    LOG(FATAL) << \"Unknown model format: `\" << format\n          
     << \"`. Expecting UBJSON (`ubj`) or JSON (`json`).\";\n  }\n\n  API_END();\n}\n\n// The following two functions are `Load` and `Save` for memory based\n// serialization methods. E.g. Python pickle.\nXGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, xgboost::bst_ulong *out_len,\n                                       const char **out_dptr) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  auto *learner = static_cast<Learner *>(handle);\n  std::string &raw_str = learner->GetThreadLocal().ret_str;\n  raw_str.resize(0);\n  common::MemoryBufferStream fo(&raw_str);\n  learner->Configure();\n  learner->Save(&fo);\n\n  xgboost_CHECK_C_ARG_PTR(out_dptr);\n  xgboost_CHECK_C_ARG_PTR(out_len);\n\n  *out_dptr = dmlc::BeginPtr(raw_str);\n  *out_len = static_cast<xgboost::bst_ulong>(raw_str.length());\n  API_END();\n}\n\nXGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle, const void *buf,\n                                           xgboost::bst_ulong len) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(buf);\n\n  common::MemoryFixSizeBuffer fs((void *)buf, len);  // NOLINT(*)\n  static_cast<Learner *>(handle)->Load(&fs);\n  API_END();\n}\n\nXGB_DLL int XGBoosterSlice(BoosterHandle handle, int begin_layer, int end_layer, int step,\n                           BoosterHandle *out) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  xgboost_CHECK_C_ARG_PTR(out);\n\n  auto *learner = static_cast<Learner *>(handle);\n  bool out_of_bound = false;\n  auto p_out = learner->Slice(begin_layer, end_layer, step, &out_of_bound);\n  if (out_of_bound) {\n    return -2;\n  }\n  CHECK(p_out);\n  *out = p_out;\n  API_END();\n}\n\ninline void XGBoostDumpModelImpl(BoosterHandle handle, FeatureMap *fmap, int with_stats,\n                                 const char *format, xgboost::bst_ulong *len,\n                                 const char ***out_models) {\n  auto *bst = static_cast<Learner *>(handle);\n  bst->Configure();\n  GenerateFeatureMap(bst, {}, bst->GetNumFeature(), 
fmap);\n\n  std::vector<std::string> &str_vecs = bst->GetThreadLocal().ret_vec_str;\n  std::vector<const char *> &charp_vecs = bst->GetThreadLocal().ret_vec_charp;\n  str_vecs = bst->DumpModel(*fmap, with_stats != 0, format);\n  charp_vecs.resize(str_vecs.size());\n  for (size_t i = 0; i < str_vecs.size(); ++i) {\n    charp_vecs[i] = str_vecs[i].c_str();\n  }\n\n  xgboost_CHECK_C_ARG_PTR(out_models);\n  xgboost_CHECK_C_ARG_PTR(len);\n\n  *out_models = dmlc::BeginPtr(charp_vecs);\n  *len = static_cast<xgboost::bst_ulong>(charp_vecs.size());\n}\n\nXGB_DLL int XGBoosterDumpModel(BoosterHandle handle, const char *fmap, int with_stats,\n                               xgboost::bst_ulong *len, const char ***out_models) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  return XGBoosterDumpModelEx(handle, fmap, with_stats, \"text\", len, out_models);\n  API_END();\n}\n\nXGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, const char *fmap, int with_stats,\n                                 const char *format, xgboost::bst_ulong *len,\n                                 const char ***out_models) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  xgboost_CHECK_C_ARG_PTR(fmap);\n  std::string uri{fmap};\n  FeatureMap featmap = LoadFeatureMap(uri);\n  XGBoostDumpModelImpl(handle, &featmap, with_stats, format, len, out_models);\n  API_END();\n}\n\nXGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle, int fnum, const char **fname,\n                                           const char **ftype, int with_stats,\n                                           xgboost::bst_ulong *len, const char ***out_models) {\n  return XGBoosterDumpModelExWithFeatures(handle, fnum, fname, ftype, with_stats, \"text\", len,\n                                          out_models);\n}\n\nXGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, int fnum, const char **fname,\n                                             const char **ftype, int with_stats, const char *format,\n                                
             xgboost::bst_ulong *len, const char ***out_models) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  FeatureMap featmap;\n  for (int i = 0; i < fnum; ++i) {\n    xgboost_CHECK_C_ARG_PTR(fname);\n    xgboost_CHECK_C_ARG_PTR(ftype);\n    featmap.PushBack(i, fname[i], ftype[i]);\n  }\n  XGBoostDumpModelImpl(handle, &featmap, with_stats, format, len, out_models);\n  API_END();\n}\n\nXGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const * /*config*/,\n                                   CategoriesHandle *out) {\n  API_BEGIN()\n  CHECK_HANDLE()\n\n  auto *bst = static_cast<Learner *>(handle);\n  auto const cats = bst->Cats();\n  xgboost_CHECK_C_ARG_PTR(out);\n  if (cats->Empty()) {\n    out = nullptr;\n  } else {\n    auto new_cats = CopyCatContainer(bst->Ctx(), cats, bst->GetNumFeature());\n    *out = new_cats;\n  }\n\n  API_END()\n}\n\nXGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const * /*config*/,\n                                                CategoriesHandle *out, char const **export_out) {\n  API_BEGIN()\n  CHECK_HANDLE()\n\n  auto *bst = static_cast<Learner *>(handle);\n  auto const cats = bst->Cats();\n  auto n_features = bst->GetNumFeature();\n\n  xgboost_CHECK_C_ARG_PTR(out);\n  xgboost_CHECK_C_ARG_PTR(export_out);\n\n  if (cats->Empty()) {\n    *out = nullptr;\n    *export_out = nullptr;\n  } else {\n    // Create a new container\n    auto new_cats = CopyCatContainer(bst->Ctx(), cats, n_features);\n    *out = new_cats;\n    // Export to arrow\n    auto &ret_str = bst->GetThreadLocal().ret_str;\n    GetCategoriesImpl(new_cats->HostView(), n_features, &ret_str, export_out);\n  }\n\n  API_END()\n}\n\nXGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char *key, const char **out,\n                             int *success) {\n  auto *bst = static_cast<Learner *>(handle);\n  std::string &ret_str = bst->GetThreadLocal().ret_str;\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  xgboost_CHECK_C_ARG_PTR(out);\n  
xgboost_CHECK_C_ARG_PTR(success);\n\n  if (bst->GetAttr(key, &ret_str)) {\n    *out = ret_str.c_str();\n    *success = 1;\n  } else {\n    *out = nullptr;\n    *success = 0;\n  }\n  API_END();\n}\n\nXGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char *key, const char *value) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *bst = static_cast<Learner *>(handle);\n  xgboost_CHECK_C_ARG_PTR(key);\n  if (value == nullptr) {\n    bst->DelAttr(key);\n  } else {\n    xgboost_CHECK_C_ARG_PTR(value);\n    bst->SetAttr(key, value);\n  }\n  API_END();\n}\n\nXGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, xgboost::bst_ulong *out_len,\n                                  const char ***out) {\n  API_BEGIN();\n  CHECK_HANDLE();\n\n  auto *learner = static_cast<Learner *>(handle);\n  std::vector<std::string> &str_vecs = learner->GetThreadLocal().ret_vec_str;\n  std::vector<const char *> &charp_vecs = learner->GetThreadLocal().ret_vec_charp;\n  str_vecs = learner->GetAttrNames();\n  charp_vecs.resize(str_vecs.size());\n  for (size_t i = 0; i < str_vecs.size(); ++i) {\n    charp_vecs[i] = str_vecs[i].c_str();\n  }\n\n  xgboost_CHECK_C_ARG_PTR(out);\n  xgboost_CHECK_C_ARG_PTR(out_len);\n\n  *out = dmlc::BeginPtr(charp_vecs);\n  *out_len = static_cast<xgboost::bst_ulong>(charp_vecs.size());\n  API_END();\n}\n\nXGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,\n                                       const char **features, const xgboost::bst_ulong size) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *learner = static_cast<Learner *>(handle);\n  std::vector<std::string> feature_info;\n  if (size > 0) {\n    xgboost_CHECK_C_ARG_PTR(features);\n  }\n  for (size_t i = 0; i < size; ++i) {\n    feature_info.emplace_back(features[i]);\n  }\n\n  xgboost_CHECK_C_ARG_PTR(field);\n  if (!std::strcmp(field, \"feature_name\")) {\n    learner->SetFeatureNames(feature_info);\n  } else if (!std::strcmp(field, \"feature_type\")) {\n    
learner->SetFeatureTypes(feature_info);\n  } else {\n    LOG(FATAL) << \"Unknown field for Booster feature info:\" << field;\n  }\n  API_END();\n}\n\nXGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,\n                                       xgboost::bst_ulong *len, const char ***out_features) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto const *learner = static_cast<Learner const *>(handle);\n  std::vector<const char *> &charp_vecs = learner->GetThreadLocal().ret_vec_charp;\n  std::vector<std::string> &str_vecs = learner->GetThreadLocal().ret_vec_str;\n  if (!std::strcmp(field, \"feature_name\")) {\n    learner->GetFeatureNames(&str_vecs);\n  } else if (!std::strcmp(field, \"feature_type\")) {\n    learner->GetFeatureTypes(&str_vecs);\n  } else {\n    LOG(FATAL) << \"Unknown field for Booster feature info:\" << field;\n  }\n  charp_vecs.resize(str_vecs.size());\n  for (size_t i = 0; i < str_vecs.size(); ++i) {\n    charp_vecs[i] = str_vecs[i].c_str();\n  }\n\n  xgboost_CHECK_C_ARG_PTR(out_features);\n  xgboost_CHECK_C_ARG_PTR(len);\n\n  *out_features = dmlc::BeginPtr(charp_vecs);\n  *len = static_cast<xgboost::bst_ulong>(charp_vecs.size());\n  API_END();\n}\n\nXGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *config,\n                                  xgboost::bst_ulong *out_n_features, char const ***out_features,\n                                  bst_ulong *out_dim, bst_ulong const **out_shape,\n                                  float const **out_scores) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  auto *learner = static_cast<Learner *>(handle);\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto jconfig = Json::Load(StringView{config});\n\n  auto importance = RequiredArg<String>(jconfig, \"importance_type\", __func__);\n  std::string feature_map_uri;\n  if (!IsA<Null>(jconfig[\"feature_map\"])) {\n    feature_map_uri = get<String const>(jconfig[\"feature_map\"]);\n  }\n  FeatureMap feature_map = LoadFeatureMap(feature_map_uri);\n 
 std::vector<Json> custom_feature_names;\n  if (!IsA<Null>(jconfig[\"feature_names\"])) {\n    custom_feature_names = get<Array const>(jconfig[\"feature_names\"]);\n  }\n\n  std::vector<int32_t> tree_idx;\n  if (!IsA<Null>(jconfig[\"tree_idx\"])) {\n    auto j_tree_idx = get<Array const>(jconfig[\"tree_idx\"]);\n    for (auto const &idx : j_tree_idx) {\n      tree_idx.push_back(get<Integer const>(idx));\n    }\n  }\n\n  auto &scores = learner->GetThreadLocal().ret_vec_float;\n  std::vector<bst_feature_t> features;\n  learner->CalcFeatureScore(importance, common::Span<int32_t const>(tree_idx), &features, &scores);\n\n  auto n_features = learner->GetNumFeature();\n  GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);\n\n  auto &feature_names = learner->GetThreadLocal().ret_vec_str;\n  feature_names.resize(features.size());\n  auto &feature_names_c = learner->GetThreadLocal().ret_vec_charp;\n  feature_names_c.resize(features.size());\n\n  for (bst_feature_t i = 0; i < features.size(); ++i) {\n    feature_names[i] = feature_map.Name(features[i]);\n    feature_names_c[i] = feature_names[i].data();\n  }\n  xgboost_CHECK_C_ARG_PTR(out_n_features);\n  *out_n_features = feature_names.size();\n\n  CHECK_LE(features.size(), scores.size());\n  auto &shape = learner->GetThreadLocal().prediction_shape;\n\n  xgboost_CHECK_C_ARG_PTR(out_dim);\n  if (scores.size() > features.size()) {\n    // Linear model multi-class model\n    CHECK_EQ(scores.size() % features.size(), 0ul);\n    auto n_classes = scores.size() / features.size();\n    *out_dim = 2;\n    shape = {n_features, n_classes};\n  } else {\n    CHECK_EQ(features.size(), scores.size());\n    *out_dim = 1;\n    shape.resize(1);\n    shape.front() = scores.size();\n  }\n\n  xgboost_CHECK_C_ARG_PTR(out_shape);\n  xgboost_CHECK_C_ARG_PTR(out_scores);\n  xgboost_CHECK_C_ARG_PTR(out_features);\n\n  *out_shape = dmlc::BeginPtr(shape);\n  *out_scores = scores.data();\n  *out_features = 
dmlc::BeginPtr(feature_names_c);\n  API_END();\n}\n"
  },
  {
    "path": "src/c_api/c_api.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <thrust/transform.h>  // for transform\n\n#include \"../common/api_entry.h\"       // for XGBAPIThreadLocalEntry\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../data/array_interface.h\"   // for DispatchDType, ArrayInterface\n#include \"../data/device_adapter.cuh\"\n#include \"../data/proxy_dmatrix.h\"\n#include \"c_api_error.h\"\n#include \"c_api_utils.h\"\n#include \"xgboost/c_api.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/learner.h\"\n#if defined(XGBOOST_USE_NCCL)\n#include <nccl.h>\n#endif  // defined(XGBOOST_USE_NCCL)\n#if defined(XGBOOST_USE_NVCOMP)\n#include <nvcomp/version.h>\n#endif  // defined(XGBOOST_USE_NVCOMP)\n#if defined(XGBOOST_USE_RMM)\n#include <rmm/version_config.hpp>\n#endif  // defined(XGBOOST_USE_RMM)\n\nnamespace xgboost {\nvoid XGBBuildInfoDevice(Json *p_info) {\n  auto &info = *p_info;\n\n  info[\"USE_CUDA\"] = true;\n\n  std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}},\n                      Json{Integer{THRUST_SUBMINOR_VERSION}}};\n  info[\"THRUST_VERSION\"] = v;\n\n  v = {Json{Integer{dh::CUDAVersion().first}}, Json{Integer{dh::CUDAVersion().second}}};\n  info[\"CUDA_VERSION\"] = v;\n\n#if defined(XGBOOST_USE_NCCL)\n  info[\"USE_NCCL\"] = Boolean{true};\n  v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};\n  info[\"NCCL_VERSION\"] = v;\n\n#if defined(XGBOOST_USE_DLOPEN_NCCL)\n  info[\"USE_DLOPEN_NCCL\"] = Boolean{true};\n#else\n  info[\"USE_DLOPEN_NCCL\"] = Boolean{false};\n#endif  // defined(XGBOOST_USE_DLOPEN_NCCL)\n\n#else\n  info[\"USE_NCCL\"] = Boolean{false};\n  info[\"USE_DLOPEN_NCCL\"] = Boolean{false};\n#endif\n\n#if defined(XGBOOST_USE_RMM)\n  info[\"USE_RMM\"] = Boolean{true};\n  v = {Json{Integer{RMM_VERSION_MAJOR}}, Json{Integer{RMM_VERSION_MINOR}},\n       Json{Integer{RMM_VERSION_PATCH}}};\n  
info[\"RMM_VERSION\"] = v;\n#else\n  info[\"USE_RMM\"] = Boolean{false};\n#endif\n\n#if defined(XGBOOST_USE_NVCOMP)\n  info[\"USE_NVCOMP\"] = Boolean{true};\n  v = {Json{Integer{NVCOMP_VER_MAJOR}}, Json{Integer{NVCOMP_VER_MINOR}},\n       Json{Integer{NVCOMP_VER_PATCH}}};\n  info[\"NVCOMP_VERSION\"] = v;\n#else\n  info[\"USE_NVCOMP\"] = Boolean{false};\n#endif\n}\n\nvoid XGBoostAPIGuard::SetGPUAttribute() {\n  // Not calling `safe_cuda` to avoid unnecessary exception handling overhead.\n  // If errors, do nothing, assuming running on CPU only machine.\n  cudaGetDevice(&device_id_);\n}\n\nvoid XGBoostAPIGuard::RestoreGPUAttribute() {\n  // Not calling `safe_cuda` to avoid unnecessary exception handling overhead.\n  // If errors, do nothing, assuming running on CPU only machine.\n  cudaSetDevice(device_id_);\n}\n\nvoid CopyGradientFromCudaArrays(Context const *ctx, ArrayInterface<2, false> const &grad,\n                                ArrayInterface<2, false> const &hess,\n                                linalg::Matrix<GradientPair> *out_gpair) {\n  auto grad_dev = dh::CudaGetPointerDevice(grad.data);\n  auto hess_dev = dh::CudaGetPointerDevice(hess.data);\n  CHECK_EQ(grad_dev, hess_dev) << \"gradient and hessian should be on the same device.\";\n  auto &gpair = *out_gpair;\n  gpair.SetDevice(DeviceOrd::CUDA(grad_dev));\n  gpair.Reshape(grad.Shape<0>(), grad.Shape<1>());\n  auto d_gpair = gpair.View(DeviceOrd::CUDA(grad_dev));\n  auto cuctx = ctx->CUDACtx();\n\n  DispatchDType(grad, DeviceOrd::CUDA(grad_dev), [&](auto &&t_grad) {\n    DispatchDType(hess, DeviceOrd::CUDA(hess_dev), [&](auto &&t_hess) {\n      CHECK_EQ(t_grad.Size(), t_hess.Size());\n      thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), t_grad.Size(),\n                         detail::CustomGradHessOp{t_grad, t_hess, d_gpair});\n    });\n  });\n}\n}                        // namespace xgboost\n\nusing namespace xgboost;  // NOLINT\n\nXGB_DLL int 
XGDMatrixCreateFromCudaColumnar(char const *data,\n                                            char const* c_json_config,\n                                            DMatrixHandle *out) {\n  API_BEGIN();\n\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  xgboost_CHECK_C_ARG_PTR(data);\n\n  std::string json_str{data};\n  auto config = Json::Load(StringView{c_json_config});\n\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, std::int64_t>(config, \"nthread\", 0);\n  data::CudfAdapter adapter(json_str);\n  *out =\n      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));\n  API_END();\n}\n\nXGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,\n                                                  char const* c_json_config,\n                                                  DMatrixHandle *out) {\n  API_BEGIN();\n  std::string json_str{data};\n  auto config = Json::Load(StringView{c_json_config});\n  float missing = GetMissing(config);\n  auto n_threads = OptionalArg<Integer, std::int64_t>(config, \"nthread\", 0);\n  data::CupyAdapter adapter(json_str);\n  *out =\n      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));\n  API_END();\n}\n\ntemplate <bool is_columnar>\nint InplacePreidctCUDA(BoosterHandle handle, char const *data, char const *c_json_config,\n                       std::shared_ptr<DMatrix> p_m, xgboost::bst_ulong const **out_shape,\n                       xgboost::bst_ulong *out_dim, const float **out_result) {\n  API_BEGIN();\n  CHECK_HANDLE();\n  if (!p_m) {\n    p_m.reset(new data::DMatrixProxy);\n  }\n  auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());\n  CHECK(proxy) << \"Invalid input type for inplace predict.\";\n  xgboost_CHECK_C_ARG_PTR(data);\n\n  if constexpr (is_columnar) {\n    proxy->SetCudaColumnar(data);\n  } else {\n    proxy->SetCudaArray(data);\n  }\n\n  auto config = Json::Load(StringView{c_json_config});\n  auto *learner = 
static_cast<Learner *>(handle);\n\n  HostDeviceVector<float> *p_predt{nullptr};\n  auto type = PredictionType(RequiredArg<Integer>(config, \"type\", __func__));\n  float missing = GetMissing(config);\n\n  learner->InplacePredict(p_m, type, missing, &p_predt,\n                          RequiredArg<Integer>(config, \"iteration_begin\", __func__),\n                          RequiredArg<Integer>(config, \"iteration_end\", __func__));\n  CHECK(p_predt);\n  if (learner->Ctx()->IsCUDA()) {\n    CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());\n  }\n  p_predt->SetDevice(proxy->Device());\n\n  auto &shape = learner->GetThreadLocal().prediction_shape;\n  size_t n_samples = p_m->Info().num_row_;\n  auto chunksize = n_samples == 0 ? 0 : p_predt->Size() / n_samples;\n  bool strict_shape = RequiredArg<Boolean>(config, \"strict_shape\", __func__);\n\n  xgboost_CHECK_C_ARG_PTR(out_result);\n  xgboost_CHECK_C_ARG_PTR(out_shape);\n  xgboost_CHECK_C_ARG_PTR(out_dim);\n\n  CalcPredictShape(strict_shape, type, n_samples, p_m->Info().num_col_, chunksize,\n                   learner->Groups(), learner->BoostedRounds(), &shape, out_dim);\n  *out_shape = dmlc::BeginPtr(shape);\n  *out_result = p_predt->ConstDevicePointer();\n  API_END();\n}\n\nXGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *data,\n                                             char const *c_json_config, DMatrixHandle m,\n                                             xgboost::bst_ulong const **out_shape,\n                                             xgboost::bst_ulong *out_dim,\n                                             const float **out_result) {\n  std::shared_ptr<DMatrix> p_m{nullptr};\n  xgboost_CHECK_C_ARG_PTR(c_json_config);\n  if (m) {\n    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);\n  }\n  return InplacePreidctCUDA<true>(handle, data, c_json_config, p_m, out_shape, out_dim, out_result);\n}\n\nXGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const 
*data,\n                                          char const *c_json_config, DMatrixHandle m,\n                                          xgboost::bst_ulong const **out_shape,\n                                          xgboost::bst_ulong *out_dim, const float **out_result) {\n  std::shared_ptr<DMatrix> p_m{nullptr};\n  if (m) {\n    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);\n  }\n  xgboost_CHECK_C_ARG_PTR(out_result);\n  return InplacePreidctCUDA<false>(handle, data, c_json_config, p_m, out_shape, out_dim,\n                                   out_result);\n}\n"
  },
  {
    "path": "src/c_api/c_api_error.cc",
    "content": "/**\n *  Copyright 2015-2023, XGBoost Contributors\n * \\file c_api_error.cc\n * \\brief C error handling\n */\n#include \"./c_api_error.h\"\n\n#include <dmlc/thread_local.h>\n\n#include \"xgboost/c_api.h\"\n#include \"../collective/comm.h\"\n#include \"../collective/comm_group.h\"\n\nstruct XGBAPIErrorEntry {\n  std::string last_error;\n  std::int32_t code{-1};\n};\n\nusing XGBAPIErrorStore = dmlc::ThreadLocalStore<XGBAPIErrorEntry>;\n\nXGB_DLL const char* XGBGetLastError() { return XGBAPIErrorStore::Get()->last_error.c_str(); }\n\nvoid XGBAPISetLastError(const char* msg) {\n  XGBAPIErrorStore::Get()->last_error = msg;\n  XGBAPIErrorStore::Get()->code = -1;\n}\n\nXGB_DLL int XGBGetLastErrorCode() { return XGBAPIErrorStore::Get()->code; }\n"
  },
  {
    "path": "src/c_api/c_api_error.h",
    "content": "/**\n *  Copyright 2015-2023, XGBoost Contributors\n * \\file c_api_error.h\n * \\brief Error handling for C API.\n */\n#ifndef XGBOOST_C_API_C_API_ERROR_H_\n#define XGBOOST_C_API_C_API_ERROR_H_\n\n#include <dmlc/base.h>\n\n#include \"c_api_utils.h\"  // for XGBoostAPIGuard\n#include \"xgboost/logging.h\"\n\n/*! \\brief  macro to guard beginning and end section of all functions */\n#ifdef LOG_CAPI_INVOCATION\n#define API_BEGIN()                                                            \\\n  LOG(CONSOLE) << \"[XGBoost C API invocation] \" << __PRETTY_FUNCTION__;        \\\n  try {                                                                        \\\n    auto __guard = ::xgboost::XGBoostAPIGuard();\n\n#define API_BEGIN_UNGUARD()                                             \\\n  LOG(CONSOLE) << \"[XGBoost C API invocation] \" << __PRETTY_FUNCTION__; \\\n  try {\n#else  // LOG_CAPI_INVOCATION\n\n#define API_BEGIN()                                                            \\\n  try {                                                                        \\\n    auto __guard = ::xgboost::XGBoostAPIGuard();\n\n#define API_BEGIN_UNGUARD() try {\n#endif  // LOG_CAPI_INVOCATION\n\n/*! 
\\brief every function starts with API_BEGIN();\n     and finishes with API_END() */\n#define API_END()                                                              \\\n  } catch (dmlc::Error & _except_) {                                           \\\n    return XGBAPIHandleException(_except_);                                    \\\n  } catch (std::exception const& _except_) {                                   \\\n    return XGBAPIHandleException(dmlc::Error(_except_.what()));                \\\n  }                                                                            \\\n  return 0; // NOLINT(*)\n\n#define CHECK_HANDLE() \\\n  if (handle == nullptr) ::xgboost::detail::EmptyHandle();\n\n/*!\n * \\brief Set the last error message needed by C API\n * \\param msg The error message to set.\n */\nvoid XGBAPISetLastError(const char* msg);\n/*!\n * \\brief handle exception thrown out\n * \\param e the exception\n * \\return the return value of API after exception is handled\n */\ninline int XGBAPIHandleException(const dmlc::Error& e) {\n  XGBAPISetLastError(e.what());\n  return -1;\n}\n\n#define xgboost_CHECK_C_ARG_PTR(out_ptr)                      \\\n  do {                                                        \\\n    if (XGBOOST_EXPECT(!(out_ptr), false)) {                  \\\n      LOG(FATAL) << \"Invalid pointer argument: \" << #out_ptr; \\\n    }                                                         \\\n  } while (0)\n\n#endif  // XGBOOST_C_API_C_API_ERROR_H_\n"
  },
  {
    "path": "src/c_api/c_api_utils.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_C_API_C_API_UTILS_H_\n#define XGBOOST_C_API_C_API_UTILS_H_\n\n#include <algorithm>   // for min\n#include <cstddef>     // for size_t\n#include <functional>  // for multiplies\n#include <memory>      // for shared_ptr\n#include <numeric>     // for accumulate\n#include <string>      // for string\n#include <tuple>       // for make_tuple\n#include <utility>     // for move\n#include <vector>      // for vector\n\n#include \"../common/json_utils.h\"  // for TypeCheck\n#include \"xgboost/c_api.h\"\n#include \"xgboost/data.h\"         // DMatrix\n#include \"xgboost/feature_map.h\"  // for FeatureMap\n#include \"xgboost/json.h\"\n#include \"xgboost/learner.h\"\n#include \"xgboost/linalg.h\"  // ArrayInterfaceHandler, MakeTensorView, ArrayInterfaceStr\n#include \"xgboost/logging.h\"\n#include \"xgboost/string_view.h\"  // StringView\n\nnamespace xgboost {\n/* \\brief Determine the output shape of prediction.\n *\n * \\param strict_shape Whether should we reshape the output with consideration of groups\n *                     and forest.\n * \\param type         Prediction type\n * \\param rows         Input samples\n * \\param cols         Input features\n * \\param chunksize    Total elements of output / rows\n * \\param groups       Number of output groups from Learner\n * \\param rounds       end_iteration - beg_iteration\n * \\param out_shape    Output shape\n * \\param out_dim      Output dimension\n */\ninline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows, size_t cols,\n                             size_t chunksize, size_t groups, size_t rounds,\n                             std::vector<bst_ulong> *out_shape,\n                             xgboost::bst_ulong *out_dim) {\n  auto &shape = *out_shape;\n  if (type == PredictionType::kMargin && rows != 0) {\n    // When kValue is used, softmax can change the chunksize.\n    CHECK_EQ(chunksize, groups);\n  
}\n\n  switch (type) {\n  case PredictionType::kValue:\n  case PredictionType::kMargin: {\n    if (chunksize == 1 && !strict_shape) {\n      *out_dim = 1;\n      shape.resize(*out_dim);\n      shape.front() = rows;\n    } else {\n      *out_dim = 2;\n      shape.resize(*out_dim);\n      shape.front() = rows;\n      // chunksize can be 1 if it's softmax\n      shape.back() = std::min(groups, chunksize);\n    }\n    break;\n  }\n  case PredictionType::kApproxContribution:\n  case PredictionType::kContribution: {\n    if (groups == 1 && !strict_shape) {\n      *out_dim = 2;\n      shape.resize(*out_dim);\n      shape.front() = rows;\n      shape.back() = cols + 1;\n    } else {\n      *out_dim = 3;\n      shape.resize(*out_dim);\n      shape[0] = rows;\n      shape[1] = groups;\n      shape[2] = cols + 1;\n    }\n    break;\n  }\n  case PredictionType::kApproxInteraction:\n  case PredictionType::kInteraction: {\n    if (groups == 1 && !strict_shape) {\n      *out_dim = 3;\n      shape.resize(*out_dim);\n      shape[0] = rows;\n      shape[1] = cols + 1;\n      shape[2] = cols + 1;\n    } else {\n      *out_dim = 4;\n      shape.resize(*out_dim);\n      shape[0] = rows;\n      shape[1] = groups;\n      shape[2] = cols + 1;\n      shape[3] = cols + 1;\n    }\n    break;\n  }\n  case PredictionType::kLeaf: {\n    if (strict_shape) {\n      shape.resize(4);\n      shape[0] = rows;\n      shape[1] = rounds;\n      shape[2] = groups;\n      auto forest = chunksize / (shape[1] * shape[2]);\n      forest = std::max(static_cast<decltype(forest)>(1), forest);\n      shape[3] = forest;\n      *out_dim = shape.size();\n    } else if (chunksize == 1) {\n      *out_dim = 1;\n      shape.resize(*out_dim);\n      shape.front() = rows;\n    } else {\n      *out_dim = 2;\n      shape.resize(*out_dim);\n      shape.front() = rows;\n      shape.back() = chunksize;\n    }\n    break;\n  }\n  default: {\n    LOG(FATAL) << \"Unknown prediction type:\" << static_cast<int>(type);\n  }\n  }\n  
CHECK_EQ(\n      std::accumulate(shape.cbegin(), shape.cend(), static_cast<bst_ulong>(1), std::multiplies<>{}),\n      chunksize * rows);\n}\n\n// Reverse the ntree_limit in old prediction API.\ninline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner) {\n  // On Python and R, `best_ntree_limit` is set to `best_iteration * num_parallel_tree`.\n  // To reverse it we just divide it by `num_parallel_tree`.\n  if (ntree_limit != 0) {\n    learner->Configure();\n    uint32_t num_parallel_tree = 0;\n\n    Json config{Object()};\n    learner->SaveConfig(&config);\n    auto const &booster = get<String const>(config[\"learner\"][\"gradient_booster\"][\"name\"]);\n    if (booster == \"gblinear\") {\n      num_parallel_tree = 0;\n    } else if (booster == \"dart\") {\n      num_parallel_tree =\n          std::stoi(get<String const>(config[\"learner\"][\"gradient_booster\"][\"gbtree\"]\n                                            [\"gbtree_model_param\"][\"num_parallel_tree\"]));\n    } else if (booster == \"gbtree\") {\n      num_parallel_tree = std::stoi(get<String const>(\n          (config[\"learner\"][\"gradient_booster\"][\"gbtree_model_param\"][\"num_parallel_tree\"])));\n    } else {\n      LOG(FATAL) << \"Unknown booster:\" << booster;\n    }\n    ntree_limit /= std::max(num_parallel_tree, 1u);\n  }\n  return ntree_limit;\n}\n\ninline float GetMissing(Json const &config) {\n  float missing;\n  auto const &obj = get<Object const>(config);\n  auto it = obj.find(\"missing\");\n  if (it == obj.cend()) {\n    LOG(FATAL) << \"Argument `missing` is required.\";\n  }\n\n  auto const &j_missing = it->second;\n  if (IsA<Number const>(j_missing)) {\n    missing = get<Number const>(j_missing);\n  } else if (IsA<Integer const>(j_missing)) {\n    missing = get<Integer const>(j_missing);\n  } else {\n    missing = nan(\"\");\n    TypeCheck<Number, Integer>(j_missing, \"missing\");\n  }\n  return missing;\n}\n\n// Safe guard some global variables from being 
changed by XGBoost.\nclass XGBoostAPIGuard {\n#if defined(XGBOOST_USE_CUDA)\n  std::int32_t device_id_ {0};\n\n  void SetGPUAttribute();\n  void RestoreGPUAttribute();\n#else\n  void SetGPUAttribute() {}\n  void RestoreGPUAttribute() {}\n#endif\n\n public:\n  XGBoostAPIGuard() {\n    SetGPUAttribute();\n  }\n  ~XGBoostAPIGuard() {\n    RestoreGPUAttribute();\n  }\n};\n\ninline FeatureMap LoadFeatureMap(std::string const& uri) {\n  FeatureMap feat;\n  if (uri.size() != 0) {\n    std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(uri.c_str(), \"r\"));\n    dmlc::istream is(fs.get());\n    feat.LoadText(is);\n  }\n  return feat;\n}\n\ninline void GenerateFeatureMap(Learner const *learner,\n                               std::vector<Json> const &custom_feature_names,\n                               size_t n_features, FeatureMap *out_feature_map) {\n  auto &feature_map = *out_feature_map;\n  auto maybe = [&](std::vector<std::string> const &values, size_t i,\n                   std::string const &dft) {\n    return values.empty() ? dft : values[i];\n  };\n  if (feature_map.Size() == 0) {\n    // Use the feature names and types from booster.\n    std::vector<std::string> feature_names;\n    // priority:\n    // 1. feature map.\n    // 2. customized feature name.\n    // 3. from booster\n    // 4. 
default feature name.\n    if (!custom_feature_names.empty()) {\n      CHECK_EQ(custom_feature_names.size(), n_features)\n          << \"Incorrect number of feature names.\";\n      feature_names.resize(custom_feature_names.size());\n      std::transform(custom_feature_names.begin(), custom_feature_names.end(),\n                     feature_names.begin(),\n                     [](Json const &name) { return get<String const>(name); });\n    } else {\n      learner->GetFeatureNames(&feature_names);\n    }\n    if (!feature_names.empty()) {\n      CHECK_EQ(feature_names.size(), n_features) << \"Incorrect number of feature names.\";\n    }\n\n    std::vector<std::string> feature_types;\n    learner->GetFeatureTypes(&feature_types);\n    if (!feature_types.empty()) {\n      CHECK_EQ(feature_types.size(), n_features) << \"Incorrect number of feature types.\";\n    }\n\n    for (size_t i = 0; i < n_features; ++i) {\n      feature_map.PushBack(\n          i,\n          maybe(feature_names, i, \"f\" + std::to_string(i)).data(),\n          maybe(feature_types, i, \"q\").data());\n    }\n  }\n  CHECK_EQ(feature_map.Size(), n_features);\n}\n\nvoid XGBBuildInfoDevice(Json* p_info);\n\n/**\n * \\brief Get shared ptr from DMatrix C handle with additional checks.\n */\ninline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {\n  auto pp_m = static_cast<std::shared_ptr<DMatrix> *>(handle);\n  StringView msg{\"Invalid DMatrix handle\"};\n  CHECK(pp_m) << msg;\n  auto p_m = *pp_m;\n  CHECK(p_m) << msg;\n  return p_m;\n}\n\nnamespace detail {\ninline void EmptyHandle() {\n  LOG(FATAL) << \"DMatrix/Booster has not been initialized or has already been disposed.\";\n}\n\ninline xgboost::Context const *BoosterCtx(BoosterHandle handle) {\n  if (handle == nullptr) {\n    EmptyHandle();\n  }\n  auto *learner = static_cast<xgboost::Learner *>(handle);\n  CHECK(learner);\n  return learner->Ctx();\n}\n\ntemplate <typename PtrT, typename I, typename T>\nvoid 
MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,\n                       std::size_t nindptr, std::string *indptr_str, std::string *indices_str,\n                       std::string *data_str) {\n  auto ndata = static_cast<Integer::Int>(p_indptr[nindptr - 1]);\n  // Construct array interfaces\n  Json jindptr{Object{}};\n  Json jindices{Object{}};\n  Json jdata{Object{}};\n  CHECK(p_indptr);\n  jindptr[\"data\"] =\n      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};\n  jindptr[\"shape\"] = std::vector<Json>{Json{nindptr}};\n  jindptr[\"version\"] = Integer{3};\n\n  CHECK(p_indices);\n  jindices[\"data\"] =\n      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};\n  jindices[\"shape\"] = std::vector<Json>{Json{ndata}};\n  jindices[\"version\"] = Integer{3};\n\n  CHECK(p_data);\n  jdata[\"data\"] =\n      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};\n  jdata[\"shape\"] = std::vector<Json>{Json{ndata}};\n  jdata[\"version\"] = Integer{3};\n\n  std::string pindptr_typestr =\n      linalg::detail::ArrayInterfaceHandler::TypeChar<PtrT>() + std::to_string(sizeof(PtrT));\n  std::string ind_typestr =\n      linalg::detail::ArrayInterfaceHandler::TypeChar<I>() + std::to_string(sizeof(I));\n  std::string data_typestr =\n      linalg::detail::ArrayInterfaceHandler::TypeChar<T>() + std::to_string(sizeof(T));\n  if (DMLC_LITTLE_ENDIAN) {\n    jindptr[\"typestr\"] = String{\"<\" + pindptr_typestr};\n    jindices[\"typestr\"] = String{\"<\" + ind_typestr};\n    jdata[\"typestr\"] = String{\"<\" + data_typestr};\n  } else {\n    jindptr[\"typestr\"] = String{\">\" + pindptr_typestr};\n    jindices[\"typestr\"] = String{\">\" + ind_typestr};\n    jdata[\"typestr\"] = String{\">\" + data_typestr};\n  }\n\n  Json::Dump(jindptr, indptr_str);\n  Json::Dump(jindices, indices_str);\n  Json::Dump(jdata, data_str);\n}\n\n/**\n * @brief Make array 
interface for other language bindings.\n */\ntemplate <typename G, typename H>\nauto MakeGradientInterface(Context const *ctx, G const *grad, H const *hess, linalg::Order order,\n                           std::size_t n_samples, std::size_t n_targets) {\n  auto t_grad = linalg::MakeTensorView(ctx, order, common::Span{grad, n_samples * n_targets},\n                                       n_samples, n_targets);\n  auto t_hess = linalg::MakeTensorView(ctx, order, common::Span{hess, n_samples * n_targets},\n                                       n_samples, n_targets);\n  auto s_grad = linalg::ArrayInterfaceStr(t_grad);\n  auto s_hess = linalg::ArrayInterfaceStr(t_hess);\n  return std::make_tuple(s_grad, s_hess);\n}\n\ntemplate <typename G, typename H>\nstruct CustomGradHessOp {\n  linalg::MatrixView<G> t_grad;\n  linalg::MatrixView<H> t_hess;\n  linalg::MatrixView<GradientPair> d_gpair;\n\n  CustomGradHessOp(linalg::MatrixView<G> t_grad, linalg::MatrixView<H> t_hess,\n                   linalg::MatrixView<GradientPair> d_gpair)\n      : t_grad{std::move(t_grad)}, t_hess{std::move(t_hess)}, d_gpair{std::move(d_gpair)} {}\n\n  XGBOOST_DEVICE void operator()(std::size_t i) {\n    auto [m, n] = linalg::UnravelIndex(i, t_grad.Shape(0), t_grad.Shape(1));\n    auto g = t_grad(m, n);\n    auto h = t_hess(m, n);\n    // from struct of arrays to array of structs.\n    d_gpair(m, n) = GradientPair{static_cast<float>(g), static_cast<float>(h)};\n  }\n};\n}  // namespace detail\n}  // namespace xgboost\n#endif  // XGBOOST_C_API_C_API_UTILS_H_\n"
  },
  {
    "path": "src/c_api/coll_c_api.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include <chrono>       // for seconds\n#include <future>       // for future\n#include <memory>       // for unique_ptr\n#include <string>       // for string\n#include <thread>       // for sleep_for\n#include <type_traits>  // for is_same_v, remove_pointer_t\n#include <utility>      // for pair\n\n#include \"../collective/allgather.h\"         // for Allgather\n#include \"../collective/allreduce.h\"         // for Allreduce\n#include \"../collective/broadcast.h\"         // for Broadcast\n#include \"../collective/comm.h\"              // for DefaultTimeoutSec\n#include \"../collective/comm_group.h\"        // for GlobalCommGroup\n#include \"../collective/communicator-inl.h\"  // for GetProcessorName\n#include \"../collective/tracker.h\"           // for RabitTracker\n#include \"../common/timer.h\"                 // for Timer\n#include \"c_api_error.h\"                     // for API_BEGIN\n#include \"xgboost/c_api.h\"\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/json.h\"               // for Json\n#include \"xgboost/string_view.h\"        // for StringView\n\n#if defined(XGBOOST_USE_FEDERATED)\n#include \"../../plugin/federated/federated_tracker.h\"  // for FederatedTracker\n#endif\n\nnamespace xgboost::collective {\nvoid Allreduce(void *send_receive_buffer, std::size_t count, std::int32_t data_type, int op) {\n  Context ctx;\n  DispatchDType(static_cast<ArrayInterfaceHandler::Type>(data_type), [&](auto t) {\n    using T = decltype(t);\n    auto data = linalg::MakeTensorView(\n        &ctx, common::Span{static_cast<T *>(send_receive_buffer), count}, count);\n    auto rc = Allreduce(&ctx, *GlobalCommGroup(), data, static_cast<Op>(op));\n    SafeColl(rc);\n  });\n}\n\nvoid Broadcast(void *send_receive_buffer, std::size_t size, int root) {\n  Context ctx;\n  auto rc = Broadcast(&ctx, *GlobalCommGroup(),\n                      
linalg::MakeVec(static_cast<std::int8_t *>(send_receive_buffer), size), root);\n  SafeColl(rc);\n}\n\nvoid Allgather(void *send_receive_buffer, std::size_t size) {\n  Context ctx;\n  auto const &comm = GlobalCommGroup();\n  auto rc = Allgather(&ctx, *comm,\n                      linalg::MakeVec(reinterpret_cast<std::int8_t *>(send_receive_buffer), size));\n  SafeColl(rc);\n}\n}  // namespace xgboost::collective\n\nusing namespace xgboost;  // NOLINT\n\nnamespace {\nusing TrackerHandleT =\n    std::pair<std::shared_ptr<collective::Tracker>, std::shared_future<collective::Result>>;\n\nTrackerHandleT *GetTrackerHandle(TrackerHandle handle) {\n  xgboost_CHECK_C_ARG_PTR(handle);\n  auto *ptr = static_cast<TrackerHandleT *>(handle);\n  CHECK(ptr);\n  return ptr;\n}\n\nstruct CollAPIEntry {\n  std::string ret_str;\n};\nusing CollAPIThreadLocalStore = dmlc::ThreadLocalStore<CollAPIEntry>;\n\nvoid WaitImpl(TrackerHandleT *ptr, std::chrono::seconds timeout) {\n  constexpr std::int64_t kDft{collective::DefaultTimeoutSec()};\n  std::int64_t timeout_clipped = kDft;\n  if (collective::HasTimeout(timeout)) {\n    timeout_clipped = std::min(kDft, static_cast<std::int64_t>(timeout.count()));\n  }\n  std::chrono::seconds wait_for{timeout_clipped};\n\n  common::Timer timer;\n  timer.Start();\n\n  auto ref = ptr->first;  // hold a reference to that free don't delete it while waiting.\n\n  auto fut = ptr->second;\n  while (fut.valid()) {\n    auto res = fut.wait_for(wait_for);\n    CHECK(res != std::future_status::deferred);\n\n    if (res == std::future_status::ready) {\n      auto const &rc = ptr->second.get();\n      collective::SafeColl(rc);\n      break;\n    }\n\n    if (timer.Duration() > timeout && collective::HasTimeout(timeout)) {\n      collective::SafeColl(collective::Fail(\"Timeout waiting for the tracker.\"));\n    }\n  }\n}\n}  // namespace\n\nXGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle) {\n  API_BEGIN_UNGUARD();\n  
xgboost_CHECK_C_ARG_PTR(config);\n\n  Json jconfig = Json::Load(config);\n\n  auto type = RequiredArg<String>(jconfig, \"dmlc_communicator\", __func__);\n  std::shared_ptr<collective::Tracker> tptr;\n  if (type == \"federated\") {\n#if defined(XGBOOST_USE_FEDERATED)\n    tptr = std::make_shared<collective::FederatedTracker>(jconfig);\n#else\n    LOG(FATAL) << error::NoFederated();\n#endif  // defined(XGBOOST_USE_FEDERATED)\n  } else if (type == \"rabit\") {\n    tptr = std::make_shared<collective::RabitTracker>(jconfig);\n  } else {\n    LOG(FATAL) << \"Unknown communicator:\" << type;\n  }\n\n  auto ptr = new TrackerHandleT{std::move(tptr), std::future<collective::Result>{}};\n  static_assert(std::is_same_v<std::remove_pointer_t<decltype(ptr)>, TrackerHandleT>);\n\n  xgboost_CHECK_C_ARG_PTR(handle);\n  *handle = ptr;\n  API_END();\n}\n\nXGB_DLL int XGTrackerWorkerArgs(TrackerHandle handle, char const **args) {\n  API_BEGIN_UNGUARD();\n  auto *ptr = GetTrackerHandle(handle);\n  auto &local = *CollAPIThreadLocalStore::Get();\n  local.ret_str = Json::Dump(ptr->first->WorkerArgs());\n  xgboost_CHECK_C_ARG_PTR(args);\n  *args = local.ret_str.c_str();\n  API_END();\n}\n\nXGB_DLL int XGTrackerRun(TrackerHandle handle, char const *) {\n  API_BEGIN_UNGUARD();\n  auto *ptr = GetTrackerHandle(handle);\n  CHECK(!ptr->second.valid()) << \"Tracker is already running.\";\n  ptr->second = ptr->first->Run();\n  API_END();\n}\n\nXGB_DLL int XGTrackerWaitFor(TrackerHandle handle, char const *config) {\n  API_BEGIN_UNGUARD();\n  auto *ptr = GetTrackerHandle(handle);\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto jconfig = Json::Load(StringView{config});\n  // Internally, 0 indicates no timeout, which is the default since we don't want to\n  // interrupt the model training.\n  xgboost_CHECK_C_ARG_PTR(config);\n  auto timeout = OptionalArg<Integer>(jconfig, \"timeout\", std::int64_t{0});\n  WaitImpl(ptr, std::chrono::seconds{timeout});\n  API_END();\n}\n\nXGB_DLL int 
XGTrackerFree(TrackerHandle handle) {\n  API_BEGIN_UNGUARD();\n  using namespace std::chrono_literals;  // NOLINT\n  auto *ptr = GetTrackerHandle(handle);\n  ptr->first->Stop();\n  // The wait is not necessary since we just called stop, just reusing the function to do\n  // any potential cleanups.\n  WaitImpl(ptr, ptr->first->Timeout());\n  common::Timer timer;\n  timer.Start();\n  // Make sure no one else is waiting on the tracker.\n\n  // Quote from https://en.cppreference.com/w/cpp/memory/shared_ptr/use_count#Notes:\n  //\n  // In multithreaded environment, `use_count() == 1` does not imply that the object is\n  // safe to modify because accesses to the managed object by former shared owners may not\n  // have completed, and because new shared owners may be introduced concurrently.\n  //\n  // - We don't have the first case since we never access the raw pointer.\n  //\n  // - We don't have the second case for most of the scenarios since tracker is an unique\n  //   object, if the free function is called before another function calls, it's likely\n  //   to be a bug in the user code. 
The use_count should only decrease in this function.\n  while (ptr->first.use_count() != 1) {\n    auto ela = timer.Duration().count();\n    if (collective::HasTimeout(ptr->first->Timeout()) && ela > ptr->first->Timeout().count()) {\n      LOG(WARNING) << \"Time out \" << ptr->first->Timeout().count()\n                   << \" seconds reached for TrackerFree, killing the tracker.\";\n      break;\n    }\n    std::this_thread::sleep_for(64ms);\n  }\n  delete ptr;\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorInit(char const *json_config) {\n  API_BEGIN();\n  xgboost_CHECK_C_ARG_PTR(json_config);\n  Json config{Json::Load(StringView{json_config})};\n  collective::GlobalCommGroupInit(config);\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorFinalize(void) {\n  API_BEGIN();\n  collective::GlobalCommGroupFinalize();\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorGetRank(void) {\n  API_BEGIN();\n  return collective::GetRank();\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorGetWorldSize(void) { return collective::GetWorldSize(); }\n\nXGB_DLL int XGCommunicatorIsDistributed(void) { return collective::IsDistributed(); }\n\nXGB_DLL int XGCommunicatorPrint(char const *message) {\n  API_BEGIN();\n  collective::Print(message);\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorGetProcessorName(char const **name_str) {\n  API_BEGIN();\n  auto &local = *CollAPIThreadLocalStore::Get();\n  local.ret_str = collective::GetProcessorName();\n  xgboost_CHECK_C_ARG_PTR(name_str);\n  *name_str = local.ret_str.c_str();\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int root) {\n  API_BEGIN();\n  collective::Broadcast(send_receive_buffer, size, root);\n  API_END();\n}\n\nXGB_DLL int XGCommunicatorAllreduce(void *send_receive_buffer, size_t count, int enum_dtype,\n                                    int enum_op) {\n  API_BEGIN();\n  collective::Allreduce(send_receive_buffer, count, enum_dtype, enum_op);\n  API_END();\n}\n\n// Not exposed to the public 
since the previous implementation didn't and we don't want to\n// add unnecessary communicator API to a machine learning library.\nXGB_DLL int XGCommunicatorAllgather(void *send_receive_buffer, size_t count) {\n  API_BEGIN();\n  collective::Allgather(send_receive_buffer, count);\n  API_END();\n}\n\n// Not yet exposed to the public, error recovery is still WIP.\nXGB_DLL int XGCommunicatorSignalError() {\n  API_BEGIN();\n  auto msg = XGBGetLastError();\n  SafeColl(xgboost::collective::GlobalCommGroup()->SignalError(xgboost::collective::Fail(msg)));\n  API_END()\n}\n"
  },
  {
    "path": "src/collective/aggregator.cuh",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n *\n * Higher level functions built on top of the Communicator API, taking care of behavioral\n * differences between row-split vs column-split distributed training, and horizontal vs vertical\n * federated learning.\n */\n#pragma once\n#include <xgboost/data.h>\n\n#include <limits>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"allreduce.h\"\n#include \"xgboost/collective/result.h\"  // for Result\n\nnamespace xgboost::collective {\n\n/**\n * @brief Find the global sum of the given values across all workers.\n *\n * This only applies when the data is split row-wise (horizontally). When data is split\n * column-wise (vertically), the original values are returned.\n *\n * @tparam T The type of the values.\n *\n * @param ctx Context forwarded to the underlying Allreduce call.\n * @param info MetaInfo about the DMatrix.\n * @param values View of the values to sum.\n */\ntemplate <typename T, std::int32_t kDim>\n[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info,\n                               linalg::TensorView<T, kDim> values) {\n  if (info.IsRowSplit()) {\n    return collective::Allreduce(ctx, values, collective::Op::kSum);\n  }\n  return Success();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/aggregator.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n *\n * Higher level functions built on top the Communicator API, taking care of behavioral differences\n * between row-split vs column-split distributed training, and horizontal vs vertical federated\n * learning.\n */\n#pragma once\n#include <limits>\n#include <string>\n#include <utility>\n\n#include \"allreduce.h\"\n#include \"broadcast.h\"\n#include \"comm.h\"\n#include \"communicator-inl.h\"\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/data.h\"               // for MetaINfo\n\nnamespace xgboost::collective {\nnamespace detail {\ntemplate <typename Fn>\n[[nodiscard]] Result TryApplyWithLabels(Context const* ctx, Fn&& fn) {\n  std::string msg;\n  if (collective::GetRank() == 0) {\n    try {\n      fn();\n    } catch (dmlc::Error const& e) {\n      msg = e.what();\n    }\n  }\n  std::size_t msg_size{msg.size()};\n  auto rc = Success() << [&] {\n    auto rc = collective::Broadcast(ctx, linalg::MakeVec(&msg_size, 1), 0);\n    return rc;\n  } << [&] {\n    if (msg_size > 0) {\n      msg.resize(msg_size);\n      return collective::Broadcast(ctx, linalg::MakeVec(msg.data(), msg.size()), 0);\n    }\n    return Success();\n  } << [&] {\n    if (msg_size > 0) {\n      LOG(FATAL) << msg;\n    }\n    return Success();\n  };\n  return rc;\n}\n}  // namespace detail\n\n/**\n * @brief Apply the given function where the labels are.\n *\n * Normally all the workers have access to the labels, so the function is just applied locally. 
In\n * vertical federated learning, we assume labels are only available on worker 0, so the function is\n * applied there, with the results broadcast to other workers.\n *\n * @tparam Function The function used to calculate the results.\n * @param info MetaInfo about the DMatrix.\n * @param buffer The buffer storing the results.\n * @param size The size of the buffer.\n * @param function The function used to calculate the results.\n */\ntemplate <typename Fn>\nvoid ApplyWithLabels(Context const* ctx, MetaInfo const& info, void* buffer, std::size_t size,\n                     Fn&& fn) {\n  if (info.IsVerticalFederated()) {\n    auto rc = detail::TryApplyWithLabels(ctx, fn) << [&] {\n      // We assume labels are only available on worker 0, so the calculation is done there and\n      // result broadcast to other workers.\n      return collective::Broadcast(\n          ctx, linalg::MakeVec(reinterpret_cast<std::int8_t*>(buffer), size), 0);\n    };\n    SafeColl(rc);\n  } else {\n    std::forward<Fn>(fn)();\n  }\n}\n\n/**\n * @brief Apply the given function where the labels are.\n *\n * Normally all the workers have access to the labels, so the function is just applied locally. 
In\n * vertical federated learning, we assume labels are only available on worker 0, so the function is\n * applied there, with the results broadcast to other workers.\n *\n * @tparam T Type of the HostDeviceVector storing the results.\n * @tparam Function The function used to calculate the results.\n * @param info MetaInfo about the DMatrix.\n * @param result The HostDeviceVector storing the results.\n * @param function The function used to calculate the results.\n */\ntemplate <typename T, typename Fn>\nvoid ApplyWithLabels(Context const* ctx, MetaInfo const& info, HostDeviceVector<T>* result,\n                     Fn&& fn) {\n  if (info.IsVerticalFederated()) {\n    // We assume labels are only available on worker 0, so the calculation is done there and result\n    // broadcast to other workers.\n    auto rc = detail::TryApplyWithLabels(ctx, fn);\n\n    std::size_t size{result->Size()};\n    rc = std::move(rc) << [&] {\n      return collective::Broadcast(ctx, linalg::MakeVec(&size, 1), 0);\n    } << [&] {\n      result->Resize(size);\n      return collective::Broadcast(ctx, linalg::MakeVec(result->HostPointer(), size), 0);\n    };\n    SafeColl(rc);\n  } else {\n    std::forward<Fn>(fn)();\n  }\n}\n\n/**\n * @brief Find the global max of the given value across all workers.\n *\n * This only applies when the data is split row-wise (horizontally). 
When data is split\n * column-wise (vertically), the local value is returned.\n *\n * @tparam T The type of the value.\n * @param info MetaInfo about the DMatrix.\n * @param value The input for finding the global max.\n * @return The global max of the input.\n */\ntemplate <typename T>\nstd::enable_if_t<std::is_trivially_copy_assignable_v<T>, T> GlobalMax(Context const* ctx,\n                                                                      MetaInfo const& info,\n                                                                      T value) {\n  if (info.IsRowSplit()) {\n    auto rc = collective::Allreduce(ctx, linalg::MakeVec(&value, 1), collective::Op::kMax);\n    SafeColl(rc);\n  }\n  return value;\n}\n\ntemplate <typename T, std::int32_t kDim>\n[[nodiscard]] Result GlobalSum(Context const* ctx, bool is_column_split,\n                               linalg::TensorView<T, kDim> values) {\n  if (!is_column_split) {\n    return collective::Allreduce(ctx, values, collective::Op::kSum);\n  }\n  return Success();\n}\n\n/**\n * @brief Find the global sum of the given values across all workers.\n *\n * This only applies when the data is split row-wise (horizontally). 
When data is split\n * column-wise (vertically), the original values are returned.\n *\n * @tparam T The type of the values.\n * @param info MetaInfo about the DMatrix.\n * @param values Pointer to the inputs to sum.\n * @param size Number of values to sum.\n */\ntemplate <typename T, std::int32_t kDim>\n[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info,\n                               linalg::TensorView<T, kDim> values) {\n  return GlobalSum(ctx, info.IsColumnSplit(), values);\n}\n\ntemplate <typename T>\n[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info,\n                               linalg::VectorView<T> values, double* sum_weight) {\n  if (info.IsColumnSplit()) {\n    return Success();\n  }\n  auto status = Success() << [&] {\n    return Allreduce(ctx, sum_weight, collective::Op::kSum);\n  } << [&] {\n    return Allreduce(ctx, values, collective::Op::kSum);\n  };\n  return status;\n}\n\n/**\n * @brief Find the global ratio of the given two values across all workers.\n *\n * This only applies when the data is split row-wise (horizontally). When data is split\n * column-wise (vertically), the local ratio is returned.\n *\n * @tparam T The type of the values.\n * @param info MetaInfo about the DMatrix.\n * @param dividend The dividend of the ratio.\n * @param divisor The divisor of the ratio.\n * @return The global ratio of the two inputs.\n */\ntemplate <typename T>\nT GlobalRatio(Context const* ctx, MetaInfo const& info, T dividend, T divisor) {\n  std::array<T, 2> results{dividend, divisor};\n  auto rc = GlobalSum(ctx, info, linalg::MakeVec(results.data(), results.size()));\n  SafeColl(rc);\n  std::tie(dividend, divisor) = std::tuple_cat(results);\n  if (divisor <= 0) {\n    return std::numeric_limits<T>::quiet_NaN();\n  } else {\n    return dividend / divisor;\n  }\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/allgather.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include \"allgather.h\"\n\n#include <algorithm>  // for min, copy_n, fill_n\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int8_t, int32_t, int64_t\n#include <memory>     // for shared_ptr\n#include <utility>    // for move\n\n#include \"broadcast.h\"\n#include \"comm.h\"                       // for Comm, Channel\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nnamespace cpu_impl {\nResult RingAllgather(Comm const& comm, common::Span<std::int8_t> data, std::size_t segment_size,\n                     std::int32_t worker_off, std::shared_ptr<Channel> prev_ch,\n                     std::shared_ptr<Channel> next_ch) {\n  auto world = comm.World();\n  auto rank = comm.Rank();\n  CHECK_LT(worker_off, world);\n  if (world == 1) {\n    return Success();\n  }\n\n  for (std::int32_t r = 0; r < world; ++r) {\n    auto rc = Success() << [&] {\n      auto send_rank = (rank + world - r + worker_off) % world;\n      auto send_off = send_rank * segment_size;\n      bool is_last_segment = send_rank == (world - 1);\n      auto send_nbytes = is_last_segment ? (data.size_bytes() - send_off) : segment_size;\n      auto send_seg = data.subspan(send_off, send_nbytes);\n      CHECK_NE(send_seg.size(), 0);\n      return next_ch->SendAll(send_seg.data(), send_seg.size_bytes());\n    } << [&] {\n      auto recv_rank = (rank + world - r - 1 + worker_off) % world;\n      auto recv_off = recv_rank * segment_size;\n      bool is_last_segment = recv_rank == (world - 1);\n      auto recv_nbytes = is_last_segment ? 
(data.size_bytes() - recv_off) : segment_size;\n      auto recv_seg = data.subspan(recv_off, recv_nbytes);\n      CHECK_NE(recv_seg.size(), 0);\n      return prev_ch->RecvAll(recv_seg.data(), recv_seg.size_bytes());\n    } << [&] {\n      return comm.Block();\n    };\n    if (!rc.OK()) {\n      return Fail(\"Ring allgather failed, current iteration:\" + std::to_string(r), std::move(rc));\n    }\n  }\n\n  return Success();\n}\n\nResult BroadcastAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,\n                           common::Span<std::int8_t> recv) {\n  std::size_t offset = 0;\n  for (std::int32_t r = 0; r < comm.World(); ++r) {\n    auto as_bytes = sizes[r];\n    auto rc = Broadcast(comm, recv.subspan(offset, as_bytes), r);\n    if (!rc.OK()) {\n      return Fail(\"Broadcast AllgatherV failed, current iteration:\" + std::to_string(r),\n                  std::move(rc));\n    }\n    offset += as_bytes;\n  }\n  return Success();\n}\n}  // namespace cpu_impl\n\nnamespace detail {\n[[nodiscard]] Result RingAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,\n                                    common::Span<std::int64_t const> offset,\n                                    common::Span<std::int8_t> erased_result) {\n  auto world = comm.World();\n  if (world == 1) {\n    return Success();\n  }\n  auto rank = comm.Rank();\n\n  auto prev = BootstrapPrev(rank, comm.World());\n  auto next = BootstrapNext(rank, comm.World());\n\n  auto prev_ch = comm.Chan(prev);\n  auto next_ch = comm.Chan(next);\n\n  for (std::int32_t r = 0; r < world; ++r) {\n    auto rc = Success() << [&] {\n      auto send_rank = (rank + world - r) % world;\n      auto send_off = offset[send_rank];\n      auto send_size = sizes[send_rank];\n      auto send_seg = erased_result.subspan(send_off, send_size);\n      return next_ch->SendAll(send_seg);\n    } << [&] {\n      auto recv_rank = (rank + world - r - 1) % world;\n      auto recv_off = offset[recv_rank];\n      
auto recv_size = sizes[recv_rank];\n      auto recv_seg = erased_result.subspan(recv_off, recv_size);\n      return prev_ch->RecvAll(recv_seg.data(), recv_seg.size_bytes());\n    } << [&] {\n      return prev_ch->Block();\n    };\n    if (!rc.OK()) {\n      return Fail(\"Ring AllgatherV failed, current iterataion:\" + std::to_string(r), std::move(rc));\n    }\n  }\n  return comm.Block();\n}\n}  // namespace detail\n\n[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(\n    Context const* ctx, CommGroup const& comm, std::vector<std::vector<char>> const& input) {\n  auto n_inputs = input.size();\n  std::vector<std::int64_t> sizes(n_inputs);\n  std::transform(input.cbegin(), input.cend(), sizes.begin(),\n                 [](auto const& vec) { return vec.size(); });\n\n  std::vector<std::int64_t> recv_segments(comm.World() + 1, 0);\n\n  HostDeviceVector<std::int8_t> recv;\n  auto rc =\n      AllgatherV(ctx, comm, linalg::MakeVec(sizes.data(), sizes.size()), &recv_segments, &recv);\n  SafeColl(rc);\n\n  auto global_sizes = common::RestoreType<std::int64_t const>(recv.ConstHostSpan());\n  std::vector<std::int64_t> offset(global_sizes.size() + 1);\n  offset[0] = 0;\n  for (std::size_t i = 1; i < offset.size(); i++) {\n    offset[i] = offset[i - 1] + global_sizes[i - 1];\n  }\n\n  std::vector<char> collected;\n  for (auto const& vec : input) {\n    collected.insert(collected.end(), vec.cbegin(), vec.cend());\n  }\n  rc = AllgatherV(ctx, comm, linalg::MakeVec(collected.data(), collected.size()), &recv_segments,\n                  &recv);\n  SafeColl(rc);\n  auto out = common::RestoreType<char const>(recv.ConstHostSpan());\n\n  std::vector<std::vector<char>> result;\n  for (std::size_t i = 1; i < offset.size(); ++i) {\n    std::vector<char> local(out.cbegin() + offset[i - 1], out.cbegin() + offset[i]);\n    result.emplace_back(std::move(local));\n  }\n  return result;\n}\n\n[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(\n    Context const* ctx, 
std::vector<std::vector<char>> const& input) {\n  return VectorAllgatherV(ctx, *GlobalCommGroup(), input);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/allgather.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#pragma once\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <memory>       // for shared_ptr\n#include <numeric>      // for accumulate\n#include <string>       // for string\n#include <type_traits>  // for remove_cv_t\n#include <vector>       // for vector\n\n#include \"../common/type.h\"             // for EraseType\n#include \"comm.h\"                       // for Comm, Channel\n#include \"comm_group.h\"                 // for CommGroup\n#include \"topo.h\"                       // for BootstrapNext, BootstrapPrev\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/linalg.h\"             // for MakeVec\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nnamespace cpu_impl {\n/**\n * @param worker_off Segment offset. For example, if the rank 2 worker specifies\n *                   worker_off = 1, then it owns the third segment (2 + 1).\n */\n[[nodiscard]] Result RingAllgather(Comm const& comm, common::Span<std::int8_t> data,\n                                   std::size_t segment_size, std::int32_t worker_off,\n                                   std::shared_ptr<Channel> prev_ch,\n                                   std::shared_ptr<Channel> next_ch);\n\n/**\n * @brief Implement allgather-v using broadcast.\n *\n * https://arxiv.org/abs/1812.05964\n */\nResult BroadcastAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,\n                           common::Span<std::int8_t> recv);\n}  // namespace cpu_impl\n\nnamespace detail {\ninline void AllgatherVOffset(common::Span<std::int64_t const> sizes,\n                             common::Span<std::int64_t> offset) {\n  // get worker offset\n  std::fill_n(offset.data(), offset.size(), 0);\n  std::partial_sum(sizes.cbegin(), sizes.cend(), offset.begin() + 1);\n  CHECK_EQ(*offset.cbegin(), 0);\n}\n\n// An implementation that's used 
by both cpu and gpu\n[[nodiscard]] Result RingAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,\n                                    common::Span<std::int64_t const> offset,\n                                    common::Span<std::int8_t> erased_result);\n}  // namespace detail\n\ntemplate <typename T>\n[[nodiscard]] Result RingAllgather(Comm const& comm, common::Span<T> data) {\n  // This function is also used for ring allreduce, hence we allow the last segment to be\n  // larger due to round-down.\n  auto n_bytes_per_segment = data.size_bytes() / comm.World();\n  auto erased = common::EraseType(data);\n\n  auto rank = comm.Rank();\n  auto prev = BootstrapPrev(rank, comm.World());\n  auto next = BootstrapNext(rank, comm.World());\n\n  auto prev_ch = comm.Chan(prev);\n  auto next_ch = comm.Chan(next);\n  auto rc = cpu_impl::RingAllgather(comm, erased, n_bytes_per_segment, 0, prev_ch, next_ch);\n  if (!rc.OK()) {\n    return rc;\n  }\n  return comm.Block();\n}\n\ntemplate <typename T>\n[[nodiscard]] Result RingAllgatherV(Comm const& comm, common::Span<T> data,\n                                    std::vector<std::remove_cv_t<T>>* p_out) {\n  auto world = comm.World();\n  auto rank = comm.Rank();\n\n  std::vector<std::int64_t> sizes(world, 0);\n  sizes[rank] = data.size_bytes();\n  auto rc = RingAllgather(comm, common::Span{sizes.data(), sizes.size()});\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  std::vector<T>& result = *p_out;\n  auto n_total_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), 0);\n  result.resize(n_total_bytes / sizeof(T));\n  auto h_result = common::Span{result.data(), result.size()};\n  auto erased_result = common::EraseType(h_result);\n  auto erased_data = common::EraseType(data);\n  std::vector<std::int64_t> recv_segments(world + 1);\n  auto s_segments = common::Span{recv_segments.data(), recv_segments.size()};\n\n  // get worker offset\n  detail::AllgatherVOffset(sizes, s_segments);\n  // copy data\n  auto current = 
erased_result.subspan(recv_segments[rank], data.size_bytes());\n  std::copy_n(erased_data.data(), erased_data.size(), current.data());\n\n  return detail::RingAllgatherV(comm, sizes, s_segments, erased_result);\n}\n\ntemplate <typename T>\n[[nodiscard]] Result Allgather(Context const* ctx, CommGroup const& comm,\n                               linalg::VectorView<T> data) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  CHECK(data.Contiguous());\n  auto erased = common::EraseType(data.Values());\n\n  auto const& cctx = comm.Ctx(ctx, data.Device());\n  auto backend = comm.Backend(data.Device());\n  return backend->Allgather(cctx, erased);\n}\n\n/**\n * @brief Gather all data from all workers.\n *\n * @param data The input and output buffer, needs to be pre-allocated by the caller.\n */\ntemplate <typename T>\n[[nodiscard]] Result Allgather(Context const* ctx, linalg::VectorView<T> data) {\n  auto const& cg = *GlobalCommGroup();\n  if (data.Size() % cg.World() != 0) {\n    return Fail(\"The total number of elements should be multiple of the number of workers.\");\n  }\n  return Allgather(ctx, cg, data);\n}\n\ntemplate <typename T>\n[[nodiscard]] Result AllgatherV(Context const* ctx, CommGroup const& comm,\n                                linalg::VectorView<T> data,\n                                std::vector<std::int64_t>* recv_segments,\n                                HostDeviceVector<std::int8_t>* recv) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  std::vector<std::int64_t> sizes(comm.World(), 0);\n  sizes[comm.Rank()] = data.Values().size_bytes();\n  auto erased_sizes = common::EraseType(common::Span{sizes.data(), sizes.size()});\n  auto rc =\n      comm.Backend(DeviceOrd::CPU())->Allgather(comm.Ctx(ctx, DeviceOrd::CPU()), erased_sizes);\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  recv_segments->resize(sizes.size() + 1);\n  detail::AllgatherVOffset(sizes, common::Span{recv_segments->data(), recv_segments->size()});\n  auto 
total_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), 0LL);\n  recv->SetDevice(data.Device());\n  recv->Resize(total_bytes);\n\n  auto s_segments = common::Span{recv_segments->data(), recv_segments->size()};\n\n  auto backend = comm.Backend(data.Device());\n  auto erased = common::EraseType(data.Values());\n\n  return backend->AllgatherV(\n      comm.Ctx(ctx, data.Device()), erased, common::Span{sizes.data(), sizes.size()}, s_segments,\n      data.Device().IsCUDA() ? recv->DeviceSpan() : recv->HostSpan(),\n      data.Device().IsCUDA() ? AllgatherVAlgo::kBcast : AllgatherVAlgo::kRing);\n}\n\n/**\n * @brief Allgather with variable length data.\n *\n * @param data The input data.\n * @param recv_segments segment size for each worker.  [0, 2, 5] means [0, 2) elements are\n *                      from the first worker, [2, 5) elements are from the second one.\n * @param recv The buffer storing the result.\n */\ntemplate <typename T>\n[[nodiscard]] Result AllgatherV(Context const* ctx, linalg::VectorView<T> data,\n                                std::vector<std::int64_t>* recv_segments,\n                                HostDeviceVector<std::int8_t>* recv) {\n  return AllgatherV(ctx, *GlobalCommGroup(), data, recv_segments, recv);\n}\n\n[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(\n    Context const* ctx, CommGroup const& comm, std::vector<std::vector<char>> const& input);\n\n/**\n * @brief Gathers variable-length data from all processes and distributes it to all processes.\n *\n * @param inputs All the inputs from the local worker. The number of inputs can vary\n *               across different workers. 
Along with which, the size of each vector in\n *               the input can also vary.\n *\n * @return The AllgatherV result, containing vectors from all workers.\n */\n[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(\n    Context const* ctx, std::vector<std::vector<char>> const& input);\n\n/**\n * @brief Gathers variable-length strings from all processes and distributes them to all processes.\n * @param input Variable-length list of variable-length strings.\n */\n[[nodiscard]] inline Result AllgatherStrings(std::vector<std::string> const& input,\n                                             std::vector<std::string>* p_result) {\n  std::vector<std::vector<char>> inputs(input.size());\n  for (std::size_t i = 0; i < input.size(); ++i) {\n    inputs[i] = {input[i].cbegin(), input[i].cend()};\n  }\n  Context ctx;\n  auto out = VectorAllgatherV(&ctx, *GlobalCommGroup(), inputs);\n  auto& result = *p_result;\n  result.resize(out.size());\n  for (std::size_t i = 0; i < out.size(); ++i) {\n    result[i] = {out[i].cbegin(), out[i].cend()};\n  }\n  return Success();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/allreduce.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include \"allreduce.h\"\n\n#include <algorithm>  // for min\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t, int8_t\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../data/array_interface.h\"    // for Type, DispatchDType\n#include \"allgather.h\"                  // for RingAllgather\n#include \"comm.h\"                       // for Comm\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective::cpu_impl {\nnamespace {\ntemplate <typename T>\nResult RingAllreduceSmall(Comm const& comm, common::Span<std::int8_t> data, Func const& op) {\n  auto rank = comm.Rank();\n  auto world = comm.World();\n\n  auto next_ch = comm.Chan(BootstrapNext(rank, world));\n  auto prev_ch = comm.Chan(BootstrapPrev(rank, world));\n\n  std::vector<std::int8_t> buffer(data.size_bytes() * world, 0);\n  auto s_buffer = common::Span{buffer.data(), buffer.size()};\n\n  auto offset = data.size_bytes() * rank;\n  auto self = s_buffer.subspan(offset, data.size_bytes());\n  std::copy_n(data.data(), data.size_bytes(), self.data());\n\n  auto typed = common::RestoreType<T>(s_buffer);\n  auto rc = RingAllgather(comm, typed);\n\n  if (!rc.OK()) {\n    return Fail(\"Ring allreduce small failed.\", std::move(rc));\n  }\n  auto first = s_buffer.subspan(0, data.size_bytes());\n  CHECK_EQ(first.size(), data.size());\n\n  for (std::int32_t r = 1; r < world; ++r) {\n    auto offset = data.size_bytes() * r;\n    auto buf = s_buffer.subspan(offset, data.size_bytes());\n    op(buf, first);\n  }\n  std::copy_n(first.data(), first.size(), data.data());\n\n  return Success();\n}\n}  // namespace\n\ntemplate <typename T>\n// note that n_bytes_in_seg is calculated with round-down.\nResult RingScatterReduceTyped(Comm const& comm, common::Span<std::int8_t> data,\n                              
std::size_t n_bytes_in_seg, Func const& op) {\n  auto rank = comm.Rank();\n  auto world = comm.World();\n\n  auto dst_rank = BootstrapNext(rank, world);\n  auto src_rank = BootstrapPrev(rank, world);\n  auto next_ch = comm.Chan(dst_rank);\n  auto prev_ch = comm.Chan(src_rank);\n\n  std::vector<std::int8_t> buffer(data.size_bytes() - (world - 1) * n_bytes_in_seg, -1);\n  auto s_buf = common::Span{buffer.data(), buffer.size()};\n\n  for (std::int32_t r = 0; r < world - 1; ++r) {\n    common::Span<std::int8_t> seg, recv_seg;\n    auto rc = Success() << [&] {\n      // send to ring next\n      auto send_rank = (rank + world - r) % world;\n      auto send_off = send_rank * n_bytes_in_seg;\n\n      bool is_last_segment = send_rank == (world - 1);\n\n      auto seg_nbytes = is_last_segment ? data.size_bytes() - send_off : n_bytes_in_seg;\n      CHECK_EQ(seg_nbytes % sizeof(T), 0);\n\n      auto send_seg = data.subspan(send_off, seg_nbytes);\n      return next_ch->SendAll(send_seg);\n    } << [&] {\n      // receive from ring prev\n      auto recv_rank = (rank + world - r - 1) % world;\n      auto recv_off = recv_rank * n_bytes_in_seg;\n\n      bool is_last_segment = recv_rank == (world - 1);\n\n      auto seg_nbytes = is_last_segment ? 
(data.size_bytes() - recv_off) : n_bytes_in_seg;\n      CHECK_EQ(seg_nbytes % sizeof(T), 0);\n\n      recv_seg = data.subspan(recv_off, seg_nbytes);\n      seg = s_buf.subspan(0, recv_seg.size());\n      return prev_ch->RecvAll(seg);\n    } << [&] {\n      return comm.Block();\n    };\n    if (!rc.OK()) {\n      return Fail(\"Ring scatter reduce failed, current iteration:\" + std::to_string(r),\n                  std::move(rc));\n    }\n\n    // accumulate to recv_seg\n    CHECK_EQ(seg.size(), recv_seg.size());\n    op(seg, recv_seg);\n  }\n\n  return Success();\n}\n\nResult RingAllreduce(Comm const& comm, common::Span<std::int8_t> data, Func const& op,\n                     ArrayInterfaceHandler::Type type) {\n  if (comm.World() == 1) {\n    return Success();\n  }\n  if (data.size_bytes() == 0) {\n    return Success();\n  }\n  return DispatchDType(type, [&](auto t) {\n    using T = decltype(t);\n    // Divide the data into segments according to the number of workers.\n    auto n_bytes_elem = sizeof(T);\n    CHECK_EQ(data.size_bytes() % n_bytes_elem, 0);\n    auto n = data.size_bytes() / n_bytes_elem;\n    auto world = comm.World();\n    if (n < static_cast<decltype(n)>(world)) {\n      return RingAllreduceSmall<T>(comm, data, op);\n    }\n\n    auto n_bytes_in_seg = (n / world) * sizeof(T);\n    auto rc = RingScatterReduceTyped<T>(comm, data, n_bytes_in_seg, op);\n    if (!rc.OK()) {\n      return Fail(\"Ring Allreduce failed.\", std::move(rc));\n    }\n\n    auto prev = BootstrapPrev(comm.Rank(), comm.World());\n    auto next = BootstrapNext(comm.Rank(), comm.World());\n    auto prev_ch = comm.Chan(prev);\n    auto next_ch = comm.Chan(next);\n\n    return std::move(rc) << [&] {\n      return RingAllgather(comm, data, n_bytes_in_seg, 1, prev_ch, next_ch);\n    } << [&] {\n      return comm.Block();\n    };\n  });\n}\n}  // namespace xgboost::collective::cpu_impl\n"
  },
  {
    "path": "src/collective/allreduce.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>      // for int8_t\n#include <functional>   // for function\n#include <type_traits>  // for is_invocable_v, enable_if_t\n#include <vector>       // for vector\n\n#include \"../common/type.h\"             // for EraseType, RestoreType\n#include \"../data/array_interface.h\"    // for ToDType, ArrayInterfaceHandler\n#include \"broadcast.h\"                  // for Broadcast\n#include \"comm.h\"                       // for Comm, RestoreType\n#include \"comm_group.h\"                 // for GlobalCommGroup\n#include \"topo.h\"                       // for ParentLevel, Parent, Child\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/context.h\"            // for Context\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nnamespace cpu_impl {\nusing Func =\n    std::function<void(common::Span<std::int8_t const> lhs, common::Span<std::int8_t> out)>;\n\nResult RingAllreduce(Comm const& comm, common::Span<std::int8_t> data, Func const& op,\n                     ArrayInterfaceHandler::Type type);\n}  // namespace cpu_impl\n\ntemplate <typename T, typename Fn>\nstd::enable_if_t<std::is_invocable_v<Fn, common::Span<T const>, common::Span<T>>, Result> Allreduce(\n    Comm const& comm, common::Span<T> data, Fn redop) {\n  auto erased = common::EraseType(data);\n  auto type = ToDType<T>::kType;\n\n  auto erased_fn = [redop](common::Span<std::int8_t const> lhs, common::Span<std::int8_t> out) {\n    CHECK_EQ(lhs.size(), out.size()) << \"Invalid input for reduction.\";\n    auto lhs_t = common::RestoreType<T const>(lhs);\n    auto rhs_t = common::RestoreType<T>(out);\n    redop(lhs_t, rhs_t);\n  };\n\n  return cpu_impl::RingAllreduce(comm, erased, erased_fn, type);\n}\n\ntemplate <typename T, std::int32_t kDim>\n[[nodiscard]] Result Allreduce(Context const* ctx, CommGroup const& comm,\n                              
 linalg::TensorView<T, kDim> data, Op op) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  CHECK(data.Contiguous());\n  auto erased = common::EraseType(data.Values());\n  auto type = ToDType<T>::kType;\n\n  auto backend = comm.Backend(data.Device());\n  return backend->Allreduce(comm.Ctx(ctx, data.Device()), erased, type, op);\n}\n\ntemplate <typename T, std::int32_t kDim>\n[[nodiscard]] Result Allreduce(Context const* ctx, linalg::TensorView<T, kDim> data, Op op) {\n  return Allreduce(ctx, *GlobalCommGroup(), data, op);\n}\n\n/**\n * @brief Specialization for std::vector.\n */\ntemplate <typename T, typename Alloc>\n[[nodiscard]] Result Allreduce(Context const* ctx, std::vector<T, Alloc>* data, Op op) {\n  return Allreduce(ctx, linalg::MakeVec(data->data(), data->size()), op);\n}\n\n/**\n * @brief Specialization for scalar value.\n */\ntemplate <typename T>\n[[nodiscard]] std::enable_if_t<std::is_standard_layout_v<T> && std::is_trivial_v<T>, Result>\nAllreduce(Context const* ctx, T* data, Op op) {\n  return Allreduce(ctx, linalg::MakeVec(data, 1), op);\n}\n\n/**\n * @brief Allreduce a variable-length vector over `comm`.\n *\n * The method performs a tree reduction rooted at rank 0 using `redop`, then broadcasts\n * the result so every rank ends with the same reduced payload in `data`.\n *\n * `redop` must have the signature\n * `void(Fn(const Span<T const>& lhs, const Span<T const>& rhs, std::vector<T>* out))` and must\n * write the combined result into `out`.\n */\ntemplate <typename T, typename Fn>\nstd::enable_if_t<\n    std::is_invocable_v<Fn, common::Span<T const>, common::Span<T const>, std::vector<T>*>, Result>\nAllreduceV(Comm const& comm, std::vector<T>* data, Fn redop) {\n  static_assert(std::is_standard_layout_v<T> && std::is_trivially_copyable_v<T>,\n                \"AllreduceV supports only standard-layout trivially-copyable types.\");\n  CHECK(data);\n  if (!comm.IsDistributed() || comm.World() == 1) {\n    return Success();\n  }\n\n 
 auto const world = comm.World();\n  auto const rank = comm.Rank();\n  auto constexpr kRoot = 0;\n\n  auto send = [&](std::int32_t peer, std::vector<T> const& vec) {\n    std::int64_t n = static_cast<std::int64_t>(vec.size());\n    auto n_bytes =\n        common::Span<std::int8_t const>{reinterpret_cast<std::int8_t const*>(&n), sizeof(n)};\n    return Success() << [&] {\n      return comm.Chan(peer)->SendAll(n_bytes);\n    } << [&] {\n      if (n == 0) {\n        return Success();\n      }\n      auto payload_bytes = static_cast<std::size_t>(n) * sizeof(T);\n      auto bytes = common::Span<std::int8_t const>{reinterpret_cast<std::int8_t const*>(vec.data()),\n                                                   payload_bytes};\n      return comm.Chan(peer)->SendAll(bytes);\n    } << [&] {\n      return comm.Chan(peer)->Block();\n    };\n  };\n\n  auto recv = [&](std::int32_t peer, std::vector<T>* out) {\n    std::int64_t n = 0;\n    auto n_bytes = common::Span<std::int8_t>{reinterpret_cast<std::int8_t*>(&n), sizeof(n)};\n    auto rc = Success() << [&] {\n      return comm.Chan(peer)->RecvAll(n_bytes);\n    } << [&] {\n      return comm.Chan(peer)->Block();\n    };\n    if (!rc.OK()) {\n      return rc;\n    }\n    CHECK_GE(n, 0);\n    out->resize(static_cast<std::size_t>(n));\n    if (n == 0) {\n      return Success();\n    }\n    auto payload_bytes = static_cast<std::size_t>(n) * sizeof(T);\n    auto bytes =\n        common::Span<std::int8_t>{reinterpret_cast<std::int8_t*>(out->data()), payload_bytes};\n    return Success() << [&] {\n      return comm.Chan(peer)->RecvAll(bytes);\n    } << [&] {\n      return comm.Chan(peer)->Block();\n    };\n  };\n\n  std::vector<T> incoming;\n  std::vector<T> out;\n  bool continue_reduce = true;\n  for (std::int32_t level = 0; (std::int32_t{1} << level) < world; ++level) {\n    if (!continue_reduce) {\n      continue;\n    }\n    if (rank > 0 && binomial_tree::ParentLevel(rank) == level) {\n      auto parent = 
binomial_tree::Parent(rank);\n      auto rc = send(parent, *data);\n      if (!rc.OK()) {\n        return Fail(\"AllreduceV failed to send data to parent.\", std::move(rc));\n      }\n      continue_reduce = false;\n      continue;\n    }\n    if (binomial_tree::HasChild(rank, level, world)) {\n      auto child = binomial_tree::Child(rank, level);\n      auto rc = recv(child, &incoming);\n      if (!rc.OK()) {\n        return Fail(\"AllreduceV failed to receive data from child.\", std::move(rc));\n      }\n      out.clear();\n      redop(common::Span<T const>{data->data(), data->size()},\n            common::Span<T const>{incoming.data(), incoming.size()}, &out);\n      data->swap(out);\n    }\n  }\n\n  std::int64_t reduced_size = static_cast<std::int64_t>(rank == kRoot ? data->size() : 0);\n  auto rc = Broadcast(comm, common::Span<std::int64_t>{&reduced_size, 1}, kRoot);\n  if (!rc.OK()) {\n    return Fail(\"AllreduceV failed to broadcast reduced size.\", std::move(rc));\n  }\n  if (reduced_size == 0) {\n    data->clear();\n    return Success();\n  }\n  if (rank != kRoot) {\n    data->resize(static_cast<std::size_t>(reduced_size));\n  }\n  auto reduced = common::Span<T>{data->data(), static_cast<std::size_t>(reduced_size)};\n  rc = Broadcast(comm, reduced, kRoot);\n  if (!rc.OK()) {\n    return Fail(\"AllreduceV failed to broadcast reduced payload.\", std::move(rc));\n  }\n  return Success();\n}\n\ntemplate <typename T, typename Fn>\nstd::enable_if_t<\n    std::is_invocable_v<Fn, common::Span<T const>, common::Span<T const>, std::vector<T>*>, Result>\nAllreduceV(Context const* ctx, CommGroup const& comm, std::vector<T>* data, Fn redop) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  auto const& cctx = comm.Ctx(ctx, DeviceOrd::CPU());\n  return AllreduceV(cctx, data, redop);\n}\n\ntemplate <typename T, typename Fn>\nstd::enable_if_t<\n    std::is_invocable_v<Fn, common::Span<T const>, common::Span<T const>, std::vector<T>*>, 
Result>\nAllreduceV(Context const* ctx, std::vector<T>* data, Fn redop) {\n  return AllreduceV(ctx, *GlobalCommGroup(), data, redop);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/broadcast.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"broadcast.h\"\n\n#include <cstdint>  // for int32_t, int8_t\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"comm.h\"                       // for Comm, binomial_tree\n#include \"topo.h\"                       // for Parent, Child\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective::cpu_impl {\nnamespace {\n// Binomial tree broadcast using a fixed tree rooted at rank 0.\nResult BroadcastTree(Comm const& comm, common::Span<std::int8_t> data) {\n  auto rank = comm.Rank();\n  auto world = comm.World();\n\n  if (rank != 0) {\n    auto parent = binomial_tree::Parent(rank);\n    auto rc = Success() << [&] {\n      return comm.Chan(parent)->RecvAll(data);\n    } << [&] {\n      return comm.Chan(parent)->Block();\n    };\n    if (!rc.OK()) {\n      return Fail(\"broadcast failed.\", std::move(rc));\n    }\n  }\n\n  for (std::int32_t level = binomial_tree::Depth(world); level >= 0; --level) {\n    if (binomial_tree::HasChild(rank, level, world)) {\n      auto child = binomial_tree::Child(rank, level);\n      auto rc = comm.Chan(child)->SendAll(data);\n      if (!rc.OK()) {\n        return rc;\n      }\n    }\n  }\n\n  return comm.Block();\n}\n\n// Compute the path from `src` to rank 0 through the binomial tree (excluding 0).\nstd::vector<std::int32_t> TreePathToRoot(std::int32_t node) {\n  std::vector<std::int32_t> path;\n  auto cursor = node;\n  while (cursor > 0) {\n    path.push_back(cursor);\n    cursor = binomial_tree::Parent(cursor);\n  }\n  return path;\n}\n\n// Relay data from `node` up to rank 0 through the binomial tree.\n// Only nodes on the path from `node` to 0 participate; all others skip.\nResult RelayToRoot(Comm const& comm, common::Span<std::int8_t> data, std::int32_t node) {\n  auto rank = comm.Rank();\n  auto path = TreePathToRoot(node);\n\n  for 
(auto node : path) {\n    CHECK_GT(node, 0);\n    auto parent = binomial_tree::Parent(node);\n\n    if (rank == node) {\n      auto rc = Success() << [&] {\n        return comm.Chan(parent)->SendAll(data);\n      } << [&] {\n        return comm.Chan(parent)->Block();\n      };\n      if (!rc.OK()) {\n        return Fail(\"Relay broadcast: failed to send from \" + std::to_string(node), std::move(rc));\n      }\n    } else if (rank == parent) {\n      auto rc = Success() << [&] {\n        return comm.Chan(node)->RecvAll(data);\n      } << [&] {\n        return comm.Chan(node)->Block();\n      };\n      if (!rc.OK()) {\n        return Fail(\"Relay broadcast: failed to recv at \" + std::to_string(parent), std::move(rc));\n      }\n    }\n  }\n  return Success();\n}\n}  // namespace\n\nResult Broadcast(Comm const& comm, common::Span<std::int8_t> data, std::int32_t root) {\n  if (comm.World() <= 1) {\n    return Success();\n  }\n  CHECK(!data.empty());\n\n  if (root == 0) {\n    return BroadcastTree(comm, data);\n  }\n\n  // For non-zero root, relay data up to rank 0 through the tree, then broadcast.\n  return Success() << [&] {\n    return RelayToRoot(comm, data, root);\n  } << [&] {\n    return BroadcastTree(comm, data);\n  };\n}\n}  // namespace xgboost::collective::cpu_impl\n"
  },
  {
    "path": "src/collective/broadcast.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>  // for int32_t, int8_t\n\n#include \"../common/type.h\"\n#include \"comm.h\"                       // for Comm, EraseType\n#include \"comm_group.h\"                 // for CommGroup\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/context.h\"            // for Context\n#include \"xgboost/linalg.h\"             // for VectorView\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nnamespace cpu_impl {\nResult Broadcast(Comm const& comm, common::Span<std::int8_t> data, std::int32_t root);\n}\n\n/**\n * @brief binomial tree broadcast is used on CPU with the default implementation.\n */\ntemplate <typename T>\n[[nodiscard]] Result Broadcast(Comm const& comm, common::Span<T> data, std::int32_t root) {\n  auto n_total_bytes = data.size_bytes();\n  auto erased =\n      common::Span<std::int8_t>{reinterpret_cast<std::int8_t*>(data.data()), n_total_bytes};\n  return cpu_impl::Broadcast(comm, erased, root);\n}\n\ntemplate <typename T>\n[[nodiscard]] Result Broadcast(Context const* ctx, CommGroup const& comm,\n                               linalg::VectorView<T> data, std::int32_t root) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  CHECK(data.Contiguous());\n  auto erased = common::EraseType(data.Values());\n  auto backend = comm.Backend(data.Device());\n  return backend->Broadcast(comm.Ctx(ctx, data.Device()), erased, root);\n}\n\ntemplate <typename T>\n[[nodiscard]] Result Broadcast(Context const* ctx, linalg::VectorView<T> data, std::int32_t root) {\n  return Broadcast(ctx, *GlobalCommGroup(), data, root);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/coll.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include \"coll.h\"\n\n#include <algorithm>    // for min, max, copy_n\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int8_t, int64_t\n#include <functional>   // for bit_and, bit_or, bit_xor, plus\n#include <string>       // for string\n#include <type_traits>  // for is_floating_point_v, is_same_v\n#include <utility>      // for move\n\n#include \"../data/array_interface.h\"  // for ArrayInterfaceHandler\n#include \"allgather.h\"                // for RingAllgatherV, RingAllgather\n#include \"allreduce.h\"                // for Allreduce\n#include \"broadcast.h\"                // for Broadcast\n#include \"comm.h\"                     // for Comm\n\n#if defined(XGBOOST_USE_CUDA)\n#include \"cuda_fp16.h\"  // for __half\n#endif\n\nnamespace xgboost::collective {\ntemplate <typename T>\nbool constexpr IsFloatingPointV() {\n#if defined(XGBOOST_USE_CUDA)\n  return std::is_floating_point_v<T> || std::is_same_v<T, __half>;\n#else\n  return std::is_floating_point_v<T>;\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n\n[[nodiscard]] Result Coll::Allreduce(Comm const& comm, common::Span<std::int8_t> data,\n                                     ArrayInterfaceHandler::Type type, Op op) {\n  namespace coll = ::xgboost::collective;\n\n  auto redop_fn = [](auto lhs, auto out, auto elem_op) {\n    auto p_lhs = lhs.data();\n    auto p_out = out.data();\n#if defined(__GNUC__) || defined(__clang__)\n    // For the sum op, one can verify the simd by: addps  %xmm15, %xmm14\n#pragma omp simd\n#endif\n    for (std::size_t i = 0; i < lhs.size(); ++i) {\n      p_out[i] = elem_op(p_lhs[i], p_out[i]);\n    }\n  };\n\n  auto fn = [&](auto elem_op, auto t) {\n    using T = decltype(t);\n    auto erased_fn = [redop_fn, elem_op](common::Span<std::int8_t const> lhs,\n                                         common::Span<std::int8_t> out) {\n      CHECK_EQ(lhs.size(), out.size()) << \"Invalid input for 
reduction.\";\n      auto lhs_t = common::RestoreType<T const>(lhs);\n      auto rhs_t = common::RestoreType<T>(out);\n\n      redop_fn(lhs_t, rhs_t, elem_op);\n    };\n\n    return cpu_impl::RingAllreduce(comm, data, erased_fn, type);\n  };\n\n  std::string msg{\"Floating point is not supported for bit wise collective operations.\"};\n\n  auto rc = DispatchDType(type, [&](auto t) {\n    using T = decltype(t);\n    switch (op) {\n      case Op::kMax: {\n        return fn([](auto l, auto r) { return std::max(l, r); }, t);\n      }\n      case Op::kMin: {\n        return fn([](auto l, auto r) { return std::min(l, r); }, t);\n      }\n      case Op::kSum: {\n        return fn(std::plus<>{}, t);\n      }\n      case Op::kBitwiseAND: {\n        if constexpr (IsFloatingPointV<T>()) {\n          return Fail(msg);\n        } else {\n          return fn(std::bit_and<>{}, t);\n        }\n      }\n      case Op::kBitwiseOR: {\n        if constexpr (IsFloatingPointV<T>()) {\n          return Fail(msg);\n        } else {\n          return fn(std::bit_or<>{}, t);\n        }\n      }\n      case Op::kBitwiseXOR: {\n        if constexpr (IsFloatingPointV<T>()) {\n          return Fail(msg);\n        } else {\n          return fn(std::bit_xor<>{}, t);\n        }\n      }\n    }\n    return Fail(\"Invalid op.\");\n  });\n\n  return std::move(rc) << [&] { return comm.Block(); };\n}\n\n[[nodiscard]] Result Coll::Broadcast(Comm const& comm, common::Span<std::int8_t> data,\n                                     std::int32_t root) {\n  return cpu_impl::Broadcast(comm, data, root);\n}\n\n[[nodiscard]] Result Coll::Allgather(Comm const& comm, common::Span<std::int8_t> data) {\n  return RingAllgather(comm, data);\n}\n\n[[nodiscard]] Result Coll::AllgatherV(Comm const& comm, common::Span<std::int8_t const> data,\n                                      common::Span<std::int64_t const> sizes,\n                                      common::Span<std::int64_t> recv_segments,\n                       
               common::Span<std::int8_t> recv, AllgatherVAlgo algo) {\n  // get worker offset\n  detail::AllgatherVOffset(sizes, recv_segments);\n\n  // copy data\n  auto current = recv.subspan(recv_segments[comm.Rank()], data.size_bytes());\n  if (current.data() != data.data()) {\n    std::copy_n(data.data(), data.size(), current.data());\n  }\n\n  switch (algo) {\n    case AllgatherVAlgo::kRing:\n      return detail::RingAllgatherV(comm, sizes, recv_segments, recv);\n    case AllgatherVAlgo::kBcast:\n      return cpu_impl::BroadcastAllgatherV(comm, sizes, recv);\n    default: {\n      return Fail(\"Unknown algorithm for allgather-v\");\n    }\n  }\n}\n\n#if !defined(XGBOOST_USE_NCCL)\nColl* Coll::MakeCUDAVar() {\n  LOG(FATAL) << \"NCCL is required for device communication.\";\n  return nullptr;\n}\n#endif\n\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/coll.cu",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#if defined(XGBOOST_USE_NCCL)\n#include <chrono>               // for chrono, chrono_literals\n#include <cstddef>              // for size_t\n#include <cstdint>              // for int8_t, int64_t\n#include <functional>           // for bit_and, bit_or, bit_xor\n#include <future>               // for future, future_status\n#include <memory>               // for shared_ptr\n#include <mutex>                // for mutex, unique_lock\n#include <string>               // for string\n#include <thread>               // for this_thread\n#include <type_traits>          // for invoke_result_t, is_same_v, enable_if_t\n#include <utility>              // for move\n\n#include \"../common/cuda_stream.h\"       // for StreamRef, Event\n#include \"../common/device_helpers.cuh\"  // for device_vector\n#include \"../common/threadpool.h\"        // for ThreadPool\n#include \"../common/utils.h\"             // for MakeCleanup\n#include \"../data/array_interface.h\"     // for ArrayInterfaceHandler\n#include \"allgather.h\"                   // for AllgatherVOffset\n#include \"coll.cuh\"                      // for NCCLColl\n#include \"comm.cuh\"                      // for NCCLComm\n#include \"nccl.h\"                        // for ncclHalf, ncclFloat32, ...\n#include \"nccl_stub.h\"                   // for BusyWait\n#include \"xgboost/collective/result.h\"   // for Result, Fail\n#include \"xgboost/global_config.h\"       // for InitNewThread\n#include \"xgboost/span.h\"                // for Span\n\nnamespace xgboost::collective {\nColl* Coll::MakeCUDAVar() { return new NCCLColl{}; }\n\nNCCLColl::NCCLColl() : pool_{StringView{\"nccl-w\"}, 2, InitNewThread{}} {}\nNCCLColl::~NCCLColl() = default;\n\nnamespace {\nauto GetNCCLType(ArrayInterfaceHandler::Type type) {\n  auto fatal = [] {\n    LOG(FATAL) << \"Invalid type for NCCL operation.\";\n    return ncclHalf;  // dummy return to silent the compiler warning.\n  
};\n  using H = ArrayInterfaceHandler;\n  switch (type) {\n    case H::kF2:\n      return ncclHalf;\n    case H::kF4:\n      return ncclFloat32;\n    case H::kF8:\n      return ncclFloat64;\n    case H::kF16:\n      return fatal();\n    case H::kI1:\n      return ncclInt8;\n    case H::kI2:\n      return fatal();\n    case H::kI4:\n      return ncclInt32;\n    case H::kI8:\n      return ncclInt64;\n    case H::kU1:\n      return ncclUint8;\n    case H::kU2:\n      return fatal();\n    case H::kU4:\n      return ncclUint32;\n    case H::kU8:\n      return ncclUint64;\n  }\n  return fatal();\n}\n\nnamespace {\nstruct Chan {\n  std::mutex cv_lock;\n  std::condition_variable cv;\n  // Whether the collective operator is called.\n  std::atomic<bool> called{false};\n\n  void Notify() {\n    std::unique_lock lock{this->cv_lock};\n    this->called = true;\n    this->cv.notify_one();\n  }\n  void WaitFor(std::chrono::seconds timeout) {\n    std::unique_lock lock{cv_lock};\n    cv.wait_for(lock, timeout, [&] { return static_cast<bool>(this->called); });\n  }\n};\n}  // namespace\n\ntemplate <typename Fn, typename R = std::invoke_result_t<Fn, curt::StreamRef>>\n[[nodiscard]] std::enable_if_t<std::is_same_v<R, Result>, Result> AsyncLaunch(\n    common::ThreadPool* pool, NCCLComm const* nccl, std::shared_ptr<NcclStub> stub,\n    curt::StreamRef stream, Fn&& fn) {\n  curt::Event e0;\n  e0.Record(nccl->Stream());\n  stream.Wait(e0);\n\n  auto cleanup = common::MakeCleanup([&] {\n    curt::Event e1;\n    e1.Record(stream);\n    nccl->Stream().Wait(e1);\n  });\n\n  Chan chan;\n\n  auto busy_wait = [&](ncclResult_t* async_error) {\n    using std::chrono_literals::operator\"\"ms;\n    do {\n      auto rc = GetCUDAResult(stream.Sync(false));\n      if (!rc.OK()) {\n        return rc;\n      }\n      // async_error is set to success if abort is called.\n      rc = stub->CommGetAsyncError(nccl->Handle(), async_error);\n      if (!rc.OK()) {\n        return rc;\n      }\n      if 
(*async_error == ncclInProgress) {\n        std::this_thread::sleep_for(5ms);\n      }\n    } while (*async_error == ncclInProgress);\n    return stub->GetNcclResult(*async_error);\n  };\n\n  std::future<Result> fut = pool->Submit([&] {\n    ncclResult_t async_error = ncclSuccess;\n    return Success() << [&] {\n      ncclResult_t async_error;\n      auto rc = stub->CommGetAsyncError(nccl->Handle(), &async_error);\n      if (!rc.OK()) {\n        return rc;\n      }\n      CHECK_NE(async_error, ncclInProgress);\n\n      rc = fn(stream);\n\n      chan.Notify();\n\n      return rc;\n    } << [&] {\n      return busy_wait(&async_error);\n    } << [&] {\n      auto rc = stub->CommGetAsyncError(nccl->Handle(), &async_error);\n      if (async_error == ncclInProgress) {\n        return Fail(\"In progress after async wait.\", std::move(rc));\n      }\n      return rc;\n    };\n  });\n\n  chan.WaitFor(nccl->Timeout());\n\n  auto abort = [&](std::string msg) {\n    auto rc = stub->CommAbort(nccl->Handle());\n    fut.wait();  // Must block, otherwise the thread might access freed memory.\n    return Fail(msg + \": \" + std::to_string(nccl->Timeout().count()) + \"s.\") + std::move(rc);\n  };\n  if (!chan.called) {\n    // Timeout waiting for the NCCL op to return. 
With older versions of NCCL, the op\n    // might block even if the config is set to nonblocking.\n    return abort(\"NCCL future timeout\");\n  }\n\n  // This actually includes the time for prior kernels due to CUDA async calls.\n  switch (fut.wait_for(nccl->Timeout())) {\n    case std::future_status::timeout:\n      // Timeout waiting for the NCCL op to finish.\n      return abort(\"NCCL timeout\");\n    case std::future_status::ready:\n      return fut.get();\n    case std::future_status::deferred:\n      return Fail(\"Invalid future status.\");\n  }\n\n  return Fail(\"Unreachable\");\n}\n\nbool IsBitwiseOp(Op const& op) {\n  return op == Op::kBitwiseAND || op == Op::kBitwiseOR || op == Op::kBitwiseXOR;\n}\n\ntemplate <typename Func>\nvoid RunBitwiseAllreduce(curt::StreamRef stream, common::Span<std::int8_t> out_buffer,\n                         std::int8_t const* device_buffer, Func func, std::int32_t world_size,\n                         std::size_t size) {\n  dh::LaunchN(size, stream, [=] __device__(std::size_t idx) {\n    auto result = device_buffer[idx];\n    for (auto rank = 1; rank < world_size; rank++) {\n      result = func(result, device_buffer[rank * size + idx]);\n    }\n    out_buffer[idx] = result;\n  });\n}\n\n[[nodiscard]] Result BitwiseAllReduce(common::ThreadPool* pool, NCCLComm const* pcomm,\n                                      common::Span<std::int8_t> data, Op op,\n                                      curt::StreamRef stream) {\n  dh::device_vector<std::int8_t> buffer(data.size() * pcomm->World());\n  auto* device_buffer = buffer.data().get();\n  auto stub = pcomm->Stub();\n\n  // First gather data from all the workers.\n  auto rc = AsyncLaunch(pool, pcomm, stub, stream, [&](curt::StreamRef s) {\n    return stub->Allgather(data.data(), device_buffer, data.size(), ncclInt8, pcomm->Handle(), s);\n  });\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  // Then reduce locally.\n  switch (op) {\n    case Op::kBitwiseAND:\n      
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_and{}, pcomm->World(),\n                          data.size());\n      break;\n    case Op::kBitwiseOR:\n      RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_or{}, pcomm->World(),\n                          data.size());\n      break;\n    case Op::kBitwiseXOR:\n      RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_xor{}, pcomm->World(),\n                          data.size());\n      break;\n    default:\n      LOG(FATAL) << \"Not a bitwise reduce operation.\";\n  }\n  return Success();\n}\n\nncclRedOp_t GetNCCLRedOp(Op const& op) {\n  ncclRedOp_t result{ncclMax};\n  switch (op) {\n    case Op::kMax:\n      result = ncclMax;\n      break;\n    case Op::kMin:\n      result = ncclMin;\n      break;\n    case Op::kSum:\n      result = ncclSum;\n      break;\n    default:\n      LOG(FATAL) << \"Unsupported reduce operation.\";\n  }\n  return result;\n}\n}  // namespace\n\n[[nodiscard]] Result NCCLColl::Allreduce(Comm const& comm, common::Span<std::int8_t> data,\n                                         ArrayInterfaceHandler::Type type, Op op) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  auto nccl = dynamic_cast<NCCLComm const*>(&comm);\n  CHECK(nccl);\n  auto stub = nccl->Stub();\n\n  return Success() << [&] {\n    if (IsBitwiseOp(op)) {\n      return BitwiseAllReduce(&this->pool_, nccl, data, op, this->stream_.View());\n    } else {\n      return DispatchDType(type, [&](auto t) {\n        using T = decltype(t);\n        auto rdata = common::RestoreType<T>(data);\n        return AsyncLaunch(\n            &this->pool_, nccl, stub, this->stream_.View(), [&](curt::StreamRef s) {\n              return stub->Allreduce(data.data(), data.data(), rdata.size(), GetNCCLType(type),\n                                     GetNCCLRedOp(op), nccl->Handle(), s);\n            });\n      });\n    }\n  } << [&] {\n    return nccl->Block();\n  
};\n}\n\n[[nodiscard]] Result NCCLColl::Broadcast(Comm const& comm, common::Span<std::int8_t> data,\n                                         std::int32_t root) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  auto nccl = dynamic_cast<NCCLComm const*>(&comm);\n  CHECK(nccl);\n  auto stub = nccl->Stub();\n\n  return Success() << [&] {\n    return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(),\n                       [data, nccl, root, stub](curt::StreamRef s) {\n                         return stub->Broadcast(data.data(), data.data(), data.size_bytes(),\n                                                ncclInt8, root, nccl->Handle(), s);\n                       });\n  } << [&] {\n    return nccl->Block();\n  };\n}\n\n[[nodiscard]] Result NCCLColl::Allgather(Comm const& comm, common::Span<std::int8_t> data) {\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  auto nccl = dynamic_cast<NCCLComm const*>(&comm);\n  CHECK(nccl);\n  auto stub = nccl->Stub();\n  auto size = data.size_bytes() / comm.World();\n\n  auto send = data.subspan(comm.Rank() * size, size);\n  return Success() << [&] {\n    return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(),\n                       [send, data, size, nccl, stub](curt::StreamRef s) {\n                         return stub->Allgather(send.data(), data.data(), size, ncclInt8,\n                                                nccl->Handle(), s);\n                       });\n  } << [&] {\n    return nccl->Block();\n  };\n}\n\nnamespace cuda_impl {\n/**\n * @brief Implement allgather-v using broadcast.\n *\n * https://arxiv.org/abs/1812.05964\n */\nResult BroadcastAllgatherV(NCCLComm const* comm, curt::StreamRef s,\n                           common::Span<std::int8_t const> data,\n                           common::Span<std::int64_t const> sizes, common::Span<std::int8_t> recv) {\n  auto stub = comm->Stub();\n  return Success() << [&stub] {\n    return stub->GroupStart();\n  } << [&] {\n  
  std::size_t offset = 0;\n    for (std::int32_t r = 0; r < comm->World(); ++r) {\n      auto as_bytes = sizes[r];\n      auto rc = stub->Broadcast(data.data(), recv.subspan(offset, as_bytes).data(), as_bytes,\n                                ncclInt8, r, comm->Handle(), s);\n      if (!rc.OK()) {\n        return rc;\n      }\n      offset += as_bytes;\n    }\n    return Success();\n  } << [&] {\n    return stub->GroupEnd();\n  };\n}\n}  // namespace cuda_impl\n\n[[nodiscard]] Result NCCLColl::AllgatherV(Comm const& comm, common::Span<std::int8_t const> data,\n                                          common::Span<std::int64_t const> sizes,\n                                          common::Span<std::int64_t> recv_segments,\n                                          common::Span<std::int8_t> recv, AllgatherVAlgo algo) {\n  auto nccl = dynamic_cast<NCCLComm const*>(&comm);\n  CHECK(nccl);\n  if (!comm.IsDistributed()) {\n    return Success();\n  }\n  auto stub = nccl->Stub();\n\n  switch (algo) {\n    case AllgatherVAlgo::kRing: {\n      return Success() << [&] {\n        return stub->GroupStart();\n      } << [&] {\n        // get worker offset\n        detail::AllgatherVOffset(sizes, recv_segments);\n        // copy data\n        auto current = recv.subspan(recv_segments[comm.Rank()], data.size_bytes());\n        if (current.data() != data.data()) {\n          dh::safe_cuda(cudaMemcpyAsync(current.data(), data.data(), current.size_bytes(),\n                                        cudaMemcpyDeviceToDevice, nccl->Stream()));\n        }\n        return detail::RingAllgatherV(comm, sizes, recv_segments, recv);\n      } << [&] {\n        return stub->GroupEnd();\n      } << [&] {\n        return nccl->Block();\n      } << [&] {\n        return BusyWait(stub, nccl->Handle(), nccl->Timeout());\n      };\n    }\n    case AllgatherVAlgo::kBcast: {\n      return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(), [&](curt::StreamRef s) {\n        return 
cuda_impl::BroadcastAllgatherV(nccl, s, data, sizes, recv);\n      });\n    }\n    default: {\n      return Fail(\"Unknown algorithm for allgather-v\");\n    }\n  }\n}\n}  // namespace xgboost::collective\n\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "src/collective/coll.cuh",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <cstdint>  // for int8_t, int64_t\n\n#include \"../common/cuda_stream.h\"    // for Stream\n#include \"../common/threadpool.h\"     // for ThreadPool\n#include \"../data/array_interface.h\"  // for ArrayInterfaceHandler\n#include \"coll.h\"                     // for Coll\n#include \"comm.h\"                     // for Comm\n#include \"xgboost/span.h\"             // for Span\n\nnamespace xgboost::collective {\nclass NCCLColl : public Coll {\n  common::ThreadPool pool_;\n  curt::Stream stream_;\n\n public:\n  NCCLColl();\n  ~NCCLColl() override;\n\n  [[nodiscard]] Result Allreduce(Comm const& comm, common::Span<std::int8_t> data,\n                                 ArrayInterfaceHandler::Type type, Op op) override;\n  [[nodiscard]] Result Broadcast(Comm const& comm, common::Span<std::int8_t> data,\n                                 std::int32_t root) override;\n  [[nodiscard]] Result Allgather(Comm const& comm, common::Span<std::int8_t> data) override;\n  [[nodiscard]] Result AllgatherV(Comm const& comm, common::Span<std::int8_t const> data,\n                                  common::Span<std::int64_t const> sizes,\n                                  common::Span<std::int64_t> recv_segments,\n                                  common::Span<std::int8_t> recv, AllgatherVAlgo algo) override;\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/coll.h",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>  // for int8_t, int64_t\n#include <memory>   // for enable_shared_from_this\n\n#include \"../data/array_interface.h\"    // for ArrayInterfaceHandler\n#include \"comm.h\"                       // for Comm\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nenum class AllgatherVAlgo {\n  kRing = 0,   // use ring-based allgather-v\n  kBcast = 1,  // use broadcast-based allgather-v\n};\n\n/**\n * @brief Interface and base implementation for collective.\n */\nclass Coll : public std::enable_shared_from_this<Coll> {\n public:\n  Coll() = default;\n  virtual ~Coll() noexcept(false) {}  // NOLINT\n\n  virtual Coll* MakeCUDAVar();\n\n  /**\n   * @brief Allreduce\n   *\n   * @param [in,out] data Data buffer for input and output.\n   * @param [in] type data type.\n   * @param [in] op Reduce operation. For custom operation, user needs to reach down to\n   *             the CPU implementation.\n   */\n  [[nodiscard]] virtual Result Allreduce(Comm const& comm, common::Span<std::int8_t> data,\n                                         ArrayInterfaceHandler::Type type, Op op);\n  /**\n   * @brief Broadcast\n   *\n   * @param [in,out] data Data buffer for input and output.\n   * @param [in] root Root rank for broadcast.\n   */\n  [[nodiscard]] virtual Result Broadcast(Comm const& comm, common::Span<std::int8_t> data,\n                                         std::int32_t root);\n  /**\n   * @brief Allgather\n   *\n   * @param [in,out] data Data buffer for input and output.\n   */\n  [[nodiscard]] virtual Result Allgather(Comm const& comm, common::Span<std::int8_t> data);\n  /**\n   * @brief Allgather with variable length.\n   *\n   * @param [in] data Input data for the current worker.\n   * @param [in] sizes Size of the input from each worker.\n   * @param [out] recv_segments pre-allocated 
offset buffer for each worker in the output,\n   *              size should be equal to (world + 1). GPU broadcast-based implementation\n   *              doesn't use the buffer.\n   * @param [out] recv pre-allocated buffer for output.\n   */\n  [[nodiscard]] virtual Result AllgatherV(Comm const& comm, common::Span<std::int8_t const> data,\n                                          common::Span<std::int64_t const> sizes,\n                                          common::Span<std::int64_t> recv_segments,\n                                          common::Span<std::int8_t> recv, AllgatherVAlgo algo);\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/comm.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"comm.h\"\n\n#include <algorithm>  // for copy\n#include <chrono>     // for seconds\n#include <cstdint>    // for int32_t\n#include <cstdlib>    // for exit\n#include <memory>     // for shared_ptr\n#include <string>     // for string\n#include <thread>     // for thread\n#include <utility>    // for move, forward\n#if !defined(XGBOOST_USE_NCCL)\n#include \"../common/common.h\"           // for AssertNCCLSupport\n#endif                                  // !defined(XGBOOST_USE_NCCL)\n#include \"allgather.h\"                  // for RingAllgather\n#include \"protocol.h\"                   // for kMagic\n#include \"topo.h\"                       // for BootstrapNext, BootstrapPrev\n#include \"xgboost/base.h\"               // for XGBOOST_STRICT_R_MODE\n#include \"xgboost/collective/socket.h\"  // for TCPSocket\n#include \"xgboost/global_config.h\"      // for InitNewThread\n#include \"xgboost/json.h\"               // for Json, Object\n#include \"xgboost/string_view.h\"        // for StringView\n\nnamespace xgboost::collective {\nComm::Comm(std::string const& host, std::int32_t port, std::chrono::seconds timeout,\n           std::int32_t retry, std::string task_id)\n    : timeout_{timeout}, retry_{retry}, tracker_{host, port, -1}, task_id_{std::move(task_id)} {}\n\nResult ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, std::int32_t retry,\n                          std::string const& task_id, TCPSocket* out, std::int32_t rank,\n                          std::int32_t world) {\n  // Get information from the tracker\n  CHECK(!info.host.empty());\n  TCPSocket& tracker = *out;\n  return Success() << [&] {\n    auto rc = Connect(info.host, info.port, retry, timeout, out);\n    if (rc.OK()) {\n      return rc;\n    } else {\n      return Fail(\"Failed to connect to the tracker.\", std::move(rc));\n    }\n  } << [&] {\n    return tracker.NonBlocking(false);\n  } << [&] 
{\n    return tracker.RecvTimeout(timeout);\n  } << [&] {\n    return proto::Magic{}.Verify(&tracker);\n  } << [&] {\n    return proto::Connect{}.WorkerSend(&tracker, world, rank, task_id);\n  } << [&] {\n    LOG(INFO) << \"Task \" << task_id << \" connected to the tracker\";\n    return Success();\n  };\n}\n\n[[nodiscard]] Result Comm::ConnectTracker(TCPSocket* out) const {\n  return ConnectTrackerImpl(this->TrackerInfo(), this->Timeout(), this->retry_, this->task_id_, out,\n                            this->Rank(), this->World());\n}\n\n// Connect to a peer (outgoing), sending our rank for identification.\n[[nodiscard]] Result ConnectPeer(proto::PeerInfo const& peer, std::int32_t my_rank,\n                                 std::chrono::seconds timeout, std::int32_t retry,\n                                 std::shared_ptr<TCPSocket>* out) {\n  auto sock = std::make_shared<TCPSocket>();\n  auto rc = Success() << [&] {\n    return Connect(peer.host, peer.port, retry, timeout, sock.get());\n  } << [&] {\n    return sock->RecvTimeout(timeout);\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n  std::size_t n_bytes{0};\n  rc = sock->SendAll(&my_rank, sizeof(my_rank), &n_bytes);\n  if (!rc.OK()) {\n    return rc;\n  }\n  if (n_bytes != sizeof(my_rank)) {\n    return Fail(\"Failed to send rank.\");\n  }\n  *out = std::move(sock);\n  return Success();\n}\n\n// Accept a connection from a peer (incoming), receiving their rank.\n[[nodiscard]] Result AcceptPeer(TCPSocket* listener, std::chrono::seconds timeout,\n                                std::int32_t* out_rank, std::shared_ptr<TCPSocket>* out) {\n  auto sock = std::make_shared<TCPSocket>();\n  auto rc = Success() << [&] {\n    SockAddress addr;\n    return listener->Accept(sock.get(), &addr);\n  } << [&] {\n    return sock->RecvTimeout(timeout);\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n  std::int32_t rank{-1};\n  std::size_t n_bytes{0};\n  rc = sock->RecvAll(&rank, sizeof(rank), &n_bytes);\n  if (!rc.OK()) {\n    
return rc;\n  }\n  if (n_bytes != sizeof(rank)) {\n    return Fail(\"Failed to recv rank.\");\n  }\n  *out_rank = rank;\n  *out = std::move(sock);\n  return Success();\n}\n\n// Workers connect to a sparse subset of peers: ring neighbors plus binomial tree\n// neighbors (rooted at rank 0). This gives O(log n) connections per worker instead\n// of O(n). The bootstrap ring sockets are reused as the final ring channels.\n[[nodiscard]] Result ConnectWorkers(Comm const& comm, TCPSocket* listener, std::int32_t lport,\n                                    proto::PeerInfo ninfo, std::chrono::seconds timeout,\n                                    std::int32_t retry,\n                                    std::vector<std::shared_ptr<TCPSocket>>* out_workers) {\n  // Establish ring connections and exchange peer info.\n  auto next = std::make_shared<TCPSocket>();\n  auto prev = std::make_shared<TCPSocket>();\n\n  auto rc = Success() << [&] {\n    auto rc = Connect(ninfo.host, ninfo.port, retry, timeout, next.get());\n    if (!rc.OK()) {\n      return Fail(\"Bootstrap failed to connect to ring next.\", std::move(rc));\n    }\n    return rc;\n  } << [&] {\n    return next->NonBlocking(true);\n  } << [&] {\n    SockAddress addr;\n    return listener->Accept(prev.get(), &addr);\n  } << [&] {\n    return prev->NonBlocking(true);\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  std::vector<std::int8_t> buffer(HOST_NAME_MAX * comm.World(), 0);\n  auto s_buffer = common::Span{buffer.data(), buffer.size()};\n  auto next_host = s_buffer.subspan(HOST_NAME_MAX * comm.Rank(), HOST_NAME_MAX);\n  if (next_host.size() < ninfo.host.size()) {\n    return Fail(\"Got an invalid host name.\");\n  }\n  std::copy(ninfo.host.cbegin(), ninfo.host.cend(), next_host.begin());\n\n  auto prev_ch = std::make_shared<Channel>(comm, prev);\n  auto next_ch = std::make_shared<Channel>(comm, next);\n\n  auto block = [&] {\n    for (auto ch : {prev_ch, next_ch}) {\n      auto rc = ch->Block();\n      if (!rc.OK()) 
{\n        return rc;\n      }\n    }\n    return Success();\n  };\n\n  rc = std::move(rc) << [&] {\n    return cpu_impl::RingAllgather(comm, s_buffer, HOST_NAME_MAX, 0, prev_ch, next_ch);\n  } << [&] {\n    return block();\n  };\n  if (!rc.OK()) {\n    return Fail(\"Failed to get host names from peers.\", std::move(rc));\n  }\n\n  std::vector<std::int32_t> peers_port(comm.World(), -1);\n  peers_port[comm.Rank()] = ninfo.port;\n  rc = std::move(rc) << [&] {\n    auto s_ports = common::Span{reinterpret_cast<std::int8_t*>(peers_port.data()),\n                                peers_port.size() * sizeof(ninfo.port)};\n    return cpu_impl::RingAllgather(comm, s_ports, sizeof(ninfo.port), 0, prev_ch, next_ch);\n  } << [&] {\n    return block();\n  };\n  if (!rc.OK()) {\n    return Fail(\"Failed to get the port from peers.\", std::move(rc));\n  }\n\n  std::vector<proto::PeerInfo> peers(comm.World());\n  for (auto r = 0; r < comm.World(); ++r) {\n    auto nhost = s_buffer.subspan(HOST_NAME_MAX * r, HOST_NAME_MAX);\n    auto nport = peers_port[r];\n    auto nrank = BootstrapNext(r, comm.World());\n    peers[nrank] = {std::string{reinterpret_cast<char const*>(nhost.data())}, nport, nrank};\n  }\n  CHECK_EQ(peers[comm.Rank()].port, lport);\n  for (auto const& p : peers) {\n    CHECK_NE(p.port, -1);\n  }\n\n  // Connect to sparse peer set, reusing bootstrap ring sockets.\n  auto my_peers = SparsePeers(comm.Rank(), comm.World());\n\n  auto& workers = *out_workers;\n  workers.resize(comm.World());\n\n  auto next_rank = BootstrapNext(comm.Rank(), comm.World());\n  auto prev_rank = BootstrapPrev(comm.Rank(), comm.World());\n  if (next_rank == prev_rank) {\n    // world == 2: both ring neighbors are the same rank. We have two TCP\n    // connections but only one slot. 
Keep the one where the lower-ranked\n    // worker initiated (next), matching the all-to-all convention.\n    if (comm.Rank() < next_rank) {\n      workers[next_rank] = std::move(next);\n    } else {\n      workers[prev_rank] = std::move(prev);\n    }\n  } else {\n    workers[next_rank] = std::move(next);\n    workers[prev_rank] = std::move(prev);\n  }\n\n  // For each peer pair, the lower-ranked worker initiates (connect) and the higher-ranked\n  // worker accepts. Ring sockets are already in place, so only tree-only peers need new\n  // connections.\n  for (auto r : my_peers) {\n    if (r > comm.Rank() && !workers[r]) {\n      rc = ConnectPeer(peers[r], comm.Rank(), timeout, retry, &workers[r]);\n      if (!rc.OK()) {\n        return Fail(\"Failed to connect to peer \" + std::to_string(r), std::move(rc));\n      }\n    }\n  }\n\n  // Accept connections from lower-ranked tree peers that weren't already covered by the\n  // ring. The exact arrival order is unspecified, so we accept n_accept times and use the\n  // rank sent by each peer to place the socket in the right slot.\n  std::int32_t n_accept = 0;\n  for (auto r : my_peers) {\n    if (r < comm.Rank() && !workers[r]) {\n      ++n_accept;\n    }\n  }\n  for (std::int32_t i = 0; i < n_accept; ++i) {\n    std::int32_t peer_rank{-1};\n    std::shared_ptr<TCPSocket> sock;\n    rc = AcceptPeer(listener, timeout, &peer_rank, &sock);\n    if (!rc.OK()) {\n      return Fail(\"Failed to accept from peer.\", std::move(rc));\n    }\n    workers.at(peer_rank) = std::move(sock);\n  }\n\n  for (auto r : my_peers) {\n    CHECK(workers[r]) << \"Peer \" << r << \" not connected for rank \" << comm.Rank();\n  }\n\n  return Success();\n}\n\nnamespace {\nstd::string InitLog(std::string task_id, std::int32_t rank) {\n  if (task_id.empty()) {\n    return \"Rank \" + std::to_string(rank);\n  }\n  return \"Task \" + task_id + \" got rank \" + std::to_string(rank);\n}\n}  // namespace\n\nRabitComm::RabitComm(std::string const& 
tracker_host, std::int32_t tracker_port,\n                     std::chrono::seconds timeout, std::int32_t retry, std::string task_id,\n                     StringView nccl_path, std::int32_t worker_port)\n    : HostComm{tracker_host, tracker_port, timeout, retry, std::move(task_id)},\n      nccl_path_{std::move(nccl_path)},\n      worker_port_{worker_port} {\n  if (this->TrackerInfo().host.empty()) {\n    // Not in a distributed environment.\n    LOG(CONSOLE) << InitLog(task_id_, rank_);\n    return;\n  }\n\n  loop_.reset(new Loop{std::chrono::seconds{timeout_}});  // NOLINT\n  auto rc = this->Bootstrap(timeout_, retry_, task_id_, worker_port_);\n  if (!rc.OK()) {\n    this->ResetState();\n    SafeColl(Fail(\"Failed to bootstrap the communication group.\", std::move(rc)));\n  }\n}\n\n#if !defined(XGBOOST_USE_NCCL)\nComm* RabitComm::MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const {\n  common::AssertGPUSupport();\n  common::AssertNCCLSupport();\n  return nullptr;\n}\n#endif  //  !defined(XGBOOST_USE_NCCL)\n\n[[nodiscard]] Result RabitComm::Bootstrap(std::chrono::seconds timeout, std::int32_t retry,\n                                          std::string task_id, std::int32_t worker_port) {\n  TCPSocket tracker;\n  std::int32_t world{-1};\n  auto rc = ConnectTrackerImpl(this->TrackerInfo(), timeout, retry, task_id, &tracker, this->Rank(),\n                               world);\n  if (!rc.OK()) {\n    return Fail(\"Bootstrap failed.\", std::move(rc));\n  }\n\n  this->domain_ = tracker.Domain();\n\n  // Start command\n  TCPSocket listener = TCPSocket::Create(tracker.Domain());\n  std::int32_t lport{worker_port};\n  rc = std::move(rc) << [&] {\n    if (lport > 0) {\n      // User-specified port, bind to INADDR_ANY with the given port.\n      auto addr = (tracker.Domain() == SockDomain::kV6) ? 
\"::\" : \"0.0.0.0\";\n      return listener.Bind(addr, &lport);\n    }\n    // Default: let the OS pick an available port.\n    return listener.BindHost(&lport);\n  } << [&] {\n    return listener.Listen();\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  // create worker for listening to error notice.\n  auto domain = tracker.Domain();\n  std::shared_ptr<TCPSocket> error_sock{TCPSocket::CreatePtr(domain)};\n  std::int32_t eport{0};\n  rc = std::move(rc) << [&] {\n    return error_sock->BindHost(&eport);\n  } << [&] {\n    return error_sock->Listen();\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n  error_port_ = eport;\n\n  error_worker_ = std::thread{[error_sock = std::move(error_sock), init = InitNewThread{}] {\n    init();\n    TCPSocket conn;\n    SockAddress addr;\n    auto rc = error_sock->Accept(&conn, &addr);\n    // On Linux, a shutdown causes an invalid argument error;\n    if (rc.Code() == std::errc::invalid_argument) {\n      return;\n    }\n    // On Windows, accept returns a closed socket after finalize.\n    if (conn.IsClosed()) {\n      return;\n    }\n    // The error signal is from the tracker, while shutdown signal is from the shutdown method\n    // of the RabitComm class (this).\n    bool is_error{false};\n    rc = proto::Error{}.RecvSignal(&conn, &is_error);\n    if (!rc.OK()) {\n      LOG(WARNING) << rc.Report();\n      return;\n    }\n    if (!is_error) {\n      return;  // shutdown\n    }\n\n    LOG(WARNING) << \"Another worker is running into error.\";\n#if !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0\n    // exit is nicer than abort as the former performs cleanups.\n    std::exit(-1);\n#else\n    LOG(FATAL) << \"abort\";\n#endif\n  }};\n  // The worker thread is detached here to avoid the need to handle it later during\n  // destruction. 
For C++, if a thread is not joined or detached, it will segfault during\n  // destruction.\n  error_worker_.detach();\n\n  proto::Start start;\n  rc = std::move(rc) << [&] {\n    return start.WorkerSend(lport, &tracker, eport);\n  } << [&] {\n    return start.WorkerRecv(&tracker, &world);\n  };\n  if (!rc.OK()) {\n    return rc;\n  }\n  this->world_ = world;\n\n  // get ring neighbors\n  std::string snext;\n  rc = tracker.Recv(&snext);\n  if (!rc.OK()) {\n    return Fail(\"Failed to receive the rank for the next worker.\", std::move(rc));\n  }\n  auto jnext = Json::Load(StringView{snext});\n\n  proto::PeerInfo ninfo{jnext};\n  // get the rank of this worker\n  this->rank_ = BootstrapPrev(ninfo.rank, world);\n  this->tracker_.rank = rank_;\n\n  std::vector<std::shared_ptr<TCPSocket>> workers;\n  rc = ConnectWorkers(*this, &listener, lport, ninfo, timeout, retry, &workers);\n  if (!rc.OK()) {\n    return Fail(\"Failed to connect to other workers.\", std::move(rc));\n  }\n\n  CHECK(this->channels_.empty());\n  for (auto& w : workers) {\n    if (w) {\n      rc = std::move(rc) << [&] {\n        return w->SetNoDelay();\n      } << [&] {\n        return w->NonBlocking(true);\n      } << [&] {\n        return w->SetKeepAlive();\n      };\n      if (!rc.OK()) {\n        return rc;\n      }\n      this->channels_.emplace_back(std::make_shared<Channel>(*this, w));\n    } else {\n      this->channels_.emplace_back(nullptr);\n    }\n  }\n\n  LOG(CONSOLE) << InitLog(task_id_, rank_);\n  return rc;\n}\n\nRabitComm::~RabitComm() noexcept(false) {\n  if (!this->IsDistributed()) {\n    return;\n  }\n  LOG(WARNING) << \"The communicator is being destroyed without a call to shutdown first. 
This can \"\n                  \"lead to undefined behaviour.\";\n  auto rc = this->Shutdown();\n  if (!rc.OK()) {\n    LOG(WARNING) << rc.Report();\n  }\n}\n\n[[nodiscard]] Result RabitComm::Shutdown() {\n  if (!this->IsDistributed()) {\n    return Success();\n  }\n  // Tell the tracker that this worker is shutting down.\n  TCPSocket tracker;\n  // Tell the error handling thread that we are shutting down.\n  TCPSocket err_client;\n\n  auto rc = Success() << [&] {\n    return ConnectTrackerImpl(tracker_, timeout_, retry_, task_id_, &tracker, Rank(), World());\n  } << [&] {\n    return this->Block();\n  } << [&] {\n    return proto::ShutdownCMD{}.Send(&tracker);\n  } << [&] {\n    this->channels_.clear();\n    return Success();\n  } << [&] {\n    // Use tracker address to determine whether we want to use IPv6.\n    auto taddr = MakeSockAddress(xgboost::StringView{this->tracker_.host}, this->tracker_.port);\n    // Shutdown the error handling thread. We signal the thread through socket,\n    // alternatively, we can get the native handle and use pthread_cancel. But using a\n    // socket seems to be clearer as we know what's happening.\n    auto const& addr = taddr.IsV4() ? SockAddrV4::Loopback().Addr() : SockAddrV6::Loopback().Addr();\n    // We use hardcoded 10 seconds and 1 retry here since we are just connecting to a\n    // local socket. 
For a normal OS, this should be enough time to schedule the\n    // connection.\n    auto rc = Connect(StringView{addr}, this->error_port_, 1,\n                      std::min(std::chrono::seconds{10}, timeout_), &err_client);\n    this->ResetState();\n    if (!rc.OK()) {\n      return Fail(\"Failed to connect to the error socket.\", std::move(rc));\n    }\n    return rc;\n  } << [&] {\n    // We put error thread shutdown at the end so that we have a better chance to finish\n    // the previous more important steps.\n    return proto::Error{}.SignalShutdown(&err_client);\n  };\n  if (!rc.OK()) {\n    return Fail(\"Failed to shutdown.\", std::move(rc));\n  }\n  return rc;\n}\n\n[[nodiscard]] Result RabitComm::LogTracker(std::string msg) const {\n  if (!this->IsDistributed()) {\n    LOG(CONSOLE) << msg;\n    return Success();\n  }\n  TCPSocket out;\n  proto::Print print;\n  return Success() << [&] {\n    return this->ConnectTracker(&out);\n  } << [&] {\n    return print.WorkerSend(&out, msg);\n  };\n}\n\n[[nodiscard]] Result RabitComm::SignalError(Result const& res) {\n  TCPSocket tracker;\n  return Success() << [&] {\n    return this->ConnectTracker(&tracker);\n  } << [&] {\n    return proto::ErrorCMD{}.WorkerSend(&tracker, res);\n  };\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/comm.cu",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#if defined(XGBOOST_USE_NCCL)\n#include <algorithm>  // for sort\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint64_t, int8_t\n#include <cstring>    // for memcpy\n#include <memory>     // for shared_ptr\n#include <sstream>    // for stringstream\n#include <vector>     // for vector\n\n#include \"../common/cuda_context.cuh\"   // for CUDAContext\n#include \"../common/cuda_rt_utils.h\"    // for SetDevice, GetUuid, PrintUuid\n#include \"../common/type.h\"             // for EraseType\n#include \"comm.cuh\"                     // for NCCLComm\n#include \"comm.h\"                       // for Comm\n#include \"nccl_stub.h\"                  // for NcclStub\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\nnamespace {\nResult GetUniqueId(Comm const& comm, std::shared_ptr<NcclStub> stub, std::shared_ptr<Coll> coll,\n                   ncclUniqueId* pid) {\n  static const int kRootRank = 0;\n  ncclUniqueId id;\n  if (comm.Rank() == kRootRank) {\n    auto rc = stub->GetUniqueId(&id);\n    SafeColl(rc);\n  }\n  auto rc = coll->Broadcast(\n      comm, common::Span{reinterpret_cast<std::int8_t*>(&id), sizeof(ncclUniqueId)}, kRootRank);\n  if (!rc.OK()) {\n    return rc;\n  }\n  *pid = id;\n  return Success();\n}\n}  // namespace\n\nComm* RabitComm::MakeCUDAVar(Context const* ctx, std::shared_ptr<Coll> pimpl) const {\n  return new NCCLComm{ctx, *this, pimpl, StringView{this->nccl_path_}};\n}\n\nNCCLComm::NCCLComm(Context const* ctx, Comm const& root, std::shared_ptr<Coll> pimpl,\n                   StringView nccl_path)\n    : Comm{root.TrackerInfo().host, root.TrackerInfo().port, root.Timeout(), root.Retry(),\n           root.TaskID()},\n      stream_{ctx->CUDACtx()->Stream()} {\n  this->world_ = root.World();\n  this->rank_ = root.Rank();\n  this->domain_ = root.Domain();\n  if 
(!root.IsDistributed()) {\n    return;\n  }\n\n  curt::SetDevice(ctx->Ordinal());\n  stub_ = std::make_shared<NcclStub>(nccl_path);\n\n  std::vector<unsigned char> uuids(root.World() * curt::kUuidLength, 0);\n  auto s_uuid = common::Span{uuids.data(), uuids.size()};\n  auto s_this_uuid = s_uuid.subspan(root.Rank() * curt::kUuidLength, curt::kUuidLength);\n  curt::GetUuid(s_this_uuid, ctx->Ordinal());\n\n  auto rc = pimpl->Allgather(root, common::EraseType(s_uuid));\n  SafeColl(rc);\n\n  std::vector<common::Span<unsigned char>> converted(root.World());\n  std::size_t j = 0;\n  for (size_t i = 0; i < uuids.size(); i += curt::kUuidLength) {\n    converted[j] = s_uuid.subspan(i, curt::kUuidLength);\n    j++;\n  }\n\n  std::sort(converted.begin(), converted.end());\n  auto iter = std::unique(converted.begin(), converted.end());\n  auto n_uniques = std::distance(converted.begin(), iter);\n\n  CHECK_EQ(n_uniques, root.World())\n      << \"Multiple processes within communication group running on same CUDA \"\n      << \"device is not supported. 
\" << curt::PrintUuid(s_this_uuid) << \"\\n\";\n\n  rc = std::move(rc) << [&] {\n    return GetUniqueId(root, this->stub_, pimpl, &nccl_unique_id_);\n  } << [&] {\n    ncclConfig_t config = NCCL_CONFIG_INITIALIZER;\n    config.blocking = 0;\n    return this->stub_->CommInitRankConfig(&nccl_comm_, root.World(), nccl_unique_id_, root.Rank(),\n                                           &config);\n  } << [&] {\n    return BusyWait(this->stub_, this->nccl_comm_, this->Timeout());\n  };\n  SafeColl(rc);\n\n  for (std::int32_t r = 0; r < root.World(); ++r) {\n    this->channels_.emplace_back(\n        std::make_shared<NCCLChannel>(root, r, nccl_comm_, stub_, curt::DefaultStream()));\n  }\n}\n\nNCCLComm::~NCCLComm() {\n  if (nccl_comm_) {\n    auto rc = Success() << [this] {\n      return this->stub_->CommFinalize(this->nccl_comm_);\n    } << [this] {\n      auto rc = BusyWait(this->stub_, this->nccl_comm_, this->Timeout());\n      if (!rc.OK()) {\n        return std::move(rc) + this->stub_->CommAbort(this->nccl_comm_);\n      }\n      return rc;\n    } << [this] {\n      return this->stub_->CommDestroy(this->nccl_comm_);\n    };\n    if (!rc.OK()) {\n      LOG(WARNING) << rc.Report();\n    }\n  }\n  nccl_comm_ = nullptr;\n}\n}  // namespace xgboost::collective\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "src/collective/comm.cuh",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#pragma once\n\n#ifdef XGBOOST_USE_NCCL\n#include \"nccl.h\"\n#endif  // XGBOOST_USE_NCCL\n\n#include <utility>  // for move\n\n#include \"../common/cuda_stream.h\"  // for StreamRef\n#include \"coll.h\"\n#include \"comm.h\"\n#include \"nccl_stub.h\"  // for NcclStub\n#include \"xgboost/context.h\"\n\nnamespace xgboost::collective {\n\ninline Result GetCUDAResult(cudaError rc) {\n  if (rc == cudaSuccess) {\n    return Success();\n  }\n  std::string msg = thrust::system_error(rc, thrust::cuda_category()).what();\n  return Fail(msg);\n}\n\n#if defined(XGBOOST_USE_NCCL)\nclass NCCLComm : public Comm {\n  ncclComm_t nccl_comm_{nullptr};\n  std::shared_ptr<NcclStub> stub_;\n  ncclUniqueId nccl_unique_id_{};\n  curt::StreamRef stream_;\n  std::string nccl_path_;\n\n public:\n  [[nodiscard]] ncclComm_t Handle() const { return nccl_comm_; }\n  auto Stub() const { return stub_; }\n\n  explicit NCCLComm(Context const* ctx, Comm const& root, std::shared_ptr<Coll> pimpl,\n                    StringView nccl_path);\n  [[nodiscard]] Result LogTracker(std::string) const override {\n    LOG(FATAL) << \"Device comm is used for logging.\";\n    return Fail(\"Undefined.\");\n  }\n  ~NCCLComm() override;\n  [[nodiscard]] bool IsFederated() const override { return false; }\n  [[nodiscard]] curt::StreamRef Stream() const { return stream_; }\n  [[nodiscard]] Result Block() const override {\n    auto rc = this->Stream().Sync(false);\n    return GetCUDAResult(rc);\n  }\n  [[nodiscard]] Result Shutdown() final {\n    this->ResetState();\n    return Success();\n  }\n};\n\nclass NCCLChannel : public Channel {\n  std::int32_t rank_{-1};\n  ncclComm_t nccl_comm_{};\n  std::shared_ptr<NcclStub> stub_;\n  curt::StreamRef stream_;\n\n public:\n  explicit NCCLChannel(Comm const& comm, std::int32_t rank, ncclComm_t nccl_comm,\n                       std::shared_ptr<NcclStub> stub, curt::StreamRef stream)\n      : rank_{rank},\n  
      nccl_comm_{nccl_comm},\n        stub_{std::move(stub)},\n        Channel{comm, nullptr},\n        stream_{std::move(stream)} {}\n\n  [[nodiscard]] Result SendAll(std::int8_t const* ptr, std::size_t n) override {\n    return stub_->Send(ptr, n, ncclInt8, rank_, nccl_comm_, stream_);\n  }\n  [[nodiscard]] Result RecvAll(std::int8_t* ptr, std::size_t n) override {\n    return stub_->Recv(ptr, n, ncclInt8, rank_, nccl_comm_, stream_);\n  }\n  [[nodiscard]] Result Block() override {\n    auto rc = stream_.Sync(false);\n    return GetCUDAResult(rc);\n  }\n};\n\n#endif  //  defined(XGBOOST_USE_NCCL)\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/comm.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#pragma once\n#include <chrono>   // for seconds\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t, int64_t\n#include <memory>   // for shared_ptr\n#include <string>   // for string\n#include <thread>   // for thread\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"loop.h\"                       // for Loop\n#include \"protocol.h\"                   // for PeerInfo\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/collective/socket.h\"  // for TCPSocket, GetHostName\n#include \"xgboost/context.h\"            // for Context\n#include \"xgboost/span.h\"               // for Span\n\nnamespace xgboost::collective {\n\ninline constexpr std::int64_t DefaultTimeoutSec() { return 60 * 30; }  // 30min\ninline constexpr std::int32_t DefaultRetry() { return 3; }\n\ninline StringView DefaultNcclName() { return \"libnccl.so.2\"; }\n\nclass Channel;\nclass Coll;\n\n/**\n * @brief Base communicator storing info about the tracker and other communicators.\n */\nclass Comm : public std::enable_shared_from_this<Comm> {\n protected:\n  std::int32_t world_{-1};\n  std::int32_t rank_{0};\n  std::chrono::seconds timeout_{DefaultTimeoutSec()};\n  std::int32_t retry_{DefaultRetry()};\n\n  proto::PeerInfo tracker_;\n  SockDomain domain_{SockDomain::kV4};\n\n  std::thread error_worker_;\n  std::int32_t error_port_;\n\n  std::string task_id_;\n  std::vector<std::shared_ptr<Channel>> channels_;\n  std::shared_ptr<Loop> loop_{nullptr};  // fixme: require federated comm to have a timeout\n\n  void ResetState() {\n    this->world_ = -1;\n    this->rank_ = 0;\n    this->timeout_ = std::chrono::seconds{DefaultTimeoutSec()};\n\n    tracker_ = proto::PeerInfo{};\n    this->task_id_.clear();\n    channels_.clear();\n\n    loop_.reset();\n  }\n\n public:\n  Comm() = default;\n  Comm(std::string const& host, std::int32_t port, std::chrono::seconds 
timeout, std::int32_t retry,\n       std::string task_id);\n  virtual ~Comm() noexcept(false) {}  // NOLINT\n\n  Comm(Comm const& that) = delete;\n  Comm& operator=(Comm const& that) = delete;\n  Comm(Comm&& that) = delete;\n  Comm& operator=(Comm&& that) = delete;\n\n  [[nodiscard]] auto TrackerInfo() const { return tracker_; }\n  [[nodiscard]] Result ConnectTracker(TCPSocket* out) const;\n  [[nodiscard]] auto Domain() const { return domain_; }\n  [[nodiscard]] auto Timeout() const { return timeout_; }\n  [[nodiscard]] auto Retry() const { return retry_; }\n  [[nodiscard]] auto TaskID() const { return task_id_; }\n\n  [[nodiscard]] auto Rank() const noexcept { return rank_; }\n  [[nodiscard]] auto World() const noexcept { return IsDistributed() ? world_ : 1; }\n  [[nodiscard]] bool IsDistributed() const noexcept { return world_ != -1; }\n  void Submit(Loop::Op op) const {\n    CHECK(loop_);\n    loop_->Submit(std::move(op));\n  }\n  [[nodiscard]] virtual Result Block() const { return loop_->Block(); }\n\n  [[nodiscard]] bool HasChan(std::int32_t rank) const {\n    return rank >= 0 && rank < static_cast<std::int32_t>(channels_.size()) &&\n           channels_[rank] != nullptr;\n  }\n  [[nodiscard]] virtual std::shared_ptr<Channel> Chan(std::int32_t rank) const {\n    CHECK(HasChan(rank)) << \"No channel to rank \" << rank << \" from rank \" << rank_\n                         << \". 
The topology does not include this peer.\";\n    return channels_[rank];\n  }\n  [[nodiscard]] virtual bool IsFederated() const = 0;\n  [[nodiscard]] virtual Result LogTracker(std::string msg) const = 0;\n\n  [[nodiscard]] virtual Result SignalError(Result const&) { return Success(); }\n  /**\n   * @brief Get a string ID for the current process.\n   */\n  [[nodiscard]] virtual Result ProcessorName(std::string* out) const {\n    auto rc = GetHostName(out);\n    return rc;\n  }\n  [[nodiscard]] virtual Result Shutdown() = 0;\n};\n\n/**\n * @brief Base class for CPU-based communicator.\n */\nclass HostComm : public Comm {\n public:\n  using Comm::Comm;\n  [[nodiscard]] virtual Comm* MakeCUDAVar(Context const* ctx,\n                                          std::shared_ptr<Coll> pimpl) const = 0;\n};\n\nclass RabitComm : public HostComm {\n  std::string nccl_path_ = std::string{DefaultNcclName()};\n  // User-specified port for the worker listener socket. 0 means the OS picks an available\n  // port.\n  std::int32_t worker_port_{0};\n\n  [[nodiscard]] Result Bootstrap(std::chrono::seconds timeout, std::int32_t retry,\n                                 std::string task_id, std::int32_t worker_port);\n\n public:\n  // bootstrapping construction.\n  RabitComm() = default;\n  RabitComm(std::string const& tracker_host, std::int32_t tracker_port,\n            std::chrono::seconds timeout, std::int32_t retry, std::string task_id,\n            StringView nccl_path, std::int32_t worker_port);\n  ~RabitComm() noexcept(false) override;\n\n  [[nodiscard]] bool IsFederated() const override { return false; }\n  [[nodiscard]] Result LogTracker(std::string msg) const override;\n\n  [[nodiscard]] Result SignalError(Result const&) override;\n  [[nodiscard]] Result Shutdown() final;\n\n  [[nodiscard]] Comm* MakeCUDAVar(Context const* ctx, std::shared_ptr<Coll> pimpl) const override;\n};\n\n/**\n * @brief Communication channel between workers.\n */\nclass Channel {\n  
std::shared_ptr<TCPSocket> sock_{nullptr};\n  Result rc_;\n  Comm const& comm_;\n\n public:\n  explicit Channel(Comm const& comm, std::shared_ptr<TCPSocket> sock)\n      : sock_{std::move(sock)}, comm_{comm} {}\n  virtual ~Channel() = default;\n\n  [[nodiscard]] virtual Result SendAll(std::int8_t const* ptr, std::size_t n) {\n    Loop::Op op{Loop::Op::kWrite, comm_.Rank(), const_cast<std::int8_t*>(ptr), n, sock_.get(), 0};\n    CHECK(sock_.get());\n    comm_.Submit(std::move(op));\n    return Success();\n  }\n  [[nodiscard]] Result SendAll(common::Span<std::int8_t const> data) {\n    return this->SendAll(data.data(), data.size_bytes());\n  }\n\n  [[nodiscard]] virtual Result RecvAll(std::int8_t* ptr, std::size_t n) {\n    Loop::Op op{Loop::Op::kRead, comm_.Rank(), ptr, n, sock_.get(), 0};\n    CHECK(sock_.get());\n    comm_.Submit(std::move(op));\n    return Success();\n  }\n  [[nodiscard]] Result RecvAll(common::Span<std::int8_t> data) {\n    return this->RecvAll(data.data(), data.size_bytes());\n  }\n\n  [[nodiscard]] auto Socket() const { return sock_; }\n  [[nodiscard]] virtual Result Block() { return comm_.Block(); }\n};\n\nenum class Op { kMax = 0, kMin = 1, kSum = 2, kBitwiseAND = 3, kBitwiseOR = 4, kBitwiseXOR = 5 };\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/comm_group.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"comm_group.h\"\n\n#include <algorithm>  // for transform\n#include <cctype>     // for tolower\n#include <chrono>     // for seconds\n#include <cstdint>    // for int32_t\n#include <iterator>   // for back_inserter\n#include <memory>     // for shared_ptr, unique_ptr\n#include <string>     // for string\n\n#include \"../common/json_utils.h\"  // for OptionalArg\n#include \"coll.h\"                  // for Coll\n#include \"comm.h\"                  // for Comm\n#include \"xgboost/context.h\"       // for DeviceOrd\n#include \"xgboost/json.h\"          // for Json\n\n#if defined(XGBOOST_USE_FEDERATED)\n#include \"../../plugin/federated/federated_coll.h\"\n#include \"../../plugin/federated/federated_comm.h\"\n#endif\n\nnamespace xgboost::collective {\n[[nodiscard]] std::shared_ptr<Coll> CommGroup::Backend(DeviceOrd device) const {\n  if (device.IsCUDA()) {\n    if (!gpu_coll_) {\n      gpu_coll_.reset(backend_->MakeCUDAVar());\n    }\n    return gpu_coll_;\n  }\n  return backend_;\n}\n\n[[nodiscard]] Comm const& CommGroup::Ctx(Context const* ctx, DeviceOrd device) const {\n  if (device.IsCUDA()) {\n    CHECK(ctx->IsCUDA());\n    if (!gpu_comm_ || gpu_comm_->World() != comm_->World()) {\n      gpu_comm_.reset(comm_->MakeCUDAVar(ctx, backend_));\n    }\n    return *gpu_comm_;\n  }\n  return *comm_;\n}\n\nCommGroup::CommGroup()\n    : comm_{std::shared_ptr<RabitComm>(new RabitComm{})},  // NOLINT\n      backend_{std::shared_ptr<Coll>(new Coll{})} {}       // NOLINT\n\n[[nodiscard]] CommGroup* CommGroup::Create(Json config) {\n  if (IsA<Null>(config)) {\n    return new CommGroup;\n  }\n\n  std::string type = OptionalArg<String>(config, \"dmlc_communicator\", std::string{\"rabit\"});\n  // Try both lower and upper case for compatibility\n  auto get_param = [&](std::string name, auto dft, auto t) {\n    std::string upper;\n    std::transform(name.cbegin(), name.cend(), 
std::back_inserter(upper),\n                   [](char c) { return std::toupper(c); });\n    std::transform(name.cbegin(), name.cend(), name.begin(),\n                   [](char c) { return std::tolower(c); });\n\n    auto const& obj = get<Object const>(config);\n    auto it = obj.find(upper);\n    if (it != obj.cend() && obj.find(name) != obj.cend()) {\n      LOG(FATAL) << \"Duplicated parameter:\" << name;\n    }\n    if (it != obj.cend()) {\n      return OptionalArg<decltype(t)>(config, upper, dft);\n    } else {\n      return OptionalArg<decltype(t)>(config, name, dft);\n    }\n  };\n  // Common args\n  auto retry = get_param(\"dmlc_retry\", static_cast<Integer::Int>(DefaultRetry()), Integer{});\n  auto timeout =\n      get_param(\"dmlc_timeout\", static_cast<Integer::Int>(DefaultTimeoutSec()), Integer{});\n  CHECK_GE(timeout, 0);\n  auto task_id = get_param(\"dmlc_task_id\", std::string{}, String{});\n\n  if (type == \"rabit\") {\n    auto tracker_host = get_param(\"dmlc_tracker_uri\", std::string{}, String{});\n    auto tracker_port = get_param(\"dmlc_tracker_port\", static_cast<std::int64_t>(0), Integer{});\n    auto nccl = get_param(\"dmlc_nccl_path\", std::string{DefaultNcclName()}, String{});\n    auto worker_port = get_param(\"dmlc_worker_port\", static_cast<std::int64_t>(0), Integer{});\n    CHECK_LE(worker_port, std::numeric_limits<in_port_t>::max());\n    CHECK_GE(worker_port, 0);\n    CHECK_LE(tracker_port, std::numeric_limits<in_port_t>::max());\n    CHECK_GE(tracker_port, 0);\n    auto ptr = new CommGroup{std::shared_ptr<RabitComm>{new RabitComm{\n                                 // NOLINT\n                                 tracker_host, static_cast<std::int32_t>(tracker_port),\n                                 std::chrono::seconds{timeout}, static_cast<std::int32_t>(retry),\n                                 task_id, nccl, static_cast<std::int32_t>(worker_port)}},\n                             std::shared_ptr<Coll>(new Coll{})};  // NOLINT\n    
return ptr;\n  } else if (type == \"federated\") {\n#if defined(XGBOOST_USE_FEDERATED)\n    auto ptr = new CommGroup{\n        std::make_shared<FederatedComm>(retry, std::chrono::seconds{timeout}, task_id, config),\n        std::make_shared<FederatedColl>()};\n    return ptr;\n#endif  // defined(XGBOOST_USE_FEDERATED)\n  } else {\n    LOG(FATAL) << \"Invalid communicator type\";\n  }\n\n  return nullptr;\n}\n\nstd::unique_ptr<collective::CommGroup>& GlobalCommGroup() {\n  static thread_local std::unique_ptr<collective::CommGroup> sptr;\n  if (!sptr) {\n    Json config{Null{}};\n    sptr.reset(CommGroup::Create(config));\n  }\n  return sptr;\n}\n\nvoid GlobalCommGroupInit(Json config) {\n  auto& sptr = GlobalCommGroup();\n  sptr.reset(CommGroup::Create(std::move(config)));\n}\n\nvoid GlobalCommGroupFinalize() {\n  auto& sptr = GlobalCommGroup();\n  auto rc = sptr->Finalize();\n  sptr.reset();\n  SafeColl(rc);\n}\n\nvoid Init(Json const& config) { GlobalCommGroupInit(config); }\n\nvoid Finalize() { GlobalCommGroupFinalize(); }\n\nstd::int32_t GetRank() noexcept { return GlobalCommGroup()->Rank(); }\n\nstd::int32_t GetWorldSize() noexcept { return GlobalCommGroup()->World(); }\n\nbool IsDistributed() noexcept { return GlobalCommGroup()->IsDistributed(); }\n\n[[nodiscard]] bool IsFederated() {\n  return GlobalCommGroup()->Ctx(nullptr, DeviceOrd::CPU()).IsFederated();\n}\n\nvoid Print(std::string const& message) {\n  auto rc = GlobalCommGroup()->Ctx(nullptr, DeviceOrd::CPU()).LogTracker(message);\n  SafeColl(rc);\n}\n\nstd::string GetProcessorName() {\n  std::string out;\n  auto rc = GlobalCommGroup()->ProcessorName(&out);\n  SafeColl(rc);\n  return out;\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/comm_group.h",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#pragma once\n#include <memory>   // for shared_ptr, unique_ptr\n#include <string>   // for string\n#include <utility>  // for move\n\n#include \"coll.h\"                       // for Comm\n#include \"comm.h\"                       // for Coll\n#include \"xgboost/collective/result.h\"  // for Result\n\nnamespace xgboost::collective {\n/**\n * @brief Communicator group used for double dispatching between communicators and\n *        collective implementations.\n */\nclass CommGroup {\n  std::shared_ptr<HostComm> comm_;\n  mutable std::shared_ptr<Comm> gpu_comm_;\n\n  std::shared_ptr<Coll> backend_;\n  mutable std::shared_ptr<Coll> gpu_coll_;  // lazy initialization\n\n  CommGroup(std::shared_ptr<Comm> comm, std::shared_ptr<Coll> coll)\n      : comm_{std::dynamic_pointer_cast<HostComm>(comm)}, backend_{std::move(coll)} {\n    CHECK(comm_);\n  }\n\n public:\n  CommGroup();\n\n  [[nodiscard]] auto World() const noexcept { return comm_->World(); }\n  [[nodiscard]] auto Rank() const noexcept { return comm_->Rank(); }\n  [[nodiscard]] bool IsDistributed() const noexcept { return comm_->IsDistributed(); }\n\n  [[nodiscard]] Result Finalize() const {\n    return Success() << [this] {\n      if (gpu_comm_) {\n        return gpu_comm_->Shutdown();\n      }\n      return Success();\n    } << [&] {\n      return comm_->Shutdown();\n    };\n  }\n\n  [[nodiscard]] static CommGroup* Create(Json config);\n\n  [[nodiscard]] std::shared_ptr<Coll> Backend(DeviceOrd device) const;\n  /**\n   * @brief Decide the context to use for communication.\n   *\n   * @param ctx Global context, provides the CUDA stream and ordinal.\n   * @param device The device used by the data to be communicated.\n   */\n  [[nodiscard]] Comm const& Ctx(Context const* ctx, DeviceOrd device) const;\n  [[nodiscard]] Result SignalError(Result const& res) { return comm_->SignalError(res); }\n\n  [[nodiscard]] Result ProcessorName(std::string* out) 
const {\n    return this->comm_->ProcessorName(out);\n  }\n};\n\nstd::unique_ptr<collective::CommGroup>& GlobalCommGroup();\n\nvoid GlobalCommGroupInit(Json config);\n\nvoid GlobalCommGroupFinalize();\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/communicator-inl.h",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors\n */\n#pragma once\n#include <string>\n\n#include \"xgboost/json.h\"  // for Json\n\nnamespace xgboost::collective {\n/**\n * @brief Initialize the collective communicator.\n */\nvoid Init(Json const& config);\n\n/**\n * @brief Finalize the collective communicator.\n *\n * Call this function after you finished all jobs.\n */\nvoid Finalize();\n\n/**\n * @brief Get rank of current process.\n *\n * @return Rank of the worker.\n */\n[[nodiscard]] std::int32_t GetRank() noexcept;\n\n/**\n * @brief Get total number of processes.\n *\n * @return Total world size.\n */\n[[nodiscard]] std::int32_t GetWorldSize() noexcept;\n\n/**\n * @brief Get if the communicator is distributed.\n *\n * @return True if the communicator is distributed.\n */\n[[nodiscard]] bool IsDistributed() noexcept;\n\n/**\n * @brief Get if the communicator is federated.\n *\n * @return True if the communicator is federated.\n */\n[[nodiscard]] bool IsFederated();\n\n/**\n * @brief Print the message to the communicator.\n *\n * This function can be used to communicate the information of the progress to the user who monitors\n * the communicator.\n *\n * @param message The message to be printed.\n */\nvoid Print(std::string const& message);\n/**\n * @brief Get the name of the processor.\n *\n * @return Name of the processor.\n */\nstd::string GetProcessorName();\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/in_memory_communicator.h",
    "content": "/*!\n * Copyright 2022 XGBoost contributors\n */\n#pragma once\n#include <xgboost/json.h>\n\n#include <string>\n\n#include \"../c_api/c_api_utils.h\"\n#include \"in_memory_handler.h\"\n\nnamespace xgboost {\nnamespace collective {\n\n/**\n * An in-memory communicator, useful for testing.\n */\nclass InMemoryCommunicator {\n public:\n  /**\n   * @brief Create a new communicator based on JSON configuration.\n   * @param config JSON configuration.\n   * @return Communicator as specified by the JSON configuration.\n   */\n  static InMemoryCommunicator* Create(Json const& config) {\n    int world_size{0};\n    int rank{-1};\n\n    // Parse environment variables first.\n    auto* value = getenv(\"IN_MEMORY_WORLD_SIZE\");\n    if (value != nullptr) {\n      world_size = std::stoi(value);\n    }\n    value = getenv(\"IN_MEMORY_RANK\");\n    if (value != nullptr) {\n      rank = std::stoi(value);\n    }\n\n    // Runtime configuration overrides, optional as users can specify them as env vars.\n    world_size = static_cast<int>(OptionalArg<Integer>(config, \"in_memory_world_size\",\n                                                       static_cast<Integer::Int>(world_size)));\n    rank = static_cast<int>(\n        OptionalArg<Integer>(config, \"in_memory_rank\", static_cast<Integer::Int>(rank)));\n\n    if (world_size == 0) {\n      LOG(FATAL) << \"Federated world size must be set.\";\n    }\n    if (rank == -1) {\n      LOG(FATAL) << \"Federated rank must be set.\";\n    }\n    return new InMemoryCommunicator(world_size, rank);\n  }\n\n  InMemoryCommunicator(int world_size, int rank) {\n    handler_.Init(world_size, rank);\n  }\n\n  ~InMemoryCommunicator() override { handler_.Shutdown(sequence_number_++, GetRank()); }\n\n  bool IsDistributed() const override { return true; }\n  bool IsFederated() const override { return false; }\n\n  std::string AllGather(std::string_view input) override {\n    std::string output;\n    handler_.Allgather(input.data(), 
input.size(), &output, sequence_number_++, GetRank());\n    return output;\n  }\n\n  std::string AllGatherV(std::string_view input) override {\n    std::string output;\n    handler_.AllgatherV(input.data(), input.size(), &output, sequence_number_++, GetRank());\n    return output;\n  }\n\n  void AllReduce(void* in_out, std::size_t size, DataType data_type, Operation operation) override {\n    auto const bytes = size * GetTypeSize(data_type);\n    std::string output;\n    handler_.Allreduce(static_cast<const char*>(in_out), bytes, &output, sequence_number_++,\n                       GetRank(), data_type, operation);\n    output.copy(static_cast<char*>(in_out), bytes);\n  }\n\n  void Broadcast(void* in_out, std::size_t size, int root) override {\n    std::string output;\n    handler_.Broadcast(static_cast<const char*>(in_out), size, &output, sequence_number_++,\n                       GetRank(), root);\n    output.copy(static_cast<char*>(in_out), size);\n  }\n\n  std::string GetProcessorName() override { return \"rank\" + std::to_string(GetRank()); }\n\n  void Print(const std::string& message) override { LOG(CONSOLE) << message; }\n\n protected:\n  void Shutdown() override {}\n\n private:\n  static InMemoryHandler handler_;\n  uint64_t sequence_number_{};\n};\n\n}  // namespace collective\n}  // namespace xgboost\n"
  },
  {
    "path": "src/collective/in_memory_handler.cc",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n#include \"in_memory_handler.h\"\n\n#include <algorithm>\n#include <functional>\n#include <stdexcept>\n\n#include \"comm.h\"\n\nnamespace xgboost::collective {\n/**\n * @brief Functor for allgather.\n */\nclass AllgatherFunctor {\n public:\n  std::string const name{\"Allgather\"};\n\n  AllgatherFunctor(std::int32_t world_size, std::int32_t rank)\n      : world_size_{world_size}, rank_{rank} {}\n\n  void operator()(char const* input, std::size_t bytes, AlignedByteBuffer* buffer) const {\n    if (buffer->Empty()) {\n      // Resize the buffer if this is the first request.\n      buffer->Resize(bytes * world_size_);\n    }\n\n    // Splice the input into the common buffer.\n    buffer->Replace(rank_ * bytes, bytes, input);\n  }\n\n private:\n  std::int32_t world_size_;\n  std::int32_t rank_;\n};\n\n/**\n * @brief Functor for variable-length allgather.\n */\nclass AllgatherVFunctor {\n public:\n  std::string const name{\"AllgatherV\"};\n\n  AllgatherVFunctor(std::int32_t world_size, std::int32_t rank,\n                    std::map<std::size_t, std::string_view>* data)\n      : world_size_{world_size}, rank_{rank}, data_{data} {}\n\n  void operator()(char const* input, std::size_t bytes, AlignedByteBuffer* buffer) const {\n    data_->emplace(rank_, std::string_view{input, bytes});\n    if (data_->size() == static_cast<std::size_t>(world_size_)) {\n      for (auto const& kv : *data_) {\n        buffer->Append(kv.second);\n      }\n      data_->clear();\n    }\n  }\n\n private:\n  std::int32_t world_size_;\n  std::int32_t rank_;\n  std::map<std::size_t, std::string_view>* data_;\n};\n\n/**\n * @brief Functor for allreduce.\n */\nclass AllreduceFunctor {\n public:\n  std::string const name{\"Allreduce\"};\n\n  AllreduceFunctor(ArrayInterfaceHandler::Type dataType, Op operation)\n      : data_type_{dataType}, operation_{operation} {}\n\n  void operator()(char const* input, std::size_t bytes, 
AlignedByteBuffer* buffer) const {\n    if (buffer->Empty()) {\n      // Copy the input if this is the first request.\n      buffer->Assign(input, bytes);\n    } else {\n      auto n_bytes_type = DispatchDType(data_type_, [](auto t) { return sizeof(t); });\n      CHECK_EQ(bytes % n_bytes_type, 0) << \"Input size is not a multiple of its element size.\";\n      CHECK_EQ(buffer->Size(), bytes) << \"Input size differs across allreduce calls.\";\n      // Apply the reduce_operation to the input and the buffer.\n      Accumulate(input, bytes, buffer);\n    }\n  }\n\n private:\n  template <class T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>\n  void AccumulateBitwise(T* buffer, T const* input, std::size_t size, Op reduce_operation) const {\n    switch (reduce_operation) {\n      case Op::kBitwiseAND:\n        std::transform(buffer, buffer + size, input, buffer, std::bit_and<T>());\n        break;\n      case Op::kBitwiseOR:\n        std::transform(buffer, buffer + size, input, buffer, std::bit_or<T>());\n        break;\n      case Op::kBitwiseXOR:\n        std::transform(buffer, buffer + size, input, buffer, std::bit_xor<T>());\n        break;\n      default:\n        throw std::invalid_argument(\"Invalid reduce operation\");\n    }\n  }\n\n  template <class T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>\n  void AccumulateBitwise(T*, T const*, std::size_t, Op) const {\n    LOG(FATAL) << \"Floating point types do not support bitwise operations.\";\n  }\n\n  template <class T>\n  void Accumulate(T* buffer, T const* input, std::size_t size, Op reduce_operation) const {\n    switch (reduce_operation) {\n      case Op::kMax:\n        std::transform(buffer, buffer + size, input, buffer,\n                       [](T a, T b) { return std::max(a, b); });\n        break;\n      case Op::kMin:\n        std::transform(buffer, buffer + size, input, buffer,\n                       [](T a, T b) { return std::min(a, b); });\n        break;\n      case Op::kSum:\n   
     std::transform(buffer, buffer + size, input, buffer, std::plus<T>());\n        break;\n      case Op::kBitwiseAND:\n      case Op::kBitwiseOR:\n      case Op::kBitwiseXOR:\n        AccumulateBitwise(buffer, input, size, reduce_operation);\n        break;\n      default:\n        throw std::invalid_argument(\"Invalid reduce operation\");\n    }\n  }\n\n  void Accumulate(char const* input, std::size_t bytes, AlignedByteBuffer* buffer) const {\n    using Type = ArrayInterfaceHandler::Type;\n    auto data = buffer->Data();\n    auto size = bytes / DispatchDType(data_type_, [](auto t) { return sizeof(t); });\n    switch (data_type_) {\n      case Type::kI1:\n        Accumulate(reinterpret_cast<std::int8_t*>(data),\n                   reinterpret_cast<std::int8_t const*>(input), size, operation_);\n        break;\n      case Type::kU1:\n        Accumulate(reinterpret_cast<std::uint8_t*>(data),\n                   reinterpret_cast<std::uint8_t const*>(input), size, operation_);\n        break;\n      case Type::kI4:\n        Accumulate(reinterpret_cast<std::int32_t*>(data),\n                   reinterpret_cast<std::int32_t const*>(input), size, operation_);\n        break;\n      case Type::kU4:\n        Accumulate(reinterpret_cast<std::uint32_t*>(data),\n                   reinterpret_cast<std::uint32_t const*>(input), size, operation_);\n        break;\n      case Type::kI8:\n        Accumulate(reinterpret_cast<std::int64_t*>(data),\n                   reinterpret_cast<std::int64_t const*>(input), size, operation_);\n        break;\n      case Type::kU8:\n        Accumulate(reinterpret_cast<std::uint64_t*>(data),\n                   reinterpret_cast<std::uint64_t const*>(input), size, operation_);\n        break;\n      case Type::kF4:\n        Accumulate(reinterpret_cast<float*>(data), reinterpret_cast<float const*>(input), size,\n                   operation_);\n        break;\n      case Type::kF8:\n        Accumulate(reinterpret_cast<double*>(data), 
reinterpret_cast<double const*>(input), size,\n                   operation_);\n        break;\n      default:\n        throw std::invalid_argument(\"Invalid data type\");\n    }\n  }\n\n private:\n  ArrayInterfaceHandler::Type data_type_;\n  Op operation_;\n};\n\n/**\n * @brief Functor for broadcast.\n */\nclass BroadcastFunctor {\n public:\n  std::string const name{\"Broadcast\"};\n\n  BroadcastFunctor(std::int32_t rank, std::int32_t root) : rank_{rank}, root_{root} {}\n\n  void operator()(char const* input, std::size_t bytes, AlignedByteBuffer* buffer) const {\n    if (rank_ == root_) {\n      // Copy the input if this is the root.\n      buffer->Assign(input, bytes);\n    }\n  }\n\n private:\n  std::int32_t rank_;\n  std::int32_t root_;\n};\n\nvoid InMemoryHandler::Init(std::int32_t world_size, std::int32_t) {\n  CHECK(world_size_ < world_size) << \"In memory handler already initialized.\";\n\n  std::unique_lock<std::mutex> lock(mutex_);\n  world_size_++;\n  cv_.wait(lock, [this, world_size] { return world_size_ == world_size; });\n  lock.unlock();\n  cv_.notify_all();\n}\n\nvoid InMemoryHandler::Shutdown(uint64_t sequence_number, std::int32_t) {\n  CHECK(world_size_ > 0) << \"In memory handler already shutdown.\";\n\n  std::unique_lock<std::mutex> lock(mutex_);\n  cv_.wait(lock, [this, sequence_number] { return sequence_number_ == sequence_number; });\n  received_++;\n  cv_.wait(lock, [this] { return received_ == world_size_; });\n\n  received_ = 0;\n  world_size_ = 0;\n  sequence_number_ = 0;\n  lock.unlock();\n  cv_.notify_all();\n}\n\nvoid InMemoryHandler::Allgather(char const* input, std::size_t bytes, std::string* output,\n                                std::size_t sequence_number, std::int32_t rank) {\n  Handle(input, bytes, output, sequence_number, rank, AllgatherFunctor{world_size_, rank});\n}\n\nvoid InMemoryHandler::AllgatherV(char const* input, std::size_t bytes, std::string* output,\n                                 std::size_t sequence_number, 
std::int32_t rank) {\n  Handle(input, bytes, output, sequence_number, rank, AllgatherVFunctor{world_size_, rank, &aux_});\n}\n\nvoid InMemoryHandler::Allreduce(char const* input, std::size_t bytes, std::string* output,\n                                std::size_t sequence_number, std::int32_t rank,\n                                ArrayInterfaceHandler::Type data_type, Op op) {\n  Handle(input, bytes, output, sequence_number, rank, AllreduceFunctor{data_type, op});\n}\n\nvoid InMemoryHandler::Broadcast(char const* input, std::size_t bytes, std::string* output,\n                                std::size_t sequence_number, std::int32_t rank, std::int32_t root) {\n  Handle(input, bytes, output, sequence_number, rank, BroadcastFunctor{rank, root});\n}\n\ntemplate <class HandlerFunctor>\nvoid InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string* output,\n                             std::size_t sequence_number, std::int32_t rank,\n                             HandlerFunctor const& functor) {\n  // Pass through if there is only 1 client.\n  if (world_size_ == 1) {\n    output->assign(input, bytes);\n    return;\n  }\n\n  std::unique_lock<std::mutex> lock(mutex_);\n\n  LOG(DEBUG) << functor.name << \" rank \" << rank << \": waiting for current sequence number\";\n  cv_.wait(lock, [this, sequence_number] { return sequence_number_ == sequence_number; });\n\n  LOG(DEBUG) << functor.name << \" rank \" << rank << \": handling request\";\n  functor(input, bytes, &buffer_);\n  received_++;\n\n  if (received_ == world_size_) {\n    LOG(DEBUG) << functor.name << \" rank \" << rank << \": all requests received\";\n    output->assign(buffer_.Data(), buffer_.Size());\n    sent_++;\n    lock.unlock();\n    cv_.notify_all();\n    return;\n  }\n\n  LOG(DEBUG) << functor.name << \" rank \" << rank << \": waiting for all clients\";\n  cv_.wait(lock, [this] { return received_ == world_size_; });\n\n  LOG(DEBUG) << functor.name << \" rank \" << rank << \": sending 
reply\";\n  output->assign(buffer_.Data(), buffer_.Size());\n  sent_++;\n\n  if (sent_ == world_size_) {\n    LOG(DEBUG) << functor.name << \" rank \" << rank << \": all replies sent\";\n    sent_ = 0;\n    received_ = 0;\n    buffer_.Clear();\n    sequence_number_++;\n    lock.unlock();\n    cv_.notify_all();\n  }\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/in_memory_handler.h",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n#pragma once\n#include <condition_variable>\n#include <cstddef>\n#include <cstring>\n#include <map>\n#include <string>\n#include <vector>\n\n#include \"../data/array_interface.h\"\n#include \"comm.h\"\n\nnamespace xgboost::collective {\nclass AlignedByteBuffer {\n  using StorageT = std::max_align_t;\n\n public:\n  [[nodiscard]] bool Empty() const { return size_ == 0; }\n  [[nodiscard]] std::size_t Size() const { return size_; }\n\n  [[nodiscard]] char* Data() { return reinterpret_cast<char*>(storage_.data()); }\n  [[nodiscard]] char const* Data() const { return reinterpret_cast<char const*>(storage_.data()); }\n\n  void Clear() {\n    storage_.clear();\n    size_ = 0;\n  }\n\n  void Resize(std::size_t n_bytes) {\n    storage_.resize((n_bytes + sizeof(StorageT) - 1) / sizeof(StorageT));\n    size_ = n_bytes;\n  }\n\n  void Assign(char const* input, std::size_t n_bytes) {\n    this->Resize(n_bytes);\n    if (n_bytes != 0) {\n      std::memcpy(this->Data(), input, n_bytes);\n    }\n  }\n\n  void Replace(std::size_t pos, std::size_t n_bytes, char const* input) {\n    CHECK_LE(pos + n_bytes, size_);\n    if (n_bytes != 0) {\n      std::memcpy(this->Data() + pos, input, n_bytes);\n    }\n  }\n\n  void Append(std::string_view data) {\n    auto old_size = size_;\n    this->Resize(size_ + data.size());\n    if (!data.empty()) {\n      std::memcpy(this->Data() + old_size, data.data(), data.size());\n    }\n  }\n\n private:\n  std::vector<StorageT> storage_{};\n  std::size_t size_{0};\n};\n\n/**\n * @brief Handles collective communication primitives in memory.\n *\n * This class is thread safe.\n */\nclass InMemoryHandler {\n public:\n  /**\n   * @brief Default constructor.\n   *\n   * This is used when multiple objects/threads are accessing the same handler and need to\n   * initialize it collectively.\n   */\n  InMemoryHandler() = default;\n\n  /**\n   * @brief Construct a handler with the given world 
size.\n   * @param world Number of workers.\n   *\n   * This is used when the handler only needs to be initialized once with a known world size.\n   */\n  explicit InMemoryHandler(std::int32_t world) : world_size_{world} {}\n\n  /**\n   * @brief Initialize the handler with the world size and rank.\n   * @param world_size Number of workers.\n   * @param rank Index of the worker.\n   *\n   * This is used when multiple objects/threads are accessing the same handler and need to\n   * initialize it collectively.\n   */\n  void Init(std::int32_t world_size, std::int32_t rank);\n\n  /**\n   * @brief Shut down the handler.\n   * @param sequence_number Call sequence number.\n   * @param rank Index of the worker.\n   *\n   * This is used when multiple objects/threads are accessing the same handler and need to\n   * shut it down collectively.\n   */\n  void Shutdown(uint64_t sequence_number, std::int32_t rank);\n\n  /**\n   * @brief Perform allgather.\n   * @param input The input buffer.\n   * @param bytes Number of bytes in the input buffer.\n   * @param output The output buffer.\n   * @param sequence_number Call sequence number.\n   * @param rank Index of the worker.\n   */\n  void Allgather(char const* input, std::size_t bytes, std::string* output,\n                 std::size_t sequence_number, std::int32_t rank);\n\n  /**\n   * @brief Perform variable-length allgather.\n   * @param input The input buffer.\n   * @param bytes Number of bytes in the input buffer.\n   * @param output The output buffer.\n   * @param sequence_number Call sequence number.\n   * @param rank Index of the worker.\n   */\n  void AllgatherV(char const* input, std::size_t bytes, std::string* output,\n                  std::size_t sequence_number, std::int32_t rank);\n\n  /**\n   * @brief Perform allreduce.\n   * @param input The input buffer.\n   * @param bytes Number of bytes in the input buffer.\n   * @param output The output buffer.\n   * @param sequence_number Call sequence number.\n   * @param 
rank Index of the worker.\n   * @param data_type Type of the data.\n   * @param op The reduce operation.\n   */\n  void Allreduce(char const* input, std::size_t bytes, std::string* output,\n                 std::size_t sequence_number, std::int32_t rank,\n                 ArrayInterfaceHandler::Type data_type, Op op);\n\n  /**\n   * @brief Perform broadcast.\n   * @param input The input buffer.\n   * @param bytes Number of bytes in the input buffer.\n   * @param output The output buffer.\n   * @param sequence_number Call sequence number.\n   * @param rank Index of the worker.\n   * @param root Index of the worker to broadcast from.\n   */\n  void Broadcast(char const* input, std::size_t bytes, std::string* output,\n                 std::size_t sequence_number, std::int32_t rank, std::int32_t root);\n\n private:\n  /**\n   * @brief Handle a collective communication primitive.\n   * @tparam HandlerFunctor The functor used to perform the specific primitive.\n   * @param input The input buffer.\n   * @param size Size of the input in terms of the data type.\n   * @param output The output buffer.\n   * @param sequence_number Call sequence number.\n   * @param rank Index of the worker.\n   * @param functor The functor instance used to perform the specific primitive.\n   */\n  template <class HandlerFunctor>\n  void Handle(char const* input, std::size_t size, std::string* output, std::size_t sequence_number,\n              std::int32_t rank, HandlerFunctor const& functor);\n\n  std::int32_t world_size_{};   /// Number of workers.\n  std::int64_t received_{};     /// Number of calls received with the current sequence.\n  std::int64_t sent_{};         /// Number of calls completed with the current sequence.\n  AlignedByteBuffer buffer_{};  /// A shared common buffer.\n  std::map<std::size_t, std::string_view> aux_{};  /// A shared auxiliary map.\n  uint64_t sequence_number_{};                     /// Call sequence number.\n  mutable std::mutex mutex_;                       
/// Lock.\n  mutable std::condition_variable cv_;             /// Condition variable to wait on.\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/loop.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include \"loop.h\"\n\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t\n#include <exception>  // for exception, current_exception, rethrow_exception\n#include <future>     // for promise\n#include <memory>     // for make_shared\n#include <mutex>      // for lock_guard, unique_lock\n#include <queue>      // for queue\n#include <string>     // for string\n#include <thread>     // for thread\n#include <utility>    // for move\n\n#include \"../common/threading_utils.h\"      // for NameThread\n#include \"xgboost/collective/poll_utils.h\"  // for PollHelper\n#include \"xgboost/collective/result.h\"      // for Fail, Success\n#include \"xgboost/collective/socket.h\"      // for FailWithCode\n#include \"xgboost/logging.h\"                // for CHECK\n\nnamespace xgboost::collective {\nResult Loop::ProcessQueue(std::queue<Op>* p_queue) const {\n  timer_.Start(__func__);\n  auto error = [this](Op op) {\n    op.pr->set_value();\n    timer_.Stop(__func__);\n  };\n\n  if (stop_) {\n    timer_.Stop(__func__);\n    return Success();\n  }\n\n  auto& qcopy = *p_queue;\n\n  // clear the copied queue\n  while (!qcopy.empty()) {\n    rabit::utils::PollHelper poll;\n    std::size_t n_ops = qcopy.size();\n\n    // Iterate through all the ops for poll\n    for (std::size_t i = 0; i < n_ops; ++i) {\n      auto op = std::move(qcopy.front());\n      qcopy.pop();\n\n      switch (op.code) {\n        case Op::kRead: {\n          poll.WatchRead(*op.sock);\n          break;\n        }\n        case Op::kWrite: {\n          poll.WatchWrite(*op.sock);\n          break;\n        }\n        case Op::kSleep: {\n          break;\n        }\n        default: {\n          error(op);\n          return Fail(\"Invalid socket operation.\");\n        }\n      }\n\n      qcopy.push(std::move(op));\n    }\n\n    // poll, work on fds that are ready.\n    timer_.Start(\"poll\");\n    if (!poll.fds.empty()) {\n   
   auto rc = poll.Poll(timeout_);\n      if (!rc.OK()) {\n        timer_.Stop(__func__);\n        return rc;\n      }\n    }\n    timer_.Stop(\"poll\");\n\n    // We wonldn't be here if the queue is empty.\n    CHECK(!qcopy.empty());\n\n    // Iterate through all the ops for performing the operations\n    for (std::size_t i = 0; i < n_ops; ++i) {\n      auto op = std::move(qcopy.front());\n      qcopy.pop();\n\n      std::int32_t n_bytes_done{0};\n      if (!op.sock) {\n        CHECK(op.code == Op::kSleep);\n      } else {\n        CHECK(op.sock->NonBlocking());\n      }\n\n      switch (op.code) {\n        case Op::kRead: {\n          if (poll.CheckRead(*op.sock)) {\n            n_bytes_done = op.sock->Recv(op.ptr + op.off, op.n - op.off);\n            if (n_bytes_done == 0) {\n              error(op);\n              return Fail(\"Encountered EOF. The other end is likely closed.\",\n                          op.sock->GetSockError());\n            }\n          }\n          break;\n        }\n        case Op::kWrite: {\n          if (poll.CheckWrite(*op.sock)) {\n            n_bytes_done = op.sock->Send(op.ptr + op.off, op.n - op.off);\n          }\n          break;\n        }\n        case Op::kSleep: {\n          // For testing only.\n          std::this_thread::sleep_for(std::chrono::seconds{op.n});\n          n_bytes_done = op.n;\n          break;\n        }\n        default: {\n          error(op);\n          return Fail(\"Invalid socket operation.\");\n        }\n      }\n\n      if (n_bytes_done == -1 && !system::LastErrorWouldBlock()) {\n        auto rc = system::FailWithCode(\"Invalid socket output.\");\n        error(op);\n        return rc;\n      }\n\n      op.off += n_bytes_done;\n      CHECK_LE(op.off, op.n);\n\n      if (op.off != op.n) {\n        // not yet finished, push back to queue for the next round.\n        qcopy.push(op);\n      } else {\n        op.pr->set_value();\n      }\n    }\n  }\n\n  timer_.Stop(__func__);\n  return 
Success();\n}\n\nvoid Loop::Process() {\n  auto set_rc = [this](Result&& rc) {\n    std::lock_guard lock{rc_lock_};\n    rc_ = std::forward<Result>(rc);\n  };\n\n  // This loop cannot exit unless `stop_` is set to true. There must always be a thread to\n  // answer the call even if there are errors.\n  while (true) {\n    try {\n      std::unique_lock lock{mu_};\n      // This can handle missed notification: wait(lock, predicate) is equivalent to:\n      //\n      // while (!predicate()) {\n      //    cv.wait(lock);\n      // }\n      //\n      // As a result, if there's a missed notification, the queue wouldn't be empty, hence\n      // the predicate would be false and the actual wait wouldn't be invoked. Therefore,\n      // the blocking call can never go unanswered.\n      cv_.wait(lock, [this] { return !this->queue_.empty() || stop_; });\n      if (stop_) {\n        break;  // only point where this loop can exit.\n      }\n\n      // Move the global queue into a local variable to unblock it.\n      std::queue<Op> qcopy;\n\n      while (!queue_.empty()) {\n        auto op = std::move(queue_.front());\n        queue_.pop();\n        qcopy.push(op);\n      }\n      lock.unlock();\n\n      // Clear the local queue.\n      auto rc = this->ProcessQueue(&qcopy);\n\n      // Handle error\n      if (!rc.OK()) {\n        set_rc(std::move(rc));\n      } else {\n        std::unique_lock lock{mu_};\n        CHECK(qcopy.empty() || stop_);\n      }\n    } catch (std::exception const& e) {\n      curr_exce_ = std::current_exception();\n      set_rc(Fail(\"Exception inside the event loop:\" + std::string{e.what()}));\n    } catch (...) 
{\n      curr_exce_ = std::current_exception();\n      set_rc(Fail(\"Unknown exception inside the event loop.\"));\n    }\n  }\n}\n\nResult Loop::Stop() {\n  // Finish all remaining tasks\n  CHECK_EQ(this->Block().OK(), this->rc_.OK());\n\n  // Notify the loop to stop\n  std::unique_lock lock{mu_};\n  stop_ = true;\n  lock.unlock();\n  this->cv_.notify_one();\n\n  if (this->worker_.joinable()) {\n    this->worker_.join();\n  }\n\n  if (curr_exce_) {\n    std::rethrow_exception(curr_exce_);\n  }\n\n  return Success();\n}\n\n[[nodiscard]] Result Loop::Block() {\n  {\n    // Check whether the last op was successful, stop if not.\n    std::lock_guard<std::mutex> guard{rc_lock_};\n    if (!rc_.OK()) {\n      stop_ = true;\n    }\n  }\n  if (!this->worker_.joinable()) {\n    std::lock_guard<std::mutex> guard{rc_lock_};\n    return Fail(\"Worker has stopped.\", std::move(rc_));\n  }\n\n  {\n    std::unique_lock lock{mu_};\n    cv_.notify_one();\n  }\n\n  for (auto& fut : futures_) {\n    if (fut.valid()) {\n      try {\n        fut.get();\n      } catch (std::future_error const&) {\n        // Do nothing. If something went wrong in the worker, we have a std::future_error\n        // due to broken promise. This function will transfer the rc back to the caller.\n      }\n    }\n  }\n  futures_.clear();\n\n  {\n    // Transfer the rc.\n    std::lock_guard<std::mutex> lock{rc_lock_};\n    return std::move(rc_);\n  }\n}\n\nvoid Loop::Submit(Op op) {\n  auto p = std::make_shared<std::promise<void>>();\n  op.pr = std::move(p);\n  futures_.emplace_back(op.pr->get_future());\n  CHECK_NE(op.n, 0);\n\n  std::unique_lock lock{mu_};\n  queue_.push(op);\n}\n\nLoop::Loop(std::chrono::seconds timeout) : timeout_{timeout} {\n  timer_.Init(__func__);\n  worker_ = std::thread{[this] {\n    this->Process();\n  }};\n  common::NameThread(&worker_, \"lw\");\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/loop.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n#include <chrono>              // for seconds\n#include <condition_variable>  // for condition_variable\n#include <cstddef>             // for size_t\n#include <cstdint>             // for int8_t, int32_t\n#include <exception>           // for exception_ptr\n#include <future>              // for future\n#include <memory>              // for shared_ptr\n#include <mutex>               // for mutex\n#include <queue>               // for queue\n#include <thread>              // for thread\n#include <vector>              // for vector\n\n#include \"../common/timer.h\"            // for Monitor\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/collective/socket.h\"  // for TCPSocket\n\nnamespace xgboost::collective {\nclass Loop {\n public:\n  struct Op {\n    // kSleep is only for testing\n    enum Code : std::int8_t { kRead = 0, kWrite = 1, kSleep = 3 } code;\n    std::int32_t rank{-1};\n    std::int8_t* ptr{nullptr};\n    std::size_t n{0};\n    TCPSocket* sock{nullptr};\n    std::size_t off{0};\n    std::shared_ptr<std::promise<void>> pr;\n\n    explicit Op(Code c) : code{c} { CHECK(c == kSleep); }\n    Op(Code c, std::int32_t rank, std::int8_t* ptr, std::size_t n, TCPSocket* sock, std::size_t off)\n        : code{c}, rank{rank}, ptr{ptr}, n{n}, sock{sock}, off{off} {}\n    Op(Op const&) = default;\n    Op& operator=(Op const&) = default;\n    Op(Op&&) = default;\n    Op& operator=(Op&&) = default;\n    // For testing purpose only\n    [[nodiscard]] static Op Sleep(std::size_t seconds) {\n      Op op{kSleep};\n      op.n = seconds;\n      return op;\n    }\n  };\n\n private:\n  std::thread worker_;  // thread worker to execute the tasks\n\n  std::condition_variable cv_;  // CV used to notify a new submit call\n\n  std::queue<Op> queue_;  // event queue\n  std::vector<std::future<void>> futures_;\n  std::mutex mu_;  // mutex to protect the queue, cv, and 
block_done\n\n  std::chrono::seconds timeout_;\n\n  Result rc_;\n  std::mutex rc_lock_;  // lock for transferring error info.\n\n  bool stop_{false};\n  std::exception_ptr curr_exce_{nullptr};\n  common::Monitor mutable timer_;\n\n  Result ProcessQueue(std::queue<Op>* p_queue) const;\n  // The consumer function that runs inside a worker thread.\n  void Process();\n\n public:\n  /**\n   * @brief Stop the worker thread.\n   */\n  Result Stop();\n\n  void Submit(Op op);\n\n  /**\n   * @brief Block the event loop until all ops are finished. In the case of failure, this\n   *        loop should not be used for new operations.\n   */\n  [[nodiscard]] Result Block();\n\n  explicit Loop(std::chrono::seconds timeout);\n\n  ~Loop() noexcept(false) {\n    // The worker will be joined in the stop function.\n    this->Stop();\n  }\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/nccl_stub.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#if defined(XGBOOST_USE_NCCL)\n#include \"nccl_stub.h\"\n\n#if defined(XGBOOST_USE_DLOPEN_NCCL)\n\n#include <dlfcn.h>  // for dlclose, dlsym, dlopen\n\n#include <cstdint>  // for int32_t\n\n#include \"xgboost/logging.h\"\n\n#endif  // defined(XGBOOST_USE_DLOPEN_NCCL)\n\n#include <cuda.h>              // for CUDA_VERSION\n#include <cuda_runtime_api.h>  // for cudaPeekAtLastError\n#include <nccl.h>\n#include <thrust/system/cuda/error.h>  // for cuda_category\n#include <thrust/system_error.h>       // for system_error\n\n#include <memory>   // for shared_ptr\n#include <sstream>  // for stringstream\n#include <string>   // for string\n#include <thread>   // for this_thread\n#include <utility>  // for move\n\n#include \"../common/error_msg.h\"  // for OldNccl\n#include \"../common/timer.h\"      // for Timer\n\nnamespace xgboost::collective {\n[[nodiscard]] Result NcclStub::GetNcclResult(ncclResult_t code) const {\n  if (code == ncclSuccess || code == ncclInProgress) {\n    return Success();\n  }\n\n  std::stringstream ss;\n  ss << \"NCCL failure: \" << this->GetErrorString(code) << \".\";\n  if (code == ncclUnhandledCudaError) {\n    // nccl usually preserves the last error so we can get more details.\n    auto err = cudaPeekAtLastError();\n    ss << \"  CUDA error: \" << thrust::system_error(err, thrust::cuda_category()).what() << \"\\n\";\n  } else if (code == ncclSystemError) {\n    ss << \"  This might be caused by a network configuration issue. 
Please consider specifying \"\n          \"the network interface for NCCL via environment variables listed in its reference: \"\n          \"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\\n\";\n  }\n  return Fail(ss.str());\n}\n\nNcclStub::NcclStub(StringView path) : path_{std::move(path)} {\n#if defined(XGBOOST_USE_DLOPEN_NCCL)\n  CHECK(!path_.empty()) << \"Empty path for NCCL.\";\n\n  auto cu_major = (CUDA_VERSION) / 1000;\n  std::stringstream ss;\n  ss << R\"m(\n\nIf XGBoost is installed from PyPI with pip, the error can fixed by:\n\n- Run `pip install nvidia-nccl-cu)m\"\n     << cu_major << \"` (Or with any CUDA version that's compatible with \" << cu_major << \").\";\n  ss << R\"m(\n\nOtherwise, please refer to:\n\n  https://xgboost.readthedocs.io/en/stable/tutorials/dask.html#troubleshooting\n\nfor more info, or open an issue on GitHub. Starting from XGBoost 2.1.0, the PyPI package\nno long bundles NCCL in the binary wheel.\n\n)m\";\n  auto help = ss.str();\n  std::string msg{\"Failed to load NCCL from path: `\" + path_ + \"`. Error:\\n  \"};\n\n  auto safe_load = [&](auto t, StringView name) {\n    std::stringstream errs;\n    auto ptr = reinterpret_cast<decltype(t)>(dlsym(handle_, name.c_str()));\n    if (!ptr) {\n      errs << \"Failed to load NCCL symbol `\" << name << \"` from \" << path_ << \". 
Error:\\n  \"\n           << dlerror() << help;\n      LOG(FATAL) << errs.str();\n    }\n    return ptr;\n  };\n\n  handle_ = dlopen(path_.c_str(), RTLD_LAZY);\n  if (!handle_) {\n    LOG(FATAL) << msg << dlerror() << help;\n  }\n\n  allreduce_ = safe_load(allreduce_, \"ncclAllReduce\");\n  broadcast_ = safe_load(broadcast_, \"ncclBroadcast\");\n  allgather_ = safe_load(allgather_, \"ncclAllGather\");\n  comm_init_rank_ = safe_load(comm_init_rank_, \"ncclCommInitRank\");\n  comm_init_rank_config_ = safe_load(comm_init_rank_config_, \"ncclCommInitRankConfig\");\n  comm_destroy_ = safe_load(comm_destroy_, \"ncclCommDestroy\");\n  comm_finalize_ = safe_load(comm_finalize_, \"ncclCommFinalize\");\n  comm_get_async_error_ = safe_load(comm_get_async_error_, \"ncclCommGetAsyncError\");\n  comm_abort_ = safe_load(comm_abort_, \"ncclCommAbort\");\n  get_uniqueid_ = safe_load(get_uniqueid_, \"ncclGetUniqueId\");\n  send_ = safe_load(send_, \"ncclSend\");\n  recv_ = safe_load(recv_, \"ncclRecv\");\n  group_start_ = safe_load(group_start_, \"ncclGroupStart\");\n  group_end_ = safe_load(group_end_, \"ncclGroupEnd\");\n  get_error_string_ = safe_load(get_error_string_, \"ncclGetErrorString\");\n  get_version_ = safe_load(get_version_, \"ncclGetVersion\");\n#else\n  allreduce_ = ncclAllReduce;\n  broadcast_ = ncclBroadcast;\n  allgather_ = ncclAllGather;\n  comm_init_rank_ = ncclCommInitRank;\n  comm_init_rank_config_ = ncclCommInitRankConfig;\n  comm_destroy_ = ncclCommDestroy;\n  comm_finalize_ = ncclCommFinalize;\n  comm_get_async_error_ = ncclCommGetAsyncError;\n  comm_abort_ = ncclCommAbort;\n  get_uniqueid_ = ncclGetUniqueId;\n  send_ = ncclSend;\n  recv_ = ncclRecv;\n  group_start_ = ncclGroupStart;\n  group_end_ = ncclGroupEnd;\n  get_error_string_ = ncclGetErrorString;\n  get_version_ = ncclGetVersion;\n#endif\n\n  std::int32_t major = 0, minor = 0, patch = 0;\n  SafeColl(this->GetVersion(&major, &minor, &patch));\n  LOG(INFO) << \"Loaded shared NCCL \" << major << \".\" 
<< minor << \".\" << patch << \":`\" << path_\n            << \"`\" << std::endl;\n\n  error::CheckOldNccl(major, minor, patch);\n};\n\nNcclStub::~NcclStub() {  // NOLINT\n#if defined(XGBOOST_USE_DLOPEN_NCCL)\n  if (handle_) {\n    auto rc = dlclose(handle_);\n    if (rc != 0) {\n      LOG(WARNING) << \"Failed to close NCCL handle:\" << dlerror();\n    }\n  }\n  handle_ = nullptr;\n#endif  // defined(XGBOOST_USE_DLOPEN_NCCL)\n}\n\n[[nodiscard]] Result BusyWait(std::shared_ptr<NcclStub> nccl, ncclComm_t comm,\n                              std::chrono::seconds timeout) {\n  using namespace std::chrono_literals;  // NOLINT\n  common::Timer timer;\n  ncclResult_t async_error = ncclSuccess;\n  timer.Start();\n  do {\n    auto rc = nccl->CommGetAsyncError(comm, &async_error);\n    if (!rc.OK()) {\n      return rc;\n    }\n    if (async_error == ncclInProgress) {\n      if (timer.Duration().count() < timeout.count()) {\n        std::this_thread::sleep_for(20ms);\n      } else {\n        return Fail(\"Timeout, elapsed:\" + std::to_string(timer.Duration().count()));\n      }\n    }\n  } while (async_error == ncclInProgress);\n\n  return nccl->GetNcclResult(async_error);\n}\n}  // namespace xgboost::collective\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "src/collective/nccl_stub.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n#if defined(XGBOOST_USE_NCCL)\n#include <cuda_runtime_api.h>\n#include <nccl.h>\n\n#include <atomic>  // for atomic\n#include <memory>  // for shared_ptr\n#include <string>  // for string\n\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/string_view.h\"        // for StringView\n\nnamespace xgboost::collective {\n/**\n * @brief A stub for NCCL to facilitate dynamic loading.\n */\nclass NcclStub {\n#if defined(XGBOOST_USE_DLOPEN_NCCL)\n  void* handle_{nullptr};\n#endif  // defined(XGBOOST_USE_DLOPEN_NCCL)\n  std::string path_;\n  std::atomic<bool> aborted_{false};\n\n  decltype(ncclAllReduce)* allreduce_{nullptr};\n  decltype(ncclBroadcast)* broadcast_{nullptr};\n  decltype(ncclAllGather)* allgather_{nullptr};\n  decltype(ncclCommInitRank)* comm_init_rank_{nullptr};\n  decltype(ncclCommInitRankConfig)* comm_init_rank_config_{nullptr};\n  decltype(ncclCommDestroy)* comm_destroy_{nullptr};\n  decltype(ncclCommFinalize)* comm_finalize_{nullptr};\n  decltype(ncclCommGetAsyncError)* comm_get_async_error_{nullptr};\n  decltype(ncclCommAbort)* comm_abort_{nullptr};\n  decltype(ncclGetUniqueId)* get_uniqueid_{nullptr};\n  decltype(ncclSend)* send_{nullptr};\n  decltype(ncclRecv)* recv_{nullptr};\n  decltype(ncclGroupStart)* group_start_{nullptr};\n  decltype(ncclGroupEnd)* group_end_{nullptr};\n  decltype(ncclGetErrorString)* get_error_string_{nullptr};\n  decltype(ncclGetVersion)* get_version_{nullptr};\n\n public:\n  [[nodiscard]] Result GetNcclResult(ncclResult_t code) const;\n\n public:\n  explicit NcclStub(StringView path);\n  ~NcclStub();\n\n  [[nodiscard]] Result Allreduce(const void* sendbuff, void* recvbuff, size_t count,\n                                 ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm,\n                                 cudaStream_t stream) const {\n    return this->GetNcclResult(allreduce_(sendbuff, recvbuff, count, datatype, 
op, comm, stream));\n  }\n  [[nodiscard]] Result Broadcast(const void* sendbuff, void* recvbuff, size_t count,\n                                 ncclDataType_t datatype, int root, ncclComm_t comm,\n                                 cudaStream_t stream) const {\n    return this->GetNcclResult(broadcast_(sendbuff, recvbuff, count, datatype, root, comm, stream));\n  }\n  [[nodiscard]] Result Allgather(const void* sendbuff, void* recvbuff, size_t sendcount,\n                                 ncclDataType_t datatype, ncclComm_t comm,\n                                 cudaStream_t stream) const {\n    return this->GetNcclResult(allgather_(sendbuff, recvbuff, sendcount, datatype, comm, stream));\n  }\n  [[nodiscard]] Result CommInitRank(ncclComm_t* comm, int nranks, ncclUniqueId commId,\n                                    int rank) const {\n    return this->GetNcclResult(this->comm_init_rank_(comm, nranks, commId, rank));\n  }\n  [[nodiscard]] Result CommInitRankConfig(ncclComm_t* comm, int nranks, ncclUniqueId commId,\n                                          int rank, ncclConfig_t* config) const {\n    return this->GetNcclResult(this->comm_init_rank_config_(comm, nranks, commId, rank, config));\n  }\n  [[nodiscard]] Result CommDestroy(ncclComm_t comm) const {\n    if (this->Aborted()) {\n      return Success();\n    }\n    return this->GetNcclResult(comm_destroy_(comm));\n  }\n  [[nodiscard]] Result CommFinalize(ncclComm_t comm) const {\n    if (this->Aborted()) {\n      return Success();\n    }\n    return this->GetNcclResult(comm_finalize_(comm));\n  }\n  [[nodiscard]] bool Aborted() const { return this->aborted_; }\n\n  [[nodiscard]] Result CommGetAsyncError(ncclComm_t comm, ncclResult_t* async_error) const {\n    if (this->Aborted()) {\n      *async_error = ncclSuccess;\n      return Success();\n    }\n    return this->GetNcclResult(comm_get_async_error_(comm, async_error));\n  }\n  [[nodiscard]] Result CommAbort(ncclComm_t comm) {\n    if (this->Aborted()) {\n      
return Success();\n    }\n    this->aborted_ = true;\n    return this->GetNcclResult(comm_abort_(comm));\n  }\n  [[nodiscard]] Result GetUniqueId(ncclUniqueId* uniqueId) const {\n    return this->GetNcclResult(get_uniqueid_(uniqueId));\n  }\n  [[nodiscard]] Result Send(const void* sendbuff, size_t count, ncclDataType_t datatype, int peer,\n                            ncclComm_t comm, cudaStream_t stream) {\n    return this->GetNcclResult(send_(sendbuff, count, datatype, peer, comm, stream));\n  }\n  [[nodiscard]] Result Recv(void* recvbuff, size_t count, ncclDataType_t datatype, int peer,\n                            ncclComm_t comm, cudaStream_t stream) const {\n    return this->GetNcclResult(recv_(recvbuff, count, datatype, peer, comm, stream));\n  }\n  [[nodiscard]] Result GroupStart() const { return this->GetNcclResult(group_start_()); }\n  [[nodiscard]] Result GroupEnd() const { return this->GetNcclResult(group_end_()); }\n  [[nodiscard]] const char* GetErrorString(ncclResult_t result) const {\n    return get_error_string_(result);\n  }\n  [[nodiscard]] Result GetVersion(std::int32_t* major, std::int32_t* minor,\n                                  std::int32_t* patch) const {\n    std::int32_t v = 0;\n    auto rc = this->GetNcclResult(get_version_(&v));\n    if (!rc.OK()) {\n      return rc;\n    }\n\n    if (major) {\n      *major = v / 10000;\n    }\n    if (minor) {\n      *minor = v % 10000 / 100;\n    }\n    if (patch) {\n      *patch = v % 100;\n    }\n    return rc;\n  }\n};\n\n[[nodiscard]] Result BusyWait(std::shared_ptr<NcclStub> nccl, ncclComm_t comm,\n                              std::chrono::seconds timeout);\n}  // namespace xgboost::collective\n\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "src/collective/protocol.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n#include <utility>  // for move\n\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/collective/socket.h\"  // for TCPSocket\n#include \"xgboost/json.h\"               // for Json\n\nnamespace xgboost::collective::proto {\nstruct PeerInfo {\n  std::string host;\n  std::int32_t port{-1};\n  std::int32_t rank{-1};\n\n  PeerInfo() = default;\n  PeerInfo(std::string host, std::int32_t port, std::int32_t rank)\n      : host{std::move(host)}, port{port}, rank{rank} {}\n\n  explicit PeerInfo(Json const& peer)\n      : host{get<String>(peer[\"host\"])},\n        port{static_cast<std::int32_t>(get<Integer const>(peer[\"port\"]))},\n        rank{static_cast<std::int32_t>(get<Integer const>(peer[\"rank\"]))} {}\n\n  [[nodiscard]] Json ToJson() const {\n    Json info{Object{}};\n    info[\"rank\"] = rank;\n    info[\"host\"] = String{host};\n    info[\"port\"] = Integer{port};\n    return info;\n  }\n\n  [[nodiscard]] auto HostPort() const { return host + \":\" + std::to_string(this->port); }\n};\n\nstruct Magic {\n  static constexpr std::int32_t kMagic = 0xff99;\n\n  [[nodiscard]] Result Verify(xgboost::collective::TCPSocket* p_sock) {\n    std::int32_t magic{kMagic};\n    std::size_t n_sent{0};\n    return Success() << [&] {\n      return p_sock->SendAll(&magic, sizeof(magic), &n_sent);\n    } << [&] {\n      if (n_sent != sizeof(magic)) {\n        return Fail(\"Failed to verify.\");\n      }\n      return Success();\n    } << [&] {\n      magic = 0;\n      return p_sock->RecvAll(&magic, sizeof(magic), &n_sent);\n    } << [&] {\n      if (n_sent != sizeof(magic)) {\n        return Fail(\"Failed to verify.\");\n      }\n      if (magic != kMagic) {\n        return xgboost::collective::Fail(\"Invalid verification number.\");\n      }\n      return Success();\n    };\n  }\n};\n\n// Basic commands 
for communication between workers and the tracker.\nenum class CMD : std::int32_t {\n  kInvalid = 0,\n  kStart = 1,\n  kShutdown = 2,\n  kError = 3,\n  kPrint = 4,\n};\n\nstruct Connect {\n  [[nodiscard]] Result WorkerSend(TCPSocket* tracker, std::int32_t world, std::int32_t rank,\n                                  std::string task_id) const {\n    Json jinit{Object{}};\n    jinit[\"world_size\"] = Integer{world};\n    jinit[\"rank\"] = Integer{rank};\n    jinit[\"task_id\"] = String{task_id};\n    std::string msg;\n    Json::Dump(jinit, &msg);\n    auto n_bytes = tracker->Send(msg);\n    if (n_bytes != msg.size()) {\n      return Fail(\"Failed to send init command from worker.\");\n    }\n    return Success();\n  }\n  [[nodiscard]] Result TrackerRecv(TCPSocket* sock, std::int32_t* world, std::int32_t* rank,\n                                   std::string* task_id) const {\n    std::string init;\n    auto rc = sock->Recv(&init);\n    if (!rc.OK()) {\n      return Fail(\"Connect protocol failed.\", std::move(rc));\n    }\n    auto jinit = Json::Load(StringView{init});\n    *world = get<Integer const>(jinit[\"world_size\"]);\n    *rank = get<Integer const>(jinit[\"rank\"]);\n    *task_id = get<String const>(jinit[\"task_id\"]);\n    return Success();\n  }\n};\n\nclass Start {\n private:\n  [[nodiscard]] Result TrackerSend(std::int32_t world, TCPSocket* worker) const {\n    Json jcmd{Object{}};\n    jcmd[\"world_size\"] = Integer{world};\n    auto scmd = Json::Dump(jcmd);\n    auto n_bytes = worker->Send(scmd);\n    if (n_bytes != scmd.size()) {\n      return Fail(\"Failed to send init command from tracker.\");\n    }\n    return Success();\n  }\n\n public:\n  [[nodiscard]] Result WorkerSend(std::int32_t lport, TCPSocket* tracker,\n                                  std::int32_t eport) const {\n    Json jcmd{Object{}};\n    jcmd[\"cmd\"] = Integer{static_cast<std::int32_t>(CMD::kStart)};\n    jcmd[\"port\"] = Integer{lport};\n    jcmd[\"error_port\"] = 
Integer{eport};\n    auto scmd = Json::Dump(jcmd);\n    auto n_bytes = tracker->Send(scmd);\n    if (n_bytes != scmd.size()) {\n      return Fail(\"Failed to send init command from worker.\");\n    }\n    return Success();\n  }\n  [[nodiscard]] Result WorkerRecv(TCPSocket* tracker, std::int32_t* p_world) const {\n    std::string scmd;\n    auto rc = tracker->Recv(&scmd);\n    if (!rc.OK()) {\n      return Fail(\"Failed to recv init command from tracker.\", std::move(rc));\n    }\n    auto jcmd = Json::Load(scmd);\n    auto world = get<Integer const>(jcmd[\"world_size\"]);\n    if (world <= 0) {\n      return Fail(\"Invalid world size.\");\n    }\n    *p_world = world;\n    return rc;\n  }\n  [[nodiscard]] Result TrackerHandle(Json jcmd, std::int32_t* recv_world, std::int32_t world,\n                                     std::int32_t* p_port, TCPSocket* p_sock,\n                                     std::int32_t* eport) const {\n    *p_port = get<Integer const>(jcmd[\"port\"]);\n    if (*p_port <= 0) {\n      return Fail(\"Invalid port.\");\n    }\n    if (*recv_world != -1) {\n      return Fail(\"Invalid initialization sequence.\");\n    }\n    *recv_world = world;\n    *eport = get<Integer const>(jcmd[\"error_port\"]);\n    return TrackerSend(world, p_sock);\n  }\n};\n\n// Protocol for communicating with the tracker for printing message.\nstruct Print {\n  [[nodiscard]] Result WorkerSend(TCPSocket* tracker, std::string msg) const {\n    Json jcmd{Object{}};\n    jcmd[\"cmd\"] = Integer{static_cast<std::int32_t>(CMD::kPrint)};\n    jcmd[\"msg\"] = String{std::move(msg)};\n    auto scmd = Json::Dump(jcmd);\n    auto n_bytes = tracker->Send(scmd);\n    if (n_bytes != scmd.size()) {\n      return Fail(\"Failed to send print command from worker.\");\n    }\n    return Success();\n  }\n  [[nodiscard]] Result TrackerHandle(Json jcmd, std::string* p_msg) const {\n    if (!IsA<String>(jcmd[\"msg\"])) {\n      return Fail(\"Invalid print command.\");\n    }\n    auto msg = 
get<String const>(jcmd[\"msg\"]);\n    *p_msg = msg;\n    return Success();\n  }\n};\n\n// Protocol for communicating with the tracker during error.\nstruct ErrorCMD {\n  [[nodiscard]] Result WorkerSend(TCPSocket* tracker, Result const& res) const {\n    auto msg = res.Report();\n    auto code = res.Code().value();\n    Json jcmd{Object{}};\n    jcmd[\"msg\"] = String{std::move(msg)};\n    jcmd[\"code\"] = Integer{code};\n    jcmd[\"cmd\"] = Integer{static_cast<std::int32_t>(CMD::kError)};\n    auto scmd = Json::Dump(jcmd);\n    auto n_bytes = tracker->Send(scmd);\n    if (n_bytes != scmd.size()) {\n      return Fail(\"Failed to send error command from worker.\");\n    }\n    return Success();\n  }\n  [[nodiscard]] Result TrackerHandle(Json jcmd, std::string* p_msg, int* p_code) const {\n    if (!IsA<String>(jcmd[\"msg\"]) || !IsA<Integer>(jcmd[\"code\"])) {\n      return Fail(\"Invalid error command.\");\n    }\n    auto msg = get<String const>(jcmd[\"msg\"]);\n    auto code = get<Integer const>(jcmd[\"code\"]);\n    *p_msg = msg;\n    *p_code = code;\n    return Success();\n  }\n};\n\n// Protocol for communicating with the tracker during shutdown.\nstruct ShutdownCMD {\n  [[nodiscard]] Result Send(TCPSocket* peer) const {\n    Json jcmd{Object{}};\n    jcmd[\"cmd\"] = Integer{static_cast<std::int32_t>(proto::CMD::kShutdown)};\n    auto scmd = Json::Dump(jcmd);\n    auto n_bytes = peer->Send(scmd);\n    if (n_bytes != scmd.size()) {\n      return Fail(\"Failed to send shutdown command from worker.\");\n    }\n    return Success();\n  }\n};\n\n// Protocol for communicating with the local error handler during error or shutdown. 
Only\n// one protocol that doesn't have the tracker involved.\nstruct Error {\n  constexpr static std::int32_t ShutdownSignal() { return 0; }\n  constexpr static std::int32_t ErrorSignal() { return -1; }\n\n  [[nodiscard]] Result SignalError(TCPSocket* worker) const {\n    std::int32_t err{ErrorSignal()};\n    std::size_t n_sent{0};\n    return Success() << [&] {\n      return worker->SendAll(&err, sizeof(err), &n_sent);\n    } << [&] {\n      if (n_sent == sizeof(err)) {\n        return Success();\n      }\n      return Fail(\"Failed to send error signal\");\n    };\n  }\n  // self is localhost, we are sending the signal to the error handling thread for it to\n  // close.\n  [[nodiscard]] Result SignalShutdown(TCPSocket* self) const {\n    std::int32_t err{ShutdownSignal()};\n    std::size_t n_sent{0};\n    return Success() << [&] {\n      return self->SendAll(&err, sizeof(err), &n_sent);\n    } << [&] {\n      if (n_sent == sizeof(err)) {\n        return Success();\n      }\n      return Fail(\"Failed to send shutdown signal\");\n    };\n  }\n  // get signal, either for error or for shutdown.\n  [[nodiscard]] Result RecvSignal(TCPSocket* peer, bool* p_is_error) const {\n    std::int32_t err{ShutdownSignal()};\n    std::size_t n_recv{0};\n    return Success() << [&] {\n      return peer->RecvAll(&err, sizeof(err), &n_recv);\n    } << [&] {\n      if (n_recv == sizeof(err)) {\n        *p_is_error = err == 1;\n        return Success();\n      }\n      return Fail(\"Failed to receive error signal.\");\n    };\n  }\n};\n}  // namespace xgboost::collective::proto\n"
  },
  {
    "path": "src/collective/result.cc",
    "content": "/**\n *  Copyright 2024-2025, XGBoost Contributors\n */\n#include \"xgboost/collective/result.h\"\n\n#include <filesystem>  // for path\n#include <sstream>     // for stringstream\n#include <stack>       // for stack\n\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::collective {\nnamespace detail {\n[[nodiscard]] std::string ResultImpl::Report() const {\n  std::stringstream ss;\n  ss << \"\\n- \" << this->message;\n  if (this->errc != std::error_code{}) {\n    ss << \" system error:\" << this->errc.message();\n  }\n\n  auto ptr = prev.get();\n  while (ptr) {\n    ss << \"\\n- \";\n    ss << ptr->message;\n\n    if (ptr->errc != std::error_code{}) {\n      ss << \" \" << ptr->errc.message();\n    }\n    ptr = ptr->prev.get();\n  }\n\n  return ss.str();\n}\n\n[[nodiscard]] std::error_code ResultImpl::Code() const {\n  // Find the root error.\n  std::stack<ResultImpl const*> stack;\n  auto ptr = this;\n  while (ptr) {\n    stack.push(ptr);\n    if (ptr->prev) {\n      ptr = ptr->prev.get();\n    } else {\n      break;\n    }\n  }\n  while (!stack.empty()) {\n    auto frame = stack.top();\n    stack.pop();\n    if (frame->errc != std::error_code{}) {\n      return frame->errc;\n    }\n  }\n  return std::error_code{};\n}\n\nvoid ResultImpl::Concat(std::unique_ptr<ResultImpl> rhs) {\n  auto ptr = this;\n  while (ptr->prev) {\n    ptr = ptr->prev.get();\n  }\n  ptr->prev = std::move(rhs);\n}\n\nstd::string MakeMsg(std::string&& msg, char const* file, std::int32_t line) {\n  dmlc::DateLogger logger;\n  if (file && line != -1) {\n    auto name = std::filesystem::path{file}.filename();\n    return \"[\" + name.string() + \":\" + std::to_string(line) + \"|\" + logger.HumanDate() +\n           \"]: \" + std::forward<std::string>(msg);\n  }\n  return std::string{\"[\"} + logger.HumanDate() + \"]\" + std::forward<std::string>(msg);  // NOLINT\n}\n}  // namespace detail\n\nvoid SafeColl(Result const& rc, char const* file, std::int32_t line) {\n  if (rc.OK()) 
{\n    return;\n  }\n  if (file && line != -1) {\n    dmlc::DateLogger logger;\n    auto name = std::filesystem::path{file}.filename();\n    LOG(FATAL) << (\"[\" + name.string() + \":\" + std::to_string(line) + \"|\" + logger.HumanDate() +\n                   \"]:\\n\")\n               << rc.Report();\n    // Return just in case if this function is deep in ctypes callbacks.\n    return;\n  }\n  LOG(FATAL) << rc.Report();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/socket.cc",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#include \"xgboost/collective/socket.h\"\n\n#include <algorithm>     // for max\n#include <array>         // for array\n#include <cstddef>       // for size_t\n#include <cstdint>       // for int32_t\n#include <cstring>       // for memcpy, memset\n#include <system_error>  // for error_code, system_category\n#include <thread>        // for sleep_for\n\n#include \"xgboost/collective/poll_utils.h\"  // for PollHelper\n#include \"xgboost/collective/result.h\"      // for Result\n\n#if defined(__unix__) || defined(__APPLE__)\n#include <netdb.h>  // getaddrinfo, freeaddrinfo\n#endif              // defined(__unix__) || defined(__APPLE__)\n\nnamespace xgboost::collective {\nSockAddress MakeSockAddress(StringView host, in_port_t port) {\n  struct addrinfo hints;\n  std::memset(&hints, 0, sizeof(hints));\n  hints.ai_socktype = SOCK_STREAM;\n  struct addrinfo *res = nullptr;\n  int sig = getaddrinfo(host.c_str(), nullptr, &hints, &res);\n  if (sig != 0) {\n    LOG(FATAL) << \"Failed to get addr info for: \" << host << \", error: \" << gai_strerror(sig);\n    return {};\n  }\n  if (res->ai_family == static_cast<std::int32_t>(SockDomain::kV4)) {\n    sockaddr_in addr;\n    std::memcpy(&addr, res->ai_addr, res->ai_addrlen);\n    addr.sin_port = htons(port);\n    auto v = SockAddrV4{addr};\n    freeaddrinfo(res);\n    return SockAddress{v};\n  } else if (res->ai_family == static_cast<std::int32_t>(SockDomain::kV6)) {\n    sockaddr_in6 addr;\n    std::memcpy(&addr, res->ai_addr, res->ai_addrlen);\n\n    addr.sin6_port = htons(port);\n    auto v = SockAddrV6{addr};\n    freeaddrinfo(res);\n    return SockAddress{v};\n  } else {\n    LOG(FATAL) << \"Failed to get addr info for: \" << host;\n  }\n\n  return SockAddress{};\n}\n\nSockAddrV4 SockAddrV4::Loopback() { return MakeSockAddress(\"127.0.0.1\", 0).V4(); }\nSockAddrV4 SockAddrV4::InaddrAny() { return MakeSockAddress(\"0.0.0.0\", 0).V4(); }\n\nSockAddrV6 
SockAddrV6::Loopback() { return MakeSockAddress(\"::1\", 0).V6(); }\nSockAddrV6 SockAddrV6::InaddrAny() { return MakeSockAddress(\"::\", 0).V6(); }\n\n[[nodiscard]] Result TCPSocket::Listen(std::int32_t backlog) {\n  backlog = std::max(backlog, 256);\n  if (listen(this->handle_, backlog) != 0) {\n    return system::FailWithCode(\"Failed to listen.\");\n  }\n  return Success();\n}\n\nstd::size_t TCPSocket::Send(StringView str) {\n  CHECK(!this->IsClosed());\n  CHECK_LT(str.size(), std::numeric_limits<std::int32_t>::max());\n  std::int32_t len = static_cast<std::int32_t>(str.size());\n  std::size_t n_bytes{0};\n  auto rc = Success() << [&] {\n    return this->SendAll(&len, sizeof(len), &n_bytes);\n  } << [&] {\n    if (n_bytes != sizeof(len)) {\n      return Fail(\"Failed to send string length.\");\n    }\n    return Success();\n  } << [&] {\n    return this->SendAll(str.c_str(), str.size(), &n_bytes);\n  } << [&] {\n    if (n_bytes != str.size()) {\n      return Fail(\"Failed to send string.\");\n    }\n    return Success();\n  };\n  SafeColl(rc);\n  return n_bytes;\n}\n\n[[nodiscard]] Result TCPSocket::Recv(std::string *p_str) {\n  CHECK(!this->IsClosed());\n  std::int32_t len;\n  std::size_t n_bytes{0};\n  return Success() << [&] {\n    return this->RecvAll(&len, sizeof(len), &n_bytes);\n  } << [&] {\n    if (n_bytes != sizeof(len)) {\n      return Fail(\"Failed to recv string length.\");\n    }\n    return Success();\n  } << [&] {\n    p_str->resize(len);\n    return this->RecvAll(&(*p_str)[0], len, &n_bytes);\n  } << [&] {\n    if (static_cast<std::remove_reference_t<decltype(len)>>(n_bytes) != len) {\n      return Fail(\"Failed to recv string.\");\n    }\n    return Success();\n  };\n}\n\n[[nodiscard]] Result Connect(xgboost::StringView host, std::int32_t port, std::int32_t retry,\n                             std::chrono::seconds timeout,\n                             xgboost::collective::TCPSocket *out_conn) {\n  auto addr = 
MakeSockAddress(xgboost::StringView{host}, port);\n  auto &conn = *out_conn;\n\n  sockaddr const *addr_handle{nullptr};\n  socklen_t addr_len{0};\n  if (addr.IsV4()) {\n    addr_handle = reinterpret_cast<const sockaddr *>(&addr.V4().Handle());\n    addr_len = sizeof(addr.V4().Handle());\n  } else {\n    addr_handle = reinterpret_cast<const sockaddr *>(&addr.V6().Handle());\n    addr_len = sizeof(addr.V6().Handle());\n  }\n\n  if (conn.IsClosed()) {\n    conn = TCPSocket::Create(addr.Domain());\n  }\n  CHECK_EQ(static_cast<std::int32_t>(conn.Domain()), static_cast<std::int32_t>(addr.Domain()));\n  auto non_blocking = conn.NonBlocking();\n  auto rc = conn.NonBlocking(true);\n  if (!rc.OK()) {\n    return Fail(\"Failed to set socket option.\", std::move(rc));\n  }\n\n  Result last_error;\n  auto log_failure = [&host, &last_error, port](Result err) {\n    last_error = std::move(err);\n    LOG(WARNING) << \"Failed to connect to:\" << host << \":\" << port\n                 << \" Error:\" << last_error.Report();\n  };\n\n  for (std::int32_t attempt = 0; attempt < std::max(retry, 1); ++attempt) {\n    if (attempt > 0) {\n      LOG(WARNING) << \"Retrying connection to \" << host << \" for the \" << attempt << \" time.\";\n      std::this_thread::sleep_for(std::chrono::seconds{attempt << 1});\n    }\n\n    auto rc = connect(conn.Handle(), addr_handle, addr_len);\n    if (rc == 0) {\n      return conn.NonBlocking(non_blocking);\n    }\n\n    auto errcode = system::LastError();\n    if (!system::ErrorWouldBlock(errcode)) {\n      log_failure(Fail(\"connect failed.\", std::error_code{errcode, std::system_category()}));\n      continue;\n    }\n\n    rabit::utils::PollHelper poll;\n    poll.WatchWrite(conn);\n    auto result = poll.Poll(timeout);\n    if (!result.OK()) {\n      // poll would fail if there's a socket error, we log the root cause instead of the\n      // poll failure.\n      log_failure(std::move(result) + conn.GetSockError());\n      continue;\n    }\n    if 
(!poll.CheckWrite(conn)) {\n      log_failure(Fail(\"poll failed.\", std::error_code{errcode, std::system_category()}));\n      continue;\n    }\n    result = conn.GetSockError();\n    if (!result.OK()) {\n      log_failure(std::move(result));\n      continue;\n    }\n\n    return conn.NonBlocking(non_blocking);\n  }\n\n  std::stringstream ss;\n  ss << \"Failed to connect to \" << host << \":\" << port;\n  auto close_rc = conn.Close();\n  return Fail(ss.str(), std::move(close_rc) + std::move(last_error));\n}\n\n[[nodiscard]] Result GetHostName(std::string *p_out) {\n  std::array<char, HOST_NAME_MAX> buf;\n  if (gethostname(&buf[0], HOST_NAME_MAX) != 0) {\n    return system::FailWithCode(\"Failed to get host name.\");\n  }\n  *p_out = buf.data();\n  return Success();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/topo.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <cstdint>  // for int32_t\n#include <set>      // for set\n#include <vector>   // for vector\n\n#include \"../common/bitfield.h\"  // for TrailingZeroBits\n\nnamespace xgboost::collective {\n\n// Indexing into the ring\ninline std::int32_t BootstrapNext(std::int32_t r, std::int32_t world) {\n  auto nrank = (r + world + 1) % world;\n  return nrank;\n}\n\ninline std::int32_t BootstrapPrev(std::int32_t r, std::int32_t world) {\n  auto nrank = (r + world - 1) % world;\n  return nrank;\n}\n\n/**\n * @brief Helpers for the binomial tree rooted at rank 0.\n *\n * References\n * - https://people.mpi-inf.mpg.de/~mehlhorn/ftp/NewToolbox/collective.pdf\n * - https://en.wikipedia.org/wiki/Broadcast_(parallel_pattern)\n */\nnamespace binomial_tree {\ninline std::int32_t ParentLevel(std::int32_t rank) {\n  CHECK_GT(rank, 0);\n  return static_cast<std::int32_t>(TrailingZeroBits(static_cast<std::uint32_t>(rank)));\n}\n\ninline std::int32_t Parent(std::int32_t rank) {\n  return rank - (std::int32_t{1} << ParentLevel(rank));\n}\n\ninline std::int32_t Child(std::int32_t rank, std::int32_t level) {\n  return rank + (std::int32_t{1} << level);\n}\n\ninline bool HasChild(std::int32_t rank, std::int32_t level, std::int32_t world) {\n  return rank % (std::int32_t{1} << (level + 1)) == 0 && Child(rank, level) < world;\n}\n\ninline std::int32_t Depth(std::int32_t world) {\n  if (world <= 1) return -1;\n  std::int32_t depth = 0;\n  while ((std::int32_t{1} << (depth + 1)) < world) {\n    ++depth;\n  }\n  return depth;\n}\n}  // namespace binomial_tree\n\n/**\n * @brief Compute the sparse peer set for a given rank: ring neighbors union binomial tree\n *        neighbors (rooted at rank 0).\n */\ninline std::vector<std::int32_t> SparsePeers(std::int32_t rank, std::int32_t world) {\n  if (world <= 1) {\n    return {};\n  }\n  std::set<std::int32_t> peers;\n\n  peers.insert(BootstrapNext(rank, 
world));\n  peers.insert(BootstrapPrev(rank, world));\n\n  // Connect tree parents and children\n  if (rank > 0) {\n    peers.insert(binomial_tree::Parent(rank));\n  }\n\n  for (std::int32_t level = 0; (std::int32_t{1} << level) < world; ++level) {\n    if (binomial_tree::HasChild(rank, level, world)) {\n      peers.insert(binomial_tree::Child(rank, level));\n    }\n  }\n\n  peers.erase(rank);\n  return {peers.begin(), peers.end()};\n}\n\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/tracker.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n\n#if defined(__unix__) || defined(__APPLE__)\n#include <netdb.h>       // gethostbyname\n#include <sys/socket.h>  // socket, AF_INET6, AF_INET, connect, getsockname\n#endif                   // defined(__unix__) || defined(__APPLE__)\n\n#if defined(_WIN32)\n// Guard the include\n#include <xgboost/windefs.h>\n// Socket API\n#include <winsock2.h>\n#include <ws2tcpip.h>\n#endif  // defined(_WIN32)\n\n#include <algorithm>  // for sort\n#include <chrono>     // for seconds, ms\n#include <cstdint>    // for int32_t\n#include <memory>     // for unique_ptr\n#include <string>     // for string\n#include <utility>    // for move, forward\n\n#include \"../common/json_utils.h\"\n#include \"../common/threading_utils.h\"  // for NameThread\n#include \"../common/timer.h\"            // for Timer\n#include \"protocol.h\"                   // for kMagic, PeerInfo\n#include \"topo.h\"                       // for BootstrapNext\n#include \"tracker.h\"\n#include \"xgboost/collective/poll_utils.h\"  // for PollHelper\n#include \"xgboost/collective/result.h\"      // for Result, Fail, Success\n#include \"xgboost/collective/socket.h\"      // for GetHostName, FailWithCode, MakeSockAddress, ...\n#include \"xgboost/global_config.h\"          // for GlobalConfiguration\n#include \"xgboost/json.h\"                   // for Json\n\nnamespace xgboost::collective {\n\nTracker::Tracker(Json const& config)\n    : sortby_{static_cast<SortBy>(\n          OptionalArg<Integer const>(config, \"sortby\", static_cast<Integer::Int>(SortBy::kHost)))},\n      n_workers_{\n          static_cast<std::int32_t>(RequiredArg<Integer const>(config, \"n_workers\", __func__))},\n      port_{static_cast<std::int32_t>(OptionalArg<Integer const>(config, \"port\", Integer::Int{0}))},\n      timeout_{std::chrono::seconds{\n          OptionalArg<Integer const>(config, \"timeout\", static_cast<std::int64_t>(0))}} {\n  using 
std::chrono_literals::operator\"\"s;\n  // Some old configurations in JVM for the scala implementation (removed) use 0 to\n  // indicate blocking. We continue that convention here.\n  timeout_ = (timeout_ == 0s) ? -1s : timeout_;\n}\n\nResult Tracker::WaitUntilReady() const {\n  using namespace std::chrono_literals;  // NOLINT\n\n  // Busy waiting. The function is mostly for waiting for the OS to launch an async\n  // thread, which should be reasonably fast.\n  common::Timer timer;\n  timer.Start();\n  while (!this->Ready()) {\n    auto ela = timer.Duration().count();\n    if (HasTimeout(this->Timeout()) && ela > this->Timeout().count()) {\n      return Fail(\"Failed to start tracker, timeout:\" + std::to_string(this->Timeout().count()) +\n                  \" seconds.\");\n    }\n    std::this_thread::sleep_for(100ms);\n  }\n\n  return Success();\n}\n\nRabitTracker::WorkerProxy::WorkerProxy(std::int32_t world, TCPSocket sock, SockAddress addr)\n    : sock_{std::move(sock)} {\n  LOG(DEBUG) << \"[tracker]: Connected by the worker: \"\n             << (addr.IsV4() ? 
addr.V4().Addr() : addr.V6().Addr());\n  std::int32_t rank{0};\n  Json jcmd;\n  std::int32_t port{0};\n\n  rc_ = Success() << [&] {\n    return proto::Magic{}.Verify(&sock_);\n  } << [&] {\n    return proto::Connect{}.TrackerRecv(&sock_, &world_, &rank, &task_id_);\n  } << [&] {\n    std::string cmd;\n    auto rc = sock_.Recv(&cmd);\n    if (!rc.OK()) {\n      return rc;\n    }\n    jcmd = Json::Load(StringView{cmd});\n    cmd_ = static_cast<proto::CMD>(get<Integer const>(jcmd[\"cmd\"]));\n    return rc;\n  } << [&] {\n    if (cmd_ == proto::CMD::kStart) {\n      proto::Start start;\n      return start.TrackerHandle(jcmd, &world_, world, &port, &sock_, &eport_);\n    } else if (cmd_ == proto::CMD::kPrint) {\n      proto::Print print;\n      return print.TrackerHandle(jcmd, &msg_);\n    } else if (cmd_ == proto::CMD::kError) {\n      proto::ErrorCMD error;\n      return error.TrackerHandle(jcmd, &msg_, &code_);\n    }\n    return Success();\n  } << [&] {\n    if (addr.IsV4()) {\n      auto host = addr.V4().Addr();\n      info_ = proto::PeerInfo{host, port, rank};\n    } else {\n      auto host = addr.V6().Addr();\n      info_ = proto::PeerInfo{host, port, rank};\n    }\n    return Success();\n  };\n}\n\nRabitTracker::RabitTracker(Json const& config) : Tracker{config} {\n  auto rc = Success() << [&] {\n    host_.clear();\n    host_ = OptionalArg<String>(config, \"host\", std::string{});\n    if (host_.empty()) {\n      return collective::GetHostAddress(&host_);\n    }\n    return Success();\n  } << [&] {\n    auto addr = MakeSockAddress(xgboost::StringView{host_}, 0);\n    listener_ = TCPSocket::Create(addr.IsV4() ? 
SockDomain::kV4 : SockDomain::kV6);\n    return listener_.Bind(host_, &this->port_);\n  } << [&] {\n    CHECK_GT(this->n_workers_, 0);\n    return listener_.Listen(this->n_workers_);\n  };\n  SafeColl(rc);\n}\n\n// The thread init function here doesn't set any state in openmp and CUDA as the tracker\n// doesn't need them.\nstruct TrackerInitThread {\n  GlobalConfiguration config;\n  void operator()() const { *GlobalConfigThreadLocalStore::Get() = config; }\n};\n\nResult RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {\n  auto& workers = *p_workers;\n\n  std::sort(workers.begin(), workers.end(), WorkerCmp{this->sortby_});\n\n  std::vector<std::thread> bootstrap_threads;\n  for (std::int32_t r = 0; r < n_workers_; ++r) {\n    auto& worker = workers[r];\n    auto next = BootstrapNext(r, n_workers_);\n    auto const& next_w = workers[next];\n    bootstrap_threads.emplace_back(\n        [next, &worker, &next_w, init = TrackerInitThread{*GlobalConfigThreadLocalStore::Get()}] {\n          init();\n          auto jnext = proto::PeerInfo{next_w.Host(), next_w.Port(), next}.ToJson();\n          std::string str;\n          Json::Dump(jnext, &str);\n          worker.Send(StringView{str});\n        });\n    std::string name = \"tkbs_t-\" + std::to_string(r);\n    common::NameThread(&bootstrap_threads.back(), name.c_str());\n  }\n\n  for (auto& t : bootstrap_threads) {\n    t.join();\n  }\n\n  for (auto const& w : workers) {\n    worker_error_handles_.emplace_back(w.Host(), w.ErrorPort());\n  }\n  return Success();\n}\n\n[[nodiscard]] std::future<Result> RabitTracker::Run() {\n  // a state machine to keep track of consistency.\n  struct State {\n    std::int32_t const n_workers;\n\n    std::int32_t n_shutdown{0};\n    bool during_restart{false};\n    bool running{false};\n    std::vector<WorkerProxy> pending;\n\n    explicit State(std::int32_t world) : n_workers{world} {}\n    State(State const& that) = delete;\n    State& operator=(State&& that) = delete;\n\n    
// modifiers\n    void Start(WorkerProxy&& worker) {\n      CHECK_LT(pending.size(), n_workers);\n      CHECK_LE(n_shutdown, n_workers);\n      CHECK(!running);\n\n      pending.emplace_back(std::forward<WorkerProxy>(worker));\n\n      CHECK_LE(pending.size(), n_workers);\n    }\n    void Shutdown() {\n      CHECK_GE(n_shutdown, 0);\n      CHECK_LT(n_shutdown, n_workers);\n\n      running = false;\n      ++n_shutdown;\n\n      CHECK_LE(n_shutdown, n_workers);\n    }\n    void Error() {\n      CHECK_LE(pending.size(), n_workers);\n      CHECK_LE(n_shutdown, n_workers);\n\n      running = false;\n      during_restart = true;\n    }\n    void Bootstrap() {\n      CHECK_EQ(pending.size(), n_workers);\n      CHECK_LE(n_shutdown, n_workers);\n\n      running = true;\n\n      // A reset.\n      n_shutdown = 0;\n      during_restart = false;\n      pending.clear();\n    }\n\n    // observers\n    [[nodiscard]] bool Ready() const {\n      CHECK_LE(pending.size(), n_workers);\n      return static_cast<std::int32_t>(pending.size()) == n_workers;\n    }\n    [[nodiscard]] bool ShouldContinue() const {\n      CHECK_LE(pending.size(), n_workers);\n      CHECK_LE(n_shutdown, n_workers);\n      // - Without error, we should shutdown after all workers are offline.\n      // - With error, all workers are offline, and we have during_restart as true.\n      return n_shutdown != n_workers || during_restart;\n    }\n  };\n\n  auto handle_error = [&](WorkerProxy const& worker) {\n    auto msg = worker.Msg();\n    auto code = worker.Code();\n    LOG(WARNING) << \"[tracker]: Received error from [\" << worker.Host() << \":\" << worker.Rank()\n                 << \"]: \" << msg << \" code:\" << code;\n    auto host = worker.Host();\n    // We signal all workers for the error, if they haven't aborted already.\n    for (auto& w : worker_error_handles_) {\n      if (w.first == host) {\n        continue;\n      }\n      TCPSocket out;\n      // Connecting to the error port as a signal for 
exit.\n      //\n      // retry is set to 1, just let the worker timeout or error. Otherwise the\n      // tracker and the worker might be waiting for each other.\n      auto rc = Success() << [&] {\n        return Connect(w.first, w.second, 1, timeout_, &out);\n      } << [&] {\n        return proto::Error{}.SignalError(&out);\n      };\n      if (!rc.OK()) {\n        return Fail(\"Failed to inform worker:\" + w.first + \" for error.\", std::move(rc));\n      }\n    }\n    return Success();\n  };\n\n  return std::async(\n      std::launch::async,\n      [this, handle_error, init = TrackerInitThread{*GlobalConfigThreadLocalStore::Get()}] {\n        init();\n        State state{this->n_workers_};\n\n        auto select_accept = [&](TCPSocket* sock, auto* addr) {\n          // accept with poll so that we can enable timeout and interruption.\n          rabit::utils::PollHelper poll;\n          auto rc = Success() << [&] {\n            std::lock_guard lock{listener_mu_};\n            return listener_.NonBlocking(true);\n          } << [&] {\n            {\n              std::lock_guard lock{listener_mu_};\n              poll.WatchRead(listener_);\n            }\n            if (state.running) {\n              // Don't timeout if the communicator group is up and running.\n              return poll.Poll(std::chrono::seconds{-1});\n            } else {\n              // Have timeout for workers to bootstrap.\n              return poll.Poll(timeout_);\n            }\n          } << [&] {\n            // this->Stop() closes the socket with a lock. 
Therefore, when the accept returns\n            // due to shutdown, the state is still valid (closed).\n            return listener_.Accept(sock, addr);\n          };\n          return rc;\n        };\n\n        while (state.ShouldContinue()) {\n          TCPSocket sock;\n          SockAddress addr;\n          this->ready_ = true;\n          auto rc = select_accept(&sock, &addr);\n          if (!rc.OK()) {\n            return Fail(\"Failed to accept connection.\", this->Stop() + std::move(rc));\n          }\n\n          auto worker = WorkerProxy{n_workers_, std::move(sock), std::move(addr)};\n          if (!worker.Status().OK()) {\n            LOG(WARNING) << \"Failed to initialize worker proxy.\" << worker.Status().Report();\n            continue;\n          }\n          switch (worker.Command()) {\n            case proto::CMD::kStart: {\n              if (state.running) {\n                // Something went wrong with one of the workers. It got disconnected without\n                // notice.\n                state.Error();\n                rc = handle_error(worker);\n                if (!rc.OK()) {\n                  return Fail(\"Failed to handle abort.\", this->Stop() + std::move(rc));\n                }\n              }\n\n              state.Start(std::move(worker));\n              if (state.Ready()) {\n                rc = this->Bootstrap(&state.pending);\n                state.Bootstrap();\n              }\n              if (!rc.OK()) {\n                return this->Stop() + std::move(rc);\n              }\n              continue;\n            }\n            case proto::CMD::kShutdown: {\n              if (state.during_restart) {\n                // The worker can still send shutdown after call to `std::exit`.\n                continue;\n              }\n              state.Shutdown();\n              continue;\n            }\n            case proto::CMD::kError: {\n              if (state.during_restart) {\n                // Ignore further errors.\n        
        continue;\n              }\n              state.Error();\n              rc = handle_error(worker);\n              continue;\n            }\n            case proto::CMD::kPrint: {\n              LOG(CONSOLE) << worker.Msg();\n              continue;\n            }\n            case proto::CMD::kInvalid:\n            default: {\n              return Fail(\"Invalid command received.\", this->Stop());\n            }\n          }\n        }\n        return this->Stop();\n      });\n}\n\n[[nodiscard]] Json RabitTracker::WorkerArgs() const {\n  auto rc = this->WaitUntilReady();\n  SafeColl(rc);\n\n  Json args{Object{}};\n  args[\"dmlc_tracker_uri\"] = String{host_};\n  args[\"dmlc_tracker_port\"] = this->Port();\n  return args;\n}\n\n[[nodiscard]] Result RabitTracker::Stop() {\n  if (!this->Ready()) {\n    return Success();\n  }\n\n  ready_ = false;\n  std::lock_guard lock{listener_mu_};\n  if (this->listener_.IsClosed()) {\n    return Success();\n  }\n\n  return Success() << [&] {\n    // This should have the effect of stopping the `accept` call.\n    return this->listener_.Shutdown();\n  } << [&] {\n    return listener_.Close();\n  };\n}\n\n[[nodiscard]] Result GetHostAddress(std::string* out) {\n  auto rc = GetHostName(out);\n  if (!rc.OK()) {\n    return rc;\n  }\n\n  addrinfo hints;\n  addrinfo* servinfo;\n\n  std::memset(&hints, 0, sizeof(hints));\n  hints.ai_family = AF_UNSPEC;\n  hints.ai_socktype = SOCK_STREAM;\n  hints.ai_flags = AI_PASSIVE;\n\n  std::int32_t errc{0};\n  std::unique_ptr<addrinfo*, std::function<void(addrinfo**)>> guard{\n      &servinfo, [](addrinfo** ptr) { freeaddrinfo(*ptr); }};\n  if ((errc = getaddrinfo(nullptr, \"0\", &hints, &servinfo)) != 0) {\n    return Fail(\"Failed to get address info:\" + std::string{gai_strerror(errc)});\n  }\n\n  // https://beej.us/guide/bgnet/html/#getaddrinfoprepare-to-launch\n  std::vector<SockAddress> addresses;\n  for (addrinfo* p = servinfo; p != nullptr; p = p->ai_next) {\n    // Get the pointer to 
the address itself, different fields in IPv4 and IPv6:\n    if (p->ai_family == AF_INET) {  // IPv4\n      struct sockaddr_in* ipv4 = reinterpret_cast<sockaddr_in*>(p->ai_addr);\n      addresses.emplace_back(SockAddrV4{*ipv4});\n      auto ip = addresses.back().V4().Addr();\n      // Priortize V4.\n      // Return if this is a public IP address. Not accurate, we have other reserved IPs\n      if (ip.size() > 4 && ip.substr(0, 4) != \"127.\" && ip != SockAddrV4::InaddrAny().Addr()) {\n        *out = ip;\n        return Success();\n      }\n    } else {\n      struct sockaddr_in6* ipv6 = reinterpret_cast<sockaddr_in6*>(p->ai_addr);\n      addresses.emplace_back(SockAddrV6{*ipv6});\n    }\n  }\n  // If no v4 address is found, we try v6\n  for (auto const& addr : addresses) {\n    if (addr.IsV6()) {\n      auto ip = addr.V6().Addr();\n      if (ip != SockAddrV6::InaddrAny().Addr() && ip != SockAddrV6::Loopback().Addr()) {\n        *out = ip;\n        return Success();\n      }\n    }\n  }\n\n  // Create an UDP socket to prob the public IP address, it's fine even if it's\n  // unreachable.\n  auto sock = socket(AF_INET, SOCK_DGRAM, 0);\n  if (sock == INVALID_SOCKET) {\n    return Fail(\"Failed to create socket.\");\n  }\n\n  auto paddr = MakeSockAddress(StringView{\"10.255.255.255\"}, 1);\n  sockaddr const* addr_handle = reinterpret_cast<const sockaddr*>(&paddr.V4().Handle());\n  socklen_t addr_len{sizeof(paddr.V4().Handle())};\n  auto err = connect(sock, addr_handle, addr_len);\n  if (err != 0) {\n    return system::FailWithCode(\"Failed to find IP address.\");\n  }\n\n  // get the IP address from socket desrciptor\n  struct sockaddr_in addr;\n  socklen_t len = sizeof(addr);\n  if (getsockname(sock, reinterpret_cast<struct sockaddr*>(&addr), &len) == -1) {\n    return Fail(\"Failed to get sock name.\");\n  }\n  std::string ip = inet_ntoa(addr.sin_addr);\n\n  err = system::CloseSocket(sock);\n  if (err != 0) {\n    return system::FailWithCode(\"Failed to close 
socket.\");\n  }\n\n  *out = ip;\n  return Success();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/collective/tracker.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n#include <chrono>   // for seconds\n#include <cstdint>  // for int32_t\n#include <future>   // for future\n#include <string>   // for string\n#include <utility>  // for pair\n#include <vector>   // for vector\n\n#include \"protocol.h\"\n#include \"xgboost/collective/result.h\"  // for Result\n#include \"xgboost/collective/socket.h\"  // for TCPSocket\n#include \"xgboost/json.h\"               // for Json\n\nnamespace xgboost::collective {\ninline bool HasTimeout(std::chrono::seconds timeout) { return timeout.count() > 0; }\n/**\n *\n * @brief Implementation of RABIT tracker.\n *\n * * What is a tracker\n *\n *   The implementation of collective follows what RABIT did in the past. It requires a\n *   tracker to coordinate initialization and error recovery of workers. While the\n *   original implementation attempted to attain error resislient inside the collective\n *   module, which turned out be too challenging due to large amount of external\n *   states. The new implementation here differs from RABIT in the way that neither state\n *   recovery nor resislient is handled inside the collective, it merely provides the\n *   mechanism to signal error to other workers through the use of a centralized tracker.\n *\n *   There are three major functionalities provided the a tracker, namely:\n *   - Initialization. Share the node addresses among all workers.\n *   - Logging.\n *   - Signal error. If an exception is thrown in one (or many) of the workers, it can\n *     signal an error to the tracker and the tracker will notify other workers.\n */\nclass Tracker {\n public:\n  enum class SortBy : std::int8_t {\n    kHost = 0,\n    kTask = 1,\n  };\n\n protected:\n  // How to sort the workers, either by host name or by task ID. When using a multi-GPU\n  // setting, multiple workers can occupy the same host, in which case one should sort\n  // workers by task. 
Due to compatibility reason, the task ID is not always available, so\n  // we use host as the default.\n  SortBy sortby_;\n\n protected:\n  std::int32_t n_workers_{0};\n  std::int32_t port_{-1};\n  std::chrono::seconds timeout_{-1};\n  std::atomic<bool> ready_{false};\n\n public:\n  explicit Tracker(Json const& config);\n  virtual ~Tracker() = default;\n\n  [[nodiscard]] Result WaitUntilReady() const;\n\n  [[nodiscard]] virtual std::future<Result> Run() = 0;\n  [[nodiscard]] virtual Json WorkerArgs() const = 0;\n  [[nodiscard]] std::chrono::seconds Timeout() const { return timeout_; }\n  [[nodiscard]] virtual std::int32_t Port() const { return port_; }\n  /**\n   * @brief Flag to indicate whether the server is running.\n   */\n  [[nodiscard]] bool Ready() const { return ready_; }\n  /**\n   * @brief Shutdown the tracker, cannot be restarted again. Useful when the tracker hangs while\n   *        calling accept.\n   */\n  virtual Result Stop() { return Success(); }\n};\n\nclass RabitTracker : public Tracker {\n  // a wrapper for connected worker socket.\n  class WorkerProxy {\n    TCPSocket sock_;\n    proto::PeerInfo info_;\n    std::int32_t eport_{0};\n    std::int32_t world_{-1};\n    std::string task_id_;\n\n    proto::CMD cmd_{proto::CMD::kInvalid};\n    std::string msg_;\n    std::int32_t code_{0};\n    Result rc_;\n\n   public:\n    explicit WorkerProxy(std::int32_t world, TCPSocket sock, SockAddress addr);\n    WorkerProxy(WorkerProxy const& that) = delete;\n    WorkerProxy(WorkerProxy&& that) = default;\n    WorkerProxy& operator=(WorkerProxy const&) = delete;\n    WorkerProxy& operator=(WorkerProxy&&) = default;\n\n    [[nodiscard]] auto Host() const { return info_.host; }\n    [[nodiscard]] auto TaskID() const { return task_id_; }\n    [[nodiscard]] auto Port() const { return info_.port; }\n    [[nodiscard]] auto Rank() const { return info_.rank; }\n    [[nodiscard]] auto ErrorPort() const { return eport_; }\n    [[nodiscard]] auto Command() const { 
return cmd_; }\n    [[nodiscard]] auto Msg() const { return msg_; }\n    [[nodiscard]] auto Code() const { return code_; }\n\n    [[nodiscard]] Result const& Status() const { return rc_; }\n    [[nodiscard]] Result& Status() { return rc_; }\n\n    void Send(StringView value) { this->sock_.Send(value); }\n  };\n  // Provide an ordering for workers, this helps us get deterministic topology.\n  struct WorkerCmp {\n    SortBy sortby;\n    explicit WorkerCmp(SortBy sortby) : sortby{sortby} {}\n\n    [[nodiscard]] bool operator()(WorkerProxy const& lhs, WorkerProxy const& rhs) {\n      auto const& lh = sortby == Tracker::SortBy::kHost ? lhs.Host() : lhs.TaskID();\n      auto const& rh = sortby == Tracker::SortBy::kHost ? rhs.Host() : rhs.TaskID();\n\n      if (lh != rh) {\n        return lh < rh;\n      }\n      return lhs.TaskID() < rhs.TaskID();\n    }\n  };\n\n private:\n  std::string host_;\n  // record for how to reach out to workers if error happens.\n  std::vector<std::pair<std::string, std::int32_t>> worker_error_handles_;\n  // listening socket for incoming workers.\n  TCPSocket listener_;\n  // mutex for protecting the listener, used to prevent race when it's listening while\n  // another thread tries to shut it down.\n  std::mutex listener_mu_;\n\n  Result Bootstrap(std::vector<WorkerProxy>* p_workers);\n\n public:\n  explicit RabitTracker(Json const& config);\n  ~RabitTracker() override = default;\n\n  std::future<Result> Run() override;\n  [[nodiscard]] Json WorkerArgs() const override;\n  // Stop the tracker without waiting. 
This is to prevent the tracker from hanging when\n  // one of the workers failes to start.\n  [[nodiscard]] Result Stop() override;\n};\n\n// Prob the public IP address of the host, need a better method.\n//\n// This is directly translated from the previous Python implementation, we should find a\n// more riguous approach, can use some expertise in network programming.\n[[nodiscard]] Result GetHostAddress(std::string* out);\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "src/common/algorithm.cuh",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_ALGORITHM_CUH_\n#define XGBOOST_COMMON_ALGORITHM_CUH_\n\n#include <thrust/copy.h>                        // for copy\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n#include <thrust/sort.h>                        // for stable_sort_by_key\n\n#include <cstddef>                                      // size_t\n#include <cstdint>                                      // int32_t\n#include <cub/device/device_run_length_encode.cuh>      // for DeviceRunLengthEncode\n#include <cub/device/dispatch/dispatch_radix_sort.cuh>  // for DispatchSegmentedRadixSort\n#include <cub/util_type.cuh>                            // for NullType, DoubleBuffer\n#include <cuda/std/tuple>                               // for tuple\n#include <functional>                                   // for plus, logical_and\n#include <iterator>                                     // for distance\n#include <limits>                                       // for numeric_limits\n#include <type_traits>                                  // for conditional_t,remove_const_t\n\n#include \"common.h\"            // safe_cuda\n#include \"cuda_context.cuh\"    // CUDAContext\n#include \"cuda_stream.h\"       // for StreamRef\n#include \"device_helpers.cuh\"  // TemporaryArray,SegmentId,LaunchN,Iota\n#include \"device_vector.cuh\"   // for device_vector\n#include \"xgboost/base.h\"      // XGBOOST_DEVICE\n#include \"xgboost/context.h\"   // Context\n#include \"xgboost/logging.h\"   // CHECK\n#include \"xgboost/span.h\"      // Span,byte\n\nnamespace xgboost::common {\nnamespace detail {\n\n#if CUB_VERSION >= 300000\nconstexpr auto kCubSortOrderAscending = cub::SortOrder::Ascending;\nconstexpr auto kCubSortOrderDescending = cub::SortOrder::Descending;\n#else\nconstexpr bool kCubSortOrderAscending = false;\nconstexpr bool kCubSortOrderDescending = true;\n#endif\n\n// Wrapper around cub sort to define 
is_decending\ntemplate <bool IS_DESCENDING, typename KeyT, typename BeginOffsetIteratorT,\n          typename EndOffsetIteratorT>\nstatic void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_storage,\n                                         std::size_t &temp_storage_bytes,  // NOLINT\n                                         const KeyT *d_keys_in, KeyT *d_keys_out, int num_items,\n                                         int num_segments, BeginOffsetIteratorT d_begin_offsets,\n                                         EndOffsetIteratorT d_end_offsets, int begin_bit = 0,\n                                         int end_bit = sizeof(KeyT) * 8,\n                                         bool debug_synchronous = false) {\n  using OffsetT = int;\n\n  // Null value type\n  cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);\n  cub::DoubleBuffer<cub::NullType> d_values;\n\n  constexpr auto kCubSortOrder = IS_DESCENDING ? kCubSortOrderDescending : kCubSortOrderAscending;\n  dh::safe_cuda((cub::DispatchSegmentedRadixSort<\n                 kCubSortOrder, KeyT, cub::NullType, BeginOffsetIteratorT, EndOffsetIteratorT,\n                 OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,\n                                    num_segments, d_begin_offsets, d_end_offsets, begin_bit,\n                                    end_bit, false, ctx->Stream(), debug_synchronous)));\n}\n\n// Wrapper around cub sort for easier `descending` sort.\ntemplate <bool descending, typename KeyT, typename ValueT, typename BeginOffsetIteratorT,\n          typename EndOffsetIteratorT>\nvoid DeviceSegmentedRadixSortPair(void *d_temp_storage,\n                                  std::size_t &temp_storage_bytes,  // NOLINT\n                                  const KeyT *d_keys_in, KeyT *d_keys_out,\n                                  const ValueT *d_values_in, ValueT *d_values_out,\n                                  std::size_t num_items, 
std::size_t num_segments,\n                                  BeginOffsetIteratorT d_begin_offsets,\n                                  EndOffsetIteratorT d_end_offsets, curt::StreamRef stream,\n                                  int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) {\n  cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);\n  cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out);\n  // In old version of cub, num_items in dispatch is also int32_t, no way to change.\n  using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(),\n                                     std::size_t, std::int32_t>;\n  CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());\n  // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation\n\n  constexpr auto kCubSortOrder = descending ? kCubSortOrderDescending : kCubSortOrderAscending;\n#if THRUST_MAJOR_VERSION >= 2\n  dh::safe_cuda((cub::DispatchSegmentedRadixSort<\n                 kCubSortOrder, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,\n                 OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,\n                                    num_segments, d_begin_offsets, d_end_offsets, begin_bit,\n                                    end_bit, false, stream)));\n#elif (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13)\n  dh::safe_cuda((cub::DispatchSegmentedRadixSort<\n                 kCubSortOrder, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,\n                 OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,\n                                    num_segments, d_begin_offsets, d_end_offsets, begin_bit,\n                                    end_bit, false, stream, false)));\n#else\n  dh::safe_cuda(\n      (cub::DispatchSegmentedRadixSort<kCubSortOrder, KeyT, ValueT, BeginOffsetIteratorT,\n                                       
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes,\n                                                          d_keys, d_values, num_items, num_segments,\n                                                          d_begin_offsets, d_end_offsets, begin_bit,\n                                                          end_bit, false, stream, false)));\n#endif\n}\n}  // namespace detail\n\ntemplate <typename U, typename V>\nvoid SegmentedSequence(Context const *ctx, Span<U> d_offset_ptr, Span<V> out_sequence) {\n  dh::LaunchN(out_sequence.size(), ctx->CUDACtx()->Stream(),\n              [out_sequence, d_offset_ptr] __device__(std::size_t idx) {\n                auto group = dh::SegmentId(d_offset_ptr, idx);\n                out_sequence[idx] = idx - d_offset_ptr[group];\n              });\n}\n\ntemplate <bool descending, typename U, typename V>\ninline void SegmentedSortKeys(Context const *ctx, Span<V const> group_ptr,\n                              Span<U> out_sorted_values) {\n  CHECK_GE(group_ptr.size(), 1ul);\n  std::size_t n_groups = group_ptr.size() - 1;\n  std::size_t bytes = 0;\n  auto const *cuctx = ctx->CUDACtx();\n  CHECK(cuctx);\n  detail::DeviceSegmentedRadixSortKeys<descending>(\n      cuctx, nullptr, bytes, out_sorted_values.data(), out_sorted_values.data(),\n      out_sorted_values.size(), n_groups, group_ptr.data(), group_ptr.data() + 1);\n  dh::TemporaryArray<byte> temp_storage(bytes);\n  detail::DeviceSegmentedRadixSortKeys<descending>(\n      cuctx, temp_storage.data().get(), bytes, out_sorted_values.data(), out_sorted_values.data(),\n      out_sorted_values.size(), n_groups, group_ptr.data(), group_ptr.data() + 1);\n}\n\n/**\n * \\brief Create sorted index for data with multiple segments.\n *\n * \\tparam accending sorted in non-decreasing order.\n * \\tparam per_seg_index Index starts from 0 for each segment if true, otherwise the\n *                       the index span the whole data.\n */\ntemplate <bool accending, bool per_seg_index, typename 
U, typename V, typename IdxT>\nvoid SegmentedArgSort(Context const *ctx, Span<U> values, Span<V> group_ptr,\n                      Span<IdxT> sorted_idx) {\n  auto cuctx = ctx->CUDACtx();\n  CHECK_GE(group_ptr.size(), 1ul);\n  std::size_t n_groups = group_ptr.size() - 1;\n  std::size_t bytes = 0;\n  if (per_seg_index) {\n    SegmentedSequence(ctx, group_ptr, sorted_idx);\n  } else {\n    dh::Iota(sorted_idx, cuctx->Stream());\n  }\n  dh::TemporaryArray<std::remove_const_t<U>> values_out(values.size());\n  dh::TemporaryArray<std::remove_const_t<IdxT>> sorted_idx_out(sorted_idx.size());\n\n  detail::DeviceSegmentedRadixSortPair<!accending>(\n      nullptr, bytes, values.data(), values_out.data().get(), sorted_idx.data(),\n      sorted_idx_out.data().get(), sorted_idx.size(), n_groups, group_ptr.data(),\n      group_ptr.data() + 1, cuctx->Stream());\n  dh::TemporaryArray<byte> temp_storage(bytes);\n  detail::DeviceSegmentedRadixSortPair<!accending>(\n      temp_storage.data().get(), bytes, values.data(), values_out.data().get(), sorted_idx.data(),\n      sorted_idx_out.data().get(), sorted_idx.size(), n_groups, group_ptr.data(),\n      group_ptr.data() + 1, cuctx->Stream());\n\n  dh::safe_cuda(cudaMemcpyAsync(sorted_idx.data(), sorted_idx_out.data().get(),\n                                sorted_idx.size_bytes(), cudaMemcpyDeviceToDevice,\n                                cuctx->Stream()));\n}\n\n/**\n * \\brief Different from the radix-sort-based argsort, this one can handle cases where\n *        segment doesn't start from 0, but as a result it uses comparison sort.\n */\ntemplate <typename SegIt, typename ValIt>\nvoid SegmentedArgMergeSort(Context const *ctx, SegIt seg_begin, SegIt seg_end, ValIt val_begin,\n                           ValIt val_end, dh::device_vector<std::size_t> *p_sorted_idx) {\n  auto cuctx = ctx->CUDACtx();\n  using Tup = cuda::std::tuple<std::int32_t, float>;\n  auto &sorted_idx = *p_sorted_idx;\n  std::size_t n = std::distance(val_begin, 
val_end);\n  sorted_idx.resize(n);\n  dh::Iota(dh::ToSpan(sorted_idx), cuctx->Stream());\n  dh::device_vector<Tup> keys(sorted_idx.size());\n  auto key_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> Tup {\n    std::int32_t seg_idx;\n    if (i < *seg_begin) {\n      seg_idx = -1;\n    } else {\n      seg_idx = dh::SegmentId(seg_begin, seg_end, i);\n    }\n    auto residue = val_begin[i];\n    return cuda::std::make_tuple(seg_idx, residue);\n  });\n  thrust::copy(ctx->CUDACtx()->CTP(), key_it, key_it + keys.size(), keys.begin());\n  thrust::stable_sort_by_key(cuctx->TP(), keys.begin(), keys.end(), sorted_idx.begin(),\n                             [=] XGBOOST_DEVICE(Tup const &l, Tup const &r) {\n                               if (cuda::std::get<0>(l) != cuda::std::get<0>(r)) {\n                                 // segment index\n                                 return cuda::std::get<0>(l) < cuda::std::get<0>(r);\n                               }\n                               return cuda::std::get<1>(l) < cuda::std::get<1>(r);  // residue\n                             });\n}\n\ntemplate <bool accending, typename IdxT, typename U>\nvoid ArgSort(Context const *ctx, Span<U> keys, Span<IdxT> sorted_idx) {\n  std::size_t bytes = 0;\n  auto cuctx = ctx->CUDACtx();\n  dh::Iota(sorted_idx, cuctx->Stream());\n\n  using KeyT = typename decltype(keys)::value_type;\n  using ValueT = std::remove_const_t<IdxT>;\n\n  dh::TemporaryArray<KeyT> out(keys.size());\n  cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(keys.data()), out.data().get());\n  dh::TemporaryArray<IdxT> sorted_idx_out(sorted_idx.size());\n  cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(sorted_idx.data()),\n                                     sorted_idx_out.data().get());\n\n  // track https://github.com/NVIDIA/cub/pull/340 for 64bit length support\n  using OffsetT = std::conditional_t<!dh::BuildWithCUDACub(), std::ptrdiff_t, int32_t>;\n  CHECK_LE(sorted_idx.size(), 
std::numeric_limits<OffsetT>::max());\n\n  if (accending) {\n    void *d_temp_storage = nullptr;\n#if THRUST_MAJOR_VERSION >= 2\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderAscending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            cuctx->Stream())));\n#else\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderAscending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            nullptr, false)));\n#endif\n    dh::TemporaryArray<char> storage(bytes);\n    d_temp_storage = storage.data().get();\n#if THRUST_MAJOR_VERSION >= 2\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderAscending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            cuctx->Stream())));\n#else\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderAscending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            nullptr, false)));\n#endif\n  } else {\n    void *d_temp_storage = nullptr;\n#if THRUST_MAJOR_VERSION >= 2\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderDescending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            cuctx->Stream())));\n#else\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderDescending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            nullptr, false)));\n#endif\n    dh::TemporaryArray<char> storage(bytes);\n    d_temp_storage = storage.data().get();\n#if 
THRUST_MAJOR_VERSION >= 2\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderDescending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            cuctx->Stream())));\n#else\n    dh::safe_cuda(\n        (cub::DispatchRadixSort<detail::kCubSortOrderDescending, KeyT, ValueT, OffsetT>::Dispatch(\n            d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false,\n            nullptr, false)));\n#endif\n  }\n\n  dh::safe_cuda(cudaMemcpyAsync(sorted_idx.data(), sorted_idx_out.data().get(),\n                                sorted_idx.size_bytes(), cudaMemcpyDeviceToDevice,\n                                cuctx->Stream()));\n}\n\ntemplate <typename InIt, typename OutIt, typename Predicate>\nvoid CopyIf(CUDAContext const *cuctx, InIt in_first, InIt in_second, OutIt out_first,\n            Predicate pred) {\n  // We loop over batches because thrust::copy_if can't deal with sizes > 2^31\n  // See thrust issue #1302, XGBoost #6822\n  size_t constexpr kMaxCopySize = std::numeric_limits<int>::max() / 2;\n  size_t length = std::distance(in_first, in_second);\n  for (size_t offset = 0; offset < length; offset += kMaxCopySize) {\n    auto begin_input = in_first + offset;\n    auto end_input = in_first + std::min(offset + kMaxCopySize, length);\n    out_first = thrust::copy_if(cuctx->CTP(), begin_input, end_input, out_first, pred);\n  }\n}\n\n// Go one level down into cub::DeviceScan API to set OffsetT as 64 bit So we don't crash\n// on n > 2^31.\ntemplate <typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename OffsetT>\nvoid InclusiveScan(xgboost::Context const *ctx, InputIteratorT d_in, OutputIteratorT d_out,\n                   ScanOpT scan_op, OffsetT num_items) {\n#if CUB_VERSION >= 300000\n  static_assert(std::is_unsigned_v<OffsetT>, \"OffsetT must be unsigned\");\n  static_assert(sizeof(OffsetT) >= 4, 
\"OffsetT must be at least 4 bytes long\");\n#endif\n  auto cuctx = ctx->CUDACtx();\n  std::size_t bytes = 0;\n#if THRUST_MAJOR_VERSION >= 2\n  dh::safe_cuda((\n      cub::DispatchScan<InputIteratorT, OutputIteratorT, ScanOpT, cub::NullType, OffsetT>::Dispatch(\n          nullptr, bytes, d_in, d_out, scan_op, cub::NullType(), num_items, nullptr)));\n#else\n  safe_cuda((\n      cub::DispatchScan<InputIteratorT, OutputIteratorT, ScanOpT, cub::NullType, OffsetT>::Dispatch(\n          nullptr, bytes, d_in, d_out, scan_op, cub::NullType(), num_items, nullptr, false)));\n#endif\n  dh::TemporaryArray<char> storage(bytes);\n#if THRUST_MAJOR_VERSION >= 2\n  dh::safe_cuda((\n      cub::DispatchScan<InputIteratorT, OutputIteratorT, ScanOpT, cub::NullType, OffsetT>::Dispatch(\n          storage.data().get(), bytes, d_in, d_out, scan_op, cub::NullType(), num_items, nullptr)));\n#else\n  safe_cuda((\n      cub::DispatchScan<InputIteratorT, OutputIteratorT, ScanOpT, cub::NullType, OffsetT>::Dispatch(\n          storage.data().get(), bytes, d_in, d_out, scan_op, cub::NullType(), num_items, nullptr,\n          false)));\n#endif\n}\n\ntemplate <typename InputIteratorT, typename OutputIteratorT, typename OffsetT>\nvoid InclusiveSum(Context const *ctx, InputIteratorT d_in, OutputIteratorT d_out,\n                  OffsetT num_items) {\n#if CUB_VERSION >= 200800\n  InclusiveScan(ctx, d_in, d_out, std::plus{}, num_items);\n#else\n  InclusiveScan(ctx, d_in, d_out, cub::Sum{}, num_items);\n#endif\n}\n\ntemplate <typename... Args>\nvoid RunLengthEncode(curt::StreamRef stream, Args &&...args) {\n  std::size_t n_bytes = 0;\n  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(nullptr, n_bytes, args..., stream));\n  dh::CachingDeviceUVector<char> tmp(n_bytes);\n  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(tmp.data(), n_bytes, args..., stream));\n}\n\ntemplate <typename... 
Args>\nvoid SegmentedSum(curt::StreamRef stream, Args &&...args) {\n  std::size_t n_bytes = 0;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, n_bytes, args..., stream));\n  dh::CachingDeviceUVector<char> tmp(n_bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(tmp.data(), n_bytes, args..., stream));\n}\n\n/**\n * @brief Customized version of @ref thrust::all_of\n *\n * @ref thrust::all_of uses small intervals for early stop. But we often use this function\n * to perform checks on data and in most cases need to walk through the entire dataset\n * (like all data point is valid). This function uses @ref thrust::reduce to avoid\n * excessive kernel launches and synchronizations.\n */\ntemplate <typename Policy, typename InputIt, typename Chk>\n[[nodiscard]] std::enable_if_t<\n    std::is_same_v<bool,\n                   std::invoke_result_t<Chk, typename std::iterator_traits<InputIt>::value_type>>,\n    bool>\nAllOf(Policy policy, InputIt first, InputIt second, Chk &&check) {\n  auto n = std::distance(first, second);\n  auto it =\n      dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return check(first[i]); });\n  return dh::Reduce(policy, it, it + n, true, std::logical_and<>{});\n}\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_ALGORITHM_CUH_\n"
  },
  {
    "path": "src/common/algorithm.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_ALGORITHM_H_\n#define XGBOOST_COMMON_ALGORITHM_H_\n#include <algorithm>          // upper_bound, stable_sort, sort, max\n#include <cstddef>            // size_t\n#include <functional>         // less\n#include <iterator>           // iterator_traits, distance\n#include <vector>             // vector\n\n#include \"numeric.h\"          // Iota\n#include \"xgboost/context.h\"  // Context\n\n// clang with libstdc++ works as well\n#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \\\n    !defined(__APPLE__) && __has_include(<omp.h>) && __has_include(<parallel/algorithm>)\n#define GCC_HAS_PARALLEL 1\nconstexpr bool kHasParallelStableSort = true;\n#else\nconstexpr bool kHasParallelStableSort = false;\n#endif  // GLIC_VERSION\n\n#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)\n#define MSVC_HAS_PARALLEL 1\n#endif  // MSC\n\n#if defined(GCC_HAS_PARALLEL)\n#include <parallel/algorithm>\n#elif defined(MSVC_HAS_PARALLEL)\n#include <ppl.h>\n#endif  // GLIBC VERSION\n\nnamespace xgboost {\nnamespace common {\ntemplate <typename It, typename Idx>\nauto SegmentId(It first, It last, Idx idx) {\n  std::size_t segment_id = std::upper_bound(first, last, idx) - 1 - first;\n  return segment_id;\n}\n\ntemplate <typename Iter, typename Comp>\nvoid StableSort(Context const *ctx, Iter begin, Iter end, Comp &&comp) {\n  if (ctx->Threads() > 1) {\n#if defined(GCC_HAS_PARALLEL)\n    __gnu_parallel::stable_sort(begin, end, comp,\n                                __gnu_parallel::default_parallel_tag(ctx->Threads()));\n#else\n    // the only stable sort is radix sort for msvc ppl.\n    std::stable_sort(begin, end, comp);\n#endif  // GLIBC VERSION\n  } else {\n    std::stable_sort(begin, end, comp);\n  }\n}\n\ntemplate <typename Iter, typename Comp>\nvoid Sort(Context const *ctx, Iter begin, Iter end, Comp comp) {\n  if (ctx->Threads() > 1) {\n#if 
defined(GCC_HAS_PARALLEL)\n    __gnu_parallel::sort(begin, end, comp, __gnu_parallel::default_parallel_tag(ctx->Threads()));\n#elif defined(MSVC_HAS_PARALLEL)\n    auto n = std::distance(begin, end);\n    // use chunk size as hint to number of threads. No local policy/scheduler input with the\n    // concurrency module.\n    std::size_t chunk_size = n / ctx->Threads();\n    // 2048 is the default of msvc ppl as of v2022.\n    chunk_size = std::max(chunk_size, static_cast<std::size_t>(2048));\n    concurrency::parallel_sort(begin, end, comp, chunk_size);\n#else\n    std::sort(begin, end, comp);\n#endif  // GLIBC VERSION\n  } else {\n    std::sort(begin, end, comp);\n  }\n}\n\ntemplate <typename Idx, typename Iter, typename V = typename std::iterator_traits<Iter>::value_type,\n          typename Comp = std::less<V>>\nstd::vector<Idx> ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less<V>{}) {\n  auto n = std::distance(begin, end);\n  std::vector<Idx> result(n);\n  Iota(ctx, result.begin(), result.end(), 0);\n  auto op = [&](Idx const &l, Idx const &r) { return comp(begin[l], begin[r]); };\n  StableSort(ctx, result.begin(), result.end(), op);\n  return result;\n}\n}  // namespace common\n}  // namespace xgboost\n\n#if defined(GCC_HAS_PARALLEL)\n#undef GCC_HAS_PARALLEL\n#endif  // defined(GCC_HAS_PARALLEL)\n\n#if defined(MSVC_HAS_PARALLEL)\n#undef MSVC_HAS_PARALLEL\n#endif  // defined(MSVC_HAS_PARALLEL)\n\n#endif  // XGBOOST_COMMON_ALGORITHM_H_\n"
  },
  {
    "path": "src/common/api_entry.h",
    "content": "/**\n * Copyright 2016-2023 by XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_API_ENTRY_H_\n#define XGBOOST_COMMON_API_ENTRY_H_\n#include <string>               // std::string\n#include <vector>               // std::vector\n\n#include \"xgboost/base.h\"       // GradientPair,bst_ulong\n#include \"xgboost/predictor.h\"  // PredictionCacheEntry\n\nnamespace xgboost {\n/**\n * \\brief entry to easily hold returning information\n */\nstruct XGBAPIThreadLocalEntry {\n  /*! \\brief result holder for returning string */\n  std::string ret_str;\n  /*! \\brief result holder for returning raw buffer */\n  std::vector<char> ret_char_vec;\n  /*! \\brief result holder for returning strings */\n  std::vector<std::string> ret_vec_str;\n  /*! \\brief result holder for returning string pointers */\n  std::vector<const char *> ret_vec_charp;\n  /*! \\brief returning float vector. */\n  std::vector<float> ret_vec_float;\n  /*! \\brief returning uint vector. */\n  std::vector<std::uint64_t> ret_vec_u64;\n  /*! \\brief temp variable of gradient pairs. */\n  std::vector<GradientPair> tmp_gpair;\n  /*! \\brief Temp variable for returning prediction result. */\n  PredictionCacheEntry prediction_entry;\n  /*! \\brief Temp variable for returning prediction shape. */\n  std::vector<bst_ulong> prediction_shape;\n};\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_API_ENTRY_H_\n"
  },
  {
    "path": "src/common/base64.h",
    "content": "/*!\n * Copyright 2014 by Contributors\n * \\file base64.h\n * \\brief data stream support to input and output from/to base64 stream\n * base64 is easier to store and pass as text format in mapreduce\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_BASE64_H_\n#define XGBOOST_COMMON_BASE64_H_\n\n#include <xgboost/logging.h>\n#include <cctype>\n#include <cstdio>\n#include <string>\n#include \"./io.h\"\n\nnamespace xgboost {\nnamespace common {\n/*! \\brief buffer reader of the stream that allows you to get */\nclass StreamBufferReader {\n public:\n  explicit StreamBufferReader(size_t buffer_size)\n      :stream_(NULL),\n       read_len_(1), read_ptr_(1) {\n    buffer_.resize(buffer_size);\n  }\n  /*!\n   * \\brief set input stream\n   */\n  inline void set_stream(dmlc::Stream *stream) {\n    stream_ = stream;\n    read_len_ = read_ptr_ = 1;\n  }\n  /*!\n   * \\brief allows quick read using get char\n   */\n  inline char GetChar(void) {\n    while (true) {\n      if (read_ptr_ < read_len_) {\n        return buffer_[read_ptr_++];\n      } else {\n        read_len_ = stream_->Read(&buffer_[0], buffer_.length());\n        if (read_len_ == 0) return EOF;\n        read_ptr_ = 0;\n      }\n    }\n  }\n  /*! \\brief whether we are reaching the end of file */\n  inline bool AtEnd(void) const {\n    return read_len_ == 0;\n  }\n\n private:\n  /*! \\brief the underlying stream */\n  dmlc::Stream *stream_;\n  /*! \\brief buffer to hold data */\n  std::string buffer_;\n  /*! \\brief length of valid data in buffer */\n  size_t read_len_;\n  /*! \\brief pointer in the buffer */\n  size_t read_ptr_;\n};\n\n/*! 
\\brief namespace of base64 decoding and encoding table */\nnamespace base64 {\nconst char DecodeTable[] = {\n  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n  62,  // '+'\n  0, 0, 0,\n  63,  // '/'\n  52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  // '0'-'9'\n  0, 0, 0, 0, 0, 0, 0,\n  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  // 'A'-'Z'\n  0, 0, 0, 0, 0, 0,\n  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n  39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // 'a'-'z'\n};\nstatic const char EncodeTable[] =\n    \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/\";\n}  // namespace base64\n/*! \\brief the stream that reads from base64, note we take from file pointers */\nclass Base64InStream: public dmlc::Stream {\n public:\n  explicit Base64InStream(dmlc::Stream *fs) : reader_(256) {\n    reader_.set_stream(fs);\n    num_prev = 0; tmp_ch = 0;\n  }\n  /*!\n   * \\brief initialize the stream position to beginning of next base64 stream\n   * call this function before actually start read\n   */\n  inline void InitPosition(void) {\n    // get a character\n    do {\n      tmp_ch = reader_.GetChar();\n    } while (isspace(tmp_ch));\n  }\n  /*! 
\\brief whether current position is end of a base64 stream */\n  inline bool IsEOF(void) const {\n    return num_prev == 0 && (tmp_ch == EOF || isspace(tmp_ch));\n  }\n  virtual size_t Read(void *ptr, size_t size) {\n    using base64::DecodeTable;\n    if (size == 0) return 0;\n    // use tlen to record left size\n    size_t tlen = size;\n    unsigned char *cptr = static_cast<unsigned char*>(ptr);\n    // if anything left, load from previous buffered result\n    if (num_prev != 0) {\n      if (num_prev == 2) {\n        if (tlen >= 2) {\n          *cptr++ = buf_prev[0];\n          *cptr++ = buf_prev[1];\n          tlen -= 2;\n          num_prev = 0;\n        } else {\n          // assert tlen == 1\n          *cptr++ = buf_prev[0]; --tlen;\n          buf_prev[0] = buf_prev[1];\n          num_prev = 1;\n        }\n      } else {\n        // assert num_prev == 1\n        *cptr++ = buf_prev[0]; --tlen; num_prev = 0;\n      }\n    }\n    if (tlen == 0) return size;\n    int nvalue;\n    // note: everything goes with 4 bytes in Base64\n    // so we process 4 bytes a unit\n    while (tlen && tmp_ch != EOF && !isspace(tmp_ch)) {\n      // first byte\n      nvalue = DecodeTable[tmp_ch] << 18;\n      {\n        // second byte\n        tmp_ch = reader_.GetChar();\n        CHECK(tmp_ch != EOF && !isspace(tmp_ch)) << \"invalid base64 format\";\n        nvalue |= DecodeTable[tmp_ch] << 12;\n        *cptr++ = (nvalue >> 16) & 0xFF; --tlen;\n        }\n      {\n        // third byte\n        tmp_ch = reader_.GetChar();\n        CHECK(tmp_ch != EOF && !isspace(tmp_ch)) << \"invalid base64 format\";\n        // handle termination\n        if (tmp_ch == '=') {\n          tmp_ch = reader_.GetChar();\n          CHECK(tmp_ch == '=') << \"invalid base64 format\";\n          tmp_ch = reader_.GetChar();\n          CHECK(tmp_ch == EOF || isspace(tmp_ch))\n              << \"invalid base64 format\";\n          break;\n        }\n        nvalue |= DecodeTable[tmp_ch] << 6;\n        if (tlen) 
{\n          *cptr++ = (nvalue >> 8) & 0xFF; --tlen;\n        } else {\n          buf_prev[num_prev++] = (nvalue >> 8) & 0xFF;\n        }\n      }\n      {\n        // fourth byte\n        tmp_ch = reader_.GetChar();\n        CHECK(tmp_ch != EOF && !isspace(tmp_ch))\n            << \"invalid base64 format\";\n        if (tmp_ch == '=') {\n          tmp_ch = reader_.GetChar();\n          CHECK(tmp_ch == EOF || isspace(tmp_ch))\n              << \"invalid base64 format\";\n          break;\n        }\n        nvalue |= DecodeTable[tmp_ch];\n        if (tlen) {\n          *cptr++ = nvalue & 0xFF; --tlen;\n        } else {\n          buf_prev[num_prev ++] = nvalue & 0xFF;\n        }\n      }\n      // get next char\n      tmp_ch = reader_.GetChar();\n    }\n    if (kStrictCheck) {\n      CHECK_EQ(tlen, 0) << \"Base64InStream: read incomplete\";\n    }\n    return size - tlen;\n  }\n  virtual void Write(const void *ptr, size_t size) {\n    LOG(FATAL) << \"Base64InStream do not support write\";\n  }\n\n private:\n  StreamBufferReader reader_;\n  int tmp_ch;\n  int num_prev;\n  unsigned char buf_prev[2];\n  // whether we need to do strict check\n  static const bool kStrictCheck = false;\n};\n/*! 
\\brief the stream that write to base64, note we take from file pointers */\nclass Base64OutStream: public dmlc::Stream {\n public:\n  explicit Base64OutStream(dmlc::Stream *fp) : fp(fp) {\n    buf_top = 0;\n  }\n  virtual void Write(const void *ptr, size_t size) {\n    using base64::EncodeTable;\n    size_t tlen = size;\n    const unsigned char *cptr = static_cast<const unsigned char*>(ptr);\n    while (tlen) {\n      while (buf_top < 3  && tlen != 0) {\n        buf[++buf_top] = *cptr++; --tlen;\n      }\n      if (buf_top == 3) {\n        // flush 4 bytes out\n        PutChar(EncodeTable[buf[1] >> 2]);\n        PutChar(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F]);\n        PutChar(EncodeTable[((buf[2] << 2) | (buf[3] >> 6)) & 0x3F]);\n        PutChar(EncodeTable[buf[3] & 0x3F]);\n        buf_top = 0;\n      }\n    }\n  }\n  virtual size_t Read(void *ptr, size_t size) {\n    LOG(FATAL) << \"Base64OutStream do not support read\";\n    return 0;\n  }\n  /*!\n   * \\brief finish writing of all current base64 stream, do some post processing\n   * \\param endch character to put to end of stream, if it is EOF, then nothing will be done\n   */\n  inline void Finish(char endch = EOF) {\n    using base64::EncodeTable;\n    if (buf_top == 1) {\n      PutChar(EncodeTable[buf[1] >> 2]);\n      PutChar(EncodeTable[(buf[1] << 4) & 0x3F]);\n      PutChar('=');\n      PutChar('=');\n    }\n    if (buf_top == 2) {\n      PutChar(EncodeTable[buf[1] >> 2]);\n      PutChar(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F]);\n      PutChar(EncodeTable[(buf[2] << 2) & 0x3F]);\n      PutChar('=');\n    }\n    buf_top = 0;\n    if (endch != EOF) PutChar(endch);\n    this->Flush();\n  }\n\n private:\n  dmlc::Stream *fp;\n  int buf_top;\n  unsigned char buf[4];\n  std::string out_buf;\n  static const size_t kBufferSize = 256;\n\n  inline void PutChar(char ch) {\n    out_buf += ch;\n    if (out_buf.length() >= kBufferSize) Flush();\n  }\n  inline void Flush(void) {\n    if 
(out_buf.length() != 0) {\n      fp->Write(&out_buf[0], out_buf.length());\n      out_buf.clear();\n    }\n  }\n};\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_BASE64_H_\n"
  },
  {
    "path": "src/common/bitfield.h",
    "content": "/**\n * Copyright 2019-2023, XGBoost Contributors\n * \\file bitfield.h\n */\n#ifndef XGBOOST_COMMON_BITFIELD_H_\n#define XGBOOST_COMMON_BITFIELD_H_\n\n#include <algorithm>    // for min\n#include <bitset>       // for bitset\n#include <cstdint>      // for uint32_t, uint64_t, uint8_t\n#include <ostream>      // for ostream\n#include <type_traits>  // for conditional_t, is_signed_v, add_const_t\n\n#if defined(__CUDACC__)\n#include <thrust/copy.h>\n#include <thrust/device_ptr.h>\n\n#include \"device_helpers.cuh\"\n#endif  // defined(__CUDACC__)\n\n#include \"common.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost {\n\n#if defined(__CUDACC__)\nusing BitFieldAtomicType = unsigned long long;  // NOLINT\n\n__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,\n                                                       BitFieldAtomicType val) {\n  BitFieldAtomicType old = *address, assumed;  // NOLINT\n  do {\n    assumed = old;\n    old = atomicCAS(address, assumed, val | assumed);\n  } while (assumed != old);\n\n  return old;\n}\n\n__forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* address,\n                                                        BitFieldAtomicType val) {\n  BitFieldAtomicType old = *address, assumed;  // NOLINT\n  do {\n    assumed = old;\n    old = atomicCAS(address, assumed, val & assumed);\n  } while (assumed != old);\n\n  return old;\n}\n#endif  // defined(__CUDACC__)\n\n/**\n * @brief A non-owning type with auxiliary methods defined for manipulating bits.\n *\n * @tparam VT        Underlying value type, must be an unsigned integer.\n * @tparam Direction Whether the bits start from left or from right.\n * @tparam IsConst   Whether the view is const.\n */\ntemplate <typename VT, typename Direction, bool IsConst = false>\nstruct BitFieldContainer {\n  using value_type = std::conditional_t<IsConst, VT const, VT>;  // NOLINT\n  using size_type = size_t;                
                      // NOLINT\n  using index_type = size_t;                                     // NOLINT\n  using pointer = value_type*;                                   // NOLINT\n\n  static index_type constexpr kValueSize = sizeof(value_type) * 8;\n  static index_type constexpr kOne = 1;  // force correct type.\n\n  struct Pos {\n    index_type int_pos{0};\n    index_type bit_pos{0};\n  };\n\n private:\n  value_type* bits_{nullptr};\n  size_type n_values_{0};\n  static_assert(!std::is_signed_v<VT>, \"Must use an unsiged type as the underlying storage.\");\n\n public:\n  XGBOOST_DEVICE static Pos ToBitPos(index_type pos) {\n    Pos pos_v;\n    if (pos == 0) {\n      return pos_v;\n    }\n    pos_v.int_pos = pos / kValueSize;\n    pos_v.bit_pos = pos % kValueSize;\n    return pos_v;\n  }\n\n public:\n  BitFieldContainer() = default;\n  XGBOOST_DEVICE explicit BitFieldContainer(common::Span<value_type> bits)\n      : bits_{bits.data()}, n_values_{bits.size()} {}\n  BitFieldContainer(BitFieldContainer const& other) = default;\n  BitFieldContainer(BitFieldContainer&& other) = default;\n  BitFieldContainer &operator=(BitFieldContainer const &that) = default;\n  BitFieldContainer &operator=(BitFieldContainer &&that) = default;\n\n  XGBOOST_DEVICE auto Bits() { return common::Span<value_type>{bits_, NumValues()}; }\n  XGBOOST_DEVICE auto Bits() const { return common::Span<value_type const>{bits_, NumValues()}; }\n\n  /*\\brief Compute the size of needed memory allocation.  
The returned value is in terms\n   *       of number of elements with `BitFieldContainer::value_type'.\n   */\n  XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {\n    return common::DivRoundUp(size, kValueSize);\n  }\n#if defined(__CUDA_ARCH__)\n  __device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {\n    auto tid = blockIdx.x * blockDim.x + threadIdx.x;\n    std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());\n    if (tid < min_size) {\n      if (this->Check(tid) || rhs.Check(tid)) {\n        this->Set(tid);\n      }\n    }\n    return *this;\n  }\n#else\n  BitFieldContainer& operator|=(BitFieldContainer const& rhs) {\n    size_t min_size = std::min(NumValues(), rhs.NumValues());\n    for (size_t i = 0; i < min_size; ++i) {\n      Data()[i] |= rhs.Data()[i];\n    }\n    return *this;\n  }\n#endif  // #if defined(__CUDA_ARCH__)\n\n#if defined(__CUDA_ARCH__)\n  __device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {\n    auto tid = blockIdx.x * blockDim.x + threadIdx.x;\n    std::size_t min_size = std::min(this->Capacity(), rhs.Capacity());\n    if (tid < min_size) {\n      if (this->Check(tid) && rhs.Check(tid)) {\n        this->Set(tid);\n      } else {\n        this->Clear(tid);\n      }\n    }\n    return *this;\n  }\n#else\n  BitFieldContainer& operator&=(BitFieldContainer const& rhs) {\n    std::size_t min_size = std::min(NumValues(), rhs.NumValues());\n    for (size_t i = 0; i < min_size; ++i) {\n      Data()[i] &= rhs.Data()[i];\n    }\n    return *this;\n  }\n#endif  // defined(__CUDA_ARCH__)\n\n#if defined(__CUDA_ARCH__)\n  __device__ auto Set(index_type pos) noexcept(true) {\n    Pos pos_v = Direction::Shift(ToBitPos(pos));\n    value_type& value = Data()[pos_v.int_pos];\n    value_type set_bit = kOne << pos_v.bit_pos;\n    using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;\n    atomicOr(reinterpret_cast<Type *>(&value), set_bit);\n  }\n  __device__ void 
Clear(index_type pos) noexcept(true) {\n    Pos pos_v = Direction::Shift(ToBitPos(pos));\n    value_type& value = Data()[pos_v.int_pos];\n    value_type clear_bit = ~(kOne << pos_v.bit_pos);\n    using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;\n    atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);\n  }\n#else\n  void Set(index_type pos) noexcept(true) {\n    Pos pos_v = Direction::Shift(ToBitPos(pos));\n    value_type& value = Data()[pos_v.int_pos];\n    value_type set_bit = kOne << pos_v.bit_pos;\n    value |= set_bit;\n  }\n  void Clear(index_type pos) noexcept(true) {\n    Pos pos_v = Direction::Shift(ToBitPos(pos));\n    value_type& value = Data()[pos_v.int_pos];\n    value_type clear_bit = ~(kOne << pos_v.bit_pos);\n    value &= clear_bit;\n  }\n#endif  // defined(__CUDA_ARCH__)\n\n  XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {\n    pos_v = Direction::Shift(pos_v);\n    assert(pos_v.int_pos < NumValues());\n    value_type const value = Data()[pos_v.int_pos];\n    value_type const test_bit = kOne << pos_v.bit_pos;\n    value_type result = test_bit & value;\n    return static_cast<bool>(result);\n  }\n  [[nodiscard]] XGBOOST_DEVICE bool Check(index_type pos) const noexcept(true) {\n    Pos pos_v = ToBitPos(pos);\n    return Check(pos_v);\n  }\n  /**\n   * @brief Returns the total number of bits that can be viewed. 
This is equal to or\n   *        larger than the acutal number of valid bits.\n   */\n  [[nodiscard]] XGBOOST_DEVICE size_type Capacity() const noexcept(true) {\n    return kValueSize * NumValues();\n  }\n  /**\n   * @brief Number of storage unit used in this bit field.\n   */\n  [[nodiscard]] XGBOOST_DEVICE size_type NumValues() const noexcept(true) { return n_values_; }\n\n  XGBOOST_DEVICE pointer Data() const noexcept(true) { return bits_; }\n\n  inline friend std::ostream& operator<<(std::ostream& os,\n                                         BitFieldContainer<VT, Direction, IsConst> field) {\n    os << \"Bits \"\n       << \"storage size: \" << field.NumValues() << \"\\n\";\n    for (typename common::Span<value_type>::index_type i = 0; i < field.NumValues(); ++i) {\n      std::bitset<BitFieldContainer<VT, Direction, IsConst>::kValueSize> bset(field.Data()[i]);\n      os << bset << \"\\n\";\n    }\n    return os;\n  }\n};\n\n// Bits start from left most bits (most significant bit).\ntemplate <typename VT, bool IsConst = false>\nstruct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT, IsConst>, IsConst> {\n  using Container = BitFieldContainer<VT, LBitsPolicy<VT, IsConst>, IsConst>;\n  using Pos = typename Container::Pos;\n  using value_type = typename Container::value_type;  // NOLINT\n\n  XGBOOST_DEVICE static Pos Shift(Pos pos) {\n    pos.bit_pos = Container::kValueSize - pos.bit_pos - Container::kOne;\n    return pos;\n  }\n};\n\n// Bits start from right most bit (least significant bit) of each entry, but integer index\n// is from left to right.\ntemplate <typename VT>\nstruct RBitsPolicy : public BitFieldContainer<VT, RBitsPolicy<VT>> {\n  using Container = BitFieldContainer<VT, RBitsPolicy<VT>>;\n  using Pos = typename Container::Pos;\n  using value_type = typename Container::value_type;  // NOLINT\n\n  XGBOOST_DEVICE static Pos Shift(Pos pos) {\n    return pos;\n  }\n};\n\n// Format: <Const><Direction>BitField<size of underlying type in bits>, 
underlying type\n// must be unsigned.\nusing LBitField64 = BitFieldContainer<std::uint64_t, LBitsPolicy<std::uint64_t>>;\nusing RBitField8 = BitFieldContainer<std::uint8_t, RBitsPolicy<unsigned char>>;\n\nusing LBitField32 = BitFieldContainer<std::uint32_t, LBitsPolicy<std::uint32_t>>;\nusing CLBitField32 = BitFieldContainer<std::uint32_t, LBitsPolicy<std::uint32_t, true>, true>;\nusing RBitField32 = BitFieldContainer<std::uint32_t, RBitsPolicy<std::uint32_t>>;\n\nnamespace detail {\ninline std::uint32_t TrailingZeroBitsImpl(std::uint32_t value) {\n  auto n = sizeof(value) * 8;\n  std::uint32_t cnt{0};\n  for (decltype(n) i = 0; i < n; i++) {\n    if ((value >> i) & 1) {\n      break;\n    }\n    cnt++;\n  }\n  return cnt;\n}\n}  // namespace detail\n\ninline std::uint32_t TrailingZeroBits(std::uint32_t value) {\n  if (value == 0) {\n    return sizeof(value) * 8;\n  }\n#if defined(__GNUC__)\n  return __builtin_ctz(value);\n#elif defined(_MSC_VER) && defined(_M_X64)\n  return _tzcnt_u32(value);\n#else\n  return detail::TrailingZeroBitsImpl(value);\n#endif  //  __GNUC__\n}\n}       // namespace xgboost\n\n#endif  // XGBOOST_COMMON_BITFIELD_H_\n"
  },
  {
    "path": "src/common/cache_manager.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include \"cache_manager.h\"\n\n#include <cstdint>  // for uint64_t\n\n#if defined(__x86_64__)\n\nvoid RunCpuid(uint32_t eax, uint32_t ecx, uint32_t (& abcd)[4]) {\n#if defined(_MSC_VER)\n    __cpuidex(reinterpret_cast<int*>(abcd), eax, ecx);\n#else\n    uint32_t ebx = 0, edx = 0;\n    __asm__(\"cpuid\" : \"+b\"(ebx), \"+a\"(eax), \"+c\"(ecx), \"=d\"(edx));\n    abcd[0] = eax;\n    abcd[1] = ebx;\n    abcd[2] = ecx;\n    abcd[3] = edx;\n#endif\n}\n\n#define __extract_bitmask_value(val, mask, shift) (((val) & (mask)) >> shift)\n\n#define _CPUID_GET_TYPE(__eax) __extract_bitmask_value(__eax /*4:0*/, 0x1fU, 0)\n\n#define _CPUID_GET_LEVEL(__eax) __extract_bitmask_value(__eax /*7:5*/, 0xe0U, 5)\n\n#define _CPUID_GET_SETS(__ecx) ((__ecx) + 1)\n\n#define _CPUID_GET_LINE_SIZE(__ebx) (__extract_bitmask_value(__ebx /*11:0*/, 0x7ffU, 0) + 1)\n\n#define _CPUID_GET_PARTITIONS(__ebx) (__extract_bitmask_value(__ebx /*21:11*/, 0x3ff800U, 11) + 1)\n\n#define _CPUID_GET_WAYS(__ebx) (__extract_bitmask_value(__ebx /*31:22*/, 0xffc00000U, 22) + 1)\n\n#define _CPUID_CACHE_INFO_INTEL 0x4U\n\n#define _CPUID_CACHE_INFO_AMD 0x8000001DU\n\n#define _CPUID_VENDOR_ID_AMD 0x68747541\n\n// Run CPUID and collect raw output.\nvoid GetCacheInfo(int cache_num, int* type, int* level, int64_t* sets,\n                  int* line_size, int* partitions, int* ways) {\n// Leaf 0x0 returns Vendor ID in EBX, EDX, ECX\n  uint32_t vendor_reg[4];\n  RunCpuid(0, 0, vendor_reg);\n  bool is_amd = (vendor_reg[1] == _CPUID_VENDOR_ID_AMD);\n\n  uint32_t cache_info_leaf = is_amd ? 
_CPUID_CACHE_INFO_AMD : _CPUID_CACHE_INFO_INTEL;\n  static uint32_t abcd[4];\n  RunCpuid(cache_info_leaf, cache_num, abcd);\n\n  const uint32_t eax = abcd[0];\n  const uint32_t ebx = abcd[1];\n  const uint32_t ecx = abcd[2];\n  // const uint32_t edx = abcd[3];  // Not used\n  *type              = _CPUID_GET_TYPE(eax);\n  *level             = _CPUID_GET_LEVEL(eax);\n  *sets              = _CPUID_GET_SETS(ecx);\n  *line_size         = _CPUID_GET_LINE_SIZE(ebx);\n  *partitions        = _CPUID_GET_PARTITIONS(ebx);\n  *ways              = _CPUID_GET_WAYS(ebx);\n}\n\nconstexpr int kCpuidTypeNull = 0;\nconstexpr int kCpuidTypeData = 1;  // NOLINT\nconstexpr int kCpuidTypeInst = 2;\nconstexpr int kCpuidTypeUnif = 3;  // NOLINT\n\n// Interpret the raw CPUID results and extract actual (or unified) cache parameters.\ntemplate <std::int32_t kMaxCacheSize>\nvoid DetectDataCaches(int64_t* cache_sizes) {\n  (void)kCpuidTypeData;\n  (void)kCpuidTypeUnif;\n  int cache_num = 0, cache_sizes_idx = 0;\n  while (cache_sizes_idx < kMaxCacheSize) {\n    int type, level, line_size, partitions, ways;\n    int64_t sets, size;\n    GetCacheInfo(cache_num++, &type, &level, &sets, &line_size, &partitions, &ways);\n\n    if (type == kCpuidTypeNull) break;  // no more caches to read.\n    if (type == kCpuidTypeInst) continue;\n\n    size = ways * partitions * line_size * sets;\n    cache_sizes[cache_sizes_idx++] = size;\n  }\n}\n#endif  // defined(__x86_64__)\n\nnamespace xgboost::common {\n\n/* Detect CPU cache sizes at runtime using CPUID.\n * CPUID cannot be used reliably on:\n * 1. non-x86_64 architectures\n * 2. some virtualized environments\n *\n * In these cases, fallback L1/L2/L3 defaults are used.\n */\nCacheManager::CacheManager() {\n#if defined(__x86_64__)\n  DetectDataCaches<kMaxCacheSize>(cache_size_.data());\n#else\n  SetDefaultCaches();\n#endif  // defined(__x86_64__)\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/cache_manager.h",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_CACHE_MANAGER_H_\n#define XGBOOST_COMMON_CACHE_MANAGER_H_\n\n#include <cstdint>     // for int64_t\n#include <array>\n\nnamespace xgboost::common {\n\n/* Detect cache sizes at runtime,\n * or fall back to defaults if detection is not possible.\n */\nclass CacheManager {\n private:\n  constexpr static int64_t kUninitCache = -1;\n  constexpr static int kMaxCacheSize = 4;\n  std::array<int64_t, kMaxCacheSize> cache_size_ = {kUninitCache, kUninitCache,\n                                                    kUninitCache, kUninitCache};\n\n  constexpr static int64_t kDefaultL1Size = 32 * 1024;    // 32KB\n  constexpr static int64_t kDefaultL2Size = 1024 * 1024;  // 1MB\n  constexpr static int64_t kDefaultL3Size = 0;            // 0MB\n\n  // If CPUID cannot be used, fall back to default L1/L2 cache sizes.\n  void SetDefaultCaches() {\n    // Overestimating cache sizes harms performance more than underestimation,\n    // so conservative defaults are used.\n    cache_size_[0] = kDefaultL1Size;\n    cache_size_[1] = kDefaultL2Size;\n    cache_size_[2] = kDefaultL3Size;\n  }\n\n public:\n  CacheManager();\n\n  int64_t L1Size() const {\n    return cache_size_[0] != kUninitCache ? cache_size_[0] : kDefaultL1Size;\n  }\n\n  int64_t L2Size() const {\n    return cache_size_[1] != kUninitCache ? cache_size_[1] : kDefaultL2Size;\n  }\n\n  int64_t L3Size() const {\n    return cache_size_[2] != kUninitCache ? cache_size_[2] : kDefaultL3Size;\n  }\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_CACHE_MANAGER_H_\n"
  },
  {
    "path": "src/common/categorical.h",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n * \\file categorical.h\n */\n#ifndef XGBOOST_COMMON_CATEGORICAL_H_\n#define XGBOOST_COMMON_CATEGORICAL_H_\n\n#include \"bitfield.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/span.h\"\n#include \"xgboost/tree_model.h\"\n\nnamespace xgboost::common {\nusing CatBitField = LBitField32;\nusing KCatBitField = CLBitField32;\n\n// Cast the categorical type.\ntemplate <typename T>\nXGBOOST_DEVICE bst_cat_t AsCat(T const& v) {\n  return static_cast<bst_cat_t>(v);\n}\n\n/* \\brief Whether is fidx a categorical feature.\n *\n * \\param ft   Feature type for all features.\n * \\param fidx Feature index.\n * \\return Whether feature pointed by fidx is categorical feature.\n */\ninline XGBOOST_DEVICE bool IsCat(Span<FeatureType const> ft, bst_feature_t fidx) {\n  return !ft.empty() && ft[fidx] == FeatureType::kCategorical;\n}\n\nconstexpr inline bst_cat_t OutOfRangeCat() {\n  // See the round trip assert in `InvalidCat`.\n  return static_cast<bst_cat_t>(16777217) - static_cast<bst_cat_t>(1);\n}\n\ninline XGBOOST_DEVICE bool InvalidCat(float cat) {\n  constexpr auto kMaxCat = OutOfRangeCat();\n  static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat)) == kMaxCat);\n  static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat + 1)) != kMaxCat + 1);\n  static_assert(static_cast<float>(kMaxCat + 1) == kMaxCat);\n  return cat < 0 || cat >= kMaxCat;\n}\n\n/**\n * \\brief Whether should it traverse to left branch of a tree.\n *\n *   Go to left if it's NOT the matching category, which matches one-hot encoding.\n */\ninline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {\n  KCatBitField const s_cats(cats);\n  if (XGBOOST_EXPECT(InvalidCat(cat), false)) {\n    return true;\n  }\n\n  auto pos = KCatBitField::ToBitPos(cat);\n  // If the input category is larger than the size of the bit field, it implies that the\n  // category is 
not chosen. Otherwise the bit field would have the category instead of\n  // being smaller than the category value.\n  if (pos.int_pos >= cats.size()) {\n    return true;\n  }\n  return !s_cats.Check(AsCat(cat));\n}\n\ninline void InvalidCategory() {\n  // OutOfRangeCat() can be accurately represented, but everything after it will be\n  // rounded toward it, so we use >= for comparison check.  As a result, we require input\n  // values to be less than this last representable value.\n  auto str = std::to_string(OutOfRangeCat());\n  LOG(FATAL) << \"Invalid categorical value detected.  Categorical value should be non-negative, \"\n                \"less than total number of categories in training data and less than \" +\n                    str;\n}\n\ninline void CheckMaxCat(float max_cat, size_t n_categories) {\n  CHECK_GE(max_cat + 1, n_categories)\n      << \"Maximum cateogry should not be lesser than the total number of categories.\";\n}\n\n/*!\n * \\brief Whether should we use onehot encoding for categorical data.\n */\nXGBOOST_DEVICE inline bool UseOneHot(uint32_t n_cats, uint32_t max_cat_to_onehot) {\n  bool use_one_hot = n_cats < max_cat_to_onehot;\n  return use_one_hot;\n}\n\nstruct IsCatOp {\n  XGBOOST_DEVICE bool operator()(FeatureType ft) { return ft == FeatureType::kCategorical; }\n};\n\ninline auto GetNodeCats(common::Span<CatBitField::value_type const> categories,\n                        RegTree::CategoricalSplitMatrix::Segment seg) {\n  KCatBitField node_cats{categories.subspan(seg.beg, seg.size)};\n  return node_cats;\n}\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_COMMON_CATEGORICAL_H_\n"
  },
  {
    "path": "src/common/charconv.cc",
    "content": "/*!\n * Copyright 2020 by XGBoost Contributors\n *\n * \\brief An implementation of Ryu algorithm:\n *\n * https://dl.acm.org/citation.cfm?id=3192369\n *\n * The code is adopted from original (half) c implementation:\n * https://github.com/ulfjack/ryu.git with some more comments and tidying.  License is\n * attached below.\n *\n * Copyright 2018 Ulf Adams\n *\n * The contents of this file may be used under the terms of the Apache License,\n * Version 2.0.\n *\n *    (See accompanying file LICENSE-Apache or copy at\n *     http: *www.apache.org/licenses/LICENSE-2.0)\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the Boost Software License, Version 1.0.\n *    (See accompanying file LICENSE-Boost or copy at\n *     https://www.boost.org/LICENSE_1_0.txt)\n *\n * Unless required by applicable law or agreed to in writing, this software\n * is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.\n */\n#include <algorithm>\n#include <cassert>\n#include <cinttypes>\n#include <cstring>\n#include <cmath>\n\n#include \"xgboost/logging.h\"\n#include \"charconv.h\"\n\n#if defined(_MSC_VER)\n#include <intrin.h>\n#endif\n\n/*\n * We did some cleanup from the original implementation instead of doing line to line\n * port.\n *\n * The basic concept of floating rounding is, for a floating point number, we need to\n * convert base2 to base10.  During which we need to implement correct rounding.  Hence on\n * base2 we have:\n *\n * {low, value, high}\n *\n * 3 values, representing round down, no rounding, and round up.  In the original\n * implementation and paper, variables representing these 3 values are typically postfixed\n * with m, r, p like {vr, vm, vp}.  
Here we name them more verbosely.\n */\n\nnamespace xgboost {\nnamespace detail {\nstatic constexpr char kItoaLut[200] = {\n    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0',\n    '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4',\n    '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2',\n    '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',\n    '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3',\n    '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4',\n    '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5',\n    '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',\n    '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6',\n    '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',\n    '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8',\n    '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',\n    '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9',\n    '7', '9', '8', '9', '9'};\n\nconstexpr uint32_t Tens(uint32_t n) { return n == 1 ? 
10 : (Tens(n - 1) * 10); }\n\nstruct UnsignedFloatBase2;\n\nstruct UnsignedFloatBase10 {\n  uint32_t mantissa;\n  // Decimal exponent's range is -45 to 38\n  // inclusive, and can fit in a short if needed.\n  int32_t exponent;\n};\n\ntemplate <typename To, typename From>\nTo BitCast(From&& from) {\n  static_assert(sizeof(From) == sizeof(To), \"Bit cast doesn't change output size.\");\n  To t;\n  std::memcpy(&t, &from, sizeof(To));\n  return t;\n}\n\nstruct IEEE754 {\n  static constexpr uint32_t kFloatMantissaBits = 23;\n  static constexpr uint32_t kFloatBias = 127;\n  static constexpr uint32_t kFloatExponentBits = 8;\n\n  static void Decode(float f, UnsignedFloatBase2* uf, bool* signbit);\n  static float Encode(UnsignedFloatBase2 const& uf, bool signbit);\n\n  static float Infinity(bool sign) {\n    uint32_t f =\n        ((static_cast<uint32_t>(sign))\n         << (IEEE754::kFloatExponentBits + IEEE754::kFloatMantissaBits)) |\n        (0xffu << IEEE754::kFloatMantissaBits);\n    float result = BitCast<float>(f);\n    return result;\n  }\n};\n\nstruct UnsignedFloatBase2 {\n  uint32_t mantissa;\n  // Decimal exponent's range is -45 to 38\n  // inclusive, and can fit in a short if needed.\n  uint32_t exponent;\n\n  bool Infinite() const {\n    return exponent == ((1u << IEEE754::kFloatExponentBits) - 1u);\n  }\n  bool Zero() const {\n    return mantissa == 0 && exponent == 0;\n  }\n};\n\ninline void IEEE754::Decode(float f, UnsignedFloatBase2 *uf, bool *signbit) {\n  auto bits = BitCast<uint32_t>(f);\n  // Decode bits into sign, mantissa, and exponent.\n  *signbit = std::signbit(f);\n  uf->mantissa = bits & ((1u << kFloatMantissaBits) - 1);\n  uf->exponent = (bits >> IEEE754::kFloatMantissaBits) &\n                 ((1u << IEEE754::kFloatExponentBits) - 1);  // remove signbit\n}\n\ninline float IEEE754::Encode(UnsignedFloatBase2 const &uf, bool signbit) {\n  uint32_t f =\n      ((((static_cast<uint32_t>(signbit)) << IEEE754::kFloatExponentBits) |\n        
static_cast<uint32_t>(uf.exponent))\n       << IEEE754::kFloatMantissaBits) |\n      uf.mantissa;\n  return BitCast<float>(f);\n}\n\n// Represents the interval of information-preserving outputs.\nstruct MantissaInteval {\n  int32_t exponent;\n  // low: smaller half way point\n  uint32_t mantissa_low;\n  // correct: f\n  uint32_t mantissa_correct;\n  // high: larger half way point\n  uint32_t mantissa_high;\n};\n\nstruct RyuPowLogUtils {\n  // This table is generated by PrintFloatLookupTable from ryu.  We adopted only the float\n  // 32 table instead of double full table.\n  // f2s_full_table.h\n  uint32_t constexpr static kFloatPow5InvBitcount = 59;\n  static constexpr uint64_t kFloatPow5InvSplit[55] = {\n      576460752303423489u, 461168601842738791u, 368934881474191033u,\n      295147905179352826u, 472236648286964522u, 377789318629571618u,\n      302231454903657294u, 483570327845851670u, 386856262276681336u,\n      309485009821345069u, 495176015714152110u, 396140812571321688u,\n      316912650057057351u, 507060240091291761u, 405648192073033409u,\n      324518553658426727u, 519229685853482763u, 415383748682786211u,\n      332306998946228969u, 531691198313966350u, 425352958651173080u,\n      340282366920938464u, 544451787073501542u, 435561429658801234u,\n      348449143727040987u, 557518629963265579u, 446014903970612463u,\n      356811923176489971u, 570899077082383953u, 456719261665907162u,\n      365375409332725730u, 292300327466180584u, 467680523945888934u,\n      374144419156711148u, 299315535325368918u, 478904856520590269u,\n      383123885216472215u, 306499108173177772u, 490398573077084435u,\n      392318858461667548u, 313855086769334039u, 502168138830934462u,\n      401734511064747569u, 321387608851798056u, 514220174162876889u,\n      411376139330301511u, 329100911464241209u, 526561458342785934u,\n      421249166674228747u, 336999333339382998u, 539198933343012796u,\n      431359146674410237u, 345087317339528190u, 552139707743245103u,\n      
441711766194596083u};\n\n  uint32_t constexpr static kFloatPow5Bitcount = 61;\n  static constexpr uint64_t kFloatPow5Split[47] = {\n      1152921504606846976u, 1441151880758558720u, 1801439850948198400u,\n      2251799813685248000u, 1407374883553280000u, 1759218604441600000u,\n      2199023255552000000u, 1374389534720000000u, 1717986918400000000u,\n      2147483648000000000u, 1342177280000000000u, 1677721600000000000u,\n      2097152000000000000u, 1310720000000000000u, 1638400000000000000u,\n      2048000000000000000u, 1280000000000000000u, 1600000000000000000u,\n      2000000000000000000u, 1250000000000000000u, 1562500000000000000u,\n      1953125000000000000u, 1220703125000000000u, 1525878906250000000u,\n      1907348632812500000u, 1192092895507812500u, 1490116119384765625u,\n      1862645149230957031u, 1164153218269348144u, 1455191522836685180u,\n      1818989403545856475u, 2273736754432320594u, 1421085471520200371u,\n      1776356839400250464u, 2220446049250313080u, 1387778780781445675u,\n      1734723475976807094u, 2168404344971008868u, 1355252715606880542u,\n      1694065894508600678u, 2117582368135750847u, 1323488980084844279u,\n      1654361225106055349u, 2067951531382569187u, 1292469707114105741u,\n      1615587133892632177u, 2019483917365790221u};\n\n  static uint32_t Pow5Factor(uint32_t value) noexcept(true) {\n    uint32_t count = 0;\n    for (;;) {\n      const uint32_t q = value / 5;\n      const uint32_t r = value % 5;\n      if (r != 0) {\n        break;\n      }\n      value = q;\n      ++count;\n    }\n    return count;\n  }\n\n  // Returns true if value is divisible by 5^p.\n  static bool MultipleOfPowerOf5(const uint32_t value, const uint32_t p) noexcept(true) {\n    return Pow5Factor(value) >= p;\n  }\n\n  // Returns true if value is divisible by 2^p.\n  static bool MultipleOfPowerOf2(const uint32_t value, const uint32_t p) noexcept(true) {\n#ifdef __GNUC__\n    return static_cast<uint32_t>(__builtin_ctz(value)) >= p;\n#else\n    return (value 
& ((1u << p) - 1)) == 0;\n#endif  //  __GNUC__\n  }\n\n  // Returns e == 0 ? 1 : ceil(log_2(5^e)).\n  static uint32_t Pow5Bits(const int32_t e) noexcept(true) {\n    return static_cast<uint32_t>(((e * 163391164108059ull) >> 46) + 1);\n  }\n\n  static int32_t Log2Pow5(const int32_t e) {\n    // This approximation works up to the point that the multiplication\n    // overflows at e = 3529. If the multiplication were done in 64 bits, it\n    // would fail at 5^4004 which is just greater than 2^9297.\n    assert(e >= 0);\n    assert(e <= 3528);\n    return static_cast<int32_t>(((static_cast<uint32_t>(e)) * 1217359) >> 19);\n  }\n\n  static int32_t CeilLog2Pow5(const int32_t e) {\n    return RyuPowLogUtils::Log2Pow5(e) + 1;\n  }\n\n  /*\n   * \\brief Multiply 32-bit and 64-bit -> 128 bit, then access the higher bits.\n   */\n  static uint32_t MulShift(const uint32_t x, const uint64_t y,\n                           const int32_t shift) noexcept(true) {\n    // For 32-bit * 64-bit: x * y, it can be decomposed into:\n    //\n    //   x * (y_high + y_low) = (x * y_high) + (x * y_low)\n    //\n    // For more general case 64-bit * 64-bit, see https://stackoverflow.com/a/1541458\n    const uint32_t y_low = static_cast<uint32_t>(y);\n    const uint32_t y_high = static_cast<uint32_t>(y >> 32);\n\n    const uint64_t low = static_cast<uint64_t>(x) * y_low;\n    const uint64_t high = static_cast<uint64_t>(x) * y_high;\n\n    const uint64_t sum = (low >> 32) + high;\n    const uint64_t shifted_sum = sum >> (shift - 32);\n\n    return static_cast<uint32_t>(shifted_sum);\n  }\n\n  /*\n   * \\brief floor(5^q/2*k) and shift by j\n   */\n  static uint32_t MulPow5InvDivPow2(const uint32_t m, const uint32_t q,\n                                    const int32_t j) noexcept(true) {\n    static_assert(sizeof(kFloatPow5InvSplit) == 55 * sizeof(std::uint64_t));\n    assert(q < 55);\n    return MulShift(m, kFloatPow5InvSplit[q], j);  // NOLINT\n  }\n\n  /*\n   * \\brief floor(2^k/5^q) + 1 and 
shift by j\n   */\n  static uint32_t MulPow5divPow2(const uint32_t m, const uint32_t i,\n                                 const int32_t j) noexcept(true) {\n    // clang-tidy makes false assumption that can lead to i >= 47, which is impossible.\n    // Can be verified by enumerating all float32 values.\n    return MulShift(m, kFloatPow5Split[i], j);  // NOLINT\n  }\n\n  static uint32_t FloorLog2(const uint32_t value) {\n#if defined(_MSC_VER)\n    unsigned long index;  // NOLINT\n    return _BitScanReverse(&index, value) ? index : 32;\n#else\n    return 31 - __builtin_clz(value);\n#endif\n  }\n\n  /*\n   * \\brief floor(e * log_10(2)).\n   */\n  static uint32_t Log10Pow2(const int32_t e) noexcept(true) {\n    // The first value this approximation fails for is 2^1651 which is just\n    // greater than 10^297.\n    assert(e >= 0);\n    assert(e <= 1 << 15);\n    return static_cast<uint32_t>((static_cast<uint64_t>(e) * 169464822037455ull) >> 49);\n  }\n\n  // Returns floor(e * log_10(5)).\n  static uint32_t Log10Pow5(const int32_t expoent) noexcept(true) {\n    // The first value this approximation fails for is 5^2621 which is just\n    // greater than 10^1832.\n    assert(expoent >= 0);\n    assert(expoent <= 1 << 15);\n    return static_cast<uint32_t>(\n        ((static_cast<uint64_t>(expoent)) * 196742565691928ull) >> 48);\n  }\n};\n\nconstexpr uint64_t RyuPowLogUtils::kFloatPow5InvSplit[55];\nconstexpr uint64_t RyuPowLogUtils::kFloatPow5Split[47];\n\nclass PowerBaseComputer {\n private:\n  static uint8_t\n  ToDecimalBase(bool const accept_bounds, uint32_t const mantissa_low_shift,\n                MantissaInteval const base2, MantissaInteval *base10,\n                bool *mantissa_low_is_trailing_zeros,\n                bool *mantissa_out_is_trailing_zeros) noexcept(true) {\n    uint8_t last_removed_digit = 0;\n    if (base2.exponent >= 0) {\n      const uint32_t q = RyuPowLogUtils::Log10Pow2(base2.exponent);\n      base10->exponent = static_cast<int32_t>(q);\n    
  const int32_t k = RyuPowLogUtils::kFloatPow5InvBitcount +\n                        RyuPowLogUtils::Pow5Bits(static_cast<int32_t>(q)) - 1;\n      const int32_t i = -base2.exponent + static_cast<int32_t>(q) + k;\n      base10->mantissa_low =\n          RyuPowLogUtils::MulPow5InvDivPow2(base2.mantissa_low, q, i);\n      base10->mantissa_correct =\n          RyuPowLogUtils::MulPow5InvDivPow2(base2.mantissa_correct, q, i);\n      base10->mantissa_high =\n          RyuPowLogUtils::MulPow5InvDivPow2(base2.mantissa_high, q, i);\n\n      if (q != 0 &&\n          (base10->mantissa_high - 1) / 10 <= base10->mantissa_low / 10) {\n        // We need to know one removed digit even if we are not going to loop\n        // below. We could use q = X - 1 above, except that would require 33\n        // bits for the result, and we've found that 32-bit arithmetic is\n        // faster even on 64-bit machines.\n        const int32_t l =\n            RyuPowLogUtils::kFloatPow5InvBitcount +\n            RyuPowLogUtils::Pow5Bits(static_cast<int32_t>(q - 1)) - 1;\n        last_removed_digit = static_cast<uint8_t>(\n            RyuPowLogUtils::MulPow5InvDivPow2(\n                base2.mantissa_correct, q - 1,\n                -base2.exponent + static_cast<int32_t>(q) - 1 + l) %\n            10);\n      }\n      if (q <= 9) {\n        // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be\n        // safe as well. 
Only one of mantissa_high, mantissa_correct, and mantissa_low can\n        // be a multiple of 5, if any.\n        if (base2.mantissa_correct % 5 == 0) {\n          *mantissa_out_is_trailing_zeros =\n              RyuPowLogUtils::MultipleOfPowerOf5(base2.mantissa_correct, q);\n        } else if (accept_bounds) {\n          *mantissa_low_is_trailing_zeros =\n              RyuPowLogUtils::MultipleOfPowerOf5(base2.mantissa_low, q);\n        } else {\n          base10->mantissa_high -=\n              RyuPowLogUtils::MultipleOfPowerOf5(base2.mantissa_high, q);\n        }\n      }\n    } else {\n      const uint32_t q = RyuPowLogUtils::Log10Pow5(-base2.exponent);\n      base10->exponent = static_cast<int32_t>(q) + base2.exponent;\n      const int32_t i = -base2.exponent - static_cast<int32_t>(q);\n      const int32_t k =\n          RyuPowLogUtils::Pow5Bits(i) - RyuPowLogUtils::kFloatPow5Bitcount;\n      int32_t j = static_cast<int32_t>(q) - k;\n      base10->mantissa_correct = RyuPowLogUtils::MulPow5divPow2(\n          base2.mantissa_correct, static_cast<uint32_t>(i), j);\n      base10->mantissa_high = RyuPowLogUtils::MulPow5divPow2(\n          base2.mantissa_high, static_cast<uint32_t>(i), j);\n      base10->mantissa_low = RyuPowLogUtils::MulPow5divPow2(\n          base2.mantissa_low, static_cast<uint32_t>(i), j);\n\n      if (q != 0 &&\n          (base10->mantissa_high - 1) / 10 <= base10->mantissa_low / 10) {\n        j = static_cast<int32_t>(q) - 1 -\n            (RyuPowLogUtils::Pow5Bits(i + 1) -\n             RyuPowLogUtils::kFloatPow5Bitcount);\n        last_removed_digit = static_cast<uint8_t>(\n            RyuPowLogUtils::MulPow5divPow2(base2.mantissa_correct,\n                                           static_cast<uint32_t>(i + 1), j) %\n            10);\n      }\n      if (q <= 1) {\n        // {mantissa_out, mantissa_out_high, mantissa_out_low} is trailing zeros if\n        // {mantissa_correct,mantissa_high,mantissa_low} has at least q trailing 0\n        // 
bits. mantissa_correct = 4 * m2, so it always has at least two trailing 0 bits.\n        *mantissa_out_is_trailing_zeros = true;\n        if (accept_bounds) {\n          // mantissa_low = mantissa_correct - 1 - mantissa_low_shift, so it has 1\n          // trailing 0 bit iff mantissa_low_shift == 1.\n          *mantissa_low_is_trailing_zeros = mantissa_low_shift == 1;\n        } else {\n          // mantissa_high = mantissa_correct + 2, so it always has at least one trailing\n          // 0 bit.\n          --base10->mantissa_high;\n        }\n      } else if (q < 31) {\n        *mantissa_out_is_trailing_zeros =\n            RyuPowLogUtils::MultipleOfPowerOf2(base2.mantissa_correct, q - 1);\n      }\n    }\n    return last_removed_digit;\n  }\n\n  /*\n   * \\brief A variant of extended euclidean GCD algorithm.\n   */\n  static UnsignedFloatBase10\n  ShortestRepresentation(bool mantissa_low_is_trailing_zeros,\n                         bool mantissa_out_is_trailing_zeros,\n                         uint8_t last_removed_digit, bool const accept_bounds,\n                         MantissaInteval base10) noexcept(true) {\n    int32_t removed {0};\n    uint32_t output {0};\n\n    if (mantissa_low_is_trailing_zeros || mantissa_out_is_trailing_zeros) {\n      // General case, which happens rarely (~4.0%).\n      while (base10.mantissa_high / 10 > base10.mantissa_low / 10) {\n        mantissa_low_is_trailing_zeros &= base10.mantissa_low % 10 == 0;\n        mantissa_out_is_trailing_zeros &= last_removed_digit == 0;\n        last_removed_digit = static_cast<uint8_t>(base10.mantissa_correct % 10);\n        base10.mantissa_correct /= 10;\n        base10.mantissa_high /= 10;\n        base10.mantissa_low /= 10;\n        ++removed;\n      }\n\n      if (mantissa_low_is_trailing_zeros) {\n        while (base10.mantissa_low % 10 == 0) {\n          mantissa_out_is_trailing_zeros &= last_removed_digit == 0;\n          last_removed_digit = static_cast<uint8_t>(base10.mantissa_correct % 10);\n         
 base10.mantissa_correct /= 10;\n          base10.mantissa_high /= 10;\n          base10.mantissa_low /= 10;\n          ++removed;\n        }\n      }\n\n      if (mantissa_out_is_trailing_zeros && last_removed_digit == 5 &&\n          base10.mantissa_correct % 2 == 0) {\n        // Round even if the exact number is .....50..0.\n        last_removed_digit = 4;\n      }\n      // We need to take mantissa_out + 1 if mantissa_out is outside bounds or we need to\n      // round up.\n      output = base10.mantissa_correct +\n               ((base10.mantissa_correct == base10.mantissa_low &&\n                 (!accept_bounds || !mantissa_low_is_trailing_zeros)) ||\n                last_removed_digit >= 5);\n    } else {\n      // Specialized for the common case (~96.0%). Percentages below are\n      // relative to this. Loop iterations below (approximately): 0: 13.6%,\n      // 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%\n      while (base10.mantissa_high / 10 > base10.mantissa_low / 10) {\n        last_removed_digit = static_cast<uint8_t>(base10.mantissa_correct % 10);\n        base10.mantissa_correct /= 10;\n        base10.mantissa_high /= 10;\n        base10.mantissa_low /= 10;\n        ++removed;\n      }\n\n      // We need to take mantissa_out + 1 if mantissa_out is outside bounds or we need to\n      // round up.\n      output = base10.mantissa_correct +\n               (base10.mantissa_correct == base10.mantissa_low ||\n                last_removed_digit >= 5);\n    }\n    const int32_t exp = base10.exponent + removed;\n\n    UnsignedFloatBase10 fd;\n    fd.exponent = exp;\n    fd.mantissa = output;\n    return fd;\n  }\n\n public:\n  static UnsignedFloatBase10 Binary2Decimal(UnsignedFloatBase2 const f) noexcept(true) {\n    MantissaInteval base2_range;\n    uint32_t mantissa_base2;\n    if (f.exponent == 0) {\n      // We subtract 2 so that the bounds computation has 2 additional bits.\n      base2_range.exponent = static_cast<int32_t>(1) -\n               
              static_cast<int32_t>(IEEE754::kFloatBias) -\n                             static_cast<int32_t>(IEEE754::kFloatMantissaBits) -\n                             static_cast<int32_t>(2);\n      static_assert(static_cast<int32_t>(1) - static_cast<int32_t>(IEEE754::kFloatBias) -\n                        static_cast<int32_t>(IEEE754::kFloatMantissaBits) -\n                        static_cast<int32_t>(2) ==\n                    -151);\n      mantissa_base2 = f.mantissa;\n    } else {\n      base2_range.exponent = static_cast<int32_t>(f.exponent) - IEEE754::kFloatBias -\n                             IEEE754::kFloatMantissaBits - 2;\n      mantissa_base2 = (1u << IEEE754::kFloatMantissaBits) | f.mantissa;\n    }\n    const bool even = (mantissa_base2 & 1) == 0;\n    const bool accept_bounds = even;\n\n    // Step 2: Determine the interval of valid decimal representations.\n    base2_range.mantissa_correct = 4 * mantissa_base2;\n    base2_range.mantissa_high = 4 * mantissa_base2 + 2;\n    // Implicit bool -> int conversion. 
True is 1, false is 0.\n    const uint32_t mantissa_low_shift = f.mantissa != 0 || f.exponent <= 1;\n    base2_range.mantissa_low = 4 * mantissa_base2 - 1 - mantissa_low_shift;\n\n    // Step 3: Convert to a decimal power base using 64-bit arithmetic.\n    MantissaInteval base10_range;\n    bool mantissa_low_is_trailing_zeros = false;\n    bool mantissa_out_is_trailing_zeros = false;\n    auto last_removed_digit = PowerBaseComputer::ToDecimalBase(\n        accept_bounds, mantissa_low_shift, base2_range, &base10_range,\n        &mantissa_low_is_trailing_zeros, &mantissa_out_is_trailing_zeros);\n\n    // Step 4: Find the shortest decimal representation in the interval of valid\n    // representations.\n    auto out = ShortestRepresentation(mantissa_low_is_trailing_zeros,\n                                      mantissa_out_is_trailing_zeros,\n                                      last_removed_digit,\n                                      accept_bounds, base10_range);\n    return out;\n  }\n};\n\n/*\n * \\brief Print the floating point number in base 10.\n */\nclass RyuPrinter {\n private:\n  static inline uint32_t OutputLength(const uint32_t v) noexcept(true) {\n    // Function precondition: v is not a 10-digit number.\n    // (f2s: 9 digits are sufficient for round-tripping.)\n    // (d2fixed: We print 9-digit blocks.)\n    static_assert(100000000 == Tens(8));\n    assert(v < Tens(9));\n    if (v >= Tens(8)) {\n      return 9;\n    }\n    if (v >= Tens(7)) {\n      return 8;\n    }\n    if (v >= Tens(6)) {\n      return 7;\n    }\n    if (v >= Tens(5)) {\n      return 6;\n    }\n    if (v >= Tens(4)) {\n      return 5;\n    }\n    if (v >= Tens(3)) {\n      return 4;\n    }\n    if (v >= Tens(2)) {\n      return 3;\n    }\n    if (v >= Tens(1)) {\n      return 2;\n    }\n    return 1;\n  }\n\n public:\n  static int32_t PrintBase10Float(UnsignedFloatBase10 v, const bool sign,\n                                  char *const result) noexcept(true) {\n    // Step 5: Print 
the decimal representation.\n    int index = 0;\n    if (sign) {\n      result[index++] = '-';\n    }\n\n    uint32_t output = v.mantissa;\n    const uint32_t out_length = OutputLength(output);\n\n    // Print the decimal digits.\n    // The following code is equivalent to:\n    // for (uint32_t i = 0; i < olength - 1; ++i) {\n    //   const uint32_t c = output % 10; output /= 10;\n    //   result[index + olength - i] = (char) ('0' + c);\n    // }\n    // result[index] = '0' + output % 10;\n    uint32_t i = 0;\n    while (output >= Tens(4)) {\n      const uint32_t c = output % Tens(4);\n      output /= Tens(4);\n      const uint32_t c0 = (c % 100) << 1;\n      const uint32_t c1 = (c / 100) << 1;\n      // This is used to speed up decimal digit generation by copying\n      // pairs of digits into the final output.\n      std::memcpy(result + index + out_length - i - 1, kItoaLut + c0, 2);\n      std::memcpy(result + index + out_length - i - 3, kItoaLut + c1, 2);\n      i += 4;\n    }\n    if (output >= 100) {\n      const uint32_t c = (output % 100) << 1;\n      output /= 100;\n      std::memcpy(result + index + out_length - i - 1, kItoaLut + c, 2);\n      i += 2;\n    }\n    if (output >= 10) {\n      const uint32_t c = output << 1;\n      // We can't use std::memcpy here: the decimal dot goes between these two\n      // digits.\n      result[index + out_length - i] = kItoaLut[c + 1];\n      result[index] = kItoaLut[c];\n    } else {\n      result[index] = static_cast<char>('0' + output);\n    }\n\n    // Print decimal point if needed.\n    if (out_length > 1) {\n      result[index + 1] = '.';\n      index += out_length + 1;\n    } else {\n      ++index;\n    }\n\n    // Print the exponent.\n    result[index++] = 'E';\n    int32_t exp = v.exponent + static_cast<int32_t>(out_length) - 1;\n    if (exp < 0) {\n      result[index++] = '-';\n      exp = -exp;\n    }\n\n    if (exp >= 10) {\n      std::memcpy(result + index, kItoaLut + 2 * exp, 2);\n      index += 2;\n    
} else {\n      result[index++] = static_cast<char>('0' + exp);\n    }\n\n    return index;\n  }\n\n  static int32_t PrintSpecialFloat(const bool sign, UnsignedFloatBase2 f,\n                                   char *const result) noexcept(true) {\n    if (f.mantissa) {\n      std::memcpy(result, u8\"NaN\", 3);\n      return 3;\n    }\n    if (sign) {\n      result[0] = '-';\n    }\n    if (f.exponent) {\n      std::memcpy(result + sign, u8\"Infinity\", 8);\n      return sign + 8;\n    }\n    std::memcpy(result + sign, u8\"0E0\", 3);\n    return sign + 3;\n  }\n};\n\nint32_t ToCharsFloatImpl(float f, char * const result) {\n  // Step 1: Decode the floating-point number, and unify normalized and\n  // subnormal cases.\n  UnsignedFloatBase2 uf32;\n  bool sign;\n  IEEE754::Decode(f, &uf32, &sign);\n\n  // Case distinction; exit early for the easy cases.\n  if (uf32.Infinite() || uf32.Zero()) {\n    return RyuPrinter::PrintSpecialFloat(sign, uf32, result);\n  }\n\n  const UnsignedFloatBase10 v = PowerBaseComputer::Binary2Decimal(uf32);\n  const auto index = RyuPrinter::PrintBase10Float(v, sign, result);\n  return index;\n}\n\n\n// ====================== Integer ==================\n\n// This is an implementation for base 10 inspired by the one in libstdc++v3.  The general\n// scheme is by decomposing the value into multiple combination of base (which is 10) by\n// mod, until the value is lesser than 10, then last char is just char '0' (ASCII 48) plus\n// that value.  
Other popular implementations can be found in RapidJson and libc++ (in\n// llvm-project), which uses the same general work flow with the same look up table, but\n// probably with better performance as they are more complicated.\nvoid ItoaUnsignedImpl(char *first, uint32_t length, uint64_t value) {\n  uint32_t position = length - 1;\n  while (value >= Tens(2)) {\n    auto const num = (value % Tens(2)) * 2;\n    value /= Tens(2);\n    first[position] = kItoaLut[num + 1];\n    first[position - 1] = kItoaLut[num];\n    position -= 2;\n  }\n  if (value >= 10) {\n    auto const num = value * 2;\n    first[0] = kItoaLut[num];\n    first[1] = kItoaLut[num + 1];\n  } else {\n    first[0]= '0' + value;\n  }\n}\n\nconstexpr uint32_t ShortestDigit10Impl(uint64_t value, uint32_t n) {\n  // Should trigger tail recursion optimization.\n  return value < 10 ? n :\n      (value < Tens(2) ? n + 1 :\n       (value < Tens(3) ? n + 2 :\n        (value < Tens(4) ? n + 3 :\n         ShortestDigit10Impl(value / Tens(4), n + 4))));\n}\n\nconstexpr uint32_t ShortestDigit10(uint64_t value) {\n  return ShortestDigit10Impl(value, 1);\n}\n\nto_chars_result ToCharsUnsignedImpl(char *first, char *last,\n                                    uint64_t const value) {\n  const uint32_t output_len = ShortestDigit10(value);\n  to_chars_result ret;\n  if (XGBOOST_EXPECT(std::distance(first, last) == 0, false)) {\n    ret.ec = std::errc::value_too_large;\n    ret.ptr = last;\n    return ret;\n  }\n\n  ItoaUnsignedImpl(first, output_len, value);\n  ret.ptr = first + output_len;\n  ret.ec = std::errc();\n  return ret;\n}\n\n/*\n * The parsing is also part of ryu.  As of writing, the implementation in ryu uses full\n * double table.  But here we optimize the table size with float table instead.  
The\n * result is exactly the same.\n */\nfrom_chars_result FromCharFloatImpl(const char *buffer, const int len,\n                                    float *result) {\n  if (len == 0) {\n    return {buffer, std::errc::invalid_argument};\n  }\n  int32_t m10digits = 0;\n  int32_t e10digits = 0;\n  int32_t dot_ind = len;\n  int32_t e_ind = len;\n  uint32_t mantissa_b10 = 0;\n  int32_t exp_b10 = 0;\n  bool signed_mantissa = false;\n  bool signed_exp = false;\n  int32_t i = 0;\n  if (buffer[i] == '-') {\n    signed_mantissa = true;\n    i++;\n  }\n  for (; i < len; i++) {\n    char c = buffer[i];\n    if (c == '.') {\n      if (dot_ind != len) {\n        return {buffer + i, std::errc::invalid_argument};\n      }\n      dot_ind = i;\n      continue;\n    }\n    if ((c < '0') || (c > '9')) {\n      break;\n    }\n    if (m10digits >= 9) {\n      return {buffer + i, std::errc::result_out_of_range};\n    }\n    mantissa_b10 = 10 * mantissa_b10 + (c - '0');\n    if (mantissa_b10 != 0) {\n      m10digits++;\n    }\n  }\n\n  if (i < len && ((buffer[i] == 'e') || (buffer[i] == 'E'))) {\n    e_ind = i;\n    i++;\n    if (i < len && ((buffer[i] == '-') || (buffer[i] == '+'))) {\n      signed_exp = buffer[i] == '-';\n      i++;\n    }\n    for (; i < len; i++) {\n      char c = buffer[i];\n      if ((c < '0') || (c > '9')) {\n        return {buffer + i, std::errc::invalid_argument};\n      }\n      if (e10digits > 3) {\n        return {buffer + i, std::errc::result_out_of_range};\n      }\n      exp_b10 = 10 * exp_b10 + (c - '0');\n      if (exp_b10 != 0) {\n        e10digits++;\n      }\n    }\n  }\n  if (i < len) {\n    return {buffer + i, std::errc::invalid_argument};\n  }\n  if (signed_exp) {\n    exp_b10 = -exp_b10;\n  }\n  exp_b10 -= dot_ind < e_ind ? e_ind - dot_ind - 1 : 0;\n  if (mantissa_b10 == 0) {\n    *result = signed_mantissa ? 
-0.0f : 0.0f;\n    return {};\n  }\n\n  if ((m10digits + exp_b10 <= -46) || (mantissa_b10 == 0)) {\n    // Number is less than 1e-46, which should be rounded down to 0; return\n    // +/-0.0.\n    uint32_t ieee =\n        (static_cast<uint32_t>(signed_mantissa))\n        << (IEEE754::kFloatExponentBits + IEEE754::kFloatMantissaBits);\n    *result = BitCast<float>(ieee);\n    return {};\n  }\n  if (m10digits + exp_b10 >= 40) {\n    // Number is larger than 1e+39, which should be rounded to +/-Infinity.\n    *result = IEEE754::Infinity(signed_mantissa);\n    return {};\n  }\n\n  // Convert to binary float m2 * 2^e2, while retaining information about\n  // whether the conversion was exact (trailingZeros).\n  int32_t exp_b2;\n  uint32_t mantissa_b2;\n  bool trailing_zeros;\n  if (exp_b10 >= 0) {\n    // The length of m * 10^e in bits is:\n    //   log2(m10 * 10^e10) = log2(m10) + e10 log2(10) = log2(m10) + e10 + e10 *\n    //   log2(5)\n    //\n    // We want to compute the IEEE754::kFloatMantissaBits + 1 top-most bits (+1 for the\n    // implicit leading one in IEEE format). 
We therefore choose a binary output\n    // exponent of\n    //   log2(m10 * 10^e10) - (IEEE754::kFloatMantissaBits + 1).\n    //\n    // We use floor(log2(5^e10)) so that we get at least this many bits; better\n    // to have an additional bit than to not have enough bits.\n    exp_b2 = RyuPowLogUtils::FloorLog2(mantissa_b10) + exp_b10 +\n             RyuPowLogUtils::Log2Pow5(exp_b10) -\n             (IEEE754::kFloatMantissaBits + 1);\n\n    // We now compute [m10 * 10^e10 / 2^e2] = [m10 * 5^e10 / 2^(e2-e10)].\n    // To that end, we use the RyuPowLogUtils::kFloatPow5Bitcount table.\n    int j = exp_b2 - exp_b10 - RyuPowLogUtils::CeilLog2Pow5(exp_b10) +\n            RyuPowLogUtils::kFloatPow5Bitcount;\n    assert(j >= 0);\n    mantissa_b2 = RyuPowLogUtils::MulPow5divPow2(mantissa_b10, exp_b10, j);\n\n    // We also compute if the result is exact, i.e.,\n    //   [m10 * 10^e10 / 2^e2] == m10 * 10^e10 / 2^e2.\n    // This can only be the case if 2^e2 divides m10 * 10^e10, which in turn\n    // requires that the largest power of 2 that divides m10 + e10 is greater\n    // than e2. If e2 is less than e10, then the result must be exact. 
Otherwise\n    // we use the existing MultipleOfPowerOf2 function.\n    trailing_zeros =\n        exp_b2 < exp_b10 ||\n        (exp_b2 - exp_b10 < 32 &&\n         RyuPowLogUtils::MultipleOfPowerOf2(mantissa_b10, exp_b2 - exp_b10));\n  } else {\n    exp_b2 = RyuPowLogUtils::FloorLog2(mantissa_b10) + exp_b10 -\n             RyuPowLogUtils::CeilLog2Pow5(-exp_b10) -\n             (IEEE754::kFloatMantissaBits + 1);\n\n    // We now compute [m10 * 10^e10 / 2^e2] = [m10 / (5^(-e10) 2^(e2-e10))].\n    int j = exp_b2 - exp_b10 + RyuPowLogUtils::CeilLog2Pow5(-exp_b10) - 1 +\n            RyuPowLogUtils::kFloatPow5InvBitcount;\n    mantissa_b2 = RyuPowLogUtils::MulPow5InvDivPow2(mantissa_b10, -exp_b10, j);\n\n    // We also compute if the result is exact, i.e.,\n    //   [m10 / (5^(-e10) 2^(e2-e10))] == m10 / (5^(-e10) 2^(e2-e10))\n    //\n    // If e2-e10 >= 0, we need to check whether (5^(-e10) 2^(e2-e10)) divides\n    // m10, which is the case iff pow5(m10) >= -e10 AND pow2(m10) >= e2-e10.\n    //\n    // If e2-e10 < 0, we have actually computed [m10 * 2^(e10-e2) / 5^(-e10)]\n    // above, and we need to check whether 5^(-e10) divides (m10 * 2^(e10-e2)),\n    // which is the case iff pow5(m10 * 2^(e10-e2)) = pow5(m10) >= -e10.\n    trailing_zeros =\n        (exp_b2 < exp_b10 ||\n         (exp_b2 - exp_b10 < 32 && RyuPowLogUtils::MultipleOfPowerOf2(\n                                       mantissa_b10, exp_b2 - exp_b10))) &&\n        RyuPowLogUtils::MultipleOfPowerOf5(mantissa_b10, -exp_b10);\n  }\n\n  // Compute the final IEEE exponent.\n  uint32_t f_e2 =\n      std::max(static_cast<int32_t>(0),\n               static_cast<int32_t>(exp_b2 + IEEE754::kFloatBias +\n                                    RyuPowLogUtils::FloorLog2(mantissa_b2)));\n\n  if (f_e2 > 0xfe) {\n    // Final IEEE exponent is larger than the maximum representable; return\n    // +/-Infinity.\n    *result = IEEE754::Infinity(signed_mantissa);\n    return {};\n  }\n\n  // We need to figure out how much we 
need to shift m2. The tricky part is that\n  // we need to take the final IEEE exponent into account, so we need to reverse\n  // the bias and also special-case the value 0.\n  int32_t shift = (f_e2 == 0 ? 1 : f_e2) - exp_b2 - IEEE754::kFloatBias -\n                  IEEE754::kFloatMantissaBits;\n  assert(shift >= 1);\n\n  // We need to round up if the exact value is more than 0.5 above the value we\n  // computed. That's equivalent to checking if the last removed bit was 1 and\n  // either the value was not just trailing zeros or the result would otherwise\n  // be odd.\n  //\n  // We need to update trailingZeros given that we have the exact output\n  // exponent ieee_e2 now.\n  trailing_zeros &= (mantissa_b2 & ((1u << (shift - 1)) - 1)) == 0;  // NOLINT\n  uint32_t lastRemovedBit = (mantissa_b2 >> (shift - 1)) & 1;\n  bool roundup = (lastRemovedBit != 0) &&\n                 (!trailing_zeros || (((mantissa_b2 >> shift) & 1) != 0));\n\n  uint32_t f_m2 = (mantissa_b2 >> shift) + roundup;\n  assert(f_m2 <= (1u << (IEEE754::kFloatMantissaBits + 1)));\n  f_m2 &= (1u << IEEE754::kFloatMantissaBits) - 1;\n  if (f_m2 == 0 && roundup) {\n    // Rounding up may overflow the mantissa.\n    // In this case we move a trailing zero of the mantissa into the exponent.\n    // Due to how the IEEE represents +/-Infinity, we don't need to check for\n    // overflow here.\n    f_e2++;\n  }\n  *result = IEEE754::Encode({f_m2, f_e2}, signed_mantissa);\n  return {};\n}\n}  // namespace detail\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/charconv.h",
    "content": "/*!\n * Copyright 2019 by XGBoost Contributors\n *\n * \\brief Implement `std::to_chars` and `std::from_chars` for float.  Only base 10 with\n *        scientific format is supported.  The implementation guarantees roundtrip\n *        reproducibility.\n */\n#ifndef XGBOOST_COMMON_CHARCONV_H_\n#define XGBOOST_COMMON_CHARCONV_H_\n\n#include <cstddef>\n#include <system_error>\n#include <iterator>\n#include <limits>\n\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\n\nstruct to_chars_result {  // NOLINT\n  char* ptr;\n  std::errc ec;\n};\n\nstruct from_chars_result {  // NOLINT\n  const char *ptr;\n  std::errc ec;\n};\n\nnamespace detail {\nint32_t ToCharsFloatImpl(float f, char * const result);\nto_chars_result ToCharsUnsignedImpl(char *first, char *last,\n                                    uint64_t const value);\nfrom_chars_result FromCharFloatImpl(const char *buffer, const int len,\n                                    float *result);\n}  // namespace detail\n\ntemplate <typename T>\nstruct NumericLimits;\n\ntemplate <> struct NumericLimits<float> {\n  // Unlike std::numeric_limits<float>::max_digits10, which represents the **minimum**\n  // length of base10 digits that are necessary to uniquely represent all distinct values,\n  // this value is used to represent the maximum length.  As sign bit occupies 1 character:\n  // sign + len(str(2^24)) + decimal point + `E` + sign + len(str(2^8)) + '\\0'\n  static constexpr size_t kToCharsSize = 16;\n};\n\ntemplate <> struct NumericLimits<int64_t> {\n  // From llvm libcxx: numeric_limits::digits10 returns value less on 1 than desired for\n  // unsigned numbers.  
For example, for 1-byte unsigned value digits10 is 2 (999 can not\n  // be represented), so we need +1 here.\n  static constexpr size_t kToCharsSize =\n      std::numeric_limits<int64_t>::digits10 +\n      3;  // +1 for minus, +1 for digits10, +1 for '\\0' just to be safe.\n};\n\ninline to_chars_result to_chars(char  *first, char *last, float value) {  // NOLINT\n  if (XGBOOST_EXPECT(!(static_cast<size_t>(last - first) >=\n                       NumericLimits<float>::kToCharsSize),\n                     false)) {\n    return {first, std::errc::value_too_large};\n  }\n  auto index = detail::ToCharsFloatImpl(value, first);\n  to_chars_result ret;\n  ret.ptr = first + index;\n\n  if (XGBOOST_EXPECT(ret.ptr < last, true)) {\n    ret.ec = std::errc();\n  } else {\n    ret.ec =  std::errc::value_too_large;\n    ret.ptr = last;\n  }\n  return ret;\n}\n\ninline to_chars_result to_chars(char *first, char *last, int64_t value) { // NOLINT\n  if (XGBOOST_EXPECT(first == last, false)) {\n    return {first, std::errc::value_too_large};\n  }\n  // first write '-' and convert to unsigned, then write the rest.\n  if (value == 0) {\n    *first = '0';\n    return {std::next(first), std::errc()};\n  }\n  uint64_t unsigned_value = value;\n  if (value < 0) {\n    *first = '-';\n    std::advance(first, 1);\n    unsigned_value = static_cast<uint64_t>(~value) + static_cast<uint64_t>(1);\n  }\n  return detail::ToCharsUnsignedImpl(first, last, unsigned_value);\n}\n\ninline from_chars_result from_chars(const char *buffer, const char *end, // NOLINT\n                                    float &value) {  // NOLINT\n  from_chars_result res =\n      detail::FromCharFloatImpl(buffer, std::distance(buffer, end), &value);\n  return res;\n}\n}  // namespace xgboost\n\n#endif   // XGBOOST_COMMON_CHARCONV_H_\n"
  },
  {
    "path": "src/common/column_matrix.cc",
    "content": "/**\n * Copyright 2017-2023, XGBoost Contributors\n * \\brief Utility for fast column-wise access\n */\n#include \"column_matrix.h\"\n\n#include <algorithm>    // for transform\n#include <cstddef>      // for size_t\n#include <cstdint>      // for uint64_t, uint8_t\n#include <limits>       // for numeric_limits\n#include <type_traits>  // for underlying_type_t\n#include <vector>       // for vector\n\n#include \"../data/gradient_index.h\"  // for GHistIndexMatrix\n#include \"io.h\"                      // for AlignedResourceReadStream, AlignedFileWriteStream\n#include \"xgboost/base.h\"            // for bst_feature_t\n#include \"xgboost/span.h\"            // for Span\n\nnamespace xgboost::common {\nvoid ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold) {\n  auto const nfeature = gmat.Features();\n  const size_t nrow = gmat.Size();\n  // identify type of each column\n  type_ = common::MakeFixedVecWithMalloc(nfeature, ColumnType{});\n\n  uint32_t max_val = std::numeric_limits<uint32_t>::max();\n  for (bst_feature_t fid = 0; fid < nfeature; ++fid) {\n    CHECK_LE(gmat.cut.Ptrs()[fid + 1] - gmat.cut.Ptrs()[fid], max_val);\n  }\n\n  bool all_dense_column = true;\n\n  std::vector<size_t> feature_counts(nfeature, 0);\n  gmat.GetFeatureCounts(feature_counts.data());\n\n  // classify features\n  for (bst_feature_t fid = 0; fid < nfeature; ++fid) {\n    if (static_cast<double>(feature_counts[fid]) < sparse_threshold * nrow) {\n      type_[fid] = kSparseColumn;\n      all_dense_column = false;\n    } else {\n      type_[fid] = kDenseColumn;\n    }\n  }\n\n  // want to compute storage boundary for each feature\n  // using variants of prefix sum scan\n  feature_offsets_ = common::MakeFixedVecWithMalloc(nfeature + 1, std::size_t{0});\n  size_t accum_index = 0;\n  feature_offsets_[0] = accum_index;\n  for (bst_feature_t fid = 1; fid < nfeature + 1; ++fid) {\n    if (type_[fid - 1] == kDenseColumn) {\n      accum_index += 
static_cast<size_t>(nrow);\n    } else {\n      accum_index += feature_counts[fid - 1];\n    }\n    feature_offsets_[fid] = accum_index;\n  }\n\n  SetTypeSize(gmat.MaxNumBinPerFeat());\n  auto storage_size =\n      feature_offsets_.back() * static_cast<std::underlying_type_t<BinTypeSize>>(bins_type_size_);\n\n  index_ = common::MakeFixedVecWithMalloc(storage_size, std::uint8_t{0});\n\n  if (!all_dense_column) {\n    row_ind_ = common::MakeFixedVecWithMalloc(feature_offsets_[nfeature], std::size_t{0});\n  }\n\n  // store least bin id for each feature\n  index_base_ = const_cast<uint32_t*>(gmat.cut.Ptrs().data());\n\n  any_missing_ = !gmat.IsDense();\n\n  missing_ = MissingIndicator{0, false};\n}\n\n// IO procedures for external memory.\nbool ColumnMatrix::Read(AlignedResourceReadStream* fi, uint32_t const* index_base) {\n  if (!common::ReadVec(fi, &index_)) {\n    return false;\n  }\n  if (!common::ReadVec(fi, &type_)) {\n    return false;\n  }\n  if (!common::ReadVec(fi, &row_ind_)) {\n    return false;\n  }\n  if (!common::ReadVec(fi, &feature_offsets_)) {\n    return false;\n  }\n\n  if (!common::ReadVec(fi, &missing_.storage)) {\n    return false;\n  }\n  missing_.InitView();\n\n  index_base_ = index_base;\n  if (!fi->Read(&bins_type_size_)) {\n    return false;\n  }\n  if (!fi->Read(&any_missing_)) {\n    return false;\n  }\n  return true;\n}\n\nstd::size_t ColumnMatrix::Write(AlignedFileWriteStream* fo) const {\n  std::size_t bytes{0};\n\n  bytes += common::WriteVec(fo, index_);\n  bytes += common::WriteVec(fo, type_);\n  bytes += common::WriteVec(fo, row_ind_);\n  bytes += common::WriteVec(fo, feature_offsets_);\n  bytes += common::WriteVec(fo, missing_.storage);\n\n  bytes += fo->Write(bins_type_size_);\n  bytes += fo->Write(any_missing_);\n\n  return bytes;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/column_matrix.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\file column_matrix.h\n * \\brief Utility for fast column-wise access\n * \\author Philip Cho\n */\n\n#ifndef XGBOOST_COMMON_COLUMN_MATRIX_H_\n#define XGBOOST_COMMON_COLUMN_MATRIX_H_\n\n#include <algorithm>\n#include <cstddef>  // for size_t, byte\n#include <cstdint>  // for uint8_t\n#include <limits>\n#include <memory>\n#include <type_traits>  // for enable_if_t, is_same_v, is_signed_v\n\n#include \"../data/adapter.h\"  // for SparsePageAdapterBatch\n#include \"../data/entry.h\"    // for IsValidFunctor\n#include \"../data/gradient_index.h\"\n#include \"bitfield.h\"  // for RBitField8\n#include \"hist_util.h\"\n#include \"ref_resource_view.h\"  // for RefResourceView\n#include \"xgboost/base.h\"       // for bst_bin_t\n#include \"xgboost/span.h\"       // for Span\n\nnamespace xgboost::common {\nclass ColumnMatrix;\nclass AlignedFileWriteStream;\nclass AlignedResourceReadStream;\n\n/*! \\brief column type */\nenum ColumnType : std::uint8_t { kDenseColumn, kSparseColumn };\n\n/*! \\brief a column storage, to be used with ApplySplit. Note that each\n    bin id is stored as index[i] + index_base.\n    Different types of column index for each column allow\n    to reduce the memory usage. 
*/\ntemplate <typename BinIdxType>\nclass Column {\n public:\n  static constexpr bst_bin_t kMissingId = -1;\n\n  Column(common::Span<const BinIdxType> index, bst_bin_t least_bin_idx)\n      : index_(index), index_base_(least_bin_idx) {}\n  virtual ~Column() = default;\n\n  [[nodiscard]] bst_bin_t GetGlobalBinIdx(size_t idx) const {\n    return index_base_ + static_cast<bst_bin_t>(index_.data()[idx]);\n  }\n\n  /* returns number of elements in column */\n  [[nodiscard]] size_t Size() const { return index_.size(); }\n\n private:\n  /* bin indexes in range [0, max_bins - 1] */\n  common::Span<BinIdxType const> index_;\n  /* bin index offset for specific feature */\n  bst_bin_t const index_base_;\n};\n\ntemplate <typename BinIdxT>\nclass SparseColumnIter : public Column<BinIdxT> {\n private:\n  using Base = Column<BinIdxT>;\n  /* indexes of rows */\n  common::Span<const size_t> row_ind_;\n  size_t idx_;\n\n  [[nodiscard]] size_t const* RowIndices() const { return row_ind_.data(); }\n\n public:\n  SparseColumnIter(common::Span<const BinIdxT> index, bst_bin_t least_bin_idx,\n                   common::Span<const size_t> row_ind, bst_idx_t first_row_idx)\n      : Base{index, least_bin_idx}, row_ind_(row_ind) {\n    // first_row_id is the first row in the leaf partition\n    const size_t* row_data = RowIndices();\n    const size_t column_size = this->Size();\n    // search first nonzero row with index >= rid_span.front()\n    // note that the input row partition is always sorted.\n    const size_t* p = std::lower_bound(row_data, row_data + column_size, first_row_idx);\n    // column_size if all missing\n    idx_ = p - row_data;\n  }\n  SparseColumnIter(SparseColumnIter const&) = delete;\n  SparseColumnIter(SparseColumnIter&&) = default;\n\n  [[nodiscard]] size_t GetRowIdx(size_t idx) const { return RowIndices()[idx]; }\n  bst_bin_t operator[](size_t rid) {\n    const size_t column_size = this->Size();\n    if (!((idx_) < column_size)) {\n      return this->kMissingId;\n    
}\n    // find next non-missing row\n    while ((idx_) < column_size && GetRowIdx(idx_) < rid) {\n      ++(idx_);\n    }\n    if (((idx_) < column_size) && GetRowIdx(idx_) == rid) {\n      // non-missing row found\n      return this->GetGlobalBinIdx(idx_);\n    } else {\n      // at the end of column\n      return this->kMissingId;\n    }\n  }\n};\n\n/**\n * @brief Column stored as a dense vector. It might still contain missing values as\n *        indicated by the missing flags.\n */\ntemplate <typename BinIdxT, bool any_missing>\nclass DenseColumnIter : public Column<BinIdxT> {\n private:\n  using Base = Column<BinIdxT>;\n  /* flags for missing values in dense columns */\n  LBitField32 missing_flags_;\n  size_t feature_offset_;\n\n public:\n  explicit DenseColumnIter(common::Span<const BinIdxT> index, bst_bin_t index_base,\n                           LBitField32 missing_flags, size_t feature_offset)\n      : Base{index, index_base}, missing_flags_{missing_flags}, feature_offset_{feature_offset} {}\n  DenseColumnIter(DenseColumnIter const&) = delete;\n  DenseColumnIter(DenseColumnIter&&) = default;\n\n  [[nodiscard]] bool IsMissing(size_t ridx) const {\n    return missing_flags_.Check(feature_offset_ + ridx);\n  }\n\n  bst_bin_t operator[](size_t ridx) const {\n    if (any_missing) {\n      return IsMissing(ridx) ? this->kMissingId : this->GetGlobalBinIdx(ridx);\n    } else {\n      return this->GetGlobalBinIdx(ridx);\n    }\n  }\n};\n\n/**\n * @brief Column major matrix for gradient index on CPU.\n *\n *    This matrix contains both dense columns and sparse columns, the type of the column\n *    is controlled by the sparse threshold parameter. 
When the number of missing values\n *    in a column is below the threshold it's classified as dense column.\n */\nclass ColumnMatrix {\n  /**\n   * @brief A bit set for indicating whether an element in a dense column is missing.\n   */\n  struct MissingIndicator {\n    using BitFieldT = LBitField32;\n    using T = typename BitFieldT::value_type;\n\n    BitFieldT missing;\n    RefResourceView<T> storage;\n    static_assert(std::is_same_v<T, std::uint32_t>);\n\n    template <typename U>\n    [[nodiscard]] std::enable_if_t<!std::is_signed_v<U>, U> static InitValue(bool init) {\n      return init ? ~U{0} : U{0};\n    }\n\n    MissingIndicator() = default;\n    /**\n     * @param n_elements Size of the bit set\n     * @param init       Initialize the indicator to true or false.\n     */\n    MissingIndicator(std::size_t n_elements, bool init) {\n      auto m_size = missing.ComputeStorageSize(n_elements);\n      storage = common::MakeFixedVecWithMalloc(m_size, InitValue<T>(init));\n      this->InitView();\n    }\n    /** @brief Set the i^th element to be a valid element (instead of missing). */\n    void SetValid(typename LBitField32::index_type i) { missing.Clear(i); }\n    /** @brief assign the storage to the view. 
*/\n    void InitView() {\n      missing = LBitField32{Span{storage.data(), static_cast<size_t>(storage.size())}};\n    }\n\n    void GrowTo(std::size_t n_elements, bool init) {\n      CHECK(storage.Resource()->Type() == ResourceHandler::kMalloc)\n          << \"[Internal Error]: Cannot grow the vector when external memory is used.\";\n      auto m_size = missing.ComputeStorageSize(n_elements);\n      CHECK_GE(m_size, storage.size());\n      if (m_size == storage.size()) {\n        return;\n      }\n      // grow the storage\n      auto resource = std::dynamic_pointer_cast<common::MallocResource>(storage.Resource());\n      CHECK(resource);\n      resource->Resize(m_size * sizeof(T), InitValue<std::byte>(init));\n      storage = RefResourceView<T>{resource->DataAs<T>(), m_size, resource};\n\n      this->InitView();\n    }\n  };\n\n  void InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold);\n\n  template <typename ColumnBinT, typename BinT, typename RIdx>\n  void SetBinSparse(BinT bin_id, RIdx rid, bst_feature_t fid, ColumnBinT* local_index) {\n    if (type_[fid] == kDenseColumn) {\n      ColumnBinT* begin = &local_index[feature_offsets_[fid]];\n      begin[rid] = bin_id - index_base_[fid];\n      // not thread-safe with bit field.\n      // FIXME(jiamingy): We can directly assign kMissingId to the index to avoid missing\n      // flags.\n      missing_.SetValid(feature_offsets_[fid] + rid);\n    } else {\n      ColumnBinT* begin = &local_index[feature_offsets_[fid]];\n      begin[num_nonzeros_[fid]] = bin_id - index_base_[fid];\n      row_ind_[feature_offsets_[fid] + num_nonzeros_[fid]] = rid;\n      ++num_nonzeros_[fid];\n    }\n  }\n\n public:\n  // get number of features\n  [[nodiscard]] bst_feature_t GetNumFeature() const {\n    return static_cast<bst_feature_t>(type_.size());\n  }\n\n  ColumnMatrix() = default;\n  ColumnMatrix(GHistIndexMatrix const& gmat, double sparse_threshold) {\n    this->InitStorage(gmat, sparse_threshold);\n  }\n\n  /**\n 
  * @brief Initialize ColumnMatrix from GHistIndexMatrix with reference to the original\n   *        SparsePage.\n   */\n  void InitFromSparse(SparsePage const& page, const GHistIndexMatrix& gmat, double sparse_threshold,\n                      int32_t n_threads) {\n    auto batch = data::SparsePageAdapterBatch{page.GetView()};\n    this->InitStorage(gmat, sparse_threshold);\n    // ignore base row id here as we always has one column matrix for each sparse page.\n    this->PushBatch(n_threads, batch, std::numeric_limits<float>::quiet_NaN(), gmat, 0);\n  }\n\n  /**\n   * @brief Initialize ColumnMatrix from GHistIndexMatrix without reference to actual\n   *        data.\n   *\n   *    This function requires a binary search for each bin to get back the feature index\n   *    for those bins.\n   */\n  void InitFromGHist(Context const* ctx, GHistIndexMatrix const& gmat) {\n    auto n_threads = ctx->Threads();\n    if (!any_missing_) {\n      // row index is compressed, we need to dispatch it.\n      DispatchBinType(gmat.index.GetBinTypeSize(), [&, size = gmat.Size(), n_threads = n_threads,\n                                                    n_features = gmat.Features()](auto t) {\n        using RowBinIdxT = decltype(t);\n        SetIndexNoMissing(gmat.base_rowid, gmat.index.data<RowBinIdxT>(), size, n_features,\n                          n_threads);\n      });\n    } else {\n      SetIndexMixedColumns(gmat);\n    }\n  }\n\n  [[nodiscard]] bool IsInitialized() const { return !type_.empty(); }\n\n  /**\n   * \\brief Push batch of data for Quantile DMatrix support.\n   *\n   * \\param batch      Input data wrapped inside a adapter batch.\n   * \\param gmat       The row-major histogram index that contains index for ALL data.\n   * \\param base_rowid The beginning row index for current batch.\n   */\n  template <typename Batch>\n  void PushBatch(int32_t n_threads, Batch const& batch, float missing, GHistIndexMatrix const& gmat,\n                 size_t base_rowid) {\n    
// pre-fill index_ for dense columns\n    if (!any_missing_) {\n      // row index is compressed, we need to dispatch it.\n\n      // use base_rowid from input parameter as gmat is a single matrix that contains all\n      // the histogram index instead of being only a batch.\n      DispatchBinType(gmat.index.GetBinTypeSize(), [&, size = batch.Size(), n_threads = n_threads,\n                                                    n_features = gmat.Features()](auto t) {\n        using RowBinIdxT = decltype(t);\n        SetIndexNoMissing(base_rowid, gmat.index.data<RowBinIdxT>(), size, n_features, n_threads);\n      });\n    } else {\n      SetIndexMixedColumns(base_rowid, batch, gmat, missing);\n    }\n  }\n\n  /* Set the number of bytes based on numeric limit of maximum number of bins provided by user */\n  void SetTypeSize(size_t max_bin_per_feat) {\n    if ((max_bin_per_feat - 1) <= static_cast<int>(std::numeric_limits<uint8_t>::max())) {\n      bins_type_size_ = kUint8BinsTypeSize;\n    } else if ((max_bin_per_feat - 1) <= static_cast<int>(std::numeric_limits<uint16_t>::max())) {\n      bins_type_size_ = kUint16BinsTypeSize;\n    } else {\n      bins_type_size_ = kUint32BinsTypeSize;\n    }\n  }\n\n  template <typename BinIdxType>\n  auto SparseColumn(bst_feature_t fidx, bst_idx_t first_row_idx) const {\n    const size_t feature_offset = feature_offsets_[fidx];  // to get right place for certain feature\n    const size_t column_size = feature_offsets_[fidx + 1] - feature_offset;\n    common::Span<const BinIdxType> bin_index = {\n        reinterpret_cast<const BinIdxType*>(&index_[feature_offset * bins_type_size_]),\n        column_size};\n    return SparseColumnIter<BinIdxType>(bin_index, index_base_[fidx],\n                                        {&row_ind_[feature_offset], column_size}, first_row_idx);\n  }\n\n  template <typename BinIdxType, bool any_missing>\n  auto DenseColumn(bst_feature_t fidx) const {\n    const size_t feature_offset = feature_offsets_[fidx]; 
 // to get right place for certain feature\n    const size_t column_size = feature_offsets_[fidx + 1] - feature_offset;\n    common::Span<const BinIdxType> bin_index = {\n        reinterpret_cast<const BinIdxType*>(&index_[feature_offset * bins_type_size_]),\n        column_size};\n    return DenseColumnIter<BinIdxType, any_missing>{\n        bin_index, static_cast<bst_bin_t>(index_base_[fidx]), missing_.missing, feature_offset};\n  }\n\n  // all columns are dense column and has no missing value\n  // FIXME(jiamingy): We don't need a column matrix if there's no missing value.\n  template <typename RowBinIdxT>\n  void SetIndexNoMissing(bst_idx_t base_rowid, RowBinIdxT const* row_index, const size_t n_samples,\n                         const size_t n_features, int32_t n_threads) {\n    missing_.GrowTo(feature_offsets_[n_features], false);\n\n    DispatchBinType(bins_type_size_, [&](auto t) {\n      using ColumnBinT = decltype(t);\n      auto column_index = Span<ColumnBinT>{reinterpret_cast<ColumnBinT*>(index_.data()),\n                                           static_cast<size_t>(index_.size() / sizeof(ColumnBinT))};\n      ParallelFor(n_samples, n_threads, [&](auto rid) {\n        rid += base_rowid;\n        const size_t ibegin = rid * n_features;\n        const size_t iend = (rid + 1) * n_features;\n        for (size_t i = ibegin, j = 0; i < iend; ++i, ++j) {\n          const size_t idx = feature_offsets_[j];\n          // No need to add offset, as row index is compressed and stores the local index\n          column_index[idx + rid] = row_index[i];\n        }\n      });\n    });\n  }\n\n  /**\n   * \\brief Set column index for both dense and sparse columns\n   */\n  template <typename Batch>\n  void SetIndexMixedColumns(size_t base_rowid, Batch const& batch, const GHistIndexMatrix& gmat,\n                            float missing) {\n    auto n_features = gmat.Features();\n\n    missing_.GrowTo(feature_offsets_[n_features], true);\n    auto const* row_index = 
gmat.index.data<std::uint32_t>() + gmat.row_ptr[base_rowid];\n    if (num_nonzeros_.empty()) {\n      num_nonzeros_ = common::MakeFixedVecWithMalloc(n_features, std::size_t{0});\n    } else {\n      CHECK_EQ(num_nonzeros_.size(), n_features);\n    }\n\n    auto is_valid = data::IsValidFunctor{missing};\n\n    DispatchBinType(bins_type_size_, [&](auto t) {\n      using ColumnBinT = decltype(t);\n      ColumnBinT* local_index = reinterpret_cast<ColumnBinT*>(index_.data());\n      size_t const batch_size = batch.Size();\n      size_t k{0};\n      for (size_t rid = 0; rid < batch_size; ++rid) {\n        auto line = batch.GetLine(rid);\n        for (size_t i = 0; i < line.Size(); ++i) {\n          auto coo = line.GetElement(i);\n          if (is_valid(coo)) {\n            auto fid = coo.column_idx;\n            const uint32_t bin_id = row_index[k];\n            SetBinSparse(bin_id, rid + base_rowid, fid, local_index);\n            ++k;\n          }\n        }\n      }\n    });\n  }\n\n  /**\n   * \\brief Set column index for both dense and sparse columns, but with only GHistMatrix\n   *        available and requires a search for each bin.\n   */\n  void SetIndexMixedColumns(const GHistIndexMatrix& gmat) {\n    auto n_features = gmat.Features();\n\n    missing_ = MissingIndicator{feature_offsets_[n_features], true};\n    num_nonzeros_ = common::MakeFixedVecWithMalloc(n_features, std::size_t{0});\n\n    DispatchBinType(bins_type_size_, [&](auto t) {\n      using ColumnBinT = decltype(t);\n      ColumnBinT* local_index = reinterpret_cast<ColumnBinT*>(index_.data());\n      CHECK(this->any_missing_);\n      AssignColumnBinIndex(gmat,\n                           [&](auto bin_idx, std::size_t, std::size_t ridx, bst_feature_t fidx) {\n                             SetBinSparse(bin_idx, ridx, fidx, local_index);\n                           });\n    });\n  }\n\n  [[nodiscard]] BinTypeSize GetTypeSize() const { return bins_type_size_; }\n  [[nodiscard]] auto 
GetColumnType(bst_feature_t fidx) const { return type_[fidx]; }\n\n  // And this returns part of state\n  [[nodiscard]] bool AnyMissing() const { return any_missing_; }\n\n  // IO procedures for external memory.\n  [[nodiscard]] bool Read(AlignedResourceReadStream* fi, uint32_t const* index_base);\n  [[nodiscard]] std::size_t Write(AlignedFileWriteStream* fo) const;\n  [[nodiscard]] MissingIndicator const& Missing() const { return missing_; }\n\n private:\n  RefResourceView<std::uint8_t> index_;\n\n  RefResourceView<ColumnType> type_;\n  /** @brief indptr of a CSC matrix. */\n  RefResourceView<std::size_t> row_ind_;\n  /** @brief indicate where each column's index and row_ind is stored. */\n  RefResourceView<std::size_t> feature_offsets_;\n  /** @brief The number of nnz of each column. */\n  RefResourceView<std::size_t> num_nonzeros_;\n\n  // index_base_[fid]: least bin id for feature fid\n  std::uint32_t const* index_base_;\n\n  MissingIndicator missing_;\n\n  BinTypeSize bins_type_size_;\n  bool any_missing_;\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_COLUMN_MATRIX_H_\n"
  },
  {
    "path": "src/common/common.cc",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n */\n#include \"common.h\"\n\n#include <cmath>    // for pow\n#include <cstdint>  // for uint8_t\n#include <cstdio>   // for snprintf, size_t\n#include <string>   // for string\n#include <utility>  // for pair\n\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::common {\nvoid EscapeU8(std::string const &string, std::string *p_buffer) {\n  auto &buffer = *p_buffer;\n  for (size_t i = 0; i < string.length(); i++) {\n    const auto ch = string[i];\n    if (ch == '\\\\') {\n      if (i < string.size() && string[i + 1] == 'u') {\n        buffer += \"\\\\\";\n      } else {\n        buffer += \"\\\\\\\\\";\n      }\n    } else if (ch == '\"') {\n      buffer += \"\\\\\\\"\";\n    } else if (ch == '\\b') {\n      buffer += \"\\\\b\";\n    } else if (ch == '\\f') {\n      buffer += \"\\\\f\";\n    } else if (ch == '\\n') {\n      buffer += \"\\\\n\";\n    } else if (ch == '\\r') {\n      buffer += \"\\\\r\";\n    } else if (ch == '\\t') {\n      buffer += \"\\\\t\";\n    } else if (static_cast<uint8_t>(ch) <= 0x1f) {\n      // Unit separator\n      char buf[8];\n      snprintf(buf, sizeof buf, \"\\\\u%04x\", ch);\n      buffer += buf;\n    } else {\n      buffer += ch;\n    }\n  }\n}\n\nstd::string HumanMemUnit(std::size_t n_bytes) {\n  auto n_bytes_f64 = static_cast<double>(n_bytes);\n  double constexpr k1024 = 1024.0;\n  using P = std::pair<std::int32_t, StringView>;\n  std::stringstream ss;\n  for (auto pu : {P{3, \"GB\"}, P{2, \"MB\"}, P{1, \"KB\"}}) {\n    auto const [power, unit] = pu;  // NOLINT\n    if (n_bytes_f64 >= (std::pow(k1024, power))) {\n      ss << (n_bytes_f64 / std::pow(k1024, power)) << unit;\n      return ss.str();\n    }\n  }\n  ss << n_bytes_f64 << \"B\";\n  return ss.str();\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/common.cu",
    "content": "/**\n * Copyright 2018-2024, XGBoost contributors\n */\n#include <thrust/system/cuda/error.h>\n#include <thrust/system_error.h>\n\n#include \"common.h\"\n\nnamespace dh {\nvoid ThrowOnCudaError(cudaError_t code, const char *file, int line) {\n  if (code != cudaSuccess) {\n    std::string f;\n    if (file != nullptr) {\n      f = file;\n    }\n    LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),\n                                       f + \": \" + std::to_string(line))\n                      .what();\n  }\n}\n}  // namespace dh\n"
  },
  {
    "path": "src/common/common.h",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file common.h\n * \\brief Common utilities\n */\n#ifndef XGBOOST_COMMON_COMMON_H_\n#define XGBOOST_COMMON_COMMON_H_\n\n#include <cmath>        // for ceil\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t, int64_t\n#include <sstream>      // for istringstream\n#include <string>       // for string, basic_string, getline, char_traits\n#include <string_view>  // for string_view\n#include <vector>       // for vector\n\n#include \"xgboost/base.h\"     // for XGBOOST_DEVICE\n#include \"xgboost/logging.h\"  // for LOG, LOG_FATAL, LogMessageFatal\n\n// magic to define functions based on the compiler.\n#if defined(__CUDACC__)\n\n#define WITH_CUDA() true\n\n#else\n\n#define WITH_CUDA() false\n\n#endif  // defined(__CUDACC__)\n\n#if defined(XGBOOST_USE_CUDA)\n#include <cuda_runtime_api.h>\n#endif\n\nnamespace dh {\n#if defined(XGBOOST_USE_CUDA)\n/*\n * Error handling functions\n */\nvoid ThrowOnCudaError(cudaError_t code, const char *file, int line);\n\n#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)\n\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace dh\n\nnamespace xgboost::common {\n/*!\n * \\brief Split a string by delimiter\n * \\param s String to be split.\n * \\param delim The delimiter.\n */\n[[nodiscard]] inline std::vector<std::string> Split(std::string const &s, char delim) {\n  std::string item;\n  std::istringstream is{s};\n  std::vector<std::string> ret;\n  while (std::getline(is, item, delim)) {\n    ret.push_back(item);\n  }\n  return ret;\n}\n\n[[nodiscard]] inline std::vector<std::string_view> Split(std::string_view s, char delim) {\n  std::size_t cur = 0;\n  std::vector<std::string_view> ret;\n  while ((cur = s.find_first_of(delim)) != std::string_view::npos) {\n    auto segment = s.substr(0, cur);\n    ret.push_back(segment);\n    s = s.substr(cur + 1);\n  }\n  if (!s.empty()) {\n    ret.push_back(s);\n  }\n  return ret;\n}\n\n// 
Trims leading whitespace from a string\n[[nodiscard]] inline std::string_view TrimFirst(std::string_view const &str) {\n  if (str.empty()) {\n    return str;\n  }\n  auto first = str.find_first_not_of(\" \\t\\n\\r\");\n  if (first == std::string_view::npos) {\n    return {};\n  }\n  return str.substr(first);\n}\n\n[[nodiscard]] inline std::string_view TrimLast(std::string_view const &str) {\n  if (str.empty()) {\n    return str;\n  }\n  auto last = str.find_last_not_of(\" \\t\\n\\r\");\n  if (last == std::string_view::npos) {\n    return {};\n  }\n  return str.substr(0, last + 1);\n}\n\n/**\n * @brief Add escapes for a UTF-8 string.\n */\nvoid EscapeU8(std::string const &string, std::string *p_buffer);\n\n/**\n * @brief Add escapes for a UTF-8 string with newly created buffer as return.\n */\ninline std::string EscapeU8(std::string const &str) {\n  std::string buffer;\n  EscapeU8(str, &buffer);\n  return buffer;\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE T Min(T a, T b) {\n  return a > b ? b : a;\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE T Max(T a, T b) {\n  return a < b ? 
b : a;\n}\n\ntemplate <typename T1, typename T2>\nXGBOOST_DEVICE T1 DivRoundUp(const T1 a, const T2 b) {\n  return static_cast<T1>(std::ceil(static_cast<double>(a) / b));\n}\n\n/*\n * Range iterator\n */\nclass Range {\n public:\n  using DifferenceType = int64_t;\n\n  class Iterator {\n    friend class Range;\n\n   public:\n    XGBOOST_DEVICE DifferenceType operator*() const { return i_; }\n    XGBOOST_DEVICE const Iterator &operator++() {\n      i_ += step_;\n      return *this;\n    }\n    XGBOOST_DEVICE Iterator operator++(int) {\n      Iterator res{*this};\n      i_ += step_;\n      return res;\n    }\n\n    XGBOOST_DEVICE bool operator==(const Iterator &other) const { return i_ >= other.i_; }\n    XGBOOST_DEVICE bool operator!=(const Iterator &other) const { return i_ < other.i_; }\n\n    XGBOOST_DEVICE void Step(DifferenceType s) { step_ = s; }\n\n   protected:\n    XGBOOST_DEVICE explicit Iterator(DifferenceType start) : i_(start) {}\n    XGBOOST_DEVICE explicit Iterator(DifferenceType start, DifferenceType step)\n        : i_{start}, step_{step} {}\n\n   private:\n    int64_t i_;\n    DifferenceType step_ = 1;\n  };\n\n  XGBOOST_DEVICE Iterator begin() const { return begin_; }  // NOLINT\n  XGBOOST_DEVICE Iterator end() const { return end_; }      // NOLINT\n\n  XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end) : begin_(begin), end_(end) {}\n  XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end, DifferenceType step)\n      : begin_(begin, step), end_(end) {}\n\n  XGBOOST_DEVICE bool operator==(const Range &other) const {\n    return *begin_ == *other.begin_ && *end_ == *other.end_;\n  }\n  XGBOOST_DEVICE bool operator!=(const Range &other) const { return !(*this == other); }\n\n  XGBOOST_DEVICE void Step(DifferenceType s) { begin_.Step(s); }\n\n private:\n  Iterator begin_;\n  Iterator end_;\n};\n\ninline void AssertGPUSupport() {\n#ifndef XGBOOST_USE_CUDA\n  LOG(FATAL) << \"XGBoost version not compiled with GPU support.\";\n#endif  
// XGBOOST_USE_CUDA\n}\n\ninline void AssertNvCompSupport() {\n#ifndef XGBOOST_USE_NVCOMP\n  LOG(FATAL) << \"XGBoost is not compiled with NVCOMP support.\";\n#endif  // XGBOOST_USE_NVCOMP\n}\n\ninline void AssertNCCLSupport() {\n#if !defined(XGBOOST_USE_NCCL)\n  LOG(FATAL) << \"XGBoost version not compiled with NCCL support.\";\n#endif  // !defined(XGBOOST_USE_NCCL)\n}\n\ninline void AssertSYCLSupport() {\n#ifndef XGBOOST_USE_SYCL\n  LOG(FATAL) << \"XGBoost version not compiled with SYCL support.\";\n#endif  // XGBOOST_USE_SYCL\n}\n\n/**\n * @brief Last index of a group in a CSR style of index pointer.\n */\ntemplate <typename Indexable>\nXGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {\n  return indptr[group + 1] - 1;\n}\n\n// Convert the number of bytes to a human readable unit.\nstd::string HumanMemUnit(std::size_t n_bytes);\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_COMMON_H_\n"
  },
  {
    "path": "src/common/compressed_iterator.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\file compressed_iterator.h\n */\n#pragma once\n#include <algorithm>  // for max\n#include <cmath>      // for ceil, log2\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t\n\n#include \"common.h\"\n#include \"xgboost/base.h\"      // for XGBOOST_RESTRICT\n#include \"xgboost/byteswap.h\"  // for ByteSwap\n\n#ifdef __CUDACC__\n#include \"device_helpers.cuh\"\n#endif  // __CUDACC__\n\nnamespace xgboost::common {\nusing CompressedByteT = unsigned char;\n\nnamespace detail {\ninline void SetBit(CompressedByteT *byte, int bit_idx) {\n  *byte |= 1 << bit_idx;\n}\ntemplate <typename T>\ninline T CheckBit(const T &byte, int bit_idx) {\n  return byte & (1 << bit_idx);\n}\ninline void ClearBit(CompressedByteT *byte, int bit_idx) {\n  *byte &= ~(1 << bit_idx);\n}\ninline constexpr int kPadding = 8;  // Assign padding so we can read slightly off\n                                    // the beginning of the array\n\n// The number of bits required to represent a given unsigned range\ninline XGBOOST_DEVICE std::uint32_t SymbolBits(std::size_t n_symbols) {\n  std::uint32_t bits = std::ceil(log2(static_cast<double>(n_symbols)));\n  return common::Max(bits, std::uint32_t{1});\n}\n\n// The alignment is assumed to be power of 2.\ntemplate <typename T>\nXGBOOST_HOST_DEV_INLINE CompressedByteT const *AlignDown(T const *ptr, std::uint32_t alignment) {\n  return reinterpret_cast<CompressedByteT const *>(reinterpret_cast<std::uintptr_t>(ptr) &\n                                                   ~std::uintptr_t{alignment - 1});\n}\n\nstruct PaddedPtr {\n  CompressedByteT const *XGBOOST_RESTRICT ptr;\n  std::int32_t head_padding;\n};\n\n// Create an aligned pointer with head padding.\ntemplate <typename T>\nXGBOOST_DEVICE auto MakePaddedPtr(T const *XGBOOST_RESTRICT ptr, std::uint32_t alignment) {\n  auto base = AlignDown(ptr, alignment);\n  return PaddedPtr{\n      base, 
static_cast<std::int32_t>(reinterpret_cast<CompressedByteT const *>(ptr) - base)};\n}\n\n// Vector load, load a single 64-bit unsigned integer with 2 32-bit loads. Input ptr must\n// be correctly aligned first.\ntemplate <typename T>\nXGBOOST_DEVICE [[nodiscard]] std::uint64_t Load64u(T const *XGBOOST_RESTRICT ptr) {\n  std::uint64_t u64 = 0;\n  auto out_ptr = reinterpret_cast<std::uint32_t *>(&u64);\n  // base ptr in uint32\n  auto in_ptr = reinterpret_cast<std::uint32_t const *>(ptr);\n  // 2 vector loads for 8 bytes.\n  out_ptr[0] = in_ptr[0];\n  out_ptr[1] = in_ptr[1];\n  return u64;\n}\n}  // namespace detail\n\n/**\n * \\class CompressedBufferWriter\n *\n * \\brief Writes bit compressed symbols to a memory buffer. Use\n * CompressedIterator to read symbols back from buffer. Currently limited to a\n * maximum symbol size of 28 bits.\n *\n * \\author  Rory\n * \\date  7/9/2017\n */\n\nclass CompressedBufferWriter {\n  std::size_t symbol_bits_;\n\n public:\n  XGBOOST_DEVICE explicit CompressedBufferWriter(std::size_t num_symbols)\n      : symbol_bits_{detail::SymbolBits(num_symbols)} {}\n\n  /**\n   * \\fn  static size_t CompressedBufferWriter::CalculateBufferSize(int\n   * num_elements, int num_symbols)\n   *\n   * \\brief Calculates number of bytes required for a given number of elements\n   * and a symbol range.\n   *\n   * \\author  Rory\n   * \\date  7/9/2017\n   *\n   * \\param num_elements  Number of elements.\n   * \\param num_symbols   Max number of symbols (alphabet size)\n   *\n   * \\return  The calculated buffer size.\n   */\n  static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols) {\n    constexpr int kBitsPerByte = 8;\n    size_t compressed_size = static_cast<size_t>(std::ceil(\n        static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) / kBitsPerByte));\n    // Handle atomicOr where input must be unsigned int, hence 4 bytes aligned.\n    size_t ret = std::ceil(static_cast<double>(compressed_size + 
detail::kPadding) /\n                           static_cast<double>(sizeof(std::uint32_t))) *\n                 sizeof(std::uint32_t);\n    // Need at least 5 bytes for the reader\n    return std::max(ret, static_cast<std::size_t>(detail::kPadding + 1));\n  }\n\n  template <typename T>\n  void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {\n    constexpr std::int32_t kBitsPerByte = 8;\n\n    for (size_t i = 0; i < symbol_bits_; i++) {\n      size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / kBitsPerByte;\n      byte_idx += detail::kPadding;\n      size_t bit_idx = ((kBitsPerByte + i) - ((offset + 1) * symbol_bits_)) % kBitsPerByte;\n\n      if (detail::CheckBit(symbol, i)) {\n        detail::SetBit(&buffer[byte_idx], bit_idx);\n      } else {\n        detail::ClearBit(&buffer[byte_idx], bit_idx);\n      }\n    }\n  }\n\n#ifdef __CUDACC__\n  __device__ void AtomicWriteSymbol\n    (CompressedByteT* buffer, uint64_t symbol, size_t offset) {\n    size_t ibit_start = offset * symbol_bits_;\n    size_t ibit_end = (offset + 1) * symbol_bits_ - 1;\n    size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;\n\n    symbol <<= 7 - ibit_end % 8;\n    for (ptrdiff_t ibyte = ibyte_end; ibyte >= static_cast<ptrdiff_t>(ibyte_start); --ibyte) {\n      dh::AtomicOrByte(reinterpret_cast<unsigned int*>(buffer + detail::kPadding),\n                       ibyte, symbol & 0xff);\n      symbol >>= 8;\n    }\n  }\n#endif  // __CUDACC__\n\n  template <typename IterT>\n  void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {\n    uint64_t tmp = 0;\n    size_t stored_bits = 0;\n    const size_t max_stored_bits = 64 - symbol_bits_;\n    size_t buffer_position = detail::kPadding;\n    const size_t num_symbols = input_end - input_begin;\n    for (size_t i = 0; i < num_symbols; i++) {\n      typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];\n      if (stored_bits > max_stored_bits) {\n        // Eject only full bytes\n  
      size_t tmp_bytes = stored_bits / 8;\n        for (size_t j = 0; j < tmp_bytes; j++) {\n          buffer[buffer_position] = static_cast<CompressedByteT>(\n              tmp >> (stored_bits - (j + 1) * 8));\n          buffer_position++;\n        }\n        stored_bits -= tmp_bytes * 8;\n        tmp &= (1 << stored_bits) - 1;\n      }\n      // Store symbol\n      tmp <<= symbol_bits_;\n      tmp |= symbol;\n      stored_bits += symbol_bits_;\n    }\n\n    // Eject all bytes\n    int tmp_bytes =\n        static_cast<int>(std::ceil(static_cast<float>(stored_bits) / 8));\n    for (int j = 0; j < tmp_bytes; j++) {\n      int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8;\n      if (shift_bits >= 0) {\n        buffer[buffer_position] =\n            static_cast<CompressedByteT>(tmp >> shift_bits);\n      } else {\n        buffer[buffer_position] =\n            static_cast<CompressedByteT>(tmp << std::abs(shift_bits));\n      }\n      buffer_position++;\n    }\n  }\n};\n\n/**\n * @brief Read symbols from a bit compressed memory buffer. 
Usable on device and host.\n *\n * @author  Rory\n *\n * @tparam  T          Type of the symbols.\n */\ntemplate <typename T>\nclass CompressedIterator {\n public:\n  typedef T value_type;                     // NOLINT\n  typedef value_type *pointer;              // NOLINT\n  typedef value_type reference;             // NOLINT\n\n private:\n  CompressedByteT const *XGBOOST_RESTRICT buffer_{nullptr};\n  std::uint32_t const symbol_bits_{0};\n\n  static_assert(sizeof(T) <= sizeof(std::uint32_t));\n  static_assert(detail::kPadding >= std::alignment_of_v<std::uint32_t>);\n\n public:\n  CompressedIterator() = default;\n  CompressedIterator(CompressedByteT const *XGBOOST_RESTRICT buffer, bst_idx_t n_symbols)\n      : buffer_{buffer}, symbol_bits_{detail::SymbolBits(n_symbols)} {\n#if !defined(DMLC_LITTLE_ENDIAN) || DMLC_LITTLE_ENDIAN != 1\n    LOG(FATAL) << \"Not implemented for big endian\";\n#endif\n  }\n\n  XGBOOST_DEVICE reference operator[](std::size_t idx) const {\n    constexpr std::int32_t kBitsPerByte = 8;\n    // Read 5 bytes - the maximum we will need assuming symbols fit in a 32bit int.\n    constexpr std::int32_t kBytes = 5;\n\n    std::size_t start_bit_idx = ((idx + 1) * symbol_bits_ - 1);\n    std::size_t start_byte_idx = start_bit_idx / kBitsPerByte;\n    start_byte_idx += detail::kPadding;\n\n    /**\n     * The following load is equivalent to:\n     *\n     * std::uint64_t tmp = static_cast<std::uint64_t>(buffer_[start_byte_idx - 4]) << 32 |\n     *                     static_cast<std::uint64_t>(buffer_[start_byte_idx - 3]) << 24 |\n     *                     static_cast<std::uint64_t>(buffer_[start_byte_idx - 2]) << 16 |\n     *                     static_cast<std::uint64_t>(buffer_[start_byte_idx - 1]) << 8 |\n     *                     buffer_[start_byte_idx];\n     *\n     * The above snippet loads 5 bytes from the buffer, and performs a byte swap within\n     * the loaded 5 bytes. 
We use a vector load to reduce the pressure on the LSU.\n     */\n\n    // Pointer to the first byte.\n    auto beg_ptr = buffer_ + start_byte_idx - (kBytes - 1);\n    // Align the pointer for vector load.\n    auto [ptr, head_padding] = detail::MakePaddedPtr(beg_ptr, std::alignment_of_v<std::uint32_t>);\n    // Load 8 bytes, we will use 5 of them.\n    std::uint64_t tmp = detail::Load64u(ptr);\n    // tail_padding = 8 - 5 - head_padding\n    std::int32_t tail_padding_bits = (sizeof(tmp) - kBytes - head_padding) * kBitsPerByte;\n    // Unsigned logical shift. Knock out the unneeded bits loaded by the vector load. We\n    // assume little endian here.\n    tmp = ByteSwap(tmp << tail_padding_bits);\n\n    // Knock out the unneeded bits from the right\n    std::int32_t bit_shift = (kBitsPerByte - ((idx + 1) * symbol_bits_)) % kBitsPerByte;\n    tmp >>= bit_shift;\n    // Take exactly symbol_bits_ number of bits by masking off unneeded bits.\n    std::uint64_t mask = (static_cast<std::uint64_t>(1) << symbol_bits_) - 1;\n    return static_cast<T>(tmp & mask);\n  }\n};\n\n/**\n * @brief A compressed iterator with two buffers for the underlying storage.\n *\n * This accessor is significantly slower than the single buffer one due to pipeline\n * stalling and should not be used as default. Pre-calculating the buffer selection\n * indicator can help mitigate it. 
But we only use this iterator for external memory with\n * direct memory access, which is slow anyway.\n *\n * Use the single buffer one as a reference for how it works.\n */\ntemplate <typename OutT>\nclass DoubleCompressedIter {\n public:\n  using value_type = OutT;                       // NOLINT\n  using pointer = value_type *;                  // NOLINT\n  using reference = value_type;                  // NOLINT\n\n private:\n  using BufT = CompressedByteT const *;\n  BufT XGBOOST_RESTRICT buf0_{nullptr};\n  BufT XGBOOST_RESTRICT buf1_{nullptr};\n  bst_idx_t const n0_{0};  // Size of the first buffer in bytes.\n  std::uint32_t const symbol_bits_{0};\n\n public:\n  DoubleCompressedIter() = default;\n  DoubleCompressedIter(CompressedByteT const *XGBOOST_RESTRICT buf0, std::size_t n0_bytes,\n                       CompressedByteT const *XGBOOST_RESTRICT buf1, bst_idx_t n_symbols)\n      : buf0_{buf0}, buf1_{buf1}, n0_{n0_bytes}, symbol_bits_{detail::SymbolBits(n_symbols)} {}\n\n  XGBOOST_DEVICE reference operator[](std::size_t idx) const {\n    constexpr std::int32_t kBitsPerByte = 8;\n\n    std::size_t start_bit_idx = ((idx + 1) * symbol_bits_ - 1);\n    std::size_t start_byte_idx = start_bit_idx / kBitsPerByte;\n    start_byte_idx += detail::kPadding;\n\n    std::uint64_t tmp;\n\n    if (start_byte_idx >= this->n0_ && (start_byte_idx - 4) < this->n0_) {\n      // Access between two buffers.\n      auto getv = [&](auto shift) {\n        auto shifted = start_byte_idx - shift;\n        bool ind = (shifted >= n0_);  // indicator for which buffer to read\n        // Pick the buffer to read\n        auto const *XGBOOST_RESTRICT buf = ind ? 
buf1_ : buf0_;\n        shifted -= ind * n0_;\n        return static_cast<std::uint64_t>(buf[shifted]);\n      };\n      // Read 5 bytes - the maximum we will need\n      tmp = static_cast<std::uint64_t>(buf0_[start_byte_idx - 4]) << 32 | getv(3) << 24 |\n            getv(2) << 16 | getv(1) << 8 | static_cast<std::uint64_t>(buf1_[start_byte_idx - n0_]);\n    } else {\n      // Access one of the buffers\n      bool ind = start_byte_idx >= n0_;\n      // Pick the buffer to read\n      auto const *XGBOOST_RESTRICT buf = reinterpret_cast<CompressedByteT const *>(\n          (!ind) * reinterpret_cast<std::uintptr_t>(buf0_) +\n          ind * reinterpret_cast<std::uintptr_t>(buf1_));\n      // shifted start_byte_idx for buffer-local indexing.\n      auto shifted = start_byte_idx - n0_ * ind;\n\n      // Read 5 bytes - the maximum we will need\n\n      // We don't have vector load here as we might create out-of-bound access due to down\n      // alignment for the second buffer.\n      tmp = static_cast<std::uint64_t>(buf[shifted - 4]) << 32 |\n            static_cast<std::uint64_t>(buf[shifted - 3]) << 24 |\n            static_cast<std::uint64_t>(buf[shifted - 2]) << 16 |\n            static_cast<std::uint64_t>(buf[shifted - 1]) << 8 | buf[shifted];\n    }\n\n    // Knock out the unneeded bits from the right\n    std::int32_t bit_shift = (kBitsPerByte - ((idx + 1) * symbol_bits_)) % kBitsPerByte;\n    tmp >>= bit_shift;\n    // Take exactly symbol_bits_ number of bits by masking off unneeded bits.\n    std::uint64_t mask = (static_cast<std::uint64_t>(1) << symbol_bits_) - 1;\n    return static_cast<OutT>(tmp & mask);\n  }\n};\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/cuda_context.cuh",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_\n#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_\n#include <thrust/execution_policy.h>\n\n#include \"cuda_stream.h\"      // for DefaultStream\n#include \"device_vector.cuh\"  // for XGBCachingDeviceAllocator, XGBDeviceAllocator\n\nnamespace xgboost {\nstruct CUDAContext {\n private:\n  dh::XGBCachingDeviceAllocator<char> caching_alloc_;\n  dh::XGBDeviceAllocator<char> alloc_;\n\n public:\n  /**\n   * @brief Caching thrust policy.\n   */\n  auto CTP() const {\n    return thrust::cuda::par_nosync(caching_alloc_).on(curt::DefaultStream());\n  }\n  /**\n   * @brief Thrust policy without caching allocator.\n   */\n  auto TP() const {\n    return thrust::cuda::par_nosync(alloc_).on(curt::DefaultStream());\n  }\n  auto Stream() const { return curt::DefaultStream(); }\n};\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_CUDA_CONTEXT_CUH_\n"
  },
  {
    "path": "src/common/cuda_dr_utils.cc",
    "content": "/**\n * Copyright 2024-2026, XGBoost contributors\n */\n#if defined(XGBOOST_USE_CUDA)\n#include \"cuda_dr_utils.h\"\n\n#include <algorithm>  // for max\n#include <array>      // for array\n#include <charconv>   // for from_chars\n#include <cstdint>    // for int32_t\n#include <cstring>    // for memset\n#include <memory>     // for make_unique\n#include <mutex>      // for call_once\n#include <sstream>    // for stringstream\n#include <string>     // for string, stoi\n\n#include \"common.h\"               // for safe_cuda, TrimFirst, Split\n#include \"cuda_rt_utils.h\"        // for CurrentDevice\n#include \"io.h\"                   // for CmdOutput\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::cudr {\nCuDriverApi::CuDriverApi(std::int32_t cu_major, std::int32_t cu_minor, std::int32_t kdm_major) {\n  // similar to dlopen, but without the need to release a handle.\n  auto safe_load = [](xgboost::StringView name, auto **fnptr) {\n    cudaDriverEntryPointQueryResult status;\n#if (CUDA_VERSION / 1000) >= 13\n    dh::safe_cuda(cudaGetDriverEntryPointByVersion(name.c_str(), reinterpret_cast<void **>(fnptr),\n                                                   12080, cudaEnablePerThreadDefaultStream,\n                                                   &status));\n#else\n    dh::safe_cuda(cudaGetDriverEntryPoint(name.c_str(), reinterpret_cast<void **>(fnptr),\n                                          cudaEnablePerThreadDefaultStream, &status));\n#endif  // (CUDA_VERSION / 1000) >= 13\n    CHECK(status == cudaDriverEntryPointSuccess) << name;\n    CHECK(*fnptr);\n  };\n\n  safe_load(\"cuMemGetAllocationGranularity\", &this->cuMemGetAllocationGranularity);\n  safe_load(\"cuMemCreate\", &this->cuMemCreate);\n  safe_load(\"cuMemMap\", &this->cuMemMap);\n  safe_load(\"cuMemAddressReserve\", &this->cuMemAddressReserve);\n  safe_load(\"cuMemSetAccess\", &this->cuMemSetAccess);\n  safe_load(\"cuMemUnmap\", &this->cuMemUnmap);\n  
safe_load(\"cuMemRelease\", &this->cuMemRelease);\n  safe_load(\"cuMemAddressFree\", &this->cuMemAddressFree);\n  safe_load(\"cuGetErrorString\", &this->cuGetErrorString);\n  safe_load(\"cuGetErrorName\", &this->cuGetErrorName);\n  safe_load(\"cuDeviceGetAttribute\", &this->cuDeviceGetAttribute);\n  safe_load(\"cuDeviceGet\", &this->cuDeviceGet);\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n  // CTK 12.8\n  if (((cu_major == 12 && cu_minor >= 8) || cu_major > 12) && (kdm_major >= 570)) {\n    safe_load(\"cuMemBatchDecompressAsync\", &this->cuMemBatchDecompressAsync);\n  } else {\n    this->cuMemBatchDecompressAsync = nullptr;\n  }\n#else\n  (void)cu_major;\n  (void)cu_minor;\n  (void)kdm_major;\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n  CHECK(this->cuMemGetAllocationGranularity);\n}\n\nvoid CuDriverApi::ThrowIfError(CUresult status, StringView fn, std::int32_t line,\n                               char const *file) const {\n  if (status == CUDA_SUCCESS) {\n    return;\n  }\n  std::string cuerr{\"CUDA driver error:\"};\n\n  char const *name{nullptr};\n  auto err0 = this->cuGetErrorName(status, &name);\n  if (err0 != CUDA_SUCCESS) {\n    LOG(WARNING) << cuerr << status << \". Then we failed to get error name:\" << err0;\n  }\n  char const *msg{nullptr};\n  auto err1 = this->cuGetErrorString(status, &msg);\n  if (err1 != CUDA_SUCCESS) {\n    LOG(WARNING) << cuerr << status << \". 
Then we failed to get error string:\" << err1;\n  }\n\n  std::stringstream ss;\n  ss << fn << \"[\" << file << \":\" << line << \"]:\";\n  if (name != nullptr && err0 == CUDA_SUCCESS) {\n    ss << cuerr << \" \" << name << \".\";\n  }\n  if (msg != nullptr && err1 == CUDA_SUCCESS) {\n    ss << \" \" << msg << \"\\n\";\n  }\n  LOG(FATAL) << ss.str();\n}\n\n[[nodiscard]] CuDriverApi &GetGlobalCuDriverApi() {\n  std::int32_t cu_major = -1, cu_minor = -1;\n  curt::GetDrVersionGlobal(&cu_major, &cu_minor);\n\n  std::int32_t kdm_major = -1, kdm_minor = -1;\n  if (!GetVersionFromSmiGlobal(&kdm_major, &kdm_minor)) {\n    kdm_major = -1;\n  }\n\n  static std::once_flag flag;\n  static std::unique_ptr<CuDriverApi> cu;\n  std::call_once(flag, [&] { cu = std::make_unique<CuDriverApi>(cu_major, cu_minor, kdm_major); });\n  return *cu;\n}\n\nvoid MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc) {\n  auto ordinal = curt::CurrentDevice();\n  loc->type = type;\n\n  if (type == CU_MEM_LOCATION_TYPE_DEVICE) {\n    loc->id = ordinal;\n  } else {\n    std::int32_t numa_id = -1;\n    CUdevice device;\n    safe_cu(GetGlobalCuDriverApi().cuDeviceGet(&device, ordinal));\n    safe_cu(GetGlobalCuDriverApi().cuDeviceGetAttribute(&numa_id, CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID,\n                                                        device));\n    numa_id = std::max(numa_id, 0);\n\n    loc->id = numa_id;\n  }\n}\n\n[[nodiscard]] CUmemAllocationProp MakeAllocProp(CUmemLocationType type) {\n  CUmemAllocationProp prop;\n  std::memset(&prop, '\\0', sizeof(prop));\n  prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;\n  MakeCuMemLocation(type, &prop.location);\n  return prop;\n}\n\n[[nodiscard]] bool GetVersionFromSmi(std::int32_t *p_major, std::int32_t *p_minor) {\n  using ::xgboost::common::Split;\n  using ::xgboost::common::TrimFirst;\n  // `nvidia-smi --version` is not available for older versions, as a result, we can't query the\n  // cuda driver version unless we want to parse the table 
output.\n\n  // Example output on a 2-GPU system:\n  //\n  // $ nvidia-smi --query-gpu=driver_version --format=csv\n  //\n  // driver_version\n  // 570.124.06\n  // 570.124.06\n  //\n  auto cmd = \"nvidia-smi --query-gpu=driver_version --format=csv\";\n  auto smi_out_str = common::CmdOutput(StringView{cmd});\n\n  auto Invalid = [=] {\n    *p_major = *p_minor = -1;\n    return false;\n  };\n  if (smi_out_str.empty()) {\n    return Invalid();\n  }\n\n  auto smi_split = Split(smi_out_str, '\\n');\n  if (smi_split.size() < 2) {\n    return Invalid();\n  }\n\n  // Use the first GPU\n  auto smi_ver = Split(TrimFirst(smi_split[1]), '.');\n  // 570.124.06\n  // On WSL2, you can have driver version with two components, e.g. 573.24\n  if (smi_ver.size() != 2 && smi_ver.size() != 3) {\n    return Invalid();\n  }\n\n  auto [smajor, sminor] = std::tie(smi_ver[0], smi_ver[1]);\n  auto ret0 = std::from_chars(smajor.data(), smajor.data() + smajor.size(), *p_major);\n  auto ret1 = std::from_chars(sminor.data(), sminor.data() + sminor.size(), *p_minor);\n  if (ret0.ec != std::errc{} || ret1.ec != std::errc{}) {\n    return Invalid();\n  }\n  LOG(INFO) << \"Driver version: `\" << *p_major << \".\" << *p_minor << \"`\";\n  return true;\n}\n\n[[nodiscard]] bool GetVersionFromSmiGlobal(std::int32_t *p_major, std::int32_t *p_minor) {\n  static std::once_flag flag;\n  static std::int32_t major = -1, minor = -1;\n  static bool result = false;\n  std::call_once(flag, [&] { result = GetVersionFromSmi(&major, &minor); });\n\n  *p_major = major;\n  *p_minor = minor;\n  return result;\n}\n\nnamespace detail {\n// Split up an impl function for simple tests.\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiImpl(std::string const &smi_output) {\n  using common::Split, common::TrimFirst, common::TrimLast;\n  auto smi_out_str = TrimLast(TrimFirst(smi_output));\n  auto lines = Split(smi_out_str, '\\n');\n  if (lines.size() <= 1) {\n    return -1;\n  }\n  return lines.size() - 1;\n}\n}  // namespace 
detail\n\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmi() {\n  auto n_devices = curt::AllVisibleGPUs();\n  if (n_devices < 1) {\n    return -1;\n  }\n\n  std::array<unsigned char, curt::kUuidLength> uuid;\n  // Select the current GPU to query.\n  curt::GetUuid(common::Span{uuid.data(), uuid.size()}, curt::CurrentDevice());\n  auto str_uuid = curt::PrintUuid(common::Span{uuid.data(), uuid.size()});\n  // See test for example output from smi.\n  auto cmd = \"nvidia-smi c2c -s -i \" + str_uuid;\n  auto out = common::CmdOutput(StringView{cmd});\n  LOG(DEBUG) << \"c2c:\\n\" << out << \"\\n\";\n  auto cnt = detail::GetC2cLinkCountFromSmiImpl(out);\n  return cnt;\n}\n\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiGlobal() {\n  static std::once_flag once;\n  static std::int32_t cnt = -1;\n  std::call_once(once, [&] { cnt = GetC2cLinkCountFromSmi(); });\n  return cnt;\n}\n}  // namespace xgboost::cudr\n#endif\n"
  },
  {
    "path": "src/common/cuda_dr_utils.h",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n *\n * @brief Utility for CUDA driver API.\n *\n * XGBoost doesn't link libcuda.so at build time. The utilities here load the shared\n * object at runtime.\n */\n#pragma once\n\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n\n#include \"xgboost/string_view.h\"  // for StringView\n\n#if CUDART_VERSION >= 12080 && defined(__linux__)\n#define CUDA_HW_DECOM_AVAILABLE 1\n#endif\n\nnamespace xgboost::cudr {\n/**\n * @brief A struct for retrieving CUDA driver API from the runtime API.\n */\nstruct CuDriverApi {\n  using Flags = unsigned long long;  // NOLINT\n\n  // Memroy manipulation functions.\n  using MemGetAllocationGranularityFn = CUresult(size_t *granularity,\n                                                 const CUmemAllocationProp *prop,\n                                                 CUmemAllocationGranularity_flags option);\n  using MemCreateFn = CUresult(CUmemGenericAllocationHandle *handle, size_t size,\n                               const CUmemAllocationProp *prop, Flags flags);\n  using MemMapFn = CUresult(CUdeviceptr ptr, size_t size, size_t offset,\n                            CUmemGenericAllocationHandle handle, Flags flags);\n  using MemAddressReserveFn = CUresult(CUdeviceptr *ptr, size_t size, size_t alignment,\n                                       CUdeviceptr addr, Flags flags);\n  using MemSetAccessFn = CUresult(CUdeviceptr ptr, size_t size, const CUmemAccessDesc *desc,\n                                  size_t count);\n  using MemUnmapFn = CUresult(CUdeviceptr ptr, size_t size);\n  using MemReleaseFn = CUresult(CUmemGenericAllocationHandle handle);\n  using MemAddressFreeFn = CUresult(CUdeviceptr ptr, size_t size);\n  // Error handling\n  using GetErrorString = CUresult(CUresult error, const char **pStr);\n  using GetErrorName = CUresult(CUresult error, const char **pStr);\n  // Device attributes\n  
using DeviceGetAttribute = CUresult(int *pi, CUdevice_attribute attrib, CUdevice dev);\n  using DeviceGet = CUresult(CUdevice *device, int ordinal);\n\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n  using BatchDecompressAsync = CUresult(CUmemDecompressParams *paramsArray, size_t count,\n                                        unsigned int flags, size_t *errorIndex, CUstream stream);\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n\n  MemGetAllocationGranularityFn *cuMemGetAllocationGranularity{nullptr};  // NOLINT\n  MemCreateFn *cuMemCreate{nullptr};                                      // NOLINT\n  /**\n   * @param[in] offset - Must be zero.\n   */\n  MemMapFn *cuMemMap{nullptr};  // NOLINT\n  /**\n   * @param[out] ptr       - Resulting pointer to start of virtual address range allocated\n   * @param[in]  size      - Size of the reserved virtual address range requested\n   * @param[in]  alignment - Alignment of the reserved virtual address range requested\n   * @param[in]  addr      - Fixed starting address range requested\n   * @param[in]  flags     - Currently unused, must be zero\n   */\n  MemAddressReserveFn *cuMemAddressReserve{nullptr};  // NOLINT\n  MemSetAccessFn *cuMemSetAccess{nullptr};            // NOLINT\n  MemUnmapFn *cuMemUnmap{nullptr};                    // NOLINT\n  MemReleaseFn *cuMemRelease{nullptr};                // NOLINT\n  MemAddressFreeFn *cuMemAddressFree{nullptr};        // NOLINT\n  GetErrorString *cuGetErrorString{nullptr};          // NOLINT\n  GetErrorName *cuGetErrorName{nullptr};              // NOLINT\n  DeviceGetAttribute *cuDeviceGetAttribute{nullptr};  // NOLINT\n  DeviceGet *cuDeviceGet{nullptr};                    // NOLINT\n\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n\n  BatchDecompressAsync *cuMemBatchDecompressAsync{nullptr};  // NOLINT\n\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n\n  CuDriverApi(std::int32_t cu_major, std::int32_t cu_minor, std::int32_t kdm_major);\n\n  void ThrowIfError(CUresult status, StringView fn, 
std::int32_t line, char const *file) const;\n};\n\n[[nodiscard]] CuDriverApi &GetGlobalCuDriverApi();\n\n/**\n * @brief Macro for guarding CUDA driver API calls.\n */\n#define safe_cu(call)                                                                            \\\n  do {                                                                                           \\\n    auto __status = (call);                                                                      \\\n    if (__status != CUDA_SUCCESS) {                                                              \\\n      ::xgboost::cudr::GetGlobalCuDriverApi().ThrowIfError(__status, #call, __LINE__, __FILE__); \\\n    }                                                                                            \\\n  } while (0)\n\n// Get the allocation granularity.\ninline auto GetAllocGranularity(CUmemAllocationProp const *prop) {\n  std::size_t granularity;\n  safe_cu(GetGlobalCuDriverApi().cuMemGetAllocationGranularity(\n      &granularity, prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));\n  return granularity;\n}\n\n/**\n * @brief Obtain appropriate device ordinal for `CUmemLocation`.\n */\nvoid MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc);\n\n/**\n * @brief Construct a `CUmemAllocationProp`.\n */\n[[nodiscard]] CUmemAllocationProp MakeAllocProp(CUmemLocationType type);\n\n/**\n * @brief Get system driver version from the `nvidia-smi` command.\n *\n * @return Whether the system call is successful.\n */\n[[nodiscard]] bool GetVersionFromSmi(std::int32_t *p_major, std::int32_t *p_minor);\n\n/**\n * @brief Cache the result from @ref GetVersionFromSmi in a global variable\n */\n[[nodiscard]] bool GetVersionFromSmiGlobal(std::int32_t *p_major, std::int32_t *p_minor);\n\nnamespace detail {\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiImpl(std::string const &smi_output);\n}  // namespace detail\n\n/**\n * @brief Get the total number of C2C links `NVML_FI_DEV_C2C_LINK_COUNT`.\n *\n * @return -1 if 
there's no C2C. Otherwise, the number of links.\n */\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmi();\n/**\n * @brief Cache the result from @ref GetC2cLinkCountFromSmi in a global variable\n */\n[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiGlobal();\n}  // namespace xgboost::cudr\n"
  },
  {
    "path": "src/common/cuda_pinned_allocator.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"cuda_pinned_allocator.h\"\n\n#if defined(XGBOOST_USE_CUDA)\n\n#include <cuda_runtime_api.h>  // for cudaMemPoolCreate, cudaMemPoolDestroy\n\n#include <array>    // for array\n#include <cstring>  // for memset\n#include <memory>   // for unique_ptr\n\n#endif  // defined(XGBOOST_USE_CUDA)\n\n#include \"common.h\"\n#include \"cuda_dr_utils.h\"  // for CUDA_HW_DECOM_AVAILABLE\n#include \"cuda_rt_utils.h\"  // for CurrentDevice\n\nnamespace xgboost::common::cuda_impl {\n[[nodiscard]] MemPoolHdl CreateHostMemPool() {\n  auto mem_pool = std::unique_ptr<cudaMemPool_t, void (*)(cudaMemPool_t*)>{\n      [] {\n        cudaMemPoolProps h_props;\n        std::memset(&h_props, '\\0', sizeof(h_props));\n        auto numa_id = curt::GetNumaId();\n        h_props.location.id = numa_id;\n        h_props.location.type = cudaMemLocationTypeHostNuma;\n        h_props.allocType = cudaMemAllocationTypePinned;\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n        h_props.usage = cudaMemPoolCreateUsageHwDecompress;\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n        h_props.handleTypes = cudaMemHandleTypeNone;\n\n        cudaMemPoolProps d_props;\n        std::memset(&d_props, '\\0', sizeof(d_props));\n        auto device_idx = curt::CurrentDevice();\n        d_props.location.id = device_idx;\n        d_props.location.type = cudaMemLocationTypeDevice;\n        d_props.allocType = cudaMemAllocationTypePinned;\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n        d_props.usage = cudaMemPoolCreateUsageHwDecompress;\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n        d_props.handleTypes = cudaMemHandleTypeNone;\n\n        std::array<cudaMemPoolProps, 2> vprops{h_props, d_props};\n\n        cudaMemPool_t* mem_pool = new cudaMemPool_t;\n        dh::safe_cuda(cudaMemPoolCreate(mem_pool, vprops.data()));\n\n        cudaMemAccessDesc h_desc;\n        h_desc.location = h_props.location;\n        h_desc.flags = 
cudaMemAccessFlagsProtReadWrite;\n\n        cudaMemAccessDesc d_desc;\n        d_desc.location = d_props.location;\n        d_desc.flags = cudaMemAccessFlagsProtReadWrite;\n\n        std::array<cudaMemAccessDesc, 2> descs{h_desc, d_desc};\n        dh::safe_cuda(cudaMemPoolSetAccess(*mem_pool, descs.data(), descs.size()));\n        return mem_pool;\n      }(),\n      [](cudaMemPool_t* mem_pool) {\n        if (mem_pool) {\n          dh::safe_cuda(cudaMemPoolDestroy(*mem_pool));\n          delete mem_pool;\n        }\n      }};\n  return mem_pool;\n}\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/cuda_pinned_allocator.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n *\n * @brief cuda pinned allocator for usage with thrust containers\n */\n#pragma once\n\n#include <cuda_runtime.h>\n\n#include <cstddef>  // for size_t\n#include <limits>   // for numeric_limits\n#include <memory>   // for unique_ptr\n#include <new>      // for bad_array_new_length\n\n#include \"common.h\"\n\nnamespace xgboost::common::cuda_impl {\n// \\p pinned_allocator is a CUDA-specific host memory allocator\n//  that employs \\c cudaMallocHost for allocation.\n//\n// This implementation is ported from the experimental/pinned_allocator\n// that Thrust used to provide.\n//\n//  \\see https://en.cppreference.com/w/cpp/memory/allocator\ntemplate <typename T>\nstruct PinnedAllocPolicy {\n  using pointer = T*;              // NOLINT: The type returned by address() / allocate()\n  using const_pointer = const T*;  // NOLINT: The type returned by address()\n  using size_type = std::size_t;   // NOLINT: The type used for the size of the allocation\n  using value_type = T;            // NOLINT: The type of the elements in the allocator\n\n  [[nodiscard]] constexpr size_type max_size() const {  // NOLINT\n    return std::numeric_limits<size_type>::max() / sizeof(value_type);\n  }\n\n  [[nodiscard]] pointer allocate(size_type cnt, const_pointer = nullptr) const {  // NOLINT\n    if (cnt > this->max_size()) {\n      throw std::bad_array_new_length{};\n    }\n\n    pointer result(nullptr);\n    dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));\n    return result;\n  }\n\n  void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); }  // NOLINT\n};\n\ntemplate <typename T>\nstruct ManagedAllocPolicy {\n  using pointer = T*;              // NOLINT: The type returned by address() / allocate()\n  using const_pointer = const T*;  // NOLINT: The type returned by address()\n  using size_type = std::size_t;   // NOLINT: The type used for the size of the 
allocation\n  using value_type = T;            // NOLINT: The type of the elements in the allocator\n\n  [[nodiscard]] constexpr size_type max_size() const {  // NOLINT\n    return std::numeric_limits<size_type>::max() / sizeof(value_type);\n  }\n\n  [[nodiscard]] pointer allocate(size_type cnt, const_pointer = nullptr) const {  // NOLINT\n    if (cnt > this->max_size()) {\n      throw std::bad_array_new_length{};\n    }\n\n    pointer result(nullptr);\n    dh::safe_cuda(cudaMallocManaged(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));\n    return result;\n  }\n\n  void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFree(p)); }  // NOLINT\n};\n\n// This is actually a pinned memory allocator in disguise. We utilize HMM or ATS for\n// efficient tracked memory allocation.\ntemplate <typename T>\nstruct SamAllocPolicy {\n  using pointer = T*;              // NOLINT: The type returned by address() / allocate()\n  using const_pointer = const T*;  // NOLINT: The type returned by address()\n  using size_type = std::size_t;   // NOLINT: The type used for the size of the allocation\n  using value_type = T;            // NOLINT: The type of the elements in the allocator\n\n  [[nodiscard]] constexpr size_type max_size() const {  // NOLINT\n    return std::numeric_limits<size_type>::max() / sizeof(value_type);\n  }\n\n  [[nodiscard]] pointer allocate(size_type cnt, const_pointer = nullptr) const {  // NOLINT\n    if (cnt > this->max_size()) {\n      throw std::bad_array_new_length{};\n    }\n\n    size_type n_bytes = cnt * sizeof(value_type);\n    pointer result = reinterpret_cast<pointer>(std::malloc(n_bytes));\n    if (!result) {\n      throw std::bad_alloc{};\n    }\n    dh::safe_cuda(cudaHostRegister(result, n_bytes, cudaHostRegisterDefault));\n    return result;\n  }\n\n  void deallocate(pointer p, size_type) {  // NOLINT\n    dh::safe_cuda(cudaHostUnregister(p));\n    std::free(p);\n  }\n};\n\n/**\n * @brief A RAII handle type to the CUDA memory 
pool.\n */\nusing MemPoolHdl = std::unique_ptr<cudaMemPool_t, void (*)(cudaMemPool_t*)>;\n\n/**\n * @brief Create a CUDA memory pool for allocating host pinned memory.\n */\n[[nodiscard]] MemPoolHdl CreateHostMemPool();\n\n/**\n * @brief C++ wrapper for the CUDA memory pool.\n */\nclass HostPinnedMemPool {\n  MemPoolHdl pool_;\n\n public:\n  HostPinnedMemPool() : pool_{CreateHostMemPool()} {}\n  void* AllocateAsync(std::size_t n_bytes, cudaStream_t stream) {\n    void* ptr = nullptr;\n    dh::safe_cuda(cudaMallocFromPoolAsync(&ptr, n_bytes, *this->pool_, stream));\n    return ptr;\n  }\n  void DeallocateAsync(void* ptr, cudaStream_t stream) {\n    dh::safe_cuda(cudaFreeAsync(ptr, stream));\n  }\n};\n\ntemplate <typename T, template <typename> typename Policy>\nclass CudaHostAllocatorImpl : public Policy<T> {\n public:\n  using typename Policy<T>::value_type;\n  using typename Policy<T>::pointer;\n  using typename Policy<T>::const_pointer;\n  using typename Policy<T>::size_type;\n\n  using reference = value_type&;              // NOLINT: The parameter type for address()\n  using const_reference = const value_type&;  // NOLINT: The parameter type for address()\n\n  using difference_type = std::ptrdiff_t;  // NOLINT: The type of the distance between two pointers\n\n  template <typename U>\n  struct rebind {                                    // NOLINT\n    using other = CudaHostAllocatorImpl<U, Policy>;  // NOLINT: The rebound type\n  };\n\n  CudaHostAllocatorImpl() = default;\n  ~CudaHostAllocatorImpl() = default;\n  CudaHostAllocatorImpl(CudaHostAllocatorImpl const&) = default;\n\n  CudaHostAllocatorImpl& operator=(CudaHostAllocatorImpl const& that) = default;\n  CudaHostAllocatorImpl& operator=(CudaHostAllocatorImpl&& that) = default;\n\n  template <typename U>\n  CudaHostAllocatorImpl(CudaHostAllocatorImpl<U, Policy> const&) {}  // NOLINT\n\n  pointer address(reference r) { return &r; }              // NOLINT\n  const_pointer address(const_reference r) { return 
&r; }  // NOLINT\n\n  bool operator==(CudaHostAllocatorImpl const&) const { return true; }\n\n  bool operator!=(CudaHostAllocatorImpl const& x) const { return !operator==(x); }\n};\n\ntemplate <typename T>\nusing PinnedAllocator = CudaHostAllocatorImpl<T, PinnedAllocPolicy>;\n\ntemplate <typename T>\nusing ManagedAllocator = CudaHostAllocatorImpl<T, ManagedAllocPolicy>;\n\ntemplate <typename T>\nusing SamAllocator = CudaHostAllocatorImpl<T, SamAllocPolicy>;\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/cuda_rt_utils.cc",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n */\n#include \"cuda_rt_utils.h\"\n\n#include <cstring>  // for memcpy\n#include <set>      // for set\n#include <sstream>  // for stringstream\n\n#include \"cuda_stream.h\"   // for StreamRef\n#include \"xgboost/span.h\"  // for Span\n\n#if defined(XGBOOST_USE_CUDA)\n#include <cuda_runtime_api.h>\n\n#include <algorithm>  // for max\n\n#endif  // defined(XGBOOST_USE_CUDA)\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n#include <mutex>    // for once_flag, call_once\n\n#include \"common.h\"  // for safe_cuda\n\nnamespace xgboost::curt {\n#if defined(XGBOOST_USE_CUDA)\nstd::int32_t AllVisibleGPUs() {\n  int n_visgpus = 0;\n  try {\n    // When compiled with CUDA but running on CPU only device,\n    // cudaGetDeviceCount will fail.\n    dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));\n  } catch (const dmlc::Error&) {\n    cudaGetLastError();  // reset error.\n    return 0;\n  }\n  return n_visgpus;\n}\n\nstd::int32_t CurrentDevice(bool raise) {\n  std::int32_t device = -1;\n  if (raise) {\n    dh::safe_cuda(cudaGetDevice(&device));\n  } else if (cudaGetDevice(&device) != cudaSuccess) {\n    // Return -1 as an error.\n    return -1;\n  }\n  return device;\n}\n\n// alternatively: `nvidia-smi -q | grep Addressing`\n[[nodiscard]] bool SupportsPageableMem() {\n  std::int32_t res{0};\n  dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccess, CurrentDevice()));\n  return res == 1;\n}\n\n[[nodiscard]] bool SupportsAts() {\n  std::int32_t res{0};\n  dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccessUsesHostPageTables,\n                                       CurrentDevice()));\n  return res == 1;\n}\n\nvoid SetDevice(std::int32_t device) {\n  if (device >= 0) {\n    dh::safe_cuda(cudaSetDevice(device));\n  }\n}\n\n[[nodiscard]] std::size_t TotalMemory() {\n  std::size_t device_free = 0;\n  std::size_t device_total = 0;\n  
dh::safe_cuda(cudaMemGetInfo(&device_free, &device_total));\n  return device_total;\n}\n\nnamespace {\ntemplate <typename Fn>\nvoid GetVersionImpl(Fn&& fn, std::int32_t* major, std::int32_t* minor) {\n  static std::int32_t version = 0;\n  static std::once_flag flag;\n  std::call_once(flag, [&] { fn(&version); });\n  if (major) {\n    *major = version / 1000;\n  }\n  if (minor) {\n    *minor = version % 100 / 10;\n  }\n}\n}  // namespace\n\nvoid GetRtVersionGlobal(std::int32_t* major, std::int32_t* minor) {\n  GetVersionImpl([](std::int32_t* ver) { dh::safe_cuda(cudaRuntimeGetVersion(ver)); }, major,\n                 minor);\n}\n\nvoid GetDrVersionGlobal(std::int32_t* major, std::int32_t* minor) {\n  GetVersionImpl([](std::int32_t* ver) { dh::safe_cuda(cudaDriverGetVersion(ver)); }, major, minor);\n}\n\n[[nodiscard]] std::int32_t GetNumaId() {\n  std::int32_t numa_id = -1;\n  dh::safe_cuda(cudaDeviceGetAttribute(&numa_id, cudaDevAttrHostNumaId, curt::CurrentDevice()));\n  numa_id = std::max(numa_id, 0);\n  return numa_id;\n}\n\n[[nodiscard]] std::int32_t GetMpCnt(std::int32_t device) {\n  std::int32_t n_mps = 0;\n  dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));\n  CHECK_GT(n_mps, 0);\n  return n_mps;\n}\n\n[[nodiscard]] bool MemoryPoolsSupported(std::int32_t device) {\n  std::int32_t res = 0;\n  dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrMemoryPoolsSupported, device));\n  return !!res;\n}\n\nstatic_assert(kUuidLength == sizeof(std::declval<cudaDeviceProp>().uuid));\n\nvoid GetUuid(xgboost::common::Span<unsigned char> uuid, std::int32_t device) {\n  cudaDeviceProp prop{};\n  dh::safe_cuda(cudaGetDeviceProperties(&prop, device));\n  std::memcpy(uuid.data(), static_cast<void*>(&(prop.uuid)), kUuidLength);\n}\n\n[[nodiscard]] std::string PrintUuid(common::Span<unsigned char const, kUuidLength> uuid) {\n  std::set<std::size_t> dash_pos{0, 4, 6, 8, 10};\n  std::stringstream ss;\n  ss << \"GPU\";\n  for (std::size_t i = 
0; i < kUuidLength; ++i) {\n    if (dash_pos.find(i) != dash_pos.cend()) {\n      ss << \"-\";\n    }\n    ss << std::setw(2) << std::setfill('0') << std::hex << (0xFF & std::uint32_t{uuid[i]});\n  }\n  return ss.str();\n}\n\nvoid MemcpyAsync(void* dst, const void* src, std::size_t count, StreamRef stream) {\n  dh::safe_cuda(cudaMemcpyAsync(dst, src, count, cudaMemcpyDefault, stream));\n}\n\n#else\nstd::int32_t AllVisibleGPUs() { return 0; }\n\nstd::int32_t CurrentDevice(bool raise) {\n  if (raise) {\n    common::AssertGPUSupport();\n  }\n  return -1;\n}\n\nbool SupportsPageableMem() { return false; }\n\nbool SupportsAts() { return false; }\n\n[[nodiscard]] std::size_t TotalMemory() { return 0; }\n\nvoid CheckComputeCapability() {}\n\nvoid SetDevice(std::int32_t device) {\n  if (device >= 0) {\n    common::AssertGPUSupport();\n  }\n}\n\n[[nodiscard]] std::int32_t GetNumaId() {\n  common::AssertGPUSupport();\n  return 0;\n}\n\n[[nodiscard]] std::int32_t GetMpCnt(std::int32_t) {\n  common::AssertGPUSupport();\n  return 0;\n}\n\n[[nodiscard]] bool MemoryPoolsSupported(std::int32_t) { return false; }\n\nvoid GetUuid(xgboost::common::Span<unsigned char>, std::int32_t) { common::AssertGPUSupport(); }\n\n[[nodiscard]] std::string PrintUuid(common::Span<unsigned char const, kUuidLength>) { return {}; }\n\nvoid MemcpyAsync(void*, const void*, std::size_t, StreamRef) { common::AssertGPUSupport(); }\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::curt\n"
  },
  {
    "path": "src/common/cuda_rt_utils.h",
    "content": "/**\n * Copyright 2024-2026, XGBoost contributors\n */\n#pragma once\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n\n#include \"cuda_stream.h\"   // for StreamRef\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost::curt {\nstd::int32_t AllVisibleGPUs();\n\n/**\n * @param raise Raise error if XGBoost is not compiled with CUDA, or GPU is not available.\n */\nstd::int32_t CurrentDevice(bool raise = true);\n\n// Whether the device supports coherently accessing pageable memory without calling\n// `cudaHostRegister` on it\n[[nodiscard]] bool SupportsPageableMem();\n\n// Address Translation Service (ATS)\n[[nodiscard]] bool SupportsAts();\n\nvoid SetDevice(std::int32_t device);\n\n/**\n * @brief Total device memory size.\n */\n[[nodiscard]] std::size_t TotalMemory();\n\n// Returns the CUDA Runtime version.\nvoid GetRtVersionGlobal(std::int32_t* major, std::int32_t* minor);\n\n// Returns the latest version of CUDA supported by the driver.\nvoid GetDrVersionGlobal(std::int32_t* major, std::int32_t* minor);\n\n// Get the current device's numa ID.\n[[nodiscard]] std::int32_t GetNumaId();\n\n[[nodiscard]] std::int32_t GetMpCnt(std::int32_t device);\n\n[[nodiscard]] bool MemoryPoolsSupported(std::int32_t device);\n\ninline constexpr std::size_t kUuidLength = 16;\n\nvoid GetUuid(common::Span<unsigned char> uuid, std::int32_t device);\n\n[[nodiscard]] std::string PrintUuid(common::Span<unsigned char const, kUuidLength> uuid);\n\n// cudaMemcpyAsync\nvoid MemcpyAsync(void* dst, const void* src, std::size_t count, StreamRef stream);\n}  // namespace xgboost::curt\n"
  },
  {
    "path": "src/common/cuda_stream.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost contributors\n */\n#pragma once\n\n#if defined(XGBOOST_USE_CUDA)\n#include <cuda_runtime.h>\n#endif  // defined(XGBOOST_USE_CUDA)\n\n#include <memory>   // for unique_ptr\n#include <utility>  // for swap\n\n#include \"common.h\"\n\nnamespace xgboost::curt {\n#if defined(XGBOOST_USE_CUDA)\nclass StreamRef;\n\nclass Event {\n  std::unique_ptr<cudaEvent_t, void (*)(cudaEvent_t *)> event_;\n\n public:\n  explicit Event(bool disable_timing = true)\n      : event_{[disable_timing] {\n                 auto e = new cudaEvent_t;\n                 dh::safe_cuda(cudaEventCreateWithFlags(\n                     e, disable_timing ? cudaEventDisableTiming : cudaEventDefault));\n                 return e;\n               }(),\n               [](cudaEvent_t *e) {\n                 if (e) {\n                   dh::safe_cuda(cudaEventDestroy(*e));\n                   delete e;\n                 }\n               }} {}\n\n  inline void Record(StreamRef stream);  // NOLINT\n  // Define swap-based ctor to make sure an event is always valid.\n  Event(Event &&e) : Event() { std::swap(this->event_, e.event_); }\n  Event &operator=(Event &&e) {\n    std::swap(this->event_, e.event_);\n    return *this;\n  }\n\n  operator cudaEvent_t() const { return *event_; }                // NOLINT\n  cudaEvent_t const *data() const { return this->event_.get(); }  // NOLINT\n  void Sync() { dh::safe_cuda(cudaEventSynchronize(*this->data())); }\n};\n\nclass StreamRef {\n  cudaStream_t stream_{nullptr};\n\n public:\n  explicit StreamRef(cudaStream_t s) : stream_{s} {}\n  void Wait(Event const &e) {\n#if defined(__CUDACC_VER_MAJOR__)\n#if __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0\n    // CUDA == 11.0\n    dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, 0));\n#else\n    // CUDA > 11.0\n    dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));\n#endif  // __CUDACC_VER_MAJOR__ == 11 && 
__CUDACC_VER_MINOR__ == 0:\n#else   // clang\n    dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault));\n#endif  //  defined(__CUDACC_VER_MAJOR__)\n  }\n  operator cudaStream_t() const {  // NOLINT\n    return stream_;\n  }\n  cudaError_t Sync(bool error = true) {\n    if (error) {\n      dh::safe_cuda(cudaStreamSynchronize(stream_));\n      return cudaSuccess;\n    }\n    return cudaStreamSynchronize(stream_);\n  }\n};\n\ninline void Event::Record(StreamRef stream) {  // NOLINT\n  dh::safe_cuda(cudaEventRecord(*event_, cudaStream_t{stream}));\n}\n\n// Changing this has effect on prediction return, where we need to pass the pointer to\n// third-party libraries like cuPy\ninline StreamRef DefaultStream() { return StreamRef{cudaStreamPerThread}; }\n\nclass Stream {\n  cudaStream_t stream_;\n\n public:\n  Stream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); }\n  ~Stream() { dh::safe_cuda(cudaStreamDestroy(stream_)); }\n\n  [[nodiscard]] StreamRef View() const { return StreamRef{stream_}; }\n  [[nodiscard]] cudaStream_t Handle() const { return stream_; }\n\n  void Sync() { this->View().Sync(); }\n  void Wait(Event const &e) { this->View().Wait(e); }\n};\n#else\nclass StreamRef {};\n\ninline StreamRef DefaultStream() {\n  common::AssertGPUSupport();\n  return StreamRef{};\n}\n#endif\n}  // namespace xgboost::curt\n"
  },
  {
    "path": "src/common/cuda_stream_pool.h",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n#include <atomic>   // for atomic\n#include <cstddef>  // for size_t\n#include <vector>   // for vector\n\n#include \"cuda_stream.h\"       // for StreamRef, Stream\n\nnamespace xgboost::curt {\n// rmm cuda_stream_pool\nclass StreamPool {\n  mutable std::atomic<std::size_t> next_{0};\n  std::vector<curt::Stream> stream_;\n\n public:\n  explicit StreamPool(std::size_t n) : stream_(n) {}\n  ~StreamPool() = default;\n  StreamPool(StreamPool const& that) = delete;\n  StreamPool& operator=(StreamPool const& that) = delete;\n\n  [[nodiscard]] curt::StreamRef operator[](std::size_t i) const { return stream_[i].View(); }\n  [[nodiscard]] curt::StreamRef Next() const { return stream_[(next_++) % stream_.size()].View(); }\n  [[nodiscard]] std::size_t Size() const { return stream_.size(); }\n};\n}  // namespace xgboost::curt\n"
  },
  {
    "path": "src/common/deterministic.cuh",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_DETERMINISTIC_CUH_\n#define XGBOOST_COMMON_DETERMINISTIC_CUH_\n\n#include <cmath>   // for frexp, ldexp\n#include <limits>  // for numeric_limits\n\n#include \"xgboost/base.h\"  // XGBOOST_DEVICE\n\nnamespace xgboost {\nnamespace common {\n// Following 2 functions are slightly modified version of fbcuda.\n\n/**\n * \\brief Constructs a rounding factor used to truncate elements in a sum such that the\n *        sum of the truncated elements is the same no matter what the order of the sum\n *        is.\n *\n * Algorithm 5: Reproducible Sequential Sum in 'Fast Reproducible Floating-Point\n * Summation' by Demmel and Nguyen.\n */\ntemplate <typename T>\nXGBOOST_DEVICE T CreateRoundingFactor(T max_abs, bst_idx_t n) {\n  T delta = max_abs / (static_cast<T>(1.0) -\n                       static_cast<T>(2.0) * static_cast<T>(n) * std::numeric_limits<T>::epsilon());\n\n  // Calculate ceil(log_2(delta)).\n  // frexpf() calculates exp and returns `x` such that\n  // delta = x * 2^exp, where `x` in (-1.0, -0.5] U [0.5, 1).\n  // Because |x| < 1, exp is exactly ceil(log_2(delta)).\n  int exp;\n  std::frexp(delta, &exp);\n\n  // return M = 2 ^ ceil(log_2(delta))\n  return std::ldexp(static_cast<T>(1.0), exp);\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE T TruncateWithRounding(T const rounding_factor, T const x) {\n  return (rounding_factor + x) - rounding_factor;\n}\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_DETERMINISTIC_CUH_\n"
  },
  {
    "path": "src/common/device_compression.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n *\n * We use NVComp to perform compression and access the DE API directly for\n * decompression. Invoking the DE directly can help us avoid unnecessary kernal launches\n * and CUDA API calls and any potential blocking behaviours.\n */\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for uint8_t, uint32_t, int32_t\n#include <memory>   // for shared_ptr\n\n#include \"cuda_stream.h\"  // for StreamRef\n#include \"device_compression.cuh\"\n#include \"device_helpers.cuh\"  // for MemcpyBatchAsync\n#include \"xgboost/span.h\"      // for Span\n\n#if defined(XGBOOST_USE_NVCOMP)\n\n#include <nvcomp/snappy.h>   // for nvcompBatchedSnappyDecompressAsync\n#include <thrust/logical.h>  // for all_of\n#include <thrust/reduce.h>   // for reduce\n\n#include <algorithm>  // for transform, min\n#include <cstring>    // for memset\n#include <mutex>      // for once_flag, call_once\n#include <vector>     // for vector\n\n#include \"common.h\"               // for HumanMemUnit\n#include \"compressed_iterator.h\"  // for CompressedByteT\n#include \"cuda_context.cuh\"       // for CUDAContext\n#include \"cuda_dr_utils.h\"        // for GetGlobalCuDriverApi\n#include \"cuda_rt_utils.h\"        // for CurrentDevice\n#include \"device_compression.h\"\n#include \"device_vector.cuh\"      // for DeviceUVector\n#include \"nvtx_utils.h\"           // for xgboost_NVTX_FN_RANGE\n#include \"ref_resource_view.cuh\"  // for MakeFixedVecWithPinnedMemPool\n#include \"ref_resource_view.h\"    // for RefResourceView\n\nnamespace xgboost::dc {\nnamespace {\n// Parse snappy header\nXGBOOST_DEVICE std::uint32_t GetUncompressedSize(std::uint8_t const* src, std::size_t src_bytes,\n                                                 std::uint32_t* p_header_nbytes,\n                                                 std::int32_t* p_status) {\n  auto& n_bytes = *p_header_nbytes;\n  n_bytes = 0;\n\n  *p_status = 1;\n  std::uint32_t 
uncompressed_size = src[n_bytes++];\n  if (uncompressed_size > 0x7f) {\n    std::uint32_t c = (n_bytes < src_bytes) ? src[n_bytes++] : 0;\n    uncompressed_size = (uncompressed_size & 0x7f) | (c << 7);\n    if (uncompressed_size >= (0x80 << 7)) {\n      c = (n_bytes < src_bytes) ? src[n_bytes++] : 0;\n      uncompressed_size = (uncompressed_size & ((0x7f << 7) | 0x7f)) | (c << 14);\n      if (uncompressed_size >= (0x80 << 14)) {\n        c = (n_bytes < src_bytes) ? src[n_bytes++] : 0;\n        uncompressed_size = (uncompressed_size & ((0x7f << 14) | (0x7f << 7) | 0x7f)) | (c << 21);\n        if (uncompressed_size >= (0x80 << 21)) {\n          c = (n_bytes < src_bytes) ? src[n_bytes++] : 0;\n          if (c < 0x8) {\n            uncompressed_size =\n                (uncompressed_size & ((0x7f << 21) | (0x7f << 14) | (0x7f << 7) | 0x7f)) |\n                (c << 28);\n          } else {\n            *p_status = 0;\n          }\n        }\n      }\n    }\n  }\n\n  return uncompressed_size;\n}\n\nvoid FillDecompParams(void const* const* d_in_chunk_ptrs, std::size_t const* d_in_chunk_nbytes,\n                      common::Span<CUmemDecompressParams> de_params, size_t* d_act_nbytes,\n                      std::size_t const* d_out_chunk_nbytes, std::int32_t* statuses,\n                      curt::StreamRef stream) {\n  auto n_chunks = de_params.size();\n  dh::LaunchN(n_chunks, stream,\n              [d_in_chunk_ptrs, d_in_chunk_nbytes, d_out_chunk_nbytes, d_act_nbytes, de_params,\n               statuses, n_chunks] XGBOOST_DEVICE(std::size_t ix_chunk) {\n                std::size_t const dev_in_bytes = d_in_chunk_nbytes[ix_chunk];\n\n                // Parse the input buffer to determine the number of bytes to skip\n                // First byte with a 0 msb indicates no more bytes in the header\n                auto cur = reinterpret_cast<std::uint8_t const*>(d_in_chunk_ptrs[ix_chunk]);\n                std::uint32_t header_nbytes = 0;\n                std::uint32_t 
uncompressed_size =\n                    GetUncompressedSize(cur, dev_in_bytes, &header_nbytes, &statuses[ix_chunk]);\n                if (statuses[ix_chunk] == 0) {\n                  return;\n                }\n\n                de_params[ix_chunk].src = reinterpret_cast<const void*>(cur + header_nbytes);\n                de_params[ix_chunk].dst = nullptr;  // not know yet\n                de_params[ix_chunk].dstNumBytes = d_out_chunk_nbytes[ix_chunk];\n                d_act_nbytes[ix_chunk] = 0;\n                de_params[ix_chunk].dstActBytes =\n                    reinterpret_cast<cuuint32_t*>(&d_act_nbytes[ix_chunk]);\n                de_params[ix_chunk].srcNumBytes = dev_in_bytes - header_nbytes;\n                de_params[ix_chunk].algo = CU_MEM_DECOMPRESS_ALGORITHM_SNAPPY;\n                statuses[ix_chunk] = 1;\n              });\n}\n\nstruct ChkOp {\n  XGBOOST_DEVICE bool operator()(int s) { return s == 1; }\n};\n\nvoid CheckAlign(nvcompAlignmentRequirements_t alignment) {\n  CHECK_EQ(alignment.input, 1);\n  CHECK_EQ(alignment.output, 1);\n  CHECK_EQ(alignment.temp, 1);\n}\n\nvoid SafeNvComp(nvcompStatus_t status) {\n  if (status != nvcompSuccess) {\n    LOG(FATAL) << \"NVComp error:\" << static_cast<std::int32_t>(status);\n  }\n}\n}  // namespace\n\n[[nodiscard]] DeStatus const& GetGlobalDeStatus() {\n  std::once_flag static flag;\n  DeStatus static de;\n  std::call_once(flag, [&] {\n    // First check driver, we don't need to worry about mismatched libcuda version and rm\n    // version here. 
The first DE-enabled GPU requires >= 12.8 to work.\n    std::int32_t driver_version = 0;\n    dh::safe_cuda(cudaDriverGetVersion(&driver_version));\n    if (driver_version < 12080) {\n      return;\n    }\n\n    // Then check HW\n    auto device = curt::CurrentDevice();\n    std::int32_t mask = 0;\n    safe_cu(cudr::GetGlobalCuDriverApi().cuDeviceGetAttribute(\n        &mask, CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK, device));\n    de.avail = static_cast<bool>(mask);\n    if (!de.avail) {\n      return;\n    }\n\n    std::int32_t max_supported_size = 0;\n    // this refers to the output length of the decomp\n    safe_cu(cudr::GetGlobalCuDriverApi().cuDeviceGetAttribute(\n        &max_supported_size, CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH, device));\n    de.max_output_size = static_cast<std::size_t>(max_supported_size);\n    LOG(INFO) << \"The maximum supported size of the DE:\" << max_supported_size << std::endl;\n  });\n\n  return de;\n}\n\nSnappyDecomprMgrImpl::SnappyDecomprMgrImpl(curt::StreamRef s,\n                                           std::shared_ptr<HostPinnedMemPool> pool,\n                                           CuMemParams params,\n                                           common::Span<std::uint8_t const> in_compressed_data)\n    : n_dst_bytes{params.TotalDstBytes()} {\n  std::size_t n_chunks = params.size();\n  if (n_chunks == 0) {\n    return;\n  }\n\n  std::size_t last_in = 0, last_out = 0;\n\n  std::vector<void const*> in_chunk_ptrs(n_chunks);\n  std::vector<std::size_t> in_chunk_sizes(n_chunks);\n  std::vector<std::size_t> out_chunk_sizes(n_chunks);\n\n  dh::DeviceUVector<std::int32_t> status(n_chunks);\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    in_chunk_ptrs[i] = in_compressed_data.subspan(last_in, params[i].src_act_nbytes).data();\n    in_chunk_sizes[i] = params[i].src_act_nbytes;\n    out_chunk_sizes[i] = params[i].dst_nbytes;\n\n    last_in += params[i].src_nbytes;\n    last_out += params[i].dst_nbytes;\n  
}\n  CHECK_EQ(this->n_dst_bytes, last_out);\n\n  // copy to d\n  dh::CopyTo(in_chunk_ptrs, &this->d_in_chunk_ptrs, s);\n  dh::CopyTo(in_chunk_sizes, &this->d_in_chunk_sizes, s);\n  dh::CopyTo(out_chunk_sizes, &this->d_out_chunk_sizes, s);\n  this->act_nbytes.resize(n_chunks, 0);\n\n  this->de_params = common::MakeFixedVecWithPinnedMemPool<decltype(this->de_params)::value_type>(\n      pool, n_chunks, s);\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    std::memset(this->de_params.data() + i, 0, sizeof(CUmemDecompressParams));\n  }\n\n  FillDecompParams(d_in_chunk_ptrs.data().get(), d_in_chunk_sizes.data().get(), de_params.ToSpan(),\n                   this->act_nbytes.data().get(), d_out_chunk_sizes.data().get(), status.data(), s);\n  dh::XGBCachingDeviceAllocator<char> alloc;\n  bool valid = thrust::all_of(thrust::cuda::par_nosync(alloc).on(s), status.cbegin(), status.cend(),\n                              ChkOp{});\n  CHECK(valid);\n\n  auto max_supported_size = GetGlobalDeStatus().max_output_size;\n  auto max_chunk_size = *std::max_element(out_chunk_sizes.cbegin(), out_chunk_sizes.cend());\n  if (GetGlobalDeStatus().avail) {\n    CHECK_GE(max_supported_size, max_chunk_size);\n  }\n\n  this->de_params_copy =\n      common::MakeFixedVecWithPinnedMemPool<decltype(this->de_params)::value_type>(pool, n_chunks,\n                                                                                   s);\n}\n\ncommon::Span<CUmemDecompressParams> SnappyDecomprMgrImpl::GetParams(\n    common::Span<common::CompressedByteT> out) {\n  xgboost_NVTX_FN_RANGE_C(3, 252, 198);\n  if (this->de_params.empty()) {\n    return {};\n  }\n  auto n_chunks = this->de_params.size();\n  CHECK(!this->de_params_copy.empty());\n  // Set the output buffers.\n  std::size_t last_out = 0;\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    this->de_params_copy[i] = this->de_params[i];\n    this->de_params_copy[i].dst = out.subspan(last_out, de_params[i].dstNumBytes).data();\n    last_out += 
de_params[i].dstNumBytes;\n  }\n\n  return this->de_params_copy.ToSpan();\n}\n\n[[nodiscard]] bool SnappyDecomprMgrImpl::Empty() const {\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n  return this->de_params.empty();\n#else\n  return true;\n#endif\n}\n\nSnappyDecomprMgr::SnappyDecomprMgr() : pimpl_{std::make_unique<SnappyDecomprMgrImpl>()} {}\nSnappyDecomprMgr::SnappyDecomprMgr(SnappyDecomprMgr&& that) = default;\nSnappyDecomprMgr& SnappyDecomprMgr::operator=(SnappyDecomprMgr&& that) = default;\n\nSnappyDecomprMgr::~SnappyDecomprMgr() = default;\n\n[[nodiscard]] bool SnappyDecomprMgr::Empty() const { return this->Impl()->Empty(); }\n\n[[nodiscard]] std::size_t SnappyDecomprMgr::DecompressedBytes() const {\n  return this->Impl()->n_dst_bytes;\n}\n\nSnappyDecomprMgrImpl* SnappyDecomprMgr::Impl() const { return this->pimpl_.get(); }\n\nvoid DecompressSnappy(curt::StreamRef stream, SnappyDecomprMgr const& mgr,\n                      common::Span<common::CompressedByteT> out, bool allow_fallback) {\n  xgboost_NVTX_FN_RANGE();\n  auto mgr_impl = mgr.Impl();\n  auto params = mgr_impl->GetParams(out);\n  if (params.empty()) {\n    CHECK(out.empty());\n    return;\n  }\n  if (GetGlobalDeStatus().avail &&\n      cudr::GetGlobalCuDriverApi().cuMemBatchDecompressAsync != nullptr) {\n    // Invoke the DE.\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n    std::size_t error_index;\n    safe_cu(cudr::GetGlobalCuDriverApi().cuMemBatchDecompressAsync(\n        params.data(), params.size(), 0 /*unused*/, &error_index, stream));\n#else\n    static_assert(false, \"`cuMemBatchDecompressAsync` requires CUDA >= 12.8.\")\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n  } else {\n    // Fallback to nvcomp. 
This is only used during tests where we don't have access to DE\n    // but still want the test coverage.\n    CHECK(allow_fallback);\n    nvcompAlignmentRequirements_t decompression_alignment_reqs;\n    SafeNvComp(nvcompBatchedSnappyDecompressGetRequiredAlignments(\n        nvcompBatchedSnappyDecompressDefaultOpts, &decompression_alignment_reqs));\n    CheckAlign(decompression_alignment_reqs);\n    auto n_chunks = mgr_impl->Chunks();\n    // Get sketch space\n    std::size_t n_tmp_bytes = 0;\n    SafeNvComp(nvcompBatchedSnappyDecompressGetTempSizeAsync(\n        n_chunks, /*max_uncompressed_chunk_bytes=*/0, nvcompBatchedSnappyDecompressDefaultOpts,\n        &n_tmp_bytes,\n        /*max_total_uncompressed_bytes=*/0));\n    dh::device_vector<char> tmp(n_tmp_bytes, 0);\n\n    dh::device_vector<nvcompStatus_t> status(n_chunks, nvcompSuccess);\n\n    // Build output vector\n    std::vector<void*> h_out_ptrs(n_chunks);\n    std::transform(params.cbegin(), params.cend(), h_out_ptrs.begin(),\n                   [](auto const& p) { return p.dst; });\n    dh::device_vector<void*> d_out_ptrs(n_chunks);\n    dh::safe_cuda(cudaMemcpyAsync(d_out_ptrs.data().get(), h_out_ptrs.data(),\n                                  dh::ToSpan(d_out_ptrs).size_bytes(), cudaMemcpyDefault, stream));\n    // Run nvcomp\n    SafeNvComp(nvcompBatchedSnappyDecompressAsync(\n        mgr_impl->d_in_chunk_ptrs.data().get(), mgr_impl->d_in_chunk_sizes.data().get(),\n        mgr_impl->d_out_chunk_sizes.data().get(), mgr_impl->act_nbytes.data().get(), n_chunks,\n        tmp.data().get(), n_tmp_bytes, d_out_ptrs.data().get(),\n        nvcompBatchedSnappyDecompressDefaultOpts, status.data().get(), stream));\n  }\n}\n\n[[nodiscard]] CuMemParams CompressSnappy(Context const* ctx,\n                                         common::Span<common::CompressedByteT const> in,\n                                         dh::DeviceUVector<std::uint8_t>* p_out,\n                                         std::size_t 
chunk_size) {\n  CHECK_GT(chunk_size, 0);\n  auto cuctx = ctx->CUDACtx();\n  auto nvcomp_batched_snappy_opts = nvcompBatchedSnappyCompressDefaultOpts;\n\n  nvcompAlignmentRequirements_t compression_alignment_reqs;\n  SafeNvComp(nvcompBatchedSnappyCompressGetRequiredAlignments(nvcomp_batched_snappy_opts,\n                                                              &compression_alignment_reqs));\n  CheckAlign(compression_alignment_reqs);\n\n  /**\n   * Inputs\n   */\n  std::size_t n_chunks = (in.size() + chunk_size - 1) / chunk_size;\n  if (n_chunks == 0) {\n    p_out->clear();\n    return {};\n  }\n  std::size_t last = 0;\n\n  std::vector<common::CompressedByteT const*> h_in_ptrs(n_chunks);\n  std::vector<std::size_t> h_in_sizes(n_chunks);\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    auto n = std::min(chunk_size, in.size() - last);\n    auto chunk = in.subspan(last, n);\n    last += n;\n\n    h_in_sizes[i] = chunk.size();\n    h_in_ptrs[i] = chunk.data();\n  }\n  CHECK_EQ(last, in.size());\n\n  dh::DeviceUVector<void const*> in_ptrs(h_in_ptrs.size());\n  dh::safe_cuda(cudaMemcpyAsync(in_ptrs.data(), h_in_ptrs.data(),\n                                common::Span{h_in_ptrs}.size_bytes(), cudaMemcpyDefault,\n                                cuctx->Stream()));\n  dh::DeviceUVector<std::size_t> in_sizes(h_in_sizes.size());\n  dh::safe_cuda(cudaMemcpyAsync(in_sizes.data(), h_in_sizes.data(),\n                                common::Span{h_in_sizes}.size_bytes(), cudaMemcpyDefault,\n                                cuctx->Stream()));\n\n  CHECK_EQ(n_chunks, in_sizes.size());\n  std::size_t max_in_nbytes = *std::max_element(h_in_sizes.cbegin(), h_in_sizes.cend());\n\n  /**\n   * Outputs\n   */\n  std::size_t comp_temp_bytes;\n  SafeNvComp(nvcompBatchedSnappyCompressGetTempSizeAsync(\n      n_chunks, chunk_size, nvcomp_batched_snappy_opts, &comp_temp_bytes,\n      /*max_total_uncompressed_bytes=*/in.size()));\n  CHECK_EQ(comp_temp_bytes, 0);\n  
dh::DeviceUVector<char> comp_tmp(comp_temp_bytes);\n\n  std::size_t max_out_nbytes = 0;\n  SafeNvComp(nvcompBatchedSnappyCompressGetMaxOutputChunkSize(\n      std::min(max_in_nbytes, chunk_size), nvcomp_batched_snappy_opts, &max_out_nbytes));\n  p_out->resize(max_out_nbytes * n_chunks);\n  std::vector<void*> h_out_ptrs(n_chunks);\n  std::vector<std::size_t> h_out_sizes(n_chunks);\n  auto s_out = dh::ToSpan(*p_out);\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    auto chunk = s_out.subspan(max_out_nbytes * i, max_out_nbytes);\n    h_out_ptrs[i] = chunk.data();\n    h_out_sizes[i] = chunk.size();\n  }\n  dh::DeviceUVector<void*> out_ptrs(h_out_ptrs.size());\n  dh::safe_cuda(cudaMemcpyAsync(out_ptrs.data(), h_out_ptrs.data(),\n                                common::Span{h_out_ptrs}.size_bytes(), cudaMemcpyDefault));\n  dh::DeviceUVector<std::size_t> out_sizes(h_out_sizes.size());\n  dh::safe_cuda(cudaMemcpyAsync(out_sizes.data(), h_out_sizes.data(),\n                                common::Span{h_out_sizes}.size_bytes(), cudaMemcpyDefault));\n\n  /**\n   * Compress\n   */\n  SafeNvComp(nvcompBatchedSnappyCompressAsync(\n      in_ptrs.data(), in_sizes.data(), max_in_nbytes, n_chunks, comp_tmp.data(), comp_temp_bytes,\n      out_ptrs.data(), out_sizes.data(), nvcomp_batched_snappy_opts, /*device_statuses=*/nullptr,\n      cuctx->Stream()));\n  auto n_bytes = thrust::reduce(cuctx->CTP(), out_sizes.cbegin(), out_sizes.cend());\n  auto n_total_bytes = p_out->size();\n  auto ratio = static_cast<double>(n_total_bytes) / in.size_bytes();\n  auto ratio_act = static_cast<double>(n_bytes) / in.size_bytes();\n  LOG(DEBUG) << \"[snappy] Input: \" << common::HumanMemUnit(in.size_bytes())\n             << \", need:\" << common::HumanMemUnit(n_bytes)\n             << \", allocated:\" << common::HumanMemUnit(n_total_bytes) << \", ratio:\" << ratio\n             << \", actual ratio:\" << ratio_act;\n\n  /**\n   * Meta\n   */\n  CuMemParams params(n_chunks);\n  
std::vector<std::size_t> h_act_nbytes(out_sizes.size());\n  dh::safe_cuda(cudaMemcpyAsync(h_act_nbytes.data(), out_sizes.data(),\n                                common::Span{h_out_sizes}.size_bytes(), cudaMemcpyDefault,\n                                cuctx->Stream()));\n  for (std::size_t i = 0; i < n_chunks; ++i) {\n    auto& p = params[i];\n    p.src_nbytes = h_out_sizes[i];\n    p.src_act_nbytes = h_act_nbytes[i];\n    p.dst_nbytes = h_in_sizes[i];\n    p.algo = ComprParam::kSnappy;\n  }\n  return params;\n}\n\n[[nodiscard]] common::RefResourceView<std::uint8_t> CoalesceCompressedBuffersToHost(\n    curt::StreamRef stream, std::shared_ptr<HostPinnedMemPool> pool, CuMemParams const& in_params,\n    dh::DeviceUVector<std::uint8_t> const& in_buf, CuMemParams* p_out) {\n  std::size_t n_total_act_bytes = in_params.TotalSrcActBytes();\n  std::size_t n_total_bytes = in_params.TotalSrcBytes();\n  if (n_total_bytes == 0) {\n    CHECK_EQ(n_total_act_bytes, 0);\n    p_out->resize(0);\n    return {};\n  }\n  // copy from device buffer to the host cache.\n  CHECK_EQ(n_total_bytes, in_buf.size());\n  CHECK(pool);\n  auto c_page =\n      common::MakeFixedVecWithPinnedMemPool<std::remove_reference_t<decltype(in_buf)>::value_type>(\n          pool, n_total_act_bytes, stream);\n  std::vector<std::uint8_t const*> srcs(in_params.size());\n  std::vector<std::uint8_t*> dsts(in_params.size());\n  std::vector<std::size_t> sizes(in_params.size());\n\n  decltype(srcs)::value_type sptr = in_buf.data();\n  decltype(dsts)::value_type dptr = c_page.data();\n\n  for (std::size_t i = 0; i < in_params.size(); ++i) {\n    CHECK_LE(in_params[i].src_act_nbytes, in_params[i].src_nbytes);\n    sizes[i] = in_params[i].src_act_nbytes;\n\n    srcs[i] = sptr;\n    dsts[i] = dptr;\n\n    sptr += in_params[i].src_nbytes;\n    dptr += in_params[i].src_act_nbytes;\n  }\n  std::size_t fail_idx = 0;\n  dh::safe_cuda(dh::MemcpyBatchAsync<cudaMemcpyDeviceToHost>(dsts.data(), srcs.data(), sizes.data(),\n      
                                                       in_params.size(), &fail_idx, stream));\n\n  auto& out_params = *p_out;\n  out_params.resize(in_params.size());\n  for (std::size_t i = 0; i < in_params.size(); ++i) {\n    out_params[i].algo = in_params[i].algo;\n    out_params[i].dst_nbytes = in_params[i].dst_nbytes;\n    out_params[i].src_nbytes = in_params[i].src_act_nbytes;  // change to act\n    out_params[i].src_act_nbytes = in_params[i].src_act_nbytes;\n  }\n  return c_page;\n}\n}  // namespace xgboost::dc\n\n#else\n\nnamespace xgboost::dc {\n// Impl\nSnappyDecomprMgrImpl::SnappyDecomprMgrImpl(curt::StreamRef,\n                                           std::shared_ptr<common::cuda_impl::HostPinnedMemPool>,\n                                           CuMemParams,\n                                           common::Span<common::CompressedByteT const>) {}\n\n// SnappyDecomprMgr\nSnappyDecomprMgr::SnappyDecomprMgr() = default;\nSnappyDecomprMgr::SnappyDecomprMgr(SnappyDecomprMgr&& that) = default;\nSnappyDecomprMgr& SnappyDecomprMgr::operator=(SnappyDecomprMgr&& that) = default;\nSnappyDecomprMgr::~SnappyDecomprMgr() = default;\nSnappyDecomprMgrImpl* SnappyDecomprMgr::Impl() const { return nullptr; }\n\n[[nodiscard]] bool SnappyDecomprMgr::Empty() const { return true; }\n[[nodiscard]] std::size_t SnappyDecomprMgr::DecompressedBytes() const { return 0; }\n\n// Round-trip compression\nvoid DecompressSnappy(curt::StreamRef, SnappyDecomprMgr const&,\n                      common::Span<common::CompressedByteT>, bool) {\n  common::AssertNvCompSupport();\n}\n\n[[nodiscard]] CuMemParams CompressSnappy(Context const*,\n                                         common::Span<common::CompressedByteT const> in,\n                                         dh::DeviceUVector<std::uint8_t>*, std::size_t) {\n  if (in.empty()) {\n    return {};\n  }\n  common::AssertNvCompSupport();\n  return {};\n}\n\n[[nodiscard]] common::RefResourceView<std::uint8_t> 
CoalesceCompressedBuffersToHost(\n    curt::StreamRef, std::shared_ptr<HostPinnedMemPool>, CuMemParams const& in_params,\n    dh::DeviceUVector<std::uint8_t> const&, CuMemParams*) {\n  std::size_t n_total_bytes = in_params.TotalSrcBytes();\n  if (n_total_bytes == 0) {\n    return {};\n  }\n  common::AssertNvCompSupport();\n  return {};\n}\n\n[[nodiscard]] DeStatus const& GetGlobalDeStatus() {\n  static thread_local DeStatus de;\n  return de;\n}\n}  // namespace xgboost::dc\n\n#endif  // defined(XGBOOST_USE_NVCOMP)\n"
  },
  {
    "path": "src/common/device_compression.cuh",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for uint8_t\n\n#include \"compressed_iterator.h\"    // for CompressedByteT\n#include \"cuda_dr_utils.h\"          // for CUDA_HW_DECOM_AVAILABLE\n#include \"cuda_pinned_allocator.h\"  // for HostPinnedMemPool\n#include \"cuda_stream.h\"            // for StreamRef\n#include \"device_compression.h\"     // for CuMemParams\n#include \"device_vector.cuh\"        // for DeviceUVector\n#include \"ref_resource_view.h\"      // for RefResourceView\n#include \"xgboost/span.h\"           // for Span\n\nnamespace xgboost::dc {\n\nusing HostPinnedMemPool = common::cuda_impl::HostPinnedMemPool;\n\n/**\n * @brief Use nvcomp to compress the data.\n *\n * @param ctx Context, provides the CUDA stream and execution policy.\n * @param in  Input buffer, data to be compressed\n * @param p_out Output buffer, storing comprssed data.\n * @param chunk_size The number of bytes for each chunk.\n */\n[[nodiscard]] CuMemParams CompressSnappy(Context const* ctx,\n                                         common::Span<common::CompressedByteT const> in,\n                                         dh::DeviceUVector<std::uint8_t>* p_out,\n                                         std::size_t chunk_size);\n/**\n * @brief Run decompression with meta data cached in a mgr object.\n *\n * @param stream CUDA stream, it should be an asynchronous stream.\n * @param mgr Cache for decompression-related data.\n * @param out Pre-allocated output buffer based on the @ref CuMemParams returned from\n *   compression.\n * @param allow_fallback Allow fallback to nvcomp implementation if hardware accelerated\n *   implementation is not available. 
Used for testing.\n */\nvoid DecompressSnappy(curt::StreamRef stream, SnappyDecomprMgr const& mgr,\n                      common::Span<common::CompressedByteT> out, bool allow_fallback);\n\n/**\n * @brief Coalesce the compressed chunks into a contiguous host pinned buffer.\n *\n * @param stream CUDA stream.\n * @param pool Pinned memory pool for storing the results.\n * @param in_params Params from @ref CompressSnappy, specifies the chunks.\n * @param in_buf The buffer storing compressed chunks.\n * @param p_out Re-newed parameters to keep track of the buffers.\n */\n[[nodiscard]] common::RefResourceView<std::uint8_t> CoalesceCompressedBuffersToHost(\n    curt::StreamRef stream, std::shared_ptr<HostPinnedMemPool> pool,\n    CuMemParams const& in_params, dh::DeviceUVector<std::uint8_t> const& in_buf,\n    CuMemParams* p_out);\n\n// We store decompression parameters in struct of vectors. This is due to nvcomp works\n// with this format. But the CUDA driver works with vector of structs. We can optimize\n// toward the driver decompression function if the overhead is significant (too many\n// chunks).\nstruct SnappyDecomprMgrImpl {\n  std::size_t n_dst_bytes{0};\n  // src of the CUmemDecompressParams\n  dh::device_vector<void const*> d_in_chunk_ptrs;\n  // srcNumBytes of the CUmemDecompressParams\n  dh::device_vector<std::size_t> d_in_chunk_sizes;\n  // dstNumBytes of the CUmemDecompressParams\n  dh::device_vector<std::size_t> d_out_chunk_sizes;\n  // dstActBytes of the CUmemDecompressParams\n  dh::device_vector<std::size_t> act_nbytes;\n\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n  using DeParams = common::RefResourceView<CUmemDecompressParams>;\n  DeParams de_params;\n  DeParams de_params_copy;\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n\n  [[nodiscard]] std::size_t Chunks() const {\n#if defined(CUDA_HW_DECOM_AVAILABLE)\n    return de_params.size();\n#else\n    LOG(FATAL) << \"CUDA >= 12.8 is required.\";\n    return 0;\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE)\n  
}\n\n  SnappyDecomprMgrImpl(curt::StreamRef s, std::shared_ptr<HostPinnedMemPool> pool,\n                       CuMemParams params, common::Span<std::uint8_t const> in_compressed_data);\n\n#if defined(CUDA_HW_DECOM_AVAILABLE) && defined(XGBOOST_USE_NVCOMP)\n  common::Span<CUmemDecompressParams> GetParams(common::Span<common::CompressedByteT> out);\n#endif  // defined(CUDA_HW_DECOM_AVAILABLE) && defined(XGBOOST_USE_NVCOMP)\n\n  // big 5\n  SnappyDecomprMgrImpl() = default;\n  SnappyDecomprMgrImpl(SnappyDecomprMgrImpl const& that) = delete;\n  SnappyDecomprMgrImpl(SnappyDecomprMgrImpl&& that) = default;\n  SnappyDecomprMgrImpl& operator=(SnappyDecomprMgrImpl const&) = delete;\n  SnappyDecomprMgrImpl& operator=(SnappyDecomprMgrImpl&&) = default;\n\n  [[nodiscard]] bool Empty() const;\n};\n\n#if defined(XGBOOST_USE_NVCOMP)\n[[nodiscard]] inline auto MakeSnappyDecomprMgr(\n    curt::StreamRef s, std::shared_ptr<HostPinnedMemPool> pool, CuMemParams params,\n    common::Span<std::uint8_t const> in_compressed_data) {\n  SnappyDecomprMgr mgr;\n  *mgr.Impl() = SnappyDecomprMgrImpl{s, std::move(pool), std::move(params), in_compressed_data};\n  return mgr;\n}\n#else\n[[nodiscard]] inline auto MakeSnappyDecomprMgr(curt::StreamRef,\n                                               std::shared_ptr<HostPinnedMemPool>, CuMemParams,\n                                               common::Span<std::uint8_t const>) {\n  SnappyDecomprMgr mgr;\n  return mgr;\n}\n#endif  // defined(XGBOOST_USE_NVCOMP)\n}  // namespace xgboost::dc\n"
  },
  {
    "path": "src/common/device_compression.h",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n *\n * @brief Implement (de)compression with the help of nvcomp and the HW decompression engine.\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <numeric>  // for accumulate\n#include <vector>   // for vector\n\n#include \"transform_iterator.h\"  // for MakeIndexTransformIter\n\n#if defined(XGBOOST_USE_NVCOMP)\n\n#include <memory>  // for unique_ptr\n\n#endif  // defined(XGBOOST_USE_NVCOMP)\n\nnamespace xgboost::dc {\n/**\n * The cuda driver @ref CUmemDecompressParams struct without the pointers. We use this\n * struct to keep track of various buffer sizes. Naming of member variables follows the\n * CUDA struct.\n *\n * The src_nbytes stores the size of the allocated buffer for compressed data, and the\n * src_act_nbytes stores the actual size of the compressed data, which must be smaller\n * than the allocated size (src_nbytes). The nvcomp API over-allocate for compression.\n */\nstruct ComprParam {\n  enum Algo {\n    kLz4 = 0,\n    kGDefalte = 1,\n    kSnappy = 2,  // the only supported one at the moment.\n  };\n\n  // Compressed buffer bytes\n  std::size_t src_nbytes = 0;\n  // Actual compressed bytes\n  std::size_t src_act_nbytes = 0;\n  // Decompressed bytes.\n  std::size_t dst_nbytes = 0;\n  Algo algo;\n};\n\n/**\n * @brief A wrapper around vector of @ref ComprParam to help manage the chunks.\n */\nstruct CuMemParams {\n  std::vector<ComprParam> params;\n\n  CuMemParams() = default;\n  CuMemParams(CuMemParams const& that) = default;\n  CuMemParams(CuMemParams&& that) = default;\n  CuMemParams& operator=(CuMemParams&& that) = default;\n  CuMemParams& operator=(CuMemParams const& that) = default;\n\n  explicit CuMemParams(std::size_t n_chunks) : params(n_chunks) {}\n\n  ComprParam const& operator[](std::size_t i) const { return this->params[i]; }\n  ComprParam& operator[](std::size_t i) { return this->params[i]; }\n  ComprParam& at(std::size_t i) { return this->params.at(i); }      
        // NOLINT\n  ComprParam const& at(std::size_t i) const { return this->params.at(i); }  // NOLINT\n  void resize(std::size_t n) { this->params.resize(n); }                    // NOLINT\n\n  [[nodiscard]] auto cbegin() const { return this->params.cbegin(); }  // NOLINT\n  [[nodiscard]] auto cend() const { return this->params.cend(); }      // NOLINT\n\n  [[nodiscard]] auto begin() const { return this->params.begin(); }  // NOLINT\n  [[nodiscard]] auto end() const { return this->params.end(); }      // NOLINT\n  [[nodiscard]] auto begin() { return this->params.begin(); }        // NOLINT\n  [[nodiscard]] auto end() { return this->params.end(); }            // NOLINT\n\n  [[nodiscard]] std::size_t size() const { return this->params.size(); }  // NOLINT\n  [[nodiscard]] bool empty() const { return this->params.empty(); }       // NOLINT\n  [[nodiscard]] auto data() const { return this->params.data(); }         // NOLINT\n\n  [[nodiscard]] std::size_t TotalSrcBytes() const {\n    auto it = common::MakeIndexTransformIter(\n        [this](std::size_t i) { return this->params[i].src_nbytes; });\n    return std::accumulate(it, it + this->size(), static_cast<std::size_t>(0));\n  }\n  [[nodiscard]] std::size_t TotalSrcActBytes() const {\n    auto it = common::MakeIndexTransformIter(\n        [this](std::size_t i) { return this->params[i].src_act_nbytes; });\n    return std::accumulate(it, it + this->size(), static_cast<std::size_t>(0));\n  }\n  [[nodiscard]] std::size_t TotalDstBytes() const {\n    auto it = common::MakeIndexTransformIter(\n        [this](std::size_t i) { return this->params[i].dst_nbytes; });\n    return std::accumulate(it, it + this->size(), static_cast<std::size_t>(0));\n  }\n};\n\nclass SnappyDecomprMgrImpl;\n\n/**\n * @brief Help create and cache all decompression related meta data.\n *\n *   This struct is exposed to the CPU code. 
As a result, it's just a reference to the\n *   @ref SnappyDecomprMgrImpl .\n */\nclass SnappyDecomprMgr {\n public:\n  SnappyDecomprMgr();\n  SnappyDecomprMgr(SnappyDecomprMgr const& that) = delete;\n  SnappyDecomprMgr(SnappyDecomprMgr&& that);\n  SnappyDecomprMgr& operator=(SnappyDecomprMgr const& that) = delete;\n  SnappyDecomprMgr& operator=(SnappyDecomprMgr&& that);\n\n  ~SnappyDecomprMgr();\n\n  SnappyDecomprMgrImpl* Impl() const;\n\n  [[nodiscard]] bool Empty() const;\n  /**\n   * @brief The number of bytes of the uncompressed data.\n   */\n  [[nodiscard]] std::size_t DecompressedBytes() const;\n\n private:\n  // Hide the CUDA API calls.\n#if defined(XGBOOST_USE_NVCOMP)\n  std::unique_ptr<SnappyDecomprMgrImpl> pimpl_;\n#endif  // defined(XGBOOST_USE_NVCOMP)\n};\n\nstruct DeStatus {\n  bool avail{false};               // Whether the DE is present\n  std::size_t max_output_size{0};  // Maximum output size of the buffer\n};\n\n// Get the query result of DE stored in a global variable.\n[[nodiscard]] DeStatus const& GetGlobalDeStatus();\n}  // namespace xgboost::dc\n"
  },
  {
    "path": "src/common/device_debug.cuh",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n\n#include <cstddef>   // for size_t\n#include <iostream>  // for cout\n#include <vector>    // for vector\n\n#include \"common.h\"\n#include \"device_helpers.cuh\"     // for CopyDeviceSpanToVector\n#include \"xgboost/span.h\"         // for Span\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::debug {\n// debug::SyncDevice(__FILE__, __LINE__);\ninline void SyncDevice(char const *file = __builtin_FILE(), int32_t line = __builtin_LINE()) {\n  {\n    auto err = cudaDeviceSynchronize();\n    dh::ThrowOnCudaError(err, file, line);\n  }\n  {\n    auto err = cudaGetLastError();\n    dh::ThrowOnCudaError(err, file, line);\n  }\n}\n\ntemplate <typename T>\nvoid PrintDeviceSpan(common::Span<T> values, StringView name) {\n  std::cout << name << std::endl;\n  std::vector<std::remove_cv_t<T>> h_values(values.size());\n  dh::CopyDeviceSpanToVector(&h_values, values);\n  for (std::size_t i = 0; i < values.size(); ++i) {\n    if (i != 0 && i % 16 == 0) {\n      std::cout << std::endl;\n    }\n    std::cout << h_values[i] << \", \";\n  }\n  std::cout << std::endl;\n}\n}  // namespace xgboost::debug\n"
  },
  {
    "path": "src/common/device_helpers.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#include \"../common/cuda_dr_utils.h\"  // for GetVersionFromSmi\n#include \"device_helpers.cuh\"\n#include \"device_vector.cuh\"  // for GrowOnlyVirtualMemVec\n#include \"xgboost/windefs.h\"  // for xgboost_IS_WIN\n\nnamespace dh {\nnamespace {\n[[nodiscard]] bool IsSupportedDrVer(std::int32_t major, std::int32_t minor) {\n  return major > 12 || (major == 12 && minor >= 5);\n}\n\n// Check whether cuda virtual memory can be used.\n// Host NUMA allocation requires driver that supports CTK >= 12.5 to be stable\n[[nodiscard]] bool CheckVmAlloc() {\n  std::int32_t major{0}, minor{0};\n  xgboost::curt::GetDrVersionGlobal(&major, &minor);\n\n  bool vm_flag = true;\n  if (IsSupportedDrVer(major, minor)) {\n    // The result from the driver api is not reliable. The system driver might not match\n    // the CUDA driver in some obscure cases.\n    //\n    // https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html\n    // Ver                 Linux       Win\n    // CUDA 12.5 Update 1  >=555.42.06 >=555.85\n    // CUDA 12.5 GA        >=555.42.02 >=555.85\n    vm_flag = xgboost::cudr::GetVersionFromSmiGlobal(&major, &minor) && major >= 555;\n  } else {\n    vm_flag = false;\n  }\n  return vm_flag;\n}\n}  // namespace\n\nPinnedMemory::PinnedMemory() {\n#if defined(xgboost_IS_WIN)\n  this->impl_.emplace<detail::GrowOnlyPinnedMemoryImpl>();\n#else\n  if (CheckVmAlloc()) {\n    this->impl_.emplace<detail::GrowOnlyVirtualMemVec>(CU_MEM_LOCATION_TYPE_HOST_NUMA);\n  } else {\n    this->impl_.emplace<detail::GrowOnlyPinnedMemoryImpl>();\n  }\n#endif\n}\n}  // namespace dh\n"
  },
  {
    "path": "src/common/device_helpers.cuh",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#pragma once\n#include <thrust/binary_search.h>                       // thrust::upper_bound\n#include <thrust/device_ptr.h>                          // for device_ptr\n#include <thrust/device_vector.h>                       // for device_vector\n#include <thrust/execution_policy.h>                    // thrust::seq\n#include <thrust/iterator/discard_iterator.h>           // for discard_iterator\n#include <thrust/iterator/reverse_iterator.h>           // for make_reverse_iterator\n#include <thrust/iterator/transform_output_iterator.h>  // make_transform_output_iterator\n#include <thrust/system/cuda/error.h>\n#include <thrust/system_error.h>\n#include <thrust/unique.h>\n\n#include <algorithm>\n#include <cstddef>  // for size_t\n#include <cub/cub.cuh>\n#include <cub/util_type.cuh>  // for UnitWord, DoubleBuffer\n#include <cuda/std/iterator>  // for iterator_traits\n#include <cuda/std/utility>   // for pair\n#include <functional>         // for equal_to\n#include <variant>            // for variant, visit\n#include <vector>             // for vector\n\n#include \"common.h\"\n#include \"cuda_rt_utils.h\"  // for GetNumaId, CurrentDevice\n#include \"cuda_stream.h\"    // for Stream\n#include \"device_vector.cuh\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"\n\n#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)\n\n#else  // In device code and CUDA < 600\n__device__ __forceinline__ double atomicAdd(double* address, double val) {  // NOLINT\n  unsigned long long int* address_as_ull =\n      (unsigned long long int*)address;                   // NOLINT\n  unsigned long long int old = *address_as_ull, assumed;  // NOLINT\n\n  do {\n    assumed = old;\n    old = atomicCAS(address_as_ull, assumed,\n                    __double_as_longlong(val + __longlong_as_double(assumed)));\n\n    // Note: uses integer comparison to avoid 
hang in case of NaN (since NaN !=\n    // NaN)\n  } while (assumed != old);\n\n  return __longlong_as_double(old);\n}\n#endif\n\nnamespace dh {\n\n// FIXME(jiamingy): Remove this once we get rid of cub submodule.\nconstexpr bool BuildWithCUDACub() {\n#if defined(THRUST_IGNORE_CUB_VERSION_CHECK) && THRUST_IGNORE_CUB_VERSION_CHECK == 1\n  return false;\n#else\n  return true;\n#endif // defined(THRUST_IGNORE_CUB_VERSION_CHECK) && THRUST_IGNORE_CUB_VERSION_CHECK == 1\n}\n\nnamespace detail {\ntemplate <size_t size>\nstruct AtomicDispatcher;\n\ntemplate <>\nstruct AtomicDispatcher<sizeof(uint32_t)> {\n  using Type = unsigned int;  // NOLINT\n  static_assert(sizeof(Type) == sizeof(uint32_t), \"Unsigned should be of size 32 bits.\");\n};\n\ntemplate <>\nstruct AtomicDispatcher<sizeof(uint64_t)> {\n  using Type = unsigned long long;  // NOLINT\n  static_assert(sizeof(Type) == sizeof(uint64_t), \"Unsigned long long should be of size 64 bits.\");\n};\n}  // namespace detail\n}  // namespace dh\n\n// atomicAdd is not defined for size_t.\ntemplate <typename T = size_t,\n          std::enable_if_t<std::is_same_v<size_t, T> &&\n                           !std::is_same_v<size_t, unsigned long long>> * =  // NOLINT\n              nullptr>\nXGBOOST_DEV_INLINE T atomicAdd(T *addr, T v) {  // NOLINT\n  using Type = typename dh::detail::AtomicDispatcher<sizeof(T)>::Type;\n  Type ret = ::atomicAdd(reinterpret_cast<Type *>(addr), static_cast<Type>(v));\n  return static_cast<T>(ret);\n}\nnamespace dh {\n\ninline int32_t CudaGetPointerDevice(void const *ptr) {\n  if (!ptr) {\n    return -1;\n  }\n  int32_t device = -1;\n  cudaPointerAttributes attr;\n  dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));\n  device = attr.device;\n  return device;\n}\n\ninline size_t AvailableMemory(int device_idx) {\n  size_t device_free = 0;\n  size_t device_total = 0;\n  safe_cuda(cudaSetDevice(device_idx));\n  dh::safe_cuda(cudaMemGetInfo(&device_free, &device_total));\n  return 
device_free;\n}\n\ninline int32_t CurrentDevice() {\n  int32_t device = 0;\n  safe_cuda(cudaGetDevice(&device));\n  return device;\n}\n\n// Helper function to get a device from a potentially CPU context.\ninline auto GetDevice(xgboost::Context const *ctx) {\n  auto d = (ctx->IsCUDA()) ? ctx->Device() : xgboost::DeviceOrd::CUDA(::xgboost::curt::CurrentDevice());\n  CHECK(!d.IsCPU());\n  return d;\n}\n\n/**\n * \\fn  inline int MaxSharedMemory(int device_idx)\n *\n * \\brief Maximum shared memory per block on this device.\n *\n * \\param device_idx  Zero-based index of the device.\n */\n\ninline size_t MaxSharedMemory(int device_idx) {\n  int max_shared_memory = 0;\n  dh::safe_cuda(cudaDeviceGetAttribute\n                (&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,\n                 device_idx));\n  return static_cast<std::size_t>(max_shared_memory);\n}\n\n/**\n * \\fn  inline int MaxSharedMemoryOptin(int device_idx)\n *\n * \\brief Maximum dynamic shared memory per thread block on this device\n     that can be opted into when using cudaFuncSetAttribute().\n *\n * \\param device_idx  Zero-based index of the device.\n */\n\ninline size_t MaxSharedMemoryOptin(int device_idx) {\n  int max_shared_memory = 0;\n  dh::safe_cuda(cudaDeviceGetAttribute\n                (&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlockOptin,\n                 device_idx));\n  return static_cast<std::size_t>(max_shared_memory);\n}\n\nXGBOOST_DEV_INLINE void AtomicOrByte(unsigned int *__restrict__ buffer,\n                                     size_t ibyte, unsigned char b) {\n  atomicOr(&buffer[ibyte / sizeof(unsigned int)],\n           static_cast<unsigned int>(b)\n               << (ibyte % (sizeof(unsigned int)) * 8));\n}\n\ntemplate <typename T>\n__device__ xgboost::common::Range GridStrideRange(T begin, T end) {\n  begin += blockDim.x * blockIdx.x + threadIdx.x;\n  xgboost::common::Range r(begin, end);\n  r.Step(gridDim.x * blockDim.x);\n  return r;\n}\n\ntemplate 
<typename T>\n__device__ xgboost::common::Range BlockStrideRange(T begin, T end) {\n  begin += threadIdx.x;\n  xgboost::common::Range r(begin, end);\n  r.Step(blockDim.x);\n  return r;\n}\n\n// Threadblock iterates over range, filling with value. Requires all threads in\n// block to be active.\ntemplate <typename IterT, typename ValueT, typename SizeT>\n__device__ void BlockFill(IterT begin, SizeT n, ValueT value) {\n  for (auto i : BlockStrideRange(static_cast<SizeT>(0), n)) {\n    begin[i] = value;\n  }\n}\n\n/*\n * Kernel launcher\n */\n\ntemplate <typename L>\n__global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {\n  for (auto i : GridStrideRange(begin, end)) {\n    lambda(i);\n  }\n}\n\n/* \\brief A wrapper around kernel launching syntax, used to guard against empty input.\n *\n * - nvcc fails to deduce template argument when kernel is a template accepting __device__\n *   function as argument.  Hence functions like `LaunchN` cannot use this wrapper.\n *\n * - With c++ initialization list `{}` syntax, you are forced to comply with the CUDA type\n *   specification.\n */\nclass LaunchKernel {\n  size_t shmem_size_;\n  cudaStream_t stream_;\n\n  dim3 grids_;\n  dim3 blocks_;\n\n public:\n  LaunchKernel(uint32_t _grids, uint32_t _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :\n      grids_{_grids, 1, 1}, blocks_{_blk, 1, 1}, shmem_size_{_shmem}, stream_{_s} {}\n  LaunchKernel(dim3 _grids, dim3 _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :\n      grids_{_grids}, blocks_{_blk}, shmem_size_{_shmem}, stream_{_s} {}\n\n  template <typename K, typename... Args>\n  void operator()(K kernel, Args... 
args) {\n    if (XGBOOST_EXPECT(grids_.x * grids_.y * grids_.z == 0, false)) {\n      LOG(DEBUG) << \"Skipping empty CUDA kernel.\";\n      return;\n    }\n    kernel<<<grids_, blocks_, shmem_size_, stream_>>>(args...);  // NOLINT\n  }\n};\n\ntemplate <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>\ninline void LaunchN(size_t n, cudaStream_t stream, L lambda) {\n  if (n == 0) {\n    return;\n  }\n  const int GRID_SIZE =\n      static_cast<int>(xgboost::common::DivRoundUp(n, ITEMS_PER_THREAD * BLOCK_THREADS));\n  LaunchNKernel<<<GRID_SIZE, BLOCK_THREADS, 0, stream>>>(  // NOLINT\n      static_cast<size_t>(0), n, lambda);\n}\n\n// Default stream version\ntemplate <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>\ninline void LaunchN(size_t n, L lambda) {\n  LaunchN<ITEMS_PER_THREAD, BLOCK_THREADS>(n, nullptr, lambda);\n}\n\ntemplate <typename Container>\nvoid Iota(Container array, cudaStream_t stream) {\n  LaunchN(array.size(), stream, [=] __device__(size_t i) { array[i] = i; });\n}\n\n// Faster to instantiate than caching_device_vector and invokes no synchronisation\n// Use this where vector functionality (e.g. 
resize) is not required\ntemplate <typename T>\nclass TemporaryArray {\n public:\n  using AllocT = XGBCachingDeviceAllocator<T>;\n  using value_type = T;  // NOLINT\n  explicit TemporaryArray(size_t n) : size_(n) { ptr_ = AllocT().allocate(n); }\n  TemporaryArray(size_t n, T val) : size_(n) {\n    ptr_ = AllocT().allocate(n);\n    this->fill(val);\n  }\n  ~TemporaryArray() { AllocT().deallocate(ptr_, this->size()); }\n  void fill(T val)  // NOLINT\n  {\n    int device = 0;\n    dh::safe_cuda(cudaGetDevice(&device));\n    auto d_data = ptr_.get();\n    LaunchN(this->size(), [=] __device__(size_t idx) { d_data[idx] = val; });\n  }\n  thrust::device_ptr<T> data() { return ptr_; }  // NOLINT\n  size_t size() { return size_; }  // NOLINT\n\n private:\n  thrust::device_ptr<T> ptr_;\n  size_t size_;\n};\n\n/**\n * \\brief A double buffer, useful for algorithms like sort.\n */\ntemplate <typename T>\nclass DoubleBuffer {\n public:\n  cub::DoubleBuffer<T> buff;\n  xgboost::common::Span<T> a, b;\n  DoubleBuffer() = default;\n  template <typename VectorT>\n  DoubleBuffer(VectorT *v1, VectorT *v2) {\n    a = xgboost::common::Span<T>(v1->data().get(), v1->size());\n    b = xgboost::common::Span<T>(v2->data().get(), v2->size());\n    buff = cub::DoubleBuffer<T>(a.data(), b.data());\n  }\n\n  size_t Size() const {\n    CHECK_EQ(a.size(), b.size());\n    return a.size();\n  }\n  cub::DoubleBuffer<T> &CubBuffer() { return buff; }\n\n  T *Current() { return buff.Current(); }\n  xgboost::common::Span<T> CurrentSpan() {\n    return xgboost::common::Span<T>{buff.Current(), Size()};\n  }\n\n  T *Other() { return buff.Alternate(); }\n};\n\ntemplate <typename T>\nxgboost::common::Span<T> LazyResize(xgboost::Context const *ctx,\n                                    xgboost::HostDeviceVector<T> *buffer, std::size_t n) {\n  buffer->SetDevice(ctx->Device());\n  if (buffer->Size() < n) {\n    buffer->Resize(n);\n  }\n  return buffer->DeviceSpan().subspan(0, n);\n}\n\n/**\n * \\brief Copies 
device span to std::vector.\n *\n * \\tparam  T Generic type parameter.\n * \\param [in,out]  dst Copy destination.\n * \\param           src Copy source. Must be device memory.\n */\ntemplate <typename T>\nvoid CopyDeviceSpanToVector(std::vector<T> *dst, xgboost::common::Span<T> src) {\n  CHECK_EQ(dst->size(), src.size());\n  dh::safe_cuda(cudaMemcpyAsync(dst->data(), src.data(), dst->size() * sizeof(T),\n                                cudaMemcpyDeviceToHost));\n}\n\n/**\n * \\brief Copies const device span to std::vector.\n *\n * \\tparam  T Generic type parameter.\n * \\param [in,out]  dst Copy destination.\n * \\param           src Copy source. Must be device memory.\n */\ntemplate <typename T>\nvoid CopyDeviceSpanToVector(std::vector<T> *dst, xgboost::common::Span<const T> src) {\n  CHECK_EQ(dst->size(), src.size());\n  dh::safe_cuda(cudaMemcpyAsync(dst->data(), src.data(), dst->size() * sizeof(T),\n                                cudaMemcpyDeviceToHost));\n}\n\n// Keep track of pinned memory allocation\nclass PinnedMemory {\n  std::variant<detail::GrowOnlyPinnedMemoryImpl, detail::GrowOnlyVirtualMemVec> impl_;\n\n public:\n  PinnedMemory();\n\n  template <typename T>\n  xgboost::common::Span<T> GetSpan(size_t size) {\n    return std::visit([&](auto &&alloc) { return alloc.template GetSpan<T>(size); }, this->impl_);\n  }\n  template <typename T>\n  xgboost::common::Span<T> GetSpan(size_t size, T const &init) {\n    auto result = this->GetSpan<T>(size);\n    std::fill_n(result.data(), result.size(), init);\n    return result;\n  }\n  // Used for testing.\n  [[nodiscard]] bool IsVm() {\n    return std::get_if<detail::GrowOnlyVirtualMemVec>(&this->impl_) != nullptr;\n  }\n};\n\n/*\n *  Utility functions\n */\n\n/**\n* @brief Helper function to perform device-wide sum-reduction, returns to the\n* host\n* @param in the input array to be reduced\n* @param nVals number of elements in the input array\n*/\ntemplate <typename T>\ntypename 
std::iterator_traits<T>::value_type SumReduction(T in, int nVals) {\n  using ValueT = typename std::iterator_traits<T>::value_type;\n  size_t tmpSize {0};\n  ValueT *dummy_out = nullptr;\n  dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));\n\n  TemporaryArray<char> temp(tmpSize + sizeof(ValueT));\n  auto ptr = reinterpret_cast<ValueT *>(temp.data().get()) + 1;\n  dh::safe_cuda(cub::DeviceReduce::Sum(\n      reinterpret_cast<void *>(ptr), tmpSize, in,\n      reinterpret_cast<ValueT *>(temp.data().get()),\n      nVals));\n  ValueT sum;\n  dh::safe_cuda(cudaMemcpy(&sum, temp.data().get(), sizeof(ValueT),\n                           cudaMemcpyDeviceToHost));\n  return sum;\n}\n\nconstexpr std::pair<int, int> CUDAVersion() {\n#if defined(__CUDACC_VER_MAJOR__)\n  return std::make_pair(__CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);\n#else\n  // clang/clang-tidy\n  return std::make_pair((CUDA_VERSION) / 1000, (CUDA_VERSION) % 100 / 10);\n#endif  // defined(__CUDACC_VER_MAJOR__)\n}\n\nconstexpr std::pair<int32_t, int32_t> ThrustVersion() {\n  return std::make_pair(THRUST_MAJOR_VERSION, THRUST_MINOR_VERSION);\n}\n// Whether do we have thrust 1.x with x >= minor\ntemplate <int32_t minor>\nconstexpr bool HasThrustMinorVer() {\n  return (ThrustVersion().first == 1 && ThrustVersion().second >= minor) ||\n         ThrustVersion().first > 1;\n}\n\nnamespace detail {\ntemplate <typename T>\nusing TypedDiscardCTK114 = thrust::discard_iterator<T>;\n\ntemplate <typename T>\nclass TypedDiscard : public thrust::discard_iterator<T> {\n public:\n  using value_type = T;  // NOLINT\n};\n} // namespace detail\n\ntemplate <typename T>\nusing TypedDiscard = std::conditional_t<HasThrustMinorVer<12>(), detail::TypedDiscardCTK114<T>,\n                                        detail::TypedDiscard<T>>;\n\ntemplate <typename VectorT, typename T = typename VectorT::value_type,\n          typename IndexT = typename 
xgboost::common::Span<T>::index_type>\nxgboost::common::Span<T> ToSpan(VectorT &vec, IndexT offset = 0,\n                                IndexT size = std::numeric_limits<size_t>::max()) {\n  size = size == std::numeric_limits<size_t>::max() ? vec.size() : size;\n  CHECK_LE(offset + size, vec.size());\n  return {thrust::raw_pointer_cast(vec.data()) + offset, size};\n}\n\ntemplate <typename T>\nxgboost::common::Span<T> ToSpan(device_vector<T> &vec, size_t offset, size_t size) {\n  return ToSpan(vec, offset, size);\n}\n\ntemplate <typename T>\nxgboost::common::Span<std::add_const_t<T>> ToSpan(device_vector<T> const &vec) {\n  return {thrust::raw_pointer_cast(vec.data()), vec.size()};\n}\n\ntemplate <typename T>\nxgboost::common::Span<T> ToSpan(DeviceUVector<T> &vec) {\n  return {vec.data(), vec.size()};\n}\n\ntemplate <typename T>\nxgboost::common::Span<std::add_const_t<T>> ToSpan(DeviceUVector<T> const &vec) {\n  return {vec.data(), vec.size()};\n}\n\n// thrust begin, similiar to std::begin\ntemplate <typename T>\nthrust::device_ptr<T> tbegin(xgboost::HostDeviceVector<T>& vector) {  // NOLINT\n  return thrust::device_ptr<T>(vector.DevicePointer());\n}\n\ntemplate <typename T>\nthrust::device_ptr<T> tend(xgboost::HostDeviceVector<T>& vector) {  // // NOLINT\n  return tbegin(vector) + vector.Size();\n}\n\ntemplate <typename T>\nthrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) {  // NOLINT\n  return thrust::device_ptr<T const>(vector.ConstDevicePointer());\n}\n\ntemplate <typename T>\nthrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) {  // NOLINT\n  return tcbegin(vector) + vector.Size();\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) {  // NOLINT\n  return thrust::device_ptr<T>(span.data());\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T> tbegin(xgboost::common::Span<T> const& span) {  // NOLINT\n  return 
thrust::device_ptr<T>(span.data());\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) {  // NOLINT\n  return tbegin(span) + span.size();\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T> tend(xgboost::common::Span<T> const& span) {  // NOLINT\n  return tbegin(span) + span.size();\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE auto trbegin(xgboost::common::Span<T> &span) {  // NOLINT\n  return thrust::make_reverse_iterator(span.data() + span.size());\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE auto trend(xgboost::common::Span<T> &span) {  // NOLINT\n  return trbegin(span) + span.size();\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T const> tcbegin(xgboost::common::Span<T> const& span) {  // NOLINT\n  return thrust::device_ptr<T const>(span.data());\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE thrust::device_ptr<T const> tcend(xgboost::common::Span<T> const& span) {  // NOLINT\n  return tcbegin(span) + span.size();\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE auto tcrbegin(xgboost::common::Span<T> const &span) {  // NOLINT\n  return thrust::make_reverse_iterator(span.data() + span.size());\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE auto tcrend(xgboost::common::Span<T> const &span) {  // NOLINT\n  return tcrbegin(span) + span.size();\n}\n\n// Atomic add function for gradients\ntemplate <typename OutputGradientT, typename InputGradientT>\nXGBOOST_DEV_INLINE void AtomicAddGpair(OutputGradientT* dest,\n                                       const InputGradientT& gpair) {\n  auto dst_ptr = reinterpret_cast<typename OutputGradientT::ValueT*>(dest);\n\n  atomicAdd(dst_ptr,\n            static_cast<typename OutputGradientT::ValueT>(gpair.GetGrad()));\n  atomicAdd(dst_ptr + 1,\n            static_cast<typename OutputGradientT::ValueT>(gpair.GetHess()));\n}\n\n\n// Thrust version of this function causes error on Windows\ntemplate <typename ReturnT, typename IterT, typename FuncT>\nXGBOOST_DEVICE 
thrust::transform_iterator<FuncT, IterT, ReturnT> MakeTransformIterator(\n  IterT iter, FuncT func) {\n  return thrust::transform_iterator<FuncT, IterT, ReturnT>(iter, func);\n}\n\ntemplate <typename Fn>\nXGBOOST_DEVICE auto MakeIndexTransformIter(Fn &&fn) {\n  return thrust::make_transform_iterator(thrust::make_counting_iterator(0ul), std::forward<Fn>(fn));\n}\n\ntemplate <typename It>\nsize_t XGBOOST_DEVICE SegmentId(It first, It last, size_t idx) {\n  size_t segment_id = thrust::upper_bound(thrust::seq, first, last, idx) - 1 - first;\n  return segment_id;\n}\n\ntemplate <typename T>\nsize_t XGBOOST_DEVICE SegmentId(xgboost::common::Span<T> segments_ptr, size_t idx) {\n  return SegmentId(segments_ptr.cbegin(), segments_ptr.cend(), idx);\n}\n\nnamespace detail {\ntemplate <typename Key, typename KeyOutIt>\nstruct SegmentedUniqueReduceOp {\n  KeyOutIt key_out;\n  __device__ Key const& operator()(Key const& key) const {\n    auto constexpr kOne = static_cast<std::remove_reference_t<decltype(*(key_out + key.first))>>(1);\n    atomicAdd(&(*(key_out + key.first)), kOne);\n    return key;\n  }\n};\n}  // namespace detail\n\n/* \\brief Segmented unique function.  
Keys are pointers to segments with key_segments_last -\n *        key_segments_first = n_segments + 1.\n *\n * \\pre   Input segment and output segment must not overlap.\n *\n * \\param key_segments_first Beginning iterator of segments.\n * \\param key_segments_last  End iterator of segments.\n * \\param val_first          Beginning iterator of values.\n * \\param val_last           End iterator of values.\n * \\param key_segments_out   Output iterator of segments.\n * \\param val_out            Output iterator of values.\n *\n * \\return Number of unique values in total.\n */\ntemplate <typename DerivedPolicy, typename KeyInIt, typename KeyOutIt, typename ValInIt,\n          typename ValOutIt, typename CompValue, typename CompKey = std::equal_to<size_t>>\nsize_t SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,\n                       KeyInIt key_segments_first, KeyInIt key_segments_last, ValInIt val_first,\n                       ValInIt val_last, KeyOutIt key_segments_out, ValOutIt val_out,\n                       CompValue comp, CompKey comp_key = std::equal_to<size_t>{}) {\n  using Key = cuda::std::pair<size_t, typename cuda::std::iterator_traits<ValInIt>::value_type>;\n  auto unique_key_it = dh::MakeTransformIterator<Key>(\n      thrust::make_counting_iterator(static_cast<size_t>(0)), [=] __device__(std::size_t i) {\n        size_t seg = dh::SegmentId(key_segments_first, key_segments_last, i);\n        return cuda::std::make_pair(seg, *(val_first + i));\n      });\n  size_t segments_len = key_segments_last - key_segments_first;\n  thrust::fill(exec, key_segments_out, key_segments_out + segments_len, 0);\n  size_t n_inputs = std::distance(val_first, val_last);\n  // Reduce the number of uniques elements per segment, avoid creating an intermediate\n  // array for `reduce_by_key`.  It's limited by the types that atomicAdd supports.  
For\n  // example, size_t is not supported as of CUDA 10.2.\n  auto reduce_it = thrust::make_transform_output_iterator(\n      thrust::make_discard_iterator(),\n      detail::SegmentedUniqueReduceOp<Key, KeyOutIt>{key_segments_out});\n  auto uniques_ret = thrust::unique_by_key_copy(\n      exec, unique_key_it, unique_key_it + n_inputs,\n      val_first, reduce_it, val_out,\n      [=] __device__(Key const &l, Key const &r) {\n        if (comp_key(l.first, r.first)) {\n          // In the same segment.\n          return comp(l.second, r.second);\n        }\n        return false;\n      });\n  auto n_uniques = uniques_ret.second - val_out;\n  CHECK_LE(n_uniques, n_inputs);\n  thrust::exclusive_scan(exec, key_segments_out,\n                         key_segments_out + segments_len, key_segments_out, 0);\n  return n_uniques;\n}\n\n/**\n * \\brief Unique by key for many groups of data.  Has same constraint as `SegmentedUnique`.\n *\n * \\tparam exec               thrust execution policy\n * \\tparam key_segments_first start iter to segment pointer\n * \\tparam key_segments_last  end iter to segment pointer\n * \\tparam key_first          start iter to key for comparison\n * \\tparam key_last           end iter to key for comparison\n * \\tparam val_first          start iter to values\n * \\tparam key_segments_out   output iterator for new segment pointer\n * \\tparam val_out            output iterator for values\n * \\tparam comp               binary comparison operator\n */\ntemplate <typename DerivedPolicy, typename SegInIt, typename SegOutIt, typename KeyInIt,\n          typename ValInIt, typename ValOutIt, typename Comp>\nsize_t SegmentedUniqueByKey(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,\n                            SegInIt key_segments_first, SegInIt key_segments_last,\n                            KeyInIt key_first, KeyInIt key_last, ValInIt val_first,\n                            SegOutIt key_segments_out, ValOutIt val_out, Comp comp) {\n  
using Key =\n      cuda::std::pair<std::size_t, typename cuda::std::iterator_traits<KeyInIt>::value_type>;\n\n  auto unique_key_it = dh::MakeTransformIterator<Key>(\n      thrust::make_counting_iterator(static_cast<size_t>(0)), [=] __device__(size_t i) {\n        size_t seg = dh::SegmentId(key_segments_first, key_segments_last, i);\n        return cuda::std::make_pair(seg, *(key_first + i));\n      });\n  size_t segments_len = key_segments_last - key_segments_first;\n  thrust::fill(exec, key_segments_out, key_segments_out + segments_len, 0);\n  size_t n_inputs = std::distance(key_first, key_last);\n  // Reduce the number of uniques elements per segment, avoid creating an\n  // intermediate array for `reduce_by_key`.  It's limited by the types that\n  // atomicAdd supports.  For example, size_t is not supported as of CUDA 10.2.\n  auto reduce_it = thrust::make_transform_output_iterator(\n      thrust::make_discard_iterator(),\n      detail::SegmentedUniqueReduceOp<Key, SegOutIt>{key_segments_out});\n  auto uniques_ret =\n      thrust::unique_by_key_copy(exec, unique_key_it, unique_key_it + n_inputs, val_first,\n                                 reduce_it, val_out, [=] __device__(Key const &l, Key const &r) {\n                                   if (l.first == r.first) {\n                                     // In the same segment.\n                                     return comp(l.second, r.second);\n                                   }\n                                   return false;\n                                 });\n  auto n_uniques = uniques_ret.second - val_out;\n  CHECK_LE(n_uniques, n_inputs);\n  thrust::exclusive_scan(exec, key_segments_out, key_segments_out + segments_len, key_segments_out,\n                         0);\n  return n_uniques;\n}\n\ntemplate <typename Policy, typename InputIt, typename Init, typename Func>\nauto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce_op) {\n  size_t constexpr kLimit = 
std::numeric_limits<int32_t>::max() / 2;\n  size_t size = std::distance(first, second);\n  using Ty = std::remove_cv_t<Init>;\n  Ty aggregate = init;\n  for (size_t offset = 0; offset < size; offset += kLimit) {\n    auto begin_it = first + offset;\n    auto end_it = first + std::min(offset + kLimit, size);\n    size_t batch_size = std::distance(begin_it, end_it);\n    CHECK_LE(batch_size, size);\n    auto ret = thrust::reduce(policy, begin_it, end_it, init, reduce_op);\n    aggregate = reduce_op(aggregate, ret);\n  }\n  return aggregate;\n}\n\ntemplate <class Src, class Dst>\nvoid CopyTo(Src const &src, Dst *dst,\n            ::xgboost::curt::StreamRef stream = ::xgboost::curt::DefaultStream()) {\n  if (src.empty()) {\n    dst->clear();\n    return;\n  }\n  dst->resize(src.size());\n  using SVT = std::remove_cv_t<typename Src::value_type>;\n  using DVT = std::remove_cv_t<typename Dst::value_type>;\n  static_assert(std::is_same_v<SVT, DVT>, \"Host and device containers must have same value type.\");\n  dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(dst->data()), src.data(),\n                                src.size() * sizeof(SVT), cudaMemcpyDefault, stream));\n}\n\n/**\n * @brief Wrapper for the @ref cudaMemcpyBatchAsync .\n *\n * @param dsts Host pointer to a list of device pointers.\n * @param srcs Host pointer to a list of device pointers.\n * @param sizes Host pointer to a list of sizes.\n * @param count How many batches.\n * @param fail_idx Which batch has failed, if any. When it's assigned to SIZE_MAX, then\n *   it's a general error.\n * @param stream CUDA stream. 
The wrapper enforces stream order access.\n */\ntemplate <cudaMemcpyKind kind, typename T, typename U>\n[[nodiscard]] cudaError_t MemcpyBatchAsync(T **dsts, U **srcs, std::size_t const *sizes,\n                                           std::size_t count, std::size_t *fail_idx,\n                                           cudaStream_t stream) {\n#if CUDART_VERSION >= 12080\n  static_assert(kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyHostToDevice,\n                \"Not implemented.\");\n  cudaMemcpyAttributes attr;\n  attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream;\n  attr.flags = cudaMemcpyFlagPreferOverlapWithCompute;\n\n  auto assign_host = [](cudaMemLocation *hint) {\n    hint->type = cudaMemLocationTypeHostNuma;\n    hint->id = xgboost::curt::GetNumaId();\n  };\n  auto assign_device = [](cudaMemLocation *hint) {\n    hint->type = cudaMemLocationTypeDevice;\n    hint->id = xgboost::curt::CurrentDevice();\n  };\n  if constexpr (kind == cudaMemcpyDeviceToHost) {\n    assign_device(&attr.srcLocHint);\n    assign_host(&attr.dstLocHint);\n  } else {\n    assign_host(&attr.srcLocHint);\n    assign_device(&attr.dstLocHint);\n  }\n  return cudaMemcpyBatchAsync(dsts, srcs, const_cast<std::size_t *>(sizes), count, attr, fail_idx,\n                              stream);\n#else\n  LOG(FATAL) << \"CUDA >= 12.8 is required.\";\n  return cudaErrorInvalidValue;\n#endif  // CUDART_VERSION >= 12080\n}\n\ninline auto CachingThrustPolicy() {\n  XGBCachingDeviceAllocator<char> alloc;\n  return thrust::cuda::par_nosync(alloc).on(::xgboost::curt::DefaultStream());\n}\n\n// Force nvcc to load data as constant\ntemplate <typename T>\nclass LDGIterator {\n  using DeviceWordT = typename cub::UnitWord<T>::DeviceWord;\n  static constexpr std::size_t kNumWords = sizeof(T) / sizeof(DeviceWordT);\n\n  const T *ptr_;\n\n public:\n  XGBOOST_DEVICE explicit LDGIterator(const T *ptr) : ptr_(ptr) {}\n  __device__ T operator[](std::size_t idx) const {\n    DeviceWordT tmp[kNumWords];\n 
   static_assert(sizeof(tmp) == sizeof(T), \"Expect sizes to be equal.\");\n#pragma unroll\n    for (int i = 0; i < kNumWords; i++) {\n      tmp[i] = __ldg(reinterpret_cast<const DeviceWordT *>(ptr_ + idx) + i);\n    }\n    return *reinterpret_cast<const T *>(tmp);\n  }\n};\n\nconstexpr std::int32_t WarpThreads() { return 32; }\n}  // namespace dh\n"
  },
  {
    "path": "src/common/device_vector.cu",
    "content": "/**\n * Copyright 2017-2024, XGBoost contributors\n */\n#include <numeric>  // for accumulate\n\n#include \"../collective/communicator-inl.h\"  // for GetRank\n#include \"common.h\"                          // for HumanMemUnit\n#include \"cuda_dr_utils.h\"\n#include \"device_helpers.cuh\"  // for CurrentDevice\n#include \"device_vector.cuh\"\n#include \"transform_iterator.h\"  // for MakeIndexTransformIter\n\nnamespace dh {\nnamespace detail {\nvoid ThrowOOMError(std::string const &err, std::size_t bytes) {\n  auto device = CurrentDevice();\n  auto rank = xgboost::collective::GetRank();\n  using xgboost::common::HumanMemUnit;\n  std::stringstream ss;\n  ss << \"Memory allocation error on worker \" << rank << \": \" << err << \"\\n\"\n     << \"- Free memory: \" << HumanMemUnit(dh::AvailableMemory(device)) << \"\\n\"\n     << \"- Requested memory: \" << HumanMemUnit(bytes) << std::endl;\n  LOG(FATAL) << ss.str();\n}\n\n[[nodiscard]] std::size_t GrowOnlyVirtualMemVec::PhyCapacity() const {\n  auto it = xgboost::common::MakeIndexTransformIter(\n      [&](std::size_t i) { return this->handles_[i]->size; });\n  return std::accumulate(it, it + this->handles_.size(), static_cast<std::size_t>(0));\n}\n\nvoid GrowOnlyVirtualMemVec::Reserve(std::size_t new_size) {\n  auto va_capacity = this->Capacity();\n  if (new_size < va_capacity) {\n    return;\n  }\n\n  // Try to reserve new virtual address.\n  auto const aligned_size = RoundUp(new_size, this->granularity_);\n  auto const new_reserve_size = aligned_size - va_capacity;\n  CUresult status = CUDA_SUCCESS;\n  auto hint = this->DevPtr() + va_capacity;\n\n  bool failed{false};\n  auto range = std::make_unique<VaRange>(new_reserve_size, hint, &status, &failed);\n  if (failed) {\n    // Failed to reserve the requested address.\n    // Slow path, try to reserve a new address with full size.\n    range = std::make_unique<VaRange>(aligned_size, 0ULL, &status, &failed);\n    safe_cu(status);\n    CHECK(!failed);\n\n 
   // New allocation is successful. Map the physical address to the virtual address.\n    // First unmap the existing ptr.\n    if (this->DevPtr() != 0) {\n      // Unmap the existing ptr.\n      safe_cu(cu_.cuMemUnmap(this->DevPtr(), this->PhyCapacity()));\n\n      // Then remap all the existing physical addresses to the new ptr.\n      CUdeviceptr ptr = range->DevPtr();\n      for (auto const &hdl : this->handles_) {\n        this->MapBlock(ptr, hdl);\n        ptr += hdl->size;\n      }\n\n      // Release the existing ptr.\n      va_ranges_.clear();\n    }\n  }\n\n  va_ranges_.emplace_back(std::move(range));\n}\n\nGrowOnlyVirtualMemVec::GrowOnlyVirtualMemVec(CUmemLocationType type)\n    : prop_{xgboost::cudr::MakeAllocProp(type)},\n      granularity_{xgboost::cudr::GetAllocGranularity(&this->prop_)} {\n  CHECK(type == CU_MEM_LOCATION_TYPE_DEVICE || type == CU_MEM_LOCATION_TYPE_HOST_NUMA);\n  // Assign the access descriptor\n  CUmemAccessDesc dacc;\n  dacc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;\n  xgboost::cudr::MakeCuMemLocation(CU_MEM_LOCATION_TYPE_DEVICE, &dacc.location);\n  this->access_desc_.push_back(dacc);\n\n  if (type == CU_MEM_LOCATION_TYPE_HOST_NUMA) {\n    CUmemAccessDesc hacc;\n    hacc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;\n\n    xgboost::cudr::MakeCuMemLocation(type, &hacc.location);\n    this->access_desc_.push_back(hacc);\n  }\n}\n\n[[nodiscard]] std::size_t GrowOnlyVirtualMemVec::Capacity() const {\n  auto it = xgboost::common::MakeIndexTransformIter(\n      [&](std::size_t i) { return this->va_ranges_[i]->Size(); });\n  return std::accumulate(it, it + this->va_ranges_.size(), static_cast<std::size_t>(0));\n}\n}  // namespace detail\n}  // namespace dh\n"
  },
  {
    "path": "src/common/device_vector.cuh",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#pragma once\n#include <thrust/device_malloc_allocator.h>  // for device_malloc_allocator\n#include <thrust/device_ptr.h>               // for device_ptr\n#include <thrust/device_vector.h>            // for device_vector\n\n#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\n#include <rmm/cuda_stream_view.hpp>             // for cuda_stream_view\n#include <rmm/mr/thrust_allocator_adaptor.hpp>  // for thrust_allocator\n\n#else\n\n#include \"xgboost/windefs.h\"  // for xgboost_IS_WIN\n\n#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\n#include <cuda.h>  // for CUmemGenericAllocationHandle\n\n#include <atomic>                  // for atomic, memory_order\n#include <cstddef>                 // for size_t\n#include <cstdint>                 // for int64_t\n#include <cub/util_allocator.cuh>  // for CachingDeviceAllocator\n#include <cub/util_device.cuh>     // for CurrentDevice\n#include <functional>              // for function\n#include <memory>                  // for unique_ptr\n\n#include \"common.h\"                 // for safe_cuda, HumanMemUnit\n#include \"cuda_dr_utils.h\"          // for CuDriverApi\n#include \"cuda_stream.h\"            // for DefaultStream\n#include \"xgboost/global_config.h\"  // for GlobalConfigThreadLocalStore\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace dh {\nnamespace detail {\n// std::atomic::fetch_max in c++26\ntemplate <typename T>\nT AtomicFetchMax(std::atomic<T> &atom, T val,  // NOLINT\n                 std::memory_order order = std::memory_order_seq_cst) {\n  auto expected = atom.load();\n  auto desired = expected > val ? expected : val;\n\n  while (desired == val && !atom.compare_exchange_strong(expected, desired, order, order)) {\n    desired = expected > val ? expected : val;\n  }\n\n  return expected;\n}\n\n/** \\brief Keeps track of global device memory allocations. 
Thread safe.*/\nclass MemoryLogger {\n  // Information for a single device\n  struct DeviceStats {\n    // Use signed int to allow temporary under-flow.\n    std::atomic<std::int64_t> currently_allocated_bytes{0};\n    std::atomic<std::int64_t> peak_allocated_bytes{0};\n    void RegisterAllocation(std::int64_t n) {\n      currently_allocated_bytes += n;\n      AtomicFetchMax(peak_allocated_bytes, currently_allocated_bytes.load());\n    }\n    void RegisterDeallocation(std::int64_t n) { currently_allocated_bytes -= n; }\n  };\n  DeviceStats stats_;\n\n public:\n  /**\n   * @brief Register the allocation for logging.\n   */\n  void RegisterAllocation(std::size_t n) {\n    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {\n      return;\n    }\n    stats_.RegisterAllocation(static_cast<std::int64_t>(n));\n  }\n  /**\n   * @brief Register the deallocation for logging.\n   */\n  void RegisterDeallocation(std::size_t n) {\n    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {\n      return;\n    }\n    stats_.RegisterDeallocation(static_cast<std::int64_t>(n));\n  }\n  std::int64_t PeakMemory() const { return stats_.peak_allocated_bytes; }\n  std::int64_t CurrentlyAllocatedBytes() const { return stats_.currently_allocated_bytes; }\n  void Clear() {\n    stats_.currently_allocated_bytes = 0;\n    stats_.peak_allocated_bytes = 0;\n  }\n\n  void Log() const {\n    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {\n      return;\n    }\n    auto current_device = cub::CurrentDevice();\n    LOG(CONSOLE) << \"======== Device \" << current_device << \" Memory Allocations: \"\n                 << \" ========\";\n    LOG(CONSOLE) << \"Peak memory usage: \"\n                 << xgboost::common::HumanMemUnit(stats_.peak_allocated_bytes);\n  }\n};\n\nvoid ThrowOOMError(std::string const &err, std::size_t bytes);\n\nstruct GrowOnlyPinnedMemoryImpl {\n  void *temp_storage{nullptr};\n  size_t 
temp_storage_bytes{0};\n\n  ~GrowOnlyPinnedMemoryImpl() { Free(); }\n\n  template <typename T>\n  xgboost::common::Span<T> GetSpan(size_t size) {\n    size_t num_bytes = size * sizeof(T);\n    if (num_bytes > temp_storage_bytes) {\n      Free();\n      safe_cuda(cudaMallocHost(&temp_storage, num_bytes));\n      temp_storage_bytes = num_bytes;\n    }\n    return xgboost::common::Span<T>(static_cast<T *>(temp_storage), size);\n  }\n\n  void Free() {\n    if (temp_storage != nullptr) {\n      safe_cuda(cudaFreeHost(temp_storage));\n    }\n  }\n};\n\n/**\n * @brief Use low-level virtual memory functions from CUDA driver API for grow-only memory\n *        allocation.\n *\n * @url https://developer.nvidia.com/blog/introducing-low-level-gpu-virtual-memory-management/\n *\n * Aside from the potential performance benefits, this is primarily implemented to prevent\n * deadlock in NCCL and XGBoost. The host NUMA version requires CTK12.5+ to be stable.\n */\nclass GrowOnlyVirtualMemVec {\n  static auto RoundUp(std::size_t new_sz, std::size_t chunk_sz) {\n    return ((new_sz + chunk_sz - 1) / chunk_sz) * chunk_sz;\n  }\n\n  struct PhyAddrHandle {\n    CUmemGenericAllocationHandle handle;\n    std::size_t size;\n  };\n\n  class VaRange {\n    CUdeviceptr ptr_{0};\n    std::size_t size_{0};\n\n   public:\n    VaRange(std::size_t size, CUdeviceptr hint, CUresult *p_status, bool *failed) : size_{size} {\n      CUresult &status = *p_status;\n      status = xgboost::cudr::GetGlobalCuDriverApi().cuMemAddressReserve(&ptr_, size, 0, hint, 0);\n      *failed = status != CUDA_SUCCESS || (hint != 0 && ptr_ != hint);\n    }\n    ~VaRange() {\n      if (ptr_ != 0) {\n        xgboost::cudr::GetGlobalCuDriverApi().cuMemAddressFree(ptr_, this->size_);\n      }\n    }\n\n    VaRange(VaRange const &that) = delete;\n    VaRange &operator=(VaRange const &that) = delete;\n\n    VaRange(VaRange &&that) { std::swap(*this, that); }\n    VaRange &operator=(VaRange &&that) {\n      std::swap(*this, 
that);\n      return *this;\n    }\n    [[nodiscard]] auto DevPtr() const { return this->ptr_; }\n    [[nodiscard]] std::size_t Size() const { return this->size_; }\n  };\n\n  using PhyHandle = std::unique_ptr<PhyAddrHandle, std::function<void(PhyAddrHandle *)>>;\n  std::vector<PhyHandle> handles_;\n  std::vector<std::unique_ptr<VaRange>> va_ranges_;\n\n  xgboost::cudr::CuDriverApi &cu_{xgboost::cudr::GetGlobalCuDriverApi()};\n  std::vector<CUmemAccessDesc> access_desc_;\n  CUmemAllocationProp const prop_;\n\n  // Always use bytes.\n  std::size_t const granularity_;\n\n  [[nodiscard]] std::size_t PhyCapacity() const;\n  [[nodiscard]] CUdeviceptr DevPtr() const {\n    if (this->va_ranges_.empty()) {\n      return 0;\n    }\n    return this->va_ranges_.front()->DevPtr();\n  }\n  void MapBlock(CUdeviceptr ptr, PhyHandle const &hdl) const {\n    safe_cu(cu_.cuMemMap(ptr, hdl->size, 0, hdl->handle, 0));\n    safe_cu(cu_.cuMemSetAccess(ptr, hdl->size, access_desc_.data(), access_desc_.size()));\n  }\n  auto CreatePhysicalMem(std::size_t size) const {\n    CUmemGenericAllocationHandle alloc_handle;\n    auto padded_size = RoundUp(size, this->granularity_);\n    safe_cu(this->cu_.cuMemCreate(&alloc_handle, padded_size, &this->prop_, 0));\n    return alloc_handle;\n  }\n  void Reserve(std::size_t new_size);\n\n public:\n  explicit GrowOnlyVirtualMemVec(CUmemLocationType type);\n\n  void GrowTo(std::size_t n_bytes) {\n    auto alloc_size = this->PhyCapacity();\n    if (n_bytes <= alloc_size) {\n      return;\n    }\n\n    std::size_t delta = n_bytes - alloc_size;\n    auto const padded_delta = RoundUp(delta, this->granularity_);\n    this->Reserve(alloc_size + padded_delta);\n\n    this->handles_.emplace_back(\n        std::unique_ptr<PhyAddrHandle, std::function<void(PhyAddrHandle *)>>{\n            new PhyAddrHandle{this->CreatePhysicalMem(padded_delta), padded_delta}, [&](auto *hdl) {\n              if (hdl) {\n                cu_.cuMemRelease(hdl->handle);\n              
}\n            }});\n    auto ptr = this->DevPtr() + alloc_size;\n    this->MapBlock(ptr, this->handles_.back());\n  }\n\n  template <typename T>\n  xgboost::common::Span<T> GetSpan(std::size_t size) {\n    size_t n_bytes = size * sizeof(T);\n    this->GrowTo(n_bytes);\n    return xgboost::common::Span<T>(reinterpret_cast<T *>(this->DevPtr()), size);\n  }\n\n  ~GrowOnlyVirtualMemVec() noexcept(false) {\n    if (this->DevPtr() != 0) {\n      safe_cu(cu_.cuMemUnmap(this->DevPtr(), this->PhyCapacity()));\n    }\n\n    this->va_ranges_.clear();  // make sure all VA are freed before releasing the handles.\n    this->handles_.clear();    // release the handles\n  }\n\n  [[nodiscard]] void *data() { return reinterpret_cast<void *>(this->DevPtr()); }  // NOLINT\n  [[nodiscard]] std::size_t size() const { return this->PhyCapacity(); }           // NOLINT\n  [[nodiscard]] std::size_t Capacity() const;\n};\n}  // namespace detail\n\ninline detail::MemoryLogger &GlobalMemoryLogger() {\n  static detail::MemoryLogger memory_logger;\n  return memory_logger;\n}\n\nnamespace detail {\n#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\ntemplate <typename T>\nclass ThrustAllocMrAdapter : public rmm::mr::thrust_allocator<T> {\n public:\n  template <typename U>\n  struct rebind {                           // NOLINT(readability-identifier-naming)\n    using other = ThrustAllocMrAdapter<U>;  // NOLINT(readability-identifier-naming)\n  };\n\n\n  ThrustAllocMrAdapter()\n      : rmm::mr::thrust_allocator<T>{\n            rmm::cuda_stream_view{cudaStream_t{xgboost::curt::DefaultStream()}}} {};\n};\n\ntemplate <typename T>\nusing XGBBaseDeviceAllocator = ThrustAllocMrAdapter<T>;\n\n#else  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\n/**\n * @brief Use CUDA async memory pool as an optional backing allocator.\n */\ntemplate <typename T>\nclass XGBAsyncPoolAllocator : public thrust::device_malloc_allocator<T> {\n#if !defined(xgboost_IS_WIN)\n  // MSVC/NVCC optimizes this variable 
away, as a result, we disable the async pool\n  // entirely on Windows.\n  std::int32_t use_async_pool_;\n#endif\n\n public:\n  using Super = thrust::device_malloc_allocator<T>;\n  using pointer = typename Super::pointer;      // NOLINT(readability-identifier-naming)\n  using size_type = typename Super::size_type;  // NOLINT(readability-identifier-naming)\n\n#if defined(xgboost_IS_WIN)\n  XGBAsyncPoolAllocator() = default;\n#else\n  XGBAsyncPoolAllocator()\n      : use_async_pool_{::xgboost::GlobalConfigThreadLocalStore::Get()->use_cuda_async_pool} {}\n#endif\n\n  template <typename U>\n  struct rebind {                            // NOLINT(readability-identifier-naming)\n    using other = XGBAsyncPoolAllocator<U>;  // NOLINT(readability-identifier-naming)\n  };\n\n  pointer allocate(std::size_t n) {  // NOLINT\n#if defined(xgboost_IS_WIN)\n    return Super::allocate(n);\n#else\n    if (!this->use_async_pool_) {\n      return Super::allocate(n);\n    }\n\n    T *raw_ptr = nullptr;\n    auto n_bytes = xgboost::common::SizeBytes<T>(n);\n    safe_cuda(cudaMallocAsync(&raw_ptr, n_bytes, xgboost::curt::DefaultStream()));\n    return thrust::device_pointer_cast(raw_ptr);\n#endif\n  }\n\n  void deallocate(pointer ptr, std::size_t n) {  // NOLINT\n#if defined(xgboost_IS_WIN)\n    return Super::deallocate(ptr, n);\n#else\n    if (!this->use_async_pool_) {\n      return Super::deallocate(ptr, n);\n    }\n\n    safe_cuda(cudaFreeAsync(thrust::raw_pointer_cast(ptr), xgboost::curt::DefaultStream()));\n#endif\n  }\n\n  // Used for tests.\n  void SetAsync(bool use_async_pool) {\n#if !defined(xgboost_IS_WIN)\n    this->use_async_pool_ = use_async_pool;\n#endif\n  }\n};\n\ntemplate <typename T>\nusing XGBBaseDeviceAllocator = XGBAsyncPoolAllocator<T>;\n#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\n/**\n * @brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.\n */\ntemplate <class T>\nstruct XGBDefaultDeviceAllocatorImpl : public 
XGBBaseDeviceAllocator<T> {\n  using SuperT = XGBBaseDeviceAllocator<T>;\n  using pointer = thrust::device_ptr<T>;  // NOLINT\n\n  template <typename U>\n  struct rebind  // NOLINT\n  {\n    using other = XGBDefaultDeviceAllocatorImpl<U>;  // NOLINT\n  };\n\n  pointer allocate(std::size_t n) {  // NOLINT\n    pointer ptr;\n    try {\n      ptr = SuperT::allocate(n);\n      dh::safe_cuda(cudaGetLastError());\n    } catch (const std::exception &e) {\n      detail::ThrowOOMError(e.what(), n * sizeof(T));\n    }\n    GlobalMemoryLogger().RegisterAllocation(n * sizeof(T));\n    return ptr;\n  }\n\n  void deallocate(pointer ptr, std::size_t n) {  // NOLINT\n    GlobalMemoryLogger().RegisterDeallocation(n * sizeof(T));\n    SuperT::deallocate(ptr, n);\n  }\n\n  XGBDefaultDeviceAllocatorImpl() : SuperT{} {}\n};\n\n/**\n * @brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end, unless\n *        RMM pool allocator is enabled. Does not initialise memory on construction.\n */\ntemplate <class T>\nstruct XGBCachingDeviceAllocatorImpl : public XGBBaseDeviceAllocator<T> {\n  using SuperT = XGBBaseDeviceAllocator<T>;\n  using pointer = thrust::device_ptr<T>;  // NOLINT\n  template <typename U>\n  struct rebind  // NOLINT\n  {\n    using other = XGBCachingDeviceAllocatorImpl<U>;  // NOLINT\n  };\n\n  static cub::CachingDeviceAllocator &GetGlobalCachingAllocator() {\n    // Configure allocator with maximum cached bin size of ~1GB and no limit on\n    // maximum cached bytes\n    thread_local std::unique_ptr<cub::CachingDeviceAllocator> allocator{\n        std::make_unique<cub::CachingDeviceAllocator>(2, 9, 29)};\n    return *allocator;\n  }\n\n  pointer allocate(std::size_t n) {  // NOLINT\n    pointer thrust_ptr;\n    if (use_cub_allocator_) {\n      T *raw_ptr{nullptr};\n      // NOLINTBEGIN(clang-analyzer-unix.BlockInCriticalSection)\n      auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&raw_ptr),\n                       
                                      n * sizeof(T));\n      // NOLINTEND(clang-analyzer-unix.BlockInCriticalSection)\n      if (errc != cudaSuccess) {\n        detail::ThrowOOMError(\"Caching allocator\", n * sizeof(T));\n      }\n      thrust_ptr = thrust::device_pointer_cast(raw_ptr);\n    } else {\n      try {\n        thrust_ptr = SuperT::allocate(n);\n        dh::safe_cuda(cudaGetLastError());\n      } catch (const std::exception &e) {\n        detail::ThrowOOMError(e.what(), n * sizeof(T));\n      }\n    }\n    GlobalMemoryLogger().RegisterAllocation(n * sizeof(T));\n    return thrust_ptr;\n  }\n\n  void deallocate(pointer ptr, std::size_t n) {  // NOLINT\n    if (use_cub_allocator_) {\n      GetGlobalCachingAllocator().DeviceFree(thrust::raw_pointer_cast(ptr));\n    } else {\n      SuperT::deallocate(ptr, n);\n    }\n    GlobalMemoryLogger().RegisterDeallocation(n * sizeof(T));\n  }\n\n  XGBCachingDeviceAllocatorImpl()\n      : SuperT{},\n        use_cub_allocator_{!(xgboost::GlobalConfigThreadLocalStore::Get()->use_rmm ||\n                             xgboost::GlobalConfigThreadLocalStore::Get()->use_cuda_async_pool)} {}\n\n  XGBOOST_DEVICE void construct(T *) {}  // NOLINT\n\n private:\n  bool use_cub_allocator_;\n};\n}  // namespace detail\n\n// Declare xgboost allocators\n// Replacement of allocator with custom backend should occur here\ntemplate <typename T>\nusing XGBDeviceAllocator = detail::XGBDefaultDeviceAllocatorImpl<T>;\n\n/** Be careful that the initialization constructor is a no-op, which means calling\n *  `vec.resize(n)` won't initialize the memory region to 0. Instead use\n * `vec.resize(n, 0)`\n */\ntemplate <typename T>\nusing XGBCachingDeviceAllocator = detail::XGBCachingDeviceAllocatorImpl<T>;\n\n/** @brief Specialisation of thrust device vector using custom allocator. 
In addition, it catches\n *         OOM errors.\n */\ntemplate <typename T>\nusing device_vector = thrust::device_vector<T, XGBDeviceAllocator<T>>;  // NOLINT\ntemplate <typename T>\nusing caching_device_vector = thrust::device_vector<T, XGBCachingDeviceAllocator<T>>;  // NOLINT\n\n/**\n * @brief Container class that doesn't initialize the data.\n */\ntemplate <typename T, bool is_caching>\nclass DeviceUVectorImpl {\n private:\n  using Alloc =\n      std::conditional_t<is_caching, dh::XGBCachingDeviceAllocator<T>, dh::XGBDeviceAllocator<T>>;\n  Alloc alloc_;\n\n  std::size_t size_{0};\n  std::size_t capacity_{0};\n  std::unique_ptr<T, std::function<void(T *)>> data_;\n\n public:\n  using value_type = T;                        // NOLINT\n  using pointer = value_type *;                // NOLINT\n  using const_pointer = value_type const *;    // NOLINT\n  using reference = value_type &;              // NOLINT\n  using const_reference = value_type const &;  // NOLINT\n\n public:\n  DeviceUVectorImpl() = default;\n  explicit DeviceUVectorImpl(std::size_t n) { this->resize(n); }\n  DeviceUVectorImpl(DeviceUVectorImpl const &that) = delete;\n  DeviceUVectorImpl &operator=(DeviceUVectorImpl const &that) = delete;\n  DeviceUVectorImpl(DeviceUVectorImpl &&that) = default;\n  DeviceUVectorImpl &operator=(DeviceUVectorImpl &&that) = default;\n\n  [[nodiscard]] std::size_t Capacity() const { return this->capacity_; }\n\n  // Resize without init.\n  void resize(std::size_t n) {  // NOLINT\n    using ::xgboost::common::SizeBytes;\n\n    if (n <= this->Capacity()) {\n      this->size_ = n;\n      // early exit as no allocation is needed.\n      return;\n    }\n    CHECK_LE(this->size(), this->Capacity());\n\n    Alloc alloc = this->alloc_;\n    decltype(data_) new_ptr{thrust::raw_pointer_cast(this->alloc_.allocate(n)),\n                            [=](T *ptr) mutable {\n                              if (ptr) {\n                                
alloc.deallocate(thrust::device_pointer_cast(ptr), n);\n                              }\n                            }};\n    CHECK(new_ptr.get());\n\n    auto s = ::xgboost::curt::DefaultStream();\n    safe_cuda(cudaMemcpyAsync(new_ptr.get(), this->data(), SizeBytes<T>(this->size()),\n                              cudaMemcpyDefault, s));\n    this->size_ = n;\n    this->capacity_ = n;\n\n    this->data_ = std::move(new_ptr);\n    // swap failed with CTK12.8\n    // std::swap(this->data_, new_ptr);\n  }\n  // Resize with init\n  void resize(std::size_t n, T const &v) {  // NOLINT\n    auto orig = this->size();\n    this->resize(n);\n    if (orig < n) {\n      auto exec = thrust::cuda::par_nosync.on(::xgboost::curt::DefaultStream());\n      thrust::fill(exec, this->begin() + orig, this->end(), v);\n    }\n  }\n\n  void clear() {  // NOLINT\n    this->resize(0);\n  }\n\n  [[nodiscard]] std::size_t size() const { return this->size_; }  // NOLINT\n  [[nodiscard]] bool empty() const { return this->size() == 0; }  // NOLINT\n\n  [[nodiscard]] auto begin() { return this->data(); }               // NOLINT\n  [[nodiscard]] auto end() { return this->data() + this->size(); }  // NOLINT\n\n  [[nodiscard]] auto begin() const { return this->cbegin(); }  // NOLINT\n  [[nodiscard]] auto end() const { return this->cend(); }      // NOLINT\n\n  [[nodiscard]] auto cbegin() const { return this->data(); }               // NOLINT\n  [[nodiscard]] auto cend() const { return this->data() + this->size(); }  // NOLINT\n\n  [[nodiscard]] auto data() { return this->data_.get(); }        // NOLINT\n  [[nodiscard]] auto data() const { return this->data_.get(); }  // NOLINT\n};\n\ntemplate <typename T>\nusing DeviceUVector = DeviceUVectorImpl<T, false>;\n\ntemplate <typename T>\nusing CachingDeviceUVector = DeviceUVectorImpl<T, true>;\n}  // namespace dh\n"
  },
  {
    "path": "src/common/error_msg.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include \"error_msg.h\"\n\n#include <mutex>         // for call_once, once_flag\n#include <sstream>       // for stringstream\n#include <system_error>  // for error_code, system_category\n\n#include \"../collective/communicator-inl.h\"  // for GetRank\n#include \"xgboost/collective/socket.h\"       // for LastError\n#include \"xgboost/context.h\"                 // for Context\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::error {\n[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {\n  std::stringstream ss;\n  ss << \"`\" << old << \"` is deprecated since\" << since << \", use `\" << replacement << \"` instead.\";\n  return ss.str();\n}\n\n[[nodiscard]] std::string InvalidModel(StringView fname) {\n  std::stringstream ss;\n  ss << \"Invalid model format in: `\" << fname << \"`.\";\n  return ss.str();\n}\n\n[[nodiscard]] std::string OldBinaryModel(StringView fname) {\n  std::stringstream ss;\n  ss << \"Failed to load model: `\" << fname << \"`. \";\n  ss << R\"doc(\nThe binary format has been deprecated in 1.6 and removed in 3.1, use UBJ or JSON\ninstead. You can port the binary model to UBJ and JSON by re-saving it with XGBoost\n3.0. See:\n\n    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html\n\nfor more info.\n)doc\";\n  return ss.str();\n}\n\nvoid WarnManualUpdater() {\n  static std::once_flag flag;\n  std::call_once(flag, [] {\n    LOG(WARNING)\n        << \"You have manually specified the `updater` parameter. The `tree_method` parameter \"\n           \"will be ignored. Incorrect sequence of updaters will produce undefined \"\n           \"behavior. 
For common uses, we recommend using `tree_method` parameter instead.\";\n  });\n}\n\nvoid WarnEmptyDataset() {\n  static std::once_flag flag;\n  std::call_once(flag,\n                 [] { LOG(WARNING) << \"Empty dataset at worker: \" << collective::GetRank(); });\n}\n\nvoid MismatchedDevices(Context const* booster, Context const* data) {\n  static std::once_flag flag;\n  std::call_once(flag, [&] {\n    LOG(WARNING)\n        << \"Falling back to prediction using DMatrix due to mismatched devices. This might \"\n           \"lead to higher memory usage and slower performance. XGBoost is running on: \"\n        << booster->DeviceName() << \", while the input data is on: \" << data->DeviceName() << \".\\n\"\n        << R\"(Potential solutions:\n- Use a data structure that matches the device ordinal in the booster.\n- Set the device for booster before call to inplace_predict.\n\nThis warning will only be shown once.\n)\";\n  });\n}\n\nvoid CheckOldNccl(std::int32_t major, std::int32_t minor, std::int32_t patch) {\n  auto msg = [&] {\n    std::stringstream ss;\n    ss << \"NCCL version too old: \" << \"(\" << major << \".\" << minor << \".\" << patch << \")\"\n       << \". Install NCCL >= 2.23.4 .\";\n    return ss.str();\n  };\n\n  // Minimum required version.\n  CHECK_GE(major, 2) << msg();\n  CHECK_GE(minor, 21) << msg();\n\n  // With 2.23.4+, we can abort the NCCL communicator after timeout.\n  if (minor < 23) {\n    LOG(WARNING) << msg();\n  }\n}\n\n[[nodiscard]] std::error_code SystemError() {\n  std::int32_t errsv = system::LastError();\n  auto err = std::error_code{errsv, std::system_category()};\n  return err;\n}\n\nvoid InvalidIntercept(std::int32_t n_classes, bst_target_t n_targets, std::size_t intercept_len) {\n  std::stringstream ss;\n  ss << \"Invalid `base_score`, it should match the number of outputs for multi-class/target \"\n     << \"models. 
`base_score` len: \" << intercept_len;\n  if (n_classes > 1) {\n    ss << \", `n_classes`: \" << n_classes;\n  }\n  if (n_targets > 1) {\n    ss << \", `n_targets`: \" << n_targets;\n  }\n  LOG(FATAL) << ss.str();\n}\n}  // namespace xgboost::error\n"
  },
  {
    "path": "src/common/error_msg.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost contributors\n *\n * \\brief Common error message for various checks.\n */\n#ifndef XGBOOST_COMMON_ERROR_MSG_H_\n#define XGBOOST_COMMON_ERROR_MSG_H_\n\n#include <cstdint>       // for uint64_t\n#include <limits>        // for numeric_limits\n#include <string>        // for string\n#include <system_error>  // for error_code\n\n#include \"xgboost/base.h\"     // for bst_feature_t\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/logging.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::error {\nconstexpr StringView GroupWeight() {\n  return \"Size of weight must equal to the number of query groups when ranking group is used.\";\n}\n\nconstexpr StringView GroupSize() {\n  return \"Invalid query group structure. The number of rows obtained from group doesn't equal to \";\n}\n\nconstexpr StringView LabelScoreSize() {\n  return \"The size of label doesn't match the size of prediction.\";\n}\n\nconstexpr StringView InfInData() {\n  return \"Input data contains `inf` or a value too large, while `missing` is not set to `inf`\";\n}\n\nconstexpr StringView NoF128() {\n  return \"128-bit floating point is not supported on current platform.\";\n}\n\nconstexpr StringView InconsistentMaxBin() {\n  return \"Inconsistent `max_bin`. 
`max_bin` should be the same across different QuantileDMatrix, \"\n         \"and consistent with the Booster being trained.\";\n}\n\nconstexpr StringView InvalidMaxBin() { return \"`max_bin` must be equal to or greater than 2.\"; }\n\nconstexpr StringView UnknownDevice() { return \"Unknown device type.\"; }\n\ninline void MaxFeatureSize(std::uint64_t n_features) {\n  auto max_n_features = std::numeric_limits<bst_feature_t>::max();\n  CHECK_LE(n_features, max_n_features)\n      << \"Unfortunately, XGBoost does not support data matrices with \"\n      << std::numeric_limits<bst_feature_t>::max() << \" features or greater\";\n}\n\nconstexpr StringView InplacePredictProxy() {\n  return \"Inplace predict accepts only DMatrixProxy as input.\";\n}\n\ninline void MaxSampleSize(std::size_t n) {\n  LOG(FATAL) << \"Sample size too large for the current updater. Maximum number of samples:\" << n\n             << \". Consider using a different updater or tree_method.\";\n}\n\nconstexpr StringView OldSerialization() {\n  return R\"doc(If you are loading a serialized model (like pickle in Python, RDS in R) or\nconfiguration generated by an older version of XGBoost, please export the model by calling\n`Booster.save_model` from that version first, then load it back in current version. See:\n\n    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html\n\nfor more details about differences between saving model and serializing.\n)doc\";\n}\n\ninline void WarnOldSerialization() {\n  // Display it once is enough. 
Otherwise this can be really verbose in distributed\n  // environments.\n  static thread_local bool logged{false};\n  if (logged) {\n    return;\n  }\n  LOG(WARNING) << OldSerialization();\n  logged = true;\n}\n\n[[nodiscard]] std::string InvalidModel(StringView fname);\n\n[[nodiscard]] std::string OldBinaryModel(StringView fname);\n\nvoid WarnManualUpdater();\n\nvoid WarnEmptyDataset();\n\n[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);\n\nconstexpr StringView InvalidCUDAOrdinal() {\n  return \"Invalid device. `device` is required to be CUDA and there must be at least one GPU \"\n         \"available for using GPU.\";\n}\n\nvoid MismatchedDevices(Context const* booster, Context const* data);\n\ninline auto NoFederated() { return \"XGBoost is not compiled with federated learning support.\"; }\n\ninline auto NoCategorical(std::string name) {\n  return name + \" doesn't support categorical features.\";\n}\n\nconstexpr StringView InconsistentFeatureTypes() {\n  return \"Inconsistent feature types between batches.\";\n}\n\nconstexpr StringView InconsistentCategories() {\n  return \"Inconsistent number of categories between batches.\";\n}\n\nvoid CheckOldNccl(std::int32_t major, std::int32_t minor, std::int32_t patch);\n\nconstexpr StringView ZeroCudaMemory() {\n  return \"No GPU memory is left, are you using RMM? If so, please install XGBoost with RMM \"\n         \"support. If you are using other types of memory pool, please consider reserving a \"\n         \"portion of the GPU memory for XGBoost.\";\n}\n\n// float64 is not supported by JSON yet. 
Also, floating point as categories is tricky\n// since floating point equality test is inaccurate for most hardware.\nconstexpr StringView NoFloatCat() {\n  return \"Category index from DataFrame has floating point dtype, consider using strings or \"\n         \"integers instead.\";\n}\n\nconstexpr StringView CacheHostRatioNotImpl() {\n  return \"`cache_host_ratio` is only used by the GPU `ExtMemQuantileDMatrix`.\";\n}\nconstexpr StringView CacheHostRatioInvalid() {\n  return \"`cache_host_ratio` must be in range [0, 1].\";\n}\n\n[[nodiscard]] std::error_code SystemError();\n\nvoid InvalidIntercept(std::int32_t n_classes, bst_target_t n_targets, std::size_t intercept_len);\n\ninline void Unreachable() { LOG(FATAL) << \"Unreachable\"; }\n}  // namespace xgboost::error\n#endif  // XGBOOST_COMMON_ERROR_MSG_H_\n"
  },
  {
    "path": "src/common/expectile_loss_utils.cc",
    "content": "/**\n * Copyright 2026, XGBoost contributors\n */\n#include \"expectile_loss_utils.h\"\n\nnamespace xgboost::common {\nDMLC_REGISTER_PARAMETER(ExpectileLossParam);\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/expectile_loss_utils.h",
    "content": "/**\n * Copyright 2026, XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_EXPECTILE_LOSS_UTILS_H_\n#define XGBOOST_COMMON_EXPECTILE_LOSS_UTILS_H_\n\n#include <algorithm>  // for all_of\n\n#include \"param_array.h\"        // for ParamArray\n#include \"xgboost/logging.h\"    // CHECK\n#include \"xgboost/parameter.h\"  // XGBoostParameter\n\nnamespace xgboost::common {\nstruct ExpectileLossParam : public XGBoostParameter<ExpectileLossParam> {\n  ParamArray<float> expectile_alpha{\"expectile_alpha\"};\n  DMLC_DECLARE_PARAMETER(ExpectileLossParam) {\n    DMLC_DECLARE_FIELD(expectile_alpha)\n        .describe(\"List of expectiles for expectile loss.\")\n        .set_default(ParamArray<float>{\"expectile_alpha\"});\n  }\n  void Validate() const {\n    CHECK(GetInitialised());\n    CHECK(!expectile_alpha.Get().empty());\n    auto const& array = expectile_alpha.Get();\n    auto valid =\n        std::all_of(array.cbegin(), array.cend(), [](auto q) { return q >= 0.0 && q <= 1.0; });\n    CHECK(valid) << \"expectile alpha must be in the range [0.0, 1.0].\";\n  }\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_EXPECTILE_LOSS_UTILS_H_\n"
  },
  {
    "path": "src/common/group_data.h",
    "content": "/*!\n * Copyright 2014-2021 by Contributors\n * \\file group_data.h\n * \\brief this file defines utils to group data by integer keys\n *     Input: given input sequence (key,value), (k1,v1), (k2,v2)\n *     Ouptupt: an array of values data = [v1,v2,v3 .. vn]\n *              and a group pointer ptr,\n *              data[ptr[k]:ptr[k+1]] contains values that corresponds to key k\n *\n * This can be used to construct CSR/CSC matrix from un-ordered input\n * The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_GROUP_DATA_H_\n#define XGBOOST_COMMON_GROUP_DATA_H_\n\n#include <cstddef>\n#include <vector>\n#include <algorithm>\n#include <utility>\n\n#include \"xgboost/base.h\"\n\nnamespace xgboost {\nnamespace common {\n/*!\n * \\brief multi-thread version of group builder\n * \\tparam ValueType type of entries in the sparse matrix\n * \\tparam SizeType type of the index range holder\n * \\tparam is_row_major bool value helps to reduce memory for row major\n */\ntemplate<typename ValueType, typename SizeType = bst_ulong, bool is_row_major = false>\nclass ParallelGroupBuilder {\n public:\n  /**\n   * \\brief parallel group builder of data.\n   *\n   * \\param [in,out]  p_rptr          Row offsets for CSR matrix.\n   * \\param [in,out]  p_data          Data vector to populate\n   * \\param           base_row_offset (Optional) If the matrix we are building\n   * is already partially populated, use this to indicate the row index we are\n   * starting from. 
This saves considerable amounts of time/memory when\n   * incrementally building.\n   */\n  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,\n                       std::vector<ValueType> *p_data,\n                       size_t base_row_offset = 0)\n      : rptr_(*p_rptr),\n        data_(*p_data),\n        base_row_offset_(base_row_offset) {}\n\n  /*!\n   * \\brief step 1: initialize the helper, with hint of number keys\n   *                and thread used in the construction\n   * \\param max_key number of keys in the matrix, can be smaller than expected,\n   *                for row major adapter max_key is equal to batch size\n   * \\param nthread number of thread that will be used in construction\n   */\n  void InitBudget(std::size_t max_key, int nthread) {\n    thread_rptr_.resize(nthread);\n    const size_t full_size = is_row_major ? max_key : max_key - std::min(base_row_offset_, max_key);\n    thread_displacement_ = is_row_major ? full_size / nthread : 0;\n    for (std::size_t i = 0; i < thread_rptr_.size() - 1; ++i) {\n      const size_t thread_size = is_row_major ? thread_displacement_ : full_size;\n      thread_rptr_[i].resize(thread_size, 0);\n    }\n    const size_t last_thread_size = is_row_major ? (full_size - (nthread - 1)*thread_displacement_)\n                                                 : full_size;\n    thread_rptr_[nthread - 1].resize(last_thread_size, 0);\n  }\n\n  /*!\n   * \\brief step 2: add budget to each key\n   * \\param key the key\n   * \\param threadid the id of thread that calls this function\n   * \\param nelem number of element budget add to this row\n   */\n  void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {\n    std::vector<SizeType> &trptr = thread_rptr_[threadid];\n    size_t offset_key = is_row_major ? 
(key - base_row_offset_ - threadid*thread_displacement_)\n                                     : (key - base_row_offset_);\n    if (trptr.size() < offset_key + 1) {\n      trptr.resize(offset_key + 1, 0);\n    }\n    trptr[offset_key] += nelem;\n  }\n\n  /*! \\brief step 3: initialize the necessary storage */\n  inline void InitStorage() {\n    if (is_row_major) {\n      size_t expected_rows = 0;\n      for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {\n        expected_rows += thread_rptr_[tid].size();\n      }\n      // initialize rptr to be beginning of each segment\n      SizeType rptr_fill_value = rptr_.empty() ? 0 : rptr_.back();\n      rptr_.resize(expected_rows + base_row_offset_ + 1, rptr_fill_value);\n\n      std::size_t count = 0;\n      size_t offset_idx = base_row_offset_ + 1;\n      for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {\n        std::vector<SizeType> &trptr = thread_rptr_[tid];\n        for (std::size_t i = 0; i < trptr.size(); ++i) {\n          std::size_t thread_count = trptr[i];  // how many entries in this row\n          trptr[i] = count + rptr_fill_value;\n          count += thread_count;\n          if (offset_idx < rptr_.size()) {\n            rptr_[offset_idx++] += count;\n          }\n        }\n      }\n      data_.resize(rptr_.back());  // usage of empty allocator can help to improve performance\n    } else {\n      // set rptr to correct size\n      SizeType rptr_fill_value = rptr_.empty() ? 
0 : rptr_.back();\n      for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {\n        if (rptr_.size() <= thread_rptr_[tid].size() + base_row_offset_) {\n          rptr_.resize(thread_rptr_[tid].size() + base_row_offset_ + 1,\n                       rptr_fill_value);  // key + 1\n        }\n      }\n      // initialize rptr to be beginning of each segment\n      std::size_t count = 0;\n      for (std::size_t i = base_row_offset_; i + 1 < rptr_.size(); ++i) {\n        for (std::size_t tid = 0; tid < thread_rptr_.size(); ++tid) {\n          std::vector<SizeType> &trptr = thread_rptr_[tid];\n          if (i < trptr.size() +\n                      base_row_offset_) {  // i^th row is assigned for this thread\n            std::size_t thread_count =\n                trptr[i - base_row_offset_];  // how many entries in this row\n            trptr[i - base_row_offset_] = count + rptr_.back();\n            count += thread_count;\n          }\n        }\n        rptr_[i + 1] += count;  // pointer accumulated from all thread\n      }\n      data_.resize(rptr_.back());\n    }\n  }\n\n  /*!\n   * \\brief step 4: add data to the allocated space,\n   *   the calls to this function should be exactly match previous call to AddBudget\n   *\n   * \\param key the key of group.\n   * \\param value The value to be pushed to the group.\n   * \\param threadid the id of thread that calls this function\n   */\n  void Push(std::size_t key, ValueType&& value, int threadid) {\n    size_t offset_key = is_row_major ? (key - base_row_offset_ - threadid * thread_displacement_)\n                                     : (key - base_row_offset_);\n    SizeType &rp = thread_rptr_[threadid][offset_key];\n    data_[rp++] = std::move(value);\n  }\n\n private:\n  /*! \\brief pointer to the beginning and end of each continuous key */\n  std::vector<SizeType> &rptr_;\n  /*! \\brief index of nonzero entries in each row */\n  std::vector<ValueType> &data_;\n  /*! 
\\brief thread local data structure */\n  std::vector<std::vector<SizeType> > thread_rptr_;\n  /** \\brief Used when rows being pushed into the builder are strictly above some number. */\n  size_t base_row_offset_;\n  /** \\brief Used for row major adapters to handle reduced thread local memory allocation */\n  size_t thread_displacement_;\n};\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_GROUP_DATA_H_\n"
  },
  {
    "path": "src/common/hist_util.cc",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\file hist_util.cc\n */\n#include \"hist_util.h\"\n\n#include <dmlc/timer.h>\n\n#include <vector>\n\n#include \"../data/adapter.h\"         // for SparsePageAdapterBatch\n#include \"../data/gradient_index.h\"  // for GHistIndexMatrix\n#include \"io.h\"                      // for AlignedResourceReadStream, AlignedFileWriteStream\n#include \"quantile.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"     // for SparsePage, SortedCSCPage\n\n#if defined(XGBOOST_MM_PREFETCH_PRESENT)\n#include <xmmintrin.h>\n#define PREFETCH_READ_T0(addr) _mm_prefetch(reinterpret_cast<const char *>(addr), _MM_HINT_T0)\n#elif defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)\n#define PREFETCH_READ_T0(addr) __builtin_prefetch(reinterpret_cast<const char *>(addr), 0, 3)\n#else  // no SW pre-fetching available; PREFETCH_READ_T0 is no-op\n#define PREFETCH_READ_T0(addr) \\\n  do {                         \\\n  } while (0)\n#endif  // defined(XGBOOST_MM_PREFETCH_PRESENT)\n\nnamespace xgboost::common {\nHistogramCuts::HistogramCuts(bst_feature_t n_features)\n    : cut_ptrs_(static_cast<std::size_t>(n_features) + 1, 0) {}\n\nvoid HistogramCuts::Save(common::AlignedFileWriteStream *fo) const {\n  auto const &ptrs = this->Ptrs();\n  CHECK_LE(Span{ptrs}.size_bytes(), WriteVec(fo, ptrs));\n  auto const &vals = this->Values();\n  CHECK_LE(Span{vals}.size_bytes(), WriteVec(fo, vals));\n  CHECK_GE(fo->Write(has_categorical_), sizeof(has_categorical_));\n  CHECK_GE(fo->Write(max_cat_), sizeof(max_cat_));\n}\n\n[[nodiscard]] HistogramCuts *HistogramCuts::Load(common::AlignedResourceReadStream *fi) {\n  auto p_cuts = new HistogramCuts{0};\n  CHECK(ReadVec(fi, &p_cuts->cut_ptrs_.HostVector()));\n  CHECK(ReadVec(fi, &p_cuts->cut_values_.HostVector()));\n  CHECK(fi->Read(&p_cuts->has_categorical_));\n  CHECK(fi->Read(&p_cuts->max_cat_));\n  return p_cuts;\n}\n\nHistogramCuts 
SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted,\n                              Span<float const> hessian) {\n  auto const &info = m->Info();\n  auto n_threads = ctx->Threads();\n  std::vector<bst_idx_t> reduced(info.num_col_, 0);\n  for (auto const &page : m->GetBatches<SparsePage>()) {\n    auto const &entries_per_column =\n        CalcColumnSize(data::SparsePageAdapterBatch{page.GetView()}, info.num_col_, n_threads,\n                       [](auto) { return true; });\n    CHECK_EQ(entries_per_column.size(), info.num_col_);\n    for (size_t i = 0; i < entries_per_column.size(); ++i) {\n      reduced[i] += entries_per_column[i];\n    }\n  }\n\n  if (!use_sorted) {\n    HostSketchContainer container(ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced,\n                                  HostSketchContainer::UseGroup(info));\n    for (auto const &page : m->GetBatches<SparsePage>()) {\n      container.PushRowPage(page, info, hessian);\n    }\n    return container.MakeCuts(ctx, m->Info());\n  } else {\n    HostSketchContainer container{ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced,\n                                  HostSketchContainer::UseGroup(info)};\n    for (auto const &page : m->GetBatches<SortedCSCPage>(ctx)) {\n      container.PushColPage(page, info, hessian);\n    }\n    return container.MakeCuts(ctx, m->Info());\n  }\n}\n\n/*!\n * \\brief Increment hist as dst += add in range [begin, end)\n */\nvoid IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end) {\n  double *pdst = reinterpret_cast<double *>(dst.data());\n  const double *padd = reinterpret_cast<const double *>(add.data());\n\n  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {\n    pdst[i] += padd[i];\n  }\n}\n\n/*!\n * \\brief Copy hist from src to dst in range [begin, end)\n */\nvoid CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end) {\n  double *pdst = reinterpret_cast<double 
*>(dst.data());\n  const double *psrc = reinterpret_cast<const double *>(src.data());\n\n  for (size_t i = 2 * begin; i < 2 * end; ++i) {\n    pdst[i] = psrc[i];\n  }\n}\n\n/*!\n * \\brief Compute Subtraction: dst = src1 - src2 in range [begin, end)\n */\nvoid SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,\n                     size_t end) {\n  double *pdst = reinterpret_cast<double *>(dst.data());\n  const double *psrc1 = reinterpret_cast<const double *>(src1.data());\n  const double *psrc2 = reinterpret_cast<const double *>(src2.data());\n\n  for (size_t i = 2 * begin; i < 2 * end; ++i) {\n    pdst[i] = psrc1[i] - psrc2[i];\n  }\n}\n\nstruct Prefetch {\n public:\n  static constexpr size_t kCacheLineSize = 64;\n  static constexpr size_t kPrefetchOffset = 10;\n\n private:\n  static constexpr size_t kNoPrefetchSize =\n      kPrefetchOffset + kCacheLineSize / sizeof(decltype(GHistIndexMatrix::row_ptr)::value_type);\n\n public:\n  static size_t NoPrefetchSize(size_t rows) { return std::min(rows, kNoPrefetchSize); }\n\n  template <typename T>\n  static constexpr size_t GetPrefetchStep() {\n    return Prefetch::kCacheLineSize / sizeof(T);\n  }\n};\n\nconstexpr size_t Prefetch::kNoPrefetchSize;\n\nstruct RuntimeFlags {\n  const bool first_page;\n  const bool read_by_column;\n  const BinTypeSize bin_type_size;\n};\n\ntemplate <bool _any_missing, bool _first_page = false, bool _read_by_column = false,\n          typename BinIdxTypeName = uint8_t>\nclass GHistBuildingManager {\n public:\n  constexpr static bool kAnyMissing = _any_missing;\n  constexpr static bool kFirstPage = _first_page;\n  constexpr static bool kReadByColumn = _read_by_column;\n  using BinIdxType = BinIdxTypeName;\n\n private:\n  template <bool new_first_page>\n  struct SetFirstPage {\n    using Type = GHistBuildingManager<kAnyMissing, new_first_page, kReadByColumn, BinIdxType>;\n  };\n\n  template <bool new_read_by_column>\n  struct SetReadByColumn {\n    using Type 
= GHistBuildingManager<kAnyMissing, kFirstPage, new_read_by_column, BinIdxType>;\n  };\n\n  template <typename NewBinIdxType>\n  struct SetBinIdxType {\n    using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn, NewBinIdxType>;\n  };\n\n  using Type = GHistBuildingManager<kAnyMissing, kFirstPage, kReadByColumn, BinIdxType>;\n\n public:\n  /* Entry point to dispatcher\n   * This function check matching run time flags to compile time flags.\n   * In case of difference, it creates a Manager with different template parameters\n   *  and forward the call there.\n   */\n  template <typename Fn>\n  static void DispatchAndExecute(const RuntimeFlags &flags, Fn &&fn) {\n    if (flags.first_page != kFirstPage) {\n      SetFirstPage<true>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));\n    } else if (flags.read_by_column != kReadByColumn) {\n      SetReadByColumn<true>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));\n    } else if (flags.bin_type_size != sizeof(BinIdxType)) {\n      DispatchBinType(flags.bin_type_size, [&](auto t) {\n        using NewBinIdxType = decltype(t);\n        SetBinIdxType<NewBinIdxType>::Type::DispatchAndExecute(flags, std::forward<Fn>(fn));\n      });\n    } else {\n      fn(Type());\n    }\n  }\n};\n\ntemplate <bool do_prefetch, class BuildingManager>\nvoid RowsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n                             const GHistIndexMatrix &gmat, GHistRow hist) {\n  constexpr bool kAnyMissing = BuildingManager::kAnyMissing;\n  constexpr bool kFirstPage = BuildingManager::kFirstPage;\n  using BinIdxType = typename BuildingManager::BinIdxType;\n\n  const size_t size = row_indices.size();\n  bst_idx_t const *rid = row_indices.data();\n  auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());\n  const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();\n\n  auto const &row_ptr = gmat.row_ptr.data();\n  auto base_rowid = 
gmat.base_rowid;\n  std::uint32_t const *offsets = gmat.index.Offset();\n  // There's no feature-based compression if missing value is present.\n  if (kAnyMissing) {\n    CHECK(!offsets);\n  } else {\n    CHECK(offsets);\n  }\n\n  auto get_row_ptr = [&](bst_idx_t ridx) {\n    return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];\n  };\n  auto get_rid = [&](bst_idx_t ridx) {\n    return kFirstPage ? ridx : (ridx - base_rowid);\n  };\n\n  CHECK_NE(row_indices.size(), 0);\n  const size_t n_features =\n      get_row_ptr(row_indices.data()[0] + 1) - get_row_ptr(row_indices.data()[0]);\n  auto hist_data = reinterpret_cast<double *>(hist.data());\n  const uint32_t two{2};  // Each element from 'gpair' and 'hist' contains\n                          // 2 FP values: gradient and hessian.\n                          // So we need to multiply each row-index/bin-index by 2\n                          // to work with gradient pairs as a single row FP array\n\n  for (std::size_t i = 0; i < size; ++i) {\n    const size_t icol_start = kAnyMissing ? get_row_ptr(rid[i]) : get_rid(rid[i]) * n_features;\n    const size_t icol_end = kAnyMissing ? get_row_ptr(rid[i] + 1) : icol_start + n_features;\n\n    const size_t row_size = icol_end - icol_start;\n    const size_t idx_gh = two * rid[i];\n\n    if (do_prefetch) {\n      const size_t icol_start_prefetch =\n          kAnyMissing ? get_row_ptr(rid[i + Prefetch::kPrefetchOffset])\n                      : get_rid(rid[i + Prefetch::kPrefetchOffset]) * n_features;\n      const size_t icol_end_prefetch = kAnyMissing\n                                           ? 
get_row_ptr(rid[i + Prefetch::kPrefetchOffset] + 1)\n                                           : icol_start_prefetch + n_features;\n\n      PREFETCH_READ_T0(p_gpair + two * rid[i + Prefetch::kPrefetchOffset]);\n      for (size_t j = icol_start_prefetch; j < icol_end_prefetch;\n           j += Prefetch::GetPrefetchStep<uint32_t>()) {\n        PREFETCH_READ_T0(gradient_index + j);\n      }\n    }\n    const BinIdxType *gr_index_local = gradient_index + icol_start;\n\n    // The trick with pgh_t buffer helps the compiler to generate faster binary.\n    const float pgh_t[] = {p_gpair[idx_gh], p_gpair[idx_gh + 1]};\n    for (size_t j = 0; j < row_size; ++j) {\n      const uint32_t idx_bin =\n          two * (static_cast<uint32_t>(gr_index_local[j]) + (kAnyMissing ? 0 : offsets[j]));\n      auto hist_local = hist_data + idx_bin;\n      *(hist_local) += pgh_t[0];\n      *(hist_local + 1) += pgh_t[1];\n    }\n  }\n}\n\ntemplate <class BuildingManager>\nvoid ColsWiseBuildHistKernel(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n                             const GHistIndexMatrix &gmat, GHistRow hist) {\n  constexpr bool kAnyMissing = BuildingManager::kAnyMissing;\n  constexpr bool kFirstPage = BuildingManager::kFirstPage;\n  using BinIdxType = typename BuildingManager::BinIdxType;\n  const size_t size = row_indices.size();\n  bst_idx_t const *rid = row_indices.data();\n  auto const *pgh = reinterpret_cast<const float *>(gpair.data());\n  const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();\n\n  auto const &row_ptr = gmat.row_ptr.data();\n  auto base_rowid = gmat.base_rowid;\n  const uint32_t *offsets = gmat.index.Offset();\n  auto get_row_ptr = [&](bst_idx_t ridx) {\n    return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];\n  };\n  auto get_rid = [&](bst_idx_t ridx) {\n    return kFirstPage ? 
ridx : (ridx - base_rowid);\n  };\n\n  const size_t n_features = gmat.cut.Ptrs().size() - 1;\n  const size_t n_columns = n_features;\n  auto hist_data = reinterpret_cast<double *>(hist.data());\n  const uint32_t two{2};  // Each element from 'gpair' and 'hist' contains\n                          // 2 FP values: gradient and hessian.\n                          // So we need to multiply each row-index/bin-index by 2\n                          // to work with gradient pairs as a single row FP array\n  for (size_t cid = 0; cid < n_columns; ++cid) {\n    const uint32_t offset = kAnyMissing ? 0 : offsets[cid];\n    for (size_t i = 0; i < size; ++i) {\n      const size_t row_id = rid[i];\n      const size_t icol_start = kAnyMissing ? get_row_ptr(row_id) : get_rid(row_id) * n_features;\n      const size_t icol_end = kAnyMissing ? get_row_ptr(rid[i] + 1) : icol_start + n_features;\n\n      if (cid < icol_end - icol_start) {\n        const BinIdxType *gr_index_local = gradient_index + icol_start;\n        const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[cid]) + offset);\n        auto hist_local = hist_data + idx_bin;\n\n        const size_t idx_gh = two * row_id;\n        // The trick with pgh_t buffer helps the compiler to generate faster binary.\n        const float pgh_t[] = {pgh[idx_gh], pgh[idx_gh + 1]};\n        *(hist_local) += pgh_t[0];\n        *(hist_local + 1) += pgh_t[1];\n      }\n    }\n  }\n}\n\ntemplate <class BuildingManager>\nvoid BuildHistDispatch(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n                       const GHistIndexMatrix &gmat, GHistRow hist) {\n  if (BuildingManager::kReadByColumn) {\n    ColsWiseBuildHistKernel<BuildingManager>(gpair, row_indices, gmat, hist);\n  } else {\n    const size_t nrows = row_indices.size();\n    const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);\n    // if need to work with all rows from bin-matrix (e.g. 
root node)\n    const bool contiguousBlock =\n        (row_indices.begin()[nrows - 1] - row_indices.begin()[0]) == (nrows - 1);\n\n    if (contiguousBlock) {\n      if (row_indices.empty()) {\n        return;\n      }\n      // contiguous memory access, built-in HW prefetching is enough\n      RowsWiseBuildHistKernel<false, BuildingManager>(gpair, row_indices, gmat, hist);\n    } else {\n      auto span1 = row_indices.subspan(0, row_indices.size() - no_prefetch_size);\n      if (!span1.empty()) {\n        RowsWiseBuildHistKernel<true, BuildingManager>(gpair, span1, gmat, hist);\n      }\n      // no prefetching to avoid loading extra memory\n      auto span2 = row_indices.subspan(row_indices.size() - no_prefetch_size);\n      if (!span2.empty()) {\n        RowsWiseBuildHistKernel<false, BuildingManager>(gpair, span2, gmat, hist);\n      }\n    }\n  }\n}\n\ntemplate <bool any_missing>\nvoid BuildHist(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n               const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column) {\n  bool first_page = gmat.base_rowid == 0;\n  auto bin_type_size = gmat.index.GetBinTypeSize();\n\n  GHistBuildingManager<any_missing>::DispatchAndExecute(\n      {first_page, read_by_column, bin_type_size}, [&](auto t) {\n        using BuildingManager = decltype(t);\n        BuildHistDispatch<BuildingManager>(gpair, row_indices, gmat, hist);\n      });\n}\n\ntemplate void BuildHist<true>(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n                              const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column);\n\ntemplate void BuildHist<false>(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n                               const GHistIndexMatrix &gmat, GHistRow hist, bool read_by_column);\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/hist_util.cu",
    "content": "/**\n * Copyright 2018~2026, XGBoost contributors\n */\n#include <thrust/binary_search.h>\n#include <thrust/copy.h>\n#include <thrust/execution_policy.h>\n#include <thrust/functional.h>\n#include <thrust/iterator/counting_iterator.h>\n#include <thrust/iterator/discard_iterator.h>\n#include <thrust/iterator/transform_iterator.h>\n#include <thrust/reduce.h>\n#include <thrust/sort.h>\n#include <thrust/tuple.h>  // for tuple\n#include <xgboost/logging.h>\n\n#include <cstddef>  // for size_t\n#include <utility>\n#include <vector>\n\n#include \"categorical.h\"\n#include \"cuda_context.cuh\"  // for CUDAContext\n#include \"device_helpers.cuh\"\n#include \"hist_util.cuh\"\n#include \"hist_util.h\"\n#include \"quantile.h\"\n#include \"xgboost/host_device_vector.h\"\n\nnamespace xgboost::common {\nconstexpr float SketchContainer::kFactor;\n\nnamespace detail {\nsize_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows) {\n  double eps = 1.0 / (WQSketch::kFactor * max_bins);\n  size_t num_cuts = WQuantileSketch::LimitSizeLevel(num_rows, eps);\n  return std::min(num_cuts, num_rows);\n}\n\nsize_t RequiredSampleCuts(bst_idx_t num_rows, bst_feature_t num_columns, size_t max_bins,\n                          bst_idx_t nnz) {\n  auto per_column = RequiredSampleCutsPerColumn(max_bins, num_rows);\n  auto if_dense = num_columns * per_column;\n  auto result = std::min(nnz, if_dense);\n  return result;\n}\n\nsize_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz, size_t num_bins,\n                      bool with_weights) {\n  size_t peak = 0;\n  // 0. Allocate cut pointer in quantile container by increasing: n_columns + 1\n  size_t total = (num_columns + 1) * sizeof(SketchContainer::OffsetT);\n  // 1. Copy and sort: 2 * bytes_per_element * shape\n  total += BytesPerElement(with_weights) * num_rows * num_columns;\n  peak = std::max(peak, total);\n  // 2. 
Deallocate bytes_per_element * shape due to reusing memory in sort.\n  total -= BytesPerElement(with_weights) * num_rows * num_columns / 2;\n  // 3. Allocate column size scan by increasing: n_columns + 1\n  total += (num_columns + 1) * sizeof(SketchContainer::OffsetT);\n  // 4. Allocate cut pointer by increasing: n_columns + 1\n  total += (num_columns + 1) * sizeof(SketchContainer::OffsetT);\n  // 5. Allocate cuts: assuming rows is greater than bins: n_columns * limit_size\n  total += RequiredSampleCuts(num_rows, num_bins, num_bins, nnz) * sizeof(SketchEntry);\n  // 6. Deallocate copied entries by reducing: bytes_per_element * shape.\n  peak = std::max(peak, total);\n  total -= (BytesPerElement(with_weights) * num_rows * num_columns) / 2;\n  // 7. Deallocate column size scan.\n  peak = std::max(peak, total);\n  total -= (num_columns + 1) * sizeof(SketchContainer::OffsetT);\n  // 8. Deallocate cut size scan.\n  total -= (num_columns + 1) * sizeof(SketchContainer::OffsetT);\n  // 9. Allocate final cut values and cut ptrs: std::min(rows, bins + 1) * n_columns +\n  //    n_columns + 1\n  total += std::min(num_rows, num_bins) * num_columns * sizeof(float);\n  total +=\n      (num_columns + 1) *\n      sizeof(std::remove_reference_t<decltype(std::declval<HistogramCuts>().Ptrs())>::value_type);\n  peak = std::max(peak, total);\n\n  return peak;\n}\n\nbst_idx_t SketchBatchNumElements(bst_idx_t sketch_batch_num_elements, SketchShape shape, int device,\n                                 size_t num_cuts, bool has_weight, std::size_t container_bytes) {\n  auto constexpr kIntMax = static_cast<std::size_t>(std::numeric_limits<std::int32_t>::max());\n\n  // Device available memory is not accurate when a memory pool is used.\n  auto avoid_estimation_with_pool = [&] {\n    (void)device;\n    double total_mem = curt::TotalMemory() - container_bytes;\n    double total_f32 = total_mem / sizeof(float);\n    double n_max_used_f32 = std::max(total_f32 / 8.0, 1.0);\n    if (shape.nnz > 
shape.Size()) {\n      // Unknown nnz\n      shape.nnz = shape.Size();\n    }\n    return std::min(static_cast<bst_idx_t>(n_max_used_f32), shape.nnz);\n  };\n\n#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n  // Early exit with RMM pool\n  return avoid_estimation_with_pool();\n#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n  // Early exit with CUDA async pool\n  if (GlobalConfigThreadLocalStore::Get()->use_cuda_async_pool) {\n    return avoid_estimation_with_pool();\n  }\n\n  (void)container_bytes;  // We known the remaining size when RMM is not used.\n  if (sketch_batch_num_elements == detail::UnknownSketchNumElements()) {\n    auto required_memory =\n        RequiredMemory(shape.n_samples, shape.n_features, shape.nnz, num_cuts, has_weight);\n    // use up to 80% of available space\n    auto avail = dh::AvailableMemory(device) * 0.8;\n    CHECK_GT(avail, 0) << error::ZeroCudaMemory();\n    if (required_memory > avail) {\n      sketch_batch_num_elements = avail / BytesPerElement(has_weight);\n    } else {\n      sketch_batch_num_elements = std::min(shape.Size(), shape.nnz);\n    }\n  }\n\n  return std::min(sketch_batch_num_elements, kIntMax);\n}\n\nvoid SortByWeight(Context const* ctx, dh::device_vector<float>* weights,\n                  dh::device_vector<Entry>* sorted_entries) {\n  // Sort both entries and wegihts.\n  auto cuctx = ctx->CUDACtx();\n  CHECK_EQ(weights->size(), sorted_entries->size());\n  thrust::sort_by_key(cuctx->TP(), sorted_entries->begin(), sorted_entries->end(), weights->begin(),\n                      detail::EntryCompareOp());\n\n  // Scan weights\n  thrust::inclusive_scan_by_key(\n      cuctx->CTP(), sorted_entries->begin(), sorted_entries->end(), weights->begin(),\n      weights->begin(),\n      [=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });\n}\n\nvoid RemoveDuplicatedCategories(Context const* ctx, MetaInfo const& info,\n                                Span<bst_idx_t> d_cuts_ptr,\n     
                           dh::device_vector<Entry>* p_sorted_entries,\n                                dh::device_vector<float>* p_sorted_weights,\n                                dh::caching_device_vector<size_t>* p_column_sizes_scan) {\n  info.feature_types.SetDevice(ctx->Device());\n  auto d_feature_types = info.feature_types.ConstDeviceSpan();\n  CHECK(!d_feature_types.empty());\n  auto& column_sizes_scan = *p_column_sizes_scan;\n  auto& sorted_entries = *p_sorted_entries;\n  // Removing duplicated entries in categorical features.\n\n  // We don't need to accumulate weight for duplicated entries as there's no weighted\n  // sketching for categorical features, the categories are the cut values.\n  dh::caching_device_vector<size_t> new_column_scan(column_sizes_scan.size());\n  std::size_t n_uniques{0};\n  if (p_sorted_weights) {\n    using Pair = thrust::tuple<Entry, float>;\n    auto d_sorted_entries = dh::ToSpan(sorted_entries);\n    auto d_sorted_weights = dh::ToSpan(*p_sorted_weights);\n    auto val_in_it = thrust::make_zip_iterator(d_sorted_entries.data(), d_sorted_weights.data());\n    auto val_out_it = thrust::make_zip_iterator(d_sorted_entries.data(), d_sorted_weights.data());\n    n_uniques =\n        dh::SegmentedUnique(ctx->CUDACtx()->CTP(), column_sizes_scan.data().get(),\n                            column_sizes_scan.data().get() + column_sizes_scan.size(), val_in_it,\n                            val_in_it + sorted_entries.size(), new_column_scan.data().get(),\n                            val_out_it, [=] __device__(Pair const& l, Pair const& r) {\n                              Entry const& le = thrust::get<0>(l);\n                              Entry const& re = thrust::get<0>(r);\n                              if (le.index == re.index && IsCat(d_feature_types, le.index)) {\n                                return le.fvalue == re.fvalue;\n                              }\n                              return false;\n                            });\n    
p_sorted_weights->resize(n_uniques);\n  } else {\n    n_uniques = dh::SegmentedUnique(ctx->CUDACtx()->CTP(), column_sizes_scan.data().get(),\n                                    column_sizes_scan.data().get() + column_sizes_scan.size(),\n                                    sorted_entries.begin(), sorted_entries.end(),\n                                    new_column_scan.data().get(), sorted_entries.begin(),\n                                    [=] __device__(Entry const& l, Entry const& r) {\n                                      if (l.index == r.index) {\n                                        if (IsCat(d_feature_types, l.index)) {\n                                          return l.fvalue == r.fvalue;\n                                        }\n                                      }\n                                      return false;\n                                    });\n  }\n  sorted_entries.resize(n_uniques);\n\n  // Renew the column scan and cut scan based on categorical data.\n  dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(info.num_col_ + 1);\n  CHECK_EQ(new_column_scan.size(), new_cuts_size.size());\n  dh::LaunchN(new_column_scan.size(), ctx->CUDACtx()->Stream(),\n              [=, d_new_cuts_size = dh::ToSpan(new_cuts_size),\n               d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan),\n               d_new_columns_ptr = dh::ToSpan(new_column_scan)] __device__(size_t idx) {\n                d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];\n                if (idx == d_new_columns_ptr.size() - 1) {\n                  return;\n                }\n                if (IsCat(d_feature_types, idx)) {\n                  // Cut size is the same as number of categories in input.\n                  d_new_cuts_size[idx] = d_new_columns_ptr[idx + 1] - d_new_columns_ptr[idx];\n                } else {\n                  d_new_cuts_size[idx] = d_cuts_ptr[idx + 1] - d_cuts_ptr[idx];\n                }\n              });\n  // 
Turn size into ptr.\n  thrust::exclusive_scan(ctx->CUDACtx()->CTP(), new_cuts_size.cbegin(), new_cuts_size.cend(),\n                         d_cuts_ptr.data());\n}\n}  // namespace detail\n\nvoid ProcessWeightedBatch(Context const* ctx, const SparsePage& page, MetaInfo const& info,\n                          std::size_t begin, std::size_t end,\n                          SketchContainer* sketch_container,  // <- output sketch\n                          int num_cuts_per_feature, common::Span<float const> sample_weight) {\n  dh::device_vector<Entry> sorted_entries;\n  if (page.data.DeviceCanRead()) {\n    // direct copy if data is already on device\n    auto const& d_data = page.data.ConstDevicePointer();\n    sorted_entries = dh::device_vector<Entry>(d_data + begin, d_data + end);\n  } else {\n    const auto& h_data = page.data.ConstHostVector();\n    sorted_entries = dh::device_vector<Entry>(h_data.begin() + begin, h_data.begin() + end);\n  }\n\n  bst_idx_t base_rowid = page.base_rowid;\n\n  dh::device_vector<float> entry_weight;\n  auto cuctx = ctx->CUDACtx();\n  if (!sample_weight.empty()) {\n    // Expand sample weight into entry weight.\n    CHECK_EQ(sample_weight.size(), info.num_row_);\n    entry_weight.resize(sorted_entries.size());\n    auto d_temp_weight = dh::ToSpan(entry_weight);\n    page.offset.SetDevice(ctx->Device());\n    auto row_ptrs = page.offset.ConstDeviceSpan();\n    thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), entry_weight.size(),\n                       [=] __device__(std::size_t idx) {\n                         std::size_t element_idx = idx + begin;\n                         std::size_t ridx = dh::SegmentId(row_ptrs, element_idx);\n                         d_temp_weight[idx] = sample_weight[ridx + base_rowid];\n                       });\n    detail::SortByWeight(ctx, &entry_weight, &sorted_entries);\n  } else {\n    thrust::sort(cuctx->TP(), sorted_entries.begin(), sorted_entries.end(),\n                 
detail::EntryCompareOp());\n  }\n\n  HostDeviceVector<SketchContainer::OffsetT> cuts_ptr;\n  dh::caching_device_vector<size_t> column_sizes_scan;\n  data::IsValidFunctor dummy_is_valid(std::numeric_limits<float>::quiet_NaN());\n  auto batch_it = dh::MakeTransformIterator<data::COOTuple>(\n      sorted_entries.data().get(), [] __device__(Entry const& e) -> data::COOTuple {\n        return {0, e.index, e.fvalue};  // row_idx is not needed for scaning column size.\n      });\n  detail::GetColumnSizesScan(ctx->CUDACtx(), ctx->Device(), info.num_col_, num_cuts_per_feature,\n                             IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr,\n                             &column_sizes_scan);\n  auto d_cuts_ptr = cuts_ptr.DeviceSpan();\n  if (sketch_container->HasCategorical()) {\n    auto p_weight = entry_weight.empty() ? nullptr : &entry_weight;\n    detail::RemoveDuplicatedCategories(ctx, info, d_cuts_ptr, &sorted_entries, p_weight,\n                                       &column_sizes_scan);\n  }\n\n  auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();\n  CHECK_EQ(d_cuts_ptr.size(), column_sizes_scan.size());\n\n  // Add cuts into sketches\n  sketch_container->Push(ctx, dh::ToSpan(sorted_entries), dh::ToSpan(column_sizes_scan), d_cuts_ptr,\n                         h_cuts_ptr.back(), dh::ToSpan(entry_weight));\n\n  sorted_entries.clear();\n  sorted_entries.shrink_to_fit();\n  CHECK_EQ(sorted_entries.capacity(), 0);\n  CHECK_NE(cuts_ptr.Size(), 0);\n}\n\n// Unify group weight, Hessian, and sample weight into sample weight.\n[[nodiscard]] Span<float const> UnifyWeight(CUDAContext const* cuctx, MetaInfo const& info,\n                                            common::Span<float const> hessian,\n                                            HostDeviceVector<float>* p_out_weight) {\n  if (hessian.empty()) {\n    if (info.IsRanking() && !info.weights_.Empty()) {\n      dh::device_vector<bst_group_t> group_ptr(info.group_ptr_);\n      auto 
d_group_ptr = dh::ToSpan(group_ptr);\n      CHECK_GE(d_group_ptr.size(), 2) << \"Must have at least 1 group for ranking.\";\n      auto d_weight = info.weights_.ConstDeviceSpan();\n      CHECK_EQ(d_weight.size(), d_group_ptr.size() - 1)\n          << \"Weight size should equal to number of groups.\";\n      p_out_weight->Resize(info.num_row_);\n      auto d_weight_out = p_out_weight->DeviceSpan();\n\n      thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), d_weight_out.size(),\n                         [=] XGBOOST_DEVICE(std::size_t i) {\n                           auto gidx = dh::SegmentId(d_group_ptr, i);\n                           d_weight_out[i] = d_weight[gidx];\n                         });\n      return p_out_weight->ConstDeviceSpan();\n    } else {\n      return info.weights_.ConstDeviceSpan();\n    }\n  }\n\n  // sketch with hessian as weight\n  p_out_weight->Resize(info.num_row_);\n  auto d_weight_out = p_out_weight->DeviceSpan();\n  if (!info.weights_.Empty()) {\n    // merge sample weight with hessian\n    auto d_weight = info.weights_.ConstDeviceSpan();\n    if (info.IsRanking()) {\n      dh::device_vector<bst_group_t> group_ptr(info.group_ptr_);\n      CHECK_EQ(hessian.size(), d_weight_out.size());\n      auto d_group_ptr = dh::ToSpan(group_ptr);\n      CHECK_GE(d_group_ptr.size(), 2) << \"Must have at least 1 group for ranking.\";\n      CHECK_EQ(d_weight.size(), d_group_ptr.size() - 1)\n          << \"Weight size should equal to number of groups.\";\n      thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), hessian.size(),\n                         [=] XGBOOST_DEVICE(std::size_t i) {\n                           d_weight_out[i] = d_weight[dh::SegmentId(d_group_ptr, i)] * hessian(i);\n                         });\n    } else {\n      CHECK_EQ(hessian.size(), info.num_row_);\n      CHECK_EQ(hessian.size(), d_weight.size());\n      CHECK_EQ(hessian.size(), d_weight_out.size());\n      thrust::for_each_n(\n          
cuctx->CTP(), thrust::make_counting_iterator(0ul), hessian.size(),\n          [=] XGBOOST_DEVICE(std::size_t i) { d_weight_out[i] = d_weight[i] * hessian(i); });\n    }\n  } else {\n    // copy hessian as weight\n    CHECK_EQ(d_weight_out.size(), hessian.size());\n    dh::safe_cuda(cudaMemcpyAsync(d_weight_out.data(), hessian.data(), hessian.size_bytes(),\n                                  cudaMemcpyDefault));\n  }\n  return d_weight_out;\n}\n\nHistogramCuts DeviceSketchWithHessian(Context const* ctx, DMatrix* p_fmat, bst_bin_t max_bin,\n                                      Span<float const> hessian,\n                                      std::size_t sketch_batch_num_elements) {\n  auto const& info = p_fmat->Info();\n  bool has_weight = !info.weights_.Empty();\n  info.feature_types.SetDevice(ctx->Device());\n\n  HostDeviceVector<float> weight;\n  weight.SetDevice(ctx->Device());\n\n  // Configure batch size based on available memory\n  std::size_t num_cuts_per_feature = detail::RequiredSampleCutsPerColumn(max_bin, info.num_row_);\n  sketch_batch_num_elements = detail::SketchBatchNumElements(\n      sketch_batch_num_elements,\n      detail::SketchShape{info.num_row_, info.num_col_, info.num_nonzero_}, ctx->Ordinal(),\n      num_cuts_per_feature, has_weight, 0);\n\n  CUDAContext const* cuctx = ctx->CUDACtx();\n\n  info.weights_.SetDevice(ctx->Device());\n  auto d_weight = UnifyWeight(cuctx, info, hessian, &weight);\n\n  SketchContainer sketch_container(info.feature_types, max_bin, info.num_col_, ctx->Device());\n  CHECK_EQ(has_weight || !hessian.empty(), !d_weight.empty());\n  for (const auto& page : p_fmat->GetBatches<SparsePage>()) {\n    std::size_t page_nnz = page.data.Size();\n    for (auto begin = 0ull; begin < page_nnz; begin += sketch_batch_num_elements) {\n      std::size_t end =\n          std::min(page_nnz, static_cast<std::size_t>(begin + sketch_batch_num_elements));\n      ProcessWeightedBatch(ctx, page, info, begin, end, &sketch_container, 
num_cuts_per_feature,\n                           d_weight);\n    }\n  }\n\n  return sketch_container.MakeCuts(ctx, p_fmat->Info().IsColumnSplit());\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/hist_util.cuh",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n *\n * \\brief Front end and utilities for GPU based sketching.  Works on sliding window\n *        instead of stream.\n */\n#ifndef COMMON_HIST_UTIL_CUH_\n#define COMMON_HIST_UTIL_CUH_\n\n#include <thrust/host_vector.h>\n#include <thrust/sort.h>  // for sort\n\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t\n#include <limits>     // for numeric_limits\n\n#include \"../data/entry.h\"   // for IsValidFunctor\n#include \"algorithm.cuh\"     // for CopyIf\n#include \"cuda_context.cuh\"  // for CUDAContext\n#include \"device_helpers.cuh\"\n#include \"hist_util.h\"\n#include \"quantile.cuh\"\n#include \"xgboost/span.h\"  // for IterSpan\n\nnamespace xgboost::common {\nnamespace detail {\nstruct EntryCompareOp {\n  __device__ bool operator()(const Entry& a, const Entry& b) {\n    if (a.index == b.index) {\n      return a.fvalue < b.fvalue;\n    }\n    return a.index < b.index;\n  }\n};\n\n// Get column size from adapter batch and for output cuts.\ntemplate <std::uint32_t kBlockThreads, typename CounterT, typename BatchIt>\n__global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,\n                                             data::IsValidFunctor is_valid,\n                                             Span<std::size_t> out_column_size) {\n  extern __shared__ char smem[];\n\n  auto smem_cs_ptr = reinterpret_cast<CounterT*>(smem);\n\n  dh::BlockFill(smem_cs_ptr, out_column_size.size(), 0);\n\n  __syncthreads();\n\n  auto n = batch_iter.size();\n\n  for (auto idx : dh::GridStrideRange(static_cast<std::size_t>(0), n)) {\n    auto e = batch_iter[idx];\n    if (is_valid(e)) {\n      atomicAdd(&smem_cs_ptr[e.column_idx], static_cast<CounterT>(1));\n    }\n  }\n\n  __syncthreads();\n\n  auto out_global_ptr = out_column_size;\n  for (auto i : dh::BlockStrideRange(static_cast<std::size_t>(0), out_column_size.size())) {\n    
atomicAdd(&out_global_ptr[i], static_cast<std::size_t>(smem_cs_ptr[i]));\n  }\n}\n\ntemplate <std::uint32_t kBlockThreads, typename Kernel>\nstd::uint32_t EstimateGridSize(DeviceOrd device, Kernel kernel, std::size_t shared_mem) {\n  int n_mps = 0;\n  dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device.ordinal));\n  int n_blocks_per_mp = 0;\n  dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,\n                                                              kBlockThreads, shared_mem));\n  std::uint32_t grid_size = n_blocks_per_mp * n_mps;\n  return grid_size;\n}\n\n/**\n * \\brief Get the size of each column. This is a histogram with additional handling of\n *        invalid values.\n *\n * \\tparam BatchIt                 Type of input adapter batch.\n * \\tparam force_use_global_memory Used for testing. Force global atomic add.\n * \\tparam force_use_u64           Used for testing. For u64 as counter in shared memory.\n *\n * \\param device     CUDA device ordinal.\n * \\param batch_iter Iterator for input data from adapter batch.\n * \\param is_valid   Whehter an element is considered as missing.\n * \\param out_column_size Output buffer for the size of each column.\n */\ntemplate <typename BatchIt, bool force_use_global_memory = false, bool force_use_u64 = false>\nvoid LaunchGetColumnSizeKernel(CUDAContext const* cuctx, DeviceOrd device,\n                               IterSpan<BatchIt> batch_iter, data::IsValidFunctor is_valid,\n                               Span<std::size_t> out_column_size) {\n  thrust::fill_n(cuctx->CTP(), dh::tbegin(out_column_size), out_column_size.size(), 0);\n\n  std::size_t max_shared_memory = dh::MaxSharedMemory(device.ordinal);\n  // Not strictly correct as we should use number of samples to determine the type of\n  // counter. 
However, the sample size is not known due to sliding window on number of\n  // elements.\n  std::size_t n = batch_iter.size();\n\n  std::size_t required_shared_memory = 0;\n  bool use_u32{false};\n  if (!force_use_u64 && n < static_cast<std::size_t>(std::numeric_limits<std::uint32_t>::max())) {\n    required_shared_memory = out_column_size.size() * sizeof(std::uint32_t);\n    use_u32 = true;\n  } else {\n    required_shared_memory = out_column_size.size() * sizeof(std::size_t);\n    use_u32 = false;\n  }\n  bool use_shared = required_shared_memory <= max_shared_memory && required_shared_memory != 0;\n\n  if (!force_use_global_memory && use_shared) {\n    CHECK_NE(required_shared_memory, 0);\n    std::uint32_t constexpr kBlockThreads = 512;\n    if (use_u32) {\n      CHECK(!force_use_u64);\n      auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::uint32_t, BatchIt>;\n      auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);\n      dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, cuctx->Stream()}(\n          kernel, batch_iter, is_valid, out_column_size);\n    } else {\n      auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::size_t, BatchIt>;\n      auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);\n      dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, cuctx->Stream()}(\n          kernel, batch_iter, is_valid, out_column_size);\n    }\n  } else {\n    auto d_out_column_size = out_column_size;\n    dh::LaunchN(batch_iter.size(), cuctx->Stream(), [=] __device__(size_t idx) {\n      auto e = batch_iter[idx];\n      if (is_valid(e)) {\n        atomicAdd(&d_out_column_size[e.column_idx], static_cast<size_t>(1));\n      }\n    });\n  }\n}\n\ntemplate <typename BatchIt>\nvoid GetColumnSizesScan(CUDAContext const* cuctx, DeviceOrd device, size_t num_columns,\n                        std::size_t num_cuts_per_feature, IterSpan<BatchIt> 
batch_iter,\n                        data::IsValidFunctor is_valid,\n                        HostDeviceVector<SketchContainer::OffsetT>* cuts_ptr,\n                        dh::caching_device_vector<size_t>* column_sizes_scan) {\n  column_sizes_scan->resize(num_columns + 1);\n  cuts_ptr->SetDevice(device);\n  cuts_ptr->Resize(num_columns + 1, 0);\n\n  auto d_column_sizes_scan = dh::ToSpan(*column_sizes_scan);\n  LaunchGetColumnSizeKernel(cuctx, device, batch_iter, is_valid, d_column_sizes_scan);\n  // Calculate cuts CSC pointer\n  auto cut_ptr_it = dh::MakeTransformIterator<size_t>(\n      column_sizes_scan->begin(), [=] __device__(size_t column_size) {\n        return thrust::min(num_cuts_per_feature, column_size);\n      });\n  thrust::exclusive_scan(cuctx->CTP(), cut_ptr_it,\n                         cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());\n  thrust::exclusive_scan(cuctx->CTP(), column_sizes_scan->begin(), column_sizes_scan->end(),\n                         column_sizes_scan->begin());\n}\n\ninline size_t constexpr BytesPerElement(bool has_weight) {\n  // Double the memory usage for sorting.  We need to assign weight for each element, so\n  // sizeof(float) is added to all elements.\n  return (has_weight ? sizeof(Entry) + sizeof(float) : sizeof(Entry)) * 2;\n}\n\nstruct SketchShape {\n  bst_idx_t n_samples;\n  bst_feature_t n_features;\n  bst_idx_t nnz;\n\n  template <typename F, std::enable_if_t<std::is_integral_v<F>>* = nullptr>\n  SketchShape(bst_idx_t n_samples, F n_features, bst_idx_t nnz)\n      : n_samples{n_samples}, n_features{static_cast<bst_feature_t>(n_features)}, nnz{nnz} {}\n\n  [[nodiscard]] bst_idx_t Size() const { return n_samples * n_features; }\n};\n\n/**\n * @brief Calcuate the length of sliding window. 
Returns `sketch_batch_num_elements`\n *        directly if it's not 0.\n */\nbst_idx_t SketchBatchNumElements(bst_idx_t sketch_batch_num_elements, SketchShape shape, int device,\n                                 size_t num_cuts, bool has_weight, std::size_t container_bytes);\n\n// Compute number of sample cuts needed on local node to maintain accuracy\n// We take more cuts than needed and then reduce them later\nsize_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows);\n\n/* \\brief Estimate required memory for each sliding window.\n *\n *   It's not precise as to obtain exact memory usage for sparse dataset we need to walk\n *   through the whole dataset first.  Also if data is from host DMatrix, we copy the\n *   weight, group and offset on first batch, which is not considered in the function.\n *\n * \\param num_rows     Number of rows in this worker.\n * \\param num_columns  Number of columns for this dataset.\n * \\param nnz          Number of non-zero element.  Put in something greater than rows *\n *                     cols if nnz is unknown.\n * \\param num_bins     Number of histogram bins.\n * \\param with_weights Whether weight is used, works the same for ranking and other models.\n *\n * \\return The estimated bytes\n */\nsize_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz,\n                      size_t num_bins, bool with_weights);\n\n// Count the valid entries in each column and copy them out.\ntemplate <typename AdapterBatch, typename BatchIter>\nvoid MakeEntriesFromAdapter(CUDAContext const* cuctx, AdapterBatch const& batch,\n                            BatchIter batch_iter, Range1d range, float missing, size_t columns,\n                            size_t cuts_per_feature, DeviceOrd device,\n                            HostDeviceVector<SketchContainer::OffsetT>* cut_sizes_scan,\n                            dh::caching_device_vector<size_t>* column_sizes_scan,\n                            
dh::device_vector<Entry>* sorted_entries) {\n  auto entry_iter = dh::MakeTransformIterator<Entry>(\n      thrust::make_counting_iterator(0llu), [=] __device__(size_t idx) {\n        return Entry(batch.GetElement(idx).column_idx, batch.GetElement(idx).value);\n      });\n  auto n = range.end() - range.begin();\n  auto span = IterSpan{batch_iter + range.begin(), n};\n  data::IsValidFunctor is_valid(missing);\n  // Work out how many valid entries we have in each column\n  GetColumnSizesScan(cuctx, device, columns, cuts_per_feature, span, is_valid, cut_sizes_scan,\n                     column_sizes_scan);\n  size_t num_valid = column_sizes_scan->back();\n  // Copy current subset of valid elements into temporary storage and sort\n  sorted_entries->resize(num_valid);\n  CopyIf(cuctx, entry_iter + range.begin(), entry_iter + range.end(), sorted_entries->begin(),\n         is_valid);\n}\n\nvoid SortByWeight(Context const* ctx, dh::device_vector<float>* weights,\n                  dh::device_vector<Entry>* sorted_entries);\n\nvoid RemoveDuplicatedCategories(Context const* ctx, MetaInfo const& info,\n                                Span<bst_idx_t> d_cuts_ptr,\n                                dh::device_vector<Entry>* p_sorted_entries,\n                                dh::device_vector<float>* p_sorted_weights,\n                                dh::caching_device_vector<size_t>* p_column_sizes_scan);\n\nconstexpr bst_idx_t UnknownSketchNumElements() { return 0; }\n}  // namespace detail\n\n/**\n * @brief Compute sketch on DMatrix with GPU and Hessian as weight.\n *\n * @param ctx     Runtime context\n * @param p_fmat  Training feature matrix\n * @param max_bin Maximum number of bins for each feature\n * @param hessian Hessian vector.\n * @param sketch_batch_num_elements 0 means autodetect. 
Only modify this for testing.\n *\n * @return Quantile cuts\n */\nHistogramCuts DeviceSketchWithHessian(Context const* ctx, DMatrix* p_fmat, bst_bin_t max_bin,\n                                      Span<float const> hessian,\n                                      std::size_t sketch_batch_num_elements = detail::UnknownSketchNumElements());\n\n/**\n * @brief Compute sketch on DMatrix with GPU.\n *\n * @param ctx     Runtime context\n * @param p_fmat  Training feature matrix\n * @param max_bin Maximum number of bins for each feature\n * @param sketch_batch_num_elements 0 means autodetect. Only modify this for testing.\n *\n * @return Quantile cuts\n */\ninline HistogramCuts DeviceSketch(\n    Context const* ctx, DMatrix* p_fmat, bst_bin_t max_bin,\n    std::size_t sketch_batch_num_elements = detail::UnknownSketchNumElements()) {\n  return DeviceSketchWithHessian(ctx, p_fmat, max_bin, {}, sketch_batch_num_elements);\n}\n\ntemplate <typename AdapterBatch>\nvoid ProcessSlidingWindow(Context const* ctx, AdapterBatch const& batch, MetaInfo const& info,\n                          size_t n_features, size_t begin, size_t end, float missing,\n                          SketchContainer* sketch_container, int num_cuts) {\n  // Copy current subset of valid elements into temporary storage and sort\n  dh::device_vector<Entry> sorted_entries;\n  dh::caching_device_vector<size_t> column_sizes_scan;\n  auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(\n      thrust::make_counting_iterator(0llu),\n      [=] __device__(size_t idx) { return batch.GetElement(idx); });\n  HostDeviceVector<SketchContainer::OffsetT> cuts_ptr;\n  cuts_ptr.SetDevice(ctx->Device());\n  CUDAContext const* cuctx = ctx->CUDACtx();\n  detail::MakeEntriesFromAdapter(cuctx, batch, batch_iter, {begin, end}, missing, n_features,\n                                 num_cuts, ctx->Device(), &cuts_ptr, &column_sizes_scan,\n                                 &sorted_entries);\n  thrust::sort(cuctx->TP(), 
sorted_entries.begin(), sorted_entries.end(), detail::EntryCompareOp());\n\n  if (sketch_container->HasCategorical()) {\n    auto d_cuts_ptr = cuts_ptr.DeviceSpan();\n    detail::RemoveDuplicatedCategories(ctx, info, d_cuts_ptr, &sorted_entries, nullptr,\n                                       &column_sizes_scan);\n  }\n\n  auto d_cuts_ptr = cuts_ptr.DeviceSpan();\n  auto const& h_cuts_ptr = cuts_ptr.HostVector();\n  // Extract the cuts from all columns concurrently\n  sketch_container->Push(ctx, dh::ToSpan(sorted_entries), dh::ToSpan(column_sizes_scan), d_cuts_ptr,\n                         h_cuts_ptr.back());\n\n  sorted_entries.clear();\n  sorted_entries.shrink_to_fit();\n}\n\ntemplate <typename Batch>\nvoid ProcessWeightedSlidingWindow(Context const* ctx, Batch batch, MetaInfo const& info,\n                                  int num_cuts_per_feature, bool is_ranking, float missing,\n                                  size_t columns, size_t begin, size_t end,\n                                  SketchContainer* sketch_container) {\n  curt::SetDevice(ctx->Ordinal());\n  info.weights_.SetDevice(ctx->Device());\n  auto weights = info.weights_.ConstDeviceSpan();\n\n  auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(\n      thrust::make_counting_iterator(0llu),\n      [=] __device__(size_t idx) { return batch.GetElement(idx); });\n  auto cuctx = ctx->CUDACtx();\n  dh::device_vector<Entry> sorted_entries;\n  dh::caching_device_vector<size_t> column_sizes_scan;\n  HostDeviceVector<SketchContainer::OffsetT> cuts_ptr;\n  detail::MakeEntriesFromAdapter(cuctx, batch, batch_iter, {begin, end}, missing, columns,\n                                 num_cuts_per_feature, ctx->Device(), &cuts_ptr, &column_sizes_scan,\n                                 &sorted_entries);\n  data::IsValidFunctor is_valid(missing);\n\n  dh::device_vector<float> temp_weights(sorted_entries.size());\n  auto d_temp_weights = dh::ToSpan(temp_weights);\n\n  if (is_ranking) {\n    if 
(!weights.empty()) {\n      CHECK_EQ(weights.size(), info.group_ptr_.size() - 1);\n    }\n    dh::caching_device_vector<bst_group_t> group_ptr(info.group_ptr_);\n    auto d_group_ptr = dh::ToSpan(group_ptr);\n    auto const weight_iter = dh::MakeTransformIterator<float>(\n        thrust::make_counting_iterator(0lu), [=] __device__(size_t idx) -> float {\n          auto ridx = batch.GetElement(idx).row_idx;\n          bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);\n          return weights[group_idx];\n        });\n    auto retit = thrust::copy_if(cuctx->CTP(),\n                                 weight_iter + begin, weight_iter + end,\n                                 batch_iter + begin,\n                                 d_temp_weights.data(),  // output\n                                 is_valid);\n    CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());\n  } else {\n    CHECK_EQ(batch.NumRows(), weights.size());\n    auto const weight_iter = dh::MakeTransformIterator<float>(\n        thrust::make_counting_iterator(0lu),\n        [=]__device__(size_t idx) -> float {\n          return weights[batch.GetElement(idx).row_idx];\n        });\n    auto retit = thrust::copy_if(cuctx->CTP(),\n                                 weight_iter + begin, weight_iter + end,\n                                 batch_iter + begin,\n                                 d_temp_weights.data(),  // output\n                                 is_valid);\n    CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());\n  }\n\n  detail::SortByWeight(ctx, &temp_weights, &sorted_entries);\n\n  if (sketch_container->HasCategorical()) {\n    auto d_cuts_ptr = cuts_ptr.DeviceSpan();\n    detail::RemoveDuplicatedCategories(ctx, info, d_cuts_ptr, &sorted_entries, &temp_weights,\n                                       &column_sizes_scan);\n  }\n\n  auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();\n  auto d_cuts_ptr = cuts_ptr.DeviceSpan();\n\n  // Extract cuts\n  
sketch_container->Push(ctx, dh::ToSpan(sorted_entries), dh::ToSpan(column_sizes_scan), d_cuts_ptr,\n                         h_cuts_ptr.back(), dh::ToSpan(temp_weights));\n  sorted_entries.clear();\n  sorted_entries.shrink_to_fit();\n}\n\n/**\n * @brief Perform sketching on GPU.\n *\n * @param batch            A batch from adapter.\n * @param num_bins         Bins per column.\n * @param info             Metainfo used for sketching.\n * @param missing          Floating point value that represents invalid value.\n * @param sketch_container Container for output sketch.\n * @param sketch_batch_num_elements Number of element per-sliding window, use it only for\n *                                  testing.\n */\ntemplate <typename Batch>\nvoid AdapterDeviceSketch(Context const* ctx, Batch batch, bst_bin_t num_bins, MetaInfo const& info,\n                         float missing, SketchContainer* sketch_container,\n                         bst_idx_t sketch_batch_num_elements = detail::UnknownSketchNumElements()) {\n  bst_idx_t num_rows = batch.NumRows();\n  size_t num_cols = batch.NumCols();\n\n  bool weighted = !info.weights_.Empty();\n\n  bst_idx_t const kRemaining = batch.Size();\n  bst_idx_t begin = 0;\n\n  auto shape = detail::SketchShape{num_rows, num_cols, std::numeric_limits<bst_idx_t>::max()};\n\n  while (begin < kRemaining) {\n    // Use total number of samples to estimate the needed cuts first, this doesn't hurt\n    // accuracy as total number of samples is larger.\n    auto num_cuts_per_feature = detail::RequiredSampleCutsPerColumn(num_bins, num_rows);\n    // Estimate the memory usage based on the current available memory.\n    sketch_batch_num_elements = detail::SketchBatchNumElements(\n        sketch_batch_num_elements, shape, ctx->Ordinal(), num_cuts_per_feature, weighted,\n        sketch_container->MemCostBytes());\n    // Re-estimate the needed number of cuts based on the size of the sub-batch.\n    //\n    // The estimation of `sketch_batch_num_elements` 
assumes dense input, so the\n    // approximation here is reasonably accurate. It doesn't hurt accuracy since the\n    // estimated n_samples must be greater or equal to the actual n_samples thanks to the\n    // dense assumption.\n    auto approx_n_samples = std::max(sketch_batch_num_elements / num_cols, bst_idx_t{1});\n    num_cuts_per_feature = detail::RequiredSampleCutsPerColumn(num_bins, approx_n_samples);\n    bst_idx_t end =\n        std::min(batch.Size(), static_cast<std::size_t>(begin + sketch_batch_num_elements));\n\n    if (weighted) {\n      ProcessWeightedSlidingWindow(ctx, batch, info, num_cuts_per_feature,\n                                   HostSketchContainer::UseGroup(info), missing, num_cols, begin,\n                                   end, sketch_container);\n    } else {\n      ProcessSlidingWindow(ctx, batch, info, num_cols, begin, end, missing, sketch_container,\n                           num_cuts_per_feature);\n    }\n    begin += sketch_batch_num_elements;\n  }\n}\n}  // namespace xgboost::common\n#endif  // COMMON_HIST_UTIL_CUH_\n"
  },
  {
    "path": "src/common/hist_util.h",
    "content": "/**\n * Copyright 2017-2024, XGBoost Contributors\n * \\file hist_util.h\n * \\brief Utility for fast histogram aggregation\n * \\author Philip Cho, Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_HIST_UTIL_H_\n#define XGBOOST_COMMON_HIST_UTIL_H_\n\n#include <algorithm>\n#include <cmath>\n#include <cstdint>  // for uint32_t\n#include <limits>\n#include <map>\n#include <utility>\n#include <vector>\n\n#include \"categorical.h\"\n#include \"quantile.h\"\n#include \"threading_utils.h\"\n#include \"xgboost/base.h\"  // for bst_feature_t, bst_bin_t\n#include \"xgboost/data.h\"\n\nnamespace xgboost {\nclass GHistIndexMatrix;\n\nnamespace common {\nclass AlignedFileWriteStream;\nclass AlignedResourceReadStream;\n\n/*!\n * \\brief A single row in global histogram index.\n *  Directly represent the global index in the histogram entry.\n */\nusing GHistIndexRow = Span<uint32_t const>;\n\n// A CSC matrix representing histogram cuts.\n// The cut values represent upper bounds of bins containing approximately equal numbers of elements\nclass HistogramCuts {\n  bool has_categorical_{false};\n  float max_cat_{-1.0f};\n\n protected:\n  void Swap(HistogramCuts&& that) noexcept(true) {\n    std::swap(cut_values_, that.cut_values_);\n    std::swap(cut_ptrs_, that.cut_ptrs_);\n\n    std::swap(has_categorical_, that.has_categorical_);\n    std::swap(max_cat_, that.max_cat_);\n  }\n\n  void Copy(HistogramCuts const& that) {\n    cut_values_.Resize(that.cut_values_.Size());\n    cut_ptrs_.Resize(that.cut_ptrs_.Size());\n    cut_values_.Copy(that.cut_values_);\n    cut_ptrs_.Copy(that.cut_ptrs_);\n    has_categorical_ = that.has_categorical_;\n    max_cat_ = that.max_cat_;\n  }\n\n public:\n  HostDeviceVector<float> cut_values_;   // NOLINT\n  HostDeviceVector<uint32_t> cut_ptrs_;  // NOLINT\n\n  HistogramCuts() = delete;\n  explicit HistogramCuts(bst_feature_t n_features);\n  HistogramCuts(HistogramCuts const& that) { this->Copy(that); }\n\n  HistogramCuts(HistogramCuts&& that) 
noexcept(true) {\n    this->Swap(std::forward<HistogramCuts>(that));\n  }\n\n  HistogramCuts& operator=(HistogramCuts const& that) {\n    this->Copy(that);\n    return *this;\n  }\n\n  HistogramCuts& operator=(HistogramCuts&& that) noexcept(true) {\n    this->Swap(std::forward<HistogramCuts>(that));\n    return *this;\n  }\n\n  [[nodiscard]] bst_bin_t FeatureBins(bst_feature_t feature) const {\n    return cut_ptrs_.ConstHostVector().at(feature + 1) - cut_ptrs_.ConstHostVector()[feature];\n  }\n  [[nodiscard]] bst_feature_t NumFeatures() const { return this->cut_ptrs_.Size() - 1; }\n\n  std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }\n  std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }\n\n  [[nodiscard]] bool HasCategorical() const { return has_categorical_; }\n  [[nodiscard]] float MaxCategory() const { return max_cat_; }\n  /**\n   * \\brief Set meta info about categorical features.\n   *\n   * \\param has_cat Do we have categorical feature in the data?\n   * \\param max_cat The maximum categorical value in all features.\n   */\n  void SetCategorical(bool has_cat, float max_cat) {\n    has_categorical_ = has_cat;\n    max_cat_ = max_cat;\n  }\n  /**\n   * @brief The total number of histogram bins (excluding min values.)\n   */\n  [[nodiscard]] bst_bin_t TotalBins() const { return this->cut_values_.Size(); }\n\n  // Return the index of a cut point that is strictly greater than the input\n  // value, or the last available index if none exists\n  [[nodiscard]] bst_bin_t SearchBin(float value, bst_feature_t column_id,\n                                    std::vector<uint32_t> const& ptrs,\n                                    std::vector<float> const& values) const {\n    auto end = ptrs[column_id + 1];\n    auto beg = ptrs[column_id];\n    auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);\n    auto idx = static_cast<bst_bin_t>(it - values.cbegin());\n    idx -= !!(idx == 
static_cast<bst_bin_t>(end));\n    return idx;\n  }\n\n  [[nodiscard]] bst_bin_t SearchBin(float value, bst_feature_t column_id) const {\n    return this->SearchBin(value, column_id, Ptrs(), Values());\n  }\n  /**\n   * \\brief Search the bin index for numerical feature.\n   */\n  [[nodiscard]] bst_bin_t SearchBin(Entry const& e) const { return SearchBin(e.fvalue, e.index); }\n\n  /**\n   * \\brief Search the bin index for categorical feature.\n   */\n  [[nodiscard]] bst_bin_t SearchCatBin(float value, bst_feature_t fidx,\n                                       std::vector<uint32_t> const& ptrs,\n                                       std::vector<float> const& vals) const {\n    auto end = ptrs.at(fidx + 1) + vals.cbegin();\n    auto beg = ptrs[fidx] + vals.cbegin();\n    // Truncates the value in case it's not perfectly rounded.\n    auto v = static_cast<float>(common::AsCat(value));\n    auto bin_idx = static_cast<bst_bin_t>(std::lower_bound(beg, end, v) - vals.cbegin());\n    if (bin_idx == static_cast<bst_bin_t>(ptrs.at(fidx + 1))) {\n      bin_idx -= 1;\n    }\n    return bin_idx;\n  }\n  [[nodiscard]] bst_bin_t SearchCatBin(float value, bst_feature_t fidx) const {\n    auto const& ptrs = this->Ptrs();\n    auto const& vals = this->Values();\n    return this->SearchCatBin(value, fidx, ptrs, vals);\n  }\n  [[nodiscard]] bst_bin_t SearchCatBin(Entry const& e) const {\n    return SearchCatBin(e.fvalue, e.index);\n  }\n\n  /**\n   * \\brief Return a representative numerical value for a bin.\n   */\n  static float NumericBinValue(std::vector<std::uint32_t> const& ptrs,\n                               std::vector<float> const& vals, bst_feature_t fidx,\n                               bst_bin_t bin_idx) {\n    auto lower = static_cast<bst_bin_t>(ptrs[fidx]);\n    if (bin_idx == lower) {\n      return std::nextafter(vals[lower], -std::numeric_limits<float>::infinity());\n    }\n    return vals[bin_idx - 1];\n  }\n\n  /**\n   * \\brief Return the lower bound of a 
numerical bin.\n   */\n  static float NumericBinLowerBound(std::vector<std::uint32_t> const& ptrs,\n                                    std::vector<float> const& vals, bst_feature_t fidx,\n                                    bst_bin_t bin_idx) {\n    auto lower = static_cast<bst_bin_t>(ptrs[fidx]);\n    if (bin_idx == lower) {\n      return -std::numeric_limits<float>::infinity();\n    }\n    return vals[bin_idx - 1];\n  }\n\n  void SetDevice(DeviceOrd d) const {\n    this->cut_ptrs_.SetDevice(d);\n    this->cut_ptrs_.ConstDevicePointer();\n\n    this->cut_values_.SetDevice(d);\n    this->cut_values_.ConstDevicePointer();\n  }\n\n  void Save(common::AlignedFileWriteStream* fo) const;\n  [[nodiscard]] static HistogramCuts* Load(common::AlignedResourceReadStream* fi);\n};\n\n/**\n * \\brief Run CPU sketching on DMatrix.\n *\n * \\param use_sorted Whether should we use SortedCSC for sketching, it's more efficient\n *                   but consumes more memory.\n */\nHistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins,\n                              bool use_sorted = false, Span<float const> hessian = {});\n\nenum BinTypeSize : uint8_t {\n  kUint8BinsTypeSize = 1,\n  kUint16BinsTypeSize = 2,\n  kUint32BinsTypeSize = 4\n};\n\n/**\n * \\brief Dispatch for bin type, fn is a function that accepts a scalar of the bin type.\n */\ntemplate <typename Fn>\nauto DispatchBinType(BinTypeSize type, Fn&& fn) {\n  switch (type) {\n    case kUint8BinsTypeSize: {\n      return fn(uint8_t{});\n    }\n    case kUint16BinsTypeSize: {\n      return fn(uint16_t{});\n    }\n    case kUint32BinsTypeSize: {\n      return fn(uint32_t{});\n    }\n  }\n  LOG(FATAL) << \"Unreachable\";\n  return fn(uint32_t{});\n}\n\n/**\n * @brief Optionally compressed gradient index. 
The compression works only with dense\n *        data.\n *\n *   The main body of construction code is in gradient_index.cc, this struct is only a\n *   view class.\n */\nclass Index {\n private:\n  void SetBinTypeSize(BinTypeSize binTypeSize) {\n    binTypeSize_ = binTypeSize;\n    switch (binTypeSize) {\n      case kUint8BinsTypeSize:\n        func_ = &GetValueFromUint8;\n        break;\n      case kUint16BinsTypeSize:\n        func_ = &GetValueFromUint16;\n        break;\n      case kUint32BinsTypeSize:\n        func_ = &GetValueFromUint32;\n        break;\n      default:\n        CHECK(binTypeSize == kUint8BinsTypeSize || binTypeSize == kUint16BinsTypeSize ||\n              binTypeSize == kUint32BinsTypeSize);\n    }\n  }\n\n public:\n  // Inside the compressor, bin_idx is the index for cut value across all features. By\n  // subtracting it with starting pointer of each feature, we can reduce it to smaller\n  // value and store it with smaller types. Usable only with dense data.\n  //\n  // For sparse input we have to store an addition feature index (similar to sparse matrix\n  // formats like CSR) for each bin in index field to choose the right offset.\n  template <typename T>\n  struct CompressBin {\n    uint32_t const* offsets;\n\n    template <typename Bin, typename Feat>\n    auto operator()(Bin bin_idx, Feat fidx) const {\n      return static_cast<T>(bin_idx - offsets[fidx]);\n    }\n  };\n\n  template <typename T>\n  CompressBin<T> MakeCompressor() const {\n    uint32_t const* offsets = this->Offset();\n    return CompressBin<T>{offsets};\n  }\n\n  Index() { SetBinTypeSize(binTypeSize_); }\n\n  Index(Index const& i) = delete;\n  Index& operator=(Index const& i) = delete;\n  Index(Index&& i) = delete;\n\n  /** @brief Move assignment for lazy initialization. 
*/\n  Index& operator=(Index&& i) = default;\n\n  /**\n   * @brief Construct the index from data.\n   *\n   * @param data     Storage for compressed histogram bin.\n   * @param bin_size Number of bytes for each bin.\n   */\n  Index(Span<std::uint8_t> data, BinTypeSize bin_size) : data_{data} {\n    this->SetBinTypeSize(bin_size);\n  }\n\n  uint32_t operator[](size_t i) const {\n    if (!bin_offset_.empty()) {\n      // dense, compressed\n      auto fidx = i % bin_offset_.size();\n      // restore the index by adding back its feature offset.\n      return func_(data_.data(), i) + bin_offset_[fidx];\n    } else {\n      return func_(data_.data(), i);\n    }\n  }\n  [[nodiscard]] BinTypeSize GetBinTypeSize() const { return binTypeSize_; }\n  template <typename T>\n  T const* data() const {  // NOLINT\n    return reinterpret_cast<T const*>(data_.data());\n  }\n  template <typename T>\n  T* data() {  // NOLINT\n    return reinterpret_cast<T*>(data_.data());\n  }\n  [[nodiscard]] std::uint32_t const* Offset() const { return bin_offset_.data(); }\n  [[nodiscard]] std::size_t OffsetSize() const { return bin_offset_.size(); }\n  [[nodiscard]] std::size_t Size() const { return data_.size() / (binTypeSize_); }\n\n  // set the offset used in compression, cut_ptrs is the CSC indptr in HistogramCuts\n  void SetBinOffset(std::vector<uint32_t> const& cut_ptrs) {\n    bin_offset_.resize(cut_ptrs.size() - 1);  // resize to number of features.\n    std::copy_n(cut_ptrs.begin(), bin_offset_.size(), bin_offset_.begin());\n  }\n  auto begin() const {  // NOLINT\n    return data_.data();\n  }\n  auto end() const {  // NOLINT\n    return data_.data() + data_.size();\n  }\n\n  auto begin() {  // NOLINT\n    return data_.data();\n  }\n  auto end() {  // NOLINT\n    return data_.data() + data_.size();\n  }\n\n private:\n  // Functions to decompress the index.\n  static uint32_t GetValueFromUint8(uint8_t const* t, size_t i) { return t[i]; }\n  static uint32_t GetValueFromUint16(uint8_t const* 
t, size_t i) {\n    return reinterpret_cast<uint16_t const*>(t)[i];\n  }\n  static uint32_t GetValueFromUint32(uint8_t const* t, size_t i) {\n    return reinterpret_cast<uint32_t const*>(t)[i];\n  }\n\n  using Func = uint32_t (*)(uint8_t const*, size_t);\n\n  Span<std::uint8_t> data_;\n  // starting position of each feature inside the cut values (the indptr of the CSC cut matrix\n  // HistogramCuts without the last entry.) Used for bin compression.\n  std::vector<uint32_t> bin_offset_;\n\n  BinTypeSize binTypeSize_{kUint8BinsTypeSize};\n  Func func_;\n};\n\ntemplate <typename GradientIndex>\nbst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t end,\n                                                  GradientIndex const& data,\n                                                  bst_feature_t const fidx_begin,\n                                                  bst_feature_t const fidx_end) {\n  size_t previous_middle = std::numeric_limits<size_t>::max();\n  while (end != begin) {\n    size_t middle = begin + (end - begin) / 2;\n    if (middle == previous_middle) {\n      break;\n    }\n    previous_middle = middle;\n\n    // index into all the bins\n    auto gidx = data[middle];\n\n    if (gidx >= fidx_begin && gidx < fidx_end) {\n      // Found the intersection.\n      return static_cast<int32_t>(gidx);\n    } else if (gidx < fidx_begin) {\n      begin = middle;\n    } else {\n      end = middle;\n    }\n  }\n  // Value is missing\n  return -1;\n}\n\nusing GHistRow = Span<xgboost::GradientPairPrecise>;\nusing ConstGHistRow = Span<xgboost::GradientPairPrecise const>;\n\n/*!\n * \\brief Increment hist as dst += add in range [begin, end)\n */\nvoid IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);\n\n/*!\n * \\brief Copy hist from src to dst in range [begin, end)\n */\nvoid CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end);\n\n/*!\n * \\brief Compute Subtraction: dst = src1 - src2 in range 
[begin, end)\n */\nvoid SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,\n                     size_t end);\n\n/*!\n * \\brief histogram of gradient statistics for multiple nodes\n */\nclass HistCollection {\n public:\n  // access histogram for i-th node\n  GHistRow operator[](bst_uint nid) const {\n    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();\n    const size_t id = row_ptr_.at(nid);\n    CHECK_NE(id, kMax);\n    GradientPairPrecise* ptr = const_cast<GradientPairPrecise*>(data_[id].data());\n    return {ptr, nbins_};\n  }\n\n  // have we computed a histogram for i-th node?\n  [[nodiscard]] bool RowExists(bst_uint nid) const {\n    const uint32_t k_max = std::numeric_limits<uint32_t>::max();\n    return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);\n  }\n  /**\n   * \\brief Initialize histogram collection.\n   *\n   * \\param n_total_bins Number of bins across all features.\n   */\n  void Init(std::uint32_t n_total_bins) {\n    if (nbins_ != n_total_bins) {\n      nbins_ = n_total_bins;\n      // quite expensive operation, so let's do this only once\n      data_.clear();\n    }\n    row_ptr_.clear();\n    n_nodes_added_ = 0;\n  }\n\n  // create an empty histogram for i-th node\n  void AddHistRow(bst_uint nid) {\n    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();\n    if (nid >= row_ptr_.size()) {\n      row_ptr_.resize(nid + 1, kMax);\n    }\n    CHECK_EQ(row_ptr_[nid], kMax);\n\n    if (data_.size() < (nid + 1)) {\n      data_.resize((nid + 1));\n    }\n\n    row_ptr_[nid] = n_nodes_added_;\n    n_nodes_added_++;\n  }\n  // allocate thread local memory i-th node\n  void AllocateData(bst_uint nid) {\n    if (data_[row_ptr_[nid]].size() == 0) {\n      data_[row_ptr_[nid]].resize(nbins_, {0, 0});\n    }\n  }\n\n private:\n  /*! \\brief number of all bins over all features */\n  uint32_t nbins_ = 0;\n  /*! 
\\brief amount of active nodes in hist collection */\n  uint32_t n_nodes_added_ = 0;\n  std::vector<std::vector<GradientPairPrecise>> data_;\n\n  /*! \\brief row_ptr_[nid] locates bin for histogram of node nid */\n  std::vector<size_t> row_ptr_;\n};\n\n/*!\n * \\brief Stores temporary histograms to compute them in parallel\n * Supports processing multiple tree-nodes for nested parallelism\n * Able to reduce histograms across threads in efficient way\n */\nclass ParallelGHistBuilder {\n public:\n  void Init(size_t nbins) {\n    if (nbins != nbins_) {\n      hist_buffer_.Init(nbins);\n      nbins_ = nbins;\n    }\n  }\n\n  // Add new elements if needed, mark all hists as unused\n  // targeted_hists - already allocated hists which should contain final results after Reduce() call\n  void Reset(size_t nthreads, size_t nodes, const BlockedSpace2d& space,\n             const std::vector<GHistRow>& targeted_hists) {\n    hist_buffer_.Init(nbins_);\n    tid_nid_to_hist_.clear();\n    threads_to_nids_map_.clear();\n\n    targeted_hists_ = targeted_hists;\n\n    CHECK_EQ(nodes, targeted_hists.size());\n\n    nodes_ = nodes;\n    nthreads_ = nthreads;\n\n    MatchThreadsToNodes(space);\n    AllocateAdditionalHistograms();\n    MatchNodeNidPairToHist();\n\n    hist_was_used_.resize(nthreads * nodes_);\n    std::fill(hist_was_used_.begin(), hist_was_used_.end(), static_cast<int>(false));\n  }\n\n  // Get specified hist, initialize hist by zeros if it wasn't used before\n  GHistRow GetInitializedHist(size_t tid, size_t nid) {\n    CHECK_LT(nid, nodes_);\n    CHECK_LT(tid, nthreads_);\n\n    int idx = tid_nid_to_hist_.at({tid, nid});\n    if (idx >= 0) {\n      hist_buffer_.AllocateData(idx);\n    }\n    GHistRow hist = idx == -1 ? 
targeted_hists_[nid] : hist_buffer_[idx];\n\n    if (!hist_was_used_[tid * nodes_ + nid]) {\n      std::fill_n(hist.data(), hist.size(), GradientPairPrecise{});\n      hist_was_used_[tid * nodes_ + nid] = static_cast<int>(true);\n    }\n\n    return hist;\n  }\n\n  // Reduce following bins (begin, end] for nid-node in dst across threads\n  void ReduceHist(size_t nid, size_t begin, size_t end) const {\n    CHECK_GT(end, begin);\n    CHECK_LT(nid, nodes_);\n\n    GHistRow dst = targeted_hists_[nid];\n\n    bool is_updated = false;\n    for (size_t tid = 0; tid < nthreads_; ++tid) {\n      if (hist_was_used_[tid * nodes_ + nid]) {\n        is_updated = true;\n\n        int idx = tid_nid_to_hist_.at({tid, nid});\n        GHistRow src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];\n\n        if (dst.data() != src.data()) {\n          IncrementHist(dst, src, begin, end);\n        }\n      }\n    }\n    if (!is_updated) {\n      // In distributed mode - some tree nodes can be empty on local machines,\n      // So we need just set local hist by zeros in this case\n      std::fill(dst.data() + begin, dst.data() + end, GradientPairPrecise{});\n    }\n  }\n\n  void MatchThreadsToNodes(const BlockedSpace2d& space) {\n    const size_t space_size = space.Size();\n    const size_t chunck_size = space_size / nthreads_ + !!(space_size % nthreads_);\n\n    threads_to_nids_map_.resize(nthreads_ * nodes_, false);\n\n    for (size_t tid = 0; tid < nthreads_; ++tid) {\n      size_t begin = chunck_size * tid;\n      size_t end = std::min(begin + chunck_size, space_size);\n\n      if (begin < space_size) {\n        size_t nid_begin = space.GetFirstDimension(begin);\n        size_t nid_end = space.GetFirstDimension(end - 1);\n\n        for (size_t nid = nid_begin; nid <= nid_end; ++nid) {\n          // true - means thread 'tid' will work to compute partial hist for node 'nid'\n          threads_to_nids_map_[tid * nodes_ + nid] = true;\n        }\n      }\n    }\n  }\n\n  void 
AllocateAdditionalHistograms() {\n    size_t hist_allocated_additionally = 0;\n\n    for (size_t nid = 0; nid < nodes_; ++nid) {\n      int nthreads_for_nid = 0;\n\n      for (size_t tid = 0; tid < nthreads_; ++tid) {\n        if (threads_to_nids_map_[tid * nodes_ + nid]) {\n          nthreads_for_nid++;\n        }\n      }\n\n      // In distributed mode - some tree nodes can be empty on local machines,\n      // set nthreads_for_nid to 0 in this case.\n      // In another case - allocate additional (nthreads_for_nid - 1) histograms,\n      // because one is already allocated externally (will store final result for the node).\n      hist_allocated_additionally += std::max<int>(0, nthreads_for_nid - 1);\n    }\n\n    for (size_t i = 0; i < hist_allocated_additionally; ++i) {\n      hist_buffer_.AddHistRow(i);\n    }\n  }\n\n  [[nodiscard]] bst_bin_t TotalBins() const { return nbins_; }\n\n private:\n  void MatchNodeNidPairToHist() {\n    size_t hist_allocated_additionally = 0;\n\n    for (size_t nid = 0; nid < nodes_; ++nid) {\n      bool first_hist = true;\n      for (size_t tid = 0; tid < nthreads_; ++tid) {\n        if (threads_to_nids_map_[tid * nodes_ + nid]) {\n          if (first_hist) {\n            tid_nid_to_hist_[{tid, nid}] = -1;\n            first_hist = false;\n          } else {\n            tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;\n          }\n        }\n      }\n    }\n  }\n\n  /*! \\brief number of bins in each histogram */\n  size_t nbins_ = 0;\n  /*! \\brief number of threads for parallel computation */\n  size_t nthreads_ = 0;\n  /*! \\brief number of nodes which will be processed in parallel  */\n  size_t nodes_ = 0;\n  /*! 
\\brief Buffer for additional histograms for Parallel processing  */\n  HistCollection hist_buffer_;\n  /*!\n   * \\brief Marks which hists were used, it means that they should be merged.\n   * Contains only {true or false} values\n   * but 'int' is used instead of 'bool', because std::vector<bool> isn't thread safe\n   */\n  std::vector<int> hist_was_used_;\n\n  /*! \\brief Buffer for additional histograms for Parallel processing  */\n  std::vector<bool> threads_to_nids_map_;\n  /*! \\brief Contains histograms for final results  */\n  std::vector<GHistRow> targeted_hists_;\n  /*!\n   * \\brief map pair {tid, nid} to index of allocated histogram from hist_buffer_ and targeted_hists_,\n   * -1 is reserved for targeted_hists_\n   */\n  std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;\n};\n\n// construct a histogram via histogram aggregation\ntemplate <bool any_missing>\nvoid BuildHist(Span<GradientPair const> gpair, Span<bst_idx_t const> row_indices,\n               const GHistIndexMatrix& gmat, GHistRow hist, bool read_by_column);\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_HIST_UTIL_H_\n"
  },
  {
    "path": "src/common/host_device_vector.cc",
    "content": "/**\n * Copyright 2017-2024 by XGBoost contributors\n */\n#ifndef XGBOOST_USE_CUDA\n#ifndef XGBOOST_USE_SYCL\n\n// dummy implementation of HostDeviceVector in case CUDA is not used\n\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <cstdint>\n#include <memory>\n#include <utility>\n#include \"xgboost/tree_model.h\"\n#include \"xgboost/host_device_vector.h\"\n\nnamespace xgboost {\n\ntemplate <typename T>\nstruct HostDeviceVectorImpl {\n  explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}\n  HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}\n  explicit HostDeviceVectorImpl(std::vector<T>  init) : data_h_(std::move(init)) {}\n  HostDeviceVectorImpl(HostDeviceVectorImpl&& that) : data_h_(std::move(that.data_h_)) {}\n\n  void Swap(HostDeviceVectorImpl &other) {\n     data_h_.swap(other.data_h_);\n  }\n\n  std::vector<T>& Vec() { return data_h_; }\n\n private:\n  std::vector<T> data_h_;\n};\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(size_t size, T v, DeviceOrd)\n  : impl_(nullptr) {\n  impl_ = new HostDeviceVectorImpl<T>(size, v);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, DeviceOrd)\n  : impl_(nullptr) {\n  impl_ = new HostDeviceVectorImpl<T>(init);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(const std::vector<T>& init, DeviceOrd)\n  : impl_(nullptr) {\n  impl_ = new HostDeviceVectorImpl<T>(init);\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(HostDeviceVector<T>&& that) {\n  impl_ = new HostDeviceVectorImpl<T>(std::move(*that.impl_));\n}\n\ntemplate <typename T>\nHostDeviceVector<T>& HostDeviceVector<T>::operator=(HostDeviceVector<T>&& that) {\n  if (this == &that) { return *this; }\n\n  std::unique_ptr<HostDeviceVectorImpl<T>> new_impl(\n      new HostDeviceVectorImpl<T>(std::move(*that.impl_)));\n  delete impl_;\n  impl_ = new_impl.release();\n  return *this;\n}\n\ntemplate 
<typename T>\nHostDeviceVector<T>::~HostDeviceVector() {\n  delete impl_;\n  impl_ = nullptr;\n}\n\ntemplate <typename T>\nGPUAccess HostDeviceVector<T>::DeviceAccess() const {\n  return kNone;\n}\n\ntemplate <typename T>\nsize_t HostDeviceVector<T>::Size() const { return impl_->Vec().size(); }\n\ntemplate <typename T>\nDeviceOrd HostDeviceVector<T>::Device() const { return DeviceOrd::CPU(); }\n\ntemplate <typename T>\nT* HostDeviceVector<T>::DevicePointer() { return nullptr; }\n\ntemplate <typename T>\nconst T* HostDeviceVector<T>::ConstDevicePointer() const {\n  return nullptr;\n}\n\ntemplate <typename T>\ncommon::Span<T> HostDeviceVector<T>::DeviceSpan() {\n  return common::Span<T>();\n}\n\ntemplate <typename T>\ncommon::Span<const T> HostDeviceVector<T>::ConstDeviceSpan() const {\n  return common::Span<const T>();\n}\n\ntemplate <typename T>\nstd::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->Vec(); }\n\ntemplate <typename T>\nconst std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {\n  return impl_->Vec();\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(size_t new_size, T v) {\n  impl_->Vec().resize(new_size, v);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(size_t new_size) {\n  impl_->Vec().resize(new_size, T{});\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Fill(T v) {\n  std::fill(HostVector().begin(), HostVector().end(), v);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const HostDeviceVector<T>& other) {\n  CHECK_EQ(Size(), other.Size());\n  std::copy(other.HostVector().begin(), other.HostVector().end(), HostVector().begin());\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const std::vector<T>& other) {\n  CHECK_EQ(Size(), other.size());\n  std::copy(other.begin(), other.end(), HostVector().begin());\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(std::initializer_list<T> other) {\n  CHECK_EQ(Size(), other.size());\n  std::copy(other.begin(), 
other.end(), HostVector().begin());\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Extend(HostDeviceVector const& other) {\n  auto ori_size = this->Size();\n  this->HostVector().resize(ori_size + other.Size());\n  std::copy(other.ConstHostVector().cbegin(), other.ConstHostVector().cend(),\n            this->HostVector().begin() + ori_size);\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanRead() const {\n  return true;\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanWrite() const {\n  return true;\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanRead() const {\n  return false;\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanWrite() const {\n  return false;\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::SetDevice(DeviceOrd) const {}\n\n// explicit instantiations are required, as HostDeviceVector isn't header-only\ntemplate class HostDeviceVector<bst_float>;\ntemplate class HostDeviceVector<double>;\ntemplate class HostDeviceVector<GradientPair>;\ntemplate class HostDeviceVector<GradientPairPrecise>;\ntemplate class HostDeviceVector<std::int32_t>;   // bst_node_t\ntemplate class HostDeviceVector<std::uint8_t>;\ntemplate class HostDeviceVector<std::int8_t>;\ntemplate class HostDeviceVector<FeatureType>;\ntemplate class HostDeviceVector<Entry>;\ntemplate class HostDeviceVector<bst_idx_t>;\ntemplate class HostDeviceVector<std::uint32_t>;  // bst_feature_t\ntemplate class HostDeviceVector<RegTree::Node>;\ntemplate class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;\ntemplate class HostDeviceVector<RTreeNodeStat>;\n\n#if defined(__APPLE__) || defined(__EMSCRIPTEN__)\n/*\n * On OSX:\n *\n * typedef unsigned int         uint32_t;\n * typedef unsigned long long   uint64_t;\n * typedef unsigned long       __darwin_size_t;\n *\n * On Emscripten:\n * typedef unsigned long        size_t;\n */\ntemplate class HostDeviceVector<std::size_t>;\n#endif  // defined(__APPLE__)\n\n}  // namespace 
xgboost\n\n#endif  // XGBOOST_USE_SYCL\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/common/host_device_vector.cu",
    "content": "/**\n * Copyright 2017-2025, XGBoost contributors\n */\n#include <thrust/fill.h>\n\n#include <algorithm>\n#include <cstddef>  // for size_t\n#include <cstdint>\n\n#include \"cuda_stream.h\"  // for DefaultStream\n#include \"device_helpers.cuh\"\n#include \"device_vector.cuh\"  // for DeviceUVector\n#include \"xgboost/data.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/tree_model.h\"  // for RegTree\n\nnamespace xgboost {\n\n// the handler to call instead of cudaSetDevice; only used for testing\nstatic void (*cudaSetDeviceHandler)(int) = nullptr;  // NOLINT\n\nvoid SetCudaSetDeviceHandler(void (*handler)(int)) {\n  cudaSetDeviceHandler = handler;\n}\n\ntemplate <typename T>\nclass HostDeviceVectorImpl {\n public:\n  HostDeviceVectorImpl(size_t size, T v, DeviceOrd device) : device_(device) {\n    if (device.IsCUDA()) {\n      gpu_access_ = GPUAccess::kWrite;\n      SetDevice();\n      data_d_->resize(size, v);\n    } else {\n      data_h_.resize(size, v);\n    }\n  }\n\n  // Initializer can be std::vector<T> or std::initializer_list<T>\n  template <class Initializer>\n  HostDeviceVectorImpl(const Initializer& init, DeviceOrd device) : device_(device) {\n    if (device.IsCUDA()) {\n      gpu_access_ = GPUAccess::kWrite;\n      LazyResizeDevice(init.size());\n      Copy(init);\n    } else {\n      data_h_ = init;\n    }\n  }\n\n  HostDeviceVectorImpl(HostDeviceVectorImpl<T>&& that) :\n    device_{that.device_},\n    data_h_{std::move(that.data_h_)},\n    data_d_{std::move(that.data_d_)},\n    gpu_access_{that.gpu_access_} {}\n\n  ~HostDeviceVectorImpl() {\n    if (device_.IsCUDA()) {\n      SetDevice();\n    }\n  }\n\n  [[nodiscard]] size_t Size() const {\n    return HostCanRead() ? data_h_.size() : data_d_ ? 
data_d_->size() : 0;\n  }\n\n  [[nodiscard]] DeviceOrd Device() const { return device_; }\n\n  T* DevicePointer() {\n    LazySyncDevice(GPUAccess::kWrite);\n    return data_d_->data();\n  }\n\n  const T* ConstDevicePointer() {\n    LazySyncDevice(GPUAccess::kRead);\n    return data_d_->data();\n  }\n\n  common::Span<T> DeviceSpan() {\n    LazySyncDevice(GPUAccess::kWrite);\n    return {this->DevicePointer(), Size()};\n  }\n\n  common::Span<const T> ConstDeviceSpan() {\n    LazySyncDevice(GPUAccess::kRead);\n    return {this->ConstDevicePointer(), Size()};\n  }\n\n  void Fill(T v) {  // NOLINT\n    if (HostCanWrite()) {\n      std::fill(data_h_.begin(), data_h_.end(), v);\n    } else {\n      gpu_access_ = GPUAccess::kWrite;\n      SetDevice();\n      auto s_data = dh::ToSpan(*data_d_);\n      dh::LaunchN(data_d_->size(), curt::DefaultStream(),\n                  [=] XGBOOST_DEVICE(size_t i) { s_data[i] = v; });\n    }\n  }\n\n  void Copy(HostDeviceVectorImpl<T>* other) {\n    CHECK_EQ(Size(), other->Size());\n    SetDevice(other->device_);\n    // Data is on host.\n    if (HostCanWrite() && other->HostCanWrite()) {\n      std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin());\n      return;\n    }\n    SetDevice();\n    CopyToDevice(other);\n  }\n\n  void Copy(const std::vector<T>& other) {\n    CHECK_EQ(Size(), other.size());\n    if (HostCanWrite()) {\n      std::copy(other.begin(), other.end(), data_h_.begin());\n    } else {\n      CopyToDevice(other.data());\n    }\n  }\n\n  void Copy(std::initializer_list<T> other) {\n    CHECK_EQ(Size(), other.size());\n    if (HostCanWrite()) {\n      std::copy(other.begin(), other.end(), data_h_.begin());\n    } else {\n      CopyToDevice(other.begin());\n    }\n  }\n\n  void Extend(HostDeviceVectorImpl* other) {\n    auto ori_size = this->Size();\n    this->Resize(ori_size + other->Size(), T{});\n    if (HostCanWrite() && other->HostCanRead()) {\n      auto& h_vec = this->HostVector();\n      auto& 
other_vec = other->HostVector();\n      CHECK_EQ(h_vec.size(), ori_size + other->Size());\n      std::copy(other_vec.cbegin(), other_vec.cend(), h_vec.begin() + ori_size);\n    } else {\n      auto ptr = other->ConstDevicePointer();\n      SetDevice();\n      CHECK_EQ(this->Device(), other->Device());\n      dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size, ptr,\n                                    other->Size() * sizeof(T), cudaMemcpyDeviceToDevice,\n                                    curt::DefaultStream()));\n    }\n  }\n\n  std::vector<T>& HostVector() {\n    LazySyncHost(GPUAccess::kNone);\n    return data_h_;\n  }\n\n  const std::vector<T>& ConstHostVector() {\n    LazySyncHost(GPUAccess::kRead);\n    return data_h_;\n  }\n\n  void SetDevice(DeviceOrd device) {\n    if (device_ == device) { return; }\n    if (device_.IsCUDA()) {\n      LazySyncHost(GPUAccess::kNone);\n    }\n\n    if (device_.IsCUDA() && device.IsCUDA()) {\n      CHECK_EQ(device_.ordinal, device.ordinal)\n          << \"New device ordinal is different from previous one.\";\n    }\n    device_ = device;\n    if (device_.IsCUDA()) {\n      LazyResizeDevice(data_h_.size());\n    }\n  }\n\n  template <typename... U>\n  auto Resize(std::size_t new_size, U&&... 
args) {\n    if (new_size == Size()) {\n      return;\n    }\n    if ((Size() == 0 && device_.IsCUDA()) || (DeviceCanWrite() && device_.IsCUDA())) {\n      // fast on-device resize\n      gpu_access_ = GPUAccess::kWrite;\n      SetDevice();\n      auto old_size = data_d_->size();\n      data_d_->resize(new_size, std::forward<U>(args)...);\n    } else {\n      // resize on host\n      LazySyncHost(GPUAccess::kNone);\n      auto old_size = data_h_.size();\n      data_h_.resize(new_size, std::forward<U>(args)...);\n    }\n  }\n\n  void LazySyncHost(GPUAccess access) {\n    if (HostCanAccess(access)) { return; }\n    if (HostCanRead()) {\n      // data is present, just need to deny access to the device\n      gpu_access_ = access;\n      return;\n    }\n    gpu_access_ = access;\n    if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); }\n    SetDevice();\n    dh::safe_cuda(cudaMemcpy(data_h_.data(), data_d_->data(), data_d_->size() * sizeof(T),\n                             cudaMemcpyDeviceToHost));\n  }\n\n  void LazySyncDevice(GPUAccess access) {\n    if (DeviceCanAccess(access)) { return; }\n    if (DeviceCanRead()) {\n      // deny read to the host\n      gpu_access_ = access;\n      return;\n    }\n    // data is on the host\n    LazyResizeDevice(data_h_.size());\n    SetDevice();\n    dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), data_h_.data(), data_d_->size() * sizeof(T),\n                                  cudaMemcpyHostToDevice, curt::DefaultStream()));\n    gpu_access_ = access;\n  }\n\n  [[nodiscard]] bool HostCanAccess(GPUAccess access) const { return gpu_access_ <= access; }\n  [[nodiscard]] bool HostCanRead() const { return HostCanAccess(GPUAccess::kRead); }\n  [[nodiscard]] bool HostCanWrite() const { return HostCanAccess(GPUAccess::kNone); }\n  [[nodiscard]] bool DeviceCanAccess(GPUAccess access) const { return gpu_access_ >= access; }\n  [[nodiscard]] bool DeviceCanRead() const { return DeviceCanAccess(GPUAccess::kRead); }\n  
[[nodiscard]] bool DeviceCanWrite() const { return DeviceCanAccess(GPUAccess::kWrite); }\n  [[nodiscard]] GPUAccess Access() const { return gpu_access_; }\n\n private:\n  DeviceOrd device_{DeviceOrd::CPU()};\n  std::vector<T> data_h_{};\n  std::unique_ptr<dh::DeviceUVector<T>> data_d_{};\n  GPUAccess gpu_access_{GPUAccess::kNone};\n\n  void CopyToDevice(HostDeviceVectorImpl* other) {\n    if (other->HostCanWrite()) {\n      CopyToDevice(other->data_h_.data());\n    } else {\n      LazyResizeDevice(Size());\n      gpu_access_ = GPUAccess::kWrite;\n      SetDevice();\n      dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), other->data_d_->data(),\n                                    data_d_->size() * sizeof(T), cudaMemcpyDefault,\n                                    curt::DefaultStream()));\n    }\n  }\n\n  void CopyToDevice(const T* begin) {\n    LazyResizeDevice(Size());\n    gpu_access_ = GPUAccess::kWrite;\n    SetDevice();\n    dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), begin, data_d_->size() * sizeof(T),\n                                  cudaMemcpyDefault, curt::DefaultStream()));\n  }\n\n  void LazyResizeDevice(size_t new_size) {\n    if (data_d_ && new_size == data_d_->size()) { return; }\n    SetDevice();\n    data_d_->resize(new_size);\n  }\n\n  void SetDevice() {\n    CHECK_GE(device_.ordinal, 0);\n    if (cudaSetDeviceHandler == nullptr) {\n      dh::safe_cuda(cudaSetDevice(device_.ordinal));\n    } else {\n      (*cudaSetDeviceHandler)(device_.ordinal);\n    }\n\n    if (!data_d_) {\n      data_d_.reset(new dh::DeviceUVector<T>{});\n    }\n  }\n};\n\ntemplate<typename T>\nHostDeviceVector<T>::HostDeviceVector(size_t size, T v, DeviceOrd device)\n    : impl_(new HostDeviceVectorImpl<T>(size, v, device)) {}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(std::initializer_list<T> init, DeviceOrd device)\n    : impl_(new HostDeviceVectorImpl<T>(init, device)) {}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(const 
std::vector<T>& init, DeviceOrd device)\n    : impl_(new HostDeviceVectorImpl<T>(init, device)) {}\n\ntemplate <typename T>\nHostDeviceVector<T>::HostDeviceVector(HostDeviceVector<T>&& other)\n    : impl_(new HostDeviceVectorImpl<T>(std::move(*other.impl_))) {}\n\ntemplate <typename T>\nHostDeviceVector<T>& HostDeviceVector<T>::operator=(HostDeviceVector<T>&& other) {\n  if (this == &other) { return *this; }\n\n  std::unique_ptr<HostDeviceVectorImpl<T>> new_impl(\n      new HostDeviceVectorImpl<T>(std::move(*other.impl_)));\n  delete impl_;\n  impl_ = new_impl.release();\n  return *this;\n}\n\ntemplate <typename T>\nHostDeviceVector<T>::~HostDeviceVector() {\n  delete impl_;\n  impl_ = nullptr;\n}\n\ntemplate <typename T>\nsize_t HostDeviceVector<T>::Size() const { return impl_->Size(); }\n\ntemplate <typename T>\nDeviceOrd HostDeviceVector<T>::Device() const {\n  return impl_->Device();\n}\n\ntemplate <typename T>\nT* HostDeviceVector<T>::DevicePointer() {\n  return impl_->DevicePointer();\n}\n\ntemplate <typename T>\nconst T* HostDeviceVector<T>::ConstDevicePointer() const {\n  return impl_->ConstDevicePointer();\n}\n\ntemplate <typename T>\ncommon::Span<T> HostDeviceVector<T>::DeviceSpan() {\n  return impl_->DeviceSpan();\n}\n\ntemplate <typename T>\ncommon::Span<const T> HostDeviceVector<T>::ConstDeviceSpan() const {\n  return impl_->ConstDeviceSpan();\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Fill(T v) {\n  impl_->Fill(v);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const HostDeviceVector<T>& other) {\n  impl_->Copy(other.impl_);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(const std::vector<T>& other) {\n  impl_->Copy(other);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Copy(std::initializer_list<T> other) {\n  impl_->Copy(other);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Extend(HostDeviceVector const& other) {\n  impl_->Extend(other.impl_);\n}\n\ntemplate <typename T>\nstd::vector<T>& 
HostDeviceVector<T>::HostVector() { return impl_->HostVector(); }\n\ntemplate <typename T>\nconst std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {\n  return impl_->ConstHostVector();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanRead() const {\n  return impl_->HostCanRead();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::HostCanWrite() const {\n  return impl_->HostCanWrite();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanRead() const {\n  return impl_->DeviceCanRead();\n}\n\ntemplate <typename T>\nbool HostDeviceVector<T>::DeviceCanWrite() const {\n  return impl_->DeviceCanWrite();\n}\n\ntemplate <typename T>\nGPUAccess HostDeviceVector<T>::DeviceAccess() const {\n  return impl_->Access();\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::SetDevice(DeviceOrd device) const {\n  impl_->SetDevice(device);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(std::size_t new_size) {\n  impl_->Resize(new_size);\n}\n\ntemplate <typename T>\nvoid HostDeviceVector<T>::Resize(std::size_t new_size, T v) {\n  impl_->Resize(new_size, v);\n}\n\n// explicit instantiations are required, as HostDeviceVector isn't header-only\ntemplate class HostDeviceVector<bst_float>;\ntemplate class HostDeviceVector<double>;\ntemplate class HostDeviceVector<GradientPair>;\ntemplate class HostDeviceVector<GradientPairPrecise>;\ntemplate class HostDeviceVector<GradientPairInt64>;\ntemplate class HostDeviceVector<std::int32_t>;   // bst_node_t\ntemplate class HostDeviceVector<std::uint8_t>;\ntemplate class HostDeviceVector<std::int8_t>;\ntemplate class HostDeviceVector<FeatureType>;\ntemplate class HostDeviceVector<Entry>;\ntemplate class HostDeviceVector<bst_idx_t>;\ntemplate class HostDeviceVector<std::uint32_t>;  // bst_feature_t\ntemplate class HostDeviceVector<RegTree::Node>;\ntemplate class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;\ntemplate class HostDeviceVector<RTreeNodeStat>;\n\n#if 
defined(__APPLE__)\n/*\n * On OSX:\n *\n * typedef unsigned int         uint32_t;\n * typedef unsigned long long   uint64_t;\n * typedef unsigned long       __darwin_size_t;\n */\ntemplate class HostDeviceVector<std::size_t>;\n#endif  // defined(__APPLE__)\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/io.cc",
    "content": "/**\n * Copyright 2019-2025, by XGBoost Contributors\n */\n#include \"error_msg.h\"\n#if defined(__unix__) || defined(__APPLE__)\n\n#include <fcntl.h>     // for open, O_RDONLY, posix_fadvise\n#include <sys/mman.h>  // for mmap, munmap, madvise\n#include <unistd.h>    // for close, getpagesize\n\n#else\n\n#include <xgboost/windefs.h>\n\n#if defined(xgboost_IS_WIN)\n\n#include <windows.h>  // for CreateFileMapping2, CreateFileEx...\n\n#endif  // defined(xgboost_IS_WIN)\n\n#endif  // defined(__unix__) || defined(__APPLE__)\n\n#include <algorithm>     // for copy, transform\n#include <cctype>        // for tolower\n#include <cstddef>       // for size_t\n#include <cstdint>       // for int32_t, uint32_t\n#include <cstdio>        // for fread, fseek\n#include <cstring>       // for memcpy\n#include <filesystem>    // for filesystem, weakly_canonical\n#include <fstream>       // for ifstream\n#include <iterator>      // for distance\n#include <memory>        // for unique_ptr, make_unique\n#include <string>        // for string\n#include <utility>       // for move\n#include <vector>        // for vector\n\n#include \"io.h\"\n#include \"xgboost/logging.h\"            // for CHECK_LE\n#include \"xgboost/string_view.h\"        // for StringView\n\n#if !defined(__linux__) && !defined(__GLIBC__) && !defined(xgboost_IS_WIN)\n#include <limits>  // for numeric_limits\n#endif\n\n#if defined(__linux__)\n#include <sys/sysinfo.h>\n#endif\n\nnamespace xgboost::common {\nsize_t PeekableInStream::Read(void* dptr, size_t size) {\n  size_t nbuffer = buffer_.length() - buffer_ptr_;\n  if (nbuffer == 0) return strm_->Read(dptr, size);\n  if (nbuffer < size) {\n    std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, nbuffer);\n    buffer_ptr_ += nbuffer;\n    return nbuffer + strm_->Read(reinterpret_cast<char*>(dptr) + nbuffer, size - nbuffer);\n  } else {\n    std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, size);\n    buffer_ptr_ += size;\n    return 
size;\n  }\n}\n\nsize_t PeekableInStream::PeekRead(void* dptr, size_t size) {\n  size_t nbuffer = buffer_.length() - buffer_ptr_;\n  if (nbuffer < size) {\n    buffer_ = buffer_.substr(buffer_ptr_, buffer_.length());\n    buffer_ptr_ = 0;\n    buffer_.resize(size);\n    size_t nadd = strm_->Read(dmlc::BeginPtr(buffer_) + nbuffer, size - nbuffer);\n    buffer_.resize(nbuffer + nadd);\n    std::memcpy(dptr, dmlc::BeginPtr(buffer_), buffer_.length());\n    return buffer_.size();\n  } else {\n    std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, size);\n    return size;\n  }\n}\n\nFixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream) {\n  size_t constexpr kInitialSize = 4096;\n  size_t size{kInitialSize}, total{0};\n  buffer_.clear();\n  while (true) {\n    buffer_.resize(size);\n    size_t read = stream->PeekRead(&buffer_[0], size);\n    total = read;\n    if (read < size) {\n      break;\n    }\n    size *= 2;\n  }\n  buffer_.resize(total);\n}\n\nsize_t FixedSizeStream::Read(void* dptr, size_t size) {\n  auto read = this->PeekRead(dptr, size);\n  pointer_ += read;\n  return read;\n}\n\nsize_t FixedSizeStream::PeekRead(void* dptr, size_t size) {\n  if (size >= buffer_.size() - pointer_) {\n    std::copy(buffer_.cbegin() + pointer_, buffer_.cend(), reinterpret_cast<char*>(dptr));\n    return std::distance(buffer_.cbegin() + pointer_, buffer_.cend());\n  } else {\n    auto const beg = buffer_.cbegin() + pointer_;\n    auto const end = beg + size;\n    std::copy(beg, end, reinterpret_cast<char*>(dptr));\n    return std::distance(beg, end);\n  }\n}\n\nvoid FixedSizeStream::Seek(size_t pos) {\n  pointer_ = pos;\n  CHECK_LE(pointer_, buffer_.size());\n}\n\nvoid FixedSizeStream::Take(std::string* out) {\n  CHECK(out);\n  *out = std::move(buffer_);\n}\n\nnamespace {\n// Get system alignment value for IO with mmap.\nstd::size_t GetMmapAlignment() {\n#if defined(xgboost_IS_WIN)\n  SYSTEM_INFO sys_info;\n  GetSystemInfo(&sys_info);\n  // 
During testing, `sys_info.dwPageSize` is of size 4096 while `dwAllocationGranularity` is of\n  // size 65536.\n  return sys_info.dwAllocationGranularity;\n#else\n  return getpagesize();\n#endif\n}\n}  // anonymous namespace\n\nstd::vector<char> LoadSequentialFile(std::string uri) {\n  auto OpenErr = [&uri]() {\n    std::string msg;\n    msg = \"Opening \" + uri + \" failed: \";\n    msg += error::SystemError().message();\n    LOG(FATAL) << msg;\n  };\n\n  auto parsed = dmlc::io::URI(uri.c_str());\n  CHECK((parsed.protocol == \"file://\" || parsed.protocol.length() == 0))\n      << \"Only local file is supported.\";\n  // Read from file.\n  auto path = std::filesystem::weakly_canonical(std::filesystem::u8path(uri));\n  std::ifstream ifs(path, std::ios_base::binary | std::ios_base::in);\n  if (!ifs) {\n    // https://stackoverflow.com/a/17338934\n    OpenErr();\n  }\n\n  auto file_size = std::filesystem::file_size(path);\n  std::vector<char> buffer(file_size);\n  ifs.read(&buffer[0], file_size);\n\n  return buffer;\n}\n\nstd::string FileExtension(std::string fname, bool lower) {\n  if (lower) {\n    std::transform(fname.begin(), fname.end(), fname.begin(),\n                   [](char c) { return std::tolower(c); });\n  }\n  auto splited = Split(fname, '.');\n  if (splited.size() > 1) {\n    return splited.back();\n  } else {\n    return \"\";\n  }\n}\n\nstruct MmapFileImpl {\n#if defined(xgboost_IS_WIN)\n  HANDLE fd{INVALID_HANDLE_VALUE};\n  HANDLE file_map{INVALID_HANDLE_VALUE};\n#else\n  std::int32_t fd{0};\n#endif  // defined(xgboost_IS_WIN)\n  std::byte* base_ptr{nullptr};\n  std::size_t base_size{0};\n  std::size_t delta{0};\n  std::string path;\n\n  MmapFileImpl() = default;\n\n#if defined(xgboost_IS_WIN)\n  MmapFileImpl(HANDLE fd, HANDLE fm, std::byte* base_ptr, std::size_t base_size, std::size_t delta,\n               std::string path)\n      : fd{fd},\n        file_map{fm},\n        base_ptr{base_ptr},\n        base_size{base_size},\n        delta{delta},\n  
      path{std::move(path)} {}\n#else\n  MmapFileImpl(std::int32_t fd, std::byte* base_ptr, std::size_t base_size, std::size_t delta,\n               std::string path)\n      : fd{fd}, base_ptr{base_ptr}, base_size{base_size}, delta{delta}, path{std::move(path)} {}\n#endif  // defined(xgboost_IS_WIN)\n\n  void const* Data() const { return this->base_ptr + this->delta; }\n  void* Data() { return this->base_ptr + this->delta; }\n};\n\nvoid const* MMAPFile::Data() const {\n  if (!this->p_impl) {\n    return nullptr;\n  }\n  return this->p_impl->Data();\n}\n\nvoid* MMAPFile::Data() {\n  if (!this->p_impl) {\n    return nullptr;\n  }\n  return this->p_impl->Data();\n}\n\n[[nodiscard]] Span<std::byte> MMAPFile::BasePtr() const {\n  return Span{this->p_impl->base_ptr, this->p_impl->base_size};\n}\n\n// For some reason, NVCC 12.1 marks the function deleted if we expose it in the header.\n// NVCC 11.8 doesn't allow `noexcept(false) = default` altogether.\nResourceHandler::~ResourceHandler() noexcept(false) {}  // NOLINT\n\nMMAPFile* detail::OpenMmap(std::string path, std::size_t offset, std::size_t length) {\n  if (length == 0) {\n    return new MMAPFile{};\n  }\n\n#if defined(xgboost_IS_WIN)\n  HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING,\n                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr);\n  CHECK_NE(fd, INVALID_HANDLE_VALUE)\n      << \"Failed to open:\" << path << \". \" << error::SystemError().message();\n#else\n  auto fd = open(path.c_str(), O_RDONLY);\n  CHECK_GE(fd, 0) << \"Failed to open:\" << path << \". 
\" << error::SystemError().message();\n#endif\n\n  std::byte* ptr{nullptr};\n  // Round down for alignment.\n  auto view_start = offset / GetMmapAlignment() * GetMmapAlignment();\n  auto view_size = length + (offset - view_start);\n\n#if defined(__linux__) || defined(__GLIBC__)\n  int prot{PROT_READ};\n  ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));\n  CHECK_NE(ptr, MAP_FAILED) << \"Failed to map: \" << path << \". \" << error::SystemError().message();\n  auto handle = new MMAPFile{\n      std::make_unique<MmapFileImpl>(fd, ptr, view_size, offset - view_start, std::move(path))};\n#elif defined(xgboost_IS_WIN)\n  LARGE_INTEGER file_size;\n  CHECK_NE(GetFileSizeEx(fd, &file_size), 0) << error::SystemError().message();\n  auto map_file = CreateFileMappingA(fd, nullptr, PAGE_READONLY, file_size.HighPart,\n                                     file_size.LowPart, nullptr);\n  CHECK(map_file) << \"Failed to map: \" << path << \". \" << error::SystemError().message();\n\n  auto li_vs = reinterpret_cast<LARGE_INTEGER*>(&view_start);\n  ptr = reinterpret_cast<std::byte*>(\n      MapViewOfFile(map_file, FILE_MAP_READ, li_vs->HighPart, li_vs->LowPart, view_size));\n  CHECK_NE(ptr, nullptr) << \"Failed to map: \" << path << \". \" << error::SystemError().message();\n  auto handle = new MMAPFile{std::make_unique<MmapFileImpl>(fd, map_file, ptr, view_size,\n                                                            offset - view_start, std::move(path))};\n#else\n  CHECK_LE(offset, std::numeric_limits<off_t>::max())\n      << \"File size has exceeded the limit on the current system.\";\n  int prot{PROT_READ};\n  ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));\n  CHECK_NE(ptr, MAP_FAILED) << \"Failed to map: \" << path << \". 
\" << error::SystemError().message();\n  auto handle = new MMAPFile{\n      std::make_unique<MmapFileImpl>(fd, ptr, view_size, offset - view_start, std::move(path))};\n#endif  // defined(__linux__) || defined(__GLIBC__)\n\n  return handle;\n}\n\nvoid detail::CloseMmap(MMAPFile* handle) {\n  if (!handle) {\n    return;\n  }\n#if defined(xgboost_IS_WIN)\n  if (handle->p_impl->base_ptr) {\n    CHECK(UnmapViewOfFile(handle->p_impl->base_ptr))\n        << \"Failed to call munmap: \" << error::SystemError().message();\n  }\n  if (handle->p_impl->fd != INVALID_HANDLE_VALUE) {\n    CHECK(CloseHandle(handle->p_impl->fd))\n        << \"Failed to close handle: \" << error::SystemError().message();\n  }\n  if (handle->p_impl->file_map != INVALID_HANDLE_VALUE) {\n    CHECK(CloseHandle(handle->p_impl->file_map))\n        << \"Failed to close mapping object: \" << error::SystemError().message();\n  }\n#else\n  if (handle->p_impl->base_ptr) {\n    CHECK_NE(munmap(handle->p_impl->base_ptr, handle->p_impl->base_size), -1)\n        << \"Failed to call munmap: `\" << handle->p_impl->path << \"`. \"\n        << error::SystemError().message();\n  }\n  if (handle->p_impl->fd != 0) {\n    CHECK_NE(close(handle->p_impl->fd), -1)\n        << \"Failed to close: `\" << handle->p_impl->path << \"`. 
\" << error::SystemError().message();\n  }\n#endif\n  delete handle;\n}\n\nMmapResource::MmapResource(StringView path, std::size_t offset, std::size_t length)\n    : ResourceHandler{kMmap},\n      handle_{detail::OpenMmap(std::string{path}, offset, length), detail::CloseMmap},\n      n_{length} {\n#if defined(__unix__) || defined(__APPLE__)\n  madvise(handle_->p_impl->base_ptr, handle_->p_impl->base_size, MADV_WILLNEED);\n#endif  // defined(__unix__) || defined(__APPLE__)\n}\n\nMmapResource::~MmapResource() noexcept(false) = default;\n\n[[nodiscard]] void* MmapResource::Data() {\n  if (!handle_) {\n    return nullptr;\n  }\n  return this->handle_->Data();\n}\n\n[[nodiscard]] std::size_t MmapResource::Size() const { return n_; }\n\n// For some reason, NVCC 12.1 marks the function deleted if we expose it in the header.\n// NVCC 11.8 doesn't allow `noexcept(false) = default` altogether.\nAlignedResourceReadStream::~AlignedResourceReadStream() noexcept(false) {}  // NOLINT\nPrivateMmapConstStream::~PrivateMmapConstStream() noexcept(false) {}        // NOLINT\n\nstd::shared_ptr<MallocResource> MemBufFileReadStream::ReadFileIntoBuffer(StringView path,\n                                                                         std::size_t offset,\n                                                                         std::size_t length) {\n  CHECK(std::filesystem::exists(path.c_str())) << \"`\" << path << \"` doesn't exist\";\n  auto res = std::make_shared<MallocResource>(length);\n  auto ptr = res->DataAs<char>();\n  std::unique_ptr<FILE, std::function<int(FILE*)>> fp{fopen(path.c_str(), \"rb\"), fclose};\n\n  auto err = [&] {\n    auto e = error::SystemError().message();\n    LOG(FATAL) << \"Failed to read file `\" << path << \"`. 
System error message: \" << e;\n  };\n#if defined(__linux__)\n  auto fd = fileno(fp.get());\n  if (fd == -1) {\n    err();\n  }\n  if (posix_fadvise(fd, offset, length, POSIX_FADV_SEQUENTIAL) != 0) {\n    LOG(FATAL) << error::SystemError().message();\n  }\n#endif  // defined(__linux__)\n\n  if (fseek(fp.get(), offset, SEEK_SET) != 0) {\n    err();\n  }\n  if (fread(ptr, length, 1, fp.get()) != 1) {\n    err();\n  }\n  return res;\n}\n\nAlignedFileWriteStream::AlignedFileWriteStream(StringView path, StringView flags)\n    : pimpl_{dmlc::Stream::Create(path.c_str(), flags.c_str())} {}\n\n[[nodiscard]] std::size_t AlignedFileWriteStream::DoWrite(const void* ptr,\n                                                          std::size_t n_bytes) noexcept(true) {\n  pimpl_->Write(ptr, n_bytes);\n  return n_bytes;\n}\n\nAlignedMemWriteStream::AlignedMemWriteStream(std::string* p_buf)\n    : pimpl_{std::make_unique<MemoryBufferStream>(p_buf)} {}\nAlignedMemWriteStream::~AlignedMemWriteStream() = default;\n\n[[nodiscard]] std::size_t AlignedMemWriteStream::DoWrite(const void* ptr,\n                                                         std::size_t n_bytes) noexcept(true) {\n  this->pimpl_->Write(ptr, n_bytes);\n  return n_bytes;\n}\n\n[[nodiscard]] std::size_t AlignedMemWriteStream::Tell() const noexcept(true) {\n  return this->pimpl_->Tell();\n}\n\n[[nodiscard]] std::string CmdOutput(StringView cmd) {\n#if defined(xgboost_IS_WIN)\n  std::unique_ptr<FILE, std::function<int(FILE*)>> pipe(_popen(cmd.c_str(), \"r\"), _pclose);\n#else\n  // popen is a convenient method, but it always returns a success even if the command\n  // fails.\n  std::unique_ptr<FILE, std::function<int(FILE*)>> pipe(popen(cmd.c_str(), \"r\"), pclose);\n#endif\n  CHECK(pipe);\n  std::array<char, 128> buffer;\n  std::string result;\n  while (std::fgets(buffer.data(), static_cast<std::int32_t>(buffer.size()), pipe.get())) {\n    result += buffer.data();\n  }\n  return result;\n}\n\n[[nodiscard]] std::size_t 
TotalMemory() {\n#if defined(__linux__)\n  struct sysinfo info;\n  CHECK_EQ(sysinfo(&info), 0) << error::SystemError().message();\n  return info.totalram * info.mem_unit;\n#elif defined(xgboost_IS_WIN)\n  MEMORYSTATUSEX status;\n  status.dwLength = sizeof(status);\n  CHECK(GlobalMemoryStatusEx(&status)) << error::SystemError().message();\n  return static_cast<std::size_t>(status.ullTotalPhys);\n#else\n  LOG(FATAL) << \"Not implemented\";\n#endif  // defined(__linux__)\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/io.h",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file io.h\n * \\brief general stream interface for serialization, I/O\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_IO_H_\n#define XGBOOST_COMMON_IO_H_\n\n#include <algorithm>    // for min, fill_n, copy_n\n#include <array>        // for array\n#include <cstddef>      // for byte, size_t\n#include <cstdlib>      // for malloc, realloc, free\n#include <cstring>      // for memcpy\n#include <fstream>      // for ifstream\n#include <functional>   // for function\n#include <limits>       // for numeric_limits\n#include <memory>       // for unique_ptr\n#include <string>       // for string\n#include <type_traits>  // for alignment_of_v, enable_if_t\n#include <utility>      // for move\n#include <vector>       // for vector\n\n#include \"common.h\"               // for DivRoundUp\n#include \"dmlc/io.h\"              // for SeekStream\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::common {\nstruct MemoryFixSizeBuffer : public dmlc::SeekStream {\n public:\n  // similar to SEEK_END in libc\n  static std::size_t constexpr kSeekEnd = std::numeric_limits<std::size_t>::max();\n\n public:\n  /**\n   * @brief Ctor\n   *\n   * @param p_buffer Pointer to the source buffer with size `buffer_size`.\n   * @param buffer_size Size of the source buffer\n   */\n  MemoryFixSizeBuffer(void *p_buffer, std::size_t buffer_size)\n      : p_buffer_(reinterpret_cast<char *>(p_buffer)), buffer_size_(buffer_size) {}\n  ~MemoryFixSizeBuffer() override = default;\n\n  std::size_t Read(void *ptr, std::size_t size) override {\n    std::size_t nread = std::min(buffer_size_ - curr_ptr_, size);\n    if (nread != 0) std::memcpy(ptr, p_buffer_ + curr_ptr_, nread);\n    curr_ptr_ += nread;\n    return nread;\n  }\n  std::size_t Write(const void *ptr, std::size_t size) override {\n    if (size == 0) return 0;\n    CHECK_LE(curr_ptr_ + size, buffer_size_);\n    std::memcpy(p_buffer_ + curr_ptr_, ptr, 
size);\n    curr_ptr_ += size;\n    return size;\n  }\n  void Seek(std::size_t pos) override {\n    if (pos == kSeekEnd) {\n      curr_ptr_ = buffer_size_;\n    } else {\n      curr_ptr_ = static_cast<std::size_t>(pos);\n    }\n  }\n  /**\n   * @brief Current position in the buffer (stream).\n   */\n  std::size_t Tell() override { return curr_ptr_; }\n  [[nodiscard]] virtual bool AtEnd() const { return curr_ptr_ == buffer_size_; }\n\n protected:\n  /*! \\brief in memory buffer */\n  char *p_buffer_{nullptr};\n  /*! \\brief size of the buffer */\n  std::size_t buffer_size_{0};\n  /*! \\brief current pointer */\n  std::size_t curr_ptr_{0};\n};\n\n/*! \\brief an in-memory buffer that can be read from and written to through the stream interface */\nstruct MemoryBufferStream : public dmlc::SeekStream {\n public:\n  explicit MemoryBufferStream(std::string *p_buffer)\n      : p_buffer_(p_buffer) {\n    curr_ptr_ = 0;\n  }\n  ~MemoryBufferStream() override = default;\n  size_t Read(void *ptr, size_t size) override {\n    CHECK_LE(curr_ptr_, p_buffer_->length()) << \"read can not have position excceed buffer length\";\n    size_t nread = std::min(p_buffer_->length() - curr_ptr_, size);\n    if (nread != 0) std::memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread);\n    curr_ptr_ += nread;\n    return nread;\n  }\n  std::size_t Write(const void *ptr, size_t size) override {\n    if (size == 0) return 0;\n    if (curr_ptr_ + size > p_buffer_->length()) {\n      p_buffer_->resize(curr_ptr_+size);\n    }\n    std::memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size);\n    curr_ptr_ += size;\n    return size;\n  }\n  void Seek(size_t pos) override {\n    curr_ptr_ = static_cast<size_t>(pos);\n  }\n  size_t Tell() override {\n    return curr_ptr_;\n  }\n  virtual bool AtEnd() const {\n    return curr_ptr_ == p_buffer_->length();\n  }\n\n private:\n  /*! \\brief in memory buffer */\n  std::string *p_buffer_;\n  /*! 
\\brief current pointer */\n  size_t curr_ptr_;\n};  // struct MemoryBufferStream\n\n/*!\n * \\brief Input stream that supports an additional PeekRead operation,\n *  besides read.\n */\nclass PeekableInStream : public dmlc::Stream {\n public:\n  explicit PeekableInStream(dmlc::Stream* strm) : strm_(strm) {}\n\n  size_t Read(void* dptr, size_t size) override;\n  virtual size_t PeekRead(void* dptr, size_t size);\n\n  std::size_t Write(const void*, size_t) override {\n    LOG(FATAL) << \"Not implemented\";\n    return 0;\n  }\n\n private:\n  /*! \\brief input stream */\n  dmlc::Stream *strm_;\n  /*! \\brief current buffer pointer */\n  size_t buffer_ptr_{0};\n  /*! \\brief internal buffer */\n  std::string buffer_;\n};\n/*!\n * \\brief A simple class used to consume `dmlc::Stream' all at once.\n *\n * With it one can load the rabit checkpoint into a known size string buffer.\n */\nclass FixedSizeStream : public PeekableInStream {\n public:\n  explicit FixedSizeStream(PeekableInStream* stream);\n  ~FixedSizeStream() override = default;\n\n  size_t Read(void* dptr, size_t size) override;\n  size_t PeekRead(void* dptr, size_t size) override;\n  [[nodiscard]] std::size_t Size() const { return buffer_.size(); }\n  [[nodiscard]] std::size_t Tell() const { return pointer_; }\n  void Seek(size_t pos);\n\n  std::size_t Write(const void*, size_t) override {\n    LOG(FATAL) << \"Not implemented\";\n    return 0;\n  }\n\n  /*!\n   *  \\brief Take the buffer from `FixedSizeStream'.  
The one in `FixedSizeStream' will be\n   *  cleared out.\n   */\n  void Take(std::string* out);\n\n private:\n  size_t pointer_{0};\n  std::string buffer_;\n};\n\n/**\n * @brief Helper function for loading consecutive file.\n *\n * @param uri    URI or file name to file.\n *\n * @return File content.\n */\nstd::vector<char> LoadSequentialFile(std::string uri);\n\n/**\n * \\brief Get file extension from file name.\n *\n * \\param  lower Return in lower case.\n *\n * \\return File extension without the `.`\n */\nstd::string FileExtension(std::string fname, bool lower = true);\n\n/**\n * \\brief Read the whole buffer from dmlc stream.\n */\ninline std::string ReadAll(dmlc::Stream* fi, PeekableInStream* fp) {\n  std::string buffer;\n  if (auto fixed_size = dynamic_cast<common::MemoryFixSizeBuffer*>(fi)) {\n    fixed_size->Seek(common::MemoryFixSizeBuffer::kSeekEnd);\n    size_t size = fixed_size->Tell();\n    buffer.resize(size);\n    fixed_size->Seek(0);\n    CHECK_EQ(fixed_size->Read(&buffer[0], size), size);\n  } else {\n    FixedSizeStream{fp}.Take(&buffer);\n  }\n  return buffer;\n}\n\n/**\n * \\brief Read the whole file content into a string.\n */\ninline std::string ReadAll(std::string const &path) {\n  std::ifstream stream(path);\n  if (!stream.is_open()) {\n    LOG(FATAL) << \"Could not open file \" << path;\n  }\n  std::string content{std::istreambuf_iterator<char>(stream), std::istreambuf_iterator<char>()};\n  if (content.empty()) {\n    LOG(FATAL) << \"Empty file \" << path;\n  }\n  return content;\n}\n\nstruct MmapFileImpl;\n\n/**\n * @brief A handle to mmap file.\n */\nstruct MMAPFile {\n  std::unique_ptr<MmapFileImpl> p_impl;\n  [[nodiscard]] void const* Data() const;\n  [[nodiscard]] void* Data();\n  [[nodiscard]] Span<std::byte> BasePtr() const;\n};\n\nnamespace detail {\n// call mmap\n[[nodiscard]] MMAPFile* OpenMmap(std::string path, std::size_t offset, std::size_t length);\n// close the mapped file handle.\nvoid CloseMmap(MMAPFile* handle);\n}  // 
namespace detail\n\n/**\n * @brief Handler for one-shot resource. Unlike `std::pmr::*`, the resource handler is\n *        fixed once it's constructed. Users cannot use mutable operations like resize\n *        without acquiring the specific resource first.\n */\nclass ResourceHandler {\n public:\n  // RTTI\n  enum Kind : std::uint8_t {\n    kMalloc = 0,             // System memory.\n    kMmap = 1,               // Memory mapp.\n    kCudaMalloc = 2,         // CUDA device memory.\n    kCudaMmap = 3,           // CUDA with mmap.\n    kCudaHostCache = 4,      // CUDA pinned host memory.\n    kCudaGrowOnly = 5,       // CUDA virtual memory allocator.\n    kCudaPinnedMemPool = 6,  // CUDA memory pool for pinned host memory.\n  };\n\n private:\n  Kind kind_{kMalloc};\n\n public:\n  virtual void* Data() = 0;\n  template <typename T>\n  [[nodiscard]] T* DataAs() {\n    return reinterpret_cast<T*>(this->Data());\n  }\n\n  [[nodiscard]] virtual std::size_t Size() const = 0;\n  [[nodiscard]] auto Type() const { return kind_; }\n  [[nodiscard]] StringView TypeName() const {\n    switch (this->Type()) {\n      case kMalloc:\n        return \"Malloc\";\n      case kMmap:\n        return \"Mmap\";\n      case kCudaMalloc:\n        return \"CudaMalloc\";\n      case kCudaMmap:\n        return \"CudaMmap\";\n      case kCudaHostCache:\n        return \"CudaHostCache\";\n      case kCudaGrowOnly:\n        return \"CudaGrowOnly\";\n      case kCudaPinnedMemPool:\n        return \"CudaPinnedMemPool\";\n    }\n    LOG(FATAL) << \"Unreachable.\";\n    return {};\n  }\n\n  // Allow exceptions for cleaning up resource.\n  virtual ~ResourceHandler() noexcept(false);\n\n  explicit ResourceHandler(Kind kind) : kind_{kind} {}\n  // Use shared_ptr to manage a pool like resource handler. 
All copy and assignment\n  // operators are disabled.\n  ResourceHandler(ResourceHandler const& that) = delete;\n  ResourceHandler& operator=(ResourceHandler const& that) = delete;\n  ResourceHandler(ResourceHandler&& that) = delete;\n  ResourceHandler& operator=(ResourceHandler&& that) = delete;\n  /**\n   * @brief Wether two resources have the same type. (both malloc or both mmap).\n   */\n  [[nodiscard]] bool IsSameType(ResourceHandler const& that) const {\n    return this->Type() == that.Type();\n  }\n};\n\nclass MallocResource : public ResourceHandler {\n  void* ptr_{nullptr};\n  std::size_t n_{0};\n\n  void Clear() noexcept(true) {\n    std::free(ptr_);\n    ptr_ = nullptr;\n    n_ = 0;\n  }\n\n public:\n  explicit MallocResource(std::size_t n_bytes) : ResourceHandler{kMalloc} { this->Resize(n_bytes); }\n  ~MallocResource() noexcept(true) override { this->Clear(); }\n\n  void* Data() override { return ptr_; }\n  [[nodiscard]] std::size_t Size() const override { return n_; }\n  /**\n   * @brief Resize the resource to n_bytes. Unlike std::vector::resize, it prefers realloc\n   *        over malloc.\n   *\n   * @tparam force_malloc Force the use of malloc over realloc. 
Used for testing.\n   *\n   * @param n_bytes The new size.\n   */\n  template <bool force_malloc = false>\n  void Resize(std::size_t n_bytes, std::byte init = std::byte{0}) {\n    // realloc(ptr, 0) works, but is deprecated.\n    if (n_bytes == 0) {\n      this->Clear();\n      return;\n    }\n\n    // If realloc fails, we need to copy the data ourselves.\n    bool need_copy{false};\n    void* new_ptr{nullptr};\n    // use realloc first, it can handle nullptr.\n    if constexpr (!force_malloc) {\n      new_ptr = std::realloc(ptr_, n_bytes);\n    }\n    // retry with malloc if realloc fails\n    if (!new_ptr) {\n      // ptr_ is preserved if realloc fails\n      new_ptr = std::malloc(n_bytes);\n      need_copy = true;\n    }\n    if (!new_ptr) {\n      // malloc fails\n      LOG(FATAL) << \"bad_malloc: Failed to allocate \" << n_bytes << \" bytes.\";\n    }\n\n    if (need_copy) {\n      std::copy_n(reinterpret_cast<std::byte*>(ptr_), n_, reinterpret_cast<std::byte*>(new_ptr));\n    }\n    // default initialize\n    std::fill_n(reinterpret_cast<std::byte*>(new_ptr) + n_, n_bytes - n_, init);\n    // free the old ptr if malloc is used.\n    if (need_copy) {\n      this->Clear();\n    }\n\n    ptr_ = new_ptr;\n    n_ = n_bytes;\n  }\n};\n\n/**\n * @brief A class for wrapping mmap as a resource for RAII.\n */\nclass MmapResource : public ResourceHandler {\n  std::unique_ptr<MMAPFile, std::function<void(MMAPFile*)>> handle_;\n  std::size_t n_;\n\n public:\n  MmapResource(StringView path, std::size_t offset, std::size_t length);\n  ~MmapResource() noexcept(false) override;\n\n  [[nodiscard]] void* Data() override;\n  [[nodiscard]] std::size_t Size() const override;\n};\n\n/**\n * @param Alignment for resource read stream and aligned write stream.\n */\nconstexpr std::size_t IOAlignment() {\n  // For most of the pod types in XGBoost, 8 byte is sufficient.\n  return 8;\n}\n\n/**\n * @brief Wrap resource into a dmlc stream.\n *\n *  This class is to facilitate the use of 
mmap. Caller can optionally use the `Read()`\n *  method or the `Consume()` method. The former copies data into output, while the latter\n *  makes copy only if it's a primitive type.\n *\n *  Input is required to be aligned to IOAlignment().\n */\nclass AlignedResourceReadStream {\n  std::shared_ptr<ResourceHandler> resource_;\n  std::size_t curr_ptr_{0};\n\n  // Similar to SEEK_END in libc\n  static std::size_t constexpr kSeekEnd = std::numeric_limits<std::size_t>::max();\n\n public:\n  explicit AlignedResourceReadStream(std::shared_ptr<ResourceHandler> resource)\n      : resource_{std::move(resource)} {}\n\n  [[nodiscard]] std::shared_ptr<ResourceHandler> Share() noexcept(true) { return resource_; }\n  /**\n   * @brief Consume n_bytes of data, no copying is performed.\n   *\n   * @return A pair with the beginning pointer and the number of available bytes, which\n   *         may be smaller than requested.\n   */\n  [[nodiscard]] auto Consume(std::size_t n_bytes) noexcept(true) {\n    auto res_size = resource_->Size();\n    auto data = reinterpret_cast<std::byte*>(resource_->Data());\n    auto ptr = data + curr_ptr_;\n\n    // Move the cursor\n    auto aligned_n_bytes = DivRoundUp(n_bytes, IOAlignment()) * IOAlignment();\n    auto aligned_forward = std::min(res_size - curr_ptr_, aligned_n_bytes);\n    std::size_t forward = std::min(res_size - curr_ptr_, n_bytes);\n\n    curr_ptr_ += aligned_forward;\n\n    return std::pair{ptr, forward};\n  }\n\n  template <typename T>\n  [[nodiscard]] auto Consume(T* out) noexcept(false) -> std::enable_if_t<std::is_pod_v<T>, bool> {\n    auto [ptr, size] = this->Consume(sizeof(T));\n    if (size != sizeof(T)) {\n      return false;\n    }\n    CHECK_EQ(reinterpret_cast<std::uintptr_t>(ptr) % std::alignment_of_v<T>, 0);\n    *out = *reinterpret_cast<T*>(ptr);\n    return true;\n  }\n\n  [[nodiscard]] virtual std::size_t Tell() noexcept(true) { return curr_ptr_; }\n  /**\n   * @brief Read n_bytes of data, output is copied into 
ptr.\n   */\n  [[nodiscard]] std::size_t Read(void* ptr, std::size_t n_bytes) noexcept(true) {\n    auto [res_ptr, forward] = this->Consume(n_bytes);\n    if (forward != 0) {\n      std::memcpy(ptr, res_ptr, forward);\n    }\n    return forward;\n  }\n  /**\n   * @brief Read a primitive type.\n   *\n   * @return Whether the read is successful.\n   */\n  template <typename T>\n  [[nodiscard]] auto Read(T* out) noexcept(false) -> std::enable_if_t<std::is_pod_v<T>, bool> {\n    return this->Consume(out);\n  }\n  /**\n   * @brief Read a vector.\n   *\n   * @return Whether the read is successful.\n   */\n  template <typename T>\n  [[nodiscard]] bool Read(std::vector<T>* out) noexcept(true) {\n    std::uint64_t n{0};\n    if (!this->Consume(&n)) {\n      return false;\n    }\n    out->resize(n);\n\n    auto n_bytes = sizeof(T) * n;\n    if (this->Read(out->data(), n_bytes) != n_bytes) {\n      return false;\n    }\n    return true;\n  }\n\n  virtual ~AlignedResourceReadStream() noexcept(false);\n};\n\n/**\n * @brief Private mmap file as a read-only stream.\n *\n *  It can calculate alignment automatically based on system page size (or allocation\n *  granularity on Windows).\n *\n *  The file is required to be aligned by IOAlignment().\n */\nclass PrivateMmapConstStream : public AlignedResourceReadStream {\n public:\n  /**\n   * @brief Construct a private mmap stream.\n   *\n   * @param path      File path.\n   * @param offset    See the `offset` parameter of `mmap` for details.\n   * @param length    See the `length` parameter of `mmap` for details.\n   */\n  explicit PrivateMmapConstStream(StringView path, std::size_t offset, std::size_t length)\n      : AlignedResourceReadStream{std::shared_ptr<MmapResource>{  // NOLINT\n            new MmapResource{path, offset, length}}} {}\n  ~PrivateMmapConstStream() noexcept(false) override;\n};\n\n/**\n * @brief Read a portion of a file into a memory buffer. 
This class helps integration with\n *        external memory file format.\n */\nclass MemBufFileReadStream : public AlignedResourceReadStream {\n  static std::shared_ptr<MallocResource> ReadFileIntoBuffer(StringView path, std::size_t offset,\n                                                            std::size_t length);\n\n public:\n  /**\n   * @brief Construct a stream for reading file.\n   *\n   * @param path      File path.\n   * @param offset    The number of bytes into the file.\n   * @param length    The number of bytes to read.\n   */\n  explicit MemBufFileReadStream(StringView path, std::size_t offset, std::size_t length)\n      : AlignedResourceReadStream{ReadFileIntoBuffer(path, offset, length)} {}\n};\n\n/**\n * @brief Base class for write stream with alignment defined by IOAlignment().\n */\nclass AlignedWriteStream {\n protected:\n  [[nodiscard]] virtual std::size_t DoWrite(const void* ptr,\n                                            std::size_t n_bytes) noexcept(true) = 0;\n\n public:\n  virtual ~AlignedWriteStream() = default;\n\n  [[nodiscard]] std::size_t Write(const void* ptr, std::size_t n_bytes) noexcept(false) {\n    auto aligned_n_bytes = DivRoundUp(n_bytes, IOAlignment()) * IOAlignment();\n    auto w_n_bytes = this->DoWrite(ptr, n_bytes);\n    CHECK_EQ(w_n_bytes, n_bytes);\n    auto remaining = aligned_n_bytes - n_bytes;\n    if (remaining > 0) {\n      std::array<std::uint8_t, IOAlignment()> padding;\n      std::memset(padding.data(), '\\0', padding.size());\n      w_n_bytes = this->DoWrite(padding.data(), remaining);\n      CHECK_EQ(w_n_bytes, remaining);\n    }\n    return aligned_n_bytes;\n  }\n\n  template <typename T>\n  [[nodiscard]] std::enable_if_t<std::is_pod_v<T>, std::size_t> Write(T const& v) {\n    return this->Write(&v, sizeof(T));\n  }\n};\n\n/**\n * @brief Output stream backed by a file. 
Aligned to IOAlignment() bytes.\n */\nclass AlignedFileWriteStream : public AlignedWriteStream {\n  std::unique_ptr<dmlc::Stream> pimpl_;\n\n protected:\n  [[nodiscard]] std::size_t DoWrite(const void* ptr, std::size_t n_bytes) noexcept(true) override;\n\n public:\n  AlignedFileWriteStream() = default;\n  AlignedFileWriteStream(StringView path, StringView flags);\n  ~AlignedFileWriteStream() override = default;\n};\n\n/**\n * @brief Output stream backed by memory buffer. Aligned to IOAlignment() bytes.\n */\nclass AlignedMemWriteStream : public AlignedFileWriteStream {\n  std::unique_ptr<MemoryBufferStream> pimpl_;\n\n protected:\n  [[nodiscard]] std::size_t DoWrite(const void* ptr, std::size_t n_bytes) noexcept(true) override;\n\n public:\n  explicit AlignedMemWriteStream(std::string* p_buf);\n  ~AlignedMemWriteStream() override;\n\n  [[nodiscard]] std::size_t Tell() const noexcept(true);\n};\n\n// Run a system command, get its stdout.\n[[nodiscard]] std::string CmdOutput(StringView cmd);\n\n[[nodiscard]] std::size_t TotalMemory();\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_IO_H_\n"
  },
  {
    "path": "src/common/json.cc",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n */\n#include \"xgboost/json.h\"\n\n#include <array>             // for array\n#include <cctype>            // for isdigit\n#include <cmath>             // for isinf, isnan\n#include <cstdint>           // for uint8_t, int16_t, int32_t, int64_t\n#include <cstdio>            // for EOF\n#include <cstdlib>           // for size_t, strtof\n#include <cstring>           // for memcpy\n#include <initializer_list>  // for initializer_list\n#include <iterator>          // for distance\n#include <limits>            // for numeric_limits\n#include <sstream>           // for operator<<, basic_ostream, operator&, ios, stringstream\n#include <system_error>      // for errc\n\n#include \"./math.h\"                 // for CheckNAN\n#include \"charconv.h\"               // for to_chars, NumericLimits, from_chars, to_chars_result\n#include \"common.h\"                 // for EscapeU8\n#include \"xgboost/base.h\"           // for XGBOOST_EXPECT\n#include \"xgboost/intrusive_ptr.h\"  // for IntrusivePtr\n#include \"xgboost/json_io.h\"        // for JsonReader, UBJReader, UBJWriter, JsonWriter, ToBigEn...\n#include \"xgboost/logging.h\"        // for LOG, LOG_FATAL, LogMessageFatal, LogCheck_NE, CHECK\n#include \"xgboost/string_view.h\"    // for StringView, operator<<\n\nnamespace xgboost {\n\nvoid JsonWriter::Save(Json json) { json.Ptr()->Save(this); }\n\nvoid JsonWriter::Visit(JsonArray const* arr) {\n  this->WriteArray(arr, [](auto const& v) { return v; });\n}\nvoid JsonWriter::Visit(F32Array const* arr) {\n  this->WriteArray(arr, [](float v) { return Json{v}; });\n}\nnamespace {\nauto to_i64 = [](auto v) { return Json{static_cast<int64_t>(v)}; };\n}  // anonymous namespace\nvoid JsonWriter::Visit(I8Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(U8Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(I16Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid 
JsonWriter::Visit(U16Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(I32Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(U32Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(I64Array const* arr) { this->WriteArray(arr, to_i64); }\nvoid JsonWriter::Visit(U64Array const* arr) { this->WriteArray(arr, to_i64); }  // dangerous\n\nvoid JsonWriter::Visit(JsonObject const* obj) {\n  stream_->emplace_back('{');\n  size_t i = 0;\n  size_t size = obj->GetObject().size();\n\n  for (auto& value : obj->GetObject()) {\n    auto s = String{value.first};\n    this->Visit(&s);\n    stream_->emplace_back(':');\n    this->Save(value.second);\n\n    if (i != size-1) {\n      stream_->emplace_back(',');\n    }\n    i++;\n  }\n\n  stream_->emplace_back('}');\n}\n\nvoid JsonWriter::Visit(JsonNumber const* num) {\n  std::array<char, NumericLimits<float>::kToCharsSize> number;\n  auto res = to_chars(number.data(), number.data() + number.size(), num->GetNumber());\n  auto end = res.ptr;\n  auto ori_size = stream_->size();\n  stream_->resize(stream_->size() + end - number.data());\n  std::memcpy(stream_->data() + ori_size, number.data(), end - number.data());\n}\n\nvoid JsonWriter::Visit(JsonInteger const* num) {\n  std::array<char, NumericLimits<int64_t>::kToCharsSize> i2s_buffer_;\n  auto i = num->GetInteger();\n  auto ret =\n      to_chars(i2s_buffer_.data(), i2s_buffer_.data() + NumericLimits<int64_t>::kToCharsSize, i);\n  auto end = ret.ptr;\n  CHECK(ret.ec == std::errc());\n  auto digits = std::distance(i2s_buffer_.data(), end);\n  auto ori_size = stream_->size();\n  stream_->resize(ori_size + digits);\n  std::memcpy(stream_->data() + ori_size, i2s_buffer_.data(), digits);\n}\n\nvoid JsonWriter::Visit(JsonNull const* ) {\n    auto s = stream_->size();\n    stream_->resize(s + 4);\n    auto& buf = (*stream_);\n    buf[s + 0] = 'n';\n    buf[s + 1] = 'u';\n    buf[s + 2] = 'l';\n    buf[s + 3] = 
'l';\n}\n\nvoid JsonWriter::Visit(JsonString const* str) {\n    std::string buffer;\n    buffer += '\"';\n    auto const& string = str->GetString();\n    common::EscapeU8(string, &buffer);\n    buffer += '\"';\n\n    auto s = stream_->size();\n    stream_->resize(s + buffer.size());\n    std::memcpy(stream_->data() + s, buffer.data(), buffer.size());\n}\n\nvoid JsonWriter::Visit(JsonBoolean const* boolean) {\n  bool val = boolean->GetBoolean();\n  auto s = stream_->size();\n  if (val) {\n    stream_->resize(s + 4);\n    auto& buf = (*stream_);\n    buf[s + 0] = 't';\n    buf[s + 1] = 'r';\n    buf[s + 2] = 'u';\n    buf[s + 3] = 'e';\n  } else {\n    stream_->resize(s + 5);\n    auto& buf = (*stream_);\n    buf[s + 0] = 'f';\n    buf[s + 1] = 'a';\n    buf[s + 2] = 'l';\n    buf[s + 3] = 's';\n    buf[s + 4] = 'e';\n  }\n}\n\n// Value\nstd::string Value::TypeStr() const {\n  switch (kind_) {\n    case ValueKind::kString:\n      return \"String\";\n    case ValueKind::kNumber:\n      return \"Number\";\n    case ValueKind::kObject:\n      return \"Object\";\n    case ValueKind::kArray:\n      return \"Array\";\n    case ValueKind::kBoolean:\n      return \"Boolean\";\n    case ValueKind::kNull:\n      return \"Null\";\n    case ValueKind::kInteger:\n      return \"Integer\";\n    case ValueKind::kF32Array:\n      return \"F32Array\";\n    case ValueKind::kF64Array:\n      return \"F64Array\";\n    case ValueKind::kI8Array:\n      return \"I8Array\";\n    case ValueKind::kU8Array:\n      return \"U8Array\";\n    case ValueKind::kI16Array:\n      return \"I16Array\";\n    case ValueKind::kU16Array:\n      return \"U16Array\";\n    case ValueKind::kI32Array:\n      return \"I32Array\";\n    case ValueKind::kU32Array:\n      return \"U32Array\";\n    case ValueKind::kI64Array:\n      return \"I64Array\";\n    case ValueKind::kU64Array:\n      return \"U64Array\";\n  }\n  return \"\";\n}\n\n// Only used for keeping old compilers happy about non-reaching return\n// 
statement.\nJson& DummyJsonObject() {\n  static Json obj;\n  return obj;\n}\n\nJson& Value::operator[](std::string const&) {\n  LOG(FATAL) << \"Object of type \" << TypeStr() << \" can not be indexed by string.\";\n  return DummyJsonObject();\n}\n\nJson& Value::operator[](int) {\n  LOG(FATAL) << \"Object of type \" << TypeStr() << \" can not be indexed by Integer.\";\n  return DummyJsonObject();\n}\n\n// Json Object\nJsonObject::JsonObject(JsonObject&& that) noexcept : Value(ValueKind::kObject) {\n  std::swap(that.object_, this->object_);\n}\n\nJsonObject::JsonObject(Map&& object) noexcept\n    : Value(ValueKind::kObject), object_{std::forward<Map>(object)} {}\n\nbool JsonObject::operator==(Value const& rhs) const {\n  if (!IsA<JsonObject>(&rhs)) {\n    return false;\n  }\n  return object_ == Cast<JsonObject const>(&rhs)->GetObject();\n}\n\nvoid JsonObject::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// Json String\nbool JsonString::operator==(Value const& rhs) const {\n  if (!IsA<JsonString>(&rhs)) { return false; }\n  return Cast<JsonString const>(&rhs)->GetString() == str_;\n}\n\n// FIXME: UTF-8 parsing support.\nvoid JsonString::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// Json Array\nJsonArray::JsonArray(JsonArray&& that) noexcept : Value(ValueKind::kArray) {\n  std::swap(that.vec_, this->vec_);\n}\n\nbool JsonArray::operator==(Value const& rhs) const {\n  if (!IsA<JsonArray>(&rhs)) {\n    return false;\n  }\n  auto& arr = Cast<JsonArray const>(&rhs)->GetArray();\n  if (vec_.size() != arr.size()) {\n    return false;\n  }\n  return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());\n}\n\nvoid JsonArray::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// typed array\nnamespace {\n// error C2668: 'fpclassify': ambiguous call to overloaded function\ntemplate <typename T>\nstd::enable_if_t<std::is_floating_point_v<T>, bool> IsInfMSVCWar(T v) {\n  return std::isinf(v);\n}\ntemplate <typename 
T>\nstd::enable_if_t<std::is_integral_v<T>, bool> IsInfMSVCWar(T) {\n  return false;\n}\n}  // namespace\n\ntemplate <typename T, Value::ValueKind kind>\nvoid JsonTypedArray<T, kind>::Save(JsonWriter* writer) const {\n  writer->Visit(this);\n}\n\ntemplate <typename T, Value::ValueKind kind>\nbool JsonTypedArray<T, kind>::operator==(Value const& rhs) const {\n  if (!IsA<JsonTypedArray<T, kind>>(&rhs)) {\n    return false;\n  }\n  auto& arr = Cast<JsonTypedArray<T, kind> const>(&rhs)->GetArray();\n  if (vec_.size() != arr.size()) {\n    return false;\n  }\n  if (std::is_same_v<float, T>) {\n    for (size_t i = 0; i < vec_.size(); ++i) {\n      bool equal{false};\n      if (common::CheckNAN(vec_[i])) {\n        equal = common::CheckNAN(arr[i]);\n      } else if (IsInfMSVCWar(vec_[i])) {\n        equal = IsInfMSVCWar(arr[i]);\n      } else {\n        equal = (arr[i] - vec_[i] == 0);\n      }\n      if (!equal) {\n        return false;\n      }\n    }\n    return true;\n  }\n  return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());\n}\n\ntemplate class JsonTypedArray<float, Value::ValueKind::kF32Array>;\ntemplate class JsonTypedArray<double, Value::ValueKind::kF64Array>;\ntemplate class JsonTypedArray<std::int8_t, Value::ValueKind::kI8Array>;\ntemplate class JsonTypedArray<std::uint8_t, Value::ValueKind::kU8Array>;\ntemplate class JsonTypedArray<std::int16_t, Value::ValueKind::kI16Array>;\ntemplate class JsonTypedArray<std::uint16_t, Value::ValueKind::kU16Array>;\ntemplate class JsonTypedArray<std::int32_t, Value::ValueKind::kI32Array>;\ntemplate class JsonTypedArray<std::uint32_t, Value::ValueKind::kU32Array>;\ntemplate class JsonTypedArray<std::int64_t, Value::ValueKind::kI64Array>;\ntemplate class JsonTypedArray<std::uint64_t, Value::ValueKind::kU64Array>;\n\n// Json Number\nbool JsonNumber::operator==(Value const& rhs) const {\n  if (!IsA<JsonNumber>(&rhs)) { return false; }\n  auto r_num = Cast<JsonNumber const>(&rhs)->GetNumber();\n  if (std::isinf(number_)) 
{\n    return std::isinf(r_num);\n  }\n  if (std::isnan(number_)) {\n    return std::isnan(r_num);\n  }\n  return number_ - r_num == 0;\n}\n\nvoid JsonNumber::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// Json Integer\nbool JsonInteger::operator==(Value const& rhs) const {\n  if (!IsA<JsonInteger>(&rhs)) { return false; }\n  return integer_ == Cast<JsonInteger const>(&rhs)->GetInteger();\n}\n\nvoid JsonInteger::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// Json Null\nbool JsonNull::operator==(Value const& rhs) const {\n  if (!IsA<JsonNull>(&rhs)) { return false; }\n  return true;\n}\n\nvoid JsonNull::Save(JsonWriter* writer) const { writer->Visit(this); }\n\n// Json Boolean\nbool JsonBoolean::operator==(Value const& rhs) const {\n  if (!IsA<JsonBoolean>(&rhs)) { return false; }\n  return boolean_ == Cast<JsonBoolean const>(&rhs)->GetBoolean();\n}\n\nvoid JsonBoolean::Save(JsonWriter* writer) const { writer->Visit(this); }\n\nsize_t constexpr JsonReader::kMaxNumLength;\n\nJson JsonReader::Parse() {\n  while (true) {\n    SkipSpaces();\n    auto c = PeekNextChar();\n    if (c == -1) { break; }\n\n    if (c == '{') {\n      return ParseObject();\n    } else if ( c == '[' ) {\n      return ParseArray();\n    } else if ( c == '-' || std::isdigit(c) ||\n                c == 'N' || c == 'I') {\n      // For now we only accept `NaN`, not `nan` as the later violates LR(1) with `null`.\n      return ParseNumber();\n    } else if ( c == '\\\"' ) {\n      return ParseString();\n    } else if ( c == 't' || c == 'f' ) {\n      return ParseBoolean();\n    } else if (c == 'n') {\n      return ParseNull();\n    } else {\n      Error(\"Unknown construct\");\n    }\n  }\n  return {};\n}\n\nJson JsonReader::Load() {\n  Json result = Parse();\n  return result;\n}\n\nvoid JsonReader::Error(std::string msg) const {\n  // just copy it.\n  std::stringstream str_s;\n  str_s << raw_str_.substr(0, raw_str_.size());\n\n  msg += \", around character position: \" 
+ std::to_string(cursor_.Pos());\n  msg += '\\n';\n\n  if (cursor_.Pos() == 0) {\n    LOG(FATAL) << msg << \", \\\"\" << str_s.str() << \" \\\"\";\n  }\n\n  constexpr size_t kExtend = 8;\n  auto beg = static_cast<int64_t>(cursor_.Pos()) -\n             static_cast<int64_t>(kExtend) < 0 ? 0 : cursor_.Pos() - kExtend;\n  auto end = cursor_.Pos() + kExtend >= raw_str_.size() ?\n             raw_str_.size() : cursor_.Pos() + kExtend;\n\n  auto raw_portion = raw_str_.substr(beg, end - beg);\n  std::string portion;\n  for (auto c : raw_portion) {\n    if (c == '\\n') {\n      portion += \"\\\\n\";\n    } else if (c == '\\0') {\n      portion += \"\\\\0\";\n    } else {\n      portion += c;\n    }\n  }\n\n  msg += \"    \";\n  msg += portion;\n  msg += '\\n';\n\n  msg += \"    \";\n  for (size_t i = beg; i < cursor_.Pos() - 1; ++i) {\n    msg += '~';\n  }\n  msg += '^';\n  for (size_t i = cursor_.Pos(); i < end; ++i) {\n    msg += '~';\n  }\n  LOG(FATAL) << msg;\n}\n\nnamespace {\nbool IsSpace(JsonReader::Char c) { return c == ' ' || c == '\\n' || c == '\\r' || c == '\\t'; }\n}  // anonymous namespace\n\n// Json class\nvoid JsonReader::SkipSpaces() {\n  while (cursor_.Pos() < raw_str_.size()) {\n    Char c = raw_str_[cursor_.Pos()];\n    if (IsSpace(c)) {\n      cursor_.Forward();\n    } else {\n      break;\n    }\n  }\n}\n\nvoid ParseStr(std::string const& str) {\n  size_t end = 0;\n  for (size_t i = 0; i < str.size(); ++i) {\n    if (str[i] == '\"' && i > 0 && str[i-1] != '\\\\') {\n      end = i;\n      break;\n    }\n  }\n  std::string result;\n  result.resize(end);\n}\n\nJson JsonReader::ParseString() {\n  Char ch { GetConsecutiveChar('\\\"') };  // NOLINT\n  std::string str;\n  while (true) {\n    ch = GetNextChar();\n    if (ch == '\\\\') {\n      Char next{GetNextChar()};\n      switch (next) {\n        case 'r':  str += u8\"\\r\"; break;\n        case 'n':  str += u8\"\\n\"; break;\n        case '\\\\': str += u8\"\\\\\"; break;\n        case 't':  str += 
u8\"\\t\"; break;\n        case '\\\"': str += u8\"\\\"\"; break;\n        case 'u':\n          str += ch;\n          str += 'u';\n          break;\n        default: Error(\"Unknown escape\");\n      }\n    } else {\n      if (ch == '\\\"') break;\n      str += ch;\n    }\n    if (ch == EOF || ch == '\\r' || ch == '\\n') {\n      Expect('\\\"', ch);\n    }\n  }\n  return Json(std::move(str));\n}\n\nJson JsonReader::ParseNull() {\n  Char ch = GetNextNonSpaceChar();\n  std::string buffer{static_cast<char>(ch)};\n  for (size_t i = 0; i < 3; ++i) {\n    buffer.push_back(GetNextChar());\n  }\n  if (buffer != \"null\") {\n    Error(\"Expecting null value \\\"null\\\"\");\n  }\n  return Json{JsonNull()};\n}\n\nJson JsonReader::ParseArray() {\n  std::vector<Json> data;\n\n  Char ch { GetConsecutiveChar('[') };  // NOLINT\n  while (true) {\n    if (PeekNextChar() == ']') {\n      GetConsecutiveChar(']');\n      return Json(std::move(data));\n    }\n    auto obj = Parse();\n    data.emplace_back(obj);\n    ch = GetNextNonSpaceChar();\n    if (ch == ']') break;\n    if (ch != ',') {\n      Expect(',', ch);\n    }\n  }\n\n  return Json(std::move(data));\n}\n\nJson JsonReader::ParseObject() {\n  GetConsecutiveChar('{');\n\n  Object::Map data;\n  SkipSpaces();\n  auto ch = PeekNextChar();\n\n  if (ch == '}') {\n    GetConsecutiveChar('}');\n    return Json(std::move(data));\n  }\n\n  while (true) {\n    SkipSpaces();\n    ch = PeekNextChar();\n    CHECK_NE(ch, -1) << \"cursor_.Pos(): \" << cursor_.Pos() << \", \"\n                     << \"raw_str_.size():\" << raw_str_.size();\n    if (ch != '\"') {\n      Expect('\"', ch);\n    }\n    Json key = ParseString();\n\n    ch = GetNextNonSpaceChar();\n\n    if (ch != ':') {\n      Expect(':', ch);\n    }\n\n    Json value { Parse() };\n\n    data[get<String>(key)] = std::move(value);\n\n    ch = GetNextNonSpaceChar();\n\n    if (ch == '}') break;\n    if (ch != ',') {\n      Expect(',', ch);\n    }\n  }\n\n  return 
Json(std::move(data));\n}\n\nJson JsonReader::ParseNumber() {\n  // Adopted from sajson with some simplifications and small optimizations.\n  char const* p = raw_str_.c_str() + cursor_.Pos();\n  char const* const beg = p;  // keep track of current pointer\n\n  // TODO(trivialfis): Add back all the checks for number\n  if (XGBOOST_EXPECT(*p == 'N', false)) {\n    GetConsecutiveChar('N');\n    GetConsecutiveChar('a');\n    GetConsecutiveChar('N');\n    return Json(static_cast<Number::Float>(std::numeric_limits<float>::quiet_NaN()));\n  }\n\n  bool negative = false;\n  switch (*p) {\n  case '-': {\n    negative = true;\n    ++p;\n    break;\n  }\n  case '+': {\n    negative = false;\n    ++p;\n    break;\n  }\n  default: {\n    break;\n  }\n  }\n\n  if (XGBOOST_EXPECT(*p == 'I', false)) {\n    cursor_.Forward(std::distance(beg, p));  // +/-\n    for (auto i : {'I', 'n', 'f', 'i', 'n', 'i', 't', 'y'}) {\n      GetConsecutiveChar(i);\n    }\n    auto f = std::numeric_limits<float>::infinity();\n    if (negative) {\n      f = -f;\n    }\n    return Json(static_cast<Number::Float>(f));\n  }\n\n  bool is_float = false;\n\n  int64_t i = 0;\n\n  if (*p == '0') {\n    i = 0;\n    p++;\n  }\n\n  while (XGBOOST_EXPECT(*p >= '0' && *p <= '9', true)) {\n    i = i * 10 + (*p - '0');\n    p++;\n  }\n\n  if (*p == '.') {\n    p++;\n    is_float = true;\n\n    while (*p >= '0' && *p <= '9') {\n      i = i * 10 + (*p - '0');\n      p++;\n    }\n  }\n\n  if (*p == 'E' || *p == 'e') {\n    is_float = true;\n    p++;\n\n    switch (*p) {\n    case '-':\n    case '+': {\n      p++;\n      break;\n    }\n    default:\n      break;\n    }\n\n    if (XGBOOST_EXPECT(*p >= '0' && *p <= '9', true)) {\n      p++;\n      while (*p >= '0' && *p <= '9') {\n        p++;\n      }\n    } else {\n      Error(\"Expecting digit\");\n    }\n  }\n\n  auto moved = std::distance(beg, p);\n  this->cursor_.Forward(moved);\n\n  if (is_float) {\n    float f;\n    auto ret = from_chars(beg, p, f);\n    if 
(XGBOOST_EXPECT(ret.ec != std::errc(), false)) {\n      // Compatible with old format that generates very long mantissa from std stream.\n      f = std::strtof(beg, nullptr);\n    }\n    return Json(static_cast<Number::Float>(f));\n  } else {\n    if (negative) {\n      i = -i;\n    }\n    return Json(JsonInteger(i));\n  }\n}\n\nJson JsonReader::ParseBoolean() {\n  bool result = false;\n  Char ch = GetNextNonSpaceChar();\n  std::string const t_value = u8\"true\";\n  std::string const f_value = u8\"false\";\n\n  if (ch == 't') {\n    GetConsecutiveChar('r');\n    GetConsecutiveChar('u');\n    GetConsecutiveChar('e');\n    result = true;\n  } else {\n    GetConsecutiveChar('a');\n    GetConsecutiveChar('l');\n    GetConsecutiveChar('s');\n    GetConsecutiveChar('e');\n    result = false;\n  }\n  return Json{JsonBoolean{result}};\n}\n\nJson Json::Load(StringView str, std::ios::openmode mode) {\n  Json json;\n  if (mode & std::ios::binary) {\n    UBJReader reader{str};\n    json = Json::Load(&reader);\n  } else {\n    JsonReader reader(str);\n    json = reader.Load();\n  }\n  return json;\n}\n\nJson Json::Load(JsonReader* reader) {\n  Json json{reader->Load()};\n  return json;\n}\n\nvoid Json::Dump(Json json, std::string* str, std::ios::openmode mode) {\n  std::vector<char> buffer;\n  Dump(json, &buffer, mode);\n  str->resize(buffer.size());\n  std::copy(buffer.cbegin(), buffer.cend(), str->begin());\n}\n\nvoid Json::Dump(Json json, std::vector<char>* str, std::ios::openmode mode) {\n  str->clear();\n  if (mode & std::ios::binary) {\n    UBJWriter writer{str};\n    writer.Save(json);\n  } else {\n    JsonWriter writer(str);\n    writer.Save(json);\n  }\n}\n\nvoid Json::Dump(Json json, JsonWriter* writer) {\n  
writer->Save(json);\n}\n\nstatic_assert(std::is_nothrow_move_constructible_v<Json>);\nstatic_assert(std::is_nothrow_move_constructible_v<Object>);\nstatic_assert(std::is_nothrow_move_constructible_v<Array>);\nstatic_assert(std::is_nothrow_move_constructible_v<String>);\n\nJson UBJReader::ParseArray() {\n  auto marker = PeekNextChar();\n\n  if (marker == '$') {  // typed array\n    GetNextChar();      // remove $\n    marker = GetNextChar();\n    auto type = marker;\n    GetConsecutiveChar('#');\n    GetConsecutiveChar('L');\n    auto n = this->ReadPrimitive<int64_t>();\n\n    marker = PeekNextChar();\n    switch (type) {\n      case 'd':\n        return ParseTypedArray<F32Array>(n);\n      case 'D':\n        return ParseTypedArray<F64Array>(n);\n      case 'i':\n        return ParseTypedArray<I8Array>(n);\n      case 'U':\n        return ParseTypedArray<U8Array>(n);\n      case 'I':\n        return ParseTypedArray<I16Array>(n);\n      case 'l':\n        return ParseTypedArray<I32Array>(n);\n      case 'L':\n        return ParseTypedArray<I64Array>(n);\n      default:\n        LOG(FATAL) << \"`\" + std::string{static_cast<char>(type)} +  // NOLINT\n                          \"` is not supported for typed array.\";\n    }\n  }\n  std::vector<Json> results;\n  if (marker == '#') {  // array with length optimization\n    GetNextChar();\n    GetConsecutiveChar('L');\n    auto n = this->ReadPrimitive<int64_t>();\n    results.resize(n);\n    for (int64_t i = 0; i < n; ++i) {\n      results[i] = Parse();\n    }\n  } else {  // normal array\n    while (marker != ']') {\n      results.emplace_back(Parse());\n      marker = PeekNextChar();\n    }\n    GetConsecutiveChar(']');\n  }\n\n  return Json{results};\n}\n\nstd::string UBJReader::DecodeStr() {\n  // only L is supported right now.\n  GetConsecutiveChar('L');\n  auto bsize = this->ReadPrimitive<int64_t>();\n\n  std::string str;\n  str.resize(bsize);\n  auto ptr = raw_str_.c_str() + cursor_.Pos();\n  std::memcpy(&str[0], 
ptr, bsize);\n  this->cursor_.Forward(bsize);\n  return str;\n}\n\nJson UBJReader::ParseObject() {\n  auto marker = PeekNextChar();\n  Object::Map results;\n\n  while (marker != '}') {\n    auto str = this->DecodeStr();\n    results.emplace(str, this->Parse());\n    marker = PeekNextChar();\n  }\n\n  GetConsecutiveChar('}');\n  return Json{std::move(results)};\n}\n\nJson UBJReader::Load() {\n  Json result = Parse();\n  return result;\n}\n\nJson UBJReader::Parse() {\n  while (true) {\n    auto c = PeekNextChar();\n    if (c == -1) {\n      break;\n    }\n\n    GetNextChar();\n    switch (c) {\n      case '{':\n        return ParseObject();\n      case '[':\n        return ParseArray();\n      case 'Z': {\n        return Json{nullptr};\n      }\n      case 'T': {\n        return Json{JsonBoolean{true}};\n      }\n      case 'F': {\n        return Json{JsonBoolean{false}};\n      }\n      case 'd': {\n        auto v = this->ReadPrimitive<float>();\n        return Json{v};\n      }\n      case 'D': {\n        auto v = this->ReadPrimitive<double>();\n        return Json{v};\n      }\n      case 'S': {\n        auto str = this->DecodeStr();\n        return Json{str};\n      }\n      case 'i': {\n        Integer::Int i = this->ReadPrimitive<int8_t>();\n        return Json{i};\n      }\n      case 'U': {\n        Integer::Int i = this->ReadPrimitive<uint8_t>();\n        return Json{i};\n      }\n      case 'I': {\n        Integer::Int i = this->ReadPrimitive<int16_t>();\n        return Json{i};\n      }\n      case 'l': {\n        Integer::Int i = this->ReadPrimitive<int32_t>();\n        return Json{i};\n      }\n      case 'L': {\n        auto i = this->ReadPrimitive<int64_t>();\n        return Json{i};\n      }\n      case 'C': {\n        Integer::Int i = this->ReadPrimitive<char>();\n        return Json{i};\n      }\n      case 'H': {\n        LOG(FATAL) << \"High precision number is not supported.\";\n        break;\n      }\n      default:\n        Error(\"Unknown 
construct\");\n    }\n  }\n  return {};\n}\n\nnamespace {\ntemplate <typename T>\nvoid WritePrimitive(T v, std::vector<char>* stream) {\n  v = ToBigEndian(v);\n  auto s = stream->size();\n  stream->resize(s + sizeof(v));\n  auto ptr = stream->data() + s;\n  std::memcpy(ptr, &v, sizeof(v));\n}\n\nvoid EncodeStr(std::vector<char>* stream, std::string const& string) {\n  stream->push_back('L');\n\n  int64_t bsize = string.size();\n  WritePrimitive(bsize, stream);\n\n  auto s = stream->size();\n  stream->resize(s + string.size());\n\n  auto ptr = stream->data() + s;\n  std::memcpy(ptr, string.data(), string.size());\n}\n}  // anonymous namespace\n\nvoid UBJWriter::Visit(JsonArray const* arr) {\n  stream_->emplace_back('[');\n  auto const& vec = arr->GetArray();\n  int64_t n = vec.size();\n  stream_->push_back('#');\n  stream_->push_back('L');\n  WritePrimitive(n, stream_);\n  for (auto const& v : vec) {\n    this->Save(v);\n  }\n}\n\ntemplate <typename T, Value::ValueKind kind>\nvoid WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stream) {\n  stream->emplace_back('[');\n  stream->push_back('$');\n  if (std::is_same_v<T, float>) {\n    stream->push_back('d');\n  } else if (std::is_same_v<T, double>) {\n    stream->push_back('D');\n  } else if (std::is_same_v<T, std::int8_t>) {\n    stream->push_back('i');\n  } else if (std::is_same_v<T, std::uint8_t>) {\n    stream->push_back('U');\n  } else if (std::is_same_v<T, std::int16_t>) {\n    stream->push_back('I');\n  } else if (std::is_same_v<T, std::int32_t>) {\n    stream->push_back('l');\n  } else if (std::is_same_v<T, std::int64_t>) {\n    stream->push_back('L');\n  } else {\n    LOG(FATAL) << \"Not implemented\";\n  }\n\n  stream->push_back('#');\n  stream->push_back('L');\n\n  int64_t n = arr->Size();\n  WritePrimitive(n, stream);\n  auto s = stream->size();\n  stream->resize(s + arr->Size() * sizeof(T));\n  auto const& vec = arr->GetArray();\n  for (int64_t i = 0; i < n; ++i) {\n    auto v = 
ToBigEndian(vec[i]);\n    std::memcpy(stream->data() + s, &v, sizeof(v));\n    s += sizeof(v);\n  }\n}\n\nvoid UBJWriter::Visit(F32Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(F64Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(I8Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(U8Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(I16Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(I32Array const* arr) { WriteTypedArray(arr, stream_); }\nvoid UBJWriter::Visit(I64Array const* arr) { WriteTypedArray(arr, stream_); }\n\nvoid UBJWriter::Visit(JsonObject const* obj) {\n  stream_->emplace_back('{');\n  for (auto const& value : obj->GetObject()) {\n    auto const& key = value.first;\n    EncodeStr(stream_, key);\n    this->Save(value.second);\n  }\n  stream_->emplace_back('}');\n}\n\nvoid UBJWriter::Visit(JsonNumber const* num) {\n  stream_->push_back('d');\n  auto val = num->GetNumber();\n  WritePrimitive(val, stream_);\n}\n\nvoid UBJWriter::Visit(JsonInteger const* num) {\n  auto i = num->GetInteger();\n  if (i > std::numeric_limits<int8_t>::min() && i < std::numeric_limits<int8_t>::max()) {\n    stream_->push_back('i');\n    WritePrimitive(static_cast<int8_t>(i), stream_);\n  } else if (i > std::numeric_limits<int16_t>::min() && i < std::numeric_limits<int16_t>::max()) {\n    stream_->push_back('I');\n    WritePrimitive(static_cast<int16_t>(i), stream_);\n  } else if (i > std::numeric_limits<int32_t>::min() && i < std::numeric_limits<int32_t>::max()) {\n    stream_->push_back('l');\n    WritePrimitive(static_cast<int32_t>(i), stream_);\n  } else {\n    stream_->push_back('L');\n    WritePrimitive(i, stream_);\n  }\n}\n\nvoid UBJWriter::Visit(JsonNull const*) { stream_->push_back('Z'); }\n\nvoid UBJWriter::Visit(JsonString const* str) {\n  stream_->push_back('S');\n  EncodeStr(stream_, str->GetString());\n}\n\nvoid 
UBJWriter::Visit(JsonBoolean const* boolean) {\n  stream_->push_back(boolean->GetBoolean() ? 'T' : 'F');\n}\n\nvoid UBJWriter::Save(Json json) { json.Ptr()->Save(this); }\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/json_utils.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n *\n * @brief Utils tailored for XGBoost.\n */\n#pragma once\n\n#include <algorithm>    // for transform, copy\n#include <string>       // for string\n#include <type_traits>  // for enable_if_t, remove_const_t\n#include <vector>       // for vector\n\n#include \"xgboost/json.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost {\nnamespace detail {\ntemplate <typename Head>\nbool TypeCheckImpl(Json const &value) {\n  return IsA<Head>(value);\n}\n\ntemplate <typename Head, typename... JT>\nstd::enable_if_t<sizeof...(JT) != 0, bool> TypeCheckImpl(Json const &value) {\n  return IsA<Head>(value) || TypeCheckImpl<JT...>(value);\n}\n\ntemplate <typename Head>\nstd::string TypeCheckError() {\n  return \"`\" + Head{}.TypeStr() + \"`\";\n}\n\ntemplate <typename Head, typename... JT>\nstd::enable_if_t<sizeof...(JT) != 0, std::string> TypeCheckError() {\n  return \"`\" + Head{}.TypeStr() + \"`, \" + TypeCheckError<JT...>();\n}\n}  // namespace detail\n\n/**\n * @brief Type check for JSON-based parameters\n *\n * @tparam JT    Expected JSON types.\n * @param  value Value to be checked.\n */\ntemplate <typename... 
JT>\nvoid TypeCheck(Json const &value, StringView name) {\n  if (!detail::TypeCheckImpl<JT...>(value)) {\n    LOG(FATAL) << \"Invalid type for: `\" << name << \"`, expecting one of the: {\"\n               << detail::TypeCheckError<JT...>() << \"}, got: `\" << value.GetValue().TypeStr()\n               << \"`\";\n  }\n}\n\ntemplate <typename JT>\nauto const &RequiredArg(Json const &in, StringView key, StringView func) {\n  auto const &obj = get<Object const>(in);\n  auto it = obj.find(key);\n  if (it == obj.cend() || IsA<Null>(it->second)) {\n    LOG(FATAL) << \"Argument `\" << key << \"` is required for `\" << func << \"`.\";\n  }\n  TypeCheck<JT>(it->second, StringView{key});\n  return get<std::remove_const_t<JT> const>(it->second);\n}\n\ntemplate <typename JT, typename T>\nauto const &OptionalArg(Json const &in, StringView key, T const &dft) {\n  auto const &obj = get<Object const>(in);\n  auto it = obj.find(key);\n  if (it != obj.cend() && !IsA<Null>(it->second)) {\n    TypeCheck<JT>(it->second, key);\n\n    return get<std::remove_const_t<JT> const>(it->second);\n  }\n  return dft;\n}\n\ntemplate <typename T, std::enable_if_t<std::is_floating_point_v<T>> * = nullptr>\nvoid SaveVector(std::vector<T> const &in, Json *p_out) {\n  auto &out = *p_out;\n  if (IsA<F32Array>(out)) {\n    auto &out_array = get<F32Array>(out);\n    out_array.resize(in.size());\n    std::copy(in.cbegin(), in.cend(), out_array.begin());\n  } else if (IsA<F64Array>(out)) {\n    auto &out_array = get<F64Array>(out);\n    out_array.resize(in.size());\n    std::copy(in.cbegin(), in.cend(), out_array.begin());\n  } else {\n    LOG(FATAL) << \"Invalid array type.\";\n  }\n}\n\ntemplate <typename T, std::enable_if_t<std::is_floating_point_v<T>> * = nullptr>\nvoid LoadVector(Json const &in, std::vector<T> *out) {\n  if (IsA<F32Array>(in)) {\n    // JSON\n    auto const &array = get<F32Array const>(in);\n    out->resize(array.size());\n    std::copy(array.cbegin(), array.cend(), out->begin());\n  } 
else if (IsA<F64Array>(in)) {\n    auto const &array = get<F64Array const>(in);\n    out->resize(array.size());\n    std::copy(array.cbegin(), array.cend(), out->begin());\n  } else {\n    // UBJSON\n    auto const &array = get<Array const>(in);\n    out->resize(array.size());\n    std::transform(array.cbegin(), array.cend(), out->begin(),\n                   [](Json const &v) { return get<Number const>(v); });\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/linalg_op.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"linalg_op.h\"\n\n#include <cstddef>  // for size_t\n\n#include \"optional_weight.h\"  // for OptionalWeights\n#include \"xgboost/context.h\"  // for Context\n\n#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL)\n#include \"common.h\"  // for AssertGPUSupport\n#endif\n\nnamespace xgboost::sycl::linalg {\nvoid SmallHistogram(Context const* ctx, xgboost::linalg::MatrixView<float const> indices,\n                    common::OptionalWeights const& weights,\n                    xgboost::linalg::VectorView<float> bins);\n#if !defined(XGBOOST_USE_SYCL)\nvoid SmallHistogram(Context const*, xgboost::linalg::MatrixView<float const>,\n                    common::OptionalWeights const&,\n                    xgboost::linalg::VectorView<float>) {\n  common::AssertSYCLSupport();\n}\n#endif\n}  // namespace xgboost::sycl::linalg\n\nnamespace xgboost::linalg {\nnamespace cuda_impl {\nvoid SmallHistogram(Context const* ctx, linalg::MatrixView<float const> indices,\n                    common::OptionalWeights const& weights, linalg::VectorView<float> bins);\n#if !defined(XGBOOST_USE_CUDA)\nvoid SmallHistogram(Context const*, linalg::MatrixView<float const>, common::OptionalWeights const&,\n                    linalg::VectorView<float>) {\n  common::AssertGPUSupport();\n}\n#endif\n}  // namespace cuda_impl\n\nvoid SmallHistogram(Context const* ctx, linalg::MatrixView<float const> indices,\n                    common::OptionalWeights const& weights, linalg::VectorView<float> bins) {\n  auto n = indices.Size();\n  if (ctx->IsCUDA()) {\n    cuda_impl::SmallHistogram(ctx, indices, weights, bins);\n  } else if (ctx->IsSycl()) {\n    sycl::linalg::SmallHistogram(ctx, indices, weights, bins);\n  } else {\n    for (std::size_t i = 0; i < n; ++i) {\n      auto y = indices(i);\n      auto w = weights[i];\n      bins(static_cast<std::size_t>(y)) += w;\n    }\n  }\n}\n}  // namespace xgboost::linalg\n"
  },
  {
    "path": "src/common/linalg_op.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <thrust/scan.h>  // for inclusive_scan\n\n#include <cstddef>  // for size_t\n\n#include \"algorithm.cuh\"       // for ArgSort, RunLengthEncode\n#include \"device_helpers.cuh\"  // for MakeIndexTransformIter\n#include \"device_vector.cuh\"   // for DeviceUVector\n#include \"linalg_op.cuh\"\n#include \"optional_weight.h\"  // for OptionalWeights\n#include \"xgboost/linalg.h\"   // for VectorView\n\nnamespace xgboost::linalg::cuda_impl {\nvoid SmallHistogram(Context const* ctx, linalg::MatrixView<float const> indices,\n                    common::OptionalWeights const& d_weights, linalg::VectorView<float> bins) {\n  auto n_bins = bins.Size();\n  auto cuctx = ctx->CUDACtx();\n  // Sort for segmented sum\n  dh::DeviceUVector<std::size_t> sorted_idx(indices.Size());\n  common::ArgSort<true>(ctx, indices.Values(), dh::ToSpan(sorted_idx));\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n\n  auto key_it = dh::MakeIndexTransformIter(\n      [=] XGBOOST_DEVICE(std::size_t i) { return indices(d_sorted_idx[i]); });\n\n  dh::device_vector<std::size_t> counts_out(n_bins + 1, 0);\n  // Obtain the segment boundaries for the segmented sum.\n  dh::DeviceUVector<float> unique(n_bins);\n  dh::CachingDeviceUVector<std::size_t> num_runs(1);\n  common::RunLengthEncode(cuctx->Stream(), key_it, unique.begin(), counts_out.begin() + 1,\n                          num_runs.begin(), indices.Size());\n  thrust::inclusive_scan(cuctx->CTP(), counts_out.begin(), counts_out.end(), counts_out.begin());\n\n  auto val_it = dh::MakeIndexTransformIter(\n      [=] XGBOOST_DEVICE(std::size_t i) { return d_weights[d_sorted_idx[i]]; });\n  // Sum the weights of each segment into bins; counts_out is the segment ptr after inclusive_scan\n  common::SegmentedSum(cuctx->Stream(), val_it, linalg::tbegin(bins), n_bins, counts_out.cbegin(),\n                       counts_out.cbegin() + 1);\n}\n}  // namespace xgboost::linalg::cuda_impl\n"
  },
  {
    "path": "src/common/linalg_op.cuh",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_LINALG_OP_CUH_\n#define XGBOOST_COMMON_LINALG_OP_CUH_\n\n#include <thrust/iterator/counting_iterator.h>  // for counting_iterator\n#include <thrust/iterator/zip_iterator.h>       // for make_zip_iterator\n#include <thrust/transform.h>                   // for transform\n\n#include <cstdint>            // for int32_t\n#include <cstdlib>            // for size_t\n#include <cuda/std/iterator>  // for iterator_traits\n#include <cuda/std/tuple>     // for get\n#include <cuda/std/version>   // for CCCL_MINOR_VERSION\n#include <tuple>              // for apply\n\n#include \"cuda_context.cuh\"\n#include \"device_helpers.cuh\"  // for LaunchN\n#include \"type.h\"              // for GetValueT\n#include \"xgboost/context.h\"   // for Context\n#include \"xgboost/linalg.h\"    // for TensorView\n\n#if (CCCL_MAJOR_VERSION >= 3) || (CCCL_MAJOR_VERSION >= 2 && CCCL_MINOR_VERSION >= 8)\n#define xgboost_CCCL_HAS_PROCLAIM_COPYABLE 1\n// CCCL 2.8.0 | CUDA 12.9\n#include <cuda/functional>  // for proclaim_copyable_arguments\n#endif\n\nnamespace xgboost::linalg {\nnamespace cuda_impl {\n// Use template specialization to dispatch, Windows + CUDA 11.8 doesn't support extended\n// lambda inside constexpr if\ntemplate <typename T, std::int32_t D>\nstruct ElementWiseImpl {\n  template <typename Fn>\n  void operator()(TensorView<T, D> t, Fn&& fn, cudaStream_t s) {\n    static_assert(D > 1);\n    dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable {\n      std::apply(fn, linalg::UnravelIndex(i, t.Shape()));\n    });\n  }\n};\n\ntemplate <typename T>\nstruct ElementWiseImpl<T, 1> {\n  template <typename Fn>\n  void operator()(TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {\n    dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable { fn(i); });\n  }\n};\n\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid ElementWiseKernel(TensorView<T, D> t, Fn&& fn, cudaStream_t s = 
nullptr) {\n  dh::safe_cuda(cudaSetDevice(t.Device().ordinal));\n  ElementWiseImpl<T, D>{}(t, fn, s);\n}\n\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid TransformIdxKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  dh::safe_cuda(cudaSetDevice(t.Device().ordinal));\n  auto s = ctx->CUDACtx()->Stream();\n  if (t.Contiguous()) {\n    auto ptr = t.Values().data();\n    auto it =\n        thrust::make_zip_iterator(thrust::make_counting_iterator(static_cast<std::size_t>(0)), ptr);\n    using Tuple = typename cuda::std::iterator_traits<common::GetValueT<decltype(it)>>::value_type;\n    thrust::transform(ctx->CUDACtx()->CTP(), it, it + t.Size(), ptr,\n                      [=] XGBOOST_DEVICE(Tuple const& tup) {\n                        return fn(cuda::std::get<0>(tup), cuda::std::get<1>(tup));\n                      });\n  } else {\n    dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable {\n      T& v = std::apply(t, UnravelIndex(i, t.Shape()));\n      v = fn(i, v);\n    });\n  }\n}\n\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid TransformKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  dh::safe_cuda(cudaSetDevice(t.Device().ordinal));\n  auto s = ctx->CUDACtx()->Stream();\n  if (t.Contiguous()) {\n    auto ptr = t.Values().data();\n#if defined(xgboost_CCCL_HAS_PROCLAIM_COPYABLE)\n    auto op = cuda::proclaim_copyable_arguments([=] XGBOOST_DEVICE(T const& v) { return fn(v); });\n#else\n    auto op = [=] XGBOOST_DEVICE(T const& v) {\n      return fn(v);\n    };\n#endif\n    thrust::transform(ctx->CUDACtx()->CTP(), ptr, ptr + t.Size(), ptr, op);\n  } else {\n    dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable {\n      T& v = std::apply(t, UnravelIndex(i, t.Shape()));\n      v = fn(v);\n    });\n  }\n}\n}  // namespace cuda_impl\n\nnamespace detail {\ntemplate <typename T, std::int32_t D>\nstruct IterOp {\n  TensorView<T, D> v;\n  XGBOOST_DEVICE T& operator()(std::size_t i) { return std::apply(v, UnravelIndex(i, 
v.Shape())); }\n};\n}  // namespace detail\n\n// naming: thrust begin\n// returns a thrust iterator for a tensor view.\ntemplate <typename T, std::int32_t D>\nauto tcbegin(TensorView<T, D> v) {  // NOLINT\n  return thrust::make_transform_iterator(\n      thrust::make_counting_iterator(0ul),\n      detail::IterOp<std::add_const_t<std::remove_const_t<T>>, D>{v});\n}\n\ntemplate <typename T, std::int32_t D>\nauto tcend(TensorView<T, D> v) {  // NOLINT\n  return tcbegin(v) + v.Size();\n}\n\ntemplate <typename T, std::int32_t D>\nauto tbegin(TensorView<T, D> v) {  // NOLINT\n  return thrust::make_transform_iterator(thrust::make_counting_iterator(0ul),\n                                         detail::IterOp<std::remove_const_t<T>, D>{v});\n}\n\ntemplate <typename T, std::int32_t D>\nauto tend(TensorView<T, D> v) {  // NOLINT\n  return tbegin(v) + v.Size();\n}\n}  // namespace xgboost::linalg\n\n#if defined(xgboost_CCCL_HAS_PROCLAIM_COPYABLE)\n#undef xgboost_CCCL_HAS_PROCLAIM_COPYABLE\n#endif  // defined(xgboost_CCCL_HAS_PROCLAIM_COPYABLE)\n\n#endif  // XGBOOST_COMMON_LINALG_OP_CUH_\n"
  },
  {
    "path": "src/common/linalg_op.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n *\n * @brief This module defines the dispatching functions for various linalg kernels.\n *\n * Client code can use utilities like @ref ElementWiseKernel by including this file in the\n * right translation unit. For CUDA-compatible kernels, include this header in a .cu TU.\n *\n * Be aware of potential violation of the one definition rule (ODR). The dispatching\n * functions should never be used in an inline function without a system tag.\n */\n#ifndef XGBOOST_COMMON_LINALG_OP_H_\n#define XGBOOST_COMMON_LINALG_OP_H_\n\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <tuple>        // for apply\n#include <type_traits>  // for conditional_t\n\n#include \"json_utils.h\"  // for LoadVector, SaveVector\n#include \"threading_utils.h\"\n#include \"transform_iterator.h\"  // for MakeIndexTransformIter\n#include \"xgboost/json.h\"        // for Json\n#include \"xgboost/linalg.h\"\n\n#if defined(__CUDACC__)\n#include <utility>  // for forward\n\n#include \"linalg_op.cuh\"\n#endif\n\n#if defined(SYCL_LANGUAGE_VERSION)\n#include \"../../plugin/sycl/common/linalg_op.h\"\n#endif\n\n#if !defined(XGBOOST_USE_CUDA) && !defined(SYCL_LANGUAGE_VERSION)\n\n#include \"common.h\"           // for AssertGPUSupport\n#include \"xgboost/context.h\"  // for Context\n\n#endif  // !defined(XGBOOST_USE_CUDA) && !defined(SYCL_LANGUAGE_VERSION)\n\nnamespace xgboost::common {\nstruct OptionalWeights;\n}\n\nnamespace xgboost::linalg {\nnamespace cpu_impl {\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid TransformIdxKernel(linalg::TensorView<T, D> t, std::int32_t n_threads, Fn&& fn) {\n  if (t.Contiguous()) {\n    auto ptr = t.Values().data();\n    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { ptr[i] = fn(i, ptr[i]); });\n  } else {\n    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) {\n      auto& v = std::apply(t, linalg::UnravelIndex(i, t.Shape()));\n      v 
= fn(i, v);\n    });\n  }\n}\n\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid TransformKernel(linalg::TensorView<T, D> t, std::int32_t n_threads, Fn&& fn) {\n  if (t.Contiguous()) {\n    auto ptr = t.Values().data();\n    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { ptr[i] = fn(ptr[i]); });\n  } else {\n    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) {\n      auto& v = std::apply(t, linalg::UnravelIndex(i, t.Shape()));\n      v = fn(v);\n    });\n  }\n}\n\ntemplate <typename T, std::int32_t D, typename Fn>\nvoid ElementWiseKernel(linalg::TensorView<T, D> t, std::int32_t n_threads, Fn&& fn) {\n  constexpr std::size_t kBlockSize = 2048;\n  if constexpr (D == 1) {\n    common::ParallelFor1d<kBlockSize>(t.Size(), n_threads, [&](auto&& block) {\n      for (std::size_t i = block.begin(); i < block.end(); ++i) {\n        fn(i);\n      }\n    });\n  } else if (D == 2 && t.CContiguous() && t.Shape(0) > t.Shape(1) * 64) {\n    // Heuristic. Tall, c-contiguous matrix,\n    auto n_rows = t.Shape(0);\n    auto n_columns = t.Shape(1);\n    common::ParallelFor1d<kBlockSize>(n_rows, n_threads, [&](auto&& block) {\n      for (std::size_t i = block.begin(); i < block.end(); ++i) {\n        for (std::size_t j = 0; j < n_columns; ++j) {\n          fn(i, j);\n        }\n      }\n    });\n  } else {\n    common::ParallelFor1d<kBlockSize>(t.Size(), n_threads, [&](auto&& block) {\n      for (std::size_t i = block.begin(); i < block.end(); ++i) {\n        std::apply(fn, linalg::UnravelIndex(i, t.Shape()));\n      }\n    });\n  }\n}\n}  // namespace cpu_impl\n\ntemplate <typename T, std::int32_t D>\nauto cbegin(TensorView<T, D> const& v) {  // NOLINT\n  auto it = common::MakeIndexTransformIter([&](std::size_t i) -> std::remove_cv_t<T> const& {\n    return std::apply(v, linalg::UnravelIndex(i, v.Shape()));\n  });\n  return it;\n}\n\ntemplate <typename T, std::int32_t D>\nauto cend(TensorView<T, D> const& v) {  // NOLINT\n  return cbegin(v) + 
v.Size();\n}\n\ntemplate <typename T, std::int32_t D>\nauto begin(TensorView<T, D>& v) {  // NOLINT\n  auto it = common::MakeIndexTransformIter(\n      [&](std::size_t i) -> T& { return std::apply(v, linalg::UnravelIndex(i, v.Shape())); });\n  return it;\n}\n\ntemplate <typename T, std::int32_t D>\nauto end(TensorView<T, D>& v) {  // NOLINT\n  return begin(v) + v.Size();\n}\n\nnamespace detail {\nusing SysTagImpl = std::int32_t;\n// Magic for complying with the ODR.\n#if defined(__CUDACC__)\nconstexpr SysTagImpl SysTag() { return 0; }\n#elif defined(XGBOOST_USE_SYCL)\nconstexpr SysTagImpl SysTag() { return 1; }\n#else\nconstexpr SysTagImpl SysTag() { return 2; }\n#endif\n}  // namespace detail\n\n/**\n * @brief Elementwise kernel without a return type.\n *\n * @tparam T  Element type of the input array.\n * @tparam D  Number of dimension of the input array.\n * @tparam Fn Transformation function.\n *\n * @param t  Input array.\n * @param fn Transformation function.\n */\n#if defined(__CUDACC__)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice(\n      [&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n      [&] { cuda_impl::ElementWiseKernel(t, std::forward<Fn>(fn), ctx->CUDACtx()->Stream()); });\n}\n#elif defined(SYCL_LANGUAGE_VERSION)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice([&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n                      [&] { LOG(FATAL) << \"Invalid TU\"; },\n                      [&] { ::xgboost::sycl::linalg::ElementWiseKernel(t, std::forward<Fn>(fn)); });\n}\n#else\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& 
fn) {\n  CHECK(ctx->IsCPU());\n  ctx->DispatchDevice([&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n                      [&] { LOG(FATAL) << \"Invalid TU\"; });\n}\n#endif\n\n/**\n * @brief Elementwise transform, with element index and the element itself as input.\n *\n * @tparam T  Element type of the input array.\n * @tparam D  Number of dimension of the input array.\n * @tparam Fn Transformation function, must return type T.\n *\n * @param t  Input array.\n * @param fn Transformation function, must return type T.\n */\n#if defined(__CUDACC__)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformIdxKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice(\n      [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n      [&] { cuda_impl::TransformIdxKernel(ctx, t, std::forward<Fn>(fn)); });\n}\n#elif defined(SYCL_LANGUAGE_VERSION)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformIdxKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice(\n      [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n      [&] { LOG(FATAL) << \"Invalid TU.\"; },\n      [&] {\n        static_assert(D == 1, \"Not implemented.\");\n        sycl::linalg::ElementWiseKernel(t, [=](std::size_t i) mutable { t(i) = fn(i, t(i)); });\n      });\n}\n#else\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformIdxKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  CHECK(ctx->IsCPU());\n  ctx->DispatchDevice(\n      [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n      [&] { LOG(FATAL) << \"Invalid TU.\"; });\n}\n#endif\n\n/**\n * @brief Elementwise transform, with the element itself as input. 
Rest is the same as @ref\n * TransformIdxKernel\n */\n#if defined(__CUDACC__)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n                      [&] { cuda_impl::TransformKernel(ctx, t, std::forward<Fn>(fn)); });\n}\n#elif defined(SYCL_LANGUAGE_VERSION)\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n                      [&] { LOG(FATAL) << \"Invalid TU.\"; },\n                      [&] {\n                        static_assert(D == 1, \"Not implemented.\");\n                        sycl::linalg::ElementWiseKernel(\n                            t, [=](std::size_t i) mutable { t(i) = fn(t(i)); });\n                      });\n}\n#else\ntemplate <typename T, std::int32_t D, typename Fn, auto _tag = detail::SysTag()>\nvoid TransformKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {\n  CHECK(ctx->IsCPU());\n  ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward<Fn>(fn)); },\n                      [&] { LOG(FATAL) << \"Invalid TU.\"; });\n}\n#endif\n\n// vector-scalar multiplication\ntemplate <auto _tag = detail::SysTag()>\nvoid VecScaMul(Context const* ctx, linalg::VectorView<float> x, double mul) {\n  CHECK_EQ(x.Device().ordinal, ctx->Device().ordinal);\n  TransformKernel(ctx, x, [=] XGBOOST_DEVICE(float v) { return v * mul; });\n}\n\n// vector-scalar division\ntemplate <auto _tag = detail::SysTag()>\nvoid VecScaDiv(Context const* ctx, linalg::VectorView<float> x, double div) {\n  return VecScaMul(ctx, x, 1.0 / div);\n}\n\ntemplate <auto _tag = detail::SysTag()>\nvoid LogE(Context const* ctx, linalg::VectorView<float> x, 
float rt_eps = 0.0f) {\n  CHECK_EQ(x.Device().ordinal, ctx->Device().ordinal);\n  TransformKernel(ctx, x, [=] XGBOOST_DEVICE(float v) { return log(v + rt_eps); });\n}\n\ntemplate <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>\nvoid SaveVector(linalg::Vector<T> const& in, Json* p_out) {\n  ::xgboost::SaveVector(in.Data()->HostVector(), p_out);\n}\n\ntemplate <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>\nvoid LoadVector(Json const& in, linalg::Vector<T>* out) {\n  ::xgboost::LoadVector(in, &out->Data()->HostVector());\n}\n\nvoid SmallHistogram(Context const* ctx, linalg::MatrixView<float const> indices,\n                    common::OptionalWeights const& weights, linalg::VectorView<float> bins);\n}  // namespace xgboost::linalg\n#endif  // XGBOOST_COMMON_LINALG_OP_H_\n"
  },
  {
    "path": "src/common/math.h",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file math.h\n * \\brief additional math utils\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_MATH_H_\n#define XGBOOST_COMMON_MATH_H_\n\n#include <xgboost/base.h>  // for XGBOOST_DEVICE\n\n#include <algorithm>    // for max\n#include <cmath>        // for exp, abs, log, lgamma\n#include <limits>       // for numeric_limits\n#include <type_traits>  // for is_floating_point_v, conditional, is_signed, is_same, declval\n#include <utility>      // for pair\n\nnamespace xgboost {\nnamespace common {\n\ntemplate <typename T>\nXGBOOST_DEVICE T Sqr(T const &w) {\n  return w * w;\n}\n\n/*!\n * \\brief calculate the sigmoid of the input.\n * \\param x input parameter\n * \\return the transformed value.\n */\nXGBOOST_DEVICE inline float Sigmoid(float x) {\n  float constexpr kEps = 1e-16;  // avoid 0 div\n  x = std::min(-x, 88.7f);       // avoid exp overflow\n  auto denom = expf(x) + 1.0f + kEps;\n  auto y = 1.0f / denom;\n  return y;\n}\n\nXGBOOST_DEVICE inline double Sigmoid(double x) {\n  auto denom = std::exp(-x) + 1.0;\n  auto y = 1.0 / denom;\n  return y;\n}\n\nXGBOOST_DEVICE inline float Logit(float x) { return -logf(1.0f / x - 1.0f); }\n\n/*!\n * \\brief Equality test for both integer and floating point.\n */\ntemplate <typename T, typename U>\nXGBOOST_DEVICE constexpr bool CloseTo(T a, U b) {\n  using Casted = typename std::conditional_t<\n      std::is_floating_point_v<T> || std::is_floating_point_v<U>, double,\n      typename std::conditional_t<std::is_signed_v<T> || std::is_signed_v<U>, std::int64_t,\n                                  std::uint64_t>>;\n  return std::is_floating_point_v<Casted>\n             ? 
std::abs(static_cast<Casted>(a) - static_cast<Casted>(b)) < 1e-6\n             : a == b;\n}\n\n/*!\n * \\brief Do in-place softmax transformation on the range [start, end)\n *\n * \\tparam Iterator Input iterator type\n *\n * \\param start Start iterator of input\n * \\param end end iterator of input\n */\ntemplate <typename Iterator>\nXGBOOST_DEVICE void Softmax(Iterator start, Iterator end) {\n  static_assert(\n      std::is_same_v<\n          float, typename std::remove_reference_t<decltype(std::declval<Iterator>().operator*())>>,\n      \"Values should be of type float\");\n  float wmax = *start;\n  for (Iterator i = start + 1; i != end; ++i) {\n    wmax = fmaxf(*i, wmax);\n  }\n  double wsum = 0.0f;\n  for (Iterator i = start; i != end; ++i) {\n    *i = expf(*i - wmax);\n    wsum += *i;\n  }\n  for (Iterator i = start; i != end; ++i) {\n    *i /= static_cast<float>(wsum);\n  }\n}\n\n/*!\n * \\brief Find the maximum iterator within the iterators\n * \\param begin The beginning iterator.\n * \\param end The end iterator.\n * \\return the iterator pointing to the maximum value.\n * \\tparam Iterator The type of the iterator.\n */\ntemplate <typename Iterator>\nXGBOOST_DEVICE inline Iterator FindMaxIndex(Iterator begin, Iterator end) {\n  Iterator maxit = begin;\n  for (Iterator it = begin; it != end; ++it) {\n    if (*it > *maxit) maxit = it;\n  }\n  return maxit;\n}\n\n/*!\n * \\brief perform numerically safe logsum\n * \\param x left input operand\n * \\param y right input operand\n * \\return  log(exp(x) + exp(y))\n */\ninline float LogSum(float x, float y) {\n  if (x < y) {\n    return y + std::log(std::exp(x - y) + 1.0f);\n  } else {\n    return x + std::log(std::exp(y - x) + 1.0f);\n  }\n}\n\n/*!\n * \\brief perform numerically safe logsum\n * \\param begin The beginning iterator.\n * \\param end The end iterator.\n * \\return log(sum(exp(v))) over all values v in [begin, end)\n * \\tparam Iterator The type of the iterator.\n */\ntemplate <typename Iterator>\ninline float 
LogSum(Iterator begin, Iterator end) {\n  float mx = *begin;\n  for (Iterator it = begin; it != end; ++it) {\n    mx = std::max(mx, *it);\n  }\n  float sum = 0.0f;\n  for (Iterator it = begin; it != end; ++it) {\n    sum += std::exp(*it - mx);\n  }\n  return mx + std::log(sum);\n}\n\n// Redefined here to workaround a VC bug that doesn't support overloading for integer\n// types.\ntemplate <typename T>\nXGBOOST_DEVICE typename std::enable_if_t<std::numeric_limits<T>::is_integer, bool> CheckNAN(T) {\n  return false;\n}\n\n#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)\n\nbool CheckNAN(double v);\n\n#else\n\nXGBOOST_DEVICE bool inline CheckNAN(float x) {\n#if defined(__CUDA_ARCH__)\n  return isnan(x);\n#else\n  return std::isnan(x);\n#endif  // defined(__CUDA_ARCH__)\n}\n\nXGBOOST_DEVICE bool inline CheckNAN(double x) {\n#if defined(__CUDA_ARCH__)\n  return isnan(x);\n#else\n  return std::isnan(x);\n#endif  // defined(__CUDA_ARCH__)\n}\n\n#endif  // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)\n// GPU version is not uploaded in CRAN anyway.\n// Specialize only when using R with CPU.\n#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)\ndouble LogGamma(double v);\n\n#else  // Not R or R with GPU.\n\ntemplate <typename T>\nXGBOOST_DEVICE inline T LogGamma(T v) {\n#ifdef _MSC_VER\n\n#if _MSC_VER >= 1800\n  return lgamma(v);\n#else\n#pragma message(                                              \\\n    \"Warning: lgamma function was not available until VS2013\" \\\n    \", poisson regression will be disabled\")\n  utils::Error(\"lgamma function was not available until VS2013\");\n  return static_cast<T>(1.0);\n#endif  // _MSC_VER >= 1800\n\n#else\n  return lgamma(v);\n#endif  // _MSC_VER\n}\n\n#endif  // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)\n\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_MATH_H_\n"
  },
  {
    "path": "src/common/numa_topo.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"numa_topo.h\"\n\n#if defined(__linux__)\n\n#include <linux/mempolicy.h>  // for MPOL_BIND\n#include <sys/syscall.h>      // for SYS_get_mempolicy\n#include <unistd.h>           // for syscall\n\n#endif  // defined(__linux__)\n\n#include <cctype>      // for isalnum\n#include <cstddef>     // for size_t\n#include <cstdint>     // for int32_t\n#include <filesystem>  // for path\n#include <fstream>     // for ifstream\n#include <string>      // for string, stoi\n#include <vector>      // for vector\n\n#include \"common.h\"     // for TrimLast, TrimFirst\n#include \"error_msg.h\"  // for SystemError\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::common {\n\nnamespace {\nnamespace fs = std::filesystem;\n\nusing MaskT = unsigned long;  // NOLINT\ninline constexpr std::size_t kMaskBits = sizeof(MaskT) * 8;\n\n#if defined(__linux__)\n// Wrapper for the system call.\n//\n// https://github.com/torvalds/linux/blob/3f31a806a62e44f7498e2d17719c03f816553f11/mm/mempolicy.c#L1075\nauto GetMemPolicy(int *mode, MaskT *nodemask, unsigned long maxnode, void *addr,  // NOLINT\n                  unsigned long flags) {                                          // NOLINT\n  return syscall(SYS_get_mempolicy, mode, nodemask, maxnode, addr, flags);\n}\n\nauto GetMemPolicy(int *policy, MaskT *nodemask, unsigned long maxnode) {  // NOLINT\n  return GetMemPolicy(policy, nodemask, maxnode, nullptr, 0);\n}\n#endif  // defined(__linux__)\n}  // namespace\n\nvoid ReadCpuList(fs::path const &path, std::vector<std::int32_t> *p_cpus) {\n  auto &cpus = *p_cpus;\n  cpus.clear();\n\n  std::string buff;\n  std::ifstream fin{path};\n  fin >> buff;\n  if (fin.fail()) {\n    LOG(WARNING) << \"Failed to read: \" << path;\n    return;\n  }\n\n  CHECK(!buff.empty());\n  buff = common::TrimFirst(common::TrimLast(buff));\n\n  std::int32_t k = 0;\n  CHECK(std::isalnum(buff[k]));\n  while (static_cast<std::size_t>(k) < buff.size()) 
{\n    std::int32_t val0 = -1, val1 = -1;\n    std::size_t idx = 0;\n    CHECK(std::isalnum(buff[k])) << k << \" \" << buff;\n    val0 = std::stoi(buff.data() + k, &idx);\n    auto last = k + idx;\n    CHECK_LE(last, buff.size());\n    k = last + 1;  // new begin\n    if (last == buff.size() || buff[last] != '-') {\n      // Single value\n      cpus.push_back(val0);\n      continue;\n    }\n    CHECK_EQ(buff[last], '-') << last;\n\n    idx = -1;\n    CHECK_LT(k, buff.size());\n    val1 = std::stoi(buff.data() + k, &idx);\n    CHECK_GE(idx, 1);\n    // Range\n    for (auto i = val0; i <= val1; ++i) {\n      cpus.push_back(i);\n    }\n    k += (idx + 1);\n  }\n}\n\nvoid GetNumaNodeCpus(std::int32_t node_id, std::vector<std::int32_t> *p_cpus) {\n  p_cpus->clear();\n#if defined(__linux__)\n  std::string nodename = \"node\" + std::to_string(node_id);\n  auto p_cpulist = fs::path{\"/sys/devices/system/node\"} / nodename / \"cpulist\";  // NOLINT\n\n  if (!fs::exists(p_cpulist)) {\n    return;\n  }\n  ReadCpuList(p_cpulist, p_cpus);\n#endif  // defined(__linux__)\n}\n\n[[nodiscard]] std::int32_t GetNumaMaxNumNodes() {\n#if defined(__linux__)\n  auto p_possible = fs::path{\"/sys/devices/system/node/possible\"};\n\n  std::int32_t max_n_nodes = kMaskBits;\n\n  if (fs::exists(p_possible)) {\n    std::vector<std::int32_t> cpus;\n    ReadCpuList(p_possible, &cpus);\n    auto it = std::max_element(cpus.cbegin(), cpus.cend());\n    // +1 since node/CPU uses 0-based indexing.\n    if (it != cpus.cend() && (*it + 1) > max_n_nodes) {\n      max_n_nodes = (*it + 1);\n    }\n  }\n\n  // Just in case if it keeps getting into error\n  constexpr decltype(max_n_nodes) kStop = 16384;\n  // Estimate the size of the CPU set based on the error returned from get mempolicy.\n  // Strategy used by hwloc and libnuma.\n  while (true) {\n    std::vector<MaskT> mask(max_n_nodes / kMaskBits, 0);\n\n    std::int32_t mode = -1;\n    auto err = GetMemPolicy(&mode, mask.data(), max_n_nodes);\n    if 
(!err || errno != EINVAL) {\n      return max_n_nodes;  // Got it.\n    }\n    max_n_nodes *= 2;\n\n    if (max_n_nodes > kStop) {\n      break;\n    }\n  }\n#endif  // defined(__linux__)\n  return -1;\n}\n\n[[nodiscard]] bool GetNumaMemBind() {\n#if defined(__linux__)\n  std::int32_t mode = -1;\n  auto max_n_nodes = GetNumaMaxNumNodes();\n  if (max_n_nodes <= 0) {\n    return false;  // Something went wrong, assume there's no membind.\n  }\n  CHECK_GE(max_n_nodes, kMaskBits);\n  std::vector<MaskT> mask(max_n_nodes / kMaskBits);\n  auto status = GetMemPolicy(&mode, mask.data(), max_n_nodes);\n  if (status < 0) {\n    auto msg = error::SystemError().message();\n    LOG(WARNING) << msg;\n    return false;\n  }\n  return mode == MPOL_BIND;\n#else\n  return false;\n#endif  // defined(__linux__)\n}\n\n[[nodiscard]] std::int32_t GetNumaNumNodes() {\n#if defined(__linux__)\n  fs::path p_node{\"/sys/devices/system/node\"};\n  if (!fs::exists(p_node)) {\n    return -1;\n  }\n  try {\n    std::int32_t n_nodes{0};\n    for (auto const &entry : fs::directory_iterator{p_node}) {\n      auto name = entry.path().filename().string();\n      if (name.find(\"node\") == 0) {  // starts with `node`\n        n_nodes += 1;\n      }\n    }\n    if (n_nodes == 0) {\n      // Something went wrong, we should have at least 1 node.\n      LOG(WARNING) << \"Failed to list NUMA nodes.\";\n      return -1;\n    }\n    return n_nodes;\n  } catch (std::exception const &e) {\n    LOG(WARNING) << \"Failed to list NUMA nodes: \" << e.what();\n  }\n#endif  // defined(__linux__)\n  return -1;\n}\n\nvoid GetNumaHasNormalMemoryNodes(std::vector<std::int32_t> *p_nodes) {\n#if defined(__linux__)\n  fs::path has_nm{\"/sys/devices/system/node/has_normal_memory\"};\n  p_nodes->clear();\n  if (!fs::exists(has_nm)) {\n    return;\n  }\n  ReadCpuList(has_nm, p_nodes);\n#endif  // defined(__linux__)\n}\n\nvoid GetNumaHasCpuNodes(std::vector<std::int32_t> *p_nodes) {\n#if defined(__linux__)\n  fs::path 
has_cpu{\"/sys/devices/system/node/has_cpu\"};\n  p_nodes->clear();\n  if (!fs::exists(has_cpu)) {\n    return;\n  }\n  ReadCpuList(has_cpu, p_nodes);\n#endif  // defined(__linux__)\n}\n\n[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa) {\n#ifdef SYS_getcpu\n  return syscall(SYS_getcpu, cpu, numa, NULL) == 0;\n#else\n  return false;\n#endif\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/numa_topo.h",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>     // for int32_t\n#include <filesystem>  // for path\n#include <vector>      // for vector\n\nnamespace xgboost::common {\n/**\n * @brief Read a file with the `cpulist` format.\n *\n *   Linux-Only.\n *\n */\nvoid ReadCpuList(std::filesystem::path const &path, std::vector<std::int32_t> *p_cpus);\n\n/**\n * @brief Get the list of CPU cores grouped under the NUMA node.\n *\n *   Linux-Only.\n *\n */\nvoid GetNumaNodeCpus(std::int32_t node_id, std::vector<std::int32_t> *p_cpus);\n\n/**\n * @brief Find the maximum number of NUMA nodes.\n *\n *   Linux-Only.\n *\n * @return -1 if fail to get the number of nodes. Otherwise, the maximum number of nodes\n *         for allocating node mask.\n */\n[[nodiscard]] std::int32_t GetNumaMaxNumNodes();\n\n/**\n * @brief Check whether the memory policy is set to bind.\n *\n *   Linux-Only.\n *\n */\n[[nodiscard]] bool GetNumaMemBind();\n\n/**\n * @brief Get the number of configured NUMA nodes. This does not represent the highest\n *        node ID as NUMA node ID doesn't have to be contiguous.\n *\n *   Linux-Only.\n *\n * @return -1 if there's no NUMA node. Otherwise, returns the number of NUMA nodes.\n */\n[[nodiscard]] std::int32_t GetNumaNumNodes();\n\n/**\n * @brief Read the `has_normal_memory` system file.\n */\nvoid GetNumaHasNormalMemoryNodes(std::vector<std::int32_t> *p_nodes);\n\n/**\n * @brief Read the `has_cpu` system file.\n */\nvoid GetNumaHasCpuNodes(std::vector<std::int32_t> *p_nodes);\n\n/**\n * @brief Get numa node on Linux. Other platforms are not supported. 
Returns false if the\n *        call fails.\n */\n[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa);\n\n/**\n * @brief Is it physically possible to access the wrong memory?\n */\n[[nodiscard]] inline bool NumaMemCanCross() {\n  std::vector<std::int32_t> nodes;\n  GetNumaHasCpuNodes(&nodes);\n  bool result = nodes.size() > 1;\n  GetNumaHasNormalMemoryNodes(&nodes);\n  result &= nodes.size() > 1;\n  return result;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/numeric.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include \"numeric.h\"\n\n#include <type_traits>  // std::is_same_v\n\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n\nnamespace xgboost {\nnamespace common {\ndouble Reduce(Context const* ctx, HostDeviceVector<float> const& values) {\n  if (ctx->IsCUDA()) {\n    return cuda_impl::Reduce(ctx, values);\n  } else {\n    auto const& h_values = values.ConstHostVector();\n    auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0);\n    static_assert(std::is_same_v<decltype(result), double>);\n    return result;\n  }\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/numeric.cu",
    "content": "/*!\n * Copyright 2022 by XGBoost Contributors\n */\n#include <thrust/execution_policy.h>\n\n#include \"device_helpers.cuh\"            // dh::Reduce, dh::XGBCachingDeviceAllocator\n#include \"numeric.h\"\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n\nnamespace xgboost::common::cuda_impl {\ndouble Reduce(Context const* ctx, HostDeviceVector<float> const& values) {\n  values.SetDevice(ctx->Device());\n  auto const d_values = values.ConstDeviceSpan();\n  dh::XGBCachingDeviceAllocator<char> alloc;\n  return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,\n                    thrust::plus<float>{});\n}\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/numeric.h",
    "content": "/**\n * Copyright 2022-2026, XGBoost contributors.\n */\n#ifndef XGBOOST_COMMON_NUMERIC_H_\n#define XGBOOST_COMMON_NUMERIC_H_\n\n#include <dmlc/common.h>  // OMPException\n\n#include <algorithm>    // for max\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <iterator>     // for iterator_traits\n#include <numeric>      // for accumulate\n#include <type_traits>  // for is_same_v\n#include <vector>       // for vector\n\n#include \"threading_utils.h\"             // MemStackAllocator, DefaultMaxThreads\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n\n#if !defined(XGBOOST_USE_CUDA)\n\n#include \"common.h\"  // AssertGPUSupport\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost::common {\n\n/**\n * \\brief Run length encode on CPU, input must be sorted.\n */\ntemplate <typename Iter, typename Idx>\nvoid RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {\n  auto& out = *p_out;\n  out = std::vector<Idx>{0};\n  size_t n = std::distance(begin, end);\n  for (size_t i = 1; i < n; ++i) {\n    if (begin[i] != begin[i - 1]) {\n      out.push_back(i);\n    }\n  }\n  if (out.back() != n) {\n    out.push_back(n);\n  }\n}\n\n/**\n * @brief Variant of std::partial_sum, out_it should point to a container that has n + 1\n *        elements. Useful for constructing a CSR indptr.\n */\ntemplate <typename InIt, typename OutIt, typename T>\nvoid PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {\n  static_assert(std::is_same_v<T, typename std::iterator_traits<InIt>::value_type>);\n  static_assert(std::is_same_v<T, typename std::iterator_traits<OutIt>::value_type>);\n  // The number of threads is pegged to the batch size. 
If the OMP block is parallelized\n  // on anything other than the batch/block size, it should be reassigned\n  auto n = static_cast<size_t>(std::distance(begin, end));\n  const size_t batch_threads =\n      std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));\n  MemStackAllocator<T, DefaultMaxThreads()> partial_sums(batch_threads);\n\n  size_t block_size = n / batch_threads;\n\n  // Phase 1: Compute local partial sums for each block\n  ParallelFor(batch_threads, static_cast<std::int32_t>(batch_threads), [&](auto tid) {\n    std::size_t ibegin = block_size * tid;\n    std::size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));\n\n    T running_sum = 0;\n    for (std::size_t ridx = ibegin; ridx < iend; ++ridx) {\n      running_sum += *(begin + ridx);\n      *(out_it + 1 + ridx) = running_sum;\n    }\n  });\n\n  // Phase 2: Compute prefix sums of block sums (sequential)\n  partial_sums[0] = init;\n  for (std::size_t i = 1; i < batch_threads; ++i) {\n    partial_sums[i] = partial_sums[i - 1] + *(out_it + i * block_size);\n  }\n\n  // Phase 3: Add block prefix to each element\n  ParallelFor(batch_threads, static_cast<std::int32_t>(batch_threads), [&](auto tid) {\n    std::size_t ibegin = block_size * tid;\n    std::size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));\n\n    for (std::size_t i = ibegin; i < iend; ++i) {\n      *(out_it + 1 + i) += partial_sums[tid];\n    }\n  });\n}\n\nnamespace cuda_impl {\ndouble Reduce(Context const* ctx, HostDeviceVector<float> const& values);\n#if !defined(XGBOOST_USE_CUDA)\ninline double Reduce(Context const*, HostDeviceVector<float> const&) {\n  AssertGPUSupport();\n  return 0;\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n\n/**\n * \\brief Reduction with iterator. init must be additive identity. 
(0 for primitive types)\n */\nnamespace cpu_impl {\ntemplate <typename It, typename V = typename It::value_type>\nV Reduce(Context const* ctx, It first, It second, V const& init) {\n  std::size_t n = std::distance(first, second);\n  auto n_threads = static_cast<std::size_t>(std::min(n, static_cast<std::size_t>(ctx->Threads())));\n  common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(n_threads, init);\n  common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });\n  auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);\n  return result;\n}\n}  // namespace cpu_impl\n\n/**\n * @brief Reduction on host device vector.\n */\ndouble Reduce(Context const* ctx, HostDeviceVector<float> const& values);\n\ntemplate <typename It, typename T = typename std::iterator_traits<It>::value_type>\nvoid Iota(Context const* ctx, It first, It last, T const& value) {\n  auto n = std::distance(first, last);\n  std::int32_t n_threads = ctx->Threads();\n  ParallelForBlock(static_cast<std::size_t>(n), n_threads, [&](auto&& blk) {\n    for (std::size_t i = blk.begin(); i < blk.end(); ++i) {\n      first[i] = static_cast<T>(i) + value;\n    }\n  });\n}\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_COMMON_NUMERIC_H_\n"
  },
  {
    "path": "src/common/nvtx_utils.h",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#pragma once\n\n#if defined(XGBOOST_USE_NVTX)\n#include <nvtx3/nvtx3.hpp>\n#endif  // defined(XGBOOST_USE_NVTX)\n\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::nvtx {\nstruct Domain {\n  static constexpr char const* name{\"libxgboost\"};  // NOLINT\n};\n\n#if defined(XGBOOST_USE_NVTX)\nusing ScopedRange = ::nvtx3::scoped_range_in<Domain>;\nusing EventAttr = ::nvtx3::event_attributes;\nusing Rgb = ::nvtx3::rgb;\n\ninline auto MakeScopedRange(StringView name, Rgb color) {\n  ::nvtx3::v1::registered_string_in<Domain> const scope_name{name.c_str()};\n  ::nvtx3::v1::event_attributes const scope_attr{scope_name, color};\n  return ::nvtx3::v1::scoped_range_in<Domain>{scope_attr};\n}\n\n#else\nclass ScopedRange {\n public:\n  template <typename... Args>\n  explicit ScopedRange(Args&&...) {}\n};\nclass EventAttr {\n public:\n  template <typename... Args>\n  explicit EventAttr(Args&&...) {}\n};\nclass Rgb {\n public:\n  template <typename... Args>\n  explicit Rgb(Args&&...) {}\n};\n\ninline auto MakeScopedRange(StringView, Rgb) { return ScopedRange{}; }\n#endif  // defined(XGBOOST_USE_NVTX)\n}  // namespace xgboost::nvtx\n\n#if defined(XGBOOST_USE_NVTX)\n\n// Macro for making NVTX function range.\n#define xgboost_NVTX_FN_RANGE() NVTX3_FUNC_RANGE_IN(::xgboost::nvtx::Domain)\n\n// Macro for making colored NVTX function range.\n#define xgboost_NVTX_FN_RANGE_C(r, g, b) \\\n  auto __nvtx_scoped__ = ::xgboost::nvtx::MakeScopedRange(__func__, (nvtx::Rgb((r), (g), (b))))\n\n#else\n\n#define xgboost_NVTX_FN_RANGE()\n\n#define xgboost_NVTX_FN_RANGE_C(r, g, b)\n\n#endif  // defined(XGBOOST_USE_NVTX)\n"
  },
  {
    "path": "src/common/observer.h",
    "content": "/*!\n * Copyright 2019-2020 XGBoost contributors\n * \\file observer.h\n */\n#ifndef XGBOOST_COMMON_OBSERVER_H_\n#define XGBOOST_COMMON_OBSERVER_H_\n\n#include <iostream>\n#include <algorithm>\n#include <limits>\n#include <string>\n#include <vector>\n\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/parameter.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/tree_model.h\"\n\n#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1\n#define OBSERVER_PRINT LOG(INFO)\n#define OBSERVER_ENDL \"\"\n#define OBSERVER_NEWLINE \"\"\n#else  // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1\n#define OBSERVER_PRINT std::cout << std::setprecision(17)\n#define OBSERVER_ENDL std::endl\n#define OBSERVER_NEWLINE \"\\n\"\n#endif  // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1\n\nnamespace xgboost {\n/*\\brief  An observer for logging internal data structures.\n *\n *  This class is designed to be `diff` tool friendly, which means it uses plain\n *  `std::cout` for printing to avoid the time information emitted by `LOG(DEBUG)` or\n *  similar facilities. Exception: use `LOG(INFO)` for the R package, to comply\n *  with CRAN policy.\n */\nclass TrainingObserver {\n#if defined(XGBOOST_USE_DEBUG_OUTPUT)\n  bool constexpr static kObserve {true};\n#else\n  bool constexpr static kObserve {false};\n#endif  // defined(XGBOOST_USE_DEBUG_OUTPUT)\n\n public:\n  void Update(int32_t iter) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    OBSERVER_PRINT << \"Iter: \" << iter << OBSERVER_ENDL;\n  }\n  /*\\brief Observe tree. */\n  void Observe(RegTree const& tree) {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    OBSERVER_PRINT << \"Tree:\" << OBSERVER_ENDL;\n    Json j_tree {Object()};\n    tree.SaveModel(&j_tree);\n    std::string str;\n    Json::Dump(j_tree, &str);\n    OBSERVER_PRINT << str << OBSERVER_ENDL;\n  }\n  /*\\brief Observe tree. 
*/\n  void Observe(RegTree const* p_tree) {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    auto const& tree = *p_tree;\n    this->Observe(tree);\n  }\n  template <typename T>\n  void Observe(common::Span<T> span, std::string name,\n               size_t n = std::numeric_limits<std::size_t>::max()) {\n    std::vector<T> copy(span.size());\n    std::copy(span.cbegin(), span.cend(), copy.begin());\n    this->Observe(copy, name, n);\n  }\n  /*\\brief Observe data hosted by `std::vector'. */\n  template <typename T>\n  void Observe(std::vector<T> const& h_vec, std::string name,\n               size_t n = std::numeric_limits<std::size_t>::max()) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    OBSERVER_PRINT << \"Procedure: \" << name << OBSERVER_ENDL;\n\n    for (size_t i = 0; i < h_vec.size(); ++i) {\n      OBSERVER_PRINT << h_vec[i] << \", \";\n      if (i % 8 == 0 && i != 0) {\n        OBSERVER_PRINT << OBSERVER_NEWLINE;\n      }\n      if ((i + 1) == n) {\n        break;\n      }\n    }\n    OBSERVER_PRINT << OBSERVER_ENDL;\n  }\n  /*\\brief Observe data hosted by `HostDeviceVector'. */\n  template <typename T>\n  void Observe(HostDeviceVector<T> const& vec, std::string name,\n               size_t n = std::numeric_limits<std::size_t>::max()) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    auto const& h_vec = vec.HostVector();\n    this->Observe(h_vec, name, n);\n  }\n  template <typename T>\n  void Observe(HostDeviceVector<T>* vec, std::string name,\n               size_t n = std::numeric_limits<std::size_t>::max()) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n    this->Observe(*vec, name, n);\n  }\n\n  /*\\brief Observe objects with `XGBoostParameter' type. 
*/\n  template <typename Parameter,\n            typename std::enable_if_t<std::is_base_of_v<XGBoostParameter<Parameter>, Parameter>>* =\n                nullptr>\n  void Observe(const Parameter& p, std::string name) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n\n    Json obj {toJson(p)};\n    OBSERVER_PRINT << \"Parameter: \" << name << \":\\n\" << obj << OBSERVER_ENDL;\n  }\n  /*\\brief Observe parameters provided by users. */\n  void Observe(Args const& args) const {\n    if (XGBOOST_EXPECT(!kObserve, true)) { return; }\n\n    for (auto kv : args) {\n      OBSERVER_PRINT << kv.first << \": \" << kv.second << OBSERVER_NEWLINE;\n    }\n    OBSERVER_PRINT << OBSERVER_ENDL;\n  }\n\n  /*\\brief Get a global instance. */\n  static TrainingObserver& Instance() {\n    static TrainingObserver observer;\n    return observer;\n  }\n};\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_OBSERVER_H_\n"
  },
  {
    "path": "src/common/optional_weight.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"optional_weight.h\"\n\n#include <numeric>  // for accumulate\n\n#include \"xgboost/base.h\"     // for bst_idx_t\n#include \"xgboost/context.h\"  // for Context\n\n#include \"common.h\"  // for AssertGPUSupport\n\nnamespace xgboost::common {\n#if defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\ndouble SumOptionalWeights(Context const* ctx, OptionalWeights const& weights);\n}\n#endif\n\n#if defined(XGBOOST_USE_SYCL)\nnamespace sycl_impl {\ndouble SumOptionalWeights(Context const* ctx, OptionalWeights const& weights);\n}\n#endif\n\n[[nodiscard]] double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights,\n                                        bst_idx_t n_samples) {\n  if (weights.Empty()) {\n    return n_samples * weights.dft;\n  }\n  if (ctx->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    return cuda_impl::SumOptionalWeights(ctx, weights);\n#else\n    common::AssertGPUSupport();\n#endif\n  }\n  if (ctx->IsSycl()) {\n#if defined(XGBOOST_USE_SYCL)\n    return sycl_impl::SumOptionalWeights(ctx, weights);\n#else\n    common::AssertSYCLSupport();\n#endif\n  }\n  auto sum_weight = std::accumulate(weights.Data(), weights.Data() + weights.Size(), 0.0);\n  return sum_weight;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/optional_weight.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <cuda/std/functional>  // for plus\n\n#include <cstddef>              // for size_t\n\n#include \"cuda_context.cuh\"\n#include \"device_helpers.cuh\"\n#include \"optional_weight.h\"\n#include \"xgboost/context.h\"  // for Context\n\nnamespace xgboost::common::cuda_impl {\ndouble SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) {\n  auto w_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return weights[i]; });\n  return dh::Reduce(ctx->CUDACtx()->CTP(), w_it, w_it + weights.Size(), 0.0, cuda::std::plus{});\n}\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/optional_weight.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_OPTIONAL_WEIGHT_H_\n#define XGBOOST_COMMON_OPTIONAL_WEIGHT_H_\n\n#include <cstddef>  // for size_t\n\n#include \"xgboost/base.h\"                // XGBOOST_DEVICE\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/span.h\"                // Span\n\nnamespace xgboost::common {\nstruct OptionalWeights {\n  Span<float const> weights;\n  float dft{1.0f};  // fixme: make this compile time constant\n\n  explicit OptionalWeights(Span<float const> w) : weights{w} {}\n  explicit OptionalWeights(float w) : dft{w} {}\n\n  XGBOOST_DEVICE float operator[](std::size_t i) const {\n    return weights.empty() ? dft : weights[i];\n  }\n  [[nodiscard]] auto Empty() const { return weights.empty(); }\n  [[nodiscard]] auto Size() const { return weights.size(); }\n  [[nodiscard]] auto Data() const { return weights.data(); }\n};\n\ninline OptionalWeights MakeOptionalWeights(DeviceOrd device,\n                                           HostDeviceVector<float> const& weights) {\n  if (!device.IsCPU()) {\n    weights.SetDevice(device);\n  }\n  return OptionalWeights{device.IsCPU() ? weights.ConstHostSpan() : weights.ConstDeviceSpan()};\n}\n\n[[nodiscard]] double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights,\n                                        bst_idx_t n_samples);\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_OPTIONAL_WEIGHT_H_\n"
  },
  {
    "path": "src/common/param_array.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include \"param_array.h\"\n\n#include <cctype>   // for isspace\n#include <cstddef>  // for size_t\n#include <istream>  // for istream\n#include <ostream>  // for ostream\n#include <string>   // for string\n#include <vector>   // for vector\n\n#include \"../common/json_utils.h\"  // for TypeCheck\n#include \"xgboost/json.h\"          // for F32Array, get, Number\n#include \"xgboost/json_io.h\"       // for JsonWriter\n#include \"xgboost/string_view.h\"   // for StringView\n\nnamespace xgboost::common {\n\nnamespace {\nstd::ostream& WriteStream(std::ostream& os,\n                          const ParamArray<float>& array) {  // NOLINT\n  auto const& t = array.Get();\n  F32Array arr{t.size()};\n  for (std::size_t i = 0; i < t.size(); ++i) {\n    arr.Set(i, t[i]);\n  }\n  std::vector<char> stream;\n  JsonWriter writer{&stream};\n  arr.Save(&writer);\n  for (auto c : stream) {\n    os << c;\n  }\n  return os;\n}\n}  // namespace\n\nstd::ostream& operator<<(std::ostream& os, const ParamArray<float>& array) {  // NOLINT\n  return WriteStream(os, array);\n}\n\nnamespace {\nstd::istream& ReadStream(std::istream& is, ParamArray<float>& array) {  // NOLINT\n  auto& t = array.Get();\n  t.clear();\n  std::string str;\n  while (!is.eof()) {\n    std::string tmp;\n    is >> tmp;\n    str += tmp;\n  }\n  std::size_t head{0};\n  // unify notation for parsing.\n  while (std::isspace(str[head])) {\n    ++head;\n  }\n  if (str[head] == '(') {\n    str[head] = '[';\n  }\n  auto tail = str.size() - 1;\n  while (std::isspace(str[tail])) {\n    --tail;\n  }\n  if (str[tail] == ')') {\n    str[tail] = ']';\n  }\n\n  auto jarr = Json::Load(StringView{str});\n  // return if there's only one element\n  if (IsA<Number>(jarr)) {\n    t.emplace_back(get<Number const>(jarr));\n    return is;\n  }\n  if (IsA<Integer>(jarr)) {\n    t.emplace_back(get<Integer const>(jarr));\n    return is;\n  }\n\n  auto const& jvec = get<Array 
const>(jarr);\n  for (auto v : jvec) {\n    TypeCheck<Number, Integer>(v, array.Name());\n    if (IsA<Number>(v)) {\n      t.emplace_back(get<Number const>(v));\n    } else {\n      t.emplace_back(get<Integer const>(v));\n    }\n  }\n  return is;\n}\n}  // namespace\n\nstd::istream& operator>>(std::istream& is, ParamArray<float>& array) {  // NOLINT\n  return ReadStream(is, array);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/param_array.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#pragma once\n\n#include <istream>  // for istream\n#include <ostream>  // for ostream\n#include <string>   // for string\n#include <utility>  // for forward\n#include <vector>   // for vector\n\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::common {\n/**\n * @brief A shim to enable ADL for parameter parsing. Alternatively, we can put the stream\n * operators in std namespace, which seems to be less ideal.\n */\ntemplate <typename T>\nclass ParamArray {\n  std::string name_;\n  std::vector<T> values_;\n\n public:\n  using size_type = typename decltype(values_)::size_type;              // NOLINT\n  using const_reference = typename decltype(values_)::const_reference;  // NOLINT\n  using reference = typename decltype(values_)::reference;              // NOLINT\n\n public:\n  ParamArray() = default;\n\n  ParamArray(ParamArray const& that) = default;\n  ParamArray& operator=(ParamArray const& that) = default;\n\n  ParamArray(ParamArray&& that) = default;\n  ParamArray& operator=(ParamArray&& that) = default;\n\n  template <typename... Args>\n  explicit ParamArray(StringView name, Args&&... 
args)\n      : name_{name}, values_{std::forward<Args>(args)...} {}\n\n  [[nodiscard]] std::vector<T>& Get() { return values_; }\n  [[nodiscard]] std::vector<T> const& Get() const { return values_; }\n  const_reference operator[](size_type i) const { return values_[i]; }\n  reference operator[](size_type i) { return values_[i]; }\n  [[nodiscard]] bool empty() const { return values_.empty(); }       // NOLINT\n  [[nodiscard]] std::size_t size() const { return values_.size(); }  // NOLINT\n  [[nodiscard]] auto data() const { return values_.data(); }         // NOLINT\n  ParamArray& operator=(std::vector<T> const& that) {\n    this->values_ = that;\n    return *this;\n  }\n  [[nodiscard]] StringView Name() const { return this->name_; }\n  [[nodiscard]] auto cbegin() const { return this->values_.cbegin(); }  // NOLINT\n  [[nodiscard]] auto cend() const { return this->values_.cend(); }      // NOLINT\n  [[nodiscard]] auto begin() { return this->values_.begin(); }          // NOLINT\n  [[nodiscard]] auto end() { return this->values_.end(); }              // NOLINT\n\n  void Resize(size_type n, T const& init) { this->values_.resize(n, init); }  // NOLINT\n};\n\n// For parsing array-based parameters inside DMLC parameter. Input can be a string to a\n// single float or a list of floats.\nstd::ostream& operator<<(std::ostream& os, const ParamArray<float>& t);\nstd::istream& operator>>(std::istream& is, ParamArray<float>& t);\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/partition_builder.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n * \\file partition_builder.h\n * \\brief Quick Utility to compute subset of rows\n * \\author Philip Cho, Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_\n#define XGBOOST_COMMON_PARTITION_BUILDER_H_\n\n#include <xgboost/data.h>\n\n#include <algorithm>\n#include <cstddef>  // for size_t\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include \"../tree/sample_position.h\"  // for SamplePosition\n#include \"categorical.h\"\n#include \"column_matrix.h\"\n#include \"row_set.h\"  // for RowSetCollection\n#include \"xgboost/context.h\"\n#include \"xgboost/tree_model.h\"\n\nnamespace xgboost::common {\n// The builder is required to partition samples into left and right children for a set of nodes\n// Responsible for:\n// 1) Effective memory allocation for intermediate results for multi-thread work\n// 2) Merging partial results produced by threads into original row set (row_set_collection_)\n// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature\ntemplate<size_t BlockSize>\nclass PartitionBuilder {\n  using BitVector = RBitField8;\n\n public:\n  template<typename Func>\n  void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) {\n    left_right_nodes_sizes_.resize(n_nodes);\n    blocks_offsets_.resize(n_nodes+1);\n\n    blocks_offsets_[0] = 0;\n    for (size_t i = 1; i < n_nodes+1; ++i) {\n      blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTask(i-1);\n    }\n\n    if (n_tasks > max_n_tasks_) {\n      mem_blocks_.resize(n_tasks);\n      max_n_tasks_ = n_tasks;\n    }\n  }\n\n  // split row indexes (rid_span) to 2 parts (left_part, right_part) depending\n  // on comparison of indexes values (idx_span) and split point (split_cond)\n  // Handle dense columns\n  // Analog of std::stable_partition, but not in-place\n  template <bool default_left, bool any_missing, typename ColumnType, typename Predicate>\n  std::pair<size_t, size_t> 
PartitionKernel(ColumnType* p_column,\n                                            common::Span<bst_idx_t const> row_indices,\n                                            common::Span<bst_idx_t> left_part,\n                                            common::Span<bst_idx_t> right_part,\n                                            bst_idx_t base_rowid, Predicate&& pred) {\n    auto& column = *p_column;\n    bst_idx_t* p_left_part = left_part.data();\n    bst_idx_t* p_right_part = right_part.data();\n    bst_idx_t nleft_elems = 0;\n    bst_idx_t nright_elems = 0;\n\n    auto p_row_indices = row_indices.data();\n    auto n_samples = row_indices.size();\n\n    for (size_t i = 0; i < n_samples; ++i) {\n      auto rid = p_row_indices[i];\n      bst_bin_t const bin_id = column[rid - base_rowid];\n      if (any_missing && bin_id == ColumnType::kMissingId) {\n        if (default_left) {\n          p_left_part[nleft_elems++] = rid;\n        } else {\n          p_right_part[nright_elems++] = rid;\n        }\n      } else {\n        if (pred(rid, bin_id)) {\n          p_left_part[nleft_elems++] = rid;\n        } else {\n          p_right_part[nright_elems++] = rid;\n        }\n      }\n    }\n\n    return {nleft_elems, nright_elems};\n  }\n\n  template <typename Pred>\n  inline std::pair<size_t, size_t> PartitionRangeKernel(common::Span<const bst_idx_t> ridx,\n                                                        common::Span<bst_idx_t> left_part,\n                                                        common::Span<bst_idx_t> right_part,\n                                                        Pred pred) {\n    bst_idx_t* p_left_part = left_part.data();\n    bst_idx_t* p_right_part = right_part.data();\n    bst_idx_t nleft_elems = 0;\n    bst_idx_t nright_elems = 0;\n    for (auto row_id : ridx) {\n      if (pred(row_id)) {\n        p_left_part[nleft_elems++] = row_id;\n      } else {\n        p_right_part[nright_elems++] = row_id;\n      }\n    }\n    return {nleft_elems, 
nright_elems};\n  }\n\n  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry,\n            typename TreeView>\n  void Partition(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,\n                 const common::Range1d range, const bst_bin_t split_cond,\n                 GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix,\n                 TreeView const& tree, bst_idx_t const* rid) {\n    common::Span<bst_idx_t const> rid_span{rid + range.begin(), rid + range.end()};\n    common::Span<bst_idx_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());\n    common::Span<bst_idx_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());\n    std::size_t nid = nodes[node_in_set].nid;\n    bst_feature_t fid = tree.SplitIndex(nid);\n    bool default_left = tree.DefaultLeft(nid);\n    bool is_cat = tree.SplitType(nid) == FeatureType::kCategorical;\n    auto node_cats = tree.NodeCats(nid);\n    auto const& cut_values = gmat.cut.Values();\n\n    auto pred_hist = [&](auto ridx, auto bin_id) {\n      if (any_cat && is_cat) {\n        auto gidx = gmat.GetGindex(ridx, fid);\n        bool go_left = default_left;\n        if (gidx > -1) {\n          go_left = Decision(node_cats, cut_values[gidx]);\n        }\n        return go_left;\n      } else {\n        return bin_id <= split_cond;\n      }\n    };\n\n    auto pred_approx = [&](auto ridx) {\n      auto gidx = gmat.GetGindex(ridx, fid);\n      bool go_left = default_left;\n      if (gidx > -1) {\n        if (is_cat) {\n          go_left = Decision(node_cats, cut_values[gidx]);\n        } else {\n          go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;\n        }\n      }\n      return go_left;\n    };\n\n    std::pair<size_t, size_t> child_nodes_sizes;\n    if (!column_matrix.IsInitialized()) {\n      child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx);\n    } else {\n      if 
(column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {\n        auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);\n        if (default_left) {\n          child_nodes_sizes = PartitionKernel<true, any_missing>(&column, rid_span, left, right,\n                                                                 gmat.base_rowid, pred_hist);\n        } else {\n          child_nodes_sizes = PartitionKernel<false, any_missing>(&column, rid_span, left, right,\n                                                                  gmat.base_rowid, pred_hist);\n        }\n      } else {\n        CHECK_EQ(any_missing, true);\n        auto column =\n            column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.base_rowid);\n        if (default_left) {\n          child_nodes_sizes = PartitionKernel<true, any_missing>(&column, rid_span, left, right,\n                                                                 gmat.base_rowid, pred_hist);\n        } else {\n          child_nodes_sizes = PartitionKernel<false, any_missing>(&column, rid_span, left, right,\n                                                                  gmat.base_rowid, pred_hist);\n        }\n      }\n    }\n\n    const size_t n_left  = child_nodes_sizes.first;\n    const size_t n_right = child_nodes_sizes.second;\n\n    SetNLeftElems(node_in_set, range.begin(), n_left);\n    SetNRightElems(node_in_set, range.begin(), n_right);\n  }\n\n  template <bool any_missing, typename ColumnType, typename Predicate>\n  void MaskKernel(ColumnType* p_column, common::Span<bst_idx_t const> row_indices,\n                  bst_idx_t base_rowid, BitVector* decision_bits, BitVector* missing_bits,\n                  Predicate&& pred) {\n    auto& column = *p_column;\n    for (auto const row_id : row_indices) {\n      auto const bin_id = column[row_id - base_rowid];\n      if (any_missing && bin_id == ColumnType::kMissingId) {\n        missing_bits->Set(row_id - base_rowid);\n      } 
else if (pred(row_id, bin_id)) {\n        decision_bits->Set(row_id - base_rowid);\n      }\n    }\n  }\n\n  /**\n   * @brief When data is split by column, we don't have all the features locally on the current\n   * worker, so we go through all the rows and mark the bit vectors on whether the decision is made\n   * to go left, or if the feature value used for the split is missing.\n   */\n  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry,\n            typename TreeView>\n  void MaskRows(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,\n                const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat,\n                const common::ColumnMatrix& column_matrix, TreeView const& tree,\n                bst_idx_t const* rid, BitVector* decision_bits, BitVector* missing_bits) {\n    common::Span<bst_idx_t const> rid_span{rid + range.begin(), rid + range.end()};\n    std::size_t nid = nodes[node_in_set].nid;\n    bst_feature_t fid = tree.SplitIndex(nid);\n    bool is_cat = tree.SplitType(nid) == FeatureType::kCategorical;\n    auto node_cats = tree.NodeCats(nid);\n    auto const& cut_values = gmat.cut.Values();\n\n    if (!column_matrix.IsInitialized()) {\n      for (auto row_id : rid_span) {\n        auto gidx = gmat.GetGindex(row_id, fid);\n        if (gidx > -1) {\n          bool go_left;\n          if (is_cat) {\n            go_left = Decision(node_cats, cut_values[gidx]);\n          } else {\n            go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;\n          }\n          if (go_left) {\n            decision_bits->Set(row_id - gmat.base_rowid);\n          }\n        } else {\n          missing_bits->Set(row_id - gmat.base_rowid);\n        }\n      }\n    } else {\n      auto pred_hist = [&](auto ridx, auto bin_id) {\n        if (any_cat && is_cat) {\n          auto gidx = gmat.GetGindex(ridx, fid);\n          CHECK_GT(gidx, -1);\n          return
 Decision(node_cats, cut_values[gidx]);\n        } else {\n          return bin_id <= split_cond;\n        }\n      };\n\n      if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {\n        auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);\n        MaskKernel<any_missing>(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits,\n                                pred_hist);\n      } else {\n        CHECK_EQ(any_missing, true);\n        auto column =\n            column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.base_rowid);\n        MaskKernel<any_missing>(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits,\n                                pred_hist);\n      }\n    }\n  }\n\n  /**\n   * @brief Once we've aggregated the decision and missing bits from all the workers, we can then\n   * use them to partition the rows accordingly.\n   */\n  template <typename ExpandEntry, typename TreeView>\n  void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,\n                       const common::Range1d range, GHistIndexMatrix const& gmat,\n                       TreeView const& tree, bst_idx_t const* rid, BitVector const& decision_bits,\n                       BitVector const& missing_bits) {\n    common::Span<bst_idx_t const> rid_span(rid + range.begin(), rid + range.end());\n    common::Span<bst_idx_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());\n    common::Span<bst_idx_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());\n    std::size_t nid = nodes[node_in_set].nid;\n    bool default_left = tree.DefaultLeft(nid);\n\n    auto pred = [&](auto ridx) {\n      bool go_left = default_left;\n      bool is_missing = missing_bits.Check(ridx - gmat.base_rowid);\n      if (!is_missing) {\n        go_left = decision_bits.Check(ridx - gmat.base_rowid);\n      }\n      return go_left;\n    };\n\n    std::pair<size_t, size_t> child_nodes_sizes;\n    
child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred);\n\n    const size_t n_left  = child_nodes_sizes.first;\n    const size_t n_right = child_nodes_sizes.second;\n\n    SetNLeftElems(node_in_set, range.begin(), n_left);\n    SetNRightElems(node_in_set, range.begin(), n_right);\n  }\n\n  // allocate thread local memory, should be called for each specific task\n  void AllocateForTask(size_t id) {\n    if (mem_blocks_[id].get() == nullptr) {\n      BlockInfo* local_block_ptr = new BlockInfo;\n      CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);\n      mem_blocks_[id].reset(local_block_ptr);\n    }\n  }\n\n  common::Span<bst_idx_t> GetLeftBuffer(int nid, size_t begin, size_t end) {\n    const size_t task_idx = GetTaskIdx(nid, begin);\n    return { mem_blocks_.at(task_idx)->Left(), end - begin };\n  }\n\n  common::Span<bst_idx_t> GetRightBuffer(int nid, size_t begin, size_t end) {\n    const size_t task_idx = GetTaskIdx(nid, begin);\n    return { mem_blocks_.at(task_idx)->Right(), end - begin };\n  }\n\n  void SetNLeftElems(int nid, size_t begin, size_t n_left) {\n    size_t task_idx = GetTaskIdx(nid, begin);\n    mem_blocks_.at(task_idx)->n_left = n_left;\n  }\n\n  void SetNRightElems(int nid, size_t begin, size_t n_right) {\n    size_t task_idx = GetTaskIdx(nid, begin);\n    mem_blocks_.at(task_idx)->n_right = n_right;\n  }\n\n\n  [[nodiscard]] std::size_t GetNLeftElems(int nid) const {\n    return left_right_nodes_sizes_[nid].first;\n  }\n\n  [[nodiscard]] std::size_t GetNRightElems(int nid) const {\n    return left_right_nodes_sizes_[nid].second;\n  }\n\n  // Each thread has partial results for some set of tree-nodes\n  // The function decides order of merging partial results into final row set\n  void CalculateRowOffsets() {\n    for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {\n      size_t n_left = 0;\n      for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {\n        mem_blocks_[j]->n_offset_left = n_left;\n        
n_left += mem_blocks_[j]->n_left;\n      }\n      size_t n_right = 0;\n      for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i + 1]; ++j) {\n        mem_blocks_[j]->n_offset_right = n_left + n_right;\n        n_right += mem_blocks_[j]->n_right;\n      }\n      left_right_nodes_sizes_[i] = {n_left, n_right};\n    }\n  }\n\n  void MergeToArray(bst_node_t nid, size_t begin, bst_idx_t* rows_indexes) {\n    size_t task_idx = GetTaskIdx(nid, begin);\n\n    bst_idx_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;\n    bst_idx_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;\n\n    bst_idx_t const* left = mem_blocks_[task_idx]->Left();\n    bst_idx_t const* right = mem_blocks_[task_idx]->Right();\n\n    std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);\n    std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);\n  }\n\n  size_t GetTaskIdx(int nid, size_t begin) {\n    return blocks_offsets_[nid] + begin / BlockSize;\n  }\n\n  // Copy row partitions into global cache for reuse in objective\n  template <typename Invalidp, typename TreeView>\n  void LeafPartition(Context const* ctx, TreeView const& tree, RowSetCollection const& row_set,\n                     Span<bst_node_t> position, Invalidp invalidp) const {\n    auto p_begin = row_set.Data()->data();\n    // For each node, walk through all the samples that fall in this node.\n    auto p_pos = position.data();\n    ParallelFor(row_set.Size(), ctx->Threads(), [&](auto i) {\n      auto const& node = row_set[i];\n      if (node.node_id < 0) {\n        return;\n      }\n      CHECK(tree.IsLeaf(node.node_id));\n      if (node.begin()) {  // guard for empty node.\n        std::size_t ptr_offset = node.end() - p_begin;\n        CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;\n        for (auto idx = node.begin(); idx != node.end(); ++idx) {\n          p_pos[*idx] = tree::SamplePosition::Encode(node.node_id, !invalidp(*idx));\n        }\n     
 }\n    });\n  }\n\n protected:\n  struct BlockInfo{\n    size_t n_left;\n    size_t n_right;\n\n    size_t n_offset_left;\n    size_t n_offset_right;\n\n    bst_idx_t* Left() {\n      return &left_data_[0];\n    }\n\n    bst_idx_t* Right() {\n      return &right_data_[0];\n    }\n   private:\n    bst_idx_t left_data_[BlockSize];\n    bst_idx_t right_data_[BlockSize];\n  };\n  std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;\n  std::vector<size_t> blocks_offsets_;\n  std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;\n  size_t max_n_tasks_ = 0;\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_PARTITION_BUILDER_H_\n"
  },
  {
    "path": "src/common/probability_distribution.h",
    "content": "/*!\n * Copyright 2019-2020 by Contributors\n * \\file probability_distribution.h\n * \\brief Implementation of a few useful probability distributions\n * \\author Avinash Barnwal and Hyunsu Cho\n */\n\n#ifndef XGBOOST_COMMON_PROBABILITY_DISTRIBUTION_H_\n#define XGBOOST_COMMON_PROBABILITY_DISTRIBUTION_H_\n\n#include <cmath>\n\nnamespace xgboost {\nnamespace common {\n\n#ifndef __CUDACC__\n\nusing std::exp;\nusing std::sqrt;\nusing std::isinf;\nusing std::isnan;\n\n#endif  // __CUDACC__\n\n/*! \\brief Constant PI */\nconstexpr double kPI = 3.14159265358979323846;\n/*! \\brief The Euler-Mascheroni constant */\nconstexpr double kEulerMascheroni = 0.57721566490153286060651209008240243104215933593992;\n\n/*! \\brief Enum encoding possible choices of probability distribution */\nenum class ProbabilityDistributionType : int {\n  kNormal = 0, kLogistic = 1, kExtreme = 2\n};\n\nstruct NormalDistribution {\n  XGBOOST_DEVICE static double PDF(double z) {\n    return exp(-z * z / 2.0) / sqrt(2.0 * kPI);\n  }\n\n  XGBOOST_DEVICE static double CDF(double z) {\n    return 0.5 * (1 + erf(z / sqrt(2.0)));\n  }\n\n  XGBOOST_DEVICE static double GradPDF(double z) {\n    return -z * PDF(z);\n  }\n\n  XGBOOST_DEVICE static double HessPDF(double z) {\n    return (z * z - 1.0) * PDF(z);\n  }\n\n  XGBOOST_DEVICE static ProbabilityDistributionType Type() {\n    return ProbabilityDistributionType::kNormal;\n  }\n};\n\nstruct LogisticDistribution {\n  XGBOOST_DEVICE static double PDF(double z) {\n    const double w = exp(z);\n    const double sqrt_denominator = 1 + w;\n    if (isinf(w) || isinf(w * w)) {\n      return 0.0;\n    } else {\n      return w / (sqrt_denominator * sqrt_denominator);\n    }\n  }\n\n  XGBOOST_DEVICE static double CDF(double z) {\n    const double w = exp(z);\n    return isinf(w) ? 1.0 : (w / (1 + w));\n  }\n\n  XGBOOST_DEVICE static double GradPDF(double z) {\n    const double w = exp(z);\n    return isinf(w) ? 
0.0 : (PDF(z) * (1 - w) / (1 + w));\n  }\n\n  XGBOOST_DEVICE static double HessPDF(double z) {\n    const double w = exp(z);\n    if (isinf(w) || isinf(w * w)) {\n      return 0.0;\n    } else {\n      return PDF(z) * (w * w - 4 * w + 1) / ((1 + w) * (1 + w));\n    }\n  }\n\n  XGBOOST_DEVICE static ProbabilityDistributionType Type() {\n    return ProbabilityDistributionType::kLogistic;\n  }\n};\n\nstruct ExtremeDistribution {\n  XGBOOST_DEVICE static double PDF(double z) {\n    const double w = exp(z);\n    return isinf(w) ? 0.0 : (w * exp(-w));\n  }\n\n  XGBOOST_DEVICE static double CDF(double z) {\n    const double w = exp(z);\n    return 1 - exp(-w);\n  }\n\n  XGBOOST_DEVICE static double GradPDF(double z) {\n    const double w = exp(z);\n    return isinf(w) ? 0.0 : ((1 - w) * PDF(z));\n  }\n\n  XGBOOST_DEVICE static double HessPDF(double z) {\n    const double w = exp(z);\n    if (isinf(w) || isinf(w * w)) {\n      return 0.0;\n    } else {\n      return (w * w - 3 * w + 1) * PDF(z);\n    }\n  }\n\n  XGBOOST_DEVICE static ProbabilityDistributionType Type() {\n    return ProbabilityDistributionType::kExtreme;\n  }\n};\n\n}  // namespace common\n}  // namespace xgboost\n\n#endif  // XGBOOST_COMMON_PROBABILITY_DISTRIBUTION_H_\n"
  },
  {
    "path": "src/common/pseudo_huber.cc",
    "content": "/*!\n * Copyright 2022, by XGBoost Contributors\n */\n#include \"pseudo_huber.h\"\nnamespace xgboost {\nDMLC_REGISTER_PARAMETER(PseudoHuberParam);\n}\n"
  },
  {
    "path": "src/common/pseudo_huber.h",
    "content": "#ifndef XGBOOST_COMMON_PSEUDO_HUBER_H_\n#define XGBOOST_COMMON_PSEUDO_HUBER_H_\n/*!\n * Copyright 2022, by XGBoost Contributors\n */\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost {\nstruct PseudoHuberParam : public XGBoostParameter<PseudoHuberParam> {\n  float huber_slope{1.0};\n\n  DMLC_DECLARE_PARAMETER(PseudoHuberParam) {\n    DMLC_DECLARE_FIELD(huber_slope)\n        .set_default(1.0f)\n        .describe(\"The delta term in Pseudo-Huber loss.\");\n  }\n};\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_PSEUDO_HUBER_H_\n"
  },
  {
    "path": "src/common/quantile.cc",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#include \"quantile.h\"\n\n#include <cstddef>  // for byte\n#include <cstdint>  // for uint64_t\n#include <iterator>\n#include <limits>\n#include <type_traits>  // for is_trivially_copyable_v\n#include <utility>\n\n#include \"../collective/aggregator.h\"\n#include \"../common/error_msg.h\"  // for InvalidMaxBin\n#include \"../data/adapter.h\"\n#include \"categorical.h\"\n#include \"hist_util.h\"\n\nnamespace xgboost::common {\nHostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bin,\n                                         Span<FeatureType const> feature_types,\n                                         std::vector<bst_idx_t> columns_size, bool use_group)\n    : feature_types_(feature_types.cbegin(), feature_types.cend()),\n      columns_size_{std::move(columns_size)},\n      max_bins_{max_bin},\n      use_group_ind_{use_group},\n      n_threads_{ctx->Threads()} {\n  monitor_.Init(__func__);\n  CHECK_GE(max_bin, 2) << error::InvalidMaxBin();\n  CHECK_NE(columns_size_.size(), 0);\n  sketches_.resize(columns_size_.size());\n  CHECK_GE(n_threads_, 1);\n  categories_.resize(columns_size_.size());\n  has_categorical_ = std::any_of(feature_types_.cbegin(), feature_types_.cend(), IsCatOp{});\n  ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {\n    auto n_bins = std::min(static_cast<bst_idx_t>(max_bins_), columns_size_[i]);\n    n_bins = std::max(n_bins, static_cast<decltype(n_bins)>(1));\n    auto eps = 1.0 / (static_cast<float>(n_bins) * WQSketch::kFactor);\n    if (!IsCat(this->feature_types_, i)) {\n      sketches_[i] = WQSketch{columns_size_[i], eps};\n    }\n  });\n}\n\nnamespace {\n// Function to merge hessian and sample weights\nstd::vector<float> MergeWeights(MetaInfo const &info, Span<float const> hessian, bool use_group,\n                                int32_t n_threads) {\n  CHECK_EQ(hessian.size(), info.num_row_);\n  std::vector<float> 
results(hessian.size());\n  auto const &group_ptr = info.group_ptr_;\n  auto const &weights = info.weights_.HostVector();\n  auto get_weight = [&](size_t i) {\n    return weights.empty() ? 1.0f : weights[i];\n  };\n  if (use_group) {\n    CHECK_GE(group_ptr.size(), 2);\n    CHECK_EQ(group_ptr.back(), hessian.size());\n    size_t cur_group = 0;\n    for (size_t i = 0; i < hessian.size(); ++i) {\n      while (cur_group + 1 < group_ptr.size() && i >= group_ptr[cur_group + 1]) {\n        ++cur_group;\n      }\n      results[i] = hessian[i] * get_weight(cur_group);\n    }\n  } else {\n    ParallelFor(hessian.size(), n_threads, Sched::Auto(),\n                [&](auto i) { results[i] = hessian[i] * get_weight(i); });\n  }\n  return results;\n}\n\ntemplate <typename T>\nvoid WritePODAt(std::vector<std::byte> *out, std::size_t offset, T value) {\n  static_assert(std::is_trivially_copyable_v<T>);\n  auto const *src = reinterpret_cast<std::byte const *>(&value);\n  std::copy_n(src, sizeof(T), out->begin() + static_cast<std::ptrdiff_t>(offset));\n}\n\ntemplate <typename T>\n[[nodiscard]] T ReadPOD(Span<std::byte const> bytes, std::size_t *cursor) {\n  static_assert(std::is_trivially_copyable_v<T>);\n  T value{};\n  CHECK_LE(*cursor, bytes.size());\n  CHECK_LE(sizeof(T), bytes.size() - *cursor);\n  auto *dst = reinterpret_cast<std::byte *>(&value);\n  std::copy_n(bytes.data() + *cursor, sizeof(T), dst);\n  *cursor += sizeof(T);\n  return value;\n}\n\n// Serialization payload for distributed numerical sketch merging over AllreduceV.\n// Encodes per-feature entry counts plus contiguous sketch entries.\nstruct SketchReducePayload {\n  [[nodiscard]] static std::vector<std::byte> SerializeFromSummaries(\n      Span<bst_feature_t const> numeric_features,\n      std::vector<WQuantileSketch::SummaryContainer> const &reduced) {\n    std::size_t total_entries = 0;\n    for (auto fidx : numeric_features) {\n      total_entries += reduced.at(fidx).Size();\n    }\n\n    
std::vector<std::byte> bytes;\n    InitHeader(&bytes, numeric_features.size(), total_entries);\n\n    for (std::size_t i = 0; i < numeric_features.size(); ++i) {\n      auto fidx = numeric_features[i];\n      auto out_entries = reduced.at(fidx).Entries();\n      AppendEntries(&bytes, i, out_entries);\n    }\n    auto header_bytes = HeaderBytes(numeric_features.size());\n    CHECK_EQ((bytes.size() - header_bytes) / sizeof(WQuantileSketch::Entry), total_entries);\n    return bytes;\n  }\n\n  [[nodiscard]] static std::size_t HeaderBytes(std::size_t n_features) {\n    return sizeof(std::uint64_t) + n_features * sizeof(std::uint64_t);\n  }\n\n  static void AppendEntries(std::vector<std::byte> *bytes, std::size_t i,\n                            Span<WQuantileSketch::Entry const> entries) {\n    CHECK(bytes);\n    auto count_offset = sizeof(std::uint64_t) + i * sizeof(std::uint64_t);\n    CHECK_LE(count_offset + sizeof(std::uint64_t), bytes->size());\n    WritePODAt<std::uint64_t>(bytes, count_offset, static_cast<std::uint64_t>(entries.size()));\n    if (entries.empty()) {\n      return;\n    }\n    auto entries_bytes = entries.size() * sizeof(WQuantileSketch::Entry);\n    auto const *src = reinterpret_cast<std::byte const *>(entries.data());\n    bytes->insert(bytes->end(), src, src + entries_bytes);\n  }\n\n  static void InitHeader(std::vector<std::byte> *bytes, std::size_t n_features,\n                         std::size_t max_entries) {\n    CHECK(bytes);\n    auto const header_bytes = HeaderBytes(n_features);\n    bytes->clear();\n    bytes->reserve(header_bytes + max_entries * sizeof(WQuantileSketch::Entry));\n    bytes->resize(header_bytes);\n    WritePODAt<std::uint64_t>(bytes, 0, static_cast<std::uint64_t>(n_features));\n  }\n\n  [[nodiscard]] static SketchReducePayload Parse(Span<std::byte> bytes) {\n    std::size_t cursor = 0;\n    auto n_features = ReadPOD<std::uint64_t>(bytes, &cursor);\n\n    std::vector<std::size_t> offsets(n_features + 1, 0);\n    for 
(std::size_t i = 0; i < n_features; ++i) {\n      auto n_i = static_cast<std::size_t>(ReadPOD<std::uint64_t>(bytes, &cursor));\n      offsets[i + 1] = offsets[i] + n_i;\n    }\n\n    auto n_entries = offsets.back();\n    auto payload_bytes = n_entries * sizeof(WQuantileSketch::Entry);\n    CHECK_EQ(cursor + payload_bytes, bytes.size());\n\n    WQuantileSketch::Entry *entries = nullptr;\n    if (n_entries != 0) {\n      auto ptr = bytes.data() + cursor;\n      auto addr = reinterpret_cast<std::uintptr_t>(ptr);\n      CHECK_EQ(addr % alignof(WQuantileSketch::Entry), 0);\n      entries = reinterpret_cast<WQuantileSketch::Entry *>(ptr);\n    }\n\n    return {std::move(offsets), Span<WQuantileSketch::Entry>{entries, n_entries}};\n  }\n\n  [[nodiscard]] std::size_t NumFeatures() const { return offsets_.size() - 1; }\n  [[nodiscard]] std::size_t TotalEntries() const { return entries_.size(); }\n\n  [[nodiscard]] Span<WQuantileSketch::Entry> Entries(std::size_t idx) const {\n    auto beg = offsets_.at(idx);\n    auto end = offsets_.at(idx + 1);\n    auto n = end - beg;\n    if (n == 0) {\n      return Span<WQuantileSketch::Entry>{};\n    }\n    return {entries_.data() + beg, n};\n  }\n\n  [[nodiscard]] WQuantileSketch::Summary SummaryAt(std::size_t idx) const {\n    auto entries = this->Entries(idx);\n    return {entries, entries.size()};\n  }\n\n private:\n  SketchReducePayload(std::vector<std::size_t> offsets, Span<WQuantileSketch::Entry> entries)\n      : offsets_{std::move(offsets)}, entries_{entries} {}\n\n  std::vector<std::size_t> offsets_;\n  Span<WQuantileSketch::Entry> entries_;\n};\n\n// Serialization payload for distributed categorical value union over AllreduceV.\n// Encodes per-feature value counts plus contiguous category values.\nstruct CategoricalReducePayload {\n  [[nodiscard]] static std::vector<std::byte> SerializeFromCategories(\n      Span<bst_feature_t const> categorical_features,\n      std::vector<std::set<float>> const &categories) {\n    
std::size_t total_values = 0;\n    for (auto fidx : categorical_features) {\n      total_values += categories.at(fidx).size();\n    }\n\n    std::vector<std::byte> bytes;\n    InitHeader(&bytes, categorical_features.size(), total_values);\n    for (std::size_t i = 0; i < categorical_features.size(); ++i) {\n      auto fidx = categorical_features[i];\n      AppendValues(&bytes, i, categories.at(fidx));\n    }\n\n    auto header_bytes = HeaderBytes(categorical_features.size());\n    CHECK_EQ((bytes.size() - header_bytes) / sizeof(float), total_values);\n    return bytes;\n  }\n\n  [[nodiscard]] static std::size_t HeaderBytes(std::size_t n_features) {\n    return sizeof(std::uint64_t) + n_features * sizeof(std::uint64_t);\n  }\n\n  static void AppendValues(std::vector<std::byte> *bytes, std::size_t i, Span<float const> values) {\n    CHECK(bytes);\n    auto count_offset = sizeof(std::uint64_t) + i * sizeof(std::uint64_t);\n    CHECK_LE(count_offset + sizeof(std::uint64_t), bytes->size());\n    WritePODAt<std::uint64_t>(bytes, count_offset, static_cast<std::uint64_t>(values.size()));\n    if (values.empty()) {\n      return;\n    }\n    auto values_bytes = values.size() * sizeof(float);\n    auto const *src = reinterpret_cast<std::byte const *>(values.data());\n    bytes->insert(bytes->end(), src, src + values_bytes);\n  }\n\n  static void AppendValues(std::vector<std::byte> *bytes, std::size_t i,\n                           std::set<float> const &values) {\n    CHECK(bytes);\n    auto count_offset = sizeof(std::uint64_t) + i * sizeof(std::uint64_t);\n    CHECK_LE(count_offset + sizeof(std::uint64_t), bytes->size());\n    WritePODAt<std::uint64_t>(bytes, count_offset, static_cast<std::uint64_t>(values.size()));\n    if (values.empty()) {\n      return;\n    }\n\n    auto offset = bytes->size();\n    bytes->resize(offset + values.size() * sizeof(float));\n    auto dst = bytes->begin() + static_cast<std::ptrdiff_t>(offset);\n    for (auto value : values) {\n      auto 
const *src = reinterpret_cast<std::byte const *>(&value);\n      dst = std::copy_n(src, sizeof(float), dst);\n    }\n  }\n\n  static void InitHeader(std::vector<std::byte> *bytes, std::size_t n_features,\n                         std::size_t max_values) {\n    CHECK(bytes);\n    auto const header_bytes = HeaderBytes(n_features);\n    bytes->clear();\n    bytes->reserve(header_bytes + max_values * sizeof(float));\n    bytes->resize(header_bytes);\n    WritePODAt<std::uint64_t>(bytes, 0, static_cast<std::uint64_t>(n_features));\n  }\n\n  [[nodiscard]] static CategoricalReducePayload Parse(Span<std::byte> bytes) {\n    std::size_t cursor = 0;\n    auto n_features = ReadPOD<std::uint64_t>(bytes, &cursor);\n\n    std::vector<std::size_t> offsets(n_features + 1, 0);\n    for (std::size_t i = 0; i < n_features; ++i) {\n      auto n_i = static_cast<std::size_t>(ReadPOD<std::uint64_t>(bytes, &cursor));\n      offsets[i + 1] = offsets[i] + n_i;\n    }\n\n    auto n_values = offsets.back();\n    auto payload_bytes = n_values * sizeof(float);\n    CHECK_EQ(cursor + payload_bytes, bytes.size());\n\n    float const *values = nullptr;\n    if (n_values != 0) {\n      auto ptr = bytes.data() + cursor;\n      auto addr = reinterpret_cast<std::uintptr_t>(ptr);\n      CHECK_EQ(addr % alignof(float), 0);\n      values = reinterpret_cast<float const *>(ptr);\n    }\n\n    return {std::move(offsets), Span<float const>{values, n_values}};\n  }\n\n  [[nodiscard]] std::size_t NumFeatures() const { return offsets_.size() - 1; }\n  [[nodiscard]] std::size_t TotalValues() const { return values_.size(); }\n\n  [[nodiscard]] Span<float const> Values(std::size_t idx) const {\n    auto beg = offsets_.at(idx);\n    auto end = offsets_.at(idx + 1);\n    auto n = end - beg;\n    if (n == 0) {\n      return Span<float const>{};\n    }\n    return {values_.data() + beg, n};\n  }\n\n private:\n  CategoricalReducePayload(std::vector<std::size_t> offsets, Span<float const> values)\n      : 
offsets_{std::move(offsets)}, values_{values} {}\n\n  std::vector<std::size_t> offsets_;\n  Span<float const> values_;\n};\n}  // anonymous namespace\n\nvoid HostSketchContainer::PushRowPage(SparsePage const &page, MetaInfo const &info,\n                                      Span<float const> hessian) {\n  monitor_.Start(__func__);\n  bst_feature_t n_columns = info.num_col_;\n  auto is_dense = info.num_nonzero_ == info.num_col_ * info.num_row_;\n  CHECK_GE(n_threads_, 1);\n  CHECK_EQ(sketches_.size(), n_columns);\n\n  // glue these conditions using ternary operator to avoid making data copies.\n  auto const &weights =\n      hessian.empty() ? (use_group_ind_ ? detail::UnrollGroupWeights(info)  // use group weight\n                                        : info.weights_.HostVector())       // use sample weight\n                      : MergeWeights(info, hessian, use_group_ind_,\n                                     n_threads_);  // use hessian merged with group/sample weights\n  if (!weights.empty()) {\n    CHECK_EQ(weights.size(), info.num_row_);\n  }\n\n  auto batch = data::SparsePageAdapterBatch{page.GetView()};\n  this->PushRowPageImpl(batch, page.base_rowid, OptionalWeights{weights}, page.data.Size(),\n                        info.num_col_, is_dense, [](auto) { return true; });\n  monitor_.Stop(__func__);\n}\n\ntemplate <typename Batch>\nvoid HostSketchContainer::PushAdapterBatch(Batch const &batch, size_t base_rowid,\n                                           MetaInfo const &info, float missing) {\n  auto const &h_weights =\n      (use_group_ind_ ? 
detail::UnrollGroupWeights(info) : info.weights_.HostVector());\n  if (!use_group_ind_ && !h_weights.empty()) {\n    CHECK_EQ(h_weights.size(), batch.Size()) << \"Invalid size of sample weight.\";\n  }\n\n  auto is_valid = data::IsValidFunctor{missing};\n  auto weights = OptionalWeights{Span<float const>{h_weights}};\n  // the nnz from info is not reliable as sketching might be the first place to go through\n  // the data.\n  auto is_dense = info.num_nonzero_ == info.num_col_ * info.num_row_;\n  CHECK(!this->columns_size_.empty());\n  this->PushRowPageImpl(batch, base_rowid, weights, info.num_nonzero_, info.num_col_, is_dense,\n                        is_valid);\n}\n\n#define INSTANTIATE(_type)                                          \\\n  template void HostSketchContainer::PushAdapterBatch<data::_type>( \\\n      data::_type const &batch, size_t base_rowid, MetaInfo const &info, float missing);\n\nINSTANTIATE(ArrayAdapterBatch)\nINSTANTIATE(DenseAdapterBatch)\nINSTANTIATE(CSRArrayAdapterBatch)\nINSTANTIATE(CSCArrayAdapterBatch)\nINSTANTIATE(SparsePageAdapterBatch)\nINSTANTIATE(ColumnarAdapterBatch)\nINSTANTIATE(EncColumnarAdapterBatch)\n\n#undef INSTANTIATE\n\nauto HostSketchContainer::AllreduceCategories(Context const *ctx, MetaInfo const &info,\n                                              Span<bst_feature_t const> categorical_features)\n    -> std::vector<std::set<float>> {\n  std::vector<std::set<float>> reduced_categories(categorical_features.size());\n  if (categorical_features.empty()) {\n    return reduced_categories;\n  }\n\n  if (collective::GetWorldSize() == 1 || info.IsColumnSplit()) {\n    for (std::size_t i = 0; i < categorical_features.size(); ++i) {\n      reduced_categories[i] = categories_[categorical_features[i]];\n    }\n    return reduced_categories;\n  }\n\n  auto merged =\n      CategoricalReducePayload::SerializeFromCategories(categorical_features, categories_);\n  std::vector<float> merge_workspace;\n  auto rc = collective::AllreduceV(\n 
     ctx, &merged,\n      [&](common::Span<std::byte const> a, common::Span<std::byte const> b,\n          std::vector<std::byte> *out) {\n        auto a_payload = CategoricalReducePayload::Parse(\n            Span<std::byte>{const_cast<std::byte *>(a.data()), a.size()});\n        auto b_payload = CategoricalReducePayload::Parse(\n            Span<std::byte>{const_cast<std::byte *>(b.data()), b.size()});\n        CHECK_EQ(a_payload.NumFeatures(), categorical_features.size());\n        CHECK_EQ(b_payload.NumFeatures(), categorical_features.size());\n\n        auto max_values = a_payload.TotalValues() + b_payload.TotalValues();\n        CategoricalReducePayload::InitHeader(out, categorical_features.size(), max_values);\n\n        for (std::size_t i = 0; i < categorical_features.size(); ++i) {\n          auto a_values = a_payload.Values(i);\n          auto b_values = b_payload.Values(i);\n          merge_workspace.clear();\n          merge_workspace.reserve(a_values.size() + b_values.size());\n          std::set_union(a_values.cbegin(), a_values.cend(), b_values.cbegin(), b_values.cend(),\n                         std::back_inserter(merge_workspace));\n          CategoricalReducePayload::AppendValues(out, i, Span<float const>{merge_workspace});\n        }\n      });\n  collective::SafeColl(rc);\n\n  auto reduced_payload = CategoricalReducePayload::Parse(Span<std::byte>{merged});\n  CHECK_EQ(reduced_payload.NumFeatures(), categorical_features.size());\n  for (std::size_t i = 0; i < categorical_features.size(); ++i) {\n    auto values = reduced_payload.Values(i);\n    reduced_categories[i].insert(values.cbegin(), values.cend());\n  }\n  return reduced_categories;\n}\n\nauto HostSketchContainer::AllReduce(Context const *ctx, MetaInfo const &info,\n                                    Span<bst_feature_t const> numeric_features)\n    -> std::vector<WQSketch::SummaryContainer> {\n  monitor_.Start(__func__);\n\n  // Sanity check the number of features across workers before 
allreduce\n  bst_feature_t n_columns = sketches_.size();\n  auto rc = collective::Allreduce(ctx, &n_columns, collective::Op::kMax);\n  collective::SafeColl(rc);\n  CHECK_EQ(n_columns, sketches_.size()) << \"Number of columns differs across workers\";\n\n  std::vector<WQSketch::SummaryContainer> reduced(sketches_.size());\n\n  // Cap the per-feature summary size during local and distributed merge.\n  auto const max_cut_target = static_cast<std::size_t>(max_bins_ * WQSketch::kFactor);\n  ParallelFor(numeric_features.size(), n_threads_, [&](size_t idx) {\n    auto fidx = numeric_features[idx];\n    reduced[fidx] = sketches_[fidx].GetSummary(max_cut_target);\n  });\n\n  // Early exit: no allreduce needed when one worker, column-split, or no numeric features.\n  if (collective::GetWorldSize() == 1 || info.IsColumnSplit() || numeric_features.empty()) {\n    monitor_.Stop(__func__);\n    return reduced;\n  }\n\n  // Serialize local sketches to a byte array for allreduce\n  auto merged = SketchReducePayload::SerializeFromSummaries(\n      Span<bst_feature_t const>{numeric_features}, reduced);\n  WQSketch::SummaryContainer tmp;\n  tmp.Reserve(max_cut_target * 2);  // workspace for merging sketches during allreduce\n  auto reduce_rc = collective::AllreduceV(\n      ctx, &merged,\n      [&](common::Span<std::byte const> a, common::Span<std::byte const> b,\n          std::vector<std::byte> *out) {\n        auto a_payload = SketchReducePayload::Parse(\n            Span<std::byte>{const_cast<std::byte *>(a.data()), a.size()});\n        auto b_payload = SketchReducePayload::Parse(\n            Span<std::byte>{const_cast<std::byte *>(b.data()), b.size()});\n        CHECK_EQ(a_payload.NumFeatures(), numeric_features.size());\n        CHECK_EQ(b_payload.NumFeatures(), numeric_features.size());\n\n        auto max_entries = a_payload.TotalEntries() + b_payload.TotalEntries();\n        auto max_pruned_entries = max_cut_target * numeric_features.size();\n        max_entries = 
std::min(max_entries, max_pruned_entries);\n        SketchReducePayload::InitHeader(out, numeric_features.size(), max_entries);\n\n        for (std::size_t i = 0; i < numeric_features.size(); ++i) {\n          auto a_summary = a_payload.SummaryAt(i);\n          auto b_summary = b_payload.SummaryAt(i);\n          tmp.CopyFrom(a_summary);\n          tmp.SetCombine(b_summary);\n          tmp.SetPrune(max_cut_target);\n\n          auto pruned_entries = tmp.Entries();\n          SketchReducePayload::AppendEntries(out, i, pruned_entries);\n        }\n      });\n  collective::SafeColl(reduce_rc);\n\n  // Deserialize the sketches back to summary containers.\n  auto reduced_payload = SketchReducePayload::Parse(Span<std::byte>{merged});\n  CHECK_EQ(reduced_payload.NumFeatures(), numeric_features.size());\n  for (std::size_t i = 0; i < numeric_features.size(); ++i) {\n    auto fidx = numeric_features[i];\n    auto entries = reduced_payload.Entries(i);\n    auto n_entries = entries.size();\n\n    reduced[fidx].Reserve(n_entries);\n    reduced[fidx].CopyFrom(WQSketch::Summary{entries, n_entries});\n  }\n  monitor_.Stop(__func__);\n  return reduced;\n}\n\nvoid AddCutPoints(WQSummaryContainer const &summary, size_t max_bin, HistogramCuts *cuts) {\n  size_t required_cuts = std::min(summary.Size(), static_cast<size_t>(max_bin));\n  auto &cut_values = cuts->cut_values_.HostVector();\n  auto const entries = summary.Entries();\n  // Use raw pointer in the cut extraction loop to avoid per-access bounds checks.\n  auto const *summary_data = entries.data();\n  // summary[0] is the observed minimum; the first bin lower bound is implicit.\n  for (size_t i = 1; i < required_cuts; ++i) {\n    bst_float cpt = summary_data[i].value;\n    if (i == 1 || cpt > cut_values.back()) {\n      cut_values.push_back(cpt);\n    }\n  }\n  auto const cpt = !entries.empty() ? 
entries.back().value : 1e-5f;\n  // This must be bigger than the last observed cut value.\n  auto const last = cpt + (std::fabs(cpt) + 1e-5f);\n  cut_values.push_back(last);\n}\n\nvoid AddCategories(std::set<float> const &categories, float *max_cat, HistogramCuts *cuts) {\n  if (std::any_of(categories.cbegin(), categories.cend(), InvalidCat)) {\n    InvalidCategory();\n  }\n  auto &cut_values = cuts->cut_values_.HostVector();\n  // With column-wise data split, the categories may be empty.\n  auto feature_max_cat =\n      categories.empty() ? 0.0f : *std::max_element(categories.cbegin(), categories.cend());\n  CheckMaxCat(feature_max_cat, categories.size());\n  *max_cat = std::max(*max_cat, feature_max_cat);\n  for (bst_cat_t i = 0; i <= AsCat(feature_max_cat); ++i) {\n    cut_values.push_back(i);\n  }\n}\n\nHistogramCuts HostSketchContainer::MakeCuts(Context const *ctx, MetaInfo const &info) {\n  monitor_.Start(__func__);\n  HistogramCuts cuts{static_cast<bst_feature_t>(sketches_.size())};\n  auto *p_cuts = &cuts;\n\n  std::vector<bst_feature_t> numeric_features;\n  std::vector<bst_feature_t> categorical_features;\n  numeric_features.reserve(sketches_.size());\n  categorical_features.reserve(sketches_.size());\n  for (bst_feature_t fidx = 0; fidx < sketches_.size(); ++fidx) {\n    if (IsCat(feature_types_, fidx)) {\n      categorical_features.push_back(fidx);\n    } else {\n      numeric_features.push_back(fidx);\n    }\n  }\n\n  auto reduced_numerical = this->AllReduce(ctx, info, Span<bst_feature_t const>{numeric_features});\n  auto reduced_categories =\n      this->AllreduceCategories(ctx, info, Span<bst_feature_t const>{categorical_features});\n  std::vector<std::size_t> categorical_index(sketches_.size(), 0);\n  for (std::size_t i = 0; i < categorical_features.size(); ++i) {\n    categorical_index[categorical_features[i]] = i;\n  }\n\n  auto &h_cut_ptrs = p_cuts->cut_ptrs_.HostVector();\n  // Prune size down to max_bins + 1 (reserve one extra for the max 
value)\n  // before extracting cut points.\n  ParallelFor(numeric_features.size(), n_threads_, Sched::Guided(), [&](size_t idx) {\n    auto fidx = numeric_features[idx];\n    reduced_numerical.at(fidx).SetPrune(max_bins_ + 1);  // reserve one extra for the max value\n  });\n\n  float max_cat{-1.f};\n  for (size_t fid = 0; fid < reduced_numerical.size(); ++fid) {\n    size_t max_num_bins = std::min(reduced_numerical[fid].Size(), static_cast<size_t>(max_bins_));\n    if (IsCat(feature_types_, fid)) {\n      AddCategories(reduced_categories[categorical_index[fid]], &max_cat, p_cuts);\n    } else {\n      AddCutPoints(reduced_numerical[fid], max_num_bins, p_cuts);\n    }\n\n    // Ensure that every feature gets at least one quantile point\n    CHECK_LE(p_cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());\n    auto cut_size = static_cast<uint32_t>(p_cuts->cut_values_.HostVector().size());\n    CHECK_GT(cut_size, h_cut_ptrs[fid]);\n    h_cut_ptrs[fid + 1] = cut_size;\n  }\n\n  p_cuts->SetCategorical(this->has_categorical_, max_cat);\n  monitor_.Stop(__func__);\n  return cuts;\n}\n\nvoid HostSketchContainer::PushColPage(SparsePage const &page, MetaInfo const &info,\n                                      Span<float const> hessian) {\n  monitor_.Start(__func__);\n  // glue these conditions using ternary operator to avoid making data copies.\n  auto const &weights =\n      hessian.empty() ? (use_group_ind_ ? 
detail::UnrollGroupWeights(info)  // use group weight\n                                        : info.weights_.HostVector())       // use sample weight\n                      : MergeWeights(info, hessian, use_group_ind_,\n                                     n_threads_);  // use hessian merged with group/sample weights\n  CHECK_EQ(weights.size(), info.num_row_);\n\n  auto view = page.GetView();\n  ParallelFor(view.Size(), n_threads_, [&](size_t fidx) {\n    auto column = view[fidx];\n    if (IsCat(feature_types_, fidx)) {\n      for (auto c : column) {\n        categories_[fidx].emplace(c.fvalue);\n      }\n      return;\n    }\n    sketches_[fidx].PushSorted(column, weights, static_cast<size_t>(max_bins_));\n  });\n  monitor_.Stop(__func__);\n}\n\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/quantile.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <thrust/binary_search.h>\n#include <thrust/execution_policy.h>\n#include <thrust/tuple.h>  // for make_tuple\n#include <thrust/unique.h>\n\n#include <cstdint>      // for uintptr_t\n#include <limits>       // for numeric_limits\n#include <numeric>      // for partial_sum\n#include <type_traits>  // for is_same_v\n#include <utility>\n\n#include \"../collective/allgather.h\"\n#include \"../collective/allreduce.h\"\n#include \"../collective/communicator-inl.h\"  // for GetWorldSize, GetRank\n#include \"categorical.h\"\n#include \"common.h\"\n#include \"cuda_context.cuh\"  // for CUDAContext\n#include \"cuda_rt_utils.h\"   // for SetDevice\n#include \"device_helpers.cuh\"\n#include \"hist_util.h\"\n#include \"quantile.cuh\"\n#include \"quantile.h\"\n#include \"transform_iterator.h\"  // MakeIndexTransformIter\n#include \"xgboost/span.h\"\n\nnamespace xgboost::common {\nusing WQSketch = WQuantileSketch;\nusing SketchEntry = WQSketch::Entry;\n\n// Algorithm 4 in XGBoost's paper, using binary search to find i.\ntemplate <typename EntryIter>\n__device__ size_t BinarySearchQueryIndex(EntryIter beg, EntryIter end, float rank) {\n  assert(end - beg >= 2);\n  rank *= 2;\n  auto front = *beg;\n  if (rank < front.rmin + front.rmax) {\n    return 0;\n  }\n  auto back = *(end - 1);\n  if (rank >= back.rmin + back.rmax) {\n    return end - beg - 1;\n  }\n\n  auto search_begin = dh::MakeTransformIterator<float>(\n      beg, [=] __device__(SketchEntry const &entry) { return entry.rmin + entry.rmax; });\n  auto search_end = search_begin + (end - beg);\n  auto i =\n      thrust::upper_bound(thrust::seq, search_begin + 1, search_end - 1, rank) - search_begin - 1;\n  if (rank < (*(beg + i)).RMinNext() + (*(beg + i + 1)).RMaxPrev()) {\n    return i;\n  } else {\n    return i + 1;\n  }\n}\n\ntemplate <typename EntryFromIndex>\n// Select source indices for the pruned summary without materializing output 
entries.\nvoid SelectPruneIndices(common::Span<SketchContainer::OffsetT const> cuts_ptr,\n                        Span<SketchContainer::OffsetT const> columns_ptr_in,\n                        Span<FeatureType const> feature_types, Span<size_t> selected_idx,\n                        EntryFromIndex entry_from_index, cudaStream_t stream) {\n  dh::LaunchN(selected_idx.size(), stream, [=] __device__(size_t idx) {\n    size_t column_id = dh::SegmentId(cuts_ptr, idx);\n    auto in_begin = columns_ptr_in[column_id];\n    auto in_size = columns_ptr_in[column_id + 1] - columns_ptr_in[column_id];\n    auto to = cuts_ptr[column_id + 1] - cuts_ptr[column_id];\n    idx -= cuts_ptr[column_id];\n\n    auto is_cat = IsCat(feature_types, column_id);\n    if (in_size <= to || is_cat) {\n      selected_idx[cuts_ptr[column_id] + idx] = in_begin + idx;\n      return;\n    }\n    if (idx == 0) {\n      selected_idx[cuts_ptr[column_id]] = in_begin;\n      return;\n    }\n    if (idx == to - 1) {\n      selected_idx[cuts_ptr[column_id] + idx] = in_begin + in_size - 1;\n      return;\n    }\n\n    auto front = entry_from_index(in_begin);\n    auto back = entry_from_index(in_begin + in_size - 1);\n    float w = back.rmin - front.rmax;\n    auto q = ((static_cast<float>(idx) * w) / (static_cast<float>(to) - 1.0f) + front.rmax);\n    auto it = dh::MakeTransformIterator<SketchEntry>(\n        thrust::make_counting_iterator(in_begin),\n        [=] __device__(size_t abs_idx) { return entry_from_index(abs_idx); });\n    selected_idx[cuts_ptr[column_id] + idx] =\n        in_begin + BinarySearchQueryIndex(it, it + in_size, q);\n  });\n}\n\ntemplate <typename EntryFromIndex>\nvoid GatherPruneEntries(Span<size_t const> selected_idx, Span<SketchEntry> out_cuts,\n                        EntryFromIndex entry_from_index, cudaStream_t stream) {\n  dh::LaunchN(selected_idx.size(), stream,\n              [=] __device__(size_t idx) { out_cuts[idx] = entry_from_index(selected_idx[idx]); });\n}\n\ntemplate 
<typename InEntry, typename ToSketchEntry>\nvoid PruneImpl(common::Span<SketchContainer::OffsetT const> cuts_ptr,\n               Span<InEntry const> sorted_data,\n               Span<size_t const> columns_ptr_in,  // could be ptr for data or cuts\n               Span<FeatureType const> feature_types, Span<SketchEntry> out_cuts,\n               ToSketchEntry to_sketch_entry) {\n  dh::LaunchN(out_cuts.size(), [=] __device__(size_t idx) {\n    size_t column_id = dh::SegmentId(cuts_ptr, idx);\n    auto out_column =\n        out_cuts.subspan(cuts_ptr[column_id], cuts_ptr[column_id + 1] - cuts_ptr[column_id]);\n    auto in_column = sorted_data.subspan(columns_ptr_in[column_id],\n                                         columns_ptr_in[column_id + 1] - columns_ptr_in[column_id]);\n    auto to = cuts_ptr[column_id + 1] - cuts_ptr[column_id];\n    idx -= cuts_ptr[column_id];\n    auto front = to_sketch_entry(0ul, in_column, column_id);\n    auto back = to_sketch_entry(in_column.size() - 1, in_column, column_id);\n\n    auto is_cat = IsCat(feature_types, column_id);\n    if (in_column.size() <= to || is_cat) {\n      // cut idx equals sample idx\n      out_column[idx] = to_sketch_entry(idx, in_column, column_id);\n      return;\n    }\n    // 1 thread for each output.  
See A.4 for detail.\n    auto d_out = out_column;\n    if (idx == 0) {\n      d_out.front() = front;\n      return;\n    }\n    if (idx == to - 1) {\n      d_out.back() = back;\n      return;\n    }\n\n    float w = back.rmin - front.rmax;\n    auto budget = static_cast<float>(d_out.size());\n    assert(budget != 0);\n    auto q = ((static_cast<float>(idx) * w) / (static_cast<float>(to) - 1.0f) + front.rmax);\n    auto it = dh::MakeTransformIterator<SketchEntry>(\n        thrust::make_counting_iterator(0ul), [=] __device__(size_t idx) {\n          auto e = to_sketch_entry(idx, in_column, column_id);\n          return e;\n        });\n    d_out[idx] = *(it + BinarySearchQueryIndex(it, it + in_column.size(), q));\n  });\n}\n\ntemplate <typename T, typename U>\nvoid CopyTo(Span<T> out, Span<U> src) {\n  CHECK_EQ(out.size(), src.size());\n  static_assert(std::is_same_v<std::remove_cv_t<T>, std::remove_cv_t<T>>);\n  dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(), out.size_bytes(), cudaMemcpyDefault));\n}\n\nXGBOOST_DEVICE thrust::tuple<uint64_t, uint64_t> MergePartition(Span<SketchEntry const> x,\n                                                                Span<SketchEntry const> y,\n                                                                uint64_t k) {\n  // Find the merge partition for the k-th output within one column.  The merged prefix of\n  // length k contains i entries from x and j entries from y, where k = i + j.\n  auto m = static_cast<uint64_t>(x.size());\n  auto n = static_cast<uint64_t>(y.size());\n  // Search for i inside the valid merge-partition range.  low/high clamp the partition so\n  // j = k - i always stays within [0, n].\n  auto low = k > n ? 
k - n : 0ul;\n  auto high = std::min(k, m);\n  auto candidate_it = thrust::make_counting_iterator<uint64_t>(low);\n  auto need_more_x = dh::MakeTransformIterator<bool>(candidate_it, [=] __device__(uint64_t i) {\n    // j is the number of elements taken from y when the partition takes i from x.\n    auto j = k - i;\n    // Move the boundary right while the last candidate from y still sorts ahead of the\n    // next candidate from x.  The first false value is the first valid merge boundary.\n    // j > 0: there is a left-hand candidate in y.\n    // i < m: there is a right-hand candidate in x.\n    return j > 0 && i < m && y[j - 1].value >= x[i].value;\n  });\n  auto partition_it = thrust::lower_bound(thrust::seq, need_more_x, need_more_x + (high - low + 1),\n                                          false, thrust::greater<bool>{});\n  auto a_ind = low + (partition_it - need_more_x);\n  return thrust::make_tuple(a_ind, k - a_ind);\n}\n\nvoid SketchContainer::SetCurrentColumns(Span<OffsetT const> columns_ptr) {\n  CHECK_EQ(columns_ptr.size(), num_columns_ + 1);\n  CHECK_EQ(columns_ptr_tmp_.Size(), num_columns_ + 1);\n  columns_ptr_.Resize(columns_ptr.size());\n  CopyTo(columns_ptr_.DeviceSpan(), columns_ptr);\n}\n\n// Merge d_x and d_y into out.  Because the final output depends on predicate (which\n// summary does the output element come from) result by definition of merged rank.  
So we\n// compute the partition for each output directly and customize the standard merge\n// algorithm without storing a merge path buffer.\nvoid MergeImpl(Context const *ctx, Span<SketchEntry const> const &d_x,\n               Span<bst_idx_t const> const &x_ptr, Span<SketchEntry const> const &d_y,\n               Span<bst_idx_t const> const &y_ptr, Span<SketchEntry> d_out,\n               Span<bst_idx_t> out_ptr) {\n  CHECK_EQ(d_x.size() + d_y.size(), d_out.size());\n  CHECK_EQ(x_ptr.size(), out_ptr.size());\n  CHECK_EQ(y_ptr.size(), out_ptr.size());\n\n  dh::LaunchN(out_ptr.size(), ctx->CUDACtx()->Stream(),\n              [=] __device__(size_t i) { out_ptr[i] = x_ptr[i] + y_ptr[i]; });\n\n  auto merge_entry_at = [=] __device__(Span<SketchEntry const> d_x_column,\n                                       Span<SketchEntry const> d_y_column, uint64_t idx) {\n    // Materialize one merged entry for a single column and output position.\n    // Handle empty column. If both columns are empty, we should not get this column as\n    // result of binary search.\n    assert((d_x_column.size() != 0) || (d_y_column.size() != 0));\n    if (d_x_column.size() == 0) {\n      return d_y_column[idx];\n    }\n    if (d_y_column.size() == 0) {\n      return d_x_column[idx];\n    }\n\n    uint64_t a_ind, b_ind;\n    thrust::tie(a_ind, b_ind) = MergePartition(d_x_column, d_y_column, idx);\n\n    assert(b_ind <= d_y_column.size());\n    assert(a_ind <= d_x_column.size());\n\n    // Rank contribution from the opposite summary at the merge boundary.  
`ind` is the\n    // insertion point of the current element into the other summary.\n    auto other_rmin = [] __device__(Span<SketchEntry const> d_column, uint64_t ind) {\n      if (ind == 0) {\n        return 0.0f;\n      }\n      if (ind == d_column.size()) {\n        return d_column.back().RMinNext();\n      }\n      return d_column[ind - 1].RMinNext();\n    };  // NOLINT\n    auto other_rmax = [] __device__(Span<SketchEntry const> d_column, uint64_t ind) {\n      if (ind == d_column.size()) {\n        return d_column.back().rmax;\n      }\n      return d_column[ind].RMaxPrev();\n    };  // NOLINT\n    // Apply the merge equations when the output element comes from x or y.\n    auto merge_from_x = [=] __device__(SketchEntry x_elem, uint64_t y_ind) {\n      return SketchEntry{x_elem.rmin + other_rmin(d_y_column, y_ind),\n                         x_elem.rmax + other_rmax(d_y_column, y_ind), x_elem.wmin, x_elem.value};\n    };  // NOLINT\n    auto merge_from_y = [=] __device__(SketchEntry y_elem, uint64_t x_ind) {\n      return SketchEntry{other_rmin(d_x_column, x_ind) + y_elem.rmin,\n                         other_rmax(d_x_column, x_ind) + y_elem.rmax, y_elem.wmin, y_elem.value};\n    };  // NOLINT\n\n    // Once one side is exhausted, all remaining outputs come from the other side with\n    // boundary ranks taken at the end of the exhausted summary.\n    if (a_ind == d_x_column.size()) {\n      return merge_from_y(d_y_column[b_ind], a_ind);\n    }\n    auto x_elem = d_x_column[a_ind];\n    if (b_ind == d_y_column.size()) {\n      return merge_from_x(x_elem, b_ind);\n    }\n    auto y_elem = d_y_column[b_ind];\n\n    /* Merge procedure.  See A.3 merge operation eq (26) ~ (28).  The trick to interpret\n       it is rewriting the symbols on both side of equality.  
Take eq (26) as an example:\n       Expand it according to definition of extended rank then rewrite it into:\n\n       If $k_i$ is the $i$ element in output and \\textbf{comes from $D_1$}:\n\n         r_\\bar{D}(k_i) = r_{\\bar{D_1}}(k_i) + w_{\\bar{{D_1}}}(k_i) +\n                                          [r_{\\bar{D_2}}(x_i) + w_{\\bar{D_2}}(x_i)]\n\n       Where $x_i$ is the largest element in $D_2$ that's less than $k_i$.  $k_i$ can be\n       used in $D_1$ as it's since $k_i \\in D_1$.  Other 2 equations can be applied\n       similarly with $k_i$ comes from different $D$.  just use different symbol on\n       different source of summary.\n    */\n    // General merge case: combine equal values, otherwise land the smaller value and add\n    // the rank contribution from the opposite summary at the partition boundary.\n    if (x_elem.value == y_elem.value) {\n      return SketchEntry{x_elem.rmin + y_elem.rmin, x_elem.rmax + y_elem.rmax,\n                         x_elem.wmin + y_elem.wmin, x_elem.value};\n    }\n    if (x_elem.value < y_elem.value) {\n      return merge_from_x(x_elem, b_ind);\n    }\n\n    return merge_from_y(y_elem, a_ind);\n  };  // NOLINT\n\n  dh::LaunchN(d_out.size(), ctx->CUDACtx()->Stream(), [=] __device__(size_t idx) {\n    // Merge one output element after locating its column segment and per-column partition.\n    auto column_id = dh::SegmentId(out_ptr, idx);\n    auto out_begin = out_ptr[column_id];\n    auto out_idx = idx - out_begin;\n\n    auto d_x_column = d_x.subspan(x_ptr[column_id], x_ptr[column_id + 1] - x_ptr[column_id]);\n    auto d_y_column = d_y.subspan(y_ptr[column_id], y_ptr[column_id + 1] - y_ptr[column_id]);\n    d_out[idx] = merge_entry_at(d_x_column, d_y_column, out_idx);\n  });\n}\n\nvoid SketchContainer::Push(Context const *ctx, Span<Entry const> entries, Span<size_t> columns_ptr,\n                           common::Span<OffsetT> cuts_ptr, size_t total_cuts, Span<float> weights) {\n  
curt::SetDevice(ctx->Ordinal());\n  auto &current = this->entries_;\n  auto &columns_ptr_out = this->columns_ptr_;\n  Span<SketchEntry> out;\n  dh::device_vector<SketchEntry> cuts;\n  bool first_window = current.empty();\n  if (!first_window) {\n    cuts.resize(total_cuts);\n    out = dh::ToSpan(cuts);\n  } else {\n    current.resize(total_cuts);\n    out = dh::ToSpan(current);\n  }\n  auto ft = this->feature_types_.ConstDeviceSpan();\n  if (weights.empty()) {\n    auto to_sketch_entry = [] __device__(size_t sample_idx, Span<Entry const> const &column,\n                                         size_t) {\n      float rmin = sample_idx;\n      float rmax = sample_idx + 1;\n      return SketchEntry{rmin, rmax, 1, column[sample_idx].fvalue};\n    };  // NOLINT\n    PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);\n  } else {\n    auto to_sketch_entry = [weights, columns_ptr] __device__(size_t sample_idx,\n                                                             Span<Entry const> const &column,\n                                                             size_t column_id) {\n      Span<float const> column_weights_scan =\n          weights.subspan(columns_ptr[column_id], column.size());\n      float rmin = sample_idx > 0 ? column_weights_scan[sample_idx - 1] : 0.0f;\n      float rmax = column_weights_scan[sample_idx];\n      float wmin = rmax - rmin;\n      wmin = wmin < 0 ? 
kRtEps : wmin;  // GPU scan can generate floating error.\n      return SketchEntry{rmin, rmax, wmin, column[sample_idx].fvalue};\n    };  // NOLINT\n    PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);\n  }\n  auto n_uniques = this->ScanInput(ctx, out, cuts_ptr);\n\n  if (!first_window) {\n    CHECK_EQ(columns_ptr_out.Size(), cuts_ptr.size());\n    out = out.subspan(0, n_uniques);\n    this->Merge(ctx, cuts_ptr, out);\n  } else {\n    current.resize(n_uniques);\n    columns_ptr_out.SetDevice(ctx->Device());\n    columns_ptr_out.Resize(cuts_ptr.size());\n\n    auto d_cuts_ptr = columns_ptr_out.DeviceSpan();\n    CopyTo(d_cuts_ptr, cuts_ptr);\n  }\n}\n\nsize_t SketchContainer::ScanInput(Context const *ctx, Span<SketchEntry> entries,\n                                  Span<OffsetT> d_columns_ptr_in) {\n  /* There are 2 types of duplication.  First is duplicated feature values, which comes\n   * from user input data.  Second is duplicated sketching entries, which is generated by\n   * pruning or merging. 
We preserve the first type and remove the second type.\n   */\n  timer_.Start(__func__);\n  curt::SetDevice(ctx->Ordinal());\n  CHECK_EQ(d_columns_ptr_in.size(), num_columns_ + 1);\n\n  auto key_it = dh::MakeTransformIterator<size_t>(\n      thrust::make_reverse_iterator(thrust::make_counting_iterator(entries.size())),\n      [=] __device__(size_t idx) { return dh::SegmentId(d_columns_ptr_in, idx); });\n  // Reverse scan to accumulate weights into first duplicated element on left.\n  auto val_it = thrust::make_reverse_iterator(dh::tend(entries));\n  thrust::inclusive_scan_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + entries.size(), val_it,\n                                val_it, thrust::equal_to<size_t>{},\n                                [] __device__(SketchEntry const &r, SketchEntry const &l) {\n                                  // Only accumulate for the first type of duplication.\n                                  if (l.value - r.value == 0 && l.rmin - r.rmin != 0) {\n                                    auto w = l.wmin + r.wmin;\n                                    SketchEntry v{l.rmin, l.rmin + w, w, l.value};\n                                    return v;\n                                  }\n                                  return l;\n                                });\n\n  auto d_columns_ptr_out = this->columns_ptr_tmp_.DeviceSpan();\n  // thrust unique_by_key preserves the first element.\n  auto n_uniques =\n      dh::SegmentedUnique(ctx->CUDACtx()->CTP(), d_columns_ptr_in.data(),\n                          d_columns_ptr_in.data() + d_columns_ptr_in.size(), entries.data(),\n                          entries.data() + entries.size(), d_columns_ptr_out.data(), entries.data(),\n                          detail::SketchUnique{});\n  CopyTo(d_columns_ptr_in, d_columns_ptr_out);\n\n  timer_.Stop(__func__);\n  return n_uniques;\n}\n\nvoid SketchContainer::Prune(Context const *ctx, std::size_t to) {\n  timer_.Start(__func__);\n  
curt::SetDevice(ctx->Ordinal());\n  auto &entries = this->entries_;\n  auto &scratch = this->entries_tmp_;\n  auto &columns_ptr = this->columns_ptr_;\n  auto &columns_ptr_tmp = this->columns_ptr_tmp_;\n  auto const &feature_types = this->feature_types_;\n\n  OffsetT to_total = 0;\n  auto &h_columns_ptr = columns_ptr_tmp.HostVector();\n  h_columns_ptr[0] = to_total;\n  auto const &h_feature_types = feature_types.ConstHostSpan();\n  for (bst_feature_t i = 0; i < num_columns_; ++i) {\n    size_t length = this->Column(i).size();\n    length = std::min(length, to);\n    if (IsCat(h_feature_types, i)) {\n      length = this->Column(i).size();\n    }\n    to_total += length;\n    h_columns_ptr[i + 1] = to_total;\n  }\n  scratch.resize(to_total);\n\n  auto d_columns_ptr_in = columns_ptr.ConstDeviceSpan();\n  auto d_columns_ptr_out = columns_ptr_tmp.ConstDeviceSpan();\n  auto out = dh::ToSpan(scratch);\n  auto in = dh::ToSpan(entries);\n  auto ft = feature_types.ConstDeviceSpan();\n  dh::device_vector<size_t> selected_idx(out.size());\n  auto d_selected_idx = dh::ToSpan(selected_idx);\n  HostDeviceVector<OffsetT> selected_columns_ptr(columns_ptr_tmp.Size());\n  selected_columns_ptr.SetDevice(ctx->Device());\n  auto entry_from_index = [=] __device__(size_t abs_idx) {\n    return in[abs_idx];\n  };  // NOLINT\n  auto stream = ctx->CUDACtx()->Stream();\n  SelectPruneIndices(d_columns_ptr_out, d_columns_ptr_in, ft, d_selected_idx, entry_from_index,\n                     stream);\n  auto n_selected = dh::SegmentedUnique(\n      ctx->CUDACtx()->CTP(), d_columns_ptr_out.data(),\n      d_columns_ptr_out.data() + d_columns_ptr_out.size(), d_selected_idx.data(),\n      d_selected_idx.data() + d_selected_idx.size(), selected_columns_ptr.DeviceSpan().data(),\n      d_selected_idx.data(), thrust::equal_to<size_t>{});\n  GatherPruneEntries(Span<size_t const>{d_selected_idx.data(), n_selected}, out, entry_from_index,\n                     stream);\n  entries.swap(scratch);\n  
columns_ptr.Copy(selected_columns_ptr);\n  entries.resize(n_selected);\n  auto d_column_scan = columns_ptr.DeviceSpan();\n  HostDeviceVector<OffsetT> scan_out(d_column_scan.size());\n  scan_out.SetDevice(ctx->Device());\n  auto n_uniques = dh::SegmentedUnique(ctx->CUDACtx()->CTP(), d_column_scan.data(),\n                                       d_column_scan.data() + d_column_scan.size(), out.data(),\n                                       out.data() + n_selected, scan_out.DevicePointer(),\n                                       out.data(), detail::SketchUnique{});\n  columns_ptr.Copy(scan_out);\n  CHECK(!columns_ptr.HostCanRead());\n  entries.resize(n_uniques);\n  timer_.Stop(__func__);\n}\n\nvoid SketchContainer::Merge(Context const *ctx, Span<OffsetT const> d_that_columns_ptr,\n                            Span<SketchEntry const> that) {\n  curt::SetDevice(ctx->Ordinal());\n  auto &entries = this->entries_;\n  auto &scratch = this->entries_tmp_;\n  auto &columns_ptr = this->columns_ptr_;\n  auto &columns_ptr_tmp = this->columns_ptr_tmp_;\n  auto self = dh::ToSpan(entries);\n  LOG(DEBUG) << \"Merge: self:\" << HumanMemUnit(self.size_bytes()) << \". \"\n             << \"That:\" << HumanMemUnit(that.size_bytes()) << \". \"\n             << \"This capacity:\" << HumanMemUnit(this->MemCapacityBytes()) << \".\" << std::endl;\n\n  timer_.Start(__func__);\n  auto normalize_merged = [&] {\n    if (this->HasCategorical()) {\n      // Numerical summaries are normalized during prune.  
Categorical features can still\n      // produce repeated category values, so compact those here before exposing the sketch.\n      auto d_feature_types = this->FeatureTypes().ConstDeviceSpan();\n      auto d_column_scan = columns_ptr.DeviceSpan();\n      auto merged_entries = dh::ToSpan(entries);\n      HostDeviceVector<OffsetT> scan_out(d_column_scan.size());\n      scan_out.SetDevice(ctx->Device());\n      auto n_uniques = dh::SegmentedUnique(\n          ctx->CUDACtx()->CTP(), d_column_scan.data(), d_column_scan.data() + d_column_scan.size(),\n          merged_entries.data(), merged_entries.data() + merged_entries.size(),\n          scan_out.DevicePointer(), merged_entries.data(), detail::SketchUnique{},\n          [d_feature_types] __device__(size_t l_fidx, size_t r_fidx) {\n            return l_fidx == r_fidx && IsCat(d_feature_types, l_fidx);\n          });\n      columns_ptr.Copy(scan_out);\n      entries.resize(n_uniques);\n    }\n    this->FixError();\n  };\n  if (entries.empty()) {\n    CHECK_EQ(columns_ptr.HostVector().back(), 0);\n    CHECK_EQ(columns_ptr.HostVector().size(), d_that_columns_ptr.size());\n    CHECK_EQ(columns_ptr.Size(), num_columns_ + 1);\n    thrust::copy(ctx->CUDACtx()->CTP(), d_that_columns_ptr.data(),\n                 d_that_columns_ptr.data() + d_that_columns_ptr.size(),\n                 columns_ptr.DevicePointer());\n    auto total = columns_ptr.HostVector().back();\n    entries.resize(total);\n    CopyTo(dh::ToSpan(entries), that);\n    normalize_merged();\n    timer_.Stop(__func__);\n    return;\n  }\n\n  std::size_t new_size = entries.size() + that.size();\n  try {\n    scratch.resize(new_size);\n  } catch (dmlc::Error const &) {\n    // Retry\n    scratch.clear();\n    scratch.shrink_to_fit();\n    scratch.resize(new_size);\n  }\n\n  CHECK_EQ(d_that_columns_ptr.size(), columns_ptr.Size());\n\n  MergeImpl(ctx, {entries.data().get(), entries.size()}, columns_ptr.ConstDeviceSpan(), that,\n            d_that_columns_ptr, 
dh::ToSpan(scratch), columns_ptr_tmp.DeviceSpan());\n  this->CommitScratch(new_size);\n  CHECK_EQ(columns_ptr.Size(), num_columns_ + 1);\n  normalize_merged();\n  timer_.Stop(__func__);\n}\n\nvoid SketchContainer::FixError() {\n  auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();\n  auto in = dh::ToSpan(this->entries_);\n  dh::LaunchN(in.size(), [=] __device__(size_t idx) {\n    auto column_id = dh::SegmentId(d_columns_ptr, idx);\n    auto in_column = in.subspan(d_columns_ptr[column_id],\n                                d_columns_ptr[column_id + 1] - d_columns_ptr[column_id]);\n    idx -= d_columns_ptr[column_id];\n    float prev_rmin = idx == 0 ? 0.0f : in_column[idx - 1].rmin;\n    if (in_column[idx].rmin < prev_rmin) {\n      in_column[idx].rmin = prev_rmin;\n    }\n    float prev_rmax = idx == 0 ? 0.0f : in_column[idx - 1].rmax;\n    if (in_column[idx].rmax < prev_rmax) {\n      in_column[idx].rmax = prev_rmax;\n    }\n    float rmin_next = in_column[idx].RMinNext();\n    if (in_column[idx].rmax < rmin_next) {\n      in_column[idx].rmax = rmin_next;\n    }\n  });\n}\n\nvoid SketchContainer::AllReduce(Context const *ctx, bool is_column_split) {\n  curt::SetDevice(ctx->Ordinal());\n  auto world = collective::GetWorldSize();\n  if (world == 1 || is_column_split) {\n    return;\n  }\n\n  timer_.Start(__func__);\n  // Bound local sketch size before exchanging data across workers.\n  auto intermediate_num_cuts = static_cast<bst_idx_t>(num_bins_ * kFactor);\n  this->Prune(ctx, intermediate_num_cuts);\n\n  auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();\n  CHECK_EQ(d_columns_ptr.size(), num_columns_ + 1);\n  size_t n = d_columns_ptr.size();\n  auto rc = collective::Allreduce(ctx, linalg::MakeVec(&n, 1), collective::Op::kMax);\n  SafeColl(rc);\n  CHECK_EQ(n, d_columns_ptr.size()) << \"Number of columns differs across workers\";\n\n  // Get the columns ptr from all workers\n  dh::device_vector<SketchContainer::OffsetT> gathered_ptrs;\n  
gathered_ptrs.resize(d_columns_ptr.size() * world, 0);\n  size_t rank = collective::GetRank();\n  auto offset = rank * d_columns_ptr.size();\n  thrust::copy(thrust::device, d_columns_ptr.data(), d_columns_ptr.data() + d_columns_ptr.size(),\n               gathered_ptrs.begin() + offset);\n  rc = collective::Allreduce(\n      ctx, linalg::MakeVec(gathered_ptrs.data().get(), gathered_ptrs.size(), ctx->Device()),\n      collective::Op::kSum);\n  SafeColl(rc);\n\n  // Get the data from all workers.\n  std::vector<std::int64_t> recv_lengths;\n  HostDeviceVector<std::int8_t> recvbuf;\n  rc = collective::AllgatherV(\n      ctx, linalg::MakeVec(this->entries_.data().get(), this->entries_.size(), ctx->Device()),\n      &recv_lengths, &recvbuf);\n  collective::SafeColl(rc);\n  for (std::size_t i = 0; i < recv_lengths.size() - 1; ++i) {\n    recv_lengths[i] = recv_lengths[i + 1] - recv_lengths[i];\n  }\n  recv_lengths.resize(recv_lengths.size() - 1);\n\n  // Segment the received data.\n  auto s_recvbuf = recvbuf.DeviceSpan();\n  std::vector<Span<SketchEntry>> allworkers;\n  offset = 0;\n  for (int32_t i = 0; i < world; ++i) {\n    size_t length_as_bytes = recv_lengths.at(i);\n    auto raw = s_recvbuf.subspan(offset, length_as_bytes);\n    CHECK_EQ(length_as_bytes % sizeof(SketchEntry), 0)\n        << \"Allgathered GPU sketch buffer has invalid size.\";\n    auto ptr = reinterpret_cast<std::uintptr_t>(raw.data());\n    CHECK_EQ(ptr % alignof(SketchEntry), 0) << \"Allgathered GPU sketch buffer is misaligned.\";\n    auto sketch = Span<SketchEntry>(reinterpret_cast<SketchEntry *>(raw.data()),\n                                    length_as_bytes / sizeof(SketchEntry));\n    allworkers.emplace_back(sketch);\n    offset += length_as_bytes;\n  }\n  // Stop the timer early to avoid interference from the new sketch container.\n  timer_.Stop(__func__);\n\n  // Merge them into a new sketch.\n  SketchContainer new_sketch(this->feature_types_, num_bins_, this->num_columns_, 
ctx->Device());\n  for (size_t i = 0; i < allworkers.size(); ++i) {\n    auto worker = allworkers[i];\n    auto worker_ptr =\n        dh::ToSpan(gathered_ptrs).subspan(i * d_columns_ptr.size(), d_columns_ptr.size());\n    new_sketch.Merge(ctx, worker_ptr, worker);\n  }\n\n  *this = std::move(new_sketch);\n}\n\nnamespace {\nstruct InvalidCatOp {\n  Span<SketchEntry const> values;\n  Span<size_t const> ptrs;\n  Span<FeatureType const> ft;\n\n  XGBOOST_DEVICE bool operator()(size_t i) const {\n    auto fidx = dh::SegmentId(ptrs, i);\n    return IsCat(ft, fidx) && InvalidCat(values[i].value);\n  }\n};\n}  // anonymous namespace\n\nHistogramCuts SketchContainer::MakeCuts(Context const *ctx, bool is_column_split) {\n  curt::SetDevice(ctx->Ordinal());\n  HistogramCuts cuts{num_columns_};\n  auto *p_cuts = &cuts;\n\n  // Sync between workers.\n  this->AllReduce(ctx, is_column_split);\n\n  timer_.Start(__func__);\n  // Prune to final number of bins.\n  this->Prune(ctx, num_bins_ + 1);\n\n  // Set up inputs\n  auto d_in_columns_ptr = this->columns_ptr_.ConstDeviceSpan();\n\n  auto const in_cut_values = dh::ToSpan(this->entries_);\n\n  // Set up output ptr\n  p_cuts->cut_ptrs_.SetDevice(ctx->Device());\n  auto &h_out_columns_ptr = p_cuts->cut_ptrs_.HostVector();\n  h_out_columns_ptr.front() = 0;\n  auto const &h_feature_types = this->feature_types_.ConstHostSpan();\n\n  auto d_ft = feature_types_.ConstDeviceSpan();\n\n  std::vector<SketchEntry> max_values;\n  float max_cat{-1.f};\n  if (has_categorical_) {\n    auto key_it = dh::MakeTransformIterator<bst_feature_t>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) -> bst_feature_t {\n          return dh::SegmentId(d_in_columns_ptr, i);\n        });\n    auto invalid_op = InvalidCatOp{in_cut_values, d_in_columns_ptr, d_ft};\n    auto val_it = dh::MakeTransformIterator<SketchEntry>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {\n          auto fidx = 
dh::SegmentId(d_in_columns_ptr, i);\n          auto v = in_cut_values[i];\n          if (IsCat(d_ft, fidx)) {\n            if (invalid_op(i)) {\n              // use inf to indicate invalid value, this way we can keep it as an\n              // indicator in the reduce operation as it's always the greatest value.\n              v.value = std::numeric_limits<float>::infinity();\n            }\n          }\n          return v;\n        });\n    CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);\n    max_values.resize(d_in_columns_ptr.size() - 1);\n\n    // In some cases (e.g. column-wise data split), we may have empty columns, so we need to keep\n    // track of the unique keys (feature indices) after the `thrust::reduce_by_key` call.\n    dh::caching_device_vector<size_t> d_max_keys(d_in_columns_ptr.size() - 1);\n    dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);\n    auto new_end = thrust::reduce_by_key(\n        ctx->CUDACtx()->CTP(), key_it, key_it + in_cut_values.size(), val_it, d_max_keys.begin(),\n        d_max_values.begin(), thrust::equal_to<bst_feature_t>{},\n        [] __device__(auto l, auto r) { return l.value > r.value ? 
l : r; });\n    d_max_keys.erase(new_end.first, d_max_keys.end());\n    d_max_values.erase(new_end.second, d_max_values.end());\n\n    // The device vector needs to be initialized explicitly since we may have some missing columns.\n    SketchEntry default_entry{};\n    dh::caching_device_vector<SketchEntry> d_max_results(d_in_columns_ptr.size() - 1,\n                                                         default_entry);\n    thrust::scatter(ctx->CUDACtx()->CTP(), d_max_values.begin(), d_max_values.end(),\n                    d_max_keys.begin(), d_max_results.begin());\n    dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_results));\n    auto max_it = MakeIndexTransformIter([&](auto i) {\n      if (IsCat(h_feature_types, i)) {\n        return max_values[i].value;\n      }\n      return -1.f;\n    });\n    max_cat = *std::max_element(max_it, max_it + max_values.size());\n    if (std::isinf(max_cat)) {\n      InvalidCategory();\n    }\n  }\n\n  // Set up output cuts\n  for (bst_feature_t i = 0; i < num_columns_; ++i) {\n    size_t column_size = std::max(static_cast<size_t>(1ul), this->Column(i).size());\n    if (IsCat(h_feature_types, i)) {\n      // column_size is the number of unique values in that feature.\n      CheckMaxCat(max_values[i].value, column_size);\n      h_out_columns_ptr[i + 1] = max_values[i].value + 1;  // includes both max_cat and 0.\n    } else {\n      h_out_columns_ptr[i + 1] =\n          std::min(static_cast<size_t>(column_size), static_cast<size_t>(num_bins_));\n    }\n  }\n  std::partial_sum(h_out_columns_ptr.begin(), h_out_columns_ptr.end(), h_out_columns_ptr.begin());\n  auto d_out_columns_ptr = p_cuts->cut_ptrs_.ConstDeviceSpan();\n\n  size_t total_bins = h_out_columns_ptr.back();\n  p_cuts->cut_values_.SetDevice(ctx->Device());\n  p_cuts->cut_values_.Resize(total_bins);\n  auto out_cut_values = p_cuts->cut_values_.DeviceSpan();\n\n  dh::LaunchN(total_bins, [=] __device__(size_t idx) {\n    auto column_id = 
dh::SegmentId(d_out_columns_ptr, idx);\n    auto in_column = in_cut_values.subspan(\n        d_in_columns_ptr[column_id], d_in_columns_ptr[column_id + 1] - d_in_columns_ptr[column_id]);\n    auto out_column =\n        out_cut_values.subspan(d_out_columns_ptr[column_id],\n                               d_out_columns_ptr[column_id + 1] - d_out_columns_ptr[column_id]);\n    idx -= d_out_columns_ptr[column_id];\n    if (in_column.size() == 0) {\n      // If the column is empty, we push a dummy value.  It won't affect training as the\n      // column is empty, trees cannot split on it.  This is just to be consistent with\n      // rest of the library.\n      if (idx == 0) {\n        out_column[0] = kRtEps;\n        assert(out_column.size() == 1);\n      }\n      return;\n    }\n\n    if (IsCat(d_ft, column_id)) {\n      out_column[idx] = idx;\n      return;\n    }\n\n    // Last thread is responsible for setting a value that's greater than other cuts.\n    if (idx == out_column.size() - 1) {\n      const bst_float cpt = in_column.back().value;\n      // this must be bigger than last value in a scale\n      const bst_float last = cpt + (fabs(cpt) + 1e-5);\n      out_column[idx] = last;\n      return;\n    }\n    assert(idx + 1 < in_column.size());\n    out_column[idx] = in_column[idx + 1].value;\n  });\n\n  p_cuts->SetCategorical(this->has_categorical_, max_cat);\n  timer_.Stop(__func__);\n  return cuts;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/quantile.cuh",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_QUANTILE_CUH_\n#define XGBOOST_COMMON_QUANTILE_CUH_\n\n#include <thrust/logical.h>  // for any_of\n\n#include <cstddef>     // for size_t\n#include <functional>  // for equal_to\n\n#include \"categorical.h\"\n#include \"common.h\"          // for HumanMemUnit\n#include \"cuda_context.cuh\"  // for CUDAContext\n#include \"cuda_rt_utils.h\"   // for SetDevice\n#include \"device_helpers.cuh\"\n#include \"error_msg.h\"  // for InvalidMaxBin\n#include \"quantile.h\"\n#include \"timer.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/span.h\"\n\nnamespace xgboost::common {\nclass HistogramCuts;\nusing WQSketch = WQuantileSketch;\nusing SketchEntry = WQSketch::Entry;\n\nnamespace detail {\nstruct SketchUnique {\n  XGBOOST_DEVICE bool operator()(SketchEntry const& a, SketchEntry const& b) const {\n    return a.value - b.value == 0;\n  }\n};\n}  // namespace detail\n\n/*!\n * \\brief A container that holds the device sketches.  
Sketching is performed per-column,\n *        but fused into single operation for performance.\n */\nclass SketchContainer {\n public:\n  static constexpr float kFactor = WQSketch::kFactor;\n  using OffsetT = bst_idx_t;\n  static_assert(sizeof(OffsetT) == sizeof(size_t), \"Wrong type for sketch element offset.\");\n\n private:\n  Monitor timer_;\n  HostDeviceVector<FeatureType> feature_types_;\n  bst_feature_t num_columns_;\n  int32_t num_bins_;\n\n  // The container is just a CSC matrix plus scratch storage for out-of-place transforms.\n  dh::device_vector<SketchEntry> entries_;\n  dh::device_vector<SketchEntry> entries_tmp_;\n  HostDeviceVector<OffsetT> columns_ptr_;\n  HostDeviceVector<OffsetT> columns_ptr_tmp_;\n\n  bool has_categorical_{false};\n\n  void SetCurrentColumns(Span<OffsetT const> columns_ptr);\n  void CommitScratch(std::size_t n_entries) {\n    entries_.swap(entries_tmp_);\n    columns_ptr_.Copy(columns_ptr_tmp_);\n    entries_.resize(n_entries);\n  }\n\n  // Get the span of one column.\n  Span<SketchEntry> Column(bst_feature_t i) {\n    auto data = dh::ToSpan(this->entries_);\n    auto h_ptr = columns_ptr_.ConstHostSpan();\n    auto c = data.subspan(h_ptr[i], h_ptr[i+1] - h_ptr[i]);\n    return c;\n  }\n\n public:\n  /* \\brief GPU quantile structure, with sketch data for each column.\n   *\n   * \\param max_bin     Maximum number of bins per column.\n   * \\param num_columns Total number of columns in dataset.\n   * \\param device      GPU ID.\n   */\n  SketchContainer(HostDeviceVector<FeatureType> const& feature_types, bst_bin_t max_bin,\n                  bst_feature_t num_columns, DeviceOrd device)\n      : num_columns_{num_columns}, num_bins_{max_bin} {\n    CHECK(device.IsCUDA());\n    // Initialize Sketches for this dmatrix\n    this->columns_ptr_.SetDevice(device);\n    this->columns_ptr_.Resize(num_columns + 1, 0);\n    this->columns_ptr_tmp_.SetDevice(device);\n    this->columns_ptr_tmp_.Resize(num_columns + 1, 0);\n\n    
this->feature_types_.Resize(feature_types.Size());\n    this->feature_types_.Copy(feature_types);\n    // Pull to device.\n    this->feature_types_.SetDevice(device);\n    this->feature_types_.ConstDeviceSpan();\n    this->feature_types_.ConstHostSpan();\n\n    auto d_feature_types = feature_types_.ConstDeviceSpan();\n    has_categorical_ =\n        !d_feature_types.empty() &&\n        thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types), common::IsCatOp{});\n    CHECK_GE(max_bin, 2) << error::InvalidMaxBin();\n\n    timer_.Init(__func__);\n  }\n  /**\n   * @brief Calculate the memory cost of the container.\n   */\n  [[nodiscard]] std::size_t MemCapacityBytes() const {\n    auto constexpr kE = sizeof(typename decltype(this->entries_)::value_type);\n    auto n_bytes = (this->entries_.capacity() + this->entries_tmp_.capacity()) * kE;\n    n_bytes += (this->columns_ptr_.Size() + this->columns_ptr_tmp_.Size()) * sizeof(OffsetT);\n    n_bytes += this->feature_types_.Size() * sizeof(FeatureType);\n\n    return n_bytes;\n  }\n  [[nodiscard]] std::size_t MemCostBytes() const {\n    auto constexpr kE = sizeof(typename decltype(this->entries_)::value_type);\n    auto n_bytes = (this->entries_.size() + this->entries_tmp_.size()) * kE;\n    n_bytes += (this->columns_ptr_.Size() + this->columns_ptr_tmp_.Size()) * sizeof(OffsetT);\n    n_bytes += this->feature_types_.Size() * sizeof(FeatureType);\n\n    return n_bytes;\n  }\n  /* \\brief Whether the predictor matrix contains categorical features. */\n  bool HasCategorical() const { return has_categorical_; }\n  /* \\brief Accumulate weights of duplicated entries in input. */\n  size_t ScanInput(Context const* ctx, Span<SketchEntry> entries, Span<OffsetT> d_columns_ptr_in);\n  /* Fix rounding error and re-establish invariance.  The error is mostly generated by the\n   * addition inside `RMinNext` and subtraction in `RMaxPrev`. 
*/\n  void FixError();\n\n  /* \\brief Push sorted entries.\n   *\n   * \\param entries Sorted entries.\n   * \\param columns_ptr CSC pointer for entries.\n   * \\param cuts_ptr CSC pointer for cuts.\n   * \\param total_cuts Total number of cuts, equal to the back of cuts_ptr.\n   * \\param weights (optional) data weights.\n   */\n  void Push(Context const* ctx, Span<Entry const> entries, Span<size_t> columns_ptr,\n            common::Span<OffsetT> cuts_ptr, size_t total_cuts, Span<float> weights = {});\n  /**\n   * @brief Prune the quantile structure.\n   *\n   * @param to The maximum size of pruned quantile.  If the size of quantile structure is\n   *           already less than `to`, then no operation is performed.\n   */\n  void Prune(Context const* ctx, size_t to);\n  /**\n   * @brief Merge another set of sketch.\n   *\n   * @param that_columns_ptr Column pointer of the quantile summary being merged.\n   * @param that Columns of the other quantile summary.\n   */\n  void Merge(Context const* ctx, Span<OffsetT const> that_columns_ptr,\n             Span<SketchEntry const> that);\n  /**\n   * @brief Shrink the internal data structure to reduce memory usage. Can be used after\n   *        prune.\n   */\n  void ShrinkToFit() {\n    this->entries_.shrink_to_fit();\n    this->entries_tmp_.clear();\n    this->entries_tmp_.shrink_to_fit();\n    LOG(DEBUG) << \"Quantile memory cost:\" << common::HumanMemUnit(this->MemCapacityBytes());\n  }\n\n  /* \\brief Merge quantiles from other GPU workers. */\n  void AllReduce(Context const* ctx, bool is_column_split);\n  /* \\brief Create the final histogram cut values. 
*/\n  [[nodiscard]] HistogramCuts MakeCuts(Context const* ctx, bool is_column_split);\n\n  Span<SketchEntry const> Data() const { return {entries_.data().get(), entries_.size()}; }\n  HostDeviceVector<FeatureType> const& FeatureTypes() const { return feature_types_; }\n  Span<OffsetT const> ColumnsPtr() const { return columns_ptr_.ConstDeviceSpan(); }\n\n  SketchContainer(SketchContainer&&) = default;\n  SketchContainer& operator=(SketchContainer&&) = default;\n\n  SketchContainer(const SketchContainer&) = delete;\n  SketchContainer& operator=(const SketchContainer&) = delete;\n\n};\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_COMMON_QUANTILE_CUH_\n"
  },
  {
    "path": "src/common/quantile.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file quantile.h\n * \\brief util to compute quantiles\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_QUANTILE_H_\n#define XGBOOST_COMMON_QUANTILE_H_\n\n#include <xgboost/data.h>\n#include <xgboost/logging.h>\n\n#include <algorithm>\n#include <cmath>\n#include <limits>\n#include <set>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include \"categorical.h\"\n#include \"common.h\"\n#include \"error_msg.h\"        // GroupWeight\n#include \"optional_weight.h\"  // OptionalWeights\n#include \"threading_utils.h\"\n#include \"timer.h\"\n\nnamespace xgboost::common {\n/*!\n * \\brief experimental wsummary\n * \\tparam DType type of data content\n * \\tparam RType type of rank\n */\ntemplate <typename DType = bst_float, typename RType = bst_float>\nstruct WQSummary {\n  /*! \\brief an entry in the sketch summary */\n  struct Entry {\n    /*! \\brief minimum rank */\n    RType rmin{};\n    /*! \\brief maximum rank */\n    RType rmax{};\n    /*! \\brief maximum weight */\n    RType wmin{};\n    /*! \\brief the value of data */\n    DType value{};\n    // constructor\n    XGBOOST_DEVICE Entry() {}  // NOLINT\n    // constructor\n    XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)\n        : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}\n    /*! \\return rmin estimation for v strictly bigger than value */\n    XGBOOST_DEVICE RType RMinNext() const { return rmin + wmin; }\n    /*! \\return rmax estimation for v strictly smaller than value */\n    XGBOOST_DEVICE RType RMaxPrev() const { return rmax - wmin; }\n  };\n  // constructor\n  WQSummary(Span<Entry> data, size_t current_elements)\n      : data_{data}, current_elements_{current_elements} {}\n  /*! \\brief Return the number of valid entries in this summary. */\n  [[nodiscard]] size_t Size() const { return current_elements_; }\n  /*! \\brief Return true if this summary has no valid entries. 
*/\n  [[nodiscard]] bool Empty() const { return this->Size() == 0; }\n  /*! \\brief Return a const span over valid entries [0, Size()). */\n  [[nodiscard]] Span<Entry const> Entries() const { return {data_.data(), current_elements_}; }\n  /*! \\brief Set the number of valid entries in this summary. */\n  void SetSize(size_t n) {\n    CHECK_LE(n, data_.size());\n    current_elements_ = n;\n  }\n  /*! \\brief Clear summary contents while keeping allocated storage. */\n  void Clear() { current_elements_ = 0; }\n  /*!\n   * \\brief copy content from src\n   * \\param src source sketch\n   */\n  void CopyFrom(const WQSummary &src) {\n    if (!src.data_.data()) {\n      CHECK_EQ(src.current_elements_, 0);\n      this->Clear();\n      return;\n    }\n    if (!data_.data()) {\n      CHECK_EQ(this->current_elements_, 0);\n      CHECK_EQ(src.current_elements_, 0);\n      return;\n    }\n    current_elements_ = src.current_elements_;\n    std::copy(src.data_.data(), src.data_.data() + current_elements_, data_.data());\n  }\n\n  void SetFromSorted(std::vector<std::pair<DType, RType>> const &queue) {\n    this->Clear();\n    RType wsum = 0;\n    for (size_t i = 0; i < queue.size();) {\n      size_t j = i + 1;\n      RType w = queue[i].second;\n      while (j < queue.size() && queue[j].first == queue[i].first) {\n        w += queue[j].second;\n        ++j;\n      }\n      data_[current_elements_++] = Entry{wsum, wsum + w, w, queue[i].first};\n      wsum += w;\n      i = j;\n    }\n  }\n\n  /*!\n   * \\brief Set this summary from sorted column entries and prune by max_size.\n   *\n   * The input column must be sorted by feature value.\n   */\n  void SetPruneSorted(common::Span<::xgboost::Entry const> column,\n                      std::vector<float> const &weights, size_t max_size) {\n    CHECK_GE(max_size, 1);\n    CHECK_GE(data_.size(), max_size + 1);\n\n    this->Clear();\n    auto const *col_data = column.data();\n    auto const col_size = column.size();\n    double 
sum_total{0.0};\n    double rmin{0.0};\n    double wmin{0.0};\n    bst_float last_fvalue{0.0f};\n    double next_goal{-1.0f};\n\n    // first pass\n    for (size_t i = 0; i < col_size; ++i) {\n      auto const &c = col_data[i];\n      sum_total += weights[c.index];\n    }\n\n    // second pass\n    for (size_t i = 0; i < col_size; ++i) {\n      auto const &c = col_data[i];\n      if (next_goal == -1.0f) {\n        next_goal = 0.0f;\n        last_fvalue = c.fvalue;\n        wmin = weights[c.index];\n        continue;\n      }\n      if (last_fvalue != c.fvalue) {\n        double rmax = rmin + wmin;\n        auto summary_size = this->Size();\n        if (rmax >= next_goal && summary_size != max_size) {\n          if (summary_size == 0 || last_fvalue > data_[summary_size - 1].value) {\n            CHECK_LT(summary_size, max_size) << \"invalid maximum size max_size=\" << max_size\n                                             << \", stemp.current_elements=\" << summary_size;\n            data_[summary_size] = Entry(static_cast<bst_float>(rmin), static_cast<bst_float>(rmax),\n                                        static_cast<bst_float>(wmin), last_fvalue);\n            ++summary_size;\n            this->SetSize(summary_size);\n          }\n          if (summary_size == max_size) {\n            next_goal = sum_total * 2.0f + 1e-5f;\n          } else {\n            next_goal = static_cast<bst_float>(summary_size * sum_total / max_size);\n          }\n        } else if (rmax >= next_goal) {\n          LOG(DEBUG) << \"INFO: rmax=\" << rmax << \", sum_total=\" << sum_total\n                     << \", next_goal=\" << next_goal << \", size=\" << summary_size;\n        }\n        rmin = rmax;\n        wmin = weights[c.index];\n        last_fvalue = c.fvalue;\n      } else {\n        wmin += weights[c.index];\n      }\n    }\n\n    if (col_size != 0) {\n      auto summary_size = this->Size();\n      double rmax = rmin + wmin;\n      if (summary_size == 0 || last_fvalue > 
data_[summary_size - 1].value) {\n        CHECK_LE(summary_size, max_size) << \"Finalize: invalid maximum size, max_size=\" << max_size\n                                         << \", stemp.current_elements=\" << summary_size;\n        data_[summary_size] = Entry(static_cast<bst_float>(rmin), static_cast<bst_float>(rmax),\n                                    static_cast<bst_float>(wmin), last_fvalue);\n        ++summary_size;\n        this->SetSize(summary_size);\n      }\n    }\n  }\n  /*!\n   * \\brief prune current summary in place.\n   *\n   * \\param maxsize size we can afford in the pruned sketch\n   */\n  void SetPrune(size_t maxsize) {\n    if (maxsize == 0) {\n      this->current_elements_ = 0;\n      return;\n    }\n    auto const src_size = this->current_elements_;\n    if (src_size <= maxsize) {\n      return;\n    }\n    // Use raw pointers in this hot loop to avoid per-access Span bounds checks.\n    auto const *src_data = this->data_.data();\n    auto *dst_data = data_.data();\n    if (maxsize == 1) {\n      dst_data[0] = src_data[0];\n      this->current_elements_ = 1;\n      return;\n    }\n    const RType begin = src_data[0].rmax;\n    const RType range = src_data[src_size - 1].rmin - src_data[0].rmax;\n    const size_t n = maxsize - 1;\n    dst_data[0] = src_data[0];\n    this->current_elements_ = 1;\n    // lastidx is used to avoid duplicated records\n    size_t i = 1, lastidx = 0;\n    for (size_t k = 1; k < n; ++k) {\n      RType dx2 = 2 * ((k * range) / n + begin);\n      // find first i such that  d < (rmax[i+1] + rmin[i+1]) / 2\n      while (i < src_size - 1 && dx2 >= src_data[i + 1].rmax + src_data[i + 1].rmin) {\n        ++i;\n      }\n      if (i == src_size - 1) break;\n      if (dx2 < src_data[i].RMinNext() + src_data[i + 1].RMaxPrev()) {\n        if (i != lastidx) {\n          dst_data[current_elements_++] = src_data[i];\n          lastidx = i;\n        }\n      } else {\n        if (i + 1 != lastidx) {\n          
dst_data[current_elements_++] = src_data[i + 1];\n          lastidx = i + 1;\n        }\n      }\n    }\n    if (lastidx != src_size - 1) {\n      dst_data[current_elements_++] = src_data[src_size - 1];\n    }\n  }\n  /*!\n   * \\brief combine `other` into `this`.\n   *\n   * \\param other Input summary to combine with `this`.\n   * \\param workspace Optional entry buffer for temporary merged entries.\n   */\n  void SetCombine(const WQSummary &other, std::vector<Entry> *workspace = nullptr) {\n    if (other.Empty()) {\n      return;\n    }\n    if (this->data_.size() == 0) {\n      this->current_elements_ = 0;\n      return;\n    }\n    if (this->Empty()) {\n      CHECK_GE(this->data_.size(), other.current_elements_);\n      this->CopyFrom(other);\n      return;\n    }\n    size_t const merged_size = this->current_elements_ + other.current_elements_;\n    CHECK_GE(this->data_.size(), merged_size);\n\n    std::vector<Entry> owned_workspace;\n    if (workspace == nullptr) {\n      workspace = &owned_workspace;\n    }\n    if (workspace->size() < merged_size) {\n      workspace->resize(merged_size);\n    }\n\n    WQSummary<DType, RType> merged{Span<Entry>{workspace->data(), merged_size}, 0};\n    // Merge with raw pointers to avoid Span bounds checks inside the tight loop.\n    const Entry *a = this->data_.data(), *a_end = this->data_.data() + this->current_elements_;\n    const Entry *b = other.data_.data(), *b_end = other.data_.data() + other.current_elements_;\n    // extended rmin value\n    RType aprev_rmin = 0, bprev_rmin = 0;\n    Entry *dst = merged.data_.data();\n    while (a != a_end && b != b_end) {\n      // duplicated value entry\n      if (a->value == b->value) {\n        *dst = Entry(a->rmin + b->rmin, a->rmax + b->rmax, a->wmin + b->wmin, a->value);\n        aprev_rmin = a->RMinNext();\n        bprev_rmin = b->RMinNext();\n        ++dst;\n        ++a;\n        ++b;\n      } else if (a->value < b->value) {\n        *dst = Entry(a->rmin + bprev_rmin, 
a->rmax + b->RMaxPrev(), a->wmin, a->value);\n        aprev_rmin = a->RMinNext();\n        ++dst;\n        ++a;\n      } else {\n        *dst = Entry(b->rmin + aprev_rmin, b->rmax + a->RMaxPrev(), b->wmin, b->value);\n        bprev_rmin = b->RMinNext();\n        ++dst;\n        ++b;\n      }\n    }\n    if (a != a_end) {\n      RType brmax = (b_end - 1)->rmax;\n      do {\n        *dst = Entry(a->rmin + bprev_rmin, a->rmax + brmax, a->wmin, a->value);\n        ++dst;\n        ++a;\n      } while (a != a_end);\n    }\n    if (b != b_end) {\n      RType armax = (a_end - 1)->rmax;\n      do {\n        *dst = Entry(b->rmin + aprev_rmin, b->rmax + armax, b->wmin, b->value);\n        ++dst;\n        ++b;\n      } while (b != b_end);\n    }\n    merged.current_elements_ = dst - merged.data_.data();\n\n    const RType tol = 10;\n    RType err_mingap, err_maxgap, err_wgap;\n    merged.FixError(&err_mingap, &err_maxgap, &err_wgap);\n    if (err_mingap > tol || err_maxgap > tol || err_wgap > tol) {\n      LOG(INFO) << \"mingap=\" << err_mingap << \", maxgap=\" << err_maxgap << \", wgap=\" << err_wgap;\n    }\n    CHECK(merged.current_elements_ <= this->current_elements_ + other.current_elements_)\n        << \"bug in combine\";\n\n    std::copy_n(merged.data_.data(), merged.current_elements_, this->data_.data());\n    this->current_elements_ = merged.current_elements_;\n  }\n\n protected:\n  /*!\n   * \\brief Rebind underlying storage span while preserving current logical size.\n   */\n  void SetStorage(Span<Entry> storage) {\n    data_ = storage;\n    CHECK_LE(current_elements_, data_.size());\n  }\n  /*!\n   * \\brief Reset storage binding and clear logical size.\n   */\n  void ResetStorage() {\n    data_ = Span<Entry>{};\n    current_elements_ = 0;\n  }\n\n private:\n  /*! \\brief data field */\n  Span<Entry> data_;\n  /*! 
\\brief number of elements in the summary */\n  size_t current_elements_;\n  // try to fix rounding error\n  // and re-establish invariance\n  void FixError(RType *err_mingap, RType *err_maxgap, RType *err_wgap) const {\n    *err_mingap = 0;\n    *err_maxgap = 0;\n    *err_wgap = 0;\n    RType prev_rmin = 0, prev_rmax = 0;\n    // Use raw pointer for the correction pass to avoid Span bounds checks.\n    auto *entries = data_.data();\n    for (size_t i = 0; i < this->current_elements_; ++i) {\n      if (entries[i].rmin < prev_rmin) {\n        entries[i].rmin = prev_rmin;\n        *err_mingap = std::max(*err_mingap, prev_rmin - entries[i].rmin);\n      } else {\n        prev_rmin = entries[i].rmin;\n      }\n      if (entries[i].rmax < prev_rmax) {\n        entries[i].rmax = prev_rmax;\n        *err_maxgap = std::max(*err_maxgap, prev_rmax - entries[i].rmax);\n      }\n      RType rmin_next = entries[i].RMinNext();\n      if (entries[i].rmax < rmin_next) {\n        entries[i].rmax = rmin_next;\n        *err_wgap = std::max(*err_wgap, entries[i].rmax - rmin_next);\n      }\n      prev_rmax = entries[i].rmax;\n    }\n  }\n};\n\ntemplate <typename DType = bst_float, typename RType = bst_float>\nstruct Queue {\n  using QEntry = std::pair<DType, RType>;  // value, weight\n\n  std::vector<QEntry> queue;\n  size_t max_size{1};\n\n  explicit Queue(size_t max_size_in = 1) {\n    CHECK_GE(max_size_in, 1);\n    max_size = max_size_in;\n    queue.reserve(1);\n  }\n\n  auto Size() const { return queue.size(); }\n\n  // push element to the queue, return false if the queue is full and need to be flushed\n  bool Push(DType x, RType w) {\n    if (queue.empty() || queue.back().first != x) {\n      // Keep capacity at 1 for tiny queues, reserve max capacity lazily.\n      if (queue.size() == 1 && queue.capacity() == 1) {\n        queue.reserve(max_size);\n      }\n      if (queue.size() == max_size) {\n        return false;\n      }\n      queue.emplace_back(x, w);\n      return 
true;\n    }\n    queue.back().second += w;\n    return true;\n  }\n\n  template <typename Summary>\n  void PopSummary(Summary *out) {\n    CHECK(out);\n    out->Reserve(queue.size());\n    std::sort(queue.begin(), queue.end(),\n              [](QEntry const &l, QEntry const &r) { return l.first < r.first; });\n    out->SetFromSorted(queue);\n    queue.clear();\n  }\n};\n\nstruct WQSummaryContainer : public WQSummary<> {\n  std::vector<WQSummary<>::Entry> space;\n  WQSummaryContainer() : WQSummary<>(Span<WQSummary<>::Entry>{}, 0) {}\n\n  WQSummaryContainer(WQSummaryContainer const &src) = delete;\n\n  WQSummaryContainer(WQSummaryContainer &&src) noexcept\n      : WQSummary<>(Span<WQSummary<>::Entry>{}, 0), space{std::move(src.space)} {\n    this->SetStorage({dmlc::BeginPtr(this->space), this->space.size()});\n    this->SetSize(src.Size());\n    src.ResetStorage();\n  }\n\n  WQSummaryContainer &operator=(WQSummaryContainer const &src) = delete;\n\n  WQSummaryContainer &operator=(WQSummaryContainer &&src) noexcept {\n    if (this == &src) {\n      return *this;\n    }\n    this->space = std::move(src.space);\n    this->SetStorage({dmlc::BeginPtr(this->space), this->space.size()});\n    this->SetSize(src.Size());\n    src.ResetStorage();\n    return *this;\n  }\n\n  void Reserve(size_t size) {\n    if (size > space.size()) {\n      space.resize(size);\n    }\n    this->SetStorage({dmlc::BeginPtr(space), space.size()});\n  }\n};\n\n/*! \\brief Weighted quantile sketch algorithm using merge/prune. 
*/\nclass WQuantileSketch {\n public:\n  static float constexpr kFactor = 8.0;\n\n public:\n  using Summary = WQSummary<>;\n  using Entry = typename WQSummary<>::Entry;\n  using SummaryContainer = WQSummaryContainer;\n  WQuantileSketch() = default;\n  WQuantileSketch(size_t maxn, double eps) {\n    limit_size_ = LimitSizeLevel(maxn, eps);\n    inqueue_ = Queue<>(limit_size_ * 2);\n    data_.clear();\n    level_.clear();\n  }\n\n  static size_t LimitSizeLevel(size_t maxn, double eps) {\n    if (maxn == 0) {\n      // Empty columns can appear in distributed column-split settings.\n      return 1;\n    }\n    size_t nlevel = 1;\n    size_t limit_size = 1;\n    while (true) {\n      limit_size = static_cast<size_t>(ceil(nlevel / eps)) + 1;\n      limit_size = std::min(maxn, limit_size);\n      size_t n = (1ULL << nlevel);\n      if (n * limit_size >= maxn) break;\n      ++nlevel;\n    }\n    // check invariant\n    size_t n = (1ULL << nlevel);\n    CHECK(n * limit_size >= maxn) << \"invalid init parameter\";\n    CHECK(nlevel <= std::max(static_cast<size_t>(1), static_cast<size_t>(limit_size * eps)))\n        << \"invalid init parameter\";\n    return limit_size;\n  }\n\n  /*!\n   * \\brief add an element to a sketch\n   * \\param x The element added to the sketch\n   * \\param w The weight of the element.\n   */\n  void Push(bst_float x, bst_float w = 1) {\n    if (w == static_cast<bst_float>(0)) return;\n    if (!inqueue_.Push(x, w)) {\n      inqueue_.PopSummary(&temp_);\n      this->PushSummary(&temp_);\n      inqueue_.Push(x, w);\n    }\n  }\n\n  /*!\n   * \\brief Add sorted column entries into this sketch.\n   *\n   * \\param column Sorted column entries in ascending order by feature value.\n   * \\param weights Row weights.\n   * \\param num_retained_items Target number of summary items to retain from sorted input.\n   */\n  void PushSorted(common::Span<::xgboost::Entry const> column, std::vector<float> const &weights,\n                  size_t 
num_retained_items) {\n    CHECK_GE(num_retained_items, 1);\n    auto const max_size = num_retained_items;\n    this->temp_.Reserve(max_size + 1);\n    this->temp_.SetPruneSorted(column, weights, max_size);\n    if (!column.empty()) {\n      this->PushSummary(&temp_);\n    }\n  }\n\n  /*! \\brief push up a prepared summary */\n  void PushSummary(WQSummaryContainer *summary) {\n    CHECK(summary);\n    summary->Reserve(limit_size_ * 2);\n    size_t l = 0;\n    // Level-wise merge/prune with carry propagation.\n    //\n    // Reference:\n    //   Greenwald, M. and Khanna, S. \"Space-efficient Online Computation of\n    //   Quantile Summaries\", SIGMOD 2001.\n    while (true) {\n      this->LazyInitLevel(l + 1);\n      // Clamp the incoming summary to per-level capacity before combining.\n      summary->SetPrune(limit_size_);\n      // Merge with the resident level summary.\n      summary->SetCombine(level_[l], &combine_workspace_);\n      // Level[l] is consumed into `summary`. Clear it before carry propagation.\n      level_[l].Clear();\n      // If merged summary fits, store at this level. Otherwise carry upward.\n      if (summary->Size() <= limit_size_) {\n        break;\n      }\n      ++l;\n    }\n\n    // First level where merged summary fits.\n    level_[l].CopyFrom(*summary);\n  }\n\n public:\n  /*! \\brief get the summary after finalize */\n  [[nodiscard]] WQSummaryContainer GetSummary(size_t max_size) {\n    // Flush pending queue into level summaries first.\n    inqueue_.PopSummary(&temp_);\n    this->PushSummary(&temp_);\n\n    auto const prune_size = std::max(max_size, limit_size_);\n    // Reserve based on observed live storage after local merge.\n    // This keeps memory use small when the sketch has very few entries (e.g. 
sparse\n    // columns / few local instances) while still reserving enough for immediate merges.\n    std::size_t observed_level_entries = 0;\n    for (auto const &level_summary : level_) {\n      observed_level_entries += level_summary.Size();\n    }\n    auto initial_reserve = std::min<std::size_t>(observed_level_entries, prune_size + limit_size_);\n    WQSummaryContainer out;\n    if (initial_reserve > 0) {\n      out.Reserve(initial_reserve);\n    }\n\n    // Merge all levels into out.\n    for (auto &level_summary : level_) {\n      auto combine_needed = out.Size() + level_summary.Size();\n      if (combine_needed > out.space.size()) {\n        out.Reserve(combine_needed);\n      }\n      out.SetCombine(level_summary, &combine_workspace_);\n      out.SetPrune(prune_size);\n    }\n    out.SetPrune(max_size);\n    return out;\n  }\n\n private:\n  // initialize level space to at least nlevel\n  void LazyInitLevel(size_t nlevel) {\n    if (level_.size() >= nlevel) return;\n    data_.resize(limit_size_ * nlevel);\n    level_.clear();\n    level_.reserve(nlevel);\n    for (size_t l = 0; l < nlevel; ++l) {\n      level_.emplace_back(Span<Entry>{data_.data() + l * limit_size_, limit_size_}, 0);\n    }\n  }\n  // input data queue\n  Queue<> inqueue_{1};\n  // size of summary in each level\n  size_t limit_size_{1};\n  // the level of each summaries\n  std::vector<WQSummary<>> level_;\n  // content of the summary\n  std::vector<WQSummary<>::Entry> data_;\n  // temporal summary, used for temp-merge\n  WQSummaryContainer temp_;\n  // reusable workspace for combine-prune operations\n  std::vector<Entry> combine_workspace_;\n};\n\nnamespace detail {\ninline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {\n  auto const &group_weights = info.weights_.HostVector();\n  if (group_weights.empty()) {\n    return group_weights;\n  }\n\n  auto const &group_ptr = info.group_ptr_;\n  CHECK_GE(group_ptr.size(), 2);\n  CHECK_EQ(group_weights.size(), group_ptr.size() - 1) << 
error::GroupWeight();\n  CHECK_EQ(group_ptr.back(), info.num_row_)\n      << error::GroupSize() << \" the number of rows from the data.\";\n\n  std::vector<float> out(info.num_row_);\n  size_t cur_group = 0;\n  for (bst_idx_t i = 0; i < info.num_row_; ++i) {\n    while (cur_group + 1 < group_ptr.size() && i >= group_ptr[cur_group + 1]) {\n      ++cur_group;\n    }\n    out[i] = group_weights[cur_group];\n  }\n  return out;\n}\n}  // namespace detail\n\nclass HistogramCuts;\n\ntemplate <typename Batch, typename IsValid>\nstd::vector<bst_idx_t> CalcColumnSize(Batch const &batch, bst_feature_t const n_columns,\n                                      size_t const n_threads, IsValid &&is_valid) {\n  std::vector<std::vector<bst_idx_t>> column_sizes_tloc(n_threads);\n  for (auto &column : column_sizes_tloc) {\n    column.resize(n_columns, 0);\n  }\n\n  ParallelFor(batch.Size(), n_threads, [&](omp_ulong i) {\n    auto &local_column_sizes = column_sizes_tloc.at(omp_get_thread_num());\n    auto const &line = batch.GetLine(i);\n    for (size_t j = 0; j < line.Size(); ++j) {\n      auto elem = line.GetElement(j);\n      if (is_valid(elem)) {\n        local_column_sizes[elem.column_idx]++;\n      }\n    }\n  });\n  // reduce to first thread\n  auto &entries_per_columns = column_sizes_tloc.front();\n  CHECK_EQ(entries_per_columns.size(), static_cast<size_t>(n_columns));\n  for (size_t i = 1; i < n_threads; ++i) {\n    CHECK_EQ(column_sizes_tloc[i].size(), static_cast<size_t>(n_columns));\n    for (size_t j = 0; j < n_columns; ++j) {\n      entries_per_columns[j] += column_sizes_tloc[i][j];\n    }\n  }\n  return entries_per_columns;\n}\n\ntemplate <typename Batch, typename IsValid>\nstd::vector<bst_feature_t> LoadBalance(Batch const &batch, size_t nnz, bst_feature_t n_columns,\n                                       size_t const nthreads, IsValid &&is_valid) {\n  /* Some sparse datasets have their mass concentrating on small number of features.  
To\n   * avoid waiting for a few threads running forever, we here distribute different number\n   * of columns to different threads according to number of entries.\n   */\n  size_t const total_entries = nnz;\n  size_t const entries_per_thread = DivRoundUp(total_entries, nthreads);\n\n  // Need to calculate the size for each batch.\n  std::vector<bst_idx_t> entries_per_columns = CalcColumnSize(batch, n_columns, nthreads, is_valid);\n  std::vector<bst_feature_t> cols_ptr(nthreads + 1, 0);\n  size_t count{0};\n  size_t current_thread{1};\n\n  for (auto col : entries_per_columns) {\n    cols_ptr.at(current_thread)++;  // add one column to thread\n    count += col;\n    CHECK_LE(count, total_entries);\n    if (count > entries_per_thread) {\n      current_thread++;\n      count = 0;\n      cols_ptr.at(current_thread) = cols_ptr[current_thread - 1];\n    }\n  }\n  // Idle threads.\n  for (; current_thread < cols_ptr.size() - 1; ++current_thread) {\n    cols_ptr[current_thread + 1] = cols_ptr[current_thread];\n  }\n  return cols_ptr;\n}\n\n/*!\n * A sketch matrix storing sketches for each feature.\n */\nclass HostSketchContainer {\n protected:\n  using WQSketch = WQuantileSketch;\n  std::vector<WQSketch> sketches_;\n  std::vector<std::set<float>> categories_;\n  std::vector<FeatureType> const feature_types_;\n\n  std::vector<bst_idx_t> columns_size_;\n  bst_bin_t max_bins_;\n  bool use_group_ind_{false};\n  int32_t n_threads_;\n  bool has_categorical_{false};\n  Monitor monitor_;\n\n public:\n  /* \\brief Initialize necessary info.\n   *\n   * \\param columns_size Size of each column.\n   * \\param max_bin maximum number of bins for each feature.\n   * \\param use_group whether is assigned to group to data instance.\n   */\n  HostSketchContainer(Context const *ctx, bst_bin_t max_bin,\n                      common::Span<FeatureType const> feature_types,\n                      std::vector<bst_idx_t> columns_size, bool use_group);\n\n  static bool UseGroup(MetaInfo const 
&info) {\n    size_t const num_groups = info.group_ptr_.size() == 0 ? 0 : info.group_ptr_.size() - 1;\n    // Use group index for weights?\n    bool const use_group_ind = num_groups != 0 && (info.weights_.Size() != info.num_row_);\n    return use_group_ind;\n  }\n\n  /* \\brief Push a CSR matrix. */\n  void PushRowPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian = {});\n\n  template <typename Batch>\n  void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);\n\n  /**\n   * \\brief Push a sorted CSC page.\n   */\n  void PushColPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian);\n\n  [[nodiscard]] HistogramCuts MakeCuts(Context const *ctx, MetaInfo const &info);\n\n protected:\n  template <typename Batch, typename IsValid>\n  void PushRowPageImpl(Batch const &batch, std::size_t base_rowid, OptionalWeights weights,\n                       size_t nnz, size_t n_features, bool is_dense, IsValid is_valid) {\n    auto thread_columns_ptr = LoadBalance(batch, nnz, n_features, n_threads_, is_valid);\n    ParallelFor(static_cast<std::size_t>(n_threads_), n_threads_, [&](std::size_t tid) {\n      auto const begin = thread_columns_ptr[tid];\n      auto const end = thread_columns_ptr[tid + 1];\n\n      // do not iterate if no columns are assigned to the thread\n      if (begin < end && end <= n_features) {\n        for (size_t ridx = 0; ridx < batch.Size(); ++ridx) {\n          auto const &line = batch.GetLine(ridx);\n          auto w = weights[ridx + base_rowid];\n          if (is_dense) {\n            for (size_t ii = begin; ii < end; ii++) {\n              auto elem = line.GetElement(ii);\n              if (is_valid(elem)) {\n                if (IsCat(feature_types_, ii)) {\n                  categories_[ii].emplace(elem.value);\n                } else {\n                  sketches_[ii].Push(elem.value, w);\n                }\n              }\n            }\n          } else {\n    
        for (size_t i = 0; i < line.Size(); ++i) {\n              auto const &elem = line.GetElement(i);\n              if (is_valid(elem) && elem.column_idx >= begin && elem.column_idx < end) {\n                if (IsCat(feature_types_, elem.column_idx)) {\n                  categories_[elem.column_idx].emplace(elem.value);\n                } else {\n                  sketches_[elem.column_idx].Push(elem.value, w);\n                }\n              }\n            }\n          }\n        }\n      }\n    });\n  }\n\n private:\n  // Merge categorical values from all workers.\n  [[nodiscard]] auto AllreduceCategories(Context const *ctx, MetaInfo const &info,\n                                         common::Span<bst_feature_t const> categorical_features)\n      -> std::vector<std::set<float>>;\n\n  // Merge numeric sketches from all workers.\n  [[nodiscard]] auto AllReduce(Context const *ctx, MetaInfo const &info,\n                               common::Span<bst_feature_t const> numeric_features)\n      -> std::vector<WQSketch::SummaryContainer>;\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_QUANTILE_H_\n"
  },
  {
    "path": "src/common/quantile_loss_utils.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include \"quantile_loss_utils.h\"\n\nnamespace xgboost::common {\nDMLC_REGISTER_PARAMETER(QuantileLossParam);\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/quantile_loss_utils.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_\n#define XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_\n\n#include <algorithm>  // for all_of\n\n#include \"param_array.h\"        // for ParamArray\n#include \"xgboost/logging.h\"    // CHECK\n#include \"xgboost/parameter.h\"  // XGBoostParameter\n\nnamespace xgboost::common {\nstruct QuantileLossParam : public XGBoostParameter<QuantileLossParam> {\n  ParamArray<float> quantile_alpha{\"quantile_alpha\"};\n  DMLC_DECLARE_PARAMETER(QuantileLossParam) {\n    DMLC_DECLARE_FIELD(quantile_alpha)\n        .describe(\"List of quantiles for quantile loss.\")\n        .set_default(ParamArray<float>{\"quantile_alpha\"});\n  }\n  void Validate() const {\n    CHECK(GetInitialised());\n    CHECK(!quantile_alpha.Get().empty());\n    auto const& array = quantile_alpha.Get();\n    auto valid =\n        std::all_of(array.cbegin(), array.cend(), [](auto q) { return q >= 0.0 && q <= 1.0; });\n    CHECK(valid) << \"quantile alpha must be in the range [0.0, 1.0].\";\n  }\n};\n\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_\n"
  },
  {
    "path": "src/common/random.cc",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include \"random.h\"\n\n#include <algorithm>  // for sort, max, copy\n#include <memory>     // for shared_ptr\n#include <sstream>    // for stringstream\n#include <string>     // for string\n\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json, String, get\n\nnamespace xgboost::common {\nvoid SaveRng(Json *p_out, RandomEngine const &rng) {\n  std::stringstream ss;\n  ss << std::hex << rng;\n  auto &out = *p_out;\n  out[\"rng_state\"] = String{ss.str()};\n}\n\nvoid LoadRng(Json const &in, RandomEngine *rng) {\n  std::stringstream ss{get<String const>(in[\"rng_state\"])};\n  ss >> std::hex >> *rng;\n}\n\nstd::shared_ptr<HostDeviceVector<bst_feature_t>> ColumnSampler::ColSample(\n    Context const *ctx, std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features,\n    float colsample) {\n  if (colsample == 1.0f) {\n    return p_features;\n  }\n\n  int n = std::max(1, static_cast<int>(colsample * p_features->Size()));\n  auto p_new_features = std::make_shared<HostDeviceVector<bst_feature_t>>();\n\n  if (ctx->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    cuda_impl::SampleFeature(ctx, n, p_features, p_new_features, this->feature_weights_,\n                             &this->weight_buffer_, &this->idx_buffer_);\n    return p_new_features;\n#else\n    AssertGPUSupport();\n    return nullptr;\n#endif  // defined(XGBOOST_USE_CUDA)\n  }\n\n  auto seed = ctx->Rng()();\n  RandomEngine rng(seed);\n  const auto &features = p_features->HostVector();\n  CHECK_GT(features.size(), 0);\n\n  auto &new_features = *p_new_features;\n\n  if (!feature_weights_.Empty()) {\n    auto const &h_features = p_features->HostVector();\n    auto const &h_feature_weight = feature_weights_.ConstHostVector();\n    auto &weight = this->weight_buffer_.HostVector();\n    weight.resize(h_features.size());\n    for (size_t i = 0; i < h_features.size(); ++i) {\n   
   weight[i] = h_feature_weight[h_features[i]];\n    }\n    new_features.HostVector() =\n        WeightedSamplingWithoutReplacement(ctx, &rng, p_features->HostVector(), weight, n);\n  } else {\n    new_features.Resize(features.size());\n    std::copy(features.begin(), features.end(), new_features.HostVector().begin());\n    std::shuffle(new_features.HostVector().begin(), new_features.HostVector().end(), rng);\n    new_features.Resize(n);\n  }\n  std::sort(new_features.HostVector().begin(), new_features.HostVector().end());\n  return p_new_features;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/random.cu",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include <thrust/shuffle.h>  // for shuffle\n\n#include <memory>  // for shared_ptr\n\n#include \"algorithm.cuh\"     // for ArgSort\n#include \"cuda_context.cuh\"  // for CUDAContext\n#include \"device_helpers.cuh\"\n#include \"random.h\"\n#include \"xgboost/base.h\"                // for bst_feature_t\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::common::cuda_impl {\n// GPU implementation for sampling without replacement, see the CPU version for references.\nvoid WeightedSamplingWithoutReplacement(Context const *ctx, common::Span<bst_feature_t const> array,\n                                        common::Span<float const> weights,\n                                        common::Span<bst_feature_t> results,\n                                        HostDeviceVector<bst_feature_t> *sorted_idx,\n                                        std::uint32_t seed) {\n  CUDAContext const *cuctx = ctx->CUDACtx();\n  CHECK_EQ(array.size(), weights.size());\n  // Sampling keys\n  dh::caching_device_vector<float> keys(weights.size());\n\n  auto d_keys = dh::ToSpan(keys);\n\n  constexpr auto kEps = kRtEps;  // avoid CUDA compilation error\n  thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), array.size(),\n                     [=] XGBOOST_DEVICE(std::size_t i) {\n                       thrust::default_random_engine rng;\n                       rng.seed(seed);\n                       rng.discard(i);\n                       thrust::uniform_real_distribution<float> dist;\n\n                       auto w = std::max(weights[i], kEps);\n                       auto u = dist(rng);\n                       auto k = std::log(u) / w;\n                       d_keys[i] = k;\n                     });\n  // Allocate buffer for sorted index.\n  auto d_idx = dh::LazyResize(ctx, sorted_idx, keys.size());\n\n  
ArgSort<false>(ctx, d_keys, d_idx);\n\n  // Filter the result according to sorted index.\n  auto it = thrust::make_permutation_iterator(dh::tbegin(array), dh::tbegin(d_idx));\n  // |array| == |weights| == |keys| == |sorted_idx| >= |results|\n  for (auto size : {array.size(), weights.size(), keys.size()}) {\n    CHECK_EQ(size, d_idx.size());\n  }\n  CHECK_GE(array.size(), results.size());\n  thrust::copy_n(cuctx->CTP(), it, results.size(), dh::tbegin(results));\n}\n\nvoid SampleFeature(Context const *ctx, bst_feature_t n_features,\n                   std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features,\n                   std::shared_ptr<HostDeviceVector<bst_feature_t>> p_new_features,\n                   HostDeviceVector<float> const &feature_weights,\n                   HostDeviceVector<float> *weight_buffer,\n                   HostDeviceVector<bst_feature_t> *idx_buffer) {\n  CUDAContext const *cuctx = ctx->CUDACtx();\n  auto &new_features = *p_new_features;\n  new_features.SetDevice(ctx->Device());\n  p_features->SetDevice(ctx->Device());\n  CHECK_LE(n_features, p_features->Size());\n\n  auto seed = ctx->Rng()();\n\n  if (!feature_weights.Empty()) {\n    CHECK_LE(p_features->Size(), feature_weights.Size());\n    idx_buffer->SetDevice(ctx->Device());\n    feature_weights.SetDevice(ctx->Device());\n\n    auto d_old_features = p_features->DeviceSpan();\n    auto d_weight_buffer = dh::LazyResize(ctx, weight_buffer, d_old_features.size());\n    // Filter weights according to the existing feature index.\n    auto d_feature_weight = feature_weights.ConstDeviceSpan();\n    auto it = thrust::make_permutation_iterator(dh::tcbegin(d_feature_weight),\n                                                dh::tcbegin(d_old_features));\n    thrust::copy_n(cuctx->CTP(), it, d_old_features.size(), dh::tbegin(d_weight_buffer));\n    new_features.Resize(n_features);\n    WeightedSamplingWithoutReplacement(ctx, d_old_features, d_weight_buffer,\n                                  
     new_features.DeviceSpan(), idx_buffer, seed);\n  } else {\n    new_features.Resize(p_features->Size());\n    new_features.Copy(*p_features);\n    auto d_feat = new_features.DeviceSpan();\n    thrust::default_random_engine rng;\n    rng.seed(seed);\n    thrust::shuffle(cuctx->CTP(), dh::tbegin(d_feat), dh::tend(d_feat), rng);\n    new_features.Resize(n_features);\n  }\n\n  auto d_new_features = new_features.DeviceSpan();\n  thrust::sort(cuctx->CTP(), dh::tbegin(d_new_features), dh::tend(d_new_features));\n}\n\nvoid InitFeatureSet(Context const *ctx,\n                    std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features) {\n  CUDAContext const *cuctx = ctx->CUDACtx();\n  auto d_features = p_features->DeviceSpan();\n  thrust::sequence(cuctx->CTP(), dh::tbegin(d_features), dh::tend(d_features), 0);\n}\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/random.h",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file random.h\n * \\brief Utility related to random.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_RANDOM_H_\n#define XGBOOST_COMMON_RANDOM_H_\n\n#include <xgboost/logging.h>\n\n#include <algorithm>\n#include <functional>\n#include <map>\n#include <memory>\n#include <numeric>\n#include <vector>\n\n#include \"algorithm.h\"        // ArgSort\n#include \"xgboost/context.h\"  // Context\n#include \"xgboost/host_device_vector.h\"\n\nnamespace xgboost::common {\n/*\n * Original paper:\n * Weighted Random Sampling (2005; Efraimidis, Spirakis)\n *\n * Blog:\n * https://timvieira.github.io/blog/post/2019/09/16/algorithms-for-sampling-without-replacement/\n*/\ntemplate <typename T>\nstd::vector<T> WeightedSamplingWithoutReplacement(Context const* ctx, RandomEngine* p_rng,\n                                                  std::vector<T> const& array,\n                                                  std::vector<float> const& weights, size_t n) {\n  auto& rng = *p_rng;\n  // ES sampling.\n  CHECK_EQ(array.size(), weights.size());\n  std::vector<float> keys(weights.size());\n  std::uniform_real_distribution<float> dist;\n  for (size_t i = 0; i < array.size(); ++i) {\n    auto w = std::max(weights.at(i), kRtEps);\n    auto u = dist(rng);\n    auto k = std::log(u) / w;\n    keys[i] = k;\n  }\n  auto ind = ArgSort<std::size_t>(ctx, keys.data(), keys.data() + keys.size(), std::greater<>{});\n  ind.resize(n);\n\n  std::vector<T> results(ind.size());\n  for (size_t k = 0; k < ind.size(); ++k) {\n    auto idx = ind[k];\n    results[k] = array[idx];\n  }\n  return results;\n}\n\nnamespace cuda_impl {\nvoid SampleFeature(Context const* ctx, bst_feature_t n_features,\n                   std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features,\n                   std::shared_ptr<HostDeviceVector<bst_feature_t>> p_new_features,\n                   HostDeviceVector<float> const& feature_weights,\n          
         HostDeviceVector<float>* weight_buffer,\n                   HostDeviceVector<bst_feature_t>* idx_buffer);\n\nvoid InitFeatureSet(Context const* ctx,\n                    std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features);\n}  // namespace cuda_impl\n\n/**\n * @brief Handles selection of columns due to colsample_bytree, colsample_bylevel and\n * colsample_bynode parameters. Should be initialised before tree construction and to\n * reset when tree construction is completed.\n */\nclass ColumnSampler {\n  std::shared_ptr<HostDeviceVector<bst_feature_t>> feature_set_tree_;\n  std::map<int, std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_set_level_;\n  HostDeviceVector<float> feature_weights_;\n  float colsample_bylevel_{1.0f};\n  float colsample_bytree_{1.0f};\n  float colsample_bynode_{1.0f};\n\n  // Used for weighted sampling.\n  HostDeviceVector<bst_feature_t> idx_buffer_;\n  HostDeviceVector<float> weight_buffer_;\n\n  std::shared_ptr<HostDeviceVector<bst_feature_t>> ColSample(\n      Context const* ctx, std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features,\n      float colsample);\n\n public:\n  ColumnSampler() = default;\n\n  /**\n   * @brief Initialise this object before use.\n   *\n   * @param num_col\n   * @param colsample_bynode  Sampling rate for node.\n   * @param colsample_bylevel Sampling rate for tree level.\n   * @param colsample_bytree  Sampling rate for tree.\n   */\n  void Init(Context const* ctx, int64_t num_col, HostDeviceVector<float> const& feature_weights,\n            float colsample_bynode, float colsample_bylevel, float colsample_bytree) {\n    this->feature_weights_.SetDevice(ctx->Device()), feature_weights.SetDevice(ctx->Device());\n    this->feature_weights_.Resize(feature_weights.Size());\n    this->feature_weights_.Copy(feature_weights);\n\n    colsample_bylevel_ = colsample_bylevel;\n    colsample_bytree_ = colsample_bytree;\n    colsample_bynode_ = colsample_bynode;\n\n    if (feature_set_tree_ == 
nullptr) {\n      feature_set_tree_ = std::make_shared<HostDeviceVector<bst_feature_t>>();\n    }\n    Reset();\n\n    // We process ColumnSampler on host for SYCL. So don't need to push data to device\n    if (!ctx->Device().IsSycl()) {\n      feature_set_tree_->SetDevice(ctx->Device());\n    }\n    feature_set_tree_->Resize(num_col);\n    if (ctx->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n      cuda_impl::InitFeatureSet(ctx, feature_set_tree_);\n#else\n      AssertGPUSupport();\n#endif\n    } else {\n      std::iota(feature_set_tree_->HostVector().begin(), feature_set_tree_->HostVector().end(), 0);\n    }\n\n    feature_set_tree_ = ColSample(ctx, feature_set_tree_, colsample_bytree_);\n  }\n\n  /**\n   * @brief Resets this object.\n   */\n  void Reset() {\n    feature_set_tree_->Resize(0);\n    feature_set_level_.clear();\n  }\n\n  /**\n   * @brief Samples a feature set.\n   *\n   * @param ctx  The runtime context.\n   * @param depth The tree depth of the node at which to sample.\n   * @return The sampled feature set.\n   *\n   * @note If colsample_bynode_ < 1.0, this method creates a new feature set each time it\n   * is called. 
Therefore, it should be called only once per node.\n   *\n   * @note With distributed xgboost, this function must be called exactly once for the\n   * construction of each tree node, and must be called the same number of times in each\n   * process and with the same parameters to return the same feature set across processes.\n   */\n  std::shared_ptr<HostDeviceVector<bst_feature_t>> GetFeatureSet(Context const* ctx, int depth) {\n    if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) {\n      return feature_set_tree_;\n    }\n\n    if (feature_set_level_.count(depth) == 0) {\n      // Level sampling, level does not yet exist so generate it\n      feature_set_level_[depth] = ColSample(ctx, feature_set_tree_, colsample_bylevel_);\n    }\n    if (colsample_bynode_ == 1.0f) {\n      // Level sampling\n      auto ptr = feature_set_level_[depth];\n      ptr->SetDevice(ctx->Device());\n      return ptr;\n    }\n    // Need to sample for the node individually\n    auto ptr = ColSample(ctx, feature_set_level_[depth], colsample_bynode_);\n    ptr->SetDevice(ctx->Device());\n    return ptr;\n  }\n};\n\nvoid SaveRng(Json* p_out, RandomEngine const& rng);\nvoid LoadRng(Json const& in, RandomEngine* rng);\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_RANDOM_H_\n"
  },
  {
    "path": "src/common/ranking_utils.cc",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#include \"ranking_utils.h\"\n\n#include <algorithm>          // for copy_n, max, min, none_of, all_of\n#include <cstddef>            // for size_t\n#include <cstdio>             // for sscanf\n#include <functional>         // for greater\n#include <string>             // for char_traits, string\n\n#include \"algorithm.h\"        // for ArgSort\n#include \"linalg_op.h\"        // for cbegin, cend\n#include \"optional_weight.h\"  // for MakeOptionalWeights\n#include \"threading_utils.h\"  // for ParallelFor\n#include \"xgboost/base.h\"     // for bst_group_t\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"     // for MetaInfo\n#include \"xgboost/linalg.h\"   // for All, TensorView, Range\n#include \"xgboost/logging.h\"  // for CHECK_EQ\n\nnamespace xgboost::ltr {\nvoid RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {\n  if (info.group_ptr_.empty()) {\n    group_ptr_.Resize(2, 0);\n    group_ptr_.HostVector()[1] = info.num_row_;\n  } else {\n    group_ptr_.HostVector() = info.group_ptr_;\n  }\n\n  auto const& gptr = group_ptr_.ConstHostVector();\n  for (std::size_t i = 1; i < gptr.size(); ++i) {\n    std::size_t n = gptr[i] - gptr[i - 1];\n    max_group_size_ = std::max(max_group_size_, n);\n  }\n\n  double sum_weights = 0;\n  auto n_groups = Groups();\n  auto device = ctx->Device().IsSycl() ? 
DeviceOrd::CPU() : ctx->Device();\n  auto weight = common::MakeOptionalWeights(device, info.weights_);\n  for (bst_omp_uint k = 0; k < n_groups; ++k) {\n    sum_weights += weight[k];\n  }\n  weight_norm_ = static_cast<double>(n_groups) / sum_weights;\n}\n\ncommon::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,\n                                                            common::Span<float const> predt) {\n  auto gptr = this->DataGroupPtr(ctx);\n  auto rank = this->sorted_idx_cache_.HostSpan();\n  CHECK_EQ(rank.size(), predt.size());\n\n  common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) {\n    auto cnt = gptr[g + 1] - gptr[g];\n    auto g_predt = predt.subspan(gptr[g], cnt);\n    auto g_rank = rank.subspan(gptr[g], cnt);\n    auto sorted_idx = common::ArgSort<std::size_t>(\n        ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{});\n    CHECK_EQ(g_rank.size(), sorted_idx.size());\n    std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data());\n  });\n\n  return rank;\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }\ncommon::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,\n                                                             common::Span<float const>) {\n  common::AssertGPUSupport();\n  return {};\n}\n#endif  // !defined()\n\nvoid NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {\n  auto const h_group_ptr = this->DataGroupPtr(ctx);\n\n  discounts_.Resize(MaxGroupSize(), 0);\n  auto& h_discounts = discounts_.HostVector();\n  for (std::size_t i = 0; i < MaxGroupSize(); ++i) {\n    h_discounts[i] = CalcDCGDiscount(i);\n  }\n\n  auto n_groups = h_group_ptr.size() - 1;\n  auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);\n\n  CheckNDCGLabels(this->Param(), h_labels,\n                  [](auto beg, auto end, auto op) { return std::none_of(beg, end, op); });\n\n  
inv_idcg_.Reshape(n_groups);\n  auto h_inv_idcg = inv_idcg_.HostView();\n  std::size_t topk = this->Param().TopK();\n  auto const exp_gain = this->Param().ndcg_exp_gain;\n\n  common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) {\n    auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1]));\n    auto sorted_idx = common::ArgSort<std::size_t>(ctx, linalg::cbegin(g_labels),\n                                                   linalg::cend(g_labels), std::greater<>{});\n\n    double idcg{0.0};\n    for (std::size_t i = 0; i < std::min(g_labels.Size(), topk); ++i) {\n      if (exp_gain) {\n        idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i]));\n      } else {\n        idcg += h_discounts[i] * g_labels(sorted_idx[i]);\n      }\n    }\n    h_inv_idcg(g) = CalcInvIDCG(idcg);\n  });\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nDMLC_REGISTER_PARAMETER(LambdaRankParam);\n\nvoid PreCache::InitOnCPU(Context const*, MetaInfo const& info) {\n  auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);\n  CheckPreLabels(\"pre\", h_label,\n                 [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nvoid MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {\n  auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);\n  CheckPreLabels(\"map\", h_label,\n                 [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nstd::string ParseMetricName(StringView name, StringView param, position_t* topn, 
bool* minus) {\n  std::string out_name;\n  if (!param.empty()) {\n    std::ostringstream os;\n    if (std::sscanf(param.c_str(), \"%u[-]?\", topn) == 1) {\n      os << name << '@' << param;\n      out_name = os.str();\n    } else {\n      os << name << param;\n      out_name = os.str();\n    }\n    if (*param.crbegin() == '-') {\n      *minus = true;\n    }\n  } else {\n    out_name = name.c_str();\n  }\n  return out_name;\n}\n\nstd::string MakeMetricName(StringView name, position_t topn, bool minus) {\n  std::ostringstream ss;\n  if (topn == LambdaRankParam::NotSet()) {\n    ss << name;\n  } else {\n    ss << name << \"@\" << topn;\n  }\n  if (minus) {\n    ss << \"-\";\n  }\n  std::string out_name = ss.str();\n  return out_name;\n}\n}  // namespace xgboost::ltr\n"
  },
  {
    "path": "src/common/ranking_utils.cu",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include <thrust/functional.h>                  // for maximum\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n#include <thrust/logical.h>                     // for none_of, all_of\n#include <thrust/reduce.h>                      // for reduce\n#include <thrust/scan.h>                        // for inclusive_scan\n\n#include <cstddef>           // for size_t\n#include <cuda/std/utility>  // for pair\n\n#include \"algorithm.cuh\"       // for SegmentedArgSort\n#include \"cuda_context.cuh\"    // for CUDAContext\n#include \"device_helpers.cuh\"  // for MakeTransformIterator, LaunchN\n#include \"optional_weight.h\"   // for MakeOptionalWeights, OptionalWeights\n#include \"ranking_utils.cuh\"   // for ThreadsForMean\n#include \"ranking_utils.h\"\n#include \"threading_utils.cuh\"  // for SegmentedTrapezoidThreads\n#include \"xgboost/base.h\"       // for XGBOOST_DEVICE, bst_group_t\n#include \"xgboost/context.h\"    // for Context\n#include \"xgboost/linalg.h\"     // for VectorView, All, Range\n#include \"xgboost/logging.h\"    // for CHECK\n#include \"xgboost/span.h\"       // for Span\n\nnamespace xgboost::ltr {\nnamespace cuda_impl {\nvoid CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,\n                    common::Span<std::size_t const> d_sorted_idx, bool exp_gain,\n                    common::Span<bst_group_t const> d_group_ptr, std::size_t k,\n                    linalg::VectorView<double> out_dcg) {\n  CHECK_EQ(d_group_ptr.size() - 1, out_dcg.Size());\n  using IdxGroup = cuda::std::pair<std::size_t, std::size_t>;\n  auto group_it = dh::MakeTransformIterator<IdxGroup>(\n      thrust::make_counting_iterator(0ull), [=] XGBOOST_DEVICE(std::size_t idx) {\n        return cuda::std::make_pair(idx, dh::SegmentId(d_group_ptr, idx));\n      });\n  auto value_it = dh::MakeTransformIterator<double>(\n      group_it,\n      [exp_gain, 
d_labels, d_group_ptr, k,\n       d_sorted_idx] XGBOOST_DEVICE(IdxGroup const& l) -> double {\n        auto g_begin = d_group_ptr[l.second];\n        auto g_size = d_group_ptr[l.second + 1] - g_begin;\n\n        auto idx_in_group = l.first - g_begin;\n        if (idx_in_group >= k) {\n          return 0.0;\n        }\n        double gain{0.0};\n        auto g_sorted_idx = d_sorted_idx.subspan(g_begin, g_size);\n        auto g_labels = d_labels.Slice(linalg::Range(g_begin, g_begin + g_size));\n\n        if (exp_gain) {\n          gain = ltr::CalcDCGGain(g_labels(g_sorted_idx[idx_in_group]));\n        } else {\n          gain = g_labels(g_sorted_idx[idx_in_group]);\n        }\n        double discount = CalcDCGDiscount(idx_in_group);\n        return gain * discount;\n      });\n\n  CHECK(out_dcg.Contiguous());\n  std::size_t bytes;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(),\n                                                d_group_ptr.size() - 1, d_group_ptr.data(),\n                                                d_group_ptr.data() + 1, ctx->CUDACtx()->Stream()));\n  dh::TemporaryArray<char> temp(bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(\n      temp.data().get(), bytes, value_it, out_dcg.Values().data(), d_group_ptr.size() - 1,\n      d_group_ptr.data(), d_group_ptr.data() + 1, ctx->CUDACtx()->Stream()));\n}\n\nvoid CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView<float const> d_labels,\n                        common::Span<bst_group_t const> d_group_ptr,\n                        linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const& p) {\n  CHECK_GE(d_group_ptr.size(), 2ul);\n  size_t n_groups = d_group_ptr.size() - 1;\n  CHECK_EQ(out_inv_IDCG.Size(), n_groups);\n  dh::device_vector<std::size_t> sorted_idx(d_labels.Size());\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n  common::SegmentedArgSort<false, true>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);\n  
CalcQueriesDCG(ctx, d_labels, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), out_inv_IDCG);\n  dh::LaunchN(out_inv_IDCG.Size(), ctx->CUDACtx()->Stream(),\n              [out_inv_IDCG] XGBOOST_DEVICE(size_t idx) mutable {\n                double idcg = out_inv_IDCG(idx);\n                out_inv_IDCG(idx) = CalcInvIDCG(idcg);\n              });\n}\n}  // namespace cuda_impl\n\nnamespace {\nstruct CheckNDCGOp {\n  CUDAContext const* cuctx;\n  template <typename It, typename Op>\n  bool operator()(It beg, It end, Op op) {\n    return thrust::none_of(cuctx->CTP(), beg, end, op);\n  }\n};\nstruct CheckMAPOp {\n  CUDAContext const* cuctx;\n  template <typename It, typename Op>\n  bool operator()(It beg, It end, Op op) {\n    return thrust::all_of(cuctx->CTP(), beg, end, op);\n  }\n};\n\nstruct ThreadGroupOp {\n  common::Span<bst_group_t const> d_group_ptr;\n  std::size_t n_pairs;\n\n  common::Span<std::size_t> out_thread_group_ptr;\n\n  XGBOOST_DEVICE void operator()(std::size_t i) {\n    out_thread_group_ptr[i + 1] =\n        cuda_impl::ThreadsForMean(d_group_ptr[i + 1] - d_group_ptr[i], n_pairs);\n  }\n};\n\nstruct GroupSizeOp {\n  common::Span<bst_group_t const> d_group_ptr;\n\n  XGBOOST_DEVICE auto operator()(std::size_t i) -> std::size_t {\n    return d_group_ptr[i + 1] - d_group_ptr[i];\n  }\n};\n\nstruct WeightOp {\n  common::OptionalWeights d_weight;\n  XGBOOST_DEVICE auto operator()(std::size_t i) -> double { return d_weight[i]; }\n};\n}  // anonymous namespace\n\nvoid RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {\n  CUDAContext const* cuctx = ctx->CUDACtx();\n\n  group_ptr_.SetDevice(ctx->Device());\n  if (info.group_ptr_.empty()) {\n    group_ptr_.Resize(2, 0);\n    group_ptr_.HostVector()[1] = info.num_row_;\n  } else {\n    auto const& h_group_ptr = info.group_ptr_;\n    group_ptr_.Resize(h_group_ptr.size());\n    auto d_group_ptr = group_ptr_.DeviceSpan();\n    dh::safe_cuda(cudaMemcpyAsync(d_group_ptr.data(), 
h_group_ptr.data(), d_group_ptr.size_bytes(),\n                                  cudaMemcpyHostToDevice, cuctx->Stream()));\n  }\n\n  auto d_group_ptr = DataGroupPtr(ctx);\n  std::size_t n_groups = Groups();\n\n  auto it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),\n                                                   GroupSizeOp{d_group_ptr});\n  max_group_size_ =\n      thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});\n\n  threads_group_ptr_.SetDevice(ctx->Device());\n  threads_group_ptr_.Resize(n_groups + 1, 0);\n  auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();\n  if (param_.HasTruncation()) {\n    n_cuda_threads_ =\n        common::SegmentedTrapezoidThreads(ctx, d_group_ptr, d_threads_group_ptr, Param().NumPair());\n  } else {\n    auto n_pairs = Param().NumPair();\n    dh::LaunchN(n_groups, cuctx->Stream(),\n                ThreadGroupOp{d_group_ptr, n_pairs, d_threads_group_ptr});\n    thrust::inclusive_scan(cuctx->CTP(), dh::tcbegin(d_threads_group_ptr),\n                           dh::tcend(d_threads_group_ptr), dh::tbegin(d_threads_group_ptr));\n    n_cuda_threads_ = info.num_row_ * param_.NumPair();\n  }\n\n  sorted_idx_cache_.SetDevice(ctx->Device());\n  sorted_idx_cache_.Resize(info.labels.Size(), 0);\n\n  auto weight = common::MakeOptionalWeights(ctx->Device(), info.weights_);\n  auto w_it =\n      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), WeightOp{weight});\n  weight_norm_ = static_cast<double>(n_groups) / thrust::reduce(w_it, w_it + n_groups);\n}\n\ncommon::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,\n                                                             common::Span<float const> predt) {\n  auto d_sorted_idx = sorted_idx_cache_.DeviceSpan();\n  auto d_group_ptr = DataGroupPtr(ctx);\n  common::SegmentedArgSort<false, true>(ctx, predt, d_group_ptr, d_sorted_idx);\n  return d_sorted_idx;\n}\n\nvoid 
NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {\n  CUDAContext const* cuctx = ctx->CUDACtx();\n  auto labels = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n  CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});\n\n  auto d_group_ptr = this->DataGroupPtr(ctx);\n\n  std::size_t n_groups = d_group_ptr.size() - 1;\n  inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);\n  auto d_inv_idcg = inv_idcg_.View(ctx->Device());\n  cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());\n  CHECK_GE(this->Param().NumPair(), 1ul);\n\n  discounts_.SetDevice(ctx->Device());\n  discounts_.Resize(MaxGroupSize());\n  auto d_discount = discounts_.DeviceSpan();\n  dh::LaunchN(MaxGroupSize(), cuctx->Stream(),\n              [=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });\n}\n\nvoid PreCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {\n  auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n  CheckPreLabels(\"pre\", d_label, CheckMAPOp{ctx->CUDACtx()});\n}\n\nvoid MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {\n  auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n  CheckPreLabels(\"map\", d_label, CheckMAPOp{ctx->CUDACtx()});\n}\n}  // namespace xgboost::ltr\n"
  },
  {
    "path": "src/common/ranking_utils.cuh",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_RANKING_UTILS_CUH_\n#define XGBOOST_COMMON_RANKING_UTILS_CUH_\n\n#include <cstddef>            // for size_t\n\n#include \"ranking_utils.h\"    // for LambdaRankParam\n#include \"xgboost/base.h\"     // for bst_group_t, XGBOOST_DEVICE\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/linalg.h\"   // for VectorView\n#include \"xgboost/span.h\"     // for Span\n\nnamespace xgboost {\nnamespace ltr {\nnamespace cuda_impl {\nvoid CalcQueriesDCG(Context const *ctx, linalg::VectorView<float const> d_labels,\n                    common::Span<std::size_t const> d_sorted_idx, bool exp_gain,\n                    common::Span<bst_group_t const> d_group_ptr, std::size_t k,\n                    linalg::VectorView<double> out_dcg);\n\nvoid CalcQueriesInvIDCG(Context const *ctx, linalg::VectorView<float const> d_labels,\n                        common::Span<bst_group_t const> d_group_ptr,\n                        linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const &p);\n\n// Functions for creating number of threads for CUDA, and getting back the number of pairs\n// from the number of threads.\nXGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size,\n                                                          std::size_t n_pairs) {\n  return group_size * n_pairs;\n}\n// Number of threads in a group divided by the number of samples in this group, returns\n// the number of pairs for pair-wise ltr with sampling.\nXGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads,\n                                                         std::size_t group_size) {\n  return n_threads / group_size;\n}\n}  // namespace cuda_impl\n}  // namespace ltr\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_RANKING_UTILS_CUH_\n"
  },
  {
    "path": "src/common/ranking_utils.h",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_RANKING_UTILS_H_\n#define XGBOOST_COMMON_RANKING_UTILS_H_\n#include <algorithm>                     // for min\n#include <cmath>                         // for log2, fabs, floor\n#include <cstddef>                       // for size_t\n#include <cstdint>                       // for uint32_t, uint8_t, int32_t\n#include <limits>                        // for numeric_limits\n#include <string>                        // for char_traits, string\n#include <vector>                        // for vector\n\n#include \"dmlc/parameter.h\"              // for FieldEntry, DMLC_DECLARE_FIELD\n#include \"error_msg.h\"                   // for GroupWeight, GroupSize, InvalidCUDAOrdinal\n#include \"xgboost/base.h\"                // for XGBOOST_DEVICE, bst_group_t\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for Vector, VectorView, Tensor\n#include \"xgboost/logging.h\"             // for CHECK_EQ, CHECK\n#include \"xgboost/parameter.h\"           // for XGBoostParameter\n#include \"xgboost/span.h\"                // for Span\n#include \"xgboost/string_view.h\"         // for StringView\n\nnamespace xgboost::ltr {\n/**\n * \\brief Relevance degree\n */\nusing rel_degree_t = std::uint32_t;  // NOLINT\n/**\n * \\brief top-k position\n */\nusing position_t = std::uint32_t;  // NOLINT\n\n/**\n * \\brief Maximum relevance degree for NDCG\n */\nconstexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }\nstatic_assert(MaxRel() == 31);\n\nXGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) {\n  return static_cast<double>((1u << label) - 1);\n}\n\nXGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) {\n  return 1.0 / std::log2(static_cast<double>(idx) + 2.0);\n}\n\nXGBOOST_DEVICE inline 
double CalcInvIDCG(double idcg) {\n  auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg));  // handle irrelevant document\n  return inv_idcg;\n}\n\nenum class PairMethod : std::int32_t {\n  kTopK = 0,\n  kMean = 1,\n};\n}  // namespace xgboost::ltr\n\nDECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);\n\nnamespace xgboost::ltr {\nstruct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {\n private:\n  static constexpr position_t DefaultK() { return 32; }\n  static constexpr position_t DefaultSamplePairs() { return 1; }\n\n protected:\n  // pairs\n  // should be accessed by getter for auto configuration.\n  // nolint so that we can keep the string name.\n  PairMethod lambdarank_pair_method{PairMethod::kTopK};  // NOLINT\n  std::size_t lambdarank_num_pair_per_sample{NotSet()};  // NOLINT\n\n public:\n  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }\n\n  // unbiased\n  bool lambdarank_unbiased{false};\n  bool lambdarank_normalization{true};\n  bool lambdarank_score_normalization{true};\n  double lambdarank_bias_norm{1.0};\n  // ndcg\n  bool ndcg_exp_gain{true};\n\n  bool operator==(LambdaRankParam const& that) const {\n    return lambdarank_pair_method == that.lambdarank_pair_method &&\n           lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&\n           lambdarank_unbiased == that.lambdarank_unbiased &&\n           lambdarank_normalization == that.lambdarank_normalization &&\n           lambdarank_score_normalization == that.lambdarank_score_normalization &&\n           lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;\n  }\n  bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }\n\n  [[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); }\n\n  /**\n   * \\brief Get number of pairs for each sample\n   */\n  [[nodiscard]] position_t NumPair() const {\n    if (lambdarank_num_pair_per_sample 
== NotSet()) {\n      switch (lambdarank_pair_method) {\n        case PairMethod::kMean:\n          return DefaultSamplePairs();\n        case PairMethod::kTopK:\n          return DefaultK();\n      }\n    } else {\n      return lambdarank_num_pair_per_sample;\n    }\n    LOG(FATAL) << \"Unreachable.\";\n    return 0;\n  }\n\n  [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }\n  [[nodiscard]] bool IsMean() const { return lambdarank_pair_method == PairMethod::kMean; }\n\n  // Used for evaluation metric and cache initialization, iterate through top-k or the whole list\n  [[nodiscard]] auto TopK() const {\n    if (HasTruncation()) {\n      return NumPair();\n    } else {\n      return NotSet();\n    }\n  }\n\n  DMLC_DECLARE_PARAMETER(LambdaRankParam) {\n    DMLC_DECLARE_FIELD(lambdarank_pair_method)\n        .set_default(PairMethod::kTopK)\n        .add_enum(\"mean\", PairMethod::kMean)\n        .add_enum(\"topk\", PairMethod::kTopK)\n        .describe(\"Method for constructing pairs.\");\n    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)\n        .set_default(NotSet())\n        .set_lower_bound(1)\n        .describe(\"Number of pairs for each sample in the list.\");\n    DMLC_DECLARE_FIELD(lambdarank_unbiased)\n        .set_default(false)\n        .describe(\"Unbiased lambda mart. 
Use extended IPW to debias click position\");\n    DMLC_DECLARE_FIELD(lambdarank_normalization)\n        .set_default(true)\n        .describe(\"Whether to normalize the leaf value for lambda rank.\");\n    DMLC_DECLARE_FIELD(lambdarank_score_normalization)\n        .set_default(true)\n        .describe(\"Whether to normalize the delta by prediction score difference.\");\n    DMLC_DECLARE_FIELD(lambdarank_bias_norm)\n        .set_default(1.0)\n        .set_lower_bound(0.0)\n        .describe(\"Lp regularization for unbiased lambdarank.\");\n    DMLC_DECLARE_FIELD(ndcg_exp_gain)\n        .set_default(true)\n        .describe(\"When set to true, the label gain is 2^rel - 1, otherwise it's rel.\");\n  }\n};\n\n/**\n * \\brief Common cached items for ranking tasks.\n */\nclass RankingCache {\n private:\n  void InitOnCPU(Context const* ctx, MetaInfo const& info);\n  void InitOnCUDA(Context const* ctx, MetaInfo const& info);\n  // Cached parameter\n  LambdaRankParam param_;\n  // offset to data groups.\n  HostDeviceVector<bst_group_t> group_ptr_;\n  // store the sorted index of prediction.\n  HostDeviceVector<std::size_t> sorted_idx_cache_;\n  // Maximum size of group\n  std::size_t max_group_size_{0};\n  // Normalization for weight\n  double weight_norm_{1.0};\n  /**\n   * CUDA cache\n   */\n  // offset to threads assigned to each group for gradient calculation\n  HostDeviceVector<std::size_t> threads_group_ptr_;\n  // Sorted index of label for finding buckets.\n  HostDeviceVector<std::size_t> y_sorted_idx_cache_;\n  // Cached labels sorted by the model\n  HostDeviceVector<float> y_ranked_by_model_;\n  // Rounding factor for CUDA deterministic floating point summation. One rounding factor\n  // for each ranking group.\n  linalg::Vector<GradientPair> roundings_;\n  // rounding factor for cost\n  HostDeviceVector<double> cost_rounding_;\n  // temporary storage for creating rounding factors. 
Stored as byte to avoid having cuda\n  // data structure in here.\n  HostDeviceVector<std::uint8_t> max_lambdas_;\n  // total number of cuda threads used for gradient calculation\n  std::size_t n_cuda_threads_{0};\n\n  // Create model rank list on GPU\n  common::Span<std::size_t const> MakeRankOnCUDA(Context const* ctx,\n                                                 common::Span<float const> predt);\n  // Create model rank list on CPU\n  common::Span<std::size_t const> MakeRankOnCPU(Context const* ctx,\n                                                common::Span<float const> predt);\n\n protected:\n  [[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }\n\n public:\n  RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} {\n    CHECK(param_.GetInitialised());\n    if (!info.group_ptr_.empty()) {\n      CHECK_EQ(info.group_ptr_.back(), info.labels.Size())\n          << error::GroupSize() << \"the size of label.\";\n    }\n    if (ctx->IsCUDA()) {\n      this->InitOnCUDA(ctx, info);\n    } else {\n      this->InitOnCPU(ctx, info);\n    }\n    if (!info.weights_.Empty()) {\n      CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();\n    }\n    if (param_.HasTruncation()) {\n      CHECK_GE(param_.NumPair(), 1);\n    }\n  }\n  [[nodiscard]] std::size_t MaxPositionSize() const {\n    // Use truncation level as bound.\n    if (param_.HasTruncation()) {\n      return param_.NumPair();\n    }\n    // Hardcoded maximum size of positions to track. We don't need too many of them as the\n    // bias decreases exponentially.\n    return std::min(max_group_size_, static_cast<std::size_t>(32));\n  }\n  // Constructed as [1, n_samples] if group ptr is not supplied by the user\n  common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {\n    group_ptr_.SetDevice(ctx->Device());\n    return ctx->IsCUDA() ? 
group_ptr_.ConstDeviceSpan() : group_ptr_.ConstHostSpan();\n  }\n\n  [[nodiscard]] auto const& Param() const { return param_; }\n  [[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }\n  [[nodiscard]] double WeightNorm() const { return weight_norm_; }\n\n  // Create a rank list by model prediction\n  common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {\n    if (sorted_idx_cache_.Empty()) {\n      sorted_idx_cache_.SetDevice(ctx->Device());\n      sorted_idx_cache_.Resize(predt.size());\n    }\n    if (ctx->IsCUDA()) {\n      return this->MakeRankOnCUDA(ctx, predt);\n    } else {\n      return this->MakeRankOnCPU(ctx, predt);\n    }\n  }\n  // The function simply returns a uninitialized buffer as this is only used by the\n  // objective for creating pairs.\n  common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {\n    CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();\n    if (y_sorted_idx_cache_.Empty()) {\n      y_sorted_idx_cache_.SetDevice(ctx->Device());\n      y_sorted_idx_cache_.Resize(n_samples);\n    }\n    return y_sorted_idx_cache_.DeviceSpan();\n  }\n  common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {\n    CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();\n    if (y_ranked_by_model_.Empty()) {\n      y_ranked_by_model_.SetDevice(ctx->Device());\n      y_ranked_by_model_.Resize(n_samples);\n    }\n    return y_ranked_by_model_.DeviceSpan();\n  }\n\n  // CUDA cache getters, the cache is shared between metric and objective, some of these\n  // fields are initialized lazily to avoid unnecessary allocation.\n  [[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {\n    CHECK(!threads_group_ptr_.Empty());\n    return threads_group_ptr_.ConstDeviceSpan();\n  }\n  [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }\n\n  [[nodiscard]] linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) 
{\n    if (roundings_.Size() == 0) {\n      roundings_.SetDevice(ctx->Device());\n      roundings_.Reshape(Groups());\n    }\n    return roundings_.View(ctx->Device());\n  }\n  [[nodiscard]] common::Span<double> CUDACostRounding(Context const* ctx) {\n    if (cost_rounding_.Size() == 0) {\n      cost_rounding_.SetDevice(ctx->Device());\n      cost_rounding_.Resize(1);\n    }\n    return cost_rounding_.DeviceSpan();\n  }\n  template <typename Type>\n  common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {\n    max_lambdas_.SetDevice(ctx->Device());\n    std::size_t bytes = n * sizeof(Type);\n    if (bytes != max_lambdas_.Size()) {\n      max_lambdas_.Resize(bytes);\n    }\n    return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};\n  }\n};\n\nclass NDCGCache : public RankingCache {\n  // NDCG discount\n  HostDeviceVector<double> discounts_;\n  // 1.0 / IDCG\n  linalg::Vector<double> inv_idcg_;\n  /**\n   * CUDA cache\n   */\n  // store the intermediate DCG calculation result for metric\n  linalg::Vector<double> dcg_;\n\n public:\n  void InitOnCPU(Context const* ctx, MetaInfo const& info);\n  void InitOnCUDA(Context const* ctx, MetaInfo const& info);\n\n public:\n  NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)\n      : RankingCache{ctx, info, p} {\n    if (ctx->IsCUDA()) {\n      this->InitOnCUDA(ctx, info);\n    } else {\n      this->InitOnCPU(ctx, info);\n    }\n  }\n\n  linalg::VectorView<double const> InvIDCG(Context const* ctx) const {\n  // This function doesn't have sycl-specific implementation yet.\n  // For that reason we transfer data to host in case of sycl is used for propper execution.\n    return inv_idcg_.View(ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device());\n  }\n  common::Span<double const> Discount(Context const* ctx) const {\n    return ctx->IsCUDA() ? 
discounts_.ConstDeviceSpan() : discounts_.ConstHostSpan();\n  }\n  linalg::VectorView<double> Dcg(Context const* ctx) {\n    if (dcg_.Size() == 0) {\n      dcg_.SetDevice(ctx->Device());\n      dcg_.Reshape(this->Groups());\n    }\n    return dcg_.View(ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device());\n  }\n};\n\n/**\n * \\brief Validate label for NDCG\n *\n * \\tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the\n *                check for both CPU and GPU.\n */\ntemplate <typename NoneOf>\nvoid CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,\n                     NoneOf none_of) {\n  auto d_labels = labels.Values();\n  if (p.ndcg_exp_gain) {\n    auto label_is_integer =\n        none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {\n          auto l = std::floor(v);\n          return std::fabs(l - v) > kRtEps || v < 0.0f;\n        });\n    CHECK(label_is_integer)\n        << \"When using relevance degree as target, label must be either 0 or positive integer.\";\n  }\n\n  if (p.ndcg_exp_gain) {\n    auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),\n                                  [] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });\n    CHECK(label_is_valid) << \"Relevance degress must be lesser than or equal to \" << MaxRel()\n                          << \" when the exponential NDCG gain function is used. 
\"\n                          << \"Set `ndcg_exp_gain` to false to use custom DCG gain.\";\n  }\n}\n\ntemplate <typename AllOf>\nbool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {\n  auto s_label = label.Values();\n  return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {\n    return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;\n  });\n}\n/**\n * \\brief Validate label for precision-based metric.\n *\n * \\tparam Implementation of std::all_of. Specified as a parameter to reuse the check for\n *         both CPU and GPU.\n */\ntemplate <typename AllOf>\nvoid CheckPreLabels(StringView name, linalg::VectorView<float const> label, AllOf all_of) {\n  auto is_binary = IsBinaryRel(label, all_of);\n  CHECK(is_binary) << name << \" can only be used with binary labels.\";\n}\n\nclass PreCache : public RankingCache {\n  HostDeviceVector<double> pre_;\n\n  void InitOnCPU(Context const* ctx, MetaInfo const& info);\n  void InitOnCUDA(Context const* ctx, MetaInfo const& info);\n\n public:\n  PreCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)\n      : RankingCache{ctx, info, p} {\n    if (ctx->IsCUDA()) {\n      this->InitOnCUDA(ctx, info);\n    } else {\n      this->InitOnCPU(ctx, info);\n    }\n  }\n\n  common::Span<double> Pre(Context const* ctx) {\n    if (pre_.Empty()) {\n      pre_.SetDevice(ctx->Device());\n      pre_.Resize(this->Groups());\n    }\n    return ctx->IsCUDA() ? 
pre_.DeviceSpan() : pre_.HostSpan();\n  }\n};\n\nclass MAPCache : public RankingCache {\n  // Total number of relevant documents for each group\n  HostDeviceVector<double> n_rel_;\n  // \\sum l_k/k\n  HostDeviceVector<double> acc_;\n  HostDeviceVector<double> map_;\n  // Number of samples in this dataset.\n  std::size_t n_samples_{0};\n\n  void InitOnCPU(Context const* ctx, MetaInfo const& info);\n  void InitOnCUDA(Context const* ctx, MetaInfo const& info);\n\n public:\n  MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)\n      : RankingCache{ctx, info, p}, n_samples_{static_cast<std::size_t>(info.num_row_)} {\n    if (ctx->IsCUDA()) {\n      this->InitOnCUDA(ctx, info);\n    } else {\n      this->InitOnCPU(ctx, info);\n    }\n  }\n\n  common::Span<double> NumRelevant(Context const* ctx) {\n    if (n_rel_.Empty()) {\n      n_rel_.SetDevice(ctx->Device());\n      n_rel_.Resize(n_samples_);\n    }\n    return ctx->IsCUDA() ? n_rel_.DeviceSpan() : n_rel_.HostSpan();\n  }\n  common::Span<double> Acc(Context const* ctx) {\n    if (acc_.Empty()) {\n      acc_.SetDevice(ctx->Device());\n      acc_.Resize(n_samples_);\n    }\n    return ctx->IsCUDA() ? acc_.DeviceSpan() : acc_.HostSpan();\n  }\n  common::Span<double> Map(Context const* ctx) {\n    if (map_.Empty()) {\n      map_.SetDevice(ctx->Device());\n      map_.Resize(this->Groups());\n    }\n    return ctx->IsCUDA() ? map_.DeviceSpan() : map_.HostSpan();\n  }\n};\n\n/**\n * \\brief Parse name for ranking metric given parameters.\n *\n * \\param [in] name   Null terminated string for metric name\n * \\param [in] param  Null terminated string for parameter like the `3-` in `ndcg@3-`.\n * \\param [out] topn  Top n documents parsed from param. Unchanged if it's not specified.\n * \\param [out] minus Whether we should turn the score into loss. 
Unchanged if it's not\n *                    specified.\n *\n * \\return The name of the metric.\n */\nstd::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);\n\n/**\n * \\brief Make the name of a ranking metric from the given parameters.\n */\nstd::string MakeMetricName(StringView name, position_t topn, bool minus);\n}  // namespace xgboost::ltr\n#endif  // XGBOOST_COMMON_RANKING_UTILS_H_\n"
  },
  {
    "path": "src/common/ref_resource_view.cuh",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <memory>   // for make_shared\n\n#include \"cuda_context.cuh\"     // for CUDAContext\n#include \"cuda_stream.h\"        // for StreamRef\n#include \"ref_resource_view.h\"  // for RefResourceView\n#include \"resource.cuh\"         // for CudaAllocResource\n#include \"xgboost/context.h\"    // for Context\n\nnamespace xgboost::common {\n/**\n * @brief Make a fixed size `RefResourceView` with cudaMalloc resource.\n */\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeFixedVecWithCudaMalloc(std::size_t n_elements) {\n  auto resource = std::make_shared<common::CudaMallocResource>(n_elements * sizeof(T));\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  return ref;\n}\n\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeCudaGrowOnly(std::size_t n_elements) {\n  auto resource = std::make_shared<common::CudaGrowOnlyResource>(n_elements * sizeof(T));\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  return ref;\n}\n\n/**\n * @brief Initialize the data in addition to allocation.\n */\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeFixedVecWithCudaMalloc(Context const* ctx,\n                                                            std::size_t n_elements, T const& init) {\n  auto ref = MakeFixedVecWithCudaMalloc<T>(n_elements);\n  thrust::fill_n(ctx->CUDACtx()->CTP(), ref.data(), ref.size(), init);\n  return ref;\n}\n\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeFixedVecWithPinnedMalloc(std::size_t n_elements) {\n  auto resource = std::make_shared<common::CudaPinnedResource>(n_elements * sizeof(T));\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  return ref;\n}\n\n/**\n * @brief Create a fixed size resource view from a shared pinned memory pool.\n */\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> 
MakeFixedVecWithPinnedMemPool(\n    std::shared_ptr<cuda_impl::HostPinnedMemPool> pool, std::size_t n_elements,\n    curt::StreamRef stream) {\n  auto resource = std::make_shared<common::HostPinnedMemPoolResource>(\n      std::move(pool), n_elements * sizeof(T), stream);\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  return ref;\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/ref_resource_view.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_REF_RESOURCE_VIEW_H_\n#define XGBOOST_COMMON_REF_RESOURCE_VIEW_H_\n\n#include <algorithm>    // for fill_n\n#include <cstdint>      // for uint64_t\n#include <cstring>      // for memcpy\n#include <memory>       // for shared_ptr, make_shared\n#include <type_traits>  // for is_reference_v, remove_reference_t, is_same_v\n#include <utility>      // for swap, move\n\n#include \"io.h\"               // for ResourceHandler, AlignedResourceReadStream, MallocResource\n#include \"threading_utils.h\"  // for ParallelForBlock\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost::common {\n/**\n * @brief A vector-like type that holds a reference counted resource.\n *\n *    The vector size is immutable after construction. This way we can swap the underlying\n *    resource when needed.\n */\ntemplate <typename T>\nclass RefResourceView {\n  static_assert(!std::is_reference_v<T>);\n\n public:\n  using value_type = T;             // NOLINT\n  using size_type = std::uint64_t;  // NOLINT\n\n private:\n  value_type* ptr_{nullptr};\n  size_type size_{0};\n  std::shared_ptr<common::ResourceHandler> mem_{nullptr};\n\n protected:\n  void Init(value_type* ptr, size_type size, std::shared_ptr<common::ResourceHandler> mem) {\n    ptr_ = ptr;\n    size_ = size;\n    mem_ = std::move(mem);\n  }\n\n public:\n  /**\n   * @brief Construct a view on ptr with length n. 
The ptr is held by the mem resource.\n   *\n   * @param ptr  The pointer to view.\n   * @param n    The length of the view.\n   * @param mem  The owner of the pointer.\n   */\n  RefResourceView(value_type* ptr, size_type n, std::shared_ptr<common::ResourceHandler> mem)\n      : ptr_{ptr}, size_{n}, mem_{std::move(mem)} {\n    CHECK_GE(mem_->Size(), n);\n  }\n\n  ~RefResourceView() = default;\n\n  RefResourceView() = default;\n  RefResourceView(RefResourceView const& that) = delete;\n  RefResourceView& operator=(RefResourceView const& that) = delete;\n  /**\n   * @brief We allow move assignment for lazy initialization.\n   */\n  RefResourceView(RefResourceView&& that) = default;\n  RefResourceView& operator=(RefResourceView&& that) = default;\n\n  [[nodiscard]] size_type size() const { return size_; }  // NOLINT\n  [[nodiscard]] size_type size_bytes() const {            // NOLINT\n    return Span<const value_type>{data(), static_cast<std::size_t>(size())}.size_bytes();\n  }\n  [[nodiscard]] value_type* data() { return ptr_; };              // NOLINT\n  [[nodiscard]] value_type const* data() const { return ptr_; };  // NOLINT\n  [[nodiscard]] bool empty() const { return size() == 0; }        // NOLINT\n\n  [[nodiscard]] auto cbegin() const { return data(); }         // NOLINT\n  [[nodiscard]] auto begin() { return data(); }                // NOLINT\n  [[nodiscard]] auto begin() const { return cbegin(); }        // NOLINT\n  [[nodiscard]] auto cend() const { return data() + size(); }  // NOLINT\n  [[nodiscard]] auto end() { return data() + size(); }         // NOLINT\n  [[nodiscard]] auto end() const { return cend(); }            // NOLINT\n\n  [[nodiscard]] auto const& front() const { return data()[0]; }          // NOLINT\n  [[nodiscard]] auto& front() { return data()[0]; }                      // NOLINT\n  [[nodiscard]] auto const& back() const { return data()[size() - 1]; }  // NOLINT\n  [[nodiscard]] auto& back() { return data()[size() - 1]; }              // 
NOLINT\n\n  [[nodiscard]] value_type& operator[](size_type i) { return ptr_[i]; }\n  [[nodiscard]] value_type const& operator[](size_type i) const { return ptr_[i]; }\n  [[nodiscard]] value_type& at(size_type i) {  // NOLINT\n    SPAN_LT(i, this->size_);\n    return ptr_[i];\n  }\n  [[nodiscard]] value_type const& at(size_type i) const {  // NOLINT\n    SPAN_LT(i, this->size_);\n    return ptr_[i];\n  }\n  [[nodiscard]] Span<std::add_const_t<T>> ToSpan() const { return {this->data(), this->size()}; }\n  [[nodiscard]] Span<T> ToSpan() { return {this->data(), this->size()}; }\n\n  /**\n   * @brief Get the underlying resource.\n   */\n  auto Resource() const { return mem_; }\n};\n\n/**\n * @brief Read a vector from stream. Accepts both `std::vector` and `RefResourceView`.\n *\n *  If the output vector is a reference counted view, no copying occurs.\n */\ntemplate <typename Vec>\n[[nodiscard]] bool ReadVec(common::AlignedResourceReadStream* fi, Vec* vec) {\n  std::uint64_t n{0};\n  if (!fi->Read(&n)) {\n    return false;\n  }\n  if (n == 0) {\n    return true;\n  }\n\n  using T = typename Vec::value_type;\n  auto expected_bytes = sizeof(T) * n;\n\n  auto [ptr, n_bytes] = fi->Consume(expected_bytes);\n  if (n_bytes != expected_bytes) {\n    return false;\n  }\n\n  if constexpr (std::is_same_v<Vec, RefResourceView<T>>) {\n    *vec = RefResourceView<T>{reinterpret_cast<T*>(ptr), n, fi->Share()};\n  } else {\n    vec->resize(n);\n    std::memcpy(vec->data(), ptr, n_bytes);\n  }\n  return true;\n}\n\n/**\n * @brief Write a vector to stream. 
Accepts both `std::vector` and `RefResourceView`.\n */\ntemplate <typename Vec>\n[[nodiscard]] std::size_t WriteVec(AlignedFileWriteStream* fo, Vec const& vec) {\n  std::size_t bytes{0};\n  auto n = static_cast<std::uint64_t>(vec.size());\n  bytes += fo->Write(n);\n  if (n == 0) {\n    return sizeof(n);\n  }\n\n  using T = typename std::remove_reference_t<decltype(vec)>::value_type;\n  bytes += fo->Write(vec.data(), vec.size() * sizeof(T));\n\n  return bytes;\n}\n\n/**\n * @brief Make a fixed size `RefResourceView` with malloc resource.\n */\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeFixedVecWithMalloc(std::size_t n_elements, T const& init) {\n  auto resource = std::make_shared<common::MallocResource>(n_elements * sizeof(T));\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  std::fill_n(ref.data(), ref.size(), init);\n  return ref;\n}\n\n/**\n * @brief Make a fixed size `RefResourceView` with malloc resource.\n *\n * Use n_threads to initialize the storage\n */\ntemplate <typename T>\n[[nodiscard]] RefResourceView<T> MakeFixedVecWithMalloc(Context const* ctx, std::size_t n_elements,\n                                                        T const& init) {\n  auto resource = std::make_shared<common::MallocResource>(n_elements * sizeof(T));\n  auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};\n  common::ParallelForBlock(n_elements, ctx->Threads(), [&](auto&& block) {\n    std::fill_n(ref.data() + block.begin(), block.Size(), init);\n  });\n  return ref;\n}\n\ntemplate <typename T>\nclass ReallocVector : public RefResourceView<T> {\n  static_assert(!std::is_reference_v<T>);\n  static_assert(!std::is_const_v<T>);\n  static_assert(std::is_trivially_copyable_v<T>);\n\n  using Upper = RefResourceView<T>;\n  using size_type = typename Upper::size_type;    // NOLINT\n  using value_type = typename Upper::value_type;  // NOLINT\n\n public:\n  ReallocVector() : RefResourceView<T>{MakeFixedVecWithMalloc(0, 
T{})} {}\n\n  ReallocVector(size_type n, value_type const& init)\n      : RefResourceView<T>{MakeFixedVecWithMalloc(n, init)} {}\n  ReallocVector(ReallocVector const& that) = delete;\n  ReallocVector(ReallocVector&& that) = delete;\n  ReallocVector& operator=(ReallocVector const& that) = delete;\n  ReallocVector& operator=(ReallocVector&& that) = delete;\n\n  void Resize(typename Upper::size_type new_size) {\n    auto resource = std::dynamic_pointer_cast<common::MallocResource>(this->Resource());\n    CHECK(resource);\n    resource->Resize(new_size * sizeof(T));\n    this->Init(resource->template DataAs<T>(), new_size, resource);\n  }\n};\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_REF_RESOURCE_VIEW_H_\n"
  },
  {
    "path": "src/common/resource.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include \"cuda_stream.h\"       // for DefaultStream\n#include \"device_helpers.cuh\"  // for CurrentDevice\n#include \"resource.cuh\"\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::common {\nCudaMmapResource::CudaMmapResource(StringView path, std::size_t offset, std::size_t length)\n    : ResourceHandler{kCudaMmap},\n      handle_{detail::OpenMmap(std::string{path}, offset, length),\n              [](MMAPFile* handle) {\n                // Don't close the mmap while CUDA kernel is running.\n                if (handle) {\n                  curt::DefaultStream().Sync();\n                }\n                detail::CloseMmap(handle);\n              }},\n      n_{length} {\n  auto device = dh::CurrentDevice();\n  auto ptr = handle_->BasePtr();\n#if (CUDA_VERSION / 1000) >= 13\n  cudaMemLocation loc;\n  loc.type = cudaMemLocationTypeDevice;\n  loc.id = device;\n#else\n  auto loc = device;\n#endif  // (CUDA_VERSION / 1000) >= 13\n  dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetReadMostly, loc));\n  dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetPreferredLocation, loc));\n  dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetAccessedBy, loc));\n#if (CUDA_VERSION / 1000) >= 13\n  dh::safe_cuda(cudaMemPrefetchAsync(ptr.data(), ptr.size(), loc, 0, curt::DefaultStream()));\n#else\n  dh::safe_cuda(cudaMemPrefetchAsync(ptr.data(), ptr.size(), device, curt::DefaultStream()));\n#endif  // (CUDA_VERSION / 1000) >= 13\n}\n\n[[nodiscard]] void* CudaMmapResource::Data() {\n  if (!handle_) {\n    return nullptr;\n  }\n  return this->handle_->Data();\n}\n\n[[nodiscard]] std::size_t CudaMmapResource::Size() const { return n_; }\n\nCudaMmapResource::~CudaMmapResource() noexcept(false) = default;\n\nPrivateCudaMmapConstStream::~PrivateCudaMmapConstStream() noexcept(false) = default;\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/resource.cuh",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#pragma once\n#include <cstddef>     // for size_t\n#include <functional>  // for function\n#include <utility>     // for move\n\n#include \"cuda_pinned_allocator.h\"  // for SamAllocator, HostPinnedMemPool\n#include \"cuda_stream.h\"            // for StreamRef\n#include \"device_vector.cuh\"        // for DeviceUVector, GrowOnlyVirtualMemVec\n#include \"io.h\"                     // for ResourceHandler, MMAPFile\n#include \"xgboost/string_view.h\"    // for StringView\n\nnamespace xgboost::common {\n/**\n * @brief Resource backed by `cudaMalloc`.\n */\nclass CudaMallocResource : public ResourceHandler {\n  dh::DeviceUVector<std::byte> storage_;\n\n  void Clear() noexcept(true) { this->Resize(0); }\n\n public:\n  explicit CudaMallocResource(std::size_t n_bytes) : ResourceHandler{kCudaMalloc} {\n    this->Resize(n_bytes);\n  }\n  ~CudaMallocResource() noexcept(true) override { this->Clear(); }\n\n  [[nodiscard]] void* Data() override { return storage_.data(); }\n  [[nodiscard]] std::size_t Size() const override { return storage_.size(); }\n  void Resize(std::size_t n_bytes) { this->storage_.resize(n_bytes); }\n};\n\n/**\n * @brief Device resource that only grows in size.\n */\nclass CudaGrowOnlyResource : public ResourceHandler {\n  static auto MakeNew() {\n    return std::make_unique<dh::detail::GrowOnlyVirtualMemVec>(CU_MEM_LOCATION_TYPE_DEVICE);\n  }\n\n  std::unique_ptr<dh::detail::GrowOnlyVirtualMemVec> alloc_;\n  std::size_t n_bytes_{0};\n\n public:\n  explicit CudaGrowOnlyResource(std::size_t n_bytes)\n      : ResourceHandler{kCudaGrowOnly}, alloc_{MakeNew()} {\n    this->Resize(n_bytes);\n  }\n  void Resize(std::size_t n_bytes) {\n    this->alloc_->GrowTo(n_bytes);\n    this->n_bytes_ = n_bytes;\n  }\n  void Clear() {\n    this->alloc_.reset();\n    this->alloc_ = MakeNew();\n    this->n_bytes_ = 0;\n  }\n  [[nodiscard]] void* Data() final { return this->alloc_->data(); }\n  
[[nodiscard]] std::size_t Size() const final { return this->n_bytes_; }\n};\n\nclass CudaPinnedResource : public ResourceHandler {\n  std::vector<std::byte, cuda_impl::SamAllocator<std::byte>> storage_;\n\n  void Clear() noexcept(true) { this->Resize(0); }\n\n public:\n  explicit CudaPinnedResource(std::size_t n_bytes) : ResourceHandler{kCudaHostCache} {\n    this->Resize(n_bytes);\n  }\n  ~CudaPinnedResource() noexcept(true) override { this->Clear(); }\n\n  [[nodiscard]] void* Data() override { return storage_.data(); }\n  [[nodiscard]] std::size_t Size() const override { return storage_.size(); }\n  void Resize(std::size_t n_bytes) { this->storage_.resize(n_bytes); }\n};\n\n/**\n * @brief Resource for fixed-size memory allocated by @ref HostPinnedMemPool.\n *\n * This container shares the pool but owns the memory.\n */\nclass HostPinnedMemPoolResource : public ResourceHandler {\n  std::shared_ptr<cuda_impl::HostPinnedMemPool> pool_;\n  std::size_t n_bytes_;\n  curt::StreamRef stream_;\n  void* ptr_;\n\n public:\n  explicit HostPinnedMemPoolResource(std::shared_ptr<cuda_impl::HostPinnedMemPool> pool,\n                                     std::size_t n_bytes, curt::StreamRef stream)\n      : ResourceHandler{kCudaPinnedMemPool},\n        pool_{std::move(pool)},\n        n_bytes_{n_bytes},\n        stream_{stream},\n        ptr_{this->pool_->AllocateAsync(n_bytes, stream)} {}\n  ~HostPinnedMemPoolResource() override { this->pool_->DeallocateAsync(this->ptr_, this->stream_); }\n  [[nodiscard]] std::size_t Size() const override { return this->n_bytes_; }\n  [[nodiscard]] void* Data() override { return this->ptr_; }\n};\n\nclass CudaMmapResource : public ResourceHandler {\n  std::unique_ptr<MMAPFile, std::function<void(MMAPFile*)>> handle_;\n  std::size_t n_;\n\n public:\n  CudaMmapResource() : ResourceHandler{kCudaMmap} {}\n  CudaMmapResource(StringView path, std::size_t offset, std::size_t length);\n  ~CudaMmapResource() noexcept(false) override;\n\n  [[nodiscard]] 
void* Data() override;\n  [[nodiscard]] std::size_t Size() const override;\n};\n\nclass PrivateCudaMmapConstStream : public AlignedResourceReadStream {\n public:\n  explicit PrivateCudaMmapConstStream(StringView path, std::size_t offset, std::size_t length)\n      : AlignedResourceReadStream{\n            std::shared_ptr<CudaMmapResource>{new CudaMmapResource{path, offset, length}}} {}\n  ~PrivateCudaMmapConstStream() noexcept(false) override;\n};\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/row_set.h",
    "content": "/**\n * Copyright 2017-2024, XGBoost Contributors\n * \\file row_set.h\n * \\brief Quick Utility to compute subset of rows\n * \\author Philip Cho, Tianqi Chen\n */\n#ifndef XGBOOST_COMMON_ROW_SET_H_\n#define XGBOOST_COMMON_ROW_SET_H_\n\n#include <cstddef>   // for size_t\n#include <iterator>  // for distance\n#include <vector>    // for vector\n\n#include \"xgboost/base.h\"     // for bst_node_t\n#include \"xgboost/logging.h\"  // for CHECK\n\nnamespace xgboost::common {\n/**\n * @brief Collection of rows for each tree node.\n */\nclass RowSetCollection {\n public:\n  RowSetCollection() = default;\n  RowSetCollection(RowSetCollection const&) = delete;\n  RowSetCollection(RowSetCollection&&) = default;\n  RowSetCollection& operator=(RowSetCollection const&) = delete;\n  RowSetCollection& operator=(RowSetCollection&&) = default;\n\n  /**\n   * @brief data structure to store an instance set, a subset of rows (instances)\n   *        associated with a particular node in a decision tree.\n   */\n  struct Elem {\n   private:\n    bst_idx_t* begin_{nullptr};\n    bst_idx_t* end_{nullptr};\n\n   public:\n    bst_node_t node_id{-1};\n    // id of node associated with this instance set; -1 means uninitialized\n    Elem() = default;\n    Elem(bst_idx_t* begin, bst_idx_t* end, bst_node_t node_id = -1)\n        : begin_(begin), end_(end), node_id(node_id) {}\n\n    // Disable copy ctor to avoid casting away the constness via copy.\n    Elem(Elem const& that) = delete;\n    Elem& operator=(Elem const& that) = delete;\n    Elem(Elem&& that) = default;\n    Elem& operator=(Elem&& that) = default;\n\n    [[nodiscard]] std::size_t Size() const { return std::distance(begin(), end()); }\n\n    [[nodiscard]] bst_idx_t const* begin() const { return this->begin_; }  // NOLINT\n    [[nodiscard]] bst_idx_t const* end() const { return this->end_; }      // NOLINT\n    [[nodiscard]] bst_idx_t* begin() { return this->begin_; }              // NOLINT\n    [[nodiscard]] 
bst_idx_t* end() { return this->end_; }                  // NOLINT\n  };\n\n  [[nodiscard]] std::vector<Elem>::const_iterator begin() const {  // NOLINT\n    return elem_of_each_node_.cbegin();\n  }\n  [[nodiscard]] std::vector<Elem>::const_iterator end() const {  // NOLINT\n    return elem_of_each_node_.cend();\n  }\n\n  [[nodiscard]] std::size_t Size() const { return std::distance(begin(), end()); }\n\n  /** @brief return corresponding element set given the node_id */\n  [[nodiscard]] Elem const& operator[](bst_node_t node_id) const {\n    Elem const& e = elem_of_each_node_[node_id];\n    return e;\n  }\n  /** @brief return corresponding element set given the node_id */\n  [[nodiscard]] Elem& operator[](bst_node_t node_id) {\n    Elem& e = elem_of_each_node_[node_id];\n    return e;\n  }\n\n  // clear up things\n  void Clear() {\n    elem_of_each_node_.clear();\n  }\n  // initialize node id 0->everything\n  void Init() {\n    CHECK(elem_of_each_node_.empty());\n\n    if (row_indices_.empty()) {  // edge case: empty instance set\n      constexpr bst_idx_t* kBegin = nullptr;\n      constexpr bst_idx_t* kEnd = nullptr;\n      static_assert(kEnd - kBegin == 0);\n      elem_of_each_node_.emplace_back(kBegin, kEnd, 0);\n      return;\n    }\n\n    bst_idx_t* begin = row_indices_.data();\n    bst_idx_t* end = row_indices_.data() + row_indices_.size();\n    elem_of_each_node_.emplace_back(begin, end, 0);\n  }\n\n  [[nodiscard]] std::vector<bst_idx_t>* Data() { return &row_indices_; }\n  [[nodiscard]] std::vector<bst_idx_t> const* Data() const { return &row_indices_; }\n\n  // split rowset into two\n  void AddSplit(bst_node_t node_id, bst_node_t left_node_id, bst_node_t right_node_id,\n                bst_idx_t n_left, bst_idx_t n_right) {\n    Elem& e = elem_of_each_node_[node_id];\n\n    bst_idx_t* all_begin{nullptr};\n    bst_idx_t* begin{nullptr};\n    bst_idx_t* end{nullptr};\n    if (e.begin() == nullptr) {\n      CHECK_EQ(n_left, 0);\n      CHECK_EQ(n_right, 0);\n  
  } else {\n      all_begin = row_indices_.data();\n      begin = all_begin + (e.begin() - all_begin);\n      end = elem_of_each_node_[node_id].end();\n    }\n\n    CHECK_EQ(n_left + n_right, e.Size());\n    CHECK_LE(begin + n_left, e.end());\n    CHECK_EQ(begin + n_left + n_right, e.end());\n\n    if (left_node_id >= static_cast<bst_node_t>(elem_of_each_node_.size())) {\n      elem_of_each_node_.resize(left_node_id + 1);\n    }\n    if (right_node_id >= static_cast<bst_node_t>(elem_of_each_node_.size())) {\n      elem_of_each_node_.resize(right_node_id + 1);\n    }\n\n    elem_of_each_node_[left_node_id] = Elem{begin, begin + n_left, left_node_id};\n    elem_of_each_node_[right_node_id] = Elem{begin + n_left, end, right_node_id};\n    elem_of_each_node_[node_id] = Elem{nullptr, nullptr, -1};\n  }\n\n private:\n  // stores the row indexes in the set\n  std::vector<bst_idx_t> row_indices_;\n  // vector: node_id -> elements\n  std::vector<Elem> elem_of_each_node_;\n};\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_COMMON_ROW_SET_H_\n"
  },
  {
    "path": "src/common/stats.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include \"stats.h\"\n\n#include <cstddef>  // std::size_t\n#include <numeric>  // std::accumulate\n\n#include \"../collective/aggregator.h\"    // for GlobalSum\n#include \"linalg_op.h\"                   // for Matrix\n#include \"optional_weight.h\"             // OptionalWeights\n#include \"threading_utils.h\"             // ParallelFor, MemStackAllocator\n#include \"transform_iterator.h\"          // MakeIndexTransformIter\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/linalg.h\"              // Tensor, UnravelIndex, Apply\n#include \"xgboost/logging.h\"             // CHECK_EQ\n\nnamespace xgboost::common {\nvoid Median(Context const* ctx, linalg::Matrix<float> const& t,\n            HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out) {\n  if (ctx->IsCUDA()) {\n    weights.SetDevice(ctx->Device());\n    auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());\n    auto t_v = t.View(ctx->Device());\n    cuda_impl::Median(ctx, t_v, opt_weights, out);\n  }\n\n  auto opt_weights = OptionalWeights(weights.ConstHostSpan());\n  auto t_v = t.HostView();\n  out->Reshape(t.Shape(1));\n  auto h_out = out->HostView();\n  for (std::size_t i{0}; i < t.Shape(1); ++i) {\n    auto ti_v = t_v.Slice(linalg::All(), i);\n    auto iter = linalg::cbegin(ti_v);\n    float q{0};\n    if (opt_weights.Empty()) {\n      q = common::Quantile(ctx, 0.5, iter, iter + ti_v.Size());\n    } else {\n      CHECK_NE(t_v.Shape(1), 0);\n      auto w_it = common::MakeIndexTransformIter([&](std::size_t i) { return opt_weights[i]; });\n      q = common::WeightedQuantile(ctx, 0.5, iter, iter + ti_v.Size(), w_it);\n    }\n    h_out(i) = q;\n  }\n}\n\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::Vector<float>* out) {\n  out->SetDevice(ctx->Device());\n  out->Reshape(1);\n\n  if 
(ctx->IsCUDA()) {\n    cuda_impl::Mean(ctx, v, out->View(ctx->Device()));\n  } else if (ctx->IsSycl()) {\n    sycl_impl::Mean(ctx, v, out->View(ctx->Device()));\n  } else {\n    auto h_v = v;\n    float n = v.Size();\n    MemStackAllocator<float, DefaultMaxThreads()> tloc(ctx->Threads(), 0.0f);\n    ParallelFor(v.Size(), ctx->Threads(),\n                [&](auto i) { tloc[omp_get_thread_num()] += h_v(i) / n; });\n    auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f);\n    out->HostView()(0) = ret;\n  }\n}\n\nvoid SampleMean(Context const* ctx, bool is_column_split, linalg::Matrix<float> const& v,\n                linalg::Vector<float>* out) {\n  *out = linalg::Zeros<float>(ctx, std::max(v.Shape(1), decltype(v.Shape(1)){1}));\n  if (!ctx->IsCUDA()) {\n    auto h_v = v.HostView();\n    CHECK(h_v.CContiguous());\n    std::int64_t n_samples = v.Shape(0);\n    SafeColl(collective::GlobalSum(ctx, is_column_split, linalg::MakeVec(&n_samples, 1)));\n    auto n_columns = v.Shape(1);\n    auto h_out = out->HostView();\n\n    auto n_rows_f64 = static_cast<double>(n_samples);\n    for (std::size_t j = 0; j < n_columns; ++j) {\n      MemStackAllocator<double, DefaultMaxThreads()> mean_tloc(ctx->Threads(), 0.0);\n      ParallelFor(v.Shape(0), ctx->Threads(),\n                  [&](auto i) { mean_tloc[omp_get_thread_num()] += (h_v(i, j) / n_rows_f64); });\n      auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0);\n      h_out(j) = mean;\n    }\n    SafeColl(collective::GlobalSum(ctx, is_column_split, h_out));\n  } else {\n    auto d_v = v.View(ctx->Device());\n    auto d_out = out->View(ctx->Device());\n    cuda_impl::SampleMean(ctx, is_column_split, d_v, d_out);\n  }\n}\n\nvoid WeightedSampleMean(Context const* ctx, bool is_column_split, linalg::Matrix<float> const& v,\n                        HostDeviceVector<float> const& w, linalg::Vector<float>* out) {\n  *out = linalg::Zeros<float>(ctx, std::max(v.Shape(1), decltype(v.Shape(1)){1}));\n  
CHECK_EQ(v.Shape(0), w.Size());\n  if (!ctx->IsCUDA()) {\n    auto h_v = v.HostView();\n    auto h_w = w.ConstHostSpan();\n    auto sum_w = std::accumulate(h_w.data(), h_w.data() + h_w.size(), 0.0);\n    SafeColl(collective::GlobalSum(ctx, is_column_split, linalg::MakeVec(&sum_w, 1)));\n    auto h_out = out->HostView();\n    for (std::size_t j = 0; j < v.Shape(1); ++j) {\n      MemStackAllocator<double, DefaultMaxThreads()> mean_tloc(ctx->Threads(), 0.0);\n      ParallelFor(v.Shape(0), ctx->Threads(),\n                  [&](auto i) { mean_tloc[omp_get_thread_num()] += (h_v(i, j) / sum_w * h_w(i)); });\n      auto mean = std::accumulate(mean_tloc.cbegin(), mean_tloc.cend(), 0.0);\n      h_out(j) = mean;\n    }\n    SafeColl(collective::GlobalSum(ctx, is_column_split, h_out));\n  } else {\n    auto d_v = v.View(ctx->Device());\n    w.SetDevice(ctx->Device());\n    auto d_w = w.ConstDeviceSpan();\n    auto d_out = out->View(ctx->Device());\n    cuda_impl::WeightedSampleMean(ctx, is_column_split, d_v, d_w, d_out);\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/stats.cu",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n\n#include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator\n\n#include <cstddef>  // size_t\n#include <tuple>    // for apply\n\n#include \"../collective/aggregator.h\"  // for GlobalSum\n#include \"cuda_context.cuh\"            // CUDAContext\n#include \"device_helpers.cuh\"          // dh::MakeTransformIterator, tcbegin, tcend\n#include \"optional_weight.h\"           // common::OptionalWeights\n#include \"stats.cuh\"          // common::SegmentedQuantile, common::SegmentedWeightedQuantile\n#include \"xgboost/base.h\"     // for XGBOOST_DEVICE\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for TensorView, UnravelIndex, Apply\n\nnamespace xgboost::common::cuda_impl {\nvoid Median(Context const* ctx, linalg::TensorView<float const, 2> t,\n            common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {\n  CHECK_GE(t.Shape(1), 1);\n  HostDeviceVector<std::size_t> segments(t.Shape(1) + 1, 0);\n  segments.SetDevice(ctx->Device());\n  auto d_segments = segments.DeviceSpan();\n  dh::LaunchN(d_segments.size(), ctx->CUDACtx()->Stream(),\n              [=] XGBOOST_DEVICE(std::size_t i) { d_segments[i] = t.Shape(0) * i; });\n  auto val_it = dh::MakeTransformIterator<float>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(size_t i) { return std::apply(t, linalg::UnravelIndex(i, t.Shape())); });\n\n  out->SetDevice(ctx->Device());\n  out->Reshape(t.Shape(1));\n  if (weights.Empty()) {\n    common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,\n                              val_it + t.Size(), out->Data());\n  } else {\n    CHECK_NE(t.Shape(1), 0);\n    auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                                 [=] 
XGBOOST_DEVICE(std::size_t i) {\n                                                   auto sample_idx = i / t.Shape(1);\n                                                   return weights[sample_idx];\n                                                 });\n    common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),\n                                      val_it, val_it + t.Size(), w_it, w_it + t.Size(),\n                                      out->Data());\n  }\n}\n\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {\n  float n = v.Size();\n  auto it = dh::MakeTransformIterator<float>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v(i) / n; });\n  std::size_t bytes;\n  CHECK_EQ(out.Size(), 1);\n  auto s = ctx->CUDACtx()->Stream();\n  dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size(), s));\n  dh::TemporaryArray<char> temp{bytes};\n  dh::safe_cuda(\n      cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size(), s));\n}\n\nvoid SampleMean(Context const* ctx, bool is_column_split, linalg::MatrixView<float const> d_v,\n                linalg::VectorView<float> d_out) {\n  auto n_samples = d_v.Shape(0);\n  auto n_total_samples = n_samples;\n  auto cpu = ctx->MakeCPU();\n  SafeColl(collective::GlobalSum(&cpu, is_column_split, linalg::MakeVec(&n_total_samples, 1)));\n  auto column_it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),\n                                                          [=] XGBOOST_DEVICE(std::size_t i) {\n                                                            auto cidx = i / n_samples;\n                                                            return cidx;\n                                                          });\n  auto n_rows_f64 = static_cast<double>(n_total_samples);\n  auto val_it = 
dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),\n                                                  [=] XGBOOST_DEVICE(std::size_t i) -> double {\n                                                    auto cidx = i / n_samples;\n                                                    auto ridx = i % n_samples;\n                                                    return d_v(ridx, cidx) / n_rows_f64;\n                                                  });\n  auto cuctx = ctx->CUDACtx();\n  thrust::reduce_by_key(cuctx->CTP(), column_it, column_it + d_v.Size(), val_it,\n                        thrust::make_discard_iterator(), d_out.Values().data(),\n                        thrust::equal_to<>{}, thrust::plus<double>{});\n  SafeColl(collective::GlobalSum(ctx, is_column_split, d_out));\n}\n\nvoid WeightedSampleMean(Context const* ctx, bool is_column_split,\n                        linalg::MatrixView<float const> d_v, common::Span<float const> d_w,\n                        linalg::VectorView<float> d_out) {\n  CHECK(d_v.CContiguous());\n  auto n_rows = d_v.Shape(0);\n  // The use of `cidx = i / n_rows` does not imply the input is column-major, it simply\n  // states the order of the reduction operator, and we want to reduce over the first\n  // dimension (rows). `thrust::reduce_by_key` requires all keys within the same reduction\n  // segment to be next to each other. 
`array(ridx, cidx)` can be used with any memory\n  // layout.\n  auto column_it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),\n                                                          [=] XGBOOST_DEVICE(std::size_t i) {\n                                                            auto cidx = i / n_rows;\n                                                            return cidx;\n                                                          });\n  auto cuctx = ctx->CUDACtx();\n  auto sum_w =\n      dh::Reduce(cuctx->CTP(), d_w.data(), d_w.data() + d_w.size(), 0.0, thrust::plus<double>{});\n  auto cpu = ctx->MakeCPU();\n  SafeColl(collective::GlobalSum(&cpu, is_column_split, linalg::MakeVec(&sum_w, 1)));\n  auto val_it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),\n                                                  [=] XGBOOST_DEVICE(std::size_t i) -> double {\n                                                    auto cidx = i / n_rows;\n                                                    auto ridx = i % n_rows;\n                                                    return d_v(ridx, cidx) / sum_w * d_w(ridx);\n                                                  });\n  thrust::reduce_by_key(cuctx->CTP(), column_it, column_it + d_v.Size(), val_it,\n                        thrust::make_discard_iterator(), d_out.Values().data(),\n                        thrust::equal_to<>{}, thrust::plus<double>{});\n  SafeColl(collective::GlobalSum(ctx, is_column_split, d_out));\n}\n}  // namespace xgboost::common::cuda_impl\n"
  },
  {
    "path": "src/common/stats.cuh",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <thrust/binary_search.h>                  // for lower_bound\n#include <thrust/for_each.h>                       // for for_each_n\n#include <thrust/iterator/constant_iterator.h>     // for make_constant_iterator\n#include <thrust/iterator/counting_iterator.h>     // for make_counting_iterator\n#include <thrust/iterator/permutation_iterator.h>  // for make_permutation_iterator\n#include <thrust/scan.h>                           // for inclusive_scan_by_key\n\n#include <algorithm>    // for min\n#include <cstddef>      // for size_t\n#include <iterator>     // for distance\n#include <limits>       // for numeric_limits\n#include <type_traits>  // for is_floating_point_v,iterator_traits\n\n#include \"algorithm.cuh\"       // for SegmentedArgMergeSort\n#include \"cuda_context.cuh\"    // for CUDAContext\n#include \"device_helpers.cuh\"  // for SegmentId\n#include \"device_vector.cuh\"   // for device_vector\n#include \"xgboost/context.h\"   // for Context\n#include \"xgboost/linalg.h\"    // for UnravelIndex\n#include \"xgboost/span.h\"      // for Span\n\nnamespace xgboost::common {\nnamespace detail {\n// This should be a lambda function, but for some reason gcc-11 + nvcc-11.8 failed to\n// compile it. 
As a result, a functor is extracted instead.\n//\n// error: ‘__T288’ was not declared in this scope\ntemplate <typename SegIt, typename ValIt, typename AlphaIt>\nstruct QuantileSegmentOp {\n  SegIt seg_begin;\n  ValIt val;\n  AlphaIt alpha_it;\n  linalg::VectorView<float> d_results;\n\n  static_assert(std::is_floating_point_v<typename std::iterator_traits<ValIt>::value_type>,\n                \"Invalid value for quantile.\");\n  static_assert(std::is_floating_point_v<typename std::iterator_traits<ValIt>::value_type>,\n                \"Invalid alpha.\");\n\n  XGBOOST_DEVICE void operator()(std::size_t seg_idx) {\n    std::size_t begin = seg_begin[seg_idx];\n    auto n = static_cast<double>(seg_begin[seg_idx + 1] - begin);\n    double a = alpha_it[seg_idx];\n\n    if (n == 0) {\n      d_results(seg_idx) = std::numeric_limits<float>::quiet_NaN();\n      return;\n    }\n\n    if (a <= (1 / (n + 1))) {\n      d_results(seg_idx) = val[begin];\n      return;\n    }\n    if (a >= (n / (n + 1))) {\n      d_results(seg_idx) = val[common::LastOf(seg_idx, seg_begin)];\n      return;\n    }\n\n    double x = a * static_cast<double>(n + 1);\n    double k = std::floor(x) - 1;\n    double d = (x - 1) - k;\n\n    auto v0 = val[begin + static_cast<std::size_t>(k)];\n    auto v1 = val[begin + static_cast<std::size_t>(k) + 1];\n\n    d_results(seg_idx) = v0 + d * (v1 - v0);\n  }\n};\n\ntemplate <typename SegIt, typename ValIt, typename AlphaIt>\nXGBOOST_DEVICE auto MakeQSegOp(SegIt seg_it, ValIt val_it, AlphaIt alpha_it,\n                               linalg::VectorView<float> d_results) {\n  return QuantileSegmentOp<SegIt, ValIt, AlphaIt>{seg_it, val_it, alpha_it, d_results};\n}\n\ntemplate <typename SegIt>\nstruct SegOp {\n  SegIt seg_beg;\n  SegIt seg_end;\n\n  XGBOOST_DEVICE std::size_t operator()(std::size_t i) {\n    return dh::SegmentId(seg_beg, seg_end, i);\n  }\n};\n\ntemplate <typename WIter>\nstruct WeightOp {\n  WIter w_begin;\n  Span<std::size_t const> d_sorted_idx;\n  
XGBOOST_DEVICE float operator()(std::size_t i) { return w_begin[d_sorted_idx[i]]; }\n};\n\ntemplate <typename SegIt, typename ValIt, typename AlphaIt>\nstruct WeightedQuantileSegOp {\n  AlphaIt alpha_it;\n  SegIt seg_beg;\n  ValIt val_begin;\n  Span<float const> d_weight_cdf;\n  Span<std::size_t const> d_sorted_idx;\n  linalg::VectorView<float> d_results;\n  static_assert(std::is_floating_point_v<typename std::iterator_traits<AlphaIt>::value_type>,\n                \"Invalid alpha.\");\n  static_assert(std::is_floating_point_v<typename std::iterator_traits<ValIt>::value_type>,\n                \"Invalid value for quantile.\");\n\n  XGBOOST_DEVICE void operator()(std::size_t seg_idx) {\n    std::size_t begin = seg_beg[seg_idx];\n    auto n = static_cast<double>(seg_beg[seg_idx + 1] - begin);\n    if (n == 0) {\n      d_results(seg_idx) = std::numeric_limits<float>::quiet_NaN();\n      return;\n    }\n    auto seg_cdf = d_weight_cdf.subspan(begin, static_cast<std::size_t>(n));\n    auto seg_sorted_idx = d_sorted_idx.subspan(begin, static_cast<std::size_t>(n));\n    double a = alpha_it[seg_idx];\n    double thresh = seg_cdf.back() * a;\n\n    std::size_t idx =\n        thrust::lower_bound(thrust::seq, seg_cdf.data(), seg_cdf.data() + seg_cdf.size(), thresh) -\n        seg_cdf.data();\n    idx = std::min(idx, static_cast<std::size_t>(n - 1));\n    d_results(seg_idx) = val_begin[seg_sorted_idx[idx]];\n  }\n};\n\ntemplate <typename SegIt, typename ValIt, typename AlphaIt>\nXGBOOST_DEVICE auto MakeWQSegOp(SegIt seg_it, ValIt val_it, AlphaIt alpha_it,\n                                Span<float const> d_weight_cdf,\n                                Span<std::size_t const> d_sorted_idx,\n                                linalg::VectorView<float> d_results) {\n  return WeightedQuantileSegOp<SegIt, ValIt, AlphaIt>{alpha_it,     seg_it,       val_it,\n                                                      d_weight_cdf, d_sorted_idx, d_results};\n}\n}  // namespace detail\n/**\n * 
@brief Compute segmented quantile on GPU.\n *\n * @tparam SegIt Iterator for CSR style segments indptr\n * @tparam ValIt Iterator for values\n * @tparam AlphaIt Iterator to alphas\n *\n * @param alpha_it Iterator to the p^th quantiles we want to compute, one for each segment.\n *\n *    std::distance(seg_begin, seg_end) should be equal to n_segments + 1\n */\ntemplate <typename SegIt, typename ValIt, typename AlphaIt,\n          std::enable_if_t<!std::is_floating_point_v<AlphaIt>>* = nullptr>\nvoid SegmentedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_begin, SegIt seg_end,\n                       ValIt val_begin, ValIt val_end, HostDeviceVector<float>* quantiles) {\n  dh::device_vector<std::size_t> sorted_idx;\n  common::SegmentedArgMergeSort(ctx, seg_begin, seg_end, val_begin, val_end, &sorted_idx);\n  auto n_segments = std::distance(seg_begin, seg_end) - 1;\n  if (n_segments <= 0) {\n    return;\n  }\n\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n  auto val = thrust::make_permutation_iterator(val_begin, dh::tcbegin(d_sorted_idx));\n\n  quantiles->SetDevice(ctx->Device());\n  quantiles->Resize(n_segments);\n  auto d_results = linalg::MakeVec(ctx->Device(), quantiles->DeviceSpan());\n\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_segments,\n                     detail::MakeQSegOp(seg_begin, val, alpha_it, d_results));\n}\n\n/**\n * @brief Calculate multiple quantiles for multiple segments.\n *\n *    Each segment has `n_alphas` quantiles. 
All segments share the same set of quantiles.\n *\n *    The output quantiles are stored in a row-major matrix with shape: (n_segments, n_alphas).\n *\n * @param h_alphas Quantiles to be estimated.\n * @param values   A callable object that indexes the value matrix with shape (n, n_alphas).\n * @param n        The number of samples in values matrix, should equal to *(seg_end - 1).\n */\ntemplate <typename SegIt, typename ValIt>\nvoid SegmentedQuantile(Context const* ctx, std::vector<float> const& h_alphas, SegIt seg_begin,\n                       SegIt seg_end, ValIt values, std::size_t n,\n                       HostDeviceVector<float>* quantiles) {\n  // The values is a matrix with shape (n_samples, n_alphas), we have a 2-way segment\n  // here.\n  // For now, we simply iterate through the alphas on host as we are likely to have at most 3\n  // quantiles to compute.\n  auto n_segments = std::distance(seg_begin, seg_end) - 1;\n  if (n_segments <= 0) {\n    return;\n  }\n\n  auto n_alphas = h_alphas.size();\n  quantiles->SetDevice(ctx->Device());\n  quantiles->Resize(n_segments * n_alphas);\n  auto d_quantiles = linalg::MakeTensorView(ctx, quantiles->DeviceSpan(), n_segments, n_alphas);\n\n  for (std::size_t alpha_idx = 0; alpha_idx < n_alphas; ++alpha_idx) {\n    auto val_begin = dh::MakeIndexTransformIter(\n        [=] XGBOOST_DEVICE(std::size_t i) { return values(i, alpha_idx); });\n    auto val_end = val_begin + n;\n    dh::device_vector<std::size_t> sorted_idx;\n    common::SegmentedArgMergeSort(ctx, seg_begin, seg_end, val_begin, val_end, &sorted_idx);\n\n    auto d_sorted_idx = dh::ToSpan(sorted_idx);\n    auto val = thrust::make_permutation_iterator(val_begin, dh::tcbegin(d_sorted_idx));\n\n    auto row = d_quantiles.Slice(linalg::All(), alpha_idx);\n    auto alpha = h_alphas[alpha_idx];\n    thrust::for_each_n(\n        ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_segments,\n        detail::MakeQSegOp(seg_begin, val, 
thrust::make_constant_iterator(alpha), row));\n  }\n}\n\n/**\n * @brief Compute segmented quantile on GPU.\n *\n * @tparam SegIt Iterator for CSR style segments indptr\n * @tparam ValIt Iterator for values\n *\n * @param alpha The p^th quantile we want to compute\n *\n *    std::distance(seg_begin, seg_end) should be equal to n_segments + 1\n */\ntemplate <typename SegIt, typename ValIt>\nvoid SegmentedQuantile(Context const* ctx, double alpha, SegIt seg_begin, SegIt seg_end,\n                       ValIt val_begin, ValIt val_end, HostDeviceVector<float>* quantiles) {\n  CHECK(alpha >= 0 && alpha <= 1);\n  auto alpha_it = thrust::make_constant_iterator(alpha);\n  return SegmentedQuantile(ctx, alpha_it, seg_begin, seg_end, val_begin, val_end, quantiles);\n}\n\n/**\n * @brief Compute segmented quantile on GPU with weighted inputs.\n *\n * @tparam SegIt Iterator for CSR style segments indptr\n * @tparam ValIt Iterator for values\n * @tparam WIter Iterator for weights\n *\n * @param alpha_it Iterator for the p^th quantile we want to compute, one per-segment\n * @param w_begin  Iterator for weight for each input element\n */\ntemplate <typename SegIt, typename ValIt, typename AlphaIt, typename WIter,\n          typename std::enable_if_t<\n              !std::is_same_v<typename std::iterator_traits<AlphaIt>::value_type, void>>* = nullptr>\nvoid SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_beg, SegIt seg_end,\n                               ValIt val_begin, ValIt val_end, WIter w_begin, WIter w_end,\n                               HostDeviceVector<float>* quantiles) {\n  auto cuctx = ctx->CUDACtx();\n  dh::device_vector<std::size_t> sorted_idx;\n  common::SegmentedArgMergeSort(ctx, seg_beg, seg_end, val_begin, val_end, &sorted_idx);\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n  std::size_t n_weights = std::distance(w_begin, w_end);\n  dh::device_vector<float> weights_cdf(n_weights);\n  std::size_t n_elems = std::distance(val_begin, 
val_end);\n  CHECK_EQ(n_weights, n_elems);\n\n  auto scan_key = dh::MakeIndexTransformIter(detail::SegOp<SegIt>{seg_beg, seg_end});\n  auto scan_val = dh::MakeIndexTransformIter(detail::WeightOp<WIter>{w_begin, d_sorted_idx});\n  thrust::inclusive_scan_by_key(cuctx->CTP(), scan_key, scan_key + n_weights, scan_val,\n                                weights_cdf.begin());\n\n  auto n_segments = std::distance(seg_beg, seg_end) - 1;\n  quantiles->SetDevice(ctx->Device());\n  quantiles->Resize(n_segments);\n  auto d_results = linalg::MakeVec(ctx->Device(), quantiles->DeviceSpan());\n  auto d_weight_cdf = dh::ToSpan(weights_cdf);\n\n  thrust::for_each_n(\n      cuctx->CTP(), thrust::make_counting_iterator(0ul), n_segments,\n      detail::MakeWQSegOp(seg_beg, val_begin, alpha_it, d_weight_cdf, d_sorted_idx, d_results));\n}\n\n/**\n * @brief Calculate multiple weighted quantiles for multiple segments.\n *\n * @param h_alphas Quantiles to be estimated.\n * @param values   A callable object that indexes the value matrix with shape (n, n_alphas).\n * @param n        The number of samples in values matrix, should equal to *(seg_end - 1).\n */\ntemplate <typename SegIt, typename ValIt, typename WIter>\nvoid SegmentedWeightedQuantile(Context const* ctx, std::vector<float> const& h_alphas,\n                               SegIt seg_beg, SegIt seg_end, ValIt values, WIter w_begin,\n                               WIter w_end, HostDeviceVector<float>* quantiles) {\n  auto cuctx = ctx->CUDACtx();\n\n  auto n_segments = std::distance(seg_beg, seg_end) - 1;\n  if (n_segments <= 0) {\n    return;\n  }\n  auto n_alphas = h_alphas.size();\n  std::size_t n = std::distance(w_begin, w_end);\n\n  quantiles->SetDevice(ctx->Device());\n  quantiles->Resize(n_segments * n_alphas);\n  auto d_quantiles = linalg::MakeTensorView(ctx, quantiles->DeviceSpan(), n_segments, n_alphas);\n\n  for (std::size_t alpha_idx = 0; alpha_idx < n_alphas; ++alpha_idx) {\n    auto val_begin = 
dh::MakeIndexTransformIter(\n        [=] XGBOOST_DEVICE(std::size_t i) { return values(i, alpha_idx); });\n    auto val_end = val_begin + n;\n\n    dh::device_vector<std::size_t> sorted_idx;\n    common::SegmentedArgMergeSort(ctx, seg_beg, seg_end, val_begin, val_end, &sorted_idx);\n\n    auto d_sorted_idx = dh::ToSpan(sorted_idx);\n    dh::device_vector<float> weights_cdf(n);\n\n    auto scan_key = dh::MakeIndexTransformIter(detail::SegOp<SegIt>{seg_beg, seg_end});\n    auto scan_val = dh::MakeIndexTransformIter(detail::WeightOp<WIter>{w_begin, d_sorted_idx});\n    thrust::inclusive_scan_by_key(cuctx->CTP(), scan_key, scan_key + n, scan_val,\n                                  weights_cdf.begin());\n    auto d_weight_cdf = dh::ToSpan(weights_cdf);\n    auto alpha = h_alphas[alpha_idx];\n    auto row = d_quantiles.Slice(linalg::All(), alpha_idx);\n\n    thrust::for_each_n(\n        cuctx->CTP(), thrust::make_counting_iterator(0ul), n_segments,\n        detail::MakeWQSegOp(seg_beg, val_begin, thrust::make_constant_iterator(alpha), d_weight_cdf,\n                            d_sorted_idx, row));\n  }\n}\n\ntemplate <typename SegIt, typename ValIt, typename WIter>\nvoid SegmentedWeightedQuantile(Context const* ctx, double alpha, SegIt seg_beg, SegIt seg_end,\n                               ValIt val_begin, ValIt val_end, WIter w_begin, WIter w_end,\n                               HostDeviceVector<float>* quantiles) {\n  CHECK(alpha >= 0 && alpha <= 1);\n  return SegmentedWeightedQuantile(ctx, thrust::make_constant_iterator(alpha), seg_beg, seg_end,\n                                   val_begin, val_end, w_begin, w_end, quantiles);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/stats.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_STATS_H_\n#define XGBOOST_COMMON_STATS_H_\n#include <algorithm>\n#include <iterator>  // for distance\n#include <limits>\n#include <vector>\n\n#include \"algorithm.h\"        // for StableSort\n#include \"common.h\"           // AssertGPUSupport,AssertSYCLSupport\n#include \"optional_weight.h\"  // OptionalWeights\n#include \"xgboost/context.h\"  // Context\n#include \"xgboost/linalg.h\"   // TensorView,VectorView\n#include \"xgboost/logging.h\"  // CHECK_GE\n\nnamespace xgboost {\nnamespace common {\n\n/**\n * @brief Quantile using linear interpolation.\n *\n *   https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm\n *\n * \\param alpha Quantile, must be in range [0, 1].\n * \\param begin Iterator begin for input array.\n * \\param end   Iterator end for input array.\n *\n * \\return The result of interpolation.\n */\ntemplate <typename Iter,\n          typename R = std::remove_reference_t<typename std::iterator_traits<Iter>::value_type>>\n[[nodiscard]] R Quantile(Context const* ctx, double alpha, Iter const& begin, Iter const& end) {\n  CHECK(alpha >= 0 && alpha <= 1);\n  auto n = static_cast<double>(std::distance(begin, end));\n  if (n == 0) {\n    return std::numeric_limits<float>::quiet_NaN();\n  }\n\n  std::vector<std::size_t> sorted_idx(n);\n  std::iota(sorted_idx.begin(), sorted_idx.end(), 0);\n  StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),\n             [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });\n\n  auto val = [&](size_t i) {\n    return *(begin + sorted_idx[i]);\n  };\n  static_assert(std::is_same_v<decltype(val(0)), float>);\n\n  if (alpha <= (1 / (n + 1))) {\n    return val(0);\n  }\n  if (alpha >= (n / (n + 1))) {\n    return val(sorted_idx.size() - 1);\n  }\n\n  double x = alpha * static_cast<double>((n + 1));\n  double k = std::floor(x) - 1;\n  CHECK_GE(k, 0);\n  double d = (x - 1) - k;\n\n  auto v0 = 
val(static_cast<size_t>(k));\n  auto v1 = val(static_cast<size_t>(k) + 1);\n  return v0 + d * (v1 - v0);\n}\n\n/**\n * \\brief Calculate the weighted quantile with step function. Unlike the unweighted\n *        version, no interpolation is used.\n *\n *   See https://aakinshin.net/posts/weighted-quantiles/ for some discussions on computing\n *   weighted quantile with interpolation.\n */\ntemplate <typename Iter, typename WeightIter,\n          typename R = std::remove_reference_t<typename std::iterator_traits<Iter>::value_type>>\n[[nodiscard]] R WeightedQuantile(Context const* ctx, double alpha, Iter begin, Iter end,\n                                 WeightIter w_begin) {\n  auto n = static_cast<double>(std::distance(begin, end));\n  if (n == 0) {\n    return std::numeric_limits<float>::quiet_NaN();\n  }\n  std::vector<size_t> sorted_idx(n);\n  std::iota(sorted_idx.begin(), sorted_idx.end(), 0);\n  StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),\n             [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });\n\n  auto val = [&](size_t i) {\n    return *(begin + sorted_idx[i]);\n  };\n\n  std::vector<float> weight_cdf(n);  // S_n\n  // weighted cdf is sorted during construction\n  weight_cdf[0] = *(w_begin + sorted_idx[0]);\n  for (size_t i = 1; i < n; ++i) {\n    weight_cdf[i] = weight_cdf[i - 1] + w_begin[sorted_idx[i]];\n  }\n  float thresh = weight_cdf.back() * alpha;\n  std::size_t idx =\n      std::lower_bound(weight_cdf.cbegin(), weight_cdf.cend(), thresh) - weight_cdf.cbegin();\n  idx = std::min(idx, static_cast<size_t>(n - 1));\n  return val(idx);\n}\n\nnamespace cuda_impl {\nvoid Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights,\n            linalg::Tensor<float, 1>* out);\n\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);\n\nvoid SampleMean(Context const* ctx, bool is_column_split, linalg::MatrixView<float const> d_v,\n                
linalg::VectorView<float> d_out);\n\nvoid WeightedSampleMean(Context const* ctx, bool is_column_split,\n                        linalg::MatrixView<float const> d_v, common::Span<float const> d_w,\n                        linalg::VectorView<float> d_out);\n\n#if !defined(XGBOOST_USE_CUDA)\ninline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,\n                   linalg::Tensor<float, 1>*) {\n  common::AssertGPUSupport();\n}\ninline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {\n  common::AssertGPUSupport();\n}\n\ninline void SampleMean(Context const*, bool, linalg::MatrixView<float const>,\n                       linalg::VectorView<float>) {\n  common::AssertGPUSupport();\n}\n\ninline void WeightedSampleMean(Context const*, bool, linalg::MatrixView<float const>,\n                               common::Span<float const>, linalg::VectorView<float>) {\n  common::AssertGPUSupport();\n}\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n\nnamespace sycl_impl {\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);\n\n#if !defined(XGBOOST_USE_SYCL)\ninline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {\n  common::AssertSYCLSupport();\n}\n\n#endif  // !defined(XGBOOST_USE_SYCL)\n}  // namespace sycl_impl\n\n/**\n * @brief Calculate medians for each column of the input matrix.\n */\nvoid Median(Context const* ctx, linalg::Matrix<float> const& t,\n            HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out);\n\n/**\n * @brief Calculate the mean value of a vector.\n */\nvoid Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::Vector<float>* out);\n\n/**\n * @brief Calculate the mean value for the first axis.\n */\nvoid SampleMean(Context const* ctx, bool is_column_split, linalg::Matrix<float> const& v,\n                linalg::Vector<float>* out);\n\n/**\n * 
@brief Calculate the weighted mean value for the first axis, weights are assumed to be\n *        equal to or greater than zero.\n */\nvoid WeightedSampleMean(Context const* ctx, bool is_column_split, linalg::Matrix<float> const& v,\n                        HostDeviceVector<float> const& w, linalg::Vector<float>* out);\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_STATS_H_\n"
  },
  {
    "path": "src/common/survival_util.cc",
    "content": "/*!\n * Copyright 2019-2020 by Contributors\n * \\file survival_util.cc\n * \\brief Utility functions, useful for implementing objective and metric functions for survival\n *        analysis\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n\n#include <dmlc/registry.h>\n#include \"survival_util.h\"\n\nnamespace xgboost {\nnamespace common {\n\nDMLC_REGISTER_PARAMETER(AFTParam);\n\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/survival_util.h",
    "content": "/*!\n * Copyright 2019-2020 by Contributors\n * \\file survival_util.h\n * \\brief Utility functions, useful for implementing objective and metric functions for survival\n *        analysis\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n#ifndef XGBOOST_COMMON_SURVIVAL_UTIL_H_\n#define XGBOOST_COMMON_SURVIVAL_UTIL_H_\n\n/*\n * For the derivation of the loss, gradient, and hessian for the Accelerated Failure Time model,\n * refer to the paper \"Survival regression with accelerated failure time model in XGBoost\"\n * at https://arxiv.org/abs/2006.04920.\n */\n\n#include <xgboost/parameter.h>\n#include <memory>\n#include <algorithm>\n#include <limits>\n#include \"probability_distribution.h\"\n\nDECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType);\n\nnamespace xgboost {\nnamespace common {\n\n#ifndef __CUDACC__\n\nusing std::log;\nusing std::fmax;\n\n#endif  // __CUDACC__\n\nenum class CensoringType : uint8_t {\n  kUncensored, kRightCensored, kLeftCensored, kIntervalCensored\n};\n\nnamespace aft {\n\n// Allowable range for gradient and hessian. 
Used for regularization\nconstexpr double kMinGradient = -15.0;\nconstexpr double kMaxGradient = 15.0;\nconstexpr double kMinHessian = 1e-16;  // Ensure that no data point gets zero hessian\nconstexpr double kMaxHessian = 15.0;\n\nconstexpr double kEps = 1e-12;  // A denominator in a fraction should not be too small\n\n// Clip (limit) x to fit range [x_min, x_max].\n// If x < x_min, return x_min; if x > x_max, return x_max; if x_min <= x <= x_max, return x.\n// This function assumes x_min < x_max; behavior is undefined if this assumption does not hold.\nXGBOOST_DEVICE\ninline double Clip(double x, double x_min, double x_max) {\n  if (x < x_min) {\n    return x_min;\n  }\n  if (x > x_max) {\n    return x_max;\n  }\n  return x;\n}\n\ntemplate<typename Distribution>\nXGBOOST_DEVICE inline double\nGetLimitGradAtInfPred(CensoringType censor_type, bool sign, double sigma);\n\ntemplate<typename Distribution>\nXGBOOST_DEVICE inline double\nGetLimitHessAtInfPred(CensoringType censor_type, bool sign, double sigma);\n\n}  // namespace aft\n\n/*! \\brief Parameter structure for AFT loss and metric */\nstruct AFTParam : public XGBoostParameter<AFTParam> {\n  /*! \\brief Choice of probability distribution for the noise term in AFT */\n  ProbabilityDistributionType aft_loss_distribution;\n  /*! 
\\brief Scaling factor to be applied to the distribution */\n  float aft_loss_distribution_scale;\n  DMLC_DECLARE_PARAMETER(AFTParam) {\n    DMLC_DECLARE_FIELD(aft_loss_distribution)\n        .set_default(ProbabilityDistributionType::kNormal)\n        .add_enum(\"normal\", ProbabilityDistributionType::kNormal)\n        .add_enum(\"logistic\", ProbabilityDistributionType::kLogistic)\n        .add_enum(\"extreme\", ProbabilityDistributionType::kExtreme)\n        .describe(\"Choice of distribution for the noise term in \"\n                  \"Accelerated Failure Time model\");\n    DMLC_DECLARE_FIELD(aft_loss_distribution_scale)\n        .set_default(1.0f)\n        .describe(\"Scaling factor used to scale the distribution in \"\n                  \"Accelerated Failure Time model\");\n  }\n};\n\n/*! \\brief The AFT loss function */\ntemplate<typename Distribution>\nstruct AFTLoss {\n  XGBOOST_DEVICE inline static\n  double Loss(double y_lower, double y_upper, double y_pred, double sigma) {\n    const double log_y_lower = log(y_lower);\n    const double log_y_upper = log(y_upper);\n\n    double cost;\n\n    if (y_lower == y_upper) {  // uncensored\n      const double z = (log_y_lower - y_pred) / sigma;\n      const double pdf = Distribution::PDF(z);\n      // Regularize the denominator with eps, to avoid INF or NAN\n      cost = -log(fmax(pdf / (sigma * y_lower), aft::kEps));\n    } else {  // censored; now check what type of censorship we have\n      double z_u, z_l, cdf_u, cdf_l;\n      if (isinf(y_upper)) {  // right-censored\n        cdf_u = 1;\n      } else {  // left-censored or interval-censored\n        z_u = (log_y_upper - y_pred) / sigma;\n        cdf_u = Distribution::CDF(z_u);\n      }\n      if (y_lower <= 0.0) {  // left-censored\n        cdf_l = 0;\n      } else {  // right-censored or interval-censored\n        z_l = (log_y_lower - y_pred) / sigma;\n        cdf_l = Distribution::CDF(z_l);\n      }\n      // Regularize the denominator with eps, to avoid 
INF or NAN\n      cost = -log(fmax(cdf_u - cdf_l, aft::kEps));\n    }\n\n    return cost;\n  }\n\n  XGBOOST_DEVICE inline static\n  double Gradient(double y_lower, double y_upper, double y_pred, double sigma) {\n    const double log_y_lower = log(y_lower);\n    const double log_y_upper = log(y_upper);\n    double numerator, denominator, gradient;  // numerator and denominator of gradient\n    CensoringType censor_type;\n    bool z_sign;  // sign of z-score\n\n    if (y_lower == y_upper) {  // uncensored\n      const double z = (log_y_lower - y_pred) / sigma;\n      const double pdf = Distribution::PDF(z);\n      const double grad_pdf = Distribution::GradPDF(z);\n      censor_type = CensoringType::kUncensored;\n      numerator = grad_pdf;\n      denominator = sigma * pdf;\n      z_sign = (z > 0);\n    } else {  // censored; now check what type of censorship we have\n      double z_u = 0.0, z_l = 0.0, pdf_u, pdf_l, cdf_u, cdf_l;\n      censor_type = CensoringType::kIntervalCensored;\n      if (isinf(y_upper)) {  // right-censored\n        pdf_u = 0;\n        cdf_u = 1;\n        censor_type = CensoringType::kRightCensored;\n      } else {  // interval-censored or left-censored\n        z_u = (log_y_upper - y_pred) / sigma;\n        pdf_u = Distribution::PDF(z_u);\n        cdf_u = Distribution::CDF(z_u);\n      }\n      if (y_lower <= 0.0) {  // left-censored\n        pdf_l = 0;\n        cdf_l = 0;\n        censor_type = CensoringType::kLeftCensored;\n      } else {  // interval-censored or right-censored\n        z_l = (log_y_lower - y_pred) / sigma;\n        pdf_l = Distribution::PDF(z_l);\n        cdf_l = Distribution::CDF(z_l);\n      }\n      z_sign = (z_u > 0 || z_l > 0);\n      numerator = pdf_u - pdf_l;\n      denominator = sigma * (cdf_u - cdf_l);\n    }\n    gradient = numerator / denominator;\n    if (denominator < aft::kEps && (isnan(gradient) || isinf(gradient))) {\n      gradient = aft::GetLimitGradAtInfPred<Distribution>(censor_type, z_sign, sigma);\n    
}\n\n    return aft::Clip(gradient, aft::kMinGradient, aft::kMaxGradient);\n  }\n\n  XGBOOST_DEVICE inline static\n  double Hessian(double y_lower, double y_upper, double y_pred, double sigma) {\n    const double log_y_lower = log(y_lower);\n    const double log_y_upper = log(y_upper);\n    double numerator, denominator, hessian;  // numerator and denominator of hessian\n    CensoringType censor_type;\n    bool z_sign;  // sign of z-score\n\n    if (y_lower == y_upper) {  // uncensored\n      const double z = (log_y_lower - y_pred) / sigma;\n      const double pdf = Distribution::PDF(z);\n      const double grad_pdf = Distribution::GradPDF(z);\n      const double hess_pdf = Distribution::HessPDF(z);\n      censor_type = CensoringType::kUncensored;\n      numerator = -(pdf * hess_pdf - grad_pdf * grad_pdf);\n      denominator = sigma * sigma * pdf * pdf;\n      z_sign = (z > 0);\n    } else {  // censored; now check what type of censorship we have\n      double z_u = 0.0, z_l = 0.0, grad_pdf_u, grad_pdf_l, pdf_u, pdf_l, cdf_u, cdf_l;\n      censor_type = CensoringType::kIntervalCensored;\n      if (isinf(y_upper)) {  // right-censored\n        pdf_u = 0;\n        cdf_u = 1;\n        grad_pdf_u = 0;\n        censor_type = CensoringType::kRightCensored;\n      } else {  // interval-censored or left-censored\n        z_u = (log_y_upper - y_pred) / sigma;\n        pdf_u = Distribution::PDF(z_u);\n        cdf_u = Distribution::CDF(z_u);\n        grad_pdf_u = Distribution::GradPDF(z_u);\n      }\n      if (y_lower <= 0.0) {  // left-censored\n        pdf_l = 0;\n        cdf_l = 0;\n        grad_pdf_l = 0;\n        censor_type = CensoringType::kLeftCensored;\n      } else {  // interval-censored or right-censored\n        z_l = (log_y_lower - y_pred) / sigma;\n        pdf_l = Distribution::PDF(z_l);\n        cdf_l = Distribution::CDF(z_l);\n        grad_pdf_l = Distribution::GradPDF(z_l);\n      }\n      const double cdf_diff = cdf_u - cdf_l;\n      const double pdf_diff = 
pdf_u - pdf_l;\n      const double grad_diff = grad_pdf_u - grad_pdf_l;\n      const double sqrt_denominator = sigma * cdf_diff;\n      z_sign = (z_u > 0 || z_l > 0);\n      numerator = -(cdf_diff * grad_diff - pdf_diff * pdf_diff);\n      denominator = sqrt_denominator * sqrt_denominator;\n    }\n    hessian = numerator / denominator;\n    if (denominator < aft::kEps && (isnan(hessian) || isinf(hessian))) {\n      hessian = aft::GetLimitHessAtInfPred<Distribution>(censor_type, z_sign, sigma);\n    }\n\n    return aft::Clip(hessian, aft::kMinHessian, aft::kMaxHessian);\n  }\n};\n\nnamespace aft {\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitGradAtInfPred<NormalDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  // Remove unused parameter compiler warning.\n  (void) sigma;\n\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n    return sign ? kMinGradient : kMaxGradient;\n  case CensoringType::kRightCensored:\n    return sign ? kMinGradient : 0.0;\n  case CensoringType::kLeftCensored:\n    return sign ? 0.0 : kMaxGradient;\n  case CensoringType::kIntervalCensored:\n    return sign ? kMinGradient : kMaxGradient;\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitHessAtInfPred<NormalDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n    return 1.0 / (sigma * sigma);\n  case CensoringType::kRightCensored:\n    return sign ? (1.0 / (sigma * sigma)) : kMinHessian;\n  case CensoringType::kLeftCensored:\n    return sign ? 
kMinHessian : (1.0 / (sigma * sigma));\n  case CensoringType::kIntervalCensored:\n    return 1.0 / (sigma * sigma);\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitGradAtInfPred<LogisticDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n    return sign ? (-1.0 / sigma) : (1.0 / sigma);\n  case CensoringType::kRightCensored:\n    return sign ? (-1.0 / sigma) : 0.0;\n  case CensoringType::kLeftCensored:\n    return sign ? 0.0 : (1.0 / sigma);\n  case CensoringType::kIntervalCensored:\n    return sign ? (-1.0 / sigma) : (1.0 / sigma);\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitHessAtInfPred<LogisticDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  // Remove unused parameter compiler warning.\n  (void) sign;\n  (void) sigma;\n\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n  case CensoringType::kRightCensored:\n  case CensoringType::kLeftCensored:\n  case CensoringType::kIntervalCensored:\n    return kMinHessian;\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitGradAtInfPred<ExtremeDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n    return sign ? kMinGradient : (1.0 / sigma);\n  case CensoringType::kRightCensored:\n    return sign ? kMinGradient : 0.0;\n  case CensoringType::kLeftCensored:\n    return sign ? 0.0 : (1.0 / sigma);\n  case CensoringType::kIntervalCensored:\n    return sign ? 
kMinGradient : (1.0 / sigma);\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\ntemplate <>\nXGBOOST_DEVICE inline double\nGetLimitHessAtInfPred<ExtremeDistribution>(CensoringType censor_type, bool sign, double sigma) {\n  // Remove unused parameter compiler warning.\n  (void) sigma;\n\n  switch (censor_type) {\n  case CensoringType::kUncensored:\n  case CensoringType::kRightCensored:\n    return sign ? kMaxHessian : kMinHessian;\n  case CensoringType::kLeftCensored:\n    return kMinHessian;\n  case CensoringType::kIntervalCensored:\n    return sign ? kMaxHessian : kMinHessian;\n  }\n  return std::numeric_limits<double>::quiet_NaN();\n}\n\n}  // namespace aft\n\n}  // namespace common\n}  // namespace xgboost\n\n#endif  // XGBOOST_COMMON_SURVIVAL_UTIL_H_\n"
  },
  {
    "path": "src/common/threading_utils.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include \"threading_utils.h\"\n\n#include <algorithm>   // for max, min\n#include <exception>   // for exception\n#include <filesystem>  // for path, exists\n#include <fstream>     // for ifstream\n#include <string>      // for string\n\n#include \"common.h\"  // for DivRoundUp\n\n#if defined(__linux__)\n#include <pthread.h>\n#include <sys/syscall.h>  // for SYS_getcpu\n#include <unistd.h>       // for syscall\n#endif\n\nnamespace xgboost::common {\n/**\n * Modified from\n * github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp\n *\n * MIT License: Copyright (c) 2016 Domagoj Šarić\n */\nstd::int32_t GetCGroupV1Count(std::filesystem::path const& quota_path,\n                              std::filesystem::path const& peroid_path) {\n#if defined(__linux__)\n  // https://bugs.openjdk.java.net/browse/JDK-8146115\n  // http://hg.openjdk.java.net/jdk/hs/rev/7f22774a5f42\n  // RAM limit /sys/fs/cgroup/memory.limit_in_bytes\n  // swap limt /sys/fs/cgroup/memory.memsw.limit_in_bytes\n\n  auto read_int = [](char const* const file_path) noexcept {\n    std::ifstream fin(file_path);\n    if (!fin) {\n      return -1;\n    }\n    std::string value;\n    fin >> value;\n    try {\n      return std::stoi(value);\n    } catch (std::exception const&) {\n      return -1;\n    }\n  };\n  // complete fair scheduler from Linux\n  auto const cfs_quota(read_int(quota_path.c_str()));\n  auto const cfs_period(read_int(peroid_path.c_str()));\n  if ((cfs_quota > 0) && (cfs_period > 0)) {\n    return std::max(cfs_quota / cfs_period, 1);\n  }\n#endif  //  defined(__linux__)\n  return -1;\n}\n\nstd::int32_t GetCGroupV2Count(std::filesystem::path const& bandwidth_path) noexcept(true) {\n  std::int32_t cnt{-1};\n#if defined(__linux__)\n  namespace fs = std::filesystem;\n\n  std::int32_t a{0}, b{0};\n\n  auto warn = [] { LOG(WARNING) << \"Invalid cgroupv2 file.\"; };\n  try {\n    std::ifstream 
fin{bandwidth_path, std::ios::in};\n    fin >> a;\n    fin >> b;\n  } catch (std::exception const&) {\n    warn();\n    return cnt;\n  }\n  if (a > 0 && b > 0) {\n    cnt = std::max(common::DivRoundUp(a, b), 1);\n  }\n#endif  //  defined(__linux__)\n  return cnt;\n}\n\nstd::int32_t GetCfsCPUCount() noexcept {\n  namespace fs = std::filesystem;\n\n  try {\n    fs::path const bandwidth_path{\"/sys/fs/cgroup/cpu.max\"};\n    auto has_v2 = fs::exists(bandwidth_path);\n    if (has_v2) {\n      return GetCGroupV2Count(bandwidth_path);\n    }\n  } catch (std::exception const&) {\n    return -1;\n  }\n\n  try {\n    fs::path const quota_path{\"/sys/fs/cgroup/cpu/cpu.cfs_quota_us\"};\n    fs::path const peroid_path{\"/sys/fs/cgroup/cpu/cpu.cfs_period_us\"};\n    auto has_v1 = fs::exists(quota_path) && fs::exists(peroid_path);\n    if (has_v1) {\n      return GetCGroupV1Count(quota_path, peroid_path);\n    }\n  } catch (std::exception const&) {\n    return -1;\n  }\n\n  return -1;\n}\n\nstd::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true) {\n  // Don't use parallel if we are in a parallel region.\n  if (omp_in_parallel()) {\n    return 1;\n  }\n  // Honor the openmp thread limit, which can be set via environment variable.\n  auto max_n_threads = std::min({omp_get_num_procs(), omp_get_max_threads(), OmpGetThreadLimit()});\n  // If -1 or 0 is specified by the user, we default to maximum number of threads.\n  if (n_threads <= 0) {\n    n_threads = max_n_threads;\n  }\n  n_threads = std::min(n_threads, max_n_threads);\n  n_threads = std::max(n_threads, 1);\n  return n_threads;\n}\n\nvoid NameThread(std::thread* t, StringView name) {\n#if defined(__linux__) && (!defined(__ANDROID__) || __ANDROID_API__ >= 26)\n  auto handle = t->native_handle();\n  char old[16];\n  auto ret = pthread_getname_np(handle, old, 16);\n  if (ret != 0) {\n    LOG(DEBUG) << \"Failed to get the name from thread\";\n  }\n  auto new_name = std::string{old} + \">\" + name.c_str();  // NOLINT\n 
 if (new_name.size() > 15) {\n    new_name = new_name.substr(new_name.size() - 15);\n  }\n  ret = pthread_setname_np(handle, new_name.c_str());\n  if (ret != 0) {\n    LOG(DEBUG) << \"Failed to name thread:\" << ret << \" :\" << new_name;\n  }\n#else\n  (void)name;\n  (void)t;\n#endif\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/threading_utils.cuh",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_THREADING_UTILS_CUH_\n#define XGBOOST_COMMON_THREADING_UTILS_CUH_\n\n#include <algorithm>  // std::min\n#include <cstddef>    // std::size_t\n\n#include \"./math.h\"            // Sqr\n#include \"algorithm.cuh\"       // for InclusiveSum\n#include \"common.h\"            // for safe_cuda\n#include \"device_helpers.cuh\"  // LaunchN\n#include \"xgboost/base.h\"      // XGBOOST_DEVICE\n#include \"xgboost/span.h\"      // Span\n\nnamespace xgboost::common {\n/**\n * \\param n Number of items (length of the base)\n * \\param h hight\n */\nXGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size_t h) {\n  if (n == 0 || h == 0) {\n    return 0;\n  }\n  n -= 1;              // without diagonal entries\n  h = std::min(n, h);  // Used for ranking, h <= n\n  std::size_t total = ((n - (h - 1)) + n) * h / 2;\n  return total;\n}\n\n/**\n * Used for mapping many groups of trapezoid shaped computation onto CUDA blocks.  
The\n * trapezoid must be on upper right corner.\n *\n * Equivalent to loops like:\n *\n * \\code\n *   for (std::size_t i = 0; i < h; ++i) {\n *     for (std::size_t j = i + 1; j < n; ++j) {\n *        do_something();\n *     }\n *   }\n * \\endcode\n *\n * with h <= n\n */\ntemplate <typename U>\nstd::size_t SegmentedTrapezoidThreads(Context const *ctx, Span<U> group_ptr,\n                                      Span<std::size_t> out_group_threads_ptr, std::size_t h) {\n  CHECK_GE(group_ptr.size(), 1);\n  CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());\n  dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {\n    if (idx == 0) {\n      out_group_threads_ptr[0] = 0;\n      return;\n    }\n\n    std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);\n    out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);\n  });\n  InclusiveSum(ctx, out_group_threads_ptr.data(), out_group_threads_ptr.data(),\n               out_group_threads_ptr.size());\n  std::size_t total = 0;\n  dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,\n                           sizeof(total), cudaMemcpyDeviceToHost));\n  return total;\n}\n\n/**\n * Called inside kernel to obtain coordinate from trapezoid grid.\n */\nXGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,\n                                               std::size_t *out_j) {\n  auto &i = *out_i;\n  auto &j = *out_j;\n  double idx = static_cast<double>(i_idx);\n  double N = static_cast<double>(n);\n\n  i = std::ceil(-(0.5 - N + std::sqrt(common::Sqr(N - 0.5) + 2.0 * (-idx - 1.0)))) - 1.0;\n\n  auto I = static_cast<double>(i);\n  size_t n_elems = -0.5 * common::Sqr(I) + (N - 0.5) * I;\n\n  j = idx - n_elems + i + 1;\n}\n}  // namespace xgboost::common\n#endif  // XGBOOST_COMMON_THREADING_UTILS_CUH_\n"
  },
  {
    "path": "src/common/threading_utils.h",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_THREADING_UTILS_H_\n#define XGBOOST_COMMON_THREADING_UTILS_H_\n\n#include <dmlc/common.h>\n#include <dmlc/omp.h>\n\n#include <algorithm>    // for min\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <cstdlib>      // for malloc, free\n#include <new>          // for bad_alloc\n#include <thread>       // for thread\n#include <type_traits>  // for is_signed, conditional_t, is_integral_v, invoke_result_t\n#include <utility>      // for forward\n#include <vector>       // for vector\n\n#include \"common.h\"  // for DivRoundUp\n#include \"xgboost/logging.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n\n#if !defined(_OPENMP)\nextern \"C\" {\ninline int32_t omp_get_thread_limit() __GOMP_NOTHROW { return 1; }  // NOLINT\n}\n#endif  // !defined(_OPENMP)\n\n// MSVC doesn't implement the thread limit.\n#if defined(_OPENMP) && defined(_MSC_VER)\n#include <limits>\n\nextern \"C\" {\ninline int32_t omp_get_thread_limit() { return std::numeric_limits<int32_t>::max(); }  // NOLINT\n}\n#endif  // defined(_MSC_VER)\n\nnamespace xgboost::common {\n// Represent simple range of indexes [begin, end)\n// Inspired by tbb::blocked_range\nclass Range1d {\n public:\n  Range1d(std::size_t begin, std::size_t end) : begin_{begin}, end_{end} { CHECK_LT(begin, end); }\n\n  [[nodiscard]] std::size_t begin() const {  // NOLINT\n    return begin_;\n  }\n  [[nodiscard]] std::size_t end() const {  // NOLINT\n    return end_;\n  }\n  [[nodiscard]] std::size_t Size() const { return this->end() - this->begin(); }\n\n private:\n  std::size_t begin_;\n  std::size_t end_;\n};\n\n// Split 2d space to balanced blocks\n// Implementation of the class is inspired by tbb::blocked_range2d\n// However, TBB provides only (n x m) 2d range (matrix) separated by blocks. 
Example:\n// [ 1,2,3 ]\n// [ 4,5,6 ]\n// [ 7,8,9 ]\n// But the class is able to work with different sizes in each 'row'. Example:\n// [ 1,2 ]\n// [ 3,4,5,6 ]\n// [ 7,8,9]\n// If grain_size is 2: It produces following blocks:\n// [1,2], [3,4], [5,6], [7,8], [9]\n// The class helps to process data in several tree nodes (non-balanced usually) in parallel\n// Using nested parallelism (by nodes and by data in each node)\n// it helps to improve CPU resources utilization\nclass BlockedSpace2d {\n public:\n  // Example of space:\n  // [ 1,2 ]\n  // [ 3,4,5,6 ]\n  // [ 7,8,9]\n  // BlockedSpace2d will create following blocks (tasks) if grain_size=2:\n  // 1-block: first_dimension = 0, range of indexes in a 'row' = [0,2) (includes [1,2] values)\n  // 2-block: first_dimension = 1, range of indexes in a 'row' = [0,2) (includes [3,4] values)\n  // 3-block: first_dimension = 1, range of indexes in a 'row' = [2,4) (includes [5,6] values)\n  // 4-block: first_dimension = 2, range of indexes in a 'row' = [0,2) (includes [7,8] values)\n  // 5-block: first_dimension = 2, range of indexes in a 'row' = [2,3) (includes [9] values)\n  // Arguments:\n  // dim1 - size of the first dimension in the space\n  // getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index\n  // grain_size - max size of produced blocks\n  template <typename Getter>\n  BlockedSpace2d(std::size_t dim1, Getter&& getter_size_dim2, std::size_t grain_size) {\n    static_assert(std::is_integral_v<std::invoke_result_t<Getter, std::size_t>>);\n    for (std::size_t i = 0; i < dim1; ++i) {\n      std::size_t size = getter_size_dim2(i);\n      // Each row (second dim) is divided into n_blocks\n      std::size_t n_blocks = size / grain_size + !!(size % grain_size);\n      for (std::size_t iblock = 0; iblock < n_blocks; ++iblock) {\n        std::size_t begin = iblock * grain_size;\n        std::size_t end = std::min(begin + grain_size, size);\n        AddBlock(i, begin, end);\n      }\n    }\n  }\n\n  
// Amount of blocks(tasks) in a space\n  [[nodiscard]] std::size_t Size() const { return ranges_.size(); }\n\n  // get index of the first dimension of i-th block(task)\n  [[nodiscard]] std::size_t GetFirstDimension(std::size_t i) const {\n    CHECK_LT(i, first_dimension_.size());\n    return first_dimension_[i];\n  }\n\n  // get a range of indexes for the second dimension of i-th block(task)\n  [[nodiscard]] Range1d GetRange(std::size_t i) const {\n    CHECK_LT(i, ranges_.size());\n    return ranges_[i];\n  }\n\n private:\n  /**\n   * @brief Add a parallel block.\n   *\n   * @param first_dim The row index.\n   * @param begin     The begin of the second dimension.\n   * @param end       The end of the second dimension.\n   */\n  void AddBlock(std::size_t first_dim, std::size_t begin, std::size_t end) {\n    first_dimension_.push_back(first_dim);\n    ranges_.emplace_back(begin, end);\n  }\n\n  std::vector<Range1d> ranges_;\n  std::vector<std::size_t> first_dimension_;\n};\n\n// Wrapper to implement nested parallelism with simple omp parallel for\ntemplate <typename Func>\nvoid ParallelFor2d(const BlockedSpace2d& space, std::int32_t n_threads, Func&& func) {\n  static_assert(std::is_void_v<std::invoke_result_t<Func, std::size_t, Range1d>>);\n  std::size_t n_blocks_in_space = space.Size();\n  CHECK_GE(n_threads, 1);\n\n  dmlc::OMPException exc;\n#pragma omp parallel num_threads(n_threads)\n  {\n    exc.Run([&]() {\n      std::size_t tid = omp_get_thread_num();\n      std::size_t chunck_size = n_blocks_in_space / n_threads + !!(n_blocks_in_space % n_threads);\n\n      std::size_t begin = chunck_size * tid;\n      std::size_t end = std::min(begin + chunck_size, n_blocks_in_space);\n      for (auto i = begin; i < end; i++) {\n        func(space.GetFirstDimension(i), space.GetRange(i));\n      }\n    });\n  }\n  exc.Rethrow();\n}\n\n/**\n * OpenMP schedule\n */\nstruct Sched {\n  enum {\n    kAuto,\n    kDynamic,\n    kStatic,\n    kGuided,\n  } sched;\n  size_t 
chunk{0};\n\n  Sched static Auto() { return Sched{kAuto}; }\n  Sched static Dyn(size_t n = 0) { return Sched{kDynamic, n}; }\n  Sched static Static(size_t n = 0) { return Sched{kStatic, n}; }\n  Sched static Guided() { return Sched{kGuided}; }\n};\n\ntemplate <typename Index, typename Func>\nvoid ParallelFor(Index size, std::int32_t n_threads, Sched sched, Func&& fn) {\n  if (n_threads == 1) {\n    // early exit\n    for (Index i = 0; i < size; ++i) {\n      fn(i);\n    }\n    return;\n  }\n\n#if defined(_MSC_VER)\n  // msvc doesn't support unsigned integer as openmp index.\n  using OmpInd = std::conditional_t<std::is_signed<Index>::value, Index, omp_ulong>;\n#else\n  using OmpInd = Index;\n#endif\n  OmpInd length = static_cast<OmpInd>(size);\n  CHECK_GE(n_threads, 1);\n\n  dmlc::OMPException exc;\n  switch (sched.sched) {\n    case Sched::kAuto: {\n#pragma omp parallel for num_threads(n_threads)\n      for (OmpInd i = 0; i < length; ++i) {\n        exc.Run(fn, i);\n      }\n      break;\n    }\n    case Sched::kDynamic: {\n      if (sched.chunk == 0) {\n#pragma omp parallel for num_threads(n_threads) schedule(dynamic)\n        for (OmpInd i = 0; i < length; ++i) {\n          exc.Run(fn, i);\n        }\n      } else {\n#pragma omp parallel for num_threads(n_threads) schedule(dynamic, sched.chunk)\n        for (OmpInd i = 0; i < length; ++i) {\n          exc.Run(fn, i);\n        }\n      }\n      break;\n    }\n    case Sched::kStatic: {\n      if (sched.chunk == 0) {\n#pragma omp parallel for num_threads(n_threads) schedule(static)\n        for (OmpInd i = 0; i < length; ++i) {\n          exc.Run(fn, i);\n        }\n      } else {\n#pragma omp parallel for num_threads(n_threads) schedule(static, sched.chunk)\n        for (OmpInd i = 0; i < length; ++i) {\n          exc.Run(fn, i);\n        }\n      }\n      break;\n    }\n    case Sched::kGuided: {\n#pragma omp parallel for num_threads(n_threads) schedule(guided)\n      for (OmpInd i = 0; i < length; ++i) {\n       
 exc.Run(fn, i);\n      }\n      break;\n    }\n  }\n  exc.Rethrow();\n}\n\ntemplate <typename Index, typename Func>\nvoid ParallelFor(Index size, std::int32_t n_threads, Func&& fn) {\n  ParallelFor(size, n_threads, Sched::Static(), std::forward<Func>(fn));\n}\n\n/**\n * @brief 1-d block-based parallel for loop.\n *\n * @tparam kBlockOfRowsSize The size of the block.\n * @tparam Index The type of the index.\n * @tparam Func The type of the function.\n *\n * @param size The size of the range.\n * @param n_threads The number of threads.\n * @param fn The function to execute. The function should take a Range1d as an argument.\n */\ntemplate <std::size_t kBlockOfRowsSize, typename Index, typename Func>\nvoid ParallelFor1d(Index size, std::int32_t n_threads, Func&& fn) {\n  static_assert(std::is_void_v<std::invoke_result_t<Func, common::Range1d>>);\n  auto const n_blocks = DivRoundUp(size, kBlockOfRowsSize);\n  common::ParallelFor(n_blocks, n_threads, [&](auto block_id) {\n    std::size_t const block_beg = block_id * kBlockOfRowsSize;\n    auto const block_size = std::min(static_cast<std::size_t>(size - block_beg), kBlockOfRowsSize);\n    fn(common::Range1d{block_beg, block_beg + block_size});\n  });\n}\n\n/** @brief Use n_threads as the number of blocks. 
*/\ntemplate <typename Index, typename Func>\nvoid ParallelForBlock(Index size, std::int32_t n_threads, Func&& fn) {\n  static_assert(std::is_void_v<std::invoke_result_t<Func, common::Range1d>>);\n  std::size_t blk_size = size / n_threads + (size % n_threads > 0);\n  ParallelFor(n_threads, n_threads, [&](auto tid) {\n    auto blk_beg = tid * blk_size;\n    auto blk_end = std::min((tid + 1) * blk_size, static_cast<std::size_t>(size));\n    if (blk_end <= blk_beg) {\n      return;\n    }\n    fn(common::Range1d{blk_beg, blk_end});\n  });\n}\n\ninline std::int32_t OmpGetThreadLimit() {\n  std::int32_t limit = omp_get_thread_limit();\n  CHECK_GE(limit, 1) << \"Invalid thread limit for OpenMP.\";\n  return limit;\n}\n\n/**\n * @brief Get thread limit from CFS.\n *\n * This function has non-trivial overhead and should not be called repeatedly.\n */\nstd::int32_t GetCfsCPUCount() noexcept;\n\n/**\n * @brief Get the number of available threads based on n_threads specified by users.\n */\nstd::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true);\n\n/**\n * @brief A C-style array with in-stack allocation.\n *\n * As long as the array is smaller than MaxStackSize, it will be allocated inside the\n * stack. 
Otherwise, it will be heap-allocated.\n */\ntemplate <typename T, std::size_t MaxStackSize>\nclass MemStackAllocator {\n public:\n  explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {\n    if (MaxStackSize >= required_size_) {\n      ptr_ = stack_mem_;\n    } else {\n      ptr_ = reinterpret_cast<T*>(std::malloc(required_size_ * sizeof(T)));\n    }\n    if (!ptr_) {\n      throw std::bad_alloc{};\n    }\n  }\n  MemStackAllocator(size_t required_size, T init) : MemStackAllocator{required_size} {\n    std::fill_n(ptr_, required_size_, init);\n  }\n\n  ~MemStackAllocator() {\n    if (required_size_ > MaxStackSize) {\n      std::free(ptr_);\n    }\n  }\n  T& operator[](size_t i) { return ptr_[i]; }\n  T const& operator[](size_t i) const { return ptr_[i]; }\n\n  auto data() const { return ptr_; }                   // NOLINT\n  auto data() { return ptr_; }                         // NOLINT\n  std::size_t size() const { return required_size_; }  // NOLINT\n\n  auto cbegin() const { return data(); }         // NOLINT\n  auto cend() const { return data() + size(); }  // NOLINT\n\n private:\n  T* ptr_ = nullptr;\n  size_t required_size_;\n  T stack_mem_[MaxStackSize];\n};\n\n/**\n * @brief Constant that can be used for initializing static thread local memory.\n */\nstd::int32_t constexpr DefaultMaxThreads() { return 128; }\n\n/**\n * @brief Give the thread a name. Supports only pthread on linux.\n */\nvoid NameThread(std::thread* t, StringView name);\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_COMMON_THREADING_UTILS_H_\n"
  },
  {
    "path": "src/common/threadpool.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#pragma once\n#include <condition_variable>  // for condition_variable\n#include <cstdint>             // for int32_t\n#include <functional>          // for function\n#include <future>              // for promise\n#include <memory>              // for make_shared\n#include <mutex>               // for mutex, unique_lock\n#include <queue>               // for queue\n#include <string>              // for string\n#include <thread>              // for thread\n#include <type_traits>         // for invoke_result_t\n#include <utility>             // for move\n#include <vector>              // for vector\n\n#include \"threading_utils.h\"      // for NameThread\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::common {\n/**\n * @brief Simple implementation of a thread pool.\n */\nclass ThreadPool {\n  std::mutex mu_;\n  std::queue<std::function<void()>> tasks_;\n  std::condition_variable cv_;\n  std::vector<std::thread> pool_;\n  bool stop_{false};\n\n public:\n  /**\n   * @param name      Name prefix for threads.\n   * @param n_threads The number of threads this pool should hold.\n   * @param init_fn   Function called once during thread creation.\n   */\n  template <typename InitFn>\n  explicit ThreadPool(StringView name, std::int32_t n_threads, InitFn&& init_fn) {\n    for (std::int32_t i = 0; i < n_threads; ++i) {\n      pool_.emplace_back([&, init_fn = std::forward<InitFn>(init_fn)] {\n        init_fn();\n\n        while (true) {\n          std::unique_lock lock{mu_};\n          cv_.wait(lock, [this] { return !this->tasks_.empty() || stop_; });\n\n          if (this->stop_) {\n            while (!tasks_.empty()) {\n              auto fn = tasks_.front();\n              tasks_.pop();\n              fn();\n            }\n            return;\n          }\n\n          auto fn = tasks_.front();\n          tasks_.pop();\n          lock.unlock();\n          fn();\n        }\n      
});\n      std::string name_i = name.c_str() + std::string{\"-\"} + std::to_string(i);  // NOLINT\n      NameThread(&pool_.back(), name_i);\n    }\n  }\n\n  ~ThreadPool() {\n    std::unique_lock lock{mu_};\n    stop_ = true;\n    lock.unlock();\n\n    for (auto& t : pool_) {\n      if (t.joinable()) {\n        std::unique_lock lock{mu_};\n        this->cv_.notify_one();\n        lock.unlock();\n      }\n    }\n\n    for (auto& t : pool_) {\n      if (t.joinable()) {\n        t.join();\n      }\n    }\n  }\n\n  /**\n   * @brief Submit a function that doesn't take any argument.\n   */\n  template <typename Fn, typename R = std::invoke_result_t<Fn>>\n  auto Submit(Fn&& fn) {\n    // Use shared ptr to make the task copy constructible.\n    auto p{std::make_shared<std::promise<R>>()};\n    auto fut = p->get_future();\n    auto ffn = std::function{[task = std::move(p), fn = std::forward<Fn>(fn)]() mutable {\n      if constexpr (std::is_void_v<R>) {\n        fn();\n        task->set_value();\n      } else {\n        task->set_value(fn());\n      }\n    }};\n\n    std::unique_lock lock{mu_};\n    this->tasks_.push(std::move(ffn));\n    lock.unlock();\n\n    cv_.notify_one();\n    return fut;\n  }\n\n  [[nodiscard]] auto NumWorkers() const {\n    // Use the same type as the typical n_threads parameter in XGBoost.\n    return static_cast<std::int32_t>(pool_.size());\n  }\n};\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/timer.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include \"timer.h\"\n\n#include <utility>\n\n#include \"../collective/communicator-inl.h\"\n#include \"nvtx_utils.h\"  // for Domain\n\n#if defined(XGBOOST_USE_NVTX)\n#include <nvtx3/nvtx3.hpp>\n#endif  // defined(XGBOOST_USE_NVTX)\n\nnamespace xgboost::common {\nvoid Monitor::Start(std::string const &name) {\n  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {\n    auto &stats = statistics_map_[name];\n    stats.timer.Start();\n#if defined(XGBOOST_USE_NVTX)\n    auto range_handle = nvtx3::start_range_in<nvtx::Domain>(label_ + \"::\" + name);\n    stats.nvtx_id = range_handle.get_value();\n#endif  // defined(XGBOOST_USE_NVTX)\n  }\n}\n\nvoid Monitor::Stop(const std::string &name) {\n  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {\n    auto &stats = statistics_map_[name];\n    stats.timer.Stop();\n    stats.count++;\n#if defined(XGBOOST_USE_NVTX)\n    nvtx3::end_range_in<nvtx::Domain>(nvtx3::range_handle{stats.nvtx_id});\n#endif  // defined(XGBOOST_USE_NVTX)\n  }\n}\n\nvoid Monitor::PrintStatistics(StatMap const &statistics) const {\n  for (auto &kv : statistics) {\n    if (kv.second.first == 0) {\n      LOG(WARNING) << \"Timer for \" << kv.first << \" did not get stopped properly.\";\n      continue;\n    }\n    LOG(CONSOLE) << kv.first << \": \" << static_cast<double>(kv.second.second) / 1e+6 << \"s, \"\n                 << kv.second.first << \" calls @ \" << kv.second.second << \"us\" << std::endl;\n  }\n}\n\nvoid Monitor::Print() const {\n  if (!ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {\n    return;\n  }\n  auto rank = collective::GetRank();\n  StatMap stat_map;\n  for (auto const &kv : statistics_map_) {\n    stat_map[kv.first] = std::make_pair(\n        kv.second.count,\n        std::chrono::duration_cast<std::chrono::microseconds>(kv.second.timer.elapsed).count());\n  }\n  if (stat_map.empty()) {\n    return;\n  }\n  LOG(CONSOLE) << \"======== Monitor 
(\" << rank << \"): \" << label_ << \" ========\";\n  this->PrintStatistics(stat_map);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/timer.h",
    "content": "/**\n * Copyright 2017-2024, XGBoost Contributors\n */\n#pragma once\n#include <xgboost/logging.h>\n\n#include <chrono>\n#include <map>\n#include <string>\n#include <utility>\n\nnamespace xgboost::common {\nstruct Timer {\n  using ClockT = std::chrono::high_resolution_clock;\n  using TimePointT = std::chrono::high_resolution_clock::time_point;\n  using DurationT = std::chrono::high_resolution_clock::duration;\n  using SecondsT = std::chrono::duration<double>;\n\n  TimePointT start;\n  DurationT elapsed;\n  Timer() { Reset(); }\n  void Reset() {\n    elapsed = DurationT::zero();\n    Start();\n  }\n  void Start() { start = ClockT::now(); }\n  void Stop() { elapsed += ClockT::now() - start; }\n  double ElapsedSeconds() const { return SecondsT(elapsed).count(); }\n  SecondsT Duration() const { return ClockT::now() - start; }\n  void PrintElapsed(std::string label) {\n    char buffer[255];\n    snprintf(buffer, sizeof(buffer), \"%s:\\t %fs\", label.c_str(),\n             SecondsT(elapsed).count());\n    LOG(CONSOLE) << buffer;\n    Reset();\n  }\n};\n\n/**\n * \\struct  Monitor\n *\n * \\brief Timing utility used to measure total method execution time over the\n * lifetime of the containing object.\n */\nstruct Monitor {\n private:\n  struct Statistics {\n    Timer timer;\n    size_t count{0};\n    uint64_t nvtx_id;\n  };\n\n  // from left to right, <name <count, elapsed>>\n  using StatMap = std::map<std::string, std::pair<size_t, size_t>>;\n\n  std::string label_ = \"\";\n  std::map<std::string, Statistics> statistics_map_;\n  Timer self_timer_;\n\n  void PrintStatistics(StatMap const& statistics) const;\n\n public:\n  Monitor() { self_timer_.Start(); }\n  /*\\brief Print statistics info during destruction.\n   *\n   * Please note that this may not work, as with distributed frameworks like Dask, the\n   * model is pickled to other workers, and the global parameters like `global_verbosity_`\n   * are not included in the pickle.\n   */\n  ~Monitor() {\n  
  this->Print();\n    self_timer_.Stop();\n  }\n\n  /*! \\brief Print all the statistics. */\n  void Print() const;\n\n  void Init(std::string label) { this->label_ = label; }\n  void Start(const std::string &name);\n  void Stop(const std::string &name);\n};\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/transform.h",
    "content": "/*!\n * Copyright 2018-2022 XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_TRANSFORM_H_\n#define XGBOOST_COMMON_TRANSFORM_H_\n\n#include <dmlc/common.h>\n#include <dmlc/omp.h>\n#include <xgboost/data.h>\n\n#include <type_traits>  // enable_if\n#include <utility>\n#include <vector>\n\n#include \"common.h\"\n#include \"threading_utils.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/span.h\"\n\n#if defined (__CUDACC__)\n#include \"device_helpers.cuh\"\n#endif  // defined (__CUDACC__)\n\n#if defined (SYCL_LANGUAGE_VERSION)\n#include \"../plugin/sycl/common/transform.h\"\n#endif  // defined (SYCL_LANGUAGE_VERSION)\n\nnamespace xgboost {\nnamespace common {\n\nconstexpr size_t kBlockThreads = 256;\n\nnamespace detail {\n\n#if defined(__CUDACC__)\ntemplate <typename Functor, typename... SpanType>\n__global__ void LaunchCUDAKernel(Functor _func, Range _range,\n                                 SpanType... _spans) {\n  for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {\n    _func(i, _spans...);\n  }\n}\n#endif  // defined(__CUDACC__)\n\n}  // namespace detail\n\n/*! \\brief Do Transformation on HostDeviceVectors.\n *\n *  \\tparam CompiledWithCuda A bool parameter used to distinguish compilation\n *         trajectories, users do not need to use it.\n *\n *  Note: Using Transform is a VERY tricky thing to do. Transform uses template\n *   argument to duplicate itself into two different types, one for CPU,\n *   another for CUDA.  The trick is not without its flaw:\n *\n *     If you use it in a function that can be compiled by both nvcc and host\n *     compiler, the behaviour is un-defined!  Because your function is NOT\n *     duplicated by `CompiledWithCuda`. 
At link time, CUDA compiler resolution\n *     will merge functions with same signature.\n */\ntemplate <bool CompiledWithCuda = WITH_CUDA()>\nclass Transform {\n private:\n  template <typename Functor>\n  struct Evaluator {\n   public:\n    Evaluator(Functor func, Range range, int32_t n_threads, DeviceOrd device)\n        : func_(func), range_{std::move(range)}, n_threads_{n_threads}, device_{device} {}\n\n    /*!\n     * \\brief Evaluate the functor with input pointers to HostDeviceVector.\n     *\n     * \\tparam HDV...  HostDeviceVectors type.\n     * \\param  vectors Pointers to HostDeviceVector.\n     */\n    template <typename... HDV>\n    void Eval(HDV... vectors) const {\n      if (device_.IsCUDA()) {\n        LaunchCUDA(func_, vectors...);\n      } else if (device_.IsSycl()) {\n        LaunchSycl(func_, vectors...);\n      } else {\n        LaunchCPU(func_, vectors...);\n      }\n    }\n\n   private:\n    // CUDA UnpackHDV\n    template <typename T>\n    Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {\n      auto span = _vec->DeviceSpan();\n      return span;\n    }\n    template <typename T>\n    Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {\n      auto span = _vec->ConstDeviceSpan();\n      return span;\n    }\n    // CPU UnpackHDV\n    template <typename T>\n    Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {\n      return Span<T> {_vec->HostPointer(),\n            static_cast<typename Span<T>::index_type>(_vec->Size())};\n    }\n    template <typename T>\n    Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {\n      return Span<T const> {_vec->ConstHostPointer(),\n            static_cast<typename Span<T>::index_type>(_vec->Size())};\n    }\n    // Recursive sync host\n    template <typename T>\n    void SyncHost(const HostDeviceVector<T> *_vector) const {\n      _vector->ConstHostPointer();\n    }\n    template <typename Head, typename... 
Rest>\n    void SyncHost(const HostDeviceVector<Head> *_vector,\n                  const HostDeviceVector<Rest> *... _vectors) const {\n      _vector->ConstHostPointer();\n      SyncHost(_vectors...);\n    }\n    // Recursive unpack for Shard.\n    template <typename T>\n    void UnpackShard(DeviceOrd device, const HostDeviceVector<T> *vector) const {\n      vector->SetDevice(device);\n    }\n    template <typename Head, typename... Rest>\n    void UnpackShard(DeviceOrd device,\n                     const HostDeviceVector<Head> *_vector,\n                     const HostDeviceVector<Rest> *... _vectors) const {\n      _vector->SetDevice(device);\n      UnpackShard(device, _vectors...);\n    }\n\n#if defined(__CUDACC__)\n    template <typename std::enable_if_t<CompiledWithCuda>* = nullptr,\n              typename... HDV>\n    void LaunchCUDA(Functor _func, HDV*... _vectors) const {\n      UnpackShard(device_, _vectors...);\n\n      size_t range_size = *range_.end() - *range_.begin();\n\n      // Extract index to deal with possible old OpenMP.\n      // This deals with situation like multi-class setting where\n      // granularity is used in data vector.\n      size_t shard_size = range_size;\n      Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};\n      dh::safe_cuda(cudaSetDevice(device_.ordinal));\n      const int kGrids =\n          static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));\n      if (kGrids == 0) {\n        return;\n      }\n      detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>(  // NOLINT\n          _func, shard_range, UnpackHDVOnDevice(_vectors)...);\n    }\n#else\n    /*! \\brief Dummy function defined when compiling for CPU.  */\n    template <typename std::enable_if_t<!CompiledWithCuda> * = nullptr, typename... HDV>\n    void LaunchCUDA(Functor _func, HDV *...) const {\n      // Remove unused parameter compiler warning.\n      (void) _func;\n\n      LOG(FATAL) << \"Not part of device code. 
WITH_CUDA: \" << WITH_CUDA();\n    }\n#endif  // defined(__CUDACC__)\n\n#if defined (SYCL_LANGUAGE_VERSION)\n    template <typename... HDV>\n    void LaunchSycl(Functor _func, HDV*... _vectors) const {\n      UnpackShard(device_, _vectors...);\n\n      size_t range_size = *range_.end() - *range_.begin();\n      Range shard_range {0, static_cast<Range::DifferenceType>(range_size)};\n      sycl::common::LaunchSyclKernel(device_, _func, shard_range, UnpackHDVOnDevice(_vectors)...);\n    }\n#else\n    template <typename... HDV>\n    void LaunchSycl(Functor _func, HDV *... _vectors) const {\n      LaunchCPU(_func, _vectors...);\n    }\n#endif  // defined(SYCL_LANGUAGE_VERSION)\n\n    template <typename... HDV>\n    void LaunchCPU(Functor func, HDV *...vectors) const {\n      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));\n      SyncHost(vectors...);\n      ParallelFor(end, n_threads_, [&](omp_ulong idx) { func(idx, UnpackHDV(vectors)...); });\n    }\n\n   private:\n    /*! \\brief Callable object. */\n    Functor func_;\n    /*! \\brief Range object specifying parallel threads index range. 
*/\n    Range range_;\n    int32_t n_threads_;\n    DeviceOrd device_;\n  };\n\n public:\n  /*!\n   * \\brief Initialize a Transform object.\n   *\n   * \\tparam Functor  A callable object type.\n   * \\return A Evaluator having one method Eval.\n   *\n   * \\param func    A callable object, accepting a size_t thread index,\n   *                  followed by a set of Span classes.\n   * \\param range   Range object specifying parallel threads index range.\n   * \\param n_threads  Number of CPU threads\n   * \\param device_idx GPU device ordinal\n   */\n  template <typename Functor>\n  static Evaluator<Functor> Init(Functor func, Range const range, int32_t n_threads,\n                                 DeviceOrd device) {\n    return Evaluator<Functor>{func, std::move(range), n_threads, device};\n  }\n};\n\n}  // namespace common\n}  // namespace xgboost\n\n#endif  // XGBOOST_COMMON_TRANSFORM_H_\n"
  },
  {
    "path": "src/common/transform_iterator.h",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_COMMON_TRANSFORM_ITERATOR_H_\n#define XGBOOST_COMMON_TRANSFORM_ITERATOR_H_\n\n#include <cstddef>      // std::size_t\n#include <iterator>     // std::random_access_iterator_tag\n#include <type_traits>  // for invoke_result_t, add_pointer_t, add_lvalue_reference_t\n#include <utility>      // std::forward\n\n#include \"xgboost/span.h\"  // ptrdiff_t\n\nnamespace xgboost {\nnamespace common {\n/**\n * \\brief Transform iterator that takes an index and calls transform operator.\n *\n *   This is CPU-only right now as taking host device function as operator complicates the\n *   code.  For device side one can use `thrust::transform_iterator` instead.\n */\ntemplate <typename Fn>\nclass IndexTransformIter {\n  std::size_t iter_{0};\n  Fn fn_;\n\n public:\n  using iterator_category = std::random_access_iterator_tag;                // NOLINT\n  using reference = std::invoke_result_t<Fn, std::size_t>;                  // NOLINT\n  using value_type = std::remove_cv_t<std::remove_reference_t<reference>>;  // NOLINT\n  using difference_type = detail::ptrdiff_t;                                // NOLINT\n  using pointer = std::add_pointer_t<value_type>;                           // NOLINT\n\n public:\n  /**\n   * \\param op Transform operator, takes a size_t index as input.\n   */\n  explicit IndexTransformIter(Fn &&op) : fn_{op} {}\n  IndexTransformIter(IndexTransformIter const &) = default;\n  IndexTransformIter &operator=(IndexTransformIter &&) = default;\n  IndexTransformIter &operator=(IndexTransformIter const &that) {\n    iter_ = that.iter_;\n    return *this;\n  }\n\n  reference operator*() const { return fn_(iter_); }\n  reference operator[](std::size_t i) const {\n    auto iter = *this + i;\n    return *iter;\n  }\n\n  auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; }\n  bool operator==(IndexTransformIter const &that) const { return iter_ == 
that.iter_; }\n  bool operator!=(IndexTransformIter const &that) const { return !(*this == that); }\n  bool operator<(IndexTransformIter const &that) const { return iter_ < that.iter_; }\n  bool operator>(IndexTransformIter const &that) const { return that < *this; }\n  bool operator<=(IndexTransformIter const &that) const { return !(that < *this); }\n  bool operator>=(IndexTransformIter const &that) const { return !(*this < that); }\n\n  IndexTransformIter &operator++() {\n    iter_++;\n    return *this;\n  }\n  IndexTransformIter operator++(int) {\n    auto ret = *this;\n    ++(*this);\n    return ret;\n  }\n  IndexTransformIter &operator--() {\n    iter_--;\n    return *this;\n  }\n  IndexTransformIter operator--(int) {\n    auto ret = *this;\n    --(*this);\n    return ret;\n  }\n  IndexTransformIter &operator+=(difference_type n) {\n    iter_ += n;\n    return *this;\n  }\n  IndexTransformIter &operator-=(difference_type n) {\n    (*this) += -n;\n    return *this;\n  }\n  IndexTransformIter operator+(difference_type n) const {\n    auto ret = *this;\n    return ret += n;\n  }\n  IndexTransformIter operator-(difference_type n) const {\n    auto ret = *this;\n    return ret -= n;\n  }\n};\n\ntemplate <typename Fn>\nauto MakeIndexTransformIter(Fn &&fn) {\n  return IndexTransformIter<Fn>(std::forward<Fn>(fn));\n}\n}  // namespace common\n}  // namespace xgboost\n#endif  // XGBOOST_COMMON_TRANSFORM_ITERATOR_H_\n"
  },
  {
    "path": "src/common/type.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>      // for int8_t\n#include <type_traits>  // for is_const_v, add_const_t, conditional_t, add_pointer_t\n\n#include \"xgboost/span.h\"  // for Span\nnamespace xgboost::common {\ntemplate <typename T, typename U = std::conditional_t<std::is_const_v<T>,\n                                                      std::add_const_t<std::int8_t>, std::int8_t>>\ncommon::Span<U> EraseType(common::Span<T> data) {\n  auto n_total_bytes = data.size_bytes();\n  auto erased = common::Span{reinterpret_cast<std::add_pointer_t<U>>(data.data()), n_total_bytes};\n  return erased;\n}\n\ntemplate <typename T, typename U>\ncommon::Span<T> RestoreType(common::Span<U> data) {\n  auto n_total_bytes = data.size_bytes();\n  auto restored = common::Span{reinterpret_cast<T*>(data.data()), n_total_bytes / sizeof(T)};\n  return restored;\n}\n\ntemplate <typename T>\nusing GetValueT = std::remove_cv_t<std::remove_reference_t<T>>;\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/utils.h",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#pragma once\n#include <functional>  // for function\n#include <utility>     // for forward\n\n#include \"xgboost/base.h\"\n\nnamespace xgboost::common {\n/** @brief RAII guard, simplified version of absl::Cleanup . */\nclass Cleanup {\n  std::function<void()> cb_;\n\n public:\n  template <typename Callback>\n  explicit Cleanup(Callback&& cb) : cb_{std::forward<Callback>(cb)} {}\n\n  ~Cleanup() { this->cb_(); }\n};\n\ntemplate <typename Callback>\nauto MakeCleanup(Callback&& cb) {\n  return Cleanup{std::forward<Callback>(cb)};\n}\n\ntemplate <typename R>\nstruct NoOp {\n  R val;\n\n  explicit NoOp(R&& v) : val{std::forward<R>(v)} {}\n\n  template <typename... Args>\n  XGBOOST_DEVICE R operator()(Args&&...) const {\n    return val;\n  }\n};\n\ntemplate <>\nstruct NoOp<void> {\n  template <typename... Args>\n  XGBOOST_DEVICE void operator()(Args&&...) const {}\n};\n}  // namespace xgboost::common\n"
  },
  {
    "path": "src/common/version.cc",
    "content": "/*!\n * Copyright 2019 XGBoost contributors\n */\n#include <dmlc/io.h>\n\n#include <string>\n#include <tuple>\n#include <vector>\n\n#include \"xgboost/logging.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/version_config.h\"\n#include \"version.h\"\n\nnamespace xgboost {\n\nconst Version::TripletT Version::kInvalid {-1, -1, -1};\n\nVersion::TripletT Version::Load(Json const& in) {\n  if (get<Object const>(in).find(\"version\") == get<Object const>(in).cend()) {\n    return kInvalid;\n  }\n  Integer::Int major {0}, minor {0}, patch {0};\n  try {\n    auto const& j_version = get<Array const>(in[\"version\"]);\n    std::tie(major, minor, patch) = std::make_tuple(\n        get<Integer const>(j_version.at(0)),\n        get<Integer const>(j_version.at(1)),\n        get<Integer const>(j_version.at(2)));\n  } catch (dmlc::Error const& e) {\n    LOG(FATAL) << \"Invaid version format in loaded JSON object: \" << in;\n  }\n\n  return std::make_tuple(major, minor, patch);\n}\n\nVersion::TripletT Version::Load(dmlc::Stream* fi) {\n  XGBoostVersionT major{0}, minor{0}, patch{0};\n  // This is only used in DMatrix serialization, so doesn't break model compatibility.\n  std::string msg { \"Incorrect version format found in binary file.  \"\n                    \"Binary file from XGBoost < 1.0.0 is no longer supported. 
\"\n                    \"Please generate it again.\" };\n  std::string verstr { u8\"version:\" }, read;\n  read.resize(verstr.size(), 0);\n\n  CHECK_EQ(fi->Read(&read[0], verstr.size()), verstr.size()) << msg;\n  if (verstr != read) {\n    // read might contain `\\0` that terminates the string.\n    LOG(FATAL) << msg;\n  }\n\n  CHECK(fi->Read(&major)) << msg;\n  CHECK(fi->Read(&minor)) << msg;\n  CHECK(fi->Read(&patch)) << msg;\n\n  return std::make_tuple(major, minor, patch);\n}\n\nvoid Version::Save(Json* out) {\n  Integer::Int major, minor, patch;\n  std::tie(major, minor, patch)= Self();\n  (*out)[\"version\"] = std::vector<Json>{Json(Integer{major}),\n                                        Json(Integer{minor}),\n                                        Json(Integer{patch})};\n}\n\nvoid Version::Save(dmlc::Stream* fo) {\n  XGBoostVersionT major, minor, patch;\n  std::tie(major, minor, patch) = Self();\n  std::string verstr { u8\"version:\" };\n  fo->Write(&verstr[0], verstr.size());\n  fo->Write(major);\n  fo->Write(minor);\n  fo->Write(patch);\n}\n\nstd::string Version::String(TripletT const& version) {\n  std::stringstream ss;\n  ss << std::get<0>(version) << \".\" << get<1>(version) << \".\" << get<2>(version);\n  return ss.str();\n}\n\nVersion::TripletT Version::Self() {\n  return std::make_tuple(XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOST_VER_PATCH);\n}\n\nbool Version::Same(TripletT const& triplet) {\n  return triplet == Self();\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "src/common/version.h",
    "content": "/*!\n * Copyright 2019 XGBoost contributors\n */\n#ifndef XGBOOST_COMMON_VERSION_H_\n#define XGBOOST_COMMON_VERSION_H_\n\n#include <dmlc/io.h>\n#include <string>\n#include <tuple>\n\n#include \"xgboost/base.h\"\n\nnamespace xgboost {\nclass Json;\n// a static class for handling version info\nstruct Version {\n  using TripletT = std::tuple<XGBoostVersionT, XGBoostVersionT, XGBoostVersionT>;\n  static const TripletT kInvalid;\n\n  // Save/Load version info to JSON document\n  static TripletT Load(Json const& in);\n  static void Save(Json* out);\n\n  // Save/Load version info to dmlc::Stream\n  static Version::TripletT Load(dmlc::Stream* fi);\n  static void Save(dmlc::Stream* fo);\n\n  static std::string String(TripletT const& version);\n  static TripletT Self();\n\n  static bool Same(TripletT const& triplet);\n};\n\n}      // namespace xgboost\n#endif  // XGBOOST_COMMON_VERSION_H_\n"
  },
  {
    "path": "src/context.cc",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n *\n * \\brief Context object used for controlling runtime parameters.\n */\n#include \"xgboost/context.h\"\n\n#include <algorithm>  // for find_if\n#include <charconv>   // for from_chars\n#include <iterator>   // for distance\n#include <optional>   // for optional\n#include <regex>      // for regex_replace, regex_match\n#include <sstream>    // for stringstream\n\n#include \"common/cuda_rt_utils.h\"  // for AllVisibleGPUs\n#include \"common/random.h\"\n#include \"common/threading_utils.h\"\n#include \"xgboost/json.h\"  // for Json, Object, String, ToJson, FromJson\n#include \"xgboost/string_view.h\"\n\n#if !defined(XGBOOST_USE_CUDA)\n\n#include \"common/common.h\"  // for AssertGPUSupport\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n\n#if defined(XGBOOST_USE_SYCL)\n#include \"../plugin/sycl/context_helper.h\"\n#endif  // defined (XGBOOST_USE_SYCL)\n\nnamespace xgboost {\n\nDMLC_REGISTER_PARAMETER(Context);\n\nstd::int64_t constexpr Context::kDefaultSeed;\n\nContext::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}\n\nnamespace {\ninline constexpr char const* kDevice = \"device\";\n\n#if !defined(XGBOOST_USE_CUDA)\nDeviceOrd CUDAOrdinal(DeviceOrd device, bool) {\n  device = DeviceOrd::CPU();\n  return device;\n}\n#else\n// Check CUDA on the current device, wrap the ordinal if necessary.\n[[nodiscard]] DeviceOrd CUDAOrdinal(DeviceOrd device, bool fail_on_invalid) {\n  // When booster is loaded from a memory image (Python pickle or R raw model), number of\n  // available GPUs could be different.  
Wrap around it.\n  std::int32_t n_visible = curt::AllVisibleGPUs();\n  if (n_visible == 0) {\n    if (device.IsCUDA()) {\n      LOG(WARNING) << \"No visible GPU is found, setting device to CPU.\";\n    }\n    device = DeviceOrd::CPU();\n  } else if (fail_on_invalid) {\n    CHECK(device.IsCPU() || device.ordinal < n_visible)\n        << \"Only \" << n_visible << \" GPUs are visible, ordinal \" << device.ordinal\n        << \" is invalid.\";\n  } else if (device.IsCUDA() && device.ordinal >= n_visible) {\n    device.ordinal = device.ordinal % n_visible;\n    LOG(WARNING) << \"Only \" << n_visible << \" GPUs are visible, setting device ordinal to \"\n                 << device.ordinal;\n  }\n\n  if (device.IsCUDA()) {\n    curt::SetDevice(device.ordinal);\n  }\n  return device;\n}\n#endif  //  !defined(XGBOOST_USE_CUDA)\n\n[[nodiscard]] std::optional<std::int32_t> ParseInt(StringView ordinal) {\n  // Some basic checks to ensure valid `gpu_id` and device ordinal instead of directly parsing and\n  // letting go of unknown characters.\n  if (ordinal.empty()) {\n    return std::nullopt;\n  }\n\n  std::size_t offset{0};\n  if (ordinal[0] == '-') {\n    offset = 1;\n  }\n  if (ordinal.size() <= offset) {\n    return std::nullopt;\n  }\n\n  bool valid = std::all_of(ordinal.cbegin() + offset, ordinal.cend(),\n                           [](auto c) { return std::isdigit(c); });\n  if (!valid) {\n    return std::nullopt;\n  }\n\n  std::int32_t parsed_id{DeviceOrd::CPUOrdinal()};\n  auto res = std::from_chars(ordinal.c_str(), ordinal.c_str() + ordinal.size(), parsed_id);\n  if (res.ec != std::errc()) {\n    return std::nullopt;\n  }\n\n  return parsed_id;\n}\n\n[[nodiscard]] DeviceOrd MakeDeviceOrd(std::string const& input, bool fail_on_invalid_gpu_id) {\n  StringView msg{R\"(Invalid argument for `device`. Expected to be one of the following:\n- cpu\n- cuda\n- cuda:<device ordinal>  # e.g. cuda:0\n- gpu\n- gpu:<device ordinal>   # e.g. 
gpu:0\n)\"};\n  auto fatal = [&] {\n    LOG(FATAL) << msg << \"Got: `\" << input << \"`.\";\n  };\n\n#if defined(__MINGW32__)\n  // mingw hangs on regex using rtools 430. Basic checks only.\n  CHECK_GE(input.size(), 3) << msg;\n  auto substr = input.substr(0, 3);\n  bool valid = substr == \"cpu\" || substr == \"cud\" || substr == \"gpu\" || substr == \"syc\";\n  CHECK(valid) << msg;\n#else\n  thread_local static std::regex pattern{\n      \"gpu(:[0-9]+)?|cuda(:[0-9]+)?|cpu|sycl(:cpu|:gpu)?(:-1|:[0-9]+)?\"};\n  if (!std::regex_match(input, pattern)) {\n    fatal();\n  }\n#endif  // defined(__MINGW32__)\n\n  // handle alias\n#if defined(__MINGW32__)\n  // mingw hangs on regex using rtools 430. Basic checks only.\n  bool is_sycl = (substr == \"syc\");\n#else\n  bool is_sycl = std::regex_match(input, std::regex(\"sycl(:cpu|:gpu)?(:-1|:[0-9]+)?\"));\n#endif  // defined(__MINGW32__)\n\n  std::string s_device = input;\n  if (!is_sycl) {\n    s_device = std::regex_replace(s_device, std::regex{\"gpu\"}, DeviceSym::CUDA());\n  }\n\n  auto split_it = std::find(s_device.cbegin(), s_device.cend(), ':');\n\n  // For these cases we need to move iterator to the end, not to look for a ordinal.\n  if ((s_device == \"sycl:cpu\") || (s_device == \"sycl:gpu\")) {\n    split_it = s_device.cend();\n  }\n\n  // For s_device like \"sycl:gpu:1\"\n  if (split_it != s_device.cend()) {\n    auto second_split_it = std::find(split_it + 1, s_device.cend(), ':');\n    if (second_split_it != s_device.cend()) {\n      split_it = second_split_it;\n    }\n  }\n\n  DeviceOrd device;\n  device.ordinal = DeviceOrd::InvalidOrdinal();  // mark it invalid for check.\n  if (split_it == s_device.cend()) {\n    // no ordinal.\n    if (s_device == DeviceSym::CPU()) {\n      device = DeviceOrd::CPU();\n    } else if (s_device == DeviceSym::CUDA()) {\n      device = DeviceOrd::CUDA(0);  // use 0 as default;\n    } else if (s_device == DeviceSym::SyclDefault()) {\n      device = DeviceOrd::SyclDefault();\n    } 
else if (s_device == DeviceSym::SyclCPU()) {\n      device = DeviceOrd::SyclCPU();\n    } else if (s_device == DeviceSym::SyclGPU()) {\n      device = DeviceOrd::SyclGPU();\n    } else {\n      fatal();\n    }\n  } else {\n    // must be CUDA or SYCL when ordinal is specifed.\n    // +1 for colon\n    std::size_t offset = std::distance(s_device.cbegin(), split_it) + 1;\n    // substr\n    StringView s_ordinal = {s_device.data() + offset, s_device.size() - offset};\n    StringView s_type = {s_device.data(), offset - 1};\n    if (s_ordinal.empty()) {\n      fatal();\n    }\n    auto opt_id = ParseInt(s_ordinal);\n    if (!opt_id.has_value()) {\n      fatal();\n    }\n    CHECK_LE(opt_id.value(), std::numeric_limits<bst_d_ordinal_t>::max())\n        << \"Ordinal value too large.\";\n    if (s_type == DeviceSym::SyclDefault()) {\n      device = DeviceOrd::SyclDefault(opt_id.value());\n    } else if (s_type == DeviceSym::SyclCPU()) {\n      device = DeviceOrd::SyclCPU(opt_id.value());\n    } else if (s_type == DeviceSym::SyclGPU()) {\n      device = DeviceOrd::SyclGPU(opt_id.value());\n    } else {\n      device = DeviceOrd::CUDA(opt_id.value());\n    }\n  }\n\n  if (device.ordinal < DeviceOrd::CPUOrdinal()) {\n    fatal();\n  }\n  if (device.IsCUDA()) {\n    device = CUDAOrdinal(device, fail_on_invalid_gpu_id);\n    if (!device.IsCUDA()) {\n      // We allow loading a GPU-based pickle on a CPU-only machine.\n      LOG(WARNING) << \"Device is changed from GPU to CPU as we couldn't find any available GPU on \"\n                      \"the system.\";\n    }\n  }\n  return device;\n}\n}  // namespace\n\nstd::ostream& operator<<(std::ostream& os, DeviceOrd ord) {\n  os << ord.Name();\n  return os;\n}\n\nvoid Context::Init(Args const& kwargs) {\n  auto unknown = this->UpdateAllowUnknown(kwargs);\n  if (!unknown.empty()) {\n    std::stringstream ss;\n    std::size_t i = 0;\n    ss << \"[Internal Error] Unknown parameters passed to the Context {\";\n    for (auto const& [k, _] 
: unknown) {\n      ss << '\"' << k << '\"';\n      if (++i != unknown.size()) {\n        ss << \", \";\n      }\n    }\n    ss << \"}\\n\";\n    LOG(FATAL) << ss.str();\n  }\n}\n\nvoid Context::SetDeviceOrdinal(Args const& kwargs) {\n  auto gpu_id_it = std::find_if(kwargs.cbegin(), kwargs.cend(),\n                                [](auto const& p) { return p.first == \"gpu_id\"; });\n  auto has_gpu_id = gpu_id_it != kwargs.cend();\n  if (has_gpu_id) {\n    LOG(FATAL) << \"`gpu_id` has been removed since 3.1. Use `device` instead.\";\n  }\n\n  auto device_it = std::find_if(kwargs.cbegin(), kwargs.cend(),\n                                [](auto const& p) { return p.first == kDevice; });\n  auto has_device = device_it != kwargs.cend();\n  auto new_d = MakeDeviceOrd(this->device, this->fail_on_invalid_gpu_id);\n\n  if (!has_device) {\n    CHECK_EQ(new_d.ordinal, this->device_.ordinal);  // unchanged\n  }\n  this->SetDevice(new_d);\n\n  if (this->IsCPU()) {\n    CHECK_EQ(this->device_.ordinal, DeviceOrd::CPUOrdinal());\n  } else if (this->IsCUDA()) {\n    CHECK_GT(this->device_.ordinal, DeviceOrd::CPUOrdinal());\n  }\n}\n\nstd::int32_t Context::Threads() const {\n  auto n_threads = common::OmpGetNumThreads(nthread);\n  if (cfs_cpu_count_ > 0) {\n    n_threads = std::min(n_threads, cfs_cpu_count_);\n  }\n  return n_threads;\n}\n\nDeviceOrd Context::DeviceFP64() const {\n#if defined(XGBOOST_USE_SYCL)\n  return sycl::DeviceFP64(device_);\n#else\n  return device_;\n#endif  // defined(XGBOOST_USE_SYCL)\n}\n\n[[nodiscard]] Json Context::ToJson() const {\n  auto obj = Json{::xgboost::ToJson(*this)};\n  common::SaveRng(&obj, this->rng_);\n  return obj;\n}\n\nvoid Context::FromJson(Json const& in) {\n  ::xgboost::FromJson(in, this);\n  common::LoadRng(in, &this->rng_);\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nCUDAContext const* Context::CUDACtx() const {\n  common::AssertGPUSupport();\n  return nullptr;\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "src/context.cu",
    "content": "/**\n * Copyright 2022 by XGBoost Contributors\n */\n#include \"common/cuda_context.cuh\"  // CUDAContext\n#include \"xgboost/context.h\"\n\nnamespace xgboost {\nCUDAContext const* Context::CUDACtx() const {\n  if (!cuctx_) {\n    cuctx_.reset(new CUDAContext{});\n  }\n  return cuctx_.get();\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/adapter.cc",
    "content": "/**\n *  Copyright 2019-2025, XGBoost Contributors\n */\n#include \"adapter.h\"\n\n#include <algorithm>  // for all_of\n#include <cstdint>    // for int32_t\n#include <numeric>    // for partial_sum\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../c_api/c_api_error.h\"  // for API_BEGIN, API_END\n#include \"../encoder/ordinal.h\"    // for HostCatIndexView\n#include \"array_interface.h\"       // for ArrayInterface\n#include \"columnar.h\"              // for GetRefCats, GetArrowDictionary\n#include \"xgboost/c_api.h\"         // for DataIterHandle\n#include \"xgboost/json.h\"          // for Json, Object, Array\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::data {\nnamespace {\nauto GetRefCats(Json handle) {\n  auto cats = reinterpret_cast<CatContainer const*>(get<Integer const>(handle));\n  CHECK(cats);\n  auto h_cats = cats->HostView();\n  return h_cats;\n}\n}  // anonymous namespace\n\nColumnarAdapter::ColumnarAdapter(StringView columns) {\n  auto jdf = Json::Load(columns);\n\n  if (IsA<Object>(jdf)) {\n    // Has reference categories.\n    this->ref_cats_ = GetRefCats(jdf[\"ref_categories\"]);\n    jdf = jdf[\"columns\"];\n  }\n\n  CHECK(IsA<Array>(jdf));\n  auto const& array = get<Array const>(jdf);\n  bst_idx_t n_samples{0};\n  std::vector<std::int32_t> cat_segments{0};\n  for (auto const& jcol : array) {\n    std::int32_t n_cats{0};\n    if (IsA<Array>(jcol)) {\n      // This is a dictionary type (categorical values).\n      auto const& first = get<Object const>(jcol[0]);\n      if (first.find(\"offsets\") == first.cend()) {\n        // numeric index\n        n_cats = GetArrowNumericIndex(DeviceOrd::CPU(), jcol, &this->cats_, &this->columns_,\n                                      &this->n_bytes_, &n_samples);\n      } else {\n        // string index\n        n_cats =\n            GetArrowDictionary(jcol, &this->cats_, &this->columns_, &this->n_bytes_, &n_samples);\n      }\n    } else {\n      
// Numeric values\n      columns_.emplace_back(get<Object const>(jcol));\n      this->cats_.emplace_back();\n      this->n_bytes_ += columns_.back().ElementSize() * columns_.back().Shape<0>();\n      n_samples = std::max(n_samples, static_cast<bst_idx_t>(columns_.back().Shape<0>()));\n    }\n    cat_segments.push_back(n_cats);\n  }\n  std::partial_sum(cat_segments.cbegin(), cat_segments.cend(), cat_segments.begin());\n  auto no_overflow = std::is_sorted(cat_segments.cbegin(), cat_segments.cend());\n  CHECK(no_overflow) << \"Maximum number of categories exceeded.\";\n\n  // Check consistency.\n  bool consistent = columns_.empty() || std::all_of(columns_.cbegin(), columns_.cend(),\n                                                    [&](ArrayInterface<1> const& array) {\n                                                      return array.Shape<0>() == n_samples;\n                                                    });\n  this->cat_segments_ = std::move(cat_segments);\n  CHECK(consistent) << \"Size of columns should be the same.\";\n  batch_ = ColumnarAdapterBatch{columns_, NoOpAccessor{}};\n\n  if (!this->ref_cats_.Empty()) {\n    CHECK_EQ(this->ref_cats_.Size(), this->columns_.size())\n        << \"Invalid reference categories, different number of columns\";\n  }\n}\n\ntemplate <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>\nbool IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>::Next() {\n  if ((*next_callback_)(\n          data_handle_,\n          [](void* handle, XGBoostBatchCSR batch) -> int {\n            API_BEGIN();\n            static_cast<IteratorAdapter*>(handle)->SetData(batch);\n            API_END();\n          },\n          this) != 0) {\n    at_first_ = false;\n    return true;\n  } else {\n    return false;\n  }\n}\n\ntemplate class IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/adapter.h",
    "content": "/**\n *  Copyright 2019-2025, XGBoost Contributors\n * \\file adapter.h\n */\n#ifndef XGBOOST_DATA_ADAPTER_H_\n#define XGBOOST_DATA_ADAPTER_H_\n#include <dmlc/data.h>\n\n#include <algorithm>  // for transform, all_of\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint8_t\n#include <limits>     // for numeric_limits\n#include <memory>     // for unique_ptr, make_unique\n#include <utility>    // for move\n#include <variant>    // for variant\n#include <vector>     // for vector\n\n#include \"../data/cat_container.h\"  // for CatAccessor\n#include \"array_interface.h\"        // for ArrayInterface\n#include \"entry.h\"                  // for COOTuple\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"\n#include \"xgboost/string_view.h\"\n\nnamespace xgboost::data {\n/**  External data formats should implement an adapter as below. The\n * adapter provides a uniform access to data outside xgboost, allowing\n * construction of DMatrix objects from a range of sources without duplicating\n * code.\n *\n * The adapter object is an iterator that returns batches of data. Each batch\n * contains a number of \"lines\". A line represents a set of elements from a\n * sparse input matrix, normally a row in the case of a CSR matrix or a column\n * for a CSC matrix. Typically in sparse matrix formats we can efficiently\n * access subsets of elements at a time, but cannot efficiently lookups elements\n * by random access, hence the \"line\" abstraction, allowing the sparse matrix to\n * return subsets of elements efficiently. Individual elements are described by\n * a COO tuple (row index, column index, value).\n *\n * This abstraction allows us to read through different sparse matrix formats\n * using the same interface. 
In particular we can write a DMatrix constructor\n * that uses the same code to construct itself from a CSR matrix, CSC matrix,\n * dense matrix, CSV, LIBSVM file, or potentially other formats. To see why this\n * is necessary, imagine we have 5 external matrix formats and 5 internal\n * DMatrix types where each DMatrix needs a custom constructor for each possible\n * input. The number of constructors is 5*5=25. Using an abstraction over the\n * input data types the number of constructors is reduced to 5, as each DMatrix\n * is oblivious to the external data format. Adding a new input source is simply\n * a case of implementing an adapter.\n *\n * Most of the below adapters do not need more than one batch as the data\n * originates from an in memory source. The file adapter does require batches to\n * avoid loading the entire file in memory.\n *\n * An important detail is empty row/column handling. Files loaded from disk do\n * not provide meta information about the number of rows/columns to expect, this\n * needs to be inferred during construction. Other sparse formats may specify a\n * number of rows/columns, but we can encounter entirely sparse rows or columns,\n * leading to disagreement between the inferred number and the meta-info\n * provided. To resolve this, adapters have methods specifying the number of\n * rows/columns expected, these methods may return zero where these values must\n * be inferred from data. A constructed DMatrix should agree with the input\n * source on numbers of rows/columns, appending empty rows if necessary.\n *  */\n\n/** \\brief An adapter can return this value for number of rows or columns\n * indicating that this value is currently unknown and should be inferred while\n * passing over the data. 
*/\nconstexpr size_t kAdapterUnknownSize = std::numeric_limits<size_t >::max();\n\nnamespace detail {\n\n/**\n * \\brief Simplifies the use of DataIter when there is only one batch.\n */\ntemplate <typename DType>\nclass SingleBatchDataIter : dmlc::DataIter<DType> {\n public:\n  void BeforeFirst() override { counter_ = 0; }\n  bool Next() override {\n    if (counter_ == 0) {\n      counter_++;\n      return true;\n    }\n    return false;\n  }\n\n private:\n  int counter_{0};\n};\n\n/** \\brief Indicates this data source cannot contain meta-info such as labels,\n * weights or qid. */\nclass NoMetaInfo {\n public:\n  const float* Labels() const { return nullptr; }\n  const float* Weights() const { return nullptr; }\n  const uint64_t* Qid() const { return nullptr; }\n  const float* BaseMargin() const { return nullptr; }\n};\n};  // namespace detail\n\nclass DenseAdapterBatch : public detail::NoMetaInfo {\n public:\n  DenseAdapterBatch(const float* values, bst_idx_t num_rows, bst_idx_t num_features)\n      : values_(values), num_rows_(num_rows), num_features_(num_features) {}\n\n private:\n  class Line {\n   public:\n    Line(const float* values, size_t size, size_t row_idx)\n        : row_idx_(row_idx), size_(size), values_(values) {}\n\n    size_t Size() const { return size_; }\n    COOTuple GetElement(size_t idx) const {\n      return COOTuple{row_idx_, idx, values_[idx]};\n    }\n\n   private:\n    size_t row_idx_;\n    size_t size_;\n    const float* values_;\n  };\n\n public:\n  size_t Size() const { return num_rows_; }\n  const Line GetLine(size_t idx) const {\n    return Line(values_ + idx * num_features_, num_features_, idx);\n  }\n  [[nodiscard]] std::size_t NumRows() const { return num_rows_; }\n  [[nodiscard]] std::size_t NumCols() const { return num_features_; }\n  static constexpr bool kIsRowMajor = true;\n\n private:\n  const float* values_;\n  size_t num_rows_;\n  size_t num_features_;\n};\n\nclass DenseAdapter : public 
detail::SingleBatchDataIter<DenseAdapterBatch> {\n public:\n  DenseAdapter(const float* values, size_t num_rows, size_t num_features)\n      : batch_(values, num_rows, num_features),\n        num_rows_(num_rows),\n        num_columns_(num_features) {}\n  const DenseAdapterBatch& Value() const override { return batch_; }\n\n  [[nodiscard]] std::size_t NumRows() const { return num_rows_; }\n  [[nodiscard]] std::size_t NumColumns() const { return num_columns_; }\n\n private:\n  DenseAdapterBatch batch_;\n  size_t num_rows_;\n  size_t num_columns_;\n};\n\nclass ArrayAdapterBatch : public detail::NoMetaInfo {\n public:\n  static constexpr bool kIsRowMajor = true;\n\n private:\n  ArrayInterface<2> array_interface_;\n\n  class Line {\n    ArrayInterface<2> array_interface_;\n    size_t ridx_;\n\n   public:\n    Line(ArrayInterface<2> array_interface, size_t ridx)\n        : array_interface_{std::move(array_interface)}, ridx_{ridx} {}\n\n    size_t Size() const { return array_interface_.Shape<1>(); }\n\n    COOTuple GetElement(size_t idx) const {\n      return {ridx_, idx, array_interface_(ridx_, idx)};\n    }\n  };\n\n public:\n  ArrayAdapterBatch() = default;\n  Line const GetLine(size_t idx) const {\n    return Line{array_interface_, idx};\n  }\n\n  [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); }\n  [[nodiscard]] std::size_t NumCols() const { return array_interface_.Shape<1>(); }\n  [[nodiscard]] std::size_t Size() const { return this->NumRows(); }\n\n  explicit ArrayAdapterBatch(ArrayInterface<2> array_interface)\n      : array_interface_{std::move(array_interface)} {}\n};\n\n/**\n * Adapter for dense array on host, in Python that's `numpy.ndarray`.  This is similar to\n * `DenseAdapter`, but supports __array_interface__ instead of raw pointers.  
An\n * advantage is this can handle various data type without making a copy.\n */\nclass ArrayAdapter : public detail::SingleBatchDataIter<ArrayAdapterBatch> {\n public:\n  explicit ArrayAdapter(StringView array_interface) {\n    auto j = Json::Load(array_interface);\n    array_interface_ = ArrayInterface<2>(get<Object const>(j));\n    batch_ = ArrayAdapterBatch{array_interface_};\n  }\n  [[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; }\n  [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); }\n  [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape<1>(); }\n\n private:\n  ArrayAdapterBatch batch_;\n  ArrayInterface<2> array_interface_;\n};\n\nclass CSRArrayAdapterBatch : public detail::NoMetaInfo {\n  ArrayInterface<1> indptr_;\n  ArrayInterface<1> indices_;\n  ArrayInterface<1> values_;\n  bst_feature_t n_features_;\n\n  class Line {\n    ArrayInterface<1> indices_;\n    ArrayInterface<1> values_;\n    size_t ridx_;\n    size_t offset_;\n\n   public:\n    Line(ArrayInterface<1> indices, ArrayInterface<1> values, size_t ridx,\n         size_t offset)\n        : indices_{std::move(indices)}, values_{std::move(values)}, ridx_{ridx},\n          offset_{offset} {}\n\n    [[nodiscard]] COOTuple GetElement(std::size_t idx) const {\n      return {ridx_, TypedIndex<std::size_t, 1>{indices_}(offset_ + idx), values_(offset_ + idx)};\n    }\n\n    [[nodiscard]] std::size_t Size() const {\n      return values_.Shape<0>();\n    }\n  };\n\n public:\n  static constexpr bool kIsRowMajor = true;\n\n public:\n  CSRArrayAdapterBatch() = default;\n  CSRArrayAdapterBatch(ArrayInterface<1> indptr, ArrayInterface<1> indices,\n                       ArrayInterface<1> values, bst_feature_t n_features)\n      : indptr_{std::move(indptr)},\n        indices_{std::move(indices)},\n        values_{std::move(values)},\n        n_features_{n_features} {\n  }\n\n  [[nodiscard]] std::size_t NumRows() const {\n    
size_t size = indptr_.Shape<0>();\n    size = size == 0 ? 0 : size - 1;\n    return size;\n  }\n  [[nodiscard]] std::size_t NumCols() const { return n_features_; }\n  [[nodiscard]] std::size_t Size() const { return this->NumRows(); }\n\n  [[nodiscard]] Line const GetLine(size_t idx) const {\n    auto begin_no_stride = TypedIndex<size_t, 1>{indptr_}(idx);\n    auto end_no_stride = TypedIndex<size_t, 1>{indptr_}(idx + 1);\n\n    auto indices = indices_;\n    auto values = values_;\n    // Slice indices and values, stride remains unchanged since this is slicing by\n    // specific index.\n    auto offset = indices.strides[0] * begin_no_stride;\n\n    indices.shape[0] = end_no_stride - begin_no_stride;\n    values.shape[0] = end_no_stride - begin_no_stride;\n\n    return Line{indices, values, idx, offset};\n  }\n};\n\n/**\n * @brief Adapter for CSR array on host, in Python that's `scipy.sparse.csr_matrix`.\n */\nclass CSRArrayAdapter : public detail::SingleBatchDataIter<CSRArrayAdapterBatch> {\n public:\n  CSRArrayAdapter(StringView indptr, StringView indices, StringView values,\n                  size_t num_cols)\n      : indptr_{indptr}, indices_{indices}, values_{values}, num_cols_{num_cols} {\n    batch_ = CSRArrayAdapterBatch{indptr_, indices_, values_,\n                                  static_cast<bst_feature_t>(num_cols_)};\n  }\n\n  [[nodiscard]] CSRArrayAdapterBatch const& Value() const override { return batch_; }\n  [[nodiscard]] std::size_t NumRows() const {\n    size_t size = indptr_.Shape<0>();\n    size = size == 0 ? 
0 : size - 1;\n    return size;\n  }\n  [[nodiscard]] std::size_t NumColumns() const { return num_cols_; }\n\n private:\n  CSRArrayAdapterBatch batch_;\n  ArrayInterface<1> indptr_;\n  ArrayInterface<1> indices_;\n  ArrayInterface<1> values_;\n  size_t num_cols_;\n};\n\nclass CSCArrayAdapterBatch : public detail::NoMetaInfo {\n  ArrayInterface<1> indptr_;\n  ArrayInterface<1> indices_;\n  ArrayInterface<1> values_;\n\n  class Line {\n    std::size_t column_idx_;\n    ArrayInterface<1> row_idx_;\n    ArrayInterface<1> values_;\n    std::size_t offset_;\n\n   public:\n    Line(std::size_t idx, ArrayInterface<1> row_idx, ArrayInterface<1> values, std::size_t offset)\n        : column_idx_{idx},\n          row_idx_{std::move(row_idx)},\n          values_{std::move(values)},\n          offset_{offset} {}\n\n    [[nodiscard]] std::size_t Size() const { return values_.Shape<0>(); }\n    [[nodiscard]] COOTuple GetElement(std::size_t idx) const {\n      return {TypedIndex<std::size_t, 1>{row_idx_}(offset_ + idx), column_idx_,\n              values_(offset_ + idx)};\n    }\n  };\n\n public:\n  static constexpr bool kIsRowMajor = false;\n\n  CSCArrayAdapterBatch(ArrayInterface<1> indptr, ArrayInterface<1> indices,\n                       ArrayInterface<1> values)\n      : indptr_{std::move(indptr)}, indices_{std::move(indices)}, values_{std::move(values)} {}\n\n  [[nodiscard]] std::size_t Size() const noexcept(true) {\n    auto n = indptr_.n;\n    return (n == 0) ? 
n : (n - 1);\n  }\n  [[nodiscard]] Line GetLine(std::size_t idx) const {\n    auto begin_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx);\n    auto end_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx + 1);\n\n    auto indices = indices_;\n    auto values = values_;\n    // Slice indices and values, stride remains unchanged since this is slicing by\n    // specific index.\n    auto offset = indices.strides[0] * begin_no_stride;\n    indices.shape[0] = end_no_stride - begin_no_stride;\n    values.shape[0] = end_no_stride - begin_no_stride;\n\n    return Line{idx, indices, values, offset};\n  }\n};\n\n/**\n * @brief CSC adapter with support for array interface.\n */\nclass CSCArrayAdapter : public detail::SingleBatchDataIter<CSCArrayAdapterBatch> {\n  ArrayInterface<1> indptr_;\n  ArrayInterface<1> indices_;\n  ArrayInterface<1> values_;\n  size_t num_rows_;\n  CSCArrayAdapterBatch batch_;\n\n public:\n  CSCArrayAdapter(StringView indptr, StringView indices, StringView values, std::size_t num_rows)\n      : indptr_{indptr},\n        indices_{indices},\n        values_{values},\n        num_rows_{num_rows},\n        batch_{CSCArrayAdapterBatch{indptr_, indices_, values_}} {}\n\n  // JVM package sends 0 as unknown\n  [[nodiscard]] std::size_t NumRows() const {\n    return num_rows_ == 0 ? 
kAdapterUnknownSize : num_rows_;\n  }\n  [[nodiscard]] std::size_t NumColumns() const { return indptr_.n - 1; }\n  [[nodiscard]] const CSCArrayAdapterBatch& Value() const override { return batch_; }\n};\n\ntemplate <typename EncAccessor>\nclass EncColumnarAdapterBatchImpl : public detail::NoMetaInfo {\n  using ArrayInf = std::add_const_t<ArrayInterface<1>>;\n\n  common::Span<ArrayInf> columns_;\n  EncAccessor acc_;\n\n  class Line {\n    common::Span<ArrayInf> const& columns_;\n    std::size_t const ridx_;\n    EncAccessor const& acc_;\n\n   public:\n    explicit Line(common::Span<ArrayInf> const& columns, EncAccessor const& acc, std::size_t ridx)\n        : columns_{columns}, ridx_{ridx}, acc_{acc} {}\n    [[nodiscard]] std::size_t Size() const { return columns_.empty() ? 0 : columns_.size(); }\n\n    [[nodiscard]] COOTuple GetElement(std::size_t fidx) const {\n      auto const& column = columns_.data()[fidx];\n      float value = column.valid.Data() == nullptr || column.valid.Check(ridx_)\n                        ? column(ridx_)\n                        : std::numeric_limits<float>::quiet_NaN();\n      return {ridx_, fidx, acc_(value, fidx)};\n    }\n  };\n\n public:\n  EncColumnarAdapterBatchImpl() = default;\n  explicit EncColumnarAdapterBatchImpl(common::Span<ArrayInf> columns, EncAccessor acc)\n      : columns_{columns}, acc_{std::move(acc)} {}\n  [[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, this->acc_, ridx}; }\n  [[nodiscard]] std::size_t Size() const {\n    return columns_.empty() ? 0 : columns_.front().template Shape<0>();\n  }\n  [[nodiscard]] std::size_t NumCols() const { return columns_.empty() ? 
0 : columns_.size(); }\n  [[nodiscard]] std::size_t NumRows() const { return this->Size(); }\n\n  static constexpr bool kIsRowMajor = true;\n};\n\nusing ColumnarAdapterBatch = EncColumnarAdapterBatchImpl<NoOpAccessor>;\nusing EncColumnarAdapterBatch = EncColumnarAdapterBatchImpl<CatAccessor>;\n\n/**\n * @brief Adapter for columnar format (arrow).\n *\n *   Supports both numeric values and categorical values.\n *\n * See @ref XGDMatrixCreateFromColumnar for notes\n */\nclass ColumnarAdapter : public detail::SingleBatchDataIter<ColumnarAdapterBatch> {\n  std::vector<ArrayInterface<1>> columns_;\n  enc::HostColumnsView ref_cats_;\n  std::vector<enc::HostCatIndexView> cats_;\n  std::vector<std::int32_t> cat_segments_;\n  ColumnarAdapterBatch batch_;\n  std::size_t n_bytes_{0};\n\n  [[nodiscard]] static bool HasCatImpl(std::vector<enc::HostCatIndexView> const& cats) {\n    return !std::all_of(cats.cbegin(), cats.cend(), [](auto const& cats) {\n      return std::visit([](auto&& cats) { return cats.empty(); }, cats);\n    });\n  }\n\n public:\n  /**\n   * @brief JSON-encoded array of columns.\n   */\n  explicit ColumnarAdapter(StringView columns);\n\n  [[nodiscard]] ColumnarAdapterBatch const& Value() const override { return batch_; }\n\n  [[nodiscard]] bst_idx_t NumRows() const {\n    if (!columns_.empty()) {\n      return columns_.front().shape[0];\n    }\n    return 0;\n  }\n  [[nodiscard]] bst_idx_t NumColumns() const { return columns_.size(); }\n\n  [[nodiscard]] bool HasCategorical() const { return HasCatImpl(this->cats_); }\n  [[nodiscard]] bool HasRefCategorical() const { return !this->ref_cats_.Empty(); }\n\n  [[nodiscard]] std::size_t SizeBytes() const { return n_bytes_; }\n\n  [[nodiscard]] enc::HostColumnsView Cats() const {\n    return {this->cats_, this->cat_segments_,\n            static_cast<std::int32_t>(this->cat_segments_.back())};\n  }\n  [[nodiscard]] enc::HostColumnsView RefCats() const { return this->ref_cats_; }\n  [[nodiscard]] 
common::Span<ArrayInterface<1> const> Columns() const { return this->columns_; }\n};\n\ninline auto MakeEncColumnarBatch(Context const* ctx, ColumnarAdapter const* adapter) {\n  auto cats = std::make_unique<CatContainer>(adapter->RefCats(), true);\n  cats->Sort(ctx);\n  auto [acc, mapping] = cpu_impl::MakeCatAccessor(ctx, adapter->Cats(), cats.get());\n  return std::tuple{EncColumnarAdapterBatch{adapter->Columns(), acc}, std::move(mapping)};\n}\n\ninline auto MakeEncColumnarBatch(Context const* ctx,\n                                 std::shared_ptr<ColumnarAdapter> const& adapter) {\n  return MakeEncColumnarBatch(ctx, adapter.get());\n}\n\nclass FileAdapterBatch {\n public:\n  class Line {\n   public:\n    Line(size_t row_idx, const uint32_t *feature_idx, const float *value,\n         size_t size)\n        : row_idx_(row_idx),\n          feature_idx_(feature_idx),\n          value_(value),\n          size_(size) {}\n\n    size_t Size() { return size_; }\n    COOTuple GetElement(size_t idx) {\n      float fvalue = value_ == nullptr ? 
1.0f : value_[idx];\n      return COOTuple{row_idx_, feature_idx_[idx], fvalue};\n    }\n\n   private:\n    size_t row_idx_;\n    const uint32_t* feature_idx_;\n    const float* value_;\n    size_t size_;\n  };\n  FileAdapterBatch(const dmlc::RowBlock<uint32_t>* block, size_t row_offset)\n      : block_(block), row_offset_(row_offset) {}\n  Line GetLine(size_t idx) const {\n    auto begin = block_->offset[idx];\n    auto end = block_->offset[idx + 1];\n    return Line{idx + row_offset_, &block_->index[begin], &block_->value[begin],\n                end - begin};\n  }\n  const float* Labels() const { return block_->label; }\n  const float* Weights() const { return block_->weight; }\n  const uint64_t* Qid() const { return block_->qid; }\n  const float* BaseMargin() const { return nullptr; }\n\n  size_t Size() const { return block_->size; }\n  static constexpr bool kIsRowMajor = true;\n\n private:\n  const dmlc::RowBlock<uint32_t>* block_;\n  size_t row_offset_;\n};\n\n/** \\brief FileAdapter wraps dmlc::parser to read files and provide access in a\n * common interface. 
*/\nclass FileAdapter : dmlc::DataIter<FileAdapterBatch> {\n public:\n  explicit FileAdapter(dmlc::Parser<uint32_t>* parser) : parser_(parser) {}\n\n  const FileAdapterBatch& Value() const override { return *batch_.get(); }\n  void BeforeFirst() override {\n    batch_.reset();\n    parser_->BeforeFirst();\n    row_offset_ = 0;\n  }\n  bool Next() override {\n    bool next = parser_->Next();\n    batch_.reset(new FileAdapterBatch(&parser_->Value(), row_offset_));\n    row_offset_ += parser_->Value().size;\n    return next;\n  }\n  // Indicates a number of rows/columns must be inferred\n  size_t NumRows() const { return kAdapterUnknownSize; }\n  size_t NumColumns() const { return kAdapterUnknownSize; }\n\n private:\n  size_t row_offset_{0};\n  std::unique_ptr<FileAdapterBatch> batch_;\n  dmlc::Parser<uint32_t>* parser_;\n};\n\n/**\n * @brief Data iterator that takes callback to return data, used in JVM package for accepting data\n *        iterator.\n */\ntemplate <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>\nclass IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {\n public:\n  IteratorAdapter(DataIterHandle data_handle, XGBCallbackDataIterNext* next_callback)\n      : columns_{data::kAdapterUnknownSize},\n        data_handle_(data_handle),\n        next_callback_(next_callback) {}\n\n  // override functions\n  void BeforeFirst() override {\n    CHECK(at_first_) << \"Cannot reset IteratorAdapter\";\n  }\n\n  [[nodiscard]] bool Next() override;\n\n  [[nodiscard]] FileAdapterBatch const& Value() const override {\n    return *batch_.get();\n  }\n\n  // callback to set the data\n  void SetData(const XGBoostBatchCSR& batch) {\n    offset_.clear();\n    label_.clear();\n    weight_.clear();\n    index_.clear();\n    value_.clear();\n    offset_.insert(offset_.end(), batch.offset, batch.offset + batch.size + 1);\n\n    if (batch.label != nullptr) {\n      label_.insert(label_.end(), batch.label, batch.label + batch.size);\n  
  }\n    if (batch.weight != nullptr) {\n      weight_.insert(weight_.end(), batch.weight, batch.weight + batch.size);\n    }\n    if (batch.index != nullptr) {\n      index_.insert(index_.end(), batch.index + offset_[0],\n                    batch.index + offset_.back());\n    }\n    if (batch.value != nullptr) {\n      value_.insert(value_.end(), batch.value + offset_[0],\n                    batch.value + offset_.back());\n    }\n    if (offset_[0] != 0) {\n      size_t base = offset_[0];\n      for (size_t &item : offset_) {\n        item -= base;\n      }\n    }\n    CHECK(columns_ == data::kAdapterUnknownSize || columns_ == batch.columns)\n        << \"Number of columns between batches changed from \" << columns_\n        << \" to \" << batch.columns;\n\n    columns_ = batch.columns;\n    block_.size = batch.size;\n\n    block_.offset = dmlc::BeginPtr(offset_);\n    block_.label = dmlc::BeginPtr(label_);\n    block_.weight = dmlc::BeginPtr(weight_);\n    block_.qid = nullptr;\n    block_.field = nullptr;\n    block_.index = dmlc::BeginPtr(index_);\n    block_.value = dmlc::BeginPtr(value_);\n\n    batch_ = std::make_unique<FileAdapterBatch>(&block_, row_offset_);\n    row_offset_ += offset_.size() - 1;\n  }\n\n  [[nodiscard]] std::size_t NumColumns() const { return columns_; }\n  [[nodiscard]] std::size_t NumRows() const { return kAdapterUnknownSize; }\n\n private:\n  std::vector<size_t> offset_;\n  std::vector<dmlc::real_t> label_;\n  std::vector<dmlc::real_t> weight_;\n  std::vector<uint32_t> index_;\n  std::vector<dmlc::real_t> value_;\n\n  size_t columns_;\n  size_t row_offset_{0};\n  // at the beginning.\n  bool at_first_{true};\n  // handle to the iterator,\n  DataIterHandle data_handle_;\n  // call back to get the data.\n  XGBCallbackDataIterNext *next_callback_;\n  // internal Rowblock\n  dmlc::RowBlock<uint32_t> block_;\n  std::unique_ptr<FileAdapterBatch> batch_;\n};\n\nclass SparsePageAdapterBatch {\n  HostSparsePageView page_;\n\n public:\n  
struct Line {\n    Entry const* inst;\n    size_t n;\n    bst_idx_t ridx;\n    COOTuple GetElement(size_t idx) const { return {ridx, inst[idx].index, inst[idx].fvalue}; }\n    size_t Size() const { return n; }\n  };\n\n  explicit SparsePageAdapterBatch(HostSparsePageView page) : page_{std::move(page)} {}\n  Line GetLine(size_t ridx) const { return Line{page_[ridx].data(), page_[ridx].size(), ridx}; }\n  size_t Size() const { return page_.Size(); }\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_ADAPTER_H_\n"
  },
  {
    "path": "src/data/array_interface.cc",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#include \"array_interface.h\"\n\n#include \"../common/error_msg.h\"  // for Unreachable\n\n#if !defined(XGBOOST_USE_CUDA)\n\n#include \"../common/common.h\"  // for AssertGPUSupport\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost {\nstd::string ArrayInterfaceHandler::TypeStr(Type type) {\n  auto name_fn = [](std::int32_t bits, char t) {\n    return std::to_string(bits) + \"-bit \" + ArrayInterfaceErrors::TypeStr(t);\n  };\n  switch (type) {\n    case kF2:\n      return name_fn(16, 'f');\n    case kF4:\n      return name_fn(32, 'f');\n    case kF8:\n      return name_fn(64, 'f');\n    case kF16:\n      return name_fn(128, 'f');\n    case kI1:\n      return name_fn(8, 'i');\n    case kI2:\n      return name_fn(16, 'i');\n    case kI4:\n      return name_fn(32, 'i');\n    case kI8:\n      return name_fn(64, 'i');\n    case kU1:\n      return name_fn(8, 'u');\n    case kU2:\n      return name_fn(16, 'u');\n    case kU4:\n      return name_fn(32, 'u');\n    case kU8:\n      return name_fn(64, 'u');\n  }\n  error::Unreachable();\n  return {};\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }\nbool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/array_interface.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include <cstdint>  // for int64_t\n\n#include \"../common/cuda_stream.h\"  // for Event, StreamRef, DefaultStream\n#include \"array_interface.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\nvoid ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {\n  switch (stream) {\n    case 0:\n      /**\n       * disallowed by the `__cuda_array_interface__`.  Quote:\n       *\n       *   This is disallowed as it would be ambiguous between None and the default\n       *   stream, and also between the legacy and per-thread default streams. Any use\n       *   case where 0 might be given should either use None, 1, or 2 instead for\n       *   clarity.\n       */\n      LOG(FATAL) << \"Invalid stream ID in array interface: \" << stream;\n    case 1:\n      // default legacy stream\n      break;\n    case 2:\n      // default per-thread stream\n    default: {\n      curt::Event e;\n      e.Record(curt::StreamRef{reinterpret_cast<cudaStream_t>(stream)});\n      curt::DefaultStream().Wait(e);\n    }\n  }\n}\n\nbool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) {\n  if (!ptr) {\n    return false;\n  }\n  // clear potentially pre-existing/unrelated error\n  cudaGetLastError();\n  cudaPointerAttributes attr;\n  auto err = cudaPointerGetAttributes(&attr, ptr);\n  // reset error\n  CHECK_EQ(err, cudaGetLastError());\n  if (err == cudaErrorInvalidValue) {\n    // CUDA < 11\n    return false;\n  } else if (err == cudaSuccess) {\n    // CUDA >= 11\n    switch (attr.type) {\n      case cudaMemoryTypeUnregistered:\n      case cudaMemoryTypeHost:\n        return false;\n      default:\n        return true;\n    }\n    return true;\n  } else {\n    // other errors, `cudaErrorNoDevice`, `cudaErrorInsufficientDriver` etc.\n    return false;\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/array_interface.h",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n * \\file array_interface.h\n * \\brief View of __array_interface__\n */\n#ifndef XGBOOST_DATA_ARRAY_INTERFACE_H_\n#define XGBOOST_DATA_ARRAY_INTERFACE_H_\n\n#include <algorithm>    // for all_of, transform, fill\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t, int64_t, ...\n#include <limits>       // for numeric_limits\n#include <map>          // for map\n#include <string>       // for string\n#include <type_traits>  // for alignment_of_v, remove_pointer_t, invoke_result_t\n#include <vector>       // for vector\n\n#include \"../common/bitfield.h\"   // for RBitField8\n#include \"../common/error_msg.h\"  // for NoF128\n#include \"xgboost/json.h\"         // for Json\n#include \"xgboost/linalg.h\"       // for CalcStride, TensorView\n#include \"xgboost/logging.h\"      // for CHECK\n#include \"xgboost/span.h\"         // for Span\n#include \"xgboost/string_view.h\"  // for StringView\n\n#if defined(XGBOOST_USE_CUDA)\n#include \"cuda_fp16.h\"  // for __half\n#endif\n\nnamespace xgboost {\n// Common errors in parsing columnar format.\nstruct ArrayInterfaceErrors {\n  static char const *Contiguous() { return \"Memory should be contiguous.\"; }\n  static char const *TypestrFormat() {\n    return \"`typestr' should be of format <endian><type><size of type in bytes>.\";\n  }\n  static char const *Dimension(int32_t d) {\n    static std::string str;\n    str.clear();\n    str += \"Only \";\n    str += std::to_string(d);\n    str += \" dimensional array is valid.\";\n    return str.c_str();\n  }\n  static char const *Version() {\n    return \"Only version <= 3 of `__cuda_array_interface__' and `__array_interface__' are \"\n           \"supported.\";\n  }\n  static char const *OfType(std::string const &type) {\n    static std::string str;\n    str.clear();\n    str += \" should be of \";\n    str += type;\n    str += \" type.\";\n    return str.c_str();\n  }\n\n  static 
std::string TypeStr(char c) {\n    switch (c) {\n      case 't':\n        return \"Bit field\";\n      case 'b':\n        return \"Boolean\";\n      case 'i':\n        return \"Integer\";\n      case 'u':\n        return \"Unsigned integer\";\n      case 'f':\n        return \"Floating point\";\n      case 'c':\n        return \"Complex floating point\";\n      case 'm':\n        return \"Timedelta\";\n      case 'M':\n        return \"Datetime\";\n      case 'O':\n        return \"Object\";\n      case 'S':\n        return \"String\";\n      case 'U':\n        return \"Unicode\";\n      case 'V':\n        return \"Other\";\n      default:\n        LOG(FATAL) << \"Invalid type code: \" << c << \" in `typestr' of input array.\"\n                   << \"\\nPlease verify the `__cuda_array_interface__/__array_interface__' \"\n                   << \"of your input data complies to: \"\n                   << \"https://docs.scipy.org/doc/numpy/reference/arrays.interface.html\"\n                   << \"\\nOr open an issue.\";\n        return \"\";\n    }\n  }\n\n  static std::string UnSupportedType(StringView typestr) {\n    return TypeStr(typestr[1]) + \"-\" + typestr[2] + \" is not supported.\";\n  }\n};\n\n/**\n * Utilities for consuming array interface.\n */\nclass ArrayInterfaceHandler {\n public:\n  enum Type : std::int8_t {\n    kF2 = 0,\n    kF4 = 1,\n    kF8 = 2,\n    kF16 = 3,\n    kI1 = 4,\n    kI2 = 5,\n    kI4 = 6,\n    kI8 = 7,\n    kU1 = 8,\n    kU2 = 9,\n    kU4 = 10,\n    kU8 = 11,\n  };\n\n  static std::string TypeStr(Type type);\n\n  template <typename PtrType>\n  static PtrType GetPtrFromArrayData(Object::Map const &obj) {\n    auto data_it = obj.find(\"data\");\n    if (data_it == obj.cend() || IsA<Null>(data_it->second)) {\n      LOG(FATAL) << \"Empty data passed in.\";\n    }\n    auto p_data = reinterpret_cast<PtrType>(\n        static_cast<size_t>(get<Integer const>(get<Array const>(data_it->second).at(0))));\n    return p_data;\n  }\n\n  static 
void Validate(Object::Map const &array) {\n    auto version_it = array.find(\"version\");\n    if (version_it == array.cend() || IsA<Null>(version_it->second)) {\n      LOG(FATAL) << \"Missing `version' field for array interface\";\n    }\n    if (get<Integer const>(version_it->second) > 3) {\n      LOG(FATAL) << ArrayInterfaceErrors::Version();\n    }\n\n    auto typestr_it = array.find(\"typestr\");\n    if (typestr_it == array.cend() || IsA<Null>(typestr_it->second)) {\n      LOG(FATAL) << \"Missing `typestr' field for array interface\";\n    }\n\n    auto typestr = get<String const>(typestr_it->second);\n    CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();\n\n    auto shape_it = array.find(\"shape\");\n    if (shape_it == array.cend() || IsA<Null>(shape_it->second)) {\n      LOG(FATAL) << \"Missing `shape' field for array interface\";\n    }\n    auto data_it = array.find(\"data\");\n    if (data_it == array.cend() || IsA<Null>(data_it->second)) {\n      LOG(FATAL) << \"Missing `data' field for array interface\";\n    }\n  }\n\n  // Find null mask (validity mask) field\n  // Mask object is also an array interface, but with different requirements.\n  static size_t ExtractMask(Object::Map const &column,\n                            common::Span<RBitField8::value_type> *p_out) {\n    auto &s_mask = *p_out;\n    auto const &mask_it = column.find(\"mask\");\n    if (mask_it != column.cend() && !IsA<Null>(mask_it->second)) {\n      auto const &j_mask = get<Object const>(mask_it->second);\n      Validate(j_mask);\n\n      auto p_mask = GetPtrFromArrayData<RBitField8::value_type *>(j_mask);\n\n      auto j_shape = get<Array const>(j_mask.at(\"shape\"));\n      CHECK_EQ(j_shape.size(), 1) << ArrayInterfaceErrors::Dimension(1);\n      auto typestr = get<String const>(j_mask.at(\"typestr\"));\n      // For now this is just 1, we can support different size of interger in mask.\n      int64_t const type_length = typestr.at(2) - 
48;\n\n      if (typestr.at(1) == 't') {\n        CHECK_EQ(type_length, 1) << \"mask with bitfield type should be of 1 byte per bitfield.\";\n      } else if (typestr.at(1) == 'i') {\n        CHECK_EQ(type_length, 1) << \"mask with integer type should be of 1 byte per integer.\";\n      } else {\n        LOG(FATAL) << \"mask must be of integer type or bit field type.\";\n      }\n      /*\n       * shape represents how many bits are in the mask. (This is a grey area, don't be\n       * surprised if it suddenly represents something else when supporting a new\n       * implementation).  Quoting from numpy array interface:\n       *\n       *   The shape of this object should be \"broadcastable\" to the shape of the original\n       *   array.\n       *\n       * And that's the only requirement.\n       */\n      size_t const n_bits = static_cast<size_t>(get<Integer>(j_shape.at(0)));\n      // The size of span required to cover all bits.  Here with 8 bits bitfield, we\n      // assume 1 byte alignment.\n      size_t const span_size = RBitField8::ComputeStorageSize(n_bits);\n\n      auto strides_it = j_mask.find(\"strides\");\n      if (strides_it != j_mask.cend() && !IsA<Null>(strides_it->second)) {\n        auto strides = get<Array const>(strides_it->second);\n        CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1);\n        CHECK_EQ(get<Integer>(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous();\n      }\n\n      s_mask = {p_mask, span_size};\n      return n_bits;\n    }\n    return 0;\n  }\n  /**\n   * \\brief Handle vector inputs.  
For higher dimension, we require strictly correct shape.\n   */\n  template <int32_t D>\n  static void HandleRowVector(std::vector<size_t> const &shape, std::vector<size_t> *p_out) {\n    auto &out = *p_out;\n    if (shape.size() == 2 && D == 1) {\n      auto m = shape[0];\n      auto n = shape[1];\n      CHECK(m == 1 || n == 1);\n      if (m == 1) {\n        // keep the number of columns\n        out[0] = out[1];\n        out.resize(1);\n      } else if (n == 1) {\n        // keep the number of rows.\n        out.resize(1);\n      }\n      // when both m and n are 1, above logic keeps the column.\n      // when neither m nor n is 1, caller should throw an error about Dimension.\n    }\n  }\n\n  template <int32_t D>\n  static void ExtractShape(Object::Map const &array, size_t (&out_shape)[D]) {\n    auto const &j_shape = get<Array const>(array.at(\"shape\"));\n    std::vector<size_t> shape_arr(j_shape.size(), 0);\n    std::transform(j_shape.cbegin(), j_shape.cend(), shape_arr.begin(),\n                   [](Json in) { return get<Integer const>(in); });\n    // handle column vector vs. 
row vector\n    HandleRowVector<D>(shape_arr, &shape_arr);\n    // Copy shape.\n    size_t i;\n    for (i = 0; i < shape_arr.size(); ++i) {\n      CHECK_LT(i, D) << ArrayInterfaceErrors::Dimension(D);\n      out_shape[i] = shape_arr[i];\n    }\n    // Fill the remaining dimensions\n    std::fill(out_shape + i, out_shape + D, 1);\n  }\n\n  /**\n   * \\brief Extracts the optional `strides' field and returns whether the array is c-contiguous.\n   */\n  template <int32_t D>\n  static bool ExtractStride(Object::Map const &array, size_t itemsize,\n                            size_t (&shape)[D], size_t (&stride)[D]) {\n    auto strides_it = array.find(\"strides\");\n    // No stride is provided\n    if (strides_it == array.cend() || IsA<Null>(strides_it->second)) {\n      // No stride is provided, we can calculate it from shape.\n      linalg::detail::CalcStride(shape, stride);\n      // Quote:\n      //\n      //   strides: Either None to indicate a C-style contiguous array or a Tuple of\n      //            strides which provides the number of bytes\n      return true;\n    }\n    // Get shape, we need to make changes to handle row vector, so some duplicated code\n    // from `ExtractShape` for copying out the shape.\n    auto const &j_shape = get<Array const>(array.at(\"shape\"));\n    std::vector<size_t> shape_arr(j_shape.size(), 0);\n    std::transform(j_shape.cbegin(), j_shape.cend(), shape_arr.begin(),\n                   [](Json in) { return get<Integer const>(in); });\n    // Get stride\n    auto const &j_strides = get<Array const>(strides_it->second);\n    CHECK_EQ(j_strides.size(), j_shape.size()) << \"stride and shape don't match.\";\n    std::vector<size_t> stride_arr(j_strides.size(), 0);\n    std::transform(j_strides.cbegin(), j_strides.cend(), stride_arr.begin(),\n                   [](Json in) { return get<Integer const>(in); });\n\n    // Handle column vector vs. 
row vector\n    HandleRowVector<D>(shape_arr, &stride_arr);\n    size_t i;\n    for (i = 0; i < stride_arr.size(); ++i) {\n      // If one of the dim has shape 0 then total size is 0, stride is meaningless, but we\n      // set it to 0 here just to be consistent\n      CHECK_LT(i, D) << ArrayInterfaceErrors::Dimension(D);\n      // We use number of items instead of number of bytes\n      stride[i] = stride_arr[i] / itemsize;\n    }\n    std::fill(stride + i, stride + D, 1);\n    // If the stride can be calculated from shape then it's contiguous.\n    size_t stride_tmp[D];\n    linalg::detail::CalcStride(shape, stride_tmp);\n    return std::equal(stride_tmp, stride_tmp + D, stride);\n  }\n\n  static void *ExtractData(Object::Map const &array, size_t size) {\n    Validate(array);\n    void *p_data = ArrayInterfaceHandler::GetPtrFromArrayData<void *>(array);\n    if (!p_data) {\n      CHECK_EQ(size, 0) << \"Empty data with non-zero shape.\";\n    }\n    return p_data;\n  }\n  /**\n   * \\brief Whether the ptr is allocated by CUDA.\n   */\n  static bool IsCudaPtr(void const *ptr);\n  /**\n   * \\brief Sync the CUDA stream.\n   */\n  static void SyncCudaStream(int64_t stream);\n};\n\n/**\n * Dispatch compile time type to runtime type.\n */\ntemplate <typename T, typename E = void>\nstruct ToDType;\n// float\n#if defined(XGBOOST_USE_CUDA)\ntemplate <>\nstruct ToDType<__half> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;\n};\n#endif  // defined(XGBOOST_USE_CUDA)\ntemplate <>\nstruct ToDType<float> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;\n};\ntemplate <>\nstruct ToDType<double> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF8;\n};\ntemplate <typename T>\nstruct ToDType<T,\n               std::enable_if_t<std::is_same_v<T, long double> && sizeof(long double) == 16>> {\n  static constexpr ArrayInterfaceHandler::Type kType = 
ArrayInterfaceHandler::kF16;\n};\n// uint\ntemplate <>\nstruct ToDType<uint8_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kU1;\n};\ntemplate <>\nstruct ToDType<uint16_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kU2;\n};\ntemplate <>\nstruct ToDType<uint32_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kU4;\n};\ntemplate <>\nstruct ToDType<uint64_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kU8;\n};\n// int\ntemplate <>\nstruct ToDType<int8_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI1;\n};\ntemplate <>\nstruct ToDType<int16_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI2;\n};\ntemplate <>\nstruct ToDType<int32_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI4;\n};\ntemplate <>\nstruct ToDType<int64_t> {\n  static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;\n};\n\n/**\n * \\brief A type erased view over __array_interface__ protocol defined by numpy\n *\n *   <a href=\"https://numpy.org/doc/stable/reference/arrays.interface.html\">numpy</a>.\n *\n * \\tparam D The number of maximum dimension.\n\n *   User input array must have dim <= D for all non-trivial dimensions.  During\n *   construction, the ctor can automatically remove those trivial dimensions.\n *\n * \\tparam allow_mask Whether masked array is accepted.\n *\n *   Currently this only supported for 1-dim vector, which is used by cuDF column\n *   (apache arrow format).  
For general masked array, as the time of writting, only\n *   numpy has the proper support even though it's in the __cuda_array_interface__\n *   protocol defined by numba.\n */\ntemplate <std::int32_t D, bool allow_mask = (D == 1)>\nclass ArrayInterface {\n  static_assert(D > 0, \"Invalid dimension for array interface.\");\n\n  /**\n   * \\brief Initialize the object, by extracting shape, stride and type.\n   *\n   *   The function also perform some basic validation for input array.  Lastly it will\n   *   also remove trivial dimensions like converting a matrix with shape (n_samples, 1)\n   *   to a vector of size n_samples.  For for inputs like weights, this should be a 1\n   *   dimension column vector even though user might provide a matrix.\n   */\n  void Initialize(Object::Map const &array) {\n    ArrayInterfaceHandler::Validate(array);\n\n    auto typestr = get<String const>(array.at(\"typestr\"));\n    this->AssignType(StringView{typestr});\n    ArrayInterfaceHandler::ExtractShape(array, shape);\n    std::size_t itemsize = typestr[2] - '0';\n    is_contiguous = ArrayInterfaceHandler::ExtractStride(array, itemsize, shape, strides);\n    n = linalg::detail::CalcSize(shape);\n\n    data = ArrayInterfaceHandler::ExtractData(array, n);\n    static_assert(allow_mask ? D == 1 : D >= 1, \"Masked ndarray is not supported.\");\n\n    auto alignment = this->ElementAlignment();\n    auto ptr = reinterpret_cast<uintptr_t>(this->data);\n    if (!std::all_of(this->shape, this->shape + D, [](auto v) { return v == 0; })) {\n      CHECK_EQ(ptr % alignment, 0) << \"Input pointer misalignment.\";\n    }\n\n    if (allow_mask) {\n      common::Span<RBitField8::value_type> s_mask;\n      size_t n_bits = ArrayInterfaceHandler::ExtractMask(array, &s_mask);\n\n      valid = RBitField8(s_mask);\n\n      if (s_mask.data()) {\n        CHECK_EQ(n_bits, n) << \"Shape of bit mask doesn't match data shape. 
\"\n                            << \"XGBoost doesn't support internal broadcasting.\";\n      }\n    } else {\n      auto mask_it = array.find(\"mask\");\n      CHECK(mask_it == array.cend() || IsA<Null>(mask_it->second))\n          << \"Masked array is not yet supported.\";\n    }\n\n    auto stream_it = array.find(\"stream\");\n    if (stream_it != array.cend() && !IsA<Null>(stream_it->second)) {\n      int64_t stream = get<Integer const>(stream_it->second);\n      ArrayInterfaceHandler::SyncCudaStream(stream);\n    }\n  }\n\n public:\n  ArrayInterface() = default;\n  explicit ArrayInterface(Object::Map const &array) { this->Initialize(array); }\n\n  explicit ArrayInterface(Json const &array) {\n    if (IsA<Object>(array)) {\n      this->Initialize(get<Object const>(array));\n      return;\n    }\n    if (IsA<Array>(array)) {\n      CHECK_EQ(get<Array const>(array).size(), 1)\n          << \"Column: \" << ArrayInterfaceErrors::Dimension(1);\n      this->Initialize(get<Object const>(get<Array const>(array)[0]));\n      return;\n    }\n  }\n\n  explicit ArrayInterface(std::string const &str) : ArrayInterface{StringView{str}} {}\n\n  explicit ArrayInterface(StringView str) : ArrayInterface{Json::Load(str)} {}\n\n  void AssignType(StringView typestr) {\n    using T = ArrayInterfaceHandler::Type;\n    if (typestr.size() == 4 && typestr[1] == 'f' && typestr[2] == '1' && typestr[3] == '6') {\n      CHECK(sizeof(long double) == 16) << error::NoF128();\n      type = T::kF16;\n    } else if (typestr[1] == 'f' && typestr[2] == '2') {\n#if defined(XGBOOST_USE_CUDA)\n      type = T::kF2;\n#else\n      LOG(FATAL) << \"Half type is not supported.\";\n#endif  // defined(XGBOOST_USE_CUDA)\n    } else if (typestr[1] == 'f' && typestr[2] == '4') {\n      type = T::kF4;\n    } else if (typestr[1] == 'f' && typestr[2] == '8') {\n      type = T::kF8;\n    } else if (typestr[1] == 'i' && typestr[2] == '1') {\n      type = T::kI1;\n    } else if (typestr[1] == 'i' && typestr[2] == '2') 
{\n      type = T::kI2;\n    } else if (typestr[1] == 'i' && typestr[2] == '4') {\n      type = T::kI4;\n    } else if (typestr[1] == 'i' && typestr[2] == '8') {\n      type = T::kI8;\n    } else if (typestr[1] == 'u' && typestr[2] == '1') {\n      type = T::kU1;\n    } else if (typestr[1] == 'u' && typestr[2] == '2') {\n      type = T::kU2;\n    } else if (typestr[1] == 'u' && typestr[2] == '4') {\n      type = T::kU4;\n    } else if (typestr[1] == 'u' && typestr[2] == '8') {\n      type = T::kU8;\n    } else {\n      LOG(FATAL) << ArrayInterfaceErrors::UnSupportedType(typestr);\n      return;\n    }\n  }\n\n  template <std::size_t i>\n  [[nodiscard]] XGBOOST_DEVICE std::size_t Shape() const {\n    static_assert(i < D);\n    return shape[i];\n  }\n  template <std::size_t i>\n  [[nodiscard]] XGBOOST_DEVICE std::size_t Stride() const {\n    static_assert(i < D);\n    return strides[i];\n  }\n\n  template <typename Fn>\n  XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const {\n    using T = ArrayInterfaceHandler::Type;\n    switch (type) {\n      case T::kF2: {\n#if defined(XGBOOST_USE_CUDA)\n        return func(reinterpret_cast<__half const *>(data));\n#endif  // defined(XGBOOST_USE_CUDA)\n      }\n      case T::kF4:\n        return func(reinterpret_cast<float const *>(data));\n      case T::kF8:\n        return func(reinterpret_cast<double const *>(data));\n#ifdef __CUDA_ARCH__\n      case T::kF16: {\n        // CUDA device code doesn't support long double.\n        SPAN_CHECK(false);\n        return func(reinterpret_cast<double const *>(data));\n      }\n#else\n      case T::kF16:\n        return func(reinterpret_cast<long double const *>(data));\n#endif\n      case T::kI1:\n        return func(reinterpret_cast<int8_t const *>(data));\n      case T::kI2:\n        return func(reinterpret_cast<int16_t const *>(data));\n      case T::kI4:\n        return func(reinterpret_cast<int32_t const *>(data));\n      case T::kI8:\n        return 
func(reinterpret_cast<int64_t const *>(data));\n      case T::kU1:\n        return func(reinterpret_cast<uint8_t const *>(data));\n      case T::kU2:\n        return func(reinterpret_cast<uint16_t const *>(data));\n      case T::kU4:\n        return func(reinterpret_cast<uint32_t const *>(data));\n      case T::kU8:\n        return func(reinterpret_cast<uint64_t const *>(data));\n    }\n    SPAN_CHECK(false);\n    return func(reinterpret_cast<uint64_t const *>(data));\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE std::size_t ElementSize() const {\n    return this->DispatchCall([](auto *typed_data_ptr) {\n      return sizeof(std::remove_pointer_t<decltype(typed_data_ptr)>);\n    });\n  }\n  [[nodiscard]] XGBOOST_DEVICE std::size_t ElementAlignment() const {\n    return this->DispatchCall([](auto *typed_data_ptr) {\n      return std::alignment_of_v<std::remove_pointer_t<decltype(typed_data_ptr)>>;\n    });\n  }\n\n  template <typename T = float, typename... Index>\n  XGBOOST_HOST_DEV_INLINE T operator()(Index &&...index) const {\n    static_assert(sizeof...(index) <= D, \"Invalid index.\");\n    return this->DispatchCall([=](auto const *p_values) -> T {\n      std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);\n#if defined(XGBOOST_USE_CUDA)\n      // No operator defined for half -> size_t\n      using Type = std::conditional_t<\n          std::is_same_v<__half, std::remove_cv_t<std::remove_pointer_t<decltype(p_values)>>> &&\n              std::is_same_v<std::size_t, std::remove_cv_t<T>>,\n          unsigned long long, T>;  // NOLINT\n      return static_cast<T>(static_cast<Type>(p_values[offset]));\n#else\n      return static_cast<T>(p_values[offset]);\n#endif  // defined(XGBOOST_USE_CUDA)\n    });\n  }\n\n  // Used only by columnar format.\n  RBitField8 valid;\n  // Array stride\n  std::size_t strides[D]{0};\n  // Array shape\n  std::size_t shape[D]{0};\n  // Type earsed pointer referencing the data.\n  void const *data{nullptr};\n  // Total number 
of items\n  std::size_t n{0};\n  // Whether the memory is c-contiguous\n  bool is_contiguous{false};\n  // RTTI, initialized to the f16 to avoid masking potential bugs in initialization.\n  ArrayInterfaceHandler::Type type{ArrayInterfaceHandler::kF16};\n};\n\ntemplate <typename Fn>\nauto DispatchDType(ArrayInterfaceHandler::Type dtype, Fn dispatch) {\n  switch (dtype) {\n    case ArrayInterfaceHandler::kF2: {\n#if defined(XGBOOST_USE_CUDA)\n      return dispatch(__half{});\n#else\n      LOG(FATAL) << \"half type is only supported for CUDA input.\";\n      break;\n#endif\n    }\n    case ArrayInterfaceHandler::kF4: {\n      return dispatch(float{});\n    }\n    case ArrayInterfaceHandler::kF8: {\n      return dispatch(double{});\n    }\n    case ArrayInterfaceHandler::kF16: {\n      using T = long double;\n      CHECK(sizeof(T) == 16) << error::NoF128();\n      // Avoid invalid type.\n      if constexpr (sizeof(T) == 16) {\n        return dispatch(T{});\n      } else {\n        return dispatch(double{});\n      }\n    }\n    case ArrayInterfaceHandler::kI1: {\n      return dispatch(std::int8_t{});\n    }\n    case ArrayInterfaceHandler::kI2: {\n      return dispatch(std::int16_t{});\n    }\n    case ArrayInterfaceHandler::kI4: {\n      return dispatch(std::int32_t{});\n    }\n    case ArrayInterfaceHandler::kI8: {\n      return dispatch(std::int64_t{});\n    }\n    case ArrayInterfaceHandler::kU1: {\n      return dispatch(std::uint8_t{});\n    }\n    case ArrayInterfaceHandler::kU2: {\n      return dispatch(std::uint16_t{});\n    }\n    case ArrayInterfaceHandler::kU4: {\n      return dispatch(std::uint32_t{});\n    }\n    case ArrayInterfaceHandler::kU8: {\n      return dispatch(std::uint64_t{});\n    }\n  }\n\n  return std::invoke_result_t<Fn, std::int8_t>();\n}\n\ntemplate <std::int32_t D, typename Fn>\nvoid DispatchDType(ArrayInterface<D> const array, DeviceOrd device, Fn fn) {\n  // Only used for cuDF at the moment.\n  CHECK_EQ(array.valid.Capacity(), 0);\n  
auto dispatch = [&](auto t) {\n    using T = std::remove_const_t<decltype(t)> const;\n    // Set the data size to max as we don't know the original size of a sliced array:\n    //\n    // Slicing an array A with shape (4, 2, 3) and stride (6, 3, 1) by [:, 1, :] results\n    // in an array B with shape (4, 3) and strides (6, 1). We can't calculate the original\n    // size 24 based on the slice.\n    fn(linalg::TensorView<T, D>{common::Span<T const>{static_cast<T *>(array.data),\n                                                      std::numeric_limits<std::size_t>::max()},\n                                array.shape, array.strides, device});\n  };\n  DispatchDType(array.type, dispatch);\n}\n\n/**\n * \\brief Helper for type casting.\n */\ntemplate <typename T, int32_t D>\nstruct TypedIndex {\n  ArrayInterface<D> const &array;\n  template <typename... I>\n  XGBOOST_DEVICE T operator()(I &&...ind) const {\n    static_assert(sizeof...(ind) <= D, \"Invalid index.\");\n    return array.template operator()<T>(ind...);\n  }\n};\n\ntemplate <int32_t D>\ninline void CheckArrayInterface(StringView key, ArrayInterface<D> const &array) {\n  CHECK(!array.valid.Data()) << \"Meta info \" << key << \" should be dense, found validity mask\";\n}\n}  // namespace xgboost\n#endif  // XGBOOST_DATA_ARRAY_INTERFACE_H_\n"
  },
  {
    "path": "src/data/batch_utils.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include \"batch_utils.h\"\n\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int64_t\n#include <utility>    // for pair\n\n#include \"../common/common.h\"         // for AssertGPUSupport\n#include \"../common/cuda_rt_utils.h\"  // for TotalMemory\n#include \"../common/error_msg.h\"      // for InconsistentMaxBin\n\n#if defined(XGBOOST_USE_CUDA)\n\n#include \"../common/cuda_dr_utils.h\"  // for GetC2cLinkCountFromSmiGlobal\n\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost::data::detail {\nvoid CheckParam(BatchParam const& init, BatchParam const& param) {\n  CHECK_EQ(param.max_bin, init.max_bin) << error::InconsistentMaxBin();\n  CHECK(!param.regen && param.hess.empty())\n      << \"Only the `hist` tree method can use the `QuantileDMatrix`.\";\n}\n\n/**\n * @brief Check whether we should configure `min_cache_page_bytes`.\n *\n * Defined by @ref AutoCachePageBytes .\n */\n[[nodiscard]] bool CachePageBytesIsAuto(std::int64_t min_cache_page_bytes) {\n  return min_cache_page_bytes == cuda_impl::AutoCachePageBytes();\n}\n\n[[nodiscard]] std::pair<double, std::int64_t> DftPageSizeHostRatio(\n    std::size_t n_cache_bytes, bool is_validation, double cache_host_ratio,\n    std::int64_t min_cache_page_bytes) {\n  common::AssertGPUSupport();\n\n  if (!HostRatioIsAuto(cache_host_ratio)) {\n    // Use user config.\n    CHECK_GE(cache_host_ratio, 0.0f) << error::CacheHostRatioInvalid();\n    CHECK_LE(cache_host_ratio, 1.0f) << error::CacheHostRatioInvalid();\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n  auto n_d_bytes = curt::TotalMemory();\n\n  using xgboost::cuda_impl::CachePageRatio;\n\n  auto lc = cudr::GetC2cLinkCountFromSmiGlobal();\n\n  /**\n   * Configure the min_cache_page_bytes\n   */\n  // -1 if PCIe device, or something went wrong when running nvidia-smi\n  //\n  // GH200 1 CPU + 1 GPU has 10. 
For 1 CPU + 2 GPU, it's 5.\n  //\n  // Either way, we configure the cache based on the ratio between cache sizes and the\n  // available memory.\n  // Use half of the device memory for cache.\n  auto d_cache_nbytes = n_d_bytes / 2;\n\n  // Since half of the device is used for the cache, we have to use smaller page size.\n  if (CachePageBytesIsAuto(min_cache_page_bytes)) {\n    min_cache_page_bytes = n_d_bytes * (CachePageRatio() / 2.0);\n  }\n\n  /**\n   * Configure the ratio.\n   */\n  if (!HostRatioIsAuto(cache_host_ratio)) {\n    // Do nothing if it's provided by the user\n    return {cache_host_ratio, min_cache_page_bytes};\n  } else if (is_validation) {\n    // Use full host cache for the validation dataset.\n    cache_host_ratio = 1.0;\n  } else if (n_cache_bytes <= d_cache_nbytes) {\n    // The total size of the cache is smaller than the available device cache.\n    cache_host_ratio = 0.0;\n  } else {\n    // The number of bytes that must be in the host memory.\n    auto h_cache_nbytes = n_cache_bytes - d_cache_nbytes * 0.85;\n    cache_host_ratio = static_cast<double>(h_cache_nbytes) / static_cast<double>(n_cache_bytes);\n    if (lc > 0) {\n      // No need to exceed half in practice.\n      cache_host_ratio = std::max(cache_host_ratio, 0.5);\n    }\n  }\n#else\n  (void)n_cache_bytes;\n  (void)is_validation;\n#endif  // defined(XGBOOST_USE_CUDA)\n  return {cache_host_ratio, min_cache_page_bytes};\n}\n}  // namespace xgboost::data::detail\n"
  },
  {
    "path": "src/data/batch_utils.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_DATA_BATCH_UTILS_H_\n#define XGBOOST_DATA_BATCH_UTILS_H_\n\n#include <cmath>    // for isnan\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int64_t\n#include <limits>   // for numeric_limits\n#include <utility>  // for pair\n\n#include \"xgboost/data.h\"  // for BatchParam\n\nnamespace xgboost::data::detail {\n// At least one batch parameter is initialized.\ninline void CheckEmpty(BatchParam const& l, BatchParam const& r) {\n  if (!l.Initialized()) {\n    CHECK(r.Initialized()) << \"Batch parameter is not initialized.\";\n  }\n}\n\n/**\n * \\brief Should we regenerate the gradient index?\n *\n * \\param old Parameter stored in DMatrix.\n * \\param p   New parameter passed in by caller.\n */\ninline bool RegenGHist(BatchParam old, BatchParam p) {\n  // Parameter is renewed or caller requests a regen\n  if (!p.Initialized()) {\n    // Empty parameter is passed in, don't regenerate so that we can use gindex in\n    // predictor, which doesn't have any training parameter.\n    return false;\n  }\n  return p.regen || old.ParamNotEqual(p);\n}\n\n/**\n * @brief Validate the batch parameter from the caller\n */\nvoid CheckParam(BatchParam const& init, BatchParam const& param);\n\n/**\n * @brief Configure the `cache_host_ratio` and the `min_cache_page_bytes`.\n */\n[[nodiscard]] std::pair<double, std::int64_t> DftPageSizeHostRatio(\n    std::size_t n_cache_bytes, bool is_validation, double cache_host_ratio,\n    std::int64_t min_cache_page_bytes);\n\n/**\n * @brief Check whether we should configure `cache_host_ratio`.\n *\n * Defined by @ref AutoHostRatio .\n */\n[[nodiscard]] inline bool HostRatioIsAuto(float cache_host_ratio) {\n  return std::isnan(cache_host_ratio);\n}\n}  // namespace xgboost::data::detail\n\nnamespace xgboost::cuda_impl {\n// Indicator for XGBoost to not concatenate any page.\nconstexpr std::int64_t MatchingPageBytes() { return 0; }\n// Default size 
of the cached page, 1/8\nconstexpr double CachePageRatio() { return 0.125; }\n// Indicator for XGBoost to automatically concatenate pages.\nconstexpr std::int64_t AutoCachePageBytes() { return -1; }\n// Use two batches for prefetching. There's always one batch being worked on, while the other\n// batch is being transferred.\nconstexpr auto DftPrefetchBatches() { return 2; }\n// The ratio of the cache split for external memory. Use NaN to indicate not-set.\nconstexpr float AutoHostRatio() { return std::numeric_limits<float>::quiet_NaN(); }\n\n// Empty parameter to prevent regen, only used to control external memory prefetching.\n//\n// Both the approx and hist initialize the DMatrix before creating the actual\n// implementation (InitDataOnce). Therefore, the `GPUHistMakerDevice` can use an empty\n// parameter to avoid any regen.\ninline BatchParam StaticBatch(bool prefetch_copy) {\n  BatchParam p;\n  p.prefetch_copy = prefetch_copy;\n  p.n_prefetch_batches = DftPrefetchBatches();\n  return p;\n}\n}  // namespace xgboost::cuda_impl\n#endif  // XGBOOST_DATA_BATCH_UTILS_H_\n"
  },
  {
    "path": "src/data/cat_container.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"cat_container.h\"\n\n#include <algorithm>  // for copy\n#include <cstddef>    // for size_t\n#include <memory>     // for make_unique\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../collective/allreduce.h\"         // for Allreduce\n#include \"../collective/communicator-inl.h\"  // for GetRank, GetWorldSize\n#include \"../common/error_msg.h\"             // for NoFloatCat\n#include \"../encoder/types.h\"                // for Overloaded\n#include \"xgboost/json.h\"                    // for Json\n\nnamespace xgboost {\nCatContainer::CatContainer(enc::HostColumnsView const& df, bool is_ref) : CatContainer{} {\n  this->is_ref_ = is_ref;\n  this->n_total_cats_ = df.n_total_cats;\n  if (this->n_total_cats_ == 0) {\n    return;\n  }\n\n  this->feature_segments_.Resize(df.feature_segments.size());\n  auto& seg = this->feature_segments_.HostVector();\n  std::copy_n(df.feature_segments.data(), df.feature_segments.size(), seg.begin());\n\n  for (auto const& col : df.columns) {\n    std::visit(enc::Overloaded{\n                   [this](enc::CatStrArrayView str) {\n                     using T = typename cpu_impl::ViewToStorageImpl<enc::CatStrArrayView>::Type;\n                     this->cpu_impl_->columns.emplace_back();\n                     this->cpu_impl_->columns.back().emplace<T>();\n                     auto& v = std::get<T>(this->cpu_impl_->columns.back());\n                     v.offsets.resize(str.offsets.size());\n                     v.values.resize(str.values.size());\n                     std::copy_n(str.offsets.data(), str.offsets.size(), v.offsets.data());\n                     std::copy_n(str.values.data(), str.values.size(), v.values.data());\n                   },\n                   [this](auto&& values) {\n                     using T =\n                         typename 
cpu_impl::ViewToStorageImpl<std::decay_t<decltype(values)>>::Type;\n                     this->cpu_impl_->columns.emplace_back();\n                     using ElemT = typename T::value_type;\n\n                     if constexpr (std::is_floating_point_v<ElemT>) {\n                       LOG(FATAL) << error::NoFloatCat();\n                     }\n\n                     this->cpu_impl_->columns.back().emplace<T>();\n                     auto& v = std::get<T>(this->cpu_impl_->columns.back());\n                     v.resize(values.size());\n                     std::copy_n(values.data(), values.size(), v.data());\n                   }},\n               col);\n  }\n\n  this->sorted_idx_.Resize(0);\n  this->cpu_impl_->Finalize();\n\n  CHECK(!this->DeviceCanRead());\n  CHECK(this->HostCanRead());\n  CHECK_EQ(this->n_total_cats_, df.feature_segments.back());\n  CHECK_GE(this->n_total_cats_, 0) << \"Too many categories.\";\n  if (this->n_total_cats_ > 0) {\n    CHECK(!this->cpu_impl_->columns.empty());\n  }\n}\n\nnamespace {\ntemplate <typename T>\nstruct PrimToUbj;\n\ntemplate <>\nstruct PrimToUbj<std::uint8_t> {\n  using Type = U8Array;\n};\ntemplate <>\nstruct PrimToUbj<std::uint16_t> {\n  using Type = U16Array;\n};\ntemplate <>\nstruct PrimToUbj<std::uint32_t> {\n  using Type = U32Array;\n};\ntemplate <>\nstruct PrimToUbj<std::uint64_t> {\n  using Type = U64Array;\n};\ntemplate <>\nstruct PrimToUbj<std::int8_t> {\n  using Type = I8Array;\n};\ntemplate <>\nstruct PrimToUbj<std::int16_t> {\n  using Type = I16Array;\n};\ntemplate <>\nstruct PrimToUbj<std::int32_t> {\n  using Type = I32Array;\n};\ntemplate <>\nstruct PrimToUbj<std::int64_t> {\n  using Type = I64Array;\n};\ntemplate <>\nstruct PrimToUbj<float> {\n  using Type = F32Array;\n};\ntemplate <>\nstruct PrimToUbj<double> {\n  using Type = F64Array;\n};\n}  // anonymous namespace\n\nvoid CatContainer::Save(Json* p_out) const {\n  [[maybe_unused]] auto _ = this->HostView();\n  auto& out = *p_out;\n\n  auto const& 
columns = this->cpu_impl_->columns;\n  std::vector<Json> arr(this->cpu_impl_->columns.size());\n  for (std::size_t fidx = 0, n_features = columns.size(); fidx < n_features; ++fidx) {\n    auto& f_out = arr[fidx];\n\n    auto const& col = columns[fidx];\n    std::visit(enc::Overloaded{\n                   [&f_out](cpu_impl::CatStrArray const& str) {\n                     f_out = Object{};\n                     I32Array joffsets{str.offsets.size()};\n                     auto const& f_offsets = str.offsets;\n                     std::copy(f_offsets.cbegin(), f_offsets.cend(), joffsets.GetArray().begin());\n                     f_out[\"offsets\"] = std::move(joffsets);\n\n                     I8Array jnames{str.values.size()};  // fixme: uint8\n                     auto const& f_names = str.values;\n                     std::copy(f_names.cbegin(), f_names.cend(), jnames.GetArray().begin());\n                     f_out[\"values\"] = std::move(jnames);\n                   },\n                   [&f_out](auto&& values) {\n                     using T =\n                         std::remove_cv_t<typename std::decay_t<decltype(values)>::value_type>;\n                     using JT = typename PrimToUbj<T>::Type;\n                     JT array{values.size()};\n                     std::copy_n(values.data(), values.size(), array.GetArray().begin());\n\n                     Object out{};\n                     out[\"type\"] = static_cast<std::int64_t>(array.Type());\n                     out[\"values\"] = std::move(array);\n\n                     f_out = std::move(out);\n                   }},\n               col);\n  }\n\n  auto jf_segments = I32Array{this->feature_segments_.Size()};\n  auto const& hf_segments = this->feature_segments_.ConstHostVector();\n  std::copy(hf_segments.cbegin(), hf_segments.cend(), jf_segments.GetArray().begin());\n\n  auto jsorted_index = I32Array{this->sorted_idx_.Size()};\n  auto const& h_sorted_idx = this->sorted_idx_.ConstHostVector();\n  
std::copy_n(h_sorted_idx.cbegin(), h_sorted_idx.size(), jsorted_index.GetArray().begin());\n\n  out = Object{};\n  out[\"sorted_idx\"] = std::move(jsorted_index);\n  out[\"feature_segments\"] = std::move(jf_segments);\n  out[\"enc\"] = arr;\n}\n\nnamespace {\n// Dispatch method for JSON and UBJSON\ntemplate <typename U, typename Vec>\nvoid LoadJson(Json jvalues, Vec* p_out) {\n  if (IsA<Array>(jvalues)) {\n    auto const& jarray = get<Array const>(jvalues);\n    std::vector<U> buf(jarray.size());\n    for (std::size_t i = 0, n = jarray.size(); i < n; ++i) {\n      buf[i] = static_cast<U>(get<Integer const>(jarray[i]));\n    }\n    *p_out = std::move(buf);\n    return;\n  }\n  auto const& values = get<std::add_const_t<typename PrimToUbj<U>::Type>>(jvalues);\n  *p_out = std::move(values);\n}\n}  // namespace\n\nvoid CatContainer::Load(Json const& in) {\n  auto array = get<Array const>(in[\"enc\"]);\n  auto n_features = array.size();\n\n  auto& columns = this->cpu_impl_->columns;\n  for (std::size_t fidx = 0; fidx < n_features; ++fidx) {\n    auto const& column = get<Object>(array[fidx]);\n    auto it = column.find(\"offsets\");\n    if (it != column.cend()) {\n      // str\n      cpu_impl::CatStrArray str{};\n      LoadJson<std::int32_t>(column.at(\"offsets\"), &str.offsets);\n      LoadJson<enc::CatCharT>(column.at(\"values\"), &str.values);\n\n      columns.emplace_back(str);\n    } else {\n      // numeric\n      auto type = get<Integer const>(column.at(\"type\"));\n      using T = Value::ValueKind;\n      auto const& jvalues = column.at(\"values\");\n      columns.emplace_back();\n      switch (static_cast<Value::ValueKind>(type)) {\n        case T::kI8Array: {\n          LoadJson<std::int8_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kU8Array: {\n          LoadJson<std::uint8_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kI16Array: {\n          LoadJson<std::int16_t>(jvalues, &columns.back());\n        
  break;\n        }\n        case T::kU16Array: {\n          LoadJson<std::uint16_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kI32Array: {\n          LoadJson<std::int32_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kU32Array: {\n          LoadJson<std::uint32_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kI64Array: {\n          LoadJson<std::int64_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kU64Array: {\n          LoadJson<std::uint64_t>(jvalues, &columns.back());\n          break;\n        }\n        case T::kF32Array: {\n          LoadJson<float>(jvalues, &columns.back());\n          break;\n        }\n        case T::kF64Array: {\n          LoadJson<double>(jvalues, &columns.back());\n          break;\n        }\n        default: {\n          LOG(FATAL) << \"Invalid type.\";\n        }\n      }\n    }\n  }\n\n  auto& hf_segments = this->feature_segments_.HostVector();\n  LoadJson<std::int32_t>(in[\"feature_segments\"], &hf_segments);\n  if (hf_segments.empty()) {\n    this->n_total_cats_ = 0;\n  } else {\n    this->n_total_cats_ = hf_segments.back();\n  }\n\n  auto& h_sorted_idx = this->sorted_idx_.HostVector();\n  LoadJson<std::int32_t>(in[\"sorted_idx\"], &h_sorted_idx);\n\n  this->cpu_impl_->Finalize();\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nCatContainer::CatContainer() : cpu_impl_{std::make_unique<cpu_impl::CatContainerImpl>()} {}\n\nCatContainer::~CatContainer() = default;\n\nvoid CatContainer::Copy(Context const* ctx, CatContainer const& that) {\n  [[maybe_unused]] auto h_view = that.HostView();\n  this->CopyCommon(ctx, that);\n  this->cpu_impl_->Copy(that.cpu_impl_.get());\n}\n\n[[nodiscard]] enc::HostColumnsView CatContainer::HostView() const { return this->HostViewImpl(); }\n\n[[nodiscard]] bool CatContainer::Empty() const { return this->cpu_impl_->columns.empty(); }\n\n[[nodiscard]] std::size_t CatContainer::NumFeatures() const 
{\n  return this->cpu_impl_->columns.size();\n}\n\nvoid CatContainer::Sort(Context const* ctx) {\n  CHECK(ctx->IsCPU());\n  auto view = this->HostView();\n  this->sorted_idx_.HostVector().resize(view.n_total_cats);\n  enc::SortNames(enc::Policy<EncErrorPolicy>{}, view, this->sorted_idx_.HostSpan());\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nvoid SyncCategories(Context const* ctx, CatContainer* cats, bool is_empty) {\n  CHECK(cats);\n  if (!collective::IsDistributed()) {\n    return;\n  }\n\n  auto rank = collective::GetRank();\n  std::vector<std::int32_t> workers(collective::GetWorldSize(), 0);\n  workers[rank] = is_empty;\n  collective::SafeColl(collective::Allreduce(ctx, &workers, collective::Op::kSum));\n  if (cats->HasCategorical() &&\n      std::any_of(workers.cbegin(), workers.cend(), [](auto v) { return v == 1; })) {\n    LOG(FATAL)\n        << \"A worker cannot have empty input when a dataframe with categorical features is used. \"\n           \"XGBoost cannot infer the categories if the input is empty.\";\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/cat_container.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <thrust/copy.h>  // for copy\n\n#include <memory>  // for make_unique\n#include <vector>  // for vector\n\n#include \"../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../common/device_helpers.cuh\"  // for ToSpan\n#include \"../common/device_vector.cuh\"   // for device_vector\n#include \"../common/type.h\"              // for GetValueT\n#include \"../encoder/ordinal.cuh\"        // for SortNames\n#include \"../encoder/ordinal.h\"          // for DictionaryView\n#include \"../encoder/types.h\"            // for Overloaded\n#include \"cat_container.cuh\"             // for CatStrArray\n#include \"cat_container.h\"               // for CatContainer\n#include \"xgboost/span.h\"                // for Span\n\nnamespace xgboost {\nnamespace cuda_impl {\nstruct CatContainerImpl {\n  std::vector<ColumnType> columns;\n  dh::device_vector<enc::DeviceCatIndexView> columns_v;\n\n  template <typename VariantT>\n  void CopyFrom(Context const* ctx, enc::detail::ColumnsViewImpl<VariantT> that) {\n    this->columns.resize(that.columns.size());\n    this->columns_v.resize(that.columns.size());\n    CHECK_EQ(this->columns.size(), this->columns_v.size());\n    auto stream = ctx->CUDACtx()->Stream();\n\n    std::vector<decltype(columns_v)::value_type> h_columns_v(this->columns_v.size());\n    for (std::size_t f_idx = 0, n = that.columns.size(); f_idx < n; ++f_idx) {\n      auto const& col_v = that.columns[f_idx];\n      auto dispatch = enc::Overloaded{\n          [this, f_idx, &h_columns_v, stream](enc::CatStrArrayView const& str) {\n            this->columns[f_idx].emplace<CatStrArray>();\n            auto& col = std::get<CatStrArray>(this->columns[f_idx]);\n            // Handle the offsets\n            col.offsets.resize(str.offsets.size());\n            if (!str.offsets.empty()) {\n              dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.offsets.data()),\n                       
                     str.offsets.data(), str.offsets.size_bytes(),\n                                            cudaMemcpyDefault, stream));\n            }\n            // Handle the values\n            col.values.resize(str.values.size());\n            if (!col.values.empty()) {\n              dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.values.data()),\n                                            str.values.data(), str.values.size_bytes(),\n                                            cudaMemcpyDefault, stream));\n            }\n            // Create the view\n            h_columns_v[f_idx].emplace<enc::CatStrArrayView>();\n            auto& col_v = cuda::std::get<enc::CatStrArrayView>(h_columns_v[f_idx]);\n            col_v = {dh::ToSpan(col.offsets), dh::ToSpan(col.values)};\n          },\n          [this, f_idx, &h_columns_v, stream](auto&& values) {\n            using T = std::remove_cv_t<typename std::decay_t<decltype(values)>::value_type>;\n\n            this->columns[f_idx].emplace<dh::device_vector<T>>();\n            auto& col = std::get<dh::device_vector<T>>(this->columns[f_idx]);\n\n            col.resize(values.size());\n            if (!values.empty()) {\n              dh::safe_cuda(cudaMemcpyAsync(col.data().get(), values.data(), values.size_bytes(),\n                                            cudaMemcpyDefault, stream));\n            }\n\n            // Create the view\n            using V = common::Span<std::add_const_t<T>>;\n            h_columns_v[f_idx].emplace<V>();\n            auto& col_v = cuda::std::get<V>(h_columns_v[f_idx]);\n            col_v = dh::ToSpan(col);\n          }};\n      auto visit = [&](auto const& col) {\n        using ColT = common::GetValueT<decltype(col)>;\n        if constexpr (std::is_same_v<ColT, enc::HostCatIndexView>) {\n          std::visit(dispatch, col);\n        } else {\n          static_assert(std::is_same_v<ColT, enc::DeviceCatIndexView>);\n          cuda::std::visit(dispatch, col);\n        
}\n      };\n      visit(col_v);\n    }\n    thrust::copy_n(h_columns_v.data(), h_columns_v.size(), this->columns_v.data());\n\n    CHECK_EQ(this->columns.size(), this->columns_v.size());\n  }\n\n  void CopyTo(cpu_impl::CatContainerImpl* that) {\n    CHECK_EQ(this->columns.size(), this->columns_v.size());\n    that->columns.clear();\n    for (auto const& col : this->columns) {\n      that->columns.emplace_back();\n      auto& out_col = that->columns.back();\n\n      std::visit(enc::Overloaded{\n                     [&](CatStrArray const& str) {\n                       out_col.emplace<cpu_impl::CatStrArray>();\n                       auto& out_str = std::get<cpu_impl::CatStrArray>(out_col);\n                       // Offsets\n                       out_str.offsets.resize(str.offsets.size());\n                       if (!out_str.offsets.empty()) {\n                         dh::safe_cuda(cudaMemcpyAsync(\n                             out_str.offsets.data(), thrust::raw_pointer_cast(str.offsets.data()),\n                             common::Span{out_str.offsets}.size_bytes(), cudaMemcpyDefault));\n                       }\n                       // Values\n                       out_str.values.resize(str.values.size());\n                       if (!out_str.values.empty()) {\n                         dh::safe_cuda(cudaMemcpyAsync(\n                             out_str.values.data(), thrust::raw_pointer_cast(str.values.data()),\n                             common::Span{out_str.values}.size_bytes(), cudaMemcpyDefault));\n                       }\n                     },\n                     [&](auto&& values) {\n                       using T0 = decltype(values);\n                       using T1 = std::add_const_t<typename std::decay_t<T0>::value_type>;\n                       using Vec = typename cpu_impl::ViewToStorageImpl<common::Span<T1>>::Type;\n                       out_col.emplace<Vec>();\n                       auto& out_vec = std::get<Vec>(out_col);\n          
             out_vec.resize(values.size());\n                       if (!out_vec.empty()) {\n                         dh::safe_cuda(cudaMemcpyAsync(\n                             out_vec.data(), thrust::raw_pointer_cast(values.data()),\n                             common::Span{out_vec}.size_bytes(), cudaMemcpyDefault));\n                       }\n                     }},\n                 col);\n    }\n    that->Finalize();\n  }\n};\n\n[[nodiscard]] std::tuple<CatAccessor, dh::DeviceUVector<std::int32_t>> MakeCatAccessor(\n    Context const* ctx, enc::DeviceColumnsView const& new_enc, CatContainer const* orig_cats) {\n  dh::DeviceUVector<std::int32_t> mapping(new_enc.n_total_cats);\n  auto d_sorted_idx = orig_cats->RefSortedIndex(ctx);\n  auto orig_enc = orig_cats->DeviceView(ctx);\n  enc::Recode(EncPolicy, orig_enc, d_sorted_idx, new_enc, dh::ToSpan(mapping));\n  CHECK_EQ(new_enc.feature_segments.size(), orig_enc.feature_segments.size());\n  auto cats_mapping = enc::MappingView{new_enc.feature_segments, dh::ToSpan(mapping)};\n  auto acc = CatAccessor{cats_mapping};\n  return std::tuple{acc, std::move(mapping)};\n}\n}  // namespace cuda_impl\n\nCatContainer::CatContainer()  // NOLINT\n    : cpu_impl_{std::make_unique<cpu_impl::CatContainerImpl>()},\n      cu_impl_{std::make_unique<cuda_impl::CatContainerImpl>()} {}\n\nCatContainer::CatContainer(Context const* ctx, enc::DeviceColumnsView const& df, bool is_ref)\n    : CatContainer{} {\n  this->is_ref_ = is_ref;\n  this->n_total_cats_ = df.n_total_cats;\n\n  this->feature_segments_.SetDevice(ctx->Device());\n  this->feature_segments_.Resize(df.feature_segments.size());\n  auto d_segs = this->feature_segments_.DeviceSpan();\n  thrust::copy_n(ctx->CUDACtx()->CTP(), dh::tcbegin(df.feature_segments),\n                 df.feature_segments.size(), dh::tbegin(d_segs));\n\n  // FIXME(jiamingy): We can use a single kernel for copying data once cuDF can return\n  // device data. 
Remove this along with the one in the device cuDF adapter.\n  this->cu_impl_->CopyFrom(ctx, df);\n\n  this->sorted_idx_.SetDevice(ctx->Device());\n  this->sorted_idx_.Resize(0);\n  if (this->n_total_cats_ > 0) {\n    CHECK(this->DeviceCanRead());\n    CHECK(!this->HostCanRead());\n    CHECK(!this->cu_impl_->columns.empty());\n  }\n}\n\nCatContainer::~CatContainer() = default;\n\nvoid CatContainer::Copy(Context const* ctx, CatContainer const& that) {\n  if (ctx->IsCPU()) {\n    // Pull data to host\n    [[maybe_unused]] auto h_view = that.HostView();\n    this->CopyCommon(ctx, that);\n    this->cpu_impl_->Copy(that.cpu_impl_.get());\n    CHECK(!this->DeviceCanRead());\n  } else {\n    // Pull data to device\n    [[maybe_unused]] auto d_view = that.DeviceView(ctx);\n    this->CopyCommon(ctx, that);\n    auto const& that_impl = that.cu_impl_;\n    this->cu_impl_->columns.resize(that.cu_impl_->columns.size());\n\n    std::vector<decltype(this->cu_impl_->columns_v)::value_type> h_columns_v(\n        that.cu_impl_->columns_v.size());\n    for (std::size_t f_idx = 0, n = that_impl->columns.size(); f_idx < n; ++f_idx) {\n      auto const& col = that_impl->columns[f_idx];\n      std::visit(enc::Overloaded{\n                     [&](cuda_impl::CatStrArray const& str) {\n                       this->cu_impl_->columns[f_idx].emplace<cuda_impl::CatStrArray>();\n                       auto& col = std::get<cuda_impl::CatStrArray>(this->cu_impl_->columns[f_idx]);\n                       col.Copy(str);\n\n                       h_columns_v[f_idx].emplace<enc::CatStrArrayView>();\n                       auto& col_v = cuda::std::get<enc::CatStrArrayView>(h_columns_v[f_idx]);\n                       col_v = {dh::ToSpan(col.offsets), dh::ToSpan(col.values)};\n                     },\n                     [&](auto&& values) {\n                       using Vec = std::decay_t<decltype(values)>;\n                       using T = typename Vec::value_type;\n                       
this->cu_impl_->columns[f_idx].emplace<Vec>();\n                       this->cu_impl_->columns[f_idx] = values;\n\n                       using S = common::Span<std::add_const_t<T>>;\n                       h_columns_v[f_idx].emplace<S>();\n                       auto& col_v = cuda::std::get<S>(h_columns_v[f_idx]);\n                       col_v = dh::ToSpan(values);\n                     }},\n                 col);\n    }\n    this->cu_impl_->columns_v = h_columns_v;\n    CHECK(this->Empty() || !this->HostCanRead());\n  }\n  if (ctx->IsCPU()) {\n    CHECK_EQ(this->cpu_impl_->columns_v.size(), that.cpu_impl_->columns_v.size());\n    CHECK_EQ(this->cpu_impl_->columns.size(), that.cpu_impl_->columns.size());\n    CHECK(this->HostCanRead());\n  } else {\n    CHECK_EQ(this->cu_impl_->columns_v.size(), that.cu_impl_->columns_v.size());\n    CHECK_EQ(this->cu_impl_->columns.size(), that.cu_impl_->columns.size());\n    CHECK(this->DeviceCanRead());\n  }\n  CHECK_EQ(this->Empty(), that.Empty());\n  CHECK_EQ(this->NumCatsTotal(), that.NumCatsTotal());\n}\n\n[[nodiscard]] bool CatContainer::Empty() const {\n  return this->HostCanRead() ? 
this->cpu_impl_->columns.empty() : this->cu_impl_->columns.empty();\n}\n\n[[nodiscard]] std::size_t CatContainer::NumFeatures() const {\n  if (this->HostCanRead()) {\n    return this->cpu_impl_->columns.size();\n  }\n  return this->cu_impl_->columns.size();\n}\n\nvoid CatContainer::Sort(Context const* ctx) {\n  if (!this->HasCategorical()) {\n    return;\n  }\n\n  if (ctx->IsCPU()) {\n    auto view = this->HostView();\n    CHECK(!view.Empty()) << view.n_total_cats;\n    this->sorted_idx_.HostVector().resize(view.n_total_cats);\n    enc::SortNames(cpu_impl::EncPolicy, view, this->sorted_idx_.HostSpan());\n  } else {\n    auto view = this->DeviceView(ctx);\n    CHECK(!view.Empty()) << view.n_total_cats;\n    this->sorted_idx_.SetDevice(ctx->Device());\n    this->sorted_idx_.Resize(view.n_total_cats);\n    enc::SortNames(cuda_impl::EncPolicy, view, this->sorted_idx_.DeviceSpan());\n  }\n}\n\n[[nodiscard]] enc::HostColumnsView CatContainer::HostView() const {\n  std::lock_guard guard{device_mu_};\n  if (!this->HostCanRead()) {\n    this->feature_segments_.ConstHostSpan();\n    // Lazy copy to host\n    this->cu_impl_->CopyTo(this->cpu_impl_.get());\n  }\n  CHECK(this->HostCanRead());\n  return this->HostViewImpl();\n}\n\n[[nodiscard]] enc::DeviceColumnsView CatContainer::DeviceView(Context const* ctx) const {\n  CHECK(ctx->IsCUDA());\n  std::lock_guard guard{device_mu_};\n  if (!this->DeviceCanRead()) {\n    this->feature_segments_.SetDevice(ctx->Device());\n    this->feature_segments_.ConstDeviceSpan();\n    // Lazy copy to device\n    auto h_view = this->HostViewImpl();\n    this->cu_impl_->CopyFrom(ctx, h_view);\n    CHECK_EQ(this->cu_impl_->columns_v.size(), this->cpu_impl_->columns_v.size());\n    CHECK_EQ(this->cu_impl_->columns.size(), this->cpu_impl_->columns.size());\n  }\n  CHECK(this->DeviceCanRead());\n  if (this->n_total_cats_ != 0) {\n    CHECK(!this->cu_impl_->columns_v.empty());\n    CHECK_EQ(this->feature_segments_.Size(), 
this->cu_impl_->columns_v.size() + 1);\n  }\n  return {dh::ToSpan(this->cu_impl_->columns_v), this->feature_segments_.ConstDeviceSpan(),\n          this->n_total_cats_};\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/cat_container.cuh",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#pragma once\n#include \"../common/device_helpers.cuh\"  // for ToSpan\n#include \"../common/device_vector.cuh\"   // for device_vector, XGBDeviceAllocator\n#include \"../encoder/ordinal.h\"          // for CatCharT\n#include \"cat_container.h\"               // for EncErrorPolicy\n\nnamespace xgboost::cuda_impl {\nstruct CatStrArray {\n  dh::device_vector<std::int32_t> offsets;\n  dh::device_vector<enc::CatCharT> values;\n\n  CatStrArray() = default;\n  CatStrArray(CatStrArray const& that) = delete;\n  CatStrArray& operator=(CatStrArray const& that) = delete;\n\n  CatStrArray(CatStrArray&& that) = default;\n  CatStrArray& operator=(CatStrArray&& that) = default;\n\n  [[nodiscard]] explicit operator enc::CatStrArrayView() const {\n    return {dh::ToSpan(offsets), dh::ToSpan(values)};\n  }\n  [[nodiscard]] std::size_t size() const {  // NOLINT\n    return enc::CatStrArrayView(*this).size();\n  }\n\n  void Copy(CatStrArray const& that) {\n    this->offsets = that.offsets;\n    this->values = that.values;\n  }\n};\n\ntemplate <typename T>\nstruct ViewToStorageImpl;\n\ntemplate <>\nstruct ViewToStorageImpl<enc::CatStrArrayView> {\n  using Type = CatStrArray;\n};\n\ntemplate <typename T>\nstruct ViewToStorageImpl<common::Span<T const>> {\n  using Type = dh::device_vector<T>;\n};\n\ntemplate <typename... Ts>\nstruct ViewToStorage;\n\ntemplate <typename... 
Ts>\nstruct ViewToStorage<std::tuple<Ts...>> {\n  using Type = std::tuple<typename ViewToStorageImpl<Ts>::Type...>;\n};\n\nusing CatIndexTypes = ViewToStorage<enc::CatIndexViewTypes>::Type;\nusing ColumnType = enc::cpu_impl::TupToVarT<CatIndexTypes>;\n\nstruct EncThrustPolicy {\n  template <typename T>\n  using ThrustAllocator = dh::XGBDeviceAllocator<T>;\n\n  [[nodiscard]] auto ThrustPolicy() const {\n    dh::XGBCachingDeviceAllocator<char> alloc;\n    auto exec = thrust::cuda::par_nosync(alloc).on(curt::DefaultStream());\n    return exec;\n  }\n  [[nodiscard]] auto Stream() const { return curt::DefaultStream(); }\n};\n\nusing EncPolicyT = enc::Policy<EncErrorPolicy, EncThrustPolicy>;\n\ninline EncPolicyT EncPolicy = EncPolicyT{};\n\n[[nodiscard]] std::tuple<CatAccessor, dh::DeviceUVector<std::int32_t>> MakeCatAccessor(\n    Context const* ctx, enc::DeviceColumnsView const& new_enc, CatContainer const* orig_cats);\n}  // namespace xgboost::cuda_impl\n"
  },
  {
    "path": "src/data/cat_container.h",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#pragma once\n\n#include <cstdint>  // for int32_t, int8_t\n#include <memory>   // for unique_ptr\n#include <mutex>    // for mutex\n#include <string>   // for string\n#include <tuple>    // for tuple\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"../common/categorical.h\"       // for AsCat\n#include \"../encoder/ordinal.h\"          // for CatStrArrayView\n#include \"../encoder/types.h\"            // for Overloaded\n#include \"entry.h\"                       // for COOTuple\n#include \"xgboost/base.h\"                // for bst_cat_t\n#include \"xgboost/data.h\"                // for Entry\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost {\nclass Json;\n\n/**\n * @brief Error policy class used to interface with the encoder implementation.\n */\nstruct EncErrorPolicy {\n  void Error(std::string&& msg) const { LOG(FATAL) << msg; }\n};\n\nnamespace cpu_impl {\nstruct CatStrArray {\n  std::vector<std::int32_t> offsets;\n  std::vector<enc::CatCharT> values;\n\n  [[nodiscard]] explicit operator enc::CatStrArrayView() const { return {offsets, values}; }\n  [[nodiscard]] std::size_t size() const {  // NOLINT\n    return enc::CatStrArrayView(*this).size();\n  }\n};\n\n// Type mapping from the CPU view type to the storage type.\ntemplate <typename T>\nstruct ViewToStorageImpl;\n\ntemplate <>\nstruct ViewToStorageImpl<enc::CatStrArrayView> {\n  using Type = CatStrArray;\n};\n\ntemplate <typename T>\nstruct ViewToStorageImpl<common::Span<T const>> {\n  using Type = std::vector<T>;\n};\n\ntemplate <typename... Ts>\nstruct ViewToStorage;\n\ntemplate <typename... 
Ts>\nstruct ViewToStorage<std::tuple<Ts...>> {\n  using Type = std::tuple<typename ViewToStorageImpl<Ts>::Type...>;\n};\n\n// storage type list (tuple), used for meta programming.\nusing CatIndexTypes = ViewToStorage<enc::CatIndexViewTypes>::Type;\n// std::variant of the storage types, used for actual storage.\nusing ColumnType = enc::cpu_impl::TupToVarT<CatIndexTypes>;\n\n/**\n * @brief CPU storage for categories.\n */\nstruct CatContainerImpl {\n  std::vector<ColumnType> columns;\n  // View\n  std::vector<enc::HostCatIndexView> columns_v;\n\n  void Finalize() {\n    this->columns_v.clear();\n    for (auto const& col : this->columns) {\n      std::visit(enc::Overloaded{[this](CatStrArray const& str) {\n                                   this->columns_v.emplace_back(enc::CatStrArrayView(str));\n                                 },\n                                 [this](auto&& values) {\n                                   this->columns_v.emplace_back(common::Span{values});\n                                 }},\n                 col);\n    }\n  }\n\n  void Copy(CatContainerImpl const* that) {\n    this->columns = that->columns;\n    this->Finalize();\n  }\n};\n\nusing EncPolicyT = enc::Policy<EncErrorPolicy>;\n\ninline EncPolicyT EncPolicy = EncPolicyT{};\n};  // namespace cpu_impl\n\nnamespace cuda_impl {\nstruct CatContainerImpl;\n}\n\n/**\n * @brief A container class for user-provided categories (usually from a DataFrame).\n */\nclass CatContainer {\n  /**\n   * @brief Implementation of the Copy method, used by both CPU and GPU. 
Note that this\n   * method changes the permission in the HostDeviceVector as we need to pull data into\n   * targeted devices.\n   */\n  void CopyCommon(Context const* ctx, CatContainer const& that) {\n    auto device = ctx->Device();\n\n    that.sorted_idx_.SetDevice(device);\n    this->sorted_idx_.SetDevice(device);\n    this->sorted_idx_.Resize(that.sorted_idx_.Size());\n    this->sorted_idx_.Copy(that.sorted_idx_);\n\n    this->feature_segments_.SetDevice(device);\n    that.feature_segments_.SetDevice(device);\n    this->feature_segments_.Resize(that.feature_segments_.Size());\n    this->feature_segments_.Copy(that.feature_segments_);\n\n    this->n_total_cats_ = that.n_total_cats_;\n\n    if (!device.IsCPU()) {\n      // Pull to device\n      this->sorted_idx_.ConstDevicePointer();\n      this->feature_segments_.ConstDevicePointer();\n    }\n  }\n\n  [[nodiscard]] enc::HostColumnsView HostViewImpl() const {\n    CHECK_EQ(this->cpu_impl_->columns.size(), this->cpu_impl_->columns_v.size());\n    if (this->n_total_cats_ != 0) {\n      CHECK(!this->cpu_impl_->columns_v.empty());\n    }\n    return {common::Span{this->cpu_impl_->columns_v}, this->feature_segments_.ConstHostSpan(),\n            this->n_total_cats_};\n  }\n\n public:\n  CatContainer();\n  explicit CatContainer(enc::HostColumnsView const& df, bool is_ref);\n#if defined(XGBOOST_USE_CUDA)\n  explicit CatContainer(Context const* ctx, enc::DeviceColumnsView const& df, bool is_ref);\n#endif  // defined(XGBOOST_USE_CUDA)\n  ~CatContainer();\n\n  void Copy(Context const* ctx, CatContainer const& that);\n\n  [[nodiscard]] bool HostCanRead() const { return this->feature_segments_.HostCanRead(); }\n  [[nodiscard]] bool DeviceCanRead() const { return this->feature_segments_.DeviceCanRead(); }\n\n  // Mostly used for testing.\n  void Push(cpu_impl::ColumnType const& column) { this->cpu_impl_->columns.emplace_back(column); }\n  /**\n   * @brief Whether the container is initialized at all. 
If the input is not a DataFrame,\n   *        this method returns True.\n   */\n  [[nodiscard]] bool Empty() const;\n  [[nodiscard]] bool NeedRecode() const { return this->HasCategorical() && !this->is_ref_; }\n\n  [[nodiscard]] std::size_t NumFeatures() const;\n  /**\n   * @brief The number of categories across all features.\n   */\n  [[nodiscard]] std::size_t NumCatsTotal() const { return this->n_total_cats_; }\n\n  /**\n   * @brief Sort the categories using argsort.\n   *\n   * This provides a common ordering of the categories between the training dataset and\n   * the test dataset.\n   */\n  void Sort(Context const* ctx);\n  /**\n   * @brief Obtain a view to the sorted index created by the @ref Sort method.\n   */\n  [[nodiscard]] common::Span<bst_cat_t const> RefSortedIndex(Context const* ctx) const {\n    std::lock_guard guard{device_mu_};\n    if (ctx->IsCPU()) {\n      return this->sorted_idx_.ConstHostSpan();\n    } else {\n      sorted_idx_.SetDevice(ctx->Device());\n      return this->sorted_idx_.ConstDeviceSpan();\n    }\n  }\n  /**\n   * @brief Whether there's a categorical feature. 
If not, then all columns in this\n   * container are empty.\n   */\n  [[nodiscard]] bool HasCategorical() const { return this->n_total_cats_ != 0; }\n\n  // IO\n  void Save(Json* out) const;\n  void Load(Json const& in);\n  /**\n   * @brief Get a view to the CPU storage.\n   */\n  [[nodiscard]] enc::HostColumnsView HostView() const;\n\n#if defined(XGBOOST_USE_CUDA)\n  /**\n   * @brief Get a view to the GPU storage.\n   */\n  [[nodiscard]] enc::DeviceColumnsView DeviceView(Context const* ctx) const;\n#endif  // defined(XGBOOST_USE_CUDA)\n\n private:\n  mutable std::mutex device_mu_;  // mutex for copying between devices.\n  HostDeviceVector<std::int32_t> feature_segments_;\n  bst_cat_t n_total_cats_{0};\n\n  std::unique_ptr<cpu_impl::CatContainerImpl> cpu_impl_;\n\n  HostDeviceVector<bst_cat_t> sorted_idx_;\n#if defined(XGBOOST_USE_CUDA)\n  std::unique_ptr<cuda_impl::CatContainerImpl> cu_impl_;\n#endif  // defined(XGBOOST_USE_CUDA)\n  bool is_ref_{false};\n};\n\n/**\n * @brief Accessor for obtaining re-coded categories.\n */\nstruct CatAccessor {\n  enc::MappingView enc;\n\n  template <typename T, typename Fidx>\n  [[nodiscard]] XGBOOST_DEVICE T operator()(T fvalue, Fidx f_idx) const {\n    if (!enc.Empty() && !enc[f_idx].empty()) {\n      auto f_mapping = enc[f_idx];\n      auto cat_idx = common::AsCat(fvalue);\n      if (cat_idx >= 0 && cat_idx < common::AsCat(f_mapping.size())) {\n        fvalue = f_mapping.data()[cat_idx];\n      }\n    }\n    return fvalue;\n  }\n  [[nodiscard]] XGBOOST_DEVICE float operator()(Entry const& e) const {\n    return this->operator()(e.fvalue, e.index);\n  }\n  [[nodiscard]] XGBOOST_DEVICE float operator()(data::COOTuple const& e) const {\n    return this->operator()(e.value, e.column_idx);\n  }\n};\n\n/**\n * @brief No-op accessor used to handle numeric data.\n */\nstruct NoOpAccessor {\n  constexpr explicit NoOpAccessor(enc::MappingView const&) {}\n  constexpr NoOpAccessor() = default;\n  template <typename T, typename Fidx>\n  
[[nodiscard]] XGBOOST_DEVICE T operator()(T fvalue, Fidx) const {\n    return fvalue;\n  }\n  [[nodiscard]] XGBOOST_DEVICE float operator()(data::COOTuple const& e) const { return e.value; }\n  [[nodiscard]] XGBOOST_DEVICE float operator()(Entry const& e) const { return e.fvalue; }\n};\n\nvoid SyncCategories(Context const* ctx, CatContainer* cats, bool is_empty);\n\nnamespace cpu_impl {\ninline auto MakeCatAccessor(Context const* ctx, enc::HostColumnsView const& new_enc,\n                            CatContainer const* orig_cats) {\n  std::vector<std::int32_t> mapping(new_enc.n_total_cats);\n  auto sorted_idx = orig_cats->RefSortedIndex(ctx);\n  auto orig_enc = orig_cats->HostView();\n  enc::Recode(cpu_impl::EncPolicy, orig_enc, sorted_idx, new_enc, common::Span{mapping});\n  CHECK_EQ(new_enc.feature_segments.size(), orig_enc.feature_segments.size());\n  auto cats_mapping = enc::MappingView{new_enc.feature_segments, mapping};\n  auto acc = CatAccessor{cats_mapping};\n  return std::tuple{acc, std::move(mapping)};\n}\n}  // namespace cpu_impl\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/columnar.h",
    "content": "/**\n *  Copyright 2025, XGBoost Contributors\n *\n * @brief Helpers for handling columnar data with adapters.\n */\n#pragma once\n\n#include <algorithm>    // for max\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <type_traits>  // for is_floating_point_v\n#include <vector>       // for vector\n\n#include \"../common/error_msg.h\"  // for NoFloatCat\n#include \"../encoder/ordinal.h\"   // for CatStrArrayView\n#include \"array_interface.h\"      // for ArrayInterfaceHandler\n#include \"xgboost/context.h\"      // for DeviceOrd\n#include \"xgboost/json.h\"         // for Json, Object\n#include \"xgboost/span.h\"         // for Span\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // for AssertGPUSupport\n#else\n#include <cuda_runtime_api.h>  // for cudaMemcpy\n#endif\n\nnamespace xgboost::data {\n/**\n * @brief Get string-based category index from arrow.\n *\n * @return The extracted category index\n */\ntemplate <typename CategoricalIndex>\nauto GetArrowNames(Object::Map const& jnames, std::vector<CategoricalIndex>* p_cat_columns) {\n  auto& cat_columns = *p_cat_columns;\n  // There are 3 buffers for a StringArray, validity mask, offset, and data. 
Mask\n  // and data are represented by a single masked array.\n  auto const& joffset = get<Object const>(jnames.at(\"offsets\"));\n  auto offset = ArrayInterface<1>{joffset};\n  auto const& jstr = get<Object const>(jnames.at(\"values\"));\n  auto strbuf = ArrayInterface<1>(jstr);\n\n  // Obtain the size of the string buffer using the offset\n  CHECK_GE(offset.n, 2);\n  auto offset_last_idx = offset.n - 1;\n  if (ArrayInterfaceHandler::IsCudaPtr(offset.data)) {\n    CHECK_EQ(strbuf.n, 0);  // Unknown\n#if defined(XGBOOST_USE_CUDA)\n    DispatchDType(offset.type, [&](auto t) {\n      using T = decltype(t);\n      if (!std::is_same_v<T, std::int32_t>) {\n        LOG(FATAL) << \"Invalid type for the string offset from category index.\";\n      }\n#if defined(__CUDACC__)\n#pragma nv_diagnostic push\n#pragma nv_diag_suppress 20208  // long double is treated as double in device code\n#endif  // defined(__CUDACC__)\n      T back{0};\n      dh::safe_cuda(cudaMemcpy(&back, static_cast<T const*>(offset.data) + offset_last_idx,\n                               sizeof(T), cudaMemcpyDeviceToHost));\n      strbuf.n = back;\n#if defined(__CUDACC__)\n#pragma nv_diagnostic pop\n#endif  // defined(__CUDACC__)\n    });\n#else\n    common::AssertGPUSupport();\n#endif\n  } else {\n    DispatchDType(offset.type, [&](auto t) {\n      using T = decltype(t);\n      if (!std::is_same_v<T, std::int32_t>) {\n        LOG(FATAL) << \"Invalid type for the string offset from category index.\";\n      }\n      auto back = offset(offset_last_idx);\n      strbuf.n = back;\n    });\n  }\n\n  CHECK_EQ(strbuf.type, ArrayInterfaceHandler::kI1);\n  CHECK_EQ(offset.type, ArrayInterfaceHandler::kI4);\n  auto names = enc::CatStrArrayView{\n      common::Span{static_cast<std::int32_t const*>(offset.data), offset.Shape<0>()},\n      common::Span<std::int8_t const>{reinterpret_cast<std::int8_t const*>(strbuf.data), strbuf.n}};\n  cat_columns.emplace_back(names);\n  return names;\n}\n\n/**\n * @brief Get string 
names and codes for categorical features.\n *\n * @return The number of categories for the current column.\n */\ntemplate <typename CategoricalIndex, bool allow_mask>\n[[nodiscard]] std::size_t GetArrowDictionary(Json const& jcol,\n                                             std::vector<CategoricalIndex>* p_cat_columns,\n                                             std::vector<ArrayInterface<1, allow_mask>>* p_columns,\n                                             std::size_t* p_n_bytes, bst_idx_t* p_n_samples) {\n  auto const& tup = get<Array const>(jcol);\n  CHECK_EQ(tup.size(), 2);\n\n  auto names = GetArrowNames(get<Object const>(tup[0]), p_cat_columns);\n\n  // arrow Integer array for encoded categories\n  auto const& jcodes = get<Object const>(tup[1]);\n  auto codes = ArrayInterface<1>{jcodes};\n  p_columns->push_back(codes);\n\n  auto& n_bytes = *p_n_bytes;\n  n_bytes += codes.ElementSize() * codes.Shape<0>();\n  n_bytes += names.SizeBytes();\n\n  *p_n_samples = std::max(*p_n_samples, static_cast<bst_idx_t>(codes.Shape<0>()));\n  return names.size();\n}\n\n/**\n * @brief Get numeric-based category index from arrow.\n *\n * @return The extracted category index\n */\ntemplate <typename CategoricalIndex>\n[[nodiscard]] std::size_t GetArrowNumericNames(DeviceOrd device, Object::Map const& jnames,\n                                               std::vector<CategoricalIndex>* p_cat_columns,\n                                               std::size_t* p_n_bytes) {\n  auto names = ArrayInterface<1>{jnames};\n  auto& n_bytes = *p_n_bytes;\n  DispatchDType(names, device, [&](auto t) {\n    using T = typename decltype(t)::value_type;\n    constexpr bool kKnownType = enc::MemberOf<std::remove_cv_t<T>, enc::CatPrimIndexTypes>::value;\n    CHECK(kKnownType) << \"Unsupported categorical index type: `\"\n                      << ArrayInterfaceHandler::TypeStr(names.type) << \"`.\";\n    if constexpr (std::is_floating_point_v<T>) {\n      LOG(FATAL) << 
error::NoFloatCat();\n    }\n    auto span = common::Span{t.Values().data(), t.Size()};\n    if constexpr (kKnownType) {\n      p_cat_columns->emplace_back(span);\n      n_bytes += span.size_bytes();\n    }\n  });\n  return names.n;\n}\n\n/**\n * @brief Get numeric names and codes for categorical features.\n *\n * @return The number of categories for the current column.\n */\ntemplate <typename CategoricalIndex, bool allow_mask>\n[[nodiscard]] std::size_t GetArrowNumericIndex(\n    DeviceOrd device, Json jcol, std::vector<CategoricalIndex>* p_cat_columns,\n    std::vector<ArrayInterface<1, allow_mask>>* p_columns, std::size_t* p_n_bytes,\n    bst_idx_t* p_n_samples) {\n  auto const& first = get<Object const>(jcol[0]);\n  auto n_cats = GetArrowNumericNames(device, first, p_cat_columns, p_n_bytes);\n  auto& n_bytes = *p_n_bytes;\n  auto const& jcodes = get<Object const>(jcol[1]);\n  auto codes = ArrayInterface<1>{jcodes};\n  p_columns->push_back(codes);\n\n  n_bytes += codes.ElementSize() * codes.Shape<0>();\n  *p_n_samples = std::max(*p_n_samples, static_cast<bst_idx_t>(codes.Shape<0>()));\n\n  return n_cats;\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/data.cc",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file data.cc\n */\n#include \"xgboost/data.h\"\n\n#include <dmlc/registry.h>  // for DMLC_REGISTRY_ENABLE, DMLC_REGISTRY_LINK_TAG\n\n#include <algorithm>    // for copy, max, none_of, min\n#include <atomic>       // for atomic\n#include <cmath>        // for abs\n#include <cstdint>      // for uint64_t, int32_t, uint8_t, uint32_t\n#include <cstring>      // for size_t, strcmp, memcpy\n#include <iostream>     // for operator<<, basic_ostream, basic_ostream::op...\n#include <map>          // for map, operator!=\n#include <numeric>      // for accumulate, partial_sum\n#include <tuple>        // for get, apply\n#include <type_traits>  // for remove_pointer_t, remove_reference\n\n#include \"../collective/allgather.h\"          // for AllgatherStrings\n#include \"../collective/allreduce.h\"          // for Allreduce\n#include \"../collective/communicator-inl.h\"   // for GetRank, IsFederated\n#include \"../common/algorithm.h\"              // for StableSort\n#include \"../common/api_entry.h\"              // for XGBAPIThreadLocalEntry\n#include \"../common/error_msg.h\"              // for GroupSize, GroupWeight, InfInData\n#include \"../common/group_data.h\"             // for ParallelGroupBuilder\n#include \"../common/io.h\"                     // for PeekableInStream\n#include \"../common/linalg_op.h\"              // for ElementWiseTransformHost\n#include \"../common/math.h\"                   // for CheckNAN\n#include \"../common/numeric.h\"                // for Iota, RunLengthEncode\n#include \"../common/threading_utils.h\"        // for ParallelFor\n#include \"../common/version.h\"                // for Version\n#include \"../data/adapter.h\"                  // for FileAdapter\n#include \"../data/entry.h\"                    // for COOTuple, IsValidFunctor\n#include \"../data/extmem_quantile_dmatrix.h\"  // for ExtMemQuantileDMatrix\n#include \"../data/iterative_dmatrix.h\"        // for 
IterativeDMatrix\n#include \"./sparse_page_dmatrix.h\"            // for SparsePageDMatrix\n#include \"array_interface.h\"                  // for ArrayInterfaceHandler, ArrayInterface, Dispa...\n#include \"cat_container.h\"                    // for CatContainer\n#include \"dmlc/base.h\"                        // for BeginPtr\n#include \"dmlc/data.h\"                        // for Parser\n#include \"dmlc/endian.h\"                      // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP\n#include \"dmlc/io.h\"                          // for Stream\n#include \"dmlc/thread_local.h\"                // for ThreadLocalStore\n#include \"ellpack_page.h\"                     // for EllpackPage\n#include \"file_iterator.h\"                    // for ValidateFileFormat, FileIterator, Next, Reset\n#include \"gradient_index.h\"                   // for GHistIndexMatrix\n#include \"metainfo.h\"                         // for LabelsCheck, WeightsCheck, ValidateQueryGroup\n#include \"simple_dmatrix.h\"                   // for SimpleDMatrix\n#include \"sparse_page_writer.h\"               // for SparsePageFormatReg\n#include \"xgboost/base.h\"                     // for bst_group_t, bst_idx_t, bst_float, bst_ulong\n#include \"xgboost/context.h\"                  // for Context\n#include \"xgboost/host_device_vector.h\"       // for HostDeviceVector\n#include \"xgboost/learner.h\"                  // for HostDeviceVector\n#include \"xgboost/linalg.h\"                   // for Tensor, Stack, TensorView, Vector, ArrayInte...\n#include \"xgboost/logging.h\"                  // for Error, LogCheck_EQ, CHECK, CHECK_EQ, LOG\n#include \"xgboost/span.h\"                     // for Span, operator!=, SpanIterator\n#include \"xgboost/string_view.h\"              // for operator==, operator<<, StringView\n\nnamespace dmlc 
{\nDMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg<::xgboost::SparsePage>);\nDMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg<::xgboost::CSCPage>);\nDMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg<::xgboost::SortedCSCPage>);\nDMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg<::xgboost::EllpackPage>);\nDMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg<::xgboost::GHistIndexMatrix>);\n}  // namespace dmlc\n\nnamespace {\n\ntemplate <typename T>\nvoid SaveScalarField(dmlc::Stream* strm, const std::string& name, xgboost::DataType type,\n                     const T& field) {\n  strm->Write(name);\n  strm->Write(static_cast<uint8_t>(type));\n  strm->Write(true);  // is_scalar=True\n  strm->Write(field);\n}\n\ntemplate <typename T>\nvoid SaveVectorField(dmlc::Stream* strm, const std::string& name, xgboost::DataType type,\n                     std::pair<uint64_t, uint64_t> shape, const std::vector<T>& field) {\n  strm->Write(name);\n  strm->Write(static_cast<uint8_t>(type));\n  strm->Write(false);  // is_scalar=False\n  strm->Write(shape.first);\n  strm->Write(shape.second);\n  strm->Write(field);\n}\n\ntemplate <typename T>\nvoid SaveVectorField(dmlc::Stream* strm, const std::string& name, xgboost::DataType type,\n                     std::pair<uint64_t, uint64_t> shape,\n                     const xgboost::HostDeviceVector<T>& field) {\n  SaveVectorField(strm, name, type, shape, field.ConstHostVector());\n}\n\ntemplate <typename T, int32_t D>\nvoid SaveTensorField(dmlc::Stream* strm, const std::string& name, xgboost::DataType type,\n                     const xgboost::linalg::Tensor<T, D>& field) {\n  strm->Write(name);\n  strm->Write(static_cast<uint8_t>(type));\n  strm->Write(false);  // is_scalar=False\n  for (size_t i = 0; i < D; ++i) {\n    strm->Write(field.Shape(i));\n  }\n  strm->Write(field.Data()->HostVector());\n}\n\ntemplate <typename T>\nvoid LoadScalarField(dmlc::Stream* strm, const std::string& expected_name,\n  
                   xgboost::DataType expected_type, T* field) {\n  const std::string invalid{\"MetaInfo: Invalid format for \" + expected_name};\n  std::string name;\n  xgboost::DataType type;\n  bool is_scalar;\n  CHECK(strm->Read(&name)) << invalid;\n  CHECK_EQ(name, expected_name) << invalid << \" Expected field: \" << expected_name\n                                << \", got: \" << name;\n  uint8_t type_val;\n  CHECK(strm->Read(&type_val)) << invalid;\n  type = static_cast<xgboost::DataType>(type_val);\n  CHECK(type == expected_type) << invalid\n                               << \"Expected field of type: \" << static_cast<int>(expected_type)\n                               << \", \"\n                               << \"got field type: \" << static_cast<int>(type);\n  CHECK(strm->Read(&is_scalar)) << invalid;\n  CHECK(is_scalar) << invalid << \"Expected field \" << expected_name\n                   << \" to be a scalar; got a vector\";\n  CHECK(strm->Read(field)) << invalid;\n}\n\ntemplate <typename T>\nvoid LoadVectorField(dmlc::Stream* strm, const std::string& expected_name,\n                     xgboost::DataType expected_type, std::vector<T>* field) {\n  const std::string invalid{\"MetaInfo: Invalid format for \" + expected_name};\n  std::string name;\n  xgboost::DataType type;\n  bool is_scalar;\n  CHECK(strm->Read(&name)) << invalid;\n  CHECK_EQ(name, expected_name) << invalid << \" Expected field: \" << expected_name\n                                << \", got: \" << name;\n  uint8_t type_val;\n  CHECK(strm->Read(&type_val)) << invalid;\n  type = static_cast<xgboost::DataType>(type_val);\n  CHECK(type == expected_type) << invalid\n                               << \"Expected field of type: \" << static_cast<int>(expected_type)\n                               << \", \"\n                               << \"got field type: \" << static_cast<int>(type);\n  CHECK(strm->Read(&is_scalar)) << invalid;\n  CHECK(!is_scalar) << invalid << \"Expected field \" << 
expected_name\n                    << \" to be a vector; got a scalar\";\n  std::pair<uint64_t, uint64_t> shape;\n\n  CHECK(strm->Read(&shape.first));\n  CHECK(strm->Read(&shape.second));\n  // TODO(hcho3): this restriction may be lifted, once we add a field with more than 1 column.\n  CHECK_EQ(shape.second, 1) << invalid << \"Number of columns is expected to be 1.\";\n\n  CHECK(strm->Read(field)) << invalid;\n}\n\ntemplate <typename T>\nvoid LoadVectorField(dmlc::Stream* strm, const std::string& expected_name,\n                     xgboost::DataType expected_type, xgboost::HostDeviceVector<T>* field) {\n  LoadVectorField(strm, expected_name, expected_type, &field->HostVector());\n}\n\ntemplate <typename T, int32_t D>\nvoid LoadTensorField(dmlc::Stream* strm, std::string const& expected_name,\n                     xgboost::DataType expected_type, xgboost::linalg::Tensor<T, D>* p_out) {\n  const std::string invalid{\"MetaInfo: Invalid format for \" + expected_name};\n  std::string name;\n  xgboost::DataType type;\n  bool is_scalar;\n  CHECK(strm->Read(&name)) << invalid;\n  CHECK_EQ(name, expected_name) << invalid << \" Expected field: \" << expected_name\n                                << \", got: \" << name;\n  uint8_t type_val;\n  CHECK(strm->Read(&type_val)) << invalid;\n  type = static_cast<xgboost::DataType>(type_val);\n  CHECK(type == expected_type) << invalid\n                               << \"Expected field of type: \" << static_cast<int>(expected_type)\n                               << \", \"\n                               << \"got field type: \" << static_cast<int>(type);\n  CHECK(strm->Read(&is_scalar)) << invalid;\n  CHECK(!is_scalar) << invalid << \"Expected field \" << expected_name\n                    << \" to be a tensor; got a scalar\";\n  size_t shape[D];\n  for (size_t i = 0; i < D; ++i) {\n    CHECK(strm->Read(&(shape[i])));\n  }\n  p_out->Reshape(shape);\n  auto& field = p_out->Data()->HostVector();\n  CHECK(strm->Read(&field)) << 
invalid;\n}\n}  // anonymous namespace\n\nnamespace xgboost {\n\nuint64_t constexpr MetaInfo::kNumField;\n\nMetaInfo::MetaInfo() : cats_{std::make_shared<CatContainer>()} {}\n\n// implementation of inline functions\nvoid MetaInfo::Clear() {\n  num_row_ = num_col_ = num_nonzero_ = 0;\n  labels = decltype(labels){};\n  group_ptr_.clear();\n  weights_.HostVector().clear();\n  base_margin_ = decltype(base_margin_){};\n}\n\n/*\n * Binary serialization format for MetaInfo:\n *\n * | name               | type     | is_scalar | num_row     |     num_col | value                  |\n * |--------------------+----------+-----------+-------------+-------------+------------------------|\n * | num_row            | kUInt64  | True      | NA          |          NA | ${num_row_}            |\n * | num_col            | kUInt64  | True      | NA          |          NA | ${num_col_}            |\n * | num_nonzero        | kUInt64  | True      | NA          |          NA | ${num_nonzero_}        |\n * | labels             | kFloat32 | False     | ${size}     |           1 | ${labels_}             |\n * | group_ptr          | kUInt32  | False     | ${size}     |           1 | ${group_ptr_}          |\n * | weights            | kFloat32 | False     | ${size}     |           1 | ${weights_}            |\n * | base_margin        | kFloat32 | False     | ${Shape(0)} | ${Shape(1)} | ${base_margin_}        |\n * | labels_lower_bound | kFloat32 | False     | ${size}     |           1 | ${labels_lower_bound_} |\n * | labels_upper_bound | kFloat32 | False     | ${size}     |           1 | ${labels_upper_bound_} |\n * | feature_names      | kStr     | False     | ${size}     |           1 | ${feature_names}       |\n * | feature_types      | kStr     | False     | ${size}     |           1 | ${feature_types}       |\n * | feature_weights    | kFloat32 | False     | ${size}     |           1 | ${feature_weights}     |\n * | cats               | kStr     | False     | ${size}     |           1 | 
${cats}     |\n *\n * Note that the scalar fields (is_scalar=True) will have num_row and num_col missing.\n * Also notice the difference between the saved name and the name used in `SetInfo':\n * the former uses the plural form.\n */\n\nvoid MetaInfo::SaveBinary(dmlc::Stream* fo) const {\n  Version::Save(fo);\n  fo->Write(kNumField);\n  int field_cnt = 0;  // make sure we are actually writing kNumField fields\n\n  SaveScalarField(fo, u8\"num_row\", DataType::kUInt64, num_row_);\n  ++field_cnt;\n  SaveScalarField(fo, u8\"num_col\", DataType::kUInt64, num_col_);\n  ++field_cnt;\n  SaveScalarField(fo, u8\"num_nonzero\", DataType::kUInt64, num_nonzero_);\n  ++field_cnt;\n  SaveTensorField(fo, u8\"labels\", DataType::kFloat32, labels);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"group_ptr\", DataType::kUInt32, {group_ptr_.size(), 1}, group_ptr_);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"weights\", DataType::kFloat32, {weights_.Size(), 1}, weights_);\n  ++field_cnt;\n  SaveTensorField(fo, u8\"base_margin\", DataType::kFloat32, base_margin_);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"labels_lower_bound\", DataType::kFloat32, {labels_lower_bound_.Size(), 1},\n                  labels_lower_bound_);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"labels_upper_bound\", DataType::kFloat32, {labels_upper_bound_.Size(), 1},\n                  labels_upper_bound_);\n  ++field_cnt;\n\n  SaveVectorField(fo, u8\"feature_names\", DataType::kStr, {feature_names.size(), 1}, feature_names);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"feature_types\", DataType::kStr, {feature_type_names.size(), 1},\n                  feature_type_names);\n  ++field_cnt;\n  SaveVectorField(fo, u8\"feature_weights\", DataType::kFloat32, {feature_weights.Size(), 1},\n                  feature_weights);\n  ++field_cnt;\n\n  Json jcats{Object{}};\n  this->cats_->Save(&jcats);\n  std::vector<char> values;\n  Json::Dump(jcats, &values, std::ios::binary);\n  SaveVectorField(fo, u8\"cats\", DataType::kStr, 
{values.size(), 1}, values);\n  ++field_cnt;\n\n  CHECK_EQ(field_cnt, kNumField) << \"Wrong number of fields\";\n}\n\n/**\n * @brief Load feature type info from names, returns whether there's categorical features.\n */\n[[nodiscard]] bool LoadFeatureType(std::vector<std::string> const& type_names,\n                                   std::vector<FeatureType>* types) {\n  types->clear();\n  bool has_cat{false};\n  for (auto const& elem : type_names) {\n    if (elem == \"int\") {\n      types->emplace_back(FeatureType::kNumerical);\n    } else if (elem == \"float\") {\n      types->emplace_back(FeatureType::kNumerical);\n    } else if (elem == \"i\") {\n      types->emplace_back(FeatureType::kNumerical);\n    } else if (elem == \"q\") {\n      types->emplace_back(FeatureType::kNumerical);\n    } else if (elem == \"c\") {\n      types->emplace_back(FeatureType::kCategorical);\n      has_cat = true;\n    } else {\n      LOG(FATAL) << \"All feature_types must be one of {int, float, i, q, c}.\";\n    }\n  }\n  return has_cat;\n}\n\nconst std::vector<size_t>& MetaInfo::LabelAbsSort(Context const* ctx) const {\n  if (label_order_cache_.size() == labels.Size()) {\n    return label_order_cache_;\n  }\n  label_order_cache_.resize(labels.Size());\n  common::Iota(ctx, label_order_cache_.begin(), label_order_cache_.end(), 0);\n  const auto& l = labels.Data()->HostVector();\n  common::StableSort(ctx, label_order_cache_.begin(), label_order_cache_.end(),\n                     [&l](size_t i1, size_t i2) { return std::abs(l[i1]) < std::abs(l[i2]); });\n\n  return label_order_cache_;\n}\n\nvoid MetaInfo::LoadBinary(dmlc::Stream* fi) {\n  auto version = Version::Load(fi);\n  auto major = std::get<0>(version);\n  auto minor = std::get<1>(version);\n  // MetaInfo is saved in `SparsePageSource'.  
So the version in MetaInfo represents the\n  // version of DMatrix.\n  std::stringstream msg;\n  msg << \"Binary DMatrix generated by XGBoost: \" << Version::String(version)\n      << \" is no longer supported. \"\n      << \"Please process and save your data in current version: \"\n      << Version::String(Version::Self()) << \" again.\";\n  CHECK_GE(major, 3) << msg.str();\n  CHECK_GE(minor, 1) << msg.str();\n\n  const uint64_t expected_num_field = kNumField;\n  uint64_t num_field{0};\n  CHECK(fi->Read(&num_field)) << \"MetaInfo: invalid format\";\n  size_t expected = 0;\n  if (major == 1 && std::get<1>(version) < 2) {\n    // feature names and types are added in 1.2\n    expected = expected_num_field - 2;\n  } else {\n    expected = expected_num_field;\n  }\n  CHECK_GE(num_field, expected) << \"MetaInfo: insufficient number of fields (expected at least \"\n                                << expected << \" fields, but the binary file only contains \"\n                                << num_field << \"fields.)\";\n  if (num_field > expected_num_field) {\n    LOG(WARNING) << \"MetaInfo: the given binary file contains extra fields \"\n                    \"which will be ignored.\";\n  }\n\n  LoadScalarField(fi, u8\"num_row\", DataType::kUInt64, &num_row_);\n  LoadScalarField(fi, u8\"num_col\", DataType::kUInt64, &num_col_);\n  LoadScalarField(fi, u8\"num_nonzero\", DataType::kUInt64, &num_nonzero_);\n  LoadTensorField(fi, u8\"labels\", DataType::kFloat32, &labels);\n  LoadVectorField(fi, u8\"group_ptr\", DataType::kUInt32, &group_ptr_);\n  LoadVectorField(fi, u8\"weights\", DataType::kFloat32, &weights_);\n  LoadTensorField(fi, u8\"base_margin\", DataType::kFloat32, &base_margin_);\n  LoadVectorField(fi, u8\"labels_lower_bound\", DataType::kFloat32, &labels_lower_bound_);\n  LoadVectorField(fi, u8\"labels_upper_bound\", DataType::kFloat32, &labels_upper_bound_);\n\n  LoadVectorField(fi, u8\"feature_names\", DataType::kStr, &feature_names);\n  LoadVectorField(fi, 
u8\"feature_types\", DataType::kStr, &feature_type_names);\n  LoadVectorField(fi, u8\"feature_weights\", DataType::kFloat32, &feature_weights);\n\n  this->has_categorical_ = LoadFeatureType(feature_type_names, &feature_types.HostVector());\n\n  std::vector<char> values;\n  LoadVectorField(fi, u8\"cats\", DataType::kStr, &values);\n  auto jcats = Json::Load(StringView{values.data(), values.size()}, std::ios::binary);\n  this->cats_->Load(jcats);\n}\n\nnamespace {\ntemplate <typename T>\nstd::vector<T> Gather(const std::vector<T>& in, common::Span<bst_idx_t const> ridxs,\n                      size_t stride = 1) {\n  if (in.empty()) {\n    return {};\n  }\n  auto size = ridxs.size();\n  std::vector<T> out(size * stride);\n  for (auto i = 0ull; i < size; i++) {\n    auto ridx = ridxs[i];\n    for (size_t j = 0; j < stride; ++j) {\n      out[i * stride + j] = in[ridx * stride + j];\n    }\n  }\n  return out;\n}\n}  // namespace\n\nnamespace cuda_impl {\nvoid SliceMetaInfo(Context const* ctx, MetaInfo const& info, common::Span<bst_idx_t const> ridx,\n                   MetaInfo* p_out);\n#if !defined(XGBOOST_USE_CUDA)\nvoid SliceMetaInfo(Context const*, MetaInfo const&, common::Span<bst_idx_t const>, MetaInfo*) {\n  common::AssertGPUSupport();\n}\n#endif\n}  // namespace cuda_impl\n\nMetaInfo MetaInfo::Slice(Context const* ctx, common::Span<bst_idx_t const> ridxs,\n                         bst_idx_t nnz) const {\n  /**\n   * Shape\n   */\n  MetaInfo out;\n  out.num_row_ = ridxs.size();\n  out.num_col_ = this->num_col_;\n  out.num_nonzero_ = nnz;\n\n  /**\n   * Feature Info\n   */\n  out.feature_weights.SetDevice(ctx->Device());\n  out.feature_weights.Resize(this->feature_weights.Size());\n  out.feature_weights.Copy(this->feature_weights);\n\n  out.feature_names = this->feature_names;\n\n  out.feature_types.SetDevice(ctx->Device());\n  out.feature_types.Resize(this->feature_types.Size());\n  out.feature_types.Copy(this->feature_types);\n\n  out.feature_type_names = 
this->feature_type_names;\n\n  /**\n   * Sample Info\n   */\n  if (ctx->IsCUDA()) {\n    cuda_impl::SliceMetaInfo(ctx, *this, ridxs, &out);\n    return out;\n  }\n\n  // Groups is maintained by a higher level Python function.  We should aim at deprecating\n  // the slice function.\n  if (this->labels.Size() != this->num_row_) {\n    auto t_labels = this->labels.View(this->labels.Data()->Device());\n    out.labels.Reshape(ridxs.size(), labels.Shape(1));\n    out.labels.Data()->HostVector() =\n        Gather(this->labels.Data()->HostVector(), ridxs, t_labels.Stride(0));\n  } else {\n    out.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {\n      data->HostVector() = Gather(this->labels.Data()->HostVector(), ridxs);\n      shape[0] = data->Size();\n      shape[1] = 1;\n    });\n  }\n\n  out.labels_upper_bound_.HostVector() = Gather(this->labels_upper_bound_.HostVector(), ridxs);\n  out.labels_lower_bound_.HostVector() = Gather(this->labels_lower_bound_.HostVector(), ridxs);\n  // weights\n  if (this->weights_.Size() + 1 == this->group_ptr_.size()) {\n    auto& h_weights = out.weights_.HostVector();\n    // Assuming all groups are available.\n    out.weights_.HostVector() = h_weights;\n  } else {\n    out.weights_.HostVector() = Gather(this->weights_.HostVector(), ridxs);\n  }\n\n  if (this->base_margin_.Size() != this->num_row_) {\n    CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0)\n        << \"Incorrect size of base margin vector.\";\n    auto t_margin = this->base_margin_.View(this->base_margin_.Data()->Device());\n    out.base_margin_.Reshape(ridxs.size(), t_margin.Shape(1));\n    out.base_margin_.Data()->HostVector() =\n        Gather(this->base_margin_.Data()->HostVector(), ridxs, t_margin.Stride(0));\n  } else {\n    out.base_margin_.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {\n      data->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);\n      shape[0] = data->Size();\n      shape[1] = 1;\n 
   });\n  }\n\n  return out;\n}\n\nMetaInfo MetaInfo::Copy() const {\n  MetaInfo out;\n  out.Extend(*this, /*accumulate_rows=*/true, /*check_column=*/false);\n  return out;\n}\n\nnamespace {\ntemplate <int32_t D, typename T>\nvoid CopyTensorInfoImpl(Context const* ctx, Json arr_interface, linalg::Tensor<T, D>* p_out) {\n  ArrayInterface<D> array{arr_interface};\n  if (array.n == 0) {\n    p_out->Reshape(array.shape);\n    return;\n  }\n  CHECK_EQ(array.valid.Capacity(), 0)\n      << \"Meta info like label or weight can not have missing value.\";\n  if (array.is_contiguous && array.type == ToDType<T>::kType) {\n    // Handle contigious\n    p_out->ModifyInplace([&](HostDeviceVector<T>* data, common::Span<size_t, D> shape) {\n      // set shape\n      std::copy(array.shape, array.shape + D, shape.data());\n      // set data\n      data->Resize(array.n);\n      std::memcpy(data->HostPointer(), array.data, array.n * sizeof(T));\n    });\n    return;\n  }\n  p_out->Reshape(array.shape);\n  auto t_out = p_out->View(DeviceOrd::CPU());\n  CHECK(t_out.CContiguous());\n  auto const shape = t_out.Shape();\n  DispatchDType(array, DeviceOrd::CPU(), [&](auto&& in) {\n    linalg::cpu_impl::TransformIdxKernel(t_out, ctx->Threads(), [&](auto i, auto) {\n      return std::apply(in, linalg::UnravelIndex<D>(i, shape));\n    });\n  });\n}\n\nvoid ReshapeInfo(bst_idx_t n_samples, linalg::Matrix<float>* p_info, StringView name) {\n  if (n_samples != 0 && p_info->Shape(0) != n_samples) {\n    // API functions that don't use array interface don't understand shape.\n    CHECK_EQ(p_info->Size() % n_samples, 0)\n        << \"Invalid size for `\" << name << \"`:(\" << p_info->Shape(0) << \",\" << p_info->Shape(1)\n        << \"). 
n_samples:\" << n_samples;\n    std::size_t n_groups = p_info->Size() / n_samples;\n    p_info->Reshape(n_samples, n_groups);\n  }\n}\n}  // namespace\n\nvoid MetaInfo::SetInfo(Context const& ctx, StringView key, StringView in_array) {\n  Json j_interface = Json::Load(in_array);\n  bool is_cuda{false};\n  if (IsA<Array>(j_interface)) {\n    auto const& array = get<Array const>(j_interface);\n    CHECK_GE(array.size(), 0) << \"Invalid \" << key\n                              << \", must have at least 1 column even if it's empty.\";\n    auto const& first = get<Object const>(array.front());\n    auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);\n    is_cuda = first.find(\"stream\") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);\n  } else {\n    auto const& first = get<Object const>(j_interface);\n    auto ptr = ArrayInterfaceHandler::GetPtrFromArrayData<void*>(first);\n    is_cuda = first.find(\"stream\") != first.cend() || ArrayInterfaceHandler::IsCudaPtr(ptr);\n  }\n\n  if (is_cuda) {\n    this->SetInfoFromCUDA(&ctx, key, j_interface);\n  } else {\n    this->SetInfoFromHost(&ctx, key, j_interface);\n  }\n}\n\nvoid MetaInfo::SetInfoFromHost(Context const* ctx, StringView key, Json arr) {\n  // multi-dim float info\n  using xgboost::data::MetaField;\n  auto copy_vec = [&](HostDeviceVector<float>* p_out) {\n    linalg::Tensor<float, 1> t;\n    CopyTensorInfoImpl<1>(ctx, arr, &t);\n    *p_out = std::move(*t.Data());\n  };\n  switch (data::MapMetaField(key, true)) {\n    case MetaField::kLabel: {\n      CopyTensorInfoImpl(ctx, arr, &this->labels);\n      ReshapeInfo(this->num_row_, &this->labels, \"label\");\n      auto const& h_labels = labels.Data()->ConstHostVector();\n      auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});\n      CHECK(valid) << \"Label contains NaN, infinity or a value too large.\";\n      break;\n    }\n    case MetaField::kWeight: {\n      copy_vec(&this->weights_);\n      auto 
const& h_weights = this->weights_.ConstHostVector();\n      auto valid = std::none_of(h_weights.cbegin(), h_weights.cend(),\n                                [](float w) { return w < 0 || std::isinf(w) || std::isnan(w); });\n      CHECK(valid) << \"Weights must be positive values.\";\n      break;\n    }\n    case MetaField::kBaseMargin: {\n      CopyTensorInfoImpl(ctx, arr, &this->base_margin_);\n      ReshapeInfo(this->num_row_, &this->base_margin_, \"base_margin\");\n      break;\n    }\n    case MetaField::kLabelLowerBound: {\n      copy_vec(&this->labels_lower_bound_);\n      break;\n    }\n    case MetaField::kLabelUpperBound: {\n      copy_vec(&this->labels_upper_bound_);\n      break;\n    }\n    case MetaField::kFeatureWeights: {\n      copy_vec(&this->feature_weights);\n      auto const& h_feature_weights = feature_weights.ConstHostVector();\n      bool valid =\n          std::none_of(h_feature_weights.cbegin(), h_feature_weights.cend(), data::WeightsCheck{});\n      CHECK(valid) << \"Feature weight must be greater than 0.\";\n      break;\n    }\n    case MetaField::kGroupPtr: {\n      linalg::Vector<bst_group_t> t;\n      CopyTensorInfoImpl(ctx, arr, &t);\n      auto const& h_groups = t.Data()->HostVector();\n      group_ptr_.clear();\n      group_ptr_.resize(h_groups.size() + 1, 0);\n      group_ptr_[0] = 0;\n      std::partial_sum(h_groups.cbegin(), h_groups.cend(), group_ptr_.begin() + 1);\n      data::ValidateQueryGroup(group_ptr_);\n      break;\n    }\n    case MetaField::kQid: {\n      linalg::Tensor<bst_group_t, 1> t;\n      CopyTensorInfoImpl(ctx, arr, &t);\n      bool non_dec = true;\n      auto const& query_ids = t.Data()->HostVector();\n      for (std::size_t i = 1; i < query_ids.size(); ++i) {\n        if (query_ids[i] < query_ids[i - 1]) {\n          non_dec = false;\n          break;\n        }\n      }\n      CHECK(non_dec) << \"`qid` must be sorted in non-decreasing order along with data.\";\n      
common::RunLengthEncode(query_ids.cbegin(), query_ids.cend(), &group_ptr_);\n      data::ValidateQueryGroup(group_ptr_);\n      break;\n    }\n  }\n}\n\n[[nodiscard]] TypedArrayRef MetaInfo::GetInfo(Context const* ctx, StringView key) const {\n  (void)ctx;  // TODO(jiamingy): Return the data in device memory.\n  auto get_vec_aif = [](HostDeviceVector<float> const& vec) {\n    auto hv = vec.ConstHostSpan();\n    return TypedArrayRef{DataType::kFloat32, TypedArrayRef::Shape{hv.size(), 1},\n                         TypedArrayRef::SizeType{1}, hv.data()};\n  };\n  auto get_mat_aif = [get_vec_aif](linalg::Matrix<float> const& mat) {\n    if (mat.Shape(1) <= 1) {\n      // Compatible with old XGBoost when we didn't have matrix info.\n      return get_vec_aif(*mat.Data());\n    } else {\n      auto hv = mat.HostView();\n      return TypedArrayRef{DataType::kFloat32, TypedArrayRef::Shape{hv.Shape(0), hv.Shape(1)},\n                           TypedArrayRef::SizeType{2}, hv.Values().data()};\n    }\n  };\n\n  using xgboost::data::MetaField;\n\n  switch (data::MapMetaField(key, false)) {\n    case MetaField::kLabel: {\n      return get_mat_aif(this->labels);\n    }\n    case MetaField::kWeight: {\n      return get_vec_aif(this->weights_);\n    }\n    case MetaField::kBaseMargin: {\n      return get_mat_aif(this->base_margin_);\n    }\n    case MetaField::kLabelLowerBound: {\n      return get_vec_aif(this->labels_lower_bound_);\n    }\n    case MetaField::kLabelUpperBound: {\n      return get_vec_aif(this->labels_upper_bound_);\n    }\n    case MetaField::kFeatureWeights: {\n      return get_vec_aif(this->feature_weights);\n    }\n    case MetaField::kGroupPtr: {\n      auto const& gptr = this->group_ptr_;\n      return TypedArrayRef{DataType::kUInt32, TypedArrayRef::Shape{gptr.size()},\n                           TypedArrayRef::SizeType{1}, gptr.data()};\n    }\n    case MetaField::kQid: {\n      LOG(FATAL) << \"Retrieving `qid` is not supported; use `group_ptr` instead.\";\n 
     break;\n    }\n    default: {\n      LOG(FATAL) << \"Unknown field name: \" << key;\n    }\n  }\n  error::Unreachable();\n  return {};\n}\n\nvoid MetaInfo::SetFeatureInfo(const char* key, const char** info, const bst_ulong size) {\n  bool is_col_split = this->IsColumnSplit();\n\n  if (size != 0 && this->num_col_ != 0 && !is_col_split) {\n    CHECK_EQ(size, this->num_col_) << \"Length of \" << key << \" must be equal to number of columns.\";\n    CHECK(info);\n  }\n\n  // Gather column info when data is split by columns\n  auto gather_columns = [is_col_split, key, n_columns = this->num_col_](auto const& inputs) {\n    if (is_col_split) {\n      std::remove_const_t<std::remove_reference_t<decltype(inputs)>> result;\n      auto rc = collective::AllgatherStrings(inputs, &result);\n      collective::SafeColl(rc);\n      CHECK_EQ(result.size(), n_columns)\n          << \"Length of \" << key << \" must be equal to number of columns.\";\n      return result;\n    }\n    return inputs;\n  };\n\n  if (StringView{key} == \"feature_type\") {  // NOLINT\n    this->feature_type_names.clear();\n    std::copy(info, info + size, std::back_inserter(feature_type_names));\n    feature_type_names = gather_columns(feature_type_names);\n    auto& h_feature_types = feature_types.HostVector();\n    this->has_categorical_ = LoadFeatureType(feature_type_names, &h_feature_types);\n  } else if (StringView{key} == \"feature_name\") {  // NOLINT\n    feature_names.clear();\n    if (is_col_split) {\n      auto const rank = collective::GetRank();\n      std::transform(info, info + size, std::back_inserter(feature_names),\n                     [rank](char const* elem) { return std::to_string(rank) + \".\" + elem; });\n    } else {\n      std::copy(info, info + size, std::back_inserter(feature_names));\n    }\n    feature_names = gather_columns(feature_names);\n  } else {\n    LOG(FATAL) << \"Unknown feature info name: \" << key;\n  }\n}\n\nvoid MetaInfo::GetFeatureInfo(const char* field, 
std::vector<std::string>* out_str_vecs) const {\n  auto& str_vecs = *out_str_vecs;\n  if (!std::strcmp(field, \"feature_type\")) {\n    str_vecs.resize(feature_type_names.size());\n    std::copy(feature_type_names.cbegin(), feature_type_names.cend(), str_vecs.begin());\n  } else if (!strcmp(field, \"feature_name\")) {\n    str_vecs.resize(feature_names.size());\n    std::copy(feature_names.begin(), feature_names.end(), str_vecs.begin());\n  } else {\n    LOG(FATAL) << \"Unknown feature info: \" << field;\n  }\n}\n\nvoid MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_column) {\n  /**\n   * shape\n   */\n  if (accumulate_rows) {\n    this->num_row_ += that.num_row_;\n  }\n  if (this->num_col_ != 0) {\n    if (check_column) {\n      CHECK_EQ(this->num_col_, that.num_col_)\n          << \"Number of columns must be consistent across batches.\";\n    } else {\n      this->num_col_ = std::max(this->num_col_, that.num_col_);\n    }\n  }\n  this->num_col_ = that.num_col_;\n\n  /**\n   * info with n_samples\n   */\n  linalg::Stack(&this->labels, that.labels);\n\n  this->weights_.SetDevice(that.weights_.Device());\n  this->weights_.Extend(that.weights_);\n\n  this->labels_lower_bound_.SetDevice(that.labels_lower_bound_.Device());\n  this->labels_lower_bound_.Extend(that.labels_lower_bound_);\n\n  this->labels_upper_bound_.SetDevice(that.labels_upper_bound_.Device());\n  this->labels_upper_bound_.Extend(that.labels_upper_bound_);\n\n  linalg::Stack(&this->base_margin_, that.base_margin_);\n\n  /**\n   * group\n   */\n  if (this->group_ptr_.size() == 0) {\n    this->group_ptr_ = that.group_ptr_;\n  } else {\n    CHECK_NE(that.group_ptr_.size(), 0);\n    auto group_ptr = that.group_ptr_;\n    for (size_t i = 1; i < group_ptr.size(); ++i) {\n      group_ptr[i] += this->group_ptr_.back();\n    }\n    this->group_ptr_.insert(this->group_ptr_.end(), group_ptr.begin() + 1, group_ptr.end());\n  }\n\n  /**\n   * info with n_features\n   */\n  if 
(!that.feature_names.empty()) {\n    this->feature_names = that.feature_names;\n  }\n\n  if (!this->feature_types.Empty()) {\n    data::CheckFeatureTypes(this->feature_types, that.feature_types);\n  }\n\n  if (!that.feature_type_names.empty()) {\n    this->feature_type_names = that.feature_type_names;\n    auto& h_feature_types = feature_types.HostVector();\n    this->has_categorical_ = LoadFeatureType(this->feature_type_names, &h_feature_types);\n  } else if (!that.feature_types.Empty()) {\n    // FIXME(jiamingy): https://github.com/dmlc/xgboost/pull/9171/files#r1440188612\n    this->feature_types.Resize(that.feature_types.Size());\n    this->feature_types.Copy(that.feature_types);\n    auto const& ft = this->feature_types.ConstHostVector();\n    this->has_categorical_ = std::any_of(ft.cbegin(), ft.cend(), common::IsCatOp{});\n  }\n\n  if (!that.feature_weights.Empty()) {\n    this->feature_weights.Resize(that.feature_weights.Size());\n    this->feature_weights.SetDevice(that.feature_weights.Device());\n    this->feature_weights.Copy(that.feature_weights);\n  }\n}\n\nvoid MetaInfo::SynchronizeNumberOfColumns(Context const* ctx, DataSplitMode split_mode) {\n  this->data_split_mode = split_mode;\n  auto op = IsColumnSplit() ? collective::Op::kSum : collective::Op::kMax;\n  auto rc = collective::Allreduce(ctx, linalg::MakeVec(&num_col_, 1), op);\n  collective::SafeColl(rc);\n}\n\nnamespace {\ntemplate <typename T>\nvoid CheckDevice(DeviceOrd device, HostDeviceVector<T> const& v) {\n  bool valid = v.Device().IsCPU() || device.IsCPU() || v.Device() == device;\n  if (!valid) {\n    LOG(FATAL) << \"Invalid device ordinal. Data is associated with a different device ordinal than \"\n                  \"the booster. 
The device ordinal of the data is: \"\n               << v.Device() << \"; the device ordinal of the Booster is: \" << device;\n  }\n}\n\ntemplate <typename T, std::int32_t D>\nvoid CheckDevice(DeviceOrd device, linalg::Tensor<T, D> const& v) {\n  CheckDevice(device, *v.Data());\n}\n}  // anonymous namespace\n\nvoid MetaInfo::Validate(DeviceOrd device) const {\n  if (group_ptr_.size() != 0 && weights_.Size() != 0) {\n    CHECK_EQ(group_ptr_.size(), weights_.Size() + 1) << error::GroupWeight();\n    return;\n  }\n  if (group_ptr_.size() != 0) {\n    CHECK_EQ(group_ptr_.back(), num_row_)\n        << error::GroupSize() << \"the actual number of rows given by data.\";\n  }\n\n  if (weights_.Size() != 0) {\n    CHECK_EQ(weights_.Size(), num_row_) << \"Size of weights must equal to number of rows.\";\n    CheckDevice(device, weights_);\n    return;\n  }\n  if (labels.Size() != 0) {\n    CHECK_EQ(labels.Shape(0), num_row_) << \"Size of labels must equal to number of rows.\";\n    CheckDevice(device, labels);\n    return;\n  }\n  if (labels_lower_bound_.Size() != 0) {\n    CHECK_EQ(labels_lower_bound_.Size(), num_row_)\n        << \"Size of label_lower_bound must equal to number of rows.\";\n    CheckDevice(device, labels_lower_bound_);\n    return;\n  }\n  if (feature_weights.Size() != 0) {\n    CHECK_EQ(feature_weights.Size(), num_col_)\n        << \"Size of feature_weights must equal to number of columns.\";\n    CheckDevice(device, feature_weights);\n  }\n  if (labels_upper_bound_.Size() != 0) {\n    CHECK_EQ(labels_upper_bound_.Size(), num_row_)\n        << \"Size of label_upper_bound must equal to number of rows.\";\n    CheckDevice(device, labels_upper_bound_);\n    return;\n  }\n  CHECK_LE(num_nonzero_, num_col_ * num_row_);\n  if (base_margin_.Size() != 0) {\n    CHECK_EQ(base_margin_.Size() % num_row_, 0)\n        << \"Size of base margin must be a multiple of number of rows.\";\n    CheckDevice(device, base_margin_);\n  }\n}\n\n#if 
!defined(XGBOOST_USE_CUDA)\nvoid MetaInfo::SetInfoFromCUDA(Context const*, StringView, Json) { common::AssertGPUSupport(); }\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nbool MetaInfo::IsVerticalFederated() const { return collective::IsFederated() && IsColumnSplit(); }\n\nbool MetaInfo::ShouldHaveLabels() const {\n  return !IsVerticalFederated() || collective::GetRank() == 0;\n}\n\n[[nodiscard]] CatContainer const* MetaInfo::Cats() const { return this->cats_.get(); }\n[[nodiscard]] CatContainer* MetaInfo::Cats() { return this->cats_.get(); }\n\n[[nodiscard]] std::shared_ptr<CatContainer const> MetaInfo::CatsShared() const {\n  return this->cats_;\n}\n\nvoid MetaInfo::Cats(std::shared_ptr<CatContainer> cats) {\n  this->cats_ = std::move(cats);\n  CHECK_LT(cats_->NumCatsTotal(),\n           static_cast<decltype(cats->NumCatsTotal())>(std::numeric_limits<bst_cat_t>::max()));\n}\n\nusing DMatrixThreadLocal = dmlc::ThreadLocalStore<std::map<DMatrix const*, XGBAPIThreadLocalEntry>>;\n\nXGBAPIThreadLocalEntry& DMatrix::GetThreadLocal() const {\n  return (*DMatrixThreadLocal::Get())[this];\n}\n\nDMatrix::~DMatrix() {\n  auto local_map = DMatrixThreadLocal::Get();\n  if (local_map->find(this) != local_map->cend()) {\n    local_map->erase(this);\n  }\n}\n\nnamespace {\nDMatrix* TryLoadBinary(std::string fname, bool silent) {\n  std::int32_t magic;\n  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\", true));\n  if (fi != nullptr) {\n    common::PeekableInStream is(fi.get());\n    if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic)) {\n      if (!DMLC_IO_NO_ENDIAN_SWAP) {\n        dmlc::ByteSwap(&magic, sizeof(magic), 1);\n      }\n      if (magic == data::SimpleDMatrix::kMagic) {\n        DMatrix* dmat = new data::SimpleDMatrix(&is);\n        if (!silent) {\n          LOG(INFO) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << \" matrix with \"\n                    << dmat->Info().num_nonzero_ << \" entries loaded from \" << fname;\n       
 }\n        return dmat;\n      }\n    }\n  }\n  return nullptr;\n}\n}  // namespace\n\nDMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode) {\n  auto dlm_pos = uri.find('#');\n  CHECK(dlm_pos == std::string::npos)\n      << \"External memory training with text input has been removed.\";\n  std::string fname = uri;\n\n  // legacy handling of binary data loading\n  DMatrix* loaded = TryLoadBinary(fname, silent);\n  if (loaded) {\n    return loaded;\n  }\n\n  int partid = 0, npart = 1;\n\n  static std::once_flag warning_flag;\n  std::call_once(warning_flag,\n                 []() { LOG(WARNING) << \"Text file input has been deprecated since 3.1\"; });\n\n  fname = data::ValidateFileFormat(fname);\n  std::unique_ptr<dmlc::Parser<std::uint32_t>> parser(\n      dmlc::Parser<std::uint32_t>::Create(fname.c_str(), partid, npart, \"auto\"));\n  data::FileAdapter adapter(parser.get());\n  return DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(), \"\",\n                         data_split_mode);\n}\n\ntemplate <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n          typename XGDMatrixCallbackNext>\nDMatrix* DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,\n                         DataIterResetCallback* reset, XGDMatrixCallbackNext* next, float missing,\n                         int nthread, bst_bin_t max_bin) {\n  return new data::IterativeDMatrix(iter, proxy, ref, reset, next, missing, nthread, max_bin);\n}\n\ntemplate <typename DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n          typename XGDMatrixCallbackNext>\nDMatrix* DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback* reset,\n                         XGDMatrixCallbackNext* next, ExtMemConfig const& config) {\n  return new data::SparsePageDMatrix{iter, proxy, reset, next, config};\n}\n\ntemplate <typename 
DataIterHandle, typename DMatrixHandle, typename DataIterResetCallback,\n          typename XGDMatrixCallbackNext>\nDMatrix* DMatrix::Create(DataIterHandle iter, DMatrixHandle proxy, std::shared_ptr<DMatrix> ref,\n                         DataIterResetCallback* reset, XGDMatrixCallbackNext* next,\n                         bst_bin_t max_bin, ExtMemConfig const& config) {\n  return new data::ExtMemQuantileDMatrix{iter, proxy, ref, reset, next, max_bin, config};\n}\n\ntemplate DMatrix* DMatrix::Create<DataIterHandle, DMatrixHandle, DataIterResetCallback,\n                                  XGDMatrixCallbackNext>(DataIterHandle iter, DMatrixHandle proxy,\n                                                         std::shared_ptr<DMatrix> ref,\n                                                         DataIterResetCallback* reset,\n                                                         XGDMatrixCallbackNext* next, float missing,\n                                                         int nthread, int max_bin);\n\ntemplate DMatrix* DMatrix::Create<DataIterHandle, DMatrixHandle, DataIterResetCallback,\n                                  XGDMatrixCallbackNext>(DataIterHandle iter, DMatrixHandle proxy,\n                                                         DataIterResetCallback* reset,\n                                                         XGDMatrixCallbackNext* next,\n                                                         ExtMemConfig const&);\n\ntemplate DMatrix*\nDMatrix::Create<DataIterHandle, DMatrixHandle, DataIterResetCallback, XGDMatrixCallbackNext>(\n    DataIterHandle, DMatrixHandle, std::shared_ptr<DMatrix>, DataIterResetCallback*,\n    XGDMatrixCallbackNext*, bst_bin_t, ExtMemConfig const&);\n\ntemplate <typename AdapterT>\nDMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&,\n                         DataSplitMode data_split_mode) {\n  return new data::SimpleDMatrix(adapter, missing, nthread, 
data_split_mode);\n}\n\n// Instantiate the factory function for various adapters\n#define INSTANTIATION_CREATE(_AdapterT)                               \\\n  template DMatrix* DMatrix::Create<data::_AdapterT>(                 \\\n      data::_AdapterT * adapter, float missing, std::int32_t nthread, \\\n      const std::string& cache_prefix, DataSplitMode data_split_mode);\n\nINSTANTIATION_CREATE(DenseAdapter)\nINSTANTIATION_CREATE(ArrayAdapter)\nINSTANTIATION_CREATE(FileAdapter)\nINSTANTIATION_CREATE(CSRArrayAdapter)\nINSTANTIATION_CREATE(CSCArrayAdapter)\nINSTANTIATION_CREATE(ColumnarAdapter)\n\n#undef INSTANTIATION_CREATE\n\ntemplate DMatrix* DMatrix::Create(\n    data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,\n    float missing, int nthread, std::string const& cache_prefix, DataSplitMode data_split_mode);\n\nSparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {\n  SparsePage transpose;\n  common::ParallelGroupBuilder<Entry, bst_idx_t> builder(&transpose.offset.HostVector(),\n                                                         &transpose.data.HostVector());\n  builder.InitBudget(num_columns, n_threads);\n  long batch_size = static_cast<long>(this->Size());  // NOLINT(*)\n  auto page = this->GetView();\n  common::ParallelFor(batch_size, n_threads, [&](long i) {  // NOLINT(*)\n    int tid = omp_get_thread_num();\n    auto inst = page[i];\n    for (const auto& entry : inst) {\n      builder.AddBudget(entry.index, tid);\n    }\n  });\n  builder.InitStorage();\n  common::ParallelFor(batch_size, n_threads, [&](long i) {  // NOLINT(*)\n    int tid = omp_get_thread_num();\n    auto inst = page[i];\n    for (const auto& entry : inst) {\n      builder.Push(entry.index, Entry(static_cast<bst_uint>(this->base_rowid + i), entry.fvalue),\n                   tid);\n    }\n  });\n\n  if (this->data.Empty()) {\n    transpose.offset.Resize(num_columns + 1);\n    transpose.offset.Fill(0);\n  }\n  
CHECK_EQ(transpose.offset.Size(), num_columns + 1);\n  return transpose;\n}\n\nbool SparsePage::IsIndicesSorted(int32_t n_threads) const {\n  auto& h_offset = this->offset.HostVector();\n  auto& h_data = this->data.HostVector();\n  n_threads = std::max(std::min(static_cast<std::size_t>(n_threads), this->Size()),\n                       static_cast<std::size_t>(1));\n  std::vector<int32_t> is_sorted_tloc(n_threads, 0);\n  common::ParallelFor(this->Size(), n_threads, [&](auto i) {\n    auto beg = h_offset[i];\n    auto end = h_offset[i + 1];\n    is_sorted_tloc[omp_get_thread_num()] +=\n        !!std::is_sorted(h_data.begin() + beg, h_data.begin() + end, Entry::CmpIndex);\n  });\n  auto is_sorted = std::accumulate(is_sorted_tloc.cbegin(), is_sorted_tloc.cend(),\n                                   static_cast<size_t>(0)) == this->Size();\n  return is_sorted;\n}\n\nvoid SparsePage::SortIndices(int32_t n_threads) {\n  auto& h_offset = this->offset.HostVector();\n  auto& h_data = this->data.HostVector();\n\n  common::ParallelFor(this->Size(), n_threads, [&](auto i) {\n    auto beg = h_offset[i];\n    auto end = h_offset[i + 1];\n    std::sort(h_data.begin() + beg, h_data.begin() + end, Entry::CmpIndex);\n  });\n}\n\nvoid SparsePage::Reindex(uint64_t feature_offset, int32_t n_threads) {\n  auto& h_data = this->data.HostVector();\n  common::ParallelFor(h_data.size(), n_threads, [&](auto i) { h_data[i].index += feature_offset; });\n}\n\nvoid SparsePage::SortRows(int32_t n_threads) {\n  auto& h_offset = this->offset.HostVector();\n  auto& h_data = this->data.HostVector();\n  common::ParallelFor(this->Size(), n_threads, [&](auto i) {\n    if (h_offset[i] < h_offset[i + 1]) {\n      std::sort(h_data.begin() + h_offset[i], h_data.begin() + h_offset[i + 1], Entry::CmpValue);\n    }\n  });\n}\n\nvoid SparsePage::Push(const SparsePage& batch) {\n  auto& data_vec = data.HostVector();\n  auto& offset_vec = offset.HostVector();\n  const auto& batch_offset_vec = 
batch.offset.HostVector();\n  const auto& batch_data_vec = batch.data.HostVector();\n  size_t top = offset_vec.back();\n  data_vec.resize(top + batch.data.Size());\n  if (dmlc::BeginPtr(data_vec) && dmlc::BeginPtr(batch_data_vec)) {\n    std::memcpy(dmlc::BeginPtr(data_vec) + top, dmlc::BeginPtr(batch_data_vec),\n                sizeof(Entry) * batch.data.Size());\n  }\n  size_t begin = offset.Size();\n  offset_vec.resize(begin + batch.Size());\n  for (size_t i = 0; i < batch.Size(); ++i) {\n    offset_vec[i + begin] = top + batch_offset_vec[i + 1];\n  }\n}\n\ntemplate <typename AdapterBatchT>\nbst_idx_t SparsePage::Push(AdapterBatchT const& batch, float missing, std::int32_t nthread) {\n  constexpr bool kIsRowMajor = AdapterBatchT::kIsRowMajor;\n  // Allow threading only for row-major case as column-major requires O(nthread*batch_size) memory\n  nthread = kIsRowMajor ? nthread : 1;\n  if (!kIsRowMajor) {\n    CHECK_EQ(nthread, 1);\n  }\n  auto& offset_vec = offset.HostVector();\n  auto& data_vec = data.HostVector();\n\n  size_t builder_base_row_offset = this->Size();\n  common::ParallelGroupBuilder<Entry, std::remove_reference<decltype(offset_vec)>::type::value_type,\n                               kIsRowMajor>\n      builder(&offset_vec, &data_vec, builder_base_row_offset);\n  // Estimate expected number of rows by using last element in batch\n  // This is not required to be exact but prevents unnecessary resizing\n  size_t expected_rows = 0;\n  if (batch.Size() > 0) {\n    auto last_line = batch.GetLine(batch.Size() - 1);\n    if (last_line.Size() > 0) {\n      expected_rows = last_line.GetElement(last_line.Size() - 1).row_idx - base_rowid;\n    }\n  }\n  size_t batch_size = batch.Size();\n  expected_rows = kIsRowMajor ? 
batch_size : expected_rows;\n  uint64_t max_columns = 0;\n  if (batch_size == 0) {\n    return max_columns;\n  }\n  const size_t thread_size = batch_size / nthread;\n\n  builder.InitBudget(expected_rows, nthread);\n  std::vector<std::vector<uint64_t>> max_columns_vector(nthread, std::vector<uint64_t>{0});\n  std::atomic<bool> valid{true};\n  // First-pass over the batch counting valid elements\n  common::ParallelFor(static_cast<std::size_t>(nthread), nthread, [&](std::size_t tid) {\n    std::size_t begin = tid * thread_size;\n    std::size_t end =\n        tid != static_cast<std::size_t>(nthread - 1) ? (tid + 1) * thread_size : batch_size;\n    std::uint64_t& max_columns_local = max_columns_vector[tid][0];\n\n    for (size_t i = begin; i < end; ++i) {\n      auto line = batch.GetLine(i);\n      for (auto j = 0ull; j < line.Size(); j++) {\n        data::COOTuple const& element = line.GetElement(j);\n        if (!std::isinf(missing) && std::isinf(element.value)) {\n          valid = false;\n        }\n        const size_t key = element.row_idx - base_rowid;\n        CHECK_GE(key, builder_base_row_offset);\n        max_columns_local =\n            std::max(max_columns_local, static_cast<uint64_t>(element.column_idx + 1));\n\n        if (!common::CheckNAN(element.value) && element.value != missing) {\n          // Adapter row index is absolute, here we want it relative to\n          // current page\n          builder.AddBudget(key, static_cast<int>(tid));\n        }\n      }\n    }\n  });\n  CHECK(valid) << error::InfInData();\n  for (const auto& max : max_columns_vector) {\n    max_columns = std::max(max_columns, max[0]);\n  }\n\n  builder.InitStorage();\n\n  // Second pass over batch, placing elements in correct position\n  auto is_valid = data::IsValidFunctor{missing};\n  common::ParallelFor(static_cast<std::size_t>(nthread), nthread, [&](std::size_t tid) {\n    std::size_t begin = tid * thread_size;\n    std::size_t end =\n        tid != 
static_cast<std::size_t>(nthread - 1) ? (tid + 1) * thread_size : batch_size;\n    for (std::size_t i = begin; i < end; ++i) {\n      auto line = batch.GetLine(i);\n      for (auto j = 0ull; j < line.Size(); j++) {\n        auto element = line.GetElement(j);\n        const size_t key = (element.row_idx - base_rowid);\n        if (is_valid(element)) {\n          builder.Push(key, Entry(element.column_idx, element.value),\n                       static_cast<std::int32_t>(tid));\n        }\n      }\n    }\n  });\n  return max_columns;\n}\n\nvoid SparsePage::PushCSC(const SparsePage& batch) {\n  std::vector<xgboost::Entry>& self_data = data.HostVector();\n  std::vector<bst_idx_t>& self_offset = offset.HostVector();\n\n  auto const& other_data = batch.data.ConstHostVector();\n  auto const& other_offset = batch.offset.ConstHostVector();\n\n  if (other_data.empty()) {\n    self_offset = other_offset;\n    return;\n  }\n  if (!self_data.empty()) {\n    CHECK_EQ(self_offset.size(), other_offset.size())\n        << \"self_data.size(): \" << this->data.Size() << \", \"\n        << \"other_data.size(): \" << other_data.size() << std::flush;\n  } else {\n    self_data = other_data;\n    self_offset = other_offset;\n    return;\n  }\n\n  std::vector<bst_idx_t> offset(other_offset.size());\n  offset[0] = 0;\n\n  std::vector<xgboost::Entry> data(self_data.size() + other_data.size());\n\n  // n_cols in original csr data matrix, here in csc is n_rows\n  size_t const n_features = other_offset.size() - 1;\n  size_t beg = 0;\n  size_t ptr = 1;\n  for (size_t i = 0; i < n_features; ++i) {\n    size_t const self_beg = self_offset.at(i);\n    size_t const self_length = self_offset.at(i + 1) - self_beg;\n    // It is possible that the current feature and further features aren't referenced\n    // in any rows accumulated thus far. 
It is also possible for this to happen\n    // in the current sparse page row batch as well.\n    // Hence, the incremental number of rows may stay constant thus equaling the data size\n    CHECK_LE(beg, data.size());\n    std::memcpy(dmlc::BeginPtr(data) + beg, dmlc::BeginPtr(self_data) + self_beg,\n                sizeof(Entry) * self_length);\n    beg += self_length;\n\n    size_t const other_beg = other_offset.at(i);\n    size_t const other_length = other_offset.at(i + 1) - other_beg;\n    CHECK_LE(beg, data.size());\n    std::memcpy(dmlc::BeginPtr(data) + beg, dmlc::BeginPtr(other_data) + other_beg,\n                sizeof(Entry) * other_length);\n    beg += other_length;\n\n    CHECK_LT(ptr, offset.size());\n    offset.at(ptr) = beg;\n    ptr++;\n  }\n\n  self_data = std::move(data);\n  self_offset = std::move(offset);\n}\n\n#define INSTANTIATE_PUSH(__BATCH_T)                                                    \\\n  template std::uint64_t SparsePage::Push(const data::__BATCH_T& batch, float missing, \\\n                                          std::int32_t nthread);\n\nINSTANTIATE_PUSH(DenseAdapterBatch)\nINSTANTIATE_PUSH(ArrayAdapterBatch)\nINSTANTIATE_PUSH(CSRArrayAdapterBatch)\nINSTANTIATE_PUSH(CSCArrayAdapterBatch)\nINSTANTIATE_PUSH(FileAdapterBatch)\nINSTANTIATE_PUSH(ColumnarAdapterBatch)\nINSTANTIATE_PUSH(EncColumnarAdapterBatch)\n\n#undef INSTANTIATE_PUSH\n\nnamespace data {\n// List of files that will be force linked in static links.\nDMLC_REGISTRY_LINK_TAG(sparse_page_raw_format);\nDMLC_REGISTRY_LINK_TAG(gradient_index_format);\n}  // namespace data\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/data.cu",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#include <thrust/gather.h>   // for gather\n#include <thrust/logical.h>  // for none_of\n\n#include \"../common/algorithm.cuh\"  // for RunLengthEncode\n#include \"../common/cuda_context.cuh\"\n#include \"../common/device_helpers.cuh\"\n#include \"../common/linalg_op.cuh\"\n#include \"array_interface.h\"\n#include \"device_adapter.cuh\"  // for CudfAdapter, CupyAdapter\n#include \"metainfo.h\"          // for LabelsCheck, WeightsCheck, ValidateQueryGroup\n#include \"simple_dmatrix.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\nnamespace {\nauto SetDeviceToPtr(void const* ptr) {\n  cudaPointerAttributes attr;\n  dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));\n  int32_t ptr_device = attr.device;\n  dh::safe_cuda(cudaSetDevice(ptr_device));\n  return ptr_device;\n}\n\ntemplate <typename T, int32_t D>\nvoid CopyTensorInfoImpl(Context const* ctx, Json arr_interface, linalg::Tensor<T, D>* p_out) {\n  ArrayInterface<D> array(arr_interface);\n  if (array.n == 0) {\n    p_out->SetDevice(DeviceOrd::CUDA(0));\n    p_out->Reshape(array.shape);\n    return;\n  }\n  CHECK_EQ(array.valid.Capacity(), 0)\n      << \"Meta info like label or weight can not have missing value.\";\n  auto ptr_device = DeviceOrd::CUDA(SetDeviceToPtr(array.data));\n  p_out->SetDevice(ptr_device);\n\n  if (array.is_contiguous && array.type == ToDType<T>::kType) {\n    p_out->ModifyInplace([&](HostDeviceVector<T>* data, common::Span<size_t, D> shape) {\n      // set shape\n      std::copy(array.shape, array.shape + D, shape.data());\n      // set data\n      data->Resize(array.n);\n      dh::safe_cuda(cudaMemcpyAsync(data->DevicePointer(), array.data, array.n * sizeof(T),\n                                    cudaMemcpyDefault, ctx->CUDACtx()->Stream()));\n    });\n    return;\n  }\n  p_out->Reshape(array.shape);\n  auto t = p_out->View(ptr_device);\n  
linalg::cuda_impl::TransformIdxKernel(ctx, t, [=] XGBOOST_DEVICE(std::size_t i, T) {\n    return std::apply(TypedIndex<T, D>{array}, linalg::UnravelIndex<D>(i, array.shape));\n  });\n}\n\nvoid CopyGroupInfoImpl(ArrayInterface<1> column, std::vector<bst_group_t>* out) {\n  CHECK(column.type != ArrayInterfaceHandler::kF4 && column.type != ArrayInterfaceHandler::kF8)\n      << \"Expected integer for group info.\";\n\n  auto ptr_device = SetDeviceToPtr(column.data);\n  CHECK_EQ(ptr_device, dh::CurrentDevice());\n  dh::TemporaryArray<bst_group_t> temp(column.Shape<0>());\n  auto d_tmp = temp.data().get();\n\n  dh::LaunchN(column.Shape<0>(),\n              [=] __device__(size_t idx) { d_tmp[idx] = TypedIndex<size_t, 1>{column}(idx); });\n  auto length = column.Shape<0>();\n  out->resize(length + 1);\n  out->at(0) = 0;\n  thrust::copy(temp.data(), temp.data() + length, out->begin() + 1);\n  std::partial_sum(out->begin(), out->end(), out->begin());\n}\n\nvoid CopyQidImpl(Context const* ctx, ArrayInterface<1> array_interface,\n                 std::vector<bst_group_t>* p_group_ptr) {\n  auto& group_ptr_ = *p_group_ptr;\n  auto it = dh::MakeTransformIterator<uint32_t>(\n      thrust::make_counting_iterator(0ul), [array_interface] __device__(size_t i) {\n        return TypedIndex<uint32_t, 1>{array_interface}(i);\n      });\n  dh::caching_device_vector<bool> flag(1);\n  auto d_flag = dh::ToSpan(flag);\n  auto d = DeviceOrd::CUDA(SetDeviceToPtr(array_interface.data));\n  auto cuctx = ctx->CUDACtx();\n  dh::LaunchN(1, cuctx->Stream(), [=] __device__(size_t) { d_flag[0] = true; });\n  dh::LaunchN(array_interface.Shape<0>() - 1, cuctx->Stream(), [=] __device__(size_t i) {\n    auto typed = TypedIndex<uint32_t, 1>{array_interface};\n    if (typed(i) > typed(i + 1)) {\n      d_flag[0] = false;\n    }\n  });\n  bool non_dec = true;\n  dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool), cudaMemcpyDeviceToHost));\n  CHECK(non_dec) << \"`qid` must be sorted in 
increasing order along with data.\";\n\n  dh::caching_device_vector<uint32_t> out(array_interface.Shape<0>());\n  dh::caching_device_vector<uint32_t> cnt(array_interface.Shape<0>());\n  HostDeviceVector<int> d_num_runs_out(1, 0, d);\n\n  common::RunLengthEncode(cuctx->Stream(), it, out.begin(), cnt.begin(),\n                          d_num_runs_out.DevicePointer(), array_interface.Shape<0>());\n\n  auto h_num_runs_out = d_num_runs_out.HostSpan()[0];\n  group_ptr_.clear();\n  group_ptr_.resize(h_num_runs_out + 1, 0);\n  thrust::inclusive_scan(cuctx->CTP(), cnt.begin(), cnt.begin() + h_num_runs_out, cnt.begin());\n  thrust::copy(cnt.begin(), cnt.begin() + h_num_runs_out, group_ptr_.begin() + 1);\n}\n}  // namespace\n\nvoid MetaInfo::SetInfoFromCUDA(Context const* ctx, StringView key, Json array) {\n  // multi-dim float info\n  auto cuctx = ctx->CUDACtx();\n  using xgboost::data::MetaField;\n  auto copy_vec = [&](HostDeviceVector<float>* p_out) {\n    linalg::Tensor<float, 1> t;\n    CopyTensorInfoImpl(ctx, array, &t);\n    *p_out = std::move(*t.Data());\n  };\n\n  switch (data::MapMetaField(key, true)) {\n    case MetaField::kLabel: {\n      CopyTensorInfoImpl(ctx, array, &labels);\n      auto ptr = labels.Data()->ConstDevicePointer();\n      auto valid = thrust::none_of(cuctx->CTP(), ptr, ptr + labels.Size(), data::LabelsCheck{});\n      CHECK(valid) << \"Label contains NaN, infinity or a value too large.\";\n      break;\n    }\n    case MetaField::kWeight: {\n      copy_vec(&this->weights_);\n      auto ptr = weights_.ConstDevicePointer();\n      auto valid = thrust::none_of(cuctx->CTP(), ptr, ptr + weights_.Size(), data::WeightsCheck{});\n      CHECK(valid) << \"Weights must be positive values.\";\n      break;\n    }\n    case MetaField::kBaseMargin: {\n      CopyTensorInfoImpl(ctx, array, &base_margin_);\n      break;\n    }\n    case MetaField::kLabelLowerBound: {\n      copy_vec(&this->labels_lower_bound_);\n      break;\n    }\n    case 
MetaField::kLabelUpperBound: {\n      copy_vec(&this->labels_upper_bound_);\n      break;\n    }\n    case MetaField::kFeatureWeights: {\n      copy_vec(&this->feature_weights);\n      auto d_feature_weights = feature_weights.ConstDeviceSpan();\n      auto valid = thrust::none_of(cuctx->CTP(), d_feature_weights.data(),\n                                   d_feature_weights.data() + d_feature_weights.size(),\n                                   data::WeightsCheck{});\n      CHECK(valid) << \"Feature weight must be greater than 0.\";\n      break;\n    }\n    case MetaField::kGroupPtr: {\n      ArrayInterface<1> array_interface{array};\n      CopyGroupInfoImpl(array_interface, &group_ptr_);\n      data::ValidateQueryGroup(group_ptr_);\n      break;\n    }\n    case MetaField::kQid: {\n      ArrayInterface<1> array_interface{array};\n      CopyQidImpl(ctx, array_interface, &group_ptr_);\n      data::ValidateQueryGroup(group_ptr_);\n      break;\n    }\n    default: {\n      LOG(FATAL) << \"Unknown field name: \" << key;\n    }\n  }\n}\n\nnamespace {\nvoid Gather(Context const* ctx, linalg::MatrixView<float const> in,\n            common::Span<bst_idx_t const> ridx, linalg::Matrix<float>* p_out) {\n  if (in.Empty()) {\n    return;\n  }\n  auto& out = *p_out;\n  out.Reshape(ridx.size(), in.Shape(1));\n  auto d_out = out.View(ctx->Device());\n\n  auto cuctx = ctx->CUDACtx();\n  auto map_it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ull),\n                                                [=] XGBOOST_DEVICE(bst_idx_t i) {\n                                                  auto [r, c] = linalg::UnravelIndex(i, in.Shape());\n                                                  return (ridx[r] * in.Shape(1)) + c;\n                                                });\n  CHECK_NE(in.Shape(1), 0);\n  thrust::gather(cuctx->TP(), map_it, map_it + out.Size(), linalg::tcbegin(in),\n                 linalg::tbegin(d_out));\n}\n\ntemplate <typename T>\nvoid 
Gather(Context const* ctx, HostDeviceVector<T> const& in, common::Span<bst_idx_t const> ridx,\n            HostDeviceVector<T>* p_out) {\n  if (in.Empty()) {\n    return;\n  }\n  in.SetDevice(ctx->Device());\n\n  auto& out = *p_out;\n  out.SetDevice(ctx->Device());\n  out.Resize(ridx.size());\n  auto d_out = out.DeviceSpan();\n\n  auto cuctx = ctx->CUDACtx();\n  auto d_in = in.ConstDeviceSpan();\n  thrust::gather(cuctx->TP(), dh::tcbegin(ridx), dh::tcend(ridx), dh::tcbegin(d_in),\n                 dh::tbegin(d_out));\n}\n}  // anonymous namespace\n\nnamespace cuda_impl {\nvoid SliceMetaInfo(Context const* ctx, MetaInfo const& info, common::Span<bst_idx_t const> ridx,\n                   MetaInfo* p_out) {\n  auto& out = *p_out;\n\n  Gather(ctx, info.labels.View(ctx->Device()), ridx, &p_out->labels);\n  Gather(ctx, info.base_margin_.View(ctx->Device()), ridx, &p_out->base_margin_);\n\n  Gather(ctx, info.labels_lower_bound_, ridx, &out.labels_lower_bound_);\n  Gather(ctx, info.labels_upper_bound_, ridx, &out.labels_upper_bound_);\n\n  Gather(ctx, info.weights_, ridx, &out.weights_);\n}\n}  // namespace cuda_impl\n\ntemplate <typename AdapterT>\nDMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,\n                         const std::string& cache_prefix, DataSplitMode data_split_mode) {\n  CHECK_EQ(cache_prefix.size(), 0)\n      << \"Device memory construction is not currently supported with external \"\n         \"memory.\";\n  return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);\n}\n\ntemplate DMatrix* DMatrix::Create<data::CudfAdapter>(data::CudfAdapter* adapter, float missing,\n                                                     int nthread, const std::string& cache_prefix,\n                                                     DataSplitMode data_split_mode);\ntemplate DMatrix* DMatrix::Create<data::CupyAdapter>(data::CupyAdapter* adapter, float missing,\n                                                     int nthread, 
const std::string& cache_prefix,\n                                                     DataSplitMode data_split_mode);\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/device_adapter.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include \"../common/cuda_rt_utils.h\"  // for SetDevice, CurrentDevice\n#include \"columnar.h\"                 // for GetRefCats, GetArrowDictionary\n#include \"device_adapter.cuh\"\n\nnamespace xgboost::data {\nnamespace {\nauto GetRefCats(Context const* ctx, Json handle,\n                std::vector<enc::DeviceCatIndexView>* p_h_ref_cats) {\n  auto& h_ref_cats = *p_h_ref_cats;\n  auto cats = reinterpret_cast<CatContainer const*>(get<Integer const>(handle));\n  CHECK(cats);\n  auto d_cats = cats->DeviceView(ctx);\n  // FIXME(jiamingy): Remove this along with the host copy in the cat container once\n  // cuDF can return device-only data.\n  h_ref_cats.resize(d_cats.columns.size());\n  thrust::copy(dh::tcbegin(d_cats.columns), dh::tcend(d_cats.columns), h_ref_cats.begin());\n  d_cats.columns = common::Span{h_ref_cats};\n  return d_cats;\n}\n}  // anonymous namespace\n\nCudfAdapter::CudfAdapter(StringView cuda_arrinf) {\n  Json jdf = Json::Load(cuda_arrinf);\n\n  if (IsA<Object>(jdf)) {\n    // Has reference categories.\n    auto ctx = Context{}.MakeCUDA(curt::CurrentDevice());\n    this->ref_cats_ = GetRefCats(&ctx, jdf[\"ref_categories\"], &this->h_ref_cats_);\n    jdf = jdf[\"columns\"];\n  }\n\n  std::vector<Json> const& jcolumns = get<Array>(jdf);\n  std::size_t n_columns = jcolumns.size();\n  CHECK_GT(n_columns, 0) << \"The number of columns must not equal to 0.\";\n\n  std::vector<ArrayInterface<1>> columns;\n  std::vector<std::int32_t> cat_segments{0};\n  std::int32_t device = -1;\n  for (auto const& jcol : jcolumns) {\n    std::int32_t n_cats{0};\n    if (IsA<Array>(jcol)) {\n      // This is a dictionary type (categorical values).\n      auto const& first = get<Object const>(jcol[0]);\n      if (first.find(\"offsets\") == first.cend()) {\n        // numeric index\n        if (device == -1) {\n          auto const& first = get<Object const>(jcol[0]);\n          auto names = 
ArrayInterface<1>{first};\n          device = dh::CudaGetPointerDevice(names.data);\n        }\n        n_cats = GetArrowNumericIndex(DeviceOrd::CUDA(device), jcol, &cats_, &columns, &n_bytes_,\n                                      &num_rows_);\n      } else {\n        // string index\n        n_cats = GetArrowDictionary(jcol, &cats_, &columns, &n_bytes_, &num_rows_);\n      }\n    } else {\n      // Numeric values\n      auto col = ArrayInterface<1>(get<Object const>(jcol));\n      columns.push_back(col);\n      this->cats_.emplace_back();\n      this->num_rows_ = std::max(num_rows_, col.Shape<0>());\n      CHECK_EQ(num_rows_, col.Shape<0>()) << \"All columns should have the same number of rows.\";\n      n_bytes_ += col.ElementSize() * col.Shape<0>();\n    }\n    cat_segments.emplace_back(n_cats);\n    if (device == -1) {\n      device = dh::CudaGetPointerDevice(columns.back().data);\n    }\n    CHECK_EQ(device, dh::CudaGetPointerDevice(columns.back().data))\n        << \"All columns should use the same device.\";\n  }\n  // Categories\n  std::partial_sum(cat_segments.cbegin(), cat_segments.cend(), cat_segments.begin());\n  this->n_total_cats_ = cat_segments.back();\n  this->cat_segments_ = std::move(cat_segments);\n  this->d_cats_ = this->cats_;  // thrust copy\n\n  CHECK(!columns.empty());\n  if (device < 0) {\n    // Empty dataset\n    CHECK_EQ(columns.front().Shape<0>(), 0);\n    device_ = DeviceOrd::CUDA(curt::CurrentDevice());\n  } else {\n    device_ = DeviceOrd::CUDA(device);\n  }\n  CHECK(device_.IsCUDA());\n  curt::SetDevice(device_.ordinal);\n\n  this->columns_ = columns;\n  batch_ = CudfAdapterBatch(dh::ToSpan(columns_), NoOpAccessor{}, num_rows_);\n\n  if (!this->ref_cats_.Empty()) {\n    CHECK_EQ(this->ref_cats_.Size(), this->columns_.size())\n        << \"Invalid reference categories, different number of columns\";\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/device_adapter.cuh",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n * @file device_adapter.cuh\n */\n#ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_\n#define XGBOOST_DATA_DEVICE_ADAPTER_H_\n\n#include <thrust/functional.h>                   // for maximum\n\n#include <cstddef>           // for size_t\n#include <cuda/std/variant>  // for variant\n#include <limits>            // for numeric_limits\n#include <memory>            // for make_unique\n#include <string>            // for string\n\n#include \"../common/algorithm.cuh\"  // for AllOf\n#include \"../common/cuda_context.cuh\"\n#include \"../common/device_helpers.cuh\"\n#include \"adapter.h\"\n#include \"array_interface.h\"\n#include \"cat_container.cuh\"      // for MakeCatAccessor\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::data {\ntemplate <typename EncAccessor>\nclass EncCudfAdapterBatchImpl : public detail::NoMetaInfo {\n private:\n  common::Span<ArrayInterface<1> const> columns_;\n  bst_idx_t n_samples_{0};\n  EncAccessor acc_;\n\n public:\n  EncCudfAdapterBatchImpl() = default;\n  EncCudfAdapterBatchImpl(common::Span<ArrayInterface<1> const> columns, EncAccessor acc,\n                          bst_idx_t n_samples)\n      : columns_(columns), n_samples_(n_samples), acc_{std::move(acc)} {}\n  [[nodiscard]] std::size_t Size() const { return n_samples_ * columns_.size(); }\n  [[nodiscard]] __device__ __forceinline__ COOTuple GetElement(bst_idx_t idx) const {\n    auto column_idx = idx % columns_.size();\n    auto row_idx = idx / columns_.size();\n    auto value = this->GetElement(row_idx, column_idx);\n    return {row_idx, column_idx, value};\n  }\n\n  [[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {\n    auto const& column = columns_[fidx];\n    float value = column.valid.Data() == nullptr || column.valid.Check(ridx)\n                      ? 
column(ridx)\n                      : std::numeric_limits<float>::quiet_NaN();\n    return acc_(value, fidx);\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return n_samples_; }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return columns_.size(); }\n  [[nodiscard]] common::Span<ArrayInterface<1> const> Columns() const { return this->columns_; }\n};\n\nusing CudfAdapterBatch = EncCudfAdapterBatchImpl<NoOpAccessor>;\nusing EncCudfAdapterBatch = EncCudfAdapterBatchImpl<CatAccessor>;\n\n/**\n * @brief Device columnar format. We call it cuDF, but it's just arrow-CUDA since cuDF\n * adopts the arrow format.\n *\n * See @ref XGDMatrixCreateFromColumnar for notes\n */\nclass CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {\n public:\n  explicit CudfAdapter(StringView cuda_interfaces_str);\n  explicit CudfAdapter(std::string cuda_interfaces_str)\n      : CudfAdapter{StringView{cuda_interfaces_str}} {}\n\n  [[nodiscard]] CudfAdapterBatch const& Value() const override {\n    CHECK_EQ(batch_.Columns().data(), columns_.data().get());\n    return batch_;\n  }\n\n  [[nodiscard]] std::size_t NumRows() const { return num_rows_; }\n  [[nodiscard]] std::size_t NumColumns() const { return columns_.size(); }\n  [[nodiscard]] DeviceOrd Device() const { return device_; }\n  [[nodiscard]] bst_idx_t SizeBytes() const { return this->n_bytes_; }\n\n  [[nodiscard]] enc::DeviceColumnsView Cats() const {\n    return {common::Span{this->cats_}, dh::ToSpan(this->cat_segments_), this->n_total_cats_};\n  }\n  [[nodiscard]] enc::DeviceColumnsView DCats() const {\n    return {dh::ToSpan(this->d_cats_), dh::ToSpan(this->cat_segments_), this->n_total_cats_};\n  }\n  [[nodiscard]] enc::DeviceColumnsView RefCats() const { return ref_cats_; }\n  [[nodiscard]] bool HasCategorical() const { return n_total_cats_ != 0; }\n  [[nodiscard]] bool HasRefCategorical() const { return this->ref_cats_.n_total_cats != 0; }\n\n  [[nodiscard]] 
common::Span<ArrayInterface<1> const> Columns() const {\n    return dh::ToSpan(this->columns_);\n  }\n\n private:\n  CudfAdapterBatch batch_;\n  dh::device_vector<ArrayInterface<1>> columns_;\n\n  // Categories\n  std::vector<enc::DeviceCatIndexView> cats_;\n  dh::device_vector<enc::DeviceCatIndexView> d_cats_;\n  dh::device_vector<std::int32_t> cat_segments_;\n  std::int32_t n_total_cats_{0};\n\n  enc::DeviceColumnsView ref_cats_;                  // A view to the reference category.\n  std::vector<enc::DeviceCatIndexView> h_ref_cats_;  // host storage for column view\n\n  size_t num_rows_{0};\n  bst_idx_t n_bytes_{0};\n  DeviceOrd device_{DeviceOrd::CPU()};\n};\n\nclass CupyAdapterBatch : public detail::NoMetaInfo {\n public:\n  CupyAdapterBatch() = default;\n  explicit CupyAdapterBatch(ArrayInterface<2> array_interface)\n      : array_interface_(std::move(array_interface)) {}\n  // The total number of elements.\n  [[nodiscard]] std::size_t Size() const {\n    return array_interface_.Shape<0>() * array_interface_.Shape<1>();\n  }\n  [[nodiscard]] __device__ COOTuple GetElement(size_t idx) const {\n    size_t column_idx = idx % array_interface_.Shape<1>();\n    size_t row_idx = idx / array_interface_.Shape<1>();\n    float value = array_interface_(row_idx, column_idx);\n    return {row_idx, column_idx, value};\n  }\n  [[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {\n    float value = array_interface_(ridx, fidx);\n    return value;\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape<0>(); }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape<1>(); }\n\n private:\n  ArrayInterface<2> array_interface_;\n};\n\ninline auto MakeEncColumnarBatch(Context const* ctx, CudfAdapter const* adapter) {\n  auto cats = std::make_unique<CatContainer>(ctx, adapter->RefCats(), true);\n  cats->Sort(ctx);\n  auto [acc, mapping] = 
::xgboost::cuda_impl::MakeCatAccessor(ctx, adapter->DCats(), cats.get());\n  return std::tuple{EncCudfAdapterBatch{adapter->Columns(), acc, adapter->NumRows()},\n                    std::move(mapping)};\n}\n\ninline auto MakeEncColumnarBatch(Context const* ctx, std::shared_ptr<CudfAdapter> const& adapter) {\n  return MakeEncColumnarBatch(ctx, adapter.get());\n}\n\nclass CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {\n public:\n  explicit CupyAdapter(StringView cuda_interface_str) {\n    Json json_array_interface = Json::Load(cuda_interface_str);\n    array_interface_ = ArrayInterface<2>(get<Object const>(json_array_interface));\n    batch_ = CupyAdapterBatch(array_interface_);\n    if (array_interface_.Shape<0>() == 0) {\n      return;\n    }\n    device_ = DeviceOrd::CUDA(dh::CudaGetPointerDevice(array_interface_.data));\n    this->n_bytes_ =\n        array_interface_.Shape<0>() * array_interface_.Shape<1>() * array_interface_.ElementSize();\n    CHECK(device_.IsCUDA());\n  }\n  explicit CupyAdapter(std::string cuda_interface_str)\n      : CupyAdapter{StringView{cuda_interface_str}} {}\n  [[nodiscard]] const CupyAdapterBatch& Value() const override { return batch_; }\n\n  [[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape<0>(); }\n  [[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape<1>(); }\n  [[nodiscard]] DeviceOrd Device() const { return device_; }\n  [[nodiscard]] bst_idx_t SizeBytes() const { return this->n_bytes_; }\n\n private:\n  ArrayInterface<2> array_interface_;\n  CupyAdapterBatch batch_;\n  bst_idx_t n_bytes_{0};\n  DeviceOrd device_{DeviceOrd::CPU()};\n};\n\n// Returns maximum row length\ntemplate <typename AdapterBatchT>\nbst_idx_t GetRowCounts(Context const* ctx, const AdapterBatchT batch,\n                       common::Span<bst_idx_t> offset, DeviceOrd device, float missing) {\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n  IsValidFunctor is_valid(missing);\n  
dh::safe_cuda(\n      cudaMemsetAsync(offset.data(), '\\0', offset.size_bytes(), ctx->CUDACtx()->Stream()));\n\n  auto n_samples = batch.NumRows();\n  bst_feature_t n_features = batch.NumCols();\n\n  // Use more than 1 threads for each row in case of dataset being too wide.\n  bst_feature_t stride{0};\n  if (n_features < 32) {\n    stride = std::min(n_features, 4u);\n  } else if (n_features < 64) {\n    stride = 8;\n  } else if (n_features < 128) {\n    stride = 16;\n  } else {\n    stride = 32;\n  }\n\n  // Count elements per row\n  dh::LaunchN(n_samples * stride, ctx->CUDACtx()->Stream(), [=] __device__(std::size_t idx) {\n    bst_idx_t cnt{0};\n    auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);\n    SPAN_CHECK(ridx < n_samples);\n    for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {\n      if (is_valid(batch.GetElement(ridx, fidx))) {\n        cnt++;\n      }\n    }\n\n    atomicAdd(reinterpret_cast<unsigned long long*>(  // NOLINT\n                  &offset[ridx]),\n              static_cast<unsigned long long>(cnt));  // NOLINT\n  });\n  bst_idx_t row_stride =\n      dh::Reduce(ctx->CUDACtx()->CTP(), thrust::device_pointer_cast(offset.data()),\n                 thrust::device_pointer_cast(offset.data()) + offset.size(),\n                 static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());\n  return row_stride;\n}\n\n/**\n * @brief Check there's no inf in data.\n */\ntemplate <typename AdapterBatchT>\nbool NoInfInData(Context const* ctx, AdapterBatchT const& batch, IsValidFunctor is_valid) {\n  auto it = dh::MakeIndexTransformIter(\n      [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });\n  return common::AllOf(ctx->CUDACtx()->CTP(), it, it + batch.Size(), [=] XGBOOST_DEVICE(float v) {\n    if (is_valid(v) && isinf(v)) {\n      return false;\n    }\n    return true;\n  });\n}\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_DEVICE_ADAPTER_H_\n"
  },
  {
    "path": "src/data/ellpack_page.cc",
    "content": "/**\n * Copyright 2019-2024, XGBoost contributors\n */\n#ifndef XGBOOST_USE_CUDA\n\n#include \"ellpack_page.h\"\n\n#include <xgboost/data.h>\n\n#include <memory>  // for shared_ptr\n\n// dummy implementation of EllpackPage in case CUDA is not used\nnamespace xgboost {\n\nclass EllpackPageImpl {\n  std::shared_ptr<common::HistogramCuts> cuts_;\n\n public:\n  [[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; }\n  [[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }\n};\n\nEllpackPage::EllpackPage() = default;\n\nEllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n}\n\nEllpackPage::~EllpackPage() noexcept(false) {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n}\n\nvoid EllpackPage::SetBaseRowId(std::size_t) {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n}\nbst_idx_t EllpackPage::Size() const {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n  return 0;\n}\n\n[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n  return impl_->Cuts();\n}\n\n[[nodiscard]] bst_idx_t EllpackPage::BaseRowId() const {\n  LOG(FATAL) << \"Internal Error: XGBoost is not compiled with CUDA but \"\n                \"EllpackPage is required\";\n  return 0;\n}\n}  // namespace xgboost\n\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/data/ellpack_page.cu",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n */\n#include <thrust/binary_search.h>                       // for lower_bound,  upper_bound\n#include <thrust/extrema.h>                             // for max_element\n#include <thrust/iterator/counting_iterator.h>          // for make_counting_iterator\n#include <thrust/iterator/transform_output_iterator.h>  // for transform_output_iterator\n#include <thrust/tuple.h>                               // for tuple\n\n#include <algorithm>          // for copy\n#include <cuda/std/iterator>  // for distance\n#include <limits>             // for numeric_limits\n#include <utility>            // for move\n#include <vector>             // for vector\n\n#include \"../common/algorithm.cuh\"          // for InclusiveScan\n#include \"../common/categorical.h\"          // for IsCat\n#include \"../common/compressed_iterator.h\"  // for CompressedIterator\n#include \"../common/cuda_context.cuh\"       // for CUDAContext\n#include \"../common/cuda_rt_utils.h\"        // for SetDevice\n#include \"../common/cuda_stream.h\"          // for DefaultStream\n#include \"../common/hist_util.cuh\"          // for HistogramCuts\n#include \"../common/ref_resource_view.cuh\"  // for MakeFixedVecWithCudaMalloc\n#include \"../common/transform_iterator.h\"   // for MakeIndexTransformIter\n#include \"device_adapter.cuh\"               // for NoInfInData\n#include \"ellpack_page.cuh\"                 // for EllpackPageImpl\n#include \"ellpack_page.h\"                   // for EllpackPage\n#include \"gradient_index.h\"                 // for GHistIndexMatrix\n#include \"xgboost/context.h\"                // for Context\n#include \"xgboost/data.h\"                   // for DMatrix\n\nnamespace xgboost {\nEllpackPage::EllpackPage() : impl_{new EllpackPageImpl{}} {}\n\nEllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param)\n    : impl_{new EllpackPageImpl{ctx, dmat, param}} {}\n\nEllpackPage::~EllpackPage() 
noexcept(false) = default;\n\nEllpackPage::EllpackPage(EllpackPage&& that) { std::swap(impl_, that.impl_); }\n\n[[nodiscard]] bst_idx_t EllpackPage::Size() const { return impl_->Size(); }\n\nvoid EllpackPage::SetBaseRowId(std::size_t row_id) { impl_->SetBaseRowId(row_id); }\n\n[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {\n  CHECK(impl_);\n  return impl_->Cuts();\n}\n\n[[nodiscard]] bst_idx_t EllpackPage::BaseRowId() const { return this->Impl()->base_rowid; }\n\n// Bin each input data entry, store the bin indices in compressed form.\ntemplate <bool HasNoMissing, bool kDenseCompressed>\n__global__ void CompressBinEllpackKernel(\n    common::CompressedBufferWriter wr,\n    common::CompressedByteT* __restrict__ buffer,  // gidx_buffer\n    const size_t* __restrict__ row_ptrs,           // row offset of input data\n    const Entry* __restrict__ entries,             // One batch of input data\n    const float* __restrict__ cuts,                // HistogramCuts::cut_values_\n    const uint32_t* __restrict__ cut_ptrs,         // HistogramCuts::cut_ptrs_\n    common::Span<FeatureType const> feature_types,\n    size_t base_row,  // batch_row_begin\n    size_t n_rows, size_t row_stride, std::uint32_t null_gidx_value) {\n  auto irow = threadIdx.x + blockIdx.x * blockDim.x;\n  auto cpr_fidx = threadIdx.y + blockIdx.y * blockDim.y;  // compressed fidx\n  if (irow >= n_rows || cpr_fidx >= row_stride) {\n    return;\n  }\n  auto row_length = static_cast<decltype(cpr_fidx)>(row_ptrs[irow + 1] - row_ptrs[irow]);\n  std::uint32_t bin = null_gidx_value;\n\n  // When treating a sparse matrix as dense, we need to write null values in between valid\n  // values. But we don't know where to write if the feature index is not recorded for a\n  // missing value. 
Here we use binary search to ensure `cpr_fidx` is the same as `fidx`.\n  if (kDenseCompressed && !HasNoMissing) {\n    auto row_beg = entries + row_ptrs[irow] - row_ptrs[0];\n    auto row_end = entries + row_ptrs[irow + 1] - row_ptrs[0];\n    auto it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ul),\n                                              [=](std::size_t i) { return row_beg[i].index; });\n    auto it_end = it + cuda::std::distance(row_beg, row_end);\n    auto res_it = thrust::lower_bound(thrust::seq, it, it_end, cpr_fidx);\n    if (res_it == it_end || cpr_fidx != *res_it) {\n      wr.AtomicWriteSymbol(buffer, bin, (irow + base_row) * row_stride + cpr_fidx);\n      return;\n    }\n    cpr_fidx = cuda::std::distance(it, res_it);\n    SPAN_CHECK(cpr_fidx < row_length);\n  }\n\n  if (cpr_fidx < row_length) {\n    // We are using sub-batch of a SparsePage, need to account for the first offset within\n    // the sub-batch.\n    //\n    // The block.y idx is calculated using row_stride, which is the longest row. 
We can\n    // use `compressed_fidx` to fully index the sparse page row.\n    Entry entry = entries[row_ptrs[irow] - row_ptrs[0] + cpr_fidx];\n\n    bst_feature_t fidx = entry.index;\n    float fvalue = entry.fvalue;\n    // {feature_cuts, n_cuts} forms the array of cuts of the current `feature'.\n    float const* feature_cuts = &cuts[cut_ptrs[fidx]];\n    auto n_cuts = cut_ptrs[fidx + 1] - cut_ptrs[fidx];\n\n    bool is_cat = common::IsCat(feature_types, fidx);\n    // Assigning the bin in current entry.\n    // S.t.: fvalue < feature_cuts[bin]\n    bin = std::numeric_limits<decltype(bin)>::max();\n    if (is_cat) {\n      auto it =\n          dh::MakeTransformIterator<int>(feature_cuts, [](float v) { return common::AsCat(v); });\n      bin = thrust::lower_bound(thrust::seq, it, it + n_cuts, common::AsCat(fvalue)) - it;\n    } else {\n      bin = thrust::upper_bound(thrust::seq, feature_cuts, feature_cuts + n_cuts, fvalue) -\n            feature_cuts;\n    }\n\n    if (bin >= n_cuts) {\n      bin = n_cuts - 1;\n    }\n    if (!kDenseCompressed) {\n      // Sparse data, use the compressed fidx.  Add the number of bins in previous\n      // features since we can't compress it based on feature-local index.\n      bin += cut_ptrs[fidx];\n    } else {\n      // Write to the actual fidx for dense data.\n      cpr_fidx = fidx;\n    }\n  }\n  // Write to the gidx buffer for non-missing values.\n  wr.AtomicWriteSymbol(buffer, bin, (irow + base_row) * row_stride + cpr_fidx);\n}\n\n// Calculate the number of symbols for the compressed ellpack. Similar to what the CPU\n// implementation does, we compress the dense data by subtracting the bin values with the\n// starting bin of its feature if it's dense. 
In addition, we treat the data as dense if\n// there's no compression to be made by using ellpack.\n[[nodiscard]] EllpackPageImpl::Info CalcNumSymbols(\n    Context const* ctx, bst_idx_t row_stride, bool is_dense,\n    std::shared_ptr<common::HistogramCuts const> cuts) {\n  // Return the total number of symbols (total number of bins plus 1 for missing)\n  // The null value equals the total number of bins.\n  bst_idx_t n_symbols = cuts->TotalBins() + 1;\n  if (n_symbols == 1) {  // Empty DMatrix\n    return {static_cast<bst_feature_t>(0), n_symbols};\n  }\n\n  bst_idx_t n_features = cuts->NumFeatures();\n  cuts->cut_ptrs_.SetDevice(ctx->Device());\n  common::Span<std::uint32_t const> dptrs = cuts->cut_ptrs_.ConstDeviceSpan();\n  using PtrT = typename decltype(dptrs)::value_type;\n\n  // Calculate the number of required symbols if we treat the data as dense.\n  PtrT n_symbols_dense{0};\n  CUDAContext const* cuctx = ctx->CUDACtx();\n  auto it = dh::MakeTransformIterator<PtrT>(\n      thrust::make_counting_iterator(1ul),\n      [=] XGBOOST_DEVICE(std::size_t i) { return dptrs[i] - dptrs[i - 1]; });\n  CHECK_GE(dptrs.size(), 2);\n  auto max_it = thrust::max_element(cuctx->CTP(), it, it + dptrs.size() - 1);\n  dh::CachingDeviceUVector<PtrT> max_element(1);\n  auto d_me = max_element.data();\n  dh::LaunchN(1, cuctx->Stream(), [=] XGBOOST_DEVICE(std::size_t i) { d_me[i] = *max_it; });\n  dh::safe_cuda(cudaMemcpyAsync(&n_symbols_dense, d_me, sizeof(PtrT), cudaMemcpyDeviceToHost,\n                                cuctx->Stream()));\n  cuctx->Stream().Sync();\n  // Decide the type of the data.\n  CHECK_LE(row_stride, n_features);\n  if (is_dense) {\n    // No missing, hence no null value, hence no + 1 symbol.\n    LOG(INFO) << \"Ellpack is dense.\";\n    return {n_features, n_symbols_dense};\n  } else if (n_features == row_stride) {\n    // Treat the ellpack as dense if we can save memory.\n    LOG(INFO) << \"Ellpack is relatively dense.\";\n    return {n_features, 
n_symbols_dense + 1};  // +1 for missing value (null in ellpack)\n  } else {\n    LOG(INFO) << \"Ellpack is sparse.\";\n    return {row_stride, n_symbols};\n  }\n}\n\n// Construct an ELLPACK matrix with the given number of empty rows.\nEllpackPageImpl::EllpackPageImpl(Context const* ctx,\n                                 std::shared_ptr<common::HistogramCuts const> cuts, bool is_dense,\n                                 bst_idx_t row_stride, bst_idx_t n_rows)\n    : is_dense{is_dense},\n      n_rows{n_rows},\n      cuts_{std::move(cuts)},\n      info{CalcNumSymbols(ctx, row_stride, is_dense, this->cuts_)} {\n  monitor_.Init(\"ellpack_page\");\n  curt::SetDevice(ctx->Ordinal());\n  this->cuts_->SetDevice(ctx->Device());\n\n  this->InitCompressedData(ctx);\n}\n\nEllpackPageImpl::EllpackPageImpl(Context const* ctx,\n                                 std::shared_ptr<common::HistogramCuts const> cuts,\n                                 const SparsePage& page, bool is_dense, size_t row_stride,\n                                 common::Span<FeatureType const> feature_types)\n    : is_dense{is_dense},\n      n_rows{page.Size()},\n      cuts_{std::move(cuts)},\n      info{CalcNumSymbols(ctx, row_stride, is_dense, this->cuts_)} {\n  monitor_.Init(\"ellpack_page\");\n  curt::SetDevice(ctx->Ordinal());\n  this->cuts_->SetDevice(ctx->Device());\n\n  this->InitCompressedData(ctx);\n  this->CreateHistIndices(ctx, page, feature_types);\n}\n\n// Construct an ELLPACK matrix in memory.\nEllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* p_fmat, const BatchParam& param)\n    : is_dense{p_fmat->IsDense()},\n      n_rows{p_fmat->Info().num_row_},\n      // Create the quantile sketches for the dmatrix and initialize HistogramCuts.\n      cuts_{param.hess.empty()\n                ? 
std::make_shared<common::HistogramCuts>(\n                      common::DeviceSketch(ctx, p_fmat, param.max_bin))\n                : std::make_shared<common::HistogramCuts>(\n                      common::DeviceSketchWithHessian(ctx, p_fmat, param.max_bin, param.hess))},\n      info{CalcNumSymbols(ctx, GetRowStride(p_fmat), p_fmat->IsDense(), this->cuts_)} {\n  monitor_.Init(\"ellpack_page\");\n  curt::SetDevice(ctx->Ordinal());\n\n  this->InitCompressedData(ctx);\n\n  p_fmat->Info().feature_types.SetDevice(ctx->Device());\n  auto ft = p_fmat->Info().feature_types.ConstDeviceSpan();\n  CHECK(p_fmat->SingleColBlock());\n  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {\n    this->CreateHistIndices(ctx, page, ft);\n  }\n}\n\ntemplate <typename AdapterBatchT, typename IterT>\nstruct WriteCompressedEllpackFunctor {\n  WriteCompressedEllpackFunctor(common::CompressedByteT* buffer,\n                                const common::CompressedBufferWriter& writer, AdapterBatchT batch,\n                                EllpackAccessorImpl<IterT> accessor,\n                                common::Span<FeatureType const> feature_types,\n                                const data::IsValidFunctor& is_valid)\n      : d_buffer(buffer),\n        writer(writer),\n        batch(std::move(batch)),\n        accessor(std::move(accessor)),\n        feature_types(std::move(feature_types)),\n        is_valid(is_valid) {}\n\n  common::CompressedByteT* d_buffer;\n  common::CompressedBufferWriter writer;\n  AdapterBatchT batch;\n  EllpackAccessorImpl<IterT> accessor;\n  common::Span<FeatureType const> feature_types;\n  data::IsValidFunctor is_valid;\n\n  // Tuple[0] = The row index of the input, used as a key to define segments\n  // Tuple[1] = Scanned flags of valid elements for each row\n  // Tuple[2] = The index in the input data\n  using Tuple = thrust::tuple<bst_idx_t, bst_idx_t, bst_idx_t>;\n\n  template <bool kIsDenseCompressed>\n  __device__ void Write(data::COOTuple const& 
e, bst_idx_t out_position) {\n    bst_bin_t bin_idx = 0;\n    if (common::IsCat(feature_types, e.column_idx)) {\n      bin_idx = accessor.template SearchBin<true>(e.value, e.column_idx);\n    } else {\n      bin_idx = accessor.template SearchBin<false>(e.value, e.column_idx);\n    }\n    if constexpr (kIsDenseCompressed) {\n      bin_idx -= accessor.feature_segments[e.column_idx];\n    }\n    writer.AtomicWriteSymbol(d_buffer, bin_idx, out_position);\n  }\n  // Used for dense or as dense data.\n  __device__ void operator()(bst_idx_t i) {\n    auto e = batch.GetElement(i);\n    if (is_valid(e)) {\n      this->Write<true>(e, i);\n    } else {\n      writer.AtomicWriteSymbol(d_buffer, accessor.NullValue(), i);\n    }\n  }\n  // Used for sparse data.\n  __device__ size_t operator()(Tuple const& out) {\n    auto e = batch.GetElement(thrust::get<2>(out));\n    if (is_valid(e)) {\n      // -1 because the scan is inclusive\n      size_t output_position = accessor.row_stride * e.row_idx + thrust::get<1>(out) - 1;\n      this->Write<false>(e, output_position);\n    }\n    return 0;\n  }\n};\n\ntemplate <typename Tuple>\nstruct TupleScanOp {\n  __device__ Tuple operator()(Tuple a, Tuple b) {\n    // Key equal\n    if (thrust::get<0>(a) == thrust::get<0>(b)) {\n      thrust::get<1>(b) += thrust::get<1>(a);\n      return b;\n    }\n    // Not equal\n    return b;\n  }\n};\n\n// Here the data is already correctly ordered and simply needs to be compacted\n// to remove missing data\ntemplate <bool kIsDenseCompressed, typename AdapterBatchT>\nvoid CopyDataToEllpack(Context const* ctx, const AdapterBatchT& batch,\n                       common::Span<FeatureType const> feature_types, EllpackPageImpl* dst,\n                       float missing) {\n  data::IsValidFunctor is_valid(missing);\n  bool valid = data::NoInfInData(ctx, batch, is_valid);\n  CHECK(valid) << error::InfInData();\n\n  auto cnt = thrust::make_counting_iterator(0llu);\n  auto n_symbols = dst->NumSymbols();\n  
common::CompressedBufferWriter writer{n_symbols};\n  auto d_compressed_buffer = dst->gidx_buffer.data();\n\n  auto get_ridx = [=] __device__(std::size_t idx) {\n    return batch.GetElement(idx).row_idx;\n  };  // NOLINT\n  auto get_is_valid = [=] __device__(std::size_t idx) -> std::size_t {\n    return is_valid(batch.GetElement(idx));\n  };\n  dst->Visit(ctx, {}, [&](auto&& device_accessor) {\n    using IterT = typename std::remove_reference_t<decltype(device_accessor)>::IterType;\n    // We redirect the scan output into this functor to do the actual writing\n    using Tuple = typename WriteCompressedEllpackFunctor<AdapterBatchT, IterT>::Tuple;\n    dh::TypedDiscard<Tuple> discard;\n    WriteCompressedEllpackFunctor<AdapterBatchT, IterT> functor{\n        d_compressed_buffer, writer, batch, device_accessor, feature_types, is_valid};\n    // For dense compressed data, we can simply copy the data with the input position.\n    if (kIsDenseCompressed) {\n      CHECK(batch.NumRows() == 0 || batch.NumCols() == dst->info.row_stride);\n      thrust::for_each_n(ctx->CUDACtx()->CTP(), cnt, dst->Size() * dst->info.row_stride, functor);\n      return;\n    }\n\n    // Some witchcraft happens here.\n    //\n    // The goal is to copy valid elements out of the input to an ELLPACK matrix with a given\n    // row stride, using no extra working memory Standard stream compaction needs to be\n    // modified to do this, so we manually define a segmented stream compaction via\n    // operators on an inclusive scan. 
The output of this inclusive scan is fed to a custom\n    // function which works out the correct output position\n    auto key_iter = dh::MakeTransformIterator<size_t>(cnt, get_ridx);\n    auto value_iter = dh::MakeTransformIterator<size_t>(cnt, get_is_valid);\n\n    auto key_value_index_iter =\n        thrust::make_zip_iterator(thrust::make_tuple(key_iter, value_iter, cnt));\n    thrust::transform_output_iterator<decltype(functor), decltype(discard)> out(discard, functor);\n    common::InclusiveScan(ctx, key_value_index_iter, out, TupleScanOp<Tuple>{}, batch.Size());\n  });\n}\n\nvoid WriteNullValues(Context const* ctx, EllpackPageImpl* dst,\n                     common::Span<size_t const> row_counts) {\n  // Write the null values\n  auto null = dst->NullValue();\n  common::CompressedBufferWriter writer(dst->NumSymbols());\n  auto d_compressed_buffer = dst->gidx_buffer.data();\n  auto row_stride = dst->info.row_stride;\n  auto n = row_stride * dst->n_rows;\n  dh::LaunchN(n, ctx->CUDACtx()->Stream(), [=] __device__(bst_idx_t idx) mutable {\n    size_t row_idx = idx / row_stride;\n    size_t row_offset = idx % row_stride;\n    if (row_offset >= row_counts[row_idx]) {\n      writer.AtomicWriteSymbol(d_compressed_buffer, null, idx);\n    }\n  });\n}\n\ntemplate <typename AdapterBatch>\nEllpackPageImpl::EllpackPageImpl(Context const* ctx, AdapterBatch batch, float missing,\n                                 bool is_dense, common::Span<bst_idx_t const> row_counts,\n                                 common::Span<FeatureType const> feature_types,\n                                 bst_idx_t row_stride, bst_idx_t n_rows,\n                                 std::shared_ptr<common::HistogramCuts const> cuts)\n    : EllpackPageImpl{ctx, cuts, is_dense, row_stride, n_rows} {\n  curt::SetDevice(ctx->Ordinal());\n\n  if (this->IsDenseCompressed()) {\n    CopyDataToEllpack<true>(ctx, batch, feature_types, this, missing);\n  } else {\n    CopyDataToEllpack<false>(ctx, batch, 
feature_types, this, missing);\n    WriteNullValues(ctx, this, row_counts);\n  }\n}\n\n#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T)                                                  \\\n  template EllpackPageImpl::EllpackPageImpl(                                                 \\\n      Context const* ctx, __BATCH_T batch, float missing, bool is_dense,                     \\\n      common::Span<bst_idx_t const> row_counts_span,                                         \\\n      common::Span<FeatureType const> feature_types, bst_idx_t row_stride, bst_idx_t n_rows, \\\n      std::shared_ptr<common::HistogramCuts const> cuts);\n\nELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)\nELLPACK_BATCH_SPECIALIZE(data::EncCudfAdapterBatch)\nELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)\n\n#undef ELLPACK_BATCH_SPECIALIZE\n\nnamespace {\ntemplate <typename T>\nvoid CopyGHistToEllpack(Context const* ctx, GHistIndexMatrix const& page,\n                        common::Span<bst_idx_t const> d_row_ptr, bst_idx_t row_stride,\n                        bst_bin_t null, bst_idx_t n_symbols,\n                        common::Span<bst_feature_t const> d_cut_ptrs,\n                        common::CompressedByteT* d_compressed_buffer) {\n  dh::device_vector<uint8_t> data(page.index.begin(), page.index.end());\n  auto d_data = dh::ToSpan(data);\n\n  // GPU employs the same dense compression as CPU, no need to handle page.index.Offset()\n  auto bin_type = page.index.GetBinTypeSize();\n  common::CompressedBufferWriter writer{n_symbols};\n  auto cuctx = ctx->CUDACtx();\n\n  bool dense_compress = row_stride == page.Features() && !page.IsDense();\n  auto n_samples = page.Size();\n  auto cnt = thrust::make_counting_iterator(0ul);\n  auto ptr = reinterpret_cast<T const*>(d_data.data());\n  auto fn = [=] __device__(std::size_t i) mutable {\n    auto [ridx, fidx] = linalg::UnravelIndex(i, n_samples, row_stride);\n    auto r_begin = d_row_ptr[ridx];\n    auto r_end = d_row_ptr[ridx + 1];\n    auto r_size 
= r_end - r_begin;\n\n    bst_bin_t bin_idx;\n    if (dense_compress) {\n      auto f_begin = d_cut_ptrs[fidx];\n      auto f_end = d_cut_ptrs[fidx + 1];\n      // CPU gidx is not compressed, can be used for binary search.\n      bin_idx = common::BinarySearchBin(r_begin, r_end, ptr, f_begin, f_end);\n      if (bin_idx == -1) {\n        bin_idx = null;\n      } else {\n        bin_idx -= d_cut_ptrs[fidx];\n      }\n    } else if (fidx >= r_size) {\n      bin_idx = null;\n    } else {\n      bin_idx = ptr[r_begin + fidx];\n    }\n\n    writer.AtomicWriteSymbol(d_compressed_buffer, bin_idx, i);\n  };\n  thrust::for_each_n(cuctx->CTP(), cnt, row_stride * page.Size(), fn);\n}\n}  // anonymous namespace\n\nEllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,\n                                 common::Span<FeatureType const> ft)\n    : is_dense{page.IsDense()},\n      base_rowid{page.base_rowid},\n      n_rows{page.Size()},\n      cuts_{[&] {\n        auto cuts = std::make_shared<common::HistogramCuts>(page.cut);\n        cuts->SetDevice(ctx->Device());\n        return cuts;\n      }()},\n      info{CalcNumSymbols(\n          ctx,\n          [&] {\n            if (page.Size() == 0) {\n              return static_cast<typename decltype(page.row_ptr)::value_type>(0);\n            }\n            CHECK_GE(page.row_ptr.size(), 2);\n            auto it = common::MakeIndexTransformIter(\n                [&](bst_idx_t i) { return page.row_ptr[i + 1] - page.row_ptr[i]; });\n            return *std::max_element(it, it + page.Size());\n          }(),\n          page.IsDense(), cuts_)} {\n  this->monitor_.Init(\"ellpack_page\");\n  CHECK(ctx->IsCUDA());\n  this->InitCompressedData(ctx);\n\n  // copy gidx\n  common::CompressedByteT* d_compressed_buffer = gidx_buffer.data();\n  dh::device_vector<size_t> row_ptr(page.row_ptr.size());\n  auto d_row_ptr = dh::ToSpan(row_ptr);\n  dh::safe_cuda(cudaMemcpyAsync(d_row_ptr.data(), page.row_ptr.data(), 
d_row_ptr.size_bytes(),\n                                cudaMemcpyHostToDevice, ctx->CUDACtx()->Stream()));\n\n  this->monitor_.Start(\"CopyGHistToEllpack\");\n  this->Visit(ctx, ft, [&](auto&& accessor) {\n    common::DispatchBinType(page.index.GetBinTypeSize(), [&](auto t) {\n      using T = decltype(t);\n      CopyGHistToEllpack<T>(ctx, page, d_row_ptr, this->info.row_stride, accessor.NullValue(),\n                            this->NumSymbols(), this->cuts_->cut_ptrs_.ConstDeviceSpan(),\n                            d_compressed_buffer);\n    });\n  });\n  this->monitor_.Stop(\"CopyGHistToEllpack\");\n}\n\nEllpackPageImpl::~EllpackPageImpl() noexcept(false) {\n  // Sync the stream to make sure all running CUDA kernels finish before deallocation.\n  auto status = curt::DefaultStream().Sync(false);\n  if (status != cudaSuccess) {\n    auto str = cudaGetErrorString(status);\n    // For external-memory, throwing here can trigger a series of calls to\n    // `std::terminate` by various destructors. 
For now, we just log the error.\n    LOG(WARNING) << \"Ran into CUDA error:\" << str << \"\\nXGBoost is likely to abort.\";\n  }\n  dh::safe_cuda(status);\n}\n\n// A functor that copies the data from one EllpackPage to another.\ntemplate <typename IterT>\nstruct CopyPage {\n  common::CompressedBufferWriter cbw;\n  common::CompressedByteT* dst_data_d;\n  IterT src_iterator_d;\n  // The number of elements to skip.\n  size_t offset;\n\n  CopyPage(EllpackPageImpl* dst, EllpackAccessorImpl<IterT> src, size_t offset)\n      : cbw{dst->NumSymbols()},\n        dst_data_d{dst->gidx_buffer.data()},\n        src_iterator_d{src.gidx_iter},\n        offset{offset} {}\n\n  __device__ void operator()(std::size_t element_id) {\n    cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[element_id], element_id + offset);\n  }\n};\n\n// Copy the data from the given EllpackPage to the current page.\nbst_idx_t EllpackPageImpl::Copy(Context const* ctx, EllpackPageImpl const* page, bst_idx_t offset) {\n  monitor_.Start(__func__);\n  bst_idx_t n_elements = page->n_rows * page->info.row_stride;\n  CHECK_NE(this, page);\n  CHECK_EQ(this->info.row_stride, page->info.row_stride);\n  CHECK_EQ(this->NumSymbols(), page->NumSymbols());\n  CHECK_GE(this->n_rows * this->info.row_stride, offset + n_elements);\n  page->Visit(ctx, {}, [&](auto&& src) {\n    thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_elements,\n                       CopyPage{this, src, offset});\n  });\n  monitor_.Stop(__func__);\n  return n_elements;\n}\n\nvoid EllpackPageImpl::SetCuts(std::shared_ptr<common::HistogramCuts const> cuts) {\n  cuts_ = std::move(cuts);\n}\n\n// Initialize the buffer to store compressed features.\nvoid EllpackPageImpl::InitCompressedData(Context const* ctx) {\n  monitor_.Start(__func__);\n  auto num_symbols = this->NumSymbols();\n  // Required buffer size for storing data matrix in ELLPack format.\n  std::size_t compressed_size_bytes = 
common::CompressedBufferWriter::CalculateBufferSize(\n      this->info.row_stride * this->n_rows, num_symbols);\n  auto init = static_cast<common::CompressedByteT>(0);\n  gidx_buffer = common::MakeFixedVecWithCudaMalloc(ctx, compressed_size_bytes, init);\n  monitor_.Stop(__func__);\n}\n\n// Compress a CSR page into ELLPACK.\nvoid EllpackPageImpl::CreateHistIndices(Context const* ctx, const SparsePage& row_batch,\n                                        common::Span<FeatureType const> feature_types) {\n  if (row_batch.Size() == 0) {\n    return;\n  }\n\n  this->monitor_.Start(__func__);\n  auto null_gidx_value = this->NullValue();\n\n  auto const& offset_vec = row_batch.offset.ConstHostVector();\n\n  // bin and compress entries in batches of rows\n  size_t gpu_batch_nrows =\n      std::min(curt::TotalMemory() / (16 * this->info.row_stride * sizeof(Entry)),\n               static_cast<size_t>(row_batch.Size()));\n\n  size_t gpu_nbatches = common::DivRoundUp(row_batch.Size(), gpu_batch_nrows);\n  auto writer = common::CompressedBufferWriter{this->NumSymbols()};\n  auto gidx_buffer_data = gidx_buffer.data();\n\n  for (size_t gpu_batch = 0; gpu_batch < gpu_nbatches; ++gpu_batch) {\n    size_t batch_row_begin = gpu_batch * gpu_batch_nrows;\n    size_t batch_row_end = std::min((gpu_batch + 1) * gpu_batch_nrows, row_batch.Size());\n    size_t batch_nrows = batch_row_end - batch_row_begin;\n\n    const auto ent_cnt_begin = offset_vec[batch_row_begin];\n    const auto ent_cnt_end = offset_vec[batch_row_end];\n\n    /*! \\brief row offset in SparsePage (the input data). 
*/\n    using OffT = typename std::remove_reference_t<decltype(offset_vec)>::value_type;\n    dh::DeviceUVector<OffT> row_ptrs(batch_nrows + 1);\n    auto size =\n        std::distance(offset_vec.data() + batch_row_begin, offset_vec.data() + batch_row_end + 1);\n    dh::safe_cuda(cudaMemcpyAsync(row_ptrs.data(), offset_vec.data() + batch_row_begin,\n                                  size * sizeof(OffT), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n\n    // number of entries in this batch.\n    size_t n_entries = ent_cnt_end - ent_cnt_begin;\n    dh::DeviceUVector<Entry> entries_d(n_entries);\n    // copy data entries to device.\n    if (row_batch.data.DeviceCanRead()) {\n      auto const& d_data = row_batch.data.ConstDeviceSpan();\n      dh::safe_cuda(cudaMemcpyAsync(entries_d.data(), d_data.data() + ent_cnt_begin,\n                                    n_entries * sizeof(Entry), cudaMemcpyDefault,\n                                    ctx->CUDACtx()->Stream()));\n    } else {\n      const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector();\n      dh::safe_cuda(cudaMemcpyAsync(entries_d.data(), data_vec.data() + ent_cnt_begin,\n                                    n_entries * sizeof(Entry), cudaMemcpyDefault,\n                                    ctx->CUDACtx()->Stream()));\n    }\n\n    const dim3 block3(32, 8, 1);  // 256 threads\n    const dim3 grid3(common::DivRoundUp(batch_nrows, block3.x),\n                     common::DivRoundUp(this->info.row_stride, block3.y), 1);\n    auto launcher = [&](auto kernel) {\n      this->Visit(ctx, {}, [&](auto&& device_accessor) {\n        dh::LaunchKernel{grid3, block3, 0, ctx->CUDACtx()->Stream()}(  // NOLINT\n            kernel, writer, gidx_buffer_data, row_ptrs.data(), entries_d.data(),\n            device_accessor.gidx_fvalue_map.data(), device_accessor.feature_segments, feature_types,\n            batch_row_begin, batch_nrows, this->info.row_stride, null_gidx_value);\n      
});\n    };\n    if (this->IsDense()) {\n      launcher(CompressBinEllpackKernel<true, true>);\n    } else {\n      if (this->IsDenseCompressed()) {\n        launcher(CompressBinEllpackKernel<false, true>);\n      } else {\n        launcher(CompressBinEllpackKernel<false, false>);\n      }\n    }\n  }\n  this->monitor_.Stop(__func__);\n}\n\n// Return the number of rows contained in this page.\n[[nodiscard]] bst_idx_t EllpackPageImpl::Size() const { return n_rows; }\n\n[[nodiscard]] std::size_t EllpackPageImpl::MemCostBytes() const {\n  return this->gidx_buffer.size_bytes() + sizeof(this->is_dense) + sizeof(this->n_rows) +\n         sizeof(this->base_rowid) + sizeof(this->info) + this->d_gidx_buffer.size_bytes();\n}\n\n[[nodiscard]] EllpackAccessor EllpackPageImpl::GetDeviceEllpack(\n    Context const* ctx, common::Span<FeatureType const> feature_types) const {\n  // The compress iterator reads at least 5 bytes. The `CalculateBufferSize` method should\n  // guarantee that.\n  CHECK_GE(this->gidx_buffer.size_bytes() + this->d_gidx_buffer.size_bytes(), 5);\n  auto null = this->NullValue();\n  if (d_gidx_buffer.empty()) {\n    auto iter = common::CompressedIterator<std::uint32_t>{gidx_buffer.data(), this->NumSymbols()};\n    return EllpackDeviceAccessor{\n        ctx,  this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows,\n        iter, null,        this->IsDense(),       feature_types};\n  } else {\n    auto iter = common::DoubleCompressedIter<std::uint32_t>{\n        gidx_buffer.data(), gidx_buffer.size_bytes(), d_gidx_buffer.data(), this->NumSymbols()};\n    return DoubleEllpackAccessor{\n        ctx,  this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows,\n        iter, null,        this->IsDense(),       feature_types};\n  }\n}\n\n[[nodiscard]] EllpackAccessor EllpackPageImpl::GetHostEllpack(\n    Context const* ctx, std::vector<common::CompressedByteT>* h_gidx_buffer,\n    common::Span<FeatureType const> feature_types) const {\n  
CHECK_GE(this->gidx_buffer.size_bytes() + this->d_gidx_buffer.size_bytes(), 5);\n  auto null = this->NullValue();\n\n  h_gidx_buffer->resize(this->gidx_buffer.size() + this->d_gidx_buffer.size());\n  if (!this->gidx_buffer.empty()) {\n    dh::safe_cuda(cudaMemcpyAsync(h_gidx_buffer->data(), this->gidx_buffer.data(),\n                                  this->gidx_buffer.size_bytes(), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n  }\n\n  if (!d_gidx_buffer.empty()) {\n    auto dst = h_gidx_buffer->data() + this->gidx_buffer.size_bytes();\n    auto src = d_gidx_buffer.data();\n    dh::safe_cuda(cudaMemcpyAsync(dst, src, this->d_gidx_buffer.size_bytes(), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n\n    auto iter = common::DoubleCompressedIter<std::uint32_t>{\n        h_gidx_buffer->data(), gidx_buffer.size_bytes(), dst, this->NumSymbols()};\n    return DoubleEllpackAccessor{\n        ctx,  this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows,\n        iter, null,        this->IsDense(),       feature_types};\n  }\n\n  auto iter = common::CompressedIterator<std::uint32_t>{h_gidx_buffer->data(), this->NumSymbols()};\n  Context cpu_ctx;\n  auto sctx = ctx->IsCPU() ? 
ctx : &cpu_ctx;\n  return EllpackDeviceAccessor{\n      sctx, this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows,\n      iter, null,        this->IsDense(),       feature_types};\n}\n\nnamespace {\ntemplate <typename Accessor>\nstruct CntOp {\n  Accessor d_acc;\n  explicit CntOp(Accessor d_acc) : d_acc{std::move(d_acc)} {}\n  XGBOOST_DEVICE auto operator()(std::size_t i) { return d_acc.gidx_iter[i]; }\n};\ntemplate <typename Accessor>\nstruct NotNullOp {\n  Accessor d_acc;\n  explicit NotNullOp(Accessor d_acc) : d_acc{std::move(d_acc)} {}\n\n  template <typename T>\n  XGBOOST_DEVICE auto operator()(T gidx) -> bool {\n    return gidx != d_acc.NullValue();\n  }\n};\n}  // namespace\n\n[[nodiscard]] bst_idx_t EllpackPageImpl::NumNonMissing(\n    Context const* ctx, common::Span<FeatureType const> feature_types) const {\n  if (this->IsDense()) {\n    return this->n_rows * this->info.row_stride;\n  }\n  return this->Visit(ctx, feature_types, [&](auto&& d_acc) -> bst_idx_t {\n    using T = typename decltype(d_acc.gidx_iter)::value_type;\n    auto it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ull), CntOp{d_acc});\n    return thrust::count_if(ctx->CUDACtx()->CTP(), it, it + d_acc.row_stride * d_acc.n_rows,\n                            NotNullOp{d_acc});\n  });\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/ellpack_page.cuh",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_\n#define XGBOOST_DATA_ELLPACK_PAGE_CUH_\n\n#include <thrust/binary_search.h>\n\n#include <limits>  // for numeric_limits\n\n#include \"../common/categorical.h\"\n#include \"../common/compressed_iterator.h\"\n#include \"../common/device_helpers.cuh\"\n#include \"../common/hist_util.h\"\n#include \"../common/ref_resource_view.h\"  // for RefResourceView\n#include \"ellpack_page.h\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost {\n/**\n * @brief Struct for accessing and manipulating an ELLPACK matrix on the device.\n *\n * Does not own the underlying memory and may be trivially copied into kernels.\n */\ntemplate <typename IterT>\nstruct EllpackAccessorImpl {\n private:\n  /**\n   * @brief Stores the null value and whether the matrix is dense. The `IsDense` is stored in the\n   * first bit of this value.\n   */\n  bst_idx_t null_value_;\n\n  constexpr static auto Ind() { return static_cast<bst_idx_t>(1); }\n  constexpr static std::size_t NullShift() { return sizeof(null_value_) * 8 - Ind(); }\n\n public:\n  using IterType = IterT;\n\n public:\n  /** @brief Row length for ELLPACK, equal to number of features when the data is dense. */\n  bst_idx_t row_stride;\n  /** @brief Starting index of the rows. Used for external memory. */\n  bst_idx_t base_rowid;\n  /** @brief Number of rows in this batch. */\n  bst_idx_t n_rows;\n  /** @brief Number of features in this page. */\n  bst_feature_t n_features;\n  /** @brief Acessor for the gradient index. */\n  IterType gidx_iter;\n  /** @brief Histogram cut pointers. Size equals to (number of features + 1). */\n  std::uint32_t const* feature_segments;\n  /** @brief Histogram cut values. Size equals to (bins per feature * number of features). */\n  common::Span<const float> gidx_fvalue_map;\n  /** @brief Type of each feature, categorical or numerical. 
*/\n  common::Span<const FeatureType> feature_types;\n\n  EllpackAccessorImpl() = delete;\n  EllpackAccessorImpl(Context const* ctx, std::shared_ptr<const common::HistogramCuts> cuts,\n                      bst_idx_t row_stride, bst_idx_t base_rowid, bst_idx_t n_rows,\n                      IterType gidx_iter, bst_idx_t null_value, bool is_dense,\n                      common::Span<FeatureType const> feature_types)\n      : null_value_{null_value},\n        row_stride{row_stride},\n        base_rowid{base_rowid},\n        n_rows{n_rows},\n        n_features{cuts->NumFeatures()},\n        gidx_iter{gidx_iter},\n        feature_types{feature_types} {\n    if (ctx->IsCUDA()) {\n      gidx_fvalue_map = cuts->cut_values_.ConstDeviceSpan();\n      feature_segments = cuts->cut_ptrs_.ConstDevicePointer();\n    } else {\n      gidx_fvalue_map = cuts->cut_values_.ConstHostSpan();\n      feature_segments = cuts->cut_ptrs_.ConstHostPointer();\n    }\n\n    if (is_dense) {\n      static_assert(NullShift() == 63);\n      CHECK(!IsDense());\n      this->null_value_ |= (Ind() << NullShift());\n    }\n  }\n\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE bool IsDense() const {\n    return (this->null_value_ >> NullShift()) != 0;\n  }\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE bool IsDenseCompressed() const {\n    return this->row_stride == this->NumFeatures();\n  }\n  /**\n   * @brief Given a row index and a feature index, returns the corresponding bin index.\n   *\n   * Uses binary search for look up.\n   *\n   * @tparam global_ridx Whether the row index is global to all ellpack batches or it's\n   *                     local to the current batch.\n   *\n   * @return -1 if it's a missing value.\n   */\n  template <bool global_ridx = true>\n  [[nodiscard]] __device__ bst_bin_t GetBinIndex(bst_idx_t ridx, std::size_t fidx) const {\n    if (global_ridx) {\n      ridx -= base_rowid;\n    }\n    auto row_begin = row_stride * ridx;\n    if (!this->IsDenseCompressed()) {\n      // binary search 
returns -1 if it's missing\n      auto row_end = row_begin + row_stride;\n      bst_bin_t gidx = common::BinarySearchBin(row_begin, row_end, gidx_iter,\n                                               feature_segments[fidx], feature_segments[fidx + 1]);\n      return gidx;\n    }\n    bst_bin_t gidx = gidx_iter[row_begin + fidx];\n    if (gidx == this->NullValue()) {\n      // Missing value in a dense ellpack\n      return -1;\n    }\n    // Dense ellpack\n    gidx += this->feature_segments[fidx];\n    return gidx;\n  }\n  /**\n   * @brief Find a bin to place the value in. Used during construction of the Ellpack.\n   */\n  template <bool is_cat>\n  [[nodiscard]] __device__ bst_bin_t SearchBin(float value, std::size_t fidx) const {\n    auto beg = feature_segments[fidx];\n    auto end = feature_segments[fidx + 1];\n    bst_bin_t gidx = 0;\n    if (is_cat) {\n      auto it = dh::MakeTransformIterator<bst_cat_t>(gidx_fvalue_map.cbegin(),\n                                                     [](float v) { return common::AsCat(v); });\n      gidx = thrust::lower_bound(thrust::seq, it + beg, it + end, common::AsCat(value)) - it;\n    } else {\n      auto it = thrust::upper_bound(thrust::seq, gidx_fvalue_map.cbegin() + beg,\n                                    gidx_fvalue_map.cbegin() + end, value);\n      gidx = it - gidx_fvalue_map.cbegin();\n    }\n\n    if (gidx == end) {\n      gidx -= 1;\n    }\n    return gidx;\n  }\n\n  [[nodiscard]] __device__ float GetFvalue(bst_idx_t ridx, size_t fidx) const {\n    auto gidx = GetBinIndex(ridx, fidx);\n    if (gidx == -1) {\n      return std::numeric_limits<float>::quiet_NaN();\n    }\n    return gidx_fvalue_map[gidx];\n  }\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NullValue() const {\n    return this->null_value_ & ((Ind() << NullShift()) - Ind());\n  }\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NumBins() const { return gidx_fvalue_map.size(); }\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NumRows() const 
{ return n_rows; }\n  [[nodiscard]] XGBOOST_HOST_DEV_INLINE size_t NumFeatures() const { return n_features; }\n};\n\nusing EllpackDeviceAccessor = EllpackAccessorImpl<common::CompressedIterator<std::uint32_t>>;\n\nusing DoubleEllpackAccessor = EllpackAccessorImpl<common::DoubleCompressedIter<std::uint32_t>>;\n\n/**\n * @brief The ellpack accessor uses different gradient index iterators to facilitate\n *        external memory training.\n */\nusing EllpackAccessor = std::variant<EllpackDeviceAccessor, DoubleEllpackAccessor>;\n\nclass GHistIndexMatrix;\n\n/**\n * @brief This is either an Ellpack format matrix or a dense matrix.\n *\n * When no compression can be made by using ellpack, we use this structure as a\n * simple dense matrix. For dense matrix, we can provide extra compression by counting the\n * histogram bin for each feature instead of for the entire dataset.\n */\nclass EllpackPageImpl {\n public:\n  /**\n   * @brief Default constructor.\n   *\n   * This is used in the external memory case. An empty ELLPACK page is constructed with its content\n   * set later by the reader.\n   */\n  EllpackPageImpl() = default;\n\n  /**\n   * @brief Constructor from existing ellpack matrics.\n   *\n   * This is used in the sampling case. The ELLPACK page is constructed from an existing\n   * Ellpack page and the given number of rows.\n   */\n  EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,\n                  bool is_dense, bst_idx_t row_stride, bst_idx_t n_rows);\n  /**\n   * @brief Constructor used for external memory with DMatrix.\n   */\n  EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,\n                  const SparsePage& page, bool is_dense, size_t row_stride,\n                  common::Span<FeatureType const> feature_types);\n\n  /**\n   * @brief Constructor from an existing DMatrix.\n   *\n   * This is used in the in-memory case. 
The ELLPACK page is constructed from an existing DMatrix\n   * in CSR format.\n   */\n  explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);\n  /**\n   * @brief Constructor for Quantile DMatrix using an adapter.\n   */\n  template <typename AdapterBatch>\n  explicit EllpackPageImpl(Context const* ctx, AdapterBatch batch, float missing, bool is_dense,\n                           common::Span<bst_idx_t const> row_counts_span,\n                           common::Span<FeatureType const> feature_types, bst_idx_t row_stride,\n                           bst_idx_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);\n  /**\n   * @brief Constructor from an existing CPU gradient index.\n   */\n  explicit EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,\n                           common::Span<FeatureType const> ft);\n\n  EllpackPageImpl(EllpackPageImpl const& that) = delete;\n  EllpackPageImpl& operator=(EllpackPageImpl const& that) = delete;\n\n  EllpackPageImpl(EllpackPageImpl&& that) = default;\n  EllpackPageImpl& operator=(EllpackPageImpl&& that) = default;\n\n  ~EllpackPageImpl() noexcept(false);\n\n  /**\n   * @brief Copy the elements of the given ELLPACK page into this page.\n   *\n   * @param ctx The GPU context.\n   * @param page The ELLPACK page to copy from.\n   * @param offset The number of elements to skip before copying.\n   * @returns The number of elements copied.\n   */\n  bst_idx_t Copy(Context const* ctx, EllpackPageImpl const* page, bst_idx_t offset);\n\n  /** @return Number of instances in the page. */\n  [[nodiscard]] bst_idx_t Size() const;\n\n  /** @brief Set the base row id for this page. 
*/\n  void SetBaseRowId(std::size_t row_id) { base_rowid = row_id; }\n\n  [[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; }\n  [[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }\n  void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts);\n  /**\n   * @brief Fully dense, there's not a single missing value.\n   */\n  [[nodiscard]] bool IsDense() const { return this->is_dense; }\n  /**\n   * @brief Stored as a dense matrix, but there might be missing values.\n   */\n  [[nodiscard]] bool IsDenseCompressed() const {\n    return this->cuts_->NumFeatures() == this->info.row_stride;\n  }\n\n  /** @return Estimation of memory cost of this page. */\n  [[nodiscard]] std::size_t MemCostBytes() const;\n\n  /**\n   * @brief Return the total number of symbols (total number of bins plus 1 for not\n   *        found).\n   */\n  [[nodiscard]] auto NumSymbols() const { return this->info.n_symbols; }\n  void SetNumSymbols(bst_idx_t n_symbols) { this->info.n_symbols = n_symbols; }\n  /**\n   * @brief Get the value used to represent missing.\n   */\n  [[nodiscard]] bst_idx_t NullValue() const {\n    return this->IsDense() ? 
this->NumSymbols() : this->NumSymbols() - 1;\n  }\n  /**\n   * @brief Copy basic shape from another page.\n   */\n  void CopyInfo(EllpackPageImpl const* page) {\n    CHECK_NE(this, page);\n    this->n_rows = page->Size();\n    this->is_dense = page->IsDense();\n    this->info.row_stride = page->info.row_stride;\n    this->SetBaseRowId(page->base_rowid);\n    this->SetNumSymbols(page->NumSymbols());\n  }\n  /**\n   * @brief Get an accessor backed by the device storage.\n   */\n  EllpackAccessor GetDeviceEllpack(Context const* ctx,\n                                   common::Span<FeatureType const> feature_types = {}) const;\n  /**\n   * @brief Get an accessor backed by the host storage.\n   *\n   * @param h_gidx_buffer A buffer used as the backing storage of the accessor.\n   *\n   * @return An accessor variant.\n   */\n  EllpackAccessor GetHostEllpack(Context const* ctx,\n                                 std::vector<common::CompressedByteT>* h_gidx_buffer,\n                                 common::Span<FeatureType const> feature_types = {}) const;\n  /**\n   * @brief Vistor pattern.\n   *\n   * @param fn A callable that accepts both variants of the ellpack accessor.\n   *\n   * @return An accessor variant.\n   */\n  template <typename Fn>\n  decltype(auto) Visit(Context const* ctx, common::Span<FeatureType const> feature_types,\n                       Fn&& fn) const {\n    auto acc = this->GetDeviceEllpack(ctx, feature_types);\n    return std::visit(std::forward<Fn>(fn), acc);\n  }\n  /**\n   * @brief Vistor pattern with a host accessor.\n   *\n   * @param h_gidx_buffer A buffer used as the backing storage of the accessor.\n   * @param fn A callable that accepts both variants of the ellpack accessor.\n   */\n  template <typename Fn>\n  decltype(auto) VisitOnHost(Context const* ctx,\n                             std::vector<common::CompressedByteT>* h_gidx_buffer,\n                             common::Span<FeatureType const> feature_types, Fn&& fn) const {\n    auto 
acc = this->GetHostEllpack(ctx, h_gidx_buffer, feature_types);\n    return std::visit(std::forward<Fn>(fn), acc);\n  }\n  // helper for visit that doesn't need the raw data.\n  template <typename Fn>\n  decltype(auto) VisitOnHost(Context const* ctx, Fn&& fn) const {\n    common::Span<FeatureType const> feature_types;\n    std::vector<common::CompressedByteT> h_gidx_buffer;\n    auto acc = this->GetHostEllpack(ctx, &h_gidx_buffer, feature_types);\n    return std::visit(std::forward<Fn>(fn), acc);\n  }\n  /**\n   * @brief Calculate the number of non-missing values.\n   */\n  [[nodiscard]] bst_idx_t NumNonMissing(Context const* ctx,\n                                        common::Span<FeatureType const> feature_types) const;\n\n private:\n  /**\n   * @brief Compress a single page of CSR data into ELLPACK.\n   *\n   * @param row_batch The CSR page.\n   */\n  void CreateHistIndices(Context const* ctx, const SparsePage& row_batch,\n                         common::Span<FeatureType const> feature_types);\n  /**\n   * @brief Initialize the buffer to store compressed features.\n   */\n  void InitCompressedData(Context const* ctx);\n\n  std::shared_ptr<common::HistogramCuts const> cuts_;\n\n public:\n  bool is_dense{false};\n\n  bst_idx_t base_rowid{0};\n  bst_idx_t n_rows{0};\n  /**\n   * @brief Index of the gradient histogram, which is stored in ELLPACK format.\n   *\n   * This can be backed by various storage types.\n   */\n  common::RefResourceView<common::CompressedByteT> gidx_buffer;\n  /**\n   * @brief Second buffer. Used for external memory where we might have a part of the\n   * cache in device and another part of the cache in host.\n   *\n   * This buffer is optional. It must be on device if not empty.\n   */\n  common::RefResourceView<common::CompressedByteT const> d_gidx_buffer;\n  /**\n   * @brief Compression information.\n   */\n  struct Info {\n    /** @brief Row length for ELLPACK. 
*/\n    bst_idx_t row_stride{0};\n    /** @brief The number of unique bins including missing. */\n    bst_idx_t n_symbols{0};\n  } info;\n\n private:\n  common::Monitor monitor_;\n};\n\n[[nodiscard]] inline bst_idx_t GetRowStride(DMatrix* dmat) {\n  if (dmat->IsDense()) {\n    return dmat->Info().num_col_;\n  }\n\n  bst_idx_t row_stride = 0;\n  for (const auto& batch : dmat->GetBatches<SparsePage>()) {\n    const auto& row_offset = batch.offset.ConstHostVector();\n    for (auto i = 1ull; i < row_offset.size(); i++) {\n      row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1]));\n    }\n  }\n  return row_stride;\n}\n\n[[nodiscard]] EllpackPageImpl::Info CalcNumSymbols(\n    Context const* ctx, bst_idx_t row_stride, bool is_dense,\n    std::shared_ptr<common::HistogramCuts const> cuts);\n}  // namespace xgboost\n\n#endif  // XGBOOST_DATA_ELLPACK_PAGE_CUH_\n"
  },
  {
    "path": "src/data/ellpack_page.h",
    "content": "/**\n * Copyright 2017-2023 by XGBoost Contributors\n */\n#ifndef XGBOOST_DATA_ELLPACK_PAGE_H_\n#define XGBOOST_DATA_ELLPACK_PAGE_H_\n\n#include <memory>  // for unique_ptr\n\n#include \"../common/hist_util.h\"  // for HistogramCuts\n#include \"xgboost/context.h\"      // for Context\n#include \"xgboost/data.h\"         // for DMatrix, BatchParam\n\nnamespace xgboost {\nclass EllpackPageImpl;\n/**\n * @brief A page stored in ELLPACK format.\n *\n * This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid\n * including CUDA-specific implementation details in the header.\n *\n * See @ref EllpackPageImpl .\n */\nclass EllpackPage {\n public:\n  /**\n   * @brief Default constructor.\n   *\n   * This is used in the external memory case. An empty ELLPACK page is constructed with its content\n   * set later by the reader.\n   */\n  EllpackPage();\n  /**\n   * @brief Constructor from an existing DMatrix.\n   *\n   * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix\n   * in CSR format.\n   */\n  explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);\n\n  ~EllpackPage() noexcept(false);\n\n  EllpackPage(EllpackPage&& that);\n\n  /*! \\return Number of instances in the page. */\n  [[nodiscard]] bst_idx_t Size() const;\n\n  /*! \\brief Set the base row id for this page. */\n  void SetBaseRowId(std::size_t row_id);\n\n  [[nodiscard]] const EllpackPageImpl* Impl() const { return impl_.get(); }\n  EllpackPageImpl* Impl() { return impl_.get(); }\n\n  [[nodiscard]] common::HistogramCuts const& Cuts() const;\n  [[nodiscard]] bst_idx_t BaseRowId() const;\n\n private:\n  std::unique_ptr<EllpackPageImpl> impl_;\n};\n}  // namespace xgboost\n#endif  // XGBOOST_DATA_ELLPACK_PAGE_H_\n"
  },
  {
    "path": "src/data/ellpack_page_raw_format.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <dmlc/registry.h>\n\n#include <cstddef>  // for size_t\n#include <vector>   // for vector\n\n#include \"../common/cuda_rt_utils.h\"\n#include \"../common/cuda_stream.h\"          // for Event\n#include \"../common/io.h\"                   // for AlignedResourceReadStream, AlignedFileWriteStream\n#include \"../common/ref_resource_view.cuh\"  // for MakeFixedVecWithCudaMalloc\n#include \"../common/ref_resource_view.h\"    // for ReadVec, WriteVec\n#include \"ellpack_page.cuh\"                 // for EllpackPage\n#include \"ellpack_page_raw_format.h\"\n#include \"ellpack_page_source.h\"\n\nnamespace xgboost::data {\nDMLC_REGISTRY_FILE_TAG(ellpack_page_raw_format);\n\nnamespace {\n// Function to support system without HMM or ATS\ntemplate <typename T>\n[[nodiscard]] bool ReadDeviceVec(common::AlignedResourceReadStream* fi,\n                                 common::RefResourceView<T>* vec) {\n  xgboost_NVTX_FN_RANGE();\n\n  std::uint64_t n{0};\n  if (!fi->Read(&n)) {\n    return false;\n  }\n  if (n == 0) {\n    return true;\n  }\n\n  auto expected_bytes = sizeof(T) * n;\n\n  auto [ptr, n_bytes] = fi->Consume(expected_bytes);\n  if (n_bytes != expected_bytes) {\n    return false;\n  }\n\n  *vec = common::MakeFixedVecWithCudaMalloc<T>(n);\n  dh::safe_cuda(\n      cudaMemcpyAsync(vec->data(), ptr, n_bytes, cudaMemcpyDefault, curt::DefaultStream()));\n  return true;\n}\n}  // namespace\n\n#define RET_IF_NOT(expr) \\\n  if (!(expr)) {         \\\n    return false;        \\\n  }\n\n[[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page,\n                                              common::AlignedResourceReadStream* fi) {\n  xgboost_NVTX_FN_RANGE();\n  auto* impl = page->Impl();\n\n  RET_IF_NOT(fi->Read(&impl->n_rows));\n  RET_IF_NOT(fi->Read(&impl->is_dense));\n  RET_IF_NOT(fi->Read(&impl->info.row_stride));\n\n  if (this->param_.prefetch_copy || !has_hmm_ats_) {\n    
RET_IF_NOT(ReadDeviceVec(fi, &impl->gidx_buffer));\n  } else {\n    RET_IF_NOT(common::ReadVec(fi, &impl->gidx_buffer));\n  }\n  RET_IF_NOT(fi->Read(&impl->base_rowid));\n  bst_idx_t n_symbols{0};\n  RET_IF_NOT(fi->Read(&n_symbols));\n  impl->SetNumSymbols(n_symbols);\n\n  impl->SetCuts(this->cuts_);\n\n  curt::DefaultStream().Sync();\n  return true;\n}\n\n[[nodiscard]] std::size_t EllpackPageRawFormat::Write(EllpackPage const& page,\n                                                      common::AlignedFileWriteStream* fo) {\n  xgboost_NVTX_FN_RANGE();\n\n  std::size_t bytes{0};\n  auto* impl = page.Impl();\n  bytes += fo->Write(impl->n_rows);\n  bytes += fo->Write(impl->is_dense);\n  bytes += fo->Write(impl->info.row_stride);\n  std::vector<common::CompressedByteT> h_gidx_buffer;\n  Context ctx = Context{}.MakeCUDA(curt::CurrentDevice());\n  // write data into the h_gidx_buffer\n  [[maybe_unused]] auto h_accessor = impl->GetHostEllpack(&ctx, &h_gidx_buffer);\n  bytes += common::WriteVec(fo, h_gidx_buffer);\n  bytes += fo->Write(impl->base_rowid);\n  bytes += fo->Write(impl->NumSymbols());\n\n  curt::DefaultStream().Sync();\n  return bytes;\n}\n\n[[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page, EllpackHostCacheStream* fi) const {\n  xgboost_NVTX_FN_RANGE_C(252, 198, 3);\n\n  auto* impl = page->Impl();\n  CHECK(this->cuts_->cut_values_.DeviceCanRead());\n\n  auto ctx = Context{}.MakeCUDA(curt::CurrentDevice());\n\n  auto dispatch = [&] {\n    fi->Read(&ctx, page, this->param_.prefetch_copy || !this->has_hmm_ats_);\n    impl->SetCuts(this->cuts_);\n  };\n\n  if (ConsoleLogger::GlobalVerbosity() == ConsoleLogger::LogVerbosity::kDebug) {\n    curt::Event start{false}, stop{false};\n    float milliseconds = 0;\n    start.Record(ctx.CUDACtx()->Stream());\n\n    dispatch();\n\n    stop.Record(ctx.CUDACtx()->Stream());\n    stop.Sync();\n    dh::safe_cuda(cudaEventElapsedTime(&milliseconds, start, stop));\n    double n_bytes = 
page->Impl()->MemCostBytes();\n    double tp = (n_bytes / static_cast<double>((1ul << 30))) * 1000.0 / milliseconds;\n    LOG(DEBUG) << \"Ellpack \" << __func__ << \" throughput:\" << tp << \"GB/s\";\n  } else {\n    dispatch();\n  }\n\n  curt::DefaultStream().Sync();\n\n  return true;\n}\n\n[[nodiscard]] std::size_t EllpackPageRawFormat::Write(EllpackPage const& page,\n                                                      EllpackHostCacheStream* fo) const {\n  xgboost_NVTX_FN_RANGE_C(3, 252, 198);\n\n  bool new_page = fo->Write(page);\n  curt::DefaultStream().Sync();\n\n  if (new_page) {\n    auto cache = fo->Share();\n    return cache->SizeBytes(cache->Size() - 1);  // last page\n  } else {\n    return InvalidPageSize();\n  }\n}\n\n#undef RET_IF_NOT\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/ellpack_page_raw_format.h",
    "content": "/**\n * Copyright 2019-2024, XGBoost contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n\n#include \"../common/io.h\"        // for AlignedResourceReadStream\n#include \"sparse_page_writer.h\"  // for SparsePageFormat\n#include \"xgboost/data.h\"        // for EllpackPage\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // for AssertGPUSupport\n#endif                         // !defined(XGBOOST_USE_CUDA)`\n\nnamespace xgboost::common {\nclass HistogramCuts;\n}\n\nnamespace xgboost::data {\n\nclass EllpackHostCacheStream;\n\nclass EllpackPageRawFormat : public SparsePageFormat<EllpackPage> {\n  std::shared_ptr<common::HistogramCuts const> cuts_;\n  DeviceOrd device_;\n  BatchParam param_;\n  // Supports CUDA HMM or ATS\n  bool has_hmm_ats_{false};\n\n public:\n  explicit EllpackPageRawFormat(std::shared_ptr<common::HistogramCuts const> cuts, DeviceOrd device,\n                                BatchParam param, bool has_hmm_ats)\n      : cuts_{std::move(cuts)},\n        device_{device},\n        param_{std::move(param)},\n        has_hmm_ats_{has_hmm_ats} {}\n  [[nodiscard]] bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override;\n  [[nodiscard]] std::size_t Write(EllpackPage const& page,\n                                  common::AlignedFileWriteStream* fo) override;\n\n  [[nodiscard]] bool Read(EllpackPage* page, EllpackHostCacheStream* fi) const;\n  [[nodiscard]] std::size_t Write(EllpackPage const& page, EllpackHostCacheStream* fo) const;\n};\n\n#if !defined(XGBOOST_USE_CUDA)\ninline bool EllpackPageRawFormat::Read(EllpackPage*, common::AlignedResourceReadStream*) {\n  common::AssertGPUSupport();\n  return false;\n}\n\ninline std::size_t EllpackPageRawFormat::Write(const EllpackPage&,\n                                               common::AlignedFileWriteStream*) {\n  common::AssertGPUSupport();\n  return 
0;\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/ellpack_page_source.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int8_t, uint64_t, uint32_t\n#include <memory>     // for shared_ptr, make_unique, make_shared\n#include <numeric>    // for accumulate\n#include <utility>    // for move\n\n#include \"../common/common.h\"                // for HumanMemUnit, safe_cuda\n#include \"../common/cuda_dr_utils.h\"         // for CUDA_HW_DECOM_AVAILABLE\n#include \"../common/cuda_rt_utils.h\"         // for SetDevice, GetDrVersionGlobal\n#include \"../common/cuda_stream.h\"           // for StreamRef, DefaultStream, Event\n#include \"../common/cuda_stream_pool.h\"      // for StreamPool\n#include \"../common/device_compression.cuh\"  // for CompressSnappy, MakeSnappyDecomprMgr\n#include \"../common/device_helpers.cuh\"      // for CurrentDevice\n#include \"../common/numa_topo.h\"             // for NumaMemCanCross, GetNumaMemBind\n#include \"../common/ref_resource_view.cuh\"   // for MakeFixedVecWithCudaMalloc\n#include \"../common/resource.cuh\"            // for PrivateCudaMmapConstStream\n#include \"../common/transform_iterator.h\"    // for MakeIndexTransformIter\n#include \"batch_utils.h\"                     // for HostRatioIsAuto\n#include \"ellpack_page.cuh\"                  // for EllpackPageImpl\n#include \"ellpack_page.h\"                    // for EllpackPage\n#include \"ellpack_page_source.h\"\n#include \"proxy_dmatrix.cuh\"  // for DispatchAny\n#include \"xgboost/base.h\"     // for bst_idx_t\n\nnamespace xgboost::data {\nnamespace {\n// Can we use hardware decompression?\n[[nodiscard]] bool CanUseHwDecomp(EllpackPageImpl const* page, bool allow_fallback) {\n#if defined(CUDA_HW_DECOM_AVAILABLE) && defined(XGBOOST_USE_NVCOMP)\n  // We use it only for sparse pages.\n  return !page->IsDenseCompressed() && (dc::GetGlobalDeStatus().avail || allow_fallback);\n#else\n  (void)allow_fallback;\n  (void)page;\n  
return false;\n#endif\n}\n}  // namespace\n\n/**\n * Cache\n */\nEllpackMemCache::EllpackMemCache(EllpackCacheInfo cinfo, std::int32_t n_workers)\n    : cache_mapping{std::move(cinfo.cache_mapping)},\n      buffer_bytes{std::move(cinfo.buffer_bytes)},\n      buffer_rows{std::move(cinfo.buffer_rows)},\n      cache_host_ratio{cinfo.cache_host_ratio},\n      hw_decomp_ratio{cinfo.hw_decomp_ratio},\n      allow_decomp_fallback{cinfo.allow_decomp_fallback},\n      streams{std::make_unique<curt::StreamPool>(n_workers)},\n      pool{[] {\n#if defined(__linux__)\n        std::int32_t major = -1, minor = -1;\n        curt::GetDrVersionGlobal(&major, &minor);\n        if (major >= 12 && minor >= 5 || major > 12) {\n          return std::make_shared<dc::HostPinnedMemPool>();\n        }\n        return std::shared_ptr<dc::HostPinnedMemPool>{nullptr};\n#else\n        return std::shared_ptr<dc::HostPinnedMemPool>{nullptr};\n#endif\n      }()} {\n  CHECK_EQ(buffer_bytes.size(), buffer_rows.size());\n  CHECK(!detail::HostRatioIsAuto(this->cache_host_ratio));\n  CHECK_GE(this->cache_host_ratio, 0.0) << error::CacheHostRatioInvalid();\n  CHECK_LE(this->cache_host_ratio, 1.0) << error::CacheHostRatioInvalid();\n}\n\nEllpackMemCache::~EllpackMemCache() = default;\n\n[[nodiscard]] std::size_t EllpackMemCache::SizeBytes() const noexcept(true) {\n  auto it = common::MakeIndexTransformIter([&](auto i) { return this->SizeBytes(i); });\n  using T = std::iterator_traits<decltype(it)>::value_type;\n  return std::accumulate(it, it + this->Size(), static_cast<T>(0));\n}\n\n[[nodiscard]] std::size_t EllpackMemCache::DeviceSizeBytes() const noexcept(true) {\n  auto it =\n      common::MakeIndexTransformIter([&](auto i) { return this->d_pages.at(i).size_bytes(); });\n  using T = std::iterator_traits<decltype(it)>::value_type;\n  return std::accumulate(it, it + this->Size(), static_cast<T>(0));\n}\n\n[[nodiscard]] std::size_t EllpackMemCache::SizeBytes(std::size_t i) const noexcept(true) {\n  
return this->h_pages.at(i)->MemCostBytes() + this->d_pages.at(i).size_bytes() +\n         this->c_pages.at(i).first.DecompressedBytes();\n}\n\n[[nodiscard]] std::size_t EllpackMemCache::GidxSizeBytes(std::size_t i) const noexcept(true) {\n  return this->h_pages.at(i)->gidx_buffer.size_bytes() + this->d_pages.at(i).size_bytes() +\n         this->c_pages.at(i).first.DecompressedBytes();\n}\n\n[[nodiscard]] std::size_t EllpackMemCache::GidxSizeBytes() const noexcept(true) {\n  auto it = common::MakeIndexTransformIter([&](auto i) { return this->GidxSizeBytes(i); });\n  using T = std::iterator_traits<decltype(it)>::value_type;\n  return std::accumulate(it, it + this->Size(), static_cast<T>(0));\n}\n\n[[nodiscard]] EllpackMemCache::PagePtr EllpackMemCache::At(std::int32_t k) const {\n  auto const* h_ptr = this->h_pages.at(k).get();\n  auto const* d_ptr = &this->d_pages.at(k);\n  auto const* c_ptr = &this->c_pages.at(k);\n  return std::make_tuple(h_ptr, d_ptr, c_ptr);\n}\n\n[[nodiscard]] EllpackMemCache::PageRef EllpackMemCache::Back() {\n  auto& h_ref = this->h_pages.back();\n  auto& d_ref = this->d_pages.back();\n  auto& c_ref = this->c_pages.back();\n  return {h_ref, d_ref, c_ref};\n}\n\n/**\n * Cache stream.\n */\nclass EllpackHostCacheStreamImpl {\n  std::shared_ptr<EllpackMemCache> cache_;\n  std::int32_t ptr_{0};\n\n public:\n  explicit EllpackHostCacheStreamImpl(std::shared_ptr<EllpackMemCache> cache)\n      : cache_{std::move(cache)} {}\n\n  auto Share() const { return this->cache_; }\n\n  void Seek(bst_idx_t offset_bytes) {\n    std::size_t n_bytes{0};\n    std::int32_t k{-1};\n    for (std::size_t i = 0, n = cache_->h_pages.size(); i < n; ++i) {\n      if (n_bytes == offset_bytes) {\n        k = i;\n        break;\n      }\n      n_bytes += this->cache_->SizeBytes(i);\n    }\n    if (offset_bytes == n_bytes && k == -1) {\n      k = this->cache_->h_pages.size();  // seek end\n    }\n    CHECK_NE(k, -1) << \"Invalid offset:\" << offset_bytes;\n    ptr_ = k;\n  
}\n\n  [[nodiscard]] bool Write(EllpackPage const& page) {\n    auto impl = page.Impl();\n    auto ctx = Context{}.MakeCUDA(dh::CurrentDevice());\n\n    this->cache_->sizes_orig.push_back(page.Impl()->MemCostBytes());\n    auto orig_ptr = this->cache_->sizes_orig.size() - 1;\n\n    CHECK_LT(orig_ptr, this->cache_->NumBatchesOrig());\n    auto cache_idx = this->cache_->cache_mapping.at(orig_ptr);\n    // Wrap up the previous page if this is a new page, or this is the last page.\n    auto new_page = cache_idx == this->cache_->h_pages.size();\n    // Last page expected from the user.\n    auto last_page = (orig_ptr + 1) == this->cache_->NumBatchesOrig();\n\n    bool const no_concat = this->cache_->NoConcat();\n\n    auto cache_host_ratio = this->cache_->cache_host_ratio;\n    CHECK_GE(cache_host_ratio, 0) << error::CacheHostRatioInvalid();\n    CHECK_LE(cache_host_ratio, 1) << error::CacheHostRatioInvalid();\n\n    // Get the size of the host cache.\n    auto get_host_nbytes = [&](EllpackPageImpl const* old_impl) {\n      // Special handling due to floating points.\n      if (this->cache_->cache_host_ratio == 1.0) {\n        return old_impl->gidx_buffer.size_bytes();\n      }\n      if (this->cache_->cache_host_ratio == 0.0) {\n        return static_cast<std::size_t>(0);\n      }\n      // Calculate based on the `cache_host_ratio` parameter.\n      auto n_bytes =\n          std::max(static_cast<std::size_t>(old_impl->gidx_buffer.size_bytes() * cache_host_ratio),\n                   std::size_t{1});\n      return n_bytes;\n    };\n\n    // Finish writing a (concatenated) cache page.\n    auto commit_page = [&](EllpackPageImpl const* old_impl) {\n      CHECK_EQ(old_impl->gidx_buffer.Resource()->Type(), common::ResourceHandler::kCudaMalloc);\n      auto new_impl = std::make_unique<EllpackPageImpl>();\n      new_impl->CopyInfo(old_impl);\n\n      // Split the cache into host cache, compressed host cache, and the device cache. 
We\n      // use the decompression engine only for sparse data.\n      auto n_bytes = get_host_nbytes(old_impl);\n      CHECK_LE(n_bytes, old_impl->gidx_buffer.size_bytes());\n      std::size_t n_h_bytes = n_bytes, n_comp_bytes = 0;\n      bool can_use_hw = CanUseHwDecomp(old_impl, this->cache_->allow_decomp_fallback);\n      if (can_use_hw) {\n        // FIXME(jiamingy): The decomp_ratio is not exposed to the user and we don't yet\n        // have auto configuration for this parameter. We can make it more flexible. More\n        // profiling is needed.\n        bool specified = std::isnan(this->cache_->hw_decomp_ratio);\n        auto hw_decomp_ratio = specified ? 0.4f : this->cache_->hw_decomp_ratio;\n        CHECK_LE(hw_decomp_ratio, 1.0);\n        CHECK_GE(hw_decomp_ratio, 0.0);\n        n_comp_bytes = n_bytes * hw_decomp_ratio;\n        n_h_bytes = n_bytes - n_comp_bytes;\n      }\n      CHECK_EQ(n_bytes, n_h_bytes + n_comp_bytes);\n\n      // Normal host cache\n      new_impl->gidx_buffer =\n          common::MakeFixedVecWithPinnedMalloc<common::CompressedByteT>(n_h_bytes);\n      if (n_h_bytes > 0) {\n        dh::safe_cuda(cudaMemcpyAsync(new_impl->gidx_buffer.data(), old_impl->gidx_buffer.data(),\n                                      n_h_bytes, cudaMemcpyDefault));\n      }\n\n      // Compressed host cache\n      dh::DeviceUVector<std::uint8_t> tmp;\n      dc::CuMemParams c_out;\n      std::size_t constexpr kChunkSize = 1ul << 21;\n      auto params = dc::CompressSnappy(\n          &ctx, old_impl->gidx_buffer.ToSpan().subspan(n_h_bytes, n_comp_bytes), &tmp, kChunkSize);\n      common::RefResourceView<std::uint8_t> c_buf = dc::CoalesceCompressedBuffersToHost(\n          ctx.CUDACtx()->Stream(), this->cache_->pool, params, tmp, &c_out);\n      auto c_page = dc::MakeSnappyDecomprMgr(ctx.CUDACtx()->Stream(), this->cache_->pool,\n                                             std::move(c_out), c_buf.ToSpan());\n      CHECK_EQ(c_page.DecompressedBytes() + 
new_impl->gidx_buffer.size_bytes(), n_bytes);\n\n      // Device cache\n      auto remaining = old_impl->gidx_buffer.size_bytes() - n_bytes;\n      auto d_page = common::MakeFixedVecWithCudaMalloc<common::CompressedByteT>(remaining);\n      if (remaining > 0) {\n        dh::safe_cuda(cudaMemcpyAsync(d_page.data(), old_impl->gidx_buffer.data() + n_bytes,\n                                      remaining, cudaMemcpyDefault));\n      }\n      CHECK_LE(new_impl->gidx_buffer.size(), old_impl->gidx_buffer.size());\n      CHECK_EQ(new_impl->MemCostBytes() + d_page.size_bytes() + c_page.DecompressedBytes(),\n               old_impl->MemCostBytes());\n      LOG(INFO) << \"Create cache page with size:\"\n                << common::HumanMemUnit(new_impl->MemCostBytes() + d_page.size_bytes() +\n                                        c_page.DecompressedBytes());\n      return std::make_tuple(std::move(new_impl), std::move(d_page),\n                             std::make_pair(std::move(c_page), std::move(c_buf)));\n    };\n\n    if (no_concat) {\n      CHECK(new_page);\n      auto old_impl = page.Impl();\n      auto [commited, d_page, c_page] = commit_page(old_impl);\n\n      this->cache_->offsets.push_back(old_impl->n_rows * old_impl->info.row_stride);\n      this->cache_->h_pages.emplace_back(std::move(commited));\n      this->cache_->d_pages.emplace_back(std::move(d_page));\n      this->cache_->c_pages.emplace_back(std::move(c_page));\n      return new_page;\n    }\n\n    if (new_page) {\n      if (!this->cache_->h_pages.empty()) {\n        // Need to wrap up the previous page.\n        // Replace the previous page (on device) with a new page on host.\n        this->cache_->Back() = commit_page(this->cache_->h_pages.back().get());\n      }\n      // Push a new page\n      auto n_bytes = this->cache_->buffer_bytes.at(this->cache_->h_pages.size());\n      auto n_samples = this->cache_->buffer_rows.at(this->cache_->h_pages.size());\n      auto new_impl = 
std::make_unique<EllpackPageImpl>(&ctx, impl->CutsShared(), impl->IsDense(),\n                                                        impl->info.row_stride, n_samples);\n      new_impl->SetBaseRowId(impl->base_rowid);\n      new_impl->SetNumSymbols(impl->NumSymbols());\n      new_impl->gidx_buffer =\n          common::MakeFixedVecWithCudaMalloc<common::CompressedByteT>(&ctx, n_bytes, 0);\n      auto offset = new_impl->Copy(&ctx, impl, 0);\n\n      this->cache_->offsets.push_back(offset);\n\n      // Make sure we can always access the back of the vectors\n      this->cache_->h_pages.emplace_back(std::move(new_impl));\n      this->cache_->d_pages.emplace_back();\n      this->cache_->c_pages.emplace_back();\n    } else {\n      // Concatenate into the device pages even though `d_pages` and `c_pages` are\n      // used. We split the page at the commit stage.\n      CHECK(!this->cache_->h_pages.empty());\n      CHECK_EQ(cache_idx, this->cache_->h_pages.size() - 1);\n      auto& new_impl = this->cache_->h_pages.back();\n      auto offset = new_impl->Copy(&ctx, impl, this->cache_->offsets.back());\n      this->cache_->offsets.back() += offset;\n    }\n\n    // No need to copy if it's already in device.\n    if (last_page) {\n      this->cache_->Back() = commit_page(this->cache_->h_pages.back().get());\n    }\n\n    CHECK_EQ(this->cache_->h_pages.size(), this->cache_->d_pages.size());\n    CHECK_EQ(this->cache_->h_pages.size(), this->cache_->c_pages.size());\n    return new_page;\n  }\n\n  void Read(Context const* ctx, EllpackPage* out, bool prefetch_copy) const {\n    CHECK_EQ(this->cache_->h_pages.size(), this->cache_->d_pages.size());\n    CHECK_EQ(this->cache_->h_pages.size(), this->cache_->c_pages.size());\n    auto [h_page, d_page, c_page] = this->cache_->At(this->ptr_);\n    // Skip copy if the full page is on device\n    bool on_device = (h_page->gidx_buffer.empty() && c_page->first.Empty()) && !d_page->empty();\n\n    auto out_impl = out->Impl();\n    // We can't 
access a compressed page directly.\n    if (!c_page->first.Empty()) {\n      prefetch_copy = true;\n    }\n\n    LOG(DEBUG) << \"On device: \" << on_device << \", prefetch copy:\" << prefetch_copy\n               << \", compressed:\" << (!c_page->first.Empty());\n    if (on_device) {\n      CHECK(h_page->gidx_buffer.empty());\n      auto d_res = d_page->Resource();\n      out_impl->gidx_buffer = common::RefResourceView<common::CompressedByteT>{\n          d_res->DataAs<common::CompressedByteT>(), d_page->size(), d_res};\n      CHECK(out_impl->d_gidx_buffer.empty());\n    } else if (prefetch_copy) {\n      // Copy the data in the same order as written\n      // Normal host cache\n      auto n_bytes = this->cache_->GidxSizeBytes(this->ptr_);\n      out_impl->gidx_buffer = common::MakeFixedVecWithCudaMalloc<common::CompressedByteT>(n_bytes);\n      if (!h_page->gidx_buffer.empty()) {\n        dh::safe_cuda(cudaMemcpyAsync(out_impl->gidx_buffer.data(), h_page->gidx_buffer.data(),\n                                      h_page->gidx_buffer.size_bytes(), cudaMemcpyDefault,\n                                      ctx->CUDACtx()->Stream()));\n      }\n      // Compressed host cache\n      if (!c_page->first.Empty()) {\n        auto stream = this->cache_->streams->Next();\n        auto out = out_impl->gidx_buffer.ToSpan().subspan(h_page->gidx_buffer.size_bytes(),\n                                                          c_page->first.DecompressedBytes());\n        dc::DecompressSnappy(stream, c_page->first, out, this->cache_->allow_decomp_fallback);\n        curt::Event e;\n        e.Record(stream);\n        ctx->CUDACtx()->Stream().Wait(e);\n      }\n      // Device cache\n      if (!d_page->empty()) {\n        auto out = out_impl->gidx_buffer.ToSpan().subspan(h_page->gidx_buffer.size_bytes() +\n                                                          c_page->first.DecompressedBytes());\n        CHECK_EQ(out.size_bytes(), d_page->size_bytes());\n        
dh::safe_cuda(cudaMemcpyAsync(out.data(), d_page->data(), d_page->size_bytes(),\n                                      cudaMemcpyDefault, ctx->CUDACtx()->Stream()));\n      }\n    } else {\n      // Direct access\n      auto h_res = h_page->gidx_buffer.Resource();\n      CHECK(h_res->DataAs<common::CompressedByteT>() == h_page->gidx_buffer.data());\n      out_impl->gidx_buffer = common::RefResourceView<common::CompressedByteT>{\n          h_res->DataAs<common::CompressedByteT>(), h_page->gidx_buffer.size(), h_res};\n      CHECK(out_impl->d_gidx_buffer.empty());\n      if (!d_page->empty()) {\n        out_impl->d_gidx_buffer = common::RefResourceView<common::CompressedByteT const>{\n            d_page->data(), d_page->size(), d_page->Resource()};\n      }\n    }\n\n    out_impl->CopyInfo(h_page);\n  }\n};\n\n/**\n * EllpackHostCacheStream\n */\nEllpackHostCacheStream::EllpackHostCacheStream(std::shared_ptr<EllpackMemCache> cache)\n    : p_impl_{std::make_unique<EllpackHostCacheStreamImpl>(std::move(cache))} {}\n\nEllpackHostCacheStream::~EllpackHostCacheStream() = default;\n\nstd::shared_ptr<EllpackMemCache const> EllpackHostCacheStream::Share() const {\n  return p_impl_->Share();\n}\n\nvoid EllpackHostCacheStream::Seek(bst_idx_t offset_bytes) { this->p_impl_->Seek(offset_bytes); }\n\nvoid EllpackHostCacheStream::Read(Context const* ctx, EllpackPage* page, bool prefetch_copy) const {\n  this->p_impl_->Read(ctx, page, prefetch_copy);\n}\n\n[[nodiscard]] bool EllpackHostCacheStream::Write(EllpackPage const& page) {\n  return this->p_impl_->Write(page);\n}\n\n/**\n * EllpackCacheStreamPolicy\n */\ntemplate <typename S, template <typename> typename F>\n[[nodiscard]] std::unique_ptr<typename EllpackCacheStreamPolicy<S, F>::WriterT>\nEllpackCacheStreamPolicy<S, F>::CreateWriter(StringView, std::uint32_t iter) {\n  if (!this->p_cache_) {\n    CHECK(!detail::HostRatioIsAuto(this->CacheInfo().cache_host_ratio));\n    CHECK_GE(this->CacheInfo().cache_host_ratio, 0.0);\n    
CHECK_LE(this->CacheInfo().cache_host_ratio, 1.0);\n    constexpr std::int32_t kMaxGpuExtMemWorkers = 4;\n    this->p_cache_ = std::make_unique<EllpackMemCache>(this->CacheInfo(), kMaxGpuExtMemWorkers);\n  }\n  auto fo = std::make_unique<EllpackHostCacheStream>(this->p_cache_);\n  if (iter == 0) {\n    CHECK(this->p_cache_->Empty());\n  } else {\n    fo->Seek(this->p_cache_->SizeBytes());\n  }\n  return fo;\n}\n\ntemplate <typename S, template <typename> typename F>\n[[nodiscard]] std::unique_ptr<typename EllpackCacheStreamPolicy<S, F>::ReaderT>\nEllpackCacheStreamPolicy<S, F>::CreateReader(StringView, bst_idx_t offset, bst_idx_t) const {\n  auto fi = std::make_unique<ReaderT>(this->p_cache_);\n  fi->Seek(offset);\n  return fi;\n}\n\n// Instantiation\ntemplate std::unique_ptr<\n    typename EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::WriterT>\nEllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateWriter(StringView name,\n                                                                         std::uint32_t iter);\n\ntemplate std::unique_ptr<\n    typename EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::ReaderT>\nEllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateReader(StringView name,\n                                                                         bst_idx_t offset,\n                                                                         bst_idx_t length) const;\n\n/**\n * EllpackMmapStreamPolicy\n */\n\ntemplate <typename S, template <typename> typename F>\n[[nodiscard]] std::unique_ptr<typename EllpackMmapStreamPolicy<S, F>::ReaderT>\nEllpackMmapStreamPolicy<S, F>::CreateReader(StringView name, bst_idx_t offset,\n                                            bst_idx_t length) const {\n  if (has_hmm_) {\n    return std::make_unique<common::PrivateCudaMmapConstStream>(name, offset, length);\n  } else {\n    return std::make_unique<common::PrivateMmapConstStream>(name, offset, length);\n  }\n}\n\n// 
Instantiation\ntemplate std::unique_ptr<\n    typename EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>::ReaderT>\nEllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateReader(StringView name,\n                                                                        bst_idx_t offset,\n                                                                        bst_idx_t length) const;\n\nvoid CalcCacheMapping(Context const* ctx, bool is_dense,\n                      std::shared_ptr<common::HistogramCuts const> cuts,\n                      std::int64_t min_cache_page_bytes, ExternalDataInfo const& ext_info,\n                      bool is_validation, EllpackCacheInfo* cinfo) {\n  CHECK(cinfo->param.Initialized()) << \"Need to initialize scalar fields first.\";\n  auto ell_info = CalcNumSymbols(ctx, ext_info.row_stride, is_dense, cuts);\n\n  /**\n   * Configure the cache\n   */\n  // The total size of the cache.\n  std::size_t n_cache_bytes = 0;\n  for (std::size_t i = 0; i < ext_info.n_batches; ++i) {\n    auto n_samples = ext_info.base_rowids.at(i + 1) - ext_info.base_rowids[i];\n    auto n_bytes = common::CompressedBufferWriter::CalculateBufferSize(\n        ext_info.row_stride * n_samples, ell_info.n_symbols);\n    n_cache_bytes += n_bytes;\n  }\n  std::tie(cinfo->cache_host_ratio, min_cache_page_bytes) = detail::DftPageSizeHostRatio(\n      n_cache_bytes, is_validation, cinfo->cache_host_ratio, min_cache_page_bytes);\n\n  /**\n   * Calculate the cache buffer size\n   */\n  std::vector<std::size_t> cache_bytes;\n  std::vector<std::size_t> cache_mapping(ext_info.n_batches, 0);\n  std::vector<std::size_t> cache_rows;\n\n  for (std::size_t i = 0; i < ext_info.n_batches; ++i) {\n    auto n_samples = ext_info.base_rowids[i + 1] - ext_info.base_rowids[i];\n    auto n_bytes = common::CompressedBufferWriter::CalculateBufferSize(\n        ext_info.row_stride * n_samples, ell_info.n_symbols);\n\n    if (cache_bytes.empty()) {\n      // Push the first page\n    
  cache_bytes.push_back(n_bytes);\n      cache_rows.push_back(n_samples);\n    } else if (static_cast<decltype(min_cache_page_bytes)>(cache_bytes.back()) <\n               min_cache_page_bytes) {\n      // Concatenate to the previous page\n      cache_bytes.back() += n_bytes;\n      cache_rows.back() += n_samples;\n    } else {\n      // Push a new page\n      cache_bytes.push_back(n_bytes);\n      cache_rows.push_back(n_samples);\n    }\n    cache_mapping[i] = cache_bytes.size() - 1;\n  }\n\n  cinfo->cache_mapping = std::move(cache_mapping);\n  cinfo->buffer_bytes = std::move(cache_bytes);\n  cinfo->buffer_rows = std::move(cache_rows);\n\n  // Directly store in device if there's only one batch.\n  if (cinfo->NumBatchesCc() == 1) {\n    cinfo->cache_host_ratio = 0.0;\n  }\n\n  LOG(INFO) << \"`cache_host_ratio`=\" << cinfo->cache_host_ratio\n            << \" `min_cache_page_bytes`=\" << min_cache_page_bytes;\n}\n\n/**\n * EllpackPageSourceImpl\n */\ntemplate <typename F>\nvoid EllpackPageSourceImpl<F>::Fetch() {\n  curt::SetDevice(this->Device().ordinal);\n  if (!this->ReadCache()) {\n    if (this->Iter() != 0 && !this->sync_) {\n      // source is initialized to be the 0th page during construction, so when count_ is 0\n      // there's no need to increment the source.\n      ++(*this->source_);\n    }\n    // This is not read from cache so we still need it to be synced with sparse page source.\n    CHECK_EQ(this->Iter(), this->source_->Iter());\n    auto const& csr = this->source_->Page();\n    this->page_.reset(new EllpackPage{});\n    auto* impl = this->page_->Impl();\n    Context ctx = Context{}.MakeCUDA(this->Device().ordinal);\n    if (this->GetCuts()->HasCategorical()) {\n      CHECK(!this->feature_types_.empty());\n    }\n    *impl = EllpackPageImpl{&ctx, this->GetCuts(), *csr, is_dense_, row_stride_, feature_types_};\n    this->page_->SetBaseRowId(csr->base_rowid);\n    LOG(INFO) << \"Generated an Ellpack page with size: \"\n              << 
common::HumanMemUnit(impl->MemCostBytes())\n              << \" from a SparsePage with size:\" << common::HumanMemUnit(csr->MemCostBytes());\n    this->WriteCache();\n  }\n}\n\n// Instantiation\ntemplate void\nEllpackPageSourceImpl<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\ntemplate void\nEllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\ntemplate void\nEllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\n\n/**\n * ExtEllpackPageSourceImpl\n */\ntemplate <typename F>\nvoid ExtEllpackPageSourceImpl<F>::Fetch() {\n  curt::SetDevice(this->Device().ordinal);\n  if (!this->ReadCache()) {\n    auto iter = this->source_->Iter();\n    CHECK_EQ(this->Iter(), iter);\n    cuda_impl::DispatchAny(proxy_, [this](auto const& value) {\n      CHECK(this->proxy_->Ctx()->IsCUDA()) << \"All batches must use the same device type.\";\n      proxy_->Info().feature_types.SetDevice(dh::GetDevice(this->ctx_));\n      auto d_feature_types = proxy_->Info().feature_types.ConstDeviceSpan();\n      auto n_samples = value.NumRows();\n      if (this->GetCuts()->HasCategorical()) {\n        CHECK(!d_feature_types.empty());\n      }\n      dh::device_vector<size_t> row_counts(n_samples + 1, 0);\n      common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());\n      bst_idx_t row_stride = GetRowCounts(this->ctx_, value, row_counts_span,\n                                          dh::GetDevice(this->ctx_), this->missing_);\n      CHECK_LE(row_stride, this->ext_info_.row_stride);\n      this->page_.reset(new EllpackPage{});\n      *this->page_->Impl() = EllpackPageImpl{this->ctx_,\n                                             value,\n                                             this->missing_,\n                                             this->info_->IsDense(),\n                                             row_counts_span,\n                                             
d_feature_types,\n                                             this->ext_info_.row_stride,\n                                             n_samples,\n                                             this->GetCuts()};\n      this->info_->Extend(proxy_->Info(), false, true);\n    });\n    LOG(DEBUG) << \"Generated an Ellpack page with size: \"\n               << common::HumanMemUnit(this->page_->Impl()->MemCostBytes())\n               << \" from an batch with estimated size: \"\n               << cuda_impl::DispatchAny<false>(proxy_, [](auto const& adapter) {\n                    return common::HumanMemUnit(adapter->SizeBytes());\n                  });\n    this->page_->SetBaseRowId(this->ext_info_.base_rowids.at(iter));\n    this->WriteCache();\n  }\n}\n\n// Instantiation\ntemplate void\nExtEllpackPageSourceImpl<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\ntemplate void\nExtEllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\ntemplate void\nExtEllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();\n\nnamespace detail {\nvoid EllpackFormatCheckNuma(StringView msg) {\n#if defined(__linux__)\n  bool can_cross = common::NumaMemCanCross();\n  std::uint32_t numa = 0;\n  auto incorrect = [&numa] {\n    std::uint32_t cpu = 0;\n    return common::GetCpuNuma(&cpu, &numa) && static_cast<std::int32_t>(numa) != curt::GetNumaId();\n  };\n\n  if (can_cross && !common::GetNumaMemBind()) {\n    LOG(WARNING) << \"Running on a NUMA system without membind.\" << msg;\n  } else if (can_cross && incorrect()) {\n    LOG(WARNING) << \"Incorrect NUMA CPU bind, CPU node:\" << numa\n                 << \", GPU node:\" << curt::GetNumaId() << \".\" << msg;\n  }\n#else\n  (void)msg;\n#endif\n}\n}  // namespace detail\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/ellpack_page_source.h",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n\n#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_\n#define XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_\n\n#include <cstdint>  // for int32_t\n#include <limits>   // for numeric_limits\n#include <memory>   // for shared_ptr\n#include <tuple>    // for tuple\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"../common/compressed_iterator.h\"  // for CompressedByteT\n#include \"../common/cuda_rt_utils.h\"        // for SupportsPageableMem, SupportsAts\n#include \"../common/device_compression.h\"   // for SnappyDecomprMgr\n#include \"../common/hist_util.h\"            // for HistogramCuts\n#include \"../common/ref_resource_view.h\"    // for RefResourceView\n#include \"../data/batch_utils.h\"            // for AutoHostRatio\n#include \"ellpack_page.h\"                   // for EllpackPage\n#include \"ellpack_page_raw_format.h\"        // for EllpackPageRawFormat\n#include \"sparse_page_source.h\"             // for PageSourceIncMixIn\n#include \"xgboost/base.h\"                   // for bst_idx_t\n#include \"xgboost/context.h\"                // for DeviceOrd\n#include \"xgboost/data.h\"                   // for BatchParam\n#include \"xgboost/span.h\"                   // for Span\n\nnamespace xgboost::curt {\nclass StreamPool;\n}\nnamespace xgboost::common::cuda_impl {\nclass HostPinnedMemPool;\n}  // namespace xgboost::common::cuda_impl\n\nnamespace xgboost::data {\nstruct EllpackCacheInfo {\n  BatchParam param;\n  // The size ratio the host cache vs. the total cache\n  double cache_host_ratio{::xgboost::cuda_impl::AutoHostRatio()};\n  float missing{std::numeric_limits<float>::quiet_NaN()};\n  // The ratio of the cache that can be compressed. 
Used for testing.\n  float hw_decomp_ratio{std::numeric_limits<float>::quiet_NaN()};\n  bool allow_decomp_fallback{false};\n  std::vector<bst_idx_t> cache_mapping;\n  std::vector<bst_idx_t> buffer_bytes;  // N bytes of the concatenated pages.\n  std::vector<bst_idx_t> buffer_rows;\n\n  EllpackCacheInfo() = default;\n  EllpackCacheInfo(BatchParam param, double h_ratio, float missing)\n      : param{std::move(param)}, cache_host_ratio{h_ratio}, missing{missing} {}\n  EllpackCacheInfo(BatchParam param, ExtMemConfig const& config)\n      : param{std::move(param)},\n        cache_host_ratio{config.cache_host_ratio},\n        missing{config.missing},\n        hw_decomp_ratio{config.hw_decomp_ratio},\n        allow_decomp_fallback{config.allow_decomp_fallback} {}\n\n  // Only effective for host-based cache.\n  // The number of batches for the concatenated cache.\n  [[nodiscard]] std::size_t NumBatchesCc() const { return this->buffer_rows.size(); }\n};\n\n// We need to decouple the storage and the view of the storage so that we can implement\n// concurrent read. As a result, there are two classes, one for cache storage, another one\n// for stream.\n//\n// This is a memory-based cache. 
It can be a mixed of the device memory and the host\n// memory.\nstruct EllpackMemCache {\n  // The host portion of each page.\n  std::vector<std::unique_ptr<EllpackPageImpl>> h_pages;\n  // The device portion of each page.\n  using DPage = common::RefResourceView<common::CompressedByteT>;\n  std::vector<DPage> d_pages;\n  // Storage for decompression parameters and the compressed buffer.\n  using CPage = std::pair<dc::SnappyDecomprMgr, common::RefResourceView<std::uint8_t>>;\n  // Compressed host page.\n  std::vector<CPage> c_pages;\n\n  using PagePtr = std::tuple<EllpackPageImpl const*, DPage const*, CPage const*>;\n  using PageRef = std::tuple<std::unique_ptr<EllpackPageImpl>&, DPage&, CPage&>;\n\n  std::vector<std::size_t> offsets;\n  // Size of each batch before concatenation.\n  std::vector<bst_idx_t> sizes_orig;\n  // Mapping of pages before concatenation to after concatenation.\n  std::vector<std::size_t> const cache_mapping;\n  // Cache info\n  std::vector<std::size_t> const buffer_bytes;\n  std::vector<bst_idx_t> const buffer_rows;\n  double const cache_host_ratio;\n  float const hw_decomp_ratio;\n  bool const allow_decomp_fallback;\n\n  std::unique_ptr<curt::StreamPool> streams;  // For decompression\n  std::shared_ptr<common::cuda_impl::HostPinnedMemPool> pool;\n\n  explicit EllpackMemCache(EllpackCacheInfo cinfo, std::int32_t n_workers);\n  ~EllpackMemCache();\n\n  // The number of bytes of the entire cache.\n  [[nodiscard]] std::size_t SizeBytes() const noexcept(true);\n  // The number of bytes of the device cache.\n  [[nodiscard]] std::size_t DeviceSizeBytes() const noexcept(true);\n  // The number of bytes of each page.\n  [[nodiscard]] std::size_t SizeBytes(std::size_t i) const noexcept(true);\n  // The number of bytes of the gradient index (ellpack).\n  [[nodiscard]] std::size_t GidxSizeBytes(std::size_t i) const noexcept(true);\n  // The number of bytes of the gradient index (ellpack) of the entire cache.\n  [[nodiscard]] std::size_t 
GidxSizeBytes() const noexcept(true);\n  // The number of pages in the cache.\n  [[nodiscard]] std::size_t Size() const { return this->h_pages.size(); }\n  // Is the cache empty?\n  [[nodiscard]] bool Empty() const { return this->SizeBytes() == 0; }\n  // No page concatenation is performed. If there's page concatenation, then the number of\n  // pages in the cache must be smaller than the input number of pages.\n  [[nodiscard]] bool NoConcat() const { return this->NumBatchesOrig() == this->buffer_rows.size(); }\n  // The number of pages before concatenatioin.\n  [[nodiscard]] bst_idx_t NumBatchesOrig() const { return cache_mapping.size(); }\n  // Get the pointers to the k^th concatenated page.\n  [[nodiscard]] PagePtr At(std::int32_t k) const;\n  // Get a reference to the last concatenated page.\n  [[nodiscard]] PageRef Back();\n};\n\n// Pimpl to hide CUDA calls from the host compiler.\nclass EllpackHostCacheStreamImpl;\n\n/**\n * @brief A view of the actual cache implemented by `EllpackHostCache`.\n */\nclass EllpackHostCacheStream {\n  std::unique_ptr<EllpackHostCacheStreamImpl> p_impl_;\n\n public:\n  explicit EllpackHostCacheStream(std::shared_ptr<EllpackMemCache> cache);\n  ~EllpackHostCacheStream();\n  /**\n   * @brief Get a shared handler to the cache.\n   */\n  std::shared_ptr<EllpackMemCache const> Share() const;\n  /**\n   * @brief Stream seek.\n   *\n   * @param offset_bytes This must align to the actual cached page size.\n   */\n  void Seek(bst_idx_t offset_bytes);\n  /**\n   * @brief Read a page from the cache.\n   *\n   * The read page might be concatenated during page write.\n   *\n   * @param page[out] The returned page.\n   * @param prefetch_copy[in] Does the stream need to copy the page?\n   */\n  void Read(Context const* ctx, EllpackPage* page, bool prefetch_copy) const;\n  /**\n   * @brief Add a new page to the host cache.\n   *\n   * This method might append the input page to a previously stored page to increase\n   * individual page size.\n   
*\n   * @return Whether a new cache page is create. False if the new page is appended to the\n   * previous one.\n   */\n  [[nodiscard]] bool Write(EllpackPage const& page);\n};\n\nnamespace detail {\n// Not a member of `EllpackFormatPolicy`. Hide the impl without requiring template specialization.\nvoid EllpackFormatCheckNuma(StringView msg);\n}  // namespace detail\n\ntemplate <typename S>\nclass EllpackFormatPolicy {\n  std::shared_ptr<common::HistogramCuts const> cuts_{nullptr};\n  DeviceOrd device_;\n  bool has_hmm_{curt::SupportsPageableMem()};\n\n  EllpackCacheInfo cache_info_;\n  static_assert(std::is_same_v<S, EllpackPage>);\n\n public:\n  using FormatT = EllpackPageRawFormat;\n\n public:\n  EllpackFormatPolicy() {\n    StringView msg{\" The overhead of iterating through external memory might be significant.\"};\n    if (!(has_hmm_ || curt::SupportsAts())) {\n      LOG(WARNING) << \"CUDA heterogeneous memory management is not available.\" << msg;\n    }\n    if (!(GlobalConfigThreadLocalStore::Get()->use_rmm ||\n          GlobalConfigThreadLocalStore::Get()->use_cuda_async_pool)) {\n      LOG(WARNING) << \"Neither `use_rmm` nor `use_cuda_async_pool` is enabled.\" << msg;\n    }\n    if (GlobalConfigThreadLocalStore::Get()->use_rmm) {\n#if !defined(XGBOOST_USE_RMM)\n      LOG(WARNING) << \"XGBoost is not built with RMM support. 
But the `use_rmm` flag is enabled.\";\n#endif\n    }\n    std::int32_t major{0}, minor{0};\n    curt::GetDrVersionGlobal(&major, &minor);\n    if ((major < 12 || (major == 12 && minor < 7)) && curt::SupportsAts()) {\n      // Use ATS, but with an old kernel driver.\n      LOG(WARNING) << \"Using an old kernel driver with supported CTK<12.7.\"\n                   << \"The latest version of CTK supported by the current driver: \" << major << \".\"\n                   << minor << \".\" << msg;\n    }\n    detail::EllpackFormatCheckNuma(msg);\n  }\n  // For testing with the HMM flag.\n  explicit EllpackFormatPolicy(bool has_hmm) : has_hmm_{has_hmm} {}\n\n  [[nodiscard]] auto CreatePageFormat(BatchParam const& param) const {\n    CHECK_EQ(cuts_->cut_values_.Device(), device_);\n    std::unique_ptr<FormatT> fmt{new EllpackPageRawFormat{cuts_, device_, param, has_hmm_}};\n    return fmt;\n  }\n  void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts, DeviceOrd device,\n               EllpackCacheInfo cinfo) {\n    std::swap(this->cuts_, cuts);\n    this->device_ = device;\n    CHECK(this->device_.IsCUDA());\n    this->cache_info_ = std::move(cinfo);\n  }\n  [[nodiscard]] auto GetCuts() const {\n    CHECK(cuts_);\n    return cuts_;\n  }\n  [[nodiscard]] auto Device() const { return this->device_; }\n  [[nodiscard]] auto const& CacheInfo() { return this->cache_info_; }\n};\n\ntemplate <typename S, template <typename> typename F>\nclass EllpackCacheStreamPolicy : public F<S> {\n  std::shared_ptr<EllpackMemCache> p_cache_;\n\n public:\n  using WriterT = EllpackHostCacheStream;\n  using ReaderT = EllpackHostCacheStream;\n\n public:\n  EllpackCacheStreamPolicy() = default;\n  [[nodiscard]] std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter);\n\n  [[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,\n                                                      bst_idx_t length) const;\n  std::shared_ptr<EllpackMemCache 
const> Share() const { return p_cache_; }\n};\n\ntemplate <typename S, template <typename> typename F>\nclass EllpackMmapStreamPolicy : public F<S> {\n  bool has_hmm_{curt::SupportsPageableMem()};\n\n public:\n  using WriterT = common::AlignedFileWriteStream;\n  using ReaderT = common::AlignedResourceReadStream;\n\n public:\n  EllpackMmapStreamPolicy() = default;\n  // For testing with the HMM flag.\n  template <\n      typename std::enable_if_t<std::is_same_v<F<S>, EllpackFormatPolicy<EllpackPage>>>* = nullptr>\n  explicit EllpackMmapStreamPolicy(bool has_hmm) : F<S>{has_hmm}, has_hmm_{has_hmm} {}\n\n  [[nodiscard]] std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter) {\n    std::unique_ptr<common::AlignedFileWriteStream> fo;\n    if (iter == 0) {\n      fo = std::make_unique<common::AlignedFileWriteStream>(name, \"wb\");\n    } else {\n      fo = std::make_unique<common::AlignedFileWriteStream>(name, \"ab\");\n    }\n    return fo;\n  }\n\n  [[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,\n                                                      bst_idx_t length) const;\n};\n\n/**\n * @brief Calculate the size of each internal cached page along with the mapping of old\n *        pages to the new pages.\n */\nvoid CalcCacheMapping(Context const* ctx, bool is_dense,\n                      std::shared_ptr<common::HistogramCuts const> cuts,\n                      std::int64_t min_cache_page_bytes, ExternalDataInfo const& ext_info,\n                      bool is_validation, EllpackCacheInfo* cinfo);\n\n/**\n * @brief Ellpack source with sparse pages as the underlying source.\n */\ntemplate <typename F>\nclass EllpackPageSourceImpl : public PageSourceIncMixIn<EllpackPage, F> {\n  using Super = PageSourceIncMixIn<EllpackPage, F>;\n  bool is_dense_;\n  bst_idx_t row_stride_;\n  BatchParam param_;\n  common::Span<FeatureType const> feature_types_;\n\n public:\n  EllpackPageSourceImpl(Context const* ctx, 
bst_feature_t n_features, std::size_t n_batches,\n                        std::shared_ptr<Cache> cache, std::shared_ptr<common::HistogramCuts> cuts,\n                        bool is_dense, bst_idx_t row_stride,\n                        common::Span<FeatureType const> feature_types,\n                        std::shared_ptr<SparsePageSource> source, EllpackCacheInfo const& cinfo)\n      : Super{cinfo.missing, ctx->Threads(), n_features, n_batches, cache, false},\n        is_dense_{is_dense},\n        row_stride_{row_stride},\n        param_{std::move(cinfo.param)},\n        feature_types_{feature_types} {\n    this->source_ = source;\n    cuts->SetDevice(ctx->Device());\n    this->SetCuts(std::move(cuts), ctx->Device(), cinfo);\n    this->Fetch();\n  }\n\n  void Fetch() final;\n};\n\n// Cache to host\nusing EllpackPageHostSource =\n    EllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>;\n\n// Cache to disk\nusing EllpackPageSource =\n    EllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>;\n\n/**\n * @brief Ellpack source directly interfaces with user-defined iterators.\n */\ntemplate <typename FormatCreatePolicy>\nclass ExtEllpackPageSourceImpl : public ExtQantileSourceMixin<EllpackPage, FormatCreatePolicy> {\n  using Super = ExtQantileSourceMixin<EllpackPage, FormatCreatePolicy>;\n\n  Context const* ctx_;\n  BatchParam p_;\n  DMatrixProxy* proxy_;\n  MetaInfo* info_;\n  ExternalDataInfo ext_info_;\n\n public:\n  ExtEllpackPageSourceImpl(\n      Context const* ctx, MetaInfo* info, ExternalDataInfo ext_info, std::shared_ptr<Cache> cache,\n      std::shared_ptr<common::HistogramCuts> cuts,\n      std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> source,\n      DMatrixProxy* proxy, EllpackCacheInfo const& cinfo)\n      : Super{cinfo.missing, ctx->Threads(), static_cast<bst_feature_t>(info->num_col_), source,\n              cache},\n        ctx_{ctx},\n        p_{cinfo.param},\n       
 proxy_{proxy},\n        info_{info},\n        ext_info_{std::move(ext_info)} {\n    cuts->SetDevice(ctx->Device());\n    this->SetCuts(std::move(cuts), ctx->Device(), cinfo);\n    CHECK(!this->cache_info_->written);\n    this->source_->Reset();\n    CHECK(this->source_->Next());\n    this->Fetch();\n  }\n\n  void Fetch() final;\n  // Need a specialized end iter as we can concatenate pages.\n  void EndIter() final {\n    if (this->cache_info_->written) {\n      CHECK_EQ(this->Iter(), this->cache_info_->Size());\n    } else {\n      CHECK_LE(this->cache_info_->Size(), this->ext_info_.n_batches);\n    }\n    this->cache_info_->Commit();\n    CHECK_GE(this->count_, 1);\n    this->count_ = 0;\n  }\n};\n\n// Cache to host\nusing ExtEllpackPageHostSource =\n    ExtEllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>;\n\n// Cache to disk\nusing ExtEllpackPageSource =\n    ExtEllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>;\n\n#if !defined(XGBOOST_USE_CUDA)\ntemplate <typename F>\ninline void EllpackPageSourceImpl<F>::Fetch() {\n  // silent the warning about unused variables.\n  (void)(row_stride_);\n  (void)(is_dense_);\n  common::AssertGPUSupport();\n}\n\ntemplate <typename F>\ninline void ExtEllpackPageSourceImpl<F>::Fetch() {\n  common::AssertGPUSupport();\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n\n#endif  // XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_\n"
  },
  {
    "path": "src/data/entry.h",
    "content": "/**\n *  Copyright 2019-2025, XGBoost Contributors\n */\n#pragma once\n\n#include \"../common/math.h\"  // for CheckNAN\n#include \"xgboost/base.h\"    // for bst_idx_t\n#include \"xgboost/data.h\"    // for Entry\n\nnamespace xgboost::data {\nstruct COOTuple {\n  COOTuple() = default;\n  XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value)\n      : row_idx(row_idx), column_idx(column_idx), value(value) {}\n\n  bst_idx_t row_idx{0};\n  bst_idx_t column_idx{0};\n  float value{0};\n};\n\nstruct IsValidFunctor {\n  float missing;\n\n  XGBOOST_DEVICE explicit IsValidFunctor(float missing) : missing(missing) {}\n\n  XGBOOST_DEVICE bool operator()(float value) const {\n    return !(common::CheckNAN(value) || value == missing);\n  }\n\n  XGBOOST_DEVICE bool operator()(const data::COOTuple& e) const {\n    return !(common::CheckNAN(e.value) || e.value == missing);\n  }\n\n  XGBOOST_DEVICE bool operator()(const Entry& e) const {\n    return !(common::CheckNAN(e.fvalue) || e.fvalue == missing);\n  }\n};\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/extmem_quantile_dmatrix.cc",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include \"extmem_quantile_dmatrix.h\"\n\n#include <memory>  // for shared_ptr\n#include <string>  // for string\n#include <vector>  // for vector\n\n#include \"../common/error_msg.h\"    // for CacheHostRatio, InconsistentMaxBin\n#include \"../tree/param.h\"          // FIXME(jiamingy): Find a better way to share this parameter.\n#include \"batch_utils.h\"            // for CheckParam, RegenGHist\n#include \"proxy_dmatrix.h\"          // for DataIterProxy\n#include \"quantile_dmatrix.h\"       // for GetDataShape, MakeSketches\n#include \"simple_batch_iterator.h\"  // for SimpleBatchIteratorImpl\n#include \"sparse_page_source.h\"     // for MakeCachePrefix\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // for AssertGPUSupport\n#endif\n\nnamespace xgboost::data {\nExtMemQuantileDMatrix::ExtMemQuantileDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,\n                                             std::shared_ptr<DMatrix> ref,\n                                             DataIterResetCallback *reset,\n                                             XGDMatrixCallbackNext *next, bst_bin_t max_bin,\n                                             ExtMemConfig const &config)\n    : cache_prefix_{config.cache}, on_host_{config.on_host} {\n  cache_prefix_ = MakeCachePrefix(cache_prefix_);\n  auto iter = std::make_shared<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>>(\n      iter_handle, reset, next);\n  iter->Reset();\n  // Fetch the first iter\n  bool valid = iter->Next();\n  CHECK(valid) << \"Qauntile DMatrix must have at least 1 batch.\";\n\n  auto pctx = MakeProxy(proxy)->Ctx();\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", std::to_string(config.n_threads)}, {\"device\", pctx->DeviceName()}});\n\n  BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};\n  if (ctx.IsCPU()) {\n    CHECK(detail::HostRatioIsAuto(config.cache_host_ratio)) << 
error::CacheHostRatioNotImpl();\n    this->InitFromCPU(&ctx, iter, proxy, p, config.missing, ref);\n  } else {\n    p.n_prefetch_batches = ::xgboost::cuda_impl::DftPrefetchBatches();\n    this->InitFromCUDA(&ctx, iter, proxy, p, ref, config);\n  }\n  this->batch_ = p;\n  this->fmat_ctx_ = ctx;\n\n  SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0);\n}\n\nExtMemQuantileDMatrix::~ExtMemQuantileDMatrix() {\n  // Clear out all resources before deleting the cache file.\n  ghist_index_source_.reset();\n  std::visit([](auto &&ptr) { ptr.reset(); }, ellpack_page_source_);\n\n  DeleteCacheFiles(cache_info_);\n}\n\nBatchSet<ExtSparsePage> ExtMemQuantileDMatrix::GetExtBatches(Context const *, BatchParam const &) {\n  LOG(FATAL) << \"Not implemented for `ExtMemQuantileDMatrix`.\";\n  auto begin_iter =\n      BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));\n  return BatchSet<ExtSparsePage>{begin_iter};\n}\n\nvoid ExtMemQuantileDMatrix::InitFromCPU(\n    Context const *ctx,\n    std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> iter,\n    DMatrixHandle proxy_handle, BatchParam const &p, float missing, std::shared_ptr<DMatrix> ref) {\n  xgboost_NVTX_FN_RANGE();\n\n  auto proxy = MakeProxy(proxy_handle);\n  CHECK(proxy);\n\n  common::HistogramCuts cuts{0};\n  ExternalDataInfo ext_info;\n  cpu_impl::GetDataShape(ctx, proxy, iter.get(), missing, &ext_info);\n  ext_info.SetInfo(ctx, true, &this->info_);\n\n  this->n_batches_ = ext_info.n_batches;\n\n  /**\n   * Generate quantiles\n   */\n  std::vector<FeatureType> h_ft;\n  cpu_impl::MakeSketches(ctx, iter.get(), proxy, ref, missing, &cuts, p, this->info_, ext_info,\n                         &h_ft);\n\n  /**\n   * Generate gradient index\n   */\n  auto id = MakeCache(this, \".gradient_index.page\", false, cache_prefix_, &cache_info_);\n  this->ghist_index_source_ = std::make_unique<ExtGradientIndexPageSource>(\n      ctx, missing, &this->info_, cache_info_.at(id), p, 
cuts, iter, proxy, ext_info.base_rowids);\n\n  /**\n   * Force initialize the cache and do some sanity checks along the way\n   */\n  bst_idx_t batch_cnt = 0, k = 0;\n  bst_idx_t n_total_samples = 0;\n  for (auto const &page : this->GetGradientIndexImpl()) {\n    n_total_samples += page.Size();\n    CHECK_EQ(page.base_rowid, ext_info.base_rowids[k]);\n    CHECK_EQ(page.Features(), this->info_.num_col_);\n    ++k, ++batch_cnt;\n  }\n  CHECK_EQ(batch_cnt, ext_info.n_batches);\n  CHECK_EQ(n_total_samples, ext_info.accumulated_rows);\n  if (cuts.HasCategorical()) {\n    CHECK(!this->info_.feature_types.Empty());\n  }\n  CHECK_EQ(cuts.HasCategorical(), this->info_.HasCategorical());\n}\n\n[[nodiscard]] BatchSet<GHistIndexMatrix> ExtMemQuantileDMatrix::GetGradientIndexImpl() {\n  return BatchSet{BatchIterator<GHistIndexMatrix>{this->ghist_index_source_}};\n}\n\nBatchSet<GHistIndexMatrix> ExtMemQuantileDMatrix::GetGradientIndex(Context const *,\n                                                                   BatchParam const &param) {\n  if (param.Initialized()) {\n    detail::CheckParam(this->batch_, param);\n    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();\n  }\n\n  CHECK(this->ghist_index_source_)\n      << \"The `ExtMemQuantileDMatrix` is initialized using GPU data, cannot be used for CPU.\";\n  this->ghist_index_source_->Reset(param);\n\n  if (!std::isnan(param.sparse_thresh) &&\n      param.sparse_thresh != tree::TrainParam::DftSparseThreshold()) {\n    LOG(WARNING) << \"`sparse_threshold` can not be changed when `QuantileDMatrix` is used instead \"\n                    \"of `DMatrix`.\";\n  }\n\n  return this->GetGradientIndexImpl();\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid ExtMemQuantileDMatrix::InitFromCUDA(\n    Context const *, std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>>,\n    DMatrixHandle, BatchParam const &, std::shared_ptr<DMatrix>, ExtMemConfig const &) {\n  
common::AssertGPUSupport();\n}\n\nBatchSet<EllpackPage> ExtMemQuantileDMatrix::GetEllpackBatches(Context const *,\n                                                               const BatchParam &) {\n  common::AssertGPUSupport();\n  auto batch_set = std::visit([](auto &&ptr) { return BatchSet{BatchIterator<EllpackPage>{ptr}}; },\n                              this->ellpack_page_source_);\n  return batch_set;\n}\n\nBatchSet<EllpackPage> ExtMemQuantileDMatrix::GetEllpackPageImpl() {\n  common::AssertGPUSupport();\n  auto batch_set = std::visit([](auto &&ptr) { return BatchSet{BatchIterator<EllpackPage>{ptr}}; },\n                              this->ellpack_page_source_);\n  return batch_set;\n}\n#endif\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/extmem_quantile_dmatrix.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n *\n * The @ref ExtMemQuantileDMatrix for GPU prefetches 2 pages by default and can optionally\n * cache page in the device memory for the validation DMatrix. In addition, it can\n * concatenate user-provded pages to form larger @ref EllpackPage to avoid small GPU\n * kernels.\n *\n * Given 1 training DMatrix and 1 validation DMatrix, we can have at most 4 pages in the\n * device memory. 2 from prefetched training DMatrix, 2 from prefetched validation\n * DMatrix. If set the minimum @ref EllpackPage to 12GB in a 96GB GPU, 4 pages have 48GB\n * size in total. Accounting for memory fragmentation, we still have some room in the\n * device that can be used as a faster cache.\n */\n\n#include <memory>   // for shared_ptr\n#include <variant>  // for visit, get_if\n\n#include \"../common/nvtx_utils.h\"  // for xgboost_NVTX_FN_RANGE\n#include \"batch_utils.h\"           // for CheckParam, RegenGHist\n#include \"batch_utils.h\"           // for AutoCachePageBytes\n#include \"ellpack_page.cuh\"        // for EllpackPage\n#include \"extmem_quantile_dmatrix.h\"\n#include \"proxy_dmatrix.h\"    // for DataIterProxy\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"     // for BatchParam\n\nnamespace xgboost::data {\nnamespace detail {\n[[nodiscard]] std::int64_t DftMinCachePageBytes(std::int64_t min_cache_page_bytes) {\n  // Set to 0 if it should match the user input size.\n  if (::xgboost::cuda_impl::AutoCachePageBytes() == min_cache_page_bytes) {\n    double n_total_bytes = curt::TotalMemory();\n    min_cache_page_bytes = n_total_bytes * xgboost::cuda_impl::CachePageRatio();\n  }\n  return min_cache_page_bytes;\n}\n}  // namespace detail\n\nvoid ExtMemQuantileDMatrix::InitFromCUDA(\n    Context const *ctx,\n    std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> iter,\n    DMatrixHandle proxy_handle, BatchParam const &p, std::shared_ptr<DMatrix> ref,\n    
ExtMemConfig const &config) {\n  xgboost_NVTX_FN_RANGE();\n\n  // A handle passed to external iterator.\n  auto proxy = MakeProxy(proxy_handle);\n  CHECK(proxy);\n\n  /**\n   * Generate quantiles\n   */\n  auto cuts = std::make_shared<common::HistogramCuts>(0);\n  ExternalDataInfo ext_info;\n  cuda_impl::MakeSketches(ctx, iter.get(), proxy, ref, p, config.missing, cuts, this->info_,\n                          &ext_info);\n  ext_info.SetInfo(ctx, true, &this->info_);\n\n  /**\n   * Calculate cache info\n   */\n  auto is_validation = (ref != nullptr);\n  auto cinfo = EllpackCacheInfo{p, config};\n  CalcCacheMapping(ctx, this->info_.IsDense(), cuts, config.min_cache_page_bytes, ext_info,\n                   is_validation, &cinfo);\n  CHECK_EQ(cinfo.cache_mapping.size(), ext_info.n_batches);\n  CHECK_GE(cinfo.cache_host_ratio, 0.0);\n  CHECK_LE(cinfo.cache_host_ratio, 1.0);\n  auto n_batches = cinfo.NumBatchesCc();\n  LOG(INFO) << \"Number of batches after concatenation:\" << n_batches;\n\n  /**\n   * Generate gradient index\n   */\n  auto id = MakeCache(this, \".ellpack.page\", this->on_host_, cache_prefix_, &cache_info_);\n  if (on_host_ && std::get_if<EllpackHostPtr>(&ellpack_page_source_) == nullptr) {\n    ellpack_page_source_.emplace<EllpackHostPtr>(nullptr);\n  }\n\n  std::visit(\n      [&](auto &&ptr) {\n        using SourceT = typename std::remove_reference_t<decltype(ptr)>::element_type;\n        ptr = std::make_shared<SourceT>(ctx, &this->info_, ext_info, cache_info_.at(id), cuts, iter,\n                                        proxy, cinfo);\n      },\n      ellpack_page_source_);\n\n  /**\n   * Force initialize the cache and do some sanity checks along the way\n   */\n  bst_idx_t batch_cnt = 0, k = 0;\n  bst_idx_t n_total_samples = 0;\n  for (auto const &page : this->GetEllpackPageImpl()) {\n    n_total_samples += page.Size();\n    CHECK_EQ(page.Impl()->base_rowid, ext_info.base_rowids[k]);\n    CHECK_EQ(page.Impl()->info.row_stride, ext_info.row_stride);\n 
   ++k, ++batch_cnt;\n  }\n  CHECK_EQ(batch_cnt, ext_info.n_batches);\n  CHECK_EQ(n_total_samples, ext_info.accumulated_rows);\n\n  if (this->on_host_) {\n    CHECK_EQ(this->cache_info_.at(id)->Size(), n_batches);\n  } else {\n    CHECK_EQ(this->cache_info_.at(id)->Size(), ext_info.n_batches);\n  }\n  this->n_batches_ = this->cache_info_.at(id)->Size();\n  if (cuts->HasCategorical()) {\n    CHECK(!this->info_.feature_types.Empty());\n  }\n  CHECK_EQ(cuts->HasCategorical(), this->info_.HasCategorical());\n}\n\n[[nodiscard]] BatchSet<EllpackPage> ExtMemQuantileDMatrix::GetEllpackPageImpl() {\n  auto batch_set =\n      std::visit([this](auto &&ptr) { return BatchSet{BatchIterator<EllpackPage>{ptr}}; },\n                 this->ellpack_page_source_);\n  return batch_set;\n}\n\nBatchSet<EllpackPage> ExtMemQuantileDMatrix::GetEllpackBatches(Context const *,\n                                                               const BatchParam &param) {\n  if (param.Initialized()) {\n    detail::CheckParam(this->batch_, param);\n    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();\n  }\n\n  std::visit(\n      [this, param](auto &&ptr) {\n        CHECK(ptr)\n            << \"The `ExtMemQuantileDMatrix` is initialized using CPU data, cannot be used for GPU.\";\n        ptr->Reset(param);\n      },\n      this->ellpack_page_source_);\n\n  return this->GetEllpackPageImpl();\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/extmem_quantile_dmatrix.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#pragma once\n\n#include <map>      // for map\n#include <memory>   // for shared_ptr\n#include <string>   // for string\n#include <variant>  // for variant\n\n#include \"ellpack_page_source.h\"         // for EllpackPageSource, EllpackPageHostSource\n#include \"gradient_index_page_source.h\"  // for GradientIndexPageSource\n#include \"quantile_dmatrix.h\"            // for QuantileDMatrix, ExternalIter\n#include \"xgboost/base.h\"                // for bst_bin_t\n#include \"xgboost/c_api.h\"               // for DataIterHandle, DMatrixHandle\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo, BatchParam\n\nnamespace xgboost::data {\n/**\n * @brief A DMatrix class for building a `QuantileDMatrix` from external memory iterator.\n *\n * This is a combination of `IterativeDMatrix` and the `SparsePageDMatrix` . It builds\n * gradient index directly from iterator inputs without going through the `SparsePage`,\n * similar to how the `IterativeDMatrix` works. 
Also, similar to the `SparsePageDMatrix`,\n * it caches the gradient index and fetches them in batches on demand.\n */\nclass ExtMemQuantileDMatrix : public QuantileDMatrix {\n public:\n  ExtMemQuantileDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,\n                        std::shared_ptr<DMatrix> ref, DataIterResetCallback *reset,\n                        XGDMatrixCallbackNext *next, bst_bin_t max_bin, ExtMemConfig const &config);\n  ~ExtMemQuantileDMatrix() override;\n\n  [[nodiscard]] std::int32_t NumBatches() const override { return n_batches_; }\n\n private:\n  void InitFromCPU(\n      Context const *ctx,\n      std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> iter,\n      DMatrixHandle proxy, BatchParam const &p, float missing, std::shared_ptr<DMatrix> ref);\n  void InitFromCUDA(\n      Context const *ctx,\n      std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> iter,\n      DMatrixHandle proxy_handle, BatchParam const &p, std::shared_ptr<DMatrix> ref,\n      ExtMemConfig const &config);\n\n  [[nodiscard]] BatchSet<GHistIndexMatrix> GetGradientIndexImpl();\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const &param) override;\n\n  [[nodiscard]] BatchSet<EllpackPage> GetEllpackPageImpl();\n  BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;\n\n  [[nodiscard]] bool EllpackExists() const override {\n    return std::visit([](auto &&v) { return static_cast<bool>(v); }, ellpack_page_source_);\n  }\n  [[nodiscard]] bool GHistIndexExists() const override {\n    return static_cast<bool>(ghist_index_source_);\n  }\n\n  [[nodiscard]] BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx,\n                                                      BatchParam const &param) override;\n\n  std::map<std::string, std::shared_ptr<Cache>> cache_info_;\n  std::string cache_prefix_;\n  bool const on_host_;\n  BatchParam batch_;\n  bst_idx_t 
n_batches_{0};\n\n  using EllpackDiskPtr = std::shared_ptr<ExtEllpackPageSource>;\n  using EllpackHostPtr = std::shared_ptr<ExtEllpackPageHostSource>;\n  std::variant<EllpackDiskPtr, EllpackHostPtr> ellpack_page_source_;\n  std::shared_ptr<ExtGradientIndexPageSource> ghist_index_source_;\n};\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/file_iterator.cc",
    "content": "/**\n * Copyright 2021-2024, XGBoost contributors\n */\n#include \"file_iterator.h\"\n\n#include <xgboost/logging.h>  // for LogCheck_EQ, LogCheck_LE, CHECK_EQ, CHECK_LE, LOG, LOG_...\n\n#include <filesystem>  // for weakly_canonical, path, u8path\n#include <map>         // for map, operator==\n#include <ostream>     // for operator<<, basic_ostream, istringstream\n#include <vector>      // for vector\n\n#include \"../common/common.h\"  // for Split\n#include \"xgboost/linalg.h\"    // for ArrayInterfaceStr, MakeVec\n#include \"xgboost/linalg.h\"\n#include \"xgboost/logging.h\"      // for CHECK\n#include \"xgboost/string_view.h\"  // for operator<<, StringView\n\nnamespace xgboost::data {\nstd::string ValidateFileFormat(std::string const& uri) {\n  std::vector<std::string> name_args_cache = common::Split(uri, '#');\n  CHECK_LE(name_args_cache.size(), 2)\n      << \"Only one `#` is allowed in file path for cachefile specification\";\n\n  std::vector<std::string> name_args = common::Split(name_args_cache[0], '?');\n  StringView msg{\"URI parameter `format` is required for loading text data: filename?format=csv\"};\n  CHECK_EQ(name_args.size(), 2) << msg;\n\n  std::map<std::string, std::string> args;\n  std::vector<std::string> arg_list = common::Split(name_args[1], '&');\n  for (size_t i = 0; i < arg_list.size(); ++i) {\n    std::istringstream is(arg_list[i]);\n    std::pair<std::string, std::string> kv;\n    CHECK(std::getline(is, kv.first, '='))\n        << \"Invalid uri argument format\" << \" for key in arg \" << i + 1;\n    CHECK(std::getline(is, kv.second))\n        << \"Invalid uri argument format\" << \" for value in arg \" << i + 1;\n    args.insert(kv);\n  }\n  if (args.find(\"format\") == args.cend()) {\n    LOG(FATAL) << msg;\n  }\n\n  auto path = common::Split(uri, '?')[0];\n\n  namespace fs = std::filesystem;\n  name_args[0] = fs::weakly_canonical(fs::u8path(path)).string();\n  if (name_args_cache.size() == 1) {\n    return name_args[0] 
+ \"?\" + name_args[1];\n  } else {\n    return name_args[0] + \"?\" + name_args[1] + '#' + name_args_cache[1];\n  }\n}\n\nint FileIterator::Next() {\n  CHECK(parser_);\n  if (parser_->Next()) {\n    row_block_ = parser_->Value();\n\n    indptr_ = linalg::Make1dInterface(row_block_.offset, row_block_.size + 1);\n    values_ = linalg::Make1dInterface(row_block_.value, row_block_.offset[row_block_.size]);\n    indices_ = linalg::Make1dInterface(row_block_.index, row_block_.offset[row_block_.size]);\n\n    size_t n_columns =\n        *std::max_element(row_block_.index, row_block_.index + row_block_.offset[row_block_.size]);\n    // dmlc parser converts 1-based indexing back to 0-based indexing so we can ignore\n    // this condition and just add 1 to n_columns\n    n_columns += 1;\n\n    XGProxyDMatrixSetDataCSR(proxy_, indptr_.c_str(), indices_.c_str(), values_.c_str(), n_columns);\n\n    if (row_block_.label) {\n      auto str = linalg::Make1dInterface(row_block_.label, row_block_.size);\n      XGDMatrixSetInfoFromInterface(proxy_, \"label\", str.c_str());\n    }\n    if (row_block_.qid) {\n      auto str = linalg::Make1dInterface(row_block_.qid, row_block_.size);\n      XGDMatrixSetInfoFromInterface(proxy_, \"qid\", str.c_str());\n    }\n    if (row_block_.weight) {\n      auto str = linalg::Make1dInterface(row_block_.weight, row_block_.size);\n      XGDMatrixSetInfoFromInterface(proxy_, \"weight\", str.c_str());\n    }\n    // Continue iteration\n    return true;\n  } else {\n    // Stop iteration\n    return false;\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/file_iterator.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost contributors\n */\n#ifndef XGBOOST_DATA_FILE_ITERATOR_H_\n#define XGBOOST_DATA_FILE_ITERATOR_H_\n\n#include <cstdint>    // for uint32_t\n#include <memory>     // for unique_ptr\n#include <string>     // for string\n#include <utility>    // for move\n\n#include \"dmlc/data.h\"        // for RowBlock, Parser\n#include \"xgboost/c_api.h\"    // for XGDMatrixFree, XGProxyDMatrixCreate\n\nnamespace xgboost::data {\n[[nodiscard]] std::string ValidateFileFormat(std::string const& uri);\n\n/**\n * An iterator for implementing external memory support with file inputs.  Users of\n * external memory are encouraged to define their own file parsers/loaders so this one is\n * just here for compatibility with old versions of XGBoost and CLI interface.\n */\nclass FileIterator {\n  // uri of input file, encodes parameters about whether it's 1-based index etc.  dmlc\n  // parser will decode these information.\n  std::string uri_;\n  // Equals to rank_id in distributed training, used to split file into parts for each\n  // worker.\n  uint32_t part_idx_;\n  // Equals to total number of workers.\n  uint32_t n_parts_;\n\n  DMatrixHandle proxy_;\n\n  std::unique_ptr<dmlc::Parser<uint32_t>> parser_;\n  // Temporary reference to stage the data.\n  dmlc::RowBlock<uint32_t, float> row_block_;\n  // Storage for the array interface strings.\n  std::string indptr_;\n  std::string values_;\n  std::string indices_;\n\n public:\n  FileIterator(std::string uri, unsigned part_index, unsigned num_parts)\n      : uri_{ValidateFileFormat(std::move(uri))}, part_idx_{part_index}, n_parts_{num_parts} {\n    XGProxyDMatrixCreate(&proxy_);\n  }\n  ~FileIterator() {\n    XGDMatrixFree(proxy_);\n  }\n\n  int Next();\n\n  auto Proxy() -> decltype(proxy_) { return proxy_; }\n\n  void Reset() {\n    parser_.reset(dmlc::Parser<uint32_t>::Create(uri_.c_str(), part_idx_, n_parts_, \"auto\"));\n  }\n};\n\nnamespace fileiter {\ninline void Reset(DataIterHandle 
self) {\n  static_cast<FileIterator*>(self)->Reset();\n}\n\ninline int Next(DataIterHandle self) {\n  return static_cast<FileIterator*>(self)->Next();\n}\n}  // namespace fileiter\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_FILE_ITERATOR_H_\n"
  },
  {
    "path": "src/data/gradient_index.cc",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\brief Data type for fast histogram aggregation.\n */\n#include \"gradient_index.h\"\n\n#include <limits>\n#include <memory>\n#include <utility>  // for forward\n\n#include \"../common/column_matrix.h\"\n#include \"../common/hist_util.h\"\n#include \"../common/numeric.h\"\n#include \"../common/transform_iterator.h\"  // for MakeIndexTransformIter\n\nnamespace xgboost {\n\nGHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnMatrix>()} {}\n\nGHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat,\n                                   double sparse_thresh, bool sorted_sketch,\n                                   common::Span<float const> hess)\n    : max_numeric_bins_per_feat{max_bins_per_feat} {\n  CHECK(p_fmat->SingleColBlock());\n  // We use sorted sketching for approx tree method since it's more efficient in\n  // computation time (but higher memory usage).\n  cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess);\n\n  const uint32_t nbins = cut.Ptrs().back();\n  hit_count = common::MakeFixedVecWithMalloc(ctx, nbins, std::size_t{0});\n  hit_count_tloc_.resize(ctx->Threads() * nbins, 0);\n\n  size_t new_size = 1;\n  for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {\n    new_size += batch.Size();\n  }\n\n  row_ptr = common::MakeFixedVecWithMalloc(ctx, new_size, std::size_t{0});\n\n  const bool isDense = p_fmat->IsDense();\n  this->isDense_ = isDense;\n  auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n\n  for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {\n    this->PushBatch(ctx, batch, ft);\n  }\n  this->columns_ = std::make_unique<common::ColumnMatrix>();\n\n  // hessian is empty when hist tree method is used or when dataset is empty\n  if (hess.empty() && !std::isnan(sparse_thresh)) {\n    // hist\n    CHECK(!sorted_sketch);\n    for (auto const &page : 
p_fmat->GetBatches<SparsePage>()) {\n      this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads());\n    }\n  }\n}\n\nGHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts &&cuts,\n                                   bst_bin_t max_bin_per_feat)\n    : row_ptr{common::MakeFixedVecWithMalloc(info.num_row_ + 1, std::size_t{0})},\n      hit_count{common::MakeFixedVecWithMalloc(cuts.TotalBins(), std::size_t{0})},\n      cut{std::forward<common::HistogramCuts>(cuts)},\n      max_numeric_bins_per_feat(max_bin_per_feat),\n      isDense_{info.IsDense()} {}\n\nGHistIndexMatrix::GHistIndexMatrix(bst_idx_t n_samples, bst_idx_t base_rowid,\n                                   common::HistogramCuts &&cuts, bst_bin_t max_bin_per_feat,\n                                   bool is_dense)\n    : row_ptr{common::MakeFixedVecWithMalloc(n_samples + 1, std::size_t{0})},\n      hit_count{common::MakeFixedVecWithMalloc(cuts.TotalBins(), std::size_t{0})},\n      cut{std::forward<common::HistogramCuts>(cuts)},\n      max_numeric_bins_per_feat(max_bin_per_feat),\n      base_rowid{base_rowid},\n      isDense_{is_dense} {}\n\n#if !defined(XGBOOST_USE_CUDA)\nGHistIndexMatrix::GHistIndexMatrix(Context const *, MetaInfo const &, EllpackPage const &,\n                                   BatchParam const &) {\n  common::AssertGPUSupport();\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nGHistIndexMatrix::~GHistIndexMatrix() = default;\n\nvoid GHistIndexMatrix::PushBatch(Context const *ctx, SparsePage const &batch,\n                                 common::Span<FeatureType const> ft) {\n  auto page = batch.GetView();\n  auto it = common::MakeIndexTransformIter([&](std::size_t ridx) { return page[ridx].size(); });\n  common::PartialSum(ctx->Threads(), it, it + page.Size(), static_cast<size_t>(0), row_ptr.begin());\n  data::SparsePageAdapterBatch adapter_batch{page};\n  auto is_valid = [](auto) {\n    return true;\n  };  // SparsePage always contains valid entries\n 
 PushBatchImpl(ctx, adapter_batch, 0, is_valid, ft);\n}\n\nGHistIndexMatrix::GHistIndexMatrix(Context const *ctx, SparsePage const &batch,\n                                   common::Span<FeatureType const> ft, common::HistogramCuts cuts,\n                                   bst_bin_t max_bins_per_feat, bool is_dense, double sparse_thresh)\n    : cut{std::move(cuts)},\n      max_numeric_bins_per_feat{max_bins_per_feat},\n      base_rowid{batch.base_rowid},\n      isDense_{is_dense} {\n  CHECK_EQ(row_ptr.size(), 0);\n  row_ptr = common::MakeFixedVecWithMalloc(batch.Size() + 1, std::size_t{0});\n\n  const uint32_t nbins = cut.Ptrs().back();\n  hit_count = common::MakeFixedVecWithMalloc(nbins, std::size_t{0});\n  auto n_threads = ctx->Threads();\n  hit_count_tloc_.resize(n_threads * nbins, 0);\n\n  this->PushBatch(ctx, batch, ft);\n  this->columns_ = std::make_unique<common::ColumnMatrix>();\n  if (!std::isnan(sparse_thresh)) {\n    this->columns_->InitFromSparse(batch, *this, sparse_thresh, n_threads);\n  }\n}\n\ntemplate <typename Batch>\nvoid GHistIndexMatrix::PushAdapterBatchColumns(Context const *ctx, Batch const &batch,\n                                               float missing, size_t rbegin) {\n  CHECK(columns_);\n  this->columns_->PushBatch(ctx->Threads(), batch, missing, *this, rbegin);\n}\n\n#define INSTANTIATION_PUSH(BatchT)                                 \\\n  template void GHistIndexMatrix::PushAdapterBatchColumns<BatchT>( \\\n      Context const *ctx, BatchT const &batch, float missing, size_t rbegin);\n\nINSTANTIATION_PUSH(data::CSRArrayAdapterBatch)\nINSTANTIATION_PUSH(data::ArrayAdapterBatch)\nINSTANTIATION_PUSH(data::DenseAdapterBatch)\nINSTANTIATION_PUSH(data::SparsePageAdapterBatch)\nINSTANTIATION_PUSH(data::ColumnarAdapterBatch)\nINSTANTIATION_PUSH(data::EncColumnarAdapterBatch)\n\n#undef INSTANTIATION_PUSH\n\nvoid GHistIndexMatrix::ResizeColumns(double sparse_thresh) {\n  CHECK(!std::isnan(sparse_thresh));\n  this->columns_ = 
std::make_unique<common::ColumnMatrix>(*this, sparse_thresh);\n}\n\nvoid GHistIndexMatrix::ResizeIndex(Context const *ctx, const size_t n_index, const bool isDense) {\n  auto make_index = [this, ctx, n_index](auto t, common::BinTypeSize t_size) {\n    // Must resize instead of allocating a new one. This function is called every time a\n    // new batch is pushed, and we grow the size accordingly without losing the data in\n    // the previous batches.\n    using T = decltype(t);\n    std::size_t n_bytes = sizeof(T) * n_index;\n    CHECK_GE(n_bytes, this->data.size());\n\n    auto resource = this->data.Resource();\n    decltype(this->data) new_vec;\n    if (!resource) {\n      CHECK(this->data.empty());\n      new_vec = common::MakeFixedVecWithMalloc(ctx, n_bytes, std::uint8_t{0});\n    } else {\n      CHECK(resource->Type() == common::ResourceHandler::kMalloc);\n      auto malloc_resource = std::dynamic_pointer_cast<common::MallocResource>(resource);\n      CHECK(malloc_resource);\n      malloc_resource->Resize(n_bytes);\n\n      // gcc-11.3 doesn't work if DataAs is used.\n      std::uint8_t *new_ptr = reinterpret_cast<std::uint8_t *>(malloc_resource->Data());\n      new_vec = {new_ptr, n_bytes / sizeof(std::uint8_t), malloc_resource};\n    }\n    this->data = std::move(new_vec);\n    this->index =\n        common::Index{common::Span{data.data(), static_cast<size_t>(data.size())}, t_size};\n  };\n\n  if ((MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) &&\n      isDense) {\n    // compress dense index to uint8\n    make_index(std::uint8_t{}, common::kUint8BinsTypeSize);\n  } else if ((MaxNumBinPerFeat() - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&\n              MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) &&\n             isDense) {\n    // compress dense index to uint16\n    make_index(std::uint16_t{}, common::kUint16BinsTypeSize);\n  } else {\n    // no compression\n    
make_index(std::uint32_t{}, common::kUint32BinsTypeSize);\n  }\n}\n\ncommon::ColumnMatrix const &GHistIndexMatrix::Transpose() const {\n  CHECK(columns_);\n  return *columns_;\n}\n\nbst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {\n  auto begin = RowIdx(ridx);\n  if (IsDense()) {\n    return static_cast<bst_bin_t>(this->index[begin + fidx]);\n  }\n  auto end = RowIdx(ridx + 1);\n  auto const &cut_ptrs = cut.Ptrs();\n  auto f_begin = cut_ptrs[fidx];\n  auto f_end = cut_ptrs[fidx + 1];\n  return BinarySearchBin(begin, end, this->index, f_begin, f_end);\n}\n\nfloat GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {\n  auto const &values = cut.Values();\n  auto const &ptrs = cut.Ptrs();\n  return this->GetFvalue(ptrs, values, ridx, fidx, is_cat);\n}\n\nfloat GetFvalueImpl(std::vector<std::uint32_t> const &ptrs, std::vector<float> const &values,\n                    bst_idx_t ridx, bst_feature_t fidx, bst_idx_t base_rowid,\n                    std::unique_ptr<common::ColumnMatrix> const &columns_) {\n  auto get_bin_val = [&](auto &column) {\n    auto bin_idx = column[ridx - base_rowid];\n    if (bin_idx == common::DenseColumnIter<uint8_t, true>::kMissingId) {\n      return std::numeric_limits<float>::quiet_NaN();\n    }\n    return common::HistogramCuts::NumericBinValue(ptrs, values, fidx, bin_idx);\n  };\n  switch (columns_->GetColumnType(fidx)) {\n    case common::kDenseColumn: {\n      if (columns_->AnyMissing()) {\n        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {\n          auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);\n          return get_bin_val(column);\n        });\n      } else {\n        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {\n          auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);\n          auto bin_idx = column[ridx - base_rowid];\n          return common::HistogramCuts::NumericBinValue(ptrs, values, 
fidx, bin_idx);\n        });\n      }\n    }\n    case common::kSparseColumn: {\n      return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {\n        auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);\n        return get_bin_val(column);\n      });\n    }\n  }\n\n  SPAN_CHECK(false);\n  return std::numeric_limits<float>::quiet_NaN();\n}\n\nbool GHistIndexMatrix::ReadColumnPage(common::AlignedResourceReadStream *fi) {\n  return this->columns_->Read(fi, this->cut.Ptrs().data());\n}\n\nstd::size_t GHistIndexMatrix::WriteColumnPage(common::AlignedFileWriteStream *fo) const {\n  return this->columns_->Write(fo);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/gradient_index.cu",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#include <cstddef>  // for size_t\n#include <memory>   // for unique_ptr\n#include <vector>   // for vector\n\n#include \"../common/column_matrix.h\"\n#include \"../common/hist_util.h\"  // Index\n#include \"ellpack_page.cuh\"\n#include \"gradient_index.h\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost {\n// Similar to GHistIndexMatrix::SetIndexData, but without the need for adaptor or bin\n// searching. Is there a way to unify the code?\ntemplate <typename BinT, typename DecompressOffset>\nvoid SetIndexData(Context const* ctx, EllpackPageImpl const* page,\n                  std::vector<size_t>* p_hit_count_tloc, DecompressOffset&& get_offset,\n                  GHistIndexMatrix* out) {\n  page->VisitOnHost(ctx, [&](auto&& accessor) {\n    auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());\n\n    auto index_data_span = common::Span{out->index.data<BinT>(), out->index.Size()};\n    auto n_bins_total = page->Cuts().TotalBins();\n\n    auto& hit_count_tloc = *p_hit_count_tloc;\n    hit_count_tloc.clear();\n    hit_count_tloc.resize(ctx->Threads() * n_bins_total, 0);\n    bool dense_compressed = page->IsDenseCompressed() && !page->IsDense();\n    common::ParallelFor(page->Size(), ctx->Threads(), [&](auto ridx) {\n      auto tid = omp_get_thread_num();\n      size_t in_rbegin = page->info.row_stride * ridx;\n      size_t out_rbegin = out->row_ptr[ridx];\n      if (dense_compressed) {\n        for (std::size_t j = 0, k = 0; j < page->info.row_stride; ++j) {\n          bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j];\n          if (XGBOOST_EXPECT((bin_idx != kNull), true)) {  // relatively dense\n            bin_idx = get_offset(bin_idx, j);\n            index_data_span[out_rbegin + k++] = bin_idx;\n            ++hit_count_tloc[tid * n_bins_total + bin_idx];\n          }\n        }\n      } else {\n        auto r_size = out->row_ptr[ridx + 1] - out->row_ptr[ridx];\n        for 
(size_t j = 0; j < r_size; ++j) {\n          bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j];\n          assert(bin_idx != kNull);\n          index_data_span[out_rbegin + j] = bin_idx;\n          ++hit_count_tloc[tid * n_bins_total + get_offset(bin_idx, j)];\n        }\n      }\n    });\n  });\n}\n\nvoid GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page,\n                          common::RefResourceView<std::size_t>* p_out) {\n  auto& row_ptr = *p_out;\n  row_ptr = common::MakeFixedVecWithMalloc(page->Size() + 1, std::size_t{0});\n  if (page->IsDense()) {\n    std::fill(row_ptr.begin() + 1, row_ptr.end(), page->info.row_stride);\n  } else {\n    page->VisitOnHost(ctx, [&](auto& accessor) {\n      auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());\n\n      common::ParallelFor(page->Size(), ctx->Threads(), [&](auto i) {\n        size_t ibegin = page->info.row_stride * i;\n        for (size_t j = 0; j < page->info.row_stride; ++j) {\n          bst_bin_t bin_idx = accessor.gidx_iter[ibegin + j];\n          if (bin_idx != kNull) {\n            row_ptr[i + 1]++;\n          }\n        }\n      });\n    });\n  }\n  std::partial_sum(row_ptr.begin(), row_ptr.end(), row_ptr.begin());\n}\n\nGHistIndexMatrix::GHistIndexMatrix(Context const* ctx, MetaInfo const& info,\n                                   EllpackPage const& in_page, BatchParam const& p)\n    : cut{in_page.Cuts()},\n      max_numeric_bins_per_feat{p.max_bin},\n      isDense_{in_page.Impl()->IsDense()},\n      base_rowid{in_page.BaseRowId()} {\n  auto page = in_page.Impl();\n  CHECK_EQ(info.num_row_, in_page.Size());\n\n  // pull to host early, prevent race condition\n  this->cut.Ptrs();\n  this->cut.Values();\n\n  this->ResizeIndex(ctx, info.num_nonzero_, page->IsDense());\n  if (page->IsDense()) {\n    this->index.SetBinOffset(page->Cuts().Ptrs());\n  }\n\n  auto offset = page->Cuts().cut_ptrs_.ConstHostSpan();\n  auto n_bins_total = page->Cuts().TotalBins();\n  
GetRowPtrFromEllpack(ctx, page, &this->row_ptr);\n  if (page->IsDenseCompressed()) {\n    common::DispatchBinType(this->index.GetBinTypeSize(), [&](auto dtype) {\n      using T = decltype(dtype);\n      ::xgboost::SetIndexData<T>(\n          ctx, page, &hit_count_tloc_,\n          [offset](bst_bin_t bin_idx, bst_feature_t fidx) { return bin_idx + offset[fidx]; }, this);\n    });\n  } else {\n    // no compression\n    ::xgboost::SetIndexData<uint32_t>(\n        ctx, page, &hit_count_tloc_, [&](auto bin_idx, auto) { return bin_idx; }, this);\n  }\n\n  this->hit_count = common::MakeFixedVecWithMalloc(n_bins_total, std::size_t{0});\n  this->GatherHitCount(ctx->Threads(), n_bins_total);\n\n  // sanity checks\n  CHECK_EQ(this->Features(), in_page.Cuts().NumFeatures());\n  CHECK_EQ(this->Size(), info.num_row_);\n  CHECK(this->cut.cut_ptrs_.HostCanRead());\n  CHECK(this->cut.cut_values_.HostCanRead());\n\n  this->columns_ = std::make_unique<common::ColumnMatrix>(*this, p.sparse_thresh);\n  this->columns_->InitFromGHist(ctx, *this);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/data/gradient_index.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n * \\brief Data type for fast histogram aggregation.\n */\n#ifndef XGBOOST_DATA_GRADIENT_INDEX_H_\n#define XGBOOST_DATA_GRADIENT_INDEX_H_\n\n#include <algorithm>  // for min\n#include <atomic>     // for atomic\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t\n#include <limits>     // for numeric_limits\n#include <memory>     // for make_unique\n#include <vector>     // for vector\n\n#include \"../common/categorical.h\"\n#include \"../common/error_msg.h\"  // for InfInData\n#include \"../common/hist_util.h\"\n#include \"../common/numeric.h\"\n#include \"../common/ref_resource_view.h\"  // for RefResourceView\n#include \"../common/threading_utils.h\"\n#include \"../common/transform_iterator.h\"  // for MakeIndexTransformIter\n#include \"entry.h\"                         // for IsValidFunctor\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost {\nnamespace common {\nclass ColumnMatrix;\nclass AlignedFileWriteStream;\n}  // namespace common\n\nfloat GetFvalueImpl(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,\n                    bst_idx_t ridx, bst_feature_t fidx, bst_idx_t base_rowid,\n                    std::unique_ptr<common::ColumnMatrix> const& columns_);\n\n/**\n * @brief preprocessed global index matrix, in CSR format.\n *\n *  Transform floating values to integer index in histogram This is a global histogram\n *  index for CPU histogram.  
On GPU ellpack page is used.\n */\nclass GHistIndexMatrix {\n  // Get the size of each row\n  template <typename AdapterBatchT>\n  static auto GetRowCounts(AdapterBatchT const& batch, float missing, int32_t n_threads) {\n    std::vector<size_t> valid_counts(batch.Size(), 0);\n    auto is_valid = data::IsValidFunctor{missing};\n    common::ParallelFor(batch.Size(), n_threads, [&](size_t i) {\n      auto line = batch.GetLine(i);\n      for (size_t j = 0; j < line.Size(); ++j) {\n        data::COOTuple elem = line.GetElement(j);\n        if (is_valid(elem)) {\n          valid_counts[i]++;\n        }\n      }\n    });\n    return valid_counts;\n  }\n\n  /**\n   * @brief Push a sparse page into the index matrix.\n   */\n  void PushBatch(Context const* ctx, SparsePage const& batch, common::Span<FeatureType const> ft);\n\n  template <typename Batch, typename BinIdxType, typename GetOffset, typename IsValid>\n  void SetIndexData(common::Span<BinIdxType> index_data_span, size_t rbegin,\n                    common::Span<FeatureType const> ft, size_t batch_threads, Batch const& batch,\n                    IsValid&& is_valid, size_t nbins, GetOffset&& get_offset) {\n    auto batch_size = batch.Size();\n    BinIdxType* index_data = index_data_span.data();\n    auto const& ptrs = cut.Ptrs();\n    auto const& values = cut.Values();\n    std::atomic<bool> valid{true};\n    common::ParallelFor(batch_size, batch_threads, [&](size_t i) {\n      auto line = batch.GetLine(i);\n      size_t ibegin = row_ptr[rbegin + i];  // index of first entry for current block\n      size_t k = 0;\n      auto tid = omp_get_thread_num();\n      for (size_t j = 0; j < line.Size(); ++j) {\n        data::COOTuple elem = line.GetElement(j);\n        if (is_valid(elem)) {\n          if (XGBOOST_EXPECT((std::isinf(elem.value)), false)) {\n            valid = false;\n          }\n          bst_bin_t bin_idx{-1};\n          if (common::IsCat(ft, elem.column_idx)) {\n            bin_idx = 
cut.SearchCatBin(elem.value, elem.column_idx, ptrs, values);\n          } else {\n            bin_idx = cut.SearchBin(elem.value, elem.column_idx, ptrs, values);\n          }\n          index_data[ibegin + k] = get_offset(bin_idx, j);\n          ++hit_count_tloc_[tid * nbins + bin_idx];\n          ++k;\n        }\n      }\n    });\n\n    CHECK(valid) << error::InfInData();\n  }\n\n  // Gather hit_count from all threads\n  void GatherHitCount(int32_t n_threads, bst_bin_t n_bins_total) {\n    CHECK_EQ(hit_count.size(), n_bins_total);\n    common::ParallelFor(n_bins_total, n_threads, [&](bst_omp_uint idx) {\n      for (int32_t tid = 0; tid < n_threads; ++tid) {\n        hit_count[idx] += hit_count_tloc_[tid * n_bins_total + idx];\n        hit_count_tloc_[tid * n_bins_total + idx] = 0;  // reset for next batch\n      }\n    });\n  }\n\n  template <typename Batch, typename IsValid>\n  void PushBatchImpl(Context const* ctx, Batch const& batch, size_t rbegin, IsValid&& is_valid,\n                     common::Span<FeatureType const> ft) {\n    // The number of threads is pegged to the batch size. 
If the OMP block is parallelized\n    // on anything other than the batch/block size, it should be reassigned\n    auto n_threads = ctx->Threads();\n    size_t batch_threads =\n        std::max(static_cast<size_t>(1), std::min(batch.Size(), static_cast<size_t>(n_threads)));\n\n    auto n_bins_total = cut.TotalBins();\n    const size_t n_index = row_ptr[rbegin + batch.Size()];  // number of entries in this page\n    ResizeIndex(ctx, n_index, isDense_);\n    if (isDense_) {\n      index.SetBinOffset(cut.Ptrs());\n    }\n    if (isDense_) {\n      common::DispatchBinType(index.GetBinTypeSize(), [&](auto dtype) {\n        using T = decltype(dtype);\n        common::Span<T> index_data_span = {index.data<T>(), index.Size()};\n        SetIndexData(index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,\n                     index.MakeCompressor<T>());\n      });\n    } else {\n      common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};\n      // no compression\n      SetIndexData(index_data_span, rbegin, ft, batch_threads, batch, is_valid, n_bins_total,\n                   [](auto idx, auto) { return idx; });\n    }\n    this->GatherHitCount(n_threads, n_bins_total);\n  }\n\n  // The function is only created to avoid using the column matrix in the header.\n  void ResizeColumns(double sparse_thresh);\n\n public:\n  /** @brief row pointer to rows by element position */\n  common::RefResourceView<std::size_t> row_ptr;\n  /** @brief data storage for index. */\n  common::RefResourceView<std::uint8_t> data;\n  /** @brief The histogram index. */\n  common::Index index;\n  /** @brief hit count of each index, used for constructing the ColumnMatrix */\n  common::RefResourceView<std::size_t> hit_count;\n  /** @brief The corresponding cuts */\n  common::HistogramCuts cut{0};\n  /** @brief max_bin for each feature. 
*/\n  bst_bin_t max_numeric_bins_per_feat;\n  /** @brief base row index for current page (used by external memory) */\n  bst_idx_t base_rowid{0};\n\n  [[nodiscard]] bst_bin_t MaxNumBinPerFeat() const {\n    return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);\n  }\n\n  ~GHistIndexMatrix();\n  /**\n   * @brief Constructor for SimpleDMatrix.\n   */\n  GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat,\n                   double sparse_thresh, bool sorted_sketch, common::Span<float const> hess = {});\n  /**\n   * @brief Constructor for Quantile DMatrix. Initialize basic information and prepare\n   *        for push batch.\n   */\n  GHistIndexMatrix(MetaInfo const& info, common::HistogramCuts&& cuts, bst_bin_t max_bin_per_feat);\n\n  /**\n   * @brief Constructor for the external memory Quantile DMatrix. Initialize basic\n   *        information and prepare for push batch.\n   */\n  GHistIndexMatrix(bst_idx_t n_samples, bst_idx_t base_rowid, common::HistogramCuts&& cuts,\n                   bst_bin_t max_bin_per_feat, bool is_dense);\n\n  /**\n   * @brief Constructor for Quantile DMatrix where we might copy an existing ellpack page\n   *        to host gradient index.\n   */\n  GHistIndexMatrix(Context const* ctx, MetaInfo const& info, EllpackPage const& page,\n                   BatchParam const& p);\n\n  /**\n   * @brief Constructor for external memory.\n   */\n  GHistIndexMatrix(Context const* ctx, SparsePage const& page, common::Span<FeatureType const> ft,\n                   common::HistogramCuts cuts, bst_bin_t max_bins_per_feat, bool is_dense,\n                   double sparse_thresh);\n  GHistIndexMatrix();  // also for ext mem, empty ctor so that we can read the cache back.\n\n  /**\n   * @brief Push a single batch into the gradient index.\n   *\n   * @param n_samples_total The total number of rows for all batches, create a column\n   *        matrix once all batches are pushed.\n   */\n  template 
<typename Batch>\n  void PushAdapterBatch(Context const* ctx, std::size_t rbegin, std::size_t prev_sum,\n                        Batch const& batch, float missing, common::Span<FeatureType const> ft,\n                        double sparse_thresh, bst_idx_t n_samples_total) {\n    auto n_bins_total = cut.TotalBins();\n    hit_count_tloc_.clear();\n    hit_count_tloc_.resize(ctx->Threads() * n_bins_total, 0);\n\n    auto n_threads = ctx->Threads();\n    auto valid_counts = GetRowCounts(batch, missing, n_threads);\n\n    auto it = common::MakeIndexTransformIter([&](size_t ridx) { return valid_counts[ridx]; });\n    common::PartialSum(n_threads, it, it + batch.Size(), prev_sum, row_ptr.begin() + rbegin);\n    auto is_valid = data::IsValidFunctor{missing};\n\n    PushBatchImpl(ctx, batch, rbegin, is_valid, ft);\n\n    if (rbegin + batch.Size() == n_samples_total) {\n      // finished\n      this->ResizeColumns(sparse_thresh);\n    }\n  }\n\n  // Call ColumnMatrix::PushBatch\n  template <typename Batch>\n  void PushAdapterBatchColumns(Context const* ctx, Batch const& batch, float missing,\n                               size_t rbegin);\n\n  void ResizeIndex(Context const* ctx, const size_t n_index, const bool isDense);\n\n  void GetFeatureCounts(size_t* counts) const {\n    auto nfeature = cut.Ptrs().size() - 1;\n    for (unsigned fid = 0; fid < nfeature; ++fid) {\n      auto ibegin = cut.Ptrs()[fid];\n      auto iend = cut.Ptrs()[fid + 1];\n      for (auto i = ibegin; i < iend; ++i) {\n        counts[fid] += hit_count[i];\n      }\n    }\n  }\n\n  [[nodiscard]] bool IsDense() const { return isDense_; }\n  void SetDense(bool is_dense) { isDense_ = is_dense; }\n  [[nodiscard]] bst_idx_t BaseRowId() const { return base_rowid; }\n  /**\n   * @brief Get the local row index from the global row index.\n   */\n  [[nodiscard]] bst_idx_t RowIdx(bst_idx_t gridx) const {\n    return row_ptr[gridx - this->base_rowid];\n  }\n\n  [[nodiscard]] bst_idx_t Size() const { return 
row_ptr.empty() ? 0 : row_ptr.size() - 1; }\n  [[nodiscard]] bst_feature_t Features() const { return cut.NumFeatures(); }\n\n  [[nodiscard]] bool ReadColumnPage(common::AlignedResourceReadStream* fi);\n  [[nodiscard]] std::size_t WriteColumnPage(common::AlignedFileWriteStream* fo) const;\n\n  [[nodiscard]] common::ColumnMatrix const& Transpose() const;\n\n  [[nodiscard]] bst_bin_t GetGindex(size_t ridx, size_t fidx) const;\n\n  [[nodiscard]] float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;\n  [[nodiscard]] float GetFvalue(std::vector<std::uint32_t> const& ptrs,\n                                std::vector<float> const& values, bst_idx_t ridx,\n                                bst_feature_t fidx, bool is_cat) const {\n    if (is_cat) {\n      auto gidx = GetGindex(ridx, fidx);\n      if (gidx == -1) {\n        return std::numeric_limits<float>::quiet_NaN();\n      }\n      return values[gidx];\n    }\n    if (this->IsDense()) {\n      auto begin = RowIdx(ridx);\n      auto bin_idx = this->index[begin + fidx];\n      return common::HistogramCuts::NumericBinValue(ptrs, values, fidx, bin_idx);\n    }\n    return GetFvalueImpl(ptrs, values, ridx, fidx, this->base_rowid, this->columns_);\n  }\n\n  [[nodiscard]] common::HistogramCuts& Cuts() { return cut; }\n  [[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }\n\n private:\n  std::unique_ptr<common::ColumnMatrix> columns_;\n  std::vector<size_t> hit_count_tloc_;\n  bool isDense_;\n};\n\n/**\n * \\brief Helper for recovering feature index from row-based storage of histogram\n *        bin. 
(`GHistIndexMatrix`).\n *\n * \\param assign A callback function that takes bin index, index into the whole batch, row\n *               index and feature index\n */\ntemplate <typename Fn>\nvoid AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) {\n  auto const batch_size = page.Size();\n  auto const& ptrs = page.cut.Ptrs();\n  std::size_t k{0};\n\n  auto dense = page.IsDense();\n\n  common::DispatchBinType(page.index.GetBinTypeSize(), [&](auto t) {\n    using BinT = decltype(t);\n    auto const& index = page.index;\n    for (std::size_t ridx = 0; ridx < batch_size; ++ridx) {\n      auto r_beg = page.row_ptr[ridx];\n      auto r_end = page.row_ptr[ridx + 1];\n      bst_feature_t fidx{0};\n      if (dense) {\n        // compressed, use the operator to obtain the true value.\n        for (std::size_t j = r_beg; j < r_end; ++j) {\n          bst_feature_t fidx = j - r_beg;\n          std::uint32_t bin_idx = index[k];\n          assign(bin_idx, k, ridx, fidx);\n          ++k;\n        }\n      } else {\n        // not compressed\n        auto const* row_index = index.data<BinT>() + page.row_ptr[page.base_rowid];\n        for (std::size_t j = r_beg; j < r_end; ++j) {\n          std::uint32_t bin_idx = row_index[k];\n          // find the feature index for current bin.\n          while (bin_idx >= ptrs[fidx + 1]) {\n            fidx++;\n          }\n          assign(bin_idx, k, ridx, fidx);\n          ++k;\n        }\n      }\n    }\n  });\n}\n}  // namespace xgboost\n#endif  // XGBOOST_DATA_GRADIENT_INDEX_H_\n"
  },
  {
    "path": "src/data/gradient_index_format.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include \"gradient_index_format.h\"\n\n#include <cstddef>      // for size_t\n#include <cstdint>      // for uint8_t\n#include <type_traits>  // for underlying_type_t\n#include <vector>       // for vector\n\n#include \"../common/hist_util.h\"          // for HistogramCuts\n#include \"../common/io.h\"                 // for AlignedResourceReadStream\n#include \"../common/nvtx_utils.h\"         // for xgboost_NVTX_FN_RANGE\n#include \"../common/ref_resource_view.h\"  // for ReadVec, WriteVec\n#include \"gradient_index.h\"               // for GHistIndexMatrix\n\nnamespace xgboost::data {\n[[nodiscard]] bool GHistIndexRawFormat::Read(GHistIndexMatrix* page,\n                                             common::AlignedResourceReadStream* fi) {\n  xgboost_NVTX_FN_RANGE();\n  CHECK(fi);\n\n  page->Cuts() = this->cuts_;\n  // indptr\n  if (!common::ReadVec(fi, &page->row_ptr)) {\n    return false;\n  }\n\n  // data\n  // - bin type\n  // Old gcc doesn't support reading from enum.\n  std::underlying_type_t<common::BinTypeSize> uint_bin_type{0};\n  if (!fi->Read(&uint_bin_type)) {\n    return false;\n  }\n  common::BinTypeSize size_type = static_cast<common::BinTypeSize>(uint_bin_type);\n  // - index buffer\n  if (!common::ReadVec(fi, &page->data)) {\n    return false;\n  }\n  // - index\n  page->index = common::Index{\n      common::Span{page->data.data(), static_cast<size_t>(page->data.size())}, size_type};\n\n  // hit count\n  if (!common::ReadVec(fi, &page->hit_count)) {\n    return false;\n  }\n  if (!fi->Read(&page->max_numeric_bins_per_feat)) {\n    return false;\n  }\n  if (!fi->Read(&page->base_rowid)) {\n    return false;\n  }\n  bool is_dense = false;\n  if (!fi->Read(&is_dense)) {\n    return false;\n  }\n  page->SetDense(is_dense);\n  if (is_dense) {\n    page->index.SetBinOffset(page->cut.Ptrs());\n  }\n\n  if (!page->ReadColumnPage(fi)) {\n    return false;\n  }\n  return 
true;\n}\n\n[[nodiscard]] std::size_t GHistIndexRawFormat::Write(GHistIndexMatrix const& page,\n                                                     common::AlignedFileWriteStream* fo) {\n  std::size_t bytes = 0;\n  // indptr\n  bytes += common::WriteVec(fo, page.row_ptr);\n\n  // data\n  // - bin type\n  std::underlying_type_t<common::BinTypeSize> uint_bin_type = page.index.GetBinTypeSize();\n  bytes += fo->Write(uint_bin_type);\n  // - index buffer\n  std::vector<std::uint8_t> data(page.index.begin(), page.index.end());\n  bytes += fo->Write(static_cast<std::uint64_t>(data.size()));\n  if (!data.empty()) {\n    bytes += fo->Write(data.data(), data.size());\n  }\n\n  // hit count\n  bytes += common::WriteVec(fo, page.hit_count);\n  // max_bins, base row, is_dense\n  bytes += fo->Write(page.max_numeric_bins_per_feat);\n  bytes += fo->Write(page.base_rowid);\n  bytes += fo->Write(page.IsDense());\n\n  bytes += page.WriteColumnPage(fo);\n  return bytes;\n}\n\nDMLC_REGISTRY_FILE_TAG(gradient_index_format);\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/gradient_index_format.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <utility>  // for move\n\n#include \"../common/hist_util.h\"  // for HistogramCuts\n#include \"../common/io.h\"         // for AlignedFileWriteStream\n#include \"gradient_index.h\"       // for GHistIndexMatrix\n#include \"sparse_page_writer.h\"   // for SparsePageFormat\n\nnamespace xgboost::common {\nclass HistogramCuts;\n}\n\nnamespace xgboost::data {\nclass GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {\n  common::HistogramCuts cuts_;\n\n public:\n  [[nodiscard]] bool Read(GHistIndexMatrix* page, common::AlignedResourceReadStream* fi) override;\n  [[nodiscard]] std::size_t Write(GHistIndexMatrix const& page,\n                                  common::AlignedFileWriteStream* fo) override;\n\n  explicit GHistIndexRawFormat(common::HistogramCuts cuts) : cuts_{std::move(cuts)} {}\n};\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/gradient_index_page_source.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include \"gradient_index_page_source.h\"\n\n#include <memory>   // for make_shared\n#include <utility>  // for move\n\n#include \"../common/hist_util.h\"  // for HistogramCuts\n#include \"gradient_index.h\"       // for GHistIndexMatrix\n#include \"proxy_dmatrix.h\"        // for DispatchAny\n\nnamespace xgboost::data {\nvoid GradientIndexPageSource::Fetch() {\n  if (!this->ReadCache()) {\n    auto ctx = Context{};\n    ctx.Init(Args{{\"nthread\", std::to_string(nthreads_)}});\n    // source is initialized to be the 0th page during construction, so when count_ is 0\n    // there's no need to increment the source.\n    if (this->count_ != 0 && !this->sync_) {\n      // The mixin doesn't sync the source if `sync_` is false, we need to sync it\n      // ourselves.\n      ++(*source_);\n    }\n    // This is not read from cache so we still need it to be synced with sparse page source.\n    CHECK_EQ(this->count_, this->source_->Iter());\n    auto const& csr = this->source_->Page();\n    CHECK_NE(this->cuts_.Values().size(), 0);\n    this->page_.reset(new GHistIndexMatrix{&ctx, *csr, feature_types_, cuts_, max_bin_per_feat_,\n                                           is_dense_, sparse_thresh_});\n    this->WriteCache();\n  }\n}\n\nvoid ExtGradientIndexPageSource::Fetch() {\n  if (!this->ReadCache()) {\n    CHECK_EQ(count_, source_->Iter());\n    CHECK_NE(cuts_.Values().size(), 0);\n    cpu_impl::DispatchAny(proxy_, [this](auto const& value) {\n      CHECK(this->proxy_->Ctx()->IsCPU()) << \"All batches must use the same device type.\";\n      auto h_feature_types = proxy_->Info().feature_types.ConstHostSpan();\n      // This does three things:\n      // - Generate CSR matrix for gradient index.\n      // - Generate the column matrix for gradient index.\n      // - Concatenate the meta info.\n      common::HistogramCuts cuts{this->cuts_};\n      CHECK_EQ(this->cuts_.MaxCategory(), 
cuts.MaxCategory());\n      if (this->cuts_.HasCategorical()) {\n        CHECK(!h_feature_types.empty());\n      }\n      this->page_.reset();\n      // The external iterator has the data when the `next` method is called. Therefore,\n      // it's one step ahead of this source.\n\n      // FIXME(jiamingy): For now, we use the `info->IsDense()` to represent all batches\n      // similar to the sparse DMatrix source. We should use per-batch property with proxy\n      // DMatrix info instead. This requires more fine-grained tests.\n      this->page_ =\n          std::make_shared<GHistIndexMatrix>(value.NumRows(), this->base_rows_.at(source_->Iter()),\n                                             std::move(cuts), this->p_.max_bin, info_->IsDense());\n      bst_idx_t prev_sum = 0;\n      bst_idx_t rbegin = 0;\n      // Use `value.NumRows()` for the size of a single batch. Unlike the\n      // `IterativeDMatrix`, external memory doesn't concatenate the pages.\n      this->page_->PushAdapterBatch(ctx_, rbegin, prev_sum, value, this->missing_, h_feature_types,\n                                    this->p_.sparse_thresh, value.NumRows());\n      this->page_->PushAdapterBatchColumns(ctx_, value, this->missing_, rbegin);\n      this->info_->Extend(proxy_->Info(), false, false);\n    });\n    this->WriteCache();\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/gradient_index_page_source.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_\n#define XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_\n\n#include <cmath>    // for isnan\n#include <cstdint>  // for int32_t\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"../common/hist_util.h\"    // for HistogramCuts\n#include \"gradient_index.h\"         // for GHistIndexMatrix\n#include \"gradient_index_format.h\"  // for GHistIndexRawFormat\n#include \"sparse_page_source.h\"     // for PageSourceIncMixIn\n#include \"xgboost/base.h\"           // for bst_feature_t\n#include \"xgboost/data.h\"           // for BatchParam, FeatureType\n#include \"xgboost/span.h\"           // for Span\n\nnamespace xgboost::data {\n/**\n * @brief Policy for creating ghist index format. The storage is default (disk).\n */\ntemplate <typename S>\nclass GHistIndexFormatPolicy {\n protected:\n  common::HistogramCuts cuts_{0};\n\n public:\n  using FormatT = SparsePageFormat<GHistIndexMatrix>;\n\n public:\n  [[nodiscard]] auto CreatePageFormat(BatchParam const&) const {\n    std::unique_ptr<FormatT> fmt{new GHistIndexRawFormat{cuts_}};\n    return fmt;\n  }\n\n  void SetCuts(common::HistogramCuts cuts) { std::swap(cuts_, cuts); }\n};\n\nclass GradientIndexPageSource\n    : public PageSourceIncMixIn<\n          GHistIndexMatrix, DefaultFormatStreamPolicy<GHistIndexMatrix, GHistIndexFormatPolicy>> {\n  bool is_dense_;\n  std::int32_t max_bin_per_feat_;\n  common::Span<FeatureType const> feature_types_;\n  double sparse_thresh_;\n\n public:\n  GradientIndexPageSource(Context const* ctx, float missing, bst_feature_t n_features,\n                          bst_idx_t n_batches, std::shared_ptr<Cache> cache, BatchParam param,\n                          common::HistogramCuts cuts, bool is_dense,\n                          common::Span<FeatureType const> feature_types,\n                          
std::shared_ptr<SparsePageSource> source)\n      : PageSourceIncMixIn(missing, ctx->Threads(), n_features, n_batches, cache,\n                           std::isnan(param.sparse_thresh)),\n        is_dense_{is_dense},\n        max_bin_per_feat_{param.max_bin},\n        feature_types_{feature_types},\n        sparse_thresh_{param.sparse_thresh} {\n    this->source_ = source;\n    this->SetCuts(std::move(cuts));\n    if (this->cuts_.HasCategorical()) {\n      CHECK(!this->feature_types_.empty());\n    }\n    this->Fetch();\n  }\n\n  void Fetch() final;\n};\n\nclass ExtGradientIndexPageSource\n    : public ExtQantileSourceMixin<\n          GHistIndexMatrix, DefaultFormatStreamPolicy<GHistIndexMatrix, GHistIndexFormatPolicy>> {\n  BatchParam p_;\n\n  Context const* ctx_;\n  DMatrixProxy* proxy_;\n  MetaInfo* info_;\n\n  std::vector<bst_idx_t> base_rows_;\n\n public:\n  ExtGradientIndexPageSource(\n      Context const* ctx, float missing, MetaInfo* info, std::shared_ptr<Cache> cache,\n      BatchParam param, common::HistogramCuts cuts,\n      std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> source,\n      DMatrixProxy* proxy, std::vector<bst_idx_t> base_rows)\n      : ExtQantileSourceMixin{missing, ctx->Threads(), static_cast<bst_feature_t>(info->num_col_),\n                              source, cache},\n        p_{std::move(param)},\n        ctx_{ctx},\n        proxy_{proxy},\n        info_{info},\n        base_rows_{std::move(base_rows)} {\n    CHECK(!this->cache_info_->written);\n    this->source_->Reset();\n    CHECK(this->source_->Next());\n    this->SetCuts(std::move(cuts));\n    this->Fetch();\n  }\n\n  void Fetch() final;\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_GRADIENT_INDEX_PAGE_SOURCE_H_\n"
  },
  {
    "path": "src/data/iterative_dmatrix.cc",
    "content": "/**\n * Copyright 2022-2025, XGBoost contributors\n */\n#include \"iterative_dmatrix.h\"\n\n#include <algorithm>  // for copy\n#include <cstddef>    // for size_t\n#include <memory>     // for shared_ptr\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../common/categorical.h\"  // for IsCat\n#include \"../common/error_msg.h\"    // for Unreachable\n#include \"../common/hist_util.h\"    // for HistogramCuts\n#include \"../tree/param.h\"          // FIXME(jiamingy): Find a better way to share this parameter.\n#include \"batch_utils.h\"            // for RegenGHist\n#include \"cat_container.h\"          // for SyncCategories\n#include \"gradient_index.h\"         // for GHistIndexMatrix\n#include \"proxy_dmatrix.h\"          // for DataIterProxy, DispatchAny\n#include \"quantile_dmatrix.h\"       // for GetCutsFromRef\n#include \"quantile_dmatrix.h\"       // for GetDataShape, MakeSketches\n#include \"simple_batch_iterator.h\"  // for SimpleBatchIteratorImpl\n#include \"xgboost/data.h\"           // for FeatureType, DMatrix\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::data {\nIterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,\n                                   std::shared_ptr<DMatrix> ref, DataIterResetCallback* reset,\n                                   XGDMatrixCallbackNext* next, float missing, int nthread,\n                                   bst_bin_t max_bin)\n    : proxy_{proxy} {\n  // The external iterator, fetch the first batch\n  auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_handle, reset, next};\n  iter.Reset();\n  bool valid = iter.Next();\n  CHECK(valid) << \"Iterative DMatrix must have at least 1 batch.\";\n\n  auto pctx = MakeProxy(proxy_)->Ctx();\n\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", std::to_string(nthread)}, {\"device\", pctx->DeviceName()}});\n  // hardcoded parameter.\n  BatchParam p{max_bin, 
tree::TrainParam::DftSparseThreshold()};\n\n  if (ctx.IsCUDA()) {\n    this->InitFromCUDA(&ctx, p, std::move(iter), missing, ref);\n  } else {\n    this->InitFromCPU(&ctx, p, std::move(iter), missing, ref);\n  }\n\n  this->fmat_ctx_ = ctx;\n  this->batch_ = p;\n\n  SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0);\n\n  LOG(INFO) << \"Finished constructing the `IterativeDMatrix`: (\" << this->Info().num_row_ << \", \"\n            << this->Info().num_col_ << \", \" << this->info_.num_nonzero_ << \").\";\n}\n\nvoid IterativeDMatrix::InitFromCPU(\n    Context const* ctx, BatchParam const& p,\n    DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>&& iter, float missing,\n    std::shared_ptr<DMatrix> ref) {\n  DMatrixProxy* proxy = MakeProxy(proxy_);\n  CHECK(proxy);\n\n  common::HistogramCuts cuts{0};\n  ExternalDataInfo ext_info;\n  cpu_impl::GetDataShape(ctx, proxy, &iter, missing, &ext_info);\n  ext_info.SetInfo(ctx, true, &this->info_);\n\n  /**\n   * Generate quantiles\n   */\n  std::vector<FeatureType> h_ft;\n  cpu_impl::MakeSketches(ctx, &iter, proxy, ref, missing, &cuts, p, this->info_, ext_info, &h_ft);\n\n  /**\n   * Generate gradient index.\n   */\n  this->ghist_ = std::make_unique<GHistIndexMatrix>(this->info_, std::move(cuts), p.max_bin);\n  std::size_t rbegin = 0;\n  std::size_t prev_sum = 0;\n  std::size_t i = 0;\n  while (iter.Next()) {\n    cpu_impl::DispatchAny(proxy, [&](auto const& batch) {\n      proxy->Info().num_nonzero_ = ext_info.batch_nnz[i];\n      this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh,\n                                     Info().num_row_);\n    });\n    if (ext_info.n_batches != 1) {\n      this->info_.Extend(std::move(proxy->Info()), false, true);\n    }\n    auto batch_size = BatchSamples(proxy);\n    prev_sum = this->ghist_->row_ptr[rbegin + batch_size];\n    rbegin += batch_size;\n    ++i;\n  }\n  iter.Reset();\n  CHECK_EQ(rbegin, Info().num_row_);\n  
CHECK_EQ(this->ghist_->Features(), Info().num_col_);\n\n  /**\n   * Generate column matrix\n   */\n  bst_idx_t accumulated_rows = 0;\n  while (iter.Next()) {\n    cpu_impl::DispatchAny(proxy, [&](auto const& batch) {\n      this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows);\n    });\n    accumulated_rows += BatchSamples(proxy);\n  }\n  iter.Reset();\n  CHECK_EQ(accumulated_rows, this->info_.num_row_);\n\n  if (ext_info.n_batches == 1) {\n    this->info_ = std::move(proxy->Info());\n    ext_info.SetInfo(ctx, false, &this->info_);\n    CHECK_EQ(proxy->Info().labels.Size(), 0);\n  }\n\n  info_.feature_types.HostVector() = h_ft;\n}\n\nBatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(Context const* ctx,\n                                                              BatchParam const& param) {\n  if (param.Initialized()) {\n    detail::CheckParam(this->batch_, param);\n    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();\n  }\n  if (!ellpack_ && !ghist_) {\n    LOG(FATAL) << \"`QuantileDMatrix` not initialized.\";\n  }\n\n  if (!ghist_) {\n    if (!ctx->IsCUDA()) {\n      ghist_ = std::make_shared<GHistIndexMatrix>(ctx, Info(), *ellpack_, param);\n    } else if (!fmat_ctx_.IsCUDA()) {\n      ghist_ = std::make_shared<GHistIndexMatrix>(&fmat_ctx_, Info(), *ellpack_, param);\n    } else {\n      // Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM\n      // for cut reference.\n      auto cpu_ctx = ctx->MakeCPU();\n      ghist_ = std::make_shared<GHistIndexMatrix>(&cpu_ctx, Info(), *ellpack_, param);\n    }\n  }\n\n  if (!std::isnan(param.sparse_thresh) &&\n      param.sparse_thresh != tree::TrainParam::DftSparseThreshold()) {\n    LOG(WARNING) << \"`sparse_threshold` can not be changed when `QuantileDMatrix` is used instead \"\n                    \"of `DMatrix`.\";\n  }\n\n  auto begin_iter =\n      BatchIterator<GHistIndexMatrix>(new 
SimpleBatchIteratorImpl<GHistIndexMatrix>(ghist_));\n  return BatchSet<GHistIndexMatrix>(begin_iter);\n}\n\nBatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(Context const* ctx,\n                                                        BatchParam const& param) {\n  for (auto const& page : this->GetGradientIndex(ctx, param)) {\n    auto p_out = std::make_shared<SparsePage>();\n    p_out->data.Resize(this->Info().num_nonzero_);\n    p_out->offset.Resize(this->Info().num_row_ + 1);\n\n    auto& h_offset = p_out->offset.HostVector();\n    CHECK_EQ(page.row_ptr.size(), h_offset.size());\n    std::copy(page.row_ptr.cbegin(), page.row_ptr.cend(), h_offset.begin());\n\n    auto& h_data = p_out->data.HostVector();\n    auto const& vals = page.cut.Values();\n    auto const& ptrs = page.cut.Ptrs();\n    auto ft = Info().feature_types.ConstHostSpan();\n\n    AssignColumnBinIndex(page, [&](auto bin_idx, std::size_t idx, std::size_t, bst_feature_t fidx) {\n      float v;\n      if (common::IsCat(ft, fidx)) {\n        v = vals[bin_idx];\n      } else {\n        v = common::HistogramCuts::NumericBinValue(ptrs, vals, fidx, bin_idx);\n      }\n      h_data[idx] = Entry{fidx, v};\n    });\n\n    auto p_ext_out = std::make_shared<ExtSparsePage>(p_out);\n    auto begin_iter =\n        BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(p_ext_out));\n    return BatchSet<ExtSparsePage>(begin_iter);\n  }\n  error::Unreachable();\n  auto begin_iter =\n      BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));\n  return BatchSet<ExtSparsePage>(begin_iter);\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&,\n                                    DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>&&,\n                                    float, std::shared_ptr<DMatrix>) {\n  // silent the warning about unused variables.\n  (void)(proxy_);\n  
common::AssertGPUSupport();\n}\n\nBatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const*, BatchParam const&) {\n  common::AssertGPUSupport();\n  auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));\n  return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));\n}\n\nvoid IterativeDMatrix::Save(common::AlignedFileWriteStream*) const {\n  LOG(FATAL) << \"Not implemented\";\n}\n\nIterativeDMatrix* IterativeDMatrix::Load(common::AlignedResourceReadStream*) {\n  LOG(FATAL) << \"Not implemented\";\n  return nullptr;\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/iterative_dmatrix.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n\n#include \"batch_utils.h\"  // for RegenGHist, CheckParam\n#include \"device_adapter.cuh\"\n#include \"ellpack_page.cuh\"\n#include \"ellpack_page_raw_format.h\"  // for EllpackPageRawFormat\n#include \"iterative_dmatrix.h\"\n#include \"proxy_dmatrix.cuh\"  // for DispatchAny\n#include \"proxy_dmatrix.h\"    // for BatchSamples, BatchColumns\n#include \"simple_batch_iterator.h\"\n\nnamespace xgboost::data {\nvoid IterativeDMatrix::InitFromCUDA(\n    Context const* ctx, BatchParam const& p,\n    DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>&& iter, float missing,\n    std::shared_ptr<DMatrix> ref) {\n  // A handle passed to external iterator.\n  DMatrixProxy* proxy = MakeProxy(proxy_);\n  CHECK(proxy);\n\n  // Sketch for all batches.\n  std::int32_t current_device{dh::CurrentDevice()};\n  auto get_ctx = [&]() {\n    Context d_ctx = (ctx->IsCUDA()) ? *ctx : Context{}.MakeCUDA(current_device);\n    CHECK(!d_ctx.IsCPU());\n    return d_ctx;\n  };\n\n  fmat_ctx_ = get_ctx();\n\n  /**\n   * Generate quantiles\n   */\n  auto cuts = std::make_shared<common::HistogramCuts>(0);\n  ExternalDataInfo ext_info;\n  cuda_impl::MakeSketches(ctx, &iter, proxy, ref, p, missing, cuts, this->info_, &ext_info);\n  ext_info.SetInfo(ctx, true, &this->info_);\n\n  auto init_page = [this, &cuts, &ext_info]() {\n    if (!ellpack_) {\n      // Should be put inside the while loop to protect against empty batch.  
In\n      // that case device id is invalid.\n      ellpack_.reset(new EllpackPage);\n      *(ellpack_->Impl()) = EllpackPageImpl(&fmat_ctx_, cuts, this->IsDense(), ext_info.row_stride,\n                                            ext_info.accumulated_rows);\n    }\n  };\n\n  /**\n   * Generate gradient index.\n   */\n  bst_idx_t offset = 0;\n  iter.Reset();\n  bst_idx_t n_batches_for_verification = 0;\n  while (iter.Next()) {\n    init_page();\n    dh::safe_cuda(cudaSetDevice(dh::GetDevice(ctx).ordinal));\n    auto rows = BatchSamples(proxy);\n    dh::device_vector<size_t> row_counts(rows + 1, 0);\n    common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());\n    cuda_impl::DispatchAny(proxy, [=](auto const& value) {\n      return GetRowCounts(ctx, value, row_counts_span, dh::GetDevice(ctx), missing);\n    });\n    auto is_dense = this->IsDense();\n\n    proxy->Info().feature_types.SetDevice(dh::GetDevice(ctx));\n    auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();\n    auto new_impl = cuda_impl::DispatchAny(proxy, [&](auto const& value) {\n      return EllpackPageImpl{\n          &fmat_ctx_,          value, missing, is_dense, row_counts_span, d_feature_types,\n          ext_info.row_stride, rows,  cuts};\n    });\n    bst_idx_t num_elements = ellpack_->Impl()->Copy(&fmat_ctx_, &new_impl, offset);\n    offset += num_elements;\n\n    proxy->Info().num_row_ = BatchSamples(proxy);\n    proxy->Info().num_col_ = ext_info.n_features;\n    if (ext_info.n_batches != 1) {\n      this->info_.Extend(std::move(proxy->Info()), false, true);\n    }\n    n_batches_for_verification++;\n  }\n  CHECK_EQ(ext_info.n_batches, n_batches_for_verification)\n      << \"Different number of batches returned between 2 iterations\";\n\n  if (ext_info.n_batches == 1) {\n    this->info_ = std::move(proxy->Info());\n    ext_info.SetInfo(ctx, false, &this->info_);\n    CHECK_EQ(proxy->Info().labels.Size(), 0);\n  }\n\n  
iter.Reset();\n}\n\nBatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,\n                                                          BatchParam const& param) {\n  if (param.Initialized()) {\n    detail::CheckParam(this->batch_, param);\n    CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();\n  }\n  if (!ellpack_ && !ghist_) {\n    LOG(FATAL) << \"`QuantileDMatrix` not initialized.\";\n  }\n\n  if (!ellpack_) {\n    ellpack_.reset(new EllpackPage());\n    if (ctx->IsCUDA()) {\n      this->Info().feature_types.SetDevice(ctx->Device());\n      *ellpack_->Impl() =\n          EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());\n    } else if (fmat_ctx_.IsCUDA()) {\n      this->Info().feature_types.SetDevice(fmat_ctx_.Device());\n      *ellpack_->Impl() =\n          EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());\n    } else {\n      // Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM\n      // for cut reference.\n      auto cuda_ctx = ctx->MakeCUDA();\n      this->Info().feature_types.SetDevice(cuda_ctx.Device());\n      *ellpack_->Impl() =\n          EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());\n    }\n  }\n  CHECK(ellpack_);\n  auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));\n  return BatchSet<EllpackPage>(begin_iter);\n}\n\nvoid IterativeDMatrix::Save(common::AlignedFileWriteStream* fo) const {\n  CHECK(fo);\n  CHECK(this->ellpack_) << \"Not implemented\";\n  // Save cuts\n  auto const& p_cuts = this->ellpack_->Impl()->CutsShared();\n  p_cuts->Save(fo);\n  // Save ellpack\n  auto fmt =\n      std::make_unique<EllpackPageRawFormat>(p_cuts, this->Ctx()->Device(), BatchParam{}, false);\n  auto n_bytes = fmt->Write(*this->ellpack_, fo);\n  CHECK_GE(n_bytes, 
this->ellpack_->Impl()->MemCostBytes());\n}\n\nIterativeDMatrix* IterativeDMatrix::Load(common::AlignedResourceReadStream* fi) {\n  CHECK(fi);\n  // Load cuts\n  std::shared_ptr<common::HistogramCuts> p_cuts{common::HistogramCuts::Load(fi)};\n  // Load ellpack\n  auto fmt = std::make_unique<EllpackPageRawFormat>(p_cuts, DeviceOrd::CUDA(dh::CurrentDevice()),\n                                                    BatchParam{}, false);\n  auto ellpack = std::make_shared<EllpackPage>();\n  CHECK(fmt->Read(ellpack.get(), fi));\n  return new IterativeDMatrix{std::move(ellpack)};\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/iterative_dmatrix.h",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n *\n * @brief Implementation of the higher-level `QuantileDMatrix`.\n */\n#ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_\n#define XGBOOST_DATA_ITERATIVE_DMATRIX_H_\n\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n\n#include \"quantile_dmatrix.h\"  // for QuantileDMatrix\n#include \"xgboost/base.h\"      // for bst_bin_t\n#include \"xgboost/c_api.h\"     // for DataIterHandle, DMatrixHandle\n#include \"xgboost/context.h\"   // for Context\n#include \"xgboost/data.h\"      // for BatchSet\n\nnamespace xgboost {\nnamespace common {\nclass HistogramCuts;\nclass AlignedFileWriteStream;\nclass AlignedResourceReadStream;\n}  // namespace common\n\nnamespace data {\n/**\n * @brief DMatrix type for `QuantileDMatrix`, the naming `IterativeDMatix` is due to its\n *        construction process.\n *\n * During initializaion, it walks through the data multiple times iteratively in order to\n * perform quantilization. 
This design helps us reduce memory usage significantly by\n * avoiding data concatenation along with removing the CSR matrix `SparsePage`.\n */\nclass IterativeDMatrix : public QuantileDMatrix {\n  std::shared_ptr<EllpackPage> ellpack_;\n  std::shared_ptr<GHistIndexMatrix> ghist_;\n  BatchParam batch_;\n\n  DMatrixHandle proxy_;\n\n  void InitFromCUDA(Context const *ctx, BatchParam const &p,\n                    DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> &&iter,\n                    float missing, std::shared_ptr<DMatrix> ref);\n  void InitFromCPU(Context const *ctx, BatchParam const &p,\n                   DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> &&iter,\n                   float missing, std::shared_ptr<DMatrix> ref);\n\n  explicit IterativeDMatrix(std::shared_ptr<EllpackPage> ellpack) : ellpack_{std::move(ellpack)} {\n    this->fmat_ctx_.UpdateAllowUnknown(Args{{\"device\", DeviceSym::CUDA()}});\n  }\n\n public:\n  explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,\n                            std::shared_ptr<DMatrix> ref, DataIterResetCallback *reset,\n                            XGDMatrixCallbackNext *next, float missing, int nthread,\n                            bst_bin_t max_bin);\n\n  ~IterativeDMatrix() override = default;\n\n  [[nodiscard]] bool EllpackExists() const override { return static_cast<bool>(ellpack_); }\n  [[nodiscard]] bool GHistIndexExists() const override { return static_cast<bool>(ghist_); }\n\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const &param) override;\n  BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;\n  BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx, BatchParam const &param) override;\n\n  void Save(common::AlignedFileWriteStream *fo) const;\n  [[nodiscard]] static IterativeDMatrix *Load(common::AlignedResourceReadStream *fi);\n};\n}  // namespace data\n}  // namespace 
xgboost\n\n#endif  // XGBOOST_DATA_ITERATIVE_DMATRIX_H_\n"
  },
  {
    "path": "src/data/metainfo.cc",
    "content": "/**\n * Copyright 2024-2026, XGBoost Contributors\n */\n#include \"metainfo.h\"\n\n#include <string>       // for string\n#include <type_traits>  // for add_pointer_t\n\n#include \"../common/error_msg.h\"         // for InconsistentFeatureTypes\n#include \"xgboost/data.h\"                // for FeatureType\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\n#if !defined(XGBOOST_USE_CUDA)\n\n#include \"../common/common.h\"  // for AssertGPUSupport\n\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost {\nstd::string TypedArrayRef::ArrayInterfaceStr() const {\n  return data::DispatchDType(this->dtype, [this](auto dtype) {\n    using DType = decltype(dtype);\n    auto ptr = static_cast<std::add_pointer_t<std::add_const_t<DType>>>(this->data);\n    if (this->ndim == 1) {\n      auto vec = linalg::MakeVec(ptr, this->shape.front());\n      return linalg::ArrayInterfaceStr(vec);\n    } else {\n      auto n = this->Size();\n      if (ptr) {\n        CHECK_GT(n, 0);\n      }\n      if (n > 0) {\n        CHECK(ptr);\n      }\n      auto mat = linalg::MakeTensorView(DeviceOrd::CPU(), common::Span{ptr, n}, this->shape[0],\n                                        this->shape[1]);\n      return linalg::ArrayInterfaceStr(mat);\n    }\n  });\n}\n}  // namespace xgboost\n\nnamespace xgboost::data {\nvoid CheckFeatureTypes(HostDeviceVector<FeatureType> const& lhs,\n                       HostDeviceVector<FeatureType> const& rhs) {\n  CHECK_EQ(lhs.Size(), rhs.Size()) << error::InconsistentFeatureTypes();\n  if (lhs.DeviceCanRead() || rhs.DeviceCanRead()) {\n    return cuda_impl::CheckFeatureTypes(lhs, rhs);\n  }\n  auto const& h_lhs = lhs.ConstHostVector();\n  auto const& h_rhs = rhs.ConstHostVector();\n  auto ft_is_same = std::equal(h_lhs.cbegin(), h_lhs.cend(), h_rhs.cbegin());\n  CHECK(ft_is_same) << error::InconsistentFeatureTypes();\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\nvoid 
CheckFeatureTypes(HostDeviceVector<FeatureType> const&, HostDeviceVector<FeatureType> const&) {\n  common::AssertGPUSupport();\n}\n}  // namespace cuda_impl\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/metainfo.cu",
    "content": "/**\n * Copyright 2024-2026, XGBoost Contributors\n */\n#include <thrust/equal.h>  // for equal\n\n#include \"../common/device_helpers.cuh\"  // for tcbegin\n#include \"../common/error_msg.h\"         // for InconsistentFeatureTypes\n#include \"metainfo.h\"\n#include \"xgboost/data.h\"                // for FeatureType\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::data::cuda_impl {\nvoid CheckFeatureTypes(HostDeviceVector<FeatureType> const& lhs,\n                       HostDeviceVector<FeatureType> const& rhs) {\n  auto device = lhs.DeviceCanRead() ? lhs.Device() : rhs.Device();\n  CHECK(device.IsCUDA());\n  lhs.SetDevice(device), rhs.SetDevice(device);\n  auto const& d_lhs = lhs.ConstDeviceSpan();\n  auto const& d_rhs = rhs.ConstDeviceSpan();\n  auto ft_is_same = thrust::equal(dh::tcbegin(d_lhs), dh::tcend(d_lhs), dh::tcbegin(d_rhs));\n  CHECK(ft_is_same) << error::InconsistentFeatureTypes();\n}\n}  // namespace xgboost::data::cuda_impl\n"
  },
  {
    "path": "src/data/metainfo.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <cmath>    // for isnan, isinf\n#include <cstdint>  // for int8_t\n#include <vector>   // for vector\n\n#include \"../common/error_msg.h\"         // for Unreachable\n#include \"xgboost/base.h\"                // for bst_group_t\n#include \"xgboost/data.h\"                // for FeatureType\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/logging.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n\nnamespace xgboost::data {\nenum class MetaField : std::int8_t {\n  kLabel = 0,\n  kWeight = 1,\n  kBaseMargin = 2,\n  kLabelLowerBound = 3,\n  kLabelUpperBound = 4,\n  kFeatureWeights = 5,\n  kGroupPtr = 6,\n  kQid = 7,  // Converted into group ptr\n};\n\n// `group_ptr` is for the output, while input is `group`.\ninline MetaField MapMetaField(StringView key, bool is_input) {\n  if (key == \"label\") {\n    return MetaField::kLabel;\n  } else if (key == \"weight\") {\n    return MetaField::kWeight;\n  } else if (key == \"base_margin\") {\n    return MetaField::kBaseMargin;\n  } else if (key == \"label_lower_bound\") {\n    return MetaField::kLabelLowerBound;\n  } else if (key == \"label_upper_bound\") {\n    return MetaField::kLabelUpperBound;\n  } else if (key == \"feature_weights\") {\n    return MetaField::kFeatureWeights;\n  } else if (key == \"group_ptr\" && !is_input) {\n    return MetaField::kGroupPtr;\n  } else if (key == \"group\" && is_input) {\n    return MetaField::kGroupPtr;\n  } else if (key == \"qid\") {\n    return MetaField::kQid;\n  } else {\n    LOG(FATAL) << \"Unknown key:\" << key;\n  }\n  return {};\n}\n\nstruct LabelsCheck {\n  XGBOOST_DEVICE bool operator()(float y) {\n#if defined(__CUDA_ARCH__)\n    return ::isnan(y) || ::isinf(y);\n#else\n    return std::isnan(y) || std::isinf(y);\n#endif\n  }\n};\n\nstruct WeightsCheck {\n  XGBOOST_DEVICE bool operator()(float w) { return LabelsCheck{}(w) || w < 0; }  
// NOLINT\n};\n\ninline void ValidateQueryGroup(std::vector<bst_group_t> const& group_ptr_) {\n  bool valid_query_group = true;\n  for (size_t i = 1; i < group_ptr_.size(); ++i) {\n    valid_query_group = valid_query_group && group_ptr_[i] >= group_ptr_[i - 1];\n    if (XGBOOST_EXPECT(!valid_query_group, false)) {\n      break;\n    }\n  }\n  CHECK(valid_query_group) << \"Invalid group structure.\";\n}\n\nnamespace cuda_impl {\nvoid CheckFeatureTypes(HostDeviceVector<FeatureType> const& lhs,\n                       HostDeviceVector<FeatureType> const& rhs);\n}\n\nvoid CheckFeatureTypes(HostDeviceVector<FeatureType> const& lhs,\n                       HostDeviceVector<FeatureType> const& rhs);\n\n// TODO(jiamingy): We have two sets of dtypes in XGBoost, one in `data.h`, another one in array\n// interface. We should unify them.\ntemplate <typename Fn>\ndecltype(auto) DispatchDType(DataType dtype, Fn&& fn) {\n  switch (dtype) {\n    case xgboost::DataType::kFloat32: {\n      return fn(float{});\n    }\n    case xgboost::DataType::kDouble: {\n      return fn(double{});\n    }\n    case xgboost::DataType::kUInt32: {\n      return fn(std::uint32_t{});\n    }\n    case xgboost::DataType::kUInt64: {\n      return fn(std::uint64_t{});\n    }\n    default:\n      LOG(FATAL) << \"Unknown data type\" << static_cast<uint8_t>(dtype);\n  }\n  error::Unreachable();\n  return fn(float{});\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/proxy_dmatrix.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n\n#include \"proxy_dmatrix.h\"\n\n#include <memory>       // for shared_ptr\n#include <type_traits>  // for is_same_v\n#include <utility>      // for move\n\n#include \"../common/type.h\"   // for GetValueT\n#include \"adapter.h\"          // for ColumnarAdapter\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"     // for DMatrix\n#include \"xgboost/logging.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // for AssertGPUSupport\n#endif\n\nnamespace xgboost::data {\nvoid DMatrixProxy::SetColumnar(StringView data) {\n  std::shared_ptr<ColumnarAdapter> adapter{new ColumnarAdapter{data}};\n  this->Info().num_col_ = adapter->NumColumns();\n  this->Info().num_row_ = adapter->NumRows();\n  this->batch_ = std::move(adapter);\n  this->ctx_.Init(Args{{\"device\", DeviceSym::CPU()}});\n}\n\nvoid DMatrixProxy::SetArray(StringView data) {\n  std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{data}};\n  this->Info().num_col_ = adapter->NumColumns();\n  this->Info().num_row_ = adapter->NumRows();\n  this->batch_ = std::move(adapter);\n  this->ctx_.Init(Args{{\"device\", DeviceSym::CPU()}});\n}\n\nvoid DMatrixProxy::SetCsr(char const *c_indptr, char const *c_indices, char const *c_values,\n                          bst_feature_t n_features, bool on_host) {\n  CHECK(on_host) << \"Not implemented on device.\";\n  std::shared_ptr<CSRArrayAdapter> adapter{new CSRArrayAdapter(\n      StringView{c_indptr}, StringView{c_indices}, StringView{c_values}, n_features)};\n  this->Info().num_col_ = adapter->NumColumns();\n  this->Info().num_row_ = adapter->NumRows();\n  this->batch_ = std::move(adapter);\n  this->ctx_.Init(Args{{\"device\", DeviceSym::CPU()}});\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid DMatrixProxy::SetCudaArray(StringView) { common::AssertGPUSupport(); }\nvoid 
DMatrixProxy::SetCudaColumnar(StringView) { common::AssertGPUSupport(); }\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nnamespace cuda_impl {\n#if !defined(XGBOOST_USE_CUDA)\n[[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const *) {\n  common::AssertGPUSupport();\n  return 0;\n}\n[[nodiscard]] bst_idx_t BatchColumns(DMatrixProxy const *) {\n  common::AssertGPUSupport();\n  return 0;\n}\n#else\nstd::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,\n                                                std::shared_ptr<DMatrixProxy> proxy, float missing);\n#endif  // XGBOOST_USE_CUDA\n}  // namespace cuda_impl\n\nstd::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,\n                                                std::shared_ptr<DMatrixProxy> proxy,\n                                                float missing) {\n  bool type_error{false};\n  std::shared_ptr<DMatrix> p_fmat{nullptr};\n\n  if (proxy->Ctx()->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    p_fmat = cuda_impl::CreateDMatrixFromProxy(ctx, proxy, missing);\n#else\n    common::AssertGPUSupport();\n#endif\n  } else {\n    p_fmat = data::cpu_impl::DispatchAny<false>(\n        proxy.get(),\n        [&](auto const &adapter) {\n          auto p_fmat =\n              std::shared_ptr<DMatrix>(DMatrix::Create(adapter.get(), missing, ctx->Threads()));\n          CHECK_EQ(p_fmat->Info().num_row_, adapter->NumRows());\n          return p_fmat;\n        },\n        &type_error);\n  }\n\n  CHECK(p_fmat) << \"Failed to fallback.\";\n  p_fmat->Info().Extend(proxy->Info(), /*accumulate_rows=*/false, true);\n  return p_fmat;\n}\n\n[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const *proxy) {\n  if (proxy->Device().IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    return cuda_impl::BatchCatsIsRef(proxy);\n#else\n    common::AssertGPUSupport();\n#endif\n  }\n  return cpu_impl::DispatchAny<false>(proxy, [&](auto const &adapter) {\n    using AdapterT = typename 
common::GetValueT<decltype(adapter)>::element_type;\n    if constexpr (std::is_same_v<AdapterT, ColumnarAdapter>) {\n      return adapter->HasRefCategorical();\n    }\n    return false;\n  });\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/proxy_dmatrix.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#include \"../encoder/ordinal.h\"  // for DeviceColumnsView\n#include \"device_adapter.cuh\"\n#include \"proxy_dmatrix.cuh\"\n#include \"../common/type.h\"  // for GetValueT\n#include \"proxy_dmatrix.h\"\n\nnamespace xgboost::data {\nvoid DMatrixProxy::SetCudaColumnar(StringView data) {\n  auto adapter{std::make_shared<CudfAdapter>(data)};\n  this->batch_ = adapter;\n  this->Info().num_col_ = adapter->NumColumns();\n  this->Info().num_row_ = adapter->NumRows();\n  if (!adapter->Device().IsCUDA()) {\n    // empty data\n    CHECK_EQ(this->Info().num_row_, 0);\n    ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());\n    return;\n  }\n  ctx_ = ctx_.MakeCUDA(adapter->Device().ordinal);\n}\n\nvoid DMatrixProxy::SetCudaArray(StringView data) {\n  auto adapter(std::make_shared<CupyAdapter>(StringView{data}));\n  this->batch_ = adapter;\n  this->Info().num_col_ = adapter->NumColumns();\n  this->Info().num_row_ = adapter->NumRows();\n  if (!adapter->Device().IsCUDA()) {\n    // empty data\n    CHECK_EQ(this->Info().num_row_, 0);\n    ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());\n    return;\n  }\n  ctx_ = ctx_.MakeCUDA(adapter->Device().ordinal);\n}\n\nnamespace cuda_impl {\nstd::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const* ctx,\n                                                std::shared_ptr<DMatrixProxy> proxy,\n                                                float missing) {\n  return DispatchAny<false>(proxy.get(), [&](auto const& adapter) {\n    auto p_fmat = std::shared_ptr<DMatrix>{DMatrix::Create(adapter.get(), missing, ctx->Threads())};\n    CHECK_EQ(p_fmat->Info().num_row_, adapter->NumRows());\n    return p_fmat;\n  });\n}\n\n[[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const* proxy) {\n  return cuda_impl::DispatchAny(proxy, [](auto const& value) { return value.NumRows(); });\n}\n\n[[nodiscard]] bst_idx_t BatchColumns(DMatrixProxy const* proxy) {\n  return cuda_impl::DispatchAny(proxy, 
[](auto const& value) { return value.NumCols(); });\n}\n\n[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const* proxy) {\n  return DispatchAny<false>(proxy, [&](auto const& adapter) {\n    using AdapterT = typename common::GetValueT<decltype(adapter)>::element_type;\n    if constexpr (std::is_same_v<AdapterT, CudfAdapter>) {\n      return adapter->HasRefCategorical();\n    }\n    return false;\n  });\n}\n\n[[nodiscard]] enc::DeviceColumnsView BatchCats(DMatrixProxy const* proxy) {\n  return DispatchAny<false>(proxy, [&](auto const& adapter) {\n    using AdapterT = typename common::GetValueT<decltype(adapter)>::element_type;\n    if constexpr (std::is_same_v<AdapterT, CudfAdapter>) {\n      if (adapter->HasRefCategorical()) {\n        return adapter->RefCats();\n      }\n      return adapter->Cats();\n    }\n    return enc::DeviceColumnsView{};\n  });\n}\n}  // namespace cuda_impl\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/proxy_dmatrix.cuh",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <any>     // for any_cast\n#include <memory>  // for shared_ptr\n\n#include \"device_adapter.cuh\"  // for MakeEncColumnarBatch\n#include \"proxy_dmatrix.h\"\n\nnamespace xgboost::data::cuda_impl {\n// See the cpu impl for parameter documentation.\ntemplate <bool get_value = true, template <typename A> typename AddPtrT = std::shared_ptr,\n          typename Fn>\ndecltype(auto) DispatchAny(Context const* ctx, std::any x, Fn&& fn, bool* type_error = nullptr) {\n  auto has_type = [&] {\n    if (type_error) {\n      *type_error = false;\n    }\n  };\n  if (x.type() == typeid(AddPtrT<CupyAdapter>)) {\n    has_type();\n    if constexpr (get_value) {\n      auto value = std::any_cast<AddPtrT<CupyAdapter>>(x)->Value();\n      return fn(value);\n    } else {\n      auto value = std::any_cast<AddPtrT<CupyAdapter>>(x);\n      return fn(value);\n    }\n  } else if (x.type() == typeid(AddPtrT<CudfAdapter>)) {\n    has_type();\n    auto adapter = std::any_cast<AddPtrT<CudfAdapter>>(x);\n    if constexpr (get_value) {\n      auto value = adapter->Value();\n      if (adapter->HasRefCategorical()) {\n        auto [batch, mapping] = MakeEncColumnarBatch(ctx, adapter);\n        return fn(batch);\n      }\n      return fn(value);\n    } else {\n      return fn(adapter);\n    }\n  } else {\n    if (type_error) {\n      *type_error = true;\n    } else {\n      LOG(FATAL) << \"Unknown type: \" << x.type().name();\n    }\n  }\n\n  // Dummy return value\n  if constexpr (get_value) {\n    auto value = std::any_cast<AddPtrT<CudfAdapter>>(x)->Value();\n    return fn(value);\n  } else {\n    auto value = std::any_cast<AddPtrT<CudfAdapter>>(x);\n    return fn(value);\n  }\n}\n\ntemplate <bool get_value = true, typename Fn>\ndecltype(auto) DispatchAny(DMatrixProxy const* proxy, Fn&& fn, bool* type_error = nullptr) {\n  return DispatchAny<get_value>(proxy->Ctx(), proxy->Adapter(), std::forward<Fn>(fn), 
type_error);\n}\n}  // namespace xgboost::data::cuda_impl\n"
  },
  {
    "path": "src/data/proxy_dmatrix.h",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_\n#define XGBOOST_DATA_PROXY_DMATRIX_H_\n\n#include <algorithm>    // for none_of\n#include <any>          // for any, any_cast\n#include <cstdint>      // for uint32_t, int32_t\n#include <memory>       // for shared_ptr\n#include <type_traits>  // for invoke_result_t, declval\n#include <utility>      // for forward\n#include <vector>       // for vector\n\n#include \"../common/nvtx_utils.h\"  // for xgboost_NVTX_FN_RANGE\n#include \"../encoder/ordinal.h\"    // for HostColumnsView\n#include \"adapter.h\"               // for ColumnarAdapter, ArrayAdapter, MakeEncColumnarBatch\n#include \"cat_container.h\"         // for CatContainer\n#include \"xgboost/c_api.h\"         // for DataIterHandle\n#include \"xgboost/context.h\"       // for Context\n#include \"xgboost/data.h\"          // for MetaInfo\n#include \"xgboost/string_view.h\"   // for StringView\n\nnamespace xgboost::data {\n/**\n * @brief A proxy to external iterator.\n *\n * @note The external iterator is actually 1-based since the first call to @ref Next\n * increases the counter to 1 and it's necessary to call the @ref Next method at least\n * once to get data. 
We here along with the page source together convert it back to\n * 0-based by calling @ref Next in the page source's constructor.\n */\ntemplate <typename ResetFn, typename NextFn>\nclass DataIterProxy {\n  DataIterHandle iter_;\n  ResetFn* reset_;\n  NextFn* next_;\n  std::int32_t count_{0};\n\n public:\n  DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)\n      : iter_{iter}, reset_{reset}, next_{next} {}\n  DataIterProxy(DataIterProxy&& that) = default;\n  DataIterProxy& operator=(DataIterProxy&& that) = default;\n  DataIterProxy(DataIterProxy const& that) = delete;\n  DataIterProxy& operator=(DataIterProxy const& that) = delete;\n\n  [[nodiscard]] bool Next() {\n    xgboost_NVTX_FN_RANGE();\n\n    bool ret = !!next_(iter_);\n    if (!ret) {\n      return ret;\n    }\n    count_++;\n    return ret;\n  }\n  void Reset() {\n    reset_(iter_);\n    count_ = 0;\n  }\n  [[nodiscard]] std::int32_t Iter() const { return this->count_ == 0 ? 0 : this->count_ - 1; }\n  DataIterProxy& operator++() {\n    CHECK(this->Next());\n    return *this;\n  }\n};\n\n/**\n * @brief A proxy of DMatrix used by the external iterator.\n */\nclass DMatrixProxy : public DMatrix {\n  MetaInfo info_;\n  std::any batch_;\n  Context ctx_;\n\n public:\n  DeviceOrd Device() const { return ctx_.Device(); }\n\n  /**\n   * Device setters\n   */\n  void SetCudaColumnar(StringView data);\n  void SetCudaArray(StringView data);\n  /**\n   * Host setters\n   */\n  void SetColumnar(StringView data);\n  void SetArray(StringView data);\n  void SetCsr(char const* c_indptr, char const* c_indices, char const* c_values,\n              bst_feature_t n_features, bool on_host);\n\n  MetaInfo& Info() override { return info_; }\n  MetaInfo const& Info() const override { return info_; }\n  Context const* Ctx() const override { return &ctx_; }\n\n  [[nodiscard]] bool EllpackExists() const override { return false; }\n  [[nodiscard]] bool GHistIndexExists() const override { return false; }\n  
[[nodiscard]] bool SparsePageExists() const override { return false; }\n\n  template <typename Page>\n  static BatchSet<Page> NoBatch() {\n    LOG(FATAL) << \"Proxy DMatrix cannot return data batch.\";\n    return BatchSet<Page>(BatchIterator<Page>(nullptr));\n  }\n\n  DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {\n    LOG(FATAL) << \"Slicing DMatrix is not supported for Proxy DMatrix.\";\n    return nullptr;\n  }\n  DMatrix* SliceCol(int, int) override {\n    LOG(FATAL) << \"Slicing DMatrix columns is not supported for Proxy DMatrix.\";\n    return nullptr;\n  }\n  BatchSet<SparsePage> GetRowBatches() override { return NoBatch<SparsePage>(); }\n  BatchSet<CSCPage> GetColumnBatches(Context const*) override { return NoBatch<CSCPage>(); }\n  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const*) override {\n    return NoBatch<SortedCSCPage>();\n  }\n  BatchSet<EllpackPage> GetEllpackBatches(Context const*, BatchParam const&) override {\n    return NoBatch<EllpackPage>();\n  }\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const*, BatchParam const&) override {\n    return NoBatch<GHistIndexMatrix>();\n  }\n  BatchSet<ExtSparsePage> GetExtBatches(Context const*, BatchParam const&) override {\n    return NoBatch<ExtSparsePage>();\n  }\n  std::any Adapter() const { return batch_; }\n};\n\ninline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {\n  auto proxy_handle = static_cast<std::shared_ptr<DMatrix>*>(proxy);\n  CHECK(proxy_handle) << \"Invalid proxy handle.\";\n  DMatrixProxy* typed = static_cast<DMatrixProxy*>(proxy_handle->get());\n  CHECK(typed) << \"Invalid proxy handle.\";\n  return typed;\n}\n\n/**\n * @brief Shape and basic information for data fetched from an external data iterator.\n */\nstruct ExternalDataInfo {\n  bst_idx_t n_features = 0;               // The number of columns\n  bst_idx_t n_batches = 0;                // The number of batches from the external data iterator\n  bst_idx_t accumulated_rows = 0;         // 
The total number of rows\n  bst_idx_t nnz = 0;                      // The number of non-missing values\n  std::vector<bst_idx_t> column_sizes;    // The nnz for each column\n  std::vector<bst_idx_t> batch_nnz;       // nnz for each batch\n  std::vector<bst_idx_t> base_rowids{0};  // base_rowid\n  bst_idx_t row_stride{0};                // Used by ellpack, maximum row stride for all batches\n  std::shared_ptr<CatContainer> cats;     // Categories from one of the batches\n\n  void Validate() const {\n    CHECK(std::none_of(this->column_sizes.cbegin(), this->column_sizes.cend(), [&](auto f) {\n      return f > this->accumulated_rows;\n    })) << \"Something went wrong during iteration.\";\n\n    CHECK_GE(this->n_features, 1) << \"Data must has at least 1 column.\";\n    CHECK_EQ(this->base_rowids.size(), this->n_batches + 1);\n    CHECK_LE(this->row_stride, this->n_features);\n  }\n\n  void SetInfo(Context const* ctx, bool sync, MetaInfo* p_info) {\n    // From here on Info() has the correct data shape\n    auto& info = *p_info;\n    info.num_row_ = this->accumulated_rows;\n    info.num_col_ = this->n_features;\n    info.num_nonzero_ = this->nnz;\n    if (sync) {\n      info.SynchronizeNumberOfColumns(ctx, DataSplitMode::kRow);\n    }\n    info.Cats(this->cats);\n    this->Validate();\n  }\n};\n\nnamespace cpu_impl {\n/**\n * @brief Dispatch function call based on the input type.\n *\n * @tparam get_value Whether the function Fn accepts an adapter batch or the adapter itself.\n * @tparam AddPtrT   The type of the adapter pointer. 
Use std::add_pointer_t for raw pointer.\n * @tparam Fn        The type of the function to be dispatched.\n *\n * @param x     A `std::any` object that contains a (shared) pointer to an adapter.\n * @param fn    The function to be dispatched.\n * @param type_error[out] Set to true if it's not null and the input data is not recognized by\n *                        the host.\n *\n * @return The return value of the function being dispatched.\n */\ntemplate <bool get_value = true, template <typename A> typename AddPtrT = std::shared_ptr,\n          typename Fn>\ndecltype(auto) DispatchAny(Context const* ctx, std::any x, Fn&& fn, bool* type_error = nullptr) {\n  // CSC, FileAdapter, and IteratorAdapter are not supported.\n  auto has_type = [&] {\n    if (type_error) {\n      *type_error = false;\n    }\n  };\n  CHECK(x.has_value());\n  if (x.type() == typeid(AddPtrT<data::DenseAdapter>)) {\n    has_type();\n    if constexpr (get_value) {\n      auto value = std::any_cast<AddPtrT<DenseAdapter>>(x)->Value();\n      return fn(value);\n    } else {\n      auto value = std::any_cast<AddPtrT<DenseAdapter>>(x);\n      fn(value);\n    }\n  } else if (x.type() == typeid(AddPtrT<ArrayAdapter>)) {\n    has_type();\n    if constexpr (get_value) {\n      auto value = std::any_cast<AddPtrT<ArrayAdapter>>(x)->Value();\n      return fn(value);\n    } else {\n      auto value = std::any_cast<AddPtrT<ArrayAdapter>>(x);\n      return fn(value);\n    }\n  } else if (x.type() == typeid(AddPtrT<CSRArrayAdapter>)) {\n    has_type();\n    if constexpr (get_value) {\n      auto value = std::any_cast<AddPtrT<CSRArrayAdapter>>(x)->Value();\n      return fn(value);\n    } else {\n      auto value = std::any_cast<AddPtrT<CSRArrayAdapter>>(x);\n      return fn(value);\n    }\n  } else if (x.type() == typeid(AddPtrT<ColumnarAdapter>)) {\n    has_type();\n    auto adapter = std::any_cast<AddPtrT<ColumnarAdapter>>(x);\n    if constexpr (get_value) {\n      auto value = adapter->Value();\n      if 
(adapter->HasRefCategorical()) {\n        auto [batch, mapping] = MakeEncColumnarBatch(ctx, adapter);\n        return fn(batch);\n      }\n      return fn(value);\n    } else {\n      return fn(adapter);\n    }\n  } else {\n    if (type_error) {\n      *type_error = true;\n    } else {\n      LOG(FATAL) << \"Unknown type: \" << x.type().name();\n    }\n  }\n\n  if constexpr (get_value) {\n    return std::invoke_result_t<Fn, decltype(std::declval<AddPtrT<ArrayAdapter>>()->Value())>();\n  } else {\n    return std::invoke_result_t<Fn, decltype(std::declval<AddPtrT<ArrayAdapter>>())>();\n  }\n}\n\ntemplate <bool get_value = true, typename Fn>\ndecltype(auto) DispatchAny(DMatrixProxy const* proxy, Fn&& fn, bool* type_error = nullptr) {\n  return DispatchAny<get_value>(proxy->Ctx(), proxy->Adapter(), std::forward<Fn>(fn), type_error);\n}\n\n/**\n * @brief Get categories for the current batch.\n *\n * @return A host view to the categories\n */\n[[nodiscard]] inline decltype(auto) BatchCats(DMatrixProxy const* proxy) {\n  return DispatchAny<false>(proxy, [](auto const& adapter) -> decltype(auto) {\n    using AdapterT = typename std::remove_reference_t<decltype(adapter)>::element_type;\n    if constexpr (std::is_same_v<AdapterT, ColumnarAdapter>) {\n      if (adapter->HasRefCategorical()) {\n        return adapter->RefCats();\n      }\n      return adapter->Cats();\n    }\n    return enc::HostColumnsView{};\n  });\n}\n}  // namespace cpu_impl\n\n/**\n * @brief Create a `SimpleDMatrix` instance from a `DMatrixProxy`.\n *\n *    This is used for enabling inplace-predict fallback.\n */\nstd::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const* ctx,\n                                                std::shared_ptr<DMatrixProxy> proxy, float missing);\n\nnamespace cuda_impl {\n[[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const*);\n[[nodiscard]] bst_idx_t BatchColumns(DMatrixProxy const*);\n#if defined(XGBOOST_USE_CUDA)\n[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy 
const*);\n[[nodiscard]] enc::DeviceColumnsView BatchCats(DMatrixProxy const*);\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n\n/**\n * @brief Get the number of samples for the current batch.\n */\n[[nodiscard]] inline bst_idx_t BatchSamples(DMatrixProxy const* proxy) {\n  bool type_error = false;\n  auto n_samples =\n      cpu_impl::DispatchAny(proxy, [](auto const& value) { return value.NumRows(); }, &type_error);\n  if (type_error) {\n    n_samples = cuda_impl::BatchSamples(proxy);\n  }\n  return n_samples;\n}\n\n/**\n * @brief Get the number of features for the current batch.\n */\n[[nodiscard]] inline bst_feature_t BatchColumns(DMatrixProxy const* proxy) {\n  bool type_error = false;\n  auto n_features =\n      cpu_impl::DispatchAny(proxy, [](auto const& value) { return value.NumCols(); }, &type_error);\n  if (type_error) {\n    n_features = cuda_impl::BatchColumns(proxy);\n  }\n  return n_features;\n}\n\nnamespace cpu_impl {}  // namespace cpu_impl\n[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const* proxy);\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_PROXY_DMATRIX_H_\n"
  },
  {
    "path": "src/data/quantile_dmatrix.cc",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include \"quantile_dmatrix.h\"\n\n#include <numeric>  // for accumulate\n\n#include \"../collective/allreduce.h\"         // for Allreduce\n#include \"../collective/communicator-inl.h\"  // for IsDistributed\n#include \"../common/error_msg.h\"             // for InconsistentCategories\n#include \"../common/threading_utils.h\"       // for ParallelFor\n#include \"cat_container.h\"                   // for CatContainer\n#include \"gradient_index.h\"                  // for GHistIndexMatrix\n#include \"proxy_dmatrix.h\"                   // for DispatchAny\n#include \"xgboost/collective/result.h\"       // for SafeColl\n#include \"xgboost/linalg.h\"                  // for Tensor\n\nnamespace xgboost::data {\nvoid GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,\n                    BatchParam p, common::HistogramCuts* p_cuts) {\n  CHECK(ref);\n  CHECK(p_cuts);\n  p.forbid_regen = true;\n  // Fetch cuts from GIDX\n  auto csr = [&] {\n    for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {\n      *p_cuts = page.cut;\n      break;\n    }\n  };\n  // Fetch cuts from Ellpack.\n  auto ellpack = [&] {\n    for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {\n      GetCutsFromEllpack(page, p_cuts);\n      break;\n    }\n  };\n\n  if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {\n    // Both exists\n    if (ctx->IsCUDA()) {\n      ellpack();\n    } else {\n      csr();\n    }\n  } else if (ref->PageExists<GHistIndexMatrix>()) {\n    csr();\n  } else if (ref->PageExists<EllpackPage>()) {\n    ellpack();\n  } else {\n    // None exist\n    if (ctx->IsCUDA()) {\n      ellpack();\n    } else {\n      csr();\n    }\n  }\n  CHECK_EQ(ref->Info().num_col_, n_features)\n      << \"Invalid ref DMatrix, different number of features.\";\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nvoid GetCutsFromEllpack(EllpackPage const&, 
common::HistogramCuts*) { common::AssertGPUSupport(); }\n#endif\n\nnamespace cpu_impl {\n// Synchronize feature type in case of empty DMatrix\nvoid SyncFeatureType(Context const* ctx, std::vector<FeatureType>* p_h_ft) {\n  if (!collective::IsDistributed()) {\n    return;\n  }\n  auto& h_ft = *p_h_ft;\n  bst_idx_t n_ft = h_ft.size();\n  collective::SafeColl(collective::Allreduce(ctx, &n_ft, collective::Op::kMax));\n  if (!h_ft.empty()) {\n    // Check correct size if this is not an empty DMatrix.\n    CHECK_EQ(h_ft.size(), n_ft);\n  }\n  if (n_ft > 0) {\n    h_ft.resize(n_ft);\n    auto ptr = reinterpret_cast<std::underlying_type_t<FeatureType>*>(h_ft.data());\n    collective::SafeColl(\n        collective::Allreduce(ctx, linalg::MakeVec(ptr, h_ft.size()), collective::Op::kMax));\n  }\n}\n\nvoid GetDataShape(Context const* ctx, DMatrixProxy* proxy,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>* iter, float missing,\n                  ExternalDataInfo* p_info) {\n  auto& info = *p_info;\n\n  auto const is_valid = data::IsValidFunctor{missing};\n  auto nnz_cnt = [&]() {\n    return DispatchAny(proxy, [&](auto const& value) {\n      bst_idx_t n_threads = ctx->Threads();\n      bst_idx_t n_features = info.column_sizes.size();\n      linalg::Tensor<bst_idx_t, 2> column_sizes_tloc({n_threads, n_features}, DeviceOrd::CPU());\n      column_sizes_tloc.Data()->Fill(0ul);\n      auto view = column_sizes_tloc.HostView();\n      common::ParallelFor(value.Size(), n_threads, common::Sched::Static(256), [&](auto i) {\n        auto const& line = value.GetLine(i);\n        for (bst_idx_t j = 0; j < line.Size(); ++j) {\n          data::COOTuple const& elem = line.GetElement(j);\n          if (is_valid(elem)) {\n            view(omp_get_thread_num(), elem.column_idx)++;\n          }\n        }\n      });\n      auto ptr = column_sizes_tloc.Data()->HostPointer();\n      auto result = std::accumulate(ptr, ptr + column_sizes_tloc.Size(), 
static_cast<bst_idx_t>(0));\n      for (bst_idx_t tidx = 0; tidx < n_threads; ++tidx) {\n        for (bst_idx_t fidx = 0; fidx < n_features; ++fidx) {\n          info.column_sizes[fidx] += view(tidx, fidx);\n        }\n      }\n      return result;\n    });\n  };\n\n  /**\n   * CPU impl needs an additional loop for accumulating the column size.\n   */\n  do {\n    // We use do while here as the first batch is fetched in ctor\n    if (info.n_features == 0) {\n      info.n_features = BatchColumns(proxy);\n      collective::SafeColl(collective::Allreduce(ctx, &info.n_features, collective::Op::kMax));\n      info.column_sizes.clear();\n      info.column_sizes.resize(info.n_features, 0);\n      p_info->cats =\n          std::make_shared<CatContainer>(cpu_impl::BatchCats(proxy), BatchCatsIsRef(proxy));\n    } else {\n      CHECK_EQ(info.n_features, BatchColumns(proxy)) << \"Inconsistent number of columns.\";\n      auto cats = cpu_impl::BatchCats(proxy);\n      CHECK_EQ(cats.n_total_cats, p_info->cats->NumCatsTotal()) << error::InconsistentCategories();\n    }\n    bst_idx_t batch_size = BatchSamples(proxy);\n    info.batch_nnz.push_back(nnz_cnt());\n    info.base_rowids.push_back(batch_size);\n    info.nnz += info.batch_nnz.back();\n    info.accumulated_rows += batch_size;\n    info.n_batches++;\n  } while (iter->Next());\n  iter->Reset();\n\n  std::partial_sum(info.base_rowids.cbegin(), info.base_rowids.cend(), info.base_rowids.begin());\n}\n\nvoid MakeSketches(Context const* ctx,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>* iter,\n                  DMatrixProxy* proxy, std::shared_ptr<DMatrix> ref, float missing,\n                  common::HistogramCuts* cuts, BatchParam const& p, MetaInfo const& info,\n                  ExternalDataInfo const& ext_info, std::vector<FeatureType>* p_h_ft) {\n  std::unique_ptr<common::HostSketchContainer> p_sketch;\n  auto& h_ft = *p_h_ft;\n  bst_idx_t accumulated_rows = 0;\n  if (ref) {\n    
GetCutsFromRef(ctx, ref, info.num_col_, p, cuts);\n    h_ft = ref->Info().feature_types.HostVector();\n  } else {\n    size_t i = 0;\n    while (iter->Next()) {\n      if (!p_sketch) {\n        h_ft = proxy->Info().feature_types.ConstHostVector();\n        cpu_impl::SyncFeatureType(ctx, &h_ft);\n        p_sketch = std::make_unique<common::HostSketchContainer>(\n            ctx, p.max_bin, h_ft, ext_info.column_sizes, !proxy->Info().group_ptr_.empty());\n      }\n      DispatchAny(proxy, [&](auto const& batch) {\n        proxy->Info().num_nonzero_ = ext_info.batch_nnz[i];\n        // We don't need base row idx here as Info is from proxy and the number of rows in\n        // it is consistent with data batch.\n        p_sketch->PushAdapterBatch(batch, 0, proxy->Info(), missing);\n      });\n      accumulated_rows += BatchSamples(proxy);\n      ++i;\n    }\n    iter->Reset();\n    CHECK_EQ(accumulated_rows, info.num_row_);\n\n    CHECK(p_sketch);\n    *cuts = p_sketch->MakeCuts(ctx, info);\n  }\n\n  if (!h_ft.empty()) {\n    CHECK_EQ(h_ft.size(), ext_info.n_features);\n  }\n}\n}  // namespace cpu_impl\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/quantile_dmatrix.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#include <algorithm>  // for max\n#include <limits>     // for numeric_limits\n#include <numeric>    // for partial_sum\n#include <vector>     // for vector\n\n#include \"../collective/allreduce.h\"    // for Allreduce\n#include \"../common/cuda_context.cuh\"   // for CUDAContext\n#include \"../common/cuda_rt_utils.h\"    // for AllVisibleGPUs\n#include \"../common/device_vector.cuh\"  // for XGBCachingDeviceAllocator\n#include \"../common/error_msg.h\"        // for InconsistentCategories\n#include \"../common/hist_util.cuh\"      // for AdapterDeviceSketch\n#include \"../common/nvtx_utils.h\"       // for xgboost_NVTX_FN_RANGE\n#include \"../common/quantile.cuh\"       // for SketchContainer\n#include \"cat_container.h\"              // for CatContainer\n#include \"ellpack_page.cuh\"             // for EllpackPage\n#include \"proxy_dmatrix.cuh\"            // for DispatchAny\n#include \"proxy_dmatrix.h\"              // for DataIterProxy\n#include \"quantile_dmatrix.h\"           // for GetCutsFromRef\n\nnamespace xgboost::data {\nvoid GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {\n  *cuts = page.Impl()->Cuts();\n}\n\nnamespace cuda_impl {\nvoid MakeSketches(Context const* ctx,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>* iter,\n                  DMatrixProxy* proxy, std::shared_ptr<DMatrix> ref, BatchParam const& p,\n                  float missing, std::shared_ptr<common::HistogramCuts> cuts, MetaInfo const& info,\n                  ExternalDataInfo* p_ext_info) {\n  xgboost_NVTX_FN_RANGE();\n  // Lazy because we need the `n_features`.\n  std::unique_ptr<common::SketchContainer> sketch;\n  auto& ext_info = *p_ext_info;\n\n  // Workaround empty input with CPU ctx.\n  Context new_ctx;\n  Context const* p_ctx;\n  if (ctx->IsCUDA()) {\n    p_ctx = ctx;\n  } else {\n    new_ctx.UpdateAllowUnknown(Args{{\"device\", 
dh::GetDevice(ctx).Name()}});\n    p_ctx = &new_ctx;\n  }\n\n  do {\n    /**\n     * Get the data shape.\n     */\n    // We use do while here as the first batch has been fetched in the ctor\n    CHECK_LT(ctx->Ordinal(), curt::AllVisibleGPUs());\n    auto device = dh::GetDevice(ctx);\n    curt::SetDevice(device.ordinal);\n    auto cats = cuda_impl::BatchCats(proxy);\n    if (ext_info.n_features == 0) {\n      ext_info.n_features = data::BatchColumns(proxy);\n      ext_info.cats =\n          std::make_shared<CatContainer>(p_ctx, cats, ::xgboost::data::BatchCatsIsRef(proxy));\n      auto rc = collective::Allreduce(ctx, linalg::MakeVec(&ext_info.n_features, 1),\n                                      collective::Op::kMax);\n      SafeColl(rc);\n    } else {\n      CHECK_EQ(cats.n_total_cats, ext_info.cats->NumCatsTotal()) << error::InconsistentCategories();\n      CHECK_EQ(ext_info.n_features, data::BatchColumns(proxy)) << \"Inconsistent number of columns.\";\n    }\n\n    auto batch_rows = data::BatchSamples(proxy);\n    ext_info.accumulated_rows += batch_rows;\n    // Prune to this after each batch\n    auto n_cuts_per_feat =\n        common::detail::RequiredSampleCutsPerColumn(p.max_bin, ext_info.accumulated_rows);\n\n    /**\n     * Handle sketching.\n     */\n    if (!ref) {\n      if (!sketch) {\n        sketch = std::make_unique<common::SketchContainer>(proxy->Info().feature_types, p.max_bin,\n                                                           ext_info.n_features, dh::GetDevice(ctx));\n      }\n      proxy->Info().weights_.SetDevice(dh::GetDevice(ctx));\n      DispatchAny(proxy, [&](auto const& value) {\n        common::AdapterDeviceSketch(p_ctx, value, p.max_bin, proxy->Info(), missing, sketch.get());\n      });\n      sketch->Prune(p_ctx, n_cuts_per_feat);\n      LOG(DEBUG) << \"Total capacity:\" << common::HumanMemUnit(sketch->MemCapacityBytes());\n    }\n\n    /**\n     * Rest of the data shape.\n     */\n    dh::device_vector<size_t> 
row_counts(batch_rows + 1, 0);\n    common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());\n    ext_info.row_stride =\n        std::max(ext_info.row_stride, DispatchAny(proxy, [=](auto const& value) {\n                   return GetRowCounts(ctx, value, row_counts_span, dh::GetDevice(ctx), missing);\n                 }));\n    ext_info.nnz += thrust::reduce(ctx->CUDACtx()->CTP(), row_counts.begin(), row_counts.end());\n    ext_info.n_batches++;\n    ext_info.base_rowids.push_back(batch_rows);\n  } while (iter->Next());\n  iter->Reset();\n\n  CHECK_GE(ext_info.n_features, 1) << \"Data must has at least 1 column.\";\n  std::partial_sum(ext_info.base_rowids.cbegin(), ext_info.base_rowids.cend(),\n                   ext_info.base_rowids.begin());\n\n  // Get reference\n  curt::SetDevice(dh::GetDevice(ctx).ordinal);\n  if (!ref) {\n    if (!sketch) {\n      // Empty local input can happen in distributed settings.\n      sketch = std::make_unique<common::SketchContainer>(proxy->Info().feature_types, p.max_bin,\n                                                         ext_info.n_features, dh::GetDevice(ctx));\n    }\n    *cuts = sketch->MakeCuts(ctx, info.IsColumnSplit());\n    sketch.reset();\n  } else {\n    GetCutsFromRef(ctx, ref, ext_info.n_features, p, cuts.get());\n  }\n\n  ctx->CUDACtx()->Stream().Sync();\n}\n}  // namespace cuda_impl\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/quantile_dmatrix.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#pragma once\n#include <cstdint>  // for int32_t\n#include <memory>   // for shared_ptr\n#include <vector>   // for vector\n\n#include \"proxy_dmatrix.h\"  // for DataIterProxy\n#include \"xgboost/data.h\"   // for DMatrix, BatchIterator, SparsePage\n#include \"xgboost/span.h\"   // for Span\n\nnamespace xgboost::common {\nclass HistogramCuts;\n}  // namespace xgboost::common\n\nnamespace xgboost::data {\n/**\n * @brief Base class for quantile-based DMatrix.\n *\n * `QuantileDMatrix` is an intermediate storage for quantilization results including\n * quantile cuts and histogram index. Quantilization is designed to be performed on stream\n * of data. In practice, we feed batches of data into the QuantileDMatrix.\n *\n * - It's only supported by hist tree method (both CPU and GPU) since approx requires a\n *   re-calculation of quantiles for each iteration. We can fix this by retaining a\n *   reference to the callback if there are feature requests.\n *\n * - The CPU format and the GPU format are different, the former uses a CSR + CSC for\n *   histogram index while the latter uses only Ellpack.\n */\nclass QuantileDMatrix : public DMatrix {\n  template <typename Page>\n  static auto InvalidTreeMethod() {\n    LOG(FATAL) << \"Only `hist` tree method can use `QuantileDMatrix`.\";\n    return BatchSet<Page>(BatchIterator<Page>(nullptr));\n  }\n\n public:\n  DMatrix *Slice(common::Span<std::int32_t const>) final {\n    LOG(FATAL) << \"Slicing DMatrix is not supported for external memory.\";\n    return nullptr;\n  }\n  DMatrix *SliceCol(std::int32_t, std::int32_t) final {\n    LOG(FATAL) << \"Slicing DMatrix columns is not supported for external memory.\";\n    return nullptr;\n  }\n\n  [[nodiscard]] bool SparsePageExists() const final { return false; }\n\n  BatchSet<SparsePage> GetRowBatches() final {\n    LOG(FATAL) << \"Not implemented for `QuantileDMatrix`.\";\n    return 
BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));\n  }\n  BatchSet<CSCPage> GetColumnBatches(Context const *) final { return InvalidTreeMethod<CSCPage>(); }\n  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *) final {\n    return InvalidTreeMethod<SortedCSCPage>();\n  }\n\n  [[nodiscard]] MetaInfo &Info() final { return info_; }\n  [[nodiscard]] MetaInfo const &Info() const final { return info_; }\n\n  [[nodiscard]] Context const *Ctx() const final { return &fmat_ctx_; }\n\n protected:\n  Context fmat_ctx_;\n  MetaInfo info_;\n};\n\n/**\n * @brief Get quantile cuts from reference (Quantile)DMatrix.\n *\n * @param ctx The context of the new DMatrix.\n * @param ref The reference DMatrix.\n * @param n_features Number of features, used for validation only.\n * @param p Batch parameter for the new DMatrix.\n * @param p_cuts Output quantile cuts.\n */\nvoid GetCutsFromRef(Context const *ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,\n                    BatchParam p, common::HistogramCuts *p_cuts);\n\n/**\n * @brief Get quantile cuts from ellpack page.\n */\nvoid GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);\n\nnamespace cpu_impl {\nvoid SyncFeatureType(Context const *ctx, std::vector<FeatureType> *p_h_ft);\n\n/**\n * @brief Fetch the external data shape.\n */\nvoid GetDataShape(Context const *ctx, DMatrixProxy *proxy,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> *iter, float missing,\n                  ExternalDataInfo *p_info);\n\n/**\n * @brief Create quantile sketch for CPU from an external iterator or from a reference\n *        DMatrix.\n */\nvoid MakeSketches(Context const *ctx,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> *iter,\n                  DMatrixProxy *proxy, std::shared_ptr<DMatrix> ref, float missing,\n                  common::HistogramCuts *cuts, BatchParam const &p, MetaInfo const &info,\n                  
ExternalDataInfo const &ext_info, std::vector<FeatureType> *p_h_ft);\n}  // namespace cpu_impl\n\nnamespace cuda_impl {\nvoid MakeSketches(Context const *ctx,\n                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> *iter,\n                  DMatrixProxy *proxy, std::shared_ptr<DMatrix> ref, BatchParam const &p,\n                  float missing, std::shared_ptr<common::HistogramCuts> cuts, MetaInfo const &info,\n                  ExternalDataInfo *p_ext_info);\n}  // namespace cuda_impl\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/simple_batch_iterator.h",
    "content": "/**\n * Copyright 2019-2024, XGBoost contributors\n */\n#ifndef XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_\n#define XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_\n\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n\n#include \"xgboost/data.h\"  // for BatchIteratorImpl\n\nnamespace xgboost::data {\ntemplate <typename T>\nclass SimpleBatchIteratorImpl : public BatchIteratorImpl<T> {\n public:\n  explicit SimpleBatchIteratorImpl(std::shared_ptr<T const> page) : page_(std::move(page)) {}\n  const T& operator*() const override {\n    CHECK(page_ != nullptr);\n    return *page_;\n  }\n  SimpleBatchIteratorImpl& operator++() override {\n    page_ = nullptr;\n    return *this;\n  }\n  bool AtEnd() const override { return page_ == nullptr; }\n\n  std::shared_ptr<T const> Page() const override { return page_; }\n\n private:\n  std::shared_ptr<T const> page_{nullptr};\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_\n"
  },
  {
    "path": "src/data/simple_dmatrix.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file simple_dmatrix.cc\n * \\brief the input data structure for gradient boosting\n * \\author Tianqi Chen\n */\n#include \"simple_dmatrix.h\"\n\n#include <algorithm>\n#include <limits>\n#include <numeric>  // for accumulate\n#include <type_traits>\n#include <vector>\n\n#include \"../collective/allgather.h\"\n#include \"../collective/communicator-inl.h\"  // for GetWorldSize, GetRank, Allgather\n#include \"../common/error_msg.h\"             // for InconsistentMaxBin\n#include \"./simple_batch_iterator.h\"\n#include \"adapter.h\"\n#include \"batch_utils.h\"    // for CheckEmpty, RegenGHist\n#include \"cat_container.h\"  // for CatContainer\n#include \"ellpack_page.h\"   // for EllpackPage\n#include \"gradient_index.h\"\n#include \"proxy_dmatrix.h\"  // for DispatchAny\n#include \"xgboost/c_api.h\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost::data {\nMetaInfo& SimpleDMatrix::Info() { return info_; }\n\nconst MetaInfo& SimpleDMatrix::Info() const { return info_; }\n\nDMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {\n  auto out = new SimpleDMatrix;\n  SparsePage& out_page = *out->sparse_page_;\n  // Convert to uint64 to avoid a breaking change in the C API. 
The performance impact is\n  // small since we have to iterate through the sparse page.\n  std::vector<bst_idx_t> h_ridx(ridxs.data(), ridxs.data() + ridxs.size());\n  for (auto const& page : this->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    auto& h_data = out_page.data.HostVector();\n    auto& h_offset = out_page.offset.HostVector();\n    size_t rptr{0};\n    for (auto ridx : ridxs) {\n      auto inst = batch[ridx];\n      rptr += inst.size();\n      std::copy(inst.begin(), inst.end(), std::back_inserter(h_data));\n      h_offset.emplace_back(rptr);\n    }\n    auto ctx = this->fmat_ctx_.MakeCPU();\n    out->Info() = this->Info().Slice(&ctx, h_ridx, h_offset.back());\n  }\n  out->fmat_ctx_ = this->fmat_ctx_;\n\n  out->Info().Cats()->Copy(&fmat_ctx_, *this->Info().Cats());\n  return out;\n}\n\nDMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {\n  if (this->Cats()->HasCategorical()) {\n    LOG(FATAL) << \"Slicing column is not supported for DataFrames with categorical columns.\";\n  }\n  auto out = new SimpleDMatrix;\n  SparsePage& out_page = *out->sparse_page_;\n  auto const slice_size = info_.num_col_ / num_slices;\n  auto const slice_start = slice_size * slice_id;\n  auto const slice_end = (slice_id == num_slices - 1) ? 
info_.num_col_ : slice_start + slice_size;\n  for (auto const& page : this->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    auto& h_data = out_page.data.HostVector();\n    auto& h_offset = out_page.offset.HostVector();\n    size_t rptr{0};\n    for (bst_idx_t i = 0; i < this->Info().num_row_; i++) {\n      auto inst = batch[i];\n      auto prev_size = h_data.size();\n      std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),\n                   [&](Entry e) { return e.index >= slice_start && e.index < slice_end; });\n      rptr += h_data.size() - prev_size;\n      h_offset.emplace_back(rptr);\n    }\n    out->Info() = this->Info().Copy();\n    out->Info().num_nonzero_ = h_offset.back();\n  }\n  out->Info().data_split_mode = DataSplitMode::kCol;\n  return out;\n}\n\nvoid SimpleDMatrix::ReindexFeatures(Context const* ctx, DataSplitMode split_mode) {\n  if (split_mode == DataSplitMode::kCol && collective::GetWorldSize() > 1) {\n    std::vector<std::uint64_t> buffer(collective::GetWorldSize());\n    buffer[collective::GetRank()] = this->info_.num_col_;\n    auto rc = collective::Allgather(ctx, linalg::MakeVec(buffer.data(), buffer.size()));\n    SafeColl(rc);\n    auto offset = std::accumulate(buffer.cbegin(), buffer.cbegin() + collective::GetRank(), 0);\n    if (offset == 0) {\n      return;\n    }\n    sparse_page_->Reindex(offset, ctx->Threads());\n  }\n}\n\nBatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {\n  // since csr is the default data structure so `source_` is always available.\n  auto begin_iter =\n      BatchIterator<SparsePage>(new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));\n  return BatchSet<SparsePage>(begin_iter);\n}\n\nBatchSet<CSCPage> SimpleDMatrix::GetColumnBatches(Context const* ctx) {\n  // column page doesn't exist, generate it\n  if (!column_page_) {\n    auto n = std::numeric_limits<decltype(Entry::index)>::max();\n    if (this->sparse_page_->Size() > n) {\n      error::MaxSampleSize(n);\n    }\n   
 column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));\n  }\n  auto begin_iter = BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));\n  return BatchSet<CSCPage>(begin_iter);\n}\n\nBatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) {\n  // Sorted column page doesn't exist, generate it\n  if (!sorted_column_page_) {\n    auto n = std::numeric_limits<decltype(Entry::index)>::max();\n    if (this->sparse_page_->Size() > n) {\n      error::MaxSampleSize(n);\n    }\n    sorted_column_page_.reset(\n        new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));\n    sorted_column_page_->SortRows(ctx->Threads());\n  }\n  auto begin_iter =\n      BatchIterator<SortedCSCPage>(new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));\n  return BatchSet<SortedCSCPage>(begin_iter);\n}\n\nBatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(Context const* ctx,\n                                                       const BatchParam& param) {\n  detail::CheckEmpty(batch_param_, param);\n  if (ellpack_page_ && param.Initialized() && param.forbid_regen) {\n    if (detail::RegenGHist(batch_param_, param)) {\n      CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();\n    }\n    CHECK(!detail::RegenGHist(batch_param_, param));\n  }\n  if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) {\n    // ELLPACK page doesn't exist, generate it\n    LOG(INFO) << \"Generating new Ellpack page.\";\n    // These places can ask for a ellpack page:\n    // - GPU hist: the ctx must be on CUDA.\n    // - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA.\n    // - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. 
It should\n    //   not regen, otherwise it indicates a mismatched parameter like max_bin.\n    CHECK_GE(param.max_bin, 2);\n    if (ctx->IsCUDA()) {\n      // The context passed in is on GPU, we pick it first since we prioritize the context\n      // in Booster.\n      ellpack_page_.reset(new EllpackPage(ctx, this, param));\n    } else if (fmat_ctx_.IsCUDA()) {\n      // DMatrix was initialized on GPU, we use the context from initialization.\n      ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param));\n    } else {\n      // Mismatched parameter, user set a new max_bin during training.\n      auto cuda_ctx = ctx->MakeCUDA();\n      ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param));\n    }\n\n    batch_param_ = param.MakeCache();\n  }\n  auto begin_iter =\n      BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_page_));\n  return BatchSet<EllpackPage>(begin_iter);\n}\n\nBatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(Context const* ctx,\n                                                           const BatchParam& param) {\n  detail::CheckEmpty(batch_param_, param);\n  // Check whether we can regenerate the gradient index. This is to keep the consistency\n  // between evaluation data and training data.\n  if (gradient_index_ && param.Initialized() && param.forbid_regen) {\n    if (detail::RegenGHist(batch_param_, param)) {\n      CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();\n    }\n    CHECK(!detail::RegenGHist(batch_param_, param)) << \"Inconsistent sparse threshold.\";\n  }\n  if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) {\n    // GIDX page doesn't exist, generate it\n    LOG(DEBUG) << \"Generating new Gradient Index.\";\n    // These places can ask for a CSR gidx:\n    // - CPU Hist: the ctx must be on CPU.\n    // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU.\n    // - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. 
It should not\n    //   regen, otherwise it indicates a mismatched parameter like max_bin.\n    CHECK_GE(param.max_bin, 2);\n    // Used only by approx.\n    auto sorted_sketch = param.regen;\n    if (!ctx->IsCUDA()) {\n      // The context passed in is on CPU, we pick it first since we prioritize the context\n      // in Booster.\n      gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh,\n                                                 sorted_sketch, param.hess});\n    } else if (!fmat_ctx_.IsCUDA()) {\n      // DMatrix was initialized on CPU, we use the context from initialization.\n      gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin,\n                                                 param.sparse_thresh, sorted_sketch, param.hess});\n    } else {\n      // Mismatched parameter, user set a new max_bin during training.\n      auto cpu_ctx = ctx->MakeCPU();\n      gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh,\n                                                 sorted_sketch, param.hess});\n    }\n\n    batch_param_ = param.MakeCache();\n    CHECK_EQ(batch_param_.hess.data(), param.hess.data());\n  }\n  auto begin_iter = BatchIterator<GHistIndexMatrix>(\n      new SimpleBatchIteratorImpl<GHistIndexMatrix>(gradient_index_));\n  return BatchSet<GHistIndexMatrix>(begin_iter);\n}\n\nBatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) {\n  auto casted = std::make_shared<ExtSparsePage>(sparse_page_);\n  CHECK(casted);\n  auto begin_iter =\n      BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(casted));\n  return BatchSet<ExtSparsePage>(begin_iter);\n}\n\ntemplate <typename AdapterT>\nSimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,\n                             DataSplitMode data_split_mode) {\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", std::to_string(nthread)}});\n  
std::vector<uint64_t> qids;\n  uint64_t default_max = std::numeric_limits<uint64_t>::max();\n  uint64_t last_group_id = default_max;\n  bst_uint group_size = 0;\n  auto& offset_vec = sparse_page_->offset.HostVector();\n  auto& data_vec = sparse_page_->data.HostVector();\n  // batch_size is either number of rows or cols, depending on data layout\n  uint64_t inferred_num_columns = 0;\n  uint64_t total_batch_size = 0;\n\n  adapter->BeforeFirst();\n  // Iterate over batches of input data\n  while (adapter->Next()) {\n    bool type_error = false;\n    auto push = [&](auto const& batch) {\n      return sparse_page_->Push(batch, missing, ctx.Threads());\n    };\n    bst_idx_t batch_max_columns =\n        cpu_impl::DispatchAny<true, std::add_pointer_t>(&ctx, adapter, push, &type_error);\n    auto& batch = adapter->Value();\n    if (type_error) {\n      // Not supported by the dispatch function.\n      batch_max_columns = push(batch);\n    }\n\n    inferred_num_columns = std::max(batch_max_columns, inferred_num_columns);\n    total_batch_size += batch.Size();\n    // Append meta information if available\n    if (batch.Labels() != nullptr) {\n      info_.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {\n        shape[1] = 1;\n        auto& labels = data->HostVector();\n        labels.insert(labels.end(), batch.Labels(), batch.Labels() + batch.Size());\n        shape[0] += batch.Size();\n      });\n    }\n    if (batch.Weights() != nullptr) {\n      auto& weights = info_.weights_.HostVector();\n      weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());\n    }\n    if (batch.BaseMargin() != nullptr) {\n      info_.base_margin_ = decltype(info_.base_margin_){\n          batch.BaseMargin(), batch.BaseMargin() + batch.Size(), {batch.Size()}, DeviceOrd::CPU()};\n    }\n    if (batch.Qid() != nullptr) {\n      qids.insert(qids.end(), batch.Qid(), batch.Qid() + batch.Size());\n      // get group\n      for (size_t i = 0; i < 
batch.Size(); ++i) {\n        const uint64_t cur_group_id = batch.Qid()[i];\n        if (last_group_id == default_max || last_group_id != cur_group_id) {\n          info_.group_ptr_.push_back(group_size);\n        }\n        last_group_id = cur_group_id;\n        ++group_size;\n      }\n    }\n  }\n\n  if (last_group_id != default_max) {\n    if (group_size > info_.group_ptr_.back()) {\n      info_.group_ptr_.push_back(group_size);\n    }\n  }\n\n  // Deal with empty rows/columns if necessary\n  if (adapter->NumColumns() == kAdapterUnknownSize) {\n    info_.num_col_ = inferred_num_columns;\n  } else {\n    info_.num_col_ = adapter->NumColumns();\n  }\n\n  if constexpr (std::is_same_v<AdapterT, ColumnarAdapter>) {\n    if (adapter->HasRefCategorical()) {\n      info_.Cats(std::make_shared<CatContainer>(adapter->RefCats(), true));\n    } else if (adapter->HasCategorical()) {\n      info_.Cats(std::make_shared<CatContainer>(adapter->Cats(), false));\n    }\n  }\n\n  // Must called before sync column\n  this->ReindexFeatures(&ctx, data_split_mode);\n  this->info_.SynchronizeNumberOfColumns(&ctx, data_split_mode);\n\n  if (adapter->NumRows() == kAdapterUnknownSize) {\n    using IteratorAdapterT =\n        IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;\n    // If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to\n    // determine the correct number of rows, as offset_vec may be too short\n    if (std::is_same_v<AdapterT, IteratorAdapterT> || std::is_same_v<AdapterT, FileAdapter>) {\n      info_.num_row_ = total_batch_size;\n      // Ensure offset_vec.size() - 1 == [number of rows]\n      while (offset_vec.size() - 1 < total_batch_size) {\n        offset_vec.emplace_back(offset_vec.back());\n      }\n    } else {\n      CHECK((std::is_same_v<AdapterT, CSCArrayAdapter>)) << \"Expecting a CSC adapter.\";\n      info_.num_row_ = offset_vec.size() - 1;\n    }\n  } else {\n    if (offset_vec.empty()) {\n      
offset_vec.emplace_back(0);\n    }\n    while (offset_vec.size() - 1 < adapter->NumRows()) {\n      offset_vec.emplace_back(offset_vec.back());\n    }\n    info_.num_row_ = adapter->NumRows();\n  }\n  info_.num_nonzero_ = data_vec.size();\n\n  SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0);\n\n  // Sort the index for row partitioners used by various tree methods.\n  if (!sparse_page_->IsIndicesSorted(ctx.Threads())) {\n    sparse_page_->SortIndices(ctx.Threads());\n  }\n\n  this->fmat_ctx_ = ctx;\n}\n\nSimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {\n  int tmagic;\n  CHECK(in_stream->Read(&tmagic)) << \"invalid input file format\";\n  CHECK_EQ(tmagic, kMagic) << \"invalid format, magic number mismatch\";\n  info_.LoadBinary(in_stream);\n  in_stream->Read(&sparse_page_->offset.HostVector());\n  in_stream->Read(&sparse_page_->data.HostVector());\n}\n\nvoid SimpleDMatrix::SaveToLocalFile(const std::string& fname) {\n  std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), \"w\"));\n  int tmagic = kMagic;\n  fo->Write(tmagic);\n  info_.SaveBinary(fo.get());\n  fo->Write(sparse_page_->offset.HostVector());\n  fo->Write(sparse_page_->data.HostVector());\n}\n\n#define INSTANTIATE_SDCTOR(__ADAPTER_T)                                                            \\\n  template SimpleDMatrix::SimpleDMatrix(__ADAPTER_T* adapter, float missing, std::int32_t nthread, \\\n                                        DataSplitMode data_split_mode);\n\nINSTANTIATE_SDCTOR(DenseAdapter)\nINSTANTIATE_SDCTOR(ArrayAdapter)\nINSTANTIATE_SDCTOR(CSRArrayAdapter)\nINSTANTIATE_SDCTOR(CSCArrayAdapter)\nINSTANTIATE_SDCTOR(FileAdapter)\nINSTANTIATE_SDCTOR(ColumnarAdapter)\nnamespace {\nusing IterAdapterT = IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;\n}\nINSTANTIATE_SDCTOR(IterAdapterT)\n\n#undef INSTANTIATE_SDCTOR\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/simple_dmatrix.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n\n#include <cstdint>  // for int32_t, int8_t\n#include <memory>   // for make_shared\n\n#include \"../common/cuda_rt_utils.h\"  // for CurrentDevice, SetDevice\n#include \"cat_container.h\"            // for CatContainer\n#include \"device_adapter.cuh\"\n#include \"proxy_dmatrix.cuh\"  // for DispatchAny\n#include \"simple_dmatrix.cuh\"\n#include \"simple_dmatrix.h\"\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"\n\nnamespace xgboost::data {\n// Does not currently support metainfo as no on-device data source contains this\n// Current implementation assumes a single batch. More batches can\n// be supported in future. Does not currently support inferring row/column size\ntemplate <typename AdapterT>\nSimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread,\n                             DataSplitMode data_split_mode) {\n  CHECK(data_split_mode != DataSplitMode::kCol)\n      << \"Column-wise data split is currently not supported by the GPU.\";\n  auto device = (!adapter->Device().IsCUDA() || adapter->NumRows() == 0)\n                    ? 
DeviceOrd::CUDA(curt::CurrentDevice())\n                    : adapter->Device();\n  CHECK(device.IsCUDA());\n  curt::SetDevice(device.ordinal);\n\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", std::to_string(nthread)}, {\"device\", device.Name()}});\n\n  CHECK(adapter->NumRows() != kAdapterUnknownSize);\n  CHECK(adapter->NumColumns() != kAdapterUnknownSize);\n\n  adapter->BeforeFirst();\n  adapter->Next();\n\n  // Enforce single batch\n  CHECK(!adapter->Next());\n\n  cuda_impl::DispatchAny<true, std::add_pointer_t>(&ctx, adapter, [&](auto const& batch) {\n    info_.num_nonzero_ = CopyToSparsePage(&ctx, batch, device, missing, sparse_page_.get());\n  });\n  info_.num_col_ = adapter->NumColumns();\n  info_.num_row_ = adapter->NumRows();\n\n  if constexpr (std::is_same_v<AdapterT, CudfAdapter>) {\n    if (adapter->HasRefCategorical()) {\n      info_.Cats(std::make_shared<CatContainer>(&ctx, adapter->RefCats(), true));\n    } else if (adapter->HasCategorical()) {\n      info_.Cats(std::make_shared<CatContainer>(&ctx, adapter->Cats(), false));\n    }\n  }\n  this->info_.SynchronizeNumberOfColumns(&ctx, data_split_mode);\n\n  this->fmat_ctx_ = ctx;\n}\n\ntemplate SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, std::int32_t nthread,\n                                      DataSplitMode data_split_mode);\ntemplate SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing, std::int32_t nthread,\n                                      DataSplitMode data_split_mode);\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/simple_dmatrix.cuh",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n * \\file simple_dmatrix.cuh\n */\n#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_\n#define XGBOOST_DATA_SIMPLE_DMATRIX_CUH_\n\n#include <thrust/copy.h>\n#include <thrust/execution_policy.h>\n#include <thrust/scan.h>\n\n#include \"../common/device_helpers.cuh\"\n#include \"../common/error_msg.h\"  // for InfInData\n#include \"../common/algorithm.cuh\"  // for CopyIf\n#include \"device_adapter.cuh\"     // for NoInfInData\n\nnamespace xgboost::data {\n\ntemplate <typename AdapterBatchT>\nstruct COOToEntryOp {\n  AdapterBatchT batch;\n  __device__ Entry operator()(size_t idx) {\n    const auto& e = batch.GetElement(idx);\n    return Entry(e.column_idx, e.value);\n  }\n};\n\n// Here the data is already correctly ordered and simply needs to be compacted\n// to remove missing data\ntemplate <typename AdapterBatchT>\nvoid CopyDataToDMatrix(Context const* ctx, AdapterBatchT batch, common::Span<Entry> data,\n                       float missing) {\n  auto counting = thrust::make_counting_iterator(0llu);\n  COOToEntryOp<decltype(batch)> transform_op{batch};\n  thrust::transform_iterator<decltype(transform_op), decltype(counting)> transform_iter(\n      counting, transform_op);\n  auto begin_output = thrust::device_pointer_cast(data.data());\n  common::CopyIf(ctx->CUDACtx(), transform_iter, transform_iter + batch.Size(), begin_output,\n                 IsValidFunctor(missing));\n}\n\ntemplate <typename AdapterBatchT>\nvoid CountRowOffsets(Context const* ctx, const AdapterBatchT& batch, common::Span<bst_idx_t> offset,\n                     DeviceOrd device, float missing) {\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n  IsValidFunctor is_valid(missing);\n  auto cuctx = ctx->CUDACtx();\n  // Count elements per row\n  dh::LaunchN(batch.Size(), cuctx->Stream(), [=] __device__(size_t idx) {\n    auto element = batch.GetElement(idx);\n    if (is_valid(element)) {\n      atomicAdd(reinterpret_cast<unsigned long 
long*>(  // NOLINT\n                    &offset[element.row_idx]),\n                static_cast<unsigned long long>(1));  // NOLINT\n    }\n  });\n\n  thrust::exclusive_scan(cuctx->CTP(), thrust::device_pointer_cast(offset.data()),\n                         thrust::device_pointer_cast(offset.data() + offset.size()),\n                         thrust::device_pointer_cast(offset.data()));\n}\n\ntemplate <typename AdapterBatchT>\nbst_idx_t CopyToSparsePage(Context const* ctx, AdapterBatchT const& batch, DeviceOrd device,\n                           float missing, SparsePage* page) {\n  bool valid = NoInfInData(ctx, batch, IsValidFunctor{missing});\n  CHECK(valid) << error::InfInData();\n\n  page->offset.SetDevice(device);\n  page->data.SetDevice(device);\n  page->offset.Resize(batch.NumRows() + 1);\n  auto s_offset = page->offset.DeviceSpan();\n  CountRowOffsets(ctx, batch, s_offset, device, missing);\n  auto num_nonzero_ = page->offset.HostVector().back();\n  page->data.Resize(num_nonzero_);\n  CopyDataToDMatrix(ctx, batch, page->data.DeviceSpan(), missing);\n\n  return num_nonzero_;\n}\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SIMPLE_DMATRIX_CUH_\n"
  },
  {
    "path": "src/data/simple_dmatrix.h",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file simple_dmatrix.h\n * \\brief In-memory version of DMatrix.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_H_\n#define XGBOOST_DATA_SIMPLE_DMATRIX_H_\n\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n\n#include <memory>\n#include <string>\n\n#include \"gradient_index.h\"\n\nnamespace xgboost::data {\n// Used for single batch data.\nclass SimpleDMatrix : public DMatrix {\n public:\n  SimpleDMatrix() = default;\n  template <typename AdapterT>\n  explicit SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread,\n                         DataSplitMode data_split_mode = DataSplitMode::kRow);\n\n  explicit SimpleDMatrix(dmlc::Stream* in_stream);\n  ~SimpleDMatrix() override = default;\n\n  void SaveToLocalFile(const std::string& fname);\n\n  MetaInfo& Info() override;\n  const MetaInfo& Info() const override;\n  Context const* Ctx() const override { return &fmat_ctx_; }\n\n  DMatrix* Slice(common::Span<int32_t const> ridxs) override;\n  DMatrix* SliceCol(int num_slices, int slice_id) override;\n\n  /*! 
\\brief magic number used to identify SimpleDMatrix binary files */\n  static const int kMagic = 0xffffab01;\n\n protected:\n  BatchSet<SparsePage> GetRowBatches() override;\n  BatchSet<CSCPage> GetColumnBatches(Context const* ctx) override;\n  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) override;\n  BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override;\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx, const BatchParam& param) override;\n  BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) override;\n\n  MetaInfo info_;\n  // Primary storage type\n  std::shared_ptr<SparsePage> sparse_page_ = std::make_shared<SparsePage>();\n  std::shared_ptr<CSCPage> column_page_{nullptr};\n  std::shared_ptr<SortedCSCPage> sorted_column_page_{nullptr};\n  std::shared_ptr<EllpackPage> ellpack_page_{nullptr};\n  std::shared_ptr<GHistIndexMatrix> gradient_index_{nullptr};\n  BatchParam batch_param_;\n\n  bool EllpackExists() const override { return static_cast<bool>(ellpack_page_); }\n  bool GHistIndexExists() const override { return static_cast<bool>(gradient_index_); }\n  bool SparsePageExists() const override { return true; }\n\n  /**\n   * @brief Reindex the features based on a global view.\n   *\n   * In some cases (e.g. column-wise data split and vertical federated learning), features are\n   * loaded locally with indices starting from 0. However, all the algorithms assume the features\n   * are globally indexed, so we reindex the features based on the offset needed to obtain the\n   * global view.\n   */\n  void ReindexFeatures(Context const* ctx, DataSplitMode split_mode);\n\n private:\n  // Context used only for DMatrix initialization.\n  Context fmat_ctx_;\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SIMPLE_DMATRIX_H_\n"
  },
  {
    "path": "src/data/sparse_page_dmatrix.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file sparse_page_dmatrix.cc\n *\n * \\brief The external memory version of Page Iterator.\n * \\author Tianqi Chen\n */\n#include \"sparse_page_dmatrix.h\"\n\n#include <algorithm>  // for max\n#include <memory>     // for make_shared\n#include <string>     // for string\n#include <utility>    // for move\n#include <variant>    // for visit\n\n#include \"../common/error_msg.h\"  // for InconsistentCategories, CacheHostRatio\n#include \"batch_utils.h\"          // for RegenGHist\n#include \"cat_container.h\"        // for CatContainer\n#include \"gradient_index.h\"       // for GHistIndexMatrix\n#include \"sparse_page_source.h\"   // for MakeCachePrefix\n\nnamespace xgboost::data {\nMetaInfo &SparsePageDMatrix::Info() { return info_; }\n\nconst MetaInfo &SparsePageDMatrix::Info() const { return info_; }\n\nSparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy_handle,\n                                     DataIterResetCallback *reset, XGDMatrixCallbackNext *next,\n                                     ExtMemConfig const &config)\n    : proxy_{proxy_handle},\n      iter_{iter_handle},\n      reset_{reset},\n      next_{next},\n      missing_{config.missing},\n      cache_prefix_{config.cache},\n      on_host_{config.on_host},\n      cache_host_ratio_{config.cache_host_ratio},\n      min_cache_page_bytes_{config.min_cache_page_bytes} {\n  CHECK(detail::HostRatioIsAuto(config.cache_host_ratio)) << error::CacheHostRatioNotImpl();\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", std::to_string(config.n_threads)}});\n  cache_prefix_ = MakeCachePrefix(cache_prefix_);\n\n  DMatrixProxy *proxy = MakeProxy(proxy_);\n  auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_, reset_, next_};\n\n  auto get_cats = [](DMatrixProxy const *proxy) {\n    if (proxy->Ctx()->IsCPU()) {\n      return std::make_shared<CatContainer>(cpu_impl::BatchCats(proxy), 
BatchCatsIsRef(proxy));\n    } else {\n#if defined(XGBOOST_USE_CUDA)\n      return std::make_shared<CatContainer>(proxy->Ctx(), cuda_impl::BatchCats(proxy),\n                                            BatchCatsIsRef(proxy));\n#else\n      common::AssertGPUSupport();\n      return std::make_shared<CatContainer>();\n#endif\n    }\n  };\n\n  // The proxy is iterated together with the sparse page source so we can obtain all\n  // information in 1 pass.\n  for (auto const &page : this->GetRowBatchesImpl(&ctx)) {\n    this->info_.Extend(std::move(proxy->Info()), false, false);\n    ext_info_.n_features =\n        std::max(static_cast<bst_feature_t>(ext_info_.n_features), BatchColumns(proxy));\n    ext_info_.accumulated_rows += BatchSamples(proxy);\n    ext_info_.nnz += page.data.Size();\n    ext_info_.n_batches++;\n    ext_info_.base_rowids.push_back(page.Size());\n    ext_info_.batch_nnz.push_back(page.data.Size());\n    if (!ext_info_.cats) {\n      ext_info_.cats = get_cats(proxy);\n    } else {\n      CHECK_EQ(ext_info_.cats->NumCatsTotal(), get_cats(proxy)->NumCatsTotal())\n          << error::InconsistentCategories();\n    }\n  }\n  std::partial_sum(ext_info_.base_rowids.cbegin(), ext_info_.base_rowids.cend(),\n                   ext_info_.base_rowids.begin());\n\n  iter.Reset();\n\n  ext_info_.SetInfo(&ctx, true, &this->info_);\n  fmat_ctx_ = ctx;\n\n  SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0);\n}\n\nSparsePageDMatrix::~SparsePageDMatrix() {\n  // Clear out all resources before deleting the cache file.\n  sparse_page_source_.reset();\n  std::visit([](auto &&ptr) { ptr.reset(); }, ellpack_page_source_);\n  column_source_.reset();\n  sorted_column_source_.reset();\n  ghist_index_source_.reset();\n\n  DeleteCacheFiles(cache_info_);\n}\n\nvoid SparsePageDMatrix::InitializeSparsePage(Context const *ctx) {\n  auto id = MakeCache(this, \".row.page\", false, cache_prefix_, &cache_info_);\n  // Don't use proxy DMatrix once this is already initialized, this 
allows users to\n  // release the iterator and data.\n  if (cache_info_.at(id)->written) {\n    CHECK(this->sparse_page_source_);\n    this->sparse_page_source_->Reset({});\n    return;\n  }\n\n  auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_, reset_, next_};\n  DMatrixProxy *proxy = MakeProxy(proxy_);\n  sparse_page_source_.reset();  // clear before creating new one to prevent conflicts.\n  // During initialization, the n_batches is 0.\n  CHECK_EQ(this->ext_info_.n_batches, static_cast<decltype(this->ext_info_.n_batches)>(0));\n  sparse_page_source_ = std::make_shared<SparsePageSource>(\n      std::move(iter), proxy, this->missing_, ctx->Threads(), this->info_.num_col_,\n      this->ext_info_.n_batches, cache_info_.at(id));\n}\n\nBatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl(Context const *ctx) {\n  this->InitializeSparsePage(ctx);\n  return BatchSet{BatchIterator<SparsePage>{this->sparse_page_source_}};\n}\n\nBatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {\n  // Use context from initialization for the default row page.\n  return this->GetRowBatchesImpl(&fmat_ctx_);\n}\n\nBatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches(Context const *ctx) {\n  auto id = MakeCache(this, \".col.page\", false, cache_prefix_, &cache_info_);\n  CHECK_NE(this->Info().num_col_, 0);\n  this->InitializeSparsePage(ctx);\n  if (!column_source_) {\n    column_source_ = std::make_shared<CSCPageSource>(this->missing_, ctx->Threads(),\n                                                     this->Info().num_col_, this->NumBatches(),\n                                                     cache_info_.at(id), sparse_page_source_);\n  } else {\n    column_source_->Reset({});\n  }\n  return BatchSet{BatchIterator<CSCPage>{this->column_source_}};\n}\n\nBatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) {\n  auto id = MakeCache(this, \".sorted.col.page\", false, cache_prefix_, &cache_info_);\n  
CHECK_NE(this->Info().num_col_, 0);\n  this->InitializeSparsePage(ctx);\n  if (!sorted_column_source_) {\n    sorted_column_source_ = std::make_shared<SortedCSCPageSource>(\n        this->missing_, ctx->Threads(), this->Info().num_col_, this->NumBatches(),\n        cache_info_.at(id), sparse_page_source_);\n  } else {\n    sorted_column_source_->Reset({});\n  }\n  return BatchSet{BatchIterator<SortedCSCPage>{this->sorted_column_source_}};\n}\n\nBatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,\n                                                               const BatchParam &param) {\n  if (param.Initialized()) {\n    CHECK_GE(param.max_bin, 2);\n  }\n  detail::CheckEmpty(batch_param_, param);\n  auto id = MakeCache(this, \".gradient_index.page\", false, cache_prefix_, &cache_info_);\n  if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {\n    this->InitializeSparsePage(ctx);\n    cache_info_.erase(id);\n    id = MakeCache(this, \".gradient_index.page\", false, cache_prefix_, &cache_info_);\n    LOG(INFO) << \"Generating new Gradient Index.\";\n    // Use sorted sketch for approx.\n    auto sorted_sketch = param.regen;\n    auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess);\n    this->InitializeSparsePage(ctx);  // reset after use.\n\n    batch_param_ = param;\n    ghist_index_source_.reset();\n    CHECK_NE(cuts.Values().size(), 0);\n    auto ft = this->info_.feature_types.ConstHostSpan();\n    ghist_index_source_.reset(new GradientIndexPageSource(\n        ctx, this->missing_, this->Info().num_col_, this->NumBatches(), cache_info_.at(id), param,\n        std::move(cuts), this->IsDense(), ft, sparse_page_source_));\n  } else {\n    CHECK(ghist_index_source_);\n    ghist_index_source_->Reset(param);\n  }\n  return BatchSet{BatchIterator<GHistIndexMatrix>{this->ghist_index_source_}};\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nBatchSet<EllpackPage> 
SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) {\n  common::AssertGPUSupport();\n  return BatchSet{BatchIterator<EllpackPage>{nullptr}};\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/sparse_page_dmatrix.cu",
    "content": "/**\n * Copyright 2021-2024, XGBoost contributors\n */\n#include <memory>   // for shared_ptr\n#include <utility>  // for move\n#include <variant>  // for visit\n#include <vector>   // for vector\n\n#include \"../common/hist_util.cuh\"\n#include \"../common/hist_util.h\"  // for HistogramCuts\n#include \"batch_utils.h\"          // for CheckEmpty, RegenGHist, CachePageRatio\n#include \"ellpack_page.cuh\"\n#include \"sparse_page_dmatrix.h\"\n#include \"xgboost/context.h\"  // for Context\n#include \"xgboost/data.h\"     // for BatchParam\n\nnamespace xgboost::data {\nBatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,\n                                                           const BatchParam& param) {\n  CHECK(ctx->IsCUDA());\n  if (param.Initialized()) {\n    CHECK_GE(param.max_bin, 2);\n  }\n  detail::CheckEmpty(batch_param_, param);\n  auto id = MakeCache(this, \".ellpack.page\", on_host_, cache_prefix_, &cache_info_);\n\n  if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {\n    this->InitializeSparsePage(ctx);\n    // reinitialize the cache\n    cache_info_.erase(id);\n    id = MakeCache(this, \".ellpack.page\", on_host_, cache_prefix_, &cache_info_);\n    LOG(INFO) << \"Generating new a Ellpack page.\";\n    std::shared_ptr<common::HistogramCuts> cuts;\n    if (!param.hess.empty()) {\n      cuts = std::make_shared<common::HistogramCuts>(\n          common::DeviceSketchWithHessian(ctx, this, param.max_bin, param.hess));\n    } else {\n      cuts =\n          std::make_shared<common::HistogramCuts>(common::DeviceSketch(ctx, this, param.max_bin));\n    }\n    this->InitializeSparsePage(ctx);  // reset after use.\n\n    std::vector<bst_idx_t> base_rowids, nnz;\n    if (this->ext_info_.row_stride == 0) {\n      this->ext_info_.row_stride = GetRowStride(this);\n    }\n\n    this->InitializeSparsePage(ctx);  // reset after use.\n    batch_param_ = param;\n\n    auto ft = 
this->Info().feature_types.ConstDeviceSpan();\n    if (on_host_ && std::get_if<EllpackHostPtr>(&ellpack_page_source_) == nullptr) {\n      ellpack_page_source_.emplace<EllpackHostPtr>(nullptr);\n    }\n\n    auto cinfo = EllpackCacheInfo{param, this->cache_host_ratio_, this->missing_};\n    CalcCacheMapping(ctx, this->IsDense(), cuts, min_cache_page_bytes_, this->ext_info_, true,\n                     &cinfo);\n    CHECK_EQ(cinfo.cache_mapping.size(), this->ext_info_.n_batches)\n        << \"Page concatenation is only supported by the `ExtMemQuantileDMatrix`.\";\n    std::visit(\n        [&](auto&& ptr) {\n          ptr.reset();  // make sure resource is released before making new ones.\n          using SourceT = typename std::remove_reference_t<decltype(ptr)>::element_type;\n          ptr = std::make_shared<SourceT>(ctx, this->Info().num_col_, this->ext_info_.n_batches,\n                                          cache_info_.at(id), std::move(cuts), this->IsDense(),\n                                          this->ext_info_.row_stride, ft, this->sparse_page_source_,\n                                          cinfo);\n        },\n        ellpack_page_source_);\n  } else {\n    CHECK(sparse_page_source_);\n    std::visit([&](auto&& ptr) { ptr->Reset(param); }, this->ellpack_page_source_);\n  }\n\n  auto batch_set =\n      std::visit([this](auto&& ptr) { return BatchSet{BatchIterator<EllpackPage>{ptr}}; },\n                 this->ellpack_page_source_);\n  return batch_set;\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/sparse_page_dmatrix.h",
    "content": "/**\n * Copyright 2015-2024, XGBoost Contributors\n * \\file sparse_page_dmatrix.h\n * \\brief External-memory version of DMatrix.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_DATA_SPARSE_PAGE_DMATRIX_H_\n#define XGBOOST_DATA_SPARSE_PAGE_DMATRIX_H_\n\n#include <cstdint>  // for uint32_t, int32_t\n#include <map>      // for map\n#include <memory>   // for shared_ptr\n#include <string>   // for string\n#include <variant>  // for variant, visit\n\n#include \"ellpack_page_source.h\"         // for EllpackPageSource, EllpackPageHostSource\n#include \"gradient_index_page_source.h\"  // for GradientIndexPageSource\n#include \"sparse_page_source.h\"          // for SparsePageSource, Cache\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for DMatrix, MetaInfo\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost::data {\n/**\n * @brief DMatrix used for external memory.\n *\n * The external memory is created for controlling memory usage by splitting up data into\n * multiple batches.  However that doesn't mean we will actually process exactly 1 batch\n * at a time, which would be terribly slow considering that we have to loop through the\n * whole dataset for every tree split.  So we use async to pre-fetch pages and let the\n * caller to decide how many batches it wants to process by returning data as a shared\n * pointer. The caller can use async function to process the data or just stage those\n * batches based on its use cases. These two optimizations might defeat the purpose of\n * splitting up dataset since if you stage all the batches then the memory usage might be\n * even worse than using a single batch. As a result, we must control how many batches can\n * be in memory at any given time.\n *\n * Right now the write to the cache is a sequential operation and is blocking. 
Reading\n * from cache on ther other hand, is async but with a hard coded limit of 3 pages as an\n * heuristic.  So by sparse dmatrix itself there can be only 7 pages in main memory (might\n * be of different types) at the same time: 1 page pending for write, 3 pre-fetched sparse\n * pages, 3 pre-fetched dependent pages.\n *\n * Of course if the caller decides to retain some batches to perform parallel processing,\n * then we might load all pages in memory, which is also considered as a bug in caller's\n * code. So if the algo supports external memory, it must be careful that queue for async\n * call must have an upper limit.\n *\n * Another assumption we make is that the data must be immutable so caller should never\n * change the data.  Sparse page source returns const page to make sure of that.  If you\n * want to change the generated page like Ellpack, pass parameter into `GetBatches` to\n * re-generate them instead of trying to modify the pages in-place.\n *\n * The overall chain of responsibility of external memory DMatrix:\n *\n *    User defined iterator (in Python/C/R) -> Proxy DMatrix -> Sparse page Source ->\n *    Other sources (Like Ellpack) -> Sparse Page DMatrix -> Caller\n *\n * A possible optimization is skipping the sparse page source for `hist` based algorithms\n * similar to the Quantile DMatrix.\n */\nclass SparsePageDMatrix : public DMatrix {\n  MetaInfo info_;\n  BatchParam batch_param_;\n  std::map<std::string, std::shared_ptr<Cache>> cache_info_;\n\n  DMatrixHandle proxy_;\n  DataIterHandle iter_;\n  DataIterResetCallback *reset_;\n  XGDMatrixCallbackNext *next_;\n\n  float const missing_;\n  Context fmat_ctx_;\n  std::string cache_prefix_;\n  bool const on_host_;\n  float const cache_host_ratio_;\n  std::int64_t const min_cache_page_bytes_;\n  ExternalDataInfo ext_info_;\n\n  // sparse page is the source to other page types, we make a special member function.\n  void InitializeSparsePage(Context const *ctx);\n  // Non-virtual version that 
can be used in constructor\n  BatchSet<SparsePage> GetRowBatchesImpl(Context const *ctx);\n\n public:\n  explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,\n                             XGDMatrixCallbackNext *next, ExtMemConfig const &config);\n\n  ~SparsePageDMatrix() override;\n\n  [[nodiscard]] MetaInfo &Info() override;\n  [[nodiscard]] const MetaInfo &Info() const override;\n  [[nodiscard]] Context const *Ctx() const override { return &fmat_ctx_; }\n  [[nodiscard]] std::int32_t NumBatches() const override { return ext_info_.n_batches; }\n  DMatrix *Slice(common::Span<std::int32_t const>) override {\n    LOG(FATAL) << \"Slicing DMatrix is not supported for external memory.\";\n    return nullptr;\n  }\n  DMatrix *SliceCol(int, int) override {\n    LOG(FATAL) << \"Slicing DMatrix columns is not supported for external memory.\";\n    return nullptr;\n  }\n\n  [[nodiscard]] bool EllpackExists() const override {\n    return std::visit([](auto &&ptr) { return static_cast<bool>(ptr); }, ellpack_page_source_);\n  }\n  [[nodiscard]] bool GHistIndexExists() const override {\n    return static_cast<bool>(ghist_index_source_);\n  }\n  [[nodiscard]] bool SparsePageExists() const override {\n    return static_cast<bool>(sparse_page_source_);\n  }\n  // For testing, getter for the number of fetches for sparse page source.\n  [[nodiscard]] auto SparsePageFetchCount() const {\n    return this->sparse_page_source_->FetchCount();\n  }\n\n private:\n  BatchSet<SparsePage> GetRowBatches() override;\n  BatchSet<CSCPage> GetColumnBatches(Context const *ctx) override;\n  BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *ctx) override;\n  BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, const BatchParam &) override;\n  BatchSet<ExtSparsePage> GetExtBatches(Context const *, BatchParam const &) override {\n    
LOG(FATAL) << \"Can not obtain a single CSR page for external memory DMatrix\";\n    return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));\n  }\n\n private:\n  // source data pointers.\n  std::shared_ptr<SparsePageSource> sparse_page_source_;\n  using EllpackDiskPtr = std::shared_ptr<EllpackPageSource>;\n  using EllpackHostPtr = std::shared_ptr<EllpackPageHostSource>;\n  std::variant<EllpackDiskPtr, EllpackHostPtr> ellpack_page_source_;\n  std::shared_ptr<CSCPageSource> column_source_;\n  std::shared_ptr<SortedCSCPageSource> sorted_column_source_;\n  std::shared_ptr<GradientIndexPageSource> ghist_index_source_;\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SPARSE_PAGE_DMATRIX_H_\n"
  },
  {
    "path": "src/data/sparse_page_raw_format.cc",
    "content": "/**\n * Copyright 2015-2023, XGBoost Contributors\n * \\file sparse_page_raw_format.cc\n *  Raw binary format of sparse page.\n */\n#include <dmlc/registry.h>\n\n#include \"../common/io.h\"                 // for AlignedResourceReadStream, AlignedFileWriteStream\n#include \"../common/ref_resource_view.h\"  // for WriteVec\n#include \"./sparse_page_writer.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost::data {\nDMLC_REGISTRY_FILE_TAG(sparse_page_raw_format);\n\ntemplate <typename T>\nclass SparsePageRawFormat : public SparsePageFormat<T> {\n public:\n  bool Read(T* page, common::AlignedResourceReadStream* fi) override {\n    auto& offset_vec = page->offset.HostVector();\n    if (!common::ReadVec(fi, &offset_vec)) {\n      return false;\n    }\n    auto& data_vec = page->data.HostVector();\n    CHECK_NE(page->offset.Size(), 0U) << \"Invalid SparsePage file\";\n    data_vec.resize(offset_vec.back());\n    if (page->data.Size() != 0) {\n      if (!common::ReadVec(fi, &data_vec)) {\n        return false;\n      }\n    }\n    if (!fi->Read(&page->base_rowid, sizeof(page->base_rowid))) {\n      return false;\n    }\n    return true;\n  }\n\n  std::size_t Write(const T& page, common::AlignedFileWriteStream* fo) override {\n    const auto& offset_vec = page.offset.HostVector();\n    const auto& data_vec = page.data.HostVector();\n    CHECK(page.offset.Size() != 0 && offset_vec[0] == 0);\n    CHECK_EQ(offset_vec.back(), page.data.Size());\n\n    std::size_t bytes{0};\n    bytes += common::WriteVec(fo, offset_vec);\n    if (page.data.Size() != 0) {\n      bytes += common::WriteVec(fo, data_vec);\n    }\n    bytes += fo->Write(&page.base_rowid, sizeof(page.base_rowid));\n    return bytes;\n  }\n\n private:\n};\n\n#define SparsePageFmt SparsePageFormat<SparsePage>\nDMLC_REGISTRY_REGISTER(SparsePageFormatReg<SparsePage>, SparsePageFmt, raw)\n    .describe(\"Raw binary data format.\")\n    .set_body([]() { return new 
SparsePageRawFormat<SparsePage>(); });\n\n#define CSCPageFmt SparsePageFormat<CSCPage>\nDMLC_REGISTRY_REGISTER(SparsePageFormatReg<CSCPage>, CSCPageFmt, raw)\n    .describe(\"Raw binary data format.\")\n    .set_body([]() { return new SparsePageRawFormat<CSCPage>(); });\n\n#define SortedCSCPageFmt SparsePageFormat<SortedCSCPage>\nDMLC_REGISTRY_REGISTER(SparsePageFormatReg<SortedCSCPage>, SortedCSCPageFmt, raw)\n    .describe(\"Raw binary data format.\")\n    .set_body([]() { return new SparsePageRawFormat<SortedCSCPage>(); });\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/sparse_page_source.cc",
    "content": "/**\n *  Copyright 2021-2025, XGBoost Contributors\n */\n#include \"sparse_page_source.h\"\n\n#include <cstdio>       // for remove\n#include <filesystem>   // for exists, path, is_directory\n#include <numeric>      // for partial_sum\n#include <string>       // for string\n#include <string_view>  // for string_view\n\n#include \"../collective/communicator-inl.h\"  // for IsDistributed, GetRank\n\nnamespace xgboost::data {\nvoid Cache::Commit() {\n  if (!this->written) {\n    std::partial_sum(this->offset.begin(), this->offset.end(), this->offset.begin());\n    this->written = true;\n  }\n}\n\nvoid TryDeleteCacheFile(const std::string& file) {\n  // Don't throw, this is called in a destructor.\n  auto exists = std::filesystem::exists(file);\n  if (!exists) {\n    LOG(WARNING) << \"External memory cache file \" << file << \" is missing.\";\n  }\n  if (std::remove(file.c_str()) != 0) {\n    LOG(WARNING) << \"Couldn't remove external memory cache file \" << file\n                 << \"; you may want to remove it manually\";\n  }\n}\n\nstd::string MakeCachePrefix(std::string cache_prefix) {\n  std::string_view constexpr kDftname{\"DMatrix\"};\n  cache_prefix = cache_prefix.empty() ? kDftname : cache_prefix;\n  // Use the directory if one exists\n  if (std::filesystem::is_directory(cache_prefix)) {\n    cache_prefix = (std::filesystem::path{cache_prefix} / kDftname).string();  // NOLINT\n  }\n  // Avoid conflicts in distributed environments.\n  if (collective::IsDistributed()) {\n    cache_prefix += (\"-r\" + std::to_string(collective::GetRank()));\n  }\n  return cache_prefix;\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/sparse_page_source.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include \"../common/device_helpers.cuh\"  // for CurrentDevice\n#include \"proxy_dmatrix.cuh\"             // for DispatchAny, DMatrixProxy\n#include \"simple_dmatrix.cuh\"            // for CopyToSparsePage\n#include \"sparse_page_source.h\"\n#include \"xgboost/data.h\"  // for SparsePage\n\nnamespace xgboost::data {\nvoid DevicePush(DMatrixProxy *proxy, float missing, SparsePage *page) {\n  auto device = proxy->Device();\n  if (!device.IsCUDA()) {\n    device = DeviceOrd::CUDA(dh::CurrentDevice());\n  }\n  CHECK(device.IsCUDA());\n  auto ctx = Context{}.MakeCUDA(device.ordinal);\n\n  cuda_impl::DispatchAny(\n      proxy, [&](auto const &value) { CopyToSparsePage(&ctx, value, device, missing, page); });\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "src/data/sparse_page_source.h",
    "content": "/**\n *  Copyright 2014-2026, XGBoost Contributors\n * \\file sparse_page_source.h\n */\n#ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_\n#define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_\n\n#include <algorithm>  // for min\n#include <atomic>     // for atomic\n#include <cstdint>    // for uint64_t\n#include <future>     // for future\n#include <limits>     // for numeric_limits\n#include <map>        // for map\n#include <memory>     // for unique_ptr\n#include <mutex>      // for mutex\n#include <string>     // for string\n#include <utility>    // for pair, move\n#include <vector>     // for vector\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // for AssertGPUSupport\n#endif                         // !defined(XGBOOST_USE_CUDA)\n\n#include \"../common/io.h\"           // for PrivateMmapConstStream\n#include \"../common/threadpool.h\"   // for ThreadPool\n#include \"../common/timer.h\"        // for Monitor, Timer\n#include \"proxy_dmatrix.h\"          // for DMatrixProxy\n#include \"sparse_page_writer.h\"     // for SparsePageFormat\n#include \"xgboost/base.h\"           // for bst_feature_t\n#include \"xgboost/data.h\"           // for SparsePage, CSCPage, SortedCSCPage\n#include \"xgboost/global_config.h\"  // for InitNewThread\n#include \"xgboost/logging.h\"        // for CHECK_EQ\n\nnamespace xgboost::data {\nvoid TryDeleteCacheFile(const std::string& file);\n\nstd::string MakeCachePrefix(std::string cache_prefix);\n\nauto constexpr InvalidPageSize() { return std::numeric_limits<bst_idx_t>::max(); }\n\n/**\n * @brief Information about the cache including path and page offsets.\n */\nstruct Cache {\n  // whether the write to the cache is complete\n  bool written;\n  bool on_host;\n  std::string name;\n  std::string format;\n  // offset into binary cache file.\n  std::vector<bst_idx_t> offset;\n\n  Cache(bool w, std::string n, std::string fmt, bool on_host)\n      : written{w}, on_host{on_host}, name{std::move(n)}, 
format{std::move(fmt)}, offset{0} {}\n\n  [[nodiscard]] static std::string ShardName(std::string name, std::string format) {\n    CHECK_EQ(format.front(), '.');\n    return name + format;\n  }\n\n  [[nodiscard]] std::string ShardName() const { return ShardName(this->name, this->format); }\n  [[nodiscard]] bool OnHost() const { return on_host; }\n  /**\n   * @brief Record a page with size of n_bytes.\n   */\n  void Push(bst_idx_t n_bytes) { offset.push_back(n_bytes); }\n  /**\n   * @brief Returns the view start and length for the i^th page.\n   */\n  [[nodiscard]] auto View(std::size_t i) const {\n    std::uint64_t off = offset.at(i);\n    std::uint64_t len = this->Bytes(i);\n    return std::pair{off, len};\n  }\n  /**\n   * @brief Get the number of bytes for the i^th page.\n   */\n  [[nodiscard]] bst_idx_t Bytes(std::size_t i) const { return offset.at(i + 1) - offset[i]; }\n  /**\n   * @brief Call this once the write for the cache is complete.\n   */\n  void Commit();\n  /**\n   * @brief Returns the number of pages in the cache.\n   */\n  [[nodiscard]] bst_idx_t Size() const { return this->offset.size() - 1; }\n};\n\ninline void DeleteCacheFiles(std::map<std::string, std::shared_ptr<Cache>> const& cache_info) {\n  for (auto const& kv : cache_info) {\n    CHECK(kv.second);\n    auto n = kv.second->ShardName();\n    if (kv.second->OnHost()) {\n      continue;\n    }\n    TryDeleteCacheFile(n);\n  }\n}\n\n[[nodiscard]] inline std::string MakeId(std::string prefix, void const* ptr) {\n  std::stringstream ss;\n  ss << ptr;\n  return prefix + \"-\" + ss.str();\n}\n\n/**\n * @brief Make cache if it doesn't exist yet.\n */\n[[nodiscard]] inline std::string MakeCache(void const* ptr, std::string format, bool on_host,\n                                           std::string prefix,\n                                           std::map<std::string, std::shared_ptr<Cache>>* out) {\n  auto& cache_info = *out;\n  auto name = MakeId(std::move(prefix), ptr);\n  auto id = name + 
format;\n  auto it = cache_info.find(id);\n  if (it == cache_info.cend()) {\n    cache_info[id].reset(new Cache{false, name, format, on_host});\n    if (!on_host) {\n      LOG(INFO) << \"Make cache:\" << cache_info[id]->ShardName();\n    }\n  }\n  return id;\n}\n\n// Prevents multi-threaded call to `GetBatches`.\nclass TryLockGuard {\n  std::mutex& lock_;\n\n public:\n  explicit TryLockGuard(std::mutex& lock) : lock_{lock} {  // NOLINT\n    CHECK(lock_.try_lock()) << \"Multiple threads attempting to use Sparse DMatrix.\";\n  }\n  ~TryLockGuard() { lock_.unlock(); }\n};\n\n// Similar to `dmlc::OMPException`, but doesn't need the threads to be joined before rethrow\nclass ExceHandler {\n  std::mutex mutex_;\n  std::atomic<bool> flag_{false};\n  std::exception_ptr curr_exce_{nullptr};\n\n public:\n  template <typename Fn>\n  decltype(auto) Run(Fn&& fn) noexcept(true) {\n    try {\n      return fn();\n    } catch (dmlc::Error const& e) {\n      std::lock_guard<std::mutex> guard{mutex_};\n      if (!curr_exce_) {\n        curr_exce_ = std::current_exception();\n      }\n      flag_ = true;\n    } catch (std::exception const& e) {\n      std::lock_guard<std::mutex> guard{mutex_};\n      if (!curr_exce_) {\n        curr_exce_ = std::current_exception();\n      }\n      flag_ = true;\n    } catch (...) 
{\n      std::lock_guard<std::mutex> guard{mutex_};\n      if (!curr_exce_) {\n        curr_exce_ = std::current_exception();\n      }\n      flag_ = true;\n    }\n    return std::invoke_result_t<Fn>();\n  }\n\n  void Rethrow() noexcept(false) {\n    if (flag_) {\n      CHECK(curr_exce_);\n      std::rethrow_exception(curr_exce_);\n    }\n  }\n};\n\ntemplate <typename WriterT>\nstd::unique_ptr<WriterT> DftCreateWriterImpl(StringView name, std::uint32_t iter) {\n  std::unique_ptr<common::AlignedFileWriteStream> fo;\n  if (iter == 0) {\n    fo = std::make_unique<common::AlignedFileWriteStream>(name, \"wb\");\n  } else {\n    fo = std::make_unique<common::AlignedFileWriteStream>(name, \"ab\");\n  }\n  return fo;\n}\n\n/**\n * @brief Default implementation of the stream creater.\n */\ntemplate <typename S, template <typename> typename F>\nclass DefaultFormatStreamPolicy : public F<S> {\n public:\n  using WriterT = common::AlignedFileWriteStream;\n  using ReaderT = common::AlignedResourceReadStream;\n\n public:\n  std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter) {\n    return DftCreateWriterImpl<WriterT>(name, iter);\n  }\n\n  std::unique_ptr<ReaderT> CreateReader(StringView name, std::uint64_t offset,\n                                        std::uint64_t length) const {\n    return std::make_unique<common::PrivateMmapConstStream>(std::string{name}, offset, length);\n  }\n};\n\ntemplate <typename S, template <typename> typename F>\nclass MemBufFileReadFormatStreamPolicy : public F<S> {\n public:\n  using WriterT = common::AlignedFileWriteStream;\n  using ReaderT = common::AlignedResourceReadStream;\n\n public:\n  std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter) {\n    return DftCreateWriterImpl<WriterT>(name, iter);\n  }\n\n  std::unique_ptr<ReaderT> CreateReader(StringView name, std::uint64_t offset,\n                                        std::uint64_t length) const {\n    return 
std::make_unique<common::MemBufFileReadStream>(std::string{name}, offset, length);\n  }\n};\n\n/**\n * @brief Default implementatioin of the format creator.\n */\ntemplate <typename S>\nclass DefaultFormatPolicy {\n public:\n  using FormatT = SparsePageFormat<S>;\n\n public:\n  auto CreatePageFormat(BatchParam const&) const {\n    std::unique_ptr<FormatT> fmt{::xgboost::data::CreatePageFormat<S>(\"raw\")};\n    return fmt;\n  }\n};\n\n/**\n * @brief Base class for all page sources. Handles fetching, writing, and iteration.\n *\n * The interface to external storage is divided into two types. The first one is the\n * format, representing how to read and write the binary. The second part is where to\n * store the binary cache. These policies are implemented in the `FormatStreamPolicy`\n * policy class. The format policy controls how to create the format (the first part), and\n * the stream policy decides where the stream should read from and write to (the second\n * part). This way we can compose the polices and page types with ease.\n */\ntemplate <typename S,\n          typename FormatStreamPolicy = DefaultFormatStreamPolicy<S, DefaultFormatPolicy>>\nclass SparsePageSourceImpl : public BatchIteratorImpl<S>, public FormatStreamPolicy {\n protected:\n  // Prevents calling this iterator from multiple places(or threads).\n  std::mutex single_threaded_;\n  // The current page.\n  std::shared_ptr<S> page_;\n  // Workers for fetching data from external memory.\n  common::ThreadPool workers_;\n\n  bool at_end_{false};\n  float missing_;\n  std::int32_t nthreads_;\n  bst_feature_t n_features_;\n  bst_idx_t fetch_cnt_{0};  // Used for sanity check.\n  // Index to the current page.\n  std::uint32_t count_{0};\n  // How we pre-fetch the data.\n  BatchParam param_;\n\n  std::shared_ptr<Cache> cache_info_;\n\n  using Ring = std::vector<std::future<std::shared_ptr<S>>>;\n  // A ring storing futures to data.  
Since the DMatrix iterator is forward only, we can\n  // pre-fetch data in a ring.\n  std::unique_ptr<Ring> ring_{new Ring};\n  // Catching exception in pre-fetch threads to prevent segfault. Not always work though,\n  // OOM error can be delayed due to lazy commit. On the bright side, if mmap is used then\n  // OOM error should be rare.\n  ExceHandler exce_;\n  common::Monitor monitor_;\n\n  [[nodiscard]] bool ReadCache() {\n    if (!cache_info_->written) {\n      return false;\n    }\n    auto n_batches = this->cache_info_->Size();\n    if (ring_->empty()) {\n      ring_->resize(n_batches);\n    }\n\n    std::int32_t n_prefetches =\n        std::min(this->workers_.NumWorkers(), this->param_.n_prefetch_batches);\n    n_prefetches = std::max(n_prefetches, 1);\n    std::int32_t n_prefetch_batches = std::min(static_cast<bst_idx_t>(n_prefetches), n_batches);\n    CHECK_GT(n_prefetch_batches, 0);\n    CHECK_LE(n_prefetch_batches, this->param_.n_prefetch_batches);\n    std::size_t fetch_it = this->count_;\n\n    exce_.Rethrow();\n    // Clear out the existing page before loading new ones. 
This helps reduce memory usage\n    // when page is not loaded with mmap, in addition, it triggers necessary CUDA\n    // synchronizations by freeing memory.\n    page_.reset();\n\n    for (std::int32_t i = 0; i < n_prefetch_batches; ++i, ++fetch_it) {\n      bool restart = fetch_it == n_batches;\n      fetch_it %= n_batches;  // ring\n      if (ring_->at(fetch_it).valid()) {\n        continue;\n      }\n      auto const* self = this;  // make sure it's const\n      CHECK_LT(fetch_it, cache_info_->offset.size());\n      // Make sure the new iteration starts with a copy to avoid spilling configuration.\n      if (restart) {\n        this->param_.prefetch_copy = true;\n      }\n      auto p = this->param_;\n      ring_->at(fetch_it) = this->workers_.Submit([fetch_it, self, p, this] {\n        auto page = std::make_shared<S>();\n        this->exce_.Run([&] {\n          std::unique_ptr<typename FormatStreamPolicy::FormatT> fmt{self->CreatePageFormat(p)};\n          auto name = self->cache_info_->ShardName();\n          auto [offset, length] = self->cache_info_->View(fetch_it);\n          std::unique_ptr<typename FormatStreamPolicy::ReaderT> fi{\n              self->CreateReader(name, offset, length)};\n          CHECK(fmt->Read(page.get(), fi.get()));\n        });\n        return page;\n      });\n      this->fetch_cnt_++;\n    }\n\n    CHECK_EQ(std::count_if(ring_->cbegin(), ring_->cend(), [](auto const& f) { return f.valid(); }),\n             n_prefetch_batches)\n        << \"Sparse DMatrix assumes forward iteration.\";\n\n    monitor_.Start(\"Wait-\" + std::to_string(count_));\n    CHECK((*ring_)[count_].valid());\n    page_ = (*ring_)[count_].get();\n    monitor_.Stop(\"Wait-\" + std::to_string(count_));\n\n    exce_.Rethrow();\n\n    return true;\n  }\n\n  void WriteCache() {\n    CHECK(!cache_info_->written);\n    common::Timer timer;\n    timer.Start();\n    auto fmt{this->CreatePageFormat(this->param_)};\n\n    auto name = cache_info_->ShardName();\n    
std::unique_ptr<typename FormatStreamPolicy::WriterT> fo{\n        this->CreateWriter(StringView{name}, this->Iter())};\n    auto bytes = fmt->Write(*page_, fo.get());\n\n    timer.Stop();\n    if (bytes != InvalidPageSize()) {\n      // Not entirely accurate, the kernel doesn't have to flush the data.\n      LOG(INFO) << common::HumanMemUnit(bytes) << \" written in \" << timer.ElapsedSeconds()\n                << \" seconds.\";\n      cache_info_->Push(bytes);\n    }\n  }\n\n  virtual void Fetch() = 0;\n\n public:\n  SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features,\n                       std::shared_ptr<Cache> cache)\n      : workers_{StringView{\"ext-mem\"}, std::max(2, std::min(nthreads, 16)), InitNewThread{}},\n        missing_{missing},\n        nthreads_{nthreads},\n        n_features_{n_features},\n        cache_info_{std::move(cache)} {\n    monitor_.Init(typeid(S).name());  // not pretty, but works for basic profiling\n  }\n\n  SparsePageSourceImpl(SparsePageSourceImpl const& that) = delete;\n\n  ~SparsePageSourceImpl() override {\n    // Don't orphan the threads.\n    for (auto& fu : *ring_) {\n      if (fu.valid()) {\n        [[maybe_unused]] auto _ = fu.get();\n      }\n    }\n  }\n\n  [[nodiscard]] std::uint32_t Iter() const { return count_; }\n\n  [[nodiscard]] S const& operator*() const override {\n    CHECK(page_);\n    return *page_;\n  }\n\n  [[nodiscard]] std::shared_ptr<S const> Page() const override { return page_; }\n\n  [[nodiscard]] bool AtEnd() const override { return at_end_; }\n  // Call this at the last iteration (it == n_batches).\n  virtual void EndIter() {\n    this->cache_info_->Commit();\n    if (this->cache_info_->Size() != 0) {\n      CHECK_EQ(this->count_, this->cache_info_->Size());\n    }\n    CHECK_GE(this->count_, 1);\n    this->count_ = 0;\n  }\n\n  virtual void Reset(BatchParam const& param) {\n    TryLockGuard guard{single_threaded_};\n\n    auto at_end = false;\n    std::swap(this->at_end_, 
at_end);\n\n    bool changed = this->param_.n_prefetch_batches != param.n_prefetch_batches;\n    this->param_ = param;\n\n    this->count_ = 0;\n\n    if (!at_end || changed) {\n      // The last iteration did not get to the end, clear the ring to start from 0.\n      this->ring_ = std::make_unique<Ring>();\n    }\n    this->Fetch();  // Get the 0^th page, prefetch the next page.\n  }\n\n  [[nodiscard]] auto FetchCount() const { return this->fetch_cnt_; }\n};\n\n#if defined(XGBOOST_USE_CUDA)\n// Push data from CUDA.\nvoid DevicePush(DMatrixProxy* proxy, float missing, SparsePage* page);\n#else\ninline void DevicePush(DMatrixProxy*, float, SparsePage*) { common::AssertGPUSupport(); }\n#endif\n\nclass SparsePageSource : public SparsePageSourceImpl<SparsePage> {\n  // This is the source iterator from the user.\n  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> iter_;\n  DMatrixProxy* proxy_;\n  std::size_t base_row_id_{0};\n  // Total number of batches.\n  bst_idx_t n_batches_{0};\n\n  void Fetch() final {\n    page_ = std::make_shared<SparsePage>();\n    // The first round of reading, this is responsible for initialization.\n    if (!this->ReadCache()) {\n      bool type_error{false};\n      CHECK(proxy_);\n      cpu_impl::DispatchAny(\n          proxy_,\n          [&](auto const& adapter_batch) {\n            page_->Push(adapter_batch, this->missing_, this->nthreads_);\n          },\n          &type_error);\n      if (type_error) {\n        DevicePush(proxy_, missing_, page_.get());\n      }\n\n      page_->SetBaseRowId(base_row_id_);\n      base_row_id_ += page_->Size();\n      this->n_batches_++;\n      this->WriteCache();\n    }\n  }\n\n public:\n  SparsePageSource(DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> iter,\n                   DMatrixProxy* proxy, float missing, int nthreads, bst_feature_t n_features,\n                   bst_idx_t n_batches, std::shared_ptr<Cache> cache)\n      : SparsePageSourceImpl(missing, nthreads, 
n_features, cache),\n        iter_{std::move(iter)},\n        proxy_{proxy},\n        n_batches_{n_batches} {\n    if (!cache_info_->written) {\n      iter_.Reset();\n      CHECK(iter_.Next()) << \"Must have at least 1 batch.\";\n    }\n    this->Fetch();\n  }\n\n  SparsePageSource& operator++() final {\n    TryLockGuard guard{single_threaded_};\n    count_++;\n\n    if (cache_info_->written) {\n      at_end_ = (count_ == n_batches_);\n    } else {\n      at_end_ = !iter_.Next();\n    }\n    CHECK_LE(count_, n_batches_);\n\n    if (at_end_) {\n      this->EndIter();\n      this->proxy_ = nullptr;\n    } else {\n      this->Fetch();\n    }\n\n    return *this;\n  }\n\n  void Reset(BatchParam const& param) override {\n    if (proxy_) {\n      TryLockGuard guard{single_threaded_};\n      iter_.Reset();\n    }\n    SparsePageSourceImpl::Reset(param);\n\n    TryLockGuard guard{single_threaded_};\n    this->base_row_id_ = 0;\n  }\n};\n\n/**\n * @brief A mixin for advancing the iterator with a sparse page source.\n */\ntemplate <typename S,\n          typename FormatCreatePolicy = DefaultFormatStreamPolicy<S, DefaultFormatPolicy>>\nclass PageSourceIncMixIn : public SparsePageSourceImpl<S, FormatCreatePolicy> {\n protected:\n  std::shared_ptr<SparsePageSource> source_;\n  using Super = SparsePageSourceImpl<S, FormatCreatePolicy>;\n  // synchronize the row page, `hist` and `gpu_hist` don't need the original sparse page\n  // so we avoid fetching it.\n  bool const sync_;\n  // Total number of batches.\n  bst_idx_t const n_batches_{0};\n\n public:\n  PageSourceIncMixIn(float missing, std::int32_t nthreads, bst_feature_t n_features,\n                     bst_idx_t n_batches, std::shared_ptr<Cache> cache, bool sync)\n      : Super::SparsePageSourceImpl{missing, nthreads, n_features, cache},\n        sync_{sync},\n        n_batches_{n_batches} {}\n  // This function always operate on the source first, then the downstream. 
The downstream\n  // can assume the source to be ready.\n  [[nodiscard]] PageSourceIncMixIn& operator++() final {\n    TryLockGuard guard{this->single_threaded_};\n\n    // Increment the source.\n    if (this->sync_) {\n      ++(*source_);\n    }\n    // Increment self.\n    ++this->count_;\n    // Set at end.\n    this->at_end_ = this->count_ == this->n_batches_;\n\n    if (this->at_end_) {\n      this->EndIter();\n      CHECK(this->cache_info_->written);\n      if (!this->sync_) {\n        source_.reset();  // Make sure no unnecessary fetch.\n      }\n    } else {\n      this->Fetch();\n    }\n\n    if (this->sync_) {\n      // Sanity check.\n      CHECK_EQ(source_->Iter(), this->count_);\n    }\n    return *this;\n  }\n\n  void Reset(BatchParam const& param) final {\n    if (this->sync_ || !this->cache_info_->written) {\n      this->source_->Reset(param);\n    }\n    Super::Reset(param);\n  }\n};\n\nclass CSCPageSource : public PageSourceIncMixIn<CSCPage> {\n protected:\n  void Fetch() final {\n    if (!this->ReadCache()) {\n      auto const& csr = source_->Page();\n      this->page_.reset(new CSCPage{});\n      // we might be able to optimize this by merging transpose and pushcsc\n      this->page_->PushCSC(csr->GetTranspose(n_features_, nthreads_));\n      page_->SetBaseRowId(csr->base_rowid);\n      this->WriteCache();\n    }\n  }\n\n public:\n  CSCPageSource(float missing, int nthreads, bst_feature_t n_features, uint32_t n_batches,\n                std::shared_ptr<Cache> cache, std::shared_ptr<SparsePageSource> source)\n      : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, true) {\n    this->source_ = source;\n    this->Fetch();\n  }\n};\n\nclass SortedCSCPageSource : public PageSourceIncMixIn<SortedCSCPage> {\n protected:\n  void Fetch() final {\n    if (!this->ReadCache()) {\n      auto const& csr = this->source_->Page();\n      this->page_.reset(new SortedCSCPage{});\n      // we might be able to optimize this by merging transpose 
and pushcsc\n      this->page_->PushCSC(csr->GetTranspose(n_features_, nthreads_));\n      CHECK_EQ(this->page_->Size(), n_features_);\n      CHECK_EQ(this->page_->data.Size(), csr->data.Size());\n      this->page_->SortRows(this->nthreads_);\n      page_->SetBaseRowId(csr->base_rowid);\n      this->WriteCache();\n    }\n  }\n\n public:\n  SortedCSCPageSource(float missing, int nthreads, bst_feature_t n_features, uint32_t n_batches,\n                      std::shared_ptr<Cache> cache, std::shared_ptr<SparsePageSource> source)\n      : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, true) {\n    this->source_ = source;\n    this->Fetch();\n  }\n};\n\n/**\n * @brief operator++ implementation for ExtMemQDM.\n */\ntemplate <typename S, typename FormatCreatePolicy>\nclass ExtQantileSourceMixin : public SparsePageSourceImpl<S, FormatCreatePolicy> {\n protected:\n  std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> source_;\n  using Super = SparsePageSourceImpl<S, FormatCreatePolicy>;\n\n public:\n  ExtQantileSourceMixin(\n      float missing, std::int32_t n_threads, bst_feature_t n_features,\n      std::shared_ptr<DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>> source,\n      std::shared_ptr<Cache> cache)\n      : Super::SparsePageSourceImpl{missing, n_threads, n_features, cache},\n        source_{std::move(source)} {}\n  // This function always operate on the source first, then the downstream. 
The downstream\n  // can assume the source to be ready.\n  [[nodiscard]] ExtQantileSourceMixin& operator++() final {\n    TryLockGuard guard{this->single_threaded_};\n    // Increment self.\n    ++this->count_;\n    // Set at end.\n    if (this->cache_info_->written) {\n      this->at_end_ = (this->Iter() == this->cache_info_->Size());\n    } else {\n      CHECK(this->source_);\n      this->at_end_ = !this->source_->Next();\n    }\n\n    if (this->at_end_) {\n      this->EndIter();\n\n      CHECK(this->cache_info_->written);\n      source_.reset();  // release the source\n    } else {\n      this->Fetch();\n    }\n\n    return *this;\n  }\n\n  void Reset(BatchParam const& param) final {\n    if (this->source_) {\n      this->source_->Reset();\n    }\n    Super::Reset(param);\n  }\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_\n"
  },
  {
    "path": "src/data/sparse_page_writer.h",
    "content": "/**\n * Copyright 2014-2024, XGBoost Contributors\n * \\file sparse_page_writer.h\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_DATA_SPARSE_PAGE_WRITER_H_\n#define XGBOOST_DATA_SPARSE_PAGE_WRITER_H_\n\n#include <functional>  // for function\n#include <string>      // for string\n\n#include \"../common/io.h\"   // for AlignedResourceReadStream, AlignedFileWriteStream\n#include \"dmlc/registry.h\"  // for Registry, FunctionRegEntryBase\n\nnamespace xgboost::data {\ntemplate<typename T>\nstruct SparsePageFormatReg;\n\n/**\n * @brief Format specification of various data formats like SparsePage.\n */\ntemplate <typename T>\nclass SparsePageFormat {\n public:\n  virtual ~SparsePageFormat() = default;\n  /**\n   * @brief Load all the segments into page, advance fi to end of the block.\n   *\n   * @param page The data to read page into.\n   * @param fi the input stream of the file\n   * @return true if the loading was successful, false if end of file was reached\n   */\n  virtual bool Read(T* page, common::AlignedResourceReadStream* fi) = 0;\n  /**\n   * @brief Save the data of a page to fo.\n   *\n   * @param fo output stream\n   */\n  virtual size_t Write(const T& page, common::AlignedFileWriteStream* fo) = 0;\n};\n\n/*!\n * \\brief Create the sparse page format registered under the given name.\n * \\return The created format functors.\n */\ntemplate<typename T>\ninline SparsePageFormat<T>* CreatePageFormat(const std::string& name) {\n  auto *e = ::dmlc::Registry<SparsePageFormatReg<T>>::Get()->Find(name);\n  if (e == nullptr) {\n    LOG(FATAL) << \"Unknown format type \" << name;\n    return nullptr;\n  }\n  return (e->body)();\n}\n\n/**\n * @brief Registry entry for sparse page format.\n */\ntemplate<typename T>\nstruct SparsePageFormatReg\n    : public dmlc::FunctionRegEntryBase<SparsePageFormatReg<T>,\n                                        std::function<SparsePageFormat<T>* ()>> {\n};\n}  // namespace xgboost::data\n#endif  // XGBOOST_DATA_SPARSE_PAGE_WRITER_H_\n"
  },
  {
    "path": "src/encoder/ordinal.cuh",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n\n#include <thrust/binary_search.h>                // for lower_bound\n#include <thrust/copy.h>                         // for copy\n#include <thrust/device_vector.h>                // for device_vector\n#include <thrust/find.h>                         // for find_if\n#include <thrust/for_each.h>                     // for for_each_n\n#include <thrust/iterator/counting_iterator.h>   // for make_counting_iterator\n#include <thrust/iterator/transform_iterator.h>  // for make_transform_iterator\n#include <thrust/sort.h>                         // for sort\n\n#include <cstddef>           // for size_t\n#include <cstdint>           // for int32_t, int8_t\n#include <cuda/functional>   // for proclaim_return_type\n#include <cuda/std/utility>  // for make_pair, pair\n#include <cuda/std/variant>  // for get\n#include <sstream>           // for stringstream\n\n#include \"../common/device_helpers.cuh\"\n#include \"ordinal.h\"\n#include \"types.h\"  // for Overloaded\n\nnamespace enc {\nnamespace cuda_impl {\nstruct SegmentedSearchSortedStrOp {\n  DeviceColumnsView haystack_v;             // The training set\n  Span<std::int32_t const> ref_sorted_idx;  // Sorted index for the training set\n  DeviceColumnsView needles_v;              // Keys\n  std::size_t f_idx;                        // Feature (segment) index\n\n  [[nodiscard]] __device__ std::int32_t operator()(std::int32_t i) const {\n    using detail::SearchKey;\n    auto haystack = cuda::std::get<CatStrArrayView>(haystack_v.columns[f_idx]);\n    auto needles = cuda::std::get<CatStrArrayView>(needles_v.columns[f_idx]);\n    // Get the search key\n    auto idx = i - needles_v.feature_segments[f_idx];  // index local to the feature\n    auto begin = needles.offsets[idx];\n    auto end = needles.offsets[idx + 1];\n    auto needle = needles.values.subspan(begin, end - begin);\n\n    // Search the key from the training set\n    auto it = 
thrust::make_counting_iterator(0);\n    auto f_sorted_idx = ref_sorted_idx.subspan(\n        haystack_v.feature_segments[f_idx],\n        haystack_v.feature_segments[f_idx + 1] - haystack_v.feature_segments[f_idx]);\n    auto end_it = it + f_sorted_idx.size();\n    auto ret_it = thrust::lower_bound(thrust::seq, it, end_it, SearchKey(), [&](auto l, auto r) {\n      Span<std::int8_t const> l_str;\n      if (l == SearchKey()) {\n        l_str = needle;\n      } else {\n        auto l_idx = f_sorted_idx[l];\n        auto l_beg = haystack.offsets[l_idx];\n        auto l_end = haystack.offsets[l_idx + 1];\n        l_str = haystack.values.subspan(l_beg, l_end - l_beg);\n      }\n\n      Span<std::int8_t const> r_str;\n      if (r == SearchKey()) {\n        r_str = needle;\n      } else {\n        auto r_idx = f_sorted_idx[r];\n        auto r_beg = haystack.offsets[r_idx];\n        auto r_end = haystack.offsets[r_idx + 1];\n        r_str = haystack.values.subspan(r_beg, r_end - r_beg);\n      }\n\n      return l_str < r_str;\n    });\n    if (ret_it == it + f_sorted_idx.size()) {\n      return detail::NotFound();\n    }\n    return *ret_it;\n  }\n};\n\ntemplate <typename T>\nstruct SegmentedSearchSortedNumOp {\n  DeviceColumnsView haystack_v;             // The training set\n  Span<std::int32_t const> ref_sorted_idx;  // Sorted index for the training set\n  DeviceColumnsView needles_v;              // Keys\n  std::size_t f_idx;                        // Feature (segment) index\n\n  [[nodiscard]] __device__ std::int32_t operator()(std::int32_t i) const {\n    using detail::SearchKey;\n    auto haystack = cuda::std::get<Span<T const>>(haystack_v.columns[f_idx]);\n    auto needles = cuda::std::get<Span<T const>>(needles_v.columns[f_idx]);\n    // Get the search key\n    auto idx = i - needles_v.feature_segments[f_idx];  // index local to the feature\n    auto needle = needles[idx];\n    // Search the key from the training set\n    auto it = 
thrust::make_counting_iterator(0);\n    auto f_sorted_idx = ref_sorted_idx.subspan(\n        haystack_v.feature_segments[f_idx],\n        haystack_v.feature_segments[f_idx + 1] - haystack_v.feature_segments[f_idx]);\n    auto end_it = it + f_sorted_idx.size();\n    auto ret_it = thrust::lower_bound(thrust::seq, it, end_it, SearchKey(), [&](auto l, auto r) {\n      T l_value = l == SearchKey() ? needle : haystack[f_sorted_idx[l]];\n      T r_value = r == SearchKey() ? needle : haystack[f_sorted_idx[r]];\n      return l_value < r_value;\n    });\n    if (ret_it == it + f_sorted_idx.size()) {\n      return detail::NotFound();\n    }\n    return *ret_it;\n  }\n};\n\ntemplate <typename ThrustExec, typename U, typename V>\nvoid SegmentedIota(ThrustExec const& policy, Span<U> d_offset_ptr, Span<V> out_sequence) {\n  thrust::for_each_n(policy, thrust::make_counting_iterator(0ul), out_sequence.size(),\n                     [out_sequence, d_offset_ptr] __device__(std::size_t idx) {\n                       auto group = dh::SegmentId(d_offset_ptr, idx);\n                       out_sequence[idx] = idx - d_offset_ptr[group];\n                     });\n}\n\nstruct DftThrustPolicy {\n  template <typename T>\n  using ThrustAllocator = thrust::device_allocator<T>;\n\n  [[nodiscard]] auto ThrustPolicy() const { return thrust::cuda::par_nosync; }\n  [[nodiscard]] auto Stream() const { return cudaStreamPerThread; }\n};\n}  // namespace cuda_impl\n\n/**\n * @brief Default exection policy for the device implementation. Users are expected to\n *        customize it.\n */\nusing DftDevicePolicy = Policy<cuda_impl::DftThrustPolicy, detail::DftErrorHandler>;\n\n/**\n * @brief Sort the categories for the training set. 
Returns a list of sorted index.\n *\n * @tparam ExecPolicy The @ref Policy class, accepts an error policy and a thrust exec policy.\n *\n * @param policy     The execution policy.\n * @param orig_enc   The encoding scheme of the training set.\n * @param sorted_idx The output sorted index.\n */\ntemplate <typename ExecPolicy>\nvoid SortNames(ExecPolicy const& policy, DeviceColumnsView orig_enc,\n               Span<std::int32_t> sorted_idx) {\n  typename ExecPolicy::template ThrustAllocator<char> alloc;\n  auto exec = thrust::cuda::par_nosync(alloc).on(policy.Stream());\n\n  auto n_total_cats = orig_enc.n_total_cats;\n  if (static_cast<std::int32_t>(sorted_idx.size()) != orig_enc.n_total_cats) {\n    policy.Error(\"`sorted_idx` should have the same size as `n_total_cats`.\");\n  }\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n  cuda_impl::SegmentedIota(exec, orig_enc.feature_segments, d_sorted_idx);\n\n  // <fidx, sorted_idx>\n  using Pair = cuda::std::pair<std::int32_t, std::int32_t>;\n  using Alloc = typename ExecPolicy::template ThrustAllocator<Pair>;\n  thrust::device_vector<Pair, Alloc> keys(n_total_cats);\n  auto key_it = thrust::make_transform_iterator(\n      thrust::make_counting_iterator(0),\n      cuda::proclaim_return_type<Pair>([=] __device__(std::int32_t i) {\n        auto seg = dh::SegmentId(orig_enc.feature_segments, i);\n        auto idx = d_sorted_idx[i];\n        return cuda::std::make_pair(static_cast<std::int32_t>(seg), idx);\n      }));\n  thrust::copy(exec, key_it, key_it + n_total_cats, keys.begin());\n\n  thrust::sort(exec, keys.begin(), keys.end(),\n               cuda::proclaim_return_type<bool>([=] __device__(Pair const& l, Pair const& r) {\n                 if (l.first == r.first) {  // same feature\n                   auto const& col = orig_enc.columns[l.first];\n                   return cuda::std::visit(\n                       Overloaded{[&l, &r](CatStrArrayView const& str) -> bool {\n                                    auto l_beg 
= str.offsets[l.second];\n                                    auto l_end = str.offsets[l.second + 1];\n                                    auto l_str = str.values.subspan(l_beg, l_end - l_beg);\n\n                                    auto r_beg = str.offsets[r.second];\n                                    auto r_end = str.offsets[r.second + 1];\n                                    auto r_str = str.values.subspan(r_beg, r_end - r_beg);\n                                    return l_str < r_str;\n                                  },\n                                  [&](auto&& values) {\n                                    return values[l.second] < values[r.second];\n                                  }},\n                       col);\n                 }\n                 return l.first < r.first;\n               }));\n\n  // Extract the sorted index out from sorted keys.\n  auto s_keys = dh::ToSpan(keys);\n  auto it = thrust::make_transform_iterator(\n      thrust::make_counting_iterator(0),\n      cuda::proclaim_return_type<decltype(Pair{}.second)>(\n          [=] __device__(std::int32_t i) { return s_keys[i].second; }));\n  thrust::copy(exec, it, it + sorted_idx.size(), dh::tbegin(sorted_idx));\n}\n\n/**\n * @brief Calculate a mapping for recoding the data given old and new encoding.\n *\n * @tparam ExecPolicy The @ref Policy class, accepts an error policy and a thrust exec policy\n *\n * @param policy     The execution policy.\n * @param orig_enc   The encoding scheme of the training set.\n * @param sorted_idx The sorted index of the training set encoding scheme, produced by\n *                   @ref SortNames .\n * @param new_enc    The scheme that needs to be recoded.\n * @param mapping    The output mapping.\n */\ntemplate <typename ExecPolicy>\nvoid Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc,\n            Span<std::int32_t const> sorted_idx, DeviceColumnsView new_enc,\n            Span<std::int32_t> mapping) {\n  typename ExecPolicy::template 
ThrustAllocator<char> alloc;\n  auto exec = thrust::cuda::par_nosync(alloc).on(policy.Stream());\n  detail::BasicChecks(policy, orig_enc, sorted_idx, new_enc, mapping);\n  /**\n   * Check consistency.\n   */\n  auto check_it = thrust::make_transform_iterator(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {\n        auto const& l_f = orig_enc.columns[i];\n        auto const& r_f = new_enc.columns[i];\n        if (l_f.index() != r_f.index()) {\n          return false;\n        }\n        auto l_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, l_f);\n        auto r_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, r_f);\n        return l_is_empty == r_is_empty;\n      });\n  bool valid = thrust::reduce(exec, check_it, check_it + new_enc.Size(), true,\n                              [=] XGBOOST_DEVICE(bool l, bool r) -> bool { return l && r; });\n  if (!valid) {\n    policy.Error(\n        \"Invalid new DataFrame. \"\n        \"The data type doesn't match the one used in the training dataset. \"\n        \"Both should be either numeric or categorical. 
\"\n        \"For a categorical feature, the index type must match between the training and test set.\");\n  }\n\n  /**\n   * search the index for the new encoding\n   */\n  thrust::for_each_n(\n      exec, thrust::make_counting_iterator(0), new_enc.n_total_cats,\n      [=] __device__(std::int32_t i) {\n        auto f_idx = dh::SegmentId(new_enc.feature_segments, i);\n        std::int32_t searched_idx{detail::NotFound()};\n        auto const& col = orig_enc.columns[f_idx];\n        cuda::std::visit(Overloaded{[&](CatStrArrayView const&) {\n                                      auto op = cuda_impl::SegmentedSearchSortedStrOp{\n                                          orig_enc, sorted_idx, new_enc, f_idx};\n                                      searched_idx = op(i);\n                                    },\n                                    [&](auto&& values) {\n                                      using T = typename std::decay_t<decltype(values)>::value_type;\n                                      auto op = cuda_impl::SegmentedSearchSortedNumOp<T>{\n                                          orig_enc, sorted_idx, new_enc, f_idx};\n                                      searched_idx = op(i);\n                                    }},\n                         col);\n\n        auto f_sorted_idx = sorted_idx.subspan(\n            orig_enc.feature_segments[f_idx],\n            orig_enc.feature_segments[f_idx + 1] - orig_enc.feature_segments[f_idx]);\n\n        std::int32_t idx = -1;\n        if (searched_idx != detail::NotFound()) {\n          idx = f_sorted_idx[searched_idx];\n        }\n\n        auto f_beg = new_enc.feature_segments[f_idx];\n        auto f_end = new_enc.feature_segments[f_idx + 1];\n        auto f_mapping = mapping.subspan(f_beg, f_end - f_beg);\n        f_mapping[i - f_beg] = idx;\n      });\n\n  auto err_it = thrust::find_if(\n      exec, dh::tcbegin(mapping), dh::tcend(mapping),\n      [=] XGBOOST_DEVICE(std::int32_t v) -> bool { return v == 
detail::NotFound(); });\n\n  if (err_it != dh::tcend(mapping)) {\n    // Report missing cat.\n    std::vector<decltype(mapping)::value_type> h_mapping(mapping.size());\n    thrust::copy_n(dh::tcbegin(mapping), mapping.size(), h_mapping.begin());\n    std::vector<decltype(new_enc.feature_segments)::value_type> h_feature_segments(\n        new_enc.feature_segments.size());\n    thrust::copy(dh::tcbegin(new_enc.feature_segments), dh::tcend(new_enc.feature_segments),\n                 h_feature_segments.begin());\n    auto h_idx = std::distance(dh::tcbegin(mapping), err_it);\n    auto f_idx = dh::SegmentId(Span<std::int32_t const>{h_feature_segments}, h_idx);\n    auto f_beg = h_feature_segments[f_idx];\n    auto f_local_idx = h_idx - f_beg;\n\n    std::vector<DeviceColumnsView::VariantT> h_columns(new_enc.columns.size());\n    thrust::copy_n(dh::tcbegin(new_enc.columns), new_enc.columns.size(), h_columns.begin());\n\n    std::stringstream name;\n    auto const& col = h_columns[f_idx];\n    cuda::std::visit(\n        Overloaded{[&](CatStrArrayView const& str) {\n                     std::vector<CatCharT> values(str.values.size());\n                     std::vector<std::int32_t> offsets(str.offsets.size());\n                     thrust::copy_n(dh::tcbegin(str.values), str.values.size(), values.data());\n                     thrust::copy_n(dh::tcbegin(str.offsets), str.offsets.size(), offsets.data());\n\n                     auto cat = Span{values}.subspan(\n                         offsets[f_local_idx], offsets[f_local_idx + 1] - offsets[f_local_idx]);\n                     for (auto v : cat) {\n                       name.put(v);\n                     }\n                   },\n                   [&](auto&& values) {\n                     using T = typename std::decay_t<decltype(values)>::value_type;\n                     std::vector<std::remove_cv_t<T>> h_values(values.size());\n                     thrust::copy_n(dh::tcbegin(values), values.size(), h_values.data());\n 
                    auto cat = h_values[f_local_idx];\n                     name << cat;\n                   }},\n        col);\n\n    detail::ReportMissing(policy, name.str(), f_idx);\n  }\n}\n}  // namespace enc\n"
  },
  {
    "path": "src/encoder/ordinal.h",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n *\n * @brief Ordinal re-coder for categorical features.\n *\n * For training with dataframes, we use the default encoding provided by the dataframe\n * implementation. However, we need a way to ensure the encoding is consistent at test\n * time, which is often not the case. This module re-codes the test data given the train\n * time encoding (mapping between categories to dense discrete integers starting from 0).\n *\n * The algorithm proceeds as follows:\n *\n * Given the categories used for training [c, b, d, a], the ordering of this list is the\n * encoding, c maps to 0, b maps to 1, so on and so forth. At test time, we receive an\n * encoding [c, a, b], which differs from the encoding used for training and we need to\n * re-code the data.\n *\n * First, we perform an `argsort` on the training categories in the increasing order,\n * obtaining a list of index: [3, 1, 0, 2], which corresponds to [a, b, c, d] as a sorted\n * list. Then we perform binary search for each category in the test time encoding [c, a,\n * b] with the training encoding as the sorted haystack. Since c is the third item of\n * sorted training encoding, we have an index 2 (0-based) for c, index 0 for a, and index\n * 1 for b. After the binary search, we obtain a new list of index [2, 0, 1]. Using this\n * index list, we can recover the training encoding for the test dataset [0, 3, 1]. This\n * has O(NlogN) complexity with N as the number of categories (assuming the length of the\n * strings as constant). 
Originally, the encoding for test data set is [0, 1, 2] for [c,\n * a, b], now we have a mapping {0 -> 0, 1 -> 3, 2 -> 1} for re-coding the data.\n *\n * This module exposes 2 functions and an execution policy:\n * - @ref Recode\n * - @ref SortNames\n * Each of them has a device counterpart.\n */\n\n#pragma once\n#include <algorithm>    // for stable_sort, lower_bound\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t, int8_t\n#include <iterator>     // for iterator_traits, distance\n#include <numeric>      // for accumulate, iota\n#include <sstream>      // for stringstream\n#include <stdexcept>    // for logic_error\n#include <string>       // for string\n#include <tuple>        // for tuple\n#include <type_traits>  // for decay_t\n#include <utility>      // for forward\n#include <variant>      // for variant, visit\n#include <vector>       // for vector\n\n#include \"../common/transform_iterator.h\"  // for MakeIndexTransformIter\n#include \"types.h\"                         // for Overloaded, TupToVarT\n#include \"xgboost/span.h\"                  // for Span\n\nnamespace enc {\nusing xgboost::common::MakeIndexTransformIter;\nusing xgboost::common::Span;\n\nusing CatCharT = std::int8_t;\n\n/**\n * @brief String names of categorical data. Represented in the arrow StringArray format.\n */\nstruct CatStrArrayView {\n  Span<std::int32_t const> offsets;\n  Span<CatCharT const> values;\n\n  [[nodiscard]] ENC_DEVICE bool empty() const { return offsets.empty(); }  // NOLINT\n  [[nodiscard]] ENC_DEVICE std::size_t size() const {                      // NOLINT\n    return this->empty() ? 0 : this->offsets.size() - 1;\n  }\n\n  [[nodiscard]] std::size_t SizeBytes() const {\n    return this->offsets.size_bytes() + values.size_bytes();\n  }\n};\n\n// We keep a single type list here for supported types and use various transformations to\n// add specializations. 
This way we can modify the type list with ease.\n\n/**\n * @brief All the primitive types supported by the encoder.\n */\nusing CatPrimIndexTypes =\n    std::tuple<std::uint8_t, std::int8_t, std::uint16_t, std::int16_t, std::uint32_t, std::int32_t,\n               std::uint64_t, std::int64_t, float, double>;\n\n/**\n * @brief All the column types supported by the encoder.\n */\nusing CatIndexViewTypes =\n    decltype(std::tuple_cat(std::tuple<CatStrArrayView>{}, PrimToSpan<CatPrimIndexTypes>::Type{}));\n\n/**\n * @brief Host categories view for a single column.\n */\nusing HostCatIndexView = cpu_impl::TupToVarT<CatIndexViewTypes>;\n\n#if defined(XGBOOST_USE_CUDA)\n/**\n * @brief Device categories view for a single column.\n */\nusing DeviceCatIndexView = cuda_impl::TupToVarT<CatIndexViewTypes>;\n#endif  // defined(XGBOOST_USE_CUDA)\n\n/**\n * @brief Container for the execution policies used by the encoder.\n *\n * Accepted policies:\n *\n * - A class with a `ThrustPolicy` method that returns a thrust execution policy, along with a\n *   `ThrustAllocator` template type. In addition, a `Stream` method that returns a CUDA stream.\n *   This is only used for the GPU implementation.\n *\n * - An error handling policy that exposes a single `Error` method, which takes a single\n *   string parameter for error message.\n */\ntemplate <typename... Derived>\nstruct Policy : public Derived... 
{};\n\nnamespace detail {\nconstexpr std::int32_t SearchKey() { return -1; }\nconstexpr std::int32_t NotFound() { return -1; }\n\ntemplate <typename Variant>\nstruct ColumnsViewImpl {\n  using VariantT = Variant;\n\n  Span<Variant const> columns;\n\n  // Segment pointer for features, each segment represents the number of categories in a feature.\n  Span<std::int32_t const> feature_segments;\n  // The total number of cats in all features, equals feature_segments.back()\n  std::int32_t n_total_cats{0};\n\n  [[nodiscard]] std::size_t Size() const { return columns.size(); }\n  [[nodiscard]] bool Empty() const { return this->Size() == 0; }\n  [[nodiscard]] auto operator[](std::size_t i) const { return columns[i]; }\n  [[nodiscard]] auto HasCategorical() const { return n_total_cats != 0; }\n};\n\nstruct DftErrorHandler {\n  void Error(std::string &&msg) const { throw std::logic_error{std::forward<std::string>(msg)}; }\n};\n\ntemplate <typename ExecPolicy>\nvoid ReportMissing(ExecPolicy const &policy, std::string const &name, std::size_t f_idx) {\n  std::stringstream ss;\n  ss << \"Found a category not in the training set for the \" << f_idx << \"th (0-based) column: `\"\n     << name << \"`\";\n  policy.Error(ss.str());\n}\n}  // namespace detail\n\n/**\n * @brief Host view of the encoding scheme for all columns.\n */\nusing HostColumnsView = detail::ColumnsViewImpl<HostCatIndexView>;\n#if defined(XGBOOST_USE_CUDA)\n/**\n * @brief Device view of the encoding scheme for all columns.\n */\nusing DeviceColumnsView = detail::ColumnsViewImpl<DeviceCatIndexView>;\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace detail {\ntemplate <typename ExecPolicy, typename IndexType>\nvoid BasicChecks(ExecPolicy const &policy, detail::ColumnsViewImpl<IndexType> orig_enc,\n                 Span<std::int32_t const> sorted_idx, detail::ColumnsViewImpl<IndexType> new_enc,\n                 Span<std::int32_t> mapping) {\n  if (orig_enc.Size() != new_enc.Size()) {\n    policy.Error(\"New and 
old encoding should have the same number of columns.\");\n  }\n  if (static_cast<std::int32_t>(mapping.size()) != new_enc.n_total_cats) {\n    policy.Error(\"`mapping` should have the same size as `new_enc.n_total_cats`.\");\n  }\n  if (static_cast<std::int32_t>(sorted_idx.size()) != orig_enc.n_total_cats) {\n    policy.Error(\"`sorted_idx` should have the same size as `orig_enc.n_total_cats`.\");\n  }\n  if (orig_enc.feature_segments.size() != orig_enc.columns.size() + 1) {\n    policy.Error(\"Invalid original encoding.\");\n  }\n  if (new_enc.feature_segments.size() != new_enc.columns.size() + 1) {\n    policy.Error(\"Invalid new encoding.\");\n  }\n}\n}  // namespace detail\n\n/**\n * @brief The result encoding. User needs to construct it from the offsets from the new\n *        dictionary along with the mapping returned by the recode function.\n */\nstruct MappingView {\n  Span<std::int32_t const> offsets;\n  Span<std::int32_t const> mapping;\n\n  /**\n   * @brief Get the encoding for a specific feature.\n   */\n  [[nodiscard]] ENC_DEVICE auto operator[](std::size_t f_idx) const {\n    return mapping.subspan(offsets[f_idx], offsets[f_idx + 1] - offsets[f_idx]);\n  }\n  [[nodiscard]] ENC_DEVICE bool Empty() const { return offsets.empty(); }\n};\n\nnamespace cpu_impl {\ntemplate <typename InIt, typename OutIt, typename Comp>\nvoid ArgSort(InIt in_first, InIt in_last, OutIt out_first, Comp comp = std::less{}) {\n  auto n = std::distance(in_first, in_last);\n  using Idx = typename std::iterator_traits<OutIt>::value_type;\n\n  auto out_last = out_first + n;\n  std::iota(out_first, out_last, 0);\n  auto op = [&](Idx const &l, Idx const &r) {\n    return comp(in_first[l], in_first[r]);\n  };\n  std::stable_sort(out_first, out_last, op);\n}\n\n[[nodiscard]] inline std::int32_t SearchSorted(CatStrArrayView haystack,\n                                               Span<std::int32_t const> ref_sorted_idx,\n                                               Span<std::int8_t 
const> needle) {\n  auto it = MakeIndexTransformIter([](auto i) { return static_cast<std::int32_t>(i); });\n  auto const h_off = haystack.offsets;\n  auto const h_data = haystack.values;\n  using detail::SearchKey;\n  auto ret_it = std::lower_bound(it, it + haystack.size(), SearchKey(), [&](auto l, auto r) {\n    Span<std::int8_t const> l_str;\n    if (l == SearchKey()) {\n      l_str = needle;\n    } else {\n      auto l_idx = ref_sorted_idx[l];\n      auto l_beg = h_off[l_idx];\n      auto l_end = h_off[l_idx + 1];\n      l_str = h_data.subspan(l_beg, l_end - l_beg);\n    }\n\n    Span<std::int8_t const> r_str;\n    if (r == SearchKey()) {\n      r_str = needle;\n    } else {\n      auto r_idx = ref_sorted_idx[r];\n      auto r_beg = h_off[r_idx];\n      auto r_end = h_off[r_idx + 1];\n      r_str = h_data.subspan(r_beg, r_end - r_beg);\n    }\n\n    return l_str < r_str;\n  });\n  if (ret_it == it + haystack.size()) {\n    return detail::NotFound();\n  }\n  return *ret_it;\n}\n\ntemplate <typename T>\n[[nodiscard]] std::enable_if_t<std::is_integral_v<T> || std::is_floating_point_v<T>, std::int32_t>\nSearchSorted(Span<T const> haystack, Span<std::int32_t const> ref_sorted_idx, T needle) {\n  using detail::SearchKey;\n  auto it = MakeIndexTransformIter([](auto i) { return static_cast<std::int32_t>(i); });\n  auto ret_it = std::lower_bound(it, it + haystack.size(), SearchKey(), [&](auto l, auto r) {\n    T l_value = l == SearchKey() ? needle : haystack[ref_sorted_idx[l]];\n    T r_value = r == SearchKey() ? 
needle : haystack[ref_sorted_idx[r]];\n    return l_value < r_value;\n  });\n  if (ret_it == it + haystack.size()) {\n    return detail::NotFound();\n  }\n  return *ret_it;\n}\n\ntemplate <typename ExecPolicy>\nvoid SortNames(ExecPolicy const &policy, HostCatIndexView const &cats,\n               Span<std::int32_t> sorted_idx) {\n  auto it = MakeIndexTransformIter([](auto i) { return i; });\n  using T = typename std::iterator_traits<decltype(it)>::value_type;\n  auto n_categories = std::visit([](auto &&arg) { return arg.size(); }, cats);\n  if (sorted_idx.size() != n_categories) {\n    policy.Error(\"Invalid size of sorted index.\");\n  }\n  std::visit(Overloaded{[&](CatStrArrayView const &str) {\n                          cpu_impl::ArgSort(it, it + str.size(), sorted_idx.begin(), [&](T l, T r) {\n                            auto l_beg = str.offsets[l];\n                            auto l_str = str.values.subspan(l_beg, str.offsets[l + 1] - l_beg);\n\n                            auto r_beg = str.offsets[r];\n                            auto r_str = str.values.subspan(r_beg, str.offsets[r + 1] - r_beg);\n\n                            return l_str < r_str;\n                          });\n                        },\n                        [&](auto &&values) {\n                          cpu_impl::ArgSort(it, it + values.size(), sorted_idx.begin(),\n                                            [&](T l, T r) { return values[l] < values[r]; });\n                        }},\n             cats);\n}\n}  // namespace cpu_impl\n\n/**\n * @brief Sort the categories for the training set. 
Returns a list of sorted index.\n *\n * @tparam ExecPolicy The @ref Policy class, only an error policy is needed for the CPU\n *                    implementation.\n *\n * @param policy     The execution policy.\n * @param orig_enc   The encoding scheme of the training set.\n * @param sorted_idx The output sorted index.\n */\ntemplate <typename ExecPolicy>\nvoid SortNames(ExecPolicy const &policy, HostColumnsView orig_enc, Span<std::int32_t> sorted_idx) {\n  if (static_cast<std::int32_t>(sorted_idx.size()) != orig_enc.n_total_cats) {\n    policy.Error(\"`sorted_idx` should have the same size as `n_total_cats`.\");\n  }\n  for (std::size_t f_idx = 0, n = orig_enc.Size(); f_idx < n; ++f_idx) {\n    auto beg = orig_enc.feature_segments[f_idx];\n    auto f_sorted_idx = sorted_idx.subspan(beg, orig_enc.feature_segments[f_idx + 1] - beg);\n    cpu_impl::SortNames(policy, orig_enc.columns[f_idx], f_sorted_idx);\n  }\n}\n\n/**\n * @brief Default execution policy for the host implementation. Users are expected to\n *        customize it.\n */\nusing DftHostPolicy = Policy<detail::DftErrorHandler>;\n\n/**\n * @brief Calculate a mapping for recoding the data given old and new encoding.\n *\n * @tparam ExecPolicy The @ref Policy class, only an error policy is needed for the CPU\n *                    implementation.\n *\n * @param policy     The execution policy.\n * @param orig_enc   The encoding scheme of the training set.\n * @param sorted_idx The sorted index of the training set encoding scheme, produced by\n *                   @ref SortNames .\n * @param new_enc    The scheme that needs to be recoded.\n * @param mapping    The output mapping.\n */\ntemplate <typename ExecPolicy>\nvoid Recode(ExecPolicy const &policy, HostColumnsView orig_enc, Span<std::int32_t const> sorted_idx,\n            HostColumnsView new_enc, Span<std::int32_t> mapping) {\n  detail::BasicChecks(policy, orig_enc, sorted_idx, new_enc, mapping);\n\n  std::size_t out_idx = 0;\n  for (std::size_t f_idx 
= 0, n_features = orig_enc.Size(); f_idx < n_features; f_idx++) {\n    auto const& l_f = orig_enc.columns[f_idx];\n    auto const& r_f = new_enc.columns[f_idx];\n    auto report = [&] {\n      std::stringstream ss;\n      ss << \"Invalid new DataFrame input for the: \" << f_idx << \"th feature (0-based). \"\n         << \"The data type doesn't match the one used in the training dataset. \"\n         << \"Both should be either numeric or categorical. For a categorical feature, the index \"\n            \"type must match between the training and test set.\";\n      policy.Error(ss.str());\n    };\n    if (l_f.index() != r_f.index()) {\n      report();\n    }\n    bool is_empty = std::visit([](auto &&arg) { return arg.empty(); }, l_f);\n    bool new_is_empty = std::visit([](auto &&arg) { return arg.empty(); }, r_f);\n    if (is_empty != new_is_empty) {\n      report();\n    }\n    if (is_empty) {\n      continue;\n    }\n\n    auto f_beg = orig_enc.feature_segments[f_idx];\n    auto ref_sorted_idx = sorted_idx.subspan(f_beg, orig_enc.feature_segments[f_idx + 1] - f_beg);\n\n    auto n_new_categories =\n        std::visit([](auto &&arg) { return arg.size(); }, new_enc.columns[f_idx]);\n    std::vector<std::int32_t> searched_idx(n_new_categories, -1);\n    auto const &col = new_enc.columns[f_idx];\n    std::visit(Overloaded{[&](CatStrArrayView const &str) {\n                            for (std::size_t j = 1, m = n_new_categories + 1; j < m; ++j) {\n                              auto begin = str.offsets[j - 1];\n                              auto end = str.offsets[j];\n                              auto needle = str.values.subspan(begin, end - begin);\n                              searched_idx[j - 1] = cpu_impl::SearchSorted(\n                                  std::get<CatStrArrayView>(orig_enc.columns[f_idx]),\n                                  ref_sorted_idx, needle);\n                              if (searched_idx[j - 1] == detail::NotFound()) {\n                    
            std::stringstream ss;\n                                for (auto c : needle) {\n                                  ss.put(c);\n                                }\n                                detail::ReportMissing(policy, ss.str(), f_idx);\n                              }\n                            }\n                          },\n                          [&](auto &&values) {\n                            using T = typename std::decay_t<decltype(values)>::value_type;\n                            for (std::size_t j = 0; j < n_new_categories; ++j) {\n                              auto needle = values[j];\n                              searched_idx[j] = cpu_impl::SearchSorted(\n                                  std::get<Span<std::add_const_t<T>>>(orig_enc.columns[f_idx]),\n                                  ref_sorted_idx, needle);\n                              if (searched_idx[j] == detail::NotFound()) {\n                                std::stringstream ss;\n                                ss << needle;\n                                detail::ReportMissing(policy, ss.str(), f_idx);\n                              }\n                            }\n                          }},\n               col);\n\n    for (auto i : searched_idx) {\n      auto idx = ref_sorted_idx[i];\n      mapping[out_idx++] = idx;\n    }\n  }\n}\n\ninline std::ostream &operator<<(std::ostream &os, CatStrArrayView const &strings) {\n  auto const &offset = strings.offsets;\n  auto const &data = strings.values;\n  os << \"[\";\n  for (std::size_t i = 1, n = offset.size(); i < n; ++i) {\n    auto begin = offset[i - 1];\n    auto end = offset[i];\n    auto str = data.subspan(begin, end - begin);\n    for (auto v : str) {\n      os.put(v);\n    }\n    if (i != n - 1) {\n      os << \", \";\n    }\n  }\n  os << \"]\";\n  return os;\n}\n\ninline std::ostream &operator<<(std::ostream &os, HostColumnsView const &h_enc) {\n  for (std::size_t i = 0; i < h_enc.columns.size(); ++i) {\n    auto 
const &col = h_enc.columns[i];\n    os << \"f\" << i << \": \";\n    std::visit(enc::Overloaded{[&](enc::CatStrArrayView const &str) { os << str; },\n                               [&](auto &&values) {\n                                 os << \"[\";\n                                 for (std::size_t j = 0, n = values.size(); j < n; ++j) {\n                                   os << values[j];\n                                   if (j != n - 1) {\n                                     os << \", \";\n                                   }\n                                 }\n                                 os << \"]\";\n                               }},\n               col);\n    os << std::endl;\n  }\n  return os;\n}\n}  // namespace enc\n"
  },
  {
    "path": "src/encoder/types.h",
    "content": "/**\n * Copyright 2024, XGBoost contributors\n */\n#pragma once\n\n#if defined(__CUDA__) || defined(__NVCC__)\n#define ENC_DEVICE __host__ __device__\n#else\n#define ENC_DEVICE\n#endif  // defined (__CUDA__) || defined(__NVCC__)\n\n#include <tuple>    // for tuple\n#include <variant>  // for variant\n\n#include \"xgboost/span.h\"  // for Span\n\n#if defined(XGBOOST_USE_CUDA)\n\n#include <cuda/std/variant>  // for variant\n\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace enc {\ntemplate <typename... Ts>\nstruct Overloaded : Ts... {\n  using Ts::operator()...;\n};\n\ntemplate <typename... Ts>\nENC_DEVICE Overloaded(Ts...) -> Overloaded<Ts...>;\n\n// Whether a type is a member of a type list (a.k.a tuple).\ntemplate <typename... Ts>\nstruct MemberOf;\n\ntemplate <typename T, typename... Ts>\nstruct MemberOf<T, std::tuple<Ts...>> : public std::disjunction<std::is_same<T, Ts>...> {};\n\n// Convert primitive types to span types.\ntemplate <typename... Ts>\nstruct PrimToSpan;\n\ntemplate <typename... Ts>\nstruct PrimToSpan<std::tuple<Ts...>> {\n  using Type = std::tuple<xgboost::common::Span<std::add_const_t<Ts>>...>;\n};\n\nnamespace cpu_impl {\n// Convert tuple of types to variant of types.\ntemplate <typename... Ts>\nstruct TupToVar;\n\ntemplate <typename... Ts>\nstruct TupToVar<std::tuple<Ts...>> {\n  using Type = std::variant<Ts...>;\n};\n\ntemplate <typename... Ts>\nusing TupToVarT = typename TupToVar<Ts...>::Type;\n}  // namespace cpu_impl\n\n#if defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\n// Convert tuple of types to CUDA variant of types.\ntemplate <typename... Ts>\nstruct TupToVar {};\n\ntemplate <typename... Ts>\nstruct TupToVar<std::tuple<Ts...>> {\n  using Type = cuda::std::variant<Ts...>;\n};\n\ntemplate <typename... Ts>\nusing TupToVarT = typename TupToVar<Ts...>::Type;\n}  // namespace cuda_impl\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace enc\n"
  },
  {
    "path": "src/gbm/gblinear.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file gblinear.cc\n * \\brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net\n *        the update rule is parallel coordinate descent (shotgun)\n * \\author Tianqi Chen\n */\n#include <dmlc/omp.h>\n#include <dmlc/parameter.h>\n\n#include <algorithm>\n#include <numeric>\n#include <string>\n#include <vector>\n\n#include \"../common/error_msg.h\"      // NoCategorical, DeprecatedFunc\n#include \"../common/threading_utils.h\"\n#include \"../common/timer.h\"\n#include \"gblinear_model.h\"\n#include \"xgboost/gbm.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/learner.h\"\n#include \"xgboost/linalg.h\"\n#include \"xgboost/linear_updater.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/predictor.h\"\n\nnamespace xgboost::gbm {\nDMLC_REGISTRY_FILE_TAG(gblinear);\n\n// training parameters\nstruct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {\n  std::string updater;\n  float tolerance;\n  size_t max_row_perbatch;\n\n  DMLC_DECLARE_PARAMETER(GBLinearTrainParam) {\n    DMLC_DECLARE_FIELD(updater)\n        .set_default(\"shotgun\")\n        .describe(\"Update algorithm for linear model. 
One of shotgun/coord_descent\");\n    DMLC_DECLARE_FIELD(tolerance)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\"Stop if largest weight update is smaller than this number.\");\n    DMLC_DECLARE_FIELD(max_row_perbatch)\n        .set_default(std::numeric_limits<size_t>::max())\n        .describe(\"Maximum rows per batch.\");\n  }\n};\n\nvoid LinearCheckLayer(unsigned layer_begin) {\n  CHECK_EQ(layer_begin, 0) << \"Linear booster does not support prediction range.\";\n}\n\n/*!\n * \\brief gradient boosted linear model\n */\nclass GBLinear : public GradientBooster {\n public:\n  explicit GBLinear(LearnerModelParam const* learner_model_param, Context const* ctx)\n      : GradientBooster{ctx},\n        learner_model_param_{learner_model_param},\n        model_{learner_model_param},\n        previous_model_{learner_model_param} {\n    monitor_.Init(__func__);\n  }\n\n  void Configure(const Args& cfg) override {\n    if (model_.weight.size() == 0) {\n      model_.Configure(cfg);\n    }\n    param_.UpdateAllowUnknown(cfg);\n    if (param_.updater == \"gpu_coord_descent\") {\n      LOG(FATAL) << error::DeprecatedFunc(\"gpu_coord_descent\", \"2.0.0\",\n                                          R\"(device=\"cuda\", updater=\"coord_descent\")\");\n    }\n\n    auto name = (param_.updater == \"coord_descent\")\n                    // Dispatch for coordinate descent\n                    ? 
this->ctx_->DispatchDevice([] { return \"coord_descent\"; },\n                                                 [] { return \"gpu_coord_descent\"; })\n                    : param_.updater;\n    LOG(INFO) << \"Using the updater:\" << name;\n\n    updater_.reset(LinearUpdater::Create(name, ctx_));\n    updater_->Configure(cfg);\n  }\n\n  int32_t BoostedRounds() const override {\n    return model_.num_boosted_rounds;\n  }\n\n  bool ModelFitted() const override { return BoostedRounds() != 0; }\n\n  void SaveModel(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String{\"gblinear\"};\n\n    out[\"model\"] = Object();\n    auto& model = out[\"model\"];\n    model_.SaveModel(&model);\n  }\n  void LoadModel(Json const& in) override {\n    CHECK_EQ(get<String>(in[\"name\"]), \"gblinear\");\n    auto const& model = in[\"model\"];\n    model_.LoadModel(model);\n  }\n\n  void LoadConfig(Json const& in) override {\n    CHECK_EQ(get<String>(in[\"name\"]), \"gblinear\");\n    FromJson(in[\"gblinear_train_param\"], &param_);\n    updater_.reset(LinearUpdater::Create(param_.updater, ctx_));\n    this->updater_->LoadConfig(in[\"updater\"]);\n  }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String{\"gblinear\"};\n    out[\"gblinear_train_param\"] = ToJson(param_);\n\n    out[\"updater\"] = Object();\n    auto& j_updater = out[\"updater\"];\n    CHECK(this->updater_);\n    this->updater_->SaveConfig(&j_updater);\n  }\n\n  void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry*,\n               ObjFunction const*) override {\n    if (in_gpair->HasValueGrad()) {\n      LOG(FATAL)\n          << \"Multi-target with reduced gradient is not implemented for the current booster.\";\n    }\n\n    monitor_.Start(\"DoBoost\");\n\n    CHECK(!p_fmat->Info().HasCategorical()) << error::NoCategorical(\"`gblinear`\");\n    model_.LazyInitModel();\n    this->LazySumWeights(p_fmat);\n\n    if 
(!this->CheckConvergence()) {\n      updater_->Update(in_gpair->Grad(), p_fmat, &model_, sum_instance_weight_);\n    }\n    model_.num_boosted_rounds++;\n    monitor_.Stop(\"DoBoost\");\n  }\n\n  void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/,\n                    bst_layer_t layer_begin, bst_layer_t) override {\n    monitor_.Start(\"PredictBatch\");\n    LinearCheckLayer(layer_begin);\n    auto* out_preds = &predts->predictions;\n    this->PredictBatchInternal(p_fmat, &out_preds->HostVector());\n    monitor_.Stop(\"PredictBatch\");\n  }\n\n  void PredictLeaf(DMatrix *, HostDeviceVector<bst_float> *, unsigned, unsigned) override {\n    LOG(FATAL) << \"gblinear does not support prediction of leaf index\";\n  }\n\n  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,\n                           bst_layer_t layer_begin, bst_layer_t /*layer_end*/, bool) override {\n    model_.LazyInitModel();\n    LinearCheckLayer(layer_begin);\n    auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());\n    const int ngroup = model_.learner_model_param->num_output_group;\n    const size_t ncolumns = model_.learner_model_param->num_feature + 1;\n    // allocate space for (#features + bias) times #groups times #rows\n    std::vector<bst_float>& contribs = out_contribs->HostVector();\n    contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);\n    // make sure contributions is zeroed, we could be reusing a previously allocated one\n    std::fill(contribs.begin(), contribs.end(), 0);\n    auto base_score = learner_model_param_->BaseScore(ctx_);\n    // start collecting the contributions\n    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {\n      // parallel over local batch\n      const auto nsize = static_cast<bst_omp_uint>(batch.Size());\n      auto page = batch.GetView();\n      common::ParallelFor(nsize, ctx_->Threads(), [&](bst_omp_uint i) {\n        auto inst = page[i];\n        
auto row_idx = static_cast<size_t>(batch.base_rowid + i);\n        // loop over output groups\n        for (int gid = 0; gid < ngroup; ++gid) {\n          bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];\n          // calculate linear terms' contributions\n          for (auto& ins : inst) {\n            if (ins.index >= model_.learner_model_param->num_feature) continue;\n            p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid];\n          }\n          // add base margin to BIAS\n          p_contribs[ncolumns - 1] =\n              model_.Bias()[gid] +\n              ((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score(0));\n        }\n      });\n    }\n  }\n\n  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                                       bst_layer_t layer_begin, bst_layer_t /*layer_end*/,\n                                       bool) override {\n    LinearCheckLayer(layer_begin);\n    std::vector<bst_float>& contribs = out_contribs->HostVector();\n\n    // linear models have no interaction effects\n    const size_t nelements = model_.learner_model_param->num_feature *\n                             model_.learner_model_param->num_feature;\n    contribs.resize(p_fmat->Info().num_row_ * nelements *\n                    model_.learner_model_param->num_output_group);\n    std::fill(contribs.begin(), contribs.end(), 0);\n  }\n\n  [[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,\n                                                   std::string format) const override {\n    return model_.DumpModel(fmap, with_stats, format);\n  }\n\n  void FeatureScore(std::string const &importance_type,\n                    common::Span<int32_t const> trees,\n                    std::vector<bst_feature_t> *out_features,\n                    std::vector<float> *out_scores) const override {\n    CHECK(!model_.weight.empty()) << \"Model is not 
initialized\";\n    CHECK(trees.empty()) << \"gblinear doesn't support number of trees for feature importance.\";\n    CHECK_EQ(importance_type, \"weight\")\n        << \"gblinear only has `weight` defined for feature importance.\";\n    out_features->resize(this->learner_model_param_->num_feature, 0);\n    std::iota(out_features->begin(), out_features->end(), 0);\n    // Don't include the bias term in the feature importance scores\n    // The bias is the last weight\n    out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);\n    auto n_groups = learner_model_param_->num_output_group;\n    auto scores = linalg::MakeTensorView(DeviceOrd::CPU(),\n                                         common::Span{out_scores->data(), out_scores->size()},\n                                         learner_model_param_->num_feature, n_groups);\n    for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {\n      for (bst_group_t g = 0; g < n_groups; ++g) {\n        scores(i, g) = model_[i][g];\n      }\n    }\n  }\n\n protected:\n  void PredictBatchInternal(DMatrix *p_fmat,\n                            std::vector<bst_float> *out_preds) {\n    monitor_.Start(\"PredictBatchInternal\");\n    model_.LazyInitModel();\n    std::vector<bst_float> &preds = *out_preds;\n    auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());\n    // start collecting the prediction\n    const int ngroup = model_.learner_model_param->num_output_group;\n    preds.resize(p_fmat->Info().num_row_ * ngroup);\n\n    auto base_score = learner_model_param_->BaseScore(DeviceOrd::CPU());\n    for (const auto &page : p_fmat->GetBatches<SparsePage>()) {\n      auto const& batch = page.GetView();\n      // output convention: nrow * k, where nrow is number of rows\n      // k is number of group\n      // parallel over local batch\n      const auto nsize = static_cast<omp_ulong>(batch.Size());\n      if (base_margin.Size() != 0) {\n        CHECK_EQ(base_margin.Size(), 
nsize * ngroup);\n      }\n      common::ParallelFor(nsize, ctx_->Threads(), [&](omp_ulong i) {\n        const size_t ridx = page.base_rowid + i;\n        // loop over output groups\n        for (int gid = 0; gid < ngroup; ++gid) {\n          float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score(0);\n          this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);\n        }\n      });\n    }\n    monitor_.Stop(\"PredictBatchInternal\");\n  }\n\n  bool CheckConvergence() {\n    if (param_.tolerance == 0.0f) return false;\n    if (is_converged_) return true;\n    if (previous_model_.weight.size() != model_.weight.size()) {\n      previous_model_ = model_;\n      return false;\n    }\n    float largest_dw = 0.0;\n    for (size_t i = 0; i < model_.weight.size(); i++) {\n      largest_dw = std::max(\n          largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i]));\n    }\n    previous_model_ = model_;\n\n    is_converged_ = largest_dw <= param_.tolerance;\n    return is_converged_;\n  }\n\n  void LazySumWeights(DMatrix *p_fmat) {\n    if (!sum_weight_complete_) {\n      auto &info = p_fmat->Info();\n      for (size_t i = 0; i < info.num_row_; i++) {\n        sum_instance_weight_ += info.GetWeight(i);\n      }\n      sum_weight_complete_ = true;\n    }\n  }\n\n  void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,\n            bst_float base) {\n    bst_float psum = model_.Bias()[gid] + base;\n    for (const auto& ins : inst) {\n      if (ins.index >= model_.learner_model_param->num_feature) continue;\n      psum += ins.fvalue * model_[ins.index][gid];\n    }\n    preds[gid] = psum;\n  }\n\n  // biase margin score\n  LearnerModelParam const* learner_model_param_;\n  // model field\n  GBLinearModel model_;\n  GBLinearModel previous_model_;\n  GBLinearTrainParam param_;\n  std::unique_ptr<LinearUpdater> updater_;\n  double sum_instance_weight_{};\n  bool sum_weight_complete_{false};\n  common::Monitor monitor_;\n  
bool is_converged_{false};\n};\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(GBLinearTrainParam);\n\nXGBOOST_REGISTER_GBM(GBLinear, \"gblinear\")\n    .describe(\"Linear booster, implement generalized linear model.\")\n    .set_body([](LearnerModelParam const* booster_config, Context const* ctx) {\n      return new GBLinear(booster_config, ctx);\n    });\n}  // namespace xgboost::gbm\n"
  },
  {
    "path": "src/gbm/gblinear_model.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <algorithm>\n#include <utility>\n#include \"xgboost/json.h\"\n#include \"gblinear_model.h\"\n\nnamespace xgboost::gbm {\nvoid GBLinearModel::SaveModel(Json* p_out) const {\n  auto& out = *p_out;\n\n  size_t const n_weights = weight.size();\n  F32Array j_weights{n_weights};\n  std::copy(weight.begin(), weight.end(), j_weights.GetArray().begin());\n  out[\"weights\"] = std::move(j_weights);\n  out[\"boosted_rounds\"] = Json{this->num_boosted_rounds};\n}\n\nvoid GBLinearModel::LoadModel(Json const& in) {\n  auto const& obj = get<Object const>(in);\n  auto weight_it = obj.find(\"weights\");\n  if (IsA<F32Array>(weight_it->second)) {\n    auto const& j_weights = get<F32Array const>(weight_it->second);\n    weight.resize(j_weights.size());\n    std::copy(j_weights.begin(), j_weights.end(), weight.begin());\n  } else {\n    auto const& j_weights = get<Array const>(weight_it->second);\n    auto n_weights = j_weights.size();\n    weight.resize(n_weights);\n    for (size_t i = 0; i < n_weights; ++i) {\n      weight[i] = get<Number const>(j_weights[i]);\n    }\n  }\n\n  auto boosted_rounds = obj.find(\"boosted_rounds\");\n  if (boosted_rounds != obj.cend()) {\n    this->num_boosted_rounds = get<Integer const>(boosted_rounds->second);\n  } else {\n    this->num_boosted_rounds = 0;\n  }\n}\n}  // namespace xgboost::gbm\n"
  },
  {
    "path": "src/gbm/gblinear_model.h",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n */\n#pragma once\n#include <dmlc/io.h>\n#include <dmlc/parameter.h>\n#include <xgboost/learner.h>\n\n#include <vector>\n#include <string>\n#include <cstring>\n\n#include \"xgboost/base.h\"\n#include \"xgboost/feature_map.h\"\n#include \"xgboost/model.h\"\n#include \"xgboost/json.h\"\n\nnamespace xgboost {\nclass Json;\nnamespace gbm {\n// model for linear booster\nclass GBLinearModel : public Model {\n public:\n  std::int32_t num_boosted_rounds{0};\n  LearnerModelParam const* learner_model_param;\n\n public:\n  explicit GBLinearModel(LearnerModelParam const *learner_model_param)\n      : learner_model_param{learner_model_param} {}\n  void Configure(Args const &) { }\n\n  // weight for each of feature, bias is the last one\n  std::vector<bst_float> weight;\n  // initialize the model parameter\n  inline void LazyInitModel() {\n    if (!weight.empty()) {\n      return;\n    }\n    // bias is the last weight\n    weight.resize((learner_model_param->num_feature + 1) *\n                  learner_model_param->num_output_group);\n    std::fill(weight.begin(), weight.end(), 0.0f);\n  }\n\n  void SaveModel(Json *p_out) const override;\n  void LoadModel(Json const &in) override;\n\n  // model bias\n  inline bst_float *Bias() {\n    return &weight[learner_model_param->num_feature *\n                   learner_model_param->num_output_group];\n  }\n  inline const bst_float *Bias() const {\n    return &weight[learner_model_param->num_feature *\n                   learner_model_param->num_output_group];\n  }\n  // get i-th weight\n  inline bst_float *operator[](size_t i) {\n    return &weight[i * learner_model_param->num_output_group];\n  }\n  inline const bst_float *operator[](size_t i) const {\n    return &weight[i * learner_model_param->num_output_group];\n  }\n\n  std::vector<std::string> DumpModel(const FeatureMap &, bool,\n                                     std::string format) const {\n    const int ngroup 
= learner_model_param->num_output_group;\n    const unsigned nfeature = learner_model_param->num_feature;\n\n    std::stringstream fo(\"\");\n    if (format == \"json\") {\n      fo << \"  { \\\"bias\\\": [\" << std::endl;\n      for (int gid = 0; gid < ngroup; ++gid) {\n        if (gid != 0) {\n          fo << \",\" << std::endl;\n        }\n        fo << \"      \" << this->Bias()[gid];\n      }\n      fo << std::endl\n         << \"    ],\" << std::endl\n         << \"    \\\"weight\\\": [\" << std::endl;\n      for (unsigned i = 0; i < nfeature; ++i) {\n        for (int gid = 0; gid < ngroup; ++gid) {\n          if (i != 0 || gid != 0) {\n            fo << \",\" << std::endl;\n          }\n          fo << \"      \" << (*this)[i][gid];\n        }\n      }\n      fo << std::endl << \"    ]\" << std::endl << \"  }\";\n    } else if (format == \"text\") {\n      fo << \"bias:\\n\";\n      for (int gid = 0; gid < ngroup; ++gid) {\n        fo << this->Bias()[gid] << std::endl;\n      }\n      fo << \"weight:\\n\";\n      for (unsigned i = 0; i < nfeature; ++i) {\n        for (int gid = 0; gid < ngroup; ++gid) {\n          fo << (*this)[i][gid] << std::endl;\n        }\n      }\n    } else {\n      LOG(FATAL) << \"Dump format `\" << format << \"` is not supported by the gblinear model.\";\n    }\n    std::vector<std::string> v;\n    v.push_back(fo.str());\n    return v;\n  }\n};\n\n}  // namespace gbm\n}  // namespace xgboost\n"
  },
  {
    "path": "src/gbm/gbm.cc",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file gbm.cc\n * \\brief Registry of gradient boosters.\n */\n#include \"xgboost/gbm.h\"\n\n#include <dmlc/registry.h>\n\n#include <string>\n\n#include \"xgboost/context.h\"\n#include \"xgboost/learner.h\"\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);\n}  // namespace dmlc\n\nnamespace xgboost {\nGradientBooster* GradientBooster::Create(const std::string& name, Context const* ctx,\n                                         LearnerModelParam const* learner_model_param) {\n  auto const& gbm_name = name == \"dart\" ? std::string{\"gbtree\"} : name;\n  auto* e = ::dmlc::Registry<::xgboost::GradientBoosterReg>::Get()->Find(gbm_name);\n  if (e == nullptr) {\n    LOG(FATAL) << \"Unknown gbm type \" << name;\n  }\n  auto p_bst = (e->body)(learner_model_param, ctx);\n  return p_bst;\n}\n}  // namespace xgboost\n\nnamespace xgboost {\nnamespace gbm {\n// List of files that will be force linked in static links.\nDMLC_REGISTRY_LINK_TAG(gblinear);\nDMLC_REGISTRY_LINK_TAG(gbtree);\n}  // namespace gbm\n}  // namespace xgboost\n"
  },
  {
    "path": "src/gbm/gbtree.cc",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n *\n * \\file gbtree.cc\n * \\brief gradient boosted tree implementation.\n * \\author Tianqi Chen\n */\n#include \"gbtree.h\"\n\n#include <dmlc/omp.h>\n#include <dmlc/parameter.h>\n\n#include <algorithm>  // for equal\n#include <cstdint>    // for uint32_t\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../common/common.h\"\n#include \"../common/cuda_rt_utils.h\"  // for AllVisibleGPUs\n#include \"../common/error_msg.h\"  // for UnknownDevice, WarnOldSerialization, InplacePredictProxy\n#include \"../common/threading_utils.h\"\n#include \"../common/timer.h\"\n#include \"../data/proxy_dmatrix.h\"  // for DMatrixProxy, HostAdapterDispatch\n#include \"gbtree_model.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/gbm.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/model.h\"\n#include \"xgboost/objective.h\"\n#include \"xgboost/predictor.h\"\n#include \"xgboost/string_view.h\"  // for StringView\n#include \"xgboost/tree_model.h\"   // for RegTree\n#include \"xgboost/tree_updater.h\"\n\nnamespace xgboost::gbm {\nDMLC_REGISTRY_FILE_TAG(gbtree);\n\nnamespace {\n/** @brief Map the `tree_method` parameter to the `updater` parameter. 
*/\nstd::string MapTreeMethodToUpdaters(Context const* ctx, TreeMethod tree_method) {\n  // Choose updaters according to tree_method parameters\n  if (ctx->IsCUDA()) {\n    common::AssertGPUSupport();\n  }\n\n  switch (tree_method) {\n    case TreeMethod::kAuto:  // Use hist as default in 2.0\n    case TreeMethod::kHist: {\n      return ctx->DispatchDevice([] { return \"grow_quantile_histmaker\"; },\n                                 [] { return \"grow_gpu_hist\"; },\n                                 [] { return \"grow_quantile_histmaker_sycl\"; });\n    }\n    case TreeMethod::kApprox: {\n      return ctx->DispatchDevice([] { return \"grow_histmaker\"; }, [] { return \"grow_gpu_approx\"; });\n    }\n    case TreeMethod::kExact:\n      CHECK(ctx->IsCPU()) << \"The `exact` tree method is not supported on GPU.\";\n      return \"grow_colmaker,prune\";\n    default:\n      auto tm = static_cast<std::underlying_type_t<TreeMethod>>(tree_method);\n      LOG(FATAL) << \"Unknown tree_method: `\" << tm << \"`.\";\n  }\n\n  LOG(FATAL) << \"unreachable\";\n  return \"\";\n}\n\nbool UpdatersMatched(std::vector<std::string> updater_seq,\n                     std::vector<std::unique_ptr<TreeUpdater>> const& updaters) {\n  if (updater_seq.size() != updaters.size()) {\n    return false;\n  }\n\n  return std::equal(updater_seq.cbegin(), updater_seq.cend(), updaters.cbegin(),\n                    [](std::string const& name, std::unique_ptr<TreeUpdater> const& up) {\n                      return name == up->Name();\n                    });\n}\n}  // namespace\n\nvoid GBTree::Configure(Args const& cfg) {\n  tparam_.UpdateAllowUnknown(cfg);\n  dparam_.UpdateAllowUnknown(cfg);\n  tree_param_.UpdateAllowUnknown(cfg);\n\n  model_.Configure(cfg);\n\n  // for the 'update' process_type, move trees into trees_to_update\n  if (tparam_.process_type == TreeProcessType::kUpdate) {\n    model_.InitTreesToUpdate();\n  }\n\n  // configure predictors\n  if (!cpu_predictor_) {\n    cpu_predictor_ = 
std::unique_ptr<Predictor>(Predictor::Create(\"cpu_predictor\", this->ctx_));\n  }\n  cpu_predictor_->Configure(cfg);\n#if defined(XGBOOST_USE_CUDA)\n  auto n_gpus = curt::AllVisibleGPUs();\n  if (!gpu_predictor_) {\n    gpu_predictor_ = std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", this->ctx_));\n  }\n  if (n_gpus != 0) {\n    gpu_predictor_->Configure(cfg);\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n\n#if defined(XGBOOST_USE_SYCL)\n  if (!sycl_predictor_) {\n    sycl_predictor_ = std::unique_ptr<Predictor>(Predictor::Create(\"sycl_predictor\", this->ctx_));\n  }\n  sycl_predictor_->Configure(cfg);\n#endif  // defined(XGBOOST_USE_SYCL)\n\n  // `updater` parameter was manually specified\n  specified_updater_ =\n      std::any_of(cfg.cbegin(), cfg.cend(), [](auto const& arg) { return arg.first == \"updater\"; });\n  if (specified_updater_) {\n    error::WarnManualUpdater();\n  }\n  LOG(DEBUG) << \"Using tree method: \" << static_cast<int>(tparam_.tree_method);\n\n  if (!specified_updater_) {\n    this->tparam_.updater_seq = MapTreeMethodToUpdaters(ctx_, tparam_.tree_method);\n  }\n\n  auto up_names = common::Split(tparam_.updater_seq, ',');\n  if (!UpdatersMatched(up_names, updaters_)) {\n    updaters_.clear();\n    for (auto const& name : up_names) {\n      std::unique_ptr<TreeUpdater> up(\n          TreeUpdater::Create(name.c_str(), ctx_, &model_.learner_model_param->task));\n      updaters_.push_back(std::move(up));\n    }\n  }\n\n  for (auto& up : updaters_) {\n    up->Configure(cfg);\n  }\n}\n\nvoid GBTreeModel::InitTreesToUpdate() {\n  if (trees_to_update.empty()) {\n    for (auto& tree : trees) {\n      trees_to_update.push_back(std::move(tree));\n    }\n\n    trees.clear();\n    param.num_trees = 0;\n    tree_info.HostVector().clear();\n\n    iteration_indptr.clear();\n    iteration_indptr.push_back(0);\n  }\n}\n\nvoid GPUCopyGradient(Context const*, linalg::Matrix<GradientPair> const*, bst_group_t,\n                     
linalg::Matrix<GradientPair>*)\n#if defined(XGBOOST_USE_CUDA)\n    ;  // NOLINT\n#else\n{\n  common::AssertGPUSupport();\n}\n#endif\n\nvoid CopyGradient(Context const* ctx, linalg::Matrix<GradientPair> const* in_gpair,\n                  bst_group_t group_id, linalg::Matrix<GradientPair>* out_gpair) {\n  out_gpair->SetDevice(ctx->Device());\n  out_gpair->Reshape(in_gpair->Shape(0), 1);\n  if (ctx->IsCUDA()) {\n    GPUCopyGradient(ctx, in_gpair, group_id, out_gpair);\n  } else {\n    auto const& in = *in_gpair;\n    auto h_tmp = out_gpair->HostView();\n    auto h_in = in.HostView().Slice(linalg::All(), group_id);\n    CHECK_EQ(h_tmp.Size(), h_in.Size());\n    common::ParallelFor(h_in.Size(), ctx->Threads(), [&](auto i) { h_tmp(i) = h_in(i); });\n  }\n}\n\n/** Increment the prediction on GPU.\n *\n * \\param out_predts Prediction for the whole model.\n * \\param predts     Prediction for current tree.\n * \\param tree_w     Tree weight.\n */\nvoid GPUDartPredictInc(common::Span<float>, common::Span<float>, float, size_t, bst_group_t,\n                       bst_group_t)\n#if defined(XGBOOST_USE_CUDA)\n    ;  // NOLINT\n#else\n{\n  common::AssertGPUSupport();\n}\n#endif\n\nvoid GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const& predictions,\n                            ObjFunction const* obj, std::int32_t group_idx,\n                            std::vector<HostDeviceVector<bst_node_t>> const& node_position,\n                            TreesOneGroup* p_trees) {\n  CHECK(!updaters_.empty());\n  if (!updaters_.back()->HasNodePosition()) {\n    return;\n  }\n  if (!obj || !obj->Task().UpdateTreeLeaf()) {\n    return;\n  }\n\n  auto& trees = *p_trees;\n  CHECK_EQ(model_.param.num_parallel_tree, trees.size());\n  for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {\n    auto const& position = node_position[tree_idx];\n    obj->UpdateTreeLeaf(position, p_fmat->Info(), tree_param_.learning_rate / trees.size(),\n                      
  predictions, group_idx, trees[tree_idx].get());\n  }\n}\n\nvoid GBTree::DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry* predt,\n                     ObjFunction const* obj) {\n  if (model_.learner_model_param->IsVectorLeaf()) {\n    CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)\n        << \"Only the hist tree method is supported for building multi-target trees with vector \"\n           \"leaf.\";\n  }\n\n  TreesOneIter new_trees;\n  bst_target_t const n_groups = model_.learner_model_param->OutputLength();\n  monitor_.Start(\"BoostNewTrees\");\n\n  // Define the categories.\n  if (this->model_.Cats()->Empty() && !p_fmat->Cats()->Empty()) {\n    auto in_cats = p_fmat->Cats();\n    this->model_.Cats()->Copy(this->ctx_, *in_cats);\n    this->model_.Cats()->Sort(this->ctx_);\n  } else {\n    CHECK_EQ(this->model_.Cats()->NumCatsTotal(), p_fmat->Cats()->NumCatsTotal())\n        << \"A new dataset with different categorical features is used for training an existing \"\n           \"model.\";\n  }\n\n  predt->predictions.SetDevice(ctx_->Device());\n  auto out = linalg::MakeTensorView(ctx_, &predt->predictions, p_fmat->Info().num_row_,\n                                    model_.learner_model_param->OutputLength());\n  CHECK_NE(n_groups, 0);\n\n  // The node position for each row, 1 HDV for each tree in the forest.  
Note that the\n  // position is negated if the row is sampled out.\n  std::vector<HostDeviceVector<bst_node_t>> node_position;\n\n  if (model_.learner_model_param->IsVectorLeaf()) {\n    // Multi-target, vector leaf\n    TreesOneGroup ret;\n    BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);\n    UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);\n    std::size_t num_new_trees = ret.size();\n    new_trees.push_back(std::move(ret));\n    if (updaters_.size() > 0 && num_new_trees == 1 && predt->predictions.Size() > 0 &&\n        updaters_.back()->UpdatePredictionCache(p_fmat, common::Span{node_position}, out)) {\n      predt->Update(1);\n    }\n  } else if (model_.learner_model_param->OutputLength() == 1u) {\n    // Single target\n    TreesOneGroup ret;\n    BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);\n    UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);\n    const size_t num_new_trees = ret.size();\n    new_trees.push_back(std::move(ret));\n    if (updaters_.size() > 0 && num_new_trees == 1 && predt->predictions.Size() > 0 &&\n        updaters_.back()->UpdatePredictionCache(p_fmat, common::Span{node_position}, out)) {\n      predt->Update(1);\n    }\n  } else {\n    // Multi-target, scalar leaf\n    CHECK_EQ(in_gpair->gpair.Size() % n_groups, 0U)\n        << \"Must have exactly n_groups * n_samples gpairs.\";\n    GradientContainer tmp;\n    tmp.gpair = linalg::Matrix<GradientPair>{\n        {in_gpair->gpair.Shape(0), static_cast<std::size_t>(1ul)}, ctx_->Device()};\n    bool update_predict = true;\n    for (bst_target_t gid = 0; gid < n_groups; ++gid) {\n      node_position.clear();\n      CopyGradient(ctx_, &in_gpair->gpair, gid, &tmp.gpair);\n      TreesOneGroup ret;\n      BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret);\n      UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret);\n      const size_t num_new_trees = ret.size();\n      
new_trees.push_back(std::move(ret));\n      auto v_predt = out.Slice(linalg::All(), linalg::Range(gid, gid + 1));\n      // random forest doesn't support the prediction cache yet.\n      if (!(updaters_.size() > 0 && predt->predictions.Size() > 0 && num_new_trees == 1 &&\n            updaters_.back()->UpdatePredictionCache(p_fmat, common::Span{node_position},\n                                                    v_predt))) {\n        update_predict = false;\n      }\n    }\n    if (update_predict) {\n      predt->Update(1);\n    }\n  }\n\n  monitor_.Stop(\"BoostNewTrees\");\n  this->CommitModel(std::move(new_trees));\n}\n\nstd::vector<RegTree*> GBTree::InitNewTrees(bst_target_t bst_group, TreesOneGroup* ret) {\n  std::vector<RegTree*> new_trees;\n  ret->clear();\n  // create the trees\n  for (int i = 0; i < model_.param.num_parallel_tree; ++i) {\n    if (tparam_.process_type == TreeProcessType::kDefault) {\n      CHECK(!updaters_.empty());\n      CHECK(!updaters_.front()->CanModifyTree())\n          << \"Updater: `\" << updaters_.front()->Name() << \"` \"\n          << \"can not be used to create new trees. \"\n          << \"Set `process_type` to `update` if you want to update existing \"\n             \"trees.\";\n      // create new tree\n      std::unique_ptr<RegTree> ptr(new RegTree{this->model_.learner_model_param->LeafLength(),\n                                               this->model_.learner_model_param->num_feature});\n      new_trees.push_back(ptr.get());\n      ret->push_back(std::move(ptr));\n    } else if (tparam_.process_type == TreeProcessType::kUpdate) {\n      for (auto const& up : updaters_) {\n        CHECK(up->CanModifyTree())\n            << \"Updater: `\" << up->Name() << \"` \"\n            << \"can not be used to modify existing trees. 
\"\n            << \"Set `process_type` to `default` if you want to build new trees.\";\n      }\n      CHECK_LT(model_.trees.size(), model_.trees_to_update.size())\n          << \"No more tree left for updating.  For updating existing trees, \"\n          << \"boosting rounds can not exceed previous training rounds\";\n      // move an existing tree from trees_to_update\n      auto t = std::move(model_.trees_to_update[model_.trees.size() +\n                                                bst_group * model_.param.num_parallel_tree + i]);\n      new_trees.push_back(t.get());\n      ret->push_back(std::move(t));\n    }\n  }\n  return new_trees;\n}\n\nvoid GBTree::BoostNewTrees(GradientContainer* gpair, DMatrix* p_fmat, int bst_group,\n                           std::vector<HostDeviceVector<bst_node_t>>* out_position,\n                           TreesOneGroup* ret) {\n  std::vector<RegTree*> new_trees = this->InitNewTrees(bst_group, ret);\n\n  // update the trees\n  auto n_out = model_.learner_model_param->OutputLength() * p_fmat->Info().num_row_;\n  StringView msg{\n      \"Mismatching size between number of rows from input data and size of gradient vector.\"};\n  if (!model_.learner_model_param->IsVectorLeaf() && p_fmat->Info().num_row_ != 0) {\n    CHECK_EQ(n_out % gpair->gpair.Size(), 0) << msg;\n  } else if (model_.learner_model_param->IsVectorLeaf()) {\n    // vector leaf\n    if (!gpair->HasValueGrad()) {\n      CHECK_EQ(gpair->gpair.Size(), n_out) << msg;\n    }\n  }\n\n  out_position->resize(new_trees.size());\n\n  // Rescale learning rate according to the number of trees\n  auto lr = tree_param_.learning_rate;\n  tree_param_.learning_rate /= static_cast<float>(new_trees.size());\n  for (auto& up : updaters_) {\n    up->Update(&tree_param_, gpair, p_fmat,\n               common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);\n  }\n  tree_param_.learning_rate = lr;\n}\n\nvoid GBTree::CommitModel(TreesOneIter&& new_trees) {\n  
monitor_.Start(\"CommitModel\");\n  auto n_old_trees = model_.trees.size();\n  auto has_tree_weights = !weight_drop_.empty();\n  auto dropout_configured =\n      dparam_.rate_drop != 0.0f || dparam_.one_drop || dparam_.skip_drop != 0.0f;\n  auto track_tree_weights = has_tree_weights || dropout_configured;\n  if (track_tree_weights && weight_drop_.size() < n_old_trees) {\n    weight_drop_.insert(weight_drop_.cend(), n_old_trees - weight_drop_.size(), 1.0f);\n  }\n  auto n_new_trees = model_.CommitModel(std::forward<TreesOneIter>(new_trees));\n  if (track_tree_weights) {\n    auto num_drop = this->NormalizeTrees(n_new_trees);\n    LOG(INFO) << \"drop \" << num_drop << \" trees, \"\n              << \"weight = \" << weight_drop_.back();\n  }\n  monitor_.Stop(\"CommitModel\");\n}\n\nvoid GBTree::LoadConfig(Json const& in) {\n  auto name = get<String const>(in[\"name\"]);\n  CHECK(name == \"gbtree\" || name == \"dart\")\n      << \"Unknown booster name in model JSON: `\" << name\n      << \"`. Only `gbtree` or legacy `dart` boosters are accepted here.\";\n  auto const& config = name == \"dart\" ? in[\"gbtree\"] : in;\n  FromJson(config[\"gbtree_train_param\"], &tparam_);\n  FromJson(config[\"tree_train_param\"], &tree_param_);\n  auto const& obj = get<Object const>(config);\n  auto it = obj.find(\"dart_train_param\");\n  if (it != obj.cend()) {\n    FromJson(it->second, &dparam_);\n  } else if (name == \"dart\") {\n    FromJson(in[\"dart_train_param\"], &dparam_);\n  } else {\n    dparam_ = {};\n  }\n\n  // Process type cannot be kUpdate from loaded model\n  // This would cause all trees to be pushed to trees_to_update\n  // e.g. 
updating a model, then saving and loading it would result in an empty model\n  tparam_.process_type = TreeProcessType::kDefault;\n  std::int32_t const n_gpus = curt::AllVisibleGPUs();\n\n  std::vector<Json> updater_seq;\n  if (IsA<Object>(config[\"updater\"])) {\n    // before 2.0\n    error::WarnOldSerialization();\n    for (auto const& kv : get<Object const>(config[\"updater\"])) {\n      auto name = kv.first;\n      auto config = kv.second;\n      config[\"name\"] = name;\n      updater_seq.push_back(config);\n    }\n  } else {\n    // after 2.0\n    auto const& j_updaters = get<Array const>(config[\"updater\"]);\n    updater_seq = j_updaters;\n  }\n\n  updaters_.clear();\n\n  for (auto const& config : updater_seq) {\n    auto name = get<String>(config[\"name\"]);\n    if (n_gpus == 0 && name == \"grow_gpu_hist\") {\n      name = \"grow_quantile_histmaker\";\n      LOG(WARNING) << \"Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.\";\n    }\n    updaters_.emplace_back(TreeUpdater::Create(name, ctx_, &model_.learner_model_param->task));\n    updaters_.back()->LoadConfig(config);\n  }\n\n  specified_updater_ = get<Boolean>(config[\"specified_updater\"]);\n}\n\nvoid GBTree::SaveConfig(Json* p_out) const {\n  auto& out = *p_out;\n  out[\"name\"] = String(\"gbtree\");\n  out[\"gbtree_train_param\"] = ToJson(tparam_);\n  out[\"tree_train_param\"] = ToJson(tree_param_);\n  out[\"dart_train_param\"] = ToJson(dparam_);\n\n  // Process type cannot be kUpdate from loaded model\n  // This would cause all trees to be pushed to trees_to_update\n  // e.g. updating a model, then saving and loading it would result in an empty\n  // model\n  out[\"gbtree_train_param\"][\"process_type\"] = String(\"default\");\n  // Duplicated from SaveModel so that user can get `num_parallel_tree` without parsing\n  // the model. 
We might remove this once we can deprecate `best_ntree_limit` so that the\n  // language binding doesn't need to know about the forest size.\n  out[\"gbtree_model_param\"] = ToJson(model_.param);\n\n  out[\"updater\"] = Array{};\n  auto& j_updaters = get<Array>(out[\"updater\"]);\n\n  for (auto const& up : this->updaters_) {\n    Json up_config{Object{}};\n    up_config[\"name\"] = String{up->Name()};\n    up->SaveConfig(&up_config);\n    j_updaters.emplace_back(up_config);\n  }\n  out[\"specified_updater\"] = Boolean{specified_updater_};\n}\n\nvoid GBTree::LoadModel(Json const& in) {\n  auto name = get<String const>(in[\"name\"]);\n  CHECK(name == \"gbtree\" || name == \"dart\");\n  auto const& model = name == \"dart\" ? in[\"gbtree\"] : in;\n  model_.LoadModel(model[\"model\"]);\n  auto const& obj = get<Object const>(name == \"dart\" ? in : model);\n  auto it = obj.find(\"weight_drop\");\n  if (it != obj.cend()) {\n    auto const& j_weight_drop = get<Array const>(it->second);\n    weight_drop_.resize(j_weight_drop.size());\n    for (size_t i = 0; i < weight_drop_.size(); ++i) {\n      weight_drop_[i] = get<Number const>(j_weight_drop[i]);\n    }\n  } else {\n    weight_drop_.clear();\n  }\n}\n\nvoid GBTree::SaveModel(Json* p_out) const {\n  auto& out = *p_out;\n  out[\"name\"] = String(\"gbtree\");\n  out[\"model\"] = Object();\n  auto& model = out[\"model\"];\n  model_.SaveModel(&model);\n  if (!weight_drop_.empty()) {\n    std::vector<Json> j_weight_drop(weight_drop_.size());\n    for (size_t i = 0; i < weight_drop_.size(); ++i) {\n      j_weight_drop[i] = Number(weight_drop_[i]);\n    }\n    out[\"weight_drop\"] = Array(std::move(j_weight_drop));\n  }\n}\n\nstd::vector<float> GBTree::DropTrees(bool is_training) {\n  if (!is_training) {\n    return {};\n  }\n  auto dropout_configured =\n      dparam_.rate_drop != 0.0f || dparam_.one_drop || dparam_.skip_drop != 0.0f;\n  if (weight_drop_.empty()) {\n    if (!dropout_configured || model_.trees.empty()) {\n      
return {};\n    }\n    weight_drop_.resize(model_.trees.size(), 1.0f);\n  }\n  idx_drop_.clear();\n\n  std::uniform_real_distribution<> runif(0.0, 1.0);\n  auto& rnd = ctx_->Rng();\n  bool skip = false;\n  if (dparam_.skip_drop > 0.0) {\n    skip = (runif(rnd) < dparam_.skip_drop);\n  }\n  if (skip) {\n    return {};\n  }\n\n  if (dparam_.sample_type == DartSampleType::kWeighted) {\n    bst_float sum_weight = 0.0;\n    for (auto elem : weight_drop_) {\n      sum_weight += elem;\n    }\n    for (size_t i = 0; i < weight_drop_.size(); ++i) {\n      if (runif(rnd) < dparam_.rate_drop * weight_drop_.size() * weight_drop_[i] / sum_weight) {\n        idx_drop_.push_back(i);\n      }\n    }\n    if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {\n      size_t i = std::discrete_distribution<size_t>(\n          weight_drop_.size(), 0., static_cast<double>(weight_drop_.size()),\n          [this](double x) -> double { return weight_drop_[static_cast<size_t>(x)]; })(rnd);\n      idx_drop_.push_back(i);\n    }\n  } else {\n    for (size_t i = 0; i < weight_drop_.size(); ++i) {\n      if (runif(rnd) < dparam_.rate_drop) {\n        idx_drop_.push_back(i);\n      }\n    }\n    if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {\n      size_t i = std::uniform_int_distribution<size_t>(0, weight_drop_.size() - 1)(rnd);\n      idx_drop_.push_back(i);\n    }\n  }\n\n  if (idx_drop_.empty()) {\n    return {};\n  }\n\n  auto dropped_weights = weight_drop_;\n  for (auto idx : idx_drop_) {\n    dropped_weights.at(idx) = 0.0f;\n  }\n  return dropped_weights;\n}\n\nstd::size_t GBTree::NormalizeTrees(size_t size_new_trees) {\n  CHECK(tree_param_.GetInitialised());\n  float lr = 1.0 * tree_param_.learning_rate / size_new_trees;\n  size_t num_drop = idx_drop_.size();\n  if (num_drop == 0) {\n    for (size_t i = 0; i < size_new_trees; ++i) {\n      weight_drop_.push_back(1.0);\n    }\n  } else if (dparam_.normalize_type == 1) {\n    float factor = 1.0 / (1.0 
+ lr);\n    for (auto i : idx_drop_) {\n      weight_drop_[i] *= factor;\n    }\n    for (size_t i = 0; i < size_new_trees; ++i) {\n      weight_drop_.push_back(factor);\n    }\n  } else {\n    float factor = 1.0 * num_drop / (num_drop + lr);\n    for (auto i : idx_drop_) {\n      weight_drop_[i] *= factor;\n    }\n    for (size_t i = 0; i < size_new_trees; ++i) {\n      weight_drop_.push_back(1.0 / (num_drop + lr));\n    }\n  }\n  idx_drop_.clear();\n  return num_drop;\n}\n\nvoid GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out,\n                   bool* out_of_bound) const {\n  CHECK(out);\n\n  auto p_gbtree = dynamic_cast<GBTree*>(out);\n  CHECK(p_gbtree);\n  GBTreeModel& out_model = p_gbtree->model_;\n  CHECK(this->model_.learner_model_param->Initialized());\n\n  end = end == 0 ? model_.BoostedRounds() : end;\n  CHECK_GE(step, 1);\n  CHECK_NE(end, begin) << \"Empty slice is not allowed.\";\n\n  if (step > (end - begin)) {\n    *out_of_bound = true;\n    return;\n  }\n\n  auto& out_indptr = out_model.iteration_indptr;\n  TreesOneGroup& out_trees = out_model.trees;\n  auto& out_tree_info = out_model.tree_info.HostVector();\n\n  auto const& in_tree_info = this->model_.tree_info.ConstHostVector();\n\n  bst_layer_t n_layers = (end - begin) / step;\n  out_indptr.resize(n_layers + 1, 0);\n\n  if (!this->model_.trees_to_update.empty()) {\n    CHECK_EQ(this->model_.trees_to_update.size(), this->model_.trees.size())\n        << \"Not all trees are updated, \"\n        << this->model_.trees_to_update.size() - this->model_.trees.size()\n        << \" trees remain.  
Slice the model before making update if you only \"\n           \"want to update a portion of trees.\";\n  }\n\n  *out_of_bound =\n      detail::SliceTrees(begin, end, step, this->model_, [&](auto in_tree_idx, auto out_l) {\n        std::unique_ptr<RegTree> new_tree{this->model_.trees.at(in_tree_idx)->Copy()};\n        out_trees.emplace_back(std::move(new_tree));\n\n        bst_group_t group = in_tree_info[in_tree_idx];\n        out_tree_info.push_back(group);\n\n        out_model.iteration_indptr[out_l + 1]++;\n      });\n\n  std::partial_sum(out_indptr.cbegin(), out_indptr.cend(), out_indptr.begin());\n  CHECK_EQ(out_model.iteration_indptr.front(), 0);\n\n  out_model.param.num_trees = out_model.trees.size();\n  out_model.param.num_parallel_tree = model_.param.num_parallel_tree;\n\n  p_gbtree->dparam_ = this->dparam_;\n  p_gbtree->idx_drop_.clear();\n  p_gbtree->weight_drop_.clear();\n  if (!this->weight_drop_.empty()) {\n    detail::SliceTrees(begin, end, step, model_, [&](auto in_tree_idx, auto const&) {\n      p_gbtree->weight_drop_.push_back(this->weight_drop_.at(in_tree_idx));\n    });\n  }\n}\n\nvoid GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,\n                              bst_layer_t layer_begin, bst_layer_t layer_end,\n                              std::vector<float> const* tree_weights) const {\n  // Unweighted prediction can reuse a cached prefix of the model output by tracking how many\n  // boosting iterations have already been accumulated in `out_preds->version`.\n  //\n  // Weighted prediction is used by DART and does not participate in this cache, since tree\n  // weights can change the accumulated output independently of the cached unweighted prefix.\n  if (layer_end == 0) {\n    layer_end = this->BoostedRounds();\n  }\n\n  auto cache_version = out_preds->version;\n  // We can preserve the cache only when:\n  // - prediction is unweighted\n  // - prediction starts from iteration 0, so the result is 
a cacheable prefix\n  auto preserve_cache = tree_weights == nullptr && layer_begin == 0;\n  // We can reuse the existing cached prefix only when:\n  // - the result itself is cacheable\n  // - the requested range does not move backwards past the cached version\n  auto reuse_cache = preserve_cache && layer_end >= static_cast<bst_layer_t>(cache_version);\n  // Initialize output when:\n  // - the cached prefix cannot be reused, or\n  // - the cache is valid but still empty\n  auto initialize_output = !reuse_cache || cache_version == 0;\n  auto prediction_begin = reuse_cache ? cache_version : layer_begin;\n\n  if (!reuse_cache) {\n    out_preds->version = 0;\n    cache_version = 0;\n  }\n\n  if (out_preds->predictions.Size() == 0 && p_fmat->Info().num_row_ != 0) {\n    CHECK_EQ(out_preds->version, 0);\n  }\n\n  auto const& predictor = GetPredictor(is_training, &out_preds->predictions, p_fmat);\n  if (initialize_output) {\n    // out_preds->Size() can be non-zero as it's initialized here before any\n    // tree is built at the 0^th iterator.\n    predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_);\n  }\n\n  auto [tree_begin, tree_end] = detail::LayerToTree(model_, prediction_begin, layer_end);\n  CHECK_LE(tree_end, model_.trees.size()) << \"Invalid number of trees.\";\n  if (tree_end > tree_begin) {\n    predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end, tree_weights);\n  }\n\n  if (!preserve_cache) {\n    out_preds->version = 0;\n  } else {\n    out_preds->Update(layer_end - cache_version);\n  }\n}\n\nvoid GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,\n                          bst_layer_t layer_begin, bst_layer_t layer_end) {\n  auto const* tree_weights = this->TreeWeights();\n  auto dropped_weights = this->DropTrees(is_training);\n  if (!dropped_weights.empty()) {\n    tree_weights = &dropped_weights;\n  }\n  this->PredictBatchImpl(p_fmat, out_preds, is_training, layer_begin, 
layer_end, tree_weights);\n}\n\nvoid GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,\n                            PredictionCacheEntry* out_preds, bst_layer_t layer_begin,\n                            bst_layer_t layer_end) const {\n  auto const* tree_weights = this->TreeWeights();\n  if (tree_weights != nullptr) {\n    CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << \"dart\" << MTNotImplemented();\n  }\n  auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);\n  CHECK_LE(tree_end, model_.trees.size()) << \"Invalid number of trees.\";\n  if (p_m->Ctx()->Device() != this->ctx_->Device()) {\n    error::MismatchedDevices(this->ctx_, p_m->Ctx());\n    CHECK_EQ(out_preds->version, 0);\n    auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);\n    CHECK(proxy) << error::InplacePredictProxy();\n    auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);\n    this->PredictBatchImpl(p_fmat.get(), out_preds, false, layer_begin, layer_end, tree_weights);\n    return;\n  }\n\n  bool known_type = this->ctx_->DispatchDevice(\n      [&, begin = tree_begin, end = tree_end] {\n        return this->cpu_predictor_->InplacePredict(p_m, model_, missing, out_preds, begin, end,\n                                                    tree_weights);\n      },\n      [&, begin = tree_begin, end = tree_end] {\n        return this->gpu_predictor_->InplacePredict(p_m, model_, missing, out_preds, begin, end,\n                                                    tree_weights);\n#if defined(XGBOOST_USE_SYCL)\n      },\n      [&, begin = tree_begin, end = tree_end] {\n        return this->sycl_predictor_->InplacePredict(p_m, model_, missing, out_preds, begin, end,\n                                                     tree_weights);\n#endif  // defined(XGBOOST_USE_SYCL)\n      });\n  if (!known_type) {\n    auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);\n    CHECK(proxy) << 
error::InplacePredictProxy();\n    LOG(FATAL) << \"Unknown data type for inplace prediction:\" << proxy->Adapter().type().name();\n  }\n}\n\n[[nodiscard]] std::unique_ptr<Predictor> const& GBTree::GetPredictor(\n    bool is_training, HostDeviceVector<float> const* out_pred, DMatrix* f_dmat) const {\n  // Data comes from SparsePageDMatrix. Since we are loading data in pages, no need to\n  // prevent data copy.\n  if (f_dmat && !f_dmat->SingleColBlock()) {\n    if (ctx_->IsCPU()) {\n      return cpu_predictor_;\n    } else if (ctx_->IsCUDA()) {\n      common::AssertGPUSupport();\n      CHECK(gpu_predictor_);\n      return gpu_predictor_;\n    } else {\n#if defined(XGBOOST_USE_SYCL)\n      common::AssertSYCLSupport();\n      CHECK(sycl_predictor_);\n      return sycl_predictor_;\n#endif  // defined(XGBOOST_USE_SYCL)\n    }\n  }\n\n  // Data comes from Device DMatrix.\n  auto is_ellpack =\n      f_dmat && f_dmat->PageExists<EllpackPage>() && !f_dmat->PageExists<SparsePage>();\n  // Data comes from device memory, like CuDF or CuPy.\n  auto is_from_device = f_dmat && f_dmat->PageExists<SparsePage>() &&\n                        (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();\n  auto on_device = is_ellpack || is_from_device;\n\n  // Use GPU Predictor if data is already on device and gpu_id is set.\n  if (on_device && ctx_->IsCUDA()) {\n    common::AssertGPUSupport();\n    CHECK(gpu_predictor_);\n    return gpu_predictor_;\n  }\n\n  // GPU_Hist by default has prediction cache calculated from quantile values,\n  // so GPU Predictor is not used for training dataset.  But when XGBoost\n  // performs continue training with an existing model, the prediction cache is\n  // not available and number of trees doesn't equal zero, the whole training\n  // dataset got copied into GPU for precise prediction.  
This condition tries\n  // to avoid such copy by calling CPU Predictor instead.\n  if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&\n      // FIXME(trivialfis): Implement a better method for testing whether data\n      // is on device after DMatrix refactoring is done.\n      !on_device && is_training) {\n    CHECK(cpu_predictor_);\n    return cpu_predictor_;\n  }\n\n  if (ctx_->IsCPU()) {\n    return cpu_predictor_;\n  } else if (ctx_->IsCUDA()) {\n    common::AssertGPUSupport();\n    CHECK(gpu_predictor_);\n    return gpu_predictor_;\n  } else {\n#if defined(XGBOOST_USE_SYCL)\n    common::AssertSYCLSupport();\n    CHECK(sycl_predictor_);\n    return sycl_predictor_;\n#endif  // defined(XGBOOST_USE_SYCL)\n  }\n\n  return cpu_predictor_;\n}\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(GBTreeModelParam);\nDMLC_REGISTER_PARAMETER(GBTreeTrainParam);\nDMLC_REGISTER_PARAMETER(DartTrainParam);\n\nXGBOOST_REGISTER_GBM(GBTree, \"gbtree\")\n    .describe(\"Tree booster, gradient boosted trees.\")\n    .set_body([](LearnerModelParam const* booster_config, Context const* ctx) {\n      auto* p = new GBTree{booster_config, ctx};\n      return p;\n    });\n}  // namespace xgboost::gbm\n"
  },
  {
    "path": "src/gbm/gbtree.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n\n#include \"../common/cuda_context.cuh\"\n#include \"../common/device_helpers.cuh\"  // for MakeTransformIterator\n#include \"xgboost/base.h\"                // for GradientPair\n#include \"xgboost/linalg.h\"              // for Matrix\n\nnamespace xgboost::gbm {\nvoid GPUCopyGradient(Context const *ctx, linalg::Matrix<GradientPair> const *in_gpair,\n                     bst_group_t group_id, linalg::Matrix<GradientPair> *out_gpair) {\n  auto v_in = in_gpair->View(ctx->Device()).Slice(linalg::All(), group_id);\n  out_gpair->SetDevice(ctx->Device());\n  out_gpair->Reshape(v_in.Size(), 1);\n  auto d_out = out_gpair->View(ctx->Device());\n  auto cuctx = ctx->CUDACtx();\n  auto it = dh::MakeTransformIterator<GradientPair>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v_in(i); });\n  thrust::copy(cuctx->CTP(), it, it + v_in.Size(), d_out.Values().data());\n}\n\nvoid GPUDartPredictInc(common::Span<float> out_predts,\n                       common::Span<float> predts, float tree_w, size_t n_rows,\n                       bst_group_t n_groups, bst_group_t group) {\n  dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {\n    const size_t offset = ridx * n_groups + group;\n    out_predts[offset] += (predts[offset] * tree_w);\n  });\n}\n\nvoid GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,\n                              float tree_w, size_t n_rows,\n                              linalg::TensorView<float const, 1> base_score, bst_group_t n_groups,\n                              bst_group_t group) {\n  CHECK_EQ(base_score.Size(), n_groups);\n  dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {\n    const size_t offset = ridx * n_groups + group;\n    out_predts[offset] += (predts[offset] - base_score(group)) * tree_w;\n  });\n}\n}  // namespace 
xgboost::gbm\n"
  },
  {
    "path": "src/gbm/gbtree.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file gbtree.cc\n * \\brief gradient boosted tree implementation.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_GBM_GBTREE_H_\n#define XGBOOST_GBM_GBTREE_H_\n\n#include <dmlc/omp.h>\n\n#include <algorithm>\n#include <cstdint>  // std::int32_t\n#include <memory>\n#include <numeric>  // for iota\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../common/timer.h\"\n#include \"../tree/param.h\"      // TrainParam\n#include \"../tree/tree_view.h\"  // for WalkTree\n#include \"gbtree_model.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/gbm.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/parameter.h\"\n#include \"xgboost/predictor.h\"\n#include \"xgboost/tree_updater.h\"\n\nnamespace xgboost {\nenum class TreeMethod : int {\n  kAuto = 0,\n  kApprox = 1,\n  kExact = 2,\n  kHist = 3,\n};\n\n// boosting process types\nenum class TreeProcessType : int { kDefault = 0, kUpdate = 1 };\n\n// Sampling type for dart weights.\nenum class DartSampleType : std::int32_t {\n  kUniform = 0,\n  kWeighted = 1,\n};\n}  // namespace xgboost\n\nDECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);\nDECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);\nDECLARE_FIELD_ENUM_CLASS(xgboost::DartSampleType);\n\nnamespace xgboost::gbm {\n/*! \\brief training parameters */\nstruct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {\n  /*! \\brief tree updater sequence */\n  std::string updater_seq;\n  /*! 
\\brief type of boosting process to run */\n  TreeProcessType process_type;\n  // tree construction method\n  TreeMethod tree_method;\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {\n    DMLC_DECLARE_FIELD(updater_seq).describe(\"Tree updater sequence.\").set_default(\"\");\n    DMLC_DECLARE_FIELD(process_type)\n        .set_default(TreeProcessType::kDefault)\n        .add_enum(\"default\", TreeProcessType::kDefault)\n        .add_enum(\"update\", TreeProcessType::kUpdate)\n        .describe(\n            \"Whether to run the normal boosting process that creates new trees,\"\n            \" or to update the trees in an existing model.\");\n    DMLC_DECLARE_ALIAS(updater_seq, updater);\n    DMLC_DECLARE_FIELD(tree_method)\n        .set_default(TreeMethod::kAuto)\n        .add_enum(\"auto\", TreeMethod::kAuto)\n        .add_enum(\"approx\", TreeMethod::kApprox)\n        .add_enum(\"exact\", TreeMethod::kExact)\n        .add_enum(\"hist\", TreeMethod::kHist)\n        .describe(\"Choice of tree construction method.\");\n  }\n};\n\n/** @brief Dart training parameters */\nstruct DartTrainParam : public XGBoostParameter<DartTrainParam> {\n  DartSampleType sample_type;\n  /*! \\brief type of normalization algorithm */\n  int normalize_type;\n  /*! \\brief fraction of trees to drop during the dropout */\n  float rate_drop;\n  /*! \\brief whether at least one tree should always be dropped during the dropout */\n  bool one_drop;\n  /*! 
\\brief probability of skipping the dropout during an iteration */\n  float skip_drop;\n\n  DMLC_DECLARE_PARAMETER(DartTrainParam) {\n    DMLC_DECLARE_FIELD(sample_type)\n        .set_default(DartSampleType::kUniform)\n        .add_enum(\"uniform\", DartSampleType::kUniform)\n        .add_enum(\"weighted\", DartSampleType::kWeighted)\n        .describe(\"Different types of sampling algorithm.\");\n    DMLC_DECLARE_FIELD(normalize_type)\n        .set_default(0)\n        .add_enum(\"tree\", 0)\n        .add_enum(\"forest\", 1)\n        .describe(\"Different types of normalization algorithm.\");\n    DMLC_DECLARE_FIELD(rate_drop)\n        .set_range(0.0f, 1.0f)\n        .set_default(0.0f)\n        .describe(\"Fraction of trees to drop during the dropout.\");\n    DMLC_DECLARE_FIELD(one_drop).set_default(false).describe(\n        \"Whether at least one tree should always be dropped during the dropout.\");\n    DMLC_DECLARE_FIELD(skip_drop)\n        .set_range(0.0f, 1.0f)\n        .set_default(0.0f)\n        .describe(\"Probability of skipping the dropout during a boosting iteration.\");\n  }\n};\n\nnamespace detail {\n// From here on, layer becomes concrete trees.\ninline std::pair<bst_tree_t, bst_tree_t> LayerToTree(gbm::GBTreeModel const& model,\n                                                     bst_layer_t begin, bst_layer_t end) {\n  CHECK(!model.iteration_indptr.empty());\n  end = end == 0 ? model.BoostedRounds() : end;\n  CHECK_LE(end, model.BoostedRounds()) << \"Out of range for tree layers.\";\n  bst_tree_t tree_begin = model.iteration_indptr[begin];\n  bst_tree_t tree_end = model.iteration_indptr[end];\n  if (model.trees.size() != 0) {\n    CHECK_LE(tree_begin, tree_end);\n  }\n  return {tree_begin, tree_end};\n}\n\n// Call fn for each pair of input output tree.  
Return true if index is out of bound.\ntemplate <typename Func>\nbool SliceTrees(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GBTreeModel const& model,\n                Func&& fn) {\n  end = end == 0 ? model.iteration_indptr.size() : end;\n  CHECK_GE(step, 1);\n  if (step > end - begin) {\n    return true;\n  }\n  if (end > model.BoostedRounds()) {\n    return true;\n  }\n\n  bst_layer_t n_layers = (end - begin) / step;\n  bst_layer_t out_l = 0;\n\n  for (bst_layer_t l = begin; l < end; l += step) {\n    auto [tree_begin, tree_end] = detail::LayerToTree(model, l, l + 1);\n    if (tree_end > static_cast<bst_tree_t>(model.trees.size())) {\n      return true;\n    }\n\n    for (bst_tree_t tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {\n      fn(tree_idx, out_l);\n    }\n    ++out_l;\n  }\n\n  CHECK_EQ(out_l, n_layers);\n  return false;\n}\n}  // namespace detail\n\n// gradient boosted trees\nclass GBTree : public GradientBooster {\n public:\n  explicit GBTree(LearnerModelParam const* booster_config, Context const* ctx)\n      : GradientBooster{ctx}, model_(booster_config, ctx_) {\n    monitor_.Init(__func__);\n  }\n\n  void Configure(Args const& cfg) override;\n  /**\n   * @brief Optionally update the leaf value.\n   */\n  void UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const& predictions,\n                      ObjFunction const* obj, std::int32_t group_idx,\n                      std::vector<HostDeviceVector<bst_node_t>> const& node_position,\n                      std::vector<std::unique_ptr<RegTree>>* p_trees);\n  /**\n   * @brief Carry out one iteration of boosting.\n   */\n  void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry* predt,\n               ObjFunction const* obj) override;\n\n  [[nodiscard]] GBTreeTrainParam const& GetTrainParam() const { return tparam_; }\n\n  void LoadConfig(Json const& in) override;\n  void SaveConfig(Json* p_out) const override;\n\n  void SaveModel(Json* p_out) 
const override;\n  void LoadModel(Json const& in) override;\n\n  // slice the trees, out must be already allocated\n  void Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out,\n             bool* out_of_bound) const override;\n\n  [[nodiscard]] std::int32_t BoostedRounds() const override { return this->model_.BoostedRounds(); }\n  [[nodiscard]] bool ModelFitted() const override {\n    return !model_.trees.empty() || !model_.trees_to_update.empty();\n  }\n\n  void PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,\n                        bst_layer_t layer_begin, bst_layer_t layer_end,\n                        std::vector<float> const* tree_weights = nullptr) const;\n\n  void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool training,\n                    bst_layer_t layer_begin, bst_layer_t layer_end) override;\n\n  void InplacePredict(std::shared_ptr<DMatrix> p_m, float missing, PredictionCacheEntry* out_preds,\n                      bst_layer_t layer_begin, bst_layer_t layer_end) const override;\n\n  void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,\n                    std::vector<bst_feature_t>* features,\n                    std::vector<float>* scores) const override {\n    // Because feature with no importance doesn't appear in the return value so\n    // we need to set up another pair of vectors to store the values during\n    // computation.\n    std::vector<size_t> split_counts(this->model_.learner_model_param->num_feature, 0);\n    std::vector<float> gain_map(this->model_.learner_model_param->num_feature, 0);\n    std::vector<int32_t> tree_idx;\n    if (trees.empty()) {\n      tree_idx.resize(this->model_.trees.size());\n      std::iota(tree_idx.begin(), tree_idx.end(), 0);\n      trees = common::Span<int32_t const>(tree_idx);\n    }\n\n    auto total_n_trees = model_.trees.size();\n    auto add_score = [&](auto fn) {\n      for (auto 
idx : trees) {\n        CHECK_LE(idx, total_n_trees) << \"Invalid tree index.\";\n        auto const& tree = *model_.trees[idx];\n        tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n          if (!tree.IsLeaf(nidx)) {\n            split_counts[tree.SplitIndex(nidx)]++;\n            fn(tree, nidx, tree.SplitIndex(nidx));\n          }\n          return true;\n        });\n      }\n    };\n\n    if (importance_type == \"weight\") {\n      add_score([&](auto const&, bst_node_t, bst_feature_t split) {\n        gain_map[split] = split_counts[split];\n      });\n    } else if (importance_type == \"gain\" || importance_type == \"total_gain\") {\n      add_score([&](auto const& tree, bst_node_t nidx, bst_feature_t split) {\n        if constexpr (tree::IsScalarTree<decltype(tree)>()) {\n          gain_map[split] += tree.Stat(nidx).loss_chg;\n        } else {\n          gain_map[split] += tree.LossChg(nidx);\n        }\n      });\n    } else if (importance_type == \"cover\" || importance_type == \"total_cover\") {\n      add_score([&](auto const& tree, bst_node_t nidx, bst_feature_t split) {\n        if constexpr (tree::IsScalarTree<decltype(tree)>()) {\n          gain_map[split] += tree.Stat(nidx).sum_hess;\n        } else {\n          gain_map[split] += tree.SumHess(nidx);\n        }\n      });\n    } else {\n      LOG(FATAL) << \"Unknown feature importance type, expected one of: \"\n                 << R\"({\"weight\", \"total_gain\", \"total_cover\", \"gain\", \"cover\"}, got: )\"\n                 << importance_type;\n    }\n    if (importance_type == \"gain\" || importance_type == \"cover\") {\n      for (size_t i = 0; i < gain_map.size(); ++i) {\n        gain_map[i] /= std::max(1.0f, static_cast<float>(split_counts[i]));\n      }\n    }\n\n    features->clear();\n    scores->clear();\n    for (size_t i = 0; i < split_counts.size(); ++i) {\n      if (split_counts[i] != 0) {\n        features->push_back(i);\n        scores->push_back(gain_map[i]);\n    
  }\n    }\n  }\n\n  [[nodiscard]] CatContainer const* Cats() const override { return this->model_.Cats(); }\n\n  void PredictLeaf(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_preds, uint32_t layer_begin,\n                   uint32_t layer_end) override {\n    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);\n    CHECK_EQ(tree_begin, 0) << \"Predict leaf supports only iteration end: [0, \"\n                               \"n_iteration), use model slicing instead.\";\n    this->GetPredictor(false)->PredictLeaf(p_fmat, out_preds, model_, tree_end);\n  }\n\n  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                           bst_layer_t layer_begin, bst_layer_t layer_end,\n                           bool approximate) override {\n    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);\n    CHECK_EQ(tree_begin, 0) << \"Predict contribution supports only iteration end: [0, \"\n                               \"n_iteration), using model slicing instead.\";\n    this->GetPredictor(false)->PredictContribution(p_fmat, out_contribs, model_, tree_end,\n                                                   this->TreeWeights(), approximate);\n  }\n\n  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                                       bst_layer_t layer_begin, bst_layer_t layer_end,\n                                       bool approximate) override {\n    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);\n    CHECK_EQ(tree_begin, 0) << \"Predict interaction contribution supports only iteration end: [0, \"\n                               \"n_iteration), using model slicing instead.\";\n    this->GetPredictor(false)->PredictInteractionContributions(\n        p_fmat, out_contribs, model_, tree_end, this->TreeWeights(), approximate);\n  }\n\n  [[nodiscard]] std::vector<std::string> DumpModel(const 
FeatureMap& fmap, bool with_stats,\n                                                   std::string format) const override {\n    return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format);\n  }\n\n protected:\n  [[nodiscard]] std::vector<float> const* TreeWeights() const {\n    return weight_drop_.empty() ? nullptr : &weight_drop_;\n  }\n\n  [[nodiscard]] std::vector<float> DropTrees(bool is_training);\n  std::size_t NormalizeTrees(std::size_t size_new_trees);\n\n  void BoostNewTrees(GradientContainer* gpair, DMatrix* p_fmat, int bst_group,\n                     std::vector<HostDeviceVector<bst_node_t>>* out_position,\n                     std::vector<std::unique_ptr<RegTree>>* ret);\n\n  std::vector<RegTree*> InitNewTrees(bst_target_t bst_group, TreesOneGroup* ret);\n\n  [[nodiscard]] std::unique_ptr<Predictor> const& GetPredictor(\n      bool is_training, HostDeviceVector<float> const* out_pred = nullptr,\n      DMatrix* f_dmat = nullptr) const;\n\n  // commit new trees all at once\n  virtual void CommitModel(TreesOneIter&& new_trees);\n\n  // --- data structure ---\n  GBTreeModel model_;\n  // training parameter\n  GBTreeTrainParam tparam_;\n  DartTrainParam dparam_{};\n  // Tree training parameter\n  tree::TrainParam tree_param_;\n  bool specified_updater_{false};\n  // the updaters that can be applied to each of tree\n  std::vector<std::unique_ptr<TreeUpdater>> updaters_;\n  // Predictors\n  std::unique_ptr<Predictor> cpu_predictor_;\n  std::unique_ptr<Predictor> gpu_predictor_{nullptr};\n#if defined(XGBOOST_USE_SYCL)\n  std::unique_ptr<Predictor> sycl_predictor_;\n#endif  // defined(XGBOOST_USE_SYCL)\n  /*! \\brief per-tree dropout weights */\n  std::vector<bst_float> weight_drop_;\n  // indexes of dropped trees\n  std::vector<size_t> idx_drop_;\n  common::Monitor monitor_;\n};\n\n}  // namespace xgboost::gbm\n\n#endif  // XGBOOST_GBM_GBTREE_H_\n"
  },
  {
    "path": "src/gbm/gbtree_model.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include \"gbtree_model.h\"\n\n#include <algorithm>  // for transform, max_element\n#include <cstddef>    // for size_t\n#include <numeric>    // for partial_sum\n#include <utility>    // for move, pair\n\n#include \"../common/threading_utils.h\"  // for ParallelFor\n#include \"xgboost/context.h\"            // for Context\n#include \"xgboost/json.h\"               // for Json, get, Integer, Array, FromJson, ToJson, Json...\n#include \"xgboost/learner.h\"            // for LearnerModelParam\n#include \"xgboost/logging.h\"            // for LogCheck_EQ, CHECK_EQ, CHECK\n#include \"xgboost/tree_model.h\"         // for RegTree\n\nnamespace xgboost::gbm {\nnamespace {\n// For creating the tree indptr from old models.\nvoid MakeIndptr(GBTreeModel* out_model) {\n  auto const& tree_info = out_model->tree_info.ConstHostVector();\n  if (tree_info.empty()) {\n    return;\n  }\n\n  auto n_groups = *std::max_element(tree_info.cbegin(), tree_info.cend()) + 1;\n\n  auto& indptr = out_model->iteration_indptr;\n  auto layer_trees = out_model->param.num_parallel_tree * n_groups;\n  CHECK_NE(layer_trees, 0);\n  indptr.resize(out_model->param.num_trees / layer_trees + 1, 0);\n  indptr[0] = 0;\n\n  for (std::size_t i = 1; i < indptr.size(); ++i) {\n    indptr[i] = n_groups * out_model->param.num_parallel_tree;\n  }\n  std::partial_sum(indptr.cbegin(), indptr.cend(), indptr.begin());\n}\n\n// Validate the consistency of the model.\nvoid Validate(GBTreeModel const& model) {\n  CHECK_EQ(model.trees.size(), model.param.num_trees);\n  CHECK_EQ(model.tree_info.Size(), model.param.num_trees);\n  // True even if the model is empty since we should always have 0 as the first element.\n  CHECK_EQ(model.iteration_indptr.back(), model.param.num_trees);\n}\n}  // namespace\n\nvoid GBTreeModel::SaveModel(Json* p_out) const {\n  auto& out = *p_out;\n  CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));\n  
out[\"gbtree_model_param\"] = ToJson(param);\n  std::vector<Json> trees_json(trees.size());\n\n  common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {\n    auto const& tree = trees[t];\n    Json jtree{Object{}};\n    tree->SaveModel(&jtree);\n    jtree[\"id\"] = Integer{static_cast<Integer::Int>(t)};\n    trees_json[t] = std::move(jtree);\n  });\n\n  auto const& h_tree_info = tree_info.ConstHostVector();\n  std::vector<Json> tree_info_json(tree_info.Size());\n  for (size_t i = 0; i < h_tree_info.size(); ++i) {\n    tree_info_json[i] = Integer(h_tree_info[i]);\n  }\n\n  out[\"trees\"] = Array(std::move(trees_json));\n  out[\"tree_info\"] = Array(std::move(tree_info_json));\n\n  std::vector<Json> jiteration_indptr(iteration_indptr.size());\n  std::transform(iteration_indptr.cbegin(), iteration_indptr.cend(), jiteration_indptr.begin(),\n                 [](bst_tree_t i) { return Integer{i}; });\n  out[\"iteration_indptr\"] = Array{std::move(jiteration_indptr)};\n\n  this->Cats()->Save(&out[\"cats\"]);\n}\n\nvoid GBTreeModel::LoadModel(Json const& in) {\n  FromJson(in[\"gbtree_model_param\"], &param);\n\n  trees.clear();\n  trees_to_update.clear();\n\n  auto const& jmodel = get<Object const>(in);\n\n  auto const& trees_json = get<Array const>(jmodel.at(\"trees\"));\n  CHECK_EQ(trees_json.size(), param.num_trees);\n  trees.resize(param.num_trees);\n\n  auto const& tree_info_json = get<Array const>(jmodel.at(\"tree_info\"));\n  CHECK_EQ(tree_info_json.size(), param.num_trees);\n  auto& h_tree_info = this->tree_info.HostVector();\n  h_tree_info.resize(param.num_trees);\n\n  common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {\n    auto tree_id = get<Integer const>(trees_json[t][\"id\"]);\n    trees.at(tree_id).reset(new RegTree{});\n    trees[tree_id]->LoadModel(trees_json[t]);\n  });\n\n  for (bst_tree_t i = 0; i < param.num_trees; ++i) {\n    h_tree_info[i] = get<Integer const>(tree_info_json[i]);\n  }\n\n  auto indptr_it = 
jmodel.find(\"iteration_indptr\");\n  iteration_indptr.clear();\n  if (indptr_it != jmodel.cend()) {\n    auto const& vec = get<Array const>(indptr_it->second);\n    iteration_indptr.resize(vec.size());\n    std::transform(vec.cbegin(), vec.cend(), iteration_indptr.begin(),\n                   [](Json const& v) { return get<Integer const>(v); });\n  } else {\n    MakeIndptr(this);\n  }\n\n  auto p_cats = std::make_shared<CatContainer>();\n  auto cat_it = jmodel.find(\"cats\");\n  if (cat_it != jmodel.cend()) {\n    p_cats->Load(cat_it->second);\n  }\n  this->cats_ = std::move(p_cats);\n  Validate(*this);\n}\n\nbst_tree_t GBTreeModel::CommitModel(TreesOneIter&& new_trees) {\n  CHECK(!iteration_indptr.empty());\n  CHECK_EQ(iteration_indptr.back(), param.num_trees);\n  bst_tree_t n_new_trees{0};\n\n  if (learner_model_param->IsVectorLeaf()) {\n    n_new_trees += new_trees.front().size();\n    this->CommitModelGroup(std::move(new_trees.front()), 0);\n  } else {\n    for (bst_target_t gidx{0}; gidx < learner_model_param->OutputLength(); ++gidx) {\n      n_new_trees += new_trees[gidx].size();\n      this->CommitModelGroup(std::move(new_trees[gidx]), gidx);\n    }\n  }\n\n  iteration_indptr.push_back(n_new_trees + iteration_indptr.back());\n  Validate(*this);\n  return n_new_trees;\n}\n\nvoid GBTreeModel::CommitModelGroup(TreesOneGroup&& new_trees, bst_target_t group_idx) {\n  auto& h_tree_info = this->tree_info.HostVector();\n  for (auto& new_tree : new_trees) {\n    trees.push_back(std::move(new_tree));\n    h_tree_info.push_back(group_idx);\n  }\n  param.num_trees += static_cast<int>(new_trees.size());\n}\n\ncommon::Span<bst_target_t const> GBTreeModel::TreeGroups(DeviceOrd device) const {\n  return device.IsCPU() ? this->tree_info.ConstHostSpan()\n                        : (this->tree_info.SetDevice(device), this->tree_info.ConstDeviceSpan());\n}\n}  // namespace xgboost::gbm\n"
  },
  {
    "path": "src/gbm/gbtree_model.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n *\n * @file gbtree_model.h\n */\n#ifndef XGBOOST_GBM_GBTREE_MODEL_H_\n#define XGBOOST_GBM_GBTREE_MODEL_H_\n\n#include <dmlc/parameter.h>\n\n#include <memory>\n#include <mutex>\n#include <string>\n#include <vector>\n\n#include \"../common/threading_utils.h\"\n#include \"../data/cat_container.h\"  // for CatContainer\n#include \"xgboost/context.h\"\n#include \"xgboost/learner.h\"\n#include \"xgboost/model.h\"\n#include \"xgboost/tree_model.h\"\n\nnamespace xgboost {\n\nclass Json;\n\nnamespace gbm {\n/**\n * @brief Container for all trees built (not update) for one group.\n */\nusing TreesOneGroup = std::vector<std::unique_ptr<RegTree>>;\n/**\n * @brief Container for all trees built (not update) for one iteration.\n */\nusing TreesOneIter = std::vector<TreesOneGroup>;\n\n/** @brief GBTree model parameters. */\nstruct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {\n public:\n  /**\n   * @brief The number of trees.\n   */\n  std::int32_t num_trees{0};\n  /**\n   * @brief Number of trees for a single forest.\n   */\n  std::int32_t num_parallel_tree{1};\n\n  GBTreeModelParam() = default;\n\n  // declare parameters, only declare those that need to be set.\n  DMLC_DECLARE_PARAMETER(GBTreeModelParam) {\n    DMLC_DECLARE_FIELD(num_trees).set_lower_bound(0).set_default(0).describe(\n        \"Number of trees for the entire booster model.\");\n    DMLC_DECLARE_FIELD(num_parallel_tree)\n        .set_default(1)\n        .set_lower_bound(1)\n        .describe(\n            \"Number of parallel trees constructed during each iteration.\"\n            \" This option is used to support boosted random forest.\");\n  }\n};\n\nstruct GBTreeModel : public Model {\n public:\n  explicit GBTreeModel(LearnerModelParam const* learner_model, Context const* ctx)\n      : learner_model_param{learner_model}, ctx_{ctx} {}\n  void Configure(Args const& cfg) {\n    // initialize model parameters if not yet been 
initialized.\n    if (trees.size() == 0) {\n      param.UpdateAllowUnknown(cfg);\n    }\n  }\n  /** @brief Move existing trees into the update queue. */\n  void InitTreesToUpdate();\n\n  void SaveModel(Json* p_out) const override;\n  void LoadModel(Json const& p_out) override;\n\n  [[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,\n                                                   int32_t n_threads, std::string format) const {\n    std::vector<std::string> dump(trees.size());\n    common::ParallelFor(trees.size(), n_threads,\n                        [&](size_t i) { dump[i] = trees[i]->DumpModel(fmap, with_stats, format); });\n    return dump;\n  }\n  /**\n   * @brief Add trees to the model.\n   *\n   * @return The number of new trees.\n   */\n  bst_tree_t CommitModel(TreesOneIter&& new_trees);\n\n  void CommitModelGroup(TreesOneGroup&& new_trees, bst_target_t group_idx);\n\n  [[nodiscard]] std::int32_t BoostedRounds() const {\n    if (trees.empty()) {\n      CHECK_EQ(iteration_indptr.size(), 1);\n    }\n    return static_cast<std::int32_t>(iteration_indptr.size() - 1);\n  }\n\n  /** @brief Global model properties. */\n  LearnerModelParam const* learner_model_param;\n  /** @brief GBTree model parameters. */\n  GBTreeModelParam param;\n  /*! \\brief vector of trees stored in the model */\n  std::vector<std::unique_ptr<RegTree>> trees;\n  /*! 
\\brief for the update process, a place to keep the initial trees */\n  std::vector<std::unique_ptr<RegTree>> trees_to_update;\n  /**\n   * @brief Group index for trees.\n   */\n  HostDeviceVector<bst_target_t> tree_info;\n  /**\n   * @brief Number of trees accumulated for each iteration.\n   */\n  std::vector<bst_tree_t> iteration_indptr{0};\n\n  [[nodiscard]] CatContainer const* Cats() const { return this->cats_.get(); }\n  [[nodiscard]] CatContainer* Cats() { return this->cats_.get(); }\n  [[nodiscard]] std::shared_ptr<CatContainer> CatsShared() const { return this->cats_; }\n  void Cats(std::shared_ptr<CatContainer> cats) { this->cats_ = cats; }\n\n  auto const* Ctx() const { return this->ctx_; }\n  /**\n   * @brief Getter for the tree group index.\n   */\n  common::Span<bst_target_t const> TreeGroups(DeviceOrd device) const;\n  [[nodiscard]] std::mutex& Mutex() const { return tree_view_mu_; }\n\n private:\n  /**\n   * @brief Categories in the training data.\n   */\n  std::shared_ptr<CatContainer> cats_{std::make_shared<CatContainer>()};\n  mutable std::mutex tree_view_mu_;\n  Context const* ctx_;\n};\n}  // namespace gbm\n}  // namespace xgboost\n\n#endif  // XGBOOST_GBM_GBTREE_MODEL_H_\n"
  },
  {
    "path": "src/global_config.cc",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n * \\file global_config.cc\n * \\brief Global configuration for XGBoost\n * \\author Hyunsu Cho\n */\n\n#include \"xgboost/global_config.h\"\n\n#include <dmlc/thread_local.h>\n\n#include \"common/cuda_rt_utils.h\"  // for SetDevice\n\nnamespace xgboost {\nDMLC_REGISTER_PARAMETER(GlobalConfiguration);\n\nInitNewThread::InitNewThread()\n    : config{*GlobalConfigThreadLocalStore::Get()}, device{curt::CurrentDevice(false)} {}\n\nvoid InitNewThread::operator()() const {\n  *GlobalConfigThreadLocalStore::Get() = config;\n  if (config.nthread > 0) {\n    omp_set_num_threads(config.nthread);\n  }\n  if (device >= 0) {\n    curt::SetDevice(this->device);\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/learner.cc",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file learner.cc\n * \\brief Implementation of learning algorithm.\n * \\author Tianqi Chen\n */\n#include \"xgboost/learner.h\"\n\n#include <dmlc/io.h>            // for Stream\n#include <dmlc/parameter.h>     // for FieldEntry, DMLC_DECLARE_FIELD, Parameter, DMLC...\n#include <dmlc/thread_local.h>  // for ThreadLocalStore\n\n#include <algorithm>      // for equal, max, transform, sort, find_if, all_of\n#include <atomic>         // for atomic\n#include <cctype>         // for isalpha, isspace\n#include <cmath>          // for isnan, isinf\n#include <cstdint>        // for int32_t, uint32_t, int64_t, uint64_t\n#include <cstdlib>        // for atoi\n#include <cstring>        // for memcpy, size_t, memset\n#include <iomanip>        // for operator<<, setiosflags\n#include <iterator>       // for back_insert_iterator, distance, back_inserter\n#include <limits>         // for numeric_limits\n#include <memory>         // for allocator, unique_ptr, shared_ptr, operator==\n#include <mutex>          // for mutex, lock_guard\n#include <sstream>        // for operator<<, basic_ostream, basic_ostream::opera...\n#include <stack>          // for stack\n#include <string>         // for basic_string, char_traits, operator<, string\n#include <system_error>   // for errc\n#include <unordered_map>  // for operator!=, unordered_map\n#include <utility>        // for pair, as_const, move, swap\n#include <vector>         // for vector\n\n#include \"collective/aggregator.h\"        // for ApplyWithLabels\n#include \"collective/communicator-inl.h\"  // for Allreduce, Broadcast, GetRank, IsDistributed\n#include \"common/api_entry.h\"             // for XGBAPIThreadLocalEntry\n#include \"common/charconv.h\"              // for to_chars, to_chars_result, NumericLimits, from_...\n#include \"common/error_msg.h\"             // for MaxFeatureSize, WarnOldSerialization, ...\n#include \"common/io.h\"                    // for 
PeekableInStream, ReadAll, FixedSizeStream, Mem...\n#include \"common/observer.h\"              // for TrainingObserver\n#include \"common/param_array.h\"           // for ParamArray\n#include \"common/timer.h\"                 // for Monitor\n#include \"common/version.h\"               // for Version\n#include \"xgboost/base.h\"                 // for Args, GradientPair, bst_feature_t\n#include \"xgboost/context.h\"              // for Context\n#include \"xgboost/data.h\"                 // for DMatrix, MetaInfo\n#include \"xgboost/gbm.h\"                  // for GradientBooster\n#include \"xgboost/global_config.h\"        // for GlobalConfiguration, GlobalConfigThreadLocalStore\n#include \"xgboost/host_device_vector.h\"   // for HostDeviceVector\n#include \"xgboost/json.h\"                 // for Json, get, Object, String, IsA, Array, ToJson\n#include \"xgboost/linalg.h\"               // for Vector, VectorView\n#include \"xgboost/logging.h\"              // for CHECK, LOG, CHECK_EQ\n#include \"xgboost/metric.h\"               // for Metric\n#include \"xgboost/objective.h\"            // for ObjFunction\n#include \"xgboost/parameter.h\"            // for DECLARE_FIELD_ENUM_CLASS, XGBoostParameter\n#include \"xgboost/predictor.h\"            // for PredictionContainer, PredictionCacheEntry\n#include \"xgboost/string_view.h\"          // for operator<<, StringView\n#include \"xgboost/task.h\"                 // for ObjInfo\n\nnamespace {\nconst char* kMaxDeltaStepDefaultValue = \"0.7\";\n}  // anonymous namespace\n\nDECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy);\n\nnamespace xgboost {\nLearner::~Learner() = default;\nnamespace {\nStringView ModelNotFitted() { return \"Model is not yet initialized (not fitted).\"; }\n\ntemplate <typename T>\nT& UsePtr(T& ptr) {  // NOLINT\n  CHECK(ptr);\n  return ptr;\n}\n}  // anonymous namespace\n\n/*! 
\\brief training parameter for regression\n *\n * Should be deprecated, but still used for being compatible with binary IO.\n * Once it's gone, `LearnerModelParam` should handle transforming `base_score`\n * with objective by itself.\n */\nstruct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {\n  /** @brief Global bias/intercept. */\n  common::ParamArray<float> base_score{\"base_score\"};\n  /** @brief number of features  */\n  bst_feature_t num_feature{0};\n  /** @brief number of classes, if it is multi-class classification, 0 otherwise.  */\n  std::int32_t num_class{0};\n  /**! @brief the version of XGBoost. */\n  std::int32_t major_version{std::get<0>(Version::Self())};\n  std::int32_t minor_version{std::get<1>(Version::Self())};\n  /**\n   * @brief Number of target variables.\n   */\n  bst_target_t num_target{1};\n  /**\n   * @brief Whether we should calculate the base score from training data.\n   *\n   *   This is a private parameter as we can't expose it as boolean due to binary model\n   *   format. Exposing it as integer creates inconsistency with other parameters.\n   *\n   *   Automatically disabled when base_score is specified by user. 
int32 is used instead\n   *   of bool for the ease of serialization.\n   */\n  std::int32_t boost_from_average{true};\n\n  LearnerModelParamLegacy() = default;\n\n  [[nodiscard]] Json ToJson() const {\n    Json obj{Object{}};\n    std::stringstream ss;\n    ss << base_score;\n    obj[\"base_score\"] = ss.str();\n\n    char integers[NumericLimits<int64_t>::kToCharsSize];\n    auto ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,\n                        static_cast<int64_t>(num_feature));\n    CHECK(ret.ec == std::errc());\n    obj[\"num_feature\"] =\n        std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};\n    ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,\n                   static_cast<int64_t>(num_class));\n    CHECK(ret.ec == std::errc());\n    obj[\"num_class\"] = std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};\n\n    ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,\n                   static_cast<int64_t>(num_target));\n    obj[\"num_target\"] =\n        std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};\n\n    ret = to_chars(integers, integers + NumericLimits<std::int64_t>::kToCharsSize,\n                   static_cast<std::int64_t>(boost_from_average));\n    obj[\"boost_from_average\"] =\n        std::string{integers, static_cast<std::size_t>(std::distance(integers, ret.ptr))};\n\n    return obj;\n  }\n  void FromJson(Json const& obj) {\n    auto const& j_param = get<Object const>(obj);\n    std::map<std::string, std::string> m;\n    m[\"num_feature\"] = get<String const>(j_param.at(\"num_feature\"));\n    m[\"num_class\"] = get<String const>(j_param.at(\"num_class\"));\n    auto n_targets_it = j_param.find(\"num_target\");\n    if (n_targets_it != j_param.cend()) {\n      m[\"num_target\"] = get<String const>(n_targets_it->second);\n    }\n    auto bse_it = 
j_param.find(\"boost_from_average\");\n    if (bse_it != j_param.cend()) {\n      m[\"boost_from_average\"] = get<String const>(bse_it->second);\n    }\n    std::string str = get<String const>(j_param.at(\"base_score\"));\n    m[\"base_score\"] = str;\n    this->Init(m);\n    this->HandleOldFormat();\n  }\n  // Handle old model formats, before 3.1, the intercept was always a scalar.\n  void HandleOldFormat() {\n    if (this->base_score.size() == 1 && this->OutputLength() > 1) {\n      this->base_score.Resize(this->OutputLength(), this->base_score[0]);\n    }\n  }\n\n  template <typename Container>\n  Args UpdateAllowUnknown(Container const& kwargs) {\n    // Detect whether user has made their own base score.\n    auto has_key = [&kwargs](char const* key) {\n      return std::find_if(kwargs.cbegin(), kwargs.cend(),\n                          [key](auto const& kv) { return kv.first == key; }) != kwargs.cend();\n    };\n    if (has_key(\"base_score\")) {\n      this->boost_from_average = false;\n    }\n    return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);\n  }\n  // The number of outputs of the model.\n  [[nodiscard]] bst_target_t OutputLength() const noexcept {\n    return std::max({this->num_target, static_cast<bst_target_t>(this->num_class),\n                     static_cast<bst_target_t>(1)});\n  }\n\n  // Sanity checks\n  void Validate(Context const* ctx) const {\n    this->ValidateLength();\n    CHECK(std::none_of(base_score.cbegin(), base_score.cend(),\n                       [](float v) { return std::isnan(v) || std::isinf(v); }));\n\n    if (!collective::IsDistributed()) {\n      return;\n    }\n\n    std::vector<char> data;\n    Json::Dump(this->ToJson(), &data, std::ios::binary);\n    std::vector<char> sync{data};\n\n    auto rc = collective::Broadcast(ctx, linalg::MakeVec(sync.data(), sync.size()), 0);\n    collective::SafeColl(rc);\n\n    CHECK(std::equal(data.cbegin(), data.cend(), sync.cbegin()))\n        << \"Different model 
parameter across workers:\\n\\t\"\n        << Json::Load(StringView{data.data(), data.size()}, std::ios::binary) << \"\\nvs.\\n\\t\"\n        << Json::Load(StringView{sync.data(), sync.size()}, std::ios::binary);\n  }\n\n  void ValidateLength() const {\n    CHECK_GE(this->base_score.size(), 1);\n    std::size_t n_classes = static_cast<std::size_t>(num_class),\n                n_targets = static_cast<std::size_t>(num_target);\n    if (!(base_score.size() == n_classes || base_score.size() == n_targets)) {\n      error::InvalidIntercept(n_classes, n_targets, base_score.size());\n    }\n  }\n\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {\n    DMLC_DECLARE_FIELD(base_score)\n        .describe(\"Global bias of the model.\")\n        .set_default(common::ParamArray<float>{\"base_score\"});\n    DMLC_DECLARE_FIELD(num_feature)\n        .set_default(0)\n        .describe(\n            \"Number of features in training data, this parameter will be automatically detected by \"\n            \"learner.\");\n    DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(\n        \"Number of class option for multi-class classifier. \"\n        \" By default equals 0 and corresponds to binary classifier.\");\n    DMLC_DECLARE_FIELD(num_target)\n        .set_default(1)\n        .set_lower_bound(1)\n        .describe(\"Number of output targets. 
Can be set automatically if not specified.\");\n    DMLC_DECLARE_FIELD(boost_from_average)\n        .set_default(true)\n        .describe(\"Whether we should calculate the base score from training data.\");\n  }\n};\n}  // namespace xgboost\n\nnamespace xgboost {\nLearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,\n                                     MultiStrategy multi_strategy)\n    : num_feature{user_param.num_feature},\n      num_output_group{user_param.OutputLength()},\n      task{t},\n      multi_strategy{multi_strategy} {\n  if (user_param.num_class > 1 && user_param.num_target > 1) {\n    LOG(FATAL) << \"multi-target-multi-class is not yet supported. Output classes:\"\n               << user_param.num_class << \", output targets:\" << user_param.num_target;\n  }\n}\n\nLearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,\n                                     linalg::Vector<float> base_score, ObjInfo t,\n                                     MultiStrategy multi_strategy)\n    : LearnerModelParam{user_param, t, multi_strategy} {\n  std::swap(base_score_, base_score);\n  // Make sure read access everywhere for thread-safe prediction.\n  std::as_const(base_score_).HostView();\n  if (!ctx->IsCPU()) {\n    std::as_const(base_score_).View(ctx->Device());\n  }\n  CHECK(std::as_const(base_score_).Data()->HostCanRead());\n}\n\nlinalg::VectorView<float const> LearnerModelParam::BaseScore(DeviceOrd device) const {\n  // multi-class is not yet supported.\n  CHECK_GE(base_score_.Size(), 1) << ModelNotFitted();\n  if (device.IsCPU()) {\n    // Make sure that we won't run into race condition.\n    CHECK(base_score_.Data()->HostCanRead());\n    return base_score_.HostView();\n  }\n  // Make sure that we won't run into race condition.\n  CHECK(base_score_.Data()->DeviceCanRead());\n  auto v = base_score_.View(device);\n  CHECK(base_score_.Data()->HostCanRead());  // make sure read access is 
not removed.\n  return v;\n}\n\nlinalg::VectorView<float const> LearnerModelParam::BaseScore(Context const* ctx) const {\n  return this->BaseScore(ctx->Device());\n}\n\nvoid LearnerModelParam::Copy(LearnerModelParam const& that) {\n  base_score_.Reshape(that.base_score_.Shape());\n  base_score_.Data()->SetDevice(that.base_score_.Device());\n  base_score_.Data()->Copy(*that.base_score_.Data());\n  std::as_const(base_score_).HostView();\n  if (!that.base_score_.Device().IsCPU()) {\n    std::as_const(base_score_).View(that.base_score_.Device());\n  }\n  CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead());\n  CHECK(base_score_.Data()->HostCanRead());\n\n  num_feature = that.num_feature;\n  num_output_group = that.num_output_group;\n  task = that.task;\n  multi_strategy = that.multi_strategy;\n}\n\nstruct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {\n  // flag to disable default metric\n  bool disable_default_eval_metric{false};\n  // FIXME(trivialfis): The following parameters belong to model itself, but can be\n  // specified by users.  Move them to model parameter once we can get rid of binary IO.\n  std::string booster;\n  std::string objective;\n  // This is a training parameter and is not saved (nor loaded) in the model.\n  MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};\n\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(LearnerTrainParam) {\n    DMLC_DECLARE_FIELD(disable_default_eval_metric)\n        .set_default(false)\n        .describe(\"Flag to disable default metric. 
Set to >0 to disable\");\n    DMLC_DECLARE_FIELD(booster).set_default(\"gbtree\").describe(\n        \"Gradient booster used for training.\");\n    DMLC_DECLARE_FIELD(objective)\n        .set_default(\"reg:squarederror\")\n        .describe(\"Objective function used for obtaining gradient.\");\n    DMLC_DECLARE_FIELD(multi_strategy)\n        .add_enum(\"one_output_per_tree\", MultiStrategy::kOneOutputPerTree)\n        .add_enum(\"multi_output_tree\", MultiStrategy::kMultiOutputTree)\n        .set_default(MultiStrategy::kOneOutputPerTree)\n        .describe(\n            \"Strategy used for training multi-target models. `multi_output_tree` means building \"\n            \"one single tree for all targets.\");\n  }\n};\n\nDMLC_REGISTER_PARAMETER(LearnerModelParamLegacy);\nDMLC_REGISTER_PARAMETER(LearnerTrainParam);\n\nusing LearnerAPIThreadLocalStore =\n    dmlc::ThreadLocalStore<std::map<Learner const*, XGBAPIThreadLocalEntry>>;\n\nnamespace {\nstd::string CanonicalizeBoosterName(std::string booster) {\n  if (booster == \"dart\") {\n    static std::once_flag flag;\n    std::call_once(flag, [] {\n      LOG(WARNING) << \"`booster=dart` is deprecated. Use the tree booster directly with \"\n                      \"dropout parameters like `rate_drop`, `skip_drop`, or `one_drop`.\";\n    });\n    return \"gbtree\";\n  }\n  return booster;\n}\n\n/**\n * @brief Handler for the `n_targets` property and the intercept.\n */\nclass Intercept : public Learner {\n  using CacheT = common::GetValueT<decltype(std::declval<PredictionContainer>().Container())>;\n\n protected:\n  /**\n   * @brief User-provided model parameter.\n   *\n   * This parameter is the most difficult one in XGBoost. It stores basic properties of\n   * the booster model and is saved as part of the booster. 
We need to configure it\n   * automatically from input training data while taking user-provided parameters into\n   * account.\n   *\n   * It's difficult because XGBoost has an interface that exposes many states. For\n   * instance, we need to have a valid model after configuration, without seeing the\n   * training data. This exposes a partially initialized model that's semi-valid.\n   */\n  LearnerModelParamLegacy mparam_;\n  /**\n   * @brief Internal model parameter.\n   */\n  LearnerModelParam learner_model_param_;\n\n private:\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) {\n    base_score->SetDevice(this->Ctx()->Device());\n    base_score->Reshape(this->mparam_.OutputLength());\n    collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),\n                                [&] { UsePtr(obj_)->InitEstimation(info, base_score); });\n  }\n\n  [[nodiscard]] bool NeedFit() const {\n    return this->mparam_.boost_from_average && !UsePtr(gbm_)->ModelFitted();\n  }\n\n  // Create the internal model parameter from user inputs, this requires the user input to\n  // be initialized first.\n  //\n  // Don't apply the link function if the base_score is a dummy value.\n  //\n  // This function should be called for every `Configure` call to make sure the base_score\n  // is stored in the right place.\n  void InitModelParam(LearnerTrainParam const& tparam, bool apply_link) {\n    auto const& in = this->mparam_.base_score;\n    auto task = UsePtr(this->obj_)->Task();\n    linalg::Vector<float> base_score{in.cbegin(), in.cend(), {in.size()}, this->ctx_.Device()};\n    if (apply_link) {\n      UsePtr(this->obj_)->ProbToMargin(&base_score);\n    }\n\n    learner_model_param_ =\n        LearnerModelParam{Ctx(), mparam_, std::move(base_score), task, tparam.multi_strategy};\n  }\n\n  /**\n   * Get the number of targets from the cache using the objective function.\n   */\n  void GetNumTargets(CacheT const& cache) {\n    CHECK(this->obj_);\n   
 bst_target_t n_targets = 1;\n    for (auto const& d : cache) {\n      if (n_targets == 1) {\n        n_targets = this->obj_->Targets(d.first.ptr->Info());\n      } else {\n        auto t = this->obj_->Targets(d.first.ptr->Info());\n        CHECK(n_targets == t || 1 == t) << \"Inconsistent labels.\";\n      }\n    }\n\n    if (mparam_.num_target > 1) {\n      CHECK(n_targets == 1 || n_targets == mparam_.num_target)\n          << \"Inconsistent configuration of the `num_target`.  Configuration result from input \"\n          << \"data:\" << n_targets << \", configuration from parameters:\" << mparam_.num_target;\n    } else {\n      mparam_.num_target = n_targets;\n    }\n  }\n\n protected:\n  void CheckModelInitialized() const {\n    CHECK(learner_model_param_.Initialized()) << ModelNotFitted();\n    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();\n  }\n\n  void InitModelUserParam(LearnerTrainParam const& tparam, CacheT const& cache) {\n    this->GetNumTargets(cache);\n\n    if (this->NeedFit()) {\n      // Initialize with a sensible default value to get prediction/model io going.\n      this->mparam_.base_score.Resize(this->mparam_.OutputLength(),\n                                      ObjFunction::DefaultBaseScore());\n      this->InitModelParam(tparam, false);\n      // This should not be altered, we will estimate it later.\n      CHECK(this->NeedFit());\n    } else if (this->gbm_->ModelFitted()) {\n      this->mparam_.ValidateLength();\n      // Init with a valid (configured) mparam\n      this->InitModelParam(tparam, true);\n    } else {\n      // user-provided\n      this->mparam_.HandleOldFormat();\n      this->InitModelParam(tparam, true);\n    }\n  }\n\n  /**\n   * @brief Calculate the `base_score` based on input data.\n   *\n   * @param p_fmat The training DMatrix used to estimate the base score.\n   */\n  void FitIntercept(LearnerTrainParam const& tparam, DMatrix const* p_fmat) {\n    // Estimate the intercept if this 
is the first iteration.\n    if (this->NeedFit()) {\n      // The DMatrix can be null if a method other than training is called.\n      if (p_fmat) {\n        auto const& info = p_fmat->Info();\n        info.Validate(Ctx()->Device());\n        // We estimate it from the input data.\n        linalg::Vector<float> base_score;\n        this->InitEstimation(info, &base_score);\n\n        mparam_.base_score = base_score.Data()->ConstHostVector();\n      }\n      this->InitModelParam(tparam, true);\n      // Check whether the base score is valid.\n      mparam_.Validate(&ctx_);\n    }\n\n    this->CheckModelInitialized();\n  }\n};\n}  // namespace\n\nclass LearnerConfiguration : public Intercept {\n private:\n  std::mutex config_lock_;\n\n protected:\n  static std::string const kEvalMetric;  // NOLINT\n\n protected:\n  std::atomic<bool> need_configuration_;\n  std::map<std::string, std::string> cfg_;\n  // Stores information like best-iteration for early stopping.\n  std::map<std::string, std::string> attributes_;\n  // Name of each feature, usually set from DMatrix.\n  std::vector<std::string> feature_names_;\n  // Type of each feature, usually set from DMatrix.\n  std::vector<std::string> feature_types_;\n\n  common::Monitor monitor_;\n  LearnerTrainParam tparam_;\n  // Initial prediction.\n  PredictionContainer prediction_container_;\n\n  std::vector<std::string> metric_names_;\n\n public:\n  explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix>> cache)\n      : need_configuration_{true} {\n    monitor_.Init(\"Learner\");\n    for (std::shared_ptr<DMatrix> const& d : cache) {\n      if (d) {\n        prediction_container_.Cache(d, DeviceOrd::CPU());\n      }\n    }\n  }\n\n  // Configuration before data is known.\n  void Configure() override {\n    // Variant of double checked lock\n    if (!this->need_configuration_) {\n      return;\n    }\n    std::lock_guard<std::mutex> guard(config_lock_);\n    if (!this->need_configuration_) {\n      return;\n    
}\n\n    monitor_.Start(\"Configure\");\n    auto old_tparam = tparam_;\n    Args args = {cfg_.cbegin(), cfg_.cend()};\n\n    tparam_.UpdateAllowUnknown(args);\n    mparam_.UpdateAllowUnknown(args);\n\n    auto initialized = ctx_.GetInitialised();\n    auto old_seed = ctx_.seed;\n    ctx_.UpdateAllowUnknown(args);\n\n    ConsoleLogger::Configure(args);\n\n    // set seed only before the model is initialized\n    if (!initialized || ctx_.seed != old_seed) {\n      ctx_.Rng().seed(ctx_.seed);\n    }\n\n    // must precede configure gbm since num_features is required for gbm\n    this->ConfigureNumFeatures();\n    args = {cfg_.cbegin(), cfg_.cend()};  // renew\n    this->ConfigureObjective(old_tparam, &args);\n\n    learner_model_param_.task = obj_->Task();  // required by gbm configuration.\n    this->ConfigureGBM(old_tparam, args);\n\n    this->InitModelUserParam(this->tparam_, this->prediction_container_.Container());\n\n    this->ConfigureMetrics(args);\n\n    this->need_configuration_ = false;\n    if (ctx_.validate_parameters) {\n      this->ValidateParameters();\n    }\n\n    cfg_.clear();\n    monitor_.Stop(\"Configure\");\n  }\n\n  void LoadConfig(Json const& in) override {\n    // If configuration is loaded, ensure that the model came from the same version\n    CHECK(IsA<Object>(in));\n    auto origin_version = Version::Load(in);\n    if (std::get<0>(Version::kInvalid) == std::get<0>(origin_version)) {\n      LOG(WARNING) << \"Invalid version string in config\";\n    }\n\n    if (!Version::Same(origin_version)) {\n      error::WarnOldSerialization();\n      return;  // skip configuration if version is not matched\n    }\n\n    auto const& learner_parameters = get<Object>(in[\"learner\"]);\n    FromJson(learner_parameters.at(\"learner_train_param\"), &tparam_);\n\n    auto const& gradient_booster = learner_parameters.at(\"gradient_booster\");\n\n    auto const& objective_fn = learner_parameters.at(\"objective\");\n    if (!obj_) {\n      CHECK_EQ(get<String 
const>(objective_fn[\"name\"]), tparam_.objective);\n      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));\n    }\n    obj_->LoadConfig(objective_fn);\n    learner_model_param_.task = obj_->Task();\n\n    tparam_.booster = CanonicalizeBoosterName(get<String>(gradient_booster[\"name\"]));\n    if (!gbm_) {\n      gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));\n    }\n    gbm_->LoadConfig(gradient_booster);\n\n    auto const& j_metrics = learner_parameters.at(\"metrics\");\n    auto n_metrics = get<Array const>(j_metrics).size();\n    metric_names_.resize(n_metrics);\n    metrics_.resize(n_metrics);\n    for (size_t i = 0; i < n_metrics; ++i) {\n      auto old_serialization = IsA<String>(j_metrics[i]);\n      if (old_serialization) {\n        error::WarnOldSerialization();\n        metric_names_[i] = get<String>(j_metrics[i]);\n      } else {\n        metric_names_[i] = get<String>(j_metrics[i][\"name\"]);\n      }\n      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &ctx_));\n      if (!old_serialization) {\n        metrics_[i]->LoadConfig(j_metrics[i]);\n      }\n    }\n\n    ctx_.FromJson(learner_parameters.at(\"generic_param\"));\n\n    this->need_configuration_ = true;\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    CHECK(!this->need_configuration_) << \"Call Configure before saving model.\";\n    Version::Save(p_out);\n    Json& out{*p_out};\n    // parameters\n    out[\"learner\"] = Object();\n    auto& learner_parameters = out[\"learner\"];\n\n    learner_parameters[\"learner_train_param\"] = ToJson(tparam_);\n    learner_parameters[\"learner_model_param\"] = mparam_.ToJson();\n    learner_parameters[\"gradient_booster\"] = Object();\n    auto& gradient_booster = learner_parameters[\"gradient_booster\"];\n    gbm_->SaveConfig(&gradient_booster);\n\n    learner_parameters[\"objective\"] = Object();\n    auto& objective_fn = learner_parameters[\"objective\"];\n    
obj_->SaveConfig(&objective_fn);\n\n    std::vector<Json> metrics(metrics_.size());\n    for (size_t i = 0; i < metrics_.size(); ++i) {\n      metrics[i] = Object{};\n      metrics_[i]->SaveConfig(&metrics[i]);\n    }\n    learner_parameters[\"metrics\"] = Array(std::move(metrics));\n\n    learner_parameters[\"generic_param\"] = ctx_.ToJson();\n  }\n\n  void SetParam(const std::string& key, const std::string& value) override {\n    this->need_configuration_ = true;\n    if (key == kEvalMetric) {\n      if (std::find(metric_names_.cbegin(), metric_names_.cend(), value) == metric_names_.cend()) {\n        metric_names_.emplace_back(value);\n      }\n    } else {\n      cfg_[key] = value;\n    }\n  }\n  // Short hand for setting multiple parameters\n  void SetParams(std::vector<std::pair<std::string, std::string>> const& args) override {\n    for (auto const& kv : args) {\n      this->SetParam(kv.first, kv.second);\n    }\n  }\n\n  uint32_t GetNumFeature() const override { return learner_model_param_.num_feature; }\n\n  void SetAttr(const std::string& key, const std::string& value) override {\n    attributes_[key] = value;\n  }\n\n  bool GetAttr(const std::string& key, std::string* out) const override {\n    auto it = attributes_.find(key);\n    if (it == attributes_.end()) return false;\n    *out = it->second;\n    return true;\n  }\n\n  bool DelAttr(const std::string& key) override {\n    auto it = attributes_.find(key);\n    if (it == attributes_.end()) {\n      return false;\n    }\n    attributes_.erase(it);\n    return true;\n  }\n\n  void SetFeatureNames(std::vector<std::string> const& fn) override { feature_names_ = fn; }\n\n  void GetFeatureNames(std::vector<std::string>* fn) const override { *fn = feature_names_; }\n\n  void SetFeatureTypes(std::vector<std::string> const& ft) override { this->feature_types_ = ft; }\n\n  void GetFeatureTypes(std::vector<std::string>* p_ft) const override {\n    auto& ft = *p_ft;\n    ft = this->feature_types_;\n  }\n  
[[nodiscard]] CatContainer const* Cats() const override {\n    this->CheckModelInitialized();\n    return this->gbm_->Cats();\n  }\n\n  std::vector<std::string> GetAttrNames() const override {\n    std::vector<std::string> out;\n    for (auto const& kv : attributes_) {\n      out.emplace_back(kv.first);\n    }\n    return out;\n  }\n\n  const std::map<std::string, std::string>& GetConfigurationArguments() const override {\n    return cfg_;\n  }\n\n  Context const* Ctx() const override { return &ctx_; }\n\n private:\n  void ValidateParameters() {\n    Json config{Object()};\n    this->SaveConfig(&config);\n    std::stack<Json> stack;\n    stack.push(config);\n    std::string const postfix{\"_param\"};\n\n    auto is_parameter = [&postfix](std::string const& key) {\n      return key.size() > postfix.size() &&\n             std::equal(postfix.rbegin(), postfix.rend(), key.rbegin());\n    };\n\n    // Extract all parameters\n    std::vector<std::string> keys;\n    // First global parameters\n    Json const global_config{ToJson(*GlobalConfigThreadLocalStore::Get())};\n    for (auto const& items : get<Object const>(global_config)) {\n      keys.emplace_back(items.first);\n    }\n    // Parameters in various xgboost components.\n    while (!stack.empty()) {\n      auto j_obj = stack.top();\n      stack.pop();\n      auto const& obj = get<Object const>(j_obj);\n\n      for (auto const& kv : obj) {\n        if (is_parameter(kv.first)) {\n          auto parameter = get<Object const>(kv.second);\n          std::transform(\n              parameter.begin(), parameter.end(), std::back_inserter(keys),\n              [](std::pair<std::string const&, Json const&> const& kv) { return kv.first; });\n        } else if (IsA<Object>(kv.second)) {\n          stack.push(kv.second);\n        } else if (IsA<Array>(kv.second)) {\n          auto const& array = get<Array const>(kv.second);\n          for (auto const& v : array) {\n            if (IsA<Object>(v) || IsA<Array>(v)) {\n            
  stack.push(v);\n            }\n          }\n        }\n      }\n    }\n\n    // FIXME(trivialfis): Make eval_metric a training parameter.\n    keys.emplace_back(kEvalMetric);\n    keys.emplace_back(\"num_output_group\");\n\n    std::sort(keys.begin(), keys.end());\n\n    std::vector<std::string> provided;\n    for (auto const& kv : cfg_) {\n      if (std::any_of(kv.first.cbegin(), kv.first.cend(),\n                      [](char ch) { return std::isspace(ch); })) {\n        LOG(FATAL) << \"Invalid parameter \\\"\" << kv.first << \"\\\" contains whitespace.\";\n      }\n      provided.push_back(kv.first);\n    }\n    std::sort(provided.begin(), provided.end());\n\n    std::vector<std::string> diff;\n    std::set_difference(provided.begin(), provided.end(), keys.begin(), keys.end(),\n                        std::back_inserter(diff));\n    if (diff.size() != 0) {\n      std::stringstream ss;\n      ss << \"\\nParameters: { \";\n      for (size_t i = 0; i < diff.size() - 1; ++i) {\n        ss << \"\\\"\" << diff[i] << \"\\\", \";\n      }\n      ss << \"\\\"\" << diff.back() << \"\\\"\";\n      ss << R\"W( } are not used.\n)W\";\n      LOG(WARNING) << ss.str();\n    }\n  }\n\n  void ConfigureNumFeatures() {\n    // Compute number of global features if parameter not already set\n    if (mparam_.num_feature == 0) {\n      // TODO(hcho3): Change num_feature to 64-bit integer\n      unsigned num_feature = 0;\n      for (auto const& matrix : prediction_container_.Container()) {\n        CHECK(matrix.first.ptr);\n        CHECK(!matrix.second.ref.expired());\n        const uint64_t num_col = matrix.first.ptr->Info().num_col_;\n        error::MaxFeatureSize(num_col);\n        num_feature = std::max(num_feature, static_cast<uint32_t>(num_col));\n      }\n\n      auto rc =\n          collective::Allreduce(&ctx_, linalg::MakeVec(&num_feature, 1), collective::Op::kMax);\n      collective::SafeColl(rc);\n      if (num_feature > mparam_.num_feature) {\n        mparam_.num_feature = 
num_feature;\n      }\n    }\n    CHECK_NE(mparam_.num_feature, 0)\n        << \"0 feature is supplied.  Are you using raw Booster interface?\";\n  }\n\n  void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {\n    tparam_.booster = CanonicalizeBoosterName(tparam_.booster);\n    if (tparam_.booster == \"gblinear\") {\n      LOG(WARNING) << \"`booster=gblinear` is deprecated and support will be removed in a future \"\n                      \"release.\";\n    }\n    auto old_booster = CanonicalizeBoosterName(old.booster);\n    if (gbm_ == nullptr || old_booster != tparam_.booster) {\n      gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));\n    }\n    gbm_->Configure(args);\n  }\n\n  void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {\n    // Once binary IO is gone, NONE of these config is useful.\n    if (cfg_.find(\"num_class\") != cfg_.cend() && cfg_.at(\"num_class\") != \"0\" &&\n        tparam_.objective != \"multi:softprob\") {\n      cfg_[\"num_output_group\"] = cfg_[\"num_class\"];\n      if (atoi(cfg_[\"num_class\"].c_str()) > 1 && cfg_.count(\"objective\") == 0) {\n        tparam_.objective = \"multi:softmax\";\n      }\n    }\n\n    if (cfg_.find(\"max_delta_step\") == cfg_.cend() && cfg_.find(\"objective\") != cfg_.cend() &&\n        tparam_.objective == \"count:poisson\") {\n      // max_delta_step is a duplicated parameter in Poisson regression and tree param.\n      // Rename one of them once binary IO is gone.\n      cfg_[\"max_delta_step\"] = kMaxDeltaStepDefaultValue;\n    }\n    if (obj_ == nullptr || tparam_.objective != old.objective) {\n      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));\n    }\n\n    bool has_nc{cfg_.find(\"num_class\") != cfg_.cend()};\n    // Inject num_class into configuration.\n    // FIXME(jiamingy): Remove the duplicated parameter in softmax\n    cfg_[\"num_class\"] = std::to_string(mparam_.num_class);\n    auto& args = *p_args;\n    args = 
{cfg_.cbegin(), cfg_.cend()};  // renew\n    obj_->Configure(args);\n    if (!has_nc) {\n      cfg_.erase(\"num_class\");\n    }\n  }\n\n  void ConfigureMetrics(Args const& args) {\n    for (auto const& name : metric_names_) {\n      auto DupCheck = [&name](std::unique_ptr<Metric> const& m) {\n        return m->Name() != name;\n      };\n      if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {\n        metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &ctx_)));\n      }\n    }\n\n    for (auto& p_metric : metrics_) {\n      p_metric->Configure(args);\n    }\n  }\n\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) {\n    base_score->SetDevice(this->Ctx()->Device());\n    base_score->Reshape(this->mparam_.OutputLength());\n    collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),\n                                [&] { UsePtr(obj_)->InitEstimation(info, base_score); });\n  }\n};\n\nstd::string const LearnerConfiguration::kEvalMetric{\"eval_metric\"};  // NOLINT\n\nclass LearnerIO : public LearnerConfiguration {\n protected:\n  void ClearCaches() { this->prediction_container_ = PredictionContainer{}; }\n\n public:\n  explicit LearnerIO(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerConfiguration{cache} {}\n\n  void LoadModel(Json const& in) override {\n    CHECK(IsA<Object>(in));\n    auto version = Version::Load(in);\n    if (std::get<0>(version) == 1 && std::get<1>(version) < 6) {\n      LOG(WARNING)\n          << \"Found JSON model saved before XGBoost 1.6, please save the model using current \"\n             \"version again. 
The support for old JSON model will be discontinued in XGBoost 3.2\";\n    }\n\n    auto const& learner = get<Object>(in[\"learner\"]);\n    mparam_.FromJson(learner.at(\"learner_model_param\"));\n\n    auto const& objective_fn = learner.at(\"objective\");\n\n    std::string name = get<String>(objective_fn[\"name\"]);\n    tparam_.UpdateAllowUnknown(Args{{\"objective\", name}});\n    obj_.reset(ObjFunction::Create(name, &ctx_));\n    obj_->LoadConfig(objective_fn);\n\n    auto const& gradient_booster = learner.at(\"gradient_booster\");\n    name = get<String>(gradient_booster[\"name\"]);\n    tparam_.UpdateAllowUnknown(Args{{\"booster\", name}});\n    tparam_.booster = CanonicalizeBoosterName(tparam_.booster);\n    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));\n    gbm_->LoadModel(gradient_booster);\n\n    auto const& j_attributes = get<Object const>(learner.at(\"attributes\"));\n    attributes_.clear();\n    for (auto const& kv : j_attributes) {\n      attributes_[kv.first] = get<String const>(kv.second);\n    }\n\n    // feature names and types are saved in xgboost 1.4\n    auto it = learner.find(\"feature_names\");\n    if (it != learner.cend()) {\n      auto const& feature_names = get<Array const>(it->second);\n      feature_names_.resize(feature_names.size());\n      std::transform(feature_names.cbegin(), feature_names.cend(), feature_names_.begin(),\n                     [](Json const& fn) { return get<String const>(fn); });\n    }\n    it = learner.find(\"feature_types\");\n    if (it != learner.cend()) {\n      auto const& feature_types = get<Array const>(it->second);\n      feature_types_.resize(feature_types.size());\n      std::transform(feature_types.cbegin(), feature_types.cend(), feature_types_.begin(),\n                     [](Json const& fn) { return get<String const>(fn); });\n    }\n\n    this->need_configuration_ = true;\n    this->ClearCaches();\n  }\n\n  void SaveModel(Json* p_out) const override {\n    
CHECK(!this->need_configuration_) << \"Call Configure before saving model.\";\n    this->CheckModelInitialized();\n\n    Version::Save(p_out);\n    Json& out{*p_out};\n\n    out[\"learner\"] = Object();\n    auto& learner = out[\"learner\"];\n\n    learner[\"learner_model_param\"] = mparam_.ToJson();\n    learner[\"gradient_booster\"] = Object();\n    auto& gradient_booster = learner[\"gradient_booster\"];\n    gbm_->SaveModel(&gradient_booster);\n\n    learner[\"objective\"] = Object();\n    auto& objective_fn = learner[\"objective\"];\n    obj_->SaveConfig(&objective_fn);\n\n    learner[\"attributes\"] = Object();\n    for (auto const& kv : attributes_) {\n      learner[\"attributes\"][kv.first] = String(kv.second);\n    }\n\n    learner[\"feature_names\"] = Array();\n    auto& feature_names = get<Array>(learner[\"feature_names\"]);\n    for (auto const& name : feature_names_) {\n      feature_names.emplace_back(name);\n    }\n    learner[\"feature_types\"] = Array();\n    auto& feature_types = get<Array>(learner[\"feature_types\"]);\n    for (auto const& type : feature_types_) {\n      feature_types.emplace_back(type);\n    }\n  }\n\n  void Save(dmlc::Stream* fo) const override {\n    this->CheckModelInitialized();\n\n    Json memory_snapshot{Object()};\n    memory_snapshot[\"Model\"] = Object();\n    auto& model = memory_snapshot[\"Model\"];\n    this->SaveModel(&model);\n    memory_snapshot[\"Config\"] = Object();\n    auto& config = memory_snapshot[\"Config\"];\n    this->SaveConfig(&config);\n\n    std::vector<char> stream;\n    Json::Dump(memory_snapshot, &stream, std::ios::binary);\n    fo->Write(stream.data(), stream.size());\n  }\n\n  void Load(dmlc::Stream* fi) override {\n    common::PeekableInStream fp(fi);\n    char header[2];\n    fp.PeekRead(header, 2);\n    StringView msg = \"Invalid serialization file.\";\n    CHECK_EQ(header[0], '{') << msg;\n\n    auto buffer = common::ReadAll(fi, &fp);\n    Json memory_snapshot;\n    
CHECK(std::isalpha(header[1])) << msg;\n    if (header[1] == '\"') {\n      memory_snapshot = Json::Load(StringView{buffer});\n      error::WarnOldSerialization();\n    } else if (std::isalpha(header[1])) {\n      memory_snapshot = Json::Load(StringView{buffer}, std::ios::binary);\n    } else {\n      LOG(FATAL) << \"Invalid serialization file.\";\n    }\n\n    this->LoadModel(memory_snapshot[\"Model\"]);\n    this->LoadConfig(memory_snapshot[\"Config\"]);\n  }\n};\n\n/*!\n * \\brief learner that performs gradient boosting for a specific objective\n * function. It does training and prediction.\n */\nclass LearnerImpl : public LearnerIO {\n public:\n  explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerIO{cache} {}\n  ~LearnerImpl() override {\n    auto local_map = LearnerAPIThreadLocalStore::Get();\n    if (local_map->find(this) != local_map->cend()) {\n      local_map->erase(this);\n    }\n  }\n\n  std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,\n                                     std::string format) override {\n    this->Configure();\n    this->CheckModelInitialized();\n\n    return gbm_->DumpModel(fmap, with_stats, format);\n  }\n\n  Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step,\n                 bool* out_of_bound) override {\n    this->Configure();\n    this->CheckModelInitialized();\n\n    CHECK_NE(this->learner_model_param_.num_feature, 0);\n    CHECK_GE(begin, 0);\n    auto* out_impl = new LearnerImpl({});\n    out_impl->learner_model_param_.Copy(this->learner_model_param_);\n    out_impl->ctx_ = this->ctx_;\n    auto gbm = std::unique_ptr<GradientBooster>(GradientBooster::Create(\n        this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_));\n    this->gbm_->Slice(begin, end, step, gbm.get(), out_of_bound);\n    out_impl->gbm_ = std::move(gbm);\n\n    Json config{Object()};\n    this->SaveConfig(&config);\n    out_impl->mparam_ = this->mparam_;\n    
out_impl->attributes_ = this->attributes_;\n    out_impl->SetFeatureNames(this->feature_names_);\n    out_impl->SetFeatureTypes(this->feature_types_);\n    out_impl->LoadConfig(config);\n    out_impl->Configure();\n    CHECK_EQ(out_impl->learner_model_param_.num_feature, this->learner_model_param_.num_feature);\n    CHECK_NE(out_impl->learner_model_param_.num_feature, 0);\n\n    auto erase_attr = [&](std::string attr) {\n      // Erase invalid attributes.\n      auto attr_it = out_impl->attributes_.find(attr);\n      if (attr_it != out_impl->attributes_.cend()) {\n        out_impl->attributes_.erase(attr_it);\n      }\n    };\n    erase_attr(\"best_iteration\");\n    erase_attr(\"best_score\");\n    return out_impl;\n  }\n\n  void Reset() override {\n    this->Configure();\n    this->CheckModelInitialized();\n    // Global data\n    auto local_map = LearnerAPIThreadLocalStore::Get();\n    if (local_map->find(this) != local_map->cend()) {\n      local_map->erase(this);\n    }\n\n    // Model\n    std::string buf;\n    common::MemoryBufferStream fo(&buf);\n    this->Save(&fo);\n\n    common::MemoryFixSizeBuffer fs(buf.data(), buf.size());\n    this->Load(&fs);\n\n    // Learner self cache. 
Prediction is cleared in the load method\n    CHECK(this->prediction_container_.Container().empty());\n    this->gpair_ = decltype(this->gpair_){};\n  }\n\n  void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) override {\n    monitor_.Start(\"UpdateOneIter\");\n    TrainingObserver::Instance().Update(iter);\n    this->Configure();\n    this->FitIntercept(this->tparam_, train.get());\n\n    if (ctx_.seed_per_iteration) {\n      ctx_.Rng().seed(ctx_.seed * kRandSeedMagic + this->BoostedRounds());\n    }\n\n    this->ValidateDMatrix(train.get(), true);\n\n    auto predt = prediction_container_.Cache(train, ctx_.Device());\n\n    monitor_.Start(\"PredictRaw\");\n    this->PredictRaw(train.get(), predt.get(), true, 0, 0);\n    TrainingObserver::Instance().Observe(predt->predictions, \"Predictions\");\n    monitor_.Stop(\"PredictRaw\");\n\n    monitor_.Start(\"GetGradient\");\n    GetGradient(predt->predictions, train->Info(), iter, &gpair_.gpair);\n    monitor_.Stop(\"GetGradient\");\n    TrainingObserver::Instance().Observe(gpair_.Grad()->Data(), \"Gradients\");\n\n    gbm_->DoBoost(train.get(), &gpair_, predt.get(), obj_.get());\n    monitor_.Stop(\"UpdateOneIter\");\n  }\n\n  void BoostOneIter(std::int32_t, std::shared_ptr<DMatrix> train,\n                    GradientContainer* in_gpair) override {\n    this->monitor_.Start(__func__);\n    this->Configure();\n\n    if (ctx_.seed_per_iteration) {\n      ctx_.Rng().seed(ctx_.seed * kRandSeedMagic + this->BoostedRounds());\n    }\n\n    this->ValidateDMatrix(train.get(), true);\n    if (in_gpair->HasValueGrad()) {\n      CHECK_EQ(this->learner_model_param_.OutputLength(), in_gpair->NumTargets())\n          << \"Value gradient should have the same number of targets as the overall model.\";\n    } else {\n      CHECK_EQ(this->learner_model_param_.OutputLength(), in_gpair->NumSplitTargets())\n          << \"The number of columns in gradient should be equal to the number of \"\n             \"targets/classes in the 
model.\";\n    }\n    auto predt = prediction_container_.Cache(train, ctx_.Device());\n    this->gbm_->DoBoost(train.get(), in_gpair, predt.get(), obj_.get());\n    this->monitor_.Stop(__func__);\n  }\n\n  std::string EvalOneIter(int iter, const std::vector<std::shared_ptr<DMatrix>>& data_sets,\n                          const std::vector<std::string>& data_names) override {\n    monitor_.Start(\"EvalOneIter\");\n    this->Configure();\n    this->CheckModelInitialized();\n\n    std::ostringstream os;\n    os.precision(std::numeric_limits<double>::max_digits10);\n    os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);\n    if (metrics_.empty() && !tparam_.disable_default_eval_metric) {\n      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));\n      auto config = obj_->DefaultMetricConfig();\n      if (!IsA<Null>(config)) {\n        metrics_.back()->LoadConfig(config);\n      }\n      metrics_.back()->Configure({cfg_.begin(), cfg_.end()});\n    }\n\n    for (size_t i = 0; i < data_sets.size(); ++i) {\n      std::shared_ptr<DMatrix> m = data_sets[i];\n      auto predt = prediction_container_.Cache(m, ctx_.Device());\n      this->ValidateDMatrix(m.get(), false);\n      this->PredictRaw(m.get(), predt.get(), false, 0, 0);\n\n      auto& out = output_predictions_.Cache(m, ctx_.Device())->predictions;\n      out.Resize(predt->predictions.Size());\n      out.Copy(predt->predictions);\n\n      obj_->EvalTransform(&out);\n      for (auto& ev : metrics_) {\n        os << '\\t' << data_names[i] << '-' << ev->Name() << ':' << ev->Evaluate(out, m);\n      }\n    }\n\n    monitor_.Stop(\"EvalOneIter\");\n    return os.str();\n  }\n\n  void Predict(std::shared_ptr<DMatrix> data, bool output_margin,\n               HostDeviceVector<float>* out_preds, bst_layer_t layer_begin, bst_layer_t layer_end,\n               bool training, bool pred_leaf, bool pred_contribs, bool approx_contribs,\n               bool pred_interactions) override {\n    int 
multiple_predictions = static_cast<int>(pred_leaf) + static_cast<int>(pred_interactions) +\n                               static_cast<int>(pred_contribs);\n    this->Configure();\n    if (training) {\n      this->FitIntercept(this->tparam_, nullptr);\n    }\n    this->CheckModelInitialized();\n\n    CHECK_LE(multiple_predictions, 1) << \"Perform one kind of prediction at a time.\";\n    if (pred_contribs) {\n      gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs);\n    } else if (pred_interactions) {\n      gbm_->PredictInteractionContributions(data.get(), out_preds, layer_begin, layer_end,\n                                            approx_contribs);\n    } else if (pred_leaf) {\n      gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);\n    } else {\n      auto predt = prediction_container_.Cache(data, ctx_.Device());\n      this->PredictRaw(data.get(), predt.get(), training, layer_begin, layer_end);\n      // Copy the prediction cache to output prediction. 
out_preds comes from C API\n      out_preds->SetDevice(ctx_.Device());\n      out_preds->Resize(predt->predictions.Size());\n      out_preds->Copy(predt->predictions);\n      if (!output_margin) {\n        obj_->PredTransform(out_preds);\n      }\n    }\n  }\n\n  int32_t BoostedRounds() const override {\n    if (!this->gbm_) {\n      return 0;\n    }  // haven't call train or LoadModel.\n    CHECK(!this->need_configuration_);\n    return this->gbm_->BoostedRounds();\n  }\n\n  uint32_t Groups() const override {\n    CHECK(!this->need_configuration_);\n    this->CheckModelInitialized();\n    return this->learner_model_param_.num_output_group;\n  }\n\n  XGBAPIThreadLocalEntry& GetThreadLocal() const override {\n    return (*LearnerAPIThreadLocalStore::Get())[this];\n  }\n\n  void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,\n                      HostDeviceVector<float>** out_preds, bst_layer_t iteration_begin,\n                      bst_layer_t iteration_end) override {\n    this->Configure();\n    this->CheckModelInitialized();\n\n    auto& out_predictions = this->GetThreadLocal().prediction_entry;\n    out_predictions.Reset();\n\n    this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);\n\n    if (type == PredictionType::kValue) {\n      obj_->PredTransform(&out_predictions.predictions);\n    } else if (type == PredictionType::kMargin) {\n      // do nothing\n    } else {\n      LOG(FATAL) << \"Unsupported prediction type:\" << static_cast<int>(type);\n    }\n    *out_preds = &out_predictions.predictions;\n  }\n\n  void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,\n                        std::vector<bst_feature_t>* features, std::vector<float>* scores) override {\n    this->Configure();\n    this->CheckModelInitialized();\n\n    gbm_->FeatureScore(importance_type, trees, features, scores);\n  }\n\n  const std::map<std::string, std::string>& 
GetConfigurationArguments() const override {\n    return cfg_;\n  }\n\n protected:\n  /*!\n   * \\brief get un-transformed prediction\n   * \\param data training data matrix\n   * \\param out_preds output vector that stores the prediction\n   * \\param ntree_limit limit number of trees used for boosted tree\n   *   predictor, when it equals 0, this means we are using all the trees\n   * \\param training allow dropout when the DART booster is being used\n   */\n  void PredictRaw(DMatrix* data, PredictionCacheEntry* out_preds, bool training,\n                  unsigned layer_begin, unsigned layer_end) const {\n    CHECK(gbm_ != nullptr) << \"Predict must happen after Load or configuration\";\n    this->CheckModelInitialized();\n    this->ValidateDMatrix(data, false);\n    gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end);\n  }\n\n  void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const {\n    MetaInfo const& info = p_fmat->Info();\n    info.Validate(ctx_.Device());\n\n    if (is_training) {\n      CHECK_EQ(learner_model_param_.num_feature, p_fmat->Info().num_col_)\n          << \"Number of columns does not match number of features in \"\n             \"booster.\";\n    } else {\n      CHECK_GE(learner_model_param_.num_feature, p_fmat->Info().num_col_)\n          << \"Number of columns does not match number of features in \"\n             \"booster.\";\n    }\n\n    if (p_fmat->Info().num_row_ == 0) {\n      error::WarnEmptyDataset();\n    }\n    if (!p_fmat->Info().base_margin_.Empty()) {\n      CHECK_EQ(p_fmat->Info().base_margin_.Shape(1), this->mparam_.OutputLength());\n    }\n  }\n\n private:\n  void GetGradient(HostDeviceVector<float> const& preds, MetaInfo const& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) {\n    out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());\n    collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(),\n                                [&] { 
obj_->GetGradient(preds, info, iter, out_gpair); });\n  }\n\n  /*! \\brief random number transformation seed. */\n  static int32_t constexpr kRandSeedMagic = 127;\n  // gradient pairs\n  GradientContainer gpair_;\n  /*! \\brief Temporary storage to prediction.  Useful for storing data transformed by\n   *  objective function */\n  PredictionContainer output_predictions_;\n};\n\nconstexpr int32_t LearnerImpl::kRandSeedMagic;\n\nLearner* Learner::Create(const std::vector<std::shared_ptr<DMatrix>>& cache_data) {\n  return new LearnerImpl(cache_data);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/linear/coordinate_common.h",
    "content": "/**\n * Copyright 2018-2026, XGBoost Contributors\n * \\author Rory Mitchell\n */\n#pragma once\n#include <algorithm>\n#include <cmath>  // for fpclassify\n#include <limits>\n#include <numeric>  // for accumulate, iota\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../common/threading_utils.h\"\n#include \"../gbm/gblinear_model.h\"\n#include \"./param.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost {\nnamespace linear {\n\nstruct CoordinateParam : public XGBoostParameter<CoordinateParam> {\n  int top_k;\n  DMLC_DECLARE_PARAMETER(CoordinateParam) {\n    DMLC_DECLARE_FIELD(top_k).set_lower_bound(0).set_default(0).describe(\n        \"The number of top features to select in 'thrifty' feature_selector. \"\n        \"The value of zero means using all the features.\");\n  }\n};\n\n/**\n * \\brief Calculate change in weight for a given feature. Applies l1/l2 penalty normalised by the\n *        number of training instances.\n *\n * \\param sum_grad            The sum gradient.\n * \\param sum_hess            The sum hess.\n * \\param w                   The weight.\n * \\param reg_alpha           Unnormalised L1 penalty.\n * \\param reg_lambda          Unnormalised L2 penalty.\n *\n * \\return  The weight update.\n */\ninline double CoordinateDelta(double sum_grad, double sum_hess, double w, double reg_alpha,\n                              double reg_lambda) {\n  if (sum_hess < 1e-5f) return 0.0f;\n  const double sum_grad_l2 = sum_grad + reg_lambda * w;\n  const double sum_hess_l2 = sum_hess + reg_lambda;\n  const double tmp = w - sum_grad_l2 / sum_hess_l2;\n  if (tmp >= 0) {\n    return std::max(-(sum_grad_l2 + reg_alpha) / sum_hess_l2, -w);\n  } else {\n    return std::min(-(sum_grad_l2 - reg_alpha) / sum_hess_l2, -w);\n  }\n}\n\n/**\n * \\brief Calculate update to bias.\n *\n * \\param sum_grad  The sum gradient.\n * \\param sum_hess  The sum hess.\n *\n * \\return  The weight update.\n 
*/\ninline double CoordinateDeltaBias(double sum_grad, double sum_hess) {\n  auto b = -sum_grad / sum_hess;\n  if (std::isnan(b) || std::isinf(b)) {\n    b = 0;\n  }\n  return b;\n}\n\n/**\n * \\brief Get the gradient with respect to a single feature.\n *\n * \\param group_idx Zero-based index of the group.\n * \\param num_group Number of groups.\n * \\param fidx      The target feature.\n * \\param gpair     Gradients.\n * \\param p_fmat    The feature matrix.\n *\n * \\return  The gradient and diagonal Hessian entry for a given feature.\n */\ninline std::pair<double, double> GetGradient(Context const *ctx, int group_idx, int num_group,\n                                             bst_feature_t fidx,\n                                             std::vector<GradientPair> const &gpair,\n                                             DMatrix *p_fmat) {\n  double sum_grad = 0.0, sum_hess = 0.0;\n  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {\n    auto page = batch.GetView();\n    auto col = page[fidx];\n    const auto ndata = static_cast<bst_omp_uint>(col.size());\n    for (bst_omp_uint j = 0; j < ndata; ++j) {\n      const bst_float v = col[j].fvalue;\n      auto &p = gpair[col[j].index * num_group + group_idx];\n      if (p.GetHess() < 0.0f) continue;\n      sum_grad += p.GetGrad() * v;\n      sum_hess += p.GetHess() * v * v;\n    }\n  }\n  return std::make_pair(sum_grad, sum_hess);\n}\n\n/**\n * \\brief Get the gradient with respect to a single feature. 
Row-wise multithreaded.\n *\n * \\param group_idx Zero-based index of the group.\n * \\param num_group Number of groups.\n * \\param fidx      The target feature.\n * \\param gpair     Gradients.\n * \\param p_fmat    The feature matrix.\n *\n * \\return  The gradient and diagonal Hessian entry for a given feature.\n */\ninline std::pair<double, double> GetGradientParallel(Context const *ctx, int group_idx,\n                                                     int num_group, int fidx,\n                                                     const std::vector<GradientPair> &gpair,\n                                                     DMatrix *p_fmat) {\n  std::vector<double> sum_grad_tloc(ctx->Threads(), 0.0);\n  std::vector<double> sum_hess_tloc(ctx->Threads(), 0.0);\n\n  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {\n    auto page = batch.GetView();\n    auto col = page[fidx];\n    const auto ndata = static_cast<bst_omp_uint>(col.size());\n    common::ParallelFor(ndata, ctx->Threads(), [&](size_t j) {\n      const bst_float v = col[j].fvalue;\n      auto &p = gpair[col[j].index * num_group + group_idx];\n      if (p.GetHess() < 0.0f) {\n        return;\n      }\n      auto t_idx = omp_get_thread_num();\n      sum_grad_tloc[t_idx] += p.GetGrad() * v;\n      sum_hess_tloc[t_idx] += p.GetHess() * v * v;\n    });\n  }\n  double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);\n  double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);\n  return std::make_pair(sum_grad, sum_hess);\n}\n\n/**\n * \\brief Get the gradient with respect to the bias. 
Row-wise multithreaded.\n *\n * \\param group_idx Zero-based index of the group.\n * \\param num_group Number of groups.\n * \\param gpair     Gradients.\n * \\param p_fmat    The feature matrix.\n *\n * \\return  The gradient and diagonal Hessian entry for the bias.\n */\ninline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,\n                                                         const std::vector<GradientPair> &gpair,\n                                                         DMatrix *p_fmat, int32_t n_threads) {\n  const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);\n  std::vector<double> sum_grad_tloc(n_threads, 0);\n  std::vector<double> sum_hess_tloc(n_threads, 0);\n\n  common::ParallelFor(ndata, n_threads, [&](auto i) {\n    auto tid = omp_get_thread_num();\n    auto &p = gpair[i * num_group + group_idx];\n    if (p.GetHess() >= 0.0f) {\n      sum_grad_tloc[tid] += p.GetGrad();\n      sum_hess_tloc[tid] += p.GetHess();\n    }\n  });\n  double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);\n  double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);\n  return std::make_pair(sum_grad, sum_hess);\n}\n\n/**\n * \\brief Updates the gradient vector with respect to a change in weight.\n *\n * \\param fidx      The feature index.\n * \\param group_idx Zero-based index of the group.\n * \\param num_group Number of groups.\n * \\param dw        The change in weight.\n * \\param in_gpair  The gradient vector to be updated.\n * \\param p_fmat    The input feature matrix.\n */\ninline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx,\n                                   int num_group, float dw, std::vector<GradientPair> *in_gpair,\n                                   DMatrix *p_fmat) {\n  if (dw == 0.0f) return;\n  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {\n    auto page = batch.GetView();\n    auto col = 
page[fidx];\n    // update grad value\n    const auto num_row = static_cast<bst_omp_uint>(col.size());\n    common::ParallelFor(num_row, ctx->Threads(), [&](auto j) {\n      GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];\n      if (p.GetHess() < 0.0f) return;\n      p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);\n    });\n  }\n}\n\n/**\n * \\brief Updates the gradient vector based on a change in the bias.\n *\n * \\param group_idx Zero-based index of the group.\n * \\param num_group Number of groups.\n * \\param dbias     The change in bias.\n * \\param in_gpair  The gradient vector to be updated.\n * \\param p_fmat    The input feature matrix.\n */\ninline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group,\n                                       float dbias, std::vector<GradientPair> *in_gpair,\n                                       DMatrix *p_fmat) {\n  if (dbias == 0.0f) return;\n  const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);\n  common::ParallelFor(ndata, ctx->Threads(), [&](auto i) {\n    GradientPair &g = (*in_gpair)[i * num_group + group_idx];\n    if (g.GetHess() < 0.0f) return;\n    g += GradientPair(g.GetHess() * dbias, 0);\n  });\n}\n\n/**\n * \\brief Abstract class for stateful feature selection or ordering\n *        in coordinate descent algorithms.\n */\nclass FeatureSelector {\n public:\n  FeatureSelector() = default;\n  /*! \\brief factory method */\n  static FeatureSelector *Create(int choice);\n  /*! 
\\brief virtual destructor */\n  virtual ~FeatureSelector() = default;\n  /**\n   * \\brief Setting up the selector state prior to looping through features.\n   *\n   * \\param ctx    The booster context.\n   * \\param model  The model.\n   * \\param gpair  The gpair.\n   * \\param p_fmat The feature matrix.\n   * \\param alpha  Regularisation alpha.\n   * \\param lambda Regularisation lambda.\n   * \\param param  A parameter with algorithm-dependent use.\n   */\n  virtual void Setup(Context const *, const gbm::GBLinearModel &, const std::vector<GradientPair> &,\n                     DMatrix *, float, float, int) {}\n  /**\n   * \\brief Select next coordinate to update.\n   *\n   * \\param ctx       Booster context\n   * \\param iteration The iteration in a loop through features\n   * \\param model     The model.\n   * \\param group_idx Zero-based index of the group.\n   * \\param gpair     The gpair.\n   * \\param p_fmat    The feature matrix.\n   * \\param alpha     Regularisation alpha.\n   * \\param lambda    Regularisation lambda.\n   *\n   * \\return  The index of the selected feature. 
-1 indicates none selected.\n   */\n  virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model,\n                          int group_idx, const std::vector<GradientPair> &gpair, DMatrix *p_fmat,\n                          float alpha, float lambda) = 0;\n};\n\n/**\n * \\brief Deterministic selection by cycling through features one at a time.\n */\nclass CyclicFeatureSelector : public FeatureSelector {\n public:\n  using FeatureSelector::FeatureSelector;\n  int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,\n                  const std::vector<GradientPair> &, DMatrix *, float, float) override {\n    return iteration % model.learner_model_param->num_feature;\n  }\n};\n\n/**\n * \\brief Similar to Cyclic but with random feature shuffling prior to each update.\n * \\note Its randomness is controllable by setting a random seed.\n */\nclass ShuffleFeatureSelector : public FeatureSelector {\n public:\n  using FeatureSelector::FeatureSelector;\n  void Setup(Context const *ctx, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,\n             DMatrix *, float, float, int) override {\n    if (feat_index_.size() == 0) {\n      feat_index_.resize(model.learner_model_param->num_feature);\n      std::iota(feat_index_.begin(), feat_index_.end(), 0);\n    }\n    std::shuffle(feat_index_.begin(), feat_index_.end(), ctx->Rng());\n  }\n\n  int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,\n                  const std::vector<GradientPair> &, DMatrix *, float, float) override {\n    return feat_index_[iteration % model.learner_model_param->num_feature];\n  }\n\n protected:\n  std::vector<bst_uint> feat_index_;\n};\n\n/**\n * \\brief A random (with replacement) coordinate selector.\n * \\note Its randomness is controllable by setting a random seed.\n */\nclass RandomFeatureSelector : public FeatureSelector {\n public:\n  using FeatureSelector::FeatureSelector;\n  
int NextFeature(Context const *ctx, int, const gbm::GBLinearModel &model, int,\n                  const std::vector<GradientPair> &, DMatrix *, float, float) override {\n    return ctx->Rng()() % model.learner_model_param->num_feature;\n  }\n};\n\n/**\n * \\brief Select coordinate with the greatest gradient magnitude.\n * \\note It has O(num_feature^2) complexity. It is fully deterministic.\n *\n * \\note It allows restricting the selection to top_k features per group with\n * the largest magnitude of univariate weight change, by passing the top_k value\n * through the `param` argument of Setup(). That would reduce the complexity to\n * O(num_feature*top_k).\n */\nclass GreedyFeatureSelector : public FeatureSelector {\n public:\n  using FeatureSelector::FeatureSelector;\n  void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,\n             DMatrix *, float, float, int param) override {\n    top_k_ = static_cast<bst_uint>(param);\n    const bst_uint ngroup = model.learner_model_param->num_output_group;\n    if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();\n    if (counter_.size() == 0) {\n      counter_.resize(ngroup);\n      gpair_sums_.resize(model.learner_model_param->num_feature * ngroup);\n    }\n    for (bst_uint gid = 0u; gid < ngroup; ++gid) {\n      counter_[gid] = 0u;\n    }\n  }\n\n  int NextFeature(Context const *ctx, int, const gbm::GBLinearModel &model, int group_idx,\n                  const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha,\n                  float lambda) override {\n    // k-th selected feature for a group\n    auto k = counter_[group_idx]++;\n    // stop after either reaching top-K or going through all the features in a group\n    if (k >= top_k_ || counter_[group_idx] == model.learner_model_param->num_feature) return -1;\n\n    const int ngroup = model.learner_model_param->num_output_group;\n    const bst_omp_uint nfeat = 
model.learner_model_param->num_feature;\n    // Calculate univariate gradient sums\n    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));\n    for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {\n      auto page = batch.GetView();\n      common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) {\n        const auto col = page[i];\n        const bst_uint ndata = col.size();\n        auto &sums = gpair_sums_[group_idx * nfeat + i];\n        for (bst_uint j = 0u; j < ndata; ++j) {\n          const bst_float v = col[j].fvalue;\n          auto &p = gpair[col[j].index * ngroup + group_idx];\n          if (p.GetHess() < 0.f) continue;\n          sums.first += p.GetGrad() * v;\n          sums.second += p.GetHess() * v * v;\n        }\n      });\n    }\n    // Find a feature with the largest magnitude of weight change\n    int best_fidx = 0;\n    double best_weight_update = 0.0f;\n    for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) {\n      auto &s = gpair_sums_[group_idx * nfeat + fidx];\n      float dw = std::abs(static_cast<bst_float>(\n          CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda)));\n      if (dw > best_weight_update) {\n        best_weight_update = dw;\n        best_fidx = fidx;\n      }\n    }\n    return best_fidx;\n  }\n\n protected:\n  bst_uint top_k_;\n  std::vector<bst_uint> counter_;\n  std::vector<std::pair<double, double>> gpair_sums_;\n};\n\n/**\n * \\brief Thrifty, approximately-greedy feature selector.\n *\n * \\note Prior to cyclic updates, reorders features in descending magnitude of\n * their univariate weight changes. 
This operation is multithreaded and is a\n * linear complexity approximation of the quadratic greedy selection.\n *\n * \\note It allows restricting the selection to top_k features per group with\n * the largest magnitude of univariate weight change, by passing the top_k value\n * through the `param` argument of Setup().\n */\nclass ThriftyFeatureSelector : public FeatureSelector {\n public:\n  using FeatureSelector::FeatureSelector;\n\n  void Setup(Context const *ctx, const gbm::GBLinearModel &model,\n             const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha, float lambda,\n             int param) override {\n    top_k_ = static_cast<bst_uint>(param);\n    if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();\n    const bst_uint ngroup = model.learner_model_param->num_output_group;\n    const bst_omp_uint nfeat = model.learner_model_param->num_feature;\n\n    if (deltaw_.size() == 0) {\n      deltaw_.resize(nfeat * ngroup);\n      sorted_idx_.resize(nfeat * ngroup);\n      counter_.resize(ngroup);\n      gpair_sums_.resize(nfeat * ngroup);\n    }\n    // Calculate univariate gradient sums\n    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));\n    for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {\n      auto page = batch.GetView();\n      // column-parallel is usually fastaer than row-parallel\n      common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) {\n        const auto col = page[i];\n        const bst_uint ndata = col.size();\n        for (bst_uint gid = 0u; gid < ngroup; ++gid) {\n          auto &sums = gpair_sums_[gid * nfeat + i];\n          for (bst_uint j = 0u; j < ndata; ++j) {\n            const bst_float v = col[j].fvalue;\n            auto &p = gpair[col[j].index * ngroup + gid];\n            if (p.GetHess() < 0.f) continue;\n            sums.first += p.GetGrad() * v;\n            sums.second += p.GetHess() * v * v;\n          }\n        }\n      });\n    }\n    // rank by 
descending weight magnitude within the groups\n    std::fill(deltaw_.begin(), deltaw_.end(), 0.f);\n    std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0);\n    bst_float *pdeltaw = &deltaw_[0];\n    for (bst_uint gid = 0u; gid < ngroup; ++gid) {\n      // Calculate univariate weight changes\n      for (bst_omp_uint i = 0; i < nfeat; ++i) {\n        auto ii = gid * nfeat + i;\n        auto &s = gpair_sums_[ii];\n        deltaw_[ii] = static_cast<bst_float>(\n            CoordinateDelta(s.first, s.second, model[i][gid], alpha, lambda));\n      }\n      // sort in descending order of deltaw abs values\n      auto start = sorted_idx_.begin() + gid * nfeat;\n      std::sort(start, start + nfeat, [pdeltaw](size_t i, size_t j) {\n        return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j));\n      });\n      counter_[gid] = 0u;\n    }\n  }\n\n  int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx,\n                  const std::vector<GradientPair> &, DMatrix *, float, float) override {\n    // k-th selected feature for a group\n    auto k = counter_[group_idx]++;\n    // stop after either reaching top-N or going through all the features in a group\n    if (k >= top_k_ || counter_[group_idx] == model.learner_model_param->num_feature) return -1;\n    // note that sorted_idx stores the \"long\" indices\n    const size_t grp_offset = group_idx * model.learner_model_param->num_feature;\n    return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);\n  }\n\n protected:\n  bst_uint top_k_;\n  std::vector<bst_float> deltaw_;\n  std::vector<size_t> sorted_idx_;\n  std::vector<bst_uint> counter_;\n  std::vector<std::pair<double, double>> gpair_sums_;\n};\n\ninline FeatureSelector *FeatureSelector::Create(int choice) {\n  switch (choice) {\n    case kCyclic:\n      return new CyclicFeatureSelector;\n    case kShuffle:\n      return new ShuffleFeatureSelector;\n    case kThrifty:\n      return new ThriftyFeatureSelector;\n    case 
kGreedy:\n      return new GreedyFeatureSelector;\n    case kRandom:\n      return new RandomFeatureSelector;\n    default:\n      LOG(FATAL) << \"unknown coordinate selector: \" << choice;\n  }\n  return nullptr;\n}\n\n}  // namespace linear\n}  // namespace xgboost\n"
  },
  {
    "path": "src/linear/linear_updater.cc",
    "content": "/*!\n * Copyright 2018\n */\n#include <xgboost/linear_updater.h>\n#include <dmlc/registry.h>\n#include \"./param.h\"\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);\n}  // namespace dmlc\n\nnamespace xgboost {\n\nLinearUpdater* LinearUpdater::Create(const std::string& name, Context const* ctx) {\n  auto *e = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->Find(name);\n  if (e == nullptr) {\n    LOG(FATAL) << \"Unknown linear updater \" << name;\n  }\n  auto p_linear = (e->body)();\n  p_linear->ctx_ = ctx;\n  return p_linear;\n}\n\n}  // namespace xgboost\n\nnamespace xgboost {\nnamespace linear {\nDMLC_REGISTER_PARAMETER(LinearTrainParam);\n\n// List of files that will be force linked in static links.\nDMLC_REGISTRY_LINK_TAG(updater_shotgun);\nDMLC_REGISTRY_LINK_TAG(updater_coordinate);\n#ifdef XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(updater_gpu_coordinate);\n#endif  // XGBOOST_USE_CUDA\n}  // namespace linear\n}  // namespace xgboost\n"
  },
  {
    "path": "src/linear/param.h",
    "content": "/*!\n * Copyright 2018 by Contributors\n * \\file param.h\n * \\brief training parameters.\n */\n#ifndef XGBOOST_LINEAR_PARAM_H_\n#define XGBOOST_LINEAR_PARAM_H_\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost {\nnamespace linear {\n/**\n * \\brief A set of available FeatureSelector's\n */\nenum FeatureSelectorEnum {\n  kCyclic = 0,\n  kShuffle,\n  kThrifty,\n  kGreedy,\n  kRandom\n};\n\nstruct LinearTrainParam : public XGBoostParameter<LinearTrainParam> {\n  /*! \\brief learning_rate */\n  float learning_rate;\n  /*! \\brief regularization weight for L2 norm */\n  float reg_lambda;\n  /*! \\brief regularization weight for L1 norm */\n  float reg_alpha;\n  int feature_selector;\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(LinearTrainParam) {\n    DMLC_DECLARE_FIELD(learning_rate)\n        .set_lower_bound(0.0f)\n        .set_default(0.5f)\n        .describe(\"Learning rate of each update.\");\n    DMLC_DECLARE_FIELD(reg_lambda)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\"L2 regularization on weights.\");\n    DMLC_DECLARE_FIELD(reg_alpha)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\"L1 regularization on weights.\");\n    DMLC_DECLARE_FIELD(feature_selector)\n        .set_default(kCyclic)\n        .add_enum(\"cyclic\", kCyclic)\n        .add_enum(\"shuffle\", kShuffle)\n        .add_enum(\"thrifty\", kThrifty)\n        .add_enum(\"greedy\", kGreedy)\n        .add_enum(\"random\", kRandom)\n        .describe(\"Feature selection or ordering method.\");\n    // alias of parameters\n    DMLC_DECLARE_ALIAS(learning_rate, eta);\n    DMLC_DECLARE_ALIAS(reg_lambda, lambda);\n    DMLC_DECLARE_ALIAS(reg_alpha, alpha);\n  }\n  /*! 
\\brief Denormalizes the regularization penalties - to be called at each update */\n  void DenormalizePenalties(double sum_instance_weight) {\n    reg_lambda_denorm = reg_lambda * sum_instance_weight;\n    reg_alpha_denorm = reg_alpha * sum_instance_weight;\n  }\n  // denormalized regularization penalties\n  float reg_lambda_denorm;\n  float reg_alpha_denorm;\n};\n\n}  // namespace linear\n}  // namespace xgboost\n\n#endif  // XGBOOST_LINEAR_PARAM_H_\n"
  },
  {
    "path": "src/linear/updater_coordinate.cc",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n * \\author Rory Mitchell\n */\n\n#include <xgboost/linear_updater.h>\n#include \"./param.h\"\n#include \"../common/timer.h\"\n#include \"coordinate_common.h\"\n#include \"xgboost/json.h\"\n\nnamespace xgboost::linear {\n\nDMLC_REGISTER_PARAMETER(CoordinateParam);\nDMLC_REGISTRY_FILE_TAG(updater_coordinate);\n\n// training parameter\n/**\n * \\class CoordinateUpdater\n *\n * \\brief Coordinate descent algorithm that updates one feature per iteration\n */\n\nclass CoordinateUpdater : public LinearUpdater {\n public:\n  // set training parameter\n  void Configure(Args const& args) override {\n    const std::vector<std::pair<std::string, std::string> > rest {\n      tparam_.UpdateAllowUnknown(args)\n    };\n    cparam_.UpdateAllowUnknown(rest);\n    selector_.reset(FeatureSelector::Create(tparam_.feature_selector));\n    monitor_.Init(\"CoordinateUpdater\");\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"linear_train_param\"), &tparam_);\n    FromJson(config.at(\"coordinate_param\"), &cparam_);\n  }\n  void SaveConfig(Json *p_out) const override {\n    LOG(DEBUG) << \"Save config for CPU updater.\";\n    auto &out = *p_out;\n    out[\"linear_train_param\"] = ToJson(tparam_);\n    out[\"coordinate_param\"] = ToJson(cparam_);\n  }\n\n  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,\n              double sum_instance_weight) override {\n    auto gpair = in_gpair->Data();\n    tparam_.DenormalizePenalties(sum_instance_weight);\n    auto ngroup = model->learner_model_param->num_output_group;\n    // update bias\n    for (decltype(ngroup) group_idx = 0; group_idx < ngroup; ++group_idx) {\n      auto grad = GetBiasGradientParallel(group_idx, ngroup, gpair->ConstHostVector(), p_fmat,\n                                          ctx_->Threads());\n      auto dbias =\n          
static_cast<float>(tparam_.learning_rate * CoordinateDeltaBias(grad.first, grad.second));\n      model->Bias()[group_idx] += dbias;\n      UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &gpair->HostVector(), p_fmat);\n    }\n    // prepare for updating the weights\n    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,\n                     tparam_.reg_lambda_denorm, cparam_.top_k);\n    // update weights\n    for (decltype(ngroup) group_idx = 0; group_idx < ngroup; ++group_idx) {\n      for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) {\n        int fidx =\n            selector_->NextFeature(ctx_, i, *model, group_idx, gpair->ConstHostVector(), p_fmat,\n                                   tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);\n        if (fidx < 0) break;\n        this->UpdateFeature(fidx, group_idx, &gpair->HostVector(), p_fmat, model);\n      }\n    }\n    monitor_.Stop(\"UpdateFeature\");\n  }\n\n  void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,\n                     gbm::GBLinearModel *model) {\n    const int ngroup = model->learner_model_param->num_output_group;\n    bst_float &w = (*model)[fidx][group_idx];\n    auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx,\n                                        *in_gpair, p_fmat);\n    auto dw = static_cast<float>(\n        tparam_.learning_rate *\n        CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,\n                        tparam_.reg_lambda_denorm));\n    w += dw;\n    UpdateResidualParallel(ctx_, fidx, group_idx, ngroup, dw, in_gpair, p_fmat);\n  }\n\n private:\n  CoordinateParam cparam_;\n  // training parameter\n  LinearTrainParam tparam_;\n  std::unique_ptr<FeatureSelector> selector_;\n  common::Monitor monitor_;\n};\n\nXGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, \"coord_descent\")\n    .describe(\"Update linear model 
according to coordinate descent algorithm.\")\n    .set_body([]() { return new CoordinateUpdater(); });\n}  // namespace xgboost::linear\n"
  },
  {
    "path": "src/linear/updater_gpu_coordinate.cu",
    "content": "/**\n * Copyright 2018-2023 by XGBoost Contributors\n * \\author Rory Mitchell\n */\n\n#include <thrust/execution_policy.h>\n#include <thrust/inner_product.h>\n#include <xgboost/data.h>\n#include <xgboost/linear_updater.h>\n#include \"xgboost/span.h\"\n\n#include \"coordinate_common.h\"\n#include \"../common/common.h\"\n#include \"../common/device_helpers.cuh\"\n#include \"../common/timer.h\"\n#include \"./param.h\"\n\nnamespace xgboost::linear {\n\nDMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);\n\n/**\n * \\class GPUCoordinateUpdater\n *\n * \\brief Coordinate descent algorithm that updates one feature per iteration\n */\n\nclass GPUCoordinateUpdater : public LinearUpdater {  // NOLINT\n public:\n  // set training parameter\n  void Configure(Args const &args) override {\n    tparam_.UpdateAllowUnknown(args);\n    coord_param_.UpdateAllowUnknown(args);\n    selector_.reset(FeatureSelector::Create(tparam_.feature_selector));\n    monitor_.Init(\"GPUCoordinateUpdater\");\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"linear_train_param\"), &tparam_);\n    FromJson(config.at(\"coordinate_param\"), &coord_param_);\n  }\n  void SaveConfig(Json *p_out) const override {\n    LOG(DEBUG) << \"Save config for GPU updater.\";\n    auto &out = *p_out;\n    out[\"linear_train_param\"] = ToJson(tparam_);\n    out[\"coordinate_param\"] = ToJson(coord_param_);\n  }\n\n  void LazyInitDevice(DMatrix *p_fmat, const LearnerModelParam &model_param) {\n    if (ctx_->IsCPU()) return;\n\n    num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);\n\n    CHECK(p_fmat->SingleColBlock());\n    SparsePage const &batch = *(p_fmat->GetBatches<CSCPage>(ctx_).begin());\n    auto page = batch.GetView();\n\n    if (IsEmpty()) {\n      return;\n    }\n\n    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n    // The begin and end indices for the section of each column associated with\n    // this 
device\n    std::vector<std::pair<bst_uint, bst_uint>> column_segments;\n    row_ptr_ = {0};\n    // iterate through columns\n    for (size_t fidx = 0; fidx < batch.Size(); fidx++) {\n      common::Span<Entry const> col = page[fidx];\n      auto cmp = [](Entry e1, Entry e2) {\n        return e1.index < e2.index;\n      };\n      auto column_begin =\n          std::lower_bound(col.cbegin(), col.cend(),\n                           xgboost::Entry(0, 0.0f), cmp);\n      auto column_end =\n          std::lower_bound(col.cbegin(), col.cend(),\n                           xgboost::Entry(num_row_, 0.0f), cmp);\n      column_segments.emplace_back(static_cast<bst_uint>(column_begin - col.cbegin()),\n                                   static_cast<bst_uint>(column_end - col.cbegin()));\n      row_ptr_.push_back(row_ptr_.back() + (column_end - column_begin));\n    }\n    data_.resize(row_ptr_.back());\n    gpair_.resize(num_row_ * model_param.num_output_group);\n    for (size_t fidx = 0; fidx < batch.Size(); fidx++) {\n      auto col = page[fidx];\n      auto seg = column_segments[fidx];\n      dh::safe_cuda(cudaMemcpy(\n          data_.data().get() + row_ptr_[fidx],\n          col.data() + seg.first,\n          sizeof(Entry) * (seg.second - seg.first), cudaMemcpyHostToDevice));\n    }\n  }\n\n  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,\n              double sum_instance_weight) override {\n    tparam_.DenormalizePenalties(sum_instance_weight);\n    monitor_.Start(\"LazyInitDevice\");\n    this->LazyInitDevice(p_fmat, *(model->learner_model_param));\n    monitor_.Stop(\"LazyInitDevice\");\n\n    monitor_.Start(\"UpdateGpair\");\n\n    // Update gpair\n    if (ctx_->IsCUDA()) {\n      this->UpdateGpair(in_gpair->Data()->ConstHostVector());\n    }\n    monitor_.Stop(\"UpdateGpair\");\n\n    monitor_.Start(\"UpdateBias\");\n    this->UpdateBias(model);\n    monitor_.Stop(\"UpdateBias\");\n    // prepare for updating the 
weights\n    selector_->Setup(ctx_, *model, in_gpair->Data()->ConstHostVector(), p_fmat,\n                     tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, coord_param_.top_k);\n    monitor_.Start(\"UpdateFeature\");\n    for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;\n         ++group_idx) {\n      for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {\n        auto fidx =\n            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->Data()->ConstHostVector(),\n                                   p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);\n        if (fidx < 0) break;\n        this->UpdateFeature(fidx, group_idx, model);\n      }\n    }\n    monitor_.Stop(\"UpdateFeature\");\n  }\n\n  void UpdateBias(gbm::GBLinearModel *model) {\n    for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;\n         ++group_idx) {\n      // Get gradient\n      auto grad = GradientPair(0, 0);\n      if (ctx_->IsCUDA()) {\n        grad = GetBiasGradient(group_idx, model->learner_model_param->num_output_group);\n      }\n      auto dbias = static_cast<float>(\n          tparam_.learning_rate *\n              CoordinateDeltaBias(grad.GetGrad(), grad.GetHess()));\n      model->Bias()[group_idx] += dbias;\n\n      // Update residual\n      if (ctx_->IsCUDA()) {\n        UpdateBiasResidual(dbias, group_idx, model->learner_model_param->num_output_group);\n      }\n    }\n  }\n\n  void UpdateFeature(int fidx, int group_idx,\n                     gbm::GBLinearModel *model) {\n    bst_float &w = (*model)[fidx][group_idx];\n    // Get gradient\n    auto grad = GradientPair(0, 0);\n    if (ctx_->IsCUDA()) {\n      grad = GetGradient(group_idx, model->learner_model_param->num_output_group, fidx);\n    }\n    auto dw = static_cast<float>(tparam_.learning_rate *\n                                 CoordinateDelta(grad.GetGrad(), grad.GetHess(),\n                                 
                w, tparam_.reg_alpha_denorm,\n                                                 tparam_.reg_lambda_denorm));\n    w += dw;\n\n    if (ctx_->IsCUDA()) {\n      UpdateResidual(dw, group_idx, model->learner_model_param->num_output_group, fidx);\n    }\n  }\n\n  // This needs to be public because of the __device__ lambda.\n  GradientPair GetBiasGradient(int group_idx, int num_group) {\n    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n    auto counting = thrust::make_counting_iterator(0ull);\n    auto f = [=] __device__(size_t idx) {\n      return idx * num_group + group_idx;\n    };  // NOLINT\n    thrust::transform_iterator<decltype(f), decltype(counting), size_t> skip(\n        counting, f);\n    auto perm = thrust::make_permutation_iterator(gpair_.data(), skip);\n\n    return dh::SumReduction(perm, num_row_);\n  }\n\n  // This needs to be public because of the __device__ lambda.\n  void UpdateBiasResidual(float dbias, int group_idx, int num_groups) {\n    if (dbias == 0.0f) return;\n    auto d_gpair = dh::ToSpan(gpair_);\n    dh::LaunchN(num_row_, [=] __device__(size_t idx) {\n      auto &g = d_gpair[idx * num_groups + group_idx];\n      g += GradientPair(g.GetHess() * dbias, 0);\n    });\n  }\n\n  // This needs to be public because of the __device__ lambda.\n  GradientPair GetGradient(int group_idx, int num_group, int fidx) {\n    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n    common::Span<xgboost::Entry> d_col = dh::ToSpan(data_).subspan(row_ptr_[fidx]);\n    size_t col_size = row_ptr_[fidx + 1] - row_ptr_[fidx];\n    common::Span<GradientPair> d_gpair = dh::ToSpan(gpair_);\n    auto counting = thrust::make_counting_iterator(0ull);\n    auto f = [=] __device__(size_t idx) {\n      auto entry = d_col[idx];\n      auto g = d_gpair[entry.index * num_group + group_idx];\n      return GradientPair{g.GetGrad() * entry.fvalue, g.GetHess() * entry.fvalue * entry.fvalue};\n    };  // NOLINT\n    thrust::transform_iterator<decltype(f), 
decltype(counting), GradientPair>\n        multiply_iterator(counting, f);\n    return dh::SumReduction(multiply_iterator, col_size);\n  }\n\n  // This needs to be public because of the __device__ lambda.\n  void UpdateResidual(float dw, int group_idx, int num_groups, int fidx) {\n    common::Span<GradientPair> d_gpair = dh::ToSpan(gpair_);\n    common::Span<Entry> d_col = dh::ToSpan(data_).subspan(row_ptr_[fidx]);\n    size_t col_size = row_ptr_[fidx + 1] - row_ptr_[fidx];\n    dh::LaunchN(col_size, [=] __device__(size_t idx) {\n      auto entry = d_col[idx];\n      auto &g = d_gpair[entry.index * num_groups + group_idx];\n      g += GradientPair(g.GetHess() * dw * entry.fvalue, 0);\n    });\n  }\n\n private:\n  bool IsEmpty() {\n    return num_row_ == 0;\n  }\n\n  void UpdateGpair(const std::vector<GradientPair> &host_gpair) {\n    dh::safe_cuda(cudaMemcpyAsync(\n        gpair_.data().get(),\n        host_gpair.data(),\n        gpair_.size() * sizeof(GradientPair), cudaMemcpyHostToDevice));\n  }\n\n  // training parameter\n  LinearTrainParam tparam_;\n  CoordinateParam coord_param_;\n  std::unique_ptr<FeatureSelector> selector_;\n  common::Monitor monitor_;\n\n  std::vector<size_t> row_ptr_;\n  dh::device_vector<xgboost::Entry> data_;\n  dh::caching_device_vector<GradientPair> gpair_;\n  size_t num_row_;\n};\n\nXGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, \"gpu_coord_descent\")\n    .describe(\n        \"Update linear model according to coordinate descent algorithm. GPU \"\n        \"accelerated.\")\n    .set_body([]() { return new GPUCoordinateUpdater(); });\n}  // namespace xgboost::linear\n"
  },
  {
    "path": "src/linear/updater_shotgun.cc",
    "content": "/**\n * Copyright 2018-2023 by XGBoost Contributors\n * \\author Tianqi Chen, Rory Mitchell\n */\n\n#include <xgboost/linear_updater.h>\n#include \"coordinate_common.h\"\n\nnamespace xgboost::linear {\n\nDMLC_REGISTRY_FILE_TAG(updater_shotgun);\n\nclass ShotgunUpdater : public LinearUpdater {\n public:\n  // set training parameter\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    if (param_.feature_selector != kCyclic &&\n        param_.feature_selector != kShuffle) {\n      LOG(FATAL) << \"Unsupported feature selector for shotgun updater.\\n\"\n                 << \"Supported options are: {cyclic, shuffle}\";\n    }\n    selector_.reset(FeatureSelector::Create(param_.feature_selector));\n  }\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"linear_train_param\"), &param_);\n  }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"linear_train_param\"] = ToJson(param_);\n  }\n\n  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,\n              double sum_instance_weight) override {\n    auto gpair = in_gpair->Data();\n    param_.DenormalizePenalties(sum_instance_weight);\n    const int ngroup = model->learner_model_param->num_output_group;\n\n    // update bias\n    for (int gid = 0; gid < ngroup; ++gid) {\n      auto grad = GetBiasGradientParallel(gid, ngroup, gpair->ConstHostVector(), p_fmat,\n                                          ctx_->Threads());\n      auto dbias = static_cast<bst_float>(param_.learning_rate *\n                               CoordinateDeltaBias(grad.first, grad.second));\n      model->Bias()[gid] += dbias;\n      UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &gpair->HostVector(), p_fmat);\n    }\n\n    // lock-free parallel updates of weights\n    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, 
param_.reg_alpha_denorm,\n                     param_.reg_lambda_denorm, 0);\n    auto &h_gpair = gpair->HostVector();\n    for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx_)) {\n      auto page = batch.GetView();\n      const auto nfeat = static_cast<bst_omp_uint>(batch.Size());\n      common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {\n        int ii = selector_->NextFeature(ctx_, i, *model, 0, gpair->ConstHostVector(), p_fmat,\n                                        param_.reg_alpha_denorm, param_.reg_lambda_denorm);\n        if (ii < 0) return;\n        const bst_uint fid = ii;\n        auto col = page[ii];\n        for (int gid = 0; gid < ngroup; ++gid) {\n          double sum_grad = 0.0, sum_hess = 0.0;\n          for (auto &c : col) {\n            const GradientPair &p = h_gpair[c.index * ngroup + gid];\n            if (p.GetHess() < 0.0f) continue;\n            const bst_float v = c.fvalue;\n            sum_grad += p.GetGrad() * v;\n            sum_hess += p.GetHess() * v * v;\n          }\n          bst_float &w = (*model)[fid][gid];\n          auto dw = static_cast<bst_float>(\n              param_.learning_rate * CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm,\n                                                     param_.reg_lambda_denorm));\n          if (dw == 0.f) continue;\n          w += dw;\n          // update grad values\n          for (auto &c : col) {\n            GradientPair &p = h_gpair[c.index * ngroup + gid];\n            if (p.GetHess() < 0.0f) continue;\n            p += GradientPair(p.GetHess() * c.fvalue * dw, 0);\n          }\n        }\n      });\n    }\n  }\n\n protected:\n  // training parameters\n  LinearTrainParam param_;\n\n  std::unique_ptr<FeatureSelector> selector_;\n};\n\nXGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, \"shotgun\")\n    .describe(\n        \"Update linear model according to shotgun coordinate descent \"\n        \"algorithm.\")\n    .set_body([]() { return new ShotgunUpdater(); 
});\n}  // namespace xgboost::linear\n"
  },
  {
    "path": "src/logging.cc",
    "content": "/**\n * Copyright 2015-2024, XGBoost Contributors\n * \\file logging.cc\n * \\brief Implementation of loggers.\n * \\author Tianqi Chen\n */\n#include \"xgboost/logging.h\"\n\n#include <string>  // for string\n\n#include \"collective/communicator-inl.h\"\n\n#if !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0\n// Override logging mechanism for non-R interfaces\nvoid dmlc::CustomLogMessage::Log(const std::string& msg) {\n  const xgboost::LogCallbackRegistry *registry =\n      xgboost::LogCallbackRegistryStore::Get();\n  auto callback = registry->Get();\n  callback(msg.c_str());\n}\n\nnamespace xgboost {\n\nConsoleLogger::~ConsoleLogger() {\n  if (ShouldLog(cur_verbosity_)) {\n    dmlc::CustomLogMessage::Log(BaseLogger::log_stream_.str());\n  }\n}\n\nTrackerLogger::~TrackerLogger() {\n  log_stream_ << '\\n';\n  collective::Print(log_stream_.str());\n}\n\n}  // namespace xgboost\n\n#endif  // !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0\n\nnamespace xgboost {\n\nbool ConsoleLogger::ShouldLog(LogVerbosity verbosity) {\n  return static_cast<int>(verbosity) <=\n             (GlobalConfigThreadLocalStore::Get()->verbosity) ||\n         verbosity == LV::kIgnore;\n}\n\nvoid ConsoleLogger::Configure(Args const& args) {\n  auto& param = *GlobalConfigThreadLocalStore::Get();\n  param.UpdateAllowUnknown(args);\n}\n\nConsoleLogger::LogVerbosity ConsoleLogger::DefaultVerbosity() {\n  return LogVerbosity::kWarning;\n}\n\nConsoleLogger::LogVerbosity ConsoleLogger::GlobalVerbosity() {\n  LogVerbosity global_verbosity { LogVerbosity::kWarning };\n  switch (GlobalConfigThreadLocalStore::Get()->verbosity) {\n  case 0:\n    global_verbosity = LogVerbosity::kSilent;\n    break;\n  case 1:\n    global_verbosity = LogVerbosity::kWarning;\n    break;\n  case 2:\n    global_verbosity = LogVerbosity::kInfo;\n    break;\n  case 3:\n    global_verbosity = LogVerbosity::kDebug;\n  default:\n    // global verbosity doesn't require kIgnore\n    
break;\n  }\n\n  return global_verbosity;\n}\n\nConsoleLogger::ConsoleLogger(LogVerbosity cur_verb) :\n    cur_verbosity_{cur_verb} {}\n\nConsoleLogger::ConsoleLogger(\n    const std::string& file, int line, LogVerbosity cur_verb) {\n  cur_verbosity_ = cur_verb;\n  switch (cur_verbosity_) {\n    case LogVerbosity::kWarning:\n      BaseLogger::log_stream_ << \"WARNING: \"\n                              << file << \":\" << line << \": \";\n      break;\n    case LogVerbosity::kDebug:\n      BaseLogger::log_stream_ << \"DEBUG: \"\n                              << file << \":\" << line << \": \";\n      break;\n    case LogVerbosity::kInfo:\n      BaseLogger::log_stream_ << \"INFO: \"\n                              << file << \":\" << line << \": \";\n      break;\n    case LogVerbosity::kIgnore:\n      BaseLogger::log_stream_ << file << \":\" << line << \": \";\n      break;\n    case LogVerbosity::kSilent:\n      break;\n  }\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "src/metric/auc.cc",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#include \"auc.h\"\n\n#include <algorithm>\n#include <array>\n#include <atomic>\n#include <functional>\n#include <limits>\n#include <memory>\n#include <numeric>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include \"../common/algorithm.h\"        // ArgSort\n#include \"../common/math.h\"\n#include \"../common/optional_weight.h\"  // OptionalWeights\n#include \"metric_common.h\"              // MetricNoCache\n#include \"xgboost/context.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/linalg.h\"\n#include \"xgboost/metric.h\"\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(auc);\n/**\n * Calculate AUC for binary classification problem.  This function does not normalize the\n * AUC by 1 / (num_positive * num_negative), instead it returns a tuple for caller to\n * handle the normalization.\n */\ntemplate <typename Fn>\nstd::tuple<double, double, double>\nBinaryAUC(common::Span<float const> predts, linalg::VectorView<float const> labels,\n          common::OptionalWeights weights,\n          std::vector<size_t> const &sorted_idx, Fn &&area_fn) {\n  CHECK_NE(labels.Size(), 0);\n  CHECK_EQ(labels.Size(), predts.size());\n  auto p_predts = predts.data();\n\n  double auc{0};\n\n  float label = labels(sorted_idx.front());\n  float w = weights[sorted_idx[0]];\n  double fp = (1.0 - label) * w, tp = label * w;\n  double tp_prev = 0, fp_prev = 0;\n  // TODO(jiaming): We can parallize this if we have a parallel scan for CPU.\n  for (size_t i = 1; i < sorted_idx.size(); ++i) {\n    if (p_predts[sorted_idx[i]] != p_predts[sorted_idx[i - 1]]) {\n      auc += area_fn(fp_prev, fp, tp_prev, tp);\n      tp_prev = tp;\n      fp_prev = fp;\n    }\n    label = labels(sorted_idx[i]);\n    float w = weights[sorted_idx[i]];\n    fp += (1.0f - label) * w;\n    tp += label * w;\n  }\n\n  auc += area_fn(fp_prev, fp, tp_prev, 
tp);\n  if (fp <= 0.0f || tp <= 0.0f) {\n    auc = 0;\n    fp = 0;\n    tp = 0;\n  }\n\n  return std::make_tuple(fp, tp, auc);\n}\n\n/**\n * Calculate AUC for multi-class classification problem using 1-vs-rest approach.\n *\n * TODO(jiaming): Use better algorithms like:\n *\n * - Kleiman, Ross and Page, David. $AUC_{\\mu}$: A Performance Metric for Multi-Class\n *   Machine Learning Models\n */\ntemplate <typename BinaryAUC>\ndouble MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaInfo const &info,\n                     size_t n_classes, int32_t n_threads, BinaryAUC &&binary_auc) {\n  CHECK_NE(n_classes, 0);\n  auto const labels = info.labels.HostView();\n  if (labels.Shape(0) != 0) {\n    CHECK_EQ(labels.Shape(1), 1) << \"AUC doesn't support multi-target model.\";\n  }\n\n  std::vector<double> results_storage(n_classes * 3, 0);\n  auto results = linalg::MakeTensorView(ctx, results_storage, n_classes, 3);\n  auto local_area = results.Slice(linalg::All(), 0);\n  auto tp = results.Slice(linalg::All(), 1);\n  auto auc = results.Slice(linalg::All(), 2);\n\n  auto weights = common::OptionalWeights{info.weights_.ConstHostSpan()};\n  auto predts_t = linalg::MakeTensorView(ctx, predts, info.num_row_, n_classes);\n\n  if (info.labels.Size() != 0) {\n    common::ParallelFor(n_classes, n_threads, [&](auto c) {\n      std::vector<float> proba(info.labels.Size());\n      std::vector<float> response(info.labels.Size());\n      for (size_t i = 0; i < proba.size(); ++i) {\n        proba[i] = predts_t(i, c);\n        response[i] = labels(i) == c ? 1.0f : 0.0;\n      }\n      double fp;\n      std::tie(fp, tp(c), auc(c)) = binary_auc(\n          ctx, proba, linalg::MakeVec(response.data(), response.size(), ctx->Device()), weights);\n      local_area(c) = fp * tp(c);\n    });\n  }\n\n  // we have 2 averages going in here, first is among workers, second is among\n  // classes. 
allreduce sums up fp/tp auc for each class.\n  auto rc = collective::GlobalSum(ctx, info, results);\n  collective::SafeColl(rc);\n\n  double auc_sum{0};\n  double tp_sum{0};\n  for (size_t c = 0; c < n_classes; ++c) {\n    if (local_area(c) != 0) {\n      // normalize and weight it by prevalence.  After allreduce, `local_area`\n      // means the total covered area (not area under curve, rather it's the\n      // accessible area for each worker) for each class.\n      auc_sum += auc(c) / local_area(c) * tp(c);\n      tp_sum += tp(c);\n    } else {\n      auc_sum = std::numeric_limits<double>::quiet_NaN();\n      break;\n    }\n  }\n  if (tp_sum == 0 || std::isnan(auc_sum)) {\n    auc_sum = std::numeric_limits<double>::quiet_NaN();\n  } else {\n    auc_sum /= tp_sum;\n  }\n  return auc_sum;\n}\n\nstd::tuple<double, double, double> BinaryROCAUC(Context const *ctx,\n                                                common::Span<float const> predts,\n                                                linalg::VectorView<float const> labels,\n                                                common::OptionalWeights weights) {\n  auto const sorted_idx =\n      common::ArgSort<size_t>(ctx, predts.data(), predts.data() + predts.size(), std::greater<>{});\n  return BinaryAUC(predts, labels, weights, sorted_idx, TrapezoidArea);\n}\n\n/**\n * Calculate AUC for 1 ranking group;\n */\ndouble GroupRankingROC(Context const* ctx, common::Span<float const> predts,\n                       linalg::VectorView<float const> labels, float w) {\n  // on ranking, we just count all pairs.\n  double auc{0};\n  // argsort doesn't support tensor input yet.\n  auto raw_labels = labels.Values().subspan(0, labels.Size());\n  auto const sorted_idx = common::ArgSort<size_t>(\n      ctx, raw_labels.data(), raw_labels.data() + raw_labels.size(), std::greater<>{});\n  w = common::Sqr(w);\n\n  double sum_w = 0.0f;\n  for (size_t i = 0; i < labels.Size(); ++i) {\n    for (size_t j = i + 1; j < labels.Size(); 
++j) {\n      auto predt = predts[sorted_idx[i]] - predts[sorted_idx[j]];\n      if (predt > 0) {\n        predt = 1.0;\n      } else if (predt == 0) {\n        predt = 0.5;\n      } else {\n        predt = 0;\n      }\n      auc += predt * w;\n      sum_w += w;\n    }\n  }\n  if (sum_w != 0) {\n    auc /= sum_w;\n  }\n  CHECK_LE(auc, 1.0 + kRtEps);\n  return auc;\n}\n\n/**\n * \\brief PR-AUC for binary classification.\n *\n *   https://doi.org/10.1371/journal.pone.0092209\n */\nstd::tuple<double, double, double> BinaryPRAUC(Context const *ctx, common::Span<float const> predts,\n                                               linalg::VectorView<float const> labels,\n                                               common::OptionalWeights weights) {\n  auto const sorted_idx =\n      common::ArgSort<size_t>(ctx, predts.data(), predts.data() + predts.size(), std::greater<>{});\n  double total_pos{0}, total_neg{0};\n  for (size_t i = 0; i < labels.Size(); ++i) {\n    auto w = weights[i];\n    total_pos += w * labels(i);\n    total_neg += w * (1.0f - labels(i));\n  }\n  if (total_pos <= 0 || total_neg <= 0) {\n    return {1.0f, 1.0f, std::numeric_limits<float>::quiet_NaN()};\n  }\n  auto fn = [total_pos](double fp_prev, double fp, double tp_prev, double tp) {\n    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp, total_pos);\n  };\n\n  double tp{0}, fp{0}, auc{0};\n  std::tie(fp, tp, auc) = BinaryAUC(predts, labels, weights, sorted_idx, fn);\n  return std::make_tuple(1.0, 1.0, auc);\n}\n\n/**\n * Cast LTR problem to binary classification problem by comparing pairs.\n */\ntemplate <bool is_roc>\nstd::pair<double, uint32_t> RankingAUC(Context const *ctx, std::vector<float> const &predts,\n                                       MetaInfo const &info, int32_t n_threads) {\n  CHECK_GE(info.group_ptr_.size(), 2);\n  uint32_t n_groups = info.group_ptr_.size() - 1;\n  auto s_predts = common::Span<float const>{predts};\n  auto labels = info.labels.View(ctx->Device());\n  auto 
s_weights = info.weights_.ConstHostSpan();\n\n  std::atomic<uint32_t> invalid_groups{0};\n\n  std::vector<double> auc_tloc(n_threads, 0);\n  common::ParallelFor(n_groups, n_threads, [&](size_t g) {\n    g += 1;  // indexing needs to start from 1\n    size_t cnt = info.group_ptr_[g] - info.group_ptr_[g - 1];\n    float w = s_weights.empty() ? 1.0f : s_weights[g - 1];\n    auto g_predts = s_predts.subspan(info.group_ptr_[g - 1], cnt);\n    auto g_labels = labels.Slice(linalg::Range(info.group_ptr_[g - 1], info.group_ptr_[g]));\n    double auc;\n    if (is_roc && g_labels.Size() < 3) {\n      // With 2 documents, there's only 1 comparison can be made.  So either\n      // TP or FP will be zero.\n      invalid_groups++;\n      auc = 0;\n    } else {\n      if (is_roc) {\n        auc = GroupRankingROC(ctx, g_predts, g_labels, w);\n      } else {\n        auc = std::get<2>(BinaryPRAUC(ctx, g_predts, g_labels, common::OptionalWeights{w}));\n      }\n      if (std::isnan(auc)) {\n        invalid_groups++;\n        auc = 0;\n      }\n    }\n    auc_tloc[omp_get_thread_num()] += auc;\n  });\n  double sum_auc = std::accumulate(auc_tloc.cbegin(), auc_tloc.cend(), 0.0);\n\n  return std::make_pair(sum_auc, n_groups - invalid_groups);\n}\n\ntemplate <typename Curve>\nclass EvalAUC : public MetricNoCache {\n  double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info) override {\n    double auc {0};\n    if (ctx_->Device().IsCUDA()) {\n      preds.SetDevice(ctx_->Device());\n      info.labels.SetDevice(ctx_->Device());\n      info.weights_.SetDevice(ctx_->Device());\n    }\n    //  We use the global size to handle empty dataset.\n    std::array<bst_idx_t, 2> meta{info.labels.Size(), preds.Size()};\n    if (!info.IsVerticalFederated()) {\n      auto rc = collective::Allreduce(\n          ctx_,\n          linalg::MakeTensorView(DeviceOrd::CPU(), common::Span{meta.data(), meta.size()},\n                                 meta.size()),\n          collective::Op::kMax);\n 
     collective::SafeColl(rc);\n    }\n    if (meta[0] == 0) {\n      // Empty across all workers, which is not supported.\n      auc = std::numeric_limits<double>::quiet_NaN();\n    } else if (!info.group_ptr_.empty()) {\n      /**\n       * learning to rank\n       */\n      if (!info.weights_.Empty()) {\n        CHECK_EQ(info.weights_.Size(), info.group_ptr_.size() - 1);\n      }\n      uint32_t valid_groups = 0;\n      if (info.labels.Size() != 0) {\n        CHECK_EQ(info.group_ptr_.back(), info.labels.Size());\n        std::tie(auc, valid_groups) =\n            static_cast<Curve *>(this)->EvalRanking(preds, info);\n      }\n      if (valid_groups != info.group_ptr_.size() - 1) {\n        InvalidGroupAUC();\n      }\n\n      auc = collective::GlobalRatio(ctx_, info, auc, static_cast<double>(valid_groups));\n      if (!std::isnan(auc)) {\n        CHECK_LE(auc, 1.0 + kRtEps) << \"Total AUC across groups: \" << auc * valid_groups\n                                    << \", valid groups: \" << valid_groups;\n      }\n    } else if (meta[0] != meta[1] && meta[1] % meta[0] == 0) {\n      /**\n       * multi class\n       */\n      size_t n_classes = meta[1] / meta[0];\n      CHECK_NE(n_classes, 0);\n      auc = static_cast<Curve *>(this)->EvalMultiClass(preds, info, n_classes);\n    } else {\n      /**\n       * binary classification\n       */\n      double fp{0}, tp{0};\n      if (!(preds.Empty() || info.labels.Size() == 0)) {\n        std::tie(fp, tp, auc) =\n            static_cast<Curve *>(this)->EvalBinary(preds, info);\n      }\n      auc = collective::GlobalRatio(ctx_, info, auc, fp * tp);\n      if (!std::isnan(auc)) {\n        CHECK_LE(auc, 1.0 + kRtEps);\n        auc = std::min(auc, 1.0);\n      }\n    }\n    if (std::isnan(auc)) {\n      LOG(WARNING) << \"Dataset is empty, or contains only positive or negative samples.\";\n    }\n    return auc;\n  }\n};\n\nclass EvalROCAUC : public EvalAUC<EvalROCAUC> {\n  std::shared_ptr<DeviceAUCCache> d_cache_;\n\n 
public:\n  std::pair<double, uint32_t> EvalRanking(HostDeviceVector<float> const &predts,\n                                          MetaInfo const &info) {\n    double auc{0};\n    uint32_t valid_groups = 0;\n    auto n_threads = ctx_->Threads();\n    if (ctx_->IsCUDA()) {\n      std::tie(auc, valid_groups) =\n          GPURankingAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_);\n    } else {\n      std::tie(auc, valid_groups) =\n          RankingAUC<true>(ctx_, predts.ConstHostVector(), info, n_threads);\n    }\n    return std::make_pair(auc, valid_groups);\n  }\n\n  double EvalMultiClass(HostDeviceVector<float> const &predts,\n                        MetaInfo const &info, size_t n_classes) {\n    double auc{0};\n    auto n_threads = ctx_->Threads();\n    CHECK_NE(n_classes, 0);\n    if (ctx_->IsCUDA()) {\n      auc = GPUMultiClassROCAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_, n_classes);\n    } else {\n      auc = MultiClassOVR(ctx_, predts.ConstHostVector(), info, n_classes, n_threads, BinaryROCAUC);\n    }\n    return auc;\n  }\n\n  std::tuple<double, double, double>\n  EvalBinary(HostDeviceVector<float> const &predts, MetaInfo const &info) {\n    double fp, tp, auc;\n    if (ctx_->IsCUDA()) {\n      std::tie(fp, tp, auc) =\n          GPUBinaryROCAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_);\n    } else {\n      std::tie(fp, tp, auc) = BinaryROCAUC(ctx_, predts.ConstHostVector(),\n                                           info.labels.HostView().Slice(linalg::All(), 0),\n                                           common::OptionalWeights{info.weights_.ConstHostSpan()});\n    }\n    return std::make_tuple(fp, tp, auc);\n  }\n\n public:\n  [[nodiscard]] char const* Name() const override {\n    return \"auc\";\n  }\n};\n\nXGBOOST_REGISTER_METRIC(EvalAUC, \"auc\")\n.describe(\"Receiver Operating Characteristic Area Under the Curve.\")\n.set_body([](const char*) { return new EvalROCAUC(); });\n\n#if 
!defined(XGBOOST_USE_CUDA)\nstd::tuple<double, double, double> GPUBinaryROCAUC(Context const *, common::Span<float const>,\n                                                   MetaInfo const &,\n                                                   std::shared_ptr<DeviceAUCCache> *) {\n  common::AssertGPUSupport();\n  return {};\n}\n\ndouble GPUMultiClassROCAUC(Context const *, common::Span<float const>, MetaInfo const &,\n                           std::shared_ptr<DeviceAUCCache> *, std::size_t) {\n  common::AssertGPUSupport();\n  return 0.0;\n}\n\nstd::pair<double, std::uint32_t> GPURankingAUC(Context const *, common::Span<float const>,\n                                               MetaInfo const &,\n                                               std::shared_ptr<DeviceAUCCache> *) {\n  common::AssertGPUSupport();\n  return {};\n}\nstruct DeviceAUCCache {};\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nclass EvalPRAUC : public EvalAUC<EvalPRAUC> {\n  std::shared_ptr<DeviceAUCCache> d_cache_;\n\n public:\n  std::tuple<double, double, double>\n  EvalBinary(HostDeviceVector<float> const &predts, MetaInfo const &info) {\n    double pr, re, auc;\n    if (ctx_->IsCUDA()) {\n      std::tie(pr, re, auc) = GPUBinaryPRAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_);\n    } else {\n      std::tie(pr, re, auc) =\n          BinaryPRAUC(ctx_, predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),\n                      common::OptionalWeights{info.weights_.ConstHostSpan()});\n    }\n    return std::make_tuple(pr, re, auc);\n  }\n\n  double EvalMultiClass(HostDeviceVector<float> const &predts, MetaInfo const &info,\n                        size_t n_classes) {\n    if (ctx_->IsCUDA()) {\n      return GPUMultiClassPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_, n_classes);\n    } else {\n      auto n_threads = this->ctx_->Threads();\n      return MultiClassOVR(ctx_, predts.ConstHostSpan(), info, n_classes, n_threads, BinaryPRAUC);\n    }\n  }\n\n  
std::pair<double, uint32_t> EvalRanking(HostDeviceVector<float> const &predts,\n                                          MetaInfo const &info) {\n    double auc{0};\n    uint32_t valid_groups = 0;\n    auto n_threads = ctx_->Threads();\n    if (ctx_->IsCUDA()) {\n      std::tie(auc, valid_groups) =\n          GPURankingPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_);\n    } else {\n      auto labels = info.labels.Data()->ConstHostSpan();\n      if (std::any_of(labels.cbegin(), labels.cend(), PRAUCLabelInvalid{})) {\n        InvalidLabels();\n      }\n      std::tie(auc, valid_groups) =\n          RankingAUC<false>(ctx_, predts.ConstHostVector(), info, n_threads);\n    }\n    return std::make_pair(auc, valid_groups);\n  }\n\n public:\n  [[nodiscard]] const char *Name() const override { return \"aucpr\"; }\n};\n\nXGBOOST_REGISTER_METRIC(AUCPR, \"aucpr\")\n    .describe(\"Area under PR curve for both classification and rank.\")\n    .set_body([](char const *) { return new EvalPRAUC{}; });\n\n#if !defined(XGBOOST_USE_CUDA)\nstd::tuple<double, double, double> GPUBinaryPRAUC(Context const *, common::Span<float const>,\n                                                  MetaInfo const &,\n                                                  std::shared_ptr<DeviceAUCCache> *) {\n  common::AssertGPUSupport();\n  return {};\n}\n\ndouble GPUMultiClassPRAUC(Context const *, common::Span<float const>, MetaInfo const &,\n                          std::shared_ptr<DeviceAUCCache> *, std::size_t) {\n  common::AssertGPUSupport();\n  return {};\n}\n\nstd::pair<double, std::uint32_t> GPURankingPRAUC(Context const *, common::Span<float const>,\n                                                 MetaInfo const &,\n                                                 std::shared_ptr<DeviceAUCCache> *) {\n  common::AssertGPUSupport();\n  return {};\n}\n#endif\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/auc.cu",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n */\n#include <thrust/copy.h>     // for copy\n#include <thrust/logical.h>  // for any_of\n#include <thrust/scan.h>\n\n#include <cassert>\n#include <cuda/std/tuple>    // for tuple, get, tie\n#include <cuda/std/utility>  // for pair\n#include <functional>        // for equal_to\n#include <limits>\n#include <memory>\n#include <tuple>\n#include <utility>\n\n#include \"../collective/allreduce.h\"\n#include \"../common/algorithm.cuh\"        // SegmentedArgSort, InclusiveScan\n#include \"../common/optional_weight.h\"    // OptionalWeights\n#include \"../common/threading_utils.cuh\"  // UnravelTrapeziodIdx,SegmentedTrapezoidThreads\n#include \"auc.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/span.h\"\n\nnamespace xgboost {\nnamespace metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(auc_gpu);\n\nnamespace {\n// Pair of FP/TP\nusing Pair = cuda::std::pair<double, double>;\n\ntemplate <typename T, typename U, typename P = cuda::std::pair<T, U>>\nstruct PairPlus {\n  XGBOOST_DEVICE P operator()(P const &l, P const &r) const {\n    return cuda::std::make_pair(l.first + r.first, l.second + r.second);\n  }\n};\n}  // namespace\n\n/**\n * A cache to GPU data to avoid reallocating memory.\n */\nstruct DeviceAUCCache {\n  // index sorted by prediction value\n  dh::device_vector<size_t> sorted_idx;\n  // track FP/TP for computation on trapezoid area\n  dh::device_vector<Pair> fptp;\n  // track FP_PREV/TP_PREV for computation on trapezoid area\n  dh::device_vector<Pair> neg_pos;\n  // index of unique prediction values.\n  dh::device_vector<size_t> unique_idx;\n  // p^T: transposed prediction matrix, used by MultiClassAUC\n  dh::device_vector<float> predts_t;\n\n  void Init(common::Span<float const> predts, bool is_multi) {\n    if (sorted_idx.size() != predts.size()) {\n      sorted_idx.resize(predts.size());\n      fptp.resize(sorted_idx.size());\n      
unique_idx.resize(sorted_idx.size());\n      neg_pos.resize(sorted_idx.size());\n      if (is_multi) {\n        predts_t.resize(sorted_idx.size());\n      }\n    }\n  }\n};\n\ntemplate <bool is_multi>\nvoid InitCacheOnce(common::Span<float const> predts, std::shared_ptr<DeviceAUCCache> *p_cache) {\n  auto &cache = *p_cache;\n  if (!cache) {\n    cache.reset(new DeviceAUCCache);\n  }\n  cache->Init(predts, is_multi);\n}\n\n/**\n * The GPU implementation uses same calculation as CPU with a few more steps to distribute\n * work across threads:\n *\n * - Run scan to obtain TP/FP values, which are right coordinates of trapezoid.\n * - Find distinct prediction values and get the corresponding FP_PREV/TP_PREV value,\n *   which are left coordinates of trapezoids.\n * - Reduce the scan array into 1 AUC value.\n */\ntemplate <typename Fn>\nstd::tuple<double, double, double> GPUBinaryAUC(Context const *ctx,\n                                                common::Span<float const> predts,\n                                                MetaInfo const &info,\n                                                common::Span<size_t const> d_sorted_idx, Fn area_fn,\n                                                std::shared_ptr<DeviceAUCCache> cache) {\n  auto labels = info.labels.View(ctx->Device());\n  auto weights = info.weights_.ConstDeviceSpan();\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n\n  CHECK_NE(labels.Size(), 0);\n  CHECK_EQ(labels.Size(), predts.size());\n\n  /**\n   * Linear scan\n   */\n  auto get_weight = common::OptionalWeights{weights};\n  auto get_fp_tp = [=] XGBOOST_DEVICE(size_t i) {\n    size_t idx = d_sorted_idx[i];\n\n    float label = labels(idx);\n    float w = get_weight[d_sorted_idx[i]];\n\n    float fp = (1.0 - label) * w;\n    float tp = label * w;\n\n    return cuda::std::make_pair(fp, tp);\n  };  // NOLINT\n  auto d_fptp = dh::ToSpan(cache->fptp);\n  dh::LaunchN(d_sorted_idx.size(), ctx->CUDACtx()->Stream(),\n              [=] 
XGBOOST_DEVICE(size_t i) { d_fptp[i] = get_fp_tp(i); });\n\n  auto d_unique_idx = dh::ToSpan(cache->unique_idx);\n  dh::Iota(d_unique_idx, ctx->CUDACtx()->Stream());\n\n  auto uni_key = dh::MakeTransformIterator<float>(\n      thrust::make_counting_iterator(0),\n      [=] XGBOOST_DEVICE(size_t i) { return predts[d_sorted_idx[i]]; });\n  auto end_unique = thrust::unique_by_key_copy(\n      ctx->CUDACtx()->TP(), uni_key, uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx),\n      thrust::make_discard_iterator(), dh::tbegin(d_unique_idx));\n  d_unique_idx = d_unique_idx.subspan(0, end_unique.second - dh::tbegin(d_unique_idx));\n\n  common::InclusiveScan(ctx, dh::tbegin(d_fptp), dh::tbegin(d_fptp), PairPlus<double, double>{},\n                        d_fptp.size());\n\n  auto d_neg_pos = dh::ToSpan(cache->neg_pos);\n  // scatter unique negaive/positive values\n  // shift to right by 1 with initial value being 0\n  dh::LaunchN(d_unique_idx.size(), ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(size_t i) {\n    if (d_unique_idx[i] == 0) {  // first unique index is 0\n      assert(i == 0);\n      d_neg_pos[0] = {0, 0};\n      return;\n    }\n    d_neg_pos[d_unique_idx[i]] = d_fptp[d_unique_idx[i] - 1];\n    if (i == d_unique_idx.size() - 1) {\n      // last one needs to be included, may override above assignment if the last\n      // prediction value is distinct from previous one.\n      d_neg_pos.back() = d_fptp[d_unique_idx[i] - 1];\n      return;\n    }\n  });\n\n  auto in = dh::MakeTransformIterator<double>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        double fp, tp;\n        double fp_prev, tp_prev;\n        if (i == 0) {\n          // handle the last element\n          cuda::std::tie(fp, tp) = d_fptp.back();\n          cuda::std::tie(fp_prev, tp_prev) = d_neg_pos[d_unique_idx.back()];\n        } else {\n          cuda::std::tie(fp, tp) = d_fptp[d_unique_idx[i] - 1];\n          cuda::std::tie(fp_prev, tp_prev) = 
d_neg_pos[d_unique_idx[i - 1]];\n        }\n        return area_fn(fp_prev, fp, tp_prev, tp);\n      });\n\n  Pair last = cache->fptp.back();\n  double auc = thrust::reduce(ctx->CUDACtx()->CTP(), in, in + d_unique_idx.size());\n  return std::make_tuple(last.first, last.second, auc);\n}\n\nstd::tuple<double, double, double> GPUBinaryROCAUC(Context const *ctx,\n                                                   common::Span<float const> predts,\n                                                   MetaInfo const &info,\n                                                   std::shared_ptr<DeviceAUCCache> *p_cache) {\n  auto &cache = *p_cache;\n  InitCacheOnce<false>(predts, p_cache);\n\n  /**\n   * Create sorted index for each class\n   */\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n  common::ArgSort<false>(ctx, predts, d_sorted_idx);\n  // Create lambda to avoid pass function pointer.\n  return GPUBinaryAUC(\n      ctx, predts, info, d_sorted_idx,\n      [] XGBOOST_DEVICE(double x0, double x1, double y0, double y1) -> double {\n        return TrapezoidArea(x0, x1, y0, y1);\n      },\n      cache);\n}\n\nvoid Transpose(common::Span<float const> in, common::Span<float> out, size_t m, size_t n) {\n  CHECK_EQ(in.size(), out.size());\n  CHECK_EQ(in.size(), m * n);\n  dh::LaunchN(in.size(), [=] XGBOOST_DEVICE(size_t i) {\n    size_t col = i / m;\n    size_t row = i % m;\n    size_t idx = row * n + col;\n    out[i] = in[idx];\n  });\n}\n\ndouble ScaleClasses(Context const *ctx, bool is_column_split, common::Span<double> results,\n                    common::Span<double> local_area, common::Span<double> tp,\n                    common::Span<double> auc, size_t n_classes) {\n  // With vertical federated learning, only the root has label, other parties are not\n  // evaluation metrics.\n  if (collective::IsDistributed() && !(is_column_split && collective::IsFederated())) {\n    std::int32_t device = dh::CurrentDevice();\n    
CHECK_EQ(dh::CudaGetPointerDevice(results.data()), device);\n    auto rc = collective::Allreduce(\n        ctx, linalg::MakeVec(results.data(), results.size(), ctx->Device()), collective::Op::kSum);\n  }\n  auto reduce_in = dh::MakeTransformIterator<Pair>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        if (local_area[i] > 0) {\n          return cuda::std::make_pair(auc[i] / local_area[i] * tp[i], tp[i]);\n        }\n        return cuda::std::make_pair(std::numeric_limits<double>::quiet_NaN(), 0.0);\n      });\n\n  double tp_sum;\n  double auc_sum;\n  cuda::std::tie(auc_sum, tp_sum) =\n      thrust::reduce(ctx->CUDACtx()->CTP(), reduce_in, reduce_in + n_classes, Pair{0.0, 0.0},\n                     PairPlus<double, double>{});\n  if (tp_sum != 0 && !std::isnan(auc_sum)) {\n    auc_sum /= tp_sum;\n  } else {\n    return std::numeric_limits<double>::quiet_NaN();\n  }\n  return auc_sum;\n}\n\n/**\n * Calculate FP/TP for multi-class and PR-AUC ranking. `segment_id` is a function for\n * getting class id or group id given scan index.\n */\ntemplate <typename Fn>\nvoid SegmentedFPTP(Context const *ctx, common::Span<Pair> d_fptp, Fn segment_id) {\n  using Triple = cuda::std::tuple<uint32_t, double, double>;\n  // expand to tuple to include idx\n  auto fptp_it_in = dh::MakeTransformIterator<Triple>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        return cuda::std::make_tuple(i, d_fptp[i].first, d_fptp[i].second);\n      });\n  // shrink down to pair\n  auto fptp_it_out = thrust::make_transform_output_iterator(\n      dh::TypedDiscard<Triple>{}, [d_fptp] XGBOOST_DEVICE(Triple const &t) {\n        d_fptp[cuda::std::get<0>(t)] =\n            cuda::std::make_pair(cuda::std::get<1>(t), cuda::std::get<2>(t));\n        return t;\n      });\n  common::InclusiveScan(\n      ctx, fptp_it_in, fptp_it_out,\n      [=] XGBOOST_DEVICE(Triple const &l, Triple const &r) {\n        uint32_t l_gid = 
segment_id(cuda::std::get<0>(l));\n        uint32_t r_gid = segment_id(cuda::std::get<0>(r));\n        if (l_gid != r_gid) {\n          return r;\n        }\n\n        return Triple(cuda::std::get<0>(r),\n                      cuda::std::get<1>(l) + cuda::std::get<1>(r),   // fp\n                      cuda::std::get<2>(l) + cuda::std::get<2>(r));  // tp\n      },\n      d_fptp.size());\n}\n\n/**\n * Reduce the values of AUC for each group/class.\n */\ntemplate <typename Area, typename Seg>\nvoid SegmentedReduceAUC(Context const *ctx, common::Span<size_t const> d_unique_idx,\n                        common::Span<uint32_t const> d_class_ptr,\n                        common::Span<uint32_t const> d_unique_class_ptr,\n                        std::shared_ptr<DeviceAUCCache> cache, Area area_fn, Seg segment_id,\n                        common::Span<double> d_auc) {\n  auto d_fptp = dh::ToSpan(cache->fptp);\n  auto d_neg_pos = dh::ToSpan(cache->neg_pos);\n  auto key_in = dh::MakeTransformIterator<uint32_t>(thrust::make_counting_iterator(0),\n                                                    [=] XGBOOST_DEVICE(size_t i) {\n                                                      size_t class_id = segment_id(d_unique_idx[i]);\n                                                      return class_id;\n                                                    });\n  auto val_in = dh::MakeTransformIterator<double>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        size_t class_id = segment_id(d_unique_idx[i]);\n\n        double fp, tp, fp_prev, tp_prev;\n        if (i == d_unique_class_ptr[class_id]) {\n          // first item is ignored, we use this thread to calculate the last item\n          cuda::std::tie(fp, tp) = d_fptp[common::LastOf(class_id, d_class_ptr)];\n          cuda::std::tie(fp_prev, tp_prev) =\n              d_neg_pos[d_unique_idx[common::LastOf(class_id, d_unique_class_ptr)]];\n        } else {\n          cuda::std::tie(fp, tp) = 
d_fptp[d_unique_idx[i] - 1];\n          cuda::std::tie(fp_prev, tp_prev) = d_neg_pos[d_unique_idx[i - 1]];\n        }\n        double auc = area_fn(fp_prev, fp, tp_prev, tp, class_id);\n        return auc;\n      });\n  thrust::reduce_by_key(ctx->CUDACtx()->TP(), key_in, key_in + d_unique_idx.size(), val_in,\n                        thrust::make_discard_iterator(), dh::tbegin(d_auc));\n}\n\n/**\n * MultiClass implementation is similar to binary classification, except we need to split\n * up each class in all kernels.\n */\ntemplate <bool scale, typename Fn>\ndouble GPUMultiClassAUCOVR(Context const *ctx, MetaInfo const &info,\n                           common::Span<uint32_t> d_class_ptr, size_t n_classes,\n                           std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  /**\n   * Sorted idx\n   */\n  auto d_predts_t = dh::ToSpan(cache->predts_t);\n  // Index is sorted within class.\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n\n  auto labels = info.labels.View(ctx->Device());\n  auto weights = info.weights_.ConstDeviceSpan();\n\n  size_t n_samples = labels.Shape(0);\n\n  if (n_samples == 0) {\n    dh::TemporaryArray<double> resutls(n_classes * 4, 0.0f);\n    auto d_results = dh::ToSpan(resutls);\n    dh::LaunchN(n_classes * 4, [=] XGBOOST_DEVICE(size_t i) { d_results[i] = 0.0f; });\n    auto local_area = d_results.subspan(0, n_classes);\n    auto tp = d_results.subspan(2 * n_classes, n_classes);\n    auto auc = d_results.subspan(3 * n_classes, n_classes);\n    return ScaleClasses(ctx, info.IsColumnSplit(), d_results, local_area, tp, auc, n_classes);\n  }\n\n  /**\n   * Linear scan\n   */\n  dh::caching_device_vector<double> d_auc(n_classes, 0);\n  auto get_weight = common::OptionalWeights{weights};\n  auto d_fptp = dh::ToSpan(cache->fptp);\n  auto get_fp_tp = [=] XGBOOST_DEVICE(size_t i) {\n    size_t idx = d_sorted_idx[i];\n\n    size_t class_id = i / n_samples;\n    // labels is a 
vector of size n_samples.\n    float label = labels(idx % n_samples) == class_id;\n\n    float w = get_weight[d_sorted_idx[i] % n_samples];\n    float fp = (1.0 - label) * w;\n    float tp = label * w;\n    return cuda::std::make_pair(fp, tp);\n  };  // NOLINT\n  dh::LaunchN(d_sorted_idx.size(), [=] XGBOOST_DEVICE(size_t i) { d_fptp[i] = get_fp_tp(i); });\n\n  /**\n   *  Handle duplicated predictions\n   */\n  auto d_unique_idx = dh::ToSpan(cache->unique_idx);\n  dh::Iota(d_unique_idx, ctx->CUDACtx()->Stream());\n  auto uni_key = dh::MakeTransformIterator<cuda::std::pair<std::uint32_t, float>>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        uint32_t class_id = i / n_samples;\n        float predt = d_predts_t[d_sorted_idx[i]];\n        return cuda::std::make_pair(class_id, predt);\n      });\n\n  // unique values are sparse, so we need a CSR style indptr\n  dh::TemporaryArray<uint32_t> unique_class_ptr(d_class_ptr.size());\n  auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);\n  auto n_uniques = dh::SegmentedUniqueByKey(\n      ctx->CUDACtx()->TP(), dh::tbegin(d_class_ptr), dh::tend(d_class_ptr), uni_key,\n      uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(),\n      dh::tbegin(d_unique_idx), std::equal_to<cuda::std::pair<uint32_t, float>>{});\n  d_unique_idx = d_unique_idx.subspan(0, n_uniques);\n\n  auto get_class_id = [=] XGBOOST_DEVICE(size_t idx) {\n    return idx / n_samples;\n  };\n  SegmentedFPTP(ctx, d_fptp, get_class_id);\n\n  // scatter unique FP_PREV/TP_PREV values\n  auto d_neg_pos = dh::ToSpan(cache->neg_pos);\n  // When dataset is not empty, each class must have at least 1 (unique) sample\n  // prediction, so no need to handle special case.\n  dh::LaunchN(d_unique_idx.size(), ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(size_t i) {\n    if (d_unique_idx[i] % n_samples == 0) {  // first unique index is 0\n      assert(d_unique_idx[i] % n_samples == 0);\n      
d_neg_pos[d_unique_idx[i]] = {0, 0};  // class_id * n_samples = i\n      return;\n    }\n    uint32_t class_id = d_unique_idx[i] / n_samples;\n    d_neg_pos[d_unique_idx[i]] = d_fptp[d_unique_idx[i] - 1];\n    if (i == common::LastOf(class_id, d_unique_class_ptr)) {\n      // last one needs to be included.\n      size_t last = d_unique_idx[common::LastOf(class_id, d_unique_class_ptr)];\n      d_neg_pos[common::LastOf(class_id, d_class_ptr)] = d_fptp[last - 1];\n      return;\n    }\n  });\n\n  /**\n   * Reduce the result for each class\n   */\n  auto s_d_auc = dh::ToSpan(d_auc);\n  SegmentedReduceAUC(ctx, d_unique_idx, d_class_ptr, d_unique_class_ptr, cache, area_fn,\n                     get_class_id, s_d_auc);\n\n  /**\n   * Scale the classes with number of samples for each class.\n   */\n  dh::TemporaryArray<double> resutls(n_classes * 4);\n  auto d_results = dh::ToSpan(resutls);\n  auto local_area = d_results.subspan(0, n_classes);\n  auto fp = d_results.subspan(n_classes, n_classes);\n  auto tp = d_results.subspan(2 * n_classes, n_classes);\n  auto auc = d_results.subspan(3 * n_classes, n_classes);\n\n  dh::LaunchN(n_classes, ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(size_t c) {\n    auc[c] = s_d_auc[c];\n    auto last = d_fptp[n_samples * c + (n_samples - 1)];\n    fp[c] = last.first;\n    if (scale) {\n      local_area[c] = last.first * last.second;\n      tp[c] = last.second;\n    } else {\n      local_area[c] = 1.0f;\n      tp[c] = 1.0f;\n    }\n  });\n  return ScaleClasses(ctx, info.IsColumnSplit(), d_results, local_area, tp, auc, n_classes);\n}\n\nvoid MultiClassSortedIdx(Context const *ctx, common::Span<float const> predts,\n                         common::Span<uint32_t> d_class_ptr,\n                         std::shared_ptr<DeviceAUCCache> cache) {\n  size_t n_classes = d_class_ptr.size() - 1;\n  auto d_predts_t = dh::ToSpan(cache->predts_t);\n  auto n_samples = d_predts_t.size() / n_classes;\n  if (n_samples == 0) {\n    return;\n  }\n  
Transpose(predts, d_predts_t, n_samples, n_classes);\n  dh::LaunchN(n_classes + 1, ctx->CUDACtx()->Stream(),\n              [=] XGBOOST_DEVICE(size_t i) { d_class_ptr[i] = i * n_samples; });\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n  common::SegmentedArgSort<false, false>(ctx, d_predts_t, d_class_ptr, d_sorted_idx);\n}\n\ndouble GPUMultiClassROCAUC(Context const *ctx, common::Span<float const> predts,\n                           MetaInfo const &info, std::shared_ptr<DeviceAUCCache> *p_cache,\n                           std::size_t n_classes) {\n  auto &cache = *p_cache;\n  InitCacheOnce<true>(predts, p_cache);\n\n  /**\n   * Create sorted index for each class\n   */\n  dh::TemporaryArray<uint32_t> class_ptr(n_classes + 1, 0);\n  MultiClassSortedIdx(ctx, predts, dh::ToSpan(class_ptr), cache);\n\n  auto fn = [] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp,\n                              size_t /*class_id*/) {\n    return TrapezoidArea(fp_prev, fp, tp_prev, tp);\n  };\n  return GPUMultiClassAUCOVR<true>(ctx, info, dh::ToSpan(class_ptr), n_classes, cache, fn);\n}\n\nnamespace {\nstruct RankScanItem {\n  size_t idx;\n  double predt;\n  double w;\n  bst_group_t group_id;\n};\n}  // anonymous namespace\n\nstd::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<float const> predts,\n                                               MetaInfo const &info,\n                                               std::shared_ptr<DeviceAUCCache> *p_cache) {\n  auto &cache = *p_cache;\n  InitCacheOnce<false>(predts, p_cache);\n\n  dh::caching_device_vector<bst_group_t> group_ptr(info.group_ptr_);\n\n  auto d_group_ptr = dh::ToSpan(group_ptr);\n  /**\n   * Validate the dataset\n   */\n  auto check_it = dh::MakeTransformIterator<size_t>(\n      thrust::make_counting_iterator(0),\n      [=] XGBOOST_DEVICE(size_t i) { return d_group_ptr[i + 1] - d_group_ptr[i]; });\n  size_t n_valid =\n      thrust::count_if(ctx->CUDACtx()->CTP(), 
check_it, check_it + group_ptr.size() - 1,\n                       [=] XGBOOST_DEVICE(size_t len) { return len >= 3; });\n  if (n_valid < info.group_ptr_.size() - 1) {\n    InvalidGroupAUC();\n  }\n  if (n_valid == 0) {\n    return std::make_pair(0.0, 0);\n  }\n\n  /**\n   * Sort the labels\n   */\n  auto d_labels = info.labels.View(ctx->Device());\n\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n  common::SegmentedArgSort<false, false>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);\n\n  auto d_weights = info.weights_.ConstDeviceSpan();\n\n  dh::caching_device_vector<size_t> threads_group_ptr(group_ptr.size(), 0);\n  auto d_threads_group_ptr = dh::ToSpan(threads_group_ptr);\n  // Use max to represent triangle\n  auto n_threads = common::SegmentedTrapezoidThreads(ctx, d_group_ptr, d_threads_group_ptr,\n                                                     std::numeric_limits<std::size_t>::max());\n  CHECK_LT(n_threads, std::numeric_limits<int32_t>::max());\n  // get the coordinate in nested summation\n  auto get_i_j = [=] XGBOOST_DEVICE(size_t idx, size_t query_group_idx) {\n    auto data_group_begin = d_group_ptr[query_group_idx];\n    size_t n_samples = d_group_ptr[query_group_idx + 1] - data_group_begin;\n    auto thread_group_begin = d_threads_group_ptr[query_group_idx];\n    auto idx_in_thread_group = idx - thread_group_begin;\n\n    size_t i, j;\n    common::UnravelTrapeziodIdx(idx_in_thread_group, n_samples, &i, &j);\n    // we use global index among all groups for sorted idx, so i, j should also be global\n    // index.\n    i += data_group_begin;\n    j += data_group_begin;\n    return cuda::std::make_pair(i, j);\n  };  // NOLINT\n  auto in = dh::MakeTransformIterator<RankScanItem>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t idx) {\n        bst_group_t query_group_idx = dh::SegmentId(d_threads_group_ptr, idx);\n        auto data_group_begin = d_group_ptr[query_group_idx];\n        size_t n_samples = 
d_group_ptr[query_group_idx + 1] - data_group_begin;\n        if (n_samples < 3) {\n          // at least 3 documents are required.\n          return RankScanItem{idx, 0, 0, query_group_idx};\n        }\n\n        size_t i, j;\n        cuda::std::tie(i, j) = get_i_j(idx, query_group_idx);\n\n        float predt = predts[d_sorted_idx[i]] - predts[d_sorted_idx[j]];\n        float w = common::Sqr(d_weights.empty() ? 1.0f : d_weights[query_group_idx]);\n        if (predt > 0) {\n          predt = 1.0;\n        } else if (predt == 0) {\n          predt = 0.5;\n        } else {\n          predt = 0;\n        }\n        predt *= w;\n        return RankScanItem{idx, predt, w, query_group_idx};\n      });\n\n  dh::TemporaryArray<double> d_auc(group_ptr.size() - 1);\n  auto s_d_auc = dh::ToSpan(d_auc);\n  auto out = thrust::make_transform_output_iterator(\n      dh::TypedDiscard<RankScanItem>{},\n      [=] XGBOOST_DEVICE(RankScanItem const &item) -> RankScanItem {\n        auto group_id = item.group_id;\n        assert(group_id < d_group_ptr.size());\n        auto data_group_begin = d_group_ptr[group_id];\n        size_t n_samples = d_group_ptr[group_id + 1] - data_group_begin;\n        // last item of current group\n        if (item.idx == common::LastOf(group_id, d_threads_group_ptr)) {\n          if (item.w > 0) {\n            s_d_auc[group_id] = item.predt / item.w;\n          } else {\n            s_d_auc[group_id] = 0;\n          }\n        }\n        return {};  // discard\n      });\n  common::InclusiveScan(\n      ctx, in, out,\n      [] XGBOOST_DEVICE(RankScanItem const &l, RankScanItem const &r) {\n        if (l.group_id != r.group_id) {\n          return r;\n        }\n        return RankScanItem{r.idx, l.predt + r.predt, l.w + r.w, l.group_id};\n      },\n      n_threads);\n\n  /**\n   * Scale the AUC with number of items in each group.\n   */\n  double auc = thrust::reduce(ctx->CUDACtx()->CTP(), dh::tbegin(s_d_auc), dh::tend(s_d_auc), 0.0);\n  return 
std::make_pair(auc, n_valid);\n}\n\nstd::tuple<double, double, double> GPUBinaryPRAUC(Context const *ctx,\n                                                  common::Span<float const> predts,\n                                                  MetaInfo const &info,\n                                                  std::shared_ptr<DeviceAUCCache> *p_cache) {\n  auto &cache = *p_cache;\n  InitCacheOnce<false>(predts, p_cache);\n\n  /**\n   * Create sorted index for each class\n   */\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n  common::ArgSort<false>(ctx, predts, d_sorted_idx);\n\n  auto labels = info.labels.View(ctx->Device());\n  auto d_weights = info.weights_.ConstDeviceSpan();\n  auto get_weight = common::OptionalWeights{d_weights};\n  auto it = dh::MakeTransformIterator<Pair>(thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(\n                                                                                     size_t i) {\n    auto w = get_weight[d_sorted_idx[i]];\n    return cuda::std::make_pair(labels(d_sorted_idx[i]) * w, (1.0f - labels(d_sorted_idx[i])) * w);\n  });\n  double total_pos, total_neg;\n  cuda::std::tie(total_pos, total_neg) = thrust::reduce(\n      ctx->CUDACtx()->CTP(), it, it + labels.Size(), Pair{0.0, 0.0}, PairPlus<double, double>{});\n\n  if (total_pos <= 0.0 || total_neg <= 0.0) {\n    return {0.0f, 0.0f, 0.0f};\n  }\n\n  auto fn = [total_pos] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp) {\n    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp, total_pos);\n  };\n  double fp, tp, auc;\n  std::tie(fp, tp, auc) = GPUBinaryAUC(ctx, predts, info, d_sorted_idx, fn, cache);\n  return std::make_tuple(1.0, 1.0, auc);\n}\n\ndouble GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,\n                          MetaInfo const &info, std::shared_ptr<DeviceAUCCache> *p_cache,\n                          std::size_t n_classes) {\n  auto &cache = *p_cache;\n  InitCacheOnce<true>(predts, 
p_cache);\n\n  /**\n   * Create sorted index for each class\n   */\n  dh::TemporaryArray<uint32_t> class_ptr(n_classes + 1, 0);\n  auto d_class_ptr = dh::ToSpan(class_ptr);\n  MultiClassSortedIdx(ctx, predts, d_class_ptr, cache);\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n\n  auto d_weights = info.weights_.ConstDeviceSpan();\n\n  /**\n   * Get total positive/negative\n   */\n  auto labels = info.labels.View(ctx->Device());\n  auto n_samples = info.num_row_;\n  dh::caching_device_vector<Pair> totals(n_classes);\n  auto key_it = dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0ul),\n                                                  [n_samples] XGBOOST_DEVICE(size_t i) {\n                                                    return i / n_samples;  // class id\n                                                  });\n  auto get_weight = common::OptionalWeights{d_weights};\n  auto val_it = dh::MakeTransformIterator<cuda::std::pair<double, double>>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {\n        auto idx = d_sorted_idx[i] % n_samples;\n        auto w = get_weight[idx];\n        auto class_id = i / n_samples;\n        auto y = labels(idx) == class_id;\n        return cuda::std::make_pair(y * w, (1.0f - y) * w);\n      });\n  thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + predts.size(), val_it,\n                        thrust::make_discard_iterator(), totals.begin(), thrust::equal_to<size_t>{},\n                        PairPlus<double, double>{});\n\n  /**\n   * Calculate AUC\n   */\n  auto d_totals = dh::ToSpan(totals);\n  auto fn = [d_totals] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp,\n                                      size_t class_id) {\n    auto total_pos = d_totals[class_id].first;\n    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp, d_totals[class_id].first);\n  };\n  return GPUMultiClassAUCOVR<false>(ctx, info, d_class_ptr, n_classes, cache, 
fn);\n}\n\ntemplate <typename Fn>\nstd::pair<double, uint32_t> GPURankingPRAUCImpl(Context const *ctx,\n                                                common::Span<float const> predts,\n                                                MetaInfo const &info,\n                                                common::Span<uint32_t> d_group_ptr,\n                                                std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {\n  /**\n   * Sorted idx\n   */\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n\n  auto labels = info.labels.View(ctx->Device());\n  auto weights = info.weights_.ConstDeviceSpan();\n\n  uint32_t n_groups = static_cast<uint32_t>(info.group_ptr_.size() - 1);\n\n  /**\n   * Linear scan\n   */\n  size_t n_samples = labels.Shape(0);\n  dh::caching_device_vector<double> d_auc(n_groups, 0);\n  auto get_weight = common::OptionalWeights{weights};\n  auto d_fptp = dh::ToSpan(cache->fptp);\n  auto get_fp_tp = [=] XGBOOST_DEVICE(size_t i) {\n    size_t idx = d_sorted_idx[i];\n\n    size_t group_id = dh::SegmentId(d_group_ptr, idx);\n    float label = labels(idx);\n\n    float w = get_weight[group_id];\n    float fp = (1.0 - label) * w;\n    float tp = label * w;\n    return cuda::std::make_pair(fp, tp);\n  };  // NOLINT\n  dh::LaunchN(d_sorted_idx.size(), ctx->CUDACtx()->Stream(),\n              [=] XGBOOST_DEVICE(size_t i) { d_fptp[i] = get_fp_tp(i); });\n\n  /**\n   *  Handle duplicated predictions\n   */\n  auto d_unique_idx = dh::ToSpan(cache->unique_idx);\n  dh::Iota(d_unique_idx, ctx->CUDACtx()->Stream());\n  auto uni_key = dh::MakeTransformIterator<cuda::std::pair<uint32_t, float>>(\n      thrust::make_counting_iterator(0), [=] XGBOOST_DEVICE(size_t i) {\n        auto idx = d_sorted_idx[i];\n        bst_group_t group_id = dh::SegmentId(d_group_ptr, idx);\n        float predt = predts[idx];\n        return cuda::std::make_pair(group_id, predt);\n      });\n\n  // unique values are sparse, so we need a CSR style indptr\n  
dh::TemporaryArray<uint32_t> unique_class_ptr(d_group_ptr.size());\n  auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);\n  auto n_uniques = dh::SegmentedUniqueByKey(\n      ctx->CUDACtx()->TP(), dh::tbegin(d_group_ptr), dh::tend(d_group_ptr), uni_key,\n      uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(),\n      dh::tbegin(d_unique_idx), std::equal_to<cuda::std::pair<uint32_t, float>>{});\n  d_unique_idx = d_unique_idx.subspan(0, n_uniques);\n\n  auto get_group_id = [=] XGBOOST_DEVICE(size_t idx) {\n    return dh::SegmentId(d_group_ptr, idx);\n  };\n  SegmentedFPTP(ctx, d_fptp, get_group_id);\n\n  // scatter unique FP_PREV/TP_PREV values\n  auto d_neg_pos = dh::ToSpan(cache->neg_pos);\n  dh::LaunchN(d_unique_idx.size(), [=] XGBOOST_DEVICE(size_t i) {\n    if (thrust::binary_search(thrust::seq, d_unique_class_ptr.cbegin(), d_unique_class_ptr.cend(),\n                              i)) {  // first unique index is 0\n      d_neg_pos[d_unique_idx[i]] = {0, 0};\n      return;\n    }\n    auto group_idx = dh::SegmentId(d_group_ptr, d_unique_idx[i]);\n    d_neg_pos[d_unique_idx[i]] = d_fptp[d_unique_idx[i] - 1];\n    if (i == common::LastOf(group_idx, d_unique_class_ptr)) {\n      // last one needs to be included.\n      size_t last = d_unique_idx[common::LastOf(group_idx, d_unique_class_ptr)];\n      d_neg_pos[common::LastOf(group_idx, d_group_ptr)] = d_fptp[last - 1];\n      return;\n    }\n  });\n\n  /**\n   * Reduce the result for each group\n   */\n  auto s_d_auc = dh::ToSpan(d_auc);\n  SegmentedReduceAUC(ctx, d_unique_idx, d_group_ptr, d_unique_class_ptr, cache, area_fn,\n                     get_group_id, s_d_auc);\n\n  /**\n   * Scale the groups with number of samples for each group.\n   */\n  double auc;\n  uint32_t invalid_groups;\n  {\n    auto it = dh::MakeTransformIterator<cuda::std::pair<double, uint32_t>>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t g) {\n          double fp, tp;\n         
 cuda::std::tie(fp, tp) = d_fptp[common::LastOf(g, d_group_ptr)];\n          double area = fp * tp;\n          auto n_documents = d_group_ptr[g + 1] - d_group_ptr[g];\n          if (area > 0 && n_documents >= 2) {\n            return cuda::std::make_pair(s_d_auc[g], static_cast<uint32_t>(0));\n          }\n          return cuda::std::make_pair(0.0, static_cast<uint32_t>(1));\n        });\n    cuda::std::tie(auc, invalid_groups) =\n        thrust::reduce(ctx->CUDACtx()->CTP(), it, it + n_groups,\n                       cuda::std::pair<double, uint32_t>(0.0, 0), PairPlus<double, uint32_t>{});\n  }\n  return std::make_pair(auc, n_groups - invalid_groups);\n}\n\nstd::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,\n                                                 common::Span<float const> predts,\n                                                 MetaInfo const &info,\n                                                 std::shared_ptr<DeviceAUCCache> *p_cache) {\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  if (predts.empty()) {\n    return std::make_pair(0.0, static_cast<uint32_t>(0));\n  }\n\n  auto &cache = *p_cache;\n  InitCacheOnce<false>(predts, p_cache);\n\n  dh::device_vector<bst_group_t> group_ptr(info.group_ptr_.size());\n  thrust::copy(info.group_ptr_.begin(), info.group_ptr_.end(), group_ptr.begin());  // NOLINT\n  auto d_group_ptr = dh::ToSpan(group_ptr);\n  CHECK_GE(info.group_ptr_.size(), 1) << \"Must have at least 1 query group for LTR.\";\n  size_t n_groups = info.group_ptr_.size() - 1;\n\n  /**\n   * Create sorted index for each group\n   */\n  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);\n  common::SegmentedArgSort<false, false>(ctx, predts, d_group_ptr, d_sorted_idx);\n\n  auto labels = info.labels.View(ctx->Device());\n  if (thrust::any_of(ctx->CUDACtx()->CTP(), dh::tbegin(labels.Values()), dh::tend(labels.Values()),\n                     PRAUCLabelInvalid{})) {\n    InvalidLabels();\n  }\n  /**\n   * Get total 
positive/negative for each group.\n   */\n  auto d_weights = info.weights_.ConstDeviceSpan();\n  dh::caching_device_vector<cuda::std::pair<double, double>> totals(n_groups);\n  auto key_it = dh::MakeTransformIterator<size_t>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(size_t i) { return dh::SegmentId(d_group_ptr, i); });\n  auto val_it = dh::MakeTransformIterator<Pair>(thrust::make_counting_iterator(0ul),\n                                                [=] XGBOOST_DEVICE(size_t i) {\n                                                  float w = 1.0f;\n                                                  // Avoid a binary search if the groups\n                                                  // are not weighted.\n                                                  if (!d_weights.empty()) {\n                                                    auto g = dh::SegmentId(d_group_ptr, i);\n                                                    w = d_weights[g];\n                                                  }\n                                                  auto y = labels(i);\n                                                  return cuda::std::make_pair(y * w, (1.0 - y) * w);\n                                                });\n  thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + predts.size(), val_it,\n                        thrust::make_discard_iterator(), totals.begin(), std::equal_to<size_t>{},\n                        PairPlus<double, double>{});  // NOLINT\n\n  /**\n   * Calculate AUC\n   */\n  auto d_totals = dh::ToSpan(totals);\n  auto fn = [d_totals] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp,\n                                      size_t group_id) {\n    auto total_pos = d_totals[group_id].first;\n    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp, d_totals[group_id].first);\n  };\n  return GPURankingPRAUCImpl(ctx, predts, info, d_group_ptr, cache, fn);\n}\n}  // namespace metric\n}  // 
namespace xgboost\n"
  },
  {
    "path": "src/metric/auc.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_METRIC_AUC_H_\n#define XGBOOST_METRIC_AUC_H_\n#include <cmath>\n#include <memory>\n#include <tuple>\n#include <utility>\n\n#include \"../collective/communicator-inl.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/metric.h\"\n#include \"xgboost/span.h\"\n\nnamespace xgboost::metric {\n/***********\n * ROC AUC *\n ***********/\nXGBOOST_DEVICE inline double TrapezoidArea(double x0, double x1, double y0, double y1) {\n  return std::abs(x0 - x1) * (y0 + y1) * 0.5f;\n}\n\nstruct DeviceAUCCache;\n\nstd::tuple<double, double, double> GPUBinaryROCAUC(Context const *ctx,\n                                                   common::Span<float const> predts,\n                                                   MetaInfo const &info,\n                                                   std::shared_ptr<DeviceAUCCache> *p_cache);\n\ndouble GPUMultiClassROCAUC(Context const *ctx, common::Span<float const> predts,\n                           MetaInfo const &info, std::shared_ptr<DeviceAUCCache> *p_cache,\n                           std::size_t n_classes);\n\nstd::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<float const> predts,\n                                               MetaInfo const &info,\n                                               std::shared_ptr<DeviceAUCCache> *cache);\n\n/**********\n * PR AUC *\n **********/\nstd::tuple<double, double, double> GPUBinaryPRAUC(Context const *ctx,\n                                                  common::Span<float const> predts,\n                                                  MetaInfo const &info,\n                                                  std::shared_ptr<DeviceAUCCache> *p_cache);\n\ndouble GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,\n                          MetaInfo const &info, std::shared_ptr<DeviceAUCCache> *p_cache,\n                        
  std::size_t n_classes);\n\nstd::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,\n                                                 common::Span<float const> predts,\n                                                 MetaInfo const &info,\n                                                 std::shared_ptr<DeviceAUCCache> *cache);\n\nnamespace detail {\nXGBOOST_DEVICE inline double CalcH(double fp_a, double fp_b, double tp_a,\n                                   double tp_b) {\n  return (fp_b - fp_a) / (tp_b - tp_a);\n}\n\nXGBOOST_DEVICE inline double CalcB(double fp_a, double h, double tp_a, double total_pos) {\n  return (fp_a - h * tp_a) / total_pos;\n}\n\nXGBOOST_DEVICE inline double CalcA(double h) { return h + 1; }\n\nXGBOOST_DEVICE inline double CalcDeltaPRAUC(double fp_prev, double fp,\n                                            double tp_prev, double tp,\n                                            double total_pos) {\n  double pr_prev = tp_prev / total_pos;\n  double pr = tp / total_pos;\n\n  double h{0}, a{0}, b{0};\n\n  if (tp == tp_prev) {\n    a = 1.0;\n    b = 0.0;\n  } else {\n    h = detail::CalcH(fp_prev, fp, tp_prev, tp);\n    a = detail::CalcA(h);\n    b = detail::CalcB(fp_prev, h, tp_prev, total_pos);\n  }\n\n  double area = 0;\n  if (b != 0.0) {\n    area = (pr - pr_prev -\n            b / a * (std::log(a * pr + b) - std::log(a * pr_prev + b))) /\n           a;\n  } else {\n    area = (pr - pr_prev) / a;\n  }\n  return area;\n}\n}  // namespace detail\n\ninline void InvalidGroupAUC() {\n  LOG(INFO) << \"Invalid group with less than 3 samples is found on worker \"\n            << collective::GetRank() << \".  
Calculating AUC value requires at \"\n            << \"least 2 pairs of samples.\";\n}\n\nstruct PRAUCLabelInvalid {\n  XGBOOST_DEVICE bool operator()(float y) { return y < 0.0f || y > 1.0f; }\n};\n\ninline void InvalidLabels() {\n  LOG(FATAL) << \"PR-AUC supports only binary relevance for learning to rank.\";\n}\n}  // namespace xgboost::metric\n#endif  // XGBOOST_METRIC_AUC_H_\n"
  },
  {
    "path": "src/metric/elementwise_metric.cc",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n// Dummy file to keep the CUDA conditional compile trick.\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"elementwise_metric.cu\"\n#endif  // !defined(XGBOOST_USE_CUDA)\n"
  },
  {
    "path": "src/metric/elementwise_metric.cu",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file elementwise_metric.cu\n * \\brief evaluation metrics for elementwise binary or regression.\n * \\author Kailong Chen, Tianqi Chen\n *\n *  The expressions like wsum == 0 ? esum : esum / wsum is used to handle empty dataset.\n */\n#include <dmlc/registry.h>\n\n#include <array>\n#include <cmath>\n#include <numeric>  // for accumulate\n\n#include \"../common/expectile_loss_utils.h\"  // ExpectileLossParam\n#include \"../common/math.h\"\n#include \"../common/nvtx_utils.h\"       // for xgboost_NVTX_FN_RANGE\n#include \"../common/optional_weight.h\"  // OptionalWeights\n#include \"../common/pseudo_huber.h\"\n#include \"../common/quantile_loss_utils.h\"  // QuantileLossParam\n#include \"../common/threading_utils.h\"\n#include \"metric_common.h\"              // MetricNoCache\n#include \"xgboost/collective/result.h\"  // for SafeColl\n#include \"xgboost/metric.h\"\n\n#if defined(XGBOOST_USE_CUDA)\n#include <thrust/functional.h>  // thrust::plus<>\n#include <thrust/iterator/counting_iterator.h>\n#include <thrust/transform_reduce.h>\n\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#else\n#include \"../common/common.h\"  // for AssertGPUSupport\n#endif                         // XGBOOST_USE_CUDA\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(elementwise_metric);\n\nnamespace {\n/**\n * \\brief Reduce function for element wise metrics.\n *\n *   The loss function should handle all the computation for each sample, including\n *   applying the weights.  
A tuple of {error_i, weight_i} is expected as return.\n */\ntemplate <typename Fn>\nPackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss,\n                          size_t num_preds = 1) {\n  PackedReduceResult result;\n  // This function doesn't have sycl-specific implementation yet.\n  // For that reason we transfer data to host in case of sycl is used for propper execution.\n  auto labels = info.labels.View(ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device());\n  if (ctx->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    thrust::counting_iterator<size_t> begin(0);\n    thrust::counting_iterator<size_t> end = begin + labels.Size() * num_preds;\n    result = thrust::transform_reduce(\n        ctx->CUDACtx()->CTP(), begin, end,\n        [=] XGBOOST_DEVICE(size_t i) {\n          auto idx = linalg::UnravelIndex(i, labels.Shape());\n          auto sample_id = std::get<0>(idx);\n          auto target_id = std::get<1>(idx);\n          auto res = loss(i, sample_id, target_id);\n          float v{std::get<0>(res)}, wt{std::get<1>(res)};\n          return PackedReduceResult{v, wt};\n        },\n        PackedReduceResult{}, thrust::plus<PackedReduceResult>());\n#else\n    common::AssertGPUSupport();\n#endif  //  defined(XGBOOST_USE_CUDA)\n  } else {\n    auto n_threads = ctx->Threads();\n    std::vector<double> score_tloc(n_threads, 0.0);\n    std::vector<double> weight_tloc(n_threads, 0.0);\n    // We sum over losses over all samples and targets instead of performing this for each\n    // target since the first one approach more accurate while the second approach is used\n    // for approximation in distributed setting.  For rmse:\n    // - sqrt(1/w(sum_t0 + sum_t1 + ... + sum_tm))       // multi-target\n    // - sqrt(avg_t0) + sqrt(avg_t1) + ... 
sqrt(avg_tm)  // distributed\n\n    auto size = info.labels.Size() * num_preds;\n    std::size_t constexpr kBlockSize = 2048;\n    common::ParallelFor1d<kBlockSize>(size, n_threads, [&](auto&& block) {\n      double sum_score = 0, sum_weight = 0;\n      for (std::size_t i = block.begin(), n = block.end(); i < n; ++i) {\n        auto [sample_id, target_id] = linalg::UnravelIndex(i, labels.Shape());\n\n        auto [v, wt] = loss(i, sample_id, target_id);\n        sum_score += v;\n        sum_weight += wt;\n      }\n\n      auto t_idx = omp_get_thread_num();\n      score_tloc[t_idx] += sum_score;\n      weight_tloc[t_idx] += sum_weight;\n    });\n\n    double residue_sum = std::accumulate(score_tloc.cbegin(), score_tloc.cend(), 0.0);\n    double weights_sum = std::accumulate(weight_tloc.cbegin(), weight_tloc.cend(), 0.0);\n    result = PackedReduceResult{residue_sum, weights_sum};\n  }\n  return result;\n}\n}  // anonymous namespace\n\nstruct EvalRowRMSE {\n  char const* Name() const { return \"rmse\"; }\n\n  XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {\n    bst_float diff = label - pred;\n    return diff * diff;\n  }\n  static double GetFinal(double esum, double wsum) {\n    return wsum == 0 ? std::sqrt(esum) : std::sqrt(esum / wsum);\n  }\n};\n\nstruct EvalRowRMSLE {\n  char const* Name() const { return \"rmsle\"; }\n\n  XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {\n    bst_float diff = std::log1p(label) - std::log1p(pred);\n    return diff * diff;\n  }\n  static double GetFinal(double esum, double wsum) {\n    return wsum == 0 ? std::sqrt(esum) : std::sqrt(esum / wsum);\n  }\n};\n\nstruct EvalRowMAE {\n  const char* Name() const { return \"mae\"; }\n\n  XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {\n    return std::abs(label - pred);\n  }\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? 
esum : esum / wsum; }\n};\n\nstruct EvalRowMAPE {\n  const char* Name() const { return \"mape\"; }\n  XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {\n    return std::abs((label - pred) / label);\n  }\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? esum : esum / wsum; }\n};\n\nnamespace {\nXGBOOST_DEVICE inline float LogLoss(float y, float py) {\n  auto xlogy = [](float x, float y) {\n    float eps = 1e-16;\n    return (x - 0.0f == 0.0f) ? 0.0f : (x * std::log(std::max(y, eps)));\n  };\n  const bst_float pneg = 1.0f - py;\n  return xlogy(-y, py) + xlogy(-(1.0f - y), pneg);\n}\n}  // anonymous namespace\n\nstruct EvalRowLogLoss {\n  const char* Name() const { return \"logloss\"; }\n\n  XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const { return LogLoss(y, py); }\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? esum : esum / wsum; }\n};\n\nclass PseudoErrorLoss : public MetricNoCache {\n  PseudoHuberParam param_;\n\n public:\n  const char* Name() const override { return \"mphe\"; }\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n  void LoadConfig(Json const& in) override { FromJson(in[\"pseudo_huber_param\"], &param_); }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(this->Name());\n    out[\"pseudo_huber_param\"] = ToJson(param_);\n  }\n\n  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {\n    xgboost_NVTX_FN_RANGE();\n\n    CHECK_EQ(info.labels.Shape(0), info.num_row_);\n    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();\n    auto labels = info.labels.View(device);\n    preds.SetDevice(device);\n    auto predts = ctx_->IsCUDA() ? preds.ConstDeviceSpan() : preds.ConstHostSpan();\n    info.weights_.SetDevice(device);\n    common::OptionalWeights weights(ctx_->IsCUDA() ? 
info.weights_.ConstDeviceSpan()\n                                                   : info.weights_.ConstHostSpan());\n    float slope = this->param_.huber_slope;\n    CHECK_NE(slope, 0.0) << \"slope for pseudo huber cannot be 0.\";\n    PackedReduceResult result =\n        Reduce(ctx_, info, [=] XGBOOST_DEVICE(size_t i, size_t sample_id, size_t target_id) {\n          float wt = weights[sample_id];\n          auto a = labels(sample_id, target_id) - predts[i];\n          auto v = common::Sqr(slope) * (std::sqrt((1 + common::Sqr(a / slope))) - 1) * wt;\n          return std::make_tuple(v, wt);\n        });\n    std::array<double, 2> dat{result.Residue(), result.Weights()};\n    auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    return EvalRowMAPE::GetFinal(dat[0], dat[1]);\n  }\n};\n\nstruct EvalError {\n  explicit EvalError(const char* param) {\n    if (param != nullptr) {\n      CHECK_EQ(sscanf(param, \"%f\", &threshold_), 1)\n          << \"unable to parse the threshold value for the error metric\";\n      has_param_ = true;\n    } else {\n      threshold_ = 0.5f;\n      has_param_ = false;\n    }\n  }\n  [[nodiscard]] const char* Name() const {\n    static thread_local std::string name;\n    if (has_param_) {\n      std::ostringstream os;\n      os << \"error\";\n      if (threshold_ != 0.5f) os << '@' << threshold_;\n      name = os.str();\n      return name.c_str();\n    } else {\n      return \"error\";\n    }\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {\n    // assume label is in [0,1]\n    return pred > threshold_ ? 1.0f - label : label;\n  }\n\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? 
esum : esum / wsum; }\n\n private:\n  bst_float threshold_;\n  bool has_param_;\n};\n\nstruct EvalPoissonNegLogLik {\n  [[nodiscard]] const char* Name() const { return \"poisson-nloglik\"; }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {\n    const bst_float eps = 1e-16f;\n    if (py < eps) py = eps;\n    return common::LogGamma(y + 1.0f) + py - std::log(py) * y;\n  }\n\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? esum : esum / wsum; }\n};\n\n/**\n * Gamma deviance\n *\n *   Expected input:\n *   label >= 0\n *   predt >= 0\n */\nstruct EvalGammaDeviance {\n  [[nodiscard]] const char* Name() const { return \"gamma-deviance\"; }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float predt) const {\n    predt += kRtEps;\n    label += kRtEps;\n    return std::log(predt / label) + label / predt - 1;\n  }\n\n  static double GetFinal(double esum, double wsum) {\n    if (wsum <= 0) {\n      wsum = kRtEps;\n    }\n    return 2 * esum / wsum;\n  }\n};\n\nstruct EvalGammaNLogLik {\n  static const char* Name() { return \"gamma-nloglik\"; }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {\n    py = std::max(py, 1e-6f);\n    // hardcoded dispersion.\n    float constexpr kPsi = 1.0;\n    bst_float theta = -1. / py;\n    bst_float a = kPsi;\n    float b = -std::log(-theta);\n    // c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);\n    //   = 1.0f        * std::log(y)      - std::log(y) - 0 = 0\n    float c = 0;\n    // general form for exponential family.\n    return -((y * theta - b) / a + c);\n  }\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? 
esum : esum / wsum; }\n};\n\nstruct EvalTweedieNLogLik {\n  explicit EvalTweedieNLogLik(const char* param) {\n    CHECK(param != nullptr) << \"tweedie-nloglik must be in format tweedie-nloglik@rho\";\n    rho_ = atof(param);\n    CHECK(rho_ < 2 && rho_ >= 1) << \"tweedie variance power must be in interval [1, 2)\";\n  }\n  [[nodiscard]] const char* Name() const {\n    static thread_local std::string name;\n    std::ostringstream os;\n    os << \"tweedie-nloglik@\" << rho_;\n    name = os.str();\n    return name.c_str();\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float p) const {\n    bst_float a = y * std::exp((1 - rho_) * std::log(p)) / (1 - rho_);\n    bst_float b = std::exp((2 - rho_) * std::log(p)) / (2 - rho_);\n    return -a + b;\n  }\n  static double GetFinal(double esum, double wsum) { return wsum == 0 ? esum : esum / wsum; }\n\n protected:\n  bst_float rho_;\n};\n/*!\n * \\brief base class of element-wise evaluation\n * \\tparam Derived the name of subclass\n */\ntemplate <typename Policy>\nstruct EvalEWiseBase : public MetricNoCache {\n  EvalEWiseBase() = default;\n  explicit EvalEWiseBase(char const* policy_param) : policy_{policy_param} {}\n\n  double Eval(HostDeviceVector<bst_float> const& preds, const MetaInfo& info) override {\n    CHECK_EQ(preds.Size(), info.labels.Size())\n        << \"label and prediction size not match, \"\n        << \"hint: use merror or mlogloss for multi-class classification\";\n    if (info.labels.Size() != 0) {\n      CHECK_NE(info.labels.Shape(1), 0);\n    }\n    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();\n    auto labels = info.labels.View(device);\n    info.weights_.SetDevice(device);\n    common::OptionalWeights weights(ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()\n                                                   : info.weights_.ConstHostSpan());\n    preds.SetDevice(device);\n    auto predts = ctx_->IsCUDA() ? 
preds.ConstDeviceSpan() : preds.ConstHostSpan();\n\n    auto d_policy = policy_;\n    auto result =\n        Reduce(ctx_, info, [=] XGBOOST_DEVICE(size_t i, size_t sample_id, size_t target_id) {\n          float wt = weights[sample_id];\n          float residue = d_policy.EvalRow(labels(sample_id, target_id), predts[i]);\n          residue *= wt;\n          return std::make_tuple(residue, wt);\n        });\n\n    std::array<double, 2> dat{result.Residue(), result.Weights()};\n    auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    return Policy::GetFinal(dat[0], dat[1]);\n  }\n\n  [[nodiscard]] const char* Name() const override { return policy_.Name(); }\n\n private:\n  Policy policy_;\n};\n\nXGBOOST_REGISTER_METRIC(RMSE, \"rmse\")\n    .describe(\"Rooted mean square error.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSE>(); });\n\nXGBOOST_REGISTER_METRIC(RMSLE, \"rmsle\")\n    .describe(\"Rooted mean square log error.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSLE>(); });\n\nXGBOOST_REGISTER_METRIC(MAE, \"mae\").describe(\"Mean absolute error.\").set_body([](const char*) {\n  return new EvalEWiseBase<EvalRowMAE>();\n});\n\nXGBOOST_REGISTER_METRIC(MAPE, \"mape\")\n    .describe(\"Mean absolute percentage error.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalRowMAPE>(); });\n\nXGBOOST_REGISTER_METRIC(LogLoss, \"logloss\")\n    .describe(\"Negative loglikelihood for logistic regression.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalRowLogLoss>(); });\n\nXGBOOST_REGISTER_METRIC(PseudoErrorLoss, \"mphe\")\n    .describe(\"Mean Pseudo-huber error.\")\n    .set_body([](const char*) { return new PseudoErrorLoss{}; });\n\nXGBOOST_REGISTER_METRIC(PossionNegLoglik, \"poisson-nloglik\")\n    .describe(\"Negative loglikelihood for poisson regression.\")\n    .set_body([](const char*) { return new 
EvalEWiseBase<EvalPoissonNegLogLik>(); });\n\nXGBOOST_REGISTER_METRIC(GammaDeviance, \"gamma-deviance\")\n    .describe(\"Residual deviance for gamma regression.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalGammaDeviance>(); });\n\nXGBOOST_REGISTER_METRIC(GammaNLogLik, \"gamma-nloglik\")\n    .describe(\"Negative log-likelihood for gamma regression.\")\n    .set_body([](const char*) { return new EvalEWiseBase<EvalGammaNLogLik>(); });\n\nXGBOOST_REGISTER_METRIC(Error, \"error\")\n    .describe(\"Binary classification error.\")\n    .set_body([](const char* param) { return new EvalEWiseBase<EvalError>(param); });\n\nXGBOOST_REGISTER_METRIC(TweedieNLogLik, \"tweedie-nloglik\")\n    .describe(\"tweedie-nloglik@rho for tweedie regression.\")\n    .set_body([](const char* param) { return new EvalEWiseBase<EvalTweedieNLogLik>(param); });\n\nclass QuantileError : public MetricNoCache {\n  HostDeviceVector<float> alpha_;\n  common::QuantileLossParam param_;\n\n public:\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    param_.Validate();\n    alpha_.HostVector() = param_.quantile_alpha.Get();\n  }\n\n  double Eval(HostDeviceVector<bst_float> const& preds, const MetaInfo& info) override {\n    CHECK(!alpha_.Empty());\n    if (info.num_row_ == 0) {\n      // empty DMatrix on distributed env\n      std::array<double, 2> dat{0.0, 0.0};\n      auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n      collective::SafeColl(rc);\n      CHECK_GT(dat[1], 0);\n      return dat[0] / dat[1];\n    }\n\n    auto const* ctx = ctx_;\n    auto y_true = info.labels.View(ctx->Device());\n    preds.SetDevice(ctx->Device());\n    alpha_.SetDevice(ctx->Device());\n    auto alpha = ctx->IsCPU() ? 
alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();\n    std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size();\n    CHECK_NE(n_targets, 0);\n    auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast<std::size_t>(info.num_row_),\n                                          alpha_.Size(), n_targets);\n\n    info.weights_.SetDevice(ctx->Device());\n    common::OptionalWeights weight{ctx->IsCPU() ? info.weights_.ConstHostSpan()\n                                                : info.weights_.ConstDeviceSpan()};\n\n    auto result = Reduce(\n        ctx, info,\n        [=] XGBOOST_DEVICE(std::size_t i, std::size_t sample_id, std::size_t target_id) {\n          auto idx = linalg::UnravelIndex(i, y_predt.Shape());\n          sample_id = std::get<0>(idx);\n          std::size_t quantile_id = std::get<1>(idx);\n          target_id = std::get<2>(idx);\n\n          auto loss = [a = alpha[quantile_id]](float p, float y) {\n            auto d = y - p;\n            float sign = d >= 0.0f;\n            auto res = (a * sign * d) - (1.0f - a) * (1.0f - sign) * d;\n            return res;\n          };\n          auto w = weight[sample_id];\n          auto l =\n              loss(y_predt(sample_id, quantile_id, target_id), y_true(sample_id, target_id)) * w;\n          return std::make_tuple(l, w);\n        },\n        alpha_.Size());\n    std::array<double, 2> dat{result.Residue(), result.Weights()};\n    auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    CHECK_GT(dat[1], 0);\n    return dat[0] / dat[1];\n  }\n\n  const char* Name() const override { return \"quantile\"; }\n  void LoadConfig(Json const& in) override {\n    auto const& obj = get<Object const>(in);\n    auto it = obj.find(\"quantile_loss_param\");\n    if (it != obj.cend()) {\n      FromJson(it->second, &param_);\n      auto const& name = get<String const>(in[\"name\"]);\n      CHECK_EQ(name, \"quantile\");\n    }\n  }\n  void 
SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(this->Name());\n    out[\"quantile_loss_param\"] = ToJson(param_);\n  }\n};\n\nXGBOOST_REGISTER_METRIC(QuantileError, \"quantile\")\n    .describe(\"Quantile regression error.\")\n    .set_body([](const char*) { return new QuantileError{}; });\n\nclass ExpectileError : public MetricNoCache {\n  HostDeviceVector<float> alpha_;\n  common::ExpectileLossParam param_;\n\n public:\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    param_.Validate();\n    alpha_.HostVector() = param_.expectile_alpha.Get();\n  }\n\n  double Eval(HostDeviceVector<bst_float> const& preds, const MetaInfo& info) override {\n    CHECK(!alpha_.Empty());\n    if (info.num_row_ == 0) {\n      // empty DMatrix on distributed env\n      std::array<double, 2> dat{0.0, 0.0};\n      auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n      collective::SafeColl(rc);\n      CHECK_GT(dat[1], 0);\n      return dat[0] / dat[1];\n    }\n\n    auto const* ctx = ctx_;\n    auto y_true = info.labels.View(ctx->Device());\n    preds.SetDevice(ctx->Device());\n    alpha_.SetDevice(ctx->Device());\n    auto alpha = ctx->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();\n    std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size();\n    CHECK_NE(n_targets, 0);\n    auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast<std::size_t>(info.num_row_),\n                                          alpha_.Size(), n_targets);\n\n    info.weights_.SetDevice(ctx->Device());\n    common::OptionalWeights weight{ctx->IsCPU() ? 
info.weights_.ConstHostSpan()\n                                                : info.weights_.ConstDeviceSpan()};\n\n    auto result = Reduce(\n        ctx, info,\n        [=] XGBOOST_DEVICE(std::size_t i, std::size_t sample_id, std::size_t target_id) {\n          auto idx = linalg::UnravelIndex(i, y_predt.Shape());\n          sample_id = std::get<0>(idx);\n          std::size_t expectile_id = std::get<1>(idx);\n          target_id = std::get<2>(idx);\n\n          auto pred = y_predt(sample_id, expectile_id, target_id);\n          auto label = y_true(sample_id, target_id);\n          auto diff = pred - label;\n          auto expectile = alpha[expectile_id];\n          auto weight_scale = diff >= 0.0f ? (1.0f - expectile) : expectile;\n          auto sample_weight = weight[sample_id];\n          auto loss = weight_scale * diff * diff * sample_weight;\n          return std::make_tuple(loss, sample_weight);\n        },\n        alpha_.Size());\n    std::array<double, 2> dat{result.Residue(), result.Weights()};\n    auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    CHECK_GT(dat[1], 0);\n    return dat[0] / dat[1];\n  }\n\n  const char* Name() const override { return \"expectile\"; }\n  void LoadConfig(Json const& in) override {\n    auto const& obj = get<Object const>(in);\n    auto it = obj.find(\"expectile_loss_param\");\n    if (it != obj.cend()) {\n      FromJson(it->second, &param_);\n      auto const& name = get<String const>(in[\"name\"]);\n      CHECK_EQ(name, \"expectile\");\n      param_.Validate();\n      alpha_.HostVector() = param_.expectile_alpha.Get();\n    }\n  }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(this->Name());\n    out[\"expectile_loss_param\"] = ToJson(param_);\n  }\n};\n\nXGBOOST_REGISTER_METRIC(ExpectileError, \"expectile\")\n    .describe(\"Expectile regression error.\")\n    .set_body([](const char*) { return 
new ExpectileError{}; });\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/metric.cc",
    "content": "/**\n * Copyright 2015-2023 by XGBoost Contributors\n * \\file metric_registry.cc\n * \\brief Registry of objective functions.\n */\n#include <dmlc/registry.h>\n#include <xgboost/context.h>\n#include <xgboost/metric.h>\n\n#include \"metric_common.h\"\n\nnamespace xgboost {\ntemplate <typename MetricRegistry>\nMetric* CreateMetricImpl(const std::string& name) {\n  std::string buf = name;\n  std::string prefix = name;\n  const char* param;\n  auto pos = buf.find('@');\n  if (pos == std::string::npos) {\n    if (!buf.empty() && buf.back() == '-') {\n      // Metrics of form \"metric-\"\n      prefix = buf.substr(0, buf.length() - 1);  // Chop off '-'\n      param = \"-\";\n    } else {\n      prefix = buf;\n      param = nullptr;\n    }\n    auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());\n    if (e == nullptr) {\n      return nullptr;\n    }\n    auto p_metric = (e->body)(param);\n    return p_metric;\n  } else {\n    std::string prefix = buf.substr(0, pos);\n    auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());\n    if (e == nullptr) {\n      return nullptr;\n    }\n    auto p_metric = (e->body)(buf.substr(pos + 1, buf.length()).c_str());\n    return p_metric;\n  }\n}\n\nMetric *\nMetric::Create(const std::string& name, Context const* ctx) {\n  auto metric = CreateMetricImpl<MetricReg>(name);\n  if (metric == nullptr) {\n    LOG(FATAL) << \"Unknown metric function \" << name;\n  }\n\n  metric->ctx_ = ctx;\n  return metric;\n}\n}  // namespace xgboost\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::MetricReg);\n}\n\nnamespace xgboost::metric {\n// List of files that will be force linked in static links.\nDMLC_REGISTRY_LINK_TAG(auc);\nDMLC_REGISTRY_LINK_TAG(elementwise_metric);\nDMLC_REGISTRY_LINK_TAG(multiclass_metric);\nDMLC_REGISTRY_LINK_TAG(survival_metric);\nDMLC_REGISTRY_LINK_TAG(rank_metric);\n#ifdef 
XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(auc_gpu);\nDMLC_REGISTRY_LINK_TAG(rank_metric_gpu);\n#endif\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/metric_common.h",
    "content": "/**\n * Copyright 2018-2024, Contributors\n */\n#ifndef XGBOOST_METRIC_METRIC_COMMON_H_\n#define XGBOOST_METRIC_METRIC_COMMON_H_\n\n#include <limits>\n#include <memory>  // shared_ptr\n#include <string>\n\n#include \"../collective/aggregator.h\"\n#include \"xgboost/metric.h\"\n\nnamespace xgboost {\nstruct Context;\n// Metric that doesn't need to cache anything based on input data.\nclass MetricNoCache : public Metric {\n public:\n  virtual double Eval(HostDeviceVector<float> const &predts, MetaInfo const &info) = 0;\n\n  double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {\n    double result{0.0};\n    auto const &info = p_fmat->Info();\n    collective::ApplyWithLabels(ctx_, info, &result, sizeof(double),\n                                [&] { result = this->Eval(predts, info); });\n    return result;\n  }\n};\n\nnamespace metric {\n// Ranking config to be used on device and host\nstruct EvalRankConfig {\n public:\n  // Parsed from metric name, the top-n number of instances within a group after\n  // ranking to use for evaluation.\n  unsigned topn{std::numeric_limits<unsigned>::max()};\n  std::string name;\n  bool minus{false};\n};\n\nclass PackedReduceResult {\n  double residue_sum_{0};\n  double weights_sum_{0};\n\n public:\n  XGBOOST_DEVICE PackedReduceResult() {}  // NOLINT\n  XGBOOST_DEVICE PackedReduceResult(double residue, double weight)\n      : residue_sum_{residue}, weights_sum_{weight} {}\n\n  XGBOOST_DEVICE\n  PackedReduceResult operator+(PackedReduceResult const &other) const {\n    return PackedReduceResult{residue_sum_ + other.residue_sum_, weights_sum_ + other.weights_sum_};\n  }\n  PackedReduceResult &operator+=(PackedReduceResult const &other) {\n    this->residue_sum_ += other.residue_sum_;\n    this->weights_sum_ += other.weights_sum_;\n    return *this;\n  }\n  [[nodiscard]] double Residue() const { return residue_sum_; }\n  [[nodiscard]] double Weights() const { return weights_sum_; 
}\n};\n\n}  // namespace metric\n}  // namespace xgboost\n\n#endif  // XGBOOST_METRIC_METRIC_COMMON_H_\n"
  },
  {
    "path": "src/metric/multiclass_metric.cc",
    "content": "/*!\n * Copyright 2019 XGBoost contributors\n */\n// Dummy file to keep the CUDA conditional compile trick.\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"multiclass_metric.cu\"\n#endif  // !defined(XGBOOST_USE_CUDA)\n"
  },
  {
    "path": "src/metric/multiclass_metric.cu",
    "content": "/**\n * Copyright 2015-2024, XGBoost Contributors\n * \\file multiclass_metric.cc\n * \\brief evaluation metrics for multiclass classification.\n * \\author Kailong Chen, Tianqi Chen\n */\n#include <xgboost/metric.h>\n\n#include <array>\n#include <atomic>\n#include <cmath>\n#include <numeric>  // for accumulate\n\n#include \"../common/math.h\"\n#include \"../common/threading_utils.h\"\n#include \"metric_common.h\"  // MetricNoCache\n\n#if defined(XGBOOST_USE_CUDA)\n#include <thrust/functional.h>        // thrust::plus<>\n#include <thrust/iterator/counting_iterator.h>\n#include <thrust/transform_reduce.h>\n\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../common/device_helpers.cuh\"\n#endif  // XGBOOST_USE_CUDA\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(multiclass_metric);\n\ntemplate <typename EvalRowPolicy>\nclass MultiClassMetricsReduction {\n  void CheckLabelError(int32_t label_error, size_t n_class) const {\n    CHECK(label_error >= 0 && label_error < static_cast<int32_t>(n_class))\n        << \"MultiClassEvaluation: label must be in [0, num_class),\"\n        << \" num_class=\" << n_class << \" but found \" << label_error << \" in label\";\n  }\n\n public:\n  MultiClassMetricsReduction() = default;\n\n  [[nodiscard]] PackedReduceResult CpuReduceMetrics(const HostDeviceVector<bst_float>& weights,\n                                                    const HostDeviceVector<bst_float>& labels,\n                                                    const HostDeviceVector<bst_float>& preds,\n                                                    const size_t n_class, int32_t n_threads) const {\n    size_t ndata = labels.Size();\n\n    const auto& h_labels = labels.HostVector();\n    const auto& h_weights = weights.HostVector();\n    const auto& h_preds = preds.HostVector();\n\n    std::atomic<int> label_error {0};\n    bool const is_null_weight = weights.Size() 
== 0;\n\n    std::vector<double> scores_tloc(n_threads, 0);\n    std::vector<double> weights_tloc(n_threads, 0);\n    common::ParallelFor(ndata, n_threads, [&](size_t idx) {\n        bst_float weight = is_null_weight ? 1.0f : h_weights[idx];\n        auto label = static_cast<int>(h_labels[idx]);\n        if (label >= 0 && label < static_cast<int>(n_class)) {\n          auto t_idx = omp_get_thread_num();\n          scores_tloc[t_idx] +=\n              EvalRowPolicy::EvalRow(label, h_preds.data() + idx * n_class,\n                                     n_class) *\n              weight;\n          weights_tloc[t_idx] += weight;\n        } else {\n          label_error = label;\n        }\n    });\n\n    double residue_sum =\n        std::accumulate(scores_tloc.cbegin(), scores_tloc.cend(), 0.0);\n    double weights_sum =\n        std::accumulate(weights_tloc.cbegin(), weights_tloc.cend(), 0.0);\n\n    CheckLabelError(label_error, n_class);\n    PackedReduceResult res { residue_sum, weights_sum };\n\n    return res;\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n\n  PackedReduceResult DeviceReduceMetrics(Context const* ctx,\n                                         const HostDeviceVector<bst_float>& weights,\n                                         const HostDeviceVector<bst_float>& labels,\n                                         const HostDeviceVector<bst_float>& preds,\n                                         const size_t n_class) {\n    size_t n_data = labels.Size();\n\n    thrust::counting_iterator<size_t> begin(0);\n    thrust::counting_iterator<size_t> end = begin + n_data;\n\n    auto s_labels = labels.DeviceSpan();\n    auto s_preds = preds.DeviceSpan();\n    auto s_weights = weights.DeviceSpan();\n\n    bool const is_null_weight = weights.Size() == 0;\n    auto s_label_error = label_error_.GetSpan<int32_t>(1);\n    s_label_error[0] = 0;\n\n    PackedReduceResult result = thrust::transform_reduce(\n        ctx->CUDACtx()->CTP(),\n        begin, end,\n        [=] 
XGBOOST_DEVICE(size_t idx) {\n          bst_float weight = is_null_weight ? 1.0f : s_weights[idx];\n          bst_float residue = 0;\n          auto label = static_cast<int>(s_labels[idx]);\n          if (label >= 0 && label < static_cast<int32_t>(n_class)) {\n            residue = EvalRowPolicy::EvalRow(\n                label, &s_preds[idx * n_class], n_class) * weight;\n          } else {\n            s_label_error[0] = label;\n          }\n          return PackedReduceResult{ residue, weight };\n        },\n        PackedReduceResult(),\n        thrust::plus<PackedReduceResult>());\n    CheckLabelError(s_label_error[0], n_class);\n\n    return result;\n  }\n\n#endif  // XGBOOST_USE_CUDA\n\n  PackedReduceResult Reduce(Context const* ctx, size_t n_class,\n                            const HostDeviceVector<bst_float>& weights,\n                            const HostDeviceVector<bst_float>& labels,\n                            const HostDeviceVector<bst_float>& preds) {\n    PackedReduceResult result;\n\n    if (ctx->IsCPU()) {\n      result = CpuReduceMetrics(weights, labels, preds, n_class, ctx->Threads());\n    }\n#if defined(XGBOOST_USE_CUDA)\n    else {  // NOLINT\n      preds.SetDevice(ctx->Device());\n      labels.SetDevice(ctx->Device());\n      weights.SetDevice(ctx->Device());\n\n      dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n      result = DeviceReduceMetrics(ctx, weights, labels, preds, n_class);\n    }\n#endif  // defined(XGBOOST_USE_CUDA)\n    return result;\n  }\n\n private:\n#if defined(XGBOOST_USE_CUDA)\n  dh::PinnedMemory label_error_;\n#endif  // defined(XGBOOST_USE_CUDA)\n};\n\n/*!\n * \\brief base class of multi-class evaluation\n * \\tparam Derived the name of subclass\n */\ntemplate<typename Derived>\nstruct EvalMClassBase : public MetricNoCache {\n  double Eval(const HostDeviceVector<float> &preds, const MetaInfo &info) override {\n    if (info.labels.Size() == 0) {\n      CHECK_EQ(preds.Size(), 0);\n    } else {\n      
CHECK(preds.Size() % info.labels.Size() == 0) << \"label and prediction size not match\";\n    }\n    std::array<double, 2> dat{0.0, 0.0};\n    if (info.labels.Size() != 0) {\n      const size_t nclass = preds.Size() / info.labels.Size();\n      CHECK_GE(nclass, 1U)\n          << \"mlogloss and merror are only used for multi-class classification,\"\n          << \" use logloss for binary classification\";\n      auto result = reducer_.Reduce(this->ctx_, nclass, info.weights_, *info.labels.Data(), preds);\n      dat[0] = result.Residue();\n      dat[1] = result.Weights();\n    }\n    auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    return Derived::GetFinal(dat[0], dat[1]);\n  }\n  /*!\n   * \\brief to be implemented by subclass,\n   *   get evaluation result from one row\n   * \\param label label of current instance\n   * \\param pred prediction value of current instance\n   * \\param nclass number of class in the prediction\n   */\n  XGBOOST_DEVICE static bst_float EvalRow(int label,\n                                          const bst_float *pred,\n                                          size_t nclass);\n  /*!\n   * \\brief to be overridden by subclass, final transformation\n   * \\param esum the sum statistics returned by EvalRow\n   * \\param wsum sum of weight\n   */\n  inline static double GetFinal(double esum, double wsum) {\n    return esum / wsum;\n  }\n\n private:\n  MultiClassMetricsReduction<Derived> reducer_;\n  // used to store error message\n  const char *error_msg_;\n};\n\n/*! 
\\brief match error */\nstruct EvalMatchError : public EvalMClassBase<EvalMatchError> {\n  const char* Name() const override {\n    return \"merror\";\n  }\n  XGBOOST_DEVICE static bst_float EvalRow(int label,\n                                          const bst_float *pred,\n                                          size_t nclass) {\n    return common::FindMaxIndex(pred, pred + nclass) != pred + static_cast<int>(label);\n  }\n};\n\n/*! \\brief match error */\nstruct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {\n  const char* Name() const override {\n    return \"mlogloss\";\n  }\n  XGBOOST_DEVICE static bst_float EvalRow(int label, const bst_float* pred, size_t /*nclass*/) {\n    const bst_float eps = 1e-16f;\n    auto k = static_cast<size_t>(label);\n    if (pred[k] > eps) {\n      return -std::log(pred[k]);\n    } else {\n      return -std::log(eps);\n    }\n  }\n};\n\nXGBOOST_REGISTER_METRIC(MatchError, \"merror\")\n    .describe(\"Multiclass classification error.\")\n    .set_body([](const char*) { return new EvalMatchError(); });\n\nXGBOOST_REGISTER_METRIC(MultiLogLoss, \"mlogloss\")\n    .describe(\"Multiclass negative loglikelihood.\")\n    .set_body([](const char*) { return new EvalMultiLogLoss(); });\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/rank_metric.cc",
    "content": "/**\n * Copyright 2020-2026, XGBoost contributors\n */\n#include \"rank_metric.h\"\n\n#include <dmlc/omp.h>\n#include <dmlc/registry.h>\n\n#include <algorithm>   // for stable_sort, copy, fill_n, min, max\n#include <array>       // for array\n#include <cmath>       // for log, sqrt\n#include <functional>  // for less, greater\n#include <map>         // for operator!=, _Rb_tree_const_iterator\n#include <memory>      // for allocator, unique_ptr, shared_ptr, __shared_...\n#include <numeric>     // for accumulate\n#include <ostream>     // for operator<<, basic_ostream, ostringstream\n#include <string>      // for char_traits, operator<, basic_string, to_string\n#include <utility>     // for pair, make_pair\n#include <vector>      // for vector\n\n#include \"../collective/aggregator.h\"    // for ApplyWithLabels\n#include \"../common/algorithm.h\"         // for ArgSort, Sort\n#include \"../common/linalg_op.h\"         // for cbegin, cend\n#include \"../common/optional_weight.h\"   // for OptionalWeights, MakeOptionalWeights\n#include \"metric_common.h\"               // for MetricNoCache, GPUMetric, PackedReduceResult\n#include \"xgboost/base.h\"                // for bst_float, bst_omp_uint, bst_group_t, Args\n#include \"xgboost/cache.h\"               // for DMatrixCache\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo, DMatrix\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json, FromJson, IsA, ToJson, get, Null, Object\n#include \"xgboost/linalg.h\"              // for Tensor, TensorView, Range, VectorView, MakeT...\n#include \"xgboost/logging.h\"             // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ\n#include \"xgboost/metric.h\"              // for MetricReg, XGBOOST_REGISTER_METRIC, Metric\n#include \"xgboost/string_view.h\"         // for StringView\n\nnamespace {\nusing PredIndPair = 
std::pair<xgboost::bst_float, xgboost::ltr::rel_degree_t>;\nusing PredIndPairContainer = std::vector<PredIndPair>;\n}  // anonymous namespace\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(rank_metric);\n\n/*! \\brief AMS: also records best threshold */\nstruct EvalAMS : public MetricNoCache {\n public:\n  explicit EvalAMS(const char* param) {\n    CHECK(param != nullptr)  // NOLINT\n        << \"AMS must be in format ams@k\";\n    ratio_ = atof(param);\n    std::ostringstream os;\n    os << \"ams@\" << ratio_;\n    name_ = os.str();\n  }\n\n  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {\n    CHECK(!collective::IsDistributed()) << \"metric AMS do not support distributed evaluation\";\n    using namespace std;  // NOLINT(*)\n\n    const auto ndata = static_cast<bst_omp_uint>(info.labels.Size());\n    PredIndPairContainer rec(ndata);\n\n    const auto& h_preds = preds.ConstHostVector();\n    common::ParallelFor(ndata, ctx_->Threads(),\n                        [&](bst_omp_uint i) { rec[i] = std::make_pair(h_preds[i], i); });\n    common::Sort(ctx_, rec.begin(), rec.end(),\n                 [](auto const& l, auto const& r) { return l.first > r.first; });\n    auto ntop = static_cast<unsigned>(ratio_ * ndata);\n    if (ntop == 0) ntop = ndata;\n    const double br = 10.0;\n    unsigned thresindex = 0;\n    double s_tp = 0.0, b_fp = 0.0, tams = 0.0;\n    const auto& labels = info.labels.View(DeviceOrd::CPU());\n    for (unsigned i = 0; i < static_cast<unsigned>(ndata - 1) && i < ntop; ++i) {\n      const unsigned ridx = rec[i].second;\n      const bst_float wt = info.GetWeight(ridx);\n      if (labels(ridx) > 0.5f) {\n        s_tp += wt;\n      } else {\n        b_fp += wt;\n      }\n      if (rec[i].first != rec[i + 1].first) {\n        double ams = sqrt(2 * ((s_tp + b_fp + br) * log(1.0 + s_tp / (b_fp + br)) - s_tp));\n        if (tams < ams) {\n          
thresindex = i;\n          tams = ams;\n        }\n      }\n    }\n    if (ntop == ndata) {\n      LOG(INFO) << \"best-ams-ratio=\" << static_cast<bst_float>(thresindex) / ndata;\n      return static_cast<bst_float>(tams);\n    } else {\n      return static_cast<bst_float>(\n          sqrt(2 * ((s_tp + b_fp + br) * log(1.0 + s_tp / (b_fp + br)) - s_tp)));\n    }\n  }\n\n  [[nodiscard]] const char* Name() const override { return name_.c_str(); }\n\n private:\n  std::string name_;\n  float ratio_;\n};\n\n/*! \\brief Evaluate rank list */\nstruct EvalRank : public MetricNoCache, public EvalRankConfig {\n public:\n  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {\n    CHECK_EQ(preds.Size(), info.labels.Size()) << \"label size predict size not match\";\n\n    // quick consistency when group is not available\n    std::vector<unsigned> tgptr(2, 0);\n    tgptr[1] = static_cast<unsigned>(preds.Size());\n    const auto& gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;\n\n    CHECK_NE(gptr.size(), 0U) << \"must specify group when constructing rank file\";\n    CHECK_EQ(gptr.back(), preds.Size())\n        << \"EvalRank: group structure must match number of prediction\";\n\n    const auto ngroups = static_cast<bst_omp_uint>(gptr.size() - 1);\n    // sum statistics\n    const auto& h_labels = info.labels.HostView();\n    CHECK_LE(h_labels.Shape(1), 1);\n    const auto& h_preds = preds.ConstHostVector();\n\n    std::vector<double> sum_tloc(ctx_->Threads(), 0.0);\n    common::ParallelForBlock(ngroups, ctx_->Threads(), [&](auto&& blk) {\n      for (auto group_idx = blk.begin(); group_idx < blk.end(); ++group_idx) {\n        PredIndPairContainer rec;\n        for (unsigned j = gptr[group_idx]; j < gptr[group_idx + 1]; ++j) {\n          rec.emplace_back(h_preds[j], static_cast<int>(h_labels(j)));\n        }\n        sum_tloc[omp_get_thread_num()] += this->EvalGroup(&rec);\n      }\n    });\n    double sum_metric = 
std::accumulate(sum_tloc.cbegin(), sum_tloc.cend(), 0.0);\n    return collective::GlobalRatio(ctx_, info, sum_metric, static_cast<double>(ngroups));\n  }\n\n  [[nodiscard]] const char* Name() const override { return name.c_str(); }\n\n protected:\n  explicit EvalRank(const char* name, const char* param) {\n    this->name = ltr::ParseMetricName(name, param, &topn, &minus);\n  }\n\n  virtual double EvalGroup(PredIndPairContainer* recptr) const = 0;\n};\n\n/*! \\brief Cox: Partial likelihood of the Cox proportional hazards model */\nstruct EvalCox : public MetricNoCache {\n public:\n  EvalCox() = default;\n  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {\n    CHECK(!collective::IsDistributed()) << \"Cox metric does not support distributed evaluation\";\n    using namespace std;  // NOLINT(*)\n\n    const auto ndata = static_cast<bst_omp_uint>(info.labels.Size());\n    const auto& label_order = info.LabelAbsSort(ctx_);\n\n    // pre-compute a sum for the denominator\n    double exp_p_sum = 0;  // we use double because we might need the precision with large datasets\n\n    const auto& h_preds = preds.ConstHostVector();\n    for (omp_ulong i = 0; i < ndata; ++i) {\n      exp_p_sum += h_preds[i];\n    }\n\n    double out = 0;\n    double accumulated_sum = 0;\n    bst_omp_uint num_events = 0;\n    const auto& labels = info.labels.HostView();\n    for (bst_omp_uint i = 0; i < ndata; ++i) {\n      const size_t ind = label_order[i];\n      const auto label = labels(ind);\n      if (label > 0) {\n        out -= log(h_preds[ind]) - log(exp_p_sum);\n        ++num_events;\n      }\n\n      // only update the denominator after we move forward in time (labels are sorted)\n      accumulated_sum += h_preds[ind];\n      if (i == ndata - 1 || std::abs(label) < std::abs(labels(label_order[i + 1]))) {\n        exp_p_sum -= accumulated_sum;\n        accumulated_sum = 0;\n      }\n    }\n\n    return out / num_events;  // normalize by the number of 
events\n  }\n\n  [[nodiscard]] const char* Name() const override { return \"cox-nloglik\"; }\n};\n\nXGBOOST_REGISTER_METRIC(AMS, \"ams\")\n    .describe(\"AMS metric for higgs.\")\n    .set_body([](const char* param) { return new EvalAMS(param); });\n\nXGBOOST_REGISTER_METRIC(Cox, \"cox-nloglik\")\n    .describe(\"Negative log partial likelihood of Cox proportional hazards model.\")\n    .set_body([](const char*) { return new EvalCox(); });\n\n// ranking metrics that requires cache\ntemplate <typename Cache>\nclass EvalRankWithCache : public Metric {\n protected:\n  ltr::LambdaRankParam param_;\n  bool minus_{false};\n  std::string name_;\n\n  DMatrixCache<Cache> cache_{DMatrixCache<Cache>::DefaultSize()};\n\n public:\n  EvalRankWithCache(StringView name, const char* param) {\n    auto constexpr kMax = ltr::LambdaRankParam::NotSet();\n    std::uint32_t topn{kMax};\n    this->name_ = ltr::ParseMetricName(name, param, &topn, &minus_);\n    if (topn != kMax) {\n      param_.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", std::to_string(topn)},\n                                     {\"lambdarank_pair_method\", \"topk\"}});\n    }\n    param_.UpdateAllowUnknown(Args{});\n  }\n  void LoadConfig(Json const& in) override {\n    if (IsA<Null>(in)) {\n      return;\n    }\n    auto const& obj = get<Object const>(in);\n    auto it = obj.find(\"lambdarank_param\");\n    if (it != obj.cend()) {\n      FromJson(it->second, &param_);\n    }\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String{this->Name()};\n    out[\"lambdarank_param\"] = ToJson(param_);\n  }\n\n  double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {\n    double result{0.0};\n    auto const& info = p_fmat->Info();\n    collective::ApplyWithLabels(ctx_, info, &result, sizeof(double), [&] {\n      auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);\n      if (p_cache->Param() != param_) {\n     
   p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);\n      }\n      CHECK(p_cache->Param() == param_);\n      CHECK_EQ(preds.Size(), info.labels.Size());\n\n      result = this->Eval(preds, info, p_cache);\n    });\n    return result;\n  }\n\n  [[nodiscard]] const char* Name() const override { return name_.c_str(); }\n\n  virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,\n                      std::shared_ptr<Cache> p_cache) = 0;\n};\n\nnamespace {\ndouble Finalize(Context const* ctx, MetaInfo const& info, double score, double sw) {\n  std::array<double, 2> dat{score, sw};\n  auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), 2));\n  collective::SafeColl(rc);\n  std::tie(score, sw) = std::tuple_cat(dat);\n  if (sw > 0.0) {\n    score = score / sw;\n  }\n\n  CHECK_LE(score, 1.0 + kRtEps)\n      << \"Invalid output score, might be caused by invalid query group weight.\";\n  score = std::min(1.0, score);\n\n  return score;\n}\n}  // namespace\n\nclass EvalPrecision : public EvalRankWithCache<ltr::PreCache> {\n public:\n  using EvalRankWithCache::EvalRankWithCache;\n\n  double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,\n              std::shared_ptr<ltr::PreCache> p_cache) final {\n    auto n_groups = p_cache->Groups();\n    if (!info.weights_.Empty()) {\n      CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();\n    }\n\n    if (ctx_->IsCUDA()) {\n      auto pre = cuda_impl::PreScore(ctx_, info, predt, p_cache);\n      return Finalize(ctx_, info, pre.Residue(), pre.Weights());\n    }\n\n    auto gptr = p_cache->DataGroupPtr(ctx_);\n    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);\n    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());\n\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    auto pre = p_cache->Pre(ctx_);\n\n    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {\n      auto g_label 
= h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));\n      auto g_rank = rank_idx.subspan(gptr[g], gptr[g + 1] - gptr[g]);\n\n      auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());\n      double n_hits{0.0};\n      for (std::size_t i = 0; i < n; ++i) {\n        n_hits += g_label(g_rank[i]) * weight[g];\n      }\n      pre[g] = n_hits / static_cast<double>(n);\n    });\n\n    auto sw = 0.0;\n    for (std::size_t i = 0; i < pre.size(); ++i) {\n      sw += weight[i];\n    }\n\n    auto sum = std::accumulate(pre.cbegin(), pre.cend(), 0.0);\n    return Finalize(ctx_, info, sum, sw);\n  }\n};\n\n/**\n * \\brief Implement the NDCG score function for learning to rank.\n *\n *     Ties are ignored, which can lead to different result with other implementations.\n */\nclass EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {\n public:\n  using EvalRankWithCache::EvalRankWithCache;\n\n  void Configure(Args const& args) override {\n    // do not configure, otherwise the ndcg param like top-k will be forced into the same\n    // as the one in objective. 
The metric has its own syntax for parameter.\n    for (auto const& [key, value] : args) {\n      // Make a special case for the exp gain parameter, which is not exposed in the\n      // metric configuration syntax.\n      if (key == \"ndcg_exp_gain\") {\n        this->param_.UpdateAllowUnknown(Args{{key, value}});\n      }\n    }\n  }\n\n  double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,\n              std::shared_ptr<ltr::NDCGCache> p_cache) override {\n    if (ctx_->IsCUDA()) {\n      auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);\n      return Finalize(ctx_, info, ndcg.Residue(), ndcg.Weights());\n    }\n\n    // group local ndcg\n    auto group_ptr = p_cache->DataGroupPtr(ctx_);\n    bst_group_t n_groups = group_ptr.size() - 1;\n    auto ndcg_gloc = p_cache->Dcg(ctx_);\n    std::fill_n(ndcg_gloc.Values().data(), ndcg_gloc.Size(), 0.0);\n\n    auto h_inv_idcg = p_cache->InvIDCG(ctx_);\n    auto p_discount = p_cache->Discount(ctx_).data();\n\n    auto h_label = info.labels.HostView();\n    auto h_predt = linalg::MakeTensorView(ctx_, &preds, preds.Size());\n    auto weights = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n\n    common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {\n      auto g_predt = h_predt.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));\n      auto g_labels = h_label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]), 0);\n      auto sorted_idx = common::ArgSort<std::size_t>(ctx_, linalg::cbegin(g_predt),\n                                                     linalg::cend(g_predt), std::greater<>{});\n      double ndcg{.0};\n      double inv_idcg = h_inv_idcg(g);\n      if (inv_idcg <= 0.0) {\n        ndcg_gloc(g) = minus_ ? 
0.0 : 1.0;\n        return;\n      }\n      std::size_t n{std::min(sorted_idx.size(), static_cast<std::size_t>(param_.TopK()))};\n      if (param_.ndcg_exp_gain) {\n        for (std::size_t i = 0; i < n; ++i) {\n          ndcg += p_discount[i] * ltr::CalcDCGGain(g_labels(sorted_idx[i])) * inv_idcg;\n        }\n      } else {\n        for (std::size_t i = 0; i < n; ++i) {\n          ndcg += p_discount[i] * g_labels(sorted_idx[i]) * inv_idcg;\n        }\n      }\n      ndcg_gloc(g) += ndcg * weights[g];\n    });\n    double sum_w{0};\n    if (weights.Empty()) {\n      sum_w = n_groups;\n    } else {\n      sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);\n    }\n    auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);\n    return Finalize(ctx_, info, ndcg, sum_w);\n  }\n};\n\nclass EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {\n public:\n  using EvalRankWithCache::EvalRankWithCache;\n\n  double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,\n              std::shared_ptr<ltr::MAPCache> p_cache) override {\n    if (ctx_->IsCUDA()) {\n      auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);\n      return Finalize(ctx_, info, map.Residue(), map.Weights());\n    }\n\n    auto gptr = p_cache->DataGroupPtr(ctx_);\n    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);\n\n    auto map_gloc = p_cache->Map(ctx_);\n    std::fill_n(map_gloc.data(), map_gloc.size(), 0.0);\n    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());\n\n    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {\n      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));\n      auto g_rank = rank_idx.subspan(gptr[g], gptr[g + 1] - gptr[g]);\n\n      auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());\n      double n_hits{0.0};\n      for (std::size_t i = 0; i < n; ++i) {\n        auto p = g_label(g_rank[i]);\n        
n_hits += p;\n        map_gloc[g] += n_hits / static_cast<double>((i + 1)) * p;\n      }\n      for (std::size_t i = n; i < g_label.Size(); ++i) {\n        n_hits += g_label(g_rank[i]);\n      }\n      if (n_hits > 0.0) {\n        map_gloc[g] /= std::min(n_hits, static_cast<double>(param_.TopK()));\n      } else {\n        map_gloc[g] = minus_ ? 0.0 : 1.0;\n      }\n    });\n\n    auto sw = 0.0;\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    if (!weight.Empty()) {\n      CHECK_EQ(weight.weights.size(), p_cache->Groups());\n    }\n    for (std::size_t i = 0; i < map_gloc.size(); ++i) {\n      map_gloc[i] = map_gloc[i] * weight[i];\n      sw += weight[i];\n    }\n    auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);\n    return Finalize(ctx_, info, sum, sw);\n  }\n};\n\nXGBOOST_REGISTER_METRIC(Precision, \"pre\")\n    .describe(\"precision@k for rank.\")\n    .set_body([](const char* param) { return new EvalPrecision(\"pre\", param); });\n\nXGBOOST_REGISTER_METRIC(EvalMAP, \"map\")\n    .describe(\"map@k for ranking.\")\n    .set_body([](char const* param) { return new EvalMAPScore{\"map\", param}; });\n\nXGBOOST_REGISTER_METRIC(EvalNDCG, \"ndcg\")\n    .describe(\"ndcg@k for ranking.\")\n    .set_body([](char const* param) { return new EvalNDCG{\"ndcg\", param}; });\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/rank_metric.cu",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n */\n#include <dmlc/registry.h>\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n#include <thrust/reduce.h>                      // for reduce\n\n#include <algorithm>                            // for transform\n#include <cstddef>                              // for size_t\n#include <memory>                               // for shared_ptr\n#include <vector>                               // for vector\n\n#include \"../common/cuda_context.cuh\"           // for CUDAContext\n#include \"../common/device_helpers.cuh\"         // for MakeTransformIterator\n#include \"../common/optional_weight.h\"          // for MakeOptionalWeights\n#include \"../common/ranking_utils.cuh\"          // for CalcQueriesDCG, NDCGCache\n#include \"metric_common.h\"\n#include \"rank_metric.h\"\n#include \"xgboost/base.h\"                // for XGBOOST_DEVICE\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for MakeTensorView\n#include \"xgboost/logging.h\"             // for CHECK\n#include \"xgboost/metric.h\"\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(rank_metric_gpu);\n\nnamespace cuda_impl {\nPackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,\n                            HostDeviceVector<float> const &predt,\n                            std::shared_ptr<ltr::PreCache> p_cache) {\n  auto d_gptr = p_cache->DataGroupPtr(ctx);\n  auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n\n  predt.SetDevice(ctx->Device());\n  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());\n  auto topk = p_cache->Param().TopK();\n  auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_);\n\n  auto 
it = dh::MakeTransformIterator<double>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {\n        auto g = dh::SegmentId(d_gptr, i);\n        auto g_begin = d_gptr[g];\n        auto g_end = d_gptr[g + 1];\n        i -= g_begin;\n        auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));\n        auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);\n        double y = g_label(g_rank[i]);\n        auto n = std::min(static_cast<std::size_t>(topk), g_label.Size());\n        double w{d_weight[g]};\n        if (i >= n) {\n          return 0.0;\n        }\n        return y / static_cast<double>(n) * w;\n      });\n\n  auto cuctx = ctx->CUDACtx();\n  auto pre = p_cache->Pre(ctx);\n  thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0);\n\n  std::size_t bytes;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(),\n                                                d_gptr.data(), d_gptr.data() + 1, cuctx->Stream()));\n  dh::TemporaryArray<char> temp(bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(),\n                                                p_cache->Groups(), d_gptr.data(), d_gptr.data() + 1,\n                                                cuctx->Stream()));\n\n  auto w_it =\n      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),\n                                        [=] XGBOOST_DEVICE(std::size_t g) { return d_weight[g]; });\n  auto n_weights = p_cache->Groups();\n  auto sw = dh::Reduce(cuctx->CTP(), w_it, w_it + n_weights, 0.0, thrust::plus<double>{});\n  auto sum =\n      dh::Reduce(cuctx->CTP(), dh::tcbegin(pre), dh::tcend(pre), 0.0, thrust::plus<double>{});\n  auto result = PackedReduceResult{sum, sw};\n  return result;\n}\n\nPackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,\n                             HostDeviceVector<float> const &predt, bool minus,\n                      
       std::shared_ptr<ltr::NDCGCache> p_cache) {\n  CHECK(p_cache);\n\n  auto const &p = p_cache->Param();\n  auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_);\n  if (!d_weight.Empty()) {\n    CHECK_EQ(d_weight.weights.size(), p_cache->Groups());\n  }\n  auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n  predt.SetDevice(ctx->Device());\n  auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size());\n\n  auto d_group_ptr = p_cache->DataGroupPtr(ctx);\n\n  auto d_inv_idcg = p_cache->InvIDCG(ctx);\n  auto d_sorted_idx = p_cache->SortedIdx(ctx, d_predt.Values());\n  auto d_out_dcg = p_cache->Dcg(ctx);\n\n  ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(),\n                                 d_out_dcg);\n\n  auto it = dh::MakeTransformIterator<PackedReduceResult>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {\n        if (d_inv_idcg(i) <= 0.0) {\n          return PackedReduceResult{minus ? 
0.0 : 1.0, static_cast<double>(d_weight[i])};\n        }\n        return PackedReduceResult{d_out_dcg(i) * d_inv_idcg(i) * d_weight[i],\n                                  static_cast<double>(d_weight[i])};\n      });\n  auto pair = thrust::reduce(ctx->CUDACtx()->CTP(), it, it + d_out_dcg.Size(),\n                             PackedReduceResult{0.0, 0.0});\n  return pair;\n}\n\nPackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,\n                            HostDeviceVector<float> const &predt, bool minus,\n                            std::shared_ptr<ltr::MAPCache> p_cache) {\n  auto d_group_ptr = p_cache->DataGroupPtr(ctx);\n  auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n\n  predt.SetDevice(ctx->Device());\n  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());\n  auto key_it = dh::MakeTransformIterator<std::size_t>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) { return dh::SegmentId(d_group_ptr, i); });\n\n  auto get_label = [=] XGBOOST_DEVICE(std::size_t i) {\n    auto g = key_it[i];\n    auto g_begin = d_group_ptr[g];\n    auto g_end = d_group_ptr[g + 1];\n    i -= g_begin;\n    auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));\n    auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);\n    return g_label(g_rank[i]);\n  };\n  auto it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), get_label);\n\n  auto cuctx = ctx->CUDACtx();\n  auto n_rel = p_cache->NumRelevant(ctx);\n  thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + d_label.Size(), it, n_rel.data());\n\n  double topk = p_cache->Param().TopK();\n  auto map = p_cache->Map(ctx);\n  thrust::fill_n(cuctx->CTP(), map.data(), map.size(), 0.0);\n  {\n    auto val_it = dh::MakeTransformIterator<double>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {\n          auto g = key_it[i];\n          auto g_begin = d_group_ptr[g];\n          
auto g_end = d_group_ptr[g + 1];\n          i -= g_begin;\n          if (i >= topk) {\n            return 0.0;\n          }\n\n          auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));\n          auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);\n          auto label = g_label(g_rank[i]);\n\n          auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);\n          auto nhits = g_n_rel[i];\n          return nhits / static_cast<double>(i + 1) * label;\n        });\n\n    std::size_t bytes;\n    dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(),\n                                                  p_cache->Groups(), d_group_ptr.data(),\n                                                  d_group_ptr.data() + 1, cuctx->Stream()));\n    dh::TemporaryArray<char> temp(bytes);\n    dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(),\n                                                  p_cache->Groups(), d_group_ptr.data(),\n                                                  d_group_ptr.data() + 1, cuctx->Stream()));\n  }\n\n  PackedReduceResult result{0.0, 0.0};\n  {\n    auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_);\n    if (!d_weight.Empty()) {\n      CHECK_EQ(d_weight.weights.size(), p_cache->Groups());\n    }\n    auto val_it = dh::MakeTransformIterator<PackedReduceResult>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {\n          auto g_begin = d_group_ptr[g];\n          auto g_end = d_group_ptr[g + 1];\n          auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);\n          if (!g_n_rel.empty() && g_n_rel.back() > 0.0) {\n            return PackedReduceResult{map[g] * d_weight[g] / std::min(g_n_rel.back(), topk),\n                                      static_cast<double>(d_weight[g])};\n          }\n          return PackedReduceResult{minus ? 
0.0 : 1.0, static_cast<double>(d_weight[g])};\n        });\n    result =\n        thrust::reduce(cuctx->CTP(), val_it, val_it + map.size(), PackedReduceResult{0.0, 0.0});\n  }\n  return result;\n}\n}  // namespace cuda_impl\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/metric/rank_metric.h",
    "content": "#ifndef XGBOOST_METRIC_RANK_METRIC_H_\n#define XGBOOST_METRIC_RANK_METRIC_H_\n/**\n * Copyright 2023 by XGBoost Contributors\n */\n#include <memory>  // for shared_ptr\n\n#include \"../common/common.h\"            // for AssertGPUSupport\n#include \"../common/ranking_utils.h\"     // for NDCGCache, MAPCache\n#include \"metric_common.h\"               // for PackedReduceResult\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::metric::cuda_impl {\nPackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,\n                             HostDeviceVector<float> const &predt, bool minus,\n                             std::shared_ptr<ltr::NDCGCache> p_cache);\n\nPackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,\n                            HostDeviceVector<float> const &predt, bool minus,\n                            std::shared_ptr<ltr::MAPCache> p_cache);\n\nPackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,\n                            HostDeviceVector<float> const &predt,\n                            std::shared_ptr<ltr::PreCache> p_cache);\n\n#if !defined(XGBOOST_USE_CUDA)\ninline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,\n                                    HostDeviceVector<float> const &, bool,\n                                    std::shared_ptr<ltr::NDCGCache>) {\n  common::AssertGPUSupport();\n  return {};\n}\n\ninline PackedReduceResult MAPScore(Context const *, MetaInfo const &,\n                                   HostDeviceVector<float> const &, bool,\n                                   std::shared_ptr<ltr::MAPCache>) {\n  common::AssertGPUSupport();\n  return {};\n}\n\ninline PackedReduceResult PreScore(Context const *, MetaInfo const &,\n                                   HostDeviceVector<float> const &,\n         
                          std::shared_ptr<ltr::PreCache>) {\n  common::AssertGPUSupport();\n  return {};\n}\n#endif\n}  // namespace xgboost::metric::cuda_impl\n#endif  // XGBOOST_METRIC_RANK_METRIC_H_\n"
  },
  {
    "path": "src/metric/survival_metric.cc",
    "content": "/*!\n * Copyright 2019-2020 by Contributors\n * \\file survival_metric.cc\n * \\brief Metrics for survival analysis\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n\n// Dummy file to keep the CUDA conditional compile trick.\n#if !defined(XGBOOST_USE_CUDA)\n#include \"survival_metric.cu\"\n#endif  // !defined(XGBOOST_USE_CUDA)\n"
  },
  {
    "path": "src/metric/survival_metric.cu",
    "content": "/**\n * Copyright 2019-2024, Contributors\n * \\file survival_metric.cu\n * \\brief Metrics for survival analysis\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n\n#include <dmlc/registry.h>\n\n#include <array>\n#include <memory>\n#include <numeric>  // for accumulate\n#include <vector>\n\n#include \"../common/survival_util.h\"\n#include \"../common/threading_utils.h\"\n#include \"metric_common.h\"  // MetricNoCache\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/metric.h\"\n\n#if defined(XGBOOST_USE_CUDA)\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#endif  // XGBOOST_USE_CUDA\n\nusing AFTParam = xgboost::common::AFTParam;\nusing ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType;\ntemplate <typename Distribution>\nusing AFTLoss = xgboost::common::AFTLoss<Distribution>;\n\nnamespace xgboost::metric {\n// tag the this file, used by force static link later.\nDMLC_REGISTRY_FILE_TAG(survival_metric);\n\ntemplate <typename EvalRow>\nclass ElementWiseSurvivalMetricsReduction {\n public:\n  ElementWiseSurvivalMetricsReduction() = default;\n  void Configure(EvalRow policy) {\n    policy_ = policy;\n  }\n\n  [[nodiscard]] PackedReduceResult CpuReduceMetrics(\n      const HostDeviceVector<bst_float>& weights,\n      const HostDeviceVector<bst_float>& labels_lower_bound,\n      const HostDeviceVector<bst_float>& labels_upper_bound,\n      const HostDeviceVector<bst_float>& preds, int32_t n_threads) const {\n    size_t ndata = labels_lower_bound.Size();\n    CHECK_EQ(ndata, labels_upper_bound.Size());\n\n    const auto& h_labels_lower_bound = labels_lower_bound.HostVector();\n    const auto& h_labels_upper_bound = labels_upper_bound.HostVector();\n    const auto& h_weights = weights.HostVector();\n    const auto& h_preds = preds.HostVector();\n\n    std::vector<double> score_tloc(n_threads, 0.0);\n    std::vector<double> weight_tloc(n_threads, 0.0);\n\n    
common::ParallelFor(ndata, n_threads, [&](size_t i) {\n      const double wt =\n          h_weights.empty() ? 1.0 : static_cast<double>(h_weights[i]);\n      auto t_idx = omp_get_thread_num();\n      score_tloc[t_idx] +=\n          policy_.EvalRow(static_cast<double>(h_labels_lower_bound[i]),\n                          static_cast<double>(h_labels_upper_bound[i]),\n                          static_cast<double>(h_preds[i])) *\n          wt;\n      weight_tloc[t_idx] += wt;\n    });\n\n    double residue_sum = std::accumulate(score_tloc.cbegin(), score_tloc.cend(), 0.0);\n    double weights_sum = std::accumulate(weight_tloc.cbegin(), weight_tloc.cend(), 0.0);\n\n    PackedReduceResult res{residue_sum, weights_sum};\n    return res;\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n\n  PackedReduceResult DeviceReduceMetrics(Context const* ctx,\n                                         const HostDeviceVector<bst_float>& weights,\n                                         const HostDeviceVector<bst_float>& labels_lower_bound,\n                                         const HostDeviceVector<bst_float>& labels_upper_bound,\n                                         const HostDeviceVector<bst_float>& preds) {\n    size_t ndata = labels_lower_bound.Size();\n    CHECK_EQ(ndata, labels_upper_bound.Size());\n\n    thrust::counting_iterator<size_t> begin(0);\n    thrust::counting_iterator<size_t> end = begin + ndata;\n\n    auto s_label_lower_bound = labels_lower_bound.DeviceSpan();\n    auto s_label_upper_bound = labels_upper_bound.DeviceSpan();\n    auto s_preds = preds.DeviceSpan();\n    auto s_weights = weights.DeviceSpan();\n\n    const bool is_null_weight = (weights.Size() == 0);\n\n    auto d_policy = policy_;\n\n    PackedReduceResult result = thrust::transform_reduce(\n        ctx->CUDACtx()->CTP(), begin, end,\n        [=] XGBOOST_DEVICE(size_t idx) {\n          double weight = is_null_weight ? 
1.0 : static_cast<double>(s_weights[idx]);\n          double residue = d_policy.EvalRow(static_cast<double>(s_label_lower_bound[idx]),\n                                            static_cast<double>(s_label_upper_bound[idx]),\n                                            static_cast<double>(s_preds[idx]));\n          residue *= weight;\n          return PackedReduceResult{residue, weight};\n        },\n        PackedReduceResult(), thrust::plus<PackedReduceResult>());\n\n    return result;\n  }\n\n#endif  // XGBOOST_USE_CUDA\n\n  PackedReduceResult Reduce(Context const* ctx, const HostDeviceVector<bst_float>& weights,\n                            const HostDeviceVector<bst_float>& labels_lower_bound,\n                            const HostDeviceVector<bst_float>& labels_upper_bound,\n                            const HostDeviceVector<bst_float>& preds) {\n    PackedReduceResult result;\n\n    if (ctx->IsCPU()) {\n      result =\n          CpuReduceMetrics(weights, labels_lower_bound, labels_upper_bound, preds, ctx->Threads());\n    }\n#if defined(XGBOOST_USE_CUDA)\n    else {  // NOLINT\n      preds.SetDevice(ctx->Device());\n      labels_lower_bound.SetDevice(ctx->Device());\n      labels_upper_bound.SetDevice(ctx->Device());\n      weights.SetDevice(ctx->Device());\n\n      dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n      result = DeviceReduceMetrics(ctx, weights, labels_lower_bound, labels_upper_bound, preds);\n    }\n#endif  // defined(XGBOOST_USE_CUDA)\n    return result;\n  }\n\n private:\n  EvalRow policy_;\n};\n\nstruct EvalIntervalRegressionAccuracy {\n  void Configure(const Args&) {}\n\n  [[nodiscard]] const char* Name() const {\n    return \"interval-regression-accuracy\";\n  }\n\n  XGBOOST_DEVICE double EvalRow(\n      double label_lower_bound, double label_upper_bound, double log_pred) const {\n    const double pred = exp(log_pred);\n    return (pred >= label_lower_bound && pred <= label_upper_bound) ? 
1.0 : 0.0;\n  }\n\n  static double GetFinal(double esum, double wsum) {\n    return wsum == 0 ? esum : esum / wsum;\n  }\n};\n\n/*! \\brief Negative log likelihood of Accelerated Failure Time model */\ntemplate <typename Distribution>\nstruct EvalAFTNLogLik {\n  void Configure(const Args& args) {\n    param_.UpdateAllowUnknown(args);\n  }\n\n  [[nodiscard]] const char* Name() const {\n    return \"aft-nloglik\";\n  }\n\n  XGBOOST_DEVICE double EvalRow(\n      double label_lower_bound, double label_upper_bound, double pred) const {\n    return AFTLoss<Distribution>::Loss(\n        label_lower_bound, label_upper_bound, pred, param_.aft_loss_distribution_scale);\n  }\n\n  static double GetFinal(double esum, double wsum) {\n    return wsum == 0 ? esum : esum / wsum;\n  }\n private:\n  AFTParam param_;\n};\n\ntemplate <typename Policy>\nstruct EvalEWiseSurvivalBase : public MetricNoCache {\n  explicit EvalEWiseSurvivalBase(Context const* ctx) { ctx_ = ctx; }\n  EvalEWiseSurvivalBase() = default;\n\n  void Configure(const Args& args) override {\n    policy_.Configure(args);\n    reducer_.Configure(policy_);\n    CHECK(ctx_);\n  }\n\n  double Eval(const HostDeviceVector<float>& preds, const MetaInfo& info) override {\n    CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size());\n    CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size());\n    CHECK(ctx_);\n    auto result = reducer_.Reduce(ctx_, info.weights_, info.labels_lower_bound_,\n                                  info.labels_upper_bound_, preds);\n\n    std::array<double, 2> dat{result.Residue(), result.Weights()};\n    auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));\n    collective::SafeColl(rc);\n    return Policy::GetFinal(dat[0], dat[1]);\n  }\n\n  [[nodiscard]] const char* Name() const override {\n    return policy_.Name();\n  }\n\n private:\n  Policy policy_;\n  ElementWiseSurvivalMetricsReduction<Policy> reducer_;\n  int device_{-1};  // used only for GPU 
metric\n};\n\n// This class exists because we want to perform dispatch according to the distribution type at\n// configuration time, not at prediction time.\nstruct AFTNLogLikDispatcher : public MetricNoCache {\n  [[nodiscard]] const char* Name() const override {\n    return \"aft-nloglik\";\n  }\n\n  double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {\n    CHECK(metric_) << \"AFT metric must be configured first, with distribution type and scale\";\n    return metric_->Eval(preds, info);\n  }\n\n  void Configure(const Args& args) override {\n    param_.UpdateAllowUnknown(args);\n    switch (param_.aft_loss_distribution) {\n    case common::ProbabilityDistributionType::kNormal:\n      metric_.reset(new EvalEWiseSurvivalBase<EvalAFTNLogLik<common::NormalDistribution>>(ctx_));\n      break;\n    case common::ProbabilityDistributionType::kLogistic:\n      metric_.reset(new EvalEWiseSurvivalBase<EvalAFTNLogLik<common::LogisticDistribution>>(ctx_));\n      break;\n    case common::ProbabilityDistributionType::kExtreme:\n      metric_.reset(new EvalEWiseSurvivalBase<EvalAFTNLogLik<common::ExtremeDistribution>>(ctx_));\n      break;\n    default:\n      LOG(FATAL) << \"Unknown probability distribution\";\n    }\n    metric_->Configure(args);\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(this->Name());\n    out[\"aft_loss_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(const Json& in) override {\n    FromJson(in[\"aft_loss_param\"], &param_);\n  }\n\n private:\n  AFTParam param_;\n  std::unique_ptr<MetricNoCache> metric_;\n};\n\nXGBOOST_REGISTER_METRIC(AFTNLogLik, \"aft-nloglik\")\n    .describe(\"Negative log likelihood of Accelerated Failure Time model.\")\n    .set_body([](const char*) { return new AFTNLogLikDispatcher(); });\n\nXGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, \"interval-regression-accuracy\")\n    .describe(\"\")\n    .set_body([](const char*) {\n   
   return new EvalEWiseSurvivalBase<EvalIntervalRegressionAccuracy>();\n    });\n\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "src/objective/adaptive.cc",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#include \"adaptive.h\"\n\n#include <algorithm>  // for transform,find_if,copy,unique,max\n#include <cmath>      // std::isnan\n#include <cstddef>    // std::size_t\n#include <iterator>   // std::distance\n#include <vector>     // std::vector\n\n#include \"../common/algorithm.h\"           // ArgSort\n#include \"../common/linalg_op.h\"           // for VecScaMul\n#include \"../common/numeric.h\"             // RunLengthEncode\n#include \"../common/stats.h\"               // Quantile,WeightedQuantile\n#include \"../common/threading_utils.h\"     // ParallelFor\n#include \"../common/transform_iterator.h\"  // MakeIndexTransformIter\n#include \"../tree/sample_position.h\"       // for SamplePosition\n#include \"../tree/tree_view.h\"             // for WalkTree\n#include \"xgboost/base.h\"                  // bst_node_t\n#include \"xgboost/context.h\"               // Context\n#include \"xgboost/data.h\"                  // MetaInfo\n#include \"xgboost/host_device_vector.h\"    // HostDeviceVector\n#include \"xgboost/linalg.h\"                // MakeTensorView\n#include \"xgboost/span.h\"                  // Span\n#include \"xgboost/tree_model.h\"            // RegTree\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // AssertGPUSupport\n#endif                         // !defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost::obj {\nvoid EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,\n                        std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,\n                        std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {\n  auto& nptr = *p_nptr;\n  auto& nidx = *p_nidx;\n  auto& ridx = *p_ridx;\n  ridx = common::ArgSort<size_t>(ctx, position.cbegin(), position.cend());\n  std::vector<bst_node_t> sorted_pos(position);\n  // permutation\n  for (size_t i = 0; i < position.size(); ++i) {\n    sorted_pos[i] = position[ridx[i]];\n  }\n  
// find the first non-sampled row\n  size_t begin_pos = std::distance(\n      sorted_pos.cbegin(),\n      std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),\n                   [](bst_node_t nidx) { return tree::SamplePosition::IsValid(nidx); }));\n  CHECK_LE(begin_pos, sorted_pos.size());\n\n  std::vector<bst_node_t> leaf;\n  tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n    if (tree.IsLeaf(nidx)) {\n      leaf.push_back(nidx);\n    }\n    return true;\n  });\n\n  if (begin_pos == sorted_pos.size()) {\n    nidx = leaf;\n    return;\n  }\n\n  auto beg_it = sorted_pos.begin() + begin_pos;\n  common::RunLengthEncode(beg_it, sorted_pos.end(), &nptr);\n  CHECK_GT(nptr.size(), 0);\n  // skip the sampled rows in indptr\n  std::transform(nptr.begin(), nptr.end(), nptr.begin(),\n                 [begin_pos](size_t ptr) { return ptr + begin_pos; });\n\n  size_t n_leaf = nptr.size() - 1;\n  auto n_unique = std::unique(beg_it, sorted_pos.end()) - beg_it;\n  CHECK_EQ(n_unique, n_leaf);\n  nidx.resize(n_leaf);\n  std::copy(beg_it, beg_it + n_unique, nidx.begin());\n\n  if (n_leaf != leaf.size()) {\n    detail::FillMissingLeaf(leaf, &nidx, &nptr);\n  }\n}\n\nnamespace cpu_impl {\nnamespace {\n[[nodiscard]] std::int32_t AllocThreads(Context const* ctx, std::vector<size_t> const& h_node_ptr) {\n  // A heuristic to use parallel sort. 
If we use multiple threads here, the sorting is\n  // performed using a single thread as openmp cannot allocate new threads inside a\n  // parallel region.\n  std::int32_t n_threads;\n  if constexpr (kHasParallelStableSort) {\n    CHECK_GE(h_node_ptr.size(), 1);\n    auto it = common::MakeIndexTransformIter(\n        [&](std::size_t i) { return h_node_ptr[i + 1] - h_node_ptr[i]; });\n    n_threads = std::any_of(it, it + h_node_ptr.size() - 1,\n                            [](auto n) {\n                              constexpr std::size_t kNeedParallelSort = 1ul << 19;\n                              return n > kNeedParallelSort;\n                            })\n                    ? 1\n                    : ctx->Threads();\n  } else {\n    n_threads = ctx->Threads();\n  }\n  return n_threads;\n}\n}  // namespace\n\nvoid UpdateTreeLeaf(Context const* ctx, std::vector<bst_node_t> const& position,\n                    bst_target_t group_idx, MetaInfo const& info, float learning_rate,\n                    HostDeviceVector<float> const& predt, std::vector<float> const& alphas,\n                    RegTree* p_tree) {\n  std::vector<bst_node_t> nidx;\n  std::vector<size_t> nptr;\n  std::vector<size_t> ridx;\n  EncodeTreeLeafHost(ctx, *p_tree, position, &nptr, &nidx, &ridx);\n  std::size_t n_leaves = nidx.size();\n  std::size_t n_alphas = alphas.size();\n  if (nptr.empty()) {\n    std::vector<float> quantiles;\n    detail::UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);\n    return;\n  }\n\n  CHECK(!position.empty());\n  std::vector<float> quantiles(n_leaves * n_alphas, 0.0f);\n  std::vector<bst_node_t> n_valids(n_leaves, 0);\n\n  auto h_quantiles = linalg::MakeTensorView(ctx, common::Span{quantiles}, n_leaves, n_alphas);\n  CHECK_LE(nptr.back(), info.num_row_);\n  auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,\n                                        predt.Size() / info.num_row_);\n  if (p_tree->IsMultiTarget()) {\n    
CHECK_EQ(h_predt.Shape(1), alphas.size());\n  }\n  std::int32_t n_threads = AllocThreads(ctx, nptr);\n\n  collective::ApplyWithLabels(\n      ctx, info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {\n        // Loop over each leaf\n        common::ParallelFor(n_leaves, n_threads, [&](auto k) {\n          CHECK_LT(k + 1, nptr.size());\n          size_t n = nptr[k + 1] - nptr[k];\n          auto h_row_set = common::Span<size_t const>{ridx}.subspan(nptr[k], n);\n\n          linalg::MatrixView<float const> h_labels = info.labels.HostView();\n          auto h_weights = linalg::MakeVec(&info.weights_);\n          // Loop over each target (quantile).\n          for (std::size_t alpha_idx = 0; alpha_idx < n_alphas; ++alpha_idx) {\n            // If it's vector-leaf, group_idx is 0, alpha_idx is used. Otherwise,\n            // alpha_idx is 0, the group idx is used.\n            auto predt_idx = std::max(alpha_idx, static_cast<std::size_t>(group_idx));\n            // label is a single column for quantile regression, but it's a matrix for MAE.\n            auto y_idx = std::max(alpha_idx, static_cast<std::size_t>(group_idx));\n            y_idx = std::min(y_idx, h_labels.Shape(1) - 1);\n            auto iter = common::MakeIndexTransformIter([&](std::size_t i) -> float {\n              auto row_idx = h_row_set[i];\n              return h_labels(row_idx, y_idx) - h_predt(row_idx, predt_idx);\n            });\n            auto w_it = common::MakeIndexTransformIter([&](std::size_t i) -> float {\n              auto row_idx = h_row_set[i];\n              return h_weights(row_idx);\n            });\n            auto alpha = alphas[alpha_idx];\n\n            float q{0};\n            if (info.weights_.Empty()) {\n              q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());\n            } else {\n              q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);\n            }\n            if (std::isnan(q)) 
{\n              CHECK(h_row_set.empty());\n            }\n            h_quantiles(k, alpha_idx) = q;\n          }\n        });\n      });\n\n  if (p_tree->IsMultiTarget()) {\n    linalg::VecScaMul(ctx, linalg::MakeVec(ctx->Device(), common::Span{quantiles}), learning_rate);\n    p_tree->SetLeaves(nidx, common::Span{quantiles});\n  } else {\n    detail::UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);\n  }\n}\n}  // namespace cpu_impl\n\nnamespace cuda_impl {\n#if !defined(XGBOOST_USE_CUDA)\nvoid UpdateTreeLeaf(Context const*, common::Span<bst_node_t const>, bst_target_t, MetaInfo const&,\n                    float, HostDeviceVector<float> const&, std::vector<float> const&, RegTree*) {\n  common::AssertGPUSupport();\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/adaptive.cu",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#include <thrust/sort.h>\n\n#include <cub/cub.cuh>         // NOLINT\n\n#include \"../collective/aggregator.h\"\n#include \"../common/cuda_context.cuh\"  // CUDAContext\n#include \"../common/cuda_stream.h\"     // for Event, Stream\n#include \"../common/device_helpers.cuh\"\n#include \"../common/linalg_op.h\"  // for VecScaMul\n#include \"../common/stats.cuh\"\n#include \"../tree/sample_position.h\"  // for SamplePosition\n#include \"../tree/tree_view.h\"        // for WalkTree\n#include \"adaptive.h\"\n#include \"xgboost/context.h\"\n\nnamespace xgboost::obj {\nvoid EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,\n                          dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,\n                          HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {\n  // copy position to buffer\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  auto cuctx = ctx->CUDACtx();\n  size_t n_samples = position.size();\n  dh::device_vector<bst_node_t> sorted_position(position.size());\n  dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),\n                                position.size_bytes(), cudaMemcpyDeviceToDevice, cuctx->Stream()));\n\n  p_ridx->resize(position.size());\n  dh::Iota(dh::ToSpan(*p_ridx), cuctx->Stream());\n  // sort row index according to node index\n  thrust::stable_sort_by_key(cuctx->TP(), sorted_position.begin(),\n                             sorted_position.begin() + n_samples, p_ridx->begin());\n  // Find the first one that's not sampled (nidx not been negated).\n  size_t beg_pos = thrust::find_if(cuctx->CTP(), sorted_position.cbegin(), sorted_position.cend(),\n                                   [] XGBOOST_DEVICE(bst_node_t nidx) {\n                                     return tree::SamplePosition::IsValid(nidx);\n                                   }) -\n                   
sorted_position.cbegin();\n  if (beg_pos == sorted_position.size()) {\n    auto& leaf = p_nidx->HostVector();\n    tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n      if (tree.IsLeaf(nidx)) {\n        leaf.push_back(nidx);\n      }\n      return true;\n    });\n    return;\n  }\n\n  size_t n_leaf = tree.GetNumLeaves();\n  size_t max_n_unique = n_leaf;\n\n  dh::caching_device_vector<size_t> counts_out(max_n_unique + 1, 0);\n  auto d_counts_out = dh::ToSpan(counts_out).subspan(0, max_n_unique);\n  auto d_num_runs_out = dh::ToSpan(counts_out).subspan(max_n_unique, 1);\n  dh::caching_device_vector<bst_node_t> unique_out(max_n_unique, 0);\n  auto d_unique_out = dh::ToSpan(unique_out);\n\n  size_t nbytes{0};\n  auto begin_it = sorted_position.begin() + beg_pos;\n  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(\n      nullptr, nbytes, begin_it, unique_out.data().get(), counts_out.data().get(),\n      d_num_runs_out.data(), n_samples - beg_pos, ctx->CUDACtx()->Stream()));\n  dh::TemporaryArray<char> temp(nbytes);\n  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(\n      temp.data().get(), nbytes, begin_it, unique_out.data().get(), counts_out.data().get(),\n      d_num_runs_out.data(), n_samples - beg_pos, ctx->CUDACtx()->Stream()));\n\n  dh::PinnedMemory pinned_pool;\n  auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));\n  curt::Stream copy_stream;\n  size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());\n\n  curt::Event e;\n  e.Record(cuctx->Stream());\n  copy_stream.View().Wait(e);\n  // flag for whether there's ignored position\n  bst_node_t* h_first_unique =\n      reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());\n  dh::safe_cuda(cudaMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),\n                                cudaMemcpyDeviceToHost, copy_stream.View()));\n  dh::safe_cuda(cudaMemcpyAsync(h_first_unique, d_unique_out.data(), 
sizeof(bst_node_t),\n                                cudaMemcpyDeviceToHost, copy_stream.View()));\n\n  /**\n   * copy node index (leaf index)\n   */\n  auto& nidx = *p_nidx;\n  auto& nptr = *p_nptr;\n  nidx.SetDevice(ctx->Device());\n  nidx.Resize(n_leaf);\n  auto d_node_idx = nidx.DeviceSpan();\n\n  nptr.SetDevice(ctx->Device());\n  nptr.Resize(n_leaf + 1, 0);\n  auto d_node_ptr = nptr.DeviceSpan();\n\n  dh::LaunchN(n_leaf, [=] XGBOOST_DEVICE(size_t i) {\n    if (i >= d_num_runs_out[0]) {\n      // d_num_runs_out <= max_n_unique\n      // this omits all the leaves that are empty. A leaf can be empty when there's\n      // missing data, which can be caused by sparse input and distributed training.\n      return;\n    }\n    d_node_idx[i] = d_unique_out[i];\n    d_node_ptr[i + 1] = d_counts_out[i];\n    if (i == 0) {\n      d_node_ptr[0] = beg_pos;\n    }\n  });\n  thrust::inclusive_scan(cuctx->CTP(), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),\n                         dh::tbegin(d_node_ptr));\n  copy_stream.View().Sync();\n  CHECK_GT(*h_num_runs, 0);\n  CHECK_LE(*h_num_runs, n_leaf);\n\n  if (*h_num_runs < n_leaf) {\n    // shrink to omit the sampled nodes.\n    nptr.Resize(*h_num_runs + 1);\n    nidx.Resize(*h_num_runs);\n\n    std::vector<bst_node_t> leaves;\n    tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n      if (tree.IsLeaf(nidx)) {\n        leaves.push_back(nidx);\n      }\n      return true;\n    });\n    CHECK_EQ(leaves.size(), n_leaf);\n    // Fill all the leaves that don't have any sample. This is hacky and inefficient. 
An\n    // alternative is to leave it to the objective to handle missing leaves, which is messier\n    // as we need to take other distributed workers into account.\n    auto& h_nidx = nidx.HostVector();\n    auto& h_nptr = nptr.HostVector();\n    detail::FillMissingLeaf(leaves, &h_nidx, &h_nptr);\n    nidx.DevicePointer();\n    nptr.DevicePointer();\n  }\n  CHECK_EQ(nidx.Size(), n_leaf);\n  CHECK_EQ(nptr.Size(), n_leaf + 1);\n}\n\nnamespace cuda_impl {\nvoid UpdateTreeLeaf(Context const* ctx, common::Span<bst_node_t const> position,\n                    bst_target_t group_idx, MetaInfo const& info, float learning_rate,\n                    HostDeviceVector<float> const& predt, std::vector<float> const& h_alphas,\n                    RegTree* p_tree) {\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  dh::device_vector<size_t> ridx;\n  HostDeviceVector<size_t> nptr;\n  HostDeviceVector<bst_node_t> nidx;\n\n  EncodeTreeLeafDevice(ctx, position, &ridx, &nptr, &nidx, *p_tree);\n\n  if (nptr.Empty()) {\n    std::vector<float> quantiles;\n    detail::UpdateLeafValues(ctx, &quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree);\n  }\n\n  predt.SetDevice(ctx->Device());\n  auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,\n                                        predt.Size() / info.num_row_);\n  CHECK_LT(group_idx, d_predt.Shape(1));\n  if (p_tree->IsMultiTarget()) {\n    CHECK_EQ(d_predt.Shape(1), h_alphas.size());\n  }\n  HostDeviceVector<float> quantiles;\n\n  auto d_row_index = dh::ToSpan(ridx);\n  // node segments\n  auto seg_beg = nptr.ConstDevicePointer();\n  auto seg_end = seg_beg + nptr.Size();\n  CHECK_EQ(nidx.Size() + 1, nptr.Size());\n\n  collective::ApplyWithLabels(ctx, info, &quantiles, [&] {\n    auto d_labels = info.labels.View(ctx->Device());\n\n    auto values = [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) {\n      // If it's vector-leaf, group_idx is 0, j is used. 
Otherwise, j is 0, group idx is used.\n      auto p_idx = cuda::std::max(j, static_cast<std::size_t>(group_idx));\n      auto p = d_predt(d_row_index[i], p_idx);\n      // label is a single column for quantile regression, but it's a matrix for MAE.\n      auto y_idx = cuda::std::max(j, static_cast<std::size_t>(group_idx));\n      y_idx = cuda::std::min(y_idx, d_labels.Shape(1) - 1);\n      auto y = d_labels(d_row_index[i], y_idx);\n      return y - p;\n    };\n    CHECK_EQ(d_labels.Shape(0), position.size());\n\n    if (info.weights_.Empty()) {\n      common::SegmentedQuantile(ctx, h_alphas, seg_beg, seg_end, values, info.num_row_, &quantiles);\n    } else {\n      info.weights_.SetDevice(ctx->Device());\n      auto d_weights = info.weights_.ConstDeviceSpan();\n      CHECK_EQ(d_weights.size(), d_row_index.size());\n      auto w_it =\n          thrust::make_permutation_iterator(dh::tcbegin(d_weights), dh::tcbegin(d_row_index));\n      common::SegmentedWeightedQuantile(ctx, h_alphas, seg_beg, seg_end, values, w_it,\n                                        w_it + d_weights.size(), &quantiles);\n    }\n  });\n\n  if (p_tree->IsMultiTarget()) {\n    linalg::VecScaMul(ctx, linalg::MakeVec(ctx->Device(), quantiles.DeviceSpan()), learning_rate);\n    p_tree->SetLeaves(nidx.ConstHostVector(), quantiles.ConstHostSpan());\n  } else {\n    detail::UpdateLeafValues(ctx, &quantiles.HostVector(), nidx.ConstHostVector(), info,\n                             learning_rate, p_tree);\n  }\n}\n}  // namespace cuda_impl\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/adaptive.h",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <algorithm>\n#include <cstdint>  // std::int32_t\n#include <limits>\n#include <vector>  // std::vector\n\n#include \"../collective/aggregator.h\"\n#include \"xgboost/base.h\"                // for bst_node_t\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // MetaInfo\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/tree_model.h\"          // RegTree\n\nnamespace xgboost::obj {\nnamespace detail {\ninline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,\n                            std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_nptr) {\n  auto& h_node_idx = *p_nidx;\n  auto& h_node_ptr = *p_nptr;\n\n  for (auto leaf : maybe_missing) {\n    if (std::binary_search(h_node_idx.cbegin(), h_node_idx.cend(), leaf)) {\n      continue;\n    }\n    auto it = std::upper_bound(h_node_idx.cbegin(), h_node_idx.cend(), leaf);\n    auto pos = it - h_node_idx.cbegin();\n    h_node_idx.insert(h_node_idx.cbegin() + pos, leaf);\n    h_node_ptr.insert(h_node_ptr.cbegin() + pos, h_node_ptr[pos]);\n  }\n}\n\ninline void UpdateLeafValues(Context const* ctx, std::vector<float>* p_quantiles,\n                             std::vector<bst_node_t> const& nidx, MetaInfo const& info,\n                             float learning_rate, RegTree* p_tree) {\n  auto& tree = *p_tree;\n  auto& quantiles = *p_quantiles;\n  auto const& h_node_idx = nidx;\n\n  bst_idx_t n_leaf = collective::GlobalMax(ctx, info, static_cast<bst_idx_t>(h_node_idx.size()));\n  CHECK(quantiles.empty() || quantiles.size() == n_leaf);\n  if (quantiles.empty()) {\n    quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());\n  }\n\n  // number of workers that have valid quantiles\n  std::vector<int32_t> n_valids(quantiles.size());\n  std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),\n       
          [](float q) { return static_cast<int32_t>(!std::isnan(q)); });\n  auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(n_valids.data(), n_valids.size()));\n  collective::SafeColl(rc);\n\n  // convert to 0 for all reduce\n  std::replace_if(quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);\n  // use the mean value\n  rc = collective::GlobalSum(ctx, info, linalg::MakeVec(quantiles.data(), quantiles.size()));\n  collective::SafeColl(rc);\n\n  for (size_t i = 0; i < n_leaf; ++i) {\n    if (n_valids[i] > 0) {\n      quantiles[i] /= static_cast<float>(n_valids[i]);\n    } else {\n      // Use original leaf value if no worker can provide the quantile.\n      quantiles[i] = tree[h_node_idx[i]].LeafValue();\n    }\n  }\n\n  for (size_t i = 0; i < nidx.size(); ++i) {\n    auto nidx = h_node_idx[i];\n    auto q = quantiles[i];\n    CHECK(tree[nidx].IsLeaf());\n    tree[nidx].SetLeaf(q * learning_rate);\n  }\n}\n\ninline std::size_t IdxY(MetaInfo const& info, bst_group_t group_idx) {\n  std::size_t y_idx{0};\n  if (info.labels.Shape(1) > 1) {\n    y_idx = group_idx;\n  }\n  CHECK_LE(y_idx, info.labels.Shape(1));\n  return y_idx;\n}\n}  // namespace detail\n\nnamespace cpu_impl {\nvoid UpdateTreeLeaf(Context const* ctx, std::vector<bst_node_t> const& position,\n                    bst_target_t group_idx, MetaInfo const& info, float learning_rate,\n                    HostDeviceVector<float> const& predt, std::vector<float> const& alphas,\n                    RegTree* p_tree);\n}\n\nnamespace cuda_impl {\nvoid UpdateTreeLeaf(Context const* ctx, common::Span<bst_node_t const> position,\n                    bst_target_t group_idx, MetaInfo const& info, float learning_rate,\n                    HostDeviceVector<float> const& predt, std::vector<float> const& alphas,\n                    RegTree* p_tree);\n}\n\ninline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> const& position,\n                           
bst_target_t group_idx, MetaInfo const& info, float learning_rate,\n                           HostDeviceVector<float> const& predt, std::vector<float> const& alphas,\n                           RegTree* p_tree) {\n  if (ctx->IsCUDA()) {\n    position.SetDevice(ctx->Device());\n    cuda_impl::UpdateTreeLeaf(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate,\n                              predt, alphas, p_tree);\n  } else {\n    cpu_impl::UpdateTreeLeaf(ctx, position.ConstHostVector(), group_idx, info, learning_rate, predt,\n                             alphas, p_tree);\n  }\n}\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/aft_obj.cc",
    "content": "/*!\n * Copyright 2019-2020 by Contributors\n * \\file aft_obj.cc\n * \\brief Definition of AFT loss for survival analysis.\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n\n// Dummy file to keep the CUDA conditional compile trick.\n\n#include <dmlc/registry.h>\nnamespace xgboost {\nnamespace obj {\n\nDMLC_REGISTRY_FILE_TAG(aft_obj);\n\n}  // namespace obj\n}  // namespace xgboost\n\n#ifndef XGBOOST_USE_CUDA\n#include \"aft_obj.cu\"\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/objective/aft_obj.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n * \\file aft_obj.cu\n * \\brief Definition of AFT loss for survival analysis.\n * \\author Avinash Barnwal, Hyunsu Cho and Toby Hocking\n */\n\n#include <cmath>    // for log\n#include <cstddef>  // for size_t\n\n#include \"../common/linalg_op.h\"  // for ElementWiseKernel\n#include \"../common/survival_util.h\"\n#include \"../common/transform.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/objective.h\"\n#include \"xgboost/span.h\"\n\nusing AFTParam = xgboost::common::AFTParam;\nusing ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType;\ntemplate <typename Distribution>\nusing AFTLoss = xgboost::common::AFTLoss<Distribution>;\n\nnamespace xgboost {\nnamespace obj {\n\n#if defined(XGBOOST_USE_CUDA)\nDMLC_REGISTRY_FILE_TAG(aft_obj_gpu);\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass AFTObj : public ObjFunction {\n public:\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n  }\n\n  ObjInfo Task() const override { return ObjInfo::kSurvival; }\n\n  template <typename Distribution>\n  void GetGradientImpl(const HostDeviceVector<bst_float>& preds, const MetaInfo& info,\n                       linalg::Matrix<GradientPair>* out_gpair, size_t ndata, DeviceOrd device,\n                       bool is_null_weight, float aft_loss_distribution_scale) {\n    common::Transform<>::Init(\n        [=] XGBOOST_DEVICE(size_t _idx,\n        common::Span<GradientPair> _out_gpair,\n        common::Span<const bst_float> _preds,\n        common::Span<const bst_float> _labels_lower_bound,\n        common::Span<const bst_float> _labels_upper_bound,\n        common::Span<const bst_float> _weights) {\n      const double pred = static_cast<double>(_preds[_idx]);\n      const double label_lower_bound = static_cast<double>(_labels_lower_bound[_idx]);\n      const double label_upper_bound = 
static_cast<double>(_labels_upper_bound[_idx]);\n      const float grad = static_cast<float>(\n          AFTLoss<Distribution>::Gradient(label_lower_bound, label_upper_bound,\n                                          pred, aft_loss_distribution_scale));\n      const float hess = static_cast<float>(\n          AFTLoss<Distribution>::Hessian(label_lower_bound, label_upper_bound,\n                                         pred, aft_loss_distribution_scale));\n      const bst_float w = is_null_weight ? 1.0f : _weights[_idx];\n      _out_gpair[_idx] = GradientPair(grad * w, hess * w);\n    },\n    common::Range{0, static_cast<int64_t>(ndata)}, this->ctx_->Threads(), device).Eval(\n        out_gpair->Data(), &preds, &info.labels_lower_bound_, &info.labels_upper_bound_,\n        &info.weights_);\n  }\n\n  void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int /*iter*/,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    const size_t ndata = preds.Size();\n    CHECK_EQ(info.labels_lower_bound_.Size(), ndata);\n    CHECK_EQ(info.labels_upper_bound_.Size(), ndata);\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(ndata, 1);\n    const auto device = ctx_->Device();\n    const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale;\n    const bool is_null_weight = info.weights_.Size() == 0;\n    if (!is_null_weight) {\n      CHECK_EQ(info.weights_.Size(), ndata)\n        << \"Number of weights should be equal to number of data points.\";\n    }\n\n    switch (param_.aft_loss_distribution) {\n    case common::ProbabilityDistributionType::kNormal:\n      GetGradientImpl<common::NormalDistribution>(preds, info, out_gpair, ndata, device,\n                                                  is_null_weight, aft_loss_distribution_scale);\n      break;\n    case common::ProbabilityDistributionType::kLogistic:\n      GetGradientImpl<common::LogisticDistribution>(preds, info, out_gpair, ndata, 
device,\n                                                    is_null_weight, aft_loss_distribution_scale);\n      break;\n    case common::ProbabilityDistributionType::kExtreme:\n      GetGradientImpl<common::ExtremeDistribution>(preds, info, out_gpair, ndata, device,\n                                                   is_null_weight, aft_loss_distribution_scale);\n      break;\n    default:\n      LOG(FATAL) << \"Unrecognized distribution\";\n    }\n  }\n\n  void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {\n    // Trees give us a prediction in log scale, so exponentiate\n    common::Transform<>::Init(\n        [] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {\n          _preds[_idx] = exp(_preds[_idx]);\n        },\n        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),\n        io_preds->Device())\n        .Eval(io_preds);\n  }\n\n  void EvalTransform(HostDeviceVector<bst_float>* /*io_preds*/) override {\n    // do nothing here, since the AFT metric expects untransformed prediction score\n  }\n\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    auto intercept = base_score->View(this->ctx_->Device());\n    linalg::ElementWiseKernel(ctx_, intercept, [=] XGBOOST_DEVICE(std::size_t i) mutable {\n      intercept(i) = std::log(intercept(i));\n    });\n  }\n\n  const char* DefaultEvalMetric() const override {\n    return \"aft-nloglik\";\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"survival:aft\");\n    out[\"aft_loss_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override {\n    FromJson(in[\"aft_loss_param\"], &param_);\n  }\n  Json DefaultMetricConfig() const override {\n    Json config{Object{}};\n    config[\"name\"] = String{this->DefaultEvalMetric()};\n    config[\"aft_loss_param\"] = ToJson(param_);\n    return config;\n  }\n\n private:\n  AFTParam param_;\n};\n\n// register 
the objective functions\nXGBOOST_REGISTER_OBJECTIVE(AFTObj, \"survival:aft\")\n    .describe(\"AFT loss function\")\n    .set_body([]() { return new AFTObj(); });\n\n}  // namespace obj\n}  // namespace xgboost\n"
  },
  {
    "path": "src/objective/hinge.cc",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n\n// Dummy file to keep the CUDA conditional compile trick.\n\n#include <dmlc/registry.h>\nnamespace xgboost {\nnamespace obj {\n\nDMLC_REGISTRY_FILE_TAG(hinge_obj);\n\n}  // namespace obj\n}  // namespace xgboost\n\n#ifndef XGBOOST_USE_CUDA\n#include \"hinge.cu\"\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/objective/hinge.cu",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n * \\file hinge.cc\n * \\brief Provides an implementation of the hinge loss function\n * \\author Henry Gouk\n */\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t\n\n#include \"../common/common.h\"            // for Range\n#include \"../common/linalg_op.h\"         // for ElementWiseKernel\n#include \"../common/optional_weight.h\"   // for OptionalWeights\n#include \"../common/transform.h\"         // for Transform\n#include \"init_estimation.h\"             // for FitIntercept\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/json.h\"                // for Json\n#include \"xgboost/linalg.h\"              // for UnravelIndex\n#include \"xgboost/span.h\"                // for Span\n\nnamespace xgboost::obj {\n#if defined(XGBOOST_USE_CUDA)\nDMLC_REGISTRY_FILE_TAG(hinge_obj_gpu);\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass HingeObj : public FitIntercept {\n public:\n  HingeObj() = default;\n\n  void Configure(Args const &) override {}\n  ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const &info) const override {\n    // Multi-target regression.\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(HostDeviceVector<float> const &preds, MetaInfo const &info,\n                   std::int32_t /*iter*/, linalg::Matrix<GradientPair> *out_gpair) override {\n    CheckInitInputs(info);\n    CHECK_EQ(info.labels.Size(), preds.Size()) << \"Invalid shape of labels.\";\n    if (!info.weights_.Empty()) {\n      CHECK_EQ(info.weights_.Size(), info.num_row_)\n          << \"Number of weights should be equal to number of data points.\";\n    }\n\n    bst_target_t n_targets = this->Targets(info);\n    out_gpair->Reshape(info.num_row_, n_targets);\n    auto 
gpair = out_gpair->View(ctx_->Device());\n\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);\n\n    auto labels = info.labels.View(ctx_->Device());\n\n    info.weights_.SetDevice(ctx_->Device());\n    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()\n                                                 : info.weights_.ConstDeviceSpan()};\n\n    linalg::ElementWiseKernel(this->ctx_, labels,\n                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n                                auto w = weight[i];\n\n                                auto p = predt(i, j);\n                                auto y = labels(i, j) * 2.0 - 1.0;\n\n                                float g, h;\n                                if (p * y < 1.0) {\n                                  g = -y * w;\n                                  h = w;\n                                } else {\n                                  g = 0.0;\n                                  h = std::numeric_limits<float>::min();\n                                }\n                                gpair(i, j) = GradientPair{g, h};\n                              });\n  }\n\n  void PredTransform(HostDeviceVector<float> *io_preds) const override {\n    common::Transform<>::Init(\n        [] XGBOOST_DEVICE(std::size_t _idx, common::Span<float> _preds) {\n          _preds[_idx] = _preds[_idx] > 0.0 ? 
1.0 : 0.0;\n        },\n        common::Range{0, static_cast<int64_t>(io_preds->Size()), 1}, this->ctx_->Threads(),\n        io_preds->Device())\n        .Eval(io_preds);\n  }\n\n  [[nodiscard]] const char *DefaultEvalMetric() const override { return \"error\"; }\n\n  void SaveConfig(Json *p_out) const override {\n    auto &out = *p_out;\n    out[\"name\"] = String(\"binary:hinge\");\n  }\n  void LoadConfig(Json const &) override {}\n};\n\n// register the objective functions\nXGBOOST_REGISTER_OBJECTIVE(HingeObj, \"binary:hinge\")\n    .describe(\"Hinge loss. Expects labels to be in [0,1f]\")\n    .set_body([]() { return new HingeObj(); });\n\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/init_estimation.cc",
    "content": "/**\n * Copyright 2022-2023 by XGBoost contributors\n */\n#include \"init_estimation.h\"\n\n#include <memory>                        // unique_ptr\n\n#include \"../common/stats.h\"             // Mean\n#include \"../tree/fit_stump.h\"           // FitStump\n#include \"xgboost/base.h\"                // GradientPair\n#include \"xgboost/data.h\"                // MetaInfo\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/json.h\"                // Json\n#include \"xgboost/linalg.h\"              // Tensor,Vector\n#include \"xgboost/task.h\"                // ObjInfo\n\nnamespace xgboost::obj {\nvoid FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const {\n  if (this->Task().task == ObjInfo::kRegression) {\n    CheckInitInputs(info);\n  }\n  // Avoid altering any state in child objective.\n  HostDeviceVector<float> dummy_predt(info.labels.Size(), 0.0f, this->ctx_->Device());\n  linalg::Matrix<GradientPair> gpair(info.labels.Shape(), this->ctx_->Device());\n\n  Json config{Object{}};\n  this->SaveConfig(&config);\n\n  std::unique_ptr<ObjFunction> new_obj{\n      ObjFunction::Create(get<String const>(config[\"name\"]), this->ctx_)};\n  new_obj->LoadConfig(config);\n  new_obj->GetGradient(dummy_predt, info, 0, &gpair);\n\n  bst_target_t n_targets = this->Targets(info);\n  tree::FitStump(this->ctx_, info, gpair, n_targets, base_score);\n  this->PredTransform(base_score->Data());\n}\n\nvoid FitInterceptGlmLike::InitEstimation(MetaInfo const& info,\n                                         linalg::Vector<float>* base_score) const {\n  if (this->Task().task == ObjInfo::kRegression) {\n    CheckInitInputs(info);\n  }\n  if (info.weights_.Empty()) {\n    common::SampleMean(this->ctx_, info.IsColumnSplit(), info.labels, base_score);\n  } else {\n    common::WeightedSampleMean(this->ctx_, info.IsColumnSplit(), info.labels, info.weights_,\n                               base_score);\n  }\n  
CHECK_GE(base_score->Size(), 1);\n}\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/init_estimation.h",
    "content": "/**\n * Copyright 2022-2023 by XGBoost contributors\n */\n#ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_\n#define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_\n#include \"xgboost/data.h\"       // MetaInfo\n#include \"xgboost/linalg.h\"     // Tensor\n#include \"xgboost/objective.h\"  // ObjFunction\n\nnamespace xgboost::obj {\nclass FitIntercept : public ObjFunction {\n public:\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override;\n};\n\nclass FitInterceptGlmLike : public FitIntercept {\n public:\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override;\n};\n\ninline void CheckInitInputs(MetaInfo const& info) {\n  CHECK_EQ(info.labels.Shape(0), info.num_row_) << \"Invalid shape of labels.\";\n  if (!info.weights_.Empty()) {\n    CHECK_EQ(info.weights_.Size(), info.num_row_)\n        << \"Number of weights should be equal to number of data points.\";\n  }\n}\n}  // namespace xgboost::obj\n#endif  // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_\n"
  },
  {
    "path": "src/objective/lambdarank_obj.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include \"lambdarank_obj.h\"\n\n#include <dmlc/registry.h>  // for DMLC_REGISTRY_FILE_TAG\n\n#include <algorithm>    // for transform, copy, fill_n, min, max\n#include <cmath>        // for pow, log2\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t\n#include <map>          // for operator!=\n#include <memory>       // for shared_ptr, __shared_ptr_access, allocator\n#include <ostream>      // for operator<<, basic_ostream\n#include <string>       // for char_traits, operator<, basic_string, string\n#include <tuple>        // for apply, make_tuple\n#include <type_traits>  // for is_floating_point\n#include <utility>      // for pair, swap\n\n#include \"../common/error_msg.h\"         // for GroupWeight, LabelScoreSize\n#include \"../common/linalg_op.h\"         // for begin, cbegin, cend, SaveVector\n#include \"../common/optional_weight.h\"   // for MakeOptionalWeights, OptionalWeights\n#include \"../common/ranking_utils.h\"     // for RankingCache, LambdaRankParam, MAPCache, NDCGC...\n#include \"../common/threading_utils.h\"   // for ParallelFor, Sched\n#include \"init_estimation.h\"             // for FitIntercept\n#include \"xgboost/base.h\"                // for bst_group_t, GradientPair, kRtEps, GradientPai...\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json, get, Value, ToJson, F32Array, FromJson, IsA\n#include \"xgboost/linalg.h\"              // for Vector, Range, TensorView, VectorView, All\n#include \"xgboost/logging.h\"             // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_LE, CHE...\n#include \"xgboost/objective.h\"           // for ObjFunctionReg, XGBOOST_REGISTER_OBJECTIVE\n#include \"xgboost/span.h\"                // for Span, operator!=\n#include 
\"xgboost/string_view.h\"         // for operator<<, StringView\n#include \"xgboost/task.h\"                // for ObjInfo\n\nnamespace xgboost::obj {\nnamespace cpu_impl {\nvoid LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,\n                                  linalg::VectorView<double const> lj_full,\n                                  linalg::Vector<double>* p_ti_plus,\n                                  linalg::Vector<double>* p_tj_minus, linalg::Vector<double>* p_li,\n                                  linalg::Vector<double>* p_lj,\n                                  std::shared_ptr<ltr::RankingCache> p_cache) {\n  auto ti_plus = p_ti_plus->HostView();\n  auto tj_minus = p_tj_minus->HostView();\n  auto li = p_li->HostView();\n  auto lj = p_lj->HostView();\n\n  auto gptr = p_cache->DataGroupPtr(ctx);\n  auto n_groups = p_cache->Groups();\n  auto regularizer = p_cache->Param().Regularizer();\n\n  // Aggregate over query groups\n  for (bst_group_t g{0}; g < n_groups; ++g) {\n    auto begin = gptr[g];\n    auto end = gptr[g + 1];\n    std::size_t group_size = end - begin;\n    auto n = std::min(group_size, p_cache->MaxPositionSize());\n\n    auto g_li = li_full.Slice(linalg::Range(begin, end));\n    auto g_lj = lj_full.Slice(linalg::Range(begin, end));\n\n    for (std::size_t i{0}; i < n; ++i) {\n      li(i) += g_li(i);\n      lj(i) += g_lj(i);\n    }\n  }\n\n  // The ti+ is not guaranteed to decrease since it depends on the |\\delta Z|\n  //\n  // The update normalizes the ti+ to make ti+(0) equal to 1, which breaks the probability\n  // meaning. 
The reasoning behind the normalization is not clear, here we are just\n  // following the authors.\n  for (std::size_t i = 0; i < ti_plus.Size(); ++i) {\n    if (li(0) >= Eps64()) {\n      ti_plus(i) = std::pow(li(i) / li(0), regularizer);  // eq.30\n    }\n    if (lj(0) >= Eps64()) {\n      tj_minus(i) = std::pow(lj(i) / lj(0), regularizer);  // eq.31\n    }\n    assert(!std::isinf(ti_plus(i)));\n    assert(!std::isinf(tj_minus(i)));\n  }\n}\n}  // namespace cpu_impl\n\n/**\n * \\brief Base class for pair-wise learning to rank.\n *\n *   See `From RankNet to LambdaRank to LambdaMART: An Overview` for a description of the\n *   algorithm.\n *\n *   In addition to ranking, this also implements `Unbiased LambdaMART: An Unbiased\n *   Pairwise Learning-to-Rank Algorithm`.\n */\ntemplate <typename Loss, typename Cache>\nclass LambdaRankObj : public FitIntercept {\n  MetaInfo const* p_info_{nullptr};\n\n  // Update position biased for unbiased click data\n  void UpdatePositionBias() {\n    li_full_.SetDevice(ctx_->Device());\n    lj_full_.SetDevice(ctx_->Device());\n    li_.SetDevice(ctx_->Device());\n    lj_.SetDevice(ctx_->Device());\n\n    if (ctx_->IsCUDA()) {\n      cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->Device()),\n                                              lj_full_.View(ctx_->Device()), &ti_plus_, &tj_minus_,\n                                              &li_, &lj_, p_cache_);\n    } else {\n      // This function doesn't have sycl-specific implementation yet.\n      // For that reason we transfer data to host in case of sycl is used for propper execution.\n      auto device = ctx_->Device().IsSycl() ? 
DeviceOrd::CPU() : ctx_->Device();\n      cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(device), lj_full_.View(device),\n                                             &ti_plus_, &tj_minus_, &li_, &lj_, p_cache_);\n    }\n\n    li_full_.Data()->Fill(0.0);\n    lj_full_.Data()->Fill(0.0);\n\n    li_.Data()->Fill(0.0);\n    lj_.Data()->Fill(0.0);\n  }\n\n protected:\n  // L / tj-* (eq. 30)\n  linalg::Vector<double> li_;\n  // L / ti+* (eq. 31)\n  linalg::Vector<double> lj_;\n  // position bias ratio for relevant doc, ti+ (eq. 30)\n  linalg::Vector<double> ti_plus_;\n  // position bias ratio for irrelevant doc, tj- (eq. 31)\n  linalg::Vector<double> tj_minus_;\n  // li buffer for all samples\n  linalg::Vector<double> li_full_;\n  // lj buffer for all samples\n  linalg::Vector<double> lj_full_;\n\n  ltr::LambdaRankParam param_;\n  // cache\n  std::shared_ptr<ltr::RankingCache> p_cache_;\n\n  [[nodiscard]] std::shared_ptr<Cache> GetCache() const {\n    auto ptr = std::static_pointer_cast<Cache>(p_cache_);\n    CHECK(ptr);\n    return ptr;\n  }\n\n  // get group view for li/lj\n  linalg::VectorView<double> GroupLoss(bst_group_t g, linalg::Vector<double>* v) const {\n    auto gptr = p_cache_->DataGroupPtr(ctx_);\n    auto begin = gptr[g];\n    auto end = gptr[g + 1];\n    if (param_.lambdarank_unbiased) {\n      return v->HostView().Slice(linalg::Range(begin, end));\n    }\n    return v->HostView();\n  }\n\n  // Calculate lambda gradient for each group on CPU.\n  template <bool unbiased, bool norm_by_diff, typename Delta>\n  void CalcLambdaForGroup(std::int32_t iter, common::Span<float const> g_predt,\n                          linalg::VectorView<float const> g_label, float w,\n                          common::Span<std::size_t const> g_rank, bst_group_t g, Delta delta,\n                          linalg::VectorView<GradientPair> g_gpair) {\n    std::fill_n(g_gpair.Values().data(), g_gpair.Size(), GradientPair{});\n\n    auto ti_plus = ti_plus_.HostView();\n    
auto tj_minus = tj_minus_.HostView();\n\n    auto li = GroupLoss(g, &li_full_);\n    auto lj = GroupLoss(g, &lj_full_);\n\n    // Normalization, first used by LightGBM.\n    // https://github.com/lightgbm-org/LightGBM/pull/2331#issuecomment-523259298\n    double sum_lambda{0.0};\n\n    auto delta_op = [&](auto const&... args) {\n      return delta(args..., g);\n    };\n\n    auto loop = [&](std::size_t i, std::size_t j) {\n      // higher/lower on the target ranked list\n      std::size_t rank_high = i, rank_low = j;\n      if (g_label(g_rank[rank_high]) == g_label(g_rank[rank_low])) {\n        return;\n      }\n      if (g_label(g_rank[rank_high]) < g_label(g_rank[rank_low])) {\n        std::swap(rank_high, rank_low);\n      }\n\n      double cost;\n      auto pg = LambdaGrad<unbiased, norm_by_diff>(g_label, g_predt, g_rank, rank_high, rank_low,\n                                                   delta_op, ti_plus, tj_minus, &cost);\n      auto ng = Repulse(pg);\n\n      std::size_t idx_high = g_rank[rank_high];\n      std::size_t idx_low = g_rank[rank_low];\n      g_gpair(idx_high) += pg;\n      g_gpair(idx_low) += ng;\n\n      if (unbiased) {\n        auto k = ti_plus.Size();\n        // We can probably use all the positions. If we skip the update due to having\n        // high/low > k, we might be losing out too many pairs. 
On the other hand, if we\n        // cap the position, then we might be accumulating too many tail bias into the\n        // last tracked position.\n        // We use `idx_high` since it represents the original position from the label\n        // list, and label list is assumed to be sorted.\n        if (idx_high < k && idx_low < k) {\n          if (tj_minus(idx_low) >= Eps64()) {\n            li(idx_high) += cost / tj_minus(idx_low);  // eq.30\n          }\n          if (ti_plus(idx_high) >= Eps64()) {\n            lj(idx_low) += cost / ti_plus(idx_high);  // eq.31\n          }\n        }\n      }\n\n      sum_lambda += -2.0 * static_cast<double>(pg.GetGrad());\n    };\n\n    MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop);\n    if (param_.lambdarank_normalization) {\n      double norm = 1.0;\n      if (param_.IsMean()) {\n        // Normalize using the number of pairs for mean.\n        auto n_pairs = this->p_cache_->Param().NumPair();\n        auto scale = 1.0 / static_cast<double>(n_pairs);\n        norm = scale;\n      } else {\n        // Normalize using gradient for top-k.\n        if (sum_lambda > 0.0) {\n          norm = std::log2(1.0 + sum_lambda) / sum_lambda;\n        }\n      }\n      if (norm != 1.0) {\n        std::transform(linalg::begin(g_gpair), linalg::end(g_gpair), linalg::begin(g_gpair),\n                       [norm](GradientPair const& g) { return g * norm; });\n      }\n    }\n\n    auto w_norm = p_cache_->WeightNorm();\n    std::transform(g_gpair.Values().data(), g_gpair.Values().data() + g_gpair.Size(),\n                   g_gpair.Values().data(),\n                   [&](GradientPair const& gpair) { return gpair * w * w_norm; });\n  }\n\n public:\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(Loss::Name());\n    out[\"lambdarank_param\"] = ToJson(param_);\n\n    if (param_.lambdarank_unbiased) 
{\n      out[\"ti+\"] = F32Array();\n      linalg::SaveVector(ti_plus_, &out[\"ti+\"]);\n      out[\"tj-\"] = F32Array();\n      linalg::SaveVector(tj_minus_, &out[\"tj-\"]);\n    }\n  }\n  void LoadConfig(Json const& in) override {\n    auto const& obj = get<Object const>(in);\n    if (obj.find(\"lambdarank_param\") != obj.cend()) {\n      FromJson(in[\"lambdarank_param\"], &param_);\n    }\n\n    if (param_.lambdarank_unbiased) {\n      linalg::LoadVector(in[\"ti+\"], &ti_plus_);\n      linalg::LoadVector(in[\"tj-\"], &tj_minus_);\n    }\n  }\n\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo{ObjInfo::kRanking}; }\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    CHECK_LE(info.labels.Shape(1), 1) << \"multi-output for LTR is not yet supported.\";\n    return 1;\n  }\n\n  [[nodiscard]] const char* RankEvalMetric(StringView metric) const {\n    static thread_local std::string name;\n    if (param_.HasTruncation()) {\n      name = ltr::MakeMetricName(metric, param_.NumPair(), false);\n    } else {\n      name = ltr::MakeMetricName(metric, param_.NotSet(), false);\n    }\n    return name.c_str();\n  }\n\n  void GetGradient(HostDeviceVector<float> const& predt, MetaInfo const& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CHECK_EQ(info.labels.Size(), predt.Size()) << error::LabelScoreSize();\n\n    // init/renew cache\n    if (!p_cache_ || p_info_ != &info || p_cache_->Param() != param_) {\n      p_cache_ = std::make_shared<Cache>(ctx_, info, param_);\n      p_info_ = &info;\n    }\n    auto n_groups = p_cache_->Groups();\n    if (!info.weights_.Empty()) {\n      CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();\n    }\n\n    if ((ti_plus_.Empty() || li_full_.Empty()) && param_.lambdarank_unbiased) {\n      CHECK_EQ(iter, 0);\n      ti_plus_ = linalg::Constant<double>(ctx_, 1.0, p_cache_->MaxPositionSize());\n      tj_minus_ = 
linalg::Constant<double>(ctx_, 1.0, p_cache_->MaxPositionSize());\n\n      li_ = linalg::Zeros<double>(ctx_, p_cache_->MaxPositionSize());\n      lj_ = linalg::Zeros<double>(ctx_, p_cache_->MaxPositionSize());\n\n      li_full_ = linalg::Zeros<double>(ctx_, info.num_row_);\n      lj_full_ = linalg::Zeros<double>(ctx_, info.num_row_);\n    }\n    static_cast<Loss*>(this)->GetGradientImpl(iter, predt, info, out_gpair);\n\n    if (param_.lambdarank_unbiased) {\n      this->UpdatePositionBias();\n    }\n  }\n};\n\nclass LambdaRankNDCG : public LambdaRankObj<LambdaRankNDCG, ltr::NDCGCache> {\n public:\n  template <bool unbiased, bool exp_gain>\n  void CalcLambdaForGroupNDCG(std::int32_t iter, common::Span<float const> g_predt,\n                              linalg::VectorView<float const> g_label, float w,\n                              common::Span<std::size_t const> g_rank,\n                              linalg::VectorView<GradientPair> g_gpair,\n                              linalg::VectorView<double const> inv_IDCG,\n                              common::Span<double const> discount, bst_group_t g) {\n    auto delta = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low,\n                     bst_group_t g) {\n      static_assert(std::is_floating_point_v<decltype(y_high)>);\n      return DeltaNDCG<exp_gain>(y_high, y_low, rank_high, rank_low, inv_IDCG(g), discount);\n    };\n\n    if (this->param_.lambdarank_score_normalization) {\n      this->CalcLambdaForGroup<unbiased, true>(iter, g_predt, g_label, w, g_rank, g, delta,\n                                               g_gpair);\n    } else {\n      this->CalcLambdaForGroup<unbiased, false>(iter, g_predt, g_label, w, g_rank, g, delta,\n                                                g_gpair);\n    }\n  }\n\n  void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,\n                       const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {\n    if (ctx_->IsCUDA()) 
{\n      cuda_impl::LambdaRankGetGradientNDCG(\n          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),\n          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),\n          lj_full_.View(ctx_->Device()), out_gpair);\n      return;\n    }\n\n    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();\n    bst_group_t n_groups = p_cache_->Groups();\n    auto gptr = p_cache_->DataGroupPtr(ctx_);\n\n    out_gpair->SetDevice(device);\n    out_gpair->Reshape(info.num_row_, 1);\n\n    auto h_gpair = out_gpair->HostView();\n    auto h_predt = predt.ConstHostSpan();\n    auto h_label = info.labels.HostView();\n    auto h_weight = common::MakeOptionalWeights(device, info.weights_);\n    auto make_range = [&](bst_group_t g) {\n      return linalg::Range(gptr[g], gptr[g + 1]);\n    };\n\n    auto dct = GetCache()->Discount(ctx_);\n    auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt);\n    auto inv_IDCG = GetCache()->InvIDCG(ctx_);\n\n    common::ParallelFor(n_groups, ctx_->Threads(), common::Sched::Guided(), [&](auto g) {\n      std::size_t cnt = gptr[g + 1] - gptr[g];\n      auto w = h_weight[g];\n      auto g_predt = h_predt.subspan(gptr[g], cnt);\n      auto g_gpair =\n          h_gpair.Slice(linalg::Range(static_cast<std::size_t>(gptr[g]), gptr[g] + cnt), 0);\n      auto g_label = h_label.Slice(make_range(g), 0);\n      auto g_rank = rank_idx.subspan(gptr[g], cnt);\n\n      auto args =\n          std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g_gpair, inv_IDCG, dct, g);\n\n      if (param_.lambdarank_unbiased) {\n        if (param_.ndcg_exp_gain) {\n          std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG<true, true>, args);\n        } else {\n          std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG<true, false>, args);\n        }\n      } else {\n        if (param_.ndcg_exp_gain) {\n          std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG<false, true>, args);\n        } else {\n          
std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG<false, false>, args);\n        }\n      }\n    });\n  }\n\n  static char const* Name() { return \"rank:ndcg\"; }\n  [[nodiscard]] const char* DefaultEvalMetric() const override {\n    return this->RankEvalMetric(\"ndcg\");\n  }\n  [[nodiscard]] Json DefaultMetricConfig() const override {\n    Json config{Object{}};\n    config[\"name\"] = String{DefaultEvalMetric()};\n    config[\"lambdarank_param\"] = ToJson(param_);\n    return config;\n  }\n};\n\nnamespace cuda_impl {\n#if !defined(XGBOOST_USE_CUDA)\nvoid LambdaRankGetGradientNDCG(Context const*, std::int32_t, HostDeviceVector<float> const&,\n                               const MetaInfo&, std::shared_ptr<ltr::NDCGCache>,\n                               linalg::VectorView<double const>,  // input bias ratio\n                               linalg::VectorView<double const>,  // input bias ratio\n                               linalg::VectorView<double>, linalg::VectorView<double>,\n                               linalg::Matrix<GradientPair>*) {\n  common::AssertGPUSupport();\n}\n\nvoid LambdaRankUpdatePositionBias(Context const*, linalg::VectorView<double const>,\n                                  linalg::VectorView<double const>, linalg::Vector<double>*,\n                                  linalg::Vector<double>*, linalg::Vector<double>*,\n                                  linalg::Vector<double>*, std::shared_ptr<ltr::RankingCache>) {\n  common::AssertGPUSupport();\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n\nnamespace cpu_impl {\nvoid MAPStat(Context const* ctx, linalg::VectorView<float const> label,\n             common::Span<std::size_t const> rank_idx, std::shared_ptr<ltr::MAPCache> p_cache) {\n  auto h_n_rel = p_cache->NumRelevant(ctx);\n  auto gptr = p_cache->DataGroupPtr(ctx);\n\n  CHECK_EQ(h_n_rel.size(), gptr.back());\n  CHECK_EQ(h_n_rel.size(), label.Size());\n\n  auto h_acc = p_cache->Acc(ctx);\n\n  
common::ParallelFor(p_cache->Groups(), ctx->Threads(), [&](auto g) {\n    auto cnt = gptr[g + 1] - gptr[g];\n    auto g_n_rel = h_n_rel.subspan(gptr[g], cnt);\n    auto g_rank = rank_idx.subspan(gptr[g], cnt);\n    auto g_label = label.Slice(linalg::Range(gptr[g], gptr[g + 1]));\n\n    // The number of relevant documents at each position\n    g_n_rel[0] = g_label(g_rank[0]);\n    for (std::size_t k = 1; k < g_rank.size(); ++k) {\n      g_n_rel[k] = g_n_rel[k - 1] + g_label(g_rank[k]);\n    }\n\n    // \\sum l_k/k\n    auto g_acc = h_acc.subspan(gptr[g], cnt);\n    g_acc[0] = g_label(g_rank[0]) / 1.0;\n\n    for (std::size_t k = 1; k < g_rank.size(); ++k) {\n      g_acc[k] = g_acc[k - 1] + (g_label(g_rank[k]) / static_cast<double>(k + 1));\n    }\n  });\n}\n}  // namespace cpu_impl\n\nclass LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {\n public:\n  void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,\n                       const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {\n    if (ctx_->IsCUDA()) {\n      return cuda_impl::LambdaRankGetGradientMAP(\n          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),\n          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),\n          lj_full_.View(ctx_->Device()), out_gpair);\n    }\n\n    auto gptr = p_cache_->DataGroupPtr(ctx_).data();\n    bst_group_t n_groups = p_cache_->Groups();\n\n    CHECK_EQ(info.labels.Shape(1), 1) << \"multi-target for learning to rank is not yet supported.\";\n    auto device = ctx_->Device().IsSycl() ? 
DeviceOrd::CPU() : ctx_->Device();\n    out_gpair->SetDevice(device);\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n\n    auto h_gpair = out_gpair->HostView();\n    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);\n    auto h_predt = predt.ConstHostSpan();\n    auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt);\n    auto h_weight = common::MakeOptionalWeights(device, info.weights_);\n\n    auto make_range = [&](bst_group_t g) {\n      return linalg::Range(gptr[g], gptr[g + 1]);\n    };\n\n    cpu_impl::MAPStat(ctx_, h_label, rank_idx, GetCache());\n    auto n_rel = GetCache()->NumRelevant(ctx_);\n    auto acc = GetCache()->Acc(ctx_);\n\n    auto delta_map = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low,\n                         bst_group_t g) {\n      if (rank_high > rank_low) {\n        std::swap(rank_high, rank_low);\n        std::swap(y_high, y_low);\n      }\n      auto cnt = gptr[g + 1] - gptr[g];\n      // In a hot loop\n      auto g_n_rel = common::Span<double const>{n_rel.data() + gptr[g], cnt};\n      auto g_acc = common::Span<double const>{acc.data() + gptr[g], cnt};\n      auto d = DeltaMAP(y_high, y_low, rank_high, rank_low, g_n_rel, g_acc);\n      return d;\n    };\n    using D = decltype(delta_map);\n\n    common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {\n      auto cnt = gptr[g + 1] - gptr[g];\n      auto w = h_weight[g];\n      auto g_predt = h_predt.subspan(gptr[g], cnt);\n      auto g_gpair = h_gpair.Slice(linalg::Range(gptr[g], gptr[g] + cnt), 0);\n      auto g_label = h_label.Slice(make_range(g));\n      auto g_rank = rank_idx.subspan(gptr[g], cnt);\n\n      auto args = std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g, delta_map, g_gpair);\n\n      if (param_.lambdarank_unbiased) {\n        if (this->param_.lambdarank_score_normalization) {\n          std::apply(&LambdaRankMAP::CalcLambdaForGroup<true, true, D>, args);\n        } else {\n          
std::apply(&LambdaRankMAP::CalcLambdaForGroup<true, false, D>, args);\n        }\n      } else {\n        if (this->param_.lambdarank_score_normalization) {\n          std::apply(&LambdaRankMAP::CalcLambdaForGroup<false, true, D>, args);\n        } else {\n          std::apply(&LambdaRankMAP::CalcLambdaForGroup<false, false, D>, args);\n        }\n      }\n    });\n  }\n  static char const* Name() { return \"rank:map\"; }\n  [[nodiscard]] const char* DefaultEvalMetric() const override {\n    return this->RankEvalMetric(\"map\");\n  }\n};\n\n#if !defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\nvoid MAPStat(Context const*, MetaInfo const&, common::Span<std::size_t const>,\n             std::shared_ptr<ltr::MAPCache>) {\n  common::AssertGPUSupport();\n}\n\nvoid LambdaRankGetGradientMAP(Context const*, std::int32_t, HostDeviceVector<float> const&,\n                              const MetaInfo&, std::shared_ptr<ltr::MAPCache>,\n                              linalg::VectorView<double const>,  // input bias ratio\n                              linalg::VectorView<double const>,  // input bias ratio\n                              linalg::VectorView<double>, linalg::VectorView<double>,\n                              linalg::Matrix<GradientPair>*) {\n  common::AssertGPUSupport();\n}\n}  // namespace cuda_impl\n#endif  // !defined(XGBOOST_USE_CUDA)\n\n/**\n * \\brief The RankNet loss.\n */\nclass LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::RankingCache> {\n public:\n  void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,\n                       const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {\n    if (ctx_->IsCUDA()) {\n      return cuda_impl::LambdaRankGetGradientPairwise(\n          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),\n          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),\n          lj_full_.View(ctx_->Device()), out_gpair);\n    }\n\n    auto gptr = 
p_cache_->DataGroupPtr(ctx_);\n    bst_group_t n_groups = p_cache_->Groups();\n\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n\n    auto h_gpair = out_gpair->HostView();\n    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);\n    auto h_predt = predt.ConstHostSpan();\n    auto h_weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n\n    auto make_range = [&](bst_group_t g) {\n      return linalg::Range(gptr[g], gptr[g + 1]);\n    };\n    auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt);\n\n    auto delta = [](auto...) {\n      return 1.0;\n    };\n    using D = decltype(delta);\n\n    common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {\n      auto cnt = gptr[g + 1] - gptr[g];\n      auto w = h_weight[g];\n      auto g_predt = h_predt.subspan(gptr[g], cnt);\n      auto g_gpair = h_gpair.Slice(linalg::Range(gptr[g], gptr[g] + cnt), 0);\n      auto g_label = h_label.Slice(make_range(g));\n      auto g_rank = rank_idx.subspan(gptr[g], cnt);\n\n      auto args = std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g, delta, g_gpair);\n      if (param_.lambdarank_unbiased) {\n        if (this->param_.lambdarank_score_normalization) {\n          std::apply(&LambdaRankPairwise::CalcLambdaForGroup<true, true, D>, args);\n        } else {\n          std::apply(&LambdaRankPairwise::CalcLambdaForGroup<true, false, D>, args);\n        }\n      } else {\n        if (this->param_.lambdarank_score_normalization) {\n          std::apply(&LambdaRankPairwise::CalcLambdaForGroup<false, true, D>, args);\n        } else {\n          std::apply(&LambdaRankPairwise::CalcLambdaForGroup<false, false, D>, args);\n        }\n      }\n    });\n  }\n\n  static char const* Name() { return \"rank:pairwise\"; }\n  [[nodiscard]] const char* DefaultEvalMetric() const override {\n    return this->RankEvalMetric(\"ndcg\");\n  }\n\n  [[nodiscard]] Json DefaultMetricConfig() const override {\n    Json 
config{Object{}};\n    config[\"name\"] = String{DefaultEvalMetric()};\n    config[\"lambdarank_param\"] = ToJson(param_);\n    return config;\n  }\n};\n\n#if !defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\nvoid LambdaRankGetGradientPairwise(Context const*, std::int32_t, HostDeviceVector<float> const&,\n                                   const MetaInfo&, std::shared_ptr<ltr::RankingCache>,\n                                   linalg::VectorView<double const>,  // input bias ratio\n                                   linalg::VectorView<double const>,  // input bias ratio\n                                   linalg::VectorView<double>, linalg::VectorView<double>,\n                                   linalg::Matrix<GradientPair>*) {\n  common::AssertGPUSupport();\n}\n}  // namespace cuda_impl\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nXGBOOST_REGISTER_OBJECTIVE(LambdaRankNDCG, LambdaRankNDCG::Name())\n    .describe(\"LambdaRank with NDCG loss as objective\")\n    .set_body([]() { return new LambdaRankNDCG{}; });\n\nXGBOOST_REGISTER_OBJECTIVE(LambdaRankPairwise, LambdaRankPairwise::Name())\n    .describe(\"LambdaRank with RankNet loss as objective\")\n    .set_body([]() { return new LambdaRankPairwise{}; });\n\nXGBOOST_REGISTER_OBJECTIVE(LambdaRankMAP, LambdaRankMAP::Name())\n    .describe(\"LambdaRank with MAP loss as objective.\")\n    .set_body([]() { return new LambdaRankMAP{}; });\n\nDMLC_REGISTRY_FILE_TAG(lambdarank_obj);\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/lambdarank_obj.cu",
    "content": "/**\n * Copyright 2015-2026, XGBoost contributors\n *\n * \\brief CUDA implementation of lambdarank.\n */\n#include <dmlc/registry.h>                      // for DMLC_REGISTRY_FILE_TAG\n#include <thrust/fill.h>                        // for fill_n\n#include <thrust/for_each.h>                    // for for_each_n\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n#include <thrust/iterator/zip_iterator.h>       // for make_zip_iterator\n#include <thrust/tuple.h>                       // for make_tuple (zip_iterator)\n\n#include <algorithm>       // for min\n#include <cassert>         // for assert\n#include <cmath>           // for abs, log2, isinf\n#include <cstddef>         // for size_t\n#include <cstdint>         // for int32_t\n#include <cuda/std/tuple>  // for make_tuple, tuple, get\n#include <memory>          // for shared_ptr\n#include <utility>\n\n#include \"../common/algorithm.cuh\"       // for SegmentedArgSort\n#include \"../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../common/deterministic.cuh\"   // for CreateRoundingFactor, TruncateWithRounding\n#include \"../common/device_helpers.cuh\"  // for SegmentId, TemporaryArray, AtomicAddGpair\n#include \"../common/optional_weight.h\"   // for MakeOptionalWeights\n#include \"../common/ranking_utils.h\"     // for NDCGCache, LambdaRankParam, rel_degree_t\n#include \"lambdarank_obj.cuh\"\n#include \"lambdarank_obj.h\"\n#include \"xgboost/base.h\"                // for bst_group_t, XGBOOST_DEVICE, GradientPair\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for VectorView, Range, Vector\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost::obj {\nDMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu);\n\nnamespace cuda_impl {\nnamespace 
{\n/**\n * \\brief Calculate minimum value of bias for floating point truncation.\n */\nvoid MinBias(Context const* ctx, std::shared_ptr<ltr::RankingCache> p_cache,\n             linalg::VectorView<double const> t_plus, linalg::VectorView<double const> tj_minus,\n             common::Span<double> d_min) {\n  CHECK_EQ(d_min.size(), 2);\n  auto cuctx = ctx->CUDACtx();\n\n  auto k = t_plus.Size();\n  auto const& p = p_cache->Param();\n  CHECK_GT(k, 0);\n  CHECK_EQ(k, p_cache->MaxPositionSize());\n\n  auto key_it = dh::MakeTransformIterator<std::size_t>(\n      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return i * k; });\n  auto val_it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),\n                                                  [=] XGBOOST_DEVICE(std::size_t i) {\n                                                    if (i >= k) {\n                                                      return std::abs(tj_minus(i - k));\n                                                    }\n                                                    return std::abs(t_plus(i));\n                                                  });\n  std::size_t bytes;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Min(nullptr, bytes, val_it, d_min.data(), 2, key_it,\n                                                key_it + 1, cuctx->Stream()));\n  dh::TemporaryArray<char> temp(bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Min(temp.data().get(), bytes, val_it, d_min.data(), 2,\n                                                key_it, key_it + 1, cuctx->Stream()));\n}\n\n/**\n * \\brief Type for gradient statistic. 
(Gradient, cost for unbiased LTR, normalization factor)\n */\nusing GradCostNorm = cuda::std::tuple<GradientPair, double, double>;\n\n/**\n * \\brief Obtain and update the gradient for one pair.\n */\ntemplate <bool unbiased, bool has_truncation, bool norm_by_diff, typename Delta>\nstruct GetGradOp {\n  MakePairsOp<has_truncation> make_pair;\n  Delta delta;\n\n  bool const need_update;\n\n  auto __device__ operator()(std::size_t idx) -> GradCostNorm {\n    auto const& args = make_pair.args;\n    auto g = dh::SegmentId(args.d_threads_group_ptr, idx);\n\n    auto data_group_begin = static_cast<std::size_t>(args.d_group_ptr[g]);\n    std::size_t n_data = args.d_group_ptr[g + 1] - data_group_begin;\n    // obtain group segment data.\n    auto g_label = args.labels.Slice(linalg::Range(data_group_begin, data_group_begin + n_data), 0);\n    auto g_predt = args.predts.subspan(data_group_begin, n_data);\n    auto g_gpair = args.gpairs.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));\n    auto g_rank = args.d_sorted_idx.subspan(data_group_begin, n_data);\n\n    auto [i, j] = make_pair(idx, g);\n\n    std::size_t rank_high = i, rank_low = j;\n    if (g_label(g_rank[i]) == g_label(g_rank[j])) {\n      return cuda::std::make_tuple(GradientPair{}, 0.0, 0.0);\n    }\n    if (g_label(g_rank[i]) < g_label(g_rank[j])) {\n      thrust::swap(rank_high, rank_low);\n    }\n\n    double cost{0};\n\n    auto delta_op = [&](auto const&... 
args) {\n      return delta(args..., g);\n    };\n    GradientPair pg =\n        LambdaGrad<unbiased, norm_by_diff>(g_label, g_predt, g_rank, rank_high, rank_low, delta_op,\n                                           args.ti_plus, args.tj_minus, &cost);\n\n    std::size_t idx_high = g_rank[rank_high];\n    std::size_t idx_low = g_rank[rank_low];\n\n    if (need_update) {\n      // second run, update the gradient\n      auto ng = Repulse(pg);\n\n      auto gr = args.d_roundings(g);\n      // positive gradient truncated\n      auto pgt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), pg.GetGrad()),\n                              common::TruncateWithRounding(gr.GetHess(), pg.GetHess())};\n      // negative gradient truncated\n      auto ngt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), ng.GetGrad()),\n                              common::TruncateWithRounding(gr.GetHess(), ng.GetHess())};\n\n      dh::AtomicAddGpair(&g_gpair(idx_high), pgt);\n      dh::AtomicAddGpair(&g_gpair(idx_low), ngt);\n    }\n\n    if (unbiased && need_update) {\n      // second run, update the cost\n      assert(args.tj_minus.Size() == args.ti_plus.Size() && \"Invalid size of position bias\");\n\n      auto g_li = args.li.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));\n      auto g_lj = args.lj.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));\n\n      if (idx_high < args.ti_plus.Size() && idx_low < args.ti_plus.Size()) {\n        if (args.tj_minus(idx_low) >= Eps64()) {\n          // eq.30\n          atomicAdd(&g_li(idx_high), common::TruncateWithRounding(args.d_cost_rounding[0],\n                                                                  cost / args.tj_minus(idx_low)));\n        }\n        if (args.ti_plus(idx_high) >= Eps64()) {\n          // eq.31\n          atomicAdd(&g_lj(idx_low), common::TruncateWithRounding(args.d_cost_rounding[0],\n                                                                 cost / 
args.ti_plus(idx_high)));\n        }\n      }\n    }\n\n    return cuda::std::make_tuple(GradientPair{std::abs(pg.GetGrad()), std::abs(pg.GetHess())},\n                                 std::abs(cost), -2.0 * static_cast<double>(pg.GetGrad()));\n  }\n};\n\ntemplate <bool unbiased, bool has_truncation, bool norm_by_diff, typename Delta>\nstruct MakeGetGrad {\n  MakePairsOp<has_truncation> make_pair;\n  Delta delta;\n\n  [[nodiscard]] KernelInputs const& Args() const { return make_pair.args; }\n\n  MakeGetGrad(KernelInputs args, Delta d) : make_pair{args}, delta{std::move(d)} {}\n\n  auto operator()(bool need_update) {\n    return GetGradOp<unbiased, has_truncation, norm_by_diff, Delta>{make_pair, delta, need_update};\n  }\n};\n\n/**\n * \\brief Calculate gradient for all pairs using update op created by make_get_grad.\n *\n * We need to run gradient calculation twice, the first time gathers information like\n * maximum gradient, maximum cost, and the normalization term using reduction. The second\n * time performs the actual update.\n *\n * Without normalization, we only need to run it once since we can manually calculate\n * the bounds of gradient (NDCG \\in [0, 1], delta_NDCG \\in [0, 1], ti+/tj- are from the\n * previous iteration so the bound can be calculated for current iteration). However, if\n * normalization is used, the delta score is un-bounded and we need to obtain the sum\n * gradient. As a tradeoff, we simply run the kernel twice, once as reduction, second\n * one as for_each.\n *\n * Alternatively, we can bound the delta score by limiting the output of the model using\n * sigmoid for binary output and some normalization for multi-level. 
But the effect on the\n * accuracy is not known yet, and it's only used by GPU.\n *\n * For performance, the segmented sort for sorted scores is the bottleneck and takes up\n * about half of the time, while the reduction and for_each take up the second half.\n */\ntemplate <bool unbiased, bool has_truncation, bool norm_by_diff, typename Delta>\nvoid CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::RankingCache> p_cache,\n              MakeGetGrad<unbiased, has_truncation, norm_by_diff, Delta> make_get_grad) {\n  auto n_groups = p_cache->Groups();\n  auto d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr();\n  auto d_gptr = p_cache->DataGroupPtr(ctx);\n  auto d_gpair = make_get_grad.Args().gpairs;\n\n  /**\n   * First pass, gather info for normalization and rounding factor.\n   */\n  auto val_it = dh::MakeTransformIterator<GradCostNorm>(thrust::make_counting_iterator(0ul),\n                                                        make_get_grad(false));\n  auto reduction_op = [] XGBOOST_DEVICE(GradCostNorm const& l,\n                                        GradCostNorm const& r) -> GradCostNorm {\n    // get maximum gradient for each group, along with cost and the normalization term\n    auto const& lg = cuda::std::get<0>(l);\n    auto const& rg = cuda::std::get<0>(r);\n    auto grad = std::max(lg.GetGrad(), rg.GetGrad());\n    auto hess = std::max(lg.GetHess(), rg.GetHess());\n    auto cost = std::max(cuda::std::get<1>(l), cuda::std::get<1>(r));\n    double sum_lambda = cuda::std::get<2>(l) + cuda::std::get<2>(r);\n    return cuda::std::make_tuple(GradientPair{grad, hess}, cost, sum_lambda);\n  };\n  auto init = cuda::std::make_tuple(GradientPair{0.0f, 0.0f}, 0.0, 0.0);\n  common::Span<GradCostNorm> d_max_lambdas = p_cache->MaxLambdas<GradCostNorm>(ctx, n_groups);\n  CHECK_EQ(n_groups * sizeof(GradCostNorm), d_max_lambdas.size_bytes());\n  // Reduce by group.\n  std::size_t bytes;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(\n      
nullptr, bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(),\n      d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream()));\n  dh::TemporaryArray<char> temp(bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(\n      temp.data().get(), bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(),\n      d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream()));\n\n  dh::TemporaryArray<double> min_bias(2);\n  auto d_min_bias = dh::ToSpan(min_bias);\n  if (unbiased) {\n    MinBias(ctx, p_cache, make_get_grad.Args().ti_plus, make_get_grad.Args().tj_minus, d_min_bias);\n  }\n  /**\n   * Create rounding factors\n   */\n  auto d_cost_rounding = p_cache->CUDACostRounding(ctx);\n  auto d_rounding = p_cache->CUDARounding(ctx);\n  dh::LaunchN(n_groups, ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t g) mutable {\n    auto group_size = d_gptr[g + 1] - d_gptr[g];\n    auto const& max_grad = cuda::std::get<0>(d_max_lambdas[g]);\n    // float group size\n    auto fgs = static_cast<float>(group_size);\n    auto grad = common::CreateRoundingFactor(fgs * max_grad.GetGrad(), group_size);\n    auto hess = common::CreateRoundingFactor(fgs * max_grad.GetHess(), group_size);\n    d_rounding(g) = GradientPair{grad, hess};\n\n    auto cost = cuda::std::get<1>(d_max_lambdas[g]);\n    if (unbiased) {\n      cost /= std::min(d_min_bias[0], d_min_bias[1]);\n      d_cost_rounding[0] = common::CreateRoundingFactor(fgs * cost, group_size);\n    }\n  });\n\n  /**\n   * Second pass, actual update to gradient and bias.\n   */\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul),\n                     p_cache->CUDAThreads(), make_get_grad(true));\n\n  /**\n   * Lastly, normalization and weight.\n   */\n  auto d_weights = common::MakeOptionalWeights(ctx->Device(), info.weights_);\n  auto w_norm = p_cache->WeightNorm();\n  auto need_norm = 
p_cache->Param().lambdarank_normalization;\n  auto n_pairs = p_cache->Param().NumPair();\n  bool is_mean = p_cache->Param().IsMean();\n  CHECK_EQ(is_mean, !has_truncation);\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.Size(),\n                     [=] XGBOOST_DEVICE(std::size_t i) mutable {\n                       auto g = dh::SegmentId(d_gptr, i);\n                       if (need_norm) {\n                         double norm = 1.0;\n                         if (has_truncation) {\n                           // Normalize using gradient for top-k.\n                           auto sum_lambda = cuda::std::get<2>(d_max_lambdas[g]);\n                           if (sum_lambda > 0.0) {\n                             norm = std::log2(1.0 + sum_lambda) / sum_lambda;\n                           }\n                         } else {\n                           // Normalize using the number of pairs for mean.\n                           double scale = 1.0 / static_cast<double>(n_pairs);\n                           norm = scale;\n                         }\n                         d_gpair(i, 0) *= norm;\n                       }\n\n                       d_gpair(i, 0) *= (d_weights[g] * w_norm);\n                     });\n}\n\n/**\n * @brief Handles boilerplate code like getting device spans.\n */\ntemplate <bool norm_by_diff, typename Delta>\nvoid Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const& preds,\n            const MetaInfo& info, std::shared_ptr<ltr::RankingCache> p_cache, Delta delta,\n            linalg::VectorView<double const> ti_plus,   // input bias ratio\n            linalg::VectorView<double const> tj_minus,  // input bias ratio\n            linalg::VectorView<double> li, linalg::VectorView<double> lj,\n            linalg::Matrix<GradientPair>* out_gpair) {\n  // boilerplate\n  auto device = ctx->Device();\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n  auto n_groups = 
p_cache->Groups();\n\n  info.labels.SetDevice(device);\n  preds.SetDevice(device);\n  out_gpair->SetDevice(ctx->Device());\n  out_gpair->Reshape(preds.Size(), 1);\n\n  CHECK(p_cache);\n  auto d_rounding = p_cache->CUDARounding(ctx);\n  auto d_cost_rounding = p_cache->CUDACostRounding(ctx);\n\n  CHECK_NE(d_rounding.Size(), 0);\n\n  auto label = info.labels.View(ctx->Device());\n  auto predts = preds.ConstDeviceSpan();\n  auto gpairs = out_gpair->View(ctx->Device());\n  thrust::fill_n(ctx->CUDACtx()->CTP(), gpairs.Values().data(), gpairs.Size(),\n                 GradientPair{0.0f, 0.0f});\n\n  auto const d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr();\n  auto const d_gptr = p_cache->DataGroupPtr(ctx);\n  auto const rank_idx = p_cache->SortedIdx(ctx, predts);\n\n  auto const unbiased = p_cache->Param().lambdarank_unbiased;\n\n  common::Span<std::size_t const> d_y_sorted_idx;\n  if (!p_cache->Param().HasTruncation()) {\n    d_y_sorted_idx = SortY(ctx, info, rank_idx, p_cache);\n  }\n\n  KernelInputs args{ti_plus,        tj_minus, li,     lj,     d_gptr,     d_threads_group_ptr,\n                    rank_idx,       label,    predts, gpairs, d_rounding, d_cost_rounding.data(),\n                    d_y_sorted_idx, iter};\n\n  // dispatch based on unbiased and truncation\n  if (p_cache->Param().HasTruncation()) {\n    if (unbiased) {\n      CalcGrad(ctx, info, p_cache, MakeGetGrad<true, true, norm_by_diff, Delta>{args, delta});\n    } else {\n      CalcGrad(ctx, info, p_cache, MakeGetGrad<false, true, norm_by_diff, Delta>{args, delta});\n    }\n  } else {\n    if (unbiased) {\n      CalcGrad(ctx, info, p_cache, MakeGetGrad<true, false, norm_by_diff, Delta>{args, delta});\n    } else {\n      CalcGrad(ctx, info, p_cache, MakeGetGrad<false, false, norm_by_diff, Delta>{args, delta});\n    }\n  }\n}\n}  // anonymous namespace\n\ncommon::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,\n                                      common::Span<std::size_t 
const> d_rank,\n                                      std::shared_ptr<ltr::RankingCache> p_cache) {\n  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);\n  auto label = info.labels.View(ctx->Device());\n  // The buffer for ranked y is necessary as cub segmented sort accepts only pointer.\n  auto d_y_ranked = p_cache->RankedY(ctx, info.num_row_);\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_y_ranked.size(),\n                     [=] XGBOOST_DEVICE(std::size_t i) {\n                       auto g = dh::SegmentId(d_group_ptr, i);\n                       auto g_label =\n                           label.Slice(linalg::Range(d_group_ptr[g], d_group_ptr[g + 1]), 0);\n                       auto g_rank_idx = d_rank.subspan(d_group_ptr[g], g_label.Size());\n                       i -= d_group_ptr[g];\n                       auto g_y_ranked = d_y_ranked.subspan(d_group_ptr[g], g_label.Size());\n                       g_y_ranked[i] = g_label(g_rank_idx[i]);\n                     });\n  auto d_y_sorted_idx = p_cache->SortedIdxY(ctx, info.num_row_);\n  common::SegmentedArgSort<false, true>(ctx, d_y_ranked, d_group_ptr, d_y_sorted_idx);\n  return d_y_sorted_idx;\n}\n\nvoid LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,\n                               const HostDeviceVector<float>& preds, const MetaInfo& info,\n                               std::shared_ptr<ltr::NDCGCache> p_cache,\n                               linalg::VectorView<double const> ti_plus,   // input bias ratio\n                               linalg::VectorView<double const> tj_minus,  // input bias ratio\n                               linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                               linalg::Matrix<GradientPair>* out_gpair) {\n  // boilerplate\n  auto device = ctx->Device();\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n  auto const d_inv_IDCG = p_cache->InvIDCG(ctx);\n  auto const discount = 
p_cache->Discount(ctx);\n\n  info.labels.SetDevice(device);\n  preds.SetDevice(device);\n\n  auto const exp_gain = p_cache->Param().ndcg_exp_gain;\n  auto delta_ndcg = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,\n                                       std::size_t rank_low, bst_group_t g) {\n    return exp_gain ? DeltaNDCG<true>(y_high, y_low, rank_high, rank_low, d_inv_IDCG(g), discount)\n                    : DeltaNDCG<false>(y_high, y_low, rank_high, rank_low, d_inv_IDCG(g), discount);\n  };\n  if (p_cache->Param().lambdarank_score_normalization) {\n    Launch<true>(ctx, iter, preds, info, p_cache, delta_ndcg, ti_plus, tj_minus, li, lj, out_gpair);\n  } else {\n    Launch<false>(ctx, iter, preds, info, p_cache, delta_ndcg, ti_plus, tj_minus, li, lj,\n                  out_gpair);\n  }\n}\n\nvoid MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t const> d_rank_idx,\n             std::shared_ptr<ltr::MAPCache> p_cache) {\n  common::Span<double> out_n_rel = p_cache->NumRelevant(ctx);\n  common::Span<double> out_acc = p_cache->Acc(ctx);\n\n  CHECK_EQ(out_n_rel.size(), info.num_row_);\n  CHECK_EQ(out_acc.size(), info.num_row_);\n\n  auto group_ptr = p_cache->DataGroupPtr(ctx);\n  auto key_it = dh::MakeTransformIterator<std::size_t>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) -> std::size_t { return dh::SegmentId(group_ptr, i); });\n  auto label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);\n  auto const* cuctx = ctx->CUDACtx();\n\n  {\n    // calculate number of relevant documents\n    auto val_it = dh::MakeTransformIterator<double>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double {\n          auto g = dh::SegmentId(group_ptr, i);\n          auto g_label = label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));\n          auto idx_in_group = i - group_ptr[g];\n          auto g_sorted_idx = 
d_rank_idx.subspan(group_ptr[g], group_ptr[g + 1] - group_ptr[g]);\n          return static_cast<double>(g_label(g_sorted_idx[idx_in_group]));\n        });\n    thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, val_it,\n                                  out_n_rel.data());\n  }\n  {\n    // \\sum l_k/k\n    auto val_it = dh::MakeTransformIterator<double>(\n        thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double {\n          auto g = dh::SegmentId(group_ptr, i);\n          auto g_label = label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));\n          auto g_sorted_idx = d_rank_idx.subspan(group_ptr[g], group_ptr[g + 1] - group_ptr[g]);\n          auto idx_in_group = i - group_ptr[g];\n          double rank_in_group = idx_in_group + 1.0;\n          return static_cast<double>(g_label(g_sorted_idx[idx_in_group])) / rank_in_group;\n        });\n    thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, val_it,\n                                  out_acc.data());\n  }\n}\n\nvoid LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,\n                              HostDeviceVector<float> const& predt, const MetaInfo& info,\n                              std::shared_ptr<ltr::MAPCache> p_cache,\n                              linalg::VectorView<double const> ti_plus,   // input bias ratio\n                              linalg::VectorView<double const> tj_minus,  // input bias ratio\n                              linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                              linalg::Matrix<GradientPair>* out_gpair) {\n  auto device = ctx->Device();\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n\n  info.labels.SetDevice(device);\n  predt.SetDevice(device);\n\n  CHECK(p_cache);\n\n  auto d_predt = predt.ConstDeviceSpan();\n  auto const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt);\n\n  MAPStat(ctx, info, d_sorted_idx, p_cache);\n  auto d_n_rel = 
p_cache->NumRelevant(ctx);\n  auto d_acc = p_cache->Acc(ctx);\n  auto d_gptr = p_cache->DataGroupPtr(ctx).data();\n\n  auto delta_map = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,\n                                      std::size_t rank_low, bst_group_t g) {\n    if (rank_high > rank_low) {\n      thrust::swap(rank_high, rank_low);\n      thrust::swap(y_high, y_low);\n    }\n    auto cnt = d_gptr[g + 1] - d_gptr[g];\n    auto g_n_rel = d_n_rel.subspan(d_gptr[g], cnt);\n    auto g_acc = d_acc.subspan(d_gptr[g], cnt);\n    auto d = DeltaMAP(y_high, y_low, rank_high, rank_low, g_n_rel, g_acc);\n    return d;\n  };\n  if (p_cache->Param().lambdarank_score_normalization) {\n    Launch<true>(ctx, iter, predt, info, p_cache, delta_map, ti_plus, tj_minus, li, lj, out_gpair);\n  } else {\n    Launch<false>(ctx, iter, predt, info, p_cache, delta_map, ti_plus, tj_minus, li, lj, out_gpair);\n  }\n}\n\nvoid LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,\n                                   HostDeviceVector<float> const& predt, const MetaInfo& info,\n                                   std::shared_ptr<ltr::RankingCache> p_cache,\n                                   linalg::VectorView<double const> ti_plus,   // input bias ratio\n                                   linalg::VectorView<double const> tj_minus,  // input bias ratio\n                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                                   linalg::Matrix<GradientPair>* out_gpair) {\n  auto device = ctx->Device();\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n\n  info.labels.SetDevice(device);\n  predt.SetDevice(device);\n\n  auto delta = [] XGBOOST_DEVICE(float, float, std::size_t, std::size_t, bst_group_t) {\n    return 1.0;\n  };\n\n  if (p_cache->Param().lambdarank_score_normalization) {\n    Launch<true>(ctx, iter, predt, info, p_cache, delta, ti_plus, tj_minus, li, lj, out_gpair);\n  } else {\n    
Launch<false>(ctx, iter, predt, info, p_cache, delta, ti_plus, tj_minus, li, lj, out_gpair);\n  }\n}\n\nnamespace {\nstruct ReduceOp {\n  template <typename Tup>\n  Tup XGBOOST_DEVICE operator()(Tup const& l, Tup const& r) {\n    return thrust::make_tuple(thrust::get<0>(l) + thrust::get<0>(r),\n                              thrust::get<1>(l) + thrust::get<1>(r));\n  }\n};\n}  // namespace\n\nvoid LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,\n                                  linalg::VectorView<double const> lj_full,\n                                  linalg::Vector<double>* p_ti_plus,\n                                  linalg::Vector<double>* p_tj_minus,\n                                  linalg::Vector<double>* p_li,  // loss\n                                  linalg::Vector<double>* p_lj,\n                                  std::shared_ptr<ltr::RankingCache> p_cache) {\n  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);\n  auto n_groups = d_group_ptr.size() - 1;\n\n  auto ti_plus = p_ti_plus->View(ctx->Device());\n  auto tj_minus = p_tj_minus->View(ctx->Device());\n\n  auto li = p_li->View(ctx->Device());\n  auto lj = p_lj->View(ctx->Device());\n  CHECK_EQ(li.Size(), ti_plus.Size());\n\n  auto const& param = p_cache->Param();\n  auto regularizer = param.Regularizer();\n  std::size_t k = p_cache->MaxPositionSize();\n\n  CHECK_EQ(li.Size(), k);\n  CHECK_EQ(lj.Size(), k);\n  // reduce li_full to li for each group.\n  auto make_iter = [&](linalg::VectorView<double const> l_full) {\n    auto l_it = [=] XGBOOST_DEVICE(std::size_t i) {\n      // group index\n      auto g = i % n_groups;\n      // rank is the position within a group, also the segment index\n      auto r = i / n_groups;\n\n      auto begin = d_group_ptr[g];\n      std::size_t group_size = d_group_ptr[g + 1] - begin;\n      auto n = std::min(group_size, k);\n      // r can be greater than n since we allocate threads based on truncation level\n      // 
instead of actual group size.\n      if (r >= n) {\n        return 0.0;\n      }\n      return l_full(r + begin);\n    };\n    return l_it;\n  };\n  auto li_it =\n      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), make_iter(li_full));\n  auto lj_it =\n      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), make_iter(lj_full));\n  // k segments, each segment has size n_groups.\n  auto key_it = dh::MakeTransformIterator<std::size_t>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) { return i * n_groups; });\n  auto val_it = thrust::make_zip_iterator(thrust::make_tuple(li_it, lj_it));\n  auto out_it =\n      thrust::make_zip_iterator(thrust::make_tuple(li.Values().data(), lj.Values().data()));\n\n  auto init = thrust::make_tuple(0.0, 0.0);\n  std::size_t bytes;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, out_it, k, key_it,\n                                                   key_it + 1, ReduceOp{}, init,\n                                                   ctx->CUDACtx()->Stream()));\n  dh::TemporaryArray<char> temp(bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(temp.data().get(), bytes, val_it, out_it, k,\n                                                   key_it, key_it + 1, ReduceOp{}, init,\n                                                   ctx->CUDACtx()->Stream()));\n\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), li.Size(),\n                     [=] XGBOOST_DEVICE(std::size_t i) mutable {\n                       if (li(0) >= Eps64()) {\n                         ti_plus(i) = std::pow(li(i) / li(0), regularizer);\n                       }\n                       if (lj(0) >= Eps64()) {\n                         tj_minus(i) = std::pow(lj(i) / lj(0), regularizer);\n                       }\n                       assert(!isinf(ti_plus(i)));\n                       assert(!isinf(tj_minus(i)));\n            
         });\n}\n}  // namespace cuda_impl\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/lambdarank_obj.cuh",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n */\n#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_\n#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_\n\n#include <thrust/binary_search.h>                      // for lower_bound, upper_bound\n#include <thrust/functional.h>                         // for greater\n#include <thrust/iterator/counting_iterator.h>         // for make_counting_iterator\n#include <thrust/random/linear_congruential_engine.h>  // for minstd_rand\n#include <thrust/random/uniform_int_distribution.h>    // for uniform_int_distribution\n\n#include <cassert>                                     // for cassert\n#include <cstddef>                                     // for size_t\n#include <cstdint>                                     // for int32_t\n#include <tuple>                                       // for make_tuple, tuple\n\n#include \"../common/device_helpers.cuh\"                // for MakeTransformIterator\n#include \"../common/ranking_utils.cuh\"                 // for PairsForGroup\n#include \"../common/ranking_utils.h\"                   // for RankingCache\n#include \"../common/threading_utils.cuh\"               // for UnravelTrapeziodIdx\n#include \"xgboost/base.h\"    // for bst_group_t, GradientPair, XGBOOST_DEVICE\n#include \"xgboost/data.h\"    // for MetaInfo\n#include \"xgboost/linalg.h\"  // for VectorView, Range, UnravelIndex\n#include \"xgboost/span.h\"    // for Span\n\nnamespace xgboost::obj::cuda_impl {\n/**\n * \\brief Find number of elements left to the label bucket\n */\ntemplate <typename It, typename T = typename std::iterator_traits<It>::value_type>\nXGBOOST_DEVICE __forceinline__ std::size_t CountNumItemsToTheLeftOf(It items, std::size_t n, T v) {\n  return thrust::lower_bound(thrust::seq, items, items + n, v, thrust::greater<T>{}) - items;\n}\n/**\n * \\brief Find number of elements right to the label bucket\n */\ntemplate <typename It, typename T = typename 
std::iterator_traits<It>::value_type>\nXGBOOST_DEVICE __forceinline__ std::size_t CountNumItemsToTheRightOf(It items, std::size_t n, T v) {\n  return n - (thrust::upper_bound(thrust::seq, items, items + n, v, thrust::greater<T>{}) - items);\n}\n/**\n * \\brief Sort labels according to rank list for making pairs.\n */\ncommon::Span<std::size_t const> SortY(Context const *ctx, MetaInfo const &info,\n                                      common::Span<std::size_t const> d_rank,\n                                      std::shared_ptr<ltr::RankingCache> p_cache);\n\n/**\n * \\brief Parameters needed for calculating gradient\n */\nstruct KernelInputs {\n  linalg::VectorView<double const> ti_plus;   // input bias ratio\n  linalg::VectorView<double const> tj_minus;  // input bias ratio\n  linalg::VectorView<double> li;\n  linalg::VectorView<double> lj;\n\n  common::Span<bst_group_t const> d_group_ptr;\n  common::Span<std::size_t const> d_threads_group_ptr;\n  common::Span<std::size_t const> d_sorted_idx;\n\n  linalg::MatrixView<float const> labels;\n  common::Span<float const> predts;\n  linalg::MatrixView<GradientPair> gpairs;\n\n  linalg::VectorView<GradientPair const> d_roundings;\n  double const *d_cost_rounding;\n\n  common::Span<std::size_t const> d_y_sorted_idx;\n\n  std::int32_t iter;\n};\n/**\n * @brief Functor for generating pairs\n */\ntemplate <bool has_truncation>\nstruct MakePairsOp {\n  KernelInputs args;\n  /**\n   * @brief Make pair for the topk pair method.\n   */\n  [[nodiscard]] XGBOOST_DEVICE std::tuple<std::size_t, std::size_t> WithTruncation(\n      std::size_t idx, bst_group_t g) const {\n    auto thread_group_begin = args.d_threads_group_ptr[g];\n    auto idx_in_thread_group = idx - thread_group_begin;\n\n    auto data_group_begin = static_cast<std::size_t>(args.d_group_ptr[g]);\n    std::size_t n_data = args.d_group_ptr[g + 1] - data_group_begin;\n\n    std::size_t i = 0, j = 0;\n    common::UnravelTrapeziodIdx(idx_in_thread_group, n_data, &i, 
&j);\n\n    std::size_t rank_high = i, rank_low = j;\n    return std::make_tuple(rank_high, rank_low);\n  }\n  /**\n   * @brief Make pair for the mean pair method\n   */\n  XGBOOST_DEVICE std::tuple<std::size_t, std::size_t> WithSampling(std::size_t idx,\n                                                                   bst_group_t g) const {\n    std::size_t n_samples = args.labels.Size();\n    assert(n_samples == args.predts.size());\n    // Constructed from ranking cache.\n    std::size_t n_pairs =\n        ltr::cuda_impl::PairsForGroup(args.d_threads_group_ptr[g + 1] - args.d_threads_group_ptr[g],\n                                      args.d_group_ptr[g + 1] - args.d_group_ptr[g]);\n\n    assert(n_pairs > 0);\n    auto [sample_idx, sample_pair_idx] = linalg::UnravelIndex(idx, {n_samples, n_pairs});\n\n    auto g_begin = static_cast<std::size_t>(args.d_group_ptr[g]);\n    std::size_t n_data = args.d_group_ptr[g + 1] - g_begin;\n\n    auto g_label = args.labels.Slice(linalg::Range(g_begin, g_begin + n_data));\n    auto g_rank_idx = args.d_sorted_idx.subspan(args.d_group_ptr[g], n_data);\n    auto g_y_sorted_idx = args.d_y_sorted_idx.subspan(g_begin, n_data);\n\n    std::size_t const i = sample_idx - g_begin;\n    assert(sample_pair_idx < n_samples);\n    assert(i <= sample_idx);\n\n    auto g_sorted_label = dh::MakeTransformIterator<float>(\n        thrust::make_counting_iterator(0ul),\n        [&](std::size_t i) { return g_label(g_rank_idx[g_y_sorted_idx[i]]); });\n\n    // Are the labels diverse enough? 
If they are all the same, then there is nothing to pick\n    // from another group - bail sooner\n    if (g_label.Size() == 0 || g_sorted_label[0] == g_sorted_label[n_data - 1]) {\n      auto z = static_cast<std::size_t>(0ul);\n      return std::make_tuple(z, z);\n    }\n\n    std::size_t n_lefts = CountNumItemsToTheLeftOf(g_sorted_label, i + 1, g_sorted_label[i]);\n    std::size_t n_rights =\n        CountNumItemsToTheRightOf(g_sorted_label + i, n_data - i, g_sorted_label[i]);\n    // The index pointing to the first element of the next bucket\n    std::size_t right_bound = n_data - n_rights;\n\n    std::uint32_t seed = args.iter * (static_cast<std::uint32_t>(args.d_group_ptr.size()) - 1) + g;\n    thrust::minstd_rand rng(seed);\n    auto pair_idx = i;\n    rng.discard(idx - args.d_threads_group_ptr[g]);  // idx within group\n    thrust::uniform_int_distribution<std::size_t> dist(0, n_lefts + n_rights - 1);\n    auto ridx = dist(rng);\n    SPAN_CHECK(ridx < n_lefts + n_rights);\n    if (ridx >= n_lefts) {\n      ridx = ridx - n_lefts + right_bound;  // fixme\n    }\n\n    auto idx0 = g_y_sorted_idx[pair_idx];\n    auto idx1 = g_y_sorted_idx[ridx];\n\n    return std::make_tuple(idx0, idx1);\n  }\n  /**\n   * \\brief Generate a single pair.\n   *\n   * \\param idx Pair index (CUDA thread index).\n   * \\param g   Query group index.\n   */\n  XGBOOST_DEVICE auto operator()(std::size_t idx, bst_group_t g) const {\n    if (has_truncation) {\n      return this->WithTruncation(idx, g);\n    } else {\n      return this->WithSampling(idx, g);\n    }\n  }\n};\n}  // namespace xgboost::obj::cuda_impl\n#endif  // XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_\n"
  },
  {
    "path": "src/objective/lambdarank_obj.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n *\n * Vocabulary explanation:\n *\n * There are two different lists we need to handle in the objective, first is the list of\n * labels (relevance degree) provided by the user. Its order has no particular meaning\n * when bias estimation is NOT used. Another one is generated by our model, sorted index\n * based on prediction scores. `rank_high` refers to the position index of the model rank\n * list that is higher than `rank_low`, while `idx_high` refers to where does the\n * `rank_high` sample comes from. Simply put, `rank_high` indexes into the rank list\n * obtained from the model, while `idx_high` indexes into the user provided sample list.\n */\n#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_\n#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_\n#include <algorithm>   // for min, max\n#include <cassert>     // for assert\n#include <cmath>       // for log, abs\n#include <cstddef>     // for size_t\n#include <functional>  // for greater\n#include <memory>      // for shared_ptr\n#include <random>      // for minstd_rand, uniform_int_distribution\n#include <vector>      // for vector\n\n#include \"../common/algorithm.h\"           // for ArgSort\n#include \"../common/math.h\"                // for Sigmoid\n#include \"../common/ranking_utils.h\"       // for CalcDCGGain\n#include \"../common/transform_iterator.h\"  // for MakeIndexTransformIter\n#include \"xgboost/base.h\"                  // for GradientPair, XGBOOST_DEVICE, kRtEps\n#include \"xgboost/context.h\"               // for Context\n#include \"xgboost/data.h\"                  // for MetaInfo\n#include \"xgboost/host_device_vector.h\"    // for HostDeviceVector\n#include \"xgboost/linalg.h\"                // for VectorView, Vector\n#include \"xgboost/logging.h\"               // for CHECK_EQ\n#include \"xgboost/span.h\"                  // for Span\n\nnamespace xgboost::obj {\ndouble constexpr Eps64() { return 1e-16; }\n\ntemplate <bool 
exp>\nXGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t rank_high,\n                                std::size_t rank_low, double inv_IDCG,\n                                common::Span<double const> discount) {\n  // Use rank_high instead of idx_high as we are calculating discount based on ranks\n  // provided by the model.\n  double gain_high = exp ? ltr::CalcDCGGain(y_high) : y_high;\n  double discount_high = discount[rank_high];\n\n  double gain_low = exp ? ltr::CalcDCGGain(y_low) : y_low;\n  double discount_low = discount[rank_low];\n\n  double original = gain_high * discount_high + gain_low * discount_low;\n  double changed = gain_low * discount_high + gain_high * discount_low;\n\n  double delta_NDCG = (original - changed) * inv_IDCG;\n  assert(delta_NDCG >= -1.0);\n  assert(delta_NDCG <= 1.0);\n  return delta_NDCG;\n}\n\nXGBOOST_DEVICE inline double DeltaMAP(float y_high, float y_low, std::size_t rank_high,\n                                      std::size_t rank_low, common::Span<double const> n_rel,\n                                      common::Span<double const> acc) {\n  double r_h = static_cast<double>(rank_high) + 1.0;\n  double r_l = static_cast<double>(rank_low) + 1.0;\n  double delta{0.0};\n  double n_total_relevances = n_rel.back();\n  assert(n_total_relevances > 0.0);\n  auto m = n_rel[rank_low];\n  double n = n_rel[rank_high];\n\n  if (y_high < y_low) {\n    auto a = m / r_l - (n + 1.0) / r_h;\n    auto b = acc[rank_low - 1] - acc[rank_high];\n    delta = (a - b) / n_total_relevances;\n  } else {\n    auto a = n / r_h - m / r_l;\n    auto b = acc[rank_low - 1] - acc[rank_high];\n    delta = (a + b) / n_total_relevances;\n  }\n  return delta;\n}\n/**\n * @brief Calculate lambda gradient based on delta weight.\n *\n * @tparam unbiased Whether positioin bias is taken into account.\n * @tparam norm_by_diff Do we need to normalize the delta metric using the score difference.\n * @tparam Functor for calculating the delta weight.\n 
*/\ntemplate <bool unbiased, bool norm_by_diff, typename Delta>\nXGBOOST_DEVICE GradientPair\nLambdaGrad(linalg::VectorView<float const> labels, common::Span<float const> predts,\n           common::Span<size_t const> sorted_idx,\n           std::size_t rank_high,                     // higher index on the model rank list\n           std::size_t rank_low,                      // lower index on the model rank list\n           Delta delta,                               // function to calculate delta score\n           linalg::VectorView<double const> t_plus,   // input bias ratio\n           linalg::VectorView<double const> t_minus,  // input bias ratio\n           double* p_cost) {\n  assert(sorted_idx.size() > 0 && \"Empty sorted idx for a group.\");\n  std::size_t idx_high = sorted_idx[rank_high];\n  std::size_t idx_low = sorted_idx[rank_low];\n\n  if (labels(idx_high) == labels(idx_low)) {\n    *p_cost = 0;\n    return {0.0f, 0.0f};\n  }\n\n  auto best_score = predts[sorted_idx.front()];\n  auto worst_score = predts[sorted_idx.back()];\n\n  auto y_high = labels(idx_high);\n  float s_high = predts[idx_high];\n  auto y_low = labels(idx_low);\n  float s_low = predts[idx_low];\n\n  // Use double whenever possible as we are working on the exp space.\n  double delta_score = std::abs(s_high - s_low);\n  double const sigmoid = common::Sigmoid(s_high - s_low);\n  // Change in metric score like \\delta NDCG or \\delta MAP\n  double delta_metric = std::abs(delta(y_high, y_low, rank_high, rank_low));\n\n  if (norm_by_diff && best_score != worst_score) {\n    delta_metric /= (delta_score + 0.01);\n  }\n\n  if (unbiased) {\n    *p_cost = std::log(1.0 / (1.0 - sigmoid)) * delta_metric;\n  }\n\n  auto lambda_ij = (sigmoid - 1.0) * delta_metric;\n  auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), Eps64()) * delta_metric * 2.0;\n\n  auto k = t_plus.Size();\n  assert(t_minus.Size() == k && \"Invalid size of position bias\");\n\n  // We need to skip samples that exceed the 
maximum number of tracked positions, and\n  // samples that have low probability and might bring us floating point issues.\n  if (unbiased && idx_high < k && idx_low < k && t_minus(idx_low) >= Eps64() &&\n      t_plus(idx_high) >= Eps64()) {\n    // The index should be ranks[idx_low], since we assume label is sorted, this reduces\n    // to `idx_low`, which represents the position on the input list, as explained in the\n    // file header.\n    lambda_ij /= (t_plus(idx_high) * t_minus(idx_low));\n    hessian_ij /= (t_plus(idx_high) * t_minus(idx_low));\n  }\n  auto pg = GradientPair{static_cast<float>(lambda_ij), static_cast<float>(hessian_ij)};\n  return pg;\n}\n\nXGBOOST_DEVICE inline GradientPair Repulse(GradientPair pg) {\n  auto ng = GradientPair{-pg.GetGrad(), pg.GetHess()};\n  return ng;\n}\n\nnamespace cuda_impl {\nvoid LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,\n                               HostDeviceVector<float> const& preds, MetaInfo const& info,\n                               std::shared_ptr<ltr::NDCGCache> p_cache,\n                               linalg::VectorView<double const> t_plus,   // input bias ratio\n                               linalg::VectorView<double const> t_minus,  // input bias ratio\n                               linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                               linalg::Matrix<GradientPair>* out_gpair);\n\n/**\n * \\brief Generate statistic for MAP used for calculating \\Delta Z in lambda mart.\n */\nvoid MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t const> d_rank_idx,\n             std::shared_ptr<ltr::MAPCache> p_cache);\n\nvoid LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,\n                              HostDeviceVector<float> const& predt, MetaInfo const& info,\n                              std::shared_ptr<ltr::MAPCache> p_cache,\n                              linalg::VectorView<double const> t_plus,   // input 
bias ratio\n                              linalg::VectorView<double const> t_minus,  // input bias ratio\n                              linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                              linalg::Matrix<GradientPair>* out_gpair);\n\nvoid LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,\n                                   HostDeviceVector<float> const& predt, const MetaInfo& info,\n                                   std::shared_ptr<ltr::RankingCache> p_cache,\n                                   linalg::VectorView<double const> ti_plus,   // input bias ratio\n                                   linalg::VectorView<double const> tj_minus,  // input bias ratio\n                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,\n                                   linalg::Matrix<GradientPair>* out_gpair);\n\nvoid LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,\n                                  linalg::VectorView<double const> lj_full,\n                                  linalg::Vector<double>* p_ti_plus,\n                                  linalg::Vector<double>* p_tj_minus, linalg::Vector<double>* p_li,\n                                  linalg::Vector<double>* p_lj,\n                                  std::shared_ptr<ltr::RankingCache> p_cache);\n}  // namespace cuda_impl\n\nnamespace cpu_impl {\n/**\n * \\brief Generate statistic for MAP used for calculating \\Delta Z in lambda mart.\n *\n * \\param label    Ground truth relevance label.\n * \\param rank_idx Sorted index of prediction.\n * \\param p_cache  An initialized MAPCache.\n */\nvoid MAPStat(Context const* ctx, linalg::VectorView<float const> label,\n             common::Span<std::size_t const> rank_idx, std::shared_ptr<ltr::MAPCache> p_cache);\n}  // namespace cpu_impl\n\n/**\n * \\param Construct pairs on CPU\n *\n * \\tparam Op Functor for upgrading a pair of gradients.\n *\n * 
\\param ctx     The global context.\n * \\param iter    The boosting iteration.\n * \\param cache   ltr cache.\n * \\param g       The current query group\n * \\param g_label label The labels for the current query group\n * \\param g_rank  Sorted index of model scores for the current query group.\n * \\param op      A callable that accepts two index for a pair of documents. The index is for\n *                the ranked list (labels sorted according to model scores).\n */\ntemplate <typename Op>\nvoid MakePairs(Context const* ctx, std::int32_t iter,\n               std::shared_ptr<ltr::RankingCache> const cache, bst_group_t g,\n               linalg::VectorView<float const> g_label, common::Span<std::size_t const> g_rank,\n               Op op) {\n  auto group_ptr = cache->DataGroupPtr(ctx);\n  ltr::position_t cnt = group_ptr[g + 1] - group_ptr[g];\n\n  if (cache->Param().HasTruncation()) {\n    for (std::size_t i = 0, n = std::min(cnt, cache->Param().NumPair()); i < n; ++i) {\n      for (std::size_t j = i + 1; j < cnt; ++j) {\n        op(i, j);\n      }\n    }\n  } else {\n    CHECK_EQ(g_rank.size(), g_label.Size());\n\n    std::uint32_t seed = (iter + 1) * (static_cast<std::uint32_t>(group_ptr.size()) - 1) + g;\n    std::minstd_rand rnd(seed);\n    // sort label according to the rank list\n    auto it = common::MakeIndexTransformIter(\n        [&g_rank, &g_label](std::size_t idx) { return g_label(g_rank[idx]); });\n    std::vector<std::size_t> y_sorted_idx =\n        common::ArgSort<std::size_t>(ctx, it, it + cnt, std::greater<>{});\n    // permutation iterator to get the original label\n    auto rev_it = common::MakeIndexTransformIter(\n        [&](std::size_t idx) { return g_label(g_rank[y_sorted_idx[idx]]); });\n    for (std::size_t i = 0; i < cnt;) {\n      std::size_t j = i + 1;\n      // find the bucket boundary\n      while (j < cnt && rev_it[i] == rev_it[j]) {\n        ++j;\n      }\n      // Bucket [i,j), construct n_samples pairs for each sample inside 
the bucket with\n      // another sample outside the bucket.\n      //\n      // n elements left to the bucket, and n elements right to the bucket\n      std::size_t n_lefts = i, n_rights = static_cast<std::size_t>(cnt - j);\n      if (n_lefts + n_rights == 0) {\n        i = j;\n        continue;\n      }\n\n      auto n_samples = cache->Param().NumPair();\n      // for each pair specifed by the user\n      while (n_samples--) {\n        // for each sample in the bucket\n        for (std::size_t pair_idx = i; pair_idx < j; ++pair_idx) {\n          std::size_t ridx = std::uniform_int_distribution<std::size_t>(\n              static_cast<std::size_t>(0), n_lefts + n_rights - 1)(rnd);\n          if (ridx >= n_lefts) {\n            ridx = ridx - i + j;  // shift to the right of the bucket\n          }\n          // index that points to the rank list.\n          auto idx0 = y_sorted_idx[pair_idx];\n          auto idx1 = y_sorted_idx[ridx];\n          op(idx0, idx1);\n        }\n      }\n      i = j;\n    }\n  }\n}\n}  // namespace xgboost::obj\n#endif  // XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_\n"
  },
  {
    "path": "src/objective/multiclass_obj.cc",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n\n// Dummy file to keep the CUDA conditional compile trick.\n\n#include <dmlc/registry.h>\nnamespace xgboost {\nnamespace obj {\n\nDMLC_REGISTRY_FILE_TAG(multiclass_obj);\n\n}  // namespace obj\n}  // namespace xgboost\n\n#ifndef XGBOOST_USE_CUDA\n#include \"multiclass_obj.cu\"\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/objective/multiclass_obj.cu",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n * \\file multi_class.cc\n * \\brief Definition of multi-class classification objectives.\n * \\author Tianqi Chen\n */\n#include <dmlc/omp.h>\n\n#include <cassert>  // for assert\n#include <limits>\n\n#include \"../collective/aggregator.h\"  // for GlobalSum\n#include \"../common/common.h\"          // for AssertGPUSupport\n#include \"../common/linalg_op.h\"\n#include \"../common/math.h\"\n#include \"../common/optional_weight.h\"  // for MakeOptionalWeights\n#include \"../common/stats.h\"            // for Mean\n#include \"../common/transform.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/objective.h\"\n\n#if defined(XGBOOST_USE_CUDA)\n\n#include \"../common/algorithm.cuh\"     // for AllOf\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n\n#endif  // defined(XGBOOST_USE_CUDA)\n\n#include \"multiclass_param.h\"\n\nnamespace xgboost::obj {\n#if defined(XGBOOST_USE_CUDA)\nDMLC_REGISTRY_FILE_TAG(multiclass_obj_gpu);\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace {\nvoid ValidateLabel(Context const* ctx, MetaInfo const& info, std::int64_t n_classes) {\n  auto label = info.labels.View(ctx->Device());\n  CHECK_LE(label.Shape(1), 1) << \"multi-class-multi-label is not yet supported.\";\n  auto check = [=] XGBOOST_DEVICE(float y) -> bool {\n    return y >= 0 && y < n_classes && std::floor(y) == y;\n  };\n  auto valid = ctx->DispatchDevice(\n      [&] { return std::all_of(linalg::cbegin(label), linalg::cend(label), check); },\n      [&] {\n#if defined(XGBOOST_USE_CUDA)\n        return common::AllOf(ctx->CUDACtx()->CTP(), linalg::tcbegin(label), linalg::tcend(label),\n                             check);\n#else\n        common::AssertGPUSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_CUDA)\n      },\n      [&] {\n#if defined(XGBOOST_USE_SYCL)\n        return sycl::linalg::Validate(ctx->Device(), label, 
check);\n#else\n        common::AssertSYCLSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_SYCL)\n      });\n  CHECK(valid)\n      << \"SoftmaxMultiClassObj: label must be discrete values in the range of [0, num_class).\";\n}\n}  // namespace\n\nclass SoftmaxMultiClassObj : public ObjFunction {\n public:\n  explicit SoftmaxMultiClassObj(bool output_prob) : output_prob_(output_prob) {}\n\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n\n  ObjInfo Task() const override { return ObjInfo::kClassification; }\n\n  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    if (info.labels.Size() == 0) {\n      return;\n    }\n    std::int64_t n_classes = param_.num_class;\n    CHECK(preds.Size() == (static_cast<std::size_t>(n_classes) * info.labels.Size()))\n        << \"SoftmaxMultiClassObj: label size and pred size does not match.\\n\"\n        << \"label.Size() * num_class: \" << info.labels.Size() * n_classes << \"\\n\"\n        << \"num_class: \" << param_.num_class << \"\\n\"\n        << \"preds.Size(): \" << preds.Size();\n\n    if (iter == 0) {\n      ValidateLabel(this->ctx_, info, n_classes);\n    }\n\n    const auto n_samples = preds.Size() / n_classes;\n    CHECK_EQ(n_samples, info.num_row_);\n\n    // fallback to cpu if current device doesn't supports fp64\n    auto device = ctx_->DeviceFP64();\n    auto labels = info.labels.View(device);\n\n    out_gpair->SetDevice(device);\n    out_gpair->Reshape(info.num_row_, n_classes);\n    auto gpair = out_gpair->View(device);\n\n    if (!info.weights_.Empty()) {\n      CHECK_EQ(info.weights_.Size(), n_samples)\n          << \"Number of weights should be equal to number of data points.\";\n    }\n    info.weights_.SetDevice(device);\n    auto weights = common::MakeOptionalWeights(this->ctx_->Device(), info.weights_);\n\n    preds.SetDevice(device);\n    
auto predt = linalg::MakeTensorView(this->ctx_, &preds, n_samples, n_classes);\n    CHECK_EQ(labels.Shape(1), 1);\n    auto y1d = labels.Slice(linalg::All(), 0);\n    CHECK_EQ(y1d.Shape(0), info.num_row_);\n    linalg::ElementWiseKernel(this->ctx_, y1d, [=] XGBOOST_DEVICE(std::size_t idx) mutable {\n      auto point = predt.Slice(idx, linalg::All());\n      assert(point.Size() == static_cast<std::size_t>(n_classes));\n\n      // Part of the common::Softmax function\n      float wmax = std::numeric_limits<float>::min();\n      for (std::size_t k = 0, m = point.Size(); k < m; ++k) {\n        wmax = fmaxf(point(k), wmax);\n      }\n      double wsum = 0.0f;\n      for (std::size_t k = 0, m = point.Size(); k < m; ++k) {\n        wsum += expf(point(k) - wmax);\n      }\n      auto label = y1d(idx);\n\n      float wt = weights[idx];\n      for (decltype(n_classes) k = 0; k < n_classes; ++k) {\n        // Computation duplicated to avoid creating a cache.\n        float p = expf(point(k) - wmax) / static_cast<float>(wsum);\n        constexpr float kEps = 1e-16f;\n        float h = fmax(2.0f * p * (1.0f - p) * wt, kEps);\n        p = label == k ? 
p - 1.0f : p;\n        gpair(idx, k) = GradientPair{p * wt, h};\n      }\n    });\n  }\n\n  void PredTransform(HostDeviceVector<float>* io_preds) const override {\n    this->Transform(io_preds, output_prob_);\n  }\n  void EvalTransform(HostDeviceVector<float>* io_preds) override {\n    this->Transform(io_preds, true);\n  }\n  const char* DefaultEvalMetric() const override { return \"mlogloss\"; }\n\n  void Transform(HostDeviceVector<float>* io_preds, bool prob) const {\n    const int n_classes = param_.num_class;\n    const auto n_samples = static_cast<int64_t>(io_preds->Size() / n_classes);\n\n    auto device = io_preds->Device();\n    if (prob) {\n      common::Transform<>::Init(\n          [=] XGBOOST_DEVICE(size_t _idx, common::Span<float> _preds) {\n            common::Span<float> point = _preds.subspan(_idx * n_classes, n_classes);\n            common::Softmax(point.begin(), point.end());\n          },\n          common::Range{0, n_samples}, this->ctx_->Threads(), device)\n          .Eval(io_preds);\n    } else {\n      io_preds->SetDevice(device);\n      HostDeviceVector<float> max_preds;\n      max_preds.SetDevice(device);\n      max_preds.Resize(n_samples);\n      common::Transform<>::Init(\n          [=] XGBOOST_DEVICE(size_t _idx, common::Span<const float> _preds,\n                             common::Span<float> _max_preds) {\n            common::Span<const float> point = _preds.subspan(_idx * n_classes, n_classes);\n            _max_preds[_idx] = common::FindMaxIndex(point.cbegin(), point.cend()) - point.cbegin();\n          },\n          common::Range{0, n_samples}, this->ctx_->Threads(), device)\n          .Eval(io_preds, &max_preds);\n      io_preds->Resize(max_preds.Size());\n      io_preds->Copy(max_preds);\n    }\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    if (this->output_prob_) {\n      out[\"name\"] = String(\"multi:softprob\");\n    } else {\n      out[\"name\"] = String(\"multi:softmax\");\n    }\n   
 out[\"softmax_multiclass_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override { FromJson(in[\"softmax_multiclass_param\"], &param_); }\n\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {\n    std::int64_t n_classes = this->param_.num_class;\n    ValidateLabel(this->ctx_, info, n_classes);\n\n    *base_score = linalg::Zeros<float>(this->ctx_, n_classes);\n\n    std::size_t n = info.labels.Size();\n    // Calculate probability\n    auto labels = info.labels.View(ctx_->Device());\n    auto weights = common::MakeOptionalWeights(this->ctx_->Device(), info.weights_);\n    auto intercept = base_score->View(ctx_->Device());\n    CHECK_EQ(intercept.Size(), n_classes);\n    CHECK_EQ(n, info.num_row_);\n    linalg::SmallHistogram(ctx_, labels, weights, intercept);\n    auto sum_weight = common::SumOptionalWeights(this->ctx_, weights, n);\n    auto status = collective::GlobalSum(this->ctx_, info, intercept, &sum_weight);\n    collective::SafeColl(status);\n    CHECK_GE(sum_weight, kRtEps);\n    linalg::VecScaDiv(this->ctx_, intercept, sum_weight);\n    CHECK_EQ(base_score->Size(), n_classes);\n\n    // Transform it back to margin\n    // ln(v) - E[ln(v)]\n    linalg::Vector<float> mean;\n    linalg::LogE(this->ctx_, intercept, kRtEps);\n    common::Mean(this->ctx_, intercept, &mean);\n    auto d_mean = mean.View(this->ctx_->Device());\n    TransformKernel(this->ctx_, intercept, [=] XGBOOST_DEVICE(float v) { return v - d_mean(0); });\n  }\n\n private:\n  // output probability\n  bool const output_prob_;\n  // parameter\n  SoftmaxMultiClassParam param_;\n};\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(SoftmaxMultiClassParam);\n\nXGBOOST_REGISTER_OBJECTIVE(SoftmaxMultiClass, \"multi:softmax\")\n    .describe(\"Softmax for multi-class classification, output class index.\")\n    .set_body([]() { return new SoftmaxMultiClassObj(false); });\n\nXGBOOST_REGISTER_OBJECTIVE(SoftprobMultiClass, 
\"multi:softprob\")\n    .describe(\"Softmax for multi-class classification, output probability distribution.\")\n    .set_body([]() { return new SoftmaxMultiClassObj(true); });\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/multiclass_param.h",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n *\n * @brief Definition of multi-class classification parameters.\n */\n#ifndef XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_\n#define XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_\n\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost::obj {\nstruct SoftmaxMultiClassParam : public XGBoostParameter<SoftmaxMultiClassParam> {\n  int num_class{1};\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {\n    DMLC_DECLARE_FIELD(num_class).set_lower_bound(1).describe(\n        \"Number of output class in the multi-class classification.\");\n  }\n};\n}  // namespace xgboost::obj\n#endif  // XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_\n"
  },
  {
    "path": "src/objective/objective.cc",
    "content": "/**\n * Copyright 2015-2025, XGBoost Contributors\n *\n * @brief Registry of all objective functions.\n */\n#include <dmlc/registry.h>\n#include <xgboost/context.h>\n#include <xgboost/objective.h>\n\n#include <sstream>  // for stringstream\n#include <string>   // for string\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);\n}  // namespace dmlc\n\nnamespace xgboost {\n// implement factory functions\nObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) {\n  std::string obj_name = name;\n  auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(obj_name);\n  if (e == nullptr) {\n    std::stringstream ss;\n    for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {\n      ss << \"Objective candidate: \" << entry->name << \"\\n\";\n    }\n    LOG(FATAL) << \"Unknown objective function: `\" << name << \"`\\n\"\n               << ss.str();\n  }\n  auto pobj = (e->body)();\n  pobj->ctx_ = ctx;\n  return pobj;\n}\n\nvoid ObjFunction::InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const {\n  CHECK(base_score);\n  auto n_targets = this->Targets(info);\n  *base_score = linalg::Constant(this->ctx_, DefaultBaseScore(), n_targets);\n}\n}  // namespace xgboost\n\nnamespace xgboost {\nnamespace obj {\n// List of files that will be force linked in static links.\n#ifdef XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(regression_obj_gpu);\nDMLC_REGISTRY_LINK_TAG(quantile_obj_gpu);\nDMLC_REGISTRY_LINK_TAG(hinge_obj_gpu);\nDMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu);\nDMLC_REGISTRY_LINK_TAG(lambdarank_obj);\nDMLC_REGISTRY_LINK_TAG(lambdarank_obj_cu);\n#else\nDMLC_REGISTRY_LINK_TAG(regression_obj);\nDMLC_REGISTRY_LINK_TAG(quantile_obj);\nDMLC_REGISTRY_LINK_TAG(hinge_obj);\nDMLC_REGISTRY_LINK_TAG(multiclass_obj);\nDMLC_REGISTRY_LINK_TAG(lambdarank_obj);\n#endif  // XGBOOST_USE_CUDA\n}  // namespace obj\n}  // namespace xgboost\n"
  },
  {
    "path": "src/objective/quantile_obj.cc",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n\n// Dummy file to enable the CUDA conditional compile trick.\n\n#include <dmlc/registry.h>\nnamespace xgboost {\nnamespace obj {\n\nDMLC_REGISTRY_FILE_TAG(quantile_obj);\n\n}  // namespace obj\n}  // namespace xgboost\n\n#ifndef XGBOOST_USE_CUDA\n#include \"quantile_obj.cu\"\n#endif  // !defined(XGBOOST_USE_CUDA)\n"
  },
  {
    "path": "src/objective/quantile_obj.cu",
    "content": "/**\n * Copyright 2023-2026, XGBoost contributors\n */\n#include <array>                            // std::array\n#include <cstddef>                          // std::size_t\n#include <cstdint>                          // std::int32_t\n#include <vector>                           // std::vector\n\n#include \"../common/linalg_op.h\"            // ElementWiseKernel,cbegin,cend\n#include \"../common/quantile_loss_utils.h\"  // QuantileLossParam\n#include \"../common/stats.h\"                // Quantile,WeightedQuantile\n#include \"adaptive.h\"                       // UpdateTreeLeaf\n#include \"init_estimation.h\"                // CheckInitInputs\n#include \"xgboost/base.h\"                   // GradientPair,XGBOOST_DEVICE,bst_target_t\n#include \"xgboost/data.h\"                   // MetaInfo\n#include \"xgboost/host_device_vector.h\"     // HostDeviceVector\n#include \"xgboost/json.h\"                   // Json,String,ToJson,FromJson\n#include \"xgboost/linalg.h\"                 // Tensor,MakeTensorView,MakeVec\n#include \"xgboost/objective.h\"              // ObjFunction\n\n#if defined(XGBOOST_USE_CUDA)\n\n#include \"../common/stats.cuh\"      // SegmentedQuantile\n\n#endif                              // defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost::obj {\nclass QuantileRegression : public ObjFunction {\n  common::QuantileLossParam param_;\n  HostDeviceVector<float> alpha_;\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    auto const& alpha = param_.quantile_alpha.Get();\n    CHECK_EQ(alpha.size(), alpha_.Size()) << \"The objective is not yet configured.\";\n    if (info.ShouldHaveLabels()) {\n      CHECK_EQ(info.labels.Shape(1), 1)\n          << \"Multi-target is not yet supported by the quantile loss.\";\n    }\n    CHECK(!alpha.empty());\n    // We have some placeholders for multi-target in the quantile loss. 
But it's not\n    // supported as the gbtree doesn't know how to slice the gradient and there's no 3-dim\n    // model shape in general.\n    auto n_y = std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n    return alpha_.Size() * n_y;\n  }\n\n public:\n  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    if (iter == 0) {\n      CheckInitInputs(info);\n    }\n    CHECK_EQ(param_.quantile_alpha.Get().size(), alpha_.Size());\n\n    using SizeT = decltype(info.num_row_);\n    SizeT n_targets = this->Targets(info);\n    SizeT n_alphas = alpha_.Size();\n    CHECK_NE(n_alphas, 0);\n    CHECK_GE(n_targets, n_alphas);\n    CHECK_EQ(preds.Size(), info.num_row_ * n_targets);\n\n    auto labels = info.labels.View(ctx_->Device());\n\n    out_gpair->SetDevice(ctx_->Device());\n    CHECK_EQ(info.labels.Shape(1), 1)\n        << \"Multi-target for quantile regression is not yet supported.\";\n    out_gpair->Reshape(info.num_row_, n_targets);\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    info.weights_.SetDevice(ctx_->Device());\n    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()\n                                                 : info.weights_.ConstDeviceSpan()};\n\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);\n\n    alpha_.SetDevice(ctx_->Device());\n    auto alpha = ctx_->IsCPU() ? 
alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();\n\n    linalg::ElementWiseKernel(ctx_, gpair,\n                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n                                // j is the quantile index\n                                // 0 is the target index\n                                auto d = predt(i, j) - labels(i, 0);\n                                auto h = weight[i];\n                                if (d >= 0) {\n                                  auto g = (1.0f - alpha[j]) * weight[i];\n                                  gpair(i, j) = GradientPair{g, h};\n                                } else {\n                                  auto g = (-alpha[j] * weight[i]);\n                                  gpair(i, j) = GradientPair{g, h};\n                                }\n                              });\n  }\n\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {\n    CHECK(!alpha_.Empty());\n\n    auto n_targets = this->Targets(info);\n    base_score->SetDevice(ctx_->Device());\n    base_score->Reshape(n_targets);\n\n    if (ctx_->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n      alpha_.SetDevice(ctx_->Device());\n      auto d_alpha = alpha_.ConstDeviceSpan();\n      auto d_labels = info.labels.View(ctx_->Device());\n      auto seg_it = dh::MakeTransformIterator<std::size_t>(\n          thrust::make_counting_iterator(0ul),\n          [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); });\n      CHECK_EQ(d_labels.Shape(1), 1);\n      auto val_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                                     [=] XGBOOST_DEVICE(std::size_t i) {\n                                                       auto sample_idx = i % d_labels.Shape(0);\n                                                       return d_labels(sample_idx, 0);\n                                                     });\n      auto 
n = d_labels.Size() * d_alpha.size();\n      CHECK_EQ(base_score->Size(), d_alpha.size());\n      if (info.weights_.Empty()) {\n        common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, val_it,\n                                  val_it + n, base_score->Data());\n      } else {\n        info.weights_.SetDevice(ctx_->Device());\n        auto d_weights = info.weights_.ConstDeviceSpan();\n        auto weight_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                                          [=] XGBOOST_DEVICE(std::size_t i) {\n                                                            auto sample_idx = i % d_labels.Shape(0);\n                                                            return d_weights[sample_idx];\n                                                          });\n        common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1,\n                                          val_it, val_it + n, weight_it, weight_it + n,\n                                          base_score->Data());\n      }\n#else\n      common::AssertGPUSupport();\n#endif  // defined(XGBOOST_USE_CUDA)\n    } else {\n      auto quantiles = base_score->HostView();\n      auto h_weights = info.weights_.ConstHostVector();\n      for (bst_target_t t{0}; t < n_targets; ++t) {\n        auto alpha = param_.quantile_alpha[t];\n        auto h_labels = info.labels.HostView();\n        if (h_weights.empty()) {\n          quantiles(t) =\n              common::Quantile(ctx_, alpha, linalg::cbegin(h_labels), linalg::cend(h_labels));\n        } else {\n          CHECK_EQ(h_weights.size(), h_labels.Size());\n          quantiles(t) = common::WeightedQuantile(ctx_, alpha, linalg::cbegin(h_labels),\n                                                  linalg::cend(h_labels), std::cbegin(h_weights));\n        }\n      }\n    }\n\n    // Global mean. 
There's no strong preference on whether weighted mean should be used\n    // with weighted quantiles. The proper way to do this might be using an approximated\n    // quantile algorithm with stream inputs, but it's also much more expensive.\n    auto intercept = base_score->View(this->ctx_->Device());\n    collective::SafeColl(collective::GlobalSum(ctx_, info, intercept));\n    double n_workers = info.IsColumnSplit() ? 1.0 : collective::GetWorldSize();\n    linalg::VecScaDiv(ctx_, intercept, n_workers);\n  }\n\n  void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,\n                      float learning_rate, HostDeviceVector<float> const& prediction,\n                      bst_target_t group_idx, RegTree* p_tree) const override {\n    auto const& alphas = param_.quantile_alpha.Get();\n    if (p_tree->IsMultiTarget()) {\n      CHECK_EQ(group_idx, 0);\n      // Pass all the alphas\n      ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,\n                                     alphas, p_tree);\n    } else {\n      // Use only the alpha for the current group.\n      ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,\n                                     std::vector{alphas[group_idx]}, p_tree);\n    }\n  }\n\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    param_.Validate();\n    this->alpha_.HostVector() = param_.quantile_alpha.Get();\n  }\n  [[nodiscard]] ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }\n  static char const* Name() { return \"reg:quantileerror\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(Name());\n    out[\"quantile_loss_param\"] = ToJson(param_);\n  }\n  void LoadConfig(Json const& in) override {\n    CHECK_EQ(get<String const>(in[\"name\"]), Name());\n    FromJson(in[\"quantile_loss_param\"], &param_);\n    
alpha_.HostVector() = param_.quantile_alpha.Get();\n  }\n\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"quantile\"; }\n  [[nodiscard]] Json DefaultMetricConfig() const override {\n    CHECK(param_.GetInitialised());\n    Json config{Object{}};\n    config[\"name\"] = String{this->DefaultEvalMetric()};\n    config[\"quantile_loss_param\"] = ToJson(param_);\n    return config;\n  }\n};\n\nXGBOOST_REGISTER_OBJECTIVE(QuantileRegression, QuantileRegression::Name())\n    .describe(\"Regression with quantile loss.\")\n    .set_body([]() { return new QuantileRegression(); });\n\n#if defined(XGBOOST_USE_CUDA)\nDMLC_REGISTRY_FILE_TAG(quantile_obj_gpu);\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/regression_loss.h",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_\n#define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_\n\n#include <cmath>\n\n#include \"../common/common.h\"  // Min, Max\n#include \"../common/math.h\"\n#include \"xgboost/string_view.h\"\n#include \"xgboost/task.h\"  // ObjInfo\n\nnamespace xgboost::obj {\n// linear regression\nstruct LinearSquareLoss {\n  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; }\n  XGBOOST_DEVICE static bool CheckLabel(bst_float) { return true; }\n  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {\n    return predt - label;\n  }\n  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float, bst_float) { return 1.0f; }\n\n  XGBOOST_DEVICE static float ProbToMargin(float base_score) { return base_score; }\n  constexpr static StringView InterceptErrorMsg() { return \"\"; }\n  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }\n\n  static const char* LabelErrorMsg() { return \"\"; }\n  static const char* DefaultEvalMetric() { return \"rmse\"; }\n\n  static const char* Name() { return \"reg:squarederror\"; }\n  static ObjInfo Info() { return {ObjInfo::kRegression, true, false}; }\n};\n\nstruct SquaredLogError {\n  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; }\n  XGBOOST_DEVICE static bool CheckLabel(bst_float label) { return label > -1; }\n  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {\n    predt = fmaxf(predt, -1 + 1e-6);  // ensure correct value for log1p\n    return (std::log1p(predt) - std::log1p(label)) / (predt + 1);\n  }\n  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float label) {\n    predt = fmaxf(predt, -1 + 1e-6);\n    float res = (-std::log1p(predt) + std::log1p(label) + 1) / std::pow(predt + 1, 2);\n    res = fmaxf(res, 1e-6f);\n    return res;\n  }\n\n  XGBOOST_DEVICE static float 
ProbToMargin(float base_score) { return base_score; }\n  constexpr static StringView InterceptErrorMsg() { return \"\"; }\n  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }\n\n  static const char* LabelErrorMsg() {\n    return \"label must be greater than -1 for rmsle so that log(label + 1) can be valid.\";\n  }\n  static const char* DefaultEvalMetric() { return \"rmsle\"; }\n\n  static const char* Name() { return \"reg:squaredlogerror\"; }\n\n  static ObjInfo Info() { return ObjInfo::kRegression; }\n};\n\n// logistic loss for probability regression task\nstruct LogisticRegression {\n  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return common::Sigmoid(x); }\n  XGBOOST_DEVICE static bool CheckLabel(bst_float x) { return x >= 0.0f && x <= 1.0f; }\n  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {\n    return predt - label;\n  }\n  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float) {\n    const float eps = 1e-16f;\n    return fmaxf(predt * (1.0f - predt), eps);\n  }\n  XGBOOST_DEVICE static float ProbToMargin(float base_score) {\n    // Bound the base score\n    base_score = common::Min(common::Max(base_score, kRtEps), 1.0f - kRtEps);\n    return common::Logit(base_score);\n  }\n  constexpr static StringView InterceptErrorMsg() {\n    return \"base_score must be in (0,1) for the logistic loss.\";\n  }\n  XGBOOST_DEVICE static bool CheckIntercept(float base_score) {\n    // We accept equality for degenerate cases where all label is the same.\n    // https://github.com/dmlc/xgboost/issues/11499\n    return base_score >= 0.0f && base_score <= 1.0f;\n  }\n\n  static const char* LabelErrorMsg() { return \"label must be in (0, 1) for logistic regression\"; }\n  static const char* DefaultEvalMetric() { return \"rmse\"; }\n\n  static const char* Name() { return \"reg:logistic\"; }\n\n  static ObjInfo Info() { return ObjInfo::kRegression; }\n};\n\n// logistic loss for binary 
classification task\nstruct LogisticClassification : public LogisticRegression {\n  static const char* DefaultEvalMetric() { return \"logloss\"; }\n  static const char* Name() { return \"binary:logistic\"; }\n  static ObjInfo Info() { return ObjInfo::kBinary; }\n};\n\n// logistic loss, but predict un-transformed margin\nstruct LogisticRaw : public LogisticRegression {\n  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; }\n  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {\n    predt = common::Sigmoid(predt);\n    return predt - label;\n  }\n  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float) {\n    const float eps = 1e-16f;\n    predt = common::Sigmoid(predt);\n    return fmaxf(predt * (1.0f - predt), eps);\n  }\n\n  XGBOOST_DEVICE static float ProbToMargin(float base_score) { return base_score; }\n  constexpr static StringView InterceptErrorMsg() { return \"\"; }\n  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }\n\n  static const char* DefaultEvalMetric() { return \"logloss\"; }\n\n  static const char* Name() { return \"binary:logitraw\"; }\n\n  static ObjInfo Info() { return ObjInfo::kRegression; }\n};\n\n// gamma deviance loss.\nclass GammaDeviance {\n public:\n  XGBOOST_DEVICE static float PredTransform(float x) { return std::exp(x); }\n\n  XGBOOST_DEVICE static float ProbToMargin(float x) { return std::log(x); }\n  constexpr static StringView InterceptErrorMsg() {\n    return \"`base_score` must be greater than 0 for gamma regression\";\n  }\n  XGBOOST_DEVICE static bool CheckIntercept(float base_score) { return base_score > 0; }\n\n  XGBOOST_DEVICE static float FirstOrderGradient(float p, float y) { return 1.0f - y / p; }\n  XGBOOST_DEVICE static float SecondOrderGradient(float p, float y) { return y / p; }\n  static ObjInfo Info() { return ObjInfo::kRegression; }\n  static const char* Name() { return \"reg:gamma\"; }\n  static const char* 
DefaultEvalMetric() { return \"gamma-deviance\"; }\n  XGBOOST_DEVICE static bool CheckLabel(float x) { return x > 0.0f; }\n  static const char* LabelErrorMsg() { return \"label must be positive for gamma regression.\"; }\n};\n\n// Label validation for Poisson regression (labels must be non-negative)\nstruct PoissonLabel {\n  XGBOOST_DEVICE static bool CheckLabel(float x) { return x >= 0.0f; }\n  static const char* LabelErrorMsg() {\n    return \"label must be non-negative for Poisson/Tweedie regression.\";\n  }\n};\n\n// Label validation for Tweedie regression (labels must be non-negative)\nusing TweedieLabel = PoissonLabel;\n}  // namespace xgboost::obj\n#endif  // XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_\n"
  },
  {
    "path": "src/objective/regression_obj.cc",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n\n// Dummy file to keep the CUDA conditional compile trick.\n\n#include <dmlc/registry.h>\nnamespace xgboost {\nnamespace obj {\n\nDMLC_REGISTRY_FILE_TAG(regression_obj);\n\n}  // namespace obj\n}  // namespace xgboost\n\n#ifndef XGBOOST_USE_CUDA\n#include \"regression_obj.cu\"\n#endif  // XGBOOST_USE_CUDA\n"
  },
  {
    "path": "src/objective/regression_obj.cu",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file regression_obj.cu\n * \\brief Definition of single-value regression and classification objectives.\n * \\author Tianqi Chen, Kailong Chen\n */\n#include <dmlc/omp.h>\n\n#include <algorithm>  // for all_of\n#include <cmath>\n#include <cstdint>  // for int32_t\n#include <vector>   // for vector\n\n#include \"../common/common.h\"\n#include \"../common/expectile_loss_utils.h\"  // for ExpectileLossParam\n#include \"../common/linalg_op.h\"             // for ElementWiseKernel\n#include \"../common/numeric.h\"               // for Reduce\n#include \"../common/optional_weight.h\"       // for MakeOptionalWeights\n#include \"../common/pseudo_huber.h\"\n#include \"../common/stats.h\"\n#include \"../common/threading_utils.h\"\n#include \"../common/transform.h\"\n#include \"../common/utils.h\"  // for NoOp\n#include \"./regression_loss.h\"\n#include \"adaptive.h\"\n#include \"init_estimation.h\"  // FitIntercept\n#include \"regression_param.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/context.h\"  // Context\n#include \"xgboost/data.h\"     // MetaInfo\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/linalg.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/objective.h\"  // ObjFunction\n#include \"xgboost/parameter.h\"\n#include \"xgboost/span.h\"\n#include \"xgboost/tree_model.h\"  // RegTree\n\n#if defined(XGBOOST_USE_CUDA)\n#include \"../common/algorithm.cuh\"       // for AllOf\n#include \"../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../common/device_helpers.cuh\"  // for MakeIndexTransformIter\n#endif                                   // defined(XGBOOST_USE_CUDA)\n\nnamespace xgboost::obj {\nnamespace {\nvoid CheckRegInputs(MetaInfo const& info, HostDeviceVector<float> const& preds) {\n  CheckInitInputs(info);\n  CHECK_EQ(info.labels.Size(), preds.Size()) << \"Invalid shape of labels.\";\n}\n\ntemplate <typename 
Loss>\nvoid ValidateLabel(Context const* ctx, MetaInfo const& info) {\n  auto label = info.labels.View(ctx->Device());\n  auto valid = ctx->DispatchDevice(\n      [&] {\n        return std::all_of(linalg::cbegin(label), linalg::cend(label),\n                           [](float y) -> bool { return Loss::CheckLabel(y); });\n      },\n      [&] {\n#if defined(XGBOOST_USE_CUDA)\n        auto it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> float {\n          auto [m, n] = linalg::UnravelIndex(i, label.Shape());\n          return label(m, n);\n        });\n        return common::AllOf(ctx->CUDACtx()->CTP(), it, it + label.Size(),\n                             [] XGBOOST_DEVICE(float y) { return Loss::CheckLabel(y); });\n#else\n        common::AssertGPUSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_CUDA)\n      },\n      [&] {\n#if defined(XGBOOST_USE_SYCL)\n        return sycl::linalg::Validate(ctx->Device(), label,\n                                      [](float y) -> bool { return Loss::CheckLabel(y); });\n#else\n        common::AssertSYCLSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_SYCL)\n      });\n  if (!valid) {\n    LOG(FATAL) << Loss::LabelErrorMsg();\n  }\n  if (!info.weights_.Empty()) {\n    CHECK_EQ(info.weights_.Size(), info.num_row_)\n        << \"Number of weights should be equal to the number of data points.\";\n  }\n}\n\ntemplate <typename Fn, typename Chk = common::NoOp<bool>, typename Err = common::NoOp<StringView>>\nvoid ProbToMarginImpl(Context const* ctx, linalg::Vector<float>* base_score, Fn&& fn,\n                      Chk check = common::NoOp{true}, Err error = common::NoOp<StringView>{{}}) {\n  auto intercept = base_score->View(ctx->Device());\n  bool is_valid = ctx->DispatchDevice(\n      [&] { return std::all_of(linalg::cbegin(intercept), linalg::cend(intercept), check); },\n      [&] {\n#if defined(XGBOOST_USE_CUDA)\n        return common::AllOf(ctx->CUDACtx()->CTP(), 
linalg::tcbegin(intercept),\n                             linalg::tcend(intercept), check);\n#else\n        common::AssertGPUSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_CUDA)\n      },\n      [&] {\n#if defined(XGBOOST_USE_SYCL)\n        return sycl::linalg::Validate(ctx->Device(), intercept, check);\n#else\n        common::AssertSYCLSupport();\n        return false;\n#endif  // defined(XGBOOST_USE_SYCL)\n      });\n  CHECK(is_valid) << error();\n  linalg::ElementWiseKernel(ctx, intercept, [=] XGBOOST_DEVICE(std::size_t i) mutable {\n    intercept(i) = fn(intercept(i));\n  });\n}\n}  // anonymous namespace\n\n#if defined(XGBOOST_USE_CUDA)\nDMLC_REGISTRY_FILE_TAG(regression_obj_gpu);\n#endif  // defined(XGBOOST_USE_CUDA)\n\ntemplate <typename Loss>\nclass RegLossObj : public FitInterceptGlmLike {\n protected:\n  HostDeviceVector<float> additional_input_;\n\n public:\n  // 0 - scale_pos_weight, 1 - is_null_weight\n  RegLossObj() : additional_input_(2) {}\n\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n\n  [[nodiscard]] ObjInfo Task() const override { return Loss::Info(); }\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    // Multi-target regression.\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(const HostDeviceVector<float>& preds, const MetaInfo& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    if (iter == 0) {\n      ValidateLabel<Loss>(this->ctx_, info);\n    }\n\n    size_t const ndata = preds.Size();\n    out_gpair->SetDevice(ctx_->Device());\n    auto device = ctx_->Device();\n\n    bool is_null_weight = info.weights_.Size() == 0;\n    auto scale_pos_weight = param_.scale_pos_weight;\n    additional_input_.HostVector().begin()[0] = scale_pos_weight;\n    additional_input_.HostVector().begin()[1] = is_null_weight;\n\n    const 
size_t nthreads = ctx_->Threads();\n    bool on_device = !device.IsCPU();\n    // On CPU we run the transformation each thread processing a contiguous block of data\n    // for better performance.\n    const size_t n_data_blocks = std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));\n    const size_t block_size = ndata / n_data_blocks + !!(ndata % n_data_blocks);\n    auto const n_targets = this->Targets(info);\n    out_gpair->Reshape(info.num_row_, n_targets);\n\n    common::Transform<>::Init(\n        [block_size, ndata, n_targets] XGBOOST_DEVICE(\n            size_t data_block_idx, common::Span<float> _additional_input,\n            common::Span<GradientPair> _out_gpair, common::Span<const bst_float> _preds,\n            common::Span<const bst_float> _labels, common::Span<const bst_float> _weights) {\n          const bst_float* preds_ptr = _preds.data();\n          const bst_float* labels_ptr = _labels.data();\n          const bst_float* weights_ptr = _weights.data();\n          GradientPair* out_gpair_ptr = _out_gpair.data();\n          const size_t begin = data_block_idx * block_size;\n          const size_t end = std::min(ndata, begin + block_size);\n          const float _scale_pos_weight = _additional_input[0];\n          const bool _is_null_weight = _additional_input[1];\n\n          for (size_t idx = begin; idx < end; ++idx) {\n            bst_float p = Loss::PredTransform(preds_ptr[idx]);\n            bst_float w = _is_null_weight ? 
1.0f : weights_ptr[idx / n_targets];\n            bst_float label = labels_ptr[idx];\n            if (label == 1.0f) {\n              w *= _scale_pos_weight;\n            }\n            out_gpair_ptr[idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w,\n                                              Loss::SecondOrderGradient(p, label) * w);\n          }\n        },\n        common::Range{0, static_cast<int64_t>(n_data_blocks)}, nthreads, device)\n        .Eval(&additional_input_, out_gpair->Data(), &preds, info.labels.Data(), &info.weights_);\n  }\n\n public:\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return Loss::DefaultEvalMetric(); }\n\n  void PredTransform(HostDeviceVector<float>* io_preds) const override {\n    common::Transform<>::Init(\n        [] XGBOOST_DEVICE(size_t _idx, common::Span<float> _preds) {\n          _preds[_idx] = Loss::PredTransform(_preds[_idx]);\n        },\n        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),\n        io_preds->Device())\n        .Eval(io_preds);\n  }\n\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {\n    if (std::abs(this->param_.scale_pos_weight - 1.0f) > kRtEps) {\n      // Use newton method if `scale_pos_weight` is present. 
The alternative is to use\n      // weighted mean, but we also need to take sample weight into account.\n      FitIntercept::InitEstimation(info, base_score);\n    } else {\n      FitInterceptGlmLike::InitEstimation(info, base_score);\n    }\n  }\n\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    ProbToMarginImpl(\n        this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return Loss::ProbToMargin(v); },\n        [] XGBOOST_DEVICE(float v) { return Loss::CheckIntercept(v); }, Loss::InterceptErrorMsg);\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(Loss::Name());\n    out[\"reg_loss_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto obj = get<Object const>(in);\n    auto it = obj.find(\"reg_loss_param\");\n    if (it != obj.cend()) {\n      FromJson(it->second, &param_);\n    }\n  }\n\n protected:\n  RegLossParam param_;\n};\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(RegLossParam);\n\nXGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, LinearSquareLoss::Name())\n    .describe(\"Regression with squared error.\")\n    .set_body([]() { return new RegLossObj<LinearSquareLoss>(); });\n\nXGBOOST_REGISTER_OBJECTIVE(LogisticRegression, LogisticRegression::Name())\n    .describe(\"Logistic regression for probability regression task.\")\n    .set_body([]() { return new RegLossObj<LogisticRegression>(); });\n\nXGBOOST_REGISTER_OBJECTIVE(LogisticClassification, LogisticClassification::Name())\n    .describe(\"Logistic regression for binary classification task.\")\n    .set_body([]() { return new RegLossObj<LogisticClassification>(); });\n\nXGBOOST_REGISTER_OBJECTIVE(LogisticRaw, LogisticRaw::Name())\n    .describe(\n        \"Logistic regression for classification, output score \"\n        \"before logistic transformation.\")\n    .set_body([]() { return new RegLossObj<LogisticRaw>(); 
});\n\nXGBOOST_REGISTER_OBJECTIVE(GammaRegression, GammaDeviance::Name())\n    .describe(\"Gamma regression using the gamma deviance loss with log link.\")\n    .set_body([]() { return new RegLossObj<GammaDeviance>(); });\n\n// Deprecated functions\nXGBOOST_REGISTER_OBJECTIVE(LinearRegression, \"reg:linear\")\n    .describe(\"Regression with squared error.\")\n    .set_body([]() {\n      LOG(WARNING) << \"reg:linear is now deprecated in favor of reg:squarederror.\";\n      return new RegLossObj<LinearSquareLoss>();\n    });\n// End deprecated\n\nclass SquaredLogErrorRegression : public FitIntercept {\n public:\n  static auto Name() { return SquaredLogError::Name(); }\n\n  void Configure(Args const&) override {}\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info,\n                   std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    if (iter == 0) {\n      ValidateLabel<SquaredLogError>(this->ctx_, info);\n    }\n    auto labels = info.labels.View(ctx_->Device());\n\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));\n\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    linalg::ElementWiseKernel(this->ctx_, labels,\n                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n                                auto p = predt(i, j);\n                                auto y = labels(i, j);\n                                auto w = weight[i];\n                    
            auto grad = SquaredLogError::FirstOrderGradient(p, y);\n                                auto hess = SquaredLogError::SecondOrderGradient(p, y);\n                                gpair(i, j) = {grad * w, hess * w};\n                              });\n  }\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"rmsle\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(Name());\n  }\n  void LoadConfig(Json const&) override {}\n};\n\nXGBOOST_REGISTER_OBJECTIVE(SquaredLogErrorRegression, SquaredLogErrorRegression::Name())\n    .describe(\"Root mean squared log error.\")\n    .set_body([]() { return new SquaredLogErrorRegression(); });\n\nclass PseudoHuberRegression : public FitIntercept {\n  PseudoHuberParam param_;\n\n public:\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    auto slope = param_.huber_slope;\n    CHECK_NE(slope, 0.0) << \"slope for pseudo huber cannot be 0.\";\n    auto labels = info.labels.View(ctx_->Device());\n\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));\n\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    linalg::ElementWiseKernel(\n        ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n          
float z = predt(i, j) - labels(i, j);\n          float scale_sqrt = std::sqrt(1 + common::Sqr(z) / common::Sqr(slope));\n          float grad = z / scale_sqrt;\n\n          auto scale = common::Sqr(slope) + common::Sqr(z);\n          float hess = common::Sqr(slope) / (scale * scale_sqrt);\n\n          auto w = weight[i];\n          gpair(i, j) = {grad * w, hess * w};\n        });\n  }\n\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"mphe\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"reg:pseudohubererror\");\n    out[\"pseudo_huber_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    if (config.find(\"pseudo_huber_param\") == config.cend()) {\n      // The parameter is added in 1.6.\n      return;\n    }\n    FromJson(in[\"pseudo_huber_param\"], &param_);\n  }\n  [[nodiscard]] Json DefaultMetricConfig() const override {\n    CHECK(param_.GetInitialised());\n    Json config{Object{}};\n    config[\"name\"] = String{this->DefaultEvalMetric()};\n    config[\"pseudo_huber_param\"] = ToJson(param_);\n    return config;\n  }\n};\n\nXGBOOST_REGISTER_OBJECTIVE(PseudoHuberRegression, \"reg:pseudohubererror\")\n    .describe(\"Regression Pseudo Huber error.\")\n    .set_body([]() { return new PseudoHuberRegression(); });\n\nclass ExpectileRegression : public FitIntercept {\n  common::ExpectileLossParam param_;\n  HostDeviceVector<float> alpha_;\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    auto const& alpha = param_.expectile_alpha.Get();\n    CHECK_EQ(alpha.size(), alpha_.Size()) << \"The objective is not yet configured.\";\n    if (info.ShouldHaveLabels()) {\n      CHECK_EQ(info.labels.Shape(1), 1)\n          << \"Multi-target is not yet supported by the expectile loss.\";\n    }\n    CHECK(!alpha.empty());\n    auto n_y = std::max(static_cast<std::size_t>(1), 
info.labels.Shape(1));\n    return alpha_.Size() * n_y;\n  }\n\n public:\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    param_.Validate();\n    alpha_.HostVector() = param_.expectile_alpha.Get();\n  }\n\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    if (iter == 0) {\n      CheckInitInputs(info);\n    }\n    CHECK_EQ(param_.expectile_alpha.Get().size(), alpha_.Size());\n\n    using SizeT = decltype(info.num_row_);\n    SizeT n_targets = this->Targets(info);\n    SizeT n_alphas = alpha_.Size();\n    CHECK_NE(n_alphas, 0);\n    CHECK_GE(n_targets, n_alphas);\n    CHECK_EQ(preds.Size(), info.num_row_ * n_targets);\n\n    auto labels = info.labels.View(ctx_->Device());\n\n    out_gpair->SetDevice(ctx_->Device());\n    CHECK_EQ(info.labels.Shape(1), 1)\n        << \"Multi-target for expectile regression is not yet supported.\";\n    out_gpair->Reshape(info.num_row_, n_targets);\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    info.weights_.SetDevice(ctx_->Device());\n    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()\n                                                 : info.weights_.ConstDeviceSpan()};\n\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);\n\n    alpha_.SetDevice(ctx_->Device());\n    auto alpha = ctx_->IsCPU() ? 
alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();\n\n    linalg::ElementWiseKernel(ctx_, gpair,\n                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n                                auto pred = predt(i, j);\n                                auto label = labels(i, 0);\n                                auto expectile = alpha[j];\n                                auto diff = pred - label;\n                                auto weight_scale = diff >= 0.0f ? (1.0f - expectile) : expectile;\n                                auto sample_weight = weight[i];\n                                auto grad = weight_scale * diff * sample_weight;\n                                auto hess = weight_scale * sample_weight;\n                                gpair(i, j) = GradientPair{grad, hess};\n                              });\n  }\n\n  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {\n    CHECK(!alpha_.Empty());\n    auto n_targets = this->Targets(info);\n    base_score->SetDevice(ctx_->Device());\n    base_score->Reshape(n_targets);\n\n    linalg::Vector<float> label_mean;\n    if (info.weights_.Empty()) {\n      common::SampleMean(ctx_, info.IsColumnSplit(), info.labels, &label_mean);\n    } else {\n      common::WeightedSampleMean(ctx_, info.IsColumnSplit(), info.labels, info.weights_,\n                                 &label_mean);\n    }\n    CHECK_EQ(label_mean.Size(), 1);\n\n    auto mean_host = label_mean.HostView();\n    auto h_labels = info.labels.HostView();\n    auto h_weights = info.weights_.ConstHostSpan();\n    auto const& alpha = param_.expectile_alpha.Get();\n\n    std::vector<double> sums(2 * n_targets, 0.0);\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      auto label = h_labels(i, 0);\n      auto diff = mean_host(0) - label;\n      for (std::size_t j = 0; j < n_targets; ++j) {\n        auto expectile = alpha[j];\n        auto weight_scale = diff >= 0.0f ? 
(1.0f - expectile) : expectile;\n        double w = weight_scale;\n        if (!h_weights.empty()) {\n          w *= h_weights[i];\n        }\n        sums[2 * j] += w * label;\n        sums[2 * j + 1] += w;\n      }\n    }\n\n    collective::SafeColl(\n        collective::GlobalSum(ctx_, info, linalg::MakeVec(sums.data(), sums.size())));\n\n    auto out = base_score->HostView();\n    for (std::size_t j = 0; j < n_targets; ++j) {\n      auto denom = sums[2 * j + 1];\n      if (common::CloseTo(denom, 0.0)) {\n        out(j) = mean_host(0);\n      } else {\n        out(j) = sums[2 * j] / denom;\n      }\n    }\n  }\n\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"expectile\"; }\n  [[nodiscard]] Json DefaultMetricConfig() const override {\n    CHECK(param_.GetInitialised());\n    Json config{Object{}};\n    config[\"name\"] = String{this->DefaultEvalMetric()};\n    config[\"expectile_loss_param\"] = ToJson(param_);\n    return config;\n  }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"reg:expectileerror\");\n    out[\"expectile_loss_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override {\n    CHECK_EQ(get<String const>(in[\"name\"]), \"reg:expectileerror\");\n    auto const& obj = get<Object const>(in);\n    auto it = obj.find(\"expectile_loss_param\");\n    if (it != obj.cend()) {\n      FromJson(it->second, &param_);\n      alpha_.HostVector() = param_.expectile_alpha.Get();\n    }\n  }\n};\n\nXGBOOST_REGISTER_OBJECTIVE(ExpectileRegression, \"reg:expectileerror\")\n    .describe(\"Regression with expectile loss.\")\n    .set_body([]() { return new ExpectileRegression(); });\n\n// declare parameter\nstruct PoissonRegressionParam : public XGBoostParameter<PoissonRegressionParam> {\n  float max_delta_step;\n  DMLC_DECLARE_PARAMETER(PoissonRegressionParam) {\n    DMLC_DECLARE_FIELD(max_delta_step)\n        .set_lower_bound(0.0f)\n        .set_default(0.7f)\n   
     .describe(\n            \"Maximum delta step we allow each weight estimation to be.\"\n            \" This parameter is required for possion regression.\");\n  }\n};\n\n// poisson regression for count\nclass PoissonRegression : public FitInterceptGlmLike {\n public:\n  // declare functions\n  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }\n\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    if (iter == 0) {\n      ValidateLabel<PoissonLabel>(this->ctx_, info);\n    }\n    auto const n_targets = this->Targets(info);\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, n_targets);\n\n    auto labels = info.labels.View(ctx_->Device());\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);\n\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    bst_float max_delta_step = param_.max_delta_step;\n    linalg::ElementWiseKernel(this->ctx_, labels,\n                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n                                auto p = predt(i, j);\n                                auto y = labels(i, j);\n                                auto w = weight[i];\n                                auto grad = (expf(p) - y) * w;\n                                auto hess = expf(p + max_delta_step) * w;\n                                gpair(i, j) = GradientPair{grad, hess};\n                              });\n  }\n  void 
PredTransform(HostDeviceVector<bst_float>* io_preds) const override {\n    common::Transform<>::Init(\n        [] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {\n          _preds[_idx] = expf(_preds[_idx]);\n        },\n        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),\n        io_preds->Device())\n        .Eval(io_preds);\n  }\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });\n  }\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"poisson-nloglik\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"count:poisson\");\n    out[\"poisson_regression_param\"] = ToJson(param_);\n  }\n\n  void LoadConfig(Json const& in) override { FromJson(in[\"poisson_regression_param\"], &param_); }\n\n private:\n  PoissonRegressionParam param_;\n};\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(PoissonRegressionParam);\n\nXGBOOST_REGISTER_OBJECTIVE(PoissonRegression, \"count:poisson\")\n    .describe(\"Poisson regression for count data.\")\n    .set_body([]() { return new PoissonRegression(); });\n\n// cox regression for survival data (negative values mean they are censored)\nclass CoxRegression : public FitIntercept {\n public:\n  void Configure(Args const&) override {}\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CHECK_NE(info.labels.Size(), 0U) << \"label set cannot be empty\";\n    CHECK_EQ(preds.Size(), info.labels.Size()) << \"labels are not correctly provided\";\n    const auto& preds_h = preds.HostVector();\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n    auto gpair = 
out_gpair->HostView();\n    const std::vector<size_t>& label_order = info.LabelAbsSort(ctx_);\n\n    const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size());  // NOLINT(*)\n    const bool is_null_weight = info.weights_.Size() == 0;\n    if (!is_null_weight) {\n      CHECK_EQ(info.weights_.Size(), ndata)\n          << \"Number of weights should be equal to number of data points.\";\n    }\n\n    // pre-compute a sum\n    double exp_p_sum = 0;  // we use double because we might need the precision with large datasets\n    for (omp_ulong i = 0; i < ndata; ++i) {\n      exp_p_sum += std::exp(preds_h[label_order[i]]);\n    }\n\n    // start calculating grad and hess\n    const auto& labels = info.labels.HostView();\n    double r_k = 0;\n    double s_k = 0;\n    double last_exp_p = 0.0;\n    double last_abs_y = 0.0;\n    double accumulated_sum = 0;\n    for (omp_ulong i = 0; i < ndata; ++i) {  // NOLINT(*)\n      const size_t ind = label_order[i];\n      const double p = preds_h[ind];\n      const double exp_p = std::exp(p);\n      const double w = info.GetWeight(ind);\n      const double y = labels(ind);\n      const double abs_y = std::abs(y);\n\n      // only update the denominator after we move forward in time (labels are sorted)\n      // this is Breslow's method for ties\n      accumulated_sum += last_exp_p;\n      if (last_abs_y < abs_y) {\n        exp_p_sum -= accumulated_sum;\n        accumulated_sum = 0;\n      } else {\n        CHECK(last_abs_y <= abs_y) << \"CoxRegression: labels must be in sorted order, \"\n                                   << \"MetaInfo::LabelArgsort failed!\";\n      }\n\n      if (y > 0) {\n        r_k += 1.0 / exp_p_sum;\n        s_k += 1.0 / (exp_p_sum * exp_p_sum);\n      }\n\n      const double grad = exp_p * r_k - static_cast<bst_float>(y > 0);\n      const double hess = exp_p * r_k - exp_p * exp_p * s_k;\n      gpair(ind) = GradientPair(grad * w, hess * w);\n\n      last_abs_y = abs_y;\n      last_exp_p = exp_p;\n    }\n  }\n 
 void PredTransform(HostDeviceVector<bst_float>* io_preds) const override {\n    std::vector<bst_float>& preds = io_preds->HostVector();\n    const long ndata = static_cast<long>(preds.size());        // NOLINT(*)\n    common::ParallelFor(ndata, ctx_->Threads(), [&](long j) {  // NOLINT(*)\n      preds[j] = std::exp(preds[j]);\n    });\n  }\n  void EvalTransform(HostDeviceVector<bst_float>* io_preds) override { PredTransform(io_preds); }\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });\n  }\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"cox-nloglik\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"survival:cox\");\n  }\n  void LoadConfig(Json const&) override {}\n};\n\n// register the objective function\nXGBOOST_REGISTER_OBJECTIVE(CoxRegression, \"survival:cox\")\n    .describe(\n        \"Cox regression for censored survival data (negative labels are considered censored).\")\n    .set_body([]() { return new CoxRegression(); });\n\n// declare parameter\nstruct TweedieRegressionParam : public XGBoostParameter<TweedieRegressionParam> {\n  float tweedie_variance_power;\n  DMLC_DECLARE_PARAMETER(TweedieRegressionParam) {\n    DMLC_DECLARE_FIELD(tweedie_variance_power)\n        .set_range(1.0f, 2.0f)\n        .set_default(1.5f)\n        .describe(\"Tweedie variance power.  
Must be between in range [1, 2).\");\n  }\n};\n\n// tweedie regression\nclass TweedieRegression : public FitInterceptGlmLike {\n public:\n  // declare functions\n  void Configure(Args const& args) override {\n    param_.UpdateAllowUnknown(args);\n    std::ostringstream os;\n    os << \"tweedie-nloglik@\" << param_.tweedie_variance_power;\n    metric_ = os.str();\n  }\n\n  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }\n\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(HostDeviceVector<float> const& preds, MetaInfo const& info, std::int32_t iter,\n                   linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    if (iter == 0) {\n      ValidateLabel<TweedieLabel>(this->ctx_, info);\n    }\n    auto const n_targets = this->Targets(info);\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, n_targets);\n\n    auto labels = info.labels.View(ctx_->Device());\n    preds.SetDevice(ctx_->Device());\n    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);\n\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    const float rho = param_.tweedie_variance_power;\n    linalg::ElementWiseKernel(\n        this->ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n          auto p = predt(i, j);\n          auto y = labels(i, j);\n          auto w = weight[i];\n          auto grad = -y * expf((1 - rho) * p) + expf((2 - rho) * p);\n          auto hess = -y * (1 - rho) * std::exp((1 - rho) * p) + (2 - rho) * expf((2 - rho) * p);\n          gpair(i, j) = GradientPair{grad * w, hess * w};\n        });\n  }\n  void PredTransform(HostDeviceVector<bst_float>* io_preds) const override {\n    common::Transform<>::Init(\n        [] 
XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {\n          _preds[_idx] = expf(_preds[_idx]);\n        },\n        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),\n        io_preds->Device())\n        .Eval(io_preds);\n  }\n  void ProbToMargin(linalg::Vector<float>* base_score) const override {\n    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });\n  }\n\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return metric_.c_str(); }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"reg:tweedie\");\n    out[\"tweedie_regression_param\"] = ToJson(param_);\n  }\n  void LoadConfig(Json const& in) override { FromJson(in[\"tweedie_regression_param\"], &param_); }\n\n private:\n  std::string metric_;\n  TweedieRegressionParam param_;\n};\n\n// register the objective functions\nDMLC_REGISTER_PARAMETER(TweedieRegressionParam);\n\nXGBOOST_REGISTER_OBJECTIVE(TweedieRegression, \"reg:tweedie\")\n    .describe(\"Tweedie regression for insurance data.\")\n    .set_body([]() { return new TweedieRegression(); });\n\nclass MeanAbsoluteError : public ObjFunction {\n public:\n  void Configure(Args const&) override {}\n  [[nodiscard]] ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }\n  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {\n    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));\n  }\n\n  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,\n                   std::int32_t /*iter*/, linalg::Matrix<GradientPair>* out_gpair) override {\n    CheckRegInputs(info, preds);\n    auto labels = info.labels.View(ctx_->Device());\n\n    out_gpair->SetDevice(ctx_->Device());\n    out_gpair->Reshape(info.num_row_, this->Targets(info));\n    auto gpair = out_gpair->View(ctx_->Device());\n\n    preds.SetDevice(ctx_->Device());\n    auto 
predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));\n    auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_);\n    linalg::ElementWiseKernel(\n        ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {\n          auto sign = [](auto x) {\n            return (x > static_cast<decltype(x)>(0)) - (x < static_cast<decltype(x)>(0));\n          };\n          auto y = labels(i, j);\n          auto hess = weight[i];\n          auto grad = sign(predt(i, j) - y) * hess;\n          gpair(i, j) = GradientPair{grad, hess};\n        });\n  }\n\n  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const override {\n    CheckInitInputs(info);\n    base_score->Reshape(this->Targets(info));\n\n    double sum_weight{0.0};\n    if (info.weights_.Empty()) {\n      sum_weight = static_cast<double>(info.num_row_);\n    } else {\n      sum_weight = common::Reduce(ctx_, info.weights_);\n    }\n\n    if (info.num_row_ == 0) {\n      auto out = base_score->HostView();\n      std::fill(linalg::begin(out), linalg::end(out), 0.0f);\n    } else {\n      common::Median(ctx_, info.labels, info.weights_, base_score);\n    }\n\n    auto intercept = base_score->View(this->ctx_->Device());\n    // weighted avg\n    linalg::VecScaMul(this->ctx_, intercept, sum_weight);\n    auto rc = collective::GlobalSum(ctx_, info, intercept, &sum_weight);\n    collective::SafeColl(rc);\n\n    if (common::CloseTo(sum_weight, 0.0)) {\n      // Mostly for handling empty dataset test.\n      LOG(WARNING) << \"Sum of weights is close to 0.0, skipping base score estimation.\";\n      *base_score = linalg::Zeros<float>(ctx_, base_score->Shape(0));\n      return;\n    }\n\n    linalg::VecScaDiv(this->ctx_, intercept, sum_weight);\n  }\n\n  void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,\n                      float learning_rate, HostDeviceVector<float> const& prediction,\n             
         bst_target_t group_idx, RegTree* p_tree) const override {\n    std::vector<float> alphas;\n    if (p_tree->IsMultiTarget()) {\n      alphas.resize(p_tree->NumTargets(), 0.5);\n    } else {\n      alphas.push_back(0.5);\n    }\n    ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,\n                                   alphas, p_tree);\n  }\n\n  [[nodiscard]] const char* DefaultEvalMetric() const override { return \"mae\"; }\n\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"name\"] = String(\"reg:absoluteerror\");\n  }\n\n  void LoadConfig(Json const& in) override {\n    CHECK_EQ(StringView{get<String const>(in[\"name\"])}, StringView{\"reg:absoluteerror\"});\n  }\n};\n\nXGBOOST_REGISTER_OBJECTIVE(MeanAbsoluteError, \"reg:absoluteerror\")\n    .describe(\"Mean absoluate error.\")\n    .set_body([]() { return new MeanAbsoluteError(); });\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "src/objective/regression_param.h",
    "content": "/*!\n * Copyright 2015-2023 by Contributors\n * \\file regression_param.h\n * \\brief Definition of single-value regression and classification parameters.\n */\n#ifndef XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_\n#define XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_\n\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost {\nnamespace obj {\n\nstruct RegLossParam : public XGBoostParameter<RegLossParam> {\n  float scale_pos_weight;\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(RegLossParam) {\n    DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f)\n      .describe(\"Scale the weight of positive examples by this factor\");\n  }\n};\n\n}  // namespace obj\n}  // namespace xgboost\n#endif  // XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_\n"
  },
  {
    "path": "src/predictor/array_tree_layout.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n * \\file array_tree_layout.cc\n * \\brief Implementation of array tree layout -- a powerfull inference optimization method.\n */\n#ifndef XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_\n#define XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_\n\n#include <array>\n#include <limits>\n#include <type_traits>  // for conditional_t\n\n#include \"../common/categorical.h\"            // for IsCat\n#include \"xgboost/tree_model.h\"               // for RegTree\n\nnamespace xgboost::predictor {\n\n/**\n * @brief The class holds the array-based representation of the top levels of a single tree.\n *\n * @tparam has_categorical if the tree has categorical features\n *\n * @tparam any_missing if the class is able to process missing values\n *\n * @tparam kNumDeepLevels number of tree leveles being unrolled into array-based structure\n */\ntemplate <bool has_categorical, bool any_missing, int kNumDeepLevels, typename TreeView>\nclass ArrayTreeLayout {\n private:\n  /* Number of nodes in the array based representation of the top levels of the tree\n   */\n  constexpr static size_t kNodesCount = (1u << kNumDeepLevels) - 1;\n\n  struct Empty {};\n  using DefaultLeftType =\n      typename std::conditional_t<any_missing, std::array<uint8_t, kNodesCount>, Empty>;\n  using IsCatType =\n      typename std::conditional_t<has_categorical, std::array<uint8_t, kNodesCount>, Empty>;\n  using CatSegmentType =\n      typename std::conditional_t<has_categorical,\n                                  std::array<common::Span<uint32_t const>, kNodesCount>, Empty>;\n\n  DefaultLeftType default_left_;\n  IsCatType is_cat_;\n  CatSegmentType cat_segment_;\n\n  std::array<bst_feature_t, kNodesCount> split_index_;\n  std::array<float, kNodesCount> split_cond_;\n  /* The nodes at tree levels 0, 1, ..., kNumDeepLevels - 1 are unrolled into an array-based structure.\n   *  If the tree has additional levels, this array stores the node indices of the sub-trees 
at level kNumDeepLevels.\n   *  This is necessary to continue processing nodes that are not eligible for array-based unrolling.\n   *  The number of sub-trees packed into this array is equal to the number of nodes at tree level kNumDeepLevels,\n   *  which is calculated as (1u << kNumDeepLevels) == kNodesCount + 1.\n   */\n  // Mapping from array node index to the RegTree node index.\n  std::array<bst_node_t, kNodesCount + 1> nidx_in_tree_;\n\n /**\n * @brief Traverse the top levels of original tree and fill internal arrays\n *\n * @tparam depth the tree level being processed\n *\n * @param tree the original tree\n * @param cats matrix of categorical splits\n * @param nidx_array node idx in the array layout\n * @param nidx node idx in the original tree\n */\n  template <int depth = 0>\n  void Populate(TreeView const& tree, RegTree::CategoricalSplitMatrix const& cats,\n                bst_node_t nidx_array = 0, bst_node_t nidx = 0) {\n    if constexpr (depth == kNumDeepLevels + 1) {\n      return;\n    } else if constexpr (depth == kNumDeepLevels) {\n        /* We store the node index in the original tree to ensure continued processing\n         * for nodes that are not eligible for array layout optimization.\n         */\n        nidx_in_tree_[nidx_array - kNodesCount] = nidx;\n    } else {\n      if (tree.IsLeaf(nidx)) {\n        split_index_[nidx_array]  = 0;\n\n        /*\n         * If the tree is not fully populated, we can reduce transfer costs.\n         * The values for the unpopulated parts of the tree are set to ensure\n         * that any move will always proceed in the \"right\" direction.\n         * This is achieved by exploiting the fact that comparisons with NaN always result in false.\n         */\n        if constexpr (any_missing) default_left_[nidx_array] = 0;\n        if constexpr (has_categorical) is_cat_[nidx_array] = 0;\n        split_cond_[nidx_array]   = std::numeric_limits<float>::quiet_NaN();\n\n        Populate<depth + 1>(tree, cats, 2 
* nidx_array + 2, nidx);\n      } else {\n        if constexpr (any_missing) default_left_[nidx_array] = tree.DefaultLeft(nidx);\n        if constexpr (has_categorical) {\n          is_cat_[nidx_array] = common::IsCat(cats.split_type, nidx);\n          if (is_cat_[nidx_array]) {\n            cat_segment_[nidx_array] = cats.categories.subspan(cats.node_ptr[nidx].beg,\n                                                               cats.node_ptr[nidx].size);\n          }\n        }\n\n        split_index_[nidx_array]  = tree.SplitIndex(nidx);\n        split_cond_[nidx_array]   = tree.SplitCond(nidx);\n\n        /*\n         * LeftChild is used to determine if a node is a leaf, so it is always a valid value.\n         * However, RightChild can be invalid in some exotic cases.\n         * A tree with an invalid RightChild can still be correctly processed using classical methods\n         * if the split conditions are correct.\n         * However, in an array layout, an invalid RightChild, even if unreachable, can lead to memory corruption.\n         * A check should be added to prevent this.\n         */\n        Populate<depth + 1>(tree, cats, 2 * nidx_array + 1, tree.LeftChild(nidx));\n        bst_node_t right_child = tree.RightChild(nidx);\n        if (right_child != RegTree::kInvalidNodeId) {\n          Populate<depth + 1>(tree, cats, 2 * nidx_array + 2, right_child);\n        }\n      }\n    }\n  }\n\n  bool GetDecision(float fvalue, bst_node_t nidx) const {\n    if constexpr (has_categorical) {\n      if (is_cat_[nidx]) {\n       return common::Decision(cat_segment_[nidx], fvalue);\n      } else {\n        return fvalue < split_cond_[nidx];\n      }\n    } else {\n      return fvalue < split_cond_[nidx];\n    }\n  }\n\n public:\n  /* Ad-hoc value.\n   * Increasing doesn't lead to perf gain, since bottleneck is now at gather instructions.\n   */\n  constexpr static int kMaxNumDeepLevels = 6;\n  static_assert(kNumDeepLevels <= kMaxNumDeepLevels);\n\n  
ArrayTreeLayout(TreeView const& tree, RegTree::CategoricalSplitMatrix const &cats) {\n    Populate(tree, cats);\n  }\n\n  const auto& SplitIndex() const {\n    return split_index_;\n  }\n\n  const auto& SplitCond() const {\n    return split_cond_;\n  }\n\n  const auto& DefaultLeft() const {\n    return default_left_;\n  }\n\n  const auto& NidxInTree() const {\n    return nidx_in_tree_;\n  }\n\n  /**\n   * @brief Traverse the top levels of the tree for the entire block_size.\n   *\n   * In the array layout, it is organized to guarantee that if a node at the current level\n   * has index nidx, then the node index for the left child at the next level is always\n   * 2*nidx, and the node index for the right child at the next level is always 2*nidx+1.\n   * This greatly improves data locality.\n   *\n   * @param fvec_tloc buffer holding the feature values\n   * @param block_size size of the current block (1 < block_size <= 64)\n   * @param p_nidx Pointer to the vector of node indexes in the original tree with size\n   *               equals to the block size. (One node per sample). The value corresponds\n   *               to the level next after kNumDeepLevels\n   */\n  void Process(common::Span<RegTree::FVec> fvec_tloc, std::size_t const block_size,\n               bst_node_t* p_nidx) {\n    for (int depth = 0; depth < kNumDeepLevels; ++depth) {\n      std::size_t first_node = (1u << depth) - 1;\n\n      for (std::size_t i = 0; i < block_size; ++i) {\n        bst_node_t idx = p_nidx[i];\n\n        const auto& feat = fvec_tloc[i];\n        bst_feature_t split = split_index_[first_node + idx];\n        auto fvalue = feat.GetFvalue(split);\n        if constexpr (any_missing) {\n          bool go_left = feat.IsMissing(split) ? 
default_left_[first_node + idx]\n                                               : GetDecision(fvalue, first_node + idx);\n          p_nidx[i] = 2 * idx + !go_left;\n        } else {\n          p_nidx[i] = 2 * idx + !GetDecision(fvalue, first_node + idx);\n        }\n      }\n    }\n    // Remap to the original index.\n    for (std::size_t i = 0; i < block_size; ++i) {\n      p_nidx[i] = nidx_in_tree_[p_nidx[i]];\n    }\n  }\n};\n\ntemplate <bool has_categorical, bool any_missing, int num_deep_levels = 1, typename TreeView>\nvoid ProcessArrayTree(TreeView const& tree, common::Span<RegTree::FVec> fvec_tloc,\n                      std::size_t const block_size, bst_node_t* p_nidx, bst_node_t tree_depth) {\n  constexpr int kMaxNumDeepLevels =\n      ArrayTreeLayout<has_categorical, any_missing, 0, TreeView>::kMaxNumDeepLevels;\n\n  // Fill the array tree, then output predicted node idx.\n  if constexpr (num_deep_levels == kMaxNumDeepLevels) {\n    ArrayTreeLayout<has_categorical, any_missing, num_deep_levels, TreeView> buffer{\n        tree, tree.GetCategoriesMatrix()};\n    buffer.Process(fvec_tloc, block_size, p_nidx);\n  } else {\n    if (tree_depth <= num_deep_levels) {\n      ArrayTreeLayout<has_categorical, any_missing, num_deep_levels, TreeView> buffer{\n          tree, tree.GetCategoriesMatrix()};\n      buffer.Process(fvec_tloc, block_size, p_nidx);\n    } else {\n      ProcessArrayTree<has_categorical, any_missing, num_deep_levels + 1>(\n          tree, fvec_tloc, block_size, p_nidx, tree_depth);\n    }\n  }\n}\n}  // namespace xgboost::predictor\n#endif  // XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_\n"
  },
  {
    "path": "src/predictor/cpu_predictor.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#include <algorithm>  // for max, fill, min\n#include <cassert>    // for assert\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t, int32_t, uint64_t\n#include <memory>     // for unique_ptr, shared_ptr\n#include <vector>     // for vector\n\n#include \"../collective/allreduce.h\"         // for Allreduce\n#include \"../collective/communicator-inl.h\"  // for IsDistributed\n#include \"../common/bitfield.h\"              // for RBitField8\n#include \"../common/column_matrix.h\"         // for ColumnMatrix\n#include \"../common/error_msg.h\"             // for InplacePredictProxy\n#include \"../common/math.h\"                  // for CheckNAN\n#include \"../common/optional_weight.h\"       // for OptionalWeights\n#include \"../common/threading_utils.h\"       // for ParallelFor\n#include \"../data/adapter.h\"                 // for ArrayAdapter, CSRAdapter, CSRArrayAdapter\n#include \"../data/cat_container.h\"           // for CatContainer\n#include \"../data/gradient_index.h\"          // for GHistIndexMatrix\n#include \"../data/proxy_dmatrix.h\"           // for DMatrixProxy\n#include \"../gbm/gbtree_model.h\"             // for GBTreeModel, GBTreeModelParam\n#include \"array_tree_layout.h\"               // for ProcessArrayTree\n#include \"data_accessor.h\"                   // for GHistIndexMatrixView, SparsePageView\n#include \"dmlc/registry.h\"                   // for DMLC_REGISTRY_FILE_TAG\n#include \"gbtree_view.h\"                     // for GBTreeModelView\n#include \"interpretability/shap.h\"  // for ShapValues, ApproxFeatureImportance, ShapInteractionValues\n#include \"predict_fn.h\"             // for GetNextNode, GetNextNodeMulti\n#include \"utils.h\"                  // for CheckProxyDMatrix\n#include \"xgboost/base.h\"           // for bst_float, bst_node_t, bst_omp_uint, bst_fe...\n#include \"xgboost/context.h\"        // for Context\n#include 
\"xgboost/data.h\"           // for Entry, DMatrix, MetaInfo, SparsePage, Batch...\n#include \"xgboost/host_device_vector.h\"       // for HostDeviceVector\n#include \"xgboost/learner.h\"                  // for LearnerModelParam\n#include \"xgboost/linalg.h\"                   // for TensorView, All, VectorView, Tensor\n#include \"xgboost/logging.h\"                  // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_NE\n#include \"xgboost/multi_target_tree_model.h\"  // for MultiTargetTree\n#include \"xgboost/predictor.h\"                // for PredictionCacheEntry, Predictor, PredictorReg\n#include \"xgboost/span.h\"                     // for Span\n#include \"xgboost/tree_model.h\"               // for RegTree, MTNotImplemented, RTreeNodeStat\n\nnamespace xgboost::predictor {\n\nDMLC_REGISTRY_FILE_TAG(cpu_predictor);\n\nnamespace {\nusing TreeViewVar = std::variant<tree::ScalarTreeView, tree::MultiTargetTreeView>;\nstruct CopyViews {\n  void operator()(std::vector<TreeViewVar> *p_dst, std::vector<TreeViewVar> &&src) const {\n    std::swap(src, *p_dst);\n  }\n};\n\ntemplate <typename T>\nusing Vec = std::vector<T, std::allocator<T>>;\n// The input device should be DeviceOrd::CPU() instead of Context::Device(). 
The GBTree\n// has an optimization to use CPU predictor when the DMatrix SparsePage is on CPU, even if\n// the context is a CUDA context.\nusing HostModel = GBTreeModelView<Vec, TreeViewVar, CopyViews>;\n\ntemplate <bool has_missing, bool has_categorical, typename TreeView>\nbst_node_t GetLeafIndex(TreeView const &tree, const RegTree::FVec &feat,\n                        RegTree::CategoricalSplitMatrix const &cats, bst_node_t nidx) {\n  while (!tree.IsLeaf(nidx)) {\n    bst_feature_t split_index = tree.SplitIndex(nidx);\n    auto fvalue = feat.GetFvalue(split_index);\n    nidx = GetNextNode<has_missing, has_categorical>(\n        tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);\n  }\n  return nidx;\n}\n}  // namespace\n\nnamespace scalar {\ntemplate <bool has_categorical>\n[[nodiscard]] float PredValueByOneTree(const RegTree::FVec &p_feats,\n                                       tree::ScalarTreeView const &tree,\n                                       RegTree::CategoricalSplitMatrix const &cats,\n                                       bst_node_t nidx) noexcept(true) {\n  const bst_node_t leaf = p_feats.HasMissing()\n                              ? 
GetLeafIndex<true, has_categorical>(tree, p_feats, cats, nidx)\n                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats, nidx);\n  return tree.LeafValue(leaf);\n}\n\ntemplate <bool has_categorical, bool any_missing, bool use_array_tree_layout>\nvoid PredValueByOneTree(tree::ScalarTreeView const &tree, std::size_t const predict_offset,\n                        common::Span<RegTree::FVec> fvec_tloc, std::size_t const block_size,\n                        linalg::MatrixView<float> out_predt, bst_node_t *p_nidx, int depth, int gid,\n                        float tree_weight) {\n  auto const &cats = tree.GetCategoriesMatrix();\n  if constexpr (use_array_tree_layout) {\n    ProcessArrayTree<has_categorical, any_missing>(tree, fvec_tloc, block_size, p_nidx, depth);\n  }\n  for (std::size_t i = 0; i < block_size; ++i) {\n    bst_node_t nidx = 0;\n    /*\n     * If array_tree_layout was used, we start processing from the nidx calculated using\n     * the array tree.\n     */\n    if constexpr (use_array_tree_layout) {\n      nidx = p_nidx[i];\n      p_nidx[i] = 0;\n    }\n    out_predt(predict_offset + i, gid) +=\n        PredValueByOneTree<has_categorical>(fvec_tloc[i], tree, cats, nidx) * tree_weight;\n  }\n}\n}  // namespace scalar\n\nnamespace multi {\ntemplate <bool has_categorical>\nvoid PredValueByOneTree(RegTree::FVec const &p_feats, tree::MultiTargetTreeView const &tree,\n                        RegTree::CategoricalSplitMatrix const &cats,\n                        linalg::VectorView<float> out_predt, bst_node_t nidx) {\n  bst_node_t const leaf = p_feats.HasMissing()\n                              ? 
GetLeafIndex<true, has_categorical>(tree, p_feats, cats, nidx)\n                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats, nidx);\n  auto leaf_value = tree.LeafValue(leaf);\n  assert(out_predt.Shape(0) == leaf_value.Shape(0) && \"shape mismatch.\");\n  for (size_t i = 0; i < leaf_value.Size(); ++i) {\n    out_predt(i) += leaf_value(i);\n  }\n}\n\ntemplate <bool has_categorical, bool any_missing, bool use_array_tree_layout>\nvoid PredValueByOneTree(tree::MultiTargetTreeView const &tree, std::size_t const predict_offset,\n                        common::Span<RegTree::FVec> fvec_tloc, std::size_t const block_size,\n                        linalg::MatrixView<float> out_predt, bst_node_t *p_nidx, bst_node_t depth,\n                        float tree_weight) {\n  auto const &cats = tree.GetCategoriesMatrix();\n  if constexpr (use_array_tree_layout) {\n    ProcessArrayTree<has_categorical, any_missing>(tree, fvec_tloc, block_size, p_nidx, depth);\n  }\n  for (std::size_t i = 0; i < block_size; ++i) {\n    bst_node_t nidx = RegTree::kRoot;\n    if constexpr (use_array_tree_layout) {\n      nidx = p_nidx[i];\n      p_nidx[i] = RegTree::kRoot;\n    }\n    auto leaf = fvec_tloc[i].HasMissing()\n                    ? 
GetLeafIndex<true, has_categorical>(tree, fvec_tloc[i], cats, nidx)\n                    : GetLeafIndex<false, has_categorical>(tree, fvec_tloc[i], cats, nidx);\n    auto leaf_value = tree.LeafValue(leaf);\n    auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());\n    assert(t_predts.Shape(0) == leaf_value.Shape(0) && \"shape mismatch.\");\n    for (size_t j = 0; j < leaf_value.Size(); ++j) {\n      t_predts(j) += leaf_value(j) * tree_weight;\n    }\n  }\n}\n}  // namespace multi\n\nnamespace {\ntemplate <bool use_array_tree_layout, bool any_missing>\nvoid PredictBlockByAllTrees(HostModel const &model, std::size_t const predict_offset,\n                            common::Span<RegTree::FVec> fvec_tloc, std::size_t const block_size,\n                            linalg::MatrixView<float> out_predt, const std::vector<int> &tree_depth,\n                            common::OptionalWeights tree_weights) {\n  std::vector<bst_node_t> nidx;\n  if constexpr (use_array_tree_layout) {\n    nidx.resize(block_size, 0);\n  }\n  auto trees = model.Trees();\n  for (bst_tree_t tree_id = 0, n_trees = model.Trees().size(); tree_id < n_trees; ++tree_id) {\n    bst_node_t depth = use_array_tree_layout ? 
tree_depth[tree_id] : 0;\n    auto weight = tree_weights[tree_id];\n    std::visit(\n        enc::Overloaded{[&](tree::ScalarTreeView const &tree) {\n                          bool has_categorical = tree.HasCategoricalSplit();\n                          auto const gid = model.tree_groups[tree_id];\n                          if (has_categorical) {\n                            scalar::PredValueByOneTree<true, any_missing, use_array_tree_layout>(\n                                tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(),\n                                depth, gid, weight);\n                          } else {\n                            scalar::PredValueByOneTree<false, any_missing, use_array_tree_layout>(\n                                tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(),\n                                depth, gid, weight);\n                          }\n                        },\n                        [&](tree::MultiTargetTreeView const &tree) {\n                          bool has_categorical = tree.HasCategoricalSplit();\n                          if (has_categorical) {\n                            multi::PredValueByOneTree<true, any_missing, use_array_tree_layout>(\n                                tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(),\n                                depth, weight);\n                          } else {\n                            multi::PredValueByOneTree<false, any_missing, use_array_tree_layout>(\n                                tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(),\n                                depth, weight);\n                          }\n                        }},\n        trees[tree_id]);\n  }\n}\n\n// Dispatch between template implementations\nvoid DispatchArrayLayout(HostModel const &model, std::size_t const predict_offset,\n                         common::Span<RegTree::FVec> fvec_tloc, std::size_t const 
block_size,\n                         linalg::MatrixView<float> out_predt, const std::vector<int> &tree_depth,\n                         bool any_missing, common::OptionalWeights tree_weights) {\n  auto n_trees = model.tree_end - model.tree_begin;\n  CHECK_EQ(n_trees, model.Trees().size());\n  /*\n   * We transform trees to array layout for each block of data to avoid memory overheads.\n   * It makes the array layout inefficient for block_size == 1\n   */\n  const bool use_array_tree_layout = block_size > 1;\n  if (use_array_tree_layout) {\n    CHECK_EQ(n_trees, tree_depth.size());\n    // Recheck if the current block has missing values.\n    if (any_missing) {\n      any_missing = false;\n      for (std::size_t i = 0; i < block_size; ++i) {\n        any_missing |= fvec_tloc[i].HasMissing();\n        if (any_missing) {\n          break;\n        }\n      }\n    }\n    if (any_missing) {\n      PredictBlockByAllTrees<true, true>(model, predict_offset, fvec_tloc, block_size, out_predt,\n                                         tree_depth, tree_weights);\n    } else {\n      PredictBlockByAllTrees<true, false>(model, predict_offset, fvec_tloc, block_size, out_predt,\n                                          tree_depth, tree_weights);\n    }\n  } else {\n    PredictBlockByAllTrees<false, true>(model, predict_offset, fvec_tloc, block_size, out_predt,\n                                        tree_depth, tree_weights);\n  }\n}\n\nbool ShouldUseBlock(DMatrix *p_fmat) {\n  // Threshold to use block-based prediction.\n  constexpr double kDensityThresh = .125;\n  bst_idx_t n_samples = p_fmat->Info().num_row_;\n  bst_idx_t total = std::max(n_samples * p_fmat->Info().num_col_, static_cast<bst_idx_t>(1));\n  double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);\n  bool blocked = density > kDensityThresh;\n  return blocked;\n}\n\nusing cpu_impl::MakeCatAccessor;\n\n// Ordinal re-coder.\nstruct EncAccessorPolicy {\n private:\n  
std::vector<int32_t> mapping_;\n\n public:\n  EncAccessorPolicy() = default;\n\n  EncAccessorPolicy &operator=(EncAccessorPolicy const &that) = delete;\n  EncAccessorPolicy(EncAccessorPolicy const &that) = delete;\n\n  EncAccessorPolicy &operator=(EncAccessorPolicy &&that) = default;\n  EncAccessorPolicy(EncAccessorPolicy &&that) = default;\n\n  [[nodiscard]] auto MakeAccessor(Context const *ctx, enc::HostColumnsView new_enc,\n                                  gbm::GBTreeModel const &model) {\n    auto [acc, mapping] = MakeCatAccessor(ctx, new_enc, model.Cats());\n    std::swap(mapping, this->mapping_);\n    return acc;\n  }\n};\n\nstruct NullEncAccessorPolicy {\n  template <typename... Args>\n  [[nodiscard]] auto MakeAccessor(Args &&...) const {\n    return NoOpAccessor{};\n  }\n};\n\n// Block-based parallel.\nstruct BlockPolicy {\n  constexpr static std::size_t kBlockOfRowsSize = 64;\n};\n\nstruct NullBlockPolicy {\n  constexpr static std::size_t kBlockOfRowsSize = 1;\n};\n\n/**\n * @brief Policy class, requires a block policy and an accessor policy.\n */\ntemplate <typename... Args>\nstruct LaunchConfig : public Args... 
{\n  Context const *ctx;\n  DMatrix *p_fmat;\n  gbm::GBTreeModel const &model;\n\n  LaunchConfig(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model)\n      : ctx{ctx}, p_fmat{p_fmat}, model{model} {}\n\n  LaunchConfig(LaunchConfig const &that) = delete;\n  LaunchConfig &operator=(LaunchConfig const &that) = delete;\n  LaunchConfig(LaunchConfig &&that) = default;\n  LaunchConfig &operator=(LaunchConfig &&that) = default;\n\n  // Helper for running prediction with DMatrix inputs.\n  template <typename Fn>\n  void ForEachBatch(Fn &&fn) {\n    auto acc = this->MakeAccessor(ctx, p_fmat->Cats()->HostView(), model);\n\n    if (!p_fmat->PageExists<SparsePage>()) {\n      auto ft = p_fmat->Info().feature_types.ConstHostVector();\n      for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx, {})) {\n        fn(GHistIndexMatrixView{page, acc, ft});\n      }\n    } else {\n      for (auto const &page : p_fmat->GetBatches<SparsePage>()) {\n        fn(SparsePageView{page.GetView(), page.base_rowid, acc});\n      }\n    }\n  }\n};\n\n/**\n * @brief Dispatch for the prediction function.\n *\n * @tparam Fn         A function that accepts a @ref LaunchConfig object.\n * @tparam NeedRecode Given a DMatrix input, returns whether we need to recode the categorical\n *                    features.\n */\ntemplate <typename Fn, typename NeedRecode>\nvoid LaunchPredict(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model, Fn &&fn,\n                   NeedRecode &&need_recode) {\n  bool blocked = ShouldUseBlock(p_fmat);\n\n  if (blocked) {\n    if (model.Cats()->HasCategorical() && need_recode(p_fmat)) {\n      using Policy = LaunchConfig<BlockPolicy, EncAccessorPolicy>;\n      fn(Policy{ctx, p_fmat, model});\n    } else {\n      using Policy = LaunchConfig<BlockPolicy, NullEncAccessorPolicy>;\n      fn(Policy{ctx, p_fmat, model});\n    }\n  } else {\n    if (model.Cats()->HasCategorical() && need_recode(p_fmat)) {\n      using Policy = 
LaunchConfig<NullBlockPolicy, EncAccessorPolicy>;\n      fn(Policy{ctx, p_fmat, model});\n    } else {\n      using Policy = LaunchConfig<NullBlockPolicy, NullEncAccessorPolicy>;\n      fn(Policy{ctx, p_fmat, model});\n    }\n  }\n}\n\ntemplate <typename Fn>\nvoid LaunchPredict(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model, Fn &&fn) {\n  LaunchPredict(ctx, p_fmat, model, fn,\n                [](DMatrix const *p_fmat) { return p_fmat->Cats()->NeedRecode(); });\n}\n\n/**\n * @brief Thread-local buffer for the feature matrix.\n */\ntemplate <std::size_t kBlockOfRowsSize>\nclass ThreadTmp {\n private:\n  std::vector<RegTree::FVec> feat_vecs_;\n\n public:\n  /**\n   * @param n_threads Number of threads; the buffer holds kBlockOfRowsSize entries per thread.\n   */\n  explicit ThreadTmp(std::int32_t n_threads) {\n    std::size_t n = n_threads * kBlockOfRowsSize;\n    std::size_t prev_thread_temp_size = feat_vecs_.size();\n    if (prev_thread_temp_size < n) {\n      feat_vecs_.resize(n, RegTree::FVec{});\n    }\n  }\n  /**\n   * @brief Get a thread local buffer.\n   *\n   * @param n The size of the thread local block.\n   */\n  common::Span<RegTree::FVec> ThreadBuffer(std::size_t n) {\n    std::int32_t thread_idx = omp_get_thread_num();\n    auto const fvec_offset = thread_idx * kBlockOfRowsSize;\n    auto fvec_tloc = common::Span{feat_vecs_}.subspan(fvec_offset, n);\n    return fvec_tloc;\n  }\n};\n\ntemplate <std::size_t kBlockOfRowsSize, typename DataView>\nvoid PredictBatchByBlockKernel(DataView const &batch, HostModel const &model,\n                               ThreadTmp<kBlockOfRowsSize> *p_fvec, std::int32_t n_threads,\n                               bool any_missing, linalg::TensorView<float, 2> out_predt,\n                               common::OptionalWeights tree_weights) {\n  auto &fvec = *p_fvec;\n  // Parallel over local batches\n  auto const n_samples = batch.Size();\n  auto const n_features = model.n_features;\n\n  /* Precalculate depth for each tree.\n   * These 
values are required only for the ArrayLayout optimization,\n   * so we don't need them if kBlockOfRowsSize == 1\n   */\n  std::vector<int> tree_depth;\n  if constexpr (kBlockOfRowsSize > 1) {\n    tree_depth.resize(model.tree_end - model.tree_begin);\n    CHECK_EQ(tree_depth.size(), model.Trees().size());\n    common::ParallelFor(model.tree_end - model.tree_begin, n_threads, [&](auto i) {\n      std::visit([&](auto &&tree) { tree_depth[i] = tree.MaxDepth(); }, model.Trees()[i]);\n    });\n  }\n  common::ParallelFor1d<kBlockOfRowsSize>(n_samples, n_threads, [&](auto &&block) {\n    auto fvec_tloc = fvec.ThreadBuffer(block.Size());\n\n    batch.FVecFill(block, n_features, fvec_tloc);\n    DispatchArrayLayout(model, block.begin() + batch.base_rowid, fvec_tloc, block.Size(), out_predt,\n                        tree_depth, any_missing, tree_weights);\n    batch.FVecDrop(fvec_tloc);\n  });\n}\n\n}  // anonymous namespace\n\n/**\n * @brief A helper class for prediction when the DMatrix is split by column.\n *\n * When data is split by column, a local DMatrix only contains a subset of features. All the workers\n * in a distributed/federated environment need to cooperate to produce a prediction. 
This is done in\n * two passes with the help of bit vectors.\n *\n * First pass:\n * for each tree:\n *   for each row:\n *     for each node:\n *       if the feature is available and passes the filter, mark the corresponding decision bit\n *       if the feature is missing, mark the missing bit\n *\n * Once the two bit vectors are populated, run allreduce on both, using bitwise OR for the decision\n * bits, and bitwise AND for the missing bits.\n *\n * Second pass:\n * for each tree:\n *   for each row:\n *     find the leaf node using the decision and missing bits, return the leaf value\n *\n * The size of the decision/missing bit vector is:\n *   number of rows in a batch * sum(number of nodes in each tree)\n */\nclass ColumnSplitHelper {\n public:\n  ColumnSplitHelper(std::int32_t n_threads, gbm::GBTreeModel const &model, bst_tree_t tree_begin,\n                    bst_tree_t tree_end, common::OptionalWeights tree_weights)\n      : n_threads_{n_threads},\n        model_{model},\n        tree_begin_{tree_begin},\n        tree_end_{tree_end},\n        tree_weights_{tree_weights},\n        feat_vecs_{n_threads} {\n    CHECK(!model.learner_model_param->IsVectorLeaf())\n        << \"Predict DMatrix with column split\" << MTNotImplemented();\n    CHECK(!model.Cats()->HasCategorical())\n        << \"Categorical feature is not yet supported with column-split.\";\n    CHECK(xgboost::collective::IsDistributed())\n        << \"column-split prediction is only supported for distributed training\";\n\n    auto const n_trees = tree_end_ - tree_begin_;\n    tree_sizes_.resize(n_trees);\n    tree_offsets_.resize(n_trees);\n    for (decltype(tree_begin) i = 0; i < n_trees; i++) {\n      auto const &tree = *model_.trees[tree_begin_ + i];\n      tree_sizes_[i] = tree.Size();\n    }\n    // std::exclusive_scan (only available in c++17) equivalent to get tree offsets.\n    tree_offsets_[0] = 0;\n    for (decltype(tree_begin) i = 1; i < n_trees; i++) {\n      tree_offsets_[i] = 
tree_offsets_[i - 1] + tree_sizes_[i - 1];\n    }\n    // Add the size of the last tree since this is exclusive_scan\n    bits_per_row_ = tree_offsets_.back() + tree_sizes_.back();\n  }\n\n  // Disable copy (and move) semantics.\n  ColumnSplitHelper(ColumnSplitHelper const &) = delete;\n  ColumnSplitHelper &operator=(ColumnSplitHelper const &) = delete;\n  ColumnSplitHelper(ColumnSplitHelper &&) noexcept = delete;\n  ColumnSplitHelper &operator=(ColumnSplitHelper &&) noexcept = delete;\n\n  void PredictDMatrix(Context const *ctx, DMatrix *p_fmat, std::vector<bst_float> *out_preds) {\n    if (!p_fmat->PageExists<SparsePage>()) {\n      LOG(FATAL) << \"Predict with `QuantileDMatrix` is not supported with column-split.\";\n    }\n    for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {\n      CHECK_EQ(out_preds->size(),\n               p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group);\n      PredictBatchKernel<kBlockOfRowsSize>(\n          ctx, SparsePageView{batch.GetView(), batch.base_rowid, NoOpAccessor{}}, out_preds);\n    }\n  }\n\n  void PredictLeaf(Context const *ctx, DMatrix *p_fmat, std::vector<bst_float> *out_preds) {\n    for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {\n      CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * (tree_end_ - tree_begin_));\n      PredictBatchKernel<kBlockOfRowsSize, true>(\n          ctx, SparsePageView{batch.GetView(), batch.base_rowid, NoOpAccessor{}}, out_preds);\n    }\n  }\n\n private:\n  using BitVector = RBitField8;\n\n  void InitBitVectors(std::size_t n_rows) {\n    n_rows_ = n_rows;\n    auto const size = BitVector::ComputeStorageSize(bits_per_row_ * n_rows_);\n    decision_storage_.resize(size);\n    decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));\n    missing_storage_.resize(size);\n    missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));\n  }\n\n  void ClearBitVectors() {\n    
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);\n    std::fill(missing_storage_.begin(), missing_storage_.end(), 0);\n  }\n\n  [[nodiscard]] std::size_t BitIndex(std::size_t tree_id, std::size_t row_id,\n                                     std::size_t node_id) const {\n    size_t tree_index = tree_id - tree_begin_;\n    return tree_offsets_[tree_index] * n_rows_ + row_id * tree_sizes_[tree_index] + node_id;\n  }\n\n  void AllreduceBitVectors(Context const *ctx) {\n    auto rc = collective::Success() << [&] {\n      return collective::Allreduce(\n          ctx, linalg::MakeVec(decision_storage_.data(), decision_storage_.size()),\n          collective::Op::kBitwiseOR);\n    } << [&] {\n      return collective::Allreduce(\n          ctx, linalg::MakeVec(missing_storage_.data(), missing_storage_.size()),\n          collective::Op::kBitwiseAND);\n    };\n    collective::SafeColl(rc);\n  }\n\n  void MaskOneTree(RegTree::FVec const &feat, std::size_t tree_id, std::size_t row_id) {\n    auto const tree = model_.trees[tree_id]->HostScView();\n    auto const &cats = tree.GetCategoriesMatrix();\n    bst_node_t n_nodes = tree.Size();\n\n    for (bst_node_t nid = 0; nid < n_nodes; nid++) {\n      if (tree.IsDeleted(nid) || tree.IsLeaf(nid)) {\n        continue;\n      }\n\n      auto const bit_index = BitIndex(tree_id, row_id, nid);\n      unsigned split_index = tree.SplitIndex(nid);\n      if (feat.IsMissing(split_index)) {\n        missing_bits_.Set(bit_index);\n        continue;\n      }\n\n      auto const fvalue = feat.GetFvalue(split_index);\n      auto const decision = tree.HasCategoricalSplit()\n                                ? 
GetDecision<true>(tree, nid, fvalue, cats)\n                                : GetDecision<false>(tree, nid, fvalue, cats);\n      if (decision) {\n        decision_bits_.Set(bit_index);\n      }\n    }\n  }\n\n  void MaskAllTrees(std::size_t batch_offset, common::Span<RegTree::FVec> feat_vecs,\n                    std::size_t block_size) {\n    for (auto tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {\n      for (size_t i = 0; i < block_size; ++i) {\n        MaskOneTree(feat_vecs[i], tree_id, batch_offset + i);\n      }\n    }\n  }\n\n  bst_node_t GetNextNode(tree::ScalarTreeView const &tree, bst_node_t nidx, std::size_t bit_index) {\n    if (missing_bits_.Check(bit_index)) {\n      return tree.DefaultChild(nidx);\n    } else {\n      return tree.LeftChild(nidx) + !decision_bits_.Check(bit_index);\n    }\n  }\n\n  bst_node_t GetLeafIndex(tree::ScalarTreeView const &tree, std::size_t tree_id,\n                          std::size_t row_id) {\n    bst_node_t nidx = RegTree::kRoot;\n    while (!tree.IsLeaf(nidx)) {\n      auto const bit_index = BitIndex(tree_id, row_id, nidx);\n      nidx = GetNextNode(tree, nidx, bit_index);\n    }\n    return nidx;\n  }\n\n  template <bool predict_leaf = false>\n  bst_float PredictOneTree(std::size_t tree_id, std::size_t row_id) {\n    auto const tree = model_.trees[tree_id]->HostScView();\n    auto const leaf = GetLeafIndex(tree, tree_id, row_id);\n    if constexpr (predict_leaf) {\n      return static_cast<bst_float>(leaf);\n    } else {\n      return tree.LeafValue(leaf);\n    }\n  }\n\n  template <bool predict_leaf = false>\n  void PredictAllTrees(common::Span<bst_target_t const> h_tree_groups,\n                       std::vector<bst_float> *out_preds, std::size_t batch_offset,\n                       std::size_t predict_offset, std::size_t num_group, std::size_t block_size) {\n    auto &preds = *out_preds;\n    for (auto tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {\n      auto const gid = 
h_tree_groups[tree_id];\n      auto const tree_weight = tree_weights_[tree_id - tree_begin_];\n      for (size_t i = 0; i < block_size; ++i) {\n        auto const result = PredictOneTree<predict_leaf>(tree_id, batch_offset + i);\n        if constexpr (predict_leaf) {\n          preds[(predict_offset + i) * (tree_end_ - tree_begin_) + tree_id] = result;\n        } else {\n          preds[(predict_offset + i) * num_group + gid] += result * tree_weight;\n        }\n      }\n    }\n  }\n\n  template <size_t block_of_rows_size, bool predict_leaf = false, typename DataView>\n  void PredictBatchKernel(Context const *ctx, DataView batch, std::vector<bst_float> *out_preds) {\n    auto const num_group = model_.learner_model_param->num_output_group;\n\n    // parallel over local batch\n    auto const n_samples = batch.Size();\n    auto const n_features = model_.learner_model_param->num_feature;\n\n    InitBitVectors(n_samples);\n\n    common::ParallelFor1d<kBlockOfRowsSize>(n_samples, n_threads_, [&](auto &&block) {\n      auto fvec_tloc = feat_vecs_.ThreadBuffer(block.Size());\n\n      batch.FVecFill(block, n_features, fvec_tloc);\n      MaskAllTrees(block.begin(), fvec_tloc, block.Size());\n      batch.FVecDrop(fvec_tloc);\n    });\n\n    AllreduceBitVectors(ctx);\n    auto h_tree_groups = this->model_.TreeGroups(ctx->Device());\n\n    common::ParallelFor1d<kBlockOfRowsSize>(n_samples, n_threads_, [&](auto &&block) {\n      PredictAllTrees<predict_leaf>(h_tree_groups, out_preds, block.begin(),\n                                    block.begin() + batch.base_rowid, num_group, block.Size());\n    });\n\n    ClearBitVectors();\n  }\n\n  static std::size_t constexpr kBlockOfRowsSize = BlockPolicy::kBlockOfRowsSize;\n\n  std::int32_t const n_threads_;\n  gbm::GBTreeModel const &model_;\n  bst_tree_t const tree_begin_;\n  bst_tree_t const tree_end_;\n  common::OptionalWeights tree_weights_;\n\n  std::vector<std::size_t> tree_sizes_{};\n  std::vector<std::size_t> tree_offsets_{};\n 
 std::size_t bits_per_row_{};\n  ThreadTmp<kBlockOfRowsSize> feat_vecs_;\n\n  std::size_t n_rows_;\n  /**\n   * @brief Stores decision bit for each split node.\n   *\n   * Conceptually it's a 3-dimensional bit matrix:\n   *   - 1st dimension is the tree index, from `tree_begin_` to `tree_end_`.\n   *   - 2nd dimension is the row index, for each row in the batch.\n   *   - 3rd dimension is the node id, for each node in the tree.\n   *\n   * Since we have to ship the whole thing over the wire to do an allreduce, the matrix is flattened\n   * into a 1-dimensional array.\n   *\n   * First, it's divided by the tree index:\n   *\n   * [ tree 0 ] [ tree 1 ] ...\n   *\n   * Then each tree is divided by row:\n   *\n   * [             tree 0              ] [           tree 1     ] ...\n   * [ row 0 ] [ row 1 ] ... [ row n-1 ] [ row 0 ] ...\n   *\n   * Finally, each row is divided by the node id:\n   *\n   * [                             tree 0                                         ]\n   * [              row 0                 ] [        row 1           ] ...\n   * [ node 0 ] [ node 1 ] ... [ node n-1 ] [ node 0 ] ...\n   *\n   * The first two dimensions are fixed length, while the last dimension is variable length since\n   * each tree may have a different number of nodes. We precompute the tree offsets, which are the\n   * cumulative sums of tree sizes. 
The index of tree t, row r, node n is:\n   *   index(t, r, n) = tree_offsets[t] * n_rows + r * tree_sizes[t] + n\n   */\n  std::vector<BitVector::value_type> decision_storage_{};\n  BitVector decision_bits_{};\n  /**\n   * @brief Stores whether the feature is missing for each split node.\n   *\n   * See above for the storage layout.\n   */\n  std::vector<BitVector::value_type> missing_storage_{};\n  BitVector missing_bits_{};\n};\n\nclass CPUPredictor : public Predictor {\n protected:\n  void PredictDMatrix(DMatrix *p_fmat, std::vector<float> *out_preds, gbm::GBTreeModel const &model,\n                      bst_tree_t tree_begin, bst_tree_t tree_end,\n                      common::OptionalWeights tree_weights) const {\n    if (p_fmat->Info().IsColumnSplit()) {\n      ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end, tree_weights);\n      helper.PredictDMatrix(ctx_, p_fmat, out_preds);\n      return;\n    }\n\n    auto const n_threads = this->ctx_->Threads();\n\n    // Create a writable view on the output prediction vector.\n    bst_idx_t n_groups = model.learner_model_param->OutputLength();\n    bst_idx_t n_samples = p_fmat->Info().num_row_;\n    CHECK_EQ(out_preds->size(), n_samples * n_groups);\n    auto out_predt = linalg::MakeTensorView(ctx_, *out_preds, n_samples, n_groups);\n    bool any_missing = !(p_fmat->IsDense());\n    auto const h_model =\n        HostModel{DeviceOrd::CPU(), model, false, tree_begin, tree_end, CopyViews{}};\n\n    LaunchPredict(this->ctx_, p_fmat, model, [&](auto &&policy) {\n      using Policy = common::GetValueT<decltype(policy)>;\n      ThreadTmp<Policy::kBlockOfRowsSize> feat_vecs{n_threads};\n      policy.ForEachBatch([&](auto &&batch) {\n        PredictBatchByBlockKernel<Policy::kBlockOfRowsSize>(batch, h_model, &feat_vecs, n_threads,\n                                                            any_missing, out_predt, tree_weights);\n      });\n    });\n  }\n\n public:\n  explicit CPUPredictor(Context 
const *ctx) : Predictor::Predictor{ctx} {}\n\n  void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, gbm::GBTreeModel const &model,\n                    bst_tree_t tree_begin, bst_tree_t tree_end = 0,\n                    std::vector<float> const *tree_weights = nullptr) const override {\n    auto *out_preds = &predts->predictions;\n    // This is actually already handled in gbm, but large amount of tests rely on the\n    // behaviour.\n    if (tree_end == 0) {\n      tree_end = model.trees.size();\n    }\n    auto weights = tree_weights == nullptr ? common::OptionalWeights{1.0f}\n                                           : common::OptionalWeights{common::Span<float const>{\n                                                 tree_weights->data() + tree_begin,\n                                                 static_cast<std::size_t>(tree_end - tree_begin)}};\n    this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end, weights);\n  }\n\n  [[nodiscard]] bool InplacePredict(std::shared_ptr<DMatrix> p_m, gbm::GBTreeModel const &model,\n                                    float missing, PredictionCacheEntry *out_preds,\n                                    bst_tree_t tree_begin, bst_tree_t tree_end,\n                                    std::vector<float> const *tree_weights) const override {\n    auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());\n    CHECK(proxy) << error::InplacePredictProxy();\n    if (tree_end == 0) {\n      tree_end = model.trees.size();\n    }\n\n    this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);\n    auto &predictions = out_preds->predictions.HostVector();\n    bool any_missing = true;\n\n    auto const n_threads = this->ctx_->Threads();\n    // Always use block as we don't know the nnz.\n    ThreadTmp<BlockPolicy::kBlockOfRowsSize> feat_vecs{n_threads};\n    bst_idx_t n_groups = model.learner_model_param->OutputLength();\n    auto const h_model =\n        
HostModel{DeviceOrd::CPU(), model, false, tree_begin, tree_end, CopyViews{}};\n    auto weights = tree_weights == nullptr ? common::OptionalWeights{1.0f}\n                                           : common::OptionalWeights{common::Span<float const>{\n                                                 tree_weights->data() + tree_begin,\n                                                 static_cast<std::size_t>(tree_end - tree_begin)}};\n\n    auto kernel = [&](auto &&view) {\n      auto out_predt = linalg::MakeTensorView(ctx_, predictions, view.Size(), n_groups);\n      PredictBatchByBlockKernel<BlockPolicy::kBlockOfRowsSize>(view, h_model, &feat_vecs, n_threads,\n                                                               any_missing, out_predt, weights);\n    };\n    auto dispatch = [&](auto x) {\n      using AdapterT = typename decltype(x)::element_type;\n      CheckProxyDMatrix(x, proxy, model.learner_model_param);\n      LaunchPredict(\n          this->ctx_, proxy, model,\n          [&](auto &&policy) {\n            if constexpr (std::is_same_v<AdapterT, data::ColumnarAdapter>) {\n              auto view =\n                  AdapterView{x.get(), missing, policy.MakeAccessor(ctx_, x->Cats(), model)};\n              kernel(view);\n            } else {\n              auto view = AdapterView{x.get(), missing, NoOpAccessor{}};\n              kernel(view);\n            }\n          },\n          [&](auto) {\n            if constexpr (std::is_same_v<AdapterT, data::ColumnarAdapter>) {\n              return !x->Cats().Empty();\n            } else {\n              return false;\n            }\n          });\n    };\n\n    bool type_error = false;\n    data::cpu_impl::DispatchAny<false>(proxy, dispatch, &type_error);\n    return !type_error;\n  }\n\n  void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<float> *out_preds,\n                   gbm::GBTreeModel const &model, bst_tree_t ntree_limit) const override {\n    auto const n_threads = this->ctx_->Threads();\n    // 
number of valid trees\n    ntree_limit = GetTreeLimit(model.trees, ntree_limit);\n    const MetaInfo &info = p_fmat->Info();\n    std::vector<float> &preds = out_preds->HostVector();\n    preds.resize(info.num_row_ * ntree_limit);\n\n    if (p_fmat->Info().IsColumnSplit()) {\n      ColumnSplitHelper helper(n_threads, model, 0, ntree_limit, common::OptionalWeights{1.0f});\n      helper.PredictLeaf(ctx_, p_fmat, &preds);\n      return;\n    }\n\n    auto n_features = model.learner_model_param->num_feature;\n    ThreadTmp<1> feat_vecs{n_threads};\n\n    auto const h_model = HostModel{DeviceOrd::CPU(), model, false, 0, ntree_limit, CopyViews{}};\n    LaunchPredict(this->ctx_, p_fmat, model, [&](auto &&policy) {\n      policy.ForEachBatch([&](auto &&batch) {\n        common::ParallelFor1d<1>(batch.Size(), n_threads, [&](auto &&block) {\n          auto ridx = static_cast<bst_idx_t>(batch.base_rowid + block.begin());\n          auto fvec_tloc = feat_vecs.ThreadBuffer(block.Size());\n          batch.FVecFill(block, n_features, fvec_tloc);\n\n          for (bst_tree_t j = 0; j < ntree_limit; ++j) {\n            bst_node_t nidx = std::visit(\n                [&](auto &&tree) {\n                  return GetLeafIndex<true, true>(tree, fvec_tloc.front(),\n                                                  tree.GetCategoriesMatrix(), RegTree::kRoot);\n                },\n                h_model.Trees()[j]);\n            preds[ridx * ntree_limit + j] = static_cast<float>(nidx);\n          }\n          batch.FVecDrop(fvec_tloc);\n        });\n      });\n    });\n  }\n\n  void PredictContribution(DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,\n                           const gbm::GBTreeModel &model, bst_tree_t ntree_limit,\n                           std::vector<float> const *tree_weights, bool approximate, int condition,\n                           unsigned condition_feature) const override {\n    if (approximate) {\n      
interpretability::ApproxFeatureImportance(this->ctx_, p_fmat, out_contribs, model,\n                                                ntree_limit, tree_weights);\n    } else {\n      interpretability::ShapValues(this->ctx_, p_fmat, out_contribs, model, ntree_limit,\n                                   tree_weights, condition, condition_feature);\n    }\n  }\n\n  void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,\n                                       gbm::GBTreeModel const &model, bst_tree_t ntree_limit,\n                                       std::vector<float> const *tree_weights,\n                                       bool approximate) const override {\n    interpretability::ShapInteractionValues(this->ctx_, p_fmat, out_contribs, model, ntree_limit,\n                                            tree_weights, approximate);\n  }\n};\n\nXGBOOST_REGISTER_PREDICTOR(CPUPredictor, \"cpu_predictor\")\n    .describe(\"Make predictions using CPU.\")\n    .set_body([](Context const *ctx) { return new CPUPredictor(ctx); });\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/data_accessor.h",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n#include <limits>\n#include <utility>\n#include <vector>\n\n#include \"../common/categorical.h\"    // for IsCat\n#include \"../common/column_matrix.h\"  // for ColumnMatrix\n#include \"../common/common.h\"         // for Range1d\n#include \"../common/hist_util.h\"      // for DispatchBinType, HistogramCuts\n#include \"../common/math.h\"           // for CheckNAN\n#include \"../data/cat_container.h\"    // for NoOpAccessor\n#include \"../data/gradient_index.h\"   // for GHistIndexMatrix\n#include \"xgboost/data.h\"             // for HostSparsePageView\n#include \"xgboost/span.h\"             // for Span\n#include \"xgboost/tree_model.h\"       // for RegTree::FVec\n\nnamespace xgboost::predictor {\n// Convert a single sample in batch view to FVec.\ntemplate <typename BatchView>\nstruct DataToFeatVec {\n  void Fill(bst_idx_t ridx, RegTree::FVec* p_feats) const {\n    auto& feats = *p_feats;\n    auto n_valid = static_cast<BatchView const*>(this)->DoFill(ridx, feats.Data().data());\n    feats.HasMissing(n_valid != feats.Size());\n  }\n\n  // Fill the data into the feature vector.\n  void FVecFill(common::Range1d const& block, bst_feature_t n_features,\n                common::Span<RegTree::FVec> s_feats_vec) const {\n    auto feats_vec = s_feats_vec.data();\n    for (std::size_t i = 0; i < block.Size(); ++i) {\n      RegTree::FVec& feats = feats_vec[i];\n      if (feats.Size() == 0) {\n        feats.Init(n_features);\n      }\n      this->Fill(block.begin() + i, &feats);\n    }\n  }\n  // Clear the feature vector.\n  static void FVecDrop(common::Span<RegTree::FVec> s_feats) {\n    auto p_feats = s_feats.data();\n    for (size_t i = 0, n = s_feats.size(); i < n; ++i) {\n      p_feats[i].Drop();\n    }\n  }\n};\n\ntemplate <typename EncAccessor = NoOpAccessor>\nclass SparsePageView : public DataToFeatVec<SparsePageView<EncAccessor>> {\n  
EncAccessor acc_;\n  HostSparsePageView const view_;\n\n public:\n  bst_idx_t const base_rowid;\n\n  SparsePageView(HostSparsePageView const p, bst_idx_t base_rowid, EncAccessor acc)\n      : acc_{std::move(acc)}, view_{p}, base_rowid{base_rowid} {}\n\n  [[nodiscard]] std::size_t Size() const { return view_.Size(); }\n\n  [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float* out) const {\n    auto p_data = view_[ridx].data();\n\n    for (std::size_t i = 0, n = view_[ridx].size(); i < n; ++i) {\n      auto const& entry = p_data[i];\n      out[entry.index] = acc_(entry);\n    }\n\n    return view_[ridx].size();\n  }\n};\n\ntemplate <typename EncAccessor = NoOpAccessor>\nclass GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView<EncAccessor>> {\n private:\n  GHistIndexMatrix const& page_;\n  EncAccessor acc_;\n  common::Span<FeatureType const> ft_;\n\n  std::vector<std::uint32_t> const& ptrs_;\n  std::vector<float> const& values_;\n  common::ColumnMatrix const& columns_;\n\n public:\n  bst_idx_t const base_rowid;\n\n public:\n  GHistIndexMatrixView(GHistIndexMatrix const& page, EncAccessor acc,\n                       common::Span<FeatureType const> ft)\n      : page_{page},\n        acc_{std::move(acc)},\n        ft_{ft},\n        ptrs_{page.cut.Ptrs()},\n        values_{page.cut.Values()},\n        columns_{page.Transpose()},\n        base_rowid{page.base_rowid} {}\n\n  [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float* out) const {\n    auto gridx = ridx + this->base_rowid;\n    auto n_features = page_.Features();\n\n    bst_idx_t n_non_missings = 0;\n    if (page_.IsDense()) {\n      common::DispatchBinType(page_.index.GetBinTypeSize(), [&](auto t) {\n        using T = decltype(t);\n        auto ptr = this->page_.index.template data<T>();\n        auto rbeg = this->page_.row_ptr[ridx];\n        for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {\n          bst_bin_t bin_idx;\n          float fvalue;\n          if (common::IsCat(ft_, fidx)) {\n 
           bin_idx = page_.GetGindex(gridx, fidx);\n            fvalue = this->values_[bin_idx];\n          } else {\n            bin_idx = ptr[rbeg + fidx] + page_.index.Offset()[fidx];\n            // Route quantized prediction through the bin lower bound; the first numerical\n            // bin has an implicit lower bound of negative infinity.\n            fvalue =\n                common::HistogramCuts::NumericBinLowerBound(this->ptrs_, values_, fidx, bin_idx);\n          }\n          out[fidx] = acc_(fvalue, fidx);\n        }\n      });\n      n_non_missings += n_features;\n    } else {\n      for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {\n        float fvalue = std::numeric_limits<float>::quiet_NaN();\n        bool is_cat = common::IsCat(ft_, fidx);\n        if (columns_.GetColumnType(fidx) == common::kSparseColumn) {\n          // Special handling for extremely sparse data. Just binary search.\n          auto bin_idx = page_.GetGindex(gridx, fidx);\n          if (bin_idx != -1) {\n            if (is_cat) {\n              fvalue = values_[bin_idx];\n            } else {\n              fvalue =\n                  common::HistogramCuts::NumericBinLowerBound(this->ptrs_, values_, fidx, bin_idx);\n            }\n          }\n        } else {\n          if (is_cat) {\n            fvalue = page_.GetFvalue(ptrs_, values_, gridx, fidx, is_cat);\n          } else {\n            auto bin_idx = page_.GetGindex(gridx, fidx);\n            if (bin_idx != -1) {\n              fvalue = common::HistogramCuts::NumericBinLowerBound(ptrs_, values_, fidx, bin_idx);\n            }\n          }\n        }\n        if (!common::CheckNAN(fvalue)) {\n          out[fidx] = acc_(fvalue, fidx);\n          n_non_missings++;\n        }\n      }\n    }\n    return n_non_missings;\n  }\n\n  [[nodiscard]] bst_idx_t Size() const { return page_.Size(); }\n};\n\ntemplate <typename Adapter, typename EncAccessor = NoOpAccessor>\nclass AdapterView : public 
DataToFeatVec<AdapterView<Adapter, EncAccessor>> {\n  Adapter const* adapter_;\n  float missing_;\n  EncAccessor acc_;\n\n public:\n  explicit AdapterView(Adapter const* adapter, float missing, EncAccessor acc)\n      : adapter_{adapter}, missing_{missing}, acc_{std::move(acc)} {}\n\n  [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float* out) const {\n    auto const& batch = adapter_->Value();\n    auto row = batch.GetLine(ridx);\n    bst_idx_t n_non_missings = 0;\n    for (size_t c = 0; c < row.Size(); ++c) {\n      auto e = row.GetElement(c);\n      if (missing_ != e.value && !common::CheckNAN(e.value)) {\n        auto fvalue = this->acc_(e);\n        out[e.column_idx] = fvalue;\n        n_non_missings++;\n      }\n    }\n    return n_non_missings;\n  }\n\n  [[nodiscard]] bst_idx_t Size() const { return adapter_->NumRows(); }\n\n  bst_idx_t const static base_rowid = 0;  // NOLINT\n};\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/gbtree_view.h",
    "content": "/**\n * Copyright 2025-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <mutex>    // for mutex, lock_guard\n#include <utility>  // for move\n#include <vector>   // for vector\n\n#include \"../gbm/gbtree_model.h\"  // for GBTreeModel\n#include \"../tree/tree_view.h\"    // for MultiTargetTreeView, ScalarTreeView\n#include \"xgboost/base.h\"         // for bst_tree_t, bst_target_t\n#include \"xgboost/context.h\"      // for DeviceOrd\n#include \"xgboost/span.h\"         // for Span\n\nnamespace xgboost::predictor {\n/**\n * @brief A view for the boosted trees to ensure thread safety.\n *\n *   This class contains a subset of trees based on the input tree range.\n *\n * @tparam Container   The container for storing the tree view variants.\n * @tparam TreeViewVar A std::variant for different view types.\n * @tparam CopyViews   A policy for how to copy the tree views into the container.\n */\ntemplate <template <typename> typename Container, typename TreeViewVar, typename CopyViews>\nclass GBTreeModelView {\n private:\n  Container<TreeViewVar> trees_;\n\n public:\n  bst_tree_t const tree_begin;\n  bst_tree_t const tree_end;\n  common::Span<bst_target_t const> tree_groups;\n  bst_target_t const n_groups;\n  bst_feature_t const n_features;\n  bst_node_t n_nodes{0};\n\n public:\n  explicit GBTreeModelView(DeviceOrd device, gbm::GBTreeModel const& model, bool need_stat,\n                           bst_tree_t tree_begin, bst_tree_t tree_end, CopyViews&& copy)\n      : tree_begin{tree_begin},\n        tree_end{tree_end},\n        n_groups{model.learner_model_param->OutputLength()},\n        n_features{model.learner_model_param->num_feature} {\n    // Make sure the trees are pulled to target device without race.\n    std::lock_guard guard{model.Mutex()};\n    // Create tree views.\n    std::vector<TreeViewVar> trees;\n    for (bst_tree_t tree_idx = this->tree_begin; tree_idx < this->tree_end; ++tree_idx) {\n      auto const& p_tree = 
model.trees[tree_idx];\n      if (p_tree->IsMultiTarget()) {\n        auto tree = tree::MultiTargetTreeView{device, need_stat, p_tree.get()};\n        this->n_nodes += tree.Size();\n        trees.emplace_back(tree);\n      } else {\n        auto tree = tree::ScalarTreeView{device, need_stat, p_tree.get()};\n        this->n_nodes += tree.Size();\n        trees.emplace_back(tree);\n      }\n    }\n\n    copy(&this->trees_, std::move(trees));  // NOLINT[build/include_what_you_use]\n\n    CHECK_GE(this->tree_end, this->tree_begin);\n    auto n_trees = this->tree_end - this->tree_begin;\n    model.tree_info.SetDevice(device);\n    this->tree_groups = model.TreeGroups(device).subspan(this->tree_begin, n_trees);\n    CHECK_EQ(n_trees, this->trees_.size());\n  }\n\n  [[nodiscard]] common::Span<TreeViewVar const> Trees() const {\n    return {trees_.data(), trees_.size()};\n  }\n\n  GBTreeModelView() = delete;\n  GBTreeModelView(GBTreeModelView const&) = delete;\n  GBTreeModelView& operator=(GBTreeModelView const&) = delete;\n  GBTreeModelView(GBTreeModelView&&) = default;\n  GBTreeModelView& operator=(GBTreeModelView&&) = delete;\n};\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/gpu_data_accessor.cuh",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <cmath>\n#include <limits>\n#include <type_traits>\n#include <utility>\n\n#include \"../common/categorical.h\"  // for IsCat\n#include \"xgboost/context.h\"        // for Context\n#include \"xgboost/data.h\"           // for Entry, SparsePage\n#include \"xgboost/span.h\"           // for Span\n\nnamespace xgboost::predictor {\nstruct SparsePageView {\n  common::Span<const Entry> d_data;\n  common::Span<const bst_idx_t> d_row_ptr;\n  bst_feature_t num_features;\n\n  SparsePageView() = default;\n  explicit SparsePageView(Context const* ctx, SparsePage const& page, bst_feature_t n_features)\n      : d_data{[&] {\n          page.data.SetDevice(ctx->Device());\n          return page.data.ConstDeviceSpan();\n        }()},\n        d_row_ptr{[&] {\n          page.offset.SetDevice(ctx->Device());\n          return page.offset.ConstDeviceSpan();\n        }()},\n        num_features{n_features} {}\n\n  [[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {\n    // Binary search\n    auto begin_ptr = d_data.begin() + d_row_ptr[ridx];\n    auto end_ptr = d_data.begin() + d_row_ptr[ridx + 1];\n    if (end_ptr - begin_ptr == this->NumCols()) {\n      // Bypass span check for dense data\n      return d_data.data()[d_row_ptr[ridx] + fidx].fvalue;\n    }\n    common::Span<const Entry>::iterator previous_middle;\n    while (end_ptr != begin_ptr) {\n      auto middle = begin_ptr + (end_ptr - begin_ptr) / 2;\n      if (middle == previous_middle) {\n        break;\n      } else {\n        previous_middle = middle;\n      }\n\n      if (middle->index == fidx) {\n        return middle->fvalue;\n      } else if (middle->index < fidx) {\n        begin_ptr = middle;\n      } else {\n        end_ptr = middle;\n      }\n    }\n    // Value is missing\n    return std::numeric_limits<float>::quiet_NaN();\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE size_t NumRows() const { return 
d_row_ptr.size() - 1; }\n  [[nodiscard]] XGBOOST_DEVICE size_t NumCols() const { return num_features; }\n};\n\ntemplate <typename EncAccessor>\nstruct SparsePageLoaderNoShared {\n public:\n  using SupportShmemLoad = std::false_type;\n\n  SparsePageView data;\n  EncAccessor acc;\n\n  template <typename Fidx>\n  [[nodiscard]] __device__ float GetElement(bst_idx_t ridx, Fidx fidx) const {\n    return acc(data.GetElement(ridx, fidx), fidx);\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return data.NumRows(); }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return data.NumCols(); }\n};\n\ntemplate <typename Accessor, typename EncAccessor>\nstruct EllpackLoader {\n public:\n  using SupportShmemLoad = std::false_type;\n\n  Accessor matrix;\n  EncAccessor acc;\n\n  XGBOOST_DEVICE EllpackLoader(Accessor m, bool /*use_shared*/, bst_feature_t /*n_features*/,\n                               bst_idx_t /*n_samples*/, float /*missing*/, EncAccessor&& acc)\n      : matrix{std::move(m)}, acc{std::forward<EncAccessor>(acc)} {}\n\n  [[nodiscard]] XGBOOST_DEV_INLINE float GetElement(size_t ridx, size_t fidx) const {\n    auto gidx = matrix.template GetBinIndex<false>(ridx, fidx);\n    if (gidx == -1) {\n      return std::numeric_limits<float>::quiet_NaN();\n    }\n    if (common::IsCat(matrix.feature_types, fidx)) {\n      return this->acc(matrix.gidx_fvalue_map[gidx], fidx);\n    }\n    if (gidx == matrix.feature_segments[fidx]) {\n      return -std::numeric_limits<float>::infinity();\n    }\n    return matrix.gidx_fvalue_map[gidx - 1];\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return this->matrix.NumFeatures(); }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return this->matrix.n_rows; }\n};\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/gpu_predictor.cu",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#include <thrust/copy.h>\n#include <thrust/device_vector.h>\n#include <thrust/fill.h>\n\n#include <cuda/functional>   // for proclaim_return_type\n#include <cuda/std/utility>  // for swap\n#include <memory>\n\n#include \"../collective/allreduce.h\"\n#include \"../common/bitfield.h\"\n#include \"../common/categorical.h\"\n#include \"../common/common.h\"\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../common/cuda_rt_utils.h\"   // for AllVisibleGPUs, SetDevice\n#include \"../common/device_helpers.cuh\"\n#include \"../common/error_msg.h\"   // for InplacePredictProxy\n#include \"../common/nvtx_utils.h\"  // for xgboost_NVTX_FN_RANGE\n#include \"../common/optional_weight.h\"\n#include \"../data/batch_utils.h\"      // for StaticBatch\n#include \"../data/cat_container.cuh\"  // for EncPolicy\n#include \"../data/device_adapter.cuh\"\n#include \"../data/ellpack_page.cuh\"\n#include \"../data/proxy_dmatrix.cuh\"  // for DispatchAny\n#include \"../data/proxy_dmatrix.h\"\n#include \"../gbm/gbtree_model.h\"\n#include \"../tree/tree_view.h\"\n#include \"gbtree_view.h\"  // for GBTreeModelView\n#include \"gpu_data_accessor.cuh\"\n#include \"interpretability/shap.h\"\n#include \"predict_fn.h\"\n#include \"utils.h\"  // for CheckProxyDMatrix\n#include \"xgboost/data.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/multi_target_tree_model.h\"  // for MultiTargetTree, MultiTargetTreeView\n#include \"xgboost/predictor.h\"\n#include \"xgboost/tree_model.h\"\n#include \"xgboost/tree_updater.h\"\n\nnamespace xgboost::predictor {\nDMLC_REGISTRY_FILE_TAG(gpu_predictor);\n\nusing cuda_impl::StaticBatch;\n\ntemplate <typename EncAccessor>\nstruct SparsePageLoader {\n public:\n  using SupportShmemLoad = std::true_type;\n\n private:\n  EncAccessor acc_;\n\n public:\n  bool use_shared;\n  SparsePageView data;\n  float* smem;\n\n  __device__ SparsePageLoader(SparsePageView 
data, bool use_shared, bst_feature_t num_features,\n                              bst_idx_t num_rows, float, EncAccessor&& acc)\n      : use_shared(use_shared), data(data), acc_{std::forward<EncAccessor>(acc)} {\n    extern __shared__ float _smem[];\n    smem = _smem;\n    // Copy instances\n    if (use_shared) {\n      bst_uint global_idx = blockDim.x * blockIdx.x + threadIdx.x;\n      int shared_elements = blockDim.x * data.num_features;\n      dh::BlockFill(smem, shared_elements, std::numeric_limits<float>::quiet_NaN());\n      __syncthreads();\n      if (global_idx < num_rows) {\n        bst_uint elem_begin = data.d_row_ptr[global_idx];\n        bst_uint elem_end = data.d_row_ptr[global_idx + 1];\n        for (bst_uint elem_idx = elem_begin; elem_idx < elem_end; elem_idx++) {\n          Entry elem = data.d_data[elem_idx];\n          smem[threadIdx.x * data.num_features + elem.index] = this->acc_(elem);\n        }\n      }\n      __syncthreads();\n    }\n  }\n  [[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {\n    if (use_shared) {\n      return smem[threadIdx.x * data.num_features + fidx];\n    } else {\n      return this->acc_(data.GetElement(ridx, fidx), fidx);\n    }\n  }\n};\n\n/**\n * @brief Use for in-place predict.\n */\ntemplate <typename Batch, typename EncAccessor>\nstruct DeviceAdapterLoader {\n public:\n  using SupportShmemLoad = std::true_type;\n\n private:\n  Batch batch_;\n  EncAccessor acc_;\n\n public:\n  bst_feature_t n_features;\n  float* smem;\n  bool use_shared;\n  data::IsValidFunctor is_valid;\n\n  XGBOOST_DEV_INLINE DeviceAdapterLoader(Batch&& batch, bool use_shared, bst_feature_t n_features,\n                                         bst_idx_t n_samples, float missing, EncAccessor&& acc)\n      : batch_{std::move(batch)},\n        acc_{std::forward<EncAccessor>(acc)},\n        n_features{n_features},\n        use_shared{use_shared},\n        is_valid{missing} {\n    extern __shared__ float _smem[];\n    
this->smem = _smem;\n    if (this->use_shared) {\n      auto global_idx = blockDim.x * blockIdx.x + threadIdx.x;\n      size_t shared_elements = blockDim.x * n_features;\n      dh::BlockFill(smem, shared_elements, std::numeric_limits<float>::quiet_NaN());\n      __syncthreads();\n      if (global_idx < n_samples) {\n        auto beg = global_idx * n_features;\n        auto end = (global_idx + 1) * n_features;\n        for (size_t i = beg; i < end; ++i) {\n          data::COOTuple const& e = this->batch_.GetElement(i);\n          if (is_valid(e)) {\n            smem[threadIdx.x * n_features + (i - beg)] = this->acc_(e);\n          }\n        }\n      }\n    }\n    __syncthreads();\n  }\n\n  [[nodiscard]] XGBOOST_DEV_INLINE float GetElement(size_t ridx, size_t fidx) const {\n    if (use_shared) {\n      return smem[threadIdx.x * n_features + fidx];\n    }\n    auto value = this->batch_.GetElement(ridx * n_features + fidx).value;\n    if (is_valid(value)) {\n      return this->acc_(value, fidx);\n    } else {\n      return std::numeric_limits<float>::quiet_NaN();\n    }\n  }\n};\n\nnamespace {\ntemplate <bool has_missing, bool has_categorical, typename TreeView, typename Loader>\n__device__ bst_node_t GetLeafIndex(bst_idx_t ridx, TreeView const& tree, Loader* loader) {\n  bst_node_t nidx = 0;\n  while (!tree.IsLeaf(nidx)) {\n    float fvalue = loader->GetElement(ridx, tree.SplitIndex(nidx));\n    bool is_missing = has_missing && common::CheckNAN(fvalue);\n    auto next = GetNextNode<has_missing, has_categorical>(tree, nidx, fvalue, is_missing,\n                                                          tree.GetCategoriesMatrix());\n    assert(nidx < next);\n    nidx = next;\n  }\n  return nidx;\n}\n\ntemplate <bool has_missing, typename TreeView, typename Loader>\n__device__ auto GetLeafWeight(bst_idx_t ridx, TreeView const& tree, Loader* loader) {\n  bst_node_t nidx = -1;\n  if (tree.HasCategoricalSplit()) {\n    nidx = GetLeafIndex<has_missing, true>(ridx, tree, 
loader);\n  } else {\n    nidx = GetLeafIndex<has_missing, false>(ridx, tree, loader);\n  }\n  return tree.LeafValue(nidx);\n}\n}  // namespace\n\nusing TreeViewVar = cuda::std::variant<tree::ScalarTreeView, tree::MultiTargetTreeView>;\n\ntemplate <typename Loader, typename Data, bool has_missing, typename EncAccessor>\n__global__ void PredictLeafKernel(Data data, common::Span<TreeViewVar const> d_trees,\n                                  common::Span<float> d_out_predictions, bst_tree_t tree_begin,\n                                  bst_tree_t tree_end, bst_feature_t num_features, bool use_shared,\n                                  float missing, EncAccessor acc) {\n  auto n_rows = data.NumRows();\n  bst_idx_t ridx = blockDim.x * blockIdx.x + threadIdx.x;\n  if (ridx >= n_rows) {\n    return;\n  }\n  Loader loader{std::move(data), use_shared, num_features, n_rows, missing, std::move(acc)};\n  for (bst_tree_t tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {\n    auto const& d_tree = d_trees[tree_idx - tree_begin];\n    cuda::std::visit(\n        [&](auto&& tree) {\n          bst_node_t leaf = -1;\n          if (tree.HasCategoricalSplit()) {\n            leaf = GetLeafIndex<has_missing, true>(ridx, tree, &loader);\n          } else {\n            leaf = GetLeafIndex<has_missing, false>(ridx, tree, &loader);\n          }\n          d_out_predictions[ridx * (tree_end - tree_begin) + tree_idx] = leaf;\n        },\n        d_tree);\n  }\n}\n\ntemplate <typename Loader, typename Data, bool has_missing, typename EncAccessor>\n__global__ void PredictKernel(Data data, common::Span<TreeViewVar const> d_trees,\n                              common::Span<float> d_out_predictions,\n                              common::Span<bst_target_t const> d_tree_groups,\n                              common::OptionalWeights tree_weights, bst_feature_t num_features,\n                              bool use_shared, bst_target_t n_groups, float missing,\n                              
EncAccessor acc) {\n  auto n_rows = data.NumRows();\n  bst_idx_t global_idx = blockDim.x * blockIdx.x + threadIdx.x;\n  Loader loader{std::move(data), use_shared, num_features, n_rows, missing, std::move(acc)};\n  if (global_idx >= n_rows) {\n    return;\n  }\n\n  if (n_groups == 1u) {\n    float sum = 0;\n    for (bst_tree_t tree_idx = 0; tree_idx < d_trees.size(); ++tree_idx) {\n      auto const& d_tree = d_trees[tree_idx];\n      auto const& sc_tree = cuda::std::get<tree::ScalarTreeView>(d_tree);\n      float leaf = GetLeafWeight<has_missing>(global_idx, sc_tree, &loader);\n      sum += leaf * tree_weights[tree_idx];\n    }\n    d_out_predictions[global_idx] += sum;\n  } else {\n    for (bst_tree_t tree_idx = 0, k = d_trees.size(); tree_idx < k; tree_idx++) {\n      // Both d_tree_group and d_tress are subset of trees.\n      auto tree_group = d_tree_groups[tree_idx];\n      auto const& d_tree = d_trees[tree_idx];\n      cuda::std::visit(\n          enc::Overloaded{[&](tree::ScalarTreeView const& tree) {\n                            auto leaf = GetLeafWeight<has_missing>(global_idx, tree, &loader);\n                            bst_idx_t out_prediction_idx = global_idx * n_groups + tree_group;\n                            d_out_predictions[out_prediction_idx] += leaf * tree_weights[tree_idx];\n                          },\n                          [&](tree::MultiTargetTreeView const& tree) {\n                            // Tree group is 0.\n                            auto leaf = GetLeafWeight<has_missing>(global_idx, tree, &loader);\n                            for (std::size_t i = 0, n = leaf.Shape(0); i < n; ++i) {\n                              bst_idx_t out_prediction_idx = global_idx * n_groups + i;\n                              d_out_predictions[out_prediction_idx] +=\n                                  leaf(i) * tree_weights[tree_idx];\n                            }\n                          }},\n          d_tree);\n    }\n  }\n}\n\nnamespace {\nstruct 
CopyViews {\n  Context const* ctx;\n  explicit CopyViews(Context const* ctx) : ctx{ctx} {}\n\n  void operator()(dh::DeviceUVector<TreeViewVar>* p_dst, std::vector<TreeViewVar>&& src) {\n    xgboost_NVTX_FN_RANGE();\n    p_dst->resize(src.size());\n    auto d_dst = dh::ToSpan(*p_dst);\n    dh::safe_cuda(cudaMemcpyAsync(d_dst.data(), src.data(), d_dst.size_bytes(), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n  }\n};\n\nusing DeviceModel = GBTreeModelView<dh::DeviceUVector, TreeViewVar, CopyViews>;\n}  // namespace\n\nnamespace {\ntemplate <std::size_t kBlockThreads>\n[[nodiscard]] std::size_t SharedMemoryBytes(std::size_t n_features, std::size_t max_shmem_bytes) {\n  CHECK_GT(max_shmem_bytes, 0);\n  size_t shared_memory_bytes = static_cast<size_t>(sizeof(float) * n_features * kBlockThreads);\n  if (shared_memory_bytes > max_shmem_bytes) {\n    shared_memory_bytes = 0;\n  }\n  return shared_memory_bytes;\n}\n\nusing BitVector = LBitField64;\n\n__global__ void MaskBitVectorKernel(SparsePageView data, common::Span<TreeViewVar const> d_trees,\n                                    BitVector decision_bits, BitVector missing_bits,\n                                    bst_tree_t tree_begin, bst_tree_t tree_end,\n                                    bst_feature_t num_features, std::size_t num_nodes,\n                                    bool use_shared, float missing) {\n  // This needs to be always instantiated since the data is loaded cooperatively by all threads.\n  SparsePageLoader loader{data, use_shared, num_features, data.NumRows(), missing, NoOpAccessor{}};\n  auto const row_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (row_idx >= data.NumRows()) {\n    return;\n  }\n\n  std::size_t tree_offset = 0;\n  for (auto tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n    auto const& d_tree = cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx - tree_begin]);\n    auto const tree_nodes = d_tree.Size();\n    for (auto nid = 
0; nid < tree_nodes; nid++) {\n      if (d_tree.IsDeleted(nid) || d_tree.IsLeaf(nid)) {\n        continue;\n      }\n      auto const fvalue = loader.GetElement(row_idx, d_tree.SplitIndex(nid));\n      auto const is_missing = common::CheckNAN(fvalue);\n      auto const bit_index = row_idx * num_nodes + tree_offset + nid;\n      if (is_missing) {\n        missing_bits.Set(bit_index);\n      } else {\n        auto const decision =\n            d_tree.HasCategoricalSplit()\n                ? GetDecision<true>(d_tree, nid, fvalue, d_tree.GetCategoriesMatrix())\n                : GetDecision<false>(d_tree, nid, fvalue, d_tree.GetCategoriesMatrix());\n        if (decision) {\n          decision_bits.Set(bit_index);\n        }\n      }\n    }\n    tree_offset += tree_nodes;\n  }\n}\n\ntemplate <typename TreeView>\n__device__ bst_node_t GetLeafIndexByBitVector(bst_idx_t ridx, TreeView const& tree,\n                                              BitVector const& decision_bits,\n                                              BitVector const& missing_bits, std::size_t num_nodes,\n                                              std::size_t tree_offset) {\n  bst_node_t nidx = 0;\n  while (!tree.IsLeaf(nidx)) {\n    auto const bit_index = ridx * num_nodes + tree_offset + nidx;\n    if (missing_bits.Check(bit_index)) {\n      nidx = tree.DefaultChild(nidx);\n    } else {\n      nidx = tree.LeftChild(nidx) + !decision_bits.Check(bit_index);\n    }\n  }\n  return nidx;\n}\n\ntemplate <typename TreeView>\n__device__ float GetLeafWeightByBitVector(bst_idx_t ridx, TreeView const& tree,\n                                          BitVector const& decision_bits,\n                                          BitVector const& missing_bits, std::size_t num_nodes,\n                                          std::size_t tree_offset) {\n  auto const nidx =\n      GetLeafIndexByBitVector(ridx, tree, decision_bits, missing_bits, num_nodes, tree_offset);\n  return tree.LeafValue(nidx);\n}\n\ntemplate 
<bool predict_leaf>\n__global__ void PredictByBitVectorKernel(\n    common::Span<TreeViewVar const> d_trees, common::Span<float> d_out_predictions,\n    common::Span<bst_target_t const> d_tree_groups, BitVector decision_bits, BitVector missing_bits,\n    bst_tree_t tree_begin, bst_tree_t tree_end, std::size_t num_rows, std::size_t num_nodes,\n    std::uint32_t num_group, common::OptionalWeights tree_weights) {\n  auto const row_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (row_idx >= num_rows) {\n    return;\n  }\n\n  std::size_t tree_offset = 0;\n  if constexpr (predict_leaf) {\n    for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {\n      auto const& d_tree = cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx - tree_begin]);\n      auto const leaf = GetLeafIndexByBitVector(row_idx, d_tree, decision_bits, missing_bits,\n                                                num_nodes, tree_offset);\n      d_out_predictions[row_idx * (tree_end - tree_begin) + tree_idx] = static_cast<float>(leaf);\n      tree_offset += d_tree.Size();\n    }\n  } else {\n    if (num_group == 1) {\n      float sum = 0;\n      for (auto tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n        auto const& d_tree = cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx - tree_begin]);\n        sum += GetLeafWeightByBitVector(row_idx, d_tree, decision_bits, missing_bits, num_nodes,\n                                        tree_offset) *\n               tree_weights[tree_idx - tree_begin];\n        tree_offset += d_tree.Size();\n      }\n      d_out_predictions[row_idx] += sum;\n    } else {\n      for (auto tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n        auto const tree_group = d_tree_groups[tree_idx - tree_begin];\n        auto const& d_tree = cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx - tree_begin]);\n        bst_uint out_prediction_idx = row_idx * num_group + tree_group;\n        d_out_predictions[out_prediction_idx] +=\n        
    GetLeafWeightByBitVector(row_idx, d_tree, decision_bits, missing_bits, num_nodes,\n                                     tree_offset) *\n            tree_weights[tree_idx - tree_begin];\n        tree_offset += d_tree.Size();\n      }\n    }\n  }\n}\n\nclass ColumnSplitHelper {\n public:\n  explicit ColumnSplitHelper(Context const* ctx) : ctx_{ctx} {}\n\n  void PredictBatch(DMatrix* dmat, HostDeviceVector<float>* out_preds,\n                    gbm::GBTreeModel const& model, DeviceModel const& d_model,\n                    common::OptionalWeights tree_weights) const {\n    CHECK(dmat->PageExists<SparsePage>()) << \"Column split for external memory is not support.\";\n    PredictDMatrix<false>(dmat, out_preds, d_model, model.learner_model_param->num_feature,\n                          model.learner_model_param->num_output_group, tree_weights);\n  }\n\n  void PredictLeaf(DMatrix* dmat, HostDeviceVector<float>* out_preds, gbm::GBTreeModel const& model,\n                   DeviceModel const& d_model) const {\n    CHECK(dmat->PageExists<SparsePage>()) << \"Column split for external memory is not support.\";\n    PredictDMatrix<true>(dmat, out_preds, d_model, model.learner_model_param->num_feature,\n                         model.learner_model_param->num_output_group,\n                         common::OptionalWeights{1.0f});\n  }\n\n private:\n  using BitType = BitVector::value_type;\n\n  template <bool predict_leaf>\n  void PredictDMatrix(DMatrix* dmat, HostDeviceVector<float>* out_preds, DeviceModel const& d_model,\n                      bst_feature_t num_features, std::uint32_t num_group,\n                      common::OptionalWeights tree_weights) const {\n    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n    dh::caching_device_vector<BitType> decision_storage{};\n    dh::caching_device_vector<BitType> missing_storage{};\n\n    auto constexpr kBlockThreads = 128;\n    auto const max_shared_memory_bytes = dh::MaxSharedMemory(ctx_->Ordinal());\n    auto const 
shared_memory_bytes =\n        SharedMemoryBytes<kBlockThreads>(num_features, max_shared_memory_bytes);\n    auto const use_shared = shared_memory_bytes != 0;\n\n    auto const num_nodes = d_model.n_nodes;\n    std::size_t batch_offset = 0;\n    for (auto const& batch : dmat->GetBatches<SparsePage>()) {\n      auto const num_rows = batch.Size();\n      ResizeBitVectors(&decision_storage, &missing_storage, num_rows * num_nodes);\n      BitVector decision_bits{dh::ToSpan(decision_storage)};\n      BitVector missing_bits{dh::ToSpan(missing_storage)};\n\n      SparsePageView data{ctx_, batch, num_features};\n      auto const grid = static_cast<uint32_t>(common::DivRoundUp(num_rows, kBlockThreads));\n      auto d_tree_groups = d_model.tree_groups;\n      dh::LaunchKernel{grid, kBlockThreads, shared_memory_bytes,  // NOLINT(whitespace/braces)\n                       ctx_->CUDACtx()->Stream()}(\n          MaskBitVectorKernel, data, d_model.Trees(), decision_bits, missing_bits,\n          d_model.tree_begin, d_model.tree_end, num_features, num_nodes, use_shared,\n          std::numeric_limits<float>::quiet_NaN());\n\n      AllReduceBitVectors(&decision_storage, &missing_storage);\n\n      dh::LaunchKernel{grid, kBlockThreads, 0,  // NOLINT(whitespace/braces)\n                       ctx_->CUDACtx()->Stream()}(\n          PredictByBitVectorKernel<predict_leaf>, d_model.Trees(),\n          out_preds->DeviceSpan().subspan(batch_offset), d_tree_groups, decision_bits, missing_bits,\n          d_model.tree_begin, d_model.tree_end, num_rows, num_nodes, num_group, tree_weights);\n\n      batch_offset += batch.Size() * num_group;\n    }\n  }\n\n  void AllReduceBitVectors(dh::caching_device_vector<BitType>* decision_storage,\n                           dh::caching_device_vector<BitType>* missing_storage) const {\n    auto rc = collective::Success() << [&] {\n      return collective::Allreduce(\n          ctx_,\n          linalg::MakeVec(decision_storage->data().get(), 
decision_storage->size(), ctx_->Device()),\n          collective::Op::kBitwiseOR);\n    } << [&] {\n      return collective::Allreduce(\n          ctx_,\n          linalg::MakeVec(missing_storage->data().get(), missing_storage->size(), ctx_->Device()),\n          collective::Op::kBitwiseAND);\n    };\n    collective::SafeColl(rc);\n  }\n\n  void ResizeBitVectors(dh::caching_device_vector<BitType>* decision_storage,\n                        dh::caching_device_vector<BitType>* missing_storage,\n                        std::size_t total_bits) const {\n    auto const size = BitVector::ComputeStorageSize(total_bits);\n    if (decision_storage->size() < size) {\n      decision_storage->resize(size);\n    }\n    thrust::fill(ctx_->CUDACtx()->CTP(), decision_storage->begin(), decision_storage->end(), 0);\n    if (missing_storage->size() < size) {\n      missing_storage->resize(size);\n    }\n    thrust::fill(ctx_->CUDACtx()->CTP(), missing_storage->begin(), missing_storage->end(), 0);\n  }\n\n  Context const* ctx_;\n};\n\nusing cuda_impl::MakeCatAccessor;\n\n// Provide configuration for launching the predict kernel.\ntemplate <typename IsDense, typename EncAccessor>\nclass LaunchConfig {\n public:\n  static constexpr bool HasMissing() { return !IsDense::value; }\n  using EncAccessorT = EncAccessor;\n\n  template <typename T, std::uint32_t block_threads>\n  struct LoaderType {\n    using Type = T;\n    constexpr static std::uint32_t kBlockThreads = block_threads;\n\n    static std::size_t AllocShmem(Context const* ctx, bst_feature_t n_features) {\n      if constexpr (typename Type::SupportShmemLoad{}) {\n        return SharedMemoryBytes<kBlockThreads>(n_features, ConfigureDevice(ctx->Device()));\n      }\n      return 0;\n    }\n  };\n\n private:\n  static auto constexpr NotSet() { return std::numeric_limits<bst_idx_t>::max(); }\n\n  Context const* ctx_;\n  bst_feature_t n_features_;\n  std::size_t shared_memory_bytes_{0};\n\n public:\n  template <typename Loader, typename 
K, typename BatchT, typename... Args>\n  void Launch(K&& kernel, BatchT&& batch, Args&&... args) const {\n    auto grid = static_cast<uint32_t>(common::DivRoundUp(batch.NumRows(), Loader::kBlockThreads));\n    dh::LaunchKernel{grid, Loader::kBlockThreads, this->shared_memory_bytes_,  // NOLINT\n                     this->ctx_->CUDACtx()->Stream()}(kernel, std::forward<BatchT>(batch),\n                                                      std::forward<Args>(args)...);\n  }\n  template <typename Loader, typename Data>\n  void LaunchPredictKernel(Data batch, float missing, bst_feature_t n_features,\n                           DeviceModel const& d_model, EncAccessorT acc, bst_idx_t batch_offset,\n                           HostDeviceVector<float>* predictions,\n                           common::OptionalWeights tree_weights) {\n    auto kernel = PredictKernel<typename Loader::Type, common::GetValueT<decltype(batch)>,\n                                HasMissing(), EncAccessorT>;\n    auto d_tree_groups = d_model.tree_groups;\n    this->Launch<Loader>(\n        kernel, std::move(batch), d_model.Trees(), predictions->DeviceSpan().subspan(batch_offset),\n        d_tree_groups, tree_weights, n_features, this->UseShared(), d_model.n_groups, missing, acc);\n  }\n\n  [[nodiscard]] bool UseShared() const { return shared_memory_bytes_ != 0; }\n\n  [[nodiscard]] static std::size_t ConfigureDevice(DeviceOrd const& device) {\n    thread_local std::unordered_map<std::int32_t, std::size_t> max_shared;\n    auto it = max_shared.find(device.ordinal);\n    if (it == max_shared.cend()) {\n      max_shared[device.ordinal] = dh::MaxSharedMemory(device.ordinal);\n      it = max_shared.find(device.ordinal);\n    }\n    return it->second;\n  }\n\n  template <typename Loader>\n  void AllocShmem() {\n    this->shared_memory_bytes_ = Loader::AllocShmem(this->ctx_, this->n_features_);\n  }\n\n public:\n  LaunchConfig(Context const* ctx, bst_feature_t n_features) : ctx_{ctx}, 
n_features_{n_features} {}\n\n  template <typename Fn>\n  void ForEachBatch(DMatrix* p_fmat, Fn&& fn) {\n    if (p_fmat->PageExists<SparsePage>()) {\n      constexpr std::uint32_t kBlockThreads = 128;\n      using LoaderImpl = SparsePageLoader<EncAccessor>;\n      using Loader = LoaderType<LoaderImpl, kBlockThreads>;\n      this->AllocShmem<Loader>();\n      for (auto& page : p_fmat->GetBatches<SparsePage>()) {\n        SparsePageView batch{ctx_, page, n_features_};\n        fn(Loader{}, std::forward<SparsePageView>(batch));\n      }\n    } else {\n      p_fmat->Info().feature_types.SetDevice(ctx_->Device());\n      auto feature_types = p_fmat->Info().feature_types.ConstDeviceSpan();\n\n      for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n        page.Impl()->Visit(ctx_, feature_types, [&](auto&& batch) {\n          using Acc = std::remove_reference_t<decltype(batch)>;\n          // No shared memory use for ellpack\n          using Loader = EllpackLoader<Acc, EncAccessor>;\n          constexpr std::uint32_t kBlockThreads = 256;\n          fn(LoaderType<Loader, kBlockThreads>{},\n             std::forward<common::GetValueT<decltype(batch)>>(batch));\n        });\n      }\n    }\n  }\n};\n\ntemplate <typename Kernel>\nvoid LaunchPredict(Context const* ctx, bool is_dense, enc::DeviceColumnsView const& new_enc,\n                   gbm::GBTreeModel const& model, Kernel&& launch) {\n  if (is_dense) {\n    if (model.Cats() && model.Cats()->HasCategorical() && new_enc.HasCategorical()) {\n      auto [acc, mapping] = MakeCatAccessor(ctx, new_enc, model.Cats());\n      auto cfg =\n          LaunchConfig<std::true_type, decltype(acc)>{ctx, model.learner_model_param->num_feature};\n      launch(std::move(cfg), std::move(acc));\n    } else {\n      auto cfg =\n          LaunchConfig<std::true_type, NoOpAccessor>{ctx, model.learner_model_param->num_feature};\n      launch(std::move(cfg), NoOpAccessor{});\n    }\n  } else {\n    if 
(model.Cats() && model.Cats()->HasCategorical() && new_enc.HasCategorical()) {\n      auto [acc, mapping] = MakeCatAccessor(ctx, new_enc, model.Cats());\n      auto cfg =\n          LaunchConfig<std::false_type, decltype(acc)>{ctx, model.learner_model_param->num_feature};\n      launch(std::move(cfg), std::move(acc));\n    } else {\n      auto cfg =\n          LaunchConfig<std::false_type, NoOpAccessor>{ctx, model.learner_model_param->num_feature};\n      launch(std::move(cfg), NoOpAccessor{});\n    }\n  }\n}\n\n}  // anonymous namespace\n\nclass GPUPredictor : public xgboost::Predictor {\n private:\n  void PredictDMatrix(DMatrix* p_fmat, HostDeviceVector<float>* out_preds,\n                      gbm::GBTreeModel const& model, bst_tree_t tree_begin, bst_tree_t tree_end,\n                      common::OptionalWeights tree_weights) const {\n    if (tree_end - tree_begin == 0) {\n      return;\n    }\n    out_preds->SetDevice(ctx_->Device());\n    auto const& info = p_fmat->Info();\n\n    DeviceModel d_model{this->ctx_->Device(), model,    false,\n                        tree_begin,           tree_end, CopyViews{this->ctx_}};\n\n    if (info.IsColumnSplit()) {\n      column_split_helper_.PredictBatch(p_fmat, out_preds, model, d_model, tree_weights);\n      return;\n    }\n\n    CHECK_LE(p_fmat->Info().num_col_, model.learner_model_param->num_feature);\n    auto n_features = model.learner_model_param->num_feature;\n\n    auto new_enc =\n        p_fmat->Cats()->NeedRecode() ? 
p_fmat->Cats()->DeviceView(ctx_) : enc::DeviceColumnsView{};\n    LaunchPredict(ctx_, p_fmat->IsDense(), new_enc, model, [&](auto&& cfg, auto&& acc) {\n      using Config = common::GetValueT<decltype(cfg)>;\n\n      bst_idx_t batch_offset = 0;\n      cfg.ForEachBatch(p_fmat, [&](auto&& loader_t, auto&& batch) {\n        using Loader = typename common::GetValueT<decltype(loader_t)>;\n        auto n_rows = batch.NumRows();\n        cfg.template LaunchPredictKernel<Loader>(\n            std::move(batch), std::numeric_limits<float>::quiet_NaN(), n_features, d_model, acc,\n            batch_offset, out_preds, tree_weights);\n        batch_offset += n_rows * model.learner_model_param->OutputLength();\n      });\n    });\n  }\n\n public:\n  explicit GPUPredictor(Context const* ctx) : Predictor{ctx}, column_split_helper_{ctx} {}\n\n  ~GPUPredictor() override {\n    if (ctx_->IsCUDA() && ctx_->Ordinal() < curt::AllVisibleGPUs()) {\n      dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n    }\n  }\n\n  void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts, const gbm::GBTreeModel& model,\n                    bst_tree_t tree_begin, bst_tree_t tree_end = 0,\n                    std::vector<float> const* tree_weights = nullptr) const override {\n    xgboost_NVTX_FN_RANGE();\n    CHECK(ctx_->Device().IsCUDA()) << \"Set `device' to `cuda` for processing GPU data.\";\n    auto* out_preds = &predts->predictions;\n    if (tree_end == 0) {\n      tree_end = model.trees.size();\n    }\n    HostDeviceVector<float> weights;\n    auto pred_weights = common::OptionalWeights{1.0f};\n    if (tree_weights != nullptr) {\n      weights.SetDevice(ctx_->Device());\n      weights.HostVector().assign(tree_weights->cbegin() + tree_begin,\n                                  tree_weights->cbegin() + tree_end);\n      pred_weights = common::MakeOptionalWeights(ctx_->Device(), weights);\n    }\n    this->PredictDMatrix(dmat, out_preds, model, tree_begin, tree_end, pred_weights);\n  }\n\n  template 
<typename Adapter>\n  void DispatchedInplacePredict(std::shared_ptr<Adapter> m, std::shared_ptr<DMatrix> p_m,\n                                const gbm::GBTreeModel& model, float missing,\n                                PredictionCacheEntry* out_preds, bst_tree_t tree_begin,\n                                bst_tree_t tree_end, common::OptionalWeights tree_weights) const {\n    CHECK_EQ(dh::CurrentDevice(), m->Device().ordinal)\n        << \"XGBoost is running on device: \" << this->ctx_->Device().Name() << \", \"\n        << \"but data is on: \" << m->Device().Name();\n    this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);\n    out_preds->predictions.SetDevice(m->Device());\n    using BatchT = common::GetValueT<decltype(std::declval<Adapter>().Value())>;\n\n    auto n_samples = m->NumRows();\n    auto n_features = model.learner_model_param->num_feature;\n\n    DeviceModel d_model{ctx_->Device(), model, false, tree_begin, tree_end, CopyViews{this->ctx_}};\n\n    if constexpr (std::is_same_v<Adapter, data::CudfAdapter>) {\n      if (m->HasCategorical()) {\n        auto new_enc = m->DCats();\n        LaunchPredict(this->ctx_, false, new_enc, model, [&](auto&& cfg, auto&& acc) {\n          using EncAccessor = std::remove_reference_t<decltype(acc)>;\n          using LoaderImpl = DeviceAdapterLoader<BatchT, EncAccessor>;\n          using Loader =\n              typename common::GetValueT<decltype(cfg)>::template LoaderType<LoaderImpl, 128>;\n          cfg.template AllocShmem<Loader>();\n          cfg.template LaunchPredictKernel<Loader>(m->Value(), missing, n_features, d_model, acc, 0,\n                                                   &out_preds->predictions, tree_weights);\n        });\n        return;\n      }\n    }\n\n    LaunchPredict(this->ctx_, false, enc::DeviceColumnsView{}, model, [&](auto&& cfg, auto&& acc) {\n      using EncAccessor = std::remove_reference_t<decltype(acc)>;\n      CHECK((std::is_same_v<EncAccessor, 
NoOpAccessor>));\n      using LoaderImpl = DeviceAdapterLoader<BatchT, EncAccessor>;\n      using Loader =\n          typename common::GetValueT<decltype(cfg)>::template LoaderType<LoaderImpl, 128>;\n      cfg.template AllocShmem<Loader>();\n      cfg.template LaunchPredictKernel<Loader>(m->Value(), missing, n_features, d_model, acc, 0,\n                                               &out_preds->predictions, tree_weights);\n    });\n  }\n\n  [[nodiscard]] bool InplacePredict(std::shared_ptr<DMatrix> p_m, gbm::GBTreeModel const& model,\n                                    float missing, PredictionCacheEntry* out_preds,\n                                    bst_tree_t tree_begin, bst_tree_t tree_end,\n                                    std::vector<float> const* tree_weights) const override {\n    xgboost_NVTX_FN_RANGE();\n    auto proxy = dynamic_cast<data::DMatrixProxy*>(p_m.get());\n    CHECK(proxy) << error::InplacePredictProxy();\n    if (tree_end == 0) {\n      tree_end = model.trees.size();\n    }\n    HostDeviceVector<float> weights;\n    auto pred_weights = common::OptionalWeights{1.0f};\n    if (tree_weights != nullptr) {\n      weights.SetDevice(ctx_->Device());\n      weights.HostVector().assign(tree_weights->cbegin() + tree_begin,\n                                  tree_weights->cbegin() + tree_end);\n      pred_weights = common::MakeOptionalWeights(ctx_->Device(), weights);\n    }\n    bool type_error = false;\n    data::cuda_impl::DispatchAny<false>(\n        proxy,\n        [&](auto x) {\n          CheckProxyDMatrix(x, proxy, model.learner_model_param);\n          this->DispatchedInplacePredict(x, p_m, model, missing, out_preds, tree_begin, tree_end,\n                                         pred_weights);\n        },\n        &type_error);\n    return !type_error;\n  }\n\n  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                           const gbm::GBTreeModel& model, bst_tree_t tree_end,\n                   
        std::vector<float> const* tree_weights, bool approximate, int,\n                           unsigned) const override {\n    xgboost_NVTX_FN_RANGE();\n    if (approximate) {\n      LOG(FATAL) << \"Approximated contribution is not implemented in the GPU predictor, use CPU \"\n                    \"instead.\";\n    }\n    interpretability::ShapValues(ctx_, p_fmat, out_contribs, model, tree_end, tree_weights, 0, 0);\n  }\n\n  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                                       gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                                       std::vector<float> const* tree_weights,\n                                       bool approximate) const override {\n    xgboost_NVTX_FN_RANGE();\n    if (approximate) {\n      LOG(FATAL) << \"Approximated contribution is not implemented in GPU predictor, use cpu \"\n                    \"instead.\";\n    }\n    interpretability::ShapInteractionValues(ctx_, p_fmat, out_contribs, model, tree_end,\n                                            tree_weights, approximate);\n  }\n\n  void PredictLeaf(DMatrix* p_fmat, HostDeviceVector<float>* predictions,\n                   gbm::GBTreeModel const& model, bst_tree_t tree_end) const override {\n    xgboost_NVTX_FN_RANGE();\n    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));\n\n    const MetaInfo& info = p_fmat->Info();\n    bst_idx_t n_samples = info.num_row_;\n    tree_end = GetTreeLimit(model.trees, tree_end);\n    predictions->SetDevice(ctx_->Device());\n    predictions->Resize(n_samples * tree_end);\n\n    DeviceModel d_model{ctx_->Device(), model, false, 0, tree_end, CopyViews{this->ctx_}};\n\n    if (info.IsColumnSplit()) {\n      column_split_helper_.PredictLeaf(p_fmat, predictions, model, d_model);\n      return;\n    }\n\n    bst_feature_t n_features = model.learner_model_param->num_feature;\n    auto new_enc =\n        p_fmat->Cats()->NeedRecode() ? 
p_fmat->Cats()->DeviceView(ctx_) : enc::DeviceColumnsView{};\n\n    LaunchPredict(ctx_, p_fmat->IsDense(), new_enc, model, [&](auto&& cfg, auto&& acc) {\n      bst_idx_t batch_offset = 0;\n      cfg.ForEachBatch(p_fmat, [&](auto&& loader_t, auto&& batch) {\n        using Loader = typename common::GetValueT<decltype(loader_t)>;\n        using Config = common::GetValueT<decltype(cfg)>;\n        auto n_rows = batch.NumRows();\n        auto kernel = PredictLeafKernel<typename Loader::Type, common::GetValueT<decltype(batch)>,\n                                        Config::HasMissing(), typename Config::EncAccessorT>;\n        cfg.template Launch<Loader>(kernel, std::move(batch), d_model.Trees(),\n                                    predictions->DeviceSpan().subspan(batch_offset),\n                                    d_model.tree_begin, d_model.tree_end, n_features,\n                                    cfg.UseShared(), std::numeric_limits<float>::quiet_NaN(),\n                                    std::forward<typename Config::EncAccessorT>(acc));\n\n        batch_offset += n_rows;\n      });\n    });\n  }\n\n private:\n  ColumnSplitHelper column_split_helper_;\n};\n\nXGBOOST_REGISTER_PREDICTOR(GPUPredictor, \"gpu_predictor\")\n    .describe(\"Make predictions using GPU.\")\n    .set_body([](Context const* ctx) { return new GPUPredictor(ctx); });\n\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/interpretability/shap.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#include \"shap.h\"\n\n#include <algorithm>    // for fill\n#include <type_traits>  // for remove_const_t\n#include <vector>       // for vector\n\n#include \"../../common/threading_utils.h\"  // for ParallelFor\n#include \"../../gbm/gbtree_model.h\"        // for GBTreeModel\n#include \"../../tree/tree_view.h\"          // for ScalarTreeView\n#include \"../data_accessor.h\"              // for GHistIndexMatrixView\n#include \"../predict_fn.h\"                 // for GetTreeLimit\n#include \"../treeshap.h\"                   // for CalculateContributions\n#include \"dmlc/omp.h\"                      // for omp_get_thread_num\n#include \"xgboost/base.h\"                  // for bst_omp_uint\n#include \"xgboost/logging.h\"               // for CHECK\n#include \"xgboost/tree_model.h\"            // for MTNotImplemented\n\nnamespace xgboost::interpretability {\nnamespace {\nvoid ValidateTreeWeights(std::vector<float> const *tree_weights, bst_tree_t tree_end) {\n  if (tree_weights == nullptr) {\n    return;\n  }\n  CHECK_GE(tree_weights->size(), static_cast<std::size_t>(tree_end));\n}\n\nfloat FillNodeMeanValues(tree::ScalarTreeView const &tree, bst_node_t nidx,\n                         std::vector<float> *mean_values) {\n  float result;\n  auto &node_mean_values = *mean_values;\n  if (tree.IsLeaf(nidx)) {\n    result = tree.LeafValue(nidx);\n  } else {\n    result = FillNodeMeanValues(tree, tree.LeftChild(nidx), mean_values) *\n             tree.Stat(tree.LeftChild(nidx)).sum_hess;\n    result += FillNodeMeanValues(tree, tree.RightChild(nidx), mean_values) *\n              tree.Stat(tree.RightChild(nidx)).sum_hess;\n    result /= tree.Stat(nidx).sum_hess;\n  }\n  node_mean_values[nidx] = result;\n  return result;\n}\n\nvoid FillNodeMeanValues(tree::ScalarTreeView const &tree, std::vector<float> *mean_values) {\n  auto n_nodes = tree.Size();\n  if 
(static_cast<decltype(n_nodes)>(mean_values->size()) == n_nodes) {\n    return;\n  }\n  mean_values->resize(n_nodes);\n  FillNodeMeanValues(tree, 0, mean_values);\n}\n\nvoid CalculateApproxContributions(tree::ScalarTreeView const &tree, RegTree::FVec const &feats,\n                                  std::vector<float> *mean_values,\n                                  std::vector<bst_float> *out_contribs) {\n  CHECK_EQ(out_contribs->size(), feats.Size() + 1);\n  CalculateContributionsApprox(tree, feats, mean_values, out_contribs->data());\n}\n\ntemplate <typename EncAccessor, typename Fn>\nvoid DispatchByBatchView(Context const *ctx, DMatrix *p_fmat, EncAccessor acc, Fn &&fn) {\n  using AccT = std::decay_t<EncAccessor>;\n  if (p_fmat->PageExists<SparsePage>()) {\n    for (auto const &page : p_fmat->GetBatches<SparsePage>()) {\n      predictor::SparsePageView<AccT> view{page.GetView(), page.base_rowid, acc};\n      fn(view);\n    }\n  } else {\n    auto ft = p_fmat->Info().feature_types.ConstHostVector();\n    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx, {})) {\n      predictor::GHistIndexMatrixView<AccT> view{page, acc, ft};\n      fn(view);\n    }\n  }\n}\n\ntemplate <typename Fn>\nvoid LaunchShap(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model, Fn &&fn) {\n  if (model.Cats()->HasCategorical() && p_fmat->Cats()->NeedRecode()) {\n    auto new_enc = p_fmat->Cats()->HostView();\n    auto [acc, mapping] = ::xgboost::cpu_impl::MakeCatAccessor(ctx, new_enc, model.Cats());\n    DispatchByBatchView(ctx, p_fmat, acc, fn);\n  } else {\n    DispatchByBatchView(ctx, p_fmat, NoOpAccessor{}, fn);\n  }\n}\n}  // namespace\n\nnamespace cpu_impl {\nvoid ShapValues(Context const *ctx, DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,\n                gbm::GBTreeModel const &model, bst_tree_t tree_end,\n                std::vector<float> const *tree_weights, int condition, unsigned condition_feature) {\n  
CHECK(!model.learner_model_param->IsVectorLeaf()) << \"Predict contribution\" << MTNotImplemented();\n  CHECK(!p_fmat->Info().IsColumnSplit())\n      << \"Predict contribution support for column-wise data split is not yet implemented.\";\n  MetaInfo const &info = p_fmat->Info();\n  // number of valid trees\n  tree_end = predictor::GetTreeLimit(model.trees, tree_end);\n  CHECK_GE(tree_end, 0);\n  ValidateTreeWeights(tree_weights, tree_end);\n  auto const n_trees = static_cast<std::size_t>(tree_end);\n  auto const n_threads = ctx->Threads();\n  size_t const ncolumns = model.learner_model_param->num_feature + 1;\n  // allocate space for (number of features + bias) times the number of rows\n  std::vector<bst_float> &contribs = out_contribs->HostVector();\n  contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);\n  // make sure contributions is zeroed, we could be reusing a previously allocated one\n  std::fill(contribs.begin(), contribs.end(), 0);\n  // initialize tree node mean values\n  std::vector<std::vector<float>> mean_values(n_trees);\n  common::ParallelFor(n_trees, n_threads, [&](auto i) {\n    FillNodeMeanValues(model.trees[i]->HostScView(), &(mean_values[i]));\n  });\n\n  auto const n_groups = model.learner_model_param->num_output_group;\n  CHECK_NE(n_groups, 0);\n  auto const base_score = model.learner_model_param->BaseScore(DeviceOrd::CPU());\n  auto const h_tree_groups = model.TreeGroups(DeviceOrd::CPU());\n  std::vector<RegTree::FVec> feats_tloc(n_threads);\n  std::vector<std::vector<bst_float>> contribs_tloc(n_threads, std::vector<bst_float>(ncolumns));\n\n  auto device = ctx->Device().IsSycl() ? 
DeviceOrd::CPU() : ctx->Device();\n  auto base_margin = info.base_margin_.View(device);\n\n  auto process_view = [&](auto &&view) {\n    common::ParallelFor(view.Size(), n_threads, [&](auto i) {\n      auto tid = omp_get_thread_num();\n      auto &feats = feats_tloc[tid];\n      if (feats.Size() == 0) {\n        feats.Init(model.learner_model_param->num_feature);\n      }\n      auto &this_tree_contribs = contribs_tloc[tid];\n      auto row_idx = view.base_rowid + i;\n      auto n_valid = view.DoFill(i, feats.Data().data());\n      feats.HasMissing(n_valid != feats.Size());\n      for (bst_target_t gid = 0; gid < n_groups; ++gid) {\n        float *p_contribs = &contribs[(row_idx * n_groups + gid) * ncolumns];\n        for (bst_tree_t j = 0; j < tree_end; ++j) {\n          if (h_tree_groups[j] != gid) {\n            continue;\n          }\n          std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);\n          auto const sc_tree = model.trees[j]->HostScView();\n          CalculateContributions(sc_tree, feats, &mean_values[j], this_tree_contribs.data(),\n                                 condition, condition_feature);\n          for (size_t ci = 0; ci < ncolumns; ++ci) {\n            p_contribs[ci] +=\n                this_tree_contribs[ci] * (tree_weights == nullptr ? 
1 : (*tree_weights)[j]);\n          }\n        }\n        if (base_margin.Size() != 0) {\n          CHECK_EQ(base_margin.Shape(1), n_groups);\n          p_contribs[ncolumns - 1] += base_margin(row_idx, gid);\n        } else {\n          p_contribs[ncolumns - 1] += base_score(gid);\n        }\n      }\n      feats.Drop();\n    });\n  };\n\n  LaunchShap(ctx, p_fmat, model, process_view);\n}\n\nvoid ApproxFeatureImportance(Context const *ctx, DMatrix *p_fmat,\n                             HostDeviceVector<float> *out_contribs, gbm::GBTreeModel const &model,\n                             bst_tree_t tree_end, std::vector<float> const *tree_weights) {\n  CHECK(!model.learner_model_param->IsVectorLeaf()) << \"Predict contribution\" << MTNotImplemented();\n  CHECK(!p_fmat->Info().IsColumnSplit())\n      << \"Predict contribution support for column-wise data split is not yet implemented.\";\n  MetaInfo const &info = p_fmat->Info();\n  tree_end = predictor::GetTreeLimit(model.trees, tree_end);\n  CHECK_GE(tree_end, 0);\n  ValidateTreeWeights(tree_weights, tree_end);\n  auto const n_trees = static_cast<std::size_t>(tree_end);\n  auto const n_threads = ctx->Threads();\n  size_t const ncolumns = model.learner_model_param->num_feature + 1;\n  std::vector<bst_float> &contribs = out_contribs->HostVector();\n  contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);\n  std::fill(contribs.begin(), contribs.end(), 0);\n  std::vector<std::vector<float>> mean_values(n_trees);\n  common::ParallelFor(n_trees, n_threads, [&](auto i) {\n    FillNodeMeanValues(model.trees[i]->HostScView(), &(mean_values[i]));\n  });\n\n  auto const n_groups = model.learner_model_param->num_output_group;\n  CHECK_NE(n_groups, 0);\n  auto const base_score = model.learner_model_param->BaseScore(DeviceOrd::CPU());\n  auto const h_tree_groups = model.TreeGroups(DeviceOrd::CPU());\n  std::vector<RegTree::FVec> feats_tloc(n_threads);\n  std::vector<std::vector<bst_float>> 
contribs_tloc(n_threads, std::vector<bst_float>(ncolumns));\n\n  auto device = ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device();\n  auto base_margin = info.base_margin_.View(device);\n\n  auto process_view = [&](auto &&view) {\n    common::ParallelFor(view.Size(), n_threads, [&](auto i) {\n      auto tid = omp_get_thread_num();\n      auto &feats = feats_tloc[tid];\n      if (feats.Size() == 0) {\n        feats.Init(model.learner_model_param->num_feature);\n      }\n      auto &this_tree_contribs = contribs_tloc[tid];\n      auto row_idx = view.base_rowid + i;\n      auto n_valid = view.DoFill(i, feats.Data().data());\n      feats.HasMissing(n_valid != feats.Size());\n      for (bst_target_t gid = 0; gid < n_groups; ++gid) {\n        float *p_contribs = &contribs[(row_idx * n_groups + gid) * ncolumns];\n        for (bst_tree_t j = 0; j < tree_end; ++j) {\n          if (h_tree_groups[j] != gid) {\n            continue;\n          }\n          std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);\n          auto const sc_tree = model.trees[j]->HostScView();\n          CalculateApproxContributions(sc_tree, feats, &mean_values[j], &this_tree_contribs);\n          for (size_t ci = 0; ci < ncolumns; ++ci) {\n            p_contribs[ci] +=\n                this_tree_contribs[ci] * (tree_weights == nullptr ? 
1 : (*tree_weights)[j]);\n          }\n        }\n        if (base_margin.Size() != 0) {\n          CHECK_EQ(base_margin.Shape(1), n_groups);\n          p_contribs[ncolumns - 1] += base_margin(row_idx, gid);\n        } else {\n          p_contribs[ncolumns - 1] += base_score(gid);\n        }\n      }\n      feats.Drop();\n    });\n  };\n\n  LaunchShap(ctx, p_fmat, model, process_view);\n}\n\nvoid ShapInteractionValues(Context const *ctx, DMatrix *p_fmat,\n                           HostDeviceVector<float> *out_contribs, gbm::GBTreeModel const &model,\n                           bst_tree_t tree_end, std::vector<float> const *tree_weights,\n                           bool approximate) {\n  CHECK(!model.learner_model_param->IsVectorLeaf())\n      << \"Predict interaction contribution\" << MTNotImplemented();\n  CHECK(!p_fmat->Info().IsColumnSplit()) << \"Predict interaction contribution support for \"\n                                            \"column-wise data split is not yet implemented.\";\n  MetaInfo const &info = p_fmat->Info();\n  auto const ngroup = model.learner_model_param->num_output_group;\n  auto const ncolumns = model.learner_model_param->num_feature;\n  const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);\n  const unsigned mrow_chunk = (ncolumns + 1) * (ncolumns + 1);\n  const unsigned crow_chunk = ngroup * (ncolumns + 1);\n\n  // allocate space for (number of features^2) times the number of rows and tmp off/on contribs\n  std::vector<bst_float> &contribs = out_contribs->HostVector();\n  contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));\n  HostDeviceVector<bst_float> contribs_off_hdv(info.num_row_ * ngroup * (ncolumns + 1));\n  auto &contribs_off = contribs_off_hdv.HostVector();\n  HostDeviceVector<bst_float> contribs_on_hdv(info.num_row_ * ngroup * (ncolumns + 1));\n  auto &contribs_on = contribs_on_hdv.HostVector();\n  HostDeviceVector<bst_float> contribs_diag_hdv(info.num_row_ * ngroup * (ncolumns + 1));\n  
auto &contribs_diag = contribs_diag_hdv.HostVector();\n\n  // Compute the difference in effects when conditioning on each of the features on and off\n  // see: Axiomatic characterizations of probabilistic and\n  //      cardinal-probabilistic interaction indices\n  if (approximate) {\n    ApproxFeatureImportance(ctx, p_fmat, &contribs_diag_hdv, model, tree_end, tree_weights);\n  } else {\n    ShapValues(ctx, p_fmat, &contribs_diag_hdv, model, tree_end, tree_weights, 0, 0);\n  }\n  for (size_t i = 0; i < ncolumns + 1; ++i) {\n    if (approximate) {\n      ApproxFeatureImportance(ctx, p_fmat, &contribs_off_hdv, model, tree_end, tree_weights);\n      ApproxFeatureImportance(ctx, p_fmat, &contribs_on_hdv, model, tree_end, tree_weights);\n    } else {\n      ShapValues(ctx, p_fmat, &contribs_off_hdv, model, tree_end, tree_weights, -1, i);\n      ShapValues(ctx, p_fmat, &contribs_on_hdv, model, tree_end, tree_weights, 1, i);\n    }\n\n    for (size_t j = 0; j < info.num_row_; ++j) {\n      for (std::remove_const_t<decltype(ngroup)> l = 0; l < ngroup; ++l) {\n        const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1);\n        const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1);\n        contribs[o_offset + i] = 0;\n        for (size_t k = 0; k < ncolumns + 1; ++k) {\n          // fill in the diagonal with additive effects, and off-diagonal with the interactions\n          if (k == i) {\n            contribs[o_offset + i] += contribs_diag[c_offset + k];\n          } else {\n            contribs[o_offset + k] = (contribs_on[c_offset + k] - contribs_off[c_offset + k]) / 2.0;\n            contribs[o_offset + i] -= contribs[o_offset + k];\n          }\n        }\n      }\n    }\n  }\n}\n}  // namespace cpu_impl\n}  // namespace xgboost::interpretability\n"
  },
  {
    "path": "src/predictor/interpretability/shap.cu",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#include <GPUTreeShap/gpu_treeshap.h>\n#include <thrust/copy.h>\n#include <thrust/device_vector.h>\n#include <thrust/execution_policy.h>\n#include <thrust/extrema.h>\n#include <thrust/fill.h>\n#include <thrust/scan.h>\n\n#include <algorithm>\n#include <cuda/functional>   // for proclaim_return_type\n#include <cuda/std/utility>  // for swap\n#include <cuda/std/variant>  // for variant\n#include <limits>\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n#include <vector>\n\n#include \"../../common/categorical.h\"\n#include \"../../common/common.h\"\n#include \"../../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../common/cuda_rt_utils.h\"   // for SetDevice\n#include \"../../common/device_helpers.cuh\"\n#include \"../../common/nvtx_utils.h\"\n#include \"../../common/optional_weight.h\"\n#include \"../../data/batch_utils.h\"      // for StaticBatch\n#include \"../../data/cat_container.cuh\"  // for EncPolicy, MakeCatAccessor\n#include \"../../data/cat_container.h\"    // for NoOpAccessor\n#include \"../../data/ellpack_page.cuh\"\n#include \"../../gbm/gbtree_model.h\"\n#include \"../../tree/tree_view.h\"\n#include \"../gbtree_view.h\"\n#include \"../gpu_data_accessor.cuh\"\n#include \"../predict_fn.h\"  // for GetTreeLimit\n#include \"shap.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/host_device_vector.h\"\n#include \"xgboost/linalg.h\"  // for UnravelIndex\n#include \"xgboost/logging.h\"\n#include \"xgboost/multi_target_tree_model.h\"  // for MTNotImplemented\n\nnamespace xgboost::interpretability::cuda_impl {\nnamespace {\nusing predictor::EllpackLoader;\nusing predictor::GBTreeModelView;\nusing predictor::SparsePageLoaderNoShared;\nusing predictor::SparsePageView;\nusing ::xgboost::cuda_impl::StaticBatch;\n\nusing TreeViewVar = cuda::std::variant<tree::ScalarTreeView, tree::MultiTargetTreeView>;\n\nstruct CopyViews {\n  Context const* 
ctx;\n  explicit CopyViews(Context const* ctx) : ctx{ctx} {}\n\n  void operator()(dh::DeviceUVector<TreeViewVar>* p_dst, std::vector<TreeViewVar>&& src) {\n    xgboost_NVTX_FN_RANGE();\n    p_dst->resize(src.size());\n    auto d_dst = dh::ToSpan(*p_dst);\n    dh::safe_cuda(cudaMemcpyAsync(d_dst.data(), src.data(), d_dst.size_bytes(), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n  }\n};\n\nusing DeviceModel = GBTreeModelView<dh::DeviceUVector, TreeViewVar, CopyViews>;\n\nstruct ShapSplitCondition {\n  ShapSplitCondition() = default;\n  XGBOOST_DEVICE\n  ShapSplitCondition(float feature_lower_bound, float feature_upper_bound, bool is_missing_branch,\n                     common::CatBitField cats)\n      : feature_lower_bound(feature_lower_bound),\n        feature_upper_bound(feature_upper_bound),\n        is_missing_branch(is_missing_branch),\n        categories{std::move(cats)} {\n    assert(feature_lower_bound <= feature_upper_bound);\n  }\n\n  float feature_lower_bound;\n  float feature_upper_bound;\n  common::CatBitField categories;\n  bool is_missing_branch;\n\n  [[nodiscard]] XGBOOST_DEVICE bool EvaluateSplit(float x) const {\n    if (isnan(x)) {\n      return is_missing_branch;\n    }\n    if (categories.Capacity() != 0) {\n      auto cat = static_cast<uint32_t>(x);\n      return categories.Check(cat);\n    } else {\n      return x >= feature_lower_bound && x < feature_upper_bound;\n    }\n  }\n\n  XGBOOST_DEVICE static common::CatBitField Intersect(common::CatBitField l,\n                                                      common::CatBitField r) {\n    if (l.Data() == r.Data()) {\n      return l;\n    }\n    if (l.Capacity() > r.Capacity()) {\n      cuda::std::swap(l, r);\n    }\n    auto l_bits = l.Bits();\n    auto r_bits = r.Bits();\n    auto n_bits = l_bits.size() < r_bits.size() ? 
l_bits.size() : r_bits.size();\n    for (size_t i = 0; i < n_bits; ++i) {\n      l_bits[i] &= r_bits[i];\n    }\n    return l;\n  }\n\n  XGBOOST_DEVICE void Merge(ShapSplitCondition other) {\n    if (categories.Capacity() != 0 || other.categories.Capacity() != 0) {\n      categories = Intersect(categories, other.categories);\n    } else {\n      feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);\n      feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);\n    }\n    is_missing_branch = is_missing_branch && other.is_missing_branch;\n  }\n};\n\nstruct PathInfo {\n  std::size_t length;\n  bst_node_t nidx;\n  bst_tree_t tree_idx;\n\n  [[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return nidx != -1; }\n};\nstatic_assert(sizeof(PathInfo) == 16);\n\nauto MakeTreeSegments(Context const* ctx, bst_tree_t tree_begin, bst_tree_t tree_end,\n                      gbm::GBTreeModel const& model) {\n  auto tree_segments = HostDeviceVector<size_t>({}, ctx->Device());\n  auto& h_tree_segments = tree_segments.HostVector();\n  h_tree_segments.reserve((tree_end - tree_begin) + 1);\n  std::size_t sum = 0;\n  h_tree_segments.push_back(sum);\n  for (auto tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {\n    auto const& p_tree = model.trees.at(tree_idx);\n    CHECK(!p_tree->IsMultiTarget()) << \" SHAP \" << MTNotImplemented();\n    sum += p_tree->Size();\n    h_tree_segments.push_back(sum);\n  }\n  return tree_segments;\n}\n\nvoid ExtractPaths(Context const* ctx,\n                  dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>* paths,\n                  gbm::GBTreeModel const& h_model, DeviceModel const& d_model,\n                  dh::device_vector<uint32_t>* path_categories,\n                  common::OptionalWeights tree_weights) {\n  curt::SetDevice(ctx->Ordinal());\n\n  dh::caching_device_vector<PathInfo> info(d_model.n_nodes);\n  auto d_trees = d_model.Trees();\n  auto tree_segments = MakeTreeSegments(ctx, 
d_model.tree_begin, d_model.tree_end, h_model);\n  CHECK_EQ(tree_segments.ConstHostVector().back(), d_model.n_nodes);\n  auto d_tree_segments = tree_segments.ConstDeviceSpan();\n\n  auto path_it = dh::MakeIndexTransformIter(\n      cuda::proclaim_return_type<PathInfo>([=] __device__(size_t idx) -> PathInfo {\n        bst_tree_t const tree_idx = dh::SegmentId(d_tree_segments, idx);\n        bst_node_t const nidx = idx - d_tree_segments[tree_idx];\n        auto const& tree = cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx]);\n        if (!tree.IsLeaf(nidx) || tree.IsDeleted(nidx)) {\n          return PathInfo{0, -1, 0};\n        }\n        std::size_t path_length = 1;\n        auto iter_nidx = nidx;\n        while (!tree.IsRoot(iter_nidx)) {\n          iter_nidx = tree.Parent(iter_nidx);\n          path_length++;\n        }\n        return PathInfo{path_length, nidx, tree_idx};\n      }));\n  auto end = thrust::copy_if(\n      ctx->CUDACtx()->CTP(), path_it, path_it + d_model.n_nodes, info.begin(),\n      cuda::proclaim_return_type<bool>([=] __device__(PathInfo const& e) { return e.IsLeaf(); }));\n\n  info.resize(end - info.begin());\n  using LenT = decltype(std::declval<PathInfo>().length);\n  auto length_iterator = dh::MakeTransformIterator<LenT>(\n      info.begin(), cuda::proclaim_return_type<LenT>(\n                        [=] __device__(PathInfo const& info) { return info.length; }));\n  dh::caching_device_vector<size_t> path_segments(info.size() + 1);\n  thrust::fill_n(ctx->CUDACtx()->CTP(), path_segments.begin(), 1, std::size_t{0});\n  thrust::inclusive_scan(ctx->CUDACtx()->CTP(), length_iterator, length_iterator + info.size(),\n                         path_segments.begin() + 1);\n\n  paths->resize(path_segments.back());\n\n  auto d_paths = dh::ToSpan(*paths);\n  auto d_info = info.data().get();\n  auto d_tree_groups = d_model.tree_groups;\n  auto d_path_segments = path_segments.data().get();\n\n  std::size_t max_cat = 0;\n  if 
(std::any_of(h_model.trees.cbegin(), h_model.trees.cend(),\n                  [](auto const& p_tree) { return p_tree->HasCategoricalSplit(); })) {\n    auto max_elem_it = dh::MakeIndexTransformIter([=] __device__(std::size_t i) -> std::size_t {\n      auto tree_idx = dh::SegmentId(d_tree_segments, i);\n      auto nidx = i - d_tree_segments[tree_idx];\n      return cuda::std::get<tree::ScalarTreeView>(d_trees[tree_idx])\n          .GetCategoriesMatrix()\n          .node_ptr[nidx]\n          .size;\n    });\n    auto max_cat_it =\n        thrust::max_element(ctx->CUDACtx()->CTP(), max_elem_it, max_elem_it + d_model.n_nodes);\n    dh::CachingDeviceUVector<std::size_t> d_max_cat(1);\n    auto s_max_cat = dh::ToSpan(d_max_cat);\n    dh::LaunchN(1, ctx->CUDACtx()->Stream(),\n                [=] __device__(std::size_t) { s_max_cat[0] = *max_cat_it; });\n    dh::safe_cuda(\n        cudaMemcpy(&max_cat, s_max_cat.data(), s_max_cat.size_bytes(), cudaMemcpyDeviceToHost));\n    CHECK_GE(max_cat, 1);\n    path_categories->resize(max_cat * paths->size());\n  }\n\n  common::Span<uint32_t> d_path_categories = dh::ToSpan(*path_categories);\n\n  dh::LaunchN(info.size(), ctx->CUDACtx()->Stream(), [=] __device__(size_t idx) {\n    auto path_info = d_info[idx];\n    auto tree = cuda::std::get<tree::ScalarTreeView>(d_trees[path_info.tree_idx]);\n    std::int32_t group = d_tree_groups[path_info.tree_idx];\n    auto child_nidx = path_info.nidx;\n\n    // TreeSHAP is linear in the leaf outputs, so DART weights can be applied by\n    // scaling each tree's leaf value before it enters the path representation.\n    float v = tree.LeafValue(child_nidx) * tree_weights[path_info.tree_idx];\n    const float inf = std::numeric_limits<float>::infinity();\n    size_t output_position = d_path_segments[idx + 1] - 1;\n\n    while (!tree.IsRoot(child_nidx)) {\n      auto parent_nidx = tree.Parent(child_nidx);\n      double child_cover = tree.SumHess(child_nidx);\n      double parent_cover = 
tree.SumHess(parent_nidx);\n      double zero_fraction = child_cover / parent_cover;\n\n      bool is_left_path = tree.LeftChild(parent_nidx) == child_nidx;\n      bool is_missing_path = (!tree.DefaultLeft(parent_nidx) && !is_left_path) ||\n                             (tree.DefaultLeft(parent_nidx) && is_left_path);\n\n      float lower_bound = -inf;\n      float upper_bound = inf;\n      common::CatBitField bits;\n      if (common::IsCat(tree.cats.split_type, tree.Parent(child_nidx))) {\n        auto path_cats = d_path_categories.subspan(max_cat * output_position, max_cat);\n        auto node_cats = tree.NodeCats(tree.Parent(child_nidx));\n        SPAN_CHECK(path_cats.size() >= node_cats.size());\n        for (size_t i = 0; i < node_cats.size(); ++i) {\n          path_cats[i] = is_left_path ? ~node_cats[i] : node_cats[i];\n        }\n        bits = common::CatBitField{path_cats};\n      } else {\n        lower_bound = is_left_path ? -inf : tree.SplitCond(parent_nidx);\n        upper_bound = is_left_path ? 
tree.SplitCond(parent_nidx) : inf;\n      }\n      d_paths[output_position--] = gpu_treeshap::PathElement<ShapSplitCondition>{\n          idx,           tree.SplitIndex(parent_nidx),\n          group,         ShapSplitCondition{lower_bound, upper_bound, is_missing_path, bits},\n          zero_fraction, v};\n\n      child_nidx = parent_nidx;\n    }\n    d_paths[output_position] = {idx, -1, group, ShapSplitCondition{-inf, inf, false, {}}, 1.0, v};\n  });\n}\n\ntemplate <typename EncAccessor, typename Fn>\nvoid DispatchByBatchLoader(Context const* ctx, DMatrix* p_fmat, bst_feature_t n_features,\n                           EncAccessor acc, Fn&& fn) {\n  using AccT = std::decay_t<EncAccessor>;\n  if (p_fmat->PageExists<SparsePage>()) {\n    for (auto& page : p_fmat->GetBatches<SparsePage>()) {\n      SparsePageView batch{ctx, page, n_features};\n      auto loader = SparsePageLoaderNoShared<AccT>{batch, acc};\n      fn(std::move(loader), page.base_rowid);\n    }\n  } else {\n    p_fmat->Info().feature_types.SetDevice(ctx->Device());\n    auto feature_types = p_fmat->Info().feature_types.ConstDeviceSpan();\n\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx, StaticBatch(true))) {\n      page.Impl()->Visit(ctx, feature_types, [&](auto&& batch) {\n        using BatchT = std::remove_reference_t<decltype(batch)>;\n        auto loader = EllpackLoader<BatchT, AccT>{batch,\n                                                  /*use_shared=*/false,\n                                                  n_features,\n                                                  batch.NumRows(),\n                                                  std::numeric_limits<float>::quiet_NaN(),\n                                                  AccT{acc}};\n        fn(std::move(loader), batch.base_rowid);\n      });\n    }\n  }\n}\n\ntemplate <typename Fn>\nvoid LaunchShap(Context const* ctx, DMatrix* p_fmat, enc::DeviceColumnsView const& new_enc,\n                gbm::GBTreeModel const& model, 
Fn&& fn) {\n  auto n_features = model.learner_model_param->num_feature;\n  if (model.Cats() && model.Cats()->HasCategorical() && new_enc.HasCategorical()) {\n    auto [acc, mapping] = ::xgboost::cuda_impl::MakeCatAccessor(ctx, new_enc, model.Cats());\n    DispatchByBatchLoader(ctx, p_fmat, n_features, std::move(acc), fn);\n  } else {\n    DispatchByBatchLoader(ctx, p_fmat, n_features, NoOpAccessor{}, fn);\n  }\n}\n}  // namespace\n\nvoid ShapValues(Context const* ctx, DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                std::vector<float> const* tree_weights, int, unsigned) {\n  xgboost_NVTX_FN_RANGE();\n  StringView not_implemented{\n      \"contribution is not implemented in the GPU predictor, use CPU instead.\"};\n  CHECK(!p_fmat->Info().IsColumnSplit())\n      << \"Predict contribution support for column-wise data split is not yet implemented.\";\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  out_contribs->SetDevice(ctx->Device());\n  tree_end = predictor::GetTreeLimit(model.trees, tree_end);\n\n  const int ngroup = model.learner_model_param->num_output_group;\n  CHECK_NE(ngroup, 0);\n  size_t contributions_columns = model.learner_model_param->num_feature + 1;\n  auto dim_size = contributions_columns * model.learner_model_param->num_output_group;\n  out_contribs->Resize(p_fmat->Info().num_row_ * dim_size);\n  out_contribs->Fill(0.0f);\n  auto phis = out_contribs->DeviceSpan();\n\n  dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>> device_paths;\n  DeviceModel d_model{ctx->Device(), model, true, 0, tree_end, CopyViews{ctx}};\n  dh::device_vector<float> d_tree_weights;\n  auto weights = common::OptionalWeights{1.0f};\n  if (tree_weights != nullptr) {\n    // GPU TreeSHAP consumes device-resident path data, so materialize the optional\n    // tree weights on device before extracting the weighted leaf outputs.\n    
d_tree_weights.assign(tree_weights->cbegin(), tree_weights->cbegin() + tree_end);\n    weights = common::OptionalWeights{common::Span<float const>{\n        thrust::raw_pointer_cast(d_tree_weights.data()), d_tree_weights.size()}};\n  }\n\n  auto new_enc =\n      p_fmat->Cats()->NeedRecode() ? p_fmat->Cats()->DeviceView(ctx) : enc::DeviceColumnsView{};\n\n  dh::device_vector<uint32_t> categories;\n  ExtractPaths(ctx, &device_paths, model, d_model, &categories, weights);\n\n  LaunchShap(ctx, p_fmat, new_enc, model, [&](auto&& loader, bst_idx_t base_rowid) {\n    auto begin = dh::tbegin(phis) + base_rowid * dim_size;\n    gpu_treeshap::GPUTreeShap<dh::XGBDeviceAllocator<int>>(\n        loader, device_paths.begin(), device_paths.end(), ngroup, begin, dh::tend(phis));\n  });\n\n  p_fmat->Info().base_margin_.SetDevice(ctx->Device());\n  const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();\n\n  auto base_score = model.learner_model_param->BaseScore(ctx);\n  bst_idx_t n_samples = p_fmat->Info().num_row_;\n  dh::LaunchN(n_samples * ngroup, ctx->CUDACtx()->Stream(), [=] __device__(std::size_t idx) {\n    auto [_, gid] = linalg::UnravelIndex(idx, n_samples, ngroup);\n    phis[(idx + 1) * contributions_columns - 1] += margin.empty() ? 
base_score(gid) : margin[idx];\n  });\n}\n\nvoid ShapInteractionValues(Context const* ctx, DMatrix* p_fmat,\n                           HostDeviceVector<float>* out_contribs, gbm::GBTreeModel const& model,\n                           bst_tree_t tree_end, std::vector<float> const* tree_weights,\n                           bool approximate) {\n  xgboost_NVTX_FN_RANGE();\n  std::string not_implemented{\"contribution is not implemented in GPU predictor, use cpu instead.\"};\n  if (approximate) {\n    LOG(FATAL) << \"Approximated \" << not_implemented;\n  }\n  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));\n  out_contribs->SetDevice(ctx->Device());\n  tree_end = predictor::GetTreeLimit(model.trees, tree_end);\n\n  const int ngroup = model.learner_model_param->num_output_group;\n  CHECK_NE(ngroup, 0);\n  size_t contributions_columns = model.learner_model_param->num_feature + 1;\n  auto dim_size =\n      contributions_columns * contributions_columns * model.learner_model_param->num_output_group;\n  out_contribs->Resize(p_fmat->Info().num_row_ * dim_size);\n  out_contribs->Fill(0.0f);\n  auto phis = out_contribs->DeviceSpan();\n\n  dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>> device_paths;\n  DeviceModel d_model{ctx->Device(), model, true, 0, tree_end, CopyViews{ctx}};\n  dh::device_vector<float> d_tree_weights;\n  auto weights = common::OptionalWeights{1.0f};\n  if (tree_weights != nullptr) {\n    // GPU TreeSHAP consumes device-resident path data, so materialize the optional\n    // tree weights on device before extracting the weighted leaf outputs.\n    d_tree_weights.assign(tree_weights->cbegin(), tree_weights->cbegin() + tree_end);\n    weights = common::OptionalWeights{common::Span<float const>{\n        thrust::raw_pointer_cast(d_tree_weights.data()), d_tree_weights.size()}};\n  }\n\n  dh::device_vector<uint32_t> categories;\n  ExtractPaths(ctx, &device_paths, model, d_model, &categories, weights);\n  auto new_enc =\n      
p_fmat->Cats()->NeedRecode() ? p_fmat->Cats()->DeviceView(ctx) : enc::DeviceColumnsView{};\n\n  LaunchShap(ctx, p_fmat, new_enc, model, [&](auto&& loader, bst_idx_t base_rowid) {\n    auto begin = dh::tbegin(phis) + base_rowid * dim_size;\n    gpu_treeshap::GPUTreeShapInteractions<dh::XGBDeviceAllocator<int>>(\n        loader, device_paths.begin(), device_paths.end(), ngroup, begin, dh::tend(phis));\n  });\n\n  p_fmat->Info().base_margin_.SetDevice(ctx->Device());\n  const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();\n\n  auto base_score = model.learner_model_param->BaseScore(ctx);\n  size_t n_features = model.learner_model_param->num_feature;\n  bst_idx_t n_samples = p_fmat->Info().num_row_;\n  dh::LaunchN(n_samples * ngroup, ctx->CUDACtx()->Stream(), [=] __device__(size_t idx) {\n    auto [ridx, gidx] = linalg::UnravelIndex(idx, n_samples, ngroup);\n    phis[gpu_treeshap::IndexPhiInteractions(ridx, ngroup, gidx, n_features, n_features,\n                                            n_features)] +=\n        margin.empty() ? base_score(gidx) : margin[idx];\n  });\n}\n\nvoid ApproxFeatureImportance(Context const*, DMatrix*, HostDeviceVector<float>*,\n                             gbm::GBTreeModel const&, bst_tree_t, std::vector<float> const*) {\n  StringView not_implemented{\n      \"contribution is not implemented in the GPU predictor, use CPU instead.\"};\n  LOG(FATAL) << \"Approximated \" << not_implemented;\n}\n}  // namespace xgboost::interpretability::cuda_impl\n"
  },
  {
    "path": "src/predictor/interpretability/shap.h",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <vector>  // for vector\n\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for DMatrix, MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::gbm {\nstruct GBTreeModel;\n}  // namespace xgboost::gbm\n\nnamespace xgboost::interpretability {\nnamespace cpu_impl {\nvoid ShapValues(Context const* ctx, DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                std::vector<float> const* tree_weights, int condition, unsigned condition_feature);\n\nvoid ApproxFeatureImportance(Context const* ctx, DMatrix* p_fmat,\n                             HostDeviceVector<float>* out_contribs, gbm::GBTreeModel const& model,\n                             bst_tree_t tree_end, std::vector<float> const* tree_weights);\n\nvoid ShapInteractionValues(Context const* ctx, DMatrix* p_fmat,\n                           HostDeviceVector<float>* out_contribs, gbm::GBTreeModel const& model,\n                           bst_tree_t tree_end, std::vector<float> const* tree_weights,\n                           bool approximate);\n}  // namespace cpu_impl\n\n#if defined(XGBOOST_USE_CUDA)\nnamespace cuda_impl {\nvoid ShapValues(Context const* ctx, DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                std::vector<float> const* tree_weights, int condition, unsigned condition_feature);\nvoid ApproxFeatureImportance(Context const* ctx, DMatrix* p_fmat,\n                             HostDeviceVector<float>* out_contribs, gbm::GBTreeModel const& model,\n                             bst_tree_t tree_end, std::vector<float> const* tree_weights);\nvoid ShapInteractionValues(Context const* ctx, DMatrix* p_fmat,\n                           
HostDeviceVector<float>* out_contribs, gbm::GBTreeModel const& model,\n                           bst_tree_t tree_end, std::vector<float> const* tree_weights,\n                           bool approximate);\n}  // namespace cuda_impl\n#endif  // defined(XGBOOST_USE_CUDA)\n\ninline void ShapValues(Context const* ctx, DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,\n                       gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                       std::vector<float> const* tree_weights, int condition,\n                       unsigned condition_feature) {\n#if defined(XGBOOST_USE_CUDA)\n  if (ctx->IsCUDA()) {\n    cuda_impl::ShapValues(ctx, p_fmat, out_contribs, model, tree_end, tree_weights, condition,\n                          condition_feature);\n    return;\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n  cpu_impl::ShapValues(ctx, p_fmat, out_contribs, model, tree_end, tree_weights, condition,\n                       condition_feature);\n}\n\ninline void ApproxFeatureImportance(Context const* ctx, DMatrix* p_fmat,\n                                    HostDeviceVector<float>* out_contribs,\n                                    gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                                    std::vector<float> const* tree_weights) {\n#if defined(XGBOOST_USE_CUDA)\n  if (ctx->IsCUDA()) {\n    cuda_impl::ApproxFeatureImportance(ctx, p_fmat, out_contribs, model, tree_end, tree_weights);\n    return;\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n  cpu_impl::ApproxFeatureImportance(ctx, p_fmat, out_contribs, model, tree_end, tree_weights);\n}\n\ninline void ShapInteractionValues(Context const* ctx, DMatrix* p_fmat,\n                                  HostDeviceVector<float>* out_contribs,\n                                  gbm::GBTreeModel const& model, bst_tree_t tree_end,\n                                  std::vector<float> const* tree_weights, bool approximate) {\n#if defined(XGBOOST_USE_CUDA)\n  if (ctx->IsCUDA()) {\n    
cuda_impl::ShapInteractionValues(ctx, p_fmat, out_contribs, model, tree_end, tree_weights,\n                                     approximate);\n    return;\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n  cpu_impl::ShapInteractionValues(ctx, p_fmat, out_contribs, model, tree_end, tree_weights,\n                                  approximate);\n}\n}  // namespace xgboost::interpretability\n"
  },
  {
    "path": "src/predictor/predict_fn.h",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_PREDICTOR_PREDICT_FN_H_\n#define XGBOOST_PREDICTOR_PREDICT_FN_H_\n\n#include <memory>  // for unique_ptr\n#include <vector>  // for vector\n\n#include \"../common/categorical.h\"  // for IsCat, Decision\n#include \"xgboost/tree_model.h\"     // for RegTree\n\nnamespace xgboost::predictor {\n/** @brief Whether it should traverse to the left branch of a tree. */\ntemplate <bool has_categorical, typename TreeView>\nXGBOOST_DEVICE bool GetDecision(TreeView const &tree, bst_node_t nid, float fvalue,\n                                RegTree::CategoricalSplitMatrix const &cats) {\n  if (has_categorical && common::IsCat(cats.split_type, nid)) {\n    auto node_categories = cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);\n    return common::Decision(node_categories, fvalue);\n  } else {\n    return fvalue < tree.SplitCond(nid);\n  }\n}\n\ntemplate <bool has_missing, bool has_categorical, typename TreeView>\nXGBOOST_DEVICE bst_node_t GetNextNode(TreeView const &tree, const bst_node_t nid, float fvalue,\n                                      bool is_missing,\n                                      RegTree::CategoricalSplitMatrix const &cats) {\n  if (has_missing && is_missing) {\n    return tree.DefaultChild(nid);\n  } else {\n    return tree.LeftChild(nid) + !GetDecision<has_categorical>(tree, nid, fvalue, cats);\n  }\n}\n\n/**\n * @brief Some old prediction methods accept the ntree_limit parameter and they use 0 to\n *        indicate no limit.\n */\ninline bst_tree_t GetTreeLimit(std::vector<std::unique_ptr<RegTree>> const &trees,\n                               bst_tree_t ntree_limit) {\n  auto n_trees = static_cast<bst_tree_t>(trees.size());\n  if (ntree_limit == 0 || ntree_limit > n_trees) {\n    ntree_limit = n_trees;\n  }\n  return ntree_limit;\n}\n}  // namespace xgboost::predictor\n#endif  // XGBOOST_PREDICTOR_PREDICT_FN_H_\n"
  },
  {
    "path": "src/predictor/predictor.cc",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#include \"xgboost/predictor.h\"\n\n#include <dmlc/registry.h>  // for DMLC_REGISTRY_LINK_TAG\n\n#include <cstdint>  // for int32_t\n#include <string>   // for string, to_string\n\n#include \"../gbm/gbtree_model.h\"         // for GBTreeModel\n#include \"xgboost/base.h\"                // for Args, bst_group_t, bst_idx_t\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/learner.h\"             // for LearnerModelParam\n#include \"xgboost/linalg.h\"              // for Tensor, TensorView\n#include \"xgboost/logging.h\"             // for CHECK_EQ, CHECK_NE, LOG\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);\n}  // namespace dmlc\n\nnamespace xgboost {\nvoid Predictor::Configure(Args const&) {}\n\nPredictor* Predictor::Create(std::string const& name, Context const* ctx) {\n  auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);\n  if (e == nullptr) {\n    LOG(FATAL) << \"Unknown predictor type \" << name;\n  }\n  auto p_predictor = (e->body)(ctx);\n  return p_predictor;\n}\n\ntemplate <int32_t D>\nvoid ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_idx_t n_samples,\n                             bst_group_t n_groups) {\n  // FIXME: Bindings other than Python and R don't have shape.\n  std::string expected{\"Invalid shape of base_margin. 
Expected: (\" + std::to_string(n_samples) +\n                       \", \" + std::to_string(n_groups) + \")\"};\n  CHECK_EQ(margin.Shape(0), n_samples) << expected;\n  CHECK_EQ(margin.Shape(1), n_groups) << expected;\n}\n\nnamespace cuda_impl {\nvoid InitOutPredictions(Context const* ctx, linalg::VectorView<float const> base_score,\n                        linalg::MatrixView<float> predt);\n}\n\nnamespace sycl_impl {\nvoid InitOutPredictions(Context const* ctx, linalg::VectorView<float const> base_score,\n                        linalg::MatrixView<float> predt);\n}\n\nvoid Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<float>* out_preds,\n                                   gbm::GBTreeModel const& model) const {\n  CHECK_NE(model.learner_model_param->num_output_group, 0);\n\n  if (!ctx_->Device().IsCPU()) {\n    out_preds->SetDevice(ctx_->Device());\n  }\n\n  // Cannot rely on the Resize to fill as it might skip if the size is already correct.\n  auto n = static_cast<size_t>(model.learner_model_param->OutputLength() * info.num_row_);\n  out_preds->Resize(n);\n\n  HostDeviceVector<float> const* base_margin = info.base_margin_.Data();\n  if (!base_margin->Empty()) {\n    ValidateBaseMarginShape(info.base_margin_, info.num_row_,\n                            model.learner_model_param->OutputLength());\n    out_preds->Copy(*base_margin);\n    return;\n  }\n\n  auto base_score = model.learner_model_param->BaseScore(this->ctx_->Device());\n  if (base_score.Size() == 1) {\n    // Fill a scalar\n    out_preds->Fill(model.learner_model_param->BaseScore(DeviceOrd::CPU())(0));\n    return;\n  }\n\n  // Handle multi-output models where base_score is a vector.\n  auto predt = linalg::MakeTensorView(this->ctx_, out_preds, info.num_row_,\n                                      model.learner_model_param->OutputLength());\n  CHECK_EQ(predt.Size(), out_preds->Size());\n\n  if (this->ctx_->IsCUDA()) {\n#if defined(XGBOOST_USE_CUDA)\n    
cuda_impl::InitOutPredictions(this->ctx_, base_score, predt);\n#else\n    common::AssertGPUSupport();\n#endif\n  } else if (this->ctx_->IsSycl()) {\n#if defined(XGBOOST_USE_SYCL)\n    sycl_impl::InitOutPredictions(this->ctx_, base_score, predt);\n#else\n    common::AssertSYCLSupport();\n#endif\n  } else {\n    common::ParallelFor(info.num_row_, this->ctx_->Threads(), [&](auto i) {\n      for (std::size_t j = 0, m = predt.Shape(1); j < m; ++j) {\n        predt(i, j) = base_score(j);\n      }\n    });\n  }\n}\n}  // namespace xgboost\n\nnamespace xgboost::predictor {\n// List of files that will be force linked in static links.\n#ifdef XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(gpu_predictor);\n#endif  // XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(cpu_predictor);\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/predictor/predictor.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <thrust/for_each.h>                    // for for_each_n\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n\n#include \"../common/cuda_context.cuh\"\n#include \"xgboost/linalg.h\"  // for UnravelIndex\n\nnamespace xgboost::cuda_impl {\nvoid InitOutPredictions(Context const* ctx, linalg::VectorView<float const> base_score,\n                        linalg::MatrixView<float> predt) {\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), predt.Size(),\n                     [=] XGBOOST_DEVICE(std::size_t k) mutable {\n                       auto [i, j] = linalg::UnravelIndex(k, predt.Shape());\n                       predt(i, j) = base_score(j);\n                     });\n}\n}  // namespace xgboost::cuda_impl\n"
  },
  {
    "path": "src/predictor/treeshap.cc",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#include \"treeshap.h\"\n\n#include <algorithm>  // copy\n#include <cstdint>    // std::uint32_t\n\n#include \"../tree/tree_view.h\"  // for ScalarTreeView\n#include \"predict_fn.h\"         // GetNextNode\n#include \"xgboost/base.h\"       // bst_node_t\n#include \"xgboost/logging.h\"\n#include \"xgboost/tree_model.h\"  // RegTree\n\nnamespace xgboost {\nvoid CalculateContributionsApprox(tree::ScalarTreeView const& tree, const RegTree::FVec& feat,\n                                  std::vector<float>* mean_values, float* out_contribs) {\n  CHECK_GT(mean_values->size(), 0U);\n  bst_feature_t split_index = 0;\n  // update bias value\n  float node_value = (*mean_values)[0];\n  out_contribs[feat.Size()] += node_value;\n  if (tree.IsLeaf(RegTree::kRoot)) {\n    // nothing to do anymore\n    return;\n  }\n\n  bst_node_t nidx = 0;\n  auto const& cats = tree.GetCategoriesMatrix();\n\n  while (!tree.IsLeaf(nidx)) {\n    split_index = tree.SplitIndex(nidx);\n    nidx = predictor::GetNextNode<true, true>(tree, nidx, feat.GetFvalue(split_index),\n                                              feat.IsMissing(split_index), cats);\n    bst_float new_value = (*mean_values)[nidx];\n    // update feature weight\n    out_contribs[split_index] += new_value - node_value;\n    node_value = new_value;\n  }\n  float leaf_value = tree.LeafValue(nidx);\n  // update leaf feature weight\n  out_contribs[split_index] += leaf_value - node_value;\n}\n\n// Used by TreeShap\n// data we keep about our decision path\n// note that pweight is included for convenience and is not tied with the other attributes\n// the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them\nstruct PathElement {\n  int feature_index;\n  float zero_fraction;\n  float one_fraction;\n  float pweight;\n  PathElement() = default;\n  PathElement(int i, float z, float o, float w)\n      : feature_index(i), zero_fraction(z), 
one_fraction(o), pweight(w) {}\n};\n\n// extend our decision path with a fraction of one and zero extensions\nvoid ExtendPath(PathElement* unique_path, std::uint32_t unique_depth, float zero_fraction,\n                float one_fraction, int feature_index) {\n  unique_path[unique_depth].feature_index = feature_index;\n  unique_path[unique_depth].zero_fraction = zero_fraction;\n  unique_path[unique_depth].one_fraction = one_fraction;\n  unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f);\n  for (int i = unique_depth - 1; i >= 0; i--) {\n    unique_path[i + 1].pweight +=\n        one_fraction * unique_path[i].pweight * (i + 1) / static_cast<float>(unique_depth + 1);\n    unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i) /\n                             static_cast<float>(unique_depth + 1);\n  }\n}\n\n// undo a previous extension of the decision path\nvoid UnwindPath(PathElement* unique_path, std::uint32_t unique_depth, std::uint32_t path_index) {\n  const float one_fraction = unique_path[path_index].one_fraction;\n  const float zero_fraction = unique_path[path_index].zero_fraction;\n  float next_one_portion = unique_path[unique_depth].pweight;\n\n  for (int i = unique_depth - 1; i >= 0; --i) {\n    if (one_fraction != 0) {\n      const float tmp = unique_path[i].pweight;\n      unique_path[i].pweight =\n          next_one_portion * (unique_depth + 1) / static_cast<float>((i + 1) * one_fraction);\n      next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i) /\n                                   static_cast<float>(unique_depth + 1);\n    } else {\n      unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1)) /\n                               static_cast<float>(zero_fraction * (unique_depth - i));\n    }\n  }\n\n  for (auto i = path_index; i < unique_depth; ++i) {\n    unique_path[i].feature_index = unique_path[i + 1].feature_index;\n    unique_path[i].zero_fraction = 
unique_path[i + 1].zero_fraction;\n    unique_path[i].one_fraction = unique_path[i + 1].one_fraction;\n  }\n}\n\n// determine what the total permutation weight would be if\n// we unwound a previous extension in the decision path\nfloat UnwoundPathSum(const PathElement* unique_path, std::uint32_t unique_depth,\n                     std::uint32_t path_index) {\n  const float one_fraction = unique_path[path_index].one_fraction;\n  const float zero_fraction = unique_path[path_index].zero_fraction;\n  float next_one_portion = unique_path[unique_depth].pweight;\n  float total = 0;\n  for (int i = unique_depth - 1; i >= 0; --i) {\n    if (one_fraction != 0) {\n      const float tmp =\n          next_one_portion * (unique_depth + 1) / static_cast<float>((i + 1) * one_fraction);\n      total += tmp;\n      next_one_portion =\n          unique_path[i].pweight -\n          tmp * zero_fraction * ((unique_depth - i) / static_cast<float>(unique_depth + 1));\n    } else if (zero_fraction != 0) {\n      total += (unique_path[i].pweight / zero_fraction) /\n               ((unique_depth - i) / static_cast<float>(unique_depth + 1));\n    } else {\n      CHECK_EQ(unique_path[i].pweight, 0) << \"Unique path \" << i << \" must have zero weight\";\n    }\n  }\n  return total;\n}\n\n/**\n * \\brief Recursive function that computes the feature attributions for a single tree.\n * \\param feat dense feature vector, if the feature is missing the field is set to NaN\n * \\param phi dense output vector of feature attributions\n * \\param node_index the index of the current node in the tree\n * \\param unique_depth how many unique features are above the current node in the tree\n * \\param parent_unique_path a vector of statistics about our current path through the tree\n * \\param parent_zero_fraction what fraction of the parent path weight is coming as 0 (integrated)\n * \\param parent_one_fraction what fraction of the parent path weight is coming as 1 (fixed)\n * \\param parent_feature_index 
what feature the parent node used to split\n * \\param condition fix one feature to either off (-1) on (1) or not fixed (0 default)\n * \\param condition_feature the index of the feature to fix\n * \\param condition_fraction what fraction of the current weight matches our conditioning feature\n */\nvoid TreeShap(tree::ScalarTreeView const& tree, const RegTree::FVec& feat, float* phi,\n              bst_node_t nidx, std::uint32_t unique_depth, PathElement* parent_unique_path,\n              float parent_zero_fraction, float parent_one_fraction, int parent_feature_index,\n              int condition, std::uint32_t condition_feature, float condition_fraction) {\n  // stop if we have no weight coming down to us\n  if (condition_fraction == 0) return;\n\n  // extend the unique path\n  PathElement* unique_path = parent_unique_path + unique_depth + 1;\n  std::copy(parent_unique_path, parent_unique_path + unique_depth + 1, unique_path);\n\n  if (condition == 0 || condition_feature != static_cast<std::uint32_t>(parent_feature_index)) {\n    ExtendPath(unique_path, unique_depth, parent_zero_fraction, parent_one_fraction,\n               parent_feature_index);\n  }\n  const std::uint32_t split_index = tree.SplitIndex(nidx);\n\n  // leaf node\n  if (tree.IsLeaf(nidx)) {\n    for (std::uint32_t i = 1; i <= unique_depth; ++i) {\n      const float w = UnwoundPathSum(unique_path, unique_depth, i);\n      const PathElement& el = unique_path[i];\n      phi[el.feature_index] +=\n          w * (el.one_fraction - el.zero_fraction) * tree.LeafValue(nidx) * condition_fraction;\n    }\n\n    // internal node\n  } else {\n    // find which branch is \"hot\" (meaning x would follow it)\n    auto const& cats = tree.GetCategoriesMatrix();\n    bst_node_t hot_index = predictor::GetNextNode<true, true>(\n        tree, nidx, feat.GetFvalue(split_index), feat.IsMissing(split_index), cats);\n\n    const auto cold_index =\n        (hot_index == tree.LeftChild(nidx) ? 
tree.RightChild(nidx) : tree.LeftChild(nidx));\n    const float w = tree.Stat(nidx).sum_hess;\n    const float hot_zero_fraction = tree.Stat(hot_index).sum_hess / w;\n    const float cold_zero_fraction = tree.Stat(cold_index).sum_hess / w;\n    float incoming_zero_fraction = 1;\n    float incoming_one_fraction = 1;\n\n    // see if we have already split on this feature,\n    // if so we undo that split so we can redo it for this node\n    std::uint32_t path_index = 0;\n    for (; path_index <= unique_depth; ++path_index) {\n      if (static_cast<std::uint32_t>(unique_path[path_index].feature_index) == split_index) break;\n    }\n    if (path_index != unique_depth + 1) {\n      incoming_zero_fraction = unique_path[path_index].zero_fraction;\n      incoming_one_fraction = unique_path[path_index].one_fraction;\n      UnwindPath(unique_path, unique_depth, path_index);\n      unique_depth -= 1;\n    }\n\n    // divide up the condition_fraction among the recursive calls\n    float hot_condition_fraction = condition_fraction;\n    float cold_condition_fraction = condition_fraction;\n    if (condition > 0 && split_index == condition_feature) {\n      cold_condition_fraction = 0;\n      unique_depth -= 1;\n    } else if (condition < 0 && split_index == condition_feature) {\n      hot_condition_fraction *= hot_zero_fraction;\n      cold_condition_fraction *= cold_zero_fraction;\n      unique_depth -= 1;\n    }\n\n    TreeShap(tree, feat, phi, hot_index, unique_depth + 1, unique_path,\n             hot_zero_fraction * incoming_zero_fraction, incoming_one_fraction, split_index,\n             condition, condition_feature, hot_condition_fraction);\n\n    TreeShap(tree, feat, phi, cold_index, unique_depth + 1, unique_path,\n             cold_zero_fraction * incoming_zero_fraction, 0, split_index, condition,\n             condition_feature, cold_condition_fraction);\n  }\n}\n\nvoid CalculateContributions(tree::ScalarTreeView const& tree, const RegTree::FVec& feat,\n                
            std::vector<float>* mean_values, float* out_contribs, int condition,\n                            std::uint32_t condition_feature) {\n  // find the expected value of the tree's predictions\n  if (condition == 0) {\n    float node_value = (*mean_values)[0];\n    out_contribs[feat.Size()] += node_value;\n  }\n\n  // Preallocate space for the unique path data\n  bst_node_t const maxd = tree.MaxDepth() + 2;\n  std::vector<PathElement> unique_path_data((maxd * (maxd + 1)) / 2);\n\n  TreeShap(tree, feat, out_contribs, 0, 0, unique_path_data.data(), 1, 1, -1, condition,\n           condition_feature, 1);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/predictor/treeshap.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <vector>  // for vector\n\n#include \"xgboost/tree_model.h\"  // for RegTree\n\nnamespace xgboost {\n/**\n * @brief calculate the approximate feature contributions for the given root\n *\n *   This follows the idea of http://blog.datadive.net/interpreting-random-forests/\n *\n * @param feat dense feature vector, if the feature is missing the field is set to NaN\n * @param out_contribs output vector to hold the contributions\n */\nvoid CalculateContributionsApprox(tree::ScalarTreeView const& tree, const RegTree::FVec& feat,\n                                  std::vector<float>* mean_values, float* out_contribs);\n\n/**\n * @brief calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree\n *\n * @param feat dense feature vector, if the feature is missing the field is set to NaN\n * @param out_contribs output vector to hold the contributions\n * @param condition fix one feature to either off (-1) on (1) or not fixed (0 default)\n * @param condition_feature the index of the feature to fix\n */\nvoid CalculateContributions(tree::ScalarTreeView const& tree, const RegTree::FVec& feat,\n                            std::vector<float>* mean_values, float* out_contribs, int condition,\n                            unsigned condition_feature);\n}  // namespace xgboost\n"
  },
  {
    "path": "src/predictor/utils.h",
    "content": "/**\n * Copyright 2017-2025, XGBoost Contributors\n */\n#pragma once\n#include <memory>  // for shared_ptr\n\n#include \"../data/proxy_dmatrix.h\"  // for DMatrixProxy\n#include \"xgboost/data.h\"           // for DMatrix\n#include \"xgboost/learner.h\"        // LearnerModelParam\n\nnamespace xgboost::predictor {\ntemplate <typename Adapter>\nvoid CheckProxyDMatrix(std::shared_ptr<Adapter> m, data::DMatrixProxy const* proxy,\n                       LearnerModelParam const* p) {\n  CHECK(proxy);\n  CHECK(!proxy->Info().IsColumnSplit())\n      << \"Inplace predict support for column-wise data split is not yet implemented.\";\n  auto n_features_data = m->NumColumns();\n  auto n_features_model = p->num_feature;\n  CHECK_EQ(n_features_data, n_features_model)\n      << \"Number of columns in data must equal to the trained model.\";\n  CHECK_EQ(proxy->Info().num_row_, m->NumRows());\n  CHECK_EQ(proxy->Info().num_col_, m->NumColumns());\n  CHECK_EQ(proxy->Info().num_nonzero_, 0);  // unknown\n}\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "src/tree/common_row_partitioner.h",
    "content": "/**\n * Copyright 2021-2023, XGBoost contributors\n * \\file common_row_partitioner.h\n * \\brief Common partitioner logic for hist and approx methods.\n */\n#ifndef XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_\n#define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_\n\n#include <algorithm>  // for all_of, fill\n#include <cstdint>    // for uint32_t, int32_t\n#include <limits>     // for numeric_limits\n#include <vector>     // for vector\n\n#include \"../collective/allreduce.h\"      // for Allreduce\n#include \"../common/bitfield.h\"           // for RBitField8\n#include \"../common/linalg_op.h\"          // for cbegin\n#include \"../common/numeric.h\"            // for Iota\n#include \"../common/partition_builder.h\"  // for PartitionBuilder\n#include \"../common/row_set.h\"            // for RowSetCollection\n#include \"../common/threading_utils.h\"    // for ParallelFor2d\n#include \"tree_view.h\"                    // for ScalarTreeView\n#include \"xgboost/base.h\"                 // for bst_idx_t\n#include \"xgboost/collective/result.h\"    // for Success, SafeColl\n#include \"xgboost/context.h\"              // for Context\n#include \"xgboost/linalg.h\"               // for TensorView\n#include \"xgboost/span.h\"                 // for Span\n\nnamespace xgboost::tree {\n\nstatic constexpr size_t kPartitionBlockSize = 2048;\n\nclass ColumnSplitHelper {\n public:\n  ColumnSplitHelper() = default;\n\n  ColumnSplitHelper(bst_idx_t num_row,\n                    common::PartitionBuilder<kPartitionBlockSize>* partition_builder,\n                    common::RowSetCollection* row_set_collection)\n      : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {\n    auto n_bytes = BitVector::ComputeStorageSize(num_row);\n    decision_storage_.resize(n_bytes);\n    decision_bits_ = BitVector{common::Span<BitVector::value_type>{decision_storage_}};\n    missing_storage_.resize(n_bytes);\n    missing_bits_ = 
BitVector{common::Span<BitVector::value_type>{missing_storage_}};\n  }\n\n  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry,\n            typename TreeView>\n  void Partition(Context const* ctx, common::BlockedSpace2d const& space, std::int32_t n_threads,\n                 GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,\n                 std::vector<ExpandEntry> const& nodes,\n                 std::vector<std::int32_t> const& split_conditions, TreeView const& tree) {\n    // When data is split by column, we don't have all the feature values in the local worker, so\n    // we first collect all the decisions and whether the feature is missing into bit vectors.\n    std::fill(decision_storage_.begin(), decision_storage_.end(), 0);\n    std::fill(missing_storage_.begin(), missing_storage_.end(), 0);\n\n    this->tloc_decision_.resize(decision_storage_.size() * n_threads);\n    this->tloc_missing_.resize(decision_storage_.size() * n_threads);\n    std::fill_n(this->tloc_decision_.data(), this->tloc_decision_.size(), 0);\n    std::fill_n(this->tloc_missing_.data(), this->tloc_missing_.size(), 0);\n\n    // Make thread-local storage.\n    using T = decltype(decision_storage_)::value_type;\n    auto make_tloc = [&](std::vector<T>& storage, std::int32_t tidx) {\n      auto span = common::Span<T>{storage};\n      auto n = decision_storage_.size();\n      auto bitvec = BitVector{span.subspan(n * tidx, n)};\n      return bitvec;\n    };\n\n    common::ParallelFor2d(space, n_threads, [&](std::size_t node_in_set, common::Range1d r) {\n      bst_node_t const nid = nodes[node_in_set].nid;\n      auto tidx = omp_get_thread_num();\n      auto decision = make_tloc(this->tloc_decision_, tidx);\n      auto missing = make_tloc(this->tloc_missing_, tidx);\n      bst_bin_t split_cond = column_matrix.IsInitialized() ? 
split_conditions[node_in_set] : 0;\n      partition_builder_->MaskRows<BinIdxType, any_missing, any_cat>(\n          node_in_set, nodes, r, split_cond, gmat, column_matrix, tree,\n          (*row_set_collection_)[nid].begin(), &decision, &missing);\n    });\n\n    // Reduce thread local\n    auto decision = make_tloc(this->tloc_decision_, 0);\n    auto missing = make_tloc(this->tloc_missing_, 0);\n    for (std::int32_t tidx = 1; tidx < n_threads; ++tidx) {\n      decision |= make_tloc(this->tloc_decision_, tidx);\n      missing |= make_tloc(this->tloc_missing_, tidx);\n    }\n    CHECK_EQ(decision_storage_.size(), decision.NumValues());\n    std::copy_n(decision.Data(), decision_storage_.size(), decision_storage_.data());\n    std::copy_n(missing.Data(), missing_storage_.size(), missing_storage_.data());\n\n    // Then aggregate the bit vectors across all the workers.\n    auto rc = collective::Success() << [&] {\n      return collective::Allreduce(ctx, &decision_storage_, collective::Op::kBitwiseOR);\n    } << [&] {\n      return collective::Allreduce(ctx, &missing_storage_, collective::Op::kBitwiseAND);\n    };\n    collective::SafeColl(rc);\n\n    // Finally use the bit vectors to partition the rows.\n    common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {\n      size_t begin = r.begin();\n      const int32_t nid = nodes[node_in_set].nid;\n      const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);\n      partition_builder_->AllocateForTask(task_id);\n      partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, tree,\n                                          (*row_set_collection_)[nid].begin(), decision_bits_,\n                                          missing_bits_);\n    });\n  }\n\n private:\n  using BitVector = RBitField8;\n  std::vector<BitVector::value_type> decision_storage_{};\n  BitVector decision_bits_{};\n  std::vector<BitVector::value_type> missing_storage_{};\n  BitVector 
missing_bits_{};\n\n  std::vector<BitVector::value_type> tloc_decision_;\n  std::vector<BitVector::value_type> tloc_missing_;\n\n  common::PartitionBuilder<kPartitionBlockSize>* partition_builder_;\n  common::RowSetCollection* row_set_collection_;\n};\n\nclass CommonRowPartitioner {\n public:\n  bst_idx_t base_rowid = 0;\n\n  CommonRowPartitioner() = default;\n  CommonRowPartitioner(Context const* ctx, bst_idx_t num_row, bst_idx_t _base_rowid,\n                       bool is_col_split)\n      : base_rowid{_base_rowid}, is_col_split_{is_col_split} {\n    Reset(ctx, num_row, _base_rowid, is_col_split);\n  }\n\n  void Reset(Context const* ctx, bst_idx_t num_row, bst_idx_t _base_rowid, bool is_col_split) {\n    base_rowid = _base_rowid;\n    is_col_split_ = is_col_split;\n\n    std::vector<bst_idx_t>& row_indices = *row_set_collection_.Data();\n    row_indices.resize(num_row);\n\n    bst_idx_t* p_row_indices = row_indices.data();\n    common::Iota(ctx, p_row_indices, p_row_indices + num_row, base_rowid);\n\n    row_set_collection_.Clear();\n    row_set_collection_.Init();\n\n    if (is_col_split_) {\n      column_split_helper_ = ColumnSplitHelper{num_row, &partition_builder_, &row_set_collection_};\n    }\n  }\n\n  /* Making GHistIndexMatrixT a template parameter allows reusing this function for sycl-plugin */\n  template <typename ExpandEntry, typename GHistIndexMatrixT, typename TreeView>\n  static void FindSplitConditions(const std::vector<ExpandEntry>& nodes, TreeView const& tree,\n                                  GHistIndexMatrixT const& gmat,\n                                  std::vector<int32_t>* p_split_conditions) {\n    auto const& ptrs = gmat.cut.Ptrs();\n    auto const& vals = gmat.cut.Values();\n    auto& split_conditions = *p_split_conditions;\n\n    for (std::size_t i = 0; i < nodes.size(); ++i) {\n      bst_node_t const nidx = nodes[i].nid;\n      bst_feature_t const fidx = tree.SplitIndex(nidx);\n      float const split_pt = tree.SplitCond(nidx);\n    
  std::uint32_t const lower_bound = ptrs[fidx];\n      std::uint32_t const upper_bound = ptrs[fidx + 1];\n      bst_bin_t split_cond = -1;\n      // convert floating-point split_pt into corresponding bin_id\n      // split_cond = -1 indicates that split_pt is less than all known cut points\n      CHECK_LT(upper_bound, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));\n      for (auto bound = lower_bound; bound < upper_bound; ++bound) {\n        if (split_pt == vals[bound]) {\n          split_cond = static_cast<bst_bin_t>(bound);\n        }\n      }\n      split_conditions[i] = split_cond;\n    }\n  }\n\n  template <typename ExpandEntry, typename TreeView>\n  void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, TreeView const& tree) {\n    const size_t n_nodes = nodes.size();\n    for (unsigned int i = 0; i < n_nodes; ++i) {\n      const int32_t nidx = nodes[i].nid;\n      const size_t n_left = partition_builder_.GetNLeftElems(i);\n      const size_t n_right = partition_builder_.GetNRightElems(i);\n      CHECK_EQ(tree.LeftChild(nidx) + 1, tree.RightChild(nidx));\n      row_set_collection_.AddSplit(nidx, tree.LeftChild(nidx), tree.RightChild(nidx), n_left,\n                                   n_right);\n    }\n  }\n\n  template <typename ExpandEntry, typename TreeView>\n  void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,\n                      std::vector<ExpandEntry> const& nodes, TreeView const& tree) {\n    auto const& column_matrix = gmat.Transpose();\n    if (column_matrix.IsInitialized()) {\n      if (gmat.cut.HasCategorical()) {\n        this->template UpdatePosition<true>(ctx, gmat, column_matrix, nodes, tree);\n      } else {\n        this->template UpdatePosition<false>(ctx, gmat, column_matrix, nodes, tree);\n      }\n    } else {\n      /* ColumnMatrix is not initialized.\n       * It means that we use the 'approx' method.\n       * any_missing and any_cat don't matter in this case.\n       * Jump directly to the main 
method.\n       */\n      this->template UpdatePosition<uint8_t, true, true>(ctx, gmat, column_matrix, nodes, tree);\n    }\n  }\n\n  template <bool any_cat, typename ExpandEntry, typename TreeView>\n  void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,\n                      const common::ColumnMatrix& column_matrix,\n                      std::vector<ExpandEntry> const& nodes, TreeView const& tree) {\n    if (column_matrix.AnyMissing()) {\n      this->template UpdatePosition<true, any_cat>(ctx, gmat, column_matrix, nodes, tree);\n    } else {\n      this->template UpdatePosition<false, any_cat>(ctx, gmat, column_matrix, nodes, tree);\n    }\n  }\n\n  template <bool any_missing, bool any_cat, typename ExpandEntry, typename TreeView>\n  void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,\n                      const common::ColumnMatrix& column_matrix,\n                      std::vector<ExpandEntry> const& nodes, TreeView const& tree) {\n    common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto t) {\n      using T = decltype(t);\n      this->template UpdatePosition<T, any_missing, any_cat>(ctx, gmat, column_matrix, nodes, tree);\n    });\n  }\n\n  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry,\n            typename TreeView>\n  void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,\n                      const common::ColumnMatrix& column_matrix,\n                      std::vector<ExpandEntry> const& nodes, TreeView const& tree) {\n    // 1. 
Find split condition for each split\n    size_t n_nodes = nodes.size();\n\n    std::vector<bst_bin_t> split_conditions;\n    if (column_matrix.IsInitialized()) {\n      split_conditions.resize(n_nodes);\n      FindSplitConditions(nodes, tree, gmat, &split_conditions);\n    }\n\n    // 2.1 Create a blocked space of size SUM(samples in each node)\n    common::BlockedSpace2d space(\n        n_nodes,\n        [&](std::size_t node_in_set) {\n          auto nid = nodes[node_in_set].nid;\n          return row_set_collection_[nid].Size();\n        },\n        kPartitionBlockSize);\n\n    // 2.2 Initialize the partition builder\n    // allocate buffers for storage intermediate results by each thread\n    partition_builder_.Init(space.Size(), n_nodes, [&](size_t node_in_set) {\n      const int32_t nid = nodes[node_in_set].nid;\n      const size_t size = row_set_collection_[nid].Size();\n      const size_t n_tasks = size / kPartitionBlockSize + !!(size % kPartitionBlockSize);\n      return n_tasks;\n    });\n    CHECK_EQ(base_rowid, gmat.base_rowid);\n\n    // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node\n    // Store results in intermediate buffers from partition_builder_\n    if (is_col_split_) {\n      column_split_helper_.Partition<BinIdxType, any_missing, any_cat>(\n          ctx, space, ctx->Threads(), gmat, column_matrix, nodes, split_conditions, tree);\n    } else {\n      common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {\n        size_t begin = r.begin();\n        const int32_t nid = nodes[node_in_set].nid;\n        const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);\n        partition_builder_.AllocateForTask(task_id);\n        bst_bin_t split_cond = column_matrix.IsInitialized() ? 
split_conditions[node_in_set] : 0;\n        partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(\n            node_in_set, nodes, r, split_cond, gmat, column_matrix, tree,\n            row_set_collection_[nid].begin());\n      });\n    }\n\n    // 3. Compute offsets to copy blocks of row-indexes\n    // from partition_builder_ to row_set_collection_\n    partition_builder_.CalculateRowOffsets();\n\n    // 4. Copy elements from partition_builder_ to row_set_collection_ back\n    // with updated row-indexes for each tree-node\n    common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {\n      const int32_t nid = nodes[node_in_set].nid;\n      partition_builder_.MergeToArray(node_in_set, r.begin(), row_set_collection_[nid].begin());\n    });\n\n    // 5. Add info about splits into row_set_collection_\n    AddSplitsToRowSet(nodes, tree);\n  }\n\n  [[nodiscard]] auto const& Partitions() const { return row_set_collection_; }\n\n  [[nodiscard]] std::size_t Size() const {\n    return std::distance(row_set_collection_.begin(), row_set_collection_.end());\n  }\n\n  auto& operator[](bst_node_t nidx) { return row_set_collection_[nidx]; }\n  auto const& operator[](bst_node_t nidx) const { return row_set_collection_[nidx]; }\n\n  void LeafPartition(Context const* ctx, ScalarTreeView const& tree, common::Span<float const> hess,\n                     common::Span<bst_node_t> out_position) const {\n    partition_builder_.LeafPartition(\n        ctx, tree, this->Partitions(), out_position,\n        [&](size_t idx) -> bool { return hess[idx - this->base_rowid] - .0f == .0f; });\n  }\n\n  template <typename TreeView>\n  void LeafPartition(Context const* ctx, TreeView const& tree,\n                     linalg::MatrixView<GradientPair const> gpair,\n                     common::Span<bst_node_t> out_position) const {\n    if (gpair.Shape(1) > 1) {\n      partition_builder_.LeafPartition(\n          ctx, tree, this->Partitions(), 
out_position, [&](std::size_t idx) -> bool {\n            auto sample = gpair.Slice(idx - this->base_rowid, linalg::All());\n            return std::all_of(linalg::cbegin(sample), linalg::cend(sample),\n                               [](GradientPair const& g) { return g.GetHess() - .0f == .0f; });\n          });\n    } else {\n      auto s = gpair.Slice(linalg::All(), 0);\n      partition_builder_.LeafPartition(ctx, tree, this->Partitions(), out_position,\n                                       [&](std::size_t idx) -> bool {\n                                         return s(idx - this->base_rowid).GetHess() - .0f == .0f;\n                                       });\n    }\n  }\n\n private:\n  common::PartitionBuilder<kPartitionBlockSize> partition_builder_;\n  common::RowSetCollection row_set_collection_;\n  bool is_col_split_;\n  ColumnSplitHelper column_split_helper_;\n};\n\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_\n"
  },
  {
    "path": "src/tree/constraints.cc",
    "content": "/*!\n * Copyright 2018-2019 by Contributors\n */\n#include <algorithm>\n#include <unordered_set>\n#include <vector>\n\n#include \"xgboost/span.h\"\n#include \"xgboost/json.h\"\n#include \"constraints.h\"\n#include \"param.h\"\n\nnamespace xgboost {\nvoid FeatureInteractionConstraintHost::Configure(tree::TrainParam const& param,\n                                                 bst_feature_t const n_features) {\n  if (param.interaction_constraints.empty()) {\n    enabled_ = !param.interaction_constraints.empty();\n    return;  // short-circuit if no constraint is specified\n  }\n  enabled_ = true;\n\n  this->interaction_constraint_str_ = param.interaction_constraints;\n  this->n_features_ = n_features;\n  this->Reset();\n}\n\nvoid FeatureInteractionConstraintHost::Reset() {\n  if (!enabled_) {\n    return;\n  }\n  // Read std::vector<std::vector<bst_feature_t>> first and then\n  //   convert to std::vector<std::unordered_set<bst_feature_t>>\n  std::vector<std::vector<bst_feature_t>> tmp;\n  try {\n    ParseInteractionConstraint(this->interaction_constraint_str_, &tmp);\n  } catch (dmlc::Error const &e) {\n    LOG(FATAL) << \"Failed to parse feature interaction constraint:\\n\"\n               << this->interaction_constraint_str_ << \"\\n\"\n               << \"With error:\\n\" << e.what();\n  }\n  for (const auto& e : tmp) {\n    interaction_constraints_.emplace_back(e.begin(), e.end());\n  }\n\n  // Initialise interaction constraints record with all variables permitted for the first node\n  node_constraints_.clear();\n  node_constraints_.resize(1, std::unordered_set<bst_feature_t>());\n  node_constraints_[0].reserve(n_features_);\n  for (bst_feature_t i = 0; i < n_features_; ++i) {\n    node_constraints_[0].insert(i);\n  }\n\n  // Initialise splits record\n  splits_.clear();\n  splits_.resize(1, std::unordered_set<bst_feature_t>());\n}\n\nvoid FeatureInteractionConstraintHost::SplitImpl(\n    bst_node_t node_id, bst_feature_t feature_id, bst_node_t 
left_id, bst_node_t right_id) {\n  bst_node_t newsize = std::max(left_id, right_id) + 1;\n\n  // Record previous splits for child nodes\n  auto feature_splits = splits_[node_id];  // fid history of current node\n  feature_splits.insert(feature_id);  // add feature of current node\n  splits_.resize(newsize);\n  splits_[left_id] = feature_splits;\n  splits_[right_id] = feature_splits;\n\n  // Resize constraints record, initialise all features to be not permitted for new nodes\n  CHECK_NE(newsize, 0);\n  node_constraints_.resize(newsize, std::unordered_set<bst_feature_t>());\n\n  // Permit features used in previous splits\n  for (bst_feature_t fid : feature_splits) {\n    node_constraints_[left_id].insert(fid);\n    node_constraints_[right_id].insert(fid);\n  }\n\n  // Loop across specified interactions in constraints\n  for (const auto &constraint : interaction_constraints_) {\n    // flags whether the specified interaction is still relevant\n    bst_uint flag = 1;\n\n    // Test relevance of specified interaction by checking all previous\n    // features are included\n    for (bst_uint checkvar : feature_splits) {\n      if (constraint.count(checkvar) == 0) {\n        flag = 0;\n        break;   // interaction is not relevant due to unmet constraint\n      }\n    }\n\n    // If interaction is still relevant, permit all other features in the\n    // interaction\n    if (flag == 1) {\n      for (bst_uint k : constraint) {\n        node_constraints_[left_id].insert(k);\n        node_constraints_[right_id].insert(k);\n      }\n    }\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/tree/constraints.cu",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n */\n#include <thrust/copy.h>\n#include <thrust/device_vector.h>\n#include <thrust/execution_policy.h>\n#include <thrust/iterator/counting_iterator.h>\n\n#include <set>\n#include <string>\n\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../common/device_helpers.cuh\"\n#include \"constraints.cuh\"\n#include \"param.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/span.h\"\n\nnamespace xgboost {\n\nsize_t FeatureInteractionConstraintDevice::Features() const {\n  return d_sets_ptr_.size() - 1;\n}\n\nvoid FeatureInteractionConstraintDevice::Configure(\n    tree::TrainParam const& param, int32_t const n_features) {\n  has_constraint_ = true;\n  if (param.interaction_constraints.length() == 0) {\n    has_constraint_ = false;\n    return;\n  }\n  // --- Parse interaction constraints\n  // Interaction constraints parsed from string parameter.  After\n  // parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.\n  std::vector<std::vector<bst_feature_t>> h_feature_constraints;\n  try {\n    ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);\n  } catch (dmlc::Error const& e) {\n    LOG(FATAL) << \"Failed to parse feature interaction constraint:\\n\"\n               << param.interaction_constraints << \"\\n\"\n               << \"With error:\\n\" << e.what();\n  }\n  n_sets_ = h_feature_constraints.size();\n\n  size_t const n_feat_storage = LBitField64::ComputeStorageSize(n_features);\n  if (n_feat_storage == 0 && n_features != 0) {\n    LOG(FATAL) << \"Wrong storage size, n_features: \" << n_features;\n  }\n\n  // --- Initialize allowed features attached to nodes.\n  int32_t n_nodes { param.MaxNodes() };\n  node_constraints_.resize(n_nodes);\n  node_constraints_storage_.resize(n_nodes);\n  for (auto& n : node_constraints_storage_) {\n    n.resize(LBitField64::ComputeStorageSize(n_features));\n  }\n  for (size_t i = 0; i < 
node_constraints_storage_.size(); ++i) {\n    auto span = dh::ToSpan(node_constraints_storage_[i]);\n    node_constraints_[i] = LBitField64(span);\n  }\n  s_node_constraints_ = common::Span<LBitField64>(node_constraints_.data(),\n                                               node_constraints_.size());\n\n  // Represent constraints as CSR format, flatten is the value vector,\n  // ptr is row_ptr vector in CSR.\n  std::vector<uint32_t> h_feature_constraints_flatten;\n  for (auto const& constraints : h_feature_constraints) {\n    for (uint32_t c : constraints) {\n      h_feature_constraints_flatten.emplace_back(c);\n    }\n  }\n  std::vector<size_t> h_feature_constraints_ptr;\n  size_t n_features_in_constraints = 0;\n  h_feature_constraints_ptr.emplace_back(n_features_in_constraints);\n  for (auto const& v : h_feature_constraints) {\n    n_features_in_constraints += v.size();\n    h_feature_constraints_ptr.emplace_back(n_features_in_constraints);\n  }\n  // Copy the CSR to device.\n  d_fconstraints_.resize(h_feature_constraints_flatten.size());\n  thrust::copy(h_feature_constraints_flatten.cbegin(), h_feature_constraints_flatten.cend(),\n               d_fconstraints_.begin());\n  s_fconstraints_ = dh::ToSpan(d_fconstraints_);\n  d_fconstraints_ptr_.resize(h_feature_constraints_ptr.size());\n  thrust::copy(h_feature_constraints_ptr.cbegin(), h_feature_constraints_ptr.cend(),\n               d_fconstraints_ptr_.begin());\n  s_fconstraints_ptr_ = dh::ToSpan(d_fconstraints_ptr_);\n\n  // --- Compute interaction sets attached to each feature.\n  // Use a set to eliminate duplicated entries.\n  std::vector<std::set<int32_t> > h_features_set(n_features);\n  int32_t cid = 0;\n  for (auto const& constraints : h_feature_constraints) {\n    for (auto const& feat : constraints) {\n      h_features_set.at(feat).insert(cid);\n    }\n    cid++;\n  }\n  // Compute device sets.\n  std::vector<int32_t> h_sets;\n  int32_t ptr = 0;\n  std::vector<int32_t> h_sets_ptr {ptr};\n  for (auto 
const& feature : h_features_set) {\n    for (auto constraint_id : feature) {\n      h_sets.emplace_back(constraint_id);\n    }\n    // empty set is well defined here.\n    ptr += feature.size();\n    h_sets_ptr.emplace_back(ptr);\n  }\n  d_sets_ = h_sets;\n  d_sets_ptr_ = h_sets_ptr;\n  s_sets_ = dh::ToSpan(d_sets_);\n  s_sets_ptr_ = dh::ToSpan(d_sets_ptr_);\n\n  d_feature_buffer_storage_.resize(LBitField64::ComputeStorageSize(n_features));\n  feature_buffer_ = LBitField64{dh::ToSpan(d_feature_buffer_storage_)};\n\n  // --- Initialize result buffers.\n  output_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));\n  output_buffer_bits_ = LBitField64(dh::ToSpan(output_buffer_bits_storage_));\n  input_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));\n  input_buffer_bits_ = LBitField64(dh::ToSpan(input_buffer_bits_storage_));\n  result_buffer_.resize(n_features);\n  s_result_buffer_ = dh::ToSpan(result_buffer_);\n}\n\nFeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(\n    tree::TrainParam const& param, int32_t const n_features) :\n    has_constraint_{true}, n_sets_{0} {\n  this->Configure(param, n_features);\n}\n\nvoid FeatureInteractionConstraintDevice::Reset(Context const* ctx) {\n  for (auto& node : node_constraints_storage_) {\n    thrust::fill(ctx->CUDACtx()->CTP(), node.begin(), node.end(), 0);\n  }\n}\n\n__global__ void ClearBuffersKernel(\n    LBitField64 result_buffer_output, LBitField64 result_buffer_input) {\n  auto tid = blockIdx.x * blockDim.x + threadIdx.x;\n  if (tid < result_buffer_output.Capacity()) {\n    result_buffer_output.Clear(tid);\n  }\n  if (tid < result_buffer_input.Capacity()) {\n    result_buffer_input.Clear(tid);\n  }\n}\n\nvoid FeatureInteractionConstraintDevice::ClearBuffers() {\n  CHECK_EQ(output_buffer_bits_.Capacity(), input_buffer_bits_.Capacity());\n  CHECK_LE(feature_buffer_.Capacity(), output_buffer_bits_.Capacity());\n  uint32_t constexpr kBlockThreads = 256;\n  
auto const n_grids = static_cast<uint32_t>(\n      common::DivRoundUp(input_buffer_bits_.Capacity(), kBlockThreads));\n  dh::LaunchKernel {n_grids, kBlockThreads} (\n      ClearBuffersKernel,\n      output_buffer_bits_, input_buffer_bits_);\n}\n\ncommon::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(Context const* ctx,\n                                                                          bst_node_t node_id) {\n  if (!has_constraint_) { return {}; }\n  CHECK_LT(node_id, s_node_constraints_.size());\n\n  ClearBuffers();\n\n  thrust::counting_iterator<int32_t> begin(0);\n  thrust::counting_iterator<int32_t> end(result_buffer_.size());\n  auto p_result_buffer = result_buffer_.data();\n  LBitField64 node_constraints = s_node_constraints_[node_id];\n\n  thrust::device_ptr<bst_feature_t> const out_end = thrust::copy_if(\n      ctx->CUDACtx()->CTP(), begin, end, p_result_buffer, [=] __device__(int32_t pos) {\n        bool res = node_constraints.Check(pos);\n        return res;\n      });\n  size_t const n_available = std::distance(result_buffer_.data(), out_end);\n\n  return {s_result_buffer_.data(), s_result_buffer_.data() + n_available};\n}\n\n__global__ void SetInputBufferKernel(common::Span<bst_feature_t const> feature_list_input,\n                                     LBitField64 result_buffer_input) {\n  uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;\n  if (tid < feature_list_input.size()) {\n    result_buffer_input.Set(feature_list_input[tid]);\n  }\n}\n\n__global__ void QueryFeatureListKernel(LBitField64 node_constraints,\n                                       LBitField64 result_buffer_input,\n                                       LBitField64 result_buffer_output) {\n  result_buffer_output |= node_constraints;\n  result_buffer_output &= result_buffer_input;\n}\n\ncommon::Span<bst_feature_t const> FeatureInteractionConstraintDevice::Query(\n    common::Span<bst_feature_t const> feature_list, bst_node_t nidx) {\n  if (!has_constraint_ 
|| nidx == 0) {\n    return feature_list;\n  }\n\n  ClearBuffers();\n\n  LBitField64 node_constraints = s_node_constraints_[nidx];\n  CHECK_EQ(input_buffer_bits_.Capacity(), output_buffer_bits_.Capacity());\n\n  uint32_t constexpr kBlockThreads = 256;\n  auto n_grids = static_cast<uint32_t>(\n      common::DivRoundUp(output_buffer_bits_.Capacity(), kBlockThreads));\n  dh::LaunchKernel {n_grids, kBlockThreads} (\n      SetInputBufferKernel,\n      feature_list, input_buffer_bits_);\n  dh::LaunchKernel {n_grids, kBlockThreads} (\n      QueryFeatureListKernel,\n      node_constraints, input_buffer_bits_, output_buffer_bits_);\n\n  thrust::counting_iterator<int32_t> begin(0);\n  thrust::counting_iterator<int32_t> end(result_buffer_.size());\n\n  LBitField64 local_result_buffer = output_buffer_bits_;\n\n  thrust::device_ptr<bst_feature_t> const out_end = thrust::copy_if(\n      thrust::device,\n      begin, end,\n      result_buffer_.data(),\n      [=]__device__(int32_t pos) {\n        bool res = local_result_buffer.Check(pos);\n        return res;\n      });\n  size_t const n_available = std::distance(result_buffer_.data(), out_end);\n\n  common::Span<bst_feature_t> result =\n      {s_result_buffer_.data(), s_result_buffer_.data() + n_available};\n  return result;\n}\n\n// Find interaction sets for each feature, then store all features in\n// those sets in a buffer.\n__global__ void RestoreFeatureListFromSetsKernel(\n    LBitField64 feature_buffer,\n\n    bst_feature_t fid,\n    common::Span<bst_feature_t> feature_interactions,\n    common::Span<size_t> feature_interactions_ptr,  // of size n interaction set + 1\n\n    common::Span<bst_feature_t> interactions_list,\n    common::Span<size_t> interactions_list_ptr) {\n  auto const tid_x = threadIdx.x + blockIdx.x * blockDim.x;\n  auto const tid_y = threadIdx.y + blockIdx.y * blockDim.y;\n  // painful mapping: fid -> sets related to it -> features related to sets.\n  auto const beg = interactions_list_ptr[fid];\n  auto 
const end = interactions_list_ptr[fid+1];\n  auto const n_sets = end - beg;\n  if (tid_x < n_sets) {\n    auto const set_id_pos = beg + tid_x;\n    auto const set_id = interactions_list[set_id_pos];\n    auto const set_beg = feature_interactions_ptr[set_id];\n    auto const set_end = feature_interactions_ptr[set_id + 1];\n    auto const feature_pos = set_beg + tid_y;\n    if (feature_pos < set_end) {\n      feature_buffer.Set(feature_interactions[feature_pos]);\n    }\n  }\n}\n\n__global__ void InteractionConstraintSplitKernel(LBitField64 feature,\n                                                 int32_t feature_id,\n                                                 LBitField64 node,\n                                                 LBitField64 left,\n                                                 LBitField64 right) {\n  auto tid = threadIdx.x + blockDim.x * blockIdx.x;\n  if (tid > node.Capacity()) {\n    return;\n  }\n  // enable constraints from feature\n  node |= feature;\n\n  // enable constraints from parent\n  left  |= node;\n  right |= node;\n\n  if (tid == feature_id) {\n    // enable the split feature, set all of them at last instead of\n    // setting it for parent to avoid race.\n    node.Set(feature_id);\n    left.Set(feature_id);\n    right.Set(feature_id);\n  }\n}\n\nvoid FeatureInteractionConstraintDevice::Split(\n    bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {\n  if (!has_constraint_) { return; }\n  CHECK_NE(node_id, left_id)\n      << \" Split node: \" << node_id << \" and its left child: \"\n      << left_id << \" cannot be the same.\";\n  CHECK_NE(node_id, right_id)\n      << \" Split node: \" << node_id << \" and its right child: \"\n      << right_id << \" cannot be the same.\";\n  CHECK_LT(right_id, s_node_constraints_.size());\n  CHECK_NE(s_node_constraints_.size(), 0);\n\n  LBitField64 node = s_node_constraints_[node_id];\n  LBitField64 left = s_node_constraints_[left_id];\n  LBitField64 right = 
s_node_constraints_[right_id];\n\n  dim3 const block3(16, 64, 1);\n  dim3 const grid3(common::DivRoundUp(n_sets_, 16),\n                   common::DivRoundUp(s_fconstraints_.size(), 64));\n  dh::LaunchKernel {grid3, block3} (\n      RestoreFeatureListFromSetsKernel,\n      feature_buffer_, feature_id,\n      s_fconstraints_, s_fconstraints_ptr_,\n      s_sets_, s_sets_ptr_);\n\n  uint32_t constexpr kBlockThreads = 256;\n  auto n_grids = static_cast<uint32_t>(common::DivRoundUp(node.Capacity(), kBlockThreads));\n\n  dh::LaunchKernel {n_grids, kBlockThreads} (\n      InteractionConstraintSplitKernel,\n      feature_buffer_,\n      feature_id,\n      node, left, right);\n\n  // clear the buffer after use\n  thrust::fill_n(dh::CachingThrustPolicy(), feature_buffer_.Data(), feature_buffer_.NumValues(), 0);\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "src/tree/constraints.cuh",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n *\n * \\file Various constraints used in GPU_Hist.\n */\n#ifndef XGBOOST_TREE_CONSTRAINTS_H_\n#define XGBOOST_TREE_CONSTRAINTS_H_\n\n#include <dmlc/json.h>\n\n#include <vector>\n\n#include \"../common/bitfield.h\"\n#include \"../common/device_vector.cuh\"  // for device_vector\n#include \"constraints.h\"\n#include \"param.h\"\n#include \"xgboost/span.h\"\n\nnamespace xgboost {\n// Feature interaction constraints built for GPU Hist updater.\nstruct FeatureInteractionConstraintDevice {\n protected:\n  // Whether interaction constraint is used.\n  bool has_constraint_;\n  // n interaction sets.\n  size_t n_sets_;\n\n  // The parsed feature interaction constraints as CSR.\n  dh::device_vector<bst_feature_t> d_fconstraints_;\n  common::Span<bst_feature_t> s_fconstraints_;\n  dh::device_vector<size_t> d_fconstraints_ptr_;\n  common::Span<size_t> s_fconstraints_ptr_;\n  /* Interaction sets for each feature as CSR.  For an input like:\n   * [[0, 1], [1, 2]], this will have values:\n   *\n   * fid:                                |0 | 1  | 2|\n   * sets a feature belongs to(d_sets_): |0 |0, 1| 1|\n   *\n   * d_sets_ptr_:                        |0, 1, 3, 4|\n   */\n  dh::device_vector<bst_feature_t> d_sets_;\n  common::Span<bst_feature_t> s_sets_;\n  dh::device_vector<size_t> d_sets_ptr_;\n  common::Span<size_t> s_sets_ptr_;\n\n  // Allowed features attached to each node, have n_nodes bitfields,\n  // each of size n_features.\n  std::vector<dh::device_vector<LBitField64::value_type>> node_constraints_storage_;\n  std::vector<LBitField64> node_constraints_;\n  common::Span<LBitField64> s_node_constraints_;\n\n  // buffer storing return feature list from Query, of size n_features.\n  dh::device_vector<bst_feature_t> result_buffer_;\n  common::Span<bst_feature_t> s_result_buffer_;\n\n  // Temp buffers, one bit for each possible feature.\n  dh::device_vector<LBitField64::value_type> output_buffer_bits_storage_;\n 
 LBitField64 output_buffer_bits_;\n  dh::device_vector<LBitField64::value_type> input_buffer_bits_storage_;\n  LBitField64 input_buffer_bits_;\n  /*\n   * Combined features from all interaction sets that one feature belongs to.\n   * For an input with [[0, 1], [1, 2]], the feature 1 belongs to sets {0, 1}\n   */\n  dh::device_vector<LBitField64::value_type> d_feature_buffer_storage_;\n  LBitField64 feature_buffer_;  // of Size n features.\n\n  // Clear out all temp buffers except for `feature_buffer_', which is\n  // handled in `Split'.\n  void ClearBuffers();\n\n public:\n  size_t Features() const;\n  FeatureInteractionConstraintDevice() = default;\n  void Configure(tree::TrainParam const& param, int32_t const n_features);\n  FeatureInteractionConstraintDevice(tree::TrainParam const& param, int32_t const n_features);\n  FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice const& that) = default;\n  FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice&& that) = default;\n  /*! \\brief Reset before constructing a new tree. */\n  void Reset(Context const* ctx);\n  /*! \\brief Return a list of features given node id */\n  common::Span<bst_feature_t> QueryNode(Context const* ctx, bst_node_t nid);\n  /**\n   * @brief Return a list of selected features from given feature_list and node id.\n   *\n   * @param feature_list A list of features\n   * @param nidx node id\n   *\n   * @return A list of features picked from `feature_list' that conform to constraints in\n   * node.\n   */\n  common::Span<bst_feature_t const> Query(common::Span<bst_feature_t const> feature_list,\n                                          bst_node_t nidx);\n  /*! \\brief Apply split for node_id. */\n  void Split(bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id);\n};\n\n}      // namespace xgboost\n#endif  // XGBOOST_TREE_CONSTRAINTS_H_\n"
  },
  {
    "path": "src/tree/constraints.h",
    "content": "/**\n * Copyright 2018-2023 by Contributors\n */\n#ifndef XGBOOST_TREE_CONSTRAINTS_H_\n#define XGBOOST_TREE_CONSTRAINTS_H_\n\n#include <string>\n#include <unordered_set>\n#include <vector>\n\n#include \"param.h\"\n#include \"xgboost/base.h\"\n\nnamespace xgboost {\n/*!\n * \\brief Feature interaction constraint implementation for CPU tree updaters.\n *\n * The interface is similar to the one for GPU Hist.\n */\nclass FeatureInteractionConstraintHost {\n protected:\n  // interaction_constraints_[constraint_id] contains a single interaction\n  //   constraint, which specifies a group of feature IDs that can interact\n  //   with each other\n  std::vector< std::unordered_set<bst_feature_t> > interaction_constraints_;\n  // int_cont_[nid] contains the set of all feature IDs that are allowed to\n  //   be used for a split at node nid\n  std::vector< std::unordered_set<bst_feature_t> > node_constraints_;\n  // splits_[nid] contains the set of all feature IDs that have been used for\n  //   splits in node nid and its parents\n  std::vector< std::unordered_set<bst_feature_t> > splits_;\n  // string passed by user.\n  std::string interaction_constraint_str_;\n  // number of features in DMatrix/Booster\n  bst_feature_t n_features_;\n  bool enabled_{false};\n\n  void SplitImpl(int32_t node_id, bst_feature_t feature_id, bst_node_t left_id,\n                 bst_node_t right_id);\n\n public:\n  FeatureInteractionConstraintHost() = default;\n  void Split(int32_t node_id, bst_feature_t feature_id, bst_node_t left_id,\n             bst_node_t right_id) {\n    if (!enabled_) {\n      return;\n    } else {\n      this->SplitImpl(node_id, feature_id, left_id, right_id);\n    }\n  }\n\n  bool Query(bst_node_t nid, bst_feature_t fid) const {\n    if (!enabled_) { return true; }\n    return node_constraints_.at(nid).find(fid) != node_constraints_.at(nid).cend();\n  }\n\n  void Reset();\n\n  void Configure(tree::TrainParam const& param, bst_feature_t const 
n_features);\n};\n}  // namespace xgboost\n\n#endif  // XGBOOST_TREE_CONSTRAINTS_H_\n"
  },
  {
    "path": "src/tree/driver.h",
    "content": "/*!\n * Copyright 2021 by XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_DRIVER_H_\n#define XGBOOST_TREE_DRIVER_H_\n#include <xgboost/span.h>\n#include <queue>\n#include <vector>\n#include \"./param.h\"\n\nnamespace xgboost {\nnamespace tree {\n\ntemplate <typename ExpandEntryT>\ninline bool DepthWise(const ExpandEntryT& lhs, const ExpandEntryT& rhs) {\n  return lhs.GetNodeId() > rhs.GetNodeId();  // favor small depth\n}\n\ntemplate <typename ExpandEntryT>\ninline bool LossGuide(const ExpandEntryT& lhs, const ExpandEntryT& rhs) {\n  if (lhs.GetLossChange() == rhs.GetLossChange()) {\n    return lhs.GetNodeId() > rhs.GetNodeId();  // favor small timestamp\n  } else {\n    return lhs.GetLossChange() < rhs.GetLossChange();  // favor large loss_chg\n  }\n}\n\n// Drives execution of tree building on device\ntemplate <typename ExpandEntryT>\nclass Driver {\n  using ExpandQueue =\n      std::priority_queue<ExpandEntryT, std::vector<ExpandEntryT>,\n                          std::function<bool(ExpandEntryT, ExpandEntryT)>>;\n\n public:\n  explicit Driver(TrainParam param, std::size_t max_node_batch_size = 256)\n      : param_(param),\n        max_node_batch_size_(max_node_batch_size),\n        queue_(param.grow_policy == TrainParam::kDepthWise ? 
DepthWise<ExpandEntryT>\n                                                           : LossGuide<ExpandEntryT>) {}\n  template <typename EntryIterT>\n  void Push(EntryIterT begin, EntryIterT end) {\n    for (auto it = begin; it != end; ++it) {\n      const ExpandEntryT& e = *it;\n      if (e.split.loss_chg > kRtEps) {\n        queue_.push(e);\n      }\n    }\n  }\n  void Push(const std::vector<ExpandEntryT> &entries) {\n    this->Push(entries.begin(), entries.end());\n  }\n  void Push(ExpandEntryT const& e) { queue_.push(e); }\n\n  bool IsEmpty() {\n    return queue_.empty();\n  }\n\n  // Can a child of this entry still be expanded?\n  // can be used to avoid extra work\n  bool IsChildValid(ExpandEntryT const& parent_entry) {\n    if (param_.max_depth > 0 && parent_entry.depth + 1 >= param_.max_depth) return false;\n    if (param_.max_leaves > 0 && num_leaves_ >= param_.max_leaves) return false;\n    return true;\n  }\n\n  // Return the set of nodes to be expanded\n  // This set has no dependencies between entries so they may be expanded in\n  // parallel or asynchronously\n  std::vector<ExpandEntryT> Pop() {\n    if (queue_.empty()) return {};\n    // Return a single entry for loss guided mode\n    if (param_.grow_policy == TrainParam::kLossGuide) {\n      ExpandEntryT e = queue_.top();\n      queue_.pop();\n\n      if (e.IsValid(param_, num_leaves_)) {\n        num_leaves_++;\n        return {e};\n      } else {\n        return {};\n      }\n    }\n    // Return nodes on same level for depth wise\n    std::vector<ExpandEntryT> result;\n    ExpandEntryT e = queue_.top();\n    int level = e.depth;\n    while (e.depth == level && !queue_.empty() && result.size() < max_node_batch_size_) {\n      queue_.pop();\n      if (e.IsValid(param_, num_leaves_)) {\n        num_leaves_++;\n        result.emplace_back(e);\n      }\n\n      if (!queue_.empty()) {\n        e = queue_.top();\n      }\n    }\n    return result;\n  }\n\n private:\n  TrainParam param_;\n  bst_node_t 
num_leaves_ = 1;\n  std::size_t max_node_batch_size_;\n  ExpandQueue queue_;\n};\n}  // namespace tree\n}  // namespace xgboost\n\n#endif  // XGBOOST_TREE_DRIVER_H_\n"
  },
  {
    "path": "src/tree/fit_stump.cc",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n *\n * @brief Utilities for estimating initial score.\n */\n#include \"fit_stump.h\"\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n\n#include \"../collective/aggregator.h\"   // for GlobalSum\n#include \"../common/threading_utils.h\"  // for ParallelFor\n#include \"xgboost/base.h\"               // for bst_target_t, GradientPairPrecise\n#include \"xgboost/context.h\"            // for Context\n#include \"xgboost/linalg.h\"             // for MatrixView, Matrix, Constant\n#include \"xgboost/logging.h\"            // CHECK_EQ\n\n#if !defined(XGBOOST_USE_CUDA)\n#include \"../common/common.h\"  // AssertGPUSupport\n#endif\n\nnamespace xgboost::tree {\nnamespace cpu_impl {\nvoid SumGradients(Context const* ctx, linalg::MatrixView<GradientPair const> gpair,\n                  linalg::VectorView<GradientPairPrecise> out) {\n  auto n_targets = out.Size();\n  CHECK_EQ(n_targets, gpair.Shape(1));\n  linalg::Matrix<GradientPairPrecise> sum_tloc =\n      linalg::Constant(ctx, GradientPairPrecise{}, ctx->Threads(), n_targets);\n  auto h_sum_tloc = sum_tloc.HostView();\n  // first dim for gpair is samples, second dim is target.\n  // Reduce by column, parallel by samples\n  common::ParallelFor(gpair.Shape(0), ctx->Threads(), [&](auto i) {\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      h_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)};\n    }\n  });\n  // Aggregate to the first row.\n  auto h_sum = h_sum_tloc.Slice(0, linalg::All());\n  for (std::int32_t i = 1, t = ctx->Threads(); i < t; ++i) {\n    for (bst_target_t j = 0; j < n_targets; ++j) {\n      h_sum(j) += h_sum_tloc(i, j);\n    }\n  }\n  for (std::size_t i = 0; i < n_targets; ++i) {\n    out(i) = h_sum(i);\n  }\n}\n\nvoid FitStump(Context const* ctx, MetaInfo const& info,\n              linalg::MatrixView<GradientPair const> gpair, linalg::VectorView<float> out) {\n  auto n_targets = 
out.Size();\n  CHECK_EQ(n_targets, gpair.Shape(1));\n  auto sum = linalg::Empty<GradientPairPrecise>(ctx, n_targets);\n  SumGradients(ctx, gpair, sum.HostView());\n  auto h_sum = sum.HostView();\n  CHECK(h_sum.CContiguous());\n  auto as_double = linalg::MakeTensorView(\n      ctx, common::Span{reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2},\n      h_sum.Size() * 2);\n  auto rc = collective::GlobalSum(ctx, info, as_double);\n  collective::SafeColl(rc);\n\n  for (std::size_t i = 0; i < h_sum.Size(); ++i) {\n    out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));\n  }\n}\n}  // namespace cpu_impl\n\nnamespace cuda_impl {\nvoid FitStump(Context const* ctx, MetaInfo const& info,\n              linalg::TensorView<GradientPair const, 2> gpair, linalg::VectorView<float> out);\n\n#if !defined(XGBOOST_USE_CUDA)\ninline void FitStump(Context const*, MetaInfo const&, linalg::TensorView<GradientPair const, 2>,\n                     linalg::VectorView<float>) {\n  common::AssertGPUSupport();\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n}  // namespace cuda_impl\n\nvoid FitStump(Context const* ctx, MetaInfo const& info, linalg::Matrix<GradientPair> const& gpair,\n              bst_target_t n_targets, linalg::Vector<float>* out) {\n  out->SetDevice(ctx->Device());\n  out->Reshape(n_targets);\n\n  gpair.SetDevice(ctx->Device());\n  auto gpair_t = gpair.View(ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device());\n  ctx->IsCUDA() ? cuda_impl::FitStump(ctx, info, gpair_t, out->View(ctx->Device()))\n                : cpu_impl::FitStump(ctx, info, gpair_t, out->HostView());\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/fit_stump.cu",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n *\n * @brief Utilities for estimating initial score.\n */\n#include <thrust/execution_policy.h>            // cuda::par\n#include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator\n\n#include <cstddef>  // std::size_t\n\n#include \"../collective/aggregator.cuh\"  // for GlobalSum\n#include \"../common/cuda_context.cuh\"\n#include \"../common/device_helpers.cuh\"  // dh::MakeTransformIterator\n#include \"fit_stump.h\"\n#include \"xgboost/base.h\"     // GradientPairPrecise, GradientPair, XGBOOST_DEVICE\n#include \"xgboost/context.h\"  // Context\n#include \"xgboost/linalg.h\"   // TensorView, Tensor, Constant\n#include \"xgboost/logging.h\"  // CHECK_EQ\n#include \"xgboost/span.h\"     // span\n\nnamespace xgboost::tree::cuda_impl {\nvoid FitStump(Context const* ctx, MetaInfo const& info,\n              linalg::TensorView<GradientPair const, 2> gpair, linalg::VectorView<float> out) {\n  auto n_targets = out.Size();\n  CHECK_EQ(n_targets, gpair.Shape(1));\n  linalg::Vector<GradientPairPrecise> sum = linalg::Constant(ctx, GradientPairPrecise{}, n_targets);\n  CHECK(out.Contiguous());\n\n  // Reduce by column\n  auto key_it = dh::MakeTransformIterator<bst_target_t>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { return i / gpair.Shape(0); });\n  auto grad_it = dh::MakeTransformIterator<GradientPairPrecise>(\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise {\n        auto target = i / gpair.Shape(0);\n        auto sample = i % gpair.Shape(0);\n        return GradientPairPrecise{gpair(sample, target)};\n      });\n  auto d_sum = sum.View(ctx->Device());\n  CHECK(d_sum.CContiguous());\n\n  thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + gpair.Size(), grad_it,\n                        thrust::make_discard_iterator(), dh::tbegin(d_sum.Values()));\n\n  auto 
rc = collective::GlobalSum(ctx, info,\n                                  linalg::MakeVec(reinterpret_cast<double*>(d_sum.Values().data()),\n                                                  d_sum.Size() * 2, ctx->Device()));\n  SafeColl(rc);\n\n  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_targets,\n                     [=] XGBOOST_DEVICE(std::size_t i) mutable {\n                       out(i) = static_cast<float>(\n                           CalcUnregularizedWeight(d_sum(i).GetGrad(), d_sum(i).GetHess()));\n                     });\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/fit_stump.h",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n *\n * @brief Utilities for estimating initial score.\n */\n\n#ifndef XGBOOST_TREE_FIT_STUMP_H_\n#define XGBOOST_TREE_FIT_STUMP_H_\n\n#include <algorithm>  // std::max\n\n#include \"xgboost/base.h\"     // GradientPair\n#include \"xgboost/context.h\"  // Context\n#include \"xgboost/data.h\"     // MetaInfo\n#include \"xgboost/linalg.h\"   // TensorView\n\nnamespace xgboost::tree {\nnamespace cpu_impl {\n/**\n * @brief Sum gradients for each target.\n */\nvoid SumGradients(Context const* ctx, linalg::MatrixView<GradientPair const> gpair,\n                  linalg::VectorView<GradientPairPrecise> out);\n}  // namespace cpu_impl\n\ntemplate <typename T>\nXGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) {\n  return -sum_grad / std::max(sum_hess, static_cast<double>(kRtEps));\n}\n\n/**\n * @brief Fit a tree stump as an estimation of base_score.\n */\nvoid FitStump(Context const* ctx, MetaInfo const& info, linalg::Matrix<GradientPair> const& gpair,\n              bst_target_t n_targets, linalg::Vector<float>* out);\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_FIT_STUMP_H_\n"
  },
  {
    "path": "src/tree/gpu_hist/evaluate_splits.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <algorithm>  // for :max\n#include <limits>     // for numeric_limits\n\n#include \"../../collective/allgather.h\"\n#include \"../../collective/communicator-inl.h\"  // for GetWorldSize, GetRank\n#include \"../../common/categorical.h\"\n#include \"../../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../common/cuda_stream.h\"     // for Event\n#include \"evaluate_splits.cuh\"\n#include \"expand_entry.cuh\"\n\nnamespace xgboost::tree {\n// With constraints\nXGBOOST_DEVICE float LossChangeMissing(\n    const GradientPairInt64 &scan, const GradientPairInt64 &missing,\n    const GradientPairInt64 &parent_sum, const GPUTrainingParam &param, bst_node_t nidx,\n    bst_feature_t fidx, TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,\n    bool &missing_left_out, const GradientQuantiser &quantiser) {  // NOLINT\n  const auto left_sum = scan + missing;\n  float missing_left_gain =\n      evaluator.CalcSplitGain(param, nidx, fidx, quantiser.ToFloatingPoint(left_sum),\n                              quantiser.ToFloatingPoint(parent_sum - left_sum));\n  float missing_right_gain =\n      evaluator.CalcSplitGain(param, nidx, fidx, quantiser.ToFloatingPoint(scan),\n                              quantiser.ToFloatingPoint(parent_sum - scan));\n\n  missing_left_out = missing_left_gain > missing_right_gain;\n  return missing_left_out ? missing_left_gain : missing_right_gain;\n}\n\n// This kernel uses block_size == warp_size. This is an unusually small block size for a cuda kernel\n// - normally a larger block size is preferred to increase the number of resident warps on each SM\n// (occupancy). In the below case each thread has a very large amount of work per thread relative to\n// typical cuda kernels. Thus the SM can be highly utilised by a small number of threads. It was\n// discovered by experiments that a small block size here is significantly faster. 
Furthermore,\n// using only a single warp, synchronisation barriers are eliminated and broadcasts can be performed\n// using warp intrinsics instead of slower shared memory.\ntemplate <int kBlockSize>\nclass EvaluateSplitAgent {\n public:\n  using ArgMaxT = cub::KeyValuePair<std::uint32_t, float>;\n  using BlockScanT = cub::BlockScan<GradientPairInt64, kBlockSize>;\n  using MaxReduceT = cub::WarpReduce<ArgMaxT>;\n  using SumReduceT = cub::WarpReduce<GradientPairInt64>;\n\n  struct TempStorage {\n    typename BlockScanT::TempStorage scan;\n    typename MaxReduceT::TempStorage max_reduce;\n    typename SumReduceT::TempStorage sum_reduce;\n  };\n\n  const int fidx;\n  const int nidx;\n  const uint32_t gidx_begin;  // beginning bin\n  const uint32_t gidx_end;    // end bin for i^th feature\n  const dh::LDGIterator<float> feature_values;\n  const GradientPairInt64 *node_histogram;\n  const GradientQuantiser &rounding;\n  const GradientPairInt64 parent_sum;\n  const GradientPairInt64 missing;\n  const GPUTrainingParam &param;\n  const TreeEvaluator::SplitEvaluator<GPUTrainingParam> &evaluator;\n  TempStorage *temp_storage;\n  SumCallbackOp<GradientPairInt64> prefix_op;\n  static float constexpr kNullGain = -std::numeric_limits<bst_float>::infinity();\n\n  __device__ EvaluateSplitAgent(TempStorage *temp_storage, int fidx,\n                                const EvaluateSplitInputs &inputs,\n                                const EvaluateSplitSharedInputs &shared_inputs,\n                                const TreeEvaluator::SplitEvaluator<GPUTrainingParam> &evaluator)\n      : temp_storage(temp_storage),\n        nidx(inputs.nidx),\n        fidx(fidx),\n        gidx_begin(__ldg(shared_inputs.feature_segments.data() + fidx)),\n        gidx_end(__ldg(shared_inputs.feature_segments.data() + fidx + 1)),\n        feature_values(shared_inputs.feature_values.data()),\n        node_histogram(inputs.gradient_histogram.data()),\n        rounding(shared_inputs.rounding),\n        
parent_sum(dh::LDGIterator<GradientPairInt64>(&inputs.parent_sum)[0]),\n        param(shared_inputs.param),\n        evaluator(evaluator),\n        missing(parent_sum - ReduceFeature()) {\n    static_assert(kBlockSize == 32, \"This kernel relies on the assumption block_size == warp_size\");\n    // There should be no missing value gradients for a dense matrix\n    KERNEL_CHECK(!shared_inputs.is_dense || missing.GetQuantisedHess() == 0);\n  }\n  __device__ GradientPairInt64 ReduceFeature() {\n    GradientPairInt64 local_sum;\n    for (int idx = gidx_begin + threadIdx.x; idx < gidx_end; idx += kBlockSize) {\n      local_sum += LoadGpair(node_histogram + idx);\n    }\n    local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum);  // NOLINT\n    // Broadcast result from thread 0\n    return {__shfl_sync(0xffffffff, local_sum.GetQuantisedGrad(), 0),\n            __shfl_sync(0xffffffff, local_sum.GetQuantisedHess(), 0)};\n  }\n\n  // Load using efficient 128 vector load instruction\n  __device__ __forceinline__ static GradientPairInt64 LoadGpair(const GradientPairInt64 *ptr) {\n    float4 tmp = *reinterpret_cast<const float4 *>(ptr);\n    auto gpair = *reinterpret_cast<const GradientPairInt64 *>(&tmp);\n    static_assert(sizeof(decltype(gpair)) == sizeof(float4),\n                  \"Vector type size does not match gradient pair size.\");\n    return gpair;\n  }\n\n  __device__ __forceinline__ void Numerical(DeviceSplitCandidate *best_split) {\n    for (bst_bin_t scan_begin = gidx_begin; scan_begin < gidx_end; scan_begin += kBlockSize) {\n      bool thread_active = (scan_begin + threadIdx.x) < gidx_end;\n      GradientPairInt64 bin = thread_active ? 
LoadGpair(node_histogram + scan_begin + threadIdx.x)\n                                            : GradientPairInt64();\n#if CUB_VERSION >= 200800\n      BlockScanT(temp_storage->scan).ExclusiveScan(bin, bin, cuda::std::plus{}, prefix_op);\n#else\n      BlockScanT(temp_storage->scan).ExclusiveScan(bin, bin, cub::Sum{}, prefix_op);\n#endif\n      // Whether the gradient of missing values is put to the left side.\n      bool missing_left = true;\n      float gain = thread_active ? LossChangeMissing(bin, missing, parent_sum, param, nidx, fidx,\n                                                     evaluator, missing_left, rounding)\n                                 : kNullGain;\n      // Find thread with best gain\n      auto best = MaxReduceT(temp_storage->max_reduce).Reduce({threadIdx.x, gain}, cub::ArgMax());\n      // This reduce result is only valid in thread 0\n      // broadcast to the rest of the warp\n      auto best_thread = __shfl_sync(0xffffffff, best.key, 0);\n\n      // Best thread updates the split\n      if (threadIdx.x == best_thread) {\n        // Use pointer from cut to indicate begin and end of bins for each feature.\n        int split_gidx = (scan_begin + threadIdx.x) - 1;\n        float fvalue = split_gidx < static_cast<int>(gidx_begin)\n                           ? -std::numeric_limits<float>::infinity()\n                           : feature_values[split_gidx];\n        GradientPairInt64 left = missing_left ? bin + missing : bin;\n        GradientPairInt64 right = parent_sum - left;\n        best_split->Update(gain, missing_left ? kLeftDir : kRightDir, fvalue, fidx, left, right,\n                           false, param, rounding);\n      }\n\n      __syncwarp();\n    }\n  }\n\n  __device__ __forceinline__ void OneHot(DeviceSplitCandidate *best_split) {\n    for (int scan_begin = gidx_begin; scan_begin < gidx_end; scan_begin += kBlockSize) {\n      bool thread_active = (scan_begin + threadIdx.x) < gidx_end;\n\n      auto rest = thread_active ? 
LoadGpair(node_histogram + scan_begin + threadIdx.x)\n                                : GradientPairInt64();\n      GradientPairInt64 bin = parent_sum - rest - missing;\n      // Whether the gradient of missing values is put to the left side.\n      bool missing_left = true;\n      float gain = thread_active ? LossChangeMissing(bin, missing, parent_sum, param, nidx, fidx,\n                                                     evaluator, missing_left, rounding)\n                                 : kNullGain;\n\n      // Find thread with best gain\n      auto best = MaxReduceT(temp_storage->max_reduce).Reduce({threadIdx.x, gain}, cub::ArgMax());\n      // This reduce result is only valid in thread 0\n      // broadcast to the rest of the warp\n      auto best_thread = __shfl_sync(0xffffffff, best.key, 0);\n      // Best thread updates the split\n      if (threadIdx.x == best_thread) {\n        int32_t split_gidx = (scan_begin + threadIdx.x);\n        float fvalue = feature_values[split_gidx];\n        GradientPairInt64 left = missing_left ? bin + missing : bin;\n        GradientPairInt64 right = parent_sum - left;\n        best_split->UpdateCat(gain, missing_left ? kLeftDir : kRightDir,\n                              static_cast<bst_cat_t>(fvalue), fidx, left, right, param, rounding);\n      }\n\n      __syncwarp();\n    }\n  }\n  /**\n   * \\brief Gather and update the best split.\n   */\n  __device__ __forceinline__ void PartitionUpdate(bst_bin_t scan_begin, bool thread_active,\n                                                  bool missing_left, bst_bin_t it,\n                                                  GradientPairInt64 const &left_sum,\n                                                  GradientPairInt64 const &right_sum,\n                                                  DeviceSplitCandidate *best_split) {\n    auto gain = thread_active\n                    ? 
evaluator.CalcSplitGain(param, nidx, fidx, rounding.ToFloatingPoint(left_sum),\n                                              rounding.ToFloatingPoint(right_sum))\n                    : kNullGain;\n\n    // Find thread with best gain\n    auto best = MaxReduceT(temp_storage->max_reduce).Reduce({threadIdx.x, gain}, cub::ArgMax());\n    // This reduce result is only valid in thread 0\n    // broadcast to the rest of the warp\n    auto best_thread = __shfl_sync(0xffffffff, best.key, 0);\n    // Best thread updates the split\n    if (threadIdx.x == best_thread) {\n      assert(thread_active);\n      // index of best threshold inside a feature.\n      auto best_thresh = it - gidx_begin;\n      best_split->UpdateCat(gain, missing_left ? kLeftDir : kRightDir, best_thresh, fidx, left_sum,\n                            right_sum, param, rounding);\n    }\n\n    __syncwarp();\n  }\n  /**\n   * \\brief Partition-based split for categorical feature.\n   */\n  __device__ __forceinline__ void Partition(DeviceSplitCandidate *best_split,\n                                            common::Span<bst_feature_t> sorted_idx,\n                                            std::size_t node_offset,\n                                            GPUTrainingParam const &param) {\n    bst_bin_t n_bins_feature = gidx_end - gidx_begin;\n    auto n_bins = std::min(param.max_cat_threshold, n_bins_feature);\n\n    bst_bin_t it_begin = gidx_begin;\n    bst_bin_t it_end = it_begin + n_bins - 1;\n\n    // forward\n    for (bst_bin_t scan_begin = it_begin; scan_begin < it_end; scan_begin += kBlockSize) {\n      auto it = scan_begin + static_cast<bst_bin_t>(threadIdx.x);\n      bool thread_active = it < it_end;\n\n      auto right_sum = thread_active ? 
LoadGpair(node_histogram + sorted_idx[it] - node_offset)\n                                     : GradientPairInt64();\n      // No min value for cat feature, use inclusive scan.\n      BlockScanT(temp_storage->scan).InclusiveSum(right_sum, right_sum, prefix_op);\n      GradientPairInt64 left_sum = parent_sum - right_sum;\n\n      PartitionUpdate(scan_begin, thread_active, true, it, left_sum, right_sum, best_split);\n    }\n\n    // backward\n    it_begin = gidx_end - 1;\n    it_end = it_begin - n_bins + 1;\n    prefix_op = SumCallbackOp<GradientPairInt64>{};  // reset\n\n    for (bst_bin_t scan_begin = it_begin; scan_begin > it_end; scan_begin -= kBlockSize) {\n      auto it = scan_begin - static_cast<bst_bin_t>(threadIdx.x);\n      bool thread_active = it > it_end;\n\n      auto left_sum = thread_active ? LoadGpair(node_histogram + sorted_idx[it] - node_offset)\n                                    : GradientPairInt64();\n      // No min value for cat feature, use inclusive scan.\n      BlockScanT(temp_storage->scan).InclusiveSum(left_sum, left_sum, prefix_op);\n      GradientPairInt64 right_sum = parent_sum - left_sum;\n\n      PartitionUpdate(scan_begin, thread_active, false, it, left_sum, right_sum, best_split);\n    }\n  }\n};\n\ntemplate <int kBlockThreads>\n__global__ __launch_bounds__(kBlockThreads) void EvaluateSplitsKernel(\n    bst_feature_t max_active_features, common::Span<const EvaluateSplitInputs> d_inputs,\n    const EvaluateSplitSharedInputs shared_inputs, common::Span<bst_feature_t> sorted_idx,\n    const TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,\n    common::Span<DeviceSplitCandidate> out_candidates) {\n  // Aligned && shared storage for best_split\n  __shared__ cub::Uninitialized<DeviceSplitCandidate> uninitialized_split;\n  DeviceSplitCandidate &best_split = uninitialized_split.Alias();\n\n  if (threadIdx.x == 0) {\n    best_split = DeviceSplitCandidate{};\n  }\n\n  __syncthreads();\n\n  // Allocate blocks to one feature of one 
node\n  const auto input_idx = blockIdx.x / max_active_features;\n  const EvaluateSplitInputs &inputs = d_inputs[input_idx];\n  // One block for each feature. Features are sampled, so fidx != blockIdx.x\n  // Some blocks may not have any feature to work on, simply return\n  int feature_offset = blockIdx.x % max_active_features;\n  if (feature_offset >= inputs.feature_set.size()) {\n    return;\n  }\n  int fidx = inputs.feature_set[feature_offset];\n\n  using AgentT = EvaluateSplitAgent<kBlockThreads>;\n  __shared__ typename AgentT::TempStorage temp_storage;\n  AgentT agent(&temp_storage, fidx, inputs, shared_inputs, evaluator);\n\n  if (common::IsCat(shared_inputs.feature_types, fidx)) {\n    auto n_bins_in_feat =\n        shared_inputs.feature_segments[fidx + 1] - shared_inputs.feature_segments[fidx];\n    if (common::UseOneHot(n_bins_in_feat, shared_inputs.param.max_cat_to_onehot)) {\n      agent.OneHot(&best_split);\n    } else {\n      auto total_bins = shared_inputs.feature_values.size();\n      size_t offset = total_bins * input_idx;\n      auto node_sorted_idx = sorted_idx.subspan(offset, total_bins);\n      agent.Partition(&best_split, node_sorted_idx, offset, shared_inputs.param);\n    }\n  } else {\n    agent.Numerical(&best_split);\n  }\n\n  __syncthreads();\n  if (threadIdx.x == 0) {\n    // Record best loss for each feature\n    out_candidates[blockIdx.x] = best_split;\n  }\n}\n\n__device__ DeviceSplitCandidate operator+(const DeviceSplitCandidate &a,\n                                          const DeviceSplitCandidate &b) {\n  return b.loss_chg > a.loss_chg ? 
b : a;\n}\n\n/**\n * \\brief Set the bits for categorical splits based on the split threshold.\n */\n__device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inputs,\n                                    common::Span<bst_feature_t const> d_sorted_idx,\n                                    bst_feature_t fidx, std::size_t input_idx,\n                                    common::Span<common::CatBitField::value_type> out,\n                                    DeviceSplitCandidate *p_out_split) {\n  auto &out_split = *p_out_split;\n  auto out_cats = common::CatBitField{out};\n\n  // Simple case for one hot split\n  if (common::UseOneHot(shared_inputs.FeatureBins(fidx), shared_inputs.param.max_cat_to_onehot)) {\n    out_cats.Set(common::AsCat(out_split.thresh));\n    return;\n  }\n\n  // partition-based split\n  auto node_sorted_idx = d_sorted_idx.subspan(shared_inputs.feature_values.size() * input_idx,\n                                              shared_inputs.feature_values.size());\n  size_t node_offset = input_idx * shared_inputs.feature_values.size();\n  auto const best_thresh = out_split.thresh;\n  if (best_thresh == -1) {\n    return;\n  }\n  auto f_sorted_idx = node_sorted_idx.subspan(shared_inputs.feature_segments[fidx],\n                                              shared_inputs.FeatureBins(fidx));\n  bool forward = out_split.dir == kLeftDir;\n  bst_bin_t partition = forward ? 
best_thresh + 1 : best_thresh;\n  auto beg = dh::tcbegin(f_sorted_idx);\n  assert(partition > 0 && \"Invalid partition.\");\n  thrust::for_each(thrust::seq, beg, beg + partition, [&](size_t c) {\n    auto cat = shared_inputs.feature_values[c - node_offset];\n    out_cats.Set(common::AsCat(cat));\n  });\n}\n\nvoid GPUHistEvaluator::LaunchEvaluateSplits(\n    Context const *ctx, bst_feature_t max_active_features,\n    common::Span<const EvaluateSplitInputs> d_inputs, EvaluateSplitSharedInputs shared_inputs,\n    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,\n    common::Span<DeviceSplitCandidate> out_splits) {\n  if (need_sort_histogram_) {\n    this->SortHistogram(ctx, d_inputs, shared_inputs, evaluator);\n  }\n\n  size_t combined_num_features = max_active_features * d_inputs.size();\n  dh::TemporaryArray<DeviceSplitCandidate> feature_best_splits(combined_num_features,\n                                                               DeviceSplitCandidate());\n\n  // One block for each feature\n  uint32_t constexpr kBlockThreads = 32;\n  dh::LaunchKernel{static_cast<uint32_t>(combined_num_features), kBlockThreads, 0,  // NOLINT\n                   ctx->CUDACtx()->Stream()}(\n      EvaluateSplitsKernel<kBlockThreads>, max_active_features, d_inputs, shared_inputs,\n      this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()), evaluator,\n      dh::ToSpan(feature_best_splits));\n\n  // Reduce to get best candidate for left and right child over all features\n  auto reduce_offset = dh::MakeTransformIterator<size_t>(\n      thrust::make_counting_iterator(0llu),\n      [=] __device__(size_t idx) -> size_t { return idx * max_active_features; });\n  size_t temp_storage_bytes = 0;\n  auto num_segments = out_splits.size();\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(\n      nullptr, temp_storage_bytes, feature_best_splits.data(), out_splits.data(), num_segments,\n      reduce_offset, reduce_offset + 1, ctx->CUDACtx()->Stream()));\n  
dh::TemporaryArray<int8_t> temp(temp_storage_bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(\n      temp.data().get(), temp_storage_bytes, feature_best_splits.data(), out_splits.data(),\n      num_segments, reduce_offset, reduce_offset + 1, ctx->CUDACtx()->Stream()));\n}\n\nvoid GPUHistEvaluator::CopyToHost(const std::vector<bst_node_t> &nidx) {\n  if (!has_categoricals_) return;\n  auto d_cats = this->DeviceCatStorage(nidx);\n  auto h_cats = this->HostCatStorage(nidx);\n  curt::Event event;\n  event.Record(curt::DefaultStream());\n  for (auto idx : nidx) {\n    copy_stream_.View().Wait(event);\n    dh::safe_cuda(cudaMemcpyAsync(\n        h_cats.GetNodeCatStorage(idx).data(), d_cats.GetNodeCatStorage(idx).data(),\n        d_cats.GetNodeCatStorage(idx).size_bytes(), cudaMemcpyDeviceToHost, copy_stream_.View()));\n  }\n}\n\nvoid GPUHistEvaluator::EvaluateSplits(Context const *ctx, const std::vector<bst_node_t> &nidx,\n                                      bst_feature_t max_active_features,\n                                      common::Span<const EvaluateSplitInputs> d_inputs,\n                                      EvaluateSplitSharedInputs shared_inputs,\n                                      common::Span<GPUExpandEntry> out_entries) {\n  auto evaluator = this->tree_evaluator_.template GetEvaluator<GPUTrainingParam>();\n\n  dh::TemporaryArray<DeviceSplitCandidate> splits_out_storage(d_inputs.size());\n  auto out_splits = dh::ToSpan(splits_out_storage);\n  this->LaunchEvaluateSplits(ctx, max_active_features, d_inputs, shared_inputs, evaluator,\n                             out_splits);\n\n  if (is_column_split_) {\n    // With column-wise data split, we gather the split candidates from all the workers and find the\n    // global best candidates.\n    auto const world_size = collective::GetWorldSize();\n    dh::TemporaryArray<DeviceSplitCandidate> all_candidate_storage(out_splits.size() * world_size);\n    auto all_candidates = 
dh::ToSpan(all_candidate_storage);\n    auto current_rank =\n        all_candidates.subspan(collective::GetRank() * out_splits.size(), out_splits.size());\n    dh::safe_cuda(cudaMemcpyAsync(current_rank.data(), out_splits.data(),\n                                  out_splits.size() * sizeof(DeviceSplitCandidate),\n                                  cudaMemcpyDeviceToDevice, ctx->CUDACtx()->Stream()));\n    auto rc = collective::Allgather(\n        ctx, linalg::MakeVec(all_candidates.data(), all_candidates.size(), ctx->Device()));\n    collective::SafeColl(rc);\n\n    // Reduce to get the best candidate from all workers.\n    dh::LaunchN(out_splits.size(), ctx->CUDACtx()->Stream(),\n                [world_size, all_candidates, out_splits] __device__(size_t i) {\n                  out_splits[i] = all_candidates[i];\n                  for (auto rank = 1; rank < world_size; rank++) {\n                    out_splits[i] = out_splits[i] + all_candidates[rank * out_splits.size() + i];\n                  }\n                });\n  }\n\n  auto d_sorted_idx = this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size());\n  auto d_entries = out_entries;\n  auto device_cats_accessor = this->DeviceCatStorage(nidx);\n  // turn candidate into entry, along with handling sort based split.\n  dh::LaunchN(d_inputs.size(), ctx->CUDACtx()->Stream(), [=] __device__(size_t i) mutable {\n    auto const input = d_inputs[i];\n    auto &split = out_splits[i];\n    // Subtract parent gain here\n    // As it is constant, this is more efficient than doing it during every\n    // split evaluation\n    float parent_gain =\n        CalcGain(shared_inputs.param, shared_inputs.rounding.ToFloatingPoint(input.parent_sum));\n    split.loss_chg -= parent_gain;\n    auto fidx = out_splits[i].findex;\n\n    if (split.is_cat) {\n      SetCategoricalSplit(shared_inputs, d_sorted_idx, fidx, i,\n                          device_cats_accessor.GetNodeCatStorage(input.nidx), &out_splits[i]);\n    }\n\n    
float base_weight = evaluator.CalcWeight(\n        input.nidx, shared_inputs.param,\n        shared_inputs.rounding.ToFloatingPoint(split.left_sum + split.right_sum));\n    float left_weight = evaluator.CalcWeight(\n        input.nidx, shared_inputs.param, shared_inputs.rounding.ToFloatingPoint(split.left_sum));\n    float right_weight = evaluator.CalcWeight(\n        input.nidx, shared_inputs.param, shared_inputs.rounding.ToFloatingPoint(split.right_sum));\n\n    d_entries[i] = GPUExpandEntry{input.nidx,  input.depth, out_splits[i],\n                                  base_weight, left_weight, right_weight};\n  });\n\n  this->CopyToHost(nidx);\n}\n\nGPUExpandEntry GPUHistEvaluator::EvaluateSingleSplit(Context const *ctx, EvaluateSplitInputs input,\n                                                     EvaluateSplitSharedInputs shared_inputs) {\n  dh::CachingDeviceUVector<EvaluateSplitInputs> inputs(1);\n  dh::safe_cuda(cudaMemcpyAsync(inputs.data(), &input, sizeof(input), cudaMemcpyDefault));\n\n  dh::TemporaryArray<GPUExpandEntry> out_entries(1);\n  this->EvaluateSplits(ctx, {input.nidx}, input.feature_set.size(), dh::ToSpan(inputs),\n                       shared_inputs, dh::ToSpan(out_entries));\n  GPUExpandEntry root_entry;\n  dh::safe_cuda(cudaMemcpyAsync(&root_entry, out_entries.data().get(), sizeof(GPUExpandEntry),\n                                cudaMemcpyDeviceToHost));\n  return root_entry;\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/evaluate_splits.cuh",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef EVALUATE_SPLITS_CUH_\n#define EVALUATE_SPLITS_CUH_\n#include <cuda/std/tuple>  // for tuple\n\n#include <xgboost/span.h>\n\n#include \"../../common/categorical.h\"\n#include \"../../common/cuda_pinned_allocator.h\"\n#include \"../../common/cuda_stream.h\"  // for Stream\n#include \"../split_evaluator.h\"\n#include \"../updater_gpu_common.cuh\"  // for DeviceSplitCandidate\n#include \"expand_entry.cuh\"\n\nnamespace xgboost {\nnamespace common {\nclass HistogramCuts;\n}\n\nnamespace tree {\n\n// Inputs specific to each node\nstruct EvaluateSplitInputs {\n  bst_node_t nidx;\n  bst_node_t depth;\n  GradientPairInt64 parent_sum;\n  common::Span<const bst_feature_t> feature_set;\n  common::Span<const GradientPairInt64> gradient_histogram;\n};\n\n// Inputs necessary for all nodes\nstruct EvaluateSplitSharedInputs {\n  GPUTrainingParam param;\n  GradientQuantiser rounding;\n  common::Span<FeatureType const> feature_types;\n  common::Span<const uint32_t> feature_segments;\n  common::Span<const float> feature_values;\n  bool is_dense;\n  [[nodiscard]] XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }\n  [[nodiscard]] __device__ std::uint32_t FeatureBins(bst_feature_t fidx) const {\n    return feature_segments[fidx + 1] - feature_segments[fidx];\n  }\n};\n\n// Used to return internal storage regions for categoricals\n// Usable on device\nstruct CatAccessor {\n  common::Span<common::CatBitField::value_type> cat_storage;\n  std::size_t node_categorical_storage_size;\n  XGBOOST_DEVICE common::Span<common::CatBitField::value_type> GetNodeCatStorage(bst_node_t nidx) {\n    return this->cat_storage.subspan(nidx * this->node_categorical_storage_size,\n                                     this->node_categorical_storage_size);\n  }\n};\n\nclass GPUHistEvaluator {\n  using CatST = common::CatBitField::value_type;  // categorical storage type\n  // use pinned memory to stage the 
categories, used for sort based splits.\n  using Alloc = xgboost::common::cuda_impl::PinnedAllocator<CatST>;\n\n private:\n  TreeEvaluator tree_evaluator_;\n  // storage for categories for each node, used for sort based splits.\n  dh::device_vector<CatST> split_cats_;\n  // host storage for categories for each node, used for sort based splits.\n  std::vector<CatST, Alloc> h_split_cats_;\n  // stream for copying categories from device back to host for expanding the decision tree.\n  curt::Stream copy_stream_;\n  // storage for sorted index of feature histogram, used for sort based splits.\n  dh::device_vector<bst_feature_t> cat_sorted_idx_;\n  // cached input for sorting the histogram, used for sort based splits.\n  using SortPair = cuda::std::tuple<std::uint32_t, float>;\n  dh::device_vector<SortPair> sort_input_;\n  // cache for feature index\n  dh::device_vector<bst_feature_t> feature_idx_;\n  // Training param used for evaluation\n  TrainParam param_;\n  // Do we have any categorical features that require sorting histograms?\n  // use this to skip the expensive sort step\n  bool need_sort_histogram_ = false;\n  bool has_categoricals_ = false;\n  // Number of elements of categorical storage type\n  // needed to hold categoricals for a single mode\n  std::size_t node_categorical_storage_size_ = 0;\n  // Is the data split column-wise?\n  bool is_column_split_ = false;\n  DeviceOrd device_;\n\n  // Copy the categories from device to host asynchronously.\n  void CopyToHost(const std::vector<bst_node_t> &nidx);\n\n  /**\n   * \\brief Get host category storage of nidx for internal calculation.\n   */\n  auto HostCatStorage(const std::vector<bst_node_t> &nidx) {\n    if (!has_categoricals_) return CatAccessor{};\n    auto max_nidx = *std::max_element(nidx.begin(), nidx.end());\n    std::size_t min_size = (max_nidx + 2) * node_categorical_storage_size_;\n    if (h_split_cats_.size() < min_size) {\n      h_split_cats_.resize(min_size);\n    }\n    return 
CatAccessor{{h_split_cats_.data(), h_split_cats_.size()},\n                       node_categorical_storage_size_};\n  }\n\n  /**\n   * @brief Get device category storage of nidx for internal calculation.\n   */\n  auto DeviceCatStorage(const std::vector<bst_node_t> &nidx) {\n    if (!has_categoricals_) return CatAccessor{};\n    auto max_nidx = *std::max_element(nidx.begin(), nidx.end());\n    std::size_t min_size = (max_nidx + 2) * node_categorical_storage_size_;\n    if (split_cats_.size() < min_size) {\n      split_cats_.resize(min_size);\n    }\n    return CatAccessor{dh::ToSpan(split_cats_), node_categorical_storage_size_};\n  }\n\n  /**\n   * \\brief Get sorted index storage based on the left node of inputs.\n   */\n  auto SortedIdx(int num_nodes, bst_bin_t total_bins) {\n    if (!need_sort_histogram_) return common::Span<bst_feature_t>{};\n    cat_sorted_idx_.resize(num_nodes * total_bins);\n    return dh::ToSpan(cat_sorted_idx_);\n  }\n\n  auto SortInput(int num_nodes, bst_feature_t total_bins) {\n    if (!need_sort_histogram_) return common::Span<SortPair>();\n    sort_input_.resize(num_nodes * total_bins);\n    return dh::ToSpan(sort_input_);\n  }\n\n public:\n  GPUHistEvaluator(TrainParam const &param, bst_feature_t n_features, DeviceOrd device)\n      : tree_evaluator_{param, n_features, device}, param_{param} {}\n  /**\n   * \\brief Reset the evaluator, should be called before any use.\n   */\n  void Reset(Context const *ctx, common::HistogramCuts const &cuts,\n             common::Span<FeatureType const> ft, bst_feature_t n_features, TrainParam const &param,\n             bool is_column_split);\n\n  /**\n   * \\brief Get host category storage for nidx.  
Different from the internal version, this\n   *        returns strictly 1 node.\n   */\n  [[nodiscard]] common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {\n    copy_stream_.View().Sync();\n    auto cats_out = common::Span<CatST const>{h_split_cats_}.subspan(\n        nidx * node_categorical_storage_size_, node_categorical_storage_size_);\n    return cats_out;\n  }\n\n  [[nodiscard]] auto GetDeviceNodeCats(bst_node_t nidx) {\n    if (has_categoricals_) {\n      copy_stream_.View().Sync();\n      CatAccessor accessor = {dh::ToSpan(split_cats_), node_categorical_storage_size_};\n      return common::KCatBitField{accessor.GetNodeCatStorage(nidx)};\n    } else {\n      return common::KCatBitField{};\n    }\n  }\n  /**\n   * \\brief Add a split to the internal tree evaluator.\n   */\n  void ApplyTreeSplit(GPUExpandEntry const &candidate, RegTree *p_tree) {\n    auto &tree = *p_tree;\n    // Set up child constraints\n    auto left_child = tree[candidate.nidx].LeftChild();\n    auto right_child = tree[candidate.nidx].RightChild();\n    tree_evaluator_.AddSplit(candidate.nidx, left_child, right_child,\n                             tree[candidate.nidx].SplitIndex(), candidate.left_weight,\n                             candidate.right_weight);\n  }\n\n  auto GetEvaluator() { return tree_evaluator_.GetEvaluator<GPUTrainingParam>(); }\n  /**\n   * \\brief Sort the histogram based on output to obtain contiguous partitions.\n   */\n  common::Span<bst_feature_t const> SortHistogram(\n      Context const *ctx, common::Span<const EvaluateSplitInputs> d_inputs,\n      EvaluateSplitSharedInputs shared_inputs,\n      TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator);\n\n  // impl of evaluate splits, contains CUDA kernels so it's public\n  void LaunchEvaluateSplits(Context const *ctx, bst_feature_t max_active_features,\n                            common::Span<const EvaluateSplitInputs> d_inputs,\n                            EvaluateSplitSharedInputs 
shared_inputs,\n                            TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,\n                            common::Span<DeviceSplitCandidate> out_splits);\n  /**\n   * \\brief Evaluate splits for left and right nodes.\n   */\n  void EvaluateSplits(Context const *ctx, const std::vector<bst_node_t> &nidx,\n                      bst_feature_t max_active_features,\n                      common::Span<const EvaluateSplitInputs> d_inputs,\n                      EvaluateSplitSharedInputs shared_inputs,\n                      common::Span<GPUExpandEntry> out_splits);\n  /**\n   * \\brief Evaluate splits for root node.\n   */\n  GPUExpandEntry EvaluateSingleSplit(Context const *ctx, EvaluateSplitInputs input,\n                                     EvaluateSplitSharedInputs shared_inputs);\n};\n\n// Input for evaluation kernel for each tree node.\nstruct MultiEvaluateSplitInputs {\n  bst_node_t nidx;\n  bst_node_t depth;\n  common::Span<GradientPairInt64 const> parent_sum;\n  common::Span<bst_feature_t const> feature_set;\n  common::Span<GradientPairInt64 const> histogram;\n};\n\n// Input for evaluation kernel that can be shared by multiple tree nodes.\nstruct MultiEvaluateSplitSharedInputs {\n  // len == n_targets\n  common::Span<GradientQuantiser const> roundings;\n  // cut pointers\n  common::Span<std::uint32_t const> feature_segments;\n  // cut values\n  float const *feature_values;\n  // Number of bins for one feature and one target\n  bst_bin_t n_bins_per_feat_tar;\n  bst_feature_t max_active_feature;\n  GPUTrainingParam param;\n\n  // Used for testing\n  enum OnePass {\n    kNone,      // normal\n    kForward,   // only perform the forward pass\n    kBackward,  // only perform the backward pass\n  } one_pass{kNone};\n\n  [[nodiscard]] XGBOOST_DEVICE bst_target_t Targets() const { return roundings.size(); }\n  [[nodiscard]] XGBOOST_DEVICE bst_feature_t Features() const {\n    return this->feature_segments.size() - 1;\n  }\n};\n}  // namespace 
tree\n}  // namespace xgboost\n\n#endif  // EVALUATE_SPLITS_CUH_\n"
  },
  {
    "path": "src/tree/gpu_hist/evaluator.cu",
    "content": "/**\n * Copyright 2022-2026, XGBoost Contributors\n *\n * @brief Some components of GPU Hist evaluator, this file only exist to reduce nvcc\n *        compilation time.\n */\n#include <thrust/logical.h>  // thrust::any_of\n#include <thrust/sort.h>     // thrust::stable_sort\n\n#include <cuda/std/tuple>  // for make_tuple, get\n\n#include \"../../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../common/device_helpers.cuh\"\n#include \"../../common/hist_util.h\"  // common::HistogramCuts\n#include \"evaluate_splits.cuh\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost::tree {\nvoid GPUHistEvaluator::Reset(Context const *ctx, common::HistogramCuts const &cuts,\n                             common::Span<FeatureType const> ft, bst_feature_t n_features,\n                             TrainParam const &param, bool is_column_split) {\n  param_ = param;\n  tree_evaluator_ = TreeEvaluator{param, n_features, ctx->Device()};\n  has_categoricals_ = cuts.HasCategorical();\n  if (cuts.HasCategorical()) {\n    auto ptrs = cuts.cut_ptrs_.ConstDeviceSpan();\n    auto beg = thrust::make_counting_iterator<size_t>(1ul);\n    auto end = thrust::make_counting_iterator<size_t>(ptrs.size());\n    auto to_onehot = param.max_cat_to_onehot;\n    // This condition avoids sort-based split function calls if the users want\n    // onehot-encoding-based splits.\n    // For some reason, any_of adds 1.5 minutes to compilation time for CUDA 11.x.\n    need_sort_histogram_ =\n        thrust::any_of(ctx->CUDACtx()->CTP(), beg, end, [=] XGBOOST_DEVICE(size_t i) {\n          auto idx = i - 1;\n          if (common::IsCat(ft, idx)) {\n            auto n_bins = ptrs[i] - ptrs[idx];\n            bool use_sort = !common::UseOneHot(n_bins, to_onehot);\n            return use_sort;\n          }\n          return false;\n        });\n\n    node_categorical_storage_size_ =\n        common::CatBitField::ComputeStorageSize(cuts.MaxCategory() + 1);\n    
CHECK_NE(node_categorical_storage_size_, 0);\n    split_cats_.resize(node_categorical_storage_size_);\n    h_split_cats_.resize(node_categorical_storage_size_);\n    dh::safe_cuda(cudaMemsetAsync(split_cats_.data().get(), '\\0',\n                                  split_cats_.size() * sizeof(CatST), ctx->CUDACtx()->Stream()));\n\n    cat_sorted_idx_.resize(cuts.cut_values_.Size() * 2);  // evaluate 2 nodes at a time.\n    sort_input_.resize(cat_sorted_idx_.size());\n\n    /**\n     * cache feature index binary search result\n     */\n    feature_idx_.resize(cat_sorted_idx_.size());\n    auto it = thrust::make_counting_iterator(0ul);\n    thrust::transform(ctx->CUDACtx()->CTP(), it, it + feature_idx_.size(), feature_idx_.begin(),\n                      [=] XGBOOST_DEVICE(size_t i) {\n                        auto fidx = dh::SegmentId(ptrs, i);\n                        return fidx;\n                      });\n  }\n  is_column_split_ = is_column_split;\n  device_ = ctx->Device();\n}\n\ncommon::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(\n    Context const *ctx, common::Span<const EvaluateSplitInputs> d_inputs,\n    EvaluateSplitSharedInputs shared_inputs,\n    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator) {\n  auto sorted_idx = this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size());\n  dh::Iota(sorted_idx, ctx->CUDACtx()->Stream());\n  auto data = this->SortInput(d_inputs.size(), shared_inputs.feature_values.size());\n  auto it = thrust::make_counting_iterator(0u);\n  auto d_feature_idx = dh::ToSpan(feature_idx_);\n  auto total_bins = shared_inputs.feature_values.size();\n  thrust::transform(ctx->CUDACtx()->CTP(), it, it + data.size(), dh::tbegin(data),\n                    [=] XGBOOST_DEVICE(uint32_t i) {\n                      auto const &input = d_inputs[i / total_bins];\n                      auto j = i % total_bins;\n                      auto fidx = d_feature_idx[j];\n                      if 
(common::IsCat(shared_inputs.feature_types, fidx)) {\n                        auto grad =\n                            shared_inputs.rounding.ToFloatingPoint(input.gradient_histogram[j]);\n                        auto lw = evaluator.CalcWeightCat(shared_inputs.param, grad);\n                        return cuda::std::make_tuple(i, lw);\n                      }\n                      return cuda::std::make_tuple(i, 0.0f);\n                    });\n  // Sort an array segmented according to\n  // - nodes\n  // - features within each node\n  // - gradients within each feature\n  thrust::stable_sort_by_key(ctx->CUDACtx()->CTP(), dh::tbegin(data), dh::tend(data),\n                             dh::tbegin(sorted_idx),\n                             [=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {\n                               auto li = cuda::std::get<0>(l);\n                               auto ri = cuda::std::get<0>(r);\n\n                               auto l_node = li / total_bins;\n                               auto r_node = ri / total_bins;\n\n                               if (l_node != r_node) {\n                                 return l_node < r_node;  // not the same node\n                               }\n\n                               li = li % total_bins;\n                               ri = ri % total_bins;\n\n                               auto lfidx = d_feature_idx[li];\n                               auto rfidx = d_feature_idx[ri];\n\n                               if (lfidx != rfidx) {\n                                 return lfidx < rfidx;  // not the same feature\n                               }\n\n                               if (common::IsCat(shared_inputs.feature_types, lfidx)) {\n                                 auto lw = cuda::std::get<1>(l);\n                                 auto rw = cuda::std::get<1>(r);\n                                 return lw < rw;\n                               }\n                               return li < 
ri;\n                             });\n  return dh::ToSpan(cat_sorted_idx_);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/expand_entry.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <cstddef>  // for size_t\n#include <ostream>  // for ostream\n#include <vector>   // for vector\n\n#include \"../../common/device_helpers.cuh\"  // for CopyDeviceSpanToVector\n#include \"../../common/type.h\"              // for GetValueT\n#include \"expand_entry.cuh\"\n\nnamespace xgboost::tree::cuda_impl {\nstd::ostream& operator<<(std::ostream& os, MultiExpandEntry const& e) {\n  os << \"MultiExpandEntry:\\n\"\n     << \"nidx: \" << e.nidx << \"\\n\"\n     << \"depth: \" << e.depth << \"\\n\"\n     << \"loss: \" << e.split.loss_chg << \"\\n\";\n\n  std::vector<GradientPairInt64> h_node_sum(e.split.child_sum.size());\n  dh::CopyDeviceSpanToVector(&h_node_sum, e.split.child_sum);\n\n  auto print_span = [&](auto const& span) {\n    using T = typename common::GetValueT<decltype(span)>::value_type;\n    std::vector<T> h_vec(span.size());\n    dh::CopyDeviceSpanToVector(&h_vec, span);\n\n    os << \"[\";\n    for (std::size_t i = 0; i < h_vec.size(); ++i) {\n      os << h_vec[i];\n      if (i != h_vec.size() - 1) {\n        os << \", \";\n      }\n    }\n    os << \"]\\n\";\n  };\n  if (e.split.dir == kRightDir) {\n    os << \"left_sum: \";\n  } else {\n    os << \"right_sum: \";\n  }\n  print_span(e.split.child_sum);\n\n  os << \"base_weight: \";\n  print_span(e.base_weight);\n\n  return os;\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/expand_entry.cuh",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef EXPAND_ENTRY_CUH_\n#define EXPAND_ENTRY_CUH_\n\n#include <limits>   // for numeric_limits\n#include <utility>  // for move\n\n#include \"../param.h\"                 // for TrainParam\n#include \"../updater_gpu_common.cuh\"  // for DeviceSplitCandidate\n#include \"xgboost/base.h\"             // for bst_node_t\n\nnamespace xgboost::tree {\nstruct GPUExpandEntry {\n  bst_node_t nidx;\n  bst_node_t depth;\n  DeviceSplitCandidate split;\n\n  float base_weight{std::numeric_limits<float>::quiet_NaN()};\n  float left_weight{std::numeric_limits<float>::quiet_NaN()};\n  float right_weight{std::numeric_limits<float>::quiet_NaN()};\n\n  GPUExpandEntry() = default;\n  XGBOOST_DEVICE GPUExpandEntry(bst_node_t nid, bst_node_t depth, DeviceSplitCandidate split,\n                                float base, float left, float right)\n      : nidx(nid),\n        depth(depth),\n        split(std::move(split)),\n        base_weight{base},\n        left_weight{left},\n        right_weight{right} {}\n  [[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t num_leaves) const {\n    if (split.loss_chg <= kRtEps) {\n      return false;\n    }\n    if (split.left_sum.GetQuantisedHess() == 0 || split.right_sum.GetQuantisedHess() == 0) {\n      return false;\n    }\n    if (split.loss_chg < param.min_split_loss) {\n      return false;\n    }\n    if (param.max_depth > 0 && depth == param.max_depth) {\n      return false;\n    }\n    if (param.max_leaves > 0 && num_leaves == param.max_leaves) {\n      return false;\n    }\n    return true;\n  }\n\n  [[nodiscard]] float GetLossChange() const { return split.loss_chg; }\n\n  [[nodiscard]] bst_node_t GetNodeId() const { return nidx; }\n\n  [[nodiscard]] bst_node_t GetDepth() const { return depth; }\n\n  friend std::ostream& operator<<(std::ostream& os, const GPUExpandEntry& e) {\n    os << \"GPUExpandEntry: \\n\";\n    os << \"nidx: \" << e.nidx << \"\\n\";\n   
 os << \"depth: \" << e.depth << \"\\n\";\n    os << \"loss: \" << e.split.loss_chg << \"\\n\";\n    os << \"left_sum: \" << e.split.left_sum << \"\\n\";\n    os << \"right_sum: \" << e.split.right_sum << \"\\n\";\n    return os;\n  }\n\n  void Save(Json* p_out) const {\n    auto& out = *p_out;\n\n    out[\"nid\"] = Integer{this->nidx};\n    out[\"depth\"] = Integer{this->depth};\n    // GPU specific\n    out[\"base_weight\"] = this->base_weight;\n    out[\"left_weight\"] = this->left_weight;\n    out[\"right_weight\"] = this->right_weight;\n\n    /**\n     * Handle split\n     */\n    out[\"split\"] = Object{};\n    auto& split = out[\"split\"];\n    split[\"loss_chg\"] = this->split.loss_chg;\n    split[\"sindex\"] = Integer{this->split.findex};\n    split[\"split_value\"] = this->split.fvalue;\n\n    // cat\n    split[\"thresh\"] = Integer{this->split.thresh};\n    split[\"is_cat\"] = Boolean{this->split.is_cat};\n    /**\n     * Gradients\n     */\n    auto save = [&](std::string const& name, GradientPairInt64 const& sum) {\n      out[name] = I64Array{2};\n      auto& array = get<I64Array>(out[name]);\n      array[0] = sum.GetQuantisedGrad();\n      array[1] = sum.GetQuantisedHess();\n    };\n    save(\"left_sum\", this->split.left_sum);\n    save(\"right_sum\", this->split.right_sum);\n  }\n\n  void Load(Json const& in) {\n    this->nidx = get<Integer const>(in[\"nid\"]);\n    this->depth = get<Integer const>(in[\"depth\"]);\n    // GPU specific\n    this->base_weight = get<Number const>(in[\"base_weight\"]);\n    this->left_weight = get<Number const>(in[\"left_weight\"]);\n    this->right_weight = get<Number const>(in[\"right_weight\"]);\n\n    /**\n     * Handle split\n     */\n    auto const& split = in[\"split\"];\n    this->split.loss_chg = get<Number const>(split[\"loss_chg\"]);\n    this->split.findex = get<Integer const>(split[\"sindex\"]);\n    this->split.fvalue = get<Number const>(split[\"split_value\"]);\n    // cat\n    this->split.thresh = 
get<Integer const>(split[\"thresh\"]);\n    this->split.is_cat = get<Boolean const>(split[\"is_cat\"]);\n    /**\n     * Gradients\n     */\n    auto const& left_sum = get<I64Array const>(in[\"left_sum\"]);\n    this->split.left_sum = GradientPairInt64{left_sum[0], left_sum[1]};\n    auto const& right_sum = get<I64Array const>(in[\"right_sum\"]);\n    this->split.right_sum = GradientPairInt64{right_sum[0], right_sum[1]};\n  }\n};\n\nnamespace cuda_impl {\nstruct MultiExpandEntry {\n  bst_node_t nidx{0};\n  bst_node_t depth{0};\n  MultiSplitCandidate split;\n\n  common::Span<float> base_weight;\n  // Sum of hessians across all targets for left/right children.\n  double left_sum{0};\n  double right_sum{0};\n\n  MultiExpandEntry() = default;\n\n  [[nodiscard]] float GetLossChange() const { return split.loss_chg; }\n\n  [[nodiscard]] bst_node_t GetNodeId() const { return nidx; }\n\n  [[nodiscard]] bst_node_t GetDepth() const { return depth; }\n\n  [[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t n_leaves) const {\n    // The split evaluator handles the zero Hessian case. 
It returns an expand entry with\n    // -inf loss_chg if the Hessian is invalid.\n    if (split.loss_chg <= kRtEps) {\n      return false;\n    }\n    if (base_weight.empty()) {\n      return false;\n    }\n    if (split.loss_chg < param.min_split_loss) {\n      return false;\n    }\n    if (param.max_depth > 0 && depth == param.max_depth) {\n      return false;\n    }\n    if (param.max_leaves > 0 && n_leaves == param.max_leaves) {\n      return false;\n    }\n    return true;\n  }\n\n  /**\n   * @brief Update hessian statistics.\n   * @param left_hess  Sum of hessians across all targets for left child.\n   * @param right_hess Sum of hessians across all targets for right child.\n   */\n  __device__ void UpdateHessian(double left_hess, double right_hess) {\n    this->left_sum = left_hess;\n    this->right_sum = right_hess;\n  }\n\n  friend std::ostream& operator<<(std::ostream& os, MultiExpandEntry const& entry);\n};\n}  // namespace cuda_impl\n}  // namespace xgboost::tree\n\n#endif  // EXPAND_ENTRY_CUH_\n"
  },
  {
    "path": "src/tree/gpu_hist/feature_groups.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t\n#include <vector>     // for vector\n\n#include \"../../common/hist_util.h\"  // for HistogramCuts\n#include \"feature_groups.cuh\"\n\nnamespace xgboost::tree {\nFeatureGroups::FeatureGroups(common::HistogramCuts const& cuts, bool is_dense, size_t shm_size)\n    : max_group_bins{0} {\n  // Only use a single feature group for sparse matrices.\n  bool single_group = !is_dense;\n  if (single_group) {\n    InitSingle(cuts);\n    return;\n  }\n\n  auto& feature_segments_h = feature_segments.HostVector();\n  auto& bin_segments_h = bin_segments.HostVector();\n  feature_segments_h.push_back(0);\n  bin_segments_h.push_back(0);\n\n  std::vector<std::uint32_t> const& cut_ptrs = cuts.Ptrs();\n  // Maximum number of bins that can be placed into shared memory (single target).\n  std::size_t max_shmem_bins = shm_size / sizeof(GradientPairInt64);\n\n  for (size_t i = 2; i < cut_ptrs.size(); ++i) {\n    int last_start = bin_segments_h.back();\n    // Push a new group whenever the size of required bin storage is greater than the\n    // shared memory size.\n    if (cut_ptrs[i] - last_start > max_shmem_bins) {\n      feature_segments_h.push_back(i - 1);\n      bin_segments_h.push_back(cut_ptrs[i - 1]);\n      max_group_bins = std::max(max_group_bins, bin_segments_h.back() - last_start);\n    }\n  }\n  feature_segments_h.push_back(cut_ptrs.size() - 1);\n  bin_segments_h.push_back(cut_ptrs.back());\n  max_group_bins =\n      std::max(max_group_bins, bin_segments_h.back() - bin_segments_h[bin_segments_h.size() - 2]);\n}\n\nvoid FeatureGroups::InitSingle(common::HistogramCuts const& cuts) {\n  auto& feature_segments_h = feature_segments.HostVector();\n  feature_segments_h.push_back(0);\n  feature_segments_h.push_back(cuts.Ptrs().size() - 1);\n\n  auto& bin_segments_h = bin_segments.HostVector();\n  
bin_segments_h.push_back(0);\n  bin_segments_h.push_back(cuts.TotalBins());\n\n  max_group_bins = cuts.TotalBins();\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/feature_groups.cuh",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#ifndef FEATURE_GROUPS_CUH_\n#define FEATURE_GROUPS_CUH_\n\n#include <xgboost/host_device_vector.h>\n#include <xgboost/span.h>\n\nnamespace xgboost {\n\n// Forward declarations.\nnamespace common {\nclass HistogramCuts;\n}  // namespace common\n\nnamespace tree {\n\n/**\n * @brief FeatureGroup is a single group of features.\n *\n * It is defined by a range of consecutive feature indices, and also contains a range of\n * all bin indices associated with those features.\n */\nstruct FeatureGroup {\n  XGBOOST_DEVICE FeatureGroup(bst_feature_t start_feature, bst_feature_t n_features,\n                              bst_bin_t start_bin, bst_bin_t num_bins)\n      : start_feature{start_feature},\n        num_features{n_features},\n        start_bin{start_bin},\n        num_bins{num_bins} {}\n  /** The first feature of the group. */\n  bst_feature_t start_feature;\n  /** The number of features in the group. */\n  bst_feature_t num_features;\n  /** The first bin in the group. */\n  bst_bin_t start_bin;\n  /** The number of bins in the group. */\n  bst_bin_t num_bins;\n};\n\n/** @brief FeatureGroupsAccessor is a non-owning accessor for FeatureGroups. */\nstruct FeatureGroupsAccessor {\n  FeatureGroupsAccessor(common::Span<const bst_feature_t> feature_segments,\n                        common::Span<const bst_bin_t> bin_segments, bst_bin_t max_group_bins)\n      : feature_segments{feature_segments},\n        bin_segments{bin_segments.data()},\n        max_group_bins{max_group_bins} {}\n\n  common::Span<const bst_feature_t> feature_segments;\n  int const* bin_segments;\n  bst_bin_t max_group_bins;\n\n  /** @brief Gets the number of feature groups. */\n  XGBOOST_DEVICE int NumGroups() const { return feature_segments.size() - 1; }\n\n  /** @brief Gets the information about a feature group with index i. 
*/\n  XGBOOST_DEVICE FeatureGroup operator[](bst_feature_t i) const {\n    auto p_fs = feature_segments.data();\n    return {p_fs[i], p_fs[i + 1] - p_fs[i], bin_segments[i], bin_segments[i + 1] - bin_segments[i]};\n  }\n  /** @brief The needed shared memory size for the largest group. */\n  [[nodiscard]] std::size_t ShmemSize() const {\n    return sizeof(GradientPairInt64) * this->max_group_bins;\n  }\n};\n\n/**\n * @brief FeatureGroups contains information that defines a split of features\n *   into groups. Bins of a single feature group typically fit into shared\n *   memory, so the histogram for the features of a single group can be computed\n *   faster.\n *\n * @note Known limitations:\n *\n *  - splitting features into groups currently works only for dense matrices,\n *    where it is easy to get a feature value in a row by its index; for sparse\n *    matrices, the structure contains only a single group containing all\n *    features;\n *\n *  - if a single feature requires more bins than fit into shared memory, the\n *    histogram is computed in global memory even if there are multiple feature\n *    groups; note that this is unlikely to occur in practice, as the default\n *    number of bins per feature is 256, whereas a thread block with 48 KiB\n *    shared memory can contain 3072 bins if each gradient sum component is a\n *     64-bit floating-point value (double)\n*/\nstruct FeatureGroups {\n  /** Group cuts for features. Size equals to (number of groups + 1). */\n  HostDeviceVector<bst_feature_t> feature_segments;\n  /** Group cuts for bins. Size equals to (number of groups + 1)  */\n  HostDeviceVector<int> bin_segments;\n  /** Maximum number of bins in a group. Useful to compute the amount of dynamic\n      shared memory when launching a kernel. 
*/\n  int max_group_bins;\n\n  /**\n   * @brief Creates feature groups by splitting features into groups.\n   *\n   * @param cuts Histogram cuts that given the number of bins per feature.\n   * @param is_dense Whether the data matrix is dense.\n   * @param shm_size Available size of shared memory per thread block (in bytes) used to\n   *  compute feature groups.\n   */\n  FeatureGroups(common::HistogramCuts const& cuts, bool is_dense, size_t shm_size);\n\n  /**\n   * @brief Creates a single feature group containing all features and bins.\n   *\n   * @notes This is used as a fallback for sparse matrices, and is also useful for\n   *        testing.\n   */\n  explicit FeatureGroups(const common::HistogramCuts& cuts) { this->InitSingle(cuts); }\n\n  [[nodiscard]] FeatureGroupsAccessor DeviceAccessor(DeviceOrd device) const {\n    feature_segments.SetDevice(device);\n    bin_segments.SetDevice(device);\n    return {feature_segments.ConstDeviceSpan(), bin_segments.ConstDeviceSpan(), max_group_bins};\n  }\n\nprivate:\n  void InitSingle(const common::HistogramCuts& cuts);\n};\n\n}  // namespace tree\n}  // namespace xgboost\n\n#endif  // FEATURE_GROUPS_CUH_\n"
  },
  {
    "path": "src/tree/gpu_hist/histogram.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <cstdint>          // uint32_t, int32_t\n#include <cuda/functional>  // for proclaim_copyable_arguments\n#include <memory>           // for unique_ptr\n\n#include \"../../collective/aggregator.h\"\n#include \"../../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../common/cuda_rt_utils.h\"   // for GetMpCnt\n#include \"../../common/device_helpers.cuh\"\n#include \"../../data/ellpack_page.cuh\"\n#include \"histogram.cuh\"\n#include \"row_partitioner.cuh\"\n#include \"xgboost/base.h\"\n\nnamespace xgboost::tree {\nnamespace {\ntemplate <typename IterT>\nXGBOOST_DEV_INLINE bst_idx_t IterIdx(EllpackAccessorImpl<IterT> const& matrix,\n                                     RowPartitioner::RowIndexT ridx, bst_feature_t fidx) {\n  // # Row index local to each batch\n  // ridx_local = ridx - base_rowid\n  // # Starting entry index for this row in the matrix\n  // entry_idx = ridx_local * row_stride\n  // # Inside a row, first column inside this feature group\n  // entry_idx += start_feature\n  // # The feature index local to the current feature group\n  // idx - ridx * feature_stride == idx % feature_stride\n  // # Final index\n  // entry_idx += idx % feature_stride\n  return (ridx - matrix.base_rowid) * matrix.row_stride + fidx;\n}\n}  // anonymous namespace\n\nXGBOOST_DEV_INLINE void AtomicAddGpairShared(xgboost::GradientPairInt64* dest,\n                                             xgboost::GradientPairInt64 const& gpair) {\n  auto dst_ptr = reinterpret_cast<int64_t*>(dest);\n  auto g = gpair.GetQuantisedGrad();\n  auto h = gpair.GetQuantisedHess();\n\n  AtomicAdd64As32(dst_ptr, g);\n  AtomicAdd64As32(dst_ptr + 1, h);\n}\n\n// Global 64 bit integer atomics at the time of writing do not benefit from being separated into two\n// 32 bit atomics\nXGBOOST_DEV_INLINE void AtomicAddGpairGlobal(xgboost::GradientPairInt64* dest,\n                                             
xgboost::GradientPairInt64 const& gpair) {\n  auto dst_ptr = reinterpret_cast<uint64_t*>(dest);\n  auto g = gpair.GetQuantisedGrad();\n  auto h = gpair.GetQuantisedHess();\n\n  atomicAdd(dst_ptr, *reinterpret_cast<uint64_t*>(&g));\n  atomicAdd(dst_ptr + 1, *reinterpret_cast<uint64_t*>(&h));\n}\n\ntemplate <std::int32_t BlockThreads, std::int32_t MinBlocks>\nstruct HistTuning {\n  static constexpr std::int32_t kBlockThreads = BlockThreads;\n  static constexpr std::int32_t kMinBlocks = MinBlocks;\n};\n\nnamespace {\nconstexpr std::int32_t kItemsPerThread = 8;\n\n// https://docs.nvidia.com/cuda/cuda-c-programming-guide/#feature-set-compiler-targets\n// Technical Specifications                  7.5  | 8.0  | 8.6  8.7 | 8.9 | 9.0 10.0 | 11.0 12.0\n// Maximum number of resident blocks per SM  16   | 32   | 16       | 24  | 32       | 24\n// Maximum number of resident warps per SM   32   | 64   | 48             | 64       | 48\n// Maximum number of resident threads per SM 1024 | 2048 | 1536           | 2048     | 1536\n\nusing HistSm75 = HistTuning<1024, 1>;\n\nusing HistSm80 = HistTuning<1024, 2>;\n\nusing HistSm86 = HistTuning<768, 2>;\n\nusing HistSm90 = HistTuning<1024, 2>;\n\nusing HistSm110 = HistTuning<768, 2>;\n\n// Multi-target launch bounds\n#if __CUDA_ARCH__ >= 1100\nusing MtHistBound = HistSm110;\n#elif __CUDA_ARCH__ >= 900\nusing MtHistBound = HistSm90;\n#elif __CUDA_ARCH__ >= 860\nusing MtHistBound = HistSm86;\n#elif __CUDA_ARCH__ >= 800\nusing MtHistBound = HistSm80;\n#else\nusing MtHistBound = HistSm75;\n#endif\n\n// Single-target launch bounds\n// Maximize the number of threads instead of tuning for occupancy for single target.\nusing StHistBound = HistSm75;\n\ntemplate <typename HistArchPolicy, std::int32_t ItemsPerThread, bool Dense, bool Compressed,\n          bool SharedMem>\nstruct HistPolicy : public HistArchPolicy {\n  static constexpr std::int32_t kItemsPerThread = ItemsPerThread;\n  static constexpr std::int32_t kTileSize = 
HistArchPolicy::kBlockThreads * ItemsPerThread;\n  static constexpr bool kDense = Dense;\n  static constexpr bool kCompressed = Compressed;\n  static constexpr bool kSharedMem = SharedMem;\n};\n\ntemplate <typename Fn>\nvoid DispatchCudaSm(std::int32_t device, Fn&& fn) {\n  std::int32_t version = 0;\n  dh::safe_cuda(cub::SmVersion(version, device));\n  if (version >= 1100) {\n    fn(HistSm110{});\n  } else if (version >= 900) {\n    fn(HistSm90{});\n  } else if (version >= 860) {\n    fn(HistSm86{});\n  } else if (version >= 800) {\n    fn(HistSm80{});\n  } else {\n    fn(HistSm75{});\n  }\n}\n\n__device__ GradientPairInt64 LoadGpair(GradientPairInt64 const* XGBOOST_RESTRICT gpairs) {\n  static_assert(sizeof(int4) == sizeof(GradientPairInt64));\n  auto g = *reinterpret_cast<int4 const*>(gpairs);\n  return *reinterpret_cast<GradientPairInt64*>(&g);\n}\n\n// Build the histogram for a single target in a single node.\ntemplate <typename Policy, typename Accessor, typename RidxIterSpan>\n__device__ void HistKernelOneNodeTarget(Accessor const& matrix, FeatureGroup const& group,\n                                        RidxIterSpan d_ridx_iter, GradientPairInt64 const* gpair,\n                                        GradientPairInt64* smem_hist, GradientPairInt64* gmem_hist,\n                                        bst_idx_t offset, std::uint32_t stride) {\n  bst_feature_t const feature_stride = Policy::kCompressed ? 
group.num_features : matrix.row_stride;\n\n  using Idx = RowPartitioner::RowIndexT;\n\n  Idx const ridx_size = d_ridx_iter.size();\n  auto const d_ridx = d_ridx_iter.data();\n\n  if constexpr (Policy::kSharedMem) {\n    dh::BlockFill(smem_hist, group.num_bins, GradientPairInt64{});\n    __syncthreads();\n  }\n\n  auto atomic_add = [&](auto bin_idx, auto const& adjusted) {\n    if constexpr (Policy::kSharedMem) {\n      AtomicAddGpairShared(smem_hist + bin_idx, adjusted);\n    } else {\n      // gmem_hist is a subspan for the current target.\n      AtomicAddGpairGlobal(gmem_hist + bin_idx, adjusted);\n    }\n  };\n\n  auto process_valid_tile = [&](auto idx) {\n    // unrolled version unravel to save registers:\n    // auto [ridx, fidx] = unravel_index(idx, (n_rows, feature_stride));\n    //\n    // ridx_in_set: Index into the row batch\n    // fidx_in_set: Index into the feature group\n    Idx ridx_in_set = idx / feature_stride;\n    Idx fidx_in_set = idx - ridx_in_set * feature_stride;\n\n    Idx ridx = d_ridx[ridx_in_set];\n    auto fidx = fidx_in_set + group.start_feature;\n\n    bst_bin_t compressed_bin = matrix.gidx_iter[IterIdx(matrix, ridx, fidx)];\n    if (Policy::kDense || compressed_bin != matrix.NullValue()) {\n      auto g = LoadGpair(gpair + ridx);\n      if constexpr (Policy::kCompressed) {\n        compressed_bin += matrix.feature_segments[fidx];\n      }\n      if constexpr (Policy::kSharedMem) {\n        compressed_bin -= group.start_bin;\n      }\n      atomic_add(compressed_bin, g);\n    }\n  };\n\n  // The number of elements for this grid to process\n  bst_idx_t const n_elements = static_cast<std::size_t>(ridx_size) * feature_stride;\n\n  auto process_gpair_tile = [&](auto full_tile, auto offset) {\n#pragma unroll 1\n    for (std::int32_t j = 0; j < Policy::kItemsPerThread; ++j) {\n      bst_idx_t const idx = offset + j * Policy::kBlockThreads + threadIdx.x;\n      if (full_tile || idx < n_elements) {\n        process_valid_tile(idx);\n      }\n  
  }\n  };\n\n  while (offset < n_elements) {\n    std::int32_t const valid_items =\n        cuda::std::min(n_elements - offset, static_cast<bst_idx_t>(Policy::kTileSize));\n    if (Policy::kTileSize == valid_items) {\n      process_gpair_tile(std::true_type{}, offset);\n    } else {\n      process_gpair_tile(std::false_type{}, offset);\n    }\n    offset += stride;\n  }\n\n  if constexpr (!Policy::kSharedMem) {\n    return;\n  }\n\n  // Write shared memory back to global memory\n  __syncthreads();\n\n  for (auto bin_idx : dh::BlockStrideRange(0, group.num_bins)) {\n    AtomicAddGpairGlobal(gmem_hist + group.start_bin + bin_idx, smem_hist[bin_idx]);\n  }\n}\n}  // namespace\n\n/**\n * @brief Kernel for the single-target histogram.\n */\ntemplate <typename Policy, typename Accessor>\n__global__ __launch_bounds__(StHistBound::kBlockThreads, StHistBound::kMinBlocks) void StHistKernel(\n    Accessor const matrix, FeatureGroupsAccessor const feature_groups,\n    common::Span<cuda_impl::RowIndexT const> d_ridx_iter,\n    common::Span<GradientPairInt64 const> d_gpair, common::Span<GradientPairInt64> node_hist) {\n  extern __align__(cuda::std::alignment_of_v<GradientPairInt64>) __shared__ char shmem[];\n\n  // Privatized histogram\n  auto smem_hist = reinterpret_cast<GradientPairInt64*>(shmem);\n\n  // Offset of the first grid\n  bst_idx_t offset = blockIdx.x * Policy::kTileSize;\n  // Grid-strided loop\n  auto const kStride = Policy::kTileSize * gridDim.x;\n\n  FeatureGroup group = feature_groups[blockIdx.y];\n\n  HistKernelOneNodeTarget<Policy>(matrix, group, d_ridx_iter, d_gpair.data(), smem_hist,\n                                  node_hist.data(), offset, kStride);\n}\n\n/**\n * @brief Kernel for the multi-target histogram.\n *\n * @param matrix         An ellpack accessor.\n * @param feature_groups Grouping for privatized histogram.\n * @param d_ridx_iters   Pointer to row index spans. 
One span per node.\n * @param blk_ptr        Indptr for mapping blockIdx.x to nidx_in_set.\n */\ntemplate <typename Policy, typename Accessor, typename RidxIterSpan>\n__global__ __launch_bounds__(MtHistBound::kBlockThreads, MtHistBound::kMinBlocks) void MtHistKernel(\n    Accessor const matrix, FeatureGroupsAccessor const feature_groups, RidxIterSpan* d_ridx_iters,\n    common::Span<std::uint32_t const> blk_ptr, common::Span<GradientPairInt64>* node_hists,\n    GradientPairInt64 const* d_gpair, bst_idx_t n_samples, bst_target_t n_targets) {\n  using Idx = RowPartitioner::RowIndexT;\n\n  // Find the node for this block.\n  auto const* XGBOOST_RESTRICT p_blk_ptr = blk_ptr.data();\n  Idx nidx_in_set = dh::SegmentId(p_blk_ptr, p_blk_ptr + blk_ptr.size(), blockIdx.x);\n  Idx starting_blk = p_blk_ptr[nidx_in_set];\n\n  extern __align__(cuda::std::alignment_of_v<GradientPairInt64>) __shared__ char shmem[];\n\n  // Privatized histogram\n  auto smem_hist = reinterpret_cast<GradientPairInt64*>(shmem);\n  auto d_node_hist = node_hists + nidx_in_set;\n  auto const n_bins_per_target = d_node_hist->size() / n_targets;\n\n  // The number of blocks in this sub-grid\n  auto n_blks = p_blk_ptr[nidx_in_set + 1] - starting_blk;\n  auto blkid_in_set = blockIdx.x - starting_blk;\n\n  // unravel_index(blkid_in_set, {n_blocks_one_node_target, n_targets})\n  auto blkid_for_node = blkid_in_set / n_targets;\n  bst_target_t target_idx = blkid_in_set - blkid_for_node * n_targets;\n\n  // Offset of the first grid\n  bst_idx_t offset = blkid_for_node * Policy::kTileSize;\n  // Grid-strided loop\n  auto const kStride = Policy::kTileSize * (n_blks / n_targets);\n\n  FeatureGroup group = feature_groups[blockIdx.y];\n\n  // With a target-major layout, we don't have to pack the histogram for all targets into\n  // the shared memory. 
Since we launch one block for each target, the histogram index can\n  // be shared in L2.\n  auto gmem_hist = d_node_hist->data() + target_idx * n_bins_per_target;\n  d_gpair = d_gpair + n_samples * target_idx;\n\n  HistKernelOneNodeTarget<Policy>(matrix, group, d_ridx_iters[nidx_in_set], d_gpair, smem_hist,\n                                  gmem_hist, offset, kStride);\n}\n\n// Dispatcher for the histogram kernel.\nstruct HistKernel {\n  /**\n   * @brief Partition the grid into sub-grid for nodes.\n   *\n   * @param sizes_csum          cumulative sum of node sizes (csum of n_samples for each node).\n   * @param columns_per_group   Estimated number of columns for each feature group.\n   * @param max_blocks_per_node The maximum sub-grid size for a node.\n   * @param p_out_blocks        The total number of blocks (grid size).\n   */\n  template <typename Policy>\n  static auto AllocateBlocks(std::vector<std::size_t> const& sizes_csum,\n                             std::int32_t columns_per_group, std::size_t max_blocks_per_node,\n                             bst_target_t n_targets, std::uint32_t* p_out_blocks) {\n    CHECK_GT(max_blocks_per_node, 0);\n    std::vector<std::uint32_t> blk_ptr{0};\n    bst_idx_t n_total_blocks = 0;\n    for (std::size_t j = 1; j < sizes_csum.size(); ++j) {\n      auto nidx_in_set = j - 1;\n      auto n_samples = sizes_csum[j] - sizes_csum[j - 1];\n      std::size_t items_per_group = n_samples * columns_per_group;\n      auto n_blocks = common::DivRoundUp(items_per_group, Policy::kTileSize);\n      CHECK_GT(n_blocks, 0);  // at least one block for each node.\n      n_blocks = std::min(n_blocks, max_blocks_per_node) * n_targets;\n      blk_ptr.push_back(blk_ptr[nidx_in_set] + n_blocks);\n      n_total_blocks += n_blocks;\n    }\n    // check overflow\n    CHECK_EQ(n_total_blocks, blk_ptr.back());\n    *p_out_blocks = blk_ptr.back();\n    return dh::device_vector<std::uint32_t>{blk_ptr};\n  }\n\n  struct HistKernelConfig {\n    std::int32_t 
n_blocks_per_mp = 0;\n    std::size_t shmem_bytes = 0;\n\n    template <typename Policy, typename Kernel>\n    void Reset(std::size_t new_shmem_bytes, Kernel* kernel, Policy, std::size_t max_shared_bytes) {\n      if (new_shmem_bytes > 0) {\n        // This function is the reason for all this trouble to cache the\n        // configuration. It blocks the device.\n        //\n        // Also, it must precede the `cudaOccupancyMaxActiveBlocksPerMultiprocessor`,\n        // otherwise the shmem bytes might be invalid.\n        dh::safe_cuda(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize,\n                                           max_shared_bytes));\n      }\n      if (new_shmem_bytes > this->shmem_bytes) {\n        this->shmem_bytes = new_shmem_bytes;\n      }\n      // Use this as a limiter, works for root node. Not too bad an option for child nodes.\n      dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(\n          &this->n_blocks_per_mp, kernel, Policy::kBlockThreads, shmem_bytes));\n    }\n  };\n\n  // Maps kernel instantiations to their configurations. 
This is a mutable state, as a\n  // result the histogram kernel is not thread safe.\n  std::map<void*, HistKernelConfig> cfg;\n  // The number of multi-processor for the selected GPU\n  std::int32_t const n_mps;\n  // Maximum size of the shared memory (optin)\n  std::size_t const max_shared_bytes;\n  // Use global memory for testing\n  bool const force_global;\n\n  template <typename Policy, typename Kernel>\n  void SetCfg(Policy policy, std::size_t shmem_bytes, Kernel kernel) {\n    auto it = this->cfg.find(reinterpret_cast<void*>(kernel));\n\n    HistKernelConfig v;\n    if (it == cfg.cend()) {\n      v.Reset(shmem_bytes, kernel, policy, max_shared_bytes);\n      this->cfg[reinterpret_cast<void*>(kernel)] = v;\n    }\n  }\n\n  explicit HistKernel(Context const* ctx, bool force_global)\n      : n_mps{curt::GetMpCnt(ctx->Ordinal())},\n        max_shared_bytes{dh::MaxSharedMemoryOptin(ctx->Ordinal())},\n        force_global{force_global} {}\n\n  // Single target\n  template <bool kDense, bool kCompressed, typename Accessor>\n  void DispatchHistShmem(Context const* ctx, Accessor const& matrix,\n                         FeatureGroupsAccessor const& feature_groups,\n                         common::Span<GradientPairInt64 const> gpair,\n                         common::Span<cuda_impl::RowIndexT const> ridx,\n                         common::Span<GradientPairInt64> hist) {\n    std::size_t shmem_bytes = feature_groups.ShmemSize();\n    bool use_shared = !this->force_global && shmem_bytes <= this->max_shared_bytes;\n    shmem_bytes = use_shared ? 
shmem_bytes : 0;\n\n    auto launch = [&](auto policy, auto kernel) {\n      auto const& v = this->cfg.at(reinterpret_cast<void*>(kernel));\n      using Policy = common::GetValueT<decltype(policy)>;\n      int columns_per_group = common::DivRoundUp(matrix.row_stride, feature_groups.NumGroups());\n      CHECK_GT(v.n_blocks_per_mp, 0);\n      std::size_t items_per_group = ridx.size() * columns_per_group;\n      std::uint32_t n_blocks =\n          std::min(static_cast<cuda_impl::RowIndexT>(v.n_blocks_per_mp * this->n_mps),\n                   static_cast<cuda_impl::RowIndexT>(\n                       common::DivRoundUp(items_per_group, Policy::kTileSize)));\n      dim3 conf(n_blocks, feature_groups.NumGroups());\n      dh::LaunchKernel(conf, Policy::kBlockThreads, shmem_bytes, ctx->CUDACtx()->Stream())(\n          kernel, matrix, feature_groups, ridx, gpair, hist);\n      dh::safe_cuda(cudaPeekAtLastError());\n    };\n    using Arch = StHistBound;\n\n    if (use_shared) {\n      using Policy = HistPolicy<Arch, kItemsPerThread, kDense, kCompressed, true>;\n      auto kernel = StHistKernel<Policy, Accessor>;\n      this->SetCfg(Policy{}, shmem_bytes, kernel);\n      launch(Policy{}, kernel);\n    } else {\n      using Policy = HistPolicy<Arch, kItemsPerThread, kDense, kCompressed, false>;\n      auto kernel = StHistKernel<Policy, Accessor>;\n      this->SetCfg(Policy{}, shmem_bytes, kernel);\n      launch(Policy{}, kernel);\n    }\n  }\n  // Vector leaf\n  template <bool kDense, bool kCompressed, typename Accessor, typename RidxIterSpan>\n  void DispatchHistShmem(Context const* ctx, Accessor const& matrix,\n                         FeatureGroupsAccessor const& feature_groups,\n                         linalg::MatrixView<GradientPairInt64 const> gpair,\n                         RidxIterSpan* ridx_iters,\n                         common::Span<common::Span<GradientPairInt64>> hists,\n                         std::vector<std::size_t> const& h_sizes_csum) {\n    
CHECK(gpair.FContiguous());\n    auto n_samples = gpair.Shape(0);\n    auto n_targets = gpair.Shape(1);\n    auto d_gpair = gpair.Values().data();\n\n    std::size_t shmem_bytes = feature_groups.ShmemSize();\n    bool use_shared = !force_global && shmem_bytes <= this->max_shared_bytes;\n    shmem_bytes = use_shared ? shmem_bytes : 0;\n\n    auto launch = [&](auto policy, auto kernel) {\n      auto const& v = this->cfg.at(reinterpret_cast<void*>(kernel));\n      using Policy = common::GetValueT<decltype(policy)>;\n      int columns_per_group = common::DivRoundUp(matrix.row_stride, feature_groups.NumGroups());\n      CHECK_GT(v.n_blocks_per_mp, 0);\n      std::uint32_t n_blocks = 0;\n      auto blk_ptr = AllocateBlocks<Policy>(h_sizes_csum, columns_per_group,\n                                            v.n_blocks_per_mp * n_mps, n_targets, &n_blocks);\n      CHECK_GE(n_blocks, hists.size());\n      dim3 conf(n_blocks, feature_groups.NumGroups());\n      dh::LaunchKernel(conf, Policy::kBlockThreads, shmem_bytes, ctx->CUDACtx()->Stream())(\n          kernel, matrix, feature_groups, ridx_iters, dh::ToSpan(blk_ptr), hists.data(), d_gpair,\n          n_samples, n_targets);\n      dh::safe_cuda(cudaPeekAtLastError());\n    };\n\n    CHECK(gpair.FContiguous());\n    if (use_shared) {\n      DispatchCudaSm(ctx->Ordinal(), [&](auto arch) {\n        using Arch = common::GetValueT<decltype(arch)>;\n        using Policy = HistPolicy<Arch, kItemsPerThread, kDense, kCompressed, true>;\n        auto kernel = MtHistKernel<Policy, Accessor, RidxIterSpan>;\n        this->SetCfg(Policy{}, shmem_bytes, kernel);\n        launch(Policy{}, kernel);\n      });\n    } else {\n      DispatchCudaSm(ctx->Ordinal(), [&](auto arch) {\n        using Arch = common::GetValueT<decltype(arch)>;\n        using Policy = HistPolicy<Arch, kItemsPerThread, kDense, kCompressed, false>;\n        auto kernel = MtHistKernel<Policy, Accessor, RidxIterSpan>;\n        this->SetCfg(Policy{}, shmem_bytes, 
kernel);\n        launch(Policy{}, kernel);\n      });\n    }\n  }\n\n  template <typename Accessor, typename... Args>\n  void DispatchHistCompress(Context const* ctx, Accessor const& matrix, Args&&... args) {\n    if (matrix.IsDense()) {\n      DispatchHistShmem<true, true>(ctx, matrix, std::forward<Args>(args)...);\n    } else if (matrix.IsDenseCompressed()) {\n      DispatchHistShmem<false, true>(ctx, matrix, std::forward<Args>(args)...);\n    } else {\n      DispatchHistShmem<false, false>(ctx, matrix, std::forward<Args>(args)...);\n    }\n  }\n\n  template <typename... Args>\n  void Dispatch(Args&&... args) {\n    this->DispatchHistCompress(std::forward<Args>(args)...);\n  }\n};\n\ntemplate <typename Accessor>\nclass DeviceHistogramDispatchAccessor {\n  std::unique_ptr<HistKernel> kernel_{nullptr};\n\n public:\n  void Reset(Context const* ctx, bool force_global_memory) {\n    this->kernel_ = std::make_unique<HistKernel>(ctx, force_global_memory);\n  }\n\n  void BuildHistogram(Context const* ctx, Accessor const& matrix,\n                      FeatureGroupsAccessor const& feature_groups,\n                      common::Span<GradientPairInt64 const> gpair,\n                      common::Span<cuda_impl::RowIndexT const> ridx,\n                      common::Span<GradientPairInt64> hist) {\n    this->kernel_->Dispatch(ctx, matrix, feature_groups, gpair, ridx, hist);\n  }\n\n  void BuildHistogram(Context const* ctx, Accessor const& matrix,\n                      FeatureGroupsAccessor const& feature_groups,\n                      linalg::MatrixView<GradientPairInt64 const> gpair,\n                      common::Span<common::Span<cuda_impl::RowIndexT const>> ridxs,\n                      common::Span<common::Span<GradientPairInt64>> hists,\n                      std::vector<std::size_t> const& h_sizes_csum) {\n    std::size_t n_total_samples = h_sizes_csum.back();\n    if (ridxs.size() == 1 && n_total_samples == matrix.n_rows) {\n      // Special optimization for the 
root node.\n      using RidxIter = thrust::counting_iterator<cuda_impl::RowIndexT>;\n      CHECK_LT(matrix.base_rowid, std::numeric_limits<cuda_impl::RowIndexT>::max());\n      auto iter = common::IterSpan{\n          thrust::make_counting_iterator(static_cast<cuda_impl::RowIndexT>(matrix.base_rowid)),\n          matrix.n_rows};\n      dh::caching_device_vector<common::IterSpan<RidxIter>> ridx_iters(hists.size(), iter);\n      this->kernel_->Dispatch(ctx, matrix, feature_groups, gpair, ridx_iters.data().get(), hists,\n                              h_sizes_csum);\n    } else {\n      using RidxIter = cuda_impl::RowIndexT const;\n      this->kernel_->Dispatch(ctx, matrix, feature_groups, gpair, ridxs.data(), hists,\n                              h_sizes_csum);\n    }\n  }\n};\n\n// Dispatch between single buffer accessor and double buffer accessor.\nstruct DeviceHistogramBuilderImpl {\n  DeviceHistogramDispatchAccessor<EllpackDeviceAccessor> simpl;\n  DeviceHistogramDispatchAccessor<DoubleEllpackAccessor> dimpl;\n\n  template <typename... Args>\n  void Reset(Args&&... args) {\n    this->simpl.Reset(std::forward<Args>(args)...);\n    this->dimpl.Reset(std::forward<Args>(args)...);\n  }\n\n  template <typename Accessor, typename... Args>\n  void BuildHistogram(Context const* ctx, Accessor const& matrix, Args&&... 
args) {\n    if constexpr (std::is_same_v<Accessor, EllpackDeviceAccessor>) {\n      this->simpl.BuildHistogram(ctx, matrix, std::forward<Args>(args)...);\n    } else {\n      static_assert(std::is_same_v<Accessor, DoubleEllpackAccessor>);\n      this->dimpl.BuildHistogram(ctx, matrix, std::forward<Args>(args)...);\n    }\n  }\n};\n\nDeviceHistogramBuilder::DeviceHistogramBuilder()\n    : p_impl_{std::make_unique<DeviceHistogramBuilderImpl>()} {\n  monitor_.Init(__func__);\n}\n\nDeviceHistogramBuilder::~DeviceHistogramBuilder() = default;\n\nvoid DeviceHistogramBuilder::Reset(Context const* ctx, std::size_t max_cached_hist_nodes,\n                                   bst_bin_t n_total_bins, bool force_global_memory) {\n  this->monitor_.Start(__func__);\n  this->p_impl_->Reset(ctx, force_global_memory);\n  this->hist_.Reset(ctx, n_total_bins, max_cached_hist_nodes);\n  this->monitor_.Stop(__func__);\n}\n\nvoid DeviceHistogramBuilder::BuildHistogram(Context const* ctx, EllpackAccessor const& matrix,\n                                            FeatureGroupsAccessor const& feature_groups,\n                                            common::Span<GradientPairInt64 const> gpair,\n                                            common::Span<cuda_impl::RowIndexT const> ridx,\n                                            common::Span<GradientPairInt64> histogram) {\n  this->monitor_.Start(__func__);\n  std::visit(\n      [&](auto&& matrix) {\n        this->p_impl_->BuildHistogram(ctx, matrix, feature_groups, gpair, ridx, histogram);\n      },\n      matrix);\n  this->monitor_.Stop(__func__);\n}\n\nvoid DeviceHistogramBuilder::BuildHistogram(\n    Context const* ctx, EllpackAccessor const& matrix, FeatureGroupsAccessor const& feature_groups,\n    linalg::MatrixView<GradientPairInt64 const> gpair,\n    common::Span<common::Span<cuda_impl::RowIndexT const>> ridxs,\n    common::Span<common::Span<GradientPairInt64>> hists,\n    std::vector<std::size_t> const& h_sizes_csum) {\n  
std::visit(\n      [&](auto&& matrix) {\n        this->p_impl_->BuildHistogram(ctx, matrix, feature_groups, gpair, ridxs, hists,\n                                      h_sizes_csum);\n      },\n      matrix);\n}\n\nvoid DeviceHistogramBuilder::AllReduceHist(Context const* ctx, MetaInfo const& info,\n                                           bst_node_t nidx, std::size_t num_histograms) {\n  this->monitor_.Start(__func__);\n  auto d_node_hist = hist_.GetNodeHistogram(nidx);\n  using ReduceT = typename std::remove_pointer<decltype(d_node_hist.data())>::type::ValueT;\n  auto rc = collective::GlobalSum(\n      ctx, info,\n      linalg::MakeVec(reinterpret_cast<ReduceT*>(d_node_hist.data()),\n                      d_node_hist.size() * 2 * num_histograms, ctx->Device()));\n  SafeColl(rc);\n  this->monitor_.Stop(__func__);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/histogram.cuh",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n#include <memory>   // for unique_ptr\n\n#include \"../../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../common/device_helpers.cuh\"  // for LaunchN\n#include \"../../common/device_vector.cuh\"   // for device_vector\n#include \"../../data/ellpack_page.cuh\"      // for EllpackDeviceAccessor\n#include \"feature_groups.cuh\"               // for FeatureGroupsAccessor\n#include \"xgboost/base.h\"                   // for GradientPair, GradientPairInt64\n#include \"xgboost/context.h\"                // for Context\n#include \"xgboost/span.h\"                   // for Span\n\nnamespace xgboost::tree {\n// Single-target shared memory policy\n[[nodiscard]] inline std::size_t DftStHistShmemBytes(std::int32_t device) {\n  auto optin = dh::MaxSharedMemoryOptin(device);\n  return std::min(optin, std::size_t{96} * 1024);\n}\n\n// Multi-target shared memory policy\n[[nodiscard]] inline std::size_t DftMtHistShmemBytes(std::int32_t device) {\n  auto max_shared_optin = dh::MaxSharedMemoryOptin(device);\n  auto max_shared = dh::MaxSharedMemory(device);\n  // Use larger shared memory if available.\n  //\n  // By default, max_shared is 48 kB for most GPUs. Optin size varies between archs, some\n  // have large optin size, like the H200. We expand the shared memory size for those\n  // large devices.\n  constexpr std::size_t kThreshold = 4;\n  if (max_shared_optin > max_shared * kThreshold) {\n    return 2 * max_shared;\n  }\n  return max_shared;\n}\n\n/**\n * @brief An atomicAdd designed for gradient pair with better performance.  For general\n *        int64_t atomicAdd, one can simply cast it to unsigned long long. 
Exposed for testing.\n */\nXGBOOST_DEV_INLINE void AtomicAdd64As32(int64_t* dst, int64_t src) {\n  uint32_t* y_low = reinterpret_cast<uint32_t*>(dst);\n  uint32_t* y_high = y_low + 1;\n\n  auto cast_src = reinterpret_cast<uint64_t *>(&src);\n\n  uint32_t const x_low = static_cast<uint32_t>(src);\n  uint32_t const x_high = (*cast_src) >> 32;\n\n  auto const old = atomicAdd(y_low, x_low);\n  uint32_t const carry = old > (std::numeric_limits<uint32_t>::max() - x_low) ? 1 : 0;\n  uint32_t const sig = x_high + carry;\n  atomicAdd(y_high, sig);\n}\n\nnamespace cuda_impl {\n// Initial reservation: 1 << 22 elements (about 32 MiB for 8-byte histogram values)\nstd::size_t constexpr DftReserveSize() { return 1 << 22; }\n}  // namespace cuda_impl\n\n/**\n * @brief Data storage for node histograms on device. Automatically expands.\n *\n * @author  Rory\n * @date    28/07/2018\n */\nclass DeviceHistogramStorage {\n private:\n  using GradientSumT = GradientPairInt64;\n  std::size_t stop_growing_size_{0};\n  /** @brief Map nidx to starting index of its histogram. */\n  std::map<int, size_t> nidx_map_;\n  // Large buffer of zeroed memory, caches histograms\n  dh::device_vector<typename GradientSumT::ValueT> data_;\n  // If we run out of storage allocate one histogram at a time in overflow. 
Not cached,\n  // overwritten when a new histogram is requested\n  dh::device_vector<typename GradientSumT::ValueT> overflow_;\n  std::map<int, size_t> overflow_nidx_map_;\n  // The total number of bins across all features and targets\n  bst_bin_t n_total_bins_;\n  static constexpr std::size_t kNumItemsInGradientSum =\n      sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT);\n  static_assert(kNumItemsInGradientSum == 2, \"Number of items in gradient type should be 2.\");\n\n public:\n  explicit DeviceHistogramStorage() { data_.reserve(cuda_impl::DftReserveSize()); }\n\n  void Reset(Context const* ctx, bst_bin_t n_total_bins, std::size_t max_cached_nodes) {\n    this->n_total_bins_ = n_total_bins;\n    auto d_data = data_.data().get();\n    dh::LaunchN(data_.size(), ctx->CUDACtx()->Stream(),\n                [=] __device__(size_t idx) { d_data[idx] = 0.0f; });\n    nidx_map_.clear();\n    overflow_nidx_map_.clear();\n\n    auto max_cached_bin_values =\n        static_cast<std::size_t>(n_total_bins) * max_cached_nodes * kNumItemsInGradientSum;\n    this->stop_growing_size_ = max_cached_bin_values;\n  }\n\n  [[nodiscard]] bool HistogramExists(bst_node_t nidx) const {\n    return nidx_map_.find(nidx) != nidx_map_.cend() ||\n           overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend();\n  }\n  [[nodiscard]] int Bins() const { return n_total_bins_; }\n  [[nodiscard]] size_t HistogramSize() const { return n_total_bins_ * kNumItemsInGradientSum; }\n  dh::device_vector<typename GradientSumT::ValueT>& Data() { return data_; }\n\n  void AllocateHistograms(Context const* ctx, std::vector<bst_node_t> const& new_nidxs) {\n    for (int nidx : new_nidxs) {\n      CHECK(!HistogramExists(nidx));\n    }\n    // Number of items currently used in data\n    const size_t used_size = nidx_map_.size() * HistogramSize();\n    const size_t new_used_size = used_size + HistogramSize() * new_nidxs.size();\n    CHECK_GE(this->stop_growing_size_, kNumItemsInGradientSum);\n    
if (used_size >= this->stop_growing_size_) {\n      // Use overflow\n      // Delete previous entries\n      overflow_nidx_map_.clear();\n      overflow_.resize(HistogramSize() * new_nidxs.size());\n      // Zero memory\n      auto d_data = overflow_.data().get();\n      dh::LaunchN(overflow_.size(), ctx->CUDACtx()->Stream(),\n                  [=] __device__(size_t idx) { d_data[idx] = 0.0; });\n      // Append new histograms\n      for (int nidx : new_nidxs) {\n        overflow_nidx_map_[nidx] = overflow_nidx_map_.size() * HistogramSize();\n      }\n    } else {\n      CHECK_GE(data_.size(), used_size);\n      // Expand if necessary\n      if (data_.size() < new_used_size) {\n        data_.resize(std::max(data_.size() * 2, new_used_size));\n      }\n      // Append new histograms\n      for (int nidx : new_nidxs) {\n        nidx_map_[nidx] = nidx_map_.size() * HistogramSize();\n      }\n    }\n\n    CHECK_GE(data_.size(), nidx_map_.size() * HistogramSize());\n  }\n\n  /**\n   * @brief Return a span over the histogram memory for a given node.\n   *\n   * @param nidx Tree node index.\n   *\n   * @return Span of n_total_bins_ gradient sums, taken from the cache or the overflow buffer.\n   */\n  common::Span<GradientSumT> GetNodeHistogram(int nidx) {\n    CHECK(this->HistogramExists(nidx));\n\n    if (nidx_map_.find(nidx) != nidx_map_.cend()) {\n      // Fetch from normal cache\n      auto ptr = data_.data().get() + nidx_map_.at(nidx);\n      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_total_bins_)};\n    } else {\n      // Fetch from overflow\n      auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx);\n      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_total_bins_)};\n    }\n  }\n};\n\nclass DeviceHistogramBuilderImpl;\n\nclass DeviceHistogramBuilder {\n  std::unique_ptr<DeviceHistogramBuilderImpl> p_impl_;\n  DeviceHistogramStorage hist_;\n  common::Monitor monitor_;\n\n public:\n  explicit DeviceHistogramBuilder();\n  ~DeviceHistogramBuilder();\n  // TODO(jiamingy): 
use a type larger than bst_bin_t since we need to support multi-target.\n  void Reset(Context const* ctx, std::size_t max_cached_hist_nodes, bst_bin_t n_total_bins,\n             bool force_global_memory);\n  // Build histogram for single target and single node.\n  void BuildHistogram(Context const* ctx, EllpackAccessor const& matrix,\n                      FeatureGroupsAccessor const& feature_groups,\n                      common::Span<GradientPairInt64 const> gpair,\n                      common::Span<std::uint32_t const> ridx,\n                      common::Span<GradientPairInt64> histogram);\n  // Build histograms for multiple nodes and multiple targets\n  void BuildHistogram(Context const* ctx, EllpackAccessor const& matrix,\n                      FeatureGroupsAccessor const& feature_groups,\n                      linalg::MatrixView<GradientPairInt64 const> gpair,\n                      common::Span<common::Span<const std::uint32_t>> ridxs,\n                      common::Span<common::Span<GradientPairInt64>> hists,\n                      std::vector<std::size_t> const& h_sizes_csum);\n\n  [[nodiscard]] auto GetNodeHistogram(bst_node_t nidx) { return hist_.GetNodeHistogram(nidx); }\n\n  // num histograms is the number of contiguous histograms in memory to reduce over\n  void AllReduceHist(Context const* ctx, MetaInfo const& info, bst_node_t nidx,\n                     std::size_t num_histograms);\n\n  [[nodiscard]] bool CanSubtract(bst_node_t nidx_parent, bst_node_t nidx_histogram) const {\n    return hist_.HistogramExists(nidx_parent) && hist_.HistogramExists(nidx_histogram);\n  }\n  // Attempt to do subtraction trick\n  // return true if succeeded\n  [[nodiscard]] bool SubtractionTrick(Context const* ctx, bst_node_t nidx_parent,\n                                      bst_node_t nidx_histogram, bst_node_t nidx_subtraction) {\n    if (!this->CanSubtract(nidx_parent, nidx_histogram)) {\n      return false;\n    }\n    auto d_node_hist_parent = 
hist_.GetNodeHistogram(nidx_parent);\n    auto d_node_hist_histogram = hist_.GetNodeHistogram(nidx_histogram);\n    auto d_node_hist_subtraction = hist_.GetNodeHistogram(nidx_subtraction);\n\n    dh::LaunchN(d_node_hist_parent.size(), ctx->CUDACtx()->Stream(), [=] __device__(size_t idx) {\n      d_node_hist_subtraction[idx] = d_node_hist_parent[idx] - d_node_hist_histogram[idx];\n    });\n    return true;\n  }\n\n  template <typename ExpandEntry>\n  [[nodiscard]] auto SubtractHist(Context const* ctx, std::vector<ExpandEntry> const& candidates,\n                                  std::vector<bst_node_t> const& build_nidx,\n                                  std::vector<bst_node_t> const& subtraction_nidx) {\n    this->monitor_.Start(__func__);\n    std::vector<bst_node_t> need_build;\n    for (std::size_t i = 0; i < subtraction_nidx.size(); i++) {\n      auto build_hist_nidx = build_nidx.at(i);\n      auto subtraction_trick_nidx = subtraction_nidx.at(i);\n      auto parent_nidx = candidates.at(i).nidx;\n\n      if (!this->SubtractionTrick(ctx, parent_nidx, build_hist_nidx, subtraction_trick_nidx)) {\n        need_build.push_back(subtraction_trick_nidx);\n      }\n    }\n    this->monitor_.Stop(__func__);\n    return need_build;\n  }\n\n  void AllocateHistograms(Context const* ctx, std::vector<bst_node_t> const& nodes_to_build,\n                          std::vector<bst_node_t> const& nodes_to_sub) {\n    this->monitor_.Start(__func__);\n    std::vector<bst_node_t> all_new = nodes_to_build;\n    all_new.insert(all_new.end(), nodes_to_sub.cbegin(), nodes_to_sub.cend());\n    // Allocate the histograms\n    // Guaranteed contiguous memory\n    this->AllocateHistograms(ctx, all_new);\n    this->monitor_.Stop(__func__);\n  }\n\n  void AllocateHistograms(Context const* ctx, std::vector<int> const& new_nidxs) {\n    this->hist_.AllocateHistograms(ctx, new_nidxs);\n  }\n};\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/leaf_sum.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <thrust/scan.h>     // for inclusive_scan\n#include <thrust/version.h>  // for THRUST_MAJOR_VERSION\n\n#include <cstddef>                                 // for size_t\n#include <cstdint>                                 // for int32_t\n#include <cub/device/device_segmented_reduce.cuh>  // for DeviceSegmentedReduce\n#include <vector>                                  // for vector\n\n#include \"../updater_gpu_common.cuh\"  // for GPUTrainingParam\n#include \"leaf_sum.cuh\"\n#include \"quantiser.cuh\"        // for GradientQuantiser\n#include \"row_partitioner.cuh\"  // for RowIndexT, LeafInfo\n#include \"xgboost/base.h\"       // for GradientPairInt64\n#include \"xgboost/context.h\"    // for Context\n#include \"xgboost/linalg.h\"     // for MatrixView\n#include \"xgboost/span.h\"       // for Span\n\n#if THRUST_MAJOR_VERSION >= 3\n\n#if THRUST_MINOR_VERSION >= 3\n// thrust 3.3\n#include <cuda/iterator>  // for make_tabulate_output_iterator\n#else\n// thrust 3.2/3.1\n#include <thrust/iterator/tabulate_output_iterator.h>  // for make_tabulate_output_iterator\n\n#endif  // THRUST_MINOR_VERSION >= 3\n\n#else\n\n#include \"../../common/linalg_op.cuh\"  // for tbegin\n\n#endif  // THRUST_MAJOR_VERSION >= 3\n\nnamespace xgboost::tree::cuda_impl {\nvoid LeafGradSum(Context const* ctx, std::vector<LeafInfo> const& h_leaves,\n                 common::Span<GradientQuantiser const> roundings,\n                 common::Span<RowIndexT const> sorted_ridx,\n                 linalg::MatrixView<GradientPair const> grad,\n                 linalg::MatrixView<GradientPairInt64> out_sum) {\n  CHECK_EQ(h_leaves.size(), out_sum.Shape(0));\n\n  dh::device_vector<LeafInfo> leaves(h_leaves);\n  auto d_leaves = dh::ToSpan(leaves);\n\n  std::vector<RowIndexT> h_indptr{0};\n  for (auto const& node : h_leaves) {\n    h_indptr.push_back(node.node.segment.Size());\n  }\n  // leaves form a complete partition\n  
dh::device_vector<RowIndexT> indptr{h_indptr};\n  thrust::inclusive_scan(ctx->CUDACtx()->CTP(), indptr.cbegin(), indptr.cend(), indptr.begin());\n  CHECK_EQ(roundings.size(), grad.Shape(1));\n  CHECK_EQ(roundings.size(), out_sum.Shape(1));\n  CHECK_EQ(out_sum.Shape(0), indptr.size() - 1);\n  CHECK_EQ(indptr.size(), h_leaves.size() + 1);\n  auto d_indptr = dh::ToSpan(indptr);\n\n  for (bst_target_t t = 0, n_targets = grad.Shape(1); t < n_targets; ++t) {\n    auto out_t = out_sum.Slice(linalg::All(), t);  // len == n_leaves\n    auto it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) {\n      auto nidx_in_set = dh::SegmentId(d_indptr, i);\n      // Index within segment\n      auto k = i - d_indptr[nidx_in_set];\n      // Global index (within a batch).\n      auto j = d_leaves[nidx_in_set].node.segment.begin + k;\n      // gradient\n      auto g = grad(sorted_ridx[j], t);\n      return roundings[t].ToFixedPoint(g);\n    });\n    // Use an output iterator to implement running sum. 
Old thrust versions either don't\n    // have this iterator, or cannot use it with segmented sum.\n#if THRUST_MAJOR_VERSION >= 3\n\n#if THRUST_MINOR_VERSION >= 2\n    auto out_it = cuda::make_tabulate_output_iterator(\n        [=] XGBOOST_DEVICE(std::int32_t idx, GradientPairInt64 v) mutable { out_t(idx) += v; });\n#else\n    auto out_it = thrust::make_tabulate_output_iterator(\n        [=] XGBOOST_DEVICE(std::int32_t idx, GradientPairInt64 v) mutable { out_t(idx) += v; });\n#endif\n\n#else\n    // Doesn't work with external memory.\n    auto out_it = linalg::tbegin(out_t);\n#endif\n\n    std::size_t n_bytes = 0;\n    dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, n_bytes, it, out_it, h_leaves.size(),\n                                                  indptr.data(), indptr.data() + 1,\n                                                  ctx->CUDACtx()->Stream()));\n    dh::TemporaryArray<char> alloc(n_bytes);\n    dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(alloc.data().get(), n_bytes, it, out_it,\n                                                  h_leaves.size(), indptr.data(), indptr.data() + 1,\n                                                  ctx->CUDACtx()->Stream()));\n  }\n}\n\nvoid LeafWeight(Context const* ctx, GPUTrainingParam const& param,\n                common::Span<GradientQuantiser const> roundings,\n                linalg::MatrixView<GradientPairInt64 const> grad_sum,\n                linalg::MatrixView<float> out_weights) {\n  CHECK(grad_sum.Contiguous());\n  dh::LaunchN(grad_sum.Size(), ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t i) mutable {\n    auto [nidx_in_set, t] = linalg::UnravelIndex(i, grad_sum.Shape());\n    auto g = roundings[t].ToFloatingPoint(grad_sum(nidx_in_set, t));\n    out_weights(nidx_in_set, t) = CalcWeight(param, g.GetGrad(), g.GetHess()) * param.learning_rate;\n  });\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/leaf_sum.cuh",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n\n#include <vector>  // for vector\n\n#include \"../updater_gpu_common.cuh\"  // for GPUTrainingParam\n#include \"quantiser.cuh\"              // for GradientQuantiser\n#include \"row_partitioner.cuh\"        // for RowIndexT, LeafInfo\n#include \"xgboost/context.h\"          // for Context\n#include \"xgboost/linalg.h\"           // for MatrixView\n#include \"xgboost/span.h\"             // for Span\n\nnamespace xgboost::tree::cuda_impl {\n/**\n * @brief Calculate gradient sum for leaf nodes based on row partitions.\n *\n *   shape(out_sum) == (n_leaves, n_targets)\n */\nvoid LeafGradSum(Context const* ctx, std::vector<LeafInfo> const& h_leaves,\n                 common::Span<GradientQuantiser const> roundings,\n                 common::Span<RowIndexT const> sorted_ridx,\n                 linalg::MatrixView<GradientPair const> grad,\n                 linalg::MatrixView<GradientPairInt64> out_sum);\n\n/**\n * @brief Calculate leaf weights from gradient sum.\n *\n *   shape(grad_sum) == (n_leaves, n_targets)\n *   shape(out_weights) == (n_leaves, n_targets)\n */\nvoid LeafWeight(Context const* ctx, GPUTrainingParam const& param,\n                common::Span<GradientQuantiser const> roundings,\n                linalg::MatrixView<GradientPairInt64 const> grad_sum,\n                linalg::MatrixView<float> out_weights);\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/multi_evaluate_splits.cu",
    "content": "/**\n * Copyright 2025-2026, XGBoost contributors\n */\n#include <thrust/reduce.h>  // for reduce_by_key, reduce\n\n#include <cub/block/block_scan.cuh>  // for BlockScan\n#include <cub/util_type.cuh>         // for KeyValuePair\n#include <cub/warp/warp_reduce.cuh>  // for WarpReduce\n#include <cuda/ptx>                  // for get_sreg_laneid\n#include <cuda/std/functional>       // for identity\n#include <limits>\n#include <vector>  // for vector\n\n#include \"../../common/cuda_context.cuh\"\n#include \"../tree_view.h\"             // for MultiTargetTreeView\n#include \"multi_evaluate_splits.cuh\"  // for MultiEvalauteSplitInputs, MultiEvaluateSplitSharedInputs\n#include \"quantiser.cuh\"              // for GradientQuantiser\n#include \"xgboost/base.h\"             // for GradientPairInt64\n#include \"xgboost/span.h\"             // for Span\n\nnamespace xgboost::tree::cuda_impl {\nnamespace {\n/**\n * @brief Calculate the gradient index for the reverse pass\n *\n * @note All inputs are global across features.\n */\n__device__ bst_bin_t RevBinIdx(bst_bin_t gidx_begin, bst_bin_t gidx_end, bst_bin_t bin_idx) {\n  return gidx_begin + (gidx_end - bin_idx - 1);\n}\n\n// Scan the histogram in 2 dim for all nodes\nstruct ScanHistogramAgent {\n  using WarpScanT = cub::WarpScan<GradientPairInt64>;\n\n  typename WarpScanT::TempStorage *tmp_storage;\n  bst_bin_t gidx_begin;\n  bst_bin_t gidx_end;\n  bst_target_t n_targets;\n\n  template <typename BinIndexFn>\n  __device__ void ScanFeature(GradientPairInt64 const *node_histogram,\n                              GradientPairInt64 *scan_result, bst_target_t t,\n                              BinIndexFn &&bin_idx_fn) {\n    auto lane_id = cuda::ptx::get_sreg_laneid();\n    // The forward pass and the backward pass differs in where the bin is read, which is\n    // specified by the callback bin_idx_fn(). 
They write to the same output location.\n    GradientPairInt64 warp_aggregate;\n    for (auto scan_begin = gidx_begin; scan_begin < gidx_end; scan_begin += dh::WarpThreads()) {\n      auto bin_idx = scan_begin + lane_id;\n      bool thread_active = bin_idx < gidx_end;\n      // Read from histogram: [target][bins]\n      auto bin = thread_active ? node_histogram[bin_idx_fn(bin_idx)] : GradientPairInt64{};\n      if (lane_id == 0) {\n        bin += warp_aggregate;\n      }\n      WarpScanT(*tmp_storage).InclusiveScan(bin, bin, cuda::std::plus{}, warp_aggregate);\n      // Required by the warp scan.\n      __syncwarp();\n      if (thread_active) {\n        // Write to scan result: [bins][targets]\n        // The layout is changed from target-major to bin-major here.\n        scan_result[bin_idx * n_targets + t] = bin;\n      }\n    }\n  }\n  // Forward scan pass\n  __device__ void Forward(GradientPairInt64 const *node_histogram,\n                          common::Span<GradientPairInt64> scan_result, bst_target_t t) {\n    this->ScanFeature(node_histogram, scan_result.data(), t, cuda::std::identity{});\n  }\n  // Backward scan pass for missing values\n  __device__ void Backward(GradientPairInt64 const *node_histogram,\n                           common::Span<GradientPairInt64> scan_result, bst_target_t t) {\n    this->ScanFeature(node_histogram, scan_result.data(), t,\n                      [&](bst_bin_t bin_idx) { return RevBinIdx(gidx_begin, gidx_end, bin_idx); });\n  }\n};\n}  // namespace\n\n// The scan kernel reads from target-major histogram layout and writes the bin-major scan\n// buffer. 
This helps us keep a reference to the bin in the split candidate.\ntemplate <std::int32_t kBlockThreads>\n__global__ __launch_bounds__(kBlockThreads) void ScanHistogramKernel(\n    common::Span<MultiEvaluateSplitInputs const> nodes, MultiEvaluateSplitSharedInputs shared,\n    common::Span<common::Span<GradientPairInt64>> outputs) {\n  static_assert(kBlockThreads % dh::WarpThreads() == 0);\n\n  constexpr std::int32_t kWarpsPerBlk = kBlockThreads / dh::WarpThreads();\n  auto const warp_id_in_blk = static_cast<std::int32_t>(threadIdx.x) / dh::WarpThreads();\n  // The warp index across the entire grid\n  auto const warp_id = warp_id_in_blk + kWarpsPerBlk * blockIdx.x;\n  bst_target_t const n_targets = shared.Targets();\n  auto const n_valid_warps = nodes.size() * shared.max_active_feature * n_targets;\n\n  if (warp_id >= n_valid_warps) {\n    return;\n  }\n\n  auto [nidx_in_set, fidx_in_set, target_idx] =\n      linalg::UnravelIndex(warp_id, nodes.size(), shared.max_active_feature, n_targets);\n  auto const &node = nodes[nidx_in_set];\n  auto out = outputs[nidx_in_set];\n  // This node might have a smaller number of sampled features.\n  if (fidx_in_set >= node.feature_set.size()) {\n    return;\n  }\n  auto fidx = node.feature_set[fidx_in_set];\n  // The histogram is full, regardless of whether a feature is sampled.\n  bst_bin_t gidx_begin = shared.feature_segments[fidx];\n  bst_bin_t gidx_end = shared.feature_segments[fidx + 1];\n\n  // Get total bins from feature_segments (last element)\n  bst_bin_t n_bins_per_target = shared.feature_segments.back();\n\n  using AgentT = ScanHistogramAgent;\n  __shared__ typename AgentT::WarpScanT::TempStorage tmp_storage[kWarpsPerBlk];\n  ScanHistogramAgent agent{&tmp_storage[warp_id_in_blk], gidx_begin, gidx_end, n_targets};\n  auto t_hist = node.histogram.subspan(n_bins_per_target * target_idx, n_bins_per_target);\n\n  if (shared.one_pass != MultiEvaluateSplitSharedInputs::kBackward) {\n    auto forward = out.subspan(0, 
node.histogram.size());\n    agent.Forward(t_hist.data(), forward, target_idx);\n  }\n  // TODO(jiamingy): Skip the backward pass if there's no missing value.\n  if (shared.one_pass != MultiEvaluateSplitSharedInputs::kForward) {\n    auto backward = out.subspan(node.histogram.size(), node.histogram.size());\n    agent.Backward(t_hist.data(), backward, target_idx);\n  }\n}\n\nnamespace {\nstruct EvaluateSplitAgent {\n  using ArgMaxT = cub::KeyValuePair<std::uint32_t, double>;\n  using MaxReduceT = cub::WarpReduce<ArgMaxT>;\n\n  typename MaxReduceT::TempStorage *temp_storage;\n  bst_feature_t fidx;\n\n  template <std::int32_t d_step>\n  __device__ void Numerical(MultiEvaluateSplitInputs const &node,\n                            MultiEvaluateSplitSharedInputs const &shared,\n                            common::Span<GradientPairInt64 const> node_scan,\n                            MultiSplitCandidate *best_split) {\n    static_assert(d_step == +1 || d_step == -1, \"Invalid step.\");\n    // Calculate split gain for each bin\n    auto n_targets = shared.Targets();\n    auto roundings = shared.roundings.data();\n    auto lane_id = cuda::ptx::get_sreg_laneid();\n\n    bst_bin_t gidx_begin = shared.feature_segments[fidx];\n    bst_bin_t gidx_end = shared.feature_segments[fidx + 1];\n\n    for (auto scan_begin = gidx_begin; scan_begin < gidx_end; scan_begin += dh::WarpThreads()) {\n      auto bin_idx = scan_begin + lane_id;\n      bool thread_active = bin_idx < gidx_end;\n\n      auto constexpr kNullGain = -std::numeric_limits<double>::infinity();\n      double gain = thread_active ? 
0 : kNullGain;\n\n      if (thread_active) {\n        // Scan result layout: [bins][targets]\n        // bin_idx is the global bin index\n        auto scan_bin_offset = bin_idx * n_targets;\n        for (bst_target_t t = 0; t < n_targets; ++t) {\n          auto parent_sum = roundings[t].ToFloatingPoint(node.parent_sum[t]);\n          // left\n          auto left_sum = roundings[t].ToFloatingPoint(node_scan[scan_bin_offset + t]);\n          auto lw_t =\n              ::xgboost::tree::CalcWeight(shared.param, left_sum.GetGrad(), left_sum.GetHess());\n          // right\n          auto right_sum = parent_sum - left_sum;\n          auto rw_t =\n              ::xgboost::tree::CalcWeight(shared.param, right_sum.GetGrad(), right_sum.GetHess());\n\n          gain += -lw_t * ThresholdL1(left_sum.GetGrad(), shared.param.reg_alpha);\n          gain += -rw_t * ThresholdL1(right_sum.GetGrad(), shared.param.reg_alpha);\n        }\n      }\n\n      auto best = MaxReduceT(*temp_storage).Reduce({threadIdx.x, gain}, cub::ArgMax{});\n      auto best_thread = __shfl_sync(0xffffffff, best.key, 0);\n\n      if (threadIdx.x == best_thread && !isinf(gain)) {\n        // Update\n        bst_bin_t split_gidx = bin_idx;\n        if (d_step == -1) {\n          split_gidx = RevBinIdx(gidx_begin, gidx_end, bin_idx);\n        }\n        float fvalue;\n        if (d_step == +1) {\n          fvalue = shared.feature_values[split_gidx];\n        } else {\n          if (split_gidx == gidx_begin) {\n            fvalue = -std::numeric_limits<float>::infinity();\n          } else {\n            fvalue = shared.feature_values[split_gidx - 1];\n          }\n        }\n        // Scan result layout: [bins][targets] - all targets for this bin are contiguous\n        // bin_idx is the global bin index\n        auto scan_bin_offset = bin_idx * n_targets;\n        auto scan_bin = node_scan.subspan(scan_bin_offset, n_targets);\n        // Missing values go to right in the forward pass, go to left in the 
backward pass.\n        best_split->Update(gain, d_step == 1 ? kRightDir : kLeftDir, fvalue, fidx, scan_bin, false,\n                           shared.param, shared.roundings);\n      }\n\n      __syncwarp();\n    }\n  }\n};\n}  // namespace\n\n// Find the best split based on the scan result\n//\n// The scan buffer has a bin-major layout.\ntemplate <std::int32_t kBlockThreads>\n__global__ __launch_bounds__(kBlockThreads) void EvaluateSplitsKernel(\n    common::Span<MultiEvaluateSplitInputs const> nodes, MultiEvaluateSplitSharedInputs shared,\n    common::Span<common::Span<GradientPairInt64>> bin_scans,\n    common::Span<MultiSplitCandidate> out_candidates) {\n  static_assert(kBlockThreads % dh::WarpThreads() == 0);\n\n  constexpr std::int32_t kWarpsPerBlk = kBlockThreads / dh::WarpThreads();\n  auto const warp_id_in_blk = static_cast<std::int32_t>(threadIdx.x) / dh::WarpThreads();\n  // The warp index across the entire grid\n  auto const warp_id = warp_id_in_blk + kWarpsPerBlk * blockIdx.x;\n  auto const n_valid_warps = nodes.size() * shared.max_active_feature;\n\n  if (warp_id >= n_valid_warps) {\n    return;\n  }\n\n  using AgentT = EvaluateSplitAgent;\n  __shared__ typename AgentT::MaxReduceT::TempStorage temp_storage[kWarpsPerBlk];\n\n  const auto nidx = warp_id / shared.max_active_feature;\n  auto const &node = nodes[nidx];\n\n  bst_feature_t fidx_in_set = warp_id - (nidx * shared.max_active_feature);\n  // This node might have a smaller number of sampled features.\n  if (fidx_in_set >= node.feature_set.size()) {\n    return;\n  }\n  auto fidx = node.feature_set[fidx_in_set];\n  AgentT agent{&temp_storage[warp_id_in_blk], fidx};\n  // The number of candidates is allocated using active features\n  auto candidate_idx = nidx * shared.max_active_feature + fidx_in_set;\n\n  if (shared.one_pass != MultiEvaluateSplitSharedInputs::kBackward) {\n    auto forward = bin_scans[nidx].subspan(0, node.histogram.size());\n    agent.template Numerical<+1>(node, shared, 
forward, &out_candidates[candidate_idx]);\n  }\n  if (shared.one_pass != MultiEvaluateSplitSharedInputs::kForward) {\n    auto backward = bin_scans[nidx].subspan(node.histogram.size(), node.histogram.size());\n    agent.template Numerical<-1>(node, shared, backward, &out_candidates[candidate_idx]);\n  }\n}\n\n[[nodiscard]] MultiExpandEntry MultiHistEvaluator::EvaluateSingleSplit(\n    Context const *ctx, MultiEvaluateSplitInputs const &input,\n    MultiEvaluateSplitSharedInputs const &shared_inputs) {\n  dh::device_vector<MultiEvaluateSplitInputs> inputs{input};\n  dh::device_vector<MultiExpandEntry> outputs(1);\n\n  auto d_outputs = dh::ToSpan(outputs);\n  this->EvaluateSplits(ctx, dh::ToSpan(inputs), shared_inputs, input.nidx, d_outputs);\n\n  // `EvaluateSplits` applies eta to leaf nodes only; we need to apply it to the base\n  // weight.\n  auto n_targets = shared_inputs.Targets();\n  dh::LaunchN(n_targets, ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t t) {\n    auto weight = d_outputs[0].base_weight;\n    weight[t] *= shared_inputs.param.learning_rate;\n  });\n\n  return outputs[0];\n}\n\nvoid MultiHistEvaluator::EvaluateSplits(Context const *ctx,\n                                        common::Span<MultiEvaluateSplitInputs const> d_inputs,\n                                        MultiEvaluateSplitSharedInputs const &shared_inputs,\n                                        bst_node_t max_nidx,\n                                        common::Span<MultiExpandEntry> out_splits) {\n  auto n_targets = shared_inputs.Targets();\n  auto n_bins_per_feat_tar = shared_inputs.n_bins_per_feat_tar;\n  CHECK_GE(n_bins_per_feat_tar, 1);\n  auto n_features = shared_inputs.max_active_feature;\n  CHECK_GE(n_features, 1);\n  CHECK_LT(n_features, shared_inputs.feature_segments.size());\n\n  std::uint32_t n_nodes = d_inputs.size();\n  CHECK_EQ(n_nodes, out_splits.size());\n\n  if (n_nodes == 0) {\n    return;\n  }\n\n  // Allocate weight and split sum storage on 
demand for the maximum node ID being evaluated.\n  this->AllocNodeWeight(max_nidx, n_targets);\n  this->split_sums_.Alloc(max_nidx, n_targets);\n\n  // Calculate total scan buffer size needed for all nodes\n  auto node_hist_size = n_targets * shared_inputs.Features() * n_bins_per_feat_tar;\n  std::size_t total_hist_size = node_hist_size * n_nodes;\n\n  // Scan the histograms. One for forward and the other for backward.\n  // Since there's only store op on the scan buffer, no need to initialize it.\n  this->scan_buffer_.resize(total_hist_size * 2);\n\n  // Create spans for each node's scan results\n  std::vector<common::Span<GradientPairInt64>> h_scans(n_nodes);\n  for (decltype(n_nodes) nidx_in_set = 0; nidx_in_set < n_nodes; ++nidx_in_set) {\n    h_scans[nidx_in_set] = dh::ToSpan(this->scan_buffer_)\n                               .subspan(nidx_in_set * node_hist_size * 2, node_hist_size * 2);\n  }\n  dh::device_vector<common::Span<GradientPairInt64>> scans(h_scans);\n\n  // Launch histogram scan kernel, each warp handles one target of one feature of one node.\n  {\n    std::uint32_t constexpr kBlockThreads = 512;\n    constexpr std::int32_t kWarpsPerBlk = kBlockThreads / dh::WarpThreads();\n    auto n_warps = n_nodes * n_targets * n_features;\n    auto n_blocks = common::DivRoundUp(n_warps, kWarpsPerBlk);\n    dh::LaunchKernel{n_blocks, kBlockThreads}(  // NOLINT\n        ScanHistogramKernel<kBlockThreads>, d_inputs, shared_inputs, dh::ToSpan(scans));\n  }\n\n  // Launch split evaluation kernel\n  dh::device_vector<MultiSplitCandidate> d_splits(n_nodes * n_features);\n  {\n    std::uint32_t constexpr kBlockThreads = 512;\n    constexpr std::int32_t kWarpsPerBlk = kBlockThreads / dh::WarpThreads();\n    auto n_warps = n_nodes * n_features;\n    auto n_blocks = common::DivRoundUp(n_warps, kWarpsPerBlk);\n    dh::LaunchKernel{n_blocks, kBlockThreads, 0, ctx->CUDACtx()->Stream()}(  // NOLINT\n        EvaluateSplitsKernel<kBlockThreads>, d_inputs, shared_inputs, 
dh::ToSpan(scans),\n        dh::ToSpan(d_splits));\n  }\n\n  // Find best split for each node\n  auto d_weights = this->GetNodeWeights(n_targets);\n  auto d_split_sums = this->split_sums_.View();\n  auto s_d_splits = dh::ToSpan(d_splits);\n\n  // Process results for each node\n  // Find best splits among all features for all nodes\n  auto key_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) {\n    // Returns nidx_in_set\n    return i / n_features;\n  });\n  dh::device_vector<MultiSplitCandidate> best_splits(out_splits.size());\n  thrust::reduce_by_key(\n      ctx->CUDACtx()->CTP(), key_it, key_it + s_d_splits.size(), dh::tcbegin(s_d_splits),\n      thrust::make_discard_iterator(), best_splits.begin(), std::equal_to{},\n      [=] XGBOOST_DEVICE(MultiSplitCandidate const &lhs, MultiSplitCandidate const &rhs) {\n        return lhs.loss_chg > rhs.loss_chg ? lhs : rhs;\n      });\n  auto d_best_splits = dh::ToSpan(best_splits);\n\n  dh::LaunchN(n_nodes, ctx->CUDACtx()->Stream(), [=] __device__(std::size_t nidx_in_set) {\n    auto input = d_inputs[nidx_in_set];\n    MultiSplitCandidate best_split = d_best_splits[nidx_in_set];\n    if (best_split.child_sum.empty()) {\n      // Invalid split\n      out_splits[nidx_in_set] = {};\n      return;\n    }\n\n    // Calculate weights for this node using the actual node id for persistent storage\n    bst_node_t nidx = input.nidx;\n    auto base_weight = d_weights.Base(nidx);\n    auto left_weight = d_weights.Left(nidx);\n    auto right_weight = d_weights.Right(nidx);\n\n    auto roundings = shared_inputs.roundings;\n    auto split_sum = best_split.child_sum;\n\n    // Copy split sum to persistent buffer for loss-guide grow policy support.\n    // The child_sum span in best_split points to scan_buffer_ which gets reused,\n    // so we store it persistently indexed by node id.\n    auto split_sum_dest = GetNodeSumImpl(d_split_sums, nidx, n_targets);\n\n    bool l = true, r = true;\n    float parent_gain = 0;\n    
double left_hess = 0, right_hess = 0;  // Sum of child hessians across all targets\n    auto eta = shared_inputs.param.learning_rate;\n\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      auto quantizer = roundings[t];\n      auto sibling_sum = input.parent_sum[t] - split_sum[t];\n\n      // Base weight and parent gain\n      auto g = quantizer.ToFloatingPoint(input.parent_sum[t]);\n      base_weight[t] = CalcWeight(shared_inputs.param, g.GetGrad(), g.GetHess());\n      parent_gain += -base_weight[t] * ThresholdL1(g.GetGrad(), shared_inputs.param.reg_alpha);\n      split_sum_dest[t] = split_sum[t];\n\n      // Check for empty hessian\n      l = l && (split_sum[t].GetQuantisedHess() == 0);\n      r = r && (sibling_sum.GetQuantisedHess() == 0);\n\n      // Left/right weights\n      GradientPairPrecise lg, rg;\n      if (best_split.dir == kRightDir) {\n        // forward pass, split_sum is the left sum\n        lg = quantizer.ToFloatingPoint(split_sum[t]);\n        left_weight[t] = CalcWeight(shared_inputs.param, lg.GetGrad(), lg.GetHess()) * eta;\n        rg = quantizer.ToFloatingPoint(sibling_sum);\n        right_weight[t] = CalcWeight(shared_inputs.param, rg.GetGrad(), rg.GetHess()) * eta;\n      } else {\n        // backward pass, split_sum is the right sum\n        rg = quantizer.ToFloatingPoint(split_sum[t]);\n        right_weight[t] = CalcWeight(shared_inputs.param, rg.GetGrad(), rg.GetHess()) * eta;\n        lg = quantizer.ToFloatingPoint(sibling_sum);\n        left_weight[t] = CalcWeight(shared_inputs.param, lg.GetGrad(), lg.GetHess()) * eta;\n      }\n\n      left_hess += lg.GetHess();\n      right_hess += rg.GetHess();\n    }\n\n    // Set up the output entry with spans pointing to persistent weight storage\n    out_splits[nidx_in_set] = {nidx, input.depth, best_split, base_weight};\n    out_splits[nidx_in_set].split.loss_chg -= parent_gain;\n    out_splits[nidx_in_set].UpdateHessian(left_hess, right_hess);\n\n    if (l || r) {\n      
out_splits[nidx_in_set].split.loss_chg = -std::numeric_limits<float>::max();\n    }\n  });\n}\n\nvoid MultiHistEvaluator::ApplyTreeSplit(Context const *ctx, RegTree const *p_tree,\n                                        common::Span<MultiExpandEntry const> d_candidates,\n                                        bst_target_t n_targets) {\n  // Assign the node sums here, for the next evaluate split call.\n  auto mt_tree = MultiTargetTreeView{ctx->Device(), false, p_tree};\n  auto max_in_it = dh::MakeIndexTransformIter([=] __device__(std::size_t i) -> bst_node_t {\n    return std::max(mt_tree.LeftChild(d_candidates[i].nidx),\n                    mt_tree.RightChild(d_candidates[i].nidx));\n  });\n  auto max_node = thrust::reduce(\n      ctx->CUDACtx()->CTP(), max_in_it, max_in_it + d_candidates.size(), 0,\n      [=] XGBOOST_DEVICE(bst_node_t l, bst_node_t r) { return cuda::std::max(l, r); });\n  this->AllocNodeSum(max_node, n_targets);\n\n  auto node_sums = this->node_sums_.View();\n  // Use the internal split sums buffer instead of candidate.split.child_sum . 
It may be\n  // stale in loss-guide grow policy (entries can remain in priority queue across\n  // evaluation rounds).\n  auto split_sums = this->split_sums_.View();\n\n  dh::LaunchN(n_targets * d_candidates.size(), ctx->CUDACtx()->Stream(),\n              [=] XGBOOST_DEVICE(std::size_t i) {\n                auto get_node_sum = [&](bst_node_t nidx) {\n                  return GetNodeSumImpl(node_sums, nidx, n_targets);\n                };\n                auto nidx_in_set = i / n_targets;\n                auto t = i % n_targets;\n\n                auto const &candidate = d_candidates[nidx_in_set];\n                auto const &best_split = candidate.split;\n\n                auto parent_sum = get_node_sum(candidate.nidx);\n                // Look up split sum from persistent buffer by node id.\n                // Index with n_targets, which must match the target count used during storage.\n                auto split_sum = GetNodeSumImpl(split_sums, candidate.nidx, n_targets);\n                auto left_sum = get_node_sum(mt_tree.LeftChild(candidate.nidx));\n                auto right_sum = get_node_sum(mt_tree.RightChild(candidate.nidx));\n\n                auto split_sum_t = split_sum[t];\n                auto sibling_sum = parent_sum[t] - split_sum_t;\n                if (best_split.dir == kRightDir) {\n                  // forward pass, split_sum is the left sum\n                  left_sum[t] = split_sum_t;\n                  right_sum[t] = sibling_sum;\n                } else {\n                  // backward pass, split_sum is the right sum\n                  right_sum[t] = split_sum_t;\n                  left_sum[t] = sibling_sum;\n                }\n              });\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/multi_evaluate_splits.cuh",
    "content": "/**\n * Copyright 2025-2026, XGBoost contributors\n */\n#pragma once\n\n#include \"../../common/device_vector.cuh\"  // for device_vector\n#include \"evaluate_splits.cuh\"             // for MultiEvaluateSplitSharedInputs\n#include \"xgboost/base.h\"                  // for GradientPairInt64\n#include \"xgboost/context.h\"               // for Context\n\nnamespace xgboost::tree::cuda_impl {\n/** @brief Evaluator for vector leaf. */\nclass MultiHistEvaluator {\n public:\n  template <typename GradT>\n  static XGBOOST_DEVICE common::Span<GradT> GetNodeSumImpl(common::Span<GradT> node_sums,\n                                                           bst_node_t nidx,\n                                                           bst_target_t n_targets) {\n    auto offset = nidx * n_targets;\n    return node_sums.subspan(offset, n_targets);\n  }\n\n  /** @brief Buffer to access node weights indexed by node id. */\n  struct NodeWeightBuffer {\n    // * 3 because of base, left, right weights per node.\n    constexpr static bst_node_t kWeightsPerNode = 3;\n\n    common::Span<float> weights;\n    bst_target_t n_targets;\n\n    // Get the base weight buffer for a node\n    [[nodiscard]] XGBOOST_DEVICE common::Span<float> Base(bst_node_t nidx) const {\n      return weights.subspan(nidx * n_targets * kWeightsPerNode, n_targets);\n    }\n    // Get the left child weight buffer for a node\n    [[nodiscard]] XGBOOST_DEVICE common::Span<float> Left(bst_node_t nidx) const {\n      return weights.subspan(nidx * n_targets * kWeightsPerNode + n_targets, n_targets);\n    }\n    // Get the right child weight buffer for a node\n    [[nodiscard]] XGBOOST_DEVICE common::Span<float> Right(bst_node_t nidx) const {\n      return weights.subspan(nidx * n_targets * kWeightsPerNode + n_targets * 2, n_targets);\n    }\n  };\n\n  struct NodeSumBuffer {\n    dh::DeviceUVector<GradientPairInt64> node_sums;\n\n    /**\n     * @brief Allocate storage for node sums up to the given node 
ID.\n     */\n    void Alloc(bst_node_t nidx, bst_target_t n_targets) {\n      auto end = (nidx + 1) * n_targets;\n      if (this->node_sums.size() < end) {\n        this->node_sums.resize(end);\n      }\n    }\n    [[nodiscard]] common::Span<GradientPairInt64> GetNode(bst_node_t nidx, bst_target_t n_targets) {\n      return GetNodeSumImpl(dh::ToSpan(this->node_sums), nidx, n_targets);\n    }\n    [[nodiscard]] common::Span<GradientPairInt64 const> GetNode(bst_node_t nidx,\n                                                                bst_target_t n_targets) const {\n      return GetNodeSumImpl(dh::ToSpan(this->node_sums), nidx, n_targets);\n    }\n    auto View() { return dh::ToSpan(this->node_sums); }\n    auto View() const { return dh::ToSpan(this->node_sums); }\n  };\n\n private:\n  // Persistent buffer for node weights, indexed by node id.\n  dh::DeviceUVector<float> node_weights_;\n  // Buffer for histogram scans.\n  dh::DeviceUVector<GradientPairInt64> scan_buffer_;\n  // Buffer for node gradient sums. Nodes stored in this buffer are valid nodes (exist in\n  // the output tree) instead of candidates.\n  NodeSumBuffer node_sums_;\n  // Buffer for split sums (child_sum at split point), indexed by node id. This temporary\n  // buffer is needed because we don't have the child node index during evaluation, which\n  // is only available after applying split to the tree.\n  NodeSumBuffer split_sums_;\n\n public:\n  /**\n   * @brief Run evaluation for the root node.\n   */\n  [[nodiscard]] MultiExpandEntry EvaluateSingleSplit(\n      Context const *ctx, MultiEvaluateSplitInputs const &input,\n      MultiEvaluateSplitSharedInputs const &shared_inputs);\n  /**\n   * @brief Run evaluation for multiple nodes.\n   *\n   * @param max_nidx Maximum node ID among the nodes being evaluated. 
Used to allocate\n   *                 weight storage on demand.\n   */\n  void EvaluateSplits(Context const *ctx, common::Span<MultiEvaluateSplitInputs const> d_inputs,\n                      MultiEvaluateSplitSharedInputs const &shared_inputs, bst_node_t max_nidx,\n                      common::Span<MultiExpandEntry> out_splits);\n\n  /**\n   * @brief Allocate storage for node sums up to the given node ID.\n   */\n  void AllocNodeSum(bst_node_t nidx, bst_target_t n_targets) {\n    this->node_sums_.Alloc(nidx, n_targets);\n  }\n  [[nodiscard]] common::Span<GradientPairInt64> GetNodeSum(bst_node_t nidx,\n                                                           bst_target_t n_targets) {\n    return this->node_sums_.GetNode(nidx, n_targets);\n  }\n\n  /**\n   * @brief Allocate storage for weights up to the given node ID.\n   */\n  void AllocNodeWeight(bst_node_t nidx, bst_target_t n_targets) {\n    auto required = (nidx + 1) * n_targets * NodeWeightBuffer::kWeightsPerNode;\n    if (this->node_weights_.size() < required) {\n      this->node_weights_.resize(required);\n    }\n  }\n  [[nodiscard]] NodeWeightBuffer GetNodeWeights(bst_target_t n_targets) {\n    return NodeWeightBuffer{dh::ToSpan(this->node_weights_), n_targets};\n  }\n  /**\n   * @brief Copy weights for a node from device to host vectors.\n   *\n   * Uses the split targets count stored during allocation, which may differ from tree targets\n   * when using reduced gradient.\n   *\n   * TODO(jiamingy): Remove this method and use device-only buffer.\n   */\n  void CopyNodeWeightsToHost(bst_node_t nidx, bst_target_t n_targets,\n                             std::vector<float> *base_weight, std::vector<float> *left_weight,\n                             std::vector<float> *right_weight) {\n    auto weights = this->GetNodeWeights(n_targets);\n    base_weight->resize(n_targets);\n    left_weight->resize(n_targets);\n    right_weight->resize(n_targets);\n    dh::CopyDeviceSpanToVector(base_weight, 
weights.Base(nidx));\n    dh::CopyDeviceSpanToVector(left_weight, weights.Left(nidx));\n    dh::CopyDeviceSpanToVector(right_weight, weights.Right(nidx));\n  }\n\n  // Track the child gradient sum.\n  void ApplyTreeSplit(Context const *ctx, RegTree const *p_tree,\n                      common::Span<MultiExpandEntry const> d_candidates, bst_target_t n_targets);\n};\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/quantiser.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <thrust/copy.h>                         // for copy_n\n#include <thrust/iterator/transform_iterator.h>  // for make_transform_iterator\n\n#include <algorithm>\n#include <cstdint>          // uint32_t, int32_t\n#include <cuda/functional>  // for proclaim_copyable_arguments\n#include <vector>           // for vector\n\n#include \"../../collective/aggregator.h\"\n#include \"../../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../common/deterministic.cuh\"\n#include \"../../common/device_helpers.cuh\"\n#include \"../../common/linalg_op.cuh\"  // for tbegin, tcbegin\n#include \"quantiser.cuh\"\n#include \"xgboost/base.h\"\n\nnamespace xgboost::tree {\nnamespace {\nstruct Pair {\n  GradientPairPrecise first;\n  GradientPairPrecise second;\n};\n__host__ XGBOOST_DEV_INLINE Pair operator+(Pair const& lhs, Pair const& rhs) {\n  return {lhs.first + rhs.first, lhs.second + rhs.second};\n}\n\nstruct Clip {\n  static XGBOOST_DEV_INLINE float Pclip(float v) { return v > 0 ? v : 0; }\n  static XGBOOST_DEV_INLINE float Nclip(float v) { return v < 0 ? abs(v) : 0; }\n\n  XGBOOST_DEV_INLINE Pair operator()(GradientPair x) const {\n    auto pg = Pclip(x.GetGrad());\n    auto ph = Pclip(x.GetHess());\n\n    auto ng = Nclip(x.GetGrad());\n    auto nh = Nclip(x.GetHess());\n\n    return {GradientPairPrecise{pg, ph}, GradientPairPrecise{ng, nh}};\n  }\n};\n\n/**\n * In algorithm 5 (see common::CreateRoundingFactor) the bound is calculated as\n * $max(|v_i|) * n$.  
Here we use the bound:\n *\n * @begin{equation}\n *   max( fl(\\sum^{V}_{v_i>0}{v_i}), fl(\\sum^{V}_{v_i<0}|v_i|) )\n * @end{equation}\n *\n * to avoid outliers, as the full reduction is reproducible on GPU with reduction tree.\n */\nPair MakeQuantiserForTarget(Context const* ctx, linalg::VectorView<GradientPair const> gpair) {\n  using T = typename GradientPairPrecise::ValueT;\n\n  auto beg = thrust::make_transform_iterator(linalg::tcbegin(gpair), Clip{});\n  Pair p =\n      dh::Reduce(ctx->CUDACtx()->CTP(), beg, beg + gpair.Size(), Pair{}, cuda::std::plus<Pair>{});\n  return p;\n}\n\nGradientQuantiser BuildQuantiserFromPair(Pair const& p, std::size_t total_rows) {\n  using GradientSumT = GradientPairPrecise;\n  using T = typename GradientSumT::ValueT;\n\n  GradientSumT positive_sum{p.first}, negative_sum{p.second};\n\n  auto histogram_rounding =\n      GradientSumT{common::CreateRoundingFactor<T>(\n                       std::max(positive_sum.GetGrad(), negative_sum.GetGrad()), total_rows),\n                   common::CreateRoundingFactor<T>(\n                       std::max(positive_sum.GetHess(), negative_sum.GetHess()), total_rows)};\n\n  using IntT = typename GradientPairInt64::ValueT;\n\n  auto to_floating_point =\n      histogram_rounding /\n      static_cast<T>(static_cast<IntT>(1)\n                     << (sizeof(typename GradientSumT::ValueT) * 8 - 2));  // keep 1 for sign bit\n  auto to_fixed_point = GradientSumT{static_cast<T>(1) / to_floating_point.GetGrad(),\n                                     static_cast<T>(1) / to_floating_point.GetHess()};\n  return GradientQuantiser{to_fixed_point, to_floating_point};\n}\n}  // anonymous namespace\n\nGradientQuantiserGroup::GradientQuantiserGroup(Context const* ctx,\n                                               linalg::MatrixView<GradientPair const> gpair,\n                                               MetaInfo const& info) {\n  auto n_targets = gpair.Shape(1);\n  CHECK_GE(n_targets, 1);\n\n  // Local 
reduction per target — these are fast device-local operations.\n  using ReduceT = typename GradientPairPrecise::ValueT;\n  std::vector<Pair> h_pairs(n_targets);\n  std::size_t n_samples = gpair.Shape(0);\n  for (bst_target_t t = 0; t < n_targets; ++t) {\n    h_pairs[t] = MakeQuantiserForTarget(ctx, gpair.Slice(linalg::All(), t));\n  }\n\n  auto rc = collective::Success() << [&]() {\n    static_assert(sizeof(Pair) == sizeof(ReduceT) * 4);\n    auto casted = linalg::MakeVec(reinterpret_cast<ReduceT*>(h_pairs.data()), 4 * n_targets);\n    return collective::GlobalSum(ctx, info, casted);\n  } << [&] {\n    // Single GlobalSum for total_rows (shared across targets).\n    return collective::GlobalSum(ctx, info, linalg::MakeVec(&n_samples, 1));\n  };\n  collective::SafeColl(rc);\n\n  // Build quantisers on host from the reduced pairs.\n  h_quantizers_.resize(n_targets);\n  for (bst_target_t t = 0; t < n_targets; ++t) {\n    h_quantizers_[t] = BuildQuantiserFromPair(h_pairs[t], n_samples);\n  }\n\n  // Copy to device.\n  d_quantizers_.resize(n_targets);\n  dh::safe_cuda(cudaMemcpyAsync(d_quantizers_.data(), h_quantizers_.data(),\n                                n_targets * sizeof(GradientQuantiser), cudaMemcpyHostToDevice,\n                                ctx->CUDACtx()->Stream()));\n}\n\nGradientQuantiserGroup::GradientQuantiserGroup(Context const* ctx,\n                                               linalg::VectorView<GradientPair const> gpair,\n                                               MetaInfo const& info)\n    : GradientQuantiserGroup(\n          ctx, linalg::MakeTensorView(ctx, gpair.Values(), gpair.Size(), bst_target_t{1}), info) {}\n\nvoid CalcQuantizedGpairs(Context const* ctx, linalg::MatrixView<GradientPair const> gpairs,\n                         common::Span<GradientQuantiser const> roundings,\n                         linalg::Matrix<GradientPairInt64>* p_out) {\n  auto shape = gpairs.Shape();\n  if (p_out->Empty()) {\n    *p_out = 
linalg::Matrix<GradientPairInt64>{shape, ctx->Device(), linalg::kF};\n  } else {\n    p_out->Reshape(shape);\n  }\n\n  auto out_gpair = p_out->View(ctx->Device());\n  CHECK(out_gpair.FContiguous());\n  auto it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) {\n    auto [ridx, target_idx] = linalg::UnravelIndex(i, gpairs.Shape());\n    auto g = gpairs(ridx, target_idx);\n    return roundings[target_idx].ToFixedPoint(g);\n  });\n  thrust::copy_n(ctx->CUDACtx()->CTP(), it, gpairs.Size(), linalg::tbegin(out_gpair));\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/quantiser.cuh",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#pragma once\n#include <vector>  // for vector\n\n#include \"../../common/deterministic.cuh\"   // for CreateRoundingFactor\n#include \"../../common/device_helpers.cuh\"  // for ToSpan\n#include \"../../common/device_vector.cuh\"   // for device_vector, DeviceUVector\n#include \"xgboost/base.h\"                   // for GradientPairPrecise, GradientPairInt64\n#include \"xgboost/context.h\"                // for Context\n#include \"xgboost/data.h\"                   // for MetaInfo\n#include \"xgboost/linalg.h\"                 // for VectorView, MatrixView\n\nnamespace xgboost::tree {\n\n/**\n * @brief A simple quantiser for single float values to enable deterministic summation.\n *\n * Similar to GradientQuantiser but for a single float channel.\n */\nstruct FloatQuantiser {\n  double to_fixed_point;\n  double to_floating_point;\n  FloatQuantiser(double max_abs, bst_idx_t n) {\n    auto rounding = common::CreateRoundingFactor<double>(max_abs, n);\n    // See the gradient quantizer for details.\n    constexpr std::int64_t kMaxInt = static_cast<std::int64_t>(1) << 62;\n    to_floating_point = rounding / static_cast<double>(kMaxInt);\n    to_fixed_point = static_cast<double>(1.0) / to_floating_point;\n  }\n};\n// Functors that can be easily passed into thrust algorithms\nstruct ToFixedPointOp {\n  double factor;\n  explicit ToFixedPointOp(FloatQuantiser const& q) : factor{q.to_fixed_point} {}\n  XGBOOST_DEVICE std::int64_t operator()(double val) const {\n    return static_cast<std::int64_t>(val * factor);\n  }\n};\nstruct ToFloatingPointOp {\n  double factor;\n  explicit ToFloatingPointOp(FloatQuantiser const& q) : factor{q.to_floating_point} {}\n  XGBOOST_DEVICE double operator()(std::int64_t val) const {\n    return static_cast<double>(val) * factor;\n  }\n};\n\n/**\n * @brief Per-target quantiser for converting gradients between floating-point and\n *        fixed-point representations.\n 
*/\nclass GradientQuantiser {\n private:\n  /* Convert gradient to fixed point representation. */\n  GradientPairPrecise to_fixed_point_;\n  /* Convert fixed point representation back to floating point. */\n  GradientPairPrecise to_floating_point_;\n\n public:\n  GradientQuantiser() = default;\n  // Used for test\n  GradientQuantiser(GradientPairPrecise to_fixed, GradientPairPrecise to_float)\n      : to_fixed_point_{to_fixed}, to_floating_point_{to_float} {}\n  [[nodiscard]] XGBOOST_DEVICE GradientPairInt64 ToFixedPoint(GradientPair const& gpair) const {\n    auto adjusted = GradientPairInt64(gpair.GetGrad() * to_fixed_point_.GetGrad(),\n                                      gpair.GetHess() * to_fixed_point_.GetHess());\n    return adjusted;\n  }\n  [[nodiscard]] XGBOOST_DEVICE GradientPairInt64\n  ToFixedPoint(GradientPairPrecise const& gpair) const {\n    auto adjusted = GradientPairInt64(gpair.GetGrad() * to_fixed_point_.GetGrad(),\n                                      gpair.GetHess() * to_fixed_point_.GetHess());\n    return adjusted;\n  }\n  [[nodiscard]] XGBOOST_DEVICE GradientPairPrecise\n  ToFloatingPoint(const GradientPairInt64& gpair) const {\n    auto g = gpair.GetQuantisedGrad() * to_floating_point_.GetGrad();\n    auto h = gpair.GetQuantisedHess() * to_floating_point_.GetHess();\n    return {g, h};\n  }\n};\n\n/**\n * @brief Unified quantiser group for single-target and multi-target gradient quantisation.\n */\nclass GradientQuantiserGroup {\n private:\n  std::vector<GradientQuantiser> h_quantizers_;\n  dh::DeviceUVector<GradientQuantiser> d_quantizers_;\n\n public:\n  /** @brief Construct from a gradient matrix (n_samples x n_targets). */\n  GradientQuantiserGroup(Context const* ctx, linalg::MatrixView<GradientPair const> gpair,\n                         MetaInfo const& info);\n  /** @brief Convenience constructor from a vector (single-target). 
*/\n  GradientQuantiserGroup(Context const* ctx, linalg::VectorView<GradientPair const> gpair,\n                         MetaInfo const& info);\n\n  [[nodiscard]] common::Span<GradientQuantiser const> DeviceSpan() const {\n    return dh::ToSpan(this->d_quantizers_);\n  }\n  [[nodiscard]] GradientQuantiser const& operator[](bst_target_t t) const {\n    return this->h_quantizers_[t];\n  }\n  [[nodiscard]] bst_target_t Size() const { return this->h_quantizers_.size(); }\n};\n\nvoid CalcQuantizedGpairs(Context const* ctx, linalg::MatrixView<GradientPair const> gpairs,\n                         common::Span<GradientQuantiser const> roundings,\n                         linalg::Matrix<GradientPairInt64>* p_out);\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/row_partitioner.cu",
    "content": "/**\n * Copyright 2017-2025, XGBoost contributors\n */\n#include <thrust/sequence.h>  // for sequence\n\n#include <vector>  // for vector\n\n#include \"../../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../common/device_helpers.cuh\"  // for CopyDeviceSpanToVector, ToSpan\n#include \"row_partitioner.cuh\"\n\nnamespace xgboost::tree {\nvoid RowPartitioner::Reset(Context const* ctx, bst_idx_t n_samples, bst_idx_t base_rowid) {\n  ridx_segments_.clear();\n  ridx_.resize(n_samples);\n  tmp_.clear();\n  n_nodes_ = 1;  // Root\n\n  CHECK_LE(n_samples, std::numeric_limits<cuda_impl::RowIndexT>::max());\n  ridx_segments_.emplace_back(\n      NodePositionInfo{Segment{0, static_cast<cuda_impl::RowIndexT>(n_samples)}});\n\n  thrust::sequence(ctx->CUDACtx()->CTP(), ridx_.data(), ridx_.data() + ridx_.size(), base_rowid);\n\n  // Pre-allocate some host memory\n  this->pinned_.GetSpan<std::int32_t>(1 << 11);\n  this->pinned2_.GetSpan<std::int32_t>(1 << 13);\n}\n\nRowPartitioner::~RowPartitioner() = default;\n\ncommon::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows(bst_node_t nidx) {\n  auto segment = ridx_segments_.at(nidx).segment;\n  return dh::ToSpan(ridx_).subspan(segment.begin, segment.Size());\n}\n\ncommon::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows() const {\n  return dh::ToSpan(ridx_);\n}\n\nstd::vector<RowPartitioner::RowIndexT> RowPartitioner::GetRowsHost(bst_node_t nidx) {\n  auto span = GetRows(nidx);\n  std::vector<RowIndexT> rows(span.size());\n  dh::CopyDeviceSpanToVector(&rows, span);\n  return rows;\n}\n};  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/row_partitioner.cuh",
    "content": "/**\n * Copyright 2017-2025, XGBoost contributors\n */\n#pragma once\n#include <thrust/iterator/counting_iterator.h>          // for make_counting_iterator\n#include <thrust/iterator/transform_output_iterator.h>  // for make_transform_output_iterator\n\n#include <algorithm>        // for max\n#include <cstddef>          // for size_t\n#include <cstdint>          // for int32_t, uint32_t\n#include <cuda/functional>  // for proclaim_return_type\n#include <vector>           // for vector\n\n#include \"../../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../common/device_helpers.cuh\"  // for MakeTransformIterator\n#include \"xgboost/base.h\"                   // for bst_idx_t\n#include \"xgboost/context.h\"                // for Context\n#include \"xgboost/span.h\"                   // for Span\n\nnamespace xgboost::tree {\nnamespace cuda_impl {\nusing RowIndexT = std::uint32_t;\n// TODO(Rory): Can be larger. To be tuned alongside other batch operations.\ninline constexpr std::int32_t kMaxUpdatePositionBatchSize = 32;\n}  // namespace cuda_impl\n\n/**\n * @brief Used to demarcate a contiguous set of row indices associated with some tree\n *        node.\n */\nstruct Segment {\n  cuda_impl::RowIndexT begin{0};\n  cuda_impl::RowIndexT end{0};\n\n  Segment() = default;\n\n  Segment(cuda_impl::RowIndexT begin, cuda_impl::RowIndexT end) : begin(begin), end(end) {\n    CHECK_GE(end, begin);\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_idx_t Size() const { return end - begin; }\n};\n\ntemplate <typename OpDataT>\nstruct PerNodeData {\n  Segment segment;\n  OpDataT data;\n};\n\n/**\n * @param global_thread_idx In practice, the row index within the total number of rows for\n *        this node batch.\n * @param batch_idx The nidx within this node batch (not the actual node index in a tree).\n * @param item_idx The resulting global row index (without accounting for base_rowid). 
This maps the\n *        row index within the node batch back to the global row index.\n */\ntemplate <typename T>\nXGBOOST_DEV_INLINE void AssignBatch(dh::LDGIterator<T> const& batch_info_iter,\n                                    std::size_t global_thread_idx, int* batch_idx,\n                                    std::size_t* item_idx) {\n  cuda_impl::RowIndexT sum = 0;\n  // Search for the nidx in batch and the corresponding global row index, exit once found.\n  for (std::int32_t i = 0; i < cuda_impl::kMaxUpdatePositionBatchSize; i++) {\n    if (sum + batch_info_iter[i].segment.Size() > global_thread_idx) {\n      *batch_idx = i;\n      // the beginning of the segment plus the offset into that segment\n      *item_idx = (global_thread_idx - sum) + batch_info_iter[i].segment.begin;\n      break;\n    }\n    sum += batch_info_iter[i].segment.Size();\n  }\n}\n\n/**\n * @param total_rows The total number of rows for this batch of nodes.\n */\ntemplate <int kBlockSize, typename OpDataT>\n__global__ __launch_bounds__(kBlockSize) void SortPositionCopyKernel(\n    dh::LDGIterator<PerNodeData<OpDataT>> batch_info_iter,\n    common::Span<cuda_impl::RowIndexT> d_ridx,\n    common::Span<cuda_impl::RowIndexT const> const ridx_tmp, bst_idx_t total_rows) {\n  for (auto idx : dh::GridStrideRange<std::size_t>(0, total_rows)) {\n    std::int32_t batch_idx;  // unused\n    std::size_t item_idx = std::numeric_limits<std::size_t>::max();\n    AssignBatch(batch_info_iter, idx, &batch_idx, &item_idx);\n    d_ridx[item_idx] = ridx_tmp[item_idx];\n  }\n}\n\n// We can scan over this tuple, where the scan gives us information on how to partition inputs\n// according to the flag\nstruct IndexFlagTuple {\n  cuda_impl::RowIndexT idx;        // The location of the item we are working on in ridx_\n  cuda_impl::RowIndexT flag_scan;  // This gets populated after scanning\n  std::int32_t batch_idx;          // Which node in the batch does this item belong to\n  bool flag;                       // 
Result of op (is this item going left?)\n};\n\nstruct IndexFlagOp {\n  __device__ IndexFlagTuple operator()(const IndexFlagTuple& a, const IndexFlagTuple& b) const {\n    // Segmented scan - resets if we cross batch boundaries\n    if (a.batch_idx == b.batch_idx) {\n      // Accumulate the flags, everything else stays the same\n      return {b.idx, a.flag_scan + b.flag_scan, b.batch_idx, b.flag};\n    } else {\n      return b;\n    }\n  }\n};\n\n// Scatter from `ridx_in` to `ridx_out`.\ntemplate <typename OpDataT>\nstruct WriteResultsFunctor {\n  dh::LDGIterator<PerNodeData<OpDataT>> batch_info;\n  cuda_impl::RowIndexT const* ridx_in;\n  cuda_impl::RowIndexT* ridx_out;\n  cuda_impl::RowIndexT* counts;\n\n  __device__ IndexFlagTuple operator()(IndexFlagTuple const& x) {\n    cuda_impl::RowIndexT scatter_address;\n    // Get the segment that this row belongs to.\n    const Segment& segment = batch_info[x.batch_idx].segment;\n    if (x.flag) {\n      // Go left.\n      cuda_impl::RowIndexT num_previous_flagged = x.flag_scan - 1;  // -1 because inclusive scan\n      scatter_address = segment.begin + num_previous_flagged;\n    } else {\n      cuda_impl::RowIndexT num_previous_unflagged = (x.idx - segment.begin) - x.flag_scan;\n      scatter_address = segment.end - num_previous_unflagged - 1;\n    }\n    ridx_out[scatter_address] = ridx_in[x.idx];\n\n    if (x.idx == (segment.end - 1)) {\n      // Write out counts\n      counts[x.batch_idx] = x.flag_scan;\n    }\n\n    // Discard\n    return {};\n  }\n};\n\n/**\n * @param d_batch_info Node data, with the size of the input number of nodes.\n */\ntemplate <typename OpT, typename OpDataT>\nvoid SortPositionBatch(Context const* ctx, common::Span<const PerNodeData<OpDataT>> d_batch_info,\n                       common::Span<cuda_impl::RowIndexT> ridx,\n                       common::Span<cuda_impl::RowIndexT> ridx_tmp,\n                       common::Span<cuda_impl::RowIndexT> d_counts, bst_idx_t total_rows, OpT op,\n         
              dh::DeviceUVector<int8_t>* tmp) {\n  dh::LDGIterator<PerNodeData<OpDataT>> batch_info_itr(d_batch_info.data());\n  WriteResultsFunctor<OpDataT> write_results{batch_info_itr, ridx.data(), ridx_tmp.data(),\n                                             d_counts.data()};\n\n  auto discard_write_iterator =\n      thrust::make_transform_output_iterator(dh::TypedDiscard<IndexFlagTuple>(), write_results);\n  auto counting = thrust::make_counting_iterator(0llu);\n  auto input_iterator = dh::MakeTransformIterator<IndexFlagTuple>(\n      counting, cuda::proclaim_return_type<IndexFlagTuple>([=] __device__(std::size_t idx) {\n        std::int32_t nidx_in_batch;\n        std::size_t item_idx;\n        AssignBatch(batch_info_itr, idx, &nidx_in_batch, &item_idx);\n        auto go_left = op(ridx[item_idx], nidx_in_batch, batch_info_itr[nidx_in_batch].data);\n        return IndexFlagTuple{static_cast<cuda_impl::RowIndexT>(item_idx), go_left, nidx_in_batch,\n                              go_left};\n      }));\n  // Reach down to the dispatch function to avoid using int as the offset type.\n  std::size_t n_bytes = 0;\n  if (tmp->empty()) {\n    // The size of temporary storage is calculated based on the total number of\n    // rows. Since the root node has all the rows, subsequent allocations must be smaller\n    // than the root node. 
As a result, we can calculate this once and reuse it throughout\n    // the iteration.\n    auto ret =\n        cub::DispatchScan<decltype(input_iterator), decltype(discard_write_iterator), IndexFlagOp,\n                          cub::NullType, std::uint64_t>::Dispatch(nullptr, n_bytes, input_iterator,\n                                                                  discard_write_iterator,\n                                                                  IndexFlagOp{}, cub::NullType{},\n                                                                  static_cast<std::uint64_t>(\n                                                                      total_rows),\n                                                                  ctx->CUDACtx()->Stream());\n    dh::safe_cuda(ret);\n    tmp->resize(n_bytes);\n  }\n  n_bytes = tmp->size();\n  auto ret =\n      cub::DispatchScan<decltype(input_iterator), decltype(discard_write_iterator), IndexFlagOp,\n                        cub::NullType, std::uint64_t>::Dispatch(tmp->data(), n_bytes,\n                                                                input_iterator,\n                                                                discard_write_iterator,\n                                                                IndexFlagOp{}, cub::NullType{},\n                                                                static_cast<std::uint64_t>(\n                                                                    total_rows),\n                                                                ctx->CUDACtx()->Stream());\n  dh::safe_cuda(ret);\n\n  constexpr int kBlockSize = 256;\n\n  // Value found by experimentation\n  const int kItemsThread = 12;\n  std::uint32_t const kGridSize =\n      xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread);\n  dh::LaunchKernel{kGridSize, kBlockSize, 0, ctx->CUDACtx()->Stream()}(\n      SortPositionCopyKernel<kBlockSize, OpDataT>, batch_info_itr, ridx, ridx_tmp, 
total_rows);\n}\n\nstruct NodePositionInfo {\n  Segment segment;\n  bst_node_t left_child = -1;\n  bst_node_t right_child = -1;\n  [[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return left_child == -1; }\n};\n\nstruct LeafInfo {\n  bst_node_t nidx;\n  NodePositionInfo node;\n};\n\nXGBOOST_DEV_INLINE int GetPositionFromSegments(std::size_t idx,\n                                               const NodePositionInfo* d_node_info) {\n  int position = 0;\n  NodePositionInfo node = d_node_info[position];\n  while (!node.IsLeaf()) {\n    NodePositionInfo left = d_node_info[node.left_child];\n    NodePositionInfo right = d_node_info[node.right_child];\n    if (idx >= left.segment.begin && idx < left.segment.end) {\n      position = node.left_child;\n      node = left;\n    } else if (idx >= right.segment.begin && idx < right.segment.end) {\n      position = node.right_child;\n      node = right;\n    } else {\n      KERNEL_CHECK(false);\n    }\n  }\n  return position;\n}\n\ntemplate <int kBlockSize, typename OpT>\n__global__ __launch_bounds__(kBlockSize) void FinalisePositionKernel(\n    common::Span<const NodePositionInfo> d_node_info, bst_idx_t base_ridx,\n    common::Span<const cuda_impl::RowIndexT> d_ridx, common::Span<bst_node_t> d_out_position,\n    OpT op) {\n  for (auto idx : dh::GridStrideRange<std::size_t>(0, d_ridx.size())) {\n    auto position = GetPositionFromSegments(idx, d_node_info.data());\n    cuda_impl::RowIndexT ridx = d_ridx[idx] - base_ridx;\n    bst_node_t new_position = op(ridx, position);\n    d_out_position[ridx] = new_position;\n  }\n}\n\n/** \\brief Class responsible for tracking subsets of rows as we add splits and\n * partition training rows into different leaf nodes. 
*/\nclass RowPartitioner {\n public:\n  using RowIndexT = cuda_impl::RowIndexT;\n\n private:\n  /**\n   * In here if you want to find the rows belong to a node nid, first you need to get the\n   * indices segment from ridx_segments[nid], then get the row index that represents\n   * position of row in input data X.  `RowPartitioner::GetRows` would be a good starting\n   * place to get a sense what are these vector storing.\n   *\n   * node id -> segment -> indices of rows belonging to node\n   */\n\n  /** @brief Range of row index for each node, pointers into ridx below. */\n  std::vector<NodePositionInfo> ridx_segments_;\n  /**\n   * @brief mapping for node id -> rows.\n   *\n   * This looks like:\n   * node id  |    1    |    2   |\n   * rows idx | 3, 5, 1 | 13, 31 |\n   */\n  dh::DeviceUVector<RowIndexT> ridx_;\n  dh::DeviceUVector<int8_t> tmp_;\n  dh::PinnedMemory pinned_;\n  dh::PinnedMemory pinned2_;\n  bst_node_t n_nodes_{0};  // Counter for internal checks.\n\n public:\n  /**\n   * @param ctx Context for device ordinal and stream.\n   * @param n_samples The number of samples in each batch.\n   * @param base_rowid The base row index for the current batch.\n   */\n  RowPartitioner() = default;\n  void Reset(Context const* ctx, bst_idx_t n_samples, bst_idx_t base_rowid);\n\n  ~RowPartitioner();\n  RowPartitioner(const RowPartitioner&) = delete;\n  RowPartitioner& operator=(const RowPartitioner&) = delete;\n\n  /**\n   * \\brief Gets the row indices of training instances in a given node.\n   */\n  common::Span<const RowIndexT> GetRows(bst_node_t nidx);\n\n  /**\n   * \\brief Gets all training rows in the set.\n   */\n  common::Span<const RowIndexT> GetRows() const;\n  /**\n   * @brief Get the number of rows in this partitioner.\n   */\n  std::size_t Size() const { return this->GetRows().size(); }\n\n  [[nodiscard]] bst_node_t GetNumNodes() const { return n_nodes_; }\n\n  /**\n   * @brief Convenience method for testing.\n   */\n  std::vector<RowIndexT> 
GetRowsHost(bst_node_t nidx);\n\n  [[nodiscard]] std::vector<LeafInfo> GetLeaves() const {\n    std::vector<LeafInfo> leaves;\n    bst_node_t nidx = 0;\n    for (auto const& node : this->ridx_segments_) {\n      if (node.IsLeaf()) {\n        leaves.emplace_back(LeafInfo{nidx, node});\n      }\n      nidx += 1;\n    }\n    return leaves;\n  }\n\n  /**\n   * \\brief Updates the tree position for set of training instances being split\n   * into left and right child nodes. Accepts a user-defined lambda specifying\n   * which branch each training instance should go down.\n   *\n   * \\tparam  UpdatePositionOpT\n   * \\tparam  OpDataT\n   * \\param nidx        The index of the nodes being split.\n   * \\param left_nidx   The left child indices.\n   * \\param right_nidx  The right child indices.\n   * \\param op_data     User-defined data provided as the second argument to op\n   * \\param op          Device lambda with the row index as the first argument and op_data as the\n   * second. Returns true if this training instance goes on the left partition.\n   */\n  template <typename UpdatePositionOpT, typename OpDataT>\n  void UpdatePositionBatch(Context const* ctx, std::vector<bst_node_t> const& nidx,\n                           std::vector<bst_node_t> const& left_nidx,\n                           std::vector<bst_node_t> const& right_nidx,\n                           std::vector<OpDataT> const& op_data, common::Span<RowIndexT> ridx_tmp,\n                           UpdatePositionOpT op) {\n    if (nidx.empty()) {\n      return;\n    }\n\n    CHECK_EQ(nidx.size(), left_nidx.size());\n    CHECK_EQ(nidx.size(), right_nidx.size());\n    CHECK_EQ(nidx.size(), op_data.size());\n    this->n_nodes_ += (left_nidx.size() + right_nidx.size());\n    common::Span<PerNodeData<OpDataT>> h_batch_info =\n        pinned2_.GetSpan<PerNodeData<OpDataT>>(nidx.size());\n    dh::TemporaryArray<PerNodeData<OpDataT>> d_batch_info(nidx.size());\n\n    for (std::size_t i = 0; i < nidx.size(); i++) 
{\n      h_batch_info[i] = {ridx_segments_.at(nidx[i]).segment, op_data[i]};\n    }\n    dh::safe_cuda(cudaMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),\n                                  h_batch_info.size_bytes(), cudaMemcpyDefault,\n                                  ctx->CUDACtx()->Stream()));\n    // Temporary arrays\n    auto h_counts = pinned_.GetSpan<RowIndexT>(nidx.size());\n    // Must initialize with 0 as 0 count is not written in the kernel.\n    dh::TemporaryArray<RowIndexT> d_counts(nidx.size(), 0);\n    CHECK_EQ(ridx_tmp.size(), this->Size());\n\n    // Process a sub-batch\n    auto sub_batch_impl = [&](common::Span<bst_node_t const> nidx,\n                              common::Span<PerNodeData<OpDataT>> d_batch_info,\n                              common::Span<RowIndexT> d_counts) {\n      std::size_t total_rows = 0;\n      for (bst_node_t i : nidx) {\n        total_rows += this->ridx_segments_[i].segment.Size();\n      }\n\n      // Partition the rows according to the operator\n      SortPositionBatch<UpdatePositionOpT, OpDataT>(ctx, d_batch_info, dh::ToSpan(this->ridx_),\n                                                    ridx_tmp, d_counts, total_rows, op,\n                                                    &this->tmp_);\n    };\n\n    // Divide inputs into sub-batches.\n    for (std::size_t batch_begin = 0, n = nidx.size(); batch_begin < n;\n         batch_begin += cuda_impl::kMaxUpdatePositionBatchSize) {\n      auto constexpr kMax = static_cast<decltype(n)>(cuda_impl::kMaxUpdatePositionBatchSize);\n      auto batch_size = std::min(kMax, n - batch_begin);\n      auto nidx_batch = common::Span{nidx}.subspan(batch_begin, batch_size);\n      auto d_info_batch = dh::ToSpan(d_batch_info).subspan(batch_begin, batch_size);\n      auto d_counts_batch = dh::ToSpan(d_counts).subspan(batch_begin, batch_size);\n      sub_batch_impl(nidx_batch, d_info_batch, d_counts_batch);\n    }\n\n    dh::safe_cuda(cudaMemcpyAsync(h_counts.data(), 
d_counts.data().get(), h_counts.size_bytes(),\n                                  cudaMemcpyDefault, ctx->CUDACtx()->Stream()));\n    // TODO(Rory): this synchronisation hurts performance a lot\n    // Future optimisation should find a way to skip this\n    ctx->CUDACtx()->Stream().Sync();\n\n    // Update segments\n    for (std::size_t i = 0; i < nidx.size(); i++) {\n      auto segment = ridx_segments_.at(nidx[i]).segment;\n      auto left_count = h_counts[i];\n      CHECK_LE(left_count, segment.Size());\n      ridx_segments_.resize(std::max(static_cast<bst_node_t>(ridx_segments_.size()),\n                                     std::max(left_nidx[i], right_nidx[i]) + 1));\n      ridx_segments_[nidx[i]] = NodePositionInfo{segment, left_nidx[i], right_nidx[i]};\n      ridx_segments_[left_nidx[i]] =\n          NodePositionInfo{Segment{segment.begin, segment.begin + left_count}};\n      ridx_segments_[right_nidx[i]] =\n          NodePositionInfo{Segment{segment.begin + left_count, segment.end}};\n    }\n  }\n\n  /**\n   * @brief Finalise the position of all training instances after tree construction is\n   * complete. Does not update any other meta information in this data structure, so\n   * should only be used at the end of training.\n   *\n   * @param p_out_position Node index for each row.\n   * @param op Device lambda. 
Should provide the row index and current position as an\n   *           argument and return the new position for this training instance.\n   */\n  template <typename FinalisePositionOpT>\n  void FinalisePosition(Context const* ctx, common::Span<bst_node_t> d_out_position,\n                        bst_idx_t base_ridx, FinalisePositionOpT op) const {\n    dh::TemporaryArray<NodePositionInfo> d_node_info_storage(ridx_segments_.size());\n    dh::safe_cuda(cudaMemcpyAsync(d_node_info_storage.data().get(), ridx_segments_.data(),\n                                  sizeof(NodePositionInfo) * ridx_segments_.size(),\n                                  cudaMemcpyDefault, ctx->CUDACtx()->Stream()));\n\n    constexpr std::uint32_t kBlockSize = 512;\n    const int kItemsThread = 8;\n    const std::uint32_t grid_size =\n        xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread);\n    common::Span<RowIndexT const> d_ridx{ridx_.data(), ridx_.size()};\n    dh::LaunchKernel{grid_size, kBlockSize, 0, ctx->CUDACtx()->Stream()}(\n        FinalisePositionKernel<kBlockSize, FinalisePositionOpT>, dh::ToSpan(d_node_info_storage),\n        base_ridx, d_ridx, d_out_position, op);\n  }\n};\n\n// Partitioner for all batches, used for external memory training.\nclass RowPartitionerBatches {\n private:\n  // Temporary buffer for sorting the samples.\n  dh::DeviceUVector<cuda_impl::RowIndexT> ridx_tmp_;\n  // Partitioners for each batch.\n  std::vector<std::unique_ptr<RowPartitioner>> partitioners_;\n\n public:\n  void Reset(Context const* ctx, std::vector<bst_idx_t> const& batch_ptr) {\n    CHECK_GE(batch_ptr.size(), 2);\n    std::size_t n_batches = batch_ptr.size() - 1;\n    if (partitioners_.size() != n_batches) {\n      partitioners_.clear();\n    }\n\n    bst_idx_t n_max_samples = 0;\n    for (std::size_t k = 0; k < n_batches; ++k) {\n      if (partitioners_.size() != n_batches) {\n        // First run.\n        
partitioners_.emplace_back(std::make_unique<RowPartitioner>());\n      }\n      auto base_ridx = batch_ptr[k];\n      auto n_samples = batch_ptr.at(k + 1) - base_ridx;\n      partitioners_[k]->Reset(ctx, n_samples, base_ridx);\n      CHECK_LE(n_samples, std::numeric_limits<cuda_impl::RowIndexT>::max());\n      n_max_samples = std::max(n_samples, n_max_samples);\n    }\n    this->ridx_tmp_.resize(n_max_samples);\n  }\n\n  // Accessors\n  [[nodiscard]] decltype(auto) operator[](std::size_t i) { return partitioners_[i]; }\n  decltype(auto) At(std::size_t i) { return partitioners_.at(i); }\n  [[nodiscard]] std::size_t Size() const { return this->partitioners_.size(); }\n  decltype(auto) cbegin() const { return this->partitioners_.cbegin(); }  // NOLINT\n  decltype(auto) cend() const { return this->partitioners_.cend(); }      // NOLINT\n  decltype(auto) begin() const { return this->partitioners_.cbegin(); }   // NOLINT\n  decltype(auto) end() const { return this->partitioners_.cend(); }       // NOLINT\n\n  [[nodiscard]] decltype(auto) Front() { return this->partitioners_.front(); }\n  [[nodiscard]] bool Empty() const { return this->partitioners_.empty(); }\n\n  template <typename UpdatePositionOpT, typename OpDataT>\n  void UpdatePositionBatch(Context const* ctx, std::int32_t batch_idx,\n                           std::vector<bst_node_t> const& nidx,\n                           std::vector<bst_node_t> const& left_nidx,\n                           std::vector<bst_node_t> const& right_nidx,\n                           std::vector<OpDataT> const& op_data, UpdatePositionOpT op) {\n    auto& part = this->At(batch_idx);\n    auto ridx_tmp = dh::ToSpan(this->ridx_tmp_).subspan(0, part->Size());\n    part->UpdatePositionBatch(ctx, nidx, left_nidx, right_nidx, op_data, ridx_tmp, op);\n  }\n};\n};  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/gpu_hist/sampler.cu",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n\n#include <thrust/copy.h>  // for copy_n\n#include <thrust/functional.h>\n#include <thrust/iterator/transform_iterator.h>         // for make_transform_iterator\n#include <thrust/iterator/transform_output_iterator.h>  // for make_transform_output_iterator\n#include <thrust/random.h>\n#include <thrust/sort.h>  // for sort\n#include <thrust/transform.h>\n#include <thrust/version.h>\n\n#include \"../../common/nvtx_utils.h\"\n\n#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 2)\n#include <cub/device/device_segmented_reduce.cuh>  // for DeviceSegmentedReduce\n#else\n#include <thrust/reduce.h>  // for reduce_by_key\n#endif\n\n#include <cstddef>            // for size_t\n#include <cuda/std/iterator>  // for distance\n#include <limits>\n\n#include \"../../common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../common/device_helpers.cuh\"  // for MakeTransformIterator\n#include \"../../common/random.h\"\n#include \"../hist/sampler.h\"  // for kDefaultMvsLambda\n#include \"../param.h\"\n#include \"quantiser.cuh\"  // for GradientQuantiser\n#include \"sampler.cuh\"\n\nnamespace xgboost::tree::cuda_impl {\n/** @brief A functor that returns random weights. */\nclass RandomWeight {\n public:\n  explicit RandomWeight(std::size_t seed) : seed_(seed) {}\n\n  XGBOOST_DEVICE float operator()(std::size_t i) const {\n    thrust::default_random_engine rng(seed_);\n    thrust::uniform_real_distribution<float> dist;\n    rng.discard(i);\n    return dist(rng);\n  }\n\n private:\n  std::uint32_t seed_;\n};\n\n/** @brief A functor that performs a Bernoulli trial to discard a gradient pair. 
*/\nclass BernoulliTrial {\n public:\n  BernoulliTrial(std::size_t seed, float p) : rnd_(seed), p_(p) {}\n\n  XGBOOST_DEVICE bool operator()(std::size_t i) const { return rnd_(i) > p_; }\n\n private:\n  RandomWeight rnd_;\n  float p_;\n};\n\n/**\n * @brief A functor that calculates the difference between the sample rate and the desired\n *        sample rows, given a cumulative gradient sum.\n */\nclass SampleRateDelta {\n public:\n  SampleRateDelta(common::Span<float> threshold, bst_idx_t n_samples, bst_idx_t sample_rows)\n      : threshold_(threshold), n_samples_(n_samples), sample_rows_(sample_rows) {}\n\n  XGBOOST_DEVICE float operator()(float gradient_sum, bst_idx_t i) const {\n    float lower = threshold_[i];\n    float upper = threshold_[i + 1];\n\n    bst_idx_t n_above = n_samples_ - i - 1;\n    float denom = static_cast<float>(sample_rows_) - static_cast<float>(n_above);\n    // i is too small, sampling too many rows\n    if (denom <= 0) {\n      return std::numeric_limits<float>::max();\n    }\n\n    float u = gradient_sum / denom;\n    if (u > lower && u <= upper) {\n      // Found it, set the value for future use.\n      threshold_[i + 1] = u;\n      return 0.0f;\n    } else {\n      return std::numeric_limits<float>::max();\n    }\n  }\n\n private:\n  common::Span<float> threshold_;\n  bst_idx_t n_samples_;\n  bst_idx_t sample_rows_;\n};\n\nnamespace {\n[[nodiscard]] std::size_t CalcThresholdIndex(Context const* ctx, common::Span<float> reg_abs_grad,\n                                             common::Span<float> thresholds,\n                                             common::Span<float> grad_csum, bst_idx_t sample_rows) {\n  auto cuctx = ctx->CUDACtx();\n  // Set a sentinel for upper bound.\n  thrust::fill(cuctx->CTP(), dh::tend(thresholds) - 1, dh::tend(thresholds),\n               std::numeric_limits<float>::max());\n  // Sort thresholds\n  thrust::copy(cuctx->CTP(), dh::tcbegin(reg_abs_grad), dh::tcend(reg_abs_grad),\n               
dh::tbegin(thresholds));\n  thrust::sort(cuctx->TP(), dh::tbegin(thresholds), dh::tend(thresholds) - 1);\n  auto n_samples = reg_abs_grad.size();\n  return CalculateThresholdIndex(ctx, thresholds, grad_csum, n_samples, sample_rows);\n}\n}  // anonymous namespace\n\n/** @brief A functor that performs Poisson sampling, and scales gradient pairs by 1/p_i. */\nclass PoissonSampling {\n public:\n  PoissonSampling(common::Span<GradientQuantiser const> roundings,\n                  common::Span<float const> threshold, common::Span<float const> rag,\n                  std::size_t threshold_index, RandomWeight rnd)\n      : roundings_{roundings},\n        threshold_{threshold},\n        regularized_abs_grad_{rag},\n        threshold_index_{threshold_index},\n        rnd_{rnd} {}\n\n  XGBOOST_DEVICE GradientPairInt64 operator()(GradientPairInt64 const& gpair, std::size_t i) {\n    // If the gradient and hessian are both empty, we should never select this row.\n    if (gpair.GetQuantisedGrad() == 0 && gpair.GetQuantisedHess() == 0) {\n      return gpair;\n    }\n    auto n_samples = threshold_.size() - 1;\n    auto [ridx, tidx] = linalg::UnravelIndex(i, n_samples, roundings_.size());\n    auto q = roundings_[tidx];\n\n    float p = SamplingProbability(threshold_[threshold_index_], regularized_abs_grad_[ridx]);\n    if (p >= 1.0f) {\n      // Always select this row.\n      return gpair;\n    } else {\n      // Select this row randomly with probability proportional to the combined gradient.\n      // Scale gpair by 1/p.\n      if (rnd_(ridx) <= p) {\n        return q.ToFixedPoint(RescaleGrad(p, q.ToFloatingPoint(gpair)));\n      } else {\n        return {};\n      }\n    }\n  }\n\n private:\n  common::Span<GradientQuantiser const> roundings_;\n  common::Span<float const> threshold_;\n  common::Span<float const> regularized_abs_grad_;\n  std::size_t threshold_index_;\n  RandomWeight rnd_;\n};\n\nvoid UniformSampling::Sample(Context const* ctx, 
linalg::MatrixView<GradientPairInt64> gpair,\n                             common::Span<GradientQuantiser const>) {\n  // Set gradient pair to 0 with p = 1 - subsample\n  auto cuctx = ctx->CUDACtx();\n  auto n_targets = gpair.Shape(1);\n  BernoulliTrial trial{ctx->Rng()(), subsample_};\n  thrust::replace_if(\n      cuctx->CTP(), linalg::tbegin(gpair), linalg::tend(gpair), thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) {\n        auto ridx = i / n_targets;\n        return trial(ridx);\n      },\n      GradientPairInt64{});\n}\n\nvoid UniformSampling::ApplySampling(Context const* ctx,\n                                    linalg::MatrixView<GradientPairInt64 const> sampled_split_gpair,\n                                    linalg::Matrix<GradientPair>* value_gpair) {\n  CHECK_EQ(sampled_split_gpair.Shape(0), value_gpair->Shape(0));\n  auto d_split_gpair = sampled_split_gpair;\n  auto d_value = value_gpair->View(ctx->Device());\n  auto n_targets = value_gpair->Shape(1);\n  thrust::replace_if(\n      ctx->CUDACtx()->CTP(), linalg::tbegin(d_value), linalg::tend(d_value),\n      thrust::make_counting_iterator(0ul),\n      [=] XGBOOST_DEVICE(std::size_t i) {\n        auto ridx = i / n_targets;\n        // Check if this row was not sampled (hessian is zero in split gradient)\n        return d_split_gpair(ridx, 0).GetQuantisedHess() == 0;\n      },\n      GradientPair{});\n}\n\nGradientBasedSampling::GradientBasedSampling(std::size_t n_samples, float subsample)\n    : subsample_{subsample},\n      reg_abs_grad_(n_samples, 0.0f),\n      thresholds_(n_samples + 1, 0.0f),\n      grad_csum_(n_samples, 0.0f) {}\n\ntemplate <typename GPair, typename ToFloat>\nvoid ReduceGradImpl(Context const* ctx, linalg::MatrixView<GPair const> gpairs, ToFloat&& to_float,\n                    common::Span<float> reg_abs_grad) {\n  float mvs_lambda = kDefaultMvsLambda;\n  auto n_segments = gpairs.Shape(0);\n  CHECK_EQ(n_segments, reg_abs_grad.size());\n  auto 
n_targets = gpairs.Shape(1);\n  auto grad_op = MvsGradOp{mvs_lambda};\n\n  auto op = [=] XGBOOST_DEVICE(cuda::std::tuple<std::size_t, GPair> tup) -> float {\n    auto [i, gpair] = tup;\n    return grad_op(to_float(i, gpair));\n  };\n  auto in_it = thrust::make_transform_iterator(\n      thrust::make_zip_iterator(thrust::make_counting_iterator(0ul), linalg::tcbegin(gpairs)), op);\n\n  if (gpairs.Shape(1) <= 1) {\n    CHECK_EQ(gpairs.Size(), reg_abs_grad.size());\n    thrust::copy_n(ctx->CUDACtx()->CTP(), in_it, gpairs.Size(), dh::tbegin(reg_abs_grad));\n    return;\n  }\n\n#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 2)\n  // Fixed size segment support:\n  // https://github.com/NVIDIA/cccl/commit/ae0bbef407fa8fea2b654f35f886a6f3420f5897\n  auto s = ctx->CUDACtx()->Stream();\n  std::size_t n_bytes = 0;\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, n_bytes, in_it, dh::tbegin(reg_abs_grad),\n                                                /*num_segments=*/n_segments,\n                                                /*segment_size=*/n_targets, s));\n  dh::TemporaryArray<char> alloc(n_bytes);\n  dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(alloc.data().get(), n_bytes, /*d_in=*/in_it,\n                                                /*d_out=*/dh::tbegin(reg_abs_grad),\n                                                /*num_segments=*/n_segments,\n                                                /*segment_size=*/n_targets, s));\n#else\n  auto key_it =\n      dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return i / n_targets; });\n  thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + gpairs.Size(), in_it,\n                        thrust::make_discard_iterator(), dh::tbegin(reg_abs_grad));\n#endif\n}\n\nvoid ReduceGrad(Context const* ctx, linalg::MatrixView<GradientPairInt64 const> gpairs,\n                common::Span<GradientQuantiser const> roundings, common::Span<float> reg_abs_grad) {\n  auto 
n_targets = gpairs.Shape(1);\n  auto to_float = [=] XGBOOST_DEVICE(std::size_t i, GradientPairInt64 gpair) {\n    auto cidx = i % n_targets;\n    return roundings[cidx].ToFloatingPoint(gpair);\n  };\n  ReduceGradImpl(ctx, gpairs, to_float, reg_abs_grad);\n}\n\nvoid ReduceGradValue(Context const* ctx, linalg::MatrixView<GradientPair const> gpairs,\n                     common::Span<float> reg_abs_grad) {\n  auto to_float = [=] XGBOOST_DEVICE(std::size_t, GradientPair gpair) {\n    return gpair;\n  };\n  ReduceGradImpl(ctx, gpairs, to_float, reg_abs_grad);\n}\n\nstd::size_t CalculateThresholdIndex(Context const* ctx, common::Span<float> sorted_rag,\n                                    common::Span<float> grad_csum, bst_idx_t n_samples,\n                                    bst_idx_t sample_rows) {\n  auto cuctx = ctx->CUDACtx();\n\n  // scan is not yet made deterministic\n  double h_total_sum = thrust::reduce(cuctx->CTP(), dh::tbegin(sorted_rag),\n                                      dh::tend(sorted_rag) - 1, 0.0, cuda::std::plus{});\n  FloatQuantiser quantiser{h_total_sum, n_samples};\n  auto in_it =\n      dh::MakeTransformIterator<std::int64_t>(dh::tbegin(sorted_rag), ToFixedPointOp{quantiser});\n  auto out_it =\n      thrust::make_transform_output_iterator(dh::tbegin(grad_csum), ToFloatingPointOp{quantiser});\n  thrust::inclusive_scan(cuctx->CTP(), in_it, in_it + n_samples, out_it);\n\n  // Find the threshold u for each row.\n  thrust::transform(cuctx->CTP(), dh::tbegin(grad_csum), dh::tend(grad_csum),\n                    thrust::make_counting_iterator(0ul), dh::tbegin(grad_csum),\n                    SampleRateDelta{sorted_rag, n_samples, sample_rows});\n  // Find the first 0 element in grad_sum, which is within the threshold bound\n  thrust::device_ptr<float> min =\n      thrust::min_element(cuctx->CTP(), dh::tbegin(grad_csum), dh::tend(grad_csum));\n  return cuda::std::distance(dh::tbegin(grad_csum), min) + 1;\n}\n\nvoid GradientBasedSampling::Sample(Context 
const* ctx, linalg::MatrixView<GradientPairInt64> gpair,\n                                   common::Span<GradientQuantiser const> roundings) {\n  auto cuctx = ctx->CUDACtx();\n  std::size_t n_samples = gpair.Shape(0);\n  CHECK_EQ(n_samples, this->reg_abs_grad_.size());\n  CHECK_EQ(n_samples, this->grad_csum_.size());\n  CHECK_EQ(n_samples + 1, this->thresholds_.size());\n\n  // Create the regularized absolute gradient.\n  ReduceGrad(ctx, gpair, roundings, dh::ToSpan(reg_abs_grad_));\n  thrust::transform(cuctx->CTP(), reg_abs_grad_.cbegin(), reg_abs_grad_.cend(),\n                    reg_abs_grad_.begin(),\n                    [] XGBOOST_DEVICE(float gpair) { return cuda::std::sqrt(gpair); });\n\n  bst_idx_t sample_rows = n_samples * subsample_;\n  auto threshold_index = CalcThresholdIndex(ctx, dh::ToSpan(reg_abs_grad_), dh::ToSpan(thresholds_),\n                                            dh::ToSpan(grad_csum_), sample_rows);\n\n  auto seed = ctx->Rng()();\n  // Perform sequential Poisson sampling in place.\n  // Only the threshold_[threshold_index] is used. 
(that is the \\mu in the paper)\n  thrust::transform(cuctx->CTP(), linalg::tcbegin(gpair), linalg::tcend(gpair),\n                    thrust::make_counting_iterator(0ul), linalg::tbegin(gpair),\n                    PoissonSampling{roundings, dh::ToSpan(thresholds_), dh::ToSpan(reg_abs_grad_),\n                                    threshold_index, RandomWeight{seed}});\n}\n\nvoid GradientBasedSampling::ApplySampling(\n    Context const* ctx, linalg::MatrixView<GradientPairInt64 const> sampled_split_gpair,\n    linalg::Matrix<GradientPair>* value_gpair) {\n  CHECK_EQ(sampled_split_gpair.Shape(0), value_gpair->Shape(0));\n  auto d_split_gpair = sampled_split_gpair;\n  auto d_value = value_gpair->View(ctx->Device());\n  auto n_targets = value_gpair->Shape(1);\n  auto n_samples = value_gpair->Shape(0);\n  CHECK_EQ(n_samples, this->reg_abs_grad_.size());\n  CHECK_EQ(n_samples, this->grad_csum_.size());\n  CHECK_EQ(n_samples + 1, this->thresholds_.size());\n\n  // Create the regularized absolute gradient from value gradient.\n  ReduceGradValue(ctx, d_value, dh::ToSpan(reg_abs_grad_));\n  thrust::transform(ctx->CUDACtx()->CTP(), reg_abs_grad_.cbegin(), reg_abs_grad_.cend(),\n                    reg_abs_grad_.begin(),\n                    [] XGBOOST_DEVICE(float gpair) { return cuda::std::sqrt(gpair); });\n  bst_idx_t sample_rows = n_samples * subsample_;\n  auto threshold_index = CalcThresholdIndex(ctx, dh::ToSpan(reg_abs_grad_), dh::ToSpan(thresholds_),\n                                            dh::ToSpan(grad_csum_), sample_rows);\n\n  auto threshold = dh::ToSpan(thresholds_);\n  auto rag = dh::ToSpan(reg_abs_grad_);\n  thrust::transform(ctx->CUDACtx()->CTP(), linalg::tcbegin(d_value), linalg::tcend(d_value),\n                    thrust::make_counting_iterator(0ul), linalg::tbegin(d_value),\n                    [=] XGBOOST_DEVICE(GradientPair gpair, std::size_t i) {\n                      auto ridx = i / n_targets;\n                      // Check if this row was not 
sampled (hessian is zero in split gradient)\n                      if (d_split_gpair(ridx, 0).GetQuantisedHess() == 0) {\n                        return GradientPair{};\n                      }\n                      float p = SamplingProbability(threshold[threshold_index], rag[ridx]);\n                      return RescaleGrad(p, gpair);\n                    });\n}\n\nSampler::Sampler(bst_idx_t n_samples, float subsample, int sampling_method) {\n  bool is_sampling = subsample < 1.0;\n\n  if (!is_sampling) {\n    strategy_ = std::make_unique<SamplingStrategy>();\n    return;\n  }\n\n  switch (sampling_method) {\n    case TrainParam::kUniform: {\n      strategy_ = std::make_unique<UniformSampling>(subsample);\n      break;\n    }\n    case TrainParam::kGradientBased: {\n      strategy_ = std::make_unique<GradientBasedSampling>(n_samples, subsample);\n      break;\n    }\n    default:\n      LOG(FATAL) << \"Unknown sampling method.\";\n  }\n}\n\nvoid Sampler::Sample(Context const* ctx, linalg::MatrixView<GradientPairInt64> gpair,\n                     common::Span<GradientQuantiser const> roundings) {\n  xgboost_NVTX_FN_RANGE();\n  strategy_->Sample(ctx, gpair, roundings);\n}\n\nvoid Sampler::ApplySampling(Context const* ctx,\n                            linalg::Matrix<GradientPairInt64> const& sampled_split_gpair,\n                            linalg::Matrix<GradientPair>* value_gpair) {\n  xgboost_NVTX_FN_RANGE();\n  strategy_->ApplySampling(ctx, sampled_split_gpair.View(ctx->Device()), value_gpair);\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/gpu_hist/sampler.cuh",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#pragma once\n#include <cstddef>  // for size_t\n\n#include \"../../common/device_vector.cuh\"  // for device_vector, caching_device_vector\n#include \"quantiser.cuh\"                   // for GradientQuantiser\n#include \"xgboost/base.h\"                  // for GradientPair\n#include \"xgboost/data.h\"                  // for BatchParam\n#include \"xgboost/linalg.h\"                // for MatrixView\n\nnamespace xgboost::tree::cuda_impl {\n// no-op base class.\nclass SamplingStrategy {\n public:\n  virtual void Sample(Context const*, linalg::MatrixView<GradientPairInt64>,\n                      common::Span<GradientQuantiser const>) {}\n  virtual void ApplySampling(Context const*, linalg::MatrixView<GradientPairInt64 const>,\n                             linalg::Matrix<GradientPair>*) {};\n  virtual ~SamplingStrategy() = default;\n};\n\n/** @brief Uniform sampling */\nclass UniformSampling : public SamplingStrategy {\n public:\n  explicit UniformSampling(float subsample) : subsample_{subsample} {}\n  void Sample(Context const* ctx, linalg::MatrixView<GradientPairInt64> gpair,\n              common::Span<GradientQuantiser const> roundings) override;\n  void ApplySampling(Context const* ctx,\n                     linalg::MatrixView<GradientPairInt64 const> sampled_split_gpair,\n                     linalg::Matrix<GradientPair>* value_gpair) override;\n\n private:\n  float const subsample_;\n};\n\n/** @brief Gradient-based sampling. 
*/\nclass GradientBasedSampling : public SamplingStrategy {\n public:\n  GradientBasedSampling(std::size_t n_samples, float subsample);\n  void Sample(Context const* ctx, linalg::MatrixView<GradientPairInt64> gpair,\n              common::Span<GradientQuantiser const> roundings) override;\n  void ApplySampling(Context const* ctx,\n                     linalg::MatrixView<GradientPairInt64 const> sampled_split_gpair,\n                     linalg::Matrix<GradientPair>* value_gpair) override;\n\n private:\n  float const subsample_;\n  // abs gradient\n  dh::device_vector<float> reg_abs_grad_;\n  // sorted abs gradient\n  dh::device_vector<float> thresholds_;\n  // csum of sorted abs gradient\n  dh::device_vector<float> grad_csum_;\n};\n\n/**\n * @brief Draw sample rows by setting non-selected gradient to 0.\n *\n * @see Ke, G., Meng, Q., Finley, T., Wang, T., Chen, W., Ma, W., ... & Liu, T. Y. (2017).\n * Lightgbm: A highly efficient gradient boosting decision tree. In Advances in Neural Information\n * Processing Systems (pp. 3146-3154).\n * @see Zhu, R. (2016). Gradient-based sampling: An adaptive importance sampling for least-squares.\n * In Advances in Neural Information Processing Systems (pp. 406-414).\n * @see Ohlsson, E. (1998). Sequential Poisson sampling. Journal of official Statistics, 14(2), 149.\n * @see Rong Ou. (2020). Out-of-Core GPU Gradient Boosting.\n */\nclass Sampler {\n public:\n  Sampler(bst_idx_t n_samples, float subsample, int sampling_method);\n\n  /** @brief Sample from a DMatrix based on the given gradient pairs. */\n  void Sample(Context const* ctx, linalg::MatrixView<GradientPairInt64> gpair,\n              common::Span<GradientQuantiser const> roundings);\n  /** @brief Apply sampling weights to value gradient. 
*/\n  void ApplySampling(Context const* ctx,\n                     linalg::Matrix<GradientPairInt64> const& sampled_split_gpair,\n                     linalg::Matrix<GradientPair>* value_gpair);\n\n private:\n  std::unique_ptr<SamplingStrategy> strategy_;\n};\n\nstd::size_t CalculateThresholdIndex(Context const* ctx, common::Span<float> sorted_rag,\n                                    common::Span<float> grad_csum, bst_idx_t n_samples,\n                                    bst_idx_t sample_rows);\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/hist/evaluate_splits.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_\n#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_\n\n#include <algorithm>  // for copy\n#include <cstddef>    // for size_t\n#include <limits>     // for numeric_limits\n#include <memory>     // for shared_ptr\n#include <numeric>    // for accumulate\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../../collective/allgather.h\"\n#include \"../../collective/communicator-inl.h\"  // for GetWorldSize\n#include \"../../common/categorical.h\"           // for CatBitField\n#include \"../../common/hist_util.h\"             // for GHistRow, HistogramCuts\n#include \"../../common/linalg_op.h\"             // for cbegin, cend, begin\n#include \"../../common/random.h\"                // for ColumnSampler\n#include \"../constraints.h\"                     // for FeatureInteractionConstraintHost\n#include \"../param.h\"                           // for TrainParam\n#include \"../sample_position.h\"                 // for SamplePosition\n#include \"../split_evaluator.h\"                 // for TreeEvaluator\n#include \"../tree_view.h\"                       // for MultiTargetTreeView\n#include \"expand_entry.h\"                       // for MultiExpandEntry\n#include \"hist_cache.h\"                         // for BoundedHistCollection\n#include \"xgboost/base.h\"                       // for bst_node_t, bst_target_t, bst_feature_t\n#include \"xgboost/context.h\"                    // for COntext\n#include \"xgboost/linalg.h\"                     // for Constants, Vector\n\nnamespace xgboost::tree {\n/**\n * @brief Gather the expand entries from all the workers.\n * @param entries Local expand entries on this worker.\n * @return Global expand entries gathered from all workers.\n */\ntemplate <typename ExpandEntry>\nstd::enable_if_t<std::is_same_v<ExpandEntry, CPUExpandEntry> ||\n                     std::is_same_v<ExpandEntry, 
MultiExpandEntry>,\n                 std::vector<ExpandEntry>>\nAllgatherColumnSplit(Context const *ctx, std::vector<ExpandEntry> const &entries) {\n  auto const n_entries = entries.size();\n\n  // First, gather all the primitive fields.\n  std::vector<ExpandEntry> local_entries(n_entries);\n\n  // Collect and serialize all entries\n  std::vector<std::vector<char>> serialized_entries;\n  for (std::size_t i = 0; i < n_entries; ++i) {\n    Json jentry{Object{}};\n    entries[i].Save(&jentry);\n\n    std::vector<char> out;\n    Json::Dump(jentry, &out, std::ios::binary);\n\n    serialized_entries.emplace_back(std::move(out));\n  }\n  auto all_serialized = collective::VectorAllgatherV(ctx, serialized_entries);\n  CHECK_GE(all_serialized.size(), local_entries.size());\n\n  std::vector<ExpandEntry> all_entries(all_serialized.size());\n  std::transform(all_serialized.cbegin(), all_serialized.cend(), all_entries.begin(),\n                 [](std::vector<char> const &e) {\n                   ExpandEntry entry;\n                   auto je = Json::Load(StringView{e.data(), e.size()}, std::ios::binary);\n                   entry.Load(je);\n                   return entry;\n                 });\n\n  return all_entries;\n}\n\nclass HistEvaluator {\n private:\n  struct NodeEntry {\n    /*! \\brief statistics for node entry */\n    GradStats stats;\n    /*! 
\\brief loss of this node, without split */\n    bst_float root_gain{0.0f};\n  };\n\n private:\n  Context const *ctx_;\n  TrainParam const *param_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  TreeEvaluator tree_evaluator_;\n  bool is_col_split_{false};\n  FeatureInteractionConstraintHost interaction_constraints_;\n  std::vector<NodeEntry> snode_;\n\n  // if sum of statistics for non-missing values in the node\n  // is equal to sum of statistics for all values:\n  // then - there are no missing values\n  // else - there are missing values\n  bool static SplitContainsMissingValues(const GradStats e, const NodeEntry &snode) {\n    if (e.GetGrad() == snode.stats.GetGrad() && e.GetHess() == snode.stats.GetHess()) {\n      return false;\n    } else {\n      return true;\n    }\n  }\n\n  [[nodiscard]] bool IsValid(GradStats const &left, GradStats const &right) const {\n    return left.GetHess() >= param_->min_child_weight &&\n           right.GetHess() >= param_->min_child_weight;\n  }\n\n  /**\n   * \\brief Use learned direction with one-hot split. 
Other implementations (LGB) create a\n   *        pseudo-category for missing value but here we just do a complete scan to avoid\n   *        making specialized histogram bin.\n   */\n  void EnumerateOneHot(common::HistogramCuts const &cut, common::ConstGHistRow hist,\n                       bst_feature_t fidx, bst_node_t nidx,\n                       TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,\n                       SplitEntry *p_best) const {\n    const std::vector<uint32_t> &cut_ptr = cut.Ptrs();\n    const std::vector<bst_float> &cut_val = cut.Values();\n\n    bst_bin_t ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);\n    bst_bin_t iend = static_cast<bst_bin_t>(cut_ptr[fidx + 1]);\n    bst_bin_t n_bins = iend - ibegin;\n\n    GradStats left_sum;\n    GradStats right_sum;\n    // best split so far\n    SplitEntry best;\n    best.is_cat = false;  // marker for whether it's updated or not.\n\n    auto f_hist = hist.subspan(cut_ptr[fidx], n_bins);\n    auto feature_sum = GradStats{\n        std::accumulate(f_hist.data(), f_hist.data() + f_hist.size(), GradientPairPrecise{})};\n    GradStats missing;\n    auto const &parent = snode_[nidx];\n    missing.SetSubstract(parent.stats, feature_sum);\n\n    for (bst_bin_t i = ibegin; i != iend; i += 1) {\n      auto split_pt = cut_val[i];\n\n      // missing on left (treat missing as other categories)\n      right_sum = GradStats{hist[i]};\n      left_sum.SetSubstract(parent.stats, right_sum);\n      if (IsValid(left_sum, right_sum)) {\n        auto missing_left_chg =\n            static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},\n                                                       GradStats{right_sum}) -\n                               parent.root_gain);\n        best.Update(missing_left_chg, fidx, split_pt, true, true, left_sum, right_sum);\n      }\n\n      // missing on right (treat missing as chosen category)\n      right_sum.Add(missing);\n      
left_sum.SetSubstract(parent.stats, right_sum);\n      if (IsValid(left_sum, right_sum)) {\n        auto missing_right_chg =\n            static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},\n                                                       GradStats{right_sum}) -\n                               parent.root_gain);\n        best.Update(missing_right_chg, fidx, split_pt, false, true, left_sum, right_sum);\n      }\n    }\n\n    if (best.is_cat) {\n      auto n = common::CatBitField::ComputeStorageSize(n_bins + 1);\n      best.cat_bits.resize(n, 0);\n      common::CatBitField cat_bits{best.cat_bits};\n      cat_bits.Set(best.split_value);\n    }\n\n    p_best->Update(best);\n  }\n\n  /**\n   * \\brief Enumerate with partition-based splits.\n   *\n   * The implementation is different from LightGBM. Firstly we don't have a\n   * pseudo-category for missing value; instead we make 2 complete scans over the\n   * histogram. Secondly, both scan directions generate splits in the same\n   * order. 
The following table depicts the scan process; square brackets mark the partition\n   * on which the gradient of missing values resides:\n   *\n   *   | Forward  | Backward |\n   *   |----------+----------|\n   *   | [BCDE] A | E [ABCD] |\n   *   | [CDE] AB | DE [ABC] |\n   *   | [DE] ABC | CDE [AB] |\n   *   | [E] ABCD | BCDE [A] |\n   */\n  template <int d_step>\n  void EnumeratePart(common::HistogramCuts const &cut, common::Span<size_t const> sorted_idx,\n                     common::ConstGHistRow hist, bst_feature_t fidx, bst_node_t nidx,\n                     TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,\n                     SplitEntry *p_best) {\n    static_assert(d_step == +1 || d_step == -1, \"Invalid step.\");\n\n    auto const &cut_ptr = cut.Ptrs();\n    auto const &cut_val = cut.Values();\n    auto const &parent = snode_[nidx];\n\n    bst_bin_t f_begin = cut_ptr[fidx];\n    bst_bin_t f_end = cut_ptr[fidx + 1];\n    bst_bin_t n_bins_feature{f_end - f_begin};\n    auto n_bins = std::min(param_->max_cat_threshold, n_bins_feature);\n\n    // statistics on both sides of split\n    GradStats left_sum;\n    GradStats right_sum;\n    // best split so far\n    SplitEntry best;\n\n    auto f_hist = hist.subspan(f_begin, n_bins_feature);\n    bst_bin_t it_begin, it_end;\n    if (d_step > 0) {\n      it_begin = f_begin;\n      it_end = it_begin + n_bins - 1;\n    } else {\n      it_begin = f_end - 1;\n      it_end = it_begin - n_bins + 1;\n    }\n\n    bst_bin_t best_thresh{-1};\n    for (bst_bin_t i = it_begin; i != it_end; i += d_step) {\n      auto j = i - f_begin;  // index local to current feature\n      if (d_step == 1) {\n        right_sum.Add(f_hist[sorted_idx[j]].GetGrad(), f_hist[sorted_idx[j]].GetHess());\n        left_sum.SetSubstract(parent.stats, right_sum);  // missing on left\n      } else {\n        left_sum.Add(f_hist[sorted_idx[j]].GetGrad(), f_hist[sorted_idx[j]].GetHess());\n        right_sum.SetSubstract(parent.stats, left_sum);  // 
missing on right\n      }\n      if (IsValid(left_sum, right_sum)) {\n        auto loss_chg = evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},\n                                                GradStats{right_sum}) -\n                        parent.root_gain;\n        // We don't have a numeric split point, nan here is a dummy split.\n        if (best.Update(loss_chg, fidx, std::numeric_limits<float>::quiet_NaN(), d_step == 1, true,\n                        left_sum, right_sum)) {\n          best_thresh = i;\n        }\n      }\n    }\n\n    if (best_thresh != -1) {\n      auto n = common::CatBitField::ComputeStorageSize(n_bins_feature);\n      best.cat_bits = decltype(best.cat_bits)(n, 0);\n      common::CatBitField cat_bits{best.cat_bits};\n      bst_bin_t partition = d_step == 1 ? (best_thresh - it_begin + 1) : (best_thresh - f_begin);\n      CHECK_GT(partition, 0);\n      std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](std::size_t c) {\n        auto cat = cut_val[c + f_begin];\n        cat_bits.Set(cat);\n      });\n    }\n\n    p_best->Update(best);\n  }\n\n  // Enumerate/Scan the split values of specific feature\n  // Returns the sum of gradients corresponding to the data points that contains\n  // a non-missing value for the particular feature fid.\n  template <int d_step>\n  GradStats EnumerateSplit(common::HistogramCuts const &cut, common::ConstGHistRow hist,\n                           bst_feature_t fidx, bst_node_t nidx,\n                           TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,\n                           SplitEntry *p_best) const {\n    static_assert(d_step == +1 || d_step == -1, \"Invalid step.\");\n\n    // aliases\n    const std::vector<uint32_t> &cut_ptr = cut.Ptrs();\n    const std::vector<bst_float> &cut_val = cut.Values();\n    auto const &parent = snode_[nidx];\n\n    // statistics on both sides of split\n    GradStats left_sum;\n    GradStats right_sum;\n    // best split so 
far\n    SplitEntry best;\n\n    // bin boundaries\n    CHECK_LE(cut_ptr[fidx], static_cast<uint32_t>(std::numeric_limits<bst_bin_t>::max()));\n    CHECK_LE(cut_ptr[fidx + 1], static_cast<uint32_t>(std::numeric_limits<bst_bin_t>::max()));\n    // ibegin, iend: smallest/largest cut points for feature fid use int to allow for\n    // value -1\n    bst_bin_t ibegin, iend;\n    if (d_step > 0) {\n      ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);\n      iend = static_cast<bst_bin_t>(cut_ptr.at(fidx + 1));\n    } else {\n      ibegin = static_cast<bst_bin_t>(cut_ptr[fidx + 1]) - 1;\n      iend = static_cast<bst_bin_t>(cut_ptr[fidx]) - 1;\n    }\n\n    for (bst_bin_t i = ibegin; i != iend; i += d_step) {\n      // start working\n      // try to find a split\n      left_sum.Add(hist[i].GetGrad(), hist[i].GetHess());\n      right_sum.SetSubstract(parent.stats, left_sum);\n      if (IsValid(left_sum, right_sum)) {\n        bst_float loss_chg;\n        bst_float split_pt;\n        if (d_step > 0) {\n          // forward enumeration: split at right bound of each bin\n          loss_chg =\n              static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},\n                                                         GradStats{right_sum}) -\n                                 parent.root_gain);\n          split_pt = cut_val[i];  // not used for partition based\n          best.Update(loss_chg, fidx, split_pt, d_step == -1, false, left_sum, right_sum);\n        } else {\n          // backward enumeration: split at left bound of each bin\n          loss_chg =\n              static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{right_sum},\n                                                         GradStats{left_sum}) -\n                                 parent.root_gain);\n          split_pt = common::HistogramCuts::NumericBinLowerBound(cut_ptr, cut_val, fidx, i);\n          best.Update(loss_chg, fidx, split_pt, d_step == -1, false, 
right_sum, left_sum);\n        }\n      }\n    }\n\n    p_best->Update(best);\n    return left_sum;\n  }\n\n public:\n  void EvaluateSplits(const BoundedHistCollection &hist, common::HistogramCuts const &cut,\n                      common::Span<FeatureType const> feature_types, const RegTree &tree,\n                      std::vector<CPUExpandEntry> *p_entries) {\n    auto n_threads = ctx_->Threads();\n    auto &entries = *p_entries;\n    // All nodes are on the same level, so we can store the shared ptr.\n    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());\n    for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n      auto nidx = entries[nidx_in_set].nid;\n      features[nidx_in_set] = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(nidx));\n    }\n    CHECK(!features.empty());\n    const size_t grain_size = std::max<size_t>(1, features.front()->Size() / n_threads);\n    common::BlockedSpace2d space(\n        entries.size(), [&](size_t nidx_in_set) { return features[nidx_in_set]->Size(); },\n        grain_size);\n\n    std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());\n    for (size_t i = 0; i < entries.size(); ++i) {\n      for (decltype(n_threads) j = 0; j < n_threads; ++j) {\n        tloc_candidates[i * n_threads + j] = entries[i];\n      }\n    }\n    auto evaluator = tree_evaluator_.GetEvaluator();\n    auto const &cut_ptrs = cut.Ptrs();\n\n    common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {\n      auto tidx = omp_get_thread_num();\n      auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];\n      auto best = &entry->split;\n      auto nidx = entry->nid;\n      auto histogram = hist[nidx];\n      auto features_set = features[nidx_in_set]->ConstHostSpan();\n      for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {\n        auto fidx = features_set[fidx_in_set];\n        bool is_cat = 
common::IsCat(feature_types, fidx);\n        if (!interaction_constraints_.Query(nidx, fidx)) {\n          continue;\n        }\n        if (is_cat) {\n          auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx];\n          if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) {\n            EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best);\n          } else {\n            std::vector<size_t> sorted_idx(n_bins);\n            std::iota(sorted_idx.begin(), sorted_idx.end(), 0);\n            auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins);\n            // Sort the histogram to get contiguous partitions.\n            std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) {\n              auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) <\n                         evaluator.CalcWeightCat(*param_, feat_hist[r]);\n              return ret;\n            });\n            EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best);\n            EnumeratePart<-1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best);\n          }\n        } else {\n          auto grad_stats = EnumerateSplit<+1>(cut, histogram, fidx, nidx, evaluator, best);\n          if (SplitContainsMissingValues(grad_stats, snode_[nidx])) {\n            EnumerateSplit<-1>(cut, histogram, fidx, nidx, evaluator, best);\n          }\n        }\n      }\n    });\n\n    for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n      for (auto tidx = 0; tidx < n_threads; ++tidx) {\n        entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);\n      }\n    }\n\n    if (is_col_split_) {\n      // With column-wise data split, we gather the best splits from all the workers and update the\n      // expand entries accordingly.\n      auto all_entries = AllgatherColumnSplit(ctx_, entries);\n      for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {\n        for 
(std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n          entries[nidx_in_set].split.Update(\n              all_entries[worker * entries.size() + nidx_in_set].split);\n        }\n      }\n    }\n  }\n\n  // Add splits to tree, handles all statistic\n  void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {\n    auto evaluator = tree_evaluator_.GetEvaluator();\n    RegTree &tree = *p_tree;\n\n    GradStats parent_sum = candidate.split.left_sum;\n    parent_sum.Add(candidate.split.right_sum);\n    auto base_weight = evaluator.CalcWeight(candidate.nid, *param_, GradStats{parent_sum});\n    auto left_weight =\n        evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.left_sum});\n    auto right_weight =\n        evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.right_sum});\n\n    if (candidate.split.is_cat) {\n      tree.ExpandCategorical(\n          candidate.nid, candidate.split.SplitIndex(), candidate.split.cat_bits,\n          candidate.split.DefaultLeft(), base_weight, left_weight * param_->learning_rate,\n          right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),\n          candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());\n    } else {\n      tree.ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,\n                      candidate.split.DefaultLeft(), base_weight,\n                      left_weight * param_->learning_rate, right_weight * param_->learning_rate,\n                      candidate.split.loss_chg, parent_sum.GetHess(),\n                      candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());\n    }\n\n    // Set up child constraints\n    auto left_child = tree[candidate.nid].LeftChild();\n    auto right_child = tree[candidate.nid].RightChild();\n    tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,\n                             
tree[candidate.nid].SplitIndex(), left_weight, right_weight);\n    evaluator = tree_evaluator_.GetEvaluator();\n\n    snode_.resize(tree.Size());\n    snode_.at(left_child).stats = candidate.split.left_sum;\n    snode_.at(left_child).root_gain =\n        evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.left_sum});\n    snode_.at(right_child).stats = candidate.split.right_sum;\n    snode_.at(right_child).root_gain =\n        evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});\n\n    interaction_constraints_.Split(candidate.nid, tree[candidate.nid].SplitIndex(), left_child,\n                                   right_child);\n  }\n\n  [[nodiscard]] auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }\n  [[nodiscard]] auto const &Stats() const { return snode_; }\n\n  float InitRoot(GradStats const &root_sum) {\n    snode_.resize(1);\n    auto root_evaluator = tree_evaluator_.GetEvaluator();\n\n    snode_[0].stats = GradStats{root_sum.GetGrad(), root_sum.GetHess()};\n    snode_[0].root_gain =\n        root_evaluator.CalcGain(RegTree::kRoot, *param_, GradStats{snode_[0].stats});\n    auto weight = root_evaluator.CalcWeight(RegTree::kRoot, *param_, GradStats{snode_[0].stats});\n    return weight;\n  }\n\n public:\n  // The column sampler must be constructed by caller since we need to preserve the rng\n  // for the entire training session.\n  explicit HistEvaluator(Context const *ctx, TrainParam const *param, MetaInfo const &info,\n                         std::shared_ptr<common::ColumnSampler> sampler)\n      : ctx_{ctx},\n        param_{param},\n        column_sampler_{std::move(sampler)},\n        tree_evaluator_{*param, static_cast<bst_feature_t>(info.num_col_), DeviceOrd::CPU()},\n        is_col_split_{info.IsColumnSplit()} {\n    interaction_constraints_.Configure(*param, info.num_col_);\n    column_sampler_->Init(ctx, info.num_col_, info.feature_weights, param_->colsample_bynode,\n                         
 param_->colsample_bylevel, param_->colsample_bytree);\n  }\n};\n\nclass HistMultiEvaluator {\n  std::vector<double> gain_;\n  linalg::Matrix<GradientPairPrecise> stats_;\n  TrainParam const *param_;\n  FeatureInteractionConstraintHost interaction_constraints_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  Context const *ctx_;\n  bool is_col_split_{false};\n\n private:\n  static double MultiCalcSplitGain(TrainParam const &param,\n                                   linalg::VectorView<GradientPairPrecise const> left_sum,\n                                   linalg::VectorView<GradientPairPrecise const> right_sum,\n                                   linalg::VectorView<float> left_weight,\n                                   linalg::VectorView<float> right_weight) {\n    CalcWeight(param, left_sum, left_weight);\n    CalcWeight(param, right_sum, right_weight);\n\n    auto left_gain = CalcGainGivenWeight(param, left_sum, left_weight);\n    auto right_gain = CalcGainGivenWeight(param, right_sum, right_weight);\n    return left_gain + right_gain;\n  }\n\n  template <bst_bin_t d_step>\n  bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx,\n                      common::Span<common::ConstGHistRow> hist,\n                      linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,\n                      SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {\n    auto const &cut_ptr = cut.Ptrs();\n    auto const &cut_val = cut.Values();\n\n    auto sum = linalg::Empty<GradientPairPrecise>(ctx_, 2, hist.size());\n    auto left_sum = sum.Slice(0, linalg::All());\n    auto right_sum = sum.Slice(1, linalg::All());\n\n    bst_bin_t ibegin, iend;\n    if (d_step > 0) {\n      ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);\n      iend = static_cast<bst_bin_t>(cut_ptr[fidx + 1]);\n    } else {\n      ibegin = static_cast<bst_bin_t>(cut_ptr[fidx + 1]) - 1;\n      iend = static_cast<bst_bin_t>(cut_ptr[fidx]) - 
1;\n    }\n    auto n_targets = hist.size();\n    auto weight = linalg::Empty<float>(ctx_, 2, n_targets);\n    auto left_weight = weight.Slice(0, linalg::All());\n    auto right_weight = weight.Slice(1, linalg::All());\n\n    for (bst_bin_t i = ibegin; i != iend; i += d_step) {\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        auto t_hist = hist[t];\n        auto t_p = parent_sum(t);\n        left_sum(t) += t_hist[i];\n        right_sum(t) = t_p - left_sum(t);\n      }\n\n      if (d_step > 0) {\n        auto split_pt = cut_val[i];\n        auto loss_chg =\n            MultiCalcSplitGain(*param_, right_sum, left_sum, right_weight, left_weight) -\n            parent_gain;\n        p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, left_sum, right_sum);\n      } else {\n        auto split_pt = common::HistogramCuts::NumericBinLowerBound(cut_ptr, cut_val, fidx, i);\n        auto loss_chg =\n            MultiCalcSplitGain(*param_, right_sum, left_sum, left_weight, right_weight) -\n            parent_gain;\n        p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum);\n      }\n    }\n    // return true if there's missing. 
Doesn't handle floating-point error well.\n    if (d_step == +1) {\n      return !std::equal(linalg::cbegin(left_sum), linalg::cend(left_sum),\n                         linalg::cbegin(parent_sum));\n    }\n    return false;\n  }\n\n  void EnumerateOneHot(common::HistogramCuts const &cut, bst_feature_t fidx,\n                       common::Span<common::ConstGHistRow> hist,\n                       linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,\n                       SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {\n    auto const &cut_ptr = cut.Ptrs();\n    auto const &cut_val = cut.Values();\n\n    bst_bin_t ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);\n    bst_bin_t iend = static_cast<bst_bin_t>(cut_ptr[fidx + 1]);\n    bst_bin_t n_bins = iend - ibegin;\n    auto n_targets = hist.size();\n\n    auto sum = linalg::Empty<GradientPairPrecise>(ctx_, 2, n_targets);\n    auto left_sum = sum.Slice(0, linalg::All());\n    auto right_sum = sum.Slice(1, linalg::All());\n\n    auto weight = linalg::Empty<float>(ctx_, 2, n_targets);\n    auto left_weight = weight.Slice(0, linalg::All());\n    auto right_weight = weight.Slice(1, linalg::All());\n\n    // Per-target missing gradient: parent_sum - sum_of_all_bins.\n    auto missing_storage = linalg::Empty<GradientPairPrecise>(ctx_, n_targets);\n    auto missing = missing_storage.HostView();\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      auto f_hist = hist[t].subspan(cut_ptr[fidx], n_bins);\n      GradientPairPrecise feature_sum{};\n      for (bst_bin_t b = 0; b < n_bins; ++b) {\n        feature_sum += f_hist[b];\n      }\n      missing(t) = parent_sum(t) - feature_sum;\n    }\n\n    SplitEntryContainer<std::vector<GradientPairPrecise>> best;\n    best.is_cat = false;\n\n    for (bst_bin_t i = ibegin; i != iend; ++i) {\n      auto split_pt = cut_val[i];\n\n      // Missing on left (missing grouped with other categories).\n      for (bst_target_t t = 0; t < 
n_targets; ++t) {\n        right_sum(t) = GradientPairPrecise{hist[t][i]};\n        left_sum(t) = parent_sum(t) - right_sum(t);\n      }\n      auto missing_left_gain =\n          MultiCalcSplitGain(*param_, left_sum, right_sum, left_weight, right_weight) - parent_gain;\n      best.Update(missing_left_gain, fidx, split_pt, true, true, left_sum, right_sum);\n\n      // Missing on right (missing grouped with chosen category).\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        right_sum(t) = GradientPairPrecise{hist[t][i]} + missing(t);  // NOLINT\n        left_sum(t) = parent_sum(t) - right_sum(t);\n      }\n      auto missing_right_gain =\n          MultiCalcSplitGain(*param_, left_sum, right_sum, left_weight, right_weight) - parent_gain;\n      best.Update(missing_right_gain, fidx, split_pt, false, true, left_sum, right_sum);\n    }\n\n    if (best.is_cat) {\n      auto n = common::CatBitField::ComputeStorageSize(n_bins + 1);\n      best.cat_bits.resize(n, 0);\n      common::CatBitField cat_bits{best.cat_bits};\n      cat_bits.Set(best.split_value);\n    }\n\n    p_best->Update(best);\n  }\n\n public:\n  void EvaluateSplits(RegTree const &tree, common::Span<const BoundedHistCollection *> hist,\n                      common::HistogramCuts const &cut,\n                      common::Span<FeatureType const> feature_types,\n                      std::vector<MultiExpandEntry> *p_entries) {\n    auto &entries = *p_entries;\n    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());\n\n    for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n      auto nidx = entries[nidx_in_set].nid;\n      features[nidx_in_set] = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(nidx));\n    }\n    CHECK(!features.empty());\n\n    std::int32_t n_threads = ctx_->Threads();\n    std::size_t const grain_size = std::max<std::size_t>(1, features.front()->Size() / n_threads);\n    common::BlockedSpace2d space(\n        
entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); },\n        grain_size);\n\n    std::vector<MultiExpandEntry> tloc_candidates(n_threads * entries.size());\n    for (std::size_t i = 0; i < entries.size(); ++i) {\n      for (std::int32_t j = 0; j < n_threads; ++j) {\n        tloc_candidates[i * n_threads + j] = entries[i];\n      }\n    }\n    common::ParallelFor2d(space, n_threads, [&](std::size_t nidx_in_set, common::Range1d r) {\n      auto tidx = omp_get_thread_num();\n      auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];\n      auto best = &entry->split;\n      auto parent_sum = stats_.Slice(entry->nid, linalg::All());\n      std::vector<common::ConstGHistRow> node_hist;\n      for (auto t_hist : hist) {\n        node_hist.emplace_back((*t_hist)[entry->nid]);\n      }\n      auto features_set = features[nidx_in_set]->ConstHostSpan();\n\n      for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {\n        auto fidx = features_set[fidx_in_set];\n        if (!interaction_constraints_.Query(entry->nid, fidx)) {\n          continue;\n        }\n        auto parent_gain = gain_[entry->nid];\n        bool is_cat = common::IsCat(feature_types, fidx);\n        if (is_cat) {\n          this->EnumerateOneHot(cut, fidx, node_hist, parent_sum, parent_gain, best);\n        } else {\n          bool missing =\n              this->EnumerateSplit<+1>(cut, fidx, node_hist, parent_sum, parent_gain, best);\n          if (missing) {\n            this->EnumerateSplit<-1>(cut, fidx, node_hist, parent_sum, parent_gain, best);\n          }\n        }\n      }\n    });\n\n    for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n      for (auto tidx = 0; tidx < n_threads; ++tidx) {\n        entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);\n      }\n    }\n\n    if (is_col_split_) {\n      // With column-wise data split, we gather the best splits 
from all the workers and update the\n      // expand entries accordingly.\n      auto all_entries = AllgatherColumnSplit(ctx_, entries);\n      for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {\n        for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {\n          entries[nidx_in_set].split.Update(\n              all_entries[worker * entries.size() + nidx_in_set].split);\n        }\n      }\n    }\n  }\n\n  linalg::Vector<float> InitRoot(linalg::VectorView<GradientPairPrecise const> root_sum) {\n    auto n_targets = root_sum.Size();\n    stats_ = linalg::Constant(ctx_, GradientPairPrecise{}, 1, n_targets);\n    gain_.resize(1);\n\n    linalg::Vector<float> weight({n_targets}, ctx_->Device());\n    CalcWeight(*param_, root_sum, weight.HostView());\n    auto root_gain = CalcGainGivenWeight(*param_, root_sum, weight.HostView());\n    gain_.front() = root_gain;\n\n    auto h_stats = stats_.HostView();\n    std::copy(linalg::cbegin(root_sum), linalg::cend(root_sum), linalg::begin(h_stats));\n\n    return weight;\n  }\n\n  void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {\n    // Use the split gradient's number of targets for intermediate weights\n    // This may differ from p_tree->NumTargets() when using reduced gradient\n    auto n_split_targets = candidate.split.left_sum.size();\n    auto parent_sum = stats_.Slice(candidate.nid, linalg::All());\n\n    auto weight = linalg::Empty<float>(ctx_, 3, n_split_targets);\n    auto base_weight = weight.Slice(0, linalg::All());\n    CalcWeight(*param_, parent_sum, base_weight);\n\n    auto left_weight = weight.Slice(1, linalg::All());\n    auto left_sum =\n        linalg::MakeVec(candidate.split.left_sum.data(), candidate.split.left_sum.size());\n    CalcWeight(*param_, left_sum, param_->learning_rate, left_weight);\n\n    auto right_weight = weight.Slice(2, linalg::All());\n    auto right_sum =\n        linalg::MakeVec(candidate.split.right_sum.data(), 
candidate.split.right_sum.size());\n    CalcWeight(*param_, right_sum, param_->learning_rate, right_weight);\n\n    // Compute the loss_chg and sum hessians for parent and children\n    float loss_chg = candidate.split.loss_chg;\n    // Sum hessians across all targets for each child\n    float left_sum_hess = 0.0f, right_sum_hess = 0.0f;\n    for (std::size_t t = 0; t < candidate.split.left_sum.size(); ++t) {\n      left_sum_hess += candidate.split.left_sum[t].GetHess();\n      right_sum_hess += candidate.split.right_sum[t].GetHess();\n    }\n    float sum_hess = left_sum_hess + right_sum_hess;\n\n    if (candidate.split.is_cat) {\n      p_tree->ExpandCategorical(candidate.nid, candidate.split.SplitIndex(),\n                                candidate.split.cat_bits, candidate.split.DefaultLeft(),\n                                base_weight, left_weight, right_weight, loss_chg, sum_hess,\n                                left_sum_hess, right_sum_hess);\n    } else {\n      p_tree->ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,\n                         candidate.split.DefaultLeft(), base_weight, left_weight, right_weight,\n                         loss_chg, sum_hess, left_sum_hess, right_sum_hess);\n    }\n\n    CHECK(p_tree->IsMultiTarget());\n    auto mt_tree = p_tree->HostMtView();\n    auto left_child = mt_tree.LeftChild(candidate.nid);\n    CHECK_GT(left_child, candidate.nid);\n    auto right_child = mt_tree.RightChild(candidate.nid);\n    CHECK_GT(right_child, candidate.nid);\n\n    std::size_t n_nodes = mt_tree.Size();\n    gain_.resize(n_nodes);\n    // Re-calculate weight without learning rate.\n    CalcWeight(*param_, left_sum, left_weight);\n    CalcWeight(*param_, right_sum, right_weight);\n    gain_[left_child] = CalcGainGivenWeight(*param_, left_sum, left_weight);\n    gain_[right_child] = CalcGainGivenWeight(*param_, right_sum, right_weight);\n\n    if (n_nodes >= stats_.Shape(0)) {\n      stats_.Reshape(n_nodes * 
2, stats_.Shape(1));\n    }\n    CHECK_EQ(stats_.Shape(1), n_split_targets);\n    auto left_sum_stat = stats_.Slice(left_child, linalg::All());\n    std::copy(candidate.split.left_sum.cbegin(), candidate.split.left_sum.cend(),\n              linalg::begin(left_sum_stat));\n    auto right_sum_stat = stats_.Slice(right_child, linalg::All());\n    std::copy(candidate.split.right_sum.cbegin(), candidate.split.right_sum.cend(),\n              linalg::begin(right_sum_stat));\n  }\n\n  explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param,\n                              std::shared_ptr<common::ColumnSampler> sampler)\n      : param_{param},\n        column_sampler_{std::move(sampler)},\n        ctx_{ctx},\n        is_col_split_{info.IsColumnSplit()} {\n    interaction_constraints_.Configure(*param, info.num_col_);\n    column_sampler_->Init(ctx, info.num_col_, info.feature_weights, param_->colsample_bynode,\n                          param_->colsample_bylevel, param_->colsample_bytree);\n  }\n};\n\n/**\n * @brief CPU implementation of update prediction cache, which calculates the leaf value\n *        for the last tree and accumulates it to prediction vector.\n *\n * @param last_tree The last tree being updated by tree updater\n */\ninline void UpdatePredictionCacheImpl(Context const *ctx, ScalarTreeView const &last_tree,\n                                      common::Span<bst_node_t const> node_position,\n                                      linalg::VectorView<float> out_preds) {\n  CHECK(out_preds.Device().IsCPU());\n  common::ParallelFor(out_preds.Size(), ctx->Threads(), [&](std::size_t idx) {\n    bst_node_t nidx = node_position[idx];\n    nidx = SamplePosition::Decode(nidx);\n    auto weight = last_tree.LeafValue(nidx);\n    out_preds(idx) += weight;\n  });\n}\n\ninline void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,\n                                      common::Span<bst_node_t const> 
node_position,\n                                      linalg::MatrixView<float> out_preds) {\n  CHECK_GT(out_preds.Size(), 0U);\n  CHECK(p_last_tree);\n\n  auto const &tree = *p_last_tree;\n  if (!tree.IsMultiTarget()) {\n    return UpdatePredictionCacheImpl(ctx, p_last_tree->HostScView(), node_position,\n                                     out_preds.Slice(linalg::All(), 0));\n  }\n\n  auto const mt_tree = tree.HostMtView();\n  auto n_targets = mt_tree.NumTargets();\n  CHECK_EQ(out_preds.Shape(1), n_targets);\n  CHECK(out_preds.Device().IsCPU());\n\n  common::ParallelFor(out_preds.Shape(0), ctx->Threads(), [&](std::size_t sample_idx) {\n    bst_node_t nidx = node_position[sample_idx];\n    nidx = SamplePosition::Decode(nidx);\n    auto weight = mt_tree.LeafValue(nidx);\n    for (bst_target_t target_idx = 0; target_idx < n_targets; ++target_idx) {\n      out_preds(sample_idx, target_idx) += weight(target_idx);\n    }\n  });\n}\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_\n"
  },
  {
    "path": "src/tree/hist/expand_entry.h",
    "content": "/**\n * Copyright 2021-2023, XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_HIST_EXPAND_ENTRY_H_\n#define XGBOOST_TREE_HIST_EXPAND_ENTRY_H_\n\n#include <algorithm>    // for all_of\n#include <ostream>      // for ostream\n#include <string>       // for string\n#include <type_traits>  // for add_const_t\n#include <utility>      // for move\n#include <vector>       // for vector\n\n#include \"../../common/type.h\"  // for EraseType\n#include \"../param.h\"           // for SplitEntry, SplitEntryContainer, TrainParam\n#include \"xgboost/base.h\"       // for GradientPairPrecise, bst_node_t\n#include \"xgboost/json.h\"       // for Json\n\nnamespace xgboost::tree {\n/**\n * \\brief Structure for storing tree split candidate.\n */\ntemplate <typename Impl>\nstruct ExpandEntryImpl {\n  bst_node_t nid{0};\n  bst_node_t depth{0};\n\n  [[nodiscard]] float GetLossChange() const {\n    return static_cast<Impl const*>(this)->split.loss_chg;\n  }\n  [[nodiscard]] bst_node_t GetNodeId() const { return nid; }\n\n  [[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t num_leaves) const {\n    return static_cast<Impl const*>(this)->IsValidImpl(param, num_leaves);\n  }\n\n  void Save(Json* p_out) const {\n    auto& out = *p_out;\n    auto self = static_cast<Impl const*>(this);\n\n    out[\"nid\"] = Integer{this->nid};\n    out[\"depth\"] = Integer{this->depth};\n\n    /**\n     * Handle split\n     */\n    out[\"split\"] = Object{};\n    auto& split = out[\"split\"];\n    split[\"loss_chg\"] = self->split.loss_chg;\n    split[\"sindex\"] = Integer{self->split.sindex};\n    split[\"split_value\"] = self->split.split_value;\n\n    auto const& cat_bits = self->split.cat_bits;\n    auto s_cat_bits = common::Span{cat_bits.data(), cat_bits.size()};\n    split[\"cat_bits\"] = U8Array{s_cat_bits.size_bytes()};\n    auto& j_cat_bits = get<U8Array>(split[\"cat_bits\"]);\n    using T = typename decltype(self->split.cat_bits)::value_type;\n    auto erased =\n        
common::EraseType<std::add_const_t<T>, std::add_const_t<std::uint8_t>>(s_cat_bits);\n    for (std::size_t i = 0; i < erased.size(); ++i) {\n      j_cat_bits[i] = erased[i];\n    }\n\n    split[\"is_cat\"] = Boolean{self->split.is_cat};\n\n    self->SaveGrad(&split);\n  }\n\n  void Load(Json const& in) {\n    auto self = static_cast<Impl*>(this);\n\n    this->nid = get<Integer const>(in[\"nid\"]);\n    this->depth = get<Integer const>(in[\"depth\"]);\n\n    /**\n     * Handle split\n     */\n    auto const& split = in[\"split\"];\n    self->split.loss_chg = get<Number const>(split[\"loss_chg\"]);\n    self->split.sindex = get<Integer const>(split[\"sindex\"]);\n    self->split.split_value = get<Number const>(split[\"split_value\"]);\n\n    auto const& j_cat_bits = get<U8Array const>(split[\"cat_bits\"]);\n    using T = typename decltype(self->split.cat_bits)::value_type;\n    auto restored = common::RestoreType<std::add_const_t<T>>(\n        common::Span{j_cat_bits.data(), j_cat_bits.size()});\n    self->split.cat_bits.resize(restored.size());\n    for (std::size_t i = 0; i < restored.size(); ++i) {\n      self->split.cat_bits[i] = restored[i];\n    }\n\n    self->split.is_cat = get<Boolean const>(split[\"is_cat\"]);\n    self->LoadGrad(split);\n  }\n};\n\nstruct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {\n  SplitEntry split;\n\n  CPUExpandEntry() = default;\n  CPUExpandEntry(bst_node_t nidx, bst_node_t depth, SplitEntry split)\n      : ExpandEntryImpl{nidx, depth}, split(std::move(split)) {}\n  CPUExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}\n\n  void SaveGrad(Json* p_out) const {\n    auto& out = *p_out;\n    auto save = [&](std::string const& name, GradStats const& sum) {\n      out[name] = F64Array{2};\n      auto& array = get<F64Array>(out[name]);\n      array[0] = sum.GetGrad();\n      array[1] = sum.GetHess();\n    };\n    save(\"left_sum\", this->split.left_sum);\n    save(\"right_sum\", 
this->split.right_sum);\n  }\n  void LoadGrad(Json const& in) {\n    auto const& left_sum = get<F64Array const>(in[\"left_sum\"]);\n    this->split.left_sum = GradStats{left_sum[0], left_sum[1]};\n    auto const& right_sum = get<F64Array const>(in[\"right_sum\"]);\n    this->split.right_sum = GradStats{right_sum[0], right_sum[1]};\n  }\n\n  [[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {\n    if (split.loss_chg <= kRtEps) return false;\n    if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) {\n      return false;\n    }\n    if (split.loss_chg < param.min_split_loss) {\n      return false;\n    }\n    if (param.max_depth > 0 && depth == param.max_depth) {\n      return false;\n    }\n    if (param.max_leaves > 0 && num_leaves == param.max_leaves) {\n      return false;\n    }\n    return true;\n  }\n\n  friend std::ostream& operator<<(std::ostream& os, CPUExpandEntry const& e) {\n    os << \"ExpandEntry:\\n\";\n    os << \"nidx: \" << e.nid << \"\\n\";\n    os << \"depth: \" << e.depth << \"\\n\";\n    os << \"loss: \" << e.split.loss_chg << \"\\n\";\n    os << \"split:\\n\" << e.split << std::endl;\n    return os;\n  }\n\n  /**\n   * @brief Copy primitive fields into this, and collect cat_bits into a vector.\n   *\n   * This is used for allgather.\n   *\n   * @param that The other entry to copy from\n   * @param collected_cat_bits The vector to collect cat_bits\n   * @param cat_bits_sizes The sizes of the collected cat_bits\n   */\n  void CopyAndCollect(CPUExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,\n                      std::vector<std::size_t>* cat_bits_sizes) {\n    nid = that.nid;\n    depth = that.depth;\n    split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes);\n  }\n};\n\nstruct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {\n  SplitEntryContainer<std::vector<GradientPairPrecise>> split;\n\n  MultiExpandEntry() = default;\n  
MultiExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}\n\n  void SaveGrad(Json* p_out) const {\n    auto& out = *p_out;\n    auto save = [&](std::string const& name, std::vector<GradientPairPrecise> const& sum) {\n      out[name] = F64Array{sum.size() * 2};\n      auto& array = get<F64Array>(out[name]);\n      for (std::size_t i = 0, j = 0; i < sum.size(); i++, j += 2) {\n        array[j] = sum[i].GetGrad();\n        array[j + 1] = sum[i].GetHess();\n      }\n    };\n    save(\"left_sum\", this->split.left_sum);\n    save(\"right_sum\", this->split.right_sum);\n  }\n  void LoadGrad(Json const& in) {\n    auto load = [&](std::string const& name, std::vector<GradientPairPrecise>* p_sum) {\n      auto const& array = get<F64Array const>(in[name]);\n      auto& sum = *p_sum;\n      sum.resize(array.size() / 2);\n      for (std::size_t i = 0, j = 0; i < sum.size(); ++i, j += 2) {\n        sum[i] = GradientPairPrecise{array[j], array[j + 1]};\n      }\n    };\n    load(\"left_sum\", &this->split.left_sum);\n    load(\"right_sum\", &this->split.right_sum);\n  }\n\n  [[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {\n    if (split.loss_chg <= kRtEps) return false;\n    auto is_zero = [](auto const& sum) {\n      return std::all_of(sum.cbegin(), sum.cend(),\n                         [&](auto const& g) { return g.GetHess() - .0 == .0; });\n    };\n    if (is_zero(split.left_sum) || is_zero(split.right_sum)) {\n      return false;\n    }\n    if (split.loss_chg < param.min_split_loss) {\n      return false;\n    }\n    if (param.max_depth > 0 && depth == param.max_depth) {\n      return false;\n    }\n    if (param.max_leaves > 0 && num_leaves == param.max_leaves) {\n      return false;\n    }\n    return true;\n  }\n\n  friend std::ostream& operator<<(std::ostream& os, MultiExpandEntry const& e) {\n    os << \"ExpandEntry: \\n\";\n    os << \"nidx: \" << e.nid << \"\\n\";\n    os << \"depth: \" << e.depth << 
\"\\n\";\n    os << \"loss: \" << e.split.loss_chg << \"\\n\";\n    os << \"split cond:\" << e.split.split_value << \"\\n\";\n    os << \"split ind:\" << e.split.SplitIndex() << \"\\n\";\n    os << \"left_sum: [\";\n    for (auto v : e.split.left_sum) {\n      os << v << \", \";\n    }\n    os << \"]\\n\";\n\n    os << \"right_sum: [\";\n    for (auto v : e.split.right_sum) {\n      os << v << \", \";\n    }\n    os << \"]\\n\";\n    return os;\n  }\n\n  /**\n   * @brief Copy primitive fields into this, and collect cat_bits and gradients into vectors.\n   *\n   * This is used for allgather.\n   *\n   * @param that The other entry to copy from\n   * @param collected_cat_bits The vector to collect cat_bits\n   * @param cat_bits_sizes The sizes of the collected cat_bits\n   * @param collected_gradients The vector to collect gradients\n   */\n  void CopyAndCollect(MultiExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,\n                      std::vector<std::size_t>* cat_bits_sizes,\n                      std::vector<GradientPairPrecise>* collected_gradients) {\n    nid = that.nid;\n    depth = that.depth;\n    split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes, collected_gradients);\n  }\n};\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_\n"
  },
  {
    "path": "src/tree/hist/hist_cache.h",
    "content": "/**\n * Copyright 2023-2024 by XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_HIST_HIST_CACHE_H_\n#define XGBOOST_TREE_HIST_HIST_CACHE_H_\n#include <cstddef>  // for size_t\n#include <map>      // for map\n#include <memory>   // for unique_ptr\n#include <vector>   // for vector\n\n#include \"../../common/hist_util.h\"          // for GHistRow, ConstGHistRow\n#include \"../../common/ref_resource_view.h\"  // for ReallocVector\n#include \"xgboost/base.h\"                    // for bst_node_t, bst_bin_t\n#include \"xgboost/logging.h\"                 // for CHECK_EQ\n#include \"xgboost/span.h\"                    // for Span\n\nnamespace xgboost::tree {\n/**\n * @brief A persistent cache for CPU histogram.\n *\n *   The size of the cache is first bounded by the `Driver` class then by this cache\n *   implementaiton. The former limits the number of nodes that can be built for each node\n *   batch, while this cache limits the number of all nodes up to the size of\n *   max(|node_batch|, n_cached_node).\n *\n *   The caller is responsible for clearing up the cache as it needs to rearrange the\n *   nodes before making overflowed allocations. 
The struct only reports whether the size\n *   limit has been reached.\n */\nclass BoundedHistCollection {\n  // maps node index to offset in `data_`.\n  std::map<bst_node_t, std::size_t> node_map_;\n  // currently allocated bins, used for tracking consistency.\n  std::size_t current_size_{0};\n\n  // stores the histograms in a contiguous buffer\n  using Vec = common::ReallocVector<GradientPairPrecise>;\n  std::unique_ptr<Vec> data_{new Vec{}};  // nvcc 12.1 trips over std::make_unique\n\n  // number of histogram bins across all features\n  bst_bin_t n_total_bins_{0};\n  // limits the number of nodes that can be in the cache for each tree\n  std::size_t max_cached_nodes_{0};\n  // whether the tree has grown beyond the cache limit\n  bool has_exceeded_{false};\n\n public:\n  BoundedHistCollection() = default;\n  common::GHistRow operator[](std::size_t idx) {\n    auto offset = node_map_.at(idx);\n    return common::Span{data_->data(), static_cast<size_t>(data_->size())}.subspan(\n        offset, n_total_bins_);\n  }\n  common::ConstGHistRow operator[](std::size_t idx) const {\n    auto offset = node_map_.at(idx);\n    return common::Span{data_->data(), static_cast<size_t>(data_->size())}.subspan(\n        offset, n_total_bins_);\n  }\n  void Reset(bst_bin_t n_total_bins, std::size_t n_cached_nodes) {\n    n_total_bins_ = n_total_bins;\n    max_cached_nodes_ = n_cached_nodes;\n    this->Clear(false);\n  }\n  /**\n   * @brief Clear the cache, mark whether the cache has exceeded the limit.\n   */\n  void Clear(bool exceeded) {\n    node_map_.clear();\n    current_size_ = 0;\n    has_exceeded_ = exceeded;\n  }\n\n  [[nodiscard]] bool CanHost(common::Span<bst_node_t const> nodes_to_build,\n                             common::Span<bst_node_t const> nodes_to_sub) const {\n    auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();\n    return n_new_nodes + node_map_.size() <= max_cached_nodes_;\n  }\n\n  /**\n   * @brief Allocate histogram buffers for all nodes.\n 
  *\n   *   The resulting histogram buffer is contiguous for all nodes in the order of\n   *   allocation.\n   */\n  void AllocateHistograms(common::Span<bst_node_t const> nodes_to_build,\n                          common::Span<bst_node_t const> nodes_to_sub) {\n    auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();\n    auto alloc_size = n_new_nodes * n_total_bins_;\n    auto new_size = alloc_size + current_size_;\n    if (new_size > data_->size()) {\n      data_->Resize(new_size);\n    }\n    for (auto nidx : nodes_to_build) {\n      node_map_[nidx] = current_size_;\n      current_size_ += n_total_bins_;\n    }\n    for (auto nidx : nodes_to_sub) {\n      node_map_[nidx] = current_size_;\n      current_size_ += n_total_bins_;\n    }\n    CHECK_EQ(current_size_, new_size);\n  }\n  void AllocateHistograms(std::vector<bst_node_t> const& nodes) {\n    this->AllocateHistograms(common::Span<bst_node_t const>{nodes},\n                             common::Span<bst_node_t const>{});\n  }\n\n  [[nodiscard]] bool HasExceeded() const { return has_exceeded_; }\n  [[nodiscard]] bool HistogramExists(bst_node_t nidx) const {\n    return node_map_.find(nidx) != node_map_.cend();\n  }\n  [[nodiscard]] std::size_t Size() const { return current_size_; }\n};\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_HIST_HIST_CACHE_H_\n"
  },
  {
    "path": "src/tree/hist/hist_param.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include \"hist_param.h\"\n\n#include <ios>     // for binary\n#include <string>  // for string\n\n#include \"../../collective/broadcast.h\"         // for Broadcast\n#include \"../../collective/communicator-inl.h\"  // for GetRank\n#include \"xgboost/json.h\"                       // for Object, Json\n#include \"xgboost/linalg.h\"                     // for MakeVec\n#include \"xgboost/tree_model.h\"                 // for RegTree\n\nnamespace xgboost::tree {\nDMLC_REGISTER_PARAMETER(HistMakerTrainParam);\n\nvoid HistMakerTrainParam::CheckTreesSynchronized(Context const* ctx,\n                                                 RegTree const* local_tree) const {\n  if (!this->debug_synchronize) {\n    return;\n  }\n\n  std::string s_model;\n  Json model{Object{}};\n  int rank = collective::GetRank();\n  if (rank == 0) {\n    local_tree->SaveModel(&model);\n  }\n  Json::Dump(model, &s_model, std::ios::binary);\n\n  auto nchars{static_cast<std::int64_t>(s_model.size())};\n  auto rc = collective::Success() << [&] {\n    return collective::Broadcast(ctx, linalg::MakeVec(&nchars, 1), 0);\n  } << [&] {\n    s_model.resize(nchars);\n    return collective::Broadcast(ctx, linalg::MakeVec(s_model.data(), s_model.size()), 0);\n  };\n  collective::SafeColl(rc);\n\n  RegTree ref_tree{};  // rank 0 tree\n  auto j_ref_tree = Json::Load(StringView{s_model}, std::ios::binary);\n  ref_tree.LoadModel(j_ref_tree);\n  CHECK(*local_tree == ref_tree);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/hist/hist_param.h",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#pragma once\n\n#include <cstddef>  // for size_t\n#include <limits>   // for numeric_limits\n\n#include \"xgboost/context.h\"     // for DeviceOrd\n#include \"xgboost/parameter.h\"   // for XGBoostParameter\n#include \"xgboost/tree_model.h\"  // for RegTree\n\nnamespace xgboost::tree {\nstruct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {\n private:\n  constexpr static std::size_t NotSet() { return std::numeric_limits<std::size_t>::max(); }\n\n  std::size_t max_cached_hist_node{NotSet()};  // NOLINT\n\n public:\n  // Smaller for GPU due to memory limitation.\n  constexpr static std::size_t CpuDefaultNodes() { return static_cast<std::size_t>(1) << 16; }\n  constexpr static std::size_t CudaDefaultNodes() { return static_cast<std::size_t>(1) << 12; }\n\n  bool debug_synchronize{false};\n\n  void CheckTreesSynchronized(Context const* ctx, RegTree const* local_tree) const;\n\n  std::size_t MaxCachedHistNodes(DeviceOrd device) const {\n    if (max_cached_hist_node != NotSet()) {\n      return max_cached_hist_node;\n    }\n    return device.IsCPU() ? CpuDefaultNodes() : CudaDefaultNodes();\n  }\n\n  // declare parameters\n  DMLC_DECLARE_PARAMETER(HistMakerTrainParam) {\n    DMLC_DECLARE_FIELD(debug_synchronize)\n        .set_default(false)\n        .describe(\"Check if all distributed tree are identical after tree construction.\");\n    DMLC_DECLARE_FIELD(max_cached_hist_node)\n        .set_default(NotSet())\n        .set_lower_bound(1)\n        .describe(\"Maximum number of nodes in histogram cache.\");\n  }\n};\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/hist/histogram.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include \"histogram.h\"\n\n#include <cstddef>  // for size_t\n#include <numeric>  // for accumulate\n#include <utility>  // for swap\n#include <vector>   // for vector\n\n#include \"../../common/transform_iterator.h\"  // for MakeIndexTransformIter\n#include \"../tree_view.h\"                     // for ScalarTreeView, MultiTargetTreeView\n#include \"expand_entry.h\"                     // for MultiExpandEntry, CPUExpandEntry\n#include \"xgboost/logging.h\"                  // for CHECK_EQ\n#include \"xgboost/span.h\"                     // for Span\n#include \"xgboost/tree_model.h\"               // for RegTree\n\nnamespace xgboost::tree {\nvoid AssignNodes(MultiTargetTreeView const &tree,\n                 std::vector<MultiExpandEntry> const &valid_candidates,\n                 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub) {\n  CHECK_EQ(nodes_to_build.size(), valid_candidates.size());\n\n  std::size_t n_idx = 0;\n  for (auto const &c : valid_candidates) {\n    auto left_nidx = tree.LeftChild(c.nid);\n    auto right_nidx = tree.RightChild(c.nid);\n\n    auto build_nidx = left_nidx;\n    auto subtract_nidx = right_nidx;\n    auto lit =\n        common::MakeIndexTransformIter([&](auto i) { return c.split.left_sum[i].GetHess(); });\n    auto left_sum = std::accumulate(lit, lit + c.split.left_sum.size(), .0);\n    auto rit =\n        common::MakeIndexTransformIter([&](auto i) { return c.split.right_sum[i].GetHess(); });\n    auto right_sum = std::accumulate(rit, rit + c.split.right_sum.size(), .0);\n    auto fewer_right = right_sum < left_sum;\n    if (fewer_right) {\n      std::swap(build_nidx, subtract_nidx);\n    }\n    nodes_to_build[n_idx] = build_nidx;\n    nodes_to_sub[n_idx] = subtract_nidx;\n    ++n_idx;\n  }\n}\n\nvoid AssignNodes(ScalarTreeView const &tree, std::vector<CPUExpandEntry> const &candidates,\n                 common::Span<bst_node_t> 
nodes_to_build, common::Span<bst_node_t> nodes_to_sub) {\n  std::size_t n_idx = 0;\n  for (auto const &c : candidates) {\n    auto left_nidx = tree.LeftChild(c.nid);\n    auto right_nidx = tree.RightChild(c.nid);\n    auto fewer_right = c.split.right_sum.GetHess() < c.split.left_sum.GetHess();\n\n    auto build_nidx = left_nidx;\n    auto subtract_nidx = right_nidx;\n    if (fewer_right) {\n      std::swap(build_nidx, subtract_nidx);\n    }\n    nodes_to_build[n_idx] = build_nidx;\n    nodes_to_sub[n_idx] = subtract_nidx;\n    ++n_idx;\n  }\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/hist/histogram.h",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_\n#define XGBOOST_TREE_HIST_HISTOGRAM_H_\n\n#include <algorithm>   // for max\n#include <cstddef>     // for size_t\n#include <cstdint>     // for int32_t\n#include <utility>     // for move\n#include <vector>      // for vector\n\n#include \"../../collective/allreduce.h\"    // for Allreduce\n#include \"../../common/hist_util.h\"        // for GHistRow, ParallelGHi...\n#include \"../../common/row_set.h\"          // for RowSetCollection\n#include \"../../common/threading_utils.h\"  // for ParallelFor2d, Range1d, BlockedSpace2d\n#include \"../../common/cache_manager.h\"    // for CacheManager\n#include \"../../data/gradient_index.h\"     // for GHistIndexMatrix\n#include \"expand_entry.h\"                  // for MultiExpandEntry, CPUExpandEntry\n#include \"hist_cache.h\"                    // for BoundedHistCollection\n#include \"hist_param.h\"                    // for HistMakerTrainParam\n#include \"xgboost/base.h\"                  // for bst_node_t, bst_target_t, bst_bin_t\n#include \"xgboost/context.h\"               // for Context\n#include \"xgboost/data.h\"                  // for BatchIterator, BatchSet\n#include \"xgboost/linalg.h\"                // for MatrixView, All, Vect...\n#include \"xgboost/logging.h\"               // for CHECK_GE\n#include \"xgboost/span.h\"                  // for Span\n#include \"xgboost/tree_model.h\"            // for RegTree\n\nnamespace xgboost::tree {\n/**\n * @brief Decide which node as the build node for multi-target trees.\n */\nvoid AssignNodes(MultiTargetTreeView const &tree,\n                 std::vector<MultiExpandEntry> const &valid_candidates,\n                 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);\n\n/**\n * @brief Decide which node as the build node.\n */\nvoid AssignNodes(ScalarTreeView const &tree, std::vector<CPUExpandEntry> const &candidates,\n             
    common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);\n\nclass HistogramBuilder {\n  /*! \\brief cumulative histogram of gradients. */\n  common::Monitor monitor_;\n  BoundedHistCollection hist_;\n  common::ParallelGHistBuilder buffer_;\n  BatchParam param_;\n  std::int32_t n_threads_{-1};\n  // Whether XGBoost is running in distributed environment.\n  bool is_distributed_{false};\n  bool is_col_split_{false};\n\n public:\n  /**\n   * @brief Reset the builder, should be called before growing a new tree.\n   *\n   * @param total_bins       Total number of bins across all features\n   * @param is_distributed   Mostly used for testing to allow injecting parameters instead\n   *                         of using global rabit variable.\n   */\n  void Reset(Context const *ctx, bst_bin_t total_bins, BatchParam const &p, bool is_distributed,\n             bool is_col_split, HistMakerTrainParam const *param) {\n    n_threads_ = ctx->Threads();\n    param_ = p;\n    hist_.Reset(total_bins, param->MaxCachedHistNodes(ctx->Device()));\n    buffer_.Init(total_bins);\n    is_distributed_ = is_distributed;\n    is_col_split_ = is_col_split;\n  }\n\n  template <bool any_missing>\n  void BuildLocalHistograms(common::BlockedSpace2d const &space, GHistIndexMatrix const &gidx,\n                            std::vector<bst_node_t> const &nodes_to_build,\n                            common::RowSetCollection const &row_set_collection,\n                            common::Span<GradientPair const> gpair_h, bool read_by_column) {\n    // Parallel processing by nodes and data in each node\n    common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) {\n      const auto tid = static_cast<unsigned>(omp_get_thread_num());\n      bst_node_t const nidx = nodes_to_build[nid_in_set];\n      auto const& elem = row_set_collection[nidx];\n      auto start_of_row_set = std::min(r.begin(), elem.Size());\n      auto end_of_row_set = 
std::min(r.end(), elem.Size());\n      auto rid_set = common::Span<bst_idx_t const>{elem.begin() + start_of_row_set,\n                                                   elem.begin() + end_of_row_set};\n      auto hist = buffer_.GetInitializedHist(tid, nid_in_set);\n      if (rid_set.size() != 0) {\n        common::BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, read_by_column);\n      }\n    });\n  }\n\n  /**\n   * @brief Allocate histogram, rearrange the nodes if `rearrange` is true and the tree\n   *        has reached the cache size limit.\n   */\n  template <typename TreeView>\n  void AddHistRows(TreeView const &tree, std::vector<bst_node_t> *p_nodes_to_build,\n                   std::vector<bst_node_t> *p_nodes_to_sub, bool rearrange) {\n    CHECK(p_nodes_to_build);\n    auto &nodes_to_build = *p_nodes_to_build;\n    CHECK(p_nodes_to_sub);\n    auto &nodes_to_sub = *p_nodes_to_sub;\n\n    // We first check whether the cache size is already exceeded or about to be exceeded.\n    // If not, then we can allocate histograms without clearing the cache and without\n    // worrying about missing parent histogram.\n    //\n    // Otherwise, we need to rearrange the nodes before the allocation to make sure the\n    // resulting buffer is contiguous. This is to facilitate efficient allreduce.\n\n    bool can_host = this->hist_.CanHost(nodes_to_build, nodes_to_sub);\n    // True if the tree is still within the size of cache limit. 
Allocate histogram as\n    // usual.\n    auto cache_is_valid = can_host && !this->hist_.HasExceeded();\n\n    if (!can_host) {\n      this->hist_.Clear(true);\n    }\n\n    if (!rearrange || cache_is_valid) {\n      // If not rearrange, we allocate the histogram as usual, assuming the nodes have\n      // been properly arranged by other builders.\n      this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);\n      if (rearrange) {\n        CHECK(!this->hist_.HasExceeded());\n      }\n      return;\n    }\n\n    // The cache is full, parent histogram might be removed in previous iterations to\n    // save memory.\n    std::vector<bst_node_t> can_subtract;\n    for (auto const &v : nodes_to_sub) {\n      if (this->hist_.HistogramExists(tree.Parent(v))) {\n        // We can still use the subtraction trick for this node\n        can_subtract.push_back(v);\n      } else {\n        // This node requires a full build\n        nodes_to_build.push_back(v);\n      }\n    }\n\n    nodes_to_sub = std::move(can_subtract);\n    this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);\n  }\n\n  /** Main entry point of this class, build histogram for tree nodes. 
*/\n  void BuildHist(std::size_t page_idx, common::BlockedSpace2d const &space,\n                 GHistIndexMatrix const &gidx, common::RowSetCollection const &row_set_collection,\n                 std::vector<bst_node_t> const &nodes_to_build,\n                 linalg::VectorView<GradientPair const> gpair, bool read_by_column) {\n    monitor_.Start(__func__);\n    CHECK(gpair.Contiguous());\n\n    if (page_idx == 0) {\n      // Add the local histogram cache to the parallel buffer before processing the first page.\n      auto n_nodes = nodes_to_build.size();\n      std::vector<common::GHistRow> target_hists(n_nodes);\n      for (size_t i = 0; i < n_nodes; ++i) {\n        auto const nidx = nodes_to_build[i];\n        target_hists[i] = hist_[nidx];\n      }\n      buffer_.Reset(this->n_threads_, n_nodes, space, target_hists);\n    }\n\n    if (gidx.IsDense()) {\n      this->BuildLocalHistograms<false>(space, gidx, nodes_to_build, row_set_collection,\n                                        gpair.Values(), read_by_column);\n    } else {\n      this->BuildLocalHistograms<true>(space, gidx, nodes_to_build, row_set_collection,\n                                       gpair.Values(), read_by_column);\n    }\n    monitor_.Stop(__func__);\n  }\n\n  template <typename TreeView>\n  void SyncHistogram(Context const *ctx, TreeView const &tree,\n                     std::vector<bst_node_t> const &nodes_to_build,\n                     std::vector<bst_node_t> const &nodes_to_trick) {\n    auto n_total_bins = buffer_.TotalBins();\n    common::BlockedSpace2d space(\n        nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024);\n    common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) {\n      // Merging histograms from each thread.\n      this->buffer_.ReduceHist(node, r.begin(), r.end());\n    });\n    if (is_distributed_ && !is_col_split_) {\n      // The cache is contiguous, we can perform allreduce for all nodes in one go.\n      
CHECK(!nodes_to_build.empty());\n      auto first_nidx = nodes_to_build.front();\n      std::size_t n = n_total_bins * nodes_to_build.size() * 2;\n      auto rc = collective::Allreduce(\n          ctx, linalg::MakeVec(reinterpret_cast<double *>(this->hist_[first_nidx].data()), n),\n          collective::Op::kSum);\n      SafeColl(rc);\n    }\n\n    common::BlockedSpace2d const &subspace =\n        nodes_to_trick.size() == nodes_to_build.size()\n            ? space\n            : common::BlockedSpace2d{nodes_to_trick.size(),\n                                     [&](std::size_t) { return n_total_bins; }, 1024};\n    common::ParallelFor2d(\n        subspace, this->n_threads_, [&](std::size_t nidx_in_set, common::Range1d r) {\n          auto subtraction_nidx = nodes_to_trick[nidx_in_set];\n          auto parent_id = tree.Parent(subtraction_nidx);\n          auto sibling_nidx = tree.IsLeftChild(subtraction_nidx) ? tree.RightChild(parent_id)\n                                                                 : tree.LeftChild(parent_id);\n          auto sibling_hist = this->hist_[sibling_nidx];\n          auto parent_hist = this->hist_[parent_id];\n          auto subtract_hist = this->hist_[subtraction_nidx];\n          common::SubtractionHist(subtract_hist, parent_hist, sibling_hist, r.begin(), r.end());\n        });\n  }\n\n public:\n  /* Getters for tests. */\n  [[nodiscard]] BoundedHistCollection const &Histogram() const { return hist_; }\n  [[nodiscard]] BoundedHistCollection &Histogram() { return hist_; }\n  auto &Buffer() { return buffer_; }\n};\n\n// Construct a work space for building histogram.  
Eventually we should move this\n// function into histogram builder once hist tree method supports external memory.\ntemplate <typename Partitioner>\ncommon::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,\n                                          std::vector<bst_node_t> const &nodes_to_build,\n                                          const GHistIndexMatrix &gidx,\n                                          std::size_t l1_size, bst_bin_t max_bin,\n                                          bool read_by_column) {\n  // FIXME(jiamingy): Handle different size of space.  Right now we use the maximum\n  // partition size for the buffer, which might not be efficient if partition sizes\n  // has significant variance.\n  std::vector<std::size_t> partition_size(nodes_to_build.size(), 0);\n  for (auto const &partition : partitioners) {\n    size_t k = 0;\n    for (auto nidx : nodes_to_build) {\n      auto n_rows_in_node = partition.Partitions()[nidx].Size();\n      partition_size[k] = std::max(partition_size[k], n_rows_in_node);\n      k++;\n    }\n  }\n\n  // Estimate the size of each data block based on model parameters and L1 capacity\n  // The general idea is to keep as much working-set data in L1 as possible.\n  /* Each processed row occupies ~32 bytes in L1:\n   * - gradient pair (p_gpair): sizeof(GradientPair)\n   * - row index (rid[i]): sizeof(size_t)\n   * - icol_start and icol_end: 2 * sizeof(size_t)\n   */\n  std::size_t l1_row_foot_print = (sizeof(GradientPair) + 3 * sizeof(size_t));\n  double usable_l1_size = 0.8 * l1_size;\n\n  std::size_t space_in_l1_for_rows;\n  if (read_by_column) {\n   /* In this case, an accurate block_size estimate is performance-critical.\n    * For column-wise histogram construction, each column is processed over the\n    * same block of rows. 
If the block fits in L1, the row data are loaded once\n    * and reused across all columns; otherwise, the cache must be refilled for\n    * each column.\n    */\n\n    /* First step: determine whether one histogram column fits into L1.\n     * Note: column-wise kernel is used for dense data only.\n     */\n    std::size_t hist_col_size = 2 * sizeof(GradientPairPrecise) * max_bin;\n    bool hist_col_fit_to_l1 = hist_col_size < usable_l1_size;\n\n    /* Second step: compute available L1 space for row data. */\n    space_in_l1_for_rows = usable_l1_size - (hist_col_fit_to_l1 ? hist_col_size : 0);\n  } else {\n    /* In this case, block_size is less critical.\n    * For row-wise histogram construction, columns are processed for each row.\n    * Rows do not need to remain in L1 across iterations, but choosing a\n    * reasonable block_size allows the histogram buffer and offsets to stay in L1,\n    * which gives a small performance benefit.\n    */\n\n    /* First step: estimate the size of the histogram and the offsets vector. */\n    std::size_t n_bins = gidx.cut.Ptrs().back();\n    std::size_t n_columns = gidx.cut.Ptrs().size() - 1;\n    bool any_missing = !gidx.IsDense();\n    std::size_t hist_size = 2 * sizeof(GradientPairPrecise) * n_bins;\n    std::size_t offsets_size = any_missing ? 0 : n_columns * sizeof(uint32_t);\n\n    /* Second step: estimate the extra L1 footprint caused by prefetching.\n     * Prefetching is not always active, so the estimate is intentionally conservative.\n     */\n    l1_row_foot_print += sizeof(GradientPair);\n    std::size_t idx_bin_size = n_columns * sizeof(uint32_t);\n\n    bool hist_fit_to_l1 = (hist_size + offsets_size + idx_bin_size) < usable_l1_size;\n\n    /* Third step: compute available L1 space for row data. */\n    std::size_t occupied_space = (hist_fit_to_l1 ? hist_size : 0) + offsets_size + idx_bin_size;\n    space_in_l1_for_rows = usable_l1_size > occupied_space ? 
usable_l1_size - occupied_space : 0;\n  }\n  std::size_t block_size = space_in_l1_for_rows / l1_row_foot_print;\n\n  /* Minimum block size = 8 rows.\n   * This ensures that a full cache line is utilized when loading gradient pairs.\n   */\n  constexpr std::size_t kCacheLineSize = 64;\n  constexpr std::size_t kMinBlockSize = kCacheLineSize / sizeof(GradientPair);\n  block_size = std::max<std::size_t>(kMinBlockSize, block_size);\n\n  common::BlockedSpace2d space{\n      nodes_to_build.size(), [&](size_t nidx_in_set) {\n                                return partition_size[nidx_in_set];\n                              }, block_size};\n  return space;\n}\n\n/**\n * @brief Histogram builder that can handle multiple targets.\n */\nclass MultiHistogramBuilder {\n  std::vector<HistogramBuilder> target_builders_;\n  Context const *ctx_;\n  common::CacheManager cache_manager_;\n\n  bool ReadByColumn(const GHistIndexMatrix &gidx, bool force_read_by_column) const {\n    if (force_read_by_column) return true;\n\n    auto nbins = gidx.cut.Ptrs().back();\n    size_t hist_size = 2 * sizeof(double) * nbins;\n\n    double l3_per_thread = static_cast<double>(cache_manager_.L3Size()) / ctx_->Threads();\n    double usable_cache_size =  0.8 * (cache_manager_.L2Size() + l3_per_thread);\n    const bool hist_fit_to_l2 = usable_cache_size > hist_size;\n\n    /* In row-wise histogram construction, each iteration of the outer (row-wise) loop\n     * accesses bins across the entire histogram; the bins are not localized.\n     * If the histogram is too large to fit in L2 cache, random access becomes a major performance bottleneck.\n     *\n     * For dense data, using column-wise histogram construction,\n     * each iteration of the outer (column-wise) loop accesses only a localized portion of the histogram:\n     * idx_bin = gradient_index(row_id, col_id) + offset[col_id].\n     * This improves cache locality, so the column-wise kernel outperforms the row-wise kernel in this case.\n     */\n    
bool read_by_column = !hist_fit_to_l2 && gidx.IsDense();\n    return read_by_column;\n  }\n\n public:\n  /**\n   * @brief Build the histogram for root node.\n   */\n  template <typename Partitioner, typename ExpandEntry, typename TreeView>\n  void BuildRootHist(DMatrix *p_fmat, TreeView const &tree,\n                     std::vector<Partitioner> const &partitioners,\n                     linalg::MatrixView<GradientPair const> gpair, ExpandEntry const &best,\n                     BatchParam const &param, bool force_read_by_column = false) {\n    auto n_targets = gpair.Shape(1);\n    CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0));\n    CHECK_EQ(target_builders_.size(), n_targets);\n    std::vector<bst_node_t> nodes{best.nid};\n    std::vector<bst_node_t> dummy_sub;\n\n    for (bst_target_t t{0}; t < n_targets; ++t) {\n      this->target_builders_[t].AddHistRows(tree, &nodes, &dummy_sub, false);\n    }\n    CHECK(dummy_sub.empty());\n\n    std::size_t page_idx{0};\n    for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {\n      bool read_by_column = ReadByColumn(gidx, force_read_by_column);\n\n      auto space = ConstructHistSpace(partitioners, nodes, gidx,\n                                      cache_manager_.L1Size(), param.max_bin, read_by_column);\n      for (bst_target_t t{0}; t < n_targets; ++t) {\n        auto t_gpair = gpair.Slice(linalg::All(), t);\n        this->target_builders_[t].BuildHist(page_idx, space, gidx,\n                                            partitioners[page_idx].Partitions(), nodes, t_gpair,\n                                            read_by_column);\n      }\n      ++page_idx;\n    }\n\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      this->target_builders_[t].SyncHistogram(ctx_, tree, nodes, dummy_sub);\n    }\n  }\n  /**\n   * @brief Build histogram for left and right child of valid candidates\n   */\n  template <typename Partitioner, typename ExpandEntry, typename TreeView>\n  void 
BuildHistLeftRight(Context const *ctx, DMatrix *p_fmat, TreeView const &tree,\n                          std::vector<Partitioner> const &partitioners,\n                          std::vector<ExpandEntry> const &valid_candidates,\n                          linalg::MatrixView<GradientPair const> gpair, BatchParam const &param,\n                          bool force_read_by_column = false) {\n    std::vector<bst_node_t> nodes_to_build(valid_candidates.size());\n    std::vector<bst_node_t> nodes_to_sub(valid_candidates.size());\n    AssignNodes(tree, valid_candidates, nodes_to_build, nodes_to_sub);\n\n    // use the first builder for getting number of valid nodes.\n    target_builders_.front().AddHistRows(tree, &nodes_to_build, &nodes_to_sub, true);\n    CHECK_GE(nodes_to_build.size(), nodes_to_sub.size());\n    CHECK_EQ(nodes_to_sub.size() + nodes_to_build.size(), valid_candidates.size() * 2);\n\n    // allocate storage for the rest of the builders\n    for (bst_target_t t = 1; t < target_builders_.size(); ++t) {\n      target_builders_[t].AddHistRows(tree, &nodes_to_build, &nodes_to_sub, false);\n    }\n\n    std::size_t page_idx{0};\n    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {\n      bool read_by_column = ReadByColumn(page, force_read_by_column);\n\n      auto space = ConstructHistSpace(partitioners, nodes_to_build, page,\n                                      cache_manager_.L1Size(), param.max_bin, read_by_column);\n\n      auto n_targets = gpair.Shape(1);\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        auto t_gpair = gpair.Slice(linalg::All(), t);\n        CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_);\n        this->target_builders_[t].BuildHist(page_idx, space, page,\n                                            partitioners[page_idx].Partitions(), nodes_to_build,\n                                            t_gpair, read_by_column);\n      }\n      page_idx++;\n    }\n\n    auto n_targets = 
gpair.Shape(1);\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      this->target_builders_[t].SyncHistogram(ctx, tree, nodes_to_build, nodes_to_sub);\n    }\n  }\n\n  [[nodiscard]] auto const &Histogram(bst_target_t t) const {\n    return target_builders_[t].Histogram();\n  }\n  [[nodiscard]] auto &Histogram(bst_target_t t) { return target_builders_[t].Histogram(); }\n  // Number of targets for histogram building (may differ from tree.NumTargets() for reduced grad)\n  [[nodiscard]] bst_target_t NumTargets() const { return target_builders_.size(); }\n\n  void Reset(Context const *ctx, bst_bin_t total_bins, bst_target_t n_targets, BatchParam const &p,\n             bool is_distributed, bool is_col_split, HistMakerTrainParam const *param) {\n    ctx_ = ctx;\n    target_builders_.resize(n_targets);\n    CHECK_GE(n_targets, 1);\n    for (auto &v : target_builders_) {\n      v.Reset(ctx, total_bins, p, is_distributed, is_col_split, param);\n    }\n  }\n};\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_HIST_HISTOGRAM_H_\n"
  },
  {
    "path": "src/tree/hist/sampler.cc",
    "content": "/**\n * Copyright 2026, XGBoost Contributors\n */\n#include \"sampler.h\"  // for kDefaultMvsLambda\n\n#include <cmath>    // for sqrt\n#include <cstddef>  // for size_t\n#include <limits>   // for numeric_limits\n#include <numeric>  // for partial_sum\n#include <random>   // for default_random_engine, uniform_real_distribution\n#include <vector>   // for vector\n\n#include \"../../common/algorithm.h\"  // for Sort\n#include \"xgboost/base.h\"            // for GradientPair, GradientPairPrecise\n#include \"xgboost/linalg.h\"          // for MatrixView\n#include \"xgboost/span.h\"            // for Span\n\nnamespace xgboost::tree::cpu_impl {\ntemplate <typename Fn>\nvoid ParallelSampling(Context const* ctx, bst_idx_t n_samples, Fn&& fn) {\n  auto& rnd = ctx->Rng();\n  auto n_threads = ctx->Threads();\n  std::uint64_t initial_seed = rnd();\n  std::size_t const discard_size = n_samples / n_threads;\n  common::ParallelFor(n_threads, n_threads, [&](auto tid) {\n    std::size_t ibegin = tid * discard_size;\n    std::size_t iend = (tid == (n_threads - 1)) ? 
n_samples : ibegin + discard_size;\n\n    // Setup the eng\n    const uint64_t displaced_seed =\n        RandomReplace::SimpleSkip(ibegin, initial_seed, RandomReplace::kBase, RandomReplace::kMod);\n    RandomReplace::EngineT eng(displaced_seed);\n\n    fn(ibegin, iend, eng);\n  });\n}\n\nnamespace {\n[[nodiscard]] float CalcSamplingInfo(Context const* ctx, linalg::MatrixView<GradientPair> gpairs,\n                                     float subsample, std::vector<float>* p_reg_abs_grad) {\n  std::size_t n_samples = gpairs.Shape(0);\n  std::size_t sample_rows = static_cast<std::size_t>(n_samples * subsample);\n\n  std::vector<float> thresholds;\n  *p_reg_abs_grad = CalcRegAbsGrad(ctx, gpairs, &thresholds);\n\n  std::vector<float> grad_csum(n_samples);\n  std::partial_sum(thresholds.begin(), thresholds.end() - 1, grad_csum.begin());\n  float threshold =\n      CalculateThreshold(common::Span{thresholds}, common::Span{grad_csum}, n_samples, sample_rows);\n  return threshold;\n}\n\nvoid GradientBasedSampling(Context const* ctx, linalg::MatrixView<GradientPair> gpairs,\n                           common::Span<float const> reg_abs_grad, float threshold) {\n  std::uniform_real_distribution<float> dist{0.0f, 1.0f};\n  auto n_samples = gpairs.Shape(0);\n  auto n_targets = gpairs.Shape(1);\n  ParallelSampling(ctx, n_samples, [&](std::size_t ibegin, std::size_t iend, auto& eng) {\n    for (std::size_t i = ibegin; i < iend; ++i) {\n      float p = SamplingProbability(threshold, reg_abs_grad[i]);\n      // Skip rows with zero gradient (already zero)\n      if (gpairs(i, 0).GetGrad() == 0.0 && gpairs(i, 0).GetHess() == 0.0) {\n        continue;\n      }\n\n      if (p >= 1.0f) {\n        // Always select this row.\n        continue;\n      }\n      float rand_val = dist(eng);\n      if (rand_val <= p) {\n        for (std::size_t t = 0; t < n_targets; ++t) {\n          gpairs(i, t) = RescaleGrad(p, gpairs(i, t));\n        }\n      } else {\n        // Not selected: zero out\n      
  for (std::size_t t = 0; t < n_targets; ++t) {\n          gpairs(i, t) = GradientPair{};\n        }\n      }\n    }\n  });\n}\n\nvoid ApplyMvsWeights(Context const* ctx, linalg::MatrixView<GradientPair const> sampled_split_gpair,\n                     linalg::Matrix<GradientPair>* value_gpair,\n                     common::Span<float const> reg_abs_grad, float threshold) {\n  CHECK_EQ(sampled_split_gpair.Shape(0), value_gpair->Shape(0));\n  auto h_split = sampled_split_gpair;\n  auto h_value = value_gpair->HostView();\n  auto n_samples = h_value.Shape(0);\n  auto n_targets = h_value.Shape(1);\n\n  common::ParallelFor(n_samples, ctx->Threads(), [&](bst_idx_t i) {\n    // Check if this row was not sampled (hessian is zero in split gradient)\n    if (h_split(i, 0).GetHess() == 0.0f) {\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        h_value(i, t) = GradientPair{};\n      }\n      return;\n    }\n    float p = SamplingProbability(threshold, reg_abs_grad[i]);\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      h_value(i, t) = RescaleGrad(p, h_value(i, t));\n    }\n  });\n}\n\nvoid ApplySamplingMask(Context const* ctx,\n                       linalg::MatrixView<GradientPair const> sampled_split_gpair,\n                       linalg::Matrix<GradientPair>* value_gpair) {\n  CHECK_EQ(sampled_split_gpair.Shape(0), value_gpair->Shape(0));\n  auto h_split = sampled_split_gpair;\n  auto h_value = value_gpair->HostView();\n  auto n_samples = h_value.Shape(0);\n  auto n_targets = h_value.Shape(1);\n\n  common::ParallelFor(n_samples, ctx->Threads(), [&](bst_idx_t i) {\n    // Check if this row was not sampled (hessian is zero in split gradient)\n    if (h_split(i, 0).GetHess() == 0.0f) {\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        h_value(i, t) = GradientPair{};\n      }\n    }\n  });\n}\n}  // namespace\n\nstd::vector<float> CalcRegAbsGrad(Context const* ctx, linalg::MatrixView<GradientPair const> gpairs,\n                                  
std::vector<float>* p_thresholds) {\n  float mvs_lambda = kDefaultMvsLambda;\n  std::size_t n_samples = gpairs.Shape(0);\n  std::size_t n_targets = gpairs.Shape(1);\n  std::vector<float> reg_abs_grad(n_samples);\n  auto grad_op = MvsGradOp{mvs_lambda};\n  common::ParallelFor(n_samples, ctx->Threads(), [&](auto i) {\n    float sum_sq = 0.0f;\n    for (std::size_t t = 0; t < n_targets; ++t) {\n      sum_sq += grad_op(gpairs(i, t));\n    }\n    reg_abs_grad[i] = std::sqrt(sum_sq);\n  });\n\n  auto& thresholds = *p_thresholds;\n  thresholds = reg_abs_grad;                                // Copy for sorting\n  thresholds.push_back(std::numeric_limits<float>::max());  // sentinel\n  common::Sort(ctx, thresholds.begin(), thresholds.end() - 1, std::less{});\n\n  return reg_abs_grad;\n}\n\nvoid UniformSample(Context const* ctx, linalg::MatrixView<GradientPair> out, float subsample) {\n  bst_idx_t n_samples = out.Shape(0);\n  std::size_t n_targets = out.Shape(1);\n  std::bernoulli_distribution coin_flip{subsample};\n  CHECK_GE(n_targets, 1);\n\n  ParallelSampling(ctx, n_samples, [&](std::size_t ibegin, std::size_t iend, auto& eng) {\n    for (std::size_t i = ibegin; i < iend; ++i) {\n      if (!coin_flip(eng)) {\n        for (std::size_t j = 0; j < n_targets; ++j) {\n          out(i, j) = GradientPair{};\n        }\n      }\n    }\n  });\n}\n\nfloat CalculateThreshold(common::Span<float const> sorted_rag, common::Span<float const> grad_csum,\n                         bst_idx_t n_samples, bst_idx_t sample_rows) {\n  CHECK_GE(n_samples, 1);\n  // Use binary search to find the threshold index\n  std::int64_t low_idx = 0;\n  std::int64_t high_idx = n_samples - 1;\n  while (low_idx <= high_idx) {\n    std::int64_t i = low_idx + (high_idx - low_idx) / 2;\n\n    float lower = sorted_rag[i];\n    // Upper bound is next element or max for last element\n    float upper = sorted_rag[i + 1];\n\n    bst_idx_t n_above = n_samples - i - 1;\n    float denom = static_cast<float>(sample_rows) 
- static_cast<float>(n_above);\n\n    if (denom <= 0) {\n      // i is too small, need to go right to increase denom\n      low_idx = i + 1;\n      continue;\n    }\n\n    float u = grad_csum[i] / denom;\n\n    if (u > lower && u <= upper) {\n      return u;\n    }\n\n    if (u <= lower) {\n      high_idx = i - 1;\n    } else {\n      low_idx = i + 1;\n    }\n  }\n\n  // p will be extremely small, no row can be sampled.\n  if (sample_rows == 0) {\n    return std::numeric_limits<float>::max();\n  }\n  // Degenerate case: all gradients are the same, so u cannot be greater than the lower\n  // bound. Fall back to using the total sum divided by sample_rows.\n  return grad_csum.back() / sample_rows;\n}\n\nvoid Sampler::Sample(Context const* ctx, linalg::MatrixView<GradientPair> out) {\n  CHECK(out.Contiguous());\n  std::size_t n_samples = out.Shape(0);\n  std::size_t sample_rows = static_cast<std::size_t>(n_samples * subsample_);\n  if (sample_rows >= n_samples || n_samples == 0) {\n    is_sampling_ = false;\n    return;\n  }\n  is_sampling_ = true;\n\n  switch (sampling_method_) {\n    case TrainParam::kUniform:\n      UniformSample(ctx, out, subsample_);\n      break;\n    case TrainParam::kGradientBased: {\n      std::vector<float> reg_abs_grad;\n      auto threshold = CalcSamplingInfo(ctx, out, subsample_, &reg_abs_grad);\n      GradientBasedSampling(ctx, out, common::Span{reg_abs_grad}, threshold);\n      break;\n    }\n    default:\n      LOG(FATAL) << \"Unknown sampling method: \" << sampling_method_;\n  }\n}\n\nvoid Sampler::ApplySampling(Context const* ctx,\n                            linalg::MatrixView<GradientPair const> sampled_split_gpair,\n                            linalg::Matrix<GradientPair>* value_gpair) const {\n  if (!is_sampling_) {\n    return;\n  }\n  switch (sampling_method_) {\n    case TrainParam::kUniform: {\n      ApplySamplingMask(ctx, sampled_split_gpair, value_gpair);\n      break;\n    }\n    case TrainParam::kGradientBased: {\n      
std::vector<float> reg_abs_grad;\n      auto threshold = CalcSamplingInfo(ctx, value_gpair->HostView(), subsample_, &reg_abs_grad);\n      ApplyMvsWeights(ctx, sampled_split_gpair, value_gpair, reg_abs_grad, threshold);\n      break;\n    }\n    default:\n      LOG(FATAL) << \"Unknown sampling method: \" << sampling_method_;\n  }\n}\n}  // namespace xgboost::tree::cpu_impl\n"
  },
  {
    "path": "src/tree/hist/sampler.h",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_HIST_SAMPLER_H_\n#define XGBOOST_TREE_HIST_SAMPLER_H_\n\n#include <cstdint>  // for uint64_t\n#include <random>   // for bernoulli_distribution, linear_congruential_engine\n#include <vector>   // for vector\n\n#include \"../../common/math.h\"  // for Sqr\n#include \"../param.h\"           // for TrainParam\n#include \"xgboost/base.h\"       // for GradientPair, bst_idx_t\n#include \"xgboost/context.h\"    // for Context\n#include \"xgboost/data.h\"       // for MetaInfo\n#include \"xgboost/linalg.h\"     // for TensorView\n#include \"xgboost/span.h\"       // for Span\n\nnamespace xgboost::tree {\nstruct RandomReplace {\n public:\n  // similar value as for minstd_rand\n  static constexpr std::uint64_t kBase = 16807;\n  static constexpr std::uint64_t kMod = static_cast<std::uint64_t>(1) << 63;\n\n  using EngineT = std::linear_congruential_engine<uint64_t, kBase, 0, kMod>;\n\n  /*\n    Right-to-left binary method: https://en.wikipedia.org/wiki/Modular_exponentiation\n  */\n  static std::uint64_t SimpleSkip(std::uint64_t exponent, std::uint64_t initial_seed,\n                                  std::uint64_t base, std::uint64_t mod) {\n    CHECK_LE(exponent, mod);\n    std::uint64_t result = 1;\n    while (exponent > 0) {\n      if (exponent % 2 == 1) {\n        result = (result * base) % mod;\n      }\n      base = (base * base) % mod;\n      exponent = exponent >> 1;\n    }\n    // with result we can now find the new seed\n    return (result * initial_seed) % mod;\n  }\n};\n\n// TODO(jiamingy): Estimate it.\nconstexpr float kDefaultMvsLambda = 0.1f;\n\nstruct MvsGradOp {\n  float lambda;\n  template <typename GradientType>\n  XGBOOST_DEVICE float operator()(GradientType const& gpair) const {\n    auto g = gpair.GetGrad();\n    auto h = gpair.GetHess();\n    return common::Sqr(g) + lambda * common::Sqr(h);\n  }\n};\n\nXGBOOST_DEVICE inline float SamplingProbability(float u, 
float reg_abs_grad) {\n  if (::fabs(u) < kRtEps) {\n    u = ::copysign(kRtEps, u);\n  }\n  return reg_abs_grad / u;\n}\n\ntemplate <typename T>\nXGBOOST_DEVICE inline detail::GradientPairInternal<T> RescaleGrad(\n    float p, detail::GradientPairInternal<T> const& gpair) {\n  if (p >= 1.0f) {\n    return gpair;\n  }\n  return gpair * (1.0f / p);\n}\n\nnamespace cpu_impl {\n// Calculate regularized absolute gradient for each row.\nstd::vector<float> CalcRegAbsGrad(Context const* ctx, linalg::MatrixView<GradientPair const> gpairs,\n                                  std::vector<float>* p_thresholds);\n\nfloat CalculateThreshold(common::Span<float const> sorted_rag, common::Span<float const> grad_csum,\n                         bst_idx_t n_samples, bst_idx_t sample_rows);\n\nclass Sampler {\n public:\n  explicit Sampler(TrainParam const& param)\n      : sampling_method_{param.sampling_method}, subsample_{param.subsample} {}\n\n  void Sample(Context const* ctx, linalg::MatrixView<GradientPair> out);\n  void ApplySampling(Context const* ctx, linalg::MatrixView<GradientPair const> sampled_split_gpair,\n                     linalg::Matrix<GradientPair>* value_gpair) const;\n\n private:\n  int sampling_method_{TrainParam::kUniform};\n  float subsample_{1.0f};\n  bool is_sampling_{false};\n};\n}  // namespace cpu_impl\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TREE_HIST_SAMPLER_H_\n"
  },
  {
    "path": "src/tree/io_utils.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_TREE_IO_UTILS_H_\n#define XGBOOST_TREE_IO_UTILS_H_\n#include <limits>       // for numeric_limits\n#include <string>       // for string\n#include <type_traits>  // for enable_if_t, is_same_v, conditional_t\n#include <vector>       // for vector\n\n#include \"xgboost/json.h\"  // for Json\n\nnamespace xgboost {\ntemplate <bool typed>\nusing FloatArrayT = std::conditional_t<typed, F32Array const, Array const>;\ntemplate <bool typed>\nusing U8ArrayT = std::conditional_t<typed, U8Array const, Array const>;\ntemplate <bool typed>\nusing I32ArrayT = std::conditional_t<typed, I32Array const, Array const>;\ntemplate <bool typed>\nusing I64ArrayT = std::conditional_t<typed, I64Array const, Array const>;\ntemplate <bool typed, bool feature_is_64>\nusing IndexArrayT = std::conditional_t<feature_is_64, I64ArrayT<typed>, I32ArrayT<typed>>;\n\n// typed array, not boolean\ntemplate <typename JT, typename T>\nstd::enable_if_t<!std::is_same_v<T, Json> && !std::is_same_v<JT, Boolean>, T> GetElem(\n    std::vector<T> const& arr, size_t i) {\n  return arr[i];\n}\n// typed array boolean\ntemplate <typename JT, typename T>\nstd::enable_if_t<\n    !std::is_same_v<T, Json> && std::is_same_v<T, uint8_t> && std::is_same_v<JT, Boolean>, bool>\nGetElem(std::vector<T> const& arr, size_t i) {\n  return arr[i] == 1;\n}\n// json array\ntemplate <typename JT, typename T>\nstd::enable_if_t<std::is_same_v<T, Json>,\n                 std::conditional_t<std::is_same_v<JT, Integer>, int64_t,\n                                    std::conditional_t<std::is_same_v<Boolean, JT>, bool, float>>>\nGetElem(std::vector<T> const& arr, size_t i) {\n  if (std::is_same_v<JT, Boolean> && !IsA<Boolean>(arr[i])) {\n    return get<Integer const>(arr[i]) == 1;\n  }\n  return get<JT const>(arr[i]);\n}\n\nnamespace tree_field {\ninline std::string const kLossChg{\"loss_changes\"};\ninline std::string const 
kSumHess{\"sum_hessian\"};\ninline std::string const kBaseWeight{\"base_weights\"};\ninline std::string const kLeafWeight{\"leaf_weights\"};\n\ninline std::string const kSplitIdx{\"split_indices\"};\ninline std::string const kSplitCond{\"split_conditions\"};\ninline std::string const kDftLeft{\"default_left\"};\n\ninline std::string const kParent{\"parents\"};\ninline std::string const kLeft{\"left_children\"};\ninline std::string const kRight{\"right_children\"};\n}  // namespace tree_field\n\nconstexpr float DftBadValue() { return std::numeric_limits<float>::denorm_min(); }\n}  // namespace xgboost\n#endif  // XGBOOST_TREE_IO_UTILS_H_\n"
  },
  {
    "path": "src/tree/multi_target_tree_model.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"xgboost/multi_target_tree_model.h\"\n\n#include <algorithm>    // for copy_n\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t, uint8_t\n#include <limits>       // for numeric_limits\n#include <string_view>  // for string_view\n#include <utility>      // for move\n#include <vector>       // for vector\n\n#include \"../common/cuda_rt_utils.h\"  // for MemcpyAsync\n#include \"../common/linalg_op.h\"      // for cbegin\n#include \"io_utils.h\"                 // for I32ArrayT, FloatArrayT, GetElem, ...\n#include \"xgboost/base.h\"             // for bst_node_t, bst_feature_t, bst_target_t\n#include \"xgboost/json.h\"             // for Json, get, Object, Number, Integer, ...\n#include \"xgboost/logging.h\"\n#include \"xgboost/tree_model.h\"  // for TreeParam\n\nnamespace xgboost {\nMultiTargetTree::MultiTargetTree(TreeParam const* param)\n    : param_{param},\n      left_(1ul, InvalidNodeId()),\n      right_(1ul, InvalidNodeId()),\n      parent_(1ul, InvalidNodeId()),\n      split_index_(1ul, 0),\n      default_left_(1ul, 0),\n      split_conds_(1ul, DftBadValue()),\n      loss_chg_(1ul, 0.0f),\n      sum_hess_(1ul, 0.0f) {\n  CHECK_GT(param_->size_leaf_vector, 1);\n}\n\nMultiTargetTree::MultiTargetTree(MultiTargetTree const& that)\n    : param_{that.param_},\n      left_(that.left_.Size(), 0, that.left_.Device()),\n      right_(that.right_.Size(), 0, that.right_.Device()),\n      parent_(that.parent_.Size(), 0, that.parent_.Device()),\n      split_index_(that.split_index_.Size(), 0, that.split_index_.Device()),\n      default_left_(that.default_left_.Size(), 0, that.default_left_.Device()),\n      split_conds_(that.split_conds_.Size(), 0.0f, that.split_conds_.Device()),\n      weights_(that.weights_.Size(), 0.0f, that.weights_.Device()),\n      leaf_weights_(that.leaf_weights_.Size(), 0.0f, that.leaf_weights_.Device()),\n      
loss_chg_(that.loss_chg_.Size(), 0.0f, that.loss_chg_.Device()),\n      sum_hess_(that.sum_hess_.Size(), 0.0f, that.sum_hess_.Device()) {\n  this->left_.Copy(that.left_);\n  this->right_.Copy(that.right_);\n  this->parent_.Copy(that.parent_);\n  this->split_index_.Copy(that.split_index_);\n  this->default_left_.Copy(that.default_left_);\n  this->split_conds_.Copy(that.split_conds_);\n  this->weights_.Copy(that.weights_);\n  this->leaf_weights_.Copy(that.leaf_weights_);\n  this->loss_chg_.Copy(that.loss_chg_);\n  this->sum_hess_.Copy(that.sum_hess_);\n}\n\nvoid MultiTargetTree::SetRoot(linalg::VectorView<float const> weight, float sum_hess) {\n  CHECK(!weight.Empty());\n  auto const next_nidx = RegTree::kRoot + 1;\n\n  this->weights_.SetDevice(weight.Device());\n  this->weights_.Resize(weight.Size(), DftBadValue());\n\n  CHECK_LE(weight.Size(), this->NumTargets());\n  CHECK_GE(weights_.Size(), next_nidx * weight.Size());\n\n  if (weight.Device().IsCUDA()) {\n    auto out_weight = weights_.DeviceSpan().subspan(RegTree::kRoot * weight.Size(), weight.Size());\n    CHECK(weight.Contiguous());\n    curt::MemcpyAsync(out_weight.data(), weight.Values().data(), out_weight.size_bytes(),\n                      curt::DefaultStream());\n  } else {\n    auto out_weight = weights_.HostSpan().subspan(RegTree::kRoot * weight.Size(), weight.Size());\n    for (std::size_t i = 0, n = weight.Size(); i < n; ++i) {\n      out_weight[i] = weight(i);\n    }\n  }\n\n  // Set root statistics\n  sum_hess_.Resize(next_nidx, 0.0f);\n  sum_hess_.HostVector()[RegTree::kRoot] = sum_hess;\n  loss_chg_.Resize(next_nidx, 0.0f);\n\n  CHECK_EQ(this->param_->num_nodes, 1);\n  CHECK_EQ(this->NumSplitTargets(), weight.Size());\n}\n\nvoid MultiTargetTree::Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond,\n                             bool default_left, linalg::VectorView<float const> base_weight,\n                             linalg::VectorView<float const> left_weight,\n                   
          linalg::VectorView<float const> right_weight, float loss_chg,\n                             float sum_hess, float left_sum, float right_sum) {\n  CHECK(this->IsLeaf(nidx));\n  CHECK_GE(parent_.Size(), 1);\n  CHECK_EQ(parent_.Size(), left_.Size());\n  CHECK_EQ(left_.Size(), right_.Size());\n  auto n_split_targets = this->NumSplitTargets();\n  CHECK_EQ(base_weight.Size(), n_split_targets);\n\n  std::size_t n = param_->num_nodes + 2;\n  CHECK_LT(split_idx, this->param_->num_feature);\n  left_.Resize(n, InvalidNodeId());\n  right_.Resize(n, InvalidNodeId());\n  parent_.Resize(n, InvalidNodeId());\n\n  auto left_child = parent_.Size() - 2;\n  auto right_child = parent_.Size() - 1;\n\n  CHECK_NE(left_child, nidx);\n  left_.HostVector()[nidx] = left_child;\n  right_.HostVector()[nidx] = right_child;\n\n  auto& h_parent = parent_.HostVector();\n  if (nidx != 0) {\n    CHECK_NE(h_parent[nidx], InvalidNodeId());\n  }\n\n  h_parent[left_child] = nidx;\n  h_parent[right_child] = nidx;\n\n  split_index_.Resize(n);\n  split_index_.HostVector()[nidx] = split_idx;\n\n  split_conds_.Resize(n, DftBadValue());\n  split_conds_.HostVector()[nidx] = split_cond;\n\n  default_left_.Resize(n);\n  default_left_.HostVector()[nidx] = static_cast<std::uint8_t>(default_left);\n\n  // Set weights\n  weights_.Resize(n * base_weight.Size());\n  auto p_weight = this->NodeWeight(nidx, n_split_targets);\n  CHECK_GE(p_weight.Size(), base_weight.Size());\n  auto l_weight = this->NodeWeight(left_child, n_split_targets);\n  CHECK_GE(l_weight.Size(), left_weight.Size());\n  auto r_weight = this->NodeWeight(right_child, n_split_targets);\n  CHECK_GE(r_weight.Size(), right_weight.Size());\n\n  CHECK_EQ(base_weight.Size(), left_weight.Size());\n  CHECK_EQ(base_weight.Size(), right_weight.Size());\n\n  for (std::size_t i = 0, n = base_weight.Size(); i < n; ++i) {\n    p_weight(i) = base_weight(i);\n    l_weight(i) = left_weight(i);\n    r_weight(i) = right_weight(i);\n  }\n\n  loss_chg_.Resize(n, 
0.0f);\n  loss_chg_.HostVector()[nidx] = loss_chg;\n\n  sum_hess_.Resize(n, 0.0f);\n  auto& h_hess = sum_hess_.HostVector();\n  h_hess[nidx] = sum_hess;\n  h_hess[left_child] = left_sum;\n  h_hess[right_child] = right_sum;\n}\n\nvoid MultiTargetTree::SetLeaves(std::vector<bst_node_t> leaves, common::Span<float const> weights) {\n  auto is_partial_tree = this->NumLeaves() == 0;\n  CHECK(is_partial_tree || leaves.size() == this->NumLeaves());\n  auto n_targets = this->NumTargets();\n  std::int32_t nidx_in_set = 0;\n  auto n_leaves = leaves.size();\n  this->leaf_weights_.Resize(n_leaves * n_targets);\n  auto h_weights = this->leaf_weights_.HostSpan();\n  // Reuse the right child as the leaf weight mapping.\n  auto h_leaf_mapping = this->right_.HostSpan();\n\n  for (auto nidx : leaves) {\n    CHECK(this->IsLeaf(nidx));\n    auto w_in = weights.subspan(nidx_in_set * n_targets, n_targets);\n    auto w_out = h_weights.subspan(nidx_in_set * n_targets, n_targets);\n    std::copy(w_in.cbegin(), w_in.cend(), w_out.begin());\n    if (is_partial_tree) {\n      CHECK_EQ(h_leaf_mapping[nidx], InvalidNodeId());\n    }\n    h_leaf_mapping[nidx] = nidx_in_set;\n    nidx_in_set++;\n  }\n}\n\nvoid MultiTargetTree::SetLeaves() {\n  CHECK_EQ(this->NumLeaves(), 0);\n  auto n_targets = this->NumTargets();\n  CHECK_EQ(n_targets, this->NumSplitTargets());\n  auto n_nodes = this->param_->num_nodes;\n  // Reuse the right child as the leaf weight mapping.\n  auto h_leaf_mapping = this->right_.HostSpan();\n\n  bst_node_t nidx_in_set = 0;\n  auto& h_weights = this->leaf_weights_.HostVector();\n  CHECK(h_weights.empty());\n  for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {\n    if (!IsLeaf(nidx)) {\n      continue;\n    }\n    auto w_in = this->NodeWeight(nidx);\n    h_weights.resize((nidx_in_set + 1) * n_targets);\n    auto w_out = common::Span{h_weights}.subspan(nidx_in_set * n_targets, n_targets);\n    std::copy(linalg::cbegin(w_in), linalg::cend(w_in), w_out.begin());\n    
CHECK_EQ(h_leaf_mapping[nidx], InvalidNodeId());\n    h_leaf_mapping[nidx] = nidx_in_set;\n    nidx_in_set++;\n  }\n}\n\ntemplate <bool typed, bool feature_is_64>\nvoid LoadModelImpl(Json const& in, HostDeviceVector<float>* p_weights,\n                   HostDeviceVector<float>* p_leaf_weights, HostDeviceVector<bst_node_t>* p_lefts,\n                   HostDeviceVector<bst_node_t>* p_rights, HostDeviceVector<bst_node_t>* p_parents,\n                   HostDeviceVector<float>* p_conds, HostDeviceVector<bst_feature_t>* p_fidx,\n                   HostDeviceVector<std::uint8_t>* p_dft_left, HostDeviceVector<float>* p_gain,\n                   HostDeviceVector<float>* p_sum_hess) {\n  namespace tf = tree_field;\n\n  auto get_float = [&](std::string_view name, HostDeviceVector<float>* p_out) {\n    auto& values = get<FloatArrayT<typed>>(get<Object const>(in).find(name)->second);\n    auto& out = *p_out;\n    out.Resize(values.size());\n    auto& h_out = out.HostVector();\n    for (std::size_t i = 0; i < values.size(); ++i) {\n      h_out[i] = GetElem<Number>(values, i);\n    }\n  };\n  get_float(tf::kBaseWeight, p_weights);\n  get_float(tf::kLeafWeight, p_leaf_weights);\n  get_float(tf::kSplitCond, p_conds);\n\n  auto get_nidx = [&](std::string_view name, HostDeviceVector<bst_node_t>* p_nidx) {\n    auto& nidx = get<I32ArrayT<typed>>(get<Object const>(in).find(name)->second);\n    auto& out_nidx = p_nidx->HostVector();\n    out_nidx.resize(nidx.size());\n    for (std::size_t i = 0; i < nidx.size(); ++i) {\n      out_nidx[i] = GetElem<Integer>(nidx, i);\n    }\n  };\n  get_nidx(tf::kLeft, p_lefts);\n  get_nidx(tf::kRight, p_rights);\n  get_nidx(tf::kParent, p_parents);\n\n  auto const& splits = get<IndexArrayT<typed, feature_is_64> const>(in[tf::kSplitIdx]);\n  p_fidx->Resize(splits.size());\n  auto& out_fidx = p_fidx->HostVector();\n  for (std::size_t i = 0; i < splits.size(); ++i) {\n    out_fidx[i] = GetElem<Integer>(splits, i);\n  }\n\n  auto const& dft_left = 
get<U8ArrayT<typed> const>(in[tf::kDftLeft]);\n  p_dft_left->Resize(dft_left.size());\n  auto& out_dft_l = p_dft_left->HostVector();\n  for (std::size_t i = 0; i < dft_left.size(); ++i) {\n    out_dft_l[i] = GetElem<Boolean>(dft_left, i);\n  }\n\n  // Load statistics\n  get_float(tf::kLossChg, p_gain);\n  get_float(tf::kSumHess, p_sum_hess);\n}\n\nvoid MultiTargetTree::LoadModel(Json const& in) {\n  namespace tf = tree_field;\n  bool typed = IsA<F32Array>(in[tf::kBaseWeight]);\n  bool feature_is_64 = IsA<I64Array>(in[tf::kSplitIdx]);\n\n  if (typed && feature_is_64) {\n    LoadModelImpl<true, true>(in, &weights_, &leaf_weights_, &left_, &right_, &parent_,\n                              &split_conds_, &split_index_, &default_left_, &loss_chg_, &sum_hess_);\n  } else if (typed && !feature_is_64) {\n    LoadModelImpl<true, false>(in, &weights_, &leaf_weights_, &left_, &right_, &parent_,\n                               &split_conds_, &split_index_, &default_left_, &loss_chg_,\n                               &sum_hess_);\n  } else if (!typed && feature_is_64) {\n    LoadModelImpl<false, true>(in, &weights_, &leaf_weights_, &left_, &right_, &parent_,\n                               &split_conds_, &split_index_, &default_left_, &loss_chg_,\n                               &sum_hess_);\n  } else {\n    LoadModelImpl<false, false>(in, &weights_, &leaf_weights_, &left_, &right_, &parent_,\n                                &split_conds_, &split_index_, &default_left_, &loss_chg_,\n                                &sum_hess_);\n  }\n}\n\nvoid MultiTargetTree::SaveModel(Json* p_out) const {\n  CHECK(p_out);\n  auto& out = *p_out;\n\n  auto n_nodes = param_->num_nodes;\n\n  // nodes\n  I32Array lefts(n_nodes);\n  I32Array rights(n_nodes);\n  I32Array parents(n_nodes);\n  F32Array conds(n_nodes);\n  U8Array default_left(n_nodes);\n  F32Array weights(this->weights_.Size());\n  F32Array loss_chg(n_nodes);\n  F32Array sum_hess(n_nodes);\n\n  auto n_leaves = this->NumLeaves();\n  
CHECK_GE(n_leaves, 1);\n  F32Array leaf_weights(n_leaves * this->NumTargets());\n\n  auto const& h_left = this->left_.ConstHostVector();\n  auto const& h_right = this->right_.ConstHostVector();\n  auto const& h_parent = this->parent_.ConstHostVector();\n  auto const& h_split_index = this->split_index_.ConstHostVector();\n  auto const& h_split_conds = this->split_conds_.ConstHostVector();\n  auto const& h_default_left = this->default_left_.ConstHostVector();\n  auto const& h_loss_chg = this->loss_chg_.ConstHostVector();\n  auto const& h_sum_hess = this->sum_hess_.ConstHostVector();\n\n  auto save_tree = [&](auto* p_indices_array) {\n    auto& indices_array = *p_indices_array;\n    for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {\n      CHECK_LT(nidx, left_.Size());\n      lefts.Set(nidx, h_left[nidx]);\n      CHECK_LT(nidx, right_.Size());\n      rights.Set(nidx, h_right[nidx]);\n      CHECK_LT(nidx, parent_.Size());\n      parents.Set(nidx, h_parent[nidx]);\n      CHECK_LT(nidx, split_index_.Size());\n      indices_array.Set(nidx, h_split_index[nidx]);\n      conds.Set(nidx, h_split_conds[nidx]);\n      default_left.Set(nidx, h_default_left[nidx]);\n      loss_chg.Set(nidx, h_loss_chg[nidx]);\n      sum_hess.Set(nidx, h_sum_hess[nidx]);\n\n      // Save internal weights\n      auto in_weight = this->NodeWeight(nidx);\n      auto weight_out = common::Span<float>(weights.GetArray())\n                            .subspan(nidx * this->NumSplitTargets(), this->NumSplitTargets());\n      CHECK_EQ(in_weight.Size(), weight_out.size());\n      std::copy_n(in_weight.Values().data(), in_weight.Size(), weight_out.data());\n\n      // Save leaf weights\n      if (IsLeaf(nidx)) {\n        auto in_weight = this->LeafValue(nidx);\n        auto leaf_idx = this->LeafIdx(nidx);\n        auto weight_out = common::Span<float>(leaf_weights.GetArray())\n                              .subspan(leaf_idx * this->NumTargets(), this->NumTargets());\n        CHECK_EQ(in_weight.Size(), 
weight_out.size());\n        std::copy_n(in_weight.Values().data(), in_weight.Size(), weight_out.data());\n      }\n    }\n  };\n\n  namespace tf = tree_field;\n\n  if (this->param_->num_feature >\n      static_cast<bst_feature_t>(std::numeric_limits<std::int32_t>::max())) {\n    I64Array indices_64(n_nodes);\n    save_tree(&indices_64);\n    out[tf::kSplitIdx] = std::move(indices_64);\n  } else {\n    I32Array indices_32(n_nodes);\n    save_tree(&indices_32);\n    out[tf::kSplitIdx] = std::move(indices_32);\n  }\n\n  out[tf::kBaseWeight] = std::move(weights);\n  out[tf::kLeafWeight] = std::move(leaf_weights);\n  out[tf::kLeft] = std::move(lefts);\n  out[tf::kRight] = std::move(rights);\n  out[tf::kParent] = std::move(parents);\n\n  out[tf::kSplitCond] = std::move(conds);\n  out[tf::kDftLeft] = std::move(default_left);\n  out[tf::kLossChg] = std::move(loss_chg);\n  out[tf::kSumHess] = std::move(sum_hess);\n}\n\n[[nodiscard]] bst_target_t MultiTargetTree::NumTargets() const { return param_->size_leaf_vector; }\n[[nodiscard]] bst_target_t MultiTargetTree::NumSplitTargets() const {\n  auto n_targets = this->weights_.Size() / this->left_.Size();\n  CHECK_NE(n_targets, 0);\n  return n_targets;\n}\n[[nodiscard]] std::size_t MultiTargetTree::Size() const { return parent_.Size(); }\n\n[[nodiscard]] MultiTargetTree* MultiTargetTree::Copy(TreeParam const* param) const {\n  auto ptr = new MultiTargetTree{*this};\n  ptr->param_ = param;\n  return ptr;\n}\n\n[[nodiscard]] std::size_t MultiTargetTree::MemCostBytes() const {\n  std::size_t n_bytes = 0;\n  n_bytes += left_.SizeBytes();\n  n_bytes += right_.SizeBytes();\n  n_bytes += parent_.SizeBytes();\n  n_bytes += split_index_.SizeBytes();\n  n_bytes += default_left_.SizeBytes();\n  n_bytes += split_conds_.SizeBytes();\n  n_bytes += weights_.SizeBytes();\n  n_bytes += leaf_weights_.SizeBytes();\n  n_bytes += loss_chg_.SizeBytes();\n  n_bytes += sum_hess_.SizeBytes();\n  return n_bytes;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/tree/param.cc",
    "content": "/*!\n * Copyright by Contributors 2019\n */\n#include <cctype>\n#include <cmath>\n#include <iostream>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"xgboost/json.h\"\n#include \"param.h\"\n\nnamespace std {\n/**\n * @brief Read a list of integers (e.g. monotone constraints) from a stream.\n *\n * Two spellings are accepted: a bare integer starting with a digit, such as `1`, or a\n * parenthesised tuple such as `(1, 0, -1)`. Inside a tuple each value may carry an `L`\n * suffix and the last value may be followed by a trailing comma; `()` gives an empty\n * list. An unexpected character where a value, `,` or `)` is required sets the fail bit.\n *\n * @param is Stream to read from.\n * @param t  Output values; always cleared before parsing.\n *\n * @return The input stream.\n */\nstd::istream &operator>>(std::istream &is, std::vector<int> &t) {\n  // The <cctype> classifiers are undefined for negative arguments other than EOF, so a\n  // (possibly signed) char is converted to unsigned char before it is classified.\n  auto is_digit = [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };\n  auto is_space = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };\n\n  t.clear();\n  // Skip leading whitespace, then expect either a bare integer or the opening `(`.\n  while (true) {\n    char ch = is.peek();\n    if (is_digit(ch)) {\n      int idx;\n      if (is >> idx) {\n        t.emplace_back(idx);\n      }\n      return is;\n    }\n    is.get();\n    if (ch == '(') {\n      break;\n    }\n    if (!is_space(ch)) {\n      is.setstate(std::ios::failbit);\n      return is;\n    }\n  }\n  int idx;\n  std::vector<int> tmp;\n  // Skip whitespace after `(`.\n  while (true) {\n    char ch = is.peek();\n    if (is_space(ch)) {\n      is.get();\n    } else {\n      break;\n    }\n  }\n  // `()` is an empty list.\n  if (is.peek() == ')') {\n    is.get();\n    return is;\n  }\n  while (is >> idx) {\n    tmp.push_back(idx);\n    char ch;\n    do {\n      ch = is.get();\n    } while (is_space(ch));\n    // Tolerate an `L` suffix after the value.\n    if (ch == 'L') {\n      ch = is.get();\n    }\n    if (ch == ',') {\n      // Skip whitespace after the comma; a `)` here closes a tuple with a trailing comma.\n      while (true) {\n        ch = is.peek();\n        if (is_space(ch)) {\n          is.get();\n          continue;\n        }\n        if (ch == ')') {\n          is.get();\n          break;\n        }\n        break;\n      }\n      if (ch == ')') {\n        break;\n      }\n    } else if (ch == ')') {\n      break;\n    } else {\n      is.setstate(std::ios::failbit);\n      return is;\n    }\n  }\n  t = std::move(tmp);\n  return is;\n}\n}  // namespace std\n\nnamespace xgboost {\n/**\n * @brief Parse interaction constraints from a JSON string such as `[[0, 1], [2, 3, 4]]`.\n *\n * Each inner array becomes one group of feature indices. Values stored as JSON integers\n * are used directly; floating point values are accepted only when they are integral.\n *\n * @param constraint_str JSON text holding an array of arrays.\n * @param p_out          Output groups; resized to the number of inner arrays.\n */\nvoid ParseInteractionConstraint(\n    std::string const &constraint_str,\n    std::vector<std::vector<bst_feature_t>> *p_out) {\n  auto &out = *p_out;\n  auto j_inc = Json::Load({constraint_str.c_str(), constraint_str.size()});\n  auto const &all = get<Array>(j_inc);\n  out.resize(all.size());\n  for (std::size_t i = 0; i < all.size(); ++i) {\n    auto const &set = get<Array const>(all[i]);\n    for (auto const &v : set) {\n      if (XGBOOST_EXPECT(IsA<Integer const>(v), true)) {\n        auto u = static_cast<bst_feature_t>(get<Integer const>(v));\n        out[i].emplace_back(u);\n      } else if (IsA<Number>(v)) {\n        double d = get<Number const>(v);\n        // Reject values with a fractional part, a feature index must be integral.\n        CHECK_EQ(std::floor(d), d)\n            << \"Found floating point number in interaction constraints\";\n        out[i].emplace_back(static_cast<bst_feature_t>(d));\n      } else {\n        LOG(FATAL) << \"Unknown value type for interaction constraint:\"\n                   << v.GetValue().TypeStr();\n      }\n    }\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/tree/param.h",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file param.h\n * \\brief training parameters, statistics used to support tree construction.\n * \\author Tianqi Chen\n */\n#ifndef XGBOOST_TREE_PARAM_H_\n#define XGBOOST_TREE_PARAM_H_\n\n#include <algorithm>\n#include <cmath>\n#include <cstring>\n#include <string>\n#include <vector>\n\n#include \"../common/linalg_op.h\"\n#include \"../common/math.h\"\n#include \"xgboost/linalg.h\"\n#include \"xgboost/parameter.h\"\n\nnamespace xgboost {\nnamespace tree {\n\n/*! \\brief training parameters for regression tree */\nstruct TrainParam : public XGBoostParameter<TrainParam> {\n  // learning step size for a time\n  float learning_rate;\n  // minimum loss change required for a split\n  float min_split_loss;\n  // maximum depth of a tree\n  bst_node_t max_depth;\n  // maximum number of leaves\n  bst_node_t max_leaves;\n  // if using histogram based algorithm, maximum number of bins per feature\n  bst_bin_t max_bin;\n  // growing policy\n  enum TreeGrowPolicy { kDepthWise = 0, kLossGuide = 1 };\n  int grow_policy;\n\n  std::uint32_t max_cat_to_onehot{4};\n\n  bst_bin_t max_cat_threshold{64};\n\n  //----- the rest parameters are less important ----\n  // minimum amount of hessian(weight) allowed in a child\n  float min_child_weight;\n  // L2 regularization factor\n  float reg_lambda;\n  // L1 regularization factor\n  float reg_alpha;\n  // maximum delta update we can add in weight estimation\n  // this parameter can be used to stabilize update\n  // default=0 means no constraint on weight delta\n  float max_delta_step;\n  // whether we want to do subsample\n  float subsample;\n  // sampling method\n  enum SamplingMethod { kUniform = 0, kGradientBased = 1 };\n  int sampling_method;\n  // whether to subsample columns in each split (node)\n  float colsample_bynode;\n  // whether to subsample columns in each level\n  float colsample_bylevel;\n  // whether to subsample columns during tree construction\n  float 
colsample_bytree;\n  // whether refresh updater needs to update the leaf values\n  bool refresh_leaf;\n\n  std::vector<int> monotone_constraints;\n  // Stored as a JSON string.\n  std::string interaction_constraints;\n\n  // ------ From CPU quantile histogram -------.\n  // percentage threshold for treating a feature as sparse\n  // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse\n  static constexpr double DftSparseThreshold() { return 0.2; }\n\n  double sparse_threshold{DftSparseThreshold()};\n\n  // declare the parameters\n  DMLC_DECLARE_PARAMETER(TrainParam) {\n    DMLC_DECLARE_FIELD(learning_rate)\n        .set_lower_bound(0.0f)\n        .set_default(0.3f)\n        .describe(\"Learning rate(step size) of update.\");\n    DMLC_DECLARE_FIELD(min_split_loss)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\n            \"Minimum loss reduction required to make a further partition.\");\n    DMLC_DECLARE_FIELD(max_depth)\n        .set_lower_bound(0)\n        .set_default(6)\n        .describe(\n            \"Maximum depth of the tree; 0 indicates no limit; a limit is required \"\n            \"for depthwise policy\");\n    DMLC_DECLARE_FIELD(max_leaves).set_lower_bound(0).set_default(0).describe(\n        \"Maximum number of leaves; 0 indicates no limit.\");\n    DMLC_DECLARE_FIELD(max_bin).set_lower_bound(2).set_default(256).describe(\n        \"if using histogram-based algorithm, maximum number of bins per feature\");\n    DMLC_DECLARE_FIELD(grow_policy)\n        .set_default(kDepthWise)\n        .add_enum(\"depthwise\", kDepthWise)\n        .add_enum(\"lossguide\", kLossGuide)\n        .describe(\n            \"Tree growing policy. 0: favor splitting at nodes closest to the node, \"\n            \"i.e. grow depth-wise. 1: favor splitting at nodes with highest loss \"\n            \"change. (cf. 
LightGBM)\");\n    DMLC_DECLARE_FIELD(max_cat_to_onehot)\n        .set_default(4)\n        .set_lower_bound(1)\n        .describe(\"Maximum number of categories to use one-hot encoding based split.\");\n    DMLC_DECLARE_FIELD(max_cat_threshold)\n        .set_default(64)\n        .set_lower_bound(1)\n        .describe(\n            \"Maximum number of categories considered for split. Used only by partition-based\"\n            \"splits.\");\n    DMLC_DECLARE_FIELD(min_child_weight)\n        .set_lower_bound(0.0f)\n        .set_default(1.0f)\n        .describe(\"Minimum sum of instance weight(hessian) needed in a child.\");\n    DMLC_DECLARE_FIELD(reg_lambda)\n        .set_lower_bound(0.0f)\n        .set_default(1.0f)\n        .describe(\"L2 regularization on leaf weight\");\n    DMLC_DECLARE_FIELD(reg_alpha)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\"L1 regularization on leaf weight\");\n    DMLC_DECLARE_FIELD(max_delta_step)\n        .set_lower_bound(0.0f)\n        .set_default(0.0f)\n        .describe(\"Maximum delta step we allow each tree's weight estimate to be. \"\\\n                  \"If the value is set to 0, it means there is no constraint\");\n    DMLC_DECLARE_FIELD(subsample)\n        .set_range(0.0f, 1.0f)\n        .set_default(1.0f)\n        .describe(\"Row subsample ratio of training instance.\");\n    DMLC_DECLARE_FIELD(sampling_method)\n        .set_default(kUniform)\n        .add_enum(\"uniform\", kUniform)\n        .add_enum(\"gradient_based\", kGradientBased)\n        .describe(\n            \"Sampling method. 0: select random training instances uniformly. \"\n            \"1: select random training instances with higher probability when the \"\n            \"gradient and hessian are larger. (cf. 
CatBoost)\");\n    DMLC_DECLARE_FIELD(colsample_bynode)\n        .set_range(0.0f, 1.0f)\n        .set_default(1.0f)\n        .describe(\"Subsample ratio of columns, resample on each node (split).\");\n    DMLC_DECLARE_FIELD(colsample_bylevel)\n        .set_range(0.0f, 1.0f)\n        .set_default(1.0f)\n        .describe(\"Subsample ratio of columns, resample on each level.\");\n    DMLC_DECLARE_FIELD(colsample_bytree)\n        .set_range(0.0f, 1.0f)\n        .set_default(1.0f)\n        .describe(\"Subsample ratio of columns, resample on each tree construction.\");\n    DMLC_DECLARE_FIELD(refresh_leaf)\n        .set_default(true)\n        .describe(\"Whether the refresh updater needs to update leaf values.\");\n    DMLC_DECLARE_FIELD(monotone_constraints)\n        .set_default(std::vector<int>())\n        .describe(\"Constraint of variable monotonicity\");\n    DMLC_DECLARE_FIELD(interaction_constraints)\n        .set_default(\"\")\n        .describe(\"Constraints for interaction representing permitted interactions.\"\n                  \"The constraints must be specified in the form of a nest list,\"\n                  \"e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of\"\n                  \"indices of features that are allowed to interact with each other.\"\n                  \"See tutorial for more information\");\n\n    // ------ From cpu quantile histogram -------.\n    DMLC_DECLARE_FIELD(sparse_threshold)\n        .set_range(0, 1.0)\n        .set_default(DftSparseThreshold())\n        .describe(\"percentage threshold for treating a feature as sparse\");\n\n    // add alias of parameters\n    DMLC_DECLARE_ALIAS(reg_lambda, lambda);\n    DMLC_DECLARE_ALIAS(reg_alpha, alpha);\n    DMLC_DECLARE_ALIAS(min_split_loss, gamma);\n    DMLC_DECLARE_ALIAS(learning_rate, eta);\n  }\n\n  /*! 
\\brief given the loss change, whether we need to invoke pruning */\n  [[nodiscard]] bool NeedPrune(double loss_chg, int depth) const {\n    return loss_chg < this->min_split_loss || (this->max_depth != 0 && depth > this->max_depth);\n  }\n\n  [[nodiscard]] bst_node_t MaxNodes() const {\n    if (this->max_depth == 0 && this->max_leaves == 0) {\n      LOG(FATAL) << \"Max leaves and max depth cannot both be unconstrained.\";\n    }\n    bst_node_t n_nodes{0};\n    if (this->max_leaves > 0) {\n      n_nodes = this->max_leaves * 2 - 1;\n    } else {\n      // bst_node_t will overflow.\n      CHECK_LE(this->max_depth, 30)\n          << \"max_depth can not be greater than 30 as that might generate 2^31 - 1\"\n             \"nodes.\";\n      // same as: (1 << (max_depth + 1)) - 1, but avoids 1 << 31, which overflows.\n      n_nodes = (1 << this->max_depth) + ((1 << this->max_depth) - 1);\n    }\n    CHECK_GT(n_nodes, 0);\n    return n_nodes;\n  }\n};\n\n/*! \\brief Loss functions */\n\n// functions for L1 cost\ntemplate <typename T1, typename T2>\nXGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {\n  if (w > + alpha) {\n    return w - alpha;\n  }\n  if (w < - alpha) {\n    return w + alpha;\n  }\n  return 0.0;\n}\n\n// calculate the cost of loss function\ntemplate <typename TrainingParams, typename T>\nXGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, T sum_hess, T w) {\n  return -(static_cast<T>(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));\n}\n\n// calculate weight given the statistics\ntemplate <typename TrainingParams, typename T>\nXGBOOST_DEVICE std::enable_if_t<std::is_floating_point_v<T>, T> CalcWeight(TrainingParams const &p,\n                                                                           T sum_grad, T sum_hess) {\n  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {\n    return 0.0;\n  }\n  T dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);\n  if (p.max_delta_step 
!= 0.0f && ::fabs(dw) > p.max_delta_step) {\n    dw = ::copysign(p.max_delta_step, dw);\n  }\n  return dw;\n}\n\n// calculate the cost of loss function\ntemplate <typename TrainingParams, typename T>\nXGBOOST_DEVICE T CalcGain(TrainingParams const &p, T sum_grad, T sum_hess) {\n  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {\n    return static_cast<T>(0.0);\n  }\n  if (p.max_delta_step == 0.0f) {\n    if (p.reg_alpha == 0.0f) {\n      return common::Sqr(sum_grad) / (sum_hess + p.reg_lambda);\n    } else {\n      return common::Sqr(ThresholdL1(sum_grad, p.reg_alpha)) / (sum_hess + p.reg_lambda);\n    }\n  } else {\n    T w = CalcWeight(p, sum_grad, sum_hess);\n    T ret = CalcGainGivenWeight(p, sum_grad, sum_hess, w);\n    if (p.reg_alpha == 0.0f) {\n      return ret;\n    } else {\n      return ret + p.reg_alpha * std::abs(w);\n    }\n  }\n}\n\ntemplate <typename TrainingParams,\n          typename StatT, typename T = decltype(StatT().GetHess())>\nXGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) {\n  return CalcGain(p, stat.GetGrad(), stat.GetHess());\n}\n\n// Used in GPU code where GradientPair is used for gradient sum, not GradStats.\ntemplate <typename TrainingParams, typename GpairT>\nXGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {\n  return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());\n}\n\n/**\n * @brief multi-target weight, calculated with learning rate.\n */\ninline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,\n                       float eta, linalg::VectorView<float> out_w) {\n  for (bst_target_t t = 0, n_targets = out_w.Size(); t < n_targets; ++t) {\n    out_w(t) = CalcWeight(p, grad_sum(t).GetGrad(), grad_sum(t).GetHess()) * eta;\n  }\n}\n\n/**\n * @brief multi-target weight\n */\ninline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,\n                       linalg::VectorView<float> 
out_w) {\n  return CalcWeight(p, grad_sum, 1.0f, out_w);\n}\n\ninline double CalcGainGivenWeight(TrainParam const &p,\n                                  linalg::VectorView<GradientPairPrecise const> sum_grad,\n                                  linalg::VectorView<float const> weight) {\n  double gain{0};\n  for (bst_target_t t = 0, n_targets = weight.Size(); t < n_targets; ++t) {\n    gain += -weight(t) * ThresholdL1(sum_grad(t).GetGrad(), p.reg_alpha);\n  }\n  return gain;\n}\n\n/*! \\brief core statistics used for tree construction */\nstruct XGBOOST_ALIGNAS(16) GradStats {\n  using GradType = double;\n  /*! \\brief sum gradient statistics */\n  GradType sum_grad { 0 };\n  /*! \\brief sum hessian statistics */\n  GradType sum_hess { 0 };\n\n public:\n  [[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }\n  [[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }\n\n  friend std::ostream& operator<<(std::ostream& os, GradStats s) {\n    os << s.GetGrad() << \"/\" << s.GetHess();\n    return os;\n  }\n\n  XGBOOST_DEVICE GradStats() {\n    static_assert(sizeof(GradStats) == 16,\n                  \"Size of GradStats is not 16 bytes.\");\n  }\n\n  template <typename GpairT>\n  XGBOOST_DEVICE explicit GradStats(const GpairT &sum)\n      : sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {}\n  explicit GradStats(const GradType grad, const GradType hess)\n      : sum_grad(grad), sum_hess(hess) {}\n  /*!\n   * \\brief accumulate statistics\n   * \\param p the gradient pair\n   */\n  inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); }\n\n  /*! \\brief add statistics to the data */\n  inline void Add(const GradStats& b) {\n    sum_grad += b.sum_grad;\n    sum_hess += b.sum_hess;\n  }\n  /*! \\brief same as add, reduce is used in All Reduce */\n  inline static void Reduce(GradStats& a, const GradStats& b) { // NOLINT(*)\n    a.Add(b);\n  }\n  /*! 
\\brief set current value to a - b */\n  inline void SetSubstract(const GradStats& a, const GradStats& b) {\n    sum_grad = a.sum_grad - b.sum_grad;\n    sum_hess = a.sum_hess - b.sum_hess;\n  }\n  /*! \\return whether the statistics is not used yet */\n  [[nodiscard]] bool Empty() const { return sum_hess == 0.0; }\n  /*! \\brief add statistics to the data */\n  inline void Add(GradType grad, GradType hess) {\n    sum_grad += grad;\n    sum_hess += hess;\n  }\n};\n\n// Helper functions for copying gradient statistic, one for vector leaf, another for normal scalar.\ntemplate <typename T, typename U>\nstd::vector<T> &CopyStats(linalg::VectorView<U> const &src, std::vector<T> *dst) {  // NOLINT\n  dst->resize(src.Size());\n  std::copy(linalg::cbegin(src), linalg::cend(src), dst->begin());\n  return *dst;\n}\n\ninline GradStats &CopyStats(GradStats const &src, GradStats *dst) {  // NOLINT\n  *dst = src;\n  return *dst;\n}\n\n/*!\n * \\brief statistics that is helpful to store\n *   and represent a split solution for the tree\n */\ntemplate<typename GradientT>\nstruct SplitEntryContainer {\n  /*! \\brief loss change after split this node */\n  bst_float loss_chg {0.0f};\n  /*! 
\\brief split index */\n  bst_feature_t sindex{0};\n  bst_float split_value{0.0f};\n  std::vector<std::uint32_t> cat_bits;\n  bool is_cat{false};\n\n  GradientT left_sum;\n  GradientT right_sum;\n\n  SplitEntryContainer() = default;\n\n  friend std::ostream &operator<<(std::ostream &os, SplitEntryContainer const &s) {\n    os << \"loss_chg: \" << s.loss_chg << \"\\n\"\n       << \"dft_left: \" << s.DefaultLeft() << \"\\n\"\n       << \"split_index: \" << s.SplitIndex() << \"\\n\"\n       << \"split_value: \" << s.split_value << \"\\n\"\n       << \"is_cat: \" << s.is_cat << \"\\n\";\n    if constexpr (std::is_same_v<GradStats, GradientT>) {\n      os << \"left_sum: \" << s.left_sum << \"\\n\"\n         << \"right_sum: \" << s.right_sum << std::endl;\n    } else {\n      auto print_vec = [&](auto const &vec) {\n        for (std::size_t i = 0; i < vec.size(); ++i) {\n          os << vec[i];\n          if (i != vec.size() - 1) {\n            os << \", \";\n          }\n        }\n      };\n\n      os << \"left_sum: [\";\n      print_vec(s.left_sum);\n      os << \"]\\n\";\n\n      os << \"right_sum: [\";\n      print_vec(s.right_sum);\n      os << \"]\\n\";\n    }\n\n    return os;\n  }\n\n  /**\n   * @brief Copy primitive fields into this, and collect cat_bits into a vector.\n   *\n   * This is used for allgather.\n   *\n   * @param that The other entry to copy from\n   * @param collected_cat_bits The vector to collect cat_bits\n   * @param cat_bits_sizes The sizes of the collected cat_bits\n   */\n  void CopyAndCollect(SplitEntryContainer<GradientT> const &that,\n                      std::vector<uint32_t> *collected_cat_bits,\n                      std::vector<std::size_t> *cat_bits_sizes) {\n    loss_chg = that.loss_chg;\n    sindex = that.sindex;\n    split_value = that.split_value;\n    is_cat = that.is_cat;\n    static_assert(std::is_trivially_copyable_v<GradientT>);\n    left_sum = that.left_sum;\n    right_sum = that.right_sum;\n    
collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),\n                               that.cat_bits.cend());\n    cat_bits_sizes->emplace_back(that.cat_bits.size());\n  }\n\n  /**\n   * @brief Copy primitive fields into this, and collect cat_bits and gradient sums into vectors.\n   *\n   * This is used for allgather.\n   *\n   * @param that The other entry to copy from\n   * @param collected_cat_bits The vector to collect cat_bits\n   * @param cat_bits_sizes The sizes of the collected cat_bits\n   * @param collected_gradients The vector to collect gradients\n   */\n  template <typename G>\n  void CopyAndCollect(SplitEntryContainer<GradientT> const &that,\n                      std::vector<uint32_t> *collected_cat_bits,\n                      std::vector<std::size_t> *cat_bits_sizes,\n                      std::vector<G> *collected_gradients) {\n    loss_chg = that.loss_chg;\n    sindex = that.sindex;\n    split_value = that.split_value;\n    is_cat = that.is_cat;\n    collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),\n                               that.cat_bits.cend());\n    cat_bits_sizes->emplace_back(that.cat_bits.size());\n    static_assert(!std::is_trivially_copyable_v<GradientT>);\n    collected_gradients->insert(collected_gradients->end(), that.left_sum.cbegin(),\n                                that.left_sum.cend());\n    collected_gradients->insert(collected_gradients->end(), that.right_sum.cbegin(),\n                                that.right_sum.cend());\n  }\n\n  /*!\\return feature index to split on */\n  [[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }\n  /*!\\return whether missing value goes to left branch */\n  [[nodiscard]] bool DefaultLeft() const { return (sindex >> 31) != 0; }\n  /*!\n   * \\brief decides whether we can replace current entry with the given statistics\n   *\n   *   This function gives better priority to lower index when loss_chg == 
new_loss_chg.\n   *   Not the best way, but helps to give consistent result during multi-thread\n   *   execution.\n   *\n   * \\param new_loss_chg the loss reduction get through the split\n   * \\param split_index the feature index where the split is on\n   */\n  [[nodiscard]] bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {\n    if (std::isinf(new_loss_chg)) {  // in some cases new_loss_chg can be NaN or Inf,\n                                     // for example when lambda = 0 & min_child_weight = 0\n                                     // skip value in this case\n      return false;\n    } else if (this->SplitIndex() <= split_index) {\n      return new_loss_chg > this->loss_chg;\n    } else {\n      return !(this->loss_chg > new_loss_chg);\n    }\n  }\n  /*!\n   * \\brief update the split entry, replace it if e is better\n   * \\param e candidate split solution\n   * \\return whether the proposed split is better and can replace current split\n   */\n  inline bool Update(const SplitEntryContainer &e) {\n    if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {\n      this->loss_chg = e.loss_chg;\n      this->sindex = e.sindex;\n      this->split_value = e.split_value;\n      this->is_cat = e.is_cat;\n      this->cat_bits = e.cat_bits;\n      this->left_sum = e.left_sum;\n      this->right_sum = e.right_sum;\n      return true;\n    } else {\n      return false;\n    }\n  }\n  /*!\n   * \\brief update the split entry, replace it if e is better\n   * \\param new_loss_chg loss reduction of new candidate\n   * \\param split_index feature index to split on\n   * \\param new_split_value the split point\n   * \\param default_left whether the missing value goes to left\n   * \\return whether the proposed split is better and can replace current split\n   */\n  template <typename GradientSumT>\n  bool Update(bst_float new_loss_chg, bst_feature_t split_index, float new_split_value,\n              bool default_left, bool is_cat, GradientSumT const 
&left_sum,\n              GradientSumT const &right_sum) {\n    if (this->NeedReplace(new_loss_chg, split_index)) {\n      this->loss_chg = new_loss_chg;\n      if (default_left) {\n        split_index |= (1U << 31);\n      }\n      this->sindex = split_index;\n      this->split_value = new_split_value;\n      this->is_cat = is_cat;\n      CopyStats(left_sum, &this->left_sum);\n      CopyStats(right_sum, &this->right_sum);\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n  /*! \\brief same as update, used by AllReduce*/\n  inline static void Reduce(SplitEntryContainer &dst,         // NOLINT(*)\n                            const SplitEntryContainer &src) { // NOLINT(*)\n    dst.Update(src);\n  }\n};\n\nusing SplitEntry = SplitEntryContainer<GradStats>;\n}  // namespace tree\n\n/*\n * \\brief Parse the interaction constraints from string.\n * \\param constraint_str String storing the interaction constraints:\n *\n *  Example input string:\n *\n *    \"[[1, 2], [3, 4]]\"\n *\n * \\param p_out Pointer to output\n */\nvoid ParseInteractionConstraint(\n    std::string const &constraint_str,\n    std::vector<std::vector<xgboost::bst_feature_t>> *p_out);\n}  // namespace xgboost\n\n// define string serializer for vector, to get the arguments\nnamespace std {\ninline std::ostream &operator<<(std::ostream &os, const std::vector<int> &t) {\n  os << '(';\n  for (auto it = t.begin(); it != t.end(); ++it) {\n    if (it != t.begin()) {\n      os << ',';\n    }\n    os << *it;\n  }\n  // python style tuple\n  if (t.size() == 1) {\n    os << ',';\n  }\n  os << ')';\n  return os;\n}\n\nstd::istream &operator>>(std::istream &is, std::vector<int> &t);\n}  // namespace std\n\n#endif  // XGBOOST_TREE_PARAM_H_\n"
  },
  {
    "path": "src/tree/sample_position.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#pragma once\n#include \"xgboost/base.h\"  // for bst_node_t\n\nnamespace xgboost::tree {\n// Utility for manipulating the node index. This is used by the tree methods and the\n// adaptive objectives to share the node index. A row is invalid if it's not used in the\n// last iteration (due to sampling). For these rows, the corresponding tree node index is\n// bitwise-complemented (~nidx).\nstruct SamplePosition {\n  [[nodiscard]] bst_node_t static XGBOOST_HOST_DEV_INLINE Encode(bst_node_t nidx, bool is_valid) {\n    return is_valid ? nidx : ~nidx;\n  }\n  [[nodiscard]] bst_node_t static XGBOOST_HOST_DEV_INLINE Decode(bst_node_t nidx) {\n    return IsValid(nidx) ? nidx : ~nidx;\n  }\n  [[nodiscard]] bool static XGBOOST_HOST_DEV_INLINE IsValid(bst_node_t nidx) { return nidx >= 0; }\n};\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/split_evaluator.h",
    "content": "/**\n * Copyright 2018-2023 by Contributors\n * \\file split_evaluator.h\n * \\brief Used for implementing a loss term specific to decision trees. Useful for custom regularisation.\n * \\author Henry Gouk\n */\n\n#ifndef XGBOOST_TREE_SPLIT_EVALUATOR_H_\n#define XGBOOST_TREE_SPLIT_EVALUATOR_H_\n\n#include <dmlc/registry.h>\n#include <xgboost/base.h>\n\n#include <algorithm>\n#include <limits>\n#include <vector>\n\n#include \"../common/math.h\"\n#include \"../common/transform.h\"\n#include \"param.h\"\n#include \"xgboost/context.h\"\n#include \"xgboost/host_device_vector.h\"\n\nnamespace xgboost::tree {\nclass TreeEvaluator {\n  // hist and exact use parent id to calculate constraints.\n  static constexpr bst_node_t kRootParentId = (-1 & static_cast<bst_node_t>((1U << 31) - 1));\n\n  HostDeviceVector<float> lower_bounds_;\n  HostDeviceVector<float> upper_bounds_;\n  HostDeviceVector<int32_t> monotone_;\n  DeviceOrd device_;\n  bool has_constraint_;\n\n public:\n  TreeEvaluator(TrainParam const& p, bst_feature_t n_features, DeviceOrd device) {\n    device_ = device;\n    if (device.IsCUDA()) {\n      lower_bounds_.SetDevice(device);\n      upper_bounds_.SetDevice(device);\n      monotone_.SetDevice(device);\n    }\n\n    if (p.monotone_constraints.empty()) {\n      monotone_.HostVector().resize(n_features, 0);\n      has_constraint_ = false;\n    } else {\n      CHECK_LE(p.monotone_constraints.size(), n_features)\n          << \"The size of monotone constraint should be less or equal to the number of features.\";\n      monotone_.HostVector() = p.monotone_constraints;\n      monotone_.HostVector().resize(n_features, 0);\n      // Initialised to some small size, can grow if needed\n      lower_bounds_.Resize(256, -std::numeric_limits<float>::max());\n      upper_bounds_.Resize(256, std::numeric_limits<float>::max());\n      has_constraint_ = true;\n    }\n\n    if (device_.IsCUDA()) {\n      // Pull to device early.\n      
lower_bounds_.ConstDeviceSpan();\n      upper_bounds_.ConstDeviceSpan();\n      monotone_.ConstDeviceSpan();\n    }\n  }\n\n  template <typename ParamT>\n  struct SplitEvaluator {\n    const int* constraints;\n    const float* lower;\n    const float* upper;\n    bool has_constraint;\n\n    template <typename GradientSumT>\n    XGBOOST_DEVICE float CalcSplitGain(const ParamT& param, bst_node_t nidx, bst_feature_t fidx,\n                                       GradientSumT const& left, GradientSumT const& right) const {\n      int constraint = has_constraint ? constraints[fidx] : 0;\n      const float negative_infinity = -std::numeric_limits<float>::infinity();\n      float wleft = this->CalcWeight(nidx, param, left);\n      float wright = this->CalcWeight(nidx, param, right);\n\n      float gain = this->CalcGainGivenWeight(param, left, wleft) +\n                    this->CalcGainGivenWeight(param, right, wright);\n\n      if (constraint == 0) {\n        // no constraint\n        return gain;\n      } else if (constraint > 0) {\n        return wleft <= wright ? gain : negative_infinity;\n      } else {\n        return wleft >= wright ? 
gain : negative_infinity;\n      }\n    }\n\n    template <typename GradientSumT>\n    XGBOOST_DEVICE float CalcWeight(bst_node_t nodeid, const ParamT &param,\n                                    GradientSumT const& stats) const {\n      float w = ::xgboost::tree::CalcWeight(param, stats);\n      if (!has_constraint) {\n        return w;\n      }\n      // Calculate bound weight\n      if (nodeid == kRootParentId) {\n        return w;\n      } else if (w < lower[nodeid]) {\n        return lower[nodeid];\n      } else if (w > upper[nodeid]) {\n        return upper[nodeid];\n      } else {\n        return w;\n      }\n    }\n\n    template <typename GradientSumT>\n    XGBOOST_DEVICE float CalcWeightCat(ParamT const& param, GradientSumT const& stats) const {\n      // FIXME(jiamingy): This is a temporary solution until we have categorical feature\n      // specific regularization parameters.  During sorting we should try to avoid any\n      // regularization.\n      return ::xgboost::tree::CalcWeight(param, stats);\n    }\n\n    // Fast floating point division instruction on device\n    [[nodiscard]] XGBOOST_DEVICE float Divide(float a, float b) const {\n#ifdef __CUDA_ARCH__\n      return __fdividef(a, b);\n#else\n      return a / b;\n#endif\n    }\n\n    template <typename GradientSumT>\n    XGBOOST_DEVICE float CalcGainGivenWeight(ParamT const& p, GradientSumT const& stats,\n                                             float w) const {\n      if (stats.GetHess() <= 0) {\n        return .0f;\n      }\n      // Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.\n      if (p.max_delta_step == 0.0f && has_constraint == false) {\n        return Divide(common::Sqr(ThresholdL1(stats.GetGrad(), p.reg_alpha)),\n                      (stats.GetHess() + p.reg_lambda));\n      }\n      return tree::CalcGainGivenWeight<ParamT, float>(p, stats.GetGrad(),\n                                                      stats.GetHess(), w);\n    }\n    
template <typename GradientSumT>\n    XGBOOST_DEVICE float CalcGain(bst_node_t nid, ParamT const &p,\n                                  GradientSumT const& stats) const {\n      return this->CalcGainGivenWeight(p, stats, this->CalcWeight(nid, p, stats));\n    }\n  };\n\n public:\n  /* Get a view to the evaluator that can be passed down to device. */\n  template <typename ParamT = TrainParam> auto GetEvaluator() const {\n    if (device_.IsCUDA()) {\n      auto constraints = monotone_.ConstDevicePointer();\n      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstDevicePointer(),\n                                    upper_bounds_.ConstDevicePointer(), has_constraint_};\n    } else {\n      auto constraints = monotone_.ConstHostPointer();\n      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstHostPointer(),\n                                    upper_bounds_.ConstHostPointer(), has_constraint_};\n    }\n  }\n\n  template <bool CompiledWithCuda = WITH_CUDA()>\n  void AddSplit(bst_node_t nodeid, bst_node_t leftid, bst_node_t rightid,\n                bst_feature_t f, float left_weight, float right_weight) {\n    if (!has_constraint_) {\n      return;\n    }\n\n    size_t max_nidx = std::max(leftid, rightid);\n    if (lower_bounds_.Size() <= max_nidx) {\n      lower_bounds_.Resize(max_nidx * 2 + 1, -std::numeric_limits<float>::max());\n    }\n    if (upper_bounds_.Size() <= max_nidx) {\n      upper_bounds_.Resize(max_nidx * 2 + 1, std::numeric_limits<float>::max());\n    }\n\n    common::Transform<>::Init(\n        [=] XGBOOST_DEVICE(size_t, common::Span<float> lower,\n                           common::Span<float> upper,\n                           common::Span<int> monotone) {\n          lower[leftid] = lower[nodeid];\n          upper[leftid] = upper[nodeid];\n\n          lower[rightid] = lower[nodeid];\n          upper[rightid] = upper[nodeid];\n          int32_t c = monotone[f];\n          bst_float mid = (left_weight + right_weight) / 2;\n\n  
        SPAN_CHECK(!common::CheckNAN(mid));\n\n          if (c < 0) {\n            lower[leftid] = mid;\n            upper[rightid] = mid;\n          } else if (c > 0) {\n            upper[leftid] = mid;\n            lower[rightid] = mid;\n          }\n        },\n        common::Range(0, 1), 1, device_)\n        .Eval(&lower_bounds_, &upper_bounds_, &monotone_);\n  }\n};\n}  // namespace xgboost::tree\n\n#endif  // XGBOOST_TREE_SPLIT_EVALUATOR_H_\n"
  },
  {
    "path": "src/tree/tree_model.cc",
    "content": "/**\n * Copyright 2015-2026, XGBoost Contributors\n * \\file tree_model.cc\n * \\brief model structure for tree\n */\n#include \"xgboost/tree_model.h\"\n\n#include <dmlc/json.h>\n#include <dmlc/registry.h>\n\n#include <cmath>\n#include <iomanip>\n#include <limits>\n#include <sstream>\n#include <type_traits>  // for is_floating_point_v\n\n#include \"../common/categorical.h\"  // for GetNodeCats\n#include \"../common/common.h\"       // for EscapeU8\n#include \"io_utils.h\"               // for GetElem\n#include \"param.h\"\n#include \"tree_view.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\nnamespace tree {\nDMLC_REGISTER_PARAMETER(TrainParam);\n}\n\nnamespace {\ntemplate <typename Float>\nstd::enable_if_t<std::is_floating_point_v<Float>, std::string> ToStr(Float value) {\n  int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;\n  static_assert(std::is_floating_point_v<Float>,\n                \"Use std::to_string instead for non-floating point values.\");\n  std::stringstream ss;\n  ss << std::setprecision(kFloatMaxPrecision) << value;\n  return ss.str();\n}\n\ntemplate <typename Float>\nstd::string ToStr(linalg::VectorView<Float> value) {\n  // Hardcoded limit to avoid dumping long arrays into dot graph.\n  constexpr bst_target_t kLimit = 3;\n  int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;\n  static_assert(std::is_floating_point_v<Float>,\n                \"Use std::to_string instead for non-floating point values.\");\n  std::stringstream ss;\n  ss << std::setprecision(kFloatMaxPrecision);\n  if (value.Size() == 1) {\n    ss << value(0);\n    return ss.str();\n  }\n  CHECK_GE(kLimit, 2);\n  auto n = std::min(static_cast<bst_target_t>(value.Size() - 1), kLimit - 1);\n  ss << \"[\";\n  for (std::size_t i = 0; i < n; ++i) {\n    ss << value(i) << \", \";\n  }\n  if (value.Size() > kLimit) {\n 
   ss << \"..., \";\n  }\n  ss << value(value.Size() - 1) << \"]\";\n  return ss.str();\n}\n}  // namespace\n\n/**\n * @brief Base class for dump model implementation.\n */\ntemplate <typename TreeView>\nclass TreeGenerator {\n protected:\n  FeatureMap const& fmap_;\n  std::stringstream ss_;\n  bool const with_stats_;\n\n  static std::string Tabs(uint32_t n) {\n    std::string res;\n    for (uint32_t i = 0; i < n; ++i) {\n      res += '\\t';\n    }\n    return res;\n  }\n  /* @brief Find the first occurrence of key in input and replace it with corresponding\n   *        value.\n   */\n  [[nodiscard]] static std::string Match(std::string const& input,\n                                         std::map<std::string, std::string> const& replacements) {\n    std::string result = input;\n    for (auto const& kv : replacements) {\n      auto pos = result.find(kv.first);\n      CHECK_NE(pos, std::string::npos);\n      result.replace(pos, kv.first.length(), kv.second);\n    }\n    return result;\n  }\n\n  virtual std::string Indicator(TreeView /*tree*/, int32_t /*nid*/, uint32_t /*depth*/) const {\n    return \"\";\n  }\n  virtual std::string Categorical(TreeView, int32_t, uint32_t) const = 0;\n  virtual std::string Integer(TreeView /*tree*/, int32_t /*nid*/, uint32_t /*depth*/) const {\n    return \"\";\n  }\n  virtual std::string Quantitive(TreeView /*tree*/, int32_t /*nid*/, uint32_t /*depth*/) const {\n    return \"\";\n  }\n  virtual std::string NodeStat(TreeView /*tree*/, int32_t /*nid*/) const { return \"\"; }\n\n  virtual std::string PlainNode(TreeView /*tree*/, int32_t /*nid*/, uint32_t /*depth*/) const = 0;\n\n  virtual std::string SplitNode(TreeView tree, int32_t nid, uint32_t depth) {\n    auto const split_index = tree.SplitIndex(nid);\n    std::string result;\n    auto is_categorical = tree.SplitType(nid) == FeatureType::kCategorical;\n    if (split_index < fmap_.Size()) {\n      auto check_categorical = [&]() {\n        CHECK(is_categorical) << 
fmap_.Name(split_index)\n                              << \" in feature map is numerical but tree node is categorical.\";\n      };\n      auto check_numerical = [&]() {\n        auto is_numerical = !is_categorical;\n        CHECK(is_numerical) << fmap_.Name(split_index)\n                            << \" in feature map is categorical but tree node is numerical.\";\n      };\n\n      switch (fmap_.TypeOf(split_index)) {\n        case FeatureMap::kCategorical: {\n          check_categorical();\n          result = this->Categorical(tree, nid, depth);\n          break;\n        }\n        case FeatureMap::kIndicator: {\n          check_numerical();\n          result = this->Indicator(tree, nid, depth);\n          break;\n        }\n        case FeatureMap::kInteger: {\n          check_numerical();\n          result = this->Integer(tree, nid, depth);\n          break;\n        }\n        case FeatureMap::kFloat:\n        case FeatureMap::kQuantitive: {\n          check_numerical();\n          result = this->Quantitive(tree, nid, depth);\n          break;\n        }\n        default:\n          LOG(FATAL) << \"Unknown feature map type.\";\n      }\n    } else {\n      if (is_categorical) {\n        result = this->Categorical(tree, nid, depth);\n      } else {\n        result = this->PlainNode(tree, nid, depth);\n      }\n    }\n    return result;\n  }\n\n  virtual std::string LeafNode(TreeView tree, int32_t nid, uint32_t depth) const = 0;\n  virtual std::string BuildTree(TreeView tree, int32_t nid, uint32_t depth) = 0;\n\n public:\n  TreeGenerator(FeatureMap const& _fmap, bool with_stats) : fmap_{_fmap}, with_stats_{with_stats} {}\n  virtual ~TreeGenerator() = default;\n\n  virtual void BuildTree(TreeView tree) { ss_ << this->BuildTree(tree, 0, 0); }\n\n  std::string Str() const { return ss_.str(); }\n};\n}  // namespace xgboost\n\nnamespace xgboost {\nnamespace {\ntemplate <typename TreeView>\nstd::vector<bst_cat_t> GetSplitCategories(TreeView const& tree, int32_t 
nidx) {\n  auto const& csr = tree.GetCategoriesMatrix();\n  auto seg = csr.node_ptr[nidx];\n  auto split = common::KCatBitField{csr.categories.subspan(seg.beg, seg.size)};\n\n  std::vector<bst_cat_t> cats;\n  for (size_t i = 0; i < split.Capacity(); ++i) {\n    if (split.Check(i)) {\n      cats.push_back(static_cast<bst_cat_t>(i));\n    }\n  }\n  return cats;\n}\n\nstd::string PrintCatsAsSet(std::vector<bst_cat_t> const& cats) {\n  std::stringstream ss;\n  ss << \"{\";\n  for (size_t i = 0; i < cats.size(); ++i) {\n    ss << cats[i];\n    if (i != cats.size() - 1) {\n      ss << \",\";\n    }\n  }\n  ss << \"}\";\n  return ss.str();\n}\n\nstd::string GetFeatureName(FeatureMap const& fmap, bst_feature_t split_index) {\n  CHECK_LE(fmap.Size(), std::numeric_limits<decltype(split_index)>::max());\n  auto fname = split_index < static_cast<decltype(split_index)>(fmap.Size())\n                   ? fmap.Name(split_index)\n                   : ('f' + std::to_string(split_index));\n  return common::EscapeU8(fname);\n}\n}  // anonymous namespace\n\ntemplate <typename TreeView>\nclass TextGenerator : public TreeGenerator<TreeView> {\n  using SuperT = TreeGenerator<TreeView>;\n\n public:\n  TextGenerator(FeatureMap const& fmap, bool with_stats) : SuperT(fmap, with_stats) {}\n\n  std::string LeafNode(TreeView tree, int32_t nid, uint32_t depth) const override {\n    static std::string kLeafTemplate = \"{tabs}{nid}:leaf={leaf}{stats}\";\n    static std::string kStatTemplate = \",cover={cover}\";\n    std::string result = SuperT::Match(\n        kLeafTemplate,\n        {{\"{tabs}\", SuperT::Tabs(depth)},\n         {\"{nid}\", std::to_string(nid)},\n         {\"{leaf}\", ToStr(tree.LeafValue(nid))},\n         {\"{stats}\", SuperT::with_stats_\n                         ? 
SuperT::Match(kStatTemplate, {{\"{cover}\", ToStr(tree.SumHess(nid))}})\n                         : \"\"}});\n    return result;\n  }\n\n  std::string Indicator(TreeView tree, bst_node_t nid, uint32_t) const override {\n    static std::string const kIndicatorTemplate = \"{nid}:[{fname}] yes={yes},no={no}\";\n    int32_t nyes = tree.DefaultLeft(nid) ? tree.RightChild(nid) : tree.LeftChild(nid);\n    auto split_index = tree.SplitIndex(nid);\n    std::string result =\n        SuperT::Match(kIndicatorTemplate, {{\"{nid}\", std::to_string(nid)},\n                                           {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                           {\"{yes}\", std::to_string(nyes)},\n                                           {\"{no}\", std::to_string(tree.DefaultChild(nid))}});\n    return result;\n  }\n\n  std::string SplitNodeImpl(TreeView tree, bst_node_t nid, std::string const& template_str,\n                            std::string cond, uint32_t depth) const {\n    auto split_index = tree.SplitIndex(nid);\n    std::string const result =\n        SuperT::Match(template_str, {{\"{tabs}\", SuperT::Tabs(depth)},\n                                     {\"{nid}\", std::to_string(nid)},\n                                     {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                     {\"{cond}\", cond},\n                                     {\"{left}\", std::to_string(tree.LeftChild(nid))},\n                                     {\"{right}\", std::to_string(tree.RightChild(nid))},\n                                     {\"{missing}\", std::to_string(tree.DefaultChild(nid))}});\n    return result;\n  }\n\n  std::string Integer(TreeView tree, int32_t nid, uint32_t depth) const override {\n    static std::string const kIntegerTemplate =\n        \"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}\";\n    auto cond = tree.SplitCond(nid);\n    const bst_float floored = 
std::floor(cond);\n    const int32_t integer_threshold =\n        (floored == cond) ? static_cast<int>(floored) : static_cast<int>(floored) + 1;\n    return SplitNodeImpl(tree, nid, kIntegerTemplate, std::to_string(integer_threshold), depth);\n  }\n\n  std::string Quantitive(TreeView tree, int32_t nid, uint32_t depth) const override {\n    static std::string const kQuantitiveTemplate =\n        \"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}\";\n    auto cond = tree.SplitCond(nid);\n    return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);\n  }\n\n  std::string PlainNode(TreeView tree, int32_t nid, uint32_t depth) const override {\n    auto cond = tree.SplitCond(nid);\n    static std::string const kNodeTemplate =\n        \"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}\";\n    return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);\n  }\n\n  std::string Categorical(TreeView tree, bst_node_t nid, uint32_t depth) const override {\n    auto cats = GetSplitCategories(tree, nid);\n    std::string cats_str = PrintCatsAsSet(cats);\n    static std::string const kNodeTemplate =\n        \"{tabs}{nid}:[{fname}:{cond}] yes={right},no={left},missing={missing}\";\n    std::string const result = SplitNodeImpl(tree, nid, kNodeTemplate, cats_str, depth);\n    return result;\n  }\n\n  std::string NodeStat(TreeView tree, bst_node_t nid) const override {\n    static std::string const kStatTemplate = \",gain={loss_chg},cover={sum_hess}\";\n    std::string const result = SuperT::Match(\n        kStatTemplate,\n        {{\"{loss_chg}\", ToStr(tree.LossChg(nid))}, {\"{sum_hess}\", ToStr(tree.SumHess(nid))}});\n    return result;\n  }\n\n  std::string BuildTree(TreeView tree, int32_t nid, uint32_t depth) override {\n    if (tree.IsLeaf(nid)) {\n      return this->LeafNode(tree, nid, depth);\n    }\n    static std::string const kNodeTemplate = \"{parent}{stat}\\n{left}\\n{right}\";\n    auto result = 
SuperT::Match(\n        kNodeTemplate, {{\"{parent}\", this->SplitNode(tree, nid, depth)},\n                        {\"{stat}\", SuperT::with_stats_ ? this->NodeStat(tree, nid) : \"\"},\n                        {\"{left}\", this->BuildTree(tree, tree.LeftChild(nid), depth + 1)},\n                        {\"{right}\", this->BuildTree(tree, tree.RightChild(nid), depth + 1)}});\n    return result;\n  }\n\n  void BuildTree(TreeView tree) override {\n    static std::string const& kTreeTemplate = \"{nodes}\\n\";\n    auto result = SuperT::Match(kTreeTemplate, {{\"{nodes}\", this->BuildTree(tree, 0, 0)}});\n    SuperT::ss_ << result;\n  }\n};\n\ntemplate <typename TreeView>\nclass JsonGenerator : public TreeGenerator<TreeView> {\n  using SuperT = TreeGenerator<TreeView>;\n\n public:\n  JsonGenerator(FeatureMap const& fmap, bool with_stats) : SuperT{fmap, with_stats} {}\n\n  std::string Indent(uint32_t depth) const {\n    std::string result;\n    for (uint32_t i = 0; i < depth + 1; ++i) {\n      result += \"  \";\n    }\n    return result;\n  }\n\n  std::string LeafNode(TreeView tree, bst_node_t nid, uint32_t) const override {\n    static std::string const kLeafTemplate = R\"L({ \"nodeid\": {nid}, \"leaf\": {leaf} {stat}})L\";\n    static std::string const kStatTemplate = R\"S(, \"cover\": {sum_hess} )S\";\n    std::string result = SuperT::Match(\n        kLeafTemplate,\n        {{\"{nid}\", std::to_string(nid)},\n         {\"{leaf}\", ToStr(tree.LeafValue(nid))},\n         {\"{stat}\", SuperT::with_stats_\n                        ? SuperT::Match(kStatTemplate, {{\"{sum_hess}\", ToStr(tree.SumHess(nid))}})\n                        : \"\"}});\n    return result;\n  }\n\n  std::string Indicator(TreeView tree, bst_node_t nid, uint32_t depth) const override {\n    int32_t nyes = tree.DefaultLeft(nid) ? 
tree.RightChild(nid) : tree.LeftChild(nid);\n    static std::string const kIndicatorTemplate =\n        R\"ID( \"nodeid\": {nid}, \"depth\": {depth}, \"split\": \"{fname}\", \"yes\": {yes}, \"no\": {no})ID\";\n    auto split_index = tree.SplitIndex(nid);\n    auto result =\n        SuperT::Match(kIndicatorTemplate, {{\"{nid}\", std::to_string(nid)},\n                                           {\"{depth}\", std::to_string(depth)},\n                                           {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                           {\"{yes}\", std::to_string(nyes)},\n                                           {\"{no}\", std::to_string(tree.DefaultChild(nid))}});\n    return result;\n  }\n\n  std::string Categorical(TreeView tree, bst_node_t nid, uint32_t depth) const override {\n    auto cats = GetSplitCategories(tree, nid);\n    static std::string const kCategoryTemplate =\n        R\"I( \"nodeid\": {nid}, \"depth\": {depth}, \"split\": \"{fname}\", )I\"\n        R\"I(\"split_condition\": {cond}, \"yes\": {right}, \"no\": {left}, )I\"\n        R\"I(\"missing\": {missing})I\";\n    std::string cats_ptr = \"[\";\n    for (size_t i = 0; i < cats.size(); ++i) {\n      cats_ptr += std::to_string(cats[i]);\n      if (i != cats.size() - 1) {\n        cats_ptr += \", \";\n      }\n    }\n    cats_ptr += \"]\";\n    auto results = SplitNodeImpl(tree, nid, kCategoryTemplate, cats_ptr, depth);\n    return results;\n  }\n\n  std::string SplitNodeImpl(TreeView tree, bst_node_t nid, std::string const& template_str,\n                            std::string cond, uint32_t depth) const {\n    auto split_index = tree.SplitIndex(nid);\n    std::string const result =\n        SuperT::Match(template_str, {{\"{nid}\", std::to_string(nid)},\n                                     {\"{depth}\", std::to_string(depth)},\n                                     {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                  
   {\"{cond}\", cond},\n                                     {\"{left}\", std::to_string(tree.LeftChild(nid))},\n                                     {\"{right}\", std::to_string(tree.RightChild(nid))},\n                                     {\"{missing}\", std::to_string(tree.DefaultChild(nid))}});\n    return result;\n  }\n\n  std::string Integer(TreeView tree, int32_t nid, uint32_t depth) const override {\n    auto cond = tree.SplitCond(nid);\n    const bst_float floored = std::floor(cond);\n    const int32_t integer_threshold =\n        (floored == cond) ? static_cast<int32_t>(floored) : static_cast<int32_t>(floored) + 1;\n    static std::string const kIntegerTemplate =\n        R\"I( \"nodeid\": {nid}, \"depth\": {depth}, \"split\": \"{fname}\", )I\"\n        R\"I(\"split_condition\": {cond}, \"yes\": {left}, \"no\": {right}, )I\"\n        R\"I(\"missing\": {missing})I\";\n    return SplitNodeImpl(tree, nid, kIntegerTemplate, std::to_string(integer_threshold), depth);\n  }\n\n  std::string Quantitive(TreeView tree, int32_t nid, uint32_t depth) const override {\n    static std::string const kQuantitiveTemplate =\n        R\"I( \"nodeid\": {nid}, \"depth\": {depth}, \"split\": \"{fname}\", )I\"\n        R\"I(\"split_condition\": {cond}, \"yes\": {left}, \"no\": {right}, )I\"\n        R\"I(\"missing\": {missing})I\";\n    bst_float cond = tree.SplitCond(nid);\n    return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);\n  }\n\n  std::string PlainNode(TreeView tree, int32_t nid, uint32_t depth) const override {\n    auto cond = tree.SplitCond(nid);\n    static std::string const kNodeTemplate =\n        R\"I( \"nodeid\": {nid}, \"depth\": {depth}, \"split\": \"{fname}\", )I\"\n        R\"I(\"split_condition\": {cond}, \"yes\": {left}, \"no\": {right}, )I\"\n        R\"I(\"missing\": {missing})I\";\n    return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);\n  }\n\n  std::string NodeStat(TreeView tree, int32_t nid) const override {\n    
static std::string kStatTemplate = R\"S(, \"gain\": {loss_chg}, \"cover\": {sum_hess})S\";\n    auto result = SuperT::Match(kStatTemplate, {{\"{loss_chg}\", ToStr(tree.LossChg(nid))},\n                                                {\"{sum_hess}\", ToStr(tree.SumHess(nid))}});\n    return result;\n  }\n\n  std::string SplitNode(TreeView tree, int32_t nid, uint32_t depth) override {\n    std::string properties = SuperT::SplitNode(tree, nid, depth);\n    static std::string const kSplitNodeTemplate =\n        \"{{properties} {stat}, \\\"children\\\": [{left}, {right}\\n{indent}]}\";\n    auto result = SuperT::Match(\n        kSplitNodeTemplate, {{\"{properties}\", properties},\n                             {\"{stat}\", SuperT::with_stats_ ? this->NodeStat(tree, nid) : \"\"},\n                             {\"{left}\", this->BuildTree(tree, tree.LeftChild(nid), depth + 1)},\n                             {\"{right}\", this->BuildTree(tree, tree.RightChild(nid), depth + 1)},\n                             {\"{indent}\", this->Indent(depth)}});\n    return result;\n  }\n\n  std::string BuildTree(TreeView tree, int32_t nid, uint32_t depth) override {\n    static std::string const kNodeTemplate = \"{newline}{indent}{nodes}\";\n    auto result = SuperT::Match(\n        kNodeTemplate, {{\"{newline}\", depth == 0 ? \"\" : \"\\n\"},\n                        {\"{indent}\", Indent(depth)},\n                        {\"{nodes}\", tree.IsLeaf(nid) ? 
this->LeafNode(tree, nid, depth)\n                                                     : this->SplitNode(tree, nid, depth)}});\n    return result;\n  }\n};\n\nstruct GraphvizParam : public XGBoostParameter<GraphvizParam> {\n  std::string yes_color;\n  std::string no_color;\n  std::string rankdir;\n  std::string condition_node_params;\n  std::string leaf_node_params;\n  std::string graph_attrs;\n\n  DMLC_DECLARE_PARAMETER(GraphvizParam) {\n    DMLC_DECLARE_FIELD(yes_color).set_default(\"#0000FF\").describe(\n        \"Edge color when meets the node condition.\");\n    DMLC_DECLARE_FIELD(no_color).set_default(\"#FF0000\").describe(\n        \"Edge color when doesn't meet the node condition.\");\n    DMLC_DECLARE_FIELD(rankdir).set_default(\"TB\").describe(\"Passed to graphiz via graph_attr.\");\n    DMLC_DECLARE_FIELD(condition_node_params)\n        .set_default(\"\")\n        .describe(\"Conditional node configuration\");\n    DMLC_DECLARE_FIELD(leaf_node_params).set_default(\"\").describe(\"Leaf node configuration\");\n    DMLC_DECLARE_FIELD(graph_attrs)\n        .set_default(\"\")\n        .describe(\"Any other extra attributes for graphviz `graph_attr`.\");\n  }\n};\n\nDMLC_REGISTER_PARAMETER(GraphvizParam);\n\ntemplate <typename TreeView>\nclass GraphvizGenerator : public TreeGenerator<TreeView> {\n  using SuperT = TreeGenerator<TreeView>;\n  GraphvizParam param_;\n\n public:\n  GraphvizGenerator(FeatureMap const& fmap, std::string const& attrs, bool with_stats)\n      : SuperT{fmap, with_stats} {\n    param_.UpdateAllowUnknown(std::map<std::string, std::string>{});\n    using KwArg = std::map<std::string, std::map<std::string, std::string>>;\n    KwArg kwargs;\n    if (attrs.length() != 0) {\n      std::istringstream iss(attrs);\n      try {\n        dmlc::JSONReader reader(&iss);\n        reader.Read(&kwargs);\n      } catch (dmlc::Error const& e) {\n        LOG(FATAL) << \"Failed to parse graphviz parameters:\\n\\t\" << attrs << \"\\n\"\n                   << 
\"With error:\\n\"\n                   << e.what();\n      }\n    }\n    // This turns out to be tricky, as `dmlc::Parameter::Load(JSONReader*)` doesn't\n    // support loading nested json objects.\n    if (kwargs.find(\"condition_node_params\") != kwargs.cend()) {\n      auto const& cnp = kwargs[\"condition_node_params\"];\n      for (auto const& kv : cnp) {\n        param_.condition_node_params += kv.first + '=' + \"\\\"\" + kv.second + \"\\\" \";\n      }\n      kwargs.erase(\"condition_node_params\");\n    }\n    if (kwargs.find(\"leaf_node_params\") != kwargs.cend()) {\n      auto const& lnp = kwargs[\"leaf_node_params\"];\n      for (auto const& kv : lnp) {\n        param_.leaf_node_params += kv.first + '=' + \"\\\"\" + kv.second + \"\\\" \";\n      }\n      kwargs.erase(\"leaf_node_params\");\n    }\n\n    if (kwargs.find(\"edge\") != kwargs.cend()) {\n      if (kwargs[\"edge\"].find(\"yes_color\") != kwargs[\"edge\"].cend()) {\n        param_.yes_color = kwargs[\"edge\"][\"yes_color\"];\n      }\n      if (kwargs[\"edge\"].find(\"no_color\") != kwargs[\"edge\"].cend()) {\n        param_.no_color = kwargs[\"edge\"][\"no_color\"];\n      }\n      kwargs.erase(\"edge\");\n    }\n    auto const& extra = kwargs[\"graph_attrs\"];\n    static std::string const kGraphTemplate = \"    graph [ {key}=\\\"{value}\\\" ]\\n\";\n    for (auto const& kv : extra) {\n      param_.graph_attrs +=\n          SuperT::Match(kGraphTemplate, {{\"{key}\", kv.first}, {\"{value}\", kv.second}});\n    }\n\n    kwargs.erase(\"graph_attrs\");\n    if (kwargs.size() != 0) {\n      std::stringstream ss;\n      ss << \"The following parameters for graphviz are not recognized:\\n\";\n      for (auto kv : kwargs) {\n        ss << kv.first << \", \";\n      }\n      LOG(WARNING) << ss.str();\n    }\n  }\n\n protected:\n  template <bool is_categorical>\n  std::string BuildEdge(TreeView tree, bst_node_t nidx, int32_t child, bool left) const {\n    static std::string const kEdgeTemplate =\n       
 \"    {nid} -> {child} [label=\\\"{branch}\\\" color=\\\"{color}\\\"]\\n\";\n    // Is this the default child for missing value?\n    bool is_missing = tree.DefaultChild(nidx) == child;\n    std::string branch;\n    if (is_categorical) {\n      branch = std::string{left ? \"no\" : \"yes\"} + std::string{is_missing ? \", missing\" : \"\"};\n    } else {\n      branch = std::string{left ? \"yes\" : \"no\"} + std::string{is_missing ? \", missing\" : \"\"};\n    }\n    std::string buffer =\n        SuperT::Match(kEdgeTemplate, {{\"{nid}\", std::to_string(nidx)},\n                                      {\"{child}\", std::to_string(child)},\n                                      {\"{color}\", is_missing ? param_.yes_color : param_.no_color},\n                                      {\"{branch}\", branch}});\n    return buffer;\n  }\n\n  // Only indicator is different, so we combine all different node types into this\n  // function.\n  std::string PlainNode(TreeView tree, bst_node_t nidx, uint32_t) const override {\n    auto split_index = tree.SplitIndex(nidx);\n    auto cond = tree.SplitCond(nidx);\n    static std::string const kNodeTemplate =\n        \"    {nid} [ label=\\\"{fname}{<}{cond}{stat}\\\" {params}]\\n\";\n\n    bool has_less = (split_index >= SuperT::fmap_.Size()) ||\n                    SuperT::fmap_.TypeOf(split_index) != FeatureMap::kIndicator;\n    std::string result;\n    if (this->with_stats_) {\n      CHECK(tree::IsScalarTree(tree)) << MTNotImplemented();\n      result =\n          SuperT::Match(kNodeTemplate, {{\"{nid}\", std::to_string(nidx)},\n                                        {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                        {\"{<}\", has_less ? \"<\" : \"\"},\n                                        {\"{cond}\", has_less ? 
ToStr(cond) : \"\"},\n                                        {\"{stat}\", this->NodeStat(tree, nidx)},\n                                        {\"{params}\", param_.condition_node_params}});\n    } else {\n      result =\n          SuperT::Match(kNodeTemplate, {{\"{nid}\", std::to_string(nidx)},\n                                        {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                        {\"{<}\", has_less ? \"<\" : \"\"},\n                                        {\"{cond}\", has_less ? ToStr(cond) : \"\"},\n                                        {\"{stat}\", \"\"},\n                                        {\"{params}\", param_.condition_node_params}});\n    }\n\n    result += BuildEdge<false>(tree, nidx, tree.LeftChild(nidx), true);\n    result += BuildEdge<false>(tree, nidx, tree.RightChild(nidx), false);\n\n    return result;\n  };\n\n  std::string NodeStat(TreeView tree, bst_node_t nidx) const override {\n    return SuperT::Match(\"\\ngain={gain}\\ncover={cover}\", {{\"{cover}\", ToStr(tree.SumHess(nidx))},\n                                                          {\"{gain}\", ToStr(tree.LossChg(nidx))}});\n  }\n\n  std::string Categorical(TreeView tree, bst_node_t nidx, uint32_t /*depth*/) const override {\n    static std::string const kLabelTemplate =\n        \"    {nid} [ label=\\\"{fname}:{cond}{stat}\\\" {params}]\\n\";\n    auto cats = GetSplitCategories(tree, nidx);\n    auto cats_str = PrintCatsAsSet(cats);\n    auto split_index = tree.SplitIndex(nidx);\n\n    std::string result =\n        SuperT::Match(kLabelTemplate, {{\"{nid}\", std::to_string(nidx)},\n                                       {\"{fname}\", GetFeatureName(SuperT::fmap_, split_index)},\n                                       {\"{cond}\", cats_str},\n                                       {\"{stat}\", this->NodeStat(tree, nidx)},\n                                       {\"{params}\", param_.condition_node_params}});\n\n    result += 
BuildEdge<true>(tree, nidx, tree.LeftChild(nidx), true);\n    result += BuildEdge<true>(tree, nidx, tree.RightChild(nidx), false);\n\n    return result;\n  }\n\n  std::string LeafNode(TreeView tree, bst_node_t nidx, uint32_t) const override {\n    static std::string const kCoverTemplate = \"\\ncover={cover}\";\n    static std::string const kLeafTemplate =\n        \"    {nid} [ label=\\\"leaf={leaf-value}{cover}\\\" {params}]\\n\";\n    auto value = tree.LeafValue(nidx);\n    return SuperT::Match(\n        kLeafTemplate,\n        {{\"{nid}\", std::to_string(nidx)},\n         {\"{leaf-value}\", ToStr(value)},\n         {\"{cover}\", this->with_stats_\n                         ? SuperT::Match(kCoverTemplate, {{\"{cover}\", ToStr(tree.SumHess(nidx))}})\n                         : \"\"},\n         {\"{params}\", param_.leaf_node_params}});\n  }\n\n  std::string BuildTree(TreeView tree, bst_node_t nidx, uint32_t depth) override {\n    if (tree.IsLeaf(nidx)) {\n      return this->LeafNode(tree, nidx, depth);\n    }\n    static std::string const kNodeTemplate = \"{parent}\\n{left}\\n{right}\";\n    auto node = tree.SplitType(nidx) == FeatureType::kCategorical\n                    ? 
this->Categorical(tree, nidx, depth)\n                    : this->PlainNode(tree, nidx, depth);\n    auto result = SuperT::Match(\n        kNodeTemplate, {{\"{parent}\", node},\n                        {\"{left}\", this->BuildTree(tree, tree.LeftChild(nidx), depth + 1)},\n                        {\"{right}\", this->BuildTree(tree, tree.RightChild(nidx), depth + 1)}});\n    return result;\n  }\n\n  void BuildTree(TreeView tree) override {\n    static std::string const kTreeTemplate =\n        \"digraph {\\n\"\n        \"    graph [ rankdir={rankdir} ]\\n\"\n        \"{graph_attrs}\\n\"\n        \"{nodes}}\";\n    auto result = SuperT::Match(kTreeTemplate, {{\"{rankdir}\", param_.rankdir},\n                                                {\"{graph_attrs}\", param_.graph_attrs},\n                                                {\"{nodes}\", this->BuildTree(tree, 0, 0)}});\n    SuperT::ss_ << result;\n  };\n};\n\nconstexpr bst_node_t RegTree::kRoot;\n\nvoid TreeParam::FromJson(Json const& in) {\n  auto const& obj = get<Object const>(in);\n  auto n_deleted_it = obj.find(StringView{\"num_deleted\"});\n  if (n_deleted_it != obj.cend()) {\n    // Missing in 1.0 models.\n    this->num_deleted = std::stoi(get<String const>(n_deleted_it->second));\n  }\n  this->num_feature = std::stoul(get<String const>(obj.at(\"num_feature\")));\n  this->num_nodes = std::stoi(get<String const>(obj.at(\"num_nodes\")));\n  this->size_leaf_vector = std::stoul(get<String const>(obj.at(\"size_leaf_vector\")));\n}\n\nvoid TreeParam::ToJson(Json* p_out) const {\n  auto& out = *p_out;\n  out[\"num_deleted\"] = std::to_string(this->num_deleted);\n  out[\"num_feature\"] = std::to_string(this->num_feature);\n  out[\"num_nodes\"] = std::to_string(this->num_nodes);\n  out[\"size_leaf_vector\"] = std::to_string(this->size_leaf_vector);\n}\n\ntemplate <typename TreeView>\nstd::unique_ptr<TreeGenerator<TreeView>> CreateTreeGenerator(std::string const& attrs,\n                                                 
            FeatureMap const& fmap,\n                                                             bool with_stats) {\n  auto pos = attrs.find(':');\n  std::string name;\n  std::string params;\n  if (pos != std::string::npos) {\n    name = attrs.substr(0, pos);\n    params = attrs.substr(pos + 1, attrs.length() - pos - 1);\n    // Eliminate all occurrences of single quote string.\n    size_t pos = std::string::npos;\n    while ((pos = params.find('\\'')) != std::string::npos) {\n      params.replace(pos, 1, \"\\\"\");\n    }\n  } else {\n    name = attrs;\n  }\n  std::unique_ptr<TreeGenerator<TreeView>> ptr;\n  if (name == \"dot\") {\n    return std::make_unique<GraphvizGenerator<TreeView>>(fmap, params, with_stats);\n  } else if (name == \"text\") {\n    return std::make_unique<TextGenerator<TreeView>>(fmap, with_stats);\n  } else if (name == \"json\") {\n    return std::make_unique<JsonGenerator<TreeView>>(fmap, with_stats);\n  } else {\n    LOG(FATAL) << \"Unknown Model Builder:\" << name;\n  }\n  return {nullptr};\n}\n\nstd::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {\n  auto impl = [](auto builder, auto view) {\n    builder->BuildTree(view);\n    std::string result = builder->Str();\n    return result;\n  };\n  if (this->IsMultiTarget()) {\n    CHECK(!with_stats) << \" Tree dump with statistic \" << MTNotImplemented();\n    return impl(CreateTreeGenerator<tree::MultiTargetTreeView>(format, fmap, with_stats),\n                this->HostMtView());\n  } else {\n    return impl(CreateTreeGenerator<tree::ScalarTreeView>(format, fmap, with_stats),\n                this->HostScView());\n  }\n}\n\nbool RegTree::Equal(const RegTree& b) const {\n  CHECK(!IsMultiTarget());\n  if (NumExtraNodes() != b.NumExtraNodes()) {\n    return false;\n  }\n  auto const& self = *this;\n  bool ret{true};\n  auto sc_tree = this->HostScView();\n  auto const& lhs = self.nodes_.ConstHostVector();\n  auto const& rhs = 
b.nodes_.ConstHostVector();\n  sc_tree.WalkTree([&](bst_node_t nidx) {\n    if (!(lhs.at(nidx) == rhs.at(nidx))) {\n      ret = false;\n      return false;\n    }\n    return true;\n  });\n  return ret;\n}\n\n[[nodiscard]] bst_node_t RegTree::GetNumLeaves() const {\n  bst_node_t leaves{0};\n  tree::WalkTree(*this, [&leaves](auto const& tree, bst_node_t nidx) {\n    if (tree.IsLeaf(nidx)) {\n      leaves++;\n    }\n    return true;\n  });\n  return leaves;\n}\n\n[[nodiscard]] bst_node_t RegTree::GetNumSplitNodes() const {\n  bst_node_t splits{0};\n  tree::WalkTree(*this, [&splits](auto const& tree, bst_node_t nidx) {\n    if (!tree.IsLeaf(nidx)) {\n      splits++;\n    }\n    return true;\n  });\n  return splits;\n}\n\n[[nodiscard]] bst_node_t RegTree::GetDepth(bst_node_t nidx) const {\n  if (nidx == 0) {\n    return 0;\n  }\n  if (this->IsMultiTarget()) {\n    return this->HostMtView().GetDepth(nidx);\n  }\n  return this->HostScView().GetDepth(nidx);\n}\n\n[[nodiscard]] bst_node_t RegTree::MaxDepth() const {\n  if (this->IsMultiTarget()) {\n    return this->HostMtView().MaxDepth(RegTree::kRoot);\n  }\n  return this->HostScView().MaxDepth(RegTree::kRoot);\n}\n\nvoid RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_value,\n                         bool default_left, bst_float base_weight, bst_float left_leaf_weight,\n                         bst_float right_leaf_weight, bst_float loss_change, float sum_hess,\n                         float left_sum, float right_sum, bst_node_t leaf_right_child) {\n  CHECK(!IsMultiTarget());\n  int pleft = this->AllocNode();\n  int pright = this->AllocNode();\n  auto& h_nodes = nodes_.HostVector();\n\n  auto& node = h_nodes[nid];\n  CHECK(node.IsLeaf());\n  node.SetLeftChild(pleft);\n  node.SetRightChild(pright);\n  h_nodes[node.LeftChild()].SetParent(nid, true);\n  h_nodes[node.RightChild()].SetParent(nid, false);\n  node.SetSplit(split_index, split_value, default_left);\n\n  
h_nodes[pleft].SetLeaf(left_leaf_weight, leaf_right_child);\n  h_nodes[pright].SetLeaf(right_leaf_weight, leaf_right_child);\n\n  this->Stat(nid) = {loss_change, sum_hess, base_weight};\n  this->Stat(pleft) = {0.0f, left_sum, left_leaf_weight};\n  this->Stat(pright) = {0.0f, right_sum, right_leaf_weight};\n\n  this->split_types_.HostVector().at(nid) = FeatureType::kNumerical;\n}\n\nvoid RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond,\n                         bool default_left, linalg::VectorView<float const> base_weight,\n                         linalg::VectorView<float const> left_weight,\n                         linalg::VectorView<float const> right_weight, float loss_chg,\n                         float sum_hess, float left_sum, float right_sum) {\n  CHECK(IsMultiTarget());\n  CHECK_LT(split_index, this->param_.num_feature);\n  CHECK(this->p_mt_tree_);\n  CHECK_GT(param_.size_leaf_vector, 1);\n\n  this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight,\n                           right_weight, loss_chg, sum_hess, left_sum, right_sum);\n\n  split_types_.HostVector().resize(this->Size(), FeatureType::kNumerical);\n  split_categories_segments_.HostVector().resize(this->Size());\n  this->split_types_.HostVector().at(nidx) = FeatureType::kNumerical;\n\n  this->param_.num_nodes = this->p_mt_tree_->Size();\n}\n\nvoid RegTree::SetLeaves(std::vector<bst_node_t> leaves, common::Span<float const> weights) {\n  CHECK(IsMultiTarget());\n  this->p_mt_tree_->SetLeaves(std::move(leaves), weights);\n}\n\nvoid RegTree::ExpandCategorical(bst_node_t nidx, bst_feature_t split_index,\n                                common::Span<common::KCatBitField::value_type> split_cat,\n                                bool default_left, bst_float base_weight,\n                                bst_float left_leaf_weight, bst_float right_leaf_weight,\n                                bst_float loss_change, float sum_hess, 
float left_sum,\n                                float right_sum) {\n  CHECK(!IsMultiTarget());\n  this->ExpandNode(nidx, split_index, DftBadValue(), default_left, base_weight, left_leaf_weight,\n                   right_leaf_weight, loss_change, sum_hess, left_sum, right_sum);\n\n  auto& h_split_categories = split_categories_.HostVector();\n  std::size_t orig_size = h_split_categories.size();\n  h_split_categories.resize(orig_size + split_cat.size());\n  std::copy(split_cat.data(), split_cat.data() + split_cat.size(),\n            h_split_categories.begin() + orig_size);\n\n  this->split_types_.HostVector().at(nidx) = FeatureType::kCategorical;\n\n  auto& h_split_categories_segments = this->split_categories_segments_.HostVector();\n  h_split_categories_segments.at(nidx).beg = orig_size;\n  h_split_categories_segments.at(nidx).size = split_cat.size();\n}\n\nvoid RegTree::ExpandCategorical(bst_node_t nidx, bst_feature_t split_index,\n                                common::Span<common::KCatBitField::value_type> split_cat,\n                                bool default_left, linalg::VectorView<float const> base_weight,\n                                linalg::VectorView<float const> left_weight,\n                                linalg::VectorView<float const> right_weight, float loss_chg,\n                                float sum_hess, float left_sum, float right_sum) {\n  CHECK(IsMultiTarget());\n  this->ExpandNode(nidx, split_index, DftBadValue(), default_left, base_weight, left_weight,\n                   right_weight, loss_chg, sum_hess, left_sum, right_sum);\n\n  auto& h_split_categories = split_categories_.HostVector();\n  std::size_t orig_size = h_split_categories.size();\n  h_split_categories.resize(orig_size + split_cat.size());\n  std::copy(split_cat.data(), split_cat.data() + split_cat.size(),\n            h_split_categories.begin() + orig_size);\n\n  this->split_types_.HostVector().at(nidx) = FeatureType::kCategorical;\n\n  auto& 
h_split_categories_segments = this->split_categories_segments_.HostVector();\n  h_split_categories_segments.at(nidx).beg = orig_size;\n  h_split_categories_segments.at(nidx).size = split_cat.size();\n}\n\nRegTree* RegTree::Copy() const {\n  auto ptr = new RegTree{};\n  ptr->param_ = this->param_;\n\n  auto copy = [](auto* lhs, auto const& rhs) {\n    lhs->SetDevice(rhs.Device());\n    lhs->Resize(rhs.Size());\n    lhs->Copy(rhs);\n  };\n\n  copy(&ptr->nodes_, this->nodes_);\n  ptr->deleted_nodes_ = this->deleted_nodes_;\n  copy(&ptr->stats_, this->stats_);\n  copy(&ptr->split_types_, this->split_types_);\n  copy(&ptr->split_categories_, this->split_categories_);\n  copy(&ptr->split_categories_segments_, this->split_categories_segments_);\n\n  if (this->p_mt_tree_) {\n    ptr->p_mt_tree_.reset(this->p_mt_tree_->Copy(&ptr->param_));\n  }\n  return ptr;\n}\n\ntree::ScalarTreeView RegTree::HostScView() const { return tree::ScalarTreeView{this}; }\n\ntree::MultiTargetTreeView RegTree::HostMtView() const { return tree::MultiTargetTreeView{this}; }\n\ntemplate <bool typed>\nvoid RegTree::LoadCategoricalSplit(Json const& in) {\n  auto const& categories_segments = get<I64ArrayT<typed>>(in[\"categories_segments\"]);\n  auto const& categories_sizes = get<I64ArrayT<typed>>(in[\"categories_sizes\"]);\n  auto const& categories_nodes = get<I32ArrayT<typed>>(in[\"categories_nodes\"]);\n  auto const& categories = get<I32ArrayT<typed>>(in[\"categories\"]);\n\n  auto split_type = get<U8ArrayT<typed>>(in[\"split_type\"]);\n  bst_node_t n_nodes = split_type.size();\n  std::size_t cnt = 0;\n  bst_node_t last_cat_node = -1;\n  if (!categories_nodes.empty()) {\n    last_cat_node = GetElem<Integer>(categories_nodes, cnt);\n  }\n  // `categories_segments' is only available for categorical nodes to prevent overhead for\n  // numerical node. 
As a result, we need to track the categorical nodes we have processed\n  // so far.\n  auto& h_split_types = split_types_.HostVector();\n  h_split_types.resize(n_nodes, FeatureType::kNumerical);\n  auto& h_split_categories_segments = split_categories_segments_.HostVector();\n  h_split_categories_segments.resize(n_nodes);\n  auto& h_split_categories = this->split_categories_.HostVector();\n\n  for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {\n    h_split_types[nidx] = static_cast<FeatureType>(GetElem<Integer>(split_type, nidx));\n    if (nidx == last_cat_node) {\n      auto j_begin = GetElem<Integer>(categories_segments, cnt);\n      auto j_end = GetElem<Integer>(categories_sizes, cnt) + j_begin;\n      bst_cat_t max_cat{std::numeric_limits<bst_cat_t>::min()};\n      CHECK_GT(j_end - j_begin, 0) << nidx;\n\n      for (auto j = j_begin; j < j_end; ++j) {\n        auto const& category = GetElem<Integer>(categories, j);\n        auto cat = common::AsCat(category);\n        max_cat = std::max(max_cat, cat);\n      }\n      // Have at least 1 category in split.\n      CHECK_NE(std::numeric_limits<bst_cat_t>::min(), max_cat);\n      size_t n_cats = max_cat + 1;  // cat 0\n      size_t size = common::KCatBitField::ComputeStorageSize(n_cats);\n      std::vector<uint32_t> cat_bits_storage(size, 0);\n      common::CatBitField cat_bits{common::Span<uint32_t>(cat_bits_storage)};\n      for (auto j = j_begin; j < j_end; ++j) {\n        cat_bits.Set(common::AsCat(GetElem<Integer>(categories, j)));\n      }\n\n      auto begin = h_split_categories.size();\n      h_split_categories.resize(begin + cat_bits_storage.size());\n      std::copy(cat_bits_storage.begin(), cat_bits_storage.end(),\n                h_split_categories.begin() + begin);\n      h_split_categories_segments[nidx].beg = begin;\n      h_split_categories_segments[nidx].size = cat_bits_storage.size();\n\n      ++cnt;\n      if (cnt == categories_nodes.size()) {\n        last_cat_node = -1;  // Don't break, we still 
need to initialize the remaining nodes.\n      } else {\n        last_cat_node = GetElem<Integer>(categories_nodes, cnt);\n      }\n    } else {\n      h_split_categories_segments[nidx].beg = categories.size();\n      h_split_categories_segments[nidx].size = 0;\n    }\n  }\n}\n\ntemplate void RegTree::LoadCategoricalSplit<true>(Json const& in);\ntemplate void RegTree::LoadCategoricalSplit<false>(Json const& in);\n\nvoid RegTree::SaveCategoricalSplit(Json* p_out) const {\n  auto& out = *p_out;\n  CHECK_EQ(this->split_types_.Size(), this->Size());\n  CHECK_EQ(this->GetSplitCategoriesPtr().size(), this->Size());\n\n  I64Array categories_segments;\n  I64Array categories_sizes;\n  I32Array categories;        // bst_cat_t = int32_t\n  I32Array categories_nodes;  // bst_note_t = int32_t\n  U8Array split_type(split_types_.Size());\n\n  auto const& h_split_types = this->split_types_.ConstHostVector();\n  auto const& h_split_categories_segments = this->split_categories_segments_.ConstHostVector();\n\n  for (size_t i = 0; i < nodes_.Size(); ++i) {\n    split_type.Set(i, static_cast<std::underlying_type_t<FeatureType>>(h_split_types[i]));\n    if (h_split_types[i] == FeatureType::kCategorical) {\n      categories_nodes.GetArray().emplace_back(static_cast<std::int32_t>(i));\n      auto begin = categories.Size();\n      categories_segments.GetArray().emplace_back(begin);\n      auto segment = h_split_categories_segments[i];\n      auto cat_bits = common::GetNodeCats(this->GetSplitCategories(DeviceOrd::CPU()), segment);\n      for (size_t i = 0; i < cat_bits.Capacity(); ++i) {\n        if (cat_bits.Check(i)) {\n          categories.GetArray().emplace_back(static_cast<std::int32_t>(i));\n        }\n      }\n      size_t size = categories.Size() - begin;\n      categories_sizes.GetArray().emplace_back(size);\n      CHECK_NE(size, 0);\n    }\n  }\n\n  out[\"split_type\"] = std::move(split_type);\n  out[\"categories_segments\"] = std::move(categories_segments);\n  
out[\"categories_sizes\"] = std::move(categories_sizes);\n  out[\"categories_nodes\"] = std::move(categories_nodes);\n  out[\"categories\"] = std::move(categories);\n}\n\ntemplate <bool typed, bool feature_is_64>\nvoid LoadModelImpl(Json const& in, TreeParam const& param, std::vector<RTreeNodeStat>* p_stats,\n                   std::vector<RegTree::Node>* p_nodes) {\n  namespace tf = tree_field;\n  auto& stats = *p_stats;\n  auto& nodes = *p_nodes;\n\n  auto n_nodes = param.num_nodes;\n  CHECK_NE(n_nodes, 0);\n  // stats\n  auto const& loss_changes = get<FloatArrayT<typed>>(in[tf::kLossChg]);\n  CHECK_EQ(loss_changes.size(), n_nodes);\n  auto const& sum_hessian = get<FloatArrayT<typed>>(in[tf::kSumHess]);\n  CHECK_EQ(sum_hessian.size(), n_nodes);\n  auto const& base_weights = get<FloatArrayT<typed>>(in[tf::kBaseWeight]);\n  CHECK_EQ(base_weights.size(), n_nodes);\n  // nodes\n  auto const& lefts = get<I32ArrayT<typed>>(in[tf::kLeft]);\n  CHECK_EQ(lefts.size(), n_nodes);\n  auto const& rights = get<I32ArrayT<typed>>(in[tf::kRight]);\n  CHECK_EQ(rights.size(), n_nodes);\n  auto const& parents = get<I32ArrayT<typed>>(in[tf::kParent]);\n  CHECK_EQ(parents.size(), n_nodes);\n  auto const& indices = get<IndexArrayT<typed, feature_is_64>>(in[tf::kSplitIdx]);\n  CHECK_EQ(indices.size(), n_nodes);\n  auto const& conds = get<FloatArrayT<typed>>(in[tf::kSplitCond]);\n  CHECK_EQ(conds.size(), n_nodes);\n  auto const& default_left = get<U8ArrayT<typed>>(in[tf::kDftLeft]);\n  CHECK_EQ(default_left.size(), n_nodes);\n\n  // Initialization\n  stats = std::remove_reference_t<decltype(stats)>(n_nodes);\n  nodes = std::remove_reference_t<decltype(nodes)>(n_nodes);\n\n  static_assert(std::is_integral_v<decltype(GetElem<Integer>(lefts, 0))>);\n  static_assert(std::is_floating_point_v<decltype(GetElem<Number>(loss_changes, 0))>);\n\n  // Set node\n  for (int32_t i = 0; i < n_nodes; ++i) {\n    auto& s = stats[i];\n    s.loss_chg = GetElem<Number>(loss_changes, i);\n    s.sum_hess = 
GetElem<Number>(sum_hessian, i);\n    s.base_weight = GetElem<Number>(base_weights, i);\n\n    auto& n = nodes[i];\n    bst_node_t left = GetElem<Integer>(lefts, i);\n    bst_node_t right = GetElem<Integer>(rights, i);\n    bst_node_t parent = GetElem<Integer>(parents, i);\n    bst_feature_t ind = GetElem<Integer>(indices, i);\n    float cond{GetElem<Number>(conds, i)};\n    bool dft_left{GetElem<Boolean>(default_left, i)};\n    n = RegTree::Node{left, right, parent, ind, cond, dft_left};\n  }\n}\n\nvoid RegTree::LoadModel(Json const& in) {\n  namespace tf = tree_field;\n\n  bool typed = IsA<I32Array>(in[tf::kParent]);\n  auto const& in_obj = get<Object const>(in);\n  // basic properties\n  param_.FromJson(in[\"tree_param\"]);\n  // categorical splits\n  bool has_cat = in_obj.find(\"split_type\") != in_obj.cend();\n  if (has_cat) {\n    if (typed) {\n      this->LoadCategoricalSplit<true>(in);\n    } else {\n      this->LoadCategoricalSplit<false>(in);\n    }\n  }\n  // multi-target\n  if (param_.size_leaf_vector > 1) {\n    this->p_mt_tree_.reset(new MultiTargetTree{&param_});\n    this->GetMultiTargetTree()->LoadModel(in);\n    return;\n  }\n\n  bool feature_is_64 = IsA<I64Array>(in[\"split_indices\"]);\n  auto& h_stats = this->stats_.HostVector();\n  auto& h_nodes = this->nodes_.HostVector();\n  if (typed && feature_is_64) {\n    LoadModelImpl<true, true>(in, param_, &h_stats, &h_nodes);\n  } else if (typed && !feature_is_64) {\n    LoadModelImpl<true, false>(in, param_, &h_stats, &h_nodes);\n  } else if (!typed && feature_is_64) {\n    LoadModelImpl<false, true>(in, param_, &h_stats, &h_nodes);\n  } else {\n    LoadModelImpl<false, false>(in, param_, &h_stats, &h_nodes);\n  }\n\n  if (!has_cat) {\n    this->split_categories_segments_.HostVector().resize(this->param_.num_nodes);\n    auto& h_split_types = this->split_types_.HostVector();\n    h_split_types.resize(this->param_.num_nodes);\n    std::fill(h_split_types.begin(), h_split_types.end(), 
FeatureType::kNumerical);\n  }\n\n  deleted_nodes_.clear();\n  for (bst_node_t i = 1; i < param_.num_nodes; ++i) {\n    if (h_nodes[i].IsDeleted()) {\n      deleted_nodes_.push_back(i);\n    }\n  }\n  // easier access to [] operator\n  auto& self = *this;\n  for (auto nid = 1; nid < param_.num_nodes; ++nid) {\n    auto parent = self[nid].Parent();\n    CHECK_NE(parent, RegTree::kInvalidNodeId);\n    self[nid].SetParent(self[nid].Parent(), self[parent].LeftChild() == nid);\n  }\n  CHECK_EQ(static_cast<bst_node_t>(deleted_nodes_.size()), param_.num_deleted);\n  CHECK_EQ(this->split_categories_segments_.Size(), param_.num_nodes);\n}\n\nvoid RegTree::SaveModel(Json* p_out) const {\n  auto& out = *p_out;\n  // basic properties\n  out[\"tree_param\"] = Object{};\n  param_.ToJson(&out[\"tree_param\"]);\n  // categorical splits\n  this->SaveCategoricalSplit(p_out);\n  // multi-target\n  if (this->IsMultiTarget()) {\n    CHECK_GT(param_.size_leaf_vector, 1);\n    this->GetMultiTargetTree()->SaveModel(p_out);\n    return;\n  }\n  /*  Here we are treating leaf node and internal node equally.  Some information like\n   *  child node id doesn't make sense for leaf node but we will have to save them to\n   *  avoid creating a huge map.  
One difficulty is XGBoost has deleted node created by\n   *  pruner, and this pruner can be used inside another updater so leaf are not necessary\n   *  at the end of node array.\n   */\n  CHECK_EQ(param_.num_nodes, static_cast<int>(nodes_.Size()));\n  CHECK_EQ(param_.num_nodes, static_cast<int>(stats_.Size()));\n\n  CHECK_EQ(get<String>(out[\"tree_param\"][\"num_nodes\"]), std::to_string(param_.num_nodes));\n  auto n_nodes = param_.num_nodes;\n\n  // stats\n  F32Array loss_changes(n_nodes);\n  F32Array sum_hessian(n_nodes);\n  F32Array base_weights(n_nodes);\n\n  // nodes\n  I32Array lefts(n_nodes);\n  I32Array rights(n_nodes);\n  I32Array parents(n_nodes);\n\n  F32Array conds(n_nodes);\n  U8Array default_left(n_nodes);\n  CHECK_EQ(this->split_types_.Size(), param_.num_nodes);\n\n  namespace tf = tree_field;\n\n  auto const& h_nodes = this->nodes_.ConstHostVector();\n  auto const& h_stats = this->stats_.ConstHostVector();\n\n  auto save_tree = [&](auto* p_indices_array) {\n    auto& indices_array = *p_indices_array;\n    for (bst_node_t i = 0; i < n_nodes; ++i) {\n      auto const& s = h_stats[i];\n      loss_changes.Set(i, s.loss_chg);\n      sum_hessian.Set(i, s.sum_hess);\n      base_weights.Set(i, s.base_weight);\n\n      auto const& n = h_nodes[i];\n      lefts.Set(i, n.LeftChild());\n      rights.Set(i, n.RightChild());\n      parents.Set(i, n.Parent());\n      indices_array.Set(i, n.SplitIndex());\n      conds.Set(i, n.SplitCond());\n      default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft()));\n    }\n  };\n  if (this->param_.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {\n    I64Array indices_64(n_nodes);\n    save_tree(&indices_64);\n    out[tf::kSplitIdx] = std::move(indices_64);\n  } else {\n    I32Array indices_32(n_nodes);\n    save_tree(&indices_32);\n    out[tf::kSplitIdx] = std::move(indices_32);\n  }\n\n  out[tf::kLossChg] = std::move(loss_changes);\n  out[tf::kSumHess] = std::move(sum_hessian);\n  
out[tf::kBaseWeight] = std::move(base_weights);\n\n  out[tf::kLeft] = std::move(lefts);\n  out[tf::kRight] = std::move(rights);\n  out[tf::kParent] = std::move(parents);\n\n  out[tf::kSplitCond] = std::move(conds);\n  out[tf::kDftLeft] = std::move(default_left);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "src/tree/tree_updater.cc",
    "content": "/**\n * Copyright 2015-2023 by XGBoost Contributors\n * \\file tree_updater.cc\n * \\brief Registry of tree updaters.\n */\n#include \"xgboost/tree_updater.h\"\n\n#include <dmlc/registry.h>\n\n#include <string>  // for string\n\nnamespace dmlc {\nDMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);\n}  // namespace dmlc\n\nnamespace xgboost {\nTreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo const* task) {\n  auto* e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);\n  if (e == nullptr) {\n    LOG(FATAL) << \"Unknown tree updater \" << name;\n  }\n  auto p_updater = (e->body)(ctx, task);\n  return p_updater;\n}\n}  // namespace xgboost\n\nnamespace xgboost::tree {\n// List of files that will be force linked in static links.\nDMLC_REGISTRY_LINK_TAG(updater_colmaker);\nDMLC_REGISTRY_LINK_TAG(updater_refresh);\nDMLC_REGISTRY_LINK_TAG(updater_prune);\nDMLC_REGISTRY_LINK_TAG(updater_quantile_hist);\nDMLC_REGISTRY_LINK_TAG(updater_approx);\nDMLC_REGISTRY_LINK_TAG(updater_sync);\n#ifdef XGBOOST_USE_CUDA\nDMLC_REGISTRY_LINK_TAG(updater_gpu_hist);\n#endif  // XGBOOST_USE_CUDA\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/tree_view.cc",
    "content": "/**\n * Copyright 2025-2026, XGBoost Contributors\n */\n#include \"tree_view.h\"\n\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for MakeTensorView\n#include \"xgboost/span.h\"                // for Span\n\nnamespace xgboost::tree {\nnamespace {\ntemplate <typename T>\nauto DispatchPtr(DeviceOrd device, HostDeviceVector<T> const& vec) {\n  if (device.IsCPU()) {\n    return vec.ConstHostPointer();\n  }\n  vec.SetDevice(device);\n  return vec.ConstDevicePointer();\n}\n\nauto DispatchWeight(DeviceOrd device, RegTree const* tree) {\n  auto const* mt_tree = tree->GetMultiTargetTree();\n  auto n_targets = mt_tree->NumTargets();\n  auto n_leaves = mt_tree->NumLeaves();\n  common::Span<float const> weights = tree->GetMultiTargetTree()->LeafWeights(device);\n  if (n_leaves > 0) {\n    CHECK(!weights.empty());\n  }\n  return linalg::MakeTensorView(device, weights, n_leaves, n_targets);\n}\n}  // namespace\n\nScalarTreeView::ScalarTreeView(DeviceOrd device, bool need_stat, RegTree const* tree)\n    : CategoriesMixIn{tree->GetCategoriesMatrix(device)},\n      nodes{tree->GetNodes(device).data()},\n      stats{need_stat ? 
tree->GetStats(device).data() : nullptr},\n      n{tree->NumNodes()} {\n  CHECK(!tree->IsMultiTarget());\n}\n\nMultiTargetTreeView::MultiTargetTreeView(DeviceOrd device, bool need_stat, RegTree const* tree)\n    : CategoriesMixIn{tree->GetCategoriesMatrix(device)},\n      left{DispatchPtr(device, tree->GetMultiTargetTree()->left_)},\n      right{DispatchPtr(device, tree->GetMultiTargetTree()->right_)},\n      parent{DispatchPtr(device, tree->GetMultiTargetTree()->parent_)},\n      split_index{DispatchPtr(device, tree->GetMultiTargetTree()->split_index_)},\n      default_left{DispatchPtr(device, tree->GetMultiTargetTree()->default_left_)},\n      split_conds{DispatchPtr(device, tree->GetMultiTargetTree()->split_conds_)},\n      n{tree->NumNodes()},\n      leaf_weights{DispatchWeight(device, tree)},\n      loss_chg{need_stat ? DispatchPtr(device, tree->GetMultiTargetTree()->loss_chg_) : nullptr},\n      sum_hess{need_stat ? DispatchPtr(device, tree->GetMultiTargetTree()->sum_hess_) : nullptr} {}\n\nMultiTargetTreeView::MultiTargetTreeView(RegTree const* tree)\n    : MultiTargetTreeView{DeviceOrd::CPU(), true, tree} {}\n\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/tree_view.h",
    "content": "/**\n * Copyright 2025-2026, XGBoost Contributors\n *\n * The file provides views for two tree models. We hope to eventually unify them, but the\n * original scalar tree `Node` struct is used extensively in the codebase.\n */\n#pragma once\n#include <algorithm>  // for max\n#include <cstdint>    // for uint8_t\n#include <stack>      // for stack\n#include <utility>    // for move\n\n#include \"../common/type.h\"      // for GetValueT\n#include \"xgboost/base.h\"        // for bst_node_t\n#include \"xgboost/context.h\"     // for DeviceOrd\n#include \"xgboost/tree_model.h\"  // for RegTree\n\nnamespace xgboost::tree {\ntemplate <typename Base>\nstruct WalkTreeMixIn {\n  /**\n   * @brief Iterate through all nodes in this tree.\n   *\n   * @param Function that accepts a node index, and returns false when iteration should\n   *        stop, otherwise returns true.\n   */\n  template <typename Fn>\n  void WalkTree(Fn&& func) const {\n    std::stack<bst_node_t> nodes;\n    nodes.push(RegTree::kRoot);\n    auto self = static_cast<Base const*>(this);\n    while (!nodes.empty()) {\n      auto nidx = nodes.top();\n      nodes.pop();\n      if (!func(nidx)) {\n        return;\n      }\n      auto left = self->LeftChild(nidx);\n      auto right = self->RightChild(nidx);\n      if (!self->IsLeaf(nidx)) {\n        nodes.push(left);\n        nodes.push(right);\n      }\n    }\n  }\n\n  /**\n   * @brief Get the depth of a node.\n   * @param nidx node id\n   */\n  [[nodiscard]] bst_node_t GetDepth(bst_node_t nidx) const {\n    bst_node_t depth = 0;\n    auto self = static_cast<Base const*>(this);\n    while (!self->IsRoot(nidx)) {\n      ++depth;\n      nidx = self->Parent(nidx);\n    }\n    return depth;\n  }\n\n  [[nodiscard]] bst_node_t MaxDepth(bst_node_t nidx) const {\n    auto self = static_cast<Base const*>(this);\n    if (self->IsLeaf(nidx)) {\n      return 0;\n    }\n    return std::max(this->MaxDepth(self->LeftChild(nidx)) + 1,\n                    
this->MaxDepth(self->RightChild(nidx)) + 1);\n  }\n  [[nodiscard]] bst_node_t MaxDepth() const { return this->MaxDepth(RegTree::kRoot); }\n};\n\nstruct CategoriesMixIn {\n  RegTree::CategoricalSplitMatrix cats;\n\n  [[nodiscard]] XGBOOST_DEVICE bool HasCategoricalSplit() const { return !cats.categories.empty(); }\n  [[nodiscard]] XGBOOST_DEVICE RegTree::CategoricalSplitMatrix const& GetCategoriesMatrix() const {\n    return cats;\n  }\n  /**\n   * @brief Get the bit storage of categories used by a node.\n   */\n  [[nodiscard]] XGBOOST_DEVICE common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {\n    auto node_ptr = this->GetCategoriesMatrix().node_ptr;\n    auto categories = this->GetCategoriesMatrix().categories;\n    auto segment = node_ptr[nidx];\n    auto node_cats = categories.subspan(segment.beg, segment.size);\n    return node_cats;\n  }\n  [[nodiscard]] XGBOOST_DEVICE FeatureType SplitType(bst_node_t nidx) const {\n    return cats.split_type[nidx];\n  }\n};\n\n/**\n * @brief Tree view for scalar leaf.\n */\nstruct ScalarTreeView : public WalkTreeMixIn<ScalarTreeView>, public CategoriesMixIn {\n  static bst_node_t constexpr InvalidNodeId() { return RegTree::kInvalidNodeId; }\n  static constexpr bst_node_t RootId() { return RegTree::kRoot; }\n\n  RegTree::Node const* nodes;\n\n  RTreeNodeStat const* stats;\n  // The number of nodes\n  bst_node_t n{0};\n\n  [[nodiscard]] XGBOOST_DEVICE bool IsLeaf(bst_node_t nidx) const { return nodes[nidx].IsLeaf(); }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t Parent(bst_node_t nidx) const {\n    return nodes[nidx].Parent();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t LeftChild(bst_node_t nidx) const {\n    return nodes[nidx].LeftChild();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t RightChild(bst_node_t nidx) const {\n    return nodes[nidx].RightChild();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_feature_t SplitIndex(bst_node_t nidx) const {\n    return nodes[nidx].SplitIndex();\n  }\n  [[nodiscard]] 
XGBOOST_DEVICE bool IsDeleted(bst_node_t nidx) const {\n    return nodes[nidx].IsDeleted();\n  }\n  [[nodiscard]] XGBOOST_DEVICE float SplitCond(bst_node_t nidx) const {\n    return nodes[nidx].SplitCond();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bool DefaultLeft(bst_node_t nidx) const {\n    return nodes[nidx].DefaultLeft();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bool IsLeftChild(bst_node_t nidx) const {\n    return nodes[nidx].IsLeftChild();\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t DefaultChild(bst_node_t nidx) const {\n    return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx);\n  }\n  [[nodiscard]] XGBOOST_DEVICE float LeafValue(bst_node_t nidx) const {\n    return this->nodes[nidx].LeafValue();\n  }\n\n  [[nodiscard]] bst_target_t NumTargets() const { return 1; }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t Size() const { return this->n; }\n  [[nodiscard]] XGBOOST_DEVICE bool IsRoot(bst_node_t nidx) const {\n    return this->nodes[nidx].IsRoot();\n  }\n\n  [[nodiscard]] RTreeNodeStat const& Stat(bst_node_t nidx) const { return stats[nidx]; }\n  [[nodiscard]] XGBOOST_DEVICE auto SumHess(bst_node_t nidx) const { return stats[nidx].sum_hess; }\n  [[nodiscard]] XGBOOST_DEVICE auto LossChg(bst_node_t nidx) const { return stats[nidx].loss_chg; }\n\n  XGBOOST_DEVICE explicit ScalarTreeView(RegTree::Node const* nodes, RTreeNodeStat const* stats,\n                                         RegTree::CategoricalSplitMatrix cats, bst_node_t n_nodes)\n      : CategoriesMixIn{std::move(cats)}, nodes{nodes}, stats{stats}, n{n_nodes} {}\n\n  /**\n   * @brief Create a device view\n   *\n   * @param need_stat We can skip the stat when performing normal inference.\n   */\n  explicit ScalarTreeView(DeviceOrd device, bool need_stat, RegTree const* tree);\n  /** @brief Create a host view */\n  explicit ScalarTreeView(RegTree const* tree)\n      : CategoriesMixIn{tree->GetCategoriesMatrix(DeviceOrd::CPU())},\n        
nodes{tree->GetNodes(DeviceOrd::CPU()).data()},\n        stats{tree->GetStats(DeviceOrd::CPU()).data()},\n        n{tree->NumNodes()} {\n    CHECK(!tree->IsMultiTarget());\n  }\n};\n\n/**\n * @brief A view to the @ref MultiTargetTree suitable for both host and device.\n */\nstruct MultiTargetTreeView : public WalkTreeMixIn<MultiTargetTreeView>, public CategoriesMixIn {\n  static bst_node_t constexpr InvalidNodeId() { return MultiTargetTree::InvalidNodeId(); }\n\n  bst_node_t const* left;\n  bst_node_t const* right;\n  bst_node_t const* parent;\n\n  bst_feature_t const* split_index;\n  std::uint8_t const* default_left;\n  float const* split_conds;\n\n  // The number of nodes\n  bst_node_t n{0};\n\n  linalg::MatrixView<float const> leaf_weights;\n\n  // Statistics\n  float const* loss_chg{nullptr};\n  float const* sum_hess{nullptr};\n\n  [[nodiscard]] XGBOOST_DEVICE bool IsLeaf(bst_node_t nidx) const {\n    return left[nidx] == InvalidNodeId();\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t Parent(bst_node_t nidx) const { return parent[nidx]; }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t LeftChild(bst_node_t nidx) const { return left[nidx]; }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t RightChild(bst_node_t nidx) const { return right[nidx]; }\n\n  [[nodiscard]] bool IsLeftChild(bst_node_t nidx) const {\n    auto p = this->Parent(nidx);\n    return nidx == this->LeftChild(p);\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_feature_t SplitIndex(bst_node_t nidx) const {\n    return split_index[nidx];\n  }\n  [[nodiscard]] XGBOOST_DEVICE float SplitCond(bst_node_t nidx) const { return split_conds[nidx]; }\n  [[nodiscard]] XGBOOST_DEVICE bool DefaultLeft(bst_node_t nidx) const {\n    return default_left[nidx];\n  }\n  [[nodiscard]] XGBOOST_DEVICE bst_node_t DefaultChild(bst_node_t nidx) const {\n    return this->DefaultLeft(nidx) ? 
this->LeftChild(nidx) : this->RightChild(nidx);\n  }\n  [[nodiscard]] XGBOOST_DEVICE linalg::VectorView<float const> LeafValue(bst_node_t nidx) const {\n    auto leaf_idx = this->right[nidx];\n    return this->leaf_weights.Slice(leaf_idx, linalg::All());\n  }\n\n  [[nodiscard]] bst_target_t NumTargets() const { return this->leaf_weights.Shape(1); }\n  [[nodiscard]] bst_node_t Size() const { return this->n; }\n  [[nodiscard]] XGBOOST_DEVICE bool IsRoot(bst_node_t nidx) const { return nidx == RegTree::kRoot; }\n\n  // These methods require need_stat=true when constructing the view.\n  // Will crash with nullptr dereference if stats were not loaded.\n  [[nodiscard]] float SumHess(bst_node_t nidx) const { return sum_hess[nidx]; }\n  [[nodiscard]] float LossChg(bst_node_t nidx) const { return loss_chg[nidx]; }\n  /**\n   * @brief Create a device view\n   *\n   * @param need_stat We can skip the stat when performing normal inference.\n   */\n  explicit MultiTargetTreeView(DeviceOrd device, bool need_stat, RegTree const* tree);\n  /** @brief Create a host view */\n  explicit MultiTargetTreeView(RegTree const* tree);\n};\n\n/**\n * @brief Iterate through all nodes in a tree.\n *\n * @param tree  The tree to traversal\n * @param fn    See @ref WalkTreeMixIn , addition tree views are passed into the function if @ref\n *              trees is not empty.\n * @param trees Additional trees that have the same target type as @ref tree . We can\n *              dispatch all trees together for easier access.\n */\ntemplate <typename Fn, typename... Tree>\nvoid WalkTree(RegTree const& tree, Fn&& fn, Tree const&... 
trees) {\n  if (tree.IsMultiTarget()) {\n    auto mt_tree = tree.HostMtView();\n    mt_tree.WalkTree([&](bst_node_t nidx) { return fn(mt_tree, trees.HostMtView()..., nidx); });\n  } else {\n    auto sc_tree = tree.HostScView();\n    sc_tree.WalkTree([&](bst_node_t nidx) { return fn(sc_tree, trees.HostScView()..., nidx); });\n  }\n}\n\ntemplate <typename TreeView>\n[[nodiscard]] bool constexpr IsScalarTree() {\n  return std::is_same_v<common::GetValueT<TreeView>, ScalarTreeView>;\n}\n\ntemplate <typename TreeView>\n[[nodiscard]] bool constexpr IsScalarTree(TreeView const&) {\n  return IsScalarTree<TreeView>();\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_approx.cc",
    "content": "/**\n * Copyright 2021-2026, XGBoost contributors\n *\n * \\brief Implementation for the approx tree method.\n */\n#include <algorithm>  // for max, transform, fill_n\n#include <cstddef>    // for size_t\n#include <map>        // for map\n#include <memory>     // for allocator, unique_ptr, make_shared, make_unique\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../collective/aggregator.h\"        // for GlobalSum\n#include \"../collective/communicator-inl.h\"  // for IsDistributed\n#include \"../common/hist_util.h\"             // for HistogramCuts\n#include \"../common/random.h\"                // for ColumnSampler\n#include \"../common/timer.h\"                 // for Monitor\n#include \"../data/gradient_index.h\"          // for GHistIndexMatrix\n#include \"common_row_partitioner.h\"          // for CommonRowPartitioner\n#include \"dmlc/registry.h\"                   // for DMLC_REGISTRY_FILE_TAG\n#include \"driver.h\"                          // for Driver\n#include \"hist/evaluate_splits.h\"            // for HistEvaluator, UpdatePredictionCacheImpl\n#include \"hist/expand_entry.h\"               // for CPUExpandEntry\n#include \"hist/hist_param.h\"                 // for HistMakerTrainParam\n#include \"hist/histogram.h\"                  // for MultiHistogramBuilder\n#include \"hist/sampler.h\"                    // for Sampler\n#include \"param.h\"                           // for GradStats, TrainParam\n#include \"xgboost/base.h\"                    // for Args, GradientPair, bst_node_t, bst_bin_t\n#include \"xgboost/context.h\"                 // for Context\n#include \"xgboost/data.h\"                    // for DMatrix, BatchSet, BatchIterator, MetaInfo\n#include \"xgboost/gradient.h\"                // for GradientContainer\n#include \"xgboost/host_device_vector.h\"      // for HostDeviceVector\n#include \"xgboost/json.h\"                    // for Object, Json, FromJson, ToJson, get\n#include 
\"xgboost/linalg.h\"                  // for Matrix, MakeTensorView, Empty, MatrixView\n#include \"xgboost/logging.h\"                 // for LogCheck_EQ, CHECK_EQ, CHECK\n#include \"xgboost/span.h\"                    // for Span\n#include \"xgboost/task.h\"                    // for ObjInfo\n#include \"xgboost/tree_model.h\"              // for RegTree, RTreeNodeStat\n#include \"xgboost/tree_updater.h\"            // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...\n\nnamespace xgboost::tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_approx);\n\nnamespace {\n// Return the BatchParam used by DMatrix.\nauto BatchSpec(TrainParam const &p, common::Span<float> hess, ObjInfo const task) {\n  return BatchParam{p.max_bin, hess, !task.const_hess};\n}\n\nauto BatchSpec(TrainParam const &p, common::Span<float> hess) {\n  return BatchParam{p.max_bin, hess, false};\n}\n}  // anonymous namespace\n\nclass GlobalApproxBuilder {\n protected:\n  TrainParam const *param_;\n  HistMakerTrainParam const *hist_param_{nullptr};\n  std::shared_ptr<common::ColumnSampler> col_sampler_;\n  HistEvaluator evaluator_;\n  MultiHistogramBuilder histogram_builder_;\n  Context const *ctx_;\n  ObjInfo const *const task_;\n\n  std::vector<CommonRowPartitioner> partitioner_;\n  // Pointer to last updated tree, used for update prediction cache.\n  RegTree *p_last_tree_{nullptr};\n  common::Monitor *monitor_;\n  size_t n_batches_{0};\n  // Cache for histogram cuts.\n  common::HistogramCuts feature_values_{0};\n\n public:\n  void InitData(DMatrix *p_fmat, RegTree const *p_tree, common::Span<float> hess) {\n    monitor_->Start(__func__);\n\n    n_batches_ = 0;\n    bst_bin_t n_total_bins = 0;\n    partitioner_.clear();\n    // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?\n    for (auto const &page :\n         p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess, *task_))) {\n      if (n_total_bins == 0) {\n        n_total_bins = page.cut.TotalBins();\n        
feature_values_ = page.cut;\n      } else {\n        CHECK_EQ(n_total_bins, page.cut.TotalBins());\n      }\n      partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid,\n                                p_fmat->Info().IsColumnSplit());\n      n_batches_++;\n    }\n\n    histogram_builder_.Reset(ctx_, n_total_bins, p_tree->NumTargets(), BatchSpec(*param_, hess),\n                             collective::IsDistributed(), p_fmat->Info().IsColumnSplit(),\n                             hist_param_);\n    monitor_->Stop(__func__);\n  }\n\n  CPUExpandEntry InitRoot(DMatrix *p_fmat, std::vector<GradientPair> const &gpair,\n                          common::Span<float> hess, RegTree *p_tree) {\n    monitor_->Start(__func__);\n    CPUExpandEntry best;\n    best.nid = RegTree::kRoot;\n    best.depth = 0;\n    GradStats root_sum;\n    for (auto const &g : gpair) {\n      root_sum.Add(g);\n    }\n    auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),\n                                    linalg::MakeVec(reinterpret_cast<double *>(&root_sum), 2));\n    collective::SafeColl(rc);\n\n    std::vector<CPUExpandEntry> nodes{best};\n    this->histogram_builder_.BuildRootHist(p_fmat, p_tree->HostScView(), partitioner_,\n                                           linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1),\n                                           best, BatchSpec(*param_, hess));\n\n    auto weight = evaluator_.InitRoot(root_sum);\n    p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess();\n    p_tree->Stat(RegTree::kRoot).base_weight = weight;\n    (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);\n\n    auto const &histograms = histogram_builder_.Histogram(0);\n    auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n    evaluator_.EvaluateSplits(histograms, feature_values_, ft, *p_tree, &nodes);\n    monitor_->Stop(__func__);\n\n    return nodes.front();\n  }\n\n  void UpdatePredictionCache(DMatrix const *p_fmat, 
common::Span<bst_node_t const> node_position,\n                             linalg::MatrixView<float> out_preds) const {\n    monitor_->Start(__func__);\n    // Caching prediction seems redundant for approx tree method, as sketching takes up\n    // majority of training time.\n    CHECK_EQ(out_preds.Size(), p_fmat->Info().num_row_);\n    CHECK_EQ(node_position.size(), p_fmat->Info().num_row_);\n    UpdatePredictionCacheImpl(ctx_, p_last_tree_, node_position, out_preds);\n    monitor_->Stop(__func__);\n  }\n\n  void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,\n                      std::vector<CPUExpandEntry> const &valid_candidates,\n                      std::vector<GradientPair> const &gpair, common::Span<float> hess) {\n    monitor_->Start(__func__);\n    this->histogram_builder_.BuildHistLeftRight(\n        ctx_, p_fmat, p_tree->HostScView(), partitioner_, valid_candidates,\n        linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1), BatchSpec(*param_, hess));\n    monitor_->Stop(__func__);\n  }\n\n  void LeafPartition(RegTree const &tree, common::Span<float const> hess,\n                     std::vector<bst_node_t> *p_out_position) {\n    monitor_->Start(__func__);\n    p_out_position->resize(hess.size());\n    for (auto const &part : partitioner_) {\n      part.LeafPartition(ctx_, tree.HostScView(), hess,\n                         common::Span{p_out_position->data(), p_out_position->size()});\n    }\n    monitor_->Stop(__func__);\n  }\n\n public:\n  explicit GlobalApproxBuilder(TrainParam const *param, HistMakerTrainParam const *hist_param,\n                               MetaInfo const &info, Context const *ctx,\n                               std::shared_ptr<common::ColumnSampler> column_sampler,\n                               ObjInfo const *task, common::Monitor *monitor)\n      : param_{param},\n        hist_param_{hist_param},\n        col_sampler_{std::move(column_sampler)},\n        evaluator_{ctx, param_, info, col_sampler_},\n        
ctx_{ctx},\n        task_{task},\n        monitor_{monitor} {}\n\n  void UpdateTree(DMatrix *p_fmat, std::vector<GradientPair> const &gpair, common::Span<float> hess,\n                  RegTree *p_tree, HostDeviceVector<bst_node_t> *p_out_position) {\n    p_last_tree_ = p_tree;\n    this->InitData(p_fmat, p_tree, hess);\n\n    Driver<CPUExpandEntry> driver(*param_);\n    auto &tree = *p_tree;\n    driver.Push({this->InitRoot(p_fmat, gpair, hess, p_tree)});\n    auto expand_set = driver.Pop();\n\n    /**\n     * Note for update position\n     * Root:\n     *   Not applied: No need to update position as initialization has got all the rows ordered.\n     *   Applied: Update position is run on applied nodes so the rows are partitioned.\n     * Non-root:\n     *   Not applied: That node is root of the subtree, same rule as root.\n     *   Applied: Ditto\n     */\n\n    while (!expand_set.empty()) {\n      // candidates that can be further splited.\n      std::vector<CPUExpandEntry> valid_candidates;\n      // candidates that can be applied.\n      std::vector<CPUExpandEntry> applied;\n      for (auto const &candidate : expand_set) {\n        evaluator_.ApplyTreeSplit(candidate, p_tree);\n        applied.push_back(candidate);\n        if (driver.IsChildValid(candidate)) {\n          valid_candidates.emplace_back(candidate);\n        }\n      }\n\n      monitor_->Start(\"UpdatePosition\");\n      size_t page_id = 0;\n      for (auto const &page :\n           p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {\n        partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree->HostScView());\n        page_id++;\n      }\n      monitor_->Stop(\"UpdatePosition\");\n\n      std::vector<CPUExpandEntry> best_splits;\n      if (!valid_candidates.empty()) {\n        this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair, hess);\n        for (auto const &candidate : valid_candidates) {\n          int left_child_nidx = 
tree[candidate.nid].LeftChild();\n          int right_child_nidx = tree[candidate.nid].RightChild();\n          CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};\n          CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};\n          best_splits.push_back(l_best);\n          best_splits.push_back(r_best);\n        }\n        auto const &histograms = histogram_builder_.Histogram(0);\n        auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n        monitor_->Start(\"EvaluateSplits\");\n        evaluator_.EvaluateSplits(histograms, feature_values_, ft, *p_tree, &best_splits);\n        monitor_->Stop(\"EvaluateSplits\");\n      }\n      driver.Push(best_splits.begin(), best_splits.end());\n      expand_set = driver.Pop();\n    }\n\n    auto &h_position = p_out_position->HostVector();\n    this->LeafPartition(tree, hess, &h_position);\n  }\n};\n\n/**\n * \\brief Implementation for the approx tree method.  It constructs quantile for every\n *        iteration.\n */\nclass GlobalApproxUpdater : public TreeUpdater {\n  common::Monitor monitor_;\n  // specializations for different histogram precision.\n  std::unique_ptr<GlobalApproxBuilder> pimpl_;\n  // pointer to the last DMatrix, used for update prediction cache.\n  DMatrix *cached_{nullptr};\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  ObjInfo const *task_;\n  HistMakerTrainParam hist_param_;\n\n public:\n  explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task)\n      : TreeUpdater(ctx), column_sampler_{std::make_shared<common::ColumnSampler>()}, task_{task} {\n    monitor_.Init(__func__);\n  }\n\n  void Configure(Args const &args) override { hist_param_.UpdateAllowUnknown(args); }\n  void LoadConfig(Json const &in) override {\n    auto const &config = get<Object const>(in);\n    FromJson(config.at(\"hist_train_param\"), &hist_param_);\n  }\n  void SaveConfig(Json *p_out) const override {\n    auto &out = *p_out;\n    
out[\"hist_train_param\"] = ToJson(hist_param_);\n  }\n\n  void InitData(TrainParam const &param, linalg::Matrix<GradientPair> const *gpair,\n                linalg::Matrix<GradientPair> *sampled) {\n    *sampled = linalg::Empty<GradientPair>(ctx_, gpair->Size(), 1);\n    auto in = gpair->HostView().Values();\n    std::copy(in.data(), in.data() + in.size(), sampled->HostView().Values().data());\n    cpu_impl::Sampler sampler{param};\n    sampler.Sample(ctx_, sampled->HostView());\n  }\n\n  [[nodiscard]] char const *Name() const override { return \"grow_histmaker\"; }\n\n  void Update(TrainParam const *param, GradientContainer *in_gpair, DMatrix *m,\n              common::Span<HostDeviceVector<bst_node_t>> out_position,\n              const std::vector<RegTree *> &trees) override {\n    CHECK(hist_param_.GetInitialised());\n    pimpl_ = std::make_unique<GlobalApproxBuilder>(param, &hist_param_, m->Info(), ctx_,\n                                                   column_sampler_, task_, &monitor_);\n    auto gpair = in_gpair->FullGradOnly();\n\n    linalg::Matrix<GradientPair> h_gpair;\n    // Obtain the hessian values for weighted sketching\n    InitData(*param, gpair, &h_gpair);\n    std::vector<float> hess(h_gpair.Size());\n    auto const &s_gpair = h_gpair.Data()->ConstHostVector();\n    std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(),\n                   [](auto g) { return g.GetHess(); });\n\n    cached_ = m;\n\n    std::size_t t_idx = 0;\n    for (auto p_tree : trees) {\n      this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);\n      hist_param_.CheckTreesSynchronized(ctx_, p_tree);\n      ++t_idx;\n    }\n  }\n\n  bool UpdatePredictionCache(DMatrix const *p_fmat,\n                             common::Span<HostDeviceVector<bst_node_t>> out_position,\n                             linalg::MatrixView<float> out_preds) override {\n    if (p_fmat != cached_ || !pimpl_) {\n      return false;\n    }\n    if (out_position.size() > 
1) {\n      return false;\n    }\n    this->pimpl_->UpdatePredictionCache(p_fmat, out_position.front().ConstHostSpan(), out_preds);\n    return true;\n  }\n\n  [[nodiscard]] bool HasNodePosition() const override { return true; }\n};\n\nDMLC_REGISTRY_FILE_TAG(grow_histmaker);\n\nXGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, \"grow_histmaker\")\n    .describe(\n        \"Tree constructor that uses approximate histogram construction \"\n        \"for each node.\")\n    .set_body([](Context const *ctx, ObjInfo const *task) {\n      return new GlobalApproxUpdater(ctx, task);\n    });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_colmaker.cc",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file updater_colmaker.cc\n * \\brief use columnwise update to construct a tree\n * \\author Tianqi Chen\n */\n#include <algorithm>\n#include <cmath>\n#include <vector>\n\n#include \"../collective/communicator-inl.h\"  // for IsDistributed\n#include \"../common/error_msg.h\"             // for NoCategorical\n#include \"../common/random.h\"\n#include \"constraints.h\"\n#include \"param.h\"\n#include \"sample_position.h\"  // for SamplePosition\n#include \"split_evaluator.h\"\n#include \"tree_view.h\"         // for ScalarTreeView\n#include \"xgboost/gradient.h\"  // for GradientContainer\n#include \"xgboost/json.h\"\n#include \"xgboost/logging.h\"\n#include \"xgboost/parameter.h\"\n#include \"xgboost/tree_updater.h\"\n\nnamespace xgboost::tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_colmaker);\n\nstruct ColMakerTrainParam : XGBoostParameter<ColMakerTrainParam> {\n  // speed optimization for dense column\n  float opt_dense_col;\n  // default direction choice\n  int default_direction;\n\n  DMLC_DECLARE_PARAMETER(ColMakerTrainParam) {\n    DMLC_DECLARE_FIELD(opt_dense_col)\n        .set_range(0.0f, 1.0f)\n        .set_default(1.0f)\n        .describe(\"EXP Param: speed optimization for dense column.\");\n    DMLC_DECLARE_FIELD(default_direction)\n        .set_default(0)\n        .add_enum(\"learn\", 0)\n        .add_enum(\"left\", 1)\n        .add_enum(\"right\", 2)\n        .describe(\"Default direction choice when encountering a missing value\");\n  }\n\n  /*! \\brief whether need forward small to big search: default right */\n  inline bool NeedForwardSearch(float col_density, bool indicator) const {\n    return default_direction == 2 ||\n           (default_direction == 0 && (col_density < opt_dense_col) && !indicator);\n  }\n  /*! 
\\brief whether need backward big to small search: default left */\n  inline bool NeedBackwardSearch() const { return default_direction != 2; }\n};\n\nDMLC_REGISTER_PARAMETER(ColMakerTrainParam);\n\n/*! \\brief column-wise update to construct a tree */\nclass ColMaker : public TreeUpdater {\n public:\n  explicit ColMaker(Context const *ctx)\n      : TreeUpdater(ctx), column_sampler_{std::make_shared<common::ColumnSampler>()} {}\n  void Configure(const Args &args) override { colmaker_param_.UpdateAllowUnknown(args); }\n\n  void LoadConfig(Json const &in) override {\n    auto const &config = get<Object const>(in);\n    FromJson(config.at(\"colmaker_train_param\"), &this->colmaker_param_);\n  }\n  void SaveConfig(Json *p_out) const override {\n    auto &out = *p_out;\n    out[\"colmaker_train_param\"] = ToJson(colmaker_param_);\n  }\n\n  char const *Name() const override { return \"grow_colmaker\"; }\n\n  void LazyGetColumnDensity(DMatrix *dmat) {\n    // Finds densities if we don't already have them\n    if (column_densities_.empty()) {\n      std::vector<size_t> column_size(dmat->Info().num_col_);\n      for (const auto &batch : dmat->GetBatches<SortedCSCPage>(ctx_)) {\n        auto page = batch.GetView();\n        for (auto i = 0u; i < batch.Size(); i++) {\n          column_size[i] += page[i].size();\n        }\n      }\n      column_densities_.resize(column_size.size());\n      for (auto i = 0u; i < column_densities_.size(); i++) {\n        size_t nmiss = dmat->Info().num_row_ - column_size[i];\n        column_densities_[i] = 1.0f - (static_cast<float>(nmiss)) / dmat->Info().num_row_;\n      }\n    }\n  }\n\n  void Update(TrainParam const *param, GradientContainer *in_gpair, DMatrix *dmat,\n              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,\n              const std::vector<RegTree *> &trees) override {\n    if (collective::IsDistributed()) {\n      LOG(FATAL) << \"Updater `grow_colmaker` or `exact` tree method doesn't \"\n                   
 \"support distributed training.\";\n    }\n    if (!dmat->SingleColBlock()) {\n      LOG(FATAL) << \"Updater `grow_colmaker` or `exact` tree method doesn't \"\n                    \"support external memory training.\";\n    }\n    if (dmat->Info().HasCategorical()) {\n      LOG(FATAL) << error::NoCategorical(\"Updater `grow_colmaker` or `exact` tree method\");\n    }\n    if (param->colsample_bynode - 1.0 != 0.0) {\n      LOG(FATAL) << \"column sample by node is not yet supported by the exact tree method\";\n    }\n    this->LazyGetColumnDensity(dmat);\n    // rescale learning rate according to size of trees\n    interaction_constraints_.Configure(*param, dmat->Info().num_row_);\n    // build tree\n    auto gpair = in_gpair->FullGradOnly();\n    CHECK_EQ(gpair->Shape(1), 1) << MTNotImplemented();\n    for (auto tree : trees) {\n      CHECK(ctx_);\n      Builder builder(*param, colmaker_param_, interaction_constraints_, ctx_, column_densities_,\n                      column_sampler_);\n      builder.Update(gpair->Data()->ConstHostVector(), dmat, tree);\n    }\n  }\n\n protected:\n  ColMakerTrainParam colmaker_param_;\n  std::vector<float> column_densities_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n\n  FeatureInteractionConstraintHost interaction_constraints_;\n  // data structure\n  /*! \\brief per thread x per node entry to store tmp data */\n  struct ThreadEntry {\n    /*! \\brief statistics of data */\n    GradStats stats;\n    /*! \\brief last feature value scanned */\n    bst_float last_fvalue{0};\n    /*! \\brief current best solution */\n    SplitEntry best;\n    // constructor\n    ThreadEntry() = default;\n  };\n  struct NodeEntry {\n    /*! \\brief statics for node entry */\n    GradStats stats;\n    /*! \\brief loss of this node, without split */\n    bst_float root_gain{0.0f};\n    /*! \\brief weight calculated related to current data */\n    bst_float weight{0.0f};\n    /*! 
\\brief current best solution */\n    SplitEntry best;\n    // constructor\n    NodeEntry() = default;\n  };\n  // actual builder that runs the algorithm\n  class Builder {\n   public:\n    explicit Builder(const TrainParam &param, const ColMakerTrainParam &colmaker_train_param,\n                     FeatureInteractionConstraintHost _interaction_constraints, Context const *ctx,\n                     const std::vector<float> &column_densities,\n                     std::shared_ptr<common::ColumnSampler> column_sampler)\n        : param_(param),\n          colmaker_train_param_{colmaker_train_param},\n          ctx_{ctx},\n          column_sampler_{std::move(column_sampler)},\n          tree_evaluator_(param_, column_densities.size(), DeviceOrd::CPU()),\n          interaction_constraints_{std::move(_interaction_constraints)},\n          column_densities_(column_densities) {}\n    // update one tree, growing\n    virtual void Update(const std::vector<GradientPair> &gpair, DMatrix *p_fmat, RegTree *p_tree) {\n      std::vector<int> newnodes;\n      this->InitData(gpair, *p_fmat);\n      this->InitNewNode(qexpand_, gpair, *p_fmat, *p_tree);\n      // We can check max_leaves too, but might break some grid searching pipelines.\n      CHECK_GT(param_.max_depth, 0) << \"exact tree method doesn't support unlimited depth.\";\n      for (int depth = 0; depth < param_.max_depth; ++depth) {\n        this->FindSplit(depth, qexpand_, gpair, p_fmat, p_tree);\n        this->ResetPosition(qexpand_, p_fmat, *p_tree);\n        this->UpdateQueueExpand(*p_tree, qexpand_, &newnodes);\n        this->InitNewNode(newnodes, gpair, *p_fmat, *p_tree);\n        for (auto nid : qexpand_) {\n          if ((*p_tree)[nid].IsLeaf()) {\n            continue;\n          }\n          int cleft = (*p_tree)[nid].LeftChild();\n          int cright = (*p_tree)[nid].RightChild();\n\n          tree_evaluator_.AddSplit(nid, cleft, cright, snode_[nid].best.SplitIndex(),\n                                   
snode_[cleft].weight, snode_[cright].weight);\n          interaction_constraints_.Split(nid, snode_[nid].best.SplitIndex(), cleft, cright);\n        }\n        qexpand_ = newnodes;\n        // if nothing is left to be expanded, break\n        if (qexpand_.size() == 0) break;\n      }\n      // set all the remaining expanding nodes to leaf\n      for (const int nid : qexpand_) {\n        (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);\n      }\n      // remember auxiliary statistics in the tree node\n      for (int nid = 0; nid < p_tree->NumNodes(); ++nid) {\n        auto &stat = p_tree->Stat(nid);\n        stat.loss_chg = snode_[nid].best.loss_chg;\n        stat.base_weight = snode_[nid].weight;\n        stat.sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);\n      }\n    }\n\n   protected:\n    // initialize temp data structure\n    inline void InitData(const std::vector<GradientPair> &gpair, const DMatrix &fmat) {\n      {\n        // setup position\n        position_.resize(gpair.size());\n        CHECK_EQ(fmat.Info().num_row_, position_.size());\n        std::fill(position_.begin(), position_.end(), 0);\n        // mark rows with a negative hessian as deleted\n        for (size_t ridx = 0; ridx < position_.size(); ++ridx) {\n          if (gpair[ridx].GetHess() < 0.0f) position_[ridx] = ~position_[ridx];\n        }\n        // mark subsample\n        if (param_.subsample < 1.0f) {\n          CHECK_EQ(param_.sampling_method, TrainParam::kUniform)\n              << \"Only uniform sampling is supported, \"\n              << \"gradient-based sampling is only support by the `hist` tree method.\";\n          std::bernoulli_distribution coin_flip(param_.subsample);\n          auto &rnd = ctx_->Rng();\n          for (size_t ridx = 0; ridx < position_.size(); ++ridx) {\n            if (gpair[ridx].GetHess() < 0.0f) continue;\n            if (!coin_flip(rnd)) position_[ridx] = ~position_[ridx];\n          }\n        }\n      }\n      {\n        
column_sampler_->Init(ctx_, fmat.Info().num_col_, fmat.Info().feature_weights,\n                              param_.colsample_bynode, param_.colsample_bylevel,\n                              param_.colsample_bytree);\n      }\n      {\n        // setup temp space for each thread\n        // reserve a small space\n        stemp_.clear();\n        stemp_.resize(this->ctx_->Threads(), std::vector<ThreadEntry>());\n        for (auto &i : stemp_) {\n          i.clear();\n          i.reserve(256);\n        }\n        snode_.reserve(256);\n      }\n      {\n        // expand query\n        qexpand_.reserve(256);\n        qexpand_.clear();\n        qexpand_.push_back(0);\n      }\n    }\n    /*!\n     * \\brief initialize the base_weight, root_gain,\n     *  and NodeEntry for all the new nodes in qexpand\n     */\n    void InitNewNode(const std::vector<int> &qexpand, const std::vector<GradientPair> &gpair,\n                     const DMatrix &fmat, RegTree const &tree) {\n      auto n_nodes = tree.NumNodes();\n      auto sc_tree = tree.HostScView();\n      {\n        // setup statistics space for each tree node\n        for (auto &i : stemp_) {\n          i.resize(n_nodes, ThreadEntry());\n        }\n        snode_.resize(n_nodes, NodeEntry());\n      }\n      const MetaInfo &info = fmat.Info();\n      // setup position\n      common::ParallelFor(info.num_row_, ctx_->Threads(), [&](auto ridx) {\n        int32_t const tid = omp_get_thread_num();\n        if (position_[ridx] < 0) return;\n        stemp_[tid][position_[ridx]].stats.Add(gpair[ridx]);\n      });\n      // sum the per thread statistics together\n      for (int nid : qexpand) {\n        GradStats stats;\n        for (auto &s : stemp_) {\n          stats.Add(s[nid].stats);\n        }\n        // update node statistics\n        snode_[nid].stats = stats;\n      }\n\n      auto evaluator = tree_evaluator_.GetEvaluator();\n      // calculating the weights\n      for (bst_node_t nidx : qexpand) {\n        bst_node_t 
parentid = sc_tree.Parent(nidx);\n        snode_[nidx].weight =\n            static_cast<float>(evaluator.CalcWeight(parentid, param_, snode_[nidx].stats));\n        snode_[nidx].root_gain =\n            static_cast<float>(evaluator.CalcGain(parentid, param_, snode_[nidx].stats));\n      }\n    }\n    /*! \\brief update queue expand add in new leaves */\n    void UpdateQueueExpand(RegTree const &tree, const std::vector<bst_node_t> &qexpand,\n                           std::vector<int> *p_newnodes) {\n      p_newnodes->clear();\n      auto sc_tree = tree.HostScView();\n      for (bst_node_t nidx : qexpand) {\n        if (!sc_tree.IsLeaf(nidx)) {\n          p_newnodes->push_back(sc_tree.LeftChild(nidx));\n          p_newnodes->push_back(sc_tree.RightChild(nidx));\n        }\n      }\n    }\n\n    // update enumeration solution\n    inline void UpdateEnumeration(\n        int nid, GradientPair gstats, bst_float fvalue, int d_step, bst_uint fid,\n        GradStats &c,                    // NOLINT\n        std::vector<ThreadEntry> &temp,  // NOLINT(*)\n        TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {\n      // get the statistics of nid\n      ThreadEntry &e = temp[nid];\n      // test if first hit, this is fine, because we set 0 during init\n      if (e.stats.Empty()) {\n        e.stats.Add(gstats);\n        e.last_fvalue = fvalue;\n      } else {\n        // try to find a split\n        if (fvalue != e.last_fvalue && e.stats.sum_hess >= param_.min_child_weight) {\n          c.SetSubstract(snode_[nid].stats, e.stats);\n          if (c.sum_hess >= param_.min_child_weight) {\n            bst_float loss_chg{0};\n            if (d_step == -1) {\n              loss_chg = static_cast<bst_float>(\n                  evaluator.CalcSplitGain(param_, nid, fid, c, e.stats) - snode_[nid].root_gain);\n              bst_float proposed_split = (fvalue + e.last_fvalue) * 0.5f;\n              if (proposed_split == fvalue) {\n                
e.best.Update(loss_chg, fid, e.last_fvalue, d_step == -1, false, c, e.stats);\n              } else {\n                e.best.Update(loss_chg, fid, proposed_split, d_step == -1, false, c, e.stats);\n              }\n            } else {\n              loss_chg = static_cast<bst_float>(\n                  evaluator.CalcSplitGain(param_, nid, fid, e.stats, c) - snode_[nid].root_gain);\n              bst_float proposed_split = (fvalue + e.last_fvalue) * 0.5f;\n              if (proposed_split == fvalue) {\n                e.best.Update(loss_chg, fid, e.last_fvalue, d_step == -1, false, e.stats, c);\n              } else {\n                e.best.Update(loss_chg, fid, proposed_split, d_step == -1, false, e.stats, c);\n              }\n            }\n          }\n        }\n        // update the statistics\n        e.stats.Add(gstats);\n        e.last_fvalue = fvalue;\n      }\n    }\n    // enumerate the split candidates of feature fid, with cacheline prefetch optimization\n    void EnumerateSplit(const Entry *begin, const Entry *end, int d_step, bst_uint fid,\n                        const std::vector<GradientPair> &gpair,\n                        std::vector<ThreadEntry> &temp,  // NOLINT(*)\n                        TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {\n      const std::vector<int> &qexpand = qexpand_;\n      // clear all the temp statistics\n      for (auto nid : qexpand) {\n        temp[nid].stats = GradStats();\n      }\n      // left statistics\n      GradStats c;\n      // local cache buffer for position and gradient pair\n      constexpr int kBuffer = 32;\n      int buf_position[kBuffer] = {};\n      GradientPair buf_gpair[kBuffer] = {};\n      // aligned ending position\n      const Entry *align_end;\n      if (d_step > 0) {\n        align_end = begin + (end - begin) / kBuffer * kBuffer;\n      } else {\n        align_end = begin - (begin - end) / kBuffer * kBuffer;\n      }\n      int i;\n      const Entry *it;\n      const int align_step = d_step * 
kBuffer;\n      // internal cached loop\n      for (it = begin; it != align_end; it += align_step) {\n        const Entry *p;\n        for (i = 0, p = it; i < kBuffer; ++i, p += d_step) {\n          buf_position[i] = position_[p->index];\n          buf_gpair[i] = gpair[p->index];\n        }\n        for (i = 0, p = it; i < kBuffer; ++i, p += d_step) {\n          const int nid = buf_position[i];\n          if (nid < 0 || !interaction_constraints_.Query(nid, fid)) {\n            continue;\n          }\n          this->UpdateEnumeration(nid, buf_gpair[i], p->fvalue, d_step, fid, c, temp, evaluator);\n        }\n      }\n\n      // finish up the ending piece\n      for (it = align_end, i = 0; it != end; ++i, it += d_step) {\n        buf_position[i] = position_[it->index];\n        buf_gpair[i] = gpair[it->index];\n      }\n      for (it = align_end, i = 0; it != end; ++i, it += d_step) {\n        const int nid = buf_position[i];\n        if (nid < 0 || !interaction_constraints_.Query(nid, fid)) {\n          continue;\n        }\n        this->UpdateEnumeration(nid, buf_gpair[i], it->fvalue, d_step, fid, c, temp, evaluator);\n      }\n      // finish updating all statistics, check if it is possible to include all sum statistics\n      for (int nid : qexpand) {\n        ThreadEntry &e = temp[nid];\n        c.SetSubstract(snode_[nid].stats, e.stats);\n        if (e.stats.sum_hess >= param_.min_child_weight && c.sum_hess >= param_.min_child_weight) {\n          bst_float loss_chg;\n          const bst_float gap = std::abs(e.last_fvalue) + kRtEps;\n          const bst_float delta = d_step == +1 ? 
gap : -gap;\n          if (d_step == -1) {\n            loss_chg = static_cast<bst_float>(\n                evaluator.CalcSplitGain(param_, nid, fid, c, e.stats) - snode_[nid].root_gain);\n            e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1, false, c, e.stats);\n          } else {\n            loss_chg = static_cast<bst_float>(\n                evaluator.CalcSplitGain(param_, nid, fid, e.stats, c) - snode_[nid].root_gain);\n            e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1, false, e.stats, c);\n          }\n        }\n      }\n    }\n\n    // update the solution candidate\n    void UpdateSolution(SortedCSCPage const &batch, const std::vector<bst_feature_t> &feat_set,\n                        const std::vector<GradientPair> &gpair) {\n      // start enumeration\n      const auto num_features = feat_set.size();\n      CHECK(this->ctx_);\n      const int batch_size =  // NOLINT\n          std::max(static_cast<int>(num_features / this->ctx_->Threads() / 32), 1);\n      auto page = batch.GetView();\n      common::ParallelFor(\n          num_features, ctx_->Threads(), common::Sched::Dyn(batch_size), [&](auto i) {\n            auto evaluator = tree_evaluator_.GetEvaluator();\n            bst_feature_t const fid = feat_set[i];\n            int32_t const tid = omp_get_thread_num();\n            auto c = page[fid];\n            const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;\n            if (colmaker_train_param_.NeedForwardSearch(column_densities_[fid], ind)) {\n              this->EnumerateSplit(c.data(), c.data() + c.size(), +1, fid, gpair, stemp_[tid],\n                                   evaluator);\n            }\n            if (colmaker_train_param_.NeedBackwardSearch()) {\n              this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, fid, gpair,\n                                   stemp_[tid], evaluator);\n            }\n          });\n    }\n\n    // find splits at current 
level, do split per level\n    void FindSplit(bst_node_t depth, const std::vector<int> &qexpand,\n                   std::vector<GradientPair> const &gpair, DMatrix *p_fmat, RegTree *p_tree) {\n      auto evaluator = tree_evaluator_.GetEvaluator();\n\n      auto feat_set = column_sampler_->GetFeatureSet(ctx_, depth);\n      for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {\n        this->UpdateSolution(batch, feat_set->HostVector(), gpair);\n      }\n      // after this each thread's stemp will get the best candidates, aggregate results\n      this->SyncBestSolution(qexpand);\n      // get the best result, we can synchronize the solution\n      for (int nid : qexpand) {\n        NodeEntry const &e = snode_[nid];\n        // now we know the solution in snode[nid], set split\n        if (e.best.loss_chg > kRtEps) {\n          bst_float left_leaf_weight =\n              evaluator.CalcWeight(nid, param_, e.best.left_sum) * param_.learning_rate;\n          bst_float right_leaf_weight =\n              evaluator.CalcWeight(nid, param_, e.best.right_sum) * param_.learning_rate;\n          p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft(),\n                             e.weight, left_leaf_weight, right_leaf_weight, e.best.loss_chg,\n                             e.stats.sum_hess, e.best.left_sum.GetHess(),\n                             e.best.right_sum.GetHess(), 0);\n        } else {\n          (*p_tree)[nid].SetLeaf(e.weight * param_.learning_rate);\n        }\n      }\n    }\n    // reset position of each data points after split is created in the tree\n    void ResetPosition(const std::vector<int> &qexpand, DMatrix *p_fmat, const RegTree &tree) {\n      auto sc_tree = tree.HostScView();\n      // set the positions in the nondefault\n      this->SetNonDefaultPosition(qexpand, p_fmat, tree);\n      // set rest of instances to default position\n      // set default direct nodes to default\n      // for leaf nodes that are 
not fresh, mark them as ~nid,\n      // so that they are ignored in future statistics collection\n      common::ParallelFor(p_fmat->Info().num_row_, this->ctx_->Threads(), [&](auto ridx) {\n        CHECK_LT(ridx, position_.size()) << \"ridx exceed bound \"\n                                         << \"ridx=\" << ridx << \" pos=\" << position_.size();\n        const bst_node_t nidx = SamplePosition::Decode(position_[ridx]);\n        if (sc_tree.IsLeaf(nidx)) {\n          // mark as finished when it is not a fresh leaf\n          if (sc_tree.RightChild(nidx) == -1) {\n            position_[ridx] = ~nidx;\n          }\n        } else {\n          // push to default branch\n          if (sc_tree.DefaultLeft(nidx)) {\n            this->SetEncodePosition(ridx, sc_tree.LeftChild(nidx));\n          } else {\n            this->SetEncodePosition(ridx, sc_tree.RightChild(nidx));\n          }\n        }\n      });\n    }\n    // customization part\n    // synchronize the best solution of each node\n    virtual void SyncBestSolution(const std::vector<int> &qexpand) {\n      for (int nid : qexpand) {\n        NodeEntry &e = snode_[nid];\n        CHECK(this->ctx_);\n        for (int tid = 0; tid < this->ctx_->Threads(); ++tid) {\n          e.best.Update(stemp_[tid][nid].best);\n        }\n      }\n    }\n    virtual void SetNonDefaultPosition(const std::vector<int> &qexpand, DMatrix *p_fmat,\n                                       const RegTree &tree) {\n      // step 1, classify the non-default data into right places\n      auto sc_tree = tree.HostScView();\n      std::vector<unsigned> fsplits;\n      for (int nid : qexpand) {\n        if (!sc_tree.IsLeaf(nid)) {\n          fsplits.push_back(sc_tree.SplitIndex(nid));\n        }\n      }\n      std::sort(fsplits.begin(), fsplits.end());\n      fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());\n      for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {\n        auto page = batch.GetView();\n 
       for (auto fid : fsplits) {\n          auto col = page[fid];\n          common::ParallelFor(col.size(), this->ctx_->Threads(), [&](auto j) {\n            const bst_uint ridx = col[j].index;\n            bst_node_t nidx = SamplePosition::Decode(position_[ridx]);\n            const float fvalue = col[j].fvalue;\n            // go back to parent, correct those who are not default\n            if (!sc_tree.IsLeaf(nidx) && sc_tree.SplitIndex(nidx) == fid) {\n              if (fvalue < sc_tree.SplitCond(nidx)) {\n                this->SetEncodePosition(ridx, sc_tree.LeftChild(nidx));\n              } else {\n                this->SetEncodePosition(ridx, sc_tree.RightChild(nidx));\n              }\n            }\n          });\n        }\n      }\n    }\n    // utils to get/set position, with encoded format\n    // return decoded position\n    // encode the encoded position value for ridx\n    void SetEncodePosition(bst_idx_t ridx, bst_node_t nidx) {\n      bool is_invalid = position_[ridx] < 0;\n      position_[ridx] = SamplePosition::Encode(nidx, !is_invalid);\n    }\n    //  --data fields--\n    const TrainParam &param_;\n    const ColMakerTrainParam &colmaker_train_param_;\n    // number of omp thread used during training\n    Context const *ctx_;\n    std::shared_ptr<common::ColumnSampler> column_sampler_;\n    // Instance Data: current node position in the tree of each instance\n    std::vector<int> position_;\n    // PerThread x PerTreeNode: statistics for per thread construction\n    std::vector<std::vector<ThreadEntry>> stemp_;\n    /*! \\brief TreeNode Data: statistics for each constructed node */\n    std::vector<NodeEntry> snode_;\n    /*! 
\\brief queue of nodes to be expanded */\n    std::vector<int> qexpand_;\n    TreeEvaluator tree_evaluator_;\n\n    FeatureInteractionConstraintHost interaction_constraints_;\n    const std::vector<float> &column_densities_;\n  };\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(ColMaker, \"grow_colmaker\")\n    .describe(\"Grow tree with parallelization over columns.\")\n    .set_body([](Context const *ctx, auto) { return new ColMaker(ctx); });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_gpu_common.cuh",
    "content": "/**\n * Copyright 2017-2024, XGBoost contributors\n */\n#pragma once\n#include <limits>   // for numeric_limits\n#include <ostream>  // for ostream\n\n#include \"../data/batch_utils.h\"   // for DftPrefetchBatches, StaticBatch\n#include \"gpu_hist/quantiser.cuh\"  // for GradientQuantiser\n#include \"param.h\"                 // for TrainParam\n#include \"xgboost/base.h\"          // for bst_bin_t\n#include \"xgboost/task.h\"          // for ObjInfo\n\nnamespace xgboost::tree {\nstruct GPUTrainingParam {\n  // minimum amount of hessian(weight) allowed in a child\n  float min_child_weight;\n  // L2 regularization factor\n  float reg_lambda;\n  // L1 regularization factor\n  float reg_alpha;\n  // maximum delta update we can add in weight estimation\n  // this parameter can be used to stabilize update\n  // default=0 means no constraint on weight delta\n  float max_delta_step;\n  float learning_rate;\n  uint32_t max_cat_to_onehot;\n  bst_bin_t max_cat_threshold;\n\n  GPUTrainingParam() = default;\n\n  XGBOOST_DEVICE explicit GPUTrainingParam(const TrainParam& param)\n      : min_child_weight(param.min_child_weight),\n        reg_lambda(param.reg_lambda),\n        reg_alpha(param.reg_alpha),\n        max_delta_step(param.max_delta_step),\n        learning_rate{param.learning_rate},\n        max_cat_to_onehot{param.max_cat_to_onehot},\n        max_cat_threshold{param.max_cat_threshold} {}\n};\n\n/**\n * @brief Default direction to be followed in case of missing values\n */\nenum DefaultDirection {\n  /** move to left child */\n  kLeftDir = 0,\n  /** move to right child */\n  kRightDir\n};\n\nstruct DeviceSplitCandidate {\n  float loss_chg{-std::numeric_limits<float>::max()};\n  DefaultDirection dir{kLeftDir};\n  int findex{-1};\n  float fvalue{0};\n  // categorical split, either it's the split category for OHE or the threshold for partition-based\n  // split.\n  bst_cat_t thresh{-1};\n\n  bool is_cat{false};\n\n  GradientPairInt64 left_sum;\n  
GradientPairInt64 right_sum;\n\n  XGBOOST_DEVICE DeviceSplitCandidate() {}  // NOLINT\n\n  XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in,\n                             int findex_in, GradientPairInt64 left_sum_in,\n                             GradientPairInt64 right_sum_in, bool cat,\n                             const GPUTrainingParam& param, const GradientQuantiser& quantiser) {\n    if (loss_chg_in > loss_chg &&\n        quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&\n        quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {\n      loss_chg = loss_chg_in;\n      dir = dir_in;\n      fvalue = fvalue_in;\n      is_cat = cat;\n      left_sum = left_sum_in;\n      right_sum = right_sum_in;\n      findex = findex_in;\n    }\n  }\n\n  /**\n   * \\brief Update for partition-based splits.\n   */\n  XGBOOST_DEVICE void UpdateCat(float loss_chg_in, DefaultDirection dir_in, bst_cat_t thresh_in,\n                                bst_feature_t findex_in, GradientPairInt64 left_sum_in,\n                                GradientPairInt64 right_sum_in, GPUTrainingParam const& param,\n                                const GradientQuantiser& quantiser) {\n    if (loss_chg_in > loss_chg &&\n        quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&\n        quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {\n      loss_chg = loss_chg_in;\n      dir = dir_in;\n      fvalue = std::numeric_limits<float>::quiet_NaN();\n      thresh = thresh_in;\n      is_cat = true;\n      left_sum = left_sum_in;\n      right_sum = right_sum_in;\n      findex = findex_in;\n    }\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }\n\n  friend std::ostream& operator<<(std::ostream& os, DeviceSplitCandidate const& c) {\n    os << \"loss_chg:\" << c.loss_chg << \", \"\n       << \"dir: \" << c.dir << \", \"\n       << 
\"findex: \" << c.findex << \", \"\n       << \"fvalue: \" << c.fvalue << \", \"\n       << \"thresh: \" << c.thresh << \", \"\n       << \"is_cat: \" << c.is_cat << \", \"\n       << \"left sum: \" << c.left_sum << \", \"\n       << \"right sum: \" << c.right_sum << std::endl;\n    return os;\n  }\n};\n\nstruct MultiSplitCandidate {\n  float loss_chg{-std::numeric_limits<float>::max()};\n  DefaultDirection dir{kLeftDir};\n  int findex{-1};\n  float fvalue{0};\n  // categorical split, either it's the split category for OHE or the threshold for partition-based\n  // split.\n  bst_cat_t thresh{-1};\n\n  bool is_cat{false};\n\n  common::Span<GradientPairInt64 const> child_sum;\n\n  MultiSplitCandidate() = default;\n\n  XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in,\n                             int findex_in, common::Span<GradientPairInt64 const> node_sum_in,\n                             bool cat, GPUTrainingParam const& /*param*/,\n                             common::Span<GradientQuantiser const> /*roundings*/) {\n    // TODO(jiamingy): Support min_child_weight\n    if (loss_chg_in > loss_chg) {\n      loss_chg = loss_chg_in;\n      dir = dir_in;\n      fvalue = fvalue_in;\n      is_cat = cat;\n      child_sum = node_sum_in;\n      findex = findex_in;\n    }\n  }\n  XGBOOST_DEVICE void Update(MultiSplitCandidate const& that, GPUTrainingParam const& param,\n                             common::Span<GradientQuantiser const> roundings) {\n    this->Update(that.loss_chg, that.dir, that.fvalue, that.findex, that.child_sum, that.is_cat,\n                 param, roundings);\n  }\n\n  [[nodiscard]] XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }\n};\n\nnamespace cuda_impl {\ninline BatchParam HistBatch(TrainParam const& param) {\n  auto p = BatchParam{param.max_bin, TrainParam::DftSparseThreshold()};\n  p.prefetch_copy = true;\n  p.n_prefetch_batches = ::xgboost::cuda_impl::DftPrefetchBatches();\n  return 
p;\n}\n\ninline BatchParam ApproxBatch(TrainParam const& p, common::Span<float const> hess,\n                              ObjInfo const& task) {\n  auto batch = BatchParam{p.max_bin, hess, !task.const_hess};\n  batch.prefetch_copy = true;\n  batch.n_prefetch_batches = ::xgboost::cuda_impl::DftPrefetchBatches();\n  return batch;\n}\n}  // namespace cuda_impl\n\ntemplate <typename T>\nstruct SumCallbackOp {\n  // Running prefix\n  T running_total{T{}};\n\n  SumCallbackOp() = default;\n  XGBOOST_DEVICE T operator()(T block_aggregate) {\n    T old_prefix = running_total;\n    running_total += block_aggregate;\n    return old_prefix;\n  }\n};\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_gpu_hist.cu",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include <thrust/transform.h>  // for transform\n\n#include <algorithm>        // for max\n#include <cmath>            // for isnan\n#include <cstdint>          // for int32_t, uint32_t\n#include <cuda/functional>  // for plus\n#include <memory>           // for unique_ptr, make_unique\n#include <numeric>          // for partial_sum\n#include <string>           // for string\n#include <type_traits>      // for is_trivially_copyable_v\n#include <utility>          // for move\n#include <vector>           // for vector\n\n#include \"../../src/collective/comm.h\"  // for Op\n#include \"../collective/aggregator.h\"\n#include \"../common/categorical.h\"     // for KCatBitField\n#include \"../common/cuda_context.cuh\"  // for CUDAContext\n#include \"../common/cuda_rt_utils.h\"   // for SetDevice\n#include \"../common/cuda_stream.h\"     // for DefaultStream\n#include \"../common/device_helpers.cuh\"\n#include \"../common/device_vector.cuh\"  // for device_vector\n#include \"../common/hist_util.h\"        // for HistogramCuts\n#include \"../common/random.h\"           // for ColumnSampler\n#include \"../common/timer.h\"\n#include \"../data/batch_utils.h\"     // for StaticBatch\n#include \"../data/ellpack_page.cuh\"  // for EllpackPageImpl\n#include \"../data/ellpack_page.h\"    // for EllpackPage\n#include \"constraints.cuh\"\n#include \"driver.h\"\n#include \"gpu_hist/evaluate_splits.cuh\"\n#include \"gpu_hist/expand_entry.cuh\"\n#include \"gpu_hist/feature_groups.cuh\"  // for FeatureGroups\n#include \"gpu_hist/histogram.cuh\"\n#include \"gpu_hist/quantiser.cuh\"        // for GradientQuantiser\n#include \"gpu_hist/row_partitioner.cuh\"  // for RowPartitioner\n#include \"gpu_hist/sampler.cuh\"          // for GradientBasedSampler\n#include \"hist/hist_param.h\"             // for HistMakerTrainParam\n#include \"param.h\"                       // for TrainParam\n#include \"sample_position.h\"         
    // for SamplePosition\n#include \"tree_view.h\"                   // for ScalarTreeView\n#include \"updater_gpu_common.cuh\"        // for HistBatch\n#include \"updater_gpu_hist.cuh\"          // for MultiTargetHistMaker\n#include \"xgboost/base.h\"                // for bst_idx_t\n#include \"xgboost/collective/result.h\"   // for Success, SafeColl\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for DMatrix\n#include \"xgboost/gradient.h\"            // for GradientContainer\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json\n#include \"xgboost/linalg.h\"              // for MakeVec\n#include \"xgboost/logging.h\"             // for CHECK_EQ, CHECK_LE, CHECK_GE\n#include \"xgboost/span.h\"                // for Span\n#include \"xgboost/task.h\"                // for ObjInfo\n#include \"xgboost/tree_model.h\"          // for RegTree\n#include \"xgboost/tree_updater.h\"        // for TreeUpdater\n\nnamespace xgboost::tree {\nDMLC_REGISTRY_FILE_TAG(updater_gpu_hist);\n\nusing cuda_impl::ApproxBatch;\nusing cuda_impl::HistBatch;\nusing xgboost::cuda_impl::StaticBatch;\n\nnamespace {\ninline constexpr std::size_t kNeedCopyThreshold = 4;\n}  // anonymous namespace\n\n// Extra data for each node that is passed to the update position function\nstruct NodeSplitData {\n  RegTree::Node split_node;\n  FeatureType split_type;\n  common::KCatBitField node_cats;\n};\nstatic_assert(std::is_trivially_copyable_v<NodeSplitData>);\n\n// GPU tree updater implementation.\nstruct GPUHistMakerDevice {\n private:\n  GPUHistEvaluator evaluator_;\n  Context const* ctx_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  // Set of row partitioners, one for each batch (external memory). 
When the training is\n  // in-core, there's only one partitioner.\n  RowPartitionerBatches partitioners_;\n\n  DeviceHistogramBuilder histogram_;\n  std::vector<bst_idx_t> const batch_ptr_;\n  HistMakerTrainParam const* hist_param_;\n  std::shared_ptr<common::HistogramCuts const> const cuts_;\n  std::unique_ptr<FeatureGroups> feature_groups_;\n\n  struct PartitionNodes {\n    std::vector<bst_node_t> nidx;\n    std::vector<bst_node_t> left_nidx;\n    std::vector<bst_node_t> right_nidx;\n    std::vector<NodeSplitData> split_data;\n\n    explicit PartitionNodes(std::size_t n_candidates)\n        : nidx(n_candidates),\n          left_nidx(n_candidates),\n          right_nidx(n_candidates),\n          split_data(n_candidates) {}\n  };\n\n  PartitionNodes CreatePartitionNodes(RegTree const* p_tree,\n                                      std::vector<GPUExpandEntry> const& candidates) {\n    PartitionNodes nodes(candidates.size());\n    auto tree = p_tree->HostScView();\n    for (std::size_t i = 0, n = candidates.size(); i < n; i++) {\n      auto const& e = candidates[i];\n      RegTree::Node split_node = tree.nodes[e.nidx];\n      auto split_type = tree.SplitType(e.nidx);\n      nodes.nidx.at(i) = e.nidx;\n      nodes.left_nidx[i] = tree.LeftChild(e.nidx);\n      nodes.right_nidx[i] = tree.RightChild(e.nidx);\n      nodes.split_data[i] =\n          NodeSplitData{split_node, split_type, this->evaluator_.GetDeviceNodeCats(e.nidx)};\n\n      CHECK_EQ(split_type == FeatureType::kCategorical, e.split.is_cat);\n    }\n    return nodes;\n  }\n\n public:\n  linalg::Matrix<GradientPairInt64> d_gpair;  // storage for gpair;\n  dh::device_vector<int> monotone_constraints;\n\n  TrainParam const param;\n\n  std::unique_ptr<GradientQuantiserGroup> quantiser;\n\n  dh::PinnedMemory pinned;\n  dh::PinnedMemory pinned2;\n\n  FeatureInteractionConstraintDevice interaction_constraints;\n\n  std::unique_ptr<cuda_impl::Sampler> sampler;\n\n  common::Monitor monitor;\n\n  
GPUHistMakerDevice(Context const* ctx, TrainParam _param, HistMakerTrainParam const* hist_param,\n                     std::shared_ptr<common::ColumnSampler> column_sampler, MetaInfo const& info,\n                     std::vector<bst_idx_t> batch_ptr,\n                     std::shared_ptr<common::HistogramCuts const> cuts, bool dense_compressed)\n      : evaluator_{_param, static_cast<bst_feature_t>(info.num_col_), ctx->Device()},\n        ctx_{ctx},\n        column_sampler_{std::move(column_sampler)},\n        batch_ptr_{std::move(batch_ptr)},\n        hist_param_{hist_param},\n        cuts_{std::move(cuts)},\n        feature_groups_{std::make_unique<FeatureGroups>(\n            *cuts_, dense_compressed, DftStHistShmemBytes(this->ctx_->Ordinal()))},\n        param{std::move(_param)},\n        interaction_constraints(param, static_cast<bst_feature_t>(info.num_col_)),\n        sampler{std::make_unique<cuda_impl::Sampler>(info.num_row_, param.subsample,\n                                                     param.sampling_method)} {\n    if (!param.monotone_constraints.empty()) {\n      // Copy assigning an empty vector causes an exception in MSVC debug builds\n      monotone_constraints = param.monotone_constraints;\n    }\n\n    CHECK(column_sampler_);\n    monitor.Init(std::string(\"GPUHistMakerDevice\") + ctx_->Device().Name());\n  }\n\n  ~GPUHistMakerDevice() = default;\n\n  // Reset values for each update iteration\n  [[nodiscard]] DMatrix* Reset(HostDeviceVector<GradientPair> const* dh_gpair, DMatrix* p_fmat) {\n    this->monitor.Start(__func__);\n    curt::SetDevice(ctx_->Ordinal());\n\n    auto const& info = p_fmat->Info();\n\n    this->quantiser = std::make_unique<GradientQuantiserGroup>(\n        ctx_, linalg::MakeVec(this->ctx_->Device(), dh_gpair->ConstDeviceSpan()), p_fmat->Info());\n    auto gpair =\n        linalg::MakeTensorView(this->ctx_, dh_gpair->ConstDeviceSpan(), dh_gpair->Size(), 1);\n    CalcQuantizedGpairs(this->ctx_, gpair, 
this->quantiser->DeviceSpan(), &this->d_gpair);\n\n    /**\n     * Sampling\n     */\n    auto gpairs = this->d_gpair.View(this->ctx_->Device());\n    this->sampler->Sample(ctx_, gpairs, this->quantiser->DeviceSpan());\n    p_fmat->Info().feature_types.SetDevice(ctx_->Device());\n\n    /**\n     * Initialize the partitioners\n     */\n    this->partitioners_.Reset(this->ctx_, this->batch_ptr_);\n\n    /**\n     * Initialize the evaluator\n     */\n    this->column_sampler_->Init(ctx_, info.num_col_, info.feature_weights, param.colsample_bynode,\n                                param.colsample_bylevel, param.colsample_bytree);\n    this->interaction_constraints.Reset(ctx_);\n    this->evaluator_.Reset(this->ctx_, *cuts_, info.feature_types.ConstDeviceSpan(), info.num_col_,\n                           this->param, info.IsColumnSplit());\n\n    /**\n     * Other initializations\n     */\n    this->histogram_.Reset(ctx_, this->hist_param_->MaxCachedHistNodes(ctx_->Device()),\n                           cuts_->TotalBins(), false);\n    this->monitor.Stop(__func__);\n    return p_fmat;\n  }\n\n  GPUExpandEntry EvaluateRootSplit(DMatrix const* p_fmat, GradientPairInt64 root_sum) {\n    bst_node_t nidx = RegTree::kRoot;\n    GPUTrainingParam gpu_param(param);\n    auto sampled_features = column_sampler_->GetFeatureSet(ctx_, 0);\n    sampled_features->SetDevice(ctx_->Device());\n    common::Span<bst_feature_t const> feature_set =\n        interaction_constraints.Query(sampled_features->ConstDeviceSpan(), nidx);\n    EvaluateSplitInputs inputs{nidx, 0, root_sum, feature_set, histogram_.GetNodeHistogram(nidx)};\n    EvaluateSplitSharedInputs shared_inputs{gpu_param,\n                                            (*quantiser)[0],\n                                            p_fmat->Info().feature_types.ConstDeviceSpan(),\n                                            cuts_->cut_ptrs_.ConstDeviceSpan(),\n                                            
cuts_->cut_values_.ConstDeviceSpan(),\n                                            p_fmat->IsDense() && !collective::IsDistributed()};\n    auto split = this->evaluator_.EvaluateSingleSplit(ctx_, inputs, shared_inputs);\n    return split;\n  }\n\n  void EvaluateSplits(DMatrix const* p_fmat, const std::vector<GPUExpandEntry>& candidates,\n                      const RegTree& tree, common::Span<GPUExpandEntry> pinned_candidates_out) {\n    if (candidates.empty()) {\n      return;\n    }\n    this->monitor.Start(__func__);\n    dh::TemporaryArray<EvaluateSplitInputs> d_node_inputs(2 * candidates.size());\n    dh::TemporaryArray<DeviceSplitCandidate> splits_out(2 * candidates.size());\n    std::vector<bst_node_t> nidx(2 * candidates.size());\n    auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());\n    EvaluateSplitSharedInputs shared_inputs{\n        GPUTrainingParam{param}, (*quantiser)[0], p_fmat->Info().feature_types.ConstDeviceSpan(),\n        cuts_->cut_ptrs_.ConstDeviceSpan(), cuts_->cut_values_.ConstDeviceSpan(),\n        // is_dense represents the local data\n        p_fmat->IsDense() && !collective::IsDistributed()};\n    dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());\n    // Store the feature set ptrs so they don't go out of scope before the kernel is called\n    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;\n    auto sc_tree = tree.HostScView();\n    for (std::size_t i = 0; i < candidates.size(); i++) {\n      auto candidate = candidates.at(i);\n      bst_node_t left_nidx = sc_tree.LeftChild(candidate.nidx);\n      bst_node_t right_nidx = sc_tree.RightChild(candidate.nidx);\n      nidx[i * 2] = left_nidx;\n      nidx[i * 2 + 1] = right_nidx;\n      auto left_sampled_features = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(left_nidx));\n      feature_sets.emplace_back(left_sampled_features);\n      common::Span<bst_feature_t const> left_feature_set =\n          
interaction_constraints.Query(left_sampled_features->ConstDeviceSpan(), left_nidx);\n      auto right_sampled_features = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(right_nidx));\n      feature_sets.emplace_back(right_sampled_features);\n      common::Span<bst_feature_t const> right_feature_set =\n          interaction_constraints.Query(right_sampled_features->ConstDeviceSpan(), right_nidx);\n      h_node_inputs[i * 2] = {left_nidx, candidate.depth + 1, candidate.split.left_sum,\n                              left_feature_set, histogram_.GetNodeHistogram(left_nidx)};\n      h_node_inputs[i * 2 + 1] = {right_nidx, candidate.depth + 1, candidate.split.right_sum,\n                                  right_feature_set, histogram_.GetNodeHistogram(right_nidx)};\n    }\n    bst_feature_t max_active_features = 0;\n    for (auto input : h_node_inputs) {\n      max_active_features =\n          std::max(max_active_features, static_cast<bst_feature_t>(input.feature_set.size()));\n    }\n    dh::safe_cuda(cudaMemcpyAsync(d_node_inputs.data().get(), h_node_inputs.data(),\n                                  h_node_inputs.size() * sizeof(EvaluateSplitInputs),\n                                  cudaMemcpyDefault));\n\n    this->evaluator_.EvaluateSplits(ctx_, nidx, max_active_features, dh::ToSpan(d_node_inputs),\n                                    shared_inputs, dh::ToSpan(entries));\n    dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(), entries.data().get(),\n                                  sizeof(GPUExpandEntry) * entries.size(), cudaMemcpyDeviceToHost));\n    this->monitor.Stop(__func__);\n  }\n\n  void BuildHist(EllpackPage const& page, std::int32_t k, bst_bin_t nidx) {\n    monitor.Start(__func__);\n    auto d_node_hist = histogram_.GetNodeHistogram(nidx);\n    auto d_ridx = partitioners_.At(k)->GetRows(nidx);\n    auto acc = page.Impl()->GetDeviceEllpack(this->ctx_, {});\n    auto gpair = this->d_gpair.View(this->ctx_->Device());\n    
this->histogram_.BuildHistogram(ctx_, acc, feature_groups_->DeviceAccessor(ctx_->Device()),\n                                    gpair.Values(), d_ridx, d_node_hist);\n    monitor.Stop(__func__);\n  }\n\n  void ReduceHist(DMatrix* p_fmat, std::vector<GPUExpandEntry> const& candidates,\n                  std::vector<bst_node_t> const& build_nidx,\n                  std::vector<bst_node_t> const& subtraction_nidx) {\n    if (candidates.empty()) {\n      return;\n    }\n    this->monitor.Start(__func__);\n\n    // Reduce all in one go\n    // This gives much better latency in a distributed setting when processing a large batch\n    this->histogram_.AllReduceHist(ctx_, p_fmat->Info(), build_nidx.at(0), build_nidx.size());\n    // Perform subtraction for sibling nodes\n    auto need_build = this->histogram_.SubtractHist(ctx_, candidates, build_nidx, subtraction_nidx);\n    if (need_build.empty()) {\n      this->monitor.Stop(__func__);\n      return;\n    }\n\n    // Build the nodes that can not obtain the histogram using subtraction. 
This is the slow path.\n    std::int32_t k = 0;\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n      for (auto nidx : need_build) {\n        this->BuildHist(page, k, nidx);\n      }\n      ++k;\n    }\n    for (auto nidx : need_build) {\n      this->histogram_.AllReduceHist(ctx_, p_fmat->Info(), nidx, 1);\n    }\n    this->monitor.Stop(__func__);\n  }\n\n  template <typename Iter>\n  void UpdatePositionColumnSplit(EllpackAccessorImpl<Iter> d_matrix,\n                                 std::vector<NodeSplitData> const& split_data,\n                                 std::vector<bst_node_t> const& nidx,\n                                 std::vector<bst_node_t> const& left_nidx,\n                                 std::vector<bst_node_t> const& right_nidx) {\n    auto const num_candidates = split_data.size();\n\n    using BitVector = LBitField64;\n    using BitType = BitVector::value_type;\n    auto const size = BitVector::ComputeStorageSize(d_matrix.n_rows * num_candidates);\n    dh::TemporaryArray<BitType> decision_storage(size, 0);\n    dh::TemporaryArray<BitType> missing_storage(size, 0);\n    BitVector decision_bits{dh::ToSpan(decision_storage)};\n    BitVector missing_bits{dh::ToSpan(missing_storage)};\n\n    auto cuctx = this->ctx_->CUDACtx();\n    dh::TemporaryArray<NodeSplitData> split_data_storage(num_candidates);\n    dh::safe_cuda(cudaMemcpyAsync(split_data_storage.data().get(), split_data.data(),\n                                  num_candidates * sizeof(NodeSplitData), cudaMemcpyDefault,\n                                  cuctx->Stream()));\n    auto d_split_data = dh::ToSpan(split_data_storage);\n\n    dh::LaunchN(d_matrix.n_rows, cuctx->Stream(), [=] __device__(std::size_t ridx) mutable {\n      for (auto i = 0; i < num_candidates; i++) {\n        auto const& data = d_split_data[i];\n        auto const cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());\n        if (isnan(cut_value)) {\n          
missing_bits.Set(ridx * num_candidates + i);\n        } else {\n          bool go_left;\n          if (data.split_type == FeatureType::kCategorical) {\n            go_left = common::Decision(data.node_cats.Bits(), cut_value);\n          } else {\n            go_left = cut_value <= data.split_node.SplitCond();\n          }\n          if (go_left) {\n            decision_bits.Set(ridx * num_candidates + i);\n          }\n        }\n      }\n    });\n\n    auto rc = collective::Success() << [&] {\n      return collective::Allreduce(\n          ctx_, linalg::MakeTensorView(ctx_, dh::ToSpan(decision_storage), decision_storage.size()),\n          collective::Op::kBitwiseOR);\n    } << [&] {\n      return collective::Allreduce(\n          ctx_, linalg::MakeTensorView(ctx_, dh::ToSpan(missing_storage), missing_storage.size()),\n          collective::Op::kBitwiseAND);\n    };\n    collective::SafeColl(rc);\n\n    CHECK_EQ(partitioners_.Size(), 1) << \"External memory with column split is not yet supported.\";\n    partitioners_.UpdatePositionBatch(\n        ctx_, 0, nidx, left_nidx, right_nidx, split_data,\n        [=] __device__(bst_uint ridx, int nidx_in_batch, NodeSplitData const& data) {\n          auto const index = ridx * num_candidates + nidx_in_batch;\n          bool go_left;\n          if (missing_bits.Check(index)) {\n            go_left = data.split_node.DefaultLeft();\n          } else {\n            go_left = decision_bits.Check(index);\n          }\n          return go_left;\n        });\n  }\n\n  template <typename Accessor>\n  struct GoLeftOp {\n    Accessor d_matrix;\n\n    __device__ bool operator()(cuda_impl::RowIndexT ridx, NodeSplitData const& data) const {\n      RegTree::Node const& node = data.split_node;\n      // given a row index, returns the node id it belongs to\n      float cut_value = d_matrix.GetFvalue(ridx, node.SplitIndex());\n      // Missing value\n      bool go_left = true;\n      if (isnan(cut_value)) {\n        go_left = 
node.DefaultLeft();\n      } else {\n        if (data.split_type == FeatureType::kCategorical) {\n          go_left = common::Decision(data.node_cats.Bits(), cut_value);\n        } else {\n          go_left = cut_value <= node.SplitCond();\n        }\n      }\n      return go_left;\n    }\n  };\n\n  // Heuristic to avoid copying the data batch.\n  [[nodiscard]] bool NeedCopy(DMatrix* p_fmat,\n                              std::vector<GPUExpandEntry> const& candidates) const {\n    if (p_fmat->SingleColBlock()) {\n      return true;  // use default if it's in-core\n    }\n    bst_idx_t n_total_samples = p_fmat->Info().num_row_;\n    bst_idx_t n_samples = 0;\n    for (auto const& c : candidates) {\n      for (auto const& part : this->partitioners_) {\n        n_samples += part->GetRows(c.nidx).size();\n      }\n    }\n    // avoid copy if the kernel is small.\n    return n_samples * kNeedCopyThreshold > n_total_samples;\n  }\n\n  // Update position and build histogram. We merge these two functions for external\n  // memory, where we want to bundle as much computation as possible for each data read.\n  void PartitionAndBuildHist(DMatrix* p_fmat, std::vector<GPUExpandEntry> const& expand_set,\n                             std::vector<GPUExpandEntry> const& candidates, RegTree const* p_tree) {\n    if (expand_set.empty()) {\n      return;\n    }\n    monitor.Start(__func__);\n    CHECK_LE(candidates.size(), expand_set.size());\n\n    // Update all the nodes if working with external memory, this saves us from working\n    // with the finalize position call, which adds an additional iteration and requires\n    // special handling for row index.\n    bool const is_single_block = p_fmat->SingleColBlock();\n\n    // Prepare for update partition\n    auto nodes = this->CreatePartitionNodes(p_tree, is_single_block ? 
candidates : expand_set);\n\n    // Prepare for build hist\n    std::vector<bst_node_t> build_nidx(candidates.size());\n    std::vector<bst_node_t> subtraction_nidx(candidates.size());\n    auto const& tree = p_tree->HostScView();\n    cuda_impl::AssignNodes(tree, candidates, build_nidx, subtraction_nidx,\n                           [&](GPUExpandEntry const& e) {\n                             auto const& q = (*this->quantiser)[0];\n                             auto left_sum = q.ToFloatingPoint(e.split.left_sum);\n                             auto right_sum = q.ToFloatingPoint(e.split.right_sum);\n                             bool fewer_right = right_sum.GetHess() < left_sum.GetHess();\n                             return fewer_right;\n                           });\n    auto prefetch_copy = !build_nidx.empty() && this->NeedCopy(p_fmat, candidates);\n\n    this->histogram_.AllocateHistograms(ctx_, build_nidx, subtraction_nidx);\n\n    monitor.Start(\"Partition-BuildHist\");\n\n    std::int32_t k{0};\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(prefetch_copy))) {\n      page.Impl()->Visit(ctx_, {}, [&](auto&& d_acc) {\n        using Acc = std::remove_reference_t<decltype(d_acc)>;\n        using GoLeft = GoLeftOp<Acc>;\n        auto go_left = GoLeft{d_acc};\n\n        // Partition rows.\n        monitor.Start(\"UpdatePositionBatch\");\n        if (p_fmat->Info().IsColumnSplit()) {\n          UpdatePositionColumnSplit(d_acc, nodes.split_data, nodes.nidx, nodes.left_nidx,\n                                    nodes.right_nidx);\n        } else {\n          partitioners_.UpdatePositionBatch(ctx_, k, nodes.nidx, nodes.left_nidx, nodes.right_nidx,\n                                            nodes.split_data,\n                                            cuda_impl::GoLeftWrapperOp<GoLeft>{go_left});\n        }\n        monitor.Stop(\"UpdatePositionBatch\");\n\n        // Build histograms.\n        for (auto nidx : build_nidx) {\n          
this->BuildHist(page, k, nidx);\n        }\n      });\n      ++k;\n    }\n\n    monitor.Stop(\"Partition-BuildHist\");\n\n    this->ReduceHist(p_fmat, candidates, build_nidx, subtraction_nidx);\n\n    monitor.Stop(__func__);\n  }\n\n  template <typename Accessor>\n  struct FinalizeOp {\n    common::Span<NodeSplitData> s_split_data;\n    GoLeftOp<Accessor> go_left_op;\n    cuda_impl::EncodeOp encode_op;\n\n    __device__ auto operator()(bst_idx_t row_id, bst_node_t nidx) const {\n      auto split_data = s_split_data[nidx];\n      auto node = split_data.split_node;\n      while (!node.IsLeaf()) {\n        auto go_left = go_left_op(row_id, split_data);\n        nidx = go_left ? node.LeftChild() : node.RightChild();\n        node = s_split_data[nidx].split_node;\n      }\n      return encode_op(row_id, nidx);\n    }\n  };\n\n  // After tree update is finished, update the position of all training\n  // instances to their final leaf. This information is used later to update the\n  // prediction cache\n  void FinalisePosition(DMatrix* p_fmat, RegTree const* p_tree,\n                        HostDeviceVector<bst_node_t>* p_out_position) {\n    xgboost_NVTX_FN_RANGE();\n\n    p_out_position->SetDevice(ctx_->Device());\n    p_out_position->Resize(p_fmat->Info().num_row_);\n    auto d_out_position = p_out_position->DeviceSpan();\n\n    auto gpair = this->d_gpair.View(this->ctx_->Device());\n\n    if (!p_fmat->SingleColBlock()) {\n      for (std::size_t k = 0; k < partitioners_.Size(); ++k) {\n        auto& part = partitioners_.At(k);\n        CHECK_EQ(part->GetNumNodes(), p_tree->NumNodes());\n        auto base_ridx = batch_ptr_[k];\n        auto n_samples = batch_ptr_.at(k + 1) - base_ridx;\n        part->FinalisePosition(ctx_, d_out_position.subspan(base_ridx, n_samples), base_ridx,\n                               cuda_impl::EncodeOp{gpair});\n      }\n      return;\n    }\n\n    dh::CachingDeviceUVector<std::uint32_t> categories;\n    
dh::CopyTo(p_tree->GetSplitCategories(DeviceOrd::CPU()), &categories,\n               this->ctx_->CUDACtx()->Stream());\n    auto const& cat_segments = p_tree->GetSplitCategoriesPtr();\n    auto d_categories = dh::ToSpan(categories);\n    auto ft = p_fmat->Info().feature_types.ConstDeviceSpan();\n    auto const& tree = p_tree->HostScView();\n\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n      std::vector<NodeSplitData> split_data(p_tree->NumNodes());\n      for (std::size_t i = 0, n = split_data.size(); i < n; ++i) {\n        RegTree::Node split_node = tree.nodes[i];\n        auto split_type = tree.SplitType(i);\n        auto node_cats = common::GetNodeCats(d_categories, cat_segments[i]);\n        split_data[i] = NodeSplitData{std::move(split_node), split_type, node_cats};\n      }\n\n      dh::CachingDeviceUVector<NodeSplitData> d_split_data;\n      dh::CopyTo(split_data, &d_split_data, this->ctx_->CUDACtx()->Stream());\n      auto s_split_data = dh::ToSpan(d_split_data);\n\n      page.Impl()->Visit(ctx_, ft, [&](auto&& d_matrix) {\n        auto go_left_op = GoLeftOp<std::remove_reference_t<decltype(d_matrix)>>{d_matrix};\n        partitioners_.Front()->FinalisePosition(\n            ctx_, d_out_position, page.BaseRowId(),\n            FinalizeOp<std::remove_reference_t<decltype(d_matrix)>>{s_split_data, go_left_op,\n                                                                    cuda_impl::EncodeOp{gpair}});\n      });\n    }\n  }\n\n  bool UpdatePredictionCache(linalg::MatrixView<float> out_preds_d,\n                             common::Span<HostDeviceVector<bst_node_t>> out_position,\n                             RegTree const* p_tree) {\n    CHECK(p_tree);\n    CHECK(out_preds_d.Device().IsCUDA());\n    CHECK_EQ(out_preds_d.Device().ordinal, ctx_->Ordinal());\n\n    CHECK_EQ(out_position.size(), 1);\n    auto d_position = out_position.front().ConstDeviceSpan();\n    CHECK_EQ(out_preds_d.Size(), 
d_position.size());\n\n    // Use the nodes from tree, the leaf value might be changed by the objective since the\n    // last update tree call.\n    dh::CachingDeviceUVector<RegTree::Node> nodes;\n    // We can remove the CPU copy once we refactor the GPU hist to use the device tree.\n    dh::CopyTo(p_tree->GetNodes(DeviceOrd::CPU()), &nodes, this->ctx_->CUDACtx()->Stream());\n    common::Span<RegTree::Node> d_nodes = dh::ToSpan(nodes);\n    CHECK_EQ(out_preds_d.Shape(1), 1);\n    dh::LaunchN(d_position.size(), ctx_->CUDACtx()->Stream(),\n                [=] XGBOOST_DEVICE(std::size_t idx) mutable {\n                  bst_node_t nidx = d_position[idx];\n                  nidx = SamplePosition::Decode(nidx);\n                  auto weight = d_nodes[nidx].LeafValue();\n                  out_preds_d(idx, 0) += weight;\n                });\n    return true;\n  }\n\n  void ApplySplit(const GPUExpandEntry& candidate, RegTree* p_tree) {\n    RegTree& tree = *p_tree;\n\n    // Sanity check - have we created a leaf with no training instances?\n    if (!collective::IsDistributed() && partitioners_.Size() == 1) {\n      CHECK(partitioners_.Front()->GetRows(candidate.nidx).size() > 0)\n          << \"No training instances in this leaf!\";\n    }\n\n    auto base_weight = candidate.base_weight;\n    auto left_weight = candidate.left_weight * param.learning_rate;\n    auto right_weight = candidate.right_weight * param.learning_rate;\n    auto const& q = (*quantiser)[0];\n    auto parent_hess =\n        q.ToFloatingPoint(candidate.split.left_sum + candidate.split.right_sum).GetHess();\n    auto left_hess = q.ToFloatingPoint(candidate.split.left_sum).GetHess();\n    auto right_hess = q.ToFloatingPoint(candidate.split.right_sum).GetHess();\n\n    auto is_cat = candidate.split.is_cat;\n    if (is_cat) {\n      // should be set to nan in evaluation split.\n      CHECK(common::CheckNAN(candidate.split.fvalue));\n      std::vector<common::CatBitField::value_type> split_cats;\n\n      
auto h_cats = this->evaluator_.GetHostNodeCats(candidate.nidx);\n      auto n_bins_feature = cuts_->FeatureBins(candidate.split.findex);\n      split_cats.resize(common::CatBitField::ComputeStorageSize(n_bins_feature), 0);\n      CHECK_LE(split_cats.size(), h_cats.size());\n      std::copy(h_cats.data(), h_cats.data() + split_cats.size(), split_cats.data());\n\n      tree.ExpandCategorical(candidate.nidx, candidate.split.findex, split_cats,\n                             candidate.split.dir == kLeftDir, base_weight, left_weight,\n                             right_weight, candidate.split.loss_chg, parent_hess, left_hess,\n                             right_hess);\n    } else {\n      CHECK(!common::CheckNAN(candidate.split.fvalue));\n      tree.ExpandNode(candidate.nidx, candidate.split.findex, candidate.split.fvalue,\n                      candidate.split.dir == kLeftDir, base_weight, left_weight, right_weight,\n                      candidate.split.loss_chg, parent_hess, left_hess, right_hess);\n    }\n    evaluator_.ApplyTreeSplit(candidate, p_tree);\n\n    const auto& parent = tree[candidate.nidx];\n    interaction_constraints.Split(candidate.nidx, parent.SplitIndex(), parent.LeftChild(),\n                                  parent.RightChild());\n  }\n\n  GPUExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree) {\n    this->monitor.Start(__func__);\n\n    constexpr bst_node_t kRootNIdx = RegTree::kRoot;\n    auto gpair_it = linalg::tcbegin(this->d_gpair.View(this->ctx_->Device()));\n    GradientPairInt64 root_sum_quantised =\n        dh::Reduce(ctx_->CUDACtx()->CTP(), gpair_it, gpair_it + this->d_gpair.Size(),\n                   GradientPairInt64{}, cuda::std::plus<GradientPairInt64>{});\n    using ReduceT = typename decltype(root_sum_quantised)::ValueT;\n    auto rc = collective::GlobalSum(\n        ctx_, p_fmat->Info(), linalg::MakeVec(reinterpret_cast<ReduceT*>(&root_sum_quantised), 2));\n    collective::SafeColl(rc);\n\n    
histogram_.AllocateHistograms(ctx_, {kRootNIdx});\n    std::int32_t k = 0;\n    CHECK_EQ(p_fmat->NumBatches(), this->partitioners_.Size());\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n      this->BuildHist(page, k, kRootNIdx);\n      ++k;\n    }\n    this->histogram_.AllReduceHist(ctx_, p_fmat->Info(), kRootNIdx, 1);\n\n    // Remember root stats\n    auto root_sum = (*this->quantiser)[0].ToFloatingPoint(root_sum_quantised);\n    p_tree->Stat(kRootNIdx).sum_hess = root_sum.GetHess();\n    auto weight = CalcWeight(param, root_sum);\n    p_tree->Stat(kRootNIdx).base_weight = weight;\n    (*p_tree)[kRootNIdx].SetLeaf(param.learning_rate * weight);\n\n    // Generate first split\n    auto root_entry = this->EvaluateRootSplit(p_fmat, root_sum_quantised);\n\n    this->monitor.Stop(__func__);\n    return root_entry;\n  }\n\n  void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat, RegTree* p_tree,\n                  HostDeviceVector<bst_node_t>* p_out_position) {\n    Driver<GPUExpandEntry> driver{param, cuda_impl::kMaxNodeBatchSize};\n\n    p_fmat = this->Reset(gpair_all, p_fmat);\n    driver.Push({this->InitRoot(p_fmat, p_tree)});\n\n    // The set of leaves that can be expanded asynchronously\n    auto expand_set = driver.Pop();\n    while (!expand_set.empty()) {\n      for (auto& candidate : expand_set) {\n        this->ApplySplit(candidate, p_tree);\n      }\n      // Get the candidates we are allowed to expand further\n      // e.g. 
We do not bother further processing nodes whose children are beyond max depth\n      std::vector<GPUExpandEntry> valid_candidates;\n      std::copy_if(expand_set.begin(), expand_set.end(), std::back_inserter(valid_candidates),\n                   [&](auto const& e) { return driver.IsChildValid(e); });\n\n      // Allocaate children nodes.\n      auto new_candidates = pinned.GetSpan(valid_candidates.size() * 2, GPUExpandEntry{});\n\n      this->PartitionAndBuildHist(p_fmat, expand_set, valid_candidates, p_tree);\n\n      this->EvaluateSplits(p_fmat, valid_candidates, *p_tree, new_candidates);\n      curt::DefaultStream().Sync();\n\n      driver.Push(new_candidates.begin(), new_candidates.end());\n      expand_set = driver.Pop();\n    }\n    // Row partitioner can have lesser nodes than the tree since we skip some leaf\n    // nodes. These nodes are handled in the `FinalisePosition` call. However, a leaf can\n    // be spliable before evaluation but invalid after evaluation as we have more\n    // restrictions like min loss change after evalaution. 
Therefore, the check condition\n    // is greater than or equal to.\n    if (p_fmat->SingleColBlock()) {\n      CHECK_GE(p_tree->NumNodes(), this->partitioners_.Front()->GetNumNodes());\n    }\n    this->FinalisePosition(p_fmat, p_tree, p_out_position);\n  }\n};\n\nstd::pair<std::shared_ptr<common::HistogramCuts const>, bool> InitBatchCuts(\n    Context const* ctx, DMatrix* p_fmat, BatchParam const& batch,\n    std::vector<bst_idx_t>* p_batch_ptr) {\n  std::vector<bst_idx_t>& batch_ptr = *p_batch_ptr;\n  batch_ptr = {0};\n  std::shared_ptr<common::HistogramCuts const> cuts;\n\n  std::int32_t dense_compressed = -1;\n  for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx, batch)) {\n    batch_ptr.push_back(page.Size());\n    cuts = page.Impl()->CutsShared();\n    CHECK(cuts->cut_values_.DeviceCanRead());\n    if (dense_compressed != -1) {\n      CHECK_EQ(page.Impl()->IsDenseCompressed(), static_cast<bool>(dense_compressed));\n    }\n    dense_compressed = page.Impl()->IsDenseCompressed();\n  }\n  CHECK(cuts);\n  CHECK_EQ(p_fmat->NumBatches(), batch_ptr.size() - 1);\n  std::partial_sum(batch_ptr.cbegin(), batch_ptr.cend(), batch_ptr.begin());\n  return {cuts, static_cast<bool>(dense_compressed)};\n}\n\nclass GPUHistMaker : public TreeUpdater {\n public:\n  explicit GPUHistMaker(Context const* ctx, ObjInfo const* task)\n      : TreeUpdater(ctx),\n        task_{task},\n        column_sampler_{std::make_shared<common::ColumnSampler>()} {};\n  void Configure(const Args& args) override {\n    // Used in test to count how many configurations are performed\n    LOG(DEBUG) << \"[GPU Hist]: Configure\";\n    hist_maker_param_.UpdateAllowUnknown(args);\n    initialised_ = false;\n\n    monitor_.Init(\"updater_gpu_hist\");\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"hist_train_param\"), &this->hist_maker_param_);\n    initialised_ = false;\n  }\n  void SaveConfig(Json* p_out) const 
override {\n    auto& out = *p_out;\n    out[\"hist_train_param\"] = ToJson(hist_maker_param_);\n  }\n\n  ~GPUHistMaker() override { dh::GlobalMemoryLogger().Log(); }\n\n  void Update(TrainParam const* param, GradientContainer* in_gpair, DMatrix* p_fmat,\n              common::Span<HostDeviceVector<bst_node_t>> out_position,\n              std::vector<RegTree*> const& trees) override {\n    in_gpair->gpair.SetDevice(this->ctx_->Device());\n\n    // build tree\n    std::size_t t_idx{0};\n    for (xgboost::RegTree* p_tree : trees) {\n      this->InitData(param, p_fmat, p_tree);\n      if (p_tree->IsMultiTarget()) {\n        p_mtimpl_->UpdateTree(in_gpair, p_fmat, task_, p_tree, &out_position[t_idx]);\n      } else {\n        CHECK_EQ(in_gpair->gpair.Shape(1), 1);\n        p_scimpl_->UpdateTree(in_gpair->gpair.Data(), p_fmat, p_tree, &out_position[t_idx]);\n      }\n      this->hist_maker_param_.CheckTreesSynchronized(ctx_, p_tree);\n      ++t_idx;\n    }\n    monitor_.Stop(__func__);\n  }\n\n  void InitDataOnce(TrainParam const* param, DMatrix* p_fmat) {\n    monitor_.Start(__func__);\n    CHECK_GE(ctx_->Ordinal(), 0) << \"Must have at least one device\";\n\n    curt::SetDevice(ctx_->Ordinal());\n    p_fmat->Info().feature_types.SetDevice(ctx_->Device());\n\n    std::vector<bst_idx_t> batch_ptr;\n    auto batch = HistBatch(*param);\n    auto [cuts, dense_compressed] = InitBatchCuts(ctx_, p_fmat, batch, &batch_ptr);\n\n    this->p_scimpl_ =\n        std::make_unique<GPUHistMakerDevice>(ctx_, *param, &hist_maker_param_, column_sampler_,\n                                             p_fmat->Info(), batch_ptr, cuts, dense_compressed);\n    this->p_mtimpl_ = std::make_unique<cuda_impl::MultiTargetHistMaker>(\n        this->ctx_, *param, &hist_maker_param_, this->column_sampler_, batch_ptr, cuts,\n        dense_compressed);\n\n    p_last_fmat_ = p_fmat;\n    initialised_ = true;\n    monitor_.Stop(__func__);\n  }\n\n  void InitData(TrainParam const* param, DMatrix* dmat, 
RegTree const* p_tree) {\n    monitor_.Start(__func__);\n    if (!initialised_) {\n      this->InitDataOnce(param, dmat);\n    }\n    p_last_tree_ = p_tree;\n    CHECK(hist_maker_param_.GetInitialised());\n    monitor_.Stop(__func__);\n  }\n\n  void UpdateTree(TrainParam const* param, linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat,\n                  RegTree* p_tree, HostDeviceVector<bst_node_t>* p_out_position) {\n    this->InitData(param, p_fmat, p_tree);\n    gpair->SetDevice(ctx_->Device());\n    auto gpair_hdv = gpair->Data();\n    CHECK(!p_tree->IsMultiTarget());\n    p_scimpl_->UpdateTree(gpair_hdv, p_fmat, p_tree, p_out_position);\n  }\n\n  bool UpdatePredictionCache(DMatrix const* p_fmat,\n                             common::Span<HostDeviceVector<bst_node_t>> out_position,\n                             linalg::MatrixView<float> p_out_preds) override {\n    if (p_scimpl_ == nullptr || p_last_fmat_ == nullptr || p_last_fmat_ != p_fmat) {\n      return false;\n    }\n    if (out_position.size() > 1) {\n      return false;\n    }\n\n    xgboost_NVTX_FN_RANGE();\n\n    if (this->p_last_tree_->IsMultiTarget()) {\n      CHECK(p_mtimpl_);\n      return p_mtimpl_->UpdatePredictionCache(p_out_preds, out_position, p_last_tree_);\n    } else {\n      return p_scimpl_->UpdatePredictionCache(p_out_preds, out_position, p_last_tree_);\n    }\n  }\n\n  [[nodiscard]] char const* Name() const override { return \"grow_gpu_hist\"; }\n  [[nodiscard]] bool HasNodePosition() const override { return true; }\n\n private:\n  bool initialised_{false};\n\n  // Scalar tree implementation\n  std::unique_ptr<GPUHistMakerDevice> p_scimpl_;\n  // Vector tree implementation\n  std::unique_ptr<cuda_impl::MultiTargetHistMaker> p_mtimpl_;\n\n  HistMakerTrainParam hist_maker_param_;\n\n  DMatrix* p_last_fmat_{nullptr};\n  RegTree const* p_last_tree_{nullptr};\n  ObjInfo const* task_{nullptr};\n\n  common::Monitor monitor_;\n  std::shared_ptr<common::ColumnSampler> 
column_sampler_;\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, \"grow_gpu_hist\")\n    .describe(\"Grow tree with GPU.\")\n    .set_body([](Context const* ctx, ObjInfo const* task) { return new GPUHistMaker(ctx, task); });\n\nclass GPUGlobalApproxMaker : public TreeUpdater {\n public:\n  explicit GPUGlobalApproxMaker(Context const* ctx, ObjInfo const* task)\n      : TreeUpdater(ctx),\n        task_{task},\n        column_sampler_{std::make_shared<common::ColumnSampler>()} {};\n  void Configure(Args const& args) override {\n    // Used in test to count how many configurations are performed\n    LOG(DEBUG) << \"[GPU Approx]: Configure\";\n    hist_maker_param_.UpdateAllowUnknown(args);\n    initialised_ = false;\n\n    monitor_.Init(this->Name());\n  }\n\n  void LoadConfig(Json const& in) override {\n    auto const& config = get<Object const>(in);\n    FromJson(config.at(\"hist_train_param\"), &this->hist_maker_param_);\n    initialised_ = false;\n  }\n  void SaveConfig(Json* p_out) const override {\n    auto& out = *p_out;\n    out[\"hist_train_param\"] = ToJson(hist_maker_param_);\n  }\n  ~GPUGlobalApproxMaker() override { dh::GlobalMemoryLogger().Log(); }\n\n  void Update(TrainParam const* param, GradientContainer* in_gpair, DMatrix* p_fmat,\n              common::Span<HostDeviceVector<bst_node_t>> out_position,\n              const std::vector<RegTree*>& trees) override {\n    monitor_.Start(__func__);\n\n    this->InitDataOnce(p_fmat);\n    auto gpair = in_gpair->FullGradOnly();\n    // build tree\n    hess_.resize(gpair->Size());\n    auto hess = dh::ToSpan(hess_);\n\n    gpair->SetDevice(ctx_->Device());\n    auto d_gpair = gpair->Data()->ConstDeviceSpan();\n    auto cuctx = ctx_->CUDACtx();\n    thrust::transform(cuctx->CTP(), dh::tcbegin(d_gpair), dh::tcend(d_gpair), dh::tbegin(hess),\n                      [=] XGBOOST_DEVICE(GradientPair const& g) { return g.GetHess(); });\n\n    auto const& info = p_fmat->Info();\n    
info.feature_types.SetDevice(ctx_->Device());\n\n    std::vector<bst_idx_t> batch_ptr;\n    auto batch = ApproxBatch(*param, hess, *task_);\n    auto [cuts, dense_compressed] = InitBatchCuts(ctx_, p_fmat, batch, &batch_ptr);\n    batch.regen = false;  // Regen only at the beginning of the iteration.\n\n    this->maker_ =\n        std::make_unique<GPUHistMakerDevice>(ctx_, *param, &hist_maker_param_, column_sampler_,\n                                             p_fmat->Info(), batch_ptr, cuts, dense_compressed);\n\n    std::size_t t_idx{0};\n    for (xgboost::RegTree* tree : trees) {\n      this->UpdateTree(gpair->Data(), p_fmat, tree, &out_position[t_idx]);\n      this->hist_maker_param_.CheckTreesSynchronized(ctx_, tree);\n      ++t_idx;\n    }\n\n    monitor_.Stop(__func__);\n  }\n\n  void InitDataOnce(DMatrix* p_fmat) {\n    if (this->initialised_) {\n      return;\n    }\n\n    monitor_.Start(__func__);\n    CHECK(ctx_->IsCUDA()) << error::InvalidCUDAOrdinal();\n\n    p_last_fmat_ = p_fmat;\n    initialised_ = true;\n    monitor_.Stop(__func__);\n  }\n\n  void InitData(DMatrix* p_fmat, RegTree const* p_tree) {\n    this->InitDataOnce(p_fmat);\n    p_last_tree_ = p_tree;\n    CHECK(hist_maker_param_.GetInitialised());\n  }\n\n  void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,\n                  HostDeviceVector<bst_node_t>* p_out_position) {\n    monitor_.Start(\"InitData\");\n    this->InitData(p_fmat, p_tree);\n    monitor_.Stop(\"InitData\");\n\n    gpair->SetDevice(ctx_->Device());\n    maker_->UpdateTree(gpair, p_fmat, p_tree, p_out_position);\n  }\n\n  bool UpdatePredictionCache(DMatrix const* p_fmat,\n                             common::Span<HostDeviceVector<bst_node_t>> out_position,\n                             linalg::MatrixView<float> p_out_preds) override {\n    if (maker_ == nullptr || p_last_fmat_ == nullptr || p_last_fmat_ != p_fmat) {\n      return false;\n    }\n    if (out_position.size() > 1) {\n      
return false;\n    }\n    monitor_.Start(__func__);\n    bool result = maker_->UpdatePredictionCache(p_out_preds, out_position, p_last_tree_);\n    monitor_.Stop(__func__);\n    return result;\n  }\n\n  [[nodiscard]] char const* Name() const override { return \"grow_gpu_approx\"; }\n  [[nodiscard]] bool HasNodePosition() const override { return true; }\n\n private:\n  bool initialised_{false};\n\n  HistMakerTrainParam hist_maker_param_;\n  dh::device_vector<float> hess_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  std::unique_ptr<GPUHistMakerDevice> maker_;\n\n  DMatrix* p_last_fmat_{nullptr};\n  RegTree const* p_last_tree_{nullptr};\n  ObjInfo const* task_{nullptr};\n\n  common::Monitor monitor_;\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(GPUApproxMaker, \"grow_gpu_approx\")\n    .describe(\"Grow tree with GPU.\")\n    .set_body([](Context const* ctx, ObjInfo const* task) {\n      return new GPUGlobalApproxMaker(ctx, task);\n    });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_gpu_hist.cuh",
    "content": "/**\n * Copyright 2025-2026, XGBoost contributors\n */\n#pragma once\n#include <thrust/reduce.h>   // for reduce_by_key\n#include <thrust/version.h>  // for THRUST_MAJOR_VERSION\n\n#include <memory>  // for unique_ptr\n#include <vector>  // for vector\n\n#include \"../collective/communicator-inl.h\"    // for IsDistributed\n#include \"../common/device_helpers.cuh\"        // for MakeTransformIterator\n#include \"../common/nvtx_utils.h\"              // for xgboost_NVTX_FN_RANGE\n#include \"../common/random.h\"                  // for ColumnSampler\n#include \"constraints.cuh\"                     // for FeatureInteractionConstraintDevice\n#include \"driver.h\"                            // for Driver\n#include \"gpu_hist/feature_groups.cuh\"         // for FeatureGroups\n#include \"gpu_hist/histogram.cuh\"              // for DeviceHistogramBuilder\n#include \"gpu_hist/leaf_sum.cuh\"               // for LeafGradSum\n#include \"gpu_hist/multi_evaluate_splits.cuh\"  // for MultiHistEvaluator\n#include \"gpu_hist/row_partitioner.cuh\"        // for RowPartitioner\n#include \"gpu_hist/sampler.cuh\"                // for GradientBasedSampler\n#include \"hist/hist_param.h\"                   // for HistMakerTrainParam\n#include \"sample_position.h\"                   // for SamplePosition\n#include \"tree_view.h\"                         // for MultiTargetTreeView\n#include \"xgboost/base.h\"                      // for bst_idx_t\n#include \"xgboost/context.h\"                   // for Context\n#include \"xgboost/gradient.h\"                  // for GradientContainer\n#include \"xgboost/host_device_vector.h\"        // for HostDeviceVector\n#include \"xgboost/tree_model.h\"                // for RegTree\n\nnamespace xgboost::tree::cuda_impl {\n// Use a large number to handle external memory with deep trees.\ninline constexpr std::size_t kMaxNodeBatchSize = 1024;\nusing xgboost::cuda_impl::StaticBatch;\n\ntemplate <typename GoLeftOp>\nstruct 
GoLeftWrapperOp {\n  GoLeftOp go_left;\n  template <typename NodeSplitData>\n  __device__ bool operator()(RowIndexT ridx, int /*nidx_in_batch*/,\n                             const NodeSplitData& data) const {\n    return go_left(ridx, data);\n  }\n};\n\n/** @brief Encode sampling information in the position. */\nstruct EncodeOp {\n  linalg::MatrixView<GradientPairInt64 const> d_gpair;\n  [[nodiscard]] __device__ bst_node_t operator()(RowIndexT ridx, bst_node_t nidx) const {\n    // Check the first target - all targets in a row have the same sampling decision\n    bool is_sampled = d_gpair(ridx, 0).GetQuantisedHess() != 0;\n    return SamplePosition::Encode(nidx, is_sampled);\n  }\n};\n\n// Some nodes we will manually compute histograms, others we will do by subtraction\ntemplate <typename TreeView, typename ExpandEntry, typename HessComp>\nvoid AssignNodes(TreeView const& tree, std::vector<ExpandEntry> const& candidates,\n                 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub,\n                 HessComp&& compare_hess) {\n  std::size_t nidx_in_set{0};\n  auto p_build_nidx = nodes_to_build.data();\n  auto p_sub_nidx = nodes_to_sub.data();\n  for (auto& e : candidates) {\n    // Decide whether to build the left histogram or right histogram Use sum of Hessian as\n    // a heuristic to select node with fewest training instances This optimization is for\n    // distributed training to avoid an allreduce call for synchronizing the number of\n    // instances for each node.\n    bool fewer_right = compare_hess(e);\n    if (fewer_right) {\n      p_build_nidx[nidx_in_set] = tree.RightChild(e.nidx);\n      p_sub_nidx[nidx_in_set] = tree.LeftChild(e.nidx);\n    } else {\n      p_build_nidx[nidx_in_set] = tree.LeftChild(e.nidx);\n      p_sub_nidx[nidx_in_set] = tree.RightChild(e.nidx);\n    }\n    ++nidx_in_set;\n  }\n}\n\ninline void CalcRootSum(Context const* ctx, linalg::MatrixView<GradientPairInt64> d_gpair,\n                       
 common::Span<GradientPairInt64> root_sum) {\n  auto n_samples = d_gpair.Shape(0);\n  auto n_targets = d_gpair.Shape(1);\n  // Calculate the root sum\n  CHECK_EQ(n_targets, root_sum.size());\n\n  auto key_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) {\n    auto cidx = i / n_samples;\n    return cidx;\n  });\n  auto val_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> GradientPairInt64 {\n    auto cidx = i / n_samples;\n    auto ridx = i % n_samples;\n    auto g = d_gpair(ridx, cidx);\n    return g;\n  });\n  thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + d_gpair.Size(), val_it,\n                        thrust::make_discard_iterator(), dh::tbegin(root_sum));\n}\n\n/**\n * @brief Implementation for vector leaf.\n */\nclass MultiTargetHistMaker {\n private:\n  Context const* ctx_;\n\n  TrainParam const param_;\n  std::vector<bst_idx_t> const batch_ptr_;\n  Sampler sampler_;\n\n  RowPartitionerBatches partitioners_;\n\n  HistMakerTrainParam const* hist_param_;\n  std::shared_ptr<common::HistogramCuts const> const cuts_;\n  std::unique_ptr<FeatureGroups> feature_groups_;\n  DeviceHistogramBuilder histogram_;\n  std::unique_ptr<GradientQuantiserGroup> split_quantizer_;\n  std::unique_ptr<GradientQuantiserGroup> value_quantizer_;\n\n  MultiHistEvaluator evaluator_;\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n  std::unique_ptr<FeatureInteractionConstraintDevice> interaction_constraints_;\n\n  // Gradient used for building the tree structure\n  linalg::Matrix<GradientPairInt64> split_gpair_;\n  // Gradient used for calculating the leaf values\n  linalg::Matrix<GradientPair> value_gpair_;\n\n  dh::PinnedMemory pinned_;\n\n  void BuildHist(EllpackPage const& page, std::int32_t k, bst_node_t nidx) {\n    this->BuildHist(page, k, std::vector{nidx});\n  }\n\n  void BuildHist(EllpackPage const& page, std::int32_t k, std::vector<bst_node_t> build_nodes) {\n    xgboost_NVTX_FN_RANGE();\n\n    auto d_gpair = 
this->split_gpair_.View(this->ctx_->Device());\n    CHECK(!this->partitioners_.Empty());\n\n    auto acc = page.Impl()->GetDeviceEllpack(this->ctx_, {});\n\n    std::vector<common::Span<GradientPairInt64>> h_hists;\n    std::vector<common::Span<RowIndexT const>> h_ridxs;\n    std::vector<std::size_t> h_sizes_csum{0};\n    std::size_t nidx_in_set = 0;\n    // TODO(jiamingy): We can assume the histogram buffers contiguous if we don't skip\n    // nodes here.\n    for (auto nidx : build_nodes) {\n      auto d_ridx = this->partitioners_.At(k)->GetRows(nidx);\n      if (d_ridx.empty()) {\n        // Node has no rows - can happen with external memory when all rows go to the\n        // sibling node.\n        CHECK_GT(this->batch_ptr_.size(), 2);\n        continue;\n      }\n      h_ridxs.push_back(d_ridx);\n      auto d_hist = histogram_.GetNodeHistogram(nidx);\n      h_hists.push_back(d_hist);\n\n      h_sizes_csum.push_back(d_ridx.size() + h_sizes_csum[nidx_in_set]);\n\n      ++nidx_in_set;\n    }\n\n    dh::device_vector<common::Span<GradientPairInt64>> hists{h_hists};\n    dh::device_vector<common::Span<RowIndexT const>> ridxs{h_ridxs};\n\n    this->histogram_.BuildHistogram(this->ctx_, acc,\n                                    this->feature_groups_->DeviceAccessor(this->ctx_->Device()),\n                                    d_gpair, dh::ToSpan(ridxs), dh::ToSpan(hists), h_sizes_csum);\n  }\n\n  auto MakeSharedInputs(bst_feature_t max_active_feature) const {\n    common::Span<GradientQuantiser const> d_roundings = this->split_quantizer_->DeviceSpan();\n    GPUTrainingParam d_param{this->param_};\n    return MultiEvaluateSplitSharedInputs{d_roundings,\n                                          this->cuts_->cut_ptrs_.ConstDeviceSpan(),\n                                          this->cuts_->cut_values_.ConstDevicePointer(),\n                                          this->param_.max_bin,\n                                          max_active_feature,\n                    
                      d_param};\n  }\n\n public:\n  void Reset(linalg::Matrix<GradientPair>* gpair_all, DMatrix* p_fmat) {\n    /**\n     * Initialize the partitioners\n     */\n    partitioners_.Reset(this->ctx_, batch_ptr_);\n\n    auto const& info = p_fmat->Info();\n    this->column_sampler_->Init(ctx_, info.num_col_, info.feature_weights, param_.colsample_bynode,\n                                param_.colsample_bylevel, param_.colsample_bytree);\n\n    /**\n     * Initialize the gradient matrix\n     */\n    auto in_gpair = gpair_all->View(ctx_->Device());\n    CHECK(in_gpair.CContiguous());\n\n    this->split_quantizer_ =\n        std::make_unique<GradientQuantiserGroup>(this->ctx_, in_gpair, p_fmat->Info());\n    CalcQuantizedGpairs(this->ctx_, in_gpair, this->split_quantizer_->DeviceSpan(),\n                        &this->split_gpair_);\n\n    // Sampling\n    this->sampler_.Sample(this->ctx_, this->split_gpair_.View(this->ctx_->Device()),\n                          this->split_quantizer_->DeviceSpan());\n    if (!this->value_gpair_.Empty()) {\n      this->value_quantizer_ = std::make_unique<GradientQuantiserGroup>(\n          this->ctx_, value_gpair_.View(ctx_->Device()), p_fmat->Info());\n      this->sampler_.ApplySampling(this->ctx_, this->split_gpair_, &this->value_gpair_);\n    }\n\n    /**\n     * Initialize the histogram\n     */\n    bool force_global = false;\n    bst_idx_t n_split_targets = gpair_all->Shape(1);\n    auto n_total_bins = cuts_->TotalBins() * static_cast<bst_idx_t>(n_split_targets);\n    CHECK_LT(n_total_bins, std::numeric_limits<bst_bin_t>::max())\n        << \"Too many histogram bins: n_total_bins = max_bin * n_features * n_targets\";\n    histogram_.Reset(this->ctx_, this->hist_param_->MaxCachedHistNodes(ctx_->Device()),\n                     cuts_->TotalBins() * n_split_targets, force_global);\n  }\n\n  [[nodiscard]] MultiExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree) {\n    xgboost_NVTX_FN_RANGE();\n\n    auto d_gpair = 
split_gpair_.View(ctx_->Device());\n    auto n_targets = d_gpair.Shape(1);\n\n    // Calculate the root sum\n    this->evaluator_.AllocNodeSum(RegTree::kRoot, n_targets);\n    auto d_root_sum = this->evaluator_.GetNodeSum(RegTree::kRoot, n_targets);\n    CalcRootSum(this->ctx_, d_gpair, d_root_sum);\n\n    // Build the root histogram.\n    histogram_.AllocateHistograms(ctx_, {RegTree::kRoot});\n\n    CHECK_EQ(p_fmat->NumBatches(), this->partitioners_.Size());\n    std::int32_t k = 0;\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n      this->BuildHist(page, k, RegTree::kRoot);\n      ++k;\n    }\n\n    // Evaluate root split\n    auto node_hist = this->histogram_.GetNodeHistogram(RegTree::kRoot);\n    auto sampled_features = column_sampler_->GetFeatureSet(ctx_, 0);\n    common::Span<bst_feature_t const> feature_set =\n        interaction_constraints_->Query(sampled_features->ConstDeviceSpan(), RegTree::kRoot);\n    MultiEvaluateSplitInputs input{RegTree::kRoot, p_tree->GetDepth(RegTree::kRoot), d_root_sum,\n                                   feature_set, node_hist};\n\n    auto shared_inputs = MakeSharedInputs(static_cast<bst_feature_t>(feature_set.size()));\n    auto entry = this->evaluator_.EvaluateSingleSplit(ctx_, input, shared_inputs);\n    auto weights = this->evaluator_.GetNodeWeights(n_targets);\n    // Root's sum_hess is the sum of left and right child hessians\n    float root_sum_hess = static_cast<float>(entry.left_sum + entry.right_sum);\n    p_tree->SetRoot(linalg::MakeVec(this->ctx_->Device(), weights.Base(RegTree::kRoot)),\n                    root_sum_hess);\n\n    return entry;\n  }\n\n  void ApplySplit(std::vector<MultiExpandEntry> const& h_candidates, RegTree* p_tree) {\n    xgboost_NVTX_FN_RANGE();\n\n    CHECK(!h_candidates.empty());\n    auto n_targets = this->split_gpair_.Shape(1);\n\n    // Get weights by node ID from the evaluator's buffer.\n    //\n    // TODO(jiamingy): Avoid device to host 
copies.\n    for (auto const& candidate : h_candidates) {\n      std::vector<float> h_base_weight, h_left_weight, h_right_weight;\n      this->evaluator_.CopyNodeWeightsToHost(candidate.nidx, n_targets, &h_base_weight,\n                                             &h_left_weight, &h_right_weight);\n      // Get loss_chg from the split, and sum hessians for parent and children\n      float loss_chg = candidate.split.loss_chg;\n      float left_sum = static_cast<float>(candidate.left_sum);\n      float right_sum = static_cast<float>(candidate.right_sum);\n      float sum_hess = left_sum + right_sum;\n      p_tree->ExpandNode(candidate.nidx, candidate.split.findex, candidate.split.fvalue,\n                         candidate.split.dir == kLeftDir, linalg::MakeVec(h_base_weight),\n                         linalg::MakeVec(h_left_weight), linalg::MakeVec(h_right_weight), loss_chg,\n                         sum_hess, left_sum, right_sum);\n    }\n\n    dh::device_vector<MultiExpandEntry> candidates{h_candidates};\n    this->evaluator_.ApplyTreeSplit(this->ctx_, p_tree, dh::ToSpan(candidates), n_targets);\n  }\n  /**\n   * @brief Calculate the leaf weight based on the node sum for each leaf.\n   *\n   * This method helps support reduced gradient. Weights in p_tree are calculated using\n   * split gradient. 
This function replaces those weights with new weights calculated from\n   * value gradient.\n   */\n  void ExpandTreeLeaf(RegTree* p_tree) const {\n    auto n_leaves = static_cast<bst_target_t>(p_tree->GetNumLeaves());\n    auto out_sum = linalg::Constant(ctx_, GradientPairInt64{}, n_leaves, p_tree->NumTargets());\n    auto d_out_sum = out_sum.View(this->ctx_->Device());\n\n    auto d_full_grad = this->value_gpair_.View(this->ctx_->Device());\n    auto d_roundings = this->value_quantizer_->DeviceSpan();\n    // Node indices for all leaves\n    std::vector<bst_node_t> leaves_idx(n_leaves);\n\n#if THRUST_MAJOR_VERSION >= 3\n    // do nothing\n#else\n    CHECK_EQ(this->partitioners_.Size(), 1)\n        << \"External memory not implemented for old CCCL versions. (thrust < 3.0)\";\n#endif\n    std::int32_t batch_idx = 0;\n    for (auto const& p_part : this->partitioners_) {\n      auto leaves = p_part->GetLeaves();\n      CHECK_EQ(leaves.size(), n_leaves);\n      LeafGradSum(this->ctx_, leaves, d_roundings, p_part->GetRows(), d_full_grad, d_out_sum);\n      if (batch_idx == 0) {\n        // Populate the node indices\n        std::transform(leaves.begin(), leaves.end(), leaves_idx.begin(),\n                       [](LeafInfo const& leaf) { return leaf.nidx; });\n      }\n      // Sanity check: all partitioners should have the same set of leaves\n      if (this->hist_param_->debug_synchronize) {\n        auto it = common::MakeIndexTransformIter([&](std::size_t i) { return leaves.at(i).nidx; });\n        CHECK(std::equal(it, it + n_leaves, leaves_idx.cbegin()));\n      }\n      ++batch_idx;\n    }\n\n    auto param = GPUTrainingParam{this->param_};\n    auto out_weight = linalg::Empty<float>(this->ctx_, n_leaves, p_tree->NumTargets());\n    // Use full value gradient for leaf values.\n    LeafWeight(this->ctx_, param, this->value_quantizer_->DeviceSpan(),\n               out_sum.View(this->ctx_->Device()), out_weight.View(this->ctx_->Device()));\n\n    
p_tree->SetLeaves(leaves_idx, out_weight.Data()->ConstHostSpan());\n  }\n\n  struct NodeSplitData {\n    bst_node_t nidx;\n  };\n\n  struct PartitionNodes {\n    std::vector<bst_node_t> nidx;\n    std::vector<bst_node_t> left_nidx;\n    std::vector<bst_node_t> right_nidx;\n    std::vector<NodeSplitData> split_data;\n\n    explicit PartitionNodes(std::size_t n_candidates)\n        : nidx(n_candidates),\n          left_nidx(n_candidates),\n          right_nidx(n_candidates),\n          split_data(n_candidates) {}\n  };\n\n  PartitionNodes CreatePartitionNodes(RegTree const* p_tree,\n                                      std::vector<MultiExpandEntry> const& candidates) {\n    PartitionNodes nodes(candidates.size());\n    auto tree = p_tree->HostMtView();\n    // TODO(jiamingy) Avoid pulling the host tree.\n    for (std::size_t i = 0, n = candidates.size(); i < n; i++) {\n      auto const& e = candidates[i];\n      auto split_type = tree.SplitType(e.nidx);\n      nodes.nidx.at(i) = e.nidx;\n      nodes.left_nidx[i] = tree.LeftChild(e.nidx);\n      nodes.right_nidx[i] = tree.RightChild(e.nidx);\n      nodes.split_data[i] = NodeSplitData{e.nidx};\n\n      CHECK_EQ(split_type == FeatureType::kCategorical, e.split.is_cat);\n    }\n    return nodes;\n  }\n\n  // TODO(jiamingy): Merge this with the single target version. 
Make sure copying tree\n  // data doesn't block external memory execution.\n  //\n  // Pulling in the device view has negative performance impact as we need to resize the\n  // tree internal buffers repeatedly, which invokes many small data copies and memory\n  // allocations.\n  template <typename Accessor>\n  struct GoLeftOp {\n    Accessor d_matrix;\n    MultiTargetTreeView tree;\n    __device__ bool operator()(RowIndexT ridx, NodeSplitData const& data) const {\n      // given a row index, returns the node id it belongs to\n      float cut_value = d_matrix.GetFvalue(ridx, tree.SplitIndex(data.nidx));\n      // Missing value\n      bool go_left = true;\n      if (isnan(cut_value)) {\n        go_left = tree.DefaultLeft(data.nidx);\n      } else {\n        if (tree.SplitType(data.nidx) == FeatureType::kCategorical) {\n          go_left = common::Decision(tree.NodeCats(data.nidx), cut_value);\n        } else {\n          go_left = cut_value <= tree.SplitCond(data.nidx);\n        }\n      }\n      return go_left;\n    }\n  };\n\n  void ReduceHist(DMatrix* p_fmat, std::vector<MultiExpandEntry> const& candidates,\n                  std::vector<bst_node_t> const& build_nidx,\n                  std::vector<bst_node_t> const& subtraction_nidx) {\n    if (candidates.empty()) {\n      return;\n    }\n\n    xgboost_NVTX_FN_RANGE();\n\n    // Perform subtraction for sibling nodes\n    auto need_build = this->histogram_.SubtractHist(ctx_, candidates, build_nidx, subtraction_nidx);\n    if (need_build.empty()) {\n      return;\n    }\n\n    // Build the nodes that can not obtain the histogram using subtraction. 
This is the slow path.\n    std::int32_t k = 0;\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(ctx_, StaticBatch(true))) {\n      this->BuildHist(page, k, need_build);\n      ++k;\n    }\n  }\n\n  void PartitionAndBuildHist(DMatrix* p_fmat, std::vector<MultiExpandEntry> const& expand_set,\n                             std::vector<MultiExpandEntry> const& candidates,\n                             RegTree const* p_tree) {\n    if (expand_set.empty()) {\n      return;\n    }\n\n    xgboost_NVTX_FN_RANGE();\n\n    CHECK_LE(candidates.size(), expand_set.size());\n    // TODO(jiamingy): Implement finalize partition using candidates instead of expand_set when\n    // performing in-core training.\n\n    // Prepare for update partition\n    auto nodes = this->CreatePartitionNodes(p_tree, expand_set);\n\n    std::vector<bst_node_t> build_nidx(candidates.size());\n    std::vector<bst_node_t> subtraction_nidx(candidates.size());\n    auto mt_tree = p_tree->HostMtView();\n    AssignNodes(mt_tree, candidates, build_nidx, subtraction_nidx, [](MultiExpandEntry const& e) {\n      bool fewer_right = e.right_sum < e.left_sum;\n      return fewer_right;\n    });\n\n    // TODO(jiamingy): Define threshold for deep trees.\n    bool prefetch_copy = !build_nidx.empty();\n\n    histogram_.AllocateHistograms(this->ctx_, build_nidx, subtraction_nidx);\n\n    // Pull to device (stats not needed for partitioning)\n    mt_tree = MultiTargetTreeView{this->ctx_->Device(), false, p_tree};\n\n    std::int32_t k{0};\n    for (auto const& page :\n         p_fmat->GetBatches<EllpackPage>(this->ctx_, StaticBatch(prefetch_copy))) {\n      page.Impl()->Visit(this->ctx_, {}, [&](auto&& d_acc) {\n        using Acc = std::remove_reference_t<decltype(d_acc)>;\n        using GoLeft = GoLeftOp<Acc>;\n        auto go_left = GoLeft{d_acc, mt_tree};\n\n        // Partition rows.\n        partitioners_.UpdatePositionBatch(this->ctx_, k, nodes.nidx, nodes.left_nidx,\n                                   
       nodes.right_nidx, nodes.split_data,\n                                          GoLeftWrapperOp<GoLeft>{go_left});\n\n        // Build histograms.\n        if (!build_nidx.empty()) {\n          this->BuildHist(page, k, build_nidx);\n        }\n      });\n      ++k;\n    }\n\n    this->ReduceHist(p_fmat, expand_set, build_nidx, subtraction_nidx);\n  }\n\n  void EvaluateSplits(std::vector<MultiExpandEntry> const& candidates, RegTree const& tree,\n                      common::Span<MultiExpandEntry> pinned_candidates_out) {\n    if (candidates.empty()) {\n      return;\n    }\n    xgboost_NVTX_FN_RANGE();\n\n    dh::device_vector<MultiEvaluateSplitInputs> inputs(2 * candidates.size());\n    dh::device_vector<MultiExpandEntry> outputs(2 * candidates.size());\n\n    auto mt_tree = tree.HostMtView();\n    std::vector<MultiEvaluateSplitInputs> h_node_inputs(candidates.size() * 2);\n\n    // Store the feature set ptrs so they don't go out of scope before the kernel is called\n    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;\n\n    auto n_targets = this->split_gpair_.Shape(1);\n    bst_feature_t max_active_feature = 0;\n    // Track max node ID for buffer allocation\n    bst_node_t max_nidx = 0;\n\n    for (std::size_t i = 0; i < candidates.size(); i++) {\n      auto candidate = candidates.at(i);\n      bst_node_t left_nidx = mt_tree.LeftChild(candidate.nidx);\n      bst_node_t right_nidx = mt_tree.RightChild(candidate.nidx);\n\n      auto left_sampled_features = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(left_nidx));\n      feature_sets.emplace_back(left_sampled_features);\n      common::Span<bst_feature_t const> left_feature_set =\n          interaction_constraints_->Query(left_sampled_features->ConstDeviceSpan(), left_nidx);\n\n      auto right_sampled_features = column_sampler_->GetFeatureSet(ctx_, tree.GetDepth(right_nidx));\n      feature_sets.emplace_back(right_sampled_features);\n      common::Span<bst_feature_t const> 
right_feature_set =\n          interaction_constraints_->Query(right_sampled_features->ConstDeviceSpan(), right_nidx);\n\n      // Make sure no allocation is happening.\n      // The parent sum is calculated in the last apply tree split.\n      auto left = MultiEvaluateSplitInputs{\n          left_nidx, candidate.depth + 1, this->evaluator_.GetNodeSum(left_nidx, n_targets),\n          left_feature_set, histogram_.GetNodeHistogram(left_nidx)};\n      auto right = MultiEvaluateSplitInputs{\n          right_nidx, candidate.depth + 1, this->evaluator_.GetNodeSum(right_nidx, n_targets),\n          right_feature_set, histogram_.GetNodeHistogram(right_nidx)};\n      h_node_inputs[i * 2] = left;\n      h_node_inputs[i * 2 + 1] = right;\n\n      max_active_feature = std::max({left_feature_set.size(), right_feature_set.size(),\n                                     static_cast<std::size_t>(max_active_feature)});\n      max_nidx = std::max({max_nidx, left_nidx, right_nidx});\n    }\n    dh::safe_cuda(cudaMemcpyAsync(inputs.data().get(), h_node_inputs.data(),\n                                  common::SizeBytes<MultiEvaluateSplitInputs>(h_node_inputs.size()),\n                                  cudaMemcpyDefault, ctx_->CUDACtx()->Stream()));\n\n    auto shared_inputs = MakeSharedInputs(max_active_feature);\n    this->evaluator_.EvaluateSplits(this->ctx_, dh::ToSpan(inputs), shared_inputs, max_nidx,\n                                    dh::ToSpan(outputs));\n    dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(), outputs.data().get(),\n                                  pinned_candidates_out.size_bytes(), cudaMemcpyDefault,\n                                  ctx_->CUDACtx()->Stream()));\n  }\n\n  void FinalizePosition(DMatrix* p_fmat, RegTree const* p_tree,\n                        HostDeviceVector<bst_node_t>* p_out_position) {\n    xgboost_NVTX_FN_RANGE();\n\n    p_out_position->SetDevice(ctx_->Device());\n    p_out_position->Resize(p_fmat->Info().num_row_);\n    auto 
d_out_position = p_out_position->DeviceSpan();\n    auto d_gpair = this->split_gpair_.View(this->ctx_->Device());\n\n    for (std::size_t k = 0; k < partitioners_.Size(); ++k) {\n      auto& part = partitioners_.At(k);\n      CHECK_EQ(part->GetNumNodes(), p_tree->NumNodes());\n      auto base_rowid = batch_ptr_[k];\n      auto n_samples = batch_ptr_.at(k + 1) - base_rowid;\n      part->FinalisePosition(ctx_, d_out_position.subspan(base_rowid, n_samples), base_rowid,\n                             EncodeOp{d_gpair});\n    }\n  }\n\n  bool UpdatePredictionCache(linalg::MatrixView<float> out_preds_d,\n                             common::Span<HostDeviceVector<bst_node_t>> out_position,\n                             RegTree const* p_tree) {\n    xgboost_NVTX_FN_RANGE();\n\n    CHECK_EQ(out_position.size(), 1);\n    auto d_position = out_position.front().ConstDeviceSpan();\n    CHECK_EQ(out_preds_d.Shape(0), d_position.size());\n    auto mt_tree = MultiTargetTreeView{this->ctx_->Device(), false, p_tree};\n    thrust::for_each_n(this->ctx_->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul),\n                       out_preds_d.Size(), [=] XGBOOST_DEVICE(std::size_t i) mutable {\n                         auto [sample_idx, target_idx] =\n                             linalg::UnravelIndex(i, out_preds_d.Shape());\n                         bst_node_t nidx = d_position[sample_idx];\n                         nidx = SamplePosition::Decode(nidx);\n                         auto weight = mt_tree.LeafValue(nidx);\n                         out_preds_d(sample_idx, target_idx) += weight(target_idx);\n                       });\n    return true;\n  }\n\n  void UpdateTree(GradientContainer* gpair, DMatrix* p_fmat, ObjInfo const* task, RegTree* p_tree,\n                  HostDeviceVector<bst_node_t>* p_out_position) {\n    xgboost_NVTX_FN_RANGE();\n\n    if (!param_.monotone_constraints.empty()) {\n      LOG(FATAL) << \"Monotonic constraint\" << MTNotImplemented();\n    }\n    if 
(!param_.interaction_constraints.empty()) {\n      LOG(FATAL) << \"Interaction constraint\" << MTNotImplemented();\n    }\n    if (collective::IsDistributed()) {\n      CHECK(!gpair->HasValueGrad()) << \"Distributed training with vector leaf\" << MTNotImplemented();\n    }\n    if (this->cuts_->HasCategorical()) {\n      LOG(FATAL) << \"Categorical features\" << MTNotImplemented();\n    }\n\n    auto* split_grad = gpair->Grad();\n    if (gpair->HasValueGrad()) {\n      this->value_gpair_ = linalg::Matrix<GradientPair>{gpair->value_gpair.Shape(), ctx_->Device()};\n      gpair->value_gpair.SetDevice(this->ctx_->Device());\n      this->value_gpair_.Data()->Copy(*gpair->value_gpair.Data());\n    }\n    CHECK_LE(split_grad->Shape(1), p_tree->NumTargets());\n\n    this->GrowTree(split_grad, p_fmat, task, p_tree, p_out_position);\n\n    if (gpair->HasValueGrad()) {\n      this->ExpandTreeLeaf(p_tree);\n    } else {\n      p_tree->GetMultiTargetTree()->SetLeaves();\n    }\n  }\n\n  void GrowTree(linalg::Matrix<GradientPair>* split_gpair, DMatrix* p_fmat, ObjInfo const*,\n                RegTree* p_tree, HostDeviceVector<bst_node_t>* p_out_position) {\n    xgboost_NVTX_FN_RANGE();\n    Driver<MultiExpandEntry> driver{param_, kMaxNodeBatchSize};\n\n    this->Reset(split_gpair, p_fmat);\n    driver.Push({this->InitRoot(p_fmat, p_tree)});\n\n    // The set of leaves that can be expanded asynchronously\n    auto expand_set = driver.Pop();\n    while (!expand_set.empty()) {\n      this->ApplySplit(expand_set, p_tree);\n      // Get the candidates we are allowed to expand further\n      // e.g. 
We do not bother further processing nodes whose children are beyond max depth\n      std::vector<MultiExpandEntry> valid_candidates;\n      std::copy_if(expand_set.begin(), expand_set.end(), std::back_inserter(valid_candidates),\n                   [&](auto const& e) { return driver.IsChildValid(e); });\n\n      // Allocate children nodes.\n      auto new_candidates = pinned_.GetSpan(valid_candidates.size() * 2, MultiExpandEntry{});\n\n      this->PartitionAndBuildHist(p_fmat, expand_set, valid_candidates, p_tree);\n\n      this->EvaluateSplits(valid_candidates, *p_tree, new_candidates);\n      this->ctx_->CUDACtx()->Stream().Sync();\n\n      driver.Push(new_candidates.begin(), new_candidates.end());\n\n      expand_set = driver.Pop();\n    }\n\n    if (p_fmat->SingleColBlock()) {\n      CHECK_GE(p_tree->NumNodes(), this->partitioners_.Front()->GetNumNodes());\n    }\n    this->FinalizePosition(p_fmat, p_tree, p_out_position);\n  }\n\n  explicit MultiTargetHistMaker(Context const* ctx, TrainParam param,\n                                HistMakerTrainParam const* hist_param,\n                                std::shared_ptr<common::ColumnSampler> column_sampler,\n                                std::vector<bst_idx_t> batch_ptr,\n                                std::shared_ptr<common::HistogramCuts const> cuts,\n                                bool dense_compressed)\n      : ctx_{ctx},\n        param_{std::move(param)},\n        batch_ptr_{std::move(batch_ptr)},\n        sampler_{batch_ptr_.back(), param_.subsample, param_.sampling_method},\n        hist_param_{hist_param},\n        cuts_{std::move(cuts)},\n        feature_groups_{std::make_unique<FeatureGroups>(*cuts_, dense_compressed,\n                                                        DftMtHistShmemBytes(ctx_->Ordinal()))},\n        column_sampler_{std::move(column_sampler)},\n        interaction_constraints_{\n            std::make_unique<FeatureInteractionConstraintDevice>(param_, cuts_->NumFeatures())} 
{}\n};\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "src/tree/updater_prune.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file updater_prune.cc\n * \\brief prune a tree given the statistics\n * \\author Tianqi Chen\n */\n#include <xgboost/tree_updater.h>\n\n#include <memory>\n\n#include \"../common/timer.h\"\n#include \"./param.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/gradient.h\"  // for GradientContainer\n#include \"xgboost/json.h\"\n\nnamespace xgboost::tree {\nDMLC_REGISTRY_FILE_TAG(updater_prune);\n\n/*! \\brief pruner that prunes a tree after growing finishes */\nclass TreePruner : public TreeUpdater {\n public:\n  explicit TreePruner(Context const* ctx, ObjInfo const* task) : TreeUpdater(ctx) {\n    syncher_.reset(TreeUpdater::Create(\"sync\", ctx_, task));\n    pruner_monitor_.Init(\"TreePruner\");\n  }\n  [[nodiscard]] char const* Name() const override { return \"prune\"; }\n  // set training parameter\n  void Configure(const Args& args) override { syncher_->Configure(args); }\n\n  void LoadConfig(Json const&) override {}\n  void SaveConfig(Json*) const override {}\n  [[nodiscard]] bool CanModifyTree() const override { return true; }\n\n  // update the tree, do pruning\n  void Update(TrainParam const* param, GradientContainer* in_gpair, DMatrix* p_fmat,\n              common::Span<HostDeviceVector<bst_node_t>> out_position,\n              const std::vector<RegTree*>& trees) override {\n    pruner_monitor_.Start(\"PrunerUpdate\");\n    for (auto tree : trees) {\n      this->DoPrune(param, tree);\n    }\n    syncher_->Update(param, in_gpair, p_fmat, out_position, trees);\n    pruner_monitor_.Stop(\"PrunerUpdate\");\n  }\n\n private:\n  // try to prune off current leaf\n  bst_node_t TryPruneLeaf(TrainParam const* param, RegTree* p_tree, int nid, int depth,\n                          int npruned) {\n    auto& tree = *p_tree;\n    CHECK(tree[nid].IsLeaf());\n    if (tree[nid].IsRoot()) {\n      return npruned;\n    }\n    bst_node_t pid = tree[nid].Parent();\n    CHECK(!tree[pid].IsLeaf());\n    
RTreeNodeStat const &s = tree.Stat(pid);\n    // Only prune when both child are leaf.\n    auto left = tree[pid].LeftChild();\n    auto right = tree[pid].RightChild();\n    bool balanced = tree[left].IsLeaf() &&\n                    right != RegTree::kInvalidNodeId && tree[right].IsLeaf();\n    if (balanced && param->NeedPrune(s.loss_chg, depth)) {\n      // need to be pruned\n      tree.ChangeToLeaf(pid, param->learning_rate * s.base_weight);\n      // tail recursion\n      return this->TryPruneLeaf(param, p_tree, pid, depth - 1, npruned + 2);\n    } else {\n      return npruned;\n    }\n  }\n  /*! \\brief do pruning of a tree */\n  void DoPrune(TrainParam const* param, RegTree* p_tree) {\n    auto& tree = *p_tree;\n    bst_node_t npruned = 0;\n    for (int nid = 0; nid < tree.NumNodes(); ++nid) {\n      if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {\n        npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);\n      }\n    }\n    LOG(INFO) << \"tree pruning end, \"\n              << tree.NumExtraNodes() << \" extra nodes, \" << npruned\n              << \" pruned nodes, max_depth=\" << tree.MaxDepth();\n  }\n\n private:\n  // synchronizer\n  std::unique_ptr<TreeUpdater> syncher_;\n  common::Monitor pruner_monitor_;\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(TreePruner, \"prune\")\n    .describe(\"Pruner that prune the tree according to statistics.\")\n    .set_body([](Context const* ctx, ObjInfo const* task) {\n      return new TreePruner{ctx, task};\n    });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_quantile_hist.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n * \\file updater_quantile_hist.cc\n * \\brief use quantized feature values to construct a tree\n * \\author Philip Cho, Tianqi Checn, Egor Smirnov\n */\n#include <algorithm>  // for max, copy, transform\n#include <cstddef>    // for size_t\n#include <cstdint>    // for uint32_t, int32_t\n#include <memory>     // for allocator, unique_ptr, make_unique, shared_ptr\n#include <ostream>    // for operator<<, basic_ostream, char_traits\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../collective/aggregator.h\"        // for GlobalSum\n#include \"../collective/communicator-inl.h\"  // for IsDistributed\n#include \"../common/hist_util.h\"             // for HistogramCuts, GHistRow\n#include \"../common/linalg_op.h\"             // for begin, cbegin, cend\n#include \"../common/random.h\"                // for ColumnSampler\n#include \"../common/threading_utils.h\"       // for ParallelFor\n#include \"../common/timer.h\"                 // for Monitor\n#include \"../data/gradient_index.h\"          // for GHistIndexMatrix\n#include \"common_row_partitioner.h\"          // for CommonRowPartitioner\n#include \"dmlc/registry.h\"                   // for DMLC_REGISTRY_FILE_TAG\n#include \"driver.h\"                          // for Driver\n#include \"fit_stump.h\"                       // for SumGradients\n#include \"hist/evaluate_splits.h\"            // for HistEvaluator, HistMultiEvaluator, UpdatePre...\n#include \"hist/expand_entry.h\"               // for MultiExpandEntry, CPUExpandEntry\n#include \"hist/hist_cache.h\"                 // for BoundedHistCollection\n#include \"hist/hist_param.h\"                 // for HistMakerTrainParam\n#include \"hist/histogram.h\"                  // for MultiHistogramBuilder\n#include \"hist/sampler.h\"                    // for Sampler\n#include \"param.h\"                           // for TrainParam, GradStats\n#include 
\"xgboost/base.h\"                    // for Args, GradientPairPrecise, GradientPair, Gra...\n#include \"xgboost/context.h\"                 // for Context\n#include \"xgboost/data.h\"                    // for BatchSet, DMatrix, BatchIterator, MetaInfo\n#include \"xgboost/gradient.h\"                // for GradientContainer\n#include \"xgboost/host_device_vector.h\"      // for HostDeviceVector\n#include \"xgboost/json.h\"                    // for Object, Json, FromJson, ToJson, get\n#include \"xgboost/linalg.h\"                  // for MatrixView, TensorView, All, Matrix, Empty\n#include \"xgboost/logging.h\"                 // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_GE\n#include \"xgboost/span.h\"                    // for Span, operator!=, SpanIterator\n#include \"xgboost/string_view.h\"             // for operator<<\n#include \"xgboost/task.h\"                    // for ObjInfo\n#include \"xgboost/tree_model.h\"              // for RegTree, MTNotImplemented, RTreeNodeStat\n#include \"xgboost/tree_updater.h\"            // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...\n\nnamespace xgboost::tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_quantile_hist);\n\nBatchParam HistBatch(TrainParam const *param) { return {param->max_bin, param->sparse_threshold}; }\n\n/**\n * @brief Sum a 3D tensor along the first axis, producing a 2D matrix.\n *\n * Used to reduce thread-local accumulators: [n_threads, rows, cols] -> [rows, cols]\n */\ntemplate <typename T>\nlinalg::Matrix<T> ReduceToRows(Context const *ctx, linalg::TensorView<T, 3> const &tloc) {\n  auto out = linalg::Constant<T>(ctx, T{}, tloc.Shape(1), tloc.Shape(2));\n  auto h_out = out.HostView();\n  for (std::size_t i = 0; i < tloc.Shape(0); ++i) {\n    for (std::size_t j = 0; j < tloc.Shape(1); ++j) {\n      for (std::size_t k = 0; k < tloc.Shape(2); ++k) {\n        h_out(j, k) += tloc(i, j, k);\n      }\n    }\n  }\n  return out;\n}\n\n/**\n * @brief Verify all partitioners have the same set of leaf nodes.\n 
*\n * For external memory, each batch has its own partitioner, but the tree structure\n * is shared. This check ensures consistency across all partitions.\n */\nvoid CheckPartitionerLeaves(std::vector<CommonRowPartitioner> const &partitioners,\n                            MultiTargetTreeView const &tree,\n                            std::vector<bst_node_t> const &leaves_idx) {\n  for (auto const &part : partitioners) {\n    std::vector<bst_node_t> part_leaves;\n    for (auto const &node : part.Partitions()) {\n      if (node.node_id >= 0 && tree.IsLeaf(node.node_id)) {\n        part_leaves.push_back(node.node_id);\n      }\n    }\n    CHECK_EQ(part_leaves.size(), leaves_idx.size());\n    CHECK(std::equal(part_leaves.begin(), part_leaves.end(), leaves_idx.begin()));\n  }\n}\n\ntemplate <typename ExpandEntry, typename Updater>\nvoid UpdateTree(common::Monitor *monitor, linalg::MatrixView<GradientPair const> gpair,\n                Updater *updater, DMatrix *p_fmat, TrainParam const *param,\n                HostDeviceVector<bst_node_t> *p_out_position, RegTree *p_tree) {\n  monitor->Start(__func__);\n  updater->InitData(p_fmat, p_tree, gpair);\n\n  Driver<ExpandEntry> driver{*param};\n  auto const &tree = *p_tree;\n  driver.Push(updater->InitRoot(p_fmat, gpair, p_tree));\n  auto expand_set = driver.Pop();\n\n  /**\n   * Note for update position\n   * Root:\n   *   Not applied: No need to update position as initialization has got all the rows ordered.\n   *   Applied: Update position is run on applied nodes so the rows are partitioned.\n   * Non-root:\n   *   Not applied: That node is root of the subtree, same rule as root.\n   *   Applied: Ditto\n   */\n  while (!expand_set.empty()) {\n    // candidates that can be further splited.\n    std::vector<ExpandEntry> valid_candidates;\n    // candidaates that can be applied.\n    std::vector<ExpandEntry> applied;\n    for (auto const &candidate : expand_set) {\n      updater->ApplyTreeSplit(candidate, p_tree);\n      
CHECK_GT(p_tree->LeftChild(candidate.nid), candidate.nid);\n      applied.push_back(candidate);\n      if (driver.IsChildValid(candidate)) {\n        valid_candidates.emplace_back(candidate);\n      }\n    }\n\n    updater->UpdatePosition(p_fmat, p_tree, applied);\n\n    std::vector<ExpandEntry> best_splits;\n    if (!valid_candidates.empty()) {\n      updater->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair);\n      for (auto const &candidate : valid_candidates) {\n        auto left_child_nidx = tree.LeftChild(candidate.nid);\n        auto right_child_nidx = tree.RightChild(candidate.nid);\n        ExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};\n        ExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};\n        best_splits.push_back(l_best);\n        best_splits.push_back(r_best);\n      }\n      updater->EvaluateSplits(p_fmat, p_tree, &best_splits);\n    }\n    driver.Push(best_splits.begin(), best_splits.end());\n    expand_set = driver.Pop();\n  }\n\n  auto &h_out_position = p_out_position->HostVector();\n  updater->LeafPartition(tree, gpair, &h_out_position);\n  monitor->Stop(__func__);\n}\n\n/**\n * \\brief Updater for building multi-target trees. 
The implementation simply iterates over\n *        each target.\n */\nclass MultiTargetHistBuilder {\n private:\n  common::Monitor *monitor_{nullptr};\n  TrainParam const *param_{nullptr};\n  HistMakerTrainParam const *hist_param_{nullptr};\n  std::shared_ptr<common::ColumnSampler> col_sampler_;\n  std::unique_ptr<HistMultiEvaluator> evaluator_;\n  // Histogram builder for each target.\n  std::unique_ptr<MultiHistogramBuilder> histogram_builder_;\n  Context const *ctx_{nullptr};\n  // Partitioner for each data batch.\n  std::vector<CommonRowPartitioner> partitioner_;\n  // Pointer to last updated tree, used for update prediction cache.\n  RegTree const *p_last_tree_{nullptr};\n  DMatrix const *p_last_fmat_{nullptr};\n\n public:\n  void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,\n                      std::vector<MultiExpandEntry> const &applied) {\n    monitor_->Start(__func__);\n    std::size_t page_id{0};\n    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(this->param_))) {\n      this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied,\n                                                    p_tree->HostMtView());\n      page_id++;\n    }\n    monitor_->Stop(__func__);\n  }\n\n  void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {\n    this->evaluator_->ApplyTreeSplit(candidate, p_tree);\n  }\n\n  void InitData(DMatrix *p_fmat, RegTree const *p_tree,\n                linalg::MatrixView<GradientPair const> gpair) {\n    monitor_->Start(__func__);\n\n    p_last_fmat_ = p_fmat;\n    bst_bin_t n_total_bins = 0;\n    size_t page_idx = 0;\n    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      if (n_total_bins == 0) {\n        n_total_bins = page.cut.TotalBins();\n      } else {\n        CHECK_EQ(n_total_bins, page.cut.TotalBins());\n      }\n      if (page_idx < partitioner_.size()) {\n        partitioner_[page_idx].Reset(ctx_, page.Size(), 
page.base_rowid,\n                                     p_fmat->Info().IsColumnSplit());\n      } else {\n        partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid,\n                                  p_fmat->Info().IsColumnSplit());\n      }\n      page_idx++;\n    }\n    partitioner_.resize(page_idx);\n\n    bst_target_t n_targets = gpair.Shape(1);\n    histogram_builder_ = std::make_unique<MultiHistogramBuilder>();\n    histogram_builder_->Reset(ctx_, n_total_bins, n_targets, HistBatch(param_),\n                              collective::IsDistributed(), p_fmat->Info().IsColumnSplit(),\n                              hist_param_);\n\n    evaluator_ = std::make_unique<HistMultiEvaluator>(ctx_, p_fmat->Info(), param_, col_sampler_);\n    p_last_tree_ = p_tree;\n    monitor_->Stop(__func__);\n  }\n\n  MultiExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,\n                            RegTree *p_tree) {\n    monitor_->Start(__func__);\n    MultiExpandEntry best;\n    best.nid = RegTree::kRoot;\n    best.depth = 0;\n\n    auto n_targets = gpair.Shape(1);\n    auto root_sum = linalg::Empty<GradientPairPrecise>(ctx_, n_targets);\n    cpu_impl::SumGradients(ctx_, gpair, root_sum.HostView());\n    auto h_root_sum = root_sum.HostView();\n    CHECK(h_root_sum.CContiguous());\n    auto rc = collective::GlobalSum(\n        ctx_, p_fmat->Info(),\n        linalg::MakeVec(reinterpret_cast<double *>(h_root_sum.Values().data()),\n                        h_root_sum.Size() * 2));\n    collective::SafeColl(rc);\n\n    histogram_builder_->BuildRootHist(p_fmat, p_tree->HostMtView(), partitioner_, gpair, best,\n                                      HistBatch(param_));\n\n    auto weight = evaluator_->InitRoot(h_root_sum);\n    auto weight_t = weight.HostView();\n    std::transform(linalg::cbegin(weight_t), linalg::cend(weight_t), linalg::begin(weight_t),\n                   [&](float w) { return w * param_->learning_rate; });\n\n    // Compute 
root sum_hess by summing hessians across all targets\n    float root_sum_hess = 0.0f;\n    for (bst_target_t t{0}; t < n_targets; ++t) {\n      root_sum_hess += static_cast<float>(h_root_sum(t).GetHess());\n    }\n    p_tree->SetRoot(weight_t, root_sum_hess);\n    std::vector<BoundedHistCollection const *> hists;\n    std::vector<MultiExpandEntry> nodes{{RegTree::kRoot, 0}};\n\n    for (bst_target_t t{0}; t < n_targets; ++t) {\n      hists.push_back(&(*histogram_builder_).Histogram(t));\n    }\n    auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n    for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, ft, &nodes);\n      break;\n    }\n    monitor_->Stop(__func__);\n\n    return nodes.front();\n  }\n\n  void BuildHistogram(DMatrix *p_fmat, RegTree const *p_tree,\n                      std::vector<MultiExpandEntry> const &valid_candidates,\n                      linalg::MatrixView<GradientPair const> gpair) {\n    monitor_->Start(__func__);\n    histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree->HostMtView(), partitioner_,\n                                           valid_candidates, gpair, HistBatch(param_));\n    monitor_->Stop(__func__);\n  }\n\n  void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,\n                      std::vector<MultiExpandEntry> *best_splits) {\n    monitor_->Start(__func__);\n    std::vector<BoundedHistCollection const *> hists;\n    // Use histogram builder's number of targets (may differ from tree for reduced gradient)\n    auto n_targets = histogram_builder_->NumTargets();\n    for (bst_target_t t{0}; t < n_targets; ++t) {\n      hists.push_back(&(*histogram_builder_).Histogram(t));\n    }\n    auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n    for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, ft, 
best_splits);\n      break;\n    }\n    monitor_->Stop(__func__);\n  }\n\n  void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,\n                     std::vector<bst_node_t> *p_out_position) {\n    monitor_->Start(__func__);\n    p_out_position->resize(gpair.Shape(0));\n    for (auto const &part : partitioner_) {\n      part.LeafPartition(ctx_, tree.HostMtView(), gpair,\n                         common::Span{p_out_position->data(), p_out_position->size()});\n    }\n    monitor_->Stop(__func__);\n  }\n\n  /**\n   * @brief Calculate leaf weights using value gradient.\n   *\n   * This method supports reduced gradient. Weights in p_tree are calculated using split\n   * gradient during tree building. This function replaces those weights with new weights\n   * calculated from value gradient.\n   */\n  void ExpandTreeLeaf(linalg::Matrix<GradientPair> const &full_grad, RegTree *p_tree) {\n    auto tree = p_tree->HostMtView();\n    auto n_targets = p_tree->NumTargets();\n    auto value_gpair = full_grad.HostView();\n\n    // Collect all leaf nodes from the first partitioner\n    std::vector<bst_node_t> leaves_idx;\n    CHECK(!partitioner_.empty());\n    for (auto const &node : partitioner_.front().Partitions()) {\n      if (node.node_id >= 0 && tree.IsLeaf(node.node_id)) {\n        leaves_idx.push_back(node.node_id);\n      }\n    }\n\n    auto n_leaves = leaves_idx.size();\n    CHECK_EQ(p_tree->GetNumLeaves(), n_leaves);\n    CHECK_GT(n_leaves, 0);\n\n    // Sanity check: all partitioners should have the same set of leaves\n    if (hist_param_->debug_synchronize) {\n      CheckPartitionerLeaves(this->partitioner_, tree, leaves_idx);\n    }\n\n    // Calculate gradient sum for each leaf using thread-local storage\n    // Shape: [n_threads, n_leaves, n_targets]\n    auto n_threads = ctx_->Threads();\n    auto leaf_sums_tloc =\n        linalg::Constant(ctx_, GradientPairPrecise{}, n_threads, n_leaves, n_targets);\n    auto h_leaf_sums_tloc = 
leaf_sums_tloc.HostView();\n\n    for (auto const &part : partitioner_) {\n      common::BlockedSpace2d space(\n          n_leaves, [&](std::size_t leaf_idx) { return part[leaves_idx[leaf_idx]].Size(); }, 1024);\n      // when Size() is 0 (node has no rows in a partition), no blocks are created for\n      // that leaf and the lambda is never called.\n      common::ParallelFor2d(space, n_threads, [&](std::size_t leaf_idx, common::Range1d r) {\n        auto const &node = part[leaves_idx[leaf_idx]];\n        auto tidx = omp_get_thread_num();\n        // Sum gradients for rows in this leaf (row indices are global)\n        for (auto it = node.begin() + r.begin(); it != node.begin() + r.end(); ++it) {\n          for (bst_target_t t = 0; t < n_targets; ++t) {\n            h_leaf_sums_tloc(tidx, leaf_idx, t) += GradientPairPrecise{value_gpair(*it, t)};\n          }\n        }\n      });\n    }\n\n    // Reduce thread-local sums: [n_threads, n_leaves, n_targets] -> [n_leaves, n_targets]\n    auto leaf_sums = ReduceToRows(ctx_, h_leaf_sums_tloc);\n    auto h_leaf_sums = leaf_sums.HostView();\n\n    // Calculate weights for each leaf\n    linalg::Matrix<float> weights = linalg::Empty<float>(ctx_, n_leaves, n_targets);\n    auto h_weights = weights.HostView();\n    auto eta = this->param_->learning_rate;\n\n    common::ParallelFor(n_leaves, n_threads, [&](auto leaf_idx) {\n      auto grad_sum = h_leaf_sums.Slice(leaf_idx, linalg::All());\n      auto weight = h_weights.Slice(leaf_idx, linalg::All());\n      CalcWeight(*param_, grad_sum, eta, weight);\n    });\n\n    // Set leaf weights\n    p_tree->SetLeaves(leaves_idx, h_weights.Values());\n  }\n\n public:\n  explicit MultiTargetHistBuilder(Context const *ctx, TrainParam const *param,\n                                  HistMakerTrainParam const *hist_param,\n                                  std::shared_ptr<common::ColumnSampler> column_sampler,\n                                  common::Monitor *monitor)\n      : 
monitor_{monitor},\n        param_{param},\n        hist_param_{hist_param},\n        col_sampler_{std::move(column_sampler)},\n        ctx_{ctx} {\n    monitor_->Init(__func__);\n  }\n\n  bool UpdatePredictionCache(DMatrix const *p_fmat, common::Span<bst_node_t const> node_position,\n                             linalg::MatrixView<float> out_preds) const {\n    // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in\n    // conjunction with Update().\n    if (!p_last_fmat_ || !p_last_tree_ || p_fmat != p_last_fmat_) {\n      return false;\n    }\n    monitor_->Start(__func__);\n    CHECK_EQ(out_preds.Size(), p_fmat->Info().num_row_ * p_last_tree_->NumTargets());\n    CHECK_EQ(node_position.size(), p_fmat->Info().num_row_);\n    UpdatePredictionCacheImpl(ctx_, p_last_tree_, node_position, out_preds);\n    monitor_->Stop(__func__);\n    return true;\n  }\n};\n\n/**\n * @brief Tree updater for single-target trees.\n */\nclass HistUpdater {\n private:\n  common::Monitor *monitor_;\n  TrainParam const *param_;\n  HistMakerTrainParam const *hist_param_{nullptr};\n  std::shared_ptr<common::ColumnSampler> col_sampler_;\n  std::unique_ptr<HistEvaluator> evaluator_;\n  std::vector<CommonRowPartitioner> partitioner_;\n\n  // back pointers to tree and data matrix\n  const RegTree *p_last_tree_{nullptr};\n  DMatrix const *const p_last_fmat_{nullptr};\n\n  std::unique_ptr<MultiHistogramBuilder> histogram_builder_;\n  // Context for number of threads\n  Context const *ctx_{nullptr};\n\n public:\n  explicit HistUpdater(Context const *ctx, std::shared_ptr<common::ColumnSampler> column_sampler,\n                       TrainParam const *param, HistMakerTrainParam const *hist_param,\n                       DMatrix const *fmat, common::Monitor *monitor)\n      : monitor_{monitor},\n        param_{param},\n        hist_param_{hist_param},\n        col_sampler_{std::move(column_sampler)},\n        p_last_fmat_(fmat),\n        histogram_builder_{new 
MultiHistogramBuilder},\n        ctx_{ctx} {\n    monitor_->Init(__func__);\n  }\n\n  bool UpdatePredictionCache(DMatrix const *data, common::Span<bst_node_t const> node_position,\n                             linalg::MatrixView<float> out_preds) const {\n    // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in\n    // conjunction with Update().\n    if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {\n      return false;\n    }\n    monitor_->Start(__func__);\n    CHECK_EQ(out_preds.Size(), data->Info().num_row_);\n    CHECK_EQ(node_position.size(), data->Info().num_row_);\n    UpdatePredictionCacheImpl(ctx_, p_last_tree_, node_position, out_preds);\n    monitor_->Stop(__func__);\n    return true;\n  }\n\n public:\n  // initialize temp data structure\n  void InitData(DMatrix *fmat, RegTree const *p_tree, linalg::MatrixView<GradientPair const>) {\n    monitor_->Start(__func__);\n    bst_bin_t n_total_bins{0};\n    size_t page_idx = 0;\n    for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      if (n_total_bins == 0) {\n        n_total_bins = page.cut.TotalBins();\n      } else {\n        CHECK_EQ(n_total_bins, page.cut.TotalBins());\n      }\n      if (page_idx < partitioner_.size()) {\n        partitioner_[page_idx].Reset(this->ctx_, page.Size(), page.base_rowid,\n                                     fmat->Info().IsColumnSplit());\n      } else {\n        partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid,\n                                  fmat->Info().IsColumnSplit());\n      }\n      page_idx++;\n    }\n    partitioner_.resize(page_idx);\n    histogram_builder_->Reset(ctx_, n_total_bins, 1, HistBatch(param_), collective::IsDistributed(),\n                              fmat->Info().IsColumnSplit(), hist_param_);\n    evaluator_ = std::make_unique<HistEvaluator>(ctx_, this->param_, fmat->Info(), col_sampler_);\n    p_last_tree_ = p_tree;\n    monitor_->Stop(__func__);\n 
 }\n\n  void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,\n                      std::vector<CPUExpandEntry> *best_splits) {\n    monitor_->Start(__func__);\n    auto const &histograms = histogram_builder_->Histogram(0);\n    auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n    for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);\n      break;\n    }\n    monitor_->Stop(__func__);\n  }\n\n  void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {\n    this->evaluator_->ApplyTreeSplit(candidate, p_tree);\n  }\n\n  CPUExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,\n                          RegTree *p_tree) {\n    monitor_->Start(__func__);\n    CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0));\n\n    this->histogram_builder_->BuildRootHist(p_fmat, p_tree->HostScView(), partitioner_, gpair, node,\n                                            HistBatch(param_));\n\n    {\n      GradientPairPrecise grad_stat;\n      if (p_fmat->IsDense() && !collective::IsDistributed()) {\n        /**\n         * Specialized code for dense data: For dense data (with no missing value), the sum\n         * of gradient histogram is equal to snode[nid]\n         */\n        auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_)).begin());\n        std::vector<std::uint32_t> const &row_ptr = gmat.cut.Ptrs();\n        CHECK_GE(row_ptr.size(), 2);\n        std::uint32_t const ibegin = row_ptr[0];\n        std::uint32_t const iend = row_ptr[1];\n        auto hist = this->histogram_builder_->Histogram(0)[RegTree::kRoot];\n        auto begin = hist.data();\n        for (std::uint32_t i = ibegin; i < iend; ++i) {\n          GradientPairPrecise const &et = begin[i];\n          grad_stat.Add(et.GetGrad(), et.GetHess());\n        }\n      } else {\n        auto gpair_h = 
gpair.Slice(linalg::All(), 0).Values();\n        for (auto const &grad : gpair_h) {\n          grad_stat.Add(grad.GetGrad(), grad.GetHess());\n        }\n        auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),\n                                        linalg::MakeVec(reinterpret_cast<double *>(&grad_stat), 2));\n        collective::SafeColl(rc);\n      }\n\n      auto weight = evaluator_->InitRoot(GradStats{grad_stat});\n      p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess();\n      p_tree->Stat(RegTree::kRoot).base_weight = weight;\n      (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);\n\n      std::vector<CPUExpandEntry> entries{node};\n      monitor_->Start(\"EvaluateSplits\");\n      auto ft = p_fmat->Info().feature_types.ConstHostSpan();\n      for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n        evaluator_->EvaluateSplits(histogram_builder_->Histogram(0), gmat.cut, ft, *p_tree,\n                                   &entries);\n        break;\n      }\n      monitor_->Stop(\"EvaluateSplits\");\n      node = entries.front();\n    }\n\n    monitor_->Stop(__func__);\n    return node;\n  }\n\n  void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,\n                      std::vector<CPUExpandEntry> const &valid_candidates,\n                      linalg::MatrixView<GradientPair const> gpair) {\n    monitor_->Start(__func__);\n    this->histogram_builder_->BuildHistLeftRight(ctx_, p_fmat, p_tree->HostScView(), partitioner_,\n                                                 valid_candidates, gpair, HistBatch(param_));\n    monitor_->Stop(__func__);\n  }\n\n  void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,\n                      std::vector<CPUExpandEntry> const &applied) {\n    monitor_->Start(__func__);\n    std::size_t page_id{0};\n    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {\n      
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied,\n                                                    p_tree->HostScView());\n      page_id++;\n    }\n    monitor_->Stop(__func__);\n  }\n\n  void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,\n                     std::vector<bst_node_t> *p_out_position) {\n    monitor_->Start(__func__);\n    p_out_position->resize(gpair.Shape(0));\n    for (auto const &part : partitioner_) {\n      part.LeafPartition(ctx_, tree.HostScView(), gpair,\n                         common::Span{p_out_position->data(), p_out_position->size()});\n    }\n    monitor_->Stop(__func__);\n  }\n};\n\n/*! \\brief construct a tree using quantized feature values */\nclass QuantileHistMaker : public TreeUpdater {\n  std::unique_ptr<HistUpdater> p_impl_{nullptr};\n  std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};\n  std::shared_ptr<common::ColumnSampler> column_sampler_;\n\n  common::Monitor monitor_;\n  HistMakerTrainParam hist_param_;\n\n public:\n  explicit QuantileHistMaker(Context const *ctx, ObjInfo const *)\n      : TreeUpdater{ctx}, column_sampler_{std::make_shared<common::ColumnSampler>()} {}\n\n  void Configure(Args const &args) override { hist_param_.UpdateAllowUnknown(args); }\n  void LoadConfig(Json const &in) override {\n    auto const &config = get<Object const>(in);\n    FromJson(config.at(\"hist_train_param\"), &hist_param_);\n  }\n  void SaveConfig(Json *p_out) const override {\n    auto &out = *p_out;\n    out[\"hist_train_param\"] = ToJson(hist_param_);\n  }\n\n  [[nodiscard]] char const *Name() const override { return \"grow_quantile_histmaker\"; }\n\n  void Update(TrainParam const *param, GradientContainer *in_gpair, DMatrix *p_fmat,\n              common::Span<HostDeviceVector<bst_node_t>> out_position,\n              const std::vector<RegTree *> &trees) override {\n    if (trees.front()->IsMultiTarget()) {\n      CHECK(hist_param_.GetInitialised());\n      if 
(!param->monotone_constraints.empty()) {\n        LOG(FATAL) << \"Monotonic constraint\" << MTNotImplemented();\n      }\n      if (!param->interaction_constraints.empty()) {\n        LOG(FATAL) << \"Interaction constraint\" << MTNotImplemented();\n      }\n      if (!p_mtimpl_) {\n        this->p_mtimpl_ = std::make_unique<MultiTargetHistBuilder>(ctx_, param, &hist_param_,\n                                                                   column_sampler_, &monitor_);\n      }\n    } else {\n      CHECK(hist_param_.GetInitialised());\n      if (!p_impl_) {\n        p_impl_ = std::make_unique<HistUpdater>(ctx_, column_sampler_, param, &hist_param_, p_fmat,\n                                                &monitor_);\n      }\n    }\n\n    bst_target_t n_targets = trees.front()->NumTargets();\n    // Use split gradient for tree building\n    auto h_gpair = in_gpair->Grad()->HostView();\n\n    linalg::Matrix<GradientPair> sample_out;\n    auto h_sample_out = h_gpair;\n    auto need_copy = [&] {\n      return trees.size() > 1 || n_targets > 1;\n    };\n    if (need_copy()) {\n      // allocate buffer\n      sample_out = decltype(sample_out){h_gpair.Shape(), ctx_->Device(), linalg::Order::kF};\n      h_sample_out = sample_out.HostView();\n    }\n\n    cpu_impl::Sampler sampler{*param};\n    for (auto tree_it = trees.begin(); tree_it != trees.end(); ++tree_it) {\n      if (need_copy()) {\n        // Copy gradient into buffer for sampling. 
This converts C-order to F-order.\n        std::copy(linalg::cbegin(h_gpair), linalg::cend(h_gpair), linalg::begin(h_sample_out));\n      }\n      sampler.Sample(ctx_, h_sample_out);\n      auto *h_out_position = &out_position[tree_it - trees.begin()];\n      if ((*tree_it)->IsMultiTarget()) {\n        UpdateTree<MultiExpandEntry>(&monitor_, h_sample_out, p_mtimpl_.get(), p_fmat, param,\n                                     h_out_position, *tree_it);\n        if (in_gpair->HasValueGrad()) {\n          // Copy the value gradient and apply sampling mask from split gradient\n          auto value_grad = linalg::Empty<GradientPair>(ctx_, in_gpair->value_gpair.Shape(0),\n                                                        in_gpair->value_gpair.Shape(1));\n          auto h_value_grad = value_grad.HostView();\n          auto h_value_grad_in = in_gpair->value_gpair.HostView();\n          std::copy(linalg::cbegin(h_value_grad_in), linalg::cend(h_value_grad_in),\n                    linalg::begin(h_value_grad));\n          sampler.ApplySampling(ctx_, h_sample_out, &value_grad);\n          // Refresh the leaf weights.\n          p_mtimpl_->ExpandTreeLeaf(value_grad, *tree_it);\n        } else {\n          (*tree_it)->GetMultiTargetTree()->SetLeaves();\n        }\n      } else {\n        UpdateTree<CPUExpandEntry>(&monitor_, h_sample_out, p_impl_.get(), p_fmat, param,\n                                   h_out_position, *tree_it);\n      }\n\n      hist_param_.CheckTreesSynchronized(ctx_, *tree_it);\n    }\n  }\n\n  bool UpdatePredictionCache(DMatrix const *p_fmat,\n                             common::Span<HostDeviceVector<bst_node_t>> node_position,\n                             linalg::MatrixView<float> out_preds) override {\n    if (node_position.size() > 1) {\n      return false;\n    }\n    auto position = node_position.front().ConstHostSpan();\n    if (out_preds.Shape(1) > 1) {\n      CHECK(p_mtimpl_);\n      return p_mtimpl_->UpdatePredictionCache(p_fmat, position, 
out_preds);\n    } else {\n      CHECK(p_impl_);\n      return p_impl_->UpdatePredictionCache(p_fmat, position, out_preds);\n    }\n  }\n\n  [[nodiscard]] bool HasNodePosition() const override { return true; }\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, \"grow_quantile_histmaker\")\n    .describe(\"Grow tree using quantized histogram.\")\n    .set_body([](Context const *ctx, ObjInfo const *task) {\n      return new QuantileHistMaker{ctx, task};\n    });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_refresh.cc",
    "content": "/**\n * Copyright 2014-2026, XGBoost Contributors\n * \\file updater_refresh.cc\n * \\brief refresh the statistics and leaf value on the tree on the dataset\n * \\author Tianqi Chen\n */\n#include <limits>\n#include <vector>\n\n#include \"../collective/allreduce.h\"\n#include \"../common/threading_utils.h\"\n#include \"../predictor/predict_fn.h\"\n#include \"../tree/tree_view.h\"  // for ScalarTreeView\n#include \"./param.h\"\n#include \"xgboost/gradient.h\"  // for GradientContainer\n#include \"xgboost/json.h\"\n#include \"xgboost/tree_updater.h\"\n\nnamespace xgboost::tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_refresh);\n\n/*! \\brief refresher that updates the statistics and leaf values of a tree on the dataset */\nclass TreeRefresher : public TreeUpdater {\n public:\n  explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}\n  void Configure(const Args &) override {}\n  void LoadConfig(Json const &) override {}\n  void SaveConfig(Json *) const override {}\n\n  [[nodiscard]] char const *Name() const override { return \"refresh\"; }\n  [[nodiscard]] bool CanModifyTree() const override { return true; }\n  // Refresh the node statistics (and, if enabled, leaf values) of the given trees\n  void Update(TrainParam const *param, GradientContainer *in_gpair, DMatrix *p_fmat,\n              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,\n              const std::vector<RegTree *> &trees) override {\n    if (trees.size() == 0) {\n      return;\n    }\n    auto gpair = in_gpair->FullGradOnly();\n    CHECK_EQ(gpair->Shape(1), 1) << MTNotImplemented();\n    const std::vector<GradientPair> &gpair_h = gpair->Data()->ConstHostVector();\n    // Thread local variables.\n    std::vector<std::vector<GradStats>> stemp;\n    std::vector<RegTree::FVec> fvec_temp;\n    // setup temp space for each thread\n    const int nthread = ctx_->Threads();\n    fvec_temp.resize(nthread, RegTree::FVec());\n    stemp.resize(nthread, std::vector<GradStats>());\n\n    bst_node_t num_nodes = 0;\n    for (auto tree : trees) {\n      num_nodes 
+= tree->NumNodes();\n    }\n    common::ParallelFor(nthread, nthread, [&](auto tid) {\n      stemp[tid].resize(num_nodes);\n      std::fill(stemp[tid].begin(), stemp[tid].end(), GradStats{});\n      fvec_temp[tid].Init(trees.front()->NumFeatures());\n    });\n\n    const MetaInfo &info = p_fmat->Info();\n    // start accumulating statistics\n    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {\n      auto page = batch.GetView();\n      CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());\n      common::ParallelFor(batch.Size(), ctx_->Threads(), [&](auto i) {\n        SparsePage::Inst inst = page[i];\n        const int tid = omp_get_thread_num();\n        const auto ridx = static_cast<bst_uint>(batch.base_rowid + i);\n        RegTree::FVec &feats = fvec_temp[tid];\n        feats.Fill(inst);\n        int offset = 0;\n        for (auto tree : trees) {\n          AddStats(*tree, feats, gpair_h, info, ridx, dmlc::BeginPtr(stemp[tid]) + offset);\n          offset += tree->NumNodes();\n        }\n        feats.Drop();\n      });\n    }\n\n    // aggregate the statistics\n    common::ParallelFor(num_nodes, ctx_->Threads(), [&](int nid) {\n      for (int tid = 1; tid < nthread; ++tid) {\n        stemp[0][nid].Add(stemp[tid][nid]);\n      }\n    });\n\n    // Synchronize the aggregated result.\n    auto &sum_grad = stemp[0];\n    // x2 for gradient and hessian.\n    auto rc = collective::Allreduce(\n        ctx_, linalg::MakeVec(&sum_grad.data()->sum_grad, sum_grad.size() * 2),\n        collective::Op::kSum);\n    collective::SafeColl(rc);\n    bst_node_t offset = 0;\n    for (auto tree : trees) {\n      this->Refresh(param, dmlc::BeginPtr(sum_grad) + offset, 0, tree);\n      offset += tree->NumNodes();\n    }\n  }\n\n private:\n  inline static void AddStats(const RegTree &tree, const RegTree::FVec &feat,\n                              const std::vector<GradientPair> &gpair, const MetaInfo &,\n                              const bst_uint ridx, GradStats 
*gstats) {\n    // start from groups that belongs to current data\n    auto pid = 0;\n    gstats[pid].Add(gpair[ridx]);\n    // traverse tree\n    auto sc_tree = tree.HostScView();\n    while (!sc_tree.IsLeaf(pid)) {\n      unsigned split_index = sc_tree.SplitIndex(pid);\n      pid = predictor::GetNextNode<true, true>(sc_tree, pid, feat.GetFvalue(split_index),\n                                               feat.IsMissing(split_index), sc_tree.cats);\n      gstats[pid].Add(gpair[ridx]);\n    }\n  }\n  void Refresh(TrainParam const *param, const GradStats *gstats, int nid, RegTree *p_tree) {\n    RegTree &tree = *p_tree;\n    tree.Stat(nid).base_weight = static_cast<bst_float>(CalcWeight(*param, gstats[nid]));\n    tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);\n    if (tree[nid].IsLeaf()) {\n      if (param->refresh_leaf) {\n        tree[nid].SetLeaf(tree.Stat(nid).base_weight * param->learning_rate);\n      }\n    } else {\n      tree.Stat(nid).loss_chg =\n          static_cast<bst_float>(xgboost::tree::CalcGain(*param, gstats[tree[nid].LeftChild()]) +\n                                 xgboost::tree::CalcGain(*param, gstats[tree[nid].RightChild()]) -\n                                 xgboost::tree::CalcGain(*param, gstats[nid]));\n      this->Refresh(param, gstats, tree[nid].LeftChild(), p_tree);\n      this->Refresh(param, gstats, tree[nid].RightChild(), p_tree);\n    }\n  }\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, \"refresh\")\n    .describe(\"Refresher that refreshes the weight and statistics according to data.\")\n    .set_body([](Context const *ctx, auto) { return new TreeRefresher(ctx); });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "src/tree/updater_sync.cc",
    "content": "/**\n * Copyright 2014-2025, XGBoost Contributors\n * \\file updater_sync.cc\n * \\brief synchronize the tree in all distributed nodes\n */\n#include <string>\n#include <vector>\n\n#include \"../collective/broadcast.h\"         // for Broadcast\n#include \"../collective/communicator-inl.h\"  // for GetRank, GetWorldSize\n#include \"xgboost/context.h\"                 // for Context\n#include \"xgboost/gradient.h\"                // for GradientContainer\n#include \"xgboost/json.h\"                    // for Json, Object\n#include \"xgboost/linalg.h\"                  // for Matrix\n#include \"xgboost/tree_updater.h\"            // for TreeUpdater\n\nnamespace xgboost::tree {\n\nDMLC_REGISTRY_FILE_TAG(updater_sync);\n\n/*!\n * \\brief syncher that synchronizes the tree in all distributed nodes\n * can implement various strategies, so far it is always set to node 0's tree\n */\nclass TreeSyncher : public TreeUpdater {\n public:\n  explicit TreeSyncher(Context const* tparam) : TreeUpdater{tparam} {}\n  void Configure(Args const&) override {}\n\n  void LoadConfig(Json const&) override {}\n  void SaveConfig(Json*) const override {}\n\n  [[nodiscard]] char const* Name() const override { return \"sync\"; }\n\n  void Update(TrainParam const*, GradientContainer*, DMatrix*,\n              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,\n              std::vector<RegTree*> const& trees) override {\n    if (collective::GetWorldSize() == 1) {\n      return;\n    }\n\n    Json model{Object{}};\n    auto rank = collective::GetRank();\n    if (rank == 0) {\n      for (auto tree : trees) {\n        tree->SaveModel(&model);\n      }\n    }\n    std::vector<char> jmodel;\n    Json::Dump(model, &jmodel, std::ios::binary);\n    auto rc = collective::Broadcast(ctx_, linalg::MakeVec(jmodel.data(), jmodel.size()), 0);\n    SafeColl(rc);\n\n    for (auto tree : trees) {\n      tree->LoadModel(model);\n    }\n  }\n};\n\nXGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, 
\"sync\")\n    .describe(\"Syncher that synchronize the tree in all distributed nodes.\")\n    .set_body([](Context const* ctx, auto) { return new TreeSyncher(ctx); });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/README.md",
    "content": "This folder contains test cases for XGBoost C++ core, Python package and some other CI\nfacilities.\n\n# Directories\n  * ci_build:  Test facilities for Jenkins CI and GitHub Actions.\n  * cli: Basic test for command line executable `xgboost`.  Most of the other command line\n    specific tests are in Python test `test_cli.py`.\n  * cpp: Tests for C++ core, using Google test framework.\n  * python: Tests for Python package, demonstrations and CLI.  For how to set up the\n    dependencies for tests, see conda files in `ci_build`.\n  * python-gpu: Similar to python tests, but for GPU.\n  * travis: CI facilities for Travis.\n  * test_distributed: Test for distributed systems including spark and dask.\n\n# Others\n  * pytest.ini: Describes the `pytest` marker for python tests, some markers are generated\n    by `conftest.py` file.\n"
  },
  {
    "path": "tests/cpp/CMakeLists.txt",
    "content": "# The testxgboost executable is created in the top level CMakeLists. Most of the\n# properties and compilation flags are already set. We just need to add source files and\n# link gtest here.\nif(USE_DMLC_GTEST)\n  if(NOT TARGET gtest)\n    message(FATAL_ERROR \"USE_DMLC_GTEST=ON but dmlc-core didn't bundle gtest\")\n  endif()\n  set(GTEST_LIBRARIES gtest gmock)\nelse()\n  find_package(GTest REQUIRED)\nendif()\n\nfile(GLOB_RECURSE TEST_SOURCES \"*.cc\")\n\nif(USE_CUDA)\n  file(GLOB_RECURSE CUDA_TEST_SOURCES \"*.cu\")\n  list(APPEND TEST_SOURCES ${CUDA_TEST_SOURCES})\nendif()\n\n# We will add them back later to separate the definition.\nfile(GLOB_RECURSE FEDERATED_TEST_SOURCES \"plugin/federated/*.*\")\nlist(REMOVE_ITEM TEST_SOURCES ${FEDERATED_TEST_SOURCES})\n\nfile(GLOB_RECURSE SYCL_TEST_SOURCES \"plugin/test_sycl_*.cc\")\nlist(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})\n\nif(PLUGIN_SYCL)\n  set(CMAKE_CXX_COMPILER \"icpx\")\n  file(GLOB_RECURSE SYCL_TEST_SOURCES \"plugin/test_sycl_*.cc\")\n  add_library(plugin_sycl_test OBJECT ${SYCL_TEST_SOURCES})\n\n  target_include_directories(plugin_sycl_test\n    PRIVATE\n    ${gtest_SOURCE_DIR}/include\n    ${xgboost_SOURCE_DIR}/include\n    ${xgboost_SOURCE_DIR}/dmlc-core/include)\n\n  target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)\n  target_link_libraries(plugin_sycl_test PUBLIC -fsycl)\n  target_link_libraries(plugin_sycl_test PRIVATE ${GTEST_LIBRARIES})\n\n  set_target_properties(plugin_sycl_test PROPERTIES\n    COMPILE_FLAGS -fsycl\n    CXX_STANDARD 17\n    CXX_STANDARD_REQUIRED ON\n    POSITION_INDEPENDENT_CODE ON)\n  if(USE_OPENMP)\n    find_package(OpenMP REQUIRED)\n    set_target_properties(plugin_sycl_test PROPERTIES\n    COMPILE_FLAGS \"-fsycl -qopenmp\")\n  endif()\n  # Get compilation and link flags of plugin_sycl and propagate to testxgboost\n  target_link_libraries(testxgboost PUBLIC plugin_sycl_test)\n  # Add all objects of plugin_sycl to testxgboost\n  
target_sources(testxgboost INTERFACE $<TARGET_OBJECTS:plugin_sycl_test>)\nendif()\n\nif(PLUGIN_FEDERATED)\n  add_subdirectory(${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated)\nendif()\n\ntarget_sources(\n  testxgboost PRIVATE\n  ${TEST_SOURCES}\n  ${xgboost_SOURCE_DIR}/plugin/example/custom_obj.cc\n)\n\nif(USE_CUDA AND PLUGIN_RMM)\n  target_include_directories(testxgboost PRIVATE ${CUDA_INCLUDE_DIRS})\nendif()\n\ntarget_include_directories(testxgboost\n  PRIVATE\n  ${xgboost_SOURCE_DIR}/include\n  ${xgboost_SOURCE_DIR}/dmlc-core/include)\ntarget_link_libraries(testxgboost\n  PRIVATE\n  $<TARGET_NAME_IF_EXISTS:rmm::rmm_logger>\n  $<TARGET_NAME_IF_EXISTS:rmm::rmm_logger_impl>\n  GTest::gtest GTest::gmock)\n\nset_output_directory(testxgboost ${xgboost_BINARY_DIR})\n\n# This grouping organises source files nicely in visual studio\nauto_source_group(\"${TEST_SOURCES}\")\n"
  },
  {
    "path": "tests/cpp/c_api/test_c_api.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/c_api.h>\n#include <xgboost/data.h>\n#include <xgboost/json.h>  // Json\n#include <xgboost/learner.h>\n#include <xgboost/version_config.h>\n\n#include <algorithm>   // for copy_n\n#include <array>       // for array\n#include <cstddef>     // std::size_t\n#include <filesystem>  // std::filesystem\n#include <limits>      // std::numeric_limits\n#include <string>      // std::string\n#include <vector>\n\n#include \"../../../src/c_api/c_api_error.h\"\n#include \"../../../src/common/io.h\"\n#include \"../../../src/data/adapter.h\"              // for ArrayAdapter\n#include \"../../../src/data/array_interface.h\"      // for ArrayInterface\n#include \"../../../src/data/batch_utils.h\"          // for MatchingPageBytes\n#include \"../../../src/data/gradient_index.h\"       // for GHistIndexMatrix\n#include \"../../../src/data/iterative_dmatrix.h\"    // for IterativeDMatrix\n#include \"../../../src/data/sparse_page_dmatrix.h\"  // for SparsePageDMatrix\n#include \"../helpers.h\"\n\nTEST(CAPI, XGDMatrixCreateFromMatOmp) {\n  std::vector<bst_ulong> num_rows = {100, 11374, 15000};\n  for (auto row : num_rows) {\n    bst_ulong num_cols = 50;\n    int num_missing = 5;\n    DMatrixHandle handle;\n    std::vector<float> data(num_cols * row, 1.5);\n    for (int i = 0; i < num_missing; i++) {\n      data[i] = std::numeric_limits<float>::quiet_NaN();\n    }\n\n    XGDMatrixCreateFromMat_omp(data.data(), row, num_cols, std::numeric_limits<float>::quiet_NaN(),\n                               &handle, 0);\n\n    std::shared_ptr<xgboost::DMatrix> *dmat =\n        static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);\n    xgboost::MetaInfo &info = (*dmat)->Info();\n    ASSERT_EQ(info.num_col_, num_cols);\n    ASSERT_EQ(info.num_row_, row);\n    ASSERT_EQ(info.num_nonzero_, num_cols * row - num_missing);\n\n    for (const auto &batch : 
(*dmat)->GetBatches<xgboost::SparsePage>()) {\n      auto page = batch.GetView();\n      for (size_t i = 0; i < batch.Size(); i++) {\n        auto inst = page[i];\n        for (auto e : inst) {\n          ASSERT_EQ(e.fvalue, 1.5);\n        }\n      }\n    }\n    delete dmat;\n  }\n}\n\nnamespace xgboost {\n\nTEST(CAPI, Version) {\n  int patch{0};\n  XGBoostVersion(NULL, NULL, &patch);  // NOLINT\n  ASSERT_EQ(patch, XGBOOST_VER_PATCH);\n}\n\nTEST(CAPI, XGDMatrixCreateFromCSR) {\n  HostDeviceVector<std::size_t> indptr{0, 3};\n  HostDeviceVector<double> data{0.0, 1.0, 2.0};\n  HostDeviceVector<std::size_t> indices{0, 1, 2};\n  auto indptr_arr = GetArrayInterface(&indptr, 2, 1);\n  auto indices_arr = GetArrayInterface(&indices, 3, 1);\n  auto data_arr = GetArrayInterface(&data, 3, 1);\n  std::string sindptr, sindices, sdata, sconfig;\n  Json::Dump(indptr_arr, &sindptr);\n  Json::Dump(indices_arr, &sindices);\n  Json::Dump(data_arr, &sdata);\n  Json config{Object{}};\n  config[\"missing\"] = Number{std::numeric_limits<float>::quiet_NaN()};\n  config[\"data_split_mode\"] = Integer{static_cast<int64_t>(DataSplitMode::kCol)};\n  Json::Dump(config, &sconfig);\n\n  DMatrixHandle handle;\n  XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), 3, sconfig.c_str(),\n                         &handle);\n  bst_ulong n;\n  ASSERT_EQ(XGDMatrixNumRow(handle, &n), 0);\n  ASSERT_EQ(n, 1);\n  ASSERT_EQ(XGDMatrixNumCol(handle, &n), 0);\n  ASSERT_EQ(n, 3);\n  ASSERT_EQ(XGDMatrixNumNonMissing(handle, &n), 0);\n  ASSERT_EQ(n, 3);\n  ASSERT_EQ(XGDMatrixDataSplitMode(handle, &n), 0);\n  ASSERT_EQ(n, static_cast<int64_t>(DataSplitMode::kCol));\n\n  std::shared_ptr<xgboost::DMatrix> *pp_fmat =\n      static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);\n  ASSERT_EQ((*pp_fmat)->Ctx()->Threads(), AllThreadsForTest());\n\n  XGDMatrixFree(handle);\n}\n\nTEST(CAPI, ConfigIO) {\n  size_t constexpr kRows = 10;\n  auto p_dmat = RandomDataGenerator(kRows, 10, 
0).GenerateDMatrix();\n  std::vector<std::shared_ptr<DMatrix>> mat{p_dmat};\n  std::vector<bst_float> labels(kRows);\n  for (size_t i = 0; i < labels.size(); ++i) {\n    labels[i] = i;\n  }\n  p_dmat->Info().labels.Data()->HostVector() = labels;\n  p_dmat->Info().labels.Reshape(kRows);\n\n  std::shared_ptr<Learner> learner{Learner::Create(mat)};\n\n  BoosterHandle handle = learner.get();\n  learner->UpdateOneIter(0, p_dmat);\n\n  std::array<char const *, 1> out;\n  bst_ulong len{0};\n  XGBoosterSaveJsonConfig(handle, &len, out.data());\n\n  std::string config_str_0{out[0]};\n  auto config_0 = Json::Load({config_str_0.c_str(), config_str_0.size()});\n  XGBoosterLoadJsonConfig(handle, out[0]);\n\n  bst_ulong len_1{0};\n  std::string config_str_1{out[0]};\n  XGBoosterSaveJsonConfig(handle, &len_1, out.data());\n  auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});\n\n  ASSERT_EQ(config_0, config_1);\n}\n\nTEST(CAPI, JsonModelIO) {\n  size_t constexpr kRows = 10;\n  size_t constexpr kCols = 10;\n  auto tempdir = std::filesystem::temp_directory_path();\n\n  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n  std::vector<std::shared_ptr<DMatrix>> mat{p_dmat};\n  std::vector<bst_float> labels(kRows);\n  for (size_t i = 0; i < labels.size(); ++i) {\n    labels[i] = i;\n  }\n  p_dmat->Info().labels.Data()->HostVector() = labels;\n  p_dmat->Info().labels.Reshape(kRows);\n\n  std::shared_ptr<Learner> learner{Learner::Create(mat)};\n\n  learner->UpdateOneIter(0, p_dmat);\n  BoosterHandle handle = learner.get();\n\n  auto modelfile_0 = tempdir / std::filesystem::u8path(u8\"모델_0.json\");\n  XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());\n  XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());\n\n  bst_ulong num_feature{0};\n  ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);\n  ASSERT_EQ(num_feature, kCols);\n\n  auto modelfile_1 = tempdir / \"model_1.json\";\n  XGBoosterSaveModel(handle, 
modelfile_1.u8string().c_str());\n\n  auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());\n  auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());\n\n  ASSERT_EQ(model_str_0.front(), '{');\n  ASSERT_EQ(model_str_0, model_str_1);\n\n  /**\n   * In memory\n   */\n  bst_ulong len{0};\n  char const *data;\n  XGBoosterSaveModelToBuffer(handle, R\"({\"format\": \"ubj\"})\", &len, &data);\n  ASSERT_GT(len, 3);\n\n  XGBoosterLoadModelFromBuffer(handle, data, len);\n  char const *saved;\n  bst_ulong saved_len{0};\n  XGBoosterSaveModelToBuffer(handle, R\"({\"format\": \"ubj\"})\", &saved_len, &saved);\n  ASSERT_EQ(len, saved_len);\n  auto l = StringView{data, static_cast<size_t>(len)};\n  auto r = StringView{saved, static_cast<size_t>(saved_len)};\n  ASSERT_EQ(l.size(), r.size());\n  ASSERT_EQ(l, r);\n\n  std::string buffer;\n  Json::Dump(Json::Load(l, std::ios::binary), &buffer);\n  ASSERT_EQ(model_str_0.size(), buffer.size());\n  ASSERT_EQ(model_str_0.back(), '}');\n  ASSERT_TRUE(std::equal(model_str_0.begin(), model_str_0.end() - 1, buffer.begin()));\n\n  ASSERT_EQ(XGBoosterSaveModelToBuffer(handle, R\"({})\", &len, &data), -1);\n  ASSERT_EQ(XGBoosterSaveModelToBuffer(handle, R\"({\"format\": \"foo\"})\", &len, &data), -1);\n}\n\nTEST(CAPI, CatchDMLCError) {\n  DMatrixHandle out;\n  ASSERT_EQ(XGDMatrixCreateFromFile(\"foo\", 0, &out), -1);\n  EXPECT_THROW({ dmlc::Stream::Create(\"foo\", \"r\"); }, dmlc::Error);\n}\n\nTEST(CAPI, CatchDMLCErrorURI) {\n  Json config{Object()};\n  config[\"uri\"] = String{\"foo\"};\n  config[\"silent\"] = Integer{0};\n  std::string config_str;\n  Json::Dump(config, &config_str);\n  DMatrixHandle out;\n  ASSERT_EQ(XGDMatrixCreateFromURI(config_str.c_str(), &out), -1);\n  EXPECT_THROW({ dmlc::Stream::Create(\"foo\", \"r\"); }, dmlc::Error);\n}\n\nTEST(CAPI, DMatrixSetFeatureName) {\n  size_t constexpr kRows = 10;\n  bst_feature_t constexpr kCols = 2;\n\n  DMatrixHandle handle;\n  std::vector<float> data(kCols 
* kRows, 1.5);\n\n  XGDMatrixCreateFromMat_omp(data.data(), kRows, kCols, std::numeric_limits<float>::quiet_NaN(),\n                             &handle, 0);\n  std::vector<std::string> feature_names;\n  for (bst_feature_t i = 0; i < kCols; ++i) {\n    feature_names.emplace_back(std::to_string(i));\n  }\n  std::vector<char const *> c_feature_names;\n  c_feature_names.resize(feature_names.size());\n  std::transform(feature_names.cbegin(), feature_names.cend(), c_feature_names.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  XGDMatrixSetStrFeatureInfo(handle, u8\"feature_name\", c_feature_names.data(),\n                             c_feature_names.size());\n  bst_ulong out_len = 0;\n  char const **c_out_features;\n  XGDMatrixGetStrFeatureInfo(handle, u8\"feature_name\", &out_len, &c_out_features);\n\n  CHECK_EQ(out_len, kCols);\n  std::vector<std::string> out_features;\n  for (bst_ulong i = 0; i < out_len; ++i) {\n    ASSERT_EQ(std::to_string(i), c_out_features[i]);\n  }\n\n  std::array<char const *, 2> feat_types{\"i\", \"q\"};\n  static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);\n  XGDMatrixSetStrFeatureInfo(handle, \"feature_type\", feat_types.data(), kCols);\n  char const **c_out_types;\n  XGDMatrixGetStrFeatureInfo(handle, u8\"feature_type\", &out_len, &c_out_types);\n  for (bst_ulong i = 0; i < out_len; ++i) {\n    ASSERT_STREQ(feat_types[i], c_out_types[i]);\n  }\n\n  XGDMatrixFree(handle);\n}\n\nint TestExceptionCatching() {\n  API_BEGIN();\n  throw std::bad_alloc();\n  API_END();\n}\n\nTEST(CAPI, Exception) {\n  ASSERT_NO_THROW({ TestExceptionCatching(); });\n  ASSERT_EQ(TestExceptionCatching(), -1);\n  auto error = XGBGetLastError();\n  // Not null\n  ASSERT_TRUE(error);\n}\n\nTEST(CAPI, XGBGlobalConfig) {\n  int ret;\n  {\n    const char *config_str = R\"json(\n    {\n      \"verbosity\": 0,\n      \"use_rmm\": false\n    }\n  )json\";\n    ret = XGBSetGlobalConfig(config_str);\n    ASSERT_EQ(ret, 0);\n    const 
char *updated_config_cstr;\n    ret = XGBGetGlobalConfig(&updated_config_cstr);\n    ASSERT_EQ(ret, 0);\n\n    std::string updated_config_str{updated_config_cstr};\n    auto updated_config = Json::Load({updated_config_str.data(), updated_config_str.size()});\n    ASSERT_EQ(get<Integer>(updated_config[\"verbosity\"]), 0);\n    ASSERT_EQ(get<Boolean>(updated_config[\"use_rmm\"]), false);\n  }\n  {\n    const char *config_str = R\"json(\n    {\n      \"use_rmm\": true\n    }\n  )json\";\n    ret = XGBSetGlobalConfig(config_str);\n    ASSERT_EQ(ret, 0);\n    const char *updated_config_cstr;\n    ret = XGBGetGlobalConfig(&updated_config_cstr);\n    ASSERT_EQ(ret, 0);\n\n    std::string updated_config_str{updated_config_cstr};\n    auto updated_config = Json::Load({updated_config_str.data(), updated_config_str.size()});\n    ASSERT_EQ(get<Boolean>(updated_config[\"use_rmm\"]), true);\n  }\n  {\n    const char *config_str = R\"json(\n    {\n      \"foo\": 0\n    }\n  )json\";\n    ret = XGBSetGlobalConfig(config_str);\n    ASSERT_EQ(ret, -1);\n    auto err = std::string{XGBGetLastError()};\n    ASSERT_NE(err.find(\"foo\"), std::string::npos);\n  }\n  {\n    const char *config_str = R\"json(\n    {\n      \"foo\": 0,\n      \"verbosity\": 0\n    }\n  )json\";\n    ret = XGBSetGlobalConfig(config_str);\n    ASSERT_EQ(ret, -1);\n    auto err = std::string{XGBGetLastError()};\n    ASSERT_NE(err.find(\"foo\"), std::string::npos);\n    ASSERT_EQ(err.find(\"verbosity\"), std::string::npos);\n  }\n}\n\nTEST(CAPI, BuildInfo) {\n  char const *out;\n  XGBuildInfo(&out);\n  auto loaded = Json::Load(StringView{out});\n  ASSERT_TRUE(get<Object const>(loaded).find(\"USE_OPENMP\") != get<Object const>(loaded).cend());\n  ASSERT_TRUE(get<Object const>(loaded).find(\"USE_CUDA\") != get<Object const>(loaded).cend());\n  ASSERT_TRUE(get<Object const>(loaded).find(\"USE_NCCL\") != get<Object const>(loaded).cend());\n}\n\nTEST(CAPI, NullPtr) {\n  ASSERT_EQ(XGBSetGlobalConfig(nullptr), -1);\n  
auto const *err = XGBGetLastError();\n  auto pos = std::string{err}.find(\"Invalid pointer argument: json_str\");\n  ASSERT_NE(pos, std::string::npos);\n  XGBAPISetLastError(\"\");\n}\n\nTEST(CAPI, JArgs) {\n  {\n    Json args{Object{}};\n    args[\"key\"] = String{\"value\"};\n    args[\"null\"] = Null{};\n    auto value = OptionalArg<String>(args, \"key\", std::string{\"foo\"});\n    ASSERT_EQ(value, \"value\");\n    value = OptionalArg<String const>(args, \"key\", std::string{\"foo\"});\n    ASSERT_EQ(value, \"value\");\n\n    ASSERT_THROW({ OptionalArg<Number>(args, \"key\", 0.0f); }, dmlc::Error);\n    value = OptionalArg<String const>(args, \"bar\", std::string{\"foo\"});\n    ASSERT_EQ(value, \"foo\");\n    value = OptionalArg<String const>(args, \"null\", std::string{\"foo\"});\n    ASSERT_EQ(value, \"foo\");\n  }\n\n  {\n    Json args{Object{}};\n    args[\"key\"] = String{\"value\"};\n    args[\"null\"] = Null{};\n    auto value = RequiredArg<String>(args, \"key\", __func__);\n    ASSERT_EQ(value, \"value\");\n    value = RequiredArg<String const>(args, \"key\", __func__);\n    ASSERT_EQ(value, \"value\");\n\n    ASSERT_THROW({ RequiredArg<Integer>(args, \"key\", __func__); }, dmlc::Error);\n    ASSERT_THROW({ RequiredArg<String const>(args, \"foo\", __func__); }, dmlc::Error);\n    ASSERT_THROW({ RequiredArg<String>(args, \"null\", __func__); }, dmlc::Error);\n  }\n}\n\nnamespace {\nvoid MakeLabelForTest(std::shared_ptr<DMatrix> Xy, DMatrixHandle cxy) {\n  auto n_samples = Xy->Info().num_row_;\n  std::vector<float> y(n_samples);\n  for (std::size_t i = 0; i < y.size(); ++i) {\n    y[i] = static_cast<float>(i);\n  }\n\n  Xy->Info().labels.Reshape(n_samples);\n  Xy->Info().labels.Data()->HostVector() = y;\n\n  auto y_int = GetArrayInterface(Xy->Info().labels.Data(), n_samples, 1);\n  std::string s_y_int;\n  Json::Dump(y_int, &s_y_int);\n\n  XGDMatrixSetInfoFromInterface(cxy, \"label\", s_y_int.c_str());\n}\n\nauto MakeSimpleDMatrixForTest(bst_idx_t 
n_samples, bst_feature_t n_features, Json dconfig) {\n  HostDeviceVector<float> storage;\n  auto arr_int = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateArrayInterface(&storage);\n\n  data::ArrayAdapter adapter{StringView{arr_int}};\n  std::shared_ptr<DMatrix> Xy{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads())};\n\n  DMatrixHandle p_fmat;\n  std::string s_dconfig;\n  Json::Dump(dconfig, &s_dconfig);\n  CHECK_EQ(XGDMatrixCreateFromDense(arr_int.c_str(), s_dconfig.c_str(), &p_fmat), 0);\n\n  MakeLabelForTest(Xy, p_fmat);\n  return std::pair{p_fmat, Xy};\n}\n\nauto MakeQDMForTest(Context const *ctx, bst_idx_t n_samples, bst_feature_t n_features,\n                    Json dconfig) {\n  bst_bin_t n_bins{16};\n  dconfig[\"max_bin\"] = Integer{n_bins};\n\n  std::size_t n_batches{4};\n  std::unique_ptr<ArrayIterForTest> iter_0;\n  if (ctx->IsCUDA()) {\n    iter_0 = std::make_unique<CudaArrayIterForTest>(0.0f, n_samples, n_features, n_batches);\n  } else {\n    iter_0 = std::make_unique<NumpyArrayIterForTest>(0.0f, n_samples, n_features, n_batches);\n  }\n  std::string s_dconfig;\n  Json::Dump(dconfig, &s_dconfig);\n  DMatrixHandle p_fmat;\n  CHECK_EQ(XGQuantileDMatrixCreateFromCallback(static_cast<DataIterHandle>(iter_0.get()),\n                                               iter_0->Proxy(), nullptr, Reset, Next,\n                                               s_dconfig.c_str(), &p_fmat),\n           0);\n\n  std::unique_ptr<ArrayIterForTest> iter_1;\n  if (ctx->IsCUDA()) {\n    iter_1 = std::make_unique<CudaArrayIterForTest>(0.0f, n_samples, n_features, n_batches);\n  } else {\n    iter_1 = std::make_unique<NumpyArrayIterForTest>(0.0f, n_samples, n_features, n_batches);\n  }\n  auto Xy =\n      std::make_shared<data::IterativeDMatrix>(iter_1.get(), iter_1->Proxy(), nullptr, Reset, Next,\n                                               std::numeric_limits<float>::quiet_NaN(), 0, n_bins);\n  return std::pair{p_fmat, 
Xy};\n}\n\nauto MakeExtMemForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconfig) {\n  std::size_t n_batches{4};\n  NumpyArrayIterForTest iter_0{0.0f, n_samples, n_features, n_batches};\n  std::string s_dconfig;\n  dconfig[\"cache_prefix\"] = String{\"cache\"};\n  Json::Dump(dconfig, &s_dconfig);\n  DMatrixHandle p_fmat;\n  CHECK_EQ(XGDMatrixCreateFromCallback(static_cast<DataIterHandle>(&iter_0), iter_0.Proxy(), Reset,\n                                       Next, s_dconfig.c_str(), &p_fmat),\n           0);\n\n  NumpyArrayIterForTest iter_1{0.0f, n_samples, n_features, n_batches};\n  auto config = ExtMemConfig{\"\",\n                             false,\n                             cuda_impl::AutoHostRatio(),\n                             cuda_impl::MatchingPageBytes(),\n                             std::numeric_limits<float>::quiet_NaN(),\n                             0};\n  auto Xy = std::make_shared<data::SparsePageDMatrix>(&iter_1, iter_1.Proxy(), Reset, Next, config);\n  MakeLabelForTest(Xy, p_fmat);\n  return std::pair{p_fmat, Xy};\n}\n\ntemplate <typename Page>\nvoid CheckResult(Context const *ctx, bst_feature_t n_features, std::shared_ptr<DMatrix> Xy,\n                 float const *out_data, std::uint64_t const *out_indptr) {\n  for (auto const &page : Xy->GetBatches<Page>(ctx, BatchParam{16, 0.2})) {\n    auto const &cut = page.Cuts();\n    auto const &ptrs = cut.Ptrs();\n    auto const &vals = cut.Values();\n    auto ft = Xy->Info().feature_types.ConstHostSpan();\n    std::uint64_t n_numeric{0};\n    for (bst_feature_t f = 0; f < Xy->Info().num_col_; ++f) {\n      ASSERT_EQ(ptrs[f] + n_numeric, out_indptr[f]);\n      auto beg = out_indptr[f];\n      auto end = out_indptr[f + 1];\n      auto val_beg = ptrs[f];\n      if (!common::IsCat(ft, f)) {\n        ASSERT_EQ(common::HistogramCuts::NumericBinLowerBound(ptrs, vals, f, ptrs[f]),\n                  out_data[beg]);\n        ++beg;\n        ++n_numeric;\n      }\n      for (std::uint64_t i = 
beg, j = val_beg; i < end; ++i, ++j) {\n        ASSERT_EQ(vals[j], out_data[i]);\n      }\n    }\n\n    ASSERT_EQ(ptrs[n_features] + n_numeric, out_indptr[n_features]);\n  }\n}\n\nvoid TestXGDMatrixGetQuantileCut(Context const *ctx) {\n  bst_idx_t n_samples{1024};\n  bst_feature_t n_features{16};\n\n  Json dconfig{Object{}};\n  dconfig[\"ntread\"] = Integer{Context{}.Threads()};\n  dconfig[\"missing\"] = Number{std::numeric_limits<float>::quiet_NaN()};\n\n  auto check_result = [n_features, &ctx](std::shared_ptr<DMatrix> Xy, StringView s_out_data,\n                                         StringView s_out_indptr) {\n    auto i_out_data = ArrayInterface<1, false>{s_out_data};\n    ASSERT_EQ(i_out_data.type, ArrayInterfaceHandler::kF4);\n    auto out_data = static_cast<float const *>(i_out_data.data);\n    ASSERT_TRUE(out_data);\n\n    auto i_out_indptr = ArrayInterface<1, false>{s_out_indptr};\n    ASSERT_EQ(i_out_indptr.type, ArrayInterfaceHandler::kU8);\n    auto out_indptr = static_cast<std::uint64_t const *>(i_out_indptr.data);\n    ASSERT_TRUE(out_data);\n\n    if (ctx->IsCPU()) {\n      CheckResult<GHistIndexMatrix>(ctx, n_features, Xy, out_data, out_indptr);\n    } else {\n      CheckResult<EllpackPage>(ctx, n_features, Xy, out_data, out_indptr);\n    }\n  };\n\n  Json config{Null{}};\n  std::string s_config;\n  Json::Dump(config, &s_config);\n  char const *out_indptr;\n  char const *out_data;\n\n  {\n    // SimpleDMatrix\n    auto [p_fmat, Xy] = MakeSimpleDMatrixForTest(n_samples, n_features, dconfig);\n    // assert fail, we don't have the quantile yet.\n    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);\n\n    std::array<DMatrixHandle, 1> mats{p_fmat};\n    BoosterHandle booster;\n    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);\n    ASSERT_EQ(XGBoosterSetParam(booster, \"max_bin\", \"16\"), 0);\n    if (ctx->IsCUDA()) {\n      ASSERT_EQ(XGBoosterSetParam(booster, \"device\", ctx->DeviceName().c_str()), 
0);\n    }\n    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);\n    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);\n\n    check_result(Xy, out_data, out_indptr);\n\n    XGDMatrixFree(p_fmat);\n    XGBoosterFree(booster);\n  }\n\n  {\n    // IterativeDMatrix\n    auto [p_fmat, Xy] = MakeQDMForTest(ctx, n_samples, n_features, dconfig);\n    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);\n\n    check_result(Xy, out_data, out_indptr);\n    XGDMatrixFree(p_fmat);\n  }\n\n  {\n    // SparsePageDMatrix\n    auto [p_fmat, Xy] = MakeExtMemForTest(n_samples, n_features, dconfig);\n    // assert fail, we don't have the quantile yet.\n    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), -1);\n\n    std::array<DMatrixHandle, 1> mats{p_fmat};\n    BoosterHandle booster;\n    ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster), 0);\n    ASSERT_EQ(XGBoosterSetParam(booster, \"max_bin\", \"16\"), 0);\n    if (ctx->IsCUDA()) {\n      ASSERT_EQ(XGBoosterSetParam(booster, \"device\", ctx->DeviceName().c_str()), 0);\n    }\n    ASSERT_EQ(XGBoosterUpdateOneIter(booster, 0, p_fmat), 0);\n    ASSERT_EQ(XGDMatrixGetQuantileCut(p_fmat, s_config.c_str(), &out_indptr, &out_data), 0);\n\n    XGDMatrixFree(p_fmat);\n    XGBoosterFree(booster);\n  }\n}\n}  // namespace\n\nTEST(CAPI, XGDMatrixGetQuantileCut) {\n  Context ctx;\n  TestXGDMatrixGetQuantileCut(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(CAPI, GPUXGDMatrixGetQuantileCut) {\n  auto ctx = MakeCUDACtx(0);\n  TestXGDMatrixGetQuantileCut(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nTEST(CAPI, PredictReuseProxy) {\n  // Configuration for creating DMatrix\n  Json fmat_cfg{Object{}};\n  fmat_cfg[\"missing\"] = std::numeric_limits<float>::quiet_NaN();\n  auto sfmat_cfg = Json::Dump(fmat_cfg);\n\n  // Configuration for prediction\n  Json config{Object{}};\n  config[\"type\"] = Integer{0};\n  
config[\"iteration_begin\"] = config[\"iteration_end\"] = Integer{0};\n  config[\"missing\"] = Number{std::numeric_limits<float>::quiet_NaN()};\n  config[\"strict_shape\"] = Boolean{true};\n  config[\"training\"] = Boolean{false};\n  auto scfg = Json::Dump(config);\n\n  HostDeviceVector<float> storage;\n  bst_idx_t n_samples = 1024;\n  auto inf = RandomDataGenerator{n_samples, 256, 0.0}.GenerateArrayInterface(&storage);\n  HostDeviceVector<float> storage_y;\n  auto y_inf = RandomDataGenerator{n_samples, 1, 0.0}.GenerateArrayInterface(&storage_y);\n\n  // Create a DMatrix for training\n  DMatrixHandle fmat_hdl{nullptr};\n  ASSERT_EQ(XGDMatrixCreateFromDense(inf.c_str(), sfmat_cfg.c_str(), &fmat_hdl), 0);\n  ASSERT_EQ(XGDMatrixSetInfoFromInterface(fmat_hdl, \"label\", y_inf.c_str()), 0);\n\n  // Create booster and train.\n  std::array<DMatrixHandle, 1> mats{fmat_hdl};\n  BoosterHandle booster_hdl;\n  ASSERT_EQ(XGBoosterCreate(mats.data(), 1, &booster_hdl), 0);\n\n  for (std::int32_t i = 0; i < 3; ++i) {\n    ASSERT_EQ(XGBoosterUpdateOneIter(booster_hdl, i, fmat_hdl), 0);\n  }\n\n  // Create a proxy that can be reused.\n  DMatrixHandle proxy_hdl{nullptr};\n  ASSERT_EQ(XGProxyDMatrixCreate(&proxy_hdl), 0);\n\n  bst_ulong const *outshape{nullptr};\n  bst_ulong outdim{0};\n  float const *result{nullptr};\n\n  {\n    // Prediction with DMatrix\n    ASSERT_EQ(XGBoosterPredictFromDMatrix(booster_hdl, fmat_hdl, scfg.c_str(), &outshape, &outdim,\n                                          &result),\n              0);\n    bst_ulong n_samples_ret = 0;\n    ASSERT_EQ(XGDMatrixNumRow(fmat_hdl, &n_samples_ret), 0);\n    std::vector<float> vec_0(n_samples_ret);\n    ASSERT_EQ(vec_0.size(), n_samples);\n    ASSERT_EQ(outdim, 2);\n    std::copy_n(result, vec_0.size(), vec_0.begin());\n\n    // In-place predict\n    ASSERT_EQ(XGBoosterPredictFromDense(booster_hdl, inf.c_str(), scfg.c_str(), proxy_hdl,\n                                        &outshape, &outdim, &result),\n             
 0);\n    ASSERT_EQ(XGDMatrixNumRow(proxy_hdl, &n_samples_ret), 0);\n    std::vector<float> vec_1(n_samples_ret);\n    ASSERT_EQ(vec_1.size(), n_samples);\n    ASSERT_EQ(outdim, 2);\n    std::copy_n(result, vec_1.size(), vec_1.begin());\n\n    // Same result\n    ASSERT_EQ(vec_0, vec_1);\n  }\n\n  {\n    bst_idx_t n_samples = 512;\n\n    // Prediction with DMatrix\n    auto inf = RandomDataGenerator{n_samples, 256, 0.0}.GenerateArrayInterface(&storage);\n    DMatrixHandle fmat_hdl{nullptr};\n    ASSERT_EQ(XGDMatrixCreateFromDense(inf.c_str(), sfmat_cfg.c_str(), &fmat_hdl), 0);\n\n    ASSERT_EQ(XGBoosterPredictFromDMatrix(booster_hdl, fmat_hdl, scfg.c_str(), &outshape, &outdim,\n                                          &result),\n              0);\n    bst_ulong n_samples_ret = 0;\n    ASSERT_EQ(XGDMatrixNumRow(fmat_hdl, &n_samples_ret), 0);\n    std::vector<float> vec_0(n_samples_ret);\n    ASSERT_EQ(vec_0.size(), n_samples);\n    ASSERT_EQ(outdim, 2);\n    std::copy_n(result, vec_0.size(), vec_0.begin());\n\n    // In-place predict, same proxy as before\n    ASSERT_EQ(XGBoosterPredictFromDense(booster_hdl, inf.c_str(), scfg.c_str(), proxy_hdl,\n                                        &outshape, &outdim, &result),\n              0);\n    ASSERT_EQ(XGDMatrixNumRow(proxy_hdl, &n_samples_ret), 0);\n    std::vector<float> vec_1(n_samples_ret);\n    ASSERT_EQ(vec_1.size(), n_samples);\n    ASSERT_EQ(outdim, 2);\n    std::copy_n(result, vec_1.size(), vec_1.begin());\n\n    // Same result\n    ASSERT_EQ(vec_0, vec_1);\n\n    ASSERT_EQ(XGDMatrixFree(fmat_hdl), 0);\n  }\n\n  ASSERT_EQ(XGDMatrixFree(fmat_hdl), 0);\n  ASSERT_EQ(XGBoosterFree(booster_hdl), 0);\n  ASSERT_EQ(XGDMatrixFree(proxy_hdl), 0);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/categorical_helpers.h",
    "content": "/*!\n * Copyright 2021 by XGBoost Contributors\n *\n * \\brief Utilities for testing categorical data support.\n */\n#include <numeric>\n#include <vector>\n\n#include \"xgboost/span.h\"\n#include \"helpers.h\"\n#include \"../../src/common/categorical.h\"\n\nnamespace xgboost {\ninline std::vector<float> OneHotEncodeFeature(std::vector<float> x,\n                                              size_t num_cat) {\n  std::vector<float> ret(x.size() * num_cat, 0);\n  size_t n_rows = x.size();\n  for (size_t r = 0; r < n_rows; ++r) {\n    bst_cat_t cat = common::AsCat(x[r]);\n    ret.at(num_cat * r + cat) = 1;\n  }\n  return ret;\n}\n\n} // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/collective/test_allgather.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>   // for ASSERT_EQ\n#include <xgboost/span.h>  // for Span, oper...\n\n#include <algorithm>  // for min\n#include <chrono>     // for seconds\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t\n#include <numeric>    // for iota\n#include <string>     // for string\n#include <thread>     // for thread\n#include <vector>     // for vector\n\n#include \"../../../src/collective/allgather.h\"  // for RingAllgather\n#include \"../../../src/collective/coll.h\"       // for Coll\n#include \"../../../src/collective/comm.h\"       // for RabitComm\n#include \"gtest/gtest.h\"                        // for AssertionR...\n#include \"test_worker.h\"                        // for TestDistri...\n#include \"xgboost/collective/result.h\"          // for Result\n\nnamespace xgboost::collective {\nnamespace {\nclass AllgatherTest : public TrackerTest {};\n\nclass Worker : public WorkerForTest {\n public:\n  using WorkerForTest::WorkerForTest;\n\n  void Run() {\n    {\n      // basic test\n      std::vector<std::int32_t> data(comm_.World(), 0);\n      data[comm_.Rank()] = comm_.Rank();\n\n      auto rc = RingAllgather(this->comm_, common::Span{data.data(), data.size()});\n      SafeColl(rc);\n\n      for (std::int32_t r = 0; r < comm_.World(); ++r) {\n        ASSERT_EQ(data[r], r);\n      }\n    }\n    {\n      // test for limited socket buffer\n      this->LimitSockBuf(4096);\n\n      std::size_t n = 8192;  // n_bytes = 8192 * sizeof(int)\n      std::vector<std::int32_t> data(comm_.World() * n, 0);\n      auto s_data = common::Span<std::int32_t>{data};\n      auto seg = s_data.subspan(comm_.Rank() * n, n);\n      std::iota(seg.begin(), seg.end(), comm_.Rank());\n\n      auto rc = RingAllgather(comm_, common::Span{data.data(), data.size()});\n      SafeColl(rc);\n\n      for (std::int32_t r = 0; r < comm_.World(); ++r) {\n        auto seg = s_data.subspan(r * n, 
n);\n        for (std::int32_t i = 0; i < static_cast<std::int32_t>(seg.size()); ++i) {\n          auto v = seg[i];\n          ASSERT_EQ(v, r + i);\n        }\n      }\n    }\n  }\n\n  void CheckV(common::Span<std::int32_t> result) {\n    std::int32_t k{0};\n    for (std::int32_t r = 0; r < comm_.World(); ++r) {\n      auto seg = common::Span{result.data(), result.size()}.subspan(k, (r + 1));\n      if (comm_.Rank() == 0) {\n        for (auto v : seg) {\n          ASSERT_EQ(v, r);\n        }\n        k += seg.size();\n      }\n    }\n  }\n  void TestVRing() {\n    // V test\n    std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());\n    std::vector<std::int32_t> result;\n    auto rc = RingAllgatherV(comm_, common::Span{data.data(), data.size()}, &result);\n    SafeColl(rc);\n    ASSERT_EQ(result.size(), (1 + comm_.World()) * comm_.World() / 2);\n    CheckV(result);\n  }\n\n  void TestVBasic() {\n    // basic test\n    std::int32_t n{comm_.Rank()};\n    std::vector<std::int32_t> result;\n    auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);\n    SafeColl(rc);\n    for (std::int32_t i = 0; i < comm_.World(); ++i) {\n      ASSERT_EQ(result[i], i);\n    }\n  }\n\n  void TestVAlgo() {\n    // V test, broadcast\n    std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());\n    auto s_data = common::Span{data.data(), data.size()};\n\n    std::vector<std::int64_t> sizes(comm_.World(), 0);\n    sizes[comm_.Rank()] = s_data.size_bytes();\n    auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});\n    SafeColl(rc);\n    std::shared_ptr<Coll> pcoll{new Coll{}};\n\n    std::vector<std::int64_t> recv_segments(comm_.World() + 1, 0);\n    std::vector<std::int32_t> recv(std::accumulate(sizes.cbegin(), sizes.cend(), 0));\n\n    auto s_recv = common::Span{recv.data(), recv.size()};\n\n    rc = pcoll->AllgatherV(comm_, common::EraseType(s_data),\n                           common::Span{sizes.data(), sizes.size()},\n                     
      common::Span{recv_segments.data(), recv_segments.size()},\n                           common::EraseType(s_recv), AllgatherVAlgo::kBcast);\n    SafeColl(rc);\n    CheckV(s_recv);\n\n    // Test inplace\n    auto test_inplace = [&] (AllgatherVAlgo algo) {\n      std::fill_n(s_recv.data(), s_recv.size(), 0);\n      auto current = s_recv.subspan(recv_segments[comm_.Rank()],\n                                    recv_segments[comm_.Rank() + 1] - recv_segments[comm_.Rank()]);\n      std::copy_n(data.data(), data.size(), current.data());\n      rc = pcoll->AllgatherV(comm_, common::EraseType(current),\n                             common::Span{sizes.data(), sizes.size()},\n                             common::Span{recv_segments.data(), recv_segments.size()},\n                             common::EraseType(s_recv), algo);\n      SafeColl(rc);\n      CheckV(s_recv);\n    };\n\n    test_inplace(AllgatherVAlgo::kBcast);\n    test_inplace(AllgatherVAlgo::kRing);\n  }\n};\n}  // namespace\n\nTEST_F(AllgatherTest, Basic) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker worker{host, port, timeout, n_workers, r};\n    worker.Run();\n  });\n}\n\nTEST_F(AllgatherTest, VBasic) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker worker{host, port, timeout, n_workers, r};\n    worker.TestVBasic();\n  });\n}\n\nTEST_F(AllgatherTest, VRing) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker worker{host, 
port, timeout, n_workers, r};\n    worker.TestVRing();\n  });\n}\n\nTEST_F(AllgatherTest, VAlgo) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker worker{host, port, timeout, n_workers, r};\n    worker.TestVAlgo();\n  });\n}\n\nTEST(VectorAllgatherV, Basic) {\n  std::int32_t n_workers{3};\n  TestDistributedGlobal(n_workers, []() {\n    auto n_workers = collective::GetWorldSize();\n    ASSERT_EQ(n_workers, 3);\n    auto rank = collective::GetRank();\n    // Construct input that has different length for each worker.\n    std::vector<std::vector<char>> inputs;\n    for (std::int32_t i = 0; i < rank + 1; ++i) {\n      std::vector<char> in;\n      for (std::int32_t j = 0; j < rank + 1; ++j) {\n        in.push_back(static_cast<char>(j));\n      }\n      inputs.emplace_back(std::move(in));\n    }\n\n    Context ctx;\n    auto outputs = VectorAllgatherV(&ctx, inputs);\n\n    ASSERT_EQ(outputs.size(), (1 + n_workers) * n_workers / 2);\n    auto const& res = outputs;\n\n    for (std::int32_t i = 0; i < n_workers; ++i) {\n      std::int32_t k = 0;\n      for (auto v : res[i]) {\n        ASSERT_EQ(v, k++);\n      }\n    }\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_allgather.cu",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#if defined(XGBOOST_USE_NCCL)\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>  // for device_vector\n#include <thrust/equal.h>          // for equal\n#include <xgboost/span.h>          // for Span\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t, int64_t\n#include <vector>   // for vector\n\n#include \"../../../src/collective/allgather.h\"     // for RingAllgather\n#include \"../../../src/common/device_helpers.cuh\"  // for ToSpan,  device_vector\n#include \"../../../src/common/type.h\"              // for EraseType\n#include \"test_worker.cuh\"                         // for NCCLWorkerForTest\n#include \"test_worker.h\"                           // for TestDistributed, WorkerForTest\n\nnamespace xgboost::collective {\nnamespace {\nclass Worker : public NCCLWorkerForTest {\n public:\n  using NCCLWorkerForTest::NCCLWorkerForTest;\n\n  void TestV(AllgatherVAlgo algo) {\n    {\n      // basic test\n      std::size_t n = 1;\n      // create data\n      dh::device_vector<std::int32_t> data(n, comm_.Rank());\n      auto s_data = common::EraseType(common::Span{data.data().get(), data.size()});\n      // get size\n      std::vector<std::int64_t> sizes(comm_.World(), -1);\n      sizes[comm_.Rank()] = s_data.size_bytes();\n      auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});\n      SafeColl(rc);\n      // create result\n      dh::device_vector<std::int32_t> result(comm_.World(), -1);\n      auto s_result = common::EraseType(dh::ToSpan(result));\n\n      std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);\n      rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},\n                                  common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);\n      SafeColl(rc);\n\n      for (std::int32_t i = 0; i < comm_.World(); ++i) {\n        ASSERT_EQ(result[i], i);\n      }\n    }\n  
  {\n      // V test\n      std::size_t n = 256 * 256;\n      // create data\n      dh::device_vector<std::int32_t> data(n * nccl_comm_->Rank(), nccl_comm_->Rank());\n      auto s_data = common::EraseType(common::Span{data.data().get(), data.size()});\n      // get size\n      std::vector<std::int64_t> sizes(nccl_comm_->World(), 0);\n      sizes[comm_.Rank()] = dh::ToSpan(data).size_bytes();\n      auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});\n      SafeColl(rc);\n      auto n_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), 0);\n      // create result\n      dh::device_vector<std::int32_t> result(n_bytes / sizeof(std::int32_t), -1);\n      auto s_result = common::EraseType(dh::ToSpan(result));\n\n      std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);\n      rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},\n                                  common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);\n      SafeColl(rc);\n      // check segment size\n      if (algo != AllgatherVAlgo::kBcast) {\n        auto size = recv_seg[nccl_comm_->Rank() + 1] - recv_seg[nccl_comm_->Rank()];\n        ASSERT_EQ(size, n * nccl_comm_->Rank() * sizeof(std::int32_t));\n        ASSERT_EQ(size, sizes[nccl_comm_->Rank()]);\n      }\n      // check data\n      std::size_t k{0};\n      for (std::int32_t r = 0; r < nccl_comm_->World(); ++r) {\n        std::size_t s = n * r;\n        auto current = dh::ToSpan(result).subspan(k, s);\n        std::vector<std::int32_t> h_data(current.size());\n        dh::CopyDeviceSpanToVector(&h_data, current);\n        for (auto v : h_data) {\n          ASSERT_EQ(v, r);\n        }\n        k += s;\n      }\n    }\n  }\n};\n\nclass MGPUAllgatherTest : public SocketTest {};\n}  // namespace\n\nTEST_F(MGPUAllgatherTest, MGPUTestVRing) {\n  auto n_workers = curt::AllVisibleGPUs();\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, 
std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker w{host, port, timeout, n_workers, r};\n    w.Setup();\n    w.TestV(AllgatherVAlgo::kRing);\n    w.TestV(AllgatherVAlgo::kBcast);\n  });\n}\n\nTEST_F(MGPUAllgatherTest, MGPUTestVBcast) {\n  auto n_workers = curt::AllVisibleGPUs();\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker w{host, port, timeout, n_workers, r};\n    w.Setup();\n    w.TestV(AllgatherVAlgo::kBcast);\n  });\n}\n}  // namespace xgboost::collective\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "tests/cpp/collective/test_allreduce.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <numeric>  // for iota\n\n#include \"../../../src/collective/allreduce.h\"\n#include \"../../../src/collective/coll.h\"  // for Coll\n#include \"../../../src/common/type.h\"      // for EraseType\n#include \"test_worker.h\"                   // for WorkerForTest, TestDistributed\n\nnamespace xgboost::collective {\nnamespace {\nclass AllreduceWorker : public WorkerForTest {\n public:\n  using WorkerForTest::WorkerForTest;\n\n  void Basic() {\n    {\n      std::vector<double> data(13, 0.0);\n      auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {\n        for (std::size_t i = 0; i < rhs.size(); ++i) {\n          rhs[i] += lhs[i];\n        }\n      });\n      SafeColl(rc);\n      ASSERT_EQ(std::accumulate(data.cbegin(), data.cend(), 0.0), 0.0);\n    }\n    {\n      std::vector<double> data(1, 1.0);\n      auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {\n        for (std::size_t i = 0; i < rhs.size(); ++i) {\n          rhs[i] += lhs[i];\n        }\n      });\n      SafeColl(rc);\n      ASSERT_EQ(data[0], static_cast<double>(comm_.World()));\n    }\n  }\n\n  void Restricted() {\n    this->LimitSockBuf(4096);\n\n    std::size_t n = 4096 * 4;\n    std::vector<std::int32_t> data(comm_.World() * n, 1);\n    auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {\n      for (std::size_t i = 0; i < rhs.size(); ++i) {\n        rhs[i] += lhs[i];\n      }\n    });\n    SafeColl(rc);\n    for (auto v : data) {\n      ASSERT_EQ(v, comm_.World());\n    }\n  }\n\n  void Acc() {\n    std::vector<double> data(314, 1.5);\n    auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {\n      for (std::size_t i = 0; i < rhs.size(); ++i) {\n        rhs[i] += lhs[i];\n      }\n    });\n    SafeColl(rc);\n    for (std::size_t i = 
0; i < data.size(); ++i) {\n      auto v = data[i];\n      ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;\n    }\n  }\n\n  void BitOr() {\n    std::vector<std::uint32_t> data(comm_.World(), 0);\n    data[comm_.Rank()] = ~std::uint32_t{0};\n    auto pcoll = std::shared_ptr<Coll>{new Coll{}};\n    auto rc = pcoll->Allreduce(comm_, common::EraseType(common::Span{data.data(), data.size()}),\n                               ArrayInterfaceHandler::kU4, Op::kBitwiseOR);\n    SafeColl(rc);\n    for (auto v : data) {\n      ASSERT_EQ(v, ~std::uint32_t{0});\n    }\n  }\n\n  void VariableAllreduce() {\n    auto reduce_fn = [](auto a, auto b, std::vector<std::int32_t>* out) {\n      auto n = std::max(a.size(), b.size());\n      out->assign(n, 0);\n      for (std::size_t i = 0; i < a.size(); ++i) {\n        (*out)[i] += a[i];\n      }\n      for (std::size_t i = 0; i < b.size(); ++i) {\n        (*out)[i] += b[i];\n      }\n    };\n\n    for (std::size_t trial = 0; trial < 2; ++trial) {\n      std::vector<std::int32_t> data(comm_.Rank() + 1, 1);\n      auto rc = AllreduceV(comm_, &data, reduce_fn);\n      SafeColl(rc);\n\n      ASSERT_EQ(data.size(), static_cast<std::size_t>(comm_.World()));\n      for (std::size_t i = 0; i < data.size(); ++i) {\n        ASSERT_EQ(data[i], comm_.World() - static_cast<std::int32_t>(i));\n      }\n    }\n  }\n};\n\nclass AllreduceTest : public SocketTest {};\n}  // namespace\n\nTEST_F(AllreduceTest, Basic) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    AllreduceWorker worker{host, port, timeout, n_workers, r};\n    worker.Basic();\n  });\n}\n\nTEST_F(AllreduceTest, Sum) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds 
timeout,\n                                 std::int32_t r) {\n    AllreduceWorker worker{host, port, timeout, n_workers, r};\n    worker.Acc();\n  });\n}\n\nTEST_F(AllreduceTest, BitOr) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    AllreduceWorker worker{host, port, timeout, n_workers, r};\n    worker.BitOr();\n  });\n}\n\nTEST_F(AllreduceTest, AllreduceV) {\n  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    AllreduceWorker worker{host, port, timeout, n_workers, r};\n    worker.VariableAllreduce();\n  });\n}\n\nTEST_F(AllreduceTest, Restricted) {\n  std::int32_t n_workers = std::min(3u, std::thread::hardware_concurrency());\n  auto timeout = std::chrono::seconds{4};\n  TestDistributed(\n      n_workers,\n      [=](std::string host, std::int32_t port, std::chrono::seconds timeout, std::int32_t r) {\n        AllreduceWorker worker{host, port, timeout, n_workers, r};\n        worker.Restricted();\n      },\n      timeout);\n}\n\nTEST(AllreduceGlobal, Basic) {\n  auto n_workers = 3;\n  TestDistributedGlobal(n_workers, [&]() {\n    std::vector<float> values(n_workers * 2, 0);\n    auto rank = GetRank();\n    auto s_values = common::Span{values.data(), values.size()};\n    auto self = s_values.subspan(rank * 2, 2);\n    for (auto& v : self) {\n      v = 1.0f;\n    }\n    Context ctx;\n    auto rc =\n        Allreduce(&ctx, linalg::MakeVec(s_values.data(), s_values.size()), collective::Op::kSum);\n    SafeColl(rc);\n    for (auto v : s_values) {\n      ASSERT_EQ(v, 1);\n    }\n  });\n}\n\nTEST(AllreduceGlobal, Small) {\n  // Test when the data is not large enougth to be divided by the number of 
workers\n  auto n_workers = 8;\n  TestDistributedGlobal(n_workers, [&]() {\n    std::uint64_t value{1};\n    Context ctx;\n    auto rc = Allreduce(&ctx, linalg::MakeVec(&value, 1), collective::Op::kSum);\n    SafeColl(rc);\n    ASSERT_EQ(value, n_workers);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_allreduce.cu",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#if defined(XGBOOST_USE_NCCL)\n#include <gtest/gtest.h>\n#include <thrust/host_vector.h>  // for host_vector\n\n#include \"../../../src/collective/comm.cuh\"        // for NCCLComm\n#include \"../../../src/common/cuda_rt_utils.h\"     // for AllVisibleGPUs\n#include \"../../../src/common/device_helpers.cuh\"  // for ToSpan,  device_vector\n#include \"../../../src/common/type.h\"              // for EraseType\n#include \"test_worker.cuh\"                         // for NCCLWorkerForTest\n#include \"test_worker.h\"                           // for WorkerForTest, TestDistributed\n\nnamespace xgboost::collective {\nnamespace {\nclass MGPUAllreduceTest : public SocketTest {};\n\nclass Worker : public NCCLWorkerForTest {\n public:\n  using NCCLWorkerForTest::NCCLWorkerForTest;\n\n  bool SkipIfOld() {\n    auto nccl = dynamic_cast<NCCLComm const*>(nccl_comm_.get());\n    std::int32_t major = 0, minor = 0, patch = 0;\n    SafeColl(nccl->Stub()->GetVersion(&major, &minor, &patch));\n    CHECK_GE(major, 2);\n    bool too_old = minor < 23;\n    if (too_old) {\n      LOG(INFO) << \"NCCL compile version:\" << NCCL_VERSION_CODE << \" runtime version:\" << major\n                << \".\" << minor << \".\" << patch;\n    }\n    return too_old;\n  }\n\n  void BitOr() {\n    dh::device_vector<std::uint32_t> data(comm_.World(), 0);\n    data[comm_.Rank()] = ~std::uint32_t{0};\n    auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),\n                                    ArrayInterfaceHandler::kU4, Op::kBitwiseOR);\n    SafeColl(rc);\n    thrust::host_vector<std::uint32_t> h_data(data.size());\n    thrust::copy(data.cbegin(), data.cend(), h_data.begin());\n    for (auto v : h_data) {\n      ASSERT_EQ(v, ~std::uint32_t{0});\n    }\n  }\n\n  void Acc() {\n    dh::device_vector<double> data(314, 1.5);\n    auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),\n  
                                  ArrayInterfaceHandler::kF8, Op::kSum);\n    SafeColl(rc);\n    for (std::size_t i = 0; i < data.size(); ++i) {\n      auto v = data[i];\n      ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;\n    }\n  }\n\n  Result NoCheck() {\n    dh::device_vector<double> data(314, 1.5);\n    auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),\n                                    ArrayInterfaceHandler::kF8, Op::kSum);\n    return rc;\n  }\n\n  ~Worker() noexcept(false) override = default;\n};\n}  // namespace\n\nTEST_F(MGPUAllreduceTest, BitOr) {\n  auto n_workers = curt::AllVisibleGPUs();\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker w{host, port, timeout, n_workers, r};\n    w.Setup();\n    w.BitOr();\n  });\n}\n\nTEST_F(MGPUAllreduceTest, Sum) {\n  auto n_workers = curt::AllVisibleGPUs();\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker w{host, port, timeout, n_workers, r};\n    w.Setup();\n    w.Acc();\n  });\n}\n\nTEST_F(MGPUAllreduceTest, Timeout) {\n  auto n_workers = curt::AllVisibleGPUs();\n  if (n_workers <= 1) {\n    GTEST_SKIP_(\"Requires more than one GPU to run.\");\n  }\n  using std::chrono_literals::operator\"\"s;\n\n  TestDistributed(\n      n_workers,\n      [=](std::string host, std::int32_t port, std::chrono::seconds, std::int32_t r) {\n        auto w = std::make_unique<Worker>(host, port, 1s, n_workers, r);\n        w->Setup();\n        if (w->SkipIfOld()) {\n          GTEST_SKIP_(\"nccl is too old.\");\n          return;\n        }\n        // 1s for worker timeout, sleeping for 2s should trigger a timeout error.\n        if (r == 0) {\n          std::this_thread::sleep_for(2s);\n        }\n        auto rc = w->NoCheck();\n        if (r == 1) {\n       
   auto rep = rc.Report();\n          ASSERT_NE(rep.find(\"NCCL timeout:\"), std::string::npos) << rep;\n        }\n\n        w.reset();\n      },\n      // We use 8s for the tracker to make sure shutdown is successful.\n      8s);\n\n  TestDistributed(\n      n_workers,\n      [=](std::string host, std::int32_t port, std::chrono::seconds, std::int32_t r) {\n        auto w = std::make_unique<Worker>(host, port, 1s, n_workers, r);\n        w->Setup();\n        if (w->SkipIfOld()) {\n          GTEST_SKIP_(\"nccl is too old.\");\n          return;\n        }\n        // Only one of the workers is doing allreduce.\n        if (r == 0) {\n          auto rc = w->NoCheck();\n          ASSERT_NE(rc.Report().find(\"NCCL timeout:\"), std::string::npos) << rc.Report();\n        }\n\n        w.reset();\n      },\n      // We use 8s for the tracker to make sure shutdown is successful.\n      8s);\n}\n}  // namespace xgboost::collective\n#endif  // defined(XGBOOST_USE_NCCL)\n"
  },
  {
    "path": "tests/cpp/collective/test_broadcast.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/collective/socket.h>\n\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n#include <thread>   // for thread\n#include <vector>   // for vector\n\n#include \"../../../src/collective/broadcast.h\"  // for Broadcast\n#include \"test_worker.h\"                        // for WorkerForTest, TestDistributed\n\nnamespace xgboost::collective {\nnamespace {\nclass Worker : public WorkerForTest {\n public:\n  using WorkerForTest::WorkerForTest;\n\n  void Run() {\n    for (std::int32_t r = 0; r < comm_.World(); ++r) {\n      // basic test\n      std::vector<std::int32_t> data(1, comm_.Rank());\n      auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);\n      SafeColl(rc);\n      ASSERT_EQ(data[0], r);\n    }\n\n    for (std::int32_t r = 0; r < comm_.World(); ++r) {\n      std::vector<std::int32_t> data(1 << 16, comm_.Rank());\n      auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);\n      SafeColl(rc);\n      ASSERT_EQ(data[0], r);\n    }\n  }\n};\n\nclass BroadcastTest : public SocketTest {};\n}  // namespace\n\nTEST_F(BroadcastTest, Basic) {\n  std::int32_t n_workers = std::min(2u, std::thread::hardware_concurrency());\n  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Worker worker{host, port, timeout, n_workers, r};\n    worker.Run();\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_coll_c_api.cc",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/c_api.h>\n\n#include <chrono>  // for \"\"s\n#include <thread>  // for thread\n\n#include \"../../../src/collective/allgather.h\"  // for RingAllgather\n#include \"../../../src/collective/tracker.h\"\n#include \"test_worker.h\"   // for SocketTest\n#include \"xgboost/json.h\"  // for Json\n\nnamespace xgboost::collective {\nnamespace {\nclass TrackerAPITest : public SocketTest {};\n}  // namespace\n\nTEST_F(TrackerAPITest, CAPI) {\n  TrackerHandle handle;\n  Json config{Object{}};\n  std::int32_t n_workers{2};\n  config[\"dmlc_communicator\"] = String{\"rabit\"};\n  config[\"n_workers\"] = n_workers;\n  config[\"timeout\"] = 1;\n  auto config_str = Json::Dump(config);\n  auto rc = XGTrackerCreate(config_str.c_str(), &handle);\n  ASSERT_EQ(rc, 0);\n  rc = XGTrackerRun(handle, nullptr);\n  ASSERT_EQ(rc, 0);\n\n  std::thread bg_wait{[&] {\n    Json config{Object{}};\n    auto config_str = Json::Dump(config);\n    auto rc = XGTrackerWaitFor(handle, config_str.c_str());\n    ASSERT_EQ(rc, 0);\n  }};\n\n  char const* cargs;\n  rc = XGTrackerWorkerArgs(handle, &cargs);\n  ASSERT_EQ(rc, 0);\n  auto args = Json::Load(StringView{cargs});\n\n  std::string host;\n  SafeColl(GetHostAddress(&host));\n  ASSERT_EQ(host, get<String const>(args[\"dmlc_tracker_uri\"]));\n  auto port = get<Integer const>(args[\"dmlc_tracker_port\"]);\n  ASSERT_NE(port, 0);\n\n  std::vector<std::thread> workers;\n  using std::chrono_literals::operator\"\"s;\n  for (std::int32_t r = 0; r < n_workers; ++r) {\n    workers.emplace_back([=] {\n      WorkerForTest w{host, static_cast<std::int32_t>(port), 8s, n_workers, r};\n      // basic test\n      std::vector<std::int32_t> data(w.Comm().World(), 0);\n      data[w.Comm().Rank()] = w.Comm().Rank();\n\n      auto rc = RingAllgather(w.Comm(), common::Span{data.data(), data.size()});\n      SafeColl(rc);\n\n      for (std::int32_t r = 0; r < 
w.Comm().World(); ++r) {\n        ASSERT_EQ(data[r], r);\n      }\n    });\n  }\n  for (auto& w : workers) {\n    w.join();\n  }\n\n  rc = XGTrackerFree(handle);\n  ASSERT_EQ(rc, 0);\n\n  bg_wait.join();\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_comm.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include \"../../../src/collective/comm.h\"\n#include \"../../../src/common/type.h\"  // for EraseType\n#include \"test_worker.h\"               // for TrackerTest\n\nnamespace xgboost::collective {\nnamespace {\nclass CommTest : public TrackerTest {};\n}  // namespace\n\nTEST_F(CommTest, Channel) {\n  auto n_workers = 4;\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=] {\n      WorkerForTest worker{host, port, timeout, n_workers, i};\n      if (i % 2 == 0) {\n        auto p_chan = worker.Comm().Chan(i + 1);\n        auto rc = Success() << [&] {\n          return p_chan->SendAll(\n              EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));\n        } << [&] { return p_chan->Block(); };\n        SafeColl(rc);\n      } else {\n        auto p_chan = worker.Comm().Chan(i - 1);\n        std::int32_t r{-1};\n        auto rc = Success() << [&] {\n          return p_chan->RecvAll(\n              EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));\n        } << [&] { return p_chan->Block(); };\n        SafeColl(rc);\n        ASSERT_EQ(r, i - 1);\n      }\n    });\n  }\n\n  for (auto &w : workers) {\n    w.join();\n  }\n\n  SafeColl(fut.get());\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_comm_group.cc",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>  // for Json\n\n#include <chrono>   // for seconds\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n#include <thread>   // for thread\n\n#include \"../../../src/collective/comm.h\"\n#include \"../../../src/collective/comm_group.h\"\n#include \"../../../src/common/common.h\"  // for AllVisibleGPUs\n#include \"../helpers.h\"                  // for MakeCUDACtx\n#include \"test_worker.h\"                 // for TestDistributed\n\nnamespace xgboost::collective {\nnamespace {\nclass CommGroupTest : public SocketTest {};\n}  // namespace\n\nTEST_F(CommGroupTest, Basic) {\n  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 5u);\n  TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    Context ctx;\n    auto config = MakeDistributedTestConfig(host, port, timeout, r);\n    std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};\n    ASSERT_TRUE(ptr->IsDistributed());\n    ASSERT_EQ(ptr->World(), n_workers);\n    auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CPU());\n    ASSERT_EQ(comm.TaskID(), std::to_string(r));\n    ASSERT_EQ(comm.Retry(), 2);\n  });\n}\n\n#if defined(XGBOOST_USE_NCCL)\nTEST_F(CommGroupTest, BasicMGPU) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,\n                                 std::int32_t r) {\n    auto ctx = MakeCUDACtx(r);\n    auto config = MakeDistributedTestConfig(host, port, timeout, r);\n    std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};\n    auto const& comm = ptr->Ctx(&ctx, ctx.Device());\n    ASSERT_EQ(comm.TaskID(), std::to_string(r));\n    ASSERT_EQ(comm.Retry(), 2);\n  });\n}\n#endif  // for defined(XGBOOST_USE_NCCL)\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_loop.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>                // for ASSERT_TRUE, ASSERT_EQ\n#include <xgboost/collective/socket.h>  // for TCPSocket, Connect, SocketFinalize, SocketStartup\n#include <xgboost/string_view.h>        // for StringView\n\n#include <chrono>        // for seconds\n#include <cstdint>       // for int8_t\n#include <memory>        // for make_shared, shared_ptr\n#include <system_error>  // for make_error_code, errc\n#include <utility>       // for pair\n#include <vector>        // for vector\n\n#include \"../../../src/collective/loop.h\"  // for Loop\n\nnamespace xgboost::collective {\nnamespace {\nclass LoopTest : public ::testing::Test {\n protected:\n  std::pair<TCPSocket, TCPSocket> pair_;\n  std::shared_ptr<Loop> loop_;\n\n protected:\n  void SetUp() override {\n    system::SocketStartup();\n    std::chrono::seconds timeout{1};\n\n    auto domain = SockDomain::kV4;\n    pair_.first = TCPSocket::Create(domain);\n    std::int32_t port{0};\n    auto rc = Success() << [&] {\n      return pair_.first.BindHost(&port);\n    } << [&] {\n      return pair_.first.Listen();\n    };\n    SafeColl(rc);\n\n    auto const& addr = SockAddrV4::Loopback().Addr();\n    rc = Connect(StringView{addr}, port, 1, timeout, &pair_.second);\n    SafeColl(rc);\n    rc = pair_.second.NonBlocking(true);\n    SafeColl(rc);\n\n    pair_.first = pair_.first.Accept();\n    rc = pair_.first.NonBlocking(true);\n    SafeColl(rc);\n\n    loop_ = std::shared_ptr<Loop>{new Loop{timeout}};\n  }\n\n  void TearDown() override {\n    pair_ = decltype(pair_){};\n    system::SocketFinalize();\n  }\n};\n}  // namespace\n\nTEST_F(LoopTest, Timeout) {\n  std::vector<std::int8_t> data(1);\n  Loop::Op op{Loop::Op::kRead, 0, data.data(), data.size(), &pair_.second, 0};\n  loop_->Submit(std::move(op));\n  auto rc = loop_->Block();\n  ASSERT_FALSE(rc.OK());\n  ASSERT_EQ(rc.Code(), std::make_error_code(std::errc::timed_out)) << 
rc.Report();\n}\n\nTEST_F(LoopTest, Op) {\n  TCPSocket& send = pair_.first;\n  TCPSocket& recv = pair_.second;\n\n  std::vector<std::int8_t> wbuf(1, 1);\n  std::vector<std::int8_t> rbuf(1, 0);\n\n  Loop::Op wop{Loop::Op::kWrite, 0, wbuf.data(), wbuf.size(), &send, 0};\n  Loop::Op rop{Loop::Op::kRead, 0, rbuf.data(), rbuf.size(), &recv, 0};\n\n  loop_->Submit(std::move(wop));\n  loop_->Submit(std::move(rop));\n\n  auto rc = loop_->Block();\n  SafeColl(rc);\n\n  ASSERT_EQ(rbuf[0], wbuf[0]);\n}\n\nTEST_F(LoopTest, Block) {\n  // We need to ensure that a blocking call doesn't go unanswered.\n  auto op = Loop::Op::Sleep(2);\n\n  common::Timer t;\n  t.Start();\n  loop_->Submit(std::move(op));\n  t.Stop();\n  // submit is non-blocking\n  ASSERT_LT(t.ElapsedSeconds(), 1);\n\n  t.Start();\n  auto rc = loop_->Block();\n  t.Stop();\n  SafeColl(rc);\n  ASSERT_GE(t.ElapsedSeconds(), 1);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_result.cc",
    "content": "/**\n *  Copyright 2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/collective/result.h>\n\nnamespace xgboost::collective {\nTEST(Result, Concat) {\n  auto rc0 = Fail(\"foo\");\n  auto rc1 = Fail(\"bar\");\n  auto rc = std::move(rc0) + std::move(rc1);\n  ASSERT_NE(rc.Report().find(\"foo\"), std::string::npos);\n  ASSERT_NE(rc.Report().find(\"bar\"), std::string::npos);\n\n  auto rc2 = Fail(\"Another\", std::move(rc));\n  auto assert_that = [](Result const& rc) {\n    ASSERT_NE(rc.Report().find(\"Another\"), std::string::npos);\n    ASSERT_NE(rc.Report().find(\"foo\"), std::string::npos);\n    ASSERT_NE(rc.Report().find(\"bar\"), std::string::npos);\n  };\n  assert_that(rc2);\n\n  auto empty = Success();\n  auto rc3 = std::move(empty) + std::move(rc2);\n  assert_that(rc3);\n\n  empty = Success();\n  auto rc4 = std::move(rc3) + std::move(empty);\n  assert_that(rc4);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_socket.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/collective/socket.h>\n\n#include <cerrno>        // EADDRNOTAVAIL\n#include <system_error>  // std::error_code, std::system_category\n\n#include \"test_worker.h\"  // for SocketTest\n\nnamespace xgboost::collective {\nTEST_F(SocketTest, Basic) {\n  SockAddress addr{SockAddrV6::Loopback()};\n  ASSERT_TRUE(addr.IsV6());\n  addr = SockAddress{SockAddrV4::Loopback()};\n  ASSERT_TRUE(addr.IsV4());\n\n  std::string msg{\"Skipping IPv6 test\"};\n\n  auto run_test = [msg](SockDomain domain) {\n    auto server = TCPSocket::Create(domain);\n    ASSERT_EQ(server.Domain(), domain);\n    std::int32_t port{0};\n    auto rc = Success() << [&] {\n      return server.BindHost(&port);\n    } << [&] {\n      return server.Listen();\n    };\n    SafeColl(rc);\n\n    TCPSocket client;\n    if (domain == SockDomain::kV4) {\n      auto const& addr = SockAddrV4::Loopback().Addr();\n      auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);\n      SafeColl(rc);\n    } else {\n      auto const& addr = SockAddrV6::Loopback().Addr();\n      auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);\n      // some environment (docker) has restricted network configuration.\n      if (!rc.OK() && rc.Code() == std::error_code{EADDRNOTAVAIL, std::system_category()}) {\n        GTEST_SKIP_(msg.c_str());\n      }\n      ASSERT_EQ(rc, Success()) << rc.Report();\n    }\n    ASSERT_EQ(client.Domain(), domain);\n\n    auto accepted = server.Accept();\n    StringView msg{\"Hello world.\"};\n    accepted.Send(msg);\n\n    std::string str;\n    rc = client.Recv(&str);\n    SafeColl(rc);\n    ASSERT_EQ(StringView{str}, msg);\n  };\n\n  run_test(SockDomain::kV4);\n\n  if (SkipTest()) {\n    GTEST_SKIP_(skip_msg_.c_str());\n  }\n  run_test(SockDomain::kV6);\n}\n\nTEST_F(SocketTest, Bind) {\n  auto run = [](SockDomain domain) {\n    auto any =\n     
   domain == SockDomain::kV4 ? SockAddrV4::InaddrAny().Addr() : SockAddrV6::InaddrAny().Addr();\n    auto sock = TCPSocket::Create(domain);\n    std::int32_t port{0};\n    auto rc = sock.Bind(any, &port);\n    SafeColl(rc);\n    ASSERT_NE(port, 0);\n  };\n\n  run(SockDomain::kV4);\n  if (SkipTest()) {\n    GTEST_SKIP_(skip_msg_.c_str());\n  }\n  run(SockDomain::kV6);\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_tracker.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <chrono>   // for seconds\n#include <cstdint>  // for int32_t\n#include <string>   // for string\n#include <thread>   // for thread\n#include <vector>   // for vector\n\n#include \"../../../src/collective/comm.h\"\n#include \"../helpers.h\"  // for GMockThrow\n#include \"test_worker.h\"\n\nnamespace xgboost::collective {\nnamespace {\nclass PrintWorker : public WorkerForTest {\n public:\n  using WorkerForTest::WorkerForTest;\n\n  void Print() {\n    auto rc = comm_.LogTracker(\"ack:\" + std::to_string(this->comm_.Rank()));\n    SafeColl(rc);\n  }\n};\n}  // namespace\n\nTEST_F(TrackerTest, Bootstrap) {\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};\n  ASSERT_TRUE(HasTimeout(tracker.Timeout()));\n  ASSERT_FALSE(tracker.Ready());\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n\n  auto args = tracker.WorkerArgs();\n  ASSERT_TRUE(tracker.Ready());\n  ASSERT_EQ(get<String const>(args[\"dmlc_tracker_uri\"]), host);\n\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=] { WorkerForTest worker{host, port, timeout, n_workers, i}; });\n  }\n  for (auto &w : workers) {\n    w.join();\n  }\n  SafeColl(fut.get());\n\n  ASSERT_FALSE(HasTimeout(std::chrono::seconds{-1}));\n  ASSERT_FALSE(HasTimeout(std::chrono::seconds{0}));\n}\n\nTEST_F(TrackerTest, Print) {\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n  auto rc = tracker.WaitUntilReady();\n  SafeColl(rc);\n\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=] {\n      PrintWorker worker{host, port, timeout, n_workers, i};\n      worker.Print();\n    });\n  }\n\n  for (auto &w : workers) {\n    w.join();\n  }\n\n  
SafeColl(fut.get());\n}\n\nTEST_F(TrackerTest, GetHostAddress) { ASSERT_TRUE(host.find(\"127.\") == std::string::npos); }\n\n/**\n * Test connecting the tracker after it has finished. This should not hang the workers.\n */\nTEST_F(TrackerTest, AfterShutdown) {\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n  auto rc = tracker.WaitUntilReady();\n  SafeColl(rc);\n\n  std::int32_t port = tracker.Port();\n\n  // Launch no-op workers to cause the tracker to shutdown.\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=] { WorkerForTest worker{host, port, timeout, n_workers, i}; });\n  }\n\n  for (auto &w : workers) {\n    w.join();\n  }\n\n  SafeColl(fut.get());\n\n  // Launch workers again, they should fail.\n  workers.clear();\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    auto assert_that = [=] {\n      WorkerForTest worker{host, port, timeout, n_workers, i};\n    };\n    // On a Linux platform, the connection will be refused, on Apple platform, this gets\n    // an operation now in progress poll failure, on Windows, it's a timeout error.\n#if defined(__linux__)\n    workers.emplace_back([=] { ASSERT_THAT(assert_that, GMockThrow(\"Connection refused\")); });\n#else\n    workers.emplace_back([=] { ASSERT_THAT(assert_that, GMockThrow(\"Failed to connect to\")); });\n#endif\n  }\n  for (auto &w : workers) {\n    w.join();\n  }\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_worker.cuh",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#pragma once\n#include <memory>  // for shared_ptr\n\n#include \"../../../src/collective/coll.h\"  // for Coll\n#include \"../../../src/collective/comm.h\"  // for Comm\n#include \"test_worker.h\"\n#include \"xgboost/context.h\"  // for Context\n\nnamespace xgboost::collective {\nclass NCCLWorkerForTest : public WorkerForTest {\n protected:\n  std::shared_ptr<Coll> coll_;\n  std::shared_ptr<xgboost::collective::Comm> nccl_comm_;\n  std::shared_ptr<Coll> nccl_coll_;\n  Context ctx_;\n\n public:\n  using WorkerForTest::WorkerForTest;\n\n  void Setup() {\n    ctx_ = MakeCUDACtx(comm_.Rank());\n    coll_.reset(new Coll{});\n    nccl_comm_.reset(this->comm_.MakeCUDAVar(&ctx_, coll_));\n    nccl_coll_.reset(coll_->MakeCUDAVar());\n    ASSERT_EQ(comm_.World(), nccl_comm_->World());\n    ASSERT_EQ(comm_.Rank(), nccl_comm_->Rank());\n  }\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/collective/test_worker.h",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#pragma once\n#include <gtest/gtest.h>\n#include <xgboost/global_config.h>  // for InitNewThread\n\n#include <algorithm>  // for max\n#include <chrono>     // for seconds\n#include <cstdint>    // for int32_t\n#include <fstream>    // for ifstream\n#include <string>     // for string\n#include <thread>     // for thread\n#include <utility>    // for move\n#include <vector>     // for vector\n\n#include \"../../../src/collective/comm.h\"              // for RabitComm\n#include \"../../../src/collective/communicator-inl.h\"  // for Init, Finalize\n#include \"../../../src/collective/tracker.h\"           // for GetHostAddress\n#include \"../../../src/common/cuda_rt_utils.h\"         // for AllVisibleGPUs\n#include \"../../../src/common/threading_utils.h\"       // for NameThread\n#include \"../helpers.h\"                                // for FileExists\n\n#if defined(XGBOOST_USE_FEDERATED)\n#include \"../plugin/federated/test_worker.h\"\n#endif  // defined(XGBOOST_USE_FEDERATED)\n\nnamespace xgboost::collective {\nclass WorkerForTest {\n  std::string tracker_host_;\n  std::int32_t tracker_port_;\n  std::int32_t world_size_;\n\n protected:\n  std::int32_t retry_{1};\n  std::string task_id_;\n  RabitComm comm_;\n\n public:\n  WorkerForTest(std::string host, std::int32_t port, std::chrono::seconds timeout,\n                std::int32_t world, std::int32_t rank)\n      : tracker_host_{std::move(host)},\n        tracker_port_{port},\n        world_size_{world},\n        task_id_{\"t:\" + std::to_string(rank)},\n        comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_, DefaultNcclName(), 0} {\n    CHECK_EQ(world_size_, comm_.World());\n  }\n  virtual ~WorkerForTest() noexcept(false) { SafeColl(comm_.Shutdown()); }\n  auto& Comm() { return comm_; }\n\n  void LimitSockBuf(std::int32_t n_bytes) {\n    for (std::int32_t i = 0; i < comm_.World(); ++i) {\n      if (i != comm_.Rank() && 
comm_.HasChan(i)) {\n        ASSERT_TRUE(comm_.Chan(i)->Socket()->NonBlocking());\n        SafeColl(comm_.Chan(i)->Socket()->SetBufSize(n_bytes));\n        SafeColl(comm_.Chan(i)->Socket()->SetNoDelay());\n      }\n    }\n  }\n};\n\nclass SocketTest : public ::testing::Test {\n protected:\n  std::string skip_msg_{\"Skipping IPv6 test\"};\n\n  bool SkipTest() {\n    std::string path{\"/sys/module/ipv6/parameters/disable\"};\n    if (FileExists(path)) {\n      std::ifstream fin(path);\n      if (!fin) {\n        return true;\n      }\n      std::string s_value;\n      fin >> s_value;\n      auto value = std::stoi(s_value);\n      if (value != 0) {\n        return true;\n      }\n    } else {\n      return true;\n    }\n    return false;\n  }\n\n protected:\n  void SetUp() override { system::SocketStartup(); }\n  void TearDown() override { system::SocketFinalize(); }\n};\n\nclass TrackerTest : public SocketTest {\n public:\n  std::int32_t n_workers{2};\n  std::chrono::seconds timeout{1};\n  std::string host;\n\n  void SetUp() override {\n    SocketTest::SetUp();\n    auto rc = GetHostAddress(&host);\n    SafeColl(rc);\n  }\n};\n\ninline Json MakeTrackerConfig(std::string host, std::int32_t n_workers,\n                              std::chrono::seconds timeout) {\n  Json config{Object{}};\n  config[\"host\"] = host;\n  config[\"port\"] = Integer{0};\n  config[\"n_workers\"] = Integer{n_workers};\n  config[\"sortby\"] = Integer{static_cast<std::int32_t>(Tracker::SortBy::kHost)};\n  config[\"timeout\"] = static_cast<std::int64_t>(timeout.count());\n  return config;\n}\n\ntemplate <typename WorkerFn>\nvoid TestDistributed(std::int32_t n_workers, WorkerFn worker_fn,\n                     std::chrono::seconds timeout = std::chrono::seconds{3}) {\n  std::string host;\n  auto rc = GetHostAddress(&host);\n  SafeColl(rc);\n  LOG(INFO) << \"Using \" << n_workers << \" workers for test.\";\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};\n  auto fut = 
tracker.Run();\n\n  std::vector<std::thread> workers;\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=, init = InitNewThread{}] {\n      init();\n      worker_fn(host, port, timeout, i);\n    });\n  }\n\n  for (auto& t : workers) {\n    t.join();\n  }\n\n  SafeColl(fut.get());\n}\n\ninline auto MakeDistributedTestConfig(std::string host, std::int32_t port,\n                                      std::chrono::seconds timeout, std::int32_t r) {\n  Json config{Object{}};\n  config[\"dmlc_communicator\"] = std::string{\"rabit\"};\n  config[\"dmlc_tracker_uri\"] = host;\n  config[\"dmlc_tracker_port\"] = port;\n  config[\"dmlc_timeout\"] = static_cast<std::int64_t>(timeout.count());\n  config[\"dmlc_task_id\"] = std::to_string(r);\n  config[\"dmlc_retry\"] = 2;\n  return config;\n}\n\ntemplate <typename WorkerFn>\nvoid TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need_finalize = true,\n                           std::chrono::seconds test_timeout = std::chrono::seconds{30}) {\n  system::SocketStartup();\n  std::chrono::seconds poll_timeout{5};\n\n  std::string host;\n  auto rc = GetHostAddress(&host);\n  SafeColl(rc);\n\n  RabitTracker tracker{MakeTrackerConfig(host, n_workers, poll_timeout)};\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=, init = InitNewThread{}] {\n      init();\n      auto fut = std::async(std::launch::async, [=] {\n        init();\n        auto config = MakeDistributedTestConfig(host, port, poll_timeout, i);\n        Init(config);\n        worker_fn();\n        if (need_finalize) {\n          Finalize();\n        }\n      });\n      auto status = fut.wait_for(test_timeout);\n      CHECK(status == std::future_status::ready) << \"Test timeout\";\n      fut.get();\n    });\n\n    std::string name = \"tw-\" + 
std::to_string(i);\n    common::NameThread(&workers.back(), name.c_str());\n  }\n\n  for (auto& t : workers) {\n    t.join();\n  }\n\n  SafeColl(fut.get());\n  system::SocketFinalize();\n}\n\n[[nodiscard]] inline std::int32_t GetWorkerLocalThreads(std::int32_t n_workers) {\n  std::int32_t n_total_threads = std::thread::hardware_concurrency();\n  auto n_threads = std::max(n_total_threads / n_workers, 1);\n  return n_threads;\n}\n\ninline void GetWorkerLocalThreads(std::int32_t n_workers, Context* ctx) {\n  auto n_threads = GetWorkerLocalThreads(n_workers);\n  ctx->UpdateAllowUnknown(\n      Args{{\"nthread\", std::to_string(n_threads)}, {\"device\", ctx->DeviceName()}});\n}\n\nclass BaseMGPUTest : public ::testing::Test {\n public:\n  /**\n   * @param emulate_if_single Emulate multi-GPU for federated test if there's only one GPU\n   *                          available.\n   */\n  template <typename Fn>\n  auto DoTest([[maybe_unused]] Fn&& fn, bool is_federated,\n              [[maybe_unused]] bool emulate_if_single = false) const {\n    auto n_gpus = curt::AllVisibleGPUs();\n    if (is_federated) {\n#if defined(XGBOOST_USE_FEDERATED)\n      if (n_gpus == 1 && emulate_if_single) {\n        // Emulate multiple GPUs.\n        // We don't use nccl and can have multiple communicators running on the same device.\n        n_gpus = 3;\n      }\n      TestFederatedGlobal(n_gpus, fn);\n#else\n      GTEST_SKIP_(\"Not compiled with federated learning.\");\n#endif  // defined(XGBOOST_USE_FEDERATED)\n    } else {\n#if defined(XGBOOST_USE_NCCL)\n      TestDistributedGlobal(n_gpus, fn);\n#else\n      GTEST_SKIP_(\"Not compiled with NCCL.\");\n#endif  // defined(XGBOOST_USE_NCCL)\n    }\n  }\n};\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/common/test_algorithm.cc",
    "content": "/**\n * Copyright 2020-2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // Context\n#include <xgboost/span.h>\n\n#include <algorithm>  // is_sorted\n\n#include \"../../../src/common/algorithm.h\"\n\nnamespace xgboost {\nnamespace common {\nTEST(Algorithm, ArgSort) {\n  Context ctx;\n  std::vector<float> inputs{3.0, 2.0, 1.0};\n  auto ret = ArgSort<bst_feature_t>(&ctx, inputs.cbegin(), inputs.cend());\n  std::vector<bst_feature_t> sol{2, 1, 0};\n  ASSERT_EQ(ret, sol);\n}\n\nTEST(Algorithm, Sort) {\n  Context ctx;\n  ctx.Init(Args{{\"nthread\", \"8\"}});\n  std::vector<float> inputs{3.0, 1.0, 2.0};\n\n  Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{});\n  ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));\n\n  inputs = {3.0, 1.0, 2.0};\n  StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{});\n  ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_algorithm.cu",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/copy.h>      // copy\n#include <thrust/sequence.h>  // sequence\n#include <thrust/sort.h>      // is_sorted\n\n#include <algorithm>          // is_sorted\n#include <cstddef>            // size_t\n\n#include \"../../../src/common/algorithm.cuh\"\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../helpers.h\"  // MakeCUDACtx\n\nnamespace xgboost::common {\nvoid TestSegmentedArgSort() {\n  auto ctx = MakeCUDACtx(0);\n\n  size_t constexpr kElements = 100, kGroups = 3;\n  dh::device_vector<size_t> sorted_idx(kElements, 0);\n  dh::device_vector<size_t> offset_ptr(kGroups + 1, 0);\n  offset_ptr[0] = 0;\n  offset_ptr[1] = 2;\n  offset_ptr[2] = 78;\n  offset_ptr[kGroups] = kElements;\n  auto d_offset_ptr = dh::ToSpan(offset_ptr);\n\n  auto d_sorted_idx = dh::ToSpan(sorted_idx);\n  dh::LaunchN(sorted_idx.size(), [=] XGBOOST_DEVICE(size_t idx) {\n    auto group = dh::SegmentId(d_offset_ptr, idx);\n    d_sorted_idx[idx] = idx - d_offset_ptr[group];\n  });\n\n  dh::device_vector<float> values(kElements, 0.0f);\n  thrust::sequence(values.begin(), values.end(), 0.0f);\n  SegmentedArgSort<false, true>(&ctx, dh::ToSpan(values), d_offset_ptr, d_sorted_idx);\n\n  std::vector<size_t> h_sorted_index(sorted_idx.size());\n  thrust::copy(sorted_idx.begin(), sorted_idx.end(), h_sorted_index.begin());\n\n  for (size_t i = 1; i < kGroups + 1; ++i) {\n    auto group_sorted_idx =\n        Span<size_t>(h_sorted_index).subspan(offset_ptr[i - 1], offset_ptr[i] - offset_ptr[i - 1]);\n    ASSERT_TRUE(std::is_sorted(group_sorted_idx.begin(), group_sorted_idx.end(), std::greater<>{}));\n    ASSERT_EQ(group_sorted_idx.back(), 0);\n    for (auto j : group_sorted_idx) {\n      ASSERT_LT(j, group_sorted_idx.size());\n    }\n  }\n}\n\nTEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }\n\nTEST(Algorithm, GpuArgSort) {\n  auto ctx = MakeCUDACtx(0);\n\n  
dh::device_vector<float> values(20);\n  dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());  // ascending\n  dh::device_vector<size_t> sorted_idx(20);\n  ArgSort<false>(&ctx, dh::ToSpan(values), dh::ToSpan(sorted_idx));  // sort to descending\n  ASSERT_TRUE(thrust::is_sorted(ctx.CUDACtx()->CTP(), sorted_idx.begin(), sorted_idx.end(),\n                                thrust::greater<size_t>{}));\n\n  dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());\n  dh::device_vector<size_t> groups(3);\n  groups[0] = 0;\n  groups[1] = 10;\n  groups[2] = 20;\n  SegmentedArgSort<false, false>(&ctx, dh::ToSpan(values), dh::ToSpan(groups),\n                                 dh::ToSpan(sorted_idx));\n  ASSERT_FALSE(thrust::is_sorted(thrust::device, sorted_idx.begin(), sorted_idx.end(),\n                                 thrust::greater<size_t>{}));\n  ASSERT_TRUE(\n      thrust::is_sorted(sorted_idx.begin(), sorted_idx.begin() + 10, thrust::greater<size_t>{}));\n  ASSERT_TRUE(\n      thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));\n}\n\nTEST(Algorithm, SegmentedSequence) {\n  dh::device_vector<std::size_t> idx(16);\n  dh::device_vector<std::size_t> ptr(3);\n  Context ctx = MakeCUDACtx(0);\n  ptr[0] = 0;\n  ptr[1] = 4;\n  ptr[2] = idx.size();\n  SegmentedSequence(&ctx, dh::ToSpan(ptr), dh::ToSpan(idx));\n  ASSERT_EQ(idx[0], 0);\n  ASSERT_EQ(idx[4], 0);\n  ASSERT_EQ(idx[3], 3);\n  ASSERT_EQ(idx[15], 11);\n}\n\nnamespace {\nvoid TestAllOf(std::size_t n) {\n  auto ctx = MakeCUDACtx(0);\n  dh::device_vector<double> values(n);\n  dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());\n  EXPECT_TRUE(AllOf(ctx.CUDACtx()->CTP(), values.cbegin(), values.cend(),\n                    [n] XGBOOST_DEVICE(double v) { return v < n; }));\n  if (n == 0) {\n    return;\n  }\n  EXPECT_FALSE(AllOf(ctx.CUDACtx()->CTP(), values.cbegin(), values.cend(),\n                     [n] XGBOOST_DEVICE(double v) { return v < n && v > 0; }));\n}\n}  // 
namespace\n\nTEST(Algorithm, AllOf) {\n  TestAllOf(0);\n  TestAllOf(1);\n  TestAllOf(2);\n  TestAllOf(4096);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_bitfield.cc",
    "content": "/**\n * Copyright 2019-2023, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include \"../../../src/common/bitfield.h\"\n\nnamespace xgboost {\n\nTEST(BitField, Check) {\n  {\n    std::vector<LBitField64::value_type> storage(4, 0);\n    storage[2] = 2;\n    auto bits = LBitField64({storage.data(),\n                static_cast<typename common::Span<LBitField64::value_type>::index_type>(\n                    storage.size())});\n    size_t true_bit = 190;\n    for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {\n      ASSERT_FALSE(bits.Check(i));\n    }\n    ASSERT_TRUE(bits.Check(true_bit));\n    for (size_t i = 0; i < true_bit; ++i) {\n      ASSERT_FALSE(bits.Check(i));\n    }\n  }\n\n  {\n    std::vector<RBitField8::value_type> storage(4, 0);\n    storage[2] = 1 << 3;\n    auto bits = RBitField8({storage.data(),\n                static_cast<typename common::Span<RBitField8::value_type>::index_type>(\n                    storage.size())});\n    size_t true_bit = 19;\n    for (size_t i = 0; i < true_bit; ++i) {\n      ASSERT_FALSE(bits.Check(i));\n    }\n    ASSERT_TRUE(bits.Check(true_bit));\n    for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {\n      ASSERT_FALSE(bits.Check(i));\n    }\n  }\n\n  {\n    // regression test for correct index type.\n    std::vector<RBitField8::value_type> storage(33, 0);\n    storage[32] = static_cast<uint8_t>(1);\n    auto bits = RBitField8({storage.data(), storage.size()});\n    ASSERT_TRUE(bits.Check(256));\n  }\n}\n\ntemplate <typename BitFieldT, typename VT = typename BitFieldT::value_type>\nvoid TestBitFieldSet(typename BitFieldT::value_type res, size_t index, size_t true_bit) {\n  using IndexT = typename common::Span<VT>::index_type;\n  std::vector<VT> storage(4, 0);\n  auto bits = BitFieldT({storage.data(), static_cast<IndexT>(storage.size())});\n\n  bits.Set(true_bit);\n\n  for (size_t i = 0; i < true_bit; ++i) {\n    ASSERT_FALSE(bits.Check(i));\n  }\n\n  
ASSERT_TRUE(bits.Check(true_bit));\n\n  for (size_t i = true_bit + 1; i < storage.size() * BitFieldT::kValueSize; ++i) {\n    ASSERT_FALSE(bits.Check(i));\n  }\n  ASSERT_EQ(storage[index], res);\n}\n\nTEST(BitField, Set) {\n  {\n    TestBitFieldSet<LBitField64>(2, 2, 190);\n  }\n  {\n    TestBitFieldSet<RBitField8>(1 << 3, 2, 19);\n  }\n}\n\ntemplate <typename BitFieldT, typename VT = typename BitFieldT::value_type>\nvoid TestBitFieldClear(size_t clear_bit) {\n  using IndexT = typename common::Span<VT>::index_type;\n  std::vector<VT> storage(4, 0);\n  auto bits = BitFieldT({storage.data(), static_cast<IndexT>(storage.size())});\n\n  bits.Set(clear_bit);\n  bits.Clear(clear_bit);\n\n  ASSERT_FALSE(bits.Check(clear_bit));\n}\n\nTEST(BitField, Clear) {\n  {\n    TestBitFieldClear<LBitField64>(190);\n  }\n  {\n    TestBitFieldClear<RBitField8>(19);\n  }\n}\n\nTEST(BitField, CTZ) {\n  {\n    auto cnt = TrailingZeroBits(0);\n    ASSERT_EQ(cnt, sizeof(std::uint32_t) * 8);\n  }\n  {\n    auto cnt = TrailingZeroBits(0b00011100);\n    ASSERT_EQ(cnt, 2);\n    cnt = detail::TrailingZeroBitsImpl(0b00011100);\n    ASSERT_EQ(cnt, 2);\n  }\n  {\n    auto cnt = TrailingZeroBits(0b00011101);\n    ASSERT_EQ(cnt, 0);\n    cnt = detail::TrailingZeroBitsImpl(0b00011101);\n    ASSERT_EQ(cnt, 0);\n  }\n  {\n    auto cnt = TrailingZeroBits(0b1000000000000000);\n    ASSERT_EQ(cnt, 15);\n    cnt = detail::TrailingZeroBitsImpl(0b1000000000000000);\n    ASSERT_EQ(cnt, 15);\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_bitfield.cu",
    "content": "/**\n * Copyright 2019-2023, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/copy.h>\n#include <thrust/device_vector.h>\n#include <vector>\n#include \"../../../src/common/bitfield.h\"\n#include \"../../../src/common/device_helpers.cuh\"\n\nnamespace xgboost {\n\n__global__ void TestSetKernel(LBitField64 bits) {\n  auto tid = threadIdx.x + blockIdx.x * blockDim.x;\n  if (tid < bits.Capacity()) {\n    bits.Set(tid);\n  }\n}\n\nTEST(BitField, StorageSize) {\n  size_t constexpr kElements { 16 };\n  size_t size = LBitField64::ComputeStorageSize(kElements);\n  ASSERT_EQ(1, size);\n  size = RBitField8::ComputeStorageSize(4);\n  ASSERT_EQ(1, size);\n  size = RBitField8::ComputeStorageSize(kElements);\n  ASSERT_EQ(2, size);\n}\n\nTEST(BitField, GPUSet) {\n  dh::device_vector<LBitField64::value_type> storage;\n  uint32_t constexpr kBits = 128;\n  storage.resize(128);\n  auto bits = LBitField64(dh::ToSpan(storage));\n  TestSetKernel<<<1, kBits>>>(bits);\n\n  std::vector<LBitField64::value_type> h_storage(storage.size());\n  thrust::copy(storage.begin(), storage.end(), h_storage.begin());\n  LBitField64 outputs{\n      common::Span<LBitField64::value_type>{h_storage.data(), h_storage.data() + h_storage.size()}};\n  for (size_t i = 0; i < kBits; ++i) {\n    ASSERT_TRUE(outputs.Check(i));\n  }\n}\n\nnamespace {\ntemplate <bool is_and, typename Op>\nvoid TestGPULogic(Op op) {\n  uint32_t constexpr kBits = 128;\n  dh::device_vector<LBitField64::value_type> lhs_storage(kBits);\n  dh::device_vector<LBitField64::value_type> rhs_storage(kBits);\n  auto lhs = LBitField64(dh::ToSpan(lhs_storage));\n  auto rhs = LBitField64(dh::ToSpan(rhs_storage));\n  thrust::fill(lhs_storage.begin(), lhs_storage.end(), 0UL);\n  thrust::fill(rhs_storage.begin(), rhs_storage.end(), ~static_cast<LBitField64::value_type>(0UL));\n  dh::LaunchN(kBits, [=] __device__(auto) mutable { op(lhs, rhs); });\n\n  std::vector<LBitField64::value_type> 
h_storage(lhs_storage.size());\n  thrust::copy(lhs_storage.begin(), lhs_storage.end(), h_storage.begin());\n  LBitField64 outputs{{h_storage.data(), h_storage.data() + h_storage.size()}};\n  if (is_and) {\n    for (size_t i = 0; i < kBits; ++i) {\n      ASSERT_FALSE(outputs.Check(i));\n    }\n  } else {\n    for (size_t i = 0; i < kBits; ++i) {\n      ASSERT_TRUE(outputs.Check(i));\n    }\n  }\n}\n\nvoid TestGPUAnd() {\n  TestGPULogic<true>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs &= rhs; });\n}\n\nvoid TestGPUOr() {\n  TestGPULogic<false>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs |= rhs; });\n}\n}  // namespace\n\nTEST(BitField, GPUAnd) { TestGPUAnd(); }\n\nTEST(BitField, GPUOr) { TestGPUOr(); }\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_categorical.cc",
    "content": "/*!\n * Copyright 2021-2022 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>\n#include <xgboost/learner.h>\n\n#include <limits>\n\n#include \"../../../src/common/categorical.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost {\nnamespace common {\nTEST(Categorical, Decision) {\n  // inf\n  float a = std::numeric_limits<float>::infinity();\n\n  ASSERT_TRUE(common::InvalidCat(a));\n  std::vector<uint32_t> cats(256, 0);\n  ASSERT_TRUE(Decision(cats, a));\n\n  // larger than size\n  a = 256;\n  ASSERT_TRUE(Decision(cats, a));\n\n  // negative\n  a = -1;\n  ASSERT_TRUE(Decision(cats, a));\n\n  CatBitField bits{cats};\n  bits.Set(0);\n  a = -0.5;\n  ASSERT_TRUE(Decision(cats, a));\n\n  // round toward 0\n  a = 0.5;\n  ASSERT_FALSE(Decision(cats, a));\n\n  // valid\n  a = 13;\n  bits.Set(a);\n  ASSERT_FALSE(Decision(bits.Bits(), a));\n}\n\n/**\n * Test for running inference with input category greater than the one stored in tree.\n */\nTEST(Categorical, MinimalSet) {\n  std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;\n  std::vector<FeatureType> types{FeatureType::kCategorical};\n  auto Xy =\n      RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);\n\n  std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n  learner->SetParam(\"max_depth\", \"1\");\n  learner->SetParam(\"tree_method\", \"hist\");\n  learner->Configure();\n  learner->UpdateOneIter(0, Xy);\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n  auto tree = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0];\n  ASSERT_GE(get<I32Array const>(tree[\"categories\"]).size(), 1);\n  auto v = get<I32Array const>(tree[\"categories\"])[0];\n\n  HostDeviceVector<float> predt;\n  {\n    std::vector<float> data{static_cast<float>(kCat),\n                            static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};\n    auto test = GetDMatrixFromData(data, data.size(), kCols);\n    
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);\n    ASSERT_EQ(predt.Size(), data.size());\n    auto const& h_predt = predt.ConstHostSpan();\n    for (auto v : h_predt) {\n      ASSERT_EQ(v, 1);  // left child of root node\n    }\n  }\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n    learner->LoadModel(model);\n    std::vector<float> data = {static_cast<float>(v)};\n    auto test = GetDMatrixFromData(data, data.size(), kCols);\n    learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);\n    auto const& h_predt = predt.ConstHostSpan();\n    for (auto v : h_predt) {\n      ASSERT_EQ(v, 2);  // right child of root node\n    }\n  }\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_charconv.cc",
    "content": "/*\n * The code is adopted from original (half) c implementation:\n * https://github.com/ulfjack/ryu.git with some more comments and tidying.  License is\n * attached below.\n *\n * Copyright 2018 Ulf Adams\n *\n * The contents of this file may be used under the terms of the Apache License,\n * Version 2.0.\n *\n *    (See accompanying file LICENSE-Apache or copy at\n *     http: *www.apache.org/licenses/LICENSE-2.0)\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the Boost Software License, Version 1.0.\n *    (See accompanying file LICENSE-Boost or copy at\n *     https://www.boost.org/LICENSE_1_0.txt)\n *\n * Unless required by applicable law or agreed to in writing, this software\n * is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.\n */\n#include <cstddef>\n#include <gtest/gtest.h>\n#include <limits>\n#include \"../../../src/common/charconv.h\"\n\nnamespace xgboost {\nnamespace {\nvoid TestInteger(char const* res, int64_t i) {\n  char result[xgboost::NumericLimits<int64_t>::kToCharsSize];\n  auto ret = to_chars(result, result + sizeof(result), i);\n  *ret.ptr = '\\0';\n  EXPECT_STREQ(res, result);\n}\n\nstatic float Int32Bits2Float(uint32_t bits) {\n  float f;\n  memcpy(&f, &bits, sizeof(float));\n  return f;\n}\n\nvoid TestRyu(char const *res, float v) {\n  char result[xgboost::NumericLimits<float>::kToCharsSize];\n  auto ret = to_chars(result, result + sizeof(result), v);\n  *ret.ptr = '\\0';\n  EXPECT_STREQ(res, result);\n}\n}  // anonymous namespace\n\nTEST(Ryu, Subnormal) {\n  TestRyu(\"0E0\", 0.0f);\n  TestRyu(\"-0E0\", -0.0f);\n  TestRyu(\"1E0\", 1.0f);\n  TestRyu(\"-1E0\", -1.0f);\n  TestRyu(\"NaN\", NAN);\n  TestRyu(\"Infinity\", INFINITY);\n  TestRyu(\"-Infinity\", -INFINITY);\n\n  TestRyu(\"1E-45\", std::numeric_limits<float>::denorm_min());\n}\n\nTEST(Ryu, Denormal) {\n  TestRyu(\"1E-45\", 
std::numeric_limits<float>::denorm_min());\n}\n\nTEST(Ryu, SwitchToSubnormal) {\n  TestRyu(\"1.1754944E-38\", 1.1754944E-38f);\n}\n\nTEST(Ryu, MinAndMax) {\n  TestRyu(\"3.4028235E38\", Int32Bits2Float(0x7f7fffff));\n  TestRyu(\"1E-45\", Int32Bits2Float(1));\n}\n\n// Check that we return the exact boundary if it is the shortest\n// representation, but only if the original floating point number is even.\nTEST(Ryu, BoundaryRoundEven) {\n  TestRyu(\"3.355445E7\", 3.355445E7f);\n  TestRyu(\"9E9\", 8.999999E9f);\n  TestRyu(\"3.436672E10\", 3.4366717E10f);\n}\n\n// If the exact value is exactly halfway between two shortest representations,\n// then we round to even. It seems like this only makes a difference if the\n// last two digits are ...2|5 or ...7|5, and we cut off the 5.\nTEST(Ryu, ExactValueRoundEven) {\n  TestRyu(\"3.0540412E5\", 3.0540412E5f);\n  TestRyu(\"8.0990312E3\", 8.0990312E3f);\n}\n\nTEST(Ryu, LotsOfTrailingZeros) {\n  // Pattern for the first test: 00111001100000000000000000000000\n  TestRyu(\"2.4414062E-4\", 2.4414062E-4f);\n  TestRyu(\"2.4414062E-3\", 2.4414062E-3f);\n  TestRyu(\"4.3945312E-3\", 4.3945312E-3f);\n  TestRyu(\"6.3476562E-3\", 6.3476562E-3f);\n}\n\nTEST(Ryu, Regression) {\n  TestRyu(\"4.7223665E21\", 4.7223665E21f);\n  TestRyu(\"8.388608E6\", 8388608.0f);\n  TestRyu(\"1.6777216E7\", 1.6777216E7f);\n  TestRyu(\"3.3554436E7\", 3.3554436E7f);\n  TestRyu(\"6.7131496E7\", 6.7131496E7f);\n  TestRyu(\"1.9310392E-38\", 1.9310392E-38f);\n  TestRyu(\"-2.47E-43\", -2.47E-43f);\n  TestRyu(\"1.993244E-38\", 1.993244E-38f);\n  TestRyu(\"4.1039004E3\", 4103.9003f);\n  TestRyu(\"5.3399997E9\", 5.3399997E9f);\n  TestRyu(\"6.0898E-39\", 6.0898E-39f);\n  TestRyu(\"1.0310042E-3\", 0.0010310042f);\n  TestRyu(\"2.882326E17\", 2.8823261E17f);\n  TestRyu(\"7.038531E-26\", 7.0385309E-26f);\n  TestRyu(\"9.223404E17\", 9.2234038E17f);\n  TestRyu(\"6.710887E7\", 6.7108872E7f);\n  TestRyu(\"1E-44\", 1.0E-44f);\n  TestRyu(\"2.816025E14\", 2.816025E14f);\n  
TestRyu(\"9.223372E18\", 9.223372E18f);\n  TestRyu(\"1.5846086E29\", 1.5846085E29f);\n  TestRyu(\"1.1811161E19\", 1.1811161E19f);\n  TestRyu(\"5.368709E18\", 5.368709E18f);\n  TestRyu(\"4.6143166E18\", 4.6143165E18f);\n  TestRyu(\"7.812537E-3\", 0.007812537f);\n  TestRyu(\"1E-45\", 1.4E-45f);\n  TestRyu(\"1.18697725E20\", 1.18697724E20f);\n  TestRyu(\"1.00014165E-36\", 1.00014165E-36f);\n  TestRyu(\"2E2\", 200.0f);\n  TestRyu(\"3.3554432E7\", 3.3554432E7f);\n\n  static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon());\n  TestRyu(\"1.1920929E-7\", std::numeric_limits<float>::epsilon());\n}\n\nTEST(Ryu, RoundTrip) {\n  float f = -1.1493590134238582e-40;\n  char result[NumericLimits<float>::kToCharsSize] { 0 };\n  auto ret = to_chars(result, result + sizeof(result), f);\n  size_t dis = std::distance(result, ret.ptr);\n  float back;\n  auto from_ret = from_chars(result, result + dis, back);\n  ASSERT_EQ(from_ret.ec, std::errc());\n  std::string str;\n  for (size_t i = 0; i < dis; ++i) {\n    str.push_back(result[i]);\n  }\n  ASSERT_EQ(f, back);\n}\n\nTEST(Ryu, LooksLikePow5) {\n  // These numbers have a mantissa that is the largest power of 5 that fits,\n  // and an exponent that causes the computation for q to result in 10, which is a corner\n  // case for Ryu.\n  TestRyu(\"6.7108864E17\", Int32Bits2Float(0x5D1502F9));\n  TestRyu(\"1.3421773E18\", Int32Bits2Float(0x5D9502F9));\n  TestRyu(\"2.6843546E18\", Int32Bits2Float(0x5E1502F9));\n}\n\nTEST(Ryu, OutputLength) {\n  TestRyu(\"1E0\", 1.0f); // already tested in Basic\n  TestRyu(\"1.2E0\", 1.2f);\n  TestRyu(\"1.23E0\", 1.23f);\n  TestRyu(\"1.234E0\", 1.234f);\n  TestRyu(\"1.2345E0\", 1.2345f);\n  TestRyu(\"1.23456E0\", 1.23456f);\n  TestRyu(\"1.234567E0\", 1.234567f);\n  TestRyu(\"1.2345678E0\", 1.2345678f);\n  TestRyu(\"1.23456735E-36\", 1.23456735E-36f);\n}\n\nTEST(IntegerPrinting, Basic) {\n  TestInteger(\"0\", 0);\n  auto str = std::to_string(std::numeric_limits<int64_t>::min());\n  
TestInteger(str.c_str(), std::numeric_limits<int64_t>::min());\n  str = std::to_string(std::numeric_limits<int64_t>::max());\n  TestInteger(str.c_str(), std::numeric_limits<int64_t>::max());\n}\n\nvoid TestRyuParse(float f, std::string in) {\n  float res;\n  auto ret = from_chars(in.c_str(), in.c_str() + in.size(), res);\n  ASSERT_EQ(ret.ec, std::errc());\n  ASSERT_EQ(f, res);\n}\n\nTEST(Ryu, Basic) {\n  TestRyuParse(0.0f, \"0\");\n  TestRyuParse(-0.0f, \"-0\");\n  TestRyuParse(1.0f, \"1\");\n  TestRyuParse(-1.0f, \"-1\");\n  TestRyuParse(123456792.0f, \"123456789\");\n  TestRyuParse(299792448.0f, \"299792458\");\n}\n\nTEST(Ryu, MinMax) {\n  TestRyuParse(1e-45f, \"1e-45\");\n  TestRyuParse(FLT_MIN, \"1.1754944e-38\");\n  TestRyuParse(FLT_MAX, \"3.4028235e+38\");\n}\n\nTEST(Ryu, MantissaRoundingOverflow) {\n  TestRyuParse(1.0f, \"0.999999999\");\n  TestRyuParse(INFINITY, \"3.4028236e+38\");\n  TestRyuParse(1.1754944e-38f, \"1.17549430e-38\"); // FLT_MIN\n}\n\nTEST(Ryu, TrailingZeros) {\n  TestRyuParse(26843550.0f, \"26843549.5\");\n  TestRyuParse(50000004.0f, \"50000002.5\");\n  TestRyuParse(99999992.0f, \"99999989.5\");\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_column_matrix.cc",
    "content": "/**\n * Copyright 2018-2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>     // for bst_bin_t\n#include <xgboost/context.h>  // for Context\n#include <xgboost/data.h>     // for BatchIterator, BatchSet, DMatrix, Met...\n\n#include <cstddef>      // for size_t\n#include <cstdint>      // for int32_t, uint16_t, uint8_t\n#include <limits>       // for numeric_limits\n#include <memory>       // for shared_ptr, __shared_ptr_access, allo...\n#include <type_traits>  // for remove_reference_t\n\n#include \"../../../src/common/column_matrix.h\"      // for ColumnMatrix, Column, DenseColumnIter\n#include \"../../../src/common/hist_util.h\"          // for DispatchBinType, BinTypeSize, Index\n#include \"../../../src/common/ref_resource_view.h\"  // for RefResourceView\n#include \"../../../src/data/gradient_index.h\"       // for GHistIndexMatrix\n#include \"../../../src/data/iterative_dmatrix.h\"    // for IterativeDMatrix\n#include \"../../../src/tree/param.h\"                // for TrainParam\n#include \"../helpers.h\"                             // for RandomDataGenerator, NumpyArrayIterFo...\n\nnamespace xgboost::common {\nTEST(ColumnMatrix, Basic) {\n  int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};\n  Context ctx;\n  BinTypeSize last{kUint8BinsTypeSize};\n  for (int32_t max_num_bin : max_num_bins) {\n    auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();\n    auto sparse_thresh = 0.2;\n    GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};\n    ColumnMatrix column_matrix;\n    for (auto const& page : dmat->GetBatches<SparsePage>()) {\n      column_matrix.InitFromSparse(page, gmat, sparse_thresh, ctx.Threads());\n    }\n    
ASSERT_GE(column_matrix.GetTypeSize(), last);\n    ASSERT_LE(column_matrix.GetTypeSize(), kUint32BinsTypeSize);\n    last = column_matrix.GetTypeSize();\n    ASSERT_FALSE(column_matrix.AnyMissing());\n    for (auto i = 0ull; i < dmat->Info().num_row_; i++) {\n      for (auto j = 0ull; j < dmat->Info().num_col_; j++) {\n        DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {\n          using T = decltype(dtype);\n          auto col = column_matrix.DenseColumn<T, false>(j);\n          ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j], col.GetGlobalBinIdx(i));\n        });\n      }\n    }\n  }\n}\n\ntemplate <typename BinIdxType>\nvoid CheckSparseColumn(SparseColumnIter<BinIdxType>* p_col, const GHistIndexMatrix& gmat) {\n  auto& col = *p_col;\n\n  size_t n_samples = gmat.row_ptr.size() - 1;\n  ASSERT_EQ(col.Size(), gmat.index.Size());\n  for (auto i = 0ull; i < col.Size(); i++) {\n    ASSERT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]], col.GetGlobalBinIdx(i));\n  }\n\n  for (auto i = 0ull; i < n_samples; i++) {\n    if (col[i] == Column<BinIdxType>::kMissingId) {\n      auto beg = gmat.row_ptr[i];\n      auto end = gmat.row_ptr[i + 1];\n      ASSERT_EQ(end - beg, 0);\n    }\n  }\n}\n\nTEST(ColumnMatrix, SparseColumn) {\n  int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};\n  Context ctx;\n  for (int32_t max_num_bin : max_num_bins) {\n    auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();\n    GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};\n    ColumnMatrix column_matrix;\n    for (auto const& page : dmat->GetBatches<SparsePage>()) {\n      column_matrix.InitFromSparse(page, gmat, 1.0, ctx.Threads());\n    }\n    common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {\n      using 
T = decltype(dtype);\n      auto col = column_matrix.SparseColumn<T>(0, 0);\n      CheckSparseColumn(&col, gmat);\n    });\n  }\n}\n\ntemplate <typename BinIdxType>\nvoid CheckColumWithMissingValue(const DenseColumnIter<BinIdxType, true>& col,\n                                const GHistIndexMatrix& gmat) {\n  for (auto i = 0ull; i < col.Size(); i++) {\n    if (col.IsMissing(i)) {\n      continue;\n    }\n    EXPECT_EQ(gmat.index[gmat.row_ptr[i]], col.GetGlobalBinIdx(i));\n  }\n}\n\nTEST(ColumnMatrix, DenseColumnWithMissing) {\n  int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,\n                            static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};\n  Context ctx;\n  for (int32_t max_num_bin : max_num_bins) {\n    auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();\n    GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);\n    ColumnMatrix column_matrix;\n    for (auto const& page : dmat->GetBatches<SparsePage>()) {\n      column_matrix.InitFromSparse(page, gmat, 0.2, ctx.Threads());\n    }\n    ASSERT_TRUE(column_matrix.AnyMissing());\n    DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {\n      using T = decltype(dtype);\n      auto col = column_matrix.DenseColumn<T, true>(0);\n      CheckColumWithMissingValue(col, gmat);\n    });\n  }\n}\n\nTEST(ColumnMatrix, GrowMissing) {\n  float sparsity = 0.5;\n  NumpyArrayIterForTest iter(sparsity);\n  auto n_threads = 0;\n  bst_bin_t n_bins = 16;\n  BatchParam batch{n_bins, tree::TrainParam::DftSparseThreshold()};\n  Context ctx;\n  auto m = std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,\n                                                    std::numeric_limits<float>::quiet_NaN(),\n                                                    n_threads, n_bins);\n  for (auto const& page : 
m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n    auto const& column_matrix = page.Transpose();\n    auto const& missing = column_matrix.Missing();\n    auto n = NumpyArrayIterForTest::Rows() * NumpyArrayIterForTest::Cols();\n    auto expected = std::remove_reference_t<decltype(missing)>::BitFieldT::ComputeStorageSize(n);\n    auto got = missing.storage.size();\n    ASSERT_EQ(expected, got);\n    DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {\n      using T = decltype(dtype);\n      auto col = column_matrix.DenseColumn<T, true>(0);\n      CheckColumWithMissingValue(col, page);\n    });\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_common.cc",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <algorithm>    // for equal\n#include <cstddef>      // for size_t\n#include <string>       // for string\n#include <string_view>  // for string_view\n\n#include \"../../../src/common/common.h\"\n\nnamespace xgboost::common {\nTEST(Common, HumanMemUnit) {\n  auto name = HumanMemUnit(1024 * 1024 * 1024ul);\n  ASSERT_EQ(name, \"1GB\");\n  name = HumanMemUnit(1024 * 1024ul);\n  ASSERT_EQ(name, \"1MB\");\n  name = HumanMemUnit(1024);\n  ASSERT_EQ(name, \"1KB\");\n  name = HumanMemUnit(1);\n  ASSERT_EQ(name, \"1B\");\n}\n\nTEST(Common, Trim) {\n  // string\n  {\n    std::string in{\"foobar \"};\n    auto out = TrimLast(in);\n    ASSERT_EQ(out, \"foobar\");\n  }\n  {\n    std::string in{R\"(foobar\n)\"};\n    auto out = TrimLast(in);\n    ASSERT_EQ(out, \"foobar\");\n  }\n  // string view\n  {\n    auto res = TrimFirst(\" foo \");\n    ASSERT_EQ(res, std::string_view{\"foo \"});\n  }\n  {\n    auto res = TrimLast(\" foo \");\n    ASSERT_EQ(res, std::string_view{\" foo\"});\n  }\n  {\n    auto res = TrimLast(\"  \");\n    ASSERT_EQ(res, std::string_view{});\n  }\n  {\n    auto res = TrimFirst(\"  \");\n    ASSERT_EQ(res, std::string_view{});\n  }\n  {\n    auto res = TrimFirst(\"\");\n    ASSERT_EQ(res, std::string_view{});\n  }\n}\n\nTEST(Common, Split) {\n  auto check = [](char const* chars, std::size_t n) {\n    std::string str{chars};\n    auto res_str = Split(str, ',');\n    std::string_view view{chars};\n    auto res_view = Split(view, ',');\n    ASSERT_EQ(res_view.size(), res_str.size());\n    ASSERT_EQ(res_view.size(), n);\n    for (std::size_t i = 0; i < res_str.size(); ++i) {\n      ASSERT_EQ(res_str[i].size(), res_view[i].size());\n      auto eq = std::equal(res_str[i].cbegin(), res_str[i].cend(), res_view[i].cbegin());\n      ASSERT_TRUE(eq);\n    }\n  };\n  check(\"foo,bar\", 2);\n  check(\"foo,bar,\", 2);\n  check(\",foo,bar\", 3);\n  
check(\",foo,bar,\", 3);  // last is ignored\n  check(\",,,,foo,bar\", 6);\n  check(\",foo,,,,bar\", 6);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_compressed_iterator.cc",
    "content": "/**\n * Copyright 2017-2024, XGBoost contributors\n */\n#include \"../../../src/common/compressed_iterator.h\"\n#include \"gtest/gtest.h\"\n#include <algorithm>\n\nnamespace xgboost::common {\nTEST(CompressedIterator, Size) {\n  bst_idx_t n = 2048;\n  {\n    bst_idx_t n_symbols = 256;\n    auto n_bytes = CompressedBufferWriter::CalculateBufferSize(n, n_symbols);\n    ASSERT_EQ(n_bytes, n + detail::kPadding);\n  }\n  {\n    bst_idx_t n_symbols = 64;\n    auto n_bytes = CompressedBufferWriter::CalculateBufferSize(n, n_symbols);\n    ASSERT_EQ(n_bytes, 1544);\n  }\n}\n\nTEST(CompressedIterator, Test) {\n  ASSERT_TRUE(detail::SymbolBits(256) == 8);\n  ASSERT_TRUE(detail::SymbolBits(150) == 8);\n  std::vector<int> test_cases = {1, 3, 426, 21, 64, 256, 100000, INT32_MAX};\n  int num_elements = 1000;\n  int repetitions = 1000;\n  srand(9);\n\n  for (auto alphabet_size : test_cases) {\n    for (int i = 0; i < repetitions; i++) {\n      std::vector<int> input(num_elements);\n      std::generate(input.begin(), input.end(),\n        [=]() { return rand() % alphabet_size; });\n      CompressedBufferWriter cbw(alphabet_size);\n\n      // Test write entire array\n      std::vector<unsigned char> buffer(\n        CompressedBufferWriter::CalculateBufferSize(input.size(),\n          alphabet_size));\n\n      cbw.Write(buffer.data(), input.begin(), input.end());\n\n      CompressedIterator<int> ci(buffer.data(), alphabet_size);\n      std::vector<int> output(input.size());\n      for (size_t i = 0; i < input.size(); i++) {\n        output[i] = ci[i];\n      }\n\n      ASSERT_TRUE(input == output);\n\n      // Test write Symbol\n      std::vector<unsigned char> buffer2(\n        CompressedBufferWriter::CalculateBufferSize(input.size(),\n          alphabet_size));\n      for (size_t i = 0; i < input.size(); i++) {\n        cbw.WriteSymbol(buffer2.data(), input[i], i);\n      }\n      CompressedIterator<int> ci2(buffer.data(), alphabet_size);\n      std::vector<int> 
output2(input.size());\n      for (size_t i = 0; i < input.size(); i++) {\n        output2[i] = ci2[i];\n      }\n      ASSERT_TRUE(input == output2);\n    }\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_cuda_dr_utils.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#if defined(XGBOOST_USE_CUDA) && defined(__linux__)\n#include \"../../../src/common/cuda_dr_utils.h\"\n\nnamespace xgboost::cudr {\nTEST(DrUtils, GetVersionFromSmi) {\n  std::int32_t major = 0, minor = 0;\n  bool result = GetVersionFromSmi(&major, &minor);\n\n  if (result) {\n    EXPECT_GE(major, 0);\n    EXPECT_GE(minor, 0);\n  } else {\n    EXPECT_EQ(major, -1);\n    EXPECT_EQ(minor, -1);\n  }\n}\n\nTEST(DrUtils, GetC2cLinkCountFromSmi) {\n  {\n    auto out = R\"(GPU 0: NVIDIA GH200 480GB (UUID: GPU-********-****-****-****-************)\n    C2C Link 0: 44.712 GB/s\n    C2C Link 1: 44.712 GB/s\n    C2C Link 2: 44.712 GB/s\n    C2C Link 3: 44.712 GB/s\n    C2C Link 4: 44.712 GB/s\n    C2C Link 5: 44.712 GB/s\n    C2C Link 6: 44.712 GB/s\n    C2C Link 7: 44.712 GB/s\n    C2C Link 8: 44.712 GB/s\n    C2C Link 9: 44.712 GB/s\n  )\";\n    auto lc = detail::GetC2cLinkCountFromSmiImpl(out);\n    ASSERT_EQ(lc, 10);\n  }\n  {\n    auto out = R\"(No Devices support C2C.\n)\";\n    auto lc = detail::GetC2cLinkCountFromSmiImpl(out);\n    ASSERT_EQ(lc, -1);\n  }\n\n  {\n    [[maybe_unused]] auto _ = GetC2cLinkCountFromSmi();\n  }\n  {\n    [[maybe_unused]] auto _ = GetC2cLinkCountFromSmiGlobal();\n  }\n}\n}  // namespace xgboost::cudr\n#endif  // defined(XGBOOST_USE_CUDA)\n"
  },
  {
    "path": "tests/cpp/common/test_cuda_host_allocator.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // for Context\n\n#include <vector>\n\n#include \"../../../src/common/cuda_pinned_allocator.h\"\n#include \"../../../src/common/cuda_stream.h\"       // for DefaultStream\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../../../src/common/numeric.h\"      // for Iota\n\nnamespace xgboost {\nTEST(CudaHostMalloc, Pinned) {\n  std::vector<float, common::cuda_impl::PinnedAllocator<float>> vec;\n  vec.resize(10);\n  ASSERT_EQ(vec.size(), 10);\n  Context ctx;\n  common::Iota(&ctx, vec.begin(), vec.end(), 0);\n  float k = 0;\n  for (auto v : vec) {\n    ASSERT_EQ(v, k);\n    ++k;\n  }\n}\n\nTEST(CudaHostMalloc, Managed) {\n  std::vector<float, common::cuda_impl::ManagedAllocator<float>> vec;\n  vec.resize(10);\n#if defined(__linux__)\n#if (CUDA_VERSION / 1000) >= 13\n  cudaMemLocation loc;\n  loc.type = cudaMemLocationTypeDevice;\n  loc.id = 0;\n  dh::safe_cuda(\n      cudaMemPrefetchAsync(vec.data(), vec.size() * sizeof(float), loc, 0, curt::DefaultStream()));\n#else\n  dh::safe_cuda(\n      cudaMemPrefetchAsync(vec.data(), vec.size() * sizeof(float), 0, curt::DefaultStream()));\n#endif  // (CUDA_VERSION / 1000) >= 13\n#endif\n  curt::DefaultStream().Sync();\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_cuda_rt_utils.cu",
    "content": "/**\n * Copyright 2025-2026, XGBoost contributors\n */\n\n#include <gtest/gtest.h>\n\n#include <array>    // for array\n#include <cstdint>  // for int32_t\n#include <set>      // for set\n\n#include \"../../../src/common/cuda_rt_utils.h\"\n#include \"../../../src/common/cuda_stream_pool.h\"\n#include \"xgboost/span.h\"  // for Span\n\nnamespace xgboost::curt {\nTEST(RtUtils, Uuid) {\n  std::array<unsigned char, kUuidLength> uuid;\n  GetUuid(uuid, 0);\n  auto str = PrintUuid(uuid);\n  ASSERT_EQ(str.substr(0, 4), \"GPU-\");\n  ASSERT_EQ(str.length(), 40);\n  ASSERT_EQ(str.size(), str.length());\n}\n\nTEST(RtUtils, StreamPool) {\n  auto n_streams = 16;\n  auto pool = std::make_unique<StreamPool>(n_streams);\n  std::set<cudaStream_t> hdls;\n\n  for (std::int32_t i = 0; i < n_streams; ++i) {\n    hdls.insert(cudaStream_t{pool->Next()});\n  }\n\n  ASSERT_EQ(hdls.size(), n_streams);\n  ASSERT_EQ(hdls.size(), pool->Size());\n\n  for (std::int32_t i = 0; i < n_streams; ++i) {\n    hdls.insert(cudaStream_t{pool->Next()});\n  }\n  ASSERT_EQ(hdls.size(), n_streams);\n  ASSERT_EQ(hdls.size(), pool->Size());\n}\n}  // namespace xgboost::curt\n"
  },
  {
    "path": "tests/cpp/common/test_device_compression.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/sequence.h>  // for sequence\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for uint8_t\n#include <memory>   // for make_shared\n#include <tuple>    // for tuple\n\n#include \"../../../src/common/cuda_context.cuh\"         // for CUDAContext\n#include \"../../../src/common/cuda_pinned_allocator.h\"  // for HostPinnedMemPool\n#include \"../../../src/common/device_compression.cuh\"\n#include \"../../../src/common/device_helpers.cuh\"     // for ToSpan\n#include \"../../../src/common/device_vector.cuh\"      // for DeviceUVector\n#include \"../../../src/common/ref_resource_view.cuh\"  // for MakeFixedVecWithPinnedMemPool\n#include \"../helpers.h\"                               // for MakeCUDACtx\n\nnamespace xgboost::dc {\n// We skip the tests but keep the code at compilation time if nvcomp is not enabled. This\n// helps us to ensure correct symbol definitions.\nTEST(NvComp, Snappy) {\n#if !defined(XGBOOST_USE_NVCOMP)\n  GTEST_SKIP_(\"XGBoost is not compiled with nvcomp.\");\n#endif\n  auto ctx = MakeCUDACtx(0);\n  auto cuctx = ctx.CUDACtx();\n  dh::DeviceUVector<common::CompressedByteT> in(1024);\n  thrust::sequence(ctx.CUDACtx()->CTP(), in.begin(), in.end(), 0);\n  dh::DeviceUVector<std::uint8_t> compr;\n\n  std::size_t chunk_size = 512;\n  auto params = CompressSnappy(&ctx, dh::ToSpan(in), &compr, chunk_size);\n  ASSERT_GE(params.size(), 1);\n\n  auto pool = std::make_shared<common::cuda_impl::HostPinnedMemPool>();\n  auto h_in =\n      common::MakeFixedVecWithPinnedMemPool<std::uint8_t>(pool, compr.size(), cuctx->Stream());\n  dh::safe_cuda(cudaMemcpyAsync(h_in.data(), compr.data(), compr.size() * sizeof(std::uint8_t),\n                                cudaMemcpyDefault, cuctx->Stream()));\n\n  dh::device_vector<common::CompressedByteT> dout(in.size(), 0);\n  auto mgr = MakeSnappyDecomprMgr(cuctx->Stream(), pool, params, h_in.ToSpan());\n  
DecompressSnappy(cuctx->Stream(), mgr, dh::ToSpan(dout), true);\n\n  bool eq = thrust::equal(ctx.CUDACtx()->CTP(), dout.cbegin(), dout.cend(), in.cbegin());\n  ASSERT_TRUE(eq);\n\n  auto const& status = GetGlobalDeStatus();\n  ASSERT_LT(status.max_output_size, 1ul << 24);\n}\n\nclass TestNvComp : public ::testing::TestWithParam<std::tuple<std::size_t, std::size_t>> {\n public:\n  void Run(std::size_t n_bytes, std::size_t n_chunk_bytes) {\n    auto ctx = MakeCUDACtx(0);\n    auto cuctx = ctx.CUDACtx();\n\n    dh::DeviceUVector<common::CompressedByteT> in(n_bytes);\n    thrust::sequence(ctx.CUDACtx()->CTP(), in.begin(), in.end(), 0);\n    dh::DeviceUVector<std::uint8_t> compr;\n\n    auto params = CompressSnappy(&ctx, dh::ToSpan(in), &compr, n_chunk_bytes);\n    if (n_bytes != 0) {\n      ASSERT_GE(params.size(), 1);\n    } else {\n      ASSERT_TRUE(params.empty());\n    }\n    if (n_chunk_bytes < n_bytes) {\n      ASSERT_GE(params.size(), n_bytes / n_chunk_bytes);\n    }\n\n    auto pool = std::make_shared<common::cuda_impl::HostPinnedMemPool>();\n\n    CuMemParams out_params;\n    auto page = CoalesceCompressedBuffersToHost(cuctx->Stream(), pool, params, compr, &out_params);\n\n    dh::device_vector<common::CompressedByteT> dout(in.size(), 0);\n    auto mgr = MakeSnappyDecomprMgr(cuctx->Stream(), pool, out_params, page.ToSpan());\n    DecompressSnappy(cuctx->Stream(), mgr, dh::ToSpan(dout), true);\n\n    bool eq = thrust::equal(ctx.CUDACtx()->CTP(), dout.cbegin(), dout.cend(), in.cbegin());\n    ASSERT_TRUE(eq);\n  }\n};\n\nTEST_P(TestNvComp, HostBuf) {\n#if !defined(XGBOOST_USE_NVCOMP)\n  GTEST_SKIP_(\"XGBoost is not compiled with nvcomp.\");\n#endif\n  auto [n_bytes, n_chunk_bytes] = this->GetParam();\n  this->Run(n_bytes, n_chunk_bytes);\n}\n\nINSTANTIATE_TEST_SUITE_P(TestNvComp, TestNvComp,\n                         ::testing::Combine(::testing::Values(0, 1, 512, 1024),\n                                            ::testing::Values(1, 256, 512, 1024, 
2048)));\n}  // namespace xgboost::dc\n"
  },
  {
    "path": "tests/cpp/common/test_device_helpers.cu",
    "content": "/**\n * Copyright 2017-2025, XGBoost contributors\n */\n#include <thrust/device_vector.h>\n#include <thrust/sort.h>  // for is_sorted\n#include <xgboost/base.h>\n\n#include <cstddef>\n#include <cstdint>\n#include <functional>  // for equal_to\n#include <vector>\n\n#include \"../../../src/common/cuda_context.cuh\"\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../../../src/common/quantile.h\"\n#include \"../helpers.h\"\n#include \"gtest/gtest.h\"\n\nTEST(SumReduce, Test) {\n  thrust::device_vector<float> data(100, 1.0f);\n  auto sum = dh::SumReduction(data.data().get(), data.size());\n  ASSERT_NEAR(sum, 100.0f, 1e-5);\n}\n\nvoid TestAtomicSizeT() {\n  size_t constexpr kThreads = 235;\n  dh::device_vector<size_t> out(1, 0);\n  auto d_out = dh::ToSpan(out);\n  dh::LaunchN(kThreads, [=] __device__(size_t idx) {\n    atomicAdd(&d_out[0], static_cast<size_t>(1));\n  });\n  ASSERT_EQ(out[0], kThreads);\n}\n\nTEST(AtomicAdd, SizeT) {\n  TestAtomicSizeT();\n}\n\nvoid TestSegmentID() {\n  std::vector<size_t> segments{0, 1, 3};\n  thrust::device_vector<size_t> d_segments(segments);\n  auto s_segments = dh::ToSpan(d_segments);\n  dh::LaunchN(1, [=]__device__(size_t idx) {\n    auto id = dh::SegmentId(s_segments, 0);\n    SPAN_CHECK(id == 0);\n    id = dh::SegmentId(s_segments, 1);\n    SPAN_CHECK(id == 1);\n    id = dh::SegmentId(s_segments, 2);\n    SPAN_CHECK(id == 1);\n  });\n}\n\nTEST(SegmentID, Basic) {\n  TestSegmentID();\n}\n\nTEST(SegmentedUnique, Basic) {\n  std::vector<float> values{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.62448811531066895f, 0.4f};\n  std::vector<size_t> segments{0, 3, 6};\n\n  thrust::device_vector<float> d_values(values);\n  thrust::device_vector<xgboost::bst_feature_t> d_segments{segments};\n\n  thrust::device_vector<xgboost::bst_feature_t> d_segs_out(d_segments.size());\n  thrust::device_vector<float> d_vals_out(d_values.size());\n\n  auto ctx = xgboost::MakeCUDACtx(0);\n  size_t n_uniques = 
dh::SegmentedUnique(\n      ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),\n      d_values.data().get(), d_values.data().get() + d_values.size(), d_segs_out.data().get(),\n      d_vals_out.data().get(), std::equal_to{});\n  CHECK_EQ(n_uniques, 5);\n\n  std::vector<float> values_sol{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.4f};\n  for (size_t i = 0 ; i < values_sol.size(); i ++) {\n    ASSERT_EQ(d_vals_out[i], values_sol[i]);\n  }\n\n  std::vector<xgboost::bst_feature_t> segments_sol{0, 3, 5};\n  for (size_t i = 0; i < d_segments.size(); ++i) {\n    ASSERT_EQ(segments_sol[i], d_segs_out[i]);\n  }\n\n  d_segments[1] = 4;\n  d_segments[2] = 6;\n  n_uniques = dh::SegmentedUnique(\n      ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),\n      d_values.data().get(), d_values.data().get() + d_values.size(), d_segs_out.data().get(),\n      d_vals_out.data().get(), std::equal_to{});\n  ASSERT_EQ(n_uniques, values.size());\n  for (size_t i = 0 ; i < values.size(); i ++) {\n    ASSERT_EQ(d_vals_out[i], values[i]);\n  }\n}\n\nnamespace {\nusing SketchEntry = xgboost::common::WQSummary<float, float>::Entry;\nstruct SketchUnique {\n  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {\n    return a.value - b.value == 0;\n  }\n};\nstruct IsSorted {\n  bool __device__ operator()(SketchEntry const& a, SketchEntry const& b) const {\n    return a.value < b.value;\n  }\n};\n}  // namespace\n\nnamespace xgboost {\nvoid TestSegmentedUniqueRegression(std::vector<SketchEntry> values, size_t n_duplicated) {\n  std::vector<bst_feature_t> segments{0, static_cast<bst_feature_t>(values.size())};\n\n  thrust::device_vector<SketchEntry> d_values(values);\n  thrust::device_vector<bst_feature_t> d_segments(segments);\n  thrust::device_vector<bst_feature_t> d_segments_out(segments.size());\n\n  auto ctx = xgboost::MakeCUDACtx(0);\n\n  size_t n_uniques = dh::SegmentedUnique(\n      
ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),\n      d_values.data().get(), d_values.data().get() + d_values.size(), d_segments_out.data().get(),\n      d_values.data().get(), SketchUnique{});\n  ASSERT_EQ(n_uniques, values.size() - n_duplicated);\n  ASSERT_TRUE(thrust::is_sorted(thrust::device, d_values.begin(),\n                                d_values.begin() + n_uniques, IsSorted{}));\n  ASSERT_EQ(segments.at(0), d_segments_out[0]);\n  ASSERT_EQ(segments.at(1), d_segments_out[1] + n_duplicated);\n}\n\nTEST(DeviceHelpers, Reduce) {\n  size_t kSize = std::numeric_limits<uint32_t>::max();\n  auto it = thrust::make_counting_iterator(0ul);\n  dh::XGBCachingDeviceAllocator<char> alloc;\n  auto batched = dh::Reduce(thrust::cuda::par(alloc), it, it + kSize, 0ul, thrust::maximum<size_t>{});\n  CHECK_EQ(batched, kSize - 1);\n}\n\n\nTEST(SegmentedUnique, Regression) {\n  {\n    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},\n                                    {3151, 3152, 1, 0.62418866157531738},\n                                    {3152, 3153, 1, 0.62419462203979492},\n                                    {3153, 3154, 1, 0.62431186437606812},\n                                    {3154, 3155, 1, 0.6244881153106689453125},\n                                    {3155, 3156, 1, 0.6244881153106689453125},\n                                    {3155, 3156, 1, 0.6244881153106689453125},\n                                    {3155, 3156, 1, 0.6244881153106689453125},\n                                    {3157, 3158, 1, 0.62552797794342041},\n                                    {3158, 3159, 1, 0.6256556510925293},\n                                    {3159, 3160, 1, 0.62571090459823608},\n                                    {3160, 3161, 1, 0.62577134370803833}};\n    TestSegmentedUniqueRegression(values, 3);\n  }\n  {\n    std::vector<SketchEntry> values{{3149, 3150, 1, 0.62392902374267578},\n                      
              {3151, 3152, 1, 0.62418866157531738},\n                                    {3152, 3153, 1, 0.62419462203979492},\n                                    {3153, 3154, 1, 0.62431186437606812},\n                                    {3154, 3155, 1, 0.6244881153106689453125},\n                                    {3157, 3158, 1, 0.62552797794342041},\n                                    {3158, 3159, 1, 0.6256556510925293},\n                                    {3159, 3160, 1, 0.62571090459823608},\n                                    {3160, 3161, 1, 0.62577134370803833}};\n    TestSegmentedUniqueRegression(values, 0);\n  }\n  {\n    std::vector<SketchEntry> values;\n    TestSegmentedUniqueRegression(values, 0);\n  }\n}\n\nTEST(Allocator, DISABLED_OOM) {\n  auto size = dh::AvailableMemory(0) * 4;\n  ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);\n  ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);\n  // Clear last error so we don't fail subsequent tests\n  cudaGetLastError();\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_device_vector.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator\n#include <thrust/sequence.h>                    // for sequence\n\n#include <numeric>  // for iota\n#include <thread>   // for thread\n\n#include \"../../../src/common/cuda_rt_utils.h\"     // for DrVersion\n#include \"../../../src/common/device_helpers.cuh\"  // for CachingThrustPolicy, PinnedMemory\n#include \"../../../src/common/device_vector.cuh\"\n#include \"xgboost/global_config.h\"  // for GlobalConfigThreadLocalStore\n#include \"xgboost/windefs.h\"        // for xgboost_IS_WIN\n\nnamespace dh {\n#if !defined(XGBOOST_USE_RMM)\nTEST(AsyncPoolAllocator, Basic) {\n  if (!xgboost::curt::MemoryPoolsSupported(xgboost::curt::CurrentDevice())) {\n    GTEST_SKIP_(\"The async memory pool is not available on the current device.\");\n  }\n\n  for (bool use_async_pool : {true, false}) {\n    detail::XGBAsyncPoolAllocator<float> alloc;\n    alloc.SetAsync(use_async_pool);\n    std::size_t n = 16;\n    auto ptr = alloc.allocate(n);\n    ASSERT_TRUE(ptr);\n    alloc.deallocate(ptr, n);\n  }\n}\n#endif  // !defined(XGBOOST_USE_RMM)\n\nTEST(DeviceUVector, Basic) {\n  GlobalMemoryLogger().Clear();\n  std::int32_t verbosity{3};\n  std::swap(verbosity, xgboost::GlobalConfigThreadLocalStore::Get()->verbosity);\n  DeviceUVector<float> uvec;\n  uvec.resize(12);\n  auto peak = GlobalMemoryLogger().PeakMemory();\n  auto n_bytes = sizeof(decltype(uvec)::value_type) * uvec.size();\n  ASSERT_EQ(peak, n_bytes);\n  std::swap(verbosity, xgboost::GlobalConfigThreadLocalStore::Get()->verbosity);\n\n  DeviceUVector<double> uvec1{16};\n  ASSERT_EQ(uvec1.size(), 16);\n  uvec1.resize(3);\n  ASSERT_EQ(uvec1.size(), 3);\n  ASSERT_EQ(uvec1.Capacity(), 16);\n  ASSERT_EQ(std::distance(uvec1.begin(), uvec1.end()), uvec1.size());\n  auto orig = uvec1.size();\n\n  thrust::sequence(dh::CachingThrustPolicy(), uvec1.begin(), 
uvec1.end(), 0);\n  uvec1.resize(32);\n  ASSERT_EQ(uvec1.size(), 32);\n  ASSERT_EQ(uvec1.Capacity(), 32);\n  auto eq = thrust::equal(dh::CachingThrustPolicy(), uvec1.cbegin(), uvec1.cbegin() + orig,\n                          thrust::make_counting_iterator(0));\n  ASSERT_TRUE(eq);\n\n  uvec1.clear();\n  ASSERT_EQ(uvec1.size(), 0);\n  ASSERT_EQ(uvec1.Capacity(), 32);\n}\n\n#if defined(__linux__)\nnamespace {\nclass TestVirtualMem : public ::testing::TestWithParam<CUmemLocationType> {\n public:\n  void Run() {\n    auto type = this->GetParam();\n    detail::GrowOnlyVirtualMemVec vec{type};\n    auto prop = xgboost::cudr::MakeAllocProp(type);\n    auto gran = xgboost::cudr::GetAllocGranularity(&prop);\n    ASSERT_GE(gran, 2);\n    auto data = vec.GetSpan<std::int32_t>(32);  // should be smaller than granularity\n    ASSERT_EQ(data.size(), 32);\n    static_assert(std::is_same_v<typename decltype(data)::value_type, std::int32_t>);\n\n    std::vector<std::int32_t> h_data(data.size());\n    auto check = [&] {\n      for (std::size_t i = 0; i < h_data.size(); ++i) {\n        ASSERT_EQ(h_data[i], i);\n      }\n    };\n    auto fill = [&](std::int32_t n_orig, xgboost::common::Span<std::int32_t> data) {\n      if (type == CU_MEM_LOCATION_TYPE_DEVICE) {\n        thrust::sequence(dh::CachingThrustPolicy(), data.data() + n_orig, data.data() + data.size(),\n                         n_orig);\n        dh::safe_cuda(cudaMemcpy(h_data.data(), data.data(), data.size_bytes(), cudaMemcpyDefault));\n      } else {\n        std::iota(data.data() + n_orig, data.data() + data.size(), n_orig);\n        std::copy_n(data.data(), data.size(), h_data.data());\n      }\n    };\n\n    fill(0, data);\n    check();\n\n    auto n_orig = data.size();\n    // Should be smaller than granularity, use already reserved.\n    data = vec.GetSpan<std::int32_t>(128);\n    h_data.resize(data.size());\n    fill(n_orig, data);\n    check();\n    if (128 < gran) {\n      ASSERT_EQ(vec.Capacity(), gran);\n    }\n\n 
   n_orig = data.size();\n    data = vec.GetSpan<std::int32_t>(gran / 2);\n    h_data.resize(data.size());\n    fill(n_orig, data);\n    check();\n    ASSERT_EQ(vec.Capacity(), gran * 2);\n\n    n_orig = data.size();\n    data = vec.GetSpan<std::int32_t>(gran);\n    h_data.resize(data.size());\n    fill(n_orig, data);\n    check();\n    ASSERT_EQ(vec.Capacity(), gran * 4);\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestVirtualMem, Alloc) { this->Run(); }\n\nINSTANTIATE_TEST_SUITE_P(\n    Basic, TestVirtualMem,\n    ::testing::Values(CU_MEM_LOCATION_TYPE_DEVICE, CU_MEM_LOCATION_TYPE_HOST_NUMA),\n    [](::testing::TestParamInfo<TestVirtualMem::ParamType> const& info) -> char const* {\n      auto type = info.param;\n      switch (type) {\n        case CU_MEM_LOCATION_TYPE_DEVICE:\n          return \"Device\";\n        case CU_MEM_LOCATION_TYPE_HOST_NUMA:\n          return \"HostNuma\";\n        default:\n          LOG(FATAL) << \"unreachable\";\n      }\n      return nullptr;\n    });\n#endif  // defined(__linux__)\n\nTEST(TestVirtualMem, Version) {\n  std::int32_t major, minor;\n  xgboost::curt::GetDrVersionGlobal(&major, &minor);\n  LOG(INFO) << \"Latest supported CUDA version by the driver:\" << major << \".\" << minor;\n  PinnedMemory pinned;\n#if defined(xgboost_IS_WIN)\n  ASSERT_FALSE(pinned.IsVm());\n#else  // defined(xgboost_IS_WIN)\n  if (major == 12 && minor >= 5 || major > 12) {\n    ASSERT_TRUE(pinned.IsVm());\n  } else {\n    ASSERT_FALSE(pinned.IsVm());\n  }\n#endif  // defined(xgboost_IS_WIN)\n}\n\nTEST(AtomitFetch, Max) {\n  auto n_threads = std::thread::hardware_concurrency();\n  std::vector<std::thread> threads;\n  std::atomic<std::int64_t> n{0};\n  decltype(n)::value_type add = 64;\n  for (decltype(n_threads) t = 0; t < n_threads; ++t) {\n    threads.emplace_back([=, &n] {\n      for (decltype(add) i = 0; i < add; ++i) {\n        detail::AtomicFetchMax(n, static_cast<decltype(add)>(t + i));\n      }\n    });\n  }\n  for (auto& t : threads) {\n    
t.join();\n  }\n  ASSERT_EQ(n, n_threads - 1 + add - 1);  // 0-based indexing\n}\n}  // namespace dh\n"
  },
  {
    "path": "tests/cpp/common/test_gpu_compressed_iterator.cu",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <thrust/sequence.h>  // for sequence\n\n#include <algorithm>  // for generate\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t, uint32_t\n#include <vector>     // for vector\n\n#include \"../../../src/common/compressed_iterator.h\"\n#include \"../../../src/common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../../src/common/device_helpers.cuh\"  // for LaunchN\n#include \"../../../src/common/device_vector.cuh\"   // for DeviceUVector\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nstruct WriteSymbolFunction {\n  CompressedBufferWriter cbw;\n  unsigned char* buffer_data_d;\n  int const* input_data_d;\n  WriteSymbolFunction(CompressedBufferWriter cbw, unsigned char* buffer_data_d,\n                      int const* input_data_d)\n      : cbw(cbw), buffer_data_d(buffer_data_d), input_data_d(input_data_d) {}\n\n  __device__ void operator()(size_t i) { cbw.AtomicWriteSymbol(buffer_data_d, input_data_d[i], i); }\n};\n\nstruct ReadSymbolFunction {\n  CompressedIterator<int> ci;\n  int* output_data_d;\n  ReadSymbolFunction(CompressedIterator<int> ci, int* output_data_d)\n    : ci(ci), output_data_d(output_data_d) {}\n\n  __device__ void operator()(size_t i) {\n    output_data_d[i] = ci[i];\n  }\n};\n\nTEST(CompressedIterator, TestGPU) {\n  dh::safe_cuda(cudaSetDevice(0));\n  std::vector<int> test_cases = {1, 3, 426, 21, 64, 256, 100000, INT32_MAX};\n  int num_elements = 1000;\n  int repetitions = 1000;\n  srand(9);\n\n  for (auto alphabet_size : test_cases) {\n    for (int i = 0; i < repetitions; i++) {\n      std::vector<int> input(num_elements);\n      std::generate(input.begin(), input.end(),\n        [=]() { return rand() % alphabet_size; });\n      CompressedBufferWriter cbw(alphabet_size);\n      thrust::device_vector<int> input_d(input);\n\n      thrust::device_vector<unsigned 
char> buffer_d(\n        CompressedBufferWriter::CalculateBufferSize(input.size(),\n          alphabet_size));\n\n      // write the data on device\n      auto input_data_d = input_d.data().get();\n      auto buffer_data_d = buffer_d.data().get();\n      dh::LaunchN(input_d.size(),\n                  WriteSymbolFunction(cbw, buffer_data_d, input_data_d));\n\n      // read the data on device\n      CompressedIterator<int> ci(buffer_d.data().get(), alphabet_size);\n      thrust::device_vector<int> output_d(input.size());\n      auto output_data_d = output_d.data().get();\n      dh::LaunchN(output_d.size(), ReadSymbolFunction(ci, output_data_d));\n\n      std::vector<int> output(output_d.size());\n      thrust::copy(output_d.begin(), output_d.end(), output.begin());\n\n      ASSERT_TRUE(input == output);\n    }\n  }\n}\n\nnamespace {\nclass TestDoubleCompressedIter : public ::testing::TestWithParam<std::size_t> {\n public:\n  constexpr std::size_t static CompressedBytes() { return 28; }\n\n private:\n  dh::DeviceUVector<std::int32_t> input_;\n  Context ctx_{MakeCUDACtx(0)};\n  std::size_t n_symbols_{11};\n\n  void SetUp() override {\n    input_.resize(n_symbols_ * 3);\n    auto policy = ctx_.CUDACtx()->CTP();\n    for (std::size_t i = 0; i < 3; ++i) {\n      auto beg = input_.begin() + n_symbols_ * i;\n      auto end = beg + n_symbols_;\n      thrust::sequence(policy, beg, end, 0);\n    }\n  }\n\n public:\n  void Run(std::size_t n0_bytes) const {\n    auto policy = ctx_.CUDACtx()->CTP();\n\n    auto compressed_nbytes = CompressedBufferWriter::CalculateBufferSize(input_.size(), n_symbols_);\n    ASSERT_EQ(compressed_nbytes, CompressedBytes());\n\n    dh::device_vector<CompressedByteT> buf(compressed_nbytes, 0);\n    CompressedBufferWriter cbw(n_symbols_);\n    dh::LaunchN(input_.size(), ctx_.CUDACtx()->Stream(),\n                WriteSymbolFunction{cbw, buf.data().get(), input_.data()});\n\n    dh::device_vector<CompressedByteT> buf0(n0_bytes);\n    
dh::device_vector<CompressedByteT> buf1(compressed_nbytes - buf0.size());\n    thrust::copy_n(policy, buf.begin(), buf0.size(), buf0.begin());\n    thrust::copy_n(policy, buf.begin() + buf0.size(), buf1.size(), buf1.begin());\n\n    HostDeviceVector<std::int32_t> output(input_.size(), 0, ctx_.Device());\n    auto it = DoubleCompressedIter<std::uint32_t>{buf0.data().get(), buf0.size(), buf1.data().get(),\n                                                  n_symbols_};\n    auto d_out = output.DeviceSpan();\n    dh::LaunchN(input_.size(), ctx_.CUDACtx()->Stream(),\n                [=] __device__(std::size_t i) { d_out[i] = it[i]; });\n    auto h_out = output.ConstHostVector();\n    for (std::size_t i = 0; i < 3; ++i) {\n      auto beg = h_out.begin() + n_symbols_ * i;\n      auto end = beg + n_symbols_;\n      std::size_t k = 0;\n      for (auto it = beg; it != end; ++it) {\n        ASSERT_EQ(*it, k);\n        k++;\n      }\n    }\n  }\n};\n\ninline auto kCnBytes = TestDoubleCompressedIter::CompressedBytes();\n}  // namespace\n\nTEST_P(TestDoubleCompressedIter, Basic) {\n  auto n0_bytes = this->GetParam();\n  this->Run(n0_bytes);\n}\n\nINSTANTIATE_TEST_SUITE_P(Gpu, TestDoubleCompressedIter,\n                         ::testing::Values(0, kCnBytes, 1, kCnBytes - 1, kCnBytes / 2, kCnBytes / 3,\n                                           kCnBytes / 4, kCnBytes / 6, kCnBytes / 8,\n                                           kCnBytes / 12));\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_group_data.cc",
    "content": "/*!\n * Copyright 2019 by Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include \"../../../src/common/group_data.h\"\n\nnamespace xgboost {\nnamespace common {\n\nTEST(GroupData, ParallelGroupBuilder) {\n  std::vector<size_t> offsets;\n  std::vector<Entry> data;\n  ParallelGroupBuilder<Entry, size_t> builder(&offsets, &data);\n  builder.InitBudget(0, 1);\n  // Add two rows with two elements each\n  builder.AddBudget(0, 0, 2);\n  builder.AddBudget(1, 0, 2);\n\n  builder.InitStorage();\n  builder.Push(0, Entry(0, 0), 0);\n  builder.Push(0, Entry(1, 1), 0);\n  builder.Push(1, Entry(0, 2), 0);\n  builder.Push(1, Entry(1, 3), 0);\n\n  std::vector<Entry> expected_data{\n      Entry(0, 0),\n      Entry(1, 1),\n      Entry(0, 2),\n      Entry(1, 3),\n  };\n  std::vector<size_t> expected_offsets{0, 2, 4};\n\n  EXPECT_EQ(data, expected_data);\n  EXPECT_EQ(offsets, expected_offsets);\n\n  // Create new builder, add one more row given already populated offsets/data\n  ParallelGroupBuilder<Entry, size_t> builder2(&offsets, &data,\n                                               offsets.size() - 1);\n  builder2.InitBudget(0, 1);\n  builder2.AddBudget(2, 0, 2);\n  builder2.InitStorage();\n  builder2.Push(2, Entry(0, 4), 0);\n  builder2.Push(2, Entry(1, 5), 0);\n\n  expected_data.emplace_back(0, 4);\n  expected_data.emplace_back(1, 5);\n  expected_offsets.emplace_back(6);\n\n  EXPECT_EQ(data, expected_data);\n  EXPECT_EQ(offsets, expected_offsets);\n}\n\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_hist_util.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include \"test_hist_util.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/data.h>                // for ExtMemConfig\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n\n#include <memory>  // for shared_ptr\n#include <string>\n#include <vector>\n\n#include \"../../../src/common/hist_util.h\"\n#include \"../../../src/data/gradient_index.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nvoid ParallelGHistBuilderReset() {\n  constexpr size_t kBins = 10;\n  constexpr size_t kNodes = 5;\n  constexpr size_t kNodesExtended = 10;\n  constexpr size_t kTasksPerNode = 10;\n  constexpr double kValue = 1.0;\n  const size_t nthreads = AllThreadsForTest();\n\n  HistCollection collection;\n  collection.Init(kBins);\n\n  for (size_t inode = 0; inode < kNodesExtended; inode++) {\n    collection.AddHistRow(inode);\n    collection.AllocateData(inode);\n  }\n  ParallelGHistBuilder hist_builder;\n  hist_builder.Init(kBins);\n  std::vector<GHistRow> target_hist(kNodes);\n  for (size_t i = 0; i < target_hist.size(); ++i) {\n    target_hist[i] = collection[i];\n  }\n\n  common::BlockedSpace2d space(kNodes, [&](size_t /* node*/) { return kTasksPerNode; }, 1);\n  hist_builder.Reset(nthreads, kNodes, space, target_hist);\n\n  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {\n    const size_t tid = omp_get_thread_num();\n\n    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);\n    // fill hist by some non-null values\n    for (size_t j = 0; j < kBins; ++j) {\n      hist[j].Add(kValue, kValue);\n    }\n  });\n\n  // reset and extend buffer\n  target_hist.resize(kNodesExtended);\n  for (size_t i = 0; i < target_hist.size(); ++i) {\n    target_hist[i] = collection[i];\n  }\n  common::BlockedSpace2d space2(kNodesExtended, [&](size_t /*node*/) { return kTasksPerNode; }, 1);\n  hist_builder.Reset(nthreads, kNodesExtended, space2, target_hist);\n\n  
common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d) {\n    const size_t tid = omp_get_thread_num();\n\n    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);\n    // after the reset, every bin of the re-initialized hist must read zero\n    for (size_t j = 0; j < kBins; ++j) {\n      ASSERT_EQ(0.0, hist[j].GetGrad());\n      ASSERT_EQ(0.0, hist[j].GetHess());\n    }\n  });\n}\n\nvoid ParallelGHistBuilderReduceHist() {\n  constexpr size_t kBins = 10;\n  constexpr size_t kNodes = 5;\n  constexpr size_t kTasksPerNode = 10;\n  constexpr double kValue = 1.0;\n  const size_t nthreads = AllThreadsForTest();\n\n  HistCollection collection;\n  collection.Init(kBins);\n\n  for (size_t inode = 0; inode < kNodes; inode++) {\n    collection.AddHistRow(inode);\n    collection.AllocateData(inode);\n  }\n  ParallelGHistBuilder hist_builder;\n  hist_builder.Init(kBins);\n  std::vector<GHistRow> target_hist(kNodes);\n  for (size_t i = 0; i < target_hist.size(); ++i) {\n    target_hist[i] = collection[i];\n  }\n\n  common::BlockedSpace2d space(kNodes, [&](size_t /*node*/) { return kTasksPerNode; }, 1);\n  hist_builder.Reset(nthreads, kNodes, space, target_hist);\n\n  // Simple analog of BuildHist function, works in parallel for both tree-nodes and data in node\n  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {\n    const size_t tid = omp_get_thread_num();\n\n    GHistRow hist = hist_builder.GetInitializedHist(tid, inode);\n    for (size_t i = 0; i < kBins; ++i) {\n      hist[i].Add(kValue, kValue);\n    }\n  });\n\n  for (size_t inode = 0; inode < kNodes; inode++) {\n    hist_builder.ReduceHist(inode, 0, kBins);\n\n    // We had kTasksPerNode tasks to add kValue to each bin for each node\n    // So, after reducing we expect to have (kValue * kTasksPerNode) in each node\n    for (size_t i = 0; i < kBins; ++i) {\n      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetGrad());\n      ASSERT_EQ(kValue * kTasksPerNode, collection[inode][i].GetHess());\n   
 }\n  }\n}\n\nTEST(ParallelGHistBuilder, Reset) { ParallelGHistBuilderReset(); }\n\nTEST(ParallelGHistBuilder, ReduceHist) { ParallelGHistBuilderReduceHist(); }\n\nTEST(HistUtil, DenseCutsCategorical) {\n  Context ctx;\n  int categorical_sizes[] = {2, 6, 8, 12};\n  int num_bins = 256;\n  int sizes[] = {25, 100, 1000};\n  for (auto n : sizes) {\n    for (auto num_categories : categorical_sizes) {\n      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);\n      std::vector<float> x_sorted(x);\n      std::sort(x_sorted.begin(), x_sorted.end());\n      auto dmat = GetDMatrixFromData(x, n, 1);\n      HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);\n      auto cuts_from_sketch = cuts.Values();\n      EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());\n      EXPECT_GE(cuts_from_sketch.back(), x_sorted.back());\n      EXPECT_EQ(cuts_from_sketch.size(), static_cast<size_t>(num_categories));\n    }\n  }\n}\n\nTEST(HistUtil, DenseCutsAccuracyTest) {\n  Context ctx;\n  int bin_sizes[] = {2, 16, 256, 512};\n  int sizes[] = {100};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    for (auto num_bins : bin_sizes) {\n      HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, DenseCutsAccuracyTestWeights) {\n  int bin_sizes[] = {2, 16, 256, 512};\n  int sizes[] = {100, 1000, 1500};\n  int num_columns = 5;\n  Context ctx;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto w = GenerateRandomWeights(num_rows);\n    dmat->Info().weights_.HostVector() = w;\n    for (auto num_bins : bin_sizes) {\n      {\n        HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true);\n        ValidateCuts(cuts, dmat.get(), 
num_bins);\n      }\n      {\n        HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false);\n        ValidateCuts(cuts, dmat.get(), num_bins);\n      }\n    }\n  }\n}\n\nvoid TestQuantileWithHessian(bool use_sorted) {\n  int bin_sizes[] = {2, 16, 256, 512};\n  int sizes[] = {1000, 1500};\n  int num_columns = 5;\n  Context ctx;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto w = GenerateRandomWeights(num_rows);\n    auto hessian = GenerateRandomWeights(num_rows);\n    std::mt19937 rng(0);\n    std::shuffle(hessian.begin(), hessian.end(), rng);\n    dmat->Info().weights_.HostVector() = w;\n\n    for (auto num_bins : bin_sizes) {\n      HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian);\n      for (size_t i = 0; i < w.size(); ++i) {\n        dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i];\n      }\n      ValidateCuts(cuts_hess, dmat.get(), num_bins);\n\n      HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted);\n      ValidateCuts(cuts_wh, dmat.get(), num_bins);\n\n      ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());\n      for (size_t i = 0; i < cuts_hess.Values().size(); ++i) {\n        ASSERT_NEAR(cuts_wh.Values()[i], cuts_hess.Values()[i], kRtEps);\n      }\n\n      dmat->Info().weights_.HostVector() = w;\n    }\n  }\n}\n\nTEST(HistUtil, QuantileWithHessian) {\n  TestQuantileWithHessian(true);\n  TestQuantileWithHessian(false);\n}\n\nTEST(HistUtil, DenseCutsExternalMemory) {\n  int bin_sizes[] = {2, 16, 256, 512};\n  int sizes[] = {100, 1000, 1500};\n  int num_columns = 5;\n  Context ctx;\n  for (auto num_rows : sizes) {\n    HostDeviceVector<float> x{GenerateRandom(num_rows, num_columns)};\n    common::TemporaryDirectory tmpdir;\n    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);\n    for (auto num_bins : 
bin_sizes) {\n      HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, IndexBinBound) {\n  uint64_t bin_sizes[] = {static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,\n                          static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,\n                          static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};\n  BinTypeSize expected_bin_type_sizes[] = {kUint8BinsTypeSize, kUint16BinsTypeSize,\n                                           kUint32BinsTypeSize};\n  size_t constexpr kRows = 100;\n  size_t constexpr kCols = 10;\n  Context ctx;\n  size_t bin_id = 0;\n  for (auto max_bin : bin_sizes) {\n    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n    GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);\n    EXPECT_EQ(hmat.index.Size(), kRows * kCols);\n    EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());\n  }\n}\n\ntemplate <typename T>\nvoid CheckIndexData(T const* data_ptr, uint32_t const* offsets, const GHistIndexMatrix& hmat,\n                    size_t n_cols) {\n  for (size_t i = 0; i < hmat.index.Size(); ++i) {\n    EXPECT_EQ(data_ptr[i] + offsets[i % n_cols], hmat.index[i]);\n  }\n}\n\nTEST(HistUtil, IndexBinData) {\n  uint64_t constexpr kBinSizes[] = {\n      static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,\n      static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,\n      static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};\n  size_t constexpr kRows = 100;\n  size_t constexpr kCols = 10;\n  Context ctx;\n\n  for (auto max_bin : kBinSizes) {\n    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n    GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);\n    uint32_t const* offsets = hmat.index.Offset();\n    EXPECT_EQ(hmat.index.Size(), kRows * kCols);\n    switch (max_bin) {\n   
   case kBinSizes[0]:\n        CheckIndexData(hmat.index.data<uint8_t>(), offsets, hmat, kCols);\n        break;\n      case kBinSizes[1]:\n        CheckIndexData(hmat.index.data<uint16_t>(), offsets, hmat, kCols);\n        break;\n      case kBinSizes[2]:\n        CheckIndexData(hmat.index.data<uint32_t>(), offsets, hmat, kCols);\n        break;\n    }\n  }\n}\n\nvoid TestSketchFromWeights(bool with_group) {\n  size_t constexpr kRows = 300, kCols = 20, kBins = 256;\n  size_t constexpr kGroups = 10;\n  auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();\n  Context ctx;\n  common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);\n\n  MetaInfo info;\n  auto& h_weights = info.weights_.HostVector();\n  if (with_group) {\n    h_weights.resize(kGroups);\n  } else {\n    h_weights.resize(kRows);\n  }\n  std::fill(h_weights.begin(), h_weights.end(), 1.0f);\n\n  std::vector<bst_group_t> groups(kGroups);\n  if (with_group) {\n    for (size_t i = 0; i < kGroups; ++i) {\n      groups[i] = kRows / kGroups;\n    }\n    auto sg = linalg::Make1dInterface(groups.data(), kGroups);\n    info.SetInfo(ctx, \"group\", sg.c_str());\n  }\n\n  info.num_row_ = kRows;\n  info.num_col_ = kCols;\n\n  // Assign weights.\n  if (with_group) {\n    m->SetInfo(\"group\", Make1dInterfaceTest(groups.data(), kGroups));\n  }\n\n  m->SetInfo(\"weight\", Make1dInterfaceTest(h_weights.data(), h_weights.size()));\n  m->Info().num_col_ = kCols;\n  m->Info().num_row_ = kRows;\n  ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);\n  ValidateCuts(cuts, m.get(), kBins);\n\n  if (with_group) {\n    m->Info().weights_ = decltype(m->Info().weights_)();  // remove weight\n    HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins);\n    for (size_t i = 0; i < cuts.Values().size(); ++i) {\n      EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);\n    }\n    for (size_t i = 0; i < cuts.Ptrs().size(); ++i) {\n      ASSERT_EQ(cuts.Ptrs().at(i), 
non_weighted.Ptrs().at(i));\n    }\n  }\n\n  if (with_group) {\n    std::vector<float> group_weights(kGroups);\n    // Generate different weight.\n    for (size_t i = 0; i < group_weights.size(); ++i) {\n      group_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);\n    }\n    m->SetInfo(\"weight\", Make1dInterfaceTest(group_weights.data(), group_weights.size()));\n    HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins);\n    ValidateCuts(weighted, m.get(), kBins);\n  }\n}\n\nTEST(HistUtil, SketchFromWeights) {\n  TestSketchFromWeights(true);\n  TestSketchFromWeights(false);\n}\n\nTEST(HistUtil, UnrollGroupWeights) {\n  MetaInfo info;\n  info.num_row_ = 6;\n  info.group_ptr_ = {0, 2, 3, 6};\n  info.weights_.HostVector() = {1.0f, 5.0f, 9.0f};\n\n  std::vector<float> expected{1.0f, 1.0f, 5.0f, 9.0f, 9.0f, 9.0f};\n  ASSERT_EQ(detail::UnrollGroupWeights(info), expected);\n}\n\nnamespace {\nvoid TestGroupWeightsEquivalentToRowWeights(bool use_sorted) {\n  Context ctx;\n  std::vector<float> x{\n      0.0f, 5.0f, 1.0f, 4.0f, 2.0f, 3.0f, 3.0f, 2.0f, 4.0f, 1.0f, 5.0f, 0.0f,\n  };\n  auto grouped = GetDMatrixFromData(x, 6, 2);\n  auto per_row = GetDMatrixFromData(x, 6, 2);\n\n  std::vector<bst_group_t> group_sizes{1, 1, 4};\n  std::vector<float> group_weights{1.0f, 1000.0f, 1.0f};\n  std::vector<float> row_weights{1.0f, 1000.0f, 1.0f, 1.0f, 1.0f, 1.0f};\n\n  grouped->SetInfo(\"group\", Make1dInterfaceTest(group_sizes.data(), group_sizes.size()));\n  grouped->SetInfo(\"weight\", Make1dInterfaceTest(group_weights.data(), group_weights.size()));\n  per_row->SetInfo(\"weight\", Make1dInterfaceTest(row_weights.data(), row_weights.size()));\n\n  auto grouped_cuts = SketchOnDMatrix(&ctx, grouped.get(), 2, use_sorted);\n  auto per_row_cuts = SketchOnDMatrix(&ctx, per_row.get(), 2, use_sorted);\n\n  ASSERT_EQ(grouped_cuts.Ptrs().size(), per_row_cuts.Ptrs().size());\n  for (size_t i = 0; i < grouped_cuts.Ptrs().size(); ++i) {\n    
ASSERT_EQ(grouped_cuts.Ptrs()[i], per_row_cuts.Ptrs()[i]);\n  }\n\n  ASSERT_EQ(grouped_cuts.Values().size(), per_row_cuts.Values().size());\n  for (size_t i = 0; i < grouped_cuts.Values().size(); ++i) {\n    ASSERT_FLOAT_EQ(grouped_cuts.Values()[i], per_row_cuts.Values()[i]);\n  }\n}\n}  // anonymous namespace\n\nTEST(HistUtil, GroupWeightsEquivalentToRowWeights) {\n  TestGroupWeightsEquivalentToRowWeights(true);\n  TestGroupWeightsEquivalentToRowWeights(false);\n}\n\nTEST(HistUtil, SketchCategoricalFeatures) {\n  Context ctx;\n  TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) {\n    return SketchOnDMatrix(&ctx, p_fmat, num_bins);\n  });\n  TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) {\n    return SketchOnDMatrix(&ctx, p_fmat, num_bins);\n  });\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_hist_util.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <xgboost/base.h>  // for bst_bin_t\n#include <xgboost/c_api.h>\n#include <xgboost/data.h>\n\n#include <algorithm>  // for transform\n#include <cmath>      // for floor\n#include <cstddef>    // for size_t\n#include <limits>     // for numeric_limits\n#include <string>     // for string, to_string\n#include <tuple>      // for tuple, make_tuple\n#include <vector>     // for vector\n\n#include \"../../../include/xgboost/logging.h\"\n#include \"../../../src/common/cuda_context.cuh\"\n#include \"../../../src/common/cuda_rt_utils.h\"  // for SetDevice\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../../../src/common/hist_util.cuh\"\n#include \"../../../src/common/hist_util.h\"\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"../../../src/data/simple_dmatrix.h\"\n#include \"../data/test_array_interface.h\"\n#include \"../filesystem.h\"  // for TemporaryDirectory\n#include \"../helpers.h\"\n#include \"test_hist_util.h\"\n\nnamespace xgboost::common {\n\ntemplate <typename AdapterT>\nHistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {\n  data::SimpleDMatrix dmat(adapter, missing, 1);\n  HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins);\n  return cuts;\n}\n\nTEST(HistUtil, DeviceSketch) {\n  auto ctx = MakeCUDACtx(0);\n  int num_columns = 1;\n  int num_bins = 4;\n  std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, -1.0f};\n  int num_rows = x.size();\n  auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n\n  auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n\n  Context cpu_ctx;\n  HistogramCuts host_cuts = SketchOnDMatrix(&cpu_ctx, dmat.get(), num_bins);\n\n  EXPECT_EQ(device_cuts.Values(), host_cuts.Values());\n  EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());\n}\n\nTEST(HistUtil, SketchBatchNumElements) {\n#if 
defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n  GTEST_SKIP_(\"Test not runnable with RMM enabled.\");\n#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n  size_t constexpr kCols = 10000;\n  std::int32_t device = dh::CurrentDevice();\n  auto avail = static_cast<size_t>(dh::AvailableMemory(device) * 0.8);\n  auto per_elem = detail::BytesPerElement(false);\n  auto avail_elem = avail / per_elem;\n  size_t rows = avail_elem / kCols * 10;\n  auto shape = detail::SketchShape{rows, kCols, rows * kCols};\n  auto batch = detail::SketchBatchNumElements(detail::UnknownSketchNumElements(), shape, device,\n                                              256, false, 0);\n  ASSERT_EQ(batch, avail_elem);\n}\n\nTEST(HistUtil, DeviceSketchMemory) {\n  auto ctx = MakeCUDACtx(0);\n  int num_columns = 100;\n  int num_rows = 1000;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n\n  dh::GlobalMemoryLogger().Clear();\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n\n  size_t bytes_required =\n      detail::RequiredMemory(num_rows, num_columns, num_rows * num_columns, num_bins, false);\n  EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);\n  EXPECT_GE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 0.95);\n  ConsoleLogger::Configure({{\"verbosity\", \"0\"}});\n}\n\nTEST(HistUtil, DeviceSketchWeightsMemory) {\n  auto ctx = MakeCUDACtx(0);\n  int num_columns = 100;\n  int num_rows = 1000;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n  dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);\n\n  dh::GlobalMemoryLogger().Clear();\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n  ConsoleLogger::Configure({{\"verbosity\", 
\"0\"}});\n\n  size_t bytes_required =\n      detail::RequiredMemory(num_rows, num_columns, num_rows * num_columns, num_bins, true);\n  EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);\n  EXPECT_GE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required);\n}\n\nTEST(HistUtil, DeviceSketchDeterminism) {\n  auto ctx = MakeCUDACtx(0);\n  int num_rows = 500;\n  int num_columns = 5;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n  auto reference_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);\n  size_t constexpr kRounds{100};\n  for (size_t r = 0; r < kRounds; ++r) {\n    auto new_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);\n    ASSERT_EQ(reference_sketch.Values(), new_sketch.Values());\n  }\n}\n\nTEST(HistUtil, DeviceSketchCategoricalAsNumeric) {\n  auto ctx = MakeCUDACtx(0);\n  auto categorical_sizes = {2, 6, 8, 12};\n  int num_bins = 256;\n  auto sizes = {25, 100, 1000};\n  for (auto n : sizes) {\n    for (auto num_categories : categorical_sizes) {\n      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);\n      auto dmat = GetDMatrixFromData(x, n, 1);\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, DeviceSketchCategoricalFeatures) {\n  auto ctx = MakeCUDACtx(0);\n  TestCategoricalSketch(1000, 256, 32, false, [ctx](DMatrix* p_fmat, int32_t num_bins) {\n    return DeviceSketch(&ctx, p_fmat, num_bins);\n  });\n  TestCategoricalSketch(1000, 256, 32, true, [ctx](DMatrix* p_fmat, int32_t num_bins) {\n    return DeviceSketch(&ctx, p_fmat, num_bins);\n  });\n}\n\nvoid TestMixedSketch() {\n  size_t n_samples = 1000, n_features = 2, n_categories = 3;\n  bst_bin_t n_bins = 64;\n\n  std::vector<float> data(n_samples * n_features);\n  SimpleLCG gen;\n  SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};\n  
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};\n  for (size_t i = 0; i < n_samples * n_features; ++i) {\n    // two features, row major. The first column is categorical and the second is numeric.\n    if (i % 2 == 0) {\n      data[i] = std::floor(cat_d(&gen));\n    } else {\n      data[i] = num_d(&gen);\n    }\n  }\n\n  auto m = GetDMatrixFromData(data, n_samples, n_features);\n  m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);\n  m->Info().feature_types.HostVector().push_back(FeatureType::kNumerical);\n\n  auto ctx = MakeCUDACtx(0);\n  auto cuts = DeviceSketch(&ctx, m.get(), n_bins);\n  ASSERT_EQ(cuts.Values().size(), n_bins + n_categories);\n}\n\nTEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }\n\nTEST(HistUtil, RemoveDuplicatedCategories) {\n  bst_idx_t n_samples = 512;\n  bst_feature_t n_features = 3;\n  bst_cat_t n_categories = 5;\n\n  auto ctx = MakeCUDACtx(0);\n  SimpleLCG rng;\n  SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};\n\n  dh::device_vector<Entry> sorted_entries(n_samples * n_features);\n  for (std::size_t i = 0; i < n_samples; ++i) {\n    for (bst_feature_t j = 0; j < n_features; ++j) {\n      float fvalue{0.0f};\n      // The second column is categorical\n      if (j == 1) {\n        fvalue = std::floor(cat_d(&rng));\n      } else {\n        fvalue = i;\n      }\n      sorted_entries[i * n_features + j] = Entry{j, fvalue};\n    }\n  }\n\n  MetaInfo info;\n  info.num_col_ = n_features;\n  info.num_row_ = n_samples;\n  info.feature_types.HostVector() = std::vector<FeatureType>{\n      FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};\n  ASSERT_EQ(info.feature_types.Size(), n_features);\n\n  HostDeviceVector<bst_idx_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};\n  cuts_ptr.SetDevice(DeviceOrd::CUDA(0));\n\n  dh::device_vector<float> weight(n_samples * n_features, 0);\n  dh::Iota(dh::ToSpan(weight), 
ctx.CUDACtx()->Stream());\n\n  dh::caching_device_vector<bst_idx_t> columns_ptr(4);\n  for (std::size_t i = 0; i < columns_ptr.size(); ++i) {\n    columns_ptr[i] = i * n_samples;\n  }\n  // sort into column major\n  thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),\n                      detail::EntryCompareOp());\n\n  detail::RemoveDuplicatedCategories(&ctx, info, cuts_ptr.DeviceSpan(), &sorted_entries, &weight,\n                                     &columns_ptr);\n\n  auto const& h_cptr = cuts_ptr.ConstHostVector();\n  ASSERT_EQ(h_cptr.back(), n_samples * 2 + n_categories);\n  // check numerical\n  for (std::size_t i = 0; i < n_samples; ++i) {\n    ASSERT_EQ(weight[i], i * 3);\n  }\n  auto beg = n_samples + n_categories;\n  for (std::size_t i = 0; i < n_samples; ++i) {\n    ASSERT_EQ(weight[i + beg], i * 3 + 2);\n  }\n  // check categorical\n  beg = n_samples;\n  for (bst_cat_t i = 0; i < n_categories; ++i) {\n    // all from the second column\n    ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);\n  }\n}\n\nTEST(HistUtil, DeviceSketchMultipleColumns) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    for (auto num_bins : bin_sizes) {\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, DeviceSketchMultipleColumnsWeights) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);\n    for (auto num_bins 
: bin_sizes) {\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, DeviceSketchWeights) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto weighted_dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto& h_weights = weighted_dmat->Info().weights_.HostVector();\n    h_weights.resize(num_rows);\n    std::fill(h_weights.begin(), h_weights.end(), 1.0f);\n    for (auto num_bins : bin_sizes) {\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      auto wcuts = DeviceSketch(&ctx, weighted_dmat.get(), num_bins);\n      ASSERT_EQ(cuts.Ptrs(), wcuts.Ptrs());\n      ASSERT_EQ(cuts.Values(), wcuts.Values());\n      ValidateCuts(cuts, dmat.get(), num_bins);\n      ValidateCuts(wcuts, weighted_dmat.get(), num_bins);\n    }\n  }\n}\n\nTEST(HistUtil, DeviceSketchBatches) {\n  auto ctx = MakeCUDACtx(0);\n  int num_bins = 256;\n  int num_rows = 5000;\n  auto batch_sizes = {0, 100, 1500, 6000};\n  int num_columns = 5;\n  for (auto batch_size : batch_sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, batch_size);\n    ValidateCuts(cuts, dmat.get(), num_bins);\n  }\n\n  num_rows = 1000;\n  size_t batches = 16;\n  auto x = GenerateRandom(num_rows * batches, num_columns);\n  auto dmat = GetDMatrixFromData(x, num_rows * batches, num_columns);\n  auto cuts_with_batches = DeviceSketch(&ctx, dmat.get(), num_bins, num_rows);\n  auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, 0);\n\n  auto const& cut_values_batched = cuts_with_batches.Values();\n  auto const& cut_values = cuts.Values();\n  
CHECK_EQ(cut_values.size(), cut_values_batched.size());\n  for (size_t i = 0; i < cut_values.size(); ++i) {\n    ASSERT_NEAR(cut_values_batched[i], cut_values[i], 1e5);\n  }\n}\n\nTEST(HistUtil, DeviceSketchMultipleColumnsExternal) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    HostDeviceVector<float> x{GenerateRandom(num_rows, num_columns)};\n    common::TemporaryDirectory temp;\n    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);\n    for (auto num_bins : bin_sizes) {\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\n// See https://github.com/dmlc/xgboost/issues/5866.\nTEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  common::TemporaryDirectory temp;\n  for (auto num_rows : sizes) {\n    HostDeviceVector<float> x{GenerateRandom(num_rows, num_columns)};\n    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);\n    dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);\n    for (auto num_bins : bin_sizes) {\n      auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      ValidateCuts(cuts, dmat.get(), num_bins);\n    }\n  }\n}\n\ntemplate <typename Adapter>\nauto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_bins, float missing,\n                               size_t batch_size = 0) {\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), DeviceOrd::CUDA(0));\n  MetaInfo info;\n  AdapterDeviceSketch(ctx, adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);\n  return sketch_container.MakeCuts(ctx, info.IsColumnSplit());\n}\n\ntemplate <typename Adapter>\nvoid 
ValidateBatchedCuts(Context const* ctx, Adapter adapter, int num_bins, DMatrix* dmat,\n                         size_t batch_size = 0) {\n  common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(\n      ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);\n  ValidateCuts(batched_cuts, dmat, num_bins);\n}\n\nTEST(HistUtil, AdapterDeviceSketch) {\n  auto ctx = MakeCUDACtx(0);\n  int rows = 5;\n  int cols = 1;\n  int num_bins = 4;\n  float missing = -1.0;\n  thrust::device_vector<float> data(rows * cols);\n  auto json_array_interface = Generate2dArrayInterface(rows, cols, \"<f4\", &data);\n  data = std::vector<float>{1.0, 2.0, 3.0, 4.0, 5.0};\n  std::string str;\n  Json::Dump(json_array_interface, &str);\n\n  data::CupyAdapter adapter(str);\n\n  auto device_cuts = MakeUnweightedCutsForTest(&ctx, adapter, num_bins, missing);\n  ctx = ctx.MakeCPU();\n  auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);\n\n  EXPECT_EQ(device_cuts.Values(), host_cuts.Values());\n  EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());\n}\n\nTEST(HistUtil, AdapterDeviceSketchMemory) {\n  auto ctx = MakeCUDACtx(0);\n  int num_columns = 100;\n  int num_rows = 1000;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto x_device = thrust::device_vector<float>(x);\n  auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n\n  dh::GlobalMemoryLogger().Clear();\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  auto cuts =\n      MakeUnweightedCutsForTest(&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());\n  ConsoleLogger::Configure({{\"verbosity\", \"0\"}});\n  size_t bytes_required =\n      detail::RequiredMemory(num_rows, num_columns, num_rows * num_columns, num_bins, false);\n  EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);\n  EXPECT_GE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 0.95);\n}\n\nTEST(HistUtil, AdapterSketchSlidingWindowMemory) {\n  auto ctx 
= MakeCUDACtx(0);\n  int num_columns = 100;\n  int num_rows = 1000;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto x_device = thrust::device_vector<float>(x);\n  auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n  MetaInfo info;\n\n  dh::GlobalMemoryLogger().Clear();\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_container(ft, num_bins, num_columns, DeviceOrd::CUDA(0));\n  AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &sketch_container);\n  [[maybe_unused]] auto cuts = sketch_container.MakeCuts(&ctx, info.IsColumnSplit());\n  size_t bytes_required =\n      detail::RequiredMemory(num_rows, num_columns, num_rows * num_columns, num_bins, false);\n  EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);\n  EXPECT_GE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 0.95);\n  ConsoleLogger::Configure({{\"verbosity\", \"0\"}});\n}\n\nTEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {\n  auto ctx = MakeCUDACtx(0);\n  int num_columns = 100;\n  int num_rows = 1000;\n  int num_bins = 256;\n  auto x = GenerateRandom(num_rows, num_columns);\n  auto x_device = thrust::device_vector<float>(x);\n  auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n  MetaInfo info;\n  auto& h_weights = info.weights_.HostVector();\n  h_weights.resize(num_rows);\n  std::fill(h_weights.begin(), h_weights.end(), 1.0f);\n\n  dh::GlobalMemoryLogger().Clear();\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_container(ft, num_bins, num_columns, DeviceOrd::CUDA(0));\n  AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &sketch_container);\n\n  [[maybe_unused]] auto cuts = sketch_container.MakeCuts(&ctx, 
info.IsColumnSplit());\n  ConsoleLogger::Configure({{\"verbosity\", \"0\"}});\n  size_t bytes_required =\n      detail::RequiredMemory(num_rows, num_columns, num_rows * num_columns, num_bins, true);\n  EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);\n  EXPECT_GE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required);\n}\n\nvoid TestCategoricalSketchAdapter(size_t n, size_t num_categories, int32_t num_bins,\n                                  bool weighted) {\n  auto ctx = MakeCUDACtx(0);\n  auto h_x = GenerateRandomCategoricalSingleColumn(n, num_categories);\n  thrust::device_vector<float> x(h_x);\n  auto adapter = AdapterFromData(x, n, 1);\n  MetaInfo info;\n  info.num_row_ = n;\n  info.num_col_ = 1;\n  info.feature_types.HostVector().push_back(FeatureType::kCategorical);\n\n  if (weighted) {\n    std::vector<float> weights(n, 0);\n    SimpleLCG lcg;\n    SimpleRealUniformDistribution<float> dist(0, 1);\n    for (auto& v : weights) {\n      v = dist(&lcg);\n    }\n    info.weights_.HostVector() = weights;\n  }\n\n  ASSERT_EQ(info.feature_types.Size(), 1);\n  SketchContainer container(info.feature_types, num_bins, 1, DeviceOrd::CUDA(0));\n  AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &container);\n  auto cuts = container.MakeCuts(&ctx, info.IsColumnSplit());\n\n  thrust::sort(x.begin(), x.end());\n  auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();\n  ASSERT_NE(n_uniques, x.size());\n  ASSERT_EQ(cuts.TotalBins(), n_uniques);\n  ASSERT_EQ(n_uniques, num_categories);\n\n  auto& values = cuts.cut_values_.HostVector();\n  ASSERT_TRUE(std::is_sorted(values.cbegin(), values.cend()));\n  auto is_unique = (std::unique(values.begin(), values.end()) - values.begin()) == n_uniques;\n  ASSERT_TRUE(is_unique);\n\n  x.resize(n_uniques);\n  h_x.resize(n_uniques);\n  thrust::copy(x.begin(), x.end(), h_x.begin());\n  for (decltype(n_uniques) i = 0; i < n_uniques; 
++i) {\n    ASSERT_EQ(h_x[i], values[i]);\n  }\n}\n\nTEST(HistUtil, AdapterDeviceSketchCategorical) {\n  auto categorical_sizes = {2, 6, 8, 12};\n  int num_bins = 256;\n  auto ctx = MakeCUDACtx(0);\n  auto sizes = {25, 100, 1000};\n  for (auto n : sizes) {\n    for (auto num_categories : categorical_sizes) {\n      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);\n      auto dmat = GetDMatrixFromData(x, n, 1);\n      auto x_device = thrust::device_vector<float>(x);\n      auto adapter = AdapterFromData(x_device, n, 1);\n      ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());\n      TestCategoricalSketchAdapter(n, num_categories, num_bins, true);\n      TestCategoricalSketchAdapter(n, num_categories, num_bins, false);\n    }\n  }\n}\n\nTEST(HistUtil, AdapterDeviceSketchMultipleColumns) {\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  auto ctx = MakeCUDACtx(0);\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto x_device = thrust::device_vector<float>(x);\n    for (auto num_bins : bin_sizes) {\n      auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n      ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());\n    }\n  }\n}\n\nTEST(HistUtil, AdapterDeviceSketchBatches) {\n  int num_bins = 256;\n  int num_rows = 5000;\n  auto batch_sizes = {0, 100, 1500, 6000};\n  int num_columns = 5;\n  auto ctx = MakeCUDACtx(0);\n  for (auto batch_size : batch_sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    auto x_device = thrust::device_vector<float>(x);\n    auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n    ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get(), batch_size);\n  }\n}\n\nnamespace {\nauto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t 
n_features) {\n  curt::SetDevice(ctx->Ordinal());\n  auto n = n_samples * n_features;\n  std::vector<float> x;\n  x.resize(n);\n\n  std::iota(x.begin(), x.end(), 0);\n  std::int32_t c{0};\n  float missing = n_samples * n_features;\n  for (std::size_t i = 0; i < x.size(); ++i) {\n    if (i % 5 == 0) {\n      x[i] = missing;\n      c++;\n    }\n  }\n  thrust::device_vector<float> d_x;\n  d_x = x;\n\n  auto n_invalids = n / 10 * 2 + 1;\n  auto is_valid = data::IsValidFunctor{missing};\n  return std::tuple{x, d_x, n_invalids, is_valid};\n}\n\nvoid TestGetColumnSize(std::size_t n_samples) {\n  auto ctx = MakeCUDACtx(0);\n  bst_feature_t n_features = 12;\n  [[maybe_unused]] auto [x, d_x, n_invalids, is_valid] = MakeData(&ctx, n_samples, n_features);\n\n  auto adapter = AdapterFromData(d_x, n_samples, n_features);\n  auto batch = adapter.Value();\n\n  auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(\n      thrust::make_counting_iterator(0llu),\n      [=] __device__(std::size_t idx) { return batch.GetElement(idx); });\n\n  dh::caching_device_vector<std::size_t> column_sizes_scan;\n  column_sizes_scan.resize(n_features + 1);\n  std::vector<std::size_t> h_column_size(column_sizes_scan.size());\n  std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());\n\n  auto cuctx = ctx.CUDACtx();\n  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(\n      cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,\n      dh::ToSpan(column_sizes_scan));\n  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());\n\n  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(\n      cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,\n      dh::ToSpan(column_sizes_scan));\n  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());\n  ASSERT_EQ(h_column_size, h_column_size_1);\n\n  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, 
true>(\n      cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,\n      dh::ToSpan(column_sizes_scan));\n  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());\n  ASSERT_EQ(h_column_size, h_column_size_1);\n\n  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(\n      cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,\n      dh::ToSpan(column_sizes_scan));\n  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());\n  ASSERT_EQ(h_column_size, h_column_size_1);\n}\n}  // namespace\n\nTEST(HistUtil, GetColumnSize) {\n  bst_idx_t n_samples = 4096;\n  TestGetColumnSize(n_samples);\n}\n\n// Check sketching from adapter or DMatrix results in the same answer\n// Consistency here is useful for testing and user experience\nTEST(HistUtil, SketchingEquivalent) {\n  auto ctx = MakeCUDACtx(0);\n  auto bin_sizes = {2, 16, 256, 512};\n  auto sizes = {100, 1000, 1500};\n  int num_columns = 5;\n  for (auto num_rows : sizes) {\n    auto x = GenerateRandom(num_rows, num_columns);\n    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);\n    for (auto num_bins : bin_sizes) {\n      auto dmat_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);\n      auto x_device = thrust::device_vector<float>(x);\n      auto adapter = AdapterFromData(x_device, num_rows, num_columns);\n      common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(\n          &ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());\n      EXPECT_EQ(dmat_cuts.Values(), adapter_cuts.Values());\n      EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());\n\n      ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());\n    }\n  }\n}\n\nTEST(HistUtil, DeviceSketchFromGroupWeights) {\n  auto ctx = MakeCUDACtx(0);\n  size_t constexpr kRows = 3000, kCols = 200, kBins = 256;\n  size_t constexpr kGroups = 10;\n  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n\n  // 
sketch with group weight\n  auto& h_weights = m->Info().weights_.HostVector();\n  h_weights.resize(kGroups);\n  std::fill(h_weights.begin(), h_weights.end(), 1.0f);\n  std::vector<bst_group_t> groups(kGroups);\n  for (size_t i = 0; i < kGroups; ++i) {\n    groups[i] = kRows / kGroups;\n  }\n  m->SetInfo(\"group\", Make1dInterfaceTest(groups.data(), kGroups));\n  HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);\n\n  // sketch with no weight\n  h_weights.clear();\n  HistogramCuts cuts = DeviceSketch(&ctx, m.get(), kBins, 0);\n\n  ASSERT_EQ(cuts.Values().size(), weighted_cuts.Values().size());\n  ASSERT_EQ(cuts.Ptrs().size(), weighted_cuts.Ptrs().size());\n\n  for (size_t i = 0; i < cuts.Values().size(); ++i) {\n    EXPECT_EQ(cuts.Values()[i], weighted_cuts.Values()[i]) << \"i:\" << i;\n  }\n  for (size_t i = 0; i < cuts.Ptrs().size(); ++i) {\n    ASSERT_EQ(cuts.Ptrs().at(i), weighted_cuts.Ptrs().at(i));\n  }\n  ValidateCuts(weighted_cuts, m.get(), kBins);\n}\n\nvoid TestAdapterSketchFromWeights(bool with_group) {\n  size_t constexpr kRows = 300, kCols = 20, kBins = 256;\n  size_t constexpr kGroups = 10;\n  HostDeviceVector<float> storage;\n  std::string m = RandomDataGenerator{kRows, kCols, 0}\n                      .Device(DeviceOrd::CUDA(0))\n                      .GenerateArrayInterface(&storage);\n  MetaInfo info;\n  auto ctx = MakeCUDACtx(0);\n  auto& h_weights = info.weights_.HostVector();\n  if (with_group) {\n    h_weights.resize(kGroups);\n  } else {\n    h_weights.resize(kRows);\n  }\n  std::fill(h_weights.begin(), h_weights.end(), 1.0f);\n\n  std::vector<bst_group_t> groups(kGroups);\n  if (with_group) {\n    for (size_t i = 0; i < kGroups; ++i) {\n      groups[i] = kRows / kGroups;\n    }\n    info.SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), kGroups));\n  }\n\n  info.weights_.SetDevice(DeviceOrd::CUDA(0));\n  info.num_row_ = kRows;\n  info.num_col_ = kCols;\n\n  data::CupyAdapter adapter(m);\n  auto const& batch = 
adapter.Value();\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_container(ft, kBins, kCols, DeviceOrd::CUDA(0));\n  AdapterDeviceSketch(&ctx, adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),\n                      &sketch_container);\n\n  auto cuts = sketch_container.MakeCuts(&ctx, info.IsColumnSplit());\n\n  auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);\n  if (with_group) {\n    dmat->Info().SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), kGroups));\n  }\n\n  dmat->Info().SetInfo(ctx, \"weight\", Make1dInterfaceTest(h_weights.data(), h_weights.size()));\n  dmat->Info().num_col_ = kCols;\n  dmat->Info().num_row_ = kRows;\n  ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);\n  ValidateCuts(cuts, dmat.get(), kBins);\n\n  if (with_group) {\n    dmat->Info().weights_ = decltype(dmat->Info().weights_)();  // remove weight\n    HistogramCuts non_weighted = DeviceSketch(&ctx, dmat.get(), kBins, 0);\n    for (size_t i = 0; i < cuts.Values().size(); ++i) {\n      ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);\n    }\n    for (size_t i = 0; i < cuts.Ptrs().size(); ++i) {\n      ASSERT_EQ(cuts.Ptrs().at(i), non_weighted.Ptrs().at(i));\n    }\n  }\n\n  if (with_group) {\n    common::HistogramCuts weighted{0};\n    auto& h_weights = info.weights_.HostVector();\n    h_weights.resize(kGroups);\n    // Generate different weight.\n    for (size_t i = 0; i < h_weights.size(); ++i) {\n      // FIXME(jiamingy): Some entries generated GPU test cannot pass the validate cuts if\n      // we use more diverse weights, partially caused by\n      // https://github.com/dmlc/xgboost/issues/7946\n      h_weights[i] = (i % 2 == 0 ? 
1 : 2) / static_cast<float>(kGroups);\n    }\n    SketchContainer sketch_container{ft, kBins, kCols, DeviceOrd::CUDA(0)};\n    AdapterDeviceSketch(&ctx, adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),\n                        &sketch_container);\n    weighted = sketch_container.MakeCuts(&ctx, info.IsColumnSplit());\n    ValidateCuts(weighted, dmat.get(), kBins);\n  }\n}\n\nTEST(HistUtil, AdapterSketchFromWeights) {\n  TestAdapterSketchFromWeights(false);\n  TestAdapterSketchFromWeights(true);\n}\n\nnamespace {\nclass DeviceSketchWithHessianTest\n    : public ::testing::TestWithParam<std::tuple<bool, bst_idx_t, bst_bin_t>> {\n  bst_feature_t n_features_ = 5;\n  bst_group_t n_groups_{3};\n\n  auto GenerateHessian(Context const* ctx, bst_idx_t n_samples) const {\n    HostDeviceVector<float> hessian;\n    auto& h_hess = hessian.HostVector();\n    h_hess = GenerateRandomWeights(n_samples);\n    std::mt19937 rng(0);\n    std::shuffle(h_hess.begin(), h_hess.end(), rng);\n    hessian.SetDevice(ctx->Device());\n    return hessian;\n  }\n\n  void CheckReg(Context const* ctx, std::shared_ptr<DMatrix> p_fmat, bst_bin_t n_bins,\n                HostDeviceVector<float> const& hessian, std::vector<float> const& w,\n                std::size_t n_elements) const {\n    auto const& h_hess = hessian.ConstHostVector();\n    {\n      auto& h_weight = p_fmat->Info().weights_.HostVector();\n      h_weight = w;\n    }\n\n    HistogramCuts cuts_hess =\n        DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);\n    ValidateCuts(cuts_hess, p_fmat.get(), n_bins);\n\n    // merge hessian\n    {\n      auto& h_weight = p_fmat->Info().weights_.HostVector();\n      ASSERT_EQ(h_weight.size(), h_hess.size());\n      for (std::size_t i = 0; i < h_weight.size(); ++i) {\n        h_weight[i] = w[i] * h_hess[i];\n      }\n    }\n\n    HistogramCuts cuts_wh = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);\n    ValidateCuts(cuts_wh, 
p_fmat.get(), n_bins);\n    ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());\n    for (std::size_t i = 0; i < cuts_hess.Values().size(); ++i) {\n      ASSERT_NEAR(cuts_wh.Values()[i], cuts_hess.Values()[i], kRtEps);\n    }\n\n    p_fmat->Info().weights_.HostVector() = w;\n  }\n\n protected:\n  Context ctx_ = MakeCUDACtx(0);\n\n  void TestLTR(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,\n               std::size_t n_elements) const {\n    auto x = GenerateRandom(n_samples, n_features_);\n\n    std::vector<bst_group_t> gptr;\n    gptr.resize(n_groups_ + 1, 0);\n    gptr[1] = n_samples / n_groups_;\n    gptr[2] = n_samples / n_groups_ + gptr[1];\n    gptr.back() = n_samples;\n\n    auto hessian = this->GenerateHessian(ctx, n_samples);\n    auto const& h_hess = hessian.ConstHostVector();\n    auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);\n    p_fmat->Info().group_ptr_ = gptr;\n\n    // test with constant group weight\n    std::vector<float> w(n_groups_, 1.0f);\n    p_fmat->Info().weights_.HostVector() = w;\n    HistogramCuts cuts_hess =\n        DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);\n    // make validation easier by converting it into sample weight.\n    p_fmat->Info().weights_.HostVector() = h_hess;\n    p_fmat->Info().group_ptr_.clear();\n    ValidateCuts(cuts_hess, p_fmat.get(), n_bins);\n    // restore ltr properties\n    p_fmat->Info().weights_.HostVector() = w;\n    p_fmat->Info().group_ptr_ = gptr;\n\n    // test with random group weight\n    w = GenerateRandomWeights(n_groups_);\n    p_fmat->Info().weights_.HostVector() = w;\n    cuts_hess =\n        DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);\n    // make validation easier by converting it into sample weight.\n    p_fmat->Info().weights_.HostVector() = h_hess;\n    p_fmat->Info().group_ptr_.clear();\n    ValidateCuts(cuts_hess, p_fmat.get(), n_bins);\n\n    // merge 
hessian with sample weight\n    p_fmat->Info().weights_.Resize(n_samples);\n    p_fmat->Info().group_ptr_.clear();\n    for (std::size_t i = 0; i < h_hess.size(); ++i) {\n      auto gidx = dh::SegmentId(Span{gptr.data(), gptr.size()}, i);\n      p_fmat->Info().weights_.HostVector()[i] = w[gidx] * h_hess[i];\n    }\n    auto cuts = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);\n    ValidateCuts(cuts, p_fmat.get(), n_bins);\n    ASSERT_EQ(cuts.Values().size(), cuts_hess.Values().size());\n    for (std::size_t i = 0; i < cuts.Values().size(); ++i) {\n      EXPECT_NEAR(cuts.Values()[i], cuts_hess.Values()[i], 1e-4f);\n    }\n  }\n\n  void TestRegression(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,\n                      std::size_t n_elements) const {\n    auto x = GenerateRandom(n_samples, n_features_);\n    auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);\n    std::vector<float> w = GenerateRandomWeights(n_samples);\n\n    auto hessian = this->GenerateHessian(ctx, n_samples);\n\n    this->CheckReg(ctx, p_fmat, n_bins, hessian, w, n_elements);\n  }\n};\n\nauto MakeParamsForTest() {\n  std::vector<bst_idx_t> sizes = {1, 2, 256, 512, 1000, 1500};\n  std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};\n  std::vector<std::tuple<bool, bst_idx_t, bst_bin_t>> configs;\n  for (auto n_samples : sizes) {\n    for (auto n_bins : bin_sizes) {\n      configs.emplace_back(true, n_samples, n_bins);\n      configs.emplace_back(false, n_samples, n_bins);\n    }\n  }\n  return configs;\n}\n}  // namespace\n\nTEST_P(DeviceSketchWithHessianTest, DeviceSketchWithHessian) {\n  auto param = GetParam();\n  auto n_samples = std::get<1>(param);\n  auto n_bins = std::get<2>(param);\n  if (std::get<0>(param)) {\n    this->TestLTR(&ctx_, n_samples, n_bins, 0);\n    this->TestLTR(&ctx_, n_samples, n_bins, 512);\n  } else {\n    this->TestRegression(&ctx_, n_samples, n_bins, 0);\n    this->TestRegression(&ctx_, n_samples, n_bins, 512);\n  
}\n}\n\nINSTANTIATE_TEST_SUITE_P(\n    HistUtil, DeviceSketchWithHessianTest, ::testing::ValuesIn(MakeParamsForTest()),\n    [](::testing::TestParamInfo<DeviceSketchWithHessianTest::ParamType> const& info) {\n      auto task = std::get<0>(info.param) ? \"ltr\" : \"reg\";\n      auto n_samples = std::to_string(std::get<1>(info.param));\n      auto n_bins = std::to_string(std::get<2>(info.param));\n      return std::string(task) + \"_\" + n_samples + \"_\" + n_bins;\n    });\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_hist_util.h",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#pragma once\n#include <gtest/gtest.h>\n\n#include <cmath>\n#include <memory>  // for shared_ptr\n#include <random>\n#include <string>\n#include <vector>\n\n#include \"../../../src/common/hist_util.h\"\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/data/simple_dmatrix.h\"\n#include \"../helpers.h\"\n\n#ifdef __CUDACC__\n#include <xgboost/json.h>\n\n#include \"../../../src/data/device_adapter.cuh\"\n#endif  // __CUDACC__\n\n// Some helper functions used to test both GPU and CPU algorithms\n//\nnamespace xgboost::common {\n// Generate columns with different ranges\ninline std::vector<float> GenerateRandom(int num_rows, int num_columns) {\n  std::vector<float> x(num_rows * num_columns);\n  std::mt19937 rng(0);\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  std::generate(x.begin(), x.end(), [&]() { return dist(rng); });\n  for (auto i = 0; i < num_columns; i++) {\n    for (auto j = 0; j < num_rows; j++) {\n      x[j * num_columns + i] += i;\n    }\n  }\n  return x;\n}\n\ninline std::vector<float> GenerateRandomWeights(int num_rows) {\n  std::vector<float> w(num_rows);\n  std::mt19937 rng(1);\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  std::generate(w.begin(), w.end(), [&]() { return dist(rng); });\n  return w;\n}\n\n#ifdef __CUDACC__\ninline data::CupyAdapter AdapterFromData(const thrust::device_vector<float>& x, int num_rows,\n                                         int num_columns) {\n  Json array_interface{Object()};\n  std::vector<Json> shape = {Json(static_cast<Integer::Int>(num_rows)),\n                             Json(static_cast<Integer::Int>(num_columns))};\n  array_interface[\"shape\"] = Array(shape);\n  std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(x.data().get()))),\n                           Json(Boolean(false))};\n  array_interface[\"data\"] = j_data;\n  array_interface[\"version\"] = 3;\n  
array_interface[\"typestr\"] = String(\"<f4\");\n  std::string str;\n  Json::Dump(array_interface, &str);\n  return data::CupyAdapter(str);\n}\n#endif\n\ninline std::shared_ptr<data::SimpleDMatrix> GetDMatrixFromData(const std::vector<float>& x,\n                                                               int num_rows, int num_columns) {\n  data::DenseAdapter adapter(x.data(), num_rows, num_columns);\n  return std::shared_ptr<data::SimpleDMatrix>(\n      new data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(), 1));\n}\n\n// Test that elements are approximately equally distributed among bins\ninline void TestBinDistribution(const HistogramCuts& cuts, int column_idx,\n                                const std::vector<float>& sorted_column,\n                                const std::vector<float>& sorted_weights) {\n  std::map<int, int> bin_weights;\n  for (auto i = 0ull; i < sorted_column.size(); i++) {\n    auto bin_idx = cuts.SearchBin(sorted_column[i], column_idx);\n    if (bin_weights.find(bin_idx) == bin_weights.cend()) {\n      bin_weights[bin_idx] = 0;\n    }\n    bin_weights.at(bin_idx) += sorted_weights[i];\n  }\n  int local_num_bins = cuts.Ptrs()[column_idx + 1] - cuts.Ptrs()[column_idx];\n  auto total_weight = std::accumulate(sorted_weights.begin(), sorted_weights.end(), 0);\n  int expected_bin_weight = total_weight / local_num_bins;\n  // Allow up to 30% deviation. 
This test is not very strict, it only ensures\n  // roughly equal distribution\n  int allowable_error = std::max(2, static_cast<int>(expected_bin_weight * 0.3));\n\n  // First and last bin can have smaller\n  for (auto& kv : bin_weights) {\n    ASSERT_LE(std::abs(bin_weights[kv.first] - expected_bin_weight), allowable_error);\n  }\n}\n\n// Test sketch quantiles against the real quantiles Not a very strict\n// test\ninline void TestRank(const std::vector<float>& column_cuts, const std::vector<float>& sorted_x,\n                     const std::vector<float>& sorted_weights) {\n  double eps = 0.05;\n  auto total_weight = std::accumulate(sorted_weights.begin(), sorted_weights.end(), 0.0);\n  // Ignore the last cut, its special\n  double sum_weight = 0.0;\n  size_t j = 0;\n  for (size_t i = 0; i < column_cuts.size() - 1; i++) {\n    while (column_cuts[i] > sorted_x[j]) {\n      sum_weight += sorted_weights[j];\n      j++;\n    }\n    double expected_rank = ((i + 1) * total_weight) / column_cuts.size();\n    double acceptable_error = std::max(2.9, total_weight * eps);\n    EXPECT_LE(std::abs(expected_rank - sum_weight), acceptable_error);\n  }\n}\n\ninline void ValidateColumn(const HistogramCuts& cuts, int column_idx,\n                           const std::vector<float>& sorted_column,\n                           const std::vector<float>& sorted_weights, size_t num_bins) {\n  // Check the endpoints are correct\n  CHECK_GT(sorted_column.size(), 0);\n  auto first_bin = common::HistogramCuts::NumericBinLowerBound(\n      cuts.Ptrs(), cuts.Values(), column_idx, cuts.Ptrs().at(column_idx));\n  EXPECT_TRUE(std::isinf(first_bin));\n  EXPECT_LT(first_bin, 0.0f);\n  EXPECT_GT(cuts.Values()[cuts.Ptrs()[column_idx]], sorted_column.front());\n  EXPECT_GE(cuts.Values()[cuts.Ptrs()[column_idx + 1] - 1], sorted_column.back());\n\n  // Check the cuts are sorted\n  auto cuts_begin = cuts.Values().begin() + cuts.Ptrs()[column_idx];\n  auto cuts_end = cuts.Values().begin() + 
cuts.Ptrs()[column_idx + 1];\n  EXPECT_TRUE(std::is_sorted(cuts_begin, cuts_end));\n\n  // Check all cut points are unique\n  EXPECT_EQ(std::set<float>(cuts_begin, cuts_end).size(),\n            static_cast<size_t>(cuts_end - cuts_begin));\n\n  auto unique = std::set<float>(sorted_column.begin(), sorted_column.end());\n  if (unique.size() <= num_bins) {\n    // Less unique values than number of bins\n    // Each value should get its own bin\n    int i = 0;\n    for (auto v : unique) {\n      ASSERT_EQ(cuts.SearchBin(v, column_idx), cuts.Ptrs()[column_idx] + i);\n      i++;\n    }\n  } else {\n    int num_cuts_column = cuts.Ptrs()[column_idx + 1] - cuts.Ptrs()[column_idx];\n    std::vector<float> column_cuts(num_cuts_column);\n    std::copy(cuts.Values().begin() + cuts.Ptrs()[column_idx],\n              cuts.Values().begin() + cuts.Ptrs()[column_idx + 1], column_cuts.begin());\n    TestBinDistribution(cuts, column_idx, sorted_column, sorted_weights);\n    TestRank(column_cuts, sorted_column, sorted_weights);\n  }\n}\n\ninline void ValidateCuts(const HistogramCuts& cuts, DMatrix* dmat, int num_bins) {\n  // Collect data into columns\n  std::vector<std::vector<float>> columns(dmat->Info().num_col_);\n  for (auto& batch : dmat->GetBatches<SparsePage>()) {\n    auto page = batch.GetView();\n    ASSERT_GT(batch.Size(), 0ul);\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      for (auto e : page[i]) {\n        columns[e.index].push_back(e.fvalue);\n      }\n    }\n  }\n\n  // construct weights.\n  std::vector<float> w = dmat->Info().group_ptr_.empty() ? 
dmat->Info().weights_.HostVector()\n                                                         : detail::UnrollGroupWeights(dmat->Info());\n\n  // Sort\n  for (auto i = 0ull; i < columns.size(); i++) {\n    auto& col = columns.at(i);\n    std::vector<size_t> index(col.size());\n    std::iota(index.begin(), index.end(), 0);\n    std::sort(index.begin(), index.end(), [=](size_t a, size_t b) { return col[a] < col[b]; });\n\n    std::vector<float> sorted_column(col.size());\n    std::vector<float> sorted_weights(col.size(), 1.0);\n\n    for (auto j = 0ull; j < col.size(); j++) {\n      sorted_column[j] = col[index[j]];\n      if (w.size() == col.size()) {\n        sorted_weights[j] = w[index[j]];\n      }\n    }\n\n    ValidateColumn(cuts, i, sorted_column, sorted_weights, num_bins);\n  }\n}\n\n/**\n * \\brief Test for sketching on categorical data.\n *\n * \\param sketch Sketch function, can be on device or on host.\n */\ntemplate <typename Fn>\nvoid TestCategoricalSketch(size_t n, size_t num_categories, int32_t num_bins, bool weighted,\n                           Fn sketch) {\n  auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);\n  auto dmat = GetDMatrixFromData(x, n, 1);\n  dmat->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);\n\n  if (weighted) {\n    std::vector<float> weights(n, 0);\n    SimpleLCG lcg;\n    SimpleRealUniformDistribution<float> dist(0, 1);\n    for (auto& v : weights) {\n      v = dist(&lcg);\n    }\n    dmat->Info().weights_.HostVector() = weights;\n  }\n\n  ASSERT_EQ(dmat->Info().feature_types.Size(), 1);\n  auto cuts = sketch(dmat.get(), num_bins);\n  ASSERT_EQ(cuts.MaxCategory(), num_categories - 1);\n  std::sort(x.begin(), x.end());\n  auto n_uniques = std::unique(x.begin(), x.end()) - x.begin();\n  ASSERT_NE(n_uniques, x.size());\n  ASSERT_EQ(cuts.TotalBins(), n_uniques);\n  ASSERT_EQ(n_uniques, num_categories);\n\n  auto& values = cuts.cut_values_.HostVector();\n  
ASSERT_TRUE(std::is_sorted(values.cbegin(), values.cend()));\n  auto is_unique = (std::unique(values.begin(), values.end()) - values.begin()) == n_uniques;\n  ASSERT_TRUE(is_unique);\n\n  x.resize(n_uniques);\n  for (decltype(n_uniques) i = 0; i < n_uniques; ++i) {\n    ASSERT_EQ(x[i], values[i]);\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_host_device_vector.cu",
    "content": "/**\n * Copyright 2018-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/equal.h>\n#include <thrust/iterator/counting_iterator.h>\n#include <xgboost/host_device_vector.h>\n\n#include \"../../../src/common/cuda_rt_utils.h\"  // for SetDevice\n#include \"../../../src/common/device_helpers.cuh\"\n\nnamespace xgboost::common {\nnamespace {\nvoid SetDeviceForTest(DeviceOrd device) {\n  int n_devices;\n  dh::safe_cuda(cudaGetDeviceCount(&n_devices));\n  device.ordinal %= n_devices;\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n}\n}  // namespace\n\nstruct HostDeviceVectorSetDeviceHandler {\n  template <typename Functor>\n  explicit HostDeviceVectorSetDeviceHandler(Functor f) {\n    SetCudaSetDeviceHandler(f);\n  }\n\n  ~HostDeviceVectorSetDeviceHandler() {\n    SetCudaSetDeviceHandler(nullptr);\n  }\n};\n\nvoid InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {\n  // create the vector\n  v->SetDevice(device);\n  v->Resize(n);\n\n  ASSERT_EQ(v->Size(), n);\n  ASSERT_EQ(v->Device(), device);\n  // ensure that the device have read-write access\n  ASSERT_TRUE(v->DeviceCanRead());\n  ASSERT_TRUE(v->DeviceCanWrite());\n  // ensure that the host has no access\n  ASSERT_FALSE(v->HostCanRead());\n  ASSERT_FALSE(v->HostCanWrite());\n\n  // fill in the data on the host\n  std::vector<int>& data_h = v->HostVector();\n  // ensure that the host has full access, while the device have none\n  ASSERT_TRUE(v->HostCanRead());\n  ASSERT_TRUE(v->HostCanWrite());\n  ASSERT_FALSE(v->DeviceCanRead());\n  ASSERT_FALSE(v->DeviceCanWrite());\n  ASSERT_EQ(data_h.size(), n);\n  std::copy_n(thrust::make_counting_iterator(0), n, data_h.begin());\n}\n\nvoid PlusOne(HostDeviceVector<int> *v) {\n  auto device = v->Device();\n  SetDeviceForTest(device);\n  thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),\n                    [=]__device__(unsigned int a){ return a + 1; });\n  
ASSERT_TRUE(v->DeviceCanWrite());\n}\n\nvoid CheckDevice(HostDeviceVector<int>* v,\n                 size_t size,\n                 unsigned int first,\n                 GPUAccess access) {\n  ASSERT_EQ(v->Size(), size);\n  SetDeviceForTest(v->Device());\n\n  ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),\n                            thrust::make_counting_iterator(first)));\n  ASSERT_TRUE(v->DeviceCanRead());\n  // ensure that the device has at most the access specified by access\n  ASSERT_EQ(v->DeviceCanWrite(), access == GPUAccess::kWrite);\n  ASSERT_EQ(v->HostCanRead(), access == GPUAccess::kRead);\n  ASSERT_FALSE(v->HostCanWrite());\n\n  ASSERT_TRUE(thrust::equal(dh::tbegin(*v), dh::tend(*v),\n                            thrust::make_counting_iterator(first)));\n  ASSERT_TRUE(v->DeviceCanRead());\n  ASSERT_TRUE(v->DeviceCanWrite());\n  ASSERT_FALSE(v->HostCanRead());\n  ASSERT_FALSE(v->HostCanWrite());\n}\n\nvoid CheckHost(HostDeviceVector<int> *v, GPUAccess access) {\n  const std::vector<int>& data_h = access == GPUAccess::kNone ?\n    v->HostVector() : v->ConstHostVector();\n  for (size_t i = 0; i < v->Size(); ++i) {\n    ASSERT_EQ(data_h.at(i), i + 1);\n  }\n  ASSERT_TRUE(v->HostCanRead());\n  ASSERT_EQ(v->HostCanWrite(), access == GPUAccess::kNone);\n  ASSERT_EQ(v->DeviceCanRead(), access == GPUAccess::kRead);\n  // the devices should have no write access\n  ASSERT_FALSE(v->DeviceCanWrite());\n}\n\nvoid TestHostDeviceVector(size_t n, DeviceOrd device) {\n  HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(curt::SetDevice);\n  HostDeviceVector<int> v;\n  InitHostDeviceVector(n, device, &v);\n  CheckDevice(&v, n, 0, GPUAccess::kRead);\n  PlusOne(&v);\n  CheckDevice(&v, n, 1, GPUAccess::kWrite);\n  CheckHost(&v, GPUAccess::kRead);\n  CheckHost(&v, GPUAccess::kNone);\n}\n\nTEST(HostDeviceVector, Basic) {\n  size_t n = 1001;\n  DeviceOrd device = DeviceOrd::CUDA(0);\n  TestHostDeviceVector(n, device);\n}\n\nTEST(HostDeviceVector, Copy) {\n  size_t n = 
1001;\n  auto device = DeviceOrd::CUDA(0);\n  HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(curt::SetDevice);\n\n  HostDeviceVector<int> v;\n  {\n    // a separate scope to ensure that v1 is gone before further checks\n    HostDeviceVector<int> v1;\n    InitHostDeviceVector(n, device, &v1);\n    v.Resize(v1.Size());\n    v.Copy(v1);\n  }\n  CheckDevice(&v, n, 0, GPUAccess::kRead);\n  PlusOne(&v);\n  CheckDevice(&v, n, 1, GPUAccess::kWrite);\n  CheckHost(&v, GPUAccess::kRead);\n  CheckHost(&v, GPUAccess::kNone);\n}\n\nTEST(HostDeviceVector, SetDevice) {\n  std::vector<int> h_vec (2345);\n  for (size_t i = 0; i < h_vec.size(); ++i) {\n    h_vec[i] = i;\n  }\n  HostDeviceVector<int> vec (h_vec);\n  auto device = DeviceOrd::CUDA(0);\n\n  vec.SetDevice(device);\n  ASSERT_EQ(vec.Size(), h_vec.size());\n  vec.DeviceSpan();  // sync to device\n\n  vec.SetDevice(DeviceOrd::CPU());  // pull back to cpu.\n  ASSERT_EQ(vec.Size(), h_vec.size());\n  ASSERT_EQ(vec.Device(), DeviceOrd::CPU());\n\n  auto h_vec_1 = vec.HostVector();\n  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));\n}\n\nTEST(HostDeviceVector, Span) {\n  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};\n  vec.SetDevice(DeviceOrd::CUDA(0));\n  auto span = vec.DeviceSpan();\n  ASSERT_EQ(vec.Size(), span.size());\n  ASSERT_EQ(vec.DevicePointer(), span.data());\n  auto const_span = vec.ConstDeviceSpan();\n  ASSERT_EQ(vec.Size(), const_span.size());\n  ASSERT_EQ(vec.ConstDevicePointer(), const_span.data());\n\n  auto h_span = vec.ConstHostSpan();\n  ASSERT_TRUE(vec.HostCanRead());\n  ASSERT_FALSE(vec.HostCanWrite());\n  ASSERT_EQ(h_span.size(), vec.Size());\n  ASSERT_EQ(h_span.data(), vec.ConstHostPointer());\n\n  h_span = vec.HostSpan();\n  ASSERT_TRUE(vec.HostCanWrite());\n}\n\nTEST(HostDeviceVector, Empty) {\n  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};\n  HostDeviceVector<float> another { std::move(vec) };\n  ASSERT_FALSE(another.Empty());\n  
ASSERT_TRUE(vec.Empty());\n}\n\nTEST(HostDeviceVector, Resize) {\n  auto check = [&](HostDeviceVector<float> const& vec) {\n    auto const& h_vec = vec.ConstHostSpan();\n    for (std::size_t i = 0; i < 4; ++i) {\n      ASSERT_EQ(h_vec[i], i + 1);\n    }\n    for (std::size_t i = 4; i < vec.Size(); ++i) {\n      ASSERT_EQ(h_vec[i], 3.0);\n    }\n  };\n  {\n    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};\n    vec.SetDevice(DeviceOrd::CUDA(0));\n    vec.ConstDeviceSpan();\n    ASSERT_TRUE(vec.DeviceCanRead());\n    ASSERT_FALSE(vec.DeviceCanWrite());\n    vec.DeviceSpan();\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    check(vec);\n  }\n  {\n    HostDeviceVector<float> vec{{1.0f, 2.0f, 3.0f, 4.0f}, DeviceOrd::CUDA(0)};\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    check(vec);\n  }\n  {\n    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};\n    ASSERT_TRUE(vec.HostCanWrite());\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.HostCanWrite());\n    check(vec);\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_intrusive_ptr.cc",
    "content": "/**\n * Copyright 2020-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/intrusive_ptr.h>\n\nnamespace xgboost {\nnamespace {\nclass NotCopyConstructible {\n public:\n  float data;\n\n  explicit NotCopyConstructible(float d) : data{d} {}\n  NotCopyConstructible(NotCopyConstructible const &that) = delete;\n  NotCopyConstructible &operator=(NotCopyConstructible const &that) = delete;\n  NotCopyConstructible(NotCopyConstructible&& that) = default;\n};\nstatic_assert(!std::is_trivially_copy_constructible_v<NotCopyConstructible>);\nstatic_assert(!std::is_trivially_copy_assignable_v<NotCopyConstructible>);\n\nclass ForIntrusivePtrTest {\n public:\n  mutable class IntrusivePtrCell ref;\n  float data { 0 };\n\n  friend IntrusivePtrCell &\n  IntrusivePtrRefCount(ForIntrusivePtrTest const *t) noexcept {  // NOLINT\n    return t->ref;\n  }\n\n  ForIntrusivePtrTest() = default;\n  ForIntrusivePtrTest(float a, int32_t b) : data{a + static_cast<float>(b)} {}\n\n  explicit ForIntrusivePtrTest(NotCopyConstructible a) : data{a.data} {}\n};\n}  // anonymous namespace\n\nTEST(IntrusivePtr, Basic) {\n  IntrusivePtr<ForIntrusivePtrTest> ptr {new ForIntrusivePtrTest};\n  auto p = ptr.get();\n\n  // Copy ctor\n  IntrusivePtr<ForIntrusivePtrTest> ptr_1 { ptr };\n  ASSERT_EQ(ptr_1.get(), p);\n\n  ASSERT_EQ((*ptr_1).data, ptr_1->data);\n  ASSERT_EQ(ptr.use_count(), 2);\n\n  // hash\n  ASSERT_EQ(std::hash<IntrusivePtr<ForIntrusivePtrTest>>{}(ptr_1),\n            std::hash<ForIntrusivePtrTest*>{}(ptr_1.get()));\n\n  // Raw ptr comparison\n  ASSERT_EQ(ptr, p);\n  ASSERT_EQ(ptr_1, ptr);\n\n  ForIntrusivePtrTest* raw_ptr {nullptr};\n  ASSERT_NE(ptr_1, raw_ptr);\n  ASSERT_NE(raw_ptr, ptr_1);\n\n  // Reset with raw ptr.\n  auto p_1 = new ForIntrusivePtrTest;\n  ptr.reset(p_1);\n\n  ASSERT_EQ(ptr_1.use_count(), 1);\n  ASSERT_EQ(ptr.use_count(), 1);\n\n  ASSERT_TRUE(ptr);\n  ASSERT_TRUE(ptr_1);\n\n  // Swap\n  std::swap(ptr, ptr_1);\n  ASSERT_NE(ptr, 
p_1);\n  ASSERT_EQ(ptr_1, p_1);\n\n  // Reset\n  ptr.reset();\n  ASSERT_FALSE(ptr);\n  ASSERT_EQ(ptr.use_count(), 0);\n\n  // Comparison operators\n  ASSERT_EQ(ptr < ptr_1, ptr.get() < ptr_1.get());\n  ASSERT_EQ(ptr > ptr_1, ptr.get() > ptr_1.get());\n\n  ASSERT_LE(ptr, ptr);\n  ASSERT_GE(ptr, ptr);\n\n  // Copy assign\n  IntrusivePtr<ForIntrusivePtrTest> ptr_2;\n  ptr_2 = ptr_1;\n  ASSERT_EQ(ptr_2, ptr_1);\n  ASSERT_EQ(ptr_2.use_count(), 2);\n\n  // Move assign\n  IntrusivePtr<ForIntrusivePtrTest> ptr_3;\n  ptr_3 = std::move(ptr_2);\n  ASSERT_EQ(ptr_2.use_count(), 0);  // NOLINT\n  ASSERT_EQ(ptr_3.use_count(), 2);\n\n  // Move ctor\n  IntrusivePtr<ForIntrusivePtrTest> ptr_4 { std::move(ptr_3) };\n  ASSERT_EQ(ptr_3.use_count(), 0);  // NOLINT\n  ASSERT_EQ(ptr_4.use_count(), 2);\n\n  // Comparison\n  ASSERT_EQ(ptr_1 > ptr_2, ptr_1.get() > ptr_2.get());\n  ASSERT_EQ(ptr_1, ptr_1);\n  ASSERT_EQ(ptr_1 < ptr_2, ptr_1.get() < ptr_2.get());\n}\n} // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_io.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstddef>  // for size_t\n#include <fstream>  // for ofstream\n#include <numeric>  // for iota\n\n#include \"../../../src/common/io.h\"\n#include \"../filesystem.h\"  // TemporaryDirectory\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nTEST(MemoryFixSizeBuffer, Seek) {\n  size_t constexpr kSize{64};\n  std::vector<int32_t> memory(kSize);\n  MemoryFixSizeBuffer buf(memory.data(), memory.size());\n  buf.Seek(MemoryFixSizeBuffer::kSeekEnd);\n  size_t end = buf.Tell();\n  ASSERT_EQ(end, kSize);\n}\n\nTEST(IO, FileExtension) {\n  std::string filename{u8\"model.json\"};\n  auto ext = FileExtension(filename);\n  ASSERT_EQ(ext, u8\"json\");\n}\n\nTEST(IO, FixedSizeStream) {\n  std::string buffer{\"This is the content of stream\"};\n  {\n    MemoryFixSizeBuffer stream(static_cast<void *>(&buffer[0]), buffer.size());\n    PeekableInStream peekable(&stream);\n    FixedSizeStream fixed(&peekable);\n\n    std::string out_buffer;\n    fixed.Take(&out_buffer);\n    ASSERT_EQ(buffer, out_buffer);\n  }\n\n  {\n    std::string huge_buffer;\n    for (size_t i = 0; i < 512; i++) {\n      huge_buffer += buffer;\n    }\n\n    MemoryFixSizeBuffer stream(static_cast<void *>(&huge_buffer[0]), huge_buffer.size());\n    PeekableInStream peekable(&stream);\n    FixedSizeStream fixed(&peekable);\n\n    std::string out_buffer;\n    fixed.Take(&out_buffer);\n    ASSERT_EQ(huge_buffer, out_buffer);\n  }\n}\n\nTEST(IO, LoadSequentialFile) {\n  EXPECT_THROW(LoadSequentialFile(\"non-exist\"), dmlc::Error);\n\n  common::TemporaryDirectory tempdir;\n  std::ofstream fout(tempdir.Path() / \"test_file\");\n  std::string content;\n\n  // Generate a JSON file.\n  size_t constexpr kRows = 1000, kCols = 100;\n  std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};\n  std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n  
learner->SetParam(\"tree_method\", \"hist\");\n  learner->Configure();\n\n  for (int32_t iter = 0; iter < 10; ++iter) {\n    learner->UpdateOneIter(iter, p_dmat);\n  }\n  Json out{Object()};\n  learner->SaveModel(&out);\n  std::vector<char> str;\n  Json::Dump(out, &str);\n\n  std::string tmpfile = tempdir.Str() + \"/model.json\";\n  {\n    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(tmpfile.c_str(), \"w\"));\n    fo->Write(str.data(), str.size());\n  }\n\n  auto loaded = LoadSequentialFile(tmpfile);\n  ASSERT_EQ(loaded, str);\n}\n\nTEST(IO, Resource) {\n  {\n    // test malloc basic\n    std::size_t n = 128;\n    std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);\n    ASSERT_EQ(resource->Size(), n);\n    ASSERT_EQ(resource->Type(), ResourceHandler::kMalloc);\n  }\n\n  // test malloc resize\n  auto test_malloc_resize = [](bool force_malloc) {\n    std::size_t n = 64;\n    std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);\n    auto ptr = reinterpret_cast<std::uint8_t *>(resource->Data());\n    std::iota(ptr, ptr + n, 0);\n\n    auto malloc_resource = std::dynamic_pointer_cast<MallocResource>(resource);\n    ASSERT_TRUE(malloc_resource);\n    if (force_malloc) {\n      malloc_resource->Resize<true>(n * 2);\n    } else {\n      malloc_resource->Resize<false>(n * 2);\n    }\n    for (std::size_t i = 0; i < n; ++i) {\n      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], i) << force_malloc;\n    }\n    for (std::size_t i = n; i < 2 * n; ++i) {\n      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 0);\n    }\n\n    ptr = malloc_resource->DataAs<std::uint8_t>();\n    std::fill_n(ptr, malloc_resource->Size(), 7);\n    if (force_malloc) {\n      malloc_resource->Resize<true>(n * 3, std::byte{3});\n    } else {\n      malloc_resource->Resize<false>(n * 3, std::byte{3});\n    }\n    for (std::size_t i = 0; i < n * 2; ++i) {\n      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 7);\n    
}\n    for (std::size_t i = n * 2; i < n * 3; ++i) {\n      ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 3);\n    }\n  };\n  test_malloc_resize(true);\n  test_malloc_resize(false);\n\n  {\n    // test mmap\n    common::TemporaryDirectory tmpdir;\n    auto path = tmpdir.Str() + \"/testfile\";\n\n    std::ofstream fout(path, std::ios::binary);\n    double val{1.0};\n    fout.write(reinterpret_cast<char const *>(&val), sizeof(val));\n    fout << 1.0 << std::endl;\n    fout.close();\n\n    auto resource = std::shared_ptr<MmapResource>{new MmapResource{path, 0, sizeof(double)}};\n    ASSERT_EQ(resource->Size(), sizeof(double));\n    ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);\n    ASSERT_EQ(resource->DataAs<double>()[0], val);\n  }\n}\n\nclass TestFileStream : public ::testing::Test {\n public:\n  template <typename TestStreamT>\n  void Run() {\n    common::TemporaryDirectory tempdir;\n    auto path = tempdir.Str() + \"/testfile\";\n\n    // The page size on Linux is usually set to 4096, while the allocation granularity on\n    // the Windows machine where this test is writted is 65536. 
We span the test to cover\n    // all of them.\n    std::size_t n_batches{64};\n    std::size_t multiplier{2048};\n\n    std::vector<std::vector<std::int32_t>> batches;\n    std::vector<std::size_t> offset{0ul};\n\n    using T = std::int32_t;\n\n    {\n      std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), \"w\")};\n      for (std::size_t i = 0; i < n_batches; ++i) {\n        std::size_t size = (i + 1) * multiplier;\n        std::vector<T> data(size, 0);\n        std::iota(data.begin(), data.end(), i * i);\n\n        fo->Write(static_cast<std::uint64_t>(data.size()));\n        fo->Write(data.data(), data.size() * sizeof(T));\n\n        std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T);\n        offset.push_back(bytes);\n\n        batches.emplace_back(std::move(data));\n      }\n    }\n\n    // Turn size info offset\n    std::partial_sum(offset.begin(), offset.end(), offset.begin());\n\n    // Test read\n    for (std::size_t i = 0; i < n_batches; ++i) {\n      std::size_t off = offset[i];\n      std::size_t n = offset.at(i + 1) - offset[i];\n      auto fi{std::make_unique<TestStreamT>(path, off, n)};\n      std::vector<T> data;\n\n      std::uint64_t size{0};\n      ASSERT_TRUE(fi->Read(&size));\n      ASSERT_EQ(fi->Tell(), sizeof(size));\n      data.resize(size);\n\n      ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), size * sizeof(T));\n      ASSERT_EQ(data, batches[i]);\n    }\n\n    // Test consume\n    for (std::size_t i = 0; i < n_batches; ++i) {\n      std::size_t off = offset[i];\n      std::size_t n = offset.at(i + 1) - offset[i];\n      std::unique_ptr<AlignedResourceReadStream> fi{std::make_unique<TestStreamT>(path, off, n)};\n      std::vector<T> data;\n\n      std::uint64_t size{0};\n      ASSERT_TRUE(fi->Consume(&size));\n      ASSERT_EQ(fi->Tell(), sizeof(size));\n      data.resize(size);\n\n      ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), sizeof(T) * size);\n      ASSERT_EQ(data, batches[i]);\n    }\n  
}\n};\n\nTEST_F(TestFileStream, PrivateMmapStream) { this->Run<PrivateMmapConstStream>(); }\n\nTEST_F(TestFileStream, MemBufFileReadStream) { this->Run<MemBufFileReadStream>(); }\n\nTEST(IO, CmdOutput) {\n  // Use a simple command that works in cmd.exe\n  std::string output = CmdOutput(\"echo HelloWorld\");\n  ASSERT_EQ(output, R\"(HelloWorld\n)\");\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_json.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <fstream>\n#include <limits>  // for numeric_limits\n#include <map>\n#include <numeric>  // for iota\n\n#include \"../../../src/common/io.h\"\n#include \"../../../src/common/json_utils.h\"\n#include \"../../../src/common/threading_utils.h\"  // for ParallelFor\n#include \"../filesystem.h\"                        // for TemporaryDirectory\n#include \"../helpers.h\"\n#include \"dmlc/logging.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/json_io.h\"\n#include \"xgboost/logging.h\"\n\nnamespace xgboost {\n\nstd::string GetModelStr() {\n  std::string model_json = R\"json(\n{\n  \"model_parameter\": {\n    \"base_score\": \"0.5\",\n    \"num_class\": \"0\",\n    \"num_feature\": \"10\"\n  },\n  \"train_parameter\": {\n    \"debug_verbose\": \"0\",\n    \"disable_default_eval_metric\": \"0\",\n    \"nthread\": \"0\",\n    \"seed\": \"0\",\n    \"seed_per_iteration\": \"0\",\n    \"test_flag\": \"\",\n    \"tree_method\": \"gpu_hist\"\n  },\n  \"configuration\": {\n    \"booster\": \"gbtree\",\n    \"gpu_id\": \"0\",\n    \"num_class\": \"0\",\n    \"num_feature\": \"10\",\n    \"objective\": \"reg:linear\",\n    \"tree_method\": \"gpu_hist\",\n    \"updater\": \"grow_gpu_hist\"\n  },\n  \"objective\": \"reg:linear\",\n  \"booster\": \"gbtree\",\n  \"gbm\": {\n    \"GBTreeModelParam\": {\n      \"num_feature\": \"10\",\n      \"num_output_group\": \"1\",\n      \"num_roots\": \"1\",\n      \"size_leaf_vector\": \"0\"\n    },\n    \"trees\": [{\n        \"TreeParam\": {\n          \"num_feature\": \"10\",\n          \"num_roots\": \"1\",\n          \"size_leaf_vector\": \"0\"\n        },\n        \"num_nodes\": \"9\",\n        \"nodes\": [\n          {\n            \"depth\": 0,\n            \"gain\": 31.8892,\n            \"hess\": 10,\n            \"left\": 1,\n            \"missing\": 1,\n            \"nodeid\": 0,\n            \"right\": 2,\n            
\"split_condition\": 0.580717,\n            \"split_index\": 2\n          },\n          {\n            \"depth\": 1,\n            \"gain\": 1.5625,\n            \"hess\": 3,\n            \"left\": 5,\n            \"missing\": 5,\n            \"nodeid\": 2,\n            \"right\": 6,\n            \"split_condition\": 0.160345,\n            \"split_index\": 0\n          },\n          {\n            \"depth\": 2,\n            \"gain\": 0.25,\n            \"hess\": 2,\n            \"left\": 7,\n            \"missing\": 7,\n            \"nodeid\": 6,\n            \"right\": 8,\n            \"split_condition\": 0.62788,\n            \"split_index\": 0\n          },\n          {\n            \"hess\": 1,\n            \"leaf\": 0.375,\n            \"nodeid\": 8\n          },\n          {\n            \"hess\": 1,\n            \"leaf\": 0.075,\n            \"nodeid\": 7\n          },\n          {\n            \"hess\": 1,\n            \"leaf\": -0.075,\n            \"nodeid\": 5\n          },\n          {\n            \"depth\": 3,\n            \"gain\": 10.4866,\n            \"hess\": 7,\n            \"left\": 3,\n            \"missing\": 3,\n            \"nodeid\": 1,\n            \"right\": 4,\n            \"split_condition\": 0.238748,\n            \"split_index\": 1\n          },\n          {\n            \"hess\": 6,\n            \"leaf\": 1.54286,\n            \"nodeid\": 4\n          },\n          {\n            \"hess\": 1,\n            \"leaf\": 0.225,\n            \"nodeid\": 3\n          }\n        ],\n        \"leaf_vector\": []\n      }],\n    \"tree_info\": [0]\n  }\n}\n)json\";\n  return model_json;\n}\n\nTEST(Json, TestParseObject) {\n  std::string str = R\"obj({\"TreeParam\" : {\"num_feature\": \"10\"}})obj\";\n  auto json = Json::Load(StringView{str.c_str(), str.size()});\n}\n\nTEST(Json, ParseNumber) {\n  {\n    std::string str = \"31.8892\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_EQ(get<JsonNumber>(json), 
31.8892f);\n  }\n  {\n    std::string str = \"-31.8892\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_EQ(get<JsonNumber>(json), -31.8892f);\n  }\n  {\n    std::string str = \"2e4\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_EQ(get<JsonNumber>(json), 2e4f);\n  }\n  {\n    std::string str = \"2e-4\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_EQ(get<JsonNumber>(json), 2e-4f);\n  }\n  {\n    std::string str = \"-2e-4\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_EQ(get<JsonNumber>(json), -2e-4f);\n  }\n  {\n    std::string str = \"-0.0\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));\n    ASSERT_EQ(get<JsonNumber>(json), -0);\n  }\n  {\n    std::string str = \"-5.37645816802978516e-01\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_TRUE(std::signbit(get<JsonNumber>(json)));\n    // Larger than fast path limit.\n    ASSERT_EQ(get<JsonNumber>(json), -5.37645816802978516e-01);\n  }\n  {\n    std::string str = \"9.86623668670654297e+00\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n    ASSERT_FALSE(std::signbit(get<JsonNumber>(json)));\n    ASSERT_EQ(get<JsonNumber>(json), 9.86623668670654297e+00);\n  }\n}\n\nTEST(Json, ParseArray) {\n  std::string str = R\"json(\n{\n    \"nodes\": [\n        {\n\t    \"depth\": 3,\n\t    \"gain\": 10.4866,\n\t    \"hess\": 7,\n\t    \"left\": 3,\n\t    \"missing\": 3,\n\t    \"nodeid\": 1,\n\t    \"right\": 4,\n\t    \"split_condition\": 0.238748,\n\t    \"split_index\": 1\n        },\n        {\n\t    \"hess\": 6,\n\t    \"leaf\": 1.54286,\n\t    \"nodeid\": 4\n        },\n        {\n\t    \"hess\": 1,\n\t    \"leaf\": 0.225,\n\t    \"nodeid\": 3\n        }\n    ]\n}\n)json\";\n  auto json = Json::Load(StringView{str.c_str(), str.size()});\n  json = json[\"nodes\"];\n  
std::vector<Json> arr = get<JsonArray>(json);\n  ASSERT_EQ(arr.size(), 3ul);\n  Json v0 = arr[0];\n  ASSERT_EQ(get<Integer>(v0[\"depth\"]), 3);\n  ASSERT_NEAR(get<Number>(v0[\"gain\"]), 10.4866, kRtEps);\n\n  {\n    std::string str =\n        \"[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+\"\n        \"02,2.13924217224121094e+00,7.72699451446533203e+00,2.\"\n        \"30380615234375000e+02,2.64466613769531250e+02]\";\n    auto json = Json::Load(StringView{str.c_str(), str.size()});\n\n    auto const& vec = get<Array const>(json);\n    ASSERT_EQ(get<Number const>(vec[0]), 5.04713470458984375e+02);\n    ASSERT_EQ(get<Number const>(vec[1]), 9.86623668670654297e+00);\n    ASSERT_EQ(get<Number const>(vec[2]), 4.94847229003906250e+02);\n    ASSERT_EQ(get<Number const>(vec[3]), 2.13924217224121094e+00);\n    ASSERT_EQ(get<Number const>(vec[4]), 7.72699451446533203e+00);\n    ASSERT_EQ(get<Number const>(vec[5]), 2.30380615234375000e+02);\n    ASSERT_EQ(get<Number const>(vec[6]), 2.64466613769531250e+02);\n  }\n}\n\nTEST(Json, Null) {\n  Json json {JsonNull()};\n  std::string ss;\n  Json::Dump(json, &ss);\n  ASSERT_EQ(ss, \"null\");\n\n  std::string null_input {R\"null({\"key\":  null })null\"};\n\n  json = Json::Load({null_input.c_str(), null_input.size()});\n  ASSERT_TRUE(IsA<Null>(json[\"key\"]));\n\n  std::string dumped;\n  Json::Dump(json, &dumped, std::ios::binary);\n  ASSERT_TRUE(IsA<Null>(Json::Load(StringView{dumped}, std::ios::binary)[\"key\"]));\n}\n\nTEST(Json, EmptyObject) {\n  std::string str = R\"json(\n{\n  \"rank\": 1,\n  \"statistic\": {\n\n  }\n}\n)json\";\n  std::stringstream iss(str);\n  auto json = Json::Load(StringView{str.c_str(), str.size()});\n  ASSERT_TRUE(IsA<Object>(json[\"statistic\"]));\n\n  str = R\"json({\"Config\": {},\"Model\": {}})json\"; // NOLINT\n  json = Json::Load(StringView{str.c_str(), str.size()});\n  ASSERT_TRUE(IsA<Object>(json[\"Model\"]));\n}\n\nTEST(Json, EmptyArray) {\n  std::string str = 
R\"json(\n{\n  \"leaf_vector\": []\n}\n)json\";\n  std::istringstream iss(str);\n  auto json = Json::Load(StringView{str.c_str(), str.size()});\n  auto arr = get<JsonArray>(json[\"leaf_vector\"]);\n  ASSERT_EQ(arr.size(), 0ul);\n}\n\nTEST(Json, Boolean) {\n  std::string str = R\"json(\n{\n  \"left_child\": true,\n  \"right_child\": false\n}\n)json\";\n  Json j {Json::Load(StringView{str.c_str(), str.size()})};\n  ASSERT_EQ(get<JsonBoolean>(j[\"left_child\"]), true);\n  ASSERT_EQ(get<JsonBoolean>(j[\"right_child\"]), false);\n\n  std::string dumped;\n  Json::Dump(j, &dumped, std::ios::binary);\n  ASSERT_TRUE(get<Boolean const>(Json::Load(StringView{dumped}, std::ios::binary)[\"left_child\"]));\n}\n\nTEST(Json, Indexing) {\n  auto str = GetModelStr();\n  JsonReader reader(StringView{str.c_str(), str.size()});\n  Json j {Json::Load(&reader)};\n  auto& value_1 = j[\"model_parameter\"];\n  auto& value = value_1[\"base_score\"];\n  std::string result = Cast<JsonString>(&value.GetValue())->GetString();\n\n  ASSERT_EQ(result, \"0.5\");\n}\n\nTEST(Json, AssigningObjects) {\n  {\n    Json json;\n    json = JsonObject();\n    json[\"Okay\"] = JsonArray();\n    ASSERT_EQ(get<JsonArray>(json[\"Okay\"]).size(), 0ul);\n  }\n\n  {\n    std::map<std::string, Json> objects;\n    Json json_objects { JsonObject() };\n    std::vector<Json> arr_0 (1, Json(3.3f));\n    json_objects[\"tree_parameters\"] = JsonArray(arr_0);\n    std::vector<Json> json_arr = get<JsonArray>(json_objects[\"tree_parameters\"]);\n    ASSERT_NEAR(get<JsonNumber>(json_arr[0]), 3.3f, kRtEps);\n  }\n\n  {\n    Json json_object { JsonObject() };\n    auto str = JsonString(\"1\");\n    auto& k = json_object[\"1\"];\n    k  = std::move(str);\n    ASSERT_TRUE(str.GetString().empty());  // NOLINT\n    auto& m = json_object[\"1\"];\n    std::string value = get<JsonString>(m);\n    ASSERT_EQ(value, \"1\");\n    ASSERT_EQ(get<JsonString>(json_object[\"1\"]), \"1\");\n  }\n}\n\nTEST(Json, AssigningArray) {\n  Json json;\n  
json = JsonArray();\n  std::vector<Json> tmp_0 {Json(Number(1.0f)), Json(Number(2.0f))};\n  json = tmp_0;\n  std::vector<Json> tmp_1 {Json(Number(3.0f))};\n  get<Array>(json) = tmp_1;\n  std::vector<Json> res = get<Array>(json);\n  ASSERT_EQ(get<Number>(res[0]), 3);\n}\n\nTEST(Json, AssigningNumber) {\n  {\n    // right value\n    Json json = Json{ Number(4.0f) };\n    get<Number>(json) = 15;\n    ASSERT_EQ(get<Number>(json), 15);\n  }\n\n  {\n    // left value ref\n    Json json = Json{ Number(4.0f) };\n    Number::Float& ref = get<Number>(json);\n    ref = 15;\n    ASSERT_EQ(get<Number>(json), 15);\n  }\n\n  {\n    // left value\n    Json json = Json{ Number(4.0f) };\n    double value = get<Number>(json);\n    ASSERT_EQ(value, 4);\n    value = 15;  // NOLINT\n    ASSERT_EQ(get<Number>(json), 4);\n  }\n\n  {\n    Json value {Number(std::numeric_limits<float>::quiet_NaN())};\n    ASSERT_TRUE(IsA<Number>(value));\n  }\n}\n\nTEST(Json, AssigningString) {\n  {\n    // right value\n    Json json = Json{ String(\"str\") };\n    get<String>(json) = \"modified\";\n    ASSERT_EQ(get<String>(json), \"modified\");\n  }\n\n  {\n    // left value ref\n    Json json = Json{ String(\"str\") };\n    std::string& ref = get<String>(json);\n    ref = \"modified\";\n    ASSERT_EQ(get<String>(json), \"modified\");\n  }\n\n  {\n    // left value\n    Json json = Json{ String(\"str\") };\n    std::string value = get<String>(json);\n    value = \"modified\";\n    ASSERT_EQ(get<String>(json), \"str\");\n  }\n}\n\nTEST(Json, LoadDump) {\n  std::string ori_buffer = GetModelStr();\n  Json origin{Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};\n\n  common::TemporaryDirectory tempdir;\n  auto const& path = tempdir.Path() / \"test_model_dump\";\n\n  std::string out;\n  Json::Dump(origin, &out);\n\n  std::ofstream fout(path);\n  ASSERT_TRUE(fout);\n  fout << out << std::flush;\n\n  std::vector<char> new_buffer = common::LoadSequentialFile(path.string());\n\n  Json 
load_back{Json::Load(StringView(new_buffer.data(), new_buffer.size()))};\n  ASSERT_EQ(load_back, origin);\n}\n\nTEST(Json, Invalid) {\n  {\n    std::string str = \"}\";\n    bool has_thrown = false;\n    try {\n      Json load{Json::Load(StringView(str.c_str(), str.size()))};\n    } catch (dmlc::Error const &e) {\n      std::string msg = e.what();\n      ASSERT_NE(msg.find(\"Unknown\"), std::string::npos);\n      has_thrown = true;\n    };\n    ASSERT_TRUE(has_thrown);\n  }\n  {\n    std::string str = R\"json({foo)json\";\n    bool has_thrown = false;\n    try {\n      Json load{Json::Load(StringView(str.c_str(), str.size()))};\n    } catch (dmlc::Error const &e) {\n      std::string msg = e.what();\n      ASSERT_NE(msg.find(\"position: 1\"), std::string::npos);\n      has_thrown = true;\n    };\n    ASSERT_TRUE(has_thrown);\n  }\n  {\n    std::string str = R\"json({\"foo\")json\";\n    bool has_thrown = false;\n    try {\n      Json load{Json::Load(StringView(str.c_str(), str.size()))};\n    } catch (dmlc::Error const& e) {\n      std::string msg = e.what();\n      // EOF is printed as 255 on s390x\n      ASSERT_TRUE(msg.find(\"EOF\") != std::string::npos || msg.find(\"255\") != std::string::npos);\n      has_thrown = true;\n    };\n    ASSERT_TRUE(has_thrown);\n  }\n}\n\n// For now Json is quite ignorance about unicode.\nTEST(Json, CopyUnicode) {\n  std::string json_str = R\"json(\n{\"m\": [\"\\ud834\\udd1e\", \"\\u20ac\", \"\\u0416\", \"\\u00f6\"]}\n)json\";\n  Json loaded {Json::Load(StringView{json_str.c_str(), json_str.size()})};\n\n  std::string dumped_string;\n  Json::Dump(loaded, &dumped_string);\n\n  ASSERT_NE(dumped_string.find(\"\\\\u20ac\"), std::string::npos);\n}\n\nTEST(Json, WrongCasts) {\n  {\n    Json json = Json{ String{\"str\"} };\n    ASSERT_ANY_THROW(get<Number>(json));\n  }\n  {\n    Json json = Json{ Array{ std::vector<Json>{ Json{ Number{1.0f} } } } };\n    ASSERT_ANY_THROW(get<Number>(json));\n  }\n  {\n    Json json = 
Json{Object{{{\"key\", Json{String{\"value\"}}}}}};\n    ASSERT_ANY_THROW(get<Number>(json));\n  }\n}\n\nTEST(Json, Integer) {\n  for (int64_t i = 1; i < 10000; i *= 10) {\n    auto ten = Json{Integer{i}};\n    std::string str;\n    Json::Dump(ten, &str);\n    ASSERT_EQ(str, std::to_string(i));\n  }\n}\n\nTEST(Json, IntVSFloat) {\n  // If integer is parsed as float, calling `get<Integer>()' will throw.\n  {\n    std::string str = R\"json(\n{\n  \"number\": 123.4,\n  \"integer\": 123\n})json\";\n\n    Json obj = Json::Load({str.c_str(), str.size()});\n    JsonNumber::Float number = get<Number>(obj[\"number\"]);\n    ASSERT_NEAR(number, 123.4f, kRtEps);\n    JsonInteger::Int integer = get<Integer>(obj[\"integer\"]);\n    ASSERT_EQ(integer, 123);\n  }\n\n  {\n    std::string str = R\"json(\n{\"data\": [2503595760, false], \"shape\": [10]}\n)json\";\n    Json obj = Json::Load({str.c_str(), str.size()});\n    auto array = get<Array>(obj[\"data\"]);\n    auto ptr = get<Integer>(array[0]);\n    ASSERT_EQ(ptr, 2503595760);\n  }\n}\n\nnamespace {\nvoid TestRroundTrip(std::ios::openmode mode) {\n  uint32_t i = 0;\n  SimpleLCG rng;\n  SimpleRealUniformDistribution<float> dist(1.0f, 4096.0f);\n\n  while (i <= std::numeric_limits<uint32_t>::max()) {\n    float f;\n    std::memcpy(&f, &i, sizeof(f));\n\n    Json jf{f};\n    std::string str;\n    Json::Dump(jf, &str, mode);\n    auto loaded = Json::Load(StringView{str}, mode);\n    if (XGBOOST_EXPECT(std::isnan(f), false)) {\n      ASSERT_TRUE(std::isnan(get<Number const>(loaded)));\n    } else {\n      ASSERT_EQ(get<Number const>(loaded), f);\n    }\n\n    auto t = i;\n    i += static_cast<uint32_t>(dist(&rng));\n    if (i < t) {\n      break;\n    }\n  }\n}\n}  // namespace\n\nTEST(Json, RoundTrip) {\n  TestRroundTrip(std::ios::out);\n  TestRroundTrip(std::ios::binary);\n}\n\nTEST(Json, DISABLED_RoundTripExhaustive) {\n  auto test = [](uint32_t i) {\n    float f;\n    std::memcpy(&f, &i, sizeof(f));\n\n    Json jf{f};\n    
std::string str;\n    Json::Dump(jf, &str);\n    auto loaded = Json::Load({str.c_str(), str.size()});\n    if (XGBOOST_EXPECT(std::isnan(f), false)) {\n      EXPECT_TRUE(std::isnan(get<Number const>(loaded)));\n    } else {\n      EXPECT_EQ(get<Number const>(loaded), f);\n    }\n  };\n  int64_t int32_max = static_cast<int64_t>(std::numeric_limits<uint32_t>::max());\n  Context ctx;\n  common::ParallelFor(int32_max, ctx.Threads(), [&](auto i) { test(static_cast<uint32_t>(i)); });\n}\n\nTEST(Json, TypedArray) {\n  size_t n = 16;\n  F32Array f32{n};\n  std::iota(f32.GetArray().begin(), f32.GetArray().end(), -8);\n  I8Array i8{n};\n  std::iota(i8.GetArray().begin(), i8.GetArray().end(), 0);\n  U8Array u8{n};\n  std::iota(u8.GetArray().begin(), u8.GetArray().end(), 0);\n  I32Array i16{n};\n  std::iota(i16.GetArray().begin(), i16.GetArray().end(), -8);\n  I32Array i32{n};\n  std::iota(i32.GetArray().begin(), i32.GetArray().end(), -8);\n  I64Array i64{n};\n  std::iota(i64.GetArray().begin(), i64.GetArray().end(), -8);\n\n  Json json{Object{}};\n  json[\"u8\"] = std::move(u8);\n  ASSERT_TRUE(IsA<U8Array>(json[\"u8\"]));\n  json[\"i8\"] = std::move(i8);\n  ASSERT_TRUE(IsA<I8Array>(json[\"i8\"]));\n  json[\"f32\"] = std::move(f32);\n  ASSERT_TRUE(IsA<F32Array>(json[\"f32\"]));\n  json[\"i16\"] = std::move(i16);\n  ASSERT_TRUE(IsA<I32Array>(json[\"i16\"]));\n  json[\"i32\"] = std::move(i32);\n  ASSERT_TRUE(IsA<I32Array>(json[\"i32\"]));\n  json[\"i64\"] = std::move(i64);\n  ASSERT_TRUE(IsA<I64Array>(json[\"i64\"]));\n\n  std::string str;\n  Json::Dump(json, &str);\n  {\n    auto loaded = Json::Load(StringView{str});\n    // for text output there's no typed array.\n    ASSERT_TRUE(IsA<Array>(loaded[\"u8\"]));\n    auto const& arr = loaded[\"f32\"];\n    for (int32_t i = -8; i < 8; ++i) {\n      ASSERT_EQ(get<Number>(arr[i + 8]), i);\n    }\n  }\n\n  std::string binary;\n  Json::Dump(json, &binary, std::ios::binary);\n  {\n    auto loaded = Json::Load(StringView{binary}, 
std::ios::binary);\n    ASSERT_TRUE(IsA<U8Array>(loaded[\"u8\"]));\n    auto const& arr = get<F32Array>(loaded[\"f32\"]);\n    for (int32_t i = -8; i < 8; ++i) {\n      ASSERT_EQ(arr[i + 8], i);\n    }\n\n    ASSERT_TRUE(IsA<I8Array>(loaded[\"i8\"])) << loaded[\"i8\"].GetValue().TypeStr();\n    auto const& i8_arr = get<I8Array>(loaded[\"i8\"]);\n    for (decltype(n) i = 0; i < n; ++i) {\n      ASSERT_EQ(i8_arr[i], i);\n    }\n  }\n\n  {\n    Json f64{Object{}};\n    auto array = F64Array();\n    auto& vec = array.GetArray();\n    // Construct test data\n    vec.resize(18);\n    std::iota(vec.begin(), vec.end(), 0.0);\n    // special values\n    vec.push_back(std::numeric_limits<double>::epsilon());\n    vec.push_back(std::numeric_limits<double>::max());\n    vec.push_back(std::numeric_limits<double>::min());\n    vec.push_back(std::numeric_limits<double>::denorm_min());\n    vec.push_back(std::numeric_limits<double>::quiet_NaN());\n\n    static_assert(\n        std::is_same_v<double, typename std::remove_reference_t<decltype(vec)>::value_type>);\n\n    f64[\"f64\"] = std::move(array);\n    ASSERT_TRUE(IsA<F64Array>(f64[\"f64\"]));\n    std::vector<char> out;\n    Json::Dump(f64, &out, std::ios::binary);\n\n    auto loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);\n    ASSERT_TRUE(IsA<F64Array>(loaded[\"f64\"]));\n    auto const& result = get<F64Array const>(loaded[\"f64\"]);\n\n    auto& vec1 = get<F64Array const>(f64[\"f64\"]);\n    ASSERT_EQ(result.size(), vec1.size());\n    for (std::size_t i = 0; i < vec1.size() - 1; ++i) {\n      ASSERT_EQ(result[i], vec1[i]);\n    }\n    ASSERT_TRUE(std::isnan(result.back()));\n  }\n}\n\nTEST(UBJson, Basic) {\n  auto run_test = [](StringView str) {\n    auto json = Json::Load(str);\n    std::vector<char> stream;\n    UBJWriter writer{&stream};\n    Json::Dump(json, &writer);\n    {\n      std::ofstream fout{\"test.ubj\", std::ios::binary | std::ios::out};\n      fout.write(stream.data(), 
stream.size());\n    }\n\n    auto data = common::LoadSequentialFile(\"test.ubj\");\n    UBJReader reader{StringView{data.data(), data.size()}};\n    json = reader.Load();\n    return json;\n  };\n  {\n    // empty\n    auto ret = run_test(R\"({})\");\n    std::stringstream ss;\n    ss << ret;\n    ASSERT_EQ(ss.str(), \"{}\");\n  }\n  {\n    auto ret = run_test(R\"({\"\":[]})\");\n    std::stringstream ss;\n    ss << ret;\n    ASSERT_EQ(ss.str(), R\"({\"\":[]})\");\n  }\n  {\n    // basic\n    auto ret = run_test(R\"({\"test\": [2.71, 3.14, Infinity]})\");\n    ASSERT_TRUE(std::isinf(get<Number>(get<Array>(ret[\"test\"])[2])));\n    ASSERT_FLOAT_EQ(3.14, get<Number>(get<Array>(ret[\"test\"])[1]));\n    ASSERT_FLOAT_EQ(2.71, get<Number>(get<Array>(ret[\"test\"])[0]));\n  }\n  {\n    // boolean\n    Json boolean{Object{}};\n    boolean[\"foo\"] = Boolean{false};\n    std::vector<char> out;\n    Json::Dump(boolean, &out, std::ios::binary);\n    auto loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);\n\n    ASSERT_EQ(boolean, loaded);\n\n    boolean[\"foo\"] = Boolean{true};\n    Json::Dump(boolean, &out, std::ios::binary);\n    loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);\n    ASSERT_EQ(boolean, loaded);\n  }\n}\n\n\nTEST(Json, TypeCheck) {\n  Json config{Object{}};\n  config[\"foo\"] = String{\"bar\"};\n  auto test = [&]() { TypeCheck<Number, Integer, Array, I32Array>(config[\"foo\"], \"foo\"); };\n  ASSERT_THROW({ test(); }, dmlc::Error);\n  try {\n    test();\n  } catch (dmlc::Error const& e) {\n    auto err = std::string{e.what()};\n    ASSERT_NE(err.find(\"Number\"), std::string::npos);\n    ASSERT_NE(err.find(\"I32Array\"), std::string::npos);\n    ASSERT_NE(err.find(\"foo\"), std::string::npos);\n  }\n}\n\nTEST(Json, Dump) {\n  auto str = GetModelStr();\n  auto jobj = Json::Load(str);\n  std::string result_s = Json::Dump(jobj);\n\n  std::vector<char> result_v = Json::Dump<std::vector<char>>(jobj);\n  
ASSERT_EQ(result_s.size(), result_v.size());\n  for (std::size_t i = 0; i < result_s.size(); ++i) {\n    ASSERT_EQ(result_s[i], result_v[i]);\n  }\n}\n\nTEST(Json, NonNullTerminated) {\n  // garbage at the end, not terminated by \\0\n  std::vector<char> str{'{', '\"', 'a', '\"', ':', '\"', 'b', '\"', '}', 'c', 'c'};\n  auto jobj = Json::Load(StringView{str.data(), str.size()});\n  ASSERT_EQ(get<String const>(jobj[\"a\"]), \"b\");\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_linalg.cc",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/linalg.h>\n\n#include <cstddef>  // size_t\n#include <numeric>  // iota\n#include <vector>   // for vector\n\n#include \"../../../src/common/linalg_op.h\"\n#include \"test_linalg.h\"  // for TestLinalgDispatch\n\nnamespace xgboost::linalg {\nnamespace {\nDeviceOrd CPU() { return DeviceOrd::CPU(); }\n\ntemplate <typename T>\nvoid ConstView(linalg::VectorView<T> v1, linalg::VectorView<std::add_const_t<T>> v2) {\n  // compile test for being able to pass non-const view to const view.\n  auto s = v1.Slice(linalg::All());\n  ASSERT_EQ(s.Size(), v1.Size());\n  auto s2 = v2.Slice(linalg::All());\n  ASSERT_EQ(s2.Size(), v2.Size());\n}\n}  // namespace\n\nauto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {\n  storage->Resize(n_rows * n_cols);\n  auto &h_storage = storage->HostVector();\n\n  std::iota(h_storage.begin(), h_storage.end(), 0);\n\n  auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, CPU()};\n  return m;\n}\n\nTEST(Linalg, MatrixView) {\n  size_t kRows = 31, kCols = 77;\n  HostDeviceVector<float> storage;\n  auto m = MakeMatrixFromTest(&storage, kRows, kCols);\n  ASSERT_EQ(m.Device(), CPU());\n  ASSERT_EQ(m(0, 0), 0);\n  ASSERT_EQ(m(kRows - 1, kCols - 1), storage.Size() - 1);\n}\n\nTEST(Linalg, VectorView) {\n  size_t kRows = 31, kCols = 77;\n  HostDeviceVector<float> storage;\n  auto m = MakeMatrixFromTest(&storage, kRows, kCols);\n  auto v = m.Slice(linalg::All(), 3);\n  for (size_t i = 0; i < v.Size(); ++i) {\n    ASSERT_EQ(v(i), m(i, 3));\n  }\n\n  ASSERT_EQ(v(0), 3);\n}\n\nTEST(Linalg, TensorView) {\n  Context ctx;\n  std::vector<double> data(2 * 3 * 4, 0);\n  std::iota(data.begin(), data.end(), 0);\n\n  auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n  
ASSERT_EQ(t.Shape()[0], 2);\n  ASSERT_EQ(t.Shape()[1], 3);\n  ASSERT_EQ(t.Shape()[2], 4);\n\n  float v = t(0, 1, 2);\n  ASSERT_EQ(v, 6);\n\n  auto s = t.Slice(1, All(), All());\n  ASSERT_EQ(s.Shape().size(), 2);\n  ASSERT_EQ(s.Shape()[0], 3);\n  ASSERT_EQ(s.Shape()[1], 4);\n\n  std::vector<std::vector<double>> sol{\n      {12.0, 13.0, 14.0, 15.0}, {16.0, 17.0, 18.0, 19.0}, {20.0, 21.0, 22.0, 23.0}};\n  for (size_t i = 0; i < s.Shape()[0]; ++i) {\n    for (size_t j = 0; j < s.Shape()[1]; ++j) {\n      ASSERT_EQ(s(i, j), sol[i][j]);\n    }\n  }\n\n  {\n    // as vector\n    TensorView<double, 1> vec{data, {data.size()}, CPU()};\n    ASSERT_EQ(vec.Size(), data.size());\n    ASSERT_EQ(vec.Shape(0), data.size());\n    ASSERT_EQ(vec.Shape().size(), 1);\n    for (size_t i = 0; i < data.size(); ++i) {\n      ASSERT_EQ(vec(i), data[i]);\n    }\n  }\n\n  {\n    // as matrix\n    TensorView<double, 2> mat(data, {6, 4}, CPU());\n    auto s = mat.Slice(2, All());\n    ASSERT_EQ(s.Shape().size(), 1);\n    s = mat.Slice(All(), 1);\n    ASSERT_EQ(s.Shape().size(), 1);\n  }\n\n  {\n    // assignment\n    TensorView<double, 3> t{data, {2, 3, 4}, CPU()};\n    double pi = 3.14159;\n    auto old = t(1, 2, 3);\n    t(1, 2, 3) = pi;\n    ASSERT_EQ(t(1, 2, 3), pi);\n    t(1, 2, 3) = old;\n    ASSERT_EQ(t(1, 2, 3), old);\n  }\n\n  {\n    // Don't assign the initial dimension, tensor should be able to deduce the correct dim\n    // for Slice.\n    static_assert(decltype(MakeTensorView(&ctx, data, 2, 3, 4).Slice(1, 2, All()))::kDimension ==\n                  1);\n  }\n  {\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto s = t.Slice(1, linalg::All(), 1);\n    ASSERT_EQ(s(0), 13);\n    ASSERT_EQ(s(1), 17);\n    ASSERT_EQ(s(2), 21);\n  }\n  {\n    // range slice\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);\n    static_assert(decltype(s)::kDimension == 2);\n    std::vector<double> sol{6, 10, 18, 22};\n    auto k = 
0;\n    for (size_t i = 0; i < s.Shape(0); ++i) {\n      for (size_t j = 0; j < s.Shape(1); ++j) {\n        ASSERT_EQ(s(i, j), sol.at(k));\n        k++;\n      }\n    }\n    ASSERT_FALSE(s.CContiguous());\n  }\n  {\n    // range slice\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));\n    static_assert(decltype(s)::kDimension == 2);\n    std::vector<double> sol{17, 18, 21, 22};\n    auto k = 0;\n    for (size_t i = 0; i < s.Shape(0); ++i) {\n      for (size_t j = 0; j < s.Shape(1); ++j) {\n        ASSERT_EQ(s(i, j), sol.at(k));\n        k++;\n      }\n    }\n    ASSERT_FALSE(s.CContiguous());\n  }\n  {\n    // same as no slice.\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));\n    static_assert(decltype(s)::kDimension == 3);\n    auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());\n    for (size_t i = 0; i < s.Shape(0); ++i) {\n      for (size_t j = 0; j < s.Shape(1); ++j) {\n        for (size_t k = 0; k < s.Shape(2); ++k) {\n          ASSERT_EQ(s(i, j, k), all(i, j, k));\n        }\n      }\n    }\n    ASSERT_TRUE(s.CContiguous());\n    ASSERT_TRUE(all.CContiguous());\n  }\n\n  {\n    // copy and move constructor.\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto from_copy = t;\n    auto from_move = std::move(t);\n    for (size_t i = 0; i < t.Shape().size(); ++i) {\n      ASSERT_EQ(from_copy.Shape(i), from_move.Shape(i));\n      ASSERT_EQ(from_copy.Stride(i), from_move.Stride(i));\n    }\n  }\n\n  {\n    // multiple slices\n    auto t = MakeTensorView(&ctx, data, 2, 3, 4);\n    auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));\n    ASSERT_FALSE(s_0.CContiguous());\n    auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));\n    ASSERT_EQ(s_1.Size(), 2);\n    ASSERT_TRUE(s_1.CContiguous());\n    ASSERT_TRUE(s_1.Contiguous());\n    ASSERT_EQ(s_1(0), 17);\n    
ASSERT_EQ(s_1(1), 18);\n\n    auto s_2 = s_0.Slice(1, linalg::All(), linalg::Range(0, 2));\n    std::vector<double> sol{13, 14, 17, 18};\n    auto k = 0;\n    for (size_t i = 0; i < s_2.Shape(0); i++) {\n      for (size_t j = 0; j < s_2.Shape(1); ++j) {\n        ASSERT_EQ(s_2(i, j), sol[k]);\n        k++;\n      }\n    }\n  }\n  {\n    // f-contiguous\n    TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, CPU()};\n    ASSERT_TRUE(t.Contiguous());\n    ASSERT_TRUE(t.FContiguous());\n    ASSERT_FALSE(t.CContiguous());\n  }\n  {\n    // const\n    TensorView<double, 1> t{data, {data.size()}, CPU()};\n    ConstView(t, t);\n  }\n}\n\nTEST(Linalg, Tensor) {\n  {\n    Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};\n    auto view = t.View(CPU());\n\n    auto const &as_const = t;\n    auto k_view = as_const.View(CPU());\n\n    size_t n = 2 * 3 * 4;\n    ASSERT_EQ(t.Size(), n);\n    ASSERT_TRUE(\n        std::equal(k_view.Values().cbegin(), k_view.Values().cend(), view.Values().cbegin()));\n\n    Tensor<float, 3> t_0{std::move(t)};\n    ASSERT_EQ(t_0.Size(), n);\n    ASSERT_EQ(t_0.Shape(0), 2);\n    ASSERT_EQ(t_0.Shape(1), 3);\n    ASSERT_EQ(t_0.Shape(2), 4);\n  }\n  {\n    // Reshape\n    Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};\n    t.Reshape(4, 3, 2);\n    ASSERT_EQ(t.Size(), 24);\n    ASSERT_EQ(t.Shape(2), 2);\n    t.Reshape(1);\n    ASSERT_EQ(t.Size(), 1);\n    t.Reshape(0, 0, 0);\n    ASSERT_EQ(t.Size(), 0);\n    t.Reshape(0, 3, 0);\n    ASSERT_EQ(t.Size(), 0);\n    ASSERT_EQ(t.Shape(1), 3);\n    t.Reshape(3, 3, 3);\n    ASSERT_EQ(t.Size(), 27);\n  }\n}\n\nTEST(Linalg, Empty) {\n  {\n    auto t = TensorView<double, 2>{{}, {0, 3}, CPU(), Order::kC};\n    for (int32_t i : {0, 1, 2}) {\n      auto s = t.Slice(All(), i);\n      ASSERT_EQ(s.Size(), 0);\n      ASSERT_EQ(s.Shape().size(), 1);\n      ASSERT_EQ(s.Shape(0), 0);\n    }\n  }\n  {\n    auto t = Tensor<double, 2>{{0, 3}, CPU(), Order::kC};\n    ASSERT_EQ(t.Size(), 0);\n    auto view = t.View(CPU());\n\n 
   for (int32_t i : {0, 1, 2}) {\n      auto s = view.Slice(All(), i);\n      ASSERT_EQ(s.Size(), 0);\n      ASSERT_EQ(s.Shape().size(), 1);\n      ASSERT_EQ(s.Shape(0), 0);\n    }\n  }\n}\n\nTEST(Linalg, ArrayInterface) {\n  auto cpu = CPU();\n  auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};\n  auto v = t.View(cpu);\n  std::iota(v.Values().begin(), v.Values().end(), 0);\n  auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});\n  ASSERT_EQ(get<Integer>(arr[\"shape\"][0]), 3);\n  ASSERT_EQ(get<Integer>(arr[\"strides\"][0]), 3 * sizeof(double));\n\n  ASSERT_FALSE(get<Boolean>(arr[\"data\"][1]));\n  ASSERT_EQ(reinterpret_cast<double *>(get<Integer>(arr[\"data\"][0])), v.Values().data());\n\n  TensorView<double const, 2> as_const = v;\n  auto const_arr = ArrayInterface(as_const);\n  ASSERT_TRUE(get<Boolean>(const_arr[\"data\"][1]));\n}\n\nTEST(Linalg, Popc) {\n  {\n    uint32_t v{0};\n    ASSERT_EQ(detail::NativePopc(v), 0);\n    ASSERT_EQ(detail::Popc(v), 0);\n    v = 1;\n    ASSERT_EQ(detail::NativePopc(v), 1);\n    ASSERT_EQ(detail::Popc(v), 1);\n    v = 0xffffffff;\n    ASSERT_EQ(detail::NativePopc(v), 32);\n    ASSERT_EQ(detail::Popc(v), 32);\n  }\n  {\n    uint64_t v{0};\n    ASSERT_EQ(detail::NativePopc(v), 0);\n    ASSERT_EQ(detail::Popc(v), 0);\n    v = 1;\n    ASSERT_EQ(detail::NativePopc(v), 1);\n    ASSERT_EQ(detail::Popc(v), 1);\n    v = 0xffffffff;\n    ASSERT_EQ(detail::NativePopc(v), 32);\n    ASSERT_EQ(detail::Popc(v), 32);\n    v = 0xffffffffffffffff;\n    ASSERT_EQ(detail::NativePopc(v), 64);\n    ASSERT_EQ(detail::Popc(v), 64);\n  }\n}\n\nTEST(Linalg, Stack) {\n  Tensor<float, 3> l{{2, 3, 4}, CPU(), Order::kC};\n  cpu_impl::TransformIdxKernel(l.View(CPU()), omp_get_max_threads(),\n                               [=](size_t i, float) { return i; });\n  Tensor<float, 3> r_0{{2, 3, 4}, CPU(), Order::kC};\n  cpu_impl::TransformIdxKernel(r_0.View(CPU()), omp_get_max_threads(),\n                               [=](size_t i, float) { return i; 
});\n\n  Stack(&l, r_0);\n\n  Tensor<float, 3> r_1{{0, 3, 4}, CPU(), Order::kC};\n  Stack(&l, r_1);\n  ASSERT_EQ(l.Shape(0), 4);\n\n  Stack(&r_1, l);\n  ASSERT_EQ(r_1.Shape(0), l.Shape(0));\n}\n\nTEST(Linalg, FOrder) {\n  std::size_t constexpr kRows = 16, kCols = 3;\n  std::vector<float> data(kRows * kCols);\n  MatrixView<float> mat{data, {kRows, kCols}, CPU(), Order::kF};\n  float k{0};\n  for (std::size_t i = 0; i < kRows; ++i) {\n    for (std::size_t j = 0; j < kCols; ++j) {\n      mat(i, j) = k;\n      k++;\n    }\n  }\n  auto column = mat.Slice(linalg::All(), 1);\n  ASSERT_TRUE(column.FContiguous());\n  ASSERT_EQ(column.Stride(0), 1);\n  ASSERT_TRUE(column.CContiguous());\n  k = 1;\n  for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it) {\n    ASSERT_EQ(*it, k);\n    k += kCols;\n  }\n  k = 1;\n  auto ptr = column.Values().data();\n  for (auto it = ptr; it != ptr + kRows; ++it) {\n    ASSERT_EQ(*it, k);\n    k += kCols;\n  }\n}\n\nTEST(Linalg, IO) {\n  std::vector<double> data(128, 0);\n  std::iota(data.begin(), data.end(), 0.0f);\n  Vector<double> vec(data.begin(), data.end(), {data.size()}, DeviceOrd::CPU());\n  Json jvec{F32Array{}};\n  SaveVector(vec, &jvec);\n\n  auto check = [&data](linalg::Vector<double> const &loaded) {\n    ASSERT_EQ(loaded.Size(), data.size());\n    for (std::size_t i = 0; i < data.size(); ++i) {\n      ASSERT_NEAR(data[i], loaded(i), kRtEps);\n    }\n  };\n\n  {\n    auto str = Json::Dump(jvec);\n    auto jloaded = Json::Load(StringView{str});\n\n    Vector<double> loaded;\n    LoadVector(jloaded, &loaded);\n    check(loaded);\n  }\n  {\n    Vector<double> loaded;\n    LoadVector(jvec, &loaded);\n    check(loaded);\n  }\n  {\n    std::vector<char> str;\n    Json::Dump(jvec, &str, std::ios::binary);\n    auto jloaded = Json::Load(StringView{str.data(), str.size()}, std::ios::binary);\n\n    Vector<double> loaded;\n    LoadVector(jloaded, &loaded);\n    check(loaded);\n  }\n}\n\nTEST(Linalg, CpuDispatch) {\n  
Context ctx;\n  TestLinalgDispatch(&ctx, [](auto v) { return v + 1; });\n}\n\nTEST(Linalg, ExpandDim) {\n  Context ctx;\n  linalg::Matrix<float> x = Zeros<float>(&ctx, 16, 8);\n  std::size_t i = 0;\n  for (auto &v : x.HostView()) {\n    v = static_cast<float>(i);\n    ++i;\n  }\n  auto y = x.Slice(linalg::All(), 2);\n  auto z = ExpandDim(y);\n  ASSERT_EQ(z.Size(), x.Shape(0));\n  ASSERT_EQ(z.Shape(0), x.Shape(0));\n  ASSERT_EQ(z.Shape(1), 1);\n  for (std::size_t i = 0; i < z.Size(); ++i) {\n    ASSERT_EQ(z(i, 0), y(i));\n  }\n}\n}  // namespace xgboost::linalg\n"
  },
  {
    "path": "tests/cpp/common/test_linalg.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/equal.h>                       // for equal\n#include <thrust/iterator/constant_iterator.h>  // for make_constant_iterator\n#include <thrust/sequence.h>                    // for sequence\n\n#include \"../../../src/common/cuda_context.cuh\"\n#include \"../../../src/common/linalg_op.h\"\n#include \"../../../src/common/optional_weight.h\"  // for MakeOptionalWeights\n#include \"../helpers.h\"\n#include \"test_linalg.h\"     // for TestLinalgDispatch\n#include \"thrust/random.h\"   // for default_random_engine\n#include \"thrust/shuffle.h\"  // for shuffle\n#include \"xgboost/context.h\"\n#include \"xgboost/linalg.h\"\n\nnamespace xgboost::linalg {\nnamespace {\nvoid TestElementWiseKernel() {\n  auto ctx = MakeCUDACtx(0);\n  auto device = ctx.Device();\n  Tensor<float, 3> l{{2, 3, 4}, device};\n  {\n    /**\n     * Non-contiguous\n     */\n    // GPU view\n    auto t = l.View(device).Slice(linalg::All(), 1, linalg::All());\n    ASSERT_FALSE(t.CContiguous());\n    cuda_impl::TransformIdxKernel(&ctx, t, [] XGBOOST_DEVICE(std::size_t i, float) { return i; });\n    // CPU view\n    t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());\n    std::size_t k = 0;\n    for (size_t i = 0; i < l.Shape(0); ++i) {\n      for (size_t j = 0; j < l.Shape(2); ++j) {\n        ASSERT_EQ(k++, t(i, j));\n      }\n    }\n\n    t = l.View(device).Slice(linalg::All(), 1, linalg::All());\n    cuda_impl::ElementWiseKernel(\n        t, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable { t(i, j) = i + j; });\n\n    t = l.Slice(linalg::All(), 1, linalg::All());\n    for (size_t i = 0; i < l.Shape(0); ++i) {\n      for (size_t j = 0; j < l.Shape(2); ++j) {\n        ASSERT_EQ(i + j, t(i, j));\n      }\n    }\n  }\n\n  {\n    /**\n     * Contiguous\n     */\n    auto t = l.View(device);\n    cuda_impl::TransformIdxKernel(&ctx, t, [] XGBOOST_DEVICE(size_t i, float) { 
return i; });\n    ASSERT_TRUE(t.CContiguous());\n    // CPU view\n    t = l.View(DeviceOrd::CPU());\n\n    size_t ind = 0;\n    for (size_t i = 0; i < l.Shape(0); ++i) {\n      for (size_t j = 0; j < l.Shape(1); ++j) {\n        for (size_t k = 0; k < l.Shape(2); ++k) {\n          ASSERT_EQ(ind++, t(i, j, k));\n        }\n      }\n    }\n  }\n}\n\nvoid TestSlice() {\n  auto ctx = MakeCUDACtx(1);\n  thrust::device_vector<double> data(2 * 3 * 4);\n  auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);\n  dh::LaunchN(1, [=] __device__(size_t) {\n    auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));\n    auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());\n    static_assert(decltype(s)::kDimension == 3);\n    for (size_t i = 0; i < s.Shape(0); ++i) {\n      for (size_t j = 0; j < s.Shape(1); ++j) {\n        for (size_t k = 0; k < s.Shape(2); ++k) {\n          SPAN_CHECK(s(i, j, k) == all(i, j, k));\n        }\n      }\n    }\n  });\n}\n\nvoid TestWriteAccess(CUDAContext const* cuctx, linalg::TensorView<double, 3> t) {\n  thrust::for_each(cuctx->CTP(), linalg::tbegin(t), linalg::tend(t),\n                   [=] XGBOOST_DEVICE(double& v) { v = 0; });\n  auto eq = thrust::equal(cuctx->CTP(), linalg::tcbegin(t), linalg::tcend(t),\n                          thrust::make_constant_iterator<double>(0.0), thrust::equal_to<>{});\n  ASSERT_TRUE(eq);\n}\n}  // anonymous namespace\n\nTEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }\n\nTEST(Linalg, GPUTensorView) { TestSlice(); }\n\nTEST(Linalg, GPUIter) {\n  auto ctx = MakeCUDACtx(1);\n  auto cuctx = ctx.CUDACtx();\n\n  dh::device_vector<double> data(2 * 3 * 4);\n  thrust::sequence(cuctx->CTP(), data.begin(), data.end(), 1.0);\n\n  auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);\n  static_assert(!std::is_const_v<decltype(t)::element_type>);\n  static_assert(!std::is_const_v<decltype(t)::value_type>);\n\n  auto n = std::distance(linalg::tcbegin(t), linalg::tcend(t));\n  
ASSERT_EQ(n, t.Size());\n  ASSERT_FALSE(t.Empty());\n\n  bool eq = thrust::equal(cuctx->CTP(), data.cbegin(), data.cend(), linalg::tcbegin(t));\n  ASSERT_TRUE(eq);\n\n  TestWriteAccess(cuctx, t);\n}\n\nTEST(Linalg, SmallHistogram) {\n  auto ctx = MakeCUDACtx(0);\n  // Generate random data with 4 bins and 32 elements for each bin.\n  std::size_t cnt = 32, n_bins = 4;\n  dh::device_vector<float> values(cnt * n_bins);\n  for (std::size_t i = 0; i < n_bins; ++i) {\n    thrust::fill_n(ctx.CUDACtx()->CTP(), values.begin() + i * cnt, cnt, i);\n  }\n  thrust::default_random_engine rng;\n  rng.seed(2025);\n  thrust::shuffle(ctx.CUDACtx()->CTP(), values.begin(), values.end(), rng);\n\n  linalg::MatrixView<float> indices =\n      linalg::MakeTensorView(&ctx, dh::ToSpan(values), values.size(), 1);\n  dh::CachingDeviceUVector<float> bins(n_bins);\n  HostDeviceVector<float> weights;\n  SmallHistogram(&ctx, indices, common::MakeOptionalWeights(ctx.Device(), weights),\n                 linalg::MakeTensorView(&ctx, dh::ToSpan(bins), bins.size()));\n\n  std::vector<float> h_bins(n_bins);\n  dh::safe_cuda(cudaMemcpyAsync(h_bins.data(), bins.data(), dh::ToSpan(bins).size_bytes(),\n                                cudaMemcpyDefault, ctx.CUDACtx()->Stream()));\n  for (std::size_t i = 0; i < n_bins; ++i) {\n    ASSERT_EQ(h_bins[i], cnt);\n  }\n}\nnamespace {\nvoid TestGpuDispatch() {\n  auto ctx = MakeCUDACtx(0);\n  TestLinalgDispatch(&ctx, [] XGBOOST_DEVICE(double v) { return v + 1; });\n}\n}  // namespace\nTEST(Linalg, GpuDispatch) { TestGpuDispatch(); }\n}  // namespace xgboost::linalg\n"
  },
  {
    "path": "tests/cpp/common/test_linalg.h",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#pragma once\n\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/linalg.h>  // for Vector\n\n#include <numeric>  // for iota\n#include <vector>   // for vector\n\n#include \"../../../src/common/linalg_op.h\"\n\nnamespace xgboost::linalg {\ntemplate <typename Fn>\nvoid TestLinalgDispatch(Context const* ctx, Fn&& fn) {\n  std::vector<double> data(128, 0);\n  std::iota(data.begin(), data.end(), 0.0);\n  Vector<double> vec(data.begin(), data.end(), {data.size()}, DeviceOrd::CPU());\n\n  TransformKernel(ctx, vec.View(ctx->Device()), [=] XGBOOST_DEVICE(double v) { return fn(v); });\n  auto h_v = vec.HostView();\n  for (std::size_t i = 0; i < h_v.Size(); ++i) {\n    ASSERT_EQ(h_v(i), fn(i));\n  }\n}\n}  // namespace xgboost::linalg\n"
  },
  {
    "path": "tests/cpp/common/test_math.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <numeric>  // for accumulate\n\n#include \"../../../src/common/math.h\"\n\nnamespace xgboost::common {\nTEST(Math, Softmax) {\n  std::vector<float> values{2.0f, 2.0f, 3.0f, 4.0f};\n\n  Softmax(values.begin(), values.end());\n  ASSERT_NEAR(std::accumulate(values.cbegin(), values.cend(), 0.0f), 1.0f, 1e-5f);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_monitor.cc",
    "content": "#include <gtest/gtest.h>\n#include <xgboost/logging.h>\n#include <string>\n#include \"../../../src/common/timer.h\"\n\nnamespace xgboost {\nnamespace common {\nTEST(Monitor, Logging) {\n  auto run_monitor =\n      []() {\n        Monitor monitor_;\n        monitor_.Init(\"Monitor test\");\n        monitor_.Start(\"basic\");\n        monitor_.Stop(\"basic\");\n      };\n\n  Args args = {std::make_pair(\"verbosity\", \"3\")};\n  ConsoleLogger::Configure(args);\n  ASSERT_EQ(ConsoleLogger::GlobalVerbosity(), ConsoleLogger::LogVerbosity::kDebug);\n\n  testing::internal::CaptureStderr();\n  run_monitor();\n  std::string output = testing::internal::GetCapturedStderr();\n  ASSERT_NE(output.find(\"Monitor\"), std::string::npos);\n\n  // Monitor only prints messages when set to DEBUG.\n  args = {std::make_pair(\"verbosity\", \"2\")};\n  ConsoleLogger::Configure(args);\n  testing::internal::CaptureStderr();\n  run_monitor();\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_EQ(output.size(), 0);\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_numa_topo.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <filesystem>  // for path\n#include <fstream>     // for ofstream\n#include <vector>      // for vector\n\n#include \"../../../src/common/numa_topo.h\"\n#include \"../filesystem.h\"  // for TemporaryDirectory\n\nnamespace xgboost::common {\nnamespace {\nnamespace fs = std::filesystem;\n}\n\nTEST(Numa, CpuListParser) {\n  common::TemporaryDirectory tmpdir;\n  auto path = tmpdir.Path() / \"cpulist\";\n  std::vector<std::int32_t> cpus;\n\n  auto write = [&](auto const& cpulist) {\n    std::ofstream fout{path};\n    fout << cpulist;\n  };\n\n  {\n    std::string cpulist = R\"(1\n)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    ASSERT_EQ(cpus[0], 1);\n    ASSERT_EQ(cpus.size(), 1);\n  }\n  {\n    std::string cpulist = R\"(2)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    ASSERT_EQ(cpus.size(), 1);\n    ASSERT_EQ(cpus[0], 2);\n  }\n  {\n    std::string cpulist = R\"(2,3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    ASSERT_EQ(cpus.size(), 2);\n    ASSERT_EQ(cpus[0], 2);\n    ASSERT_EQ(cpus[1], 3);\n  }\n\n  auto check_4cpu_case = [&] {\n    ASSERT_EQ(cpus.size(), 4);\n    for (std::size_t i = 0; i < cpus.size(); ++i) {\n      ASSERT_EQ(cpus[i], static_cast<std::int32_t>(i));\n    }\n  };\n  {\n    std::string cpulist = R\"(0-3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0-2,3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0,1-3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0,1-2,3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0,1,2,3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    
std::string cpulist = R\"(0,1,2-3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0-1,2,3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    std::string cpulist = R\"(0-1,2-3)\";\n    write(cpulist);\n    ReadCpuList(path, &cpus);\n    check_4cpu_case();\n  }\n  {\n    auto path = tmpdir.Path() / \"foo\";\n    testing::internal::CaptureStderr();\n    ReadCpuList(path, &cpus);\n    std::string output = testing::internal::GetCapturedStderr();\n    ASSERT_TRUE(cpus.empty());\n    ASSERT_NE(output.find(\"foo\"), std::string::npos);\n  }\n}\n\nTEST(Numa, GetCpus) {\n  std::vector<std::int32_t> cpus;\n  if (GetNumaNumNodes() > 0) {\n    GetNumaNodeCpus(0, &cpus);\n    ASSERT_FALSE(cpus.empty());\n  } else {\n    GTEST_SKIP();\n  }\n}\n\nTEST(Numa, GetMaxNumNodes) {\n  auto n_nodes = GetNumaMaxNumNodes();\n#if defined(__linux__)\n  ASSERT_GE(n_nodes, 0);\n#else\n  ASSERT_EQ(n_nodes, -1);\n#endif  // defined(__linux__)\n}\n\nTEST(Numa, GetMemBind) {\n  // You can run this test with:\n  // numactl --membind=0 ./testxgboost --gtest_filter=\"Numa.GetMemBind\"\n  // or\n  // hwloc-bind --strict --membind node:0 ./testxgboost --gtest_filter=\"Numa.GetMemBind\"\n  // The strict flag is required.\n  [[maybe_unused]] auto bind = GetNumaMemBind();\n}\n\nTEST(Numa, GetNumNodes) {\n  auto n_nodes = GetNumaNumNodes();\n#if defined(__linux__)\n  ASSERT_GE(n_nodes, 1);\n#else\n  ASSERT_EQ(n_nodes, -1);\n#endif  // defined(__linux__)\n}\n\nTEST(Numa, GetHasCpuNodes) {\n  std::vector<std::int32_t> nodes;\n  GetNumaHasCpuNodes(&nodes);\n#if defined(__linux__)\n  ASSERT_GE(nodes.size(), 1);\n#else\n  ASSERT_EQ(nodes.size(), 0);\n#endif  // defined(__linux__)\n}\n\nTEST(Numa, GetHasNormalMemoryNodes) {\n  std::vector<std::int32_t> nodes;\n  GetNumaHasNormalMemoryNodes(&nodes);\n#if defined(__linux__)\n  ASSERT_GE(nodes.size(), 1);\n#else\n  ASSERT_EQ(nodes.size(), 0);\n#endif  
// defined(__linux__)\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_numeric.cc",
    "content": "/**\n * Copyright 2022-2026, XGBoost contributors.\n */\n#include <gtest/gtest.h>\n\n#include <numeric>\n\n#include \"../../../src/common/numeric.h\"\n\nnamespace xgboost::common {\nTEST(Numeric, PartialSum) {\n  {\n    std::vector<size_t> values{1, 2, 3, 4};\n    std::vector<size_t> result(values.size() + 1);\n    Context ctx;\n    PartialSum(ctx.Threads(), values.begin(), values.end(), static_cast<size_t>(0), result.begin());\n    std::vector<size_t> sol(values.size() + 1, 0);\n    std::partial_sum(values.begin(), values.end(), sol.begin() + 1);\n    ASSERT_EQ(sol, result);\n  }\n  {\n    std::vector<double> values{1.5, 2.5, 3.5, 4.5};\n    std::vector<double> result(values.size() + 1);\n    Context ctx;\n    PartialSum(ctx.Threads(), values.begin(), values.end(), 0.0, result.begin());\n    std::vector<double> sol(values.size() + 1, 0.0);\n    std::partial_sum(values.begin(), values.end(), sol.begin() + 1);\n    ASSERT_EQ(sol, result);\n  }\n}\n\nTEST(Numeric, Reduce) {\n  Context ctx;\n  ASSERT_TRUE(ctx.IsCPU());\n  HostDeviceVector<float> values(20);\n  auto& h_values = values.HostVector();\n  std::iota(h_values.begin(), h_values.end(), 0.0f);\n  auto sum = Reduce(&ctx, values);\n  ASSERT_EQ(sum, (values.Size() - 1) * values.Size() / 2);\n}\n\nTEST(Numeric, Iota) {\n  Context ctx;\n  auto run = [&](std::size_t n) {\n    std::vector<float> values(n);\n    float init = 1.2f;\n    Iota(&ctx, values.begin(), values.end(), init);\n    for (std::size_t i = 0; i < values.size(); ++i) {\n      ASSERT_EQ(values[i], init + i);\n    }\n  };\n  run(1234);\n  run(0);\n  run(1);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_optional_weight.cc",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>             // Context\n#include <xgboost/host_device_vector.h>  // HostDeviceVector\n\n#include \"../../../src/common/optional_weight.h\"\nnamespace xgboost {\nnamespace common {\nTEST(OptionalWeight, Basic) {\n  HostDeviceVector<float> weight{{2.0f, 3.0f, 4.0f}};\n  Context ctx;\n  auto opt_w = MakeOptionalWeights(ctx.Device(), weight);\n  ASSERT_EQ(opt_w[0], 2.0f);\n  ASSERT_FALSE(opt_w.Empty());\n\n  weight.HostVector().clear();\n  opt_w = MakeOptionalWeights(ctx.Device(), weight);\n  ASSERT_EQ(opt_w[0], 1.0f);\n  ASSERT_TRUE(opt_w.Empty());\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_param_array.cc",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n#include <xgboost/base.h>         // for kRtEps\n#include <xgboost/json.h>         // for Json\n#include <xgboost/parameter.h>    // for XGBoostParameter\n#include <xgboost/string_view.h>  // for StringView\n\n#include <sstream>  // for istringstream, ostringstream\n#include <string>   // for string\n\n#include \"../../../src/common/param_array.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nTEST(ParamArray, Float) {\n  ParamArray<float> values{\"values\"};\n  {\n    std::istringstream sin{\"1.1\"};\n    sin >> values;\n    ASSERT_EQ(values.size(), 1);\n    ASSERT_NEAR(values[0], 1.1, kRtEps);\n    std::ostringstream sout;\n    sout << values;\n    auto jarr = Json::Load(StringView{sout.str()});\n    for (std::size_t i = 0; i < values.size(); ++i) {\n      ASSERT_EQ(get<Number const>(jarr[i]), values[i]);\n    }\n  }\n  {\n    std::string str = \"[1.1, 1.3]\";\n    std::istringstream sin{str};\n    sin >> values;\n    ASSERT_EQ(values.size(), 2);\n    ASSERT_NEAR(values[0], 1.1, kRtEps);\n    ASSERT_NEAR(values[1], 1.3, kRtEps);\n    std::ostringstream sout;\n    sout << values;\n    auto jarr = Json::Load(StringView{sout.str()});\n    for (std::size_t i = 0; i < values.size(); ++i) {\n      ASSERT_EQ(get<Number const>(jarr[i]), values[i]);\n    }\n  }\n  {\n    ParamArray<float> values{\"values\"};\n    std::istringstream sin{\"[\\\"foo\\\"]\"};\n    ASSERT_THAT(\n        [&] { sin >> values; },\n        GMockThrow(\n            R\"(Invalid type for: `values`, expecting one of the: {`Number`, `Integer`}, got: `String`)\"));\n  }\n}\n\nnamespace {\nstruct TestParamArray : public XGBoostParameter<TestParamArray> {\n  ParamArray<float> test_key{\"test_key\", 0.2f};\n  DMLC_DECLARE_PARAMETER(TestParamArray) {\n    DMLC_DECLARE_FIELD(test_key).describe(\"test\").set_default(ParamArray<float>{\"test_key\", 0.2f});\n  
}\n};\n\nDMLC_REGISTER_PARAMETER(TestParamArray);\n}  // namespace\n\nTEST(ParamArray, Update) {\n  TestParamArray param;\n  param.UpdateAllowUnknown(Args{{}});\n  ASSERT_EQ(param.test_key.size(), 1);\n  ASSERT_EQ(param.test_key.Name(), \"test_key\");\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_parameter.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/parameter.h>\n\n#include \"xgboost/json.h\"  // for ToJson, FromJson\n\nenum class Foo : int {\n  kBar = 0, kFrog = 1, kCat = 2, kDog = 3\n};\n\nDECLARE_FIELD_ENUM_CLASS(Foo);\n\nstruct MyEnumParam : xgboost::XGBoostParameter<MyEnumParam> {\n  Foo foo;\n  int bar;\n  DMLC_DECLARE_PARAMETER(MyEnumParam) {\n    DMLC_DECLARE_FIELD(foo)\n      .set_default(Foo::kBar)\n      .add_enum(\"bar\", Foo::kBar)\n      .add_enum(\"frog\", Foo::kFrog)\n      .add_enum(\"cat\", Foo::kCat)\n      .add_enum(\"dog\", Foo::kDog);\n    DMLC_DECLARE_FIELD(bar)\n      .set_default(-1);\n  }\n};\n\nDMLC_REGISTER_PARAMETER(MyEnumParam);\n\nTEST(EnumClassParam, Basic) {\n  MyEnumParam param;\n  std::map<std::string, std::string> kwargs{\n    {\"foo\", \"frog\"}, {\"bar\", \"10\"}\n  };\n  // try initializing\n  param.Init(kwargs); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)\n  ASSERT_EQ(param.foo, Foo::kFrog);\n  ASSERT_EQ(param.bar, 10);\n\n  // try all possible enum values\n  kwargs[\"foo\"] = \"bar\";\n  param.Init(kwargs);\n  ASSERT_EQ(param.foo, Foo::kBar);\n  kwargs[\"foo\"] = \"frog\";\n  param.Init(kwargs);\n  ASSERT_EQ(param.foo, Foo::kFrog);\n  kwargs[\"foo\"] = \"cat\";\n  param.Init(kwargs);\n  ASSERT_EQ(param.foo, Foo::kCat);\n  kwargs[\"foo\"] = \"dog\";\n  param.Init(kwargs);\n  ASSERT_EQ(param.foo, Foo::kDog);\n\n  // try setting non-existent enum value\n  kwargs[\"foo\"] = \"human\";\n  ASSERT_THROW(param.Init(kwargs), dmlc::ParamError);\n}\n\nstruct UpdatableParam : xgboost::XGBoostParameter<UpdatableParam> {\n  float f { 0.0f };\n  double d { 0.0 };\n\n  DMLC_DECLARE_PARAMETER(UpdatableParam) {\n    DMLC_DECLARE_FIELD(f)\n        .set_default(11.0f);\n    DMLC_DECLARE_FIELD(d)\n        .set_default(2.71828f);\n  }\n};\n\nDMLC_REGISTER_PARAMETER(UpdatableParam);\n\nTEST(XGBoostParameter, Update) {\n  {\n    
UpdatableParam p;\n    auto constexpr kRtEps = xgboost::kRtEps;\n\n    p.UpdateAllowUnknown(xgboost::Args{});\n    // When it's not initialized, perform set_default.\n    ASSERT_NEAR(p.f, 11.0f, kRtEps);\n    ASSERT_NEAR(p.d, 2.71828f, kRtEps);\n\n    p.d = 3.14149;\n\n    p.UpdateAllowUnknown(xgboost::Args{{\"f\", \"2.71828\"}});\n    ASSERT_NEAR(p.f, 2.71828f, kRtEps);\n\n    // p.d is unaffected by the update.\n    ASSERT_NEAR(p.d, 3.14149, kRtEps);\n  }\n  {\n    UpdatableParam p;\n    auto constexpr kRtEps = xgboost::kRtEps;\n    p.UpdateAllowUnknown(xgboost::Args{{\"f\", \"2.71828\"}});\n    ASSERT_NEAR(p.f, 2.71828f, kRtEps);\n    ASSERT_NEAR(p.d, 2.71828, kRtEps);  // default\n  }\n\n  // Just in case dmlc's use of global memory has any impact on parameters.\n  UpdatableParam a, b;\n  a.UpdateAllowUnknown(xgboost::Args{{\"f\", \"2.71828\"}});\n  ASSERT_NE(a.f, b.f);\n}\nnamespace xgboost {\nTEST(XGBoostParameter, Json) {\n  UpdatableParam a, b;\n  a.UpdateAllowUnknown(Args{{\"f\", \"1024\"}, {\"d\", \"2048\"}});\n  auto ja = Json{ToJson(a)};\n\n  UpdatableParam c;\n  FromJson(ja, &c);\n  ASSERT_FLOAT_EQ(a.f, 1024);\n  ASSERT_FLOAT_EQ(c.f, 1024);\n  ASSERT_FLOAT_EQ(b.f, 0);  // Make sure dmlc global variable is not used here.\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_partition_builder.cc",
    "content": "/**\n * Copyright 2020-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../../../src/common/partition_builder.h\"\n#include \"../../../src/common/row_set.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nTEST(PartitionBuilder, BasicTest) {\n  constexpr size_t kBlockSize = 16;\n  constexpr size_t kNodes = 5;\n  constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;\n\n  std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };\n\n  PartitionBuilder<kBlockSize> builder;\n  builder.Init(kTasks, kNodes, [&](size_t i) {\n    return tasks[i];\n  });\n\n  std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };\n\n  for(size_t nid = 0; nid < kNodes; ++nid) {\n    size_t value_left = 0;\n    size_t value_right = 0;\n\n    size_t left_total = tasks[nid] * rows_for_left_node[nid];\n\n    for(size_t j = 0; j < tasks[nid]; ++j) {\n      size_t begin = kBlockSize*j;\n      size_t end = kBlockSize*(j+1);\n      const size_t id = builder.GetTaskIdx(nid, begin);\n      builder.AllocateForTask(id);\n\n      auto left  = builder.GetLeftBuffer(nid, begin, end);\n      auto right = builder.GetRightBuffer(nid, begin, end);\n\n      size_t n_left   = rows_for_left_node[nid];\n      size_t n_right = kBlockSize - rows_for_left_node[nid];\n\n      for(size_t i = 0; i < n_left; i++) {\n        left[i] = value_left++;\n      }\n\n      for(size_t i = 0; i < n_right; i++) {\n        right[i] = left_total + value_right++;\n      }\n\n      builder.SetNLeftElems(nid, begin, n_left);\n      builder.SetNRightElems(nid, begin, n_right);\n    }\n  }\n  builder.CalculateRowOffsets();\n\n  std::vector<bst_idx_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);\n\n  for(size_t nid = 0; nid < kNodes; ++nid) {\n\n    for(size_t j = 0; j < tasks[nid]; ++j) {\n      builder.MergeToArray(nid, kBlockSize*j, v.data());\n    }\n\n    for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {\n      
ASSERT_EQ(v[j], j);\n    }\n    size_t n_left  = builder.GetNLeftElems(nid);\n    size_t n_right = builder.GetNRightElems(nid);\n\n    ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);\n    ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_probability_distribution.cc",
    "content": "/*!\n * Copyright (c) by Contributors 2020\n */\n#include <gtest/gtest.h>\n#include <memory>\n#include <cmath>\n\n#include \"xgboost/logging.h\"\n#include \"../../../src/common/probability_distribution.h\"\n\nnamespace xgboost {\nnamespace common {\n\ntemplate <typename Distribution>\nvoid RunDistributionGenericTest() {\n  double integral_of_pdf = Distribution::CDF(-2.0);\n  double integral_of_grad_pdf = Distribution::PDF(-2.0);\n  double integral_of_hess_pdf = Distribution::GradPDF(-2.0);\n  // Perform numerical differentiation and integration\n  // Enumerate 4000 grid points in range [-2, 2]\n  for (int i = 0; i <= 4000; ++i) {\n    const double x = static_cast<double>(i) / 1000.0 - 2.0;\n    // Numerical differentiation (p. 246, Numerical Analysis 2nd ed. by Timothy Sauer)\n    EXPECT_NEAR((Distribution::CDF(x + 1e-5) - Distribution::CDF(x - 1e-5)) / 2e-5,\n                Distribution::PDF(x), 6e-11);\n    EXPECT_NEAR((Distribution::PDF(x + 1e-5) - Distribution::PDF(x - 1e-5)) / 2e-5,\n                Distribution::GradPDF(x), 6e-11);\n    EXPECT_NEAR((Distribution::GradPDF(x + 1e-5) - Distribution::GradPDF(x - 1e-5)) / 2e-5,\n                Distribution::HessPDF(x), 6e-11);\n    // Numerical integration using Trapezoid Rule (p. 
257, Sauer)\n    integral_of_pdf += 5e-4 * (Distribution::PDF(x - 1e-3) + Distribution::PDF(x));\n    integral_of_grad_pdf += 5e-4 * (Distribution::GradPDF(x - 1e-3) + Distribution::GradPDF(x));\n    integral_of_hess_pdf += 5e-4 * (Distribution::HessPDF(x - 1e-3) + Distribution::HessPDF(x));\n    EXPECT_NEAR(integral_of_pdf, Distribution::CDF(x), 2e-4);\n    EXPECT_NEAR(integral_of_grad_pdf, Distribution::PDF(x), 2e-4);\n    EXPECT_NEAR(integral_of_hess_pdf, Distribution::GradPDF(x), 2e-4);\n  }\n}\n\nTEST(ProbabilityDistribution, DistributionGeneric) {\n  // Assert d/dx CDF = PDF, d/dx PDF = GradPDF, d/dx GradPDF = HessPDF\n  // Do this for every distribution type\n  RunDistributionGenericTest<NormalDistribution>();\n  RunDistributionGenericTest<LogisticDistribution>();\n  RunDistributionGenericTest<ExtremeDistribution>();\n}\n\nTEST(ProbabilityDistribution, NormalDist) {\n  // \"Three-sigma rule\" (https://en.wikipedia.org/wiki/68–95–99.7_rule)\n  //   68% of values are within 1 standard deviation away from the mean\n  //   95% of values are within 2 standard deviation away from the mean\n  // 99.7% of values are within 3 standard deviation away from the mean\n  EXPECT_NEAR(NormalDistribution::CDF(0.5) - NormalDistribution::CDF(-0.5), 0.3829, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(1.0) - NormalDistribution::CDF(-1.0), 0.6827, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(1.5) - NormalDistribution::CDF(-1.5), 0.8664, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(2.0) - NormalDistribution::CDF(-2.0), 0.9545, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(2.5) - NormalDistribution::CDF(-2.5), 0.9876, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(3.0) - NormalDistribution::CDF(-3.0), 0.9973, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(3.5) - NormalDistribution::CDF(-3.5), 0.9995, 0.00005);\n  EXPECT_NEAR(NormalDistribution::CDF(4.0) - NormalDistribution::CDF(-4.0), 0.9999, 0.00005);\n}\n\nTEST(ProbabilityDistribution, LogisticDist) {\n  
/**\n   * Enforce known properties of the logistic distribution.\n   * (https://en.wikipedia.org/wiki/Logistic_distribution)\n   **/\n\n  // Enumerate 4000 grid points in range [-2, 2]\n  for (int i = 0; i <= 4000; ++i) {\n    const double x = static_cast<double>(i) / 1000.0 - 2.0;\n    // PDF = 1/4 * sech(x/2)**2\n    const double sech_x = 1.0 / std::cosh(x * 0.5);  // hyperbolic secant at x/2\n    EXPECT_NEAR(0.25 * sech_x * sech_x, LogisticDistribution::PDF(x), 1e-15);\n    // CDF = 1/2 + 1/2 * tanh(x/2)\n    EXPECT_NEAR(0.5 + 0.5 * std::tanh(x * 0.5), LogisticDistribution::CDF(x), 1e-15);\n  }\n}\n\nTEST(ProbabilityDistribution, ExtremeDist) {\n  /**\n   * Enforce known properties of the extreme distribution (also known as Gumbel distribution).\n   * The mean is the negative of the Euler-Mascheroni constant.\n   * The variance is 1/6 * pi**2. (https://mathworld.wolfram.com/GumbelDistribution.html)\n   **/\n\n  // Enumerate 25000 grid points in range [-20, 5].\n  // Compute the mean (expected value) of the distribution using numerical integration.\n  // Nearly all mass of the extreme distribution is concentrated between -20 and 5,\n  // so numerically integrating x*PDF(x) over [-20, 5] gives good estimate of the mean.\n  double mean = 0.0;\n  for (int i = 0; i <= 25000; ++i) {\n    const double x = static_cast<double>(i) / 1000.0 - 20.0;\n    // Numerical integration using Trapezoid Rule (p. 
257, Sauer)\n    mean +=\n      5e-4 * ((x - 1e-3) * ExtremeDistribution::PDF(x - 1e-3) + x * ExtremeDistribution::PDF(x));\n  }\n  EXPECT_NEAR(mean, -kEulerMascheroni, 1e-7);\n\n  // Enumerate 25000 grid points in range [-20, 5].\n  // Compute the variance of the distribution using numerical integration.\n  // Nearly all mass of the extreme distribution is concentrated between -20 and 5,\n  // so numerically integrating (x-mean)^2*PDF(x) over [-20, 5] gives a good estimate of\n  // the variance.\n  double variance = 0.0;\n  for (int i = 0; i <= 25000; ++i) {\n    const double x = static_cast<double>(i) / 1000.0 - 20.0;\n    // Numerical integration using Trapezoid Rule (p. 257, Sauer)\n    variance += 5e-4 * ((x - 1e-3 - mean) * (x - 1e-3 - mean) * ExtremeDistribution::PDF(x - 1e-3)\n                        + (x - mean) * (x - mean) * ExtremeDistribution::PDF(x));\n  }\n  EXPECT_NEAR(variance, kPI * kPI / 6.0, 1e-6);\n}\n\n} // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_quantile.cc",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n */\n#include \"test_quantile.h\"\n\n#include <gtest/gtest.h>\n\n#include <cstdint>  // for int64_t\n\n#include \"../../../src/collective/allreduce.h\"\n#include \"../../../src/common/hist_util.h\"\n#include \"../../../src/data/adapter.h\"\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"xgboost/context.h\"\n\nnamespace xgboost::common {\nTEST(Quantile, LoadBalance) {\n  size_t constexpr kRows = 1000, kCols = 100;\n  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n  std::vector<bst_feature_t> cols_ptr;\n  Context ctx;\n  for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {\n    data::SparsePageAdapterBatch adapter{page.GetView()};\n    cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; });\n  }\n  size_t n_cols = 0;\n  for (size_t i = 1; i < cols_ptr.size(); ++i) {\n    n_cols += cols_ptr[i] - cols_ptr[i - 1];\n  }\n  CHECK_EQ(n_cols, kCols);\n}\n\nTEST(Quantile, InitWithEmptyColumn) {\n  WQuantileSketch sketch{0, 0.1};\n\n  auto out = sketch.GetSummary(1);\n  ASSERT_EQ(out.Size(), 0);\n}\n\nTEST(Quantile, SetPruneInplace) {\n  using Summary = WQSummary<>;\n  using Entry = Summary::Entry;\n\n  SimpleLCG lcg;\n  for (size_t trial = 0; trial < 256; ++trial) {\n    size_t n = (lcg() % 256) + 1;\n    size_t max_size = (lcg() % n) + 1;\n\n    std::vector<Entry> src_storage(n);\n    float running_rank = 0.0f;\n    for (size_t i = 0; i < n; ++i) {\n      float w = static_cast<float>((lcg() % 7) + 1);\n      float value = static_cast<float>(i);\n      src_storage[i] = Entry{running_rank, running_rank + w, w, value};\n      running_rank += w;\n    }\n\n    std::vector<Entry> ref_storage(n);\n    Summary src_ref{Span<Entry>{src_storage.data(), src_storage.size()}, n};\n    Summary out_ref{Span<Entry>{ref_storage.data(), ref_storage.size()}, 0};\n    out_ref.CopyFrom(src_ref);\n    out_ref.SetPrune(max_size);\n\n    
Summary in_place{Span<Entry>{src_storage.data(), src_storage.size()}, n};\n    in_place.SetPrune(max_size);\n\n    ASSERT_EQ(in_place.Size(), out_ref.Size()) << \"trial=\" << trial;\n    auto const in_entries = in_place.Entries();\n    auto const ref_entries = out_ref.Entries();\n    for (size_t i = 0; i < in_place.Size(); ++i) {\n      EXPECT_FLOAT_EQ(in_entries[i].rmin, ref_entries[i].rmin) << \"trial=\" << trial;\n      EXPECT_FLOAT_EQ(in_entries[i].rmax, ref_entries[i].rmax) << \"trial=\" << trial;\n      EXPECT_FLOAT_EQ(in_entries[i].wmin, ref_entries[i].wmin) << \"trial=\" << trial;\n      EXPECT_FLOAT_EQ(in_entries[i].value, ref_entries[i].value) << \"trial=\" << trial;\n    }\n  }\n}\n\nnamespace {\ntemplate <bool use_column>\nvoid PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo const& info,\n              Span<float const> hessian) {\n  if constexpr (use_column) {\n    container->PushColPage(page, info, hessian);\n  } else {\n    container->PushRowPage(page, info, hessian);\n  }\n}\n\ntemplate <bool use_column>\nvoid DoTestDistributedQuantile(size_t rows, size_t cols) {\n  Context ctx;\n  auto const world = collective::GetWorldSize();\n  std::vector<MetaInfo> infos(2);\n  auto& h_weights = infos.front().weights_.HostVector();\n  h_weights.resize(rows);\n  SimpleLCG lcg;\n  SimpleRealUniformDistribution<float> dist(3, 1000);\n  std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });\n  std::vector<bst_idx_t> column_size(cols, rows);\n  bst_bin_t n_bins = 64;\n\n  // Generate cuts for distributed environment.\n  auto sparsity = 0.5f;\n  auto rank = collective::GetRank();\n  std::vector<FeatureType> ft(cols);\n  for (size_t i = 0; i < ft.size(); ++i) {\n    ft[i] = (i % 2 == 0) ? 
FeatureType::kNumerical : FeatureType::kCategorical;\n  }\n\n  auto m = RandomDataGenerator{rows, cols, sparsity}\n               .Seed(rank)\n               .Lower(.0f)\n               .Upper(1.0f)\n               .Type(ft)\n               .MaxCategory(13)\n               .GenerateDMatrix();\n\n  std::vector<float> hessian(rows, 1.0);\n  auto hess = Span<float const>{hessian};\n\n  HostSketchContainer sketch_distributed(&ctx, n_bins, m->Info().feature_types.ConstHostSpan(),\n                                         column_size, false);\n\n  if (use_column) {\n    for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {\n      PushPage<use_column>(&sketch_distributed, page, m->Info(), hess);\n    }\n  } else {\n    for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {\n      PushPage<use_column>(&sketch_distributed, page, m->Info(), hess);\n    }\n  }\n\n  auto distributed_cuts = sketch_distributed.MakeCuts(&ctx, m->Info());\n\n  // Generate cuts for single node environment\n  collective::Finalize();\n\n  CHECK_EQ(collective::GetWorldSize(), 1);\n  std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; });\n  m->Info().num_row_ = world * rows;\n  HostSketchContainer sketch_on_single_node(&ctx, n_bins, m->Info().feature_types.ConstHostSpan(),\n                                            column_size, false);\n  m->Info().num_row_ = rows;\n\n  for (auto rank = 0; rank < world; ++rank) {\n    auto m = RandomDataGenerator{rows, cols, sparsity}\n                 .Seed(rank)\n                 .Type(ft)\n                 .MaxCategory(13)\n                 .Lower(.0f)\n                 .Upper(1.0f)\n                 .GenerateDMatrix();\n    if (use_column) {\n      for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {\n        PushPage<use_column>(&sketch_on_single_node, page, m->Info(), hess);\n      }\n    } else {\n      for (auto const& page : m->GetBatches<SparsePage>()) {\n        
PushPage<use_column>(&sketch_on_single_node, page, m->Info(), hess);\n      }\n    }\n  }\n\n  auto single_node_cuts = sketch_on_single_node.MakeCuts(&ctx, m->Info());\n\n  auto const& sptrs = single_node_cuts.Ptrs();\n  auto const& dptrs = distributed_cuts.Ptrs();\n  auto const& svals = single_node_cuts.Values();\n  auto const& dvals = distributed_cuts.Values();\n\n  ASSERT_EQ(sptrs.size(), dptrs.size());\n  for (size_t i = 0; i < sptrs.size(); ++i) {\n    ASSERT_EQ(sptrs[i], dptrs[i]) << i;\n  }\n\n  ASSERT_EQ(svals.size(), dvals.size());\n  for (size_t i = 0; i < svals.size(); ++i) {\n    ASSERT_NEAR(svals[i], dvals[i], 2e-2f);\n  }\n}\n\ntemplate <bool use_column>\nvoid TestDistributedQuantile(size_t const rows, size_t const cols) {\n  auto constexpr kWorkers = 4;\n  collective::TestDistributedGlobal(\n      kWorkers, [=] { DoTestDistributedQuantile<use_column>(rows, cols); }, false);\n}\n}  // anonymous namespace\n\nTEST(Quantile, DistributedBasic) {\n  constexpr size_t kRows = 10, kCols = 10;\n  TestDistributedQuantile<false>(kRows, kCols);\n}\n\nTEST(Quantile, Distributed) {\n  constexpr size_t kRows = 4000, kCols = 200;\n  TestDistributedQuantile<false>(kRows, kCols);\n}\n\nTEST(Quantile, SortedDistributedBasic) {\n  constexpr size_t kRows = 10, kCols = 10;\n  TestDistributedQuantile<true>(kRows, kCols);\n}\n\nTEST(Quantile, SortedDistributed) {\n  constexpr size_t kRows = 4000, kCols = 200;\n  TestDistributedQuantile<true>(kRows, kCols);\n}\n\nnamespace {\ntemplate <bool use_column>\nvoid DoTestColSplitQuantile(size_t rows, size_t cols) {\n  Context ctx;\n  auto const world = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n\n  auto m = std::unique_ptr<DMatrix>{[=]() {\n    auto sparsity = 0.5f;\n    std::vector<FeatureType> ft(cols);\n    for (size_t i = 0; i < ft.size(); ++i) {\n      ft[i] = (i % 2 == 0) ? 
FeatureType::kNumerical : FeatureType::kCategorical;\n    }\n    auto dmat = RandomDataGenerator{rows, cols, sparsity}\n                    .Seed(0)\n                    .Lower(.0f)\n                    .Upper(1.0f)\n                    .Type(ft)\n                    .MaxCategory(13)\n                    .GenerateDMatrix();\n    return dmat->SliceCol(world, rank);\n  }()};\n\n  std::vector<bst_idx_t> column_size(cols, 0);\n  auto const slice_size = cols / world;\n  auto const slice_start = slice_size * rank;\n  auto const slice_end = (rank == world - 1) ? cols : slice_start + slice_size;\n  for (auto i = slice_start; i < slice_end; i++) {\n    column_size[i] = rows;\n  }\n\n  auto const n_bins = 64;\n\n  // Generate cuts for distributed environment.\n  HistogramCuts distributed_cuts{0};\n  {\n    HostSketchContainer sketch_distributed(&ctx, n_bins, m->Info().feature_types.ConstHostSpan(),\n                                           column_size, false);\n\n    std::vector<float> hessian(rows, 1.0);\n    auto hess = Span<float const>{hessian};\n    if (use_column) {\n      for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {\n        PushPage<use_column>(&sketch_distributed, page, m->Info(), hess);\n      }\n    } else {\n      for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {\n        PushPage<use_column>(&sketch_distributed, page, m->Info(), hess);\n      }\n    }\n\n    distributed_cuts = sketch_distributed.MakeCuts(&ctx, m->Info());\n  }\n\n  // Generate cuts for single node environment\n  collective::Finalize();\n  CHECK_EQ(collective::GetWorldSize(), 1);\n  HistogramCuts single_node_cuts{0};\n  {\n    HostSketchContainer sketch_on_single_node(&ctx, n_bins, m->Info().feature_types.ConstHostSpan(),\n                                              column_size, false);\n\n    std::vector<float> hessian(rows, 1.0);\n    auto hess = Span<float const>{hessian};\n    if (use_column) {\n      for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) 
{\n        PushPage<use_column>(&sketch_on_single_node, page, m->Info(), hess);\n      }\n    } else {\n      for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {\n        PushPage<use_column>(&sketch_on_single_node, page, m->Info(), hess);\n      }\n    }\n\n    single_node_cuts = sketch_on_single_node.MakeCuts(&ctx, m->Info());\n  }\n\n  auto const& sptrs = single_node_cuts.Ptrs();\n  auto const& dptrs = distributed_cuts.Ptrs();\n  auto const& svals = single_node_cuts.Values();\n  auto const& dvals = distributed_cuts.Values();\n\n  EXPECT_EQ(sptrs.size(), dptrs.size());\n  for (size_t i = 0; i < sptrs.size(); ++i) {\n    EXPECT_EQ(sptrs[i], dptrs[i]) << \"rank: \" << rank << \", i: \" << i;\n  }\n\n  EXPECT_EQ(svals.size(), dvals.size());\n  for (size_t i = 0; i < svals.size(); ++i) {\n    EXPECT_NEAR(svals[i], dvals[i], 2e-2f) << \"rank: \" << rank << \", i: \" << i;\n  }\n}\n\ntemplate <bool use_column>\nvoid TestColSplitQuantile(size_t rows, size_t cols) {\n  auto constexpr kWorkers = 4;\n  collective::TestDistributedGlobal(kWorkers,\n                                    [=] { DoTestColSplitQuantile<use_column>(rows, cols); });\n}\n}  // anonymous namespace\n\nTEST(Quantile, ColumnSplitBasic) {\n  constexpr size_t kRows = 10, kCols = 10;\n  TestColSplitQuantile<false>(kRows, kCols);\n}\n\nTEST(Quantile, ColumnSplit) {\n  constexpr size_t kRows = 4000, kCols = 200;\n  TestColSplitQuantile<false>(kRows, kCols);\n}\n\nTEST(Quantile, ColumnSplitSortedBasic) {\n  constexpr size_t kRows = 10, kCols = 10;\n  TestColSplitQuantile<true>(kRows, kCols);\n}\n\nTEST(Quantile, ColumnSplitSorted) {\n  constexpr size_t kRows = 4000, kCols = 200;\n  TestColSplitQuantile<true>(kRows, kCols);\n}\n\nnamespace {\nvoid TestSameOnAllWorkers() {\n  auto const world = collective::GetWorldSize();\n  constexpr size_t kRows = 1000, kCols = 100;\n  Context ctx;\n\n  RunWithSeedsAndBins(kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) {\n    auto rank = 
collective::GetRank();\n    HostDeviceVector<float> storage;\n    std::vector<FeatureType> ft(kCols);\n    for (size_t i = 0; i < ft.size(); ++i) {\n      ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;\n    }\n\n    auto m = RandomDataGenerator{kRows, kCols, 0}\n                 .Device(DeviceOrd::CPU())\n                 .Type(ft)\n                 .MaxCategory(17)\n                 .Seed(rank + seed)\n                 .GenerateDMatrix();\n    auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins);\n    std::vector<float> cut_values(cuts.Values().size() * world, 0);\n    std::vector<typename std::remove_reference_t<decltype(cuts.Ptrs())>::value_type> cut_ptrs(\n        cuts.Ptrs().size() * world, 0);\n\n    std::int64_t value_size = cuts.Values().size();\n    std::int64_t ptr_size = cuts.Ptrs().size();\n\n    auto rc = collective::Success() << [&] {\n      return collective::Allreduce(&ctx, &value_size, collective::Op::kMax);\n    } << [&] {\n      return collective::Allreduce(&ctx, &ptr_size, collective::Op::kMax);\n    };\n    collective::SafeColl(rc);\n    ASSERT_EQ(ptr_size, kCols + 1);\n\n    std::size_t value_offset = value_size * rank;\n    std::copy(cuts.Values().begin(), cuts.Values().end(), cut_values.begin() + value_offset);\n    std::size_t ptr_offset = ptr_size * rank;\n    std::copy(cuts.Ptrs().cbegin(), cuts.Ptrs().cend(), cut_ptrs.begin() + ptr_offset);\n\n    rc = std::move(rc) << [&] {\n      return collective::Allreduce(&ctx, linalg::MakeVec(cut_values.data(), cut_values.size()),\n                                   collective::Op::kSum);\n    } << [&] {\n      return collective::Allreduce(&ctx, linalg::MakeVec(cut_ptrs.data(), cut_ptrs.size()),\n                                   collective::Op::kSum);\n    };\n    collective::SafeColl(rc);\n\n    for (std::int32_t i = 0; i < world; i++) {\n      for (std::int64_t j = 0; j < value_size; ++j) {\n        size_t idx = i * value_size + j;\n        
ASSERT_NEAR(cuts.Values().at(j), cut_values.at(idx), kRtEps);\n      }\n\n      for (std::int64_t j = 0; j < ptr_size; ++j) {\n        size_t idx = i * ptr_size + j;\n        EXPECT_EQ(cuts.Ptrs().at(j), cut_ptrs.at(idx));\n      }\n    }\n  });\n}\n}  // anonymous namespace\n\nTEST(Quantile, SameOnAllWorkers) {\n  auto constexpr kWorkers = 4;\n  collective::TestDistributedGlobal(kWorkers, [] { TestSameOnAllWorkers(); });\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_quantile.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/iterator/zip_iterator.h>  // for make_zip_iterator\n\n#include <cuda/std/tuple>  // for make_tuple, tuple\n\n#include \"../../../src/collective/allreduce.h\"\n#include \"../../../src/common/hist_util.cuh\"\n#include \"../../../src/common/quantile.cuh\"\n#include \"../../../src/data/device_adapter.cuh\"  // CupyAdapter\n#include \"../collective/test_worker.h\"           // for BaseMGPUTest\n#include \"../helpers.h\"\n#include \"test_quantile.h\"\n\nnamespace xgboost {\nnamespace {\nstruct IsSorted {\n  XGBOOST_DEVICE bool operator()(common::SketchEntry const& a, common::SketchEntry const& b) const {\n    return a.value < b.value;\n  }\n};\n\nstruct RepeatedValueOp {\n  std::size_t cols;\n\n  XGBOOST_DEVICE float operator()(cuda::std::tuple<size_t, float> const& tuple) const {\n    auto i = cuda::std::get<0>(tuple);\n    auto ridx = i / cols;\n    return static_cast<float>((ridx / 8) % 4);\n  }\n};\n}  // namespace\n\nnamespace common {\nclass MGPUQuantileTest : public collective::BaseMGPUTest {};\n\nTEST(GPUQuantile, Basic) {\n  auto ctx = MakeCUDACtx(0);\n  constexpr size_t kCols = 100, kBins = 256;\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch(ft, kBins, kCols, ctx.Device());\n  dh::caching_device_vector<Entry> entries;\n  dh::device_vector<bst_idx_t> cuts_ptr(kCols + 1);\n  thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);\n  // Push empty\n  sketch.Push(&ctx, dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);\n  ASSERT_EQ(sketch.Data().size(), 0);\n}\n\n// if with_error is true, the test tolerates floating point error\nvoid TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,\n                          Span<bst_idx_t const> d_columns_ptr, bool with_error = false) {\n  dh::safe_cuda(cudaSetDevice(device.ordinal));\n  std::vector<SketchEntry> h_in(in.size());\n  dh::CopyDeviceSpanToVector(&h_in, in);\n 
 std::vector<bst_idx_t> h_columns_ptr(d_columns_ptr.size());\n  dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr);\n\n  for (size_t i = 1; i < d_columns_ptr.size(); ++i) {\n    auto column_id = i - 1;\n    auto beg = h_columns_ptr[column_id];\n    auto end = h_columns_ptr[i];\n\n    auto in_column = Span<SketchEntry>{h_in}.subspan(beg, end - beg);\n    for (size_t idx = 1; idx < in_column.size(); ++idx) {\n      float prev_rmin = in_column[idx - 1].rmin;\n      float prev_rmax = in_column[idx - 1].rmax;\n      float rmin_next = in_column[idx].RMinNext();\n      if (with_error) {\n        ASSERT_GE(in_column[idx].rmin + in_column[idx].rmin * kRtEps, prev_rmin);\n        ASSERT_GE(in_column[idx].rmax + in_column[idx].rmin * kRtEps, prev_rmax);\n        ASSERT_GE(in_column[idx].rmax + in_column[idx].rmin * kRtEps, rmin_next);\n      } else {\n        ASSERT_GE(in_column[idx].rmin, prev_rmin);\n        ASSERT_GE(in_column[idx].rmax, prev_rmax);\n        ASSERT_GE(in_column[idx].rmax, rmin_next);\n      }\n    }\n  }\n}\n\nTEST(GPUQuantile, Prune) {\n  constexpr size_t kRows = 1000, kCols = 100;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    auto ctx = MakeCUDACtx(0);\n    HostDeviceVector<FeatureType> ft;\n    SketchContainer sketch(ft, n_bins, kCols, ctx.Device());\n\n    HostDeviceVector<float> storage;\n    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                    .Device(ctx.Device())\n                                    .Seed(seed)\n                                    .GenerateArrayInterface(&storage);\n    data::CupyAdapter adapter(interface_str);\n    AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch);\n    auto n_cuts = detail::RequiredSampleCutsPerColumn(n_bins, kRows);\n    // LE because kRows * kCols is pushed into sketch, after removing\n    // duplicated entries we might 
not have that much inputs for prune.\n    ASSERT_LE(sketch.Data().size(), n_cuts * kCols);\n\n    sketch.Prune(&ctx, n_bins);\n    ASSERT_LE(sketch.Data().size(), kRows * kCols);\n    std::vector<bst_idx_t> h_columns_ptr(sketch.ColumnsPtr().size());\n    dh::CopyDeviceSpanToVector(&h_columns_ptr, sketch.ColumnsPtr());\n    std::vector<SketchEntry> h_data(sketch.Data().size());\n    dh::CopyDeviceSpanToVector(&h_data, sketch.Data());\n    for (size_t i = 1; i < h_columns_ptr.size(); ++i) {\n      auto begin = h_columns_ptr[i - 1];\n      auto column = Span<SketchEntry>{h_data}.subspan(begin, h_columns_ptr[i] - begin);\n      ASSERT_TRUE(std::adjacent_find(column.begin(), column.end(),\n                                     [](SketchEntry const& l, SketchEntry const& r) {\n                                       return l.value == r.value;\n                                     }) == column.end());\n    }\n    TestQuantileElemRank(ctx.Device(), sketch.Data(), sketch.ColumnsPtr());\n  });\n}\n\nTEST(GPUQuantile, PruneDuplicated) {\n  constexpr size_t kRows = 512, kCols = 8;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    auto ctx = MakeCUDACtx(0);\n    HostDeviceVector<FeatureType> ft;\n    SketchContainer sketch(ft, n_bins, kCols, ctx.Device());\n\n    HostDeviceVector<float> storage;\n    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                    .Device(ctx.Device())\n                                    .Seed(seed)\n                                    .GenerateArrayInterface(&storage);\n    auto d_data = storage.DeviceSpan();\n    auto tuple_it =\n        cuda::std::make_tuple(thrust::make_counting_iterator<size_t>(0ul), d_data.data());\n    auto it = thrust::make_zip_iterator(tuple_it);\n    thrust::transform(ctx.CUDACtx()->CTP(), it, it + d_data.size(), d_data.data(),\n                      RepeatedValueOp{kCols});\n\n    data::CupyAdapter adapter(interface_str);\n    
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch);\n\n    sketch.Prune(&ctx, n_bins);\n\n    std::vector<bst_idx_t> h_columns_ptr(sketch.ColumnsPtr().size());\n    dh::CopyDeviceSpanToVector(&h_columns_ptr, sketch.ColumnsPtr());\n    std::vector<SketchEntry> h_data(sketch.Data().size());\n    dh::CopyDeviceSpanToVector(&h_data, sketch.Data());\n    for (size_t i = 1; i < h_columns_ptr.size(); ++i) {\n      auto begin = h_columns_ptr[i - 1];\n      auto column = Span<SketchEntry>{h_data}.subspan(begin, h_columns_ptr[i] - begin);\n      ASSERT_TRUE(std::adjacent_find(column.begin(), column.end(),\n                                     [](SketchEntry const& l, SketchEntry const& r) {\n                                       return l.value == r.value;\n                                     }) == column.end());\n    }\n    TestQuantileElemRank(ctx.Device(), sketch.Data(), sketch.ColumnsPtr());\n  });\n}\n\nTEST(GPUQuantile, MergeEmpty) {\n  constexpr size_t kRows = 1000, kCols = 100;\n  size_t n_bins = 10;\n  auto ctx = MakeCUDACtx(0);\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_0(ft, n_bins, kCols, ctx.Device());\n  HostDeviceVector<float> storage_0;\n  std::string interface_str_0 =\n      RandomDataGenerator{kRows, kCols, 0}.Device(ctx.Device()).GenerateArrayInterface(&storage_0);\n  data::CupyAdapter adapter_0(interface_str_0);\n  MetaInfo info;\n  AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &sketch_0);\n\n  std::vector<SketchEntry> entries_before(sketch_0.Data().size());\n  dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data());\n  std::vector<bst_idx_t> ptrs_before(sketch_0.ColumnsPtr().size());\n  dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());\n  thrust::device_vector<size_t> columns_ptr(kCols + 1);\n  // Merge an empty sketch\n  sketch_0.Merge(&ctx, 
dh::ToSpan(columns_ptr), Span<SketchEntry>{});\n\n  std::vector<SketchEntry> entries_after(sketch_0.Data().size());\n  dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());\n  std::vector<bst_idx_t> ptrs_after(sketch_0.ColumnsPtr().size());\n  dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr());\n\n  CHECK_EQ(entries_before.size(), entries_after.size());\n  CHECK_EQ(ptrs_before.size(), ptrs_after.size());\n  for (size_t i = 0; i < entries_before.size(); ++i) {\n    CHECK_EQ(entries_before[i].value, entries_after[i].value);\n    CHECK_EQ(entries_before[i].rmin, entries_after[i].rmin);\n    CHECK_EQ(entries_before[i].rmax, entries_after[i].rmax);\n    CHECK_EQ(entries_before[i].wmin, entries_after[i].wmin);\n  }\n  for (size_t i = 0; i < ptrs_before.size(); ++i) {\n    CHECK_EQ(ptrs_before[i], ptrs_after[i]);\n  }\n}\n\nTEST(GPUQuantile, MergeBasic) {\n  constexpr size_t kRows = 1000, kCols = 100;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    auto ctx = MakeCUDACtx(0);\n    HostDeviceVector<FeatureType> ft;\n    SketchContainer sketch_0(ft, n_bins, kCols, ctx.Device());\n    HostDeviceVector<float> storage_0;\n    std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}\n                                      .Device(ctx.Device())\n                                      .Seed(seed)\n                                      .GenerateArrayInterface(&storage_0);\n    data::CupyAdapter adapter_0(interface_str_0);\n    AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch_0);\n\n    SketchContainer sketch_1(ft, n_bins, kCols, ctx.Device());\n    HostDeviceVector<float> storage_1;\n    std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}\n                                      .Device(ctx.Device())\n                                      .Seed(seed)\n                                      
.GenerateArrayInterface(&storage_1);\n    data::CupyAdapter adapter_1(interface_str_1);\n    AdapterDeviceSketch(&ctx, adapter_1.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch_1);\n\n    size_t size_before_merge = sketch_0.Data().size();\n    sketch_0.Merge(&ctx, sketch_1.ColumnsPtr(), sketch_1.Data());\n    TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr());\n\n    auto columns_ptr = sketch_0.ColumnsPtr();\n    std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());\n    dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);\n    ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);\n\n    std::vector<SketchEntry> h_data(sketch_0.Data().size());\n    dh::CopyDeviceSpanToVector(&h_data, sketch_0.Data());\n    for (size_t i = 1; i < h_columns_ptr.size(); ++i) {\n      auto begin = h_columns_ptr[i - 1];\n      auto column = Span<SketchEntry>{h_data}.subspan(begin, h_columns_ptr[i] - begin);\n      ASSERT_TRUE(std::is_sorted(column.begin(), column.end(), IsSorted{}));\n    }\n  });\n}\n\nvoid TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {\n  auto ctx = MakeCUDACtx(0);\n  MetaInfo info;\n  int32_t seed = 0;\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch_0(ft, n_bins, cols, ctx.Device());\n  HostDeviceVector<float> storage_0;\n  std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}\n                                    .Device(ctx.Device())\n                                    .Seed(seed)\n                                    .GenerateArrayInterface(&storage_0);\n  data::CupyAdapter adapter_0(interface_str_0);\n  AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &sketch_0);\n\n  size_t f_rows = rows * frac;\n  SketchContainer sketch_1(ft, n_bins, cols, ctx.Device());\n  HostDeviceVector<float> storage_1;\n  std::string interface_str_1 = 
RandomDataGenerator{f_rows, cols, 0}\n                                    .Device(ctx.Device())\n                                    .Seed(seed)\n                                    .GenerateArrayInterface(&storage_1);\n  auto data_1 = storage_1.DeviceSpan();\n  auto tuple_it = cuda::std::make_tuple(thrust::make_counting_iterator<size_t>(0ul), data_1.data());\n  using Tuple = cuda::std::tuple<size_t, float>;\n  auto it = thrust::make_zip_iterator(tuple_it);\n  thrust::transform(ctx.CUDACtx()->CTP(), it, it + data_1.size(), data_1.data(),\n                    [=] XGBOOST_DEVICE(Tuple const& tuple) {\n                      auto i = cuda::std::get<0>(tuple);\n                      if (i % 2 == 0) {\n                        return 0.0f;\n                      } else {\n                        return cuda::std::get<1>(tuple);\n                      }\n                    });\n  data::CupyAdapter adapter_1(interface_str_1);\n  AdapterDeviceSketch(&ctx, adapter_1.Value(), n_bins, info,\n                      std::numeric_limits<float>::quiet_NaN(), &sketch_1);\n\n  size_t size_before_merge = sketch_0.Data().size();\n  sketch_0.Merge(&ctx, sketch_1.ColumnsPtr(), sketch_1.Data());\n  TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr());\n\n  auto columns_ptr = sketch_0.ColumnsPtr();\n  std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());\n  dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);\n  ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);\n\n  std::vector<SketchEntry> h_data(sketch_0.Data().size());\n  dh::CopyDeviceSpanToVector(&h_data, sketch_0.Data());\n  for (size_t i = 1; i < h_columns_ptr.size(); ++i) {\n    auto begin = h_columns_ptr[i - 1];\n    auto column = Span<SketchEntry>{h_data}.subspan(begin, h_columns_ptr[i] - begin);\n    ASSERT_TRUE(std::is_sorted(column.begin(), column.end(), IsSorted{}));\n  }\n}\n\nTEST(GPUQuantile, MergeDuplicated) {\n  size_t n_bins = 256;\n  constexpr size_t kRows = 
1000, kCols = 100;\n  for (float frac = 0.5; frac < 2.5; frac += 0.5) {\n    TestMergeDuplicated(n_bins, kRows, kCols, frac);\n  }\n}\n\nTEST(GPUQuantile, MergeCategorical) {\n  auto ctx = MakeCUDACtx(0);\n  constexpr bst_feature_t kCols = 2;\n  bst_bin_t n_bins = 16;\n\n  HostDeviceVector<FeatureType> ft;\n  ft.HostVector() = {FeatureType::kCategorical, FeatureType::kNumerical};\n  SketchContainer sketch_0(ft, n_bins, kCols, ctx.Device());\n  SketchContainer sketch_1(ft, n_bins, kCols, ctx.Device());\n\n  std::vector<Entry> entries_0{{0, 0.0f}, {0, 0.0f}, {0, 1.0f}, {0, 2.0f},\n                               {0, 2.0f}, {1, 0.1f}, {1, 0.2f}, {1, 0.4f}};\n  std::vector<Entry> entries_1{{0, 1.0f}, {0, 1.0f},  {0, 2.0f},  {0, 3.0f},\n                               {0, 3.0f}, {1, 0.15f}, {1, 0.25f}, {1, 0.5f}};\n\n  dh::device_vector<Entry> d_entries_0{entries_0};\n  dh::device_vector<Entry> d_entries_1{entries_1};\n  dh::device_vector<size_t> columns_ptr_0{0, 5, 8};\n  dh::device_vector<size_t> columns_ptr_1{0, 5, 8};\n  dh::device_vector<size_t> cuts_ptr_0{0, 5, 8};\n  dh::device_vector<size_t> cuts_ptr_1{0, 5, 8};\n\n  sketch_0.Push(&ctx, dh::ToSpan(d_entries_0), dh::ToSpan(columns_ptr_0), dh::ToSpan(cuts_ptr_0),\n                entries_0.size(), {});\n  sketch_1.Push(&ctx, dh::ToSpan(d_entries_1), dh::ToSpan(columns_ptr_1), dh::ToSpan(cuts_ptr_1),\n                entries_1.size(), {});\n\n  sketch_0.Merge(&ctx, sketch_1.ColumnsPtr(), sketch_1.Data());\n  TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr());\n\n  std::vector<bst_idx_t> h_columns_ptr(sketch_0.ColumnsPtr().size());\n  dh::CopyDeviceSpanToVector(&h_columns_ptr, sketch_0.ColumnsPtr());\n  std::vector<SketchEntry> h_data(sketch_0.Data().size());\n  dh::CopyDeviceSpanToVector(&h_data, sketch_0.Data());\n\n  auto cat_column = Span<SketchEntry>{h_data}.subspan(h_columns_ptr[0], h_columns_ptr[1]);\n  ASSERT_TRUE(std::adjacent_find(cat_column.begin(), cat_column.end(),\n                
                 [](SketchEntry const& l, SketchEntry const& r) {\n                                   return l.value == r.value;\n                                 }) == cat_column.end());\n}\n\nTEST(GPUQuantile, MultiMerge) {\n  constexpr size_t kRows = 20, kCols = 1;\n  int32_t world = 2;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    // Set up single node version\n    HostDeviceVector<FeatureType> ft;\n    auto ctx = MakeCUDACtx(0);\n    SketchContainer sketch_on_single_node(ft, n_bins, kCols, ctx.Device());\n\n    size_t intermediate_num_cuts =\n        std::min(kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));\n    std::vector<SketchContainer> containers;\n    for (auto rank = 0; rank < world; ++rank) {\n      HostDeviceVector<float> storage;\n      std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                      .Device(ctx.Device())\n                                      .Seed(rank + seed)\n                                      .GenerateArrayInterface(&storage);\n      data::CupyAdapter adapter(interface_str);\n      HostDeviceVector<FeatureType> ft;\n      containers.emplace_back(ft, n_bins, kCols, ctx.Device());\n      AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                          std::numeric_limits<float>::quiet_NaN(), &containers.back());\n    }\n    for (auto& sketch : containers) {\n      sketch.Prune(&ctx, intermediate_num_cuts);\n      sketch_on_single_node.Merge(&ctx, sketch.ColumnsPtr(), sketch.Data());\n    }\n    TestQuantileElemRank(ctx.Device(), sketch_on_single_node.Data(),\n                         sketch_on_single_node.ColumnsPtr());\n  });\n}\n\nTEST(GPUQuantile, MissingColumns) {\n  auto dmat = std::unique_ptr<DMatrix>{[=]() {\n    std::size_t constexpr kRows = 1000, kCols = 100;\n    auto sparsity = 0.5f;\n    std::vector<FeatureType> ft(kCols);\n    for (size_t i = 0; i < ft.size(); ++i) {\n      ft[i] = (i % 2 
== 0) ? FeatureType::kNumerical : FeatureType::kCategorical;\n    }\n    auto dmat = RandomDataGenerator{kRows, kCols, sparsity}\n                    .Seed(0)\n                    .Lower(.0f)\n                    .Upper(1.0f)\n                    .Type(ft)\n                    .MaxCategory(13)\n                    .GenerateDMatrix();\n    return dmat->SliceCol(2, 1);\n  }()};\n  dmat->Info().data_split_mode = DataSplitMode::kRow;\n\n  auto ctx = MakeCUDACtx(0);\n  std::size_t constexpr kBins = 64;\n  HistogramCuts cuts = common::DeviceSketch(&ctx, dmat.get(), kBins);\n  ASSERT_TRUE(cuts.HasCategorical());\n}\n\nnamespace {\nvoid TestAllReduceBasic() {\n  auto const world = collective::GetWorldSize();\n  constexpr size_t kRows = 1000, kCols = 100;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    auto const device = DeviceOrd::CUDA(GPUIDX);\n    auto ctx = MakeCUDACtx(device.ordinal);\n\n    /**\n     * Set up single node version.\n     */\n    HostDeviceVector<FeatureType> ft({}, device);\n    SketchContainer sketch_on_single_node(ft, n_bins, kCols, device);\n\n    size_t intermediate_num_cuts =\n        std::min(kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));\n    std::vector<SketchContainer> containers;\n    for (auto rank = 0; rank < world; ++rank) {\n      HostDeviceVector<float> storage({}, device);\n      std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                      .Device(device)\n                                      .Seed(rank + seed)\n                                      .GenerateArrayInterface(&storage);\n      data::CupyAdapter adapter(interface_str);\n      HostDeviceVector<FeatureType> ft({}, device);\n      containers.emplace_back(ft, n_bins, kCols, device);\n      AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                          std::numeric_limits<float>::quiet_NaN(), &containers.back());\n    }\n    for (auto& sketch : 
containers) {\n      sketch.Prune(&ctx, intermediate_num_cuts);\n      sketch_on_single_node.Merge(&ctx, sketch.ColumnsPtr(), sketch.Data());\n    }\n    TestQuantileElemRank(device, sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr(),\n                         true);\n\n    /**\n     * Set up distributed version.  We rely on using rank as seed to generate\n     * the exact same copy of data.\n     */\n    auto rank = collective::GetRank();\n    SketchContainer sketch_distributed(ft, n_bins, kCols, device);\n    HostDeviceVector<float> storage({}, device);\n    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                    .Device(device)\n                                    .Seed(rank + seed)\n                                    .GenerateArrayInterface(&storage);\n    data::CupyAdapter adapter(interface_str);\n    AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch_distributed);\n    if (world == 1) {\n      auto n_samples_global = kRows * world;\n      intermediate_num_cuts =\n          std::min(n_samples_global, static_cast<size_t>(n_bins * SketchContainer::kFactor));\n      sketch_distributed.Prune(&ctx, intermediate_num_cuts);\n    }\n    sketch_distributed.AllReduce(&ctx, false);\n\n    ASSERT_EQ(sketch_distributed.ColumnsPtr().size(), sketch_on_single_node.ColumnsPtr().size());\n    ASSERT_EQ(sketch_distributed.Data().size(), sketch_on_single_node.Data().size());\n\n    TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);\n\n    std::vector<SketchEntry> single_node_data(sketch_on_single_node.Data().size());\n    dh::CopyDeviceSpanToVector(&single_node_data, sketch_on_single_node.Data());\n\n    std::vector<SketchEntry> distributed_data(sketch_distributed.Data().size());\n    dh::CopyDeviceSpanToVector(&distributed_data, sketch_distributed.Data());\n    float Eps = 2e-4 * world;\n\n    
for (size_t i = 0; i < single_node_data.size(); ++i) {\n      ASSERT_NEAR(single_node_data[i].value, distributed_data[i].value, Eps);\n      ASSERT_NEAR(single_node_data[i].rmax, distributed_data[i].rmax, Eps);\n      ASSERT_NEAR(single_node_data[i].rmin, distributed_data[i].rmin, Eps);\n      ASSERT_NEAR(single_node_data[i].wmin, distributed_data[i].wmin, Eps);\n    }\n  });\n}\n}  // anonymous namespace\n\nTEST_F(MGPUQuantileTest, AllReduceBasic) {\n  this->DoTest([] { TestAllReduceBasic(); }, true);\n  this->DoTest([] { TestAllReduceBasic(); }, false);\n}\n\nnamespace {\nvoid TestColumnSplit(DMatrix* dmat) {\n  auto const world = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  auto m = std::unique_ptr<DMatrix>{dmat->SliceCol(world, rank)};\n\n  // Generate cuts for distributed environment.\n  auto ctx = MakeCUDACtx(GPUIDX);\n  std::size_t constexpr kBins = 64;\n  HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);\n\n  // Generate cuts for single node environment\n  collective::Finalize();\n  CHECK_EQ(collective::GetWorldSize(), 1);\n  HistogramCuts single_node_cuts = common::DeviceSketch(&ctx, m.get(), kBins);\n\n  auto const& sptrs = single_node_cuts.Ptrs();\n  auto const& dptrs = distributed_cuts.Ptrs();\n  auto const& svals = single_node_cuts.Values();\n  auto const& dvals = distributed_cuts.Values();\n\n  EXPECT_EQ(sptrs.size(), dptrs.size());\n  for (size_t i = 0; i < sptrs.size(); ++i) {\n    EXPECT_EQ(sptrs[i], dptrs[i]) << \"rank: \" << rank << \", i: \" << i;\n  }\n\n  EXPECT_EQ(svals.size(), dvals.size());\n  for (size_t i = 0; i < svals.size(); ++i) {\n    EXPECT_NEAR(svals[i], dvals[i], 2e-2f) << \"rank: \" << rank << \", i: \" << i;\n  }\n}\n}  // anonymous namespace\n\nTEST_F(MGPUQuantileTest, ColumnSplitBasic) {\n  std::size_t constexpr kRows = 1000, kCols = 100;\n  auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n  this->DoTest([&] { TestColumnSplit(dmat.get()); }, true);\n  
this->DoTest([&] { TestColumnSplit(dmat.get()); }, false);\n}\n\nTEST_F(MGPUQuantileTest, ColumnSplitCategorical) {\n  std::size_t constexpr kRows = 1000, kCols = 100;\n  auto sparsity = 0.5f;\n  std::vector<FeatureType> ft(kCols);\n  for (size_t i = 0; i < ft.size(); ++i) {\n    ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;\n  }\n  auto dmat = RandomDataGenerator{kRows, kCols, sparsity}\n                  .Seed(0)\n                  .Lower(.0f)\n                  .Upper(1.0f)\n                  .Type(ft)\n                  .MaxCategory(13)\n                  .GenerateDMatrix();\n  this->DoTest([&] { TestColumnSplit(dmat.get()); }, true);\n  this->DoTest([&] { TestColumnSplit(dmat.get()); }, false);\n}\n\nnamespace {\nvoid TestSameOnAllWorkers() {\n  auto world = collective::GetWorldSize();\n  constexpr size_t kRows = 1000, kCols = 100;\n  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {\n    auto const rank = collective::GetRank();\n    auto const device = DeviceOrd::CUDA(GPUIDX);\n    Context ctx = MakeCUDACtx(device.ordinal);\n    HostDeviceVector<FeatureType> ft({}, device);\n    SketchContainer sketch_distributed(ft, n_bins, kCols, device);\n    HostDeviceVector<float> storage({}, device);\n    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}\n                                    .Device(device)\n                                    .Seed(rank + seed)\n                                    .GenerateArrayInterface(&storage);\n    data::CupyAdapter adapter(interface_str);\n    AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,\n                        std::numeric_limits<float>::quiet_NaN(), &sketch_distributed);\n    sketch_distributed.AllReduce(&ctx, false);\n    TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);\n\n    // Test for all workers having the same sketch.\n    size_t n_data = sketch_distributed.Data().size();\n    
auto rc = collective::Allreduce(&ctx, linalg::MakeVec(&n_data, 1), collective::Op::kMax);\n    SafeColl(rc);\n    ASSERT_EQ(n_data, sketch_distributed.Data().size());\n    size_t size_as_float = sketch_distributed.Data().size_bytes() / sizeof(float);\n    auto local_data = Span<float const>{\n        reinterpret_cast<float const*>(sketch_distributed.Data().data()), size_as_float};\n\n    dh::caching_device_vector<float> all_workers(size_as_float * world);\n    thrust::fill(all_workers.begin(), all_workers.end(), 0);\n    thrust::copy(thrust::device, local_data.data(), local_data.data() + local_data.size(),\n                 all_workers.begin() + local_data.size() * rank);\n    rc = collective::Allreduce(\n        &ctx, linalg::MakeVec(all_workers.data().get(), all_workers.size(), ctx.Device()),\n        collective::Op::kSum);\n    SafeColl(rc);\n\n    auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);\n    std::vector<float> h_base_line(base_line.size());\n    dh::CopyDeviceSpanToVector(&h_base_line, base_line);\n\n    size_t offset = 0;\n    for (decltype(world) i = 0; i < world; ++i) {\n      auto comp = dh::ToSpan(all_workers).subspan(offset, size_as_float);\n      std::vector<float> h_comp(comp.size());\n      dh::CopyDeviceSpanToVector(&h_comp, comp);\n      ASSERT_EQ(comp.size(), base_line.size());\n      for (size_t j = 0; j < h_comp.size(); ++j) {\n        ASSERT_NEAR(h_base_line[j], h_comp[j], kRtEps);\n      }\n      offset += size_as_float;\n    }\n  });\n}\n}  // anonymous namespace\n\nTEST_F(MGPUQuantileTest, SameOnAllWorkers) {\n  this->DoTest([] { TestSameOnAllWorkers(); }, true);\n  this->DoTest([] { TestSameOnAllWorkers(); }, false);\n}\n\nTEST(GPUQuantile, Push) {\n  size_t constexpr kRows = 100;\n  std::vector<float> data(kRows);\n  auto ctx = MakeCUDACtx(0);\n\n  std::fill(data.begin(), data.begin() + (data.size() / 2), 0.3f);\n  std::fill(data.begin() + (data.size() / 2), data.end(), 0.5f);\n  int32_t n_bins = 128;\n  
bst_feature_t constexpr kCols = 1;\n\n  std::vector<Entry> entries(kRows);\n  for (bst_feature_t i = 0; i < entries.size(); ++i) {\n    Entry e{i, data[i]};\n    entries[i] = e;\n  }\n\n  dh::device_vector<Entry> d_entries(entries);\n  dh::device_vector<size_t> columns_ptr(2);\n  columns_ptr[0] = 0;\n  columns_ptr[1] = kRows;\n\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch(ft, n_bins, kCols, ctx.Device());\n  sketch.Push(&ctx, dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows,\n              {});\n\n  auto sketch_data = sketch.Data();\n\n  thrust::host_vector<SketchEntry> h_sketch_data(sketch_data.size());\n\n  auto ptr = thrust::device_ptr<SketchEntry const>(sketch_data.data());\n  thrust::copy(ptr, ptr + sketch_data.size(), h_sketch_data.begin());\n  ASSERT_EQ(h_sketch_data.size(), 2);\n\n  auto v_0 = h_sketch_data[0];\n  ASSERT_EQ(v_0.rmin, 0);\n  ASSERT_EQ(v_0.wmin, kRows / 2.0f);\n  ASSERT_EQ(v_0.rmax, kRows / 2.0f);\n\n  auto v_1 = h_sketch_data[1];\n  ASSERT_EQ(v_1.rmin, kRows / 2.0f);\n  ASSERT_EQ(v_1.wmin, kRows / 2.0f);\n  ASSERT_EQ(v_1.rmax, static_cast<float>(kRows));\n}\n\nTEST(GPUQuantile, MultiColPush) {\n  size_t constexpr kRows = 100, kCols = 4;\n  std::vector<float> data(kRows * kCols);\n  std::fill(data.begin(), data.begin() + (data.size() / 2), 0.3f);\n\n  auto ctx = MakeCUDACtx(0);\n  std::vector<Entry> entries(kRows * kCols);\n\n  for (bst_feature_t c = 0; c < kCols; ++c) {\n    for (size_t r = 0; r < kRows; ++r) {\n      float v = (r >= kRows / 2) ? 
0.7 : 0.4;\n      auto e = Entry{c, v};\n      entries[c * kRows + r] = e;\n    }\n  }\n\n  int32_t n_bins = 16;\n  HostDeviceVector<FeatureType> ft;\n  SketchContainer sketch(ft, n_bins, kCols, ctx.Device());\n  dh::device_vector<Entry> d_entries{entries};\n\n  dh::device_vector<size_t> columns_ptr(kCols + 1, 0);\n  for (size_t i = 1; i < kCols + 1; ++i) {\n    columns_ptr[i] = kRows;\n  }\n  thrust::inclusive_scan(thrust::device, columns_ptr.begin(), columns_ptr.end(),\n                         columns_ptr.begin());\n  dh::device_vector<size_t> cuts_ptr(columns_ptr);\n\n  sketch.Push(&ctx, dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(cuts_ptr),\n              kRows * kCols, {});\n\n  auto sketch_data = sketch.Data();\n  ASSERT_EQ(sketch_data.size(), kCols * 2);\n  auto ptr = thrust::device_ptr<SketchEntry const>(sketch_data.data());\n  std::vector<SketchEntry> h_sketch_data(sketch_data.size());\n  thrust::copy(ptr, ptr + sketch_data.size(), h_sketch_data.begin());\n\n  for (size_t i = 0; i < kCols; ++i) {\n    auto v_0 = h_sketch_data[i * 2];\n    ASSERT_EQ(v_0.rmin, 0);\n    ASSERT_EQ(v_0.wmin, kRows / 2.0f);\n    ASSERT_EQ(v_0.rmax, kRows / 2.0f);\n\n    auto v_1 = h_sketch_data[i * 2 + 1];\n    ASSERT_EQ(v_1.rmin, kRows / 2.0f);\n    ASSERT_EQ(v_1.wmin, kRows / 2.0f);\n    ASSERT_EQ(v_1.rmax, static_cast<float>(kRows));\n  }\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_quantile.h",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_TESTS_CPP_COMMON_TEST_QUANTILE_H_\n#define XGBOOST_TESTS_CPP_COMMON_TEST_QUANTILE_H_\n\n#include <algorithm>\n#include <vector>\n\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\ntemplate <typename Fn> void RunWithSeedsAndBins(size_t rows, Fn fn) {\n  std::vector<int32_t> seeds(2);\n  SimpleLCG lcg;\n  SimpleRealUniformDistribution<float> dist(3, 1000);\n  std::generate(seeds.begin(), seeds.end(), [&](){ return dist(&lcg); });\n\n  std::vector<bst_bin_t> bins(2);\n  for (size_t i = 0; i < bins.size() - 1; ++i) {\n    bins[i] = i * 35 + 2;\n  }\n  bins.back() = rows + 160;  // provide a bin number greater than rows.\n\n  std::vector<MetaInfo> infos(2);\n  auto& h_weights = infos.front().weights_.HostVector();\n  h_weights.resize(rows);\n\n  SimpleRealUniformDistribution<float> weight_dist(0, 10);\n  std::generate(h_weights.begin(), h_weights.end(), [&]() { return weight_dist(&lcg); });\n\n  for (auto seed : seeds) {\n    for (auto n_bin : bins) {\n      for (auto const& info : infos) {\n        fn(seed, n_bin, info);\n      }\n    }\n  }\n}\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_TESTS_CPP_COMMON_TEST_QUANTILE_H_\n"
  },
  {
    "path": "tests/cpp/common/test_quantile_utils.cc",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include \"../../../src/common/quantile_loss_utils.h\"\n#include \"xgboost/base.h\"  // Args\n\nnamespace xgboost {\nnamespace common {\nTEST(QuantileLossParam, Basic) {\n  QuantileLossParam param;\n  auto& ref = param.quantile_alpha.Get();\n\n  param.UpdateAllowUnknown(Args{{\"quantile_alpha\", \"0.3\"}});\n  ASSERT_EQ(ref.size(), 1);\n  ASSERT_NEAR(ref[0], 0.3, kRtEps);\n\n  param.UpdateAllowUnknown(Args{{\"quantile_alpha\", \"[0.3, 0.6]\"}});\n  ASSERT_EQ(param.quantile_alpha.Get().size(), 2);\n  ASSERT_NEAR(ref[0], 0.3, kRtEps);\n  ASSERT_NEAR(ref[1], 0.6, kRtEps);\n\n  param.UpdateAllowUnknown(Args{{\"quantile_alpha\", \"(0.6, 0.3)\"}});\n  ASSERT_EQ(param.quantile_alpha.Get().size(), 2);\n  ASSERT_NEAR(ref[0], 0.6, kRtEps);\n  ASSERT_NEAR(ref[1], 0.3, kRtEps);\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_random.cc",
    "content": "/**\n * Copyright 2018-2026, XGBoost Contributors\n */\n#include \"../../../src/common/random.h\"\n#include \"../helpers.h\"\n#include \"gtest/gtest.h\"\n#include \"xgboost/context.h\"  // for Context\n\nnamespace xgboost::common {\nnamespace {\nvoid TestBasic(Context const* ctx) {\n  int n = 128;\n  ColumnSampler cs;\n  HostDeviceVector<float> feature_weights;\n\n  // No node sampling\n  cs.Init(ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);\n  auto set0 = cs.GetFeatureSet(ctx, 0);\n  ASSERT_EQ(set0->Size(), 32);\n\n  auto set1 = cs.GetFeatureSet(ctx, 0);\n\n  ASSERT_EQ(set0->HostVector(), set1->HostVector());\n\n  auto set2 = cs.GetFeatureSet(ctx, 1);\n  ASSERT_NE(set1->HostVector(), set2->HostVector());\n  ASSERT_EQ(set2->Size(), 32);\n\n  // Node sampling\n  cs.Init(ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);\n  auto set3 = cs.GetFeatureSet(ctx, 0);\n  ASSERT_EQ(set3->Size(), 32);\n\n  auto set4 = cs.GetFeatureSet(ctx, 0);\n\n  ASSERT_NE(set3->HostVector(), set4->HostVector());\n  ASSERT_EQ(set4->Size(), 32);\n\n  // No level or node sampling, should be the same at different depth\n  cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);\n  ASSERT_EQ(cs.GetFeatureSet(ctx, 0)->HostVector(), cs.GetFeatureSet(ctx, 1)->HostVector());\n\n  cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);\n  auto set5 = cs.GetFeatureSet(ctx, 0);\n  ASSERT_EQ(set5->Size(), n);\n  cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);\n  auto set6 = cs.GetFeatureSet(ctx, 0);\n  ASSERT_EQ(set5->HostVector(), set6->HostVector());\n\n  // Should always be a minimum of one feature\n  cs.Init(ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);\n  ASSERT_EQ(cs.GetFeatureSet(ctx, 0)->Size(), 1);\n}\n}  // namespace\n\nTEST(ColumnSampler, Test) {\n  Context ctx;\n  TestBasic(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(ColumnSampler, GPUTest) {\n  auto ctx = MakeCUDACtx(0);\n  TestBasic(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\n// Test if different threads using the same 
seed produce the same result.\n// Each thread gets its own Context (since ctx->Rng() is not thread-safe) with the same\n// seed. All threads should produce identical column samples.\nTEST(ColumnSampler, ThreadSynchronisation) {\n  // NOLINTBEGIN(clang-analyzer-deadcode.DeadStores)\n#if defined(__linux__)\n  std::int64_t const n_threads = std::thread::hardware_concurrency() * 128;\n#else\n  std::int64_t const n_threads = std::thread::hardware_concurrency();\n#endif\n  // NOLINTEND(clang-analyzer-deadcode.DeadStores)\n  int n = 128;\n  size_t iterations = 10;\n  size_t levels = 5;\n  std::vector<bst_feature_t> reference_result;\n  HostDeviceVector<float> feature_weights;\n  bool success = true;\n#pragma omp parallel num_threads(n_threads)\n  {\n    for (auto j = 0ull; j < iterations; j++) {\n      Context ctx;\n      ctx.Init({{\"seed\", std::to_string(j)}});\n      ColumnSampler cs;\n      cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);\n      for (auto level = 0ull; level < levels; level++) {\n        auto result = cs.GetFeatureSet(&ctx, level)->ConstHostVector();\n#pragma omp single\n        {\n          reference_result = result;\n        }\n        if (result != reference_result) {\n          success = false;\n        }\n#pragma omp barrier\n      }\n    }\n  }\n  ASSERT_TRUE(success);\n}\n\nnamespace {\nvoid TestWeightedSampling(Context const* ctx) {\n  auto test_basic = [ctx](int first) {\n    HostDeviceVector<float> feature_weights(2);\n    feature_weights.HostVector()[0] = std::abs(first - 1.0f);\n    feature_weights.HostVector()[1] = first - 0.0f;\n    ColumnSampler cs;\n    cs.Init(ctx, 2, feature_weights, 1.0, 1.0, 0.5);\n    auto feature_sets = cs.GetFeatureSet(ctx, 0);\n    auto const& h_feat_set = feature_sets->HostVector();\n    ASSERT_EQ(h_feat_set.size(), 1);\n    ASSERT_EQ(h_feat_set[0], first - 0);\n  };\n\n  test_basic(0);\n  test_basic(1);\n\n  size_t constexpr kCols = 64;\n  HostDeviceVector<float> feature_weights(kCols);\n  SimpleLCG 
rng;\n  SimpleRealUniformDistribution<float> dist(.0f, 12.0f);\n  std::generate(feature_weights.HostVector().begin(), feature_weights.HostVector().end(),\n                [&]() { return dist(&rng); });\n  ColumnSampler cs;\n  cs.Init(ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);\n  std::vector<bst_feature_t> features(kCols);\n  std::iota(features.begin(), features.end(), 0);\n  std::vector<float> freq(kCols, 0);\n  for (size_t i = 0; i < 1024; ++i) {\n    auto fset = cs.GetFeatureSet(ctx, 0);\n    ASSERT_EQ(kCols * 0.5, fset->Size());\n    auto const& h_fset = fset->HostVector();\n    for (auto f : h_fset) {\n      freq[f] += 1.0f;\n    }\n  }\n\n  auto norm = std::accumulate(freq.cbegin(), freq.cend(), .0f);\n  for (auto& f : freq) {\n    f /= norm;\n  }\n  auto& h_feature_weights = feature_weights.HostVector();\n  norm = std::accumulate(h_feature_weights.cbegin(), h_feature_weights.cend(), .0f);\n  for (auto& f : h_feature_weights) {\n    f /= norm;\n  }\n\n  for (size_t i = 0; i < h_feature_weights.size(); ++i) {\n    EXPECT_NEAR(freq[i], h_feature_weights[i], 1e-2);\n  }\n}\n}  // namespace\n\nTEST(ColumnSampler, WeightedSampling) {\n  Context ctx;\n  TestWeightedSampling(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(ColumnSampler, GPUWeightedSampling) {\n  auto ctx = MakeCUDACtx(0);\n  TestWeightedSampling(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace {\nvoid TestWeightedMultiSampling(Context const* ctx) {\n  size_t constexpr kCols = 32;\n  HostDeviceVector<float> feature_weights(kCols, 0);\n  auto& h_feature_weights = feature_weights.HostVector();\n  for (size_t i = 0; i < h_feature_weights.size(); ++i) {\n    h_feature_weights[i] = i;\n  }\n  ColumnSampler cs;\n  float bytree{0.5}, bylevel{0.5}, bynode{0.5};\n  cs.Init(ctx, h_feature_weights.size(), feature_weights, bytree, bylevel, bynode);\n  auto feature_set = cs.GetFeatureSet(ctx, 0);\n  size_t n_sampled = kCols * bytree * bylevel * bynode;\n  ASSERT_EQ(feature_set->Size(), 
n_sampled);\n  feature_set = cs.GetFeatureSet(ctx, 1);\n  ASSERT_EQ(feature_set->Size(), n_sampled);\n}\n}  // namespace\n\nTEST(ColumnSampler, WeightedMultiSampling) {\n  Context ctx;\n  TestWeightedMultiSampling(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(ColumnSampler, GPUWeightedMultiSampling) {\n  auto ctx = MakeCUDACtx(0);\n  TestWeightedMultiSampling(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_ranking_utils.cc",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#include \"test_ranking_utils.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                       // for Args, bst_group_t, kRtEps\n#include <xgboost/context.h>                    // for Context\n#include <xgboost/data.h>                       // for MetaInfo, DMatrix\n#include <xgboost/host_device_vector.h>         // for HostDeviceVector\n#include <xgboost/logging.h>                    // for Error\n#include <xgboost/string_view.h>                // for StringView\n\n#include <cstddef>                              // for size_t\n#include <cstdint>                              // for uint32_t\n#include <numeric>                              // for iota\n#include <utility>                              // for move\n#include <vector>                               // for vector\n\n#include \"../../../src/common/numeric.h\"        // for Iota\n#include \"../../../src/common/ranking_utils.h\"  // for LambdaRankParam, ParseMetricName, MakeMet...\n#include \"../helpers.h\"                         // for EmptyDMatrix\n\nnamespace xgboost::ltr {\nTEST(RankingUtils, LambdaRankParam) {\n  // make sure no memory is shared in dmlc parameter.\n  LambdaRankParam p0;\n  p0.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", \"3\"}});\n  ASSERT_EQ(p0.NumPair(), 3);\n\n  LambdaRankParam p1;\n  p1.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", \"8\"}});\n\n  ASSERT_EQ(p0.NumPair(), 3);\n  ASSERT_EQ(p1.NumPair(), 8);\n\n  p0.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", \"17\"}});\n  ASSERT_EQ(p0.NumPair(), 17);\n  ASSERT_EQ(p1.NumPair(), 8);\n}\n\nTEST(RankingUtils, ParseMetricName) {\n  std::uint32_t topn{32};\n  bool minus{false};\n  auto name = ParseMetricName(\"ndcg\", \"3-\", &topn, &minus);\n  ASSERT_EQ(name, \"ndcg@3-\");\n  ASSERT_EQ(topn, 3);\n  ASSERT_TRUE(minus);\n\n  name = ParseMetricName(\"ndcg\", \"6\", &topn, &minus);\n  ASSERT_EQ(topn, 6);\n  
ASSERT_TRUE(minus);  // unchanged\n\n  minus = false;\n  name = ParseMetricName(\"ndcg\", \"-\", &topn, &minus);\n  ASSERT_EQ(topn, 6);  // unchanged\n  ASSERT_TRUE(minus);\n\n  name = ParseMetricName(\"ndcg\", nullptr, &topn, &minus);\n  ASSERT_EQ(topn, 6);  // unchanged\n  ASSERT_TRUE(minus);  // unchanged\n\n  name = ParseMetricName(\"ndcg\", StringView{}, &topn, &minus);\n  ASSERT_EQ(topn, 6);  // unchanged\n  ASSERT_TRUE(minus);  // unchanged\n}\n\nTEST(RankingUtils, MakeMetricName) {\n  auto name = MakeMetricName(\"map\", LambdaRankParam::NotSet(), true);\n  ASSERT_EQ(name, \"map-\");\n  name = MakeMetricName(\"map\", LambdaRankParam::NotSet(), false);\n  ASSERT_EQ(name, \"map\");\n  name = MakeMetricName(\"map\", 2, true);\n  ASSERT_EQ(name, \"map@2-\");\n  name = MakeMetricName(\"map\", 2, false);\n  ASSERT_EQ(name, \"map@2\");\n}\n\nvoid TestRankingCache(Context const* ctx) {\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n\n  info.num_row_ = 16;\n  info.labels.Reshape(info.num_row_);\n  auto& h_label = info.labels.Data()->HostVector();\n  for (std::size_t i = 0; i < h_label.size(); ++i) {\n    h_label[i] = i % 2;\n  }\n\n  LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  RankingCache cache{ctx, info, param};\n\n  HostDeviceVector<float> predt(info.num_row_, 0);\n  auto& h_predt = predt.HostVector();\n  std::iota(h_predt.begin(), h_predt.end(), 0.0f);\n  predt.SetDevice(ctx->Device());\n\n  auto rank_idx =\n      cache.SortedIdx(ctx, ctx->IsCPU() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan());\n\n  for (std::size_t i = 0; i < rank_idx.size(); ++i) {\n    ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);\n  }\n}\n\nTEST(RankingCache, InitFromCPU) {\n  Context ctx;\n  TestRankingCache(&ctx);\n}\n\nvoid TestNDCGCache(Context const* ctx) {\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  {\n    // empty\n    NDCGCache cache{ctx, info, param};\n    ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);\n  }\n\n  info.num_row_ = 3;\n  info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};\n\n  {\n    auto fail = [&]() { NDCGCache cache{ctx, info, param}; };\n    // empty label\n    ASSERT_THROW(fail(), dmlc::Error);\n    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};\n    // invalid label\n    ASSERT_THROW(fail(), dmlc::Error);\n    auto h_labels = info.labels.HostView();\n    for (std::size_t i = 0; i < h_labels.Size(); ++i) {\n      h_labels(i) *= 10;\n    }\n    param.UpdateAllowUnknown(Args{{\"ndcg_exp_gain\", \"false\"}});\n    NDCGCache cache{ctx, info, param};\n    Context cpuctx;\n    auto inv_idcg = cache.InvIDCG(&cpuctx);\n    ASSERT_EQ(inv_idcg.Size(), 1);\n    ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);\n  }\n\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_unbiased\", \"false\"}});\n\n    std::vector<float> h_data(32);\n\n    common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);\n    info.labels.Reshape(h_data.size());\n    info.num_row_ = h_data.size();\n    info.group_ptr_.back() = info.num_row_;\n    info.labels.Data()->HostVector() = std::move(h_data);\n\n    {\n      NDCGCache cache{ctx, info, param};\n      Context cpuctx;\n      auto inv_idcg = cache.InvIDCG(&cpuctx);\n      ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);\n    }\n\n    param.UpdateAllowUnknown(\n        Args{{\"lambdarank_num_pair_per_sample\", \"3\"}, 
{\"lambdarank_pair_method\", \"topk\"}});\n    {\n      NDCGCache cache{ctx, info, param};\n      Context cpuctx;\n      auto inv_idcg = cache.InvIDCG(&cpuctx);\n      ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);\n    }\n  }\n}\n\nTEST(NDCGCache, InitFromCPU) {\n  Context ctx;\n  TestNDCGCache(&ctx);\n}\n\nvoid TestMAPCache(Context const* ctx) {\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  std::vector<float> h_data(32);\n\n  common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);\n  info.labels.Reshape(h_data.size());\n  info.num_row_ = h_data.size();\n  info.labels.Data()->HostVector() = std::move(h_data);\n\n  auto fail = [&]() { std::make_shared<MAPCache>(ctx, info, param); };\n  // binary label\n  ASSERT_THROW(fail(), dmlc::Error);\n\n  h_data = std::vector<float>(32, 0.0f);\n  h_data[1] = 1.0f;\n  info.labels.Data()->HostVector() = h_data;\n  auto p_cache = std::make_shared<MAPCache>(ctx, info, param);\n\n  ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);\n  ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);\n}\n\nTEST(MAPCache, InitFromCPU) {\n  Context ctx;\n  ctx.Init(Args{});\n  TestMAPCache(&ctx);\n}\n}  // namespace xgboost::ltr\n"
  },
  {
    "path": "tests/cpp/common/test_ranking_utils.cu",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                          // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps\n#include <xgboost/context.h>                       // for Context\n#include <xgboost/linalg.h>                        // for MakeTensorView, Vector\n\n#include <cstddef>                                 // for size_t\n#include <memory>                                  // for shared_ptr\n#include <numeric>                                 // for iota\n#include <vector>                                  // for vector\n\n#include \"../../../src/common/algorithm.cuh\"       // for SegmentedSequence\n#include \"../../../src/common/cuda_context.cuh\"    // for CUDAContext\n#include \"../../../src/common/device_helpers.cuh\"  // for device_vector, ToSpan\n#include \"../../../src/common/ranking_utils.cuh\"   // for CalcQueriesInvIDCG\n#include \"../../../src/common/ranking_utils.h\"     // for LambdaRankParam, RankingCache\n#include \"../helpers.h\"                            // for EmptyDMatrix\n#include \"test_ranking_utils.h\"                    // for TestNDCGCache\n#include \"xgboost/data.h\"                          // for MetaInfo\n#include \"xgboost/host_device_vector.h\"            // for HostDeviceVector\n\nnamespace xgboost::ltr {\nvoid TestCalcQueriesInvIDCG() {\n  auto ctx = MakeCUDACtx(0);\n  std::size_t n_groups = 5, n_samples_per_group = 32;\n\n  dh::device_vector<float> scores(n_samples_per_group * n_groups);\n  dh::device_vector<bst_group_t> group_ptr(n_groups + 1);\n  auto d_group_ptr = dh::ToSpan(group_ptr);\n  dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),\n              [=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });\n\n  auto d_scores = dh::ToSpan(scores);\n  common::SegmentedSequence(&ctx, d_group_ptr, d_scores);\n\n  linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());\n\n  ltr::LambdaRankParam p;\n  
p.UpdateAllowUnknown(Args{{\"ndcg_exp_gain\", \"false\"}});\n\n  cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),\n                                dh::ToSpan(group_ptr), inv_IDCG.View(ctx.Device()), p);\n  for (std::size_t i = 0; i < n_groups; ++i) {\n    double inv_idcg = inv_IDCG(i);\n    ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);\n  }\n}\n\nTEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }\n\nnamespace {\nvoid TestRankingCache(Context const* ctx) {\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n\n  info.num_row_ = 16;\n  info.labels.Reshape(info.num_row_);\n  auto& h_label = info.labels.Data()->HostVector();\n  for (std::size_t i = 0; i < h_label.size(); ++i) {\n    h_label[i] = i % 2;\n  }\n\n  LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  RankingCache cache{ctx, info, param};\n\n  HostDeviceVector<float> predt(info.num_row_, 0);\n  auto& h_predt = predt.HostVector();\n  std::iota(h_predt.begin(), h_predt.end(), 0.0f);\n  predt.SetDevice(ctx->Device());\n\n  auto rank_idx =\n      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());\n\n  std::vector<std::size_t> h_rank_idx(rank_idx.size());\n  dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);\n  for (std::size_t i = 0; i < rank_idx.size(); ++i) {\n    ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);\n  }\n}\n}  // namespace\n\nTEST(RankingCache, InitFromGPU) {\n  auto ctx = MakeCUDACtx(0);\n  TestRankingCache(&ctx);\n}\n\nTEST(NDCGCache, InitFromGPU) {\n  auto ctx = MakeCUDACtx(0);\n  TestNDCGCache(&ctx);\n}\n\nTEST(MAPCache, InitFromGPU) {\n  auto ctx = MakeCUDACtx(0);\n  TestMAPCache(&ctx);\n}\n}  // namespace xgboost::ltr\n"
  },
  {
    "path": "tests/cpp/common/test_ranking_utils.h",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#pragma once\n#include <xgboost/context.h>  // for Context\n\nnamespace xgboost::ltr {\nvoid TestNDCGCache(Context const* ctx);\n\nvoid TestMAPCache(Context const* ctx);\n}  // namespace xgboost::ltr\n"
  },
  {
    "path": "tests/cpp/common/test_ref_resource_view.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstddef>  // for size_t\n#include <memory>   // for make_shared, make_unique\n#include <numeric>  // for iota\n#include <vector>   // for vector\n\n#include \"../../../src/common/ref_resource_view.h\"\n#include \"dmlc/filesystem.h\"  // for TemporaryDirectory\n\nnamespace xgboost::common {\nTEST(RefResourceView, Basic) {\n  std::size_t n_bytes = 1024;\n  auto mem = std::make_shared<MallocResource>(n_bytes);\n  {\n    RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};\n\n    RefResourceView kview{static_cast<float const*>(mem->Data()), mem->Size() / sizeof(float), mem};\n    ASSERT_EQ(mem.use_count(), 3);\n    ASSERT_EQ(view.size(), n_bytes / sizeof(1024));\n    ASSERT_EQ(kview.size(), n_bytes / sizeof(1024));\n  }\n  {\n    RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};\n    std::fill_n(static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), 1.5f);\n    for (auto v : view) {\n      ASSERT_EQ(v, 1.5f);\n    }\n    std::iota(view.begin(), view.end(), 0.0f);\n    ASSERT_EQ(view.front(), 0.0f);\n    ASSERT_EQ(view.back(), static_cast<float>(view.size() - 1));\n\n    view.front() = 1.0f;\n    view.back() = 2.0f;\n    ASSERT_EQ(view.front(), 1.0f);\n    ASSERT_EQ(view.back(), 2.0f);\n  }\n  ASSERT_EQ(mem.use_count(), 1);\n}\n\nTEST(RefResourceView, IO) {\n  dmlc::TemporaryDirectory tmpdir;\n  auto path = tmpdir.path + \"/testfile\";\n  auto data = MakeFixedVecWithMalloc(123, std::size_t{1});\n\n  {\n    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, \"wb\");\n    ASSERT_EQ(fo->Write(data.data(), data.size_bytes()), data.size_bytes());\n  }\n  {\n    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, \"wb\");\n    ASSERT_EQ(WriteVec(fo.get(), data),\n              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));\n  
}\n  {\n    auto fi = std::make_unique<PrivateMmapConstStream>(\n        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));\n    auto read = MakeFixedVecWithMalloc(123, std::size_t{1});\n    ASSERT_TRUE(ReadVec(fi.get(), &read));\n    for (auto v : read) {\n      ASSERT_EQ(v, 1ul);\n    }\n  }\n}\n\nTEST(RefResourceView, IOAligned) {\n  dmlc::TemporaryDirectory tmpdir;\n  auto path = tmpdir.path + \"/testfile\";\n  auto data = MakeFixedVecWithMalloc(123, 1.0f);\n\n  {\n    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, \"wb\");\n    // + sizeof(float) for alignment\n    ASSERT_EQ(WriteVec(fo.get(), data),\n              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type) + sizeof(float));\n  }\n  {\n    auto fi = std::make_unique<PrivateMmapConstStream>(\n        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));\n    // wrong type, float vs. double\n    auto read = MakeFixedVecWithMalloc(123, 2.0);\n    ASSERT_FALSE(ReadVec(fi.get(), &read));\n  }\n  {\n    auto fi = std::make_unique<PrivateMmapConstStream>(\n        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));\n    auto read = MakeFixedVecWithMalloc(123, 2.0f);\n    ASSERT_TRUE(ReadVec(fi.get(), &read));\n    for (auto v : read) {\n      ASSERT_EQ(v, 1ul);\n    }\n  }\n  {\n    // Test std::vector\n    std::vector<float> data(123);\n    std::iota(data.begin(), data.end(), 0.0f);\n    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, \"wb\");\n    // + sizeof(float) for alignment\n    ASSERT_EQ(WriteVec(fo.get(), data), data.size() * sizeof(float) +\n                                            sizeof(RefResourceView<std::size_t>::size_type) +\n                                            sizeof(float));\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_ref_resource_view.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#if defined(__linux__)\n\n#include <gtest/gtest.h>\n#include <thrust/equal.h>                       // for equal\n#include <thrust/fill.h>                        // for fill_n\n#include <thrust/iterator/constant_iterator.h>  // for make_constant_iterator\n#include <thrust/sequence.h>                    // for sequence\n\n#include \"../../../src/common/ref_resource_view.cuh\"\n#include \"../../../src/common/threadpool.h\"  // for ThreadPool\n#include \"../helpers.h\"                      // for MakeCUDACtx\n\nnamespace xgboost::common {\nclass TestCudaGrowOnly : public ::testing::TestWithParam<std::size_t> {\n public:\n  void TestGrow(std::size_t m, std::size_t n) {\n    auto ctx = MakeCUDACtx(0);\n    ctx.CUDACtx()->Stream().Sync();\n\n    auto ref = MakeCudaGrowOnly<double>(m);\n    ASSERT_EQ(ref.size_bytes(), m * sizeof(double));\n    thrust::sequence(ctx.CUDACtx()->CTP(), ref.begin(), ref.end(), 0.0);\n    auto res = std::dynamic_pointer_cast<common::CudaGrowOnlyResource>(ref.Resource());\n    CHECK(res);\n    res->Resize(n * sizeof(double));\n\n    auto ref1 = RefResourceView<double>(res->DataAs<double>(), res->Size() / sizeof(double),\n                                        ref.Resource());\n    ASSERT_EQ(res->Size(), n * sizeof(double));\n    ASSERT_EQ(ref1.size(), n);\n    thrust::sequence(ctx.CUDACtx()->CTP(), ref1.begin(), ref1.end(), static_cast<double>(0.0));\n    std::vector<double> h_vec(ref1.size());\n    dh::safe_cuda(cudaMemcpyAsync(h_vec.data(), ref1.data(), ref1.size_bytes(), cudaMemcpyDefault));\n    for (std::size_t i = 0; i < h_vec.size(); ++i) {\n      ASSERT_EQ(h_vec[i], i);\n    }\n  }\n\n  void Run(std::size_t n) { this->TestGrow(1024, n); }\n};\n\nTEST_P(TestCudaGrowOnly, Resize) { this->Run(this->GetParam()); }\n\nINSTANTIATE_TEST_SUITE_P(RefResourceView, TestCudaGrowOnly, ::testing::Values(1 << 20, 1 << 21));\n\nTEST(HostPinnedMemPool, Alloc) {\n  
std::vector<RefResourceView<double>> refs;\n\n  {\n    // pool goes out of scope before refs does. Test memory safety.\n    auto pool = std::make_shared<cuda_impl::HostPinnedMemPool>();\n    for (std::size_t i = 0; i < 4; ++i) {\n      auto ref = MakeFixedVecWithPinnedMemPool<double>(pool, 128 + i, curt::DefaultStream());\n      refs.emplace_back(std::move(ref));\n    }\n    for (std::size_t i = 0; i < 4; ++i) {\n      auto const& ref = refs[i];\n      ASSERT_EQ(ref.size(), 128 + i);\n      ASSERT_EQ(ref.size_bytes(), ref.size() * sizeof(double));\n    }\n\n    // Thread safety.\n    auto n_threads = static_cast<std::int32_t>(std::thread::hardware_concurrency());\n    common::ThreadPool workers{\"tmempool\", n_threads, [] {\n                               }};\n    std::vector<std::future<RefResourceView<double>>> alloc_futs;\n    for (std::int32_t i = 0, n = n_threads * 4; i < n; ++i) {\n      auto fut = workers.Submit([i, pool] {\n        auto ref = MakeFixedVecWithPinnedMemPool<double>(pool, 128 + i, curt::DefaultStream());\n        return ref;\n      });\n      alloc_futs.emplace_back(std::move(fut));\n    }\n    std::vector<std::future<void>> free_futs(alloc_futs.size());\n    for (std::int32_t i = 0, n = n_threads * 4; i < n; ++i) {\n      auto fut = workers.Submit([i, pool, &alloc_futs, &free_futs] {\n        auto ref = alloc_futs[i].get();\n        ASSERT_EQ(ref.size(), 128 + i);\n      });\n      free_futs[i] = std::move(fut);\n    }\n    for (std::int32_t i = 0, n = n_threads * 4; i < n; ++i) {\n      free_futs[i].get();\n    }\n  }\n}\n}  // namespace xgboost::common\n\n#endif  // defined(__linux__)\n"
  },
  {
    "path": "tests/cpp/common/test_span.cc",
    "content": "/**\n * Copyright 2018-2024, XGBoost contributors\n */\n#include \"test_span.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/span.h>\n\n#include <vector>\n\n#include \"../../../src/common/transform_iterator.h\"  // for MakeIndexTransformIter\n\nnamespace xgboost::common {\nnamespace {\nusing ST = common::Span<int, dynamic_extent>;\nstatic_assert(std::is_trivially_copyable_v<ST>);\nstatic_assert(std::is_trivially_move_assignable_v<ST>);\nstatic_assert(std::is_trivially_move_constructible_v<ST>);\nstatic_assert(std::is_trivially_copy_assignable_v<ST>);\nstatic_assert(std::is_trivially_copy_constructible_v<ST>);\n}  // namespace\n\nTEST(Span, TestStatus) {\n  int status = 1;\n  TestTestStatus {&status}();\n  ASSERT_EQ(status, -1);\n\n  std::vector<double> foo;\n  auto bar = Span{foo};\n  ASSERT_FALSE(bar.data());\n  ASSERT_EQ(bar.size(), 0);\n}\n\nTEST(Span, DlfConstructors) {\n  // Dynamic extent\n  {\n    Span<int> s;\n    ASSERT_EQ(s.size(), 0);\n    ASSERT_EQ(s.data(), nullptr);\n\n    Span<int const> cs;\n    ASSERT_EQ(cs.size(), 0);\n    ASSERT_EQ(cs.data(), nullptr);\n  }\n\n  // Static extent\n  {\n    Span<int, 0> s;\n    ASSERT_EQ(s.size(), 0);\n    ASSERT_EQ(s.data(), nullptr);\n\n    Span<int const, 0> cs;\n    ASSERT_EQ(cs.size(), 0);\n    ASSERT_EQ(cs.data(), nullptr);\n  }\n\n  // Init list.\n  {\n    Span<float> s {};\n    ASSERT_EQ(s.size(), 0);\n    ASSERT_EQ(s.data(), nullptr);\n\n    Span<int const> cs {};\n    ASSERT_EQ(cs.size(), 0);\n    ASSERT_EQ(cs.data(), nullptr);\n  }\n}\n\nTEST(Span, FromNullPtr) {\n  // dynamic extent\n  {\n    Span<float> s {nullptr, static_cast<Span<float>::index_type>(0)};\n    ASSERT_EQ(s.size(), 0);\n    ASSERT_EQ(s.data(), nullptr);\n\n    Span<float const> cs {nullptr, static_cast<Span<float>::index_type>(0)};\n    ASSERT_EQ(cs.size(), 0);\n    ASSERT_EQ(cs.data(), nullptr);\n  }\n  // static extent\n  {\n    Span<float, 0> s {nullptr, static_cast<Span<float>::index_type>(0)};\n    
ASSERT_EQ(s.size(), 0);\n    ASSERT_EQ(s.data(), nullptr);\n\n    Span<float const, 0> cs {nullptr, static_cast<Span<float>::index_type>(0)};\n    ASSERT_EQ(cs.size(), 0);\n    ASSERT_EQ(cs.data(), nullptr);\n  }\n}\n\nTEST(Span, FromPtrLen) {\n  float arr[16];\n  InitializeRange(arr, arr+16);\n\n  // static extent\n  {\n    Span<float> s (arr, 16);\n    ASSERT_EQ (s.size(), 16);\n    ASSERT_EQ (s.data(), arr);\n\n    for (Span<float>::index_type i = 0; i < 16; ++i) {\n      ASSERT_EQ (s[i], arr[i]);\n    }\n\n    Span<float const> cs (arr, 16);\n    ASSERT_EQ (cs.size(), 16);\n    ASSERT_EQ (cs.data(), arr);\n\n    for (Span<float const>::index_type i = 0; i < 16; ++i) {\n      ASSERT_EQ (cs[i], arr[i]);\n    }\n  }\n\n  // dynamic extent\n  {\n    Span<float, 16> s (arr, 16);\n    ASSERT_EQ (s.size(), 16);\n    ASSERT_EQ (s.data(), arr);\n\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ (s[i], arr[i]);\n    }\n\n    Span<float const, 16> cs (arr, 16);\n    ASSERT_EQ (cs.size(), 16);\n    ASSERT_EQ (cs.data(), arr);\n\n    for (Span<float const>::index_type i = 0; i < 16; ++i) {\n      ASSERT_EQ (cs[i], arr[i]);\n    }\n  }\n}\n\nTEST(SpanDeathTest, FromPtrLen) {\n  float arr[16];\n  InitializeRange(arr, arr+16);\n  {\n    auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};\n    EXPECT_DEATH(lazy(), \"\");\n  }\n}\n\nTEST(Span, FromFirstLast) {\n  float arr[16];\n  InitializeRange(arr, arr+16);\n\n  // dynamic extent\n  {\n    Span<float> s (arr, arr + 16);\n    ASSERT_EQ (s.size(), 16);\n    ASSERT_EQ (s.data(), arr);\n    ASSERT_EQ (s.data() + s.size(), arr + 16);\n\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ (s[i], arr[i]);\n    }\n\n    Span<float const> cs (arr, arr + 16);\n    ASSERT_EQ (cs.size(), 16);\n    ASSERT_EQ (cs.data(), arr);\n    ASSERT_EQ (cs.data() + cs.size(), arr + 16);\n\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ (cs[i], arr[i]);\n    }\n  }\n\n  // static extent\n  {\n    Span<float, 16> s (arr, arr + 
16);\n    ASSERT_EQ (s.size(), 16);\n    ASSERT_EQ (s.data(), arr);\n    ASSERT_EQ (s.data() + s.size(), arr + 16);\n\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ (s[i], arr[i]);\n    }\n\n    Span<float const> cs (arr, arr + 16);\n    ASSERT_EQ (cs.size(), 16);\n    ASSERT_EQ (cs.data(), arr);\n    ASSERT_EQ (cs.data() + cs.size(), arr + 16);\n\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ (cs[i], arr[i]);\n    }\n  }\n}\n\nTEST(Span, FromOther) {\n  // convert constructor\n  {\n    Span<int> derived;\n    Span<int const> base{derived};\n    ASSERT_EQ(base.size(), derived.size());\n    ASSERT_EQ(base.data(), derived.data());\n  }\n\n  float arr[16];\n  InitializeRange(arr, arr + 16);\n\n  // default copy constructor\n  {\n    Span<float> s0 (arr);\n    Span<float> s1 (s0);\n    ASSERT_EQ(s0.size(), s1.size());\n    ASSERT_EQ(s0.data(), s1.data());\n  }\n}\n\nTEST(Span, FromArray) {\n  float arr[16];\n  InitializeRange(arr, arr + 16);\n\n  {\n    Span<float> s (arr);\n    ASSERT_EQ(&arr[0], s.data());\n    ASSERT_EQ(s.size(), 16);\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ(arr[i], s[i]);\n    }\n  }\n\n  {\n    Span<float, 16> s (arr);\n    ASSERT_EQ(&arr[0], s.data());\n    ASSERT_EQ(s.size(), 16);\n    for (size_t i = 0; i < 16; ++i) {\n      ASSERT_EQ(arr[i], s[i]);\n    }\n  }\n}\n\nTEST(Span, FromContainer) {\n  std::vector<float> vec (16);\n  InitializeRange(vec.begin(), vec.end());\n\n  Span<float> s(vec);\n  ASSERT_EQ(s.size(), vec.size());\n  ASSERT_EQ(s.data(), vec.data());\n\n  bool res = std::equal(vec.begin(), vec.end(), s.begin());\n  ASSERT_TRUE(res);\n}\n\nTEST(Span, Assignment) {\n  int status = 1;\n  TestAssignment{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(SpanIter, Construct) {\n  int status = 1;\n  TestIterConstruct{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(SpanIter, Ref) {\n  int status = 1;\n  TestIterRef{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(SpanIter, Calculate) {\n  int status = 1;\n  
TestIterCalculate{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(SpanIter, Compare) {\n  int status = 1;\n  TestIterCompare{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, BeginEnd) {\n  int status = 1;\n  TestBeginEnd{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, RBeginREnd) {\n  int status = 1;\n  TestRBeginREnd{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, ElementAccess) {\n  float arr[16];\n  InitializeRange(arr, arr + 16);\n\n  Span<float> s (arr);\n  size_t j = 0;\n  for (auto i : s) {\n    ASSERT_EQ(i, arr[j]);\n    ++j;\n  }\n}\n\nTEST(SpanDeathTest, ElementAccess) {\n  float arr[16];\n  InitializeRange(arr, arr + 16);\n\n  Span<float> s (arr);\n  EXPECT_DEATH(s[16], \"\");\n  EXPECT_DEATH(s[-1], \"\");\n\n  EXPECT_DEATH(s(16), \"\");\n  EXPECT_DEATH(s(-1), \"\");\n}\n\nTEST(Span, Obversers) {\n  int status = 1;\n  TestObservers{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, FrontBack) {\n  {\n    float arr[4] {0, 1, 2, 3};\n    Span<float, 4> s(arr);\n    ASSERT_EQ(s.front(), 0);\n    ASSERT_EQ(s.back(), 3);\n  }\n  {\n    std::vector<double> arr {0, 1, 2, 3};\n    Span<double> s(arr);\n    ASSERT_EQ(s.front(), 0);\n    ASSERT_EQ(s.back(), 3);\n  }\n}\n\nTEST(SpanDeathTest, FrontBack) {\n  {\n    Span<float, 0> s;\n    EXPECT_DEATH(s.front(), \"\");\n    EXPECT_DEATH(s.back(), \"\");\n  }\n  {\n    Span<float> s;\n    EXPECT_DEATH(s.front(), \"\");\n    EXPECT_DEATH(s.back(), \"\");\n  }\n}\n\nTEST(Span, FirstLast) {\n  // static extent\n  {\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    Span<float, 4> first = s.first<4>();\n\n    ASSERT_EQ(first.size(), 4);\n    ASSERT_EQ(first.data(), arr);\n\n    for (size_t i = 0; i < first.size(); ++i) {\n      ASSERT_EQ(first[i], arr[i]);\n    }\n  }\n\n  {\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    Span<float, 4> last = s.last<4>();\n\n    ASSERT_EQ(last.size(), 4);\n    ASSERT_EQ(last.data(), arr 
+ 12);\n\n    for (size_t i = 0; i < last.size(); ++i) {\n      ASSERT_EQ(last[i], arr[i+12]);\n    }\n  }\n\n  // dynamic extent\n  {\n    float *arr = new float[16];\n    InitializeRange(arr, arr + 16);\n    Span<float> s (arr, 16);\n    Span<float> first = s.first(4);\n\n    ASSERT_EQ(first.size(), 4);\n    ASSERT_EQ(first.data(), s.data());\n\n    for (size_t i = 0; i < first.size(); ++i) {\n      ASSERT_EQ(first[i], s[i]);\n    }\n\n    delete [] arr;\n  }\n\n  {\n    float *arr = new float[16];\n    InitializeRange(arr, arr + 16);\n    Span<float> s (arr, 16);\n    Span<float> last = s.last(4);\n\n    ASSERT_EQ(last.size(), 4);\n    ASSERT_EQ(last.data(), s.data() + 12);\n\n    for (size_t i = 0; i < last.size(); ++i) {\n      ASSERT_EQ(s[12 + i], last[i]);\n    }\n\n    delete [] arr;\n  }\n}\n\nTEST(SpanDeathTest, FirstLast) {\n  // static extent\n  {\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);\n    EXPECT_DEATH(s.first<kOne>(), \"\");\n    EXPECT_DEATH(s.first<17>(), \"\");\n    EXPECT_DEATH(s.first<32>(), \"\");\n  }\n\n  {\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);\n    EXPECT_DEATH(s.last<kOne>(), \"\");\n    EXPECT_DEATH(s.last<17>(), \"\");\n    EXPECT_DEATH(s.last<32>(), \"\");\n  }\n\n  // dynamic extent\n  {\n    float *arr = new float[16];\n    InitializeRange(arr, arr + 16);\n    Span<float> s (arr, 16);\n    EXPECT_DEATH(s.first(-1), \"\");\n    EXPECT_DEATH(s.first(17), \"\");\n    EXPECT_DEATH(s.first(32), \"\");\n\n    delete [] arr;\n  }\n\n  {\n    float *arr = new float[16];\n    InitializeRange(arr, arr + 16);\n    Span<float> s (arr, 16);\n    EXPECT_DEATH(s.last(-1), \"\");\n    EXPECT_DEATH(s.last(17), \"\");\n    EXPECT_DEATH(s.last(32), \"\");\n\n    delete [] arr;\n  }\n}\n\nTEST(Span, Subspan) {\n  int 
arr[16] {0};\n  Span<int> s1 (arr);\n  auto s2 = s1.subspan<4>();\n  ASSERT_EQ(s1.size() - 4, s2.size());\n\n  auto s3 = s1.subspan(2, 4);\n  ASSERT_EQ(s1.data() + 2, s3.data());\n  ASSERT_EQ(s3.size(), 4);\n\n  auto s4 = s1.subspan(2, dynamic_extent);\n  ASSERT_EQ(s1.data() + 2, s4.data());\n  ASSERT_EQ(s4.size(), s1.size() - 2);\n}\n\nTEST(SpanDeathTest, Subspan) {\n  int arr[16] {0};\n  Span<int> s1 (arr);\n  EXPECT_DEATH(s1.subspan(-1, 0), \"\");\n  EXPECT_DEATH(s1.subspan(17, 0), \"\");\n\n  auto constexpr kOne = static_cast<Span<int, 4>::index_type>(-1);\n  EXPECT_DEATH(s1.subspan<kOne>(), \"\");\n  EXPECT_DEATH(s1.subspan<17>(), \"\");\n}\n\nTEST(Span, Compare) {\n  int status = 1;\n  TestCompare{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, AsBytes) {\n  int status = 1;\n  TestAsBytes{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, AsWritableBytes) {\n  int status = 1;\n  TestAsWritableBytes{&status}();\n  ASSERT_EQ(status, 1);\n}\n\nTEST(Span, Empty) {\n  {\n    Span<float> s {nullptr, static_cast<Span<float>::index_type>(0)};\n    auto res = s.subspan(0);\n    ASSERT_EQ(res.data(), nullptr);\n    ASSERT_EQ(res.size(), 0);\n\n    res = s.subspan(0, 0);\n    ASSERT_EQ(res.data(), nullptr);\n    ASSERT_EQ(res.size(), 0);\n  }\n\n  {\n    Span<float, 0> s {nullptr, static_cast<Span<float>::index_type>(0)};\n    auto res = s.subspan(0);\n    ASSERT_EQ(res.data(), nullptr);\n    ASSERT_EQ(res.size(), 0);\n\n    res = s.subspan(0, 0);\n    ASSERT_EQ(res.data(), nullptr);\n    ASSERT_EQ(res.size(), 0);\n  }\n}\n\nTEST(SpanDeathTest, Empty) {\n  std::vector<float> data(1, 0);\n  ASSERT_TRUE(data.data());\n  // ok to define 0 size span.\n  Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};\n  EXPECT_DEATH(s[0], \"\");  // not ok to use it.\n}\n\nTEST(IterSpan, Basic) {\n  auto iter = common::MakeIndexTransformIter([](std::size_t i) { return i; });\n  std::size_t n = 13;\n  auto span = IterSpan{iter, n};\n  ASSERT_EQ(span.size(), 
n);\n  for (std::size_t i = 0; i < n; ++i) {\n    ASSERT_EQ(span[i], i);\n  }\n  ASSERT_EQ(span.subspan(1).size(), n - 1);\n  ASSERT_EQ(span.subspan(1)[0], 1);\n  ASSERT_EQ(span.subspan(1, 2)[1], 2);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_span.cu",
    "content": "/**\n * Copyright 2018-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <thrust/execution_policy.h>\n#include <thrust/host_vector.h>\n#include <xgboost/span.h>\n\n#include <numeric>  // for iota\n\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"test_span.h\"\n\nnamespace xgboost {\nnamespace common {\n\nstruct TestStatus {\n private:\n  int *status_;\n\n public:\n  TestStatus () {\n    dh::safe_cuda(cudaMalloc(&status_, sizeof(int)));\n    int h_status = 1;\n    dh::safe_cuda(cudaMemcpy(status_, &h_status,\n                             sizeof(int), cudaMemcpyHostToDevice));\n  }\n  ~TestStatus() {\n    dh::safe_cuda(cudaFree(status_));\n  }\n\n  int Get() {\n    int h_status;\n    dh::safe_cuda(cudaMemcpy(&h_status, status_,\n                             sizeof(int), cudaMemcpyDeviceToHost));\n    return h_status;\n  }\n\n  int* Data() {\n    return status_;\n  }\n};\n\n__global__ void TestFromOtherKernel(Span<float> span) {\n  // don't get optimized out\n  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;\n\n  if (idx >= span.size()) {\n    return;\n  }\n}\n// Test converting different T\n__global__ void TestFromOtherKernelConst(Span<float const, 16> span) {\n  // don't get optimized out\n  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;\n\n  if (idx >= span.size()) {\n    return;\n  }\n}\n\n/*!\n * \\brief Here we just test whether the code compiles.\n */\nTEST(GPUSpan, FromOther) {\n  thrust::host_vector<float> h_vec (16);\n  std::iota(h_vec.begin(), h_vec.end(), 0);\n\n  thrust::device_vector<float> d_vec (h_vec.size());\n  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n  // dynamic extent\n  {\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestFromOtherKernel<<<1, 16>>>(span);\n  }\n  {\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestFromOtherKernelConst<<<1, 16>>>(span);\n  }\n  // static extent\n  {\n    Span<float, 16> 
span(d_vec.data().get(), d_vec.data().get() + 16);\n    TestFromOtherKernel<<<1, 16>>>(span);\n  }\n  {\n    Span<float, 16> span(d_vec.data().get(), d_vec.data().get() + 16);\n    TestFromOtherKernelConst<<<1, 16>>>(span);\n  }\n}\n\nTEST(GPUSpan, Assignment) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestAssignment{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpan, TestStatus) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestTestStatus{status.Data()});\n  ASSERT_EQ(status.Get(), -1);\n}\n\ntemplate <typename T>\nstruct TestEqual {\n private:\n  T *lhs_, *rhs_;\n  int *status_;\n\n public:\n  TestEqual(T* _lhs, T* _rhs, int * _status) :\n      lhs_(_lhs), rhs_(_rhs), status_(_status) {}\n\n  XGBOOST_DEVICE void operator()(size_t _idx) {\n    bool res = lhs_[_idx] == rhs_[_idx];\n    SPAN_ASSERT_TRUE(res, status_);\n  }\n};\n\nTEST(GPUSpan, WithTrust) {\n  dh::safe_cuda(cudaSetDevice(0));\n  // Not adviced to initialize span with host_vector, since h_vec.data() is\n  // a host function.\n  thrust::host_vector<float> h_vec (16);\n  std::iota(h_vec.begin(), h_vec.end(), 0);\n\n  thrust::device_vector<float> d_vec (h_vec.size());\n  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n  // Can't initialize span with device_vector, since d_vec.data() is not raw\n  // pointer\n  {\n    Span<float> s (d_vec.data().get(), d_vec.size());\n\n    ASSERT_EQ(d_vec.size(), s.size());\n    ASSERT_EQ(d_vec.data().get(), s.data());\n  }\n\n  {\n    TestStatus status;\n    thrust::device_vector<float> d_vec1 (d_vec.size());\n    thrust::copy(thrust::device, d_vec.begin(), d_vec.end(), d_vec1.begin());\n    Span<float> s (d_vec1.data().get(), d_vec.size());\n\n    dh::LaunchN(16, TestEqual<float>{\n        thrust::raw_pointer_cast(d_vec1.data()),\n        s.data(), status.Data()});\n    ASSERT_EQ(status.Get(), 1);\n\n    // FIXME(trivialfis): memory error!\n    // bool res = 
thrust::equal(thrust::device,\n    //                          d_vec.begin(), d_vec.end(),\n    //                          s.begin());\n  }\n}\n\nTEST(GPUSpan, BeginEnd) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestBeginEnd{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpan, RBeginREnd) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestRBeginREnd{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\n__global__ void TestModifyKernel(Span<float> span) {\n  size_t idx = threadIdx.x + blockIdx.x * blockDim.x;\n\n  if (idx >= span.size()) {\n    return;\n  }\n  span[idx] = span.size() - idx;\n}\n\nTEST(GPUSpan, Modify) {\n  thrust::host_vector<float> h_vec (16);\n  InitializeRange(h_vec.begin(), h_vec.end());\n\n  thrust::device_vector<float> d_vec (h_vec.size());\n  thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n  Span<float> span (d_vec.data().get(), d_vec.size());\n\n  TestModifyKernel<<<1, 16>>>(span);\n\n  for (size_t i = 0; i < d_vec.size(); ++i) {\n    ASSERT_EQ(d_vec[i], d_vec.size() - i);\n  }\n}\n\nTEST(GPUSpan, Observers) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestObservers{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpan, Compare) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestIterCompare{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nstruct TestElementAccess {\n private:\n  Span<float> span_;\n\n public:\n  XGBOOST_DEVICE explicit TestElementAccess (Span<float> _span) : span_(_span) {}\n\n  XGBOOST_DEVICE float operator()(size_t _idx) {\n    float tmp = span_[_idx];\n    return tmp;\n  }\n};\n\nTEST(GPUSpanDeathTest, ElementAccess) {\n  dh::safe_cuda(cudaSetDevice(0));\n  auto test_element_access = []() {\n    thrust::host_vector<float> h_vec (16);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    
thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    dh::LaunchN(17, TestElementAccess{span});\n  };\n\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(test_element_access(), \"\");\n  std::string output = testing::internal::GetCapturedStdout();\n}\n\n__global__ void TestFirstDynamicKernel(Span<float> _span) {\n  _span.first<static_cast<Span<float>::index_type>(-1)>();\n}\n__global__ void TestFirstStaticKernel(Span<float> _span) {\n  _span.first(static_cast<Span<float>::index_type>(-1));\n}\n__global__ void TestLastDynamicKernel(Span<float> _span) {\n  _span.last<static_cast<Span<float>::index_type>(-1)>();\n}\n__global__ void TestLastStaticKernel(Span<float> _span) {\n  _span.last(static_cast<Span<float>::index_type>(-1));\n}\n\nTEST(GPUSpanDeathTest, FirstLast) {\n  // We construct vectors multiple times since thrust can not recover from\n  // death test.\n  auto lambda_first_dy = []() {\n    thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestFirstDynamicKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_first_dy(), \"\");\n  std::string output = testing::internal::GetCapturedStdout();\n\n  auto lambda_first_static = []() {\n    thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestFirstStaticKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_first_static(), \"\");\n  output = testing::internal::GetCapturedStdout();\n\n  auto lambda_last_dy = []() {\n    
thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestLastDynamicKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_last_dy(), \"\");\n  output = testing::internal::GetCapturedStdout();\n\n  auto lambda_last_static = []() {\n    thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestLastStaticKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_last_static(), \"\");\n  output = testing::internal::GetCapturedStdout();\n}\n\nnamespace {\nvoid TestFrontBack() {\n  Span<float> s;\n  EXPECT_DEATH(\n      {\n        // make sure the termination happens inside this test.\n        try {\n          dh::LaunchN(1, [=] __device__(size_t) { s.front(); });\n          dh::safe_cuda(cudaDeviceSynchronize());\n          dh::safe_cuda(cudaGetLastError());\n        } catch (dmlc::Error const& e) {\n          std::terminate();\n        }\n      },\n      \"\");\n  EXPECT_DEATH(\n      {\n        try {\n          dh::LaunchN(1, [=] __device__(size_t) { s.back(); });\n          dh::safe_cuda(cudaDeviceSynchronize());\n          dh::safe_cuda(cudaGetLastError());\n        } catch (dmlc::Error const& e) {\n          std::terminate();\n        }\n      },\n      \"\");\n}\n}  // namespace\n\nTEST(GPUSpanDeathTest, FrontBack) {\n  TestFrontBack();\n}\n\n__global__ void TestSubspanDynamicKernel(Span<float> _span) {\n  _span.subspan(16, 0);\n}\n__global__ void TestSubspanStaticKernel(Span<float> _span) {\n  _span.subspan<16>();\n}\nTEST(GPUSpanDeathTest, Subspan) {\n  auto 
lambda_subspan_dynamic = []() {\n    thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestSubspanDynamicKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_subspan_dynamic(), \"\");\n  std::string output = testing::internal::GetCapturedStdout();\n\n  auto lambda_subspan_static = []() {\n    thrust::host_vector<float> h_vec (4);\n    InitializeRange(h_vec.begin(), h_vec.end());\n\n    thrust::device_vector<float> d_vec (h_vec.size());\n    thrust::copy(h_vec.begin(), h_vec.end(), d_vec.begin());\n\n    Span<float> span (d_vec.data().get(), d_vec.size());\n    TestSubspanStaticKernel<<<1, 1>>>(span);\n  };\n  testing::internal::CaptureStdout();\n  EXPECT_DEATH(lambda_subspan_static(), \"\");\n  output = testing::internal::GetCapturedStdout();\n}\n\nTEST(GPUSpanIter, Construct) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestIterConstruct{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpanIter, Ref) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestIterRef{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpanIter, Calculate) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestIterCalculate{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpanIter, Compare) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestIterCompare{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpan, AsBytes) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  dh::LaunchN(16, TestAsBytes{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\nTEST(GPUSpan, AsWritableBytes) {\n  dh::safe_cuda(cudaSetDevice(0));\n  TestStatus status;\n  
dh::LaunchN(16, TestAsWritableBytes{status.Data()});\n  ASSERT_EQ(status.Get(), 1);\n}\n\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_span.h",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n#ifndef XGBOOST_TEST_SPAN_H_\n#define XGBOOST_TEST_SPAN_H_\n\n#include <xgboost/base.h>\n#include <xgboost/span.h>\n\ntemplate <typename Iter>\nXGBOOST_DEVICE void InitializeRange(Iter _begin, Iter _end) {\n  float j = 0;\n  for (Iter i = _begin; i != _end; ++i, ++j) {\n    *i = j;\n  }\n}\n\nnamespace xgboost {\nnamespace common {\n\n#define SPAN_ASSERT_TRUE(cond, status)          \\\n  if (!(cond)) {                                \\\n    *(status) = -1;                             \\\n  }\n\n#define SPAN_ASSERT_FALSE(cond, status)         \\\n  if ((cond)) {                                 \\\n    *(status) = -1;                             \\\n  }\n\nstruct TestTestStatus {\n  int * status_;\n\n  TestTestStatus(int* _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    SPAN_ASSERT_TRUE(false, status_);\n  }\n};\n\nstruct TestAssignment {\n  int* status_;\n\n  TestAssignment(int* _status) : status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    Span<float> s1;\n\n    float arr[] = {3, 4, 5};\n\n    Span<const float> s2 = arr;\n    SPAN_ASSERT_TRUE(s2.size() == 3, status_);\n    SPAN_ASSERT_TRUE(s2.data() == &arr[0], status_);\n\n    s2 = s1;\n    SPAN_ASSERT_TRUE(s2.empty(), status_);\n  }\n};\n\nstruct TestBeginEnd {\n  int* status_;\n\n  TestBeginEnd(int* _status) : status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    Span<float>::iterator beg { s.begin() };\n    Span<float>::iterator end { s.end() };\n\n    SPAN_ASSERT_TRUE(end ==  beg + 16, status_);\n    
SPAN_ASSERT_TRUE(*beg == arr[0], status_);\n    SPAN_ASSERT_TRUE(*(end - 1) == arr[15], status_);\n  }\n};\n\nstruct TestRBeginREnd {\n  int * status_;\n\n  TestRBeginREnd(int* _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n\n#if defined(__CUDA_ARCH__)\n    auto rbeg = dh::trbegin(s);\n    auto rend = dh::trend(s);\n#else\n    Span<float>::reverse_iterator rbeg{s.rbegin()};\n    Span<float>::reverse_iterator rend{s.rend()};\n#endif\n\n    SPAN_ASSERT_TRUE(rbeg + 16 == rend, status_);\n    SPAN_ASSERT_TRUE(*(rbeg) == arr[15], status_);\n    SPAN_ASSERT_TRUE(*(rend - 1) == arr[0], status_);\n  }\n};\n\nstruct TestObservers {\n  int * status_;\n\n  TestObservers(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    // empty\n    {\n      float *arr = nullptr;\n      Span<float> s(arr, static_cast<Span<float>::index_type>(0));\n      SPAN_ASSERT_TRUE(s.empty(), status_);\n    }\n\n    // size, size_types\n    {\n      float* arr = new float[16];\n      Span<float> s (arr, 16);\n      SPAN_ASSERT_TRUE(s.size() == 16, status_);\n      SPAN_ASSERT_TRUE(s.size_bytes() == 16 * sizeof(float), status_);\n      delete [] arr;\n    }\n  }\n};\n\nstruct TestCompare {\n  int * status_;\n\n  TestCompare(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float lhs_arr[16], rhs_arr[16];\n    InitializeRange(lhs_arr, lhs_arr + 16);\n    InitializeRange(rhs_arr, rhs_arr + 16);\n\n    Span<float> lhs(lhs_arr);\n    Span<float> rhs(rhs_arr);\n\n    SPAN_ASSERT_TRUE(lhs == rhs, status_);\n    SPAN_ASSERT_FALSE(lhs != rhs, 
status_);\n\n    SPAN_ASSERT_TRUE(lhs <= rhs, status_);\n    SPAN_ASSERT_TRUE(lhs >= rhs, status_);\n\n    lhs[2] -= 1;\n\n    SPAN_ASSERT_FALSE(lhs == rhs, status_);\n    SPAN_ASSERT_TRUE(lhs < rhs, status_);\n    SPAN_ASSERT_FALSE(lhs > rhs, status_);\n  }\n};\n\nstruct TestIterConstruct {\n  int * status_;\n\n  TestIterConstruct(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index.\n    Span<float>::iterator it1;\n    Span<float>::iterator it2;\n    SPAN_ASSERT_TRUE(it1 == it2, status_);\n\n    Span<float>::const_iterator cit1;\n    Span<float>::const_iterator cit2;\n    SPAN_ASSERT_TRUE(cit1 == cit2, status_);\n  }\n};\n\nstruct TestIterRef {\n  int * status_;\n\n  TestIterRef(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    SPAN_ASSERT_TRUE(*(s.begin()) == s[0], status_);\n    SPAN_ASSERT_TRUE(*(s.end() - 1) == s[15], status_);\n  }\n};\n\nstruct TestIterCalculate {\n  int * status_;\n\n  TestIterCalculate(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    Span<float> s (arr);\n    Span<float>::iterator beg { s.begin() };\n\n    beg += 4;\n    SPAN_ASSERT_TRUE(*beg == 4, status_);\n\n    beg -= 2;\n    SPAN_ASSERT_TRUE(*beg == 2, status_);\n\n    ++beg;\n    SPAN_ASSERT_TRUE(*beg == 3, status_);\n\n    --beg;\n    SPAN_ASSERT_TRUE(*beg == 2, status_);\n\n    beg++;\n    beg--;\n    SPAN_ASSERT_TRUE(*beg == 2, status_);\n  }\n};\n\nstruct TestIterCompare {\n  int * status_;\n\n  TestIterCompare(int * _status): status_(_status) {}\n\n  
XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n    Span<float> s (arr);\n    Span<float>::iterator left { s.begin() };\n    Span<float>::iterator right { s.end() };\n\n    left += 1;\n    right -= 15;\n\n    SPAN_ASSERT_TRUE(left == right, status_);\n\n    SPAN_ASSERT_TRUE(left >= right, status_);\n    SPAN_ASSERT_TRUE(left <= right, status_);\n\n    ++right;\n    SPAN_ASSERT_TRUE(right > left, status_);\n    SPAN_ASSERT_TRUE(left < right, status_);\n    SPAN_ASSERT_TRUE(left <= right, status_);\n  }\n};\n\nstruct TestAsBytes {\n  int * status_;\n\n  TestAsBytes(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n    float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    {\n      const Span<const float> s {arr};\n      const Span<const byte> bs = as_bytes(s);\n      SPAN_ASSERT_TRUE(bs.size() == s.size_bytes(), status_);\n      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==\n                       static_cast<const void*>(s.data()),\n                       status_);\n    }\n\n    {\n      Span<float> s;\n      const Span<const byte> bs = as_bytes(s);\n      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);\n      SPAN_ASSERT_TRUE(bs.size() == 0, status_);\n      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);\n      SPAN_ASSERT_TRUE(static_cast<const void*>(bs.data()) ==\n                       static_cast<const void*>(s.data()),\n                       status_);\n      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);\n    }\n  }\n};\n\nstruct TestAsWritableBytes {\n  int * status_;\n\n  TestAsWritableBytes(int * _status): status_(_status) {}\n\n  XGBOOST_DEVICE void operator()() {\n    this->operator()(0);\n  }\n  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index\n  
  float arr[16];\n    InitializeRange(arr, arr + 16);\n\n    {\n      Span<float> s;\n      Span<byte> bs = as_writable_bytes(s);\n      SPAN_ASSERT_TRUE(bs.size() == s.size(), status_);\n      SPAN_ASSERT_TRUE(bs.size_bytes() == s.size_bytes(), status_);\n      SPAN_ASSERT_TRUE(bs.size() == 0, status_);\n      SPAN_ASSERT_TRUE(bs.size_bytes() == 0, status_);\n      SPAN_ASSERT_TRUE(bs.data() == nullptr, status_);\n      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==\n                       static_cast<void*>(s.data()), status_);\n    }\n\n    {\n      Span<float> s { arr };\n      Span<byte> bs { as_writable_bytes(s) };\n      SPAN_ASSERT_TRUE(s.size_bytes() == bs.size_bytes(), status_);\n      SPAN_ASSERT_TRUE(static_cast<void*>(bs.data()) ==\n                       static_cast<void*>(s.data()), status_);\n    }\n  }\n};\n\n}  // namespace common\n}  // namespace xgboost\n\n#endif\n"
  },
  {
    "path": "tests/cpp/common/test_stats.cc",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/linalg.h>  // Tensor,Vector\n\n#include <algorithm>  // for min\n#include <thread>     // for thread\n\n#include \"../../../src/common/linalg_op.h\"  // for begin, end\n#include \"../../../src/common/stats.h\"\n#include \"../../../src/common/transform_iterator.h\"  // common::MakeIndexTransformIter\n#include \"../collective/test_worker.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nTEST(Stats, Quantile) {\n  Context ctx;\n  {\n    linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());\n    std::vector<size_t> index{0, 2, 3, 4, 6};\n    auto h_arr = arr.HostView();\n    auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });\n    auto end = beg + index.size();\n    auto q = Quantile(&ctx, 0.40f, beg, end);\n    ASSERT_EQ(q, 26.0);\n\n    q = Quantile(&ctx, 0.20f, beg, end);\n    ASSERT_EQ(q, 16.0);\n\n    q = Quantile(&ctx, 0.10f, beg, end);\n    ASSERT_EQ(q, 15.0);\n  }\n\n  {\n    std::vector<float> vec{1., 2., 3., 4., 5.};\n    auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });\n    auto end = beg + vec.size();\n    auto q = Quantile(&ctx, 0.5f, beg, end);\n    ASSERT_EQ(q, 3.);\n  }\n}\n\nTEST(Stats, WeightedQuantile) {\n  Context ctx;\n  linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());\n  linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());\n\n  auto h_arr = arr.HostView();\n  auto h_weight = weight.HostView();\n\n  auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(i); });\n  auto end = beg + arr.Size();\n  auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });\n\n  auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);\n  ASSERT_EQ(q, 3);\n\n  q = WeightedQuantile(&ctx, 0.0, beg, end, w);\n  ASSERT_EQ(q, 1);\n\n  q = 
WeightedQuantile(&ctx, 1.0, beg, end, w);\n  ASSERT_EQ(q, 5);\n}\n\nTEST(Stats, Median) {\n  Context ctx;\n\n  {\n    linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};\n    HostDeviceVector<float> weights;\n    linalg::Tensor<float, 1> out;\n    Median(&ctx, values, weights, &out);\n    auto m = out(0);\n    ASSERT_EQ(m, .5f);\n\n#if defined(XGBOOST_USE_CUDA)\n    ctx = ctx.MakeCUDA(0);\n    ASSERT_FALSE(ctx.IsCPU());\n    Median(&ctx, values, weights, &out);\n    m = out(0);\n    ASSERT_EQ(m, .5f);\n#endif  // defined(XGBOOST_USE_CUDA)\n  }\n\n  {\n    ctx = ctx.MakeCPU();\n    // 4x2 matrix\n    linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};\n    HostDeviceVector<float> weights;\n    linalg::Tensor<float, 1> out;\n    Median(&ctx, values, weights, &out);\n    ASSERT_EQ(out(0), .5f);\n    ASSERT_EQ(out(1), .5f);\n\n#if defined(XGBOOST_USE_CUDA)\n    ctx = ctx.MakeCUDA(0);\n    Median(&ctx, values, weights, &out);\n    ASSERT_EQ(out(0), .5f);\n    ASSERT_EQ(out(1), .5f);\n#endif  // defined(XGBOOST_USE_CUDA)\n  }\n}\n\nnamespace {\nvoid TestMean(Context const* ctx) {\n  std::size_t n{128};\n  linalg::Vector<float> data({n}, ctx->Device());\n  auto h_v = data.HostView().Values();\n  std::iota(h_v.begin(), h_v.end(), .0f);\n\n  auto nf = static_cast<float>(n);\n  float mean = nf * (nf - 1) / 2 / n;\n\n  linalg::Vector<float> res{{1}, ctx->Device()};\n  Mean(ctx, data.View(ctx->Device()), &res);\n  auto h_res = res.HostView();\n  ASSERT_EQ(h_res.Size(), 1);\n  ASSERT_EQ(mean, h_res(0));\n}\n}  // anonymous namespace\n\nTEST(Stats, Mean) {\n  Context ctx;\n  TestMean(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(Stats, GpuMean) {\n  auto ctx = MakeCUDACtx(0);\n  TestMean(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nnamespace {\nvoid TestSampleMean(Context const* ctx) {\n  std::size_t m{32}, n{16};\n  linalg::Matrix<float> data({m, n}, ctx->Device());\n  auto h_data = 
data.HostView();\n  std::iota(linalg::begin(h_data), linalg::end(h_data), .0f);\n  linalg::Vector<float> mean;\n  SampleMean(ctx, false, data, &mean);\n  ASSERT_FLOAT_EQ(mean(0), 248.0f);\n  for (std::size_t i = 1; i < mean.Size(); ++i) {\n    ASSERT_EQ(mean(i), mean(i - 1) + 1.0f);\n  }\n}\n\nvoid TestSampleMeanDistributed(Context const* ctx) {\n  std::size_t m{32}, n{16};\n  auto device = ctx->Device();\n  std::int32_t n_workers =\n      device.IsCPU() ? std::min(4u, std::thread::hardware_concurrency()) : curt::AllVisibleGPUs();\n  collective::TestDistributedGlobal(n_workers, [m, n, device, n_workers] {\n    auto rank = collective::GetRank();\n    Context ctx = device.IsCUDA() ? MakeCUDACtx(DistGpuIdx()) : Context{};\n    collective::GetWorkerLocalThreads(collective::GetWorldSize(), &ctx);\n    linalg::Matrix<float> data({m, n}, ctx.Device());\n    auto h_data = data.HostView();\n    for (std::size_t i = 0; i < m; ++i) {\n      for (std::size_t j = 0; j < n; ++j) {\n        h_data(i, j) = i + (m * rank) + j;\n      }\n    }\n    linalg::Vector<float> mean;\n    SampleMean(&ctx, false, data, &mean);\n    ASSERT_EQ(mean.Size(), n);\n    double total = n_workers * m;\n    for (std::size_t i = 0; i < n; ++i) {\n      ASSERT_EQ(mean(i), (i + total - 1.0 + i) * total / 2.0 / total);\n    }\n  });\n}\n\nvoid TestWeightedSampleMean(Context const* ctx) {\n  std::size_t m{32}, n{16};\n  {\n    auto data = linalg::Constant(ctx, 1.0f, m, n);\n    HostDeviceVector<float> w{m, 0.0f, ctx->Device()};\n    auto h_w = w.HostSpan();\n    std::iota(h_w.data(), h_w.data() + h_w.size(), 1.0f);\n    linalg::Vector<float> mean;\n    WeightedSampleMean(ctx, false, data, w, &mean);\n    for (auto v : mean.HostView()) {\n      ASSERT_FLOAT_EQ(v, 1.0f);\n    }\n  }\n  {\n    linalg::Matrix<float> data({m, n}, ctx->Device());\n    auto h_data = data.HostView();\n    std::iota(linalg::begin(h_data), linalg::end(h_data), .0f);\n    HostDeviceVector<float> w{m, 1.0f, ctx->Device()};\n    
linalg::Vector<float> mean;\n    WeightedSampleMean(ctx, false, data, w, &mean);\n    ASSERT_FLOAT_EQ(mean(0), 248.0f);\n    for (std::size_t i = 1; i < mean.Size(); ++i) {\n      ASSERT_EQ(mean(i), mean(i - 1) + 1.0f);\n    }\n  }\n}\n\nvoid TestWeightedSampleMeanDistributed(Context const* ctx) {\n  std::size_t m{32}, n{16};\n  auto device = ctx->Device();\n  std::int32_t n_workers =\n      device.IsCPU() ? std::min(4u, std::thread::hardware_concurrency()) : curt::AllVisibleGPUs();\n\n  collective::TestDistributedGlobal(n_workers, [m, n, device, n_workers] {\n    auto rank = collective::GetRank();\n    Context ctx = device.IsCUDA() ? MakeCUDACtx(DistGpuIdx()) : Context{};\n    collective::GetWorkerLocalThreads(collective::GetWorldSize(), &ctx);\n    linalg::Matrix<float> data({m, n}, ctx.Device());\n    auto h_data = data.HostView();\n    for (std::size_t i = 0; i < m; ++i) {\n      for (std::size_t j = 0; j < n; ++j) {\n        h_data(i, j) = i + (m * rank) + j;\n      }\n    }\n    HostDeviceVector<float> w{m, 1.0f, ctx.Device()};\n    linalg::Vector<float> mean;\n    WeightedSampleMean(&ctx, false, data, w, &mean);\n    ASSERT_EQ(mean.Size(), n);\n    double total = n_workers * m;\n    for (std::size_t i = 0; i < n; ++i) {\n      ASSERT_EQ(mean(i), (i + total - 1.0 + i) * total / 2.0 / total);\n    }\n  });\n}\n}  // namespace\n\nTEST(Stats, SampleMean) {\n  Context ctx;\n  TestSampleMean(&ctx);\n}\n\nTEST(Stats, SampleMeanDist) {\n  Context ctx;\n  TestSampleMeanDistributed(&ctx);\n}\n\n\nTEST(Stats, WeightedSampleMean) {\n  Context ctx;\n  TestWeightedSampleMean(&ctx);\n}\n\nTEST(Stats, WeightedSampleMeanDist) {\n  Context ctx;\n  TestWeightedSampleMeanDistributed(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(Stats, GpuSampleMean) {\n  auto ctx = MakeCUDACtx(0);\n  TestSampleMean(&ctx);\n}\n\n#if defined(XGBOOST_USE_NCCL)\nTEST(Stats, MGPUSampleMeanDist) {\n  auto ctx = MakeCUDACtx(0);\n  TestSampleMeanDistributed(&ctx);\n}\n#endif  // 
defined(XGBOOST_USE_NCCL)\n\nTEST(Stats, GpuWeightedSampleMean) {\n  auto ctx = MakeCUDACtx(0);\n  TestWeightedSampleMean(&ctx);\n}\n\n#if defined(XGBOOST_USE_NCCL)\nTEST(Stats, MGPUWeightedSampleMeanDist) {\n  auto ctx = MakeCUDACtx(0);\n  TestWeightedSampleMeanDistributed(&ctx);\n}\n#endif  // defined(XGBOOST_USE_NCCL)\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_stats.cu",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstddef>  // std::size_t\n#include <utility>  // std::pair\n#include <vector>   // std::vector\n\n#include \"../../../src/common/linalg_op.cuh\"  // ElementWiseTransformKernel\n#include \"../../../src/common/stats.cuh\"\n#include \"../helpers.h\"\n#include \"xgboost/base.h\"                // XGBOOST_DEVICE\n#include \"xgboost/context.h\"             // Context\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/linalg.h\"              // Tensor\n\nnamespace xgboost {\nnamespace common {\nnamespace {\nclass StatsGPU : public ::testing::Test {\n private:\n  linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};\n  linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};\n  HostDeviceVector<float> results_;\n  using TestSet = std::vector<std::pair<float, float>>;\n  Context ctx_;\n\n  void Check(float expected) {\n    auto const& h_results = results_.HostVector();\n    ASSERT_EQ(h_results.size(), indptr_.Size() - 1);\n    ASSERT_EQ(h_results.front(), expected);\n    ASSERT_EQ(h_results.back(), expected);\n  }\n\n public:\n  void SetUp() override { ctx_  = MakeCUDACtx(0); }\n\n  void WeightedMulti() {\n    // data for one segment\n    std::vector<float> seg{1.f, 2.f, 3.f, 4.f, 5.f};\n    auto seg_size = seg.size();\n\n    // 3 segments\n    std::vector<float> data;\n    data.insert(data.cend(), seg.begin(), seg.end());\n    data.insert(data.cend(), seg.begin(), seg.end());\n    data.insert(data.cend(), seg.begin(), seg.end());\n    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};\n    auto d_arr = arr.View(DeviceOrd::CUDA(0));\n\n    auto key_it = dh::MakeTransformIterator<std::size_t>(\n        thrust::make_counting_iterator(0ul),\n        [=] XGBOOST_DEVICE(std::size_t i) { return i * seg_size; });\n    auto val_it =\n        
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                         [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });\n\n    // one alpha for each segment\n    HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};\n    alphas.SetDevice(FstCU());\n    auto d_alphas = alphas.ConstDeviceSpan();\n    auto w_it = thrust::make_constant_iterator(0.1f);\n    SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,\n                              val_it + d_arr.Size(), w_it, w_it + d_arr.Size(), &results_);\n\n    auto const& h_results = results_.HostVector();\n    ASSERT_EQ(1.0f, h_results[0]);\n    ASSERT_EQ(3.0f, h_results[1]);\n    ASSERT_EQ(5.0f, h_results[2]);\n  }\n\n  void Weighted() {\n    auto d_arr = arr_.View(DeviceOrd::CUDA(0));\n    auto d_key = indptr_.View(DeviceOrd::CUDA(0));\n\n    auto key_it = dh::MakeTransformIterator<std::size_t>(\n        thrust::make_counting_iterator(0ul),\n        [=] XGBOOST_DEVICE(std::size_t i) { return d_key(i); });\n    auto val_it =\n        dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                         [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });\n    linalg::Tensor<float, 1> weights{{10}, FstCU()};\n    linalg::cuda_impl::TransformIdxKernel(\n        &ctx_, weights.View(DeviceOrd::CUDA(0)),\n        [=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });\n    auto w_it = weights.Data()->ConstDevicePointer();\n    for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {\n      SegmentedWeightedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,\n                                val_it + arr_.Size(), w_it, w_it + weights.Size(), &results_);\n      this->Check(pair.second);\n    }\n  }\n\n  void NonWeightedMulti() {\n    // data for one segment\n    std::vector<float> seg{20.f, 15.f, 50.f, 40.f, 35.f};\n    auto seg_size = seg.size();\n\n    // 3 
segments\n    std::vector<float> data;\n    data.insert(data.cend(), seg.begin(), seg.end());\n    data.insert(data.cend(), seg.begin(), seg.end());\n    data.insert(data.cend(), seg.begin(), seg.end());\n    linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};\n    auto d_arr = arr.View(DeviceOrd::CUDA(0));\n\n    auto key_it = dh::MakeTransformIterator<std::size_t>(\n        thrust::make_counting_iterator(0ul),\n        [=] XGBOOST_DEVICE(std::size_t i) { return i * seg_size; });\n    auto val_it =\n        dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                         [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });\n\n    // one alpha for each segment\n    HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};\n    alphas.SetDevice(FstCU());\n    auto d_alphas = alphas.ConstDeviceSpan();\n    SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,\n                      val_it + d_arr.Size(), &results_);\n\n    auto const& h_results = results_.HostVector();\n    EXPECT_EQ(15.0f, h_results[0]);\n    EXPECT_EQ(16.0f, h_results[1]);\n    ASSERT_EQ(26.0f, h_results[2]);\n  }\n\n  void NonWeighted() {\n    auto d_arr = arr_.View(DeviceOrd::CUDA(0));\n    auto d_key = indptr_.View(DeviceOrd::CUDA(0));\n\n    auto key_it = dh::MakeTransformIterator<std::size_t>(\n        thrust::make_counting_iterator(0ul), [=] __device__(std::size_t i) { return d_key(i); });\n    auto val_it =\n        dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),\n                                         [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });\n\n    for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {\n      SegmentedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,\n                        val_it + arr_.Size(), &results_);\n      this->Check(pair.second);\n    }\n  }\n};\n}  // anonymous 
namespace\n\nTEST_F(StatsGPU, Quantile) {\n  this->NonWeighted();\n  this->NonWeightedMulti();\n}\n\nTEST_F(StatsGPU, WeightedQuantile) {\n  this->Weighted();\n  this->WeightedMulti();\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_string_view.cc",
    "content": "/**\n * Copyright 2021-2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/string_view.h>\n\n#include <algorithm>  // std::equal\n#include <sstream>    // std::stringstream\n#include <string>     // std::string\n\nnamespace xgboost {\nTEST(StringView, Basic) {\n  StringView str{\"This is a string.\"};\n  std::stringstream ss;\n  ss << str;\n\n  std::string res = ss.str();\n  ASSERT_EQ(str.size(), res.size());\n  ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin()));\n\n  auto substr = str.substr(5, 2);\n  ASSERT_EQ(substr.size(), 2);\n\n  ASSERT_EQ(StringView{\"is\"}.size(), 2);\n  ASSERT_TRUE(substr == \"is\");\n  ASSERT_FALSE(substr != \"is\");\n  ASSERT_FALSE(substr == \"foobar\");\n  ASSERT_FALSE(substr == \"i\");\n\n  ASSERT_TRUE(std::equal(substr.crbegin(), substr.crend(), StringView{\"si\"}.cbegin()));\n\n  {\n    StringView empty{nullptr};\n    ASSERT_TRUE(empty.empty());\n  }\n  {\n    StringView empty{\"\"};\n    ASSERT_TRUE(empty.empty());\n    StringView empty2{nullptr};\n    ASSERT_EQ(empty, empty2);\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_survival_util.cc",
    "content": "/*!\n * Copyright (c) by Contributors 2020\n */\n#include <gtest/gtest.h>\n\n#include \"../../../src/common/survival_util.h\"\n\nnamespace xgboost {\nnamespace common {\n\ntemplate <typename Distribution>\ninline static void RobustTestSuite(double y_lower, double y_upper, double sigma) {\n  for (int i = 50; i >= -50; --i) {\n    const double y_pred = std::pow(10.0, static_cast<double>(i));\n    const double z = (std::log(y_lower) - std::log(y_pred)) / sigma;\n    const double gradient\n      = AFTLoss<Distribution>::Gradient(y_lower, y_upper, std::log(y_pred), sigma);\n    const double hessian\n      = AFTLoss<Distribution>::Hessian(y_lower, y_upper, std::log(y_pred), sigma);\n    ASSERT_FALSE(std::isnan(gradient)) << \"z = \" << z << \", y \\\\in [\"\n      << y_lower << \", \" << y_upper << \"], y_pred = \" << y_pred\n      << \", dist = \" << static_cast<int>(Distribution::Type());\n    ASSERT_FALSE(std::isinf(gradient)) << \"z = \" << z << \", y \\\\in [\"\n      << y_lower << \", \" << y_upper << \"], y_pred = \" << y_pred\n      << \", dist = \" << static_cast<int>(Distribution::Type());\n    ASSERT_FALSE(std::isnan(hessian)) << \"z = \" << z << \", y \\\\in [\"\n      << y_lower << \", \" << y_upper << \"], y_pred = \" << y_pred\n      << \", dist = \" << static_cast<int>(Distribution::Type());\n    ASSERT_FALSE(std::isinf(hessian)) << \"z = \" << z << \", y \\\\in [\"\n      << y_lower << \", \" << y_upper << \"], y_pred = \" << y_pred\n      << \", dist = \" << static_cast<int>(Distribution::Type());\n  }\n}\n\nTEST(AFTLoss, RobustGradientPair) {  // Ensure that INF and NAN don't show up in gradient pair\n  RobustTestSuite<NormalDistribution>(16.0, 200.0, 2.0);\n  RobustTestSuite<LogisticDistribution>(16.0, 200.0, 2.0);\n  RobustTestSuite<ExtremeDistribution>(16.0, 200.0, 2.0);\n  RobustTestSuite<NormalDistribution>(100.0, 100.0, 2.0);\n  RobustTestSuite<LogisticDistribution>(100.0, 100.0, 2.0);\n  
RobustTestSuite<ExtremeDistribution>(100.0, 100.0, 2.0);\n}\n\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_threading_utils.cc",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n */\n#include <dmlc/omp.h>  // for omp_in_parallel\n#include <gtest/gtest.h>\n\n#include <cstddef>  // for std::size_t\n\n#include \"../../../src/common/threading_utils.h\"  // BlockedSpace2d,ParallelFor2d,ParallelFor\n#include \"xgboost/context.h\"                      // Context\n\nnamespace xgboost::common {\nTEST(ParallelFor2d, CreateBlockedSpace2d) {\n  constexpr size_t kDim1 = 5;\n  constexpr size_t kDim2 = 3;\n  constexpr size_t kGrainSize = 1;\n\n  BlockedSpace2d space(\n      kDim1, [&](size_t) { return kDim2; }, kGrainSize);\n\n  ASSERT_EQ(kDim1 * kDim2, space.Size());\n\n  for (size_t i = 0; i < kDim1; i++) {\n    for (size_t j = 0; j < kDim2; j++) {\n      ASSERT_EQ(space.GetFirstDimension(i*kDim2 + j), i);\n      ASSERT_EQ(j, space.GetRange(i*kDim2 + j).begin());\n      ASSERT_EQ(j + kGrainSize, space.GetRange(i*kDim2 + j).end());\n    }\n  }\n}\n\nTEST(ParallelFor2d, Test) {\n  constexpr size_t kDim1 = 100;\n  constexpr size_t kDim2 = 15;\n  constexpr size_t kGrainSize = 2;\n\n  // working space is matrix of size (kDim1 x kDim2)\n  std::vector<int> matrix(kDim1 * kDim2, 0);\n  BlockedSpace2d space(\n      kDim1, [&](size_t) { return kDim2; }, kGrainSize);\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"nthread\", \"4\"}});\n  ASSERT_EQ(ctx.nthread, 4);\n\n  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {\n    for (auto j = r.begin(); j < r.end(); ++j) {\n      matrix[i * kDim2 + j] += 1;\n    }\n  });\n\n  for (size_t i = 0; i < kDim1 * kDim2; i++) {\n    ASSERT_EQ(matrix[i], 1);\n  }\n}\n\nTEST(ParallelFor2d, NonUniform) {\n  constexpr size_t kDim1 = 5;\n  constexpr size_t kGrainSize = 256;\n\n  // here are quite non-uniform distribution in space\n  // but ParallelFor2d should split them by blocks with max size = kGrainSize\n  // and process in balanced manner (optimal performance)\n  std::vector<size_t> dim2 { 1024, 500, 255, 5, 10000 };\n  BlockedSpace2d 
space(kDim1, [&](size_t i) {\n      return dim2[i];\n  }, kGrainSize);\n\n  std::vector<std::vector<int>> working_space(kDim1);\n  for (size_t i = 0; i < kDim1; i++) {\n    working_space[i].resize(dim2[i], 0);\n  }\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"nthread\", \"4\"}});\n  ASSERT_EQ(ctx.nthread, 4);\n\n  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {\n    for (auto j = r.begin(); j < r.end(); ++j) {\n      working_space[i][j] += 1;\n    }\n  });\n\n  for (size_t i = 0; i < kDim1; i++) {\n    for (size_t j = 0; j < dim2[i]; j++) {\n      ASSERT_EQ(working_space[i][j], 1);\n    }\n  }\n}\n\nTEST(ParallelFor, Basic) {\n  Context ctx;\n  std::size_t n{16};\n  auto n_threads = ctx.Threads();\n  ParallelFor(n, n_threads, [&](auto i) {\n    ASSERT_EQ(ctx.Threads(), 1);\n    if (n_threads > 1) {\n      ASSERT_TRUE(omp_in_parallel());\n    }\n    ASSERT_LT(i, n);\n  });\n  ASSERT_FALSE(omp_in_parallel());\n  ParallelFor(n, 1, [&](auto) { ASSERT_FALSE(omp_in_parallel()); });\n}\n\nTEST(OmpGetNumThreads, Max) {\n#if defined(_OPENMP)\n  auto n_threads = OmpGetNumThreads(1 << 18);\n  ASSERT_LE(n_threads, std::thread::hardware_concurrency());  // le due to container\n  n_threads = OmpGetNumThreads(0);\n  ASSERT_GE(n_threads, 1);\n  ASSERT_LE(n_threads, std::thread::hardware_concurrency());\n#endif\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_threading_utils.cu",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/copy.h>  // thrust::copy\n\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../../../src/common/threading_utils.cuh\"\n#include \"../helpers.h\"  // for MakeCUDACtx\n\nnamespace xgboost::common {\nTEST(SegmentedTrapezoidThreads, Basic) {\n  size_t constexpr kElements = 24, kGroups = 3;\n  auto ctx = MakeCUDACtx(0);\n  dh::device_vector<size_t> offset_ptr(kGroups + 1, 0);\n  offset_ptr[0] = 0;\n  offset_ptr[1] = 8;\n  offset_ptr[2] = 16;\n  offset_ptr[kGroups] = kElements;\n\n  size_t h = 1;\n  dh::device_vector<size_t> thread_ptr(kGroups + 1, 0);\n  size_t total = SegmentedTrapezoidThreads(&ctx, dh::ToSpan(offset_ptr), dh::ToSpan(thread_ptr), h);\n  ASSERT_EQ(total, kElements - kGroups);\n\n  h = 2;\n  SegmentedTrapezoidThreads(&ctx, dh::ToSpan(offset_ptr), dh::ToSpan(thread_ptr), h);\n  std::vector<size_t> h_thread_ptr(thread_ptr.size());\n  thrust::copy(thread_ptr.cbegin(), thread_ptr.cend(), h_thread_ptr.begin());\n  for (size_t i = 1; i < h_thread_ptr.size(); ++i) {\n    ASSERT_EQ(h_thread_ptr[i] - h_thread_ptr[i - 1], 13);\n  }\n\n  h = 7;\n  SegmentedTrapezoidThreads(&ctx, dh::ToSpan(offset_ptr), dh::ToSpan(thread_ptr), h);\n  thrust::copy(thread_ptr.cbegin(), thread_ptr.cend(), h_thread_ptr.begin());\n  for (size_t i = 1; i < h_thread_ptr.size(); ++i) {\n    ASSERT_EQ(h_thread_ptr[i] - h_thread_ptr[i - 1], 28);\n  }\n}\n\nTEST(SegmentedTrapezoidThreads, Unravel) {\n  size_t i = 0, j = 0;\n  size_t constexpr kN = 8;\n\n  UnravelTrapeziodIdx(6, kN, &i, &j);\n  ASSERT_EQ(i, 0);\n  ASSERT_EQ(j, 7);\n\n  UnravelTrapeziodIdx(12, kN, &i, &j);\n  ASSERT_EQ(i, 1);\n  ASSERT_EQ(j, 7);\n\n  UnravelTrapeziodIdx(15, kN, &i, &j);\n  ASSERT_EQ(i, 2);\n  ASSERT_EQ(j, 5);\n\n  UnravelTrapeziodIdx(21, kN, &i, &j);\n  ASSERT_EQ(i, 3);\n  ASSERT_EQ(j, 7);\n\n  UnravelTrapeziodIdx(25, kN, &i, &j);\n  ASSERT_EQ(i, 5);\n  ASSERT_EQ(j, 6);\n\n  
UnravelTrapeziodIdx(27, kN, &i, &j);\n  ASSERT_EQ(i, 6);\n  ASSERT_EQ(j, 7);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_threadpool.cc",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/global_config.h>  // for GlobalConfigThreadLocalStore\n\n#include <cstddef>  // for size_t\n#include <cstdint>  // for int32_t\n#include <future>   // for future\n#include <thread>   // for sleep_for, thread\n\n#include \"../../../src/common/threadpool.h\"\n\nnamespace xgboost::common {\nTEST(ThreadPool, Basic) {\n  std::int32_t n_threads = std::thread::hardware_concurrency();\n\n  // Set verbosity to 0 for thread-local variable.\n  auto orig = GlobalConfigThreadLocalStore::Get()->verbosity;\n  GlobalConfigThreadLocalStore::Get()->verbosity = 4;\n  // 4 is an invalid value, it's only possible to set it by bypassing the parameter\n  // validation.\n  ASSERT_NE(orig, GlobalConfigThreadLocalStore::Get()->verbosity);\n  ThreadPool pool{StringView{\"test\"}, n_threads, [config = *GlobalConfigThreadLocalStore::Get()] {\n                    *GlobalConfigThreadLocalStore::Get() = config;\n                  }};\n  GlobalConfigThreadLocalStore::Get()->verbosity = orig;  // restore\n\n  {\n    auto fut = pool.Submit([] { return GlobalConfigThreadLocalStore::Get()->verbosity; });\n    ASSERT_EQ(fut.get(), 4);\n    ASSERT_EQ(GlobalConfigThreadLocalStore::Get()->verbosity, orig);\n  }\n  {\n    auto fut = pool.Submit([] { return 3; });\n    ASSERT_EQ(fut.get(), 3);\n  }\n  {\n    auto fut = pool.Submit([] { return std::string{\"ok\"}; });\n    ASSERT_EQ(fut.get(), \"ok\");\n  }\n  {\n    std::vector<std::future<std::size_t>> futures;\n    for (std::size_t i = 0; i < static_cast<std::size_t>(n_threads) * 16; ++i) {\n      futures.emplace_back(pool.Submit([=] {\n        std::this_thread::sleep_for(std::chrono::milliseconds{10});\n        return i;\n      }));\n    }\n    for (std::size_t i = 0; i < futures.size(); ++i) {\n      ASSERT_EQ(futures[i].get(), i);\n    }\n  }\n  {\n    std::vector<std::future<std::size_t>> futures;\n    for (std::size_t i = 0; i < 
static_cast<std::size_t>(n_threads) * 16; ++i) {\n      futures.emplace_back(pool.Submit([=] {\n        return i;\n      }));\n    }\n    for (std::size_t i = 0; i < futures.size(); ++i) {\n      ASSERT_EQ(futures[i].get(), i);\n    }\n  }\n  {\n    std::int32_t val{0};\n    auto fut = pool.Submit([&] { val = 3; });\n    static_assert(std::is_void_v<decltype(fut.get())>);\n    fut.get();\n    ASSERT_EQ(val, 3);\n  }\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_transform_iterator.cc",
    "content": "/**\n * Copyright 2022 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstddef>  // std::size_t\n\n#include \"../../../src/common/transform_iterator.h\"\n\nnamespace xgboost {\nnamespace common {\nTEST(IndexTransformIter, Basic) {\n  auto sqr = [](std::size_t i) { return i * i; };\n  auto iter = MakeIndexTransformIter(sqr);\n  for (std::size_t i = 0; i < 4; ++i) {\n    ASSERT_EQ(iter[i], sqr(i));\n  }\n}\n}  // namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/common/test_transform_range.cc",
    "content": "/**\n * Copyright 2018-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/span.h>\n\n#include <numeric>  // for iota\n#include <vector>\n\n#include \"../../../src/common/transform.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\nnamespace {\nconstexpr DeviceOrd TransformDevice() {\n#if defined(__CUDACC__)\n  return DeviceOrd::CUDA(0);\n#else\n  return DeviceOrd::CPU();\n#endif\n}\n}  // namespace\n\ntemplate <typename T>\nstruct TestTransformRange {\n  void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {\n    _out[_idx] = _in[_idx];\n  }\n};\n\nTEST(Transform, DeclareUnifiedTest(Basic)) {\n  const size_t size{256};\n  std::vector<float> h_in(size);\n  std::vector<float> h_out(size);\n  std::iota(h_in.begin(), h_in.end(), 0);\n  std::vector<float> h_sol(size);\n  std::iota(h_sol.begin(), h_sol.end(), 0);\n\n  auto device = TransformDevice();\n  HostDeviceVector<float> const in_vec{h_in, device};\n  HostDeviceVector<float> out_vec{h_out, device};\n  out_vec.Fill(0);\n\n  Transform<>::Init(TestTransformRange<float>{},\n                    Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),\n                    TransformDevice())\n      .Eval(&out_vec, &in_vec);\n  std::vector<float> res = out_vec.HostVector();\n\n  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));\n}\n\n#if !defined(__CUDACC__)\nTEST(TransformDeathTest, Exception) {\n  size_t const kSize{16};\n  std::vector<float> h_in(kSize);\n  const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};\n  EXPECT_DEATH(\n      {\n        Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },\n                          Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),\n                          DeviceOrd::CPU())\n            .Eval(&in_vec);\n      },\n      
\"\");\n}\n#endif\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/common/test_transform_range.cu",
    "content": "/**\n * Copyright 2023 XGBoost contributors\n */\n// Dummy file to keep the CUDA tests.\n#include \"test_transform_range.cc\"\n"
  },
  {
    "path": "tests/cpp/common/test_version.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <dmlc/io.h>\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/json.h>\n#include <xgboost/version_config.h>\n\n#include <string>\n\n#include \"../../../src/common/version.h\"\n#include \"../filesystem.h\"  // dmlc::TemporaryDirectory\n\nnamespace xgboost {\nTEST(Version, Basic) {\n  Json j_ver { Object() };\n  Version::Save(&j_ver);\n  auto triplet { Version::Load(j_ver) };\n  ASSERT_TRUE(Version::Same(triplet));\n\n  common::TemporaryDirectory tempdir;\n  const std::string fname = tempdir.Str() + \"/version\";\n\n  {\n    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), \"w\"));\n    Version::Save(fo.get());\n  }\n\n  {\n    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\"));\n    auto triplet { Version::Load(fi.get())};;\n    ASSERT_TRUE(Version::Same(triplet));\n  }\n\n  std::string str { Version::String(triplet) };\n\n  size_t ptr {0};\n  XGBoostVersionT v {0};\n  v = std::stoi(str, &ptr);\n  ASSERT_EQ(str.at(ptr), '.');\n  ASSERT_EQ(v, XGBOOST_VER_MAJOR) << \"major: \" << v;\n\n  str = str.substr(ptr+1);\n\n  ptr = 0;\n  v = std::stoi(str, &ptr);\n  ASSERT_EQ(str.at(ptr), '.');\n  ASSERT_EQ(v, XGBOOST_VER_MINOR) << \"minor: \" << v;;\n\n  str = str.substr(ptr+1);\n\n  ptr = 0;\n  v = std::stoi(str, &ptr);\n  ASSERT_EQ(v, XGBOOST_VER_PATCH) << \"patch: \" << v;;\n\n  str = str.substr(ptr);\n  ASSERT_EQ(str.size(), 0);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_adapter.cc",
    "content": "/**\n *  Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n\n#include <type_traits>\n#include <utility>\n\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/data/simple_dmatrix.h\"\n#include \"../helpers.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/c_api.h\"\n\nnamespace xgboost {\nTEST(Adapter, CSRArrayAdapter) {\n  {\n    std::size_t n = 2;\n    HostDeviceVector<float> data{1, 2, 3, 4, 5};\n    HostDeviceVector<unsigned> feature_idx{0, 1, 0, 1, 1};\n    HostDeviceVector<size_t> row_ptr{0, 2, 4, 5};\n\n    auto j_data = Json::Dump(GetArrayInterface(&data, data.Size(), 1));\n    auto j_feature_idx = Json::Dump(GetArrayInterface(&feature_idx, feature_idx.Size(), 1));\n    auto j_row_ptr = Json::Dump(GetArrayInterface(&row_ptr, row_ptr.Size(), 1));\n\n    data::CSRArrayAdapter adapter{j_row_ptr, j_feature_idx, j_data, n};\n    adapter.Next();\n    auto &batch = adapter.Value();\n    auto line0 = batch.GetLine(0);\n    EXPECT_EQ(line0.GetElement(0).value, 1);\n    EXPECT_EQ(line0.GetElement(1).value, 2);\n\n    auto line1 = batch.GetLine(1);\n    EXPECT_EQ(line1.GetElement(0).value, 3);\n    EXPECT_EQ(line1.GetElement(1).value, 4);\n\n    auto line2 = batch.GetLine(2);\n    EXPECT_EQ(line2.GetElement(0).value, 5);\n    EXPECT_EQ(line2.GetElement(0).row_idx, 2);\n    EXPECT_EQ(line2.GetElement(0).column_idx, 1);\n  }\n  {\n    HostDeviceVector<std::size_t> indptr;\n    HostDeviceVector<float> values;\n    HostDeviceVector<bst_feature_t> indices;\n    size_t n_features = 100, n_samples = 10;\n    RandomDataGenerator{n_samples, n_features, 0.5}.GenerateCSR(&values, &indptr, &indices);\n    using linalg::MakeVec;\n    auto indptr_arr = ArrayInterfaceStr(MakeVec(indptr.HostPointer(), indptr.Size()));\n    auto values_arr = ArrayInterfaceStr(MakeVec(values.HostPointer(), values.Size()));\n    auto indices_arr = ArrayInterfaceStr(MakeVec(indices.HostPointer(), indices.Size()));\n    
auto adapter =\n        data::CSRArrayAdapter(StringView{indptr_arr.c_str(), indptr_arr.size()},\n                              StringView{values_arr.c_str(), values_arr.size()},\n                              StringView{indices_arr.c_str(), indices_arr.size()}, n_features);\n    auto batch = adapter.Value();\n    ASSERT_EQ(batch.NumRows(), n_samples);\n    ASSERT_EQ(batch.NumCols(), n_features);\n\n    ASSERT_EQ(adapter.NumRows(), n_samples);\n    ASSERT_EQ(adapter.NumColumns(), n_features);\n  }\n}\n\nTEST(Adapter, CSCAdapterColsMoreThanRows) {\n  HostDeviceVector<float> data{1, 2, 3, 4, 5, 6, 7, 8};\n  HostDeviceVector<unsigned> row_idx{0, 1, 0, 1, 0, 1, 0, 1};\n  HostDeviceVector<size_t> col_ptr{0, 2, 4, 6, 8};\n\n  auto j_data = Json::Dump(GetArrayInterface(&data, data.Size(), 1));\n  auto j_row_idx = Json::Dump(GetArrayInterface(&row_idx, row_idx.Size(), 1));\n  auto j_col_ptr = Json::Dump(GetArrayInterface(&col_ptr, col_ptr.Size(), 1));\n\n  data::CSCArrayAdapter adapter{j_col_ptr, j_row_idx, j_data, 0};\n  // Infer row count\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1);\n  EXPECT_EQ(dmat.Info().num_col_, 4);\n  EXPECT_EQ(dmat.Info().num_row_, 2);\n  EXPECT_EQ(dmat.Info().num_nonzero_, 8);\n\n  auto &batch = *dmat.GetBatches<SparsePage>().begin();\n  auto page = batch.GetView();\n  auto inst = page[0];\n  EXPECT_EQ(inst[0].fvalue, 1);\n  EXPECT_EQ(inst[0].index, 0);\n  EXPECT_EQ(inst[1].fvalue, 3);\n  EXPECT_EQ(inst[1].index, 1);\n  EXPECT_EQ(inst[2].fvalue, 5);\n  EXPECT_EQ(inst[2].index, 2);\n  EXPECT_EQ(inst[3].fvalue, 7);\n  EXPECT_EQ(inst[3].index, 3);\n\n  inst = page[1];\n  EXPECT_EQ(inst[0].fvalue, 2);\n  EXPECT_EQ(inst[0].index, 0);\n  EXPECT_EQ(inst[1].fvalue, 4);\n  EXPECT_EQ(inst[1].index, 1);\n  EXPECT_EQ(inst[2].fvalue, 6);\n  EXPECT_EQ(inst[2].index, 2);\n  EXPECT_EQ(inst[3].fvalue, 8);\n  EXPECT_EQ(inst[3].index, 3);\n}\n\n// A mock for JVM data iterator.\nclass CSRIterForTest {\n  std::vector<float> 
data_{1, 2, 3, 4, 5};\n  std::vector<std::remove_pointer_t<decltype(std::declval<XGBoostBatchCSR>().index)>> feature_idx_{\n      0, 1, 0, 1, 1};\n  std::vector<std::remove_pointer_t<decltype(std::declval<XGBoostBatchCSR>().offset)>> row_ptr_{\n      0, 2, 4, 5, 5};\n  size_t iter_ {0};\n\n public:\n  size_t static constexpr kRows { 4 };  // Test for the last row being empty\n  size_t static constexpr kCols { 13 };  // Test for having some missing columns\n\n  XGBoostBatchCSR Next() {\n    for (auto& v : data_) {\n      v += iter_;\n    }\n    XGBoostBatchCSR batch;\n    batch.columns = 2;\n    batch.offset = dmlc::BeginPtr(row_ptr_);\n    batch.index = dmlc::BeginPtr(feature_idx_);\n    batch.value = dmlc::BeginPtr(data_);\n    batch.size = kRows;\n\n    batch.label = nullptr;\n    batch.weight = nullptr;\n\n    iter_++;\n\n    return batch;\n  }\n  size_t Iter() const { return iter_; }\n};\n\nsize_t constexpr CSRIterForTest::kCols;\n\nint CSRSetDataNextForTest(DataIterHandle data_handle,\n                          XGBCallbackSetData *set_function,\n                          DataHolderHandle set_function_handle) {\n  size_t constexpr kIters { 2 };\n  auto iter = static_cast<CSRIterForTest *>(data_handle);\n  if (iter->Iter() < kIters) {\n    auto batch = iter->Next();\n    batch.columns = CSRIterForTest::kCols;\n    set_function(set_function_handle, batch);\n    return 1;\n  } else {\n    return 0;  // stoping condition\n  }\n}\n\nTEST(Adapter, IteratorAdapter) {\n  CSRIterForTest iter;\n  data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,\n                        XGBoostBatchCSR> adapter{&iter, CSRSetDataNextForTest};\n  constexpr size_t kRows { 8 };\n\n  std::unique_ptr<DMatrix> data {\n    DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)\n  };\n  ASSERT_EQ(data->Info().num_col_, CSRIterForTest::kCols);\n  ASSERT_EQ(data->Info().num_row_, kRows);\n  int num_batch = 0;\n  for (auto const& batch : data->GetBatches<SparsePage>()) 
{\n    ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_idx_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));\n    ++num_batch;\n  }\n  ASSERT_EQ(num_batch, 1);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_array_interface.cc",
    "content": "/**\n * Copyright 2020-2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/host_device_vector.h>\n#include \"../helpers.h\"\n#include \"../../../src/data/array_interface.h\"\n#include \"dmlc/logging.h\"\n#include \"xgboost/json.h\"\n\nnamespace xgboost {\nTEST(ArrayInterface, Initialize) {\n  size_t constexpr kRows = 10, kCols = 10;\n  HostDeviceVector<float> storage;\n  auto array = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n  auto arr_interface = ArrayInterface<2>(StringView{array});\n  ASSERT_EQ(arr_interface.Shape<0>(), kRows);\n  ASSERT_EQ(arr_interface.Shape<1>(), kCols);\n  ASSERT_EQ(arr_interface.data, storage.ConstHostPointer());\n  ASSERT_EQ(arr_interface.ElementSize(), 4);\n  ASSERT_EQ(arr_interface.type, ArrayInterfaceHandler::kF4);\n\n  HostDeviceVector<size_t> u64_storage(storage.Size());\n  std::string u64_arr_str{ArrayInterfaceStr(linalg::TensorView<size_t const, 2>{\n      u64_storage.ConstHostSpan(), {kRows, kCols}, DeviceOrd::CPU()})};\n  std::copy(storage.ConstHostVector().cbegin(), storage.ConstHostVector().cend(),\n            u64_storage.HostSpan().begin());\n  auto u64_arr = ArrayInterface<2>{u64_arr_str};\n  ASSERT_EQ(u64_arr.ElementSize(), 8);\n  ASSERT_EQ(u64_arr.type, ArrayInterfaceHandler::kU8);\n}\n\nTEST(ArrayInterface, Error) {\n  constexpr size_t kRows = 16, kCols = 10;\n  Json column { Object() };\n  std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};\n  column[\"shape\"] = Array(j_shape);\n  std::vector<Json> j_data{Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),\n                           Json(Boolean(false))};\n\n  auto const& column_obj = get<Object>(column);\n  std::string typestr{\"<f4\"};\n  size_t n = kRows * kCols;\n\n  // missing version\n  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n), dmlc::Error);\n  column[\"version\"] = 3;\n  // missing data\n  
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),\n               dmlc::Error);\n  // null data\n  column[\"data\"] = Null{};\n  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),\n               dmlc::Error);\n  column[\"data\"] = j_data;\n  // missing typestr\n  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),\n               dmlc::Error);\n  column[\"typestr\"] = String(\"<f4\");\n  // nullptr is not valid\n  EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n),\n               dmlc::Error);\n\n  HostDeviceVector<float> storage;\n  auto array = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n  j_data = {\n      Json(Integer(reinterpret_cast<Integer::Int>(storage.ConstHostPointer()))),\n      Json(Boolean(false))};\n  column[\"data\"] = j_data;\n  EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(column_obj, n));\n  // null data in mask\n  column[\"mask\"] = Object{};\n  column[\"mask\"][\"data\"] = Null{};\n  common::Span<RBitField8::value_type> s_mask;\n  EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error);\n\n  get<Object>(column).erase(\"mask\");\n  // misaligned.\n  j_data = {Json(Integer(reinterpret_cast<Integer::Int>(\n                reinterpret_cast<char const*>(storage.ConstHostPointer()) + 1))),\n            Json(Boolean(false))};\n  column[\"data\"] = j_data;\n  EXPECT_THROW({ ArrayInterface<1> arr{column}; }, dmlc::Error);\n}\n\nTEST(ArrayInterface, GetElement) {\n  size_t kRows = 4, kCols = 2;\n  HostDeviceVector<float> storage;\n  auto intefrace_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n  ArrayInterface<2> array_interface{intefrace_str};\n\n  auto const& h_storage = storage.ConstHostVector();\n  for (size_t i = 0; i < kRows; ++i) {\n    for (size_t j = 0; j < kCols; ++j) {\n      float v0 = array_interface(i, j);\n      float v1 = h_storage.at(i * kCols + j);\n      ASSERT_EQ(v0, v1);\n    }\n  
}\n}\n\nTEST(ArrayInterface, TrivialDim) {\n  size_t kRows{1000}, kCols = 1;\n  HostDeviceVector<float> storage;\n  auto interface_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n  {\n    ArrayInterface<1> arr_i{interface_str};\n    ASSERT_EQ(arr_i.n, kRows);\n    ASSERT_EQ(arr_i.Shape<0>(), kRows);\n  }\n\n  std::swap(kRows, kCols);\n  interface_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n  {\n    ArrayInterface<1> arr_i{interface_str};\n    ASSERT_EQ(arr_i.n, kCols);\n    ASSERT_EQ(arr_i.Shape<0>(), kCols);\n  }\n}\n\nTEST(ArrayInterface, ToDType) {\n  static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4);\n  static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8);\n\n  static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4);\n  static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8);\n\n  static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4);\n  static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_array_interface.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/host_device_vector.h>\n\n#include \"../../../src/data/array_interface.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost {\n\n__global__ void SleepForTest(uint64_t *out, uint64_t duration) {\n  auto start = clock64();\n  auto t = 0;\n  while (t < duration) {\n    t = clock64() - start;\n  }\n  out[0] = t;\n}\n\nTEST(ArrayInterface, Stream) {\n  size_t constexpr kRows = 10, kCols = 10;\n  HostDeviceVector<float> storage;\n  auto arr_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);\n\n  curt::Stream stream;\n\n  auto j_arr = Json::Load(StringView{arr_str});\n  j_arr[\"stream\"] = Integer(reinterpret_cast<int64_t>(stream.Handle()));\n  Json::Dump(j_arr, &arr_str);\n\n  dh::caching_device_vector<uint64_t> out(1, 0);\n  std::uint64_t dur = 1e9;\n  dh::LaunchKernel{1, 1, 0, stream.View()}(SleepForTest, out.data().get(), dur);\n  ArrayInterface<2> arr(arr_str);\n\n  auto t = out[0];\n  CHECK_GE(t, dur);\n}\n\nTEST(ArrayInterface, Ptr) {\n  std::vector<float> h_data(10);\n  ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(h_data.data()));\n  dh::safe_cuda(cudaGetLastError());\n\n  dh::device_vector<float> d_data(10);\n  ASSERT_TRUE(ArrayInterfaceHandler::IsCudaPtr(d_data.data().get()));\n  dh::safe_cuda(cudaGetLastError());\n\n  ASSERT_FALSE(ArrayInterfaceHandler::IsCudaPtr(nullptr));\n  dh::safe_cuda(cudaGetLastError());\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_array_interface.h",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <thrust/execution_policy.h>  // for device\n#include <thrust/sequence.h>          // for sequence\n#include <xgboost/data.h>\n#include <xgboost/json.h>\n\nnamespace xgboost {\ntemplate <typename T>\nJson GenerateDenseColumn(std::string const& typestr, size_t kRows,\n                         thrust::device_vector<T>* out_d_data) {\n  auto& d_data = *out_d_data;\n  d_data.resize(kRows);\n  Json column { Object() };\n  std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};\n  column[\"shape\"] = Array(j_shape);\n  column[\"strides\"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});\n  column[\"stream\"] = nullptr;\n\n  d_data.resize(kRows);\n  thrust::sequence(thrust::device, d_data.begin(), d_data.end(), 0.0f, 2.0f);\n\n  auto p_d_data = d_data.data().get();\n\n  std::vector<Json> j_data {\n    Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),\n        Json(Boolean(false))};\n  column[\"data\"] = j_data;\n\n  column[\"version\"] = 3;\n  column[\"typestr\"] = String(typestr);\n  return column;\n}\n\ntemplate <typename T>\nJson GenerateSparseColumn(std::string const& typestr, size_t kRows,\n                         thrust::device_vector<T>* out_d_data) {\n  auto& d_data = *out_d_data;\n  Json column { Object() };\n  std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};\n  column[\"shape\"] = Array(j_shape);\n  column[\"strides\"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});\n  column[\"stream\"] = nullptr;\n\n  d_data.resize(kRows);\n  for (size_t i = 0; i < d_data.size(); ++i) {\n    d_data[i] = i * 2.0;\n  }\n\n  auto p_d_data = d_data.data().get();\n\n  std::vector<Json> j_data {\n    Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),\n        Json(Boolean(false))};\n  column[\"data\"] = j_data;\n\n  
column[\"version\"] = 3;\n  column[\"typestr\"] = String(typestr);\n  return column;\n}\n\ntemplate <typename T>\nJson Generate2dArrayInterface(int rows, int cols, std::string typestr,\n                              thrust::device_vector<T> *p_data) {\n  auto& data = *p_data;\n  thrust::sequence(data.begin(), data.end());\n\n  Json array_interface{Object()};\n  std::vector<Json> shape = {Json(static_cast<Integer::Int>(rows)),\n                             Json(static_cast<Integer::Int>(cols))};\n  array_interface[\"shape\"] = Array(shape);\n  std::vector<Json> j_data{\n      Json(Integer(reinterpret_cast<Integer::Int>(data.data().get()))),\n      Json(Boolean(false))};\n  array_interface[\"data\"] = j_data;\n  array_interface[\"version\"] = 3;\n  array_interface[\"typestr\"] = String(typestr);\n  array_interface[\"stream\"] = nullptr;\n  return array_interface;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_batch_utils.cu",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstdint>  // for int64_t\n#include <tuple>    // for tie\n\n#include \"../../../src/common/cuda_rt_utils.h\"  // for TotalMemory\n#include \"../../../src/data/batch_utils.h\"      // for AutoHostRatio\n#include \"../helpers.h\"\n\nnamespace xgboost::data {\nTEST(BatchUtils, CacheHostRatio) {\n  {\n    bst_idx_t n_cache_bytes = 128;\n    double cache_host_ratio = ::xgboost::cuda_impl::AutoHostRatio();\n    std::int64_t min_cache_page_bytes = ::xgboost::cuda_impl::AutoCachePageBytes();\n    std::tie(cache_host_ratio, min_cache_page_bytes) =\n        detail::DftPageSizeHostRatio(n_cache_bytes, false, cache_host_ratio, min_cache_page_bytes);\n    ASSERT_EQ(cache_host_ratio, 0.0);  // Assuming the device has more than 256 bytes of memory ..\n    ASSERT_GT(min_cache_page_bytes, 0);\n    ASSERT_THAT(\n        [&] {\n          [[maybe_unused]] auto r =\n              detail::DftPageSizeHostRatio(n_cache_bytes, false, 2.0, min_cache_page_bytes);\n        },\n        GMockThrow(R\"(cache_host_ratio)\"));\n  }\n  {\n    bst_idx_t constexpr kGB = 1024ul * 1024ul * 1024ul;\n    bst_idx_t n_cache_bytes = 1024ul * kGB;\n    double cache_host_ratio = ::xgboost::cuda_impl::AutoHostRatio();\n    std::int64_t min_cache_page_bytes = ::xgboost::cuda_impl::AutoCachePageBytes();\n    std::tie(cache_host_ratio, min_cache_page_bytes) =\n        detail::DftPageSizeHostRatio(n_cache_bytes, false, cache_host_ratio, min_cache_page_bytes);\n    ASSERT_GE(min_cache_page_bytes + 512, curt::TotalMemory() * cuda_impl::CachePageRatio() * 0.5);\n    ASSERT_GT(cache_host_ratio, (1.0 - curt::TotalMemory() / static_cast<double_t>(n_cache_bytes)));\n    ASSERT_LT(cache_host_ratio, (1.0 - curt::TotalMemory() / (3.0 * n_cache_bytes)));\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_cat_container.cc",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n\n#include \"test_cat_container.h\"\n\n#include <gtest/gtest.h>\n\n#include \"../encoder/df_mock.h\"\n\nnamespace xgboost {\nusing DfTest = enc::cpu_impl::DfTest;\n\nauto eq_check = [](common::Span<bst_cat_t const> sorted_idx, std::vector<bst_cat_t> const& sol) {\n  ASSERT_EQ(sorted_idx, common::Span{sol});\n};\n\nTEST(CatContainer, Str) {\n  Context ctx;\n  TestCatContainerStr<DfTest>(&ctx, eq_check);\n}\n\nTEST(CatContainer, Mixed) {\n  Context ctx;\n  TestCatContainerMixed<DfTest>(&ctx, eq_check);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_cat_container.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n\n#include <gtest/gtest.h>\n#include <xgboost/base.h>  // for bst_cat_t\n#include <xgboost/span.h>  // for Span\n\n#include <vector>  // for vector\n\n#include \"../../../src/common/common.h\"           // for safe_cuda\n#include \"../../../src/common/threading_utils.h\"  // for ParallelFor\n#include \"../encoder/df_mock.h\"\n#include \"../helpers.h\"  // for MakeCUDACtx\n#include \"test_cat_container.h\"\n\nnamespace xgboost {\n// Doesn't support GPU input yet since cuDF doesn't have cuda arrow export.\nusing DfTest = enc::cpu_impl::DfTest;\nnamespace {\nauto eq_check = [](common::Span<bst_cat_t const> sorted_idx, std::vector<bst_cat_t> const& sol) {\n  std::vector<bst_cat_t> h_sorted(sorted_idx.size());\n  dh::safe_cuda(cudaMemcpyAsync(h_sorted.data(), sorted_idx.data(), sorted_idx.size_bytes(),\n                                cudaMemcpyDefault));\n  ASSERT_EQ(h_sorted, sol);\n};\n}  // namespace\n\nTEST(CatContainer, StrGpu) {\n  auto ctx = MakeCUDACtx(0);\n  auto df = TestCatContainerStr<DfTest>(&ctx, eq_check);\n}\n\nTEST(CatContainer, MixedGpu) {\n  auto ctx = MakeCUDACtx(0);\n  auto df = TestCatContainerMixed<DfTest>(&ctx, eq_check);\n}\n\nTEST(CatContainer, ThreadSafety) {\n  auto ctx = MakeCUDACtx(0);\n  auto df = DfTest::Make(DfTest::MakeStrs(\"abc\", \"bcd\", \"cde\", \"ab\"), DfTest::MakeInts(2, 2, 3, 0));\n  auto h_df = df.View();\n  auto cats = test_cat_detail::FromDf(&ctx, h_df);\n  cats.Sort(&ctx);  // not thread safe\n\n  common::ParallelFor(ctx.Threads(), 64, [&](auto i) {\n    auto sorted_idx = cats.RefSortedIndex(&ctx);\n    if (i % 2 == 0) {\n      auto h_cats = cats.HostView();\n      ASSERT_EQ(h_cats.n_total_cats, 8);\n    } else {\n      auto d_cats = cats.DeviceView(&ctx);\n      ASSERT_EQ(d_cats.n_total_cats, 8);\n    }\n    auto sol = std::vector<bst_cat_t>{3, 0, 1, 2, 3, 0, 1, 2};\n    eq_check(sorted_idx, sol);\n  });\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_cat_container.h",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n\n#include <gtest/gtest.h>\n\n#include \"../../../src/data/cat_container.h\"\n\nnamespace xgboost {\nnamespace test_cat_detail {\ninline void HostCheck(CatContainer const& cats) {\n  ASSERT_TRUE(cats.HasCategorical());\n  ASSERT_FALSE(cats.Empty());\n  ASSERT_TRUE(cats.HostCanRead());\n  ASSERT_FALSE(cats.DeviceCanRead());\n}\n\ninline void DeviceCheck(CatContainer const& cats) {\n  ASSERT_TRUE(cats.HasCategorical());\n  ASSERT_FALSE(cats.Empty());\n  ASSERT_TRUE(cats.HostCanRead());\n  ASSERT_FALSE(cats.DeviceCanRead());\n}\n\n[[nodiscard]] inline CatContainer FromDf(Context const*, enc::HostColumnsView df) {\n  return CatContainer{df, false};\n}\n\n#if defined(XGBOOST_USE_CUDA)\n[[nodiscard]] inline CatContainer FromDf(Context const* ctx, enc::DeviceColumnsView df) {\n  return CatContainer{ctx, df, false};\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace test_cat_detail\n\ntemplate <typename DfTest, typename EqCheck>\nauto TestCatContainerStr(Context const* ctx, EqCheck&& is_eq) {\n  auto df = DfTest::Make(DfTest::MakeStrs(\"abc\", \"bcd\", \"cde\", \"ab\"));\n  auto h_df = df.View();\n  auto cats = test_cat_detail::FromDf(ctx, h_df);\n  if (ctx->IsCPU()) {\n    test_cat_detail::HostCheck(cats);\n  } else {\n    test_cat_detail::DeviceCheck(cats);\n  }\n\n  [&] {\n    ASSERT_EQ(df.View().columns.size(), cats.NumFeatures());\n  }();\n\n  cats.Sort(ctx);\n\n  auto sol = std::vector<bst_cat_t>{3, 0, 1, 2};\n  auto sorted_idx = cats.RefSortedIndex(ctx);\n  is_eq(sorted_idx, sol);\n  [&] {\n    auto view = cats.HostView();\n    ASSERT_EQ(view.n_total_cats, sol.size());\n    ASSERT_EQ(view.feature_segments.size(), 2ul);\n    ASSERT_EQ(view.feature_segments[0], 0);\n    ASSERT_EQ(view.feature_segments[1], static_cast<bst_cat_t>(sol.size()));\n  }();\n\n  return df;\n}\n\ntemplate <typename DfTest, typename EqCheck>\nauto TestCatContainerMixed(Context const* ctx, EqCheck&& is_eq) 
{\n  auto df =\n      DfTest::Make(DfTest::MakeStrs(\"abc\", \"bcd\", \"cde\", \"ab\"), DfTest::MakeInts(2, 2, 3, 0, 4));\n  auto h_df = df.View();\n  auto cats = test_cat_detail::FromDf(ctx, h_df);\n  if (ctx->IsCPU()) {\n    test_cat_detail::HostCheck(cats);\n  } else {\n    test_cat_detail::DeviceCheck(cats);\n  }\n\n  cats.Sort(ctx);\n  auto sorted_idx = cats.RefSortedIndex(ctx);\n  auto sol = std::vector<bst_cat_t>{3, 0, 1, 2, 3, 0, 1, 2, 4};\n  is_eq(sorted_idx, sol);\n  auto view = cats.HostView();\n  [&] {\n    ASSERT_EQ(view.n_total_cats, sol.size());\n    ASSERT_EQ(view.feature_segments.size(), 3ul);\n    ASSERT_EQ(view.feature_segments[0], 0);\n    ASSERT_EQ(view.feature_segments[1], 4);\n    ASSERT_EQ(view.feature_segments[2], static_cast<bst_cat_t>(sol.size()));\n  }();\n\n  return df;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_data.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <memory>\n#include <vector>\n\n#include \"../filesystem.h\"  // TemporaryDirectory\n#include \"../helpers.h\"\n#include \"xgboost/data.h\"\n\nnamespace xgboost {\nTEST(SparsePage, PushCSC) {\n  std::vector<bst_idx_t> offset {0};\n  std::vector<Entry> data;\n  SparsePage batch;\n  batch.offset.HostVector() = offset;\n  batch.data.HostVector() = data;\n\n  offset = {0, 1, 4};\n  for (size_t i = 0; i < offset.back(); ++i) {\n    data.emplace_back(i, 0.1f);\n  }\n\n  SparsePage other;\n  other.offset.HostVector() = offset;\n  other.data.HostVector() = data;\n\n  batch.PushCSC(other);\n\n  ASSERT_EQ(batch.offset.HostVector().size(), offset.size());\n  ASSERT_EQ(batch.data.HostVector().size(), data.size());\n  for (size_t i = 0; i < offset.size(); ++i) {\n    ASSERT_EQ(batch.offset.HostVector()[i], offset[i]);\n  }\n  for (size_t i = 0; i < data.size(); ++i) {\n    ASSERT_EQ(batch.data.HostVector()[i].index, data[i].index);\n  }\n\n  batch.PushCSC(other);\n  ASSERT_EQ(batch.offset.HostVector().size(), offset.size());\n  ASSERT_EQ(batch.data.Size(), data.size() * 2);\n\n  for (size_t i = 0; i < offset.size(); ++i) {\n    ASSERT_EQ(batch.offset.HostVector()[i], offset[i] * 2);\n  }\n\n  auto page = batch.GetView();\n  auto inst = page[0];\n  ASSERT_EQ(inst.size(), 2ul);\n  for (auto entry : inst) {\n    ASSERT_EQ(entry.index, 0u);\n  }\n\n  inst = page[1];\n  ASSERT_EQ(inst.size(), 6ul);\n  std::vector<size_t> indices_sol {1, 2, 3};\n  for (size_t i = 0; i < inst.size(); ++i) {\n    ASSERT_EQ(inst[i].index, indices_sol[i % 3]);\n  }\n}\n\nTEST(SparsePage, PushCSCAfterTranspose) {\n  bst_idx_t constexpr kRows = 1024, kCols = 21;\n\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n  const int ncols = dmat->Info().num_col_;\n  SparsePage page;  // Consolidated sparse page\n  for (const auto& batch : 
dmat->GetBatches<xgboost::SparsePage>()) {\n    // Transpose each batch and push\n    SparsePage tmp = batch.GetTranspose(ncols, AllThreadsForTest());\n    page.PushCSC(tmp);\n  }\n\n  // Make sure that the final sparse page has the right number of entries\n  ASSERT_EQ(kRows * kCols, page.data.Size());\n\n  page.SortRows(AllThreadsForTest());\n  auto v = page.GetView();\n  for (size_t i = 0; i < v.Size(); ++i) {\n    auto column = v[i];\n    for (size_t j = 1; j < column.size(); ++j) {\n      ASSERT_GE(column[j].fvalue, column[j - 1].fvalue);\n    }\n  }\n}\n\nTEST(SparsePage, SortIndices) {\n  auto p_fmat = RandomDataGenerator{100, 10, 0.6}.GenerateDMatrix();\n  auto n_threads = AllThreadsForTest();\n  SparsePage copy;\n  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {\n    ASSERT_TRUE(page.IsIndicesSorted(n_threads));\n    copy.Push(page);\n  }\n  ASSERT_TRUE(copy.IsIndicesSorted(n_threads));\n\n  for (size_t ridx = 0; ridx < copy.Size(); ++ridx) {\n    auto beg = copy.offset.HostVector()[ridx];\n    auto end = copy.offset.HostVector()[ridx + 1];\n    auto& h_data = copy.data.HostVector();\n    if (end - beg >= 2) {\n      std::swap(h_data[beg], h_data[end - 1]);\n    }\n  }\n  ASSERT_FALSE(copy.IsIndicesSorted(n_threads));\n\n  copy.SortIndices(n_threads);\n  ASSERT_TRUE(copy.IsIndicesSorted(n_threads));\n}\n\nTEST(DMatrix, Uri) {\n  auto constexpr kRows {16};\n  auto constexpr kCols {8};\n\n  common::TemporaryDirectory tmpdir;\n  auto const path = tmpdir.Path() / \"small.csv\";\n  CreateTestCSV(path.string(), kRows, kCols);\n\n  std::unique_ptr<DMatrix> dmat;\n  // FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.\n  // EXPECT_THROW(dmat.reset(DMatrix::Load(path, false, true)), dmlc::Error);\n\n  std::string uri = path.string() + \"?format=csv\";\n  dmat.reset(DMatrix::Load(uri, false));\n\n  ASSERT_EQ(dmat->Info().num_col_, kCols);\n  ASSERT_EQ(dmat->Info().num_row_, kRows);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_device_adapter.cu",
    "content": "/**\n * Copyright 2019-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include \"../../../src/data/adapter.h\"\n#include \"../helpers.h\"\n#include <thrust/device_vector.h>\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"test_array_interface.h\"\nusing namespace xgboost;  // NOLINT\n\nvoid TestCudfAdapter()\n{\n  constexpr size_t kRowsA {16};\n  constexpr size_t kRowsB {16};\n  std::vector<Json> columns;\n  thrust::device_vector<double> d_data_0(kRowsA);\n  thrust::device_vector<uint32_t> d_data_1(kRowsB);\n\n  columns.emplace_back(GenerateDenseColumn<double>(\"<f8\", kRowsA, &d_data_0));\n  columns.emplace_back(GenerateDenseColumn<uint32_t>(\"<u4\", kRowsB, &d_data_1));\n\n  Json column_arr {columns};\n\n  std::string str;\n  Json::Dump(column_arr, &str);\n\n  data::CudfAdapter adapter(str);\n\n  adapter.Next();\n  auto & batch = adapter.Value();\n  EXPECT_EQ(batch.Size(), kRowsA + kRowsB);\n\n  EXPECT_NO_THROW({\n    dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {\n      auto element = batch.GetElement(idx);\n      KERNEL_CHECK(element.row_idx == idx / 2);\n      if (idx % 2 == 0) {\n        KERNEL_CHECK(element.column_idx == 0);\n        KERNEL_CHECK(element.value == element.row_idx * 2.0f);\n      } else {\n        KERNEL_CHECK(element.column_idx == 1);\n        KERNEL_CHECK(element.value == element.row_idx * 2.0f);\n      }\n    });\n    dh::safe_cuda(cudaDeviceSynchronize());\n  });\n}\n\nTEST(DeviceAdapter, CudfAdapter) {\n  TestCudfAdapter();\n}\n\nnamespace xgboost::data {\nTEST(DeviceAdapter, GetRowCounts) {\n  auto ctx = MakeCUDACtx(0);\n\n  for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {\n    HostDeviceVector<float> storage;\n    auto str_arr = RandomDataGenerator{8192, n_features, 0.0}\n                       .Device(ctx.Device())\n                       .GenerateArrayInterface(&storage);\n    auto adapter = CupyAdapter{str_arr};\n    HostDeviceVector<bst_idx_t> 
offset(adapter.NumRows() + 1, 0);\n    offset.SetDevice(ctx.Device());\n    auto rstride = GetRowCounts(&ctx, adapter.Value(), offset.DeviceSpan(), ctx.Device(),\n                                std::numeric_limits<float>::quiet_NaN());\n    ASSERT_EQ(rstride, n_features);\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_ellpack_page.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <xgboost/base.h>\n\n#include <utility>\n\n#include \"../../../src/common/categorical.h\"          // for AsCat\n#include \"../../../src/common/compressed_iterator.h\"  // for CompressedByteT\n#include \"../../../src/common/cuda_stream.h\"          // for DefaultStream\n#include \"../../../src/common/hist_util.h\"\n#include \"../../../src/data/device_adapter.cuh\"  // for CupyAdapter\n#include \"../../../src/data/ellpack_page.cuh\"\n#include \"../../../src/data/ellpack_page.h\"\n#include \"../../../src/data/gradient_index.h\"  // for GHistIndexMatrix\n#include \"../../../src/tree/param.h\"           // for TrainParam\n#include \"../helpers.h\"\n#include \"../histogram_helpers.h\"\n#include \"gtest/gtest.h\"\n\nnamespace xgboost {\nTEST(EllpackPage, EmptyDMatrix) {\n  constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;\n  constexpr float kSparsity = 0;\n  auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();\n  auto ctx = MakeCUDACtx(0);\n  auto& page = *dmat->GetBatches<EllpackPage>(\n                        &ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()})\n                    .begin();\n  auto impl = page.Impl();\n  ASSERT_EQ(impl->info.row_stride, 0);\n  ASSERT_EQ(impl->Cuts().TotalBins(), 0);\n  ASSERT_EQ(impl->gidx_buffer.size(), 9);\n}\n\nTEST(EllpackPage, BuildGidxDense) {\n  bst_idx_t n_samples = 16, n_features = 8;\n  auto ctx = MakeCUDACtx(0);\n  auto page = BuildEllpackPage(&ctx, n_samples, n_features);\n\n  ASSERT_EQ(page->info.row_stride, n_features);\n\n  std::vector<uint32_t> solution = {\n    0, 3, 8,  9, 14, 17, 20, 21,\n    0, 4, 7, 10, 14, 16, 19, 22,\n    1, 3, 7, 11, 14, 15, 19, 21,\n    2, 3, 7,  9, 13, 16, 20, 22,\n    2, 3, 6,  9, 12, 16, 20, 21,\n    1, 5, 6, 10, 13, 16, 20, 21,\n    2, 5, 8,  9, 13, 17, 19, 22,\n    2, 4, 6, 10, 14, 17, 19, 21,\n    2, 5, 7,  9, 13, 16, 19, 22,\n    0, 3, 8, 10, 12, 16, 19, 22,\n    1, 
3, 7, 10, 13, 16, 19, 21,\n    1, 3, 8, 10, 13, 17, 20, 22,\n    2, 4, 6,  9, 14, 15, 19, 22,\n    1, 4, 6,  9, 13, 16, 19, 21,\n    2, 4, 8, 10, 14, 15, 19, 22,\n    1, 4, 7, 10, 14, 16, 19, 21,\n  };\n\n  page->VisitOnHost(&ctx, [&](auto&& h_accessor) {\n    for (size_t i = 0; i < n_samples * n_features; ++i) {\n      auto fidx = i % n_features;\n      ASSERT_EQ(solution[i], h_accessor.gidx_iter[i] + h_accessor.feature_segments[fidx]);\n      ASSERT_EQ(page->NumSymbols(), h_accessor.NullValue());\n    }\n  });\n  ASSERT_EQ(page->NumSymbols(), 3);\n  ASSERT_EQ(page->NumNonMissing(&ctx, {}), n_samples * n_features);\n}\n\nTEST(EllpackPage, BuildGidxSparse) {\n  int constexpr kNRows = 16, kNCols = 8;\n  auto ctx = MakeCUDACtx(0);\n  auto page = BuildEllpackPage(&ctx, kNRows, kNCols, 0.9f);\n\n  ASSERT_EQ(page->info.row_stride, 3);\n\n  // row_stride = 3, 16 rows, 48 entries for ELLPack\n  std::vector<uint32_t> solution = {\n    15, 24, 24,  0, 24, 24, 24, 24, 24, 24, 24, 24, 20, 24, 24, 24,\n    24, 24, 24, 24, 24,  5, 24, 24,  0, 16, 24, 15, 24, 24, 24, 24,\n    24,  7, 14, 16,  4, 24, 24, 24, 24, 24,  9, 24, 24,  1, 24, 24\n  };\n  page->VisitOnHost(&ctx, [&](auto&& h_acc) {\n    for (size_t i = 0; i < kNRows * page->info.row_stride; ++i) {\n      ASSERT_EQ(solution[i], h_acc.gidx_iter[i]);\n    }\n  });\n}\n\nTEST(EllpackPage, FromCategoricalBasic) {\n  using common::AsCat;\n  size_t constexpr kRows = 1000, kCats = 13, kCols = 1;\n  int32_t max_bins = 8;\n  auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);\n  auto m = GetDMatrixFromData(x, kRows, 1);\n  auto& h_ft = m->Info().feature_types.HostVector();\n  h_ft.resize(kCols, FeatureType::kCategorical);\n\n  auto ctx = MakeCUDACtx(0);\n  auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};\n  auto ellpack = EllpackPage(&ctx, m.get(), p);\n\n  auto x_copy = x;\n  std::sort(x_copy.begin(), x_copy.end());\n  auto n_uniques = std::unique(x_copy.begin(), x_copy.end()) - x_copy.begin();\n  
ASSERT_EQ(n_uniques, kCats);\n\n  ellpack.Impl()->Visit(&ctx, {}, [&](auto&& accessor) {\n    ASSERT_EQ(kCats, accessor.NumBins());\n    std::vector<uint32_t> h_cuts_ptr(accessor.NumFeatures() + 1);\n    dh::safe_cuda(cudaMemcpyAsync(h_cuts_ptr.data(), accessor.feature_segments,\n                                  sizeof(bst_feature_t) * h_cuts_ptr.size(), cudaMemcpyDefault));\n    std::vector<float> h_cuts_values(accessor.gidx_fvalue_map.size());\n    dh::CopyDeviceSpanToVector(&h_cuts_values, accessor.gidx_fvalue_map);\n    ASSERT_EQ(h_cuts_ptr.size(), 2);\n    ASSERT_EQ(h_cuts_values.size(), kCats);\n\n    ellpack.Impl()->VisitOnHost(&ctx, [&](auto&& h_accessor) {\n      for (size_t i = 0; i < x.size(); ++i) {\n        auto bin = h_accessor.gidx_iter[i];\n        auto bin_value = h_cuts_values.at(bin);\n        ASSERT_EQ(AsCat(x[i]), AsCat(bin_value));\n      }\n    });\n  });\n}\n\nTEST(EllpackPage, FromCategoricalMissing) {\n  auto ctx = MakeCUDACtx(0);\n\n  std::shared_ptr<common::HistogramCuts> cuts;\n  auto nan = std::numeric_limits<float>::quiet_NaN();\n  // 2 rows and 3 columns. 
The second column is nan, row_stride is 2.\n  std::vector<float> data{{0.1, nan, 1, 0.2, nan, 0}};\n  auto p_fmat = GetDMatrixFromData(data, 2, 3);\n  p_fmat->Info().feature_types.HostVector() = {FeatureType::kNumerical, FeatureType::kNumerical,\n                                               FeatureType::kCategorical};\n  p_fmat->Info().feature_types.SetDevice(ctx.Device());\n\n  auto p = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, p)) {\n    cuts = std::make_shared<common::HistogramCuts>(page.Cuts());\n  }\n  cuts->SetDevice(ctx.Device());\n  for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, p)) {\n    page.Impl()->VisitOnHost(&ctx, [&](auto&& h_acc) {\n      ASSERT_EQ(h_acc.n_rows, 2);\n      ASSERT_EQ(cuts->NumFeatures(), 3);\n      ASSERT_EQ(h_acc.row_stride, 2);\n      ASSERT_EQ(h_acc.gidx_iter[0], 0);\n      ASSERT_EQ(h_acc.gidx_iter[1], 4);  // cat 1\n      ASSERT_EQ(h_acc.gidx_iter[2], 1);\n      ASSERT_EQ(h_acc.gidx_iter[3], 3);  // cat 0\n    });\n  }\n}\n\ntemplate <typename Accessor>\nstruct ReadRowFunction {\n  Accessor matrix;\n  std::size_t row;\n  bst_float* row_data_d;\n  ReadRowFunction(Accessor matrix, std::size_t row, bst_float* row_data_d)\n      : matrix(std::move(matrix)), row(row), row_data_d(row_data_d) {}\n\n  __device__ void operator()(size_t col) {\n    auto value = matrix.GetFvalue(row, col);\n    if (isnan(value)) {\n      value = -1;\n    }\n    row_data_d[col] = value;\n  }\n};\n\nTEST(EllpackPage, Copy) {\n  constexpr size_t kRows = 1024;\n  constexpr size_t kCols = 16;\n\n  // Create a DMatrix with multiple batches.\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();\n\n  // Create an empty result 
page.\n  EllpackPageImpl result(&ctx, page->CutsShared(), page->is_dense, page->info.row_stride, kRows);\n\n  // Copy batch pages into the result page.\n  size_t offset = 0;\n  for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {\n    size_t num_elements = result.Copy(&ctx, batch.Impl(), offset);\n    offset += num_elements;\n  }\n\n  size_t current_row = 0;\n  thrust::device_vector<bst_float> row_d(kCols);\n  thrust::device_vector<bst_float> row_result_d(kCols);\n  std::vector<bst_float> row(kCols);\n  std::vector<bst_float> row_result(kCols);\n  for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {\n    auto impl = page.Impl();\n    EXPECT_EQ(impl->base_rowid, current_row);\n\n    for (size_t i = 0; i < impl->Size(); i++) {\n      impl->Visit(&ctx, {}, [&](auto&& acc) {\n        dh::LaunchN(kCols, ReadRowFunction(acc, current_row, row_d.data().get()));\n      });\n      thrust::copy(row_d.begin(), row_d.end(), row.begin());\n      result.Visit(&ctx, {}, [&](auto&& acc) {\n        dh::LaunchN(kCols, ReadRowFunction(acc, current_row, row_result_d.data().get()));\n      });\n      thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());\n\n      EXPECT_EQ(row, row_result);\n      current_row++;\n    }\n  }\n}\n\nnamespace {\n// Test for treating sparse ellpack as a dense\nclass CompressedDense : public ::testing::TestWithParam<std::size_t> {\n  auto InitSparsePage(std::size_t null_column) const {\n    bst_idx_t n_samples = 16, n_features = 8;\n    std::vector<float> data(n_samples * n_features);\n\n    std::iota(data.begin(), data.end(), 0.0f);\n    for (std::size_t i = 0; i < data.size(); i += n_features) {\n      data[i + null_column] = std::numeric_limits<float>::quiet_NaN();\n    }\n    data[null_column] = null_column;  // keep the first sample full.\n    auto p_fmat = GetDMatrixFromData(data, n_samples, n_features);\n    return p_fmat;\n  }\n\n  void CheckBasic(Context const* ctx, BatchParam batch, std::size_t 
null_column,\n                  EllpackPageImpl const& impl) {\n    ASSERT_FALSE(impl.IsDense());\n    ASSERT_TRUE(impl.IsDenseCompressed());\n    ASSERT_EQ(impl.NumSymbols(), batch.max_bin + 1);\n\n    std::vector<common::CompressedByteT> h_gidx;\n    impl.VisitOnHost(ctx, [&](auto&& h_acc) {\n      ASSERT_EQ(h_acc.row_stride, h_acc.NumFeatures());\n      ASSERT_EQ(h_acc.NullValue(), batch.max_bin);\n      for (std::size_t i = 0; i < h_acc.row_stride * h_acc.n_rows; ++i) {\n        auto [m, n] = linalg::UnravelIndex(i, h_acc.n_rows, h_acc.row_stride);\n        if (n == null_column && m != 0) {\n          ASSERT_EQ(static_cast<std::int32_t>(h_acc.gidx_iter[i]), h_acc.NullValue());\n        } else {\n          ASSERT_EQ(static_cast<std::int32_t>(h_acc.gidx_iter[i]), m);\n        }\n      }\n    });\n  }\n\n public:\n  void CheckFromSparsePage(std::size_t null_column) {\n    auto p_fmat = this->InitSparsePage(null_column);\n    auto ctx = MakeCUDACtx(0);\n    auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_),\n                            std::numeric_limits<float>::quiet_NaN()};\n\n    for (auto const& ellpack : p_fmat->GetBatches<EllpackPage>(&ctx, batch)) {\n      auto impl = ellpack.Impl();\n      this->CheckBasic(&ctx, batch, null_column, *impl);\n    }\n  }\n\n  void CheckFromAdapter(std::size_t null_column) {\n    bst_idx_t n_samples = 16, n_features = 8;\n\n    auto ctx = MakeCUDACtx(0);\n    HostDeviceVector<float> data(n_samples * n_features, 0.0f, ctx.Device());\n    auto& h_data = data.HostVector();\n    std::iota(h_data.begin(), h_data.end(), 0.0f);\n    for (std::size_t i = 0; i < h_data.size(); i += n_features) {\n      h_data[i + null_column] = std::numeric_limits<float>::quiet_NaN();\n    }\n    h_data[null_column] = null_column;  // Keep the first sample full.\n    auto p_fmat = GetDMatrixFromData(h_data, n_samples, n_features);\n\n    data.ConstDeviceSpan();  // Pull to device\n    auto arri = GetArrayInterface(&data, 
n_samples, n_features);\n    auto sarri = Json::Dump(arri);\n    data::CupyAdapter adapter{StringView{sarri}};\n\n    Context cpu_ctx;\n    auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_), 0.8};\n\n    std::shared_ptr<common::HistogramCuts> cuts;\n    for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&cpu_ctx, batch)) {\n      cuts = std::make_shared<common::HistogramCuts>(page.Cuts());\n    }\n    dh::device_vector<bst_idx_t> row_counts(n_samples, n_features - 1);\n    row_counts[0] = n_features;\n    auto d_row_counts = dh::ToSpan(row_counts);\n    ASSERT_EQ(adapter.NumColumns(), n_features);\n    auto impl =\n        EllpackPageImpl{&ctx,       adapter.Value(), std::numeric_limits<float>::quiet_NaN(),\n                        false,      d_row_counts,    {},\n                        n_features, n_samples,       cuts};\n    this->CheckBasic(&ctx, batch, null_column, impl);\n    curt::DefaultStream().Sync();\n  }\n\n  void CheckFromToGHist(std::size_t null_column) {\n    Context cpu_ctx;\n    auto ctx = MakeCUDACtx(0);\n    std::vector<std::uint8_t> orig;\n    {\n      // Test from GHist\n      auto p_fmat = this->InitSparsePage(null_column);\n      auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_), 0.8};\n      for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&cpu_ctx, batch)) {\n        orig = {page.data.cbegin(), page.data.cend()};\n        auto impl = EllpackPageImpl{&ctx, page, {}};\n        this->CheckBasic(&ctx, batch, null_column, impl);\n      }\n    }\n\n    {\n      // Test to GHist\n      auto p_fmat = this->InitSparsePage(null_column);\n      auto batch = BatchParam{static_cast<bst_bin_t>(p_fmat->Info().num_row_), 0.8};\n      for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, batch)) {\n        auto gidx = GHistIndexMatrix{&ctx, p_fmat->Info(), page, batch};\n        ASSERT_EQ(gidx.Size(), p_fmat->Info().num_row_);\n        for (std::size_t ridx = 0; ridx < gidx.Size(); 
++ridx) {\n          auto rbegin = gidx.row_ptr[ridx];\n          auto rend = gidx.row_ptr[ridx + 1];\n          if (ridx == 0) {\n            ASSERT_EQ(rend - rbegin, p_fmat->Info().num_col_);\n          } else {\n            ASSERT_EQ(rend - rbegin, p_fmat->Info().num_col_ - 1);\n          }\n        }\n        // GHist can't compress a dataset with missing values\n        ASSERT_FALSE(gidx.index.Offset());\n        ASSERT_TRUE(std::equal(gidx.data.cbegin(), gidx.data.cend(), orig.cbegin()));\n      }\n    }\n  }\n};\n\nTEST_P(CompressedDense, FromSparsePage) { this->CheckFromSparsePage(this->GetParam()); }\n\nTEST_P(CompressedDense, FromAdapter) { this->CheckFromAdapter(this->GetParam()); }\n\nTEST_P(CompressedDense, FromToGHist) { this->CheckFromToGHist(this->GetParam()); }\n}  // anonymous namespace\n\nINSTANTIATE_TEST_SUITE_P(EllpackPage, CompressedDense, testing::Values(0ul, 1ul, 7ul));\n\nnamespace {\nclass SparseEllpack : public testing::TestWithParam<float> {\n protected:\n  void TestFromGHistIndex(float sparsity) const {\n    // Only testing with small sample size as the cuts might be different between host and\n    // device.\n    size_t n_samples{128}, n_features{13};\n    Context ctx;\n    Context gpu_ctx{MakeCUDACtx(0)};\n    auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);\n    std::unique_ptr<EllpackPageImpl> from_ghist;\n    ASSERT_TRUE(Xy->SingleColBlock());\n\n    for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{17, 0.6})) {\n      from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}});\n    }\n\n    for (auto const& page : Xy->GetBatches<EllpackPage>(\n             &gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {\n      auto from_sparse_page = page.Impl();\n      ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);\n      ASSERT_EQ(from_sparse_page->base_rowid, 0);\n      ASSERT_EQ(from_sparse_page->base_rowid, from_ghist->base_rowid);\n      
ASSERT_EQ(from_sparse_page->n_rows, from_ghist->n_rows);\n      ASSERT_EQ(from_sparse_page->gidx_buffer.size(), from_ghist->gidx_buffer.size());\n      ASSERT_EQ(from_sparse_page->NumSymbols(), from_ghist->NumSymbols());\n      std::vector<common::CompressedByteT> h_gidx_from_sparse, h_gidx_from_ghist;\n      auto from_ghist_acc = from_ghist->GetHostEllpack(&gpu_ctx, &h_gidx_from_ghist);\n      auto from_sparse_acc = from_sparse_page->GetHostEllpack(&gpu_ctx, &h_gidx_from_sparse);\n      std::visit(\n          [&](auto&& from_ghist_acc, auto&& from_sparse_acc) {\n            for (size_t i = 0; i < from_ghist->n_rows * from_ghist->info.row_stride; ++i) {\n              ASSERT_EQ(from_ghist_acc.gidx_iter[i], from_sparse_acc.gidx_iter[i]);\n            }\n          },\n          from_ghist_acc, from_sparse_acc);\n    }\n  }\n\n  void TestNumNonMissing(float sparsity) const {\n    size_t n_samples{1024}, n_features{13};\n    auto ctx = MakeCUDACtx(0);\n    auto p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);\n    auto nnz = p_fmat->Info().num_nonzero_;\n    for (auto const& page : p_fmat->GetBatches<EllpackPage>(\n             &ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {\n      auto ellpack_nnz =\n          page.Impl()->NumNonMissing(&ctx, p_fmat->Info().feature_types.ConstDeviceSpan());\n      ASSERT_EQ(nnz, ellpack_nnz);\n    }\n  }\n};\n}  // namespace\n\nTEST_P(SparseEllpack, FromGHistIndex) { this->TestFromGHistIndex(GetParam()); }\n\nTEST_P(SparseEllpack, NumNonMissing) { this->TestNumNonMissing(this->GetParam()); }\n\nINSTANTIATE_TEST_SUITE_P(EllpackPage, SparseEllpack, ::testing::Values(.0f, .2f, .4f, .8f));\n\nTEST(EllpackPage, IsDense) {\n  auto test = [](float sparsity) {\n    auto p_fmat = RandomDataGenerator{64, 16, sparsity}.GenerateDMatrix();\n    auto p = BatchParam{16, tree::TrainParam::DftSparseThreshold()};\n    auto ctx = MakeCUDACtx(0);\n    for (auto const& page : 
p_fmat->GetBatches<EllpackPage>(&ctx, p)) {\n      page.Impl()->Visit(&ctx, {}, [&](auto&& d_acc) {\n        if (sparsity == 0.0) {\n          ASSERT_EQ(d_acc.IsDense(), page.Impl()->IsDense());\n          ASSERT_TRUE(d_acc.IsDense());\n          ASSERT_EQ(p.max_bin, d_acc.NullValue());\n        } else {\n          ASSERT_FALSE(d_acc.IsDense());\n          ASSERT_EQ(p.max_bin * p_fmat->Info().num_col_, d_acc.NullValue());\n        }\n      });\n\n      page.Impl()->VisitOnHost(&ctx, [&](auto&& h_acc) {\n        if (sparsity == 0.0) {\n          ASSERT_TRUE(h_acc.IsDense());\n        } else {\n          ASSERT_FALSE(h_acc.IsDense());\n        }\n      });\n    }\n  };\n  test(0.0);\n  test(0.5);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_ellpack_page_raw_format.cu",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n\n#include \"../../../src/data/batch_utils.h\"              // for AutoHostRatio\n#include \"../../../src/data/ellpack_page.cuh\"           // for EllpackPage, GetRowStride\n#include \"../../../src/data/ellpack_page_raw_format.h\"  // for EllpackPageRawFormat\n#include \"../../../src/data/ellpack_page_source.h\"      // for EllpackFormatStreamPolicy\n#include \"../../../src/tree/param.h\"                    // for TrainParam\n#include \"../filesystem.h\"                              // for TemporaryDirectory\n#include \"../helpers.h\"\n\nnamespace xgboost::data {\nnamespace {\n[[nodiscard]] EllpackCacheInfo CInfoForTest(Context const *ctx, DMatrix *Xy, bst_idx_t row_stride,\n                                            BatchParam param,\n                                            std::shared_ptr<common::HistogramCuts const> cuts) {\n  EllpackCacheInfo cinfo{param, ::xgboost::cuda_impl::AutoHostRatio(),\n                         std::numeric_limits<float>::quiet_NaN()};\n  ExternalDataInfo ext_info;\n  ext_info.n_batches = 1;\n  ext_info.row_stride = row_stride;\n  ext_info.base_rowids.push_back(Xy->Info().num_row_);\n\n  CalcCacheMapping(ctx, Xy->IsDense(), cuts, 0, ext_info, false, &cinfo);\n  CHECK_EQ(ext_info.n_batches, cinfo.cache_mapping.size());\n  if (cinfo.NumBatchesCc() == 1) {\n    EXPECT_EQ(cinfo.cache_host_ratio, 0.0);\n    cinfo.cache_host_ratio = 1.0;  // We test the host cache.\n  }\n  return cinfo;\n}\n\nclass TestEllpackPageRawFormat : public ::testing::TestWithParam<bool> {\n public:\n  template <typename FormatStreamPolicy>\n  void Run(FormatStreamPolicy *p_policy, bool prefetch_copy) {\n    auto &policy = *p_policy;\n    auto ctx = MakeCUDACtx(0);\n    auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n    param.prefetch_copy = prefetch_copy;\n\n    auto m = RandomDataGenerator{100, 14, 
0.5}.GenerateDMatrix();\n    common::TemporaryDirectory tmpdir;\n    std::string path = tmpdir.Str() + \"/ellpack.page\";\n\n    std::shared_ptr<common::HistogramCuts const> cuts;\n    for (auto const &page : m->GetBatches<EllpackPage>(&ctx, param)) {\n      cuts = page.Impl()->CutsShared();\n    }\n\n    ASSERT_EQ(cuts->cut_values_.Device(), ctx.Device());\n    ASSERT_TRUE(cuts->cut_values_.DeviceCanRead());\n\n    auto row_stride = GetRowStride(m.get());\n    EllpackCacheInfo cinfo = CInfoForTest(&ctx, m.get(), row_stride, param, cuts);\n    policy.SetCuts(cuts, ctx.Device(), cinfo);\n\n    std::unique_ptr<EllpackPageRawFormat> format{policy.CreatePageFormat(param)};\n\n    std::size_t n_bytes{0};\n    {\n      auto fo = policy.CreateWriter(StringView{path}, 0);\n      for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {\n        n_bytes += format->Write(ellpack, fo.get());\n      }\n    }\n\n    EllpackPage page;\n    auto fi = policy.CreateReader(StringView{path}, static_cast<bst_idx_t>(0), n_bytes);\n    ASSERT_TRUE(format->Read(&page, fi.get()));\n\n    for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {\n      auto loaded = page.Impl();\n      auto orig = ellpack.Impl();\n      ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());\n      ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());\n      ASSERT_EQ(loaded->base_rowid, orig->base_rowid);\n      ASSERT_EQ(loaded->info.row_stride, orig->info.row_stride);\n      std::vector<common::CompressedByteT> h_loaded, h_orig;\n      [[maybe_unused]] auto h_loaded_acc = loaded->GetHostEllpack(&ctx, &h_loaded);\n      [[maybe_unused]] auto h_orig_acc = orig->GetHostEllpack(&ctx, &h_orig);\n      ASSERT_EQ(h_loaded, h_orig);\n    }\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestEllpackPageRawFormat, DiskIO) {\n  EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{false};\n  this->Run(&policy, this->GetParam());\n}\n\nTEST_P(TestEllpackPageRawFormat, DiskIOHmm) 
{\n  if (curt::SupportsPageableMem()) {\n    EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{true};\n    this->Run(&policy, this->GetParam());\n  } else {\n    GTEST_SKIP_(\"HMM is not supported.\");\n  }\n}\n\nTEST_P(TestEllpackPageRawFormat, HostIO) {\n  {\n    EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;\n    this->Run(&policy, this->GetParam());\n  }\n  {\n    auto ctx = MakeCUDACtx(0);\n    auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};\n    param.n_prefetch_batches = 1;\n    param.prefetch_copy = this->GetParam();\n\n    EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;\n    std::unique_ptr<EllpackPageRawFormat> format{};\n    Cache cache{false, \"name\", \"ellpack\", true};\n    for (std::size_t i = 0; i < 3; ++i) {\n      auto p_fmat = RandomDataGenerator{100, 14, 0.5}.Seed(i).GenerateDMatrix();\n      for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n        if (!format) {\n          auto n_cache_bytes = page.Impl()->MemCostBytes() * 3;\n          // Prepare the mapping info.\n          auto [cache_host_ratio, min_cache_page_bytes] = detail::DftPageSizeHostRatio(\n              n_cache_bytes, false, 1.0, ::xgboost::cuda_impl::AutoCachePageBytes());\n          EllpackCacheInfo cinfo{param, cache_host_ratio, std::numeric_limits<float>::quiet_NaN()};\n          for (std::size_t i = 0; i < 3; ++i) {\n            cinfo.cache_mapping.push_back(i);\n            cinfo.buffer_bytes.push_back(page.Impl()->MemCostBytes());\n            cinfo.buffer_rows.push_back(page.Impl()->n_rows);\n          }\n          policy.SetCuts(page.Impl()->CutsShared(), ctx.Device(), std::move(cinfo));\n          format = policy.CreatePageFormat(param);\n        }\n        auto writer = policy.CreateWriter({}, i);\n        auto n_bytes = format->Write(page, writer.get());\n        ASSERT_EQ(n_bytes, page.Impl()->MemCostBytes());\n        cache.Push(n_bytes);\n      }\n    }\n    
cache.Commit();\n\n    for (std::size_t i = 0; i < 3; ++i) {\n      auto reader = policy.CreateReader({}, cache.offset[i], cache.Bytes(i));\n      EllpackPage page;\n      ASSERT_TRUE(format->Read(&page, reader.get()));\n      ASSERT_EQ(page.Impl()->MemCostBytes(), cache.Bytes(i));\n      auto p_fmat = RandomDataGenerator{100, 14, 0.5}.Seed(i).GenerateDMatrix();\n      for (auto const &orig : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n        std::vector<common::CompressedByteT> h_orig;\n        auto h_acc_orig = orig.Impl()->GetHostEllpack(&ctx, &h_orig, {});\n        std::vector<common::CompressedByteT> h_page;\n        auto h_acc = page.Impl()->GetHostEllpack(&ctx, &h_page, {});\n        ASSERT_EQ(h_orig, h_page);\n        std::visit(\n            [&](auto &&h_acc_orig, auto &&h_acc) {\n              ASSERT_EQ(h_acc_orig.NumFeatures(), h_acc.NumFeatures());\n              ASSERT_EQ(h_acc_orig.row_stride, h_acc.row_stride);\n              ASSERT_EQ(h_acc_orig.n_rows, h_acc.n_rows);\n              ASSERT_EQ(h_acc_orig.base_rowid, h_acc.base_rowid);\n              ASSERT_EQ(h_acc_orig.IsDenseCompressed(), h_acc.IsDenseCompressed());\n              ASSERT_EQ(h_acc_orig.NullValue(), h_acc.NullValue());\n            },\n            h_acc_orig, h_acc);\n      }\n    }\n  }\n}\n\nINSTANTIATE_TEST_SUITE_P(EllpackPageRawFormat, TestEllpackPageRawFormat, ::testing::Bool());\n\nTEST(EllpackPageRawFormat, DevicePageConcat) {\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  bst_idx_t n_features = 16, n_samples = 128;\n\n  auto test = [&](std::int64_t min_cache_page_bytes, float cache_host_ratio) {\n    EllpackCacheInfo cinfo{param, cache_host_ratio, std::numeric_limits<float>::quiet_NaN()};\n    ExternalDataInfo ext_info;\n\n    ext_info.n_batches = 8;\n    ext_info.row_stride = n_features;\n    for (bst_idx_t i = 0; i < ext_info.n_batches; ++i) {\n      ext_info.base_rowids.push_back(n_samples);\n    }\n    
std::partial_sum(ext_info.base_rowids.cbegin(), ext_info.base_rowids.cend(),\n                     ext_info.base_rowids.begin());\n    ext_info.accumulated_rows = n_samples * ext_info.n_batches;\n    ext_info.nnz = ext_info.accumulated_rows * n_features;\n\n    auto p_fmat = RandomDataGenerator{n_samples, n_features, 0}.Seed(0).GenerateDMatrix();\n    EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;\n\n    for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n      auto cuts = page.Impl()->CutsShared();\n      EXPECT_TRUE(page.Impl()->IsDense());\n      CalcCacheMapping(&ctx, page.Impl()->IsDense(), cuts, min_cache_page_bytes, ext_info, false,\n                       &cinfo);\n      if (min_cache_page_bytes == ::xgboost::cuda_impl::MatchingPageBytes()) {\n        EXPECT_EQ(cinfo.NumBatchesCc(), ext_info.n_batches);\n      } else {\n        EXPECT_EQ(cinfo.buffer_rows.size(), 4ul);\n      }\n      policy.SetCuts(page.Impl()->CutsShared(), ctx.Device(), std::move(cinfo));\n    }\n\n    auto format = policy.CreatePageFormat(param);\n\n    // write multipe identical pages\n    std::size_t n_gidx_total_bytes = 0;\n    for (bst_idx_t i = 0; i < ext_info.n_batches; ++i) {\n      for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n        auto writer = policy.CreateWriter({}, i);\n        [[maybe_unused]] auto n_bytes = format->Write(page, writer.get());\n        n_gidx_total_bytes += page.Impl()->gidx_buffer.size_bytes();\n      }\n    }\n    // check correct concatenation.\n    auto mem_cache = policy.Share();\n    EXPECT_EQ(mem_cache->GidxSizeBytes(), n_gidx_total_bytes);\n    return mem_cache;\n  };\n\n  {\n    auto mem_cache =\n        test(::xgboost::cuda_impl::MatchingPageBytes(), ::xgboost::cuda_impl::AutoHostRatio());\n    ASSERT_EQ(mem_cache->d_pages.size(), 8);\n  }\n  {\n    auto mem_cache = test(n_features * n_samples, ::xgboost::cuda_impl::AutoHostRatio());\n    ASSERT_EQ(mem_cache->h_pages.size(), 
4);\n    ASSERT_EQ(mem_cache->d_pages.size(), 4);\n    ASSERT_FALSE(mem_cache->d_pages[0].empty());\n  }\n  {\n    float cache_host_ratio = 0.65;\n    auto mem_cache = test(n_features * n_samples, cache_host_ratio);\n    ASSERT_EQ(mem_cache->h_pages.size(), 4);\n    ASSERT_EQ(mem_cache->d_pages.size(), 4);\n    ASSERT_FALSE(mem_cache->d_pages[0].empty());\n    auto n_total_bytes = mem_cache->SizeBytes();\n    ASSERT_LT(mem_cache->DeviceSizeBytes(), n_total_bytes - (n_total_bytes * cache_host_ratio));\n    ASSERT_GT(mem_cache->DeviceSizeBytes(), n_total_bytes - (n_total_bytes * 0.7));\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_extmem_quantile_dmatrix.cc",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#include \"test_extmem_quantile_dmatrix.h\"  // for TestExtMemQdmBasic\n\n#include <gtest/gtest.h>\n#include <xgboost/data.h>  // for BatchParam\n\n#include <algorithm>  // for equal\n\n#include \"../../../src/common/column_matrix.h\"  // for ColumnMatrix\n#include \"../../../src/data/gradient_index.h\"   // for GHistIndexMatrix\n#include \"../../../src/tree/param.h\"            // for TrainParam\n\nnamespace xgboost::data {\nnamespace {\nclass ExtMemQuantileDMatrixCpu : public ::testing::TestWithParam<float> {\n public:\n  void Run(float sparsity) {\n    auto equal = [](Context const*, GHistIndexMatrix const& orig, GHistIndexMatrix const& sparse) {\n      // Check the CSR matrix\n      auto orig_cuts = orig.Cuts();\n      auto sparse_cuts = sparse.Cuts();\n      ASSERT_EQ(orig_cuts.Values(), sparse_cuts.Values());\n      ASSERT_EQ(orig_cuts.Ptrs(), sparse_cuts.Ptrs());\n\n      auto orig_ptr = orig.data.data();\n      auto sparse_ptr = sparse.data.data();\n      ASSERT_EQ(orig.data.size(), sparse.data.size());\n\n      auto equal = std::equal(orig_ptr, orig_ptr + orig.data.size(), sparse_ptr);\n      ASSERT_TRUE(equal);\n\n      // Check the column matrix\n      common::ColumnMatrix const& orig_columns = orig.Transpose();\n      common::ColumnMatrix const& sparse_columns = sparse.Transpose();\n\n      std::string str_orig, str_sparse;\n      common::AlignedMemWriteStream fo_orig{&str_orig}, fo_sparse{&str_sparse};\n      auto n_bytes_orig = orig_columns.Write(&fo_orig);\n      auto n_bytes_sparse = sparse_columns.Write(&fo_sparse);\n      ASSERT_EQ(n_bytes_orig, n_bytes_sparse);\n      ASSERT_EQ(str_orig, str_sparse);\n    };\n\n    Context ctx;\n    TestExtMemQdmBasic<GHistIndexMatrix>(\n        &ctx, false, sparsity, equal, [](GHistIndexMatrix const& page) { return page.IsDense(); });\n  }\n};\n}  // anonymous namespace\n\nTEST_P(ExtMemQuantileDMatrixCpu, Basic) { this->Run(this->GetParam()); 
}\n\nINSTANTIATE_TEST_SUITE_P(ExtMemQuantileDMatrix, ExtMemQuantileDMatrixCpu, ::testing::ValuesIn([] {\n                           std::vector<float> sparsities{\n                               0.0f, tree::TrainParam::DftSparseThreshold(), 0.4f, 0.8f};\n                           return sparsities;\n                         }()));\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_extmem_quantile_dmatrix.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>  // for BatchParam\n\n#include <tuple>   // for tuple\n#include <vector>  // for vector\n\n#include \"../../../src/data/batch_utils.h\"     // for AutoHostRatio\n#include \"../../../src/data/ellpack_page.cuh\"  // for EllpackPageImpl\n#include \"../helpers.h\"                        // for RandomDataGenerator, GMockThrow\n#include \"test_extmem_quantile_dmatrix.h\"      // for TestExtMemQdmBasic\n\nnamespace xgboost::data {\nauto AssertEllpackEq(Context const* ctx, EllpackPageImpl const* lhs, EllpackPageImpl const* rhs) {\n  ASSERT_EQ(lhs->n_rows, rhs->n_rows);\n  ASSERT_EQ(lhs->info.row_stride, rhs->info.row_stride);\n  ASSERT_EQ(lhs->info.n_symbols, rhs->info.n_symbols);\n  ASSERT_EQ(lhs->gidx_buffer.size(), rhs->gidx_buffer.size());\n\n  ASSERT_EQ(lhs->Cuts().Values(), rhs->Cuts().Values());\n  ASSERT_EQ(lhs->Cuts().Ptrs(), rhs->Cuts().Ptrs());\n\n  std::vector<common::CompressedByteT> h_buf, d_buf;\n  auto h_acc = rhs->GetHostEllpack(ctx, &h_buf);\n  auto d_acc = rhs->GetHostEllpack(ctx, &d_buf);\n  std::visit(\n      [&](auto&& h_acc, auto&& d_acc) {\n        for (std::size_t i = 0; i < h_acc.n_rows * h_acc.row_stride; ++i) {\n          ASSERT_EQ(h_acc.gidx_iter[i], d_acc.gidx_iter[i]);\n        }\n      },\n      h_acc, d_acc);\n}\n\nclass ExtMemQuantileDMatrixGpu : public ::testing::TestWithParam<std::tuple<float, bool>> {\n public:\n  void Run(float sparsity, bool on_host) {\n    auto equal = [](Context const* ctx, EllpackPage const& orig, EllpackPage const& sparse) {\n      AssertEllpackEq(ctx, orig.Impl(), sparse.Impl());\n    };\n    auto no_missing = [](EllpackPage const& page) {\n      return page.Impl()->IsDense();\n    };\n\n    auto ctx = MakeCUDACtx(0);\n    TestExtMemQdmBasic<EllpackPage>(&ctx, on_host, sparsity, equal, no_missing);\n  }\n};\n\nTEST_P(ExtMemQuantileDMatrixGpu, Basic) {\n  auto [sparsity, on_host] = 
this->GetParam();\n  this->Run(sparsity, on_host);\n}\n\nINSTANTIATE_TEST_SUITE_P(ExtMemQuantileDMatrix, ExtMemQuantileDMatrixGpu,\n                         ::testing::Combine(::testing::Values(0.0f, 0.2f, 0.4f, 0.8f),\n                                            ::testing::Bool()));\n\nclass EllpackHostCacheTest : public ::testing::TestWithParam<std::tuple<double, bool, float>> {\n public:\n  static constexpr bst_idx_t NumSamples() { return 8192; }\n  static constexpr bst_idx_t NumFeatures() { return 4; }\n  static constexpr bst_bin_t NumBins() { return 256; }\n  // Assumes dense\n  static constexpr bst_idx_t NumBytes() { return NumFeatures() * NumSamples(); }\n\n  void Run(float sparsity, bool is_concat, float cache_host_ratio) {\n    auto ctx = MakeCUDACtx(0);\n    auto param = BatchParam{NumBins(), tree::TrainParam::DftSparseThreshold()};\n    auto n_batches = 4;\n    auto p_fmat = RandomDataGenerator{NumSamples(), NumFeatures(), sparsity}\n                      .Device(ctx.Device())\n                      .GenerateDMatrix();\n    bst_idx_t min_page_cache_bytes = ::xgboost::cuda_impl::MatchingPageBytes();\n    if (is_concat) {\n      min_page_cache_bytes =\n          p_fmat->GetBatches<EllpackPage>(&ctx, param).begin().Page()->Impl()->MemCostBytes() / 3;\n    }\n\n    auto p_ext_fmat = RandomDataGenerator{NumSamples(), NumFeatures(), sparsity}\n                          .Batches(n_batches)\n                          .Bins(param.max_bin)\n                          .Device(ctx.Device())\n                          .OnHost(true)\n                          .MinPageCacheBytes(min_page_cache_bytes)\n                          .CacheHostRatio(cache_host_ratio)\n                          .GenerateExtMemQuantileDMatrix(\"temp\", true);\n    if (!is_concat) {\n      ASSERT_EQ(p_ext_fmat->NumBatches(), n_batches);\n    } else {\n      ASSERT_EQ(p_ext_fmat->NumBatches(), n_batches / 2);\n    }\n    ASSERT_EQ(p_fmat->Info().num_row_, p_ext_fmat->Info().num_row_);\n    for 
(auto const& page_s : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n      auto impl_s = page_s.Impl();\n      auto cuts_s = impl_s->CutsShared();\n      auto new_impl = std::make_unique<EllpackPageImpl>(&ctx, cuts_s, sparsity == 0.0,\n                                                        impl_s->info.row_stride, impl_s->n_rows);\n      new_impl->CopyInfo(impl_s);\n      bst_idx_t offset = 0;\n      for (auto const& page_m : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n        auto impl_m = page_m.Impl();\n        offset += new_impl->Copy(&ctx, impl_m, offset);\n      }\n      AssertEllpackEq(&ctx, impl_s, new_impl.get());\n    }\n  }\n};\n\nTEST_P(EllpackHostCacheTest, Basic) {\n  auto [sparsity, is_concat, cache_host_ratio] = this->GetParam();\n  this->Run(sparsity, is_concat, cache_host_ratio);\n}\n\nINSTANTIATE_TEST_SUITE_P(\n    ExtMemQuantileDMatrix, EllpackHostCacheTest,\n    ::testing::Combine(::testing::Values(0.0f, 0.2f, 0.4f, 0.8f), ::testing::Bool(),\n                       ::testing::Values(0.0f, 0.5f, 1.0f, ::xgboost::cuda_impl::AutoHostRatio())));\n\nTEST(EllpackHostCacheTest, Accessor) {\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};\n  param.prefetch_copy = false;\n  std::size_t n_bytes = 0;\n  {\n    auto p_ext_fmat = RandomDataGenerator{128, 16, 0.0}\n                          .Batches(4)\n                          .Bins(param.max_bin)\n                          .Device(ctx.Device())\n                          .OnHost(true)\n                          .MinPageCacheBytes(1024 * 1024 * 1024)\n                          .CacheHostRatio(0.0)\n                          .GenerateExtMemQuantileDMatrix(\"temp\", true);\n    ASSERT_EQ(p_ext_fmat->NumBatches(), 1);\n\n    for (auto const& page : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n      auto acc = page.Impl()->GetDeviceEllpack(&ctx, {});\n      // Fully on device\n      auto dacc = std::get_if<EllpackDeviceAccessor>(&acc);\n  
    ASSERT_TRUE(dacc);\n      n_bytes = page.Impl()->MemCostBytes();\n    }\n  }\n  if (!curt::SupportsPageableMem()) {\n    GTEST_SKIP_(\"Requires HMM or ATS.\");\n  }\n  {\n    std::size_t n_pages = 2;  // split for 2 pages\n    auto p_ext_fmat = RandomDataGenerator{128, 16, 0.0}\n                          .Batches(4)\n                          .Bins(param.max_bin)\n                          .Device(ctx.Device())\n                          .OnHost(true)\n                          .MinPageCacheBytes(n_bytes / n_pages)\n                          .CacheHostRatio(0.5)\n                          .GenerateExtMemQuantileDMatrix(\"temp\", true);\n    ASSERT_EQ(p_ext_fmat->NumBatches(), n_pages);\n    for (auto const& page : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n      auto acc = page.Impl()->GetDeviceEllpack(&ctx, {});\n      // Host + device\n      auto dacc = std::get_if<DoubleEllpackAccessor>(&acc);\n      ASSERT_TRUE(dacc);\n    }\n  }\n}\n\nclass EllpackDecompTest : public ::testing::TestWithParam<float> {\n public:\n  void Run(float hw_decomp_ratio) {\n    auto ctx = MakeCUDACtx(0);\n    auto param = BatchParam{128, tree::TrainParam::DftSparseThreshold()};\n    std::size_t n_samples = 8192, n_features = 512;\n    float sparsity = 0.6;\n    auto full_p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}\n                           .Batches(4)\n                           .Bins(param.max_bin)\n                           .Device(ctx.Device())\n                           .HwDecompRatio(0.0)\n                           .OnHost(true)\n                           .MinPageCacheBytes(n_samples * n_features / 4)\n                           .CacheHostRatio(0.8)\n                           .GenerateExtMemQuantileDMatrix(\"temp\", false);\n\n    auto comp_p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}\n                           .Batches(4)\n                           .Bins(param.max_bin)\n                           .Device(ctx.Device())\n     
                      .HwDecompRatio(hw_decomp_ratio)\n                           .OnHost(true)\n                           .MinPageCacheBytes(n_samples * n_features / 4)\n                           .CacheHostRatio(0.8)\n                           .GenerateExtMemQuantileDMatrix(\"temp\", false);\n\n    auto get_pages = [&](std::shared_ptr<DMatrix> p_fmat) {\n      std::vector<std::shared_ptr<EllpackPage const>> pages;\n      auto it = p_fmat->GetBatches<EllpackPage>(&ctx, param).begin();\n      while (!it.AtEnd()) {\n        auto page = it.Page();\n        EXPECT_FALSE(page->Impl()->IsDenseCompressed());\n        pages.emplace_back(std::move(page));\n        ++it;\n      }\n      return pages;\n    };\n\n    std::vector<std::shared_ptr<EllpackPage const>> full_pages = get_pages(full_p_fmat);\n    std::vector<std::shared_ptr<EllpackPage const>> comp_pages = get_pages(comp_p_fmat);\n\n    ASSERT_EQ(full_pages.size(), comp_pages.size());\n    for (std::size_t i = 0, n = full_pages.size(); i < n; ++i) {\n      auto impl_f = full_pages[i]->Impl();\n      auto impl_c = comp_pages[i]->Impl();\n      ASSERT_EQ(impl_f->gidx_buffer.size(), impl_c->gidx_buffer.size());\n      ASSERT_EQ(impl_f->d_gidx_buffer.size(), impl_c->d_gidx_buffer.size());\n      ASSERT_EQ(impl_f->NumNonMissing(&ctx, {}), impl_c->NumNonMissing(&ctx, {}));\n\n      std::vector<common::CompressedByteT> buf_f;\n      [[maybe_unused]] auto acc_f = impl_f->GetHostEllpack(&ctx, &buf_f);\n\n      std::vector<common::CompressedByteT> buf_c;\n      [[maybe_unused]] auto acc_c = impl_c->GetHostEllpack(&ctx, &buf_c);\n\n      ASSERT_EQ(buf_f.size(), buf_c.size());\n      for (std::size_t i = 0, m = buf_f.size(); i < m; ++i) {\n        ASSERT_EQ(buf_f[i], buf_c[i]) << i;\n      }\n    }\n  }\n};\n\nTEST_P(EllpackDecompTest, Basic) {\n  auto ctx = MakeCUDACtx(0);\n  auto hw_decomp_ratio = this->GetParam();\n  this->Run(hw_decomp_ratio);\n}\n\nINSTANTIATE_TEST_SUITE_P(ExtMemQuantileDMatrix, EllpackDecompTest,\n       
                  ::testing::Values(1.0f, 0.1f, 0.5f, 0.0f));\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_extmem_quantile_dmatrix.h",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#include <xgboost/base.h>\n#include <xgboost/context.h>\n\n#include \"../../../src/tree/param.h\"  // for TrainParam\n#include \"../helpers.h\"               // for RandomDataGenerator\n\nnamespace xgboost::data {\ntemplate <typename Page, typename Equal, typename NoMissing>\nvoid TestExtMemQdmBasic(Context const* ctx, bool on_host, float sparsity, Equal&& check_equal,\n                        NoMissing&& no_missing) {\n  bst_idx_t n_samples = 256, n_features = 16, n_batches = 4;\n  bst_bin_t max_bin = 64;\n  bst_target_t n_targets = 3;\n  BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};\n\n  auto p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}\n                    .Bins(max_bin)\n                    .Batches(n_batches)\n                    .Targets(n_targets)\n                    .Device(ctx->Device())\n                    .OnHost(on_host)\n                    .GenerateExtMemQuantileDMatrix(\"temp\", true);\n  ASSERT_FALSE(p_fmat->SingleColBlock());\n\n  // Loop over the batches and count the number of pages\n  bst_idx_t batch_cnt = 0, base_cnt = 0, row_cnt = 0;\n  for (auto const& page : p_fmat->GetBatches<Page>(ctx, p)) {\n    ASSERT_EQ(page.BaseRowId(), base_cnt);\n    ++batch_cnt;\n    base_cnt += n_samples / n_batches;\n    row_cnt += page.Size();\n    ASSERT_EQ((sparsity == 0.0f), no_missing(page));\n  }\n  ASSERT_EQ(n_batches, batch_cnt);\n  ASSERT_EQ(p_fmat->Info().num_row_, n_samples);\n  EXPECT_EQ(p_fmat->Info().num_row_, row_cnt);\n  ASSERT_EQ(p_fmat->Info().num_col_, n_features);\n  if (sparsity == 0.0f) {\n    ASSERT_EQ(p_fmat->Info().num_nonzero_, n_samples * n_features);\n  } else {\n    ASSERT_LT(p_fmat->Info().num_nonzero_, n_samples * n_features);\n    ASSERT_GT(p_fmat->Info().num_nonzero_, 0);\n  }\n  ASSERT_EQ(p_fmat->Info().labels.Shape(0), n_samples);\n  ASSERT_EQ(p_fmat->Info().labels.Shape(1), n_targets);\n\n  // Compare against the sparse page 
DMatrix\n  auto p_sparse = RandomDataGenerator{n_samples, n_features, sparsity}\n                      .Bins(max_bin)\n                      .Batches(n_batches)\n                      .Targets(n_targets)\n                      .Device(ctx->Device())\n                      .OnHost(on_host)\n                      .GenerateSparsePageDMatrix(\"temp\", true);\n  auto it = p_fmat->GetBatches<Page>(ctx, p).begin();\n  for (auto const& page : p_sparse->GetBatches<Page>(ctx, p)) {\n    auto orig = it.Page();\n    check_equal(ctx, *orig, page);\n    ++it;\n  }\n\n  // Check meta info\n  auto h_y_sparse = p_sparse->Info().labels.HostView();\n  auto h_y = p_fmat->Info().labels.HostView();\n  for (std::size_t i = 0, m = h_y_sparse.Shape(0); i < m; ++i) {\n    for (std::size_t j = 0, n = h_y_sparse.Shape(1); j < n; ++j) {\n      ASSERT_EQ(h_y(i, j), h_y_sparse(i, j));\n    }\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_file_iterator.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <any>  // for any_cast\n#include <memory>\n\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/data/file_iterator.h\"\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../filesystem.h\"  // for TemporaryDirectory\n#include \"../helpers.h\"\n\nnamespace xgboost::data {\nTEST(FileIterator, Basic) {\n  auto check_n_features = [](FileIterator *iter) {\n    size_t n_features = 0;\n    iter->Reset();\n    while (iter->Next()) {\n      auto proxy = MakeProxy(iter->Proxy());\n      auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());\n      n_features = std::max(n_features, csr->NumColumns());\n    }\n    ASSERT_EQ(n_features, 5);\n  };\n\n  common::TemporaryDirectory tmpdir;\n  {\n    auto zpath = tmpdir.Str() + \"/0-based.svm\";\n    CreateBigTestData(zpath, 3 * 64, true);\n    zpath += \"?indexing_mode=0&format=libsvm\";\n    FileIterator iter{zpath, 0, 1};\n    check_n_features(&iter);\n  }\n\n  {\n    auto opath = tmpdir.Str() + \"/1-based.svm\";\n    CreateBigTestData(opath, 3 * 64, false);\n    opath += \"?indexing_mode=1&format=libsvm\";\n    FileIterator iter{opath, 0, 1};\n    check_n_features(&iter);\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_gradient_index.cc",
    "content": "/**\n * Copyright 2021-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>                       // for BatchIterator, BatchSet, DMatrix, BatchParam\n\n#include <algorithm>                            // for sort, unique\n#include <cmath>                                // for isnan\n#include <cstddef>                              // for size_t\n#include <limits>                               // for numeric_limits\n#include <memory>                               // for shared_ptr, __shared_ptr_access, unique_ptr\n#include <string>                               // for string\n#include <tuple>                                // for make_tuple, tie, tuple\n#include <utility>                              // for move\n#include <vector>                               // for vector\n\n#include \"../../../src/common/categorical.h\"    // for AsCat\n#include \"../../../src/common/column_matrix.h\"  // for ColumnMatrix\n#include \"../../../src/common/hist_util.h\"      // for Index, HistogramCuts, SketchOnDMatrix\n#include \"../../../src/common/io.h\"             // for MemoryBufferStream\n#include \"../../../src/data/adapter.h\"          // for SparsePageAdapterBatch\n#include \"../../../src/data/gradient_index.h\"   // for GHistIndexMatrix\n#include \"../../../src/tree/param.h\"            // for TrainParam\n#include \"../helpers.h\"                         // for GenerateRandomCategoricalSingleColumn...\n#include \"xgboost/base.h\"                       // for bst_bin_t\n#include \"xgboost/context.h\"                    // for Context\n#include \"xgboost/host_device_vector.h\"         // for HostDeviceVector\n\nnamespace xgboost::data {\nTEST(GradientIndex, ExternalMemoryBaseRowID) {\n  Context ctx;\n  auto p_fmat = RandomDataGenerator{4096, 256, 0.5}\n                    .Device(ctx.Device())\n                    .Batches(8)\n                    .GenerateSparsePageDMatrix(\"cache\", true);\n\n  std::vector<size_t> 
base_rowids;\n  std::vector<float> hessian(p_fmat->Info().num_row_, 1);\n  for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {\n    base_rowids.push_back(page.base_rowid);\n  }\n\n  std::size_t i = 0;\n  for (auto const &page : p_fmat->GetBatches<SparsePage>()) {\n    ASSERT_EQ(base_rowids[i], page.base_rowid);\n    ++i;\n  }\n\n  base_rowids.clear();\n  for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {\n    base_rowids.push_back(page.base_rowid);\n  }\n  i = 0;\n  for (auto const &page : p_fmat->GetBatches<SparsePage>()) {\n    ASSERT_EQ(base_rowids[i], page.base_rowid);\n    ++i;\n  }\n}\n\nTEST(GradientIndex, FromCategoricalBasic) {\n  size_t constexpr kRows = 1000, kCats = 13, kCols = 1;\n  size_t max_bins = 8;\n  auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);\n  auto m = GetDMatrixFromData(x, kRows, 1);\n  Context ctx;\n\n  auto &h_ft = m->Info().feature_types.HostVector();\n  h_ft.resize(kCols, FeatureType::kCategorical);\n\n  BatchParam p(max_bins, 0.8);\n  GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {});\n\n  auto x_copy = x;\n  std::sort(x_copy.begin(), x_copy.end());\n  auto n_uniques = std::unique(x_copy.begin(), x_copy.end()) - x_copy.begin();\n  ASSERT_EQ(n_uniques, kCats);\n\n  auto const &h_cut_ptr = gidx.cut.Ptrs();\n  auto const &h_cut_values = gidx.cut.Values();\n\n  ASSERT_EQ(h_cut_ptr.size(), 2);\n  ASSERT_EQ(h_cut_values.size(), kCats);\n\n  auto const &index = gidx.index;\n\n  for (size_t i = 0; i < x.size(); ++i) {\n    auto bin = index[i];\n    auto bin_value = h_cut_values.at(bin);\n    ASSERT_EQ(common::AsCat(x[i]), common::AsCat(bin_value));\n  }\n}\n\nTEST(GradientIndex, FromCategoricalLarge) {\n  size_t constexpr kRows = 1000, kCats = 512, kCols = 1;\n  bst_bin_t max_bins = 8;\n  auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);\n  auto m = GetDMatrixFromData(x, kRows, 1);\n  Context ctx;\n\n  auto &h_ft 
= m->Info().feature_types.HostVector();\n  h_ft.resize(kCols, FeatureType::kCategorical);\n\n  BatchParam p{max_bins, 0.8};\n  {\n    GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}};\n    ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);\n  }\n  {\n    for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, p)) {\n      common::HistogramCuts cut = page.cut;\n      GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};\n      ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);\n    }\n  }\n}\n\nTEST(GradientIndex, PushBatch) {\n  size_t constexpr kRows = 64, kCols = 4;\n  bst_bin_t max_bins = 64;\n  float st = 0.5;\n  Context ctx;\n\n  auto test = [&](float sparisty) {\n    auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);\n    auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {});\n    common::HistogramCuts copy_cuts = cuts;\n\n    ASSERT_EQ(m->Info().num_row_, kRows);\n    ASSERT_EQ(m->Info().num_col_, kCols);\n    GHistIndexMatrix gmat{m->Info(), std::move(copy_cuts), max_bins};\n\n    for (auto const &page : m->GetBatches<SparsePage>()) {\n      SparsePageAdapterBatch batch{page.GetView()};\n      gmat.PushAdapterBatch(m->Ctx(), 0, 0, batch, std::numeric_limits<float>::quiet_NaN(), {}, st,\n                            m->Info().num_row_);\n      gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);\n    }\n    for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{max_bins, st})) {\n      for (size_t i = 0; i < kRows; ++i) {\n        for (size_t j = 0; j < kCols; ++j) {\n          auto v0 = gmat.GetFvalue(i, j, false);\n          auto v1 = page.GetFvalue(i, j, false);\n          if (sparisty == 0.0) {\n            ASSERT_FALSE(std::isnan(v0));\n          }\n          if (!std::isnan(v0)) {\n            ASSERT_EQ(v0, v1);\n          }\n        }\n      }\n    }\n  };\n\n  test(0.0f);\n  test(0.5f);\n  
test(0.9f);\n}\n\n#if defined(XGBOOST_USE_CUDA)\n\nnamespace {\nclass GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, float>> {\n protected:\n  void Run(float density, double threshold) {\n    // Only testing with small sample size as the cuts might be different between host and\n    // device.\n    size_t n_samples{128}, n_features{13};\n    Context ctx;\n    auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);\n    std::unique_ptr<GHistIndexMatrix> from_ellpack;\n    ASSERT_TRUE(Xy->SingleColBlock());\n    bst_bin_t constexpr kBins{17};\n    auto p = BatchParam{kBins, threshold};\n    auto gpu_ctx = MakeCUDACtx(0);\n    for (auto const &page : Xy->GetBatches<EllpackPage>(\n             &gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {\n      from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);\n    }\n\n    for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {\n      ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());\n      ASSERT_EQ(from_sparse_page.base_rowid, 0);\n      ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);\n      ASSERT_EQ(from_sparse_page.Size(), from_ellpack->Size());\n      ASSERT_EQ(from_sparse_page.index.Size(), from_ellpack->index.Size());\n\n      auto const &gidx_from_sparse = from_sparse_page.index;\n      auto const &gidx_from_ellpack = from_ellpack->index;\n\n      for (size_t i = 0; i < gidx_from_sparse.Size(); ++i) {\n        ASSERT_EQ(gidx_from_sparse[i], gidx_from_ellpack[i]);\n      }\n\n      auto const &columns_from_sparse = from_sparse_page.Transpose();\n      auto const &columns_from_ellpack = from_ellpack->Transpose();\n      ASSERT_EQ(columns_from_sparse.AnyMissing(), columns_from_ellpack.AnyMissing());\n      ASSERT_EQ(columns_from_sparse.GetTypeSize(), columns_from_ellpack.GetTypeSize());\n      ASSERT_EQ(columns_from_sparse.GetNumFeature(), 
columns_from_ellpack.GetNumFeature());\n      for (size_t i = 0; i < n_features; ++i) {\n        ASSERT_EQ(columns_from_sparse.GetColumnType(i), columns_from_ellpack.GetColumnType(i));\n      }\n\n      std::string from_sparse_buf;\n      {\n        common::AlignedMemWriteStream fo{&from_sparse_buf};\n        auto n_bytes = columns_from_sparse.Write(&fo);\n        ASSERT_EQ(fo.Tell(), n_bytes);\n      }\n      std::string from_ellpack_buf;\n      {\n        common::AlignedMemWriteStream fo{&from_ellpack_buf};\n        auto n_bytes = columns_from_sparse.Write(&fo);\n        ASSERT_EQ(fo.Tell(), n_bytes);\n      }\n      ASSERT_EQ(from_sparse_buf, from_ellpack_buf);\n    }\n  }\n};\n}  // anonymous namespace\n\nTEST_P(GHistIndexMatrixTest, FromEllpack) {\n  float sparsity;\n  double thresh;\n  std::tie(sparsity, thresh) = GetParam();\n  this->Run(sparsity, thresh);\n}\n\nINSTANTIATE_TEST_SUITE_P(GHistIndexMatrix, GHistIndexMatrixTest,\n                         testing::Values(std::make_tuple(1.f, .0),    // no missing\n                                         std::make_tuple(.2f, .8),    // sparse columns\n                                         std::make_tuple(.8f, .2),    // dense columns\n                                         std::make_tuple(1.f, .2),    // no missing\n                                         std::make_tuple(.5f, .6),    // sparse columns\n                                         std::make_tuple(.6f, .4)));  // dense columns\n\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_gradient_index_page_raw_format.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // for Context\n\n#include <cstddef>  // for size_t\n#include <memory>   // for unique_ptr\n\n#include \"../../../src/common/column_matrix.h\"  // for common::ColumnMatrix\n#include \"../../../src/common/io.h\"             // for MmapResource, AlignedResourceReadStream...\n#include \"../../../src/data/gradient_index.h\"   // for GHistIndexMatrix\n#include \"../../../src/data/gradient_index_format.h\"       // for GHistIndexRawFormat\n#include \"../../../src/data/gradient_index_page_source.h\"  // for GHistIndexFormatPolicy\n#include \"../helpers.h\"                                    // for RandomDataGenerator\n\nnamespace xgboost::data {\nTEST(GHistIndexPageRawFormat, IO) {\n  Context ctx;\n\n  auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();\n  common::TemporaryDirectory tmpdir;\n  std::string path = tmpdir.Str() + \"/ghistindex.page\";\n  auto batch = BatchParam{256, 0.5};\n\n  common::HistogramCuts cuts{0};\n  for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n    cuts = index.Cuts();\n    break;\n  }\n  auto format = std::make_unique<GHistIndexRawFormat>(std::move(cuts));\n\n  std::size_t bytes{0};\n  {\n    auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, \"wb\");\n    for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n      bytes += format->Write(index, fo.get());\n    }\n  }\n\n  GHistIndexMatrix page;\n\n  std::unique_ptr<common::AlignedResourceReadStream> fi{\n      std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};\n  ASSERT_TRUE(format->Read(&page, fi.get()));\n\n  for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n    auto const &loaded = gidx;\n    ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs());\n    ASSERT_EQ(loaded.cut.Values(), page.cut.Values());\n    ASSERT_EQ(loaded.base_rowid, page.base_rowid);\n  
  ASSERT_EQ(loaded.row_ptr.size(), page.row_ptr.size());\n    ASSERT_TRUE(std::equal(loaded.row_ptr.cbegin(), loaded.row_ptr.cend(), page.row_ptr.cbegin()));\n    ASSERT_EQ(loaded.IsDense(), page.IsDense());\n    ASSERT_TRUE(std::equal(loaded.index.begin(), loaded.index.end(), page.index.begin()));\n    ASSERT_TRUE(std::equal(loaded.index.Offset(), loaded.index.Offset() + loaded.index.OffsetSize(),\n                           page.index.Offset()));\n\n    ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());\n  }\n}\n\nTEST(GHistIndexPageRawFormat, File) {\n  auto policy = MemBufFileReadFormatStreamPolicy<GHistIndexMatrix, GHistIndexFormatPolicy>{};\n\n  std::string path = \"ghist.page\";\n  ASSERT_THAT([&] { policy.CreateReader(StringView{path}, static_cast<bst_idx_t>(0), 0); },\n              GMockThrow(\"doesn't exist\"));\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_iterative_dmatrix.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors\n */\n#include \"test_iterative_dmatrix.h\"\n\n#include <gtest/gtest.h>\n\n#include <limits>  // for numeric_limits\n#include <memory>\n\n#include \"../../../src/data/gradient_index.h\"\n#include \"../../../src/data/iterative_dmatrix.h\"\n#include \"../helpers.h\"\n#include \"xgboost/data.h\"  // DMatrix\n\nnamespace xgboost::data {\nTEST(IterativeDMatrix, Ref) {\n  Context ctx;\n  TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(\n      &ctx, [&](GHistIndexMatrix const& page) { return page.cut; });\n}\n\nTEST(IterativeDMatrix, IsDense) {\n  bst_bin_t n_bins = 16;\n  auto test = [n_bins](float sparsity) {\n    NumpyArrayIterForTest iter(sparsity);\n    auto n_threads = 0;\n    IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,\n                       std::numeric_limits<float>::quiet_NaN(), n_threads, n_bins);\n    ASSERT_EQ(m.Ctx()->Threads(), AllThreadsForTest());\n    if (sparsity == 0.0) {\n      ASSERT_TRUE(m.IsDense());\n    } else {\n      ASSERT_FALSE(m.IsDense());\n    }\n  };\n  test(0.0);\n  test(0.1);\n  test(1.0);\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_iterative_dmatrix.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <memory>  // for dynamic_pointer_cast\n\n#include \"../../../src/common/io.h\"  // for AlignedFileWriteStream\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"../../../src/data/ellpack_page.cuh\"\n#include \"../../../src/data/ellpack_page.h\"\n#include \"../../../src/data/iterative_dmatrix.h\"\n#include \"../../../src/tree/param.h\"  // TrainParam\n#include \"../filesystem.h\"            // for TemporaryDirectory\n#include \"../helpers.h\"\n#include \"test_iterative_dmatrix.h\"\n\nnamespace xgboost::data {\nvoid TestEquivalent(float sparsity) {\n  auto ctx = MakeCUDACtx(0);\n\n  CudaArrayIterForTest iter{sparsity};\n  IterativeDMatrix m{\n      &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, 256};\n  std::size_t offset = 0;\n  auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();\n  std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl{\n      &ctx, first->CutsShared(), first->is_dense, first->info.row_stride, 1000 * 100}};\n  for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {\n    auto page = batch.Impl();\n    size_t num_elements = page_concatenated->Copy(&ctx, page, offset);\n    offset += num_elements;\n  }\n  std::vector<common::CompressedByteT> h_iter_buffer;\n  auto from_iter = page_concatenated->GetHostEllpack(&ctx, &h_iter_buffer);\n  ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());\n  ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());\n\n  std::string interface_str = iter.AsArray();\n  auto adapter = CupyAdapter(interface_str);\n  std::unique_ptr<DMatrix> dm{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};\n  auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {\n    std::vector<common::CompressedByteT> h_data_buffer;\n    auto from_data 
= ellpack.Impl()->GetHostEllpack(&ctx, &h_data_buffer);\n\n    std::visit(\n        [](auto&& from_iter, auto&& from_data) {\n          ASSERT_EQ(from_iter.gidx_fvalue_map.size(), from_data.gidx_fvalue_map.size());\n          for (size_t i = 0; i < from_iter.gidx_fvalue_map.size(); ++i) {\n            EXPECT_NEAR(from_iter.gidx_fvalue_map[i], from_data.gidx_fvalue_map[i], kRtEps);\n          }\n          ASSERT_EQ(from_iter.NumFeatures(), from_data.NumFeatures());\n          for (size_t i = 0; i < from_iter.NumFeatures() + 1; ++i) {\n            ASSERT_EQ(from_iter.feature_segments[i], from_data.feature_segments[i]);\n          }\n        },\n        from_iter, from_data);\n\n    std::vector<common::CompressedByteT> buffer_from_iter, buffer_from_data;\n    auto data_iter = page_concatenated->GetHostEllpack(&ctx, &buffer_from_iter);\n    auto data_buf = ellpack.Impl()->GetHostEllpack(&ctx, &buffer_from_data);\n    ASSERT_NE(buffer_from_data.size(), 0);\n    ASSERT_NE(buffer_from_iter.size(), 0);\n    CHECK_EQ(ellpack.Impl()->NumSymbols(), page_concatenated->NumSymbols());\n\n    std::visit(\n        [](auto&& from_iter, auto&& from_data) {\n          CHECK_EQ(from_data.n_rows * from_data.row_stride,\n                   from_data.n_rows * from_iter.row_stride);\n        },\n        from_iter, from_data);\n    std::visit(\n        [](auto&& from_data, auto&& data_buf, auto&& data_iter) {\n          for (size_t i = 0; i < from_data.n_rows * from_data.row_stride; ++i) {\n            CHECK_EQ(data_buf.gidx_iter[i], data_iter.gidx_iter[i]);\n          }\n        },\n        from_data, data_buf, data_iter);\n  }\n}\n\nTEST(IterativeDeviceDMatrix, Basic) {\n  TestEquivalent(0.0);\n  TestEquivalent(0.5);\n}\n\nTEST(IterativeDeviceDMatrix, RowMajor) {\n  CudaArrayIterForTest iter(0.0f);\n  IterativeDMatrix m{\n      &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, 256};\n  size_t n_batches = 0;\n  std::string interface_str = 
iter.AsArray();\n  Context ctx{MakeCUDACtx(0)};\n  for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {\n    n_batches++;\n    auto impl = ellpack.Impl();\n\n    auto cols = CudaArrayIterForTest::Cols();\n    auto rows = CudaArrayIterForTest::Rows();\n\n    auto j_interface = Json::Load({interface_str.c_str(), interface_str.size()});\n    ArrayInterface<2> loaded{get<Object const>(j_interface)};\n    std::vector<float> h_data(cols * rows);\n    common::Span<float const> s_data{static_cast<float const*>(loaded.data), cols * rows};\n    dh::CopyDeviceSpanToVector(&h_data, s_data);\n\n    impl->VisitOnHost(&ctx, [&](auto&& h_accessor) {\n      auto cut_ptr = h_accessor.feature_segments;\n      for (auto i = 0ull; i < rows * cols; i++) {\n        int column_idx = i % cols;\n        EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx),\n                  h_accessor.gidx_iter[i] + cut_ptr[column_idx]);\n      }\n    });\n\n    EXPECT_EQ(m.Info().num_col_, cols);\n    EXPECT_EQ(m.Info().num_row_, rows);\n    EXPECT_EQ(m.Info().num_nonzero_, rows * cols);\n  }\n  // All batches are concatenated.\n  ASSERT_EQ(n_batches, 1);\n}\n\nTEST(IterativeDeviceDMatrix, RowMajorMissing) {\n  const float kMissing = std::numeric_limits<float>::quiet_NaN();\n  bst_idx_t rows = 4;\n  size_t cols = 3;\n  CudaArrayIterForTest iter{0.0f, rows, cols, 2};\n  std::string interface_str = iter.AsArray();\n  auto j_interface = Json::Load({interface_str.c_str(), interface_str.size()});\n  ArrayInterface<2> loaded{get<Object const>(j_interface)};\n  std::vector<float> h_data(cols * rows);\n  common::Span<float const> s_data{static_cast<float const*>(loaded.data), cols * rows};\n  dh::CopyDeviceSpanToVector(&h_data, s_data);\n  h_data[1] = kMissing;\n  h_data[5] = kMissing;\n  h_data[6] = kMissing;\n  h_data[9] = kMissing;   // idx = (2, 0)\n  h_data[10] = kMissing;  // idx = (2, 1)\n  auto ptr =\n      
thrust::device_ptr<float>(reinterpret_cast<float*>(get<Integer>(j_interface[\"data\"][0])));\n  thrust::copy(h_data.cbegin(), h_data.cend(), ptr);\n  IterativeDMatrix m{\n      &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, 256};\n  auto ctx = MakeCUDACtx(0);\n  auto& ellpack =\n      *m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})\n           .begin();\n  auto impl = ellpack.Impl();\n  impl->VisitOnHost(&ctx, [&](auto&& h_acc) {\n    // null values get placed after valid values in a row\n    ASSERT_FALSE(h_acc.IsDenseCompressed());\n    ASSERT_EQ(h_acc.row_stride, cols - 1);\n    ASSERT_EQ(h_acc.gidx_iter[7], impl->NullValue());\n    for (std::size_t i = 0; i < 7; ++i) {\n      ASSERT_NE(h_acc.gidx_iter[i], impl->NullValue());\n    }\n  });\n\n  EXPECT_EQ(m.Info().num_col_, cols);\n  EXPECT_EQ(m.Info().num_row_, rows);\n  EXPECT_EQ(m.Info().num_nonzero_, rows * cols - 5);\n}\n\nTEST(IterativeDeviceDMatrix, IsDense) {\n  int num_bins = 16;\n  auto test = [num_bins](float sparsity) {\n    CudaArrayIterForTest iter(sparsity);\n    IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,\n                       std::numeric_limits<float>::quiet_NaN(), 0, num_bins);\n    if (sparsity == 0.0) {\n      ASSERT_TRUE(m.IsDense());\n    } else {\n      ASSERT_FALSE(m.IsDense());\n    }\n  };\n  test(0.0);\n  test(0.1);\n  test(1.0);\n}\n\nTEST(IterativeDeviceDMatrix, Ref) {\n  Context ctx{MakeCUDACtx(0)};\n  TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(\n      &ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });\n}\n\nTEST(IterativeDeviceDMatrix, IO) {\n  auto ctx = MakeCUDACtx(0);\n  std::size_t n_samples = 2048, n_features = 128;\n  auto p_fmat = RandomDataGenerator{n_samples, n_features, 0.0}\n                    .Bins(32)\n                    .Device(ctx.Device())\n                    .GenerateQuantileDMatrix(true);\n  auto qdm = 
std::dynamic_pointer_cast<IterativeDMatrix>(p_fmat);\n  ASSERT_TRUE(qdm);\n  common::TemporaryDirectory tmpdir;\n  auto path = tmpdir.Path() / \"data.qdm\";\n  {\n    auto fo = std::make_unique<common::AlignedFileWriteStream>(path.string(), \"wb\");\n    qdm->Save(fo.get());\n  }\n  auto fsize = std::filesystem::file_size(path);\n  auto fi = std::make_unique<common::MemBufFileReadStream>(path.string(), 0ul, fsize);\n  auto loaded = std::shared_ptr<IterativeDMatrix>(IterativeDMatrix::Load(fi.get()));\n  for (auto const& orig_page : qdm->GetBatches<EllpackPage>(&ctx, {})) {\n    for (auto const& new_page : loaded->GetBatches<EllpackPage>(&ctx, {})) {\n      std::vector<common::CompressedByteT> h_orig, h_new;\n      orig_page.Impl()->GetHostEllpack(&ctx, &h_orig);\n      new_page.Impl()->GetHostEllpack(&ctx, &h_new);\n      ASSERT_EQ(h_orig, h_new);\n      auto orig_cuts = orig_page.Impl()->Cuts();\n      auto new_cuts = new_page.Impl()->Cuts();\n      ASSERT_EQ(orig_cuts.Ptrs(), new_cuts.Ptrs());\n      ASSERT_EQ(orig_cuts.Values(), new_cuts.Values());\n    }\n  }\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_iterative_dmatrix.h",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors\n */\n#pragma once\n#include <xgboost/context.h>  // for Context\n\n#include <limits>  // for numeric_limits\n#include <memory>  // for make_shared\n\n#include \"../../../src/data/iterative_dmatrix.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost {\nnamespace data {\ntemplate <typename Page, typename Iter, typename Cuts>\nvoid TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) {\n  int n_bins = 256;\n  Iter iter(0.3, 2048);\n  auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,\n                                              std::numeric_limits<float>::quiet_NaN(), 0, n_bins);\n\n  Iter iter_1(0.8, 32, Iter::Cols(), 13);\n  auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,\n                                                std::numeric_limits<float>::quiet_NaN(), 0, n_bins);\n\n  for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {\n    for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {\n      auto const& cuts_0 = get_cuts(page_0);\n      auto const& cuts_1 = get_cuts(page_1);\n      ASSERT_EQ(cuts_0.Values(), cuts_1.Values());\n      ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());\n    }\n  }\n\n  m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,\n                                           std::numeric_limits<float>::quiet_NaN(), 0, n_bins);\n  for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {\n    for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {\n      auto const& cuts_0 = get_cuts(page_0);\n      auto const& cuts_1 = get_cuts(page_1);\n      ASSERT_NE(cuts_0.Values(), cuts_1.Values());\n      ASSERT_NE(cuts_0.Ptrs(), cuts_1.Ptrs());\n    }\n  }\n\n  // Use DMatrix as ref\n  auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);\n  auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, 
Reset, Next,\n                                                std::numeric_limits<float>::quiet_NaN(), 0, n_bins);\n  for (auto const& page_0 : dm->template GetBatches<Page>(ctx, {})) {\n    for (auto const& page_1 : dqm->template GetBatches<Page>(ctx, {})) {\n      auto const& cuts_0 = get_cuts(page_0);\n      auto const& cuts_1 = get_cuts(page_1);\n      ASSERT_EQ(cuts_0.Values(), cuts_1.Values());\n      ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());\n    }\n  }\n}\n}  // namespace data\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_metainfo.cc",
    "content": "/**\n * Copyright 2016-2025, XGBoost contributors\n */\n#include \"test_metainfo.h\"\n\n#include <dmlc/io.h>\n#include <gmock/gmock.h>\n#include <xgboost/data.h>\n\n#include <memory>\n#include <string>\n\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"../filesystem.h\"              // TemporaryDirectory\n#include \"../helpers.h\"                 // for GMockTHrow\n#include \"xgboost/base.h\"\n\nnamespace xgboost {\nTEST(MetaInfo, GetSet) {\n  xgboost::Context ctx;\n  xgboost::MetaInfo info;\n\n  double double2[2] = {1.0, 2.0};\n\n  EXPECT_EQ(info.labels.Size(), 0);\n  info.SetInfo(ctx, \"label\", Make1dInterfaceTest(double2, 2));\n  EXPECT_EQ(info.labels.Size(), 2);\n\n  float float2[2] = {1.0f, 2.0f};\n  EXPECT_EQ(info.GetWeight(1), 1.0f) << \"When no weights are given, was expecting default value 1\";\n  info.SetInfo(ctx, \"weight\", Make1dInterfaceTest(float2, 2));\n  EXPECT_EQ(info.GetWeight(1), 2.0f);\n\n  uint32_t uint32_t2[2] = {1U, 2U};\n  EXPECT_EQ(info.base_margin_.Size(), 0);\n  info.SetInfo(ctx, \"base_margin\", Make1dInterfaceTest(uint32_t2, 2));\n  EXPECT_EQ(info.base_margin_.Size(), 2);\n\n  uint64_t uint64_t2[2] = {1U, 2U};\n  EXPECT_EQ(info.group_ptr_.size(), 0);\n  info.SetInfo(ctx, \"group\", Make1dInterfaceTest(uint64_t2, 2));\n  ASSERT_EQ(info.group_ptr_.size(), 3);\n  EXPECT_EQ(info.group_ptr_[2], 3);\n\n  info.Clear();\n  ASSERT_EQ(info.group_ptr_.size(), 0);\n}\n\nTEST(MetaInfo, GetSetFeature) {\n  xgboost::MetaInfo info;\n  ASSERT_THAT([&] { info.SetFeatureInfo(\"\", nullptr, 0); },\n              GMockThrow(\"Unknown feature info name\"));\n  EXPECT_THROW(info.SetFeatureInfo(\"\", nullptr, 0), dmlc::Error);\n  EXPECT_THROW(info.SetFeatureInfo(\"foo\", nullptr, 0), dmlc::Error);\n  EXPECT_NO_THROW(info.SetFeatureInfo(\"feature_name\", nullptr, 0));\n  EXPECT_NO_THROW(info.SetFeatureInfo(\"feature_type\", nullptr, 0));\n  ASSERT_EQ(info.feature_type_names.size(), 0);\n  
ASSERT_EQ(info.feature_types.Size(), 0);\n  ASSERT_EQ(info.feature_names.size(), 0);\n\n  size_t constexpr kCols = 19;\n  std::vector<std::string> types(kCols, u8\"float\");\n  std::vector<char const*> c_types(kCols);\n  std::transform(types.cbegin(), types.cend(), c_types.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  info.num_col_ = 1;\n  EXPECT_THROW(\n      info.SetFeatureInfo(u8\"feature_type\", c_types.data(), c_types.size()),\n      dmlc::Error);\n  info.num_col_ = kCols;\n  EXPECT_NO_THROW(\n      info.SetFeatureInfo(u8\"feature_type\", c_types.data(), c_types.size()));\n\n  // Test clear.\n  info.SetFeatureInfo(\"feature_type\", nullptr, 0);\n  ASSERT_EQ(info.feature_type_names.size(), 0);\n  ASSERT_EQ(info.feature_types.Size(), 0);\n  // Other conditions are tested in `SaveLoadBinary`.\n}\n\nnamespace {\nvoid VerifyGetSetFeatureColumnSplit() {\n  xgboost::MetaInfo info;\n  info.data_split_mode = DataSplitMode::kCol;\n  auto const world_size = collective::GetWorldSize();\n\n  auto constexpr kCols{2};\n  std::vector<std::string> types{u8\"float\", u8\"c\"};\n  std::vector<char const *> c_types(kCols);\n  std::transform(types.cbegin(), types.cend(), c_types.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  info.num_col_ = kCols;\n  ASSERT_THAT([&] { info.SetFeatureInfo(u8\"feature_type\", c_types.data(), c_types.size()); },\n              GMockThrow(\"Length of feature_type must be equal to number of columns\"));\n  info.num_col_ = kCols * world_size;\n  EXPECT_NO_THROW(info.SetFeatureInfo(u8\"feature_type\", c_types.data(), c_types.size()));\n  std::vector<std::string> expected_type_names{u8\"float\", u8\"c\",     u8\"float\",\n                                               u8\"c\",     u8\"float\", u8\"c\"};\n  EXPECT_EQ(info.feature_type_names, expected_type_names);\n  std::vector<xgboost::FeatureType> expected_types{\n      xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,\n      
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,\n      xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical};\n  EXPECT_EQ(info.feature_types.HostVector(), expected_types);\n\n  std::vector<std::string> names{u8\"feature0\", u8\"feature1\"};\n  std::vector<char const *> c_names(kCols);\n  std::transform(names.cbegin(), names.cend(), c_names.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  info.num_col_ = kCols;\n  ASSERT_THAT([&] { info.SetFeatureInfo(u8\"feature_name\", c_names.data(), c_names.size()); },\n              GMockThrow(\"Length of feature_name must be equal to number of columns\"));\n  info.num_col_ = kCols * world_size;\n  EXPECT_NO_THROW(info.SetFeatureInfo(u8\"feature_name\", c_names.data(), c_names.size()));\n  std::vector<std::string> expected_names{u8\"0.feature0\", u8\"0.feature1\", u8\"1.feature0\",\n                                          u8\"1.feature1\", u8\"2.feature0\", u8\"2.feature1\"};\n  EXPECT_EQ(info.feature_names, expected_names);\n}\n}  // anonymous namespace\n\nTEST(MetaInfo, GetSetFeatureColumnSplit) {\n  auto constexpr kWorkers{3};\n  collective::TestDistributedGlobal(kWorkers, VerifyGetSetFeatureColumnSplit);\n}\n\nTEST(MetaInfo, SaveLoadBinary) {\n  xgboost::MetaInfo info;\n  xgboost::Context ctx;\n\n  uint64_t constexpr kRows { 64 }, kCols { 32 };\n  auto generator = []() {\n                     static float f = 0;\n                     return f++;\n                   };\n  std::vector<float> values (kRows);\n  std::generate(values.begin(), values.end(), generator);\n  info.SetInfo(ctx, \"label\", Make1dInterfaceTest(values.data(), kRows));\n  info.SetInfo(ctx, \"weight\", Make1dInterfaceTest(values.data(), kRows));\n  info.SetInfo(ctx, \"base_margin\", Make1dInterfaceTest(values.data(), kRows));\n\n  info.num_row_ = kRows;\n  info.num_col_ = kCols;\n\n  auto featname = u8\"特征名\";\n  std::vector<std::string> types(kCols, u8\"float\");\n  std::vector<char 
const*> c_types(kCols);\n  std::transform(types.cbegin(), types.cend(), c_types.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  info.SetFeatureInfo(u8\"feature_type\", c_types.data(), c_types.size());\n  std::vector<std::string> names(kCols, featname);\n  std::vector<char const*> c_names(kCols);\n  std::transform(names.cbegin(), names.cend(), c_names.begin(),\n                 [](auto const &str) { return str.c_str(); });\n  info.SetFeatureInfo(u8\"feature_name\", c_names.data(), c_names.size());;\n\n  common::TemporaryDirectory tempdir;\n  const std::string tmp_file = tempdir.Str() + \"/metainfo.binary\";\n  {\n    std::unique_ptr<dmlc::Stream> fs {\n      dmlc::Stream::Create(tmp_file.c_str(), \"w\")\n    };\n    info.SaveBinary(fs.get());\n  }\n\n  {\n    // Round-trip test\n    std::unique_ptr<dmlc::Stream> fs {\n      dmlc::Stream::Create(tmp_file.c_str(), \"r\")\n    };\n    xgboost::MetaInfo inforead;\n    inforead.LoadBinary(fs.get());\n    ASSERT_EQ(inforead.num_row_, kRows);\n    EXPECT_EQ(inforead.num_row_, info.num_row_);\n    EXPECT_EQ(inforead.num_col_, info.num_col_);\n    EXPECT_EQ(inforead.num_nonzero_, info.num_nonzero_);\n\n    ASSERT_EQ(inforead.labels.Data()->HostVector(), values);\n    EXPECT_EQ(inforead.labels.Data()->HostVector(), info.labels.Data()->HostVector());\n    EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);\n    EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());\n\n    auto orig_margin = info.base_margin_.View(xgboost::DeviceOrd::CPU());\n    auto read_margin = inforead.base_margin_.View(xgboost::DeviceOrd::CPU());\n    EXPECT_TRUE(std::equal(orig_margin.Values().cbegin(), orig_margin.Values().cend(),\n                           read_margin.Values().cbegin()));\n\n    EXPECT_EQ(inforead.feature_type_names.size(), kCols);\n    EXPECT_EQ(inforead.feature_types.Size(), kCols);\n    EXPECT_TRUE(std::all_of(inforead.feature_type_names.cbegin(),\n                            
inforead.feature_type_names.cend(),\n                            [](auto const &str) { return str == u8\"float\"; }));\n    auto h_ft = inforead.feature_types.HostSpan();\n    EXPECT_TRUE(std::all_of(h_ft.cbegin(), h_ft.cend(), [](auto f) {\n      return f == xgboost::FeatureType::kNumerical;\n    }));\n\n    EXPECT_EQ(inforead.feature_names.size(), kCols);\n    EXPECT_TRUE(std::all_of(inforead.feature_names.cbegin(),\n                            inforead.feature_names.cend(),\n                            [=](auto const& str) {\n                              return str == featname;\n                            }));\n  }\n}\n\nTEST(MetaInfo, LoadQid) {\n  common::TemporaryDirectory tempdir;\n  std::string tmp_file = tempdir.Str() + \"/qid_test.libsvm\";\n  {\n    std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(tmp_file.c_str(), \"w\"));\n    dmlc::ostream os(fs.get());\n    os << R\"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0\n                2 qid:1 1:0 2:0 3:1 4:0.1 5:1\n                1 qid:1 1:0 2:1 3:0 4:0.4 5:0\n                1 qid:1 1:0 2:0 3:1 4:0.3 5:0\n                1 qid:2 1:0 2:0 3:1 4:0.2 5:0\n                2 qid:2 1:1 2:0 3:1 4:0.4 5:0\n                1 qid:2 1:0 2:0 3:1 4:0.1 5:0\n                1 qid:2 1:0 2:0 3:1 4:0.2 5:0\n                2 qid:3 1:0 2:0 3:1 4:0.1 5:1\n                3 qid:3 1:1 2:1 3:0 4:0.3 5:0\n                4 qid:3 1:1 2:0 3:0 4:0.4 5:1\n                1 qid:3 1:0 2:1 3:1 4:0.5 5:0)qid\";\n    os.set_stream(nullptr);\n  }\n  std::unique_ptr<xgboost::DMatrix> dmat(\n      xgboost::DMatrix::Load(tmp_file + \"?format=libsvm\", true, xgboost::DataSplitMode::kRow));\n\n  const xgboost::MetaInfo& info = dmat->Info();\n  const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};\n  CHECK(info.group_ptr_ == expected_group_ptr);\n\n  const std::vector<xgboost::bst_idx_t> expected_offset{\n    0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60\n  };\n  const std::vector<xgboost::Entry> expected_data{\n      
xgboost::Entry(1, 1),   xgboost::Entry(2, 1),   xgboost::Entry(3, 0),\n      xgboost::Entry(4, 0.2), xgboost::Entry(5, 0),   xgboost::Entry(1, 0),\n      xgboost::Entry(2, 0),   xgboost::Entry(3, 1),   xgboost::Entry(4, 0.1),\n      xgboost::Entry(5, 1),   xgboost::Entry(1, 0),   xgboost::Entry(2, 1),\n      xgboost::Entry(3, 0),   xgboost::Entry(4, 0.4), xgboost::Entry(5, 0),\n      xgboost::Entry(1, 0),   xgboost::Entry(2, 0),   xgboost::Entry(3, 1),\n      xgboost::Entry(4, 0.3), xgboost::Entry(5, 0),   xgboost::Entry(1, 0),\n      xgboost::Entry(2, 0),   xgboost::Entry(3, 1),   xgboost::Entry(4, 0.2),\n      xgboost::Entry(5, 0),   xgboost::Entry(1, 1),   xgboost::Entry(2, 0),\n      xgboost::Entry(3, 1),   xgboost::Entry(4, 0.4), xgboost::Entry(5, 0),\n      xgboost::Entry(1, 0),   xgboost::Entry(2, 0),   xgboost::Entry(3, 1),\n      xgboost::Entry(4, 0.1), xgboost::Entry(5, 0),   xgboost::Entry(1, 0),\n      xgboost::Entry(2, 0),   xgboost::Entry(3, 1),   xgboost::Entry(4, 0.2),\n      xgboost::Entry(5, 0),   xgboost::Entry(1, 0),   xgboost::Entry(2, 0),\n      xgboost::Entry(3, 1),   xgboost::Entry(4, 0.1), xgboost::Entry(5, 1),\n      xgboost::Entry(1, 1),   xgboost::Entry(2, 1),   xgboost::Entry(3, 0),\n      xgboost::Entry(4, 0.3), xgboost::Entry(5, 0),   xgboost::Entry(1, 1),\n      xgboost::Entry(2, 0),   xgboost::Entry(3, 0),   xgboost::Entry(4, 0.4),\n      xgboost::Entry(5, 1),   xgboost::Entry(1, 0),   xgboost::Entry(2, 1),\n      xgboost::Entry(3, 1),   xgboost::Entry(4, 0.5), {5, 0}};\n  for (const auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {\n    CHECK_EQ(batch.base_rowid, 0);\n    CHECK(batch.offset.HostVector() == expected_offset);\n    CHECK(batch.data.HostVector() == expected_data);\n  }\n}\n\nTEST(MetaInfo, CPUQid) {\n  xgboost::MetaInfo info;\n  xgboost::Context ctx;\n  info.num_row_ = 100;\n  std::vector<uint32_t> qid(info.num_row_, 0);\n  for (size_t i = 0; i < qid.size(); ++i) {\n    qid[i] = i;\n  }\n\n  info.SetInfo(ctx, 
\"qid\", Make1dInterfaceTest(qid.data(), info.num_row_));\n  ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);\n  ASSERT_EQ(info.group_ptr_.front(), 0);\n  ASSERT_EQ(info.group_ptr_.back(), info.num_row_);\n\n  for (size_t i = 0; i < info.num_row_ + 1; ++i) {\n    ASSERT_EQ(info.group_ptr_[i], i);\n  }\n}\n\nTEST(MetaInfo, Validate) {\n  xgboost::MetaInfo info;\n  info.num_row_ = 10;\n  info.num_nonzero_ = 12;\n  info.num_col_ = 3;\n  std::vector<xgboost::bst_group_t> groups (11);\n  Context ctx;\n  info.SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), groups.size()));\n  EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);\n\n  std::vector<float> labels(info.num_row_ + 1);\n  EXPECT_THROW(\n      { info.SetInfo(ctx, \"label\", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); },\n      dmlc::Error);\n\n  // Make overflow data, which can happen when users pass group structure as int\n  // or float.\n  groups = {};\n  for (size_t i = 0; i < 63; ++i) {\n    groups.push_back(1562500);\n  }\n  groups.push_back(static_cast<xgboost::bst_group_t>(-1));\n  EXPECT_THROW(info.SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), groups.size())),\n               dmlc::Error);\n\n#if defined(XGBOOST_USE_CUDA)\n  info.group_ptr_.clear();\n  labels.resize(info.num_row_);\n  info.SetInfo(ctx, \"label\", Make1dInterfaceTest(labels.data(), info.num_row_));\n  info.labels.SetDevice(FstCU());\n  EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);\n\n  xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};\n  d_groups.SetDevice(FstCU());\n  d_groups.DevicePointer();  // pull to device\n  std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(\n      d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};\n  EXPECT_THROW(info.SetInfo(ctx, \"group\", xgboost::StringView{arr_interface_str}), dmlc::Error);\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n\nTEST(MetaInfo, HostExtend) {\n  xgboost::MetaInfo lhs, 
rhs;\n  xgboost::Context ctx;\n  size_t const kRows = 100;\n  lhs.labels.Reshape(kRows);\n  lhs.num_row_ = kRows;\n  rhs.labels.Reshape(kRows);\n  rhs.num_row_ = kRows;\n  ASSERT_TRUE(lhs.labels.Data()->HostCanRead());\n  ASSERT_TRUE(rhs.labels.Data()->HostCanRead());\n\n  size_t per_group = 10;\n  std::vector<xgboost::bst_group_t> groups;\n  for (size_t g = 0; g < kRows / per_group; ++g) {\n    groups.emplace_back(per_group);\n  }\n  lhs.SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), groups.size()));\n  rhs.SetInfo(ctx, \"group\", Make1dInterfaceTest(groups.data(), groups.size()));\n\n  lhs.Extend(rhs, true, true);\n  ASSERT_EQ(lhs.num_row_, kRows * 2);\n  ASSERT_TRUE(lhs.labels.Data()->HostCanRead());\n  ASSERT_TRUE(rhs.labels.Data()->HostCanRead());\n  ASSERT_FALSE(lhs.labels.Data()->DeviceCanRead());\n  ASSERT_FALSE(rhs.labels.Data()->DeviceCanRead());\n\n  ASSERT_EQ(lhs.group_ptr_.front(), 0);\n  ASSERT_EQ(lhs.group_ptr_.back(), kRows * 2);\n  for (size_t i = 0; i < kRows * 2 / per_group; ++i) {\n    ASSERT_EQ(lhs.group_ptr_.at(i), per_group * i);\n  }\n}\n\nTEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }\n\nnamespace {\nclass TestMetaInfo : public ::testing::TestWithParam<std::tuple<bst_target_t, bool>> {\n public:\n  void Run(Context const *ctx, bst_target_t n_targets) {\n    MetaInfo info;\n    info.num_row_ = 128;\n    info.num_col_ = 3;\n    info.feature_names.resize(info.num_col_, \"a\");\n    info.labels.Reshape(info.num_row_, n_targets);\n\n    HostDeviceVector<bst_idx_t> ridx(info.num_row_ / 2, 0);\n    ridx.SetDevice(ctx->Device());\n    auto h_ridx = ridx.HostSpan();\n    for (std::size_t i = 0, j = 0; i < ridx.Size(); i++, j += 2) {\n      h_ridx[i] = j;\n    }\n\n    {\n      info.weights_.Resize(info.num_row_);\n      auto h_w = info.weights_.HostSpan();\n      std::iota(h_w.begin(), h_w.end(), 0);\n    }\n\n    auto out = info.Slice(ctx, ctx->IsCPU() ? 
h_ridx : ridx.ConstDeviceSpan(), /*nnz=*/256);\n\n    ASSERT_EQ(info.labels.Device(), ctx->Device());\n    auto h_y = info.labels.HostView();\n    auto h_y_out = out.labels.HostView();\n    ASSERT_EQ(h_y_out.Shape(0), ridx.Size());\n    ASSERT_EQ(h_y_out.Shape(1), n_targets);\n\n    auto h_w = info.weights_.ConstHostSpan();\n    auto h_w_out = out.weights_.ConstHostSpan();\n    ASSERT_EQ(h_w_out.size(), ridx.Size());\n\n    for (std::size_t i = 0; i < ridx.Size(); ++i) {\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        ASSERT_EQ(h_y_out(i, t), h_y(h_ridx[i], t));\n      }\n      ASSERT_EQ(h_w_out[i], h_w[h_ridx[i]]);\n    }\n\n    for (auto v : info.feature_names) {\n      ASSERT_EQ(v, \"a\");\n    }\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestMetaInfo, Slice) {\n  Context ctx;\n  auto [n_targets, is_cuda] = this->GetParam();\n  if (is_cuda) {\n    ctx = MakeCUDACtx(0);\n  }\n  this->Run(&ctx, n_targets);\n}\n\nINSTANTIATE_TEST_SUITE_P(Cpu, TestMetaInfo,\n                         ::testing::Values(std::tuple{1u, false}, std::tuple{3u, false}));\n\n#if defined(XGBOOST_USE_CUDA)\nINSTANTIATE_TEST_SUITE_P(Gpu, TestMetaInfo,\n                         ::testing::Values(std::tuple{1u, true}, std::tuple{3u, true}));\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_metainfo.cu",
    "content": "/*! Copyright 2019-2021 by XGBoost Contributors */\n\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <xgboost/context.h>\n#include <xgboost/data.h>\n#include <xgboost/json.h>\n\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"test_array_interface.h\"\n#include \"test_metainfo.h\"\n\nnamespace xgboost {\n\ntemplate <typename T>\nstd::string PrepareData(std::string typestr, thrust::device_vector<T>* out, const size_t kRows=16) {\n  out->resize(kRows);\n  auto& d_data = *out;\n\n  for (size_t i = 0; i < d_data.size(); ++i) {\n    d_data[i] = i * 2.0;\n  }\n\n  Json column { Object() };\n\n  std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};\n  column[\"shape\"] = Array(j_shape);\n  column[\"strides\"] = Array(std::vector<Json>{Json(Integer{static_cast<Integer::Int>(sizeof(T))})});\n  column[\"version\"] = 3;\n  column[\"typestr\"] = String(typestr);\n\n  auto p_d_data = d_data.data().get();\n  std::vector<Json> j_data{Json(Integer{reinterpret_cast<Integer::Int>(p_d_data)}),\n                           Json(Boolean(false))};\n  column[\"data\"] = j_data;\n  column[\"stream\"] = nullptr;\n  Json array(std::vector<Json>{column});\n\n  std::string str;\n  Json::Dump(array, &str);\n\n  return str;\n}\n\nTEST(MetaInfo, FromInterface) {\n  cudaSetDevice(0);\n  Context ctx;\n  thrust::device_vector<float> d_data;\n\n  std::string str = PrepareData<float>(\"<f4\", &d_data);\n\n  MetaInfo info;\n  info.SetInfo(ctx, \"label\", str.c_str());\n\n  auto const& h_label = info.labels.HostView();\n  ASSERT_EQ(h_label.Size(), d_data.size());\n  for (size_t i = 0; i < d_data.size(); ++i) {\n    ASSERT_EQ(h_label(i), d_data[i]);\n  }\n\n  info.SetInfo(ctx, \"weight\", str.c_str());\n  auto const& h_weight = info.weights_.HostVector();\n  for (size_t i = 0; i < d_data.size(); ++i) {\n    ASSERT_EQ(h_weight[i], d_data[i]);\n  }\n\n  info.SetInfo(ctx, \"base_margin\", str.c_str());\n  auto const h_base_margin = 
info.base_margin_.View(DeviceOrd::CPU());\n  ASSERT_EQ(h_base_margin.Size(), d_data.size());\n  for (size_t i = 0; i < d_data.size(); ++i) {\n    ASSERT_EQ(h_base_margin(i), d_data[i]);\n  }\n\n  thrust::device_vector<int> d_group_data;\n  std::string group_str = PrepareData<int>(\"<i4\", &d_group_data, 4);\n  d_group_data[0] = 4;\n  d_group_data[1] = 3;\n  d_group_data[2] = 2;\n  d_group_data[3] = 1;\n  info.SetInfo(ctx, \"group\", group_str.c_str());\n  std::vector<bst_group_t> expected_group_ptr = {0, 4, 7, 9, 10};\n  EXPECT_EQ(info.group_ptr_, expected_group_ptr);\n}\n\nTEST(MetaInfo, GPUStridedData) {\n  TestMetaInfoStridedData(DeviceOrd::CUDA(0));\n}\n\nTEST(MetaInfo, Group) {\n  cudaSetDevice(0);\n  MetaInfo info;\n  Context ctx;\n\n  thrust::device_vector<uint32_t> d_uint;\n  std::string uint_str = PrepareData<uint32_t>(\"<u4\", &d_uint);\n  info.SetInfo(ctx, \"group\", uint_str.c_str());\n  auto& h_group = info.group_ptr_;\n  ASSERT_EQ(h_group.size(), d_uint.size() + 1);\n  for (size_t i = 1; i < h_group.size(); ++i) {\n    ASSERT_EQ(h_group[i], d_uint[i - 1] + h_group[i - 1]) << \"i: \" << i;\n  }\n\n  thrust::device_vector<int64_t> d_int64;\n  std::string int_str = PrepareData<int64_t>(\"<i8\", &d_int64);\n  info = MetaInfo();\n  info.SetInfo(ctx, \"group\", int_str.c_str());\n  h_group = info.group_ptr_;\n  ASSERT_EQ(h_group.size(), d_uint.size() + 1);\n  for (size_t i = 1; i < h_group.size(); ++i) {\n    ASSERT_EQ(h_group[i], d_uint[i - 1] + h_group[i - 1]) << \"i: \" << i;\n  }\n\n  // Incorrect type\n  thrust::device_vector<float> d_float;\n  std::string float_str = PrepareData<float>(\"<f4\", &d_float);\n  info = MetaInfo();\n  EXPECT_ANY_THROW(info.SetInfo(ctx, \"group\", float_str.c_str()));\n}\n\nTEST(MetaInfo, GPUQid) {\n  xgboost::MetaInfo info;\n  Context ctx;\n  info.num_row_ = 100;\n  thrust::device_vector<uint32_t> qid(info.num_row_, 0);\n  for (size_t i = 0; i < qid.size(); ++i) {\n    qid[i] = i;\n  }\n  auto column = 
Generate2dArrayInterface(info.num_row_, 1, \"<u4\", &qid);\n  Json array{std::vector<Json>{column}};\n  std::string array_str;\n  Json::Dump(array, &array_str);\n  info.SetInfo(ctx, \"qid\", array_str.c_str());\n  ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);\n  ASSERT_EQ(info.group_ptr_.front(), 0);\n  ASSERT_EQ(info.group_ptr_.back(), info.num_row_);\n\n  for (size_t i = 0; i < info.num_row_ + 1; ++i) {\n    ASSERT_EQ(info.group_ptr_[i], i);\n  }\n}\n\n\nTEST(MetaInfo, DeviceExtend) {\n  dh::safe_cuda(cudaSetDevice(0));\n  size_t const kRows = 100;\n  MetaInfo lhs, rhs;\n  Context ctx;\n\n  thrust::device_vector<float> d_data;\n  std::string str = PrepareData<float>(\"<f4\", &d_data, kRows);\n  lhs.SetInfo(ctx, \"label\", str.c_str());\n  rhs.SetInfo(ctx, \"label\", str.c_str());\n  ASSERT_FALSE(rhs.labels.Data()->HostCanRead());\n  lhs.num_row_ = kRows;\n  rhs.num_row_ = kRows;\n\n  lhs.Extend(rhs, true, true);\n  ASSERT_EQ(lhs.num_row_, kRows * 2);\n  ASSERT_FALSE(lhs.labels.Data()->HostCanRead());\n\n  ASSERT_FALSE(lhs.labels.Data()->HostCanRead());\n  ASSERT_FALSE(rhs.labels.Data()->HostCanRead());\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_metainfo.h",
    "content": "/**\n * Copyright 2021-2024, XGBoost Contributors\n */\n#ifndef XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_\n#define XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>\n#include <xgboost/linalg.h>\n\n#include <numeric>\n\n#include \"../../../src/common/linalg_op.h\"\n\nnamespace xgboost {\ninline void TestMetaInfoStridedData(DeviceOrd device) {\n  MetaInfo info;\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", device.Name()}});\n  {\n    // labels\n    linalg::Tensor<float, 3> labels;\n    labels.Reshape(4, 2, 3);\n    auto& h_label = labels.Data()->HostVector();\n    std::iota(h_label.begin(), h_label.end(), 0.0);\n    auto t_labels = labels.View(device).Slice(linalg::All(), 0, linalg::All());\n    ASSERT_EQ(t_labels.Shape().size(), 2);\n\n    info.SetInfo(ctx, \"label\", StringView{ArrayInterfaceStr(t_labels)});\n    auto const& h_result = info.labels.View(DeviceOrd::CPU());\n    ASSERT_EQ(h_result.Shape().size(), 2);\n    auto in_labels = labels.View(DeviceOrd::CPU());\n    linalg::cpu_impl::ElementWiseKernel(h_result, omp_get_max_threads(),\n                                        [&](size_t i, std::size_t j) {\n                                          // Sliced at second dimension.\n                                          auto v_0 = h_result(i, j);\n                                          auto v_1 = in_labels(i, 0, j);\n                                          CHECK_EQ(v_0, v_1);\n                                        });\n  }\n  {\n    // qid\n    linalg::Tensor<uint64_t, 2> qid;\n    qid.Reshape(32, 2);\n    auto& h_qid = qid.Data()->HostVector();\n    std::iota(h_qid.begin(), h_qid.end(), 0);\n    auto s = qid.View(device).Slice(linalg::All(), 0);\n    auto str = ArrayInterfaceStr(s);\n    info.SetInfo(ctx, \"qid\", StringView{str});\n    auto const& h_result = info.group_ptr_;\n    ASSERT_EQ(h_result.size(), s.Size() + 1);\n  }\n  {\n    // 
base margin\n    linalg::Tensor<float, 3> base_margin;\n    base_margin.Reshape(4, 2, 3);\n    auto& h_margin = base_margin.Data()->HostVector();\n    std::iota(h_margin.begin(), h_margin.end(), 0.0);\n    auto t_margin = base_margin.View(device).Slice(linalg::All(), 0, linalg::All());\n    ASSERT_EQ(t_margin.Shape().size(), 2);\n\n    info.SetInfo(ctx, \"base_margin\", StringView{ArrayInterfaceStr(t_margin)});\n    auto const& h_result = info.base_margin_.View(DeviceOrd::CPU());\n    ASSERT_EQ(h_result.Shape().size(), 2);\n    auto in_margin = base_margin.View(DeviceOrd::CPU());\n    linalg::cpu_impl::ElementWiseKernel(h_result, omp_get_max_threads(),\n                                        [&](std::size_t i, std::size_t j) {\n                                          // Sliced at second dimension.\n                                          auto v_0 = h_result(i, j);\n                                          auto v_1 = in_margin(i, 0, j);\n                                          CHECK_EQ(v_0, v_1);\n                                        });\n  }\n}\n}  // namespace xgboost\n#endif  // XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_\n"
  },
  {
    "path": "tests/cpp/data/test_proxy_dmatrix.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <cstddef>  // for size_t\n#include <vector>   // for vector\n\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../helpers.h\"\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::data {\nTEST(ProxyDMatrix, HostData) {\n  DMatrixProxy proxy;\n  std::size_t constexpr kRows = 100, kCols = 10;\n  std::vector<HostDeviceVector<float>> label_storage(1);\n\n  HostDeviceVector<float> storage;\n  auto data =\n      RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);\n\n  proxy.SetArray(data.c_str());\n  using cpu_impl::DispatchAny;\n\n  auto n_samples = DispatchAny(&proxy, [](auto const &value) { return value.Size(); });\n  ASSERT_EQ(n_samples, kRows);\n  auto n_features = DispatchAny(&proxy, [](auto const &value) { return value.NumCols(); });\n  ASSERT_EQ(n_features, kCols);\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_proxy_dmatrix.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/host_device_vector.h>\n\n#include <any>     // for any_cast\n#include <memory>  // for shared_ptr\n#include <vector>  // for vector\n\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../helpers.h\"\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n\nnamespace xgboost::data {\nTEST(ProxyDMatrix, DeviceData) {\n  constexpr size_t kRows{100}, kCols{100};\n  HostDeviceVector<float> storage;\n  auto data =\n      RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);\n  std::vector<HostDeviceVector<float>> label_storage(1);\n  auto labels = RandomDataGenerator(kRows, 1, 0)\n                    .Device(FstCU())\n                    .GenerateColumnarArrayInterface(&label_storage);\n\n  DMatrixProxy proxy;\n  proxy.SetCudaArray(data.c_str());\n  proxy.SetInfo(\"label\", labels.c_str());\n\n  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));\n  ASSERT_EQ(proxy.Info().labels.Size(), kRows);\n  ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(), kRows);\n  ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(), kCols);\n\n  std::vector<HostDeviceVector<float>> columnar_storage(kCols);\n  data = RandomDataGenerator(kRows, kCols, 0)\n             .Device(FstCU())\n             .GenerateColumnarArrayInterface(&columnar_storage);\n  proxy.SetCudaColumnar(data.c_str());\n  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));\n  ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(), kRows);\n  ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(), kCols);\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/data/test_simple_dmatrix.cc",
    "content": "/**\n * Copyright 2016-2025, XGBoost Contributors\n */\n#include <xgboost/data.h>\n\n#include <array>   // std::array\n#include <limits>  // std::numeric_limits\n#include <memory>  // std::unique_ptr\n\n#include \"../../../src/data/adapter.h\"         // ArrayAdapter\n#include \"../../../src/data/simple_dmatrix.h\"  // SimpleDMatrix\n#include \"../collective/test_worker.h\"         // for TestDistributedGlobal\n#include \"../filesystem.h\"                     // for TemporaryDirectory\n#include \"../helpers.h\"                        // RandomDataGenerator,CreateSimpleTestData\n#include \"xgboost/base.h\"\n#include \"xgboost/host_device_vector.h\"  // HostDeviceVector\n#include \"xgboost/string_view.h\"         // StringView\n\nusing namespace xgboost;  // NOLINT\n\nnamespace {\nstd::string UriSVM(std::string name) { return name + \"?format=libsvm\"; }\n}  // namespace\n\nTEST(SimpleDMatrix, MetaInfo) {\n  common::TemporaryDirectory tempdir;\n  const std::string tmp_file = tempdir.Str() + \"/simple.libsvm\";\n  CreateSimpleTestData(tmp_file);\n  xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));\n\n  // Test the metadata that was parsed\n  EXPECT_EQ(dmat->Info().num_row_, 2);\n  EXPECT_EQ(dmat->Info().num_col_, 5);\n  EXPECT_EQ(dmat->Info().num_nonzero_, 6);\n  EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);\n  EXPECT_EQ(dmat->Info().data_split_mode, DataSplitMode::kRow);\n\n  delete dmat;\n}\n\nTEST(SimpleDMatrix, RowAccess) {\n  common::TemporaryDirectory tempdir;\n  const std::string tmp_file = tempdir.Str() + \"/simple.libsvm\";\n  CreateSimpleTestData(tmp_file);\n  xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false);\n\n  // Loop over the batches and count the records\n  int64_t row_count = 0;\n  for (auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {\n    row_count += batch.Size();\n  }\n  EXPECT_EQ(row_count, dmat->Info().num_row_);\n  // Test the data read into the first row\n  auto 
&batch = *dmat->GetBatches<xgboost::SparsePage>().begin();\n  auto page = batch.GetView();\n  auto first_row = page[0];\n  ASSERT_EQ(first_row.size(), 3);\n  EXPECT_EQ(first_row[2].index, 2);\n  EXPECT_EQ(first_row[2].fvalue, 20);\n\n  delete dmat;\n}\n\nTEST(SimpleDMatrix, ColAccessWithoutBatches) {\n  Context ctx;\n  common::TemporaryDirectory tempdir;\n  const std::string tmp_file = tempdir.Str() + \"/simple.libsvm\";\n  CreateSimpleTestData(tmp_file);\n  xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));\n\n  ASSERT_TRUE(dmat->SingleColBlock());\n\n  // Loop over the batches and assert the data is as expected\n  int64_t num_col_batch = 0;\n  for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {\n    num_col_batch += 1;\n    EXPECT_EQ(batch.Size(), dmat->Info().num_col_)\n        << \"Expected batch size = number of cells as #batches is 1.\";\n  }\n  EXPECT_EQ(num_col_batch, 1) << \"Expected number of batches to be 1\";\n  delete dmat;\n}\n\nTEST(SimpleDMatrix, Empty) {\n  HostDeviceVector<float> data{};\n  HostDeviceVector<unsigned> feature_idx{};\n  HostDeviceVector<size_t> row_ptr{};\n\n  auto j_data = Json::Dump(GetArrayInterface(&data, 0, 1));\n  auto j_feature_idx = Json::Dump(GetArrayInterface(&feature_idx, 0, 1));\n  auto j_row_ptr = Json::Dump(GetArrayInterface(&row_ptr, 0, 1));\n\n  data::CSRArrayAdapter csr_adapter(j_row_ptr, j_feature_idx, j_data, 0);\n  std::unique_ptr<data::SimpleDMatrix> dmat(\n      new data::SimpleDMatrix(&csr_adapter, std::numeric_limits<float>::quiet_NaN(), 1));\n  CHECK_EQ(dmat->Info().num_nonzero_, 0);\n  CHECK_EQ(dmat->Info().num_row_, 0);\n  CHECK_EQ(dmat->Info().num_col_, 0);\n  for (auto &batch : dmat->GetBatches<SparsePage>()) {\n    CHECK_EQ(batch.Size(), 0);\n  }\n\n  data::DenseAdapter dense_adapter(nullptr, 0, 0);\n  dmat.reset(new data::SimpleDMatrix(&dense_adapter, std::numeric_limits<float>::quiet_NaN(), 1));\n  CHECK_EQ(dmat->Info().num_nonzero_, 0);\n  
CHECK_EQ(dmat->Info().num_row_, 0);\n  CHECK_EQ(dmat->Info().num_col_, 0);\n  for (auto &batch : dmat->GetBatches<SparsePage>()) {\n    CHECK_EQ(batch.Size(), 0);\n  }\n\n  data::CSCArrayAdapter csc_adapter(j_row_ptr, j_feature_idx, j_data, 0);\n  dmat.reset(new data::SimpleDMatrix(&csc_adapter, std::numeric_limits<float>::quiet_NaN(), 1));\n  CHECK_EQ(dmat->Info().num_nonzero_, 0);\n  CHECK_EQ(dmat->Info().num_row_, 0);\n  CHECK_EQ(dmat->Info().num_col_, 0);\n  for (auto &batch : dmat->GetBatches<SparsePage>()) {\n    CHECK_EQ(batch.Size(), 0);\n  }\n}\n\nTEST(SimpleDMatrix, MissingData) {\n  HostDeviceVector<float> data{0.0, std::nanf(\"\"), 1.0};\n  HostDeviceVector<unsigned> feature_idx = {0, 1, 0};\n  HostDeviceVector<size_t> row_ptr = {0, 2, 3};\n\n  auto j_data = Json::Dump(GetArrayInterface(&data, 3, 1));\n  auto j_feature_idx = Json::Dump(GetArrayInterface(&feature_idx, 3, 1));\n  auto j_row_ptr = Json::Dump(GetArrayInterface(&row_ptr, 3, 1));\n\n  data::CSRArrayAdapter adapter{j_row_ptr, j_feature_idx, j_data, 2ul};\n  std::unique_ptr<data::SimpleDMatrix> dmat{\n      new data::SimpleDMatrix{&adapter, std::numeric_limits<float>::quiet_NaN(), 1}};\n  CHECK_EQ(dmat->Info().num_nonzero_, 2);\n  dmat.reset(new data::SimpleDMatrix(&adapter, 1.0, 1));\n  CHECK_EQ(dmat->Info().num_nonzero_, 1);\n\n  {\n    data.HostVector()[1] = std::numeric_limits<float>::infinity();\n    data::DenseAdapter adapter(data.ConstHostPointer(), data.Size(), 1);\n    EXPECT_THROW(data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1),\n                 dmlc::Error);\n  }\n}\n\nTEST(SimpleDMatrix, EmptyRow) {\n  HostDeviceVector<float> data{0.0, 1.0};\n  HostDeviceVector<unsigned> feature_idx{0, 1};\n  HostDeviceVector<size_t> row_ptr{0, 2, 2};\n\n  auto j_data = Json::Dump(GetArrayInterface(&data, 2, 1));\n  auto j_feature_idx = Json::Dump(GetArrayInterface(&feature_idx, 2, 1));\n  auto j_row_ptr = Json::Dump(GetArrayInterface(&row_ptr, 3, 1));\n\n  
data::CSRArrayAdapter adapter{j_row_ptr, j_feature_idx, j_data, 2};\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), 1);\n  CHECK_EQ(dmat.Info().num_nonzero_, 2);\n  CHECK_EQ(dmat.Info().num_row_, 2);\n  CHECK_EQ(dmat.Info().num_col_, 2);\n}\n\nTEST(SimpleDMatrix, FromDense) {\n  int m = 3;\n  int n = 2;\n  std::vector<float> data = {1, 2, 3, 4, 5, 6};\n  data::DenseAdapter adapter(data.data(), m, n);\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                           -1);\n  EXPECT_EQ(dmat.Info().num_col_, 2);\n  EXPECT_EQ(dmat.Info().num_row_, 3);\n  EXPECT_EQ(dmat.Info().num_nonzero_, 6);\n\n  for (auto &batch : dmat.GetBatches<SparsePage>()) {\n    auto page = batch.GetView();\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      auto inst = page[i];\n      for (auto j = 0ull; j < inst.size(); j++) {\n        EXPECT_EQ(inst[j].fvalue, data[i * n + j]);\n        EXPECT_EQ(inst[j].index, j);\n      }\n    }\n  }\n}\n\nTEST(SimpleDMatrix, FromCSC) {\n  HostDeviceVector<float> data{1, 3, 2, 4, 5};\n  HostDeviceVector<unsigned> row_idx{0, 1, 0, 1, 2};\n  HostDeviceVector<size_t> col_ptr{0, 2, 5};\n\n  auto j_data = Json::Dump(GetArrayInterface(&data, data.Size(), 1));\n  auto j_row_idx = Json::Dump(GetArrayInterface(&row_idx, row_idx.Size(), 1));\n  auto j_col_ptr = Json::Dump(GetArrayInterface(&col_ptr, col_ptr.Size(), 1));\n\n  data::CSCArrayAdapter adapter{j_col_ptr, j_row_idx, j_data, 3};\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1);\n  EXPECT_EQ(dmat.Info().num_col_, 2);\n  EXPECT_EQ(dmat.Info().num_row_, 3);\n  EXPECT_EQ(dmat.Info().num_nonzero_, 5);\n\n  auto &batch = *dmat.GetBatches<SparsePage>().begin();\n  auto page = batch.GetView();\n  auto inst = page[0];\n  EXPECT_EQ(inst[0].fvalue, 1);\n  EXPECT_EQ(inst[0].index, 0);\n  EXPECT_EQ(inst[1].fvalue, 2);\n  EXPECT_EQ(inst[1].index, 1);\n\n  inst = page[1];\n  EXPECT_EQ(inst[0].fvalue, 3);\n  
EXPECT_EQ(inst[0].index, 0);\n  EXPECT_EQ(inst[1].fvalue, 4);\n  EXPECT_EQ(inst[1].index, 1);\n\n  inst = page[2];\n  EXPECT_EQ(inst[0].fvalue, 5);\n  EXPECT_EQ(inst[0].index, 1);\n}\n\nTEST(SimpleDMatrix, FromFile) {\n  common::TemporaryDirectory tempdir;\n  std::string filename = tempdir.Str() + \"/test.libsvm\";\n  CreateBigTestData(filename, 3 * 5);\n  // Add an empty row at the end of the matrix\n  {\n    std::ofstream fo(filename, std::ios::app | std::ios::out);\n    fo << \"0\\n\";\n  }\n  constexpr size_t kExpectedNumRow = 6;\n  std::unique_ptr<dmlc::Parser<uint32_t>> parser(\n      dmlc::Parser<uint32_t>::Create(filename.c_str(), 0, 1, \"auto\"));\n\n  auto verify_batch = [kExpectedNumRow](SparsePage const &page) {\n    auto batch = page.GetView();\n    EXPECT_EQ(batch.Size(), kExpectedNumRow);\n    EXPECT_EQ(page.offset.HostVector(),\n              std::vector<bst_idx_t>({0, 3, 6, 9, 12, 15, 15}));\n    EXPECT_EQ(page.base_rowid, 0);\n\n    for (auto i = 0ull; i < batch.Size() - 1; i++) {\n      if (i % 2 == 0) {\n        EXPECT_EQ(batch[i][0].index, 0);\n        EXPECT_EQ(batch[i][1].index, 1);\n        EXPECT_EQ(batch[i][2].index, 2);\n      } else {\n        EXPECT_EQ(batch[i][0].index, 0);\n        EXPECT_EQ(batch[i][1].index, 3);\n        EXPECT_EQ(batch[i][2].index, 4);\n      }\n    }\n  };\n\n  constexpr bst_feature_t kCols = 5;\n  data::FileAdapter adapter(parser.get());\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                           1);\n  ASSERT_EQ(dmat.Info().num_col_, kCols);\n\n  for (auto &batch : dmat.GetBatches<SparsePage>()) {\n    verify_batch(batch);\n  }\n}\n\nTEST(SimpleDMatrix, Slice) {\n  size_t constexpr kRows {16};\n  size_t constexpr kCols {8};\n  size_t constexpr kClasses {3};\n  auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);\n  auto& weights = p_m->Info().weights_.HostVector();\n  weights.resize(kRows);\n  std::iota(weights.begin(), weights.end(), 0.0f);\n\n  
auto& lower = p_m->Info().labels_lower_bound_.HostVector();\n  auto& upper = p_m->Info().labels_upper_bound_.HostVector();\n  lower.resize(kRows);\n  upper.resize(kRows);\n\n  std::iota(lower.begin(), lower.end(), 0.0f);\n  std::iota(upper.begin(), upper.end(), 1.0f);\n\n  auto& margin = p_m->Info().base_margin_;\n  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};\n\n  std::array<int32_t, 3> ridxs {1, 3, 5};\n  std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };\n  ASSERT_EQ(out->Info().labels.Size(), ridxs.size());\n  ASSERT_EQ(out->Info().labels_lower_bound_.Size(), ridxs.size());\n  ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size());\n  ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses);\n\n  for (auto const& in_batch : p_m->GetBatches<SparsePage>()) {\n    auto in_page = in_batch.GetView();\n    for (auto const &out_batch : out->GetBatches<SparsePage>()) {\n      auto out_page = out_batch.GetView();\n      for (size_t i = 0; i < ridxs.size(); ++i) {\n        auto ridx = ridxs[i];\n        auto out_inst = out_page[i];\n        auto in_inst = in_page[ridx];\n        ASSERT_EQ(out_inst.size(), in_inst.size()) << i;\n        for (size_t j = 0; j < in_inst.size(); ++j) {\n          ASSERT_EQ(in_inst[j].fvalue, out_inst[j].fvalue);\n          ASSERT_EQ(in_inst[j].index, out_inst[j].index);\n        }\n\n        ASSERT_EQ(p_m->Info().labels_lower_bound_.HostVector().at(ridx),\n                  out->Info().labels_lower_bound_.HostVector().at(i));\n        ASSERT_EQ(p_m->Info().labels_upper_bound_.HostVector().at(ridx),\n                  out->Info().labels_upper_bound_.HostVector().at(i));\n        ASSERT_EQ(p_m->Info().weights_.HostVector().at(ridx),\n                  out->Info().weights_.HostVector().at(i));\n\n        auto out_margin = out->Info().base_margin_.View(DeviceOrd::CPU());\n        auto in_margin = margin.View(DeviceOrd::CPU());\n        for (size_t j = 0; j < kClasses; ++j) {\n          
ASSERT_EQ(out_margin(i, j), in_margin(ridx, j));\n        }\n      }\n    }\n  }\n\n  ASSERT_EQ(out->Info().num_col_, out->Info().num_col_);\n  ASSERT_EQ(out->Info().num_row_, ridxs.size());\n  ASSERT_EQ(out->Info().num_nonzero_, ridxs.size() * kCols);  // dense\n\n  {\n    HostDeviceVector<float> data;\n    auto arr_str = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&data);\n    auto adapter = data::ArrayAdapter{StringView{arr_str}};\n    auto n_threads = 2;\n    std::unique_ptr<DMatrix> p_fmat{\n        DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), n_threads, \"\")};\n    std::unique_ptr<DMatrix> slice{p_fmat->Slice(ridxs)};\n    ASSERT_LE(slice->Ctx()->Threads(), n_threads);\n  }\n}\n\nTEST(SimpleDMatrix, SliceCol) {\n  size_t constexpr kRows {16};\n  size_t constexpr kCols {8};\n  size_t constexpr kClasses {3};\n  auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);\n  auto& weights = p_m->Info().weights_.HostVector();\n  weights.resize(kRows);\n  std::iota(weights.begin(), weights.end(), 0.0f);\n\n  auto& lower = p_m->Info().labels_lower_bound_.HostVector();\n  auto& upper = p_m->Info().labels_upper_bound_.HostVector();\n  lower.resize(kRows);\n  upper.resize(kRows);\n\n  std::iota(lower.begin(), lower.end(), 0.0f);\n  std::iota(upper.begin(), upper.end(), 1.0f);\n\n  auto& margin = p_m->Info().base_margin_;\n  margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};\n\n  auto constexpr kSlices {2};\n  auto constexpr kSliceSize {4};\n  for (auto slice = 0; slice < kSlices; slice++) {\n    std::unique_ptr<DMatrix> out { p_m->SliceCol(kSlices, slice) };\n    ASSERT_EQ(out->Info().labels.Size(), kRows);\n    ASSERT_EQ(out->Info().labels_lower_bound_.Size(), kRows);\n    ASSERT_EQ(out->Info().labels_upper_bound_.Size(), kRows);\n    ASSERT_EQ(out->Info().base_margin_.Size(), kRows * kClasses);\n\n    for (auto const &in_batch : p_m->GetBatches<SparsePage>()) {\n      auto in_page = 
in_batch.GetView();\n      for (auto const &out_batch : out->GetBatches<SparsePage>()) {\n        auto out_page = out_batch.GetView();\n        for (size_t i = 0; i < kRows; ++i) {\n          auto out_inst = out_page[i];\n          auto in_inst = in_page[i];\n          ASSERT_EQ(out_inst.size() * 2, in_inst.size()) << i;\n          for (size_t j = 0; j < kSliceSize; ++j) {\n            auto const slice_start = kSliceSize * slice;\n            ASSERT_EQ(in_inst[slice_start + j].fvalue, out_inst[j].fvalue);\n            ASSERT_EQ(in_inst[slice_start + j].index, out_inst[j].index);\n          }\n\n          ASSERT_EQ(p_m->Info().labels_lower_bound_.HostVector().at(i),\n                    out->Info().labels_lower_bound_.HostVector().at(i));\n          ASSERT_EQ(p_m->Info().labels_upper_bound_.HostVector().at(i),\n                    out->Info().labels_upper_bound_.HostVector().at(i));\n          ASSERT_EQ(p_m->Info().weights_.HostVector().at(i), out->Info().weights_.HostVector().at(i));\n\n          auto out_margin = out->Info().base_margin_.View(DeviceOrd::CPU());\n          auto in_margin = margin.View(DeviceOrd::CPU());\n          for (size_t j = 0; j < kClasses; ++j) {\n            ASSERT_EQ(out_margin(i, j), in_margin(i, j));\n          }\n        }\n      }\n    }\n\n    ASSERT_EQ(out->Info().num_col_, out->Info().num_col_);\n    ASSERT_EQ(out->Info().num_row_, kRows);\n    ASSERT_EQ(out->Info().num_nonzero_, kRows * kSliceSize);  // dense\n    ASSERT_EQ(out->Info().data_split_mode, DataSplitMode::kCol);\n  }\n}\n\nTEST(SimpleDMatrix, SaveLoadBinary) {\n  common::TemporaryDirectory tempdir;\n  const std::string tmp_file = tempdir.Str() + \"/simple.libsvm\";\n  CreateSimpleTestData(tmp_file);\n  xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));\n  data::SimpleDMatrix *simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat);\n\n  const std::string tmp_binfile = tempdir.Str() + \"/csr_source.binary\";\n  simple_dmat->SaveToLocalFile(tmp_binfile);\n 
 xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile);\n\n  EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);\n  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);\n  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);\n\n  // Test we have non-empty batch\n  EXPECT_EQ(dmat->GetBatches<xgboost::SparsePage>().begin().AtEnd(), false);\n\n  auto row_iter = dmat->GetBatches<xgboost::SparsePage>().begin();\n  auto row_iter_read = dmat_read->GetBatches<xgboost::SparsePage>().begin();\n  // Test the data read into the first row\n  auto first_row = (*row_iter).GetView()[0];\n  auto first_row_read = (*row_iter_read).GetView()[0];\n  EXPECT_EQ(first_row.size(), first_row_read.size());\n  EXPECT_EQ(first_row[2].index, first_row_read[2].index);\n  EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);\n  delete dmat;\n  delete dmat_read;\n}\n\nTEST(SimpleDMatrix, Threads) {\n  size_t constexpr kRows{16};\n  size_t constexpr kCols{8};\n  HostDeviceVector<float> data;\n  auto arr_str = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&data);\n  auto adapter = data::ArrayAdapter{StringView{arr_str}};\n  std::unique_ptr<DMatrix> p_fmat{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0, \"\")};\n  ASSERT_EQ(p_fmat->Ctx()->Threads(), AllThreadsForTest());\n}\n\nnamespace {\nvoid VerifyColumnSplit() {\n  size_t constexpr kRows {16};\n  size_t constexpr kCols {8};\n  auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(false, DataSplitMode::kCol);\n\n  ASSERT_EQ(p_fmat->Info().num_col_, kCols * collective::GetWorldSize());\n  ASSERT_EQ(p_fmat->Info().num_row_, kRows);\n  ASSERT_EQ(p_fmat->Info().data_split_mode, DataSplitMode::kCol);\n}\n}  // anonymous namespace\n\nTEST(SimpleDMatrix, ColumnSplit) {\n  auto constexpr kWorldSize{3};\n  collective::TestDistributedGlobal(kWorldSize, VerifyColumnSplit);\n}\n"
  },
  {
    "path": "tests/cpp/data/test_simple_dmatrix.cu",
    "content": "// Copyright by Contributors\n#include <xgboost/data.h>\n#include \"../../../src/data/simple_dmatrix.h\"\n\n#include <thrust/sequence.h>\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"../helpers.h\"\n#include \"test_array_interface.h\"\n#include \"../../../src/data/array_interface.h\"\n\nusing namespace xgboost;  // NOLINT\n\nTEST(SimpleDMatrix, FromColumnarDenseBasic) {\n  constexpr size_t kRows{16};\n  std::vector<Json> columns;\n  thrust::device_vector<double> d_data_0(kRows);\n  thrust::device_vector<uint32_t> d_data_1(kRows);\n\n  columns.emplace_back(GenerateDenseColumn<double>(\"<f8\", kRows, &d_data_0));\n  columns.emplace_back(GenerateDenseColumn<uint32_t>(\"<u4\", kRows, &d_data_1));\n\n  Json column_arr{columns};\n\n  std::string str;\n  Json::Dump(column_arr, &str);\n\n  data::CudfAdapter adapter(str);\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                           -1);\n  EXPECT_EQ(dmat.Info().num_col_, 2);\n  EXPECT_EQ(dmat.Info().num_row_, 16);\n  EXPECT_EQ(dmat.Info().num_nonzero_, 32);\n}\n\nvoid TestDenseColumn(DMatrix* dmat, size_t n_rows, size_t n_cols) {\n  for (auto& batch : dmat->GetBatches<SparsePage>()) {\n    auto page = batch.GetView();\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      auto inst = page[i];\n      for (auto j = 0ull; j < inst.size(); j++) {\n        EXPECT_EQ(inst[j].fvalue, i * 2);\n        EXPECT_EQ(inst[j].index, j);\n      }\n    }\n  }\n  ASSERT_EQ(dmat->Info().num_row_, n_rows);\n  ASSERT_EQ(dmat->Info().num_col_, n_cols);\n}\n\nTEST(SimpleDMatrix, FromColumnarDense) {\n  constexpr size_t kRows{16};\n  constexpr size_t kCols{2};\n  std::vector<Json> columns;\n  thrust::device_vector<float> d_data_0(kRows);\n  thrust::device_vector<int32_t> d_data_1(kRows);\n  columns.emplace_back(GenerateDenseColumn<float>(\"<f4\", kRows, &d_data_0));\n  columns.emplace_back(GenerateDenseColumn<int32_t>(\"<i4\", kRows, &d_data_1));\n\n  Json 
column_arr{columns};\n\n  std::string str;\n  Json::Dump(column_arr, &str);\n\n  // no missing value\n  {\n    data::CudfAdapter adapter(str);\n    data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                             -1);\n    TestDenseColumn(&dmat, kRows, kCols);\n  }\n\n  // with missing value specified\n  {\n    data::CudfAdapter adapter(str);\n    data::SimpleDMatrix dmat(&adapter, 4.0, -1);\n\n    ASSERT_EQ(dmat.Info().num_row_, kRows);\n    ASSERT_EQ(dmat.Info().num_col_, kCols);\n    ASSERT_EQ(dmat.Info().num_nonzero_, kCols * kRows - 2);\n  }\n\n  {\n    // no missing value, but has NaN\n    d_data_0[3] = std::numeric_limits<float>::quiet_NaN();\n    ASSERT_TRUE(std::isnan(d_data_0[3]));  // removes 6.0\n    data::CudfAdapter adapter(str);\n    data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                             -1);\n    ASSERT_EQ(dmat.Info().num_nonzero_, kRows * kCols - 1);\n    ASSERT_EQ(dmat.Info().num_row_, kRows);\n    ASSERT_EQ(dmat.Info().num_col_, kCols);\n  }\n}\n\nTEST(SimpleDMatrix, FromColumnarWithEmptyRows) {\n  constexpr size_t kRows = 102;\n  constexpr size_t kCols = 24;\n\n  std::vector<Json> v_columns(kCols);\n  std::vector<dh::device_vector<float>> columns_data(kCols);\n  std::vector<dh::device_vector<RBitField8::value_type>> column_bitfields(\n      kCols);\n\n  RBitField8::value_type constexpr kUCOne = 1;\n\n  for (size_t i = 0; i < kCols; ++i) {\n    auto& col = v_columns[i];\n    col = Object();\n    auto& data = columns_data[i];\n    data.resize(kRows);\n    thrust::sequence(data.begin(), data.end(), 0);\n    dh::safe_cuda(cudaDeviceSynchronize());\n    dh::safe_cuda(cudaGetLastError());\n\n    ASSERT_EQ(data.size(), kRows);\n\n    auto p_d_data = raw_pointer_cast(data.data());\n    std::vector<Json> j_data{\n        Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),\n        Json(Boolean(false))};\n    col[\"data\"] = j_data;\n    std::vector<Json> 
j_shape{Json(Integer(static_cast<Integer::Int>(kRows)))};\n    col[\"shape\"] = Array(j_shape);\n    col[\"version\"] = 3;\n    col[\"typestr\"] = String(\"<f4\");\n\n    // Construct the mask object.\n    col[\"mask\"] = Object();\n    auto& j_mask = col[\"mask\"];\n    j_mask[\"version\"] = 3;\n    auto& mask_storage = column_bitfields[i];\n    mask_storage.resize(16);  // 16 bytes\n\n    mask_storage[0] = ~(kUCOne << 2);  // 3^th row is missing\n    mask_storage[1] = ~(kUCOne << 3);  // 12^th row is missing\n    size_t last_ind = 12;\n    mask_storage[last_ind] = ~(kUCOne << 5);\n    std::set<size_t> missing_row_index{0, 1, last_ind};\n\n    for (size_t j = 0; j < mask_storage.size(); ++j) {\n      if (missing_row_index.find(j) == missing_row_index.cend()) {\n        // all other rows are valid\n        mask_storage[j] = ~0;\n      }\n    }\n\n    j_mask[\"data\"] = std::vector<Json>{\n        Json(\n            Integer(reinterpret_cast<Integer::Int>(mask_storage.data().get()))),\n        Json(Boolean(false))};\n    j_mask[\"shape\"] = Array(\n        std::vector<Json>{Json(Integer(static_cast<Integer::Int>(kRows)))});\n    j_mask[\"typestr\"] = String(\"|i1\");\n  }\n\n  Json column_arr{Array(v_columns)};\n  std::string str;\n  Json::Dump(column_arr, &str);\n\n  data::CudfAdapter adapter(str);\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                           -1);\n\n  for (auto& batch : dmat.GetBatches<SparsePage>()) {\n    auto page = batch.GetView();\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      auto inst = page[i];\n      for (auto j = 0ull; j < inst.size(); j++) {\n        EXPECT_EQ(inst[j].fvalue, i);\n        EXPECT_EQ(inst[j].index, j);\n      }\n    }\n  }\n  ASSERT_EQ(dmat.Info().num_nonzero_, (kRows - 3) * kCols);\n  ASSERT_EQ(dmat.Info().num_row_, kRows);\n  ASSERT_EQ(dmat.Info().num_col_, kCols);\n}\n\nTEST(SimpleCSRSource, FromColumnarSparse) {\n  constexpr size_t kRows = 32;\n  constexpr size_t 
kCols = 2;\n  RBitField8::value_type constexpr kUCOne = 1;\n\n  std::vector<dh::device_vector<float>> columns_data(kCols);\n  std::vector<dh::device_vector<RBitField8::value_type>> column_bitfields(kCols);\n\n  {\n    // column 0\n    auto& mask = column_bitfields[0];\n    mask.resize(8);\n\n    for (auto && j : mask) {\n      j = ~0;\n    }\n    // the 2^th entry of first column is invalid\n    // [0 0 0 0 0 1 0 0]\n    mask[0] = ~(kUCOne << 2);\n  }\n  {\n    // column 1\n    auto& mask = column_bitfields[1];\n    mask.resize(8);\n\n    for (auto && j : mask) {\n      j = ~0;\n    }\n    // the 19^th entry of second column is invalid\n    // [~0~], [~0~], [0 0 0 0 1 0 0 0]\n    mask[2] = ~(kUCOne << 3);\n  }\n\n  for (size_t c = 0; c < kCols; ++c) {\n    columns_data[c].resize(kRows);\n    thrust::sequence(columns_data[c].begin(), columns_data[c].end(), 0);\n  }\n\n  std::vector<Json> j_columns(kCols);\n\n  for (size_t c = 0; c < kCols; ++c) {\n    auto& column = j_columns[c];\n    column = Object();\n    column[\"version\"] = 3;\n    column[\"typestr\"] = String(\"<f4\");\n    auto p_d_data = raw_pointer_cast(columns_data[c].data());\n    std::vector<Json> j_data {\n      Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),\n          Json(Boolean(false))};\n    column[\"data\"] = j_data;\n    std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};\n    column[\"shape\"] = Array(j_shape);\n    column[\"version\"] = 3;\n    column[\"typestr\"] = String(\"<f4\");\n\n    column[\"mask\"] = Object();\n    auto& j_mask = column[\"mask\"];\n    j_mask[\"version\"] = 3;\n    j_mask[\"data\"] = std::vector<Json>{\n      Json(Integer(reinterpret_cast<Integer::Int>(column_bitfields[c].data().get()))),\n      Json(Boolean(false))};\n    j_mask[\"shape\"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(kRows)))});\n    j_mask[\"typestr\"] = String(\"|i1\");\n  }\n\n  Json column_arr {Array(j_columns)};\n\n  std::string str;\n  
Json::Dump(column_arr, &str);\n\n  {\n    data::CudfAdapter adapter(str);\n    data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1);\n\n    ASSERT_EQ(dmat.Info().num_row_, kRows);\n    ASSERT_EQ(dmat.Info().num_nonzero_, (kRows*kCols)-2);\n  }\n\n  {\n    data::CudfAdapter adapter(str);\n    data::SimpleDMatrix dmat(&adapter, 2.0, -1);\n    for (auto& batch : dmat.GetBatches<SparsePage>()) {\n      auto page = batch.GetView();\n      for (auto i = 0ull; i < batch.Size(); i++) {\n        auto inst = page[i];\n        for (auto e : inst) {\n          ASSERT_NE(e.fvalue, 2.0);\n        }\n      }\n    }\n  }\n\n  {\n    // no missing value, but has NaN\n    data::CudfAdapter adapter(str);\n    columns_data[0][4] = std::numeric_limits<float>::quiet_NaN();  // 0^th column 4^th row\n    data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                             -1);\n    ASSERT_TRUE(std::isnan(columns_data[0][4]));\n\n    // Two invalid entries and one NaN, in CSC\n    // 0^th column: 0, 1, 4, 5, 6, ..., kRows\n    // 1^th column: 0, 1, 2, 3, ..., 19, 21, ..., kRows\n    ASSERT_EQ(dmat.Info().num_nonzero_, kRows * kCols - 3);\n  }\n}\n\n\nTEST(SimpleDMatrix, FromColumnarSparseBasic) {\n  constexpr size_t kRows{16};\n  std::vector<Json> columns;\n  thrust::device_vector<double> d_data_0(kRows);\n  thrust::device_vector<uint32_t> d_data_1(kRows);\n\n  columns.emplace_back(GenerateSparseColumn<double>(\"<f8\", kRows, &d_data_0));\n  columns.emplace_back(GenerateSparseColumn<uint32_t>(\"<u4\", kRows, &d_data_1));\n\n  Json column_arr{columns};\n\n  std::string str;\n  Json::Dump(column_arr, &str);\n\n  data::CudfAdapter adapter(str);\n  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),\n                           -1);\n  EXPECT_EQ(dmat.Info().num_col_, 2);\n  EXPECT_EQ(dmat.Info().num_row_, 16);\n  EXPECT_EQ(dmat.Info().num_nonzero_, 32);\n\n  for (auto& batch : dmat.GetBatches<SparsePage>()) 
{\n    auto page = batch.GetView();\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      auto inst = page[i];\n      for (auto j = 0ull; j < inst.size(); j++) {\n        EXPECT_EQ(inst[j].fvalue, i * 2);\n        EXPECT_EQ(inst[j].index, j);\n      }\n    }\n  }\n}\n\n\nTEST(SimpleDMatrix, FromCupy){\n  int rows = 50;\n  int cols = 10;\n  thrust::device_vector< float> data(rows*cols);\n  auto json_array_interface = Generate2dArrayInterface(rows, cols, \"<f4\", &data);\n  std::string str;\n  Json::Dump(json_array_interface, &str);\n  data::CupyAdapter adapter(str);\n  data::SimpleDMatrix dmat(&adapter, -1, 1);\n  EXPECT_EQ(dmat.Info().num_col_, cols);\n  EXPECT_EQ(dmat.Info().num_row_, rows);\n  EXPECT_EQ(dmat.Info().num_nonzero_, rows*cols);\n\n  for (auto& batch : dmat.GetBatches<SparsePage>()) {\n    auto page = batch.GetView();\n    for (auto i = 0ull; i < batch.Size(); i++) {\n      auto inst = page[i];\n      for (auto j = 0ull; j < inst.size(); j++) {\n        EXPECT_EQ(inst[j].fvalue, i * cols + j);\n        EXPECT_EQ(inst[j].index, j);\n      }\n    }\n  }\n}\n\nTEST(SimpleDMatrix, FromCupySparse){\n  int rows = 2;\n  int cols = 2;\n  thrust::device_vector< float> data(rows*cols);\n  auto json_array_interface = Generate2dArrayInterface(rows, cols, \"<f4\", &data);\n  data[1] = std::numeric_limits<float>::quiet_NaN();\n  data[2] = std::numeric_limits<float>::quiet_NaN();\n  std::string str;\n  Json::Dump(json_array_interface, &str);\n  data::CupyAdapter adapter(str);\n  data::SimpleDMatrix dmat(&adapter, -1, 1);\n  EXPECT_EQ(dmat.Info().num_col_, cols);\n  EXPECT_EQ(dmat.Info().num_row_, rows);\n  EXPECT_EQ(dmat.Info().num_nonzero_, rows * cols - 2);\n  auto& batch = *dmat.GetBatches<SparsePage>().begin();\n  auto page = batch.GetView();\n\n  EXPECT_EQ(page[0].size(), 1);\n  EXPECT_EQ(page[1].size(), 1);\n  EXPECT_EQ(page[0][0].fvalue, 0.0f);\n  EXPECT_EQ(page[0][0].index, 0);\n  EXPECT_EQ(page[1][0].fvalue, 3.0f);\n  EXPECT_EQ(page[1][0].index, 1);\n}\n"
  },
  {
    "path": "tests/cpp/data/test_sparse_page_dmatrix.cc",
    "content": "/**\n * Copyright 2016-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n\n#include <filesystem>  // for path\n#include <future>      // for future, async\n#include <thread>      // for sleep_for\n\n#include \"../../../src/common/io.h\"\n#include \"../../../src/data/batch_utils.h\"  // for MatchingPageBytes\n#include \"../../../src/data/sparse_page_dmatrix.h\"\n#include \"../../../src/tree/param.h\"  // for TrainParam\n#include \"../filesystem.h\"            // for TemporaryDirectory\n#include \"../helpers.h\"\n\nusing namespace xgboost;  // NOLINT\ntemplate <typename Page>\nvoid TestSparseDMatrixLoad(Context const *ctx) {\n  auto m = RandomDataGenerator{1024, 5, 0.0}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n\n  auto n_threads = 0;\n  auto config = ExtMemConfig{\"temp\",\n                             false,\n                             ::xgboost::cuda_impl::AutoHostRatio(),\n                             cuda_impl::MatchingPageBytes(),\n                             std::numeric_limits<float>::quiet_NaN(),\n                             n_threads};\n  ASSERT_EQ(AllThreadsForTest(), m->Ctx()->Threads());\n  ASSERT_EQ(m->Info().num_col_, 5);\n  ASSERT_EQ(m->Info().num_row_, 1024);\n\n  auto simple = RandomDataGenerator{1024, 5, 0.0}.GenerateDMatrix(true);\n  Page out;\n  for (auto const &page : m->GetBatches<Page>(ctx)) {\n    if (std::is_same_v<Page, SparsePage>) {\n      out.Push(page);\n    } else {\n      out.PushCSC(page);\n    }\n  }\n  ASSERT_EQ(m->Info().num_col_, simple->Info().num_col_);\n  ASSERT_EQ(m->Info().num_row_, simple->Info().num_row_);\n  if (std::is_same_v<Page, SortedCSCPage>) {\n    out.SortRows(ctx->Threads());\n  }\n\n  for (auto const &page : simple->GetBatches<Page>(ctx)) {\n    ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector());\n    for (size_t i = 0; i < page.data.Size(); ++i) {\n      
ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue);\n    }\n  }\n}\n\nTEST(SparsePageDMatrix, Load) {\n  Context ctx;\n  TestSparseDMatrixLoad<SparsePage>(&ctx);\n  TestSparseDMatrixLoad<CSCPage>(&ctx);\n  TestSparseDMatrixLoad<SortedCSCPage>(&ctx);\n}\n\n// allow caller to retain pages so they can process multiple pages at the same time.\ntemplate <typename Page>\nvoid TestRetainPage() {\n  std::size_t n_batches = 4;\n  auto p_fmat = RandomDataGenerator{1024, 128, 0.5f}.Batches(n_batches).GenerateSparsePageDMatrix(\n      \"cache\", true);\n  Context ctx;\n  auto batches = p_fmat->GetBatches<Page>(&ctx);\n  auto begin = batches.begin();\n  auto end = batches.end();\n\n  std::vector<Page> pages;\n  std::vector<std::shared_ptr<Page const>> iterators;\n  for (auto it = begin; it != end; ++it) {\n    iterators.push_back(it.Page());\n    pages.emplace_back(Page{});\n    if (std::is_same_v<Page, SparsePage>) {\n      pages.back().Push(*it);\n    } else {\n      pages.back().PushCSC(*it);\n    }\n    ASSERT_EQ(pages.back().Size(), (*it).Size());\n  }\n  ASSERT_GE(iterators.size(), n_batches);\n\n  for (size_t i = 0; i < iterators.size(); ++i) {\n    ASSERT_EQ((*iterators[i]).Size(), pages.at(i).Size());\n    ASSERT_EQ((*iterators[i]).data.HostVector(), pages.at(i).data.HostVector());\n  }\n\n  // make sure it's const and the caller can not modify the content of page.\n  for (auto &page : p_fmat->GetBatches<Page>({&ctx})) {\n    static_assert(std::is_const_v<std::remove_reference_t<decltype(page)>>);\n  }\n}\n\nTEST(SparsePageDMatrix, RetainSparsePage) {\n  TestRetainPage<SparsePage>();\n  TestRetainPage<CSCPage>();\n  TestRetainPage<SortedCSCPage>();\n}\n\nclass TestGradientIndexExt : public ::testing::TestWithParam<bool> {\n protected:\n  void Run(bool is_dense) {\n    constexpr bst_idx_t kRows = 64;\n    constexpr size_t kCols = 2;\n    float sparsity = is_dense ? 
0.0 : 0.4;\n    bst_bin_t n_bins = 16;\n    Context ctx;\n    auto p_ext_fmat =\n        RandomDataGenerator{kRows, kCols, sparsity}.Batches(4).GenerateSparsePageDMatrix(\"temp\",\n                                                                                         true);\n\n    auto cuts = common::SketchOnDMatrix(&ctx, p_ext_fmat.get(), n_bins, false, {});\n    std::vector<std::unique_ptr<GHistIndexMatrix>> pages;\n    for (auto const &page : p_ext_fmat->GetBatches<SparsePage>()) {\n      pages.emplace_back(std::make_unique<GHistIndexMatrix>(\n          &ctx, page, common::Span<FeatureType const>{}, cuts, n_bins, is_dense, 0.8));\n    }\n    std::int32_t k = 0;\n    for (auto const &page : p_ext_fmat->GetBatches<GHistIndexMatrix>(\n             &ctx, BatchParam{n_bins, tree::TrainParam::DftSparseThreshold()})) {\n      auto const &from_sparse = pages[k];\n      ASSERT_TRUE(std::equal(page.index.begin(), page.index.end(), from_sparse->index.begin()));\n      if (is_dense) {\n        ASSERT_TRUE(std::equal(page.index.Offset(), page.index.Offset() + kCols,\n                               from_sparse->index.Offset()));\n      } else {\n        ASSERT_FALSE(page.index.Offset());\n        ASSERT_FALSE(from_sparse->index.Offset());\n      }\n      ASSERT_TRUE(\n          std::equal(page.row_ptr.cbegin(), page.row_ptr.cend(), from_sparse->row_ptr.cbegin()));\n      ++k;\n    }\n  }\n};\n\nTEST_P(TestGradientIndexExt, Basic) { this->Run(this->GetParam()); }\n\nINSTANTIATE_TEST_SUITE_P(SparsePageDMatrix, TestGradientIndexExt, testing::Bool());\n\n// Test GHistIndexMatrix can avoid loading sparse page after the initialization.\nTEST(SparsePageDMatrix, GHistIndexSkipSparsePage) {\n  common::TemporaryDirectory tmpdir;\n  std::size_t n_batches = 6;\n  auto Xy = RandomDataGenerator{180, 12, 0.0}.Batches(n_batches).GenerateSparsePageDMatrix(\n      tmpdir.Str() + \"/\", true);\n  Context ctx;\n  bst_bin_t n_bins{256};\n  double sparse_thresh{0.8};\n  BatchParam 
batch_param{n_bins, sparse_thresh};\n\n  auto check_ghist = [&] {\n    std::int32_t k = 0;\n    for (auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch_param)) {\n      ASSERT_EQ(page.Size(), 30);\n      ASSERT_EQ(k, page.base_rowid);\n      k += page.Size();\n    }\n  };\n  check_ghist();\n\n  auto casted = std::dynamic_pointer_cast<data::SparsePageDMatrix>(Xy);\n  CHECK(casted);\n  // Make the number of fetches don't change (no new fetch)\n  auto n_init_fetches = casted->SparsePageFetchCount();\n\n  std::vector<float> hess(Xy->Info().num_row_, 1.0f);\n  // Run multiple iterations to make sure fetches are consistent after reset.\n  for (std::int32_t i = 0; i < 4; ++i) {\n    auto n_fetches = casted->SparsePageFetchCount();\n    check_ghist();\n    ASSERT_EQ(casted->SparsePageFetchCount(), n_fetches);\n    if (i == 0) {\n      ASSERT_EQ(n_fetches, n_init_fetches);\n    }\n    // Make sure other page types don't interfere the GHist. This way, we can reuse the\n    // DMatrix for multiple purposes.\n    for ([[maybe_unused]] auto const &page : Xy->GetBatches<SparsePage>(&ctx)) {\n    }\n    for ([[maybe_unused]] auto const &page : Xy->GetBatches<SortedCSCPage>(&ctx)) {\n    }\n    for ([[maybe_unused]] auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch_param)) {\n    }\n    // Approx tree method pages\n    {\n      BatchParam regen{n_bins, common::Span{hess.data(), hess.size()}, false};\n      for ([[maybe_unused]] auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, regen)) {\n      }\n    }\n    {\n      BatchParam regen{n_bins, common::Span{hess.data(), hess.size()}, true};\n      for ([[maybe_unused]] auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, regen)) {\n      }\n    }\n    // Restore the batch parameter by passing it in again through check_ghist\n    check_ghist();\n  }\n\n  // half the pages\n  {\n    auto it = Xy->GetBatches<SparsePage>(&ctx).begin();\n    for (std::size_t i = 0; i < n_batches / 2; ++i) {\n      
++it;\n    }\n    check_ghist();\n  }\n  {\n    auto it = Xy->GetBatches<GHistIndexMatrix>(&ctx, batch_param).begin();\n    for (std::size_t i = 0; i < n_batches / 2; ++i) {\n      ++it;\n    }\n    check_ghist();\n  }\n  {\n    BatchParam regen{n_bins, common::Span{hess.data(), hess.size()}, true};\n    auto it = Xy->GetBatches<GHistIndexMatrix>(&ctx, regen).begin();\n    for (std::size_t i = 0; i < n_batches / 2; ++i) {\n      ++it;\n    }\n    check_ghist();\n  }\n}\n\nTEST(SparsePageDMatrix, MetaInfo) {\n  common::TemporaryDirectory tmpdir;\n  auto dmat = RandomDataGenerator{256, 5, 0.0}.Batches(4).GenerateSparsePageDMatrix(\n      tmpdir.Str() + \"/\", true);\n\n  // Test the metadata that was parsed\n  EXPECT_EQ(dmat->Info().num_row_, 256ul);\n  EXPECT_EQ(dmat->Info().num_col_, 5ul);\n  EXPECT_EQ(dmat->Info().num_nonzero_, dmat->Info().num_col_ * dmat->Info().num_row_);\n  EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);\n}\n\nTEST(SparsePageDMatrix, RowAccess) {\n  auto dmat = RandomDataGenerator{12, 6, 0.8f}.Batches(2).GenerateSparsePageDMatrix(\"temp\", false);\n\n  // Test the data read into the first row\n  auto &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();\n  auto page = batch.GetView();\n  auto first_row = page[0];\n  ASSERT_EQ(first_row.size(), 1ul);\n  EXPECT_EQ(first_row[0].index, 5u);\n  EXPECT_NEAR(first_row[0].fvalue, 0.1805125, 1e-4);\n}\n\nTEST(SparsePageDMatrix, ColAccess) {\n  common::TemporaryDirectory tempdir;\n  Context ctx;\n\n  auto nan = std::numeric_limits<float>::quiet_NaN();\n  HostDeviceVector<float> x{\n      0, 10,  20,  nan, nan,  // row-0\n      0, nan, nan, 30,  40    // row-1\n  };\n  auto dmat = GetExternalMemoryDMatrixFromData(x, 2, 5, tempdir, 2);\n\n  // Loop over the batches and assert the data is as expected\n  size_t iter = 0;\n  for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {\n    auto col_page = col_batch.GetView();\n    ASSERT_EQ(col_page.Size(), 
dmat->Info().num_col_);\n    if (iter == 1) {\n      ASSERT_EQ(col_page[0][0].fvalue, 0.f);\n      ASSERT_EQ(col_page[3][0].fvalue, 30.f);\n      ASSERT_EQ(col_page[3][0].index, 1);\n      ASSERT_EQ(col_page[3].size(), 1);\n    } else {\n      ASSERT_EQ(col_page[1][0].fvalue, 10.0f);\n      ASSERT_EQ(col_page[1].size(), 1);\n    }\n    ASSERT_LE(col_batch.base_rowid, dmat->Info().num_row_);\n    ++iter;\n  }\n\n  // Loop over the batches and assert the data is as expected\n  iter = 0;\n  for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {\n    auto col_page = col_batch.GetView();\n    ASSERT_EQ(col_page.Size(), dmat->Info().num_col_);\n    if (iter == 0) {\n      ASSERT_EQ(col_page[1][0].fvalue, 10.0f);\n      ASSERT_EQ(col_page[1].size(), 1);\n    } else {\n      ASSERT_EQ(col_page[3][0].fvalue, 30.f);\n      ASSERT_EQ(col_page[3].size(), 1);\n    }\n    iter++;\n  }\n}\n\nTEST(SparsePageDMatrix, ThreadSafetyException) {\n  Context ctx;\n\n  auto dmat =\n      RandomDataGenerator{4096, 12, 0.0f}.Batches(8).GenerateSparsePageDMatrix(\"temp\", true);\n\n  int threads = 1000;\n\n  std::vector<std::future<void>> waiting;\n\n  std::atomic<bool> exception{false};\n\n  for (int32_t i = 0; i < threads; ++i) {\n    waiting.emplace_back(std::async(std::launch::async, [&]() {\n      try {\n        auto iter = dmat->GetBatches<SparsePage>().begin();\n        ++iter;\n      } catch (...) 
{\n        exception.store(true);\n      }\n    }));\n  }\n\n  using namespace std::chrono_literals;  // NOLINT\n\n  while (std::any_of(waiting.cbegin(), waiting.cend(),\n                     [](auto const &f) { return f.wait_for(0ms) != std::future_status::ready; })) {\n    std::this_thread::sleep_for(50ms);\n  }\n\n  CHECK(exception);\n}\n\n// Multi-batches access\nTEST(SparsePageDMatrix, ColAccessBatches) {\n  // Create multiple sparse pages\n  auto dmat =\n      RandomDataGenerator{1024, 32, 0.4f}.Batches(3).GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());\n  Context ctx;\n  for (auto const &page : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {\n    ASSERT_EQ(dmat->Info().num_col_, page.Size());\n  }\n}\n\nauto TestSparsePageDMatrixDeterminism(std::int32_t n_threads) {\n  std::vector<float> sparse_data;\n  std::vector<size_t> sparse_rptr;\n  std::vector<bst_feature_t> sparse_cids;\n\n  common::TemporaryDirectory tmpdir;\n  auto prefix = (tmpdir.Path() / \"temp\").string();\n  auto dmat = RandomDataGenerator{4096, 64, 0.0}.Batches(4).GenerateSparsePageDMatrix(prefix, true);\n\n  auto config = ExtMemConfig{prefix,\n                             false,\n                             ::xgboost::cuda_impl::AutoHostRatio(),\n                             cuda_impl::MatchingPageBytes(),\n                             std::numeric_limits<float>::quiet_NaN(),\n                             n_threads};\n  CHECK(dmat->Ctx()->Threads() == n_threads || dmat->Ctx()->Threads() == AllThreadsForTest());\n\n  DMatrixToCSR(dmat.get(), &sparse_data, &sparse_rptr, &sparse_cids);\n\n  auto cache_name =\n      data::MakeId(prefix, dynamic_cast<data::SparsePageDMatrix *>(dmat.get())) + \".row.page\";\n  auto cache = common::LoadSequentialFile(cache_name);\n  return cache;\n}\n\nTEST(SparsePageDMatrix, Determinism) {\n#if defined(_MSC_VER)\n  return;\n#endif  // defined(_MSC_VER)\n  std::vector<std::vector<char>> caches;\n  for (size_t i = 
1; i < 18; i += 2) {\n    caches.emplace_back(TestSparsePageDMatrixDeterminism(i));\n  }\n\n  for (size_t i = 1; i < caches.size(); ++i) {\n    ASSERT_EQ(caches[i], caches.front());\n  }\n}\n"
  },
  {
    "path": "tests/cpp/data/test_sparse_page_dmatrix.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <xgboost/data.h>  // for DMatrix\n\n#include \"../../../src/common/compressed_iterator.h\"\n#include \"../../../src/data/ellpack_page.cuh\"\n#include \"../../../src/data/ellpack_page.h\"\n#include \"../../../src/data/sparse_page_dmatrix.h\"\n#include \"../../../src/tree/param.h\"  // TrainParam\n#include \"../helpers.h\"\n\nnamespace xgboost {\nTEST(SparsePageDMatrix, EllpackPage) {\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  auto dmat = RandomDataGenerator{512, 12, 0.0}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n\n  // Loop over the batches and assert the data is as expected\n  std::size_t n = 0;\n  for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {\n    n += batch.Size();\n  }\n  EXPECT_EQ(n, dmat->Info().num_row_);\n\n  auto path = data::MakeId(\"temp\", std::dynamic_pointer_cast<data::SparsePageDMatrix>(dmat).get()) +\n              \".row.page\";\n  ASSERT_TRUE(FileExists(path));\n  path = data::MakeId(\"temp\", std::dynamic_pointer_cast<data::SparsePageDMatrix>(dmat).get()) +\n         \".ellpack.page\";\n  ASSERT_TRUE(FileExists(path));\n}\n\nTEST(SparsePageDMatrix, EllpackSkipSparsePage) {\n  // Test Ellpack can avoid loading sparse page after the initialization.\n  std::size_t n_batches = 6;\n  auto Xy =\n      RandomDataGenerator{180, 12, 0.0}.Batches(n_batches).GenerateSparsePageDMatrix(\"temp\", true);\n  auto ctx = MakeCUDACtx(0);\n  auto cpu = ctx.MakeCPU();\n  bst_bin_t n_bins{256};\n  double sparse_thresh{0.8};\n  BatchParam batch_param{n_bins, sparse_thresh};\n\n  auto check_ellpack = [&]() {\n    std::int32_t k = 0;\n    for (auto const& page : Xy->GetBatches<EllpackPage>(&ctx, batch_param)) {\n      auto impl = page.Impl();\n      ASSERT_EQ(page.Size(), 30);\n      ASSERT_EQ(k, impl->base_rowid);\n      k += page.Size();\n    }\n  };\n\n  auto casted = 
std::dynamic_pointer_cast<data::SparsePageDMatrix>(Xy);\n  CHECK(casted);\n  check_ellpack();\n\n  // Make the number of fetches don't change (no new fetch)\n  auto n_fetches = casted->SparsePageFetchCount();\n  for (std::size_t i = 0; i < 3; ++i) {\n    for ([[maybe_unused]] auto const& page : Xy->GetBatches<EllpackPage>(&ctx, batch_param)) {\n    }\n    auto casted = std::dynamic_pointer_cast<data::SparsePageDMatrix>(Xy);\n    ASSERT_EQ(casted->SparsePageFetchCount(), n_fetches);\n  }\n  check_ellpack();\n\n  dh::device_vector<float> hess(Xy->Info().num_row_, 1.0f);\n  for (std::size_t i = 0; i < 4; ++i) {\n    for ([[maybe_unused]] auto const& page : Xy->GetBatches<SparsePage>(&ctx)) {\n    }\n    for ([[maybe_unused]] auto const& page : Xy->GetBatches<SortedCSCPage>(&cpu)) {\n    }\n    for ([[maybe_unused]] auto const& page : Xy->GetBatches<EllpackPage>(&ctx, batch_param)) {\n    }\n    // Approx tree method pages\n    {\n      BatchParam regen{n_bins, dh::ToSpan(hess), false};\n      for ([[maybe_unused]] auto const& page : Xy->GetBatches<EllpackPage>(&ctx, regen)) {\n      }\n    }\n    {\n      BatchParam regen{n_bins, dh::ToSpan(hess), true};\n      for ([[maybe_unused]] auto const& page : Xy->GetBatches<EllpackPage>(&ctx, regen)) {\n      }\n    }\n\n    check_ellpack();\n  }\n\n  // half the pages\n  {\n    auto it = Xy->GetBatches<SparsePage>(&ctx).begin();\n    for (std::size_t i = 0; i < n_batches / 2; ++i) {\n      ++it;\n    }\n    check_ellpack();\n  }\n  {\n    auto it = Xy->GetBatches<EllpackPage>(&ctx, batch_param).begin();\n    for (std::size_t i = 0; i < n_batches / 2; ++i) {\n      ++it;\n    }\n    check_ellpack();\n  }\n}\n\nTEST(SparsePageDMatrix, MultipleEllpackPages) {\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};\n  auto dmat = RandomDataGenerator{1024, 2, 0.5f}.Batches(2).GenerateSparsePageDMatrix(\"temp\", true);\n\n  // Loop over the batches and count the records\n  
std::int64_t batch_count = 0;\n  bst_idx_t row_count = 0;\n  for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {\n    EXPECT_LT(batch.Size(), dmat->Info().num_row_);\n    batch_count++;\n    row_count += batch.Size();\n  }\n  EXPECT_GE(batch_count, 2);\n  EXPECT_EQ(row_count, dmat->Info().num_row_);\n\n  auto path =\n      data::MakeId(\"tmep\", dynamic_cast<data::SparsePageDMatrix*>(dmat.get())) + \".ellpack.page\";\n}\n\nTEST(SparsePageDMatrix, RetainEllpackPage) {\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};\n  auto m = RandomDataGenerator{2048, 4, 0.0f}.Batches(8).GenerateSparsePageDMatrix(\"temp\", true);\n\n  auto batches = m->GetBatches<EllpackPage>(&ctx, param);\n  auto begin = batches.begin();\n  auto end = batches.end();\n\n  std::vector<HostDeviceVector<common::CompressedByteT>> gidx_buffers;\n  std::vector<std::shared_ptr<EllpackPage const>> iterators;\n  for (auto it = begin; it != end; ++it) {\n    iterators.push_back(it.Page());\n    gidx_buffers.emplace_back();\n    gidx_buffers.back().SetDevice(ctx.Device());\n    gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.size());\n    auto d_dst = gidx_buffers.back().DevicePointer();\n    auto const& d_src = (*it).Impl()->gidx_buffer;\n    dh::safe_cuda(cudaMemcpyAsync(d_dst, d_src.data(), d_src.size_bytes(), cudaMemcpyDefault));\n  }\n  ASSERT_EQ(iterators.size(), 8);\n\n  for (size_t i = 0; i < iterators.size(); ++i) {\n    std::vector<common::CompressedByteT> h_buf;\n    [[maybe_unused]] auto h_acc = (*iterators[i]).Impl()->GetHostEllpack(&ctx, &h_buf);\n    ASSERT_EQ(h_buf, gidx_buffers.at(i).HostVector());\n    // The last page is still kept in the DMatrix until Reset is called.\n    if (i == iterators.size() - 1) {\n      ASSERT_EQ(iterators[i].use_count(), 2);\n    } else {\n      ASSERT_EQ(iterators[i].use_count(), 1);\n    }\n  }\n\n  // make sure it's const and the caller can not modify the content of page.\n  for 
(auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {\n    static_assert(std::is_const_v<std::remove_reference_t<decltype(page)>>);\n    break;\n  }\n\n  // The above iteration clears out all references inside DMatrix.\n  for (auto const& ptr : iterators) {\n    ASSERT_TRUE(ptr.unique());\n  }\n}\n\nnamespace {\n// Test comparing external DMatrix with in-core DMatrix\nclass TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool>> {\n protected:\n  void Run(bool on_host, bool is_dense) {\n    float sparsity = is_dense ? 0.0 : 0.2;\n\n    auto ctx = MakeCUDACtx(0);\n    constexpr bst_idx_t kRows = 64;\n    constexpr size_t kCols = 2;\n\n    // Create an in-memory DMatrix.\n    auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);\n\n    // Create a DMatrix with multiple batches.\n    auto p_ext_fmat = RandomDataGenerator{kRows, kCols, sparsity}\n                          .Batches(4)\n                          .Device(ctx.Device())\n                          .OnHost(on_host)\n                          .GenerateSparsePageDMatrix(\"temp\", true);\n\n    auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};\n    auto impl = (*p_fmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();\n    ASSERT_EQ(impl->base_rowid, 0);\n    ASSERT_EQ(impl->n_rows, kRows);\n    ASSERT_EQ(impl->IsDense(), is_dense);\n    ASSERT_EQ(impl->info.row_stride, kCols);\n    ASSERT_EQ(impl->Cuts().TotalBins(), param.max_bin * kCols);\n\n    std::unique_ptr<EllpackPageImpl> impl_ext;\n    size_t offset = 0;\n    for (auto& batch : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n      if (!impl_ext) {\n        impl_ext = std::make_unique<EllpackPageImpl>(&ctx, batch.Impl()->CutsShared(),\n                                                     batch.Impl()->is_dense,\n                                                     batch.Impl()->info.row_stride, kRows);\n      }\n      auto n_elems = impl_ext->Copy(&ctx, batch.Impl(), 
offset);\n      offset += n_elems;\n    }\n    ASSERT_EQ(impl_ext->base_rowid, 0);\n    ASSERT_EQ(impl_ext->n_rows, kRows);\n    ASSERT_EQ(impl_ext->IsDense(), is_dense);\n    ASSERT_EQ(impl_ext->info.row_stride, 2);\n    ASSERT_EQ(impl_ext->Cuts().TotalBins(), 4);\n\n    std::vector<common::CompressedByteT> buffer;\n    [[maybe_unused]] auto h_acc = impl->GetHostEllpack(&ctx, &buffer);\n    std::vector<common::CompressedByteT> buffer_ext;\n    [[maybe_unused]] auto h_ext_acc = impl_ext->GetHostEllpack(&ctx, &buffer_ext);\n    ASSERT_EQ(buffer, buffer_ext);\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestEllpackPageExt, Data) {\n  auto [on_host, is_dense] = this->GetParam();\n  this->Run(on_host, is_dense);\n}\n\nINSTANTIATE_TEST_SUITE_P(EllpackPageExt, TestEllpackPageExt, ::testing::ValuesIn([]() {\n                           std::vector<std::tuple<bool, bool>> values;\n                           for (auto on_host : {true, false}) {\n                             for (auto is_dense : {true, false}) {\n                               values.emplace_back(on_host, is_dense);\n                             }\n                           }\n                           return values;\n                         }()),\n                         [](::testing::TestParamInfo<TestEllpackPageExt::ParamType> const& info) {\n                           auto on_host = std::get<0>(info.param);\n                           auto is_dense = std::get<1>(info.param);\n                           std::stringstream ss;\n                           ss << (on_host ? \"host\" : \"ext\");\n                           ss << \"_\";\n                           ss << (is_dense ? 
\"dense\" : \"sparse\");\n                           return ss.str();\n                         });\n\ntemplate <typename Accessor>\nstruct ReadRowFunction {\n  Accessor matrix;\n  bst_idx_t row;\n  bst_float* row_data_d;\n  ReadRowFunction(Accessor matrix, bst_idx_t row, bst_float* row_data_d)\n      : matrix(std::move(matrix)), row{row}, row_data_d(row_data_d) {}\n\n  __device__ void operator()(size_t col) {\n    auto value = matrix.GetFvalue(row, col);\n    if (isnan(value)) {\n      value = -1;\n    }\n    row_data_d[col] = value;\n  }\n};\n\nTEST(SparsePageDMatrix, MultipleEllpackPageContent) {\n  constexpr size_t kRows = 16;\n  constexpr size_t kCols = 2;\n  constexpr int kMaxBins = 256;\n\n  // Create an in-memory DMatrix.\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(1).GenerateSparsePageDMatrix(\"temp\", true);\n\n  // Create a DMatrix with multiple batches.\n  auto dmat_ext =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(2).GenerateSparsePageDMatrix(\"temp\", true);\n\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};\n  auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();\n  EXPECT_EQ(impl->base_rowid, 0);\n  EXPECT_EQ(impl->n_rows, kRows);\n\n  size_t current_row = 0;\n  thrust::device_vector<bst_float> row_d(kCols);\n  thrust::device_vector<bst_float> row_ext_d(kCols);\n  std::vector<bst_float> row(kCols);\n  std::vector<bst_float> row_ext(kCols);\n  for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {\n    auto impl_ext = page.Impl();\n    EXPECT_EQ(impl_ext->base_rowid, current_row);\n\n    for (size_t i = 0; i < impl_ext->Size(); i++) {\n      impl->Visit(&ctx, {}, [&](auto&& acc) {\n        dh::LaunchN(kCols, ReadRowFunction{acc, current_row, row_d.data().get()});\n      });\n\n      thrust::copy(row_d.begin(), row_d.end(), row.begin());\n      impl_ext->Visit(&ctx, {}, [&](auto&& acc) {\n        dh::LaunchN(kCols, 
ReadRowFunction{acc, current_row, row_ext_d.data().get()});\n      });\n\n      thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());\n\n      EXPECT_EQ(row, row_ext);\n      current_row++;\n    }\n  }\n}\n\nTEST(SparsePageDMatrix, EllpackPageMultipleLoops) {\n  constexpr size_t kRows = 1024;\n  constexpr size_t kCols = 16;\n  constexpr int kMaxBins = 256;\n\n  // Create an in-memory DMatrix.\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(1).GenerateSparsePageDMatrix(\"temp\", true);\n\n  // Create a DMatrix with multiple batches.\n  auto dmat_ext =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(8).GenerateSparsePageDMatrix(\"temp\", true);\n\n  auto ctx = MakeCUDACtx(0);\n  auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};\n\n  size_t current_row = 0;\n  for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {\n    auto impl_ext = page.Impl();\n    EXPECT_EQ(impl_ext->base_rowid, current_row);\n    current_row += impl_ext->n_rows;\n  }\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/data/test_sparse_page_raw_format.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>  // for CSCPage, SortedCSCPage, SparsePage\n\n#include <memory>  // for allocator, unique_ptr, __shared_ptr_ac...\n#include <string>  // for char_traits, operator+, basic_string\n\n#include \"../../../src/common/io.h\"  // for PrivateMmapConstStream, AlignedResourceReadStream...\n#include \"../../../src/data/sparse_page_writer.h\"  // for CreatePageFormat\n#include \"../filesystem.h\"                         // for TemporaryDirectory\n#include \"../helpers.h\"                            // for RandomDataGenerator\n#include \"xgboost/context.h\"                       // for Context\n\nnamespace xgboost::data {\ntemplate <typename S> void TestSparsePageRawFormat() {\n  std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>(\"raw\")};\n  Context ctx;\n\n  auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();\n  ASSERT_TRUE(m->SingleColBlock());\n  common::TemporaryDirectory tmpdir;\n  std::string path = tmpdir.Str() + \"/sparse.page\";\n  S orig;\n  std::size_t n_bytes{0};\n  {\n    // block code to flush the stream\n    auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, \"wb\");\n    for (auto const &page : m->GetBatches<S>(&ctx)) {\n      orig.Push(page);\n      n_bytes = format->Write(page, fo.get());\n    }\n  }\n\n  S page;\n  std::unique_ptr<common::AlignedResourceReadStream> fi{\n      std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};\n  format->Read(&page, fi.get());\n  for (size_t i = 0; i < orig.data.Size(); ++i) {\n    ASSERT_EQ(page.data.HostVector()[i].fvalue,\n              orig.data.HostVector()[i].fvalue);\n    ASSERT_EQ(page.data.HostVector()[i].index, orig.data.HostVector()[i].index);\n  }\n  for (size_t i = 0; i < orig.offset.Size(); ++i) {\n    ASSERT_EQ(page.offset.HostVector()[i], orig.offset.HostVector()[i]);\n  }\n  ASSERT_EQ(page.base_rowid, 
orig.base_rowid);\n}\n\nTEST(SparsePageRawFormat, SparsePage) {\n  TestSparsePageRawFormat<SparsePage>();\n}\n\nTEST(SparsePageRawFormat, CSCPage) {\n  TestSparsePageRawFormat<CSCPage>();\n}\n\nTEST(SparsePageRawFormat, SortedCSCPage) {\n  TestSparsePageRawFormat<SortedCSCPage>();\n}\n}  // namespace xgboost::data\n"
  },
  {
    "path": "tests/cpp/encoder/df_mock.cuh",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#pragma once\n\n#include <variant>  // for visit\n#include <vector>   // for vector\n\n#include \"../../../src/encoder/types.h\"        // for Overloaded\n#include \"../../src/common/device_vector.cuh\"  // for device_vector\n#include \"../../src/data/cat_container.cuh\"    // for CatIndexTypes\n#include \"df_mock.h\"                           // for MakeStrArrayImpl\n\nnamespace enc::cuda_impl {\nusing CatIndexTypes = ::xgboost::cuda_impl::CatIndexTypes;\nusing ColumnType = enc::cpu_impl::TupToVarT<CatIndexTypes>;\n\nclass DfTest {\n public:\n  template <typename T>\n  using Vector = dh::device_vector<T>;\n\n private:\n  std::vector<ColumnType> columns_;\n  dh::device_vector<enc::DeviceCatIndexView> columns_v_;\n  dh::device_vector<std::int32_t> segments_;\n  std::vector<std::int32_t> h_segments_;\n\n  dh::device_vector<std::int32_t> mapping_;\n\n  template <typename Head>\n  static void MakeImpl(std::vector<ColumnType>* p_out, dh::device_vector<std::int32_t>* p_sizes,\n                       Head&& col) {\n    p_sizes->push_back(col.size());\n    p_out->emplace_back(std::forward<Head>(col));\n\n    p_sizes->insert(p_sizes->begin(), 0);\n    thrust::inclusive_scan(p_sizes->cbegin(), p_sizes->cend(), p_sizes->begin());\n  }\n\n  template <typename Head, typename... Col>\n  static void MakeImpl(std::vector<ColumnType>* p_out, dh::device_vector<std::int32_t>* p_sizes,\n                       Head&& col, Col&&... columns) {\n    p_sizes->push_back(col.size());\n    p_out->emplace_back(std::forward<Head>(col));\n    MakeImpl(p_out, p_sizes, std::forward<Col>(columns)...);\n  }\n\n public:\n  template <typename... Col>\n  static DfTest Make(Col&&... 
columns) {\n    DfTest df;\n    MakeImpl(&df.columns_, &df.segments_, std::forward<Col>(columns)...);\n    for (std::size_t i = 0; i < df.columns_.size(); ++i) {\n      auto const& col = df.columns_[i];\n      std::visit(Overloaded{[&](xgboost::cuda_impl::CatStrArray const& str) {\n                              df.columns_v_.push_back(enc::CatStrArrayView(str));\n                            },\n                            [&](auto&& args) {\n                              df.columns_v_.push_back(dh::ToSpan(args));\n                            }},\n                 col);\n    }\n    CHECK_EQ(df.columns_v_.size(), sizeof...(columns));\n    df.h_segments_.resize(df.segments_.size());\n    thrust::copy_n(df.segments_.cbegin(), df.segments_.size(), df.h_segments_.begin());\n    df.mapping_.resize(df.h_segments_.back());\n    return df;\n  }\n\n  template <typename... Strs>\n  static auto MakeStrs(Strs&&... strs) {\n    auto array = MakeStrArrayImpl(std::forward<Strs>(strs)...);\n    return xgboost::cuda_impl::CatStrArray{array.offsets, array.values};\n  }\n\n  template <typename... Ints>\n  static auto MakeInts(Ints&&... names) {\n    return dh::device_vector<std::int32_t>{names...};\n  }\n\n  auto View() const {\n    return enc::DeviceColumnsView{dh::ToSpan(this->columns_v_), dh::ToSpan(segments_),\n                                  h_segments_.back()};\n  }\n  auto Segment() const { return Span{h_segments_}; }\n\n  auto MappingView() { return dh::ToSpan(mapping_); }\n  auto const& Mapping() { return mapping_; }\n};\n}  // namespace enc::cuda_impl\n"
  },
  {
    "path": "tests/cpp/encoder/df_mock.h",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#pragma once\n#include <gtest/gtest.h>\n\n#include <cstdint>  // for int32_t, int8_t\n#include <numeric>  // for partial_sum\n#include <string>   // for string\n#include <utility>  // for forward\n#include <variant>  // for visit\n#include <vector>   // for vector\n\n#include \"../../../src/data/cat_container.h\"  // for ColumnType, CatStrArray\n#include \"../../../src/encoder/ordinal.h\"     // for CatStrArrayView\n#include \"../../../src/encoder/types.h\"       // for Overloaded\n\nnamespace enc {\ntemplate <typename... Strs>\nauto MakeStrArrayImpl(Strs&&... strs) {\n  std::vector<std::string> names{strs...};\n  std::vector<std::int8_t> values;\n  std::vector<std::int32_t> offsets{0};\n\n  for (const auto& name : names) {\n    for (char c : name) {\n      values.push_back(c);\n    }\n    offsets.push_back(name.size());\n  }\n  std::partial_sum(offsets.cbegin(), offsets.cend(), offsets.begin());\n  return xgboost::cpu_impl::CatStrArray{offsets, values};\n}\n}  // namespace enc\n\nnamespace enc::cpu_impl {\nusing ColumnType = xgboost::cpu_impl::ColumnType;\nclass DfTest {\n private:\n  std::vector<ColumnType> columns_;\n  std::vector<enc::HostCatIndexView> columns_v_;\n  std::vector<std::int32_t> segments_;\n\n  std::vector<std::int32_t> mapping_;\n\n  template <typename Head>\n  static auto MakeImpl(std::vector<ColumnType>* p_out, std::vector<std::int32_t>* p_sizes,\n                       Head&& col) {\n    p_out->emplace_back(col);\n    p_sizes->push_back(col.size());\n    p_sizes->insert(p_sizes->begin(), 0);\n    std::partial_sum(p_sizes->cbegin(), p_sizes->cend(), p_sizes->begin());\n  }\n\n  template <typename Head, typename... Col>\n  static void MakeImpl(std::vector<ColumnType>* p_out, std::vector<std::int32_t>* p_sizes,\n                       Head&& col, Col&&... 
columns) {\n    p_out->emplace_back(col);\n    p_sizes->push_back(col.size());\n\n    MakeImpl(p_out, p_sizes, columns...);\n  }\n\n public:\n  template <typename... Col>\n  static DfTest Make(Col&&... columns) {\n    DfTest df;\n    MakeImpl(&df.columns_, &df.segments_, std::forward<Col>(columns)...);\n    for (std::size_t i = 0; i < df.columns_.size(); ++i) {\n      auto const& col = df.columns_[i];\n      std::visit(Overloaded{[&](xgboost::cpu_impl::CatStrArray const& str) {\n                              df.columns_v_.emplace_back(enc::CatStrArrayView(str));\n                            },\n                            [&](auto&& args) {\n                              df.columns_v_.emplace_back(Span{args});\n                            }},\n                 col);\n    }\n    auto check = [&] {\n      // the macro needs to return void.\n      ASSERT_EQ(df.columns_v_.size(), sizeof...(columns));\n    };\n    check();\n    df.mapping_.resize(df.segments_.back());\n    return df;\n  }\n\n  template <typename... Strs>\n  static auto MakeStrs(Strs&&... strs) {\n    return MakeStrArrayImpl(std::forward<Strs>(strs)...);\n  }\n\n  template <typename... Ints>\n  static auto MakeInts(Ints&&... names) {\n    return std::vector<std::int32_t>{names...};\n  }\n\n  auto View() const { return enc::HostColumnsView{Span{columns_v_}, segments_, segments_.back()}; }\n\n  auto Segment() const { return Span{segments_}; }\n  auto MappingView() { return Span{mapping_}; }\n  auto const& Mapping() { return mapping_; }\n};\n}  // namespace enc::cpu_impl\n"
  },
  {
    "path": "tests/cpp/encoder/test_ordinal.cc",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include \"test_ordinal.h\"\n\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <cstdint>  // for int32_t\n#include <sstream>  // for stringstream\n#include <vector>   // for vector\n\n#include \"../../../src/encoder/ordinal.h\"\n#include \"df_mock.h\"  // for DfTest\n\nnamespace enc {\nnamespace {\nusing DfTest = cpu_impl::DfTest;\n\nclass OrdRecoderTest {\n public:\n  void Recode(HostColumnsView orig_enc, HostColumnsView new_enc, Span<std::int32_t> mapping) {\n    std::vector<std::int32_t> sorted_idx(orig_enc.n_total_cats);\n    SortNames(DftHostPolicy{}, orig_enc, sorted_idx);\n    ::enc::Recode(DftHostPolicy{}, orig_enc, sorted_idx, new_enc, mapping);\n  }\n};\n}  // namespace\n\nTEST(CategoricalEncoder, Str) { TestOrdinalEncoderStrs<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, Int) { TestOrdinalEncoderInts<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, Mixed) { TestOrdinalEncoderMixed<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, Empty) { TestOrdinalEncoderEmpty<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, Print) {\n  auto df = DfTest::Make(DfTest::MakeInts(0, 1), DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"),\n                         DfTest::MakeInts(2, 3));\n  std::stringstream ss;\n  ss << df.View();\n  auto str = ss.str();\n  auto sol = R\"(f0: [0, 1]\nf1: [cbd, bbd, dbd, ab]\nf2: [2, 3]\n)\";\n  ASSERT_EQ(sol, str);\n}\n}  // namespace enc\n"
  },
  {
    "path": "tests/cpp/encoder/test_ordinal.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n\n#include \"../../src/encoder/ordinal.cuh\"\n#include \"df_mock.cuh\"\n#include \"test_ordinal.h\"\n\nnamespace enc::cuda_impl {\nnamespace {\nclass OrdRecoderTest {\n public:\n  void Recode(DeviceColumnsView orig_enc, DeviceColumnsView new_enc, Span<std::int32_t> mapping) {\n    auto policy = DftDevicePolicy{};\n    thrust::device_vector<std::int32_t> ref_sorted_idx(orig_enc.n_total_cats);\n    SortNames(policy, orig_enc, dh::ToSpan(ref_sorted_idx));\n    auto d_sorted_idx = dh::ToSpan(ref_sorted_idx);\n    ::enc::Recode(policy, orig_enc, d_sorted_idx, new_enc, mapping);\n  }\n};\n}  // namespace\n\nTEST(CategoricalEncoder, StrGpu) { TestOrdinalEncoderStrs<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, IntGpu) { TestOrdinalEncoderInts<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, MixedGpu) { TestOrdinalEncoderMixed<OrdRecoderTest, DfTest>(); }\n\nTEST(CategoricalEncoder, EmptyGpu) { TestOrdinalEncoderEmpty<OrdRecoderTest, DfTest>(); }\n}  // namespace enc::cuda_impl\n"
  },
  {
    "path": "tests/cpp/encoder/test_ordinal.h",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#pragma once\n\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <cstdint>  // for int8_t, int32_t\n#include <vector>   // for vector\n\nnamespace enc {\ntemplate <typename Encoder, typename DfTest>\nvoid TestOrdinalEncoderStrs() {\n  Encoder encoder;\n  auto sol = std::vector<std::int32_t>{0, 3, 1};\n\n  {\n    auto df = DfTest::Make(DfTest::MakeStrs(\"c\", \"b\", \"d\", \"a\"));\n    auto orig_dict = df.View();\n    ASSERT_EQ(orig_dict.Size(), 1);\n\n    auto new_df = DfTest::Make(DfTest::MakeStrs(\"c\", \"a\", \"b\"));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping().size(), 3);\n\n    ASSERT_EQ(new_df.Mapping(), sol);\n  }\n  {\n    // longer strings\n    auto df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping().size(), 3);\n    ASSERT_EQ(new_df.Mapping(), sol);\n  }\n  {\n    // Test error message.\n    auto df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeStrs(\"oops\", \"ab\", \"bbd\"));\n    auto new_dict = new_df.View();\n    ASSERT_THAT([&] { encoder.Recode(orig_dict, new_dict, new_df.MappingView()); },\n                ::testing::ThrowsMessage<std::logic_error>(::testing::HasSubstr(\"`oops`\")));\n  }\n  {\n    // Multi-columns\n    auto df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"),\n                           DfTest::MakeStrs(\"b\", \"c\", \"a\", \"d\"));\n    auto orig_dict = df.View();\n\n    auto new_df =\n        DfTest::Make(DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"), DfTest::MakeStrs(\"d\", 
\"a\", \"b\"));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    auto segs = new_df.Segment();\n    auto beg = segs[0];\n    auto end = segs[1];\n\n    auto sol0 = sol;\n    for (auto i = beg, k = 0; i < end; ++i, ++k) {\n      ASSERT_EQ(sol0[k], new_df.Mapping()[i]);\n    }\n\n    beg = end;\n    end = segs[2];\n    auto sol1 = std::vector{3, 2, 0};\n    for (auto i = beg, k = 0; i < end; ++i, ++k) {\n      ASSERT_EQ(sol1[k], new_df.Mapping()[i]);\n    }\n  }\n}\n\ntemplate <typename Encoder, typename DfTest>\nvoid TestOrdinalEncoderInts() {\n  Encoder encoder;\n  auto sol = std::vector<std::int32_t>{0, 3, 1};\n\n  {\n    auto df = DfTest::Make(DfTest::MakeInts(2, 1, 3, 0));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeInts(2, 0, 1));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping(), sol);\n  }\n  {\n    // Test error message.\n    auto df = DfTest::Make(DfTest::MakeInts(2, 1, 3, 0));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeInts(2, 0, 5));\n    auto new_dict = new_df.View();\n    ASSERT_THAT([&] { encoder.Recode(orig_dict, new_dict, new_df.MappingView()); },\n                ::testing::ThrowsMessage<std::logic_error>(::testing::HasSubstr(\"`5`\")));\n  }\n  {\n    auto df = DfTest::Make(DfTest::MakeInts(0), DfTest::MakeInts(0, 1));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeInts(0), DfTest::MakeInts(0, 1));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    auto mapping = new_df.Mapping();\n    std::vector<std::int32_t> sol{0, 0, 1};\n    ASSERT_EQ(mapping, sol);\n  }\n}\n\ntemplate <typename Encoder, typename DfTest>\nvoid TestOrdinalEncoderMixed() {\n  Encoder encoder;\n  auto sol = std::vector<std::int32_t>{0, 3, 1};\n\n  {\n    auto df =\n        
DfTest::Make(DfTest::MakeInts(2, 1, 3, 0), DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeInts(2, 0, 1), DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping().size(), 6);\n    for (std::size_t i = 0; i < new_df.Mapping().size(); ++i) {\n      ASSERT_EQ(new_df.Mapping()[i], sol[i % sol.size()]);\n    }\n  }\n  {\n    auto df =\n        DfTest::Make(DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"), DfTest::MakeInts(2, 1, 3, 0));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"), DfTest::MakeInts(2, 0, 1));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping().size(), 6);\n    for (std::size_t i = 0; i < new_df.Mapping().size(); ++i) {\n      ASSERT_EQ(new_df.Mapping()[i], sol[i % sol.size()]);\n    }\n  }\n  {\n    auto df =\n        DfTest::Make(DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"), DfTest::MakeInts(2, 1, 3, 0),\n                     DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"));\n    auto orig_dict = df.View();\n\n    auto new_df = DfTest::Make(DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"), DfTest::MakeInts(2, 0),\n                               DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"));\n    auto new_dict = new_df.View();\n\n    encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n    ASSERT_EQ(new_df.Mapping().size(), 8);\n    for (std::size_t i = 0; i < 3; ++i) {\n      ASSERT_EQ(new_df.Mapping()[i], sol[i]);\n    }\n    for (std::size_t i = 3, k = 0; i < 5; ++i, ++k) {\n      ASSERT_EQ(new_df.Mapping()[i], sol[k]);\n    }\n    for (std::size_t i = 5, k = 0; i < 8; ++i, ++k) {\n      ASSERT_EQ(new_df.Mapping()[i], sol[k]);\n    }\n  }\n}\n\ntemplate <typename Encoder, typename 
DfTest>\nvoid TestOrdinalEncoderEmpty() {\n  auto sol = std::vector<std::int32_t>{0, 3, 1};\n  Encoder encoder;\n  auto df = DfTest::Make(DfTest::MakeInts(), DfTest::MakeStrs(\"cbd\", \"bbd\", \"dbd\", \"ab\"),\n                         DfTest::MakeInts());\n  auto orig_dict = df.View();\n\n  auto new_df =\n      DfTest::Make(DfTest::MakeInts(), DfTest::MakeStrs(\"cbd\", \"ab\", \"bbd\"), DfTest::MakeInts());\n  auto new_dict = new_df.View();\n  encoder.Recode(orig_dict, new_dict, new_df.MappingView());\n  ASSERT_EQ(new_df.Mapping().size(), 3);\n  ASSERT_EQ(new_df.Mapping(), sol);\n}\n}  // namespace enc\n"
  },
  {
    "path": "tests/cpp/filesystem.cc",
    "content": "/**\n * Copyright 2025, XGBoost Contributors\n */\n#include \"filesystem.h\"\n\n#include <xgboost/windefs.h>\n\n#include <filesystem>  // for path, temp_directory_path\n\n#if !defined(xgboost_IS_WIN)\n\n#include <cstdlib>  // for mkdtemp\n\n#include \"../../src/common/error_msg.h\"  // for SystemError\n\n#else\n\n#include <random>  // for uniform_int_distribution\n\n#include \"xgboost/string_view.h\"  // for StringView\n\n#endif  // !defined(xgboost_IS_WIN)\n\nnamespace xgboost::common {\nTemporaryDirectory::TemporaryDirectory(std::string prefix) : prefix_{std::move(prefix)} {\n  namespace fs = std::filesystem;\n\n  auto tmp = fs::temp_directory_path();\n\n#if defined(xgboost_IS_WIN)\n  std::default_random_engine rng;\n  auto make_name = [&rng, this] {\n    constexpr std::size_t kPathMax = 6;\n    constexpr StringView kAlphabet{\"abcdefghijklmnopqrstuvwxyz\"};\n    static_assert(kAlphabet.size() == 26);\n    std::uniform_int_distribution dist{0, 25};\n    char path[kPathMax + 1];\n    std::memset(path, 0, sizeof(path));\n    for (std::size_t i = 0; i < kPathMax; ++i) {\n      auto k = dist(rng);\n      path[i] = kAlphabet[k];\n    }\n    auto res = std::string{path};\n    CHECK_EQ(res.size(), kPathMax);\n    return this->prefix_ + \"tmpdir.\" + std::string{path};\n  };\n  auto dirname = tmp / make_name();\n  std::int32_t retry = 0;\n  while (fs::exists(dirname) && retry < 64) {\n    dirname = tmp / make_name();\n    ++retry;\n  }\n  if (retry >= 64) {\n    LOG(FATAL) << \"Failed to create temporary directory.\";\n  }\n  this->path_ = dirname.string();\n  CHECK(fs::create_directory(this->path_));\n#else\n  auto dirtemplate = (tmp / (this->prefix_ + \"tmpdir.XXXXXX\")).string();\n  // https://man7.org/linux/man-pages/man3/mkdtemp.3.html\n  char* tmpdir = mkdtemp(dirtemplate.data());\n  if (!tmpdir) {\n    LOG(FATAL) << error::SystemError().message();\n  }\n  this->path_ = tmpdir;\n#endif\n  LOG(DEBUG) << \"TmpDir:\" << this->path_;\n  
CHECK(fs::exists(this->path_));\n}\n\nTemporaryDirectory::~TemporaryDirectory() noexcept(false) {\n  std::filesystem::remove_all(this->path_);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/filesystem.h",
    "content": "/**\n * Copyright 2022-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_TESTS_CPP_FILESYSTEM_H\n#define XGBOOST_TESTS_CPP_FILESYSTEM_H\n\n#include <filesystem>  // for path\n\nnamespace xgboost::common {\nclass TemporaryDirectory {\n  std::filesystem::path path_;\n  std::string prefix_;\n\n public:\n  explicit TemporaryDirectory(std::string prefix = \"xgboost-\");\n  ~TemporaryDirectory() noexcept(false);\n\n  [[nodiscard]] std::filesystem::path const& Path() const { return this->path_; }\n  // Path can be implicitly converted to string on unix, but not on windows, due to its use\n  // of wchar.\n  [[nodiscard]] std::string Str() const { return this->path_.string(); }\n};\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_TESTS_CPP_FILESYSTEM_H\n"
  },
  {
    "path": "tests/cpp/gbm/test_gblinear.cc",
    "content": "/**\n * Copyright 2019-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/feature_map.h>  // for FeatureMap\n\n#include <memory>\n\n#include \"../helpers.h\"\n#include \"xgboost/context.h\"\n#include \"xgboost/gbm.h\"\n#include \"xgboost/json.h\"\n#include \"xgboost/learner.h\"\n\nnamespace xgboost::gbm {\nTEST(GBLinear, JsonIO) {\n  size_t constexpr kRows = 16, kCols = 16;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"gblinear\", Args{}, kRows, kCols, &mparam, &ctx)};\n  Json model { Object() };\n  gbm->SaveModel(&model);\n  ASSERT_TRUE(IsA<Object>(model));\n\n  std::string model_str;\n  Json::Dump(model, &model_str);\n\n  model = Json::Load(StringView{model_str.c_str(), model_str.size()});\n  ASSERT_TRUE(IsA<Object>(model));\n\n  {\n    model = model[\"model\"];\n    auto weights = get<Array>(model[\"weights\"]);\n    ASSERT_EQ(weights.size(), 17);\n  }\n}\n\nTEST(GBLinear, Dump) {\n  Context ctx;\n  size_t constexpr kRows = 16, kCols = 16;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"gblinear\", Args{}, kRows, kCols, &mparam, &ctx)};\n  FeatureMap fmap;\n  ASSERT_THAT([&] { [[maybe_unused]] auto vec = gbm->DumpModel(fmap, true, \"dot\"); },\n              GMockThrow(R\"(`dot` is not supported)\"));\n}\n}  // namespace xgboost::gbm\n"
  },
  {
    "path": "tests/cpp/gbm/test_gblinear.cu",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/global_config.h>  // for GlobalConfigThreadLocalStore\n#include <xgboost/json.h>           // for Json, Object\n#include <xgboost/learner.h>        // for Learner\n\n#include <algorithm>  // for transform\n#include <string>     // for string\n#include <utility>    // for swap\n\n#include \"../helpers.h\"  // for RandomDataGenerator\n\nnamespace xgboost {\nTEST(GBlinear, DispatchUpdater) {\n  auto verbosity = 3;\n  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);\n\n  auto test = [](std::string device) {\n    auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParams(\n        Args{{\"booster\", \"gblinear\"}, {\"updater\", \"coord_descent\"}, {\"device\", device}});\n    learner->Configure();\n    for (std::int32_t iter = 0; iter < 3; ++iter) {\n      learner->UpdateOneIter(iter, p_fmat);\n    }\n    Json config{Object{}};\n    ::testing::internal::CaptureStderr();\n    learner->SaveConfig(&config);\n    auto str = ::testing::internal::GetCapturedStderr();\n    std::transform(device.cbegin(), device.cend(), device.begin(),\n                   [](char c) { return std::toupper(c); });\n    ASSERT_NE(str.find(device), std::string::npos);\n  };\n  test(\"cpu\");\n  test(\"gpu\");\n\n  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/gbm/test_gbtree.cc",
    "content": "/**\n * Copyright 2019-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/json.h>                // for Json, Object\n#include <xgboost/learner.h>             // for Learner\n\n#include <limits>    // for numeric_limits\n#include <memory>    // for shared_ptr\n#include <optional>  // for optional\n#include <string>    // for string\n\n#include \"../../../src/data/proxy_dmatrix.h\"  // for DMatrixProxy\n#include \"../../../src/gbm/gbtree.h\"\n#include \"../filesystem.h\"  // TemporaryDirectory\n#include \"../helpers.h\"\n#include \"xgboost/base.h\"\n#include \"xgboost/predictor.h\"\n\nnamespace xgboost {\nTEST(GBTree, SelectTreeMethod) {\n  size_t constexpr kCols = 10;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> p_gbm{GradientBooster::Create(\"gbtree\", &ctx, &mparam)};\n  auto& gbtree = dynamic_cast<gbm::GBTree&>(*p_gbm);\n\n  // Test if `tree_method` can be set\n  Args args{{\"tree_method\", \"approx\"}};\n  gbtree.Configure({args.cbegin(), args.cend()});\n\n  gbtree.Configure(args);\n  auto const& tparam = gbtree.GetTrainParam();\n  gbtree.Configure({{\"tree_method\", \"approx\"}});\n  ASSERT_EQ(tparam.updater_seq, \"grow_histmaker\");\n  gbtree.Configure({{\"tree_method\", \"exact\"}});\n  ASSERT_EQ(tparam.updater_seq, \"grow_colmaker,prune\");\n  gbtree.Configure({{\"tree_method\", \"hist\"}});\n  ASSERT_EQ(tparam.updater_seq, \"grow_quantile_histmaker\");\n  gbtree.Configure({{\"booster\", \"dart\"}, {\"tree_method\", \"hist\"}});\n  ASSERT_EQ(tparam.updater_seq, \"grow_quantile_histmaker\");\n\n#ifdef XGBOOST_USE_CUDA\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"cuda\"}});\n  gbtree.Configure({{\"tree_method\", \"hist\"}});\n  ASSERT_EQ(tparam.updater_seq, \"grow_gpu_hist\");\n  gbtree.Configure({{\"booster\", \"dart\"}, {\"tree_method\", \"hist\"}});\n  
ASSERT_EQ(tparam.updater_seq, \"grow_gpu_hist\");\n#endif  // XGBOOST_USE_CUDA\n}\n\nTEST(GBTree, PredictionCache) {\n  size_t constexpr kRows = 100, kCols = 10;\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> p_gbm{GradientBooster::Create(\"gbtree\", &ctx, &mparam)};\n  auto& gbtree = dynamic_cast<gbm::GBTree&>(*p_gbm);\n\n  gbtree.Configure({{\"tree_method\", \"hist\"}});\n  auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n\n  GradientContainer gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  PredictionCacheEntry out_predictions;\n  gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);\n\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);\n  ASSERT_EQ(1, out_predictions.version);\n  std::vector<float> first_iter = out_predictions.predictions.HostVector();\n  // Add 1 more boosted round\n  gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);\n  ASSERT_EQ(2, out_predictions.version);\n  // Update the cache for all rounds\n  out_predictions.version = 0;\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);\n  ASSERT_EQ(2, out_predictions.version);\n\n  gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);\n  // drop the cache.\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 1, 2);\n  ASSERT_EQ(0, out_predictions.version);\n  // half open set [1, 3)\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 1, 3);\n  ASSERT_EQ(0, out_predictions.version);\n  // iteration end\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 2);\n  ASSERT_EQ(2, out_predictions.version);\n  // restart the cache when end iteration is smaller than cache version\n  gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 1);\n  ASSERT_EQ(1, out_predictions.version);\n  ASSERT_EQ(out_predictions.predictions.HostVector(), first_iter);\n}\n\nTEST(GBTree, WrongUpdater) {\n  
size_t constexpr kRows = 17;\n  size_t constexpr kCols = 15;\n\n  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  p_dmat->Info().labels.Reshape(kRows);\n\n  auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));\n  // Hist can not be used for updating tree.\n  learner->SetParams(Args{{\"tree_method\", \"hist\"}, {\"process_type\", \"update\"}});\n  ASSERT_THROW(learner->UpdateOneIter(0, p_dmat), dmlc::Error);\n  // Prune can not be used for learning new tree.\n  learner->SetParams(Args{{\"tree_method\", \"prune\"}, {\"process_type\", \"default\"}});\n  ASSERT_THROW(learner->UpdateOneIter(0, p_dmat), dmlc::Error);\n}\n\n#ifdef XGBOOST_USE_CUDA\nTEST(GBTree, ChoosePredictor) {\n  // The test ensures data don't get pulled into device.\n  // XGBoost chooses predictor based on the data placement when input is a SparsePage.\n  std::size_t constexpr kRows = 17, kCols = 15;\n\n  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  auto const& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;\n  p_dmat->Info().labels.Reshape(kRows);\n\n  auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));\n  learner->SetParams(Args{{\"tree_method\", \"hist\"}, {\"device\", \"cuda\"}});\n  for (size_t i = 0; i < 4; ++i) {\n    learner->UpdateOneIter(i, p_dmat);\n  }\n  ASSERT_TRUE(data.HostCanWrite());\n\n  common::TemporaryDirectory tempdir;\n  const std::string fname = tempdir.Str() + \"/model_param.bst\";\n  {\n    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), \"w\"));\n    learner->Save(fo.get());\n  }\n  // a new learner\n  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));\n  {\n    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\"));\n    learner->Load(fi.get());\n  }\n  learner->SetParams(Args{{\"tree_method\", \"hist\"}, {\"device\", \"cuda\"}});\n  for (size_t i = 0; i < 4; ++i) {\n    learner->UpdateOneIter(i, p_dmat);\n  }\n  
ASSERT_TRUE(data.HostCanWrite());\n  ASSERT_FALSE(data.DeviceCanWrite());\n  ASSERT_FALSE(data.DeviceCanRead());\n\n  // pull data into device.\n  data.HostVector();\n  data.SetDevice(DeviceOrd::CUDA(0));\n  data.DeviceSpan();\n  ASSERT_FALSE(data.HostCanWrite());\n\n  // another new learner\n  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));\n  learner->SetParams(Args{{\"tree_method\", \"hist\"}, {\"device\", \"cuda\"}});\n  for (size_t i = 0; i < 4; ++i) {\n    learner->UpdateOneIter(i, p_dmat);\n  }\n  // data is not pulled back into host\n  ASSERT_FALSE(data.HostCanWrite());\n}\n\nTEST(GBTree, ChooseTreeMethod) {\n  bst_idx_t n_samples{128};\n  bst_feature_t n_features{64};\n  auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);\n\n  auto with_update = [&](std::optional<std::string> device,\n                         std::optional<std::string> tree_method) {\n    auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));\n    if (tree_method.has_value()) {\n      learner->SetParam(\"tree_method\", tree_method.value());\n    }\n    if (device.has_value()) {\n      auto const& d = device.value();\n      learner->SetParam(\"device\", d);\n    }\n    learner->Configure();\n    for (std::int32_t i = 0; i < 3; ++i) {\n      learner->UpdateOneIter(0, Xy);\n    }\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto updater = config[\"learner\"][\"gradient_booster\"][\"updater\"];\n    CHECK(!IsA<Null>(updater));\n    return updater;\n  };\n\n  auto with_boost = [&](std::optional<std::string> device, std::optional<std::string> tree_method) {\n    auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));\n    if (tree_method.has_value()) {\n      learner->SetParam(\"tree_method\", tree_method.value());\n    }\n    if (device.has_value()) {\n      auto const& d = device.value();\n      learner->SetParam(\"device\", d);\n    }\n    learner->Configure();\n    Context ctx;\n    for (std::int32_t i = 0; 
i < 3; ++i) {\n      GradientContainer gpair = GenerateRandomGradients(&ctx, Xy->Info().num_row_, 1);\n      learner->BoostOneIter(0, Xy, &gpair);\n    }\n\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto updater = config[\"learner\"][\"gradient_booster\"][\"updater\"];\n    return updater;\n  };\n\n  // |        | hist    | approx | exact | NA  |\n  // |--------+---------+--------+-------+-----|\n  // | CUDA:0 | GPU     | GPU    | Err   | GPU |\n  // | CPU    | CPU     | CPU    | CPU   | CPU |\n  // |--------+---------+--------+-------+-----|\n  // | NA     | CPU     | CPU    | CPU   | CPU |\n  //\n  // - CPU: Run on CPU.\n  // - GPU: Run on CUDA.\n  // - Err: Not feasible.\n  // - NA:  Parameter is not specified.\n  std::map<std::pair<std::optional<std::string>, std::optional<std::string>>, std::string>\n      expectation{\n          // hist\n          {{\"hist\", \"cpu\"}, \"grow_quantile_histmaker\"},\n          {{\"hist\", \"cuda\"}, \"grow_gpu_hist\"},\n          {{\"hist\", \"cuda:0\"}, \"grow_gpu_hist\"},\n          {{\"hist\", std::nullopt}, \"grow_quantile_histmaker\"},\n          // approx\n          {{\"approx\", \"cpu\"}, \"grow_histmaker\"},\n          {{\"approx\", \"cuda\"}, \"grow_gpu_approx\"},\n          {{\"approx\", \"cuda:0\"}, \"grow_gpu_approx\"},\n          {{\"approx\", std::nullopt}, \"grow_histmaker\"},\n          // exact\n          {{\"exact\", \"cpu\"}, \"grow_colmaker,prune\"},\n          {{\"exact\", \"cuda\"}, \"err\"},\n          {{\"exact\", \"cuda:0\"}, \"err\"},\n          {{\"exact\", std::nullopt}, \"grow_colmaker,prune\"},\n          // NA\n          {{std::nullopt, \"cpu\"}, \"grow_quantile_histmaker\"},\n          {{std::nullopt, \"cuda\"}, \"grow_gpu_hist\"},\n          {{std::nullopt, \"cuda:0\"}, \"grow_gpu_hist\"},\n          {{std::nullopt, std::nullopt}, \"grow_quantile_histmaker\"},\n      };\n\n  auto run_test = [&](auto fn) {\n    for (auto const& kv : expectation) {\n      auto device = 
kv.first.second;\n      auto tm = kv.first.first;\n\n      if (kv.second == \"err\") {\n        ASSERT_THROW(\n            { fn(device, tm); }, dmlc::Error)\n            << \" device:\" << device.value_or(\"NA\") << \" tm:\" << tm.value_or(\"NA\");\n        continue;\n      }\n      auto up = fn(device, tm);\n      auto ups = get<Array const>(up);\n      auto exp_names = common::Split(kv.second, ',');\n      ASSERT_EQ(exp_names.size(), ups.size());\n      for (std::size_t i = 0; i < exp_names.size(); ++i) {\n        ASSERT_EQ(get<String const>(ups[i][\"name\"]), exp_names[i])\n            << \" device:\" << device.value_or(\"NA\") << \" tm:\" << tm.value_or(\"NA\");\n      }\n    }\n  };\n\n  run_test(with_update);\n  run_test(with_boost);\n}\n#endif  // XGBOOST_USE_CUDA\n\n// Some other parts of test are in `Tree.JsonIO'.\nTEST(GBTree, JsonIO) {\n  size_t constexpr kRows = 16, kCols = 16;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"gbtree\", Args{{\"tree_method\", \"exact\"}, {\"default_direction\", \"left\"}},\n                       kRows, kCols, &mparam, &ctx)};\n\n  Json model{Object()};\n  model[\"model\"] = Object();\n  auto j_model = model[\"model\"];\n\n  model[\"config\"] = Object();\n  auto j_config = model[\"config\"];\n\n  gbm->SaveModel(&j_model);\n  gbm->SaveConfig(&j_config);\n\n  std::string model_str;\n  Json::Dump(model, &model_str);\n\n  model = Json::Load({model_str.c_str(), model_str.size()});\n  j_model = model[\"model\"];\n  j_config = model[\"config\"];\n  ASSERT_EQ(get<String>(j_model[\"name\"]), \"gbtree\");\n\n  auto gbtree_model = j_model[\"model\"];\n  ASSERT_EQ(get<Array>(gbtree_model[\"trees\"]).size(), 1ul);\n  ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model[\"trees\"]).front()).at(\"id\")), 0);\n  ASSERT_EQ(get<Array>(gbtree_model[\"tree_info\"]).size(), 1ul);\n  auto j_train_param = j_config[\"gbtree_model_param\"];\n  
ASSERT_EQ(get<String>(j_train_param[\"num_parallel_tree\"]), \"1\");\n\n  auto check_config = [](Json j_up_config) {\n    auto colmaker = get<Array const>(j_up_config).front();\n    auto pruner = get<Array const>(j_up_config).back();\n    ASSERT_EQ(get<String const>(colmaker[\"name\"]), \"grow_colmaker\");\n    ASSERT_EQ(get<String const>(pruner[\"name\"]), \"prune\");\n    ASSERT_EQ(get<String const>(colmaker[\"colmaker_train_param\"][\"default_direction\"]), \"left\");\n  };\n  check_config(j_config[\"updater\"]);\n\n  std::unique_ptr<GradientBooster> loaded(gbm::GBTree::Create(\"gbtree\", &ctx, &mparam));\n  loaded->LoadModel(j_model);\n  loaded->LoadConfig(j_config);\n\n  // roundtrip test\n  Json j_config_rt{Object{}};\n  loaded->SaveConfig(&j_config_rt);\n  check_config(j_config_rt[\"updater\"]);\n}\n\nTEST(Dart, JsonIO) {\n  size_t constexpr kRows = 16, kCols = 16;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"dart\", Args{}, kRows, kCols, &mparam, &ctx)};\n\n  Json model{Object()};\n  model[\"model\"] = Object();\n  auto& j_model = model[\"model\"];\n  model[\"config\"] = Object();\n\n  auto& j_param = model[\"config\"];\n\n  gbm->SaveModel(&j_model);\n  gbm->SaveConfig(&j_param);\n\n  std::string model_str;\n  Json::Dump(model, &model_str);\n\n  model = Json::Load({model_str.c_str(), model_str.size()});\n\n  ASSERT_EQ(get<String>(model[\"model\"][\"name\"]), \"gbtree\") << model;\n  ASSERT_EQ(get<String>(model[\"config\"][\"name\"]), \"gbtree\");\n}\n\nTEST(GBTree, LoadLegacyDartJson) {\n  size_t constexpr kRows = 16, kCols = 16;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"gbtree\", Args{{\"rate_drop\", \"0.5\"}}, kRows, kCols, &mparam, &ctx)};\n\n  Json model{Object{}};\n  Json config{Object{}};\n  gbm->SaveModel(&model);\n  gbm->SaveConfig(&config);\n\n  Json 
legacy_model{Object{}};\n  legacy_model[\"name\"] = String{\"dart\"};\n  legacy_model[\"gbtree\"] = model;\n  legacy_model[\"weight_drop\"] = model[\"weight_drop\"];\n  get<Object>(legacy_model[\"gbtree\"]).erase(\"weight_drop\");\n\n  Json legacy_config{Object{}};\n  legacy_config[\"name\"] = String{\"dart\"};\n  legacy_config[\"gbtree\"] = config;\n  legacy_config[\"dart_train_param\"] = config[\"dart_train_param\"];\n  get<Object>(legacy_config[\"gbtree\"]).erase(\"dart_train_param\");\n\n  std::unique_ptr<GradientBooster> loaded{GradientBooster::Create(\"dart\", &ctx, &mparam)};\n  loaded->LoadModel(legacy_model);\n  loaded->LoadConfig(legacy_config);\n\n  Json canonical_model{Object{}};\n  Json canonical_config{Object{}};\n  loaded->SaveModel(&canonical_model);\n  loaded->SaveConfig(&canonical_config);\n\n  ASSERT_EQ(get<String>(canonical_model[\"name\"]), \"gbtree\");\n  ASSERT_EQ(get<String>(canonical_config[\"name\"]), \"gbtree\");\n  ASSERT_NE(get<Array>(canonical_model[\"weight_drop\"]).size(), 0ul);\n  ASSERT_TRUE(IsA<Object>(canonical_config[\"dart_train_param\"]));\n}\n\nTEST(GBTree, DropoutJsonIO) {\n  size_t constexpr kRows = 16, kCols = 16;\n\n  Context ctx;\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  std::unique_ptr<GradientBooster> gbm{\n      CreateTrainedGBM(\"gbtree\", Args{{\"rate_drop\", \"0.5\"}}, kRows, kCols, &mparam, &ctx)};\n\n  Json model{Object()};\n  model[\"model\"] = Object();\n  auto& j_model = model[\"model\"];\n  model[\"config\"] = Object();\n  auto& j_param = model[\"config\"];\n\n  gbm->SaveModel(&j_model);\n  gbm->SaveConfig(&j_param);\n\n  ASSERT_EQ(get<String>(model[\"model\"][\"name\"]), \"gbtree\") << model;\n  ASSERT_EQ(get<String>(model[\"config\"][\"name\"]), \"gbtree\");\n  ASSERT_NE(get<Array>(model[\"model\"][\"weight_drop\"]).size(), 0ul);\n  ASSERT_TRUE(IsA<Object>(model[\"config\"][\"dart_train_param\"]));\n}\n\nnamespace {\nclass Dart : public testing::TestWithParam<char const*> {\n public:\n  void 
Run(std::string device) {\n    size_t constexpr kRows = 16, kCols = 10;\n\n    HostDeviceVector<float> data;\n    Context ctx;\n    if (device == \"GPU\") {\n      ctx = MakeCUDACtx(0);\n    }\n    auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.Device());\n    auto array_str = rng.GenerateArrayInterface(&data);\n    auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);\n\n    std::vector<bst_float> labels(kRows);\n    for (size_t i = 0; i < kRows; ++i) {\n      labels[i] = i % 2;\n    }\n    p_mat->SetInfo(\"label\", Make1dInterfaceTest(labels.data(), kRows));\n\n    auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));\n    learner->SetParam(\"booster\", \"dart\");\n    learner->SetParam(\"rate_drop\", \"0.5\");\n    learner->Configure();\n\n    for (size_t i = 0; i < 16; ++i) {\n      learner->UpdateOneIter(i, p_mat);\n    }\n    learner->SetParam(\"device\", ctx.DeviceName());\n\n    HostDeviceVector<float> predts_training;\n    learner->Predict(p_mat, false, &predts_training, 0, 0, true);\n\n    HostDeviceVector<float>* inplace_predts;\n    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};\n    if (ctx.IsCUDA()) {\n      x->SetCudaArray(array_str.c_str());\n    } else {\n      x->SetArray(array_str.c_str());\n    }\n    learner->InplacePredict(x, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),\n                            &inplace_predts, 0, 0);\n    CHECK(inplace_predts);\n\n    HostDeviceVector<float> predts_inference;\n    learner->Predict(p_mat, false, &predts_inference, 0, 0, false);\n\n    auto const& h_predts_training = predts_training.ConstHostVector();\n    auto const& h_predts_inference = predts_inference.ConstHostVector();\n    auto const& h_inplace_predts = inplace_predts->HostVector();\n    ASSERT_EQ(h_predts_training.size(), h_predts_inference.size());\n    ASSERT_EQ(h_inplace_predts.size(), h_predts_inference.size());\n    for (size_t i = 0; i < predts_inference.Size(); ++i) {\n 
     // Inference doesn't drop tree.\n      ASSERT_GT(std::abs(h_predts_training[i] - h_predts_inference[i]), kRtEps * 10);\n      // Inplace prediction is inference.\n      ASSERT_LT(h_inplace_predts[i] - h_predts_inference[i], kRtEps / 10);\n    }\n  }\n};\n}  // anonymous namespace\n\nTEST_P(Dart, Prediction) { this->Run(GetParam()); }\n\n#if defined(XGBOOST_USE_CUDA)\nINSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values(\"CPU\", \"GPU\"));\n#else\nINSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values(\"CPU\"));\n#endif  // defined(XGBOOST_USE_CUDA)\n\nstd::pair<Json, Json> TestModelSlice(std::string booster) {\n  size_t constexpr kRows = 1000, kCols = 100, kForest = 2, kClasses = 3;\n  auto m = RandomDataGenerator{kRows, kCols, 0}.Classes(kClasses).GenerateDMatrix(true);\n\n  int32_t kIters = 10;\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n  Args args{{\"booster\", booster},\n            {\"tree_method\", \"hist\"},\n            {\"num_parallel_tree\", std::to_string(kForest)},\n            {\"num_class\", std::to_string(kClasses)},\n            {\"subsample\", \"0.5\"},\n            {\"max_depth\", \"2\"}};\n  if (booster == \"dart\") {\n    args.emplace_back(\"rate_drop\", \"0.5\");\n  }\n  learner->SetParams(args);\n\n  for (auto i = 0; i < kIters; ++i) {\n    learner->UpdateOneIter(i, m);\n  }\n\n  Json model{Object()};\n  Json config{Object()};\n  learner->SaveModel(&model);\n  learner->SaveConfig(&config);\n  bool out_of_bound = false;\n\n  size_t constexpr kSliceStart = 2, kSliceEnd = 8, kStep = 3;\n  std::unique_ptr<Learner> sliced{learner->Slice(kSliceStart, kSliceEnd, kStep, &out_of_bound)};\n  Json sliced_model{Object()};\n  sliced->SaveModel(&sliced_model);\n\n  auto get_gbtree = [](Json const& model) -> Json const& {\n    auto const& booster = model[\"learner\"][\"gradient_booster\"];\n    auto const& obj = get<Object const>(booster);\n    auto it = obj.find(\"model\");\n    if (it != obj.cend() && 
IsA<Object>(it->second)) {\n      return booster;\n    }\n    return obj.at(\"gbtree\");\n  };\n\n  auto get_gbtree_config = [](Json& model) -> Json& {\n    auto& booster = model[\"learner\"][\"gradient_booster\"];\n    auto& obj = get<Object>(booster);\n    auto it = obj.find(\"gbtree_model_param\");\n    if (it != obj.cend() && IsA<Object>(it->second)) {\n      return booster;\n    }\n    return obj.at(\"gbtree\");\n  };\n\n  auto get_shape = [&](Json const& model) {\n    auto const& gbtree = get_gbtree(model);\n    return get<Object const>(gbtree[\"model\"][\"gbtree_model_param\"]);\n  };\n\n  auto const& model_shape = get_shape(sliced_model);\n  CHECK_EQ(get<String const>(model_shape.at(\"num_trees\")), std::to_string(2 * kClasses * kForest));\n\n  Json sliced_config{Object()};\n  sliced->SaveConfig(&sliced_config);\n  // Only num trees is changed\n  auto& gradient_booster = get_gbtree_config(sliced_config);\n  gradient_booster[\"gbtree_model_param\"][\"num_trees\"] = String(\"60\");\n  CHECK_EQ(sliced_config, config);\n\n  auto get_trees = [&](Json const& model) {\n    auto const& gbtree = get_gbtree(model);\n    return get<Array const>(gbtree[\"model\"][\"trees\"]);\n  };\n\n  auto get_info = [&](Json const& model) {\n    auto const& gbtree = get_gbtree(model);\n    return get<Array const>(gbtree[\"model\"][\"tree_info\"]);\n  };\n\n  auto const& sliced_trees = get_trees(sliced_model);\n  CHECK_EQ(sliced_trees.size(), 2 * kClasses * kForest);\n\n  auto constexpr kLayerSize = kClasses * kForest;\n  auto const& sliced_info = get_info(sliced_model);\n\n  for (size_t layer = 0; layer < 2; ++layer) {\n    for (size_t j = 0; j < kClasses; ++j) {\n      for (size_t k = 0; k < kForest; ++k) {\n        auto idx = layer * kLayerSize + j * kForest + k;\n        auto const& group = get<Integer const>(sliced_info.at(idx));\n        CHECK_EQ(static_cast<size_t>(group), j);\n      }\n    }\n  }\n\n  auto const& trees = get_trees(model);\n\n  // Sliced layers are [2, 5]\n  
auto begin = kLayerSize * kSliceStart;\n  auto end = begin + kLayerSize;\n  auto j = 0;\n  for (size_t i = begin; i < end; ++i) {\n    Json tree = trees[i];\n    tree[\"id\"] = Integer(0);  // id is different, we set it to 0 to allow comparison.\n    auto sliced_tree = sliced_trees[j];\n    sliced_tree[\"id\"] = Integer(0);\n    CHECK_EQ(tree, sliced_tree);\n    j++;\n  }\n\n  begin = kLayerSize * (kSliceStart + kStep);\n  end = begin + kLayerSize;\n  for (size_t i = begin; i < end; ++i) {\n    Json tree = trees[i];\n    tree[\"id\"] = Integer(0);\n    auto sliced_tree = sliced_trees[j];\n    sliced_tree[\"id\"] = Integer(0);\n    CHECK_EQ(tree, sliced_tree);\n    j++;\n  }\n\n  // CHECK sliced model doesn't have dependency on the old one\n  learner.reset();\n  CHECK_EQ(sliced->GetNumFeature(), kCols);\n\n  return std::make_pair(model, sliced_model);\n}\n\nTEST(GBTree, Slice) { TestModelSlice(\"gbtree\"); }\n\nTEST(Dart, Slice) {\n  Json model, sliced_model;\n  std::tie(model, sliced_model) = TestModelSlice(\"dart\");\n  auto const& weights = get<Array const>(model[\"learner\"][\"gradient_booster\"][\"weight_drop\"]);\n  auto const& trees = get<Array const>(model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"]);\n  ASSERT_EQ(weights.size(), trees.size());\n}\n\nTEST(GBTree, FeatureScore) {\n  size_t n_samples = 1000, n_features = 10, n_classes = 4;\n  auto m = RandomDataGenerator{n_samples, n_features, 0.5}.Classes(n_classes).GenerateDMatrix(true);\n\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n  learner->SetParam(\"num_class\", std::to_string(n_classes));\n\n  learner->Configure();\n  for (size_t i = 0; i < 2; ++i) {\n    learner->UpdateOneIter(i, m);\n  }\n\n  std::vector<bst_feature_t> features_weight;\n  std::vector<float> scores_weight;\n  learner->CalcFeatureScore(\"weight\", {}, &features_weight, &scores_weight);\n  ASSERT_EQ(features_weight.size(), scores_weight.size());\n  ASSERT_LE(features_weight.size(), 
learner->GetNumFeature());\n  ASSERT_TRUE(std::is_sorted(features_weight.begin(), features_weight.end()));\n\n  auto test_eq = [&learner, &scores_weight](std::string type) {\n    std::vector<bst_feature_t> features;\n    std::vector<float> scores;\n    learner->CalcFeatureScore(type, {}, &features, &scores);\n\n    std::vector<bst_feature_t> features_total;\n    std::vector<float> scores_total;\n    learner->CalcFeatureScore(\"total_\" + type, {}, &features_total, &scores_total);\n\n    for (size_t i = 0; i < scores_weight.size(); ++i) {\n      ASSERT_LE(RelError(scores_total[i] / scores[i], scores_weight[i]), kRtEps);\n    }\n  };\n\n  test_eq(\"gain\");\n  test_eq(\"cover\");\n}\n\nTEST(GBTree, PredictRange) {\n  size_t n_samples = 1000, n_features = 10, n_classes = 4;\n  auto m = RandomDataGenerator{n_samples, n_features, 0.5}.Classes(n_classes).GenerateDMatrix(true);\n\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n  learner->SetParam(\"num_class\", std::to_string(n_classes));\n\n  learner->Configure();\n  for (size_t i = 0; i < 2; ++i) {\n    learner->UpdateOneIter(i, m);\n  }\n  HostDeviceVector<float> out_predt;\n  ASSERT_THROW(learner->Predict(m, false, &out_predt, 0, 3), dmlc::Error);\n\n  auto m_1 =\n      RandomDataGenerator{n_samples, n_features, 0.5}.Classes(n_classes).GenerateDMatrix(true);\n  HostDeviceVector<float> out_predt_full;\n  learner->Predict(m_1, false, &out_predt_full, 0, 0);\n  ASSERT_TRUE(std::equal(out_predt.HostVector().begin(), out_predt.HostVector().end(),\n                         out_predt_full.HostVector().begin()));\n\n  {\n    // inplace predict\n    HostDeviceVector<float> raw_storage;\n    auto raw = RandomDataGenerator{n_samples, n_features, 0.5}.GenerateArrayInterface(&raw_storage);\n    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};\n    x->SetArray(raw.data());\n\n    HostDeviceVector<float>* out_predt;\n    learner->InplacePredict(x, PredictionType::kValue, 
std::numeric_limits<float>::quiet_NaN(),\n                            &out_predt, 0, 2);\n    auto h_out_predt = out_predt->HostVector();\n    learner->InplacePredict(x, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),\n                            &out_predt, 0, 0);\n    auto h_out_predt_full = out_predt->HostVector();\n\n    ASSERT_TRUE(std::equal(h_out_predt.begin(), h_out_predt.end(), h_out_predt_full.begin()));\n    // Out of range.\n    ASSERT_THROW(learner->InplacePredict(x, PredictionType::kValue,\n                                         std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 3),\n                 dmlc::Error);\n  }\n}\n\nTEST(GBTree, InplacePredictionError) {\n  std::size_t n_samples{2048}, n_features{32};\n\n  auto test_ext_err = [&](std::string booster, Context const* ctx) {\n    std::shared_ptr<DMatrix> p_fmat =\n        RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(\n            \"cache\", true);\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParams(Args{{\"booster\", booster}, {\"device\", ctx->DeviceName()}});\n    learner->Configure();\n    for (std::int32_t i = 0; i < 3; ++i) {\n      learner->UpdateOneIter(i, p_fmat);\n    }\n    HostDeviceVector<float>* out_predt;\n    ASSERT_THROW(\n        {\n          learner->InplacePredict(p_fmat, PredictionType::kValue,\n                                  std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);\n        },\n        dmlc::Error);\n  };\n\n  {\n    Context ctx;\n    test_ext_err(\"gbtree\", &ctx);\n    test_ext_err(\"dart\", &ctx);\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n  {\n    auto ctx = MakeCUDACtx(0);\n    test_ext_err(\"gbtree\", &ctx);\n    test_ext_err(\"dart\", &ctx);\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n\n  auto test_qdm_err = [&](std::string booster, Context const* ctx) {\n    std::shared_ptr<DMatrix> p_fmat;\n    bst_bin_t max_bins = 16;\n    auto rng =\n        
RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->Device()).Bins(max_bins);\n    if (ctx->IsCPU()) {\n      p_fmat = rng.GenerateQuantileDMatrix(true);\n    } else {\n#if defined(XGBOOST_USE_CUDA)\n      p_fmat = rng.Device(ctx->Device()).GenerateQuantileDMatrix(true);\n#else\n      CHECK(p_fmat);\n#endif  // defined(XGBOOST_USE_CUDA)\n    }\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParams(Args{{\"booster\", booster},\n                            {\"max_bin\", std::to_string(max_bins)},\n                            {\"device\", ctx->DeviceName()}});\n    learner->Configure();\n    for (std::int32_t i = 0; i < 3; ++i) {\n      learner->UpdateOneIter(i, p_fmat);\n    }\n    HostDeviceVector<float>* out_predt;\n    ASSERT_THROW(\n        {\n          learner->InplacePredict(p_fmat, PredictionType::kValue,\n                                  std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);\n        },\n        dmlc::Error);\n  };\n\n  {\n    Context ctx;\n    test_qdm_err(\"gbtree\", &ctx);\n    test_qdm_err(\"dart\", &ctx);\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n  {\n    auto ctx = MakeCUDACtx(0);\n    test_qdm_err(\"gbtree\", &ctx);\n    test_qdm_err(\"dart\", &ctx);\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/gbm/test_gbtree.cu",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include <xgboost/context.h>      // for Context\n#include <xgboost/learner.h>      // for Learner\n#include <xgboost/string_view.h>  // for StringView\n\n#include <limits>  // for numeric_limits\n#include <memory>  // for shared_ptr\n#include <string>  // for string\n\n#include \"../../../src/data/adapter.h\"           // for ArrayAdapter\n#include \"../../../src/data/device_adapter.cuh\"  // for CupyAdapter\n#include \"../../../src/data/proxy_dmatrix.h\"     // for DMatrixProxy\n#include \"../helpers.h\"                          // for RandomDataGenerator\n\nnamespace xgboost {\nvoid TestInplaceFallback(Context const* ctx) {\n  // prepare data\n  bst_idx_t n_samples{1024};\n  bst_feature_t n_features{32};\n  HostDeviceVector<float> X_storage;\n  // use a different device than the learner\n  auto data_ordinal = ctx->IsCPU() ? DeviceOrd::CUDA(0) : DeviceOrd::CPU();\n  auto X = RandomDataGenerator{n_samples, n_features, 0.0}\n               .Device(data_ordinal)\n               .GenerateArrayInterface(&X_storage);\n  HostDeviceVector<float> y_storage;\n  auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);\n\n  std::shared_ptr<DMatrix> Xy;\n  if (data_ordinal.IsCPU()) {\n    auto X_adapter = data::ArrayAdapter{StringView{X}};\n    Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));\n  } else {\n    auto X_adapter = data::CupyAdapter{StringView{X}};\n    Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));\n  }\n\n  Xy->SetInfo(\"label\", y);\n\n  // learner is configured to the device specified by ctx\n  std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n  learner->SetParam(\"device\", ctx->DeviceName());\n  for (std::int32_t i = 0; i < 3; ++i) {\n    learner->UpdateOneIter(i, Xy);\n  }\n\n  std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};\n  auto proxy = 
std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);\n  if (data_ordinal.IsCPU()) {\n    proxy->SetArray(StringView{X});\n  } else {\n    proxy->SetCudaArray(X.c_str());\n  }\n\n  HostDeviceVector<float>* out_predt{nullptr};\n  ConsoleLogger::Configure(Args{{\"verbosity\", \"1\"}});\n  std::string output;\n\n  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),\n                          &out_predt, 0, 0);\n\n  // test when the contexts match\n  Context new_ctx = *proxy->Ctx();\n  ASSERT_NE(new_ctx.Ordinal(), ctx->Ordinal());\n\n  learner->SetParam(\"device\", new_ctx.DeviceName());\n  HostDeviceVector<float>* out_predt_1{nullptr};\n  // no warning is raised\n  ::testing::internal::CaptureStderr();\n  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),\n                          &out_predt_1, 0, 0);\n  output = testing::internal::GetCapturedStderr();\n\n  ASSERT_TRUE(output.empty());\n\n  ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());\n}\n\nTEST(GBTree, InplacePredictFallback) {\n  auto ctx = MakeCUDACtx(0);\n  TestInplaceFallback(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/helpers.cc",
    "content": "/**\n * Copyright 2016-2025, XGBoost contributors\n */\n#include \"helpers.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/gbm.h>\n#include <xgboost/json.h>\n#include <xgboost/learner.h>\n#include <xgboost/logging.h>\n#include <xgboost/metric.h>\n#include <xgboost/objective.h>\n\n#include <algorithm>\n#include <filesystem>  // for path\n#include <limits>      // for numeric_limits\n#include <random>      // for mt19937\n\n#include \"../../src/collective/communicator-inl.h\"  // for GetRank\n#include \"../../src/data/adapter.h\"\n#include \"../../src/data/batch_utils.h\"  // for AutoHostRatio, AutoCachePageBytes\n#include \"../../src/data/iterative_dmatrix.h\"\n#include \"../../src/data/simple_dmatrix.h\"\n#include \"../../src/data/sparse_page_dmatrix.h\"\n#include \"../../src/gbm/gbtree_model.h\"\n#include \"xgboost/c_api.h\"\n#include \"xgboost/predictor.h\"\n\n#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n#include <memory>\n#include <vector>\n\n#include \"rmm/version_config.hpp\"\n\n// TODO(hcho3): Remove this guard once we require Rapids 25.12+\n#if (RMM_VERSION_MAJOR == 25 && RMM_VERSION_MINOR == 12) || RMM_VERSION_MAJOR >= 26\n#include \"rmm/mr/cuda_memory_resource.hpp\"\n#include \"rmm/mr/per_device_resource.hpp\"\n#include \"rmm/mr/pool_memory_resource.hpp\"\n#else  // (RMM_VERSION_MAJOR == 25 && RMM_VERSION_MINOR == 12) || RMM_VERSION_MAJOR >= 26\n#include \"rmm/mr/device/cuda_memory_resource.hpp\"\n#include \"rmm/mr/device/per_device_resource.hpp\"\n#include \"rmm/mr/device/pool_memory_resource.hpp\"\n#endif  // (RMM_VERSION_MAJOR == 25 && RMM_VERSION_MINOR == 12) || RMM_VERSION_MAJOR >= 26\n\n#endif  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\nbool FileExists(const std::string& filename) {\n  struct stat st;\n  return stat(filename.c_str(), &st) == 0;\n}\n\nvoid CreateSimpleTestData(const std::string& filename) { CreateBigTestData(filename, 6); }\n\nvoid CreateBigTestData(const std::string& filename, size_t n_entries, 
bool zero_based) {\n  std::ofstream fo(filename.c_str());\n  const size_t entries_per_row = 3;\n  std::string odd_row;\n  if (zero_based) {\n    odd_row = \" 0:0 3:30 4:40\\n\";\n  } else {\n    odd_row = \" 1:0 4:30 5:40\\n\";\n  }\n  std::string even_row;\n  if (zero_based) {\n    even_row = \" 0:0 1:10 2:20\\n\";\n  } else {\n    even_row = \" 1:0 2:10 3:20\\n\";\n  }\n\n  size_t n_rows = (n_entries + entries_per_row - 1) / entries_per_row;\n  for (size_t i = 0; i < n_rows; ++i) {\n    auto row = i % 2 == 0 ? even_row : odd_row;\n    fo << i << row;\n  }\n}\n\nvoid CreateTestCSV(std::string const& path, size_t rows, size_t cols) {\n  std::vector<float> data(rows * cols);\n\n  for (size_t i = 0; i < rows * cols; ++i) {\n    data[i] = i;\n  }\n\n  std::ofstream fout(path);\n  size_t i = 0;\n  for (size_t r = 0; r < rows; ++r) {\n    for (size_t c = 0; c < cols; ++c) {\n      fout << data[i];\n      i++;\n      if (c != cols - 1) {\n        fout << \",\";\n      }\n    }\n    fout << \"\\n\";\n  }\n  fout.flush();\n  fout.close();\n}\n\nvoid CheckObjFunctionImpl(std::unique_ptr<xgboost::ObjFunction> const& obj,\n                          std::vector<xgboost::bst_float> preds,\n                          std::vector<xgboost::bst_float> labels,\n                          std::vector<xgboost::bst_float> weights, xgboost::MetaInfo const& info,\n                          std::vector<xgboost::bst_float> out_grad,\n                          std::vector<xgboost::bst_float> out_hess) {\n  xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);\n  xgboost::linalg::Matrix<xgboost::GradientPair> out_gpair;\n  obj->GetGradient(in_preds, info, 0, &out_gpair);\n  std::vector<xgboost::GradientPair>& gpair = out_gpair.Data()->HostVector();\n\n  ASSERT_EQ(gpair.size(), in_preds.Size());\n  for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {\n    EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)\n        << \"Unexpected grad for pred=\" << preds[i] << \" label=\" << 
labels[i]\n        << \" weight=\" << weights[i];\n    EXPECT_NEAR(gpair[i].GetHess(), out_hess[i], 0.01)\n        << \"Unexpected hess for pred=\" << preds[i] << \" label=\" << labels[i]\n        << \" weight=\" << weights[i];\n  }\n}\n\nvoid CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,\n                      std::vector<xgboost::bst_float> preds, std::vector<xgboost::bst_float> labels,\n                      std::vector<xgboost::bst_float> weights,\n                      std::vector<xgboost::bst_float> out_grad,\n                      std::vector<xgboost::bst_float> out_hess) {\n  xgboost::MetaInfo info;\n  info.num_row_ = labels.size();\n  info.labels = xgboost::linalg::Tensor<float, 2>{labels.cbegin(),\n                                                  labels.cend(),\n                                                  {labels.size(), static_cast<std::size_t>(1)},\n                                                  xgboost::DeviceOrd::CPU()};\n  info.weights_.HostVector() = weights;\n\n  CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);\n}\n\nxgboost::Json CheckConfigReloadImpl(xgboost::Configurable* const configurable, std::string name) {\n  xgboost::Json config_0{xgboost::Object()};\n  configurable->SaveConfig(&config_0);\n  configurable->LoadConfig(config_0);\n\n  xgboost::Json config_1{xgboost::Object()};\n  configurable->SaveConfig(&config_1);\n\n  std::string str_0, str_1;\n  xgboost::Json::Dump(config_0, &str_0);\n  xgboost::Json::Dump(config_1, &str_1);\n  EXPECT_EQ(str_0, str_1);\n\n  if (name != \"\") {\n    EXPECT_EQ(xgboost::get<xgboost::String>(config_1[\"name\"]), name);\n  }\n  return config_1;\n}\n\nvoid CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,\n                             std::vector<xgboost::bst_float> preds,\n                             std::vector<xgboost::bst_float> labels,\n                             std::vector<xgboost::bst_float> weights,\n                 
            std::vector<xgboost::bst_uint> groups,\n                             std::vector<xgboost::bst_float> out_grad,\n                             std::vector<xgboost::bst_float> out_hess) {\n  xgboost::MetaInfo info;\n  info.num_row_ = labels.size();\n  info.labels = xgboost::linalg::Matrix<float>{labels.cbegin(),\n                                               labels.cend(),\n                                               {labels.size(), static_cast<std::size_t>(1)},\n                                               xgboost::DeviceOrd::CPU()};\n  info.weights_.HostVector() = weights;\n  info.group_ptr_ = groups;\n\n  CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);\n}\n\nxgboost::bst_float GetMetricEval(xgboost::Metric* metric,\n                                 xgboost::HostDeviceVector<xgboost::bst_float> const& preds,\n                                 std::vector<xgboost::bst_float> labels,\n                                 std::vector<xgboost::bst_float> weights,\n                                 std::vector<xgboost::bst_uint> groups,\n                                 xgboost::DataSplitMode data_split_mode) {\n  return GetMultiMetricEval(\n      metric, preds,\n      xgboost::linalg::Tensor<float, 2>{\n          labels.begin(), labels.end(), {labels.size()}, xgboost::DeviceOrd::CPU()},\n      weights, groups, data_split_mode);\n}\n\ndouble GetMultiMetricEval(xgboost::Metric* metric,\n                          xgboost::HostDeviceVector<xgboost::bst_float> const& preds,\n                          xgboost::linalg::Tensor<float, 2> const& labels,\n                          std::vector<xgboost::bst_float> weights,\n                          std::vector<xgboost::bst_uint> groups,\n                          xgboost::DataSplitMode data_split_mode) {\n  std::shared_ptr<xgboost::DMatrix> p_fmat{xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix()};\n  auto& info = p_fmat->Info();\n  info.num_row_ = labels.Shape(0);\n  
info.labels.Reshape(labels.Shape()[0], labels.Shape()[1]);\n  info.labels.Data()->Copy(*labels.Data());\n  info.weights_.HostVector() = weights;\n  info.group_ptr_ = groups;\n  info.data_split_mode = data_split_mode;\n  if (info.IsVerticalFederated() && xgboost::collective::GetRank() != 0) {\n    info.labels.Reshape(0);\n  }\n  return metric->Evaluate(preds, p_fmat);\n}\n\nnamespace xgboost {\n[[nodiscard]] std::vector<float> GetBaseScore(Json const& config) {\n  auto str = get<String const>(config[\"learner\"][\"learner_model_param\"][\"base_score\"]);\n  auto jintercept = Json::Load(str);\n  auto const& array = get<Array const>(jintercept);\n  std::vector<float> results;\n  std::transform(array.begin(), array.end(), std::back_inserter(results),\n                 [](Json v) { return get<Number>(v); });\n  return results;\n}\n\nSimpleLCG::StateType SimpleLCG::operator()() {\n  state_ = (alpha_ * state_ + (state_ == 0 ? kDefaultInit : 0)) % mod_;\n  return state_;\n}\nSimpleLCG::StateType SimpleLCG::Min() const { return min(); }\nSimpleLCG::StateType SimpleLCG::Max() const { return max(); }\n// Make sure it's compile time constant.\nstatic_assert(SimpleLCG::max() - SimpleLCG::min());\n\nRandomDataGenerator::RandomDataGenerator(bst_idx_t rows, std::size_t cols, float sparsity)\n    : rows_{rows},\n      cols_{cols},\n      sparsity_{sparsity},\n      lcg_{seed_},\n      cache_host_ratio_{cuda_impl::AutoHostRatio()} {}\n\nvoid RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {\n  RandomDataGenerator{static_cast<bst_idx_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}\n      .GenerateDense(p_fmat->Info().labels.Data());\n  CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);\n  p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);\n  if (device_.IsCUDA()) {\n    p_fmat->Info().labels.SetDevice(device_);\n  }\n}\n\nvoid RandomDataGenerator::GenerateDense(HostDeviceVector<float>* out) const {\n  
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);\n  CHECK(out);\n\n  SimpleLCG lcg{lcg_};\n  out->Resize(rows_ * cols_, 0);\n  auto& h_data = out->HostVector();\n  float sparsity = sparsity_ * (upper_ - lower_) + lower_;\n  for (auto& v : h_data) {\n    auto g = dist(&lcg);\n    if (g < sparsity) {\n      v = std::numeric_limits<float>::quiet_NaN();\n    } else {\n      v = dist(&lcg);\n    }\n  }\n  if (device_.IsCUDA()) {\n    out->SetDevice(device_);\n    out->DeviceSpan();\n  }\n}\n\nJson RandomDataGenerator::ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows,\n                                             size_t cols) const {\n  this->GenerateDense(storage);\n  return GetArrayInterface(storage, rows, cols);\n}\n\nstd::string RandomDataGenerator::GenerateArrayInterface(HostDeviceVector<float>* storage) const {\n  auto array_interface = this->ArrayInterfaceImpl(storage, rows_, cols_);\n  std::string out;\n  Json::Dump(array_interface, &out);\n  return out;\n}\n\nstd::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(\n    HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,\n    std::size_t batches, DeviceOrd device) {\n  std::vector<std::string> result(batches);\n  std::vector<Json> objects;\n\n  size_t const rows_per_batch = n_samples / batches;\n\n  auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {\n    Json array_interface{Object()};\n    array_interface[\"data\"] = std::vector<Json>(2);\n    if (device.IsCUDA()) {\n      array_interface[\"data\"][0] =\n          Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));\n      array_interface[\"stream\"] = Null{};\n    } else {\n      array_interface[\"data\"][0] =\n          Integer(reinterpret_cast<int64_t>(storage->HostPointer() + offset));\n    }\n\n    array_interface[\"data\"][1] = Boolean(false);\n\n    array_interface[\"shape\"] = std::vector<Json>(2);\n    
array_interface[\"shape\"][0] = rows;\n    array_interface[\"shape\"][1] = n_features;\n\n    array_interface[\"typestr\"] = String(\"<f4\");\n    array_interface[\"version\"] = 3;\n    return array_interface;\n  };\n\n  auto j_interface = make_interface(0, n_samples);\n  size_t offset = 0;\n  for (size_t i = 0; i < batches - 1; ++i) {\n    objects.emplace_back(make_interface(offset, rows_per_batch));\n    offset += rows_per_batch * n_features;\n  }\n\n  size_t const remaining = n_samples - offset / n_features;\n  CHECK_LE(offset, n_samples * n_features);\n  objects.emplace_back(make_interface(offset, remaining));\n\n  for (size_t i = 0; i < batches; ++i) {\n    Json::Dump(objects[i], &result[i]);\n  }\n\n  std::string interface_str;\n  Json::Dump(j_interface, &interface_str);\n  return {result, interface_str};\n}\n\nstd::pair<std::vector<std::string>, std::string> RandomDataGenerator::GenerateArrayInterfaceBatch(\n    HostDeviceVector<float>* storage, size_t batches) const {\n  this->GenerateDense(storage);\n  return MakeArrayInterfaceBatch(storage, rows_, cols_, batches, device_);\n}\n\nstd::string RandomDataGenerator::GenerateColumnarArrayInterface(\n    std::vector<HostDeviceVector<float>>* data) const {\n  CHECK(data);\n  CHECK_EQ(data->size(), cols_);\n  auto& storage = *data;\n  Json arr{Array()};\n  for (size_t i = 0; i < cols_; ++i) {\n    auto column = this->ArrayInterfaceImpl(&storage[i], rows_, 1);\n    get<Array>(arr).emplace_back(column);\n  }\n  std::string out;\n  Json::Dump(arr, &out);\n  return out;\n}\n\nvoid RandomDataGenerator::GenerateCSR(HostDeviceVector<float>* value,\n                                      HostDeviceVector<std::size_t>* row_ptr,\n                                      HostDeviceVector<bst_feature_t>* columns) const {\n  auto& h_value = value->HostVector();\n  auto& h_rptr = row_ptr->HostVector();\n  auto& h_cols = columns->HostVector();\n  SimpleLCG lcg{lcg_};\n\n  xgboost::SimpleRealUniformDistribution<bst_float> 
dist(lower_, upper_);\n  float sparsity = sparsity_ * (upper_ - lower_) + lower_;\n  SimpleRealUniformDistribution<bst_float> cat(0.0, max_cat_);\n\n  h_rptr.emplace_back(0);\n  for (size_t i = 0; i < rows_; ++i) {\n    size_t rptr = h_rptr.back();\n    for (size_t j = 0; j < cols_; ++j) {\n      auto g = dist(&lcg);\n      if (g >= sparsity) {\n        if (common::IsCat(ft_, j)) {\n          g = common::AsCat(cat(&lcg));\n        } else {\n          g = dist(&lcg);\n        }\n        h_value.emplace_back(g);\n        rptr++;\n        h_cols.emplace_back(j);\n      }\n    }\n    h_rptr.emplace_back(rptr);\n  }\n\n  if (device_.IsCUDA()) {\n    value->SetDevice(device_);\n    value->DeviceSpan();\n    row_ptr->SetDevice(device_);\n    row_ptr->DeviceSpan();\n    columns->SetDevice(device_);\n    columns->DeviceSpan();\n  }\n\n  CHECK_LE(h_value.size(), rows_ * cols_);\n  CHECK_EQ(value->Size(), h_rptr.back());\n  CHECK_EQ(columns->Size(), value->Size());\n}\n\nnamespace {\nvoid MakeLabels(DeviceOrd device, bst_idx_t n_samples, bst_target_t n_classes,\n                bst_target_t n_targets, std::shared_ptr<DMatrix> out) {\n  RandomDataGenerator gen{n_samples, n_targets, 0.0f};\n  if (n_classes != 0) {\n    gen.Lower(0).Upper(n_classes).GenerateDense(out->Info().labels.Data());\n    out->Info().labels.Reshape(n_samples, n_targets);\n    auto& h_labels = out->Info().labels.Data()->HostVector();\n    for (auto& v : h_labels) {\n      v = static_cast<float>(static_cast<uint32_t>(v));\n    }\n  } else {\n    gen.GenerateDense(out->Info().labels.Data());\n    CHECK_EQ(out->Info().labels.Size(), n_samples * n_targets);\n    out->Info().labels.Reshape(n_samples, n_targets);\n  }\n  if (device.IsCUDA()) {\n    out->Info().labels.Data()->SetDevice(device);\n    out->Info().labels.Data()->ConstDevicePointer();\n    out->Info().feature_types.SetDevice(device);\n    out->Info().feature_types.ConstDevicePointer();\n  }\n}\n\n[[nodiscard]] bool DecompAllowFallback() {\n#if 
defined(XGBOOST_USE_NVCOMP)\n  bool allow_decomp_fallback = true;\n#else\n  bool allow_decomp_fallback = false;\n#endif\n  return allow_decomp_fallback;\n}\n}  // namespace\n\n[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(\n    bool with_label, DataSplitMode data_split_mode) const {\n  HostDeviceVector<float> data;\n  HostDeviceVector<std::size_t> rptrs;\n  HostDeviceVector<bst_feature_t> columns;\n  this->GenerateCSR(&data, &rptrs, &columns);\n  // Initialize on CPU.\n  data.HostVector();\n  rptrs.HostVector();\n  columns.HostVector();\n  auto adapter =\n      data::CSRArrayAdapter{Json::Dump(GetArrayInterface(&rptrs, rptrs.Size(), 1)),\n                            Json::Dump(GetArrayInterface(&columns, columns.Size(), 1)),\n                            Json::Dump(GetArrayInterface(&data, data.Size(), 1)), this->cols_};\n\n  std::shared_ptr<DMatrix> out{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1, \"\", data_split_mode)};\n\n  if (with_label) {\n    MakeLabels(this->device_, this->rows_, this->n_classes_, this->n_targets_, out);\n  }\n  if (device_.IsCUDA()) {\n    out->Info().labels.SetDevice(device_);\n    out->Info().feature_types.SetDevice(device_);\n    for (auto const& page : out->GetBatches<SparsePage>()) {\n      page.data.SetDevice(device_);\n      page.offset.SetDevice(device_);\n      // pull to device\n      page.data.ConstDeviceSpan();\n      page.offset.ConstDeviceSpan();\n    }\n  }\n  if (!ft_.empty()) {\n    out->Info().feature_types.HostVector() = ft_;\n  }\n  return out;\n}\n\n[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateSparsePageDMatrix(\n    std::string prefix, bool with_label) const {\n  CHECK_GE(this->rows_, this->n_batches_);\n  CHECK_GE(this->n_batches_, 1)\n      << \"Must set the n_batches before generating an external memory DMatrix.\";\n  std::unique_ptr<ArrayIterForTest> iter;\n  if (device_.IsCPU()) {\n    iter = 
std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);\n  } else {\n#if defined(XGBOOST_USE_CUDA)\n    iter = std::make_unique<CudaArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);\n#else\n    CHECK(iter);\n#endif  // defined(XGBOOST_USE_CUDA)\n  }\n\n  auto config =\n      ExtMemConfig{\n          prefix,\n          this->on_host_,\n          this->cache_host_ratio_,\n          this->min_cache_page_bytes_,\n          std::numeric_limits<float>::quiet_NaN(),\n          Context{}.Threads(),\n      }\n          .SetParamsForTest(this->hw_decomp_ratio_, DecompAllowFallback());\n  std::shared_ptr<DMatrix> p_fmat{\n      DMatrix::Create(static_cast<DataIterHandle>(iter.get()), iter->Proxy(), Reset, Next, config)};\n\n  auto row_page_path = data::MakeId(data::MakeCachePrefix(prefix),\n                                    dynamic_cast<data::SparsePageDMatrix*>(p_fmat.get())) +\n                       \".row.page\";\n  EXPECT_TRUE(FileExists(row_page_path)) << row_page_path << \" prefix:\" << prefix;\n\n  // Loop over the batches and count the number of pages\n  std::size_t batch_count = 0;\n  bst_idx_t row_count = 0;\n  for (const auto& batch : p_fmat->GetBatches<xgboost::SparsePage>()) {\n    batch_count++;\n    row_count += batch.Size();\n    CHECK_NE(batch.data.Size(), 0);\n  }\n\n  EXPECT_EQ(batch_count, n_batches_);\n  EXPECT_EQ(p_fmat->NumBatches(), n_batches_);\n  EXPECT_EQ(row_count, p_fmat->Info().num_row_);\n\n  if (with_label) {\n    MakeLabels(this->device_, this->rows_, this->n_classes_, this->n_targets_, p_fmat);\n  }\n  return p_fmat;\n}\n\n[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateExtMemQuantileDMatrix(\n    std::string prefix, bool with_label) const {\n  CHECK_GE(this->rows_, this->n_batches_);\n  CHECK_GE(this->n_batches_, 1)\n      << \"Must set the n_batches before generating an external memory DMatrix.\";\n  // The iterator should be freed after construction of the DMatrix.\n  
std::unique_ptr<ArrayIterForTest> iter;\n  if (device_.IsCPU()) {\n    iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);\n  } else {\n#if defined(XGBOOST_USE_CUDA)\n    iter = std::make_unique<CudaArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);\n#endif  // defined(XGBOOST_USE_CUDA)\n  }\n  CHECK(iter);\n\n  auto config =\n      ExtMemConfig{\n          prefix,\n          this->on_host_,\n          this->cache_host_ratio_,\n          this->min_cache_page_bytes_,\n          std::numeric_limits<float>::quiet_NaN(),\n          Context{}.Threads(),\n      }\n          .SetParamsForTest(this->hw_decomp_ratio_, DecompAllowFallback());\n\n  std::shared_ptr<DMatrix> p_fmat{DMatrix::Create(static_cast<DataIterHandle>(iter.get()),\n                                                  iter->Proxy(), this->ref_, Reset, Next,\n                                                  this->bins_, config)};\n\n  auto page_path = data::MakeId(prefix, p_fmat.get());\n  page_path += device_.IsCPU() ? 
\".gradient_index.page\" : \".ellpack.page\";\n  if (!this->on_host_) {\n    EXPECT_TRUE(FileExists(page_path)) << page_path;\n  }\n\n  if (with_label) {\n    MakeLabels(this->device_, this->rows_, this->n_classes_, this->n_targets_, p_fmat);\n  }\n  return p_fmat;\n}\n\nstd::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix(bool with_label) {\n  std::shared_ptr<data::IterativeDMatrix> p_fmat;\n\n  if (this->device_.IsCPU()) {\n    NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};\n    p_fmat =\n        std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,\n                                                 std::numeric_limits<float>::quiet_NaN(), 0, bins_);\n  } else {\n    CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};\n    p_fmat =\n        std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,\n                                                 std::numeric_limits<float>::quiet_NaN(), 0, bins_);\n  }\n\n  if (with_label) {\n    this->GenerateLabels(p_fmat);\n  }\n  return p_fmat;\n}\n\n#if !defined(XGBOOST_USE_CUDA)\nCudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)\n    : ArrayIterForTest{sparsity, rows, cols, batches} {\n  common::AssertGPUSupport();\n}\n\nint CudaArrayIterForTest::Next() {\n  common::AssertGPUSupport();\n  return 0;\n}\n#endif  // !defined(XGBOOST_USE_CUDA)\n\nNumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols,\n                                             size_t batches)\n    : ArrayIterForTest{sparsity, rows, cols, batches} {\n  rng_->Device(DeviceOrd::CPU());\n  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);\n  this->Reset();\n}\n\nint NumpyArrayIterForTest::Next() {\n  if (iter_ == n_batches_) {\n    return 0;\n  }\n  XGProxyDMatrixSetDataDense(proxy_, batches_[iter_].c_str());\n  iter_++;\n  
return 1;\n}\n\n[[nodiscard]] std::vector<float> GenerateRandomCategoricalSingleColumn(std::size_t n,\n                                                                       std::size_t n_categories) {\n  std::vector<float> x(n);\n  std::mt19937 rng(0);\n  std::uniform_int_distribution<size_t> dist(0, n_categories - 1);\n  std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });\n  // Make sure each category is present\n  for (size_t i = 0; i < n_categories; i++) {\n    x[i] = static_cast<decltype(x)::value_type>(i);\n  }\n  return x;\n}\n\nstd::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,\n                                            bst_feature_t num_columns) {\n  data::DenseAdapter adapter(x.data(), num_rows, num_columns);\n  auto p_fmat = std::shared_ptr<DMatrix>(\n      new data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(), 1));\n  CHECK_EQ(p_fmat->Info().num_row_, num_rows);\n  CHECK_EQ(p_fmat->Info().num_col_, num_columns);\n  return p_fmat;\n}\n\n[[nodiscard]] std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(\n    HostDeviceVector<float> const& x, bst_idx_t n_samples, bst_feature_t n_features,\n    const common::TemporaryDirectory& tempdir, bst_idx_t n_batches) {\n  Context ctx;\n  auto iter = NumpyArrayIterForTest{&ctx, x, n_samples / n_batches, n_features, n_batches};\n\n  auto prefix = tempdir.Path() / \"temp\";\n  auto config = ExtMemConfig{\n      prefix.string(),\n      false,\n      ::xgboost::cuda_impl::AutoHostRatio(),\n      ::xgboost::cuda_impl::AutoCachePageBytes(),\n      std::numeric_limits<float>::quiet_NaN(),\n      Context{}.Threads(),\n  };\n  std::shared_ptr<DMatrix> p_fmat{\n      DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next, config)};\n  return p_fmat;\n}\n\nstd::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,\n                                                  
size_t kCols,\n                                                  LearnerModelParam const* learner_model_param,\n                                                  Context const* ctx) {\n  auto caches = std::make_shared<PredictionContainer>();\n  std::unique_ptr<GradientBooster> gbm{GradientBooster::Create(name, ctx, learner_model_param)};\n  gbm->Configure(kwargs);\n  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  std::vector<float> labels(kRows);\n  for (size_t i = 0; i < kRows; ++i) {\n    labels[i] = i;\n  }\n  p_dmat->Info().labels =\n      linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, DeviceOrd::CPU()};\n  GradientContainer gpair;\n  gpair.gpair = linalg::Matrix<GradientPair>{{kRows}, ctx->Device()};\n  auto h_gpair = gpair.gpair.HostView();\n  for (size_t i = 0; i < kRows; ++i) {\n    h_gpair(i) = GradientPair{static_cast<float>(i), 1};\n  }\n\n  PredictionCacheEntry predts;\n\n  gbm->DoBoost(p_dmat.get(), &gpair, &predts, nullptr);\n\n  return gbm;\n}\n\nArrayIterForTest::ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches)\n    : rows_{rows}, cols_{cols}, n_batches_{batches} {\n  XGProxyDMatrixCreate(&proxy_);\n  rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);\n  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);\n}\n\nArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,\n                                   std::size_t n_samples, bst_feature_t n_features,\n                                   std::size_t n_batches)\n    : rows_{n_samples}, cols_{n_features}, n_batches_{n_batches} {\n  XGProxyDMatrixCreate(&proxy_);\n  this->data_.Resize(data.Size());\n  CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);\n  this->data_.Copy(data);\n  std::tie(batches_, interface_) =\n      MakeArrayInterfaceBatch(&data_, rows_ * n_batches_, cols_, n_batches_, 
ctx->Device());\n}\n\nArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }\n\nvoid DMatrixToCSR(DMatrix* dmat, std::vector<float>* p_data, std::vector<size_t>* p_row_ptr,\n                  std::vector<bst_feature_t>* p_cids) {\n  auto& data = *p_data;\n  auto& row_ptr = *p_row_ptr;\n  auto& cids = *p_cids;\n\n  data.resize(dmat->Info().num_nonzero_);\n  cids.resize(data.size());\n  row_ptr.resize(dmat->Info().num_row_ + 1);\n  SparsePage page;\n  for (const auto& batch : dmat->GetBatches<SparsePage>()) {\n    page.Push(batch);\n  }\n\n  auto const& in_offset = page.offset.HostVector();\n  auto const& in_data = page.data.HostVector();\n\n  CHECK_EQ(in_offset.size(), row_ptr.size());\n  std::copy(in_offset.cbegin(), in_offset.cend(), row_ptr.begin());\n  ASSERT_EQ(in_data.size(), data.size());\n  std::transform(in_data.cbegin(), in_data.cend(), data.begin(),\n                 [](Entry const& e) { return e.fvalue; });\n  ASSERT_EQ(in_data.size(), cids.size());\n  std::transform(in_data.cbegin(), in_data.cend(), cids.begin(),\n                 [](Entry const& e) { return e.index; });\n}\n\n#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\n\nusing CUDAMemoryResource = rmm::mr::cuda_memory_resource;\nusing PoolMemoryResource = rmm::mr::pool_memory_resource<CUDAMemoryResource>;\nclass RMMAllocator {\n public:\n  std::vector<std::unique_ptr<CUDAMemoryResource>> cuda_mr;\n  std::vector<std::unique_ptr<PoolMemoryResource>> pool_mr;\n  int n_gpu;\n  RMMAllocator() : n_gpu(curt::AllVisibleGPUs()) {\n    int current_device;\n    CHECK_EQ(cudaGetDevice(&current_device), cudaSuccess);\n    for (int i = 0; i < n_gpu; ++i) {\n      CHECK_EQ(cudaSetDevice(i), cudaSuccess);\n      cuda_mr.push_back(std::make_unique<CUDAMemoryResource>());\n      pool_mr.push_back(std::make_unique<PoolMemoryResource>(cuda_mr[i].get(), 0ul));\n    }\n    CHECK_EQ(cudaSetDevice(current_device), cudaSuccess);\n  }\n  ~RMMAllocator() = default;\n};\n\nvoid DeleteRMMResource(RMMAllocator* 
r) { delete r; }\n\nRMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {\n  bool use_rmm_pool = false;\n  for (int i = 1; i < argc; ++i) {\n    if (argv[i] == std::string(\"--use-rmm-pool\")) {\n      use_rmm_pool = true;\n    }\n  }\n  if (!use_rmm_pool) {\n    return {nullptr, DeleteRMMResource};\n  }\n  LOG(INFO) << \"Using RMM memory pool\";\n  auto ptr = RMMAllocatorPtr(new RMMAllocator(), DeleteRMMResource);\n  for (int i = 0; i < ptr->n_gpu; ++i) {\n    rmm::mr::set_per_device_resource(rmm::cuda_device_id(i), ptr->pool_mr[i].get());\n  }\n  GlobalConfigThreadLocalStore::Get()->UpdateAllowUnknown(Args{{\"use_rmm\", \"true\"}});\n  return ptr;\n}\n#else   // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1\nclass RMMAllocator {};\n\nvoid DeleteRMMResource(RMMAllocator*) {}\n\nRMMAllocatorPtr SetUpRMMResourceForCppTests(int, char**) { return {nullptr, DeleteRMMResource}; }\n#endif  // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1\n\nstd::int32_t DistGpuIdx() { return curt::AllVisibleGPUs() == 1 ? 0 : collective::GetRank(); }\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/helpers.cu",
    "content": "/**\n * Copyright 2020-2024, XGBoost contributors\n */\n#include <xgboost/c_api.h>\n\n#include \"helpers.h\"\n\nnamespace xgboost {\nCudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,\n                                           size_t cols, size_t batches)\n    : ArrayIterForTest{sparsity, rows, cols, batches} {\n  rng_->Device(FstCU());\n  std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);\n  this->Reset();\n}\n\nint CudaArrayIterForTest::Next() {\n  if (iter_ == n_batches_) {\n    return 0;\n  }\n  XGProxyDMatrixSetDataCudaArrayInterface(proxy_, batches_[iter_].c_str());\n  iter_++;\n  return 1;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/helpers.h",
    "content": "/**\n * Copyright 2016-2025, XGBoost contributors\n */\n#pragma once\n\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <xgboost/base.h>\n#include <xgboost/context.h>\n#include <xgboost/json.h>\n#include <xgboost/learner.h>  // for LearnerModelParam\n#include <xgboost/model.h>    // for Configurable\n\n#include <cstdint>  // std::int32_t\n#include <cstdio>\n#include <memory>\n#include <string>\n#include <vector>\n\n\n#if defined(__CUDACC__)\n#include \"../../src/collective/communicator-inl.h\"  // for GetRank\n#include \"../../src/common/cuda_rt_utils.h\"         // for AllVisibleGPUs\n#endif  // defined(__CUDACC__)\n\n#include \"filesystem.h\"  // for TemporaryDirectory\n#include \"xgboost/linalg.h\"\n\n#if defined(__CUDACC__)\n#define DeclareUnifiedTest(name) GPU ## name\n#else\n#define DeclareUnifiedTest(name) name\n#endif\n\n#if defined(__CUDACC__)\n#define GPUIDX (curt::AllVisibleGPUs() == 1 ? 0 : collective::GetRank())\n#else\n#define GPUIDX (-1)\n#endif\n\n#if defined(__CUDACC__)\n#define DeclareUnifiedDistributedTest(name) MGPU ## name\n#else\n#define DeclareUnifiedDistributedTest(name) name\n#endif\n\nnamespace xgboost {\nclass ObjFunction;\nclass Metric;\nstruct LearnerModelParam;\nclass GradientBooster;\n}\n\ntemplate <typename Float>\nFloat RelError(Float l, Float r) {\n  static_assert(std::is_floating_point_v<Float>);\n  return std::abs(1.0f - l / r);\n}\n\ntemplate <typename T, typename V = std::remove_cv_t<T>>\nvoid AssertVecEq(std::vector<T> h_vec, std::vector<V> const& exp, float atol = 1e-5) {\n  ASSERT_EQ(h_vec.size(), exp.size());\n  for (std::size_t i = 0; i < h_vec.size(); ++i) {\n    if constexpr (std::is_floating_point_v<V>) {\n      ASSERT_NEAR(h_vec[i], exp[i], atol) << \"i:\" << i;\n    } else {\n      ASSERT_EQ(h_vec[i], exp[i]);\n    }\n  }\n}\n\nbool FileExists(const std::string& filename);\n\nvoid CreateSimpleTestData(const std::string& filename);\n\n// Create 
a libsvm format file with 3 entries per-row. `zero_based` specifies whether it's\n// 0-based indexing.\nvoid CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true);\n\nvoid CreateTestCSV(std::string const& path, size_t rows, size_t cols);\n\nvoid CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,\n                      std::vector<xgboost::bst_float> preds,\n                      std::vector<xgboost::bst_float> labels,\n                      std::vector<xgboost::bst_float> weights,\n                      std::vector<xgboost::bst_float> out_grad,\n                      std::vector<xgboost::bst_float> out_hess);\n\nxgboost::Json CheckConfigReloadImpl(xgboost::Configurable* const configurable,\n                                    std::string name);\n\ntemplate <typename T>\nxgboost::Json CheckConfigReload(std::unique_ptr<T> const& configurable,\n                                std::string name = \"\") {\n  return CheckConfigReloadImpl(dynamic_cast<xgboost::Configurable*>(configurable.get()),\n                               name);\n}\n\nvoid CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,\n                             std::vector<xgboost::bst_float> preds,\n                             std::vector<xgboost::bst_float> labels,\n                             std::vector<xgboost::bst_float> weights,\n                             std::vector<xgboost::bst_uint> groups,\n                             std::vector<xgboost::bst_float> out_grad,\n                             std::vector<xgboost::bst_float> out_hess);\n\nxgboost::bst_float GetMetricEval(\n  xgboost::Metric * metric,\n  xgboost::HostDeviceVector<xgboost::bst_float> const& preds,\n  std::vector<xgboost::bst_float> labels,\n  std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),\n  std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>(),\n  xgboost::DataSplitMode data_split_Mode = 
xgboost::DataSplitMode::kRow);\n\ndouble GetMultiMetricEval(xgboost::Metric* metric,\n                          xgboost::HostDeviceVector<xgboost::bst_float> const& preds,\n                          xgboost::linalg::Tensor<float, 2> const& labels,\n                          std::vector<xgboost::bst_float> weights = {},\n                          std::vector<xgboost::bst_uint> groups = {},\n                          xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);\n\nnamespace xgboost {\n[[nodiscard]] std::vector<float> GetBaseScore(Json const& config);\n\n/*!\n * \\brief Linear congruential generator.\n *\n * The distribution defined in std is not portable. Given the same seed, it\n * might produce different outputs on different platforms or with different\n * compilers.  The SimpleLCG implemented here is to make sure all tests are\n * reproducible.\n */\nclass SimpleLCG {\n private:\n  using StateType = uint64_t;\n  static StateType constexpr kDefaultInit = 3;\n  static StateType constexpr kDefaultAlpha = 61;\n  static StateType constexpr kMaxValue = (static_cast<StateType>(1) << 32) - 1;\n\n  StateType state_;\n  StateType const alpha_;\n  StateType const mod_;\n\n public:\n  using result_type = StateType;  // NOLINT\n\n public:\n  SimpleLCG() : state_{kDefaultInit}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}\n  SimpleLCG(SimpleLCG const& that) = default;\n  SimpleLCG(SimpleLCG&& that) = default;\n\n  void Seed(StateType seed) { state_ = seed % mod_; }\n  /*!\n   * \\brief Initialize SimpleLCG.\n   *\n   * \\param state  Initial state, can also be considered as seed. If set to\n   *               zero, SimpleLCG will use internal default value.\n   */\n  explicit SimpleLCG(StateType state)\n      : state_{state == 0 ? 
kDefaultInit : state}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}\n\n  StateType operator()();\n  StateType Min() const;\n  StateType Max() const;\n\n  constexpr result_type static min() { return 0; };         // NOLINT\n  constexpr result_type static max() { return kMaxValue; }  // NOLINT\n};\n\ntemplate <typename ResultT>\nclass SimpleRealUniformDistribution {\n private:\n  ResultT const lower_;\n  ResultT const upper_;\n\n  /*! \\brief Over-simplified version of std::generate_canonical. */\n  template <size_t Bits, typename GeneratorT>\n  ResultT GenerateCanonical(GeneratorT* rng) const {\n    static_assert(std::is_floating_point_v<ResultT>, \"Result type must be floating point.\");\n    long double const r = (static_cast<long double>(rng->Max())\n                           - static_cast<long double>(rng->Min())) + 1.0L;\n    auto const log2r = static_cast<size_t>(std::log(r) / std::log(2.0L));\n    size_t m = std::max<size_t>(1UL, (Bits + log2r - 1UL) / log2r);\n    ResultT sum_value = 0, r_k = 1;\n\n    for (size_t k = m; k != 0; --k) {\n      sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;\n      r_k *= static_cast<ResultT>(r);\n    }\n\n    ResultT res = sum_value / r_k;\n    return res;\n  }\n\n public:\n  SimpleRealUniformDistribution(ResultT l, ResultT u) :\n      lower_{l}, upper_{u} {}\n\n  template <typename GeneratorT>\n  ResultT operator()(GeneratorT* rng) const {\n    ResultT tmp = GenerateCanonical<std::numeric_limits<ResultT>::digits,\n                                    GeneratorT>(rng);\n    auto ret = (tmp * (upper_ - lower_)) + lower_;\n    // Correct floating point error.\n    return std::max(ret, lower_);\n  }\n};\n\ntemplate <typename T>\nJson GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t cols) {\n  Json array_interface{Object()};\n  array_interface[\"data\"] = std::vector<Json>(2);\n  if (storage->DeviceCanRead()) {\n    array_interface[\"data\"][0] = 
Integer{reinterpret_cast<int64_t>(storage->ConstDevicePointer())};\n    array_interface[\"stream\"] = nullptr;\n  } else {\n    array_interface[\"data\"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstHostPointer())};\n  }\n  array_interface[\"data\"][1] = Boolean(false);\n\n  array_interface[\"shape\"] = std::vector<Json>(2);\n  array_interface[\"shape\"][0] = rows;\n  array_interface[\"shape\"][1] = cols;\n\n  char t = linalg::detail::ArrayInterfaceHandler::TypeChar<T>();\n  array_interface[\"typestr\"] = String(std::string{\"<\"} + t + std::to_string(sizeof(T)));\n  array_interface[\"version\"] = 3;\n  return array_interface;\n}\n\n// Generate in-memory random data without using DMatrix.\nclass RandomDataGenerator {\n  bst_idx_t rows_;\n  size_t cols_;\n  float sparsity_;\n\n  float lower_{0.0f};\n  float upper_{1.0f};\n\n  bst_target_t n_targets_{1};\n  bst_target_t n_classes_{0};\n\n  DeviceOrd device_{DeviceOrd::CPU()};\n  std::size_t n_batches_{0};\n  std::uint64_t seed_{0};\n  SimpleLCG lcg_;\n\n  bst_bin_t bins_{0};\n  std::vector<FeatureType> ft_;\n  bst_cat_t max_cat_{32};\n  bool on_host_{false};\n  std::shared_ptr<DMatrix> ref_{nullptr};\n  std::int64_t min_cache_page_bytes_{0};\n  float cache_host_ratio_;\n  float hw_decomp_ratio_{true};\n\n  Json ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows, size_t cols) const;\n\n  void GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const;\n\n public:\n  RandomDataGenerator(bst_idx_t rows, std::size_t cols, float sparsity);\n\n  RandomDataGenerator& Lower(float v) {\n    lower_ = v;\n    return *this;\n  }\n  RandomDataGenerator& Upper(float v) {\n    upper_ = v;\n    return *this;\n  }\n  RandomDataGenerator& Device(DeviceOrd d) {\n    device_ = d;\n    return *this;\n  }\n  RandomDataGenerator& Batches(std::size_t n_batches) {\n    n_batches_ = n_batches;\n    return *this;\n  }\n  RandomDataGenerator& OnHost(bool on_host) {\n    on_host_ = on_host;\n    return *this;\n  }\n  
RandomDataGenerator& Ref(std::shared_ptr<DMatrix> ref) {\n    this->ref_ = std::move(ref);\n    return *this;\n  }\n  RandomDataGenerator& MinPageCacheBytes(std::int64_t min_cache_page_bytes) {\n    this->min_cache_page_bytes_ = min_cache_page_bytes;\n    return *this;\n  }\n  [[nodiscard]] RandomDataGenerator& CacheHostRatio(float cache_host_ratio) {\n    this->cache_host_ratio_ = cache_host_ratio;\n    return *this;\n  }\n  [[nodiscard]] RandomDataGenerator& HwDecompRatio(float hw_decomp_ratio) {\n    this->hw_decomp_ratio_ = hw_decomp_ratio;\n    return *this;\n  }\n  RandomDataGenerator& Seed(uint64_t s) {\n    seed_ = s;\n    lcg_.Seed(seed_);\n    return *this;\n  }\n  RandomDataGenerator& Bins(bst_bin_t b) {\n    bins_ = b;\n    return *this;\n  }\n  RandomDataGenerator& Type(common::Span<FeatureType> ft) {\n    CHECK_EQ(ft.size(), cols_);\n    ft_.resize(ft.size());\n    std::copy(ft.cbegin(), ft.cend(), ft_.begin());\n    return *this;\n  }\n  RandomDataGenerator& MaxCategory(bst_cat_t cat) {\n    max_cat_ = cat;\n    return *this;\n  }\n  RandomDataGenerator& Targets(bst_target_t n_targets) {\n    n_targets_ = n_targets;\n    return *this;\n  }\n  RandomDataGenerator& Classes(bst_target_t n_classes) {\n    n_classes_ = n_classes;\n    return *this;\n  }\n\n  void GenerateDense(HostDeviceVector<float>* out) const;\n\n  std::string GenerateArrayInterface(HostDeviceVector<float>* storage) const;\n\n  /*!\n   * \\brief Generate batches of array interface stored in consecutive memory.\n   *\n   * \\param storage The consecutive memory used to store the arrays.\n   * \\param batches Number of batches.\n   *\n   * \\return A vector storing JSON string representation of interface for each batch, and\n   *         a single JSON string representing the consecutive memory as a whole\n   *         (combining all the batches).\n   */\n  std::pair<std::vector<std::string>, std::string> GenerateArrayInterfaceBatch(\n      HostDeviceVector<float>* storage, size_t 
batches) const;\n\n  std::string GenerateColumnarArrayInterface(std::vector<HostDeviceVector<float>>* data) const;\n\n  void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<std::size_t>* row_ptr,\n                   HostDeviceVector<bst_feature_t>* columns) const;\n\n  [[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(\n      bool with_label = false, DataSplitMode data_split_mode = DataSplitMode::kRow) const;\n\n  [[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,\n                                                                   bool with_label) const;\n\n  [[nodiscard]] std::shared_ptr<DMatrix> GenerateExtMemQuantileDMatrix(std::string prefix,\n                                                                       bool with_label) const;\n\n  std::shared_ptr<DMatrix> GenerateQuantileDMatrix(bool with_label);\n};\n\n// Generate an empty DMatrix, mostly for its meta info.\ninline std::shared_ptr<DMatrix> EmptyDMatrix() {\n  return RandomDataGenerator{0, 0, 0.0}.GenerateDMatrix();\n}\n\n[[nodiscard]] std::vector<float> GenerateRandomCategoricalSingleColumn(std::size_t n,\n                                                                       std::size_t n_categories);\n\nstd::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,\n                                            bst_feature_t num_columns);\n\n[[nodiscard]] std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(\n    HostDeviceVector<float> const& x, bst_idx_t n_samples, bst_feature_t n_features,\n    const common::TemporaryDirectory& tempdir, bst_idx_t n_batches = 4);\n\nstd::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,\n                                                  size_t kCols,\n                                                  LearnerModelParam const* learner_model_param,\n                                                  Context const* generic_param);\n\n/**\n * 
\\brief Make a context that uses CUDA if device >= 0.\n */\ninline Context MakeCUDACtx(std::int32_t device) {\n  if (device == DeviceOrd::CPUOrdinal()) {\n    return Context{};\n  }\n  return Context{}.MakeCUDA(device);\n}\n\ninline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,\n                                                              float lower = 0.0f,\n                                                              float upper = 1.0f) {\n  xgboost::SimpleLCG gen;\n  xgboost::SimpleRealUniformDistribution<bst_float> dist(lower, upper);\n  std::vector<GradientPair> h_gpair(n_rows);\n  for (auto& gpair : h_gpair) {\n    bst_float grad = dist(&gen);\n    bst_float hess = dist(&gen);\n    gpair = GradientPair(grad, hess);\n  }\n  HostDeviceVector<GradientPair> gpair(h_gpair);\n  return gpair;\n}\n\ninline auto GenerateRandomGradients(Context const* ctx, bst_idx_t n_rows, bst_target_t n_targets,\n                                    float lower = 0.0f, float upper = 1.0f) {\n  auto g = GenerateRandomGradients(n_rows * n_targets, lower, upper);\n  GradientContainer gpair;\n  gpair.gpair = linalg::Matrix<GradientPair>{{n_rows, static_cast<bst_idx_t>(n_targets)}, ctx->Device()};\n  gpair.gpair.Data()->Copy(g);\n  return gpair;\n}\n\ntypedef void *DMatrixHandle;  // NOLINT(*);\n\nclass ArrayIterForTest {\n protected:\n  HostDeviceVector<float> data_;\n  size_t iter_{0};\n  DMatrixHandle proxy_;\n  std::unique_ptr<RandomDataGenerator> rng_;\n\n  std::vector<std::string> batches_;\n  std::string interface_;\n  bst_idx_t rows_;\n  size_t cols_;\n  size_t n_batches_;\n\n public:\n  bst_idx_t static constexpr Rows() { return 1024; }\n  size_t static constexpr Batches() { return 100; }\n  size_t static constexpr Cols() { return 13; }\n\n public:\n  [[nodiscard]] std::string AsArray() const { return interface_; }\n\n  virtual int Next() = 0;\n  virtual void Reset() { iter_ = 0; }\n  [[nodiscard]] std::size_t Iter() const { return iter_; }\n  auto 
Proxy() -> decltype(proxy_) { return proxy_; }\n\n  explicit ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches);\n  /**\n   * \\brief Create iterator with user provided data.\n   */\n  explicit ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,\n                            std::size_t n_samples, bst_feature_t n_features, std::size_t n_batches);\n  virtual ~ArrayIterForTest();\n};\n\nclass CudaArrayIterForTest : public ArrayIterForTest {\n public:\n  explicit CudaArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),\n                                size_t batches = Batches());\n  explicit CudaArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,\n                                std::size_t n_samples, bst_feature_t n_features,\n                                std::size_t n_batches)\n      : ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {};\n  int Next() override;\n  ~CudaArrayIterForTest() override = default;\n};\n\nclass NumpyArrayIterForTest : public ArrayIterForTest {\n public:\n  explicit NumpyArrayIterForTest(float sparsity, bst_idx_t rows = Rows(), size_t cols = Cols(),\n                                 size_t batches = Batches());\n  explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,\n                                 std::size_t n_samples, bst_feature_t n_features,\n                                 std::size_t n_batches)\n      : ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {}\n  int Next() override;\n  ~NumpyArrayIterForTest() override = default;\n};\n\nvoid DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,\n                  std::vector<size_t> *p_row_ptr,\n                  std::vector<bst_feature_t> *p_cids);\n\ntypedef void *DataIterHandle;  // NOLINT(*)\n\ninline void Reset(DataIterHandle self) {\n  static_cast<ArrayIterForTest*>(self)->Reset();\n}\n\ninline int Next(DataIterHandle self) 
{\n  return static_cast<ArrayIterForTest*>(self)->Next();\n}\n\n/**\n * @brief Create an array interface for host vector.\n */\ntemplate <typename T>\nchar const* Make1dInterfaceTest(T const* vec, std::size_t len) {\n  static thread_local std::string str;\n  str = linalg::Make1dInterface(vec, len);\n  return str.c_str();\n}\n\nclass RMMAllocator;\nusing RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;\nRMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);\n\n/*\n * \\brief Make learner model param\n */\ninline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,\n                                DeviceOrd device = DeviceOrd::CPU()) {\n  size_t shape[1]{1};\n  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},\n                           n_groups, 1, MultiStrategy::kOneOutputPerTree);\n  return mparam;\n}\n\ninline std::int32_t AllThreadsForTest() { return Context{}.Threads(); }\n\ninline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }\n\n// GPU device ordinal for distributed tests\nstd::int32_t DistGpuIdx();\n\ninline auto GMockThrow(StringView msg) {\n  return ::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr(msg));\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/histogram_helpers.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <xgboost/base.h>  // for bst_feature_t\n#include <xgboost/data.h>  // for FeatureType\n#include <xgboost/span.h>  // for Span\n\n#include <memory>  // for make_unique\n#include <random>  // for uniform_real_distribution\n#include <vector>  // for vector\n\n#include \"../../src/common/device_vector.cuh\"  // for device_vector\n#include \"../../src/common/hist_util.h\"        // for HistogramCuts\n#include \"../../src/data/device_adapter.cuh\"   // for CupyAdapter, GetRowCounts\n#include \"../../src/data/ellpack_page.cuh\"     // for EllpackPageImpl\n#include \"histogram_helpers.h\"\n\nnamespace xgboost {\n[[nodiscard]] std::unique_ptr<EllpackPageImpl> MakeEllpackForTest(Context const* ctx,\n                                                                  bst_idx_t n_samples,\n                                                                  bst_feature_t n_features,\n                                                                  bst_bin_t n_bins_per_feat) {\n  // Construct the histogram bins\n  std::vector<std::uint32_t> cut_indptr(n_features + 1, 0);\n  for (std::size_t i = 1; i < cut_indptr.size(); ++i) {\n    cut_indptr[i] = i * n_bins_per_feat;\n  }\n  std::vector<float> cut_values;\n  for (bst_feature_t f_idx = 0; f_idx < n_features; ++f_idx) {\n    for (bst_bin_t bin_idx = 0; bin_idx < n_bins_per_feat; ++bin_idx) {\n      cut_values.push_back(bin_idx + 1.0f);\n    }\n  }\n  std::default_random_engine rng(2025);\n  auto p_cuts = std::make_shared<common::HistogramCuts>(n_features);\n  p_cuts->cut_ptrs_.HostVector() = cut_indptr;\n  p_cuts->cut_values_.HostVector() = cut_values;\n\n  // Construct the data\n  auto n_values_per_bin = n_samples / n_bins_per_feat;\n\n  linalg::Matrix<float> values{\n      {n_samples, static_cast<bst_idx_t>(n_features)}, DeviceOrd::CPU(), linalg::kF};\n  auto& h_values = values.Data()->HostVector();\n  h_values.clear();\n\n  for (bst_feature_t f_idx = 0; 
f_idx < n_features; ++f_idx) {\n    for (bst_bin_t bin_idx = 0; bin_idx < n_bins_per_feat; ++bin_idx) {\n      // min-max value for the current bin\n      auto min_value = static_cast<float>(bin_idx + kRtEps);\n      auto max_value = static_cast<float>(bin_idx + 1.0 - 1e-3);\n      std::uniform_real_distribution<float> dist(min_value, max_value);\n      for (std::size_t i = 0; i < n_values_per_bin; ++i) {\n        h_values.emplace_back(dist(rng));\n      }\n      if (bin_idx == n_bins_per_feat - 1) {\n        auto remainder = n_samples % n_bins_per_feat;\n        for (std::size_t i = 0; i < remainder; ++i) {\n          h_values.emplace_back(dist(rng));\n        }\n      }\n    }\n  }\n  CHECK_EQ(h_values.size(), n_samples * n_features);\n\n  auto str = linalg::ArrayInterfaceStr(values.View(ctx->Device()));\n  auto adapter = data::CupyAdapter{StringView{str}};\n  dh::device_vector<bst_idx_t> row_counts(n_samples);\n  auto missing = std::numeric_limits<float>::quiet_NaN();\n  bst_idx_t row_stride =\n      GetRowCounts(ctx, adapter.Value(), dh::ToSpan(row_counts), ctx->Device(), missing);\n  auto ellpack = std::make_unique<EllpackPageImpl>(\n      ctx, adapter.Value(), missing, true, dh::ToSpan(row_counts),\n      common::Span<FeatureType const>{}, row_stride, n_samples, p_cuts);\n\n  LOG(INFO) << \"Ellpack size:\" << common::HumanMemUnit(ellpack->MemCostBytes());\n  return ellpack;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/histogram_helpers.h",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#pragma once\n\n#if defined(__CUDACC__)\n#include \"../../src/data/ellpack_page.cuh\"\n#endif\n\n#include <xgboost/data.h>  // for SparsePage\n\n#include \"./helpers.h\"  // for RandomDataGenerator\n\nnamespace xgboost {\n#if defined(__CUDACC__)\nnamespace detail {\nclass HistogramCutsWrapper : public common::HistogramCuts {\n public:\n  using SuperT = common::HistogramCuts;\n  HistogramCutsWrapper() : SuperT{0} {}\n  void SetValues(std::vector<float> cuts) { SuperT::cut_values_.HostVector() = std::move(cuts); }\n  void SetPtrs(std::vector<uint32_t> ptrs) { SuperT::cut_ptrs_.HostVector() = std::move(ptrs); }\n};\n}  // namespace detail\n\ninline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(Context const* ctx, int n_rows, int n_cols,\n                                                         bst_float sparsity = 0) {\n  auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();\n  const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();\n\n  auto cmat = std::make_shared<detail::HistogramCutsWrapper>();\n  cmat->SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});\n  // 24 cut fields, 3 cut fields for each feature (column).\n  cmat->SetValues({0.30f, 0.67f, 1.64f, 0.32f, 0.77f, 1.95f, 0.29f, 0.70f,\n                   1.80f, 0.32f, 0.75f, 1.85f, 0.18f, 0.59f, 1.69f, 0.25f,\n                   0.74f, 2.00f, 0.26f, 0.74f, 1.98f, 0.26f, 0.71f, 1.83f});\n\n  bst_idx_t row_stride = 0;\n  const auto& offset_vec = batch.offset.ConstHostVector();\n  for (size_t i = 1; i < offset_vec.size(); ++i) {\n    row_stride = std::max(row_stride, offset_vec[i] - offset_vec[i - 1]);\n  }\n\n  auto page = std::unique_ptr<EllpackPageImpl>(\n      new EllpackPageImpl(ctx, cmat, batch, dmat->IsDense(), row_stride, {}));\n\n  return page;\n}\n\n/**\n * @brief Create an ellpack page with evenly distributed values across histogram bins.\n *\n * @note The last bin contains all the 
extra values if @ref n_samples is not divisible by\n *       @ref n_bins_per_feat. Otherwise, all bins contain the same number of values.\n */\n[[nodiscard]] std::unique_ptr<EllpackPageImpl> MakeEllpackForTest(Context const* ctx,\n                                                                  bst_idx_t n_samples,\n                                                                  bst_feature_t n_features,\n                                                                  bst_bin_t n_bins_per_feat);\n#endif\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/linear/test_json_io.h",
    "content": "/*!\n * Copyright 2020 XGBoost contributors\n */\n#ifndef XGBOOST_TEST_JSON_IO_H_\n#define XGBOOST_TEST_JSON_IO_H_\n\n#include <xgboost/linear_updater.h>\n#include <xgboost/json.h>\n#include <string>\n#include \"../helpers.h\"\n#include \"../../../src/gbm/gblinear_model.h\"\n\nnamespace xgboost {\ninline void TestUpdaterJsonIO(std::string updater_str) {\n  Context ctx{MakeCUDACtx(GPUIDX)};\n  Json config_0 {Object() };\n\n  {\n    auto updater =\n        std::unique_ptr<xgboost::LinearUpdater>(xgboost::LinearUpdater::Create(updater_str, &ctx));\n    updater->Configure({{\"eta\", std::to_string(3.14)}});\n    updater->SaveConfig(&config_0);\n  }\n\n  {\n    auto updater =\n        std::unique_ptr<xgboost::LinearUpdater>(xgboost::LinearUpdater::Create(updater_str, &ctx));\n    updater->LoadConfig(config_0);\n    Json config_1 { Object() };\n    updater->SaveConfig(&config_1);\n\n    ASSERT_EQ(config_0, config_1);\n    auto eta = atof(get<String const>(config_1[\"linear_train_param\"][\"eta\"]).c_str());\n    ASSERT_NEAR(eta, 3.14, kRtEps);\n  }\n\n}\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_TEST_JSON_IO_H_\n"
  },
  {
    "path": "tests/cpp/linear/test_linear.cc",
    "content": "/*!\n * Copyright 2018-2019 by Contributors\n */\n#include <xgboost/linear_updater.h>\n#include <xgboost/gbm.h>\n\n#include \"../helpers.h\"\n#include \"test_json_io.h\"\n#include \"../../../src/gbm/gblinear_model.h\"\n#include \"xgboost/base.h\"\n\nnamespace xgboost {\n\nTEST(Linear, Shotgun) {\n  size_t constexpr kRows = 10;\n  size_t constexpr kCols = 10;\n\n  auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  auto ctx = MakeCUDACtx(GPUIDX);\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  {\n    auto updater =\n        std::unique_ptr<xgboost::LinearUpdater>(xgboost::LinearUpdater::Create(\"shotgun\", &ctx));\n    updater->Configure({{\"eta\", \"1.\"}});\n    linalg::Matrix<xgboost::GradientPair> gpair{\n        linalg::Constant(&ctx, xgboost::GradientPair(-5, 1.0), p_fmat->Info().num_row_, 1)};\n    xgboost::gbm::GBLinearModel model{&mparam};\n    model.LazyInitModel();\n    updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());\n\n    ASSERT_EQ(model.Bias()[0], 5.0f);\n  }\n  {\n    auto updater = std::unique_ptr<xgboost::LinearUpdater>(\n        xgboost::LinearUpdater::Create(\"shotgun\", &ctx));\n    EXPECT_ANY_THROW(updater->Configure({{\"feature_selector\", \"random\"}}));\n  }\n}\n\nTEST(Shotgun, JsonIO) {\n  TestUpdaterJsonIO(\"shotgun\");\n}\n\nTEST(Linear, coordinate) {\n  size_t constexpr kRows = 10;\n  size_t constexpr kCols = 10;\n\n  auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  auto ctx = MakeCUDACtx(GPUIDX);\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n\n  auto updater = std::unique_ptr<xgboost::LinearUpdater>(\n      xgboost::LinearUpdater::Create(\"coord_descent\", &ctx));\n  updater->Configure({{\"eta\", \"1.\"}});\n  linalg::Matrix<xgboost::GradientPair> gpair{\n      linalg::Constant(&ctx, xgboost::GradientPair(-5, 1.0), p_fmat->Info().num_row_, 1)};\n  xgboost::gbm::GBLinearModel model{&mparam};\n  model.LazyInitModel();\n  
updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());\n\n  ASSERT_EQ(model.Bias()[0], 5.0f);\n}\n\nTEST(Coordinate, JsonIO){\n  TestUpdaterJsonIO(\"coord_descent\");\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/linear/test_linear.cu",
    "content": "/**\n * Copyright 2018-2023, XGBoost Contributors\n */\n#include <xgboost/linear_updater.h>\n#include <xgboost/gbm.h>\n\n#include \"../helpers.h\"\n#include \"test_json_io.h\"\n#include \"../../../src/gbm/gblinear_model.h\"\n\nnamespace xgboost {\n\nTEST(Linear, GPUCoordinate) {\n  size_t constexpr kRows = 10;\n  size_t constexpr kCols = 10;\n\n  auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n  auto ctx = MakeCUDACtx(0);\n\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};\n  auto updater = std::unique_ptr<xgboost::LinearUpdater>(\n      xgboost::LinearUpdater::Create(\"gpu_coord_descent\", &ctx));\n  updater->Configure({{\"eta\", \"1.\"}});\n  auto gpair = linalg::Constant(&ctx, xgboost::GradientPair(-5, 1.0), mat->Info().num_row_, 1);\n  xgboost::gbm::GBLinearModel model{&mparam};\n\n  model.LazyInitModel();\n  updater->Update(&gpair, mat.get(), &model, gpair.Size());\n\n  ASSERT_EQ(model.Bias()[0], 5.0f);\n}\n\nTEST(GPUCoordinate, JsonIO) {\n  TestUpdaterJsonIO(\"gpu_coord_descent\");\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/metric/test_auc.h",
    "content": "/*!\n * Copyright (c) 2023 by XGBoost Contributors\n */\n#pragma once\n\n#include <xgboost/metric.h>\n\n#include \"../helpers.h\"\n\nnamespace xgboost::metric {\ninline void VerifyBinaryAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<Metric> uni_ptr{Metric::Create(\"auc\", &ctx)};\n  Metric* metric = uni_ptr.get();\n  ASSERT_STREQ(metric->Name(), \"auc\");\n\n  // Binary\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.0f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}, {}, {}, data_split_mode), 0.0f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}, {}, {}, data_split_mode), 0.25f, 1e-10);\n\n  // Invalid dataset\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, DeviceOrd::CPU()};\n  float auc = metric->Evaluate({1, 1}, p_fmat);\n  ASSERT_TRUE(std::isnan(auc));\n  *info.labels.Data() = HostDeviceVector<float>{};\n  auc = metric->Evaluate(HostDeviceVector<float>{}, p_fmat);\n  ASSERT_TRUE(std::isnan(auc));\n\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}, {}, {}, data_split_mode), 1.0f,\n              1e-10);\n\n  // AUC with instance weights\n  EXPECT_NEAR(GetMetricEval(metric, {0.9f, 0.1f, 0.4f, 0.3f}, {0, 0, 1, 1},\n                            {1.0f, 3.0f, 2.0f, 4.0f}, {}, data_split_mode),\n              0.75f, 0.001f);\n\n  // regression test case\n  ASSERT_NEAR(GetMetricEval(metric, {0.79523796, 0.5201713,  0.79523796, 0.24273258, 0.53452194,\n            
                         0.53452194, 0.24273258, 0.5201713,  0.79523796, 0.53452194,\n                                     0.24273258, 0.53452194, 0.79523796, 0.5201713,  0.24273258,\n                                     0.5201713,  0.5201713,  0.53452194, 0.5201713,  0.53452194},\n                            {0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}, {}, {},\n                            data_split_mode),\n              0.5, 1e-10);\n}\n\ninline void VerifyMultiClassAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<Metric> uni_ptr{Metric::Create(\"auc\", &ctx)};\n  auto metric = uni_ptr.get();\n\n  // MultiClass\n  // 3x3\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {\n                                1.0f, 0.0f, 0.0f,  // p_0\n                                0.0f, 1.0f, 0.0f,  // p_1\n                                0.0f, 0.0f, 1.0f   // p_2\n                            },\n                            {0, 1, 2}, {}, {}, data_split_mode),\n              1.0f, 1e-10);\n\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {\n                                1.0f, 0.0f, 0.0f,  // p_0\n                                0.0f, 1.0f, 0.0f,  // p_1\n                                0.0f, 0.0f, 1.0f   // p_2\n                            },\n                            {0, 1, 2}, {1.0f, 1.0f, 1.0f}, {}, data_split_mode),\n              1.0f, 1e-10);\n\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {\n                                1.0f, 0.0f, 0.0f,  // p_0\n                                0.0f, 1.0f, 0.0f,  // p_1\n                                0.0f, 0.0f, 1.0f   // p_2\n                            },\n                            {2, 1, 0}, {}, {}, data_split_mode),\n              0.5f, 1e-10);\n\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {\n                                1.0f, 0.0f, 0.0f,  // p_0\n                  
              0.0f, 1.0f, 0.0f,  // p_1\n                                0.0f, 0.0f, 1.0f   // p_2\n                            },\n                            {2, 0, 1}, {}, {}, data_split_mode),\n              0.25f, 1e-10);\n\n  // invalid dataset\n  float auc = GetMetricEval(metric,\n                            {\n                                1.0f, 0.0f, 0.0f,                 // p_0\n                                0.0f, 1.0f, 0.0f,                 // p_1\n                                0.0f, 0.0f, 1.0f                  // p_2\n                            },\n                            {0, 1, 1}, {}, {}, data_split_mode);  // no class 2.\n  EXPECT_TRUE(std::isnan(auc)) << auc;\n\n  HostDeviceVector<float> predts{\n      0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f,\n  };\n  std::vector<float> labels{1.0f, 0.0f, 2.0f, 1.0f};\n  auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode);\n  ASSERT_GT(auc, 0.714);\n}\n\ninline void VerifyRankingAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<Metric> metric{Metric::Create(\"auc\", &ctx)};\n\n  // single group\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f}, {1.0f, 0.8f, 0.4f, 0.2f},\n                            /*weights=*/{}, {0, 4}, data_split_mode),\n              0.5f, 1e-10);\n\n  // multi group\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2}, /*weights=*/{},\n                            {0, 3, 6}, data_split_mode),\n              1.0f, 1e-10);\n\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2},\n                            /*weights=*/{1.0f, 2.0f}, {0, 3, 6}, data_split_mode),\n              1.0f, 1e-10);\n\n  // AUC metric for grouped datasets - exception scenarios\n  ASSERT_TRUE(std::isnan(\n      GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3}, data_split_mode)));\n\n  // 
regression case\n  HostDeviceVector<float> predt{\n      0.33935383, 0.5149714,  0.32138085, 1.4547751, 1.2010975, 0.42651367, 0.23104341, 0.83610827,\n      0.8494239,  0.07136688, 0.5623144,  0.8086237, 1.5066161, -4.094787,  0.76887935, -2.4082742};\n  std::vector<bst_group_t> groups{0, 7, 16};\n  std::vector<float> labels{1., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.};\n\n  EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels,\n                            /*weights=*/{}, groups, data_split_mode),\n              0.769841f, 1e-6);\n}\n\ninline void VerifyPRAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n\n  xgboost::Metric* metric = xgboost::Metric::Create(\"aucpr\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"aucpr\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}, {}, {}, data_split_mode), 1, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f,\n      0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},\n                            {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, {}, {}, data_split_mode),\n              0.2908445f, 0.001f);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,\n                             0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},\n                    {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, {},\n                    data_split_mode),\n      0.2769199f, 0.001f);\n  auto auc = GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode);\n  ASSERT_TRUE(std::isnan(auc));\n\n  // AUCPR with instance weights\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f, 0.17f, 0.47f, 0.13f,\n                             0.43f, 0.59f, 0.87f, 0.007f},\n                
            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},\n                            {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f},  // weights\n                            {}, data_split_mode),\n              0.694435f, 0.001f);\n\n  // Both groups contain only pos or neg samples.\n  auc = GetMetricEval(metric, {0, 0.1f, 0.3f, 0.5f, 0.7f}, {1, 1, 0, 0, 0}, {}, {0, 2, 5},\n                      data_split_mode);\n  ASSERT_TRUE(std::isnan(auc));\n  delete metric;\n}\n\ninline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n\n  std::unique_ptr<Metric> metric{Metric::Create(\"aucpr\", &ctx)};\n\n  float auc = 0;\n  std::vector<float> labels{1.0f, 0.0f, 2.0f};\n  HostDeviceVector<float> predts{\n      0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,\n  };\n  auc = GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode);\n  EXPECT_EQ(auc, 1.0f);\n\n  auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f}, {}, data_split_mode);\n  EXPECT_EQ(auc, 1.0f);\n\n  predts.HostVector() = {\n      0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f,\n  };\n  labels = {1.0f, 0.0f, 2.0f, 1.0f};\n  auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode);\n  ASSERT_GT(auc, 0.699);\n}\n\ninline void VerifyRankingPRAUC(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n\n  std::unique_ptr<Metric> metric{Metric::Create(\"aucpr\", &ctx)};\n\n  std::vector<float> labels{1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};\n  std::vector<uint32_t> groups{0, 2, 6};\n\n  float auc = 0;\n  auc = GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups,\n                      data_split_mode);\n  EXPECT_EQ(auc, 1.0f);\n\n  auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups,\n                      data_split_mode);\n  
EXPECT_EQ(auc, 1.0f);\n\n  auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},\n                      {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups, data_split_mode);\n  ASSERT_TRUE(std::isnan(auc));\n\n  // Incorrect label\n  ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},\n                             {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups, data_split_mode),\n               dmlc::Error);\n\n  // AUCPR with groups and no weights\n  EXPECT_NEAR(\n      GetMetricEval(metric.get(),\n                    {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,\n                     0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},\n                    {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {},  // weights\n                    {0, 2, 5, 9, 14, 20},                                              // group info\n                    data_split_mode),\n      0.556021f, 0.001f);\n}\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "tests/cpp/metric/test_distributed_metric.cc",
    "content": "/**\n * Copyright 2023, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // for DeviceOrd\n#include <xgboost/data.h>     // for DataSplitMode\n\n#include <algorithm>   // for min\n#include <cstdint>     // for int32_t\n#include <functional>  // for function\n#include <string>      // for string\n#include <thread>      // for thread\n\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"test_auc.h\"\n#include \"test_elementwise_metric.h\"\n#include \"test_multiclass_metric.h\"\n#include \"test_rank_metric.h\"\n#include \"test_survival_metric.h\"\n\n#if defined(XGBOOST_USE_FEDERATED)\n\n#include \"../plugin/federated/test_worker.h\"  // for TestFederatedGlobal\n\n#endif  // defined(XGBOOST_USE_FEDERATED)\n\nnamespace xgboost::metric {\nnamespace {\nusing Verifier = std::function<void(DataSplitMode, DeviceOrd)>;\nstruct Param {\n  bool is_dist;         // is distributed\n  bool is_fed;          // is federated learning\n  DataSplitMode split;  // how to split data\n  Verifier v;           // test function\n  std::string name;     // metric name\n  DeviceOrd device;     // device to run\n};\n\nclass TestDistributedMetric : public ::testing::TestWithParam<Param> {\n protected:\n  template <typename Fn>\n  void Run(bool is_dist, bool is_fed, DataSplitMode split_mode, Fn fn, DeviceOrd device) {\n    if (!is_dist) {\n      fn(split_mode, device);\n      return;\n    }\n\n    std::int32_t n_workers{0};\n    if (device.IsCUDA()) {\n      n_workers = curt::AllVisibleGPUs();\n    } else {\n      n_workers = std::min(static_cast<std::int32_t>(std::thread::hardware_concurrency()), 3);\n    }\n    auto fn1 = [&]() {\n      auto r = collective::GetRank();\n      if (device.IsCPU()) {\n        fn(split_mode, DeviceOrd::CPU());\n      } else {\n        fn(split_mode, DeviceOrd::CUDA(r));\n      }\n    };\n    if (is_fed) {\n#if defined(XGBOOST_USE_FEDERATED)\n      
collective::TestFederatedGlobal(n_workers, fn1);\n#endif  // defined(XGBOOST_USE_FEDERATED)\n    } else {\n      collective::TestDistributedGlobal(n_workers, fn1);\n    }\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestDistributedMetric, BinaryAUCRowSplit) {\n  auto p = GetParam();\n  this->Run(p.is_dist, p.is_fed, p.split, p.v, p.device);\n}\n\nconstexpr bool UseNCCL() {\n#if defined(XGBOOST_USE_NCCL)\n  return true;\n#else\n  return false;\n#endif  // defined(XGBOOST_USE_NCCL)\n}\n\nconstexpr bool UseCUDA() {\n#if defined(XGBOOST_USE_CUDA)\n  return true;\n#else\n  return false;\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n\nconstexpr bool UseFederated() {\n#if defined(XGBOOST_USE_FEDERATED)\n  return true;\n#else\n  return false;\n#endif\n}\n\nauto MakeParamsForTest() {\n  std::vector<Param> cases;\n\n  auto push = [&](std::string name, auto fn) {\n    for (bool is_federated : {false, true}) {\n      for (DataSplitMode m : {DataSplitMode::kCol, DataSplitMode::kRow}) {\n        for (auto d : {DeviceOrd::CPU(), DeviceOrd::CUDA(0)}) {\n          if (!is_federated && !UseNCCL() && d.IsCUDA()) {\n            // Federated doesn't use nccl.\n            continue;\n          }\n          if (!UseCUDA() && d.IsCUDA()) {\n            // skip CUDA tests\n            continue;\n          }\n          if (!UseFederated() && is_federated) {\n            // skip GRPC tests\n            continue;\n          }\n\n          auto p = Param{true, is_federated, m, fn, name, d};\n          cases.push_back(p);\n          if (!is_federated) {\n            // Add a local test.\n            p.is_dist = false;\n            cases.push_back(p);\n          }\n        }\n      }\n    }\n  };\n\n#define REFLECT_NAME(name) push(#name, Verify##name)\n  // AUC\n  REFLECT_NAME(BinaryAUC);\n  REFLECT_NAME(MultiClassAUC);\n  REFLECT_NAME(RankingAUC);\n  REFLECT_NAME(PRAUC);\n  REFLECT_NAME(MultiClassPRAUC);\n  REFLECT_NAME(RankingPRAUC);\n  // Elementwise\n  REFLECT_NAME(RMSE);\n  
REFLECT_NAME(RMSLE);\n  REFLECT_NAME(MAE);\n  REFLECT_NAME(MAPE);\n  REFLECT_NAME(MPHE);\n  REFLECT_NAME(LogLoss);\n  REFLECT_NAME(Error);\n  REFLECT_NAME(PoissonNegLogLik);\n  REFLECT_NAME(MultiRMSE);\n  REFLECT_NAME(Quantile);\n  REFLECT_NAME(Expectile);\n  // Multi-Class\n  REFLECT_NAME(MultiClassError);\n  REFLECT_NAME(MultiClassLogLoss);\n  // Ranking\n  REFLECT_NAME(Precision);\n  REFLECT_NAME(NDCG);\n  REFLECT_NAME(MAP);\n  REFLECT_NAME(NDCGExpGain);\n  // AFT\n  using namespace xgboost::common;  // NOLINT\n  REFLECT_NAME(AFTNegLogLik);\n  REFLECT_NAME(IntervalRegressionAccuracy);\n\n#undef REFLECT_NAME\n\n  return cases;\n}\n\nINSTANTIATE_TEST_SUITE_P(\n    DistributedMetric, TestDistributedMetric, ::testing::ValuesIn(MakeParamsForTest()),\n    [](const ::testing::TestParamInfo<TestDistributedMetric::ParamType>& info) {\n      std::string result;\n      if (info.param.is_dist) {\n        result += \"Dist_\";\n      }\n      if (info.param.is_fed) {\n        result += \"Federated_\";\n      }\n      if (info.param.split == DataSplitMode::kRow) {\n        result += \"RowSplit\";\n      } else {\n        result += \"ColSplit\";\n      }\n      result += \"_\";\n      result += info.param.device.IsCPU() ? \"CPU\" : \"MGPU\";\n      result += \"_\";\n      result += info.param.name;\n      return result;\n    });\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "tests/cpp/metric/test_elementwise_metric.h",
    "content": "/**\n * Copyright 2018-2026, XGBoost contributors\n */\n#pragma once\n#include <xgboost/json.h>\n#include <xgboost/metric.h>\n\n#include <algorithm>   // for transform\n#include <functional>  // for plus\n#include <memory>\n#include <numeric>  // for iota\n#include <vector>   // for vector\n\n#include \"../helpers.h\"\n\nnamespace xgboost::metric {\ninline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {\n  auto ctx = MakeCUDACtx(device);\n  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};\n\n  HostDeviceVector<float> predts;\n  size_t n_samples = 2048;\n\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo &info = p_fmat->Info();\n  info.labels.Reshape(n_samples, 1);\n  info.num_row_ = n_samples;\n  auto &h_labels = info.labels.Data()->HostVector();\n  auto &h_predts = predts.HostVector();\n\n  SimpleLCG lcg;\n  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};\n\n  h_labels.resize(n_samples);\n  h_predts.resize(n_samples);\n\n  for (size_t i = 0; i < n_samples; ++i) {\n    h_predts[i] = dist(&lcg);\n    h_labels[i] = dist(&lcg);\n  }\n\n  auto result = metric->Evaluate(predts, p_fmat);\n  for (size_t i = 0; i < 8; ++i) {\n    ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);\n  }\n}\n\ninline void VerifyRMSE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"rmse\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"rmse\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      0.6403f, 0.001f);\n  auto expected = 2.8284f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected = sqrt(8.0f * collective::GetWorldSize());\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, 
{-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.6708f, 0.001f);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"rmse\"}, device.ordinal);\n}\n\ninline void VerifyRMSLE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"rmsle\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"rmsle\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},\n                            {}, {}, data_split_mode),\n              0.4063f, 1e-4);\n  auto expected = 0.6212f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected = sqrt(0.3859f * collective::GetWorldSize());\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},\n                            {0, -1, 1, -9, 9}, {}, data_split_mode),\n              expected, 1e-4);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},\n                            {0, 1, 2, 9, 8}, {}, data_split_mode),\n              0.2415f, 1e-4);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"rmsle\"}, device.ordinal);\n}\n\ninline void VerifyMAE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"mae\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"mae\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 
0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f,\n      0.001f);\n  auto expected = 8.0f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.54f, 0.001f);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"mae\"}, device.ordinal);\n}\n\ninline void VerifyMAPE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"mape\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"mape\");\n  EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {50, 400, 500, 4000}, {100, 200, 500, 1000}, {}, {}, data_split_mode),\n      1.125f, 0.001f);\n  auto expected = -26.5f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {50, 400, 500, 4000}, {100, 200, 500, 1000}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {50, 400, 500, 4000}, {100, 200, 500, 1000}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              1.3250f, 0.001f);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"mape\"}, device.ordinal);\n}\n\ninline void VerifyMPHE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create(\"mphe\", 
&ctx)};\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"mphe\");\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      0.1751f, 1e-4);\n  auto expected = 3.40375f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9},\n                            {}, data_split_mode),\n              expected, 1e-4);\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.1922f, 1e-4);\n\n  CheckDeterministicMetricElementWise(StringView{\"mphe\"}, device.ordinal);\n\n  metric->Configure({{\"huber_slope\", \"0.1\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.0461686f, 1e-4);\n}\n\ninline void VerifyLogLoss(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"logloss\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"logloss\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0.5f, 1e-17f, 1.0f + 1e-17f, 0.9f}, {0, 0, 1, 1}, {}, {},\n                            data_split_mode),\n              0.1996f, 0.001f);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      1.2039f, 0.001f);\n  auto expected = 21.9722f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  
EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              1.3138f, 0.001f);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"logloss\"}, device.ordinal);\n}\n\ninline void VerifyError(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"error\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"error\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f,\n      0.001f);\n  auto expected = 10.0f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.55f, 0.001f);\n\n  EXPECT_ANY_THROW(xgboost::Metric::Create(\"error@abc\", &ctx));\n  delete metric;\n\n  metric = xgboost::Metric::Create(\"error@0.5f\", &ctx);\n  metric->Configure({});\n  EXPECT_STREQ(metric->Name(), \"error\");\n\n  delete metric;\n\n  metric = xgboost::Metric::Create(\"error@0.1\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"error@0.1\");\n  EXPECT_STREQ(metric->Name(), \"error@0.1\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(\n      
GetMetricEval(metric, {-0.1f, -0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      0.25f, 0.001f);\n  expected = 9.0f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {-0.1f, -0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {-0.1f, -0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              0.45f, 0.001f);\n  delete metric;\n\n  CheckDeterministicMetricElementWise(StringView{\"error@0.5\"}, device.ordinal);\n}\n\ninline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric *metric = xgboost::Metric::Create(\"poisson-nloglik\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"poisson-nloglik\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0.5f, 1e-17f, 1.0f + 1e-17f, 0.9f}, {0, 0, 1, 1}, {}, {},\n                            data_split_mode),\n              0.6263f, 0.001f);\n  EXPECT_NEAR(\n      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      1.1019f, 0.001f);\n  auto expected = 13.3750f;\n  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {\n    expected *= collective::GetWorldSize();\n  }\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {-1, 1, 9, -9}, {},\n                            data_split_mode),\n              expected, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {1, 2, 9, 8}, {},\n                            data_split_mode),\n              1.5783f, 0.001f);\n  delete metric;\n\n  
CheckDeterministicMetricElementWise(StringView{\"poisson-nloglik\"}, device.ordinal);\n}\n\ninline void VerifyMultiRMSE(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  size_t n_samples = 32, n_targets = 8;\n  linalg::Tensor<float, 2> y{{n_samples, n_targets}, ctx.Device()};\n  auto &h_y = y.Data()->HostVector();\n  std::iota(h_y.begin(), h_y.end(), 0);\n\n  HostDeviceVector<float> predt(n_samples * n_targets, 0);\n\n  std::unique_ptr<Metric> metric{Metric::Create(\"rmse\", &ctx)};\n  metric->Configure({});\n\n  auto loss = GetMultiMetricEval(metric.get(), predt, y, {}, {}, data_split_mode);\n  std::vector<float> weights(n_samples, 1);\n  auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights, {}, data_split_mode);\n\n  std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; });\n  auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size());\n  ASSERT_FLOAT_EQ(ret, loss);\n  ASSERT_FLOAT_EQ(ret, loss_w);\n}\n\ninline void VerifyQuantile(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<Metric> metric{Metric::Create(\"quantile\", &ctx)};\n\n  HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};\n  HostDeviceVector<float> predts_2{0.2f, 0.6f, 0.4f, 0.6f, 0.5f, 1.2f, 0.0f, 0.4f};\n  HostDeviceVector<float> predts_3{0.2f, 0.4f, 0.6f, 0.4f, 0.5f, 0.6f,\n                                   0.5f, 0.8f, 1.2f, 0.0f, 0.3f, 0.4f};\n  std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};\n  std::vector<float> weights{0.2f, 0.4f, 0.6f, 0.8f};\n\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.400f,\n              0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.2]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.376f,\n              
0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.4]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.352f,\n              0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.304f,\n              0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[1.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.28f,\n              0.001f);\n\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.2]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.4]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[1.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);\n\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.2, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_2, labels, {}, {}, data_split_mode), 0.0425f,\n              0.0001f);\n  metric->Configure(Args{{\"quantile_alpha\", \"[0.2, 0.5, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_3, labels, {}, {}, data_split_mode), 0.0450f,\n              0.0001f);\n}\n\ninline void VerifyExpectile(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<Metric> metric{Metric::Create(\"expectile\", &ctx)};\n\n  HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 
0.9f};\n  HostDeviceVector<float> predts_2{0.2f, 0.6f, 0.4f, 0.6f, 0.5f, 1.2f, 0.0f, 0.4f};\n  HostDeviceVector<float> predts_3{0.2f, 0.4f, 0.6f, 0.4f, 0.5f, 0.6f,\n                                   0.5f, 0.8f, 1.2f, 0.0f, 0.3f, 0.4f};\n  std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};\n  std::vector<float> weights{0.2f, 0.4f, 0.6f, 0.8f};\n\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.288f,\n              0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.272f,\n              0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.4]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.256f,\n              0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.224f,\n              0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[1.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.208f,\n              0.001f);\n\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.0]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.2f, 0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.2f, 0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.4]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.2f, 0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.2f, 0.001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[1.0]\"}});\n  
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.2f, 0.001f);\n\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_2, labels, {}, {}, data_split_mode), 0.01175f,\n              0.0001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2, 0.5, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_3, labels, {}, {}, data_split_mode), 0.0103333f,\n              0.0001f);\n\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_2, labels, weights, {}, data_split_mode), 0.0129f,\n              0.0001f);\n  metric->Configure(Args{{\"expectile_alpha\", \"[0.2, 0.5, 0.8]\"}});\n  EXPECT_NEAR(GetMetricEval(metric.get(), predts_3, labels, weights, {}, data_split_mode),\n              0.0119333f, 0.0001f);\n}\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "tests/cpp/metric/test_metric.cc",
    "content": "// Copyright by Contributors\n#include <xgboost/metric.h>\n\n#include \"../helpers.h\"\nnamespace xgboost {\nTEST(Metric, UnknownMetric) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  xgboost::Metric* metric = nullptr;\n  EXPECT_ANY_THROW(metric = xgboost::Metric::Create(\"unknown_name\", &ctx));\n  EXPECT_NO_THROW(metric = xgboost::Metric::Create(\"rmse\", &ctx));\n  delete metric;\n  metric = nullptr;\n  EXPECT_ANY_THROW(metric = xgboost::Metric::Create(\"unknown_name@1\", &ctx));\n  EXPECT_NO_THROW(metric = xgboost::Metric::Create(\"error@0.5f\", &ctx));\n  delete metric;\n}\n\nTEST(Metric, ExpectileLoadConfig) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create(\"expectile\", &ctx)};\n  metric->Configure({{\"expectile_alpha\", \"0.8\"}});\n  Json config{Object{}};\n  metric->SaveConfig(&config);\n\n  std::unique_ptr<xgboost::Metric> loaded{xgboost::Metric::Create(\"expectile\", &ctx)};\n  loaded->LoadConfig(config);\n\n  xgboost::HostDeviceVector<float> preds;\n  preds.HostVector() = {0.1f, 0.9f};\n  auto result = GetMetricEval(loaded.get(), preds, {0.0f, 1.0f}, {}, {}, DataSplitMode::kRow);\n  // alpha=0.8, diffs {0.1, -0.1} => losses {0.2*0.01, 0.8*0.01} -> mean 0.005.\n  EXPECT_NEAR(result, 0.005f, 1e-6f);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/metric/test_multiclass_metric.h",
    "content": "// Copyright by Contributors\n#include <xgboost/metric.h>\n#include <string>\n\n#include \"../helpers.h\"\n\nnamespace xgboost {\nnamespace metric {\n\ninline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) {\n  auto ctx = MakeCUDACtx(device);\n  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};\n\n  HostDeviceVector<float> predts;\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  auto &h_predts = predts.HostVector();\n\n  SimpleLCG lcg;\n\n  size_t n_samples = 2048, n_classes = 4;\n\n  info.labels.Reshape(n_samples);\n  auto &h_labels = info.labels.Data()->HostVector();\n  h_predts.resize(n_samples * n_classes);\n\n  {\n    SimpleRealUniformDistribution<float> dist{0.0f, static_cast<float>(n_classes)};\n    for (size_t i = 0; i < n_samples; ++i) {\n      h_labels[i] = dist(&lcg);\n    }\n  }\n\n  {\n    SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};\n    for (size_t i = 0; i < n_samples * n_classes; ++i) {\n      h_predts[i] = dist(&lcg);\n    }\n  }\n\n  auto result = metric->Evaluate(predts, p_fmat);\n  for (size_t i = 0; i < 8; ++i) {\n    ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);\n  }\n}\n\ninline void TestMultiClassError(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric * metric = xgboost::Metric::Create(\"merror\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"merror\");\n  EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));\n  EXPECT_NEAR(GetMetricEval(\n      metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},\n                            {0, 1, 2}, {}, {}, data_split_mode),\n              0.666f, 0.001f);\n  delete metric;\n}\n\ninline void VerifyMultiClassError(DataSplitMode data_split_mode, 
DeviceOrd device) {\n  TestMultiClassError(data_split_mode, device);\n  CheckDeterministicMetricMultiClass(StringView{\"merror\"}, device.ordinal);\n}\n\ninline void TestMultiClassLogLoss(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  xgboost::Metric * metric = xgboost::Metric::Create(\"mlogloss\", &ctx);\n  metric->Configure({});\n  ASSERT_STREQ(metric->Name(), \"mlogloss\");\n  EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));\n  EXPECT_NEAR(GetMetricEval(\n    metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},\n                            {0, 1, 2}, {}, {}, data_split_mode),\n              2.302f, 0.001f);\n\n  delete metric;\n}\n\ninline void VerifyMultiClassLogLoss(DataSplitMode data_split_mode, DeviceOrd device) {\n  TestMultiClassLogLoss(data_split_mode, device);\n  CheckDeterministicMetricMultiClass(StringView{\"mlogloss\"}, device.ordinal);\n}\n\n}  // namespace metric\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/metric/test_rank_metric.cc",
    "content": "/**\n * Copyright 2016-2023, XGBoost Contributors\n */\n#include \"test_rank_metric.h\"\n\n#include <gtest/gtest.h>      // for Test, EXPECT_NEAR, ASSERT_STREQ\n#include <xgboost/context.h>  // for Context\n#include <xgboost/metric.h>   // for Metric\n\n#include <memory>  // for unique_ptr\n\n#include \"../helpers.h\"    // for GetMetricEval, CreateEmptyGe...\n#include \"xgboost/base.h\"  // for bst_float, kRtEps\n\nnamespace xgboost::metric {\nTEST(Metric, AMS) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  EXPECT_ANY_THROW(Metric::Create(\"ams\", &ctx));\n  std::unique_ptr<Metric> metric{Metric::Create(\"ams@0.5f\", &ctx)};\n  ASSERT_STREQ(metric->Name(), \"ams@0.5\");\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0.311f, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}), 0.29710f,\n              0.001f);\n\n  metric.reset(Metric::Create(\"ams@0\", &ctx));\n  ASSERT_STREQ(metric->Name(), \"ams@0\");\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0.311f, 0.001f);\n}\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "tests/cpp/metric/test_rank_metric.h",
    "content": "/**\n * Copyright 2016-2023 by XGBoost Contributors\n */\n#pragma once\n#include <gtest/gtest.h>                 // for Test, EXPECT_NEAR, ASSERT_STREQ\n#include <xgboost/context.h>             // for Context\n#include <xgboost/data.h>                // for MetaInfo, DMatrix\n#include <xgboost/linalg.h>              // for Matrix\n#include <xgboost/metric.h>              // for Metric\n\n#include <algorithm>                     // for max\n#include <memory>                        // for unique_ptr\n#include <vector>                        // for vector\n\n#include \"../helpers.h\"                  // for GetMetricEval, CreateEmptyGe...\n#include \"xgboost/base.h\"                // for bst_float, kRtEps\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/json.h\"                // for Json, String, Object\n\nnamespace xgboost::metric {\n\ninline void VerifyPrecision(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  std::unique_ptr<xgboost::Metric> metric{Metric::Create(\"pre\", &ctx)};\n  ASSERT_STREQ(metric->Name(), \"pre\");\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5, 1e-7);\n  EXPECT_NEAR(\n      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      0.5, 1e-7);\n\n  metric.reset(xgboost::Metric::Create(\"pre@2\", &ctx));\n  ASSERT_STREQ(metric->Name(), \"pre@2\");\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);\n  EXPECT_NEAR(\n      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),\n      0.5f, 0.001f);\n\n  EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));\n\n  metric.reset(xgboost::Metric::Create(\"pre@4\", &ctx));\n  EXPECT_NEAR(GetMetricEval(metric.get(), {0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n                            {0.0f, 1.0f, 0.0f, 
0.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),\n              0.5f, 1e-7);\n}\n\ninline void VerifyNDCG(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  Metric * metric = xgboost::Metric::Create(\"ndcg\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"ndcg\");\n  EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));\n  ASSERT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            {}, {}, {}, data_split_mode), 1, 1e-10);\n  ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              0.6509f, 0.001f);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"ndcg@2\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"ndcg@2\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              0.3868f, 0.001f);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"ndcg@-\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"ndcg-\");\n  EXPECT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            {}, {}, {}, data_split_mode), 0, 1e-10);\n  ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              0.6509f, 0.001f);\n  delete metric;\n  metric = xgboost::Metric::Create(\"ndcg-\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"ndcg-\");\n  
EXPECT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            {}, {}, {}, data_split_mode), 0, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n               0.6509f, 0.001f);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"ndcg@2-\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"ndcg@2-\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              1.f - 0.3868f, 1.f - 0.001f);\n\n  delete metric;\n}\n\ninline void VerifyMAP(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n  Metric * metric = xgboost::Metric::Create(\"map\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"map\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, kRtEps);\n\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              0.5f, 0.001f);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            std::vector<xgboost::bst_float>{}, {}, {}, data_split_mode), 1, 1e-10);\n\n  // Rank metric with group info\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},\n                            {1, 1, 1, 0, 1, 0},  // Labels\n                            {},  // Weights\n                            {0, 2, 5, 6},  // Group info\n                            
data_split_mode),\n              0.8611f, 0.001f);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"map@-\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"map-\");\n  EXPECT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            {}, {}, {}, data_split_mode), 0, 1e-10);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"map-\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"map-\");\n  EXPECT_NEAR(GetMetricEval(metric,\n                            xgboost::HostDeviceVector<xgboost::bst_float>{},\n                            {}, {}, {}, data_split_mode), 0, 1e-10);\n\n  delete metric;\n  metric = xgboost::Metric::Create(\"map@2\", &ctx);\n  ASSERT_STREQ(metric->Name(), \"map@2\");\n  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);\n  EXPECT_NEAR(GetMetricEval(metric,\n                            {0.1f, 0.9f, 0.1f, 0.9f},\n                            {  0,   0,   1,   1}, {}, {}, data_split_mode),\n              0.25f, 0.001f);\n  delete metric;\n}\n\ninline void VerifyNDCGExpGain(DataSplitMode data_split_mode, DeviceOrd device) {\n  Context ctx = MakeCUDACtx(device.ordinal);\n\n  auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.Device()};\n  info.num_row_ = info.labels.Shape(0);\n  info.group_ptr_.resize(2);\n  info.group_ptr_[0] = 0;\n  info.group_ptr_[1] = info.num_row_;\n  info.data_split_mode = data_split_mode;\n  HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};\n\n  std::unique_ptr<Metric> metric{Metric::Create(\"ndcg\", &ctx)};\n  Json config{Object{}};\n  config[\"name\"] = String{\"ndcg\"};\n  config[\"lambdarank_param\"] = Object{};\n  config[\"lambdarank_param\"][\"ndcg_exp_gain\"] = String{\"true\"};\n  config[\"lambdarank_param\"][\"lambdarank_num_pair_per_sample\"] = 
String{\"32\"};\n  metric->LoadConfig(config);\n\n  auto ndcg = metric->Evaluate(predt, p_fmat);\n  ASSERT_NEAR(ndcg, 0.409738f, kRtEps);\n\n  config[\"lambdarank_param\"][\"ndcg_exp_gain\"] = String{\"false\"};\n  metric->LoadConfig(config);\n\n  ndcg = metric->Evaluate(predt, p_fmat);\n  ASSERT_NEAR(ndcg, 0.695694f, kRtEps);\n\n  predt.HostVector() = info.labels.Data()->HostVector();\n  ndcg = metric->Evaluate(predt, p_fmat);\n  ASSERT_NEAR(ndcg, 1.0, kRtEps);\n}\n}  // namespace xgboost::metric\n"
  },
  {
    "path": "tests/cpp/metric/test_survival_metric.cc",
    "content": "/**\n * Copyright 2020-2023, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <memory>\n#include <vector>\n#include <string>\n#include <limits>\n#include <cmath>\n\n#include \"xgboost/metric.h\"\n#include \"xgboost/logging.h\"\n#include \"../helpers.h\"\n#include \"../../../src/common/survival_util.h\"\n\n// CUDA conditional compile trick.\n#include \"test_survival_metric.cu\"\n\nnamespace xgboost::common {\n\n/** Tests for Survival metrics that should run only on CPU **/\n\n/**\n * Reference values obtained from\n * https://github.com/avinashbarnwal/GSOC-2019/blob/master/AFT/R/combined_assignment.R\n **/\n\n/**\n * AFTLoss.* tests verify metric values over individual data points.\n **/\n\n// Generate prediction value ranging from 2**1 to 2**15, using grid points in log scale\n// Then check prediction against the reference values\ntemplate <typename Distribution>\nstatic inline void CheckLossOverGridPoints(\n                      double true_label_lower_bound,\n                      double true_label_upper_bound,\n                      const std::vector<double>& reference_values) {\n  const int num_point = 20;\n  const double log_y_low = 1.0;\n  const double log_y_high = 15.0;\n  CHECK_EQ(num_point, reference_values.size());\n  for (int i = 0; i < num_point; ++i) {\n    const double y_pred\n      = std::pow(2.0, i * (log_y_high - log_y_low) / (num_point - 1) + log_y_low);\n    const double loss_val = AFTLoss<Distribution>::Loss(\n      true_label_lower_bound, true_label_upper_bound, std::log(y_pred), 1.0);\n    EXPECT_NEAR(loss_val, reference_values[i], 1e-4);\n  }\n}\n\nTEST(AFTLoss, Uncensored) {\n  // Given label 100, compute the AFT loss for various prediction values\n  const double true_label_lower_bound = 100.0;\n  const double true_label_upper_bound = true_label_lower_bound;\n\n  CheckLossOverGridPoints<NormalDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 13.1761, 11.3085, 9.7017, 8.3558, 7.2708, 6.4466, 
5.8833, 5.5808, 5.5392, 5.7585, 6.2386,\n      6.9795, 7.9813, 9.2440, 10.7675, 12.5519, 14.5971, 16.9032, 19.4702, 22.2980 });\n  CheckLossOverGridPoints<LogisticDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 8.5568, 8.0720, 7.6038, 7.1620, 6.7612, 6.4211, 6.1659, 6.0197, 5.9990, 6.1064, 6.3293,\n      6.6450, 7.0289, 7.4594, 7.9205, 8.4008, 8.8930, 9.3926, 9.8966, 10.4033 });\n  CheckLossOverGridPoints<ExtremeDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 27.6310, 27.6310, 19.7177, 13.0281, 9.2183, 7.1365, 6.0916, 5.6688, 5.6195, 5.7941, 6.1031,\n      6.4929, 6.9310, 7.3981, 7.8827, 8.3778, 8.8791, 9.3842, 9.8916, 10.40033 });\n}\n\nTEST(AFTLoss, LeftCensored) {\n  // Given label (-inf, 20], compute the AFT loss for various prediction values\n  const double true_label_lower_bound = 0.0;\n  const double true_label_upper_bound = 20.0;\n\n  CheckLossOverGridPoints<NormalDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 0.0107, 0.0373, 0.1054, 0.2492, 0.5068, 0.9141, 1.5003, 2.2869, 3.2897, 4.5196, 5.9846,\n      7.6902, 9.6405, 11.8385, 14.2867, 16.9867, 19.9399, 23.1475, 26.6103, 27.6310 });\n  CheckLossOverGridPoints<LogisticDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 0.0953, 0.1541, 0.2451, 0.3804, 0.5717, 0.8266, 1.1449, 1.5195, 1.9387, 2.3902, 2.8636,\n      3.3512, 3.8479, 4.3500, 4.8556, 5.3632, 5.8721, 6.3817, 6.8918, 7.4021 });\n  CheckLossOverGridPoints<ExtremeDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 0.0000, 0.0025, 0.0277, 0.1225, 0.3195, 0.6150, 0.9862, 1.4094, 1.8662, 2.3441, 2.8349,\n      3.3337, 3.8372, 4.3436, 4.8517, 5.3609, 5.8707, 6.3808, 6.8912, 7.4018 });\n}\n\nTEST(AFTLoss, RightCensored) {\n  // Given label [60, +inf), compute the AFT loss for various prediction values\n  const double true_label_lower_bound = 60.0;\n  const double true_label_upper_bound = std::numeric_limits<double>::infinity();\n\n  
CheckLossOverGridPoints<NormalDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 8.0000, 6.2537, 4.7487, 3.4798, 2.4396, 1.6177, 0.9993, 0.5638, 0.2834, 0.1232, 0.0450,\n      0.0134, 0.0032, 0.0006, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000 });\n  CheckLossOverGridPoints<LogisticDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 3.4340, 2.9445, 2.4683, 2.0125, 1.5871, 1.2041, 0.8756, 0.6099, 0.4083, 0.2643, 0.1668,\n      0.1034, 0.0633, 0.0385, 0.0233, 0.0140, 0.0084, 0.0051, 0.0030, 0.0018 });\n  CheckLossOverGridPoints<ExtremeDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 27.6310, 18.0015, 10.8018, 6.4817, 3.8893, 2.3338, 1.4004, 0.8403, 0.5042, 0.3026, 0.1816,\n      0.1089, 0.0654, 0.0392, 0.0235, 0.0141, 0.0085, 0.0051, 0.0031, 0.0018 });\n}\n\nTEST(AFTLoss, IntervalCensored) {\n  // Given label [16, 200], compute the AFT loss for various prediction values\n  const double true_label_lower_bound = 16.0;\n  const double true_label_upper_bound = 200.0;\n\n  CheckLossOverGridPoints<NormalDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 3.9746, 2.8415, 1.9319, 1.2342, 0.7335, 0.4121, 0.2536, 0.2470, 0.3919, 0.6982, 1.1825,\n      1.8622, 2.7526, 3.8656, 5.2102, 6.7928, 8.6183, 10.6901, 13.0108, 15.5826 });\n  CheckLossOverGridPoints<LogisticDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 2.2906, 1.8578, 1.4667, 1.1324, 0.8692, 0.6882, 0.5948, 0.5909, 0.6764, 0.8499, 1.1061,\n      1.4348, 1.8215, 2.2511, 2.7104, 3.1891, 3.6802, 4.1790, 4.6825, 5.1888 });\n  CheckLossOverGridPoints<ExtremeDistribution>(true_label_lower_bound, true_label_upper_bound,\n    { 8.0000, 4.8004, 2.8805, 1.7284, 1.0372, 0.6231, 0.3872, 0.3031, 0.3740, 0.5839, 0.8995,\n      1.2878, 1.7231, 2.1878, 2.6707, 3.1647, 3.6653, 4.1699, 4.6770, 5.1856 });\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/metric/test_survival_metric.cu",
    "content": "/*!\n * Copyright (c) by Contributors 2020\n */\n#include <gtest/gtest.h>\n#include \"test_survival_metric.h\"\n#include \"xgboost/metric.h\"\n\n/** Tests for Survival metrics that should run both on CPU and GPU **/\n\nnamespace xgboost::common {\n// Test configuration of AFT metric\nTEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  std::unique_ptr<Metric> metric(Metric::Create(\"aft-nloglik\", &ctx));\n  metric->Configure({{\"aft_loss_distribution\", \"normal\"}, {\"aft_loss_distribution_scale\", \"10\"}});\n\n  // Configuration round-trip test\n  Json j_obj{ Object() };\n  metric->SaveConfig(&j_obj);\n  auto aft_param_json = j_obj[\"aft_loss_param\"];\n  EXPECT_EQ(get<String>(aft_param_json[\"aft_loss_distribution\"]), \"normal\");\n  EXPECT_EQ(get<String>(aft_param_json[\"aft_loss_distribution_scale\"]), \"10\");\n\n  CheckDeterministicMetricElementWise(StringView{\"aft-nloglik\"}, GPUIDX);\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/metric/test_survival_metric.h",
    "content": "/**\n * Copyright 2020-2023 by XGBoost Contributors\n */\n#pragma once\n#include <gtest/gtest.h>\n\n#include <cmath>\n\n#include \"../../../src/common/survival_util.h\"\n#include \"../helpers.h\"\n#include \"xgboost/metric.h\"\n\nnamespace xgboost {\nnamespace common {\ninline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {\n  auto ctx = MakeCUDACtx(device);\n  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};\n  metric->Configure(Args{});\n\n  HostDeviceVector<float> predts;\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  auto &h_predts = predts.HostVector();\n\n  SimpleLCG lcg;\n  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};\n\n  size_t n_samples = 2048;\n  h_predts.resize(n_samples);\n\n  for (size_t i = 0; i < n_samples; ++i) {\n    h_predts[i] = dist(&lcg);\n  }\n\n  auto &h_upper = info.labels_upper_bound_.HostVector();\n  auto &h_lower = info.labels_lower_bound_.HostVector();\n  h_lower.resize(n_samples);\n  h_upper.resize(n_samples);\n  for (size_t i = 0; i < n_samples; ++i) {\n    h_lower[i] = 1;\n    h_upper[i] = 10;\n  }\n\n  auto result = metric->Evaluate(predts, p_fmat);\n  for (size_t i = 0; i < 8; ++i) {\n    ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);\n  }\n}\n\ninline void VerifyAFTNegLogLik(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n\n  /**\n   * Test aggregate output from the AFT metric over a small test data set.\n   * This is unlike AFTLoss.* tests, which verify metric values over individual data points.\n   **/\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  info.num_row_ = 4;\n  info.labels_lower_bound_.HostVector()\n      = { 100.0f, 0.0f, 60.0f, 16.0f };\n  info.labels_upper_bound_.HostVector()\n      = { 100.0f, 20.0f, std::numeric_limits<bst_float>::infinity(), 200.0f };\n  info.weights_.HostVector() = std::vector<bst_float>();\n  info.data_split_mode = 
data_split_mode;\n  HostDeviceVector<bst_float> preds(4, std::log(64));\n\n  struct TestCase {\n    std::string dist_type;\n    bst_float reference_value;\n  };\n  for (const auto& test_case : std::vector<TestCase>{ {\"normal\", 2.1508f}, {\"logistic\", 2.1804f},\n                                                     {\"extreme\", 2.0706f} }) {\n    std::unique_ptr<Metric> metric(Metric::Create(\"aft-nloglik\", &ctx));\n    metric->Configure({ {\"aft_loss_distribution\", test_case.dist_type},\n                       {\"aft_loss_distribution_scale\", \"1.0\"} });\n    EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4);\n  }\n}\n\ninline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode, DeviceOrd device) {\n  auto ctx = MakeCUDACtx(device.ordinal);\n\n  auto p_fmat = EmptyDMatrix();\n  MetaInfo& info = p_fmat->Info();\n  info.num_row_ = 4;\n  info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f };\n  info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f };\n  info.weights_.HostVector() = std::vector<bst_float>();\n  info.data_split_mode = data_split_mode;\n  HostDeviceVector<bst_float> preds(4, std::log(60.0f));\n\n  std::unique_ptr<Metric> metric(Metric::Create(\"interval-regression-accuracy\", &ctx));\n  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);\n  info.labels_lower_bound_.HostVector()[2] = 70.0f;\n  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);\n  info.labels_upper_bound_.HostVector()[2] = std::numeric_limits<bst_float>::infinity();\n  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);\n  info.labels_upper_bound_.HostVector()[3] = std::numeric_limits<bst_float>::infinity();\n  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);\n  info.labels_lower_bound_.HostVector()[0] = 70.0f;\n  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);\n\n  CheckDeterministicMetricElementWise(StringView{\"interval-regression-accuracy\"}, device.ordinal);\n}\n}  // 
namespace common\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_aft_obj.cc",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <memory>\n#include <vector>\n#include <limits>\n#include <cmath>\n\n#include \"xgboost/objective.h\"\n#include \"xgboost/logging.h\"\n#include \"../helpers.h\"\n#include \"test_aft_obj.h\"\n\nnamespace xgboost::common {\nvoid TestAFTObjConfiguration(const Context* ctx) {\n  std::unique_ptr<ObjFunction> objective(ObjFunction::Create(\"survival:aft\", ctx));\n  objective->Configure({ {\"aft_loss_distribution\", \"logistic\"},\n                          {\"aft_loss_distribution_scale\", \"5\"} });\n\n  // Configuration round-trip test\n  Json j_obj{ Object() };\n  objective->SaveConfig(&j_obj);\n  EXPECT_EQ(get<String>(j_obj[\"name\"]), \"survival:aft\");\n  auto aft_param_json = j_obj[\"aft_loss_param\"];\n  EXPECT_EQ(get<String>(aft_param_json[\"aft_loss_distribution\"]), \"logistic\");\n  EXPECT_EQ(get<String>(aft_param_json[\"aft_loss_distribution_scale\"]), \"5\");\n}\n\n/**\n * Verify that gradient pair (gpair) is computed correctly for various prediction values.\n * Reference values obtained from\n * https://github.com/avinashbarnwal/GSOC-2019/blob/master/AFT/R/combined_assignment.R\n **/\n\n// Generate prediction value ranging from 2**1 to 2**15, using grid points in log scale\n// Then check prediction against the reference values\nstatic inline void CheckGPairOverGridPoints(\n                      ObjFunction* obj,\n                      bst_float true_label_lower_bound,\n                      bst_float true_label_upper_bound,\n                      const std::string& dist_type,\n                      const std::vector<bst_float>& expected_grad,\n                      const std::vector<bst_float>& expected_hess,\n                      float ftol = 1e-4f) {\n  const int num_point = 20;\n  const double log_y_low = 1.0;\n  const double log_y_high = 15.0;\n\n  obj->Configure({ {\"aft_loss_distribution\", dist_type},\n                   
{\"aft_loss_distribution_scale\", \"1\"} });\n\n  MetaInfo info;\n  info.num_row_ = num_point;\n  info.labels_lower_bound_.HostVector()\n    = std::vector<bst_float>(num_point, true_label_lower_bound);\n  info.labels_upper_bound_.HostVector()\n    = std::vector<bst_float>(num_point, true_label_upper_bound);\n  info.weights_.HostVector() = std::vector<bst_float>();\n  std::vector<bst_float> preds(num_point);\n  for (int i = 0; i < num_point; ++i) {\n    preds[i] = std::log(std::pow(2.0, i * (log_y_high - log_y_low) / (num_point - 1) + log_y_low));\n  }\n\n  linalg::Matrix<GradientPair> out_gpair;\n  obj->GetGradient(HostDeviceVector<bst_float>(preds), info, 1, &out_gpair);\n  const auto gpair = out_gpair.HostView();\n  CHECK_EQ(num_point, expected_grad.size());\n  CHECK_EQ(num_point, expected_hess.size());\n  for (int i = 0; i < num_point; ++i) {\n    EXPECT_NEAR(gpair(i).GetGrad(), expected_grad[i], ftol);\n    EXPECT_NEAR(gpair(i).GetHess(), expected_hess[i], ftol);\n  }\n}\n\nvoid TestAFTObjGPairUncensoredLabels(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj(ObjFunction::Create(\"survival:aft\", ctx));\n\n  CheckGPairOverGridPoints(obj.get(), 100.0f, 100.0f, \"normal\",\n    { -3.9120f, -3.4013f, -2.8905f, -2.3798f, -1.8691f, -1.3583f, -0.8476f, -0.3368f, 0.1739f,\n      0.6846f, 1.1954f, 1.7061f, 2.2169f, 2.7276f, 3.2383f, 3.7491f, 4.2598f, 4.7706f, 5.2813f,\n      5.7920f },\n    { 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f,\n      1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f, 1.0000f });\n  CheckGPairOverGridPoints(obj.get(), 100.0f, 100.0f, \"logistic\",\n    { -0.9608f, -0.9355f, -0.8948f, -0.8305f, -0.7327f, -0.5910f, -0.4001f, -0.1668f, 0.0867f,\n      0.3295f, 0.5354f, 0.6927f, 0.8035f, 0.8773f, 0.9245f, 0.9540f, 0.9721f, 0.9832f, 0.9899f,\n      0.9939f },\n    { 0.0384f, 0.0624f, 0.0997f, 0.1551f, 0.2316f, 0.3254f, 0.4200f, 0.4861f, 0.4962f, 0.4457f,\n      
0.3567f, 0.2601f, 0.1772f, 0.1152f, 0.0726f, 0.0449f, 0.0275f, 0.0167f, 0.0101f, 0.0061f });\n  CheckGPairOverGridPoints(obj.get(), 100.0f, 100.0f, \"extreme\",\n    { -15.0000f, -15.0000f, -15.0000f, -9.8028f, -5.4822f, -2.8897f, -1.3340f, -0.4005f, 0.1596f,\n      0.4957f, 0.6974f, 0.8184f, 0.8910f, 0.9346f, 0.9608f, 0.9765f, 0.9859f, 0.9915f, 0.9949f,\n      0.9969f },\n    { 15.0000f, 15.0000f, 15.0000f, 10.8028f, 6.4822f, 3.8897f, 2.3340f, 1.4005f, 0.8404f, 0.5043f,\n      0.3026f, 0.1816f, 0.1090f, 0.0654f, 0.0392f, 0.0235f, 0.0141f, 0.0085f, 0.0051f, 0.0031f });\n}\n\nvoid TestAFTObjGPairLeftCensoredLabels(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj(ObjFunction::Create(\"survival:aft\", ctx));\n\n  CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, \"normal\",\n    { 0.0285f, 0.0832f, 0.1951f, 0.3804f, 0.6403f, 0.9643f, 1.3379f, 1.7475f, 2.1828f, 2.6361f,\n      3.1023f, 3.5779f, 4.0603f, 4.5479f, 5.0394f, 5.5340f, 6.0309f, 6.5298f, 7.0303f, 7.5326f },\n    { 0.0663f, 0.1559f, 0.2881f, 0.4378f, 0.5762f, 0.6878f, 0.7707f, 0.8300f, 0.8719f, 0.9016f,\n      0.9229f, 0.9385f, 0.9501f, 0.9588f, 0.9656f, 0.9709f, 0.9751f, 0.9785f, 0.9813f, 0.9877f });\n  CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, \"logistic\",\n    { 0.0909f, 0.1428f, 0.2174f, 0.3164f, 0.4355f, 0.5625f, 0.6818f, 0.7812f, 0.8561f, 0.9084f,\n      0.9429f, 0.9650f, 0.9787f, 0.9871f, 0.9922f, 0.9953f, 0.9972f, 0.9983f, 0.9990f, 0.9994f },\n    { 0.0826f, 0.1224f, 0.1701f, 0.2163f, 0.2458f, 0.2461f, 0.2170f, 0.1709f, 0.1232f, 0.0832f,\n      0.0538f, 0.0338f, 0.0209f, 0.0127f, 0.0077f, 0.0047f, 0.0028f, 0.0017f, 0.0010f, 0.0006f });\n  CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, \"extreme\",\n    { 0.0005f, 0.0149f, 0.1011f, 0.2815f, 0.4881f, 0.6610f, 0.7847f, 0.8665f, 0.9183f, 0.9504f,\n      0.9700f, 0.9820f, 0.9891f, 0.9935f, 0.9961f, 0.9976f, 0.9986f, 0.9992f, 0.9995f, 0.9997f },\n    { 0.0041f, 0.0747f, 0.2731f, 0.4059f, 0.3829f, 0.2901f, 0.1973f, 0.1270f, 0.0793f, 
0.0487f,\n      0.0296f, 0.0179f, 0.0108f, 0.0065f, 0.0039f, 0.0024f, 0.0014f, 0.0008f, 0.0005f, 0.0003f });\n}\n\nvoid TestAFTObjGPairRightCensoredLabels(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj(ObjFunction::Create(\"survival:aft\", ctx));\n\n  CheckGPairOverGridPoints(obj.get(), 60.0f, std::numeric_limits<float>::infinity(), \"normal\",\n    { -3.6583f, -3.1815f, -2.7135f, -2.2577f, -1.8190f, -1.4044f, -1.0239f, -0.6905f, -0.4190f,\n      -0.2209f, -0.0973f, -0.0346f, -0.0097f, -0.0021f, -0.0004f, -0.0000f, -0.0000f, -0.0000f,\n      -0.0000f, -0.0000f },\n    { 0.9407f, 0.9259f, 0.9057f, 0.8776f, 0.8381f, 0.7821f, 0.7036f, 0.5970f, 0.4624f, 0.3128f,\n      0.1756f, 0.0780f, 0.0265f, 0.0068f, 0.0013f, 0.0002f, 0.0000f, 0.0000f, 0.0000f, 0.0000f });\n  CheckGPairOverGridPoints(obj.get(), 60.0f, std::numeric_limits<float>::infinity(), \"logistic\",\n    { -0.9677f, -0.9474f, -0.9153f, -0.8663f, -0.7955f, -0.7000f, -0.5834f, -0.4566f, -0.3352f,\n      -0.2323f, -0.1537f, -0.0982f, -0.0614f, -0.0377f, -0.0230f, -0.0139f, -0.0084f, -0.0051f,\n      -0.0030f, -0.0018f },\n    { 0.0312f, 0.0499f, 0.0776f, 0.1158f, 0.1627f, 0.2100f, 0.2430f, 0.2481f, 0.2228f, 0.1783f,\n      0.1300f, 0.0886f, 0.0576f, 0.0363f, 0.0225f, 0.0137f, 0.0083f, 0.0050f, 0.0030f, 0.0018f });\n  CheckGPairOverGridPoints(obj.get(), 60.0f, std::numeric_limits<float>::infinity(), \"extreme\",\n    { -15.0000f, -15.0000f, -10.8018f, -6.4817f, -3.8893f, -2.3338f, -1.4004f, -0.8403f, -0.5042f,\n      -0.3026f, -0.1816f, -0.1089f, -0.0654f, -0.0392f, -0.0235f, -0.0141f, -0.0085f, -0.0051f,\n      -0.0031f, -0.0018f },\n    { 15.0000f, 15.0000f, 10.8018f, 6.4817f, 3.8893f, 2.3338f, 1.4004f, 0.8403f, 0.5042f, 0.3026f,\n      0.1816f, 0.1089f, 0.0654f, 0.0392f, 0.0235f, 0.0141f, 0.0085f, 0.0051f, 0.0031f, 0.0018f });\n}\n\nvoid TestAFTObjGPairIntervalCensoredLabels(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj(ObjFunction::Create(\"survival:aft\", ctx));\n\n  
CheckGPairOverGridPoints(obj.get(), 16.0f, 200.0f, \"normal\",\n    { -2.4435f, -1.9965f, -1.5691f, -1.1679f, -0.7990f, -0.4649f, -0.1596f, 0.1336f, 0.4370f,\n      0.7682f, 1.1340f, 1.5326f, 1.9579f, 2.4035f, 2.8639f, 3.3351f, 3.8143f, 4.2995f, 4.7891f,\n      5.2822f },\n    { 0.8909f, 0.8579f, 0.8134f, 0.7557f, 0.6880f, 0.6221f, 0.5789f, 0.5769f, 0.6171f, 0.6818f,\n      0.7500f, 0.8088f, 0.8545f, 0.8884f, 0.9131f, 0.9312f, 0.9446f, 0.9547f, 0.9624f, 0.9684f });\n  CheckGPairOverGridPoints(obj.get(), 16.0f, 200.0f, \"logistic\",\n    { -0.8790f, -0.8112f, -0.7153f, -0.5893f, -0.4375f, -0.2697f, -0.0955f, 0.0800f, 0.2545f,\n      0.4232f, 0.5768f, 0.7054f, 0.8040f, 0.8740f, 0.9210f, 0.9513f, 0.9703f, 0.9820f, 0.9891f,\n      0.9934f },\n    { 0.1086f, 0.1588f, 0.2176f, 0.2745f, 0.3164f, 0.3374f, 0.3433f, 0.3434f, 0.3384f, 0.3191f,\n      0.2789f, 0.2229f, 0.1637f, 0.1125f, 0.0737f, 0.0467f, 0.0290f, 0.0177f, 0.0108f, 0.0065f });\n  CheckGPairOverGridPoints(obj.get(), 16.0f, 200.0f, \"extreme\",\n    { -8.0000f, -4.8004f, -2.8805f, -1.7284f, -1.0371f, -0.6168f, -0.3140f, -0.0121f, 0.2841f,\n      0.5261f, 0.6989f, 0.8132f, 0.8857f, 0.9306f, 0.9581f, 0.9747f, 0.9848f, 0.9909f, 0.9945f,\n      0.9967f },\n    { 8.0000f, 4.8004f, 2.8805f, 1.7284f, 1.0380f, 0.6567f, 0.5727f, 0.6033f, 0.5384f, 0.4051f,\n      0.2757f, 0.1776f, 0.1110f, 0.0682f, 0.0415f, 0.0251f, 0.0151f, 0.0091f, 0.0055f, 0.0033f });\n}\n\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/objective/test_aft_obj.cu",
    "content": "/*!\n * Copyright 2020 XGBoost contributors\n */\n// Dummy file to keep the CUDA tests.\n\n#include \"test_aft_obj_cpu.cc\"\n"
  },
  {
    "path": "tests/cpp/objective/test_aft_obj.h",
    "content": "/**\n * Copyright 2020-2024 by XGBoost Contributors\n */\n#ifndef XGBOOST_TEST_AFT_OBJ_H_\n#define XGBOOST_TEST_AFT_OBJ_H_\n\n#include <xgboost/context.h>  // for Context\n\nnamespace xgboost::common {\n\nvoid TestAFTObjConfiguration(const Context* ctx);\n\nvoid TestAFTObjGPairUncensoredLabels(const Context* ctx);\n\nvoid TestAFTObjGPairLeftCensoredLabels(const Context* ctx);\n\nvoid TestAFTObjGPairRightCensoredLabels(const Context* ctx);\n\nvoid TestAFTObjGPairIntervalCensoredLabels(const Context* ctx);\n\n}  // namespace xgboost::common\n\n#endif  // XGBOOST_TEST_AFT_OBJ_H_\n"
  },
  {
    "path": "tests/cpp/objective/test_aft_obj_cpu.cc",
    "content": "/**\n * Copyright 2020-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <memory>\n#include <vector>\n#include <limits>\n#include <cmath>\n\n#include \"xgboost/objective.h\"\n#include \"xgboost/logging.h\"\n#include \"../helpers.h\"\n#include \"test_aft_obj.h\"\n\nnamespace xgboost::common {\nTEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestAFTObjConfiguration(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AFTObjGPairUncensoredLabels)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestAFTObjGPairUncensoredLabels(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AFTObjGPairLeftCensoredLabels)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestAFTObjGPairLeftCensoredLabels(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AFTObjGPairRightCensoredLabels)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestAFTObjGPairRightCensoredLabels(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AFTObjGPairIntervalCensoredLabels)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestAFTObjGPairIntervalCensoredLabels(&ctx);\n}\n\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/objective/test_hinge.cc",
    "content": "/**\n * Copyright 2018-2023, XGBoost Contributors\n */\n#include <xgboost/objective.h>\n#include <xgboost/context.h>\n#include <limits>\n\n#include \"../helpers.h\"\n#include \"test_hinge.h\"\n#include \"../../../src/common/linalg_op.h\"\nnamespace xgboost {\n\nvoid TestHingeObj(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"binary:hinge\", ctx)};\n\n  float eps = std::numeric_limits<xgboost::bst_float>::min();\n  std::vector<float> predt{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f};\n  std::vector<float> label{ 0.0f,  0.0f, 0.0f, 0.0f,  1.0f,  1.0f,  1.0f, 1.0f};\n  std::vector<float> grad{0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f};\n  std::vector<float> hess{eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps};\n\n  CheckObjFunction(obj, predt, label, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, grad, hess);\n  CheckObjFunction(obj, predt, label, {/* Empty weight. */}, grad, hess);\n\n  ASSERT_EQ(obj->DefaultEvalMetric(), StringView{\"error\"});\n\n  MetaInfo info;\n  info.num_row_ = label.size();\n  info.labels.Reshape(info.num_row_, 3);\n  ASSERT_EQ(obj->Targets(info), 3);\n  auto h_labels = info.labels.HostView();\n  for (std::size_t j = 0; j < obj->Targets(info); ++j) {\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      h_labels(i, j) = label[i];\n    }\n  }\n  linalg::Tensor<float, 2> t_predt{};\n  t_predt.Reshape(info.labels.Shape());\n  for (std::size_t j = 0; j < obj->Targets(info); ++j) {\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      t_predt(i, j) = predt[i];\n    }\n  }\n  linalg::Matrix<GradientPair> out_gpair;\n  obj->GetGradient(*t_predt.Data(), info, 0, &out_gpair);\n\n  for (std::size_t j = 0; j < obj->Targets(info); ++j) {\n    auto gh = out_gpair.Slice(linalg::All(), j);\n    ASSERT_EQ(gh.Size(), info.num_row_);\n    for (std::size_t i = 0; i < gh.Size(); ++i) {\n      ASSERT_EQ(gh(i).GetGrad(), grad[i]);\n      ASSERT_EQ(gh(i).GetHess(), hess[i]);\n    }\n  
}\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_hinge.cu",
    "content": "#include \"test_hinge_cpu.cc\"\n"
  },
  {
    "path": "tests/cpp/objective/test_hinge.h",
    "content": "/**\n * Copyright 2020-2024 by XGBoost Contributors\n */\n#ifndef XGBOOST_TEST_HINGE_H_\n#define XGBOOST_TEST_HINGE_H_\n\n#include <xgboost/context.h>  // for Context\n\nnamespace xgboost {\n\nvoid TestHingeObj(const Context* ctx);\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_TEST_REGRESSION_OBJ_H_\n"
  },
  {
    "path": "tests/cpp/objective/test_hinge_cpu.cc",
    "content": "/**\n * Copyright 2018-2023, XGBoost Contributors\n */\n#include <xgboost/objective.h>\n#include <xgboost/context.h>\n#include <limits>\n\n#include \"../helpers.h\"\n#include \"test_hinge.h\"\n#include \"../../../src/common/linalg_op.h\"\n\nnamespace xgboost {\n\nTEST(Objective, DeclareUnifiedTest(HingeObj)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestHingeObj(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_lambdarank_obj.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include \"test_lambdarank_obj.h\"\n\n#include <gtest/gtest.h>  // for Test, Message, TestPartResult, CmpHel...\n\n#include <algorithm>         // for sort\n#include <cstddef>           // for size_t\n#include <initializer_list>  // for initializer_list\n#include <memory>            // for unique_ptr, shared_ptr, make_shared\n#include <numeric>           // for iota\n#include <string>            // for char_traits, basic_string, string\n#include <vector>            // for vector\n\n#include \"../../../src/common/ranking_utils.h\"      // for NDCGCache, LambdaRankParam\n#include \"../../../src/objective/lambdarank_obj.h\"  // for MAPStat, MakePairs\n#include \"../helpers.h\"                  // for CheckRankingObjFunction, CheckConfigReload\n#include \"xgboost/base.h\"                // for GradientPair, bst_group_t, Args\n#include \"xgboost/context.h\"             // for Context\n#include \"xgboost/data.h\"                // for MetaInfo, DMatrix\n#include \"xgboost/host_device_vector.h\"  // for HostDeviceVector\n#include \"xgboost/linalg.h\"              // for Tensor, All, TensorView\n#include \"xgboost/objective.h\"           // for ObjFunction\n#include \"xgboost/span.h\"                // for Span\n\nnamespace xgboost::obj {\nTEST(LambdaRank, NDCGJsonIO) {\n  Context ctx;\n  TestNDCGJsonIO(&ctx);\n}\n\nvoid TestNDCGGPair(Context const* ctx) {\n  {\n    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n    obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"}});\n    CheckConfigReload(obj, \"rank:ndcg\");\n\n    // No gain in swapping 2 documents.\n    CheckRankingObjFunction(obj,\n                            {1, 1, 1, 1},\n                            {1, 1, 1, 1},\n                            {1.0f, 1.0f},\n                            {0, 2, 4},\n                            {0.0f, -0.0f, 0.0f, 0.0f},\n                            {0.0f, 
0.0f, 0.0f, 0.0f});\n  }\n  {\n    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n    obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"}});\n    // Test with setting sample weight to second query group\n    CheckRankingObjFunction(obj,\n                            {0, 0.1f, 0, 0.1f},\n                            {0,   1, 0, 1},\n                            {2.0f, 0.0f},\n                            {0, 2, 4},\n                            {2.06611f, -2.06611f, 0.0f, 0.0f},\n                            {2.169331f, 2.169331f, 0.0f, 0.0f});\n  }\n  {\n    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n    obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"}});\n    float weight_norm = 0.5;  // n_groups / sum_weights\n    std::vector<float> out_grad{2.06611f, -2.06611f, 2.06611f, -2.06611f};\n    std::vector<float> out_hess{2.169331f, 2.169331f, 2.169331f, 2.169331f};\n    auto norm = [=](auto v) { return v * weight_norm; };\n    std::transform(out_grad.begin(), out_grad.end(), out_grad.begin(), norm);\n    std::transform(out_hess.begin(), out_hess.end(), out_hess.begin(), norm);\n    CheckRankingObjFunction(obj, {0, 0.1f, 0, 0.1f}, {0, 1, 0, 1}, {2.0f, 2.0f}, {0, 2, 4},\n                            out_grad, out_hess);\n  }\n\n  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n  obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"}});\n\n  HostDeviceVector<float> predts{0, 1, 0, 1};\n  MetaInfo info;\n  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};\n  info.group_ptr_ = {0, 2, 4};\n  info.num_row_ = 4;\n  linalg::Matrix<GradientPair> gpairs;\n  obj->GetGradient(predts, info, 0, &gpairs);\n  ASSERT_EQ(gpairs.Size(), predts.Size());\n\n  {\n    predts = {1, 0, 1, 0};\n    linalg::Matrix<GradientPair> gpairs;\n    obj->GetGradient(predts, info, 0, &gpairs);\n    for (std::size_t i = 0; i 
< gpairs.Size(); ++i) {\n      ASSERT_GT(gpairs.HostView()(i).GetHess(), 0);\n    }\n    ASSERT_LT(gpairs.HostView()(1).GetGrad(), 0);\n    ASSERT_LT(gpairs.HostView()(3).GetGrad(), 0);\n\n    ASSERT_GT(gpairs.HostView()(0).GetGrad(), 0);\n    ASSERT_GT(gpairs.HostView()(2).GetGrad(), 0);\n\n    info.weights_ = {2, 3};\n    linalg::Matrix<GradientPair> weighted_gpairs;\n    obj->GetGradient(predts, info, 0, &weighted_gpairs);\n    auto const& h_gpairs = gpairs.HostView();\n    auto const& h_weighted_gpairs = weighted_gpairs.HostView();\n    for (size_t i : {0ul, 1ul}) {\n      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetGrad(), h_gpairs(i).GetGrad() * 2.0f);\n      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetHess(), h_gpairs(i).GetHess() * 2.0f);\n    }\n    for (size_t i : {2ul, 3ul}) {\n      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetGrad(), h_gpairs(i).GetGrad() * 3.0f);\n      ASSERT_FLOAT_EQ(h_weighted_gpairs(i).GetHess(), h_gpairs(i).GetHess() * 3.0f);\n    }\n  }\n\n  {\n    // Test empty input\n    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n    obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"}});\n\n    HostDeviceVector<float> predts;\n    MetaInfo info;\n    info.labels = linalg::Tensor<float, 2>{{}, {0, 1}, ctx->Device()};\n    info.group_ptr_ = {0, 0};\n    info.num_row_ = 0;\n    linalg::Matrix<GradientPair> gpairs;\n    obj->GetGradient(predts, info, 0, &gpairs);\n    ASSERT_EQ(gpairs.Size(), 0);\n  }\n  ASSERT_NO_THROW({ [[maybe_unused]] auto _ = obj->DefaultEvalMetric(); });\n}\n\nTEST(LambdaRank, NDCGGPair) {\n  Context ctx;\n  TestNDCGGPair(&ctx);\n}\n\nvoid TestUnbiasedNDCG(Context const* ctx) {\n  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:ndcg\", ctx)};\n  obj->Configure(Args{{\"lambdarank_pair_method\", \"topk\"},\n                      {\"lambdarank_unbiased\", \"true\"},\n                      {\"lambdarank_bias_norm\", \"0\"}});\n  std::shared_ptr<DMatrix> 
p_fmat{\n      RandomDataGenerator{10, 1, 0.0f}.Classes(2).GenerateDMatrix(true)};\n  auto h_label = p_fmat->Info().labels.HostView().Values();\n  // Move clicked samples to the beginning.\n  std::sort(h_label.begin(), h_label.end(), std::greater<>{});\n  HostDeviceVector<float> predt(p_fmat->Info().num_row_, 1.0f);\n\n  linalg::Matrix<GradientPair> out_gpair;\n  obj->GetGradient(predt, p_fmat->Info(), 0, &out_gpair);\n\n  Json config{Object{}};\n  obj->SaveConfig(&config);\n  auto ti_plus = get<F32Array const>(config[\"ti+\"]);\n  ASSERT_FLOAT_EQ(ti_plus[0], 1.0);\n  // bias is non-increasing when prediction is constant. (constant cost on swapping documents)\n  for (std::size_t i = 1; i < ti_plus.size(); ++i) {\n    ASSERT_LE(ti_plus[i], ti_plus[i - 1]);\n  }\n  auto tj_minus = get<F32Array const>(config[\"tj-\"]);\n  ASSERT_FLOAT_EQ(tj_minus[0], 1.0);\n}\n\nTEST(LambdaRank, UnbiasedNDCG) {\n  Context ctx;\n  TestUnbiasedNDCG(&ctx);\n}\n\nvoid InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {\n  out_predt->SetDevice(ctx->Device());\n  MetaInfo& info = *out_info;\n  info.num_row_ = 128;\n  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    shape[0] = info.num_row_;\n    shape[1] = 1;\n    auto& h_data = data->HostVector();\n    h_data.resize(shape[0]);\n    for (std::size_t i = 0; i < h_data.size(); ++i) {\n      h_data[i] = i % 2;\n    }\n  });\n  std::vector<float> predt(info.num_row_);\n  std::iota(predt.rbegin(), predt.rend(), 0.0f);\n  out_predt->HostVector() = predt;\n}\n\nTEST(LambdaRank, MakePair) {\n  Context ctx;\n  MetaInfo info;\n  HostDeviceVector<float> predt;\n\n  InitMakePairTest(&ctx, &info, &predt);\n\n  ltr::LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{{\"lambdarank_pair_method\", \"topk\"}});\n  ASSERT_TRUE(param.HasTruncation());\n\n  std::shared_ptr<ltr::RankingCache> p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n  auto 
const& h_predt = predt.ConstHostVector();\n  {\n    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);\n    for (std::size_t i = 0; i < h_predt.size(); ++i) {\n      ASSERT_EQ(rank_idx[i], static_cast<std::size_t>(*(h_predt.crbegin() + i)));\n    }\n    std::int32_t n_pairs{0};\n    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,\n              [&](auto i, auto j) {\n                ASSERT_GT(j, i);\n                ASSERT_LT(i, p_cache->Param().NumPair());\n                ++n_pairs;\n              });\n    ASSERT_EQ(n_pairs, 3568);\n  }\n\n  auto const h_label = info.labels.HostView();\n\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_pair_method\", \"mean\"}});\n    auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n    ASSERT_FALSE(param.HasTruncation());\n    std::int32_t n_pairs = 0;\n    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);\n    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,\n              [&](auto i, auto j) {\n                ++n_pairs;\n                // Not in the same bucket\n                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));\n              });\n    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());\n  }\n\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", \"2\"}});\n    auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n    auto rank_idx = p_cache->SortedIdx(&ctx, h_predt);\n    std::int32_t n_pairs = 0;\n    MakePairs(&ctx, 0, p_cache, 0, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,\n              [&](auto i, auto j) {\n                ++n_pairs;\n                // Not in the same bucket\n                ASSERT_NE(h_label(rank_idx[i]), h_label(rank_idx[j]));\n              });\n    ASSERT_EQ(param.NumPair(), 2);\n    ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());\n  }\n}\n\nvoid TestMAPStat(Context const* ctx) {\n  auto p_fmat = EmptyDMatrix();\n  
MetaInfo& info = p_fmat->Info();\n  ltr::LambdaRankParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  {\n    std::vector<float> h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f};\n    info.labels.Reshape(h_data.size(), 1);\n    info.labels.Data()->HostVector() = h_data;\n    info.num_row_ = h_data.size();\n\n    HostDeviceVector<float> predt;\n    auto& h_predt = predt.HostVector();\n    h_predt.resize(h_data.size());\n    std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f);\n\n    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);\n\n    predt.SetDevice(ctx->Device());\n    auto rank_idx =\n        p_cache->SortedIdx(ctx, !ctx->IsCUDA() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());\n\n    if (!ctx->IsCUDA()) {\n      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,\n                             p_cache);\n    } else {\n      obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);\n    }\n\n    Context cpu_ctx;\n    auto n_rel = p_cache->NumRelevant(&cpu_ctx);\n    auto acc = p_cache->Acc(&cpu_ctx);\n\n    ASSERT_EQ(n_rel[0], 1.0);\n    ASSERT_EQ(acc[0], 1.0);\n\n    ASSERT_EQ(n_rel.back(), h_data.size() - 1.0);\n    ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps);\n  }\n  {\n    info.labels.Reshape(16);\n    auto& h_label = info.labels.Data()->HostVector();\n    info.group_ptr_ = {0, 8, 16};\n    info.num_row_ = info.labels.Shape(0);\n\n    std::fill_n(h_label.begin(), 8, 1.0f);\n    std::fill_n(h_label.begin() + 8, 8, 0.0f);\n    HostDeviceVector<float> predt;\n    auto& h_predt = predt.HostVector();\n    h_predt.resize(h_label.size());\n    std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f);\n    std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f);\n\n    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);\n\n    predt.SetDevice(ctx->Device());\n    auto rank_idx =\n        p_cache->SortedIdx(ctx, !ctx->IsCUDA() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan());\n\n    if (!ctx->IsCUDA()) {\n      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,\n                             p_cache);\n    } else {\n      obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);\n    }\n\n    Context cpu_ctx;\n    auto n_rel = p_cache->NumRelevant(&cpu_ctx);\n    ASSERT_EQ(n_rel[7], 8);      // first group\n    ASSERT_EQ(n_rel.back(), 0);  // second group\n  }\n}\n\nTEST(LambdaRank, MAPStat) {\n  Context ctx;\n  TestMAPStat(&ctx);\n}\n\nvoid TestMAPGPair(Context const* ctx) {\n  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:map\", ctx)};\n  obj->Configure({});\n\n  CheckConfigReload(obj, \"rank:map\");\n\n  CheckRankingObjFunction(obj,                                                 // obj\n                          {0, 0.1f, 0, 0.1f},                                  // score\n                          {0, 1, 0, 1},                                        // label\n                          {2.0f, 2.0f},                                        // weight\n                          {0, 2, 4},                                           // group\n                          {1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f},  // out grad\n                          {1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f});\n\n  obj.reset(xgboost::ObjFunction::Create(\"rank:map\", ctx));\n  obj->Configure({});\n\n  // disable the second query group with 0 weight\n  auto w = 2.0f;  // weight for the first group\n  // weight norm is 1.0 (n_groups / sum_weights)\n  CheckRankingObjFunction(obj,                                          // obj\n                          {0, 0.1f, 0, 0.1f},                           // score\n                          {0, 1, 0, 1},                                 // label\n                          {w, 0.0f},                                    // weight\n                          {0, 2, 4},                            
        // group\n                          {1.2054923f * w, -1.2054923f * w, .0f, .0f},  // out grad\n                          {1.2657166f * w, 1.2657166f * w, .0f, .0f});\n}\n\nTEST(LambdaRank, MAPGPair) {\n  Context ctx;\n  TestMAPGPair(&ctx);\n}\n\nvoid TestPairWiseGPair(Context const* ctx) {\n  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(\"rank:pairwise\", ctx)};\n  Args args;\n  obj->Configure(args);\n\n  args.emplace_back(\"lambdarank_unbiased\", \"true\");\n}\n\nTEST(LambdaRank, Pairwise) {\n  Context ctx;\n  TestPairWiseGPair(&ctx);\n}\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "tests/cpp/objective/test_lambdarank_obj.cu",
    "content": "/**\n * Copyright 2023 by XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>                     // for Context\n\n#include <cstdint>                               // for uint32_t\n#include <vector>                                // for vector\n\n#include \"../../../src/common/cuda_context.cuh\"  // for CUDAContext\n#include \"../../../src/objective/lambdarank_obj.cuh\"\n#include \"test_lambdarank_obj.h\"\n\nnamespace xgboost::obj {\nTEST(LambdaRank, GPUNDCGJsonIO) {\n  auto ctx = MakeCUDACtx(0);\n  TestNDCGJsonIO(&ctx);\n}\n\nTEST(LambdaRank, GPUMAPStat) {\n  auto ctx = MakeCUDACtx(0);\n  TestMAPStat(&ctx);\n}\n\nTEST(LambdaRank, GPUNDCGGPair) {\n  auto ctx = MakeCUDACtx(0);\n  TestNDCGGPair(&ctx);\n}\n\nvoid TestGPUMakePair() {\n  auto ctx = MakeCUDACtx(0);\n\n  MetaInfo info;\n  HostDeviceVector<float> predt;\n  InitMakePairTest(&ctx, &info, &predt);\n\n  ltr::LambdaRankParam param;\n\n  auto make_args = [&](std::shared_ptr<ltr::RankingCache> p_cache, auto rank_idx,\n                       common::Span<std::size_t const> y_sorted_idx) {\n    linalg::Vector<double> dummy;\n    auto d = dummy.View(ctx.Device());\n    linalg::Vector<GradientPair> dgpair;\n    auto dg = dgpair.View(ctx.Device());\n    cuda_impl::KernelInputs args{\n        d,\n        d,\n        d,\n        d,\n        p_cache->DataGroupPtr(&ctx),\n        p_cache->CUDAThreadsGroupPtr(),\n        rank_idx,\n        info.labels.View(ctx.Device()),\n        predt.ConstDeviceSpan(),\n        linalg::MatrixView<GradientPair>{common::Span<GradientPair>{}, {0}, DeviceOrd::CUDA(0)},\n        dg,\n        nullptr,\n        y_sorted_idx,\n        0};\n    return args;\n  };\n\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_pair_method\", \"topk\"}});\n    auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n    auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());\n\n    ASSERT_EQ(p_cache->CUDAThreads(), 3568);\n\n    auto 
args = make_args(p_cache, rank_idx, {});\n    auto n_pairs = p_cache->Param().NumPair();\n    auto make_pair = cuda_impl::MakePairsOp<true>{args};\n\n    dh::LaunchN(p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(),\n                [=] XGBOOST_DEVICE(std::size_t idx) {\n                  auto [i, j] = make_pair(idx, 0);\n                  SPAN_CHECK(j > i);\n                  SPAN_CHECK(i < n_pairs);\n                });\n  }\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_pair_method\", \"mean\"}});\n    auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n    auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());\n    auto y_sorted_idx = cuda_impl::SortY(&ctx, info, rank_idx, p_cache);\n\n    ASSERT_FALSE(param.HasTruncation());\n    ASSERT_EQ(p_cache->CUDAThreads(), info.num_row_ * param.NumPair());\n\n    auto args = make_args(p_cache, rank_idx, y_sorted_idx);\n    auto make_pair = cuda_impl::MakePairsOp<false>{args};\n    auto n_pairs = p_cache->Param().NumPair();\n    ASSERT_EQ(n_pairs, 1);\n\n    dh::LaunchN(\n        p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t idx) {\n          idx = 97;\n          auto [i, j] = make_pair(idx, 0);\n          // Not in the same bucket\n          SPAN_CHECK(make_pair.args.labels(rank_idx[i]) != make_pair.args.labels(rank_idx[j]));\n        });\n  }\n  {\n    param.UpdateAllowUnknown(Args{{\"lambdarank_num_pair_per_sample\", \"2\"}});\n    auto p_cache = std::make_shared<ltr::NDCGCache>(&ctx, info, param);\n    auto rank_idx = p_cache->SortedIdx(&ctx, predt.ConstDeviceSpan());\n    auto y_sorted_idx = cuda_impl::SortY(&ctx, info, rank_idx, p_cache);\n\n    auto args = make_args(p_cache, rank_idx, y_sorted_idx);\n    auto make_pair = cuda_impl::MakePairsOp<false>{args};\n\n    dh::LaunchN(\n        p_cache->CUDAThreads(), ctx.CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t idx) {\n          auto [i, j] = make_pair(idx, 0);\n          // Not in the same 
bucket\n          SPAN_CHECK(make_pair.args.labels(rank_idx[i]) != make_pair.args.labels(rank_idx[j]));\n        });\n    ASSERT_EQ(param.NumPair(), 2);\n    ASSERT_EQ(p_cache->CUDAThreads(), info.num_row_ * param.NumPair());\n  }\n}\n\nTEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }\n\nTEST(LambdaRank, GPUUnbiasedNDCG) {\n  auto ctx = MakeCUDACtx(0);\n  TestUnbiasedNDCG(&ctx);\n}\n\ntemplate <typename CountFunctor>\nvoid RankItemCountImpl(std::vector<std::uint32_t> const &sorted_items, CountFunctor f,\n                       std::uint32_t find_val, std::uint32_t exp_val) {\n  EXPECT_NE(std::find(sorted_items.begin(), sorted_items.end(), find_val), sorted_items.end());\n  EXPECT_EQ(f(&sorted_items[0], sorted_items.size(), find_val), exp_val);\n}\n\nTEST(LambdaRank, RankItemCountOnLeft) {\n  // Items sorted descendingly\n  std::vector<std::uint32_t> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};\n  auto wrapper = [](auto const &...args) { return cuda_impl::CountNumItemsToTheLeftOf(args...); };\n  RankItemCountImpl(sorted_items, wrapper, 10, static_cast<uint32_t>(0));\n  RankItemCountImpl(sorted_items, wrapper, 6, static_cast<uint32_t>(2));\n  RankItemCountImpl(sorted_items, wrapper, 4, static_cast<uint32_t>(3));\n  RankItemCountImpl(sorted_items, wrapper, 1, static_cast<uint32_t>(7));\n  RankItemCountImpl(sorted_items, wrapper, 0, static_cast<uint32_t>(12));\n}\n\nTEST(LambdaRank, RankItemCountOnRight) {\n  // Items sorted descendingly\n  std::vector<std::uint32_t> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};\n  auto wrapper = [](auto const &...args) { return cuda_impl::CountNumItemsToTheRightOf(args...); };\n  RankItemCountImpl(sorted_items, wrapper, 10, static_cast<uint32_t>(11));\n  RankItemCountImpl(sorted_items, wrapper, 6, static_cast<uint32_t>(10));\n  RankItemCountImpl(sorted_items, wrapper, 4, static_cast<uint32_t>(6));\n  RankItemCountImpl(sorted_items, wrapper, 1, static_cast<uint32_t>(1));\n  RankItemCountImpl(sorted_items, 
wrapper, 0, static_cast<uint32_t>(0));\n}\n\nTEST(LambdaRank, GPUMAPGPair) {\n  auto ctx = MakeCUDACtx(0);\n  TestMAPGPair(&ctx);\n}\n}  // namespace xgboost::obj\n"
  },
  {
    "path": "tests/cpp/objective/test_lambdarank_obj.h",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_\n#define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_\n#include <gtest/gtest.h>\n#include <xgboost/data.h>                           // for MetaInfo\n#include <xgboost/host_device_vector.h>             // for HostDeviceVector\n#include <xgboost/linalg.h>                         // for All\n#include <xgboost/objective.h>                      // for ObjFunction\n\n#include <memory>                                   // for shared_ptr, make_shared\n\n#include \"../../../src/common/ranking_utils.h\"      // for LambdaRankParam, MAPCache\n#include \"../helpers.h\"                             // for EmptyDMatrix\n\nnamespace xgboost::obj {\nvoid TestMAPStat(Context const* ctx);\n\ninline void TestNDCGJsonIO(Context const* ctx) {\n  std::unique_ptr<xgboost::ObjFunction> obj{ObjFunction::Create(\"rank:ndcg\", ctx)};\n\n  obj->Configure(Args{});\n  Json j_obj{Object()};\n  obj->SaveConfig(&j_obj);\n\n  ASSERT_EQ(get<String>(j_obj[\"name\"]), \"rank:ndcg\");\n  auto const& j_param = j_obj[\"lambdarank_param\"];\n\n  ASSERT_EQ(get<String>(j_param[\"ndcg_exp_gain\"]), \"1\");\n  ASSERT_EQ(get<String>(j_param[\"lambdarank_num_pair_per_sample\"]),\n            std::to_string(ltr::LambdaRankParam::NotSet()));\n}\n\nvoid TestNDCGGPair(Context const* ctx);\n\nvoid TestUnbiasedNDCG(Context const* ctx);\n\nvoid TestMAPGPair(Context const* ctx);\n\n/**\n * \\brief Initialize test data for make pair tests.\n */\nvoid InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt);\n}  // namespace xgboost::obj\n#endif  // XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_\n"
  },
  {
    "path": "tests/cpp/objective/test_multiclass_obj.cc",
    "content": "/**\n * Copyright 2018-2025, XGBoost contributors\n */\n#include <xgboost/objective.h>\n#include <xgboost/context.h>\n#include \"../helpers.h\"\n#include \"test_multiclass_obj.h\"\n\nnamespace xgboost {\n\nvoid TestSoftmaxMultiClassObjGPair(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args {{\"num_class\", \"3\"}};\n  std::unique_ptr<ObjFunction> obj {\n    ObjFunction::Create(\"multi:softmax\", ctx)\n  };\n\n  obj->Configure(args);\n  CheckConfigReload(obj, \"multi:softmax\");\n\n  CheckObjFunction(obj,\n\t\t   {1.0f, 0.0f, 2.0f, 2.0f, 0.0f, 1.0f}, // preds\n\t\t   {1.0f, 0.0f},\t       // labels\n\t\t   {1.0f, 1.0f},\t       // weights\n\t\t   {0.24f, -0.91f, 0.66f, -0.33f, 0.09f, 0.24f}, // grad\n\t\t   {0.36f, 0.16f, 0.44f, 0.45f, 0.16f, 0.37f});\t // hess\n\n  CheckObjFunction(obj,\n\t\t   {1.0f, 0.0f, 2.0f, 2.0f, 0.0f, 1.0f}, // preds\n\t\t   {1.0f, 0.0f},\t       // labels\n                   {},                         // weights\n\t\t   {0.24f, -0.91f, 0.66f, -0.33f, 0.09f, 0.24f}, // grad\n\t\t   {0.36f, 0.16f, 0.44f, 0.45f, 0.16f, 0.37f});\t // hess\n\n  ASSERT_NO_THROW({ [[maybe_unused]] auto _ = obj->DefaultEvalMetric(); });\n}\n\nvoid TestSoftmaxMultiClassBasic(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args{\n      std::pair<std::string, std::string>(\"num_class\", \"3\")};\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"multi:softmax\", ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, \"multi:softmax\");\n\n  HostDeviceVector<bst_float>  io_preds = {2.0f, 0.0f, 1.0f,\n                                           1.0f, 0.0f, 2.0f};\n  std::vector<bst_float> out_preds = {0.0f, 2.0f};\n  obj->PredTransform(&io_preds);\n\n  auto& preds = io_preds.HostVector();\n\n  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\nvoid TestSoftprobMultiClassBasic(const Context* ctx) {\n  
std::vector<std::pair<std::string, std::string>> args {\n    std::pair<std::string, std::string>(\"num_class\", \"3\")};\n\n  std::unique_ptr<ObjFunction> obj {\n    ObjFunction::Create(\"multi:softprob\", ctx)\n  };\n  obj->Configure(args);\n  CheckConfigReload(obj, \"multi:softprob\");\n\n  HostDeviceVector<bst_float>  io_preds = {2.0f, 0.0f, 1.0f};\n  std::vector<bst_float> out_preds = {0.66524096f, 0.09003057f, 0.24472847f};\n\n  obj->PredTransform(&io_preds);\n  auto& preds = io_preds.HostVector();\n\n  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_multiclass_obj.h",
    "content": "/**\n * Copyright 2020-2023 by XGBoost Contributors\n */\n#ifndef XGBOOST_TEST_MULTICLASS_OBJ_H_\n#define XGBOOST_TEST_MULTICLASS_OBJ_H_\n\n#include <xgboost/context.h>  // for Context\n\nnamespace xgboost {\n\nvoid TestSoftmaxMultiClassObjGPair(const Context* ctx);\n\nvoid TestSoftmaxMultiClassBasic(const Context* ctx);\n\nvoid TestSoftprobMultiClassBasic(const Context* ctx);\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_TEST_MULTICLASS_OBJ_H_\n"
  },
  {
    "path": "tests/cpp/objective/test_multiclass_obj_cpu.cc",
    "content": "/*!\n * Copyright 2018-2023 XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"test_multiclass_obj.h\"\n\nnamespace xgboost {\nTEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestSoftmaxMultiClassObjGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  TestSoftmaxMultiClassBasic(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestSoftprobMultiClassBasic(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_multiclass_obj_gpu.cu",
    "content": "#include \"test_multiclass_obj_cpu.cc\"\n"
  },
  {
    "path": "tests/cpp/objective/test_objective.cc",
    "content": "/**\n * Copyright 2016-2023 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/objective.h>\n\n#include \"../helpers.h\"\n#include \"../objective_helpers.h\"\n\nTEST(Objective, UnknownFunction) {\n  xgboost::ObjFunction* obj = nullptr;\n  xgboost::Context tparam;\n  std::vector<std::pair<std::string, std::string>> args;\n  tparam.UpdateAllowUnknown(args);\n\n  EXPECT_ANY_THROW(obj = xgboost::ObjFunction::Create(\"unknown_name\", &tparam));\n  EXPECT_NO_THROW(obj = xgboost::ObjFunction::Create(\"reg:squarederror\", &tparam));\n  if (obj) {\n    delete obj;\n  }\n}\n\nnamespace xgboost {\nTEST(Objective, PredTransform) {\n  // Test that show PredTransform uses the same device with predictor.\n  xgboost::Context tparam;\n  tparam.UpdateAllowUnknown(Args{{\"device\", \"cuda\"}});\n  size_t n = 100;\n\n  for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {\n    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};\n    if (entry->name.find(\"multi\") != std::string::npos) {\n      obj->Configure(Args{{\"num_class\", \"2\"}});\n    }\n    if (entry->name.find(\"quantile\") != std::string::npos) {\n      obj->Configure(Args{{\"quantile_alpha\", \"0.5\"}});\n    }\n    if (entry->name.find(\"expectile\") != std::string::npos) {\n      obj->Configure(Args{{\"expectile_alpha\", \"0.5\"}});\n    }\n    HostDeviceVector<float> predts;\n    predts.Resize(n, 3.14f);  // prediction is performed on host.\n    ASSERT_FALSE(predts.DeviceCanRead());\n    obj->PredTransform(&predts);\n    ASSERT_FALSE(predts.DeviceCanRead());\n    ASSERT_TRUE(predts.HostCanWrite());\n  }\n}\n\nclass TestDefaultObjConfig : public ::testing::TestWithParam<std::string> {\n  Context ctx_;\n\n public:\n  void Run(std::string objective) {\n    auto Xy = MakeFmatForObjTest(objective, 10, 10, 3);\n    std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n    
std::unique_ptr<ObjFunction> objfn{ObjFunction::Create(objective, &ctx_)};\n\n    learner->SetParam(\"objective\", objective);\n    if (objective.find(\"multi\") != std::string::npos) {\n      learner->SetParam(\"num_class\", \"3\");\n      objfn->Configure(Args{{\"num_class\", \"3\"}});\n    } else if (objective.find(\"quantile\") != std::string::npos) {\n      learner->SetParam(\"quantile_alpha\", \"0.5\");\n      objfn->Configure(Args{{\"quantile_alpha\", \"0.5\"}});\n    } else if (objective.find(\"expectile\") != std::string::npos) {\n      learner->SetParam(\"expectile_alpha\", \"0.5\");\n      objfn->Configure(Args{{\"expectile_alpha\", \"0.5\"}});\n    } else {\n      objfn->Configure(Args{});\n    }\n    learner->Configure();\n    learner->UpdateOneIter(0, Xy);\n    learner->EvalOneIter(0, {Xy}, {\"train\"});\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto jobj = get<Object const>(config[\"learner\"][\"objective\"]);\n\n    ASSERT_TRUE(jobj.find(\"name\") != jobj.cend());\n    // FIXME(jiamingy): We should have the following check, but some legacy parameter like\n    // \"pos_weight\", \"delta_step\" in objectives are not in metrics.\n\n    // if (jobj.size() > 1) {\n    //   ASSERT_FALSE(IsA<Null>(objfn->DefaultMetricConfig()));\n    // }\n    auto mconfig = objfn->DefaultMetricConfig();\n    if (!IsA<Null>(mconfig)) {\n      // make sure metric can handle it\n      std::unique_ptr<Metric> metricfn{Metric::Create(get<String const>(mconfig[\"name\"]), &ctx_)};\n      metricfn->LoadConfig(mconfig);\n      Json loaded(Object{});\n      metricfn->SaveConfig(&loaded);\n      metricfn->Configure(Args{});\n      ASSERT_EQ(mconfig, loaded);\n    }\n  }\n};\n\nTEST_P(TestDefaultObjConfig, Objective) {\n  std::string objective = GetParam();\n  this->Run(objective);\n}\n\nINSTANTIATE_TEST_SUITE_P(Objective, TestDefaultObjConfig,\n                         ::testing::ValuesIn(MakeObjNamesForTest()),\n                         [](const 
::testing::TestParamInfo<TestDefaultObjConfig::ParamType>& info) {\n                           return ObjTestNameGenerator(info);\n                         });\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_objective_helpers.h",
    "content": "/**\n * Copyright 2026, XGBoost contributors\n */\n#pragma once\n\n#include <xgboost/base.h>                // for bst_node_t, bst_idx_t\n#include <xgboost/data.h>                // for MetaInfo\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n\n#include <numeric>  // for iota\n\nnamespace xgboost {\n// Generate node position for two nodes.\ninline auto MakePositionsForTest(bst_idx_t n_samples, bst_node_t left_nidx, bst_node_t right_nidx,\n                                 HostDeviceVector<bst_node_t>* p_position) {\n  HostDeviceVector<bst_node_t>& position = *p_position;\n  position.Resize(n_samples, 0);\n  auto& h_position = position.HostVector();\n  for (size_t i = 0; i < n_samples; ++i) {\n    if (i < n_samples / 2) {\n      h_position[i] = left_nidx;\n    } else {\n      h_position[i] = right_nidx;\n    }\n  }\n}\n\ninline void MakeIotaLabelsForTest(bst_idx_t n_samples, bst_target_t n_targets, MetaInfo* p_info) {\n  auto& info = *p_info;\n  std::vector<float> labels(n_samples * n_targets);\n  std::iota(labels.begin(), labels.end(), 0.0f);\n  info.labels.Reshape(n_samples, n_targets);\n  info.labels.Data()->HostVector() = labels;\n  info.num_row_ = n_samples;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_quantile_obj.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include \"test_quantile_obj.h\"\n\n#include <xgboost/base.h>       // for Args\n#include <xgboost/context.h>    // for Context\n#include <xgboost/data.h>       // for MetaInfo\n#include <xgboost/objective.h>  // for ObjFunction\n#include <xgboost/span.h>       // for Span\n\n#include <memory>  // for unique_ptr\n#include <vector>  // for vector\n\n#include \"../helpers.h\"  // CheckConfigReload,MakeCUDACtx,DeclareUnifiedTest\n\nnamespace xgboost {\nvoid TestQuantile(Context const* ctx) {\n  {\n    Args args{{\"quantile_alpha\", \"[0.6, 0.8]\"}};\n    std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:quantileerror\", ctx)};\n    obj->Configure(args);\n    CheckConfigReload(obj, \"reg:quantileerror\");\n  }\n\n  Args args{{\"quantile_alpha\", \"0.6\"}};\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:quantileerror\", ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:quantileerror\");\n\n  std::vector<float> predts{1.0f, 2.0f, 3.0f};\n  std::vector<float> labels{3.0f, 2.0f, 1.0f};\n  std::vector<float> weights{1.0f, 1.0f, 1.0f};\n  std::vector<float> grad{-0.6f, 0.4f, 0.4f};\n  std::vector<float> hess = weights;\n  CheckObjFunction(obj, predts, labels, weights, grad, hess);\n}\n\nvoid TestQuantileIntercept(Context const* ctx) {\n  Args args{{\"quantile_alpha\", \"[0.6, 0.8]\"}};\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:quantileerror\", ctx)};\n  obj->Configure(args);\n\n  MetaInfo info;\n  info.num_row_ = 10;\n  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    data->SetDevice(ctx->Device());\n    data->Resize(info.num_row_);\n    shape[0] = info.num_row_;\n    shape[1] = 1;\n\n    auto& h_labels = data->HostVector();\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      h_labels[i] = i;\n    }\n  });\n\n  linalg::Vector<float> base_scores;\n  obj->InitEstimation(info, 
&base_scores);\n  ASSERT_EQ(base_scores.Size(), 2);\n  ASSERT_NEAR(base_scores(0), 5.6, kRtEps);\n  ASSERT_NEAR(base_scores(1), 7.8, kRtEps);\n\n  for (std::size_t i = 0; i < info.num_row_; ++i) {\n    info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0);\n  }\n\n  obj->InitEstimation(info, &base_scores);\n  ASSERT_EQ(base_scores.Size(), 2);\n  ASSERT_NEAR(base_scores(0), 3.0, kRtEps);\n  ASSERT_NEAR(base_scores(1), 5.0, kRtEps);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_quantile_obj.h",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef XGBOOST_TEST_QUANTILE_OBJ_H_\n#define XGBOOST_TEST_QUANTILE_OBJ_H_\n\n#include <xgboost/context.h>  // for Context\n\nnamespace xgboost {\n\nvoid TestQuantile(Context const* ctx);\n\nvoid TestQuantileIntercept(Context const* ctx);\n\n}  // namespace xgboost\n\n#endif  // XGBOOST_TEST_REGRESSION_OBJ_H_\n"
  },
  {
    "path": "tests/cpp/objective/test_quantile_obj_cpu.cc",
    "content": "/**\n * Copyright 2024-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"test_quantile_obj.h\"\n#include \"test_regression_obj.h\"  // for TestVectorLeafObj\n\nnamespace xgboost {\nTEST(Objective, DeclareUnifiedTest(Quantile)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestQuantile(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestQuantileIntercept(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(QuantileVectorLeaf)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  bst_idx_t n_samples = 10;\n  std::vector<float> sol_left{1.0f, 4.0f, 7.0f};\n  std::vector<float> sol_right{11.0f, 14.0f, 17.0f};\n  Args args{{\"quantile_alpha\", \"[0.25, 0.5, 0.75]\"}};\n  TestVectorLeafObj(&ctx, \"reg:quantileerror\", args, n_samples, 1u, sol_left, sol_right);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_quantile_obj_gpu.cu",
    "content": "/**\n * Copyright 2023 XGBoost contributors\n */\n// Dummy file to enable the CUDA tests.\n#include \"test_quantile_obj_cpu.cc\"\n"
  },
  {
    "path": "tests/cpp/objective/test_regression_obj.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include \"test_regression_obj.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/json.h>\n#include <xgboost/objective.h>\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include <memory>   // for unique_ptr\n#include <numeric>  // for iota\n#include <utility>  // for pair\n\n#include \"../../../src/common/linalg_op.h\"  // for begin, end\n#include \"../../../src/tree/param.h\"        // for TrainParam\n#include \"../../../src/tree/tree_view.h\"    // for MultiTargetTreeView\n#include \"../helpers.h\"\n#include \"../tree/test_multi_target_tree_model.h\"  // for MakeMtTreeForTest\n#include \"test_objective_helpers.h\"  // for MakePositionsForTest, MakeIotaLabelsForTest\n#include \"xgboost/base.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/linalg.h\"\n#include \"xgboost/tree_model.h\"  // for RegTree\n\nnamespace xgboost {\nnamespace {\nvoid CheckProbaToMargin(std::unique_ptr<ObjFunction> const& obj, float in, float expect,\n                        float abs_error = 1e-2f) {\n  linalg::Vector<float> t{{in}, {1}, obj->Ctx()->Device()};\n  obj->ProbToMargin(&t);\n  ASSERT_NEAR(t(0), expect, abs_error);\n}\n}  // namespace\n\nvoid TestLinearRegressionGPair(const Context* ctx) {\n  std::string obj_name = \"reg:squarederror\";\n\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};\n\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {0, 0.1f, 0.9f,   1,    0,  0.1f, 0.9f,  1},\n                   {0,   0,   0,   0,    1,    1,    1, 1},\n                   {1,   1,   1,   1,    1,    1,    1, 1},\n                   {0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},\n                   {1,   1,   1,   1,    1,    1,    1, 1});\n  CheckObjFunction(obj,\n                   {0, 0.1f, 0.9f,   1,    0,  0.1f, 0.9f,  1},\n                   {0,   0,   
0,   0,    1,    1,    1, 1},\n                   {},  // empty weight\n                   {0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},\n                   {1,   1,   1,   1,    1,    1,    1, 1});\n  // clang-format on\n  ASSERT_NO_THROW({ [[maybe_unused]] auto _ = obj->DefaultEvalMetric(); });\n}\n\nvoid TestSquaredLog(const Context* ctx) {\n  std::string obj_name = \"reg:squaredlogerror\";\n  std::vector<std::pair<std::string, std::string>> args;\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, obj_name);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // weights\n                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},\n                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});\n  CheckObjFunction(obj,\n                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels\n                   {},                              // empty weights\n                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},\n                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});\n  // clang-format on\n  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{\"rmsle\"});\n}\n\nvoid TestLogisticRegressionGPair(const Context* ctx) {\n  std::string obj_name = \"reg:logistic\";\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};\n\n  obj->Configure(args);\n  CheckConfigReload(obj, obj_name);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1,    0,   0.1f,  0.9f,      1},  // preds\n                   {   0,    0,    0,    0,    1,     1,     1,     1},  // labels\n                   {   1,    1,    1,    1,    1,   
  1,     1,     1},  // weights\n                   { 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f},  // out_grad\n                   {0.25f, 0.24f, 0.20f, 0.19f, 0.25f,  0.24f,  0.20f,  0.19f});  // out_hess\n  // clang-format on\n}\n\nvoid TestLogisticRegressionBasic(const Context* ctx) {\n  std::string obj_name = \"reg:logistic\";\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};\n\n  obj->Configure(args);\n  CheckConfigReload(obj, obj_name);\n\n  // test label validation\n  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))\n      << \"Expected error when label not in range [0,1f] for LogisticRegression\";\n\n  // test ProbToMargin\n  CheckProbaToMargin(obj, 0.1f, -2.197f);\n  CheckProbaToMargin(obj, 0.5f, 0);\n  CheckProbaToMargin(obj, 0.9f, 2.197f);\n  ASSERT_THAT([&] { CheckProbaToMargin(obj, 10, 0); }, GMockThrow(\"base_score must be in (0,1)\"));\n\n  // test PredTransform\n  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};\n  std::vector<bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};\n  obj->PredTransform(&io_preds);\n  auto& preds = io_preds.HostVector();\n  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\nvoid TestsLogisticRawGPair(const Context* ctx) {\n  std::string obj_name = \"binary:logitraw\";\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1,    0,   0.1f,   0.9f,     1},\n                   {   0,    0,    0,    0,    1,     1,     1,     1},\n                   {   1,    1,    1,    1,    1,     1,     1,     1},\n                   { 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f},\n                   {0.25f, 0.24f, 0.20f, 
0.19f, 0.25f,  0.24f,  0.20f,  0.19f});\n  // clang-format on\n}\n\nvoid TestPoissonRegressionGPair(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"count:poisson\", ctx)};\n\n  args.emplace_back(\"max_delta_step\", \"0.1f\");\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1,    0,  0.1f,  0.9f,    1},\n                   {   0,    0,    0,    0,    1,    1,    1,    1},\n                   {   1,    1,    1,    1,    1,    1,    1,    1},\n                   {   1, 1.10f, 2.45f, 2.71f,    0, 0.10f, 1.45f, 1.71f},\n                   {1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1,    0,  0.1f,  0.9f,    1},\n                   {   0,    0,    0,    0,    1,    1,    1,    1},\n                   {},  // Empty weight\n                   {   1, 1.10f, 2.45f, 2.71f,    0, 0.10f, 1.45f, 1.71f},\n                   {1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});\n  // clang-format on\n}\n\nvoid TestPoissonRegressionBasic(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"count:poisson\", ctx)};\n\n  obj->Configure(args);\n  CheckConfigReload(obj, \"count:poisson\");\n\n  // test label validation\n  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))\n      << \"Expected error when label < 0 for PoissonRegression\";\n\n  // test ProbToMargin\n  CheckProbaToMargin(obj, 0.1f, -2.30f);\n  CheckProbaToMargin(obj, 0.5f, -0.69f);\n  CheckProbaToMargin(obj, 0.9f, -0.10f);\n\n  // test PredTransform\n  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};\n  std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};\n  obj->PredTransform(&io_preds);\n  auto& preds = io_preds.HostVector();\n  for (int i = 0; i < 
static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\nvoid TestGammaRegressionGPair(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:gamma\", ctx)};\n\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {0, 0.1f, 0.9f, 1, 0,  0.1f,  0.9f,    1},\n                   {2,   2,   2,   2, 1,    1,    1,    1},\n                   {1,   1,   1,   1, 1,    1,    1,    1},\n                   {-1,  -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},\n                   {2,   1.809,  0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});\n  CheckObjFunction(obj,\n                   {0, 0.1f, 0.9f, 1, 0,  0.1f,  0.9f,    1},\n                   {2,   2,   2,   2, 1,    1,    1,    1},\n                   {},  // Empty weight\n                   {-1,  -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},\n                   {2,   1.809,  0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});\n  // clang-format on\n}\n\nvoid TestGammaRegressionBasic(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:gamma\", ctx)};\n\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:gamma\");\n\n  // test label validation\n  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {0}, {1}, {0}, {0}))\n      << \"Expected error when label = 0 for GammaRegression\";\n  EXPECT_ANY_THROW(CheckObjFunction(obj, {-1}, {-1}, {1}, {-1}, {-3}))\n      << \"Expected error when label < 0 for GammaRegression\";\n\n  // test ProbToMargin\n  CheckProbaToMargin(obj, 0.1f, -2.30f);\n  CheckProbaToMargin(obj, 0.5f, -0.69f);\n  CheckProbaToMargin(obj, 0.9f, -0.10f);\n\n  // test PredTransform\n  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};\n  std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};\n  obj->PredTransform(&io_preds);\n  auto& preds = 
io_preds.HostVector();\n  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\nvoid TestTweedieRegressionGPair(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:tweedie\", ctx)};\n\n  args.emplace_back(\"tweedie_variance_power\", \"1.1f\");\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1, 0,  0.1f,  0.9f,    1},\n                   {   0,    0,    0,    0, 1,    1,    1,    1},\n                   {   1,    1,    1,    1, 1,    1,    1,    1},\n                   {   1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},\n                   {0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});\n  CheckObjFunction(obj,\n                   {   0,  0.1f,  0.9f,    1, 0,  0.1f,  0.9f,    1},\n                   {   0,    0,    0,    0, 1,    1,    1,    1},\n                   {},  // Empty weight.\n                   {   1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},\n                   {0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});\n  // clang-format on\n  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{\"tweedie-nloglik@1.1\"});\n}\n\nvoid TestTweedieRegressionBasic(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:tweedie\", ctx)};\n\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:tweedie\");\n\n  // test label validation\n  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))\n      << \"Expected error when label < 0 for TweedieRegression\";\n\n  // test ProbToMargin\n  CheckProbaToMargin(obj, 0.1f, -2.30f);\n  CheckProbaToMargin(obj, 0.5f, -0.69f);\n  CheckProbaToMargin(obj, 0.9f, -0.10f);\n\n  // test PredTransform\n  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};\n  std::vector<bst_float> out_preds 
= {1, 1.10f, 1.64f, 2.45f, 2.71f};\n  obj->PredTransform(&io_preds);\n  auto& preds = io_preds.HostVector();\n  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {\n    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);\n  }\n}\n\nvoid TestCoxRegressionGPair(const Context* ctx) {\n  std::vector<std::pair<std::string, std::string>> args;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"survival:cox\", ctx)};\n\n  obj->Configure(args);\n  // clang-format off\n  CheckObjFunction(obj,\n                   { 0, 0.1f, 0.9f,       1,       0,    0.1f,   0.9f,       1},\n                   { 0,   -2,   -2,       2,       3,       5,    -10,     100},\n                   { 1,    1,    1,       1,       1,       1,      1,       1},\n                   { 0,    0,    0, -0.799f, -0.788f, -0.590f, 0.910f,  1.006f},\n                   { 0,    0,    0,  0.160f,  0.186f,  0.348f, 0.610f,  0.639f});\n  // clang-format on\n}\n\nvoid TestAbsoluteError(const Context* ctx) {\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:absoluteerror\", ctx)};\n  obj->Configure({});\n  CheckConfigReload(obj, \"reg:absoluteerror\");\n\n  MetaInfo info;\n  std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};\n  info.labels.Reshape(6, 1);\n  info.labels.Data()->HostVector() = labels;\n  info.num_row_ = labels.size();\n\n  HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};\n  info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};\n\n  CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),\n                   {1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());\n\n  RegTree tree;\n  tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);\n  bst_node_t left_nidx = tree.LeftChild(RegTree::kRoot);\n  bst_node_t right_nidx = tree.RightChild(RegTree::kRoot);\n\n  HostDeviceVector<bst_node_t> position;\n  MakePositionsForTest(info.num_row_, left_nidx, right_nidx, &position);\n\n  auto& h_predt = 
predt.HostVector();\n  for (size_t i = 0; i < h_predt.size(); ++i) {\n    h_predt[i] = labels[i] + i;\n  }\n\n  tree::TrainParam param;\n  param.Init(Args{});\n  auto lr = param.learning_rate;\n\n  obj->UpdateTreeLeaf(position, info, lr, predt, 0, &tree);\n  ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);\n  ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);\n}\n\nvoid TestAbsoluteErrorLeaf(const Context* ctx) {\n  bst_target_t constexpr kTargets = 3, kRows = 16;\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:absoluteerror\", ctx)};\n  obj->Configure({});\n\n  MetaInfo info;\n  info.num_row_ = kRows;\n  info.labels.Reshape(16, kTargets);\n  HostDeviceVector<float> predt(info.labels.Size());\n\n  for (bst_target_t t{0}; t < kTargets; ++t) {\n    auto h_labels = info.labels.HostView().Slice(linalg::All(), t);\n    std::iota(linalg::begin(h_labels), linalg::end(h_labels), .0f);\n\n    auto h_predt =\n        linalg::MakeTensorView(ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);\n    for (size_t i = 0; i < h_predt.Size(); ++i) {\n      h_predt(i) = h_labels(i) + i;\n    }\n\n    HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);\n    auto& h_position = position.HostVector();\n    for (int32_t i = 0; i < 3; ++i) {\n      h_position[i] = ~i;  // negation for sampled nodes.\n    }\n    for (size_t i = 3; i < 8; ++i) {\n      h_position[i] = 3;\n    }\n    // empty leaf for node 4\n    for (size_t i = 8; i < 13; ++i) {\n      h_position[i] = 5;\n    }\n    for (size_t i = 13; i < h_labels.Size(); ++i) {\n      h_position[i] = 6;\n    }\n\n    RegTree tree;\n    tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);\n    tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);\n    tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);\n    ASSERT_EQ(tree.GetNumLeaves(), 4);\n\n    auto empty_leaf = tree[4].LeafValue();\n\n    tree::TrainParam param;\n    
param.Init(Args{});\n    auto lr = param.learning_rate;\n\n    obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);\n    ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);\n    ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);\n    ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);\n    ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);\n  }\n}\n\nvoid TestVectorLeafObj(Context const* ctx, std::string name, Args const& args, bst_idx_t n_samples,\n                       bst_idx_t n_target_labels, std::vector<float> const& sol_left,\n                       std::vector<float> const& sol_right) {\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(name, ctx)};\n  obj->Configure(args);\n\n  bst_target_t n_targets = 3;\n  auto tree = MakeMtTreeForTest(n_targets);\n\n  bst_node_t left_nidx = tree->LeftChild(RegTree::kRoot);\n  bst_node_t right_nidx = tree->RightChild(RegTree::kRoot);\n\n  MetaInfo info;\n  MakeIotaLabelsForTest(n_samples, n_target_labels, &info);\n  HostDeviceVector<bst_node_t> position;\n  MakePositionsForTest(info.num_row_, left_nidx, right_nidx, &position);\n\n  HostDeviceVector<float> predt(info.labels.Shape(0) * n_targets, 0.0f);\n\n  auto lr = 2.0f;\n  obj->UpdateTreeLeaf(position, info, lr, predt, 0, tree.get());\n\n  auto mt_tree = tree->HostMtView();\n  auto left = mt_tree.LeafValue(mt_tree.LeftChild(RegTree::kRoot));\n  auto right = mt_tree.LeafValue(mt_tree.RightChild(RegTree::kRoot));\n\n  for (std::size_t i = 0; i < left.Size(); ++i) {\n    ASSERT_FLOAT_EQ(left(i), sol_left[i]);\n    ASSERT_FLOAT_EQ(right(i), sol_right[i]);\n  }\n}\n\nvoid TestExpectileRegressionGPair(const Context* ctx) {\n  Args args{{\"expectile_alpha\", \"0.8\"}};\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:expectileerror\", ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:expectileerror\");\n\n  std::vector<float> predts{1.0f, 2.0f, 3.0f};\n  std::vector<float> labels{3.0f, 2.0f, 1.0f};\n  std::vector<float> weights{1.0f, 1.0f, 1.0f};\n  
std::vector<float> grad{-1.6f, 0.0f, 0.4f};\n  std::vector<float> hess{0.8f, 0.2f, 0.2f};\n  CheckObjFunction(obj, predts, labels, weights, grad, hess);\n  CheckObjFunction(obj, predts, labels, {}, grad, hess);\n\n  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{\"expectile\"});\n}\n\nvoid TestExpectileRegressionMultiAlpha(const Context* ctx) {\n  Args args{{\"expectile_alpha\", \"[0.2, 0.8]\"}};\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:expectileerror\", ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:expectileerror\");\n\n  std::vector<float> predts{0.0f, 0.0f, 0.0f, 0.0f};\n  std::vector<float> labels{1.0f, 2.0f};\n  std::vector<float> grad{-0.2f, -0.8f, -0.4f, -1.6f};\n  std::vector<float> hess{0.2f, 0.8f, 0.2f, 0.8f};\n  CheckObjFunction(obj, predts, labels, {}, grad, hess);\n}\n\nvoid TestExpectileRegressionInitEstimation(const Context* ctx) {\n  Args args{{\"expectile_alpha\", \"[0.2, 0.8]\"}};\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:expectileerror\", ctx)};\n  obj->Configure(args);\n\n  MetaInfo info;\n  info.num_row_ = 10;\n  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    data->SetDevice(ctx->Device());\n    data->Resize(info.num_row_);\n    shape[0] = info.num_row_;\n    shape[1] = 1;\n\n    auto& h_labels = data->HostVector();\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      h_labels[i] = static_cast<float>(i);\n    }\n  });\n\n  linalg::Vector<float> base_scores;\n  obj->InitEstimation(info, &base_scores);\n  ASSERT_EQ(base_scores.Size(), 2);\n  auto one_step = [&](float alpha) {\n    double sum_w = 0.0;\n    double sum_wy = 0.0;\n    double mean = 4.5;\n    for (std::size_t i = 0; i < info.num_row_; ++i) {\n      double label = static_cast<double>(i);\n      double diff = mean - label;\n      double w = diff >= 0.0 ? 
(1.0 - alpha) : alpha;\n      sum_w += w;\n      sum_wy += w * label;\n    }\n    return static_cast<float>(sum_wy / sum_w);\n  };\n  ASSERT_NEAR(base_scores(0), one_step(0.2f), kRtEps);\n  ASSERT_NEAR(base_scores(1), one_step(0.8f), kRtEps);\n}\n\nvoid TestPseudoHuber(const Context* ctx) {\n  Args args;\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:pseudohubererror\", ctx)};\n  obj->Configure(args);\n  CheckConfigReload(obj, \"reg:pseudohubererror\");\n\n  CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},                          // pred\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},                               // labels\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},                               // weights\n                   {-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f},  // out_grad\n                   {0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f});    // out_hess\n  CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},                          // pred\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},                               // labels\n                   {},                                                           // empty weights\n                   {-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f},  // out_grad\n                   {0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f});    // out_hess\n  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{\"mphe\"});\n\n  obj->Configure({{\"huber_slope\", \"0.1\"}});\n  CheckConfigReload(obj, \"reg:pseudohubererror\");\n  CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},                          // pred\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},                               // labels\n                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},                               // weights\n                   {-0.099388f, -0.099228f, -0.098639f, -0.089443f, 0.098639f},  // out_grad\n                   {0.0013467f, 0.001908f, 0.004443f, 
0.089443f, 0.004443f});    // out_hess\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_regression_obj.h",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <xgboost/context.h>  // for Context\n\n#include <string>  // for string\n#include <vector>  // for vector\n\nnamespace xgboost {\n\nvoid TestLinearRegressionGPair(const Context* ctx);\n\nvoid TestSquaredLog(const Context* ctx);\n\nvoid TestLogisticRegressionGPair(const Context* ctx);\n\nvoid TestLogisticRegressionBasic(const Context* ctx);\n\nvoid TestsLogisticRawGPair(const Context* ctx);\n\nvoid TestPoissonRegressionGPair(const Context* ctx);\n\nvoid TestPoissonRegressionBasic(const Context* ctx);\n\nvoid TestGammaRegressionGPair(const Context* ctx);\n\nvoid TestGammaRegressionBasic(const Context* ctx);\n\nvoid TestTweedieRegressionGPair(const Context* ctx);\n\nvoid TestTweedieRegressionBasic(const Context* ctx);\n\nvoid TestCoxRegressionGPair(const Context* ctx);\n\nvoid TestAbsoluteError(const Context* ctx);\n\nvoid TestAbsoluteErrorLeaf(const Context* ctx);\n\nvoid TestVectorLeafObj(Context const* ctx, std::string name, Args const& args, bst_idx_t n_samples,\n                       bst_idx_t n_target_labels, std::vector<float> const& sol_left,\n                       std::vector<float> const& sol_right);\n\nvoid TestPseudoHuber(const Context* ctx);\n\nvoid TestExpectileRegressionGPair(const Context* ctx);\n\nvoid TestExpectileRegressionMultiAlpha(const Context* ctx);\n\nvoid TestExpectileRegressionInitEstimation(const Context* ctx);\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_regression_obj_cpu.cc",
    "content": "/**\n * Copyright 2018-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>\n#include <xgboost/objective.h>\n\n#include \"../../../src/objective/adaptive.h\"\n#include \"../helpers.h\"\n#include \"test_regression_obj.h\"\n\nnamespace xgboost {\nTEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestLinearRegressionGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(SquaredLog)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestSquaredLog(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(PseudoHuber)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestPseudoHuber(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(ExpectileRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestExpectileRegressionGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(ExpectileRegressionMultiAlpha)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestExpectileRegressionMultiAlpha(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(ExpectileRegressionInitEstimation)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestExpectileRegressionInitEstimation(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestLogisticRegressionGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestLogisticRegressionBasic(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestsLogisticRawGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestPoissonRegressionGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestPoissonRegressionBasic(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  
TestGammaRegressionGPair(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestGammaRegressionBasic(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestTweedieRegressionGPair(&ctx);\n}\n\n#if defined(__CUDACC__)\nTEST(Objective, CPU_vs_CUDA) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n\n  std::unique_ptr<ObjFunction> obj{ObjFunction::Create(\"reg:squarederror\", &ctx)};\n  linalg::Matrix<GradientPair> cpu_out_preds;\n  linalg::Matrix<GradientPair> cuda_out_preds;\n\n  constexpr size_t kRows = 400;\n  constexpr size_t kCols = 100;\n  auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();\n  HostDeviceVector<float> preds;\n  preds.Resize(kRows);\n  auto& h_preds = preds.HostVector();\n  for (size_t i = 0; i < h_preds.size(); ++i) {\n    h_preds[i] = static_cast<float>(i);\n  }\n  auto& info = pdmat->Info();\n\n  info.labels.Reshape(kRows);\n  auto& h_labels = info.labels.Data()->HostVector();\n  for (size_t i = 0; i < h_labels.size(); ++i) {\n    h_labels[i] = 1 / static_cast<float>(i + 1);\n  }\n\n  {\n    // CPU\n    ctx = ctx.MakeCPU();\n    obj->GetGradient(preds, info, 0, &cpu_out_preds);\n  }\n  {\n    // CUDA\n    ctx = ctx.MakeCUDA(0);\n    obj->GetGradient(preds, info, 0, &cuda_out_preds);\n  }\n\n  auto h_cpu_out = cpu_out_preds.HostView();\n  auto h_cuda_out = cuda_out_preds.HostView();\n\n  float sgrad = 0;\n  float shess = 0;\n  for (size_t i = 0; i < kRows; ++i) {\n    sgrad += std::pow(h_cpu_out(i).GetGrad() - h_cuda_out(i).GetGrad(), 2);\n    shess += std::pow(h_cpu_out(i).GetHess() - h_cuda_out(i).GetHess(), 2);\n  }\n  ASSERT_NEAR(sgrad, 0.0f, kRtEps);\n  ASSERT_NEAR(shess, 0.0f, kRtEps);\n}\n#endif\n\nTEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestTweedieRegressionBasic(&ctx);\n}\n\n// CoxRegression not implemented in GPU code, no need for 
testing.\n#if !defined(__CUDACC__)\nTEST(Objective, CoxRegressionGPair) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestCoxRegressionGPair(&ctx);\n}\n#endif\n\nTEST(Objective, DeclareUnifiedTest(AbsoluteError)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestAbsoluteError(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  TestAbsoluteErrorLeaf(&ctx);\n}\n\nTEST(Objective, DeclareUnifiedTest(AbsoluteErrorVectorLeaf)) {\n  Context ctx = MakeCUDACtx(GPUIDX);\n  bst_idx_t n_samples = 16;\n  std::vector<float> sol_left{21.0f, 23.0f, 25.0f};\n  std::vector<float> sol_right{69.0f, 71.0f, 73.0f};\n  TestVectorLeafObj(&ctx, \"reg:absoluteerror\", Args{}, n_samples, 3u, sol_left, sol_right);\n}\n\nTEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {\n  std::vector<bst_node_t> missing{1, 3};\n\n  std::vector<bst_node_t> h_nidx = {2, 4, 5};\n  std::vector<size_t> h_nptr = {0, 4, 8, 16};\n\n  obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);\n\n  ASSERT_EQ(h_nidx[0], missing[0]);\n  ASSERT_EQ(h_nidx[2], missing[1]);\n  ASSERT_EQ(h_nidx[1], 2);\n  ASSERT_EQ(h_nidx[3], 4);\n  ASSERT_EQ(h_nidx[4], 5);\n\n  ASSERT_EQ(h_nptr[0], 0);\n  ASSERT_EQ(h_nptr[1], 0);  // empty\n  ASSERT_EQ(h_nptr[2], 4);\n  ASSERT_EQ(h_nptr[3], 4);  // empty\n  ASSERT_EQ(h_nptr[4], 8);\n  ASSERT_EQ(h_nptr[5], 16);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective/test_regression_obj_gpu.cu",
    "content": "/*!\n * Copyright 2018 XGBoost contributors\n */\n// Includes the CPU test source so that the same tests are also built as CUDA code.\n\n#include \"test_regression_obj_cpu.cc\"\n"
  },
  {
    "path": "tests/cpp/objective_helpers.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include \"objective_helpers.h\"\n\n#include \"../../src/common/linalg_op.h\"  // for begin, end\n#include \"helpers.h\"                     // for RandomDataGenerator\n\nnamespace xgboost {\n\nvoid MakeLabelForObjTest(std::shared_ptr<DMatrix> p_fmat, std::string const& obj) {\n  auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();\n  auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();\n  h_lower.resize(p_fmat->Info().num_row_);\n  h_upper.resize(p_fmat->Info().num_row_);\n  for (size_t i = 0; i < p_fmat->Info().num_row_; ++i) {\n    h_lower[i] = 1;\n    h_upper[i] = 10;\n  }\n\n  if (obj.find(\"rank:\") != std::string::npos) {\n    auto h_label = p_fmat->Info().labels.HostView();\n    std::size_t k = 0;\n    for (auto& v : h_label) {\n      v = k % 2 == 0;\n      ++k;\n    }\n  }\n}\n\n[[nodiscard]] std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj,\n                                                          bst_idx_t n_samples,\n                                                          bst_feature_t n_features,\n                                                          bst_target_t n_classes, bool make_label) {\n  std::shared_ptr<DMatrix> p_fmat;\n  if (obj.find(\"multi:\") != std::string::npos) {\n    CHECK_GE(n_classes, 3);\n    p_fmat = RandomDataGenerator{n_samples, n_features, 0}.Classes(n_classes).GenerateDMatrix(\n        make_label);\n  } else {\n    p_fmat = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(make_label);\n  }\n  if (make_label) {\n    MakeLabelForObjTest(p_fmat, obj);\n  }\n  return p_fmat;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/objective_helpers.h",
    "content": "/**\n * Copyright (c) 2023, XGBoost contributors\n */\n#pragma once\n\n#include <dmlc/registry.h>  // for Registry\n#include <gtest/gtest.h>\n#include <xgboost/objective.h>  // for ObjFunctionReg\n\n#include <algorithm>  // for transform, replace\n#include <iterator>   // for back_insert_iterator, back_inserter\n#include <memory>     // for shared_ptr\n#include <string>     // for string\n#include <vector>     // for vector\n\nnamespace xgboost {\n/**\n * @brief Collect the name of every objective in the ObjFunctionReg registry.\n */\ninline auto MakeObjNamesForTest() {\n  auto list = ::dmlc::Registry<::xgboost::ObjFunctionReg>::List();\n  std::vector<std::string> names;\n  std::transform(list.cbegin(), list.cend(), std::back_inserter(names),\n                 [](auto const* entry) { return entry->name; });\n  return names;\n}\n\n/**\n * @brief Build a gtest parameter name from an objective name like `reg:squarederror`.\n */\ntemplate <typename ParamType>\ninline std::string ObjTestNameGenerator(const ::testing::TestParamInfo<ParamType>& info) {\n  auto name = std::string{info.param};\n  // Name must be a valid c++ symbol, so replace every `:` (not only the first one).\n  std::replace(name.begin(), name.end(), ':', '_');\n  return name;\n}\n\n/**\n * @brief Fill in the label information used by the objective tests.\n *\n * Sets the lower and upper label bounds of every row to 1 and 10. For `rank:` objectives\n * the labels are additionally overwritten with alternating 1 and 0.\n */\nvoid MakeLabelForObjTest(std::shared_ptr<DMatrix> p_fmat, std::string const& obj);\n\n/**\n * @brief Generate a DMatrix with RandomDataGenerator for testing an objective.\n *\n * `multi:` objectives require `n_classes >= 3`. When `make_label` is true, the matrix is\n * also passed to MakeLabelForObjTest.\n */\n[[nodiscard]] std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj,\n                                                          bst_idx_t n_samples,\n                                                          bst_feature_t n_features,\n                                                          bst_target_t n_classes,\n                                                          bool make_label = true);\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/federated/CMakeLists.txt",
    "content": "target_sources(\n  testxgboost PRIVATE\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_coll.cc\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm.cc\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm_group.cc\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_tracker.cc\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_learner.cc\n  ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_data.cc\n)\n\nif(USE_CUDA)\n  target_sources(\n    testxgboost PRIVATE\n    ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_coll.cu\n    ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm_group.cu\n  )\nendif()\n\ntarget_include_directories(testxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/federated)\ntarget_link_libraries(testxgboost PRIVATE federated_client)\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_coll.cc",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/span.h>  // for Span\n\n#include <array>  // for array\n\n#include \"../../../../src/common/type.h\"   // for EraseType\n#include \"../../collective/test_worker.h\"  // for SocketTest\n#include \"federated_coll.h\"                // for FederatedColl\n#include \"federated_comm.h\"                // for FederatedComm\n#include \"test_worker.h\"                   // for TestFederated\n\nnamespace xgboost::collective {\nnamespace {\nclass FederatedCollTest : public SocketTest {};\n}  // namespace\n\nTEST_F(FederatedCollTest, Allreduce) {\n  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {\n    std::array<std::int32_t, 5> buffer = {1, 2, 3, 4, 5};\n    std::array<std::int32_t, 5> expected;\n    std::transform(buffer.cbegin(), buffer.cend(), expected.begin(),\n                   [=](auto i) { return i * n_workers; });\n\n    auto coll = std::make_shared<FederatedColl>();\n    auto rc = coll->Allreduce(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}),\n                              ArrayInterfaceHandler::kI4, Op::kSum);\n    SafeColl(rc);\n    for (auto i = 0; i < 5; i++) {\n      ASSERT_EQ(buffer[i], expected[i]);\n    }\n  });\n}\n\nTEST_F(FederatedCollTest, Broadcast) {\n  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {\n    FederatedColl coll{};\n    auto rc = Success();\n    if (comm->Rank() == 0) {\n      std::string buffer{\"hello\"};\n      rc = coll.Broadcast(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}), 0);\n      ASSERT_EQ(buffer, \"hello\");\n    } else {\n      std::string buffer{\"     \"};\n      rc = coll.Broadcast(*comm, common::EraseType(common::Span{buffer.data(), 
buffer.size()}), 0);\n      ASSERT_EQ(buffer, \"hello\");\n    }\n    SafeColl(rc);\n  });\n}\n\nTEST_F(FederatedCollTest, Allgather) {\n  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {\n    FederatedColl coll{};\n\n    std::vector<std::int32_t> buffer(n_workers, 0);\n    buffer[comm->Rank()] = comm->Rank();\n    auto rc = coll.Allgather(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}));\n    SafeColl(rc);\n    for (auto i = 0; i < n_workers; i++) {\n      ASSERT_EQ(buffer[i], i);\n    }\n  });\n}\n\nTEST_F(FederatedCollTest, AllgatherV) {\n  std::int32_t n_workers = 2;\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t) {\n    FederatedColl coll{};\n\n    std::vector<std::string_view> inputs{\"Federated\", \" Learning!!!\"};\n    std::vector<std::int64_t> recv_segments(inputs.size() + 1, 0);\n    std::string r;\n    std::vector<std::int64_t> sizes{static_cast<std::int64_t>(inputs[0].size()),\n                                    static_cast<std::int64_t>(inputs[1].size())};\n    r.resize(sizes[0] + sizes[1]);\n\n    auto rc = coll.AllgatherV(\n        *comm,\n        common::EraseType(common::Span{inputs[comm->Rank()].data(), inputs[comm->Rank()].size()}),\n        common::Span{sizes.data(), sizes.size()}, recv_segments,\n        common::EraseType(common::Span{r.data(), r.size()}), AllgatherVAlgo::kRing);\n\n    EXPECT_EQ(r, \"Federated Learning!!!\");\n    SafeColl(rc);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_coll.cu",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n\n#include <gtest/gtest.h>\n#include <xgboost/collective/result.h>  // for Result\n\n#include \"../../../../src/collective/allreduce.h\"\n#include \"../../../../src/common/cuda_rt_utils.h\"     // for AllVisibleGPUs\n#include \"../../../../src/common/device_helpers.cuh\"  // for device_vector\n#include \"../../../../src/common/type.h\"              // for EraseType\n#include \"../../collective/test_worker.h\"             // for SocketTest\n#include \"../../helpers.h\"                            // for MakeCUDACtx\n#include \"federated_coll.cuh\"\n#include \"test_worker.h\"  // for TestFederated\n\nnamespace xgboost::collective {\nnamespace {\nclass FederatedCollTestGPU : public SocketTest {};\n\nstruct Worker {\n  std::shared_ptr<FederatedColl> impl;\n  std::shared_ptr<Comm> nccl_comm;\n  std::shared_ptr<CUDAFederatedColl> coll;\n\n  Worker(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    auto ctx = MakeCUDACtx(rank);\n    impl = std::make_shared<FederatedColl>();\n    nccl_comm.reset(comm->MakeCUDAVar(&ctx, impl));\n    coll = std::make_shared<CUDAFederatedColl>(impl);\n  }\n};\n\nvoid TestAllreduce(std::shared_ptr<FederatedComm> comm, std::int32_t rank, std::int32_t n_workers) {\n  Worker w{comm, rank};\n\n  dh::device_vector<std::int32_t> buffer{std::vector<std::int32_t>{1, 2, 3, 4, 5}};\n  dh::device_vector<std::int32_t> expected(buffer.size());\n  thrust::transform(buffer.cbegin(), buffer.cend(), expected.begin(),\n                    [=] XGBOOST_DEVICE(std::int32_t i) { return i * n_workers; });\n\n  auto rc = w.coll->Allreduce(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)),\n                              ArrayInterfaceHandler::kI4, Op::kSum);\n  SafeColl(rc);\n  for (auto i = 0; i < 5; i++) {\n    ASSERT_EQ(buffer[i], expected[i]);\n  }\n}\n\nvoid TestBroadcast(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n  Worker w{comm, rank};\n\n  auto rc = 
Success();\n  std::vector<std::int32_t> expect{0, 1, 2, 3};\n\n  if (comm->Rank() == 0) {\n    dh::device_vector<std::int32_t> buffer{expect};\n    rc = w.coll->Broadcast(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), 0);\n    std::vector<std::int32_t> expect{0, 1, 2, 3};\n    ASSERT_EQ(buffer, expect);\n  } else {\n    dh::device_vector<std::int32_t> buffer(std::vector<std::int32_t>{4, 5, 6, 7});\n    rc = w.coll->Broadcast(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), 0);\n    ASSERT_EQ(buffer, expect);\n  }\n  SafeColl(rc);\n}\n\nvoid TestAllgather(std::shared_ptr<FederatedComm> comm, std::int32_t rank, std::int32_t n_workers) {\n  Worker w{comm, rank};\n\n  dh::device_vector<std::int32_t> buffer(n_workers, 0);\n  buffer[comm->Rank()] = comm->Rank();\n  auto rc = w.coll->Allgather(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)));\n  SafeColl(rc);\n  for (auto i = 0; i < n_workers; i++) {\n    ASSERT_EQ(buffer[i], i);\n  }\n}\n\nvoid TestAllgatherV(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n  Worker w{comm, rank};\n\n  std::vector<dh::device_vector<std::int32_t>> inputs{std::vector<std::int32_t>{1, 2, 3},\n                                                      std::vector<std::int32_t>{4, 5}};\n  std::vector<std::int64_t> recv_segments(inputs.size() + 1, 0);\n  dh::device_vector<std::int32_t> r;\n  std::vector<std::int64_t> sizes{static_cast<std::int64_t>(inputs[0].size()),\n                                  static_cast<std::int64_t>(inputs[1].size())};\n  r.resize(sizes[0] + sizes[1]);\n\n  auto rc = w.coll->AllgatherV(*w.nccl_comm, common::EraseType(dh::ToSpan(inputs[comm->Rank()])),\n                               common::Span{sizes.data(), sizes.size()}, recv_segments,\n                               common::EraseType(dh::ToSpan(r)), AllgatherVAlgo::kRing);\n  SafeColl(rc);\n\n  ASSERT_EQ(r[0], 1);\n  for (std::size_t i = 1; i < r.size(); ++i) {\n    ASSERT_EQ(r[i], r[i - 1] + 1);\n  }\n}\n}  // 
namespace\n\nTEST_F(FederatedCollTestGPU, Allreduce) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    TestAllreduce(comm, rank, n_workers);\n  });\n}\n\nTEST(FederatedCollGPUGlobal, Allreduce) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederatedGlobal(n_workers, [&] {\n    auto r = collective::GetRank();\n    auto world = collective::GetWorldSize();\n    CHECK_EQ(n_workers, world);\n\n    dh::device_vector<std::uint32_t> values(3, r);\n    auto ctx = MakeCUDACtx(r);\n    auto rc = collective::Allreduce(\n        &ctx, linalg::MakeVec(values.data().get(), values.size(), DeviceOrd::CUDA(r)),\n        Op::kBitwiseOR);\n    SafeColl(rc);\n\n    std::vector<std::uint32_t> expected(values.size(), 0);\n    for (std::int32_t rank = 0; rank < world; ++rank) {\n      for (std::size_t i = 0; i < expected.size(); ++i) {\n        expected[i] |= rank;\n      }\n    }\n    for (std::size_t i = 0; i < expected.size(); ++i) {\n      CHECK_EQ(expected[i], values[i]);\n    }\n  });\n}\n\nTEST_F(FederatedCollTestGPU, Broadcast) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    TestBroadcast(comm, rank);\n  });\n}\n\nTEST_F(FederatedCollTestGPU, Allgather) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    TestAllgather(comm, rank, n_workers);\n  });\n}\n\nTEST_F(FederatedCollTestGPU, AllgatherV) {\n  std::int32_t n_workers = 2;\n  if (curt::AllVisibleGPUs() < n_workers) {\n    GTEST_SKIP_(\"At least 2 GPUs are required for the test.\");\n  }\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    TestAllgatherV(comm, rank);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_comm.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors\n */\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <string>  // for string\n#include <thread>  // for thread\n\n#include \"../../../../plugin/federated/federated_comm.h\"\n#include \"../../collective/test_worker.h\"  // for SocketTest\n#include \"../../helpers.h\"                 // for GMockThrow\n#include \"test_worker.h\"                   // for TestFederated\n#include \"xgboost/json.h\"                  // for Json\n\nnamespace xgboost::collective {\nnamespace {\nclass FederatedCommTest : public SocketTest {};\nauto MakeConfig(std::string host, std::int32_t port, std::int32_t world, std::int32_t rank) {\n  Json config{Object{}};\n  config[\"federated_server_address\"] = host + \":\" + std::to_string(port);\n  config[\"federated_world_size\"] = Integer{world};\n  config[\"federated_rank\"] = Integer{rank};\n  return config;\n}\n}  // namespace\n\nTEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {\n  auto config = MakeConfig(\"localhost\", 0, 0, 0);\n  auto construct = [config] {\n    FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config};\n  };\n  ASSERT_THAT(construct, GMockThrow(\"Invalid world size\"));\n}\n\nTEST_F(FederatedCommTest, ThrowOnRankTooSmall) {\n  auto config = MakeConfig(\"localhost\", 0, 1, -1);\n  auto construct = [config] {\n    FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config};\n  };\n  ASSERT_THAT(construct, GMockThrow(\"Invalid worker rank.\"));\n}\n\nTEST_F(FederatedCommTest, ThrowOnRankTooBig) {\n  auto config = MakeConfig(\"localhost\", 0, 1, 1);\n  auto construct = [config] {\n    FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config};\n  };\n  ASSERT_THAT(construct, GMockThrow(\"Invalid worker rank.\"));\n}\n\nTEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {\n  auto construct = [] {\n    Json config{Object{}};\n    
config[\"federated_server_address\"] = std::string(\"localhost:0\");\n    config[\"federated_world_size\"] = std::string(\"1\");\n    config[\"federated_rank\"] = Integer(0);\n    FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config};\n  };\n  ASSERT_THAT(construct, GMockThrow(\"got: `String`\"));\n}\n\nTEST_F(FederatedCommTest, ThrowOnRankNotInteger) {\n  auto construct = [] {\n    Json config{Object{}};\n    config[\"federated_server_address\"] = std::string(\"localhost:0\");\n    config[\"federated_world_size\"] = 1;\n    config[\"federated_rank\"] = std::string(\"0\");\n    FederatedComm comm(DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config);\n  };\n  ASSERT_THAT(construct, GMockThrow(\"got: `String`\"));\n}\n\nTEST_F(FederatedCommTest, GetWorldSizeAndRank) {\n  Json config{Object{}};\n  config[\"federated_world_size\"] = 6;\n  config[\"federated_rank\"] = 3;\n  config[\"federated_server_address\"] = String{\"localhost:0\"};\n  FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\", config};\n  EXPECT_EQ(comm.World(), 6);\n  EXPECT_EQ(comm.Rank(), 3);\n}\n\nTEST_F(FederatedCommTest, IsDistributed) {\n  FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, \"\",\n                     MakeConfig(\"localhost\", 0, 2, 1)};\n  EXPECT_TRUE(comm.IsDistributed());\n}\n\nTEST_F(FederatedCommTest, InsecureTracker) {\n  std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 3u);\n  TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {\n    ASSERT_EQ(comm->Rank(), rank);\n    ASSERT_EQ(comm->World(), n_workers);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_comm_group.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>  // for Json\n\n#include \"../../../../src/collective/comm_group.h\"\n#include \"../../../../src/common/cuda_rt_utils.h\"  // for AllVisibleGPUs\n#include \"test_worker.h\"\n\nnamespace xgboost::collective {\nTEST(CommGroup, Federated) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {\n    Context ctx;\n    ASSERT_EQ(comm_group->Rank(), r);\n    auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CPU());\n    ASSERT_EQ(comm.TaskID(), std::to_string(r));\n    ASSERT_EQ(comm.Retry(), 2);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_comm_group.cu",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>  // for Json\n\n#include \"../../../../src/collective/comm_group.h\"\n#include \"../../../../src/common/cuda_rt_utils.h\"  // for AllVisibleGPUs\n#include \"../../helpers.h\"\n#include \"test_worker.h\"\n\nnamespace xgboost::collective {\nTEST(CommGroup, FederatedGPU) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {\n    Context ctx = MakeCUDACtx(0);\n    auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0));\n    ASSERT_EQ(comm_group->Rank(), r);\n    ASSERT_EQ(comm.TaskID(), std::to_string(r));\n    ASSERT_EQ(comm.Retry(), 2);\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_data.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n\n#include \"../../../../src/collective/communicator-inl.h\"\n#include \"../../filesystem.h\"  // for test_federated_data\n#include \"../../helpers.h\"\n#include \"test_worker.h\"\n\nnamespace xgboost {\n\nvoid VerifyLoadUri() {\n  auto const rank = collective::GetRank();\n\n  size_t constexpr kRows{16};\n  size_t const kCols = 8 + rank;\n\n  common::TemporaryDirectory tmpdir;\n  auto path = tmpdir.Path() / (\"small\" + std::to_string(rank) + \".csv\");\n  CreateTestCSV(path.string(), kRows, kCols);\n\n  std::unique_ptr<DMatrix> dmat;\n  std::string uri = path.string() + \"?format=csv\";\n  dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));\n\n  ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 1);\n  ASSERT_EQ(dmat->Info().num_row_, kRows);\n\n  for (auto const& page : dmat->GetBatches<SparsePage>()) {\n    auto entries = page.GetView().data;\n    auto index = 0;\n    int offsets[] = {0, 8, 17};\n    int offset = offsets[rank];\n    for (std::size_t row = 0; row < kRows; row++) {\n      for (std::size_t col = 0; col < kCols; col++) {\n        EXPECT_EQ(entries[index].index, col + offset);\n        index++;\n      }\n    }\n  }\n}\n\nTEST(FederatedDataTest, LoadUri) {\n  static int constexpr kWorldSize{2};\n  collective::TestFederatedGlobal(kWorldSize, [] { VerifyLoadUri(); });\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_learner.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost contributors\n *\n * Some other tests for federated learning are in the main test suite (test_learner.cc).\n */\n#include <dmlc/parameter.h>\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include <xgboost/objective.h>\n\n#include \"../../../../src/collective/communicator-inl.h\"\n#include \"../../../../src/common/linalg_op.h\"  // for begin, end\n#include \"../../helpers.h\"\n#include \"../../objective_helpers.h\"  // for MakeObjNamesForTest, ObjTestNameGenerator\n#include \"test_worker.h\"\n\nnamespace xgboost {\nnamespace {\ninline constexpr bst_target_t kClassesForTest = 3;\nauto MakeModel(std::string tree_method, std::string device, std::string objective,\n               std::shared_ptr<DMatrix> dmat) {\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParam(\"tree_method\", tree_method);\n  learner->SetParam(\"device\", device);\n  learner->SetParam(\"objective\", objective);\n  if (objective.find(\"quantile\") != std::string::npos) {\n    learner->SetParam(\"quantile_alpha\", \"0.5\");\n  }\n  if (objective.find(\"expectile\") != std::string::npos) {\n    learner->SetParam(\"expectile_alpha\", \"0.5\");\n  }\n  if (objective.find(\"multi\") != std::string::npos) {\n    learner->SetParam(\"num_class\", std::to_string(kClassesForTest));\n  }\n  learner->UpdateOneIter(0, dmat);\n  Json config{Object{}};\n  learner->SaveConfig(&config);\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n  return model;\n}\n\nvoid VerifyObjective(std::size_t rows, std::size_t cols,\n                     std::vector<float> const &expected_base_score, Json expected_model,\n                     std::string const &tree_method, std::string device,\n                     std::string const &objective) {\n  auto rank = collective::GetRank();\n  std::shared_ptr<DMatrix> dmat =\n      MakeFmatForObjTest(objective, rows, cols, kClassesForTest, rank == 0);\n  std::shared_ptr<DMatrix> 
sliced{dmat->SliceCol(collective::GetWorldSize(), rank)};\n\n  auto model = MakeModel(tree_method, device, objective, sliced);\n  auto base_score = GetBaseScore(model);\n  ASSERT_EQ(base_score, expected_base_score) << \" rank \" << rank;\n  ASSERT_EQ(model, expected_model) << \" rank \" << rank;\n}\n}  // namespace\n\nclass VerticalFederatedLearnerTest : public ::testing::TestWithParam<std::string> {\n  static int constexpr kWorldSize{3};\n\n protected:\n  void Run(std::string tree_method, std::string device, std::string objective) {\n    static auto constexpr kRows{16};\n    static auto constexpr kCols{16};\n\n    auto dmat = MakeFmatForObjTest(objective, kRows, kCols, kClassesForTest);\n    auto model = MakeModel(tree_method, device, objective, dmat);\n    auto score = GetBaseScore(model);\n    collective::TestFederatedGlobal(kWorldSize, [&]() {\n      VerifyObjective(kRows, kCols, score, model, tree_method, device, objective);\n    });\n  }\n};\n\nTEST_P(VerticalFederatedLearnerTest, Approx) {\n  std::string objective = GetParam();\n  this->Run(\"approx\", \"cpu\", objective);\n}\n\nTEST_P(VerticalFederatedLearnerTest, Hist) {\n  std::string objective = GetParam();\n  this->Run(\"hist\", \"cpu\", objective);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_P(VerticalFederatedLearnerTest, GPUApprox) {\n  std::string objective = GetParam();\n  this->Run(\"approx\", \"cuda:0\", objective);\n}\n\nTEST_P(VerticalFederatedLearnerTest, GPUHist) {\n  std::string objective = GetParam();\n  this->Run(\"hist\", \"cuda:0\", objective);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nINSTANTIATE_TEST_SUITE_P(\n    FederatedLearnerObjective, VerticalFederatedLearnerTest,\n    ::testing::ValuesIn(MakeObjNamesForTest()),\n    [](const ::testing::TestParamInfo<VerticalFederatedLearnerTest::ParamType> &info) {\n      return ObjTestNameGenerator(info);\n    });\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_federated_tracker.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <memory>  // for make_unique\n#include <string>  // for string\n\n#include \"../../../../src/collective/tracker.h\"  // for GetHostAddress\n#include \"federated_tracker.h\"\n#include \"xgboost/json.h\"  // for Json\n\nnamespace xgboost::collective {\n// Runs an insecure tracker configured for 3 workers, checks the port and the advertised\n// host, then shuts the tracker down.\nTEST(FederatedTrackerTest, Basic) {\n  Json config{Object()};\n  config[\"federated_secure\"] = Boolean{false};\n  config[\"n_workers\"] = Integer{3};\n\n  auto tracker = std::make_unique<FederatedTracker>(config);\n  ASSERT_FALSE(tracker->Ready());\n  auto fut = tracker->Run();\n  auto args = tracker->WorkerArgs();\n  ASSERT_TRUE(tracker->Ready());\n\n  ASSERT_GE(tracker->Port(), 1);\n  std::string host;\n  auto rc = GetHostAddress(&host);\n  // Check the lookup result before comparing, otherwise a failed lookup would only show\n  // up as a URI mismatch.\n  SafeColl(rc);\n  ASSERT_EQ(get<String const>(args[\"dmlc_tracker_uri\"]), host);\n\n  rc = tracker->Shutdown();\n  SafeColl(rc);\n  SafeColl(fut.get());\n  ASSERT_FALSE(tracker->Ready());\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/federated/test_worker.h",
    "content": "/**\n * Copyright 2022-2023, XGBoost contributors\n */\n#pragma once\n\n#include <gtest/gtest.h>\n\n#include <chrono>  // for ms, seconds\n#include <memory>  // for shared_ptr\n#include <thread>  // for thread\n\n#include \"../../../../plugin/federated/federated_tracker.h\"\n#include \"../../../../src/collective/comm_group.h\"\n#include \"../../../../src/collective/communicator-inl.h\"\n#include \"federated_comm.h\"  // for FederatedComm\n#include \"xgboost/json.h\"    // for Json\n\nnamespace xgboost::collective {\ninline Json FederatedTestConfig(std::int32_t n_workers, std::int32_t port, std::int32_t i) {\n  Json config{Object{}};\n  config[\"dmlc_communicator\"] = std::string{\"federated\"};\n  config[\"dmlc_task_id\"] = std::to_string(i);\n  config[\"dmlc_retry\"] = 2;\n  config[\"federated_world_size\"] = n_workers;\n  config[\"federated_rank\"] = i;\n  config[\"federated_server_address\"] = \"0.0.0.0:\" + std::to_string(port);\n  return config;\n}\n\ntemplate <typename WorkerFn>\nvoid TestFederatedImpl(std::int32_t n_workers, WorkerFn&& fn) {\n  Json config{Object()};\n  config[\"federated_secure\"] = Boolean{false};\n  config[\"n_workers\"] = Integer{n_workers};\n  FederatedTracker tracker{config};\n  auto fut = tracker.Run();\n\n  std::vector<std::thread> workers;\n  using namespace std::chrono_literals;\n  auto rc = tracker.WaitUntilReady();\n  SafeColl(rc);\n  std::int32_t port = tracker.Port();\n\n  for (std::int32_t i = 0; i < n_workers; ++i) {\n    workers.emplace_back([=] { fn(port, i); });\n  }\n\n  for (auto& t : workers) {\n    t.join();\n  }\n\n  rc = tracker.Shutdown();\n  SafeColl(rc);\n  SafeColl(fut.get());\n}\n\ntemplate <typename WorkerFn>\nvoid TestFederated(std::int32_t n_workers, WorkerFn&& fn) {\n  TestFederatedImpl(n_workers, [&](std::int32_t port, std::int32_t i) {\n    auto config = FederatedTestConfig(n_workers, port, i);\n    auto comm = std::make_shared<FederatedComm>(\n        DefaultRetry(), 
std::chrono::seconds{DefaultTimeoutSec()}, std::to_string(i), config);\n\n    fn(comm, i);\n  });\n}\n\ntemplate <typename WorkerFn>\nvoid TestFederatedGroup(std::int32_t n_workers, WorkerFn&& fn) {\n  TestFederatedImpl(n_workers, [&](std::int32_t port, std::int32_t i) {\n    auto config = FederatedTestConfig(n_workers, port, i);\n    std::shared_ptr<CommGroup> comm_group{CommGroup::Create(config)};\n    fn(comm_group, i);\n  });\n}\n\ntemplate <typename WorkerFn>\nvoid TestFederatedGlobal(std::int32_t n_workers, WorkerFn&& fn) {\n  TestFederatedImpl(n_workers, [&](std::int32_t port, std::int32_t i) {\n    auto config = FederatedTestConfig(n_workers, port, i);\n    collective::Init(config);\n    fn();\n    collective::Finalize();\n  });\n}\n}  // namespace xgboost::collective\n"
  },
  {
    "path": "tests/cpp/plugin/sycl_helpers.h",
    "content": "/*!\n * Copyright 2022-2024 XGBoost contributors\n */\n#pragma once\n\n#include \"../helpers.h\"\n#include \"../../plugin/sycl/device_manager.h\"\n#include \"../../plugin/sycl/data.h\"\n\nnamespace xgboost::sycl {\n\ntemplate<typename T, typename Fn>\nvoid TransformOnDeviceData(DeviceOrd device, T* device_data, size_t n_data, Fn&& fn) {\n  sycl::DeviceManager device_manager;\n  ::sycl::queue* qu = device_manager.GetQueue(device);\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(n_data), [=](::sycl::item<1> nid) {\n      const size_t i = nid.get_id(0);\n      device_data[i] = fn(device_data[i]);\n    });\n  }).wait();\n}\n\ntemplate<typename T>\nvoid VerifyOnDeviceData(DeviceOrd device, const T* device_data, const T* host_data, size_t n_data, T eps = T()) {\n  sycl::DeviceManager device_manager;\n  ::sycl::queue* qu = device_manager.GetQueue(device);\n\n  std::vector<T> copy_device_data(n_data);\n  qu->memcpy(copy_device_data.data(), device_data, n_data * sizeof(T)).wait();\n  for (size_t i = 0; i < n_data; ++i) {\n    EXPECT_NEAR(copy_device_data[i], host_data[i], eps);\n  }\n}\n\ntemplate<typename T, typename Container>\nvoid VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,\n                      const Container& host_vector, T eps = T()) {\n  ASSERT_EQ(sycl_vector.Size(), host_vector.size());\n\n  size_t size = sycl_vector.Size();\n  for (size_t i = 0; i < size; ++i) {\n    EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);\n  }\n}\n\ntemplate<typename T, typename Container>\nvoid VerifySyclVector(const std::vector<T>& sycl_vector,\n                      const Container& host_vector, T eps = T()) {\n  ASSERT_EQ(sycl_vector.size(), host_vector.size());\n\n  size_t size = sycl_vector.size();\n  for (size_t i = 0; i < size; ++i) {\n    EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);\n  }\n}\n\n}  // namespace xgboost::sycl\n"
  },
  {
    "path": "tests/cpp/plugin/test_example_objective.cc",
    "content": "#include <gtest/gtest.h>\n#include <xgboost/objective.h>\n#include <string>\n#include \"../helpers.h\"\n\nnamespace xgboost {\nTEST(Plugin, ExampleObjective) {\n  xgboost::Context ctx = MakeCUDACtx(GPUIDX);\n  auto* obj = xgboost::ObjFunction::Create(\"mylogistic\", &ctx);\n  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{\"logloss\"});\n  delete obj;\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_aft_obj.cc",
    "content": "/**\n * Copyright 2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/objective.h>\n#pragma GCC diagnostic pop\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"../objective/test_aft_obj.h\"\n\nnamespace xgboost::common {\nTEST(SyclObjective, DeclareUnifiedTest(AFTObjConfiguration)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAFTObjConfiguration(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(AFTObjGPairUncensoredLabels)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAFTObjGPairUncensoredLabels(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(AFTObjGPairLeftCensoredLabels)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAFTObjGPairLeftCensoredLabels(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(AFTObjGPairRightCensoredLabels)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAFTObjGPairRightCensoredLabels(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(AFTObjGPairIntervalCensoredLabels)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAFTObjGPairIntervalCensoredLabels(&ctx);\n}\n\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_ghist_builder.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"../../../src/data/gradient_index.h\"       // for GHistIndexMatrix\n#pragma GCC diagnostic pop\n\n#include \"../../../plugin/sycl/common/hist_util.h\"\n#include \"../../../plugin/sycl/device_manager.h\"\n#include \"../../../plugin/sycl/tree/hist_dispatcher.h\"\n#include \"sycl_helpers.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::common {\n\ntemplate <typename GradientSumT>\nvoid GHistBuilderTest(float sparsity, bool force_atomic_use) {\n  const size_t num_rows = 8;\n  const size_t num_columns = 1;\n  const int n_bins = 2;\n  const GradientSumT eps = 1e-6;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();\n  GHistIndexMatrix gmat_sycl;\n  gmat_sycl.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), n_bins, 0.3, false};\n\n  RowSetCollection row_set_collection;\n  auto& row_indices = row_set_collection.Data();\n  row_indices.Resize(qu, num_rows);\n  size_t* p_row_indices = row_indices.Data();\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(num_rows),\n                       [p_row_indices](::sycl::item<1> pid) {\n      const size_t idx = pid.get_id(0);\n      p_row_indices[idx] = idx;\n    });\n  }).wait_and_throw();\n  row_set_collection.Init();\n\n  auto builder = GHistBuilder<GradientSumT>(qu, n_bins);\n\n  HostDeviceVector<GradientPair> gpair({\n      {0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f},\n      {0.9f, 0.1f}, {0.2f, 0.3f}, {0.4f, 0.5f}, {0.6f, 0.7f}},\n      ctx.Device());\n  
CHECK_EQ(gpair.Size(), num_rows);\n\n  std::vector<GradientSumT> hist_host(2*n_bins);\n  GHistRow<GradientSumT, MemoryType::on_device> hist(qu, 2 * n_bins);\n  ::sycl::event event;\n\n  const size_t nblocks = 2;\n  GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(qu, 2 * nblocks * n_bins);\n\n  InitHist(qu, &hist, hist.Size(), &event);\n  InitHist(qu, &hist_buffer, hist_buffer.Size(), &event);\n\n  DeviceProperties device_prop(qu->get_device());\n  event = builder.BuildHist(gpair, row_set_collection[0], gmat_sycl, &hist,\n                            sparsity < eps , &hist_buffer, device_prop, event, force_atomic_use);\n  qu->memcpy(hist_host.data(), hist.Data(),\n            2 * n_bins * sizeof(GradientSumT), event);\n  qu->wait_and_throw();\n\n  // Build hist on host to compare\n  std::vector<GradientSumT> hist_desired(2*n_bins);\n  for (size_t rid = 0; rid < num_rows; ++rid) {\n    const size_t ibegin = gmat.row_ptr[rid];\n    const size_t iend = gmat.row_ptr[rid + 1];\n    for (size_t i = ibegin; i < iend; ++i) {\n      const size_t bin_idx = gmat.index[i];\n      hist_desired[2*bin_idx]   += gpair.HostVector()[rid].GetGrad();\n      hist_desired[2*bin_idx+1] += gpair.HostVector()[rid].GetHess();\n    }\n  }\n\n  VerifySyclVector(hist_host, hist_desired, eps);\n}\n\ntemplate <typename GradientSumT>\nvoid GHistSubtractionTest() {\n  const size_t n_bins = 4;\n  using GHistType = GHistRow<GradientSumT, MemoryType::on_device>;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  ::sycl::event event;\n  std::vector<GradientSumT> hist1_host = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};\n  GHistType hist1(qu, 2 * n_bins);\n  event = qu->memcpy(hist1.Data(), hist1_host.data(),\n                     2 * n_bins * sizeof(GradientSumT), event);\n\n  std::vector<GradientSumT> hist2_host = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};\n  GHistType hist2(qu, 2 * 
n_bins);\n  event = qu->memcpy(hist2.Data(), hist2_host.data(),\n            2 * n_bins * sizeof(GradientSumT), event);\n\n  std::vector<GradientSumT> hist3_host(2 * n_bins);\n  GHistType hist3(qu, 2 * n_bins);\n  event = SubtractionHist(qu, &hist3, hist1, hist2, n_bins, event);\n  qu->memcpy(hist3_host.data(), hist3.Data(),\n            2 * n_bins * sizeof(GradientSumT), event);\n  qu->wait_and_throw();\n\n  std::vector<GradientSumT> hist3_desired(2 * n_bins);\n  for (size_t idx = 0; idx < 2 * n_bins; ++idx) {\n    hist3_desired[idx] = hist1_host[idx] - hist2_host[idx];\n  }\n\n  const GradientSumT eps = 1e-6;\n  VerifySyclVector(hist3_host, hist3_desired, eps);\n}\n\nTEST(SyclGHistBuilder, ByBlockDenseCase) {\n  GHistBuilderTest<float>(0.0, false);\n  GHistBuilderTest<double>(0.0, false);\n}\n\nTEST(SyclGHistBuilder, ByBlockSparseCase) {\n  GHistBuilderTest<float>(0.3, false);\n  GHistBuilderTest<double>(0.3, false);\n}\n\nTEST(SyclGHistBuilder, ByAtomicDenseCase) {\n  GHistBuilderTest<float>(0.0, true);\n  GHistBuilderTest<double>(0.0, true);\n}\n\nTEST(SyclGHistBuilder, ByAtomicSparseCase) {\n  GHistBuilderTest<float>(0.3, true);\n  GHistBuilderTest<double>(0.3, true);\n}\n\nTEST(SyclGHistBuilder, Subtraction) {\n  GHistSubtractionTest<float>();\n  GHistSubtractionTest<double>();\n}\n\n}  // namespace xgboost::sycl::common\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_gradient_index.cc",
    "content": "/**\n * Copyright 2021-2024 by XGBoost contributors\n */\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"../../../src/data/gradient_index.h\"       // for GHistIndexMatrix\n#pragma GCC diagnostic pop\n\n#include \"../../../plugin/sycl/data/gradient_index.h\"\n#include \"../../../plugin/sycl/device_manager.h\"\n#include \"sycl_helpers.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::data {\n\nTEST(SyclGradientIndex, Init) {\n  size_t n_rows = 128;\n  size_t n_columns = 7;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{n_rows, n_columns, 0.3}.GenerateDMatrix();\n  int max_bins = 256;\n  common::GHistIndexMatrix gmat_sycl;\n  gmat_sycl.Init(qu, &ctx, p_fmat.get(), max_bins);\n\n  xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), max_bins, 0.3, false};\n\n  {\n    ASSERT_EQ(gmat_sycl.max_num_bins, max_bins);\n    ASSERT_EQ(gmat_sycl.nfeatures, n_columns);\n  }\n\n  {\n    VerifySyclVector(gmat_sycl.hit_count.ConstHostVector(), gmat.hit_count);\n  }\n\n  {\n    std::vector<size_t> feature_count_sycl(n_columns, 0);\n    gmat_sycl.GetFeatureCounts(feature_count_sycl.data());\n\n    std::vector<size_t> feature_count(n_columns, 0);\n    gmat.GetFeatureCounts(feature_count.data());\n    VerifySyclVector(feature_count_sycl, feature_count);\n  }\n}\n\n}  // namespace xgboost::sycl::data\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_hinge.cc",
    "content": "/**\n * Copyright 2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/objective.h>\n#pragma GCC diagnostic pop\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"../objective/test_hinge.h\"\n\nnamespace xgboost {\nTEST(SyclObjective, DeclareUnifiedTest(HingeObj)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestHingeObj(&ctx);\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_hist_updater.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <oneapi/dpl/random>\n\n#include \"../../../plugin/sycl/tree/hist_updater.h\"\n#include \"../../../plugin/sycl/device_manager.h\"\n\n#include \"../../../src/tree/common_row_partitioner.h\"\n\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::tree {\n\n// Use this class to test the protected methods of HistUpdater\ntemplate <typename GradientSumT>\nclass TestHistUpdater : public HistUpdater<GradientSumT> {\n public:\n  TestHistUpdater(const Context* ctx,\n                  ::sycl::queue* qu,\n                  const xgboost::tree::TrainParam& param,\n                  FeatureInteractionConstraintHost int_constraints_,\n                  DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,\n                                                                   int_constraints_, fmat) {}\n\n  void TestInitSampling(const HostDeviceVector<GradientPair>& gpair,\n                        USMVector<size_t, MemoryType::on_device>* row_indices) {\n    HistUpdater<GradientSumT>::InitSampling(gpair, row_indices);\n  }\n\n  auto* TestInitData(const common::GHistIndexMatrix& gmat,\n                     const HostDeviceVector<GradientPair>& gpair,\n                     const DMatrix& fmat,\n                     const RegTree& tree) {\n    HistUpdater<GradientSumT>::InitData(gmat, gpair, fmat, tree);\n    return &(HistUpdater<GradientSumT>::row_set_collection_);\n  }\n\n  const auto* TestBuildHistogramsLossGuide(ExpandEntry entry,\n                                    const common::GHistIndexMatrix &gmat,\n                                    RegTree *p_tree,\n                                    const HostDeviceVector<GradientPair>& gpair) {\n    HistUpdater<GradientSumT>::BuildHistogramsLossGuide(entry, gmat, p_tree, gpair);\n    return &(HistUpdater<GradientSumT>::hist_);\n  }\n\n  auto TestInitNewNode(int nid,\n                       const 
common::GHistIndexMatrix& gmat,\n                       const HostDeviceVector<GradientPair>& gpair,\n                       const RegTree& tree) {\n    HistUpdater<GradientSumT>::InitNewNode(nid, gmat, gpair, tree);\n    return HistUpdater<GradientSumT>::snode_host_[nid];\n  }\n\n  auto TestEvaluateSplits(const std::vector<ExpandEntry>& nodes_set,\n                          const common::GHistIndexMatrix& gmat,\n                          const RegTree& tree) {\n    HistUpdater<GradientSumT>::EvaluateSplits(nodes_set, gmat, tree);\n    return HistUpdater<GradientSumT>::snode_host_;\n  }\n\n  void TestApplySplit(const std::vector<ExpandEntry> nodes,\n                      const common::GHistIndexMatrix& gmat,\n                      RegTree* p_tree) {\n    HistUpdater<GradientSumT>::ApplySplit(nodes, gmat, p_tree);\n  }\n\n  auto TestExpandWithLossGuide(const common::GHistIndexMatrix& gmat,\n                               DMatrix *p_fmat,\n                               RegTree* p_tree,\n                               const HostDeviceVector<GradientPair>& gpair) {\n    HistUpdater<GradientSumT>::ExpandWithLossGuide(gmat, p_tree, gpair);\n  }\n\n  auto TestExpandWithDepthWise(const common::GHistIndexMatrix& gmat,\n                               DMatrix *p_fmat,\n                               RegTree* p_tree,\n                               const HostDeviceVector<GradientPair>& gpair) {\n    HistUpdater<GradientSumT>::ExpandWithDepthWise(gmat, p_tree, gpair);\n  }\n};\n\nvoid GenerateRandomGPairs(::sycl::queue* qu, GradientPair* gpair_ptr, size_t num_rows, bool has_neg_hess) {\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(::sycl::range<1>(num_rows)),\n                                        [=](::sycl::item<1> pid) {\n      uint64_t i = pid.get_linear_id();\n\n      constexpr uint32_t seed = 777;\n      oneapi::dpl::minstd_rand engine(seed, i);\n      GradientPair::ValueT smallest_hess_val = has_neg_hess ? -1. 
: 0.;\n      oneapi::dpl::uniform_real_distribution<GradientPair::ValueT> distr(smallest_hess_val, 1.);\n      gpair_ptr[i] = {distr(engine), distr(engine)};\n    });\n  });\n  qu->wait();\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {\n  const size_t num_rows = 1u << 12;\n  const size_t num_columns = 1;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();\n\n  FeatureInteractionConstraintHost int_constraints;\n\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n\n  USMVector<size_t, MemoryType::on_device> row_indices_0(qu, num_rows);\n  USMVector<size_t, MemoryType::on_device> row_indices_1(qu, num_rows);\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  GenerateRandomGPairs(qu, gpair.DevicePointer(), num_rows, true);\n\n  updater.TestInitSampling(gpair, &row_indices_0);\n  \n  size_t n_samples = row_indices_0.Size();\n  // Half of gpairs have neg hess\n  ASSERT_LT(n_samples, num_rows * 0.5 * param.subsample * 1.2);\n  ASSERT_GT(n_samples, num_rows * 0.5 * param.subsample / 1.2);\n\n  // Check if two launches generate different realisations:\n  updater.TestInitSampling(gpair, &row_indices_1);\n  if (row_indices_1.Size() == n_samples) {\n    std::vector<size_t> row_indices_0_host(n_samples);\n    std::vector<size_t> row_indices_1_host(n_samples);\n    qu->memcpy(row_indices_0_host.data(), row_indices_0.Data(), n_samples * sizeof(size_t)).wait();\n    qu->memcpy(row_indices_1_host.data(), row_indices_1.Data(), n_samples * sizeof(size_t)).wait();\n\n    // The order in row_indices_0 and row_indices_1 can be different\n    std::set<size_t> rows;\n    for (auto row : row_indices_0_host) {\n      rows.insert(row);\n    }\n\n    size_t num_diffs = 
0;\n    for (auto row : row_indices_1_host) {\n      if (rows.count(row) == 0) num_diffs++;\n    }\n\n    ASSERT_NE(num_diffs, 0);\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_neg_hess) {\n  const size_t num_rows = 1u << 8;\n  const size_t num_columns = 1;\n  const size_t n_bins = 32;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();\n\n  FeatureInteractionConstraintHost int_constraints;\n\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  GenerateRandomGPairs(qu, gpair.DevicePointer(), num_rows, has_neg_hess);\n\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n  RegTree tree;\n\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n  auto& row_indices = row_set_collection->Data();\n\n  std::vector<size_t> row_indices_host(row_indices.Size());\n  qu->memcpy(row_indices_host.data(), row_indices.DataConst(), row_indices.Size()*sizeof(size_t)).wait();\n\n  if (!has_neg_hess) {\n    for (size_t i = 0; i < num_rows; ++i) {\n      ASSERT_EQ(row_indices_host[i], i);\n    }\n  } else {\n    std::set<size_t> rows;\n    for (size_t i = 0; i < num_rows; ++i) {\n      if (gpair.HostVector()[i].GetHess() >= 0.0f) {\n        rows.insert(i);\n      }\n    }\n    ASSERT_EQ(rows.size(), row_indices_host.size());\n    for (size_t row_idx : row_indices_host) {\n      ASSERT_EQ(rows.count(row_idx), 1);\n    }\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& param, float sparsity) {\n  const size_t num_rows = 1u << 8;\n  const size_t num_columns = 1;\n  const size_t 
n_bins = 32;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();\n\n  FeatureInteractionConstraintHost int_constraints;\n\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());\n  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  GenerateRandomGPairs(qu, gpair.DevicePointer(), num_rows, false);\n\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n\n  ExpandEntry node0(0, tree.GetDepth(0));\n  ExpandEntry node1(1, tree.GetDepth(1));\n  ExpandEntry node2(2, tree.GetDepth(2));\n\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n  row_set_collection->AddSplit(0, 1, 2, 42, num_rows - 42);\n\n  updater.TestBuildHistogramsLossGuide(node0, gmat, &tree, gpair);\n  const auto* hist = updater.TestBuildHistogramsLossGuide(node1, gmat, &tree, gpair);\n\n  ASSERT_EQ((*hist)[0].Size(), n_bins);\n  ASSERT_EQ((*hist)[1].Size(), n_bins);\n  ASSERT_EQ((*hist)[2].Size(), n_bins);\n\n  std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist0_host(n_bins);\n  std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist1_host(n_bins);\n  std::vector<xgboost::detail::GradientPairInternal<GradientSumT>> hist2_host(n_bins);\n  qu->memcpy(hist0_host.data(), (*hist)[0].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);\n  qu->memcpy(hist1_host.data(), 
(*hist)[1].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);\n  qu->memcpy(hist2_host.data(), (*hist)[2].DataConst(), sizeof(xgboost::detail::GradientPairInternal<GradientSumT>) * n_bins);\n  qu->wait();\n\n  for (size_t idx_bin = 0; idx_bin < n_bins; ++idx_bin) {\n    EXPECT_NEAR(hist0_host[idx_bin].GetGrad(), hist1_host[idx_bin].GetGrad() + hist2_host[idx_bin].GetGrad(), 1e-6);\n    EXPECT_NEAR(hist0_host[idx_bin].GetHess(), hist1_host[idx_bin].GetHess() + hist2_host[idx_bin].GetHess(), 1e-6);\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sparsity) {\n  const size_t num_rows = 1u << 8;\n  const size_t num_columns = 1;\n  const size_t n_bins = 32;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();\n\n  FeatureInteractionConstraintHost int_constraints;\n\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());\n  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  auto* gpair_ptr = gpair.DevicePointer();\n  GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);\n\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  ExpandEntry node(ExpandEntry::kRootNid, tree.GetDepth(ExpandEntry::kRootNid));\n\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n  auto& row_idxs = row_set_collection->Data();\n  const size_t* row_idxs_ptr = row_idxs.DataConst();\n  updater.TestBuildHistogramsLossGuide(node, gmat, &tree, gpair);\n  const auto snode 
= updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, tree);\n\n  GradStats<GradientSumT> grad_stat;\n  {\n    ::sycl::buffer<GradStats<GradientSumT>> buff(&grad_stat, 1);\n    qu->submit([&](::sycl::handler& cgh) {\n      auto buff_acc  = buff.template get_access<::sycl::access::mode::read_write>(cgh);\n      cgh.single_task<>([=]() {\n        for (size_t i = 0; i < num_rows; ++i) {\n          size_t row_idx = row_idxs_ptr[i];\n          buff_acc[0] += GradStats<GradientSumT>(gpair_ptr[row_idx].GetGrad(),\n                                                 gpair_ptr[row_idx].GetHess());\n        }\n      });\n    }).wait_and_throw();\n  }\n\n  EXPECT_NEAR(snode.stats.GetGrad(), grad_stat.GetGrad(), 1e-6 * grad_stat.GetGrad());\n  EXPECT_NEAR(snode.stats.GetHess(), grad_stat.GetHess(), 1e-6 * grad_stat.GetHess());\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {\n  const size_t num_rows = 1u << 8;\n  const size_t num_columns = 2;\n  const size_t n_bins = 32;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0f}.GenerateDMatrix();\n\n  FeatureInteractionConstraintHost int_constraints;\n\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());\n  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  auto* gpair_ptr = gpair.DevicePointer();\n  GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);\n\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  ExpandEntry node(ExpandEntry::kRootNid, 
tree.GetDepth(ExpandEntry::kRootNid));\n\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n  auto& row_idxs = row_set_collection->Data();\n  const size_t* row_idxs_ptr = row_idxs.DataConst();\n  const auto* hist = updater.TestBuildHistogramsLossGuide(node, gmat, &tree, gpair);\n  const auto snode_init = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, tree);\n\n  const auto snode_updated = updater.TestEvaluateSplits({node}, gmat, tree);\n  auto best_loss_chg = snode_updated[0].best.loss_chg;\n  auto stats = snode_init.stats;\n  auto root_gain = snode_init.root_gain;\n\n  // Check all splits manually. Save the best one and compare with the ans\n  TreeEvaluator<GradientSumT> tree_evaluator(qu, param, num_columns);\n  auto evaluator = tree_evaluator.GetEvaluator();\n  const uint32_t* cut_ptr = gmat.cut.cut_ptrs_.ConstDevicePointer();\n  const size_t size = gmat.cut.cut_ptrs_.Size();\n  int n_better_splits = 0;\n  const auto* hist_ptr = (*hist)[0].DataConst();\n  std::vector<bst_float> best_loss_chg_des(1, -1);\n  {\n    ::sycl::buffer<bst_float> best_loss_chg_buff(best_loss_chg_des.data(), 1);\n    qu->submit([&](::sycl::handler& cgh) {\n      auto best_loss_chg_acc = best_loss_chg_buff.template get_access<::sycl::access::mode::read_write>(cgh);\n      cgh.single_task<>([=]() {\n        for (size_t i = 1; i < size; ++i) {\n          GradStats<GradientSumT> left(0, 0);\n          GradStats<GradientSumT> right = stats - left;\n          for (size_t j = cut_ptr[i-1]; j < cut_ptr[i]; ++j) {\n            auto loss_change = evaluator.CalcSplitGain(0, i - 1, left, right) - root_gain;\n            if (loss_change > best_loss_chg_acc[0]) {\n              best_loss_chg_acc[0] = loss_change;\n            }\n            left.Add(hist_ptr[j].GetGrad(), hist_ptr[j].GetHess());\n            right = stats - left;\n          }\n        }\n      });\n    }).wait();\n  }\n\n  ASSERT_NEAR(best_loss_chg_des[0], best_loss_chg, 1e-4);\n}\n\ntemplate 
<typename GradientSumT>\nvoid TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float sparsity, int max_bins) {\n  const size_t num_rows = 1024;\n  const size_t num_columns = 2;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), max_bins);\n\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n\n  std::vector<tree::ExpandEntry> nodes;\n  nodes.emplace_back(tree::ExpandEntry(0, tree.GetDepth(0)));\n\n  FeatureInteractionConstraintHost int_constraints;\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  HostDeviceVector<GradientPair> gpair(num_rows, {0, 0}, ctx.Device());\n  auto* gpair_ptr = gpair.DevicePointer();\n  GenerateRandomGPairs(qu, gpair_ptr, num_rows, false);\n\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n  updater.TestApplySplit(nodes, gmat, &tree);\n\n  // Copy indexes to host\n  std::vector<size_t> row_indices_host(num_rows);\n  qu->memcpy(row_indices_host.data(), row_set_collection->Data().Data(), sizeof(size_t)*num_rows).wait();\n\n  // Reference Implementation\n  std::vector<size_t> row_indices_desired_host(num_rows);\n  size_t n_left, n_right;\n  {\n    TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, int_constraints, p_fmat.get());\n    auto* row_set_collection4verification = updater4verification.TestInitData(gmat, gpair, *p_fmat, tree);\n\n    size_t n_nodes = nodes.size();\n    std::vector<int32_t> split_conditions(n_nodes);\n    xgboost::tree::CommonRowPartitioner::FindSplitConditions(nodes, tree.HostScView(), gmat,\n                                                             &split_conditions);\n\n    common::PartitionBuilder 
partition_builder;\n    partition_builder.Init(qu, n_nodes, [&](size_t node_in_set) {\n      const int32_t nid = nodes[node_in_set].nid;\n      return (*row_set_collection4verification)[nid].Size();\n    });\n\n    ::sycl::event event;\n    partition_builder.Partition(gmat, nodes, (*row_set_collection4verification),\n                                split_conditions, &tree, &event);\n    qu->wait_and_throw();\n\n    for (size_t node_in_set = 0; node_in_set < n_nodes; node_in_set++) {\n      const int32_t nid = nodes[node_in_set].nid;\n      size_t* data_result = const_cast<size_t*>((*row_set_collection4verification)[nid].begin);\n      partition_builder.MergeToArray(node_in_set, data_result, &event);\n    }\n    qu->wait_and_throw();\n\n    const int32_t nid = nodes[0].nid;\n    n_left = partition_builder.GetNLeftElems(0);\n    n_right = partition_builder.GetNRightElems(0);\n\n    row_set_collection4verification->AddSplit(nid, tree[nid].LeftChild(),\n        tree[nid].RightChild(), n_left, n_right);\n\n    qu->memcpy(row_indices_desired_host.data(), row_set_collection4verification->Data().Data(), sizeof(size_t)*num_rows).wait();\n  }\n\n  std::sort(row_indices_desired_host.begin(), row_indices_desired_host.begin() + n_left);\n  std::sort(row_indices_host.begin(), row_indices_host.begin() + n_left);\n  std::sort(row_indices_desired_host.begin() + n_left, row_indices_desired_host.end());\n  std::sort(row_indices_host.begin() + n_left, row_indices_host.end());\n\n  for (size_t row = 0; row < num_rows; ++row) {\n    ASSERT_EQ(row_indices_desired_host[row], row_indices_host[row]);\n  }\n}\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param) {\n  const size_t num_rows = 3;\n  const size_t num_columns = 1;\n  const size_t n_bins = 16;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  
std::vector<float> data = {7, 3, 15};\n  auto p_fmat = GetDMatrixFromData(data, num_rows, num_columns);\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  HostDeviceVector<GradientPair> gpair({{1, 2}, {3, 1}, {1, 1}}, ctx.Device());\n\n  RegTree tree;\n  FeatureInteractionConstraintHost int_constraints;\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());\n  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n\n  updater.TestExpandWithLossGuide(gmat, p_fmat.get(), &tree, gpair);\n\n  const auto& nodes = tree.GetNodes(DeviceOrd::CPU());\n  std::vector<float> ans(data.size());\n  for (size_t data_idx = 0; data_idx < data.size(); ++data_idx) {\n      size_t node_idx = 0;\n      while (!nodes[node_idx].IsLeaf()) {\n        node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? 
nodes[node_idx].LeftChild() : nodes[node_idx].RightChild();\n      }\n      ans[data_idx] = nodes[node_idx].LeafValue();\n  }\n\n  ASSERT_NEAR(ans[0], -0.15, 1e-6);\n  ASSERT_NEAR(ans[1], -0.45, 1e-6);\n  ASSERT_NEAR(ans[2], -0.15, 1e-6);\n}\n\n\ntemplate <typename GradientSumT>\nvoid TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param) {\n  const size_t num_rows = 3;\n  const size_t num_columns = 1;\n  const size_t n_bins = 16;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  std::vector<float> data = {7, 3, 15};\n  auto p_fmat = GetDMatrixFromData(data, num_rows, num_columns);\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), n_bins);\n\n  HostDeviceVector<GradientPair> gpair({{1, 2}, {3, 1}, {1, 1}}, ctx.Device());\n\n  RegTree tree;\n  FeatureInteractionConstraintHost int_constraints;\n  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());\n  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());\n  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());\n  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);\n\n  updater.TestExpandWithDepthWise(gmat, p_fmat.get(), &tree, gpair);\n\n  const auto& nodes = tree.GetNodes(DeviceOrd::CPU());\n  std::vector<float> ans(data.size());\n  for (size_t data_idx = 0; data_idx < data.size(); ++data_idx) {\n      size_t node_idx = 0;\n      while (!nodes[node_idx].IsLeaf()) {\n        node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? 
nodes[node_idx].LeftChild() : nodes[node_idx].RightChild();\n      }\n      ans[data_idx] = nodes[node_idx].LeafValue();\n  }\n\n  ASSERT_NEAR(ans[0], -0.15, 1e-6);\n  ASSERT_NEAR(ans[1], -0.45, 1e-6);\n  ASSERT_NEAR(ans[2], -0.15, 1e-6);\n}\n\nTEST(SyclHistUpdater, Sampling) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"subsample\", \"0.7\"}});\n\n  TestHistUpdaterSampling<float>(param);\n  TestHistUpdaterSampling<double>(param);\n}\n\nTEST(SyclHistUpdater, InitData) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"subsample\", \"1\"}});\n\n  TestHistUpdaterInitData<float>(param, true);\n  TestHistUpdaterInitData<float>(param, false);\n\n  TestHistUpdaterInitData<double>(param, true);\n  TestHistUpdaterInitData<double>(param, false);\n}\n\nTEST(SyclHistUpdater, BuildHistogramsLossGuide) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"3\"}});\n\n  TestHistUpdaterBuildHistogramsLossGuide<float>(param, 0.0);\n  TestHistUpdaterBuildHistogramsLossGuide<float>(param, 0.5);\n  TestHistUpdaterBuildHistogramsLossGuide<double>(param, 0.0);\n  TestHistUpdaterBuildHistogramsLossGuide<double>(param, 0.5);\n}\n\nTEST(SyclHistUpdater, InitNewNode) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"3\"}});\n\n  TestHistUpdaterInitNewNode<float>(param, 0.0);\n  TestHistUpdaterInitNewNode<float>(param, 0.5);\n  TestHistUpdaterInitNewNode<double>(param, 0.0);\n  TestHistUpdaterInitNewNode<double>(param, 0.5);\n}\n\nTEST(SyclHistUpdater, EvaluateSplits) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"3\"}});\n\n  TestHistUpdaterEvaluateSplits<float>(param);\n  TestHistUpdaterEvaluateSplits<double>(param);\n}\n\nTEST(SyclHistUpdater, ApplySplitSparce) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"3\"}});\n\n  TestHistUpdaterApplySplit<float>(param, 0.3, 256);\n  
TestHistUpdaterApplySplit<double>(param, 0.3, 256);\n}\n\nTEST(SyclHistUpdater, ApplySplitDence) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"3\"}});\n\n  TestHistUpdaterApplySplit<float>(param, 0.0, 256);\n  TestHistUpdaterApplySplit<float>(param, 0.0, 256+1);\n  TestHistUpdaterApplySplit<float>(param, 0.0, (1u << 16) + 1);\n  TestHistUpdaterApplySplit<double>(param, 0.0, 256);\n  TestHistUpdaterApplySplit<double>(param, 0.0, 256+1);\n  TestHistUpdaterApplySplit<double>(param, 0.0, (1u << 16) + 1);\n}\n\nTEST(SyclHistUpdater, ExpandWithLossGuide) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"2\"},\n                                {\"grow_policy\", \"lossguide\"}});\n\n  TestHistUpdaterExpandWithLossGuide<float>(param);\n  TestHistUpdaterExpandWithLossGuide<double>(param);\n}\n\nTEST(SyclHistUpdater, ExpandWithDepthWise) {\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"max_depth\", \"2\"}});\n\n  TestHistUpdaterExpandWithDepthWise<float>(param);\n  TestHistUpdaterExpandWithDepthWise<double>(param);\n}\n\n}  // namespace xgboost::sycl::tree\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_host_device_vector.cc",
    "content": "/**\n * Copyright 2018-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <numeric>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include <xgboost/host_device_vector.h>\n#pragma GCC diagnostic pop\n\n#include \"sycl_helpers.h\"\n\nnamespace xgboost::common {\nnamespace {\n\nvoid InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {\n  // create the vector\n  v->SetDevice(device);\n  v->Resize(n);\n\n  ASSERT_EQ(v->Size(), n);\n  ASSERT_EQ(v->Device(), device);\n  // ensure that the device have read-write access\n  ASSERT_TRUE(v->DeviceCanRead());\n  ASSERT_TRUE(v->DeviceCanWrite());\n  // ensure that the host has no access\n  ASSERT_FALSE(v->HostCanRead());\n  ASSERT_FALSE(v->HostCanWrite());\n\n  // fill in the data on the host\n  std::vector<int>& data_h = v->HostVector();\n  // ensure that the host has full access, while the device have none\n  ASSERT_TRUE(v->HostCanRead());\n  ASSERT_TRUE(v->HostCanWrite());\n  ASSERT_FALSE(v->DeviceCanRead());\n  ASSERT_FALSE(v->DeviceCanWrite());\n  ASSERT_EQ(data_h.size(), n);\n  std::iota(data_h.begin(), data_h.end(), 0);\n}\n\nvoid PlusOne(HostDeviceVector<int> *v) {\n  auto device = v->Device();\n  sycl::TransformOnDeviceData(v->Device(), v->DevicePointer(), v->Size(), [=](size_t a){ return a + 1; });\n  ASSERT_TRUE(v->DeviceCanWrite());\n}\n\nvoid CheckDevice(HostDeviceVector<int>* v,\n                 size_t size,\n                 unsigned int first,\n                 GPUAccess access) {\n  ASSERT_EQ(v->Size(), size);\n\n  std::vector<int> desired_data(size);\n  std::iota(desired_data.begin(), desired_data.end(), first);\n  sycl::VerifyOnDeviceData(v->Device(), v->ConstDevicePointer(), desired_data.data(), size);\n  ASSERT_TRUE(v->DeviceCanRead());\n  // ensure that the device has at most the access specified by access\n  ASSERT_EQ(v->DeviceCanWrite(), 
access == GPUAccess::kWrite);\n  ASSERT_EQ(v->HostCanRead(), access == GPUAccess::kRead);\n  ASSERT_FALSE(v->HostCanWrite());\n\n  sycl::VerifyOnDeviceData(v->Device(), v->DevicePointer(), desired_data.data(), size);\n  ASSERT_TRUE(v->DeviceCanRead());\n  ASSERT_TRUE(v->DeviceCanWrite());\n  ASSERT_FALSE(v->HostCanRead());\n  ASSERT_FALSE(v->HostCanWrite());\n}\n\nvoid CheckHost(HostDeviceVector<int> *v, GPUAccess access) {\n  const std::vector<int>& data_h = access == GPUAccess::kNone ?\n    v->HostVector() : v->ConstHostVector();\n  for (size_t i = 0; i < v->Size(); ++i) {\n    ASSERT_EQ(data_h.at(i), i + 1);\n  }\n  ASSERT_TRUE(v->HostCanRead());\n  ASSERT_EQ(v->HostCanWrite(), access == GPUAccess::kNone);\n  ASSERT_EQ(v->DeviceCanRead(), access == GPUAccess::kRead);\n  // the devices should have no write access\n  ASSERT_FALSE(v->DeviceCanWrite());\n}\n\nvoid TestHostDeviceVector(size_t n, DeviceOrd device) {\n  HostDeviceVector<int> v;\n  InitHostDeviceVector(n, device, &v);\n  CheckDevice(&v, n, 0, GPUAccess::kRead);\n  PlusOne(&v);\n  CheckDevice(&v, n, 1, GPUAccess::kWrite);\n  CheckHost(&v, GPUAccess::kRead);\n  CheckHost(&v, GPUAccess::kNone);\n}\n\nTEST(SyclHostDeviceVector, Basic) {\n  size_t n = 1001;\n  DeviceOrd device = DeviceOrd::SyclDefault();\n  TestHostDeviceVector(n, device);\n}\n\nTEST(SyclHostDeviceVector, Copy) {\n  size_t n = 1001;\n  auto device = DeviceOrd::SyclDefault();\n\n  HostDeviceVector<int> v;\n  {\n    // a separate scope to ensure that v1 is gone before further checks\n    HostDeviceVector<int> v1;\n    InitHostDeviceVector(n, device, &v1);\n    v.Resize(v1.Size());\n    v.Copy(v1);\n  }\n  CheckDevice(&v, n, 0, GPUAccess::kRead);\n  PlusOne(&v);\n  CheckDevice(&v, n, 1, GPUAccess::kWrite);\n  CheckHost(&v, GPUAccess::kRead);\n  CheckHost(&v, GPUAccess::kNone);\n}\n\nTEST(SyclHostDeviceVector, Fill) {\n  size_t n = 1001;\n  auto device = DeviceOrd::SyclDefault();\n\n  int val = 42;\n  HostDeviceVector<int> v;\n  
v.SetDevice(device);\n  v.Resize(n);\n\n  ASSERT_TRUE(v.DeviceCanWrite());\n  v.Fill(val);\n\n  ASSERT_FALSE(v.HostCanRead());\n  ASSERT_FALSE(v.HostCanWrite());\n  ASSERT_TRUE(v.DeviceCanRead());\n  ASSERT_TRUE(v.DeviceCanWrite());\n\n  std::vector<int> desired_data(n, val);\n  sycl::VerifyOnDeviceData(v.Device(), v.ConstDevicePointer(), desired_data.data(), n);\n}\n\nTEST(SyclHostDeviceVector, Extend) {\n  size_t n0 = 1001;\n  size_t n1 = 17;\n  auto device = DeviceOrd::SyclDefault();\n\n  int val = 42;\n  HostDeviceVector<int> v0;\n  v0.SetDevice(device);\n  v0.Resize(n0);\n  v0.Fill(val);\n\n  HostDeviceVector<int> v1;\n  v1.SetDevice(device);\n  v1.Resize(n1);\n  v1.Fill(val);\n\n  v0.Extend(v1);\n  {\n    std::vector<int> desired_data(n0+n1, val);\n    sycl::VerifyOnDeviceData(v0.Device(), v0.ConstDevicePointer(), desired_data.data(), n0+n1);\n  }\n  v1.Extend(v0);\n  {\n    std::vector<int> desired_data(n0+2*n1, val);\n    sycl::VerifyOnDeviceData(v1.Device(), v1.ConstDevicePointer(), desired_data.data(), n0+2*n1);\n  }\n}\n\nTEST(SyclHostDeviceVector, SetDevice) {\n  std::vector<int> h_vec (2345);\n  for (size_t i = 0; i < h_vec.size(); ++i) {\n    h_vec[i] = i;\n  }\n  HostDeviceVector<int> vec (h_vec);\n  auto device = DeviceOrd::SyclDefault();\n\n  vec.SetDevice(device);\n  ASSERT_EQ(vec.Size(), h_vec.size());\n  auto span = vec.DeviceSpan();  // sync to device\n\n  vec.SetDevice(DeviceOrd::CPU());  // pull back to cpu.\n  ASSERT_EQ(vec.Size(), h_vec.size());\n  ASSERT_EQ(vec.Device(), DeviceOrd::CPU());\n\n  auto h_vec_1 = vec.HostVector();\n  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));\n}\n\nTEST(SyclHostDeviceVector, Span) {\n  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};\n  vec.SetDevice(DeviceOrd::SyclDefault());\n  auto span = vec.DeviceSpan();\n  ASSERT_EQ(vec.Size(), span.size());\n  ASSERT_EQ(vec.DevicePointer(), span.data());\n  auto const_span = vec.ConstDeviceSpan();\n  ASSERT_EQ(vec.Size(), 
const_span.size());\n  ASSERT_EQ(vec.ConstDevicePointer(), const_span.data());\n\n  auto h_span = vec.ConstHostSpan();\n  ASSERT_TRUE(vec.HostCanRead());\n  ASSERT_FALSE(vec.HostCanWrite());\n  ASSERT_EQ(h_span.size(), vec.Size());\n  ASSERT_EQ(h_span.data(), vec.ConstHostPointer());\n\n  h_span = vec.HostSpan();\n  ASSERT_TRUE(vec.HostCanWrite());\n}\n\nTEST(SyclHostDeviceVector, Empty) {\n  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};\n  HostDeviceVector<float> another { std::move(vec) };\n  ASSERT_FALSE(another.Empty());\n  ASSERT_TRUE(vec.Empty());\n}\n\nTEST(SyclHostDeviceVector, Resize) {\n  auto check = [&](HostDeviceVector<float> const& vec) {\n    auto const& h_vec = vec.ConstHostSpan();\n    for (std::size_t i = 0; i < 4; ++i) {\n      ASSERT_EQ(h_vec[i], i + 1);\n    }\n    for (std::size_t i = 4; i < vec.Size(); ++i) {\n      ASSERT_EQ(h_vec[i], 3.0);\n    }\n  };\n  {\n    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};\n    vec.SetDevice(DeviceOrd::SyclDefault());\n    vec.ConstDeviceSpan();\n    ASSERT_TRUE(vec.DeviceCanRead());\n    ASSERT_FALSE(vec.DeviceCanWrite());\n    vec.DeviceSpan();\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    check(vec);\n  }\n  {\n    HostDeviceVector<float> vec{{1.0f, 2.0f, 3.0f, 4.0f}, DeviceOrd::SyclDefault()};\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.DeviceCanWrite());\n    check(vec);\n  }\n  {\n    HostDeviceVector<float> vec{1.0f, 2.0f, 3.0f, 4.0f};\n    ASSERT_TRUE(vec.HostCanWrite());\n    vec.Resize(7, 3.0f);\n    ASSERT_TRUE(vec.HostCanWrite());\n    check(vec);\n  }\n}\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_lambdarank_obj.cc",
    "content": "/**\n * Copyright 2024 by XGBoost Contributors\n */\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"../objective/test_lambdarank_obj.h\"\n#pragma GCC diagnostic pop\n\n#include <gtest/gtest.h>\n\n#include \"xgboost/context.h\"\n\nnamespace xgboost::obj {\nTEST(SyclObjective, LambdaRankNDCGJsonIO) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestNDCGJsonIO(&ctx);\n}\n\nTEST(SyclObjective, LambdaRankTestNDCGGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestNDCGGPair(&ctx);\n}\n\nTEST(SyclObjective, LambdaRankUnbiasedNDCG) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestUnbiasedNDCG(&ctx);\n}\n\nTEST(SyclObjective, LambdaRankMAPStat) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestMAPStat(&ctx);\n}\n\nTEST(SyclObjective, LambdaRankMAPGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestMAPGPair(&ctx);\n}\n\n}  // namespace xgboost::obj\n\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_linalg.cc",
    "content": "/*!\n * Copyright 2017-2025 XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <algorithm>\n#include <random>\n\n#include \"../../src/common/linalg_op.h\"\n#include \"../../../src/common/optional_weight.h\"  // for MakeOptionalWeights\n#include \"sycl_helpers.h\"\n\nnamespace xgboost::sycl::linalg {\nTEST(SyclLinalg, SmallHistogram) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  std::size_t cnt = 32, n_bins = 4;\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  HostDeviceVector<float> values(cnt * n_bins);\n  values.SetDevice(ctx.Device());\n  float* values_host_ptr = values.HostPointer();\n  for (std::size_t i = 0; i < n_bins; ++i) {\n    std::fill(values_host_ptr + i * cnt, values_host_ptr + (i  + 1) * cnt, i);\n  }\n\n  std::mt19937 rng;\n  rng.seed(2025);\n  std::shuffle(values_host_ptr, values_host_ptr + cnt * n_bins, rng);\n\n  float* values_device_ptr = values.DevicePointer();\n  xgboost::linalg::MatrixView<float> indices =\n      xgboost::linalg::MakeTensorView(&ctx, xgboost::common::Span(values_device_ptr, cnt * n_bins),\n                                      cnt * n_bins, 1);\n  HostDeviceVector<float> bins(n_bins, 0);\n  bins.SetDevice(ctx.Device());\n\n  HostDeviceVector<float> weights;\n  xgboost::linalg::SmallHistogram(&ctx, indices, xgboost::common::MakeOptionalWeights(ctx.Device(), weights),\n                 xgboost::linalg::MakeTensorView(&ctx, xgboost::common::Span(bins.DevicePointer(), n_bins), n_bins));\n\n  for (std::size_t i = 0; i < n_bins; ++i) {\n    ASSERT_EQ(bins.HostVector()[i], cnt);\n  }\n}\n}  // namespace xgboost::sycl::linalg"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_multiclass_obj.cc",
    "content": "/*!\n * Copyright 2018-2023 XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/context.h>\n#pragma GCC diagnostic pop\n\n#include \"../objective/test_multiclass_obj.h\"\n\nnamespace xgboost {\n\nTEST(SyclObjective, SoftmaxMultiClassObjGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestSoftmaxMultiClassObjGPair(&ctx);\n}\n\nTEST(SyclObjective, SoftmaxMultiClassBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestSoftmaxMultiClassBasic(&ctx);\n}\n\nTEST(SyclObjective, SoftprobMultiClassBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestSoftprobMultiClassBasic(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_partition_builder.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../../../plugin/sycl/common/partition_builder.h\"\n#include \"../../../plugin/sycl/device_manager.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::common {\n\nvoid TestPartitioning(float sparsity, int max_bins) {\n  const size_t num_rows = 16;\n  const size_t num_columns = 1;\n\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(ctx.Device());\n\n  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();\n  common::GHistIndexMatrix gmat;\n  gmat.Init(qu, &ctx, p_fmat.get(), max_bins);\n\n  RowSetCollection row_set_collection;\n  auto& row_indices = row_set_collection.Data();\n  row_indices.Resize(qu, num_rows);\n  size_t* p_row_indices = row_indices.Data();\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(num_rows),\n                       [p_row_indices](::sycl::item<1> pid) {\n      const size_t idx = pid.get_id(0);\n      p_row_indices[idx] = idx;\n    });\n  }).wait_and_throw();\n  row_set_collection.Init();\n\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n\n  const size_t n_nodes = row_set_collection.Size();\n  PartitionBuilder partition_builder;\n  partition_builder.Init(qu, n_nodes, [&](size_t nid) {\n    return row_set_collection[nid].Size();\n  });\n\n  std::vector<tree::ExpandEntry> nodes;\n  nodes.emplace_back(tree::ExpandEntry(0, tree.GetDepth(0)));\n\n  ::sycl::event event;\n  std::vector<int32_t> split_conditions = {2};\n  partition_builder.Partition(gmat, nodes, row_set_collection,\n                    split_conditions, &tree, &event);\n  qu->wait_and_throw();\n\n  size_t* data_result = const_cast<size_t*>(row_set_collection[0].begin);\n  partition_builder.MergeToArray(0, data_result, 
&event);\n  qu->wait_and_throw();\n\n  bst_float split_pt = gmat.cut.Values()[split_conditions[0]];\n\n  std::vector<uint8_t> ridx_left(num_rows, 0);\n  std::vector<uint8_t> ridx_right(num_rows, 0);\n  for (auto &batch : p_fmat->GetBatches<SparsePage>()) {\n    const auto& data_vec = batch.data.HostVector();\n    const auto& offset_vec = batch.offset.HostVector();\n\n    size_t begin = offset_vec[0];\n    for (size_t idx = 0; idx < offset_vec.size() - 1; ++idx) {\n      size_t end = offset_vec[idx + 1];\n      if (begin < end) {\n        const auto& entry = data_vec[begin];\n        if (entry.fvalue < split_pt) {\n          ridx_left[idx] = 1;\n        } else {\n          ridx_right[idx] = 1;\n        }\n      } else {\n        // missing value\n        if (tree[0].DefaultLeft()) {\n          ridx_left[idx] = 1;\n        } else {\n          ridx_right[idx] = 1;\n        }\n      }\n      begin = end;\n    }\n  }\n  auto n_left  = std::accumulate(ridx_left.begin(),  ridx_left.end(),  0);\n  auto n_right = std::accumulate(ridx_right.begin(), ridx_right.end(), 0);\n\n  std::vector<size_t> row_indices_host(num_rows);\n  qu->memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));\n  qu->wait_and_throw();\n\n  ASSERT_EQ(n_left,  partition_builder.GetNLeftElems(0));\n  for (size_t i = 0; i < n_left; ++i) {\n    auto idx = row_indices_host[i];\n    ASSERT_EQ(ridx_left[idx], 1);\n  }\n\n  ASSERT_EQ(n_right, partition_builder.GetNRightElems(0));\n  for (size_t i = 0; i < n_right; ++i) {\n    auto idx = row_indices_host[num_rows - 1 - i];\n    ASSERT_EQ(ridx_right[idx], 1);\n  }\n}\n\nTEST(SyclPartitionBuilder, BasicTest) {\n  constexpr size_t kNodes = 5;\n  // Number of rows for each node\n  std::vector<size_t> rows = { 5, 5, 10, 1, 2 };\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());\n  PartitionBuilder builder;\n  builder.Init(qu, kNodes, [&](size_t i) {\n    return rows[i];\n  });\n\n  // We test 
here only the basics, thus syntetic partition builder is adopted\n  // Number of rows to go left for each node.\n  std::vector<size_t> rows_for_left_node = { 2, 0, 7, 1, 2 };\n\n  size_t first_row_id = 0;\n  for(size_t nid = 0; nid < kNodes; ++nid) {\n    size_t n_rows_nodes = rows[nid];\n\n    auto rid_buff = builder.GetData(nid);\n    size_t rid_buff_size = rid_buff.size();\n    auto* rid_buff_ptr = rid_buff.data();\n\n    size_t n_left  = rows_for_left_node[nid];\n    size_t n_right = rows[nid] - n_left;\n\n    qu->submit([&](::sycl::handler& cgh) {\n      cgh.parallel_for<>(::sycl::range<1>(n_left), [=](::sycl::id<1> pid) {\n        int row_id = first_row_id + pid[0];\n        rid_buff_ptr[pid[0]] = row_id;\n      });\n    });\n    qu->wait();\n    first_row_id += n_left;\n\n    // We are storing indexes for the right side in the tail of the array to save some memory\n    qu->submit([&](::sycl::handler& cgh) {\n      cgh.parallel_for<>(::sycl::range<1>(n_right), [=](::sycl::id<1> pid) {\n        int row_id = first_row_id + pid[0];\n        rid_buff_ptr[rid_buff_size - pid[0] - 1] = row_id;\n      });\n    });\n    qu->wait();\n    first_row_id += n_right;\n\n    builder.SetNLeftElems(nid, n_left);\n    builder.SetNRightElems(nid, n_right);\n  }\n\n  ::sycl::event event;\n  std::vector<size_t> v(*std::max_element(rows.begin(), rows.end()));\n  size_t row_id = 0;\n  for(size_t nid = 0; nid < kNodes; ++nid) {\n    builder.MergeToArray(nid, v.data(), &event);\n    qu->wait();\n\n    // Check that row_id for left side are correct\n    for(size_t j = 0; j < rows_for_left_node[nid]; ++j) {\n       ASSERT_EQ(v[j], row_id++);\n    }\n\n    // Check that row_id for right side are correct\n    for(size_t j = 0; j < rows[nid] - rows_for_left_node[nid]; ++j) {\n      ASSERT_EQ(v[rows[nid] - j - 1], row_id++);\n    }\n\n    // Check that number of left/right rows are correct\n    size_t n_left  = builder.GetNLeftElems(nid);\n    size_t n_right = 
builder.GetNRightElems(nid);\n    ASSERT_EQ(n_left, rows_for_left_node[nid]);\n    ASSERT_EQ(n_right, (rows[nid] - rows_for_left_node[nid]));\n  }\n}\n\nTEST(SyclPartitionBuilder, PartitioningSparce) {\n  TestPartitioning(0.3, 256);\n}\n\nTEST(SyclPartitionBuilder, PartitioningDence8Bits) {\n  TestPartitioning(0.0, 256);\n}\n\nTEST(SyclPartitionBuilder, PartitioningDence16Bits) {\n  TestPartitioning(0.0, 256 + 1);\n}\n\nTEST(SyclPartitionBuilder, PartitioningDence32Bits) {\n  TestPartitioning(0.0, (1u << 16) + 1);\n}\n\n}  // namespace xgboost::sycl::common\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_prediction_cache.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"../tree/test_prediction_cache.h\"\n#pragma GCC diagnostic pop\n\nnamespace xgboost::sycl::tree {\n\nclass SyclPredictionCache : public xgboost::TestPredictionCache {};\n\nTEST_F(SyclPredictionCache, Hist) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  this->RunTest(&ctx, \"grow_quantile_histmaker_sycl\", \"one_output_per_tree\");\n}\n\n}  // namespace xgboost::sycl::tree\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_predictor.cc",
    "content": "/*!\n * Copyright 2017-2025 XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/predictor.h>\n#pragma GCC diagnostic pop\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/gbm/gbtree.h\"\n#pragma GCC diagnostic pop\n\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../../../src/gbm/gbtree_model.h\"\n#include \"../filesystem.h\"  // dmlc::TemporaryDirectory\n#include \"../helpers.h\"\n#include \"../predictor/test_predictor.h\"\n\nnamespace xgboost {\n\nTEST(SyclPredictor, Basic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  size_t constexpr kRows = 5;\n  size_t constexpr kCols = 5;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n  TestBasic(dmat.get(), &ctx);\n}\n\nTEST(SyclPredictor, ExternalMemory) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  bst_idx_t constexpr kRows{64};\n  bst_feature_t constexpr kCols{12};\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.5f}.Batches(3).GenerateSparsePageDMatrix(\"temp\", true);\n  TestBasic(dmat.get(), &ctx);\n}\n\nTEST(SyclPredictor, InplacePredict) {\n  bst_idx_t constexpr kRows{128};\n  bst_feature_t constexpr kCols{64};\n  Context ctx;\n  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());\n  {\n    HostDeviceVector<float> data;\n    gen.GenerateDense(&data);\n    ASSERT_EQ(data.Size(), kRows * kCols);\n    Context ctx;\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};\n    auto array_interface = GetArrayInterface(&data, kRows, kCols);\n    std::string arr_str;\n    Json::Dump(array_interface, &arr_str);\n    
x->SetArray(arr_str.data());\n    TestInplacePrediction(&ctx, x, kRows, kCols);\n  }\n}\n\nTEST(SyclPredictor, IterationRange) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestIterationRange(&ctx);\n}\n\nTEST(SyclPredictor, GHistIndexTraining) {\n  size_t constexpr kRows{128}, kCols{16}, kBins{64};\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateDMatrix(false);\n  HostDeviceVector<float> storage(kRows * kCols);\n  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);\n  auto adapter = data::ArrayAdapter(columnar.c_str());\n  std::shared_ptr<DMatrix> p_full{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};\n  TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);\n}\n\nTEST(SyclPredictor, CategoricalPredictLeaf) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestCategoricalPredictLeaf(&ctx, false);\n}\n\nTEST(SyclPredictor, LesserFeatures) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestPredictionWithLesserFeatures(&ctx);\n}\n\nTEST(SyclPredictor, Sparse) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestSparsePrediction(&ctx, 0.2);\n  TestSparsePrediction(&ctx, 0.8);\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_quantile_hist_builder.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/json.h>\n#include <xgboost/task.h>\n#include \"../../../plugin/sycl/tree/updater_quantile_hist.h\"       // for QuantileHistMaker\n#pragma GCC diagnostic pop\n\nnamespace xgboost::sycl::tree {\nTEST(SyclQuantileHistMaker, Basic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_quantile_histmaker_sycl\", &ctx, &task)};\n\n  ASSERT_EQ(updater->Name(), \"grow_quantile_histmaker_sycl\");\n}\n\nTEST(SyclQuantileHistMaker, JsonIO) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  ObjInfo task{ObjInfo::kRegression};\n  Json config {Object()};\n  {\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_quantile_histmaker_sycl\", &ctx, &task)};\n    updater->Configure({{\"max_depth\", std::to_string(42)}});\n    updater->Configure({{\"single_precision_histogram\", std::to_string(true)}});\n    updater->SaveConfig(&config);\n  }\n\n  {\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_quantile_histmaker_sycl\", &ctx, &task)};\n    updater->LoadConfig(config);\n\n    Json new_config {Object()};\n    updater->SaveConfig(&new_config);\n\n    ASSERT_EQ(config, new_config);\n\n    auto max_depth = atoi(get<String const>(new_config[\"train_param\"][\"max_depth\"]).c_str());\n    ASSERT_EQ(max_depth, 42);\n\n    auto single_precision_histogram = atoi(get<String const>(new_config[\"sycl_hist_train_param\"][\"single_precision_histogram\"]).c_str());\n    ASSERT_EQ(single_precision_histogram, 1);\n  }\n  \n}\n}  // namespace xgboost::sycl::tree\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_quantile_obj.cc",
    "content": "/**\n * Copyright 2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/objective.h>\n#pragma GCC diagnostic pop\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"../objective/test_quantile_obj.h\"\n\nnamespace xgboost {\nTEST(SyclObjective, DeclareUnifiedTest(Quantile)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestQuantile(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(QuantileIntercept)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestQuantileIntercept(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_regression_obj.cc",
    "content": "/*!\n * Copyright 2017-2019 XGBoost contributors\n */\n#include <gtest/gtest.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/objective.h>\n#pragma GCC diagnostic pop\n#include <xgboost/context.h>\n\n#include \"../helpers.h\"\n#include \"../objective/test_regression_obj.h\"\n\nnamespace xgboost {\n\nTEST(SyclObjective, LinearRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestLinearRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, SquaredLog) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestSquaredLog(&ctx);\n}\n\nTEST(SyclObjective, LogisticRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestLogisticRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, LogisticRegressionBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n\n  TestLogisticRegressionBasic(&ctx);\n}\n\nTEST(SyclObjective, LogisticRawGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestsLogisticRawGPair(&ctx);\n}\n\nTEST(SyclObjective, PoissonRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestPoissonRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, PoissonRegressionBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestPoissonRegressionBasic(&ctx);\n}\n\nTEST(SyclObjective, GammaRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestGammaRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, GammaRegressionBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestGammaRegressionBasic(&ctx);\n}\n\nTEST(SyclObjective, TweedieRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  
TestTweedieRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, TweedieRegressionBasic) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestTweedieRegressionBasic(&ctx);\n}\n\nTEST(SyclObjective, CoxRegressionGPair) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestCoxRegressionGPair(&ctx);\n}\n\nTEST(SyclObjective, AbsoluteError) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAbsoluteError(&ctx);\n}\n\nTEST(SyclObjective, AbsoluteErrorLeaf) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestAbsoluteErrorLeaf(&ctx);\n}\n\nTEST(SyclObjective, DeclareUnifiedTest(PseudoHuber)) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  TestPseudoHuber(&ctx);\n}\n\nTEST(SyclObjective, CPUvsSycl) {\n  Context ctx_sycl;\n  ctx_sycl.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n  ObjFunction * obj_sycl =\n      ObjFunction::Create(\"reg:squarederror\", &ctx_sycl);\n\n  Context ctx_cpu;\n  ctx_cpu.UpdateAllowUnknown(Args{{\"device\", \"cpu\"}});\n  ObjFunction * obj_cpu =\n      ObjFunction::Create(\"reg:squarederror\", &ctx_cpu);\n\n  linalg::Matrix<GradientPair> cpu_out_preds;\n  linalg::Matrix<GradientPair> sycl_out_preds;\n\n  constexpr size_t kRows = 400;\n  constexpr size_t kCols = 100;\n  auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();\n  HostDeviceVector<float> preds;\n  preds.Resize(kRows);\n  auto& h_preds = preds.HostVector();\n  for (size_t i = 0; i < h_preds.size(); ++i) {\n    h_preds[i] = static_cast<float>(i);\n  }\n  auto& info = pdmat->Info();\n\n  info.labels.Reshape(kRows, 1);\n  auto& h_labels = info.labels.Data()->HostVector();\n  for (size_t i = 0; i < h_labels.size(); ++i) {\n    h_labels[i] = 1 / static_cast<float>(i+1);\n  }\n\n  {\n    // CPU\n    obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);\n  }\n  {\n    // sycl\n    obj_sycl->GetGradient(preds, info, 0, 
&sycl_out_preds);\n  }\n\n  auto h_cpu_out = cpu_out_preds.HostView();\n  auto h_sycl_out = sycl_out_preds.HostView();\n\n  float sgrad = 0;\n  float shess = 0;\n  for (size_t i = 0; i < kRows; ++i) {\n    sgrad += std::pow(h_cpu_out(i).GetGrad() - h_sycl_out(i).GetGrad(), 2);\n    shess += std::pow(h_cpu_out(i).GetHess() - h_sycl_out(i).GetHess(), 2);\n  }\n  ASSERT_NEAR(sgrad, 0.0f, kRtEps);\n  ASSERT_NEAR(shess, 0.0f, kRtEps);\n\n  delete obj_cpu;\n  delete obj_sycl;\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_row_set_collection.cc",
    "content": "/**\n * Copyright 2020-2023 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../../../plugin/sycl/common/row_set.h\"\n#include \"../../../plugin/sycl/device_manager.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::common {\nTEST(SyclRowSetCollection, AddSplits) {\n  const size_t num_rows = 16;\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());\n\n  RowSetCollection row_set_collection;\n\n  auto& row_indices = row_set_collection.Data();\n  row_indices.Resize(qu, num_rows);\n  size_t* p_row_indices = row_indices.Data();\n\n  qu->submit([&](::sycl::handler& cgh) {\n    cgh.parallel_for<>(::sycl::range<1>(num_rows),\n                       [p_row_indices](::sycl::item<1> pid) {\n      const size_t idx = pid.get_id(0);\n      p_row_indices[idx] = idx;\n    });\n  }).wait_and_throw();\n  row_set_collection.Init();\n\n  CHECK_EQ(row_set_collection.Size(), 1);\n  {\n    size_t nid_test = 0;\n    auto& elem = row_set_collection[nid_test];\n    CHECK_EQ(elem.begin, row_indices.Begin());\n    CHECK_EQ(elem.end, row_indices.End());\n    CHECK_EQ(elem.node_id , 0);\n  }\n\n  size_t nid = 0;\n  size_t nid_left = 1;\n  size_t nid_right = 2;\n  size_t n_left = 4;\n  size_t n_right = num_rows - n_left;\n  row_set_collection.AddSplit(nid, nid_left, nid_right, n_left, n_right);\n  CHECK_EQ(row_set_collection.Size(), 3);\n\n  {\n    size_t nid_test = 0;\n    auto& elem = row_set_collection[nid_test];\n    CHECK_EQ(elem.begin, nullptr);\n    CHECK_EQ(elem.end, nullptr);\n    CHECK_EQ(elem.node_id , -1);\n  }\n\n  {\n    size_t nid_test = 1;\n    auto& elem = row_set_collection[nid_test];\n    CHECK_EQ(elem.begin, row_indices.Begin());\n    CHECK_EQ(elem.end, row_indices.Begin() + n_left);\n    CHECK_EQ(elem.node_id , nid_test);\n  }\n\n  {\n    size_t nid_test = 2;\n    auto& elem = row_set_collection[nid_test];\n    
CHECK_EQ(elem.begin, row_indices.Begin() + n_left);\n    CHECK_EQ(elem.end, row_indices.End());\n    CHECK_EQ(elem.node_id , nid_test);\n  }\n\n}\n}  // namespace xgboost::sycl::common\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_split_evaluator.cc",
    "content": "/**\n * Copyright 2020-2024 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <vector>\n\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wtautological-constant-compare\"\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include \"../../../plugin/sycl/tree/split_evaluator.h\"\n#pragma GCC diagnostic pop\n\n#include \"../../../plugin/sycl/device_manager.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::sycl::tree {\n\ntemplate<typename GradientSumT>\nvoid BasicTestSplitEvaluator(const std::string& monotone_constraints, bool has_constrains) {\n  const size_t n_columns = 2;\n\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"min_child_weight\", \"0\"},\n                                {\"reg_lambda\", \"0\"},\n                                {\"monotone_constraints\", monotone_constraints}});\n\n  DeviceManager device_manager;\n  auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());\n\n  TreeEvaluator<GradientSumT> tree_evaluator(qu, param, n_columns);\n  {\n    // Check correctness of has_constrains flag\n    ASSERT_EQ(tree_evaluator.HasConstraint(), has_constrains);\n  }\n\n  auto split_evaluator = tree_evaluator.GetEvaluator();\n  {\n    // Check if params were inititialised correctly\n    ASSERT_EQ(split_evaluator.param.min_child_weight, param.min_child_weight);\n    ASSERT_EQ(split_evaluator.param.reg_lambda, param.reg_lambda);\n    ASSERT_EQ(split_evaluator.param.reg_alpha, param.reg_alpha);\n    ASSERT_EQ(split_evaluator.param.max_delta_step, param.max_delta_step);\n  }\n}\n\ntemplate<typename GradientSumT>\nvoid TestSplitEvaluator(const std::string& monotone_constraints) {\n  const size_t n_columns = 2;\n\n  xgboost::tree::TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"min_child_weight\", \"0\"},\n                                {\"reg_lambda\", \"0\"},\n                                {\"monotone_constraints\", monotone_constraints}});\n\n  DeviceManager 
device_manager;\n  auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());\n\n  TreeEvaluator<GradientSumT> tree_evaluator(qu, param, n_columns);\n  auto split_evaluator = tree_evaluator.GetEvaluator();\n  {\n    // Test ThresholdL1\n    const GradientSumT alpha = 0.5;\n    {\n      const GradientSumT val = 0.0;\n      const auto trh = split_evaluator.ThresholdL1(val, alpha);\n      ASSERT_EQ(trh, 0.0);\n    }\n\n    {\n      const GradientSumT val = 1.0;\n      const auto trh = split_evaluator.ThresholdL1(val, alpha);\n      ASSERT_EQ(trh, val - alpha);\n    }\n\n    {\n      const GradientSumT val = -1.0;\n      const auto trh = split_evaluator.ThresholdL1(val, alpha);\n      ASSERT_EQ(trh, val + alpha);\n    }\n  }\n\n  {\n    constexpr float eps = 1e-8;\n    tree_evaluator.AddSplit(0, 1, 2, 0, 0.3, 0.7);\n\n    GradStats<GradientSumT> left(0.1, 0.2);\n    GradStats<GradientSumT> right(0.3, 0.4);\n    bst_node_t nidx = 0;\n    bst_feature_t fidx = 0;\n\n    GradientSumT wleft  = split_evaluator.CalcWeight(nidx, left);\n    // wleft = -grad/hess = -0.1/0.2\n    EXPECT_NEAR(wleft, -0.5, eps);\n    GradientSumT wright = split_evaluator.CalcWeight(nidx, right);\n    // wright = -grad/hess = -0.3/0.4\n    EXPECT_NEAR(wright, -0.75, eps);\n\n    GradientSumT gweight_left = split_evaluator.CalcGainGivenWeight(nidx, left, wleft);\n    // gweight_left = left.grad**2 / left.hess = 0.1*0.1/0.2 = 0.05\n    EXPECT_NEAR(gweight_left, 0.05, eps);\n    // gweight_left = right.grad**2 / right.hess = 0.3*0.3/0.4 = 0.225\n    GradientSumT gweight_right = split_evaluator.CalcGainGivenWeight(nidx, right, wright);\n    EXPECT_NEAR(gweight_right, 0.225, eps);\n\n    GradientSumT split_gain = split_evaluator.CalcSplitGain(nidx, fidx, left, right);\n    if (!tree_evaluator.HasConstraint()) {\n      EXPECT_NEAR(split_gain, gweight_left + gweight_right, eps);\n    } else {\n      // Parameters are chosen to have -inf here\n      ASSERT_EQ(split_gain, 
-std::numeric_limits<GradientSumT>::infinity());\n    }\n  }\n}\n\nTEST(SyclSplitEvaluator, BasicTest) {\n  BasicTestSplitEvaluator<float>(\"( 0,  0)\", false);\n  BasicTestSplitEvaluator<float>(\"( 1,  0)\", true);\n  BasicTestSplitEvaluator<float>(\"( 0,  1)\", true);\n  BasicTestSplitEvaluator<float>(\"(-1,  0)\", true);\n  BasicTestSplitEvaluator<float>(\"( 0, -1)\", true);\n  BasicTestSplitEvaluator<float>(\"( 1,  1)\", true);\n  BasicTestSplitEvaluator<float>(\"(-1, -1)\", true);\n  BasicTestSplitEvaluator<float>(\"( 1, -1)\", true);\n  BasicTestSplitEvaluator<float>(\"(-1,  1)\", true);\n}\n\nTEST(SyclSplitEvaluator, TestMath) {\n  // Without constraints\n  TestSplitEvaluator<float>(\"( 0,  0)\");\n  // With constraints\n  TestSplitEvaluator<float>(\"( 1,  0)\");\n}\n\n}  // namespace xgboost::sycl::tree\n"
  },
  {
    "path": "tests/cpp/plugin/test_sycl_transform_range.cc",
    "content": "/**\n * Copyright 2018-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-W#pragma-messages\"\n#include <xgboost/host_device_vector.h>\n#pragma GCC diagnostic pop\n#include <xgboost/span.h>\n\n#include <numeric>  // for iota\n#include <vector>\n\n#include \"../../../src/common/transform.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::common {\n\ntemplate <typename T>\nstruct TestTransformRange {\n  void operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {\n    _out[_idx] = _in[_idx];\n  }\n};\n\nTEST(SyclTransform, DeclareUnifiedTest(Basic)) {\n  const size_t size{256};\n  std::vector<float> h_in(size);\n  std::vector<float> h_out(size);\n  std::iota(h_in.begin(), h_in.end(), 0);\n  std::vector<float> h_sol(size);\n  std::iota(h_sol.begin(), h_sol.end(), 0);\n\n  auto device =  DeviceOrd::SyclDefault();\n  HostDeviceVector<float> const in_vec{h_in, device};\n  HostDeviceVector<float> out_vec{h_out, device};\n  out_vec.Fill(0);\n\n  Transform<>::Init(TestTransformRange<float>{},\n                    Range{0, static_cast<Range::DifferenceType>(size)}, 1,\n                    device)\n      .Eval(&out_vec, &in_vec);\n  std::vector<float> res = out_vec.HostVector();\n\n  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));\n}\n}  // namespace xgboost::common\n"
  },
  {
    "path": "tests/cpp/predictor/test_cpu_predictor.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/predictor.h>\n\n#include \"../../../src/collective/communicator-inl.h\"\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../../../src/gbm/gbtree.h\"\n#include \"../../../src/gbm/gbtree_model.h\"\n#include \"../../../src/predictor/array_tree_layout.h\"\n#include \"../../../src/tree/tree_view.h\"\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"../helpers.h\"\n#include \"test_predictor.h\"\n#include \"test_shap.h\"\n\nnamespace xgboost {\nTEST(CpuPredictor, Basic) {\n  Context ctx;\n  size_t constexpr kRows = 5;\n  size_t constexpr kCols = 5;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n  TestBasic(dmat.get(), &ctx);\n}\n\nTEST(CpuPredictor, BatchPredictionWithWeights) {\n  Context ctx;\n  TestBatchPredictionWithWeights(&ctx);\n}\n\nTEST(CpuPredictor, InplacePredictionWithWeights) {\n  Context ctx;\n  TestInplacePredictionWithWeights(&ctx);\n}\n\ntemplate <typename ArrayLayoutT>\nvoid CheckArrayLayout(const RegTree& tree, ArrayLayoutT buffer, int max_depth, int depth,\n                      size_t nid, size_t nid_array) {\n  const auto& split_idx = buffer.SplitIndex();\n  const auto& split_cond = buffer.SplitCond();\n  const auto& default_left = buffer.DefaultLeft();\n  const auto& nidx_in_tree = buffer.NidxInTree();\n  const auto& nodes = tree.GetNodes(DeviceOrd::CPU());\n\n  if (depth == max_depth) {\n    ASSERT_EQ(nidx_in_tree[nid_array - (1u << max_depth) + 1], nid);\n    return;\n  }\n\n  if (nodes[nid].IsLeaf()) {\n    ASSERT_EQ(default_left[nid_array], 0);\n    ASSERT_TRUE(std::isnan(split_cond[nid_array]));\n\n    CheckArrayLayout(tree, buffer, max_depth, depth + 1, nid, 2 * nid_array + 2);\n  } else {\n    ASSERT_EQ(nodes[nid].SplitIndex(), split_idx[nid_array]);\n    ASSERT_EQ(nodes[nid].SplitCond(), split_cond[nid_array]);\n    
ASSERT_EQ(nodes[nid].DefaultLeft(), default_left[nid_array]);\n\n    if (nodes[nid].LeftChild() != RegTree::kInvalidNodeId) {\n      CheckArrayLayout(tree, buffer, max_depth, depth + 1, nodes[nid].LeftChild(),\n                       2 * nid_array + 1);\n    }\n    if (nodes[nid].RightChild() != RegTree::kInvalidNodeId) {\n      CheckArrayLayout(tree, buffer, max_depth, depth + 1, nodes[nid].RightChild(),\n                       2 * nid_array + 2);\n    }\n  }\n}\n\nnamespace {\ntemplate <bst_node_t kDepth>\nusing LayoutForTest = predictor::ArrayTreeLayout<false, true, kDepth, tree::ScalarTreeView>;\n}\n\nTEST(CpuPredictor, ArrayTreeLayout) {\n  Context ctx;\n\n  RegTree tree;\n  size_t n_nodes = 15;  // 2^4 - 1\n  for (size_t nid = 0; nid < n_nodes; ++nid) {\n    // Some place-holders\n    size_t split_index = nid + 1;\n    bst_float split_cond = nid + 2;\n    bool default_left = nid % 2 == 0;\n\n    tree.ExpandNode(nid, split_index, split_cond, default_left, 0, 0, 0, 0, 0, 0, 0);\n  }\n\n  auto sc_tree = tree::ScalarTreeView{ctx.Device(), false, &tree};\n  {\n    constexpr bst_node_t kDepth = 1;\n    LayoutForTest<kDepth> buffer(sc_tree, sc_tree.GetCategoriesMatrix());\n    CheckArrayLayout(tree, buffer, kDepth, 0, 0, 0);\n  }\n  {\n    constexpr bst_node_t kDepth = 2;\n    LayoutForTest<kDepth> buffer{sc_tree, sc_tree.GetCategoriesMatrix()};\n    CheckArrayLayout(tree, buffer, kDepth, 0, 0, 0);\n  }\n  {\n    constexpr bst_node_t kDepth = 3;\n    LayoutForTest<kDepth> buffer{sc_tree, sc_tree.GetCategoriesMatrix()};\n    CheckArrayLayout(tree, buffer, kDepth, 0, 0, 0);\n  }\n  {\n    constexpr bst_node_t kDepth = 4;\n    LayoutForTest<kDepth> buffer{sc_tree, sc_tree.GetCategoriesMatrix()};\n    CheckArrayLayout(tree, buffer, kDepth, 0, 0, 0);\n  }\n  {\n    constexpr bst_node_t kDepth = 5;\n    LayoutForTest<kDepth> buffer{sc_tree, sc_tree.GetCategoriesMatrix()};\n    CheckArrayLayout(tree, buffer, kDepth, 0, 0, 0);\n  }\n}\n\nnamespace {\nvoid TestColumnSplit() 
{\n  Context ctx;\n  size_t constexpr kRows = 5;\n  size_t constexpr kCols = 5;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};\n\n  TestBasic(dmat.get(), &ctx);\n}\n}  // anonymous namespace\n\nTEST(CpuPredictor, BasicColumnSplit) {\n  auto constexpr kWorldSize = 2;\n  collective::TestDistributedGlobal(kWorldSize, TestColumnSplit);\n}\n\nTEST(CpuPredictor, IterationRange) {\n  Context ctx;\n  TestIterationRange(&ctx);\n}\n\nTEST(CpuPredictor, IterationRangeColmnSplit) {\n  auto constexpr kWorldSize = 2;\n  TestIterationRangeColumnSplit(kWorldSize, false);\n}\n\nTEST(CpuPredictor, ExternalMemory) {\n  Context ctx;\n  bst_idx_t constexpr kRows{64};\n  bst_feature_t constexpr kCols{12};\n  auto dmat =\n      RandomDataGenerator{kRows, kCols, 0.5f}.Batches(3).GenerateSparsePageDMatrix(\"temp\", true);\n  TestBasic(dmat.get(), &ctx);\n}\n\nTEST(CpuPredictor, InplacePredict) {\n  bst_idx_t constexpr kRows{128};\n  bst_feature_t constexpr kCols{64};\n  Context ctx;\n  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());\n  {\n    HostDeviceVector<float> data;\n    gen.GenerateDense(&data);\n    ASSERT_EQ(data.Size(), kRows * kCols);\n    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};\n    auto array_interface = GetArrayInterface(&data, kRows, kCols);\n    std::string arr_str;\n    Json::Dump(array_interface, &arr_str);\n    x->SetArray(arr_str.data());\n    TestInplacePrediction(&ctx, x, kRows, kCols);\n  }\n\n  {\n    HostDeviceVector<float> data;\n    HostDeviceVector<std::size_t> rptrs;\n    HostDeviceVector<bst_feature_t> columns;\n    gen.GenerateCSR(&data, &rptrs, &columns);\n    auto data_interface = GetArrayInterface(&data, kRows * kCols, 1);\n    auto rptr_interface = GetArrayInterface(&rptrs, kRows + 1, 1);\n    auto col_interface = 
GetArrayInterface(&columns, kRows * kCols, 1);\n    std::string data_str, rptr_str, col_str;\n    Json::Dump(data_interface, &data_str);\n    Json::Dump(rptr_interface, &rptr_str);\n    Json::Dump(col_interface, &col_str);\n    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy};\n    x->SetCsr(rptr_str.data(), col_str.data(), data_str.data(), kCols, true);\n    TestInplacePrediction(&ctx, x, kRows, kCols);\n  }\n}\n\nnamespace {\nvoid TestUpdatePredictionCache(bool use_subsampling) {\n  std::size_t constexpr kRows = 64, kCols = 16, kClasses = 4;\n  LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)};\n  Context ctx;\n\n  std::unique_ptr<gbm::GBTree> gbm;\n  gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create(\"gbtree\", &ctx, &mparam)));\n  Args args{{\"tree_method\", \"hist\"}};\n  if (use_subsampling) {\n    args.emplace_back(\"subsample\", \"0.5\");\n  }\n  gbm->Configure(args);\n\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).Classes(kClasses).GenerateDMatrix(true);\n\n  GradientContainer gpair;\n  gpair.gpair = linalg::Matrix<GradientPair>({kRows, kClasses}, ctx.Device());\n  auto h_gpair = gpair.gpair.HostView();\n  for (size_t i = 0; i < kRows * kClasses; ++i) {\n    std::apply(h_gpair, linalg::UnravelIndex(i, kRows, kClasses)) = {static_cast<float>(i), 1};\n  }\n\n  PredictionCacheEntry predtion_cache;\n  predtion_cache.predictions.Resize(kRows * kClasses, 0);\n  // after one training iteration predtion_cache is filled with cached in QuantileHistMaker\n  // prediction values\n  gbm->DoBoost(dmat.get(), &gpair, &predtion_cache, nullptr);\n\n  PredictionCacheEntry out_predictions;\n  // perform prediction from scratch on the same input data, should be equal to cached result\n  gbm->PredictBatch(dmat.get(), &out_predictions, false, 0, 0);\n\n  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();\n  std::vector<float>& predtion_cache_from_train = predtion_cache.predictions.HostVector();\n  for (size_t i = 
0; i < out_predictions_h.size(); ++i) {\n    ASSERT_NEAR(out_predictions_h[i], predtion_cache_from_train[i], kRtEps);\n  }\n}\n}  // namespace\n\nTEST(CPUPredictor, GHistIndexTraining) {\n  size_t constexpr kRows{128}, kCols{16}, kBins{64};\n  Context ctx;\n  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);\n  HostDeviceVector<float> storage(kRows * kCols);\n  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);\n  auto adapter = data::ArrayAdapter(columnar.c_str());\n  std::shared_ptr<DMatrix> p_full{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};\n  TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);\n}\n\nTEST(CPUPredictor, CategoricalPrediction) { TestCategoricalPrediction(false, false); }\n\nTEST(CPUPredictor, CategoricalPredictionColumnSplit) {\n  auto constexpr kWorldSize = 2;\n  collective::TestDistributedGlobal(kWorldSize, [] { TestCategoricalPrediction(false, true); });\n}\n\nTEST(CPUPredictor, CategoricalPredictLeaf) {\n  Context ctx;\n  TestCategoricalPredictLeaf(&ctx, false);\n}\n\nTEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {\n  auto constexpr kWorldSize = 2;\n  Context ctx;\n  collective::TestDistributedGlobal(kWorldSize, [&] { TestCategoricalPredictLeaf(&ctx, true); });\n}\n\nTEST(CpuPredictor, UpdatePredictionCache) {\n  TestUpdatePredictionCache(false);\n  TestUpdatePredictionCache(true);\n}\n\nTEST(CpuPredictor, LesserFeatures) {\n  Context ctx;\n  TestPredictionWithLesserFeatures(&ctx);\n}\n\nTEST(CpuPredictor, LesserFeaturesColumnSplit) {\n  auto constexpr kWorldSize = 2;\n  collective::TestDistributedGlobal(kWorldSize,\n                                    [] { TestPredictionWithLesserFeaturesColumnSplit(false); });\n}\n\nTEST(CpuPredictor, Sparse) {\n  Context ctx;\n  TestSparsePrediction(&ctx, 0.2);\n  TestSparsePrediction(&ctx, 0.8);\n}\n\nTEST(CpuPredictor, SparseColumnSplit) {\n  auto constexpr kWorldSize = 
2;\n  TestSparsePredictionColumnSplit(kWorldSize, false, 0.2);\n  TestSparsePredictionColumnSplit(kWorldSize, false, 0.8);\n}\n\nTEST(CpuPredictor, Multi) {\n  Context ctx;\n  TestVectorLeafPrediction(&ctx);\n}\n\nTEST(CpuPredictor, Access) { TestPredictionDeviceAccess(); }\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/predictor/test_gpu_predictor.cu",
    "content": "/**\n * Copyright 2017-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/c_api.h>\n#include <xgboost/learner.h>\n#include <xgboost/logging.h>\n#include <xgboost/predictor.h>\n\n#include <string>\n\n#include \"../../../src/data/device_adapter.cuh\"\n#include \"../../../src/data/proxy_dmatrix.h\"\n#include \"../../../src/gbm/gbtree_model.h\"\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal, BaseMGPUTest\n#include \"../helpers.h\"\n#include \"test_predictor.h\"\n#include \"test_shap.h\"\n\nnamespace xgboost::predictor {\nTEST(GPUPredictor, Basic) {\n  auto cpu_lparam = MakeCUDACtx(-1);\n  auto gpu_lparam = MakeCUDACtx(0);\n\n  std::unique_ptr<Predictor> gpu_predictor =\n      std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", &gpu_lparam));\n  std::unique_ptr<Predictor> cpu_predictor =\n      std::unique_ptr<Predictor>(Predictor::Create(\"cpu_predictor\", &cpu_lparam));\n\n  gpu_predictor->Configure({});\n  cpu_predictor->Configure({});\n\n  for (size_t i = 1; i < 33; i *= 2) {\n    int n_row = i, n_col = i;\n    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();\n\n    auto ctx = MakeCUDACtx(0);\n    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};\n    std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, &ctx);\n    auto const& model = *p_model;\n\n    // Test predict batch\n    PredictionCacheEntry gpu_out_predictions;\n    PredictionCacheEntry cpu_out_predictions;\n\n    gpu_predictor->InitOutPredictions(dmat->Info(), &gpu_out_predictions.predictions, model);\n    gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);\n    cpu_predictor->InitOutPredictions(dmat->Info(), &cpu_out_predictions.predictions, model);\n    cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);\n\n    std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();\n    std::vector<float>& 
cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();\n    float abs_tolerance = 0.001;\n    for (size_t j = 0; j < gpu_out_predictions.predictions.Size(); j++) {\n      ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);\n    }\n  }\n}\n\nTEST(GPUPredictor, BatchPredictionWithWeights) {\n  auto ctx = MakeCUDACtx(0);\n  TestBatchPredictionWithWeights(&ctx);\n}\n\nTEST(GPUPredictor, InplacePredictionWithWeights) {\n  auto ctx = MakeCUDACtx(0);\n  TestInplacePredictionWithWeights(&ctx);\n}\n\nnamespace {\nvoid VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_result) {\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n\n  auto ctx = MakeCUDACtx(GPUIDX);\n  std::unique_ptr<Predictor> predictor =\n      std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", &ctx));\n  predictor->Configure({});\n\n  for (size_t i = 1; i < 33; i *= 2) {\n    size_t n_row = i, n_col = i;\n    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();\n    std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};\n\n    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};\n    std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, &ctx);\n    auto const& model = *p_model;\n\n    // Test predict batch\n    PredictionCacheEntry out_predictions;\n\n    predictor->InitOutPredictions(sliced->Info(), &out_predictions.predictions, model);\n    predictor->PredictBatch(sliced.get(), &out_predictions, model, 0);\n\n    std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();\n    EXPECT_EQ(out_predictions_h, expected_result[i - 1]);\n  }\n}\n}  // anonymous namespace\n\nclass MGPUPredictorTest : public collective::BaseMGPUTest {};\n\nTEST_F(MGPUPredictorTest, BasicColumnSplit) {\n  auto ctx = MakeCUDACtx(0);\n  std::unique_ptr<Predictor> predictor =\n      
std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", &ctx));\n  predictor->Configure({});\n\n  std::array<std::vector<float>, 32> result{};\n  for (size_t i = 1; i < 33; i *= 2) {\n    size_t n_row = i, n_col = i;\n    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();\n\n    LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};\n    std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, &ctx);\n    auto const& model = *p_model;\n\n    // Test predict batch\n    PredictionCacheEntry out_predictions;\n\n    predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);\n    predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);\n\n    std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();\n    result[i - 1] = out_predictions_h;\n  }\n\n  this->DoTest([&] { VerifyBasicColumnSplit(result); }, true);\n  this->DoTest([&] { VerifyBasicColumnSplit(result); }, false);\n}\n\nTEST(GPUPredictor, EllpackBasic) {\n  size_t constexpr kCols{8};\n  auto ctx = MakeCUDACtx(0);\n  for (size_t bins = 2; bins < 258; bins += 16) {\n    size_t rows = bins * 16;\n    auto p_m = RandomDataGenerator{rows, kCols, 0.0}\n                   .Bins(bins)\n                   .Device(ctx.Device())\n                   .GenerateQuantileDMatrix(false);\n    ASSERT_FALSE(p_m->PageExists<SparsePage>());\n    TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);\n    TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);\n  }\n}\n\nTEST(GPUPredictor, EllpackTraining) {\n  auto ctx = MakeCUDACtx(0);\n  size_t constexpr kRows{128}, kCols{16}, kBins{64};\n  auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}\n                       .Bins(kBins)\n                       .Device(ctx.Device())\n                       .GenerateQuantileDMatrix(false);\n  HostDeviceVector<float> storage(kRows * kCols);\n  auto columnar =\n      RandomDataGenerator{kRows, kCols, 
0.0}.Device(ctx.Device()).GenerateArrayInterface(&storage);\n  auto adapter = data::CupyAdapter(columnar);\n  std::shared_ptr<DMatrix> p_full{\n      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};\n  TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);\n}\n\nnamespace {\ntemplate <typename Create>\nvoid TestDecisionStumpExternalMemory(Context const* ctx, bst_feature_t n_features,\n                                     Create create_fn) {\n  std::int32_t n_classes = 3;\n  LearnerModelParam mparam{MakeMP(n_features, .5, n_classes, ctx->Device())};\n  std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, ctx, n_classes);\n  auto const& model = *p_model;\n  std::unique_ptr<Predictor> gpu_predictor =\n      std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", ctx));\n  gpu_predictor->Configure({});\n\n  for (auto p_fmat : {create_fn(400), create_fn(800), create_fn(2048)}) {\n    p_fmat->Info().base_margin_ = linalg::Constant(ctx, 0.5f, p_fmat->Info().num_row_, n_classes);\n    PredictionCacheEntry out_predictions;\n    gpu_predictor->InitOutPredictions(p_fmat->Info(), &out_predictions.predictions, model);\n    gpu_predictor->PredictBatch(p_fmat.get(), &out_predictions, model, 0);\n    ASSERT_EQ(out_predictions.predictions.Size(), p_fmat->Info().num_row_ * n_classes);\n    auto const& h_predt = out_predictions.predictions.ConstHostVector();\n    for (size_t i = 0; i < h_predt.size() / n_classes; i++) {\n      ASSERT_EQ(h_predt[i * n_classes], 2.0);\n      ASSERT_EQ(h_predt[i * n_classes + 1], 0.5);\n      ASSERT_EQ(h_predt[i * n_classes + 2], 0.5);\n    }\n  }\n}\n}  // namespace\n\nTEST(GPUPredictor, ExternalMemory) {\n  auto ctx = MakeCUDACtx(0);\n\n  bst_bin_t max_bin = 128;\n  bst_feature_t n_features = 32;\n\n  TestDecisionStumpExternalMemory(&ctx, n_features, [&](bst_idx_t n_samples) {\n    return RandomDataGenerator{n_samples, n_features, 0.0f}\n        .Batches(4)\n        .Device(ctx.Device())\n        
.Bins(max_bin)\n        .GenerateSparsePageDMatrix(\"temp\", false);\n  });\n  TestDecisionStumpExternalMemory(&ctx, n_features, [&](bst_idx_t n_samples) {\n    return RandomDataGenerator{n_samples, n_features, 0.0f}\n        .Batches(4)\n        .Device(ctx.Device())\n        .Bins(max_bin)\n        .GenerateExtMemQuantileDMatrix(\"temp\", false);\n  });\n}\n\nTEST(GPUPredictor, InplacePredictCupy) {\n  auto ctx = MakeCUDACtx(0);\n  size_t constexpr kRows{128}, kCols{64};\n  RandomDataGenerator gen(kRows, kCols, 0.5);\n  gen.Device(ctx.Device());\n  HostDeviceVector<float> data;\n  std::string interface_str = gen.GenerateArrayInterface(&data);\n  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};\n  dynamic_cast<data::DMatrixProxy*>(p_fmat.get())->SetCudaArray(interface_str.c_str());\n  TestInplacePrediction(&ctx, p_fmat, kRows, kCols);\n}\n\nTEST(GPUPredictor, InplacePredictCuDF) {\n  auto ctx = MakeCUDACtx(0);\n  size_t constexpr kRows{128}, kCols{64};\n  RandomDataGenerator gen(kRows, kCols, 0.5);\n  gen.Device(ctx.Device());\n  std::vector<HostDeviceVector<float>> storage(kCols);\n  auto interface_str = gen.GenerateColumnarArrayInterface(&storage);\n  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};\n  dynamic_cast<data::DMatrixProxy*>(p_fmat.get())->SetCudaColumnar(interface_str.c_str());\n  TestInplacePrediction(&ctx, p_fmat, kRows, kCols);\n}\n\nTEST(GpuPredictor, LesserFeatures) {\n  auto ctx = MakeCUDACtx(0);\n  TestPredictionWithLesserFeatures(&ctx);\n}\n\nTEST_F(MGPUPredictorTest, LesserFeaturesColumnSplit) {\n  this->DoTest([] { TestPredictionWithLesserFeaturesColumnSplit(true); }, true);\n  this->DoTest([] { TestPredictionWithLesserFeaturesColumnSplit(true); }, false);\n}\n\nTEST(GPUPredictor, IterationRange) {\n  auto ctx = MakeCUDACtx(0);\n  TestIterationRange(&ctx);\n}\n\nTEST_F(MGPUPredictorTest, IterationRangeColumnSplit) {\n  TestIterationRangeColumnSplit(curt::AllVisibleGPUs(), true);\n}\n\nTEST(GPUPredictor, 
CategoricalPrediction) { TestCategoricalPrediction(true, false); }\n\nTEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {\n  this->DoTest([] { TestCategoricalPrediction(true, true); }, true);\n  this->DoTest([] { TestCategoricalPrediction(true, true); }, false);\n}\n\nTEST(GPUPredictor, CategoricalPredictLeaf) {\n  auto ctx = MakeCUDACtx(curt::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());\n  TestCategoricalPredictLeaf(&ctx, false);\n}\n\nTEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {\n  this->DoTest(\n      [&] {\n        auto ctx = MakeCUDACtx(collective::GetRank());\n        TestCategoricalPredictLeaf(&ctx, true);\n      },\n      true);\n  this->DoTest(\n      [&] {\n        auto ctx = MakeCUDACtx(collective::GetRank());\n        TestCategoricalPredictLeaf(&ctx, true);\n      },\n      false);\n}\n\nTEST(GPUPredictor, PredictLeafBasic) {\n  size_t constexpr kRows = 5, kCols = 5;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(DeviceOrd::CUDA(0)).GenerateDMatrix();\n  auto lparam = MakeCUDACtx(GPUIDX);\n  std::unique_ptr<Predictor> gpu_predictor =\n      std::unique_ptr<Predictor>(Predictor::Create(\"gpu_predictor\", &lparam));\n  gpu_predictor->Configure({});\n\n  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};\n  Context ctx;\n  std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, &ctx);\n  auto const& model = *p_model;\n\n  HostDeviceVector<float> leaf_out_predictions;\n  gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);\n  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();\n  for (auto v : h_leaf_out_predictions) {\n    ASSERT_EQ(v, 0);\n  }\n}\n\nTEST(GPUPredictor, Multi) {\n  auto ctx = MakeCUDACtx(0);\n  TestVectorLeafPrediction(&ctx);\n}\n\nTEST(GPUPredictor, Sparse) {\n  auto ctx = MakeCUDACtx(0);\n  TestSparsePrediction(&ctx, 0.2);\n  TestSparsePrediction(&ctx, 0.8);\n}\n\nTEST_F(MGPUPredictorTest, SparseColumnSplit) {\n  
TestSparsePredictionColumnSplit(curt::AllVisibleGPUs(), true, 0.2);\n  TestSparsePredictionColumnSplit(curt::AllVisibleGPUs(), true, 0.8);\n}\n}  // namespace xgboost::predictor\n"
  },
  {
    "path": "tests/cpp/predictor/test_predictor.cc",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include \"test_predictor.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/context.h>             // for Context\n#include <xgboost/data.h>                // for DMatrix, BatchIterator, BatchSet, MetaInfo\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/json.h>                // for Json\n#include <xgboost/predictor.h>           // for PredictionCacheEntry, Predictor, Predic...\n#include <xgboost/string_view.h>         // for StringView\n\n#include <limits>         // for numeric_limits\n#include <memory>         // for shared_ptr\n#include <unordered_map>  // for unordered_map\n#include <utility>        // for move\n#include <vector>         // for vector\n\n#include \"../../../src/common/bitfield.h\"         // for LBitField32\n#include \"../../../src/data/iterative_dmatrix.h\"  // for IterativeDMatrix\n#include \"../../../src/data/proxy_dmatrix.h\"      // for DMatrixProxy\n#include \"../../../src/tree/tree_view.h\"          // for MultiTargetTreeView\n#include \"../collective/test_worker.h\"            // for TestDistributedGlobal\n#include \"../helpers.h\"                           // for GetDMatrixFromData, RandomDataGenerator\n#include \"xgboost/json.h\"                         // for Json, Object, get, String\n#include \"xgboost/linalg.h\"                       // for MakeVec, Tensor, TensorView, Vector\n#include \"xgboost/logging.h\"                      // for CHECK\n#include \"xgboost/span.h\"                         // for operator!=, SpanIterator, Span\n#include \"xgboost/tree_model.h\"                   // for RegTree\n\nnamespace xgboost {\nvoid TestBasic(DMatrix *dmat, Context const *ctx) {\n  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));\n\n  size_t const kCols = dmat->Info().num_col_;\n\n  LearnerModelParam mparam{MakeMP(kCols, .0, 1, ctx->Device())};\n\n  std::unique_ptr<gbm::GBTreeModel> p_model = 
CreateTestModel(&mparam, ctx);\n  auto const &model = *p_model;\n\n  // Test predict batch\n  PredictionCacheEntry out_predictions;\n  predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);\n  predictor->PredictBatch(dmat, &out_predictions, model, 0);\n\n  std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();\n  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {\n    ASSERT_EQ(out_predictions_h[i], 1.5);\n  }\n\n  // Test predict leaf\n  HostDeviceVector<float> leaf_out_predictions;\n  predictor->PredictLeaf(dmat, &leaf_out_predictions, model);\n  auto const &h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();\n  for (auto v : h_leaf_out_predictions) {\n    ASSERT_EQ(v, 0);\n  }\n}\n\nvoid TestBatchPredictionWithWeights(Context const *ctx) {\n  size_t constexpr kRows = 5, kCols = 5;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));\n\n  LearnerModelParam mparam{MakeMP(kCols, .0, 1, ctx->Device())};\n  auto model = std::make_unique<gbm::GBTreeModel>(&mparam, ctx);\n  {\n    std::vector<std::unique_ptr<RegTree>> trees;\n    trees.push_back(std::make_unique<RegTree>());\n    (*trees.back())[0].SetLeaf(1.5f);\n    (*trees.back()).Stat(0).sum_hess = 1.0f;\n    model->CommitModelGroup(std::move(trees), 0);\n  }\n  {\n    std::vector<std::unique_ptr<RegTree>> trees;\n    trees.push_back(std::make_unique<RegTree>());\n    (*trees.back())[0].SetLeaf(2.0f);\n    (*trees.back()).Stat(0).sum_hess = 1.0f;\n    model->CommitModelGroup(std::move(trees), 0);\n  }\n  std::vector<float> tree_weights{0.5f, 2.0f};\n\n  PredictionCacheEntry weighted_predictions;\n  predictor->InitOutPredictions(dmat->Info(), &weighted_predictions.predictions, *model);\n  predictor->PredictBatch(dmat.get(), &weighted_predictions, *model, 0, 0, &tree_weights);\n\n  auto const &h_predt = 
weighted_predictions.predictions.ConstHostVector();\n  for (auto v : h_predt) {\n    ASSERT_EQ(v, 4.75f);\n  }\n\n  PredictionCacheEntry ranged_predictions;\n  predictor->InitOutPredictions(dmat->Info(), &ranged_predictions.predictions, *model);\n  predictor->PredictBatch(dmat.get(), &ranged_predictions, *model, 1, 2, &tree_weights);\n\n  auto const &h_ranged = ranged_predictions.predictions.ConstHostVector();\n  for (auto v : h_ranged) {\n    ASSERT_EQ(v, 4.0f);\n  }\n}\n\nvoid TestInplacePredictionWithWeights(Context const *ctx) {\n  size_t constexpr kRows = 5, kCols = 5;\n  HostDeviceVector<float> data(kRows * kCols);\n  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));\n\n  LearnerModelParam mparam{MakeMP(kCols, .0, 1, ctx->Device())};\n  auto model = std::make_unique<gbm::GBTreeModel>(&mparam, ctx);\n  {\n    std::vector<std::unique_ptr<RegTree>> trees;\n    trees.push_back(std::make_unique<RegTree>());\n    (*trees.back())[0].SetLeaf(1.5f);\n    (*trees.back()).Stat(0).sum_hess = 1.0f;\n    model->CommitModelGroup(std::move(trees), 0);\n  }\n  {\n    std::vector<std::unique_ptr<RegTree>> trees;\n    trees.push_back(std::make_unique<RegTree>());\n    (*trees.back())[0].SetLeaf(2.0f);\n    (*trees.back()).Stat(0).sum_hess = 1.0f;\n    model->CommitModelGroup(std::move(trees), 0);\n  }\n  std::vector<float> tree_weights{0.5f, 2.0f};\n\n  if (ctx->IsCUDA()) {\n    data.SetDevice(ctx->Device());\n    data.ConstDeviceSpan();\n  }\n  auto array = GetArrayInterface(&data, kRows, kCols);\n  std::string array_str;\n  Json::Dump(array, &array_str);\n  auto proxy = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});\n  if (ctx->IsCUDA()) {\n    dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetCudaArray(array_str.c_str());\n  } else {\n    dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetArray(array_str.c_str());\n  }\n\n  PredictionCacheEntry weighted_predictions;\n  predictor->InplacePredict(proxy, *model, 
std::numeric_limits<float>::quiet_NaN(),\n                            &weighted_predictions, 0, 0, &tree_weights);\n  auto const &h_predt = weighted_predictions.predictions.ConstHostVector();\n  for (auto v : h_predt) {\n    ASSERT_EQ(v, 4.75f);\n  }\n\n  PredictionCacheEntry ranged_predictions;\n  predictor->InplacePredict(proxy, *model, std::numeric_limits<float>::quiet_NaN(),\n                            &ranged_predictions, 1, 2, &tree_weights);\n  auto const &h_ranged = ranged_predictions.predictions.ConstHostVector();\n  for (auto v : h_ranged) {\n    ASSERT_EQ(v, 4.0f);\n  }\n}\n\nTEST(Predictor, PredictionCache) {\n  size_t constexpr kRows = 16, kCols = 4;\n\n  PredictionContainer container;\n  DMatrix *m;\n  // Add a cache that is immediately expired.\n  auto add_cache = [&]() {\n    auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n    container.Cache(p_dmat, DeviceOrd::CPU());\n    m = p_dmat.get();\n  };\n\n  add_cache();\n  ASSERT_EQ(container.Container().size(), 0ul);\n  add_cache();\n  EXPECT_ANY_THROW(container.Entry(m));\n}\n\nvoid TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,\n                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {\n  size_t constexpr kCols = 16;\n  size_t constexpr kClasses = 3;\n  size_t constexpr kIters = 3;\n\n  std::unique_ptr<Learner> learner;\n\n  p_hist->Info().labels.Reshape(rows, 1);\n  auto &h_label = p_hist->Info().labels.Data()->HostVector();\n\n  for (size_t i = 0; i < rows; ++i) {\n    h_label[i] = i % kClasses;\n  }\n\n  learner.reset(Learner::Create({}));\n  learner->SetParams(Args{{\"objective\", \"multi:softprob\"},\n                          {\"num_feature\", std::to_string(kCols)},\n                          {\"num_class\", std::to_string(kClasses)},\n                          {\"max_bin\", std::to_string(bins)},\n                          {\"device\", ctx->DeviceName()}});\n  learner->Configure();\n\n  for (size_t i = 0; i < 
kIters; ++i) {\n    learner->UpdateOneIter(i, p_hist);\n  }\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n\n  learner.reset(Learner::Create({}));\n  learner->LoadModel(model);\n  learner->SetParam(\"device\", ctx->DeviceName());\n  learner->Configure();\n\n  HostDeviceVector<float> from_full;\n  learner->Predict(p_full, false, &from_full, 0, 0);\n\n  HostDeviceVector<float> from_hist;\n  learner->Predict(p_hist, false, &from_hist, 0, 0);\n\n  for (size_t i = 0; i < rows; ++i) {\n    EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);\n  }\n}\n\nvoid TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,\n                           bst_feature_t cols) {\n  std::size_t constexpr kClasses{4};\n  auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device()).Classes(kClasses);\n  std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true);\n\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n\n  learner->SetParam(\"num_parallel_tree\", \"4\");\n  learner->SetParam(\"num_class\", std::to_string(kClasses));\n  learner->SetParam(\"seed\", \"0\");\n  learner->SetParam(\"subsample\", \"0.5\");\n  learner->SetParam(\"tree_method\", \"hist\");\n  for (int32_t it = 0; it < 4; ++it) {\n    learner->UpdateOneIter(it, m);\n  }\n\n  learner->SetParam(\"device\", ctx->DeviceName());\n  learner->Configure();\n\n  HostDeviceVector<float> *p_out_predictions_0{nullptr};\n  learner->InplacePredict(x, PredictionType::kMargin, std::numeric_limits<float>::quiet_NaN(),\n                          &p_out_predictions_0, 0, 2);\n  CHECK(p_out_predictions_0);\n  HostDeviceVector<float> predict_0(p_out_predictions_0->Size());\n  predict_0.Copy(*p_out_predictions_0);\n\n  HostDeviceVector<float> *p_out_predictions_1{nullptr};\n  learner->InplacePredict(x, PredictionType::kMargin, std::numeric_limits<float>::quiet_NaN(),\n                          &p_out_predictions_1, 2, 4);\n  CHECK(p_out_predictions_1);\n  
HostDeviceVector<float> predict_1(p_out_predictions_1->Size());\n  predict_1.Copy(*p_out_predictions_1);\n\n  HostDeviceVector<float> *p_out_predictions{nullptr};\n  learner->InplacePredict(x, PredictionType::kMargin, std::numeric_limits<float>::quiet_NaN(),\n                          &p_out_predictions, 0, 4);\n\n  auto &h_pred = p_out_predictions->HostVector();\n  auto &h_pred_0 = predict_0.HostVector();\n  auto &h_pred_1 = predict_1.HostVector();\n\n  Json config{Object{}};\n  learner->SaveConfig(&config);\n  auto base_score = GetBaseScore(config);\n\n  ASSERT_EQ(h_pred.size(), rows * kClasses);\n  ASSERT_EQ(h_pred.size(), h_pred_0.size());\n  ASSERT_EQ(h_pred.size(), h_pred_1.size());\n  for (size_t i = 0; i < h_pred.size(); ++i) {\n    // Need to remove the global bias here.\n    auto j = i % kClasses;\n    ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - base_score.at(j), kRtEps);\n  }\n\n  learner->SetParam(\"device\", \"cpu\");\n  learner->Configure();\n}\n\nnamespace {\nstd::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,\n                                        size_t iters, size_t forest = 1) {\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParams(Args{{\"num_parallel_tree\", std::to_string(forest)},\n                          {\"device\", ctx->IsSycl() ? 
\"cpu\" : ctx->DeviceName()}});\n  for (size_t i = 0; i < iters; ++i) {\n    learner->UpdateOneIter(i, dmat);\n  }\n\n  return learner;\n}\n\nvoid VerifyPredictionWithLesserFeatures(Learner *learner, bst_idx_t kRows,\n                                        std::shared_ptr<DMatrix> m_test,\n                                        std::shared_ptr<DMatrix> m_invalid) {\n  HostDeviceVector<float> prediction;\n  Json config{Object()};\n  learner->SaveConfig(&config);\n\n  learner->Predict(m_test, false, &prediction, 0, 0);\n  ASSERT_EQ(prediction.Size(), kRows);\n\n  ASSERT_THROW({ learner->Predict(m_invalid, false, &prediction, 0, 0); }, dmlc::Error);\n}\n\n}  // anonymous namespace\n\nvoid TestPredictionWithLesserFeatures(Context const *ctx) {\n  size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;\n  auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);\n  auto learner = LearnerForTest(ctx, m_train, kIters);\n  auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);\n  auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);\n  VerifyPredictionWithLesserFeatures(learner.get(), kRows, m_test, m_invalid);\n}\n\nvoid TestPredictionDeviceAccess() {\n  Context ctx;\n  size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;\n  auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);\n  auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);\n  auto learner = LearnerForTest(&ctx, m_train, kIters);\n\n  HostDeviceVector<float> from_cpu;\n  {\n    ASSERT_TRUE(from_cpu.Device().IsCPU());\n    Context cpu_ctx;\n    learner->SetParam(\"device\", cpu_ctx.DeviceName());\n    learner->Predict(m_test, false, &from_cpu, 0, 0);\n    ASSERT_TRUE(from_cpu.HostCanWrite());\n    ASSERT_FALSE(from_cpu.DeviceCanRead());\n  }\n\n#if defined(XGBOOST_USE_CUDA)\n  HostDeviceVector<float> from_cuda;\n  {\n    Context 
cuda_ctx = MakeCUDACtx(0);\n    learner->SetParam(\"device\", cuda_ctx.DeviceName());\n    learner->Predict(m_test, false, &from_cuda, 0, 0);\n    ASSERT_EQ(from_cuda.Device(), DeviceOrd::CUDA(0));\n    ASSERT_TRUE(from_cuda.DeviceCanWrite());\n    ASSERT_FALSE(from_cuda.HostCanRead());\n  }\n\n  auto const &h_cpu = from_cpu.ConstHostVector();\n  auto const &h_gpu = from_cuda.ConstHostVector();\n  for (size_t i = 0; i < h_cpu.size(); ++i) {\n    ASSERT_NEAR(h_cpu[i], h_gpu[i], kRtEps);\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n\nvoid TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu) {\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n\n  std::size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;\n  auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).Seed(rank).GenerateDMatrix(true);\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(curt::AllVisibleGPUs() == 1 ? 0 : rank);\n  }\n  auto learner = LearnerForTest(&ctx, m_train, kIters);\n  auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);\n  auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);\n\n  std::shared_ptr<DMatrix> sliced_test{m_test->SliceCol(world_size, rank)};\n  std::shared_ptr<DMatrix> sliced_invalid{m_invalid->SliceCol(world_size, rank)};\n\n  VerifyPredictionWithLesserFeatures(learner.get(), kRows, sliced_test, sliced_invalid);\n}\n\nvoid GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind, bst_cat_t split_cat,\n                        float left_weight, float right_weight) {\n  PredictionCacheEntry out_predictions;\n\n  std::vector<std::unique_ptr<RegTree>> trees;\n  trees.push_back(std::unique_ptr<RegTree>(new RegTree));\n  auto &p_tree = trees.front();\n\n  std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(split_cat));\n  LBitField32 cats_bits(split_cats);\n  cats_bits.Set(split_cat);\n\n  
p_tree->ExpandCategorical(0, split_ind, split_cats, true, 1.5f, left_weight, right_weight, 3.0f,\n                            2.2f, 7.0f, 9.0f);\n  model->CommitModelGroup(std::move(trees), 0);\n}\n\nvoid TestCategoricalPrediction(bool use_gpu, bool is_column_split) {\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(curt::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());\n  }\n  size_t constexpr kCols = 10;\n  PredictionCacheEntry out_predictions;\n\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1, ctx.Device())};\n  uint32_t split_ind = 3;\n  bst_cat_t split_cat = 4;\n  float left_weight = 1.3f;\n  float right_weight = 1.7f;\n\n  gbm::GBTreeModel model(&mparam, &ctx);\n  GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);\n\n  std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};\n\n  std::vector<float> row(kCols);\n  row[split_ind] = split_cat;\n  auto m = GetDMatrixFromData(row, 1, kCols);\n\n  std::vector<FeatureType> types(10, FeatureType::kCategorical);\n  m->Info().feature_types.HostVector() = types;\n  if (is_column_split) {\n    m = std::shared_ptr<DMatrix>{m->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  }\n\n  predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);\n  predictor->PredictBatch(m.get(), &out_predictions, model, 0);\n  auto score = mparam.BaseScore(DeviceOrd::CPU())(0);\n  ASSERT_EQ(out_predictions.predictions.Size(), 1ul);\n  ASSERT_EQ(out_predictions.predictions.HostVector()[0],\n            right_weight + score);  // go to right for matching cat\n\n  row[split_ind] = split_cat + 1;\n  m = GetDMatrixFromData(row, 1, kCols);\n  if (is_column_split) {\n    m = std::shared_ptr<DMatrix>{m->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  }\n  out_predictions.version = 0;\n  predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);\n  predictor->PredictBatch(m.get(), &out_predictions, model, 0);\n  
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);\n}\n\nvoid TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {\n  size_t constexpr kCols = 10;\n  PredictionCacheEntry out_predictions;\n\n  LearnerModelParam mparam{MakeMP(kCols, .5, 1, ctx->Device())};\n\n  uint32_t split_ind = 3;\n  bst_cat_t split_cat = 4;\n  float left_weight = 1.3f;\n  float right_weight = 1.7f;\n\n  gbm::GBTreeModel model(&mparam, ctx);\n  GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);\n\n  std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};\n\n  std::vector<float> row(kCols);\n  row[split_ind] = split_cat;\n  auto m = GetDMatrixFromData(row, 1, kCols);\n  if (is_column_split) {\n    m = std::shared_ptr<DMatrix>{m->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  }\n\n  predictor->PredictLeaf(m.get(), &out_predictions.predictions, model);\n  CHECK_EQ(out_predictions.predictions.Size(), 1);\n  // go to left if it doesn't match the category, otherwise right.\n  ASSERT_EQ(out_predictions.predictions.HostVector()[0], 2);\n\n  row[split_ind] = split_cat + 1;\n  m = GetDMatrixFromData(row, 1, kCols);\n  if (is_column_split) {\n    m = std::shared_ptr<DMatrix>{m->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  }\n  out_predictions.version = 0;\n  predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);\n  predictor->PredictLeaf(m.get(), &out_predictions.predictions, model);\n  ASSERT_EQ(out_predictions.predictions.HostVector()[0], 1);\n}\n\nvoid TestIterationRange(Context const *ctx) {\n  size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0)\n                  .Device(ctx->Device())\n                  .Classes(kClasses)\n                  .GenerateDMatrix(true);\n  auto learner = LearnerForTest(ctx, dmat, kIters, kForest);\n\n  bool bound = false;\n  bst_layer_t lend{3};\n  
std::unique_ptr<Learner> sliced{learner->Slice(0, lend, 1, &bound)};\n  ASSERT_FALSE(bound);\n\n  HostDeviceVector<float> out_predt_sliced;\n  HostDeviceVector<float> out_predt_ranged;\n\n  {\n    sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);\n    learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);\n\n    auto const &h_sliced = out_predt_sliced.HostVector();\n    auto const &h_range = out_predt_ranged.HostVector();\n    ASSERT_EQ(h_sliced.size(), h_range.size());\n    ASSERT_EQ(h_sliced, h_range);\n  }\n\n  // Leaf\n  {\n    sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, true, false, false, false);\n    learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, true, false, false, false);\n    auto const &h_sliced = out_predt_sliced.HostVector();\n    auto const &h_range = out_predt_ranged.HostVector();\n    ASSERT_EQ(h_sliced.size(), h_range.size());\n    ASSERT_EQ(h_sliced, h_range);\n  }\n}\n\nnamespace {\nvoid VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,\n                                     Json const &sliced_model, std::size_t rows, std::size_t cols,\n                                     std::size_t classes,\n                                     std::vector<float> const &expected_margin_ranged,\n                                     std::vector<float> const &expected_margin_sliced,\n                                     std::vector<float> const &expected_leaf_ranged,\n                                     std::vector<float> const &expected_leaf_sliced) {\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(curt::AllVisibleGPUs() == 1 ? 
0 : rank);\n  }\n  collective::GetWorkerLocalThreads(world_size, &ctx);\n\n  auto dmat = RandomDataGenerator(rows, cols, 0).Classes(classes).GenerateDMatrix(true);\n  std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};\n\n  std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n  auto args = Args{{\"device\", ctx.DeviceName()}, {\"nthread\", std::to_string(ctx.Threads())}};\n  learner->SetParams(args);\n  learner->LoadModel(ranged_model);\n\n  std::unique_ptr<Learner> sliced{Learner::Create({Xy})};\n  sliced->SetParams(args);\n  sliced->LoadModel(sliced_model);\n\n  HostDeviceVector<float> out_predt_sliced;\n  HostDeviceVector<float> out_predt_ranged;\n\n  // margin\n  {\n    sliced->Predict(Xy, true, &out_predt_sliced, 0, 0, false, false, false, false, false);\n    learner->Predict(Xy, true, &out_predt_ranged, 0, 3, false, false, false, false, false);\n    auto const &h_sliced = out_predt_sliced.HostVector();\n    auto const &h_ranged = out_predt_ranged.HostVector();\n    EXPECT_EQ(h_sliced.size(), expected_margin_sliced.size());\n    for (std::size_t i = 0; i < expected_margin_sliced.size(); ++i) {\n      ASSERT_FLOAT_EQ(h_sliced[i], expected_margin_sliced[i]) << \"rank \" << rank << \", i \" << i;\n    }\n    EXPECT_EQ(h_ranged.size(), expected_margin_ranged.size());\n    for (std::size_t i = 0; i < expected_margin_ranged.size(); ++i) {\n      ASSERT_FLOAT_EQ(h_ranged[i], expected_margin_ranged[i]) << \"rank \" << rank << \", i \" << i;\n    }\n  }\n\n  // Leaf\n  {\n    sliced->Predict(Xy, false, &out_predt_sliced, 0, 0, false, true, false, false, false);\n    learner->Predict(Xy, false, &out_predt_ranged, 0, 3, false, true, false, false, false);\n    auto const &h_sliced = out_predt_sliced.HostVector();\n    auto const &h_ranged = out_predt_ranged.HostVector();\n    EXPECT_EQ(h_sliced.size(), expected_leaf_sliced.size());\n    for (std::size_t i = 0; i < expected_leaf_sliced.size(); ++i) {\n      ASSERT_FLOAT_EQ(h_sliced[i], 
expected_leaf_sliced[i]) << \"rank \" << rank << \", i \" << i;\n    }\n    EXPECT_EQ(h_ranged.size(), expected_leaf_ranged.size());\n    for (std::size_t i = 0; i < expected_leaf_ranged.size(); ++i) {\n      ASSERT_FLOAT_EQ(h_ranged[i], expected_leaf_ranged[i]) << \"rank \" << rank << \", i \" << i;\n    }\n  }\n}\n}  // anonymous namespace\n\nvoid TestIterationRangeColumnSplit(int world_size, bool use_gpu) {\n  std::size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).Classes(kClasses).GenerateDMatrix(true);\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(0);\n  }\n  auto learner = LearnerForTest(&ctx, dmat, kIters, kForest);\n\n  bool bound = false;\n  std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};\n  ASSERT_FALSE(bound);\n\n  // margin\n  HostDeviceVector<float> margin_predt_sliced;\n  HostDeviceVector<float> margin_predt_ranged;\n  sliced->Predict(dmat, true, &margin_predt_sliced, 0, 0, false, false, false, false, false);\n  learner->Predict(dmat, true, &margin_predt_ranged, 0, 3, false, false, false, false, false);\n  auto const &margin_sliced = margin_predt_sliced.HostVector();\n  auto const &margin_ranged = margin_predt_ranged.HostVector();\n\n  // Leaf\n  HostDeviceVector<float> leaf_predt_sliced;\n  HostDeviceVector<float> leaf_predt_ranged;\n  sliced->Predict(dmat, false, &leaf_predt_sliced, 0, 0, false, true, false, false, false);\n  learner->Predict(dmat, false, &leaf_predt_ranged, 0, 3, false, true, false, false, false);\n  auto const &leaf_sliced = leaf_predt_sliced.HostVector();\n  auto const &leaf_ranged = leaf_predt_ranged.HostVector();\n\n  Json ranged_model{Object{}};\n  learner->SaveModel(&ranged_model);\n  Json sliced_model{Object{}};\n  sliced->SaveModel(&sliced_model);\n\n#if !defined(XGBOOST_USE_NCCL)\n  if (use_gpu) {\n    GTEST_SKIP_(\"Not compiled with NCCL\");\n    return;\n  }\n#endif  // defined(XGBOOST_USE_NCCL)\n  
collective::TestDistributedGlobal(world_size, [&] {\n    VerifyIterationRangeColumnSplit(use_gpu, ranged_model, sliced_model, kRows, kCols, kClasses,\n                                    margin_ranged, margin_sliced, leaf_ranged, leaf_sliced);\n  });\n\n#if defined(XGBOOST_USE_FEDERATED)\n  collective::TestFederatedGlobal(world_size, [&] {\n    VerifyIterationRangeColumnSplit(use_gpu, ranged_model, sliced_model, kRows, kCols, kClasses,\n                                    margin_ranged, margin_sliced, leaf_ranged, leaf_sliced);\n  });\n#endif  // defined(XGBOOST_USE_FEDERATED)\n}\n\nvoid TestSparsePrediction(Context const *ctx, float sparsity) {\n  size_t constexpr kRows = 512, kCols = 128, kIters = 4;\n  auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);\n  auto learner = LearnerForTest(ctx, Xy, kIters);\n\n  HostDeviceVector<float> sparse_predt;\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n\n  learner.reset(Learner::Create({Xy}));\n  learner->LoadModel(model);\n  learner->SetParam(\"device\", ctx->DeviceName());\n  learner->Configure();\n  if (!ctx->IsCPU()) {\n    learner->SetParam(\"tree_method\", \"hist\");\n    learner->SetParam(\"device\", ctx->Device().Name());\n  }\n  learner->Predict(Xy, false, &sparse_predt, 0, 0);\n\n  HostDeviceVector<float> with_nan(kRows * kCols, std::numeric_limits<float>::quiet_NaN());\n  auto &h_with_nan = with_nan.HostVector();\n  for (auto const &page : Xy->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    for (size_t i = 0; i < batch.Size(); ++i) {\n      auto row = batch[i];\n      for (auto e : row) {\n        h_with_nan[i * kCols + e.index] = e.fvalue;\n      }\n    }\n  }\n\n  learner->SetParam(\"tree_method\", \"hist\");\n  learner->SetParam(\"device\", \"cpu\");\n  // Xcode_12.4 doesn't compile with `std::make_shared`.\n  auto dense = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});\n  auto array_interface = GetArrayInterface(&with_nan, kRows, kCols);\n  
std::string arr_str;\n  Json::Dump(array_interface, &arr_str);\n  dynamic_cast<data::DMatrixProxy *>(dense.get())->SetArray(arr_str.data());\n  HostDeviceVector<float> *p_dense_predt;\n  learner->InplacePredict(dense, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),\n                          &p_dense_predt, 0, 0);\n\n  auto const &dense_predt = *p_dense_predt;\n  if (ctx->IsCPU()) {\n    ASSERT_EQ(dense_predt.HostVector(), sparse_predt.HostVector());\n  } else {\n    auto const &h_dense = dense_predt.HostVector();\n    auto const &h_sparse = sparse_predt.HostVector();\n    ASSERT_EQ(h_dense.size(), h_sparse.size());\n    for (size_t i = 0; i < h_dense.size(); ++i) {\n      ASSERT_FLOAT_EQ(h_dense[i], h_sparse[i]);\n    }\n  }\n}\n\nnamespace {\nvoid VerifySparsePredictionColumnSplit(bool use_gpu, Json const &model, std::size_t rows,\n                                       std::size_t cols, float sparsity,\n                                       std::vector<float> const &expected_predt) {\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(curt::AllVisibleGPUs() == 1 ? 
0 : collective::GetRank());\n  }\n  auto Xy = RandomDataGenerator(rows, cols, sparsity).GenerateDMatrix(true);\n  std::shared_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  HostDeviceVector<float> sparse_predt;\n\n  std::unique_ptr<Learner> learner{Learner::Create({sliced})};\n  learner->SetParam(\"device\", ctx.DeviceName());\n  learner->LoadModel(model);\n  learner->Predict(sliced, false, &sparse_predt, 0, 0);\n\n  auto const &predt = sparse_predt.HostVector();\n  ASSERT_EQ(predt.size(), expected_predt.size());\n  for (size_t i = 0; i < predt.size(); ++i) {\n    ASSERT_FLOAT_EQ(predt[i], expected_predt[i]);\n  }\n}\n}  // anonymous namespace\n\nvoid TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity) {\n  Context ctx;\n  if (use_gpu) {\n    ctx = MakeCUDACtx(0);\n  }\n  size_t constexpr kRows = 512, kCols = 128, kIters = 4;\n  auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);\n  auto learner = LearnerForTest(&ctx, Xy, kIters);\n\n  HostDeviceVector<float> sparse_predt;\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n\n  learner.reset(Learner::Create({Xy}));\n  learner->LoadModel(model);\n\n  learner->SetParam(\"device\", ctx.DeviceName());\n  learner->Predict(Xy, false, &sparse_predt, 0, 0);\n\n#if !defined(XGBOOST_USE_NCCL)\n  if (use_gpu) {\n    GTEST_SKIP_(\"Not compiled with NCCL.\");\n    return;\n  }\n#endif  // defined(XGBOOST_USE_CUDA)\n  collective::TestDistributedGlobal(world_size, [&] {\n    VerifySparsePredictionColumnSplit(use_gpu, model, kRows, kCols, sparsity,\n                                      sparse_predt.HostVector());\n  });\n\n#if defined(XGBOOST_USE_FEDERATED)\n  collective::TestFederatedGlobal(world_size, [&] {\n    VerifySparsePredictionColumnSplit(use_gpu, model, kRows, kCols, sparsity,\n                                      sparse_predt.HostVector());\n  });\n#endif  // defined(XGBOOST_USE_FEDERATED)\n}\n\nvoid 
TestVectorLeafPrediction(Context const *ctx) {\n  std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};\n\n  size_t constexpr kRows = 5;\n  size_t constexpr kCols = 5;\n\n  LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),\n                           linalg::Vector<float>{{0.5}, {1}, ctx->Device()}, 1, 3,\n                           MultiStrategy::kMultiOutputTree};\n\n  std::vector<std::unique_ptr<RegTree>> trees;\n  trees.emplace_back(new RegTree{mparam.LeafLength(), mparam.num_feature});\n\n  std::vector<float> p_w(mparam.LeafLength(), 0.0f);\n  std::vector<float> l_w(mparam.LeafLength(), 1.0f);\n  std::vector<float> r_w(mparam.LeafLength(), 2.0f);\n\n  auto &tree = trees.front();\n  tree->SetRoot(linalg::MakeVec(p_w.data(), p_w.size()), /*sum_hess=*/1.0f);\n  tree->ExpandNode(0, static_cast<bst_feature_t>(1), 2.0, true,\n                   linalg::MakeVec(p_w.data(), p_w.size()), linalg::MakeVec(l_w.data(), l_w.size()),\n                   linalg::MakeVec(r_w.data(), r_w.size()), /*loss_chg=*/0.5f, /*sum_hess=*/1.0f,\n                   /*left_sum=*/0.6f, /*right_sum=*/0.4f);\n  tree->GetMultiTargetTree()->SetLeaves();\n  ASSERT_TRUE(tree->IsMultiTarget());\n  ASSERT_TRUE(mparam.IsVectorLeaf());\n\n  gbm::GBTreeModel model{&mparam, ctx};\n  model.CommitModelGroup(std::move(trees), 0);\n\n  auto test_batch = [&](float expected, HostDeviceVector<float> const *p_data) {\n    auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);\n    PredictionCacheEntry predt_cache;\n    predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);\n    ASSERT_EQ(predt_cache.predictions.Size(), kRows * mparam.LeafLength());\n    predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);\n    auto const &h_predt = predt_cache.predictions.HostVector();\n    for (auto v : h_predt) {\n      ASSERT_EQ(v, expected);\n    }\n  };\n  auto test_inplace = [&](float expected, HostDeviceVector<float> const *p_data) {\n    
PredictionCacheEntry predt_cache;\n    std::shared_ptr<DMatrix> p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);\n    predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);\n    if (ctx->IsCUDA()) {\n      // pull data to device.\n      p_data->SetDevice(ctx->Device());\n      p_data->ConstDeviceSpan();\n    }\n    auto arr = GetArrayInterface(p_data, kRows, kCols);\n    std::string str;\n    Json::Dump(arr, &str);\n    auto proxy = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});\n    if (ctx->IsCUDA()) {\n      dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetCudaArray(str.c_str());\n    } else {\n      dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetArray(str.c_str());\n    }\n    predictor->InplacePredict(proxy, model, std::numeric_limits<float>::quiet_NaN(), &predt_cache,\n                              0, 1);\n    auto const &h_predt = predt_cache.predictions.HostVector();\n    for (auto v : h_predt) {\n      ASSERT_EQ(v, expected);\n    }\n  };\n  auto test_ghist = [&](float expected, HostDeviceVector<float> *p_data) {\n    // ghist\n    PredictionCacheEntry predt_cache;\n    auto &h_data = p_data->HostVector();\n    // give it at least two bins, otherwise the histogram cuts only have min and max values.\n    for (std::size_t i = 0; i < kCols; ++i) {\n      h_data[i] = 1.0;\n    }\n    auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);\n\n    predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);\n\n    std::unique_ptr<ArrayIterForTest> iter;\n    if (ctx->IsCUDA()) {\n      iter.reset(new CudaArrayIterForTest{ctx, *p_data, kRows, static_cast<bst_feature_t>(kCols),\n                                          static_cast<std::size_t>(1)});\n    } else {\n      iter.reset(new NumpyArrayIterForTest{ctx, *p_data, kRows, static_cast<bst_feature_t>(kCols),\n                                           static_cast<std::size_t>(1)});\n    }\n\n    p_fmat =\n        
std::make_shared<data::IterativeDMatrix>(iter.get(), iter->Proxy(), nullptr, Reset, Next,\n                                                 std::numeric_limits<float>::quiet_NaN(), 0, 256);\n\n    predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);\n    predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);\n    auto const &h_predt = predt_cache.predictions.HostVector();\n    // the smallest v uses the min_value from histogram cuts, which leads to a left leaf\n    // during prediction.\n    for (std::size_t i = 5; i < h_predt.size(); ++i) {\n      ASSERT_EQ(h_predt[i], expected) << i;\n    }\n  };\n\n  // go to right\n  auto mt_tree = model.trees.front()->HostMtView();\n  HostDeviceVector<float> data(kRows * kCols, mt_tree.SplitCond(RegTree::kRoot) + 1.0);\n  test_batch(2.5, &data);\n  test_inplace(2.5, &data);\n  test_ghist(2.5, &data);\n\n  // go to left\n  data.HostVector().assign(data.Size(), mt_tree.SplitCond(RegTree::kRoot) - 1.0);\n  test_batch(1.5, &data);\n  test_inplace(1.5, &data);\n  test_ghist(1.5, &data);\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/predictor/test_predictor.h",
    "content": "/**\n * Copyright 2020-2023 by XGBoost Contributors\n */\n#ifndef TESTS_CPP_PREDICTOR_TEST_PREDICTOR_H_\n#define TESTS_CPP_PREDICTOR_TEST_PREDICTOR_H_\n\n#include <xgboost/context.h>  // for Context\n#include <xgboost/predictor.h>\n\n#include <cstddef>\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"../../../src/gbm/gbtree_model.h\"  // for GBTreeModel\n#include \"../helpers.h\"\n\nnamespace xgboost {\ninline std::unique_ptr<gbm::GBTreeModel> CreateTestModel(LearnerModelParam const* param,\n                                                         Context const* ctx, size_t n_classes = 1) {\n  auto model = std::make_unique<gbm::GBTreeModel>(param, ctx);\n\n  for (size_t i = 0; i < n_classes; ++i) {\n    std::vector<std::unique_ptr<RegTree>> trees;\n    trees.push_back(std::unique_ptr<RegTree>(new RegTree));\n    if (i == 0) {\n      (*trees.back())[0].SetLeaf(1.5f);\n      (*trees.back()).Stat(0).sum_hess = 1.0f;\n    }\n    model->CommitModelGroup(std::move(trees), i);\n  }\n\n  return model;\n}\n\ninline auto CreatePredictorForTest(Context const* ctx) {\n  if (ctx->IsCPU()) {\n    return Predictor::Create(\"cpu_predictor\", ctx);\n  } else if (ctx->IsSycl()) {\n    return Predictor::Create(\"sycl_predictor\", ctx);\n  } else {\n    return Predictor::Create(\"gpu_predictor\", ctx);\n  }\n}\n\n// fixme: cpu test\ntemplate <typename Page>\nvoid TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t cols,\n                                     std::shared_ptr<DMatrix> p_hist) {\n  constexpr size_t kClasses{3};\n\n  LearnerModelParam mparam{MakeMP(cols, .5, kClasses, ctx->Device())};\n  auto cuda_ctx = MakeCUDACtx(0);\n\n  std::unique_ptr<Predictor> predictor =\n      std::unique_ptr<Predictor>(CreatePredictorForTest(&cuda_ctx));\n  predictor->Configure({});\n\n  std::unique_ptr<gbm::GBTreeModel> p_model = CreateTestModel(&mparam, ctx, kClasses);\n  auto const& model = *p_model;\n\n  {\n    auto 
p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();\n\n    PredictionCacheEntry approx_out_predictions;\n    predictor->InitOutPredictions(p_hist->Info(), &approx_out_predictions.predictions, model);\n    predictor->PredictBatch(p_hist.get(), &approx_out_predictions, model, 0);\n\n    PredictionCacheEntry precise_out_predictions;\n    predictor->InitOutPredictions(p_precise->Info(), &precise_out_predictions.predictions, model);\n    predictor->PredictBatch(p_precise.get(), &precise_out_predictions, model, 0);\n\n    for (size_t i = 0; i < rows; ++i) {\n      CHECK_EQ(approx_out_predictions.predictions.HostVector()[i],\n               precise_out_predictions.predictions.HostVector()[i]);\n    }\n  }\n\n  {\n    // Predictor should never try to create the histogram index by itself.  As only\n    // histogram index from training data is valid and predictor doesn't know which\n    // matrix is used for training.\n    auto p_dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();\n    PredictionCacheEntry precise_out_predictions;\n    predictor->InitOutPredictions(p_dmat->Info(), &precise_out_predictions.predictions, model);\n    predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);\n    CHECK(!p_dmat->PageExists<Page>());\n  }\n}\n\nvoid TestBasic(DMatrix* dmat, Context const* ctx);\nvoid TestBatchPredictionWithWeights(Context const* ctx);\nvoid TestInplacePredictionWithWeights(Context const* ctx);\n\n// p_full and p_hist should come from the same data set.\nvoid TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,\n                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);\n\nvoid TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,\n                           bst_feature_t cols);\n\nvoid TestPredictionWithLesserFeatures(Context const* ctx);\n\nvoid TestPredictionDeviceAccess();\n\nvoid TestCategoricalPrediction(bool use_gpu, bool 
is_column_split);\n\nvoid TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);\n\nvoid TestCategoricalPredictLeaf(Context const* ctx, bool is_column_split);\n\nvoid TestIterationRange(Context const* ctx);\n\nvoid TestIterationRangeColumnSplit(int world_size, bool use_gpu);\n\nvoid TestSparsePrediction(Context const* ctx, float sparsity);\n\nvoid TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity);\n\nvoid TestVectorLeafPrediction(Context const* ctx);\n\n}  // namespace xgboost\n\n#endif  // TESTS_CPP_PREDICTOR_TEST_PREDICTOR_H_\n"
  },
  {
    "path": "tests/cpp/predictor/test_shap.cc",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include \"test_shap.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>                // for DMatrix\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/json.h>                // for Json\n#include <xgboost/learner.h>             // for Learner\n#include <xgboost/linalg.h>              // for Vector\n#include <xgboost/objective.h>           // for ObjFunction\n\n#include <algorithm>\n#include <memory>  // for unique_ptr\n#include <sstream>\n#include <string>  // for to_string\n\n#include \"../../../src/common/param_array.h\"\n#include \"../../../src/gbm/gbtree_model.h\"\n#include \"../../../src/predictor/interpretability/shap.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost {\nnamespace {\nvoid SetLabels(DMatrix* dmat, bst_target_t n_classes) {\n  size_t const rows = dmat->Info().num_row_;\n  dmat->Info().labels.Reshape(rows, 1);\n  auto& h_labels = dmat->Info().labels.Data()->HostVector();\n  if (n_classes > 1) {\n    for (size_t i = 0; i < rows; ++i) {\n      h_labels[i] = static_cast<float>(i % n_classes);\n    }\n  } else {\n    for (size_t i = 0; i < rows; ++i) {\n      h_labels[i] = static_cast<float>(i % 2);\n    }\n  }\n}\n\nArgs BaseParams(Context const* ctx, std::string objective, std::string max_depth) {\n  return Args{{\"objective\", std::move(objective)},\n              {\"max_depth\", std::move(max_depth)},\n              {\"min_split_loss\", \"0\"},\n              {\"min_child_weight\", \"0\"},\n              {\"reg_lambda\", \"0\"},\n              {\"reg_alpha\", \"0\"},\n              {\"subsample\", \"1\"},\n              {\"colsample_bytree\", \"1\"},\n              {\"device\", ctx->IsSycl() ? 
\"cpu\" : ctx->DeviceName()}};\n}\n\nstd::unique_ptr<gbm::GBTreeModel> LoadGBTreeModel(Learner* learner, Context const* ctx,\n                                                  Args const& model_args,\n                                                  LearnerModelParam* out_param) {\n  Json model{Object{}};\n  learner->SaveModel(&model);\n\n  auto const& model_obj = get<Object const>(model);\n  auto const& learner_obj = get<Object const>(model_obj.at(\"learner\"));\n  auto const& lmp = get<Object const>(learner_obj.at(\"learner_model_param\"));\n\n  auto get_or = [&](char const* key, std::string dft) {\n    auto it = lmp.find(key);\n    return it == lmp.cend() ? dft : get<String const>(it->second);\n  };\n  auto const& num_feature = get_or(\"num_feature\", \"0\");\n  auto const& num_class = get_or(\"num_class\", \"0\");\n  auto const& num_target = get_or(\"num_target\", \"1\");\n  auto const& base_score_str = get_or(\"base_score\", \"0\");\n\n  common::ParamArray<float> base_score_arr{\"base_score\"};\n  std::stringstream ss;\n  ss << base_score_str;\n  ss >> base_score_arr;\n\n  std::size_t shape[1]{base_score_arr.size()};\n  linalg::Vector<float> base_score_vec{shape, ctx->Device()};\n  auto& h_base = base_score_vec.Data()->HostVector();\n  h_base.assign(base_score_arr.cbegin(), base_score_arr.cend());\n\n  std::string objective{\"reg:squarederror\"};\n  for (auto const& kv : model_args) {\n    if (kv.first == \"objective\") {\n      objective = kv.second;\n      break;\n    }\n  }\n  auto obj = std::unique_ptr<ObjFunction>(ObjFunction::Create(objective, ctx));\n  obj->Configure(model_args);\n  obj->ProbToMargin(&base_score_vec);\n  // Keep both host/device views readable, matching LearnerModelParam invariants.\n  std::as_const(base_score_vec).HostView();\n  if (!ctx->Device().IsCPU()) {\n    std::as_const(base_score_vec).View(ctx->Device());\n  }\n\n  auto n_features = static_cast<bst_feature_t>(std::stol(num_feature));\n  auto n_classes = 
static_cast<bst_target_t>(std::stol(num_class));\n  auto n_targets = static_cast<bst_target_t>(std::stol(num_target));\n  auto n_groups = static_cast<uint32_t>(std::max(n_classes, n_targets));\n  LearnerModelParam tmp{n_features, std::move(base_score_vec), n_groups, n_targets,\n                        MultiStrategy::kOneOutputPerTree};\n  out_param->Copy(tmp);\n\n  auto gbtree = std::make_unique<gbm::GBTreeModel>(out_param, ctx);\n  auto const& gbm_obj = get<Object const>(learner_obj.at(\"gradient_booster\"));\n  gbtree->LoadModel(gbm_obj.at(\"model\"));\n  return gbtree;\n}\n}  // namespace\n\nstd::vector<ShapTestCase> BuildShapTestCases(Context const* ctx) {\n  std::vector<ShapTestCase> cases;\n  auto device = ctx->Device();\n\n  {\n    // small dense, shallow tree\n    auto dmat = RandomDataGenerator(16, 4, 0.0).Device(device).GenerateDMatrix();\n    SetLabels(dmat.get(), 1);\n    cases.emplace_back(dmat, BaseParams(ctx, \"reg:squarederror\", \"2\"));\n  }\n\n  {\n    // medium dense training DMatrix, moderate depth\n    auto dmat = RandomDataGenerator(64, 6, 0.0).Device(device).GenerateDMatrix(true);\n    SetLabels(dmat.get(), 1);\n    cases.emplace_back(dmat, BaseParams(ctx, \"reg:squarederror\", \"4\"));\n  }\n\n  {\n    // quantile DMatrix with explicit bins, deeper tree\n    auto dmat =\n        RandomDataGenerator(128, 8, 0.0).Bins(32).Device(device).GenerateQuantileDMatrix(false);\n    SetLabels(dmat.get(), 1);\n    auto args = BaseParams(ctx, \"reg:squarederror\", \"5\");\n    args.emplace_back(\"max_bin\", \"32\");\n    cases.emplace_back(dmat, std::move(args));\n  }\n\n  {\n    // external memory quantile DMatrix, moderate depth\n    bst_bin_t max_bin{32};\n    auto dmat = RandomDataGenerator(4096, 10, 0.0)\n                    .Batches(2)\n                    .Bins(max_bin)\n                    .Device(device)\n                    .GenerateExtMemQuantileDMatrix(\"shap_extmem\", true);\n    SetLabels(dmat.get(), 1);\n    auto args = BaseParams(ctx, 
\"reg:squarederror\", \"6\");\n    args.emplace_back(\"max_bin\", std::to_string(max_bin));\n    cases.emplace_back(dmat, std::move(args));\n  }\n\n  {\n    // external memory sparse page DMatrix, moderate depth\n    auto dmat = RandomDataGenerator(256, 8, 0.0)\n                    .Batches(2)\n                    .Device(device)\n                    .GenerateSparsePageDMatrix(\"shap_extmem\", true);\n    SetLabels(dmat.get(), 1);\n    cases.emplace_back(dmat, BaseParams(ctx, \"reg:squarederror\", \"4\"));\n  }\n\n  {\n    // multi-class dense training DMatrix, medium depth\n    bst_target_t n_classes{3};\n    auto dmat =\n        RandomDataGenerator(64, 6, 0.0).Classes(n_classes).Device(device).GenerateDMatrix(true);\n    SetLabels(dmat.get(), n_classes);\n    auto args = BaseParams(ctx, \"multi:softprob\", \"3\");\n    args.emplace_back(\"num_class\", std::to_string(n_classes));\n    cases.emplace_back(dmat, std::move(args));\n  }\n\n  {\n    // compact dense classification case to keep runtime bounded\n    auto dmat = RandomDataGenerator(256, 8, 0.0).Device(device).GenerateDMatrix();\n    SetLabels(dmat.get(), 1);\n    cases.emplace_back(dmat, BaseParams(ctx, \"binary:logistic\", \"4\"));\n  }\n\n  return cases;\n}\n\nvoid CheckShapOutput(DMatrix* dmat, Args const& model_args) {\n  size_t const kRows = dmat->Info().num_row_;\n  size_t const kCols = dmat->Info().num_col_;\n\n  std::shared_ptr<DMatrix> p_dmat{dmat, [](DMatrix*) {}};\n  std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n  learner->SetParams(model_args);\n  learner->Configure();\n  for (size_t i = 0; i < 2; ++i) {\n    learner->UpdateOneIter(i, p_dmat);\n  }\n\n  HostDeviceVector<float> margin_predt;\n  learner->Predict(p_dmat, true, &margin_predt, 0, 0, false, false, false, false, false);\n  size_t const n_outputs = margin_predt.HostVector().size() / kRows;\n\n  LearnerModelParam mparam;\n  auto gbtree = LoadGBTreeModel(learner.get(), dmat->Ctx(), model_args, &mparam);\n\n  
HostDeviceVector<float> shap_values;\n  interpretability::ShapValues(dmat->Ctx(), p_dmat.get(), &shap_values, *gbtree, 0, nullptr, 0, 0);\n  ASSERT_EQ(shap_values.HostVector().size(), kRows * (kCols + 1) * n_outputs);\n  CheckShapAdditivity(kRows, kCols, shap_values, margin_predt);\n\n  HostDeviceVector<float> shap_interactions;\n  interpretability::ShapInteractionValues(dmat->Ctx(), p_dmat.get(), &shap_interactions, *gbtree, 0,\n                                          {}, false);\n  ASSERT_EQ(shap_interactions.HostVector().size(), kRows * (kCols + 1) * (kCols + 1) * n_outputs);\n  CheckShapAdditivity(kRows, kCols, shap_interactions, margin_predt);\n}\n\nvoid CheckDartShapOutput(Context const* ctx) {\n  size_t constexpr kRows = 64, kCols = 8;\n  auto dmat = RandomDataGenerator(kRows, kCols, 0.0).Device(ctx->Device()).GenerateDMatrix();\n  SetLabels(dmat.get(), 1);\n\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParams(Args{{\"booster\", \"dart\"},\n                          {\"objective\", \"binary:logistic\"},\n                          {\"max_depth\", \"3\"},\n                          {\"rate_drop\", \"0.5\"},\n                          {\"sample_type\", \"uniform\"},\n                          {\"normalize_type\", \"tree\"},\n                          {\"device\", ctx->IsSycl() ? 
\"cpu\" : ctx->DeviceName()}});\n  learner->Configure();\n  for (size_t i = 0; i < 4; ++i) {\n    learner->UpdateOneIter(i, dmat);\n  }\n\n  HostDeviceVector<float> margin_predt;\n  learner->Predict(dmat, true, &margin_predt, 0, 0, false, false, false, false, false);\n\n  HostDeviceVector<float> shap_values;\n  learner->Predict(dmat, false, &shap_values, 0, 0, false, false, true, false, false);\n  ASSERT_EQ(shap_values.Size(), kRows * (kCols + 1));\n  CheckShapAdditivity(kRows, kCols, shap_values, margin_predt);\n\n  HostDeviceVector<float> shap_interactions;\n  learner->Predict(dmat, false, &shap_interactions, 0, 0, false, false, false, false, true);\n  ASSERT_EQ(shap_interactions.Size(), kRows * (kCols + 1) * (kCols + 1));\n  CheckShapAdditivity(kRows, kCols, shap_interactions, margin_predt);\n}\n\nvoid CheckShapAdditivity(size_t rows, size_t cols, HostDeviceVector<float> const& shap_values,\n                         HostDeviceVector<float> const& margin_predt) {\n  auto const& h_shap = shap_values.ConstHostVector();\n  auto const& h_margin = margin_predt.ConstHostVector();\n\n  ASSERT_EQ(h_margin.size() % rows, 0);\n  size_t const n_outputs = h_margin.size() / rows;\n  size_t const kShapSize = rows * (cols + 1) * n_outputs;\n  size_t const kInteractionSize = rows * (cols + 1) * (cols + 1) * n_outputs;\n  bool const is_interaction = h_shap.size() == kInteractionSize;\n  ASSERT_TRUE(h_shap.size() == kShapSize || is_interaction);\n\n  for (size_t row = 0; row < rows; ++row) {\n    for (size_t out = 0; out < n_outputs; ++out) {\n      float sum = 0.0f;\n      if (is_interaction) {\n        size_t const base = (row * n_outputs + out) * (cols + 1) * (cols + 1);\n        for (size_t idx = 0; idx < (cols + 1) * (cols + 1); ++idx) {\n          sum += h_shap[base + idx];\n        }\n      } else {\n        size_t const base = (row * n_outputs + out) * (cols + 1);\n        for (size_t c = 0; c < cols + 1; ++c) {\n          sum += h_shap[base + c];\n        }\n      }\n     
 EXPECT_NEAR(sum, h_margin[row * n_outputs + out], 1e-5f);\n    }\n  }\n}\n\nTEST(Predictor, ShapOutputCasesCPU) {\n  Context ctx;\n  auto cases = BuildShapTestCases(&ctx);\n  for (auto const& [dmat, args] : cases) {\n    CheckShapOutput(dmat.get(), args);\n  }\n}\n\nTEST(Predictor, DartShapOutputCPU) {\n  Context ctx;\n  CheckDartShapOutput(&ctx);\n}\n\nTEST(Predictor, ApproxContribsBasic) {\n  Context ctx;\n  size_t constexpr kRows = 64;\n  size_t constexpr kCols = 6;\n\n  auto dmat = RandomDataGenerator(kRows, kCols, 0.0).Device(ctx.Device()).GenerateDMatrix();\n  SetLabels(dmat.get(), 1);\n\n  auto args = BaseParams(&ctx, \"reg:squarederror\", \"3\");\n  args.emplace_back(\"tree_method\", \"approx\");\n\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParams(args);\n  learner->Configure();\n  for (size_t i = 0; i < 3; ++i) {\n    learner->UpdateOneIter(i, dmat);\n  }\n\n  HostDeviceVector<float> margin_predt;\n  learner->Predict(dmat, true, &margin_predt, 0, 0, false, false, false, false, false);\n\n  LearnerModelParam mparam;\n  auto gbtree = LoadGBTreeModel(learner.get(), dmat->Ctx(), args, &mparam);\n\n  HostDeviceVector<float> approx_contribs;\n  interpretability::ApproxFeatureImportance(dmat->Ctx(), dmat.get(), &approx_contribs, *gbtree, 0,\n                                            {});\n\n  auto const& h_margin = margin_predt.ConstHostVector();\n  auto const& h_contribs = approx_contribs.ConstHostVector();\n  ASSERT_EQ(h_margin.size(), kRows);\n  ASSERT_EQ(h_contribs.size(), kRows * (kCols + 1));\n\n  for (size_t row = 0; row < kRows; ++row) {\n    float sum = 0.0f;\n    size_t base = row * (kCols + 1);\n    for (size_t c = 0; c < kCols + 1; ++c) {\n      sum += h_contribs[base + c];\n    }\n    EXPECT_NEAR(sum, h_margin[row], 1e-2f);\n  }\n}\n\nTEST(Predictor, ShapIterationRange) {\n  Context ctx;\n  size_t constexpr kRows = 1000;\n  size_t constexpr kCols = 20;\n  size_t constexpr kClasses = 4;\n  size_t constexpr kForest 
= 3;\n  size_t constexpr kIters = 10;\n\n  auto dmat = RandomDataGenerator(kRows, kCols, 0)\n                  .Device(ctx.Device())\n                  .Classes(kClasses)\n                  .GenerateDMatrix(true);\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParams(Args{{\"num_parallel_tree\", std::to_string(kForest)},\n                          {\"device\", ctx.IsSycl() ? \"cpu\" : ctx.DeviceName()}});\n  for (size_t i = 0; i < kIters; ++i) {\n    learner->UpdateOneIter(i, dmat);\n  }\n\n  bool bound = false;\n  bst_layer_t lend{3};\n  std::unique_ptr<Learner> sliced{learner->Slice(0, lend, 1, &bound)};\n  ASSERT_FALSE(bound);\n\n  HostDeviceVector<float> out_predt_sliced;\n  HostDeviceVector<float> out_predt_ranged;\n\n  // SHAP\n  {\n    sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false, true, false, false);\n    learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, false, true, false, false);\n\n    auto const& h_sliced = out_predt_sliced.HostVector();\n    auto const& h_range = out_predt_ranged.HostVector();\n    ASSERT_EQ(h_sliced.size(), h_range.size());\n    ASSERT_EQ(h_sliced, h_range);\n  }\n\n  // SHAP interaction\n  {\n    sliced->Predict(dmat, false, &out_predt_sliced, 0, 0, false, false, false, false, true);\n    learner->Predict(dmat, false, &out_predt_ranged, 0, lend, false, false, false, false, true);\n    auto const& h_sliced = out_predt_sliced.HostVector();\n    auto const& h_range = out_predt_ranged.HostVector();\n    ASSERT_EQ(h_sliced.size(), h_range.size());\n    ASSERT_EQ(h_sliced, h_range);\n  }\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/predictor/test_shap.cu",
    "content": "/**\n * Copyright 2017-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>\n#include <xgboost/learner.h>\n\n#include <memory>\n\n#include \"../helpers.h\"\n#include \"test_predictor.h\"\n#include \"test_shap.h\"\n\nnamespace xgboost {\n\nTEST(GPUPredictor, CompareCPUShap) {\n  auto ctx = MakeCUDACtx(0);\n  Context cpu_ctx;\n  bst_feature_t constexpr kCols{10};\n  bst_idx_t constexpr kRows{1000};\n  std::size_t constexpr kIters{10};\n\n  HostDeviceVector<float> predictions;\n  HostDeviceVector<float> cpu_predictions;\n  HostDeviceVector<float> interactions;\n  HostDeviceVector<float> cpu_interactions;\n\n  auto dmat = RandomDataGenerator(kRows, kCols, 0.0).Device(ctx.Device()).GenerateDMatrix();\n  dmat->Info().labels.Reshape(kRows, 1);\n  auto& h_labels = dmat->Info().labels.Data()->HostVector();\n  for (size_t i = 0; i < kRows; ++i) {\n    h_labels[i] = i % 2;\n  }\n\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  learner->SetParams(Args{{\"objective\", \"binary:logistic\"},\n                          {\"max_depth\", \"12\"},\n                          {\"min_split_loss\", \"0\"},\n                          {\"min_child_weight\", \"0\"},\n                          {\"reg_lambda\", \"0\"},\n                          {\"reg_alpha\", \"0\"},\n                          {\"subsample\", \"1\"},\n                          {\"colsample_bytree\", \"1\"},\n                          {\"device\", ctx.DeviceName()}});\n  learner->Configure();\n  for (std::size_t i = 0; i < kIters; ++i) {\n    learner->UpdateOneIter(i, dmat);\n  }\n\n  Json model{Object{}};\n  learner->SaveModel(&model);\n\n  std::unique_ptr<Learner> learner_gpu{Learner::Create({})};\n  learner_gpu->LoadModel(model);\n  learner_gpu->SetParam(\"device\", ctx.DeviceName());\n  learner_gpu->Configure();\n\n  std::unique_ptr<Learner> learner_cpu{Learner::Create({})};\n  learner_cpu->LoadModel(model);\n  learner_cpu->SetParam(\"device\", 
cpu_ctx.DeviceName());\n  learner_cpu->Configure();\n\n  learner_gpu->Predict(dmat, false, &predictions, 0, 0, false, false, true, false, false);\n  learner_cpu->Predict(dmat, false, &cpu_predictions, 0, 0, false, false, true, false, false);\n  learner_gpu->Predict(dmat, false, &interactions, 0, 0, false, false, false, false, true);\n  learner_cpu->Predict(dmat, false, &cpu_interactions, 0, 0, false, false, false, false, true);\n  auto& phis = predictions.HostVector();\n  auto& cpu_phis = cpu_predictions.HostVector();\n  for (auto i = 0ull; i < phis.size(); i++) {\n    EXPECT_NEAR(cpu_phis[i], phis[i], 1e-4);\n  }\n\n  auto& inter = interactions.HostVector();\n  auto& cpu_inter = cpu_interactions.HostVector();\n  for (auto i = 0ull; i < inter.size(); i++) {\n    EXPECT_NEAR(cpu_inter[i], inter[i], 1e-3);\n  }\n}\n\nTEST(GPUPredictor, ShapOutputCasesGPU) {\n  auto ctx = MakeCUDACtx(0);\n  auto cases = BuildShapTestCases(&ctx);\n  for (auto const& [dmat, args] : cases) {\n    CheckShapOutput(dmat.get(), args);\n  }\n}\n\nTEST(GPUPredictor, DartShapOutputGPU) {\n  auto ctx = MakeCUDACtx(0);\n  CheckDartShapOutput(&ctx);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/predictor/test_shap.h",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#ifndef TESTS_CPP_PREDICTOR_TEST_SHAP_H_\n#define TESTS_CPP_PREDICTOR_TEST_SHAP_H_\n\n#include <xgboost/base.h>\n#include <xgboost/context.h>\n#include <xgboost/host_device_vector.h>\n\n#include <memory>\n#include <utility>\n#include <vector>\n\nnamespace xgboost {\nclass DMatrix;\nclass Learner;\n}  // namespace xgboost\n\nnamespace xgboost {\nvoid CheckShapOutput(DMatrix* dmat, Args const& model_args);\nvoid CheckDartShapOutput(Context const* ctx);\nvoid CheckShapAdditivity(size_t rows, size_t cols, HostDeviceVector<float> const& shap_values,\n                         HostDeviceVector<float> const& margin_predt);\n\nusing ShapTestCase = std::pair<std::shared_ptr<DMatrix>, Args>;\nstd::vector<ShapTestCase> BuildShapTestCases(Context const* ctx);\n\n}  // namespace xgboost\n\n#endif  // TESTS_CPP_PREDICTOR_TEST_SHAP_H_\n"
  },
  {
    "path": "tests/cpp/test_cache.cc",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/cache.h>\n#include <xgboost/data.h>  // for DMatrix\n\n#include <cstddef>         // for size_t\n#include <cstdint>         // for uint32_t\n#include <thread>          // for thread\n\n#include \"helpers.h\"       // for RandomDataGenerator\n\nnamespace xgboost {\nnamespace {\nstruct CacheForTest {\n  std::size_t const i;\n\n  explicit CacheForTest(std::size_t k) : i{k} {}\n};\n}  // namespace\n\nTEST(DMatrixCache, Basic) {\n  std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4;\n  DMatrixCache<CacheForTest> cache{kCacheSize};\n\n  auto add_cache = [&]() {\n    // Create a lambda function here, so that p_fmat gets deleted upon the\n    // end of the lambda. This is to test how the cache handle expired\n    // cache entries.\n    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n    cache.CacheItem(p_fmat, 3);\n    DMatrix* m = p_fmat.get();\n    return m;\n  };\n  auto m = add_cache();\n  ASSERT_EQ(cache.Container().size(), 0);\n  ASSERT_THROW(cache.Entry(m), dmlc::Error);\n\n  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n  auto item = cache.CacheItem(p_fmat, 1);\n  ASSERT_EQ(cache.Entry(p_fmat.get())->i, 1);\n\n  std::vector<std::shared_ptr<DMatrix>> items;\n  for (std::size_t i = 0; i < kCacheSize * 2; ++i) {\n    items.emplace_back(RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix());\n    cache.CacheItem(items.back(), i);\n    ASSERT_EQ(cache.Entry(items.back().get())->i, i);\n    ASSERT_LE(cache.Container().size(), kCacheSize);\n    if (i > kCacheSize) {\n      auto k = i - kCacheSize - 1;\n      ASSERT_THROW(cache.Entry(items[k].get()), dmlc::Error);\n    }\n  }\n}\n\nTEST(DMatrixCache, MultiThread) {\n  std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3;\n  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n\n#if defined(__linux__)\n  auto const n = 
std::thread::hardware_concurrency() * 128;\n#else\n  auto const n = std::thread::hardware_concurrency();\n#endif\n  CHECK_NE(n, 0);\n  std::vector<std::shared_ptr<CacheForTest>> results(n);\n\n  {\n    DMatrixCache<CacheForTest> cache{kCacheSize};\n    std::vector<std::thread> tasks;\n    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {\n      tasks.emplace_back([&, i = tidx]() {\n        cache.CacheItem(p_fmat, i);\n\n        auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n        results[i] = cache.CacheItem(p_fmat_local, i);\n      });\n    }\n    for (auto& t : tasks) {\n      t.join();\n    }\n    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {\n      ASSERT_EQ(results[tidx]->i, tidx);\n    }\n\n    tasks.clear();\n\n    for (std::int32_t tidx = static_cast<std::int32_t>(n - 1); tidx >= 0; --tidx) {\n      tasks.emplace_back([&, i = tidx]() {\n        cache.CacheItem(p_fmat, i);\n\n        auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();\n        results[i] = cache.CacheItem(p_fmat_local, i);\n      });\n    }\n    for (auto& t : tasks) {\n      t.join();\n    }\n    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {\n      ASSERT_EQ(results[tidx]->i, tidx);\n    }\n  }\n\n  {\n    DMatrixCache<CacheForTest> cache{n};\n    std::vector<std::thread> tasks;\n    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {\n      tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); });\n    }\n    for (auto& t : tasks) {\n      t.join();\n    }\n    for (std::uint32_t tidx = 0; tidx < n; ++tidx) {\n      ASSERT_EQ(results[tidx]->i, tidx);\n    }\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_context.cc",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/context.h>\n\n#include <sstream>\n\nnamespace xgboost {\nTEST(Context, CPU) {\n  Context ctx;\n  ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());\n  ASSERT_EQ(ctx.Ordinal(), DeviceOrd::CPUOrdinal());\n\n  std::int32_t flag{0};\n  ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });\n  ASSERT_EQ(flag, -1);\n\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"oops\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"-1\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"CPU\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"CUDA\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"CPU:0\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"gpu:+0\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"gpu:0-\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"gpu:\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \":\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \":gpu\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \":0\"}}), dmlc::Error);\n  ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{\"device\", \"\"}}), dmlc::Error);\n\n  std::stringstream ss;\n  ss << ctx.Device();\n  ASSERT_EQ(ss.str(), \"cpu\");\n}\n\nTEST(Context, ErrorInit) {\n  Context ctx;\n  ASSERT_THROW({ ctx.Init({{\"foo\", \"bar\"}}); }, dmlc::Error);\n  try {\n    ctx.Init({{\"foo\", \"bar\"}});\n  } catch (dmlc::Error const& e) {\n    auto msg = std::string{e.what()};\n    ASSERT_NE(msg.find(\"foo\"), std::string::npos);\n  }\n}\n\nTEST(Context, SYCL) {\n  Context ctx;\n  // Default SYCL device\n  {\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl\"}});\n    ASSERT_EQ(ctx.Device(), 
DeviceOrd::SyclDefault());\n    ASSERT_EQ(ctx.Ordinal(), -1);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:-1\");\n  }\n\n  // SYCL device with idx\n  {\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl:42\"}});\n    ASSERT_EQ(ctx.Device(), DeviceOrd::SyclDefault(42));\n    ASSERT_EQ(ctx.Ordinal(), 42);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:42\");\n  }\n\n  // SYCL cpu\n  {\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl:cpu\"}});\n    ASSERT_EQ(ctx.Device(), DeviceOrd::SyclCPU());\n    ASSERT_EQ(ctx.Ordinal(), -1);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:cpu:-1\");\n  }\n\n  // SYCL cpu with idx\n  {\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl:cpu:42\"}});\n    ASSERT_EQ(ctx.Device(), DeviceOrd::SyclCPU(42));\n    ASSERT_EQ(ctx.Ordinal(), 42);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:cpu:42\");\n  }\n\n  // SYCL gpu\n  {\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl:gpu\"}});\n    ASSERT_EQ(ctx.Device(), DeviceOrd::SyclGPU());\n    ASSERT_EQ(ctx.Ordinal(), -1);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:gpu:-1\");\n  }\n\n  // SYCL gpu with idx\n  
{\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"sycl:gpu:42\"}});\n    ASSERT_EQ(ctx.Device(), DeviceOrd::SyclGPU(42));\n    ASSERT_EQ(ctx.Ordinal(), 42);\n\n    std::int32_t flag{0};\n    ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; }, [&] { flag = 2; });\n    ASSERT_EQ(flag, 2);\n\n    std::stringstream ss;\n    ss << ctx.Device();\n    ASSERT_EQ(ss.str(), \"sycl:gpu:42\");\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_context.cu",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thread>         // for thread\n#include <xgboost/base.h>  // for Args\n#include <xgboost/context.h>\n#include <xgboost/json.h>  // for FromJson, ToJson\n\n#include <string>  // for string, to_string\n\n#include \"../../src/common/cuda_rt_utils.h\"  // for AllVisibleGPUs\n\nnamespace xgboost {\nnamespace {\nvoid TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {\n  ASSERT_EQ(ctx.Device().ordinal, ord);\n  ASSERT_EQ(ctx.DeviceName(), \"cuda:\" + std::to_string(ord));\n  ASSERT_EQ(ctx.Ordinal(), ord);\n  ASSERT_TRUE(ctx.IsCUDA());\n  ASSERT_FALSE(ctx.IsCPU());\n  ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));\n\n  Json jctx{ToJson(ctx)};\n  Context new_ctx;\n  FromJson(jctx, &new_ctx);\n  ASSERT_EQ(new_ctx.Device(), ctx.Device());\n  ASSERT_EQ(new_ctx.Ordinal(), ctx.Ordinal());\n}\n}  // namespace\n\nTEST(Context, MGPUDeviceOrdinal) {\n  Context ctx;\n  auto n_vis = curt::AllVisibleGPUs();\n  auto ord = n_vis - 1;\n\n  std::string device = \"cuda:\" + std::to_string(ord);\n  ctx.UpdateAllowUnknown(Args{{\"device\", device}});\n  TestCUDA(ctx, ord);\n\n  device = \"cuda:\" + std::to_string(1001);\n  ctx.UpdateAllowUnknown(Args{{\"device\", device}});\n  ord = 1001 % n_vis;\n\n  TestCUDA(ctx, ord);\n\n  std::int32_t flag{0};\n  ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });\n  ASSERT_EQ(flag, 1);\n\n  Context new_ctx = ctx;\n  TestCUDA(new_ctx, ctx.Ordinal());\n\n  auto cpu_ctx = ctx.MakeCPU();\n  ASSERT_TRUE(cpu_ctx.IsCPU());\n  ASSERT_EQ(cpu_ctx.Ordinal(), DeviceOrd::CPUOrdinal());\n  ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());\n\n  auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());\n  TestCUDA(cuda_ctx, ctx.Ordinal());\n\n  cuda_ctx.UpdateAllowUnknown(Args{{\"fail_on_invalid_gpu_id\", \"true\"}});\n  ASSERT_THROW({ cuda_ctx.UpdateAllowUnknown(Args{{\"device\", \"cuda:9999\"}}); }, dmlc::Error);\n  cuda_ctx.UpdateAllowUnknown(Args{{\"device\", 
\"cuda:00\"}});\n  ASSERT_EQ(cuda_ctx.Ordinal(), 0);\n\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"cpu\"}});\n  // Test alias\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"gpu:0\"}});\n  TestCUDA(ctx, 0);\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"gpu\"}});\n  TestCUDA(ctx, 0);\n\n  // Test the thread local memory in dmlc is not linking different instances together.\n  cpu_ctx.UpdateAllowUnknown(Args{{\"device\", \"cpu\"}});\n  TestCUDA(ctx, 0);\n  ctx.UpdateAllowUnknown(Args{});\n  TestCUDA(ctx, 0);\n}\n\nTEST(Context, MGPUId) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"cuda\"}});\n  TestCUDA(ctx, 0);\n\n  auto n_vis = curt::AllVisibleGPUs();\n  // Use threads to avoid changing the global variable in tests.\n  auto t0 = std::thread{[n_vis] {\n    Context ctx;\n    auto ord = n_vis - 1;\n    ctx.UpdateAllowUnknown(Args{{\"device\", \"cuda:\" + std::to_string(ord)}});\n    TestCUDA(ctx, ord);\n  }};\n  auto t1 = std::thread{[n_vis] {\n    Context ctx;\n    auto device = \"cuda:\" + std::to_string(1001);\n    ctx.UpdateAllowUnknown(Args{{\"device\", device}});\n    auto ord = 1001 % n_vis;\n    TestCUDA(ctx, ord);\n  }};\n  t0.join();\n  t1.join();\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_global_config.cc",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/c_api.h>\n#include <xgboost/global_config.h>\n#include <xgboost/json.h>\n#include <xgboost/logging.h>\n\nnamespace xgboost {\nTEST(GlobalConfiguration, Verbosity) {\n  // Configure verbosity via global configuration\n  Json config{JsonObject()};\n  config[\"verbosity\"] = String(\"0\");\n  auto& global_config = *GlobalConfigThreadLocalStore::Get();\n  FromJson(config, &global_config);\n  // Now verbosity should be updated\n  EXPECT_EQ(ConsoleLogger::GlobalVerbosity(), ConsoleLogger::LogVerbosity::kSilent);\n  EXPECT_NE(ConsoleLogger::LogVerbosity::kSilent, ConsoleLogger::DefaultVerbosity());\n  // GetConfig() should also return updated verbosity\n  Json current_config{ToJson(*GlobalConfigThreadLocalStore::Get())};\n  EXPECT_EQ(get<String>(current_config[\"verbosity\"]), \"0\");\n}\n\nTEST(GlobalConfiguration, UseRMM) {\n  Json config{JsonObject()};\n  config[\"use_rmm\"] = String(\"true\");\n  auto& global_config = *GlobalConfigThreadLocalStore::Get();\n  FromJson(config, &global_config);\n  // GetConfig() should return updated use_rmm flag\n  Json current_config{ToJson(*GlobalConfigThreadLocalStore::Get())};\n  EXPECT_EQ(get<String>(current_config[\"use_rmm\"]), \"1\");\n}\n\nTEST(GlobalConfiguration, Threads) {\n  char const* config;\n  ASSERT_EQ(XGBGetGlobalConfig(&config), 0);\n  auto jconfig = Json::Load(config);\n  auto nthread = get<Integer const>(jconfig[\"nthread\"]);\n  ASSERT_LE(nthread, 0);\n  auto n_omp = omp_get_num_threads();\n  ASSERT_EQ(XGBSetGlobalConfig(config), 0);\n  ASSERT_EQ(n_omp, omp_get_num_threads());\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_helpers.cc",
    "content": "/**\n * Copyright 2020-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <algorithm>\n\n#include \"helpers.h\"\n#include \"filesystem.h\"  // for TemporaryDirectory\n#include \"../../src/data/array_interface.h\"\nnamespace xgboost {\n\nTEST(RandomDataGenerator, DMatrix) {\n  size_t constexpr kRows { 16 }, kCols { 32 };\n  float constexpr kSparsity { 0.4f };\n  auto p_dmatrix = RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDMatrix();\n\n  HostDeviceVector<float> csr_value;\n  HostDeviceVector<std::size_t> csr_rptr;\n  HostDeviceVector<bst_feature_t> csr_cidx;\n  RandomDataGenerator{kRows, kCols, kSparsity}.GenerateCSR(&csr_value, &csr_rptr, &csr_cidx);\n\n  HostDeviceVector<float> dense_data;\n  RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDense(&dense_data);\n\n  auto it = std::copy_if(\n      dense_data.HostVector().begin(), dense_data.HostVector().end(),\n      dense_data.HostVector().begin(), [](float v) { return !std::isnan(v); });\n\n  CHECK_EQ(p_dmatrix->Info().num_row_, kRows);\n  CHECK_EQ(p_dmatrix->Info().num_col_, kCols);\n\n  for (auto const& page : p_dmatrix->GetBatches<SparsePage>()) {\n    size_t n_elements = page.data.Size();\n    CHECK_EQ(n_elements, it - dense_data.HostVector().begin());\n    CHECK_EQ(n_elements, csr_value.Size());\n\n    for (size_t i = 0; i < n_elements; ++i) {\n      CHECK_EQ(dense_data.HostVector()[i], csr_value.HostVector()[i]);\n      CHECK_EQ(dense_data.HostVector()[i], page.data.HostVector()[i].fvalue);\n      CHECK_EQ(page.data.HostVector()[i].index, csr_cidx.HostVector()[i]);\n    }\n    CHECK_EQ(page.offset.Size(), csr_rptr.Size());\n    for (size_t i = 0; i < p_dmatrix->Info().num_row_; ++i) {\n      CHECK_EQ(page.offset.HostVector()[i], csr_rptr.HostVector()[i]);\n    }\n  }\n}\n\nTEST(RandomDataGenerator, GenerateArrayInterfaceBatch) {\n  size_t constexpr kRows { 937 }, kCols { 100 }, kBatches { 13 };\n  float constexpr kSparsity { 0.4f };\n\n  HostDeviceVector<float> 
storage;\n  std::string array;\n  std::vector<std::string> batches;\n  std::tie(batches, array) =\n      RandomDataGenerator{kRows, kCols, kSparsity}.GenerateArrayInterfaceBatch(\n          &storage, kBatches);\n  CHECK_EQ(batches.size(), kBatches);\n\n  size_t rows = 0;\n  for (auto const &interface_str : batches) {\n    Json j_interface =\n        Json::Load({interface_str.c_str(), interface_str.size()});\n    ArrayInterfaceHandler::Validate(get<Object const>(j_interface));\n    CHECK_EQ(get<Integer>(j_interface[\"shape\"][1]), kCols);\n    rows += get<Integer>(j_interface[\"shape\"][0]);\n  }\n  CHECK_EQ(rows, kRows);\n  auto j_array = Json::Load({array.c_str(), array.size()});\n  CHECK_EQ(get<Integer>(j_array[\"shape\"][0]), kRows);\n  CHECK_EQ(get<Integer>(j_array[\"shape\"][1]), kCols);\n}\n\nTEST(RandomDataGenerator, SparseDMatrix) {\n  bst_idx_t constexpr kCols{100}, kBatches{13};\n  bst_idx_t n_samples{kBatches * 128};\n  common::TemporaryDirectory tmpdir;\n  auto prefix = tmpdir.Str() + \"/cache\";\n  auto p_ext_fmat =\n      RandomDataGenerator{n_samples, kCols, 0.0}.Batches(kBatches).GenerateSparsePageDMatrix(prefix,\n                                                                                             true);\n\n  auto p_fmat = RandomDataGenerator{n_samples, kCols, 0.0}.GenerateDMatrix(true);\n\n  SparsePage concat;\n  std::int32_t n_batches{0};\n  for (auto const& page : p_ext_fmat->GetBatches<SparsePage>()) {\n    concat.Push(page);\n    ++n_batches;\n  }\n  ASSERT_EQ(n_batches, kBatches);\n  ASSERT_EQ(concat.Size(), n_samples);\n\n  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {\n    ASSERT_EQ(page.data.ConstHostVector(), concat.data.ConstHostVector());\n    ASSERT_EQ(page.offset.ConstHostVector(), concat.offset.ConstHostVector());\n  }\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_histogram_helpers.cu",
    "content": "#include <gtest/gtest.h>\n#include <xgboost/context.h>\n\n#include \"histogram_helpers.h\"\n\nnamespace xgboost {\nnamespace {\n// Count occurrences of each bin for each feature\ntemplate <typename Accessor>\nauto CountBins(Accessor const& accessor, bst_bin_t n_bins_per_feat) {\n  auto n_samples = accessor.NumRows();\n  auto n_features = accessor.NumFeatures();\n  std::vector<std::vector<bst_idx_t>> bin_counts(n_features,\n                                                 std::vector<bst_idx_t>(n_bins_per_feat, 0));\n\n  // Count occurrences of each bin for each feature\n  for (bst_idx_t row = 0; row < n_samples; ++row) {\n    for (bst_feature_t feat = 0; feat < n_features; ++feat) {\n      bst_idx_t idx = row * accessor.row_stride + feat;\n      bst_bin_t bin = accessor.gidx_iter[idx];\n\n      // The bin values are already local to each feature\n      EXPECT_GE(bin, 0);\n      EXPECT_LT(bin, n_bins_per_feat);\n      bin_counts[feat][bin]++;\n    }\n  }\n  return bin_counts;\n}\n}  // namespace\n\nTEST(HistogramHelpers, MakeEllpack) {\n  auto ctx = MakeCUDACtx(0);\n\n  bst_idx_t n_samples = 100;\n  bst_feature_t n_features = 5;\n  bst_bin_t n_bins_per_feat = 10;\n\n  auto ellpack = MakeEllpackForTest(&ctx, n_samples, n_features, n_bins_per_feat);\n\n  ASSERT_NE(ellpack, nullptr);\n  EXPECT_EQ(ellpack->Size(), n_samples);\n  EXPECT_EQ(ellpack->Cuts().NumFeatures(), n_features);\n\n  // Test histogram cuts structure\n  const auto& cuts = ellpack->Cuts();\n  EXPECT_EQ(cuts.NumFeatures(), n_features);\n  EXPECT_EQ(cuts.TotalBins(), n_features * n_bins_per_feat);\n\n  // Verify cut pointers are correct\n  const auto& cut_ptrs = cuts.Ptrs();\n  EXPECT_EQ(cut_ptrs.size(), n_features + 1);\n  for (bst_feature_t f = 0; f < n_features; ++f) {\n    EXPECT_EQ(cut_ptrs[f + 1] - cut_ptrs[f], n_bins_per_feat);\n  }\n\n  EXPECT_TRUE(ellpack->IsDense());\n\n  std::vector<common::CompressedByteT> h_gidx_buffer;\n  auto accessor_var = ellpack->GetHostEllpack(&ctx, 
&h_gidx_buffer);\n  std::visit(\n      [&](auto&& accessor) {\n        EXPECT_EQ(accessor.row_stride, n_features);\n        EXPECT_EQ(accessor.n_rows, n_samples);\n\n        auto bin_counts = CountBins(accessor, n_bins_per_feat);\n        // Validate histogram index distribution\n        auto n_values_per_bin = n_samples / n_bins_per_feat;\n        auto remainder = n_samples % n_bins_per_feat;\n\n        // Verify expected distribution\n        for (bst_feature_t feat = 0; feat < n_features; ++feat) {\n          for (bst_bin_t bin = 0; bin < n_bins_per_feat; ++bin) {\n            bst_idx_t expected_count = n_values_per_bin;\n            if (bin == n_bins_per_feat - 1) {\n              expected_count += remainder;  // Last bin gets the remainder\n            }\n            EXPECT_EQ(bin_counts[feat][bin], expected_count);\n          }\n        }\n      },\n      accessor_var);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_learner.cc",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n#include <xgboost/learner.h>         // for Learner\n#include <xgboost/logging.h>         // for LogCheck_NE, CHECK_NE, LogCheck_EQ\n#include <xgboost/objective.h>       // for ObjFunction\n#include <xgboost/version_config.h>  // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR\n\n#include <algorithm>    // for equal, transform\n#include <cstddef>      // for size_t\n#include <iosfwd>       // for ofstream\n#include <limits>       // for numeric_limits\n#include <map>          // for map\n#include <memory>       // for unique_ptr, shared_ptr, __shared_ptr_...\n#include <random>       // for uniform_real_distribution\n#include <string>       // for allocator, basic_string, string, oper...\n#include <thread>       // for thread\n#include <type_traits>  // for is_integral\n#include <utility>      // for pair\n#include <vector>       // for vector\n\n#include \"../../src/collective/communicator-inl.h\"  // for GetRank, GetWorldSize\n#include \"../../src/common/api_entry.h\"             // for XGBAPIThreadLocalEntry\n#include \"../../src/common/io.h\"                    // for LoadSequentialFile\n#include \"../../src/common/linalg_op.h\"             // for ElementWiseTransformHost, begin, end\n#include \"./collective/test_worker.h\"               // for TestDistributedGlobal\n#include \"dmlc/omp.h\"                               // for omp_get_max_threads\n#include \"filesystem.h\"                             // for TemporaryDirectory\n#include \"helpers.h\"                                // for GetBaseScore, RandomDataGenerator\n#include \"objective_helpers.h\"                      // for MakeObjNamesForTest, ObjTestNameGenerator\n#include \"test_serialization.h\"                     // for CompareJsonModels\n#include \"xgboost/base.h\"                           // for bst_float, Args, bst_feature_t, bst_int\n#include \"xgboost/context.h\"                 
       // for Context, DeviceOrd\n#include \"xgboost/data.h\"                           // for DMatrix, MetaInfo, DataType\n#include \"xgboost/host_device_vector.h\"             // for HostDeviceVector\n#include \"xgboost/json.h\"                           // for Json, Object, get, String, IsA, opera...\n#include \"xgboost/linalg.h\"                         // for Tensor, TensorView\n#include \"xgboost/logging.h\"                        // for ConsoleLogger\n#include \"xgboost/predictor.h\"                      // for PredictionCacheEntry\n#include \"xgboost/string_view.h\"                    // for StringView\n\nnamespace xgboost {\nTEST(Learner, Basic) {\n  using Arg = std::pair<std::string, std::string>;\n  auto args = {Arg(\"tree_method\", \"exact\")};\n  auto mat_ptr = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix();\n  auto learner = std::unique_ptr<Learner>(Learner::Create({mat_ptr}));\n  learner->SetParams(args);\n\n  auto major = XGBOOST_VER_MAJOR;\n  auto minor = XGBOOST_VER_MINOR;\n  auto patch = XGBOOST_VER_PATCH;\n\n  static_assert(std::is_integral_v<decltype(major)>, \"Wrong major version type\");\n  static_assert(std::is_integral_v<decltype(minor)>, \"Wrong minor version type\");\n  static_assert(std::is_integral_v<decltype(patch)>, \"Wrong patch version type\");\n}\n\nTEST(Learner, ParameterValidation) {\n  ConsoleLogger::Configure({{\"verbosity\", \"2\"}});\n  size_t constexpr kRows = 1;\n  size_t constexpr kCols = 1;\n  auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n\n  auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));\n  learner->SetParam(\"validate_parameters\", \"1\");\n  learner->SetParam(\"Knock-Knock\", \"Who's-there?\");\n  learner->SetParam(\"Silence\", \"....\");\n  learner->SetParam(\"tree_method\", \"exact\");\n\n  testing::internal::CaptureStderr();\n  learner->Configure();\n  std::string output = testing::internal::GetCapturedStderr();\n\n  ASSERT_TRUE(output.find(R\"(Parameters: { 
\"Knock-Knock\", \"Silence\" })\") != std::string::npos);\n\n  // whitespace\n  learner->SetParam(\"tree method\", \"exact\");\n  ASSERT_THAT([&] { learner->Configure(); }, GMockThrow(R\"(\"tree method\" contains whitespace)\"));\n}\n\nTEST(Learner, DeprecatedGblinearBooster) {\n  auto p_mat = RandomDataGenerator{8, 4, 0.0f}.GenerateDMatrix();\n\n  std::unique_ptr<Learner> learner{Learner::Create({p_mat})};\n  learner->SetParam(\"booster\", \"gblinear\");\n  learner->SetParam(\"verbosity\", \"2\");\n\n  testing::internal::CaptureStderr();\n  learner->Configure();\n  auto output = testing::internal::GetCapturedStderr();\n\n  ASSERT_NE(output.find(\"`booster=gblinear` is deprecated\"), std::string::npos);\n}\n\nTEST(Learner, CheckGroup) {\n  using Arg = std::pair<std::string, std::string>;\n  size_t constexpr kNumGroups = 4;\n  size_t constexpr kNumRows = 17;\n  bst_feature_t constexpr kNumCols = 15;\n\n  std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};\n  std::vector<bst_float> weight(kNumGroups, 1);\n  std::vector<bst_group_t> group(kNumGroups);\n  group[0] = 2;\n  group[1] = 3;\n  group[2] = 7;\n  group[3] = 5;\n  std::vector<bst_float> labels(kNumRows);\n  for (size_t i = 0; i < kNumRows; ++i) {\n    labels[i] = i % 2;\n  }\n\n  p_mat->SetInfo(\"weight\", Make1dInterfaceTest(weight.data(), kNumGroups));\n  p_mat->SetInfo(\"group\", Make1dInterfaceTest(group.data(), kNumGroups));\n  p_mat->SetInfo(\"label\", Make1dInterfaceTest(labels.data(), kNumRows));\n\n  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};\n  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));\n  learner->SetParams({Arg{\"objective\", \"rank:pairwise\"}});\n  EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat));\n\n  group.resize(kNumGroups + 1);\n  group[3] = 4;\n  group[4] = 1;\n  p_mat->SetInfo(\"group\", Make1dInterfaceTest(group.data(), kNumGroups + 1));\n  EXPECT_ANY_THROW(learner->UpdateOneIter(0, 
p_mat));\n}\n\nTEST(Learner, CheckMultiBatch) {\n  auto p_fmat =\n      RandomDataGenerator{512, 128, 0.8}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_FALSE(p_fmat->SingleColBlock());\n\n  std::vector<std::shared_ptr<DMatrix>> mat{p_fmat};\n  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));\n  learner->SetParams(Args{{\"objective\", \"binary:logistic\"}});\n  learner->UpdateOneIter(0, p_fmat);\n}\n\nTEST(Learner, Configuration) {\n  std::string const emetric = \"eval_metric\";\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};\n    learner->SetParam(emetric, \"auc\");\n    learner->SetParam(emetric, \"rmsle\");\n    learner->SetParam(\"foo\", \"bar\");\n\n    // eval_metric is not part of configuration\n    auto attr_names = learner->GetConfigurationArguments();\n    ASSERT_EQ(attr_names.size(), 1ul);\n    ASSERT_EQ(attr_names.find(emetric), attr_names.cend());\n    ASSERT_EQ(attr_names.at(\"foo\"), \"bar\");\n  }\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({nullptr})};\n    learner->SetParams({{\"foo\", \"bar\"}, {emetric, \"auc\"}, {emetric, \"entropy\"}, {emetric, \"KL\"}});\n    auto attr_names = learner->GetConfigurationArguments();\n    ASSERT_EQ(attr_names.size(), 1ul);\n    ASSERT_EQ(attr_names.at(\"foo\"), \"bar\");\n  }\n}\n\nTEST(Learner, JsonModelIO) {\n  // Test of comparing JSON object directly.\n  size_t constexpr kRows = 8;\n  int32_t constexpr kIters = 4;\n\n  std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};\n  p_dmat->Info().labels.Reshape(kRows);\n  CHECK_NE(p_dmat->Info().num_col_, 0);\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n    learner->Configure();\n    Json out{Object()};\n    learner->SaveModel(&out);\n\n    common::TemporaryDirectory tmpdir;\n\n    std::ofstream fout(tmpdir.Path() / \"model.json\");\n    fout << out;\n    fout.close();\n\n    auto loaded_str = common::LoadSequentialFile(tmpdir.Str() 
+ \"/model.json\");\n    Json loaded = Json::Load(StringView{loaded_str.data(), loaded_str.size()});\n\n    learner->LoadModel(loaded);\n    learner->Configure();\n\n    Json new_in{Object()};\n    learner->SaveModel(&new_in);\n    ASSERT_EQ(new_in, out);\n  }\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n    for (int32_t iter = 0; iter < kIters; ++iter) {\n      learner->UpdateOneIter(iter, p_dmat);\n    }\n    learner->SetAttr(\"best_score\", \"15.2\");\n\n    Json out{Object()};\n    learner->SaveModel(&out);\n\n    learner->LoadModel(out);\n    Json new_in{Object()};\n    learner->Configure();\n    learner->SaveModel(&new_in);\n\n    ASSERT_TRUE(IsA<Object>(out[\"learner\"][\"attributes\"]));\n    ASSERT_EQ(get<Object>(out[\"learner\"][\"attributes\"]).size(), 1ul);\n    ASSERT_EQ(out, new_in);\n  }\n}\n\nTEST(Learner, ConfigIO) {\n  bst_idx_t n_samples = 128;\n  bst_feature_t n_features = 12;\n  std::shared_ptr<DMatrix> p_fmat{\n      RandomDataGenerator{n_samples, n_features, 0}.Classes(2).GenerateDMatrix(true)};\n\n  auto serialised_model_tmp = std::string{};\n  std::string eval_res_0;\n  std::string eval_res_1;\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParams(Args{{\"eval_metric\", \"ndcg\"}, {\"eval_metric\", \"map\"}});\n    learner->Configure();\n    learner->UpdateOneIter(0, p_fmat);\n    eval_res_0 = learner->EvalOneIter(0, {p_fmat}, {\"Train\"});\n    common::MemoryBufferStream fo(&serialised_model_tmp);\n    learner->Save(&fo);\n  }\n\n  {\n    common::MemoryBufferStream fi(&serialised_model_tmp);\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->Load(&fi);\n    eval_res_1 = learner->EvalOneIter(0, {p_fmat}, {\"Train\"});\n  }\n  ASSERT_EQ(eval_res_0, eval_res_1);\n}\n\n// Crashes the test runner if there are race conditions.\n//\n// Build with additional cmake flags to enable thread sanitizer\n// which definitely catches problems. 
Note that OpenMP needs to be\n// disabled, otherwise thread sanitizer will also report false\n// positives.\n//\n// ```\n// -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=thread -DUSE_OPENMP=OFF\n// ```\nTEST(Learner, MultiThreadedPredict) {\n  size_t constexpr kRows = 1000;\n  size_t constexpr kCols = 100;\n\n  std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};\n  p_dmat->Info().labels.Reshape(kRows);\n  CHECK_NE(p_dmat->Info().num_col_, 0);\n\n  std::shared_ptr<DMatrix> p_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};\n  CHECK_NE(p_data->Info().num_col_, 0);\n\n  std::shared_ptr<Learner> learner{Learner::Create({p_dmat})};\n  learner->Configure();\n\n  std::vector<std::thread> threads;\n\n#if defined(__linux__)\n  auto n_threads = std::thread::hardware_concurrency() * 4u;\n#else\n  auto n_threads = std::thread::hardware_concurrency();\n#endif\n\n  for (decltype(n_threads) thread_id = 0; thread_id < n_threads; ++thread_id) {\n    threads.emplace_back([learner, p_data] {\n      size_t constexpr kIters = 10;\n      auto& entry = learner->GetThreadLocal().prediction_entry;\n      HostDeviceVector<float> predictions;\n      for (size_t iter = 0; iter < kIters; ++iter) {\n        learner->Predict(p_data, false, &entry.predictions, 0, 0);\n\n        learner->Predict(p_data, false, &predictions, 0, 0, false, true);         // leaf\n        learner->Predict(p_data, false, &predictions, 0, 0, false, false, true);  // contribs\n      }\n    });\n  }\n  for (auto& thread : threads) {\n    thread.join();\n  }\n}\n\n#if defined(XGBOOST_USE_CUDA)\n// Tests for automatic GPU configuration.\nTEST(Learner, GPUConfiguration) {\n  using Arg = std::pair<std::string, std::string>;\n  size_t constexpr kRows = 10;\n  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatrix();\n  std::vector<std::shared_ptr<DMatrix>> mat{p_dmat};\n  std::vector<bst_float> labels(kRows);\n  for (size_t i = 0; i < labels.size(); ++i) {\n    labels[i] = 
i;\n  }\n  p_dmat->Info().labels.Data()->HostVector() = labels;\n  p_dmat->Info().labels.Reshape(kRows);\n  {\n    std::unique_ptr<Learner> learner{Learner::Create(mat)};\n    learner->SetParams(\n        {Arg{\"booster\", \"gblinear\"}, Arg{\"updater\", \"coord_descent\"}, Arg{\"device\", \"cuda\"}});\n    learner->UpdateOneIter(0, p_dmat);\n    ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));\n  }\n  {\n    std::unique_ptr<Learner> learner{Learner::Create(mat)};\n    learner->SetParams({Arg{\"tree_method\", \"hist\"}, {\"device\", \"cuda\"}});\n    learner->Configure();\n    ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));\n    learner->UpdateOneIter(0, p_dmat);\n    ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));\n  }\n  {\n    std::unique_ptr<Learner> learner{Learner::Create(mat)};\n    learner->SetParams({Arg{\"tree_method\", \"hist\"}, Arg{\"device\", \"cuda\"}});\n    learner->UpdateOneIter(0, p_dmat);\n    ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));\n  }\n  {\n    // with CPU algorithm\n    std::unique_ptr<Learner> learner{Learner::Create(mat)};\n    learner->SetParams({Arg{\"tree_method\", \"hist\"}});\n    learner->UpdateOneIter(0, p_dmat);\n    ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());\n  }\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nTEST(Learner, Seed) {\n  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix();\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n  auto seed = std::numeric_limits<int64_t>::max();\n  learner->SetParam(\"seed\", std::to_string(seed));\n  learner->Configure();\n  Json config{Object()};\n  learner->SaveConfig(&config);\n  ASSERT_EQ(std::to_string(seed), get<String>(config[\"learner\"][\"generic_param\"][\"seed\"]));\n\n  seed = std::numeric_limits<int64_t>::min();\n  learner->SetParam(\"seed\", std::to_string(seed));\n  learner->Configure();\n  learner->SaveConfig(&config);\n  ASSERT_EQ(std::to_string(seed), 
get<String>(config[\"learner\"][\"generic_param\"][\"seed\"]));\n}\n\nTEST(Learner, ConstantSeed) {\n  auto m = RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true);\n  std::unique_ptr<Learner> learner{Learner::Create({m})};\n  // Use exact as it doesn't initialize column sampler at construction, which alters the rng.\n  learner->SetParam(\"tree_method\", \"exact\");\n  learner->Configure();\n\n  std::uniform_real_distribution<float> dist;\n  auto& rng = learner->Ctx()->Rng();\n  float v_0 = dist(rng);\n\n  learner->SetParam(\"\", \"\");\n  learner->Configure();  // check configure doesn't change the seed.\n  float v_1 = dist(rng);\n  CHECK_NE(v_0, v_1);\n\n  {\n    rng.seed(Context::kDefaultSeed);\n    std::uniform_real_distribution<float> dist;\n    float v_2 = dist(rng);\n    CHECK_EQ(v_0, v_2);\n  }\n}\n\nTEST(Learner, FeatureInfo) {\n  size_t constexpr kCols = 10;\n  auto m = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);\n  std::vector<std::string> names(kCols);\n  for (size_t i = 0; i < kCols; ++i) {\n    names[i] = (\"f\" + std::to_string(i));\n  }\n\n  std::vector<std::string> types(kCols);\n  for (size_t i = 0; i < kCols; ++i) {\n    types[i] = \"q\";\n  }\n  types[8] = \"f\";\n  types[0] = \"int\";\n  types[3] = \"i\";\n  types[7] = \"i\";\n\n  std::vector<char const*> c_names(kCols);\n  for (size_t i = 0; i < names.size(); ++i) {\n    c_names[i] = names[i].c_str();\n  }\n  std::vector<char const*> c_types(kCols);\n  for (size_t i = 0; i < types.size(); ++i) {\n    c_types[i] = names[i].c_str();\n  }\n\n  std::vector<std::string> out_names;\n  std::vector<std::string> out_types;\n\n  Json model{Object()};\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({m})};\n    learner->Configure();\n    learner->SetFeatureNames(names);\n    learner->GetFeatureNames(&out_names);\n\n    learner->SetFeatureTypes(types);\n    learner->GetFeatureTypes(&out_types);\n\n    ASSERT_TRUE(std::equal(out_names.begin(), out_names.end(), names.begin()));\n    
ASSERT_TRUE(std::equal(out_types.begin(), out_types.end(), types.begin()));\n\n    learner->SaveModel(&model);\n  }\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({m})};\n    learner->LoadModel(model);\n\n    learner->GetFeatureNames(&out_names);\n    learner->GetFeatureTypes(&out_types);\n    ASSERT_TRUE(std::equal(out_names.begin(), out_names.end(), names.begin()));\n    ASSERT_TRUE(std::equal(out_types.begin(), out_types.end(), types.begin()));\n  }\n}\n\nTEST(Learner, MultiTarget) {\n  size_t constexpr kRows{128}, kCols{10}, kTargets{3};\n  auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n  m->Info().labels.Reshape(kRows, kTargets);\n  linalg::cpu_impl::TransformIdxKernel(m->Info().labels.HostView(), omp_get_max_threads(),\n                                       [](auto i, auto) { return i; });\n\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({m})};\n    learner->Configure();\n\n    Json model{Object()};\n    learner->SaveModel(&model);\n    ASSERT_EQ(get<String>(model[\"learner\"][\"learner_model_param\"][\"num_target\"]),\n              std::to_string(kTargets));\n  }\n  {\n    std::unique_ptr<Learner> learner{Learner::Create({m})};\n    learner->SetParam(\"objective\", \"multi:softprob\");\n    // unsupported objective.\n    EXPECT_THROW({ learner->Configure(); }, dmlc::Error);\n  }\n}\n\n/**\n * Test the model initialization sequence is correctly performed.\n */\nclass InitBaseScore : public ::testing::Test {\n protected:\n  std::size_t static constexpr Cols() { return 10; }\n  std::shared_ptr<DMatrix> Xy_;\n\n  void SetUp() override { Xy_ = RandomDataGenerator{10, Cols(), 0}.GenerateDMatrix(true); }\n\n public:\n  void TestUpdateConfig() {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"objective\", \"reg:absoluteerror\");\n    learner->UpdateOneIter(0, Xy_);\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto base_score = GetBaseScore(config);\n    
ASSERT_EQ(base_score.size(), 1);\n    ASSERT_NE(base_score[0], ObjFunction::DefaultBaseScore());\n\n    // already initialized\n    auto Xy1 = RandomDataGenerator{100, Cols(), 0}.Seed(321).GenerateDMatrix(true);\n    learner->UpdateOneIter(1, Xy1);\n    learner->SaveConfig(&config);\n    auto base_score1 = GetBaseScore(config);\n    ASSERT_EQ(base_score, base_score1);\n\n    Json model{Object{}};\n    learner->SaveModel(&model);\n    learner.reset(Learner::Create({}));\n    learner->LoadModel(model);\n    learner->Configure();\n    learner->UpdateOneIter(2, Xy1);\n    learner->SaveConfig(&config);\n    auto base_score2 = GetBaseScore(config);\n    ASSERT_EQ(base_score, base_score2);\n  }\n\n  void TestBoostFromAvgParam() {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"objective\", \"reg:absoluteerror\");\n    learner->SetParam(\"base_score\", \"1.3\");\n    Json config(Object{});\n    learner->Configure();\n    learner->SaveConfig(&config);\n\n    auto base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score.size(), 1);\n    // no change\n    ASSERT_FLOAT_EQ(base_score[0], 1.3);\n\n    HostDeviceVector<float> predt;\n    learner->Predict(Xy_, false, &predt, 0, 0);\n    auto h_predt = predt.ConstHostSpan();\n    for (auto v : h_predt) {\n      ASSERT_FLOAT_EQ(v, 1.3);\n    }\n    learner->UpdateOneIter(0, Xy_);\n    learner->SaveConfig(&config);\n    base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score.size(), 1);\n    // no change\n    ASSERT_FLOAT_EQ(base_score[0], 1.3);\n\n    auto from_avg = std::stoi(\n        get<String const>(config[\"learner\"][\"learner_model_param\"][\"boost_from_average\"]));\n    // from_avg is disabled when base score is set\n    ASSERT_EQ(from_avg, 0);\n    // in the future when we can deprecate the binary model, user can set the parameter directly.\n    learner->SetParam(\"boost_from_average\", \"1\");\n    learner->Configure();\n    learner->SaveConfig(&config);\n    from_avg = 
std::stoi(\n        get<String const>(config[\"learner\"][\"learner_model_param\"][\"boost_from_average\"]));\n    ASSERT_EQ(from_avg, 1);\n  }\n\n  void TestInitAfterLoad() {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"objective\", \"reg:absoluteerror\");\n    learner->Configure();\n\n    Json model{Object{}};\n    learner->SaveModel(&model);\n    auto base_score = GetBaseScore(model);\n    ASSERT_EQ(base_score.size(), 1);\n    ASSERT_FALSE(std::isnan(base_score[0]));\n    ASSERT_EQ(base_score[0], ObjFunction::DefaultBaseScore());\n\n    learner.reset(Learner::Create({Xy_}));\n    learner->LoadModel(model);\n    Json config(Object{});\n    learner->Configure();\n    learner->SaveConfig(&config);\n    base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score[0], ObjFunction::DefaultBaseScore());\n\n    learner->UpdateOneIter(0, Xy_);\n    learner->SaveConfig(&config);\n    base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score.size(), 1);\n    ASSERT_FALSE(std::isnan(base_score[0]));\n    ASSERT_NE(base_score[0], ObjFunction::DefaultBaseScore());\n  }\n\n  void TestInitWithPredt() {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"objective\", \"reg:absoluteerror\");\n    HostDeviceVector<float> predt;\n    learner->Predict(Xy_, false, &predt, 0, 0);\n\n    auto h_predt = predt.ConstHostSpan();\n    for (auto v : h_predt) {\n      ASSERT_EQ(v, ObjFunction::DefaultBaseScore());\n    }\n\n    Json config(Object{});\n    learner->SaveConfig(&config);\n    auto base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score.size(), 1);\n    ASSERT_EQ(base_score[0], ObjFunction::DefaultBaseScore());\n\n    // since prediction is not used for training, the train procedure still runs estimation\n    learner->UpdateOneIter(0, Xy_);\n    learner->SaveConfig(&config);\n    base_score = GetBaseScore(config);\n    ASSERT_EQ(base_score.size(), 1);\n    
ASSERT_FALSE(std::isnan(base_score[0]));\n    ASSERT_NE(base_score[0], ObjFunction::DefaultBaseScore());\n  }\n\n  void TestUpdateProcess() {\n    // Check that when training continuation is performed with update, the base score is\n    // not re-evaluated.\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"objective\", \"reg:absoluteerror\");\n    learner->Configure();\n\n    learner->UpdateOneIter(0, Xy_);\n    Json model{Object{}};\n    learner->SaveModel(&model);\n    auto base_score = GetBaseScore(model);\n    ASSERT_EQ(base_score.size(), 1);\n    ASSERT_FALSE(std::isnan(base_score[0]));\n\n    auto Xy1 = RandomDataGenerator{100, Cols(), 0}.Seed(321).GenerateDMatrix(true);\n    learner.reset(Learner::Create({Xy1}));\n    learner->LoadModel(model);\n    learner->SetParam(\"process_type\", \"update\");\n    learner->SetParam(\"updater\", \"refresh\");\n    learner->UpdateOneIter(1, Xy1);\n\n    Json config(Object{});\n    learner->SaveConfig(&config);\n    auto base_score1 = GetBaseScore(config);\n    ASSERT_EQ(base_score1.size(), 1);\n    ASSERT_FALSE(std::isnan(base_score1[0]));\n    ASSERT_EQ(base_score, base_score1);\n  }\n};\n\nTEST_F(InitBaseScore, TestUpdateConfig) { this->TestUpdateConfig(); }\n\nTEST_F(InitBaseScore, FromAvgParam) { this->TestBoostFromAvgParam(); }\n\nTEST_F(InitBaseScore, InitAfterLoad) { this->TestInitAfterLoad(); }\n\nTEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }\n\nTEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }\n\nclass TestColumnSplit : public ::testing::TestWithParam<std::string> {\n  void TestBaseScore(std::string objective, std::vector<float> const& expected_base_score,\n                     Json expected_model) {\n    auto const world_size = collective::GetWorldSize();\n    auto n_threads = collective::GetWorkerLocalThreads(world_size);\n    auto const rank = collective::GetRank();\n\n    std::shared_ptr<DMatrix> p_fmat = 
MakeFmatForObjTest(objective, 10, 10, 3);\n    std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};\n    std::unique_ptr<Learner> learner{Learner::Create({sliced})};\n    learner->SetParams(Args{{\"nthread\", std::to_string(n_threads)},\n                            {\"tree_method\", \"approx\"},\n                            {\"objective\", objective}});\n    if (objective.find(\"quantile\") != std::string::npos) {\n      learner->SetParam(\"quantile_alpha\", \"0.5\");\n    }\n    if (objective.find(\"expectile\") != std::string::npos) {\n      learner->SetParam(\"expectile_alpha\", \"0.5\");\n    }\n    if (objective.find(\"multi\") != std::string::npos) {\n      learner->SetParam(\"num_class\", \"3\");\n    }\n    learner->UpdateOneIter(0, sliced);\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto base_score = GetBaseScore(config);\n    for (size_t idx = 0; idx < base_score.size(); ++idx) {\n      ASSERT_NEAR(base_score[idx], expected_base_score[idx], 1e-6);\n    }\n\n    Json model{Object{}};\n    learner->SaveModel(&model);\n    CompareJsonModels(model, expected_model);\n  }\n\n public:\n  void Run(std::string objective) {\n    std::shared_ptr<DMatrix> p_fmat = MakeFmatForObjTest(objective, 10, 10, 3);\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParam(\"tree_method\", \"approx\");\n    learner->SetParam(\"objective\", objective);\n    if (objective.find(\"quantile\") != std::string::npos) {\n      learner->SetParam(\"quantile_alpha\", \"0.5\");\n    }\n    if (objective.find(\"expectile\") != std::string::npos) {\n      learner->SetParam(\"expectile_alpha\", \"0.5\");\n    }\n    if (objective.find(\"multi\") != std::string::npos) {\n      learner->SetParam(\"num_class\", \"3\");\n    }\n    learner->UpdateOneIter(0, p_fmat);\n\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n\n    Json model{Object{}};\n    learner->SaveModel(&model);\n\n    auto constexpr 
kWorldSize{3};\n    auto call = [this, &objective](auto&... args) {\n      this->TestBaseScore(objective, args...);\n    };\n    auto score = GetBaseScore(config);\n    collective::TestDistributedGlobal(kWorldSize, [&] { call(score, model); });\n  }\n};\n\nTEST_P(TestColumnSplit, Objective) {\n  std::string objective = GetParam();\n  this->Run(objective);\n}\n\nINSTANTIATE_TEST_SUITE_P(ColumnSplitObjective, TestColumnSplit,\n                         ::testing::ValuesIn(MakeObjNamesForTest()),\n                         [](const ::testing::TestParamInfo<TestColumnSplit::ParamType>& info) {\n                           return ObjTestNameGenerator(info);\n                         });\n\nnamespace {\nJson GetModelWithArgs(std::shared_ptr<DMatrix> dmat, std::string const& tree_method,\n                      std::string const& device, Args const& args) {\n  std::unique_ptr<Learner> learner{Learner::Create({dmat})};\n  auto n_threads = collective::GetWorkerLocalThreads(collective::GetWorldSize());\n  learner->SetParam(\"tree_method\", tree_method);\n  learner->SetParam(\"device\", device);\n  learner->SetParam(\"nthread\", std::to_string(n_threads));\n  learner->SetParam(\"objective\", \"reg:logistic\");\n  learner->SetParams(args);\n  learner->UpdateOneIter(0, dmat);\n  Json model{Object{}};\n  learner->SaveModel(&model);\n  return model;\n}\n\nvoid VerifyColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args,\n                               Json const& expected_model) {\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  auto p_fmat = MakeFmatForObjTest(\"\", 10, 10, 0);\n  std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};\n  std::string device = \"cpu\";\n  if (use_gpu) {\n    device = MakeCUDACtx(DistGpuIdx()).DeviceName();\n  }\n  auto model = GetModelWithArgs(sliced, tree_method, device, args);\n  ASSERT_EQ(model, expected_model);\n}\n\nvoid 
TestColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args,\n                             bool federated) {\n  auto p_fmat = MakeFmatForObjTest(\"\", 10, 10, 0);\n  std::string device = use_gpu ? \"cuda:0\" : \"cpu\";\n  auto model = GetModelWithArgs(p_fmat, tree_method, device, args);\n\n  auto world_size{3};\n  if (use_gpu) {\n    world_size = curt::AllVisibleGPUs();\n    // Simulate MPU on a single GPU. Federated doesn't use nccl, can run multiple\n    // instances on the same GPU.\n    if (world_size == 1 && federated) {\n      world_size = 3;\n    }\n  }\n  if (federated) {\n#if defined(XGBOOST_USE_FEDERATED)\n    collective::TestFederatedGlobal(\n        world_size, [&] { VerifyColumnSplitWithArgs(tree_method, use_gpu, args, model); });\n#else\n    GTEST_SKIP_(\"Not compiled with federated learning.\");\n#endif  //  defined(XGBOOST_USE_FEDERATED)\n  } else {\n#if !defined(XGBOOST_USE_NCCL)\n    if (use_gpu) {\n      GTEST_SKIP_(\"Not compiled with NCCL.\");\n      return;\n    }\n#endif  //  defined(XGBOOST_USE_NCCL)\n    collective::TestDistributedGlobal(\n        world_size, [&] { VerifyColumnSplitWithArgs(tree_method, use_gpu, args, model); });\n  }\n}\n\nclass ColumnSplitTrainingTest\n    : public ::testing::TestWithParam<std::tuple<std::string, bool, bool>> {\n public:\n  static void TestColumnSplitColumnSampler(std::string const& tree_method, bool use_gpu,\n                                           bool federated) {\n    Args args{\n        {\"colsample_bytree\", \"0.5\"}, {\"colsample_bylevel\", \"0.6\"}, {\"colsample_bynode\", \"0.7\"}};\n    TestColumnSplitWithArgs(tree_method, use_gpu, args, federated);\n  }\n  static void TestColumnSplitInteractionConstraints(std::string const& tree_method, bool use_gpu,\n                                                    bool federated) {\n    Args args{{\"interaction_constraints\", \"[[0, 5, 7], [2, 8, 9], [1, 3, 6]]\"}};\n    TestColumnSplitWithArgs(tree_method, use_gpu, args, 
federated);\n  }\n  static void TestColumnSplitMonotoneConstraints(std::string const& tree_method, bool use_gpu,\n                                                 bool federated) {\n    Args args{{\"monotone_constraints\", \"(1,-1,0,1,1,-1,-1,0,0,1)\"}};\n    TestColumnSplitWithArgs(tree_method, use_gpu, args, federated);\n  }\n};\n\nauto WithFed() {\n#if defined(XGBOOST_USE_FEDERATED)\n  return ::testing::Bool();\n#else\n  return ::testing::Values(false);\n#endif\n}\n}  // anonymous namespace\n\nTEST_P(ColumnSplitTrainingTest, ColumnSampler) {\n  std::apply(TestColumnSplitColumnSampler, GetParam());\n}\n\nTEST_P(ColumnSplitTrainingTest, InteractionConstraints) {\n  std::apply(TestColumnSplitInteractionConstraints, GetParam());\n}\n\nTEST_P(ColumnSplitTrainingTest, MonotoneConstraints) {\n  std::apply(TestColumnSplitMonotoneConstraints, GetParam());\n}\n\nINSTANTIATE_TEST_SUITE_P(Cpu, ColumnSplitTrainingTest,\n                         ::testing::Combine(::testing::Values(\"hist\", \"approx\"),\n                                            ::testing::Values(false), WithFed()));\n\nINSTANTIATE_TEST_SUITE_P(MGPU, ColumnSplitTrainingTest,\n                         ::testing::Combine(::testing::Values(\"hist\", \"approx\"),\n                                            ::testing::Values(true), WithFed()));\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_learner.cu",
    "content": "/**\n * Copyright 2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>        // for DeviceSym\n#include <xgboost/global_config.h>  // for GlobalConfigThreadLocalStore\n#include <xgboost/learner.h>\n\n#include <cstdint>  // for int32_t\n#include <memory>   // for unique_ptr\n\n#include \"../../src/common/device_vector.cuh\"  // for GlobalMemoryLogger\n#include \"helpers.h\"                           // for RandomDataGenerator\n\nnamespace xgboost {\nTEST(Learner, Reset) {\n  dh::GlobalMemoryLogger().Clear();\n\n  auto verbosity = GlobalConfigThreadLocalStore::Get()->verbosity;\n  ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n  auto p_fmat = RandomDataGenerator{1024, 32, 0.0}.GenerateDMatrix(true);\n  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n  learner->SetParam(\"device\", DeviceSym::CUDA());\n  learner->Configure();\n  for (std::int32_t i = 0; i < 2; ++i) {\n    learner->UpdateOneIter(i, p_fmat);\n  }\n\n  auto cur = dh::GlobalMemoryLogger().CurrentlyAllocatedBytes();\n  p_fmat.reset();\n  auto after_p_fmat_reset = dh::GlobalMemoryLogger().CurrentlyAllocatedBytes();\n  ASSERT_LT(after_p_fmat_reset, cur);\n  learner->Reset();\n  auto after_learner_reset = dh::GlobalMemoryLogger().CurrentlyAllocatedBytes();\n  ASSERT_LT(after_learner_reset, after_p_fmat_reset);\n  ASSERT_LE(after_learner_reset, 64);\n  ConsoleLogger::Configure({{\"verbosity\", std::to_string(verbosity)}});\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_logging.cc",
    "content": "#include <map>\n\n#include <gtest/gtest.h>\n#include <xgboost/logging.h>\n\nnamespace xgboost {\n\nTEST(Logging, Basic) {\n  std::map<std::string, std::string> args {};\n  std::string output;\n\n  args[\"verbosity\"] = \"0\";  // silent\n  ConsoleLogger::Configure({args.cbegin(), args.cend()});\n  testing::internal::CaptureStderr();\n  LOG(DEBUG) << \"Test silent.\";\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_EQ(output.length(), 0);\n\n  args[\"verbosity\"] = \"3\";  // debug\n  ConsoleLogger::Configure({args.cbegin(), args.cend()});\n\n  testing::internal::CaptureStderr();\n  LOG(WARNING) << \"Test Log Warning.\";\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_NE(output.find(\"WARNING\"), std::string::npos);\n\n  testing::internal::CaptureStderr();\n  LOG(INFO) << \"Test Log Info.\";\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_NE(output.find(\"Test Log Info\"), std::string::npos);\n\n  testing::internal::CaptureStderr();\n  LOG(DEBUG) << \"Test Log Debug.\";\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_NE(output.find(\"DEBUG\"), std::string::npos);\n\n  args[\"verbosity\"] = \"1\";  // warning\n  ConsoleLogger::Configure({args.cbegin(), args.cend()});\n  testing::internal::CaptureStderr();\n  LOG(INFO) << \"INFO should not be displayed when set to warning.\";\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_EQ(output.size(), 0);\n\n  testing::internal::CaptureStderr();\n  LOG(CONSOLE) << \"Test Log Console\";  // ignore global setting.\n  output = testing::internal::GetCapturedStderr();\n  ASSERT_NE(output.find(\"Test Log Console\"), std::string::npos);\n\n  args[\"verbosity\"] = \"2\";  // restore\n  ConsoleLogger::Configure({args.cbegin(), args.cend()});\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_main.cc",
    "content": "/**\n * Copyright 2016-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/logging.h>\n\n#include <string>\n\n#include \"helpers.h\"\n\nint main(int argc, char** argv) {\n  xgboost::Args args{{\"verbosity\", \"2\"}};\n  xgboost::ConsoleLogger::Configure(args);\n\n  testing::InitGoogleTest(&argc, argv);\n  testing::FLAGS_gtest_death_test_style = \"threadsafe\";\n  auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv);\n  return RUN_ALL_TESTS();\n}\n"
  },
  {
    "path": "tests/cpp/test_multi_target.cc",
    "content": "/**\n * Copyright 2023-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                         // for Args, bst_target_t\n#include <xgboost/data.h>                         // for DMatrix, MetaInfo\n#include <xgboost/json.h>                         // for Json, get, Object, String\n#include <xgboost/learner.h>                      // for Learner\n\n#include <algorithm>                              // for copy\n#include <cstddef>                                // for size_t\n#include <memory>                                 // for shared_ptr, allocator, __shared_ptr_access\n#include <numeric>                                // for accumulate\n#include <string>                                 // for stod, string\n#include <vector>                                 // for vector\n\n#include \"../../src/common/linalg_op.h\"           // for begin, cbegin, cend\n#include \"../../src/common/stats.h\"               // for Median\n#include \"helpers.h\"                              // for RandomDataGenerator\n#include \"xgboost/host_device_vector.h\"           // for HostDeviceVector\n#include \"xgboost/linalg.h\"                       // for Tensor, All, TensorView, Vector\n\nnamespace xgboost {\nclass TestL1MultiTarget : public ::testing::Test {\n  std::shared_ptr<DMatrix> Xy_;\n  std::shared_ptr<DMatrix> Xyw_;\n  std::vector<std::shared_ptr<DMatrix>> single_;\n  std::vector<std::shared_ptr<DMatrix>> single_w_;\n\n public:\n  void SetUp() override {\n    std::size_t constexpr kRows{256}, kCols{5}, kTargets{3};\n    auto make_fmat = [&](bool weighted) {\n      if (weighted) {\n        auto p_fmat =\n            RandomDataGenerator{kRows, kCols, 0.5f}.Targets(kTargets).GenerateDMatrix(true);\n        p_fmat->Info().weights_.Resize(kRows);\n        RandomDataGenerator{kRows, 1, 0.0f}.GenerateDense(&p_fmat->Info().weights_);\n        return p_fmat;\n      } else {\n        return RandomDataGenerator{kRows, kCols, 
0.5f}.Targets(kTargets).GenerateDMatrix(true);\n      }\n    };\n\n    Xy_ = make_fmat(false);\n    Xyw_ = make_fmat(true);\n    ASSERT_EQ(Xy_->Info().labels.Shape(1), kTargets);\n    ASSERT_EQ(Xyw_->Info().labels.Shape(1), kTargets);\n\n    single_.clear();\n    single_w_.clear();\n    for (bst_target_t t{0}; t < kTargets; ++t) {\n      {\n        single_.emplace_back(make_fmat(false));\n        single_[t]->Info().labels.Reshape(kRows, 1);\n        auto h_labels = single_[t]->Info().labels.HostView();\n        auto in_labels = Xy_->Info().labels.HostView().Slice(linalg::All(), t);\n        std::copy(linalg::cbegin(in_labels), linalg::cend(in_labels), linalg::begin(h_labels));\n      }\n      {\n        single_w_.emplace_back(make_fmat(true));\n        single_w_[t]->Info().labels.Reshape(kRows, 1);\n        auto h_labels = single_w_[t]->Info().labels.HostView();\n        auto in_labels = Xyw_->Info().labels.HostView().Slice(linalg::All(), t);\n        std::copy(linalg::cbegin(in_labels), linalg::cend(in_labels), linalg::begin(h_labels));\n      }\n    }\n  }\n\n  void RunTest(Context const* ctx, std::string const& tree_method, bool weight) {\n    auto p_fmat = weight ? Xyw_ : Xy_;\n    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n    learner->SetParams(Args{{\"tree_method\", tree_method},\n                            {\"objective\", \"reg:absoluteerror\"},\n                            {\"device\", ctx->DeviceName()}});\n    learner->Configure();\n    for (auto i = 0; i < 4; ++i) {\n      learner->UpdateOneIter(i, p_fmat);\n    }\n    ASSERT_EQ(learner->Groups(), 3);\n\n    Json config{Object{}};\n    learner->SaveConfig(&config);\n    auto base_score = GetBaseScore(config);\n\n    std::vector<float> split_scores;\n    for (bst_target_t t{0}; t < p_fmat->Info().labels.Shape(1); ++t) {\n      auto t_Xy = weight ? 
single_w_[t] : single_[t];\n      std::unique_ptr<Learner> sl{Learner::Create({t_Xy})};\n      sl->SetParams(Args{{\"tree_method\", tree_method},\n                         {\"objective\", \"reg:absoluteerror\"},\n                         {\"device\", ctx->DeviceName()}});\n      sl->Configure();\n      sl->UpdateOneIter(0, t_Xy);\n      Json s_config{Object{}};\n      sl->SaveConfig(&s_config);\n      auto s_base_score = GetBaseScore(s_config);\n      ASSERT_EQ(s_base_score.size(), 1);\n      linalg::Vector<float> out;\n      common::Median(sl->Ctx(), t_Xy->Info().labels, t_Xy->Info().weights_, &out);\n      ASSERT_FLOAT_EQ(s_base_score[0], out(0));\n      split_scores.push_back(s_base_score[0]);\n    }\n    ASSERT_EQ(split_scores, base_score);\n  }\n\n  void RunTest(Context const* ctx, std::string const& tree_method) {\n    this->RunTest(ctx, tree_method, false);\n    this->RunTest(ctx, tree_method, true);\n  }\n};\n\nTEST_F(TestL1MultiTarget, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"hist\");\n}\n\nTEST_F(TestL1MultiTarget, Exact) {\n  Context ctx;\n  this->RunTest(&ctx, \"exact\");\n}\n\nTEST_F(TestL1MultiTarget, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"approx\");\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestL1MultiTarget, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"hist\");\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nTEST(MultiStrategy, Configure) {\n  auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();\n  p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);\n  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};\n  learner->SetParams(Args{{\"multi_strategy\", \"multi_output_tree\"}, {\"num_target\", \"2\"}});\n  learner->Configure();\n  ASSERT_EQ(learner->Groups(), 2);\n\n  learner->SetParams(Args{{\"multi_strategy\", \"multi_output_tree\"}, {\"num_target\", \"0\"}});\n  ASSERT_THROW({ learner->Configure(); }, dmlc::Error);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_serialization.cc",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#include \"test_serialization.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/data.h>\n#include <xgboost/feature_map.h>  // for FeatureMap\n#include <xgboost/json.h>         // for Json\n#include <xgboost/learner.h>\n\n#include <cmath>\n#include <random>  // for mt19937\n#include <string>\n\n#include \"../../src/common/io.h\"\n#include \"filesystem.h\"  // for TemporaryDirectory\n#include \"helpers.h\"\n\nnamespace xgboost {\ntemplate <typename T>\nvoid CompareFloat(T lhs, T rhs) {\n  if (std::isnan(lhs) || std::isnan(rhs)) {\n    ASSERT_TRUE(std::isnan(lhs));\n    ASSERT_TRUE(std::isnan(rhs));\n    return;\n  }\n  if (std::isinf(lhs) || std::isinf(rhs)) {\n    ASSERT_EQ(lhs, rhs);\n    return;\n  }\n  ASSERT_NEAR(lhs, rhs, kRtEps);\n}\n\ntemplate <typename Array>\nvoid CompareIntArray(Json l, Json r) {\n  auto const& l_arr = get<Array const>(l);\n  auto const& r_arr = get<Array const>(r);\n  ASSERT_EQ(l_arr.size(), r_arr.size());\n  for (size_t i = 0; i < l_arr.size(); ++i) {\n    ASSERT_EQ(l_arr[i], r_arr[i]);\n  }\n}\n\nvoid CompareJSON(Json l, Json r) {\n  switch (l.GetValue().Type()) {\n    case Value::ValueKind::kString: {\n      ASSERT_EQ(l, r);\n      break;\n    }\n    case Value::ValueKind::kNumber: {\n      CompareFloat(get<Number>(l), get<Number>(r));\n      break;\n    }\n    case Value::ValueKind::kInteger: {\n      ASSERT_EQ(l, r);\n      break;\n    }\n    case Value::ValueKind::kObject: {\n      auto const& l_obj = get<Object const>(l);\n      auto const& r_obj = get<Object const>(r);\n      ASSERT_EQ(l_obj.size(), r_obj.size());\n\n      for (auto const& kv : l_obj) {\n        ASSERT_NE(r_obj.find(kv.first), r_obj.cend());\n        // Floating point array saved as a string.\n        if (kv.first == \"base_score\") {\n          auto l_v = Json::Load(get<String const>(l_obj.at(kv.first)));\n          auto r_v = Json::Load(get<String 
const>(r_obj.at(kv.first)));\n          CompareJSON(l_v, r_v);\n        } else {\n          CompareJSON(l_obj.at(kv.first), r_obj.at(kv.first));\n        }\n      }\n      break;\n    }\n    case Value::ValueKind::kArray: {\n      auto const& l_arr = get<Array const>(l);\n      auto const& r_arr = get<Array const>(r);\n      ASSERT_EQ(l_arr.size(), r_arr.size());\n      for (size_t i = 0; i < l_arr.size(); ++i) {\n        CompareJSON(l_arr[i], r_arr[i]);\n      }\n      break;\n    }\n    case Value::ValueKind::kF32Array: {\n      auto const& l_arr = get<F32Array const>(l);\n      auto const& r_arr = get<F32Array const>(r);\n      ASSERT_EQ(l_arr.size(), r_arr.size());\n      for (size_t i = 0; i < l_arr.size(); ++i) {\n        CompareFloat(l_arr[i], r_arr[i]);\n      }\n      break;\n    }\n    case Value::ValueKind::kF64Array: {\n      auto const& l_arr = get<F64Array const>(l);\n      auto const& r_arr = get<F64Array const>(r);\n      ASSERT_EQ(l_arr.size(), r_arr.size());\n      for (size_t i = 0; i < l_arr.size(); ++i) {\n        CompareFloat(l_arr[i], r_arr[i]);\n      }\n      break;\n    }\n    case Value::ValueKind::kI8Array: {\n      CompareIntArray<I8Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kU8Array: {\n      CompareIntArray<U8Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kI16Array: {\n      CompareIntArray<I16Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kU16Array: {\n      CompareIntArray<U16Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kI32Array: {\n      CompareIntArray<I32Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kU32Array: {\n      CompareIntArray<U32Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kI64Array: {\n      CompareIntArray<I64Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kU64Array: {\n      CompareIntArray<U64Array>(l, r);\n      break;\n    }\n    case Value::ValueKind::kBoolean: {\n      ASSERT_EQ(l, r);\n      
break;\n    }\n    case Value::ValueKind::kNull: {\n      ASSERT_EQ(l, r);\n      break;\n    }\n  }\n}\n\nvoid CompareJsonModels(Json l, Json r) { CompareJSON(std::move(l), std::move(r)); }\n\nvoid TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr<DMatrix> p_dmat) {\n  for (auto& batch : p_dmat->GetBatches<SparsePage>()) {\n    batch.data.HostVector();\n    batch.offset.HostVector();\n  }\n\n  int32_t constexpr kIters = 2;\n\n  common::TemporaryDirectory tempdir;\n  std::string const fname = tempdir.Str() + \"/model\";\n\n  std::vector<std::string> dumped_0;\n  std::string model_at_kiter;\n\n  // Train for kIters.\n  {\n    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), \"w\"));\n    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n    learner->SetParams(args);\n    for (int32_t iter = 0; iter < kIters; ++iter) {\n      learner->UpdateOneIter(iter, p_dmat);\n    }\n    dumped_0 = learner->DumpModel(fmap, true, \"json\");\n    learner->Save(fo.get());\n\n    common::MemoryBufferStream mem_out(&model_at_kiter);\n    learner->Save(&mem_out);\n  }\n\n  // Assert dumped model is same after loading\n  std::vector<std::string> dumped_1;\n  {\n    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\"));\n    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n    learner->Load(fi.get());\n    learner->Configure();\n    dumped_1 = learner->DumpModel(fmap, true, \"json\");\n  }\n  ASSERT_EQ(dumped_0, dumped_1);\n\n  std::string model_at_2kiter;\n\n  // Test training continuation with data from host\n  {\n    std::string continued_model;\n    {\n      // Continue the previous training with another kIters\n      std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\"));\n      std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n      learner->Load(fi.get());\n      learner->Configure();\n\n      // verify the loaded model doesn't change.\n      std::string 
serialised_model_tmp;\n      common::MemoryBufferStream mem_out(&serialised_model_tmp);\n      learner->Save(&mem_out);\n      ASSERT_EQ(model_at_kiter, serialised_model_tmp);\n\n      for (auto& batch : p_dmat->GetBatches<SparsePage>()) {\n        batch.data.HostVector();\n        batch.offset.HostVector();\n      }\n\n      for (int32_t iter = kIters; iter < 2 * kIters; ++iter) {\n        learner->UpdateOneIter(iter, p_dmat);\n      }\n      common::MemoryBufferStream fo(&continued_model);\n      learner->Save(&fo);\n    }\n\n    {\n      // Train 2 * kIters in one go\n      std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n      learner->SetParams(args);\n      for (int32_t iter = 0; iter < 2 * kIters; ++iter) {\n        learner->UpdateOneIter(iter, p_dmat);\n\n        // Verify model is same at the same iteration during two training\n        // sessions.\n        if (iter == kIters - 1) {\n          std::string reproduced_model;\n          common::MemoryBufferStream fo(&reproduced_model);\n          learner->Save(&fo);\n          ASSERT_EQ(model_at_kiter, reproduced_model);\n        }\n      }\n      common::MemoryBufferStream fo(&model_at_2kiter);\n      learner->Save(&fo);\n    }\n\n    Json m_0 = Json::Load(StringView{continued_model}, std::ios::binary);\n    Json m_1 = Json::Load(StringView{model_at_2kiter}, std::ios::binary);\n\n    CompareJSON(m_0, m_1);\n  }\n\n  // Test training continuation with data from device.\n  {\n    // Continue the previous training but on data from device.\n    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), \"r\"));\n    std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};\n    learner->Load(fi.get());\n    learner->Configure();\n\n    // verify the loaded model doesn't change.\n    std::string serialised_model_tmp;\n    common::MemoryBufferStream mem_out(&serialised_model_tmp);\n    learner->Save(&mem_out);\n    ASSERT_EQ(model_at_kiter, serialised_model_tmp);\n\n    // Set the model 
to device\n    for (auto const& [key, value] : args) {\n      if (key == \"device\") {\n        learner->SetParam(key, value);\n      }\n    }\n\n    // Pull data to device\n    for (auto& batch : p_dmat->GetBatches<SparsePage>()) {\n      batch.data.SetDevice(DeviceOrd::CUDA(0));\n      batch.data.DeviceSpan();\n      batch.offset.SetDevice(DeviceOrd::CUDA(0));\n      batch.offset.DeviceSpan();\n    }\n\n    for (int32_t iter = kIters; iter < 2 * kIters; ++iter) {\n      learner->UpdateOneIter(iter, p_dmat);\n    }\n    serialised_model_tmp = std::string{};\n    common::MemoryBufferStream fo(&serialised_model_tmp);\n    learner->Save(&fo);\n\n    Json m_0 = Json::Load(StringView{model_at_2kiter}, std::ios::binary);\n    Json m_1 = Json::Load(StringView{serialised_model_tmp}, std::ios::binary);\n    // GPU ID is changed as data is coming from device.\n    get<Object>(m_0[\"Config\"][\"learner\"][\"generic_param\"]).erase(\"device\");\n    get<Object>(m_1[\"Config\"][\"learner\"][\"generic_param\"]).erase(\"device\");\n    ASSERT_EQ(get<Object>(m_0[\"Config\"][\"learner\"][\"generic_param\"]),\n              get<Object>(m_1[\"Config\"][\"learner\"][\"generic_param\"]));\n  }\n}\n\n// Binary is not tested, as it is NOT reproducible.\nclass SerializationTest : public ::testing::Test {\n protected:\n  size_t constexpr static kRows = 15;\n  size_t constexpr static kCols = 15;\n  std::shared_ptr<DMatrix> p_dmat_;\n  FeatureMap fmap_;\n\n protected:\n  ~SerializationTest() override = default;\n  void SetUp() override {\n    p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();\n\n    p_dmat_->Info().labels.Reshape(kRows);\n    auto& h_labels = p_dmat_->Info().labels.Data()->HostVector();\n\n    xgboost::SimpleLCG gen(0);\n    SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);\n\n    for (auto& v : h_labels) {\n      v = dis(&gen);\n    }\n\n    for (size_t i = 0; i < kCols; ++i) {\n      std::string name = \"feat_\" + std::to_string(i);\n      
fmap_.PushBack(i, name.c_str(), \"q\");\n    }\n  }\n};\n\nsize_t constexpr SerializationTest::kRows;\nsize_t constexpr SerializationTest::kCols;\n\nTEST_F(SerializationTest, Exact) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"base_score\", \"3.14195265\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"base_score\", \"3.14195265\"},\n                            {\"max_depth\", \"2\"},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"base_score\", \"3.14195265\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(SerializationTest, Approx) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"num_parallel_tree\", 
\"4\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(SerializationTest, Hist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(SerializationTest, CPUCoordDescent) {\n  TestLearnerSerialization(\n      {{\"booster\", \"gblinear\"}, {\"seed\", \"0\"}, {\"nthread\", \"1\"}, {\"updater\", \"coord_descent\"}},\n      fmap_, p_dmat_);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(SerializationTest, GpuHist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            
{\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(SerializationTest, ConfigurationCount) {\n  auto& p_dmat = p_dmat_;\n  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_dmat};\n\n  xgboost::ConsoleLogger::Configure({{\"verbosity\", \"3\"}});\n\n  testing::internal::CaptureStderr();\n\n  std::string model_str;\n  {\n    auto learner = std::unique_ptr<Learner>(Learner::Create(mat));\n\n    learner->SetParams(Args{{\"tree_method\", \"hist\"}, {\"device\", \"cuda\"}});\n\n    for (size_t i = 0; i < 10; ++i) {\n      learner->UpdateOneIter(i, p_dmat);\n    }\n    common::MemoryBufferStream fo(&model_str);\n    learner->Save(&fo);\n  }\n\n  {\n    common::MemoryBufferStream fi(&model_str);\n    auto learner = std::unique_ptr<Learner>(Learner::Create(mat));\n    learner->Load(&fi);\n    for (size_t i = 0; i < 10; ++i) {\n      learner->UpdateOneIter(i, p_dmat);\n    }\n  }\n\n  std::string output = testing::internal::GetCapturedStderr();\n  std::string target = \"[GPU Hist]: Configure\";\n  ASSERT_NE(output.find(target), std::string::npos);\n\n  size_t occureences = 0;\n  size_t pos = 0;\n  // 
Should run configuration exactly 2 times, one for each learner.\n  while ((pos = output.find(\"[GPU Hist]: Configure\", pos)) != std::string::npos) {\n    occureences++;\n    pos += target.size();\n  }\n  ASSERT_EQ(occureences, 2ul);\n\n  xgboost::ConsoleLogger::Configure({{\"verbosity\", \"2\"}});\n}\n\nTEST_F(SerializationTest, GPUCoordDescent) {\n  TestLearnerSerialization({{\"booster\", \"gblinear\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"device\", \"cuda\"},\n                            {\"updater\", \"coord_descent\"}},\n                           fmap_, p_dmat_);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass L1SerializationTest : public SerializationTest {};\n\nTEST_F(L1SerializationTest, Exact) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"reg:absoluteerror\"},\n                            {\"seed\", \"0\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(L1SerializationTest, Approx) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"reg:absoluteerror\"},\n                            {\"seed\", \"0\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(L1SerializationTest, Hist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"reg:absoluteerror\"},\n                            {\"seed\", \"0\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(L1SerializationTest, GpuHist) {\n  
TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"reg:absoluteerror\"},\n                            {\"seed\", \"0\"},\n                            {\"max_depth\", \"2\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n#endif  //  defined(XGBOOST_USE_CUDA)\n\nclass LogitSerializationTest : public SerializationTest {\n protected:\n  void SetUp() override {\n    p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();\n\n    std::shared_ptr<DMatrix> p_dmat{p_dmat_};\n    p_dmat->Info().labels.Reshape(kRows);\n    auto& h_labels = p_dmat->Info().labels.Data()->HostVector();\n\n    std::bernoulli_distribution flip(0.5);\n    std::mt19937 rnd{0};\n\n    for (auto& v : h_labels) {\n      v = flip(rnd);\n    }\n\n    for (size_t i = 0; i < kCols; ++i) {\n      std::string name = \"feat_\" + std::to_string(i);\n      fmap_.PushBack(i, name.c_str(), \"q\");\n    }\n  }\n};\n\nTEST_F(LogitSerializationTest, Exact) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(LogitSerializationTest, Approx) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            
{\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(LogitSerializationTest, Hist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(LogitSerializationTest, CPUCoordDescent) {\n  TestLearnerSerialization(\n      {{\"booster\", \"gblinear\"}, {\"seed\", \"0\"}, {\"nthread\", \"1\"}, {\"updater\", \"coord_descent\"}},\n      fmap_, p_dmat_);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(LogitSerializationTest, GpuHist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", 
\"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", \"2\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(LogitSerializationTest, GPUCoordDescent) {\n  TestLearnerSerialization({{\"booster\", \"gblinear\"},\n                            {\"objective\", \"binary:logistic\"},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"device\", \"cuda\"},\n                            {\"updater\", \"coord_descent\"}},\n                           fmap_, p_dmat_);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass MultiClassesSerializationTest : public SerializationTest {\n protected:\n  size_t constexpr static kClasses = 4;\n\n  void SetUp() override {\n    p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();\n\n    std::shared_ptr<DMatrix> p_dmat{p_dmat_};\n    p_dmat->Info().labels.Reshape(kRows);\n    auto& h_labels = 
p_dmat->Info().labels.Data()->HostVector();\n\n    std::uniform_int_distribution<size_t> categorical(0, kClasses - 1);\n    std::mt19937 rnd{0};\n\n    for (auto& v : h_labels) {\n      v = categorical(rnd);\n    }\n\n    for (size_t i = 0; i < kCols; ++i) {\n      std::string name = \"feat_\" + std::to_string(i);\n      fmap_.PushBack(i, name.c_str(), \"q\");\n    }\n  }\n};\n\nTEST_F(MultiClassesSerializationTest, Exact) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"exact\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(MultiClassesSerializationTest, Approx) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n              
              {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"approx\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(MultiClassesSerializationTest, Hist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(MultiClassesSerializationTest, CPUCoordDescent) {\n  TestLearnerSerialization(\n      
{{\"booster\", \"gblinear\"}, {\"seed\", \"0\"}, {\"nthread\", \"1\"}, {\"updater\", \"coord_descent\"}},\n      fmap_, p_dmat_);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(MultiClassesSerializationTest, GpuHist) {\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            // Mitigate the difference caused by hardware fused multiply\n                            // add to tree weight during update prediction cache.\n                            {\"learning_rate\", \"1.0\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"gbtree\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            // GPU_Hist has higher floating point error. 
1e-6 doesn't work\n                            // after num_parallel_tree goes to 4\n                            {\"num_parallel_tree\", \"4\"},\n                            {\"learning_rate\", \"1.0\"},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n\n  TestLearnerSerialization({{\"booster\", \"dart\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"learning_rate\", \"1.0\"},\n                            {\"max_depth\", std::to_string(kClasses)},\n                            {\"device\", \"cuda\"},\n                            {\"tree_method\", \"hist\"}},\n                           fmap_, p_dmat_);\n}\n\nTEST_F(MultiClassesSerializationTest, GPUCoordDescent) {\n  TestLearnerSerialization({{\"booster\", \"gblinear\"},\n                            {\"num_class\", std::to_string(kClasses)},\n                            {\"seed\", \"0\"},\n                            {\"nthread\", \"1\"},\n                            {\"updater\", \"coord_descent\"},\n                            {\"device\", \"cuda\"}},\n                           fmap_, p_dmat_);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/test_serialization.h",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#pragma once\n#include <xgboost/json.h>  // for Json\n\nnamespace xgboost {\nvoid CompareJsonModels(Json l, Json r);\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/dummy_quantizer.cuh",
    "content": "/**\n * Copyright 2025-2026, XGBoost Contributors\n */\n#pragma once\n\n#include <xgboost/base.h>  // for bst_target_t\n\n#include <vector>  // for vector\n\n#include \"../../../../src/common/device_vector.cuh\"     // for device_vector\n#include \"../../../../src/tree/gpu_hist/quantiser.cuh\"  // for GradientQuantiser\n#include \"../../helpers.h\"\n\nnamespace xgboost::tree {\ninline GradientQuantiser MakeDummyQuantizer() {\n  return {GradientPairPrecise{1.0f, 1.0f}, GradientPairPrecise{1.0f, 1.0f}};\n}\n\ninline auto MakeDummyQuantizers(bst_target_t n_targets) {\n  std::vector<GradientQuantiser> h_quantizers;\n  for (bst_target_t i = 0; i < n_targets; ++i) {\n    h_quantizers.emplace_back(MakeDummyQuantizer());\n  }\n  dh::device_vector<GradientQuantiser> d_quantizers(h_quantizers);\n  return d_quantizers;\n}\n\nstruct QuantizedGradients {\n  linalg::Matrix<GradientPairInt64> gpair;\n  GradientQuantiserGroup quantizer;\n};\n\n// Returns both quantized gradients and quantizers.\ninline auto GenerateGradientsFixedPoint(Context const* ctx, bst_idx_t n_samples,\n                                        bst_target_t n_targets = 1, float lower = 0.0f,\n                                        float upper = 1.0f) {\n  auto gpairs = GenerateRandomGradients(n_samples * n_targets, lower, upper);\n  gpairs.SetDevice(ctx->Device());\n  auto d_gpair = linalg::MakeTensorView(ctx, gpairs.ConstDeviceSpan(), n_samples, n_targets);\n\n  GradientQuantiserGroup quantizer_group{ctx, d_gpair, MetaInfo{}};\n\n  linalg::Matrix<GradientPairInt64> gpairs_i64;\n  CalcQuantizedGpairs(ctx, d_gpair, quantizer_group.DeviceSpan(), &gpairs_i64);\n\n  return QuantizedGradients{std::move(gpairs_i64), std::move(quantizer_group)};\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_driver.cu",
    "content": "/**\n * Copyright 2020-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include \"../../../../src/tree/driver.h\"\n#include \"../../../../src/tree/gpu_hist/expand_entry.cuh\"\n\nnamespace xgboost {\nnamespace tree {\n\nTEST(GpuHist, DriverDepthWise) {\n  TrainParam p;\n  p.UpdateAllowUnknown(Args{{\"grow_policy\", \"depthwise\"}});\n\n  Driver<GPUExpandEntry> driver(p, 2);\n  EXPECT_TRUE(driver.Pop().empty());\n  DeviceSplitCandidate split;\n  split.loss_chg = 1.0f;\n  split.left_sum = {0, 1};\n  split.right_sum = {0, 1};\n  GPUExpandEntry root(0, 0, split, 2.0f, 1.0f, 1.0f);\n  driver.Push({root});\n  EXPECT_EQ(driver.Pop().front().nidx, 0);\n  driver.Push({GPUExpandEntry{1, 1, split, 2.0f, 1.0f, 1.0f}});\n  driver.Push({GPUExpandEntry{2, 1, split, 2.0f, 1.0f, 1.0f}});\n  driver.Push({GPUExpandEntry{3, 1, split, 2.0f, 1.0f, 1.0f}});\n  driver.Push({GPUExpandEntry{4, 2, split, 2.0f, 1.0f, 1.0f}});\n  // Should return 2 entries from level 1\n  // as we limited the driver to pop maximum 2 nodes\n  auto res = driver.Pop();\n  EXPECT_EQ(res.size(), 2);\n  for (auto &e : res) {\n    EXPECT_EQ(e.depth, 1);\n  }\n\n  // Should now return 1 entry from level 1\n  res = driver.Pop();\n  EXPECT_EQ(res.size(), 1);\n  EXPECT_EQ(res.at(0).depth, 1);\n\n  res = driver.Pop();\n  EXPECT_EQ(res.at(0).depth, 2);\n  EXPECT_TRUE(driver.Pop().empty());\n}\n\nTEST(GpuHist, DriverLossGuided) {\n  DeviceSplitCandidate high_gain;\n  high_gain.left_sum = {0, 1};\n  high_gain.right_sum = {0, 1};\n  high_gain.loss_chg = 5.0f;\n  DeviceSplitCandidate low_gain = high_gain;\n  low_gain.loss_chg = 1.0f;\n\n  TrainParam p;\n  p.UpdateAllowUnknown(Args{{\"grow_policy\", \"lossguide\"}});\n\n  Driver<GPUExpandEntry> driver(p);\n  EXPECT_TRUE(driver.Pop().empty());\n  GPUExpandEntry root(0, 0, high_gain, 2.0f, 1.0f, 1.0f );\n  driver.Push({root});\n  EXPECT_EQ(driver.Pop().front().nidx, 0);\n  // Select high gain first\n  driver.Push({GPUExpandEntry{1, 1, low_gain, 2.0f, 
1.0f, 1.0f}});\n  driver.Push({GPUExpandEntry{2, 2, high_gain, 2.0f, 1.0f, 1.0f}});\n  auto res = driver.Pop();\n  EXPECT_EQ(res.size(), 1);\n  EXPECT_EQ(res[0].nidx, 2);\n  res = driver.Pop();\n  EXPECT_EQ(res.size(), 1);\n  EXPECT_EQ(res[0].nidx, 1);\n\n  // If equal gain, use nid\n  driver.Push({GPUExpandEntry{2, 1, low_gain, 2.0f, 1.0f, 1.0f}});\n  driver.Push({GPUExpandEntry{1, 1, low_gain, 2.0f, 1.0f, 1.0f}});\n  res = driver.Pop();\n  EXPECT_EQ(res[0].nidx, 1);\n  res = driver.Pop();\n  EXPECT_EQ(res[0].nidx, 2);\n}\n}  // namespace tree\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_evaluate_splits.cu",
    "content": "/**\n * Copyright 2020-2024, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/host_vector.h>\n\n#include \"../../../../src/tree/gpu_hist/evaluate_splits.cuh\"\n#include \"../../collective/test_worker.h\"  // for BaseMGPUTest\n#include \"../../helpers.h\"\n#include \"../test_evaluate_splits.h\"  // TestPartitionBasedSplit\n\nnamespace xgboost::tree {\nnamespace {\nauto ZeroParam() {\n  auto args = Args{{\"min_child_weight\", \"0\"}, {\"lambda\", \"0\"}};\n  TrainParam tparam;\n  tparam.UpdateAllowUnknown(args);\n  return tparam;\n}\n\nGradientQuantiser DummyRoundingFactor(Context const* ctx) {\n  thrust::device_vector<GradientPair> gpair(1);\n  gpair[0] = {1000.f, 1000.f};  // Tests should not exceed sum of 1000\n  GradientQuantiserGroup group{ctx, linalg::MakeVec(ctx->Device(), dh::ToSpan(gpair)), MetaInfo()};\n  return group[0];\n}\n}  // anonymous namespace\n\nthrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,\n                                                          std::vector<GradientPairPrecise> x) {\n  auto r = DummyRoundingFactor(ctx);\n  std::vector<GradientPairInt64> y(x.size());\n  for (std::size_t i = 0; i < x.size(); i++) {\n    y[i] = r.ToFixedPoint(GradientPair(x[i]));\n  }\n  return y;\n}\n\nTEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {\n  auto ctx = MakeCUDACtx(0);\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};\n  GPUTrainingParam param{param_};\n  cuts_.cut_ptrs_.SetDevice(ctx.Device());\n  cuts_.cut_values_.SetDevice(ctx.Device());\n  thrust::device_vector<GradientPairInt64> feature_histogram{\n      ConvertToInteger(&ctx, feature_histogram_)};\n\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  auto d_feature_types = dh::ToSpan(feature_types);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  EvaluateSplitInputs input{1, 0, quantiser.ToFixedPoint(parent_sum_), 
dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{param,\n                                          quantiser,\n                                          d_feature_types,\n                                          cuts_.cut_ptrs_.ConstDeviceSpan(),\n                                          cuts_.cut_values_.ConstDeviceSpan(),\n                                          false};\n\n  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n\n  evaluator.Reset(&ctx, cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false);\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  ASSERT_EQ(result.thresh, 1);\n  this->CheckResult(result.loss_chg, result.findex, result.fvalue, result.is_cat,\n                    result.dir == kLeftDir, quantiser.ToFloatingPoint(result.left_sum),\n                    quantiser.ToFloatingPoint(result.right_sum));\n}\n\nTEST(GpuHist, PartitionBasic) {\n  auto ctx = MakeCUDACtx(0);\n  TrainParam tparam = ZeroParam();\n  tparam.max_cat_to_onehot = 0;\n  GPUTrainingParam param{tparam};\n\n  common::HistogramCuts cuts{1};\n  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};\n  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};\n  cuts.cut_ptrs_.SetDevice(ctx.Device());\n  cuts.cut_values_.SetDevice(ctx.Device());\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};\n\n  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  common::Span<FeatureType> d_feature_types;\n  auto max_cat =\n      *std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());\n  cuts.SetCategorical(true, max_cat);\n  d_feature_types = dh::ToSpan(feature_types);\n  auto quantiser = 
DummyRoundingFactor(&ctx);\n  EvaluateSplitSharedInputs shared_inputs{\n      param,\n      quantiser,\n      d_feature_types,\n      cuts.cut_ptrs_.ConstDeviceSpan(),\n      cuts.cut_values_.ConstDeviceSpan(),\n      false,\n  };\n\n  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n  evaluator.Reset(&ctx, cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false);\n\n  {\n    // -1.0s go right\n    // -3.0s go left\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});\n    EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(result.dir, kLeftDir);\n    EXPECT_EQ(cats, std::bitset<32>(\"11000000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n\n  {\n    // -1.0s go right\n    // -3.0s go left\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-7.0, 3.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});\n    EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(result.dir, kLeftDir);\n    EXPECT_EQ(cats, std::bitset<32>(\"10000000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n  {\n    // All -1.0, gain from splitting should be 0.0\n    auto parent_sum = 
quantiser.ToFixedPoint(GradientPairPrecise{-3.0, 3.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});\n    EvaluateSplitInputs input{2, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    EXPECT_EQ(result.dir, kLeftDir);\n    EXPECT_FLOAT_EQ(result.loss_chg, 0.0f);\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n  // With 3.0/3.0 missing values\n  // Forward, first 2 categories are selected, while the last one go to left along with missing\n  // value\n  {\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 6.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});\n    EvaluateSplitInputs input{3, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(cats, std::bitset<32>(\"11000000000000000000000000000000\"));\n    EXPECT_EQ(result.dir, kLeftDir);\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n  {\n    // -1.0s go right\n    // -3.0s go left\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});\n    EvaluateSplitInputs input{4, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(result.dir, kLeftDir);\n    EXPECT_EQ(cats, 
std::bitset<32>(\"10100000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n  {\n    // -1.0s go right\n    // -3.0s go left\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});\n    auto feature_histogram = ConvertToInteger(&ctx, {{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});\n    EvaluateSplitInputs input{5, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(cats, std::bitset<32>(\"01000000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n}\n\nTEST(GpuHist, PartitionTwoFeatures) {\n  auto ctx = MakeCUDACtx(0);\n  TrainParam tparam = ZeroParam();\n  tparam.max_cat_to_onehot = 0;\n  GPUTrainingParam param{tparam};\n\n  common::HistogramCuts cuts{2};\n  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0, 0.0, 1.0, 2.0};\n  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3, 6};\n  cuts.cut_ptrs_.SetDevice(ctx.Device());\n  cuts.cut_values_.SetDevice(ctx.Device());\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};\n\n  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  common::Span<FeatureType> d_feature_types(dh::ToSpan(feature_types));\n  auto max_cat =\n      *std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());\n  cuts.SetCategorical(true, max_cat);\n\n  auto quantiser = DummyRoundingFactor(&ctx);\n  EvaluateSplitSharedInputs shared_inputs{param,\n                                          quantiser,\n                                          d_feature_types,\n                                
          cuts.cut_ptrs_.ConstDeviceSpan(),\n                                          cuts.cut_values_.ConstDeviceSpan(),\n                                          false};\n\n  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n  evaluator.Reset(&ctx, cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false);\n\n  {\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});\n    auto feature_histogram = ConvertToInteger(\n        &ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});\n    EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(result.findex, 1);\n    EXPECT_EQ(cats, std::bitset<32>(\"11000000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n\n  {\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});\n    auto feature_histogram = ConvertToInteger(\n        &ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});\n    EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                              dh::ToSpan(feature_histogram)};\n    DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n    auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);\n    EXPECT_EQ(result.findex, 1);\n    EXPECT_EQ(cats, std::bitset<32>(\"10000000000000000000000000000000\"));\n    EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n  }\n}\n\nTEST(GpuHist, PartitionTwoNodes) {\n  auto ctx = MakeCUDACtx(0);\n  TrainParam tparam = ZeroParam();\n  tparam.max_cat_to_onehot = 0;\n  GPUTrainingParam param{tparam};\n\n  
common::HistogramCuts cuts{1};\n  cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};\n  cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};\n  cuts.cut_ptrs_.SetDevice(ctx.Device());\n  cuts.cut_values_.SetDevice(ctx.Device());\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};\n\n  thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  common::Span<FeatureType> d_feature_types(dh::ToSpan(feature_types));\n  auto max_cat =\n      *std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());\n  cuts.SetCategorical(true, max_cat);\n\n  auto quantiser = DummyRoundingFactor(&ctx);\n  EvaluateSplitSharedInputs shared_inputs{param,\n                                          quantiser,\n                                          d_feature_types,\n                                          cuts.cut_ptrs_.ConstDeviceSpan(),\n                                          cuts.cut_values_.ConstDeviceSpan(),\n                                          false};\n\n  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n  evaluator.Reset(&ctx, cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false);\n\n  {\n    auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});\n    auto feature_histogram_a = ConvertToInteger(\n        &ctx, {{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});\n    thrust::device_vector<EvaluateSplitInputs> inputs(2);\n    inputs[0] = EvaluateSplitInputs{0, 0, parent_sum, dh::ToSpan(feature_set),\n                                    dh::ToSpan(feature_histogram_a)};\n    auto feature_histogram_b = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});\n    inputs[1] = EvaluateSplitInputs{1, 0, parent_sum, dh::ToSpan(feature_set),\n                     
               dh::ToSpan(feature_histogram_b)};\n    thrust::device_vector<GPUExpandEntry> results(2);\n    evaluator.EvaluateSplits(&ctx, {0, 1}, 1, dh::ToSpan(inputs), shared_inputs,\n                             dh::ToSpan(results));\n    EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),\n              std::bitset<32>(\"10000000000000000000000000000000\"));\n    EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),\n              std::bitset<32>(\"11000000000000000000000000000000\"));\n  }\n}\n\nvoid TestEvaluateSingleSplit(bool is_categorical) {\n  auto ctx = MakeCUDACtx(0);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});\n  TrainParam tparam = ZeroParam();\n  GPUTrainingParam param{tparam};\n\n  common::HistogramCuts cuts{MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, ctx.Device())};\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};\n\n  // Setup gradients so that second feature gets higher gain\n  auto feature_histogram =\n      ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});\n\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  common::Span<FeatureType> d_feature_types;\n  if (is_categorical) {\n    auto max_cat = *std::max_element(cuts.cut_values_.HostVector().begin(),\n                                     cuts.cut_values_.HostVector().end());\n    cuts.SetCategorical(true, max_cat);\n    d_feature_types = dh::ToSpan(feature_types);\n  }\n\n  EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{param,\n                                          quantiser,\n                                          d_feature_types,\n                                          cuts.cut_ptrs_.ConstDeviceSpan(),\n                                          
cuts.cut_values_.ConstDeviceSpan(),\n                                          false};\n\n  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n  evaluator.Reset(&ctx, cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false);\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  EXPECT_EQ(result.findex, 1);\n  if (is_categorical) {\n    ASSERT_TRUE(std::isnan(result.fvalue));\n  } else {\n    EXPECT_EQ(result.fvalue, 11.0);\n  }\n  EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n}\n\nTEST(GpuHist, EvaluateSingleSplit) { TestEvaluateSingleSplit(false); }\n\nTEST(GpuHist, EvaluateSingleCategoricalSplit) { TestEvaluateSingleSplit(true); }\n\nTEST(GpuHist, EvaluateSingleSplitMissing) {\n  auto ctx = MakeCUDACtx(0);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{1.0, 1.5});\n  TrainParam tparam = ZeroParam();\n  GPUTrainingParam param{tparam};\n\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};\n  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2};\n  thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};\n  auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});\n  EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{\n      param, quantiser, {}, dh::ToSpan(feature_segments), dh::ToSpan(feature_values), false};\n\n  GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  EXPECT_EQ(result.findex, 0);\n  EXPECT_EQ(result.fvalue, 1.0);\n  EXPECT_EQ(result.dir, kRightDir);\n  EXPECT_EQ(result.left_sum, quantiser.ToFixedPoint(GradientPairPrecise(-0.5, 0.5)));\n  
EXPECT_EQ(result.right_sum, quantiser.ToFixedPoint(GradientPairPrecise(1.5, 1.0)));\n}\n\nTEST(GpuHist, EvaluateSingleSplitEmpty) {\n  auto ctx = MakeCUDACtx(0);\n  TrainParam tparam = ZeroParam();\n  GPUHistEvaluator evaluator(tparam, 1, FstCU());\n  DeviceSplitCandidate result =\n      evaluator\n          .EvaluateSingleSplit(\n              &ctx, EvaluateSplitInputs{},\n              EvaluateSplitSharedInputs{\n                  GPUTrainingParam(tparam), DummyRoundingFactor(&ctx), {}, {}, {}, false})\n          .split;\n  EXPECT_EQ(result.findex, -1);\n  EXPECT_LT(result.loss_chg, 0.0f);\n}\n\n// Feature 0 has a better split, but the algorithm must select feature 1\nTEST(GpuHist, EvaluateSingleSplitFeatureSampling) {\n  auto ctx = MakeCUDACtx(0);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});\n  TrainParam tparam = ZeroParam();\n  tparam.UpdateAllowUnknown(Args{});\n  GPUTrainingParam param{tparam};\n\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{1};\n  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};\n  thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};\n  auto feature_histogram =\n      ConvertToInteger(&ctx, {{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});\n  EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{\n      param, quantiser, {}, dh::ToSpan(feature_segments), dh::ToSpan(feature_values), false};\n\n  GPUHistEvaluator evaluator(tparam, 2, FstCU());\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  EXPECT_EQ(result.findex, 1);\n  EXPECT_EQ(result.fvalue, 11.0);\n  EXPECT_EQ(result.left_sum, quantiser.ToFixedPoint(GradientPairPrecise(-0.5, 0.5)));\n  EXPECT_EQ(result.right_sum, 
quantiser.ToFixedPoint(GradientPairPrecise(0.5, 0.5)));\n}\n\n// Features 0 and 1 have identical gain, the algorithm must select 0\nTEST(GpuHist, EvaluateSingleSplitBreakTies) {\n  auto ctx = MakeCUDACtx(0);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});\n  TrainParam tparam = ZeroParam();\n  tparam.UpdateAllowUnknown(Args{});\n  GPUTrainingParam param{tparam};\n\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};\n  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};\n  thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};\n  auto feature_histogram =\n      ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});\n  EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{\n      param, quantiser, {}, dh::ToSpan(feature_segments), dh::ToSpan(feature_values), false};\n\n  GPUHistEvaluator evaluator(tparam, 2, FstCU());\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  EXPECT_EQ(result.findex, 0);\n  EXPECT_EQ(result.fvalue, 1.0);\n}\n\nTEST(GpuHist, EvaluateSplits) {\n  auto ctx = MakeCUDACtx(0);\n  thrust::device_vector<DeviceSplitCandidate> out_splits(2);\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});\n  TrainParam tparam = ZeroParam();\n  tparam.UpdateAllowUnknown(Args{});\n  GPUTrainingParam param{tparam};\n\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};\n  thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};\n  thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};\n  auto feature_histogram_left =\n      
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});\n  auto feature_histogram_right =\n      ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});\n  EvaluateSplitInputs input_left{1, 0, parent_sum, dh::ToSpan(feature_set),\n                                 dh::ToSpan(feature_histogram_left)};\n  EvaluateSplitInputs input_right{2, 0, parent_sum, dh::ToSpan(feature_set),\n                                  dh::ToSpan(feature_histogram_right)};\n  EvaluateSplitSharedInputs shared_inputs{\n      param, quantiser, {}, dh::ToSpan(feature_segments), dh::ToSpan(feature_values), false};\n\n  GPUHistEvaluator evaluator{tparam, 2, FstCU()};\n  dh::device_vector<EvaluateSplitInputs> inputs =\n      std::vector<EvaluateSplitInputs>{input_left, input_right};\n  evaluator.LaunchEvaluateSplits(&ctx, input_left.feature_set.size(), dh::ToSpan(inputs),\n                                 shared_inputs, evaluator.GetEvaluator(), dh::ToSpan(out_splits));\n\n  DeviceSplitCandidate result_left = out_splits[0];\n  EXPECT_EQ(result_left.findex, 1);\n  EXPECT_EQ(result_left.fvalue, 11.0);\n\n  DeviceSplitCandidate result_right = out_splits[1];\n  EXPECT_EQ(result_right.findex, 0);\n  EXPECT_EQ(result_right.fvalue, 1.0);\n}\n\nTEST_F(TestPartitionBasedSplit, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  dh::device_vector<FeatureType> ft{std::vector<FeatureType>{FeatureType::kCategorical}};\n  GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), ctx.Device()};\n\n  cuts_.cut_ptrs_.SetDevice(ctx.Device());\n  cuts_.cut_values_.SetDevice(ctx.Device());\n\n  evaluator.Reset(&ctx, cuts_, dh::ToSpan(ft), info_.num_col_, param_, false);\n\n  // Convert the sample histogram to fixed point\n  auto quantiser = DummyRoundingFactor(&ctx);\n  thrust::host_vector<GradientPairInt64> h_hist;\n  for (auto e : hist_[0]) {\n    h_hist.push_back(quantiser.ToFixedPoint(e));\n  }\n  dh::device_vector<GradientPairInt64> d_hist = h_hist;\n  
dh::device_vector<bst_feature_t> feature_set{std::vector<bst_feature_t>{0}};\n\n  EvaluateSplitInputs input{0, 0, quantiser.ToFixedPoint(total_gpair_), dh::ToSpan(feature_set),\n                            dh::ToSpan(d_hist)};\n  EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param_},\n                                          quantiser,\n                                          dh::ToSpan(ft),\n                                          cuts_.cut_ptrs_.ConstDeviceSpan(),\n                                          cuts_.cut_values_.ConstDeviceSpan(),\n                                          false};\n  auto split = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n  ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);\n}\n\nclass MGPUHistTest : public collective::BaseMGPUTest {};\n\nnamespace {\nvoid VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {\n  auto ctx = MakeCUDACtx(GPUIDX);\n  auto rank = collective::GetRank();\n  auto quantiser = DummyRoundingFactor(&ctx);\n  auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});\n  TrainParam tparam = ZeroParam();\n  GPUTrainingParam param{tparam};\n\n  common::HistogramCuts cuts{rank == 0 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, ctx.Device())\n                                       : MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, ctx.Device())};\n  thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};\n\n  // Setup gradients so that second feature gets higher gain\n  auto feature_histogram = rank == 0 ? 
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}})\n                                     : ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}});\n\n  dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);\n  common::Span<FeatureType> d_feature_types;\n  if (is_categorical) {\n    auto max_cat = *std::max_element(cuts.cut_values_.HostVector().begin(),\n                                     cuts.cut_values_.HostVector().end());\n    cuts.SetCategorical(true, max_cat);\n    d_feature_types = dh::ToSpan(feature_types);\n  }\n\n  EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),\n                            dh::ToSpan(feature_histogram)};\n  EvaluateSplitSharedInputs shared_inputs{param,\n                                          quantiser,\n                                          d_feature_types,\n                                          cuts.cut_ptrs_.ConstDeviceSpan(),\n                                          cuts.cut_values_.ConstDeviceSpan(),\n                                          false};\n\n  GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};\n  evaluator.Reset(&ctx, cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true);\n  DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;\n\n  EXPECT_EQ(result.findex, 1);\n  if (is_categorical) {\n    ASSERT_TRUE(std::isnan(result.fvalue));\n  } else {\n    EXPECT_EQ(result.fvalue, 11.0);\n  }\n  EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);\n}\n}  // anonymous namespace\n\nTEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {\n  if (curt::AllVisibleGPUs() > 1) {\n    // We can't emulate multiple GPUs with NCCL.\n    this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(false); }, false, true);\n  }\n  this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(false); }, true, true);\n}\n\nTEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {\n 
 if (curt::AllVisibleGPUs() > 1) {\n    // We can't emulate multiple GPUs with NCCL.\n    this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(true); }, false, true);\n  }\n  this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(true); }, true, true);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_expand_entry.cu",
    "content": "/**\n * Copyright 2023, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include \"../../../../src/tree/gpu_hist/expand_entry.cuh\"\n\nnamespace xgboost::tree {\nTEST(ExpandEntry, IOGPU) {\n  DeviceSplitCandidate split;\n  GPUExpandEntry entry{RegTree::kRoot, 0, split, 3.0, 1.0, 2.0};\n\n  Json je{Object{}};\n  entry.Save(&je);\n\n  GPUExpandEntry loaded;\n  loaded.Load(je);\n\n  ASSERT_EQ(entry.base_weight, loaded.base_weight);\n  ASSERT_EQ(entry.left_weight, loaded.left_weight);\n  ASSERT_EQ(entry.right_weight, loaded.right_weight);\n\n  ASSERT_EQ(entry.GetDepth(), loaded.GetDepth());\n  ASSERT_EQ(entry.GetLossChange(), loaded.GetLossChange());\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_histogram.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // for Context\n\n#include <memory>  // for unique_ptr\n#include <tuple>   // for tuple\n#include <vector>  // for vector\n\n#include \"../../../../src/tree/gpu_hist/expand_entry.cuh\"  // for GPUExpandEntry\n#include \"../../../../src/tree/gpu_hist/histogram.cuh\"\n#include \"../../../../src/tree/gpu_hist/row_partitioner.cuh\"  // for RowPartitioner\n#include \"../../../../src/tree/hist/hist_param.h\"             // for HistMakerTrainParam\n#include \"../../../../src/tree/param.h\"                       // for TrainParam\n#include \"../../categorical_helpers.h\"                        // for OneHotEncodeFeature\n#include \"../../helpers.h\"\n#include \"../../histogram_helpers.h\"  // for BuildEllpackPage\n#include \"dummy_quantizer.cuh\"\n\nnamespace xgboost::tree {\nTEST(Histogram, DeviceHistogramStorage) {\n  // Ensures that node allocates correctly after reaching `kStopGrowingSize`.\n  auto ctx = MakeCUDACtx(0);\n  constexpr size_t kNBins = 128;\n  constexpr int kNNodes = 4;\n  constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;\n  DeviceHistogramStorage histogram{};\n  histogram.Reset(&ctx, kNBins, kNNodes);\n  for (int i = 0; i < kNNodes; ++i) {\n    histogram.AllocateHistograms(&ctx, {i});\n  }\n  ASSERT_EQ(histogram.Data().size(), kStopGrowing);\n  histogram.Reset(&ctx, kNBins, kNNodes);\n\n  // Use allocated memory but do not erase nidx_map.\n  for (int i = 0; i < kNNodes; ++i) {\n    histogram.AllocateHistograms(&ctx, {i});\n  }\n  for (int i = 0; i < kNNodes; ++i) {\n    ASSERT_TRUE(histogram.HistogramExists(i));\n  }\n\n  // Add two new nodes\n  histogram.AllocateHistograms(&ctx, {kNNodes});\n  histogram.AllocateHistograms(&ctx, {kNNodes + 1});\n\n  // Old cached nodes should still exist\n  for (int i = 0; i < kNNodes; ++i) {\n    ASSERT_TRUE(histogram.HistogramExists(i));\n  }\n\n  // Should be deleted\n  
ASSERT_FALSE(histogram.HistogramExists(kNNodes));\n  // Most recent node should exist\n  ASSERT_TRUE(histogram.HistogramExists(kNNodes + 1));\n\n  // Add same node again - should fail\n  EXPECT_ANY_THROW(histogram.AllocateHistograms(&ctx, {kNNodes + 1}););\n}\n\nTEST(Histogram, SubtractionTrack) {\n  auto ctx = MakeCUDACtx(0);\n\n  auto page = BuildEllpackPage(&ctx, 64, 4);\n  auto cuts = page->CutsShared();\n  FeatureGroups fg{*cuts, true, std::numeric_limits<std::size_t>::max()};\n  auto n_total_bins = cuts->TotalBins();\n\n  // 2 nodes\n  auto max_cached_hist_nodes = 2ull;\n  DeviceHistogramBuilder histogram;\n  histogram.Reset(&ctx, max_cached_hist_nodes, n_total_bins, false);\n  histogram.AllocateHistograms(&ctx, {0, 1, 2});\n  GPUExpandEntry root;\n  root.nidx = 0;\n  auto need_build = histogram.SubtractHist<GPUExpandEntry>(&ctx, {root}, {0}, {1});\n\n  std::vector<GPUExpandEntry> candidates(2);\n  candidates[0].nidx = 1;\n  candidates[1].nidx = 2;\n\n  need_build = histogram.SubtractHist(&ctx, candidates, {3, 5}, {4, 6});\n  ASSERT_EQ(need_build.size(), 2);\n  ASSERT_EQ(need_build[0], 4);\n  ASSERT_EQ(need_build[1], 6);\n}\n\nnamespace {\nvoid TestDeterministicHistogram(bool is_dense, std::size_t shm_size, bool force_global) {\n  Context ctx = MakeCUDACtx(0);\n  size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;\n  float constexpr kLower = -1e-2, kUpper = 1e2;\n\n  float sparsity = is_dense ? 
0.0f : 0.5f;\n  auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();\n  auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};\n\n  for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {\n    auto* page = batch.Impl();\n\n    tree::RowPartitioner row_partitioner;\n    row_partitioner.Reset(&ctx, kRows, page->base_rowid);\n    auto ridx = row_partitioner.GetRows(0);\n\n    bst_bin_t num_bins = kBins * kCols;\n    dh::device_vector<GradientPairInt64> histogram(num_bins);\n    auto d_histogram = dh::ToSpan(histogram);\n    auto gpair = GenerateGradientsFixedPoint(&ctx, kRows, 1, kLower, kUpper).gpair;\n\n    FeatureGroups feature_groups{page->Cuts(), page->IsDenseCompressed(), shm_size};\n\n    DeviceHistogramBuilder builder;\n    builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), num_bins, force_global);\n    page->Visit(&ctx, {}, [&](auto&& acc) {\n      builder.BuildHistogram(&ctx, acc, feature_groups.DeviceAccessor(ctx.Device()),\n                             gpair.View(ctx.Device()).Values(), ridx, d_histogram);\n    });\n\n    std::vector<GradientPairInt64> histogram_h(num_bins);\n    dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),\n                             num_bins * sizeof(GradientPairInt64), cudaMemcpyDeviceToHost));\n\n    for (std::size_t i = 0; i < kRounds; ++i) {\n      dh::device_vector<GradientPairInt64> new_histogram(num_bins);\n      auto d_new_histogram = dh::ToSpan(new_histogram);\n\n      DeviceHistogramBuilder builder;\n      builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), num_bins, force_global);\n      page->Visit(&ctx, {}, [&](auto&& acc) {\n        builder.BuildHistogram(&ctx, acc, feature_groups.DeviceAccessor(ctx.Device()),\n                               gpair.View(ctx.Device()).Values(), ridx, d_new_histogram);\n      });\n\n      std::vector<GradientPairInt64> new_histogram_h(num_bins);\n      
dh::safe_cuda(cudaMemcpy(new_histogram_h.data(), d_new_histogram.data(),\n                               num_bins * sizeof(GradientPairInt64), cudaMemcpyDeviceToHost));\n      for (size_t j = 0; j < new_histogram_h.size(); ++j) {\n        ASSERT_EQ(new_histogram_h[j].GetQuantisedGrad(), histogram_h[j].GetQuantisedGrad());\n        ASSERT_EQ(new_histogram_h[j].GetQuantisedHess(), histogram_h[j].GetQuantisedHess());\n      }\n    }\n\n    {\n      auto gpair = GenerateGradientsFixedPoint(&ctx, kRows, 1, kLower, kUpper).gpair;\n\n      // Use a single feature group to compute the baseline.\n      FeatureGroups single_group(page->Cuts());\n\n      dh::device_vector<GradientPairInt64> baseline(num_bins);\n      DeviceHistogramBuilder builder;\n      // Single group must use global memory.\n      builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), num_bins, /*force_global=*/true);\n      page->Visit(&ctx, {}, [&](auto&& acc) {\n        builder.BuildHistogram(&ctx, acc, single_group.DeviceAccessor(ctx.Device()),\n                               gpair.View(ctx.Device()).Values(), ridx, dh::ToSpan(baseline));\n      });\n\n      std::vector<GradientPairInt64> baseline_h(num_bins);\n      dh::safe_cuda(cudaMemcpy(baseline_h.data(), baseline.data().get(),\n                               num_bins * sizeof(GradientPairInt64), cudaMemcpyDeviceToHost));\n\n      for (size_t i = 0; i < baseline.size(); ++i) {\n        ASSERT_NEAR(baseline_h[i].GetQuantisedGrad(), histogram_h[i].GetQuantisedGrad(),\n                    baseline_h[i].GetQuantisedGrad() * 1e-3);\n      }\n    }\n  }\n}\n\nclass TestGPUDeterministic : public ::testing::TestWithParam<std::tuple<bool, std::size_t, bool>> {\n protected:\n  void Run() {\n    auto [is_dense, shm_size, force_global] = this->GetParam();\n    if (shm_size > dh::MaxSharedMemoryOptin(0) && !force_global) {\n      force_global = true;  // We will have to skip this test to avoid false check in the builder.\n    }\n    
TestDeterministicHistogram(is_dense, shm_size, force_global);\n  }\n};\n}  // anonymous namespace\n\nTEST_P(TestGPUDeterministic, Histogram) { this->Run(); }\n\nINSTANTIATE_TEST_SUITE_P(Histogram, TestGPUDeterministic,\n                         ::testing::Combine(::testing::Bool(),\n                                            ::testing::Values(48 * 1024, 64 * 1024, 160 * 1024),\n                                            ::testing::Bool()));\n\nvoid ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientPairInt64> onehot,\n                                  common::Span<GradientPairInt64> cat) {\n  auto cat_sum = std::accumulate(cat.cbegin(), cat.cend(), GradientPairInt64{});\n  for (size_t c = 0; c < n_categories; ++c) {\n    auto zero = onehot[c * 2];\n    auto one = onehot[c * 2 + 1];\n\n    auto chosen = cat[c];\n    auto not_chosen = cat_sum - chosen;\n    ASSERT_EQ(zero, not_chosen);\n    ASSERT_EQ(one, chosen);\n  }\n}\n\n// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.\nvoid TestGPUHistogramCategorical(size_t num_categories) {\n  auto ctx = MakeCUDACtx(0);\n  size_t kRows = std::max(static_cast<decltype(num_categories)>(340), num_categories);\n  size_t constexpr kBins = 256;\n  auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories);\n  auto cat_m = GetDMatrixFromData(x, kRows, 1);\n  cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);\n  auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};\n  tree::RowPartitioner row_partitioner;\n  row_partitioner.Reset(&ctx, kRows, 0);\n  auto ridx = row_partitioner.GetRows(0);\n  dh::device_vector<GradientPairInt64> cat_hist(num_categories);\n\n  auto gpair = GenerateRandomGradients(kRows, 0, 2);\n  gpair.SetDevice(DeviceOrd::CUDA(0));\n  GradientQuantiserGroup quantiser_group{\n      &ctx, linalg::MakeVec(ctx.Device(), gpair.ConstDeviceSpan()), MetaInfo()};\n  linalg::Matrix<GradientPairInt64> gpairs_i64;\n  
CalcQuantizedGpairs(&ctx, linalg::MakeTensorView(&ctx, gpair.ConstDeviceSpan(), gpair.Size(), 1),\n                      quantiser_group.DeviceSpan(), &gpairs_i64);\n  /**\n   * Generate hist with cat data.\n   */\n  for (auto const& batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {\n    auto* page = batch.Impl();\n    FeatureGroups single_group(page->Cuts());\n    DeviceHistogramBuilder builder;\n    builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), num_categories, false);\n    page->Visit(&ctx, {}, [&](auto&& acc) {\n      builder.BuildHistogram(&ctx, acc, single_group.DeviceAccessor(ctx.Device()),\n                             gpairs_i64.View(ctx.Device()).Values(), ridx, dh::ToSpan(cat_hist));\n    });\n  }\n\n  /**\n   * Generate hist with one hot encoded data.\n   */\n  auto x_encoded = OneHotEncodeFeature(x, num_categories);\n  auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);\n  dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);\n  for (auto const& batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {\n    auto* page = batch.Impl();\n    FeatureGroups single_group(page->Cuts());\n    DeviceHistogramBuilder builder;\n    builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), encode_hist.size(), false);\n    page->Visit(&ctx, {}, [&](auto&& acc) {\n      builder.BuildHistogram(&ctx, acc, single_group.DeviceAccessor(ctx.Device()),\n                             gpairs_i64.View(ctx.Device()).Values(), ridx, dh::ToSpan(encode_hist));\n    });\n  }\n\n  std::vector<GradientPairInt64> h_cat_hist(cat_hist.size());\n  thrust::copy(cat_hist.begin(), cat_hist.end(), h_cat_hist.begin());\n\n  std::vector<GradientPairInt64> h_encode_hist(encode_hist.size());\n  thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin());\n  ValidateCategoricalHistogram(num_categories, common::Span<GradientPairInt64>{h_encode_hist},\n                               
common::Span<GradientPairInt64>{h_cat_hist});\n}\n\nTEST(Histogram, GPUHistCategorical) {\n  for (size_t num_categories = 2; num_categories < 8; ++num_categories) {\n    TestGPUHistogramCategorical(num_categories);\n  }\n  // Larger than the shared memory size, must use global memory since there's no feature\n  // group with a single feature.\n  auto max_shmem = dh::MaxSharedMemoryOptin(0);\n  auto n_categories = common::DivRoundUp(max_shmem, sizeof(GradientPairInt64)) * 2;\n  TestGPUHistogramCategorical(n_categories);\n}\n\nnamespace {\n// Atomic add as type cast for test.\nXGBOOST_DEV_INLINE int64_t atomicAdd(int64_t* dst, int64_t src) {  // NOLINT\n  uint64_t* u_dst = reinterpret_cast<uint64_t*>(dst);\n  uint64_t u_src = *reinterpret_cast<uint64_t*>(&src);\n  uint64_t ret = ::atomicAdd(u_dst, u_src);\n  return *reinterpret_cast<int64_t*>(&ret);\n}\n}  // namespace\n\nvoid TestAtomicAdd() {\n  size_t n_elements = 1024;\n  dh::device_vector<int64_t> result_a(1, 0);\n  auto d_result_a = result_a.data().get();\n\n  dh::device_vector<int64_t> result_b(1, 0);\n  auto d_result_b = result_b.data().get();\n\n  /**\n   * Test for simple inputs\n   */\n  std::vector<int64_t> h_inputs(n_elements);\n  for (size_t i = 0; i < h_inputs.size(); ++i) {\n    h_inputs[i] = (i % 2 == 0) ? 
i : -i;\n  }\n  dh::device_vector<int64_t> inputs(h_inputs);\n  auto d_inputs = inputs.data().get();\n\n  dh::LaunchN(n_elements, [=] __device__(size_t i) {\n    AtomicAdd64As32(d_result_a, d_inputs[i]);\n    atomicAdd(d_result_b, d_inputs[i]);\n  });\n  ASSERT_EQ(result_a[0], result_b[0]);\n\n  /**\n   * Test for positive values that don't fit into 32 bit integer.\n   */\n  thrust::fill(inputs.begin(), inputs.end(), (std::numeric_limits<uint32_t>::max() / 2));\n  thrust::fill(result_a.begin(), result_a.end(), 0);\n  thrust::fill(result_b.begin(), result_b.end(), 0);\n  dh::LaunchN(n_elements, [=] __device__(size_t i) {\n    AtomicAdd64As32(d_result_a, d_inputs[i]);\n    atomicAdd(d_result_b, d_inputs[i]);\n  });\n  ASSERT_EQ(result_a[0], result_b[0]);\n  ASSERT_GT(result_a[0], std::numeric_limits<uint32_t>::max());\n  CHECK_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), result_a[0]);\n\n  /**\n   * Test for negative values that don't fit into 32 bit integer.\n   */\n  thrust::fill(inputs.begin(), inputs.end(), (std::numeric_limits<int32_t>::min() / 2));\n  thrust::fill(result_a.begin(), result_a.end(), 0);\n  thrust::fill(result_b.begin(), result_b.end(), 0);\n  dh::LaunchN(n_elements, [=] __device__(size_t i) {\n    AtomicAdd64As32(d_result_a, d_inputs[i]);\n    atomicAdd(d_result_b, d_inputs[i]);\n  });\n  ASSERT_EQ(result_a[0], result_b[0]);\n  ASSERT_LT(result_a[0], std::numeric_limits<int32_t>::min());\n  CHECK_EQ(thrust::reduce(inputs.begin(), inputs.end(), int64_t(0)), result_a[0]);\n}\n\nTEST(Histogram, AtomicAddInt64) { TestAtomicAdd(); }\n\nTEST(Histogram, Quantiser) {\n  auto ctx = MakeCUDACtx(0);\n  std::size_t n_samples{16};\n  HostDeviceVector<GradientPair> gpair(n_samples, GradientPair{1.0, 1.0});\n  gpair.SetDevice(ctx.Device());\n\n  GradientQuantiserGroup quantiser_group{\n      &ctx, linalg::MakeVec(ctx.Device(), gpair.ConstDeviceSpan()), MetaInfo()};\n  auto quantiser = quantiser_group[0];\n  for (auto v : gpair.ConstHostVector()) 
{\n    auto gh = quantiser.ToFloatingPoint(quantiser.ToFixedPoint(v));\n    ASSERT_EQ(gh.GetGrad(), 1.0);\n    ASSERT_EQ(gh.GetHess(), 1.0);\n  }\n}\nnamespace {\nenum CacheMode {\n  kNoCache = 0,\n  kCopy = 1,\n  kDirect = 2,\n};\n\nclass HistogramExternalMemoryTest\n    : public ::testing::TestWithParam<std::tuple<float, bool, CacheMode>> {\n public:\n  void Run(float sparsity, bool force_global, CacheMode cache_mode) {\n    auto ctx = MakeCUDACtx(0);\n    bst_idx_t n_samples{512}, n_features{12}, n_batches{3};\n    std::vector<std::unique_ptr<RowPartitioner>> partitioners;\n    auto rng = RandomDataGenerator{n_samples, n_features, sparsity}.Batches(n_batches);\n    bst_bin_t n_bins = 16;\n    std::shared_ptr<DMatrix> p_fmat;\n    switch (cache_mode) {\n      case kCopy:\n      case kDirect: {\n        p_fmat = rng.CacheHostRatio(0.5)\n                     .Device(ctx.Device())\n                     .Bins(n_bins)\n                     .OnHost(true)\n                     .MinPageCacheBytes(n_bins * n_features)\n                     .GenerateExtMemQuantileDMatrix(\"cache\", true);\n        break;\n      }\n      case kNoCache: {\n        p_fmat = rng.GenerateSparsePageDMatrix(\"cache\", true);\n        break;\n      }\n    }\n\n    BatchParam p{n_bins, TrainParam::DftSparseThreshold()};\n    if (cache_mode == kDirect) {\n      p.prefetch_copy = false;\n    } else if (cache_mode == kCopy) {\n      p.prefetch_copy = true;\n    }\n\n    std::unique_ptr<FeatureGroups> fg;\n    dh::device_vector<GradientPairInt64> single_hist;\n    dh::device_vector<GradientPairInt64> multi_hist;\n\n    auto gpair = GenerateGradientsFixedPoint(&ctx, n_samples).gpair;\n    std::shared_ptr<common::HistogramCuts> cuts;\n\n    std::size_t row_stride = 0;\n    {\n      /**\n       * Multi page.\n       */\n      std::int32_t k{0};\n      for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, p)) {\n        auto impl = page.Impl();\n        row_stride = impl->info.row_stride;\n        
if (k == 0) {\n          // Initialization\n          fg = std::make_unique<FeatureGroups>(impl->Cuts());\n          auto init = GradientPairInt64{0, 0};\n          multi_hist = decltype(multi_hist)(impl->Cuts().TotalBins(), init);\n          single_hist = decltype(single_hist)(impl->Cuts().TotalBins(), init);\n          cuts = std::make_shared<common::HistogramCuts>(impl->Cuts());\n        }\n\n        partitioners.emplace_back(std::make_unique<RowPartitioner>());\n        partitioners.back()->Reset(&ctx, impl->Size(), impl->base_rowid);\n\n        auto ridx = partitioners.at(k)->GetRows(0);\n        auto d_histogram = dh::ToSpan(multi_hist);\n        DeviceHistogramBuilder builder;\n        builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), d_histogram.size(),\n                      force_global);\n        impl->Visit(&ctx, {}, [&](auto&& acc) {\n          builder.BuildHistogram(&ctx, acc, fg->DeviceAccessor(ctx.Device()),\n                                 gpair.View(ctx.Device()).Values(), ridx, d_histogram);\n        });\n        ++k;\n      }\n      ASSERT_EQ(k, n_batches);\n    }\n\n    {\n      /**\n       * Single page.\n       */\n      RowPartitioner partitioner;\n      partitioner.Reset(&ctx, p_fmat->Info().num_row_, 0);\n\n      auto concat = EllpackPageImpl(&ctx, cuts, sparsity == 0.0, row_stride, n_samples);\n      std::vector<float> hess(p_fmat->Info().num_row_, 1.0f);\n      std::size_t offset = 0;\n      for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, p)) {\n        bst_idx_t num_elements = concat.Copy(&ctx, page.Impl(), offset);\n        offset += num_elements;\n      }\n      auto ridx = partitioner.GetRows(0);\n      auto d_histogram = dh::ToSpan(single_hist);\n      DeviceHistogramBuilder builder;\n      builder.Reset(&ctx, HistMakerTrainParam::CudaDefaultNodes(), d_histogram.size(),\n                    force_global);\n      concat.Visit(&ctx, {}, [&](auto&& acc) {\n        builder.BuildHistogram(&ctx, acc, 
fg->DeviceAccessor(ctx.Device()),\n                               gpair.View(ctx.Device()).Values(), ridx, d_histogram);\n      });\n    }\n\n    std::vector<GradientPairInt64> h_single(single_hist.size());\n    thrust::copy(single_hist.begin(), single_hist.end(), h_single.begin());\n    std::vector<GradientPairInt64> h_multi(multi_hist.size());\n    thrust::copy(multi_hist.begin(), multi_hist.end(), h_multi.begin());\n\n    for (std::size_t i = 0; i < single_hist.size(); ++i) {\n      ASSERT_EQ(h_single[i].GetQuantisedGrad(), h_multi[i].GetQuantisedGrad()) << i;\n      ASSERT_EQ(h_single[i].GetQuantisedHess(), h_multi[i].GetQuantisedHess());\n    }\n  }\n};\n}  // namespace\n\nTEST_P(HistogramExternalMemoryTest, ExternalMemory) {\n  std::apply(&HistogramExternalMemoryTest::Run, std::tuple_cat(std::make_tuple(this), GetParam()));\n}\n\nINSTANTIATE_TEST_SUITE_P(\n    Histogram, HistogramExternalMemoryTest,\n    ::testing::Combine(::testing::Values(0.0f, 0.2f, 0.8f), ::testing::Bool(),\n                       ::testing::Values(kNoCache, kDirect, kCopy)),\n    [](::testing::TestParamInfo<HistogramExternalMemoryTest::ParamType> const& info) {\n      std::stringstream ss;\n      auto const& p = info.param;\n      ss << \"sparsity_0\" << (std::get<0>(p) * 10) << \"_global_\" << std::get<1>(p) << \"_dcache_\";\n      switch (std::get<2>(p)) {\n        case kNoCache:\n          ss << \"nocache\";\n          break;\n        case kDirect:\n          ss << \"direct\";\n          break;\n        case kCopy:\n          ss << \"copy\";\n          break;\n      }\n      return ss.str();\n    });\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_leaf_sum.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/sequence.h>  // for sequence\n#include <xgboost/linalg.h>   // for Constant\n\n#include <vector>  // for vector\n\n#include \"../../../../src/common/device_vector.cuh\"\n#include \"../../../../src/tree/gpu_hist/leaf_sum.cuh\"\n#include \"../../../../src/tree/gpu_hist/row_partitioner.cuh\"  // for LeafInfo\n#include \"../../helpers.h\"\n#include \"dummy_quantizer.cuh\"  // for MakeDummyQuantizers\n\nnamespace xgboost::tree::cuda_impl {\nTEST(LeafGradSum, Basic) {\n  auto ctx = MakeCUDACtx(0);\n\n  bst_target_t n_targets = 2;\n  bst_idx_t n_samples = 6;\n  bst_idx_t n_leaves = 2;\n\n  // Create leaf information\n  std::vector<LeafInfo> h_leaves(n_leaves);\n  h_leaves[0].nidx = 1;\n  h_leaves[0].node.segment = Segment{0, 3};\n  h_leaves[1].nidx = 2;\n  h_leaves[1].node.segment = Segment{3, 6};\n\n  auto gpairs = linalg::Constant(&ctx, GradientPair{1.0f, 1.0f}, n_samples, n_targets);\n\n  dh::device_vector<RowIndexT> sorted_ridx(n_samples);\n  thrust::sequence(ctx.CUDACtx()->CTP(), sorted_ridx.begin(), sorted_ridx.end(), 0);\n\n  auto quantizers = MakeDummyQuantizers(n_targets);\n  auto out_sum = linalg::Constant(&ctx, GradientPairInt64{}, n_leaves, n_targets);\n\n  LeafGradSum(&ctx, h_leaves, dh::ToSpan(quantizers), dh::ToSpan(sorted_ridx),\n              gpairs.View(ctx.Device()), out_sum.View(ctx.Device()));\n\n  for (auto v : out_sum.HostView()) {\n    ASSERT_EQ(v.GetQuantisedGrad(), 3);\n    ASSERT_EQ(v.GetQuantisedHess(), 3);\n  }\n}\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_multi_evaluate_splits.cu",
    "content": "/**\n * Copyright 2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include \"../../../../src/tree/gpu_hist/evaluate_splits.cuh\"\n#include \"../../../../src/tree/gpu_hist/multi_evaluate_splits.cuh\"\n#include \"../../helpers.h\"\n#include \"dummy_quantizer.cuh\"  // for MakeDummyQuantizers\n\nnamespace xgboost::tree::cuda_impl {\nclass GpuMultiHistEvaluatorBasicTest : public ::testing::Test {\n public:\n  Context ctx{MakeCUDACtx(0)};\n  bst_target_t n_targets = 2;\n  bst_bin_t n_bins_per_feat_tar = 4;\n\n  dh::device_vector<GradientPairInt64> parent_sum;\n  dh::device_vector<GradientPairInt64> histogram;\n  MultiEvaluateSplitInputs input;\n  dh::device_vector<GradientQuantiser> quantizers;\n  MultiEvaluateSplitSharedInputs shared_inputs;\n\n  dh::device_vector<bst_feature_t> feature_segments;\n  dh::device_vector<bst_feature_t> feature_set;\n  dh::device_vector<float> feature_values{.0f, .1f, .2f, .3f};\n\n  void SetUp() override {\n    input.nidx = 0;\n    input.depth = 0;\n\n    parent_sum.resize(n_targets);\n    parent_sum[0] = GradientPairInt64{56, 40};\n    parent_sum[1] = GradientPairInt64{96, 128};\n\n    histogram.resize(n_bins_per_feat_tar * n_targets);\n    // first target, dense,                    // 0/0, 56/40\n    histogram[0] = GradientPairInt64{8, 4};    // 8/4, 48/36\n    histogram[1] = GradientPairInt64{12, 8};   // 20/12, 36/28\n    histogram[2] = GradientPairInt64{16, 12};  // 36/24, 20/16\n    histogram[3] = GradientPairInt64{20, 16};  // 56/40, 0/0\n\n    // second target, dense                    // 0/0,  96/128\n    histogram[4] = GradientPairInt64{11, 13};  // 11/13, 85/115\n    histogram[5] = GradientPairInt64{19, 29};  // 30/42, 66/86\n    histogram[6] = GradientPairInt64{27, 45};  // 57/87, 39/41\n    histogram[7] = GradientPairInt64{39, 41};  // 96/128, 0/0\n\n    input.parent_sum = dh::ToSpan(parent_sum);\n    input.histogram = dh::ToSpan(histogram);\n\n    quantizers = MakeDummyQuantizers(2);\n\n    
shared_inputs.roundings = dh::ToSpan(quantizers);\n\n    feature_segments.resize(2);\n    feature_segments[0] = 0;\n    feature_segments[1] = static_cast<bst_feature_t>(n_bins_per_feat_tar);\n    shared_inputs.feature_segments = dh::ToSpan(feature_segments);\n\n    feature_set.resize(1, 0);\n    input.feature_set = dh::ToSpan(feature_set);\n\n    shared_inputs.feature_values = dh::ToSpan(feature_values).data();\n\n    shared_inputs.n_bins_per_feat_tar = n_bins_per_feat_tar;\n    shared_inputs.max_active_feature = 1;\n\n    TrainParam param;\n    param.Init(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}, {\"learning_rate\", \"1\"}});\n    shared_inputs.param = GPUTrainingParam{param};\n  }\n\n  void TestEmptyHess() {\n    // Turn all Hessian values into 0.\n    thrust::transform(histogram.begin(), histogram.end(), histogram.begin(),\n                      [] XGBOOST_DEVICE(GradientPairInt64 const& bin) {\n                        return GradientPairInt64{bin.GetQuantisedGrad(), 0};\n                      });\n    MultiHistEvaluator evaluator;\n    auto candidate = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs);\n    TrainParam param;\n    param.Init(Args{});\n    ASSERT_FALSE(candidate.IsValid(param, 100));\n  }\n};\n\nnamespace {\ntemplate <typename T, typename V = std::remove_cv_t<T>>\nvoid AssertDeviceVecEq(common::Span<T> span, std::vector<V> const& exp) {\n  std::vector<V> h_vec(span.size());\n  dh::CopyDeviceSpanToVector(&h_vec, span);\n  AssertVecEq(h_vec, exp);\n}\n}  // namespace\n\nTEST_F(GpuMultiHistEvaluatorBasicTest, Root) {\n  using OnePass = MultiEvaluateSplitSharedInputs;\n\n  std::vector<GradientPairInt64> exp_left_sum{{36, 24}, {57, 87}};\n  std::vector<GradientPairInt64> exp_right_sum{{20, 16}, {39, 41}};\n  std::vector<float> exp_base_weight{-1.4, -0.75};\n  std::vector<float> exp_left_weight{-1.5, -0.655172};\n  std::vector<float> exp_right_weight{-1.25, -0.951219};\n\n  for (auto one_pass : {OnePass::kNone, 
OnePass::kForward, OnePass::kBackward}) {\n    auto shared = this->shared_inputs;\n    shared.one_pass = one_pass;\n    MultiHistEvaluator evaluator;\n    auto candidate = evaluator.EvaluateSingleSplit(&ctx, input, shared);\n    ASSERT_NEAR(candidate.split.loss_chg, 3.04239, 1e-5);\n\n    std::vector<float> base, left, right;\n    evaluator.CopyNodeWeightsToHost(candidate.nidx, candidate.base_weight.size(), &base, &left,\n                                    &right);\n    AssertVecEq(base, exp_base_weight);\n    AssertVecEq(left, exp_left_weight);\n    AssertVecEq(right, exp_right_weight);\n\n    std::stringstream ss;\n    ss << candidate;\n    auto str = ss.str();\n    if (one_pass != OnePass::kBackward) {\n      ASSERT_NE(str.find(\"left_sum\"), std::string::npos);\n      ASSERT_EQ(str.find(\"right_sum\"), std::string::npos);\n      AssertDeviceVecEq(candidate.split.child_sum, exp_left_sum);\n    } else {\n      ASSERT_EQ(str.find(\"left_sum\"), std::string::npos);\n      ASSERT_NE(str.find(\"right_sum\"), std::string::npos);\n      AssertDeviceVecEq(candidate.split.child_sum, exp_right_sum);\n    }\n  }\n}\n\nTEST_F(GpuMultiHistEvaluatorBasicTest, EmptyHess) { this->TestEmptyHess(); }\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_multi_histogram.cu",
    "content": "/**\n * Copyright 2025-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/sequence.h>\n\n#include <cuda/functional>\n\n#include \"../../../../src/tree/gpu_hist/histogram.cuh\"\n#include \"../../helpers.h\"\n#include \"../../histogram_helpers.h\"\n#include \"dummy_quantizer.cuh\"  // for MakeDummyQuantizers\n\nnamespace xgboost::tree::cuda_impl {\nclass MultiHistTest\n    : public ::testing::TestWithParam<std::tuple<bst_idx_t, bst_feature_t, bst_target_t, bool>> {\n public:\n  Context ctx{MakeCUDACtx(0)};\n\n  bst_bin_t n_bins = 256;\n\n  bst_target_t n_targets{0};\n  bst_feature_t n_features{0};\n\n  bst_idx_t n_samples{0};\n\n  std::unique_ptr<EllpackPageImpl> page;\n\n  std::shared_ptr<common::HistogramCuts const> cuts;\n  std::unique_ptr<FeatureGroups> p_fg;\n\n  DeviceHistogramBuilder histogram;\n  common::Span<GradientPairInt64> node_hist;\n  linalg::Matrix<GradientPair> gpairs;\n  linalg::Matrix<GradientPairInt64> gpairs_i64;\n  dh::device_vector<std::uint32_t> ridx;\n  dh::device_vector<GradientQuantiser> quantizers;\n\n  void SetUp() override {\n    bool force_global = false;\n    std::tie(this->n_samples, this->n_features, this->n_targets, force_global) = this->GetParam();\n\n    this->page = MakeEllpackForTest(&ctx, n_samples, n_features, n_bins);\n    this->cuts = page->CutsShared();\n\n    this->p_fg = std::make_unique<FeatureGroups>(*cuts, true, DftMtHistShmemBytes(ctx.Ordinal()));\n\n    this->gpairs = linalg::Constant(&ctx, GradientPair{1.0f, 1.0f}, n_samples, n_targets);\n    this->quantizers = MakeDummyQuantizers(n_targets);\n    CalcQuantizedGpairs(&this->ctx, this->gpairs.View(this->ctx.Device()),\n                        dh::ToSpan(this->quantizers), &gpairs_i64);\n\n    bst_bin_t n_total_bins = n_targets * n_features * n_bins;\n    this->histogram.Reset(&ctx, /*max_cached_hist_nodes=*/3, n_total_bins, force_global);\n\n    this->ridx.resize(n_samples);\n    thrust::sequence(ctx.CUDACtx()->CTP(), 
ridx.begin(), ridx.end(), 0);\n\n    this->histogram.AllocateHistograms(&ctx, {0});\n    this->node_hist = histogram.GetNodeHistogram(0);\n  }\n\n  void TestMtBuild() {\n    auto ridxs = dh::device_vector<common::Span<std::uint32_t const>>{dh::ToSpan(ridx)};\n    auto hists = dh::device_vector<common::Span<GradientPairInt64>>{node_hist};\n    auto sizes_cum = std::vector<std::size_t>{0, ridx.size()};\n\n    this->histogram.BuildHistogram(\n        &this->ctx, page->GetDeviceEllpack(&ctx, {}), p_fg->DeviceAccessor(ctx.Device()),\n        gpairs_i64.View(this->ctx.Device()), dh::ToSpan(ridxs), dh::ToSpan(hists), sizes_cum);\n\n    auto d_hist = this->node_hist;\n    std::vector<GradientPairInt64> h_hist(d_hist.size());\n    dh::CopyDeviceSpanToVector(&h_hist, d_hist);\n    // The values are evenly distributed across all bins\n    auto expected = n_samples / n_bins;\n    std::int32_t k = 0;\n    for (auto v : h_hist) {\n      ASSERT_EQ(v.GetQuantisedGrad(), expected) << \" k:\" << k;\n      ASSERT_EQ(v.GetQuantisedHess(), expected) << \" k:\" << k;\n      ++k;\n    }\n  }\n\n  void TestMtChildrenBuild() {\n    auto d_ridx = dh::ToSpan(ridx);\n    auto ridxs = dh::device_vector<common::Span<std::uint32_t const>>{\n        d_ridx.subspan(0, n_samples / 4), d_ridx.subspan(n_samples / 4)};\n    auto sizes_cum = std::vector<std::size_t>{0, n_samples / 4, n_samples};\n    this->histogram.AllocateHistograms(&ctx, {1, 2});\n    auto hists = dh::device_vector<common::Span<GradientPairInt64>>{\n        this->histogram.GetNodeHistogram(1), this->histogram.GetNodeHistogram(2)};\n\n    this->histogram.BuildHistogram(\n        &this->ctx, page->GetDeviceEllpack(&ctx, {}), p_fg->DeviceAccessor(ctx.Device()),\n        gpairs_i64.View(this->ctx.Device()), dh::ToSpan(ridxs), dh::ToSpan(hists), sizes_cum);\n\n    auto d_hist_1 = this->histogram.GetNodeHistogram(1);\n    auto d_hist_2 = this->histogram.GetNodeHistogram(2);\n    std::vector<GradientPairInt64> h_hist_1(d_hist_1.size());\n  
  std::vector<GradientPairInt64> h_hist_2(d_hist_2.size());\n    dh::CopyDeviceSpanToVector(&h_hist_1, d_hist_1);\n    dh::CopyDeviceSpanToVector(&h_hist_2, d_hist_2);\n    ASSERT_EQ(h_hist_1.size(), h_hist_2.size());\n\n    // The values are evenly distributed across all bins\n    auto expected = n_samples / n_bins;\n\n    for (std::size_t i = 0; i < h_hist_1.size(); ++i) {\n      ASSERT_EQ(h_hist_1[i].GetQuantisedHess() + h_hist_2[i].GetQuantisedHess(), expected)\n          << \"i:\" << i << \" l:\" << h_hist_1[i].GetQuantisedHess()\n          << \" r:\" << h_hist_2[i].GetQuantisedHess();\n    }\n  }\n};\n\nTEST_P(MultiHistTest, Root) { this->TestMtBuild(); }\n\nTEST_P(MultiHistTest, Children) { this->TestMtChildrenBuild(); }\n\nnamespace {\nstd::string TestName(::testing::TestParamInfo<MultiHistTest::ParamType> const& info) {\n  std::stringstream ss;\n  auto [n_samples, n_features, n_targets, global] = info.param;\n  ss << \"n_samples_\" << n_samples << \"_n_features_\" << n_features << \"_n_targets_\" << n_targets\n     << \"_global_\" << global;\n  return ss.str();\n}\n}  // namespace\n\nINSTANTIATE_TEST_SUITE_P(Basic, MultiHistTest,\n                         ::testing::Combine(::testing::Values<bst_idx_t>(256, 1024, 8192),\n                                            ::testing::Values(1, 128, 257),\n                                            ::testing::Values(1, 16), ::testing::Bool()),\n                         TestName);\n\nINSTANTIATE_TEST_SUITE_P(Large, MultiHistTest,\n                         ::testing::Combine(::testing::Values<bst_idx_t>((1ul << 21)),\n                                            ::testing::Values(2), ::testing::Values(2),\n                                            ::testing::Bool()),\n                         TestName);\n}  // namespace xgboost::tree::cuda_impl\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_row_partitioner.cu",
    "content": "/**\n * Copyright 2019-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/device_vector.h>\n#include <thrust/sort.h>    // for sort\n#include <thrust/unique.h>  // for unique\n#include <xgboost/base.h>\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include <cstddef>   // for size_t\n#include <cstdint>   // for uint32_t\n#include <iterator>  // for distance\n#include <vector>    // for vector\n\n#include \"../../../../src/data/ellpack_page.cuh\"\n#include \"../../../../src/tree/gpu_hist/expand_entry.cuh\"  // for GPUExpandEntry\n#include \"../../../../src/tree/gpu_hist/row_partitioner.cuh\"\n#include \"../../../../src/tree/param.h\"    // for TrainParam\n#include \"../../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"../../helpers.h\"                 // for RandomDataGenerator\n\nnamespace xgboost::tree {\nvoid TestUpdatePositionBatch() {\n  const int kNumRows = 10;\n  auto ctx = MakeCUDACtx(0);\n  RowPartitioner rp;\n  rp.Reset(&ctx, kNumRows, 0);\n  auto rows = rp.GetRowsHost(0);\n  EXPECT_EQ(rows.size(), kNumRows);\n  for (auto i = 0ull; i < kNumRows; i++) {\n    EXPECT_EQ(rows[i], i);\n  }\n  std::vector<int> extra_data = {0};\n  dh::DeviceUVector<cuda_impl::RowIndexT> ridx_tmp(kNumRows);\n  // Send the first five training instances to the right node\n  // and the second 5 to the left node\n  rp.UpdatePositionBatch(\n      &ctx, {0}, {1}, {2}, extra_data, dh::ToSpan(ridx_tmp),\n      [=] __device__(RowPartitioner::RowIndexT ridx, int, int) { return ridx > 4; });\n  rows = rp.GetRowsHost(1);\n  for (auto r : rows) {\n    EXPECT_GT(r, 4);\n  }\n  rows = rp.GetRowsHost(2);\n  for (auto r : rows) {\n    EXPECT_LT(r, 5);\n  }\n\n  // Split the left node again\n  rp.UpdatePositionBatch(\n      &ctx, {1}, {3}, {4}, extra_data, dh::ToSpan(ridx_tmp),\n      [=] __device__(RowPartitioner::RowIndexT ridx, int, int) { return ridx < 7; });\n  EXPECT_EQ(rp.GetRows(3).size(), 2);\n  
EXPECT_EQ(rp.GetRows(4).size(), 3);\n}\n\nTEST(RowPartitioner, Batch) { TestUpdatePositionBatch(); }\n\nvoid TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Segment>& segments) {\n  auto ctx = MakeCUDACtx(0);\n  thrust::device_vector<cuda_impl::RowIndexT> ridx = ridx_in;\n  thrust::device_vector<cuda_impl::RowIndexT> ridx_tmp(ridx_in.size());\n  thrust::device_vector<cuda_impl::RowIndexT> counts(segments.size());\n\n  auto op = [=] __device__(auto ridx, int split_index, int data) {\n    return ridx % 2 == 0;\n  };\n  std::vector<int> op_data(segments.size());\n  std::vector<PerNodeData<int>> h_batch_info(segments.size());\n  dh::TemporaryArray<PerNodeData<int>> d_batch_info(segments.size());\n\n  std::size_t total_rows = 0;\n  for (size_t i = 0; i < segments.size(); i++) {\n    h_batch_info[i] = {segments.at(i), 0};\n    total_rows += segments.at(i).Size();\n  }\n  dh::safe_cuda(cudaMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),\n                                h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,\n                                nullptr));\n  dh::DeviceUVector<std::int8_t> tmp;\n  SortPositionBatch<decltype(op), int>(&ctx, dh::ToSpan(d_batch_info), dh::ToSpan(ridx),\n                                       dh::ToSpan(ridx_tmp), dh::ToSpan(counts), total_rows, op,\n                                       &tmp);\n\n  auto op_without_data = [=] __device__(auto ridx) {\n    return ridx % 2 == 0;\n  };\n  for (size_t i = 0; i < segments.size(); i++) {\n    auto begin = ridx.begin() + segments[i].begin;\n    auto end = ridx.begin() + segments[i].end;\n    bst_uint count = counts[i];\n    auto left_partition_count =\n        thrust::count_if(thrust::device, begin, begin + count, op_without_data);\n    EXPECT_EQ(left_partition_count, count);\n    auto right_partition_count =\n        thrust::count_if(thrust::device, begin + count, end, op_without_data);\n    EXPECT_EQ(right_partition_count, 0);\n  
}\n}\n\nTEST(RowPartitioner, SortPositionBatch) {\n  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});\n  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});\n  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});\n  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});\n}\n\nnamespace {\nvoid GetSplit(RegTree* tree, float split_value, std::vector<GPUExpandEntry>* candidates) {\n  CHECK(!tree->IsMultiTarget());\n  tree->ExpandNode(\n      /*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,\n      /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n      /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  candidates->front().nidx = 0;\n  candidates->front().depth = 0;\n  candidates->front().split.fvalue = split_value;\n  candidates->front().split.findex = 0;\n}\n\nnamespace {\ntemplate <typename Accessor>\nstruct LessThanOp {\n  Accessor acc;\n  explicit LessThanOp(Accessor acc) : acc{acc} {}\n  __device__ bool operator()(bst_idx_t ridx, std::int32_t nidx_in_batch,\n                             RegTree::Node const& node) const {\n    auto fvalue = acc.GetFvalue(ridx, node.SplitIndex());\n    return fvalue <= node.SplitCond();\n  }\n};\n}  // namespace\n\nvoid TestExternalMemory() {\n  auto ctx = MakeCUDACtx(0);\n\n  bst_bin_t max_bin = 32;\n  auto p_fmat =\n      RandomDataGenerator{256, 16, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n\n  std::vector<std::unique_ptr<RowPartitioner>> partitioners;\n  RegTree tree;\n  std::vector<GPUExpandEntry> candidates(1);\n\n  auto param = BatchParam{max_bin, TrainParam::DftSparseThreshold()};\n  float split_value{0.0f};\n  bst_feature_t const split_ind = 0;\n  dh::device_vector<bst_node_t> position(p_fmat->Info().num_row_, 0);\n\n  auto encode_op = [=] __device__(bst_idx_t, bst_node_t nidx) {\n    return nidx;\n  };  // NOLINT\n\n  for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {\n    if (partitioners.empty()) {\n      auto ptr = 
page.Impl()->Cuts().Ptrs()[split_ind + 1];\n      split_value = page.Impl()->Cuts().Values().at(ptr / 2);\n      GetSplit(&tree, split_value, &candidates);\n    }\n\n    partitioners.emplace_back(std::make_unique<RowPartitioner>());\n    partitioners.back()->Reset(&ctx, page.Size(), page.BaseRowId());\n    dh::DeviceUVector<cuda_impl::RowIndexT> ridx_tmp(page.Size());\n    std::vector<RegTree::Node> splits{tree[0]};\n    page.Impl()->Visit(&ctx, {}, [&](auto&& acc) {\n      partitioners.back()->UpdatePositionBatch(&ctx, {0}, {1}, {2}, splits, dh::ToSpan(ridx_tmp),\n                                               LessThanOp{acc});\n    });\n    partitioners.back()->FinalisePosition(\n        &ctx, dh::ToSpan(position).subspan(page.BaseRowId(), page.Size()), page.BaseRowId(),\n        encode_op);\n  }\n\n  bst_idx_t n_left{0};\n  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    for (size_t i = 0; i < batch.Size(); ++i) {\n      if (batch[i][split_ind].fvalue < split_value) {\n        n_left++;\n      }\n    }\n  }\n\n  RegTree::Node node = tree[RegTree::kRoot];\n  auto n_left_pos =\n      thrust::count_if(position.cbegin(), position.cend(),\n                       [=] XGBOOST_DEVICE(bst_node_t v) { return v == node.LeftChild(); });\n  ASSERT_EQ(n_left, n_left_pos);\n  thrust::sort(position.begin(), position.end());\n  auto end_it = thrust::unique(position.begin(), position.end());\n  ASSERT_EQ(std::distance(position.begin(), end_it), 2);\n}\n}  // anonymous namespace\n\nTEST(RowPartitioner, LeafPartitionExternalMemory) { TestExternalMemory(); }\n\nnamespace {\nvoid TestEmptyNode(std::int32_t n_workers) {\n  collective::TestDistributedGlobal(n_workers, [] {\n    auto ctx = MakeCUDACtx(DistGpuIdx());\n    RowPartitioner partitioner;\n    bst_idx_t n_samples = (collective::GetRank() == 0) ? 
0 : 1024;\n    bst_idx_t base_rowid = 0;\n    partitioner.Reset(&ctx, n_samples, base_rowid);\n    std::vector<RegTree::Node> splits(1);\n    dh::DeviceUVector<cuda_impl::RowIndexT> ridx_tmp(n_samples);\n    partitioner.UpdatePositionBatch(\n        &ctx, {0}, {1}, {2}, splits, dh::ToSpan(ridx_tmp),\n        [] XGBOOST_DEVICE(bst_idx_t ridx, std::int32_t /*nidx_in_batch*/, RegTree::Node) {\n          return ridx < 3;\n        });\n    ASSERT_EQ(partitioner.GetNumNodes(), 3);\n    if (collective::GetRank() == 0) {\n      for (std::size_t i = 0; i < 3; ++i) {\n        ASSERT_TRUE(partitioner.GetRows(i).empty());\n      }\n    }\n    ctx.CUDACtx()->Stream().Sync();\n  });\n}\n}  // anonymous namespace\n\nTEST(RowPartitioner, MGPUEmpty) {\n  std::int32_t n_workers = curt::AllVisibleGPUs();\n  TestEmptyNode(n_workers);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/gpu_hist/test_sampler.cu",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <algorithm>  // for sort\n#include <limits>     // for numeric_limits\n#include <numeric>    // for partial_sum\n#include <vector>     // for vector\n\n#include \"../../../../src/tree/fit_stump.h\"  // for SumGradients\n#include \"../../../../src/tree/gpu_hist/sampler.cuh\"\n#include \"../../../../src/tree/hist/sampler.h\"  // for cpu_impl::CalculateThreshold\n#include \"../../../../src/tree/param.h\"         // TrainParam\n#include \"../../helpers.h\"\n#include \"../test_sampler.h\"  // VerifyApplySamplingMask\n#include \"dummy_quantizer.cuh\"\n\nnamespace xgboost::tree::cuda_impl {\nvoid CalcFloatGrad(linalg::MatrixView<GradientPairInt64> in_gpair,\n                   common::Span<GradientQuantiser const> roundings,\n                   linalg::Matrix<GradientPair>* p_out_gpair) {\n  auto& out_gpair = *p_out_gpair;\n  out_gpair.Reshape(in_gpair.Shape());\n  auto h_out_gpair = out_gpair.HostView();\n  for (std::size_t i = 0; i < in_gpair.Shape(0); ++i) {\n    for (std::size_t j = 0; j < in_gpair.Shape(1); ++j) {\n      auto g64 = roundings[j].ToFloatingPoint(in_gpair(i, j));\n      h_out_gpair(i, j) = GradientPair(g64.GetGrad(), g64.GetHess());\n    }\n  }\n}\n\nvoid VerifySampling(float subsample, int sampling_method, bst_target_t n_targets = 1,\n                    bool check_sum = true) {\n  auto ctx = MakeCUDACtx(0);\n\n  constexpr size_t kRows = 4096;\n  auto [gpair_i64, quantizer] = GenerateGradientsFixedPoint(&ctx, kRows, n_targets);\n\n  // Copy quantizers to host for summing\n  std::vector<GradientQuantiser> h_quantizers(n_targets, MakeDummyQuantizer());\n  dh::safe_cuda(cudaMemcpy(h_quantizers.data(), quantizer.DeviceSpan().data(),\n                           n_targets * sizeof(GradientQuantiser), cudaMemcpyDeviceToHost));\n\n  auto sum_gradients = [&](linalg::MatrixView<GradientPair const> gpair) {\n    auto sum = 
linalg::Empty<GradientPairPrecise>(&ctx, n_targets);\n    xgboost::tree::cpu_impl::SumGradients(&ctx, gpair, sum.HostView());\n    return sum.Data()->HostVector();\n  };\n\n  linalg::Matrix<GradientPair> gpair;\n  CalcFloatGrad(gpair_i64.HostView(), common::Span{h_quantizers}, &gpair);\n  auto sum_gpair = sum_gradients(gpair.HostView());\n  // sample\n  Sampler sampler{kRows, subsample, sampling_method};\n  sampler.Sample(&ctx, gpair_i64.View(ctx.Device()), quantizer.DeviceSpan());\n  // Refresh float gradient after sampling\n  CalcFloatGrad(gpair_i64.HostView(), common::Span{h_quantizers}, &gpair);\n  auto sum_sampled_gpair = sum_gradients(gpair.HostView());\n  CheckSampling(subsample, n_targets, check_sum, sum_sampled_gpair, sum_gpair, gpair.HostView());\n}\n\nTEST(GpuSampler, NoSampling) {\n  constexpr float kSubsample = 1.0f;\n  constexpr int kSamplingMethod = TrainParam::kUniform;\n  VerifySampling(kSubsample, kSamplingMethod);\n}\n\nTEST(GpuSampler, UniformSampling) {\n  constexpr float kSubsample = 0.5;\n  constexpr int kSamplingMethod = TrainParam::kUniform;\n  // Uniform sampling preserves the mean, not the sum (check_sum = false)\n  constexpr bool kCheckSum = false;\n  VerifySampling(kSubsample, kSamplingMethod, 1, kCheckSum);\n  VerifySampling(kSubsample, kSamplingMethod, 3, kCheckSum);\n}\n\nTEST(GpuSampler, GradientBasedSampling) {\n  constexpr float kSubsample = 0.8;\n  constexpr int kSamplingMethod = TrainParam::kGradientBased;\n  VerifySampling(kSubsample, kSamplingMethod, 1);\n  VerifySampling(kSubsample, kSamplingMethod, 3);\n}\n\nTEST(GpuSampler, ApplySampling) {\n  auto ctx = MakeCUDACtx(0);\n\n  bst_idx_t n_samples = 1024;\n  bst_target_t n_split_targets = 2, n_value_targets = 4;\n  constexpr float kSubsample = 0.5f;\n  constexpr int kSamplingMethod = TrainParam::kGradientBased;\n\n  // Generate and sample the split gradient\n  auto [split_gpair, quantizer] = GenerateGradientsFixedPoint(&ctx, n_samples, n_split_targets);\n  Sampler 
sampler{n_samples, kSubsample, kSamplingMethod};\n  sampler.Sample(&ctx, split_gpair.View(ctx.Device()), quantizer.DeviceSpan());\n  auto d_roundings = quantizer.DeviceSpan();\n  std::vector<GradientQuantiser> h_roundings(d_roundings.size(), MakeDummyQuantizer());\n  thrust::copy(dh::tcbegin(d_roundings), dh::tcend(d_roundings), h_roundings.begin());\n\n  // Generate value gradient (more targets than split)\n  auto value_gpair = GenerateRandomGradients(&ctx, n_samples, n_value_targets);\n  auto h_value_before = value_gpair.gpair.HostView();\n  linalg::Matrix<GradientPair> sampled;\n  CalcFloatGrad(split_gpair.HostView(), dh::ToSpan(h_roundings), &sampled);\n\n  sampler.ApplySampling(&ctx, split_gpair, &value_gpair.gpair);\n  CheckSamplingMask(sampled.HostView(), value_gpair.gpair.HostView(), kSubsample);\n\n  auto h_value_after = value_gpair.gpair.HostView();\n  std::vector<float> thresholds;\n  auto reg_abs_grad = ::xgboost::tree::cpu_impl::CalcRegAbsGrad(&ctx, h_value_before, &thresholds);\n\n  dh::device_vector<float> d_sorted(thresholds);\n  dh::device_vector<float> d_csum(n_samples);\n  auto threshold_index =\n      cuda_impl::CalculateThresholdIndex(&ctx, dh::ToSpan(d_sorted), dh::ToSpan(d_csum), n_samples,\n                                         static_cast<bst_idx_t>(n_samples * kSubsample));\n  float threshold = d_sorted[threshold_index];\n\n  auto h_sampled_split = sampled.HostView();\n  CheckValueReweight(h_sampled_split, h_value_before, h_value_after, reg_abs_grad, threshold);\n}\n}  // namespace xgboost::tree::cuda_impl\n\nnamespace xgboost::tree {\n// Test consistency between CPU and GPU threshold calculations\nTEST(CalculateThreshold, CpuGpuConsistency) {\n  auto ctx = MakeCUDACtx(0);\n\n  // Test with various gradient distributions\n  std::vector<std::vector<float>> test_cases = {\n      {0.5f, 5.0f, 1.0f, 2.0f, 2.0f},                                // Basic\n      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // All equal\n      
{0.1f, 0.5f, 1.0f, 2.0f, 5.0f, 10.0f},                         // Varied\n  };\n\n  std::vector<float> subsample_rates = {0.3f, 0.5f, 0.8f};\n\n  for (auto const& rag : test_cases) {\n    for (float subsample : subsample_rates) {\n      bst_idx_t n = rag.size();\n      bst_idx_t sample_rows = static_cast<bst_idx_t>(n * subsample);\n\n      // CPU calculation\n      std::vector<float> cpu_sorted = rag;\n      std::sort(cpu_sorted.begin(), cpu_sorted.end());\n      cpu_sorted.push_back(std::numeric_limits<float>::max());\n      std::vector<float> cpu_csum(n);\n      std::partial_sum(cpu_sorted.begin(), cpu_sorted.end() - 1, cpu_csum.begin());\n      float cpu_threshold = cpu_impl::CalculateThreshold(common::Span{cpu_sorted},\n                                                         common::Span{cpu_csum}, n, sample_rows);\n\n      // GPU calculation\n      std::vector<float> gpu_sorted = rag;\n      std::sort(gpu_sorted.begin(), gpu_sorted.end());\n      gpu_sorted.push_back(std::numeric_limits<float>::max());\n      dh::device_vector<float> d_sorted(gpu_sorted);\n      dh::device_vector<float> d_csum(n);\n      std::size_t threshold_index = cuda_impl::CalculateThresholdIndex(\n          &ctx, dh::ToSpan(d_sorted), dh::ToSpan(d_csum), n, sample_rows);\n      float gpu_threshold = d_sorted[threshold_index];\n\n      // Both should produce similar expected sample counts\n      auto calc_expected = [&](float threshold) {\n        float expected = 0.0f;\n        for (bst_idx_t i = 0; i < n; ++i) {\n          expected += std::min(SamplingProbability(threshold, cpu_sorted[i]), 1.0f);\n        }\n        return expected;\n      };\n\n      float cpu_expected = calc_expected(cpu_threshold);\n      float gpu_expected = calc_expected(gpu_threshold);\n      // Both should be close to target sample_rows\n      EXPECT_NEAR(cpu_expected, sample_rows, 0.1f);\n      EXPECT_NEAR(gpu_expected, sample_rows, 0.1f);\n      EXPECT_NEAR(cpu_expected, gpu_expected, 0.1f);\n    }\n  }\n}\n}  
// namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/hist/test_evaluate_splits.cc",
    "content": "/**\n * Copyright 2021-2026, XGBoost Contributors\n */\n#include \"../test_evaluate_splits.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/base.h>        // for GradientPairPrecise, Args, Gradie...\n#include <xgboost/context.h>     // for Context\n#include <xgboost/data.h>        // for FeatureType, DMatrix, MetaInfo\n#include <xgboost/logging.h>     // for CHECK_EQ\n#include <xgboost/tree_model.h>  // for RegTree, RTreeNodeStat\n\n#include <memory>   // for make_shared, shared_ptr, addressof\n#include <numeric>  // for iota\n#include <tuple>    // for make_tuple\n\n#include \"../../../../src/common/hist_util.h\"           // for HistCollection, HistogramCuts\n#include \"../../../../src/common/random.h\"              // for ColumnSampler\n#include \"../../../../src/common/row_set.h\"             // for RowSetCollection\n#include \"../../../../src/data/gradient_index.h\"        // for GHistIndexMatrix\n#include \"../../../../src/tree/hist/evaluate_splits.h\"  // for HistEvaluator, TreeEvaluator\n#include \"../../../../src/tree/hist/expand_entry.h\"     // for CPUExpandEntry\n#include \"../../../../src/tree/hist/hist_cache.h\"       // for BoundedHistCollection\n#include \"../../../../src/tree/hist/hist_param.h\"       // for HistMakerTrainParam\n#include \"../../../../src/tree/param.h\"                 // for GradStats, TrainParam\n#include \"../../helpers.h\"                              // for RandomDataGenerator, AllThreadsFo...\n\nnamespace xgboost::tree {\nvoid TestPartitionBasedSplit::SetUp() {\n  param_.UpdateAllowUnknown(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}});\n  sorted_idx_.resize(n_bins_);\n  std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0);\n\n  info_.num_col_ = 1;\n  cuts_ = common::HistogramCuts{1};\n\n  cuts_.SetCategorical(true, n_bins_);\n  auto &h_cuts = cuts_.cut_ptrs_.HostVector();\n  h_cuts[0] = 0;\n  h_cuts[1] = n_bins_;\n  auto &h_vals = cuts_.cut_values_.HostVector();\n  h_vals.resize(n_bins_);\n  
std::iota(h_vals.begin(), h_vals.end(), 0.0);\n\n  Context ctx;\n  HistMakerTrainParam hist_param;\n  hist_.Reset(cuts_.TotalBins(), hist_param.MaxCachedHistNodes(ctx.Device()));\n  hist_.AllocateHistograms({0});\n  auto node_hist = hist_[0];\n\n  SimpleLCG lcg;\n  SimpleRealUniformDistribution<double> grad_dist{-4.0, 4.0};\n  SimpleRealUniformDistribution<double> hess_dist{0.0, 4.0};\n\n  for (auto &e : node_hist) {\n    e = GradientPairPrecise{grad_dist(&lcg), hess_dist(&lcg)};\n    total_gpair_ += e;\n  }\n\n  auto enumerate = [this, n_feat = info_.num_col_](common::GHistRow hist,\n                                                   GradientPairPrecise parent_sum) {\n    int32_t best_thresh = -1;\n    float best_score{-std::numeric_limits<float>::infinity()};\n    TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), DeviceOrd::CPU()};\n    auto tree_evaluator = evaluator.GetEvaluator<TrainParam>();\n    GradientPairPrecise left_sum;\n    auto parent_gain = tree_evaluator.CalcGain(0, param_, GradStats{total_gpair_});\n    for (size_t i = 0; i < hist.size() - 1; ++i) {\n      left_sum += hist[i];\n      auto right_sum = parent_sum - left_sum;\n      auto gain =\n          tree_evaluator.CalcSplitGain(param_, 0, 0, GradStats{left_sum}, GradStats{right_sum}) -\n          parent_gain;\n      if (gain > best_score) {\n        best_score = gain;\n        best_thresh = i;\n      }\n    }\n    return std::make_tuple(best_thresh, best_score);\n  };\n\n  // enumerate all possible partitions to find the optimal split\n  do {\n    std::vector<GradientPairPrecise> sorted_hist(node_hist.size());\n    for (size_t i = 0; i < sorted_hist.size(); ++i) {\n      sorted_hist[i] = node_hist[sorted_idx_[i]];\n    }\n    auto [thresh, score] = enumerate({sorted_hist}, total_gpair_);\n    if (score > best_score_) {\n      best_score_ = score;\n    }\n  } while (std::next_permutation(sorted_idx_.begin(), sorted_idx_.end()));\n}\n\nvoid TestEvaluateSplits(bool 
force_read_by_column) {\n  Context ctx;\n  ctx.nthread = 4;\n  static constexpr bst_idx_t kRows = 8, kCols = 16;\n  auto sampler = std::make_shared<common::ColumnSampler>();\n\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}});\n\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();\n\n  auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};\n  BoundedHistCollection hist;\n  std::vector<GradientPair> row_gpairs = {{1.23f, 0.24f},  {0.24f, 0.25f}, {0.26f, 0.27f},\n                                          {2.27f, 0.28f},  {0.27f, 0.29f}, {0.37f, 0.39f},\n                                          {-0.47f, 0.49f}, {0.57f, 0.59f}};\n\n  size_t constexpr kMaxBins = 4;\n  // dense, no missing values\n  GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);\n  common::RowSetCollection row_set_collection;\n  std::vector<bst_idx_t> &row_indices = *row_set_collection.Data();\n  row_indices.resize(kRows);\n  std::iota(row_indices.begin(), row_indices.end(), 0);\n  row_set_collection.Init();\n\n  HistMakerTrainParam hist_param;\n  hist.Reset(gmat.cut.Ptrs().back(), hist_param.MaxCachedHistNodes(ctx.Device()));\n  hist.AllocateHistograms({0});\n  auto const &elem = row_set_collection[0];\n  common::BuildHist<false>(row_gpairs, common::Span{elem.begin(), elem.end()}, gmat, hist[0],\n                           force_read_by_column);\n\n  // Compute total gradient for all data points\n  GradientPairPrecise total_gpair;\n  for (const auto &e : row_gpairs) {\n    total_gpair += GradientPairPrecise(e);\n  }\n\n  RegTree tree;\n  std::vector<CPUExpandEntry> entries(1);\n  entries.front().nid = 0;\n  entries.front().depth = 0;\n\n  evaluator.InitRoot(GradStats{total_gpair});\n  evaluator.EvaluateSplits(hist, gmat.cut, {}, tree, &entries);\n\n  auto best_loss_chg = evaluator.Evaluator().CalcSplitGain(\n                           param, 0, entries.front().split.SplitIndex(),\n       
                    entries.front().split.left_sum, entries.front().split.right_sum) -\n                       evaluator.Stats().front().root_gain;\n  ASSERT_EQ(entries.front().split.loss_chg, best_loss_chg);\n  ASSERT_GT(entries.front().split.loss_chg, 16.2f);\n\n  // Assert that's the best split\n  for (size_t i = 1; i < gmat.cut.Ptrs().size(); ++i) {\n    GradStats left, right;\n    for (size_t j = gmat.cut.Ptrs()[i - 1]; j < gmat.cut.Ptrs()[i]; ++j) {\n      auto loss_chg = evaluator.Evaluator().CalcSplitGain(param, 0, i - 1, left, right) -\n                      evaluator.Stats().front().root_gain;\n      ASSERT_GE(best_loss_chg, loss_chg);\n      left.Add(hist[0][j].GetGrad(), hist[0][j].GetHess());\n      right.SetSubstract(GradStats{total_gpair}, left);\n    }\n  }\n}\n\nTEST(HistEvaluator, Evaluate) {\n  TestEvaluateSplits(false);\n  TestEvaluateSplits(true);\n}\n\nTEST(HistMultiEvaluator, Evaluate) {\n  Context ctx;\n  ctx.nthread = 1;\n\n  TrainParam param;\n  param.Init(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}});\n  auto sampler = std::make_shared<common::ColumnSampler>();\n\n  std::size_t n_samples = 3;\n  bst_feature_t n_features = 2;\n  bst_target_t n_targets = 2;\n  bst_bin_t n_bins = 2;\n\n  auto p_fmat =\n      RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);\n\n  HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};\n  HistMakerTrainParam hist_param;\n  std::vector<BoundedHistCollection> histogram(n_targets);\n  linalg::Vector<GradientPairPrecise> root_sum({2}, DeviceOrd::CPU());\n  for (bst_target_t t{0}; t < n_targets; ++t) {\n    auto &hist = histogram[t];\n    hist.Reset(n_bins * n_features, hist_param.MaxCachedHistNodes(ctx.Device()));\n    hist.AllocateHistograms({0});\n    auto node_hist = hist[0];\n    node_hist[0] = {-0.5, 0.5};\n    node_hist[1] = {2.0, 0.5};\n    node_hist[2] = {0.5, 0.5};\n    node_hist[3] = {1.0, 0.5};\n\n    root_sum(t) += node_hist[0];\n    
root_sum(t) += node_hist[1];\n  }\n\n  RegTree tree{n_targets, n_features};\n  auto weight = evaluator.InitRoot(root_sum.HostView());\n  // Compute root sum_hess by summing hessians across all targets\n  float root_sum_hess = 0.0f;\n  for (bst_target_t t{0}; t < n_targets; ++t) {\n    root_sum_hess += static_cast<float>(root_sum.HostView()(t).GetHess());\n  }\n  tree.SetRoot(weight.HostView(), root_sum_hess);\n  auto w = weight.HostView();\n  ASSERT_EQ(w.Size(), n_targets);\n  ASSERT_EQ(w(0), -1.5);\n  ASSERT_EQ(w(1), -1.5);\n\n  common::HistogramCuts cuts{2};\n  cuts.cut_ptrs_ = {0, 2, 4};\n  cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0};\n\n  std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});\n\n  std::vector<BoundedHistCollection const *> ptrs;\n  std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),\n                 [](auto const &h) { return std::addressof(h); });\n\n  evaluator.EvaluateSplits(tree, ptrs, cuts, {}, &entries);\n\n  ASSERT_EQ(entries.front().split.loss_chg, 12.5);\n  ASSERT_EQ(entries.front().split.split_value, 0.5);\n  ASSERT_EQ(entries.front().split.SplitIndex(), 0);\n\n  ASSERT_EQ(sampler->GetFeatureSet(&ctx, 0)->Size(), n_features);\n}\n\nTEST(HistEvaluator, Apply) {\n  Context ctx;\n  ctx.nthread = 4;\n  RegTree tree;\n  static constexpr bst_idx_t kRows = 8, kCols = 16;\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0.0\"}});\n  auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();\n  auto sampler = std::make_shared<common::ColumnSampler>();\n  auto evaluator_ = HistEvaluator{&ctx, &param, dmat->Info(), sampler};\n\n  CPUExpandEntry entry{0, 0};\n  entry.split.loss_chg = 10.0f;\n  entry.split.left_sum = GradStats{0.4, 0.6f};\n  entry.split.right_sum = GradStats{0.5, 0.5f};\n\n  evaluator_.ApplyTreeSplit(entry, &tree);\n  ASSERT_EQ(tree.NumExtraNodes(), 2);\n  ASSERT_EQ(tree.Stat(tree[0].LeftChild()).sum_hess, 0.6f);\n  
ASSERT_EQ(tree.Stat(tree[0].RightChild()).sum_hess, 0.5f);\n\n  {\n    RegTree tree;\n    entry.split.is_cat = true;\n    entry.split.split_value = 1.0;\n    evaluator_.ApplyTreeSplit(entry, &tree);\n    auto l = entry.split.left_sum;\n    ASSERT_NEAR(tree[1].LeafValue(), -l.sum_grad / l.sum_hess * param.learning_rate, kRtEps);\n    ASSERT_NEAR(tree[2].LeafValue(), -param.learning_rate, kRtEps);\n  }\n}\n\nTEST_F(TestPartitionBasedSplit, CPUHist) {\n  Context ctx;\n  // check the evaluator is returning the optimal split\n  std::vector<FeatureType> ft{FeatureType::kCategorical};\n  auto sampler = std::make_shared<common::ColumnSampler>();\n  HistEvaluator evaluator{&ctx, &param_, info_, sampler};\n  evaluator.InitRoot(GradStats{total_gpair_});\n  RegTree tree;\n  std::vector<CPUExpandEntry> entries(1);\n  evaluator.EvaluateSplits(hist_, cuts_, {ft}, tree, &entries);\n  ASSERT_NEAR(entries[0].split.loss_chg, best_score_, 1e-16);\n}\n\nnamespace {\nauto CompareOneHotAndPartition(bool onehot) {\n  Context ctx;\n  static constexpr bst_idx_t kRows = 128, kCols = 1;\n  std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);\n\n  TrainParam param;\n  if (onehot) {\n    // force use one-hot\n    param.UpdateAllowUnknown(\n        Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}, {\"max_cat_to_onehot\", \"100\"}});\n  } else {\n    param.UpdateAllowUnknown(\n        Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}, {\"max_cat_to_onehot\", \"1\"}});\n  }\n\n  size_t n_cats{2};\n\n  auto dmat =\n      RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();\n\n  auto sampler = std::make_shared<common::ColumnSampler>();\n  auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};\n  std::vector<CPUExpandEntry> entries(1);\n  HistMakerTrainParam hist_param;\n\n  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {\n    BoundedHistCollection hist;\n\n    
entries.front().nid = 0;\n    entries.front().depth = 0;\n\n    hist.Reset(gmat.cut.TotalBins(), hist_param.MaxCachedHistNodes(ctx.Device()));\n    hist.AllocateHistograms({0});\n    auto node_hist = hist[0];\n\n    CHECK_EQ(node_hist.size(), n_cats);\n    CHECK_EQ(node_hist.size(), gmat.cut.Ptrs().back());\n\n    GradientPairPrecise total_gpair;\n    for (size_t i = 0; i < node_hist.size(); ++i) {\n      node_hist[i] = {static_cast<double>(node_hist.size() - i), 1.0};\n      total_gpair += node_hist[i];\n    }\n    RegTree tree;\n    evaluator.InitRoot(GradStats{total_gpair});\n    evaluator.EvaluateSplits(hist, gmat.cut, ft, tree, &entries);\n  }\n  return entries.front();\n}\n}  // anonymous namespace\n\nTEST(HistEvaluator, Categorical) {\n  auto with_onehot = CompareOneHotAndPartition(true);\n  auto with_part = CompareOneHotAndPartition(false);\n\n  ASSERT_EQ(with_onehot.split.loss_chg, with_part.split.loss_chg);\n}\n\nTEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {\n  Context ctx;\n  BoundedHistCollection hist;\n  HistMakerTrainParam hist_param;\n  hist.Reset(cuts_.TotalBins(), hist_param.MaxCachedHistNodes(ctx.Device()));\n  hist.AllocateHistograms({0});\n  auto node_hist = hist[0];\n  ASSERT_EQ(node_hist.size(), feature_histogram_.size());\n  std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());\n\n  auto sampler = std::make_shared<common::ColumnSampler>();\n  MetaInfo info;\n  info.num_col_ = 1;\n  info.feature_types = {FeatureType::kCategorical};\n\n  auto evaluator = HistEvaluator{&ctx, &param_, info, sampler};\n  evaluator.InitRoot(GradStats{parent_sum_});\n  std::vector<CPUExpandEntry> entries(1);\n  RegTree tree;\n  evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);\n  auto const &split = entries.front().split;\n\n  this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,\n                    split.DefaultLeft(),\n                    
GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},\n                    GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});\n}\n\nTEST(HistMultiEvaluator, CategoricalOneHot) {\n  Context ctx;\n  ctx.nthread = 1;\n\n  TrainParam param;\n  param.Init(Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}, {\"max_cat_to_onehot\", \"100\"}});\n  auto sampler = std::make_shared<common::ColumnSampler>();\n\n  bst_feature_t n_features = 1;\n  bst_target_t n_targets = 2;\n  bst_bin_t n_cats = 3;\n\n  MetaInfo info;\n  info.num_col_ = n_features;\n  info.feature_types = {FeatureType::kCategorical};\n\n  HistMultiEvaluator evaluator{&ctx, info, &param, sampler};\n  HistMakerTrainParam hist_param;\n\n  // Per-target histograms with n_cats bins each.\n  std::vector<BoundedHistCollection> histogram(n_targets);\n  linalg::Vector<GradientPairPrecise> root_sum({n_targets}, DeviceOrd::CPU());\n  std::vector<std::vector<GradientPairPrecise>> hist_data = {\n      {{1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}},   // t-0\n      {{0.5, 0.5}, {1.0, 0.5}, {-0.5, 0.5}}};  // t-1\n\n  for (bst_target_t t = 0; t < n_targets; ++t) {\n    auto &hist = histogram[t];\n    hist.Reset(n_cats * n_features, hist_param.MaxCachedHistNodes(ctx.Device()));\n    hist.AllocateHistograms({0});\n    auto node_hist = hist[0];\n    for (bst_bin_t b = 0; b < n_cats; ++b) {\n      node_hist[b] = hist_data[t][b];\n      root_sum(t) += node_hist[b];\n    }\n  }\n\n  common::HistogramCuts cuts{n_features};\n  cuts.cut_ptrs_ = {0, 3};\n  cuts.cut_values_ = {0.0, 1.0, 2.0};\n  cuts.SetCategorical(true, 2.0);\n\n  RegTree tree{n_targets, n_features};\n  auto weight = evaluator.InitRoot(root_sum.HostView());\n  float root_sum_hess = 0.0f;\n  for (bst_target_t t = 0; t < n_targets; ++t) {\n    root_sum_hess += static_cast<float>(root_sum.HostView()(t).GetHess());\n  }\n  tree.SetRoot(weight.HostView(), root_sum_hess);\n\n  std::vector<MultiExpandEntry> entries(1, {0, 0});\n  
std::vector<BoundedHistCollection const *> ptrs;\n  for (auto &h : histogram) {\n    ptrs.push_back(&h);\n  }\n\n  std::vector<FeatureType> ft{FeatureType::kCategorical};\n  evaluator.EvaluateSplits(tree, ptrs, cuts, ft, &entries);\n\n  auto const &split = entries.front().split;\n  ASSERT_TRUE(split.is_cat);\n  ASSERT_FALSE(split.cat_bits.empty());\n  ASSERT_GT(split.loss_chg, 0.0f);\n\n  common::KCatBitField cat_bits{split.cat_bits};\n  auto chosen_cat = static_cast<bst_cat_t>(split.split_value);\n  ASSERT_TRUE(cat_bits.Check(chosen_cat));\n\n  // Verify ApplyTreeSplit works with categorical split.\n  evaluator.ApplyTreeSplit(entries.front(), &tree);\n  ASSERT_TRUE(tree.HasCategoricalSplit());\n  auto mt_view = tree.HostMtView();\n  ASSERT_EQ(mt_view.SplitType(0), FeatureType::kCategorical);\n  ASSERT_FALSE(mt_view.NodeCats(0).empty());\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/hist/test_expand_entry.cc",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/json.h>        // for Json\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include \"../../../../src/common/categorical.h\"  // for CatBitField\n#include \"../../../../src/tree/hist/expand_entry.h\"\n\nnamespace xgboost::tree {\nTEST(ExpandEntry, IO) {\n  CPUExpandEntry entry{RegTree::kRoot, 0};\n  entry.split.Update(1.0, 1, /*new_split_value=*/0.3, true, true, GradStats{1.0, 1.0},\n                     GradStats{2.0, 2.0});\n  bst_bin_t n_bins_feature = 256;\n  auto n = common::CatBitField::ComputeStorageSize(n_bins_feature);\n  entry.split.cat_bits = decltype(entry.split.cat_bits)(n, 0);\n  common::CatBitField cat_bits{entry.split.cat_bits};\n  cat_bits.Set(n_bins_feature / 2);\n\n  Json je{Object{}};\n  entry.Save(&je);\n\n  CPUExpandEntry loaded;\n  loaded.Load(je);\n\n  ASSERT_EQ(loaded.split.is_cat, entry.split.is_cat);\n  ASSERT_EQ(loaded.split.cat_bits, entry.split.cat_bits);\n  ASSERT_EQ(loaded.split.left_sum.GetGrad(), entry.split.left_sum.GetGrad());\n  ASSERT_EQ(loaded.split.right_sum.GetHess(), entry.split.right_sum.GetHess());\n}\n\nTEST(ExpandEntry, IOMulti) {\n  MultiExpandEntry entry{RegTree::kRoot, 0};\n  auto left_sum = std::vector<GradientPairPrecise>{{1.0, 1.0}, {1.0, 1.0}};\n  auto right_sum = std::vector<GradientPairPrecise>{{2.0, 2.0}, {2.0, 2.0}};\n  entry.split.Update(1.0, 1, /*new_split_value=*/0.3, true, true,\n                     linalg::MakeVec(left_sum.data(), left_sum.size()),\n                     linalg::MakeVec(right_sum.data(), right_sum.size()));\n  bst_bin_t n_bins_feature = 256;\n  auto n = common::CatBitField::ComputeStorageSize(n_bins_feature);\n  entry.split.cat_bits = decltype(entry.split.cat_bits)(n, 0);\n  common::CatBitField cat_bits{entry.split.cat_bits};\n  cat_bits.Set(n_bins_feature / 2);\n\n  Json je{Object{}};\n  entry.Save(&je);\n\n  MultiExpandEntry loaded;\n  loaded.Load(je);\n\n  
ASSERT_EQ(loaded.split.is_cat, entry.split.is_cat);\n  ASSERT_EQ(loaded.split.cat_bits, entry.split.cat_bits);\n  ASSERT_EQ(loaded.split.left_sum, entry.split.left_sum);\n  ASSERT_EQ(loaded.split.right_sum, entry.split.right_sum);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/hist/test_histogram.cc",
    "content": "/**\n * Copyright 2018-2023 by Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                // for bst_node_t, bst_bin_t, Gradient...\n#include <xgboost/context.h>             // for Context\n#include <xgboost/data.h>                // for BatchIterator, BatchSet, DMatrix\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/linalg.h>              // for MakeTensorView\n#include <xgboost/logging.h>             // for Error, LogCheck_EQ, LogCheck_LT\n#include <xgboost/span.h>                // for Span, operator!=\n#include <xgboost/tree_model.h>          // for RegTree\n\n#include <algorithm>  // for max\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t, uint32_t\n#include <iterator>   // for back_inserter\n#include <limits>     // for numeric_limits\n#include <memory>     // for shared_ptr, allocator, unique_ptr\n#include <numeric>    // for iota, accumulate\n#include <vector>     // for vector\n\n#include \"../../../../src/collective/communicator-inl.h\"  // for GetRank, GetWorldSize\n#include \"../../../../src/common/hist_util.h\"             // for GHistRow, HistogramCuts, Sketch...\n#include \"../../../../src/common/ref_resource_view.h\"     // for RefResourceView\n#include \"../../../../src/common/row_set.h\"               // for RowSetCollection\n#include \"../../../../src/common/threading_utils.h\"       // for BlockedSpace2d\n#include \"../../../../src/data/gradient_index.h\"          // for GHistIndexMatrix\n#include \"../../../../src/tree/common_row_partitioner.h\"  // for CommonRowPartitioner\n#include \"../../../../src/tree/hist/expand_entry.h\"       // for CPUExpandEntry\n#include \"../../../../src/tree/hist/hist_cache.h\"         // for BoundedHistCollection\n#include \"../../../../src/tree/hist/hist_param.h\"         // for HistMakerTrainParam\n#include \"../../../../src/tree/hist/histogram.h\"          // for HistogramBuilder\n#include 
\"../../../../src/tree/tree_view.h\"               // for ScalarTreeView\n#include \"../../categorical_helpers.h\"                    // for OneHotEncodeFeature\n#include \"../../collective/test_worker.h\"                 // for TestDistributedGlobal\n#include \"../../helpers.h\"                                // for RandomDataGenerator, GenerateRa...\n\nnamespace xgboost::tree {\nnamespace {\nvoid InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples,\n                             size_t base_rowid = 0) {\n  auto &row_indices = *row_set->Data();\n  row_indices.resize(n_samples);\n  std::iota(row_indices.begin(), row_indices.end(), base_rowid);\n  row_set->Init();\n}\n}  // anonymous namespace\n\nvoid TestAddHistRows(bool is_distributed) {\n  Context ctx;\n  std::vector<bst_node_t> nodes_to_build;\n  std::vector<bst_node_t> nodes_to_sub;\n\n  size_t constexpr kNRows = 8, kNCols = 16;\n  int32_t constexpr kMaxBins = 4;\n  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();\n  auto const &gmat =\n      *(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());\n\n  RegTree tree;\n\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  nodes_to_build.emplace_back(3);\n  nodes_to_build.emplace_back(4);\n  nodes_to_sub.emplace_back(5);\n  nodes_to_sub.emplace_back(6);\n\n  HistMakerTrainParam hist_param;\n  HistogramBuilder histogram_builder;\n  histogram_builder.Reset(&ctx, gmat.cut.TotalBins(), {kMaxBins, 0.5}, is_distributed, false,\n                          &hist_param);\n  histogram_builder.AddHistRows(tree.HostScView(), &nodes_to_build, &nodes_to_sub, false);\n\n  for (bst_node_t const &nidx : nodes_to_build) {\n    ASSERT_TRUE(histogram_builder.Histogram().HistogramExists(nidx));\n  }\n  for (bst_node_t const &nidx : nodes_to_sub) {\n  
  ASSERT_TRUE(histogram_builder.Histogram().HistogramExists(nidx));\n  }\n}\n\nTEST(CPUHistogram, AddRows) {\n  TestAddHistRows(true);\n  TestAddHistRows(false);\n}\n\nvoid TestSyncHist(bool is_distributed) {\n  std::size_t constexpr kNRows = 8, kNCols = 16;\n  bst_bin_t constexpr kMaxBins = 4;\n  Context ctx;\n\n  std::vector<bst_bin_t> nodes_for_explicit_hist_build;\n  std::vector<bst_bin_t> nodes_for_subtraction_trick;\n  RegTree tree;\n\n  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();\n  auto const &gmat =\n      *(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());\n\n  HistogramBuilder histogram;\n  uint32_t total_bins = gmat.cut.Ptrs().back();\n  HistMakerTrainParam hist_param;\n  histogram.Reset(&ctx, total_bins, {kMaxBins, 0.5}, is_distributed, false, &hist_param);\n\n  common::RowSetCollection row_set_collection;\n  {\n    row_set_collection.Clear();\n    std::vector<bst_idx_t> &row_indices = *row_set_collection.Data();\n    row_indices.resize(kNRows);\n    std::iota(row_indices.begin(), row_indices.end(), 0);\n    row_set_collection.Init();\n  }\n\n  // level 0\n  nodes_for_explicit_hist_build.emplace_back(0);\n  histogram.AddHistRows(tree.HostScView(), &nodes_for_explicit_hist_build,\n                        &nodes_for_subtraction_trick, false);\n\n  tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  nodes_for_explicit_hist_build.clear();\n  nodes_for_subtraction_trick.clear();\n\n  // level 1\n  nodes_for_explicit_hist_build.emplace_back(tree[0].LeftChild());\n  nodes_for_subtraction_trick.emplace_back(tree[0].RightChild());\n\n  histogram.AddHistRows(tree.HostScView(), &nodes_for_explicit_hist_build,\n                        &nodes_for_subtraction_trick, false);\n\n  tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n  tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);\n\n  nodes_for_explicit_hist_build.clear();\n  
nodes_for_subtraction_trick.clear();\n  // level 2\n  nodes_for_explicit_hist_build.emplace_back(3);\n  nodes_for_subtraction_trick.emplace_back(4);\n  nodes_for_explicit_hist_build.emplace_back(5);\n  nodes_for_subtraction_trick.emplace_back(6);\n\n  histogram.AddHistRows(tree.HostScView(), &nodes_for_explicit_hist_build,\n                        &nodes_for_subtraction_trick, false);\n\n  const size_t n_nodes = nodes_for_explicit_hist_build.size();\n  ASSERT_EQ(n_nodes, 2ul);\n  row_set_collection.AddSplit(0, tree[0].LeftChild(), tree[0].RightChild(), 4, 4);\n  row_set_collection.AddSplit(1, tree[1].LeftChild(), tree[1].RightChild(), 2, 2);\n  row_set_collection.AddSplit(2, tree[2].LeftChild(), tree[2].RightChild(), 2, 2);\n\n  common::BlockedSpace2d space(\n      n_nodes,\n      [&](std::size_t nidx_in_set) {\n        bst_node_t nidx = nodes_for_explicit_hist_build[nidx_in_set];\n        return row_set_collection[nidx].Size();\n      },\n      256);\n\n  std::vector<common::GHistRow> target_hists(n_nodes);\n  for (size_t i = 0; i < nodes_for_explicit_hist_build.size(); ++i) {\n    bst_node_t nidx = nodes_for_explicit_hist_build[i];\n    target_hists[i] = histogram.Histogram()[nidx];\n  }\n\n  // set values to specific nodes hist\n  std::vector<size_t> n_ids = {1, 2};\n  for (size_t i : n_ids) {\n    auto this_hist = histogram.Histogram()[i];\n    double *p_hist = reinterpret_cast<double *>(this_hist.data());\n    for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) {\n      p_hist[bin_id] = 2 * bin_id;\n    }\n  }\n  n_ids[0] = 3;\n  n_ids[1] = 5;\n  for (size_t i : n_ids) {\n    auto this_hist = histogram.Histogram()[i];\n    double *p_hist = reinterpret_cast<double *>(this_hist.data());\n    for (size_t bin_id = 0; bin_id < 2 * total_bins; ++bin_id) {\n      p_hist[bin_id] = bin_id;\n    }\n  }\n\n  histogram.Buffer().Reset(1, n_nodes, space, target_hists);\n  // sync hist\n  histogram.SyncHistogram(&ctx, tree.HostScView(), 
nodes_for_explicit_hist_build,\n                          nodes_for_subtraction_trick);\n\n  using GHistRowT = common::GHistRow;\n  auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,\n                       size_t begin, size_t end) {\n    const double *p_parent = reinterpret_cast<const double *>(parent.data());\n    const double *p_left = reinterpret_cast<const double *>(left.data());\n    const double *p_right = reinterpret_cast<const double *>(right.data());\n    for (size_t i = 2 * begin; i < 2 * end; ++i) {\n      ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]);\n    }\n  };\n  size_t node_id = 0;\n  for (auto const &nidx : nodes_for_explicit_hist_build) {\n    auto this_hist = histogram.Histogram()[nidx];\n    const size_t parent_id = tree[nidx].Parent();\n    const size_t subtraction_node_id = nodes_for_subtraction_trick[node_id];\n    auto parent_hist = histogram.Histogram()[parent_id];\n    auto sibling_hist = histogram.Histogram()[subtraction_node_id];\n\n    check_hist(parent_hist, this_hist, sibling_hist, 0, total_bins);\n    ++node_id;\n  }\n  node_id = 0;\n  for (auto const &nidx : nodes_for_subtraction_trick) {\n    auto this_hist = histogram.Histogram()[nidx];\n    const size_t parent_id = tree[nidx].Parent();\n    const size_t subtraction_node_id = nodes_for_explicit_hist_build[node_id];\n    auto parent_hist = histogram.Histogram()[parent_id];\n    auto sibling_hist = histogram.Histogram()[subtraction_node_id];\n\n    check_hist(parent_hist, this_hist, sibling_hist, 0, total_bins);\n    ++node_id;\n  }\n}\n\nTEST(CPUHistogram, SyncHist) {\n  TestSyncHist(true);\n  TestSyncHist(false);\n}\n\nvoid TestBuildHistogram(Context const *ctx, bool is_distributed, bool force_read_by_column,\n                        bool is_col_split) {\n  size_t constexpr kNRows = 8, kNCols = 16;\n  int32_t constexpr kMaxBins = 4;\n  auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();\n  if (is_col_split) 
{\n    p_fmat = std::shared_ptr<DMatrix>{\n        p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  }\n  auto const &gmat =\n      *(p_fmat->GetBatches<GHistIndexMatrix>(ctx, BatchParam{kMaxBins, 0.5}).begin());\n  uint32_t total_bins = gmat.cut.Ptrs().back();\n\n  static double constexpr kEps = 1e-6;\n  std::vector<GradientPair> gpair = {{0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f},\n                                     {0.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f},\n                                     {0.47f, 0.49f}, {0.57f, 0.59f}};\n\n  bst_node_t nid = 0;\n  HistogramBuilder histogram;\n  HistMakerTrainParam hist_param;\n  histogram.Reset(ctx, total_bins, {kMaxBins, 0.5}, is_distributed, is_col_split, &hist_param);\n\n  RegTree tree;\n\n  common::RowSetCollection row_set_collection;\n  row_set_collection.Clear();\n  std::vector<bst_idx_t> &row_indices = *row_set_collection.Data();\n  row_indices.resize(kNRows);\n  std::iota(row_indices.begin(), row_indices.end(), 0);\n  row_set_collection.Init();\n\n  CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};\n  std::vector<bst_node_t> nodes_to_build{node.nid};\n  std::vector<bst_node_t> dummy_sub;\n\n  histogram.AddHistRows(tree.HostScView(), &nodes_to_build, &dummy_sub, false);\n  common::BlockedSpace2d space{\n      1, [&](std::size_t nidx_in_set) { return row_set_collection[nidx_in_set].Size(); }, 256};\n  for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx, {kMaxBins, 0.5})) {\n    histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,\n                        linalg::MakeTensorView(ctx, gpair, gpair.size()), force_read_by_column);\n  }\n  histogram.SyncHistogram(ctx, tree.HostScView(), nodes_to_build, {});\n\n  // Check if number of histogram bins is correct\n  ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());\n  std::vector<GradientPairPrecise> histogram_expected(histogram.Histogram()[nid].size());\n\n  // Compute the correct 
histogram (histogram_expected)\n  CHECK_EQ(gpair.size(), kNRows);\n  for (size_t rid = 0; rid < kNRows; ++rid) {\n    const size_t ibegin = gmat.row_ptr[rid];\n    const size_t iend = gmat.row_ptr[rid + 1];\n    for (size_t i = ibegin; i < iend; ++i) {\n      const size_t bin_id = gmat.index[i];\n      histogram_expected[bin_id] += GradientPairPrecise(gpair[rid]);\n    }\n  }\n\n  // Now validate the computed histogram returned by BuildHist\n  for (size_t i = 0; i < histogram.Histogram()[nid].size(); ++i) {\n    GradientPairPrecise sol = histogram_expected[i];\n    ASSERT_NEAR(sol.GetGrad(), histogram.Histogram()[nid][i].GetGrad(), kEps);\n    ASSERT_NEAR(sol.GetHess(), histogram.Histogram()[nid][i].GetHess(), kEps);\n  }\n}\n\nTEST(CPUHistogram, BuildHist) {\n  Context ctx;\n  TestBuildHistogram(&ctx, true, false, false);\n  TestBuildHistogram(&ctx, false, false, false);\n  TestBuildHistogram(&ctx, true, true, false);\n  TestBuildHistogram(&ctx, false, true, false);\n}\n\nTEST(CPUHistogram, BuildHistColumnSplit) {\n  auto constexpr kWorkers = 4;\n  Context ctx;\n  std::int32_t n_total_threads = std::thread::hardware_concurrency();\n  auto n_threads = std::max(n_total_threads / kWorkers, 1);\n  ctx.UpdateAllowUnknown(Args{{\"nthread\", std::to_string(n_threads)}});\n  collective::TestDistributedGlobal(kWorkers, [&] { TestBuildHistogram(&ctx, true, true, true); });\n  collective::TestDistributedGlobal(kWorkers, [&] { TestBuildHistogram(&ctx, true, false, true); });\n}\n\nnamespace {\ntemplate <typename GradientSumT>\nvoid ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientSumT> onehot,\n                                  common::Span<GradientSumT> cat) {\n  auto cat_sum = std::accumulate(cat.cbegin(), cat.cend(), GradientPairPrecise{});\n  for (size_t c = 0; c < n_categories; ++c) {\n    auto zero = onehot[c * 2];\n    auto one = onehot[c * 2 + 1];\n\n    auto chosen = cat[c];\n    auto not_chosen = cat_sum - chosen;\n\n    
ASSERT_LE(RelError(zero.GetGrad(), not_chosen.GetGrad()), kRtEps);\n    ASSERT_LE(RelError(zero.GetHess(), not_chosen.GetHess()), kRtEps);\n\n    ASSERT_LE(RelError(one.GetGrad(), chosen.GetGrad()), kRtEps);\n    ASSERT_LE(RelError(one.GetHess(), chosen.GetHess()), kRtEps);\n  }\n}\n\nvoid TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {\n  size_t constexpr kRows = 340;\n  bst_bin_t constexpr kBins = 256;\n  auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);\n  auto cat_m = GetDMatrixFromData(x, kRows, 1);\n  cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);\n  Context ctx;\n\n  BatchParam batch_param{0, kBins};\n\n  RegTree tree;\n  CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(RegTree::kRoot)};\n  std::vector<bst_node_t> nodes_to_build;\n  nodes_to_build.push_back(node.nid);\n\n  auto gpair = GenerateRandomGradients(kRows, 0, 2);\n\n  common::RowSetCollection row_set_collection;\n  row_set_collection.Clear();\n  std::vector<bst_idx_t> &row_indices = *row_set_collection.Data();\n  row_indices.resize(kRows);\n  std::iota(row_indices.begin(), row_indices.end(), 0);\n  row_set_collection.Init();\n  HistMakerTrainParam hist_param;\n  std::vector<bst_node_t> dummy_sub;\n\n  common::BlockedSpace2d space{\n      1, [&](std::size_t nidx_in_set) { return row_set_collection[nidx_in_set].Size(); }, 256};\n\n  /**\n   * Generate hist with cat data.\n   */\n  HistogramBuilder cat_hist;\n  for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {\n    auto total_bins = gidx.cut.TotalBins();\n    cat_hist.Reset(&ctx, total_bins, {kBins, 0.5}, false, false, &hist_param);\n    cat_hist.AddHistRows(tree.HostScView(), &nodes_to_build, &dummy_sub, false);\n    cat_hist.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,\n                       linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),\n                       force_read_by_column);\n  }\n  
cat_hist.SyncHistogram(&ctx, tree.HostScView(), nodes_to_build, {});\n\n  /**\n   * Generate hist with one hot encoded data.\n   */\n  auto x_encoded = OneHotEncodeFeature(x, n_categories);\n  auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);\n  HistogramBuilder onehot_hist;\n  for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {\n    auto total_bins = gidx.cut.TotalBins();\n    onehot_hist.Reset(&ctx, total_bins, {kBins, 0.5}, false, false, &hist_param);\n    onehot_hist.AddHistRows(tree.HostScView(), &nodes_to_build, &dummy_sub, false);\n    onehot_hist.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,\n                          linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),\n                          force_read_by_column);\n  }\n  onehot_hist.SyncHistogram(&ctx, tree.HostScView(), nodes_to_build, {});\n\n  auto cat = cat_hist.Histogram()[0];\n  auto onehot = onehot_hist.Histogram()[0];\n  ValidateCategoricalHistogram(n_categories, onehot, cat);\n}\n}  // anonymous namespace\n\nTEST(CPUHistogram, Categorical) {\n  for (size_t n_categories = 2; n_categories < 8; ++n_categories) {\n    TestHistogramCategorical(n_categories, false);\n  }\n  for (size_t n_categories = 2; n_categories < 8; ++n_categories) {\n    TestHistogramCategorical(n_categories, true);\n  }\n}\nnamespace {\nvoid TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,\n                                 bool force_read_by_column) {\n  size_t constexpr kEntries = 1 << 16;\n  auto m =\n      RandomDataGenerator{kEntries / 8, 8, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n\n  std::vector<float> hess(m->Info().num_row_, 1.0);\n  if (is_approx) {\n    batch_param.hess = hess;\n  }\n\n  std::vector<bst_idx_t> partition_size(1, 0);\n  bst_bin_t total_bins{0};\n  bst_idx_t n_samples{0};\n\n  auto gpair = GenerateRandomGradients(m->Info().num_row_, 0.0, 1.0);\n  auto const 
&h_gpair = gpair.HostVector();\n\n  RegTree tree;\n  std::vector<bst_node_t> nodes{RegTree::kRoot};\n  common::BlockedSpace2d space{\n      1, [&](std::size_t nidx_in_set) { return partition_size.at(nidx_in_set); }, 256};\n\n  common::GHistRow multi_page;\n  HistogramBuilder multi_build;\n  HistMakerTrainParam hist_param;\n  std::vector<bst_node_t> dummy_sub;\n  {\n    /**\n     * Multi page\n     */\n    std::vector<common::RowSetCollection> rows_set;\n    for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {\n      CHECK_LT(page.base_rowid, m->Info().num_row_);\n      auto n_rows_in_node = page.Size();\n      partition_size[0] = std::max(partition_size[0], n_rows_in_node);\n      total_bins = page.cut.TotalBins();\n      n_samples += n_rows_in_node;\n\n      rows_set.emplace_back();\n      InitRowPartitionForTest(&rows_set.back(), n_rows_in_node, page.base_rowid);\n    }\n    ASSERT_EQ(n_samples, m->Info().num_row_);\n\n    multi_build.Reset(ctx, total_bins, batch_param, false, false, &hist_param);\n    multi_build.AddHistRows(tree.HostScView(), &nodes, &dummy_sub, false);\n    std::size_t page_idx{0};\n    for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {\n      multi_build.BuildHist(page_idx, space, page, rows_set[page_idx], nodes,\n                            linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),\n                            force_read_by_column);\n      ++page_idx;\n    }\n    multi_build.SyncHistogram(ctx, tree.HostScView(), nodes, {});\n\n    multi_page = multi_build.Histogram()[RegTree::kRoot];\n  }\n\n  HistogramBuilder single_build;\n  common::GHistRow single_page;\n  {\n    /**\n     * Single page\n     */\n    common::RowSetCollection row_set_collection;\n    InitRowPartitionForTest(&row_set_collection, n_samples);\n\n    single_build.Reset(ctx, total_bins, batch_param, false, false, &hist_param);\n    SparsePage concat;\n    std::vector<float> hess(m->Info().num_row_, 1.0f);\n    for 
(auto const &page : m->GetBatches<SparsePage>()) {\n      concat.Push(page);\n    }\n\n    auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);\n    GHistIndexMatrix gmat(ctx, concat, {}, cut, batch_param.max_bin, false,\n                          std::numeric_limits<double>::quiet_NaN());\n\n    single_build.AddHistRows(tree.HostScView(), &nodes, &dummy_sub, false);\n    single_build.BuildHist(0, space, gmat, row_set_collection, nodes,\n                           linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),\n                           force_read_by_column);\n    single_build.SyncHistogram(ctx, tree.HostScView(), nodes, {});\n\n    single_page = single_build.Histogram()[RegTree::kRoot];\n  }\n\n  for (size_t i = 0; i < single_page.size(); ++i) {\n    ASSERT_NEAR(single_page[i].GetGrad(), multi_page[i].GetGrad(), kRtEps);\n    ASSERT_NEAR(single_page[i].GetHess(), multi_page[i].GetHess(), kRtEps);\n  }\n}\n}  // anonymous namespace\n\nTEST(CPUHistogram, ExternalMemory) {\n  int32_t constexpr kBins = 256;\n  Context ctx;\n\n  TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);\n  TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);\n\n  float sparse_thresh{0.5};\n  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);\n  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);\n  sparse_thresh = std::numeric_limits<float>::quiet_NaN();\n  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);\n  TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);\n}\n\nnamespace {\nclass OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {\n public:\n  std::vector<GradientPairPrecise> TestOverflow(bool limit, bool is_distributed,\n                                                bool is_col_split) {\n    bst_bin_t constexpr kBins = 256;\n    Context ctx;\n    
HistMakerTrainParam hist_param;\n    if (limit) {\n      hist_param.Init(Args{{\"max_cached_hist_node\", \"1\"}});\n    }\n\n    std::shared_ptr<DMatrix> Xy =\n        is_col_split ? RandomDataGenerator{8192, 16, 0.5}.GenerateDMatrix(true)\n                     : RandomDataGenerator{8192, 16, 0.5}.Bins(kBins).GenerateQuantileDMatrix(true);\n    if (is_col_split) {\n      Xy =\n          std::shared_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n    }\n\n    double sparse_thresh{TrainParam::DftSparseThreshold()};\n    auto batch = BatchParam{kBins, sparse_thresh};\n    bst_bin_t n_total_bins{0};\n    float split_cond{0};\n    for (auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n      n_total_bins = page.cut.TotalBins();\n      // use a cut point in the second column for split\n      split_cond = page.cut.Values()[kBins + kBins / 2];\n    }\n\n    RegTree tree;\n    MultiHistogramBuilder hist_builder;\n    CHECK_EQ(Xy->Info().IsColumnSplit(), is_col_split);\n\n    hist_builder.Reset(&ctx, n_total_bins, tree.NumTargets(), batch, is_distributed,\n                       Xy->Info().IsColumnSplit(), &hist_param);\n\n    std::vector<CommonRowPartitioner> partitioners;\n    partitioners.emplace_back(&ctx, Xy->Info().num_row_, /*base_rowid=*/0,\n                              Xy->Info().IsColumnSplit());\n\n    auto gpair = GenerateRandomGradients(Xy->Info().num_row_, 0.0, 1.0);\n\n    CPUExpandEntry best;\n    hist_builder.BuildRootHist(Xy.get(), tree.HostScView(), partitioners,\n                               linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1),\n                               best, batch);\n\n    best.split.Update(1.0f, 1, split_cond, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});\n    tree.ExpandNode(best.nid, best.split.SplitIndex(), best.split.split_value, false,\n                    /*base_weight=*/2.0f,\n                    /*left_leaf_weight=*/1.0f, 
/*right_leaf_weight=*/1.0f, best.GetLossChange(),\n                    /*sum_hess=*/2.0f, best.split.left_sum.GetHess(),\n                    best.split.right_sum.GetHess());\n\n    std::vector<CPUExpandEntry> valid_candidates{best};\n    for (auto const &page : Xy->GetBatches<GHistIndexMatrix>(&ctx, batch)) {\n      partitioners.front().UpdatePosition(&ctx, page, valid_candidates, tree.HostScView());\n    }\n    CHECK_NE(partitioners.front()[tree.LeftChild(best.nid)].Size(), 0);\n    CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);\n\n    hist_builder.BuildHistLeftRight(\n        &ctx, Xy.get(), tree.HostScView(), partitioners, valid_candidates,\n        linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1), batch);\n\n    if (limit) {\n      CHECK(!hist_builder.Histogram(0).HistogramExists(best.nid));\n    } else {\n      CHECK(hist_builder.Histogram(0).HistogramExists(best.nid));\n    }\n\n    std::vector<GradientPairPrecise> result;\n    auto hist = hist_builder.Histogram(0)[tree.LeftChild(best.nid)];\n    std::copy(hist.cbegin(), hist.cend(), std::back_inserter(result));\n    hist = hist_builder.Histogram(0)[tree.RightChild(best.nid)];\n    std::copy(hist.cbegin(), hist.cend(), std::back_inserter(result));\n\n    return result;\n  }\n\n  void RunTest() {\n    auto param = GetParam();\n    auto res0 = this->TestOverflow(false, std::get<0>(param), std::get<1>(param));\n    auto res1 = this->TestOverflow(true, std::get<0>(param), std::get<1>(param));\n    ASSERT_EQ(res0, res1);\n  }\n};\n\nauto MakeParamsForTest() {\n  std::vector<std::tuple<bool, bool>> configs;\n  for (auto i : {true, false}) {\n    for (auto j : {true, false}) {\n      configs.emplace_back(i, j);\n    }\n  }\n  return configs;\n}\n}  // anonymous namespace\n\nTEST_P(OverflowTest, Overflow) { this->RunTest(); }\n\nINSTANTIATE_TEST_SUITE_P(CPUHistogram, OverflowTest, ::testing::ValuesIn(MakeParamsForTest()));\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/hist/test_sampler.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"../test_sampler.h\"  // VerifyApplySamplingMask\n\n#include <gtest/gtest.h>\n\n#include <algorithm>  // std::sort\n#include <cmath>      // std::exp\n#include <cstddef>    // std::size_t\n#include <numeric>    // std::partial_sum\n#include <string>     // std::to_string\n#include <vector>     // std::vector\n\n#include \"../../../../src/tree/fit_stump.h\"     // SumGradients\n#include \"../../../../src/tree/hist/sampler.h\"  // Sampler\n#include \"../../../../src/tree/param.h\"         // TrainParam\n#include \"../../helpers.h\"                      // GenerateRandomGradients\n#include \"xgboost/base.h\"                       // GradientPair,bst_target_t\n#include \"xgboost/context.h\"                    // Context\n#include \"xgboost/data.h\"                       // MetaInfo\n#include \"xgboost/linalg.h\"                     // Matrix,Constants\n\nnamespace xgboost::tree::cpu_impl {\nvoid VerifySampling(float subsample, int sampling_method, bst_target_t n_targets = 1,\n                    bool check_sum = true) {\n  Context ctx;\n\n  constexpr std::size_t kRows = 4096;\n  // Generate random gradients\n  auto gpair_container = GenerateRandomGradients(&ctx, kRows, n_targets);\n  auto h_gpair = gpair_container.gpair.HostView();\n\n  auto sum_gradients = [&]() {\n    auto sum = linalg::Empty<GradientPairPrecise>(&ctx, n_targets);\n    cpu_impl::SumGradients(&ctx, h_gpair, sum.HostView());\n    return sum.Data()->HostVector();\n  };\n\n  auto sum_gpair = sum_gradients();\n\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{\n      {\"subsample\", std::to_string(subsample)},\n      {\"sampling_method\", sampling_method == TrainParam::kUniform ? 
\"uniform\" : \"gradient_based\"}});\n  Sampler sampler{param};\n  sampler.Sample(&ctx, h_gpair);\n\n  auto sum_sampled_gpair = sum_gradients();\n  CheckSampling(subsample, n_targets, check_sum, sum_sampled_gpair, sum_gpair, h_gpair);\n}\n\nTEST(CpuSampler, NoSampling) {\n  constexpr float kSubsample = 1.0f;\n  constexpr int kSamplingMethod = TrainParam::kUniform;\n  VerifySampling(kSubsample, kSamplingMethod);\n}\n\nTEST(CpuSampler, UniformSampling) {\n  constexpr float kSubsample = 0.5;\n  constexpr int kSamplingMethod = TrainParam::kUniform;\n  // Uniform sampling preserves the mean, not the sum (check_sum = false)\n  constexpr bool kCheckSum = false;\n  // Single target\n  VerifySampling(kSubsample, kSamplingMethod, 1, kCheckSum);\n  // Multi-target\n  VerifySampling(kSubsample, kSamplingMethod, 3, kCheckSum);\n}\n\nTEST(CpuSampler, GradientBasedSampling) {\n  constexpr float kSubsample = 0.8;\n  constexpr int kSamplingMethod = TrainParam::kGradientBased;\n  VerifySampling(kSubsample, kSamplingMethod, 1);\n  VerifySampling(kSubsample, kSamplingMethod, 3);\n}\n\nTEST(CpuSampler, ApplySampling) {\n  Context ctx;\n\n  std::size_t n_samples = 1024;\n  std::size_t n_split_targets = 2, n_value_targets = 4;\n  constexpr float kSubsample = 0.5f;\n\n  TrainParam param;\n  param.UpdateAllowUnknown(\n      Args{{\"subsample\", std::to_string(kSubsample)}, {\"sampling_method\", \"gradient_based\"}});\n\n  // Generate and sample the split gradient\n  std::size_t split_shape[2] = {n_samples, n_split_targets};\n  linalg::Matrix<GradientPair> split_gpair{split_shape, ctx.Device()};\n  *split_gpair.Data() = GenerateRandomGradients(n_samples * n_split_targets, 0.0f, 1.0f);\n  linalg::Matrix<GradientPair> split_gpair_before{split_shape, ctx.Device()};\n  auto h_split_before = split_gpair_before.HostView();\n  auto h_split_init = split_gpair.HostView();\n  std::copy(linalg::cbegin(h_split_init), linalg::cend(h_split_init),\n            linalg::begin(h_split_before));\n  Sampler 
sampler{param};\n  sampler.Sample(&ctx, split_gpair.HostView());\n\n  // Generate value gradient (more targets than split)\n  std::size_t value_shape[2] = {n_samples, n_value_targets};\n  linalg::Matrix<GradientPair> value_gpair{value_shape, ctx.Device()};\n  *value_gpair.Data() = GenerateRandomGradients(n_samples * n_value_targets, 0.0f, 1.0f);\n  linalg::Matrix<GradientPair> value_gpair_before{value_shape, ctx.Device()};\n  value_gpair_before.Data()->Copy(*value_gpair.Data());\n\n  sampler.ApplySampling(&ctx, split_gpair.HostView(), &value_gpair);\n  CheckSamplingMask(split_gpair.HostView(), value_gpair.HostView(), kSubsample);\n  auto h_value_before = value_gpair_before.HostView();\n\n  auto h_value_after = value_gpair.HostView();\n  std::vector<float> thresholds;\n  auto reg_abs_grad = cpu_impl::CalcRegAbsGrad(&ctx, h_value_before, &thresholds);\n\n  std::vector<float> grad_csum(n_samples);\n  std::partial_sum(thresholds.begin(), thresholds.end() - 1, grad_csum.begin());\n  float threshold =\n      cpu_impl::CalculateThreshold(common::Span<float const>{thresholds.data(), thresholds.size()},\n                                   common::Span<float const>{grad_csum.data(), grad_csum.size()},\n                                   n_samples, static_cast<bst_idx_t>(n_samples * kSubsample));\n\n  CheckValueReweight(split_gpair.HostView(), h_value_before, h_value_after, reg_abs_grad,\n                     threshold);\n}\n}  // namespace xgboost::tree::cpu_impl\n"
  },
  {
    "path": "tests/cpp/tree/test_approx.cc",
    "content": "/**\n * Copyright 2021-2025, XGBoost contributors.\n */\n#include <gtest/gtest.h>\n#include <xgboost/gradient.h>      // for GradientContainer\n#include <xgboost/tree_model.h>    // for RegTree\n#include <xgboost/tree_updater.h>  // for TreeUpdater\n\n#include <algorithm>  // for transform\n#include <limits>\n#include <memory>  // for unique_ptr\n#include <vector>  // for vector\n\n#include \"../../../src/tree/common_row_partitioner.h\"\n#include \"../../../src/tree/param.h\"    // for TrainParam\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"../helpers.h\"\n#include \"test_column_split.h\"  // for TestColumnSplit\n#include \"test_partitioner.h\"\n\nnamespace xgboost::tree {\nnamespace {\nstd::vector<float> GenerateHess(size_t n_samples) {\n  auto grad = GenerateRandomGradients(n_samples);\n  std::vector<float> hess(grad.Size());\n  std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),\n                 [](auto gpair) { return gpair.GetHess(); });\n  return hess;\n}\n}  // anonymous namespace\n\nTEST(Approx, Partitioner) {\n  size_t n_samples = 1024, n_features = 1, base_rowid = 0;\n  Context ctx;\n  ctx.InitAllowUnknown(Args{});\n  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n  ASSERT_EQ(partitioner.base_rowid, base_rowid);\n  ASSERT_EQ(partitioner.Size(), 1);\n  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);\n\n  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);\n  auto hess = GenerateHess(n_samples);\n  std::vector<CPUExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n\n  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {\n    bst_feature_t const split_ind = 0;\n    {\n      auto min_value = -std::numeric_limits<float>::infinity();\n      RegTree tree;\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n      GetSplit(&tree, 
min_value, &candidates);\n      partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n      ASSERT_EQ(partitioner.Size(), 3);\n      ASSERT_EQ(partitioner[1].Size(), 0);\n      ASSERT_EQ(partitioner[2].Size(), n_samples);\n    }\n    {\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n      auto ptr = page.cut.Ptrs()[split_ind + 1];\n      float split_value = page.cut.Values().at(ptr / 2);\n      RegTree tree;\n      GetSplit(&tree, split_value, &candidates);\n      partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n\n      {\n        auto left_nidx = tree[RegTree::kRoot].LeftChild();\n        auto const& elem = partitioner[left_nidx];\n        ASSERT_LT(elem.Size(), n_samples);\n        ASSERT_GT(elem.Size(), 1);\n        for (auto& it : elem) {\n          auto value = page.cut.Values().at(page.index[it]);\n          ASSERT_LE(value, split_value);\n        }\n      }\n      {\n        auto right_nidx = tree[RegTree::kRoot].RightChild();\n        auto const& elem = partitioner[right_nidx];\n        for (auto& it : elem) {\n          auto value = page.cut.Values().at(page.index[it]);\n          ASSERT_GT(value, split_value) << it;\n        }\n      }\n    }\n  }\n}\n\nTEST(Approx, InteractionConstraint) {\n  auto constexpr kRows = 32;\n  auto constexpr kCols = 16;\n  auto p_dmat = GenerateCatDMatrix(kRows, kCols, 0.6f, false);\n  Context ctx;\n\n  GradientContainer gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  ObjInfo task{ObjInfo::kRegression};\n  {\n    // With constraints\n    RegTree tree{1, kCols};\n\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_histmaker\", &ctx, &task)};\n    TrainParam param;\n    param.UpdateAllowUnknown(\n        Args{{\"interaction_constraints\", \"[[0, 1]]\"}, {\"num_feature\", std::to_string(kCols)}});\n    std::vector<HostDeviceVector<bst_node_t>> position(1);\n    updater->Configure(Args{});\n    updater->Update(&param, &gpair, 
p_dmat.get(), position, {&tree});\n\n    ASSERT_EQ(tree.NumExtraNodes(), 4);\n    ASSERT_EQ(tree[0].SplitIndex(), 1);\n\n    ASSERT_EQ(tree[tree[0].LeftChild()].SplitIndex(), 0);\n    ASSERT_EQ(tree[tree[0].RightChild()].SplitIndex(), 0);\n  }\n  {\n    // Without constraints\n    RegTree tree{1u, kCols};\n\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_histmaker\", &ctx, &task)};\n    std::vector<HostDeviceVector<bst_node_t>> position(1);\n    TrainParam param;\n    param.Init(Args{});\n    updater->Configure(Args{});\n    updater->Update(&param, &gpair, p_dmat.get(), position, {&tree});\n\n    ASSERT_EQ(tree.NumExtraNodes(), 10);\n    ASSERT_EQ(tree[0].SplitIndex(), 1);\n\n    ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);\n    ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);\n  }\n}\n\nnamespace {\nvoid TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,\n                                std::vector<float>* hess, float min_value, float mid_value,\n                                CommonRowPartitioner const& expected_mid_partitioner) {\n  auto dmat =\n      std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n  std::vector<CPUExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n\n  Context ctx;\n  ctx.InitAllowUnknown(Args{});\n  for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {\n    {\n      RegTree tree;\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};\n      GetSplit(&tree, min_value, &candidates);\n      partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n      ASSERT_EQ(partitioner.Size(), 3);\n      ASSERT_EQ(partitioner[1].Size(), 0);\n      ASSERT_EQ(partitioner[2].Size(), n_samples);\n    }\n    {\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};\n      RegTree tree;\n      GetSplit(&tree, mid_value, 
&candidates);\n      partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n      {\n        auto left_nidx = tree[RegTree::kRoot].LeftChild();\n        auto const& elem = partitioner[left_nidx];\n        ASSERT_LT(elem.Size(), n_samples);\n        ASSERT_GT(elem.Size(), 1);\n        auto const& expected_elem = expected_mid_partitioner[left_nidx];\n        ASSERT_EQ(elem.Size(), expected_elem.Size());\n        for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) {\n          ASSERT_EQ(*it, *eit);\n        }\n      }\n      {\n        auto right_nidx = tree[RegTree::kRoot].RightChild();\n        auto const& elem = partitioner[right_nidx];\n        auto const& expected_elem = expected_mid_partitioner[right_nidx];\n        ASSERT_EQ(elem.Size(), expected_elem.Size());\n        for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) {\n          ASSERT_EQ(*it, *eit);\n        }\n      }\n    }\n  }\n}\n}  // anonymous namespace\n\nTEST(Approx, PartitionerColumnSplit) {\n  size_t n_samples = 1024, n_features = 16, base_rowid = 0;\n  auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);\n  auto hess = GenerateHess(n_samples);\n  std::vector<CPUExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n\n  float min_value, mid_value;\n  Context ctx;\n  ctx.InitAllowUnknown(Args{});\n  CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};\n  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {\n    bst_feature_t const split_ind = 0;\n    min_value = -std::numeric_limits<float>::infinity();\n\n    auto ptr = page.cut.Ptrs()[split_ind + 1];\n    mid_value = page.cut.Values().at(ptr / 2);\n    RegTree tree;\n    GetSplit(&tree, mid_value, &candidates);\n    mid_partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n  }\n\n  auto constexpr kWorkers = 4;\n  
collective::TestDistributedGlobal(kWorkers, [&] {\n    TestColumnSplitPartitioner(n_samples, base_rowid, Xy, &hess, min_value, mid_value,\n                               mid_partitioner);\n  });\n}\n\nnamespace {\nclass TestApproxColumnSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {\n public:\n  void Run() {\n    auto [categorical, sparsity] = GetParam();\n    TestColumnSplit(1u, categorical, \"grow_histmaker\", sparsity);\n  }\n};\n}  // namespace\n\nTEST_P(TestApproxColumnSplit, Basic) { this->Run(); }\n\nINSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColumnSplit, ::testing::ValuesIn([]() {\n                           std::vector<std::tuple<bool, float>> params;\n                           for (auto categorical : {true, false}) {\n                             for (auto sparsity : {0.0f, 0.6f}) {\n                               params.emplace_back(categorical, sparsity);\n                             }\n                           }\n                           return params;\n                         }()));\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_column_split.cc",
    "content": "/**\n * Copyright 2024, XGBoost Contributors\n */\n#include \"test_column_split.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/tree_model.h>    // for RegTree\n#include <xgboost/tree_updater.h>  // for TreeUpdater\n\n#include <vector>  // for vector\n\n#include \"../../../src/tree/param.h\"    // for TrainParam\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n\nnamespace xgboost::tree {\nvoid TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, float sparsity) {\n  auto constexpr kRows = 32;\n  auto constexpr kCols = 16;\n\n  RegTree expected_tree{n_targets, static_cast<bst_feature_t>(kCols)};\n  ObjInfo task{ObjInfo::kRegression};\n  Context ctx;\n  {\n    auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);\n    auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};\n    std::vector<HostDeviceVector<bst_node_t>> position(1);\n    TrainParam param;\n    param.Init(Args{});\n    updater->Configure(Args{});\n    updater->Update(&param, &gpair, p_dmat.get(), position, {&expected_tree});\n  }\n\n  auto constexpr kWorldSize = 2;\n\n  auto verify = [&] {\n    Context ctx;\n    collective::GetWorkerLocalThreads(kWorldSize, &ctx);\n\n    auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);\n    auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);\n\n    ObjInfo task{ObjInfo::kRegression};\n    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};\n    std::vector<HostDeviceVector<bst_node_t>> position(1);\n\n    std::unique_ptr<DMatrix> sliced{\n        p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n\n    RegTree tree{n_targets, static_cast<bst_feature_t>(kCols)};\n    TrainParam param;\n    param.Init(Args{});\n    updater->Configure(Args{});\n    updater->Update(&param, &gpair, sliced.get(), position, {&tree});\n\n    Json 
json{Object{}};\n    tree.SaveModel(&json);\n    Json expected_json{Object{}};\n    expected_tree.SaveModel(&expected_json);\n    ASSERT_EQ(json, expected_json);\n  };\n\n  collective::TestDistributedGlobal(kWorldSize, [&] { verify(); });\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_column_split.h",
    "content": "/**\n * Copyright 2023-2024, XGBoost Contributors\n */\n#pragma once\n\n#include <xgboost/data.h>          // for FeatureType, DMatrix\n\n#include <cstddef>  // for size_t\n#include <memory>   // for shared_ptr\n#include <vector>   // for vector\n\n#include \"../helpers.h\"                 // for RandomDataGenerator\n\nnamespace xgboost::tree {\ninline std::shared_ptr<DMatrix> GenerateCatDMatrix(std::size_t rows, std::size_t cols,\n                                                   float sparsity, bool categorical) {\n  if (categorical) {\n    std::vector<FeatureType> ft(cols);\n    for (size_t i = 0; i < ft.size(); ++i) {\n      ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;\n    }\n    return RandomDataGenerator(rows, cols, sparsity)\n        .Seed(3)\n        .Type(ft)\n        .MaxCategory(17)\n        .GenerateDMatrix();\n  } else {\n    return RandomDataGenerator{rows, cols, sparsity}.Seed(3).GenerateDMatrix();\n  }\n}\n\nvoid TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, float sparsity);\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_common_partitioner.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost contributors.\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                         // for bst_node_t\n#include <xgboost/context.h>                      // for Context\n\n#include <algorithm>                              // for transform\n#include <iterator>                               // for distance\n#include <vector>                                 // for vector\n\n#include \"../../../src/common/numeric.h\"          // for ==RunLengthEncode\n#include \"../../../src/common/row_set.h\"          // for RowSetCollection\n#include \"../../../src/data/gradient_index.h\"     // for GHistIndexMatrix\n#include \"../../../src/tree/common_row_partitioner.h\"\n#include \"../../../src/tree/hist/expand_entry.h\"  // for CPUExpandEntry\n#include \"../helpers.h\"                           // for RandomDataGenerator\n#include \"test_partitioner.h\"                     // for GetSplit\n\nnamespace xgboost::tree {\nnamespace {\nvoid TestLeafPartition(size_t n_samples) {\n  size_t const n_features = 2, base_rowid = 0;\n  Context ctx;\n  common::RowSetCollection row_set;\n  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n\n  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);\n  std::vector<CPUExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n  RegTree tree;\n  std::vector<float> hess(n_samples, 0);\n  // emulate sampling\n  auto not_sampled = [](size_t i) {\n    size_t const kSampleFactor{3};\n    return i % kSampleFactor != 0;\n  };\n  for (size_t i = 0; i < hess.size(); ++i) {\n    if (not_sampled(i)) {\n      hess[i] = 1.0f;\n    }\n  }\n\n  std::vector<size_t> h_nptr;\n  float split_value{0};\n  bst_feature_t const split_ind = 0;\n\n  for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {\n    auto ptr = page.cut.Ptrs()[split_ind + 1];\n    split_value = page.cut.Values().at(ptr / 2);\n    
GetSplit(&tree, split_value, &candidates);\n    partitioner.UpdatePosition(&ctx, page, candidates, tree.HostScView());\n    std::vector<bst_node_t> position(page.Size());\n    partitioner.LeafPartition(&ctx, tree.HostScView(), hess, position);\n    std::sort(position.begin(), position.end());\n    size_t beg = std::distance(\n        position.begin(),\n        std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));\n    std::vector<size_t> nptr;\n    common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);\n    std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });\n    auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);\n    ASSERT_EQ(nptr.size(), n_uniques + 1);\n    ASSERT_EQ(nptr[0], beg);\n    ASSERT_EQ(nptr.back(), n_samples);\n\n    h_nptr = nptr;\n  }\n\n  if (h_nptr.front() == n_samples) {\n    return;\n  }\n\n  ASSERT_GE(h_nptr.size(), 2);\n\n  for (auto const& page : Xy->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    size_t left{0};\n    for (size_t i = 0; i < batch.Size(); ++i) {\n      if (not_sampled(i) && batch[i][split_ind].fvalue < split_value) {\n        left++;\n      }\n    }\n    ASSERT_EQ(left, h_nptr[1] - h_nptr[0]);  // equal to number of sampled assigned to left\n  }\n}\n\nvoid TestExternalMemory() {\n  Context ctx;\n  bst_bin_t max_bin = 32;\n  auto p_fmat =\n      RandomDataGenerator{256, 16, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n  std::vector<CommonRowPartitioner> partitioners;\n\n  RegTree tree;\n  std::vector<CPUExpandEntry> candidates{{0, 0}};\n\n  auto gpair = GenerateRandomGradients(p_fmat->Info().num_row_);\n  auto t_gpair = linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), p_fmat->Info().num_row_, 1);\n  std::vector<bst_node_t> position(p_fmat->Info().num_row_);\n\n  auto param = BatchParam{max_bin, TrainParam::DftSparseThreshold()};\n  float 
split_value{0.0f};\n  bst_feature_t const split_ind = 0;\n  for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, param)) {\n    if (partitioners.empty()) {\n      auto ptr = page.cut.Ptrs()[split_ind + 1];\n      split_value = page.cut.Values().at(ptr / 2);\n      GetSplit(&tree, split_value, &candidates);\n    }\n\n    partitioners.emplace_back(&ctx, page.Size(), page.base_rowid, false);\n    partitioners.back().UpdatePosition(&ctx, page, candidates, tree.HostScView());\n    partitioners.back().LeafPartition(&ctx, tree.HostScView(), t_gpair, position);\n  }\n\n  bst_idx_t n_left{0};\n  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {\n    auto batch = page.GetView();\n    for (size_t i = 0; i < batch.Size(); ++i) {\n      ASSERT_EQ(batch[i].size(), 16);\n      if (batch[i][split_ind].fvalue < split_value) {\n        n_left++;\n      }\n    }\n  }\n  auto n_left_pos = std::count_if(position.cbegin(), position.cend(),\n                                  [&](auto v) { return v == tree[RegTree::kRoot].LeftChild(); });\n  ASSERT_EQ(n_left, n_left_pos);\n  std::sort(position.begin(), position.end());\n  auto end_it = std::unique(position.begin(), position.end());\n  ASSERT_EQ(std::distance(position.begin(), end_it), 2);\n}\n}  // anonymous namespace\n\nTEST(CommonRowPartitioner, LeafPartition) {\n  for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {\n    TestLeafPartition(n_samples);\n  }\n}\n\nTEST(CommonRowPartitioner, LeafPartitionExternalMemory) { TestExternalMemory(); }\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_constraints.cc",
    "content": "/**\n * Copyright 2019-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>\n#include <xgboost/logging.h>\n\n#include <memory>\n#include <string>\n\n#include \"../../../src/tree/constraints.h\"\n#include \"../../../src/tree/hist/evaluate_splits.h\"\n#include \"../helpers.h\"\n\nnamespace xgboost::tree {\nTEST(CPUFeatureInteractionConstraint, Empty) {\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{});\n  bst_feature_t constexpr kFeatures = 6;\n\n  FeatureInteractionConstraintHost constraints;\n  constraints.Configure(param, kFeatures);\n\n  // no-op\n  constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);\n\n  std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};\n  common::Span<bst_feature_t> s_input_feature_list =\n      common::Span<bst_feature_t>{h_input_feature_list};\n\n  for (auto f : h_input_feature_list) {\n    constraints.Query(f, 1);\n  }\n\n  // no-op\n  ASSERT_TRUE(constraints.Query(94389, 12309));\n}\n\nTEST(CPUFeatureInteractionConstraint, Basic) {\n  std::string const constraints_str = R\"constraint([[1, 2], [2, 3, 4]])constraint\";\n\n  std::vector<std::pair<std::string, std::string>> args{\n      {\"interaction_constraints\", constraints_str}};\n  TrainParam param;\n  param.interaction_constraints = constraints_str;\n  bst_feature_t constexpr kFeatures = 6;\n\n  FeatureInteractionConstraintHost constraints;\n  constraints.Configure(param, kFeatures);\n  constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);\n\n  std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};\n\n  ASSERT_TRUE(constraints.Query(1, 1));\n  ASSERT_TRUE(constraints.Query(1, 2));\n  ASSERT_TRUE(constraints.Query(1, 3));\n  ASSERT_TRUE(constraints.Query(1, 4));\n\n  ASSERT_FALSE(constraints.Query(1, 0));\n  ASSERT_FALSE(constraints.Query(1, 5));\n}\n\nTEST(CPUMonoConstraint, Basic) {\n  std::size_t kRows{64}, kCols{16};\n  Context ctx;\n\n  
TrainParam param;\n  std::vector<std::int32_t> mono(kCols, 1);\n  I32Array arr;\n  for (std::size_t i = 0; i < kCols; ++i) {\n    arr.GetArray().push_back(mono[i]);\n  }\n  Json jarr{std::move(arr)};\n  std::string str_mono;\n  Json::Dump(jarr, &str_mono);\n  str_mono.front() = '(';\n  str_mono.back() = ')';\n\n  param.UpdateAllowUnknown(Args{{\"monotone_constraints\", str_mono}});\n\n  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);\n  auto sampler = std::make_shared<common::ColumnSampler>();\n\n  HistEvaluator evalutor{&ctx, &param, Xy->Info(), sampler};\n  evalutor.InitRoot(GradStats{2.0, 2.0});\n\n  SplitEntry split;\n  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});\n  CPUExpandEntry entry{0, 0, split};\n  RegTree tree{1, static_cast<bst_feature_t>(kCols)};\n  evalutor.ApplyTreeSplit(entry, &tree);\n\n  ASSERT_TRUE(evalutor.Evaluator().has_constraint);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_constraints.cu",
    "content": "/**\n * Copyright 2019-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <thrust/copy.h>\n#include <thrust/device_vector.h>\n\n#include <cstdint>\n#include <set>\n#include <string>\n\n#include \"../../../src/common/device_helpers.cuh\"\n#include \"../../../src/tree/constraints.cuh\"\n#include \"../../../src/tree/param.h\"\n#include \"../helpers.h\"  // MakeCUDACtx\n\nnamespace xgboost {\nnamespace {\n\nstruct FConstraintWrapper : public FeatureInteractionConstraintDevice {\n  common::Span<LBitField64> GetNodeConstraints() {\n    return FeatureInteractionConstraintDevice::s_node_constraints_;\n  }\n  FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :\n      FeatureInteractionConstraintDevice(param, n_features) {}\n\n  dh::device_vector<bst_feature_t> const& GetDSets() const {\n    return d_sets_;\n  }\n  dh::device_vector<size_t> const& GetDSetsPtr() const {\n    return d_sets_ptr_;\n  }\n};\n\nstd::string GetConstraintsStr() {\n  std::string const constraints_str = R\"constraint([[1, 2], [3, 4, 5]])constraint\";\n  return constraints_str;\n}\n\ntree::TrainParam GetParameter() {\n  Args args{{\"interaction_constraints\", GetConstraintsStr()}};\n  tree::TrainParam param;\n  param.Init(args);\n  return param;\n}\n\nvoid CompareBitField(LBitField64 d_field, std::set<uint32_t> positions) {\n  std::vector<LBitField64::value_type> h_field_storage(d_field.Bits().size());\n  thrust::copy(thrust::device_ptr<LBitField64::value_type>(d_field.Bits().data()),\n               thrust::device_ptr<LBitField64::value_type>(\n                   d_field.Bits().data() + d_field.Bits().size()),\n               h_field_storage.data());\n  LBitField64 h_field{ {h_field_storage.data(),\n                        h_field_storage.data() + h_field_storage.size()} };\n\n  for (size_t i = 0; i < h_field.Capacity(); ++i) {\n    if (positions.find(i) != positions.cend()) {\n      ASSERT_TRUE(h_field.Check(i));\n    } else {\n      
ASSERT_FALSE(h_field.Check(i));\n    }\n  }\n}\n\n}  // anonymous namespace\n\n\nTEST(GPUFeatureInteractionConstraint, Init) {\n  {\n    int32_t constexpr kFeatures = 6;\n    tree::TrainParam param = GetParameter();\n    FConstraintWrapper constraints(param, kFeatures);\n    ASSERT_EQ(constraints.Features(), kFeatures);\n    common::Span<LBitField64> s_nodes_constraints = constraints.GetNodeConstraints();\n    for (LBitField64 const& d_node : s_nodes_constraints) {\n      std::vector<LBitField64::value_type> h_node_storage(d_node.Bits().size());\n      thrust::copy(thrust::device_ptr<LBitField64::value_type const>(d_node.Bits().data()),\n                   thrust::device_ptr<LBitField64::value_type const>(\n                       d_node.Bits().data() + d_node.Bits().size()),\n                   h_node_storage.data());\n      LBitField64 h_node {\n        {h_node_storage.data(), h_node_storage.data() +  h_node_storage.size()}\n      };\n      // no feature is attached to node.\n      for (size_t i = 0; i < h_node.Capacity(); ++i) {\n        ASSERT_FALSE(h_node.Check(i));\n      }\n    }\n  }\n\n  {\n    // Test one feature in multiple sets\n    int32_t constexpr kFeatures = 7;\n    tree::TrainParam param = GetParameter();\n    param.interaction_constraints = R\"([[0, 1, 3], [3, 5, 6]])\";\n    FConstraintWrapper constraints(param, kFeatures);\n    std::vector<bst_feature_t> h_sets {0, 0, 0, 1, 1, 1};\n    std::vector<size_t> h_sets_ptr {0, 1, 2, 2, 4, 4, 5, 6};\n    auto d_sets = constraints.GetDSets();\n    ASSERT_EQ(h_sets.size(), d_sets.size());\n    auto d_sets_ptr = constraints.GetDSetsPtr();\n    ASSERT_EQ(h_sets_ptr, d_sets_ptr);\n    for (size_t i = 0; i < h_sets.size(); ++i) {\n      ASSERT_EQ(h_sets[i], d_sets[i]);\n    }\n    for (size_t i = 0; i < h_sets_ptr.size(); ++i) {\n      ASSERT_EQ(h_sets_ptr[i], d_sets_ptr[i]);\n    }\n  }\n\n  {\n    // Test having more than 1 LBitField64::value_type\n    int32_t constexpr kFeatures = 129;\n    tree::TrainParam 
param = GetParameter();\n    param.interaction_constraints = R\"([[0, 1, 3], [3, 5, 128], [127, 128]])\";\n    FConstraintWrapper constraints(param, kFeatures);\n    auto d_sets = constraints.GetDSets();\n    auto d_sets_ptr = constraints.GetDSetsPtr();\n    auto _128_beg = d_sets_ptr[128];\n    auto _128_end = d_sets_ptr[128 + 1];\n    ASSERT_EQ(_128_end - _128_beg, 2);\n    ASSERT_EQ(d_sets[_128_beg], 1);\n    ASSERT_EQ(d_sets[_128_end-1], 2);\n  }\n}\n\nTEST(GPUFeatureInteractionConstraint, Split) {\n  tree::TrainParam param = GetParameter();\n  int32_t constexpr kFeatures = 6;\n  FConstraintWrapper constraints(param, kFeatures);\n\n  {\n    LBitField64 d_node[3];\n    constraints.Split(0, /*feature_id=*/1, 1, 2);\n    for (size_t nid = 0; nid < 3; ++nid) {\n      d_node[nid] = constraints.GetNodeConstraints()[nid];\n      ASSERT_EQ(d_node[nid].Bits().size(), 1);\n      CompareBitField(d_node[nid], {1, 2});\n    }\n  }\n\n  {\n    LBitField64 d_node[5];\n    constraints.Split(1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);\n    for (auto nid : {1, 3, 4}) {\n      d_node[nid] = constraints.GetNodeConstraints()[nid];\n      CompareBitField(d_node[nid], {0, 1, 2});\n    }\n    for (auto nid : {0, 2}) {\n      d_node[nid] = constraints.GetNodeConstraints()[nid];\n      CompareBitField(d_node[nid], {1, 2});\n    }\n  }\n}\n\nTEST(GPUFeatureInteractionConstraint, QueryNode) {\n  auto ctx = MakeCUDACtx(0);\n  tree::TrainParam param = GetParameter();\n  bst_feature_t constexpr kFeatures = 6;\n  FConstraintWrapper constraints(param, kFeatures);\n\n  {\n    auto span = constraints.QueryNode(&ctx, 0);\n    ASSERT_EQ(span.size(), 0);\n  }\n\n  {\n    constraints.Split(/*node_id=*/ 0, /*feature_id=*/ 1, 1, 2);\n    auto span = constraints.QueryNode(&ctx, 0);\n    std::vector<bst_feature_t> h_result (span.size());\n    thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),\n                 thrust::device_ptr<bst_feature_t>(span.data() + span.size()),\n             
    h_result.begin());\n    ASSERT_EQ(h_result.size(), 2);\n    ASSERT_EQ(h_result[0], 1);\n    ASSERT_EQ(h_result[1], 2);\n  }\n\n  {\n    constraints.Split(1, /*feature_id=*/0, 3, 4);\n    auto span = constraints.QueryNode(&ctx, 1);\n    std::vector<bst_feature_t> h_result (span.size());\n    thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),\n                 thrust::device_ptr<bst_feature_t>(span.data() + span.size()),\n                 h_result.begin());\n    ASSERT_EQ(h_result.size(), 3);\n    ASSERT_EQ(h_result[0], 0);\n    ASSERT_EQ(h_result[1], 1);\n    ASSERT_EQ(h_result[2], 2);\n\n    // same as parent\n    span = constraints.QueryNode(&ctx, 3);\n    h_result.resize(span.size());\n    thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),\n                 thrust::device_ptr<bst_feature_t>(span.data() + span.size()),\n                 h_result.begin());\n    ASSERT_EQ(h_result.size(), 3);\n    ASSERT_EQ(h_result[0], 0);\n    ASSERT_EQ(h_result[1], 1);\n    ASSERT_EQ(h_result[2], 2);\n  }\n\n  {\n    tree::TrainParam large_param = GetParameter();\n    large_param.interaction_constraints = R\"([[1, 139], [244, 0], [139, 221]])\";\n    FConstraintWrapper large_features(large_param, 256);\n    large_features.Split(0, 139, 1, 2);\n    auto span = large_features.QueryNode(&ctx, 0);\n    std::vector<bst_feature_t> h_result (span.size());\n    thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),\n                 thrust::device_ptr<bst_feature_t>(span.data() + span.size()),\n                 h_result.begin());\n    ASSERT_EQ(h_result.size(), 3);\n    ASSERT_EQ(h_result[0], 1);\n    ASSERT_EQ(h_result[1], 139);\n    ASSERT_EQ(h_result[2], 221);\n  }\n}\n\nnamespace {\nvoid CompareFeatureList(common::Span<bst_feature_t const> s_output,\n                        std::vector<bst_feature_t> solution) {\n  std::vector<bst_feature_t> h_output(s_output.size());\n  thrust::copy(dh::tcbegin(s_output), dh::tcend(s_output), h_output.begin());\n  
ASSERT_EQ(h_output.size(), solution.size());\n  for (size_t i = 0; i < solution.size(); ++i) {\n    ASSERT_EQ(h_output[i], solution[i]);\n  }\n}\n}  // anonymous namespace\n\nTEST(GPUFeatureInteractionConstraint, Query) {\n  {\n    tree::TrainParam param = GetParameter();\n    bst_feature_t constexpr kFeatures = 6;\n    FConstraintWrapper constraints(param, kFeatures);\n    std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};\n    dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);\n    common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);\n\n    auto s_output = constraints.Query(s_input_feature_list, 0);\n    CompareFeatureList(s_output, h_input_feature_list);\n  }\n  {\n    tree::TrainParam param = GetParameter();\n    bst_feature_t constexpr kFeatures = 6;\n    FConstraintWrapper constraints(param, kFeatures);\n    constraints.Split(/*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2);\n    constraints.Split(/*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);\n    constraints.Split(/*node_id=*/4, /*feature_id=*/3, /*left_id=*/5, /*right_id=*/6);\n    /*\n     * (node id) [allowed features]\n     *\n     *               (0) [1, 2]\n     *           /        \\\n     *      {split at 0}   \\\n     *         /            \\\n     *        (1)[0, 1, 2]  (2)[1, 2]\n     *     /        \\\n     *    /      {split at 3}\n     *   /            \\\n     * (3)[0, 1, 2]   (4)[0, 1, 2, 3, 4, 5]\n     *\n     */\n\n    std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};\n    dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);\n    common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);\n\n    auto s_output = constraints.Query(s_input_feature_list, 1);\n    CompareFeatureList(s_output, {0, 1, 2});\n    s_output = constraints.Query(s_input_feature_list, 2);\n    CompareFeatureList(s_output, {1, 2});\n    s_output 
= constraints.Query(s_input_feature_list, 3);\n    CompareFeatureList(s_output, {0, 1, 2});\n    s_output = constraints.Query(s_input_feature_list, 4);\n    CompareFeatureList(s_output, {0, 1, 2, 3, 4, 5});\n    s_output = constraints.Query(s_input_feature_list, 5);\n    CompareFeatureList(s_output, {0, 1, 2, 3, 4, 5});\n    s_output = constraints.Query(s_input_feature_list, 6);\n    CompareFeatureList(s_output, {0, 1, 2, 3, 4, 5});\n  }\n\n  // Test shared feature\n  {\n    tree::TrainParam param = GetParameter();\n    bst_feature_t constexpr kFeatures = 6;\n    std::string const constraints_str = R\"constraint([[1, 2], [2, 3, 4]])constraint\";\n    param.interaction_constraints = constraints_str;\n\n    FConstraintWrapper constraints(param, kFeatures);\n    constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);\n\n    std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};\n    dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);\n    common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);\n\n    auto s_output = constraints.Query(s_input_feature_list, 1);\n    CompareFeatureList(s_output, {1, 2, 3, 4});\n  }\n\n  // Test choosing free feature in root\n  {\n    tree::TrainParam param = GetParameter();\n    bst_feature_t constexpr kFeatures = 6;\n    std::string const constraints_str = R\"constraint([[0, 1]])constraint\";\n    param.interaction_constraints = constraints_str;\n    FConstraintWrapper constraints(param, kFeatures);\n    std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};\n    dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);\n    common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);\n    constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);\n    auto s_output = constraints.Query(s_input_feature_list, 1);\n    CompareFeatureList(s_output, {2});\n    s_output 
= constraints.Query(s_input_feature_list, 2);\n    CompareFeatureList(s_output, {2});\n  }\n}\n\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_evaluate_splits.h",
    "content": "#ifndef TESTS_CPP_TREE_TEST_EVALUATE_SPLITS_H_\n#define TESTS_CPP_TREE_TEST_EVALUATE_SPLITS_H_\n\n/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                // for GradientPairInternal, GradientPairPrecise\n#include <xgboost/data.h>                // for MetaInfo\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/span.h>                // for operator!=, Span, SpanIterator\n\n#include <algorithm>  // for max, max_element, next_permutation, copy\n#include <cmath>      // for isnan\n#include <cstddef>    // for size_t\n#include <cstdint>    // for int32_t, uint64_t, uint32_t\n#include <limits>     // for numeric_limits\n#include <vector>     // for vector\n\n#include \"../../../src/common/hist_util.h\"      // for HistogramCuts, HistCollection, GHistRow\n#include \"../../../src/tree/hist/hist_cache.h\"  // for HistogramCollection\n#include \"../../../src/tree/param.h\"            // for TrainParam, GradStats\n\nnamespace xgboost::tree {\n/**\n * @brief Enumerate all possible partitions for categorical split.\n */\nclass TestPartitionBasedSplit : public ::testing::Test {\n protected:\n  size_t n_bins_ = 6;\n  std::vector<size_t> sorted_idx_;\n  TrainParam param_;\n  MetaInfo info_;\n  float best_score_{-std::numeric_limits<float>::infinity()};\n  common::HistogramCuts cuts_{0};\n  BoundedHistCollection hist_;\n  GradientPairPrecise total_gpair_;\n\n  void SetUp() override;\n};\n\ninline auto MakeCutsForTest(std::vector<float> values, std::vector<uint32_t> ptrs,\n                            DeviceOrd device) {\n  common::HistogramCuts cuts{static_cast<bst_feature_t>(ptrs.size() - 1)};\n  cuts.cut_values_.HostVector() = values;\n  cuts.cut_ptrs_.HostVector() = ptrs;\n\n  if (device.IsCUDA()) {\n    cuts.cut_ptrs_.SetDevice(device);\n    cuts.cut_values_.SetDevice(device);\n  }\n\n  return cuts;\n}\n\nclass TestCategoricalSplitWithMissing : public 
testing::Test {\n protected:\n  common::HistogramCuts cuts_{0};\n  // Setup gradients and parent sum with missing values.\n  GradientPairPrecise parent_sum_{1.0, 6.0};\n  std::vector<GradientPairPrecise> feature_histogram_{\n      {0.5, 0.5}, {0.5, 0.5}, {1.0, 1.0}, {1.0, 1.0}};\n  TrainParam param_;\n\n  void SetUp() override {\n    cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, DeviceOrd::CPU());\n    auto max_cat = *std::max_element(cuts_.cut_values_.HostVector().begin(),\n                                     cuts_.cut_values_.HostVector().end());\n    cuts_.SetCategorical(true, max_cat);\n    param_.UpdateAllowUnknown(\n        Args{{\"min_child_weight\", \"0\"}, {\"reg_lambda\", \"0\"}, {\"max_cat_to_onehot\", \"1\"}});\n  }\n\n  void CheckResult(float loss_chg, bst_feature_t split_ind, float fvalue, bool is_cat,\n                   bool dft_left, GradientPairPrecise left_sum, GradientPairPrecise right_sum) {\n    // forward\n    // it: 0, gain: 0.545455\n    // it: 1, gain: 1.000000\n    // it: 2, gain: 2.250000\n    // backward\n    // it: 3, gain: 1.000000\n    // it: 2, gain: 2.250000\n    // it: 1, gain: 3.142857\n    ASSERT_NEAR(loss_chg, 2.97619, kRtEps);\n    ASSERT_TRUE(is_cat);\n    ASSERT_TRUE(std::isnan(fvalue));\n    ASSERT_EQ(split_ind, 0);\n    ASSERT_FALSE(dft_left);\n    ASSERT_EQ(left_sum.GetHess(), 2.5);\n    ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());\n  }\n};\n}  // namespace xgboost::tree\n\n#endif  // TESTS_CPP_TREE_TEST_EVALUATE_SPLITS_H_\n"
  },
  {
    "path": "tests/cpp/tree/test_fit_stump.cc",
    "content": "/**\n * Copyright 2022-2024, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/linalg.h>\n\n#include \"../../src/common/linalg_op.h\"\n#include \"../../src/tree/fit_stump.h\"\n#include \"../collective/test_worker.h\"  // for TestDistributedGlobal\n#include \"../helpers.h\"\n\nnamespace xgboost::tree {\nnamespace {\nvoid TestFitStump(Context const *ctx, DataSplitMode split = DataSplitMode::kRow) {\n  std::size_t constexpr kRows = 16, kTargets = 2;\n  linalg::Matrix<GradientPair> gpair;\n  gpair.SetDevice(ctx->Device());\n  gpair.Reshape(kRows, kTargets);\n  auto h_gpair = gpair.HostView();\n  for (std::size_t i = 0; i < kRows; ++i) {\n    for (std::size_t t = 0; t < kTargets; ++t) {\n      h_gpair(i, t) = GradientPair{static_cast<float>(i), 1};\n    }\n  }\n  linalg::Vector<float> out;\n  MetaInfo info;\n  info.data_split_mode = split;\n  FitStump(ctx, info, gpair, kTargets, &out);\n  auto h_out = out.HostView();\n  for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {\n    // sum_hess == kRows\n    auto n = static_cast<float>(kRows);\n    auto sum_grad = n * (n - 1) / 2;\n    ASSERT_EQ(static_cast<float>(-sum_grad / n), *it);\n  }\n}\n}  // anonymous namespace\n\nTEST(InitEstimation, FitStump) {\n  Context ctx;\n  TestFitStump(&ctx);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST(InitEstimation, GPUFitStump) {\n  Context ctx;\n  ctx.UpdateAllowUnknown(Args{{\"device\", \"cuda\"}});\n  TestFitStump(&ctx);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nTEST(InitEstimation, FitStumpColumnSplit) {\n  Context ctx;\n  auto constexpr kWorldSize{3};\n  collective::TestDistributedGlobal(kWorldSize, [&] { TestFitStump(&ctx, DataSplitMode::kCol); });\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_gpu_approx.cu",
    "content": "/**\n * Copyright 2024-2025, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/gradient.h>      // for GradientContainer\n#include <xgboost/json.h>          // for Json\n#include <xgboost/task.h>          // for ObjInfo\n#include <xgboost/tree_model.h>    // for RegTree\n#include <xgboost/tree_updater.h>  // for TreeUpdater\n\n#include \"../../../src/tree/param.h\"    // for TrainParam\n#include \"../collective/test_worker.h\"  // for BaseMGPUTest\n#include \"../helpers.h\"                 // for GenerateRandomGradients\n\nnamespace xgboost::tree {\nnamespace {\nRegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> approx_maker{TreeUpdater::Create(\"grow_gpu_approx\", ctx, &task)};\n  approx_maker->Configure(Args{});\n\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{});\n\n  auto gpair = GenerateRandomGradients(ctx, dmat->Info().num_row_, 1);\n\n  std::vector<HostDeviceVector<bst_node_t>> position(1);\n  RegTree tree;\n  approx_maker->Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},\n                       {&tree});\n  return tree;\n}\n\nvoid VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {\n  auto ctx = MakeCUDACtx(DistGpuIdx());\n\n  auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};\n\n  RegTree tree = GetApproxTree(&ctx, sliced.get());\n\n  Json json{Object{}};\n  tree.SaveModel(&json);\n  Json expected_json{Object{}};\n  expected_tree.SaveModel(&expected_json);\n  ASSERT_EQ(json, expected_json);\n}\n}  // anonymous namespace\n\nclass MGPUApproxTest : public collective::BaseMGPUTest {};\n\nTEST_F(MGPUApproxTest, GPUApproxColumnSplit) {\n  auto constexpr kRows = 32;\n  auto constexpr kCols 
= 16;\n\n  Context ctx(MakeCUDACtx(0));\n  auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);\n  RegTree expected_tree = GetApproxTree(&ctx, dmat.get());\n\n  this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, true);\n  this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, false);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_gpu_hist.cu",
    "content": "/**\n * Copyright 2017-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>                // for Args\n#include <xgboost/context.h>             // for Context\n#include <xgboost/gradient.h>            // for GradientContainer\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/json.h>                // for Json\n#include <xgboost/task.h>                // for ObjInfo\n#include <xgboost/tree_model.h>          // for RegTree\n#include <xgboost/tree_updater.h>        // for TreeUpdater\n\n#include <memory>  // for unique_ptr\n#include <string>  // for string\n#include <vector>  // for vector\n\n#include \"../../../src/tree/param.h\"    // for TrainParam\n#include \"../collective/test_worker.h\"  // for BaseMGPUTest\n#include \"../helpers.h\"\n\nnamespace xgboost::tree {\nnamespace {\nvoid UpdateTree(Context const* ctx, GradientContainer* gpair, DMatrix* dmat, RegTree* tree,\n                HostDeviceVector<bst_float>* preds, float subsample,\n                const std::string& sampling_method, bst_bin_t max_bin) {\n  Args args{\n      {\"max_depth\", \"2\"},\n      {\"max_bin\", std::to_string(max_bin)},\n      {\"min_child_weight\", \"0.0\"},\n      {\"reg_alpha\", \"0\"},\n      {\"reg_lambda\", \"0\"},\n      {\"subsample\", std::to_string(subsample)},\n      {\"sampling_method\", sampling_method},\n  };\n  TrainParam param;\n  param.UpdateAllowUnknown(args);\n\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> hist_maker{TreeUpdater::Create(\"grow_gpu_hist\", ctx, &task)};\n  hist_maker->Configure(Args{});\n\n  std::vector<HostDeviceVector<bst_node_t>> position(1);\n  hist_maker->Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},\n                     {tree});\n  auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);\n  ASSERT_TRUE(hist_maker->UpdatePredictionCache(dmat, common::Span{position}, 
cache));\n}\n}  // anonymous namespace\n\nTEST(GpuHist, UniformSampling) {\n  constexpr size_t kRows = 4096;\n  constexpr size_t kCols = 2;\n  constexpr float kSubsample = 0.9999;\n  auto ctx = MakeCUDACtx(0);\n  ctx.Rng().seed(1994);\n\n  // Create an in-memory DMatrix.\n  auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}.GenerateDMatrix(true);\n  ASSERT_TRUE(p_fmat->SingleColBlock());\n\n  auto gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  // Build a tree using the in-memory DMatrix.\n  RegTree tree;\n  HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, \"uniform\", kRows);\n  // Build another tree using sampling.\n  RegTree tree_sampling;\n  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample, \"uniform\",\n             kRows);\n\n  // Make sure the predictions are the same.\n  auto preds_h = preds.ConstHostVector();\n  auto preds_sampling_h = preds_sampling.ConstHostVector();\n  for (size_t i = 0; i < kRows; i++) {\n    EXPECT_NEAR(preds_h[i], preds_sampling_h[i], 1e-8);\n  }\n}\n\nTEST(GpuHist, GradientBasedSampling) {\n  constexpr size_t kRows = 4096;\n  constexpr size_t kCols = 2;\n  constexpr float kSubsample = 0.9999;\n  auto ctx = MakeCUDACtx(0);\n  ctx.Rng().seed(1994);\n\n  // Create an in-memory DMatrix.\n  auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}.GenerateDMatrix(true);\n  auto gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  // Build a tree using the in-memory DMatrix.\n  RegTree tree;\n  HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, \"uniform\", kRows);\n\n  // Build another tree using sampling.\n  RegTree tree_sampling;\n  HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, 
kSubsample,\n             \"gradient_based\", kRows);\n\n  // Make sure the predictions are the same.\n  auto preds_h = preds.ConstHostVector();\n  auto preds_sampling_h = preds_sampling.ConstHostVector();\n  for (size_t i = 0; i < kRows; i++) {\n    EXPECT_NEAR(preds_h[i], preds_sampling_h[i], 1e-3);\n  }\n}\n\nTEST(GpuHist, ExternalMemory) {\n  constexpr size_t kRows = 4096;\n  constexpr size_t kCols = 2;\n\n  // Create a DMatrix with multiple batches.\n  auto p_fmat_ext =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(4).GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_FALSE(p_fmat_ext->SingleColBlock());\n\n  // Create a single batch DMatrix.\n  auto p_fmat =\n      RandomDataGenerator{kRows, kCols, 0.0f}.Batches(1).GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_TRUE(p_fmat->SingleColBlock());\n\n  auto ctx = MakeCUDACtx(0);\n  auto gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  // Build a tree using the in-memory DMatrix.\n  RegTree tree;\n  HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, \"uniform\", kRows);\n  // Build another tree using multiple ELLPACK pages.\n  RegTree tree_ext;\n  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, \"uniform\", kRows);\n\n  // Make sure the predictions are the same.\n  auto preds_h = preds.ConstHostVector();\n  auto preds_ext_h = preds_ext.ConstHostVector();\n  for (size_t i = 0; i < kRows; i++) {\n    EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-6);\n  }\n}\n\nTEST(GpuHist, ExternalMemoryWithSampling) {\n  constexpr size_t kRows = 4096, kCols = 2;\n  constexpr float kSubsample = 0.5;\n  const std::string kSamplingMethod = \"gradient_based\";\n  auto ctx = MakeCUDACtx(0);\n  ctx.Rng().seed(0);\n\n  // Create a single batch DMatrix.\n  auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}\n                    .Device(ctx.Device())\n         
           .Batches(1)\n                    .GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_TRUE(p_fmat->SingleColBlock());\n\n  // Create a DMatrix with multiple batches.\n  auto p_fmat_ext = RandomDataGenerator{kRows, kCols, 0.0f}\n                        .Device(ctx.Device())\n                        .Batches(4)\n                        .GenerateSparsePageDMatrix(\"temp\", true);\n  ASSERT_FALSE(p_fmat_ext->SingleColBlock());\n\n  auto gpair = GenerateRandomGradients(&ctx, kRows, 1);\n\n  // Build a tree using the in-memory DMatrix.\n  auto rng = ctx.Rng();\n\n  RegTree tree;\n  HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows);\n\n  // Build another tree using multiple ELLPACK pages.\n  ctx.Rng() = rng;\n  RegTree tree_ext;\n  HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());\n  UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, kSubsample, kSamplingMethod,\n             kRows);\n\n  Json jtree{Object{}};\n  Json jtree_ext{Object{}};\n  tree.SaveModel(&jtree);\n  tree_ext.SaveModel(&jtree_ext);\n  ASSERT_EQ(jtree, jtree_ext);\n}\n\nTEST(GpuHist, ConfigIO) {\n  auto ctx = MakeCUDACtx(0);\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(\"grow_gpu_hist\", &ctx, &task)};\n  updater->Configure(Args{});\n\n  Json j_updater{Object{}};\n  updater->SaveConfig(&j_updater);\n  ASSERT_TRUE(IsA<Object>(j_updater[\"hist_train_param\"]));\n  updater->LoadConfig(j_updater);\n\n  Json j_updater_roundtrip{Object{}};\n  updater->SaveConfig(&j_updater_roundtrip);\n  ASSERT_TRUE(IsA<Object>(j_updater_roundtrip[\"hist_train_param\"]));\n\n  ASSERT_EQ(j_updater, j_updater_roundtrip);\n}\n\nTEST(GpuHist, MaxDepth) {\n  auto ctx = MakeCUDACtx(0);\n  size_t constexpr kRows = 16;\n  size_t constexpr kCols = 4;\n  auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();\n\n  auto learner = 
std::unique_ptr<Learner>(Learner::Create({p_mat}));\n  learner->SetParam(\"max_depth\", \"32\");\n  learner->Configure();\n\n  ASSERT_THROW({ learner->UpdateOneIter(0, p_mat); }, dmlc::Error);\n}\n\nnamespace {\nRegTree GetHistTree(Context const* ctx, DMatrix* dmat) {\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> hist_maker{TreeUpdater::Create(\"grow_gpu_hist\", ctx, &task)};\n  hist_maker->Configure(Args{});\n\n  TrainParam param;\n  param.UpdateAllowUnknown(Args{});\n  auto gpair = GenerateRandomGradients(ctx, dmat->Info().num_row_, 1);\n\n  std::vector<HostDeviceVector<bst_node_t>> position(1);\n  RegTree tree;\n  hist_maker->Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},\n                     {&tree});\n  return tree;\n}\n\nvoid VerifyHistColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {\n  Context ctx(MakeCUDACtx(GPUIDX));\n\n  auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);\n  auto const world_size = collective::GetWorldSize();\n  auto const rank = collective::GetRank();\n  std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};\n\n  RegTree tree = GetHistTree(&ctx, sliced.get());\n\n  Json json{Object{}};\n  tree.SaveModel(&json);\n  Json expected_json{Object{}};\n  expected_tree.SaveModel(&expected_json);\n  ASSERT_EQ(json, expected_json);\n}\n}  // anonymous namespace\n\nclass MGPUHistTest : public collective::BaseMGPUTest {};\n\nTEST_F(MGPUHistTest, HistColumnSplit) {\n  auto constexpr kRows = 32;\n  auto constexpr kCols = 16;\n\n  Context ctx(MakeCUDACtx(0));\n  auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);\n  RegTree expected_tree = GetHistTree(&ctx, dmat.get());\n\n  this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, true);\n  this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, false);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_multi_target_tree_model.cc",
    "content": "/**\n * Copyright 2023-2026, XGBoost Contributors\n */\n#include \"test_multi_target_tree_model.h\"\n\n#include <gtest/gtest.h>\n#include <xgboost/context.h>  // for Context\n#include <xgboost/linalg.h>   // for Vector\n#include <xgboost/multi_target_tree_model.h>\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include <memory>   // for unique_ptr\n#include <numeric>  // for iota\n\n#include \"../../../src/tree/tree_view.h\"\n\nnamespace xgboost {\nstd::unique_ptr<RegTree> MakeMtTreeForTest(bst_target_t n_targets) {\n  bst_feature_t n_features{4};\n  std::unique_ptr<RegTree> tree{std::make_unique<RegTree>(n_targets, n_features)};\n  CHECK(tree->IsMultiTarget());\n\n  auto iota_weights = [&](float init, HostDeviceVector<float>* data,\n                          common::Span<std::size_t> shape) {\n    shape[0] = n_targets;\n    auto& h_data = data->HostVector();\n    h_data.resize(n_targets);\n    std::iota(h_data.begin(), h_data.end(), init);\n  };\n\n  linalg::Vector<float> base_weight;\n  base_weight.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    iota_weights(1.0f, data, shape);\n  });\n  tree->SetRoot(base_weight.HostView(), /*sum_hess=*/1.0f);\n\n  linalg::Vector<float> left_weight;\n  left_weight.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    iota_weights(2.0f, data, shape);\n  });\n  linalg::Vector<float> right_weight;\n  right_weight.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {\n    iota_weights(3.0f, data, shape);\n  });\n\n  tree->ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),\n                   left_weight.HostView(), right_weight.HostView(), /*loss_chg=*/0.5f,\n                   /*sum_hess=*/1.0f, /*left_sum=*/0.6f, /*right_sum=*/0.4f);\n  tree->GetMultiTargetTree()->SetLeaves();\n  return tree;\n}\n\nTEST(MultiTargetTree, JsonIO) {\n  auto tree = MakeMtTreeForTest(3);\n  
ASSERT_EQ(tree->NumNodes(), 3);\n  ASSERT_EQ(tree->NumTargets(), 3);\n  ASSERT_EQ(tree->GetMultiTargetTree()->Size(), 3);\n  ASSERT_EQ(tree->Size(), 3);\n\n  Json jtree{Object{}};\n  tree->SaveModel(&jtree);\n\n  auto check_jtree = [](Json jtree, RegTree const& tree) {\n    ASSERT_EQ(get<String const>(jtree[\"tree_param\"][\"num_nodes\"]), std::to_string(tree.NumNodes()));\n    ASSERT_EQ(get<F32Array const>(jtree[\"base_weights\"]).size(),\n              tree.NumNodes() * tree.NumTargets());\n    ASSERT_EQ(get<I32Array const>(jtree[\"parents\"]).size(), tree.NumNodes());\n    ASSERT_EQ(get<I32Array const>(jtree[\"left_children\"]).size(), tree.NumNodes());\n    ASSERT_EQ(get<I32Array const>(jtree[\"right_children\"]).size(), tree.NumNodes());\n  };\n  check_jtree(jtree, *tree);\n  Context ctx;\n\n  RegTree loaded;\n  loaded.LoadModel(jtree);\n  ASSERT_TRUE(loaded.IsMultiTarget());\n  ASSERT_EQ(loaded.NumNodes(), 3);\n  ASSERT_EQ(loaded.GetMultiTargetTree()->LeafWeights(ctx.Device()),\n            tree->GetMultiTargetTree()->LeafWeights(ctx.Device()));\n\n  Json jtree1{Object{}};\n  loaded.SaveModel(&jtree1);\n  check_jtree(jtree1, *tree);\n\n  RegTree loaded1;\n  loaded1.LoadModel(jtree1);\n  ASSERT_EQ(loaded1.GetMultiTargetTree()->LeafWeights(ctx.Device()),\n            tree->GetMultiTargetTree()->LeafWeights(ctx.Device()));\n\n  Json jtree2{Object{}};\n  loaded1.SaveModel(&jtree2);\n  ASSERT_EQ(Json::Dump(jtree1), Json::Dump(jtree2));\n}\n\nnamespace {\nvoid TestTreeDump(std::string format, std::string leaf_key) {\n  auto tree = MakeMtTreeForTest(3);\n  auto n_features = tree->NumFeatures();\n  FeatureMap fmap;\n  for (bst_feature_t f = 0; f < n_features; ++f) {\n    auto name = \"feat_\" + std::to_string(f);\n    fmap.PushBack(f, name.c_str(), \"q\");\n  }\n  {\n    auto str = tree->DumpModel(fmap, false, format);\n    ASSERT_NE(str.find(leaf_key + \"[2, 3, 4]\"), std::string::npos);\n    ASSERT_NE(str.find(leaf_key + \"[3, 4, 5]\"), std::string::npos);\n  }\n\n 
 {\n    // Test the \"...\"\n    bst_target_t n_targets{4};\n    RegTree tree{n_targets, n_features};\n    linalg::Vector<float> weight{{1.0f, 2.0f, 3.0f, 4.0f}, {4ul}, DeviceOrd::CPU()};\n    tree.SetRoot(weight.HostView(), /*sum_hess=*/1.0f);\n    tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, weight.HostView(),\n                    weight.HostView(), weight.HostView(), /*loss_chg=*/0.5f, /*sum_hess=*/1.0f,\n                    /*left_sum=*/0.6f, /*right_sum=*/0.4f);\n    tree.GetMultiTargetTree()->SetLeaves();\n    auto str = tree.DumpModel(fmap, false, format);\n    ASSERT_NE(str.find(leaf_key + \"[1, 2, ..., 4]\"), std::string::npos);\n  }\n}\n}  // namespace\n\nTEST(MultiTargetTree, DotDump) { TestTreeDump(\"dot\", \"leaf=\"); }\n\nTEST(MultiTargetTree, TextDump) { TestTreeDump(\"text\", \"leaf=\"); }\n\nTEST(MultiTargetTree, JsonDump) { TestTreeDump(\"json\", \"\\\"leaf\\\": \"); }\n\nTEST(MultiTargetTree, View) {\n  auto tree = MakeMtTreeForTest(3);\n  auto v = tree->HostMtView();\n  ASSERT_EQ(v.NumTargets(), 3);\n  ASSERT_EQ(v.Size(), 3);\n  ASSERT_EQ(v.LeftChild(0), 1);\n  ASSERT_EQ(v.RightChild(0), 2);\n}\n\nTEST(MultiTargetTree, SetLeaves) {\n  bst_target_t n_targets{5};\n  bst_feature_t n_features{4};\n  std::unique_ptr<RegTree> tree{std::make_unique<RegTree>(n_targets, n_features)};\n  CHECK(tree->IsMultiTarget());\n  // Reduce to 2 targets\n  linalg::Vector<float> base_weight{{1.0f, 2.0f}, {2ul}, DeviceOrd::CPU()};\n  tree->SetRoot(base_weight.HostView(), /*sum_hess=*/1.0f);\n  ASSERT_EQ(tree->GetMultiTargetTree()->NumSplitTargets(), 2);\n\n  linalg::Vector<float> left_weight{{2.0f, 3.0f}, {2ul}, DeviceOrd::CPU()};\n  linalg::Vector<float> right_weight{{3.0f, 4.0f}, {2ul}, DeviceOrd::CPU()};\n  tree->ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),\n                   left_weight.HostView(), right_weight.HostView(), /*loss_chg=*/0.5f,\n                   /*sum_hess=*/1.0f, /*left_sum=*/0.6f, 
/*right_sum=*/0.4f);\n\n  std::vector<float> leaf_weights(n_targets * 2);\n  std::iota(leaf_weights.begin(), leaf_weights.end(), 0);\n  tree->SetLeaves({1, 2}, {leaf_weights});\n  ASSERT_TRUE(tree->HostMtView().IsLeaf(1));\n  ASSERT_TRUE(tree->HostMtView().IsLeaf(2));\n  auto mt_tree = tree->HostMtView();\n  auto n_leaves = tree->GetMultiTargetTree()->NumLeaves();\n  ASSERT_EQ(tree->GetNumLeaves(), n_leaves);\n  ASSERT_EQ(2, n_leaves);\n  ASSERT_EQ(mt_tree.leaf_weights.Shape(0), n_leaves);\n  ASSERT_EQ(mt_tree.leaf_weights.Shape(1), n_targets);\n  auto leaves = mt_tree.leaf_weights;\n  for (std::size_t i = 0; i < leaves.Size(); ++i) {\n    ASSERT_EQ(leaves.Values()[i], i);\n  }\n  auto left = mt_tree.LeafValue(1);\n  for (std::size_t i = 0; i < left.Size(); ++i) {\n    ASSERT_EQ(left.Values()[i], i);\n  }\n  auto right = mt_tree.LeafValue(2);\n  for (std::size_t i = 0; i < right.Size(); ++i) {\n    ASSERT_EQ(right.Values()[i], i + left.Size());\n  }\n}\n\nTEST(MultiTargetTree, Statistics) {\n  // Test that gain and sum_hess are serialized and deserialized correctly\n  auto tree = MakeMtTreeForTest(3);\n  // Following values are defined by the `MakeMtTreeForTest.\n  auto view = tree->HostMtView();\n  // Gain and sum_hess stored at the parent (split node)\n  ASSERT_FLOAT_EQ(view.LossChg(0), 0.5f);\n  ASSERT_FLOAT_EQ(view.SumHess(0), 1.0f);\n  // Child nodes have their sum_hess values\n  ASSERT_FLOAT_EQ(view.LossChg(1), 0.0f);  // Leaves have no gain\n  ASSERT_FLOAT_EQ(view.SumHess(1), 0.6f);  // Left child\n  ASSERT_FLOAT_EQ(view.LossChg(2), 0.0f);\n  ASSERT_FLOAT_EQ(view.SumHess(2), 0.4f);  // Right child\n\n  // Test serialization round-trip\n  Json jtree{Object{}};\n  tree->SaveModel(&jtree);\n\n  // Check that statistics are in the JSON\n  auto const& obj = get<Object const>(jtree);\n  ASSERT_TRUE(obj.find(\"loss_changes\") != obj.end());\n  ASSERT_TRUE(obj.find(\"sum_hessian\") != obj.end());\n  auto const& gains = get<F32Array const>(jtree[\"loss_changes\"]);\n 
 ASSERT_EQ(gains.size(), tree->NumNodes());\n  ASSERT_FLOAT_EQ(gains[0], 0.5f);\n\n  // Load and verify statistics are preserved\n  RegTree loaded;\n  loaded.LoadModel(jtree);\n  auto loaded_view = loaded.HostMtView();\n  ASSERT_FLOAT_EQ(loaded_view.LossChg(0), 0.5f);\n  ASSERT_FLOAT_EQ(loaded_view.SumHess(0), 1.0f);\n  ASSERT_FLOAT_EQ(loaded_view.SumHess(1), 0.6f);\n  ASSERT_FLOAT_EQ(loaded_view.SumHess(2), 0.4f);\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_multi_target_tree_model.h",
    "content": "/**\n * Copyright 2026, XGBoost Contributors\n */\n#pragma once\n#include <xgboost/tree_model.h>  // for RegTree\n\n#include <memory>  // for unique_ptr\n\nnamespace xgboost {\nstd::unique_ptr<RegTree> MakeMtTreeForTest(bst_target_t n_targets);\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_node_partition.cc",
    "content": "/**\n * Copyright 2023 by XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>       // for Context\n#include <xgboost/task.h>          // for ObjInfo\n#include <xgboost/tree_updater.h>  // for TreeUpdater\n\n#include <memory>  // for unique_ptr\n\n#include \"../helpers.h\"\n\nnamespace xgboost {\nTEST(Updater, HasNodePosition) {\n  Context ctx;\n  ObjInfo task{ObjInfo::kRegression, true, true};\n  std::unique_ptr<TreeUpdater> up{TreeUpdater::Create(\"grow_histmaker\", &ctx, &task)};\n  ASSERT_TRUE(up->HasNodePosition());\n\n  up.reset(TreeUpdater::Create(\"grow_quantile_histmaker\", &ctx, &task));\n  ASSERT_TRUE(up->HasNodePosition());\n\n#if defined(XGBOOST_USE_CUDA)\n  ctx = MakeCUDACtx(0);\n  up.reset(TreeUpdater::Create(\"grow_gpu_hist\", &ctx, &task));\n  ASSERT_TRUE(up->HasNodePosition());\n#endif  // defined(XGBOOST_USE_CUDA)\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_param.cc",
    "content": "// Copyright by Contributors\n#include \"../../../src/tree/param.h\"\n#include \"../helpers.h\"\n#include <gtest/gtest.h>\n\nTEST(Param, VectorIOStream) {\n  std::vector<int> vals = {3, 2, 1};\n  std::stringstream ss;\n  std::vector<int> vals_in;\n\n  ss << vals;\n  EXPECT_EQ(ss.str(), \"(3,2,1)\");\n\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  vals = {1};\n  ss << vals;\n  EXPECT_EQ(ss.str(), \"(1,)\");\n}\n\nTEST(Param, VectorStreamRead) {\n  std::vector<int> vals = {3, 2, 1};\n  std::stringstream ss;\n  std::vector<int> vals_in;\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"(3, 2, 1)\";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"(3L,2L,1L)\";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \" (3,2,1,)\";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \" ( 3, 2,1 )\";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \" ( 3, 2,1 ) \";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \" 321 \";\n  ss >> vals_in;\n  EXPECT_EQ(vals_in[0], 321);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"(3.0,2,1)\";\n  ss >> vals_in;\n  EXPECT_NE(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"1a\";\n  ss >> vals_in;\n  EXPECT_NE(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"abcde\";\n  ss >> vals_in;\n  EXPECT_NE(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  ss << \"(3,2,1\";\n  ss >> vals_in;\n  EXPECT_NE(vals_in, vals);\n\n  vals_in.clear(); ss.flush(); ss.clear(); ss.str(\"\");\n  
vals_in.emplace_back(3);\n  ss << \"( )\";\n  ss >> vals_in;\n  ASSERT_TRUE(ss.good());\n}\n\nTEST(Param, SplitEntry) {\n  xgboost::tree::SplitEntry se1;\n  EXPECT_FALSE(se1.NeedReplace(-1, 100));\n\n  xgboost::tree::SplitEntry se2;\n  EXPECT_FALSE(se1.Update(se2));\n  EXPECT_FALSE(se2.Update(-1, 100, 0, true, false, xgboost::tree::GradStats(),\n                          xgboost::tree::GradStats()));\n  ASSERT_TRUE(se2.Update(1, 100, 0, true, false, xgboost::tree::GradStats(),\n                         xgboost::tree::GradStats()));\n  ASSERT_TRUE(se1.Update(se2));\n\n  xgboost::tree::SplitEntry se3;\n  se3.Update(2, 101, 0, false, false, xgboost::tree::GradStats(),\n             xgboost::tree::GradStats());\n  xgboost::tree::SplitEntry::Reduce(se2, se3);\n  EXPECT_EQ(se2.SplitIndex(), 101);\n  EXPECT_FALSE(se2.DefaultLeft());\n\n  EXPECT_TRUE(se1.NeedReplace(3, 1));\n}\n"
  },
  {
    "path": "tests/cpp/tree/test_partitioner.h",
    "content": "/**\n * Copyright 2021-2026 by XGBoost contributors.\n */\n#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_\n#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_\n#include <xgboost/context.h>                      // for Context\n#include <xgboost/linalg.h>                       // for Constant, Vector\n#include <xgboost/logging.h>                      // for CHECK\n#include <xgboost/tree_model.h>                   // for RegTree\n\n#include <vector>                                 // for vector\n\n#include \"../../../src/tree/hist/expand_entry.h\"  // for CPUExpandEntry, MultiExpandEntry\n\nnamespace xgboost::tree {\ninline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {\n  CHECK(!tree->IsMultiTarget());\n  tree->ExpandNode(\n      /*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,\n      /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n      /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  candidates->front().split.split_value = split_value;\n  candidates->front().split.sindex = 0;\n  candidates->front().split.sindex |= (1U << 31);\n}\n\ninline void GetMultiSplitForTest(RegTree *tree, float split_value,\n                                 std::vector<MultiExpandEntry> *candidates) {\n  CHECK(tree->IsMultiTarget());\n  auto n_targets = tree->NumTargets();\n  Context ctx;\n  linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};\n  linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};\n  linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};\n  tree->SetRoot(base_weight.HostView(), /*sum_hess=*/0.0f);\n  tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,\n                   /*default_left=*/true, base_weight.HostView(), left_weight.HostView(),\n                   right_weight.HostView(), /*loss_chg=*/0.0f, /*sum_hess=*/0.0f, /*left_sum=*/0.0f,\n                   
/*right_sum=*/0.0f);\n  candidates->front().split.split_value = split_value;\n  candidates->front().split.sindex = 0;\n  candidates->front().split.sindex |= (1U << 31);\n  tree->GetMultiTargetTree()->SetLeaves();\n}\n}  // namespace xgboost::tree\n#endif  // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_\n"
  },
  {
    "path": "tests/cpp/tree/test_prediction_cache.cc",
    "content": "/**\n * Copyright 2021-2026, XGBoost contributors\n */\n#include \"test_prediction_cache.h\"\n\n#include <gtest/gtest.h>\n\nnamespace xgboost {\nTEST_F(TestPredictionCache, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\", \"one_output_per_tree\");\n}\n\nTEST_F(TestPredictionCache, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", \"one_output_per_tree\");\n}\n\nTEST_F(TestPredictionCache, MultiHist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", \"multi_output_tree\");\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestPredictionCache, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", \"one_output_per_tree\");\n}\n\nTEST_F(TestPredictionCache, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", \"multi_output_tree\");\n}\n\nTEST_F(TestPredictionCache, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\", \"one_output_per_tree\");\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_prediction_cache.h",
    "content": "/**\n * Copyright 2021-2026, XGBoost contributors.\n */\n#pragma once\n\n#include <gtest/gtest.h>\n\n#include <xgboost/host_device_vector.h>\n#include <xgboost/tree_updater.h>\n\n#include <memory>\n\n#include \"../../../src/tree/param.h\"  // for TrainParam\n#include \"../helpers.h\"\n#include \"xgboost/task.h\"             // for ObjInfo\n\nnamespace xgboost {\nclass TestPredictionCache : public ::testing::Test {\n  std::shared_ptr<DMatrix> Xy_;\n  std::size_t n_samples_{2048};\n\n protected:\n  void SetUp() override {\n    std::size_t n_features = 13;\n    bst_target_t n_targets = 3;\n    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);\n  }\n\n  void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,\n                      std::string const& grow_policy, std::string const& strategy) {\n    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n    learner->SetParam(\"device\", ctx->DeviceName());\n    learner->SetParam(\"updater\", updater_name);\n    learner->SetParam(\"multi_strategy\", strategy);\n    learner->SetParam(\"grow_policy\", grow_policy);\n    learner->SetParam(\"subsample\", std::to_string(subsample));\n    learner->SetParam(\"nthread\", \"0\");\n    learner->Configure();\n\n    for (size_t i = 0; i < 8; ++i) {\n      learner->UpdateOneIter(i, Xy_);\n    }\n\n    HostDeviceVector<float> out_prediction_cached;\n    learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);\n\n    Json model{Object()};\n    learner->SaveModel(&model);\n\n    HostDeviceVector<float> out_prediction;\n    {\n      std::unique_ptr<Learner> learner{Learner::Create({Xy_})};\n      learner->LoadModel(model);\n      learner->Predict(Xy_, false, &out_prediction, 0, 0);\n    }\n\n    auto const h_predt_cached = out_prediction_cached.ConstHostSpan();\n    auto const h_predt = out_prediction.ConstHostSpan();\n\n    ASSERT_EQ(h_predt.size(), h_predt_cached.size());\n    for (size_t i = 
0; i < h_predt.size(); ++i) {\n      ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);\n    }\n  }\n\n  void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {\n    {\n      ctx->InitAllowUnknown(Args{{\"nthread\", \"8\"}});\n\n      ObjInfo task{ObjInfo::kRegression};\n      std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};\n      RegTree tree;\n      std::vector<RegTree*> trees{&tree};\n      auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);\n      tree::TrainParam param;\n      param.UpdateAllowUnknown(Args{{\"max_bin\", \"64\"}});\n\n      updater->Configure(Args{});\n      std::vector<HostDeviceVector<bst_node_t>> position(1);\n      updater->Update(&param, &gpair, Xy_.get(), position, trees);\n      HostDeviceVector<float> out_prediction_cached;\n      out_prediction_cached.SetDevice(ctx->Device());\n      out_prediction_cached.Resize(n_samples_);\n      auto cache =\n          linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);\n      ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), common::Span{position}, cache));\n    }\n\n    for (auto policy : {\"depthwise\", \"lossguide\"}) {\n      for (auto subsample : {1.0f, 0.4f}) {\n        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);\n        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);\n      }\n    }\n  }\n};\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_prune.cc",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/data.h>\n#include <xgboost/gradient.h>            // for GradientContainer\n#include <xgboost/host_device_vector.h>  // for HostDeviceVector\n#include <xgboost/learner.h>\n#include <xgboost/tree_updater.h>\n\n#include <memory>\n#include <string>\n#include <vector>\n\n#include \"../../../src/tree/param.h\"  // for TrainParam\n#include \"../helpers.h\"\n\nnamespace xgboost::tree {\nTEST(Updater, Prune) {\n  int constexpr kCols = 16;\n\n  std::vector<std::pair<std::string, std::string>> cfg;\n  cfg.emplace_back(\"num_feature\", std::to_string(kCols));\n  cfg.emplace_back(\"min_split_loss\", \"10\");\n  Context ctx;\n\n  // These data are just place holders.\n  GradientContainer gpair;\n  gpair.gpair = linalg::Matrix<GradientPair>\n      {{ {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},\n         {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} }, {8, 1}, ctx.Device()};\n  std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{32, 10, 0}.GenerateDMatrix()};\n\n  // prepare tree\n  RegTree tree = RegTree{1u, kCols};\n  std::vector<RegTree*> trees {&tree};\n  // prepare pruner\n  TrainParam param;\n  param.UpdateAllowUnknown(cfg);\n\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create(\"prune\", &ctx, &task));\n\n  // loss_chg < min_split_loss;\n  std::vector<HostDeviceVector<bst_node_t>> position(trees.size());\n  tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);\n\n  ASSERT_EQ(tree.NumExtraNodes(), 0);\n\n  // loss_chg > min_split_loss;\n  tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);\n\n  
ASSERT_EQ(tree.NumExtraNodes(), 2);\n\n  // loss_chg == min_split_loss;\n  tree.Stat(0).loss_chg = 10;\n  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);\n\n  ASSERT_EQ(tree.NumExtraNodes(), 2);\n\n  // Test depth\n  // loss_chg > min_split_loss\n  tree.ExpandNode(tree[0].LeftChild(),\n                  0, 0.5f, true, 0.3, 0.4, 0.5,\n                  /*loss_chg=*/18.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  tree.ExpandNode(tree[0].RightChild(),\n                  0, 0.5f, true, 0.3, 0.4, 0.5,\n                  /*loss_chg=*/19.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n\n  cfg.emplace_back(\"max_depth\", \"1\");\n  param.UpdateAllowUnknown(cfg);\n  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);\n  ASSERT_EQ(tree.NumExtraNodes(), 2);\n\n  tree.ExpandNode(tree[0].LeftChild(),\n                  0, 0.5f, true, 0.3, 0.4, 0.5,\n                  /*loss_chg=*/18.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  cfg.emplace_back(\"min_split_loss\", \"0\");\n  param.UpdateAllowUnknown(cfg);\n\n  pruner->Update(&param, &gpair, p_dmat.get(), position, trees);\n  ASSERT_EQ(tree.NumExtraNodes(), 2);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_quantile_hist.cc",
    "content": "/**\n * Copyright 2018-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/gradient.h>  // for GradientContainer\n#include <xgboost/host_device_vector.h>\n#include <xgboost/linalg.h>\n#include <xgboost/tree_updater.h>\n\n#include <cmath>\n#include <cstddef>  // for size_t\n#include <cstring>\n#include <limits>\n#include <memory>\n#include <string>\n#include <vector>\n\n#include \"../../../src/tree/common_row_partitioner.h\"\n#include \"../../../src/tree/hist/expand_entry.h\"  // for MultiExpandEntry, CPUExpandEntry\n#include \"../collective/test_worker.h\"            // for TestDistributedGlobal\n#include \"../helpers.h\"\n#include \"test_column_split.h\"  // for TestColumnSplit\n#include \"test_partitioner.h\"\n#include \"xgboost/data.h\"\n#include \"xgboost/task.h\"\n\nnamespace xgboost::tree {\nnamespace {\ntemplate <typename ExpandEntry>\nvoid TestPartitioner(bst_target_t n_targets) {\n  std::size_t n_samples = 1024, base_rowid = 0;\n  bst_feature_t n_features = 1;\n\n  Context ctx;\n  ctx.InitAllowUnknown(Args{});\n\n  CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n  ASSERT_EQ(partitioner.base_rowid, base_rowid);\n  ASSERT_EQ(partitioner.Size(), 1);\n  ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);\n\n  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);\n  std::vector<ExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n\n  auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);\n\n  for (auto const& page : Xy->GetBatches<SparsePage>()) {\n    GHistIndexMatrix gmat{&ctx, page, {}, cuts, 64, true, 0.5};\n    bst_feature_t const split_ind = 0;\n    common::ColumnMatrix column_indices;\n    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());\n    {\n      auto min_value = -std::numeric_limits<float>::infinity();\n      RegTree tree{n_targets, n_features};\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, 
false};\n      if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {\n        GetSplit(&tree, min_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostScView());\n      } else {\n        GetMultiSplitForTest(&tree, min_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostMtView());\n      }\n      ASSERT_EQ(partitioner.Size(), 3);\n      ASSERT_EQ(partitioner[1].Size(), 0);\n      ASSERT_EQ(partitioner[2].Size(), n_samples);\n    }\n    {\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};\n      auto ptr = gmat.cut.Ptrs()[split_ind + 1];\n      float split_value = gmat.cut.Values().at(ptr / 2);\n      RegTree tree{n_targets, n_features};\n      if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {\n        GetSplit(&tree, split_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostScView());\n      } else {\n        GetMultiSplitForTest(&tree, split_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostMtView());\n      }\n\n      {\n        auto left_nidx = tree.LeftChild(RegTree::kRoot);\n        auto const& elem = partitioner[left_nidx];\n        ASSERT_LT(elem.Size(), n_samples);\n        ASSERT_GT(elem.Size(), 1);\n        for (auto& it : elem) {\n          auto value = gmat.cut.Values().at(gmat.index[it]);\n          ASSERT_LE(value, split_value);\n        }\n      }\n      {\n        auto right_nidx = tree.RightChild(RegTree::kRoot);\n        auto const& elem = partitioner[right_nidx];\n        for (auto& it : elem) {\n          auto value = 
gmat.cut.Values().at(gmat.index[it]);\n          ASSERT_GT(value, split_value);\n        }\n      }\n    }\n  }\n}\n}  // anonymous namespace\n\nTEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }\n\nTEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }\n\nnamespace {\n\ntemplate <typename ExpandEntry>\nvoid VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples,\n                                  bst_feature_t n_features, size_t base_rowid,\n                                  std::shared_ptr<DMatrix> Xy, float min_value, float mid_value,\n                                  CommonRowPartitioner const& expected_mid_partitioner) {\n  auto dmat =\n      std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};\n\n  Context ctx;\n  ctx.InitAllowUnknown(Args{});\n\n  std::vector<ExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n  auto cuts = common::SketchOnDMatrix(&ctx, dmat.get(), 64);\n\n  for (auto const& page : Xy->GetBatches<SparsePage>()) {\n    GHistIndexMatrix gmat(&ctx, page, {}, cuts, 64, true, 0.5);\n    common::ColumnMatrix column_indices;\n    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());\n    {\n      RegTree tree{n_targets, n_features};\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};\n      if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {\n        GetSplit(&tree, min_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostScView());\n      } else {\n        GetMultiSplitForTest(&tree, min_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostMtView());\n      }\n      ASSERT_EQ(partitioner.Size(), 3);\n      ASSERT_EQ(partitioner[1].Size(), 0);\n      
ASSERT_EQ(partitioner[2].Size(), n_samples);\n    }\n    {\n      RegTree tree{n_targets, n_features};\n      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};\n      if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {\n        GetSplit(&tree, mid_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostScView());\n      } else {\n        GetMultiSplitForTest(&tree, mid_value, &candidates);\n        partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                tree.HostMtView());\n      }\n      auto left_nidx = tree.LeftChild(RegTree::kRoot);\n\n      {\n        auto const& elem = partitioner[left_nidx];\n        ASSERT_LT(elem.Size(), n_samples);\n        ASSERT_GT(elem.Size(), 1);\n        auto const& expected_elem = expected_mid_partitioner[left_nidx];\n        ASSERT_EQ(elem.Size(), expected_elem.Size());\n        for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) {\n          ASSERT_EQ(*it, *eit);\n        }\n      }\n      {\n        auto right_nidx = tree.RightChild(RegTree::kRoot);\n        auto const& elem = partitioner[right_nidx];\n        auto const& expected_elem = expected_mid_partitioner[right_nidx];\n        ASSERT_EQ(elem.Size(), expected_elem.Size());\n        for (auto it = elem.begin(), eit = expected_elem.begin(); it != elem.end(); ++it, ++eit) {\n          ASSERT_EQ(*it, *eit);\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename ExpandEntry>\nvoid TestColumnSplitPartitioner(bst_target_t n_targets) {\n  std::size_t n_samples = 1024, base_rowid = 0;\n  bst_feature_t n_features = 16;\n  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);\n  std::vector<ExpandEntry> candidates{{0, 0}};\n  candidates.front().split.loss_chg = 0.4;\n\n  Context ctx;\n  
ctx.InitAllowUnknown(Args{});\n  auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);\n\n  float min_value, mid_value;\n  CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};\n  for (auto const& page : Xy->GetBatches<SparsePage>()) {\n    GHistIndexMatrix gmat{&ctx, page, {}, cuts, 64, true, 0.5};\n    bst_feature_t const split_ind = 0;\n    common::ColumnMatrix column_indices;\n    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());\n    min_value = -std::numeric_limits<float>::infinity();\n\n    auto ptr = gmat.cut.Ptrs()[split_ind + 1];\n    mid_value = gmat.cut.Values().at(ptr / 2);\n    RegTree tree{n_targets, n_features};\n    if constexpr (std::is_same_v<ExpandEntry, CPUExpandEntry>) {\n      GetSplit(&tree, mid_value, &candidates);\n      mid_partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                  tree.HostScView());\n    } else {\n      GetMultiSplitForTest(&tree, mid_value, &candidates);\n      mid_partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates,\n                                                  tree.HostMtView());\n    }\n  }\n\n  auto constexpr kWorkers = 4;\n  collective::TestDistributedGlobal(kWorkers, [&] {\n    VerifyColumnSplitPartitioner<ExpandEntry>(n_targets, n_samples, n_features, base_rowid, Xy,\n                                              min_value, mid_value, mid_partitioner);\n  });\n}\n}  // anonymous namespace\n\nTEST(QuantileHist, PartitionerColumnSplit) { TestColumnSplitPartitioner<CPUExpandEntry>(1); }\n\nTEST(QuantileHist, MultiPartitionerColumnSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }\n\nnamespace {\nclass TestHistColumnSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {\n public:\n  void Run() {\n    auto [n_targets, categorical, sparsity] = GetParam();\n    TestColumnSplit(n_targets, categorical, \"grow_quantile_histmaker\", sparsity);\n  
}\n};\n}  // anonymous namespace\n\nTEST_P(TestHistColumnSplit, Basic) { this->Run(); }\n\nINSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColumnSplit, ::testing::ValuesIn([]() {\n                           std::vector<std::tuple<bst_target_t, bool, float>> params;\n                           for (auto categorical : {true, false}) {\n                             for (auto sparsity : {0.0f, 0.6f}) {\n                               for (bst_target_t n_targets : {1u, 3u}) {\n                                 // Categorical features are not yet supported for\n                                 // multi-target trees.\n                                 if (categorical && n_targets > 1) {\n                                   continue;\n                                 }\n                                 params.emplace_back(n_targets, categorical, sparsity);\n                               }\n                             }\n                           }\n                           return params;\n                         }()));\n\nnamespace {\nvoid FillGradients(linalg::Matrix<GradientPair>* gpair) {\n  auto h = gpair->HostView();\n  for (std::size_t row = 0; row < h.Shape(0); ++row) {\n    for (std::size_t target = 0; target < h.Shape(1); ++target) {\n      h(row, target) = GradientPair{1.0f, 0.0f};\n    }\n  }\n}\n\n// Verify partitioner doesn't write past buffer end when doing\n// update on small dataset after large one.\nvoid TestPartitionerOverrun(bst_target_t n_targets) {\n  constexpr bst_idx_t kNBig = 1 << 16, kNSmall = 1024;\n  constexpr int kCols = 3;\n\n  Context ctx;\n  ctx.InitAllowUnknown(Args{{\"nthread\", \"1\"}});\n\n  ObjInfo task{ObjInfo::kRegression, true, true};\n  auto updater =\n      std::unique_ptr<TreeUpdater>{TreeUpdater::Create(\"grow_quantile_histmaker\", &ctx, &task)};\n\n  TrainParam param;\n  param.InitAllowUnknown(Args{{\"max_depth\", \"1\"},\n                              {\"max_bin\", \"32\"},\n                              {\"lambda\", \"0\"},\n  
                            {\"gamma\", \"0\"},\n                              {\"min_child_weight\", \"0\"}});\n  updater->Configure(Args{});\n\n  auto const n_targets_size = static_cast<std::size_t>(n_targets);\n\n  auto dmat_large =\n      RandomDataGenerator{kNBig, kCols, 0.0f}.Seed(0).Batches(8).GenerateSparsePageDMatrix(\n          \"part_resize_big_first\", true);\n\n  std::size_t shape_large[2]{dmat_large->Info().num_row_, n_targets_size};\n  GradientContainer gpair_large;\n  gpair_large.gpair = linalg::Matrix<GradientPair>{shape_large, ctx.Device()};\n  FillGradients(&gpair_large.gpair);\n\n  RegTree tree_large{n_targets, static_cast<bst_feature_t>(kCols)};\n  std::vector<RegTree*> trees_large{&tree_large};\n  std::vector<HostDeviceVector<bst_node_t>> position_large(1);\n  common::Span<HostDeviceVector<bst_node_t>> pos_large{position_large.data(), 1};\n  updater->Update(&param, &gpair_large, dmat_large.get(), pos_large, trees_large);\n\n  auto dmat_small =\n      RandomDataGenerator{kNSmall, kCols, 0.0f}.Seed(1).Batches(1).GenerateSparsePageDMatrix(\n          \"part_resize_small_second\", false);\n\n  std::vector<HostDeviceVector<bst_node_t>> position_small(1);\n  auto& pos = position_small.front();\n  pos.Resize(kNBig);    // Allocate large\n  pos.Resize(kNSmall);  // Shrink logical size, capacity remains large\n\n  auto& hv = pos.HostVector();\n  std::size_t cap = hv.capacity();\n  ASSERT_GE(cap, static_cast<std::size_t>(kNBig));\n\n  std::size_t tail_elems = cap - hv.size();\n  ASSERT_GT(tail_elems, 0u) << \"Expected reserved tail storage\";\n  std::vector<bst_node_t> tail_before(tail_elems);\n  std::memcpy(tail_before.data(), hv.data() + hv.size(), tail_elems * sizeof(bst_node_t));\n\n  std::size_t shape_small[2]{dmat_small->Info().num_row_, n_targets_size};\n  GradientContainer gpair_small;\n  gpair_small.gpair = linalg::Matrix<GradientPair>{shape_small, ctx.Device()};\n  FillGradients(&gpair_small.gpair);\n\n  RegTree tree_small{n_targets, 
static_cast<bst_feature_t>(kCols)};\n  std::vector<RegTree*> trees_small{&tree_small};\n  common::Span<HostDeviceVector<bst_node_t>> pos_small{position_small.data(), 1};\n  updater->Update(&param, &gpair_small, dmat_small.get(), pos_small, trees_small);\n\n  // Verify no buffer overrun: tail bytes should be unchanged\n  ASSERT_EQ(hv.capacity(), cap) << \"Test precondition violated: capacity changed\";\n  std::vector<bst_node_t> tail_after(tail_elems);\n  std::memcpy(tail_after.data(), hv.data() + hv.size(), tail_elems * sizeof(bst_node_t));\n\n  EXPECT_EQ(tail_before, tail_after)\n      << \"Buffer overrun detected: writes past kNSmall when updating small \"\n         \"single-batch DMatrix after large multi-batch one. \"\n         \"Likely stale partitioner writing to buffer.\";\n}\n}  // anonymous namespace\n\nTEST(QuantileHist, HistUpdaterPartitionerOverrun) { TestPartitionerOverrun(1); }\n\nTEST(QuantileHist, MultiTargetHistBuilderPartitionerOverrun) { TestPartitionerOverrun(3); }\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_refresh.cc",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/gradient.h>  // for GradientContainer\n#include <xgboost/host_device_vector.h>\n#include <xgboost/task.h>  // for ObjInfo\n#include <xgboost/tree_updater.h>\n\n#include <memory>\n#include <string>\n#include <vector>\n\n#include \"../../../src/tree/param.h\"  // for TrainParam\n#include \"../helpers.h\"\n\nnamespace xgboost::tree {\nTEST(Updater, Refresh) {\n  bst_idx_t constexpr kRows = 8;\n  bst_feature_t constexpr kCols = 16;\n  Context ctx;\n\n  GradientContainer gpair;\n  gpair.gpair = linalg::Matrix<GradientPair>{{{0.23f, 0.24f},\n                                              {0.23f, 0.24f},\n                                              {0.23f, 0.24f},\n                                              {0.23f, 0.24f},\n                                              {0.27f, 0.29f},\n                                              {0.27f, 0.29f},\n                                              {0.27f, 0.29f},\n                                              {0.27f, 0.29f}},\n                                             {8, 1},\n                                             ctx.Device()};\n\n  std::shared_ptr<DMatrix> p_dmat{\n    RandomDataGenerator{kRows, kCols, 0.4f}.Seed(3).GenerateDMatrix()};\n  std::vector<std::pair<std::string, std::string>> cfg{\n      {\"reg_alpha\", \"0.0\"},\n      {\"num_feature\", std::to_string(kCols)},\n      {\"reg_lambda\", \"1\"}};\n\n  RegTree tree = RegTree{1u, kCols};\n  std::vector<RegTree*> trees{&tree};\n\n  ObjInfo task{ObjInfo::kRegression};\n  std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create(\"refresh\", &ctx, &task));\n\n  tree.ExpandNode(0, 2, 0.2f, false, 0.0, 0.2f, 0.8f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  int cleft = tree[0].LeftChild();\n  int cright = tree[0].RightChild();\n\n  tree.Stat(cleft).base_weight = 1.2;\n  tree.Stat(cright).base_weight = 1.3;\n\n  
std::vector<HostDeviceVector<bst_node_t>> position;\n  tree::TrainParam param;\n  param.UpdateAllowUnknown(cfg);\n\n  refresher->Update(&param, &gpair, p_dmat.get(), position, trees);\n\n  bst_float constexpr kEps = 1e-6;\n  ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);\n  ASSERT_NEAR(-0.224489, tree.Stat(0).loss_chg, kEps);\n  ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);\n  ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);\n  ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_regen.cc",
    "content": "/**\n * Copyright 2022-2023 XGBoost contributors\n */\n#include <gtest/gtest.h>\n\n#include \"../../../src/data/adapter.h\"\n#include \"../../../src/data/simple_dmatrix.h\"\n#include \"../helpers.h\"\n#include \"xgboost/context.h\"\n\nnamespace xgboost {\nnamespace {\nclass DMatrixForTest : public data::SimpleDMatrix {\n  size_t n_regen_{0};\n\n public:\n  using SimpleDMatrix::SimpleDMatrix;\n  BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,\n                                              const BatchParam& param) override {\n    auto backup = this->gradient_index_;\n    auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);\n    n_regen_ += (backup != this->gradient_index_);\n    return iter;\n  }\n\n  BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {\n    auto backup = this->ellpack_page_;\n    auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);\n    n_regen_ += (backup != this->ellpack_page_);\n    return iter;\n  }\n\n  auto NumRegen() const { return n_regen_; }\n\n  void Reset() {\n    this->gradient_index_.reset();\n    this->ellpack_page_.reset();\n    n_regen_ = 0;\n  }\n};\n\n/**\n * \\brief Test for whether the gradient index is correctly regenerated.\n */\nclass RegenTest : public ::testing::Test {\n protected:\n  std::shared_ptr<DMatrix> p_fmat_;\n\n  void SetUp() override {\n    size_t constexpr kRows = 256, kCols = 10;\n    HostDeviceVector<float> storage;\n    auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);\n    auto adapter = data::ArrayAdapter(StringView{dense});\n    p_fmat_ = std::shared_ptr<DMatrix>(\n        new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});\n\n    p_fmat_->Info().labels.Reshape(256, 1);\n    auto labels = p_fmat_->Info().labels.Data();\n    RandomDataGenerator{kRows, 1, 0}.GenerateDense(labels);\n  }\n\n  auto constexpr Iter() const { return 4; }\n\n  template 
<typename Page>\n  size_t TestTreeMethod(Context const* ctx, std::string tree_method, std::string obj,\n                        bool reset = true) const {\n    auto learner = std::unique_ptr<Learner>{Learner::Create({p_fmat_})};\n    learner->SetParam(\"device\", ctx->DeviceName());\n    learner->SetParam(\"tree_method\", tree_method);\n    learner->SetParam(\"objective\", obj);\n    learner->Configure();\n\n    for (auto i = 0; i < Iter(); ++i) {\n      learner->UpdateOneIter(i, p_fmat_);\n    }\n\n    auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());\n    CHECK(for_test);\n    auto backup = for_test->NumRegen();\n    for_test->GetBatches<Page>(p_fmat_->Ctx(), BatchParam{});\n    CHECK_EQ(for_test->NumRegen(), backup);\n\n    if (reset) {\n      for_test->Reset();\n    }\n    return backup;\n  }\n};\n}  // anonymous namespace\n\nTEST_F(RegenTest, Approx) {\n  Context ctx;\n  auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"approx\", \"reg:squarederror\");\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"approx\", \"reg:logistic\");\n  ASSERT_EQ(n, this->Iter());\n}\n\nTEST_F(RegenTest, Hist) {\n  Context ctx;\n  auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"hist\", \"reg:squarederror\");\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"hist\", \"reg:logistic\");\n  ASSERT_EQ(n, 1);\n}\n\nTEST_F(RegenTest, Mixed) {\n  Context ctx;\n  auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"hist\", \"reg:squarederror\", false);\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"approx\", \"reg:logistic\", true);\n  ASSERT_EQ(n, this->Iter() + 1);\n\n  n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"approx\", \"reg:logistic\", false);\n  ASSERT_EQ(n, this->Iter());\n  n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, \"hist\", \"reg:squarederror\", true);\n  ASSERT_EQ(n, this->Iter() + 1);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(RegenTest, GpuApprox) {\n  auto 
ctx = MakeCUDACtx(0);\n  auto n = this->TestTreeMethod<EllpackPage>(&ctx, \"approx\", \"reg:squarederror\", true);\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"approx\", \"reg:logistic\", false);\n  ASSERT_EQ(n, this->Iter());\n\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"approx\", \"reg:logistic\", true);\n  ASSERT_EQ(n, this->Iter() * 2);\n}\n\nTEST_F(RegenTest, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  auto n = this->TestTreeMethod<EllpackPage>(&ctx, \"hist\", \"reg:squarederror\", true);\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"hist\", \"reg:logistic\", false);\n  ASSERT_EQ(n, 1);\n\n  {\n    Context ctx;\n    n = this->TestTreeMethod<EllpackPage>(&ctx, \"hist\", \"reg:logistic\");\n    ASSERT_EQ(n, 2);\n  }\n}\n\nTEST_F(RegenTest, GpuMixed) {\n  auto ctx = MakeCUDACtx(0);\n  auto n = this->TestTreeMethod<EllpackPage>(&ctx, \"hist\", \"reg:squarederror\", false);\n  ASSERT_EQ(n, 1);\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"approx\", \"reg:logistic\", true);\n  ASSERT_EQ(n, this->Iter() + 1);\n\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"approx\", \"reg:logistic\", false);\n  ASSERT_EQ(n, this->Iter());\n  n = this->TestTreeMethod<EllpackPage>(&ctx, \"hist\", \"reg:squarederror\", true);\n  ASSERT_EQ(n, this->Iter() + 1);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_sampler.h",
    "content": "/**\n * Copyright 2026, XGBoost Contributors\n */\n#pragma once\n#include <gtest/gtest.h>\n#include <xgboost/base.h>    // for bst_idx_t, bst_target_t, GradientPair, GradientPairInt64\n#include <xgboost/linalg.h>  // for MatrixView\n\n#include <cmath>    // for abs, copysign, sqrt\n#include <cstddef>  // for size_t\n#include <vector>   // for vector\n\n#include \"../../../src/tree/hist/sampler.h\"  // for CalcRegAbsGrad\n\nnamespace xgboost::tree {\n// Check that multi-target rows are consistently sampled and return count.\ninline bst_idx_t CheckSampledRows(linalg::MatrixView<GradientPair const> gpair_0,\n                                  linalg::MatrixView<GradientPair const> gpair_1) {\n  CHECK_EQ(gpair_0.Shape(0), gpair_1.Shape(0));\n  CHECK_GE(gpair_1.Shape(1), gpair_1.Shape(1));\n  auto n_samples = gpair_0.Shape(0);\n  auto n_targets = gpair_1.Shape(1);\n  bst_idx_t sampled_count = 0;\n  for (std::size_t i = 0; i < n_samples; ++i) {\n    bool first_is_zero = gpair_0(i, 0).GetHess() == 0.0f;\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      bool is_zero = (gpair_1(i, t).GetGrad() == 0.0f && gpair_1(i, t).GetHess() == 0.0f);\n      EXPECT_EQ(first_is_zero, is_zero);\n    }\n    if (!first_is_zero) {\n      ++sampled_count;\n    }\n  }\n  return sampled_count;\n}\n\n// Check that sampling mask was correctly applied from split gradient to value gradient.\ninline void CheckSamplingMask(linalg::MatrixView<GradientPair> h_split,\n                              linalg::MatrixView<GradientPair> h_value, float subsample) {\n  auto n_samples = h_value.Shape(0);\n  std::size_t sampled_count = CheckSampledRows(h_split, h_value);\n  // Verify approximately the right fraction of rows are sampled\n  double sampled_fraction = static_cast<double>(sampled_count) / static_cast<double>(n_samples);\n  ASSERT_NEAR(sampled_fraction, subsample, 0.05);\n}\n\ninline void CheckSampling(float subsample, bst_target_t n_targets, bool check_sum,\n                      
    std::vector<GradientPairPrecise> const& sum_sampled_gpair,\n                          std::vector<GradientPairPrecise> const& sum_gpair,\n                          linalg::MatrixView<GradientPair> h_gpair) {\n  auto n_samples = h_gpair.Shape(0);\n  bst_idx_t sample_rows = n_samples * subsample;\n\n  // Verify gradient sums per target\n  for (bst_target_t t = 0; t < n_targets; ++t) {\n    if (check_sum) {\n      // Gradient-based sampling preserves the sum approximately\n      ASSERT_NEAR(sum_gpair[t].GetGrad(), sum_sampled_gpair[t].GetGrad(), 0.03f * n_samples);\n      ASSERT_NEAR(sum_gpair[t].GetHess(), sum_sampled_gpair[t].GetHess(), 0.03f * n_samples);\n    } else {\n      // Uniform sampling preserves the mean approximately\n      auto mean_grad = sum_gpair[t].GetGrad() / n_samples;\n      auto mean_hess = sum_gpair[t].GetHess() / n_samples;\n      auto sampled_mean_grad = sum_sampled_gpair[t].GetGrad() / sample_rows;\n      auto sampled_mean_hess = sum_sampled_gpair[t].GetHess() / sample_rows;\n      ASSERT_NEAR(mean_grad, sampled_mean_grad, mean_grad * 0.1);\n      ASSERT_NEAR(mean_hess, sampled_mean_hess, mean_hess * 0.1);\n    }\n  }\n\n  // Verify multi-target consistency and sample fraction (reuse CheckSampledRows)\n  auto sampled_count = CheckSampledRows(h_gpair, h_gpair);\n  if (subsample < 1.0f) {\n    double sampled_fraction = static_cast<double>(sampled_count) / n_samples;\n    ASSERT_NEAR(sampled_fraction, subsample, 0.05);\n  }\n}\n\n// Validate that value gradients are reweighted using the provided threshold and reg_abs_grad.\ninline void CheckValueReweight(linalg::MatrixView<GradientPair const> sampled_split,\n                               linalg::MatrixView<GradientPair const> value_before,\n                               linalg::MatrixView<GradientPair const> value_after,\n                               std::vector<float> const& reg_abs_grad, float threshold,\n                               float tol = 1e-3f) {\n  
CHECK_EQ(value_before.Shape(0), value_after.Shape(0));\n  CHECK_EQ(value_before.Shape(1), value_after.Shape(1));\n  CHECK_EQ(value_before.Shape(0), reg_abs_grad.size());\n\n  auto n_samples = value_after.Shape(0);\n  auto n_targets = value_after.Shape(1);\n  for (bst_idx_t i = 0; i < n_samples; ++i) {\n    if (sampled_split(i, 0).GetHess() == 0.0f) {\n      for (bst_target_t t = 0; t < n_targets; ++t) {\n        ASSERT_EQ(value_after(i, t).GetGrad(), 0.0f);\n        ASSERT_EQ(value_after(i, t).GetHess(), 0.0f);\n      }\n      continue;\n    }\n    float p = SamplingProbability(threshold, reg_abs_grad[i]);\n    for (bst_target_t t = 0; t < n_targets; ++t) {\n      auto expected = RescaleGrad(p, value_before(i, t));\n      auto grad_tol = tol * (1.0f + std::abs(expected.GetGrad()));\n      auto hess_tol = tol * (1.0f + std::abs(expected.GetHess()));\n      ASSERT_NEAR(value_after(i, t).GetGrad(), expected.GetGrad(), grad_tol);\n      ASSERT_NEAR(value_after(i, t).GetHess(), expected.GetHess(), hess_tol);\n    }\n  }\n}\n}  // namespace xgboost::tree\n"
  },
  {
    "path": "tests/cpp/tree/test_tree_model.cc",
    "content": "/**\n * Copyright 2018-2025, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n\n#include <stack>  // for stack\n\n#include \"../../../src/common/bitfield.h\"\n#include \"../../../src/common/categorical.h\"\n#include \"../../../src/tree/io_utils.h\"  // for DftBadValue\n#include \"../helpers.h\"\n#include \"xgboost/tree_model.h\"\n\nnamespace xgboost {\nTEST(Tree, ModelShape) {\n  bst_feature_t n_features = std::numeric_limits<uint32_t>::max();\n  RegTree tree{1u, n_features};\n  ASSERT_EQ(tree.NumFeatures(), n_features);\n\n  {\n    // json\n    Json j_tree{Object{}};\n    tree.SaveModel(&j_tree);\n    std::vector<char> dumped;\n    Json::Dump(j_tree, &dumped);\n    RegTree new_tree;\n\n    auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});\n    new_tree.LoadModel(j_loaded);\n    ASSERT_EQ(new_tree.NumFeatures(), n_features);\n  }\n  {\n    // ubjson\n    Json j_tree{Object{}};\n    tree.SaveModel(&j_tree);\n    std::vector<char> dumped;\n    Json::Dump(j_tree, &dumped, std::ios::binary);\n    RegTree new_tree;\n\n    auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);\n    new_tree.LoadModel(j_loaded);\n    ASSERT_EQ(new_tree.NumFeatures(), n_features);\n  }\n}\n\nTEST(Tree, AllocateNode) {\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0.0f, false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  tree.CollapseToLeaf(0, 0);\n  ASSERT_EQ(tree.NumExtraNodes(), 0);\n\n  tree.ExpandNode(0, 0, 0.0f, false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  ASSERT_EQ(tree.NumExtraNodes(), 2);\n\n  auto nodes = tree.GetNodes(DeviceOrd::CPU());\n  ASSERT_FALSE(nodes[1].IsDeleted());\n  ASSERT_TRUE(nodes[1].IsLeaf());\n  ASSERT_TRUE(nodes[2].IsLeaf());\n}\n\nTEST(Tree, ExpandCategoricalFeature) {\n  Context ctx;\n  {\n    RegTree tree;\n    tree.ExpandCategorical(0, 0, {}, true, 1.0, 2.0, 3.0, 11.0, 2.0,\n                 
          /*left_sum=*/3.0, /*right_sum=*/4.0);\n    ASSERT_EQ(tree.Size(), 3ul);\n    ASSERT_EQ(tree.GetNumLeaves(), 2);\n    ASSERT_EQ(tree.GetSplitTypes(ctx.Device()).size(), 3ul);\n    ASSERT_EQ(tree.GetSplitTypes(ctx.Device())[0], FeatureType::kCategorical);\n    ASSERT_EQ(tree.GetSplitTypes(ctx.Device())[1], FeatureType::kNumerical);\n    ASSERT_EQ(tree.GetSplitTypes(ctx.Device())[2], FeatureType::kNumerical);\n    ASSERT_EQ(tree.GetSplitCategories(ctx.Device()).size(), 0ul);\n    ASSERT_EQ(tree[0].SplitCond(), DftBadValue());\n  }\n  {\n    RegTree tree;\n    bst_cat_t cat = 33;\n    std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(cat+1));\n    LBitField32 bitset {split_cats};\n    bitset.Set(cat);\n    tree.ExpandCategorical(0, 0, split_cats, true, 1.0, 2.0, 3.0, 11.0, 2.0,\n                           /*left_sum=*/3.0, /*right_sum=*/4.0);\n    auto categories = tree.GetSplitCategories(ctx.Device());\n    auto segments = tree.GetSplitCategoriesPtr();\n    auto got = categories.subspan(segments[0].beg, segments[0].size);\n    ASSERT_TRUE(std::equal(got.cbegin(), got.cend(), split_cats.cbegin()));\n\n    Json out{Object()};\n    tree.SaveModel(&out);\n\n    RegTree loaded_tree;\n    loaded_tree.LoadModel(out);\n\n    auto const& cat_ptr = loaded_tree.GetSplitCategoriesPtr();\n    ASSERT_EQ(cat_ptr.size(), 3ul);\n    ASSERT_EQ(cat_ptr[0].beg, 0ul);\n    ASSERT_EQ(cat_ptr[0].size, 2ul);\n\n    auto loaded_categories = loaded_tree.GetSplitCategories(ctx.Device());\n    auto loaded_root = loaded_categories.subspan(cat_ptr[0].beg, cat_ptr[0].size);\n    ASSERT_TRUE(std::equal(loaded_root.begin(), loaded_root.end(), split_cats.begin()));\n  }\n}\n\nvoid GrowTree(RegTree* p_tree) {\n  SimpleLCG lcg;\n  size_t n_expands = 10;\n  constexpr size_t kCols = 256;\n  SimpleRealUniformDistribution<double> coin(0.0, 1.0);\n  SimpleRealUniformDistribution<double> feat(0.0, kCols);\n  SimpleRealUniformDistribution<double> split_cat(0.0, 128.0);\n  
SimpleRealUniformDistribution<double> split_value(0.0, kCols);\n\n  std::stack<bst_node_t> stack;\n  stack.push(RegTree::kRoot);\n  auto& tree = *p_tree;\n\n  for (size_t i = 0; i < n_expands; ++i) {\n    auto is_cat = coin(&lcg) <= 0.5;\n    bst_node_t node = stack.top();\n    stack.pop();\n\n    bst_feature_t f = feat(&lcg);\n    if (is_cat) {\n      bst_cat_t cat = common::AsCat(split_cat(&lcg));\n      std::vector<uint32_t> split_cats(\n          LBitField32::ComputeStorageSize(cat + 1));\n      LBitField32 bitset{split_cats};\n      bitset.Set(cat);\n      tree.ExpandCategorical(node, f, split_cats, true, 1.0, 2.0, 3.0, 11.0, 2.0,\n                             /*left_sum=*/3.0, /*right_sum=*/4.0);\n    } else {\n      auto split = split_value(&lcg);\n      tree.ExpandNode(node, f, split, true, 1.0, 2.0, 3.0, 11.0, 2.0,\n                      /*left_sum=*/3.0, /*right_sum=*/4.0);\n    }\n\n    stack.push(tree[node].LeftChild());\n    stack.push(tree[node].RightChild());\n  }\n}\n\nvoid CheckReload(RegTree const &tree) {\n  Json out{Object()};\n  tree.SaveModel(&out);\n\n  RegTree loaded_tree;\n  loaded_tree.LoadModel(out);\n  Json saved{Object()};\n  loaded_tree.SaveModel(&saved);\n\n  ASSERT_EQ(out, saved);\n}\n\nTEST(Tree, CategoricalIO) {\n  {\n    RegTree tree;\n    bst_cat_t cat = 32;\n    std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(cat + 1));\n    LBitField32 bitset{split_cats};\n    bitset.Set(cat);\n    tree.ExpandCategorical(0, 0, split_cats, true, 1.0, 2.0, 3.0, 11.0, 2.0,\n                           /*left_sum=*/3.0, /*right_sum=*/4.0);\n\n    CheckReload(tree);\n  }\n\n  {\n    RegTree tree;\n    GrowTree(&tree);\n    CheckReload(tree);\n  }\n}\n\nnamespace {\nRegTree ConstructTree() {\n  RegTree tree;\n  tree.ExpandNode(\n      /*nid=*/0, /*split_index=*/0, /*split_value=*/0.0f,\n      /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  auto left = tree[0].LeftChild();\n  auto 
right = tree[0].RightChild();\n  tree.ExpandNode(\n      /*nid=*/left, /*split_index=*/1, /*split_value=*/1.0f,\n      /*default_left=*/false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  tree.ExpandNode(\n      /*nid=*/right, /*split_index=*/2, /*split_value=*/2.0f,\n      /*default_left=*/false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  return tree;\n}\n\nRegTree ConstructTreeCat(std::vector<bst_cat_t>* cond) {\n  RegTree tree;\n  std::vector<uint32_t> cats_storage(common::CatBitField::ComputeStorageSize(33), 0);\n  common::CatBitField split_cats(cats_storage);\n  split_cats.Set(0);\n  split_cats.Set(14);\n  split_cats.Set(32);\n\n  cond->push_back(0);\n  cond->push_back(14);\n  cond->push_back(32);\n\n  tree.ExpandCategorical(0, /*split_index=*/0, cats_storage, true, 0.0f, 2.0,\n                         3.00, 11.0, 2.0, 3.0, 4.0);\n  auto left = tree[0].LeftChild();\n  auto right = tree[0].RightChild();\n  tree.ExpandNode(\n      /*nid=*/left, /*split_index=*/1, /*split_value=*/1.0f,\n      /*default_left=*/false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, /*left_sum=*/0.0f,\n      /*right_sum=*/0.0f);\n  tree.ExpandCategorical(right, /*split_index=*/0, cats_storage, true, 0.0f,\n                         2.0, 3.00, 11.0, 2.0, 3.0, 4.0);\n  return tree;\n}\n\nvoid TestCategoricalTreeDump(std::string format, std::string sep) {\n  std::vector<bst_cat_t> cond;\n  auto tree = ConstructTreeCat(&cond);\n\n  FeatureMap fmap;\n  auto str = tree.DumpModel(fmap, true, format);\n  std::string cond_str;\n  for (size_t c = 0; c < cond.size(); ++c) {\n    cond_str += std::to_string(cond[c]);\n    if (c != cond.size() - 1) {\n      cond_str += sep;\n    }\n  }\n  auto pos = str.find(cond_str);\n  ASSERT_NE(pos, std::string::npos);\n  pos = str.find(cond_str, pos + 1);\n  ASSERT_NE(pos, std::string::npos);\n\n  fmap.PushBack(0, \"feat_0\", \"c\");\n  fmap.PushBack(1, \"feat_1\", \"q\");\n  fmap.PushBack(2, \"feat_2\", 
\"int\");\n\n  str = tree.DumpModel(fmap, true, format);\n  pos = str.find(cond_str);\n  ASSERT_NE(pos, std::string::npos);\n  pos = str.find(cond_str, pos + 1);\n  ASSERT_NE(pos, std::string::npos);\n  ASSERT_NE(str.find(\"gain\"), std::string::npos);\n\n  if (format == \"json\") {\n    // Make sure it's valid JSON\n    Json::Load(StringView{str});\n  }\n}\n}  // anonymous namespace\n\nTEST(Tree, DumpJson) {\n  auto tree = ConstructTree();\n  FeatureMap fmap;\n  auto str = tree.DumpModel(fmap, true, \"json\");\n  size_t n_leaves = 0;\n  size_t iter = 0;\n  while ((iter = str.find(\"leaf\", iter + 1)) != std::string::npos) {\n    n_leaves++;\n  }\n  ASSERT_EQ(n_leaves, 4ul);\n\n  size_t n_conditions = 0;\n  iter = 0;\n  while ((iter = str.find(\"split_condition\", iter + 1)) != std::string::npos) {\n    n_conditions++;\n  }\n  ASSERT_EQ(n_conditions, 3ul);\n\n  fmap.PushBack(0, \"feat_0\", \"i\");\n  fmap.PushBack(1, \"feat_1\", \"q\");\n  fmap.PushBack(2, \"feat_2\", \"int\");\n\n  str = tree.DumpModel(fmap, true, \"json\");\n  ASSERT_NE(str.find(R\"(\"split\": \"feat_0\")\"), std::string::npos);\n  ASSERT_NE(str.find(R\"(\"split\": \"feat_1\")\"), std::string::npos);\n  ASSERT_NE(str.find(R\"(\"split\": \"feat_2\")\"), std::string::npos);\n\n  str = tree.DumpModel(fmap, false, \"json\");\n  ASSERT_EQ(str.find(\"cover\"), std::string::npos);\n\n\n  auto j_tree = Json::Load({str.c_str(), str.size()});\n  ASSERT_EQ(get<Array>(j_tree[\"children\"]).size(), 2ul);\n}\n\nTEST(Tree, DumpJsonCategorical) {\n  TestCategoricalTreeDump(\"json\", \", \");\n}\n\nTEST(Tree, DumpText) {\n  auto tree = ConstructTree();\n  FeatureMap fmap;\n  auto str = tree.DumpModel(fmap, true, \"text\");\n  size_t n_leaves = 0;\n  size_t iter = 0;\n  while ((iter = str.find(\"leaf\", iter + 1)) != std::string::npos) {\n    n_leaves++;\n  }\n  ASSERT_EQ(n_leaves, 4ul);\n\n  iter = 0;\n  size_t n_conditions = 0;\n  while ((iter = str.find(\"gain\", iter + 1)) != std::string::npos) {\n    
n_conditions++;\n  }\n  ASSERT_EQ(n_conditions, 3ul);\n\n  ASSERT_NE(str.find(\"[f0<0]\"), std::string::npos) << str;\n  ASSERT_NE(str.find(\"[f1<1]\"), std::string::npos);\n  ASSERT_NE(str.find(\"[f2<2]\"), std::string::npos);\n\n  fmap.PushBack(0, \"feat_0\", \"i\");\n  fmap.PushBack(1, \"feat_1\", \"q\");\n  fmap.PushBack(2, \"feat_2\", \"int\");\n\n  str = tree.DumpModel(fmap, true, \"text\");\n  ASSERT_NE(str.find(\"[feat_0]\"), std::string::npos);\n  ASSERT_NE(str.find(\"[feat_1<1]\"), std::string::npos);\n  ASSERT_NE(str.find(\"[feat_2<2]\"), std::string::npos);\n\n  str = tree.DumpModel(fmap, false, \"text\");\n  ASSERT_EQ(str.find(\"cover\"), std::string::npos);\n}\n\nTEST(Tree, DumpTextCategorical) {\n  TestCategoricalTreeDump(\"text\", \",\");\n}\n\nTEST(Tree, DumpDot) {\n  auto tree = ConstructTree();\n  FeatureMap fmap;\n  auto str = tree.DumpModel(fmap, true, \"dot\");\n\n  size_t n_leaves = 0;\n  size_t iter = 0;\n  while ((iter = str.find(\"leaf\", iter + 1)) != std::string::npos) {\n    n_leaves++;\n  }\n  ASSERT_EQ(n_leaves, 4ul);\n\n  size_t n_edges = 0;\n  iter = 0;\n  while ((iter = str.find(\"->\", iter + 1)) != std::string::npos) {\n    n_edges++;\n  }\n  ASSERT_EQ(n_edges, 6ul);\n\n  fmap.PushBack(0, \"feat_0\", \"i\");\n  fmap.PushBack(1, \"feat_1\", \"q\");\n  fmap.PushBack(2, \"feat_2\", \"int\");\n\n  str = tree.DumpModel(fmap, true, \"dot\");\n  ASSERT_NE(str.find(R\"(\"feat_0)\"), std::string::npos);\n  ASSERT_EQ(str.find(R\"(\"feat_0\")\"), std::string::npos);  // newline\n  ASSERT_NE(str.find(R\"(feat_1<1)\"), std::string::npos);\n  ASSERT_NE(str.find(R\"(feat_2<2)\"), std::string::npos);\n\n  str = tree.DumpModel(fmap, true, R\"(dot:{\"graph_attrs\": {\"bgcolor\": \"#FFFF00\"}})\");\n  ASSERT_NE(str.find(R\"(graph [ bgcolor=\"#FFFF00\" ])\"), std::string::npos);\n\n  // Default left for root.\n  ASSERT_NE(str.find(R\"(0 -> 1 [label=\"yes, missing\")\"), std::string::npos);\n  // Default right for node 1\n  ASSERT_NE(str.find(R\"(1 
-> 4 [label=\"no, missing\")\"), std::string::npos);\n}\n\nTEST(Tree, DumpDotCategorical) {\n  TestCategoricalTreeDump(\"dot\", \",\");\n}\n\nTEST(Tree, JsonIO) {\n  RegTree tree;\n  tree.ExpandNode(0, 0, 0.0f, false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  Json j_tree{Object()};\n  tree.SaveModel(&j_tree);\n\n  auto tparam = j_tree[\"tree_param\"];\n  ASSERT_EQ(get<String>(tparam[\"num_feature\"]), \"0\");\n  ASSERT_EQ(get<String>(tparam[\"num_nodes\"]), \"3\");\n  ASSERT_EQ(get<String>(tparam[\"size_leaf_vector\"]), \"1\");\n\n  ASSERT_EQ(get<I32Array const>(j_tree[\"left_children\"]).size(), 3ul);\n  ASSERT_EQ(get<I32Array const>(j_tree[\"right_children\"]).size(), 3ul);\n  ASSERT_EQ(get<I32Array const>(j_tree[\"parents\"]).size(), 3ul);\n  ASSERT_EQ(get<I32Array const>(j_tree[\"split_indices\"]).size(), 3ul);\n  ASSERT_EQ(get<F32Array const>(j_tree[\"split_conditions\"]).size(), 3ul);\n  ASSERT_EQ(get<U8Array const>(j_tree[\"default_left\"]).size(), 3ul);\n\n  RegTree loaded_tree;\n  loaded_tree.LoadModel(j_tree);\n  ASSERT_EQ(loaded_tree.NumNodes(), 3);\n  ASSERT_TRUE(loaded_tree == tree);\n\n  auto left = tree[0].LeftChild();\n  auto right = tree[0].RightChild();\n  tree.ExpandNode(left, 0, 0.0f, false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  tree.ExpandNode(right, 0, 0.0f, false, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,\n                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);\n  tree.SaveModel(&j_tree);\n\n  tree.ChangeToLeaf(1, 1.0f);\n  ASSERT_EQ(tree[1].LeftChild(), -1);\n  ASSERT_EQ(tree[1].RightChild(), -1);\n  tree.SaveModel(&j_tree);\n  loaded_tree.LoadModel(j_tree);\n  ASSERT_EQ(loaded_tree[1].LeftChild(), -1);\n  ASSERT_EQ(loaded_tree[1].RightChild(), -1);\n  ASSERT_TRUE(tree.Equal(loaded_tree));\n}\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_tree_policy.cc",
    "content": "/**\n * Copyright 2021-2026, XGBoost contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/base.h>     // for bst_node_t\n#include <xgboost/context.h>  // for Context\n#include <xgboost/tree_model.h>\n\n#include <memory>  // for unique_ptr\n#include <string>  // for string\n\n#include \"../../../src/tree/tree_view.h\"  // for WalkTree\n#include \"../helpers.h\"\n\nnamespace xgboost {\nclass TestGrowPolicy : public ::testing::Test {\n protected:\n  bst_idx_t n_samples_ = 4096, n_features_ = 13;\n  float sparsity_ = 0.5;\n\n protected:\n  std::unique_ptr<Learner> TrainOneIter(Context const* ctx, bst_target_t n_targets,\n                                        std::string tree_method, std::string policy,\n                                        bst_node_t max_leaves, bst_node_t max_depth) {\n    auto Xy =\n        RandomDataGenerator{n_samples_, n_features_, sparsity_}.Targets(n_targets).GenerateDMatrix(\n            true);\n\n    std::unique_ptr<Learner> learner{Learner::Create({Xy})};\n    learner->SetParam(\"tree_method\", tree_method);\n    learner->SetParam(\"device\", ctx->DeviceName());\n    if (max_leaves >= 0) {\n      learner->SetParam(\"max_leaves\", std::to_string(max_leaves));\n    }\n    if (max_depth >= 0) {\n      learner->SetParam(\"max_depth\", std::to_string(max_depth));\n    }\n    learner->SetParam(\"grow_policy\", policy);\n    if (n_targets > 1) {\n      learner->SetParam(\"multi_strategy\", \"multi_output_tree\");\n    }\n\n    auto check_max_leave = [&]() {\n      Json model{Object{}};\n      learner->SaveModel(&model);\n      auto j_tree = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0];\n      RegTree tree;\n      tree.LoadModel(j_tree);\n      CHECK_LE(tree.GetNumLeaves(), max_leaves);\n    };\n\n    auto check_max_depth = [&](int32_t sol) {\n      Json model{Object{}};\n      learner->SaveModel(&model);\n\n      auto j_tree = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0];\n     
 RegTree tree;\n      tree.LoadModel(j_tree);\n      bst_node_t depth = 0;\n      tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n        depth = std::max(tree.GetDepth(nidx), depth);\n        return true;\n      });\n      if (sol > -1) {\n        CHECK_EQ(depth, sol);\n      } else {\n        CHECK_EQ(depth, max_depth) << \"tree method: \" << tree_method << \" policy: \" << policy\n                                   << \" leaves:\" << max_leaves << \", depth:\" << max_depth;\n      }\n    };\n\n    if (max_leaves == 0 && max_depth == 0) {\n      // unconstrained\n      if (ctx->IsCPU()) {\n        // GPU pre-allocates for all nodes.\n        learner->UpdateOneIter(0, Xy);\n      }\n    } else if (max_leaves > 0 && max_depth == 0) {\n      learner->UpdateOneIter(0, Xy);\n      check_max_leave();\n    } else if (max_leaves == 0 && max_depth > 0) {\n      learner->UpdateOneIter(0, Xy);\n      check_max_depth(-1);\n    } else if (max_leaves > 0 && max_depth > 0) {\n      learner->UpdateOneIter(0, Xy);\n      check_max_leave();\n      check_max_depth(2);\n    } else if (max_leaves == -1 && max_depth == 0) {\n      // default max_leaves is 0, so both of them are now 0\n    } else {\n      // default parameters\n      learner->UpdateOneIter(0, Xy);\n    }\n    return learner;\n  }\n\n  void TestCombination(Context const* ctx, bst_target_t n_targets, std::string tree_method) {\n    for (auto policy : {\"depthwise\", \"lossguide\"}) {\n      // -1 means default\n      for (auto leaves : {-1, 0, 3}) {\n        for (auto depth : {-1, 0, 3}) {\n          this->TrainOneIter(ctx, n_targets, tree_method, policy, leaves, depth);\n        }\n      }\n    }\n  }\n\n  void TestTreeGrowPolicy(Context const* ctx, bst_target_t n_targets, std::string tree_method,\n                          std::string policy) {\n    {\n      /**\n       *  max_leaves\n       */\n      auto learner = this->TrainOneIter(ctx, n_targets, tree_method, policy, 16, -1);\n      Json 
model{Object{}};\n      learner->SaveModel(&model);\n\n      auto j_tree = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0];\n      RegTree tree;\n      tree.LoadModel(j_tree);\n      ASSERT_EQ(tree.GetNumLeaves(), 16);\n    }\n    {\n      /**\n       *  max_depth\n       */\n      auto learner = this->TrainOneIter(ctx, n_targets, tree_method, policy, -1, 3);\n      Json model{Object{}};\n      learner->SaveModel(&model);\n\n      auto j_tree = model[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0];\n      RegTree tree;\n      tree.LoadModel(j_tree);\n      bst_node_t depth = 0;\n      tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {\n        depth = std::max(tree.GetDepth(nidx), depth);\n        return true;\n      });\n      ASSERT_EQ(depth, 3);\n    }\n  }\n};\n\nTEST_F(TestGrowPolicy, Approx) {\n  Context ctx;\n  bst_target_t n_targets = 1;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"approx\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"approx\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, \"approx\");\n}\n\nTEST_F(TestGrowPolicy, Hist) {\n  Context ctx;\n  bst_target_t n_targets = 1;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, \"hist\");\n}\n\nTEST_F(TestGrowPolicy, MultiHist) {\n  Context ctx;\n  bst_target_t n_targets = 3;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, \"hist\");\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestGrowPolicy, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets = 1;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, 
\"hist\");\n}\n\nTEST_F(TestGrowPolicy, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets = 3;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"hist\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, \"hist\");\n}\n\nTEST_F(TestGrowPolicy, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets = 1;\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"approx\", \"depthwise\");\n  this->TestTreeGrowPolicy(&ctx, n_targets, \"approx\", \"lossguide\");\n\n  this->TestCombination(&ctx, n_targets, \"approx\");\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cpp/tree/test_tree_stat.cc",
    "content": "/**\n * Copyright 2020-2026, XGBoost Contributors\n */\n#include <gtest/gtest.h>\n#include <xgboost/context.h>       // for Context\n#include <xgboost/gradient.h>      // for GradientContainer\n#include <xgboost/task.h>          // for ObjInfo\n#include <xgboost/tree_model.h>    // for RegTree\n#include <xgboost/tree_updater.h>  // for TreeUpdater\n\n#include <memory>  // for unique_ptr\n\n#include \"../../../src/tree/io_utils.h\"   // for DftBadValue\n#include \"../../../src/tree/param.h\"      // for TrainParam\n#include \"../../../src/tree/tree_view.h\"  // for WalkTree\n#include \"../helpers.h\"\n\nnamespace xgboost {\n/**\n * @brief Test the tree statistic (like sum Hessian) is correct.\n */\nclass UpdaterTreeStatTest : public ::testing::Test {\n protected:\n  std::shared_ptr<DMatrix> p_dmat_;\n  GradientContainer gpairs_;\n  size_t constexpr static kRows = 10;\n  size_t constexpr static kCols = 10;\n\n protected:\n  void SetUp() override {\n    p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(true);\n    Context ctx;\n    gpairs_ = GenerateRandomGradients(&ctx, kRows, 1);\n  }\n\n  void RunTest(Context const* ctx, std::string updater) {\n    tree::TrainParam param;\n    ObjInfo task{ObjInfo::kRegression};\n    param.Init(Args{});\n\n    auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, ctx, &task)};\n    up->Configure(Args{});\n    RegTree tree{1u, kCols};\n    std::vector<HostDeviceVector<bst_node_t>> position(1);\n    up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});\n\n    auto sc_tree = tree.HostScView();\n    sc_tree.WalkTree([&sc_tree](bst_node_t nidx) {\n      if (sc_tree.IsLeaf(nidx)) {\n        // 1.0 is the default `min_child_weight`.\n        CHECK_GE(sc_tree.Stat(nidx).sum_hess, 1.0);\n      }\n      return true;\n    });\n  }\n};\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(UpdaterTreeStatTest, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, 
\"grow_gpu_hist\");\n}\n\nTEST_F(UpdaterTreeStatTest, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\");\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nTEST_F(UpdaterTreeStatTest, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\");\n}\n\nTEST_F(UpdaterTreeStatTest, Exact) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_colmaker\");\n}\n\nTEST_F(UpdaterTreeStatTest, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\");\n}\n\nnamespace {\nvoid BuildTree(Context const* ctx, DMatrix* p_fmat, GradientContainer* grad,\n               std::string const& name, Args const& args, RegTree* p_tree) {\n  tree::TrainParam param;\n  param.Init(args);\n  ObjInfo task{ObjInfo::kRegression};\n  auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(name, ctx, &task)};\n  up->Configure({});\n  std::vector<HostDeviceVector<bst_node_t>> position(1);\n  up->Update(&param, grad, p_fmat, common::Span{position}, {p_tree});\n}\n}  // namespace\n\n/**\n * @brief Test changing learning rate doesn't change internal splits.\n */\nclass TestSplitWithEta : public ::testing::Test {\n protected:\n  void Run(Context const* ctx, bst_target_t n_targets, std::string name) {\n    auto Xy = RandomDataGenerator{512, 64, 0.2}.Targets(n_targets).GenerateDMatrix(true);\n\n    auto gen_tree = [&](float eta) {\n      auto tree =\n          std::make_unique<RegTree>(n_targets, static_cast<bst_feature_t>(Xy->Info().num_col_));\n      std::vector<RegTree*> trees{tree.get()};\n\n      auto grad = GenerateRandomGradients(ctx, Xy->Info().num_row_, n_targets);\n      CHECK_EQ(grad.gpair.Shape(1), n_targets);\n      auto args = Args{{\"learning_rate\", std::to_string(eta)}};\n\n      BuildTree(ctx, Xy.get(), &grad, name, args, tree.get());\n\n      CHECK_EQ(tree->NumTargets(), n_targets);\n      if (n_targets > 1) {\n        CHECK(tree->IsMultiTarget());\n      }\n      return tree;\n    };\n\n    auto eta_ratio = 8.0f;\n    auto p_tree0 = 
gen_tree(0.1f);\n    auto p_tree1 = gen_tree(0.1f * eta_ratio);\n    // Just to make sure we are not testing a stump.\n    CHECK_GE(p_tree0->NumExtraNodes(), 32);\n\n    bst_node_t n_nodes{0};\n    tree::WalkTree(\n        *p_tree0,\n        [&](auto const& tree0, auto const& tree1, bst_node_t nidx) {\n          if (tree0.IsLeaf(nidx)) {\n            CHECK(tree1.IsLeaf(nidx));\n            if (p_tree0->IsMultiTarget()) {\n              CHECK(p_tree1->IsMultiTarget());\n              auto leaf_0 = p_tree0->GetMultiTargetTree()->LeafValue(nidx);\n              auto leaf_1 = p_tree1->GetMultiTargetTree()->LeafValue(nidx);\n              CHECK_EQ(leaf_0.Size(), leaf_1.Size());\n              for (std::size_t i = 0; i < leaf_0.Size(); ++i) {\n                CHECK_EQ(leaf_0(i) * eta_ratio, leaf_1(i));\n              }\n              CHECK_EQ(DftBadValue(), tree0.SplitCond(nidx));\n              CHECK_EQ(DftBadValue(), tree1.SplitCond(nidx));\n            } else {\n              // NON-mt tree reuses split cond for leaf value.\n              auto leaf_0 = tree0.SplitCond(nidx);\n              auto leaf_1 = tree1.SplitCond(nidx);\n              CHECK_EQ(leaf_0 * eta_ratio, leaf_1);\n            }\n          } else {\n            CHECK(!tree1.IsLeaf(nidx));\n            CHECK_EQ(tree0.SplitCond(nidx), tree1.SplitCond(nidx));\n          }\n          n_nodes++;\n          return true;\n        },\n        *p_tree1);\n    ASSERT_EQ(n_nodes, p_tree0->NumExtraNodes() + 1);\n  }\n};\n\nTEST_F(TestSplitWithEta, MultiHist) {\n  Context ctx;\n  bst_target_t n_targets{3};\n  this->Run(&ctx, n_targets, \"grow_quantile_histmaker\");\n}\n\nTEST_F(TestSplitWithEta, Hist) {\n  Context ctx;\n  bst_target_t n_targets{1};\n  this->Run(&ctx, n_targets, \"grow_quantile_histmaker\");\n}\n\nTEST_F(TestSplitWithEta, Approx) {\n  Context ctx;\n  bst_target_t n_targets{1};\n  this->Run(&ctx, n_targets, \"grow_histmaker\");\n}\n\nTEST_F(TestSplitWithEta, Exact) {\n  Context ctx;\n  bst_target_t 
n_targets{1};\n  this->Run(&ctx, n_targets, \"grow_colmaker\");\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestSplitWithEta, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets{1};\n  this->Run(&ctx, n_targets, \"grow_gpu_hist\");\n}\n\nTEST_F(TestSplitWithEta, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets{3};\n  this->Run(&ctx, n_targets, \"grow_gpu_hist\");\n}\n\nTEST_F(TestSplitWithEta, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  bst_target_t n_targets{1};\n  this->Run(&ctx, n_targets, \"grow_gpu_approx\");\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass TestMinSplitLoss : public ::testing::Test {\n  std::shared_ptr<DMatrix> p_fmat_;\n  GradientContainer gpair_;\n\n  void SynthesizeData(bst_target_t n_targets) {\n    constexpr size_t kRows = 32;\n    constexpr size_t kCols = 16;\n    constexpr float kSparsity = 0.6;\n    p_fmat_ =\n        RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).Targets(n_targets).GenerateDMatrix();\n    Context ctx;\n    gpair_ = GenerateRandomGradients(&ctx, kRows, n_targets);\n  }\n\n  bst_node_t Update(Context const* ctx, std::string updater, float gamma) {\n    Args args{{\"max_depth\", \"1\"},\n              {\"max_leaves\", \"0\"},\n\n              // Disable all other parameters.\n              {\"colsample_bynode\", \"1\"},\n              {\"colsample_bylevel\", \"1\"},\n              {\"colsample_bytree\", \"1\"},\n              {\"min_child_weight\", \"0.01\"},\n              {\"reg_alpha\", \"0\"},\n              {\"reg_lambda\", \"0\"},\n              {\"max_delta_step\", \"0\"},\n\n              // test gamma\n              {\"gamma\", std::to_string(gamma)}};\n\n    RegTree tree{static_cast<bst_target_t>(this->gpair_.gpair.Shape(1)),\n                 static_cast<bst_target_t>(this->p_fmat_->Info().num_col_)};\n\n    BuildTree(ctx, p_fmat_.get(), &gpair_, updater, args, &tree);\n    auto n_nodes = tree.NumExtraNodes();\n    return n_nodes;\n  }\n\n public:\n  void 
RunTest(Context const* ctx, std::string updater, bst_target_t n_targets) {\n    this->SynthesizeData(n_targets);\n\n    {\n      bst_node_t n_nodes = this->Update(ctx, updater, 0.01);\n      // This is not strictly verified, meaning the number `2` is whatever GPU_Hist retured\n      // when writing this test, and only used for testing larger gamma (below) does prevent\n      // building tree.\n      ASSERT_EQ(n_nodes, 2);\n    }\n    {\n      bst_node_t n_nodes = this->Update(ctx, updater, 100.0);\n      // No new nodes with gamma == 100.\n      ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));\n    }\n  }\n};\n\n/* Exact tree method requires a pruner as an additional updater, so not tested here. */\n\nTEST_F(TestMinSplitLoss, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\", 1u);\n}\n\nTEST_F(TestMinSplitLoss, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 1u);\n}\n\nTEST_F(TestMinSplitLoss, MultiHist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 2u);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestMinSplitLoss, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 1u);\n}\n\nTEST_F(TestMinSplitLoss, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 2u);\n}\n\nTEST_F(TestMinSplitLoss, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\", 1u);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass TestRegularization : public ::testing::Test {\n public:\n  void Run(Context const* ctx, std::string const& updater, std::string p, bst_target_t n_targets) {\n    bst_idx_t n_samples = 4096;\n    bst_feature_t n_features = 32;\n    auto p_fmat = RandomDataGenerator(n_samples, n_features, .0f)\n                      .Seed(3)\n                      .Targets(n_targets)\n                      .GenerateDMatrix(true);\n    auto gpairs = GenerateRandomGradients(ctx, n_samples, n_targets);\n\n    RegTree 
tree_0{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),\n                   static_cast<bst_target_t>(p_fmat->Info().num_col_)};\n    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{p, \"0.0\"}}, &tree_0);\n    // not exact, just checking the tree can be built\n    if (n_targets > 1) {\n      ASSERT_GE(tree_0.NumNodes(), 40);\n    } else {\n      ASSERT_GE(tree_0.NumNodes(), 50);\n    }\n\n    RegTree tree_1{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),\n                   static_cast<bst_target_t>(p_fmat->Info().num_col_)};\n    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{p, \"1024.0\"}}, &tree_1);\n    ASSERT_EQ(tree_1.NumNodes(), 1);\n  }\n};\n\nclass TestLambda : public TestRegularization {\n public:\n  void RunTest(Context const* ctx, std::string const& updater, bst_target_t n_targets) {\n    this->Run(ctx, updater, \"lambda\", n_targets);\n  }\n};\n\nTEST_F(TestLambda, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 1u);\n}\n\nTEST_F(TestLambda, MultiHist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 3u);\n}\n\nTEST_F(TestLambda, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\", 1u);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestLambda, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 1u);\n}\n\nTEST_F(TestLambda, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 3u);\n}\n\nTEST_F(TestLambda, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\", 1u);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass TestAlpha : public TestRegularization {\n public:\n  void RunTest(Context const* ctx, std::string const& updater, bst_target_t n_targets) {\n    this->Run(ctx, updater, \"alpha\", n_targets);\n  }\n};\n\nTEST_F(TestAlpha, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 1u);\n}\n\nTEST_F(TestAlpha, MultiHist) {\n  Context ctx;\n  
this->RunTest(&ctx, \"grow_quantile_histmaker\", 3u);\n}\n\nTEST_F(TestAlpha, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\", 1u);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestAlpha, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 1u);\n}\n\nTEST_F(TestAlpha, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 3u);\n}\n\nTEST_F(TestAlpha, GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\", 1u);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n\nclass TestMaxDeltaStep : public ::testing::Test {\n public:\n  void RunTest(Context const* ctx, std::string const& updater, bst_target_t n_targets) {\n    bst_idx_t n_samples = 4096;\n    bst_feature_t n_features = 32;\n    auto p_fmat = RandomDataGenerator(n_samples, n_features, .0f)\n                      .Seed(3)\n                      .Targets(n_targets)\n                      .GenerateDMatrix(true);\n    auto gpairs = GenerateRandomGradients(ctx, n_samples, n_targets);\n\n    RegTree tree_0{static_cast<bst_target_t>(gpairs.gpair.Shape(1)),\n                   static_cast<bst_target_t>(p_fmat->Info().num_col_)};\n    BuildTree(ctx, p_fmat.get(), &gpairs, updater, Args{{\"max_delta_step\", std::to_string(0.5)}}, &tree_0);\n    ASSERT_EQ(tree_0.NumNodes(), 1);\n  }\n};\n\nTEST_F(TestMaxDeltaStep, Hist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 1u);\n}\n\nTEST_F(TestMaxDeltaStep, MultiHist) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_quantile_histmaker\", 3u);\n}\n\nTEST_F(TestMaxDeltaStep, Approx) {\n  Context ctx;\n  this->RunTest(&ctx, \"grow_histmaker\", 1u);\n}\n\n#if defined(XGBOOST_USE_CUDA)\nTEST_F(TestMaxDeltaStep, GpuHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 1u);\n}\n\nTEST_F(TestMaxDeltaStep, GpuMultiHist) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_hist\", 3u);\n}\n\nTEST_F(TestMaxDeltaStep, 
GpuApprox) {\n  auto ctx = MakeCUDACtx(0);\n  this->RunTest(&ctx, \"grow_gpu_approx\", 1u);\n}\n#endif  // defined(XGBOOST_USE_CUDA)\n}  // namespace xgboost\n"
  },
  {
    "path": "tests/cross-platform/test_cross_platform_model.py",
    "content": "\"\"\"Cross-platform model test: Train on GPU (Linux), test inference on macOS.\"\"\"\n\nimport argparse\nimport pickle\nimport sys\nfrom pathlib import Path\nfrom typing import Tuple\n\nimport numpy as np\nimport xgboost as xgb\nfrom sklearn.datasets import make_classification\nfrom sklearn.metrics import accuracy_score\n\nSEED = 2026\n\n\ndef _pickle_path(model_path: str) -> Path:\n    return Path(model_path).with_suffix(\".pkl\")\n\n\ndef get_data() -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"Generate reproducible synthetic classification data.\"\"\"\n    X, y = make_classification(\n        n_samples=1000,\n        n_features=20,\n        n_informative=10,\n        n_classes=3,\n        n_clusters_per_class=1,\n        random_state=SEED,\n    )\n    return X.astype(np.float32), y.astype(np.int32)\n\n\ndef train_model(model_path: str) -> None:\n    \"\"\"Train models using GPU and save them (binary + pickle with column sampling).\"\"\"\n    X, y = get_data()\n\n    clf = xgb.XGBClassifier(\n        device=\"cuda\",\n        n_estimators=50,\n        max_depth=6,\n        learning_rate=0.3,\n        random_state=SEED,\n        colsample_bynode=0.8,\n    )\n    clf.fit(X, y)\n\n    accuracy = accuracy_score(y, clf.predict(X))\n    clf.get_booster().set_attr(expected_accuracy=str(accuracy))\n    clf.save_model(model_path)\n\n    with open(_pickle_path(model_path), \"wb\") as fd:\n        pickle.dump(clf.get_booster(), fd)\n\n\ndef test_inference(model_path: str) -> None:\n    \"\"\"Load models and verify predictions match (binary + pickle).\"\"\"\n    X, y = get_data()\n\n    clf = xgb.XGBClassifier()\n    clf.load_model(model_path)\n\n    accuracy = accuracy_score(y, clf.predict(X))\n    ea = clf.get_booster().attr(\"expected_accuracy\")\n    assert ea is not None\n    expected_accuracy = float(ea)\n    np.testing.assert_allclose(accuracy, expected_accuracy)\n\n    with open(_pickle_path(model_path), \"rb\") as f:\n        booster = 
pickle.load(f)\n\n    clf = xgb.XGBClassifier(n_estimators=2)\n    clf.fit(X, y, xgb_model=booster)\n\n\ndef main() -> int:\n    \"\"\"Entry for both training and inference.\"\"\"\n    parser = argparse.ArgumentParser(description=\"Cross-platform XGBoost model test.\")\n    group = parser.add_mutually_exclusive_group(required=True)\n    group.add_argument(\"--train\", action=\"store_true\", help=\"Train models using GPU\")\n    group.add_argument(\"--inference\", action=\"store_true\", help=\"Test inference\")\n    parser.add_argument(\n        \"--model-path\",\n        type=str,\n        default=\"cross_platform_model.ubj\",\n        help=\"Path to model file (pickle path is derived by replacing extension)\",\n    )\n\n    args = parser.parse_args()\n\n    if args.train:\n        train_model(args.model_path)\n    else:\n        test_inference(args.model_path)\n\n    return 0\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "tests/pytest.ini",
    "content": "[pytest]\nmarkers =\n    mgpu: Mark a test that requires multiple GPUs to run.\n    ci: Mark a test that runs only on CI.\n"
  },
  {
    "path": "tests/python/generate_models.py",
    "content": "import os\n\nimport numpy as np\nimport xgboost\nfrom sklearn.datasets import make_classification\nfrom xgboost.testing import make_categorical, make_ltr\n\nkRounds = 4\nkRows = 1000\nkCols = 4\nkForests = 2\nkMaxDepth = 3\nkClasses = 3\n\n\nversion = xgboost.__version__\n\ntarget_dir = \"models\"\n\n\ndef booster_ubj(model: str) -> str:\n    return os.path.join(target_dir, \"xgboost-\" + version + \".\" + model + \".ubj\")\n\n\ndef booster_json(model: str) -> str:\n    return os.path.join(target_dir, \"xgboost-\" + version + \".\" + model + \".json\")\n\n\ndef skl_ubj(model: str) -> str:\n    return os.path.join(target_dir, \"xgboost_scikit-\" + version + \".\" + model + \".ubj\")\n\n\ndef skl_json(model: str) -> str:\n    return os.path.join(target_dir, \"xgboost_scikit-\" + version + \".\" + model + \".json\")\n\n\ndef generate_regression_model() -> None:\n    print(\"Regression\")\n    X, y = make_categorical(\n        n_samples=kRows, n_features=kCols, n_categories=16, onehot=False, cat_ratio=0.5\n    )\n    w = np.random.default_rng(2025).uniform(size=X.shape[0])\n    data = xgboost.DMatrix(X, label=y, weight=w)\n    booster = xgboost.train(\n        {\n            \"tree_method\": \"hist\",\n            \"num_parallel_tree\": kForests,\n            \"max_depth\": kMaxDepth,\n            \"base_score\": 0.5,\n        },\n        num_boost_round=kRounds,\n        dtrain=data,\n    )\n    booster.save_model(booster_ubj(\"reg\"))\n    booster.save_model(booster_json(\"reg\"))\n\n    reg = xgboost.XGBRegressor(\n        tree_method=\"hist\",\n        num_parallel_tree=kForests,\n        max_depth=kMaxDepth,\n        n_estimators=kRounds,\n        base_score=0.5,\n    )\n    reg.fit(X, y, sample_weight=w)\n    reg.save_model(skl_ubj(\"reg\"))\n    reg.save_model(skl_json(\"reg\"))\n\n\ndef generate_logistic_model() -> None:\n    print(\"Logistic\")\n    X, y = make_classification(n_samples=kRows, n_features=kCols, random_state=2025)\n    assert 
y.max() == 1 and y.min() == 0\n    w = np.random.default_rng(2025).uniform(size=X.shape[0])\n\n    for objective, name in [\n        (\"binary:logistic\", \"logit\"),\n        (\"binary:logitraw\", \"logitraw\"),\n    ]:\n        data = xgboost.DMatrix(X, label=y, weight=w)\n        booster = xgboost.train(\n            {\n                \"tree_method\": \"hist\",\n                \"num_parallel_tree\": kForests,\n                \"max_depth\": kMaxDepth,\n                \"objective\": objective,\n                \"base_score\": 0.5,\n            },\n            num_boost_round=kRounds,\n            dtrain=data,\n        )\n        booster.save_model(booster_ubj(name))\n        booster.save_model(booster_json(name))\n\n        reg = xgboost.XGBClassifier(\n            tree_method=\"hist\",\n            num_parallel_tree=kForests,\n            max_depth=kMaxDepth,\n            n_estimators=kRounds,\n            objective=objective,\n            base_score=0.5,\n        )\n        reg.fit(X, y, sample_weight=w)\n        reg.save_model(skl_ubj(name))\n        reg.save_model(skl_json(name))\n\n\ndef generate_classification_model() -> None:\n    print(\"Classification\")\n    X, y = make_classification(\n        n_samples=kRows,\n        n_features=kCols,\n        random_state=2025,\n        n_classes=kClasses,\n        n_informative=4,\n        n_redundant=0,\n    )\n    w = np.random.default_rng(2025).uniform(size=X.shape[0])\n\n    data = xgboost.DMatrix(X, label=y, weight=w)\n    booster = xgboost.train(\n        {\n            \"num_class\": kClasses,\n            \"tree_method\": \"hist\",\n            \"num_parallel_tree\": kForests,\n            \"max_depth\": kMaxDepth,\n        },\n        num_boost_round=kRounds,\n        dtrain=data,\n    )\n    booster.save_model(booster_ubj(\"cls\"))\n    booster.save_model(booster_json(\"cls\"))\n\n    cls = xgboost.XGBClassifier(\n        tree_method=\"hist\",\n        num_parallel_tree=kForests,\n        
max_depth=kMaxDepth,\n        n_estimators=kRounds,\n    )\n    cls.fit(X, y, sample_weight=w)\n    cls.save_model(skl_ubj(\"cls\"))\n    cls.save_model(skl_json(\"cls\"))\n\n\ndef generate_ranking_model() -> None:\n    print(\"Learning to Rank\")\n    X, y, qid, w = make_ltr(\n        n_samples=kRows, n_features=kCols, n_query_groups=7, max_rel=3\n    )\n\n    data = xgboost.DMatrix(X, y, weight=w, qid=qid)\n    booster = xgboost.train(\n        {\n            \"objective\": \"rank:ndcg\",\n            \"num_parallel_tree\": kForests,\n            \"tree_method\": \"hist\",\n            \"max_depth\": kMaxDepth,\n            \"base_score\": 0.5,\n        },\n        num_boost_round=kRounds,\n        dtrain=data,\n    )\n    booster.save_model(booster_ubj(\"ltr\"))\n    booster.save_model(booster_json(\"ltr\"))\n\n    ranker = xgboost.sklearn.XGBRanker(\n        n_estimators=kRounds,\n        tree_method=\"hist\",\n        objective=\"rank:ndcg\",\n        max_depth=kMaxDepth,\n        num_parallel_tree=kForests,\n        base_score=0.5,\n    )\n    ranker.fit(X, y, qid=qid, sample_weight=w)\n    ranker.save_model(skl_ubj(\"ltr\"))\n    ranker.save_model(skl_json(\"ltr\"))\n\n\ndef generate_aft_survival_models() -> None:\n    print(\"AFT Survival\")\n    X, y_lower = make_categorical(\n        n_samples=kRows, n_features=kCols, n_categories=16, onehot=False, cat_ratio=0.5\n    )\n    w = np.random.default_rng(2025).uniform(size=X.shape[0])\n    y_upper = y_lower + np.mean(y_lower) + w\n    data = xgboost.QuantileDMatrix(\n        X, label_lower_bound=y_lower, label_upper_bound=y_upper\n    )\n    params = {\n        \"num_parallel_tree\": kForests,\n        \"tree_method\": \"hist\",\n        \"max_depth\": kMaxDepth,\n        \"objective\": \"survival:aft\",\n        \"aft_loss_distribution\": \"normal\",\n        \"base_score\": 0.5,\n    }\n    booster = xgboost.train(params, num_boost_round=kRounds, dtrain=data)\n    booster.save_model(booster_ubj(\"aft\"))\n   
 booster.save_model(booster_json(\"aft\"))\n\n\nif __name__ == \"__main__\":\n    if not os.path.exists(target_dir):\n        os.mkdir(target_dir)\n\n    generate_regression_model()\n    generate_logistic_model()\n    generate_classification_model()\n    generate_ranking_model()\n    generate_aft_survival_models()\n"
  },
  {
    "path": "tests/python/test_basic.py",
    "content": "import json\nfrom pathlib import Path\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost._c_api import _parse_version\n\ndpath = \"demo/data/\"\nrng = np.random.RandomState(1994)\n\n\nclass TestBasic:\n    def test_compat(self):\n        from xgboost.compat import lazy_isinstance\n\n        a = np.array([1, 2, 3])\n        assert lazy_isinstance(a, \"numpy\", \"ndarray\")\n        assert not lazy_isinstance(a, \"numpy\", \"dataframe\")\n\n    def test_basic(self, tmp_path: Path) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        param = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n        # specify validations set to watch performance\n        watchlist = [(dtrain, \"train\")]\n        num_round = 2\n        bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)\n\n        preds = bst.predict(dtrain)\n        labels = dtrain.get_label()\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        # error must be smaller than 10%\n        assert err < 0.1\n\n        preds = bst.predict(dtest)\n        labels = dtest.get_label()\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        # error must be smaller than 10%\n        assert err < 0.1\n\n        dtest_path = tmp_path / \"dtest.dmatrix\"\n        # save dmatrix into binary buffer\n        dtest.save_binary(dtest_path)\n        # save model\n        model_path = tmp_path / \"model.ubj\"\n        bst.save_model(model_path)\n        # load model and data in\n        bst2 = xgb.Booster(model_file=model_path)\n        dtest2 = xgb.DMatrix(dtest_path)\n        preds2 = bst2.predict(dtest2)\n        # assert they are the same\n        assert np.sum(np.abs(preds2 - preds)) == 0\n\n    def test_metric_config(self, tmp_path: 
Path) -> None:\n        # Make sure that the metric configuration happens in booster so the string\n        # `['error', 'auc']` doesn't get passed down to core.\n        dtrain, dtest = tm.load_agaricus(__file__)\n        param = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"error\", \"auc\"],\n        }\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 2\n        booster = xgb.train(param, dtrain, num_round, evals=watchlist)\n        predt_0 = booster.predict(dtrain)\n        path = tmp_path / \"model.json\"\n        booster.save_model(path)\n\n        booster = xgb.Booster(params=param, model_file=path)\n        predt_1 = booster.predict(dtrain)\n        np.testing.assert_allclose(predt_0, predt_1)\n\n    def test_multiclass(self, tmp_path: Path) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        param = {\"max_depth\": 2, \"eta\": 1, \"num_class\": 2}\n        # specify validations set to watch performance\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 2\n        bst = xgb.train(param, dtrain, num_round, evals=watchlist)\n        # this is prediction\n        preds = bst.predict(dtest)\n        labels = dtest.get_label()\n        err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(\n            len(preds)\n        )\n        # error must be smaller than 10%\n        assert err < 0.1\n\n        dtest_path = tmp_path / \"dtest.buffer\"\n        model_path = tmp_path / \"model.ubj\"\n        # save dmatrix into binary buffer\n        dtest.save_binary(dtest_path)\n        # save model\n        bst.save_model(model_path)\n        # load model and data in\n        bst2 = xgb.Booster(model_file=model_path)\n        dtest2 = xgb.DMatrix(dtest_path)\n        preds2 = bst2.predict(dtest2)\n        # assert they are the same\n        assert np.sum(np.abs(preds2 - preds)) 
== 0\n\n    def test_dump(self):\n        data = np.random.randn(100, 2)\n        target = np.array([0, 1] * 50)\n        features = [\"Feature1\", \"Feature2\"]\n\n        dm = xgb.DMatrix(data, label=target, feature_names=features)\n        params = {\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"logloss\",\n            \"eta\": 0.3,\n            \"max_depth\": 1,\n        }\n\n        bst = xgb.train(params, dm, num_boost_round=1)\n\n        # number of feature importances should == number of features\n        dump1 = bst.get_dump()\n        assert len(dump1) == 1, \"Expected only 1 tree to be dumped.\"\n        assert len(dump1[0].splitlines()) == 3, (\n            \"Expected 1 root and 2 leaves - 3 lines in dump.\"\n        )\n\n        dump2 = bst.get_dump(with_stats=True)\n        assert dump2[0].count(\"\\n\") == 3, (\n            \"Expected 1 root and 2 leaves - 3 lines in dump.\"\n        )\n        msg = \"Expected more info when with_stats=True is given.\"\n        assert dump2[0].find(\"\\n\") > dump1[0].find(\"\\n\"), msg\n\n        dump3 = bst.get_dump(dump_format=\"json\")\n        dump3j = json.loads(dump3[0])\n        assert dump3j[\"nodeid\"] == 0, \"Expected the root node on top.\"\n\n        dump4 = bst.get_dump(dump_format=\"json\", with_stats=True)\n        dump4j = json.loads(dump4[0])\n        assert \"gain\" in dump4j, \"Expected 'gain' to be dumped in JSON.\"\n\n        with pytest.raises(ValueError):\n            bst.get_dump(fmap=\"foo\")\n\n    def test_feature_score(self):\n        rng = np.random.RandomState(0)\n        data = rng.randn(100, 2)\n        target = np.array([0, 1] * 50)\n        features = [\"F0\"]\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data, label=target, feature_names=features)\n\n        params = {\"objective\": \"binary:logistic\"}\n        dm = xgb.DMatrix(data, label=target, feature_names=[\"F0\", \"F1\"])\n        booster = xgb.train(params, dm, 
num_boost_round=1)\n        # no error since feature names might be assigned before the booster seeing data\n        # and booster doesn't known about the actual number of features.\n        booster.feature_names = [\"F0\"]\n        with pytest.raises(ValueError):\n            booster.get_fscore()\n\n        booster.feature_names = None\n        # Use JSON to make sure the output has native Python type\n        scores = json.loads(json.dumps(booster.get_fscore()))\n        np.testing.assert_allclose(scores[\"f0\"], 6.0)\n\n    def test_load_file_invalid(self):\n        with pytest.raises(xgb.core.XGBoostError):\n            xgb.Booster(model_file=\"incorrect_path\")\n\n        with pytest.raises(xgb.core.XGBoostError):\n            xgb.Booster(model_file=\"不正なパス\")\n\n    @pytest.mark.parametrize(\n        \"path\", [\"모델.ubj\", \"がうる・ぐら.json\"], ids=[\"path-0\", \"path-1\"]\n    )\n    def test_unicode_path(self, tmp_path: Path, path: str) -> None:\n        model_path = tmp_path / path\n        dtrain, _ = tm.load_agaricus(__file__)\n        param = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n        bst = xgb.train(param, dtrain, num_boost_round=2)\n        bst.save_model(model_path)\n\n        bst2 = xgb.Booster(model_file=model_path)\n        assert bst.get_dump(dump_format=\"text\") == bst2.get_dump(dump_format=\"text\")\n\n    def test_dmatrix_numpy_init_omp(self):\n        rows = [1000, 11326, 15000]\n        cols = 50\n        for row in rows:\n            X = np.random.randn(row, cols)\n            y = np.random.randn(row).astype(\"f\")\n            dm = xgb.DMatrix(X, y, nthread=0)\n            np.testing.assert_array_equal(dm.get_label(), y)\n            assert dm.num_row() == row\n            assert dm.num_col() == cols\n\n            dm = xgb.DMatrix(X, y, nthread=10)\n            np.testing.assert_array_equal(dm.get_label(), y)\n            assert dm.num_row() == row\n            assert dm.num_col() == cols\n\n    def 
test_cv(self):\n        dm, _ = tm.load_agaricus(__file__)\n        params = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n\n        # return np.ndarray\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)\n        assert isinstance(cv, dict)\n        assert len(cv) == (4)\n\n    def test_cv_no_shuffle(self):\n        dm, _ = tm.load_agaricus(__file__)\n        params = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n\n        # return np.ndarray\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False\n        )\n        assert isinstance(cv, dict)\n        assert len(cv) == (4)\n\n    def test_cv_explicit_fold_indices(self):\n        dm, _ = tm.load_agaricus(__file__)\n        params = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n        folds = [\n            # Train        Test\n            ([1, 3], [5, 8]),\n            ([7, 9], [23, 43]),\n        ]\n\n        # return np.ndarray\n        cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)\n        assert isinstance(cv, dict)\n        assert len(cv) == (4)\n\n    def test_cv_explicit_fold_indices_labels(self):\n        params = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"reg:squarederror\"}\n        N = 100\n        F = 3\n        dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))\n        folds = [\n            # Train        Test\n            ([1, 3], [5, 8]),\n            ([7, 9], [23, 43, 11]),\n        ]\n\n        # Use callback to log the test labels in each fold\n        class Callback(xgb.callback.TrainingCallback):\n            def __init__(self) -> None:\n                super().__init__()\n\n            def after_iteration(\n                self,\n                model,\n                epoch: int,\n                evals_log: xgb.callback.TrainingCallback.EvalsLog,\n            ):\n                
print([fold.dtest.get_label() for fold in model.cvfolds])\n\n        cb = Callback()\n\n        # Run cross validation and capture standard out to test callback result\n        with tm.captured_output() as (out, err):\n            xgb.cv(\n                params,\n                dm,\n                num_boost_round=1,\n                folds=folds,\n                callbacks=[cb],\n                as_pandas=False,\n            )\n            output = out.getvalue().strip()\n        solution = (\n            \"[array([5., 8.], dtype=float32), array([23., 43., 11.],\"\n            + \" dtype=float32)]\"\n        )\n        assert output == solution\n\n\nclass TestBasicPathLike:\n    \"\"\"Unit tests using pathlib.Path for file interaction.\"\"\"\n\n    def test_DMatrix_init_from_path(self):\n        \"\"\"Initialization from the data path.\"\"\"\n        dtrain, _ = tm.load_agaricus(__file__)\n        assert dtrain.num_row() == 6513\n        assert dtrain.num_col() == 127\n\n    def test_DMatrix_save_to_path(self, tmp_path: Path) -> None:\n        \"\"\"Saving to a binary file using pathlib from a DMatrix.\"\"\"\n        data = np.random.randn(100, 2)\n        target = np.array([0, 1] * 50)\n        features = [\"Feature1\", \"Feature2\"]\n\n        dm = xgb.DMatrix(data, label=target, feature_names=features)\n\n        binary_path = tmp_path / \"dtrain.bin\"\n        dm.save_binary(binary_path)\n        assert binary_path.exists()\n\n    def test_Booster_init_invalid_path(self):\n        \"\"\"An invalid model_file path should raise XGBoostError.\"\"\"\n        with pytest.raises(xgb.core.XGBoostError):\n            xgb.Booster(model_file=Path(\"invalidpath\"))\n\n\ndef test_parse_ver() -> None:\n    (major, minor, patch), post = _parse_version(\"2.1.0\")\n    assert post == \"\"\n    (major, minor, patch), post = _parse_version(\"2.1.0-dev\")\n    assert post == \"dev\"\n    (major, minor, patch), post = _parse_version(\"2.1.0rc1\")\n    assert post == \"rc1\"\n    
(major, minor, patch), post = _parse_version(\"2.1.0.post1\")\n    assert post == \"post1\"\n"
  },
  {
    "path": "tests/python/test_basic_models.py",
    "content": "import json\nfrom pathlib import Path\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.core import Integer\nfrom xgboost.testing.basic_models import run_custom_objective\nfrom xgboost.testing.updater import get_basescore\n\n\nclass TestModels:\n    def test_glm(self):\n        param = {\n            \"objective\": \"binary:logistic\",\n            \"booster\": \"gblinear\",\n            \"alpha\": 0.0001,\n            \"lambda\": 1,\n            \"nthread\": 1,\n        }\n        dtrain, dtest = tm.load_agaricus(__file__)\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 4\n        bst = xgb.train(param, dtrain, num_round, watchlist)\n        assert isinstance(bst, xgb.core.Booster)\n        preds = bst.predict(dtest)\n        labels = dtest.get_label()\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        assert err < 0.2\n\n    def test_dart(self, tmp_path: Path) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        param = {\n            \"max_depth\": 5,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"logloss\",\n            \"booster\": \"dart\",\n            \"verbosity\": 1,\n        }\n        # specify validations set to watch performance\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 2\n        bst = xgb.train(param, dtrain, num_round, watchlist)\n        # this is prediction\n        preds = bst.predict(dtest, iteration_range=(0, num_round))\n        labels = dtest.get_label()\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        # error must be smaller than 10%\n        assert err < 0.1\n\n        dtest_path = tmp_path / \"dtest.dmatrix\"\n        model_path = tmp_path / 
\"xgboost.model.dart.ubj\"\n        # save dmatrix into binary buffer\n        dtest.save_binary(dtest_path)\n        # save model\n        bst.save_model(model_path)\n        # load model and data in\n        bst2 = xgb.Booster(params=param, model_file=model_path)\n        dtest2 = xgb.DMatrix(dtest_path)\n\n        preds2 = bst2.predict(dtest2, iteration_range=(0, num_round))\n\n        # assert they are the same\n        assert np.sum(np.abs(preds2 - preds)) == 0\n\n        def my_logloss(preds, dtrain):\n            labels = dtrain.get_label()\n            return \"logloss\", np.sum(np.log(np.where(labels, preds, 1 - preds)))\n\n        # check whether custom evaluation metrics work\n        bst = xgb.train(\n            param, dtrain, num_round, evals=watchlist, custom_metric=my_logloss\n        )\n        preds3 = bst.predict(dtest, iteration_range=(0, num_round))\n        assert all(preds3 == preds)\n\n        # check whether sample_type and normalize_type work\n        num_round = 50\n        param[\"learning_rate\"] = 0.1\n        param[\"rate_drop\"] = 0.1\n        preds_list = []\n        for p in [\n            [p0, p1] for p0 in [\"uniform\", \"weighted\"] for p1 in [\"tree\", \"forest\"]\n        ]:\n            param[\"sample_type\"] = p[0]\n            param[\"normalize_type\"] = p[1]\n            bst = xgb.train(param, dtrain, num_round, evals=watchlist)\n            preds = bst.predict(dtest, iteration_range=(0, num_round))\n            err = sum(\n                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n            ) / float(len(preds))\n            assert err < 0.1\n            preds_list.append(preds)\n\n        for ii in range(len(preds_list)):\n            for jj in range(ii + 1, len(preds_list)):\n                assert np.sum(np.abs(preds_list[ii] - preds_list[jj])) > 0\n\n    def test_boost_from_prediction(self):\n        # Re-construct dtrain here to avoid modification\n        margined, _ = 
tm.load_agaricus(__file__)\n        bst = xgb.train({\"tree_method\": \"hist\"}, margined, 1)\n        predt_0 = bst.predict(margined, output_margin=True)\n        margined.set_base_margin(predt_0)\n        bst = xgb.train({\"tree_method\": \"hist\"}, margined, 1)\n        predt_1 = bst.predict(margined)\n\n        assert np.any(np.abs(predt_1 - predt_0) > 1e-6)\n        dtrain, _ = tm.load_agaricus(__file__)\n        bst = xgb.train({\"tree_method\": \"hist\"}, dtrain, 2)\n        predt_2 = bst.predict(dtrain)\n        assert np.all(np.abs(predt_2 - predt_1) < 1e-6)\n\n    def test_boost_from_existing_model(self) -> None:\n        X, _ = tm.load_agaricus(__file__)\n        booster = xgb.train({\"tree_method\": \"hist\"}, X, num_boost_round=4)\n        assert booster.num_boosted_rounds() == 4\n        booster.set_param({\"tree_method\": \"approx\"})\n        assert booster.num_boosted_rounds() == 4\n        booster = xgb.train(\n            {\"tree_method\": \"hist\"}, X, num_boost_round=4, xgb_model=booster\n        )\n        assert booster.num_boosted_rounds() == 8\n        with pytest.warns(UserWarning, match=\"`updater`\"):\n            booster = xgb.train(\n                {\"updater\": \"prune\", \"process_type\": \"update\"},\n                X,\n                num_boost_round=4,\n                xgb_model=booster,\n            )\n        # Trees are moved for update, the rounds is reduced.  This test is\n        # written for being compatible with current code (1.0.0).  
If the\n        # behaviour is considered sub-optimal, feel free to change.\n        assert booster.num_boosted_rounds() == 4\n\n        booster = xgb.train({\"booster\": \"gblinear\"}, X, num_boost_round=4)\n        assert booster.num_boosted_rounds() == 4\n        booster.set_param({\"updater\": \"coord_descent\"})\n        assert booster.num_boosted_rounds() == 4\n        booster.set_param({\"updater\": \"shotgun\"})\n        assert booster.num_boosted_rounds() == 4\n        booster = xgb.train(\n            {\"booster\": \"gblinear\"}, X, num_boost_round=4, xgb_model=booster\n        )\n        assert booster.num_boosted_rounds() == 8\n\n    def test_custom_objective(self) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        run_custom_objective(\"hist\", \"cpu\", dtrain, dtest)\n\n    def test_multi_eval_metric(self) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        param = {\n            \"max_depth\": 2,\n            \"eta\": 0.2,\n            \"verbosity\": 1,\n            \"objective\": \"binary:logistic\",\n        }\n        param[\"eval_metric\"] = [\"auc\", \"logloss\", \"error\"]\n        evals_result = {}\n        bst = xgb.train(param, dtrain, 4, evals=watchlist, evals_result=evals_result)\n        assert isinstance(bst, xgb.core.Booster)\n        assert len(evals_result[\"eval\"]) == 3\n        assert set(evals_result[\"eval\"].keys()) == {\"auc\", \"error\", \"logloss\"}\n\n    def test_fpreproc(self):\n        param = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n        num_round = 2\n\n        def fpreproc(dtrain, dtest, param):\n            label = dtrain.get_label()\n            ratio = float(np.sum(label == 0)) / np.sum(label == 1)\n            param[\"scale_pos_weight\"] = ratio\n            return (dtrain, dtest, param)\n\n        dtrain, _ = tm.load_agaricus(__file__)\n        xgb.cv(\n            param,\n            
dtrain,\n            num_round,\n            nfold=5,\n            metrics={\"auc\"},\n            seed=0,\n            fpreproc=fpreproc,\n        )\n\n    def test_show_stdv(self):\n        param = {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"}\n        num_round = 2\n        dtrain, _ = tm.load_agaricus(__file__)\n        xgb.cv(\n            param,\n            dtrain,\n            num_round,\n            nfold=5,\n            metrics={\"error\"},\n            seed=0,\n            show_stdv=False,\n        )\n\n    def test_prediction_cache(self, tmp_path: Path) -> None:\n        X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False)\n        Xy = xgb.DMatrix(X, y)\n        param = {\"max_depth\": 8}\n        booster = xgb.train(param, Xy, num_boost_round=1)\n        path = tmp_path / \"model.json\"\n        booster.save_model(path)\n\n        predt_0 = booster.predict(Xy)\n\n        param[\"max_depth\"] = 2\n\n        booster = xgb.train(param, Xy, num_boost_round=1)\n        predt_1 = booster.predict(Xy)\n        assert not np.isclose(predt_0, predt_1).all()\n\n        booster.load_model(path)\n        predt_2 = booster.predict(Xy)\n        np.testing.assert_allclose(predt_0, predt_2)\n\n    def test_feature_names_validation(self):\n        X = np.random.random((10, 3))\n        y = np.random.randint(2, size=(10,))\n\n        dm1 = xgb.DMatrix(X, y, feature_names=(\"a\", \"b\", \"c\"))\n        dm2 = xgb.DMatrix(X, y)\n\n        bst = xgb.train([], dm1)\n        bst.predict(dm1)  # success\n        with pytest.raises(ValueError):\n            bst.predict(dm2)\n        bst.predict(dm1)  # success\n\n        bst = xgb.train([], dm2)\n        bst.predict(dm2)  # success\n\n    def test_special_model_dump_characters(self) -> None:\n        params = {\"objective\": \"reg:squarederror\", \"max_depth\": 3}\n        feature_names = ['\"feature 0\"', \"\\tfeature\\n1\", \"\"\"feature \"2\".\"\"\"]\n        X, y, w = 
tm.make_regression(n_samples=128, n_features=3, use_cupy=False)\n        Xy = xgb.DMatrix(X, label=y, feature_names=feature_names)\n        booster = xgb.train(params, Xy, num_boost_round=3)\n\n        json_dump = booster.get_dump(dump_format=\"json\")\n        assert len(json_dump) == 3\n\n        def validate_json(obj: dict) -> None:\n            for k, v in obj.items():\n                if k == \"split\":\n                    assert v in feature_names\n                elif isinstance(v, dict):\n                    validate_json(v)\n\n        for j_tree in json_dump:\n            loaded = json.loads(j_tree)\n            validate_json(loaded)\n\n        dot_dump = booster.get_dump(dump_format=\"dot\")\n        for d in dot_dump:\n            assert d.find(r\"feature \\\"2\\\"\") != -1\n\n        text_dump = booster.get_dump(dump_format=\"text\")\n        for d in text_dump:\n            assert d.find(r\"feature \\\"2\\\"\") != -1\n\n    def run_slice(\n        self,\n        booster: xgb.Booster,\n        dtrain: xgb.DMatrix,\n        num_parallel_tree: int,\n        num_classes: int,\n        num_boost_round: int,\n        use_np_type: bool,\n    ):\n        beg = 3\n        if use_np_type:\n            end: Integer = np.int32(7)\n        else:\n            end = 7\n\n        sliced: xgb.Booster = booster[beg:end]\n        assert sliced.feature_types == booster.feature_types\n\n        sliced_trees = (end - beg) * num_parallel_tree * num_classes\n        assert sliced_trees == len(sliced.get_dump())\n\n        sliced_trees = sliced_trees // 2\n        sliced = booster[beg:end:2]\n        assert sliced_trees == len(sliced.get_dump())\n\n        sliced = booster[beg:]\n        sliced_trees = (num_boost_round - beg) * num_parallel_tree * num_classes\n        assert sliced_trees == len(sliced.get_dump())\n\n        sliced = booster[beg:]\n        sliced_trees = (num_boost_round - beg) * num_parallel_tree * num_classes\n        assert sliced_trees == 
len(sliced.get_dump())\n\n        sliced = booster[:end]\n        sliced_trees = end * num_parallel_tree * num_classes\n        assert sliced_trees == len(sliced.get_dump())\n\n        sliced = booster[:end]\n        sliced_trees = end * num_parallel_tree * num_classes\n        assert sliced_trees == len(sliced.get_dump())\n\n        with pytest.raises(ValueError, match=r\">= 0\"):\n            booster[-1:0]\n\n        # we do not accept empty slice.\n        with pytest.raises(ValueError, match=\"Empty slice\"):\n            booster[1:1]\n        # stop can not be smaller than begin\n        with pytest.raises(ValueError, match=r\"Invalid.*\"):\n            booster[3:0]\n        with pytest.raises(ValueError, match=r\"Invalid.*\"):\n            booster[3:-1]\n        # negative step is not supported.\n        with pytest.raises(ValueError, match=r\".*>= 1.*\"):\n            booster[0:2:-1]\n        # step can not be 0.\n        with pytest.raises(ValueError, match=r\".*>= 1.*\"):\n            booster[0:2:0]\n\n        trees = [_ for _ in booster]\n        assert len(trees) == num_boost_round\n\n        with pytest.raises(TypeError):\n            booster[\"wrong type\"]  # type: ignore\n        with pytest.raises(IndexError):\n            booster[: num_boost_round + 1]\n        with pytest.raises(ValueError):\n            booster[1, 2]  # too many dims\n        # setitem is not implemented as model is immutable during slicing.\n        with pytest.raises(TypeError):\n            booster[:end] = booster  # type: ignore\n\n        sliced_0 = booster[1:3]\n        np.testing.assert_allclose(\n            booster.predict(dtrain, iteration_range=(1, 3)), sliced_0.predict(dtrain)\n        )\n        sliced_1 = booster[3:7]\n        np.testing.assert_allclose(\n            booster.predict(dtrain, iteration_range=(3, 7)), sliced_1.predict(dtrain)\n        )\n\n        predt_0 = sliced_0.predict(dtrain, output_margin=True)\n        predt_1 = sliced_1.predict(dtrain, 
output_margin=True)\n\n        # base score.\n        intercept = np.broadcast_to(np.array(get_basescore(booster)), predt_0.shape)\n        merged = predt_0 + predt_1 - intercept\n        single = booster[1:7].predict(dtrain, output_margin=True)\n        np.testing.assert_allclose(merged, single, atol=1e-6)\n\n        sliced_0 = booster[1:7:2]  # 1,3,5\n        sliced_1 = booster[2:8:2]  # 2,4,6\n\n        predt_0 = sliced_0.predict(dtrain, output_margin=True)\n        predt_1 = sliced_1.predict(dtrain, output_margin=True)\n\n        merged = predt_0 + predt_1 - intercept\n        single = booster[1:7].predict(dtrain, output_margin=True)\n        np.testing.assert_allclose(merged, single, atol=1e-6)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.parametrize(\"booster_name\", [\"gbtree\", \"dart\"])\n    def test_slice(self, booster_name: str) -> None:\n        from sklearn.datasets import make_classification\n\n        num_classes = 3\n        X, y = make_classification(\n            n_samples=1000, n_informative=5, n_classes=num_classes\n        )\n        dtrain = xgb.DMatrix(data=X, label=y)\n        num_parallel_tree = 4\n        num_boost_round = 16\n        total_trees = num_parallel_tree * num_classes * num_boost_round\n        booster = xgb.train(\n            {\n                \"num_parallel_tree\": num_parallel_tree,\n                \"subsample\": 0.5,\n                \"num_class\": num_classes,\n                \"booster\": booster_name,\n                \"objective\": \"multi:softprob\",\n            },\n            num_boost_round=num_boost_round,\n            dtrain=dtrain,\n        )\n        booster.feature_types = [\"q\"] * X.shape[1]\n\n        assert len(booster.get_dump()) == total_trees\n\n        assert booster[...].num_boosted_rounds() == num_boost_round\n\n        self.run_slice(\n            booster, dtrain, num_parallel_tree, num_classes, num_boost_round, False\n        )\n\n        bytesarray = 
booster.save_raw(raw_format=\"ubj\")\n        booster = xgb.Booster(model_file=bytesarray)\n        self.run_slice(\n            booster, dtrain, num_parallel_tree, num_classes, num_boost_round, False\n        )\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    @pytest.mark.parametrize(\"ext\", [\"json\", \"ubj\"])\n    def test_feature_info(self, ext: str, tmp_path: Path) -> None:\n        import pandas as pd\n\n        # make data\n        rows = 100\n        cols = 10\n        rng = np.random.RandomState(1994)\n        X = rng.randn(rows, cols)\n        y = rng.randn(rows)\n\n        # Test with pandas, which has feature info.\n        feature_names = [\"test_feature_\" + str(i) for i in range(cols)]\n        X_pd = pd.DataFrame(X, columns=feature_names)\n        X_pd[f\"test_feature_{3}\"] = X_pd.iloc[:, 3].astype(np.int32)\n\n        Xy = xgb.DMatrix(X_pd, y)\n        assert Xy.feature_types is not None\n        assert Xy.feature_types[3] == \"int\"\n        booster = xgb.train({}, dtrain=Xy, num_boost_round=1)\n\n        assert booster.feature_names == Xy.feature_names\n        assert booster.feature_names == feature_names\n        assert booster.feature_types == Xy.feature_types\n\n        path = tmp_path / f\"model.{ext}\"\n        booster.save_model(path)\n        booster = xgb.Booster()\n        booster.load_model(path)\n\n        assert booster.feature_names == Xy.feature_names\n        assert booster.feature_types == Xy.feature_types\n\n        # Test with numpy, no feature info is set\n        Xy = xgb.DMatrix(X, y)\n        assert Xy.feature_names is None\n        assert Xy.feature_types is None\n\n        booster = xgb.train({}, dtrain=Xy, num_boost_round=1)\n        assert booster.feature_names is None\n        assert booster.feature_types is None\n\n        # test explicitly set\n        fns = [str(i) for i in range(cols)]\n        booster.feature_names = fns\n\n        assert booster.feature_names == fns\n\n        path = tmp_path / 
f\"model2.{ext}\"\n        booster.save_model(path)\n\n        booster = xgb.Booster(model_file=path)\n        assert booster.feature_names == fns\n"
  },
  {
    "path": "tests/python/test_callback.py",
    "content": "from collections import namedtuple\nfrom pathlib import Path\nfrom typing import Optional, Tuple, Union\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.callbacks import (\n    run_eta_decay,\n    run_eta_decay_leaf_output,\n    tree_methods_objs,\n)\n\n# We use the dataset for tests.\npytestmark = pytest.mark.skipif(**tm.no_sklearn())\n\n\nBreastCancer = namedtuple(\"BreastCancer\", [\"full\", \"tr\", \"va\"])\n\n\n@pytest.fixture\ndef breast_cancer() -> BreastCancer:\n    from sklearn.datasets import load_breast_cancer\n\n    X, y = load_breast_cancer(return_X_y=True)\n\n    split = int(X.shape[0] * 0.8)\n    return BreastCancer(\n        full=(X, y),\n        tr=(X[:split, ...], y[:split, ...]),\n        va=(X[split:, ...], y[split:, ...]),\n    )\n\n\ndef eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:\n    # No custom objective, receive transformed output\n    return tm.eval_error_metric(predt, dtrain, rev_link=False)\n\n\nclass TestCallbacks:\n    def run_evaluation_monitor(\n        self,\n        D_train: xgb.DMatrix,\n        D_valid: xgb.DMatrix,\n        rounds: int,\n        verbose_eval: Union[bool, int],\n    ):\n        def check_output(output: str) -> None:\n            if int(verbose_eval) == 1:\n                # Should print each iteration info\n                assert len(output.split(\"\\n\")) == rounds\n            elif int(verbose_eval) > rounds:\n                # Should print first and latest iteration info\n                assert len(output.split(\"\\n\")) == 2\n            else:\n                # Should print info by each period additionaly to first and latest\n                # iteration\n                num_periods = rounds // int(verbose_eval)\n                # Extra information is required for latest iteration\n                is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)\n                
assert len(output.split(\"\\n\")) == (\n                    1 + num_periods + int(is_extra_info_required)\n                )\n\n        evals_result: xgb.callback.TrainingCallback.EvalsLog = {}\n        params = {\"objective\": \"binary:logistic\", \"eval_metric\": \"error\"}\n        with tm.captured_output() as (out, err):\n            xgb.train(\n                params,\n                D_train,\n                evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n                num_boost_round=rounds,\n                evals_result=evals_result,\n                verbose_eval=verbose_eval,\n            )\n            output: str = out.getvalue().strip()\n            check_output(output)\n\n        with tm.captured_output() as (out, err):\n            xgb.cv(params, D_train, num_boost_round=rounds, verbose_eval=verbose_eval)\n            output = out.getvalue().strip()\n            check_output(output)\n\n    def test_evaluation_monitor(self, breast_cancer: BreastCancer) -> None:\n        D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1])\n        D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1])\n        evals_result = {}\n        rounds = 10\n        xgb.train(\n            {\"objective\": \"binary:logistic\", \"eval_metric\": \"error\"},\n            D_train,\n            evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n            num_boost_round=rounds,\n            evals_result=evals_result,\n            verbose_eval=True,\n        )\n        assert len(evals_result[\"Train\"][\"error\"]) == rounds\n        assert len(evals_result[\"Valid\"][\"error\"]) == rounds\n\n        self.run_evaluation_monitor(D_train, D_valid, rounds, True)\n        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)\n        self.run_evaluation_monitor(D_train, D_valid, rounds, 4)\n        self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1)\n\n    def test_early_stopping(self, breast_cancer: BreastCancer) -> None:\n        D_train 
= xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1])\n        D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1])\n        evals_result = {}\n        rounds = 30\n        early_stopping_rounds = 5\n        booster = xgb.train(\n            {\"objective\": \"binary:logistic\", \"eval_metric\": \"error\"},\n            D_train,\n            evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n            num_boost_round=rounds,\n            evals_result=evals_result,\n            verbose_eval=True,\n            early_stopping_rounds=early_stopping_rounds,\n        )\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    def test_early_stopping_custom_eval(self, breast_cancer: BreastCancer) -> None:\n        D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1])\n        D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1])\n        early_stopping_rounds = 5\n        booster = xgb.train(\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": \"error\",\n                \"tree_method\": \"hist\",\n            },\n            D_train,\n            evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n            custom_metric=eval_error_metric,\n            num_boost_round=1000,\n            early_stopping_rounds=early_stopping_rounds,\n            verbose_eval=False,\n        )\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    def test_early_stopping_customize(self, breast_cancer: BreastCancer) -> None:\n        D_train = xgb.DMatrix(breast_cancer.tr[0], breast_cancer.tr[1])\n        D_valid = xgb.DMatrix(breast_cancer.va[0], breast_cancer.va[1])\n        early_stopping_rounds = 5\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds, metric_name=\"CustomErr\", 
data_name=\"Train\"\n        )\n        # Specify which dataset and which metric should be used for early stopping.\n        booster = xgb.train(\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": [\"error\", \"rmse\"],\n                \"tree_method\": \"hist\",\n            },\n            D_train,\n            evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n            custom_metric=eval_error_metric,\n            num_boost_round=1000,\n            callbacks=[early_stop],\n            verbose_eval=False,\n        )\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n        assert len(early_stop.stopping_history[\"Train\"][\"CustomErr\"]) == len(dump)\n\n        rounds = 100\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds,\n            metric_name=\"CustomErr\",\n            data_name=\"Train\",\n            min_delta=100,\n            save_best=True,\n        )\n        booster = xgb.train(\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": [\"error\", \"rmse\"],\n                \"tree_method\": \"hist\",\n            },\n            D_train,\n            evals=[(D_train, \"Train\"), (D_valid, \"Valid\")],\n            # No custom objective, transformed output\n            custom_metric=eval_error_metric,\n            num_boost_round=rounds,\n            callbacks=[early_stop],\n            verbose_eval=False,\n        )\n        # No iteration can be made with min_delta == 100\n        assert booster.best_iteration == 0\n        assert booster.num_boosted_rounds() == 1\n\n    def test_early_stopping_skl(self, breast_cancer: BreastCancer) -> None:\n        X, y = breast_cancer.full\n        early_stopping_rounds = 5\n        cls = xgb.XGBClassifier(\n            early_stopping_rounds=early_stopping_rounds, eval_metric=\"error\"\n 
       )\n        cls.fit(X, y, eval_set=[(X, y)])\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    def test_early_stopping_custom_eval_skl(self, breast_cancer: BreastCancer) -> None:\n        X, y = breast_cancer.full\n        early_stopping_rounds = 5\n        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds)\n        cls = xgb.XGBClassifier(\n            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]\n        )\n        cls.fit(X, y, eval_set=[(X, y)])\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    def test_early_stopping_save_best_model(self, breast_cancer: BreastCancer) -> None:\n        X, y = breast_cancer.full\n        n_estimators = 100\n        early_stopping_rounds = 5\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds, save_best=True\n        )\n        cls = xgb.XGBClassifier(\n            n_estimators=n_estimators,\n            eval_metric=tm.eval_error_metric_skl,\n            callbacks=[early_stop],\n        )\n        cls.fit(X, y, eval_set=[(X, y)])\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) == booster.best_iteration + 1\n\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds, save_best=True\n        )\n        cls = xgb.XGBClassifier(\n            booster=\"gblinear\",\n            n_estimators=10,\n            eval_metric=tm.eval_error_metric_skl,\n            callbacks=[early_stop],\n        )\n        with pytest.raises(ValueError):\n            cls.fit(X, y, eval_set=[(X, y)])\n\n        # No error\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds, 
save_best=False\n        )\n        xgb.XGBClassifier(\n            booster=\"gblinear\",\n            n_estimators=10,\n            eval_metric=tm.eval_error_metric_skl,\n            callbacks=[early_stop],\n        ).fit(X, y, eval_set=[(X, y)])\n\n    def test_early_stopping_continuation(\n        self, breast_cancer: BreastCancer, tmp_path: Path\n    ) -> None:\n        X, y = breast_cancer.full\n\n        early_stopping_rounds = 5\n        early_stop = xgb.callback.EarlyStopping(\n            rounds=early_stopping_rounds, save_best=True\n        )\n        cls = xgb.XGBClassifier(\n            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]\n        )\n        cls.fit(X, y, eval_set=[(X, y)])\n\n        booster = cls.get_booster()\n        assert booster.num_boosted_rounds() == booster.best_iteration + 1\n\n        path = tmp_path / \"model.json\"\n        cls.save_model(path)\n        cls = xgb.XGBClassifier()\n        cls.load_model(path)\n        assert cls._Booster is not None\n        early_stopping_rounds = 3\n        cls.set_params(\n            eval_metric=tm.eval_error_metric_skl,\n            early_stopping_rounds=early_stopping_rounds,\n        )\n        cls.fit(X, y, eval_set=[(X, y)])\n        booster = cls.get_booster()\n        assert (\n            booster.num_boosted_rounds()\n            == booster.best_iteration + early_stopping_rounds + 1\n        )\n\n    def test_early_stopping_multiple_metrics(self):\n        from sklearn.datasets import make_classification\n\n        X, y = make_classification(random_state=1994)\n        # AUC approaches 1.0 real quick.\n        clf = xgb.XGBClassifier(eval_metric=[\"logloss\", \"auc\"], early_stopping_rounds=2)\n        clf.fit(X, y, eval_set=[(X, y)])\n        assert clf.best_iteration < 8\n        assert clf.evals_result()[\"validation_0\"][\"auc\"][-1] > 0.99\n\n        clf = xgb.XGBClassifier(eval_metric=[\"auc\", \"logloss\"], early_stopping_rounds=2)\n        clf.fit(X, y, 
eval_set=[(X, y)])\n\n        assert clf.best_iteration > 50\n        assert clf.evals_result()[\"validation_0\"][\"auc\"][-1] > 0.99\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\", \"exact\"])\n    def test_eta_decay(self, tree_method: str) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        run_eta_decay(tree_method, dtrain, dtest, \"cpu\")\n\n    @pytest.mark.parametrize(\"tree_method,objective\", tree_methods_objs())\n    def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        run_eta_decay_leaf_output(tree_method, objective, dtrain, dtest, \"cpu\")\n\n    def test_check_point(self, breast_cancer: BreastCancer, tmp_path: Path) -> None:\n        X, y = breast_cancer.full\n        m = xgb.DMatrix(X, y)\n        check_point = xgb.callback.TrainingCheckPoint(\n            directory=tmp_path, interval=1, name=\"model\"\n        )\n        xgb.train(\n            {\"objective\": \"binary:logistic\"},\n            m,\n            num_boost_round=10,\n            verbose_eval=False,\n            callbacks=[check_point],\n        )\n        for i in range(1, 10):\n            assert (\n                tmp_path / f\"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}\"\n            ).exists()\n\n        check_point = xgb.callback.TrainingCheckPoint(\n            directory=tmp_path, interval=1, as_pickle=True, name=\"model\"\n        )\n        xgb.train(\n            {\"objective\": \"binary:logistic\"},\n            m,\n            num_boost_round=10,\n            verbose_eval=False,\n            callbacks=[check_point],\n        )\n        for i in range(1, 10):\n            assert (tmp_path / f\"model_{i}.pkl\").exists()\n\n    def test_callback_list(self) -> None:\n        X, y = tm.data.get_california_housing()\n        m = xgb.DMatrix(X, y)\n        callbacks = [xgb.callback.EarlyStopping(rounds=10)]\n        for i in range(4):\n     
       xgb.train(\n                {\"objective\": \"reg:squarederror\", \"eval_metric\": \"rmse\"},\n                m,\n                evals=[(m, \"Train\")],\n                num_boost_round=1,\n                verbose_eval=True,\n                callbacks=callbacks,\n            )\n        assert len(callbacks) == 1\n\n    def test_attribute_error(self, breast_cancer: BreastCancer) -> None:\n        X, y = breast_cancer.full\n\n        clf = xgb.XGBClassifier(n_estimators=8)\n        clf.fit(X, y, eval_set=[(X, y)])\n\n        with pytest.raises(AttributeError, match=\"early stopping is used\"):\n            clf.best_iteration\n\n        with pytest.raises(AttributeError, match=\"early stopping is used\"):\n            clf.best_score\n\n        booster = clf.get_booster()\n        with pytest.raises(AttributeError, match=\"early stopping is used\"):\n            booster.best_iteration\n\n        with pytest.raises(AttributeError, match=\"early stopping is used\"):\n            booster.best_score\n\n    def test_preserve_order(self) -> None:\n        \"\"\"Test the ordering of the callbacks is preserved.\"\"\"\n        X, y, w = tm.make_regression(256, 16, False)\n        fst_call: Optional[int] = None\n\n        # If we use Python `set`, Cb1 is ordered before Cb2. This test makes sure Cb2 is\n        # called before Cb1.\n        class Cb2(xgb.callback.TrainingCallback):\n            def before_iteration(self, model, epoch: int, evals_log) -> bool:\n                nonlocal fst_call\n                assert fst_call is None or fst_call == 2\n                fst_call = 2\n                return False\n\n        class Cb1(xgb.callback.TrainingCallback):\n            def before_iteration(self, model, epoch: int, evals_log) -> bool:\n                assert fst_call == 2\n                return False\n\n        callbacks = [Cb2(), Cb1()]\n        xgb.train({}, dtrain=xgb.QuantileDMatrix(X, y, weight=w), callbacks=callbacks)\n"
  },
  {
    "path": "tests/python/test_collective.py",
    "content": "import socket\nfrom dataclasses import asdict\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import RabitTracker, build_info, federated\nfrom xgboost import testing as tm\nfrom xgboost.collective import Config\n\n\ndef run_rabit_worker(rabit_env: dict, world_size: int) -> int:\n    with xgb.collective.CommunicatorContext(**rabit_env):\n        assert xgb.collective.get_world_size() == world_size\n        assert xgb.collective.is_distributed()\n        assert xgb.collective.get_processor_name() == socket.gethostname()\n        ret = xgb.collective.broadcast(\"test1234\", 0)\n        assert str(ret) == \"test1234\"\n        reduced = xgb.collective.allreduce(np.asarray([1, 2, 3]), xgb.collective.Op.SUM)\n        assert np.array_equal(reduced, np.asarray([2, 4, 6]))\n    return 0\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_rabit_communicator() -> None:\n    from loky import get_reusable_executor\n\n    world_size = 2\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=world_size)\n    tracker.start()\n    workers = []\n\n    with get_reusable_executor(max_workers=world_size) as pool:\n        for _ in range(world_size):\n            worker = pool.submit(\n                run_rabit_worker, rabit_env=tracker.worker_args(), world_size=world_size\n            )\n            workers.append(worker)\n\n        for worker in workers:\n            assert worker.result() == 0\n\n\ndef run_federated_worker(port: int, world_size: int, rank: int) -> int:\n    with xgb.collective.CommunicatorContext(\n        dmlc_communicator=\"federated\",\n        federated_server_address=f\"localhost:{port}\",\n        federated_world_size=world_size,\n        federated_rank=rank,\n    ):\n        assert xgb.collective.get_world_size() == world_size\n        assert xgb.collective.is_distributed()\n        assert xgb.collective.get_processor_name() == f\"rank:{rank}\"\n        bret = xgb.collective.broadcast(\"test1234\", 0)\n        
assert str(bret) == \"test1234\"\n        aret = xgb.collective.allreduce(np.asarray([1, 2, 3]), xgb.collective.Op.SUM)\n        assert np.array_equal(aret, np.asarray([2, 4, 6]))\n    return 0\n\n\n@pytest.mark.skipif(**tm.skip_win())\n@pytest.mark.skipif(**tm.no_loky())\ndef test_federated_communicator() -> None:\n    from loky import get_reusable_executor\n\n    if not build_info()[\"USE_FEDERATED\"]:\n        pytest.skip(\"XGBoost not built with federated learning enabled\")\n\n    port = 9091\n    world_size = 2\n    with get_reusable_executor(max_workers=world_size + 1) as pool:\n        kwargs = {\"port\": port, \"n_workers\": world_size, \"blocking\": False}\n        tracker = pool.submit(federated.run_federated_server, **kwargs)\n        if not tracker.running():\n            raise RuntimeError(\"Error starting Federated Learning server\")\n\n        workers = []\n        for rank in range(world_size):\n            worker = pool.submit(\n                run_federated_worker, port=port, world_size=world_size, rank=rank\n            )\n            workers.append(worker)\n        for worker in workers:\n            assert worker.result() == 0\n\n\ndef test_config_serialization() -> None:\n    cfg = Config(retry=1, timeout=2, tracker_host_ip=\"127.0.0.1\", tracker_port=None)\n    cfg1 = Config(**asdict(cfg))\n    assert cfg == cfg1\n"
  },
  {
    "path": "tests/python/test_config.py",
    "content": "import multiprocessing\nfrom concurrent.futures import ThreadPoolExecutor\n\nimport pytest\n\nimport xgboost as xgb\n\n\n@pytest.mark.parametrize(\"verbosity_level\", [0, 1, 2, 3])\ndef test_global_config_verbosity(verbosity_level):\n    def get_current_verbosity():\n        return xgb.get_config()[\"verbosity\"]\n\n    old_verbosity = get_current_verbosity()\n    assert old_verbosity == 1\n    with xgb.config_context(verbosity=verbosity_level):\n        new_verbosity = get_current_verbosity()\n        assert new_verbosity == verbosity_level\n    assert old_verbosity == get_current_verbosity()\n\n\n@pytest.mark.parametrize(\"use_rmm\", [False, True])\ndef test_global_config_use_rmm(use_rmm):\n    def get_current_use_rmm_flag():\n        return xgb.get_config()[\"use_rmm\"]\n\n    old_use_rmm_flag = get_current_use_rmm_flag()\n    with xgb.config_context(use_rmm=use_rmm):\n        new_use_rmm_flag = get_current_use_rmm_flag()\n        assert new_use_rmm_flag == use_rmm\n    assert old_use_rmm_flag == get_current_use_rmm_flag()\n\n\ndef test_nested_config() -> None:\n    verbosity = xgb.get_config()[\"verbosity\"]\n    assert verbosity == 1\n\n    with xgb.config_context(verbosity=3):\n        assert xgb.get_config()[\"verbosity\"] == 3\n        with xgb.config_context(verbosity=2):\n            assert xgb.get_config()[\"verbosity\"] == 2\n            with xgb.config_context(verbosity=1):\n                assert xgb.get_config()[\"verbosity\"] == 1\n            assert xgb.get_config()[\"verbosity\"] == 2\n        assert xgb.get_config()[\"verbosity\"] == 3\n\n    with xgb.config_context(verbosity=3):\n        assert xgb.get_config()[\"verbosity\"] == 3\n        with xgb.config_context(verbosity=None):\n            assert xgb.get_config()[\"verbosity\"] == 3  # None has no effect\n\n    xgb.set_config(verbosity=2)\n    assert xgb.get_config()[\"verbosity\"] == 2\n    with xgb.config_context(verbosity=3):\n        assert xgb.get_config()[\"verbosity\"] 
== 3\n    xgb.set_config(verbosity=verbosity)  # reset\n\n    verbosity = xgb.get_config()[\"verbosity\"]\n    assert verbosity == 1\n\n\ndef test_thread_safety():\n    n_threads = multiprocessing.cpu_count()\n    futures = []\n    with ThreadPoolExecutor(max_workers=n_threads) as executor:\n        for i in range(256):\n            f = executor.submit(test_nested_config)\n            futures.append(f)\n\n    for f in futures:\n        f.result()\n\n\ndef test_nthread() -> None:\n    config = xgb.get_config()\n    assert config[\"nthread\"] == 0\n"
  },
  {
    "path": "tests/python/test_data_iterator.py",
    "content": "import weakref\nfrom pathlib import Path\nfrom typing import Any, Callable, Dict, List\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, settings, strategies\nfrom scipy.sparse import csr_matrix\nfrom xgboost import testing as tm\nfrom xgboost.core import SingleBatchInternalIter as SingleBatch\nfrom xgboost.testing import IteratorForTest, make_batches, non_increasing\nfrom xgboost.testing.data_iter import check_invalid_cat_batches, check_uneven_sizes\nfrom xgboost.testing.updater import (\n    check_categorical_missing,\n    check_categorical_ohe,\n    check_extmem_qdm,\n    check_quantile_loss_extmem,\n)\n\npytestmark = tm.timeout(30)\n\n\ndef test_single_batch(tree_method: str = \"approx\", device: str = \"cpu\") -> None:\n    from sklearn.datasets import load_breast_cancer\n\n    n_rounds = 10\n    X, y = load_breast_cancer(return_X_y=True)\n    X = X.astype(np.float32)\n    y = y.astype(np.float32)\n\n    params = {\"tree_method\": tree_method, \"device\": device}\n\n    Xy = xgb.DMatrix(SingleBatch(data=X, label=y))\n    from_it = xgb.train(params, Xy, num_boost_round=n_rounds)\n\n    Xy = xgb.DMatrix(X, y)\n    from_dmat = xgb.train(params, Xy, num_boost_round=n_rounds)\n    assert from_it.get_dump() == from_dmat.get_dump()\n\n    X, y = load_breast_cancer(return_X_y=True, as_frame=True)\n    X = X.astype(np.float32)\n    Xy = xgb.DMatrix(SingleBatch(data=X, label=y))\n    from_pd = xgb.train(params, Xy, num_boost_round=n_rounds)\n    # remove feature info to generate exact same text representation.\n    from_pd.feature_names = None\n    from_pd.feature_types = None\n\n    assert from_pd.get_dump() == from_it.get_dump()\n\n    X, y = load_breast_cancer(return_X_y=True)\n    X = csr_matrix(X)\n    Xy = xgb.DMatrix(SingleBatch(data=X, label=y))\n    from_it = xgb.train(params, Xy, num_boost_round=n_rounds)\n\n    X, y = load_breast_cancer(return_X_y=True)\n    Xy = xgb.DMatrix(SingleBatch(data=X, 
label=y), missing=0.0)\n    from_np = xgb.train(params, Xy, num_boost_round=n_rounds)\n    assert from_np.get_dump() == from_it.get_dump()\n\n\ndef test_with_cat_single() -> None:\n    X, y = tm.make_categorical(\n        n_samples=128, n_features=3, n_categories=6, onehot=False\n    )\n    Xy = xgb.DMatrix(SingleBatch(data=X, label=y))\n    from_it = xgb.train({}, Xy, num_boost_round=3)\n\n    Xy = xgb.DMatrix(X, y)\n    from_Xy = xgb.train({}, Xy, num_boost_round=3)\n\n    jit = from_it.save_raw(raw_format=\"json\")\n    jxy = from_Xy.save_raw(raw_format=\"json\")\n    assert jit == jxy\n\n\ndef run_data_iterator(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    tree_method: str,\n    subsample: bool,\n    device: str,\n    use_cupy: bool,\n    on_host: bool,\n) -> None:\n    n_rounds = 2\n    # The test is more difficult to pass if the subsample rate is smaller as the root_sum\n    # is accumulated in parallel.  Reductions with different number of entries lead to\n    # different floating point errors.\n    subsample_rate = 0.8 if subsample else 1.0\n\n    it = IteratorForTest(\n        *make_batches(n_samples_per_batch, n_features, n_batches, use_cupy),\n        cache=\"cache\",\n        on_host=on_host,\n    )\n    if n_batches == 0:\n        with pytest.raises(ValueError, match=\"1 batch\"):\n            Xy = xgb.DMatrix(it)\n        return\n\n    Xy = xgb.DMatrix(it)\n    assert Xy.num_row() == n_samples_per_batch * n_batches\n    assert Xy.num_col() == n_features\n\n    parameters = {\n        \"tree_method\": tree_method,\n        \"max_depth\": 2,\n        \"subsample\": subsample_rate,\n        \"device\": device,\n        \"seed\": 0,\n    }\n\n    if device.find(\"cuda\") != -1:\n        parameters[\"sampling_method\"] = \"gradient_based\"\n\n    results_from_it: Dict[str, Dict[str, List[float]]] = {}\n    from_it = xgb.train(\n        parameters,\n        Xy,\n        num_boost_round=n_rounds,\n        evals=[(Xy, 
\"Train\")],\n        evals_result=results_from_it,\n        verbose_eval=False,\n    )\n    if not subsample:\n        assert non_increasing(results_from_it[\"Train\"][\"rmse\"])\n\n    X, y, w = it.as_arrays()\n    if use_cupy:\n        _y = y.get()\n    else:\n        _y = y\n    np.testing.assert_allclose(Xy.get_label(), _y)\n\n    Xy = xgb.DMatrix(X, y, weight=w)\n    assert Xy.num_row() == n_samples_per_batch * n_batches\n    assert Xy.num_col() == n_features\n\n    results_from_arrays: Dict[str, Dict[str, List[float]]] = {}\n    from_arrays = xgb.train(\n        parameters,\n        Xy,\n        num_boost_round=n_rounds,\n        evals=[(Xy, \"Train\")],\n        evals_result=results_from_arrays,\n        verbose_eval=False,\n    )\n    arr_predt = from_arrays.predict(Xy)\n    if not subsample:\n        assert non_increasing(results_from_arrays[\"Train\"][\"rmse\"])\n\n    rtol = 1e-2\n    # CPU sketching is more memory efficient but less consistent due to small chunks\n    it_predt = from_it.predict(Xy)\n    arr_predt = from_arrays.predict(Xy)\n    np.testing.assert_allclose(it_predt, arr_predt, rtol=rtol)\n\n    np.testing.assert_allclose(\n        results_from_it[\"Train\"][\"rmse\"],\n        results_from_arrays[\"Train\"][\"rmse\"],\n        rtol=rtol,\n    )\n\n\n@given(\n    strategies.integers(0, 1024),\n    strategies.integers(1, 7),\n    strategies.integers(0, 13),\n    strategies.booleans(),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\ndef test_data_iterator(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    subsample: bool,\n) -> None:\n    run_data_iterator(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"approx\",\n        subsample,\n        \"cpu\",\n        False,\n        False,\n    )\n    run_data_iterator(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"hist\",\n        subsample,\n        \"cpu\",\n        False,\n        
False,\n    )\n\n\nclass IterForCacheTest(xgb.DataIter):\n    def __init__(\n        self, x: np.ndarray, y: np.ndarray, w: np.ndarray, release_data: bool\n    ) -> None:\n        self.kwargs = {\"data\": x, \"label\": y, \"weight\": w}\n        super().__init__(release_data=release_data)\n\n    def next(self, input_data: Callable) -> bool:\n        if self.it == 1:\n            return False\n        self.it += 1\n        input_data(**self.kwargs)\n        return True\n\n    def reset(self) -> None:\n        self.it = 0\n\n\ndef test_data_cache() -> None:\n    n_batches = 1\n    n_features = 2\n    n_samples_per_batch = 16\n    data = make_batches(n_samples_per_batch, n_features, n_batches, False)\n    batches = [v[0] for v in data]\n\n    # Test with a cache.\n    it = IterForCacheTest(batches[0], batches[1], batches[2], release_data=False)\n    transform = xgb.data._proxy_transform\n\n    called = 0\n\n    def mock(*args: Any, **kwargs: Any) -> Any:\n        nonlocal called\n        called += 1\n        return transform(*args, **kwargs)\n\n    xgb.data._proxy_transform = mock\n    xgb.QuantileDMatrix(it)\n    assert it._data_ref is weakref.ref(batches[0])\n    assert called == 1\n\n    # Test without a cache.\n    called = 0\n    it = IterForCacheTest(batches[0], batches[1], batches[2], release_data=True)\n    xgb.QuantileDMatrix(it)\n    assert called == 4\n\n    xgb.data._proxy_transform = transform\n\n\ndef test_cat_check(tmp_path: Path) -> None:\n    n_batches = 3\n    n_features = 2\n    n_samples_per_batch = 16\n\n    batches = []\n\n    for i in range(n_batches):\n        X_df, y_arr = tm.make_categorical(\n            n_samples=n_samples_per_batch,\n            n_features=n_features,\n            n_categories=3,\n            onehot=False,\n        )\n        batches.append((X_df, y_arr))\n\n    X, y = list(zip(*batches))\n    it = tm.IteratorForTest(X, y, None, cache=None, on_host=False)\n    Xy: xgb.DMatrix = xgb.QuantileDMatrix(it)\n\n    with 
pytest.raises(ValueError, match=\"categorical features\"):\n        xgb.train({\"tree_method\": \"exact\"}, Xy)\n\n    Xy = xgb.DMatrix(X[0], y[0])\n    with pytest.raises(ValueError, match=\"categorical features\"):\n        xgb.train({\"tree_method\": \"exact\"}, Xy)\n\n    cache_path = tmp_path / \"cache\"\n    it = tm.IteratorForTest(X, y, None, cache=str(cache_path), on_host=False)\n    Xy = xgb.DMatrix(it, enable_categorical=True)\n    with pytest.raises(ValueError, match=\"categorical features\"):\n        xgb.train({\"booster\": \"gblinear\"}, Xy)\n\n\n@given(\n    strategies.integers(1, 64),\n    strategies.integers(1, 8),\n    strategies.integers(1, 4),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\ndef test_quantile_objective(\n    n_samples_per_batch: int, n_features: int, n_batches: int\n) -> None:\n    check_quantile_loss_extmem(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"hist\",\n        \"cpu\",\n    )\n    check_quantile_loss_extmem(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"approx\",\n        \"cpu\",\n    )\n\n\n@given(\n    strategies.integers(1, 4096),\n    strategies.integers(1, 8),\n    strategies.integers(1, 4),\n    strategies.integers(2, 16),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\n@tm.timeout(45)\ndef test_extmem_qdm(\n    n_samples_per_batch: int, n_features: int, n_batches: int, n_bins: int\n) -> None:\n    check_extmem_qdm(\n        n_samples_per_batch,\n        n_features,\n        n_batches=n_batches,\n        n_bins=n_bins,\n        device=\"cpu\",\n        on_host=False,\n        is_cat=False,\n    )\n\n\n@given(\n    strategies.integers(1, 4096),\n    strategies.integers(1, 4),\n    strategies.integers(2, 16),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\ndef test_categorical_extmem_qdm(\n    n_samples_per_batch: int, n_batches: int, n_bins: int\n) -> None:\n    check_extmem_qdm(\n        
n_samples_per_batch,\n        4,\n        n_batches=n_batches,\n        n_bins=n_bins,\n        device=\"cpu\",\n        on_host=False,\n        is_cat=True,\n    )\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_categorical_missing(tree_method: str) -> None:\n    check_categorical_missing(\n        1024, 4, 5, device=\"cpu\", tree_method=tree_method, extmem=True\n    )\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_categorical_ohe(tree_method: str) -> None:\n    check_categorical_ohe(\n        rows=1024,\n        cols=16,\n        rounds=4,\n        cats=5,\n        device=\"cpu\",\n        tree_method=tree_method,\n        extmem=True,\n    )\n\n\ndef test_invalid_cat_batches() -> None:\n    check_invalid_cat_batches(\"cpu\")\n\n\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_uneven_sizes() -> None:\n    check_uneven_sizes(\"cpu\")\n"
  },
  {
    "path": "tests/python/test_demos.py",
    "content": "import os\nimport subprocess\nimport sys\nfrom pathlib import Path\n\nimport pytest\nimport xgboost\nfrom xgboost import testing as tm\n\npytestmark = tm.timeout(30)\n\nDEMO_DIR = tm.demo_dir(__file__)\nPYTHON_DEMO_DIR = os.path.join(DEMO_DIR, \"guide-python\")\nCLI_DEMO_DIR = os.path.join(DEMO_DIR, \"CLI\")\n\n\nPYTHON = sys.executable\n\n\ndef test_basic_walkthrough(tmp_path: Path) -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"basic_walkthrough.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd, cwd=tmp_path)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_categorical() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"categorical.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_cat_pipeline() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"cat_pipeline.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_matplotlib())\ndef test_custom_multiclass_objective() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"custom_softmax.py\")\n    cmd = [PYTHON, script, \"--plot=0\"]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_matplotlib())\ndef test_custom_rmsle_objective() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"custom_rmsle.py\")\n    cmd = [PYTHON, script, \"--plot=0\"]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_matplotlib())\ndef test_feature_weights_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"feature_weights.py\")\n    cmd = [PYTHON, script, \"--plot=0\"]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_sklearn_demo(tmp_path: Path) -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"sklearn_examples.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd, cwd=tmp_path)\n    assert (tmp_path / 
\"best_calif.pkl\").exists()\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\n@pytest.mark.timeout(60)\ndef test_sklearn_parallel_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"sklearn_parallel.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_sklearn_evals_result_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"sklearn_evals_result.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_boost_from_prediction_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"boost_from_prediction.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_predict_first_ntree_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"predict_first_ntree.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_individual_trees() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"individual_trees.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_predict_leaf_indices_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"predict_leaf_indices.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_generalized_linear_model_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"generalized_linear_model.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_cross_validation_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"cross_validation.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\ndef test_external_memory_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"external_memory.py\")\n    cmd = [PYTHON, script, \"--device=cpu\"]\n    subprocess.check_call(cmd)\n\n\ndef test_distributed_extmem_basic_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"distributed_extmem_basic.py\")\n    cmd = [PYTHON, script, \"--device=cpu\"]\n    subprocess.check_call(cmd)\n\n\ndef 
test_evals_result_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"evals_result.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_aft_demo(tmp_path: Path) -> None:\n    script = os.path.join(DEMO_DIR, \"aft_survival\", \"aft_survival_demo.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd, cwd=tmp_path)\n    assert (tmp_path / \"aft_model.json\").exists()\n\n\n@pytest.mark.skipif(**tm.no_matplotlib())\ndef test_callbacks_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"callbacks.py\")\n    cmd = [PYTHON, script, \"--plot=0\"]\n    subprocess.check_call(cmd)\n\n\ndef test_continuation_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"continuation.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\n@pytest.mark.skipif(**tm.no_matplotlib())\ndef test_multioutput_reg() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"multioutput_regression.py\")\n    cmd = [PYTHON, script, \"--plot=0\"]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_quantile_reg() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"quantile_regression.py\")\n    cmd = [PYTHON, script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_ubjson())\ndef test_json_model(tmp_path: Path) -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"model_parser.py\")\n\n    def run_test(reg: xgboost.XGBRegressor, suffix: str) -> None:\n        path = tmp_path / f\"reg_{suffix}.json\"\n        reg.save_model(path)\n        cmd = [PYTHON, script, f\"--model={path}\"]\n        subprocess.check_call(cmd)\n\n        path = tmp_path / f\"reg_{suffix}.ubj\"\n        reg.save_model(path)\n        cmd = [PYTHON, script, f\"--model={path}\"]\n        subprocess.check_call(cmd)\n\n    # numerical\n    X, y = tm.make_sparse_regression(100, 10, 0.5, 
False)\n    reg = xgboost.XGBRegressor(n_estimators=2, tree_method=\"hist\")\n    reg.fit(X, y)\n    run_test(reg, \"numerical\")\n\n    # categorical\n    X, y = tm.make_categorical(\n        n_samples=1000,\n        n_features=10,\n        n_categories=6,\n        onehot=False,\n        sparsity=0.5,\n        cat_ratio=0.5,\n        shuffle=True,\n    )\n    reg = xgboost.XGBRegressor(n_estimators=2, tree_method=\"hist\")\n    reg.fit(X, y)\n    run_test(reg, \"categorical\")\n\n\n# - gpu_acceleration is not tested because the covertype dataset is too large.\n# - gamma regression is not tested as it requires running an R script first.\n# - aft viz is not tested because plotting is not controlled.\n# - aft tuning is not tested due to an extra dependency.\n"
  },
  {
    "path": "tests/python/test_dmatrix.py",
    "content": "import csv\nimport os\nimport warnings\nfrom pathlib import Path\n\nimport numpy as np\nimport pytest\nimport scipy.sparse\nimport xgboost as xgb\nfrom hypothesis import given, settings, strategies\nfrom scipy.sparse import csr_matrix, rand\nfrom xgboost import testing as tm\nfrom xgboost.core import DataSplitMode\nfrom xgboost.testing.data import np_dtypes, run_base_margin_info\nfrom xgboost.testing.utils import predictor_equal\n\ndpath = \"demo/data/\"\nrng = np.random.RandomState(1994)\n\n\nclass TestDMatrix:\n    def test_warn_missing(self):\n        from xgboost import data\n\n        with pytest.warns(UserWarning):\n            data._warn_unused_missing(\"uri\", 4)\n\n        with warnings.catch_warnings():\n            warnings.simplefilter(\"error\")\n            data._warn_unused_missing(\"uri\", None)\n            data._warn_unused_missing(\"uri\", np.nan)\n\n        with warnings.catch_warnings():\n            warnings.simplefilter(\"error\")\n            x = rng.randn(10, 10)\n            y = rng.randn(10)\n\n            xgb.DMatrix(x, y, missing=4)\n\n    def test_dmatrix_numpy_init(self):\n        data = np.random.randn(5, 5)\n        dm = xgb.DMatrix(data)\n        assert dm.num_row() == 5\n        assert dm.num_col() == 5\n\n        data = np.array([[1, 2], [3, 4]])\n        dm = xgb.DMatrix(data)\n        assert dm.num_row() == 2\n        assert dm.num_col() == 2\n\n        # 0d array\n        with pytest.raises(ValueError):\n            xgb.DMatrix(np.array(1))\n        # 1d array\n        with pytest.raises(ValueError):\n            xgb.DMatrix(np.array([1, 2, 3]))\n        # 3d array\n        data = np.random.randn(5, 5, 5)\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data)\n        # object dtype\n        data = np.array([[\"a\", \"b\"], [\"c\", \"d\"]])\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data)\n\n    def test_np_view(self):\n        # Sliced Float32 array\n        y = 
np.array([12, 34, 56], np.float32)[::2]\n        from_view = xgb.DMatrix(np.array([[]]), label=y).get_label()\n        from_array = xgb.DMatrix(np.array([[]]), label=y + 0).get_label()\n        assert from_view.shape == from_array.shape\n        assert (from_view == from_array).all()\n\n        # Sliced UInt array\n        z = np.array([12, 34, 56], np.uint32)[::2]\n        dmat = xgb.DMatrix(np.array([[]]))\n        dmat.set_uint_info(\"group\", z)\n        from_view = dmat.get_uint_info(\"group_ptr\")\n        dmat = xgb.DMatrix(np.array([[]]))\n        dmat.set_uint_info(\"group\", z + 0)\n        from_array = dmat.get_uint_info(\"group_ptr\")\n        assert from_view.shape == from_array.shape\n        assert (from_view == from_array).all()\n\n    def test_slice(self):\n        X = rng.randn(100, 100)\n        y = rng.randint(low=0, high=3, size=100).astype(np.float32)\n        d = xgb.DMatrix(X, y)\n        np.testing.assert_equal(d.get_label(), y)\n\n        fw = rng.uniform(size=100).astype(np.float32)\n        d.set_info(feature_weights=fw)\n\n        # base margin is per-class in multi-class classifier\n        base_margin = rng.randn(100, 3).astype(np.float32)\n        d.set_base_margin(base_margin)\n        np.testing.assert_allclose(d.get_base_margin().reshape(100, 3), base_margin)\n\n        ridxs = [1, 2, 3, 4, 5, 6]\n        sliced = d.slice(ridxs)\n\n        # Slicing works with label and other meta info fields\n        np.testing.assert_equal(sliced.get_label(), y[1:7])\n        np.testing.assert_equal(sliced.get_float_info(\"feature_weights\"), fw)\n        np.testing.assert_equal(sliced.get_base_margin(), base_margin[1:7, :])\n\n        # Slicing a DMatrix results into a DMatrix that's equivalent to a DMatrix that's\n        # constructed from the corresponding NumPy slice\n        d2 = xgb.DMatrix(X[1:7, :], y[1:7])\n        d2.set_base_margin(base_margin[1:7, :])\n        eval_res = {}\n        _ = xgb.train(\n            {\"num_class\": 3, 
\"objective\": \"multi:softprob\", \"eval_metric\": \"mlogloss\"},\n            d,\n            num_boost_round=2,\n            evals=[(d2, \"d2\"), (sliced, \"sliced\")],\n            evals_result=eval_res,\n        )\n        np.testing.assert_equal(\n            eval_res[\"d2\"][\"mlogloss\"], eval_res[\"sliced\"][\"mlogloss\"]\n        )\n\n        ridxs_arr = np.array(ridxs)[1:]  # handles numpy slice correctly\n        sliced = d.slice(ridxs_arr)\n        np.testing.assert_equal(sliced.get_label(), y[2:7])\n\n    def test_feature_names_slice(self):\n        data = np.random.randn(5, 5)\n\n        # different length\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data, feature_names=list(\"abcdef\"))\n        # contains duplicates\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data, feature_names=[\"a\", \"b\", \"c\", \"d\", \"d\"])\n        # contains symbol\n        with pytest.raises(ValueError):\n            xgb.DMatrix(data, feature_names=[\"a\", \"b\", \"c\", \"d\", \"e<1\"])\n\n        dm = xgb.DMatrix(data)\n        dm.feature_names = list(\"abcde\")\n        assert dm.feature_names == list(\"abcde\")\n\n        assert dm.slice([0, 1]).num_col() == dm.num_col()\n        assert dm.slice([0, 1]).feature_names == dm.feature_names\n\n        with pytest.raises(ValueError, match=r\"Duplicates found: \\[.*'bar'.*\\]\"):\n            dm.feature_names = [\"bar\"] * (data.shape[1] - 2) + [\"a\", \"b\"]\n\n        dm.feature_types = list(\"qiqiq\")\n        assert dm.feature_types == list(\"qiqiq\")\n\n        with pytest.raises(ValueError):\n            dm.feature_types = list(\"abcde\")\n\n        # reset\n        dm.feature_names = None\n        dm.feature_types = None\n        assert dm.feature_names is None\n        assert dm.feature_types is None\n\n    def test_feature_names(self):\n        data = np.random.randn(100, 5)\n        target = np.array([0, 1] * 50)\n\n        cases = [\n            [\"Feature1\", 
\"Feature2\", \"Feature3\", \"Feature4\", \"Feature5\"],\n            [\"要因1\", \"要因2\", \"要因3\", \"要因4\", \"要因5\"],\n        ]\n\n        for features in cases:\n            dm = xgb.DMatrix(data, label=target, feature_names=features)\n            assert dm.feature_names == features\n            assert dm.num_row() == 100\n            assert dm.num_col() == 5\n\n            params = {\n                \"objective\": \"multi:softprob\",\n                \"eval_metric\": \"mlogloss\",\n                \"eta\": 0.3,\n                \"num_class\": 3,\n            }\n\n            bst = xgb.train(params, dm, num_boost_round=10)\n            scores = bst.get_fscore()\n            assert list(sorted(k for k in scores)) == features\n\n            dummy = np.random.randn(5, 5)\n            dm = xgb.DMatrix(dummy, feature_names=features)\n            bst.predict(dm)\n\n            # different feature name must raises error\n            dm = xgb.DMatrix(dummy, feature_names=list(\"abcde\"))\n            with pytest.raises(ValueError):\n                bst.predict(dm)\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_save_binary(self, tmp_path: Path) -> None:\n        import pandas as pd\n\n        path = tmp_path / \"m.dmatrix\"\n        data = pd.DataFrame({\"a\": [0, 1], \"b\": [2, 3], \"c\": [4, 5]})\n        m0 = xgb.DMatrix(data.loc[:, [\"a\", \"b\"]], data[\"c\"])\n        assert m0.feature_names == [\"a\", \"b\"]\n        m0.save_binary(path)\n        m1 = xgb.DMatrix(path)\n        assert m0.feature_names == m1.feature_names\n        assert m0.feature_types == m1.feature_types\n\n    def test_get_info(self):\n        dtrain, _ = tm.load_agaricus(__file__)\n        dtrain.get_float_info(\"label\")\n        dtrain.get_float_info(\"weight\")\n        dtrain.get_float_info(\"base_margin\")\n        dtrain.get_uint_info(\"group_ptr\")\n\n        group_len = np.array([2, 3, 4])\n        dtrain.set_group(group_len)\n        np.testing.assert_equal(group_len, 
dtrain.get_group())\n\n    def test_qid(self):\n        rows = 100\n        cols = 10\n        X, y = rng.randn(rows, cols), rng.randn(rows)\n        qid = rng.randint(low=0, high=10, size=rows, dtype=np.uint32)\n        qid = np.sort(qid)\n\n        Xy = xgb.DMatrix(X, y)\n        Xy.set_info(qid=qid)\n        group_ptr = Xy.get_uint_info(\"group_ptr\")\n        assert group_ptr[0] == 0\n        assert group_ptr[-1] == rows\n\n    def test_feature_weights(self):\n        kRows = 10\n        kCols = 50\n        rng = np.random.RandomState(1994)\n        fw = rng.uniform(size=kCols)\n        X = rng.randn(kRows, kCols)\n        m = xgb.DMatrix(X)\n        m.set_info(feature_weights=fw)\n        np.testing.assert_allclose(fw, m.get_float_info(\"feature_weights\"))\n        # Handle empty\n        m.set_info(feature_weights=np.empty((0,)))\n\n        assert m.get_float_info(\"feature_weights\").shape[0] == 0\n\n        fw -= 1\n\n        with pytest.raises(ValueError):\n            m.set_info(feature_weights=fw)\n\n    def test_sparse_dmatrix_csr(self, tmp_path: Path) -> None:\n        nrow = 100\n        ncol = 1000\n        x = rand(nrow, ncol, density=0.0005, format=\"csr\", random_state=rng)\n        assert x.indices.max() < ncol\n        x.data[:] = 1\n        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))\n        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)\n        watchlist = [(dtrain, \"train\")]\n        param = {\"max_depth\": 3, \"objective\": \"binary:logistic\"}\n        bst = xgb.train(param, dtrain, 5, evals=watchlist)\n        bst.predict(dtrain)\n\n        i32 = csr_matrix((x.data.astype(np.int32), x.indices, x.indptr), shape=x.shape)\n        f32 = csr_matrix(\n            (i32.data.astype(np.float32), x.indices, x.indptr), shape=x.shape\n        )\n        di32 = xgb.DMatrix(i32)\n        df32 = xgb.DMatrix(f32)\n        dense = xgb.DMatrix(f32.toarray(), missing=0)\n\n        path = tmp_path / \"f32.dmatrix\"\n        
df32.save_binary(path)\n        with open(path, \"rb\") as fd:\n            df32_buffer = np.array(fd.read())\n        path = tmp_path / \"i32.dmatrix\"\n        di32.save_binary(path)\n        with open(path, \"rb\") as fd:\n            di32_buffer = np.array(fd.read())\n\n        path = tmp_path / \"dense.dmatrix\"\n        dense.save_binary(path)\n        with open(path, \"rb\") as fd:\n            dense_buffer = np.array(fd.read())\n\n        np.testing.assert_equal(df32_buffer, di32_buffer)\n        np.testing.assert_equal(df32_buffer, dense_buffer)\n\n    def test_sparse_dmatrix_csc(self):\n        nrow = 1000\n        ncol = 100\n        x = rand(nrow, ncol, density=0.0005, format=\"csc\", random_state=rng)\n        assert x.indices.max() < nrow - 1\n        x.data[:] = 1\n        dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))\n        assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)\n        watchlist = [(dtrain, \"train\")]\n        param = {\"max_depth\": 3, \"objective\": \"binary:logistic\"}\n        bst = xgb.train(param, dtrain, 5, evals=watchlist)\n        bst.predict(dtrain)\n\n    def test_unknown_data(self):\n        class Data:\n            pass\n\n        with pytest.raises(TypeError):\n            with pytest.warns(UserWarning):\n                d = Data()\n                xgb.DMatrix(d)\n\n        from scipy import sparse\n\n        rng = np.random.RandomState(1994)\n        X = rng.rand(10, 10)\n        y = rng.rand(10)\n        X = sparse.dok_matrix(X)\n        with pytest.warns(UserWarning, match=\"dok_matrix\"):\n            Xy = xgb.DMatrix(X, y)\n            assert Xy.num_row() == 10\n            assert Xy.num_col() == 10\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_np_categorical(self):\n        n_features = 10\n        X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)\n        X = X.values.astype(np.float32)\n        feature_types = [\"c\"] * n_features\n\n        assert 
isinstance(X, np.ndarray)\n        Xy = xgb.DMatrix(X, y, feature_types=feature_types)\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n    def test_scipy_categorical(self):\n        from scipy import sparse\n\n        n_features = 10\n        X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)\n        X = X.values.astype(np.float32)\n        feature_types = [\"c\"] * n_features\n\n        X[1, 3] = np.nan\n        X[2, 4] = np.nan\n        X = sparse.csr_matrix(X)\n\n        Xy = xgb.DMatrix(X, y, feature_types=feature_types)\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n        X = sparse.csc_matrix(X)\n\n        Xy = xgb.DMatrix(X, y, feature_types=feature_types)\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n        X = sparse.coo_matrix(X)\n\n        Xy = xgb.DMatrix(X, y, feature_types=feature_types)\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n    def test_uri_categorical(self):\n        path = os.path.join(dpath, \"agaricus.txt.train\")\n        feature_types = [\"q\"] * 5 + [\"c\"] + [\"q\"] * 120\n        Xy = xgb.DMatrix(\n            path + \"?indexing_mode=1&format=libsvm\", feature_types=feature_types\n        )\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n    def test_base_margin(self) -> None:\n        run_base_margin_info(np.asarray, xgb.DMatrix, \"cpu\")\n\n    @given(\n        strategies.integers(0, 1000),\n        strategies.integers(0, 100),\n        strategies.fractions(0, 1),\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_to_csr(self, n_samples, n_features, sparsity) -> None:\n        if n_samples == 0 or n_features == 0 or sparsity == 1.0:\n            csr = scipy.sparse.csr_matrix(np.empty((0, 0)))\n        else:\n            csr = tm.make_sparse_regression(n_samples, n_features, sparsity, 
False)[\n                0\n            ].astype(np.float32)\n        m = xgb.DMatrix(data=csr)\n        ret = m.get_data()\n        np.testing.assert_equal(csr.indptr, ret.indptr)\n        np.testing.assert_equal(csr.data, ret.data)\n        np.testing.assert_equal(csr.indices, ret.indices)\n\n    def test_dtypes(self) -> None:\n        n_samples = 128\n        n_features = 16\n        for orig, x in np_dtypes(n_samples, n_features):\n            m0 = xgb.DMatrix(orig)\n            m1 = xgb.DMatrix(x)\n            assert predictor_equal(m0, m1)\n\n\n@pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\nclass TestDMatrixColumnSplit:\n    def test_numpy(self):\n        def verify_numpy():\n            data = np.random.randn(5, 5)\n            dm = xgb.DMatrix(data, data_split_mode=DataSplitMode.COL)\n            assert dm.num_row() == 5\n            assert dm.num_col() == 5 * xgb.collective.get_world_size()\n            assert dm.feature_names is None\n            assert dm.feature_types is None\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_numpy)\n\n    def test_numpy_feature_names(self):\n        def verify_numpy_feature_names():\n            world_size = xgb.collective.get_world_size()\n            data = np.random.randn(5, 5)\n            feature_names = [f\"feature{x}\" for x in range(5)]\n            feature_types = [\"float\"] * 5\n            dm = xgb.DMatrix(\n                data,\n                feature_names=feature_names,\n                feature_types=feature_types,\n                data_split_mode=DataSplitMode.COL,\n            )\n            assert dm.num_row() == 5\n            assert dm.num_col() == 5 * world_size\n            assert len(dm.feature_names) == 5 * world_size\n            assert dm.feature_names == tm.column_split_feature_names(\n                feature_names, world_size\n            )\n            assert len(dm.feature_types) == 5 * world_size\n            assert dm.feature_types == [\"float\"] 
* 5 * world_size\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_numpy_feature_names)\n\n    def test_csr(self):\n        def verify_csr():\n            indptr = np.array([0, 2, 3, 6])\n            indices = np.array([0, 2, 2, 0, 1, 2])\n            data = np.array([1, 2, 3, 4, 5, 6])\n            X = scipy.sparse.csr_matrix((data, indices, indptr), shape=(3, 3))\n            dtrain = xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)\n            assert dtrain.num_row() == 3\n            assert dtrain.num_col() == 3 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_csr)\n\n    def test_csc(self):\n        def verify_csc():\n            row = np.array([0, 2, 2, 0, 1, 2])\n            col = np.array([0, 0, 1, 2, 2, 2])\n            data = np.array([1, 2, 3, 4, 5, 6])\n            X = scipy.sparse.csc_matrix((data, (row, col)), shape=(3, 3))\n            dtrain = xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)\n            assert dtrain.num_row() == 3\n            assert dtrain.num_col() == 3 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_csc)\n\n    def test_coo(self):\n        def verify_coo():\n            row = np.array([0, 2, 2, 0, 1, 2])\n            col = np.array([0, 0, 1, 2, 2, 2])\n            data = np.array([1, 2, 3, 4, 5, 6])\n            X = scipy.sparse.coo_matrix((data, (row, col)), shape=(3, 3))\n            dtrain = xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)\n            assert dtrain.num_row() == 3\n            assert dtrain.num_col() == 3 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_coo)\n\n    def test_uri(self, tmp_path: Path) -> None:\n        def verify_uri():\n            rank = xgb.collective.get_rank()\n            filename = tmp_path / f\"test_data_{rank}.csv\"\n\n            data = np.random.rand(5, 5)\n            with open(filename, mode=\"w\", newline=\"\") as file:\n                
writer = csv.writer(file)\n                for row in data:\n                    writer.writerow(row)\n            dtrain = xgb.DMatrix(\n                f\"{filename}?format=csv\", data_split_mode=DataSplitMode.COL\n            )\n            assert dtrain.num_row() == 5\n            assert dtrain.num_col() == 5 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_uri)\n\n    def test_list(self):\n        def verify_list():\n            data = [\n                [1, 2, 3, 4, 5],\n                [6, 7, 8, 9, 10],\n                [11, 12, 13, 14, 15],\n                [16, 17, 18, 19, 20],\n                [21, 22, 23, 24, 25],\n            ]\n            dm = xgb.DMatrix(data, data_split_mode=DataSplitMode.COL)\n            assert dm.num_row() == 5\n            assert dm.num_col() == 5 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_list)\n\n    def test_tuple(self):\n        def verify_tuple():\n            data = (\n                (1, 2, 3, 4, 5),\n                (6, 7, 8, 9, 10),\n                (11, 12, 13, 14, 15),\n                (16, 17, 18, 19, 20),\n                (21, 22, 23, 24, 25),\n            )\n            dm = xgb.DMatrix(data, data_split_mode=DataSplitMode.COL)\n            assert dm.num_row() == 5\n            assert dm.num_col() == 5 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_tuple)\n"
  },
  {
    "path": "tests/python/test_early_stopping.py",
    "content": "from typing import Tuple\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.updater import get_basescore\n\nrng = np.random.RandomState(1994)\n\n\nclass TestEarlyStopping:\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_early_stopping_nonparallel(self):\n        from sklearn.datasets import load_digits\n        from sklearn.model_selection import train_test_split\n\n        X, y = load_digits(n_class=2, return_X_y=True)\n        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n        clf1 = xgb.XGBClassifier(\n            learning_rate=0.1, early_stopping_rounds=5, eval_metric=\"auc\"\n        )\n        clf1.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n        clf2 = xgb.XGBClassifier(\n            learning_rate=0.1, early_stopping_rounds=4, eval_metric=\"auc\"\n        )\n        clf2.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n        # should be the same\n        assert clf1.best_score == clf2.best_score\n        assert clf1.best_score != 1\n        # check overfit\n        clf3 = xgb.XGBClassifier(\n            learning_rate=0.1, eval_metric=\"auc\", early_stopping_rounds=10\n        )\n        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n        base_score = get_basescore(clf3)\n        assert 0.53 > base_score[0] > 0.5\n\n        clf3 = xgb.XGBClassifier(\n            learning_rate=0.1,\n            base_score=0.5,\n            eval_metric=\"auc\",\n            early_stopping_rounds=10,\n        )\n        clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n\n        assert clf3.best_score == 1\n\n    @staticmethod\n    def assert_metrics_length(cv, expected_length):\n        for key, value in cv.items():\n            assert len(value) == expected_length\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_cv_early_stopping(self) -> None:\n        from sklearn.datasets import load_digits\n\n        X, 
y = load_digits(n_class=2, return_X_y=True)\n        dm = xgb.DMatrix(X, label=y)\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"error\",\n        }\n\n        def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n            from sklearn.metrics import mean_squared_error\n\n            labels = dtrain.get_label()\n            return \"rmse\", mean_squared_error(labels, preds)\n\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)\n        self.assert_metrics_length(cv, 10)\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)\n        self.assert_metrics_length(cv, 3)\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=1)\n        self.assert_metrics_length(cv, 1)\n\n        cv = xgb.cv(\n            params,\n            dm,\n            num_boost_round=10,\n            nfold=10,\n            custom_metric=evalerror,\n            early_stopping_rounds=10,\n        )\n        self.assert_metrics_length(cv, 10)\n        cv = xgb.cv(\n            params,\n            dm,\n            num_boost_round=10,\n            nfold=10,\n            custom_metric=evalerror,\n            early_stopping_rounds=1,\n        )\n        self.assert_metrics_length(cv, 5)\n        cv = xgb.cv(\n            params,\n            dm,\n            num_boost_round=10,\n            nfold=10,\n            custom_metric=evalerror,\n            maximize=True,\n            early_stopping_rounds=1,\n        )\n        self.assert_metrics_length(cv, 1)\n\n        with pytest.raises(ValueError, match=\"`save_best`\"):\n            cv = xgb.cv(\n                params,\n                dm,\n                num_boost_round=10,\n                nfold=10,\n                early_stopping_rounds=1,\n                callbacks=[xgb.callback.EarlyStopping(3, 
save_best=True)],\n            )\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        dm = xgb.DMatrix(X, label=y)\n        params = {\"objective\": \"binary:logistic\"}\n\n        metrics = [\n            [\"auc\"],\n            [\"error\"],\n            [\"logloss\"],\n            [\"logloss\", \"auc\"],\n            [\"logloss\", \"error\"],\n            [\"error\", \"logloss\"],\n        ]\n\n        num_iteration_history = []\n\n        # If more than one metric is given, early stopping should use the last metric\n        for i, m in enumerate(metrics):\n            result = xgb.cv(\n                params,\n                dm,\n                num_boost_round=1000,\n                nfold=5,\n                stratified=True,\n                metrics=m,\n                early_stopping_rounds=20,\n                seed=42,\n            )\n            num_iteration_history.append(len(result))\n            df = result[\"test-{}-mean\".format(m[-1])]\n            # When early stopping is invoked, the last metric should be at its best value.\n            if m[-1] == \"auc\":\n                assert np.all(df <= df.iloc[-1])\n            else:\n                assert np.all(df >= df.iloc[-1])\n        assert num_iteration_history[:3] == num_iteration_history[3:]\n"
  },
  {
    "path": "tests/python/test_eval_metrics.py",
    "content": "import numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.metrics import (\n    check_expectile_error,\n    check_precision_score,\n    check_quantile_error,\n    run_pr_auc_binary,\n    run_pr_auc_ltr,\n    run_pr_auc_multi,\n    run_roc_auc_binary,\n    run_roc_auc_multi,\n)\n\nrng = np.random.RandomState(1337)\n\n\nclass TestEvalMetrics:\n    xgb_params_01 = {\"nthread\": 1, \"eval_metric\": \"error\"}\n\n    xgb_params_02 = {\"nthread\": 1, \"eval_metric\": [\"error\"]}\n\n    xgb_params_03 = {\"nthread\": 1, \"eval_metric\": [\"rmse\", \"error\"]}\n\n    xgb_params_04 = {\"nthread\": 1, \"eval_metric\": [\"error\", \"rmse\"]}\n\n    def evalerror_01(self, preds, dtrain):\n        labels = dtrain.get_label()\n        return \"error\", float(sum(labels != (preds > 0.0))) / len(labels)\n\n    def evalerror_02(self, preds, dtrain):\n        labels = dtrain.get_label()\n        return [(\"error\", float(sum(labels != (preds > 0.0))) / len(labels))]\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def evalerror_03(self, preds, dtrain):\n        from sklearn.metrics import mean_squared_error\n\n        labels = dtrain.get_label()\n        return [\n            (\"rmse\", mean_squared_error(labels, preds)),\n            (\"error\", float(sum(labels != (preds > 0.0))) / len(labels)),\n        ]\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def evalerror_04(self, preds, dtrain):\n        from sklearn.metrics import mean_squared_error\n\n        labels = dtrain.get_label()\n        return [\n            (\"error\", float(sum(labels != (preds > 0.0))) / len(labels)),\n            (\"rmse\", mean_squared_error(labels, preds)),\n        ]\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_eval_metrics(self):\n        try:\n            from sklearn.model_selection import train_test_split\n        except ImportError:\n            from sklearn.cross_validation import train_test_split\n      
  from sklearn.datasets import load_digits\n\n        digits = load_digits(n_class=2)\n        X = digits[\"data\"]\n        y = digits[\"target\"]\n\n        Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)\n\n        dtrain = xgb.DMatrix(Xt, label=yt)\n        dvalid = xgb.DMatrix(Xv, label=yv)\n\n        watchlist = [(dtrain, \"train\"), (dvalid, \"val\")]\n\n        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)\n        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)\n        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, num_boost_round=10)\n        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]\n        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]\n\n        gbdt_01 = xgb.train(\n            self.xgb_params_01, dtrain, 10, watchlist, early_stopping_rounds=2\n        )\n        gbdt_02 = xgb.train(\n            self.xgb_params_02, dtrain, 10, watchlist, early_stopping_rounds=2\n        )\n        gbdt_03 = xgb.train(\n            self.xgb_params_03, dtrain, 10, watchlist, early_stopping_rounds=2\n        )\n        gbdt_04 = xgb.train(\n            self.xgb_params_04, dtrain, 10, watchlist, early_stopping_rounds=2\n        )\n        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]\n        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]\n        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]\n\n        gbdt_01 = xgb.train(\n            self.xgb_params_01,\n            dtrain,\n            10,\n            watchlist,\n            early_stopping_rounds=2,\n            custom_metric=self.evalerror_01,\n        )\n        gbdt_02 = xgb.train(\n            self.xgb_params_02,\n            dtrain,\n            10,\n            watchlist,\n            early_stopping_rounds=2,\n            custom_metric=self.evalerror_02,\n        )\n        gbdt_03 = xgb.train(\n            self.xgb_params_03,\n            dtrain,\n     
       10,\n            watchlist,\n            early_stopping_rounds=2,\n            custom_metric=self.evalerror_03,\n        )\n        gbdt_04 = xgb.train(\n            self.xgb_params_04,\n            dtrain,\n            10,\n            watchlist,\n            early_stopping_rounds=2,\n            custom_metric=self.evalerror_04,\n        )\n        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]\n        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]\n        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_gamma_deviance(self):\n        from sklearn.metrics import mean_gamma_deviance\n\n        rng = np.random.RandomState(1994)\n        n_samples = 100\n        n_features = 30\n\n        X = rng.randn(n_samples, n_features)\n        y = rng.randn(n_samples)\n        y = y - y.min() * 100\n\n        reg = xgb.XGBRegressor(\n            tree_method=\"hist\",\n            objective=\"reg:gamma\",\n            n_estimators=10,\n            eval_metric=\"gamma-deviance\",\n        )\n        reg.fit(X, y)\n\n        booster = reg.get_booster()\n        score = reg.predict(X)\n        gamma_dev = float(booster.eval(xgb.DMatrix(X, y)).split(\":\")[1].split(\":\")[0])\n        skl_gamma_dev = mean_gamma_deviance(y, score)\n        np.testing.assert_allclose(gamma_dev, skl_gamma_dev, atol=1e-6)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_gamma_lik(self) -> None:\n        import scipy.stats as stats\n\n        rng = np.random.default_rng(1994)\n        n_samples = 32\n        n_features = 10\n\n        X = rng.normal(0, 1, size=n_samples * n_features).reshape(\n            (n_samples, n_features)\n        )\n\n        alpha, loc, beta = 5.0, 11.1, 22\n        y = stats.gamma.rvs(\n            alpha, loc=loc, scale=beta, size=n_samples, random_state=rng\n        )\n        reg = xgb.XGBRegressor(\n            tree_method=\"hist\",\n            
objective=\"reg:gamma\",\n            n_estimators=64,\n            eval_metric=\"gamma-nloglik\",\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n\n        score = reg.predict(X)\n\n        booster = reg.get_booster()\n        nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(\":\")[1].split(\":\")[0])\n\n        # \\beta_i = - (1 / \\theta_i a)\n        # where \\theta_i is the canonical parameter\n        # XGBoost uses the canonical link function of gamma in evaluation function.\n        # so \\theta = - (1.0 / y)\n        # dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0\n        beta = -(1.0 / (-(1.0 / y)))  # == y\n        nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)\n\n        np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.parametrize(\"n_samples\", [100, 1000, 10000])\n    def test_roc_auc(self, n_samples: int) -> None:\n        run_roc_auc_binary(\"hist\", n_samples, \"cpu\")\n\n    @pytest.mark.parametrize(\n        \"n_samples,weighted\", [(4, False), (100, False), (1000, False), (10000, True)]\n    )\n    def test_roc_auc_multi(self, n_samples: int, weighted: bool) -> None:\n        run_roc_auc_multi(\"hist\", n_samples, weighted, \"cpu\")\n\n    def test_pr_auc_binary(self) -> None:\n        run_pr_auc_binary(\"hist\", \"cpu\")\n\n    def test_pr_auc_multi(self) -> None:\n        run_pr_auc_multi(\"hist\", \"cpu\")\n\n    def test_pr_auc_ltr(self) -> None:\n        run_pr_auc_ltr(\"hist\", \"cpu\")\n\n    def test_precision_score(self) -> None:\n        check_precision_score(\"hist\", \"cpu\")\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_quantile_error(self) -> None:\n        check_quantile_error(\"hist\", \"cpu\")\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_expectile_error(self) -> None:\n        check_expectile_error(\"hist\", \"cpu\")\n\n    def 
test_expectile_uniform_convergence(self) -> None:\n        rng = np.random.default_rng(42)\n        n_samples = 1_000\n        y = rng.random(n_samples)\n        X = np.zeros((n_samples, 1))\n        dtrain = xgb.DMatrix(X, label=y)\n\n        def uniform_expectile(alpha: float) -> float:\n            sqrt_alpha = np.sqrt(alpha)\n            sqrt_one_minus = np.sqrt(1.0 - alpha)\n            return sqrt_alpha / (sqrt_alpha + sqrt_one_minus)\n\n        params = {\n            \"tree_method\": \"hist\",\n            \"objective\": \"reg:expectileerror\",\n            \"max_depth\": 1,\n            \"min_child_weight\": 0.0,\n            \"gamma\": 0.0,\n            \"lambda\": 0.0,\n            \"alpha\": 0.0,\n            \"subsample\": 1.0,\n            \"colsample_bytree\": 1.0,\n            \"eta\": 0.2,\n        }\n        num_boost_round = 200\n        atol = 1e-2\n\n        for alpha in [0.1, 0.5, 0.9]:\n            params[\"expectile_alpha\"] = alpha\n            booster = xgb.train(params, dtrain, num_boost_round=num_boost_round)\n            pred = float(booster.predict(dtrain).mean())\n            expected = uniform_expectile(alpha)\n            np.testing.assert_allclose(pred, expected, rtol=atol, atol=atol)\n"
  },
  {
    "path": "tests/python/test_interaction_constraints.py",
    "content": "import pytest\n\nfrom xgboost import testing as tm\nfrom xgboost.testing.interaction_constraints import (\n    run_interaction_constraints,\n    training_accuracy,\n)\n\n\nclass TestInteractionConstraints:\n    def test_exact_interaction_constraints(self) -> None:\n        run_interaction_constraints(tree_method=\"exact\", device=\"cpu\")\n\n    def test_hist_interaction_constraints(self) -> None:\n        run_interaction_constraints(tree_method=\"hist\", device=\"cpu\")\n\n    def test_approx_interaction_constraints(self) -> None:\n        run_interaction_constraints(tree_method=\"approx\", device=\"cpu\")\n\n    def test_interaction_constraints_feature_names(self) -> None:\n        with pytest.raises(ValueError):\n            constraints = [(\"feature_0\", \"feature_1\")]\n            run_interaction_constraints(\n                tree_method=\"exact\", device=\"cpu\", interaction_constraints=constraints\n            )\n\n        with pytest.raises(ValueError):\n            constraints = [(\"feature_0\", \"feature_3\")]\n            feature_names = [\"feature_0\", \"feature_1\", \"feature_2\"]\n            run_interaction_constraints(\n                tree_method=\"exact\",\n                device=\"cpu\",\n                feature_names=feature_names,\n                interaction_constraints=constraints,\n            )\n\n        constraints = [(\"feature_0\", \"feature_1\")]\n        feature_names = [\"feature_0\", \"feature_1\", \"feature_2\"]\n        run_interaction_constraints(\n            tree_method=\"exact\",\n            device=\"cpu\",\n            feature_names=feature_names,\n            interaction_constraints=constraints,\n        )\n\n        constraints_lst = [[\"feature_0\", \"feature_1\"], [\"feature_2\"]]\n        feature_names = [\"feature_0\", \"feature_1\", \"feature_2\"]\n        run_interaction_constraints(\n            tree_method=\"exact\",\n            device=\"cpu\",\n            feature_names=feature_names,\n           
 interaction_constraints=constraints_lst,\n        )\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\", \"exact\"])\n    def test_hist_training_accuracy(self, tree_method: str) -> None:\n        dpath = \"demo/data/\"\n        training_accuracy(tree_method=tree_method, dpath=dpath, device=\"cpu\")\n"
  },
  {
    "path": "tests/python/test_intercept.py",
    "content": "from itertools import product\n\nimport pytest\n\nfrom xgboost.testing.intercept import (\n    run_adaptive,\n    run_exp_family,\n    run_init_estimation,\n    run_logistic_degenerate,\n)\n\n\ndef test_init_estimation() -> None:\n    run_init_estimation(\"hist\", \"cpu\")\n\n\n@pytest.mark.parametrize(\n    \"tree_method,weighted\", list(product([\"approx\", \"hist\"], [True, False]))\n)\ndef test_adaptive(tree_method: str, weighted: bool) -> None:\n    run_adaptive(tree_method, weighted, \"cpu\")\n\n\ndef test_exp_family() -> None:\n    run_exp_family(\"cpu\")\n\n\ndef test_logistic_degenerate() -> None:\n    run_logistic_degenerate(\"cpu\")\n"
  },
  {
    "path": "tests/python/test_linear.py",
    "content": "from typing import Dict\n\nfrom hypothesis import given, note, settings, strategies\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\npytestmark = tm.timeout(20)\n\n\nparameter_strategy = strategies.fixed_dictionaries({\n    'booster': strategies.just('gblinear'),\n    'eta': strategies.floats(0.01, 0.25),\n    'tolerance': strategies.floats(1e-5, 1e-2),\n    'nthread': strategies.integers(1, 4),\n})\n\ncoord_strategy = strategies.fixed_dictionaries({\n    'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',\n                                                 'greedy', 'thrifty']),\n    'top_k': strategies.integers(1, 10),\n})\n\n\ndef train_result(param: dict, dmat: xgb.DMatrix, num_rounds: int) -> Dict[str, Dict]:\n    result: Dict[str, Dict] = {}\n    xgb.train(\n        param,\n        dmat,\n        num_rounds,\n        evals=[(dmat, \"train\")],\n        verbose_eval=False,\n        evals_result=result,\n    )\n    return result\n\n\nclass TestLinear:\n    @given(\n        parameter_strategy,\n        strategies.integers(10, 50),\n        tm.make_dataset_strategy(),\n        coord_strategy\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_coordinate(self, param, num_rounds, dataset, coord_param):\n        param['updater'] = 'coord_descent'\n        param.update(coord_param)\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]\n        note(result)\n        assert tm.non_increasing(result, 5e-4)\n\n    # Loss is not guaranteed to always decrease because of regularisation parameters\n    # We test a weaker condition that the loss has not increased between the first and last\n    # iteration\n    @given(\n        parameter_strategy,\n        strategies.integers(10, 50),\n        tm.make_dataset_strategy(),\n        coord_strategy,\n        strategies.floats(1e-5, 0.8),\n        strategies.floats(1e-5, 
0.8)\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):\n        param['updater'] = 'coord_descent'\n        param['alpha'] = alpha\n        param['lambda'] = lambd\n        param.update(coord_param)\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]\n        note(result)\n        assert tm.non_increasing([result[0], result[-1]])\n\n    @given(\n        parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy()\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_shotgun(self, param, num_rounds, dataset):\n        param['updater'] = 'shotgun'\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]\n        note(result)\n        # shotgun is non-deterministic, so we relax the test by only using first and last\n        # iteration.\n        if len(result) > 2:\n            sampled_result = (result[0], result[-1])\n        else:\n            sampled_result = result\n        assert tm.non_increasing(sampled_result)\n\n    @given(\n        parameter_strategy,\n        strategies.integers(10, 50),\n        tm.make_dataset_strategy(),\n        strategies.floats(1e-5, 1.0),\n        strategies.floats(1e-5, 1.0)\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):\n        param['updater'] = 'shotgun'\n        param['alpha'] = alpha\n        param['lambda'] = lambd\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]\n        note(result)\n        assert tm.non_increasing([result[0], result[-1]])\n"
  },
  {
    "path": "tests/python/test_model_compatibility.py",
    "content": "import copy\nimport hashlib\nimport json\nimport os\nimport urllib.request\nimport zipfile\nfrom typing import Any, Dict\n\nimport generate_models as gm\nimport pytest\n\nimport xgboost\nfrom xgboost import testing as tm\nfrom xgboost.testing.updater import get_basescore\n\n\ndef run_model_param_check(name: str, config: Dict[str, Any]) -> None:\n    assert config[\"learner\"][\"learner_model_param\"][\"num_feature\"] == str(4)\n    assert config[\"learner\"][\"learner_train_param\"][\"booster\"] == \"gbtree\"\n\n    booster = config[\"learner\"][\"gradient_booster\"]\n    assert booster[\"name\"] == \"gbtree\"\n    if name.find(\"1.0.0rc1\") != -1:\n        # There's no `num_parallel_tree` in the model parameter in 1.0 (it was a\n        # configuration instead of a model parameter).\n        return\n    assert booster[\"gbtree_model_param\"][\"num_parallel_tree\"] == str(gm.kForests)\n\n\ndef run_booster_check(booster: xgboost.Booster, name: str) -> None:\n    config = json.loads(booster.save_config())\n    run_model_param_check(name, config)\n    n_rounds = get_n_rounds(name)\n    if name.find(\"cls\") != -1:\n        assert len(booster.get_dump()) == gm.kForests * n_rounds * gm.kClasses\n        base_score = get_basescore(config)\n        assert isinstance(base_score, list)\n        assert all(v == 0.5 for v in base_score)\n        assert config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"multi:softmax\"\n    elif name.find(\"logitraw\") != -1:\n        assert len(booster.get_dump()) == gm.kForests * n_rounds\n        assert config[\"learner\"][\"learner_model_param\"][\"num_class\"] == str(0)\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"binary:logitraw\"\n        )\n    elif name.find(\"logit\") != -1:\n        assert len(booster.get_dump()) == gm.kForests * n_rounds\n        assert config[\"learner\"][\"learner_model_param\"][\"num_class\"] == str(0)\n        assert (\n            
config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"binary:logistic\"\n        )\n    elif name.find(\"ltr\") != -1:\n        assert config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"rank:ndcg\"\n    elif name.find(\"aft\") != -1:\n        assert config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"survival:aft\"\n        assert (\n            config[\"learner\"][\"objective\"][\"aft_loss_param\"][\"aft_loss_distribution\"]\n            == \"normal\"\n        )\n    else:\n        assert name.find(\"reg\") != -1\n        assert len(booster.get_dump()) == gm.kForests * n_rounds\n        assert get_basescore(config) == [0.5]\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"reg:squarederror\"\n        )\n\n\ndef get_n_rounds(name: str) -> int:\n    if name.find(\"1.0.0rc1\") != -1:\n        n_rounds = 2\n    else:\n        n_rounds = gm.kRounds\n    return n_rounds\n\n\ndef run_scikit_model_check(name: str, path: str) -> None:\n    if name.find(\"reg\") != -1:\n        reg = xgboost.XGBRegressor()\n        reg.load_model(path)\n        config = json.loads(reg.get_booster().save_config())\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"reg:squarederror\"\n        )\n        assert len(reg.get_booster().get_dump()) == get_n_rounds(name) * gm.kForests\n        run_model_param_check(name, config)\n    elif name.find(\"cls\") != -1:\n        cls = xgboost.XGBClassifier()\n        cls.load_model(path)\n        n_rounds = get_n_rounds(name)\n        assert (\n            len(cls.get_booster().get_dump()) == n_rounds * gm.kForests * gm.kClasses\n        ), path\n        config = json.loads(cls.get_booster().save_config())\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"multi:softprob\"\n        ), path\n        run_model_param_check(name, config)\n    elif name.find(\"ltr\") != -1:\n        ltr = 
xgboost.XGBRanker()\n        ltr.load_model(path)\n        assert len(ltr.get_booster().get_dump()) == get_n_rounds(name) * gm.kForests\n        config = json.loads(ltr.get_booster().save_config())\n        assert config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"rank:ndcg\"\n        run_model_param_check(name, config)\n    elif name.find(\"logitraw\") != -1:\n        logit = xgboost.XGBClassifier()\n        logit.load_model(path)\n        assert len(logit.get_booster().get_dump()) == get_n_rounds(name) * gm.kForests\n        config = json.loads(logit.get_booster().save_config())\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"binary:logitraw\"\n        )\n        run_model_param_check(name, config)\n    elif name.find(\"logit\") != -1:\n        logit = xgboost.XGBClassifier()\n        logit.load_model(path)\n        assert len(logit.get_booster().get_dump()) == get_n_rounds(name) * gm.kForests\n        config = json.loads(logit.get_booster().save_config())\n        assert (\n            config[\"learner\"][\"learner_train_param\"][\"objective\"] == \"binary:logistic\"\n        )\n        run_model_param_check(name, config)\n    else:\n        assert False\n\n\ndef download(path: str) -> None:\n    \"\"\"Download the model files from S3.\"\"\"\n    zip_path, _ = urllib.request.urlretrieve(\n        \"https://xgboost-ci-jenkins-artifacts.s3-us-west-2\"\n        + \".amazonaws.com/xgboost_model_compatibility_tests-3.0.2.zip\"\n    )\n    sha = \"49d4d4db667a73590099dad9dca4f078532df05c5ea6e035ad4fa09596b1905a\"\n    if hasattr(hashlib, \"file_digest\"):  # not in py 3.10\n        with open(zip_path, \"rb\") as fd:\n            digest = hashlib.file_digest(fd, \"sha256\")  # pylint: disable=attr-defined\n            assert digest.hexdigest() == sha\n    with zipfile.ZipFile(zip_path, \"r\") as z:\n        z.extractall(path)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_model_compatibility() -> 
None:\n    \"\"\"Test model compatibility.\"\"\"\n    path = os.path.dirname(os.path.abspath(__file__))\n    path = os.path.join(path, \"models\")\n\n    if not os.path.exists(path):\n        download(path)\n\n    models = [\n        os.path.join(root, f) for root, subdir, files in os.walk(path) for f in files\n    ]\n    assert len(models) == 54\n\n    for path in models:\n        name = os.path.basename(path)\n        if name.startswith(\"xgboost-\"):\n            booster = xgboost.Booster(model_file=path)\n            run_booster_check(booster, name)\n            # Do full serialization.\n            booster = copy.copy(booster)\n            run_booster_check(booster, name)\n        elif name.startswith(\"xgboost_scikit\"):\n            run_scikit_model_check(name, path)\n        else:\n            assert False\n"
  },
  {
    "path": "tests/python/test_model_io.py",
    "content": "import json\nimport locale\nimport os\nimport pickle\nfrom pathlib import Path\nfrom typing import List\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\n\ndef json_model(model_path: str, parameters: dict) -> dict:\n    datasets = pytest.importorskip(\"sklearn.datasets\")\n\n    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)\n    if parameters.get(\"objective\", None) == \"multi:softmax\":\n        parameters[\"num_class\"] = 3\n\n    dm1 = xgb.DMatrix(X, y)\n\n    bst = xgb.train(parameters, dm1)\n    bst.save_model(model_path)\n\n    if model_path.endswith(\"ubj\"):\n        import ubjson\n\n        with open(model_path, \"rb\") as ubjfd:\n            model = ubjson.load(ubjfd)\n    else:\n        with open(model_path, \"r\") as fd:\n            model = json.load(fd)\n\n    return model\n\n\nclass TestBoosterIO:\n    def run_model_json_io(self, parameters: dict, ext: str) -> None:\n        config = xgb.config.get_config()\n        assert config[\"verbosity\"] == 1\n\n        if ext == \"ubj\" and tm.no_ubjson()[\"condition\"]:\n            pytest.skip(tm.no_ubjson()[\"reason\"])\n\n        loc = locale.getpreferredencoding(False)\n        model_path = \"test_model_json_io.\" + ext\n        j_model = json_model(model_path, parameters)\n        assert isinstance(j_model[\"learner\"], dict)\n\n        bst = xgb.Booster(model_file=model_path)\n\n        bst.save_model(fname=model_path)\n        if ext == \"ubj\":\n            import ubjson\n\n            with open(model_path, \"rb\") as ubjfd:\n                j_model = ubjson.load(ubjfd)\n        else:\n            with open(model_path, \"r\") as fd:\n                j_model = json.load(fd)\n\n        assert isinstance(j_model[\"learner\"], dict)\n\n        os.remove(model_path)\n        assert locale.getpreferredencoding(False) == loc\n\n        json_raw = bst.save_raw(raw_format=\"json\")\n        from_jraw = 
xgb.Booster()\n        from_jraw.load_model(json_raw)\n\n        ubj_raw = bst.save_raw(raw_format=\"ubj\")\n        from_ubjraw = xgb.Booster()\n        from_ubjraw.load_model(ubj_raw)\n\n        raw_json = bst.save_raw(raw_format=\"json\")\n        pretty = json.dumps(json.loads(raw_json), indent=2) + \"\\n\\n\"\n        bst.load_model(bytearray(pretty, encoding=\"ascii\"))\n\n        rng = np.random.default_rng()\n        X = rng.random(size=from_jraw.num_features() * 10).reshape(\n            (10, from_jraw.num_features())\n        )\n        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))\n        predt_from_bst = bst.predict(xgb.DMatrix(X))\n        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)\n\n    @pytest.mark.parametrize(\"ext\", [\"json\", \"ubj\"])\n    def test_model_json_io(self, ext: str) -> None:\n        parameters = {\"booster\": \"gbtree\", \"tree_method\": \"hist\"}\n        self.run_model_json_io(parameters, ext)\n        parameters = {\n            \"booster\": \"gbtree\",\n            \"tree_method\": \"hist\",\n            \"multi_strategy\": \"multi_output_tree\",\n            \"objective\": \"multi:softmax\",\n        }\n        self.run_model_json_io(parameters, ext)\n        parameters = {\"booster\": \"gblinear\"}\n        self.run_model_json_io(parameters, ext)\n        parameters = {\"booster\": \"dart\", \"tree_method\": \"hist\"}\n        self.run_model_json_io(parameters, ext)\n\n    def test_categorical_model_io(self, tmp_path: Path) -> None:\n        X, y = tm.make_categorical(256, 16, 71, onehot=False)\n        Xy = xgb.DMatrix(X, y)\n        booster = xgb.train({\"tree_method\": \"approx\"}, Xy, num_boost_round=16)\n        predt_0 = booster.predict(Xy)\n\n        path = tmp_path / \"model.json\"\n        booster.save_model(path)\n        booster = xgb.Booster(model_file=path)\n        predt_1 = booster.predict(Xy)\n        np.testing.assert_allclose(predt_0, predt_1)\n\n        path = tmp_path / 
\"model.ubj\"\n        booster.save_model(path)\n        booster = xgb.Booster(model_file=path)\n        predt_1 = booster.predict(Xy)\n        np.testing.assert_allclose(predt_0, predt_1)\n\n    def test_with_pathlib(self, tmp_path: Path) -> None:\n        \"\"\"Saving and loading model files from paths.\"\"\"\n        save_path = tmp_path / \"model.ubj\"\n\n        rng = np.random.default_rng(1994)\n\n        data = rng.normal(size=(100, 2))\n        target = np.array([0, 1] * 50)\n        features = [\"Feature1\", \"Feature2\"]\n\n        dm = xgb.DMatrix(data, label=target, feature_names=features)\n        params = {\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"logloss\",\n            \"eta\": 0.3,\n            \"max_depth\": 1,\n        }\n\n        bst = xgb.train(params, dm, num_boost_round=1)\n\n        # save, assert exists\n        bst.save_model(save_path)\n        assert save_path.exists()\n\n        def dump_assertions(dump: List[str]) -> None:\n            \"\"\"Assertions for the expected dump from Booster\"\"\"\n            assert len(dump) == 1, \"Expected only 1 tree to be dumped.\"\n            assert len(dump[0].splitlines()) == 3, (\n                \"Expected 1 root and 2 leaves - 3 lines.\"\n            )\n\n        # load the model again using Path\n        bst2 = xgb.Booster(model_file=save_path)\n        dump2 = bst2.get_dump()\n        dump_assertions(dump2)\n\n        # load again using load_model\n        bst3 = xgb.Booster()\n        bst3.load_model(save_path)\n        dump3 = bst3.get_dump()\n        dump_assertions(dump3)\n\n    def test_invalid_postfix(self, tmp_path: Path) -> None:\n        \"\"\"Test mis-specified model format, no special handling is expected, the\n        JSON/UBJ parser can emit parsing errors.\n\n        \"\"\"\n        X, y, w = tm.make_regression(64, 16, False)\n        booster = xgb.train({}, xgb.QuantileDMatrix(X, y, weight=w), num_boost_round=3)\n\n        def 
rename(src: Path, dst: Path) -> None:\n            if dst.exists():\n                # Windows cannot overwrite an existing file.\n                dst.unlink()\n            src.rename(dst)\n\n        path_ubj = tmp_path / \"model.ubj\"\n        path_json = tmp_path / \"model.json\"\n\n        booster.save_model(path_ubj)\n        rename(path_ubj, path_json)\n\n        with pytest.raises(ValueError, match=\"{\"):\n            xgb.Booster(model_file=path_json)\n\n        booster.save_model(path_json)\n        rename(path_json, path_ubj)\n\n        with pytest.raises(ValueError, match=\"{\"):\n            xgb.Booster(model_file=path_ubj)\n\n        # save model without file extension\n        path_no = tmp_path / \"model\"\n        with pytest.warns(UserWarning, match=\"UBJSON\"):\n            booster.save_model(path_no)\n\n        with pytest.warns(UserWarning, match=\"Using UBJSON\"):\n            booster_1 = xgb.Booster(model_file=path_no)\n        r0 = booster.save_raw(raw_format=\"json\")\n        r1 = booster_1.save_raw(raw_format=\"json\")\n        assert r0 == r1\n\n        booster.save_model(path_json)\n        rename(path_json, path_no)\n        with pytest.warns(UserWarning, match=\"Using JSON\"):\n            xgb.Booster(model_file=path_no)\n\n    def test_invalid_format(self, tmp_path: Path) -> None:\n        X, y, w = tm.make_regression(64, 16, False)\n        booster = xgb.train({}, xgb.QuantileDMatrix(X, y, weight=w), num_boost_round=3)\n        with pytest.raises(ValueError, match=\"Unknown model format\"):\n            booster.save_raw(raw_format=\"deprecated\")\n\n        path = tmp_path / \"model.deprecated\"\n        with pytest.warns(UserWarning, match=\"Saving model in the UBJSON format\"):\n            booster.save_model(path)\n\n\ndef save_load_model(model_path: str) -> None:\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import KFold\n\n    rng = np.random.RandomState(1994)\n\n    digits = 
load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])\n        xgb_model.save_model(model_path)\n\n        xgb_model = xgb.XGBClassifier()\n        xgb_model.load_model(model_path)\n\n        assert isinstance(xgb_model.classes_, np.ndarray)\n        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))\n        assert isinstance(xgb_model._Booster, xgb.Booster)\n\n        preds = xgb_model.predict(X[test_index])\n        labels = y[test_index]\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        assert err < 0.1\n        assert xgb_model.get_booster().attr(\"scikit_learn\") is None\n\n        # test native booster\n        preds = xgb_model.predict(X[test_index], output_margin=True)\n        booster = xgb.Booster(model_file=model_path)\n        predt_1 = booster.predict(xgb.DMatrix(X[test_index]), output_margin=True)\n        assert np.allclose(preds, predt_1)\n\n        with pytest.raises(TypeError):\n            xgb_model = xgb.XGBModel()\n            xgb_model.load_model(model_path)\n\n    clf = xgb.XGBClassifier(booster=\"gblinear\", early_stopping_rounds=1)\n    clf.fit(X, y, eval_set=[(X, y)])\n    best_iteration = clf.best_iteration\n    best_score = clf.best_score\n    predt_0 = clf.predict(X)\n    clf.save_model(model_path)\n    clf.load_model(model_path)\n    assert clf.booster == \"gblinear\"\n    predt_1 = clf.predict(X)\n    np.testing.assert_allclose(predt_0, predt_1)\n    assert clf.best_iteration == best_iteration\n    assert clf.best_score == best_score\n\n    clfpkl = pickle.dumps(clf)\n    clf = pickle.loads(clfpkl)\n    predt_2 = clf.predict(X)\n    np.testing.assert_allclose(predt_0, predt_2)\n    assert clf.best_iteration == best_iteration\n   
 assert clf.best_score == best_score\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_sklearn_model(tmp_path: Path) -> None:\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import train_test_split\n\n    model_path = tmp_path / \"digits.model.json\"\n    save_load_model(str(model_path))\n\n    model_path = tmp_path / \"digits.model.ubj\"\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    booster = xgb.train(\n        {\"tree_method\": \"hist\", \"objective\": \"binary:logistic\"},\n        dtrain=xgb.DMatrix(X, y),\n        num_boost_round=4,\n    )\n    predt_0 = booster.predict(xgb.DMatrix(X))\n    booster.save_model(model_path)\n    cls = xgb.XGBClassifier()\n    cls.load_model(model_path)\n\n    proba = cls.predict_proba(X)\n    assert proba.shape[0] == X.shape[0]\n    assert proba.shape[1] == 2  # binary\n\n    predt_1 = cls.predict_proba(X)[:, 1]\n    assert np.allclose(predt_0, predt_1)\n\n    cls = xgb.XGBModel()\n    cls.load_model(model_path)\n    predt_1 = cls.predict(X)\n    assert np.allclose(predt_0, predt_1)\n\n    # mclass\n    X, y = load_digits(n_class=10, return_X_y=True)\n    # small test_size to force early stop\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=0.01, random_state=1\n    )\n    clf = xgb.XGBClassifier(\n        n_estimators=64, tree_method=\"hist\", early_stopping_rounds=2\n    )\n    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])\n    score = clf.best_score\n    intercept = clf.intercept_\n    clf.save_model(model_path)\n\n    clf = xgb.XGBClassifier()\n    clf.load_model(model_path)\n    assert clf.classes_.size == 10\n    assert clf.objective == \"multi:softprob\"\n    np.testing.assert_allclose(intercept, clf.intercept_)\n\n    np.testing.assert_equal(clf.classes_, np.arange(10))\n    assert clf.n_classes_ == 10\n\n    assert clf.best_score == score\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef 
test_with_sklearn_obj_metric(tmp_path: Path) -> None:\n    from sklearn.metrics import mean_squared_error\n\n    X, y = tm.datasets.make_regression()\n    reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)\n    reg.fit(X, y)\n\n    pkl = pickle.dumps(reg)\n    reg_1 = pickle.loads(pkl)\n    assert callable(reg_1.objective)\n    assert callable(reg_1.eval_metric)\n\n    path = tmp_path / \"model.json\"\n    reg.save_model(path)\n\n    reg_2 = xgb.XGBRegressor()\n    reg_2.load_model(path)\n\n    assert not callable(reg_2.objective)\n    assert not callable(reg_2.eval_metric)\n    assert reg_2.eval_metric is None\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_attributes(tmp_path: Path) -> None:\n    from sklearn.datasets import load_iris\n\n    X, y = load_iris(return_X_y=True)\n    clf = xgb.XGBClassifier(n_estimators=2, early_stopping_rounds=1)\n    clf.fit(X, y, eval_set=[(X, y)])\n    best_iteration = clf.get_booster().best_iteration\n    assert best_iteration is not None\n    assert clf.n_estimators is not None\n    assert best_iteration == clf.n_estimators - 1\n\n    best_iteration = clf.best_iteration\n    assert best_iteration == clf.get_booster().best_iteration\n\n    clf.get_booster().set_attr(foo=\"bar\")\n\n    path = tmp_path / \"clf.json\"\n    clf.save_model(path)\n\n    clf = xgb.XGBClassifier(n_estimators=2)\n    clf.load_model(path)\n    assert clf.n_estimators is not None\n    assert clf.get_booster().best_iteration == clf.n_estimators - 1\n    assert clf.best_iteration == clf.get_booster().best_iteration\n\n    assert clf.get_booster().attributes()[\"foo\"] == \"bar\"\n"
  },
  {
    "path": "tests/python/test_monotone_constraints.py",
    "content": "from typing import Type\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.monotone_constraints import training_dset, x, y\n\ndpath = \"demo/data/\"\n\n\ndef is_increasing(y):\n    return np.count_nonzero(np.diff(y) < 0.0) == 0\n\n\ndef is_decreasing(y):\n    return np.count_nonzero(np.diff(y) > 0.0) == 0\n\n\ndef is_correctly_constrained(learner, feature_names=None):\n    n = 100\n    variable_x = np.linspace(0, 1, n).reshape((n, 1))\n    fixed_xs_values = np.linspace(0, 1, n)\n\n    for i in range(n):\n        fixed_x = fixed_xs_values[i] * np.ones((n, 1))\n        monotonically_increasing_x = np.column_stack((variable_x, fixed_x))\n        monotonically_increasing_dset = xgb.DMatrix(\n            monotonically_increasing_x, feature_names=feature_names\n        )\n        monotonically_increasing_y = learner.predict(monotonically_increasing_dset)\n\n        monotonically_decreasing_x = np.column_stack((fixed_x, variable_x))\n        monotonically_decreasing_dset = xgb.DMatrix(\n            monotonically_decreasing_x, feature_names=feature_names\n        )\n        monotonically_decreasing_y = learner.predict(monotonically_decreasing_dset)\n\n        if not (\n            is_increasing(monotonically_increasing_y)\n            and is_decreasing(monotonically_decreasing_y)\n        ):\n            return False\n\n    return True\n\n\nclass TestMonotoneConstraints:\n    def test_monotone_constraints_for_exact_tree_method(self) -> None:\n\n        # first check monotonicity for the 'exact' tree method\n        params_for_constrained_exact_method = {\n            \"tree_method\": \"exact\",\n            \"verbosity\": 1,\n            \"monotone_constraints\": \"(1, -1)\",\n        }\n        constrained_exact_method = xgb.train(\n            params_for_constrained_exact_method, training_dset\n        )\n        assert is_correctly_constrained(constrained_exact_method)\n\n    
@pytest.mark.parametrize(\n        \"tree_method,policy\",\n        [\n            (\"hist\", \"depthwise\"),\n            (\"approx\", \"depthwise\"),\n            (\"hist\", \"lossguide\"),\n            (\"approx\", \"lossguide\"),\n        ],\n    )\n    def test_monotone_constraints(self, tree_method: str, policy: str) -> None:\n        params_for_constrained = {\n            \"tree_method\": tree_method,\n            \"grow_policy\": policy,\n            \"monotone_constraints\": \"(1, -1)\",\n        }\n        constrained = xgb.train(params_for_constrained, training_dset)\n        assert is_correctly_constrained(constrained)\n\n    def test_monotone_constraints_tuple(self) -> None:\n        params_for_constrained = {\"monotone_constraints\": (1, -1)}\n        constrained = xgb.train(params_for_constrained, training_dset)\n        assert is_correctly_constrained(constrained)\n\n    @pytest.mark.parametrize(\"format\", [dict, list])\n    def test_monotone_constraints_feature_names(self, format: Type) -> None:\n\n        # next check monotonicity when initializing monotone_constraints by feature names\n        params = {\n            \"tree_method\": \"hist\",\n            \"grow_policy\": \"lossguide\",\n            \"monotone_constraints\": {\"feature_0\": 1, \"feature_1\": -1},\n        }\n\n        if format == list:\n            params = list(params.items())\n\n        with pytest.raises(ValueError):\n            xgb.train(params, training_dset)\n\n        feature_names = [\"feature_0\", \"feature_2\"]\n        training_dset_w_feature_names = xgb.DMatrix(\n            x, label=y, feature_names=feature_names\n        )\n\n        with pytest.raises(ValueError):\n            xgb.train(params, training_dset_w_feature_names)\n\n        feature_names = [\"feature_0\", \"feature_1\"]\n        training_dset_w_feature_names = xgb.DMatrix(\n            x, label=y, feature_names=feature_names\n        )\n\n        constrained_learner = xgb.train(params, 
training_dset_w_feature_names)\n\n        assert is_correctly_constrained(constrained_learner, feature_names)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_training_accuracy(self) -> None:\n        from sklearn.metrics import accuracy_score\n\n        dtrain = xgb.DMatrix(dpath + \"agaricus.txt.train?indexing_mode=1&format=libsvm\")\n        dtest = xgb.DMatrix(dpath + \"agaricus.txt.test?indexing_mode=1&format=libsvm\")\n        params = {\n            \"eta\": 1,\n            \"max_depth\": 6,\n            \"objective\": \"binary:logistic\",\n            \"tree_method\": \"hist\",\n            \"monotone_constraints\": \"(1, 0)\",\n        }\n        num_boost_round = 5\n\n        params[\"grow_policy\"] = \"lossguide\"\n        bst = xgb.train(params, dtrain, num_boost_round)\n        pred_dtest = bst.predict(dtest) < 0.5\n        assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1\n\n        params[\"grow_policy\"] = \"depthwise\"\n        bst = xgb.train(params, dtrain, num_boost_round)\n        pred_dtest = bst.predict(dtest) < 0.5\n        assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1\n"
  },
  {
    "path": "tests/python/test_multi_target.py",
    "content": "\"\"\"Tests for the CPU implementation of multi-target.\"\"\"\n\n# pylint: disable=too-many-arguments,too-many-positional-arguments\n# pylint: disable=missing-function-docstring\nfrom typing import Any, Callable, Dict\n\nimport pytest\nfrom hypothesis import given, note, settings, strategies\nfrom xgboost import testing as tm\nfrom xgboost.testing.multi_target import (\n    all_reg_objectives,\n    run_absolute_error,\n    run_column_sampling,\n    run_eta,\n    run_feature_importance_strategy_compare,\n    run_gradient_based_sampling_accuracy,\n    run_grow_policy,\n    run_mixed_strategy,\n    run_multiclass,\n    run_multilabel,\n    run_quantile_loss,\n    run_reduced_grad,\n    run_subsample,\n    run_with_iter,\n)\nfrom xgboost.testing.params import (\n    exact_parameter_strategy,\n    hist_cache_strategy,\n    hist_multi_parameter_strategy,\n    hist_parameter_strategy,\n)\nfrom xgboost.testing.updater import check_quantile_loss_rf, train_result\nfrom xgboost.testing.utils import Device\n\n\n@pytest.mark.parametrize(\"multi_strategy\", [\"multi_output_tree\", \"one_output_per_tree\"])\ndef test_quantile_loss_rf(multi_strategy: str) -> None:\n    check_quantile_loss_rf(\"cpu\", \"hist\", multi_strategy)\n    if multi_strategy == \"one_output_per_tree\":\n        check_quantile_loss_rf(\"cpu\", \"approx\", multi_strategy)\n\n\nclass TestTreeMethodMulti:\n    \"\"\"Integration tests for tree methods.\"\"\"\n\n    @given(\n        exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:\n        if dataset.name.endswith(\"-l1\"):\n            return\n        param[\"tree_method\"] = \"exact\"\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    
@given(\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.multi_dataset_strategy,\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_approx(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        param[\"tree_method\"] = \"approx\"\n        param = dataset.set_params(param)\n        param.update(hist_param)\n        param.update(cache_param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @given(\n        exact_parameter_strategy,\n        hist_multi_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.multi_dataset_strategy,\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_hist(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        if dataset.name.endswith(\"-l1\"):\n            return\n        param[\"tree_method\"] = \"hist\"\n        param = dataset.set_params(param)\n        param.update(hist_param)\n        param.update(cache_param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n\ndef test_multiclass() -> None:\n    run_multiclass(\"cpu\", None)\n\n\ndef test_multilabel() -> None:\n    run_multilabel(\"cpu\", None)\n\n\n@pytest.mark.parametrize(\"weighted\", [True, False])\ndef test_quantile_loss(weighted: bool) -> None:\n    run_quantile_loss(\"cpu\", weighted)\n\n\ndef test_absolute_error() -> None:\n    
run_absolute_error(\"cpu\")\n\n\ndef test_reduced_grad() -> None:\n    run_reduced_grad(\"cpu\")\n\n\ndef test_with_iter() -> None:\n    run_with_iter(\"cpu\")\n\n\ndef test_eta() -> None:\n    run_eta(\"cpu\")\n\n\ndef test_column_sampling() -> None:\n    run_column_sampling(\"cpu\")\n\n\n@pytest.mark.parametrize(\"grow_policy\", [\"depthwise\", \"lossguide\"])\ndef test_grow_policy(grow_policy: str) -> None:\n    run_grow_policy(\"cpu\", grow_policy)\n\n\ndef test_mixed_strategy() -> None:\n    run_mixed_strategy(\"cpu\")\n\n\ndef test_feature_importance_strategy_compare() -> None:\n    run_feature_importance_strategy_compare(\"cpu\")\n\n\n@pytest.mark.parametrize(\"obj_fn\", all_reg_objectives())\ndef test_reg_objective(obj_fn: Callable[[Device], None]) -> None:\n    obj_fn(\"cpu\")\n\n\n@pytest.mark.parametrize(\"sampling_method\", [\"uniform\", \"gradient_based\"])\ndef test_subsample(sampling_method: str) -> None:\n    run_subsample(\"cpu\", sampling_method)\n\n\ndef test_gradient_based_sampling_accuracy() -> None:\n    run_gradient_based_sampling_accuracy(\"cpu\")\n"
  },
  {
    "path": "tests/python/test_openmp.py",
    "content": "import os\nimport subprocess\nfrom pathlib import Path\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\npytestmark = tm.timeout(10)\n\n\nclass TestOMP:\n    def test_omp(self):\n        dtrain, dtest = tm.load_agaricus(__file__)\n\n        param = {\n            \"booster\": \"gbtree\",\n            \"objective\": \"binary:logistic\",\n            \"grow_policy\": \"depthwise\",\n            \"tree_method\": \"hist\",\n            \"eval_metric\": \"error\",\n            \"max_depth\": 5,\n            \"min_child_weight\": 0,\n        }\n\n        watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n        num_round = 5\n\n        def run_trial():\n            res = {}\n            bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=res)\n            metrics = [res[\"train\"][\"error\"][-1], res[\"eval\"][\"error\"][-1]]\n            preds = bst.predict(dtest)\n            return metrics, preds\n\n        def consist_test(title, n):\n            auc, pred = run_trial()\n            for i in range(n - 1):\n                auc2, pred2 = run_trial()\n                try:\n                    assert auc == auc2\n                    assert np.array_equal(pred, pred2)\n                except Exception as e:\n                    print(\"-------test %s failed, num_trial: %d-------\" % (title, i))\n                    raise e\n                auc, pred = auc2, pred2\n            return auc, pred\n\n        print(\"test approx ...\")\n        param[\"tree_method\"] = \"approx\"\n\n        n_trials = 10\n        param[\"nthread\"] = 1\n        auc_1, pred_1 = consist_test(\"approx_thread_1\", n_trials)\n\n        param[\"nthread\"] = 2\n        auc_2, pred_2 = consist_test(\"approx_thread_2\", n_trials)\n\n        param[\"nthread\"] = 3\n        auc_3, pred_3 = consist_test(\"approx_thread_3\", n_trials)\n\n        assert auc_1 == auc_2 == auc_3\n        assert np.array_equal(auc_1, auc_2)\n      
  assert np.array_equal(auc_1, auc_3)\n\n        print(\"test hist ...\")\n        param[\"tree_method\"] = \"hist\"\n\n        param[\"nthread\"] = 1\n        auc_1, pred_1 = consist_test(\"hist_thread_1\", n_trials)\n\n        param[\"nthread\"] = 2\n        auc_2, pred_2 = consist_test(\"hist_thread_2\", n_trials)\n\n        param[\"nthread\"] = 3\n        auc_3, pred_3 = consist_test(\"hist_thread_3\", n_trials)\n\n        assert auc_1 == auc_2 == auc_3\n        assert np.array_equal(auc_1, auc_2)\n        assert np.array_equal(auc_1, auc_3)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.timeout(30)\n    def test_with_omp_thread_limit(self, tmp_path: Path) -> None:\n        args = [\n            \"python\",\n            os.path.join(os.path.dirname(tm.normpath(__file__)), \"with_omp_limit.py\"),\n        ]\n        results = []\n        for i in (1, 2, 16):\n            path = tmp_path / str(i)\n            with open(path, \"w\") as fd:\n                fd.write(\"\\n\")\n            cp = args.copy()\n            cp.append(str(path))\n\n            env = os.environ.copy()\n            env[\"OMP_THREAD_LIMIT\"] = str(i)\n\n            status = subprocess.call(cp, env=env)\n            assert status == 0\n\n            with open(path, \"r\") as fd:\n                results.append(float(fd.read()))\n\n        for auc in results:\n            np.testing.assert_allclose(auc, results[0])\n"
  },
  {
    "path": "tests/python/test_ordinal.py",
    "content": "import pytest\n\nfrom xgboost import testing as tm\nfrom xgboost.testing.ordinal import (\n    run_cat_container,\n    run_cat_container_iter,\n    run_cat_container_mixed,\n    run_cat_invalid,\n    run_cat_leaf,\n    run_cat_predict,\n    run_cat_shap,\n    run_cat_thread_safety,\n    run_recode_dmatrix,\n    run_recode_dmatrix_predict,\n    run_specified_cat,\n    run_training_continuation,\n    run_update,\n    run_validation,\n)\n\npytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_arrow(), tm.no_pandas()))\n\n\ndef test_cat_container() -> None:\n    run_cat_container(\"cpu\")\n\n\ndef test_cat_container_mixed() -> None:\n    run_cat_container_mixed(\"cpu\")\n\n\ndef test_cat_container_iter() -> None:\n    run_cat_container_iter(\"cpu\")\n\n\ndef test_cat_predict() -> None:\n    run_cat_predict(\"cpu\")\n\n\ndef test_cat_invalid() -> None:\n    run_cat_invalid(\"cpu\")\n\n\ndef test_cat_thread_safety() -> None:\n    run_cat_thread_safety(\"cpu\")\n\n\ndef test_cat_shap() -> None:\n    run_cat_shap(\"cpu\")\n\n\ndef test_cat_leaf() -> None:\n    run_cat_leaf(\"cpu\")\n\n\ndef test_specified_cat() -> None:\n    run_specified_cat(\"cpu\")\n\n\ndef test_validation() -> None:\n    run_validation(\"cpu\")\n\n\ndef test_recode_dmatrix() -> None:\n    run_recode_dmatrix(\"cpu\")\n\n\ndef test_training_continuation() -> None:\n    run_training_continuation(\"cpu\")\n\n\ndef test_update() -> None:\n    run_update(\"cpu\")\n\n\ndef test_recode_dmatrix_predict() -> None:\n    run_recode_dmatrix_predict(\"cpu\")\n"
  },
  {
    "path": "tests/python/test_parse_tree.py",
    "content": "import numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.parse_tree import (\n    run_split_value_histograms,\n    run_tree_to_df_categorical,\n)\n\npytestmark = pytest.mark.skipif(**tm.no_pandas())\n\n\ndpath = \"demo/data/\"\nrng = np.random.RandomState(1994)\n\n\nclass TestTreesToDataFrame:\n    def build_model(self, max_depth, num_round):\n        dtrain, _ = tm.load_agaricus(__file__)\n        param = {\"max_depth\": max_depth, \"objective\": \"binary:logistic\", \"verbosity\": 1}\n        num_round = num_round\n        bst = xgb.train(param, dtrain, num_round)\n        return bst\n\n    def parse_dumped_model(self, booster, item_to_get, splitter):\n        item_to_get += \"=\"\n        txt_dump = booster.get_dump(with_stats=True)\n        tree_list = [tree.split(\"/n\") for tree in txt_dump]\n        split_trees = [tree[0].split(item_to_get)[1:] for tree in tree_list]\n        res = sum(\n            [float(line.split(splitter)[0]) for tree in split_trees for line in tree]\n        )\n        return res\n\n    def test_trees_to_dataframe(self):\n        bst = self.build_model(max_depth=5, num_round=10)\n        gain_from_dump = self.parse_dumped_model(\n            booster=bst, item_to_get=\"gain\", splitter=\",\"\n        )\n        cover_from_dump = self.parse_dumped_model(\n            booster=bst, item_to_get=\"cover\", splitter=\"\\n\"\n        )\n        # method being tested\n        df = bst.trees_to_dataframe()\n\n        # test for equality of gains\n        gain_from_df = df[df.Feature != \"Leaf\"][[\"Gain\"]].sum()\n        assert np.allclose(gain_from_dump, gain_from_df)\n\n        # test for equality of covers\n        cover_from_df = df.Cover.sum()\n        assert np.allclose(cover_from_dump, cover_from_df)\n\n    def test_tree_to_df_categorical(self) -> None:\n        run_tree_to_df_categorical(\"approx\", \"cpu\")\n\n    def test_tree_to_df_indicator(self, tmp_path) -> 
None:\n        \"\"\"Test trees_to_dataframe with indicator (boolean) features.\"\"\"\n        n_samples = 200\n        n_features = 5\n        X_int = rng.randint(0, 2, size=(n_samples, n_features))\n        y = np.logical_xor(X_int[:, 0], X_int[:, 1]).astype(np.float32)\n        X = X_int.astype(np.float32)\n        dtrain = xgb.DMatrix(X, label=y)\n\n        # Create a feature map with indicator type 'i'\n        fmap_path = str(tmp_path / \"fmap.txt\")\n        with open(fmap_path, \"w\", encoding=\"utf-8\") as f:\n            for i in range(n_features):\n                f.write(f\"{i}\\tf{i}\\ti\\n\")\n\n        bst = xgb.train(\n            {\"max_depth\": 3, \"objective\": \"binary:logistic\", \"verbosity\": 0},\n            dtrain,\n            num_boost_round=5,\n        )\n        df = bst.trees_to_dataframe(fmap=fmap_path)\n\n        # Basic structure checks\n        assert \"Tree\" in df.columns\n        assert \"Feature\" in df.columns\n        assert \"Gain\" in df.columns\n        assert \"Cover\" in df.columns\n        assert len(df) > 0\n\n        # Indicator nodes should have NaN splits; missing defaults to no-direction\n        non_leaf = df[df.Feature != \"Leaf\"]\n        assert len(non_leaf) > 0\n        assert non_leaf[\"Split\"].isna().all()\n        assert (non_leaf[\"Missing\"] == non_leaf[\"No\"]).all()\n\n    def test_split_value_histograms(self):\n        run_split_value_histograms(\"approx\", \"cpu\")\n"
  },
  {
    "path": "tests/python/test_pickling.py",
    "content": "import json\nimport os\nimport pickle\n\nimport numpy as np\n\nimport xgboost as xgb\n\nkRows = 100\nkCols = 10\n\n\ndef generate_data():\n    X = np.random.randn(kRows, kCols)\n    y = np.random.randn(kRows)\n    return X, y\n\n\nclass TestPickling:\n    def run_model_pickling(self, xgb_params) -> str:\n        X, y = generate_data()\n        dtrain = xgb.DMatrix(X, y)\n        bst = xgb.train(xgb_params, dtrain)\n\n        dump_0 = bst.get_dump(dump_format=\"json\")\n        assert dump_0\n        config_0 = bst.save_config()\n\n        filename = \"model.pkl\"\n\n        with open(filename, \"wb\") as fd:\n            pickle.dump(bst, fd)\n\n        with open(filename, \"rb\") as fd:\n            bst = pickle.load(fd)\n\n        with open(filename, \"wb\") as fd:\n            pickle.dump(bst, fd)\n\n        with open(filename, \"rb\") as fd:\n            bst = pickle.load(fd)\n\n        assert bst.get_dump(dump_format=\"json\") == dump_0\n\n        if os.path.exists(filename):\n            os.remove(filename)\n\n        config_1 = bst.save_config()\n        assert config_0 == config_1\n        return json.loads(config_0)\n\n    def test_model_pickling_json(self):\n        def check(config):\n            tree_param = config[\"learner\"][\"gradient_booster\"][\"tree_train_param\"]\n            subsample = tree_param[\"subsample\"]\n            assert float(subsample) == 0.5\n\n        params = {\"nthread\": 8, \"tree_method\": \"hist\", \"subsample\": 0.5}\n        config = self.run_model_pickling(params)\n        check(config)\n        params = {\"nthread\": 8, \"tree_method\": \"exact\", \"subsample\": 0.5}\n        config = self.run_model_pickling(params)\n        check(config)\n"
  },
  {
    "path": "tests/python/test_plotting.py",
    "content": "import numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.plotting import run_categorical\n\ntry:\n    import matplotlib\n\n    matplotlib.use(\"Agg\")\n    from graphviz import Source\n    from matplotlib.axes import Axes\nexcept ImportError:\n    pass\n\npytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))\n\n\nclass TestPlotting:\n    def test_plotting(self):\n        m, _ = tm.load_agaricus(__file__)\n        booster = xgb.train(\n            {\"max_depth\": 2, \"eta\": 1, \"objective\": \"binary:logistic\"},\n            m,\n            num_boost_round=2,\n        )\n\n        ax = xgb.plot_importance(booster)\n        assert isinstance(ax, Axes)\n        assert ax.get_title() == \"Feature importance\"\n        assert ax.get_xlabel() == \"Importance score\"\n        assert ax.get_ylabel() == \"Features\"\n        assert len(ax.patches) == 4\n\n        ax = xgb.plot_importance(booster, color=\"r\", title=\"t\", xlabel=\"x\", ylabel=\"y\")\n        assert isinstance(ax, Axes)\n        assert ax.get_title() == \"t\"\n        assert ax.get_xlabel() == \"x\"\n        assert ax.get_ylabel() == \"y\"\n        assert len(ax.patches) == 4\n        for p in ax.patches:\n            assert p.get_facecolor() == (1.0, 0, 0, 1.0)  # red\n\n        ax = xgb.plot_importance(\n            booster, color=[\"r\", \"r\", \"b\", \"b\"], title=None, xlabel=None, ylabel=None\n        )\n        assert isinstance(ax, Axes)\n        assert ax.get_title() == \"\"\n        assert ax.get_xlabel() == \"\"\n        assert ax.get_ylabel() == \"\"\n        assert len(ax.patches) == 4\n        assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0)  # red\n        assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0)  # red\n        assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0)  # blue\n        assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0)  # blue\n\n        g 
= xgb.to_graphviz(booster, tree_idx=0)\n        assert isinstance(g, Source)\n\n        ax = xgb.plot_tree(booster, tree_idx=0)\n        assert isinstance(ax, Axes)\n\n    def test_importance_plot_lim(self):\n        np.random.seed(1)\n        dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)\n        bst = xgb.train({}, dm)\n        assert len(bst.get_fscore()) == 71\n        ax = xgb.plot_importance(bst)\n        assert ax.get_xlim() == (0.0, 11.0)\n        assert ax.get_ylim() == (-1.0, 71.0)\n\n        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))\n        assert ax.get_xlim() == (0.0, 5.0)\n        assert ax.get_ylim() == (10.0, 71.0)\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical(self) -> None:\n        run_categorical(\"approx\", \"cpu\")\n"
  },
  {
    "path": "tests/python/test_predict.py",
    "content": "\"\"\"Tests for running inplace prediction.\"\"\"\n\nfrom concurrent.futures import ThreadPoolExecutor\nfrom typing import List, Type, Union\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nimport xgboost as xgb\nfrom scipy import sparse\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import get_california_housing, np_dtypes, pd_dtypes\nfrom xgboost.testing.predict import run_base_margin_vs_base_score, run_predict_leaf\n\n\ndef run_threaded_predict(X, rows, predict_func):\n    results = []\n    per_thread = 20\n    with ThreadPoolExecutor(max_workers=10) as e:\n        for i in range(0, rows, int(rows / per_thread)):\n            if hasattr(X, \"iloc\"):\n                predictor = X.iloc[i : i + per_thread, :]\n            else:\n                predictor = X[i : i + per_thread, ...]\n            f = e.submit(predict_func, predictor)\n            results.append(f)\n\n    for f in results:\n        assert f.result()\n\n\n@pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\ndef test_predict_leaf(DMatrixT: Type[xgb.DMatrix]) -> None:\n    run_predict_leaf(\"cpu\", DMatrixT)\n\n\ndef test_predict_shape():\n    X, y = get_california_housing()\n    reg = xgb.XGBRegressor(n_estimators=1)\n    reg.fit(X, y)\n    predt = reg.get_booster().predict(xgb.DMatrix(X), strict_shape=True)\n    assert len(predt.shape) == 2\n    assert predt.shape[0] == X.shape[0]\n    assert predt.shape[1] == 1\n\n    contrib = reg.get_booster().predict(\n        xgb.DMatrix(X), pred_contribs=True, strict_shape=True\n    )\n    assert len(contrib.shape) == 3\n    assert contrib.shape[1] == 1\n\n    contrib = reg.get_booster().predict(\n        xgb.DMatrix(X), pred_contribs=True, approx_contribs=True\n    )\n    assert len(contrib.shape) == 2\n    assert contrib.shape[1] == X.shape[1] + 1\n\n    interaction = reg.get_booster().predict(\n        xgb.DMatrix(X), pred_interactions=True, approx_contribs=True\n    )\n    assert 
len(interaction.shape) == 3\n    assert interaction.shape[1] == X.shape[1] + 1\n    assert interaction.shape[2] == X.shape[1] + 1\n\n    interaction = reg.get_booster().predict(\n        xgb.DMatrix(X), pred_interactions=True, approx_contribs=True, strict_shape=True\n    )\n    assert len(interaction.shape) == 4\n    assert interaction.shape[1] == 1\n    assert interaction.shape[2] == X.shape[1] + 1\n    assert interaction.shape[3] == X.shape[1] + 1\n\n\ndef test_base_margin_vs_base_score() -> None:\n    run_base_margin_vs_base_score(\"cpu\")\n\n\nclass TestInplacePredict:\n    \"\"\"Tests for running inplace prediction\"\"\"\n\n    @classmethod\n    def setup_class(cls):\n        cls.rows = 1000\n        cls.cols = 10\n\n        cls.missing = 11  # set to integer for testing\n\n        cls.rng = np.random.RandomState(1994)\n\n        cls.X = cls.rng.randn(cls.rows, cls.cols)\n        missing_idx = [i for i in range(0, cls.cols, 4)]\n        cls.X[:, missing_idx] = cls.missing  # set to be missing\n\n        cls.y = cls.rng.randn(cls.rows)\n\n        dtrain = xgb.DMatrix(cls.X, cls.y)\n        cls.test = xgb.DMatrix(cls.X[:10, ...], missing=cls.missing)\n\n        cls.num_boost_round = 10\n        cls.booster = xgb.train({\"tree_method\": \"hist\"}, dtrain, num_boost_round=10)\n\n    def test_predict(self):\n        booster = self.booster\n        X = self.X\n        test = self.test\n\n        predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing)\n        predt_from_dmatrix = booster.predict(test)\n\n        X_obj = X.copy().astype(object)\n\n        assert X_obj.dtype.hasobject is True\n        assert X.dtype.hasobject is False\n        np.testing.assert_allclose(\n            booster.inplace_predict(X_obj), booster.inplace_predict(X)\n        )\n\n        np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)\n\n        predt_from_array = booster.inplace_predict(\n            X[:10, ...], iteration_range=(0, 4), 
missing=self.missing\n        )\n        predt_from_dmatrix = booster.predict(test, iteration_range=(0, 4))\n\n        np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)\n\n        with pytest.raises(ValueError):\n            booster.predict(test, iteration_range=(0, booster.num_boosted_rounds() + 2))\n\n        default = booster.predict(test)\n\n        range_full = booster.predict(test, iteration_range=(0, self.num_boost_round))\n        np.testing.assert_allclose(range_full, default)\n\n        range_full = booster.predict(\n            test, iteration_range=(0, booster.num_boosted_rounds())\n        )\n        np.testing.assert_allclose(range_full, default)\n\n        def predict_dense(x):\n            inplace_predt = booster.inplace_predict(x)\n            d = xgb.DMatrix(x)\n            copied_predt = booster.predict(d)\n            return np.all(copied_predt == inplace_predt)\n\n        for i in range(10):\n            run_threaded_predict(X, self.rows, predict_dense)\n\n        def predict_csr(x):\n            inplace_predt = booster.inplace_predict(sparse.csr_matrix(x))\n            d = xgb.DMatrix(x)\n            copied_predt = booster.predict(d)\n            return np.all(copied_predt == inplace_predt)\n\n        for i in range(10):\n            run_threaded_predict(X, self.rows, predict_csr)\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_predict_pd(self):\n        X = self.X\n        # construct it in column major style\n        df = pd.DataFrame({str(i): X[:, i] for i in range(X.shape[1])})\n        booster = self.booster\n        df_predt = booster.inplace_predict(df)\n        arr_predt = booster.inplace_predict(X)\n        dmat_predt = booster.predict(xgb.DMatrix(X))\n\n        X = df.values\n        X = np.asfortranarray(X)\n        fort_predt = booster.inplace_predict(X)\n\n        np.testing.assert_allclose(dmat_predt, arr_predt)\n        np.testing.assert_allclose(df_predt, arr_predt)\n        
np.testing.assert_allclose(fort_predt, arr_predt)\n\n    def test_base_margin(self):\n        booster = self.booster\n\n        base_margin = self.rng.randn(self.rows)\n        from_inplace = booster.inplace_predict(data=self.X, base_margin=base_margin)\n\n        dtrain = xgb.DMatrix(self.X, self.y, base_margin=base_margin)\n        from_dmatrix = booster.predict(dtrain)\n        np.testing.assert_allclose(from_dmatrix, from_inplace)\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_dtypes(self) -> None:\n        for orig, x in np_dtypes(self.rows, self.cols):\n            predt_orig = self.booster.inplace_predict(orig)\n            predt = self.booster.inplace_predict(x)\n            np.testing.assert_allclose(predt, predt_orig)\n\n        # unsupported types\n        for dtype in [\n            np.bytes_,\n            np.complex64,\n            np.complex128,\n        ]:\n            X: np.ndarray = np.array(orig, dtype=dtype)\n            with pytest.raises(ValueError):\n                self.booster.inplace_predict(X)\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_pd_dtypes(self) -> None:\n        import pandas as pd\n        from pandas.api.types import is_bool_dtype\n\n        for orig, x in pd_dtypes():\n            dtypes: Union[List, pd.Series] = (\n                orig.dtypes if isinstance(orig, pd.DataFrame) else [orig.dtypes]\n            )\n            if isinstance(orig, pd.DataFrame) and is_bool_dtype(dtypes.iloc[0]):\n                continue\n            y = np.arange(x.shape[0])\n            Xy = xgb.DMatrix(orig, y)\n            booster = xgb.train({\"tree_method\": \"hist\"}, Xy, num_boost_round=1)\n            predt_orig = booster.inplace_predict(orig)\n            predt = booster.inplace_predict(x)\n            np.testing.assert_allclose(predt, predt_orig)\n"
  },
  {
    "path": "tests/python/test_quantile_dmatrix.py",
    "content": "from typing import Any, Dict, List\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, settings, strategies\nfrom scipy import sparse\nfrom xgboost.testing import (\n    IteratorForTest,\n    make_batches,\n    make_batches_sparse,\n    make_categorical,\n    make_ltr,\n    make_sparse_regression,\n)\nfrom xgboost.testing.data import check_inf, np_dtypes\nfrom xgboost.testing.data_iter import run_mixed_sparsity\nfrom xgboost.testing.quantile_dmatrix import (\n    check_categorical_strings,\n    check_ref_quantile_cut,\n)\nfrom xgboost.testing.utils import predictor_equal\n\n\nclass TestQuantileDMatrix:\n    def test_basic(self) -> None:\n        \"\"\"Checks for np array, list, tuple.\"\"\"\n        n_samples = 234\n        n_features = 8\n\n        rng = np.random.default_rng()\n        X = rng.normal(loc=0, scale=3, size=n_samples * n_features).reshape(\n            n_samples, n_features\n        )\n        y = rng.normal(0, 3, size=n_samples)\n        Xy = xgb.QuantileDMatrix(X, y)\n        assert Xy.num_row() == n_samples\n        assert Xy.num_col() == n_features\n\n        X = sparse.random(n_samples, n_features, density=0.1, format=\"csr\")\n        Xy = xgb.QuantileDMatrix(X, y)\n        assert Xy.num_row() == n_samples\n        assert Xy.num_col() == n_features\n\n        X = sparse.random(n_samples, n_features, density=0.8, format=\"csr\")\n        Xy = xgb.QuantileDMatrix(X, y)\n        assert Xy.num_row() == n_samples\n        assert Xy.num_col() == n_features\n\n        n_samples = 64\n        data = []\n        for f in range(n_samples):\n            row = [f] * n_features\n            data.append(row)\n        assert np.array(data).shape == (n_samples, n_features)\n        Xy = xgb.QuantileDMatrix(data, max_bin=256)\n        assert Xy.num_row() == n_samples\n        assert Xy.num_col() == n_features\n        r = np.arange(1.0, n_samples)\n        
np.testing.assert_allclose(Xy.get_data().toarray()[1:, 0], r)\n\n    def test_categorical_strings(self) -> None:\n        check_categorical_strings(\"cpu\")\n\n    def test_error(self):\n        from sklearn.model_selection import train_test_split\n\n        rng = np.random.default_rng(1994)\n        X, y = make_categorical(\n            n_samples=128, n_features=2, n_categories=3, onehot=False\n        )\n        reg = xgb.XGBRegressor(tree_method=\"hist\")\n        w = rng.uniform(0, 1, size=y.shape[0])\n\n        X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(\n            X, y, w, random_state=1994\n        )\n\n        with pytest.raises(ValueError, match=\"sample weight\"):\n            reg.fit(\n                X,\n                y,\n                sample_weight=w_train,\n                eval_set=[(X_test, y_test)],\n                sample_weight_eval_set=[w_test],\n            )\n\n        with pytest.raises(ValueError, match=\"sample weight\"):\n            reg.fit(\n                X_train,\n                y_train,\n                sample_weight=w,\n                eval_set=[(X_test, y_test)],\n                sample_weight_eval_set=[w_test],\n            )\n\n    @pytest.mark.parametrize(\"sparsity\", [0.0, 0.1, 0.8, 0.9])\n    def test_with_iterator(self, sparsity: float) -> None:\n        n_samples_per_batch = 317\n        n_features = 8\n        n_batches = 7\n\n        if sparsity == 0.0:\n            it = IteratorForTest(\n                *make_batches(n_samples_per_batch, n_features, n_batches, False),\n                cache=None,\n            )\n        else:\n            it = IteratorForTest(\n                *make_batches_sparse(\n                    n_samples_per_batch, n_features, n_batches, sparsity\n                ),\n                cache=None,\n            )\n        Xy = xgb.QuantileDMatrix(it)\n        assert Xy.num_row() == n_samples_per_batch * n_batches\n        assert Xy.num_col() == n_features\n\n    def 
test_different_size(self) -> None:\n        n_samples_per_batch = 317\n        n_features = 8\n        n_batches = 7\n\n        it = IteratorForTest(\n            *make_batches(\n                n_samples_per_batch, n_features, n_batches, False, vary_size=True\n            ),\n            cache=None,\n        )\n        Xy = xgb.QuantileDMatrix(it)\n        assert Xy.num_row() == 2429\n        X, y, w = it.as_arrays()\n        Xy1 = xgb.QuantileDMatrix(X, y, weight=w)\n        assert predictor_equal(Xy, Xy1)\n\n    @pytest.mark.parametrize(\"sparsity\", [0.0, 0.1, 0.5, 0.8, 0.9])\n    def test_training(self, sparsity: float) -> None:\n        n_samples_per_batch = 317\n        n_features = 8\n        n_batches = 7\n        if sparsity == 0.0:\n            it = IteratorForTest(\n                *make_batches(n_samples_per_batch, n_features, n_batches, False),\n                cache=None,\n            )\n        else:\n            it = IteratorForTest(\n                *make_batches_sparse(\n                    n_samples_per_batch, n_features, n_batches, sparsity\n                ),\n                cache=None,\n            )\n\n        parameters = {\"tree_method\": \"hist\", \"max_bin\": 256}\n        Xy_it = xgb.QuantileDMatrix(it, max_bin=parameters[\"max_bin\"])\n        from_it = xgb.train(parameters, Xy_it)\n\n        X, y, w = it.as_arrays()\n        w_it = Xy_it.get_weight()\n        np.testing.assert_allclose(w_it, w)\n\n        Xy_arr = xgb.DMatrix(X, y, weight=w)\n        from_arr = xgb.train(parameters, Xy_arr)\n\n        np.testing.assert_allclose(from_arr.predict(Xy_it), from_it.predict(Xy_arr))\n\n        y -= y.min()\n        y += 0.01\n        Xy = xgb.QuantileDMatrix(X, y, weight=w)\n        with pytest.raises(ValueError, match=r\"Only.*hist.*\"):\n            parameters = {\n                \"tree_method\": \"approx\",\n                \"max_bin\": 256,\n                \"objective\": \"reg:gamma\",\n            }\n            
xgb.train(parameters, Xy)\n\n    def run_ref_dmatrix(self, rng: Any, device: str, enable_cat: bool) -> None:\n        n_samples, n_features = 2048, 17\n        if enable_cat:\n            X, y = make_categorical(\n                n_samples, n_features, n_categories=13, onehot=False\n            )\n            if device == \"cuda\":\n                import cudf\n\n                X = cudf.from_pandas(X)\n                y = cudf.from_pandas(y)\n        else:\n            X = rng.normal(loc=0, scale=3, size=n_samples * n_features).reshape(\n                n_samples, n_features\n            )\n            y = rng.normal(0, 3, size=n_samples)\n\n        # Use ref\n        Xy = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)\n        Xy_valid: xgb.DMatrix = xgb.QuantileDMatrix(\n            X, y, ref=Xy, enable_categorical=enable_cat\n        )\n        qdm_results: Dict[str, Dict[str, List[float]]] = {}\n        xgb.train(\n            {\"tree_method\": \"hist\", \"device\": device},\n            Xy,\n            evals=[(Xy, \"Train\"), (Xy_valid, \"valid\")],\n            evals_result=qdm_results,\n        )\n        np.testing.assert_allclose(\n            qdm_results[\"Train\"][\"rmse\"], qdm_results[\"valid\"][\"rmse\"]\n        )\n        # No ref\n        Xy_valid = xgb.DMatrix(X, y, enable_categorical=enable_cat)\n        qdm_results = {}\n        xgb.train(\n            {\"tree_method\": \"hist\", \"device\": device},\n            Xy,\n            evals=[(Xy, \"Train\"), (Xy_valid, \"valid\")],\n            evals_result=qdm_results,\n        )\n        np.testing.assert_allclose(\n            qdm_results[\"Train\"][\"rmse\"], qdm_results[\"valid\"][\"rmse\"]\n        )\n\n        # Different number of features\n        Xy = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)\n        dXy = xgb.DMatrix(X, y, enable_categorical=enable_cat)\n\n        n_samples, n_features = 256, 15\n        X = rng.normal(loc=0, scale=3, size=n_samples * 
n_features).reshape(\n            n_samples, n_features\n        )\n        y = rng.normal(0, 3, size=n_samples)\n        with pytest.raises(ValueError, match=r\".*features\\.\"):\n            xgb.QuantileDMatrix(X, y, ref=Xy, enable_categorical=enable_cat)\n\n        # Compare training results\n        n_samples, n_features = 256, 17\n        if enable_cat:\n            X, y = make_categorical(n_samples, n_features, 13, onehot=False)\n            if device == \"cuda\":\n                import cudf\n\n                X = cudf.from_pandas(X)\n                y = cudf.from_pandas(y)\n        else:\n            X = rng.normal(loc=0, scale=3, size=n_samples * n_features).reshape(\n                n_samples, n_features\n            )\n            y = rng.normal(0, 3, size=n_samples)\n        Xy_valid = xgb.QuantileDMatrix(X, y, ref=Xy, enable_categorical=enable_cat)\n        # use DMatrix as ref\n        Xy_valid_d = xgb.QuantileDMatrix(X, y, ref=dXy, enable_categorical=enable_cat)\n        dXy_valid = xgb.DMatrix(X, y, enable_categorical=enable_cat)\n\n        qdm_results = {}\n        xgb.train(\n            {\"tree_method\": \"hist\", \"device\": device},\n            Xy,\n            evals=[(Xy, \"Train\"), (Xy_valid, \"valid\")],\n            evals_result=qdm_results,\n        )\n\n        dm_results: Dict[str, Dict[str, List[float]]] = {}\n        xgb.train(\n            {\"tree_method\": \"hist\", \"device\": device},\n            dXy,\n            evals=[(dXy, \"Train\"), (dXy_valid, \"valid\"), (Xy_valid_d, \"dvalid\")],\n            evals_result=dm_results,\n        )\n        np.testing.assert_allclose(\n            dm_results[\"Train\"][\"rmse\"], qdm_results[\"Train\"][\"rmse\"]\n        )\n        np.testing.assert_allclose(\n            dm_results[\"valid\"][\"rmse\"], qdm_results[\"valid\"][\"rmse\"]\n        )\n        np.testing.assert_allclose(\n            dm_results[\"dvalid\"][\"rmse\"], qdm_results[\"valid\"][\"rmse\"]\n        )\n\n        
Xy_valid = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)\n        with pytest.raises(ValueError, match=\"should be used as a reference\"):\n            xgb.train(\n                {\"device\": device}, dXy, evals=[(dXy, \"Train\"), (Xy_valid, \"Valid\")]\n            )\n\n    def test_ref_quantile_cut(self) -> None:\n        check_ref_quantile_cut(\"cpu\")\n\n    @pytest.mark.parametrize(\"enable_cat\", [True, False])\n    def test_ref_dmatrix(self, enable_cat: bool) -> None:\n        rng = np.random.RandomState(1994)\n        self.run_ref_dmatrix(rng, \"cpu\", enable_cat)\n\n    @pytest.mark.parametrize(\"sparsity\", [0.0, 0.5])\n    def test_predict(self, sparsity: float) -> None:\n        n_samples, n_features = 256, 4\n        X, y = make_categorical(\n            n_samples, n_features, n_categories=13, onehot=False, sparsity=sparsity\n        )\n        Xy = xgb.DMatrix(X, y)\n\n        booster = xgb.train({\"tree_method\": \"hist\"}, Xy)\n\n        Xy = xgb.DMatrix(X, y)\n        a = booster.predict(Xy)\n        qXy = xgb.QuantileDMatrix(X, y)\n        b = booster.predict(qXy)\n        np.testing.assert_allclose(a, b)\n\n    def test_ltr(self) -> None:\n        X, y, qid, w = make_ltr(100, 3, 3, 5)\n        Xy_qdm = xgb.QuantileDMatrix(X, y, qid=qid, weight=w)\n        Xy = xgb.DMatrix(X, y, qid=qid, weight=w)\n        xgb.train({\"tree_method\": \"hist\", \"objective\": \"rank:ndcg\"}, Xy)\n\n        from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm)\n        from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy)\n        assert predictor_equal(from_qdm, from_dm)\n\n    def test_check_inf(self) -> None:\n        rng = np.random.default_rng(1994)\n        check_inf(rng)\n\n    # we don't test empty Quantile DMatrix in single node construction.\n    @given(\n        strategies.integers(1, 1000),\n        strategies.integers(1, 100),\n        strategies.fractions(0, 0.99),\n    )\n    @settings(deadline=None, print_blob=True)\n    def 
test_to_csr(self, n_samples: int, n_features: int, sparsity: float) -> None:\n        csr, y = make_sparse_regression(n_samples, n_features, sparsity, False)\n        csr = csr.astype(np.float32)\n        qdm = xgb.QuantileDMatrix(data=csr, label=y)\n        ret = qdm.get_data()\n        np.testing.assert_equal(csr.indptr, ret.indptr)\n        np.testing.assert_equal(csr.indices, ret.indices)\n\n        booster = xgb.train({\"tree_method\": \"hist\"}, dtrain=qdm)\n\n        np.testing.assert_allclose(\n            booster.predict(qdm), booster.predict(xgb.DMatrix(qdm.get_data()))\n        )\n\n    def test_dtypes(self) -> None:\n        \"\"\"Checks for both np array and pd DataFrame.\"\"\"\n        n_samples = 128\n        n_features = 16\n        for orig, x in np_dtypes(n_samples, n_features):\n            m0 = xgb.QuantileDMatrix(orig)\n            m1 = xgb.QuantileDMatrix(x)\n            assert predictor_equal(m0, m1)\n\n        # unsupported types\n        for dtype in [\n            np.bytes_,\n            np.complex64,\n            np.complex128,\n        ]:\n            X: np.ndarray = np.array(orig, dtype=dtype)\n            with pytest.raises(ValueError):\n                xgb.QuantileDMatrix(X)\n\n    def test_changed_max_bin(self) -> None:\n        n_samples = 128\n        n_features = 16\n        csr, y = make_sparse_regression(n_samples, n_features, 0.5, False)\n        Xy = xgb.QuantileDMatrix(csr, y, max_bin=9)\n        booster = xgb.train({\"max_bin\": 9}, Xy, num_boost_round=2)\n\n        Xy = xgb.QuantileDMatrix(csr, y, max_bin=11)\n\n        with pytest.raises(ValueError, match=\"consistent\"):\n            xgb.train({}, Xy, num_boost_round=2, xgb_model=booster)\n\n    def test_mixed_sparsity(self) -> None:\n        run_mixed_sparsity(\"cpu\")\n\n    def test_sparse_predict(self) -> None:\n        X, y = make_sparse_regression(512, 16, sparsity=0.9, as_dense=False)\n\n        Xy: xgb.DMatrix = xgb.QuantileDMatrix(X, y)\n        booster = 
xgb.train({}, Xy, num_boost_round=8)\n\n        p0 = booster.predict(Xy)\n        Xy = xgb.DMatrix(X, y)\n        p1 = booster.predict(Xy)\n        np.testing.assert_allclose(p0, p1)\n\n        X, y = make_categorical(128, 16, 5, onehot=False, sparsity=0.9)\n        Xy = xgb.QuantileDMatrix(X, y)\n        booster = xgb.train({}, Xy, num_boost_round=8)\n\n        p0 = booster.predict(Xy)\n        Xy = xgb.DMatrix(X, y)\n        p1 = booster.predict(Xy)\n        np.testing.assert_allclose(p0, p1)\n\n    def test_cv_error(self) -> None:\n        X, y = make_sparse_regression(8, 2, sparsity=0.2, as_dense=False)\n        Xy = xgb.QuantileDMatrix(X, y)\n        with pytest.raises(ValueError):\n            xgb.cv({}, Xy, 10, nfold=10, early_stopping_rounds=10)\n\n\ndef test_feature_types() -> None:\n    it = IteratorForTest(*make_batches(32, 8, 4, False), cache=None)\n    with pytest.raises(ValueError, match=\"specified as batch argument\"):\n        xgb.QuantileDMatrix(it, feature_types=[\"q\"] * 8)\n"
  },
  {
    "path": "tests/python/test_ranking.py",
    "content": "import itertools\nimport json\nimport os\nimport shutil\nfrom typing import Optional\n\nimport numpy as np\nimport pytest\nimport xgboost\nfrom hypothesis import given, note, settings\nfrom scipy.sparse import csr_matrix\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import RelDataCV, simulate_clicks, sort_ltr_samples\nfrom xgboost.testing.params import lambdarank_parameter_strategy\nfrom xgboost.testing.ranking import run_normalization, run_score_normalization\n\n\ndef test_ndcg_custom_gain():\n    def ndcg_gain(y: np.ndarray) -> np.ndarray:\n        return np.exp2(y.astype(np.float64)) - 1.0\n\n    X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=3)\n    y_gain = ndcg_gain(y)\n\n    byxgb = xgboost.XGBRanker(tree_method=\"hist\", ndcg_exp_gain=True, n_estimators=10)\n    byxgb.fit(\n        X,\n        y,\n        qid=q,\n        sample_weight=w,\n        eval_set=[(X, y)],\n        eval_qid=(q,),\n        sample_weight_eval_set=(w,),\n        verbose=True,\n    )\n    byxgb_json = json.loads(byxgb.get_booster().save_raw(raw_format=\"json\"))\n\n    bynp = xgboost.XGBRanker(tree_method=\"hist\", ndcg_exp_gain=False, n_estimators=10)\n    bynp.fit(\n        X,\n        y_gain,\n        qid=q,\n        sample_weight=w,\n        eval_set=[(X, y_gain)],\n        eval_qid=(q,),\n        sample_weight_eval_set=(w,),\n        verbose=True,\n    )\n    bynp_json = json.loads(bynp.get_booster().save_raw(raw_format=\"json\"))\n\n    # Remove the difference in parameter for comparison\n    byxgb_json[\"learner\"][\"objective\"][\"lambdarank_param\"][\"ndcg_exp_gain\"] = \"0\"\n    assert byxgb.evals_result() == bynp.evals_result()\n    assert byxgb_json == bynp_json\n\n    # test pairwise can handle max_rel > 31, while ndcg metric is using custom gain\n    X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=33)\n    ranknet = xgboost.XGBRanker(\n        tree_method=\"hist\",\n        
ndcg_exp_gain=False,\n        n_estimators=10,\n        objective=\"rank:pairwise\",\n    )\n    ranknet.fit(X, y, qid=q, eval_set=[(X, y)], eval_qid=[q])\n    history = ranknet.evals_result()\n    assert (\n        history[\"validation_0\"][\"ndcg@32\"][0] < history[\"validation_0\"][\"ndcg@32\"][-1]\n    )\n\n\ndef test_ndcg_non_exp() -> None:\n    # NDCG exp gain must have label smaller than 32\n    X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=44)\n\n    def fit(ltr: xgboost.XGBRanker):\n        ltr.fit(\n            X,\n            y,\n            qid=q,\n            sample_weight=w,\n            eval_set=[(X, y)],\n            eval_qid=(q,),\n            sample_weight_eval_set=(w,),\n        )\n\n    ltr = xgboost.XGBRanker(tree_method=\"hist\", ndcg_exp_gain=True, n_estimators=2)\n    with pytest.raises(ValueError, match=\"Set `ndcg_exp_gain`\"):\n        fit(ltr)\n    ltr = xgboost.XGBRanker(tree_method=\"hist\", ndcg_exp_gain=False, n_estimators=2)\n    fit(ltr)\n\n\ndef test_ranking_with_unweighted_data():\n    # fmt: off\n    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])\n    Xcol = np.array([0, 0, 1, 1, 2, 2, 3, 3])\n    y = np.array([\n        0.0, 1.0, 1.0, 0.0, 0.0,\n        0.0, 1.0, 0.0, 1.0, 0.0,\n        0.0, 1.0, 0.0, 0.0, 1.0,\n        0.0, 1.0, 1.0, 0.0, 0.0\n    ])\n    # fmt: on\n    X = csr_matrix((np.ones(shape=8), (Xrow, Xcol)), shape=(20, 4))\n\n    group = np.array([5, 5, 5, 5], dtype=np.uint)\n    dtrain = xgboost.DMatrix(X, label=y)\n    dtrain.set_group(group)\n\n    params = {\n        \"eta\": 1,\n        \"tree_method\": \"exact\",\n        \"objective\": \"rank:pairwise\",\n        \"eval_metric\": [\"auc\", \"aucpr\"],\n        \"max_depth\": 1,\n    }\n    evals_result = {}\n    bst = xgboost.train(\n        params, dtrain, 10, evals=[(dtrain, \"train\")], evals_result=evals_result\n    )\n    auc_rec = evals_result[\"train\"][\"auc\"]\n    assert all(p <= q for p, q in zip(auc_rec, 
auc_rec[1:]))\n    auc_rec = evals_result[\"train\"][\"aucpr\"]\n    assert all(p <= q for p, q in zip(auc_rec, auc_rec[1:]))\n\n\ndef test_ranking_with_weighted_data():\n    # fmt: off\n    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])\n    Xcol = np.array([0, 0, 1, 1, 2, 2, 3, 3])\n    y = np.array([\n        0.0, 1.0, 1.0, 0.0, 0.0,\n        0.0, 1.0, 0.0, 1.0, 0.0,\n        0.0, 1.0, 0.0, 0.0, 1.0,\n        0.0, 1.0, 1.0, 0.0, 0.0\n    ])\n    # fmt: on\n    X = csr_matrix((np.ones(shape=8), (Xrow, Xcol)), shape=(20, 4))\n    weights = np.array([1.0, 2.0, 3.0, 4.0])\n\n    group = np.array([5, 5, 5, 5], dtype=np.uint)\n    dtrain = xgboost.DMatrix(X, label=y, weight=weights)\n    dtrain.set_group(group)\n\n    params = {\n        \"eta\": 1,\n        \"tree_method\": \"exact\",\n        \"objective\": \"rank:pairwise\",\n        \"eval_metric\": [\"auc\", \"aucpr\"],\n        \"max_depth\": 1,\n    }\n    evals_result = {}\n    bst = xgboost.train(\n        params, dtrain, 10, evals=[(dtrain, \"train\")], evals_result=evals_result\n    )\n    auc_rec = evals_result[\"train\"][\"auc\"]\n    assert all(p <= q for p, q in zip(auc_rec, auc_rec[1:]))\n    auc_rec = evals_result[\"train\"][\"aucpr\"]\n    assert all(p <= q for p, q in zip(auc_rec, auc_rec[1:]))\n\n    for i in range(1, 11):\n        pred = bst.predict(dtrain, iteration_range=(0, i))\n        # is_sorted[i]: is i-th group correctly sorted by the ranking predictor?\n        is_sorted = []\n        for k in range(0, 20, 5):\n            ind = np.argsort(-pred[k : k + 5])\n            z = y[ind + k]\n            is_sorted.append(all(i >= j for i, j in zip(z, z[1:])))\n        # Since we give weights 1, 2, 3, 4 to the four query groups,\n        # the ranking predictor will first try to correctly sort the last query group\n        # before correctly sorting other groups.\n        assert all(p <= q for p, q in zip(is_sorted, is_sorted[1:]))\n\n\ndef test_error_msg() -> None:\n    X, y, qid, w = 
tm.make_ltr(10, 2, 2, 2)\n    ranker = xgboost.XGBRanker()\n    with pytest.raises(ValueError, match=r\"equal to the number of query groups\"):\n        ranker.fit(X, y, qid=qid, sample_weight=y)\n\n\n@given(lambdarank_parameter_strategy)\n@settings(deadline=None, print_blob=True, max_examples=10)\ndef test_lambdarank_parameters(params):\n    if params[\"objective\"] == \"rank:map\":\n        rel = 1\n    else:\n        rel = 4\n    X, y, q, w = tm.make_ltr(4096, 3, 13, rel)\n    ranker = xgboost.XGBRanker(tree_method=\"hist\", n_estimators=64, **params)\n    ranker.fit(X, y, qid=q, sample_weight=w, eval_set=[(X, y)], eval_qid=[q])\n    for k, v in ranker.evals_result()[\"validation_0\"].items():\n        note(v)\n        assert v[-1] >= v[0]\n        assert ranker.n_features_in_ == 3\n\n\n@pytest.mark.skipif(**tm.no_pandas())\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_unbiased() -> None:\n    import pandas as pd\n    from sklearn.model_selection import train_test_split\n\n    X, y, q, w = tm.make_ltr(8192, 2, n_query_groups=6, max_rel=4)\n    X, Xe, y, ye, q, qe = train_test_split(X, y, q, test_size=0.2, random_state=3)\n    X = csr_matrix(X)\n    Xe = csr_matrix(Xe)\n    data = RelDataCV((X, y, q), (Xe, ye, qe), max_rel=4)\n\n    train, _ = simulate_clicks(data)\n    x, c, y, q = sort_ltr_samples(train.X, train.y, train.qid, train.click, train.pos)\n    df: Optional[pd.DataFrame] = None\n\n    class Position(xgboost.callback.TrainingCallback):\n        def after_training(self, model) -> bool:\n            nonlocal df\n            config = json.loads(model.save_config())\n            ti_plus = np.array(config[\"learner\"][\"objective\"][\"ti+\"])\n            tj_minus = np.array(config[\"learner\"][\"objective\"][\"tj-\"])\n            df = pd.DataFrame({\"ti+\": ti_plus, \"tj-\": tj_minus})\n            return model\n\n    ltr = xgboost.XGBRanker(\n        n_estimators=8,\n        tree_method=\"hist\",\n        lambdarank_unbiased=True,\n        
lambdarank_num_pair_per_sample=12,\n        lambdarank_pair_method=\"topk\",\n        objective=\"rank:ndcg\",\n        callbacks=[Position()],\n        base_score=0.5,\n    )\n    ltr.fit(x, c, qid=q, eval_set=[(x, c)], eval_qid=[q])\n\n    assert df is not None\n    # normalized\n    np.testing.assert_allclose(df[\"ti+\"].iloc[0], 1.0)\n    np.testing.assert_allclose(df[\"tj-\"].iloc[0], 1.0)\n    # less biased on low ranks.\n    assert df[\"ti+\"].iloc[-1] < df[\"ti+\"].iloc[0]\n\n    # Training continuation\n    ltr.fit(x, c, qid=q, eval_set=[(x, c)], eval_qid=[q], xgb_model=ltr)\n    # normalized\n    np.testing.assert_allclose(df[\"ti+\"].iloc[0], 1.0)\n    np.testing.assert_allclose(df[\"tj-\"].iloc[0], 1.0)\n\n\ndef test_normalization() -> None:\n    run_normalization(\"cpu\")\n\n\n@pytest.mark.parametrize(\"objective\", [\"rank:pairwise\", \"rank:ndcg\", \"rank:map\"])\ndef test_score_normalization(objective: str) -> None:\n    run_score_normalization(\"cpu\", objective)\n\n\nclass TestRanking:\n    @classmethod\n    def setup_class(cls):\n        \"\"\"\n        Download and setup the test fixtures\n        \"\"\"\n        cls.dpath = \"demo/\"\n        (\n            x_train,\n            y_train,\n            qid_train,\n            x_test,\n            y_test,\n            qid_test,\n            x_valid,\n            y_valid,\n            qid_valid,\n        ) = tm.data.get_mq2008(cls.dpath)\n\n        # instantiate the matrices\n        cls.dtrain = xgboost.DMatrix(x_train, y_train)\n        cls.dvalid = xgboost.DMatrix(x_valid, y_valid)\n        cls.dtest = xgboost.DMatrix(x_test, y_test)\n        # set the group counts from the query IDs\n        cls.dtrain.set_group(\n            [len(list(items)) for _key, items in itertools.groupby(qid_train)]\n        )\n        cls.dtest.set_group(\n            [len(list(items)) for _key, items in itertools.groupby(qid_test)]\n        )\n        cls.dvalid.set_group(\n            [len(list(items)) for _key, 
items in itertools.groupby(qid_valid)]\n        )\n        # save the query IDs for testing\n        cls.qid_train = qid_train\n        cls.qid_test = qid_test\n        cls.qid_valid = qid_valid\n\n        # model training parameters\n        cls.params = {\n            \"objective\": \"rank:pairwise\",\n            \"booster\": \"gbtree\",\n            \"eval_metric\": [\"ndcg\"],\n        }\n\n    @classmethod\n    def teardown_class(cls):\n        \"\"\"\n        Cleanup test artifacts from download and unpacking\n        :return:\n        \"\"\"\n        zip_f = cls.dpath + \"MQ2008.zip\"\n        if os.path.exists(zip_f):\n            os.remove(zip_f)\n        directory = cls.dpath + \"MQ2008\"\n        if os.path.exists(directory):\n            shutil.rmtree(directory)\n\n    def test_training(self):\n        \"\"\"\n        Train an XGBoost ranking model\n        \"\"\"\n        # specify validations set to watch performance\n        watchlist = [(self.dtest, \"eval\"), (self.dtrain, \"train\")]\n        bst = xgboost.train(\n            self.params,\n            self.dtrain,\n            num_boost_round=2500,\n            early_stopping_rounds=10,\n            evals=watchlist,\n        )\n        assert bst.best_score > 0.98\n\n    def test_cv(self):\n        \"\"\"\n        Test cross-validation with a group specified\n        \"\"\"\n        cv = xgboost.cv(\n            self.params,\n            self.dtrain,\n            num_boost_round=2500,\n            early_stopping_rounds=10,\n            nfold=10,\n            as_pandas=False,\n        )\n        assert isinstance(cv, dict)\n        assert set(cv.keys()) == {\n            \"test-ndcg-mean\",\n            \"train-ndcg-mean\",\n            \"test-ndcg-std\",\n            \"train-ndcg-std\",\n        }, \"CV results dict key mismatch.\"\n\n    def test_cv_no_shuffle(self):\n        \"\"\"\n        Test cross-validation with a group specified\n        \"\"\"\n        cv = xgboost.cv(\n            
self.params,\n            self.dtrain,\n            num_boost_round=2500,\n            early_stopping_rounds=10,\n            shuffle=False,\n            nfold=10,\n            as_pandas=False,\n        )\n        assert isinstance(cv, dict)\n        assert len(cv) == 4\n\n    def test_get_group(self):\n        \"\"\"\n        Retrieve the group number from the dmatrix\n        \"\"\"\n        # test the new getter\n        self.dtrain.get_uint_info(\"group_ptr\")\n\n        for d, qid in [\n            (self.dtrain, self.qid_train),\n            (self.dvalid, self.qid_valid),\n            (self.dtest, self.qid_test),\n        ]:\n            # size of each group\n            group_sizes = np.array(\n                [len(list(items)) for _key, items in itertools.groupby(qid)]\n            )\n            # indexes of group boundaries\n            group_limits = d.get_uint_info(\"group_ptr\")\n            assert len(group_limits) == len(group_sizes) + 1\n            assert np.array_equal(np.diff(group_limits), group_sizes)\n            assert np.array_equal(group_sizes, np.diff(d.get_uint_info(\"group_ptr\")))\n            assert np.array_equal(group_sizes, np.diff(d.get_uint_info(\"group_ptr\")))\n            assert np.array_equal(group_limits, d.get_uint_info(\"group_ptr\"))\n"
  },
  {
    "path": "tests/python/test_shap.py",
    "content": "import itertools\nimport re\n\nimport numpy as np\nimport scipy.special\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\n\nclass TestSHAP:\n    def test_feature_importances(self) -> None:\n        rng = np.random.RandomState(1994)\n        data = rng.randn(100, 5)\n        target = np.array([0, 1] * 50)\n\n        features = [\"Feature1\", \"Feature2\", \"Feature3\", \"Feature4\", \"Feature5\"]\n\n        dm = xgb.DMatrix(data, label=target, feature_names=features)\n        params = {\n            \"objective\": \"multi:softprob\",\n            \"eval_metric\": \"mlogloss\",\n            \"eta\": 0.3,\n            \"num_class\": 3,\n        }\n\n        bst = xgb.train(params, dm, num_boost_round=10)\n\n        # number of feature importances should == number of features\n        scores1 = bst.get_score()\n        scores2 = bst.get_score(importance_type=\"weight\")\n        scores3 = bst.get_score(importance_type=\"cover\")\n        scores4 = bst.get_score(importance_type=\"gain\")\n        scores5 = bst.get_score(importance_type=\"total_cover\")\n        scores6 = bst.get_score(importance_type=\"total_gain\")\n        assert len(scores1) == len(features)\n        assert len(scores2) == len(features)\n        assert len(scores3) == len(features)\n        assert len(scores4) == len(features)\n        assert len(scores5) == len(features)\n        assert len(scores6) == len(features)\n\n        # check backwards compatibility of get_fscore\n        fscores = bst.get_fscore()\n        assert scores1 == fscores\n\n        dtrain, dtest = tm.load_agaricus(__file__)\n\n        def fn(max_depth: int, num_rounds: int) -> None:\n            # train\n            params = {\"max_depth\": max_depth, \"eta\": 1}\n            bst = xgb.train(params, dtrain, num_boost_round=num_rounds)\n\n            # predict\n            preds = bst.predict(dtest)\n            contribs = bst.predict(dtest, pred_contribs=True)\n\n            # result should be 
(number of features + BIAS) * number of rows\n            assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)\n\n            # sum of contributions should be same as predictions\n            np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)\n\n        # for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):\n        #     yield fn, max_depth, num_rounds\n\n        # check that we get the right SHAP values for a basic AND example\n        # (https://arxiv.org/abs/1706.06060)\n        X = np.zeros((4, 2))\n        X[0, :] = 1\n        X[1, 0] = 1\n        X[2, 1] = 1\n        y = np.zeros(4)\n        y[0] = 1\n        param = {\"max_depth\": 2, \"base_score\": 0.0, \"eta\": 1.0, \"lambda\": 0}\n        bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)\n        out = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)\n        assert out[0, 0] == 0.375\n        assert out[0, 1] == 0.375\n        assert out[0, 2] == 0.25\n\n        def parse_model(model: xgb.Booster) -> list:\n            trees = []\n            r_exp = r\"([0-9]+):\\[f([0-9]+)<([0-9\\.e-]+)\\] yes=([0-9]+),no=([0-9]+).*cover=([0-9e\\.]+)\"\n            r_exp_leaf = r\"([0-9]+):leaf=([0-9\\.e-]+),cover=([0-9e\\.]+)\"\n            for tree in model.get_dump(with_stats=True):\n                lines = list(tree.splitlines())\n                trees.append([None for i in range(len(lines))])\n                for line in lines:\n                    match = re.search(r_exp, line)\n                    if match is not None:\n                        ind = int(match.group(1))\n                        assert trees[-1] is not None\n                        while ind >= len(trees[-1]):\n                            assert isinstance(trees[-1], list)\n                            trees[-1].append(None)\n                        trees[-1][ind] = {\n                            \"yes_ind\": int(match.group(4)),\n                            \"no_ind\": 
int(match.group(5)),\n                            \"value\": None,\n                            \"threshold\": float(match.group(3)),\n                            \"feature_index\": int(match.group(2)),\n                            \"cover\": float(match.group(6)),\n                        }\n                    else:\n                        match = re.search(r_exp_leaf, line)\n                        ind = int(match.group(1))\n                        while ind >= len(trees[-1]):\n                            trees[-1].append(None)\n                        trees[-1][ind] = {\n                            \"value\": float(match.group(2)),\n                            \"cover\": float(match.group(3)),\n                        }\n            return trees\n\n        def exp_value_rec(tree, z, x, i=0):\n            if tree[i][\"value\"] is not None:\n                return tree[i][\"value\"]\n            else:\n                ind = tree[i][\"feature_index\"]\n                if z[ind] == 1:\n                    # 1e-6 for numeric error from parsing text dump.\n                    if x[ind] + 1e-6 <= tree[i][\"threshold\"]:\n                        return exp_value_rec(tree, z, x, tree[i][\"yes_ind\"])\n                    else:\n                        return exp_value_rec(tree, z, x, tree[i][\"no_ind\"])\n                else:\n                    r_yes = tree[tree[i][\"yes_ind\"]][\"cover\"] / tree[i][\"cover\"]\n                    out = exp_value_rec(tree, z, x, tree[i][\"yes_ind\"])\n                    val = out * r_yes\n\n                    r_no = tree[tree[i][\"no_ind\"]][\"cover\"] / tree[i][\"cover\"]\n                    out = exp_value_rec(tree, z, x, tree[i][\"no_ind\"])\n                    val += out * r_no\n                    return val\n\n        def exp_value(trees, z, x):\n            \"E[f(z)|Z_s = X_s]\"\n            return np.sum([exp_value_rec(tree, z, x) for tree in trees])\n\n        def all_subsets(ss):\n            return itertools.chain(\n  
              *map(lambda x: itertools.combinations(ss, x), range(0, len(ss) + 1))\n            )\n\n        def shap_value(trees, x, i, cond=None, cond_value=None):\n            M = len(x)\n            z = np.zeros(M)\n            other_inds = list(set(range(M)) - set([i]))\n            if cond is not None:\n                other_inds = list(set(other_inds) - set([cond]))\n                z[cond] = cond_value\n                M -= 1\n            total = 0.0\n\n            for subset in all_subsets(other_inds):\n                if len(subset) > 0:\n                    z[list(subset)] = 1\n                v1 = exp_value(trees, z, x)\n                z[i] = 1\n                v2 = exp_value(trees, z, x)\n                total += (v2 - v1) / (scipy.special.binom(M - 1, len(subset)) * M)\n                z[i] = 0\n                z[list(subset)] = 0\n            return total\n\n        def shap_values(trees, x):\n            vals = [shap_value(trees, x, i) for i in range(len(x))]\n            vals.append(exp_value(trees, np.zeros(len(x)), x))\n            return np.array(vals)\n\n        def interaction_values(trees, x):\n            M = len(x)\n            out = np.zeros((M + 1, M + 1))\n            for i in range(len(x)):\n                for j in range(len(x)):\n                    if i != j:\n                        out[i, j] = interaction_value(trees, x, i, j) / 2\n            svals = shap_values(trees, x)\n            main_effects = svals - out.sum(1)\n            out[np.diag_indices_from(out)] = main_effects\n            return out\n\n        def interaction_value(trees, x, i, j):\n            M = len(x)\n            z = np.zeros(M)\n            other_inds = list(set(range(M)) - set([i, j]))\n\n            total = 0.0\n            for subset in all_subsets(other_inds):\n                if len(subset) > 0:\n                    z[list(subset)] = 1\n                v00 = exp_value(trees, z, x)\n                z[i] = 1\n                v10 = exp_value(trees, z, 
x)\n                z[j] = 1\n                v11 = exp_value(trees, z, x)\n                z[i] = 0\n                v01 = exp_value(trees, z, x)\n                z[j] = 0\n                total += (v11 - v01 - v10 + v00) / (\n                    scipy.special.binom(M - 2, len(subset)) * (M - 1)\n                )\n                z[list(subset)] = 0\n            return total\n\n        # test a simple and function\n        M = 2\n        N = 4\n        X = np.zeros((N, M))\n        X[0, :] = 1\n        X[1, 0] = 1\n        X[2, 1] = 1\n        y = np.zeros(N)\n        y[0] = 1\n        param = {\"max_depth\": 2, \"base_score\": 0.0, \"eta\": 1.0, \"lambda\": 0}\n        bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)\n        brute_force = shap_values(parse_model(bst), X[0, :])\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4\n\n        brute_force = interaction_values(parse_model(bst), X[0, :])\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4\n\n        # test a random function\n        M = 2\n        N = 4\n        X = rng.randn(N, M)\n        y = rng.randn(N)\n        param = {\"max_depth\": 2, \"base_score\": 0.0, \"eta\": 1.0, \"lambda\": 0}\n        bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)\n        brute_force = shap_values(parse_model(bst), X[0, :])\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4\n\n        brute_force = interaction_values(parse_model(bst), X[0, :])\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4\n\n        # test another larger more complex random function\n        M = 5\n        N = 100\n        X = rng.randn(N, 
M)\n        y = rng.randn(N)\n        base_score = 1.0\n        param = {\"max_depth\": 5, \"base_score\": base_score, \"eta\": 0.1, \"gamma\": 2.0}\n        bst = xgb.train(param, xgb.DMatrix(X, label=y), 10)\n        brute_force = shap_values(parse_model(bst), X[0, :])\n        brute_force[-1] += base_score\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4\n\n        brute_force = interaction_values(parse_model(bst), X[0, :])\n        brute_force[-1, -1] += base_score\n        fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)\n        assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4\n\n    def test_shap_values(self) -> None:\n        from sklearn.datasets import make_classification, make_regression\n\n        def assert_same(X: np.ndarray, y: np.ndarray) -> None:\n            Xy = xgb.DMatrix(X, y)\n            booster = xgb.train({}, Xy, num_boost_round=4)\n            shap_dm = booster.predict(Xy, pred_contribs=True)\n            Xy = xgb.QuantileDMatrix(X, y)\n            shap_qdm = booster.predict(Xy, pred_contribs=True)\n            np.testing.assert_allclose(shap_dm, shap_qdm)\n\n            margin = booster.predict(Xy, output_margin=True)\n            np.testing.assert_allclose(\n                np.sum(shap_qdm, axis=len(shap_qdm.shape) - 1), margin, 1e-3, 1e-3\n            )\n\n            shap_dm = booster.predict(Xy, pred_interactions=True)\n            Xy = xgb.QuantileDMatrix(X, y)\n            shap_qdm = booster.predict(Xy, pred_interactions=True)\n            np.testing.assert_allclose(shap_dm, shap_qdm)\n\n        X, y = make_regression()\n        assert_same(X, y)\n\n        X, y = make_classification()\n        assert_same(X, y)\n"
  },
  {
    "path": "tests/python/test_survival.py",
    "content": "import json\nimport os\nfrom typing import List, Optional, Tuple, cast\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\ndpath = tm.data_dir(__file__)\n\n\n@pytest.fixture(scope=\"module\")\ndef toy_data() -> Tuple[xgb.DMatrix, np.ndarray, np.ndarray]:\n    X = np.array([1, 2, 3, 4, 5]).reshape((-1, 1))\n    INF = np.inf\n    y_lower = np.array([10, 15, -INF, 30, 100])\n    y_upper = np.array([INF, INF, 20, 50, INF])\n\n    dmat = xgb.DMatrix(X)\n    dmat.set_float_info(\"label_lower_bound\", y_lower)\n    dmat.set_float_info(\"label_upper_bound\", y_upper)\n    return dmat, y_lower, y_upper\n\n\ndef test_default_metric(toy_data: Tuple[xgb.DMatrix, np.ndarray, np.ndarray]) -> None:\n    Xy, y_lower, y_upper = toy_data\n\n    def run(evals: Optional[list]) -> None:\n        # test with or without actual evaluation.\n        booster = xgb.train(\n            {\"objective\": \"survival:aft\", \"aft_loss_distribution\": \"extreme\"},\n            Xy,\n            num_boost_round=1,\n            evals=evals,\n        )\n        config = json.loads(booster.save_config())\n        metrics = config[\"learner\"][\"metrics\"]\n        assert len(metrics) == 1\n        assert metrics[0][\"aft_loss_param\"][\"aft_loss_distribution\"] == \"extreme\"\n\n        booster = xgb.train(\n            {\"objective\": \"survival:aft\"},\n            Xy,\n            num_boost_round=1,\n            evals=evals,\n        )\n        config = json.loads(booster.save_config())\n        metrics = config[\"learner\"][\"metrics\"]\n        assert len(metrics) == 1\n        assert metrics[0][\"aft_loss_param\"][\"aft_loss_distribution\"] == \"normal\"\n\n    run([(Xy, \"Train\")])\n    run(None)\n\n\ndef test_aft_survival_toy_data(\n    toy_data: Tuple[xgb.DMatrix, np.ndarray, np.ndarray]\n) -> None:\n    # See demo/aft_survival/aft_survival_viz_demo.py\n    X = np.array([1, 2, 3, 4, 5]).reshape((-1, 1))\n    dmat, y_lower, 
y_upper = toy_data\n\n    # \"Accuracy\" = the number of data points whose ranged label (y_lower, y_upper)\n    #              includes the corresponding predicted label (y_pred)\n    acc_rec = []\n\n    class Callback(xgb.callback.TrainingCallback):\n        def __init__(self):\n            super().__init__()\n\n        def after_iteration(\n            self,\n            model: xgb.Booster,\n            epoch: int,\n            evals_log: xgb.callback.TrainingCallback.EvalsLog,\n        ):\n            y_pred = model.predict(dmat)\n            acc = np.sum(np.logical_and(y_pred >= y_lower, y_pred <= y_upper) / len(X))\n            acc_rec.append(acc)\n            return False\n\n    evals_result: xgb.callback.TrainingCallback.EvalsLog = {}\n    params = {\n        \"max_depth\": 3,\n        \"objective\": \"survival:aft\",\n        \"min_child_weight\": 0,\n        \"tree_method\": \"exact\",\n    }\n    bst = xgb.train(\n        params,\n        dmat,\n        15,\n        [(dmat, \"train\")],\n        evals_result=evals_result,\n        callbacks=[Callback()],\n    )\n\n    nloglik_rec = cast(List[float], evals_result[\"train\"][\"aft-nloglik\"])\n    # AFT metric (negative log likelihood) improves monotonically\n    assert all(p >= q for p, q in zip(nloglik_rec, nloglik_rec[1:]))\n    # \"Accuracy\" improves monotonically.\n    # Over time, XGBoost model makes predictions that fall within given label ranges.\n    assert all(p <= q for p, q in zip(acc_rec, acc_rec[1:]))\n    assert acc_rec[-1] == 1.0\n\n    def gather_split_thresholds(tree):\n        if \"split_condition\" in tree:\n            return (\n                gather_split_thresholds(tree[\"children\"][0])\n                | gather_split_thresholds(tree[\"children\"][1])\n                | {tree[\"split_condition\"]}\n            )\n        return set()\n\n    # Only 2.5, 3.5, and 4.5 are used as split thresholds.\n    model_json = [json.loads(e) for e in bst.get_dump(dump_format=\"json\")]\n    for i, 
tree in enumerate(model_json):\n        assert gather_split_thresholds(tree).issubset({2.5, 3.5, 4.5})\n\n\ndef test_aft_empty_dmatrix():\n    X = np.array([]).reshape((0, 2))\n    y_lower, y_upper = np.array([]), np.array([])\n    dtrain = xgb.DMatrix(X)\n    dtrain.set_info(label_lower_bound=y_lower, label_upper_bound=y_upper)\n    bst = xgb.train({'objective': 'survival:aft', 'tree_method': 'hist'},\n                    dtrain, num_boost_round=2, evals=[(dtrain, 'train')])\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_aft_survival_demo_data():\n    import pandas as pd\n    df = pd.read_csv(os.path.join(dpath, 'veterans_lung_cancer.csv'))\n\n    y_lower_bound = df['Survival_label_lower_bound']\n    y_upper_bound = df['Survival_label_upper_bound']\n    X = df.drop(['Survival_label_lower_bound', 'Survival_label_upper_bound'], axis=1)\n\n    dtrain = xgb.DMatrix(X)\n    dtrain.set_float_info('label_lower_bound', y_lower_bound)\n    dtrain.set_float_info('label_upper_bound', y_upper_bound)\n\n    base_params = {'verbosity': 0,\n                   'objective': 'survival:aft',\n                   'eval_metric': 'aft-nloglik',\n                   'tree_method': 'hist',\n                   'learning_rate': 0.05,\n                   'aft_loss_distribution_scale': 1.20,\n                   'max_depth': 6,\n                   'lambda': 0.01,\n                   'alpha': 0.02}\n    nloglik_rec = {}\n    dists = ['normal', 'logistic', 'extreme']\n    for dist in dists:\n        params = base_params\n        params.update({'aft_loss_distribution': dist})\n        evals_result = {}\n        bst = xgb.train(params, dtrain, num_boost_round=500, evals=[(dtrain, 'train')],\n                        evals_result=evals_result)\n        nloglik_rec[dist] = evals_result['train']['aft-nloglik']\n        # AFT metric (negative log likelihood) improves monotonically\n        assert all(p >= q for p, q in zip(nloglik_rec[dist], nloglik_rec[dist][1:]))\n    # For this data, normal 
distribution works the best\n    assert nloglik_rec['normal'][-1] < 4.9\n    assert nloglik_rec['logistic'][-1] > 4.9\n    assert nloglik_rec['extreme'][-1] > 4.9\n"
  },
  {
    "path": "tests/python/test_tracker.py",
    "content": "import re\nfrom functools import partial, update_wrapper\nfrom platform import system\nfrom typing import Dict, Union\n\nimport numpy as np\nimport pytest\nfrom hypothesis import HealthCheck, given, settings, strategies\nfrom xgboost import RabitTracker, collective\nfrom xgboost import testing as tm\nfrom xgboost.testing.collective import get_avail_port\n\n\ndef test_rabit_tracker() -> None:\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=1)\n    tracker.start()\n    args = tracker.worker_args()\n    port = args[\"dmlc_tracker_port\"]\n    with collective.CommunicatorContext(**tracker.worker_args()):\n        ret = collective.broadcast(\"test1234\", 0)\n        assert str(ret) == \"test1234\"\n\n    if system() == \"Windows\":\n        pytest.skip(\"Windows is not supported.\")\n\n    with pytest.raises(ValueError, match=\"Failed to bind socket\"):\n        # Port is already being used\n        RabitTracker(host_ip=\"127.0.0.1\", port=port, n_workers=1)\n\n\n@pytest.mark.skipif(**tm.not_linux())\ndef test_wait() -> None:\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=2)\n    tracker.start()\n\n    with pytest.raises(ValueError, match=\"Timeout waiting for the tracker\"):\n        tracker.wait_for(1)\n\n    with pytest.raises(ValueError, match=r\"Failed to (accept|call `getsockopt`)\"):\n        tracker.free()\n\n\n@pytest.mark.skipif(**tm.not_linux())\ndef test_socket_error() -> None:\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=2)\n    tracker.start()\n    env = tracker.worker_args()\n    env[\"dmlc_tracker_port\"] = 0\n    env[\"dmlc_retry\"] = 1\n    with pytest.raises(ValueError, match=\"Failed to bootstrap the communication.\"):\n        with collective.CommunicatorContext(**env):\n            pass\n    with pytest.raises(ValueError):\n        tracker.free()\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_worker_port() -> None:\n    from loky import get_reusable_executor\n\n    n_workers = 4\n\n    
tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n    tracker.start()\n    args = tracker.worker_args()\n\n    def local_test(worker_id: int, rabit_args: dict) -> int:\n        cfg = collective.Config(worker_port=get_avail_port)\n        cfg.update_worker_args(rabit_args)\n        with collective.CommunicatorContext(**rabit_args):\n            a = np.array([1])\n            result = collective.allreduce(a, collective.Op.SUM)\n            assert result[0] == n_workers\n\n            return 1\n\n    fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        results = pool.map(fn, range(n_workers))\n        assert sum(results) == n_workers\n\n\ndef run_rabit_ops(pool, n_workers: int, address: str) -> None:\n    tracker = RabitTracker(host_ip=address, n_workers=n_workers)\n    tracker.start()\n    args = tracker.worker_args()\n\n    def local_test(worker_id: int, rabit_args: dict) -> int:\n        with collective.CommunicatorContext(**rabit_args):\n            a = 1\n            assert collective.is_distributed()\n            arr = np.array([a])\n            reduced = collective.allreduce(arr, collective.Op.SUM)\n            assert reduced[0] == n_workers\n\n            arr = np.array([worker_id])\n            reduced = collective.allreduce(arr, collective.Op.MAX)\n            assert reduced == n_workers - 1\n\n            return 1\n\n    fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n    results = pool.map(fn, range(n_workers))\n    assert sum(results) == n_workers\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_rabit_ops():\n    from loky import get_reusable_executor\n\n    n_workers = 4\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        run_rabit_ops(pool, n_workers, \"127.0.0.1\")\n\n\n@pytest.mark.skipif(**tm.no_ipv6())\n@pytest.mark.skipif(**tm.no_loky())\ndef test_rabit_ops_ipv6():\n    from loky import 
get_reusable_executor\n\n    n_workers = 4\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        run_rabit_ops(pool, n_workers, \"::1\")\n\n\ndef run_allreduce(pool, n_workers: int) -> None:\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n    tracker.start()\n    args = tracker.worker_args()\n\n    def local_test(worker_id: int, rabit_args: Dict[str, Union[str, int]]) -> None:\n        x = np.full(shape=(1024 * 1024 * 32), fill_value=1.0)\n        with collective.CommunicatorContext(**rabit_args):\n            k = np.asarray([1.0])\n            for i in range(128):\n                m = collective.allreduce(k, collective.Op.SUM)\n                assert m == n_workers\n\n            y = collective.allreduce(x, collective.Op.SUM)\n            np.testing.assert_allclose(y, np.full_like(y, fill_value=float(n_workers)))\n\n    fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n    results = pool.map(fn, range(n_workers))\n    for r in results:\n        assert r is None\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_allreduce() -> None:\n    from loky import get_reusable_executor\n\n    n_workers = 4\n    n_trials = 2\n    for _ in range(n_trials):\n        with get_reusable_executor(max_workers=n_workers) as pool:\n            run_allreduce(pool, n_workers)\n\n\ndef run_broadcast(pool, n_workers: int) -> None:\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n    tracker.start()\n    args = tracker.worker_args()\n\n    def local_test(worker_id: int, rabit_args: Dict[str, Union[str, int]]):\n        with collective.CommunicatorContext(**rabit_args):\n            res = collective.broadcast(17, 0)\n            return res\n\n    fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n    results = pool.map(fn, range(n_workers))\n    np.testing.assert_allclose(np.array(list(results)), 17)\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_broadcast():\n    from loky 
import get_reusable_executor\n\n    n_workers = 4\n    n_trials = 2\n\n    for _ in range(n_trials):\n        with get_reusable_executor(max_workers=n_workers) as pool:\n            run_broadcast(pool, n_workers)\n\n\n@pytest.mark.skipif(**tm.no_dask())\ndef test_rank_assignment() -> None:\n    from distributed import Client, LocalCluster\n    from xgboost import dask as dxgb\n    from xgboost.testing.dask import get_rabit_args\n\n    def local_test(worker_id):\n        with dxgb.CommunicatorContext(**args) as ctx:\n            task_id = ctx[\"DMLC_TASK_ID\"]\n            matched = re.search(\".*-([0-9]).*\", task_id)\n            rank = collective.get_rank()\n            # As long as the number of workers is lesser than 10, rank and worker id\n            # should be the same\n            assert rank == int(matched.group(1))\n\n    with LocalCluster(n_workers=8) as cluster:\n        with Client(cluster) as client:\n            workers = tm.dask.get_client_workers(client)\n            args = get_rabit_args(client, len(workers))\n            futures = client.map(local_test, range(len(workers)), workers=workers)\n            client.gather(futures)\n\n\nops_strategy = strategies.lists(\n    strategies.sampled_from([\"broadcast\", \"allreduce_max\", \"allreduce_sum\"])\n)\n\n\n@pytest.mark.skipif(**tm.no_loky())\n@given(ops=ops_strategy, size=strategies.integers(2**4, 2**16))\n@settings(\n    deadline=None,\n    print_blob=True,\n    max_examples=10,\n    suppress_health_check=[HealthCheck.function_scoped_fixture],\n)\ndef test_ops_restart_comm(ops, size) -> None:\n    from loky import get_reusable_executor\n\n    n_workers = 8\n\n    def local_test(w: int, rabit_args: Dict[str, Union[str, int]]) -> None:\n        a = np.arange(0, n_workers)\n        with collective.CommunicatorContext(**rabit_args):\n            for op in ops:\n                if op == \"broadcast\":\n                    b = collective.broadcast(a, root=1)\n                    
np.testing.assert_allclose(b, a)\n                elif op == \"allreduce_max\":\n                    b = collective.allreduce(a, collective.Op.MAX)\n                    np.testing.assert_allclose(b, a)\n                elif op == \"allreduce_sum\":\n                    b = collective.allreduce(a, collective.Op.SUM)\n                    np.testing.assert_allclose(a * n_workers, b)\n                else:\n                    raise ValueError()\n\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n        tracker.start()\n        args = tracker.worker_args()\n\n        fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n        results = pool.map(fn, range(n_workers))\n\n        for r in results:\n            assert r is None\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_ops_reuse_comm() -> None:\n    from loky import get_reusable_executor\n\n    rng = np.random.default_rng(1994)\n    n_examples = 10\n    ops = rng.choice(\n        [\"broadcast\", \"allreduce_sum\", \"allreduce_max\"], size=n_examples\n    ).tolist()\n\n    n_workers = 8\n    n_trials = 8\n\n    def local_test(w: int, rabit_args: Dict[str, Union[str, int]]) -> None:\n        a = np.arange(0, n_workers)\n\n        with collective.CommunicatorContext(**rabit_args):\n            for op in ops:\n                if op == \"broadcast\":\n                    b = collective.broadcast(a, root=1)\n                    assert np.allclose(b, a)\n                elif op == \"allreduce_max\":\n                    c = np.full_like(a, collective.get_rank())\n                    b = collective.allreduce(c, collective.Op.MAX)\n                    assert np.allclose(b, n_workers - 1), b\n                elif op == \"allreduce_sum\":\n                    b = collective.allreduce(a, collective.Op.SUM)\n                    assert np.allclose(a * 8, b)\n                else:\n                    raise 
ValueError()\n\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        for _ in range(n_trials):\n            tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=n_workers)\n            tracker.start()\n            args = tracker.worker_args()\n\n            fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n            results = pool.map(fn, range(n_workers))\n            for r in results:\n                assert r is None\n"
  },
  {
    "path": "tests/python/test_training_continuation.py",
    "content": "from pathlib import Path\nfrom typing import Any\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, settings\nfrom xgboost import testing as tm\nfrom xgboost.testing.continuation import (\n    make_determinism_strategy,\n    run_training_continuation_determinism,\n    run_training_continuation_model_output,\n)\n\n\nclass TestTrainingContinuation:\n    num_parallel_tree = 3\n\n    def generate_parameters(self):\n        xgb_params_01_binary = {\n            \"nthread\": 1,\n        }\n\n        xgb_params_02_binary = {\n            \"nthread\": 1,\n            \"num_parallel_tree\": self.num_parallel_tree,\n        }\n\n        xgb_params_03_binary = {\n            \"nthread\": 1,\n            \"num_class\": 5,\n            \"num_parallel_tree\": self.num_parallel_tree,\n        }\n\n        return [xgb_params_01_binary, xgb_params_02_binary, xgb_params_03_binary]\n\n    def run_training_continuation(\n        self, xgb_params_01, xgb_params_02, xgb_params_03, tmp_path: Path\n    ):\n        from sklearn.datasets import load_digits\n        from sklearn.metrics import mean_squared_error\n\n        digits_2class = load_digits(n_class=2)\n        digits_5class = load_digits(n_class=5)\n\n        X_2class = digits_2class[\"data\"]\n        y_2class = digits_2class[\"target\"]\n\n        X_5class = digits_5class[\"data\"]\n        y_5class = digits_5class[\"target\"]\n\n        dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)\n        dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)\n\n        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=10)\n        ntrees_01 = len(gbdt_01.get_dump())\n        assert ntrees_01 == 10\n\n        model_path = tmp_path / \"xgb_tc.json\"\n        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=0)\n        gbdt_02.save_model(model_path)\n\n        gbdt_02a = xgb.train(\n            xgb_params_01, dtrain_2class, num_boost_round=10, 
xgb_model=gbdt_02\n        )\n        gbdt_02b = xgb.train(\n            xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model=model_path\n        )\n        ntrees_02a = len(gbdt_02a.get_dump())\n        ntrees_02b = len(gbdt_02b.get_dump())\n        assert ntrees_02a == 10\n        assert ntrees_02b == 10\n\n        res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))\n        res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))\n        assert res1 == res2\n\n        res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))\n        res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))\n        assert res1 == res2\n\n        gbdt_03 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=3)\n        gbdt_03.save_model(model_path)\n\n        gbdt_03a = xgb.train(\n            xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model=gbdt_03\n        )\n        gbdt_03b = xgb.train(\n            xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model=model_path\n        )\n        ntrees_03a = len(gbdt_03a.get_dump())\n        ntrees_03b = len(gbdt_03b.get_dump())\n        assert ntrees_03a == 10\n        assert ntrees_03b == 10\n\n        res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))\n        res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))\n        assert res1 == res2\n\n        gbdt_04 = xgb.train(xgb_params_02, dtrain_2class, num_boost_round=3)\n        res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))\n        res2 = mean_squared_error(\n            y_2class,\n            gbdt_04.predict(\n                dtrain_2class, iteration_range=(0, gbdt_04.num_boosted_rounds())\n            ),\n        )\n        assert res1 == res2\n\n        gbdt_04 = xgb.train(\n            xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04\n        )\n        res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))\n        
res2 = mean_squared_error(\n            y_2class,\n            gbdt_04.predict(\n                dtrain_2class, iteration_range=(0, gbdt_04.num_boosted_rounds())\n            ),\n        )\n        assert res1 == res2\n\n        gbdt_05 = xgb.train(xgb_params_03, dtrain_5class, num_boost_round=7)\n        gbdt_05 = xgb.train(\n            xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05\n        )\n\n        res1 = gbdt_05.predict(dtrain_5class)\n        res2 = gbdt_05.predict(\n            dtrain_5class, iteration_range=(0, gbdt_05.num_boosted_rounds())\n        )\n        np.testing.assert_almost_equal(res1, res2)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_training_continuation_json(self, tmp_path: Path) -> None:\n        params = self.generate_parameters()\n        self.run_training_continuation(params[0], params[1], params[2], tmp_path)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_training_continuation_updaters_json(self, tmp_path: Path) -> None:\n        # Picked up from R tests.\n        updaters = \"grow_colmaker,prune,refresh\"\n        params = self.generate_parameters()\n        for p in params:\n            p[\"updater\"] = updaters\n        self.run_training_continuation(params[0], params[1], params[2], tmp_path)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_changed_parameter(self, tmp_path: Path) -> None:\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        clf = xgb.XGBClassifier(n_estimators=2, eval_metric=\"logloss\")\n        clf.fit(X, y, eval_set=[(X, y)])\n        assert tm.non_increasing(clf.evals_result()[\"validation_0\"][\"logloss\"])\n\n        clf.save_model(tmp_path / \"clf.json\")\n        loaded = xgb.XGBClassifier()\n        loaded.load_model(tmp_path / \"clf.json\")\n\n        clf = xgb.XGBClassifier(n_estimators=2)\n        # change metric to error\n        clf.set_params(eval_metric=\"error\")\n        
clf.fit(X, y, eval_set=[(X, y)], xgb_model=loaded)\n        assert tm.non_increasing(clf.evals_result()[\"validation_0\"][\"error\"])\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\", \"exact\"])\n    def test_model_output(self, tree_method: str) -> None:\n        run_training_continuation_model_output(\"cpu\", tree_method)\n\n\n@given(make_determinism_strategy([\"hist\", \"approx\", \"exact\"]))\n@settings(deadline=None, print_blob=True, max_examples=10)\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_continuation_determinism(kwargs: Any) -> None:\n    run_training_continuation_determinism(\n        device=\"cpu\",\n        **kwargs,\n    )\n"
  },
  {
    "path": "tests/python/test_tree_regularization.py",
    "content": "import numpy as np\nfrom numpy.testing import assert_approx_equal\n\nimport xgboost as xgb\n\ntrain_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))\n\n\nclass TestTreeRegularization:\n    def test_alpha(self):\n        params = {\n            \"tree_method\": \"exact\",\n            \"verbosity\": 0,\n            \"objective\": \"reg:squarederror\",\n            \"eta\": 1,\n            \"lambda\": 0,\n            \"alpha\": 0.1,\n            \"base_score\": 0.5,\n        }\n\n        model = xgb.train(params, train_data, 1)\n        preds = model.predict(train_data)\n\n        # Default prediction (with no trees) is 0.5\n        # sum_grad = (0.5 - 1.0)\n        # sum_hess = 1.0\n        # 0.9 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / sum_hess\n        assert_approx_equal(preds[0], 0.9)\n\n    def test_lambda(self):\n        params = {\n            \"tree_method\": \"exact\",\n            \"verbosity\": 0,\n            \"objective\": \"reg:squarederror\",\n            \"eta\": 1,\n            \"lambda\": 1,\n            \"alpha\": 0,\n            \"base_score\": 0.5,\n        }\n\n        model = xgb.train(params, train_data, 1)\n        preds = model.predict(train_data)\n\n        # Default prediction (with no trees) is 0.5\n        # sum_grad = (0.5 - 1.0)\n        # sum_hess = 1.0\n        # 0.75 = 0.5 - sum_grad / (sum_hess + lambda)\n        assert_approx_equal(preds[0], 0.75)\n\n    def test_alpha_and_lambda(self):\n        params = {\n            \"tree_method\": \"exact\",\n            \"verbosity\": 1,\n            \"objective\": \"reg:squarederror\",\n            \"eta\": 1,\n            \"lambda\": 1,\n            \"alpha\": 0.1,\n            \"base_score\": 0.5,\n        }\n\n        model = xgb.train(params, train_data, 1)\n        preds = model.predict(train_data)\n\n        # Default prediction (with no trees) is 0.5\n        # sum_grad = (0.5 - 1.0)\n        # sum_hess = 1.0\n        # 0.7 = 0.5 - (sum_grad - alpha * 
sgn(sum_grad)) / (sum_hess + lambda)\n        assert_approx_equal(preds[0], 0.7)\n\n    def test_unlimited_depth(self):\n        x = np.array([[0], [1], [2], [3]])\n        y = np.array([0, 1, 2, 3])\n\n        model = xgb.XGBRegressor(\n            n_estimators=1,\n            eta=1,\n            tree_method=\"hist\",\n            grow_policy=\"lossguide\",\n            reg_lambda=0,\n            max_leaves=128,\n            max_depth=0,\n        ).fit(x, y)\n        assert np.array_equal(model.predict(x), y)\n"
  },
  {
    "path": "tests/python/test_updaters.py",
    "content": "from typing import Any, Dict\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, note, settings, strategies\nfrom xgboost import testing as tm\nfrom xgboost.testing.params import (\n    cat_parameter_strategy,\n    exact_parameter_strategy,\n    hist_cache_strategy,\n    hist_parameter_strategy,\n)\nfrom xgboost.testing.updater import (\n    check_categorical_missing,\n    check_categorical_ohe,\n    check_get_quantile_cut,\n    check_quantile_loss,\n    run_invalid_category,\n    run_max_cat,\n    train_result,\n)\n\n\nclass TestTreeMethod:\n    USE_ONEHOT = np.iinfo(np.int32).max\n    USE_PART = 1\n\n    @given(\n        exact_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_exact(self, param, num_rounds, dataset):\n        if dataset.name.endswith(\"-l1\"):\n            return\n        param[\"tree_method\"] = \"exact\"\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    def test_exact_sample_by_node_error(self) -> None:\n        X, y, w = tm.make_regression(128, 12, False)\n        with pytest.raises(ValueError, match=\"column sample by node\"):\n            xgb.train(\n                {\"tree_method\": \"exact\", \"colsample_bynode\": 0.999},\n                xgb.DMatrix(X, y, weight=w),\n            )\n\n        xgb.train(\n            {\"tree_method\": \"exact\", \"colsample_bynode\": 1.0},\n            xgb.DMatrix(X, y, weight=w),\n            num_boost_round=2,\n        )\n\n    @pytest.mark.parametrize(\"tree_method\", [\"approx\", \"hist\"])\n    def test_colsample_rng(self, tree_method: str) -> None:\n        \"\"\"Test rng has an effect on column sampling.\"\"\"\n        X, y, _ = tm.make_regression(128, 16, use_cupy=False)\n        reg0 = xgb.XGBRegressor(\n            
n_estimators=2,\n            colsample_bynode=0.5,\n            random_state=42,\n            tree_method=tree_method,\n        )\n        reg0.fit(X, y)\n\n        reg1 = xgb.XGBRegressor(\n            n_estimators=2,\n            colsample_bynode=0.5,\n            random_state=43,\n            tree_method=tree_method,\n        )\n        reg1.fit(X, y)\n\n        assert list(reg0.feature_importances_) != list(reg1.feature_importances_)\n\n    @given(\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_approx(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        param[\"tree_method\"] = \"approx\"\n        param = dataset.set_params(param)\n        param.update(hist_param)\n        param.update(cache_param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_pruner(self):\n        import sklearn\n\n        params = {\"tree_method\": \"exact\"}\n        cancer = sklearn.datasets.load_breast_cancer()\n        X = cancer[\"data\"]\n        y = cancer[\"target\"]\n\n        dtrain = xgb.DMatrix(X, y)\n        booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)\n        grown = str(booster.get_dump())\n\n        params = {\"updater\": \"prune\", \"process_type\": \"update\", \"gamma\": \"0.2\"}\n        booster = xgb.train(\n            params, dtrain=dtrain, num_boost_round=10, xgb_model=booster\n        )\n        after_prune = str(booster.get_dump())\n        assert grown != after_prune\n\n        booster = xgb.train(\n            params, 
dtrain=dtrain, num_boost_round=10, xgb_model=booster\n        )\n        second_prune = str(booster.get_dump())\n        # Second prune should not change the tree\n        assert after_prune == second_prune\n\n    @given(\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_hist(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        param[\"tree_method\"] = \"hist\"\n        param = dataset.set_params(param)\n        param.update(hist_param)\n        param.update(cache_param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    def test_hist_categorical(self):\n        # hist must be same as exact on all-categorial data\n        ag_dtrain, ag_dtest = tm.load_agaricus(__file__)\n        ag_param = {\n            \"max_depth\": 2,\n            \"tree_method\": \"hist\",\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"auc\",\n        }\n        hist_res = {}\n        exact_res = {}\n\n        xgb.train(\n            ag_param,\n            ag_dtrain,\n            10,\n            evals=[(ag_dtrain, \"train\"), (ag_dtest, \"test\")],\n            evals_result=hist_res,\n        )\n        ag_param[\"tree_method\"] = \"exact\"\n        xgb.train(\n            ag_param,\n            ag_dtrain,\n            10,\n            evals=[(ag_dtrain, \"train\"), (ag_dtest, \"test\")],\n            evals_result=exact_res,\n        )\n        assert hist_res[\"train\"][\"auc\"] == exact_res[\"train\"][\"auc\"]\n        assert hist_res[\"test\"][\"auc\"] == 
exact_res[\"test\"][\"auc\"]\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_hist_degenerate_case(self):\n        # Test a degenerate case where the quantile sketcher won't return any\n        # quantile points for a particular feature (the second feature in\n        # this example). Source: https://github.com/dmlc/xgboost/issues/2943\n        nan = np.nan\n        param = {\"missing\": nan, \"tree_method\": \"hist\"}\n        model = xgb.XGBRegressor(**param)\n        X = np.array(\n            [\n                [6.18827160e05, 1.73000000e02],\n                [6.37345679e05, nan],\n                [6.38888889e05, nan],\n                [6.28086420e05, nan],\n            ]\n        )\n        y = [1000000.0, 0.0, 0.0, 500000.0]\n        w = [0, 0, 1, 0]\n        model.fit(X, y, sample_weight=w)\n\n    @given(tm.sparse_datasets_strategy)\n    @settings(deadline=None, print_blob=True)\n    def test_sparse(self, dataset):\n        param = {\"tree_method\": \"hist\", \"max_bin\": 64}\n        hist_result = train_result(param, dataset.get_dmat(), 16)\n        note(str(hist_result))\n        assert tm.non_increasing(hist_result[\"train\"][dataset.metric])\n\n        param = {\"tree_method\": \"approx\", \"max_bin\": 64}\n        approx_result = train_result(param, dataset.get_dmat(), 16)\n        note(str(approx_result))\n        assert tm.non_increasing(approx_result[\"train\"][dataset.metric])\n\n        np.testing.assert_allclose(\n            hist_result[\"train\"][\"rmse\"], approx_result[\"train\"][\"rmse\"]\n        )\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_invalid_category(self, tree_method: str) -> None:\n        run_invalid_category(tree_method, \"cpu\")\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_max_cat(self, tree_method: str) -> None:\n        run_max_cat(tree_method, \"cpu\")\n\n    @given(\n        
strategies.integers(10, 400),\n        strategies.integers(3, 8),\n        strategies.integers(1, 2),\n        strategies.integers(4, 7),\n    )\n    @settings(deadline=None, print_blob=True, max_examples=10)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_ohe(\n        self, rows: int, cols: int, rounds: int, cats: int\n    ) -> None:\n        check_categorical_ohe(\n            rows=rows,\n            cols=cols,\n            rounds=rounds,\n            cats=cats,\n            device=\"cpu\",\n            tree_method=\"approx\",\n        )\n        check_categorical_ohe(\n            rows=rows,\n            cols=cols,\n            rounds=rounds,\n            cats=cats,\n            device=\"cpu\",\n            tree_method=\"hist\",\n        )\n        check_categorical_ohe(\n            rows=rows,\n            cols=cols,\n            rounds=rounds,\n            cats=cats,\n            device=\"cpu\",\n            tree_method=\"hist\",\n            multi_target=True,\n        )\n\n    @given(\n        tm.categorical_dataset_strategy,\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        cat_parameter_strategy,\n        strategies.integers(4, 32),\n        strategies.sampled_from([\"hist\", \"approx\"]),\n    )\n    @settings(deadline=None, print_blob=True)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical(\n        self,\n        dataset: tm.TestDataset,\n        exact_parameters: Dict[str, Any],\n        hist_parameters: Dict[str, Any],\n        cat_parameters: Dict[str, Any],\n        n_rounds: int,\n        tree_method: str,\n    ) -> None:\n        cat_parameters.update(exact_parameters)\n        cat_parameters.update(hist_parameters)\n        cat_parameters[\"tree_method\"] = tree_method\n\n        results = train_result(cat_parameters, dataset.get_dmat(), n_rounds)\n        tm.non_increasing(results[\"train\"][\"rmse\"])\n\n    @given(\n        hist_parameter_strategy,\n        
cat_parameter_strategy,\n        strategies.sampled_from([\"hist\", \"approx\"]),\n    )\n    @settings(deadline=None, print_blob=True, max_examples=10)\n    def test_categorical_ames_housing(\n        self,\n        hist_parameters: Dict[str, Any],\n        cat_parameters: Dict[str, Any],\n        tree_method: str,\n    ) -> None:\n        cat_parameters.update(hist_parameters)\n        dataset = tm.TestDataset(\n            \"ames_housing\", tm.data.get_ames_housing, \"reg:squarederror\", \"rmse\"\n        )\n        cat_parameters[\"tree_method\"] = tree_method\n        results = train_result(cat_parameters, dataset.get_dmat(), 16)\n        tm.non_increasing(results[\"train\"][\"rmse\"])\n\n    @given(\n        strategies.integers(10, 400),\n        strategies.integers(3, 8),\n        strategies.integers(4, 7),\n    )\n    @settings(deadline=None, print_blob=True, max_examples=10)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_missing(self, rows: int, cols: int, cats: int) -> None:\n        check_categorical_missing(\n            rows, cols, cats, device=\"cpu\", tree_method=\"approx\", extmem=False\n        )\n        check_categorical_missing(\n            rows, cols, cats, device=\"cpu\", tree_method=\"hist\", extmem=False\n        )\n\n    @pytest.mark.parametrize(\"weighted\", [True, False])\n    def test_quantile_loss(self, weighted: bool) -> None:\n        check_quantile_loss(\"hist\", weighted, \"cpu\")\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\"])\n    def test_get_quantile_cut(self, tree_method: str) -> None:\n        check_get_quantile_cut(tree_method, \"cpu\")\n"
  },
  {
    "path": "tests/python/test_with_arrow.py",
    "content": "import os\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.compat import is_dataframe\nfrom xgboost.core import DataSplitMode\n\npytestmark = pytest.mark.skipif(\n    tm.no_arrow()[\"condition\"] or tm.no_pandas()[\"condition\"],\n    reason=tm.no_arrow()[\"reason\"] + \" or \" + tm.no_pandas()[\"reason\"],\n)\n\nimport pandas as pd\nimport pyarrow as pa\nimport pyarrow.csv as pc\n\n\ndef test_type_check() -> None:\n    df = pd.DataFrame(\n        [[0, 1, 2.0, 3.0], [1, 2, 3.0, 4.0]], columns=[\"a\", \"b\", \"c\", \"d\"]\n    )\n    table = pa.Table.from_pandas(df)\n    assert is_dataframe(table)\n\n\nclass TestArrowTable:\n    def test_arrow_table(self):\n        df = pd.DataFrame(\n            [[0, 1, 2.0, 3.0], [1, 2, 3.0, 4.0]], columns=[\"a\", \"b\", \"c\", \"d\"]\n        )\n        table = pa.Table.from_pandas(df)\n        dm = xgb.DMatrix(table)\n        assert dm.num_row() == 2\n        assert dm.num_col() == 4\n\n    def test_arrow_table_with_label(self):\n        df = pd.DataFrame([[1, 2.0, 3.0], [2, 3.0, 4.0]], columns=[\"a\", \"b\", \"c\"])\n        table = pa.Table.from_pandas(df)\n        label = np.array([0, 1])\n        dm = xgb.DMatrix(table)\n        dm.set_label(label)\n        assert dm.num_row() == 2\n        assert dm.num_col() == 3\n        np.testing.assert_array_equal(dm.get_label(), np.array([0, 1]))\n\n    def test_arrow_table_from_np(self):\n        coldata = np.array(\n            [[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]\n        )\n        cols = list(map(pa.array, coldata))\n        table = pa.Table.from_arrays(cols, [\"a\", \"b\", \"c\"])\n        dm = xgb.DMatrix(table)\n        assert dm.num_row() == 4\n        assert dm.num_col() == 3\n\n    @pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\n    def test_arrow_train(self, DMatrixT):\n        import pandas as pd\n\n        rows = 100\n        X = 
pd.DataFrame(\n            {\n                \"A\": np.random.randint(0, 10, size=rows),\n                \"B\": np.random.randn(rows),\n                \"C\": np.random.permutation([1, 0] * (rows // 2)),\n            }\n        )\n        y = pd.Series(np.random.randn(rows))\n\n        table = pa.Table.from_pandas(X)\n        dtrain1 = DMatrixT(table)\n        dtrain1.set_label(pa.Table.from_pandas(pd.DataFrame(y)))\n        bst1 = xgb.train({}, dtrain1, num_boost_round=10)\n        preds1 = bst1.predict(DMatrixT(X))\n\n        dtrain2 = DMatrixT(X, y)\n        bst2 = xgb.train({}, dtrain2, num_boost_round=10)\n        preds2 = bst2.predict(DMatrixT(X))\n\n        np.testing.assert_allclose(preds1, preds2)\n\n        preds3 = bst2.inplace_predict(table)\n        np.testing.assert_allclose(preds1, preds3)\n        assert bst2.feature_names == [\"A\", \"B\", \"C\"]\n        assert bst2.feature_types == [\"int\", \"float\", \"int\"]\n\n    def test_arrow_survival(self):\n        data = os.path.join(tm.data_dir(__file__), \"veterans_lung_cancer.csv\")\n        table = pc.read_csv(data)\n        y_lower_bound = table[\"Survival_label_lower_bound\"]\n        y_upper_bound = table[\"Survival_label_upper_bound\"]\n        X = table.drop([\"Survival_label_lower_bound\", \"Survival_label_upper_bound\"])\n\n        dtrain = xgb.DMatrix(\n            X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound\n        )\n        y_np_up = dtrain.get_float_info(\"label_upper_bound\")\n        y_np_low = dtrain.get_float_info(\"label_lower_bound\")\n        np.testing.assert_equal(y_np_up, y_upper_bound.to_pandas().values)\n        np.testing.assert_equal(y_np_low, y_lower_bound.to_pandas().values)\n\n\n@pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\nclass TestArrowTableColumnSplit:\n    def test_arrow_table(self):\n        def verify_arrow_table():\n            df = pd.DataFrame(\n                [[0, 1, 2.0, 3.0], [1, 2, 3.0, 4.0]], 
columns=[\"a\", \"b\", \"c\", \"d\"]\n            )\n            table = pa.Table.from_pandas(df)\n            dm = xgb.DMatrix(table, data_split_mode=DataSplitMode.COL)\n            assert dm.num_row() == 2\n            assert dm.num_col() == 4 * xgb.collective.get_world_size()\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_arrow_table)\n"
  },
  {
    "path": "tests/python/test_with_modin.py",
    "content": "import numpy as np\nimport pandas as pd\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.compat import is_dataframe\nfrom xgboost.testing.data import run_base_margin_info\n\ntry:\n    import modin.pandas as md\nexcept ImportError:\n    pass\n\n\npytestmark = pytest.mark.skipif(**tm.no_modin())\n\n\ndef test_type_check() -> None:\n    df = md.DataFrame([[1, 2.0], [2, 3.0]], columns=[\"a\", \"b\"])\n    assert is_dataframe(df)\n    assert is_dataframe(df.a)\n\n\nclass TestModin:\n    @pytest.mark.xfail\n    def test_modin(self) -> None:\n        df = md.DataFrame([[1, 2., True], [2, 3., False]],\n                          columns=['a', 'b', 'c'])\n        dm = xgb.DMatrix(df, label=md.Series([1, 2]))\n        assert dm.feature_names == ['a', 'b', 'c']\n        assert dm.feature_types == ['int', 'float', 'i']\n        assert dm.num_row() == 2\n        assert dm.num_col() == 3\n        np.testing.assert_array_equal(dm.get_label(), np.array([1, 2]))\n\n        # overwrite feature_names and feature_types\n        dm = xgb.DMatrix(df, label=md.Series([1, 2]),\n                         feature_names=['x', 'y', 'z'],\n                         feature_types=['q', 'q', 'q'])\n        assert dm.feature_names == ['x', 'y', 'z']\n        assert dm.feature_types == ['q', 'q', 'q']\n        assert dm.num_row() == 2\n        assert dm.num_col() == 3\n\n        # incorrect dtypes\n        df = md.DataFrame([[1, 2., 'x'], [2, 3., 'y']],\n                          columns=['a', 'b', 'c'])\n        with pytest.raises(ValueError):\n            xgb.DMatrix(df)\n\n        # numeric columns\n        df = md.DataFrame([[1, 2., True], [2, 3., False]])\n        dm = xgb.DMatrix(df, label=md.Series([1, 2]))\n        assert dm.feature_names == ['0', '1', '2']\n        assert dm.feature_types == ['int', 'float', 'i']\n        assert dm.num_row() == 2\n        assert dm.num_col() == 3\n        np.testing.assert_array_equal(dm.get_label(), 
np.array([1, 2]))\n\n        df = md.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6])\n        dm = xgb.DMatrix(df, label=md.Series([1, 2]))\n        assert dm.feature_names == ['4', '5', '6']\n        assert dm.feature_types == ['int', 'float', 'int']\n        assert dm.num_row() == 2\n        assert dm.num_col() == 3\n\n        df = md.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})\n        dummies = md.get_dummies(df)\n        #    B  A_X  A_Y  A_Z\n        # 0  1    1    0    0\n        # 1  2    0    1    0\n        # 2  3    0    0    1\n        result, _, _ = xgb.data._transform_pandas_df(dummies,\n                                                     enable_categorical=False)\n        exp = np.array([[1., 1., 0., 0.],\n                        [2., 0., 1., 0.],\n                        [3., 0., 0., 1.]]).T\n        np.testing.assert_array_equal(result.columns, exp)\n        dm = xgb.DMatrix(dummies)\n        assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']\n        if int(pd.__version__[0]) >= 2:\n            assert dm.feature_types == [\"int\", \"i\", \"i\", \"i\"]\n        else:\n            assert dm.feature_types == [\"int\", \"int\", \"int\", \"int\"]\n\n        assert dm.num_row() == 3\n        assert dm.num_col() == 4\n\n        df = md.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]})\n        dm = xgb.DMatrix(df)\n        assert dm.feature_names == ['A=1', 'A=2']\n        assert dm.feature_types == ['int', 'int']\n        assert dm.num_row() == 3\n        assert dm.num_col() == 2\n\n        df_int = md.DataFrame([[1, 1.1], [2, 2.2]], columns=[9, 10])\n        dm_int = xgb.DMatrix(df_int)\n        df_range = md.DataFrame([[1, 1.1], [2, 2.2]], columns=range(9, 11, 1))\n        dm_range = xgb.DMatrix(df_range)\n        assert dm_int.feature_names == ['9', '10']  # assert not \"9 \"\n        assert dm_int.feature_names == dm_range.feature_names\n\n        # test MultiIndex as columns\n        df = md.DataFrame(\n            [\n                
(1, 2, 3, 4, 5, 6),\n                (6, 5, 4, 3, 2, 1)\n            ],\n            columns=md.MultiIndex.from_tuples((\n                ('a', 1), ('a', 2), ('a', 3),\n                ('b', 1), ('b', 2), ('b', 3),\n            ))\n        )\n        dm = xgb.DMatrix(df)\n        assert dm.feature_names == ['a 1', 'a 2', 'a 3', 'b 1', 'b 2', 'b 3']\n        assert dm.feature_types == ['int', 'int', 'int', 'int', 'int', 'int']\n        assert dm.num_row() == 2\n        assert dm.num_col() == 6\n\n    def test_modin_label(self):\n        # label must be a single column\n        df = md.DataFrame({\"A\": [\"X\", \"Y\", \"Z\"], \"B\": [1, 2, 3]})\n        with pytest.raises(ValueError):\n            xgb.data._transform_pandas_df(df, False, None, None, \"label\")\n\n        # label must be supported dtype\n        df = md.DataFrame({\"A\": np.array([\"a\", \"b\", \"c\"], dtype=object)})\n        with pytest.raises(ValueError):\n            xgb.data._transform_pandas_df(df, False, None, None, \"label\")\n\n        df = md.DataFrame({\"A\": np.array([1, 2, 3], dtype=int)})\n        result, _, _ = xgb.data._transform_pandas_df(\n            df, False, None, None, \"label\"\n        )\n        np.testing.assert_array_equal(\n            np.stack(result.columns, axis=1),\n            np.array([[1.0], [2.0], [3.0]], dtype=float),\n        )\n        dm = xgb.DMatrix(np.random.randn(3, 2), label=df)\n        assert dm.num_row() == 3\n        assert dm.num_col() == 2\n\n    def test_modin_weight(self):\n        kRows = 32\n        kCols = 8\n\n        X = np.random.randn(kRows, kCols)\n        y = np.random.randn(kRows)\n        w = np.random.uniform(size=kRows).astype(np.float32)\n        w_pd = md.DataFrame(w)\n        data = xgb.DMatrix(X, y, w_pd)\n\n        assert data.num_row() == kRows\n        assert data.num_col() == kCols\n\n        np.testing.assert_array_equal(data.get_weight(), w)\n\n    def test_base_margin(self):\n        run_base_margin_info(md.DataFrame, 
xgb.DMatrix, \"cpu\")\n"
  },
  {
    "path": "tests/python/test_with_pandas.py",
    "content": "from typing import Type\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.compat import is_dataframe\nfrom xgboost.core import DataSplitMode\nfrom xgboost.testing.data import pd_arrow_dtypes, pd_dtypes, run_base_margin_info\nfrom xgboost.testing.utils import predictor_equal\n\ntry:\n    import pandas as pd\nexcept ImportError:\n    pass\n\n\npytestmark = pytest.mark.skipif(**tm.no_pandas())\n\n\ndpath = \"demo/data/\"\nrng = np.random.RandomState(1994)\n\n\ndef test_type_check() -> None:\n    df = pd.DataFrame([[1, 2.0], [2, 3.0]], columns=[\"a\", \"b\"])\n    assert is_dataframe(df)\n    assert is_dataframe(df.a)\n\n\nclass TestPandas:\n    def test_pandas(self, data_split_mode=DataSplitMode.ROW):\n        world_size = xgb.collective.get_world_size()\n        df = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=[\"a\", \"b\", \"c\"])\n        assert is_dataframe(df)\n        assert is_dataframe(df.a)\n        dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode)\n        assert dm.num_row() == 2\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"a\", \"b\", \"c\"]\n            assert dm.feature_types == [\"int\", \"float\", \"i\"]\n            assert dm.num_col() == 3\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"a\", \"b\", \"c\"], world_size\n            )\n            assert dm.feature_types == [\"int\", \"float\", \"i\"] * world_size\n            assert dm.num_col() == 3 * world_size\n        np.testing.assert_array_equal(dm.get_label(), np.array([1, 2]))\n\n        # overwrite feature_names and feature_types\n        dm = xgb.DMatrix(\n            df,\n            label=pd.Series([1, 2]),\n            feature_names=[\"x\", \"y\", \"z\"],\n            feature_types=[\"q\", \"q\", \"q\"],\n            data_split_mode=data_split_mode,\n        )\n        
assert dm.num_row() == 2\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"x\", \"y\", \"z\"]\n            assert dm.feature_types == [\"q\", \"q\", \"q\"]\n            assert dm.num_col() == 3\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"x\", \"y\", \"z\"], world_size\n            )\n            assert dm.feature_types == [\"q\", \"q\", \"q\"] * world_size\n            assert dm.num_col() == 3 * world_size\n\n        # incorrect dtypes\n        df = pd.DataFrame([[1, 2.0, \"x\"], [2, 3.0, \"y\"]], columns=[\"a\", \"b\", \"c\"])\n        with pytest.raises(ValueError):\n            xgb.DMatrix(df, data_split_mode=data_split_mode)\n\n        # numeric columns\n        df = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]])\n        dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode)\n        assert dm.num_row() == 2\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"0\", \"1\", \"2\"]\n            assert dm.feature_types == [\"int\", \"float\", \"i\"]\n            assert dm.num_col() == 3\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"0\", \"1\", \"2\"], world_size\n            )\n            assert dm.feature_types == [\"int\", \"float\", \"i\"] * world_size\n            assert dm.num_col() == 3 * world_size\n        np.testing.assert_array_equal(dm.get_label(), np.array([1, 2]))\n\n        df = pd.DataFrame([[1, 2.0, 1], [2, 3.0, 1]], columns=[4, 5, 6])\n        dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode)\n        assert dm.num_row() == 2\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"4\", \"5\", \"6\"]\n            assert dm.feature_types == [\"int\", \"float\", \"int\"]\n            assert dm.num_col() == 3\n        else:\n            assert dm.feature_names 
== tm.column_split_feature_names(\n                [\"4\", \"5\", \"6\"], world_size\n            )\n            assert dm.feature_types == [\"int\", \"float\", \"int\"] * world_size\n            assert dm.num_col() == 3 * world_size\n\n        df = pd.DataFrame({\"A\": [\"X\", \"Y\", \"Z\"], \"B\": [1, 2, 3]})\n        dummies = pd.get_dummies(df)\n        #    B  A_X  A_Y  A_Z\n        # 0  1    1    0    0\n        # 1  2    0    1    0\n        # 2  3    0    0    1\n        result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False)\n        exp = np.array(\n            [[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]\n        ).T\n        np.testing.assert_array_equal(result.columns, exp)\n        dm = xgb.DMatrix(dummies, data_split_mode=data_split_mode)\n        assert dm.num_row() == 3\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"B\", \"A_X\", \"A_Y\", \"A_Z\"]\n            if int(pd.__version__[0]) >= 2:\n                assert dm.feature_types == [\"int\", \"i\", \"i\", \"i\"]\n            else:\n                assert dm.feature_types == [\"int\", \"int\", \"int\", \"int\"]\n            assert dm.num_col() == 4\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"B\", \"A_X\", \"A_Y\", \"A_Z\"], world_size\n            )\n            if int(pd.__version__[0]) >= 2:\n                assert dm.feature_types == [\"int\", \"i\", \"i\", \"i\"] * world_size\n            else:\n                assert dm.feature_types == [\"int\", \"int\", \"int\", \"int\"] * world_size\n            assert dm.num_col() == 4 * world_size\n\n        df = pd.DataFrame({\"A=1\": [1, 2, 3], \"A=2\": [4, 5, 6]})\n        dm = xgb.DMatrix(df, data_split_mode=data_split_mode)\n        assert dm.num_row() == 3\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"A=1\", \"A=2\"]\n            assert 
dm.feature_types == [\"int\", \"int\"]\n            assert dm.num_col() == 2\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"A=1\", \"A=2\"], world_size\n            )\n            assert dm.feature_types == [\"int\", \"int\"] * world_size\n            assert dm.num_col() == 2 * world_size\n\n        df_int = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=[9, 10])\n        dm_int = xgb.DMatrix(df_int, data_split_mode=data_split_mode)\n        df_range = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=range(9, 11, 1))\n        dm_range = xgb.DMatrix(df_range, data_split_mode=data_split_mode)\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm_int.feature_names == [\"9\", \"10\"]  # assert not \"9 \"\n        else:\n            assert dm_int.feature_names == tm.column_split_feature_names(\n                [\"9\", \"10\"], world_size\n            )\n        assert dm_int.feature_names == dm_range.feature_names\n\n        # test MultiIndex as columns\n        df = pd.DataFrame(\n            [(1, 2, 3, 4, 5, 6), (6, 5, 4, 3, 2, 1)],\n            columns=pd.MultiIndex.from_tuples(\n                (\n                    (\"a\", 1),\n                    (\"a\", 2),\n                    (\"a\", 3),\n                    (\"b\", 1),\n                    (\"b\", 2),\n                    (\"b\", 3),\n                )\n            ),\n        )\n        dm = xgb.DMatrix(df, data_split_mode=data_split_mode)\n        assert dm.num_row() == 2\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.feature_names == [\"a 1\", \"a 2\", \"a 3\", \"b 1\", \"b 2\", \"b 3\"]\n            assert dm.feature_types == [\"int\", \"int\", \"int\", \"int\", \"int\", \"int\"]\n            assert dm.num_col() == 6\n        else:\n            assert dm.feature_names == tm.column_split_feature_names(\n                [\"a 1\", \"a 2\", \"a 3\", \"b 1\", \"b 2\", \"b 3\"], world_size\n            )\n            
assert (\n                dm.feature_types\n                == [\"int\", \"int\", \"int\", \"int\", \"int\", \"int\"] * world_size\n            )\n            assert dm.num_col() == 6 * world_size\n\n        # test Index as columns\n        df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))\n        Xy = xgb.DMatrix(df, data_split_mode=data_split_mode)\n        if data_split_mode == DataSplitMode.ROW:\n            np.testing.assert_equal(np.array(Xy.feature_names), np.array([\"1\", \"2\"]))\n        else:\n            np.testing.assert_equal(\n                np.array(Xy.feature_names),\n                np.array(tm.column_split_feature_names([\"1\", \"2\"], world_size)),\n            )\n\n        # test pandas series\n        data_series = pd.Series([1, 2, 3, 4, 5])\n        dm = xgb.DMatrix(data_series, data_split_mode=data_split_mode)\n        assert dm.num_row() == 5\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.num_col() == 1\n        else:\n            assert dm.num_col() == 1 * world_size\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_multi_target(self) -> None:\n        from sklearn.datasets import make_regression\n\n        X, y = make_regression(n_samples=1024, n_features=4, n_targets=3)\n        ydf = pd.DataFrame({i: y[:, i] for i in range(y.shape[1])})\n\n        Xy = xgb.DMatrix(X, ydf)\n        assert Xy.num_row() == y.shape[0]\n        assert Xy.get_label().size == y.shape[0] * y.shape[1]\n        Xy = xgb.QuantileDMatrix(X, ydf)\n        assert Xy.num_row() == y.shape[0]\n        assert Xy.get_label().size == y.shape[0] * y.shape[1]\n\n    def test_slice(self):\n        rng = np.random.RandomState(1994)\n        rows = 100\n        X = rng.randint(3, 7, size=rows)\n        X = pd.DataFrame({\"f0\": X})\n        y = rng.randn(rows)\n        ridxs = [1, 2, 3, 4, 5, 6]\n        m = xgb.DMatrix(X, y)\n        sliced = m.slice(ridxs)\n\n        assert m.feature_types == sliced.feature_types\n\n    
def test_pandas_categorical(self, data_split_mode=DataSplitMode.ROW):\n        world_size = xgb.collective.get_world_size()\n        rng = np.random.RandomState(1994)\n        rows = 100\n        X = rng.randint(3, 7, size=rows)\n        X = pd.Series(X, dtype=\"category\")\n        X = pd.DataFrame({\"f0\": X})\n        y = rng.randn(rows)\n        m = xgb.DMatrix(X, y, data_split_mode=data_split_mode)\n        assert m.feature_types[0] == \"c\"\n\n        X_0 = [\"f\", \"o\", \"o\"]\n        X_1 = [4, 3, 2]\n        X = pd.DataFrame({\"feat_0\": X_0, \"feat_1\": X_1})\n        X[\"feat_0\"] = X[\"feat_0\"].astype(\"category\")\n        transformed, _, feature_types = xgb.data._transform_pandas_df(\n            X, enable_categorical=True\n        )\n\n        assert len(transformed.aitfs[0]) == 2\n\n        # test missing value\n        X = pd.DataFrame({\"f0\": [\"a\", \"b\", np.nan]})\n        X[\"f0\"] = X[\"f0\"].astype(\"category\")\n        arr, _, _ = xgb.data._transform_pandas_df(X, enable_categorical=True)\n        for c in arr.columns:\n            assert not np.any(c == -1.0)\n\n        X = X[\"f0\"]\n        y = y[: X.shape[0]]\n\n        Xy = xgb.DMatrix(X, y, data_split_mode=data_split_mode)\n        assert Xy.num_row() == 3\n        if data_split_mode == DataSplitMode.ROW:\n            assert Xy.num_col() == 1\n        else:\n            assert Xy.num_col() == 1 * world_size\n\n    def test_pandas_sparse(self):\n        import pandas as pd\n\n        rows = 100\n        X = pd.DataFrame(\n            {\n                \"A\": pd.arrays.SparseArray(np.random.randint(0, 10, size=rows)),\n                \"B\": pd.arrays.SparseArray(np.random.randn(rows)),\n                \"C\": pd.arrays.SparseArray(\n                    np.random.permutation([True, False] * (rows // 2))\n                ),\n            }\n        )\n        y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows)))\n        with pytest.warns(UserWarning, match=\"Sparse arrays from 
pandas\"):\n            dtrain = xgb.DMatrix(X, y)\n        booster = xgb.train({}, dtrain, num_boost_round=4)\n        with pytest.warns(UserWarning, match=\"Sparse arrays from pandas\"):\n            predt_sparse = booster.predict(xgb.DMatrix(X))\n            predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense()))\n        np.testing.assert_allclose(predt_sparse, predt_dense)\n\n    def test_pandas_label(\n        self, data_split_mode: DataSplitMode = DataSplitMode.ROW\n    ) -> None:\n        world_size = xgb.collective.get_world_size()\n        # label must be a single column\n        df = pd.DataFrame({\"A\": [\"X\", \"Y\", \"Z\"], \"B\": [1, 2, 3]})\n        with pytest.raises(ValueError):\n            xgb.data._transform_pandas_df(df, False, None, None, \"label\")\n\n        # label must be supported dtype\n        df = pd.DataFrame({\"A\": np.array([\"a\", \"b\", \"c\"], dtype=object)})\n        with pytest.raises(ValueError):\n            xgb.data._transform_pandas_df(df, False, None, None, \"label\")\n\n        df = pd.DataFrame({\"A\": np.array([1, 2, 3], dtype=int)})\n        result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, \"label\")\n        np.testing.assert_array_equal(\n            np.stack(result.columns, axis=1),\n            np.array([[1.0], [2.0], [3.0]], dtype=float),\n        )\n        dm = xgb.DMatrix(\n            np.random.randn(3, 2), label=df, data_split_mode=data_split_mode\n        )\n        assert dm.num_row() == 3\n        if data_split_mode == DataSplitMode.ROW:\n            assert dm.num_col() == 2\n        else:\n            assert dm.num_col() == 2 * world_size\n\n    def test_pandas_weight(self, data_split_mode=DataSplitMode.ROW):\n        world_size = xgb.collective.get_world_size()\n        kRows = 32\n        kCols = 8\n\n        X = np.random.randn(kRows, kCols)\n        y = np.random.randn(kRows)\n        w = np.random.uniform(size=kRows).astype(np.float32)\n        w_pd = pd.DataFrame(w)\n     
   data = xgb.DMatrix(X, y, weight=w_pd, data_split_mode=data_split_mode)\n\n        assert data.num_row() == kRows\n        if data_split_mode == DataSplitMode.ROW:\n            assert data.num_col() == kCols\n        else:\n            assert data.num_col() == kCols * world_size\n        np.testing.assert_array_equal(data.get_weight(), w)\n\n    def test_base_margin(self):\n        run_base_margin_info(pd.DataFrame, xgb.DMatrix, \"cpu\")\n\n    def test_cv_as_pandas(self):\n        dm, _ = tm.load_agaricus(__file__)\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"error\",\n        }\n\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)\n        assert isinstance(cv, pd.DataFrame)\n        exp = pd.Index(\n            [\"test-error-mean\", \"test-error-std\", \"train-error-mean\", \"train-error-std\"]\n        )\n        assert len(cv.columns.intersection(exp)) == 4\n\n        # show progress log (result is the same as above)\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, verbose_eval=True)\n        assert isinstance(cv, pd.DataFrame)\n        exp = pd.Index(\n            [\"test-error-mean\", \"test-error-std\", \"train-error-mean\", \"train-error-std\"]\n        )\n        assert len(cv.columns.intersection(exp)) == 4\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, nfold=10, verbose_eval=True, show_stdv=False\n        )\n        assert isinstance(cv, pd.DataFrame)\n        exp = pd.Index(\n            [\"test-error-mean\", \"test-error-std\", \"train-error-mean\", \"train-error-std\"]\n        )\n        assert len(cv.columns.intersection(exp)) == 4\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": \"auc\",\n        }\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)\n        
assert \"eval_metric\" in params\n        assert \"auc\" in cv.columns[0]\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"auc\"],\n        }\n        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)\n        assert \"eval_metric\" in params\n        assert \"auc\" in cv.columns[0]\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"auc\"],\n        }\n        cv = xgb.cv(\n            params,\n            dm,\n            num_boost_round=10,\n            nfold=10,\n            as_pandas=True,\n            early_stopping_rounds=1,\n        )\n        assert \"eval_metric\" in params\n        assert \"auc\" in cv.columns[0]\n        assert cv.shape[0] < 10\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n        }\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=\"auc\"\n        )\n        assert \"auc\" in cv.columns[0]\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n        }\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=[\"auc\"]\n        )\n        assert \"auc\" in cv.columns[0]\n\n        params = {\n            \"max_depth\": 2,\n            \"eta\": 1,\n            \"objective\": \"binary:logistic\",\n            \"eval_metric\": [\"auc\"],\n        }\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=\"error\"\n        )\n        assert \"eval_metric\" in params\n        assert \"auc\" not in cv.columns[0]\n        assert \"error\" in cv.columns[0]\n\n        cv = xgb.cv(\n            params, dm, 
num_boost_round=10, nfold=10, as_pandas=True, metrics=[\"error\"]\n        )\n        assert \"eval_metric\" in params\n        assert \"auc\" not in cv.columns[0]\n        assert \"error\" in cv.columns[0]\n\n        params = list(params.items())\n        cv = xgb.cv(\n            params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=[\"error\"]\n        )\n        assert isinstance(params, list)\n        assert \"auc\" not in cv.columns[0]\n        assert \"error\" in cv.columns[0]\n\n    @pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\n    def test_nullable_type(self, DMatrixT) -> None:\n        from xgboost.data import is_pd_cat_dtype\n\n        for orig, df in pd_dtypes():\n            if hasattr(df.dtypes, \"__iter__\"):\n                enable_categorical = any(is_pd_cat_dtype(dtype) for dtype in df.dtypes)\n            else:\n                # series\n                enable_categorical = is_pd_cat_dtype(df.dtype)\n\n            f0_orig = orig[orig.columns[0]] if isinstance(orig, pd.DataFrame) else orig\n            f0 = df[df.columns[0]] if isinstance(df, pd.DataFrame) else df\n            y_orig = f0_orig.astype(pd.Float32Dtype()).fillna(0)\n            y = f0.astype(pd.Float32Dtype()).fillna(0)\n\n            m_orig = DMatrixT(orig, enable_categorical=enable_categorical, label=y_orig)\n            # extension types\n            copy = df.copy()\n            m_etype = DMatrixT(df, enable_categorical=enable_categorical, label=y)\n            # no mutation\n            assert df.equals(copy)\n            # different from pd.BooleanDtype(), None is converted to False with bool\n            if hasattr(orig.dtypes, \"__iter__\") and any(\n                dtype == \"bool\" for dtype in orig.dtypes\n            ):\n                assert not predictor_equal(m_orig, m_etype)\n            else:\n                assert predictor_equal(m_orig, m_etype)\n\n            np.testing.assert_allclose(m_orig.get_label(), 
m_etype.get_label())\n            np.testing.assert_allclose(m_etype.get_label(), y.values.astype(np.float32))\n\n            if isinstance(df, pd.DataFrame):\n                f0 = df[\"f0\"]\n                with pytest.raises(ValueError, match=\"Label contains NaN\"):\n                    xgb.DMatrix(df, f0, enable_categorical=enable_categorical)\n\n    @pytest.mark.skipif(**tm.no_arrow())\n    @pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\n    def test_pyarrow_type(self, DMatrixT: Type[xgb.DMatrix]) -> None:\n        for orig, df in pd_arrow_dtypes():\n            f0_orig: pd.Series = orig[\"f0\"]\n            f0 = df[\"f0\"]\n\n            if f0.dtype.name.startswith(\"bool\"):\n                y = None\n                y_orig = None\n            else:\n                y_orig = f0_orig.fillna(0, inplace=False)\n                y = f0.fillna(0, inplace=False)\n\n            m_orig = DMatrixT(orig, label=y_orig)\n            m_etype = DMatrixT(df, label=y)\n\n            assert predictor_equal(m_orig, m_etype)\n            if y is not None:\n                np.testing.assert_allclose(m_orig.get_label(), m_etype.get_label())\n                np.testing.assert_allclose(m_etype.get_label(), y.values)\n\n    @pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\n    def test_mixed_type(self, DMatrixT: Type[xgb.DMatrix]) -> None:\n        f0 = np.arange(0, 4)\n        f1 = pd.Series(f0, dtype=\"int64[pyarrow]\")\n        f2l = list(f0)\n        f2l[0] = pd.NA\n        f2 = pd.Series(f2l, dtype=pd.Int64Dtype())\n\n        df = pd.DataFrame({\"f0\": f0})\n        df[\"f2\"] = f2\n\n        m = DMatrixT(df)\n        assert m.num_col() == df.shape[1]\n\n        df[\"f1\"] = f1\n        m = DMatrixT(df)\n        assert m.num_col() == df.shape[1]\n        assert m.num_row() == df.shape[0]\n        assert m.num_nonmissing() == df.size - 1\n        assert m.feature_names == list(map(str, df.columns))\n        assert 
m.feature_types == [\"int\"] * df.shape[1]\n\n        y = f0\n        m.set_info(label=y)\n        booster = xgb.train({}, m)\n        p0 = booster.inplace_predict(df)\n        p1 = booster.predict(m)\n        np.testing.assert_allclose(p0, p1)\n\n    @pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\n    def test_pandas_column_split(self):\n        tm.run_with_rabit(\n            world_size=3, test_fn=self.test_pandas, data_split_mode=DataSplitMode.COL\n        )\n\n    @pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\n    def test_pandas_categorical_column_split(self):\n        tm.run_with_rabit(\n            world_size=3,\n            test_fn=self.test_pandas_categorical,\n            data_split_mode=DataSplitMode.COL,\n        )\n\n    @pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\n    def test_pandas_sparse_column_split(self):\n        rows = 100\n        X = pd.DataFrame(\n            {\n                \"A\": pd.arrays.SparseArray(np.random.randint(0, 10, size=rows)),\n                \"B\": pd.arrays.SparseArray(np.random.randn(rows)),\n                \"C\": pd.arrays.SparseArray(\n                    np.random.permutation([True, False] * (rows // 2))\n                ),\n            }\n        )\n        y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows)))\n\n        def verify_pandas_sparse():\n            with pytest.warns(UserWarning, match=\"Sparse arrays from pandas\"):\n                dtrain = xgb.DMatrix(X, y, data_split_mode=DataSplitMode.COL)\n            booster = xgb.train({}, dtrain, num_boost_round=4)\n            with pytest.warns(UserWarning, match=\"Sparse arrays from pandas\"):\n                predt_sparse = booster.predict(\n                    xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)\n                )\n                predt_dense = booster.predict(\n                    xgb.DMatrix(X.sparse.to_dense(), 
data_split_mode=DataSplitMode.COL)\n                )\n            np.testing.assert_allclose(predt_sparse, predt_dense)\n\n        tm.run_with_rabit(world_size=3, test_fn=verify_pandas_sparse)\n\n    @pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\n    def test_pandas_label_column_split(self):\n        tm.run_with_rabit(\n            world_size=3,\n            test_fn=self.test_pandas_label,\n            data_split_mode=DataSplitMode.COL,\n        )\n\n    @pytest.mark.skipif(tm.is_windows(), reason=\"Rabit does not run on windows\")\n    def test_pandas_weight_column_split(self):\n        tm.run_with_rabit(\n            world_size=3,\n            test_fn=self.test_pandas_weight,\n            data_split_mode=DataSplitMode.COL,\n        )\n"
  },
  {
    "path": "tests/python/test_with_polars.py",
    "content": "\"\"\"Copyright 2024, XGBoost contributors\"\"\"\n\nimport json\nfrom pathlib import Path\nfrom typing import Type, Union\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost.compat import is_dataframe\n\npl = pytest.importorskip(\"polars\")\n\n\ndef test_type_check() -> None:\n    df = pl.DataFrame({\"a\": [1, 2, 3], \"b\": [3, 4, 5]})\n    assert is_dataframe(df)\n    assert is_dataframe(df[\"a\"])\n\n\n@pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\ndef test_polars_basic(\n    DMatrixT: Union[Type[xgb.DMatrix], Type[xgb.QuantileDMatrix]],\n) -> None:\n    df = pl.DataFrame({\"a\": [1, 2, 3], \"b\": [3, 4, 5]})\n    Xy = DMatrixT(df)\n    assert Xy.num_row() == df.shape[0]\n    assert Xy.num_col() == df.shape[1]\n    assert Xy.num_nonmissing() == np.prod(df.shape)\n\n    # feature info\n    assert Xy.feature_names == df.columns\n    assert Xy.feature_types == [\"int\", \"int\"]\n\n    res = Xy.get_data().toarray()\n    res1 = df.to_numpy()\n\n    if isinstance(Xy, xgb.QuantileDMatrix):\n        # skip min values in the cut.\n        np.testing.assert_allclose(res[1:, :], res1[1:, :])\n    else:\n        np.testing.assert_allclose(res, res1)\n\n    # boolean\n    df = pl.DataFrame({\"a\": [True, False, False], \"b\": [False, False, True]})\n    Xy = DMatrixT(df)\n    np.testing.assert_allclose(\n        Xy.get_data().data, np.array([1, 0, 0, 0, 0, 1]), atol=1e-5\n    )\n\n\ndef test_polars_missing() -> None:\n    df = pl.DataFrame({\"a\": [1, None, 3], \"b\": [3, 4, None]})\n    Xy = xgb.DMatrix(df)\n    assert Xy.num_row() == df.shape[0]\n    assert Xy.num_col() == df.shape[1]\n    assert Xy.num_nonmissing() == 4\n\n    np.testing.assert_allclose(Xy.get_data().data, np.array([1, 3, 4, 3]))\n    np.testing.assert_allclose(Xy.get_data().indptr, np.array([0, 2, 3, 4]))\n    np.testing.assert_allclose(Xy.get_data().indices, np.array([0, 1, 1, 0]))\n\n    ser = pl.Series(\"y\", np.arange(0, 
df.shape[0]))\n    Xy.set_info(label=ser)\n    booster = xgb.train({}, Xy, num_boost_round=1)\n    predt0 = booster.inplace_predict(df)\n    predt1 = booster.predict(Xy)\n    np.testing.assert_allclose(predt0, predt1)\n\n\ndef test_classififer(tmp_path: Path) -> None:\n    from sklearn.datasets import make_classification, make_multilabel_classification\n\n    X, y = make_classification(random_state=2024)\n    X_df = pl.DataFrame(X)\n    y_ser = pl.Series(y)\n\n    clf0 = xgb.XGBClassifier()\n    clf0.fit(X_df, y_ser)\n\n    clf1 = xgb.XGBClassifier()\n    clf1.fit(X, y)\n\n    path0 = tmp_path / \"clf0.json\"\n    clf0.save_model(path0)\n\n    path1 = tmp_path / \"clf1.json\"\n    clf1.save_model(path1)\n\n    with open(path0, \"r\") as fd:\n        model0 = json.load(fd)\n    with open(path1, \"r\") as fd:\n        model1 = json.load(fd)\n\n    model0[\"learner\"][\"feature_names\"] = []\n    model0[\"learner\"][\"feature_types\"] = []\n    assert model0 == model1\n\n    predt0 = clf0.predict(X)\n    predt1 = clf1.predict(X)\n\n    np.testing.assert_allclose(predt0, predt1)\n\n    assert (clf0.feature_names_in_ == X_df.columns).all()\n    assert clf0.n_features_in_ == X_df.shape[1]\n\n    X, y = make_multilabel_classification(128)\n    X_df = pl.DataFrame(X)\n    y_df = pl.DataFrame(y)\n    clf = xgb.XGBClassifier(n_estimators=1)\n    clf.fit(X_df, y_df)\n    assert clf.n_classes_ == 2\n\n    X, y = make_classification(n_classes=3, n_informative=5)\n    X_df = pl.DataFrame(X)\n    y_ser = pl.Series(y)\n    clf = xgb.XGBClassifier(n_estimators=1)\n    clf.fit(X_df, y_ser)\n    assert clf.n_classes_ == 3\n\n\ndef test_regressor() -> None:\n    from sklearn.datasets import make_regression\n\n    X, y = make_regression(n_targets=3)\n    X_df = pl.DataFrame(X)\n    y_df = pl.DataFrame(y)\n    assert y_df.shape[1] == 3\n\n    reg0 = xgb.XGBRegressor()\n    reg0.fit(X_df, y_df)\n\n    reg1 = xgb.XGBRegressor()\n    reg1.fit(X, y)\n\n    predt0 = reg0.predict(X)\n    
predt1 = reg1.predict(X)\n\n    np.testing.assert_allclose(predt0, predt1)\n\n\ndef test_categorical() -> None:\n    import polars as pl\n\n    cats = [\"aa\", \"cc\", \"bb\", \"ee\", \"ee\"]\n    df = pl.DataFrame(\n        {\"f0\": [1, 3, 2, 4, 4], \"f1\": cats},\n        schema=[(\"f0\", pl.Int64()), (\"f1\", pl.Categorical(ordering=\"lexical\"))],\n    )\n\n    data = xgb.DMatrix(df)\n    categories = data.get_categories(export_to_arrow=True)\n    assert dict(categories.to_arrow())[\"f0\"] is None\n    f1 = dict(categories.to_arrow())[\"f1\"]\n    assert f1 is not None\n    assert f1.to_pylist() == cats[:4]\n\n    df = pl.DataFrame(\n        {\"f0\": [1, 3, 2, 4, 4], \"f1\": cats},\n        schema=[(\"f0\", pl.Int64()), (\"f1\", pl.Enum(cats[:4]))],\n    )\n    data = xgb.DMatrix(df)\n    categories = data.get_categories(export_to_arrow=True)\n    assert dict(categories.to_arrow())[\"f0\"] is None\n    f1 = dict(categories.to_arrow())[\"f1\"]\n    assert f1 is not None\n    assert f1.to_pylist() == cats[:4]\n\n    rng = np.random.default_rng(2025)\n    y = rng.normal(size=(df.shape[0]))\n    Xy = xgb.QuantileDMatrix(df, y)\n    booster = xgb.train({}, Xy, num_boost_round=8)\n    predt_0 = booster.inplace_predict(df)\n\n    df_rev = pl.DataFrame(\n        {\"f0\": [1, 3, 2, 4, 4], \"f1\": cats},\n        schema=[(\"f0\", pl.Int64()), (\"f1\", pl.Enum(cats[:4][::-1]))],\n    )\n    predt_1 = booster.inplace_predict(df_rev)\n    assert (\n        df[\"f1\"].cat.get_categories().to_list()\n        != df_rev[\"f1\"].cat.get_categories().to_list()\n    )\n    np.testing.assert_allclose(predt_0, predt_1)\n"
  },
  {
    "path": "tests/python/test_with_scipy.py",
    "content": "import itertools\nimport warnings\nfrom typing import Type\n\nimport numpy as np\nimport pytest\nimport scipy.sparse\n\nimport xgboost as xgb\nfrom xgboost.testing.utils import predictor_equal\n\n\n@pytest.mark.filterwarnings(\"error\")\n@pytest.mark.parametrize(\n    \"DMatrixT,CSR\",\n    [\n        (m, n)\n        for m, n in itertools.product(\n            (xgb.DMatrix, xgb.QuantileDMatrix),\n            (scipy.sparse.csr_matrix, scipy.sparse.csr_array),\n        )\n    ],\n)\ndef test_csr(DMatrixT: Type[xgb.DMatrix], CSR: Type) -> None:\n    with warnings.catch_warnings():\n        indptr = np.array([0, 2, 3, 6])\n        indices = np.array([0, 2, 2, 0, 1, 2])\n        data = np.array([1, 2, 3, 4, 5, 6])\n        X = CSR((data, indices, indptr), shape=(3, 3))\n        dtrain = DMatrixT(X)\n        assert dtrain.num_row() == 3\n        assert dtrain.num_col() == 3\n        assert dtrain.num_nonmissing() == data.size\n\n\n@pytest.mark.filterwarnings(\"error\")\n@pytest.mark.parametrize(\n    \"DMatrixT,CSC\",\n    [\n        (m, n)\n        for m, n in itertools.product(\n            (xgb.DMatrix, xgb.QuantileDMatrix),\n            (scipy.sparse.csc_matrix, scipy.sparse.csc_array),\n        )\n    ],\n)\ndef test_csc(DMatrixT: Type[xgb.DMatrix], CSC: Type) -> None:\n    with warnings.catch_warnings():\n        row = np.array([0, 2, 2, 0, 1, 2])\n        col = np.array([0, 0, 1, 2, 2, 2])\n        data = np.array([1, 2, 3, 4, 5, 6])\n        X = CSC((data, (row, col)), shape=(3, 3))\n        dtrain = DMatrixT(X)\n        assert dtrain.num_row() == 3\n        assert dtrain.num_col() == 3\n        assert dtrain.num_nonmissing() == data.size\n\n        indptr = np.array([0, 3, 5])\n        data = np.array([0, 1, 2, 3, 4])\n        row_idx = np.array([0, 1, 2, 0, 2])\n        X = CSC((data, row_idx, indptr), shape=(3, 2))\n        assert predictor_equal(DMatrixT(X.tocsr()), 
DMatrixT(X))\n\n\n@pytest.mark.filterwarnings(\"error\")\n@pytest.mark.parametrize(\n    \"DMatrixT,COO\",\n    [\n        (m, n)\n        for m, n in itertools.product(\n            (xgb.DMatrix, xgb.QuantileDMatrix),\n            (scipy.sparse.coo_matrix, scipy.sparse.coo_array),\n        )\n    ],\n)\ndef test_coo(DMatrixT: Type[xgb.DMatrix], COO: Type) -> None:\n    with warnings.catch_warnings():\n        row = np.array([0, 2, 2, 0, 1, 2])\n        col = np.array([0, 0, 1, 2, 2, 2])\n        data = np.array([1, 2, 3, 4, 5, 6])\n        X = COO((data, (row, col)), shape=(3, 3))\n        dtrain = DMatrixT(X)\n        assert dtrain.num_row() == 3\n        assert dtrain.num_col() == 3\n        assert dtrain.num_nonmissing() == data.size\n\n        assert predictor_equal(DMatrixT(X.tocsr()), DMatrixT(X))\n"
  },
  {
    "path": "tests/python/test_with_shap.py",
    "content": "import numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost.testing.data import get_california_housing\n\ntry:\n    import shap\nexcept Exception:\n    shap = None\n    pass\n\n\npytestmark = pytest.mark.skipif(shap is None, reason=\"Requires shap package\")\n\n\n# xgboost removed ntree_limit in 2.0, which breaks the SHAP package.\n@pytest.mark.xfail\ndef test_with_shap() -> None:\n    X, y = get_california_housing()\n    dtrain = xgb.DMatrix(X, label=y)\n    model = xgb.train({\"learning_rate\": 0.01}, dtrain, 10)\n    explainer = shap.TreeExplainer(model)\n    shap_values = explainer.shap_values(X)\n    margin = model.predict(dtrain, output_margin=True)\n    assert np.allclose(\n        np.sum(shap_values, axis=len(shap_values.shape) - 1),\n        margin - explainer.expected_value,\n        1e-3,\n        1e-3,\n    )\n"
  },
  {
    "path": "tests/python/test_with_sklearn.py",
    "content": "import json\nimport os\nimport pickle\nimport re\nimport warnings\nfrom pathlib import Path\nfrom typing import Optional\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom sklearn.utils.estimator_checks import parametrize_with_checks\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import get_california_housing\nfrom xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df\nfrom xgboost.testing.shared import get_feature_weights, validate_data_initialization\nfrom xgboost.testing.updater import get_basescore\nfrom xgboost.testing.with_skl import (\n    run_boost_from_prediction_binary,\n    run_boost_from_prediction_multi_clasas,\n    run_housing_rf_regression,\n    run_intercept,\n    run_recoding,\n)\n\nrng = np.random.RandomState(1994)\npytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]\n\n\ndef test_binary_classification():\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import KFold\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):\n        for train_index, test_index in kf.split(X, y):\n            clf = cls(random_state=42, eval_metric=[\"auc\", \"logloss\"])\n            xgb_model = clf.fit(X[train_index], y[train_index])\n            preds = xgb_model.predict(X[test_index])\n            labels = y[test_index]\n            err = sum(\n                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n            ) / float(len(preds))\n            assert err < 0.1\n\n\n@pytest.mark.parametrize(\"objective\", [\"multi:softmax\", \"multi:softprob\"])\ndef test_multiclass_classification(objective):\n    from sklearn.datasets import load_iris\n    from sklearn.model_selection import KFold\n\n    def check_pred(preds, labels, output_margin):\n        if output_margin:\n            
err = sum(\n                1 for i in range(len(preds)) if preds[i].argmax() != labels[i]\n            ) / float(len(preds))\n        else:\n            err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(\n                len(preds)\n            )\n        assert err < 0.4\n\n    X, y = load_iris(return_X_y=True)\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBClassifier(objective=objective).fit(\n            X[train_index], y[train_index]\n        )\n        assert xgb_model.get_booster().num_boosted_rounds() == 100\n        preds = xgb_model.predict(X[test_index])\n        # test other params in XGBClassifier().fit\n        preds2 = xgb_model.predict(\n            X[test_index], output_margin=True, iteration_range=(0, 1)\n        )\n        preds3 = xgb_model.predict(\n            X[test_index], output_margin=True, iteration_range=None\n        )\n        preds4 = xgb_model.predict(\n            X[test_index], output_margin=False, iteration_range=(0, 1)\n        )\n        labels = y[test_index]\n\n        check_pred(preds, labels, output_margin=False)\n        check_pred(preds2, labels, output_margin=True)\n        check_pred(preds3, labels, output_margin=True)\n        check_pred(preds4, labels, output_margin=False)\n\n    cls = xgb.XGBClassifier(n_estimators=4).fit(X, y)\n    assert cls.n_classes_ == 3\n    proba = cls.predict_proba(X)\n    assert proba.shape[0] == X.shape[0]\n    assert proba.shape[1] == cls.n_classes_\n\n    # custom objective, the default is multi:softprob so no transformation is required.\n    cls = xgb.XGBClassifier(n_estimators=4, objective=tm.softprob_obj(3)).fit(X, y)\n    proba = cls.predict_proba(X)\n    assert proba.shape[0] == X.shape[0]\n    assert proba.shape[1] == cls.n_classes_\n\n\ndef test_best_iteration():\n    from sklearn.datasets import load_iris\n\n    X, y = load_iris(return_X_y=True)\n\n    def 
train(booster: str, forest: Optional[int]) -> None:\n        rounds = 4\n        cls = xgb.XGBClassifier(\n            n_estimators=rounds,\n            num_parallel_tree=forest,\n            booster=booster,\n            early_stopping_rounds=3,\n        ).fit(X, y, eval_set=[(X, y)])\n        assert cls.best_iteration == rounds - 1\n\n        # best_iteration is used by default, assert that under gblinear it's\n        # automatically ignored due to being 0.\n        cls.predict(X)\n\n    num_parallel_tree = 4\n    train(\"gbtree\", num_parallel_tree)\n    train(\"dart\", num_parallel_tree)\n    train(\"gblinear\", None)\n\n\ndef test_ranking():\n    # generate random data\n    x_train = np.random.rand(1000, 10)\n    y_train = np.random.randint(5, size=1000)\n    train_group = np.repeat(50, 20)\n\n    x_valid = np.random.rand(200, 10)\n    y_valid = np.random.randint(5, size=200)\n    valid_group = np.repeat(50, 4)\n\n    x_test = np.random.rand(100, 10)\n\n    params = {\n        \"tree_method\": \"exact\",\n        \"objective\": \"rank:pairwise\",\n        \"learning_rate\": 0.1,\n        \"gamma\": 1.0,\n        \"min_child_weight\": 0.1,\n        \"max_depth\": 6,\n        \"n_estimators\": 4,\n    }\n    model = xgb.sklearn.XGBRanker(**params)\n    model.fit(\n        x_train,\n        y_train,\n        group=train_group,\n        eval_set=[(x_valid, y_valid)],\n        eval_group=[valid_group],\n    )\n    assert model.evals_result()\n\n    pred = model.predict(x_test)\n\n    train_data = xgb.DMatrix(x_train, y_train)\n    valid_data = xgb.DMatrix(x_valid, y_valid)\n    test_data = xgb.DMatrix(x_test)\n    train_data.set_group(train_group)\n    assert train_data.get_label().shape[0] == x_train.shape[0]\n    valid_data.set_group(valid_group)\n\n    params_orig = {\n        \"tree_method\": \"exact\",\n        \"objective\": \"rank:pairwise\",\n        \"eta\": 0.1,\n        \"gamma\": 1.0,\n        \"min_child_weight\": 0.1,\n        \"max_depth\": 6,\n    
}\n    xgb_model_orig = xgb.train(\n        params_orig, train_data, num_boost_round=4, evals=[(valid_data, \"validation\")]\n    )\n    pred_orig = xgb_model_orig.predict(test_data)\n\n    np.testing.assert_almost_equal(pred, pred_orig)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_ranking_categorical() -> None:\n    run_ranking_categorical(device=\"cpu\")\n\n\ndef test_ranking_metric() -> None:\n    from sklearn.metrics import roc_auc_score\n\n    X, y, qid, w = tm.make_ltr(512, 4, 3, 1)\n    # use auc for test as ndcg_score in sklearn works only on label gain instead of exp\n    # gain.\n    # note that the auc in sklearn is different from the one in XGBoost. The one in\n    # sklearn compares the number of mis-classified docs, while the one in xgboost\n    # compares the number of mis-classified pairs.\n    ltr = xgb.XGBRanker(\n        eval_metric=roc_auc_score,\n        n_estimators=10,\n        tree_method=\"hist\",\n        max_depth=2,\n        objective=\"rank:pairwise\",\n    )\n    ltr.fit(\n        X,\n        y,\n        qid=qid,\n        sample_weight=w,\n        eval_set=[(X, y)],\n        eval_qid=[qid],\n        sample_weight_eval_set=[w],\n        verbose=True,\n    )\n    results = ltr.evals_result()\n    assert results[\"validation_0\"][\"roc_auc_score\"][-1] > 0.6\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_ranking_qid_df():\n    import pandas as pd\n\n    run_ranking_qid_df(pd, \"hist\", \"cpu\")\n\n\ndef test_stacking_regression():\n    from sklearn.datasets import load_diabetes\n    from sklearn.ensemble import RandomForestRegressor, StackingRegressor\n    from sklearn.linear_model import RidgeCV\n    from sklearn.model_selection import train_test_split\n\n    X, y = load_diabetes(return_X_y=True)\n    estimators = [\n        (\"gbm\", xgb.sklearn.XGBRegressor(objective=\"reg:squarederror\")),\n        (\"lr\", RidgeCV()),\n    ]\n    reg = StackingRegressor(\n        estimators=estimators,\n        
final_estimator=RandomForestRegressor(n_estimators=10, random_state=42),\n    )\n\n    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n    reg.fit(X_train, y_train).score(X_test, y_test)\n\n\ndef test_stacking_classification():\n    from sklearn.datasets import load_iris\n    from sklearn.ensemble import StackingClassifier\n    from sklearn.linear_model import LogisticRegression\n    from sklearn.model_selection import train_test_split\n    from sklearn.pipeline import make_pipeline\n    from sklearn.preprocessing import StandardScaler\n    from sklearn.svm import LinearSVC\n\n    X, y = load_iris(return_X_y=True)\n    estimators = [\n        (\"gbm\", xgb.sklearn.XGBClassifier()),\n        (\"svr\", make_pipeline(StandardScaler(), LinearSVC(random_state=42))),\n    ]\n    clf = StackingClassifier(\n        estimators=estimators, final_estimator=LogisticRegression()\n    )\n\n    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n    clf.fit(X_train, y_train).score(X_test, y_test)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_feature_importances_weight(tmp_path: Path) -> None:\n    from sklearn.datasets import load_digits\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n        tree_method=\"exact\",\n        learning_rate=0.1,\n        importance_type=\"weight\",\n        base_score=0.5,\n    ).fit(X, y)\n    # fmt: off\n    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,\n                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,\n                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,\n                    0.03333334, 0.03333334, 0., 0.32499999, 0., 0., 0., 0.,\n                    0.05, 0.06666667, 0., 0., 0., 0., 0., 0., 0., 0.04166667,\n                    0., 0., 0., 0., 0., 0., 0., 0.00833333, 0., 0., 0., 0.,\n                    0.], 
dtype=np.float32)\n    # fmt: on\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    # numeric columns\n    import pandas as pd\n\n    y = pd.Series(digits[\"target\"])\n    X = pd.DataFrame(digits[\"data\"])\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n        tree_method=\"exact\",\n        learning_rate=0.1,\n        base_score=0.5,\n        importance_type=\"weight\",\n    ).fit(X, y)\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n        tree_method=\"exact\",\n        learning_rate=0.1,\n        importance_type=\"weight\",\n        base_score=0.5,\n    ).fit(X, y)\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    with pytest.raises(ValueError):\n        xgb_model.set_params(importance_type=\"foo\")\n        xgb_model.feature_importances_\n\n    X, y = load_digits(n_class=3, return_X_y=True)\n\n    cls = xgb.XGBClassifier(booster=\"gblinear\", n_estimators=4)\n    cls.fit(X, y)\n    assert cls.feature_importances_.shape[0] == X.shape[1]\n    assert cls.feature_importances_.shape[1] == 3\n    path = tmp_path / \"model.json\"\n    cls.save_model(path)\n    with open(path, \"r\") as fd:\n        model = json.load(fd)\n    weights = np.array(\n        model[\"learner\"][\"gradient_booster\"][\"model\"][\"weights\"]\n    ).reshape((cls.n_features_in_ + 1, 3))\n    weights = weights[:-1, ...]\n    np.testing.assert_allclose(\n        weights / weights.sum(), cls.feature_importances_, rtol=1e-6\n    )\n\n    with pytest.raises(ValueError):\n        cls.set_params(importance_type=\"cover\")\n        cls.feature_importances_\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_feature_importances_gain():\n    from sklearn.datasets import load_digits\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n      
  tree_method=\"exact\",\n        learning_rate=0.1,\n        importance_type=\"gain\",\n        base_score=0.5,\n    ).fit(X, y)\n    # fmt: off\n    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n                    0.00326159, 0., 0., 0., 0., 0., 0., 0., 0.,\n                    0.00297238, 0.00988034, 0., 0., 0., 0., 0., 0.,\n                    0.03512521, 0.41123885, 0., 0., 0., 0.,\n                    0.01326332, 0.00160674, 0., 0.4206952, 0., 0., 0.,\n                    0., 0.00616747, 0.01237546, 0., 0., 0., 0., 0.,\n                    0., 0., 0.08240705, 0., 0., 0., 0., 0., 0., 0.,\n                    0.00100649, 0., 0., 0., 0., 0.], dtype=np.float32)\n    # fmt: on\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    # numeric columns\n    import pandas as pd\n\n    y = pd.Series(digits[\"target\"])\n    X = pd.DataFrame(digits[\"data\"])\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n        tree_method=\"exact\",\n        learning_rate=0.1,\n        importance_type=\"gain\",\n        base_score=0.5,\n    ).fit(X, y)\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    xgb_model = xgb.XGBClassifier(\n        random_state=0,\n        tree_method=\"exact\",\n        learning_rate=0.1,\n        importance_type=\"gain\",\n        base_score=0.5,\n    ).fit(X, y)\n    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)\n\n    # no split can be found\n    cls = xgb.XGBClassifier(min_child_weight=1000, tree_method=\"hist\", n_estimators=1)\n    cls.fit(X, y)\n    assert np.all(cls.feature_importances_ == 0)\n\n\ndef test_select_feature():\n    from sklearn.datasets import load_digits\n    from sklearn.feature_selection import SelectFromModel\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    cls = xgb.XGBClassifier()\n    cls.fit(X, y)\n    selector = SelectFromModel(cls, prefit=True, max_features=1)\n    X_selected 
= selector.transform(X)\n    assert X_selected.shape[1] == 1\n\n\ndef test_num_parallel_tree():\n    from sklearn.datasets import load_diabetes\n\n    reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4, tree_method=\"hist\")\n    X, y = load_diabetes(return_X_y=True)\n    bst = reg.fit(X=X, y=y)\n    dump = bst.get_booster().get_dump(dump_format=\"json\")\n    assert len(dump) == 16\n\n    reg = xgb.XGBRFRegressor(n_estimators=4)\n    bst = reg.fit(X=X, y=y)\n    dump = bst.get_booster().get_dump(dump_format=\"json\")\n    assert len(dump) == 4\n\n    config = json.loads(bst.get_booster().save_config())\n    assert (\n        int(\n            config[\"learner\"][\"gradient_booster\"][\"gbtree_model_param\"][\n                \"num_parallel_tree\"\n            ]\n        )\n        == 4\n    )\n\n\ndef test_regression():\n    from sklearn.metrics import mean_squared_error\n    from sklearn.model_selection import KFold\n\n    X, y = get_california_housing()\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])\n\n        preds = xgb_model.predict(X[test_index])\n        # test other params in XGBRegressor().fit\n        preds2 = xgb_model.predict(\n            X[test_index], output_margin=True, iteration_range=(0, np.int16(3))\n        )\n        preds3 = xgb_model.predict(\n            X[test_index], output_margin=True, iteration_range=None\n        )\n        preds4 = xgb_model.predict(\n            X[test_index], output_margin=False, iteration_range=(0, 3)\n        )\n        labels = y[test_index]\n\n        assert mean_squared_error(preds, labels) < 25\n        assert mean_squared_error(preds2, labels) < 350\n        assert mean_squared_error(preds3, labels) < 25\n        assert mean_squared_error(preds4, labels) < 350\n\n        with pytest.raises(AttributeError, match=\"feature_names_in_\"):\n            
xgb_model.feature_names_in_\n\n\ndef test_rf_regression():\n    run_housing_rf_regression(\"hist\", \"cpu\")\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"exact\", \"hist\", \"approx\"])\ndef test_parameter_tuning(tree_method: str) -> None:\n    from sklearn.model_selection import GridSearchCV\n\n    X, y = get_california_housing()\n    reg = xgb.XGBRegressor(learning_rate=0.1, tree_method=tree_method)\n    grid_cv = GridSearchCV(\n        reg, {\"max_depth\": [2, 4], \"n_estimators\": [50, 200]}, cv=2, verbose=1\n    )\n    grid_cv.fit(X, y)\n    assert grid_cv.best_score_ < 0.7\n    assert grid_cv.best_params_ == {\n        \"n_estimators\": 50,\n        \"max_depth\": 2,\n    }\n\n\ndef test_regression_with_custom_objective():\n    from sklearn.metrics import mean_squared_error\n    from sklearn.model_selection import KFold\n\n    X, y = get_california_housing()\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBRegressor(objective=tm.ls_obj).fit(\n            X[train_index], y[train_index]\n        )\n        preds = xgb_model.predict(X[test_index])\n        labels = y[test_index]\n    assert mean_squared_error(preds, labels) < 25\n\n    w = rng.uniform(low=0.0, high=1.0, size=X.shape[0])\n    reg = xgb.XGBRegressor(objective=tm.ls_obj, n_estimators=25)\n    reg.fit(X, y, sample_weight=w)\n    y_pred = reg.predict(X)\n    assert mean_squared_error(y_true=y, y_pred=y_pred, sample_weight=w) < 25\n\n    # Test that the custom objective function is actually used\n    class XGBCustomObjectiveException(Exception):\n        pass\n\n    def dummy_objective(y_true, y_pred):\n        raise XGBCustomObjectiveException()\n\n    xgb_model = xgb.XGBRegressor(objective=dummy_objective)\n    np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)\n\n\ndef logregobj(y_true, y_pred):\n    y_pred = 1.0 / (1.0 + np.exp(-y_pred))\n    grad = y_pred - y_true\n    hess = 
y_pred * (1.0 - y_pred)\n    return grad, hess\n\n\ndef test_classification_with_custom_objective():\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import KFold\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for train_index, test_index in kf.split(X, y):\n        xgb_model = xgb.XGBClassifier(objective=logregobj)\n        xgb_model.fit(X[train_index], y[train_index])\n        preds = xgb_model.predict(X[test_index])\n        labels = y[test_index]\n        err = sum(\n            1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n        ) / float(len(preds))\n        assert err < 0.1\n\n    # Test that the custom objective function is actually used\n    class XGBCustomObjectiveException(Exception):\n        pass\n\n    def dummy_objective(y_true, y_preds):\n        raise XGBCustomObjectiveException()\n\n    xgb_model = xgb.XGBClassifier(objective=dummy_objective)\n    np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)\n\n    cls = xgb.XGBClassifier(n_estimators=1)\n    cls.fit(X, y)\n\n    is_called = [False]\n\n    def wrapped(y, p):\n        is_called[0] = True\n        return logregobj(y, p)\n\n    cls.set_params(objective=wrapped)\n    cls.predict(X)  # no throw\n    cls.fit(X, y)\n\n    assert is_called[0]\n\n\ndef run_sklearn_api(booster, error, n_est):\n    from sklearn.datasets import load_iris\n    from sklearn.model_selection import train_test_split\n\n    iris = load_iris()\n    tr_d, te_d, tr_l, te_l = train_test_split(\n        iris.data, iris.target, train_size=120, test_size=0.2\n    )\n\n    classifier = xgb.XGBClassifier(booster=booster, n_estimators=n_est)\n    classifier.fit(tr_d, tr_l)\n\n    preds = classifier.predict(te_d)\n    labels = te_l\n    err = sum([1 for p, l in zip(preds, labels) if p != l]) * 1.0 / len(te_l)\n    assert err < error\n\n\ndef 
test_sklearn_api():\n    run_sklearn_api(\"gbtree\", 0.2, 10)\n    run_sklearn_api(\"gblinear\", 0.5, 100)\n\n\n@pytest.mark.skipif(**tm.no_matplotlib())\n@pytest.mark.skipif(**tm.no_graphviz())\ndef test_sklearn_plotting():\n    from sklearn.datasets import load_iris\n\n    iris = load_iris()\n\n    classifier = xgb.XGBClassifier()\n    classifier.fit(iris.data, iris.target)\n\n    import matplotlib\n\n    matplotlib.use(\"Agg\")\n\n    from graphviz import Source\n    from matplotlib.axes import Axes\n\n    ax = xgb.plot_importance(classifier)\n    assert isinstance(ax, Axes)\n    assert ax.get_title() == \"Feature importance\"\n    assert ax.get_xlabel() == \"Importance score\"\n    assert ax.get_ylabel() == \"Features\"\n    assert len(ax.patches) == 4\n\n    g = xgb.to_graphviz(classifier, num_trees=0)\n    assert isinstance(g, Source)\n\n    ax = xgb.plot_tree(classifier, num_trees=0)\n    assert isinstance(ax, Axes)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_sklearn_nfolds_cv():\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import StratifiedKFold\n\n    digits = load_digits(n_class=3)\n    X = digits[\"data\"]\n    y = digits[\"target\"]\n    dm = xgb.DMatrix(X, label=y)\n\n    params = {\n        \"max_depth\": 2,\n        \"eta\": 1,\n        \"verbosity\": 0,\n        \"objective\": \"multi:softprob\",\n        \"num_class\": 3,\n    }\n\n    seed = 2016\n    nfolds = 5\n    skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)\n\n    cv1 = xgb.cv(\n        params, dm, num_boost_round=10, nfold=nfolds, seed=seed, as_pandas=True\n    )\n    cv2 = xgb.cv(\n        params,\n        dm,\n        num_boost_round=10,\n        nfold=nfolds,\n        folds=skf,\n        seed=seed,\n        as_pandas=True,\n    )\n    cv3 = xgb.cv(\n        params,\n        dm,\n        num_boost_round=10,\n        nfold=nfolds,\n        stratified=True,\n        seed=seed,\n        as_pandas=True,\n    )\n    assert 
cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]\n    assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_split_value_histograms():\n    from sklearn.datasets import load_digits\n\n    digits_2class = load_digits(n_class=2)\n\n    X = digits_2class[\"data\"]\n    y = digits_2class[\"target\"]\n\n    dm = xgb.DMatrix(X, label=y)\n    params = {\n        \"max_depth\": 6,\n        \"eta\": 0.01,\n        \"objective\": \"binary:logistic\",\n        \"base_score\": 0.5,\n    }\n\n    gbdt = xgb.train(params, dm, num_boost_round=10)\n    assert gbdt.get_split_value_histogram(\"not_there\", as_pandas=True).shape[0] == 0\n    assert gbdt.get_split_value_histogram(\"not_there\", as_pandas=False).shape[0] == 0\n    assert gbdt.get_split_value_histogram(\"f28\", bins=0).shape[0] == 1\n    assert gbdt.get_split_value_histogram(\"f28\", bins=1).shape[0] == 1\n    assert gbdt.get_split_value_histogram(\"f28\", bins=2).shape[0] == 2\n    assert gbdt.get_split_value_histogram(\"f28\", bins=5).shape[0] == 2\n    assert gbdt.get_split_value_histogram(\"f28\", bins=None).shape[0] == 2\n\n\ndef test_sklearn_random_state():\n    clf = xgb.XGBClassifier(random_state=402)\n    assert clf.get_xgb_params()[\"random_state\"] == 402\n\n    clf = xgb.XGBClassifier(random_state=401)\n    assert clf.get_xgb_params()[\"random_state\"] == 401\n\n    random_state = np.random.RandomState(seed=403)\n    clf = xgb.XGBClassifier(random_state=random_state)\n    assert isinstance(clf.get_xgb_params()[\"random_state\"], int)\n\n    random_state = np.random.default_rng(seed=404)\n    clf = xgb.XGBClassifier(random_state=random_state)\n    assert isinstance(clf.get_xgb_params()[\"random_state\"], int)\n\n\ndef test_sklearn_n_jobs():\n    clf = xgb.XGBClassifier(n_jobs=1)\n    assert clf.get_xgb_params()[\"n_jobs\"] == 1\n\n    clf = xgb.XGBClassifier(n_jobs=2)\n    assert clf.get_xgb_params()[\"n_jobs\"] == 2\n\n\ndef 
test_parameters_access(tmp_path: Path) -> None:\n    from sklearn import datasets\n\n    params = {\"updater\": \"grow_gpu_hist\", \"subsample\": 0.5, \"n_jobs\": -1}\n    clf = xgb.XGBClassifier(n_estimators=1000, **params)\n    assert clf.get_params()[\"updater\"] == \"grow_gpu_hist\"\n    assert clf.get_params()[\"subsample\"] == 0.5\n    assert clf.get_params()[\"n_estimators\"] == 1000\n\n    clf = xgb.XGBClassifier(n_estimators=1, nthread=4)\n    X, y = datasets.load_iris(return_X_y=True)\n    clf.fit(X, y)\n\n    config = json.loads(clf.get_booster().save_config())\n    assert int(config[\"learner\"][\"generic_param\"][\"nthread\"]) == 4\n\n    clf.set_params(nthread=16)\n    config = json.loads(clf.get_booster().save_config())\n    assert int(config[\"learner\"][\"generic_param\"][\"nthread\"]) == 16\n\n    clf.predict(X)\n    config = json.loads(clf.get_booster().save_config())\n    assert int(config[\"learner\"][\"generic_param\"][\"nthread\"]) == 16\n\n    clf = xgb.XGBClassifier(n_estimators=2)\n    assert clf.tree_method is None\n    assert clf.get_params()[\"tree_method\"] is None\n    clf.fit(X, y)\n    assert clf.get_params()[\"tree_method\"] is None\n\n    save_load_counter = [0]\n\n    def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:\n        path = tmp_path / f\"model_{save_load_counter[0]}.json\"\n        save_load_counter[0] += 1\n        clf.save_model(path)\n        clf = xgb.XGBClassifier()\n        clf.load_model(path)\n        return clf\n\n    def get_tm(clf: xgb.XGBClassifier) -> str:\n        tm_val = json.loads(clf.get_booster().save_config())[\"learner\"][\n            \"gradient_booster\"\n        ][\"gbtree_train_param\"][\"tree_method\"]\n        return tm_val\n\n    assert get_tm(clf) == \"auto\"  # Kept as auto, immutable since 2.0\n\n    clf = pickle.loads(pickle.dumps(clf))\n\n    assert clf.tree_method is None\n    assert clf.n_estimators == 2\n    assert clf.get_params()[\"tree_method\"] is None\n    assert 
clf.get_params()[\"n_estimators\"] == 2\n    assert get_tm(clf) == \"auto\"  # preserved for pickle\n\n    clf = save_load(clf)\n\n    assert clf.tree_method is None\n    assert clf.n_estimators is None\n    assert clf.get_params()[\"tree_method\"] is None\n    assert clf.get_params()[\"n_estimators\"] is None\n    assert get_tm(clf) == \"auto\"  # discarded for save/load_model\n\n    clf.set_params(tree_method=\"hist\")\n    assert clf.get_params()[\"tree_method\"] == \"hist\"\n    clf = pickle.loads(pickle.dumps(clf))\n    assert clf.get_params()[\"tree_method\"] == \"hist\"\n    clf = save_load(clf)\n    assert clf.get_params()[\"tree_method\"] is None\n\n\ndef test_get_params_works_as_expected():\n    # XGBModel -> BaseEstimator\n    params = xgb.XGBModel(max_depth=2).get_params()\n    assert params[\"max_depth\"] == 2\n    # 'objective' defaults to None in the signature of XGBModel\n    assert params[\"objective\"] is None\n\n    # XGBRegressor -> XGBModel -> BaseEstimator\n    params = xgb.XGBRegressor(max_depth=3).get_params()\n    assert params[\"max_depth\"] == 3\n    # 'objective' defaults to 'reg:squarederror' in the signature of XGBRegressor\n    assert params[\"objective\"] == \"reg:squarederror\"\n    # 'colsample_bynode' defaults to 'None' for XGBModel (which XGBRegressor inherits from), so it\n    # should be in get_params() output\n    assert params[\"colsample_bynode\"] is None\n\n    # XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator\n    params = xgb.XGBRFRegressor(max_depth=4, objective=\"reg:tweedie\").get_params()\n    assert params[\"max_depth\"] == 4\n    # 'objective' is a keyword argument for XGBRegressor, so it should be in get_params() output\n    # ... 
but values passed through kwargs should override the default from the signature of XGBRegressor\n    assert params[\"objective\"] == \"reg:tweedie\"\n    # 'colsample_bynode' defaults to 0.8 for XGBRFRegressor...that should be preferred to the None from XGBRegressor\n    assert params[\"colsample_bynode\"] == 0.8\n\n\ndef test_kwargs_error():\n    params = {\"updater\": \"grow_gpu_hist\", \"subsample\": 0.5, \"n_jobs\": -1}\n    with pytest.raises(TypeError):\n        clf = xgb.XGBClassifier(n_jobs=1000, **params)\n        assert isinstance(clf, xgb.XGBClassifier)\n\n\ndef test_kwargs_grid_search():\n    from sklearn import datasets\n    from sklearn.model_selection import GridSearchCV\n\n    params = {\"tree_method\": \"hist\"}\n    clf = xgb.XGBClassifier(n_estimators=3, **params)\n    assert clf.get_params()[\"tree_method\"] == \"hist\"\n    # 'eta' is not a default argument of XGBClassifier\n    # Check we can still do grid search over this parameter\n    search_params = {\"eta\": [0, 0.2, 0.4]}\n    grid_cv = GridSearchCV(clf, search_params, cv=5)\n    iris = datasets.load_iris()\n    grid_cv.fit(iris.data, iris.target)\n\n    # Expect unique results for each parameter value\n    # This confirms sklearn is able to successfully update the parameter\n    means = grid_cv.cv_results_[\"mean_test_score\"]\n    assert len(means) == len(set(means))\n\n\ndef test_sklearn_clone():\n    from sklearn.base import clone\n\n    clf = xgb.XGBClassifier(n_jobs=2)\n    clf.n_jobs = -1\n    clone(clf)\n\n\ndef test_sklearn_get_default_params():\n    from sklearn.datasets import load_digits\n\n    digits_2class = load_digits(n_class=2)\n    X = digits_2class[\"data\"]\n    y = digits_2class[\"target\"]\n    cls = xgb.XGBClassifier()\n    assert cls.get_params()[\"base_score\"] is None\n    cls.fit(X[:4, ...], y[:4, ...])\n    base_score = get_basescore(cls)\n    np.testing.assert_equal(base_score, [0.5])\n\n\ndef run_validation_weights(model):\n    from sklearn.datasets import 
make_hastie_10_2\n\n    # prepare training and test data\n    X, y = make_hastie_10_2(n_samples=2000, random_state=42)\n    labels, y = np.unique(y, return_inverse=True)\n    X_train, X_test = X[:1600], X[1600:]\n    y_train, y_test = y[:1600], y[1600:]\n\n    # instantiate model\n    param_dist = {\n        \"objective\": \"binary:logistic\",\n        \"n_estimators\": 2,\n        \"random_state\": 123,\n    }\n    clf = model(**param_dist)\n\n    # train it using instance weights only in the training set\n    weights_train = np.random.choice([1, 2], len(X_train))\n    clf.set_params(eval_metric=\"logloss\")\n    clf.fit(\n        X_train,\n        y_train,\n        sample_weight=weights_train,\n        eval_set=[(X_test, y_test)],\n        verbose=False,\n    )\n    # evaluate logloss metric on test set *without* using weights\n    evals_result_without_weights = clf.evals_result()\n    logloss_without_weights = evals_result_without_weights[\"validation_0\"][\"logloss\"]\n\n    # now use weights for the test set\n    np.random.seed(0)\n    weights_test = np.random.choice([1, 2], len(X_test))\n    clf.set_params(eval_metric=\"logloss\")\n    clf.fit(\n        X_train,\n        y_train,\n        sample_weight=weights_train,\n        eval_set=[(X_test, y_test)],\n        sample_weight_eval_set=[weights_test],\n        verbose=False,\n    )\n    evals_result_with_weights = clf.evals_result()\n    logloss_with_weights = evals_result_with_weights[\"validation_0\"][\"logloss\"]\n\n    # check that the logloss in the test set is actually different when using\n    # weights than when not using them\n    assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1]))\n\n    with pytest.raises(ValueError):\n        # length of eval set and sample weight doesn't match.\n        clf.fit(\n            X_train,\n            y_train,\n            sample_weight=weights_train,\n            eval_set=[(X_train, y_train), (X_test, y_test)],\n            
sample_weight_eval_set=[weights_train],\n        )\n\n    with pytest.raises(ValueError):\n        cls = xgb.XGBClassifier()\n        cls.fit(\n            X_train,\n            y_train,\n            sample_weight=weights_train,\n            eval_set=[(X_train, y_train), (X_test, y_test)],\n            sample_weight_eval_set=[weights_train],\n        )\n\n\ndef test_validation_weights():\n    run_validation_weights(xgb.XGBModel)\n    run_validation_weights(xgb.XGBClassifier)\n\n\ndef test_RFECV():\n    from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris\n    from sklearn.feature_selection import RFECV\n\n    # Regression\n    X, y = load_diabetes(return_X_y=True)\n    bst = xgb.XGBRegressor(\n        booster=\"gblinear\",\n        learning_rate=0.1,\n        n_estimators=10,\n        objective=\"reg:squarederror\",\n        random_state=0,\n        verbosity=0,\n    )\n    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring=\"neg_mean_squared_error\")\n    rfecv.fit(X, y)\n\n    # Binary classification\n    X, y = load_breast_cancer(return_X_y=True)\n    bst = xgb.XGBClassifier(\n        booster=\"gblinear\",\n        learning_rate=0.1,\n        n_estimators=10,\n        objective=\"binary:logistic\",\n        random_state=0,\n        verbosity=0,\n    )\n    rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring=\"roc_auc\")\n    rfecv.fit(X, y)\n\n    # Multi-class classification\n    X, y = load_iris(return_X_y=True)\n    bst = xgb.XGBClassifier(\n        base_score=0.4,\n        booster=\"gblinear\",\n        learning_rate=0.1,\n        n_estimators=10,\n        objective=\"multi:softprob\",\n        random_state=0,\n        reg_alpha=0.001,\n        reg_lambda=0.01,\n        scale_pos_weight=0.5,\n        verbosity=0,\n    )\n    rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring=\"neg_log_loss\")\n    rfecv.fit(X, y)\n\n    X[0:4, :] = np.nan  # verify scikit_learn doesn't throw with nan\n    reg = xgb.XGBRegressor()\n    rfecv = 
RFECV(estimator=reg)\n    rfecv.fit(X, y)\n\n    cls = xgb.XGBClassifier()\n    rfecv = RFECV(estimator=cls, step=0.5, cv=3, scoring=\"neg_mean_squared_error\")\n    rfecv.fit(X, y)\n\n\ndef test_XGBClassifier_resume(tmp_path: Path) -> None:\n    from sklearn.datasets import load_breast_cancer\n    from sklearn.metrics import log_loss\n\n    model1_path = tmp_path / \"test_XGBClassifier.model\"\n    model1_booster_path = tmp_path / \"test_XGBClassifier.booster\"\n\n    X, Y = load_breast_cancer(return_X_y=True)\n\n    model1 = xgb.XGBClassifier(learning_rate=0.3, random_state=0, n_estimators=8)\n    model1.fit(X, Y)\n\n    pred1 = model1.predict(X)\n    log_loss1 = log_loss(pred1, Y)\n\n    # file name of stored xgb model\n    model1.save_model(model1_path)\n    model2 = xgb.XGBClassifier(learning_rate=0.3, random_state=0, n_estimators=8)\n    model2.fit(X, Y, xgb_model=model1_path)\n\n    pred2 = model2.predict(X)\n    log_loss2 = log_loss(pred2, Y)\n\n    assert np.any(pred1 != pred2)\n    assert log_loss1 > log_loss2\n\n    # file name of 'Booster' instance Xgb model\n    model1.get_booster().save_model(model1_booster_path)\n    model2 = xgb.XGBClassifier(learning_rate=0.3, random_state=0, n_estimators=8)\n    model2.fit(X, Y, xgb_model=model1_booster_path)\n\n    pred2 = model2.predict(X)\n    log_loss2 = log_loss(pred2, Y)\n\n    assert np.any(pred1 != pred2)\n    assert log_loss1 > log_loss2\n\n\ndef test_constraint_parameters():\n    reg = xgb.XGBRegressor(interaction_constraints=\"[[0, 1], [2, 3, 4]]\")\n    X = np.random.randn(10, 10)\n    y = np.random.randn(10)\n    reg.fit(X, y)\n\n    config = json.loads(reg.get_booster().save_config())\n    assert (\n        config[\"learner\"][\"gradient_booster\"][\"tree_train_param\"][\n            \"interaction_constraints\"\n        ]\n        == \"[[0, 1], [2, 3, 4]]\"\n    )\n\n\n@pytest.mark.filterwarnings(\"error\")\ndef test_parameter_validation():\n    reg = xgb.XGBRegressor(foo=\"bar\", verbosity=1)\n    X 
= np.random.randn(10, 10)\n    y = np.random.randn(10)\n    with pytest.warns(Warning, match=\"foo\"):\n        reg.fit(X, y)\n\n    reg = xgb.XGBRegressor(\n        n_estimators=2, missing=3, importance_type=\"gain\", verbosity=1\n    )\n    X = np.random.randn(10, 10)\n    y = np.random.randn(10)\n\n    with warnings.catch_warnings():\n        reg.fit(X, y)\n\n\ndef test_deprecate_position_arg():\n    from sklearn.datasets import load_digits\n\n    X, y = load_digits(return_X_y=True, n_class=2)\n    w = np.random.default_rng(0).uniform(size=y.size)\n    with pytest.warns(FutureWarning):\n        xgb.XGBRegressor(3, learning_rate=0.1)\n    model = xgb.XGBRegressor(n_estimators=1)\n    with pytest.warns(FutureWarning):\n        model.fit(X, y, w)\n\n    with pytest.warns(FutureWarning):\n        xgb.XGBClassifier(1)\n    model = xgb.XGBClassifier(n_estimators=1)\n    with pytest.warns(FutureWarning):\n        model.fit(X, y, w)\n\n    with pytest.warns(FutureWarning):\n        xgb.XGBRanker(\"rank:ndcg\", learning_rate=0.1)\n    model = xgb.XGBRanker(n_estimators=1)\n    group = np.repeat(1, X.shape[0])\n    with pytest.warns(FutureWarning):\n        model.fit(X, y, group)\n\n    with pytest.warns(FutureWarning):\n        xgb.XGBRFRegressor(1, learning_rate=0.1)\n    model = xgb.XGBRFRegressor(n_estimators=1)\n    with pytest.warns(FutureWarning):\n        model.fit(X, y, w)\n\n    model = xgb.XGBRFClassifier(n_estimators=1)\n    with pytest.warns(FutureWarning):\n        model.fit(X, y, w)\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_pandas_input():\n    import pandas as pd\n    from sklearn.calibration import CalibratedClassifierCV\n    from sklearn.frozen import FrozenEstimator\n\n    rng = np.random.RandomState(1994)\n\n    kRows = 100\n    kCols = 6\n\n    X = rng.randint(low=0, high=2, size=kRows * kCols)\n    X = X.reshape(kRows, kCols)\n\n    df = pd.DataFrame(X)\n    feature_names = []\n    for i in range(1, kCols):\n        feature_names += [\"k\" 
+ str(i)]\n\n    df.columns = [\"status\"] + feature_names\n\n    target = df[\"status\"]\n    train = df.drop(columns=[\"status\"])\n    model = xgb.XGBClassifier()\n    model.fit(train, target)\n    np.testing.assert_equal(model.feature_names_in_, np.array(feature_names))\n\n    columns = list(train.columns)\n    rng.shuffle(columns)\n    df_incorrect = df[columns]\n\n    with pytest.raises(ValueError, match=\"feature_names mismatch\"):\n        model.predict(df_incorrect)\n\n    clf_isotonic = CalibratedClassifierCV(FrozenEstimator(model), method=\"isotonic\")\n    clf_isotonic.fit(train, target)\n    assert isinstance(\n        clf_isotonic.calibrated_classifiers_[0].estimator.estimator, xgb.XGBClassifier\n    )\n    np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))\n\n    train_ser = train[\"k1\"]\n    assert isinstance(train_ser, pd.Series)\n    model = xgb.XGBClassifier(n_estimators=8)\n    model.fit(train_ser, target, eval_set=[(train_ser, target)])\n    assert tm.non_increasing(model.evals_result()[\"validation_0\"][\"logloss\"])\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"approx\", \"hist\"])\ndef test_feature_weights(tree_method):\n    kRows = 512\n    kCols = 64\n    X = rng.randn(kRows, kCols)\n    y = rng.randn(kRows)\n\n    fw = np.ones(shape=(kCols,))\n    for i in range(kCols):\n        fw[i] *= float(i)\n\n    parser_path = os.path.join(tm.demo_dir(__file__), \"guide-python\", \"model_parser.py\")\n    poly_increasing = get_feature_weights(\n        X=X,\n        y=y,\n        fw=fw,\n        parser_path=parser_path,\n        tree_method=tree_method,\n        model=xgb.XGBRegressor,\n    )\n\n    fw = np.ones(shape=(kCols,))\n    for i in range(kCols):\n        fw[i] *= float(kCols - i)\n    poly_decreasing = get_feature_weights(\n        X=X,\n        y=y,\n        fw=fw,\n        parser_path=parser_path,\n        tree_method=tree_method,\n        model=xgb.XGBRegressor,\n    )\n\n    # Approxmated test, this is 
dependent on the implementation of random\n    # number generator in std library.\n    assert poly_increasing[0] > 0.08\n    assert poly_decreasing[0] < -0.08\n\n    reg = xgb.XGBRegressor(feature_weights=np.ones((kCols,)))\n    with pytest.raises(ValueError, match=\"Use the one in\"):\n        reg.fit(X, y, feature_weights=np.ones((kCols,)))\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\", \"exact\"])\ndef test_boost_from_prediction(tree_method: str) -> None:\n    import pandas as pd\n    from sklearn.datasets import load_breast_cancer, load_iris, make_regression\n\n    X, y = load_breast_cancer(return_X_y=True)\n\n    run_boost_from_prediction_binary(tree_method, \"cpu\", X, y, None)\n    run_boost_from_prediction_binary(tree_method, \"cpu\", X, y, pd.DataFrame)\n\n    X, y = load_iris(return_X_y=True)\n\n    run_boost_from_prediction_multi_clasas(\n        xgb.XGBClassifier, tree_method, \"cpu\", X, y, None\n    )\n    run_boost_from_prediction_multi_clasas(\n        xgb.XGBClassifier, tree_method, \"cpu\", X, y, pd.DataFrame\n    )\n\n    X, y = make_regression(n_samples=100, n_targets=4)\n    run_boost_from_prediction_multi_clasas(\n        xgb.XGBRegressor, tree_method, \"cpu\", X, y, None\n    )\n\n\ndef test_estimator_type(tmp_path: Path) -> None:\n    assert xgb.XGBClassifier()._get_type() == \"classifier\"\n    assert xgb.XGBRFClassifier()._get_type() == \"classifier\"\n    assert xgb.XGBRegressor()._get_type() == \"regressor\"\n    assert xgb.XGBRFRegressor()._get_type() == \"regressor\"\n    assert xgb.XGBRanker()._get_type() == \"ranker\"\n\n    from sklearn.datasets import load_digits\n\n    X, y = load_digits(n_class=2, return_X_y=True)\n    cls = xgb.XGBClassifier(n_estimators=2).fit(X, y)\n    path = tmp_path / \"cls.json\"\n    cls.save_model(path)\n\n    reg = xgb.XGBRegressor()\n    with pytest.raises(TypeError):\n        reg.load_model(path)\n\n    cls = xgb.XGBClassifier()\n    cls.load_model(path)  # no error\n\n\ndef 
test_multilabel_classification() -> None:\n    from sklearn.datasets import make_multilabel_classification\n\n    X, y = make_multilabel_classification(\n        n_samples=32, n_classes=5, n_labels=3, random_state=0\n    )\n    clf = xgb.XGBClassifier(tree_method=\"hist\")\n    clf.fit(X, y)\n    booster = clf.get_booster()\n    learner = json.loads(booster.save_config())[\"learner\"]\n    assert int(learner[\"learner_model_param\"][\"num_target\"]) == 5\n\n    np.testing.assert_allclose(clf.predict(X), y)\n    predt = (clf.predict_proba(X) > 0.5).astype(np.int64)\n    np.testing.assert_allclose(clf.predict(X), predt)\n    assert predt.dtype == np.int64\n\n    y = y.tolist()\n    clf.fit(X, y)\n    np.testing.assert_allclose(clf.predict(X), predt)\n\n\ndef test_data_initialization() -> None:\n    from sklearn.datasets import load_digits\n\n    X, y = load_digits(return_X_y=True)\n    validate_data_initialization(xgb.QuantileDMatrix, xgb.XGBClassifier, X, y)\n\n\n@parametrize_with_checks([xgb.XGBRegressor()])\ndef test_estimator_reg(estimator, check):\n    if os.environ[\"PYTEST_CURRENT_TEST\"].find(\"check_supervised_y_no_nan\") != -1:\n        # The test uses float64 and requires the error message to contain:\n        #\n        #   \"value too large for dtype(float64)\",\n        #\n        # while XGBoost stores values as float32.  
But XGBoost does verify the label\n        # internally, so we replace this test with custom check.\n        rng = np.random.RandomState(888)\n        X = rng.randn(10, 5)\n        y = np.full(10, np.inf)\n        with pytest.raises(\n            ValueError, match=\"contains NaN, infinity or a value too large\"\n        ):\n            estimator.fit(X, y)\n        return\n    elif os.environ[\"PYTEST_CURRENT_TEST\"].find(\"check_regressor_multioutput\") != -1:\n        # sklearn requires float64\n        with pytest.raises(AssertionError, match=\"Got float32\"):\n            check(estimator)\n    else:\n        check(estimator)\n\n\ndef test_categorical(tmp_path: Path) -> None:\n    X, y = tm.make_categorical(n_samples=32, n_features=2, n_categories=3, onehot=False)\n    ft = [\"c\"] * X.shape[1]\n    reg = xgb.XGBRegressor(\n        feature_types=ft,\n        max_cat_to_onehot=1,\n    )\n    reg.fit(X.values, y, eval_set=[(X.values, y)])\n    from_cat = reg.evals_result()[\"validation_0\"][\"rmse\"]\n    predt_cat = reg.predict(X.values)\n    assert reg.get_booster().feature_types == ft\n    path = tmp_path / \"model.json\"\n    reg.save_model(path)\n    reg = xgb.XGBRegressor()\n    reg.load_model(path)\n    assert reg.feature_types == ft\n    assert reg.enable_categorical is True\n\n    onehot, y = tm.make_categorical(\n        n_samples=32, n_features=2, n_categories=3, onehot=True\n    )\n    reg = xgb.XGBRegressor()\n    reg.fit(onehot, y, eval_set=[(onehot, y)])\n    from_enc = reg.evals_result()[\"validation_0\"][\"rmse\"]\n    predt_enc = reg.predict(onehot)\n\n    np.testing.assert_allclose(from_cat, from_enc)\n    np.testing.assert_allclose(predt_cat, predt_enc)\n\n\ndef test_evaluation_metric():\n    from sklearn.datasets import load_diabetes, load_digits\n    from sklearn.metrics import mean_absolute_error\n\n    X, y = load_diabetes(return_X_y=True)\n    n_estimators = 16\n\n    with tm.captured_output() as (out, err):\n        reg = 
xgb.XGBRegressor(\n            tree_method=\"hist\",\n            eval_metric=mean_absolute_error,\n            n_estimators=n_estimators,\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n        lines = out.getvalue().strip().split(\"\\n\")\n\n    assert len(lines) == n_estimators\n    for line in lines:\n        assert line.find(\"mean_absolute_error\") != -1\n\n    def merror(y_true: np.ndarray, predt: np.ndarray):\n        n_samples = y_true.shape[0]\n        assert n_samples == predt.size\n        errors = np.zeros(y_true.shape[0])\n        errors[y != predt] = 1.0\n        return np.sum(errors) / n_samples\n\n    X, y = load_digits(n_class=10, return_X_y=True)\n\n    clf = xgb.XGBClassifier(\n        tree_method=\"hist\",\n        eval_metric=merror,\n        n_estimators=16,\n        objective=\"multi:softmax\",\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    custom = clf.evals_result()\n\n    clf = xgb.XGBClassifier(\n        tree_method=\"hist\",\n        eval_metric=\"merror\",\n        n_estimators=16,\n        objective=\"multi:softmax\",\n    )\n    clf.fit(X, y, eval_set=[(X, y)])\n    internal = clf.evals_result()\n\n    np.testing.assert_allclose(\n        custom[\"validation_0\"][\"merror\"], internal[\"validation_0\"][\"merror\"], atol=1e-6\n    )\n\n    clf = xgb.XGBRFClassifier(\n        tree_method=\"hist\",\n        n_estimators=16,\n        objective=tm.softprob_obj(10),\n        eval_metric=merror,\n    )\n    with pytest.raises(AssertionError):\n        # shape check inside the `merror` function\n        clf.fit(X, y, eval_set=[(X, y)])\n\n\ndef test_mixed_metrics() -> None:\n    from sklearn.datasets import make_classification\n    from sklearn.metrics import hamming_loss, hinge_loss, log_loss\n\n    X, y = make_classification(random_state=2025)\n\n    clf = xgb.XGBClassifier(eval_metric=[\"logloss\", hinge_loss], n_estimators=2)\n    clf.fit(X, y, eval_set=[(X, y)])\n    results = clf.evals_result()[\"validation_0\"]\n    assert 
\"logloss\" in results\n    assert \"hinge_loss\" in results\n\n    clf = xgb.XGBClassifier(eval_metric=[hamming_loss, log_loss], n_estimators=2)\n    with pytest.raises(\n        NotImplementedError, match=\"multiple custom metrics is not yet supported.\"\n    ):\n        clf.fit(X, y, eval_set=[(X, y)])\n\n    clf = xgb.XGBClassifier(eval_metric=[123, log_loss], n_estimators=2)\n    with pytest.raises(TypeError, match=\"Invalid type for the `eval_metric`\"):\n        clf.fit(X, y, eval_set=[(X, y)])\n\n\ndef test_weighted_evaluation_metric():\n    from sklearn.datasets import make_hastie_10_2\n    from sklearn.metrics import log_loss\n\n    X, y = make_hastie_10_2(n_samples=2000, random_state=42)\n    labels, y = np.unique(y, return_inverse=True)\n    X_train, X_test = X[:1600], X[1600:]\n    y_train, y_test = y[:1600], y[1600:]\n    weights_eval_set = np.random.choice([1, 2], len(X_test))\n\n    np.random.seed(0)\n    weights_train = np.random.choice([1, 2], len(X_train))\n\n    clf = xgb.XGBClassifier(\n        tree_method=\"hist\",\n        eval_metric=log_loss,\n        n_estimators=16,\n        objective=\"binary:logistic\",\n    )\n    clf.fit(\n        X_train,\n        y_train,\n        sample_weight=weights_train,\n        eval_set=[(X_test, y_test)],\n        sample_weight_eval_set=[weights_eval_set],\n    )\n    custom = clf.evals_result()\n\n    clf = xgb.XGBClassifier(\n        tree_method=\"hist\",\n        eval_metric=\"logloss\",\n        n_estimators=16,\n        objective=\"binary:logistic\",\n    )\n    clf.fit(\n        X_train,\n        y_train,\n        sample_weight=weights_train,\n        eval_set=[(X_test, y_test)],\n        sample_weight_eval_set=[weights_eval_set],\n    )\n    internal = clf.evals_result()\n\n    np.testing.assert_allclose(\n        custom[\"validation_0\"][\"log_loss\"],\n        internal[\"validation_0\"][\"logloss\"],\n        atol=1e-6,\n    )\n\n\ndef test_intercept() -> None:\n    run_intercept(\"cpu\")\n\n\ndef 
test_fit_none() -> None:\n    with pytest.raises(TypeError, match=\"NoneType\"):\n        xgb.XGBClassifier().fit(None, [0, 1])\n\n    X = rng.normal(size=4).reshape(2, 2)\n\n    with pytest.raises(ValueError, match=\"Invalid classes\"):\n        xgb.XGBClassifier().fit(X, None)\n\n    with pytest.raises(ValueError, match=\"labels\"):\n        xgb.XGBRegressor().fit(X, None)\n\n\ndef test_tags() -> None:\n    for reg in [xgb.XGBRegressor(), xgb.XGBRFRegressor()]:\n        tags = reg._more_tags()\n        assert \"non_deterministic\" not in tags\n        assert tags[\"multioutput\"] is True\n        assert tags[\"multioutput_only\"] is False\n\n    for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]:\n        tags = clf._more_tags()\n        assert \"multioutput\" not in tags\n        assert tags[\"multilabel\"] is True\n\n    tags = xgb.XGBRanker()._more_tags()\n    assert \"multioutput\" not in tags\n\n\n# the try-excepts in this test should be removed once xgboost's\n# minimum supported scikit-learn version is at least 1.6\ndef test_sklearn_tags():\n    def _assert_has_xgbmodel_tags(tags):\n        # values set by XGBModel.__sklearn_tags__()\n        assert tags.non_deterministic is False\n        assert tags.no_validation is True\n        assert tags.input_tags.allow_nan is True\n\n    for reg in [xgb.XGBRegressor(), xgb.XGBRFRegressor()]:\n        try:\n            # if no AttributeError was thrown, we must be using scikit-learn>=1.6,\n            # and so the actual effects of __sklearn_tags__() should be tested\n            tags = reg.__sklearn_tags__()\n            _assert_has_xgbmodel_tags(tags)\n            # regressor-specific values\n            assert tags.estimator_type == \"regressor\"\n            assert tags.regressor_tags is not None\n            assert tags.classifier_tags is None\n            assert tags.target_tags.multi_output is True\n            assert tags.target_tags.single_output is True\n        except AttributeError as err:\n         
   # only the exact error we expected to be raised should be raised\n            assert bool(re.search(r\"__sklearn_tags__.* should not be called\", str(err)))\n\n    for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]:\n        try:\n            # if no AttributeError was thrown, we must be using scikit-learn>=1.6,\n            # and so the actual effects of __sklearn_tags__() should be tested\n            tags = clf.__sklearn_tags__()\n            _assert_has_xgbmodel_tags(tags)\n            # classifier-specific values\n            assert tags.estimator_type == \"classifier\"\n            assert tags.regressor_tags is None\n            assert tags.classifier_tags is not None\n            assert tags.classifier_tags.multi_label is True\n        except AttributeError as err:\n            # only the exact error we expected to be raised should be raised\n            assert bool(re.search(r\"__sklearn_tags__.* should not be called\", str(err)))\n\n    for rnk in [xgb.XGBRanker()]:\n        try:\n            # if no AttributeError was thrown, we must be using scikit-learn>=1.6,\n            # and so the actual effects of __sklearn_tags__() should be tested\n            tags = rnk.__sklearn_tags__()\n            _assert_has_xgbmodel_tags(tags)\n        except AttributeError as err:\n            # only the exact error we expected to be raised should be raised\n            assert bool(re.search(r\"__sklearn_tags__.* should not be called\", str(err)))\n\n\ndef test_doc_link() -> None:\n    for est in [\n        xgb.XGBRegressor(),\n        xgb.XGBClassifier(),\n        xgb.XGBRanker(),\n        xgb.XGBRFRegressor(),\n        xgb.XGBRFClassifier(),\n    ]:\n        name = est.__class__.__name__\n        link = est._get_doc_link()\n        assert f\"xgboost.{name}\" in link\n\n\ndef test_apply_method() -> None:\n    import pandas as pd\n\n    X_num = np.random.rand(5, 5)\n    df = pd.DataFrame(X_num, columns=[f\"f{i}\" for i in range(X_num.shape[1])])\n    df[\"test\"] 
= pd.Series(\n        [\"one\", \"two\", \"three\", \"four\", \"five\"], dtype=\"category\"\n    )  # <- categorical column\n    y = np.arange(len(df))\n\n    model = xgb.XGBClassifier()\n    model.fit(df, y)\n\n    model.apply(df)  # this must not raise\n\n    model.set_params(enable_categorical=False)\n    with pytest.raises(ValueError, match=\"`enable_categorical`\"):\n        model.apply(df)\n\n\ndef test_recoding() -> None:\n    run_recoding(\"cpu\")\n"
  },
  {
    "path": "tests/python/with_omp_limit.py",
    "content": "import sys\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.metrics import roc_auc_score\n\nimport xgboost as xgb\n\n\ndef run_omp(output_path: str):\n    X, y = make_classification(\n        n_samples=200, n_features=32, n_classes=3, n_informative=8\n    )\n    Xy = xgb.DMatrix(X, y, nthread=16)\n    booster = xgb.train(\n        {\"num_class\": 3, \"objective\": \"multi:softprob\", \"n_jobs\": 16},\n        Xy,\n        num_boost_round=8,\n    )\n    score = booster.predict(Xy)\n    auc = roc_auc_score(y, score, average=\"weighted\", multi_class=\"ovr\")\n    with open(output_path, \"w\") as fd:\n        fd.write(str(auc))\n\n\nif __name__ == \"__main__\":\n    out = sys.argv[1]\n    run_omp(out)\n"
  },
  {
    "path": "tests/python-gpu/conftest.py",
    "content": "from typing import Any, List\n\nimport pytest\n\nfrom xgboost import testing as tm\n\n\ndef has_rmm() -> bool:\n    return tm.no_rmm()[\"condition\"]\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef setup_rmm_pool(request: Any, pytestconfig: pytest.Config) -> None:\n    tm.setup_rmm_pool(request, pytestconfig)\n\n\ndef pytest_addoption(parser: pytest.Parser) -> None:\n    parser.addoption(\n        \"--use-rmm-pool\", action=\"store_true\", default=False, help=\"Use RMM pool\"\n    )\n\n\ndef pytest_collection_modifyitems(\n    config: pytest.Config, items: List[pytest.Item]\n) -> None:\n    if config.getoption(\"--use-rmm-pool\"):\n        blocklist = [\n            \"python-gpu/test_gpu_demos.py::test_dask_training\",\n            \"python-gpu/test_gpu_prediction.py::TestGPUPredict::test_shap\",\n            \"python-gpu/test_gpu_linear.py::TestGPULinear\",\n        ]\n        skip_mark = pytest.mark.skip(\n            reason=\"This test is not run when --use-rmm-pool flag is active\"\n        )\n        for item in items:\n            if any(item.nodeid.startswith(x) for x in blocklist):\n                item.add_marker(skip_mark)\n"
  },
  {
    "path": "tests/python-gpu/load_pickle.py",
    "content": "\"\"\"Loading a pickled model generated by test_pickling.py, only used by\n`test_gpu_with_dask.py`\"\"\"\n\nimport json\nimport os\n\nimport numpy as np\nimport pytest\nfrom test_gpu_pickling import build_dataset, load_pickle, model_path\n\nimport xgboost as xgb\n\n\nclass TestLoadPickle:\n    def test_load_pkl(self) -> None:\n        \"\"\"Test whether prediction is correct.\"\"\"\n        assert os.environ[\"CUDA_VISIBLE_DEVICES\"] == \"-1\"\n        bst = load_pickle(model_path)\n        x, y = build_dataset()\n        if isinstance(bst, xgb.Booster):\n            test_x = xgb.DMatrix(x)\n            res = bst.predict(test_x)\n        else:\n            res = bst.predict(x)\n            assert len(res) == 10\n            bst.set_params(n_jobs=1)  # triggers a re-configuration\n            res = bst.predict(x)\n\n        assert len(res) == 10\n\n    def test_context_is_removed(self) -> None:\n        \"\"\"Under invalid CUDA_VISIBLE_DEVICES, context should reset\"\"\"\n        assert os.environ[\"CUDA_VISIBLE_DEVICES\"] == \"-1\"\n        bst = load_pickle(model_path)\n        config = bst.save_config()\n        config = json.loads(config)\n        assert config[\"learner\"][\"generic_param\"][\"device\"] == \"cpu\"\n\n    def test_context_is_preserved(self) -> None:\n        \"\"\"Test the device context is preserved after pickling.\"\"\"\n        assert \"CUDA_VISIBLE_DEVICES\" not in os.environ.keys()\n        bst = load_pickle(model_path)\n        config = bst.save_config()\n        config = json.loads(config)\n        assert config[\"learner\"][\"generic_param\"][\"device\"] == \"cuda:0\"\n\n    def test_wrap_gpu_id(self) -> None:\n        assert os.environ[\"CUDA_VISIBLE_DEVICES\"] == \"0\"\n        bst = load_pickle(model_path)\n        config = bst.save_config()\n        config = json.loads(config)\n        assert config[\"learner\"][\"generic_param\"][\"device\"] == \"cuda:0\"\n\n        x, y = build_dataset()\n        test_x = 
xgb.DMatrix(x)\n        res = bst.predict(test_x)\n        assert len(res) == 10\n\n    def test_training_on_cpu_only_env(self) -> None:\n        assert os.environ[\"CUDA_VISIBLE_DEVICES\"] == \"-1\"\n        rng = np.random.RandomState(1994)\n        X = rng.randn(10, 10)\n        y = rng.randn(10)\n        with pytest.warns(UserWarning, match=\"No visible GPU is found\"):\n            xgb.train({\"device\": \"cuda\"}, xgb.DMatrix(X, y))\n"
  },
  {
    "path": "tests/python-gpu/test_device_quantile_dmatrix.py",
    "content": "import sys\n\nimport numpy as np\nimport pytest\nfrom hypothesis import given, settings, strategies\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import check_inf\nfrom xgboost.testing.data_iter import run_mixed_sparsity\nfrom xgboost.testing.quantile_dmatrix import (\n    check_categorical_strings,\n    check_ref_quantile_cut,\n)\nfrom xgboost.testing.utils import predictor_equal\n\nsys.path.append(\"tests/python\")\nimport test_quantile_dmatrix as tqd\n\n\nclass TestQuantileDMatrix:\n    cputest = tqd.TestQuantileDMatrix()\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dmatrix_feature_weights(self) -> None:\n        import cupy as cp\n\n        rng = cp.random.RandomState(np.uint64(1994))\n        data = rng.randn(5, 5)\n        m = xgb.DMatrix(data)\n\n        feature_weights = rng.uniform(size=5)\n        m.set_info(feature_weights=feature_weights)\n\n        cp.testing.assert_array_equal(\n            cp.array(m.get_float_info(\"feature_weights\")),\n            feature_weights.astype(np.float32),\n        )\n\n    def test_categorical_strings(self) -> None:\n        check_categorical_strings(\"cuda\")\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dmatrix_cupy_init(self) -> None:\n        import cupy as cp\n\n        data = cp.random.randn(5, 5)\n        xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64))\n\n    @pytest.mark.parametrize(\n        \"on_device,device\",\n        [(True, \"cpu\"), (False, \"cuda\"), (False, \"cpu\"), (True, \"cuda\")],\n    )\n    def test_initialization(self, on_device: bool, device: str) -> None:\n        n_samples, n_features, max_bin = 64, 3, 16\n        X, y, w = tm.make_batches(\n            n_samples,\n            n_features=n_features,\n            n_batches=1,\n            use_cupy=on_device,\n        )\n\n        tree_method = \"hist\"\n        # Init SparsePage\n        Xy = xgb.DMatrix(X[0], y[0], weight=w[0])\n        # Init 
GIDX/Ellpack\n        xgb.train(\n            {\"tree_method\": tree_method, \"max_bin\": max_bin, \"device\": device},\n            Xy,\n            num_boost_round=1,\n        )\n        # query cuts from GIDX/Ellpack\n        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)\n        predictor_equal(Xy, qXy)\n        with pytest.raises(ValueError, match=\"Inconsistent\"):\n            # max_bin changed.\n            xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)\n\n        # No error, DMatrix can be modified for different training session.\n        xgb.train(\n            {\"tree_method\": tree_method, \"max_bin\": max_bin - 1, \"device\": device},\n            Xy,\n            num_boost_round=1,\n        )\n\n        # Init Ellpack/GIDX\n        Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)\n        # Init GIDX/Ellpack\n        xgb.train(\n            {\"tree_method\": tree_method, \"max_bin\": max_bin, \"device\": device},\n            Xy,\n            num_boost_round=1,\n        )\n        # query cuts from GIDX/Ellpack\n        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)\n        predictor_equal(Xy, qXy)\n        with pytest.raises(ValueError, match=\"Inconsistent\"):\n            # max_bin changed.\n            xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)\n\n        Xy = xgb.DMatrix(X[0], y[0], weight=w[0])\n        booster0 = xgb.train(\n            {\n                \"tree_method\": \"hist\",\n                \"max_bin\": max_bin,\n                \"max_depth\": 4,\n                \"device\": \"cpu\",\n            },\n            Xy,\n            num_boost_round=1,\n        )\n        booster1 = xgb.train(\n            {\n                \"tree_method\": \"hist\",\n                \"max_bin\": max_bin,\n                \"max_depth\": 4,\n                \"device\": \"cuda\",\n            },\n            Xy,\n            
num_boost_round=1,\n        )\n        qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)\n        predt0 = booster0.predict(qXy)\n        predt1 = booster1.predict(qXy)\n        np.testing.assert_allclose(predt0, predt1)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.parametrize(\n        \"device,max_bin\",\n        [(\"cpu\", 16), (\"cuda\", 16), (\"cpu\", 64), (\"cuda\", 64)],\n    )\n    def test_interoperability(self, device: str, max_bin: int) -> None:\n        import cupy as cp\n\n        n_samples = 64\n        n_features = 3\n        X, y, w = tm.make_batches(\n            n_samples, n_features=n_features, n_batches=1, use_cupy=False\n        )\n        # from CPU\n        Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)\n        booster_0 = xgb.train(\n            {\"device\": device, \"max_bin\": max_bin}, Xy, num_boost_round=4\n        )\n\n        X[0] = cp.array(X[0])\n        y[0] = cp.array(y[0])\n        w[0] = cp.array(w[0])\n\n        # from GPU\n        Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)\n        booster_1 = xgb.train(\n            {\"device\": device, \"max_bin\": max_bin}, Xy, num_boost_round=4\n        )\n        cp.testing.assert_allclose(\n            booster_0.inplace_predict(X[0]), booster_1.inplace_predict(X[0])\n        )\n\n        with pytest.raises(ValueError, match=r\"Only.*hist.*\"):\n            xgb.train(\n                {\"tree_method\": \"approx\", \"max_bin\": max_bin, \"device\": device},\n                Xy,\n                num_boost_round=4,\n            )\n\n    def test_ref_quantile_cut(self) -> None:\n        check_ref_quantile_cut(\"cuda\")\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_metainfo(self) -> None:\n        import cupy as cp\n\n        rng = cp.random.RandomState(np.uint64(1994))\n\n        rows = 10\n        cols = 3\n        data = rng.randn(rows, cols)\n\n        labels = rng.randn(rows)\n\n        fw = 
rng.randn(rows)\n        fw -= fw.min()\n\n        m = xgb.QuantileDMatrix(data=data, label=labels, feature_weights=fw)\n\n        got_fw = m.get_float_info(\"feature_weights\")\n        got_labels = m.get_label()\n\n        cp.testing.assert_allclose(fw, got_fw)\n        cp.testing.assert_allclose(labels, got_labels)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_ref_dmatrix(self) -> None:\n        import cupy as cp\n\n        rng = cp.random.RandomState(np.uint64(1994))\n        self.cputest.run_ref_dmatrix(rng, \"cuda\", False)\n\n    @given(\n        strategies.integers(1, 1000),\n        strategies.integers(1, 100),\n        strategies.fractions(0, 0.99),\n    )\n    @settings(print_blob=True, deadline=None)\n    def test_to_csr(self, n_samples: int, n_features: int, sparsity: float) -> None:\n        import cupy as cp\n\n        X, y = tm.make_sparse_regression(n_samples, n_features, sparsity, False)\n        h_X = X.astype(np.float32)\n\n        csr = h_X\n        h_X = X.toarray()\n        h_X[h_X == 0] = np.nan\n\n        h_m = xgb.QuantileDMatrix(data=h_X)\n        h_ret = h_m.get_data()\n\n        d_X = cp.array(h_X)\n\n        d_m = xgb.QuantileDMatrix(data=d_X, label=y)\n        d_ret = d_m.get_data()\n\n        np.testing.assert_equal(csr.indptr, d_ret.indptr)\n        np.testing.assert_equal(csr.indices, d_ret.indices)\n\n        np.testing.assert_equal(h_ret.indptr, d_ret.indptr)\n        np.testing.assert_equal(h_ret.indices, d_ret.indices)\n\n        booster = xgb.train({\"tree_method\": \"hist\", \"device\": \"cuda:0\"}, dtrain=d_m)\n\n        np.testing.assert_allclose(\n            booster.predict(d_m),\n            booster.predict(xgb.DMatrix(d_m.get_data())),\n            atol=1e-6,\n        )\n\n    def test_ltr(self) -> None:\n        import cupy as cp\n\n        X, y, qid, w = tm.make_ltr(100, 3, 3, 5)\n        # make sure GPU is used to run sketching.\n        cpX = cp.array(X)\n        
Xy_qdm = xgb.QuantileDMatrix(cpX, y, qid=qid, weight=w)\n        Xy = xgb.DMatrix(X, y, qid=qid, weight=w)\n        xgb.train({\"device\": \"cuda\", \"objective\": \"rank:ndcg\"}, Xy)\n\n        from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy)\n        from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm)\n\n        assert predictor_equal(from_qdm, from_dm)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_check_inf(self) -> None:\n        import cupy as cp\n\n        rng = cp.random.default_rng(1994)\n        check_inf(rng)\n\n    def test_mixed_sparsity(self) -> None:\n        run_mixed_sparsity(\"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_from_cudf.py",
    "content": "import json\nfrom typing import TYPE_CHECKING, Any, Callable, Dict, Type\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.compat import is_dataframe\nfrom xgboost.testing.data import run_base_margin_info\nfrom xgboost.testing.utils import assert_allclose\n\nif TYPE_CHECKING:\n    import cudf\nelse:\n    cudf = pytest.importorskip(\"cudf\")\n\n\ndef test_type_check() -> None:\n    df = cudf.DataFrame([[1, 2.0], [2, 3.0]], columns=[\"a\", \"b\"])\n    assert is_dataframe(df)\n    assert is_dataframe(df.a)\n\n\ndef dmatrix_from_cudf(\n    input_type: Any, DMatrixT: Type[xgb.DMatrix], missing: float = np.nan\n) -> None:\n    \"\"\"Test constructing DMatrix from cudf\"\"\"\n    import pandas as pd\n\n    kRows = 80\n    kCols = 3\n\n    na = np.random.randn(kRows, kCols)\n    na[:, 0:2] = na[:, 0:2].astype(input_type)\n\n    na[5, 0] = missing\n    na[3, 1] = missing\n\n    pa = pd.DataFrame({\"0\": na[:, 0], \"1\": na[:, 1], \"2\": na[:, 2].astype(np.int32)})\n\n    np_label = np.random.randn(kRows).astype(input_type)\n    pa_label = pd.DataFrame(np_label)\n\n    cd = cudf.from_pandas(pa)\n    cd_label = cudf.from_pandas(pa_label).iloc[:, 0]\n\n    dtrain = DMatrixT(cd, missing=missing, label=cd_label)\n    assert dtrain.num_col() == kCols\n    assert dtrain.num_row() == kRows\n\n\ndef _test_from_cudf(DMatrixT: Type[xgb.DMatrix]) -> None:\n    \"\"\"Test constructing DMatrix from cudf\"\"\"\n    dmatrix_from_cudf(np.float32, DMatrixT, np.nan)\n    dmatrix_from_cudf(np.float64, DMatrixT, np.nan)\n\n    dmatrix_from_cudf(np.int8, DMatrixT, 2)\n    dmatrix_from_cudf(np.int32, DMatrixT, -2)\n    dmatrix_from_cudf(np.int64, DMatrixT, -3)\n\n    cd = cudf.DataFrame({\"x\": [1, 2, 3], \"y\": [0.1, 0.2, 0.3]})\n    dtrain = DMatrixT(cd)\n\n    assert dtrain.feature_names == [\"x\", \"y\"]\n    assert dtrain.feature_types == [\"int\", \"float\"]\n\n    series = cudf.DataFrame({\"x\": [1, 2, 3]}).iloc[:, 
0]\n    assert isinstance(series, cudf.Series)\n    dtrain = DMatrixT(series)\n\n    assert dtrain.feature_names == [\"x\"]\n    assert dtrain.feature_types == [\"int\"]\n\n    with pytest.raises(ValueError, match=r\".*multi.*\"):\n        dtrain = DMatrixT(cd, label=cd)\n        xgb.train(\n            {\"tree_method\": \"hist\", \"device\": \"cuda\", \"objective\": \"multi:softprob\"},\n            dtrain,\n        )\n\n    # Test when number of elements is less than 8\n    X = cudf.DataFrame({\"x\": cudf.Series([0, 1, 2, np.nan, 4], dtype=np.int32)})\n    dtrain = DMatrixT(X)\n    assert dtrain.num_col() == 1\n    assert dtrain.num_row() == 5\n\n\ndef _test_cudf_training(DMatrixT: Type[xgb.DMatrix]) -> None:\n    import pandas as pd\n    from cudf import DataFrame as df\n\n    np.random.seed(1)\n    X = pd.DataFrame(np.random.randn(50, 10))\n    y = pd.DataFrame(np.random.randn(50))\n    weights = np.random.random(50) + 1.0\n    cudf_weights = df.from_pandas(pd.DataFrame(weights))\n    base_margin = np.random.random(50)\n    cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))\n\n    evals_result_cudf: Dict[str, Any] = {}\n    dtrain_cudf = DMatrixT(\n        df.from_pandas(X),\n        df.from_pandas(y),\n        weight=cudf_weights,\n        base_margin=cudf_base_margin,\n    )\n    params = {\"device\": \"cuda\", \"tree_method\": \"hist\"}\n    xgb.train(\n        params,\n        dtrain_cudf,\n        evals=[(dtrain_cudf, \"train\")],\n        evals_result=evals_result_cudf,\n    )\n    evals_result_np: Dict[str, Any] = {}\n    dtrain_np = xgb.DMatrix(X, y, weight=weights, base_margin=base_margin)\n    xgb.train(\n        params, dtrain_np, evals=[(dtrain_np, \"train\")], evals_result=evals_result_np\n    )\n    assert np.array_equal(\n        evals_result_cudf[\"train\"][\"rmse\"], evals_result_np[\"train\"][\"rmse\"]\n    )\n\n\ndef _test_cudf_metainfo(DMatrixT: Type[xgb.DMatrix]) -> None:\n    import pandas as pd\n    from cudf import DataFrame as 
df\n\n    n = 100\n    X = np.random.random((n, 2))\n    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))\n    dmat = xgb.DMatrix(X)\n    floats = np.random.random(n)\n    uints = np.array([4, 2, 8]).astype(\"uint32\")\n    cudf_floats = df.from_pandas(pd.DataFrame(floats))\n    cudf_uints = df.from_pandas(pd.DataFrame(uints))\n    dmat.set_weight(floats)\n    dmat.set_label(floats)\n    dmat.set_base_margin(floats)\n    dmat.set_uint_info(\"group\", uints)\n    dmat_cudf.set_info(weight=cudf_floats)\n    dmat_cudf.set_info(label=cudf_floats)\n    dmat_cudf.set_info(base_margin=cudf_floats)\n    dmat_cudf.set_info(group=cudf_uints)\n\n    # Test setting info with cudf DataFrame\n    assert_allclose(\"cuda\", dmat.get_weight(), dmat_cudf.get_weight())\n    assert_allclose(\"cuda\", dmat.get_label(), dmat_cudf.get_label())\n    assert_allclose(\"cuda\", dmat.get_base_margin(), dmat_cudf.get_base_margin())\n    assert np.array_equal(\n        dmat.get_uint_info(\"group_ptr\"), dmat_cudf.get_uint_info(\"group_ptr\")\n    )\n\n    # Test setting info with cudf Series\n    dmat_cudf.set_info(weight=cudf_floats[cudf_floats.columns[0]])\n    dmat_cudf.set_info(label=cudf_floats[cudf_floats.columns[0]])\n    dmat_cudf.set_info(base_margin=cudf_floats[cudf_floats.columns[0]])\n    dmat_cudf.set_info(group=cudf_uints[cudf_uints.columns[0]])\n    assert_allclose(\"cuda\", dmat.get_weight(), dmat_cudf.get_weight())\n    assert_allclose(\"cuda\", dmat.get_label(), dmat_cudf.get_label())\n    assert_allclose(\"cuda\", dmat.get_base_margin(), dmat_cudf.get_base_margin())\n    assert np.array_equal(\n        dmat.get_uint_info(\"group_ptr\"), dmat_cudf.get_uint_info(\"group_ptr\")\n    )\n\n    run_base_margin_info(df, DMatrixT, \"cuda\")\n\n\nclass TestFromColumnar:\n    \"\"\"Tests for constructing DMatrix from data structure conforming Apache\n    Arrow specification.\"\"\"\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_simple_dmatrix_from_cudf(self) -> None:\n    
    _test_from_cudf(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_device_dmatrix_from_cudf(self) -> None:\n        _test_from_cudf(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_cudf_training_simple_dmatrix(self) -> None:\n        _test_cudf_training(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_cudf_training_device_dmatrix(self) -> None:\n        _test_cudf_training(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_cudf_metainfo_simple_dmatrix(self) -> None:\n        _test_cudf_metainfo(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_cudf_metainfo_device_dmatrix(self) -> None:\n        _test_cudf_metainfo(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_cudf_categorical(self) -> None:\n        import pandas as pd\n\n        n_features = 30\n        _X, _y = tm.make_categorical(100, n_features, 17, onehot=False)\n        X = cudf.from_pandas(_X)\n        y = cudf.from_pandas(pd.DataFrame(_y))\n\n        Xy = xgb.DMatrix(X, y)\n        assert Xy.feature_types is not None\n        assert len(Xy.feature_types) == X.shape[1]\n        assert all(t == \"c\" for t in Xy.feature_types)\n\n        Xy = xgb.QuantileDMatrix(X, y)\n        assert Xy.feature_types is not None\n        assert len(Xy.feature_types) == X.shape[1]\n        assert all(t == \"c\" for t in Xy.feature_types)\n\n        # mixed dtypes\n        X[\"0\"] = X[\"0\"].astype(np.int64)\n        X[\"2\"] = X[\"2\"].astype(np.int64)\n        df, _, _ = xgb.data._transform_cudf_df(X, None, None, enable_categorical=True)\n        assert X.shape[1] == n_features\n        assert isinstance(df.aitfs[0], dict)\n        assert isinstance(df.aitfs[1], tuple)\n        assert isinstance(df.aitfs[2], dict)\n\n        interfaces_str = df.array_interface()\n        interfaces = json.loads(interfaces_str)\n        assert len(interfaces) == X.shape[1]\n\n        # test 
missing value\n        X = cudf.DataFrame({\"f0\": [\"a\", \"b\", np.nan]})\n        X[\"f0\"] = X[\"f0\"].astype(\"category\")\n        df, _, _ = xgb.data._transform_cudf_df(X, None, None, enable_categorical=True)\n        for col in df.aitfs:\n            assert isinstance(col, tuple)\n            assert \"mask\" in col[1]\n\n        y = [0, 1, 2]\n        Xy = xgb.DMatrix(X, y)\n        assert Xy.num_row() == 3\n        assert Xy.num_col() == 1\n\n        Xy = xgb.QuantileDMatrix(X, y)\n        assert Xy.num_row() == 3\n        assert Xy.num_col() == 1\n\n        X = X[\"f0\"]\n        Xy = xgb.DMatrix(X, y)\n        assert Xy.num_row() == 3\n        assert Xy.num_col() == 1\n\n\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.skipif(**tm.no_sklearn())\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_cudf_training_with_sklearn() -> None:\n    import pandas as pd\n    from cudf import DataFrame as df\n    from cudf import Series as ss\n\n    np.random.seed(1)\n    X = pd.DataFrame(np.random.randn(50, 10))\n    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))\n    weights = np.random.random(50) + 1.0\n    cudf_weights = df.from_pandas(pd.DataFrame(weights))\n    base_margin = np.random.random(50)\n    cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))\n\n    X_cudf = df.from_pandas(X)\n    y_cudf = df.from_pandas(y)\n    y_cudf_series = ss(data=y.iloc[:, 0])\n\n    for y_obj in [y_cudf, y_cudf_series]:\n        clf = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda:0\")\n        clf.fit(\n            X_cudf,\n            y_obj,\n            sample_weight=cudf_weights,\n            base_margin=cudf_base_margin,\n            eval_set=[(X_cudf, y_obj)],\n        )\n        pred = clf.predict(X_cudf)\n        assert np.array_equal(np.unique(pred), np.array([0, 1]))\n\n\nclass IterForDMatrixTest(xgb.core.DataIter):\n    \"\"\"A data iterator for XGBoost DMatrix.\n\n    `reset` and `next` are required for 
any data iterator, other functions here\n    are utilities for demonstration's purpose.\n\n    \"\"\"\n\n    ROWS_PER_BATCH = 100  # data is split by rows\n    BATCHES = 16\n\n    def __init__(self, categorical: bool) -> None:\n        \"\"\"Generate some random data for demonstration.\n\n        Actual data can be anything that is currently supported by XGBoost.\n        \"\"\"\n        self.rows = self.ROWS_PER_BATCH\n\n        if categorical:\n            self._data = []\n            self._labels = []\n            for i in range(self.BATCHES):\n                X, y = tm.make_categorical(self.ROWS_PER_BATCH, 4, 13, onehot=False)\n                self._data.append(cudf.from_pandas(X))\n                self._labels.append(y)\n        else:\n            rng = np.random.RandomState(1994)\n            self._data = [\n                cudf.DataFrame(\n                    {\n                        \"a\": rng.randn(self.ROWS_PER_BATCH),\n                        \"b\": rng.randn(self.ROWS_PER_BATCH),\n                    }\n                )\n            ] * self.BATCHES\n            self._labels = [rng.randn(self.rows)] * self.BATCHES\n\n        self.it = 0  # set iterator to 0\n        super().__init__(cache_prefix=None)\n\n    def as_array(self) -> \"cudf.DataFrame\":\n        return cudf.concat(self._data)\n\n    def as_array_labels(self) -> np.ndarray:\n        return np.concatenate(self._labels)\n\n    def data(self) -> \"cudf.DataFrame\":\n        \"\"\"Utility function for obtaining current batch of data.\"\"\"\n        return self._data[self.it]\n\n    def labels(self) -> Any:\n        \"\"\"Utility function for obtaining current batch of label.\"\"\"\n        return self._labels[self.it]\n\n    def reset(self) -> None:\n        \"\"\"Reset the iterator\"\"\"\n        self.it = 0\n\n    def next(self, input_data: Callable) -> bool:\n        \"\"\"Yield next batch of data\"\"\"\n        if self.it == len(self._data):\n            # Return False when there's no 
more batch.\n            return False\n        input_data(data=self.data(), label=self.labels())\n        self.it += 1\n        return True\n\n\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.parametrize(\"enable_categorical\", [True, False])\ndef test_from_cudf_iter(enable_categorical: bool) -> None:\n    rounds = 100\n    it = IterForDMatrixTest(enable_categorical)\n    params = {\"tree_method\": \"hist\", \"device\": \"cuda\"}\n\n    # Use iterator\n    m_it = xgb.QuantileDMatrix(it, enable_categorical=enable_categorical)\n    reg_with_it = xgb.train(params, m_it, num_boost_round=rounds)\n\n    X = it.as_array()\n    y = it.as_array_labels()\n\n    m = xgb.DMatrix(X, y, enable_categorical=enable_categorical)\n\n    assert m_it.num_col() == m.num_col()\n    assert m_it.num_row() == m.num_row()\n\n    reg = xgb.train(params, m, num_boost_round=rounds)\n\n    predict = reg.predict(m)\n    predict_with_it = reg_with_it.predict(m_it)\n    np.testing.assert_allclose(predict_with_it, predict)\n\n\ndef test_invalid_meta() -> None:\n    df = cudf.DataFrame({\"f0\": [0, 1, 2], \"f1\": [2, 3, 4], \"y\": [None, 1, 2]})\n    y = df[\"y\"]\n    X = df.drop([\"y\"], axis=1)\n    with pytest.raises(ValueError, match=\"Missing value\"):\n        xgb.DMatrix(X, y)\n    with pytest.raises(ValueError, match=\"Missing value\"):\n        xgb.QuantileDMatrix(X, y)\n    y = X.copy()\n    y.iloc[0, 0] = None\n    # check by the cuDF->cupy converter.\n    with pytest.raises(ValueError, match=\"Label contains NaN\"):\n        xgb.DMatrix(X, y)\n    with pytest.raises(ValueError, match=\"Label contains NaN\"):\n        xgb.QuantileDMatrix(X, y)\n"
  },
  {
    "path": "tests/python-gpu/test_from_cupy.py",
    "content": "import json\nfrom typing import Any, Dict, Type\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import run_base_margin_info\nfrom xgboost.testing.utils import assert_allclose\n\ncp = pytest.importorskip(\"cupy\")\n\n\ndef test_array_interface() -> None:\n    arr = cp.array([[1, 2, 3, 4], [1, 2, 3, 4]])\n    i_arr = arr.__cuda_array_interface__\n    i_arr = json.loads(json.dumps(i_arr))\n    ret = xgb.core.from_array_interface(i_arr)\n    np.testing.assert_equal(cp.asnumpy(arr), cp.asnumpy(ret))\n\n\ndef dmatrix_from_cupy(\n    input_type: Any, DMatrixT: Type[xgb.DMatrix], missing: float = np.nan\n) -> xgb.DMatrix:\n    \"\"\"Test constructing DMatrix from cupy\"\"\"\n    kRows = 80\n    kCols = 3\n\n    np_X = np.random.randn(kRows, kCols).astype(dtype=input_type)\n    X = cp.array(np_X)\n    X[5, 0] = missing\n    X[3, 1] = missing\n    y = cp.random.randn(kRows).astype(dtype=input_type)\n    dtrain = DMatrixT(X, missing=missing, label=y)\n    assert dtrain.num_col() == kCols\n    assert dtrain.num_row() == kRows\n\n    if DMatrixT is xgb.QuantileDMatrix:\n        # Slice is not supported by QuantileDMatrix\n        with pytest.raises(xgb.core.XGBoostError):\n            dtrain.slice(rindex=[0, 1, 2])\n            dtrain.slice(rindex=[0, 1, 2])\n    else:\n        dtrain.slice(rindex=[0, 1, 2])\n        dtrain.slice(rindex=[0, 1, 2])\n\n    return dtrain\n\n\ndef _test_from_cupy(DMatrixT: Type[xgb.DMatrix]) -> None:\n    \"\"\"Test constructing DMatrix from cupy\"\"\"\n    dmatrix_from_cupy(np.float16, DMatrixT, np.nan)\n    dmatrix_from_cupy(np.float32, DMatrixT, np.nan)\n    dmatrix_from_cupy(np.float64, DMatrixT, np.nan)\n\n    dmatrix_from_cupy(np.uint8, DMatrixT, 2)\n    dmatrix_from_cupy(np.uint32, DMatrixT, 3)\n    dmatrix_from_cupy(np.uint64, DMatrixT, 4)\n\n    dmatrix_from_cupy(np.int8, DMatrixT, 2)\n    dmatrix_from_cupy(np.int32, DMatrixT, -2)\n    
dmatrix_from_cupy(np.int64, DMatrixT, -3)\n\n    with pytest.raises(ValueError):\n        X = cp.random.randn(2, 2, dtype=\"float32\")\n        y = cp.random.randn(2, 2, 3, dtype=\"float32\")\n        DMatrixT(X, label=y)\n\n\ndef _test_cupy_training(DMatrixT: Type[xgb.DMatrix]) -> None:\n    np.random.seed(1)\n    cp.random.seed(np.uint64(1))\n    X = cp.random.randn(50, 10, dtype=\"float32\")\n    y = cp.random.randn(50, dtype=\"float32\")\n    weights = np.random.random(50) + 1\n    cupy_weights = cp.array(weights)\n    base_margin = np.random.random(50)\n    cupy_base_margin = cp.array(base_margin)\n\n    evals_result_cupy: Dict[str, Any] = {}\n    dtrain_cp = DMatrixT(X, y, weight=cupy_weights, base_margin=cupy_base_margin)\n    params = {\"tree_method\": \"hist\", \"device\": \"cuda:0\"}\n    xgb.train(\n        params, dtrain_cp, evals=[(dtrain_cp, \"train\")], evals_result=evals_result_cupy\n    )\n    evals_result_np: Dict[str, Any] = {}\n    dtrain_np = xgb.DMatrix(\n        cp.asnumpy(X), cp.asnumpy(y), weight=weights, base_margin=base_margin\n    )\n    xgb.train(\n        params, dtrain_np, evals=[(dtrain_np, \"train\")], evals_result=evals_result_np\n    )\n    assert np.array_equal(\n        evals_result_cupy[\"train\"][\"rmse\"], evals_result_np[\"train\"][\"rmse\"]\n    )\n\n\ndef _test_cupy_metainfo(DMatrixT: Type[xgb.DMatrix]) -> None:\n    n = 100\n    X = np.random.random((n, 2))\n    dmat_cupy = DMatrixT(cp.array(X))\n    dmat = xgb.DMatrix(X)\n    floats = np.random.random(n)\n    uints = np.array([4, 2, 8]).astype(\"uint32\")\n    cupy_floats = cp.array(floats)\n    cupy_uints = cp.array(uints)\n    dmat.set_float_info(\"weight\", floats)\n    dmat.set_float_info(\"label\", floats)\n    dmat.set_float_info(\"base_margin\", floats)\n    dmat.set_uint_info(\"group\", uints)\n    dmat_cupy.set_info(weight=cupy_floats)\n    dmat_cupy.set_info(label=cupy_floats)\n    dmat_cupy.set_info(base_margin=cupy_floats)\n    
dmat_cupy.set_info(group=cupy_uints)\n\n    # Test setting info with cupy\n    assert_allclose(\"cuda\", dmat.get_weight(), dmat_cupy.get_weight())\n    assert_allclose(\"cuda\", dmat.get_label(), dmat_cupy.get_label())\n    assert_allclose(\"cuda\", dmat.get_base_margin(), dmat_cupy.get_base_margin())\n    assert np.array_equal(\n        dmat.get_uint_info(\"group_ptr\"), dmat_cupy.get_uint_info(\"group_ptr\")\n    )\n\n    run_base_margin_info(cp.asarray, DMatrixT, \"cuda\")\n\n    dmat_cupy = DMatrixT(cp.array(X))\n    y = dmat_cupy.get_label()\n    assert y.size == 0\n    y = cp.array(X)\n    dmat_cupy.set_label(y)\n    y1 = dmat_cupy.get_label()\n    assert_allclose(\"cuda\", y1, y)\n\n\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_cupy_training_with_sklearn() -> None:\n    np.random.seed(1)\n    cp.random.seed(np.uint64(1))\n    X = cp.random.randn(50, 10, dtype=\"float32\")\n    y = (cp.random.randn(50, dtype=\"float32\") > 0).astype(\"int8\")\n    weights = np.random.random(50) + 1\n    cupy_weights = cp.array(weights)\n    base_margin = np.random.random(50)\n    cupy_base_margin = cp.array(base_margin)\n\n    clf = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda:0\")\n    clf.fit(\n        X,\n        y,\n        sample_weight=cupy_weights,\n        base_margin=cupy_base_margin,\n        eval_set=[(X, y)],\n    )\n    pred = clf.predict(X)\n    assert np.array_equal(np.unique(pred), np.array([0, 1]))\n\n\nclass TestFromCupy:\n    \"\"\"Tests for constructing DMatrix from CuPy arrays and DLPack capsules.\"\"\"\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_simple_dmat_from_cupy(self) -> None:\n        _test_from_cupy(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_quantile_dmat_from_cupy(self) -> None:\n        _test_from_cupy(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_cupy_training_quantile_dmat(self) -> None:\n        
_test_cupy_training(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_cupy_training_simple_dmat(self) -> None:\n        _test_cupy_training(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_cupy_metainfo_simple_dmat(self) -> None:\n        _test_cupy_metainfo(xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_cupy_metainfo_quantile_dmat(self) -> None:\n        _test_cupy_metainfo(xgb.QuantileDMatrix)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dlpack_simple_dmat(self) -> None:\n        n = 100\n        X = cp.random.random((n, 2))\n        capsule = X.__dlpack__()\n        xgb.DMatrix(capsule)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_cupy_categorical(self) -> None:\n        n_features = 10\n        X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)\n        X = cp.asarray(X.values.astype(cp.float32))\n        y = cp.array(y)\n        feature_types = [\"c\"] * n_features\n\n        assert isinstance(X, cp.ndarray)\n        Xy = xgb.DMatrix(X, y, feature_types=feature_types)\n        np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dlpack_quantile_dmat(self) -> None:\n        n = 100\n        X = cp.random.random((n, 2))\n        m = xgb.QuantileDMatrix(X.__dlpack__())\n\n        with pytest.raises(\n            xgb.core.XGBoostError, match=\"Slicing DMatrix is not supported\"\n        ):\n            m.slice(rindex=[0, 1, 2])\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_qid(self) -> None:\n        rng = cp.random.RandomState(np.uint64(1994))\n        rows = 100\n        cols = 10\n        X, y = rng.randn(rows, cols), rng.randn(rows)\n        qid = rng.randint(low=0, high=10, size=rows, dtype=np.uint32)\n        qid = cp.sort(qid)\n\n        Xy = xgb.DMatrix(X, y)\n        Xy.set_info(qid=qid)\n        group_ptr = Xy.get_uint_info(\"group_ptr\")\n        
assert group_ptr[0] == 0\n        assert group_ptr[-1] == rows\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.mgpu\n    def test_specified_device(self) -> None:\n        cp.cuda.runtime.setDevice(0)\n        dtrain = dmatrix_from_cupy(np.float32, xgb.QuantileDMatrix, np.nan)\n        with pytest.raises(xgb.core.XGBoostError, match=\"Invalid device ordinal\"):\n            xgb.train(\n                {\"tree_method\": \"hist\", \"device\": \"cuda:1\"}, dtrain, num_boost_round=10\n            )\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_basic_models.py",
    "content": "import os\nfrom typing import Tuple\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.basic_models import run_custom_objective\n\n\nclass TestGPUBasicModels:\n    def run_cls(self, X: np.ndarray, y: np.ndarray) -> Tuple[int, int]:\n        cls = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda\")\n        cls.fit(X, y)\n        cls.get_booster().save_model(\"test_deterministic_gpu_hist-0.json\")\n\n        cls = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda\")\n        cls.fit(X, y)\n        cls.get_booster().save_model(\"test_deterministic_gpu_hist-1.json\")\n\n        with open(\"test_deterministic_gpu_hist-0.json\", \"r\") as fd:\n            model_0 = fd.read()\n        with open(\"test_deterministic_gpu_hist-1.json\", \"r\") as fd:\n            model_1 = fd.read()\n\n        os.remove(\"test_deterministic_gpu_hist-0.json\")\n        os.remove(\"test_deterministic_gpu_hist-1.json\")\n\n        return hash(model_0), hash(model_1)\n\n    def test_custom_objective(self) -> None:\n        dtrain, dtest = tm.load_agaricus(__file__)\n        run_custom_objective(\"hist\", \"cuda\", dtrain, dtest)\n\n    def test_deterministic_gpu_hist(self) -> None:\n        kRows = 1000\n        kCols = 64\n        kClasses = 4\n        # Create large values to force rounding.\n        X = np.random.randn(kRows, kCols) * 1e4\n        y = np.random.randint(0, kClasses, size=kRows)\n\n        model_0, model_1 = self.run_cls(X, y)\n        assert model_0 == model_1\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_invalid_gpu_id(self) -> None:\n        from sklearn.datasets import load_digits\n\n        X, y = load_digits(return_X_y=True)\n        # should pass with invalid gpu id\n        cls1 = xgb.XGBClassifier(tree_method=\"hist\", device=\"cuda:9999\")\n        cls1.fit(X, y)\n        # should throw error with fail_on_invalid_gpu_id enabled\n        cls2 = 
xgb.XGBClassifier(\n            tree_method=\"hist\", device=\"cuda:9999\", fail_on_invalid_gpu_id=True\n        )\n        with pytest.raises(ValueError, match=\"ordinal 9999 is invalid\"):\n            cls2.fit(X, y)\n\n        cls2 = xgb.XGBClassifier(\n            tree_method=\"hist\", device=\"cuda:9999\", fail_on_invalid_gpu_id=True\n        )\n        with pytest.raises(ValueError, match=\"ordinal 9999 is invalid\"):\n            cls2.fit(X, y)\n\n        clf = xgb.XGBClassifier(tree_method=\"hist\", gpu_id=0)\n        with pytest.raises(ValueError, match=\"`gpu_id` has been removed\"):\n            clf.fit(X, y)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_callbacks.py",
    "content": "import pytest\n\nfrom xgboost import testing as tm\nfrom xgboost.testing.callbacks import (\n    run_eta_decay,\n    run_eta_decay_leaf_output,\n    tree_methods_objs,\n)\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"approx\", \"hist\"])\ndef test_eta_decay(tree_method: str) -> None:\n    dtrain, dtest = tm.load_agaricus(__file__)\n    run_eta_decay(tree_method, dtrain, dtest, \"cuda\")\n\n\n@pytest.mark.parametrize(\"tree_method,objective\", tree_methods_objs())\ndef test_eta_decay_leaf_output(tree_method: str, objective: str) -> None:\n    dtrain, dtest = tm.load_agaricus(__file__)\n    run_eta_decay_leaf_output(tree_method, objective, dtrain, dtest, \"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_data_iterator.py",
    "content": "import sys\n\nimport numpy as np\nimport pytest\nfrom hypothesis import given, settings, strategies\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing import no_cupy\nfrom xgboost.testing.data_iter import check_invalid_cat_batches, check_uneven_sizes\nfrom xgboost.testing.updater import (\n    check_categorical_missing,\n    check_categorical_ohe,\n    check_extmem_qdm,\n    check_quantile_loss_extmem,\n)\n\nsys.path.append(\"tests/python\")\nfrom test_data_iterator import run_data_iterator\nfrom test_data_iterator import test_single_batch as cpu_single_batch\n\n# There are lots of warnings if XGBoost is not running on ATS-enabled systems.\npytestmark = pytest.mark.filterwarnings(\"ignore\")\n\n\ndef test_gpu_single_batch() -> None:\n    cpu_single_batch(\"hist\", \"cuda\")\n\n\n@pytest.mark.skipif(**no_cupy())\n@given(\n    strategies.integers(0, 1024),\n    strategies.integers(1, 7),\n    strategies.integers(0, 8),\n    strategies.booleans(),\n    strategies.booleans(),\n    strategies.booleans(),\n)\n@settings(deadline=None, max_examples=16, print_blob=True)\ndef test_gpu_data_iterator(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    subsample: bool,\n    use_cupy: bool,\n    on_host: bool,\n) -> None:\n    run_data_iterator(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"hist\",\n        subsample=subsample,\n        device=\"cuda\",\n        use_cupy=use_cupy,\n        on_host=on_host,\n    )\n\n\ndef test_cpu_data_iterator() -> None:\n    \"\"\"Make sure CPU algorithm can handle GPU inputs\"\"\"\n    run_data_iterator(\n        1024,\n        2,\n        3,\n        \"approx\",\n        device=\"cuda\",\n        subsample=False,\n        use_cupy=True,\n        on_host=False,\n    )\n\n\n@given(\n    strategies.integers(1, 2048),\n    strategies.integers(1, 8),\n    strategies.integers(1, 4),\n    strategies.integers(2, 16),\n    
strategies.booleans(),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\ndef test_extmem_qdm(\n    n_samples_per_batch: int,\n    n_features: int,\n    n_batches: int,\n    n_bins: int,\n    on_host: bool,\n) -> None:\n    check_extmem_qdm(\n        n_samples_per_batch,\n        n_features,\n        n_batches=n_batches,\n        n_bins=n_bins,\n        device=\"cuda\",\n        on_host=on_host,\n        is_cat=False,\n    )\n\n\n@given(\n    strategies.integers(1, 2048),\n    strategies.integers(1, 4),\n    strategies.integers(2, 16),\n    strategies.booleans(),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_categorical_extmem_qdm(\n    n_samples_per_batch: int,\n    n_batches: int,\n    n_bins: int,\n    on_host: bool,\n) -> None:\n    check_extmem_qdm(\n        n_samples_per_batch,\n        4,\n        n_batches=n_batches,\n        n_bins=n_bins,\n        device=\"cuda\",\n        on_host=on_host,\n        is_cat=True,\n    )\n\n\ndef test_invalid_device_extmem_qdm() -> None:\n    it = tm.IteratorForTest(\n        *tm.make_batches(16, 4, 2, use_cupy=False), cache=\"cache\", on_host=True\n    )\n    Xy = xgb.ExtMemQuantileDMatrix(it)\n    with pytest.raises(ValueError, match=\"cannot be used for GPU\"):\n        xgb.train({\"device\": \"cuda\"}, Xy)\n\n    it = tm.IteratorForTest(\n        *tm.make_batches(16, 4, 2, use_cupy=True), cache=\"cache\", on_host=True\n    )\n    Xy = xgb.ExtMemQuantileDMatrix(it)\n    with pytest.raises(ValueError, match=\"cannot be used for CPU\"):\n        xgb.train({\"device\": \"cpu\"}, Xy)\n\n\ndef test_concat_pages() -> None:\n    boosters = []\n    for min_cache_page_bytes in [0, 256, 386, np.iinfo(np.int64).max]:\n        it = tm.IteratorForTest(\n            *tm.make_batches(64, 16, 4, use_cupy=True),\n            cache=None,\n            min_cache_page_bytes=min_cache_page_bytes,\n            on_host=True,\n        
)\n        Xy = xgb.ExtMemQuantileDMatrix(it)\n        booster = xgb.train(\n            {\n                \"device\": \"cuda\",\n                \"objective\": \"reg:absoluteerror\",\n            },\n            Xy,\n        )\n        boosters.append(booster.save_raw(raw_format=\"json\"))\n\n    for model in boosters[1:]:\n        assert str(model) == str(boosters[0])\n\n\n@given(\n    strategies.integers(1, 64),\n    strategies.integers(1, 8),\n    strategies.integers(1, 4),\n)\n@settings(deadline=None, max_examples=10, print_blob=True)\ndef test_quantile_objective(\n    n_samples_per_batch: int, n_features: int, n_batches: int\n) -> None:\n    check_quantile_loss_extmem(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"hist\",\n        \"cuda\",\n    )\n    check_quantile_loss_extmem(\n        n_samples_per_batch,\n        n_features,\n        n_batches,\n        \"approx\",\n        \"cuda\",\n    )\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_categorical_missing(tree_method: str) -> None:\n    check_categorical_missing(\n        1024, 4, 5, device=\"cuda\", tree_method=tree_method, extmem=True\n    )\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_categorical_ohe(tree_method: str) -> None:\n    check_categorical_ohe(\n        rows=1024,\n        cols=16,\n        rounds=4,\n        cats=5,\n        device=\"cuda\",\n        tree_method=tree_method,\n        extmem=True,\n    )\n\n\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_invalid_cat_batches() -> None:\n    check_invalid_cat_batches(\"cuda\")\n\n\ndef test_uneven_sizes() -> None:\n    check_uneven_sizes(\"cuda\")\n\n\ndef test_cache_host_ratio() -> None:\n    boosters = []\n    for min_cache_page_bytes in [0, 64, 
np.iinfo(np.int64).max, None]:\n        for cache_host_ratio in [0, 0.5, 1.0, None]:\n            it = tm.IteratorForTest(\n                *tm.make_batches(64, 16, 4, use_cupy=True),\n                cache=None,\n                on_host=True,\n            )\n            Xy = xgb.ExtMemQuantileDMatrix(it, cache_host_ratio=cache_host_ratio)\n            booster = xgb.train({\"device\": \"cuda\"}, Xy)\n            boosters.append(booster.save_raw(raw_format=\"json\"))\n\n        for model in boosters[1:]:\n            assert str(model) == str(boosters[0])\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_demos.py",
    "content": "import os\nimport subprocess\n\nimport pytest\n\nfrom xgboost import testing as tm\n\nDEMO_DIR = tm.demo_dir(__file__)\nPYTHON_DEMO_DIR = os.path.join(DEMO_DIR, \"guide-python\")\n\n\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_data_iterator() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"quantile_data_iterator.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\ndef test_update_process_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"update_process.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\ndef test_categorical_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"categorical.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_rmm())\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_external_memory_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"external_memory.py\")\n    cmd = [\"python\", script, \"--device=cuda\"]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_rmm())\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.mgpu\ndef test_distributed_extmem_basic_demo() -> None:\n    script = os.path.join(PYTHON_DEMO_DIR, \"distributed_extmem_basic.py\")\n    cmd = [\"python\", script, \"--device=cuda\"]\n    subprocess.check_call(cmd)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_eval_metrics.py",
    "content": "import json\n\nimport pytest\n\nimport xgboost\nfrom xgboost import testing as tm\nfrom xgboost.testing.metrics import (\n    check_precision_score,\n    check_quantile_error,\n    run_pr_auc_binary,\n    run_pr_auc_ltr,\n    run_pr_auc_multi,\n    run_roc_auc_binary,\n    run_roc_auc_multi,\n)\n\n\nclass TestGPUEvalMetrics:\n    @pytest.mark.parametrize(\"n_samples\", [4, 100, 1000])\n    def test_roc_auc_binary(self, n_samples: int) -> None:\n        run_roc_auc_binary(\"hist\", n_samples, \"cuda\")\n\n    @pytest.mark.parametrize(\n        \"n_samples,weighted\", [(4, False), (100, False), (1000, False), (1000, True)]\n    )\n    def test_roc_auc_multi(self, n_samples: int, weighted: bool) -> None:\n        run_roc_auc_multi(\"hist\", n_samples, weighted, \"cuda\")\n\n    @pytest.mark.parametrize(\"n_samples\", [4, 100, 1000])\n    def test_roc_auc_ltr(self, n_samples: int) -> None:\n        import numpy as np\n\n        rng = np.random.RandomState(1994)\n        n_samples = n_samples\n        n_features = 10\n        X = rng.randn(n_samples, n_features)\n        y = rng.randint(0, 16, size=n_samples)\n        group = np.array([n_samples // 2, n_samples // 2])\n\n        Xy = xgboost.DMatrix(X, y, group=group)\n\n        booster = xgboost.train(\n            {\"tree_method\": \"hist\", \"eval_metric\": \"auc\", \"objective\": \"rank:ndcg\"},\n            Xy,\n            num_boost_round=10,\n        )\n        cpu_auc = float(booster.eval(Xy).split(\":\")[1])\n        booster.set_param({\"device\": \"cuda:0\"})\n        assert (\n            json.loads(booster.save_config())[\"learner\"][\"generic_param\"][\"device\"]\n            == \"cuda:0\"\n        )\n        gpu_auc = float(booster.eval(Xy).split(\":\")[1])\n        assert (\n            json.loads(booster.save_config())[\"learner\"][\"generic_param\"][\"device\"]\n            == \"cuda:0\"\n        )\n\n        np.testing.assert_allclose(cpu_auc, gpu_auc)\n\n    def 
test_pr_auc_binary(self) -> None:\n        run_pr_auc_binary(\"hist\", \"cuda\")\n\n    def test_pr_auc_multi(self) -> None:\n        run_pr_auc_multi(\"hist\", \"cuda\")\n\n    def test_pr_auc_ltr(self) -> None:\n        run_pr_auc_ltr(\"hist\", \"cuda\")\n\n    def test_precision_score(self) -> None:\n        check_precision_score(\"hist\", \"cuda\")\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_quantile_error(self) -> None:\n        check_quantile_error(\"hist\", \"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_interaction_constraints.py",
    "content": "import numpy as np\nimport pandas as pd\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost.testing.interaction_constraints import (\n    run_interaction_constraints,\n    training_accuracy,\n)\n\n\nclass TestGPUInteractionConstraints:\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_interaction_constraints(self, tree_method: str) -> None:\n        run_interaction_constraints(tree_method=tree_method, device=\"cuda\")\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_training_accuracy(self, tree_method: str) -> None:\n        dpath = \"demo/data/\"\n        training_accuracy(tree_method=tree_method, dpath=dpath, device=\"cuda\")\n\n    # case where different number of features can occur in the evaluator\n    def test_issue_8730(self) -> None:\n        X = pd.DataFrame(\n            zip(range(0, 100), range(200, 300), range(300, 400), range(400, 500)),\n            columns=[\"A\", \"B\", \"C\", \"D\"],\n        )\n        y = np.array([*([0] * 50), *([1] * 50)])\n        dm = xgb.DMatrix(X, label=y)\n\n        params = {\n            \"eta\": 0.16095019509249486,\n            \"min_child_weight\": 1,\n            \"subsample\": 0.688567929338029,\n            \"colsample_bynode\": 0.7,\n            \"gamma\": 5.666579817418348e-06,\n            \"lambda\": 0.14943712232059794,\n            \"grow_policy\": \"depthwise\",\n            \"max_depth\": 3,\n            \"tree_method\": \"hist\",\n            \"device\": \"cuda\",\n            \"interaction_constraints\": [[\"A\", \"B\"], [\"B\", \"D\", \"C\"], [\"C\", \"D\"]],\n            \"objective\": \"count:poisson\",\n            \"eval_metric\": \"poisson-nloglik\",\n            \"verbosity\": 0,\n        }\n\n        xgb.train(params, dm, num_boost_round=100)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_intercept.py",
    "content": "from itertools import product\n\nimport pytest\n\nfrom xgboost.testing.intercept import (\n    run_adaptive,\n    run_exp_family,\n    run_init_estimation,\n    run_logistic_degenerate,\n)\n\n\ndef test_init_estimation() -> None:\n    run_init_estimation(\"hist\", \"cuda\")\n\n\n@pytest.mark.parametrize(\n    \"tree_method,weighted\", list(product([\"approx\", \"hist\"], [True, False]))\n)\ndef test_adaptive(tree_method: str, weighted: bool) -> None:\n    run_adaptive(tree_method, weighted, \"cuda\")\n\n\ndef test_exp_family() -> None:\n    run_exp_family(\"cuda\")\n\n\ndef test_logistic_degenerate() -> None:\n    run_logistic_degenerate(\"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_linear.py",
    "content": "from typing import Any, Dict\n\nimport pytest\nfrom hypothesis import assume, given, note, settings, strategies\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\n\npytestmark = tm.timeout(10)\n\nparameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"booster\": strategies.just(\"gblinear\"),\n        \"eta\": strategies.floats(0.01, 0.25),\n        \"tolerance\": strategies.floats(1e-5, 1e-2),\n        \"nthread\": strategies.integers(1, 4),\n        \"feature_selector\": strategies.sampled_from(\n            [\"cyclic\", \"shuffle\", \"greedy\", \"thrifty\"]\n        ),\n        \"top_k\": strategies.integers(1, 10),\n    }\n)\n\n\ndef train_result(\n    param: Dict[str, Any], dmat: xgb.DMatrix, num_rounds: int\n) -> Dict[str, Any]:\n    result: Dict[str, Any] = {}\n    booster = xgb.train(\n        param,\n        dmat,\n        num_rounds,\n        [(dmat, \"train\")],\n        verbose_eval=False,\n        evals_result=result,\n    )\n    assert booster.num_boosted_rounds() == num_rounds\n    return result\n\n\nclass TestGPULinear:\n    @given(parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy())\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_gpu_coordinate(\n        self, param: Dict[str, Any], num_rounds: int, dataset: tm.TestDataset\n    ) -> None:\n        assume(len(dataset.y) > 0)\n        param[\"updater\"] = \"coord_descent\"\n        param[\"device\"] = \"cuda\"\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)[\"train\"][\n            dataset.metric\n        ]\n        note(result)\n        assert tm.non_increasing(result)\n\n    # Loss is not guaranteed to always decrease because of regularisation parameters\n    # We test a weaker condition that the loss has not increased between the first and last\n    # iteration\n    @given(\n        parameter_strategy,\n        strategies.integers(10, 50),\n 
       tm.make_dataset_strategy(),\n        strategies.floats(1e-5, 0.8),\n        strategies.floats(1e-5, 0.8),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_gpu_coordinate_regularised(\n        self,\n        param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n        alpha: float,\n        lambd: float,\n    ) -> None:\n        assume(len(dataset.y) > 0)\n        param[\"updater\"] = \"coord_descent\"\n        param[\"device\"] = \"cuda\"\n        param[\"alpha\"] = alpha\n        param[\"lambda\"] = lambd\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)[\"train\"][\n            dataset.metric\n        ]\n        note(result)\n        assert tm.non_increasing([result[0], result[-1]])\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_gpu_coordinate_from_cupy(self) -> None:\n        # Training linear model is quite expensive, so we don't include it in\n        # test_from_cupy.py\n        import cupy\n\n        params = {\n            \"booster\": \"gblinear\",\n            \"updater\": \"coord_descent\",\n            \"device\": \"cuda\",\n            \"n_estimators\": 100,\n        }\n        X, y = tm.get_california_housing()\n        cpu_model = xgb.XGBRegressor(**params)\n        cpu_model.fit(X, y)\n        cpu_predt = cpu_model.predict(X)\n\n        X = cupy.array(X)\n        y = cupy.array(y)\n        gpu_model = xgb.XGBRegressor(**params)\n        gpu_model.fit(X, y)\n        gpu_predt = gpu_model.predict(X)\n        cupy.testing.assert_allclose(cpu_predt, gpu_predt)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_multi_target.py",
    "content": "\"\"\"Tests for the CUDA implementation of multi-target.\"\"\"\n\n# pylint: disable=too-many-positional-arguments,missing-function-docstring\nfrom typing import Any, Callable, Dict, Optional\n\nimport pytest\nfrom hypothesis import given, note, settings, strategies\nfrom xgboost import config_context\nfrom xgboost import testing as tm\nfrom xgboost.testing.multi_target import (\n    all_reg_objectives,\n    run_absolute_error,\n    run_column_sampling,\n    run_deterministic,\n    run_eta,\n    run_feature_importance_strategy_compare,\n    run_gradient_based_sampling_accuracy,\n    run_grow_policy,\n    run_mixed_strategy,\n    run_multiclass,\n    run_multilabel,\n    run_quantile_loss,\n    run_reduced_grad,\n    run_subsample,\n    run_with_iter,\n)\nfrom xgboost.testing.params import hist_parameter_strategy\nfrom xgboost.testing.updater import check_quantile_loss_rf, train_result\nfrom xgboost.testing.utils import Device\n\n\n@pytest.mark.parametrize(\"learning_rate\", [1.0, None])\ndef test_multiclass(learning_rate: Optional[float]) -> None:\n    run_multiclass(\"cuda\", learning_rate)\n\n\n@pytest.mark.parametrize(\"learning_rate\", [1.0, None])\ndef test_multilabel(learning_rate: Optional[float]) -> None:\n    run_multilabel(\"cuda\", learning_rate)\n\n\n@pytest.mark.parametrize(\"weighted\", [True, False])\ndef test_quantile_loss(weighted: bool) -> None:\n    run_quantile_loss(\"cuda\", weighted)\n\n\n@pytest.mark.parametrize(\"multi_strategy\", [\"multi_output_tree\", \"one_output_per_tree\"])\ndef test_quantile_loss_rf(multi_strategy: str) -> None:\n    check_quantile_loss_rf(\"cuda\", \"hist\", multi_strategy)\n    if multi_strategy == \"one_output_per_tree\":\n        check_quantile_loss_rf(\"cuda\", \"approx\", multi_strategy)\n\n\ndef test_absolute_error() -> None:\n    run_absolute_error(\"cuda\")\n\n\ndef test_reduced_grad() -> None:\n    run_reduced_grad(\"cuda\")\n\n\ndef test_with_iter() -> None:\n    with 
config_context(use_rmm=True):\n        run_with_iter(\"cuda\")\n\n\ndef test_eta() -> None:\n    run_eta(\"cuda\")\n\n\ndef test_deterministic() -> None:\n    run_deterministic(\"cuda\")\n\n\ndef test_column_sampling() -> None:\n    run_column_sampling(\"cuda\")\n\n\n@pytest.mark.parametrize(\"grow_policy\", [\"depthwise\", \"lossguide\"])\ndef test_grow_policy(grow_policy: str) -> None:\n    run_grow_policy(\"cuda\", grow_policy)\n\n\ndef test_mixed_strategy() -> None:\n    run_mixed_strategy(\"cuda\")\n\n\ndef test_feature_importance_strategy_compare() -> None:\n    run_feature_importance_strategy_compare(\"cuda\")\n\n\n@given(hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy)\n@settings(deadline=None, max_examples=50, print_blob=True)\ndef test_hist(param: Dict[str, Any], num_rounds: int, dataset: tm.TestDataset) -> None:\n    param[\"tree_method\"] = \"hist\"\n    param[\"device\"] = \"cuda\"\n    param = dataset.set_params(param)\n    result = train_result(param, dataset.get_dmat(), num_rounds)\n    note(str(result))\n    assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n\n@pytest.mark.parametrize(\"obj_fn\", all_reg_objectives())\ndef test_reg_objective(obj_fn: Callable[[Device], None]) -> None:\n    obj_fn(\"cuda\")\n\n\n@pytest.mark.parametrize(\"sampling_method\", [\"uniform\", \"gradient_based\"])\ndef test_subsample(sampling_method: str) -> None:\n    run_subsample(\"cuda\", sampling_method)\n\n\ndef test_gradient_based_sampling_accuracy() -> None:\n    run_gradient_based_sampling_accuracy(\"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_ordinal.py",
    "content": "import os\nfrom concurrent.futures import ThreadPoolExecutor\nfrom typing import Type\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.data import make_categorical\nfrom xgboost.testing.ordinal import (\n    run_basic_predict,\n    run_cat_container,\n    run_cat_container_iter,\n    run_cat_container_mixed,\n    run_cat_invalid,\n    run_cat_leaf,\n    run_cat_predict,\n    run_cat_shap,\n    run_cat_thread_safety,\n    run_recode_dmatrix,\n    run_recode_dmatrix_predict,\n    run_specified_cat,\n    run_training_continuation,\n    run_update,\n    run_validation,\n)\n\npytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_arrow(), tm.no_cudf()))\n\n\ndef test_cat_container() -> None:\n    run_cat_container(\"cuda\")\n\n\ndef test_cat_container_mixed() -> None:\n    run_cat_container_mixed(\"cuda\")\n\n\ndef test_cat_container_iter() -> None:\n    run_cat_container_iter(\"cuda\")\n\n\ndef test_cat_predict() -> None:\n    run_cat_predict(\"cuda\")\n\n\ndef test_cat_invalid() -> None:\n    run_cat_invalid(\"cuda\")\n\n\ndef test_cat_thread_safety() -> None:\n    run_cat_thread_safety(\"cuda\")\n\n\ndef test_cat_shap() -> None:\n    run_cat_shap(\"cuda\")\n\n\ndef test_cat_leaf() -> None:\n    run_cat_leaf(\"cuda\")\n\n\ndef test_mixed_devices() -> None:\n    n_samples = 128\n    n_features = 4\n    X, y = make_categorical(n_samples, n_features, 7, onehot=False, device=\"cpu\")\n\n    def run_cpu_gpu(DMatrixT: Type) -> bool:\n        Xy = DMatrixT(X, y)\n        booster = xgb.train({\"tree_method\": \"hist\", \"device\": \"cuda\"}, Xy)\n        predt0 = booster.inplace_predict(X)\n        predt1 = booster.predict(DMatrixT(X, y))\n\n        np.testing.assert_allclose(predt0, predt1)\n        return True\n\n    n_cpus = os.cpu_count()\n    assert n_cpus is not None\n\n    futures = []\n    with ThreadPoolExecutor(max_workers=n_cpus) as e:\n        for dm in (xgb.DMatrix, 
xgb.QuantileDMatrix):\n            f = e.submit(run_cpu_gpu, dm)\n            futures.append(f)\n\n    for f in futures:\n        assert f.result()\n\n    X, y = make_categorical(n_samples, n_features, 7, onehot=False, device=\"cuda\")\n\n    def run_gpu_cpu(DMatrixT: Type) -> bool:\n        Xy = DMatrixT(X, y)\n        booster = xgb.train({\"tree_method\": \"hist\", \"device\": \"cpu\"}, Xy)\n        p = booster.inplace_predict(X)\n        assert not isinstance(p, np.ndarray)\n        predt0 = p.get()\n        predt1 = booster.predict(DMatrixT(X, y))\n\n        np.testing.assert_allclose(predt0, predt1)\n        return True\n\n    futures = []\n    with ThreadPoolExecutor(max_workers=n_cpus) as e:\n        for dm in (xgb.DMatrix, xgb.QuantileDMatrix):\n            f = e.submit(run_gpu_cpu, dm)\n            futures.append(f)\n\n    for f in futures:\n        assert f.result()\n\n\n@pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\ndef test_mixed_devices_types(DMatrixT: Type) -> None:\n    run_basic_predict(DMatrixT, \"cuda\", \"cpu\")\n    run_basic_predict(DMatrixT, \"cpu\", \"cuda\")\n\n\ndef test_specified_cat() -> None:\n    run_specified_cat(\"cuda\")\n\n\ndef test_validation() -> None:\n    run_validation(\"cuda\")\n\n\ndef test_recode_dmatrix() -> None:\n    run_recode_dmatrix(\"cuda\")\n\n\ndef test_training_continuation() -> None:\n    run_training_continuation(\"cuda\")\n\n\ndef test_update() -> None:\n    run_update(\"cuda\")\n\n\ndef test_recode_dmatrix_predict() -> None:\n    run_recode_dmatrix_predict(\"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_parse_tree.py",
    "content": "from xgboost.testing.parse_tree import (\n    run_split_value_histograms,\n    run_tree_to_df_categorical,\n)\n\n\ndef test_tree_to_df_categorical() -> None:\n    run_tree_to_df_categorical(\"hist\", \"cuda\")\n\n\ndef test_split_value_histograms() -> None:\n    run_split_value_histograms(\"hist\", \"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_pickling.py",
    "content": "\"\"\"Test model IO with pickle.\"\"\"\n\nimport os\nimport pickle\nimport subprocess\nfrom typing import Any, Dict, Tuple, Union\n\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import XGBClassifier\nfrom xgboost import testing as tm\n\nmodel_path = \"./model.pkl\"\n\npytestmark = tm.timeout(30)\n\n\ndef build_dataset() -> Tuple[np.ndarray, np.ndarray]:\n    N = 10\n    x = np.linspace(0, N * N, N * N)\n    x = x.reshape((N, N))\n    y = np.linspace(0, N, N)\n    return x, y\n\n\ndef save_pickle(bst: Union[xgb.Booster, xgb.XGBModel], path: str) -> None:\n    with open(path, \"wb\") as fd:\n        pickle.dump(bst, fd)\n\n\ndef load_pickle(path: str) -> Any:\n    with open(path, \"rb\") as fd:\n        bst = pickle.load(fd)\n    return bst\n\n\nclass TestPickling:\n    args_template = [\"pytest\", \"--verbose\", \"-s\", \"--fulltrace\"]\n\n    def run_pickling(self, bst: Union[xgb.Booster, xgb.XGBModel]) -> None:\n        save_pickle(bst, model_path)\n        args = [\n            \"pytest\",\n            \"--verbose\",\n            \"-s\",\n            \"--fulltrace\",\n            \"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl\",\n        ]\n        command = \"\"\n        for arg in args:\n            command += arg\n            command += \" \"\n\n        cuda_environment = {\"CUDA_VISIBLE_DEVICES\": \"-1\"}\n        env = os.environ.copy()\n        # Passing new_environment directly to `env' argument results\n        # in failure on Windows:\n        #    Fatal Python error: _Py_HashRandomization_Init: failed to\n        #    get random numbers to initialize Python\n        env.update(cuda_environment)\n\n        # Load model in a CPU only environment.\n        status = subprocess.call(command, env=env, shell=True)\n        assert status == 0\n        os.remove(model_path)\n\n    # TODO: This test is too slow\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_pickling(self) -> None:\n        
x, y = build_dataset()\n        train_x = xgb.DMatrix(x, label=y)\n\n        param = {\"tree_method\": \"hist\", \"device\": \"cuda\"}\n        bst = xgb.train(param, train_x)\n        self.run_pickling(bst)\n\n        bst = xgb.XGBRegressor(**param).fit(x, y)\n        self.run_pickling(bst)\n\n        param = {\"booster\": \"gblinear\", \"updater\": \"coord_descent\", \"device\": \"cuda\"}\n        bst = xgb.train(param, train_x)\n        self.run_pickling(bst)\n\n        bst = xgb.XGBRegressor(**param).fit(x, y)\n        self.run_pickling(bst)\n\n    @pytest.mark.mgpu\n    def test_wrap_gpu_id(self) -> None:\n        X, y = build_dataset()\n        dtrain = xgb.DMatrix(X, y)\n\n        bst = xgb.train(\n            {\"tree_method\": \"hist\", \"device\": \"cuda:1\"}, dtrain, num_boost_round=6\n        )\n\n        model_path = \"model.pkl\"\n        save_pickle(bst, model_path)\n        cuda_environment = {\"CUDA_VISIBLE_DEVICES\": \"0\"}\n        env = os.environ.copy()\n        env.update(cuda_environment)\n        args = self.args_template.copy()\n        args.append(\n            \"./tests/python-gpu/\" \"load_pickle.py::TestLoadPickle::test_wrap_gpu_id\"\n        )\n        status = subprocess.call(args, env=env)\n        assert status == 0\n        os.remove(model_path)\n\n    def test_pickled_context(self) -> None:\n        x, y = tm.make_sparse_regression(10, 10, sparsity=0.8, as_dense=True)\n        train_x = xgb.DMatrix(x, label=y)\n\n        def run_test(param: Dict[str, Any]) -> None:\n            bst = xgb.train(param, train_x)\n\n            save_pickle(bst, model_path)\n\n            args = self.args_template.copy()\n            root = tm.project_root(__file__)\n            path = os.path.join(root, \"tests\", \"python-gpu\", \"load_pickle.py\")\n            args.append(path + \"::TestLoadPickle::test_context_is_removed\")\n\n            cuda_environment = {\"CUDA_VISIBLE_DEVICES\": \"-1\"}\n            env = os.environ.copy()\n            
env.update(cuda_environment)\n\n            # Load model in a CPU only environment.\n            status = subprocess.call(args, env=env)\n            assert status == 0\n\n            args = self.args_template.copy()\n            args.append(\n                \"./tests/python-gpu/\"\n                \"load_pickle.py::TestLoadPickle::test_context_is_preserved\"\n            )\n\n            # Load in environment that has GPU.\n            env = os.environ.copy()\n            assert \"CUDA_VISIBLE_DEVICES\" not in env.keys()\n            status = subprocess.call(args, env=env)\n            assert status == 0\n\n            os.remove(model_path)\n\n        param = {\"tree_method\": \"hist\", \"verbosity\": 1, \"device\": \"cuda\"}\n        run_test(param)\n        param = {\"booster\": \"gblinear\", \"updater\": \"coord_descent\", \"device\": \"cuda\"}\n        run_test(param)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_predict_sklearn_pickle(self) -> None:\n        from sklearn.datasets import load_digits\n\n        x, y = load_digits(return_X_y=True)\n\n        kwargs = {\n            \"tree_method\": \"hist\",\n            \"objective\": \"binary:logistic\",\n            \"device\": \"cuda\",\n            \"n_estimators\": 10,\n        }\n\n        model = XGBClassifier(**kwargs)\n        model.fit(x, y)\n\n        save_pickle(model, \"model.pkl\")\n        del model\n\n        # load model\n        model = load_pickle(\"model.pkl\")\n        os.remove(\"model.pkl\")\n\n        gpu_pred = model.predict(x, output_margin=True)\n\n        # Switch to CPU predictor\n        bst = model.get_booster()\n        bst.set_param({\"device\": \"cpu\"})\n        cpu_pred = model.predict(x, output_margin=True)\n        np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)\n\n    def test_training_on_cpu_only_env(self) -> None:\n        cuda_environment = {\"CUDA_VISIBLE_DEVICES\": \"-1\"}\n        env = os.environ.copy()\n        
env.update(cuda_environment)\n        args = self.args_template.copy()\n        args.append(\n            \"./tests/python-gpu/\"\n            \"load_pickle.py::TestLoadPickle::test_training_on_cpu_only_env\"\n        )\n        status = subprocess.call(args, env=env)\n        assert status == 0\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_plotting.py",
    "content": "import pytest\n\nfrom xgboost import testing as tm\n\n\nclass TestPlotting:\n    @pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))\n    def test_categorical(self) -> None:\n        from xgboost.testing.plotting import run_categorical\n\n        run_categorical(\"hist\", \"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_prediction.py",
    "content": "import sys\nfrom copy import copy\nfrom typing import Any, Dict, Type\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import assume, given, settings, strategies\nfrom hypothesis.extra.pandas import column, data_frames, range_indexes\nfrom xgboost import testing as tm\nfrom xgboost.compat import import_cupy\nfrom xgboost.testing.predict import run_base_margin_vs_base_score, run_predict_leaf\n\nsys.path.append(\"tests/python\")\n\nfrom test_predict import run_threaded_predict  # noqa\n\nrng = np.random.RandomState(1994)\n\nshap_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(1, 11),\n        \"max_leaves\": strategies.integers(0, 256),\n        \"num_parallel_tree\": strategies.sampled_from([1, 10]),\n    }\n).filter(lambda x: x[\"max_depth\"] > 0 or x[\"max_leaves\"] > 0)\n\npredict_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(1, 8),\n        \"num_parallel_tree\": strategies.sampled_from([1, 4]),\n    }\n)\n\n# cupy nvrtc compilation can take a long time for the first run\npytestmark = tm.timeout(60)\n\n\nclass TestGPUPredict:\n    def test_predict(self) -> None:\n        iterations = 10\n        np.random.seed(1)\n        test_num_rows = [10, 1000, 5000]\n        test_num_cols = [10, 50, 500]\n        # This test passes for tree_method=gpu_hist and tree_method=exact. but\n        # for `hist` and `approx` the floating point error accumulates faster\n        # and fails even tol is set to 1e-4.  
For `hist`, the mismatching rate\n        # with 5000 rows is 0.04.\n        for num_rows in test_num_rows:\n            for num_cols in test_num_cols:\n                dtrain = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                dval = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                dtest = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                watchlist = [(dtrain, \"train\"), (dval, \"validation\")]\n                res: Dict[str, Any] = {}\n                param = {\n                    \"objective\": \"binary:logistic\",\n                    \"eval_metric\": \"logloss\",\n                    \"tree_method\": \"hist\",\n                    \"device\": \"gpu:0\",\n                    \"max_depth\": 1,\n                }\n                bst = xgb.train(\n                    param, dtrain, iterations, evals=watchlist, evals_result=res\n                )\n                assert tm.non_increasing(res[\"train\"][\"logloss\"], tolerance=0.001)\n\n                gpu_pred_train = bst.predict(dtrain, output_margin=True)\n                gpu_pred_test = bst.predict(dtest, output_margin=True)\n                gpu_pred_val = bst.predict(dval, output_margin=True)\n\n                bst.set_param({\"device\": \"cpu\", \"tree_method\": \"hist\"})\n                bst_cpu = copy(bst)\n                cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)\n                cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)\n                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)\n\n                np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-6)\n                np.testing.assert_allclose(cpu_pred_val, 
gpu_pred_val, rtol=1e-6)\n                np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-6)\n\n    # Test case for a bug where multiple batch predictions made on a\n    # test set produce incorrect results\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_multi_predict(self) -> None:\n        from sklearn.datasets import make_regression\n        from sklearn.model_selection import train_test_split\n\n        n = 1000\n        X, y = make_regression(n, random_state=rng)\n        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)\n        dtrain = xgb.DMatrix(X_train, label=y_train)\n\n        params = {}\n        params[\"tree_method\"] = \"hist\"\n        params[\"device\"] = \"cuda:0\"\n        bst = xgb.train(params, dtrain)\n\n        bst.set_param({\"device\": \"cuda:0\"})\n        # Don't reuse the DMatrix for prediction, otherwise the result is cached.\n        predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))\n        predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))\n        bst.set_param({\"device\": \"cpu\"})\n        predict_cpu = bst.predict(xgb.DMatrix(X_test))\n\n        assert np.allclose(predict_gpu_0, predict_gpu_1)\n        assert np.allclose(predict_gpu_0, predict_cpu)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_sklearn(self) -> None:\n        m, n = 15000, 14\n        tr_size = 2500\n        X = np.random.rand(m, n)\n        y = 200 * np.matmul(X, np.arange(-3, -3 + n))\n        y = y.reshape(y.size)\n        X_train, y_train = X[:tr_size, :], y[:tr_size]\n        X_test, y_test = X[tr_size:, :], y[tr_size:]\n\n        params = {\n            \"tree_method\": \"hist\",\n            \"device\": \"cuda:0\",\n            \"n_jobs\": -1,\n            \"seed\": 123,\n        }\n        m = xgb.XGBRegressor(**params).fit(X_train, y_train)\n        gpu_train_score = m.score(X_train, y_train)\n        gpu_test_score = m.score(X_test, y_test)\n\n        # Now with cpu\n        
m.set_params(device=\"cpu\")\n        cpu_train_score = m.score(X_train, y_train)\n        cpu_test_score = m.score(X_test, y_test)\n\n        assert np.allclose(cpu_train_score, gpu_train_score)\n        assert np.allclose(cpu_test_score, gpu_test_score)\n\n    @pytest.mark.parametrize(\"device\", [\"cpu\", \"cuda\"])\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_inplace_predict_device_type(self, device: str) -> None:\n        \"\"\"Test inplace predict with different device and data types.\n\n        The sklearn interface uses inplace predict by default and gbtree fallbacks to\n        DMatrix whenever device doesn't match. This test checks that XGBoost can handle\n        different combinations of device and input data type.\n\n        \"\"\"\n        import cudf\n        import pandas as pd\n        from scipy.sparse import csr_matrix\n\n        cp = import_cupy()\n        reg = xgb.XGBRegressor(tree_method=\"hist\", device=device)\n        n_samples = 4096\n        n_features = 13\n        X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)\n        X[X == 0.0] = 1.0\n\n        reg.fit(X, y, sample_weight=w)\n        predt_0 = reg.predict(X)\n\n        X = cp.asnumpy(X)\n        predt_1 = reg.predict(X)\n\n        df = pd.DataFrame(X)\n        predt_2 = reg.predict(df)\n\n        df = cudf.DataFrame(X)\n        predt_3 = reg.predict(df)\n\n        X_csr = csr_matrix(X)\n        predt_4 = reg.predict(X_csr)\n\n        np.testing.assert_allclose(predt_0, predt_1)\n        np.testing.assert_allclose(predt_0, predt_2)\n        np.testing.assert_allclose(predt_0, predt_3)\n        np.testing.assert_allclose(predt_0, predt_4)\n\n    def run_inplace_base_margin(\n        self,\n        device: int,\n        booster: xgb.Booster,\n        dtrain: xgb.DMatrix,\n        X: Any,\n        base_margin: Any,\n    ) -> None:\n        cp = import_cupy()\n\n        booster.set_param({\"device\": 
f\"cuda:{device}\"})\n        dtrain.set_info(base_margin=base_margin)\n        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)\n        from_dmatrix = booster.predict(dtrain)\n        cp.testing.assert_allclose(from_inplace, from_dmatrix)\n\n        booster = booster.copy()  # clear prediction cache.\n        booster.set_param({\"device\": \"cpu\"})\n        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)\n        from_dmatrix = booster.predict(dtrain)\n        cp.testing.assert_allclose(from_inplace, from_dmatrix)\n\n        booster = booster.copy()  # clear prediction cache.\n        base_margin = cp.asnumpy(base_margin)\n        if hasattr(X, \"values\"):\n            X = cp.asnumpy(X.values)\n        booster.set_param({\"device\": f\"cuda:{device}\"})\n        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)\n        from_dmatrix = booster.predict(dtrain)\n        cp.testing.assert_allclose(from_inplace, from_dmatrix, rtol=1e-6)\n\n    def run_inplace_predict_cupy(self, device: int) -> None:\n        import cupy as cp\n\n        cp.cuda.runtime.setDevice(device)\n        rows = 1000\n        cols = 10\n        missing = 11  # set to integer for testing\n\n        cp_rng = cp.random.RandomState(np.uint64(1994))\n        cp.random.set_random_state(cp_rng)\n\n        X = cp.random.randn(rows, cols)\n        missing_idx = [i for i in range(0, cols, 4)]\n        X[:, missing_idx] = missing  # set to be missing\n        y = cp.random.randn(rows)\n\n        dtrain = xgb.DMatrix(X, y)\n\n        booster = xgb.train(\n            {\"tree_method\": \"hist\", \"device\": f\"cuda:{device}\"},\n            dtrain,\n            num_boost_round=10,\n        )\n\n        test = xgb.DMatrix(X[:10, ...], missing=missing)\n        predt_from_array = booster.inplace_predict(X[:10, ...], missing=missing)\n        predt_from_dmatrix = booster.predict(test)\n        cp.testing.assert_allclose(predt_from_array, 
predt_from_dmatrix)\n\n        def predict_dense(x: cp.ndarray) -> bool:\n            cp.cuda.runtime.setDevice(device)\n            inplace_predt = booster.inplace_predict(x)\n            d = xgb.DMatrix(x)\n            copied_predt = cp.array(booster.predict(d))\n            return cp.all(copied_predt == inplace_predt)\n\n        # Don't do this on Windows, see issue #5793\n        if sys.platform.startswith(\"win\"):\n            pytest.skip(\n                \"Multi-threaded in-place prediction with cuPy is not working on Windows\"\n            )\n        for i in range(10):\n            run_threaded_predict(X, rows, predict_dense)\n\n        base_margin = cp_rng.randn(rows)\n        self.run_inplace_base_margin(device, booster, dtrain, X, base_margin)\n\n        # Create a wide dataset\n        X = cp_rng.randn(100, 10000)\n        y = cp_rng.randn(100)\n\n        missing_idx = [i for i in range(0, X.shape[1], 16)]\n        X[:, missing_idx] = missing\n        reg = xgb.XGBRegressor(\n            tree_method=\"hist\", n_estimators=8, missing=missing, device=f\"cuda:{device}\"\n        )\n        reg.fit(X, y)\n\n        reg.set_params(device=f\"cuda:{device}\")\n        gpu_predt = reg.predict(X)\n        reg = reg.set_params(device=\"cpu\")\n        cpu_predt = reg.predict(cp.asnumpy(X))\n        np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)\n        cp.cuda.runtime.setDevice(0)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_inplace_predict_cupy(self) -> None:\n        self.run_inplace_predict_cupy(0)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.mgpu\n    def test_inplace_predict_cupy_specified_device(self) -> None:\n        cp = import_cupy()\n\n        n_devices = cp.cuda.runtime.getDeviceCount()\n        for d in range(n_devices):\n            self.run_inplace_predict_cupy(d)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_inplace_predict_cudf(self) -> None:\n        
import cudf\n        import pandas as pd\n\n        cp = import_cupy()\n        rows = 1000\n        cols = 10\n        rng = np.random.RandomState(1994)\n        cp.cuda.runtime.setDevice(0)\n        X = rng.randn(rows, cols)\n        X = pd.DataFrame(X)\n        y = rng.randn(rows)\n        X = cudf.from_pandas(X)\n\n        dtrain = xgb.DMatrix(X, y)\n\n        booster = xgb.train(\n            {\"tree_method\": \"hist\", \"device\": \"cuda:0\"}, dtrain, num_boost_round=10\n        )\n        test = xgb.DMatrix(X)\n        predt_from_array = booster.inplace_predict(X)\n        predt_from_dmatrix = booster.predict(test)\n\n        cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)\n\n        def predict_df(x: cudf.DataFrame) -> bool:\n            # column major array\n            inplace_predt = booster.inplace_predict(x.values)\n            d = xgb.DMatrix(x)\n            copied_predt = cp.array(booster.predict(d))\n            assert cp.all(copied_predt == inplace_predt)\n\n            inplace_predt = booster.inplace_predict(x)\n            return cp.all(copied_predt == inplace_predt)\n\n        for i in range(10):\n            run_threaded_predict(X, rows, predict_df)\n\n        base_margin = cudf.Series(rng.randn(rows))\n        self.run_inplace_base_margin(0, booster, dtrain, X, base_margin)\n\n    @given(\n        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    @pytest.mark.timeout(120)\n    def test_shap(self, num_rounds: int, dataset: tm.TestDataset, param: dict) -> None:\n        if dataset.name.endswith(\"-l1\"):  # not supported by the exact tree method\n            return\n        param.update({\"tree_method\": \"hist\", \"device\": \"gpu:0\"})\n        param = dataset.set_params(param)\n        dmat = dataset.get_dmat()\n        bst = xgb.train(param, dmat, num_rounds)\n        test_dmat = xgb.DMatrix(\n            dataset.X, 
dataset.y, weight=dataset.w, base_margin=dataset.margin\n        )\n        bst.set_param({\"device\": \"gpu:0\"})\n        shap = bst.predict(test_dmat, pred_contribs=True)\n        margin = bst.predict(test_dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)\n\n        dmat = dataset.get_external_dmat()\n        shap = bst.predict(dmat, pred_contribs=True)\n        margin = bst.predict(dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)\n\n    @given(\n        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy\n    )\n    @settings(deadline=None, max_examples=10, print_blob=True)\n    @pytest.mark.timeout(120)\n    def test_shap_interactions(\n        self, num_rounds: int, dataset: tm.TestDataset, param: dict\n    ) -> None:\n        if dataset.name.endswith(\"-l1\"):  # not supported by the exact tree method\n            return\n        param.update({\"tree_method\": \"hist\", \"device\": \"cuda:0\"})\n        param = dataset.set_params(param)\n        dmat = dataset.get_dmat()\n        bst = xgb.train(param, dmat, num_rounds)\n\n        test_dmat = xgb.DMatrix(\n            dataset.X, dataset.y, weight=dataset.w, base_margin=dataset.margin\n        )\n        bst.set_param({\"device\": \"cuda:0\"})\n        shap = bst.predict(test_dmat, pred_interactions=True)\n        margin = bst.predict(test_dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n        assert np.allclose(\n            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),\n            margin,\n            1e-3,\n            1e-3,\n        )\n\n        test_dmat = dataset.get_external_dmat()\n        shap = bst.predict(test_dmat, pred_interactions=True)\n        margin = bst.predict(test_dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n     
   assert np.allclose(\n            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),\n            margin,\n            1e-3,\n            1e-3,\n        )\n\n    def test_shap_categorical(self) -> None:\n        X, y = tm.make_categorical(100, 20, 7, onehot=False)\n        Xy = xgb.DMatrix(X, y)\n        booster = xgb.train(\n            {\"tree_method\": \"hist\", \"device\": \"gpu:0\"}, Xy, num_boost_round=10\n        )\n\n        booster.set_param({\"device\": \"cuda:0\"})\n        shap = booster.predict(Xy, pred_contribs=True)\n        margin = booster.predict(Xy, output_margin=True)\n        np.testing.assert_allclose(\n            np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3\n        )\n\n        booster.set_param({\"device\": \"cpu\"})\n        shap = booster.predict(Xy, pred_contribs=True)\n        margin = booster.predict(Xy, output_margin=True)\n        np.testing.assert_allclose(\n            np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3\n        )\n\n    @pytest.mark.parametrize(\"DMatrixT\", [xgb.DMatrix, xgb.QuantileDMatrix])\n    def test_predict_leaf_basic(self, DMatrixT: Type[xgb.DMatrix]) -> None:\n        gpu_leaf = run_predict_leaf(\"cuda\", DMatrixT)\n        cpu_leaf = run_predict_leaf(\"cpu\", DMatrixT)\n        np.testing.assert_equal(gpu_leaf, cpu_leaf)\n\n    def run_predict_leaf_booster(\n        self,\n        param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n        DMatrixT: Type[xgb.DMatrix],\n    ) -> None:\n        param = dataset.set_params(param)\n        m = dataset.get_dmat()\n        booster = xgb.train(\n            param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds\n        )\n        booster.set_param({\"device\": \"cpu\"})\n        cpu_leaf = booster.predict(m, pred_leaf=True)\n\n        booster.set_param({\"device\": \"cuda:0\"})\n        gpu_leaf = booster.predict(m, pred_leaf=True)\n\n        np.testing.assert_equal(cpu_leaf, gpu_leaf)\n\n 
   @given(\n        predict_parameter_strategy,\n        tm.make_dataset_strategy(),\n        strategies.fixed_dictionaries(\n            {\n                \"DMatrixT\": strategies.sampled_from([xgb.DMatrix, xgb.QuantileDMatrix]),\n            }\n        ),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_predict_leaf_gbtree(\n        self, param: dict, dataset: tm.TestDataset, DMatrixT: Type[xgb.DMatrix]\n    ) -> None:\n        # Unsupported for random forest\n        if param.get(\"num_parallel_tree\", 1) > 1 and dataset.name.endswith(\"-l1\"):\n            return\n\n        param.update({\"booster\": \"gbtree\", \"tree_method\": \"hist\", \"device\": \"cuda:0\"})\n        self.run_predict_leaf_booster(param, 10, dataset, DMatrixT)\n\n    @given(predict_parameter_strategy, tm.make_dataset_strategy())\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None:\n        # Unsupported for random forest\n        if param.get(\"num_parallel_tree\", 1) > 1 and dataset.name.endswith(\"-l1\"):\n            return\n\n        param.update({\"booster\": \"dart\", \"tree_method\": \"hist\", \"device\": \"cuda:0\"})\n        self.run_predict_leaf_booster(param, 10, dataset, xgb.DMatrix)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    @pytest.mark.skipif(**tm.no_pandas())\n    @given(\n        df=data_frames(\n            [\n                column(\"x0\", elements=strategies.integers(min_value=0, max_value=3)),\n                column(\"x1\", elements=strategies.integers(min_value=0, max_value=5)),\n            ],\n            index=range_indexes(min_size=20, max_size=50),\n        )\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_predict_categorical_split(self, df: Any) -> None:\n        from sklearn.metrics import root_mean_squared_error\n\n        df = df.astype(\"category\")\n        x0, x1 = 
df[\"x0\"].to_numpy(), df[\"x1\"].to_numpy()\n        y = (x0 * 10 - 20) + (x1 - 2)\n        dtrain = xgb.DMatrix(df, label=y)\n\n        params = {\n            \"tree_method\": \"hist\",\n            \"max_depth\": 3,\n            \"learning_rate\": 1.0,\n            \"base_score\": 0.0,\n            \"eval_metric\": \"rmse\",\n            \"device\": \"cuda:0\",\n        }\n\n        eval_history: Dict[str, Any] = {}\n        bst = xgb.train(\n            params,\n            dtrain,\n            num_boost_round=5,\n            evals=[(dtrain, \"train\")],\n            verbose_eval=False,\n            evals_result=eval_history,\n        )\n        bst.set_param({\"device\": \"cuda:0\"})\n        pred = bst.predict(dtrain)\n        rmse = root_mean_squared_error(y_true=y, y_pred=pred)\n        np.testing.assert_almost_equal(\n            rmse, eval_history[\"train\"][\"rmse\"][-1], decimal=5\n        )\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.parametrize(\"n_classes\", [2, 3])\n    def test_predict_dart(self, n_classes: int) -> None:\n        from sklearn.datasets import make_classification\n\n        cp = import_cupy()\n        n_samples = 1000\n        X_, y_ = make_classification(\n            n_samples=n_samples, n_informative=5, n_classes=n_classes\n        )\n        X, y = cp.array(X_), cp.array(y_)\n\n        Xy = xgb.DMatrix(X, y)\n        if n_classes == 2:\n            params = {\n                \"tree_method\": \"hist\",\n                \"device\": \"cuda:0\",\n                \"booster\": \"dart\",\n                \"rate_drop\": 0.5,\n                \"objective\": \"binary:logistic\",\n            }\n        else:\n            params = {\n                \"tree_method\": \"hist\",\n                \"device\": \"cuda:0\",\n                \"booster\": \"dart\",\n                \"rate_drop\": 0.5,\n                \"objective\": \"multi:softprob\",\n                \"num_class\": n_classes,\n            }\n\n        booster = 
xgb.train(params, Xy, num_boost_round=32)\n\n        # auto (GPU)\n        inplace = booster.inplace_predict(X)\n        copied = booster.predict(Xy)\n\n        # CPU\n        booster.set_param({\"device\": \"cpu\"})\n        cpu_inplace = booster.inplace_predict(X_)\n        cpu_copied = booster.predict(Xy)\n\n        copied = cp.array(copied)\n        cp.testing.assert_allclose(cpu_inplace, copied, atol=1e-6)\n        cp.testing.assert_allclose(cpu_copied, copied, atol=1e-6)\n        cp.testing.assert_allclose(inplace, copied, atol=1e-6)\n\n        # GPU\n        booster.set_param({\"device\": \"cuda:0\"})\n        inplace = booster.inplace_predict(X)\n        copied = booster.predict(Xy)\n\n        copied = cp.array(copied)\n        cp.testing.assert_allclose(inplace, copied, atol=1e-6)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dtypes(self) -> None:\n        cp = import_cupy()\n\n        rows = 1000\n        cols = 10\n\n        rng = cp.random.default_rng(1994)\n        orig = rng.integers(low=0, high=127, size=rows * cols).reshape(rows, cols)\n        y = rng.integers(low=0, high=127, size=rows)\n        dtrain = xgb.DMatrix(orig, label=y)\n        booster = xgb.train({\"tree_method\": \"hist\", \"device\": \"cuda:0\"}, dtrain)\n\n        predt_orig = booster.inplace_predict(orig)\n        # all primitive types in numpy\n        for dtype in [\n            cp.byte,\n            cp.short,\n            cp.intc,\n            cp.int_,\n            cp.longlong,\n            cp.ubyte,\n            cp.ushort,\n            cp.uintc,\n            cp.uint,\n            cp.ulonglong,\n            cp.half,\n            cp.single,\n            cp.double,\n        ]:\n            X = cp.array(orig, dtype=dtype)\n            predt = booster.inplace_predict(X)\n            cp.testing.assert_allclose(predt, predt_orig)\n\n        # boolean\n        orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(rows, cols)\n        predt_orig = 
booster.inplace_predict(orig)\n\n        X = cp.array(orig, dtype=cp.bool_)\n        predt = booster.inplace_predict(X)\n        cp.testing.assert_allclose(predt, predt_orig)\n\n        # unsupported types\n        for dtype in [\n            cp.complex64,\n            cp.complex128,\n        ]:\n            X = cp.array(orig, dtype=dtype)\n            with pytest.raises(ValueError):\n                booster.inplace_predict(X)\n\n\ndef test_base_margin_vs_base_score() -> None:\n    run_base_margin_vs_base_score(\"cuda\")\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_shap_multiclass() -> None:\n    from sklearn.datasets import make_classification\n\n    X, y = make_classification(n_classes=3, random_state=2025, n_informative=16)\n    param = {\n        \"tree_method\": \"hist\",\n        \"device\": \"cuda\",\n        \"num_class\": 3,\n        \"base_score\": [1.0, 2.0, 3.0],\n    }\n    Xy = xgb.DMatrix(X, y)\n    bst = xgb.train(param, Xy, 8)\n\n    d_shap = bst.predict(Xy, pred_contribs=True)\n    d_margin = bst.predict(Xy, output_margin=True)\n\n    bst.set_param({\"device\": \"cpu\"})\n\n    h_shap = bst.predict(Xy, pred_contribs=True)\n    h_margin = bst.predict(Xy, output_margin=True)\n\n    np.testing.assert_allclose(d_shap, h_shap, atol=1e-6)\n    np.testing.assert_allclose(d_margin, h_margin, atol=1e-6)\n\n    # Compare base margin and base score\n    margin = np.stack(\n        [\n            np.ones(X.shape[0]),\n            np.full(X.shape[0], fill_value=2.0),\n            np.full(X.shape[0], fill_value=3.0),\n        ],\n        axis=1,\n    )\n    Xy = xgb.DMatrix(X, y, base_margin=margin)\n\n    bst.set_param({\"device\": \"cuda\"})\n    d_shap = bst.predict(Xy, pred_contribs=True)\n    np.testing.assert_allclose(d_shap, h_shap, atol=1e-6)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_ranking.py",
    "content": "from typing import Dict\n\nimport numpy as np\nimport pytest\n\nimport xgboost\nfrom xgboost import testing as tm\nfrom xgboost.testing.ranking import run_normalization, run_score_normalization\n\npytestmark = tm.timeout(30)\n\n\ndef comp_training_with_rank_objective(\n    dtrain: xgboost.DMatrix,\n    dtest: xgboost.DMatrix,\n    rank_objective: str,\n    metric_name: str,\n    tolerance: float = 1e-02,\n) -> None:\n    \"\"\"Internal method that trains the dataset using the rank objective on GPU and CPU,\n    evaluates the metric and determines if the delta between the metric is within the\n    tolerance level.\n\n    \"\"\"\n    # specify validations set to watch performance\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n\n    params = {\n        \"booster\": \"gbtree\",\n        \"tree_method\": \"hist\",\n        \"device\": \"cuda\",\n    }\n\n    num_trees = 100\n    check_metric_improvement_rounds = 10\n\n    evals_result: Dict[str, Dict] = {}\n    params[\"objective\"] = rank_objective\n    params[\"eval_metric\"] = metric_name\n    bst = xgboost.train(\n        params,\n        dtrain,\n        num_boost_round=num_trees,\n        early_stopping_rounds=check_metric_improvement_rounds,\n        evals=watchlist,\n        evals_result=evals_result,\n    )\n    gpu_scores = evals_result[\"train\"][metric_name][-1]\n\n    evals_result = {}\n\n    cpu_params = {\n        \"booster\": \"gbtree\",\n        \"tree_method\": \"hist\",\n        \"device\": \"cpu\",\n    }\n    cpu_params[\"objective\"] = rank_objective\n    cpu_params[\"eval_metric\"] = metric_name\n    bstc = xgboost.train(\n        cpu_params,\n        dtrain,\n        num_boost_round=num_trees,\n        early_stopping_rounds=check_metric_improvement_rounds,\n        evals=watchlist,\n        evals_result=evals_result,\n    )\n    cpu_scores = evals_result[\"train\"][metric_name][-1]\n\n    info = (rank_objective, metric_name)\n    assert np.allclose(gpu_scores, 
cpu_scores, tolerance, tolerance), info\n    assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info\n\n    evals_result_weighted: Dict[str, Dict] = {}\n    dtest.set_weight(np.ones((dtest.get_group().size,)))\n    dtrain.set_weight(np.ones((dtrain.get_group().size,)))\n    watchlist = [(dtest, \"eval\"), (dtrain, \"train\")]\n    bst_w = xgboost.train(\n        params,\n        dtrain,\n        num_boost_round=num_trees,\n        early_stopping_rounds=check_metric_improvement_rounds,\n        evals=watchlist,\n        evals_result=evals_result_weighted,\n    )\n    weighted_metric = evals_result_weighted[\"train\"][metric_name][-1]\n\n    tolerance = 1e-5\n    assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance)\n    assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance)\n\n\n@pytest.mark.parametrize(\n    \"objective,metric\",\n    [\n        (\"rank:pairwise\", \"auc\"),\n        (\"rank:pairwise\", \"ndcg\"),\n        (\"rank:pairwise\", \"map\"),\n        (\"rank:ndcg\", \"auc\"),\n        (\"rank:ndcg\", \"ndcg\"),\n        (\"rank:ndcg\", \"map\"),\n        (\"rank:map\", \"auc\"),\n        (\"rank:map\", \"ndcg\"),\n        (\"rank:map\", \"map\"),\n    ],\n)\ndef test_with_mq2008(objective: str, metric: str) -> None:\n    (\n        x_train,\n        y_train,\n        qid_train,\n        x_test,\n        y_test,\n        qid_test,\n        x_valid,\n        y_valid,\n        qid_valid,\n    ) = tm.data.get_mq2008(tm.demo_dir(__file__))\n\n    if metric.find(\"map\") != -1 or objective.find(\"map\") != -1:\n        y_train[y_train <= 1] = 0.0\n        y_train[y_train > 1] = 1.0\n        y_test[y_test <= 1] = 0.0\n        y_test[y_test > 1] = 1.0\n\n    dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train)\n    dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)\n\n    comp_training_with_rank_objective(dtrain, dtest, objective, metric)\n\n\ndef test_normalization() -> None:\n    
run_normalization(\"cuda\")\n\n\n@pytest.mark.parametrize(\"objective\", [\"rank:pairwise\", \"rank:ndcg\", \"rank:map\"])\ndef test_score_normalization(objective: str) -> None:\n    run_score_normalization(\"cuda\", objective)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_training_continuation.py",
    "content": "from typing import Any\n\nimport pytest\nfrom hypothesis import given, settings\nfrom xgboost import testing as tm\nfrom xgboost.testing.continuation import (\n    make_determinism_strategy,\n    run_training_continuation_determinism,\n    run_training_continuation_model_output,\n)\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_model_output(tree_method: str) -> None:\n    run_training_continuation_model_output(\"cuda\", tree_method)\n\n\n@given(make_determinism_strategy([\"hist\", \"approx\"]))\n@settings(deadline=None, print_blob=True, max_examples=10)\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_continuation_determinism(\n    kwargs: Any,\n) -> None:\n    run_training_continuation_determinism(device=\"cuda\", **kwargs)\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_updaters.py",
    "content": "from typing import Any, Dict\n\nimport numpy as np\nimport pytest\nfrom hypothesis import assume, given, note, settings, strategies\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.params import (\n    cat_parameter_strategy,\n    exact_parameter_strategy,\n    hist_cache_strategy,\n    hist_parameter_strategy,\n)\nfrom xgboost.testing.updater import (\n    check_categorical_missing,\n    check_categorical_ohe,\n    check_get_quantile_cut,\n    check_quantile_loss,\n    run_invalid_category,\n    run_max_cat,\n    train_result,\n)\n\npytestmark = tm.timeout(30)\n\n\nclass TestGPUUpdaters:\n    @given(\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, max_examples=50, print_blob=True)\n    def test_gpu_hist(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        param.update({\"tree_method\": \"hist\", \"device\": \"cuda\"})\n        param.update(hist_param)\n        param.update(cache_param)\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @pytest.mark.parametrize(\"tree_method\", [\"approx\", \"hist\"])\n    def test_cache_size(self, tree_method: str) -> None:\n        from sklearn.datasets import make_regression\n\n        X, y = make_regression(n_samples=4096, n_features=64, random_state=1994)\n        Xy = xgb.DMatrix(X, y)\n        results = []\n        for cache_size in [1, 3, 2048]:\n            params: Dict[str, Any] = {\"tree_method\": tree_method, \"device\": \"cuda\"}\n            params[\"max_cached_hist_node\"] = cache_size\n          
  evals_result: Dict[str, Dict[str, list]] = {}\n            xgb.train(\n                params,\n                Xy,\n                num_boost_round=4,\n                evals=[(Xy, \"Train\")],\n                evals_result=evals_result,\n            )\n            results.append(evals_result[\"Train\"][\"rmse\"])\n        for i in range(1, len(results)):\n            np.testing.assert_allclose(results[0], results[i])\n\n    @given(\n        exact_parameter_strategy,\n        hist_parameter_strategy,\n        hist_cache_strategy,\n        strategies.integers(1, 20),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, print_blob=True)\n    def test_gpu_approx(\n        self,\n        param: Dict[str, Any],\n        hist_param: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        num_rounds: int,\n        dataset: tm.TestDataset,\n    ) -> None:\n        param.update({\"tree_method\": \"approx\", \"device\": \"cuda\"})\n        param.update(hist_param)\n        param.update(cache_param)\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @given(tm.sparse_datasets_strategy)\n    @settings(deadline=None, print_blob=True)\n    def test_sparse(self, dataset: tm.TestDataset) -> None:\n        param = {\"tree_method\": \"hist\", \"max_bin\": 64}\n        hist_result = train_result(param, dataset.get_dmat(), 16)\n        note(str(hist_result))\n        assert tm.non_increasing(hist_result[\"train\"][dataset.metric])\n\n        param = {\"tree_method\": \"hist\", \"max_bin\": 64, \"device\": \"cuda\"}\n        gpu_hist_result = train_result(param, dataset.get_dmat(), 16)\n        note(str(gpu_hist_result))\n        assert tm.non_increasing(gpu_hist_result[\"train\"][dataset.metric])\n\n        np.testing.assert_allclose(\n            hist_result[\"train\"][\"rmse\"], 
gpu_hist_result[\"train\"][\"rmse\"], rtol=1e-2\n        )\n\n    @given(\n        strategies.integers(10, 400),\n        strategies.integers(3, 8),\n        strategies.integers(1, 2),\n        strategies.integers(4, 7),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_ohe(\n        self, rows: int, cols: int, rounds: int, cats: int\n    ) -> None:\n        check_categorical_ohe(\n            rows=rows,\n            cols=cols,\n            rounds=rounds,\n            cats=cats,\n            device=\"cuda\",\n            tree_method=\"hist\",\n            extmem=False,\n        )\n\n    @given(\n        tm.categorical_dataset_strategy,\n        hist_parameter_strategy,\n        cat_parameter_strategy,\n        strategies.integers(4, 32),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_hist(\n        self,\n        dataset: tm.TestDataset,\n        hist_parameters: Dict[str, Any],\n        cat_parameters: Dict[str, Any],\n        n_rounds: int,\n    ) -> None:\n        cat_parameters.update(hist_parameters)\n        cat_parameters[\"tree_method\"] = \"hist\"\n        cat_parameters[\"device\"] = \"cuda\"\n\n        results = train_result(cat_parameters, dataset.get_dmat(), n_rounds)\n        tm.non_increasing(results[\"train\"][\"rmse\"])\n\n    @given(\n        tm.categorical_dataset_strategy,\n        hist_parameter_strategy,\n        cat_parameter_strategy,\n        strategies.integers(4, 32),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_approx(\n        self,\n        dataset: tm.TestDataset,\n        hist_parameters: Dict[str, Any],\n        cat_parameters: Dict[str, Any],\n        n_rounds: int,\n    ) -> None:\n        cat_parameters.update(hist_parameters)\n        
cat_parameters[\"tree_method\"] = \"approx\"\n        cat_parameters[\"device\"] = \"cuda\"\n\n        results = train_result(cat_parameters, dataset.get_dmat(), n_rounds)\n        tm.non_increasing(results[\"train\"][\"rmse\"])\n\n    @given(\n        hist_parameter_strategy,\n        cat_parameter_strategy,\n    )\n    @settings(deadline=None, max_examples=10, print_blob=True)\n    def test_categorical_ames_housing(\n        self,\n        hist_parameters: Dict[str, Any],\n        cat_parameters: Dict[str, Any],\n    ) -> None:\n        cat_parameters.update(hist_parameters)\n        dataset = tm.TestDataset(\n            \"ames_housing\", tm.data.get_ames_housing, \"reg:squarederror\", \"rmse\"\n        )\n        cat_parameters[\"tree_method\"] = \"hist\"\n        cat_parameters[\"device\"] = \"cuda\"\n        results = train_result(cat_parameters, dataset.get_dmat(), 16)\n        tm.non_increasing(results[\"train\"][\"rmse\"])\n\n    @given(\n        strategies.integers(10, 400),\n        strategies.integers(3, 8),\n        strategies.integers(4, 7),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_categorical_missing(self, rows: int, cols: int, cats: int) -> None:\n        check_categorical_missing(\n            rows, cols, cats, device=\"cuda\", tree_method=\"approx\", extmem=False\n        )\n        check_categorical_missing(\n            rows, cols, cats, device=\"cuda\", tree_method=\"hist\", extmem=False\n        )\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_max_cat(self, tree_method: str) -> None:\n        run_max_cat(tree_method, \"cuda\")\n\n    def test_categorical_32_cat(self) -> None:\n        \"\"\"32 hits the bound of integer bitset, so special test\"\"\"\n        rows = 1000\n        check_categorical_ohe(\n            rows=rows,\n            cols=10,\n            rounds=4,\n        
    cats=32,\n            device=\"cuda\",\n            tree_method=\"hist\",\n            extmem=False,\n        )\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_invalid_category(self, tree_method: str) -> None:\n        run_invalid_category(tree_method, \"cuda\")\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    @given(\n        hist_parameter_strategy,\n        strategies.integers(1, 20),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, max_examples=20, print_blob=True)\n    def test_gpu_hist_device_dmatrix(\n        self, param: dict, num_rounds: int, dataset: tm.TestDataset\n    ) -> None:\n        # We cannot handle empty dataset yet\n        assume(len(dataset.y) > 0)\n        param[\"tree_method\"] = \"hist\"\n        param[\"device\"] = \"cuda\"\n        param = dataset.set_params(param)\n        result = train_result(\n            param,\n            dataset.get_device_dmat(max_bin=param.get(\"max_bin\", None)),\n            num_rounds,\n        )\n        note(str(result))\n        assert tm.non_increasing(result[\"train\"][dataset.metric], tolerance=1e-3)\n\n    @given(\n        hist_parameter_strategy,\n        strategies.integers(1, 3),\n        tm.make_dataset_strategy(),\n    )\n    @settings(deadline=None, max_examples=10, print_blob=True)\n    def test_external_memory(\n        self, param: Dict[str, Any], num_rounds: int, dataset: tm.TestDataset\n    ) -> None:\n        # We cannot handle empty dataset yet\n        assume(len(dataset.y) > 0)\n\n        with xgb.config_context(use_rmm=True):\n            param[\"tree_method\"] = \"hist\"\n            param[\"device\"] = \"cuda\"\n            param = dataset.set_params(param)\n            m = dataset.get_external_dmat()\n            external_result = train_result(param, m, num_rounds)\n            del m\n            assert tm.non_increasing(external_result[\"train\"][dataset.metric])\n\n    def 
test_empty_dmatrix_prediction(self) -> None:\n        # FIXME(trivialfis): This should be done with all updaters\n        kRows = 0\n        kCols = 100\n\n        X = np.empty((kRows, kCols))\n        y = np.empty((kRows,))\n\n        dtrain = xgb.DMatrix(X, y)\n\n        bst = xgb.train(\n            {\"verbosity\": 2, \"tree_method\": \"hist\", \"device\": \"cuda\"},\n            dtrain,\n            verbose_eval=True,\n            num_boost_round=6,\n            evals=[(dtrain, \"Train\")],\n        )\n\n        kRows = 100\n        X_test = np.random.randn(kRows, kCols)\n\n        dtest = xgb.DMatrix(X_test)\n        predictions = bst.predict(dtest)\n        # non-distributed, 0.0 is returned due to base_score estimation with 0 gradient.\n        np.testing.assert_allclose(predictions, 0.0, 1e-6)\n\n    @pytest.mark.mgpu\n    @given(tm.make_dataset_strategy(), strategies.integers(0, 10))\n    @settings(deadline=None, max_examples=10, print_blob=True)\n    def test_specified_gpu_id_gpu_update(\n        self, dataset: tm.TestDataset, gpu_id: int\n    ) -> None:\n        param = {\"tree_method\": \"hist\", \"device\": f\"cuda:{gpu_id}\"}\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), 10)\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @pytest.mark.parametrize(\"weighted\", [True, False])\n    def test_quantile_loss(self, weighted: bool) -> None:\n        check_quantile_loss(\"hist\", weighted, \"cuda\")\n\n    @pytest.mark.skipif(**tm.no_pandas())\n    def test_issue8824(self) -> None:\n        # column sampling by node crashes because shared pointers go out of scope\n        import pandas as pd\n\n        data = pd.DataFrame(np.random.rand(1024, 8))\n        data.columns = \"x\" + data.columns.astype(str)\n        features = data.columns\n        data[\"y\"] = data.sum(axis=1) < 4\n        dtrain = xgb.DMatrix(data[features], label=data[\"y\"])\n        model = xgb.train(\n         
   dtrain=dtrain,\n            params={\n                \"max_depth\": 5,\n                \"learning_rate\": 0.05,\n                \"objective\": \"binary:logistic\",\n                \"tree_method\": \"hist\",\n                \"device\": \"cuda\",\n                \"colsample_bytree\": 0.5,\n                \"colsample_bylevel\": 0.5,\n                \"colsample_bynode\": 0.5,  # Causes issues\n                \"reg_alpha\": 0.05,\n                \"reg_lambda\": 0.005,\n                \"seed\": 66,\n                \"subsample\": 0.5,\n                \"gamma\": 0.2,\n                \"eval_metric\": \"auc\",\n            },\n            num_boost_round=150,\n        )\n\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_get_quantile_cut(self) -> None:\n        check_get_quantile_cut(\"hist\", \"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_gpu_with_sklearn.py",
    "content": "import itertools\nimport json\nimport os\nfrom concurrent.futures import ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Any, List, Tuple\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df\nfrom xgboost.testing.with_skl import (\n    run_boost_from_prediction_binary,\n    run_boost_from_prediction_multi_clasas,\n    run_housing_rf_regression,\n    run_intercept,\n    run_recoding,\n)\n\npytestmark = pytest.mark.skipif(**tm.no_sklearn())\n\nrng = np.random.RandomState(1994)\n\n\ndef test_gpu_binary_classification() -> None:\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import KFold\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):\n        for train_index, test_index in kf.split(X, y):\n            xgb_model = cls(\n                random_state=42,\n                tree_method=\"hist\",\n                n_estimators=4,\n                device=\"cuda\",\n            ).fit(X[train_index], y[train_index])\n            cfg: str = json.loads(xgb_model.get_booster().save_config())[\"learner\"][\n                \"generic_param\"\n            ][\"device\"]\n            assert cfg.startswith(\"cuda\")\n            preds = xgb_model.predict(X[test_index])\n            labels = y[test_index]\n            err = sum(\n                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n            ) / float(len(preds))\n            assert err < 0.1\n\n\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_boost_from_prediction_gpu_hist(tree_method: str) -> None:\n    import cudf\n    import cupy as cp\n    from 
sklearn.datasets import load_breast_cancer, load_digits\n\n    X, y = load_breast_cancer(return_X_y=True)\n    X, y = cp.array(X), cp.array(y)\n\n    run_boost_from_prediction_binary(tree_method, \"cuda\", X, y, None)\n    run_boost_from_prediction_binary(tree_method, \"cuda\", X, y, cudf.DataFrame)\n\n    X, y = load_digits(return_X_y=True)\n    X, y = cp.array(X), cp.array(y)\n\n    run_boost_from_prediction_multi_clasas(\n        xgb.XGBClassifier, tree_method, \"cuda\", X, y, None\n    )\n    run_boost_from_prediction_multi_clasas(\n        xgb.XGBClassifier, tree_method, \"cuda\", X, y, cudf.DataFrame\n    )\n\n\ndef test_num_parallel_tree() -> None:\n    run_housing_rf_regression(\"hist\", \"cuda\")\n\n\n@pytest.mark.skipif(**tm.no_pandas())\n@pytest.mark.skipif(**tm.no_cudf())\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_categorical(tmp_path: Path) -> None:\n    import cudf\n    import cupy as cp\n    import pandas as pd\n    from sklearn.datasets import load_svmlight_file\n\n    data_dir = tm.data_dir(__file__)\n    X, y = load_svmlight_file(\n        os.path.join(data_dir, \"agaricus.txt.train\"), dtype=np.float32\n    )\n    clf = xgb.XGBClassifier(\n        tree_method=\"hist\",\n        device=\"cuda\",\n        n_estimators=10,\n    )\n    X = pd.DataFrame(X.todense()).astype(\"category\")\n    for c in X.columns:\n        X[c] = X[c].cat.rename_categories(int)\n    clf.fit(X, y)\n\n    model = tmp_path / \"categorial.json\"\n    clf.save_model(model)\n\n    with open(model) as fd:\n        categorical = json.load(fd)\n        categories_sizes = np.array(\n            categorical[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][0][\n                \"categories_sizes\"\n            ]\n        )\n        assert categories_sizes.shape[0] != 0\n        np.testing.assert_allclose(categories_sizes, 1)\n\n    def check_predt(X: Any, y: List[float]) -> None:\n        reg = xgb.XGBRegressor(tree_method=\"hist\", n_estimators=64, device=\"cuda\")\n 
       reg.fit(X, y)\n        predts = reg.predict(X)\n        booster = reg.get_booster()\n        feature_types = booster.feature_types\n        assert feature_types is not None\n        assert \"c\" in feature_types\n        assert len(feature_types) == 1\n        inp_predts = booster.inplace_predict(X)\n        if isinstance(inp_predts, cp.ndarray):\n            inp_predts = cp.asnumpy(inp_predts)\n        np.testing.assert_allclose(predts, inp_predts)\n\n    y = [1, 2, 3]\n    X = pd.DataFrame({\"f0\": [\"a\", \"b\", \"c\"]})\n    X[\"f0\"] = X[\"f0\"].astype(\"category\")\n    check_predt(X, y)\n\n    X = cudf.DataFrame(X)\n    check_predt(X, y)\n\n\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.skipif(**tm.no_cudf())\ndef test_classififer() -> None:\n    import cudf\n    import cupy as cp\n    from sklearn.datasets import load_digits\n\n    X, y = load_digits(return_X_y=True)\n    y *= 10\n\n    clf = xgb.XGBClassifier(tree_method=\"hist\", n_estimators=1, device=\"cuda\")\n\n    # numpy\n    with pytest.raises(ValueError, match=r\"Invalid classes.*\"):\n        clf.fit(X, y)\n\n    # cupy\n    X, y = cp.array(X), cp.array(y)\n    with pytest.raises(ValueError, match=r\"Invalid classes.*\"):\n        clf.fit(X, y)\n\n    # cudf\n    X, y = cudf.DataFrame(X), cudf.DataFrame(y)\n    with pytest.raises(ValueError, match=r\"Invalid classes.*\"):\n        clf.fit(X, y)\n\n    # pandas\n    X, y = load_digits(return_X_y=True, as_frame=True)\n    y *= 10\n    with pytest.raises(ValueError, match=r\"Invalid classes.*\"):\n        clf.fit(X, y)\n\n\n@pytest.mark.parametrize(\n    \"use_cupy,tree_method,device,order,gdtype,strategy\",\n    [\n        c\n        for c in itertools.product(\n            (True, False),\n            (\"hist\", \"approx\"),\n            (\"cpu\", \"cuda\"),\n            (\"C\", \"F\"),\n            (\"float64\", \"float32\"),\n            (\"one_output_per_tree\", \"multi_output_tree\"),\n        )\n    ],\n)\ndef 
test_custom_objective(\n    use_cupy: bool,\n    tree_method: str,\n    device: str,\n    order: str,\n    gdtype: str,\n    strategy: str,\n) -> None:\n    from sklearn.datasets import load_iris\n\n    X, y = load_iris(return_X_y=True)\n\n    params = {\n        \"tree_method\": tree_method,\n        \"device\": device,\n        \"n_estimators\": 8,\n        \"multi_strategy\": strategy,\n    }\n\n    obj = tm.softprob_obj(y.max() + 1, use_cupy=use_cupy, order=order, gdtype=gdtype)\n    assert callable(obj)\n\n    clf = xgb.XGBClassifier(objective=obj, **params)\n\n    if strategy == \"multi_output_tree\" and tree_method == \"approx\":\n        with pytest.raises(ValueError, match=r\"Only the hist\"):\n            clf.fit(X, y)\n        return\n\n    clf.fit(X, y)\n\n    clf_1 = xgb.XGBClassifier(**params)\n    clf_1.fit(X, y)\n\n    np.testing.assert_allclose(clf.predict_proba(X), clf_1.predict_proba(X), rtol=1e-4)\n\n    params[\"n_estimators\"] = 2\n\n    def wrong_shape(\n        labels: np.ndarray, predt: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        grad, hess = obj(labels, predt)\n        return grad[:, :-1], hess[:, :-1]\n\n    with pytest.raises(ValueError, match=\"should be equal to the number of\"):\n        clf = xgb.XGBClassifier(objective=wrong_shape, **params)\n        clf.fit(X, y)\n\n    def wrong_shape_1(\n        labels: np.ndarray, predt: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        grad, hess = obj(labels, predt)\n        return grad[:-1, :], hess[:-1, :]\n\n    with pytest.raises(ValueError, match=\"Mismatched size between the gradient\"):\n        clf = xgb.XGBClassifier(objective=wrong_shape_1, **params)\n        clf.fit(X, y)\n\n    def wrong_shape_2(\n        labels: np.ndarray, predt: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        grad, hess = obj(labels, predt)\n        return grad[:, :], hess[:-1, :]\n\n    with pytest.raises(ValueError, match=\"Mismatched shape between the gradient\"):\n    
    clf = xgb.XGBClassifier(objective=wrong_shape_2, **params)\n        clf.fit(X, y)\n\n    def wrong_shape_3(\n        labels: np.ndarray, predt: np.ndarray\n    ) -> Tuple[np.ndarray, np.ndarray]:\n        grad, hess = obj(labels, predt)\n        grad = grad.reshape(grad.size)\n        hess = hess.reshape(hess.size)\n        return grad, hess\n\n    with pytest.warns(FutureWarning, match=\"required to be\"):\n        clf = xgb.XGBClassifier(objective=wrong_shape_3, **params)\n        clf.fit(X, y)\n\n\n@pytest.mark.skipif(**tm.no_cudf())\ndef test_ranking_qid_df() -> None:\n    import cudf\n\n    run_ranking_qid_df(cudf, \"hist\", \"cuda\")\n\n\n@pytest.mark.skipif(**tm.no_pandas())\ndef test_ranking_categorical() -> None:\n    run_ranking_categorical(device=\"cuda\")\n\n\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.mgpu\ndef test_device_ordinal() -> None:\n    import cupy as cp\n\n    n_devices = 2\n\n    def worker(ordinal: int, correct_ordinal: bool) -> None:\n        if correct_ordinal:\n            cp.cuda.runtime.setDevice(ordinal)\n        else:\n            cp.cuda.runtime.setDevice((ordinal + 1) % n_devices)\n\n        X, y, w = tm.make_regression(4096, 12, use_cupy=True)\n        reg = xgb.XGBRegressor(device=f\"cuda:{ordinal}\", tree_method=\"hist\")\n\n        if correct_ordinal:\n            reg.fit(\n                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]\n            )\n            assert tm.non_increasing(reg.evals_result()[\"validation_0\"][\"rmse\"])\n            return\n\n        with pytest.raises(ValueError, match=\"Invalid device ordinal\"):\n            reg.fit(\n                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]\n            )\n\n    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:\n        futures = []\n        n_trials = 32\n        for i in range(n_trials):\n            fut = executor.submit(\n                worker, ordinal=i % n_devices, 
correct_ordinal=i % 3 != 0\n            )\n            futures.append(fut)\n\n        for fut in futures:\n            fut.result()\n\n    cp.cuda.runtime.setDevice(0)\n\n\n@pytest.mark.skipif(**tm.no_cudf())\ndef test_recoding() -> None:\n    run_recoding(\"cuda\")\n\n\ndef test_intercept() -> None:\n    run_intercept(\"cuda\")\n"
  },
  {
    "path": "tests/python-gpu/test_large_input.py",
    "content": "import cupy as cp\nimport numpy as np\nimport pytest\n\nimport xgboost as xgb\n\n\n# Test for integer overflow or out of memory exceptions\ndef test_large_input() -> None:\n    available_bytes, _ = cp.cuda.runtime.memGetInfo()\n    # 15 GB\n    required_bytes = 1.5e10\n    if available_bytes < required_bytes:\n        pytest.skip(\"Not enough memory on this device\")\n    n = 1000\n    m = ((1 << 31) + n - 1) // n\n    assert np.log2(m * n) > 31\n    X = cp.ones((m, n), dtype=np.float32)\n    y = cp.ones(m)\n    w = cp.ones(m)\n    dmat = xgb.QuantileDMatrix(X, y, weight=w)\n    booster = xgb.train(\n        {\"tree_method\": \"hist\", \"max_depth\": 1, \"device\": \"cuda\"}, dmat, 1\n    )\n    del y\n    booster.inplace_predict(X)\n"
  },
  {
    "path": "tests/python-gpu/test_monotonic_constraints.py",
    "content": "import numpy as np\nimport pytest\n\nimport xgboost as xgb\nfrom xgboost import testing as tm\nfrom xgboost.testing.monotone_constraints import is_correctly_constrained, training_dset\n\nrng = np.random.RandomState(1994)\n\n\ndef non_decreasing(L: np.ndarray) -> bool:\n    return all((x - y) < 0.001 for x, y in zip(L, L[1:]))\n\n\ndef non_increasing(L: np.ndarray) -> bool:\n    return all((y - x) < 0.001 for x, y in zip(L, L[1:]))\n\n\ndef assert_constraint(constraint: int, tree_method: str) -> None:\n    from sklearn.datasets import make_regression\n\n    n = 1000\n    X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)\n    dtrain = xgb.DMatrix(X, y)\n    param = {}\n    param[\"tree_method\"] = tree_method\n    param[\"device\"] = \"cuda\"\n    param[\"monotone_constraints\"] = \"(\" + str(constraint) + \")\"\n    bst = xgb.train(param, dtrain)\n    dpredict = xgb.DMatrix(X[X[:, 0].argsort()])\n    pred = bst.predict(dpredict)\n\n    if constraint > 0:\n        assert non_decreasing(pred)\n    elif constraint < 0:\n        assert non_increasing(pred)\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_gpu_hist_basic() -> None:\n    assert_constraint(1, \"hist\")\n    assert_constraint(-1, \"hist\")\n\n\n@pytest.mark.skipif(**tm.no_sklearn())\ndef test_gpu_approx_basic() -> None:\n    assert_constraint(1, \"approx\")\n    assert_constraint(-1, \"approx\")\n\n\ndef test_gpu_hist_depthwise() -> None:\n    params = {\n        \"tree_method\": \"hist\",\n        \"grow_policy\": \"depthwise\",\n        \"device\": \"cuda\",\n        \"monotone_constraints\": \"(1, -1)\",\n    }\n    model = xgb.train(params, training_dset)\n    is_correctly_constrained(model)\n\n\ndef test_gpu_hist_lossguide() -> None:\n    params = {\n        \"tree_method\": \"hist\",\n        \"grow_policy\": \"lossguide\",\n        \"device\": \"cuda\",\n        \"monotone_constraints\": \"(1, -1)\",\n    }\n    model = xgb.train(params, training_dset)\n   
 is_correctly_constrained(model)\n"
  },
  {
    "path": "tests/python-sycl/test_sycl_prediction.py",
    "content": "import sys\nimport unittest\nimport pytest\n\nimport numpy as np\nimport xgboost as xgb\nfrom hypothesis import given, strategies, assume, settings, note\n\nfrom xgboost import testing as tm\n\nrng = np.random.RandomState(1994)\n\nshap_parameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(1, 11),\n        \"max_leaves\": strategies.integers(0, 256),\n        \"num_parallel_tree\": strategies.sampled_from([1, 10]),\n    }\n).filter(lambda x: x[\"max_depth\"] > 0 or x[\"max_leaves\"] > 0)\n\n\nclass TestSYCLPredict(unittest.TestCase):\n    def test_predict(self):\n        iterations = 10\n        np.random.seed(1)\n        test_num_rows = [10, 1000, 5000]\n        test_num_cols = [10, 50, 500]\n        for num_rows in test_num_rows:\n            for num_cols in test_num_cols:\n                dtrain = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                dval = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                dtest = xgb.DMatrix(\n                    np.random.randn(num_rows, num_cols),\n                    label=[0, 1] * int(num_rows / 2),\n                )\n                watchlist = [(dtrain, \"train\"), (dval, \"validation\")]\n                res = {}\n                param = {\n                    \"objective\": \"binary:logistic\",\n                    \"eval_metric\": \"logloss\",\n                    \"tree_method\": \"hist\",\n                    \"device\": \"cpu\",\n                    \"max_depth\": 1,\n                    \"verbosity\": 0,\n                }\n                bst = xgb.train(\n                    param, dtrain, iterations, evals=watchlist, evals_result=res\n                )\n                assert tm.non_increasing(res[\"train\"][\"logloss\"])\n          
      cpu_pred_train = bst.predict(dtrain, output_margin=True)\n                cpu_pred_test = bst.predict(dtest, output_margin=True)\n                cpu_pred_val = bst.predict(dval, output_margin=True)\n\n                bst.set_param({\"device\": \"sycl\"})\n                sycl_pred_train = bst.predict(dtrain, output_margin=True)\n                sycl_pred_test = bst.predict(dtest, output_margin=True)\n                sycl_pred_val = bst.predict(dval, output_margin=True)\n\n                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train, rtol=1e-6)\n                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val, rtol=1e-6)\n                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test, rtol=1e-6)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_multi_predict(self):\n        from sklearn.datasets import make_regression\n        from sklearn.model_selection import train_test_split\n\n        n = 1000\n        X, y = make_regression(n, random_state=rng)\n        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)\n        dtrain = xgb.DMatrix(X_train, label=y_train)\n        dtest = xgb.DMatrix(X_test)\n\n        params = {}\n        params[\"tree_method\"] = \"hist\"\n        params[\"device\"] = \"cpu\"\n\n        bst = xgb.train(params, dtrain)\n        cpu_predict = bst.predict(dtest)\n\n        bst.set_param({\"device\": \"sycl\"})\n\n        predict0 = bst.predict(dtest)\n        predict1 = bst.predict(dtest)\n\n        assert np.allclose(predict0, predict1)\n        assert np.allclose(predict0, cpu_predict)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_sklearn(self):\n        m, n = 15000, 14\n        tr_size = 2500\n        X = np.random.rand(m, n)\n        y = 200 * np.matmul(X, np.arange(-3, -3 + n))\n        X_train, y_train = X[:tr_size, :], y[:tr_size]\n        X_test, y_test = X[tr_size:, :], y[tr_size:]\n\n        # First with cpu_predictor\n        params = {\n          
  \"tree_method\": \"hist\",\n            \"device\": \"cpu\",\n            \"n_jobs\": -1,\n            \"verbosity\": 0,\n            \"seed\": 123,\n        }\n        m = xgb.XGBRegressor(**params).fit(X_train, y_train)\n        cpu_train_score = m.score(X_train, y_train)\n        cpu_test_score = m.score(X_test, y_test)\n\n        # Now with sycl_predictor\n        params[\"device\"] = \"sycl\"\n        m.set_params(**params)\n\n        sycl_train_score = m.score(X_train, y_train)\n        sycl_test_score = m.score(X_test, y_test)\n\n        assert np.allclose(cpu_train_score, sycl_train_score)\n        assert np.allclose(cpu_test_score, sycl_test_score)\n\n    @given(\n        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy\n    )\n    @settings(deadline=None)\n    def test_shap(self, num_rounds, dataset, param):\n        if dataset.name.endswith(\"-l1\"):  # not supported by the exact tree method\n            return\n        param.update({\"tree_method\": \"hist\", \"device\": \"cpu\"})\n        param = dataset.set_params(param)\n        dmat = dataset.get_dmat()\n        bst = xgb.train(param, dmat, num_rounds)\n        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)\n        bst.set_param({\"device\": \"sycl\"})\n        shap = bst.predict(test_dmat, pred_contribs=True)\n        margin = bst.predict(test_dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)\n\n    @given(\n        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy\n    )\n    @settings(deadline=None, max_examples=20)\n    def test_shap_interactions(self, num_rounds, dataset, param):\n        if dataset.name.endswith(\"-l1\"):  # not supported by the exact tree method\n            return\n        param.update({\"tree_method\": \"hist\", \"device\": \"cpu\"})\n        param = dataset.set_params(param)\n        
dmat = dataset.get_dmat()\n        bst = xgb.train(param, dmat, num_rounds)\n        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)\n        bst.set_param({\"device\": \"sycl\"})\n        shap = bst.predict(test_dmat, pred_interactions=True)\n        margin = bst.predict(test_dmat, output_margin=True)\n        assume(len(dataset.y) > 0)\n        assert np.allclose(\n            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),\n            margin,\n            1e-3,\n            1e-3,\n        )\n"
  },
  {
    "path": "tests/python-sycl/test_sycl_simple_dask.py",
    "content": "from xgboost import dask as dxgb\nfrom xgboost import testing as tm\n\nimport dask.array as da\nimport dask.distributed\n\n\ndef train_result(client, param, dtrain, num_rounds):\n    result = dxgb.train(\n        client,\n        param,\n        dtrain,\n        num_rounds,\n        verbose_eval=False,\n        evals=[(dtrain, \"train\")],\n    )\n    return result\n\n\nclass TestSYCLDask:\n    # The simplest test: verify training with only one node.\n    def test_simple(self):\n        cluster = dask.distributed.LocalCluster(n_workers=1)\n        client = dask.distributed.Client(cluster)\n\n        param = {}\n        param[\"tree_method\"] = \"hist\"\n        param[\"device\"] = \"sycl\"\n        param[\"verbosity\"] = 0\n        param[\"objective\"] = \"reg:squarederror\"\n\n        # X and y must be Dask dataframes or arrays\n        num_obs = int(1e4)\n        num_features = 20\n\n        rng = da.random.RandomState(1994)\n        X = rng.random_sample((num_obs, num_features), chunks=(1000, -1))\n        y = X.sum(axis=1)\n        dtrain = dxgb.DaskDMatrix(client, X, y)\n\n        result = train_result(client, param, dtrain, 10)\n        assert tm.non_increasing(result[\"history\"][\"train\"][\"rmse\"])\n"
  },
  {
    "path": "tests/python-sycl/test_sycl_training_continuation.py",
    "content": "import numpy as np\nimport xgboost as xgb\nimport json\n\nrng = np.random.RandomState(1994)\n\n\nclass TestSYCLTrainingContinuation:\n    def run_training_continuation(self, use_json):\n        kRows = 64\n        kCols = 32\n        X = rng.randn(kRows, kCols)\n        y = rng.randn(kRows)\n        dtrain = xgb.DMatrix(X, y)\n        params = {\n            \"device\": \"sycl\",\n            \"max_depth\": \"2\",\n            \"gamma\": \"0.1\",\n            \"alpha\": \"0.01\",\n            \"enable_experimental_json_serialization\": use_json,\n        }\n        bst_0 = xgb.train(params, dtrain, num_boost_round=64)\n        dump_0 = bst_0.get_dump(dump_format=\"json\")\n\n        bst_1 = xgb.train(params, dtrain, num_boost_round=32)\n        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)\n        dump_1 = bst_1.get_dump(dump_format=\"json\")\n\n        def recursive_compare(obj_0, obj_1):\n            if isinstance(obj_0, float):\n                assert np.isclose(obj_0, obj_1, atol=1e-6)\n            elif isinstance(obj_0, str):\n                assert obj_0 == obj_1\n            elif isinstance(obj_0, int):\n                assert obj_0 == obj_1\n            elif isinstance(obj_0, dict):\n                keys_0 = list(obj_0.keys())\n                keys_1 = list(obj_1.keys())\n                values_0 = list(obj_0.values())\n                values_1 = list(obj_1.values())\n                for i in range(len(obj_0.items())):\n                    assert keys_0[i] == keys_1[i]\n                    if list(obj_0.keys())[i] != \"missing\":\n                        recursive_compare(values_0[i], values_1[i])\n            else:\n                for i in range(len(obj_0)):\n                    recursive_compare(obj_0[i], obj_1[i])\n\n        assert len(dump_0) == len(dump_1)\n        for i in range(len(dump_0)):\n            obj_0 = json.loads(dump_0[i])\n            obj_1 = json.loads(dump_1[i])\n            
recursive_compare(obj_0, obj_1)\n\n    def test_sycl_training_continuation_binary(self):\n        self.run_training_continuation(False)\n\n    def test_sycl_training_continuation_json(self):\n        self.run_training_continuation(True)\n"
  },
  {
    "path": "tests/python-sycl/test_sycl_updaters.py",
    "content": "import numpy as np\nimport gc\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, strategies, assume, settings, note\n\nimport sys\nimport os\n\n# sys.path.append(\"tests/python\")\n# import testing as tm\nfrom xgboost import testing as tm\n\nparameter_strategy = strategies.fixed_dictionaries(\n    {\n        \"max_depth\": strategies.integers(0, 11),\n        \"max_leaves\": strategies.integers(0, 256),\n        \"max_bin\": strategies.integers(2, 1024),\n        \"grow_policy\": strategies.sampled_from([\"lossguide\", \"depthwise\"]),\n        \"single_precision_histogram\": strategies.booleans(),\n        \"min_child_weight\": strategies.floats(0.5, 2.0),\n        \"seed\": strategies.integers(0, 10),\n        # We cannot enable subsampling as the training loss can increase\n        # 'subsample': strategies.floats(0.5, 1.0),\n        \"colsample_bytree\": strategies.floats(0.5, 1.0),\n        \"colsample_bylevel\": strategies.floats(0.5, 1.0),\n    }\n).filter(\n    lambda x: (x[\"max_depth\"] > 0 or x[\"max_leaves\"] > 0)\n    and (x[\"max_depth\"] > 0 or x[\"grow_policy\"] == \"lossguide\")\n)\n\n\ndef train_result(param, dmat, num_rounds):\n    result = {}\n    xgb.train(\n        param,\n        dmat,\n        num_rounds,\n        [(dmat, \"train\")],\n        verbose_eval=False,\n        evals_result=result,\n    )\n    return result\n\n\nclass TestSYCLUpdaters:\n    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())\n    @settings(deadline=None)\n    def test_sycl_hist(self, param, num_rounds, dataset):\n        param[\"tree_method\"] = \"hist\"\n        param[\"device\"] = \"sycl\"\n        param[\"verbosity\"] = 0\n        param = dataset.set_params(param)\n        result = train_result(param, dataset.get_dmat(), num_rounds)\n        note(result)\n        assert tm.non_increasing(result[\"train\"][dataset.metric])\n\n    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))\n    
@settings(deadline=None)\n    def test_specified_device_id_sycl_update(self, dataset, device_id):\n        # Read the list of sycl-devices\n        sycl_ls = os.popen(\"sycl-ls\").read()\n        devices = sycl_ls.split(\"\\n\")\n\n        # Test should launch only on gpu\n        # Find gpus in the list of devices\n        # and use the id in the list instead of device_id\n        target_device_type = \"opencl:gpu\"\n        found_devices = 0\n        for idx in range(len(devices)):\n            if len(devices[idx]) >= len(target_device_type):\n                if devices[idx][1 : 1 + len(target_device_type)] == target_device_type:\n                    if found_devices == device_id:\n                        param = {\"device\": f\"sycl:gpu:{idx}\"}\n                        param = dataset.set_params(param)\n                        result = train_result(param, dataset.get_dmat(), 10)\n                        assert tm.non_increasing(result[\"train\"][dataset.metric])\n                    else:\n                        found_devices += 1\n"
  },
  {
    "path": "tests/python-sycl/test_sycl_with_sklearn.py",
    "content": "import xgboost as xgb\nimport pytest\nimport sys\nimport numpy as np\n\nfrom xgboost import testing as tm\n\npytestmark = pytest.mark.skipif(**tm.no_sklearn())\n\nrng = np.random.RandomState(1994)\n\n\ndef test_sycl_binary_classification():\n    from sklearn.datasets import load_digits\n    from sklearn.model_selection import KFold\n\n    digits = load_digits(n_class=2)\n    y = digits[\"target\"]\n    X = digits[\"data\"]\n    kf = KFold(n_splits=2, shuffle=True, random_state=rng)\n    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):\n        for train_index, test_index in kf.split(X, y):\n            xgb_model = cls(random_state=42, device=\"sycl\", n_estimators=4).fit(\n                X[train_index], y[train_index]\n            )\n            preds = xgb_model.predict(X[test_index])\n            labels = y[test_index]\n            err = sum(\n                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]\n            ) / float(len(preds))\n            print(preds)\n            print(labels)\n            print(err)\n            assert err < 0.1\n"
  },
  {
    "path": "tests/test_distributed/__init__.py",
    "content": ""
  },
  {
    "path": "tests/test_distributed/test_federated/test_federated.py",
    "content": "import pytest\n\nfrom xgboost.testing.federated import run_federated_learning\n\n\n@pytest.mark.parametrize(\"with_ssl\", [True, False])\ndef test_federated_learning(with_ssl: bool) -> None:\n    run_federated_learning(with_ssl, False, __file__)\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_federated/test_gpu_federated.py",
    "content": "import pytest\n\nfrom xgboost.testing.federated import run_federated_learning\n\n\n@pytest.mark.parametrize(\"with_ssl\", [True, False])\n@pytest.mark.mgpu\ndef test_federated_learning(with_ssl: bool) -> None:\n    run_federated_learning(with_ssl, True, __file__)\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/__init__.py",
    "content": "\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/conftest.py",
    "content": "from typing import Any, Generator, Sequence\n\nimport pytest\n\nfrom xgboost import testing as tm\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef setup_rmm_pool(request: Any, pytestconfig: pytest.Config) -> None:\n    tm.setup_rmm_pool(request, pytestconfig)\n\n\n@pytest.fixture(scope=\"class\")\ndef local_cuda_client(request: Any, pytestconfig: pytest.Config) -> Generator:\n    kwargs = {}\n    if hasattr(request, \"param\"):\n        kwargs.update(request.param)\n    if pytestconfig.getoption(\"--use-rmm-pool\"):\n        if tm.no_rmm()[\"condition\"]:\n            raise ImportError(\"The --use-rmm-pool option requires the RMM package\")\n        import rmm\n\n        kwargs[\"rmm_pool_size\"] = \"2GB\"\n    if tm.no_dask_cuda()[\"condition\"]:\n        raise ImportError(\"The local_cuda_cluster fixture requires dask_cuda package\")\n    from dask.distributed import Client\n    from dask_cuda import LocalCUDACluster\n\n    yield Client(LocalCUDACluster(**kwargs))\n\n\ndef pytest_addoption(parser: pytest.Parser) -> None:\n    parser.addoption(\n        \"--use-rmm-pool\", action=\"store_true\", default=False, help=\"Use RMM pool\"\n    )\n\n\ndef pytest_collection_modifyitems(config: pytest.Config, items: Sequence) -> None:\n    # mark dask tests as `mgpu`.\n    mgpu_mark = pytest.mark.mgpu\n    for item in items:\n        item.add_marker(mgpu_mark)\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/test_gpu_demos.py",
    "content": "import os\nimport subprocess\n\nimport pytest\n\nfrom xgboost import testing as tm\n\npytestmark = [\n    pytest.mark.skipif(**tm.no_dask()),\n    pytest.mark.skipif(**tm.no_dask_cuda()),\n    tm.timeout(60),\n]\n\n\n@pytest.mark.skipif(**tm.no_cupy())\n@pytest.mark.mgpu\ndef test_dask_training() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"gpu_training.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.mgpu\ndef test_dask_sklearn_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"sklearn_gpu_training.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.mgpu\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_forward_logging_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"forward_logging.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/test_gpu_external_memory.py",
    "content": "\"\"\"Copyright 2024-2025, XGBoost contributors\"\"\"\n\nfrom functools import partial, update_wrapper\nfrom typing import Any\n\nimport pytest\nfrom dask_cuda import LocalCUDACluster\nfrom distributed import Client\n\nimport xgboost as xgb\nfrom xgboost import collective as coll\nfrom xgboost import testing as tm\nfrom xgboost.testing.dask import check_external_memory, get_rabit_args\nfrom xgboost.tracker import RabitTracker\n\n\n@pytest.mark.parametrize(\"is_qdm\", [True, False])\ndef test_external_memory(is_qdm: bool) -> None:\n    n_workers = 2\n    with LocalCUDACluster(n_workers=2) as cluster:\n        with Client(cluster) as client:\n            args = get_rabit_args(client, 2)\n            futs = client.map(\n                check_external_memory,\n                range(n_workers),\n                n_workers=n_workers,\n                device=\"cuda\",\n                comm_args=args,\n                is_qdm=is_qdm,\n            )\n            client.gather(futs)\n\n\n@pytest.mark.skipif(**tm.no_loky())\ndef test_extmem_qdm_distributed() -> None:\n    from loky import get_reusable_executor\n\n    n_samples_per_batch = 2048\n    n_features = 128\n    n_batches = 8\n\n    def do_train(ordinal: int) -> None:\n        it = tm.IteratorForTest(\n            *tm.make_batches(n_samples_per_batch, n_features, n_batches, use_cupy=True),\n            cache=\"cache\",\n            on_host=True,\n        )\n\n        Xy = xgb.ExtMemQuantileDMatrix(it)\n        results: dict[str, Any] = {}\n        booster = xgb.train(\n            {\"device\": f\"cuda:{ordinal}\"},\n            num_boost_round=2,\n            dtrain=Xy,\n            evals=[(Xy, \"Train\")],\n            evals_result=results,\n        )\n        assert tm.non_increasing(results[\"Train\"][\"rmse\"])\n\n    tracker = RabitTracker(host_ip=\"127.0.0.1\", n_workers=2)\n    tracker.start()\n    args = tracker.worker_args()\n\n    def local_test(worker_id: int, rabit_args: dict) -> None:\n      
  import cupy as cp\n\n        cp.cuda.runtime.setDevice(worker_id)\n\n        with coll.CommunicatorContext(**rabit_args, DMLC_TASK_ID=str(worker_id)):\n            assert coll.get_rank() == worker_id\n            do_train(coll.get_rank())\n\n    n_workers = 2\n    fn = update_wrapper(partial(local_test, rabit_args=args), local_test)\n    with get_reusable_executor(max_workers=n_workers) as pool:\n        results = pool.map(fn, range(n_workers))\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/test_gpu_ranking.py",
    "content": "\"\"\"Copyright 2024, XGBoost contributors\"\"\"\n\nimport dask\nimport pytest\nfrom distributed import Client\n\nfrom xgboost import testing as tm\nfrom xgboost.testing import dask as dtm\n\npytestmark = [\n    pytest.mark.skipif(**tm.no_dask()),\n    pytest.mark.skipif(**tm.no_dask_cuda()),\n    tm.timeout(120),\n]\n\n\n@pytest.mark.filterwarnings(\"error\")\ndef test_no_group_split(local_cuda_client: Client) -> None:\n    with dask.config.set(\n        {\n            \"array.backend\": \"cupy\",\n            \"dataframe.backend\": \"cudf\",\n        }\n    ):\n        dtm.check_no_group_split(local_cuda_client, \"cuda\")\n"
  },
  {
    "path": "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
    "content": "\"\"\"Copyright 2019-2026, XGBoost contributors\"\"\"\n\nimport asyncio\nimport json\nfrom collections import OrderedDict\nfrom inspect import signature\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Type, TypeVar\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom hypothesis import given, note, settings, strategies\nfrom hypothesis._settings import duration\nfrom packaging.version import parse as parse_version\nfrom xgboost import testing as tm\nfrom xgboost.collective import CommunicatorContext\nfrom xgboost.testing.dask import get_rabit_args, make_categorical, run_recode\nfrom xgboost.testing.params import hist_parameter_strategy\n\nfrom ..test_with_dask.test_with_dask import (\n    generate_array,\n    run_auc,\n    run_boost_from_prediction,\n    run_boost_from_prediction_multi_class,\n    run_categorical,\n    run_dask_classifier,\n    run_empty_dmatrix_auc,\n    run_empty_dmatrix_cls,\n    run_empty_dmatrix_reg,\n    run_tree_stats,\n    suppress,\n)\nfrom ..test_with_dask.test_with_dask import kCols as random_cols\n\npytestmark = [\n    pytest.mark.skipif(**tm.no_dask()),\n    pytest.mark.skipif(**tm.no_dask_cuda()),\n    tm.timeout(180),\n]\n\nimport cudf\nimport dask\nimport dask.dataframe as dd\nfrom dask import __version__ as dask_version\nfrom dask import array as da\nfrom dask.distributed import Client\nfrom dask_cuda import LocalCUDACluster\nfrom xgboost import dask as dxgb\nfrom xgboost.testing.dask import check_init_estimation, check_uneven_nan\n\ndask_version_ge110 = dask_version and parse_version(dask_version) >= parse_version(\n    \"2024.11.0\"\n)\n\n\ndef run_with_dask_dataframe(DMatrixT: Type, client: Client) -> None:\n    import cupy as cp\n\n    cp.cuda.runtime.setDevice(0)\n    _X, _y, _ = generate_array()\n\n    X = dd.from_dask_array(_X).to_backend(\"cudf\")\n    y = dd.from_dask_array(_y).to_backend(\"cudf\")\n\n    dtrain = DMatrixT(client, X, y)\n    out = dxgb.train(\n        client,\n    
    {\"tree_method\": \"hist\", \"debug_synchronize\": True, \"device\": \"cuda\"},\n        dtrain=dtrain,\n        evals=[(dtrain, \"X\")],\n        num_boost_round=4,\n    )\n\n    assert isinstance(out[\"booster\"], dxgb.Booster)\n    assert len(out[\"history\"][\"X\"][\"rmse\"]) == 4\n\n    predictions = dxgb.predict(client, out, dtrain)\n    assert isinstance(predictions.compute(), np.ndarray)\n\n    series_predictions = dxgb.inplace_predict(client, out, X)\n    assert isinstance(series_predictions, dd.Series)\n\n    single_node = out[\"booster\"].predict(xgb.DMatrix(X.compute()))\n\n    cp.testing.assert_allclose(single_node, predictions.compute())\n    np.testing.assert_allclose(single_node, series_predictions.compute().to_numpy())\n\n    predt = dxgb.predict(client, out, X)\n    assert isinstance(predt, dd.Series)\n\n    T = TypeVar(\"T\")\n\n    def is_df(part: T) -> T:\n        assert isinstance(part, cudf.DataFrame), part\n        return part\n\n    predt.map_partitions(is_df, meta=dd.utils.make_meta({\"prediction\": \"f4\"}))\n\n    cp.testing.assert_allclose(predt.values.compute(), single_node)\n\n    # Work around https://github.com/dmlc/xgboost/issues/10752\n    X.columns = X.columns.astype(\"object\")\n    # Make sure the output can be integrated back to original dataframe\n    X[\"predict\"] = predictions\n    X[\"inplace_predict\"] = series_predictions\n\n    has_null = X.isnull().values.any().compute()\n    assert bool(has_null) is False\n\n\ndef run_with_dask_array(DMatrixT: Type, client: Client) -> None:\n    import cupy as cp\n\n    cp.cuda.runtime.setDevice(0)\n    X, y, _ = generate_array()\n\n    X = X.map_blocks(cp.asarray)  # type: ignore\n    y = y.map_blocks(cp.asarray)  # type: ignore\n    dtrain = DMatrixT(client, X, y)\n    out = dxgb.train(\n        client,\n        {\"tree_method\": \"hist\", \"debug_synchronize\": True, \"device\": \"cuda\"},\n        dtrain=dtrain,\n        evals=[(dtrain, \"X\")],\n        num_boost_round=2,\n  
  )\n    from_dmatrix = dxgb.predict(client, out, dtrain).compute()\n    assert (\n        json.loads(out[\"booster\"].save_config())[\"learner\"][\"gradient_booster\"][\n            \"updater\"\n        ][0][\"name\"]\n        == \"grow_gpu_hist\"\n    )\n    inplace_predictions = dxgb.inplace_predict(client, out, X).compute()\n    single_node = out[\"booster\"].predict(xgb.DMatrix(X.compute()))\n    np.testing.assert_allclose(single_node, from_dmatrix)\n    device = cp.cuda.runtime.getDevice()\n    assert device == inplace_predictions.device.id\n    single_node = cp.array(single_node)\n    assert device == single_node.device.id\n    cp.testing.assert_allclose(single_node, inplace_predictions)\n\n\ndef to_cp(x: Any, DMatrixT: Type) -> Any:\n    import cupy\n\n    if isinstance(x, np.ndarray) and DMatrixT is dxgb.DaskQuantileDMatrix:\n        X = cupy.array(x)\n    else:\n        X = x\n    return X\n\n\ndef run_gpu_hist(\n    params: Dict,\n    num_rounds: int,\n    dataset: tm.TestDataset,\n    DMatrixT: Type,\n    client: Client,\n) -> None:\n    params[\"device\"] = \"cuda\"\n    params = dataset.set_params(params)\n    # It doesn't make sense to distribute a completely\n    # empty dataset.\n    if dataset.X.shape[0] == 0:\n        return\n\n    chunk = 128\n    X = to_cp(dataset.X, DMatrixT)\n    X = da.from_array(X, chunks=(chunk, dataset.X.shape[1]))\n    y = to_cp(dataset.y, DMatrixT)\n    y_chunk = chunk if len(dataset.y.shape) == 1 else (chunk, dataset.y.shape[1])\n    y = da.from_array(y, chunks=y_chunk)\n\n    if dataset.w is not None:\n        w = to_cp(dataset.w, DMatrixT)\n        w = da.from_array(w, chunks=(chunk,))\n    else:\n        w = None\n\n    if DMatrixT is dxgb.DaskQuantileDMatrix:\n        m = DMatrixT(\n            client, data=X, label=y, weight=w, max_bin=params.get(\"max_bin\", 256)\n        )\n    else:\n        m = DMatrixT(client, data=X, label=y, weight=w)\n    history = dxgb.train(\n        client,\n        params=params,\n     
   dtrain=m,\n        num_boost_round=num_rounds,\n        evals=[(m, \"train\")],\n    )[\"history\"][\"train\"][dataset.metric]\n    note(str(history))\n\n    # See note on `ObjFunction::UpdateTreeLeaf`.\n    update_leaf = dataset.name.endswith(\"-l1\")\n    if update_leaf:\n        assert history[0] + 1e-2 >= history[-1]\n        return\n    else:\n        assert tm.non_increasing(history)\n\n\ndef test_tree_stats() -> None:\n    with LocalCUDACluster(n_workers=1) as cluster:\n        with Client(cluster) as client:\n            local = run_tree_stats(client, \"hist\", \"cuda\")\n\n    with LocalCUDACluster(n_workers=2) as cluster:\n        with Client(cluster) as client:\n            distributed = run_tree_stats(client, \"hist\", \"cuda\")\n\n    assert local == distributed\n\n\nclass TestDistributedGPU:\n    @pytest.mark.skipif(**tm.no_cudf())\n    def test_boost_from_prediction(self, local_cuda_client: Client) -> None:\n        from sklearn.datasets import load_breast_cancer, load_iris\n\n        X_, y_ = load_breast_cancer(return_X_y=True)\n        X = dd.from_array(X_, chunksize=100).to_backend(\"cudf\")\n        y = dd.from_array(y_, chunksize=100).to_backend(\"cudf\")\n        run_boost_from_prediction(X, y, \"hist\", \"cuda\", local_cuda_client)\n\n        X_, y_ = load_iris(return_X_y=True)\n        X = dd.from_array(X_, chunksize=50).to_backend(\"cudf\")\n        y = dd.from_array(y_, chunksize=50).to_backend(\"cudf\")\n        run_boost_from_prediction_multi_class(X, y, \"hist\", \"cuda\", local_cuda_client)\n\n    def test_init_estimation(self, local_cuda_client: Client) -> None:\n        check_init_estimation(\"hist\", \"cuda\", local_cuda_client)\n\n    def test_uneven_nan(self) -> None:\n        n_workers = 2\n        with LocalCUDACluster(n_workers=n_workers) as cluster:\n            with Client(cluster) as client:\n                check_uneven_nan(client, \"hist\", \"cuda\", n_workers)\n\n    @pytest.mark.skipif(**tm.no_dask_cudf())\n    
@pytest.mark.xfail(reason=\"Incompatible with Dask 2025.2.0+\")\n    def test_dask_dataframe(self, local_cuda_client: Client) -> None:\n        run_with_dask_dataframe(dxgb.DaskDMatrix, local_cuda_client)\n        run_with_dask_dataframe(dxgb.DaskQuantileDMatrix, local_cuda_client)\n\n    @given(\n        params=hist_parameter_strategy,\n        num_rounds=strategies.integers(1, 20),\n        dataset=tm.make_dataset_strategy(),\n        dmatrix_type=strategies.sampled_from(\n            [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]\n        ),\n    )\n    @settings(\n        deadline=duration(seconds=120),\n        max_examples=20,\n        suppress_health_check=suppress,\n        print_blob=True,\n    )\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_gpu_hist(\n        self,\n        params: Dict,\n        num_rounds: int,\n        dataset: tm.TestDataset,\n        dmatrix_type: type,\n        local_cuda_client: Client,\n    ) -> None:\n        params[\"tree_method\"] = \"hist\"\n        run_gpu_hist(params, num_rounds, dataset, dmatrix_type, local_cuda_client)\n\n    @given(\n        params=hist_parameter_strategy,\n        num_rounds=strategies.integers(1, 20),\n        dataset=tm.make_dataset_strategy(),\n    )\n    @settings(\n        deadline=duration(seconds=120),\n        max_examples=20,\n        suppress_health_check=suppress,\n        print_blob=True,\n    )\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_gpu_approx(\n        self,\n        params: Dict,\n        num_rounds: int,\n        dataset: tm.TestDataset,\n        local_cuda_client: Client,\n    ) -> None:\n        params[\"tree_method\"] = \"approx\"\n        run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, local_cuda_client)\n\n    def test_empty_quantile_dmatrix(self, local_cuda_client: Client) -> None:\n        client = local_cuda_client\n\n        X, y = make_categorical(client, 1, 30, 13)\n        X_valid, y_valid = make_categorical(client, 10000, 30, 13)\n\n        
Xy = dxgb.DaskQuantileDMatrix(client, X, y)\n        Xy_valid = dxgb.DaskQuantileDMatrix(client, X_valid, y_valid, ref=Xy)\n        # The error is from a worker. Dask cannot prioritize which worker's error to\n        # propagate, it could be the emtpy DMatrix error or the collective communication\n        # error. As a result, the test doesn't match the error message.\n        with pytest.raises(ValueError):\n            dxgb.train(\n                client,\n                {\"tree_method\": \"hist\", \"device\": \"cuda\", \"debug_synchronize\": True},\n                Xy,\n                num_boost_round=10,\n                evals=[(Xy_valid, \"Valid\")],\n            )\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_dask_array(self, local_cuda_client: Client) -> None:\n        run_with_dask_array(dxgb.DaskDMatrix, local_cuda_client)\n        run_with_dask_array(dxgb.DaskQuantileDMatrix, local_cuda_client)\n\n    @pytest.mark.skipif(**tm.no_cupy())\n    def test_early_stopping(self, local_cuda_client: Client) -> None:\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        X, y = da.from_array(X), da.from_array(y)\n\n        m = dxgb.DaskDMatrix(local_cuda_client, X, y)\n\n        valid = dxgb.DaskDMatrix(local_cuda_client, X, y)\n        early_stopping_rounds = 5\n        booster = dxgb.train(\n            local_cuda_client,\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": \"error\",\n                \"tree_method\": \"hist\",\n                \"device\": \"cuda\",\n            },\n            m,\n            evals=[(valid, \"Valid\")],\n            num_boost_round=1000,\n            early_stopping_rounds=early_stopping_rounds,\n        )[\"booster\"]\n        assert hasattr(booster, \"best_score\")\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    
    valid_X = X\n        valid_y = y\n        cls = dxgb.DaskXGBClassifier(\n            objective=\"binary:logistic\",\n            tree_method=\"hist\",\n            device=\"cuda\",\n            eval_metric=\"error\",\n            n_estimators=100,\n            early_stopping_rounds=early_stopping_rounds,\n        )\n        cls.client = local_cuda_client\n        cls.fit(\n            X,\n            y,\n            eval_set=[(valid_X, valid_y)],\n        )\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    @pytest.mark.xfail(\n        dask_version_ge110, reason=\"Test cannot pass with Dask 2024.11.0+\"\n    )\n    @pytest.mark.skipif(**tm.no_cudf())\n    @pytest.mark.parametrize(\"model\", [\"boosting\"])\n    def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None:\n        X_, y_, w_ = generate_array(with_weights=True)\n        y_ = (y_ * 10).astype(np.int32)\n        X = dd.from_dask_array(X_).to_backend(\"cudf\")\n        y = dd.from_dask_array(y_).to_backend(\"cudf\")\n        w = dd.from_dask_array(w_).to_backend(\"cudf\")\n        run_dask_classifier(X, y, w, model, \"hist\", \"cuda\", local_cuda_client, 10)\n\n    def test_empty_dmatrix(self, local_cuda_client: Client) -> None:\n        parameters = {\n            \"tree_method\": \"hist\",\n            \"debug_synchronize\": True,\n            \"device\": \"cuda\",\n        }\n        run_empty_dmatrix_reg(local_cuda_client, parameters)\n        run_empty_dmatrix_cls(local_cuda_client, parameters)\n\n    @pytest.mark.skipif(**tm.no_dask_cudf())\n    def test_empty_partition(self, local_cuda_client: Client) -> None:\n        import cudf\n        import cupy\n        import dask_cudf\n\n        mult = 100\n        df = cudf.DataFrame(\n            {\n                \"a\": [1, 2, 3, 4, 5.1] * mult,\n                \"b\": [10, 15, 29.3, 30, 31] * mult,\n   
             \"y\": [10, 20, 30, 40.0, 50] * mult,\n            }\n        )\n        parameters = {\n            \"tree_method\": \"hist\",\n            \"debug_synchronize\": True,\n            \"device\": \"cuda\",\n        }\n\n        empty = df.iloc[:0]\n        ddf = dask_cudf.concat(\n            [dask_cudf.from_cudf(empty, npartitions=1)]\n            + [dask_cudf.from_cudf(df, npartitions=3)]\n            + [dask_cudf.from_cudf(df, npartitions=3)]\n        )\n        X = ddf[ddf.columns.difference([\"y\"])]\n        y = ddf[[\"y\"]]\n        dtrain = dxgb.DaskQuantileDMatrix(local_cuda_client, X, y)\n        bst_empty = dxgb.train(\n            local_cuda_client, parameters, dtrain, evals=[(dtrain, \"train\")]\n        )\n        predt_empty = dxgb.predict(local_cuda_client, bst_empty, X).compute().values\n\n        ddf = dask_cudf.concat(\n            [dask_cudf.from_cudf(df, npartitions=3)]\n            + [dask_cudf.from_cudf(df, npartitions=3)]\n        )\n        X = ddf[ddf.columns.difference([\"y\"])]\n        y = ddf[[\"y\"]]\n        dtrain = dxgb.DaskQuantileDMatrix(local_cuda_client, X, y)\n        bst = dxgb.train(\n            local_cuda_client, parameters, dtrain, evals=[(dtrain, \"train\")]\n        )\n\n        predt = dxgb.predict(local_cuda_client, bst, X).compute().values\n        cupy.testing.assert_allclose(predt, predt_empty)\n\n        predt = dxgb.predict(local_cuda_client, bst, dtrain).compute()\n        cupy.testing.assert_allclose(predt, predt_empty)\n\n        predt = dxgb.inplace_predict(local_cuda_client, bst, X).compute().values\n        cupy.testing.assert_allclose(predt, predt_empty)\n\n        df = df.to_pandas()\n        empty = df.iloc[:0]\n        ddf = dd.concat(\n            [dd.from_pandas(empty, npartitions=1)]\n            + [dd.from_pandas(df, npartitions=3)]\n            + [dd.from_pandas(df, npartitions=3)]\n        )\n        X = ddf[ddf.columns.difference([\"y\"])]\n        y = ddf[[\"y\"]]\n\n        
predt_empty = cupy.asnumpy(predt_empty)\n\n        predt = dxgb.predict(local_cuda_client, bst_empty, X).compute().values\n        np.testing.assert_allclose(predt, predt_empty)\n\n        in_predt = (\n            dxgb.inplace_predict(local_cuda_client, bst_empty, X).compute().values\n        )\n        np.testing.assert_allclose(predt, in_predt)\n\n    def test_empty_dmatrix_auc(self, local_cuda_client: Client) -> None:\n        n_workers = len(tm.dask.get_client_workers(local_cuda_client))\n        run_empty_dmatrix_auc(local_cuda_client, \"cuda\", n_workers)\n\n    def test_auc(self, local_cuda_client: Client) -> None:\n        run_auc(local_cuda_client, \"cuda\")\n\n    def test_invalid_ordinal(self, local_cuda_client: Client) -> None:\n        \"\"\"One should not specify the device ordinal with dask.\"\"\"\n        with pytest.raises(ValueError, match=\"device=cuda\"):\n            X, y, _ = generate_array()\n            m = dxgb.DaskDMatrix(local_cuda_client, X, y)\n            dxgb.train(local_cuda_client, {\"device\": \"cuda:0\"}, m)\n\n        booster = dxgb.train(local_cuda_client, {\"device\": \"cuda\"}, m)[\"booster\"]\n        assert (\n            json.loads(booster.save_config())[\"learner\"][\"generic_param\"][\"device\"]\n            == \"cuda:0\"\n        )\n\n    def test_data_initialization(self, local_cuda_client: Client) -> None:\n        X, y, _ = generate_array()\n        fw = da.random.random((random_cols,))\n        fw = fw - fw.min()\n        m = dxgb.DaskDMatrix(local_cuda_client, X, y, feature_weights=fw)\n\n        workers = tm.dask.get_client_workers(local_cuda_client)\n        rabit_args = get_rabit_args(local_cuda_client, len(workers))\n\n        def worker_fn(worker_addr: str, data_ref: Dict) -> None:\n            with dxgb.CommunicatorContext(**rabit_args):\n                from xgboost.dask.data import _dmatrix_from_list_of_parts\n\n                local_dtrain = _dmatrix_from_list_of_parts(\n                    **data_ref, 
nthread=7, model=None, Xy_cats=None\n                )\n                fw_rows = local_dtrain.get_float_info(\"feature_weights\").shape[0]\n                assert fw_rows == local_dtrain.num_col()\n\n        futures = []\n        for i in range(len(workers)):\n            futures.append(\n                local_cuda_client.submit(\n                    worker_fn,\n                    workers[i],\n                    m._create_fn_args(workers[i]),\n                    pure=False,\n                    workers=[workers[i]],\n                )\n            )\n        local_cuda_client.gather(futures)\n\n    def test_interface_consistency(self) -> None:\n        \"\"\"Check parameters are roughly the same between various DMatrices, with the\n        same ordering.\n\n        \"\"\"\n\n        def comp_dm_qdm(dm_names: List[str], qdm_names: List[str]) -> None:\n            qdm_only = {\"max_bin\", \"ref\", \"max_quantile_batches\"}\n            assert len(dm_names) == len(qdm_names) - len(qdm_only)\n            i, j = 0, 0\n            while i < len(dm_names) and j < len(qdm_names):\n                if qdm_names[j] in qdm_only:\n                    j += 1\n                    continue\n                assert dm_names[i] == qdm_names[j]\n                i += 1\n                j += 1\n\n        # DaskDMatrix <-> DaskQuantileDMatrix\n        sig = OrderedDict(signature(dxgb.DaskDMatrix).parameters)\n        ddm_names = list(sig.keys())\n\n        sig = OrderedDict(signature(dxgb.DaskQuantileDMatrix).parameters)\n        dqdm_names = list(sig.keys())\n\n        comp_dm_qdm(ddm_names, dqdm_names)\n\n        # DMatrix <-> QuantileDMatrix\n        sig = OrderedDict(signature(xgb.DMatrix).parameters)\n        dm_names = list(sig.keys())\n        sig = OrderedDict(signature(xgb.QuantileDMatrix).parameters)\n        qdm_names = list(sig.keys())\n        comp_dm_qdm(dm_names, qdm_names)\n\n        def comp_dm_ddm(dm_names: List[str], ddm_names: List[str]) -> None:\n            
dm_only = {\"nthread\", \"data_split_mode\"}\n            ddm_only = {\"client\"}\n            assert len(dm_names) - len(dm_only) == len(ddm_names) - len(ddm_only)\n            i, j = 0, 0\n            while i < len(dm_names) and j < len(ddm_names):\n                if dm_names[i] in dm_only:\n                    i += 1\n                    continue\n                elif ddm_names[j] in ddm_only:\n                    j += 1\n                    continue\n                assert dm_names[i] == ddm_names[j]\n                i += 1\n                j += 1\n\n        # DaskDMatrix <-> DMatrix\n        comp_dm_ddm(dm_names, ddm_names)\n\n        # QuantileDMatrix <-> DaskQuantileDMatrix\n        comp_dm_ddm(qdm_names, dqdm_names)\n\n        sig = OrderedDict(signature(xgb.XGBRanker.fit).parameters)\n        ranker_names = list(sig.keys())\n        sig = OrderedDict(signature(dxgb.DaskXGBRanker.fit).parameters)\n        dranker_names = list(sig.keys())\n\n        for rn, drn in zip(ranker_names, dranker_names):\n            assert rn == drn\n\n\n@pytest.mark.skipif(**tm.no_dask_cudf())\ndef test_categorical(tmp_path: Path, local_cuda_client: Client) -> None:\n    X, y = make_categorical(local_cuda_client, 10000, 30, 13)\n    X = X.to_backend(\"cudf\")\n\n    X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, onehot=True)\n    X_onehot = X_onehot.to_backend(\"cudf\")\n    run_categorical(local_cuda_client, \"hist\", \"cuda\", X, X_onehot, y, tmp_path)\n\n\n@pytest.mark.skipif(**tm.no_dask_cudf())\ndef test_recode(local_cuda_client: Client) -> None:\n    with dask.config.set(\n        {\n            \"array.backend\": \"cupy\",\n            \"dataframe.backend\": \"cudf\",\n        }\n    ):\n        run_recode(local_cuda_client, \"cuda\")\n\n\n@pytest.mark.skipif(**tm.no_cupy())\ndef test_with_asyncio(local_cuda_client: Client) -> None:\n    address = local_cuda_client.scheduler.address\n    output = asyncio.run(run_from_dask_array_asyncio(address))\n    
assert isinstance(output[\"booster\"], xgb.Booster)\n    assert isinstance(output[\"history\"], dict)\n\n\n@pytest.mark.skipif(\n    condition=not xgb.build_info()[\"USE_DLOPEN_NCCL\"],\n    reason=\"Not compiled with dlopen.\",\n)\ndef test_invalid_nccl(local_cuda_client: Client) -> None:\n    client = local_cuda_client\n    workers = tm.dask.get_client_workers(client)\n    args = get_rabit_args(client, len(workers))\n\n    def run(wid: int) -> None:\n        ctx = CommunicatorContext(dmlc_nccl_path=\"foo\", **args)\n        X, y, w = tm.make_regression(n_samples=10, n_features=10, use_cupy=True)\n\n        with ctx:\n            with pytest.raises(ValueError, match=r\"pip install\"):\n                xgb.QuantileDMatrix(X, y, weight=w)\n\n    futures = client.map(run, range(len(workers)), workers=workers)\n    client.gather(futures)\n\n\n@pytest.mark.skipif(\n    condition=not xgb.build_info()[\"USE_DLOPEN_NCCL\"],\n    reason=\"Not compiled with dlopen.\",\n)\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:\n    X, y, w = tm.make_regression(128, 16, use_cupy=True)\n\n    def make_model() -> None:\n        xgb.XGBRegressor(\n            device=\"cuda\",\n            tree_method=tree_method,\n            objective=\"reg:quantileerror\",\n            verbosity=2,\n            quantile_alpha=[0.2, 0.8],\n        ).fit(X, y, sample_weight=w)\n\n    # no nccl load when using single-node.\n    with tm.captured_output() as (out, err):\n        make_model()\n        assert out.getvalue().find(\"NCCL\") == -1\n        assert err.getvalue().find(\"NCCL\") == -1\n\n    client = local_cuda_client\n    workers = tm.dask.get_client_workers(client)\n    args = get_rabit_args(client, len(workers))\n\n    # nccl is loaded\n    def run(wid: int) -> None:\n        # FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147\n        from xgboost._c_api import _LIB, 
_register_log_callback\n\n        _register_log_callback(_LIB)\n\n        with CommunicatorContext(**args):\n            with tm.captured_output() as (out, err):\n                make_model()\n                assert out.getvalue().find(\"Loaded shared NCCL\") != -1, out.getvalue()\n\n    futures = client.map(run, range(len(workers)), workers=workers)\n    client.gather(futures)\n\n\nasync def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainReturnT:\n    async with Client(scheduler_address, asynchronous=True) as client:\n        import cupy as cp\n\n        X, y, _ = generate_array()\n        X = X.to_backend(\"cupy\")\n        y = y.to_backend(\"cupy\")\n\n        m: dxgb.DaskDMatrix = await dxgb.DaskQuantileDMatrix(client, X, y)\n        output = await dxgb.train(\n            client, {\"tree_method\": \"hist\", \"device\": \"cuda\"}, dtrain=m\n        )\n\n        with_m = await dxgb.predict(client, output, m)\n        with_X = await dxgb.predict(client, output, X)\n        inplace = await dxgb.inplace_predict(client, output, X)\n        assert isinstance(with_m, da.Array)\n        assert isinstance(with_X, da.Array)\n        assert isinstance(inplace, da.Array)\n\n        cp.testing.assert_allclose(\n            await client.compute(with_m), await client.compute(with_X)\n        )\n        cp.testing.assert_allclose(\n            await client.compute(with_m), await client.compute(inplace)\n        )\n\n        client.shutdown()\n        return output\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/__init__.py",
    "content": "\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/conftest.py",
    "content": "\"\"\"Shared fixtures for Dask tests.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nfrom typing import Any, Dict, Generator\n\nimport pytest\nfrom distributed import Client, LocalCluster\n\n\n@pytest.fixture(scope=\"session\")\ndef client_kwargs(request: pytest.FixtureRequest) -> Dict[str, Any]:\n    n_threads = os.cpu_count()\n    assert n_threads is not None\n    kwargs: Dict[str, Any] = {\n        \"n_workers\": 2,\n        \"threads_per_worker\": max(1, n_threads // 2),\n        \"dashboard_address\": \":0\",\n    }\n    if hasattr(request, \"param\"):\n        kwargs.update(request.param)\n    return kwargs\n\n\n@pytest.fixture(scope=\"session\")\ndef client(client_kwargs: Dict[str, Any]) -> Generator[Client, None, None]:\n    with LocalCluster(**client_kwargs) as dask_cluster:\n        with Client(dask_cluster) as dask_client:\n            yield dask_client\n\n\n@pytest.fixture(autouse=True)\ndef client_as_current(request: pytest.FixtureRequest) -> Generator[None, None, None]:\n    for name in (\"client\", \"client_one_worker\"):\n        if name in request.fixturenames:\n            dask_client = request.getfixturevalue(name)\n            with dask_client.as_current():\n                yield\n            return\n    yield\n\n\n@pytest.fixture(scope=\"session\")\ndef client_one_worker() -> Generator[Client, None, None]:\n    n_threads = os.cpu_count()\n    assert n_threads is not None\n    with LocalCluster(\n        n_workers=1, threads_per_worker=max(1, n_threads), dashboard_address=\":0\"\n    ) as dask_cluster:\n        with Client(dask_cluster) as dask_client:\n            yield dask_client\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/test_demos.py",
    "content": "import os\nimport subprocess\n\nimport pytest\n\nfrom xgboost import testing as tm\n\n\n@pytest.mark.skipif(**tm.no_dask())\ndef test_dask_cpu_training_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"cpu_training.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_dask())\ndef test_dask_cpu_survival_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"cpu_survival.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n# Not actually run on CI due to missing dask_ml.\n@pytest.mark.skipif(**tm.no_dask())\n@pytest.mark.skipif(**tm.no_dask_ml())\ndef test_dask_callbacks_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"dask_callbacks.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n\n\n@pytest.mark.skipif(**tm.no_dask())\ndef test_dask_sklearn_demo() -> None:\n    script = os.path.join(tm.demo_dir(__file__), \"dask\", \"sklearn_cpu_training.py\")\n    cmd = [\"python\", script]\n    subprocess.check_call(cmd)\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/test_external_memory.py",
    "content": "\"\"\"Copyright 2024, XGBoost contributors\"\"\"\n\nimport pytest\nfrom distributed import Client, Scheduler, Worker\nfrom distributed.utils_test import gen_cluster\n\nfrom xgboost import testing as tm\nfrom xgboost.testing.dask import check_external_memory, get_rabit_args\n\n\n@pytest.mark.parametrize(\"is_qdm\", [True, False])\n@gen_cluster(client=True)\nasync def test_external_memory(\n    client: Client, s: Scheduler, a: Worker, b: Worker, is_qdm: bool\n) -> None:\n    workers = tm.dask.get_client_workers(client)\n    n_workers = len(workers)\n    args = await get_rabit_args(client, n_workers)\n\n    futs = client.map(\n        check_external_memory,\n        range(n_workers),\n        n_workers=n_workers,\n        device=\"cpu\",\n        comm_args=args,\n        is_qdm=is_qdm,\n    )\n    await client.gather(futs)\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/test_ranking.py",
    "content": "\"\"\"Copyright 2019-2024, XGBoost contributors\"\"\"\n\nimport numpy as np\nimport pytest\nimport scipy.sparse\nfrom dask import dataframe as dd\nfrom distributed import Client\nfrom xgboost import dask as dxgb\nfrom xgboost import testing as tm\nfrom xgboost.testing import dask as dtm\n\n\ndef test_dask_ranking(client: Client) -> None:\n    dpath = \"demo/\"\n    mq2008 = tm.data.get_mq2008(dpath)\n    data = []\n    for d in mq2008:\n        if isinstance(d, scipy.sparse.csr_matrix):\n            d[d == 0] = np.inf\n            d = d.toarray()\n            d[d == 0] = np.nan\n            d[np.isinf(d)] = 0\n            data.append(dd.from_array(d, chunksize=32))\n        else:\n            data.append(dd.from_array(d, chunksize=32))\n\n    (\n        x_train,\n        y_train,\n        qid_train,\n        x_test,\n        y_test,\n        qid_test,\n        x_valid,\n        y_valid,\n        qid_valid,\n    ) = data\n    qid_train = qid_train.astype(np.uint32)\n    qid_valid = qid_valid.astype(np.uint32)\n    qid_test = qid_test.astype(np.uint32)\n\n    rank = dxgb.DaskXGBRanker(\n        learning_rate=0.5,\n        n_estimators=2500,\n        eval_metric=[\"ndcg\"],\n        early_stopping_rounds=5,\n        allow_group_split=True,\n    )\n    rank.fit(\n        x_train,\n        y_train,\n        qid=qid_train,\n        eval_set=[(x_test, y_test), (x_train, y_train)],\n        eval_qid=[qid_test, qid_train],\n        verbose=True,\n    )\n    assert rank.n_features_in_ == 46\n    assert rank.best_score > 0.98\n\n\n@pytest.mark.filterwarnings(\"error\")\ndef test_no_group_split(client: Client) -> None:\n    dtm.check_no_group_split(client, \"cpu\")\n"
  },
  {
    "path": "tests/test_distributed/test_with_dask/test_with_dask.py",
    "content": "\"\"\"Copyright 2019-2026, XGBoost contributors\"\"\"\n\nimport asyncio\nimport json\nimport os\nimport pickle\nimport socket\nfrom concurrent.futures import ThreadPoolExecutor\nfrom functools import partial\nfrom pathlib import Path\nfrom typing import Any, Dict, Literal, Optional, Tuple, Type, Union\n\nimport dask\nimport dask.array as da\nimport dask.dataframe as dd\nimport distributed\nimport hypothesis\nimport numpy as np\nimport pytest\nimport scipy\nimport sklearn\nimport xgboost as xgb\nfrom distributed import Client, LocalCluster, Nanny, Worker\nfrom distributed.scheduler import KilledWorker, Scheduler\nfrom distributed.utils_test import async_poll_for, gen_cluster\nfrom hypothesis import HealthCheck, assume, given, note, settings\nfrom sklearn.datasets import make_classification, make_regression\nfrom sklearn.model_selection import train_test_split\nfrom xgboost import collective as coll\nfrom xgboost import dask as dxgb\nfrom xgboost import testing as tm\nfrom xgboost.collective import Config as CollConfig\nfrom xgboost.dask import DaskDMatrix\nfrom xgboost.testing.dask import (\n    check_init_estimation,\n    check_uneven_nan,\n    get_rabit_args,\n    make_categorical,\n    run_recode,\n)\nfrom xgboost.testing.data import get_california_housing\nfrom xgboost.testing.params import hist_cache_strategy, hist_parameter_strategy\nfrom xgboost.testing.shared import (\n    get_feature_weights,\n    validate_data_initialization,\n    validate_leaf_output,\n)\nfrom xgboost.testing.updater import get_basescore\n\ndask.config.set({\"distributed.scheduler.allowed-failures\": False})\n\npytestmark = tm.timeout(60)\n\n\nif hasattr(HealthCheck, \"function_scoped_fixture\"):\n    suppress = [HealthCheck.function_scoped_fixture]\nelse:\n    suppress = hypothesis.utils.conventions.not_set  # type: ignore\n\n\nkRows = 1000\nkCols = 10\nkWorkers = 5\n\n\ndef generate_array(\n    with_weights: bool = False,\n) -> Tuple[da.Array, da.Array, 
Optional[da.Array]]:\n    chunk_size = 20\n    rng = da.random.RandomState(1994)\n    X = rng.random_sample((kRows, kCols), chunks=(chunk_size, -1))\n    y = rng.random_sample(kRows, chunks=chunk_size)\n    if with_weights:\n        w = rng.random_sample(kRows, chunks=chunk_size)\n        return X, y, w\n    return X, y, None\n\n\n@pytest.mark.parametrize(\"to_frame\", [True, False])\ndef test_xgbclassifier_classes_type_and_value(to_frame: bool, client: \"Client\") -> None:\n    X, y = make_classification(n_samples=1000, n_features=4, random_state=123)\n    if to_frame:\n        import pandas as pd\n\n        feats = [f\"var_{i}\" for i in range(4)]\n        df = pd.DataFrame(X, columns=feats)\n        df[\"target\"] = y\n        df = dd.from_pandas(df, npartitions=1)\n        X, y = df[feats], df[\"target\"]\n    else:\n        X = da.from_array(X)\n        y = da.from_array(y)\n\n    est = dxgb.DaskXGBClassifier(n_estimators=10).fit(X, y)\n    assert isinstance(est.classes_, np.ndarray)\n    np.testing.assert_array_equal(est.classes_, np.array([0, 1]))\n\n\ndef test_from_dask_dataframe(client: \"Client\") -> None:\n    X_, y_, _ = generate_array()\n\n    X = dd.from_dask_array(X_)\n    y = dd.from_dask_array(y_)\n\n    dtrain = DaskDMatrix(client, X, y)\n    booster = dxgb.train(client, {}, dtrain, num_boost_round=2)[\"booster\"]\n\n    prediction = dxgb.predict(client, model=booster, data=dtrain)\n\n    assert prediction.ndim == 1\n    assert isinstance(prediction, da.Array)\n    assert prediction.shape[0] == kRows\n\n    with pytest.raises(TypeError):\n        # evals_result is not supported in dask interface.\n        dxgb.train(  # type: ignore\n            client, {}, dtrain, num_boost_round=2, evals_result={}\n        )\n    # force prediction to be computed\n    from_dmatrix = prediction.compute()\n\n    prediction = dxgb.predict(client, model=booster, data=X)\n    from_df = prediction.compute()\n\n    assert isinstance(prediction, dd.Series)\n    assert 
np.all(prediction.compute().values == from_dmatrix)\n    assert np.all(from_dmatrix == from_df.to_numpy())\n\n    series_predictions = dxgb.inplace_predict(client, booster, X)\n    assert isinstance(series_predictions, dd.Series)\n    np.testing.assert_allclose(series_predictions.compute().values, from_dmatrix)\n\n    # Make sure the output can be integrated back to original dataframe\n    X[\"predict\"] = prediction\n    X[\"inplace_predict\"] = series_predictions\n\n    assert bool(X.isnull().values.any().compute()) is False\n\n\ndef test_from_dask_array(client: \"Client\") -> None:\n    X, y, _ = generate_array()\n    dtrain = DaskDMatrix(client, X, y)\n    # results is {'booster': Booster, 'history': {...}}\n    result = dxgb.train(client, {}, dtrain)\n\n    prediction = dxgb.predict(client, result, dtrain)\n    assert prediction.shape[0] == kRows\n\n    assert isinstance(prediction, da.Array)\n    # force prediction to be computed\n    prediction = prediction.compute()\n\n    booster: xgb.Booster = result[\"booster\"]\n    single_node_predt = booster.predict(xgb.DMatrix(X.compute()))\n    np.testing.assert_allclose(prediction, single_node_predt)\n\n    config = json.loads(booster.save_config())\n    scheduler_info = client.scheduler_info()\n    worker_nthreads = next(iter(scheduler_info[\"workers\"].values()))[\"nthreads\"]\n    assert int(config[\"learner\"][\"generic_param\"][\"nthread\"]) == worker_nthreads\n\n    from_arr = dxgb.predict(client, model=booster, data=X)\n\n    assert isinstance(from_arr, da.Array)\n    assert np.all(single_node_predt == from_arr.compute())\n\n\ndef test_dask_sparse(client: \"Client\") -> None:\n    X_, y_ = make_classification(n_samples=1000, n_informative=5, n_classes=3)\n    rng = np.random.default_rng(seed=0)\n    idx = rng.integers(low=0, high=X_.shape[0], size=X_.shape[0] // 4)\n    X_[idx, :] = np.nan\n\n    # numpy\n    X, y = da.from_array(X_), da.from_array(y_)\n    clf = dxgb.DaskXGBClassifier(tree_method=\"hist\", 
n_estimators=10)\n    clf.client = client\n    clf.fit(X, y, eval_set=[(X, y)])\n    dense_results = clf.evals_result()\n\n    # scipy sparse\n    X, y = da.from_array(X_).map_blocks(scipy.sparse.csr_matrix), da.from_array(y_)\n    clf = dxgb.DaskXGBClassifier(tree_method=\"hist\", n_estimators=10)\n    clf.client = client\n    clf.fit(X, y, eval_set=[(X, y)])\n    sparse_results = clf.evals_result()\n    np.testing.assert_allclose(\n        dense_results[\"validation_0\"][\"mlogloss\"],\n        sparse_results[\"validation_0\"][\"mlogloss\"],\n    )\n\n\ndef run_categorical(\n    client: \"Client\",\n    tree_method: str,\n    device: str,\n    X: dd.DataFrame,\n    X_onehot: dd.DataFrame,\n    y: dd.Series,\n    tmp_path: Path,\n) -> None:\n    # Force onehot\n    parameters = {\n        \"tree_method\": tree_method,\n        \"device\": device,\n        \"max_cat_to_onehot\": 9999,\n    }\n    rounds = 10\n    m = dxgb.DaskDMatrix(client, X_onehot, y)\n    by_etl_results = dxgb.train(\n        client,\n        parameters,\n        m,\n        num_boost_round=rounds,\n        evals=[(m, \"Train\")],\n    )[\"history\"]\n\n    m = dxgb.DaskDMatrix(client, X, y)\n    output = dxgb.train(\n        client,\n        parameters,\n        m,\n        num_boost_round=rounds,\n        evals=[(m, \"Train\")],\n    )\n    by_builtin_results = output[\"history\"]\n\n    np.testing.assert_allclose(\n        np.array(by_etl_results[\"Train\"][\"rmse\"]),\n        np.array(by_builtin_results[\"Train\"][\"rmse\"]),\n        rtol=1e-3,\n    )\n    assert tm.non_increasing(by_builtin_results[\"Train\"][\"rmse\"])\n\n    check_model_counter = [0]\n\n    def check_model_output(model: dxgb.Booster) -> None:\n        path = tmp_path / f\"model_{check_model_counter[0]}.json\"\n        check_model_counter[0] += 1\n        model.save_model(path)\n        with open(path, \"r\") as fd:\n            categorical = json.load(fd)\n\n        categories_sizes = np.array(\n            
categorical[\"learner\"][\"gradient_booster\"][\"model\"][\"trees\"][-1][\n                \"categories_sizes\"\n            ]\n        )\n        assert categories_sizes.shape[0] != 0\n        np.testing.assert_allclose(categories_sizes, 1)\n\n    check_model_output(output[\"booster\"])\n    reg = dxgb.DaskXGBRegressor(\n        n_estimators=10,\n        tree_method=tree_method,\n        device=device,\n        # force onehot\n        max_cat_to_onehot=9999,\n    )\n    reg.fit(X, y)\n\n    check_model_output(reg.get_booster())\n\n    reg = dxgb.DaskXGBRegressor(n_estimators=10, tree_method=\"exact\")\n    with pytest.raises(ValueError, match=\"distributed training\"):\n        reg.fit(X, y)\n    # check partition based\n    reg = dxgb.DaskXGBRegressor(\n        n_estimators=10,\n        tree_method=tree_method,\n        device=device,\n    )\n    reg.fit(X, y, eval_set=[(X, y)])\n    assert tm.non_increasing(reg.evals_result()[\"validation_0\"][\"rmse\"])\n\n    booster = reg.get_booster()\n    predt = dxgb.predict(client, booster, X).compute().values\n    inpredt = dxgb.inplace_predict(client, booster, X).compute().values\n\n    if hasattr(predt, \"get\"):\n        predt = predt.get()\n    if hasattr(inpredt, \"get\"):\n        inpredt = inpredt.get()\n\n    np.testing.assert_allclose(predt, inpredt)\n\n\ndef test_categorical(client: \"Client\", tmp_path: Path) -> None:\n    X, y = make_categorical(client, 3000, 30, 13)\n    X_onehot, _ = make_categorical(client, 3000, 30, 13, onehot=True)\n    run_categorical(client, \"approx\", \"cpu\", X, X_onehot, y, tmp_path)\n    run_categorical(client, \"hist\", \"cpu\", X, X_onehot, y, tmp_path)\n\n    ft = [\"c\"] * X.shape[1]\n    reg = dxgb.DaskXGBRegressor(tree_method=\"hist\", feature_types=ft)\n    reg.fit(X, y)\n    assert reg.get_booster().feature_types == ft\n\n\ndef test_recode(client: \"Client\") -> None:\n    run_recode(client, \"cpu\")\n\n\ndef test_dask_predict_shape_infer(client: \"Client\") -> None:\n    
X, y = make_classification(n_samples=kRows, n_informative=5, n_classes=3)\n    X_ = dd.from_array(X, chunksize=100)\n    y_ = dd.from_array(y, chunksize=100)\n    dtrain = dxgb.DaskDMatrix(client, data=X_, label=y_)\n\n    model = dxgb.train(\n        client, {\"objective\": \"multi:softprob\", \"num_class\": 3}, dtrain=dtrain\n    )\n\n    preds = dxgb.predict(client, model, dtrain)\n    assert preds.shape[0] == preds.compute().shape[0]\n    assert preds.shape[1] == preds.compute().shape[1]\n\n    prediction = dxgb.predict(client, model, X_, output_margin=True)\n    assert isinstance(prediction, dd.DataFrame)\n\n    prediction = prediction.compute()\n    assert prediction.ndim == 2\n    assert prediction.shape[0] == kRows\n    assert prediction.shape[1] == 3\n\n    prediction = dxgb.inplace_predict(client, model, X_, predict_type=\"margin\")\n    assert isinstance(prediction, dd.DataFrame)\n    prediction = prediction.compute()\n    assert prediction.ndim == 2\n    assert prediction.shape[0] == kRows\n    assert prediction.shape[1] == 3\n\n\ndef run_boost_from_prediction_multi_class(\n    X: dd.DataFrame,\n    y: dd.Series,\n    tree_method: str,\n    device: str,\n    client: \"Client\",\n) -> None:\n    model_0 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=4,\n        tree_method=tree_method,\n        max_bin=768,\n        device=device,\n    )\n    model_0.fit(X=X, y=y, eval_set=[(X, y)])\n    margin = dxgb.inplace_predict(\n        client, model_0.get_booster(), X, predict_type=\"margin\"\n    )\n    margin.columns = [f\"m_{i}\" for i in range(margin.shape[1])]\n\n    model_1 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=4,\n        tree_method=tree_method,\n        max_bin=768,\n        device=device,\n    )\n    model_1.fit(\n        X=X, y=y, base_margin=margin, eval_set=[(X, y)], base_margin_eval_set=[margin]\n    )\n    predictions_1 = dxgb.predict(\n        client,\n        
model_1.get_booster(),\n        dxgb.DaskDMatrix(client, X, base_margin=margin),\n        output_margin=True,\n    )\n\n    model_2 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=8,\n        tree_method=tree_method,\n        max_bin=768,\n        device=device,\n    )\n    model_2.fit(X=X, y=y, eval_set=[(X, y)])\n    predictions_2 = dxgb.inplace_predict(\n        client, model_2.get_booster(), X, predict_type=\"margin\"\n    )\n    a = predictions_1.compute()\n    b = predictions_2.compute()\n    # cupy/cudf\n    if hasattr(a, \"get\"):\n        a = a.get()\n    if hasattr(b, \"values\"):\n        b = b.values\n    if hasattr(b, \"get\"):\n        b = b.get()\n    np.testing.assert_allclose(a, b, atol=1e-5)\n\n\ndef run_boost_from_prediction(\n    X: dd.DataFrame,\n    y: dd.Series,\n    tree_method: str,\n    device: str,\n    client: \"Client\",\n) -> None:\n    X, y = client.persist([X, y])\n\n    model_0 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=3,\n        tree_method=tree_method,\n        max_bin=512,\n        device=device,\n    )\n    model_0.fit(X=X, y=y, eval_set=[(X, y)])\n    margin: dd.Series = model_0.predict(X, output_margin=True)\n\n    model_1 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=3,\n        tree_method=tree_method,\n        max_bin=512,\n        device=device,\n    )\n    model_1.fit(\n        X=X, y=y, base_margin=margin, eval_set=[(X, y)], base_margin_eval_set=[margin]\n    )\n    predictions_1: dd.Series = model_1.predict(X, base_margin=margin)\n\n    model_2 = dxgb.DaskXGBClassifier(\n        learning_rate=0.3,\n        n_estimators=6,\n        tree_method=tree_method,\n        max_bin=512,\n        device=device,\n    )\n    model_2.fit(X=X, y=y, eval_set=[(X, y)])\n    predictions_2: dd.Series = model_2.predict(X)\n\n    logloss_concat = (\n        model_0.evals_result()[\"validation_0\"][\"logloss\"]\n        + 
model_1.evals_result()[\"validation_0\"][\"logloss\"]\n    )\n    logloss_2 = model_2.evals_result()[\"validation_0\"][\"logloss\"]\n    np.testing.assert_allclose(logloss_concat, logloss_2, rtol=1e-4)\n\n    margined = dxgb.DaskXGBClassifier(n_estimators=4)\n    margined.fit(\n        X=X, y=y, base_margin=margin, eval_set=[(X, y)], base_margin_eval_set=[margin]\n    )\n\n    unmargined = dxgb.DaskXGBClassifier(n_estimators=4)\n    unmargined.fit(X=X, y=y, eval_set=[(X, y)], base_margin=margin)\n\n    margined_res = margined.evals_result()[\"validation_0\"][\"logloss\"]\n    unmargined_res = unmargined.evals_result()[\"validation_0\"][\"logloss\"]\n\n    assert len(margined_res) == len(unmargined_res)\n    for i in range(len(margined_res)):\n        # margined is correct one, so smaller error.\n        assert margined_res[i] < unmargined_res[i]\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_boost_from_prediction(tree_method: str, client_one_worker: \"Client\") -> None:\n    from sklearn.datasets import load_breast_cancer, load_digits\n\n    # This test has strict reproducibility requirements. However, Dask is freed to move\n    # partitions between workers and modify the partitions' size during the test. 
Given\n    # the lack of control over the partitioning logic, here we use a single worker as a\n    # workaround.\n    X_, y_ = load_breast_cancer(return_X_y=True)\n    X, y = dd.from_array(X_, chunksize=200), dd.from_array(y_, chunksize=200)\n    run_boost_from_prediction(X, y, tree_method, \"cpu\", client_one_worker)\n\n    X_, y_ = load_digits(return_X_y=True)\n    X_, _, y_, _ = train_test_split(\n        X_, y_, train_size=300, stratify=y_, random_state=1994\n    )\n    X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)\n    run_boost_from_prediction_multi_class(X, y, tree_method, \"cpu\", client_one_worker)\n\n\ndef test_inplace_predict(client: \"Client\") -> None:\n    from sklearn.datasets import load_diabetes\n\n    X_, y_ = load_diabetes(return_X_y=True)\n    X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)\n    reg = dxgb.DaskXGBRegressor(n_estimators=4)\n    reg.client = client\n    reg.fit(X, y)\n    booster = reg.get_booster()\n    base_margin = y\n\n    inplace = client.compute(\n        dxgb.inplace_predict(client, booster, X, base_margin=base_margin)\n    ).result()\n    Xy = dxgb.DaskDMatrix(client, X, base_margin=base_margin)\n    copied = client.compute(dxgb.predict(client, booster, Xy)).result()\n    np.testing.assert_allclose(inplace, copied)\n\n\ndef test_dask_missing_value_reg(client: \"Client\") -> None:\n    X_0 = np.ones((20 // 2, kCols))\n    X_1 = np.zeros((20 // 2, kCols))\n    X = np.concatenate([X_0, X_1], axis=0)\n    np.random.shuffle(X)\n    X = da.from_array(X)\n    X = X.rechunk(20, 1)\n    y = da.random.randint(0, 3, size=20)\n    y.rechunk(20)\n    regressor = dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2, missing=0.0)\n    regressor.client = client\n    regressor.set_params(tree_method=\"hist\")\n    regressor.fit(X, y, eval_set=[(X, y)])\n    dd_predt = client.compute(regressor.predict(X)).result()\n\n    np_X = client.compute(X).result()\n    np_predt = 
regressor.get_booster().predict(xgb.DMatrix(np_X, missing=0.0))\n    np.testing.assert_allclose(np_predt, dd_predt)\n\n\ndef test_dask_missing_value_cls(client: \"Client\") -> None:\n    X_0 = np.ones((kRows // 2, kCols))\n    X_1 = np.zeros((kRows // 2, kCols))\n    X = np.concatenate([X_0, X_1], axis=0)\n    np.random.shuffle(X)\n    X = da.from_array(X)\n    X = X.rechunk(20, None)\n    y = da.random.randint(0, 3, size=kRows)\n    y = y.rechunk(20, 1)\n    cls = dxgb.DaskXGBClassifier(\n        verbosity=1, n_estimators=2, tree_method=\"hist\", missing=0.0\n    )\n    cls.client = client\n    cls.fit(X, y, eval_set=[(X, y)])\n    dd_pred_proba = cls.predict_proba(X).compute()\n\n    np_X = X.compute()\n    np_pred_proba = cls.get_booster().predict(xgb.DMatrix(np_X, missing=0.0))\n    np.testing.assert_allclose(np_pred_proba, dd_pred_proba)\n\n    cls = dxgb.DaskXGBClassifier()\n    assert hasattr(cls, \"missing\")\n\n\n@pytest.mark.parametrize(\"model\", [\"boosting\", \"rf\"])\ndef test_dask_regressor(model: str, client: \"Client\") -> None:\n    X, y, w = generate_array(with_weights=True)\n    if model == \"boosting\":\n        regressor = dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)\n    else:\n        regressor = dxgb.DaskXGBRFRegressor(verbosity=1, n_estimators=2)\n\n    if hasattr(regressor, \"_estimator_type\"):\n        assert regressor._estimator_type == \"regressor\"\n    else:\n        assert regressor._get_type() == \"regressor\"\n    assert sklearn.base.is_regressor(regressor)\n\n    regressor.set_params(tree_method=\"hist\")\n    regressor.client = client\n    regressor.fit(X, y, sample_weight=w, eval_set=[(X, y)])\n    prediction = regressor.predict(X)\n\n    assert prediction.ndim == 1\n    assert prediction.shape[0] == kRows\n\n    history = regressor.evals_result()\n\n    assert isinstance(prediction, da.Array)\n    assert isinstance(history, dict)\n\n    assert list(history[\"validation_0\"].keys())[0] == \"rmse\"\n    forest = int(\n    
    json.loads(regressor.get_booster().save_config())[\"learner\"][\n            \"gradient_booster\"\n        ][\"gbtree_model_param\"][\"num_parallel_tree\"]\n    )\n\n    if model == \"boosting\":\n        assert len(history[\"validation_0\"][\"rmse\"]) == 2\n        assert forest == 1\n    else:\n        assert len(history[\"validation_0\"][\"rmse\"]) == 1\n        assert forest == 2\n\n\ndef run_dask_classifier(\n    X: dxgb._DataT,\n    y: dxgb._DaskCollection,\n    w: dxgb._DaskCollection,\n    model: str,\n    tree_method: Optional[str],\n    device: Literal[\"cpu\", \"cuda\"],\n    client: \"Client\",\n    n_classes: int,\n) -> None:\n    metric = \"merror\" if n_classes > 2 else \"logloss\"\n\n    if model == \"boosting\":\n        classifier = dxgb.DaskXGBClassifier(\n            verbosity=1,\n            n_estimators=2,\n            eval_metric=metric,\n            tree_method=tree_method,\n            device=device,\n        )\n    else:\n        classifier = dxgb.DaskXGBRFClassifier(\n            verbosity=1,\n            n_estimators=2,\n            eval_metric=metric,\n            tree_method=tree_method,\n            device=device,\n        )\n\n    if hasattr(classifier, \"_estimator_type\"):\n        assert classifier._estimator_type == \"classifier\"\n    else:\n        assert classifier._get_type() == \"classifier\"\n    assert sklearn.base.is_classifier(classifier)\n\n    classifier.client = client\n    classifier.fit(X, y, sample_weight=w, eval_set=[(X, y)])\n    prediction = classifier.predict(X).compute()\n\n    assert prediction.ndim == 1\n    assert prediction.shape[0] == kRows\n\n    history = classifier.evals_result()\n\n    assert isinstance(history, dict)\n\n    assert list(history.keys())[0] == \"validation_0\"\n    assert list(history[\"validation_0\"].keys())[0] == metric\n    assert len(list(history[\"validation_0\"])) == 1\n\n    config = json.loads(classifier.get_booster().save_config())\n    n_threads = 
int(config[\"learner\"][\"generic_param\"][\"nthread\"])\n    assert n_threads != 0 and n_threads != os.cpu_count()\n\n    forest = int(\n        config[\"learner\"][\"gradient_booster\"][\"gbtree_model_param\"][\"num_parallel_tree\"]\n    )\n    if model == \"boosting\":\n        assert len(history[\"validation_0\"][metric]) == 2\n        assert forest == 1\n    else:\n        assert len(history[\"validation_0\"][metric]) == 1\n        assert forest == 2\n\n    # Test .predict_proba()\n    probas = classifier.predict_proba(X).compute()\n    assert classifier.n_classes_ == n_classes\n    assert probas.ndim == 2\n    assert probas.shape[0] == kRows\n    assert probas.shape[1] == n_classes\n\n    if n_classes > 2:\n        cls_booster = classifier.get_booster()\n        single_node_proba = cls_booster.inplace_predict(X.compute())\n\n        # test shared by CPU and GPU\n        if isinstance(single_node_proba, np.ndarray):\n            np.testing.assert_allclose(single_node_proba, probas)\n        else:\n            import cupy\n\n            cupy.testing.assert_allclose(single_node_proba, probas)\n\n    # Test with dataframe, not shared with GPU as cupy doesn't work well with da.unique.\n    if isinstance(X, da.Array) and n_classes > 2:\n        X_d: dd.DataFrame = X.to_dask_dataframe()\n\n        assert classifier.n_classes_ == n_classes\n        prediction_df = classifier.predict(X_d).compute()\n\n        assert prediction_df.ndim == 1\n        assert prediction_df.shape[0] == kRows\n        np.testing.assert_allclose(prediction_df, prediction)\n\n        probas = classifier.predict_proba(X).compute()\n        np.testing.assert_allclose(single_node_proba, probas)\n\n\n@pytest.mark.parametrize(\"model\", [\"boosting\", \"rf\"])\ndef test_dask_classifier(model: str, client: \"Client\") -> None:\n    X, y, w = generate_array(with_weights=True)\n    y = (y * 10).astype(np.int32)\n    assert w is not None\n    run_dask_classifier(X, y, w, model, None, \"cpu\", client, 
10)\n\n    y_bin = y.copy()\n    y_bin[y > 5] = 1.0\n    y_bin[y <= 5] = 0.0\n    run_dask_classifier(X, y_bin, w, model, None, \"cpu\", client, 2)\n\n\ndef test_empty_dmatrix_training_continuation(client: \"Client\") -> None:\n    kRows, kCols = 1, 97\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.rand(kRows))\n    X.columns = [\"X\" + str(i) for i in range(0, kCols)]\n    dtrain = dxgb.DaskDMatrix(client, X, y)\n\n    kRows += 1000\n    X = dd.from_array(np.random.randn(kRows, kCols), chunksize=10)\n    X.columns = [\"X\" + str(i) for i in range(0, kCols)]\n    y = dd.from_array(np.random.rand(kRows), chunksize=10)\n    valid = dxgb.DaskDMatrix(client, X, y)\n\n    out = dxgb.train(\n        client,\n        {\"tree_method\": \"hist\"},\n        dtrain=dtrain,\n        num_boost_round=2,\n        evals=[(valid, \"validation\")],\n    )\n\n    out = dxgb.train(\n        client,\n        {\"tree_method\": \"hist\"},\n        dtrain=dtrain,\n        xgb_model=out[\"booster\"],\n        num_boost_round=2,\n        evals=[(valid, \"validation\")],\n    )\n    assert dxgb.predict(client, out, dtrain).compute().shape[0] == 1\n\n\ndef run_empty_dmatrix_reg(client: \"Client\", parameters: dict) -> None:\n    def _check_outputs(out: dxgb.TrainReturnT, predictions: np.ndarray) -> None:\n        assert isinstance(out[\"booster\"], dxgb.Booster)\n        for _, v in out[\"history\"][\"validation\"].items():\n            assert len(v) == 2\n        assert isinstance(predictions, np.ndarray)\n        assert predictions.shape[0] == 1\n\n    kRows, kCols = 1, 97\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.rand(kRows))\n    dtrain = dxgb.DaskDMatrix(client, X, y)\n\n    out = dxgb.train(\n        client,\n        parameters,\n        dtrain=dtrain,\n        evals=[(dtrain, \"validation\")],\n        num_boost_round=2,\n    )\n    predictions = dxgb.predict(client=client, model=out, 
data=dtrain).compute()\n    _check_outputs(out, predictions)\n\n    # valid has more rows than train\n    kRows += 1\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.rand(kRows))\n    valid = dxgb.DaskDMatrix(client, X, y)\n    out = dxgb.train(\n        client,\n        parameters,\n        dtrain=dtrain,\n        evals=[(valid, \"validation\")],\n        num_boost_round=2,\n    )\n    predictions = dxgb.predict(client=client, model=out, data=dtrain).compute()\n    _check_outputs(out, predictions)\n\n    # train has more rows than evals\n    valid = dtrain\n    kRows += 1\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.rand(kRows))\n    dtrain = dxgb.DaskDMatrix(client, X, y)\n\n    out = dxgb.train(\n        client,\n        parameters,\n        dtrain=dtrain,\n        evals=[(valid, \"validation\")],\n        num_boost_round=2,\n    )\n    predictions = dxgb.predict(client=client, model=out, data=valid).compute()\n    _check_outputs(out, predictions)\n\n\ndef run_empty_dmatrix_cls(client: \"Client\", parameters: dict) -> None:\n    n_classes = 4\n\n    def _check_outputs(out: dxgb.TrainReturnT, predictions: np.ndarray) -> None:\n        assert isinstance(out[\"booster\"], dxgb.Booster)\n        assert len(out[\"history\"][\"validation\"][\"merror\"]) == 2\n        assert isinstance(predictions, np.ndarray)\n        assert predictions.shape[1] == n_classes, predictions.shape\n\n    kRows, kCols = 1, 97\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.randint(low=0, high=n_classes, size=kRows))\n    dtrain = dxgb.DaskDMatrix(client, X, y)\n    parameters[\"objective\"] = \"multi:softprob\"\n    parameters[\"eval_metric\"] = \"merror\"\n    parameters[\"num_class\"] = n_classes\n\n    out = dxgb.train(\n        client,\n        parameters,\n        dtrain=dtrain,\n        evals=[(dtrain, \"validation\")],\n        num_boost_round=2,\n    )\n    
predictions = dxgb.predict(client=client, model=out, data=dtrain)\n    assert predictions.shape[1] == n_classes\n    predictions = predictions.compute()\n    _check_outputs(out, predictions)\n\n    # train has more rows than evals\n    valid = dtrain\n    kRows += 1\n    X = dd.from_array(np.random.randn(kRows, kCols))\n    y = dd.from_array(np.random.randint(low=0, high=n_classes, size=kRows))\n    dtrain = dxgb.DaskDMatrix(client, X, y)\n\n    out = dxgb.train(\n        client,\n        parameters,\n        dtrain=dtrain,\n        evals=[(valid, \"validation\")],\n        num_boost_round=2,\n    )\n    predictions = dxgb.predict(client=client, model=out, data=valid).compute()\n    _check_outputs(out, predictions)\n\n\ndef run_empty_dmatrix_auc(client: \"Client\", device: str, n_workers: int) -> None:\n    from sklearn import datasets\n\n    n_samples = 100\n    n_features = 7\n    rng = np.random.RandomState(1994)\n\n    make_classification = partial(\n        datasets.make_classification, n_features=n_features, random_state=rng\n    )\n\n    # binary\n    X_, y_ = make_classification(n_samples=n_samples, random_state=rng)\n    X = dd.from_array(X_, chunksize=10)\n    y = dd.from_array(y_, chunksize=10)\n\n    n_samples = n_workers - 1\n    valid_X_, valid_y_ = make_classification(n_samples=n_samples, random_state=rng)\n    valid_X = dd.from_array(valid_X_, chunksize=n_samples)\n    valid_y = dd.from_array(valid_y_, chunksize=n_samples)\n\n    cls = dxgb.DaskXGBClassifier(\n        device=device, n_estimators=2, eval_metric=[\"auc\", \"aucpr\"]\n    )\n    cls.fit(X, y, eval_set=[(valid_X, valid_y)])\n\n    # multiclass\n    X_, y_ = make_classification(\n        n_samples=n_samples,\n        n_classes=n_workers,\n        n_informative=n_features,\n        n_redundant=0,\n        n_repeated=0,\n    )\n    for i in range(y_.shape[0]):\n        y_[i] = i % n_workers\n    X = dd.from_array(X_, chunksize=10)\n    y = dd.from_array(y_, chunksize=10)\n\n    n_samples = 
n_workers - 1\n    valid_X_, valid_y_ = make_classification(\n        n_samples=n_samples,\n        n_classes=n_workers,\n        n_informative=n_features,\n        n_redundant=0,\n        n_repeated=0,\n    )\n    for i in range(valid_y_.shape[0]):\n        valid_y_[i] = i % n_workers\n    valid_X = dd.from_array(valid_X_, chunksize=n_samples)\n    valid_y = dd.from_array(valid_y_, chunksize=n_samples)\n\n    # Specify base score in case if there are only two workers and one sample.\n    cls = dxgb.DaskXGBClassifier(\n        device=device, n_estimators=2, eval_metric=[\"auc\", \"aucpr\"], base_score=0.5\n    )\n    cls.fit(X, y, eval_set=[(valid_X, valid_y)])\n\n\n@pytest.mark.parametrize(\n    \"client_kwargs\",\n    [pytest.param({\"n_workers\": 4, \"dashboard_address\": \":0\"}, id=\"4-workers\")],\n    indirect=True,\n)\ndef test_empty_dmatrix_auc(client: \"Client\") -> None:\n    run_empty_dmatrix_auc(client, \"cpu\", 4)\n\n\ndef run_auc(client: \"Client\", device: str) -> None:\n    from sklearn import datasets\n\n    n_samples = 100\n    n_features = 97\n    rng = np.random.RandomState(1994)\n    X_, y_ = datasets.make_classification(\n        n_samples=n_samples, n_features=n_features, random_state=rng\n    )\n    X = dd.from_array(X_, chunksize=10)\n    y = dd.from_array(y_, chunksize=10)\n\n    valid_X_, valid_y_ = datasets.make_classification(\n        n_samples=n_samples, n_features=n_features, random_state=rng\n    )\n    valid_X = dd.from_array(valid_X_, chunksize=10)\n    valid_y = dd.from_array(valid_y_, chunksize=10)\n\n    cls = xgb.XGBClassifier(device=device, n_estimators=2, eval_metric=\"auc\")\n    cls.fit(X_, y_, eval_set=[(valid_X_, valid_y_)])\n\n    dcls = dxgb.DaskXGBClassifier(device=device, n_estimators=2, eval_metric=\"auc\")\n    dcls.fit(X, y, eval_set=[(valid_X, valid_y)])\n\n    approx = dcls.evals_result()[\"validation_0\"][\"auc\"]\n    exact = cls.evals_result()[\"validation_0\"][\"auc\"]\n    for i in range(2):\n        # 
approximated test.\n        assert np.abs(approx[i] - exact[i]) <= 0.06\n\n\ndef test_auc(client: \"Client\") -> None:\n    run_auc(client, \"cpu\")\n\n\n# No test for Exact, as empty DMatrix handling are mostly for distributed\n# environment and Exact doesn't support it.\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_empty_dmatrix(tree_method: str, client: \"Client\") -> None:\n    parameters = {\"tree_method\": tree_method}\n    run_empty_dmatrix_reg(client, parameters)\n    run_empty_dmatrix_cls(client, parameters)\n    parameters = {\"tree_method\": tree_method, \"objective\": \"reg:absoluteerror\"}\n    run_empty_dmatrix_reg(client, parameters)\n\n\nasync def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainReturnT:\n    async with Client(scheduler_address, asynchronous=True) as client:\n        X, y, _ = generate_array()\n        m = await DaskDMatrix(client, X, y)\n        output = await dxgb.train(client, {}, dtrain=m)\n\n        with_m = await dxgb.predict(client, output, m)\n        with_X = await dxgb.predict(client, output, X)\n        inplace = await dxgb.inplace_predict(client, output, X)\n        assert isinstance(with_m, da.Array)\n        assert isinstance(with_X, da.Array)\n        assert isinstance(inplace, da.Array)\n\n        np.testing.assert_allclose(\n            await client.compute(with_m), await client.compute(with_X)\n        )\n        np.testing.assert_allclose(\n            await client.compute(with_m), await client.compute(inplace)\n        )\n    return output\n\n\nasync def run_dask_regressor_asyncio(scheduler_address: str) -> None:\n    async with Client(scheduler_address, asynchronous=True) as client:\n        X, y, _ = generate_array()\n        X = X[:200]\n        y = y[:200]\n        expected_rows = X.shape[0]\n        regressor = await dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)\n        regressor.set_params(tree_method=\"hist\")\n        regressor.client = client\n        
await regressor.fit(X, y, eval_set=[(X, y)])\n        prediction = await regressor.predict(X)\n\n        assert prediction.ndim == 1\n        assert prediction.shape[0] == expected_rows\n\n        history = regressor.evals_result()\n\n        assert isinstance(prediction, da.Array)\n        assert isinstance(history, dict)\n\n        assert list(history[\"validation_0\"].keys())[0] == \"rmse\"\n        assert len(history[\"validation_0\"][\"rmse\"]) == 2\n\n        awaited = await client.compute(prediction)\n        assert awaited.shape[0] == expected_rows\n\n\nasync def run_dask_classifier_asyncio(scheduler_address: str) -> None:\n    async with Client(scheduler_address, asynchronous=True) as client:\n        X, y, _ = generate_array()\n        expected_rows = X.shape[0]\n        y = (y * 10).astype(np.int32)\n        classifier = await dxgb.DaskXGBClassifier(\n            verbosity=1, n_estimators=2, eval_metric=\"merror\"\n        )\n        classifier.client = client\n        await classifier.fit(X, y, eval_set=[(X, y)])\n        prediction = await classifier.predict(X)\n\n        assert prediction.ndim == 1\n        assert prediction.shape[0] == expected_rows\n\n        history = classifier.evals_result()\n\n        assert isinstance(prediction, da.Array)\n        assert isinstance(history, dict)\n\n        assert list(history.keys())[0] == \"validation_0\"\n        assert list(history[\"validation_0\"].keys())[0] == \"merror\"\n        assert len(list(history[\"validation_0\"])) == 1\n        assert len(history[\"validation_0\"][\"merror\"]) == 2\n\n        # Test .predict_proba()\n        probas = await classifier.predict_proba(X)\n        assert classifier.n_classes_ == 10\n        assert probas.ndim == 2\n        assert probas.shape[0] == expected_rows\n        assert probas.shape[1] == 10\n\n        # Test with dataframe.\n        X_d = dd.from_dask_array(X)\n        y_d = dd.from_dask_array(y)\n        await classifier.fit(X_d, y_d)\n\n        assert 
classifier.n_classes_ == 10\n        prediction = await client.compute(await classifier.predict(X_d))\n\n        assert prediction.ndim == 1\n        assert prediction.shape[0] == expected_rows\n\n\ndef test_with_asyncio(client: \"Client\") -> None:\n    address = client.scheduler.address\n    output = asyncio.run(run_from_dask_array_asyncio(address))\n    assert isinstance(output[\"booster\"], xgb.Booster)\n    assert isinstance(output[\"history\"], dict)\n\n    asyncio.run(run_dask_regressor_asyncio(address))\n    asyncio.run(run_dask_classifier_asyncio(address))\n\n\nasync def generate_concurrent_trainings() -> None:\n    async def train() -> None:\n        async with LocalCluster(\n            n_workers=2, threads_per_worker=1, asynchronous=True, dashboard_address=\":0\"\n        ) as cluster:\n            async with Client(cluster, asynchronous=True) as client:\n                X, y, w = generate_array(with_weights=True)\n                dtrain = await DaskDMatrix(client, X, y, weight=w)\n                dvalid = await DaskDMatrix(client, X, y, weight=w)\n                output = await dxgb.train(client, {}, dtrain=dtrain)\n                await dxgb.predict(client, output, data=dvalid)\n\n    await asyncio.gather(train(), train())\n\n\ndef test_concurrent_trainings() -> None:\n    asyncio.run(generate_concurrent_trainings())\n\n\ndef test_predict(client: \"Client\") -> None:\n    X, y, _ = generate_array()\n    dtrain = DaskDMatrix(client, X, y)\n    booster = dxgb.train(client, {}, dtrain, num_boost_round=2)[\"booster\"]\n\n    predt_0 = dxgb.predict(client, model=booster, data=dtrain)\n    assert predt_0.ndim == 1\n    assert predt_0.shape[0] == kRows\n\n    margin = dxgb.predict(client, model=booster, data=dtrain, output_margin=True)\n    assert margin.ndim == 1\n    assert margin.shape[0] == kRows\n\n    shap = dxgb.predict(client, model=booster, data=dtrain, pred_contribs=True)\n    assert shap.ndim == 2\n    assert shap.shape[0] == kRows\n    assert 
shap.shape[1] == kCols + 1\n\n    booster_f = client.scatter(booster, broadcast=True)\n\n    predt_1 = dxgb.predict(client, booster_f, X).compute()\n    predt_2 = dxgb.inplace_predict(client, booster_f, X).compute()\n    np.testing.assert_allclose(predt_0, predt_1)\n    np.testing.assert_allclose(predt_0, predt_2)\n\n\ndef test_predict_with_meta(client: \"Client\") -> None:\n    X, y, w = generate_array(with_weights=True)\n    assert w is not None\n    partition_size = 20\n    margin = da.random.random(kRows, partition_size) + 1e4\n\n    dtrain = DaskDMatrix(client, X, y, weight=w, base_margin=margin)\n    booster: xgb.Booster = dxgb.train(client, {}, dtrain, num_boost_round=4)[\"booster\"]\n\n    prediction = dxgb.predict(client, model=booster, data=dtrain)\n    assert prediction.ndim == 1\n    assert prediction.shape[0] == kRows\n\n    prediction = client.compute(prediction).result()\n    assert np.all(prediction > 1e3)\n\n    m = xgb.DMatrix(X.compute())\n    m.set_info(label=y.compute(), weight=w.compute(), base_margin=margin.compute())\n    single = booster.predict(m)  # Make sure the ordering is correct.\n    assert np.all(prediction == single)\n\n\ndef run_aft_survival(client: \"Client\", dmatrix_t: Type) -> None:\n    df = dd.read_csv(os.path.join(tm.data_dir(__file__), \"veterans_lung_cancer.csv\"))\n    y_lower_bound = df[\"Survival_label_lower_bound\"]\n    y_upper_bound = df[\"Survival_label_upper_bound\"]\n    X = df.drop([\"Survival_label_lower_bound\", \"Survival_label_upper_bound\"], axis=1)\n    m = dmatrix_t(\n        client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound\n    )\n    base_params = {\n        \"verbosity\": 0,\n        \"objective\": \"survival:aft\",\n        \"eval_metric\": \"aft-nloglik\",\n        \"learning_rate\": 0.05,\n        \"aft_loss_distribution_scale\": 1.20,\n        \"max_depth\": 6,\n        \"lambda\": 0.01,\n        \"alpha\": 0.02,\n    }\n\n    nloglik_rec = {}\n    dists = [\"normal\", 
\"logistic\", \"extreme\"]\n    for dist in dists:\n        params = base_params\n        params.update({\"aft_loss_distribution\": dist})\n        evals_result = {}\n        out = dxgb.train(client, params, m, num_boost_round=100, evals=[(m, \"train\")])\n        evals_result = out[\"history\"]\n        nloglik_rec[dist] = evals_result[\"train\"][\"aft-nloglik\"]\n        # AFT metric (negative log likelihood) improve monotonically\n        assert all(p >= q for p, q in zip(nloglik_rec[dist], nloglik_rec[dist][:1]))\n    # For this data, normal distribution works the best\n    assert nloglik_rec[\"normal\"][-1] < 4.9\n    assert nloglik_rec[\"logistic\"][-1] > 4.9\n    assert nloglik_rec[\"extreme\"][-1] > 4.9\n\n\ndef test_dask_aft_survival(client: \"Client\") -> None:\n    run_aft_survival(client, DaskDMatrix)\n\n\n@pytest.mark.parametrize(\"booster\", [\"dart\", \"gbtree\"])\ndef test_dask_predict_leaf(booster: str, client: \"Client\") -> None:\n    from sklearn.datasets import load_digits\n\n    X_, y_ = load_digits(return_X_y=True)\n    X_, _, y_, _ = train_test_split(\n        X_, y_, train_size=300, stratify=y_, random_state=1994\n    )\n    num_parallel_tree = 4\n    X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)\n    rounds = 4\n    cls = dxgb.DaskXGBClassifier(\n        n_estimators=rounds, num_parallel_tree=num_parallel_tree, booster=booster\n    )\n    cls.client = client\n    cls.fit(X, y)\n    leaf = dxgb.predict(\n        client,\n        cls.get_booster(),\n        X.to_dask_array(),  # we can't map_blocks on dataframe when output is 4-dim.\n        pred_leaf=True,\n        strict_shape=True,\n        validate_features=False,\n    ).compute()\n\n    assert leaf.shape[0] == X_.shape[0]\n    assert leaf.shape[1] == rounds\n    assert leaf.shape[2] == cls.n_classes_\n    assert leaf.shape[3] == num_parallel_tree\n\n    leaf_from_apply = cls.apply(X).reshape(leaf.shape).compute()\n    np.testing.assert_allclose(leaf_from_apply, 
leaf)\n\n    validate_leaf_output(leaf, num_parallel_tree)\n\n\ndef test_dask_iteration_range(client: \"Client\") -> None:\n    X, y, _ = generate_array()\n    n_rounds = 5\n\n    Xy = xgb.DMatrix(X.compute(), y.compute())\n\n    dXy = dxgb.DaskDMatrix(client, X, y)\n    booster = dxgb.train(\n        client, {\"tree_method\": \"hist\"}, dXy, num_boost_round=n_rounds\n    )[\"booster\"]\n\n    for i in range(0, n_rounds):\n        iter_range = (0, i)\n        native_predt = booster.predict(Xy, iteration_range=iter_range)\n\n        with_dask_dmatrix = dxgb.predict(\n            client, booster, dXy, iteration_range=iter_range\n        )\n        with_dask_collection = dxgb.predict(\n            client, booster, X, iteration_range=iter_range\n        )\n        with_inplace = dxgb.inplace_predict(\n            client, booster, X, iteration_range=iter_range\n        )\n        np.testing.assert_allclose(native_predt, with_dask_dmatrix.compute())\n        np.testing.assert_allclose(native_predt, with_dask_collection.compute())\n        np.testing.assert_allclose(native_predt, with_inplace.compute())\n\n    full_predt = dxgb.predict(client, booster, X, iteration_range=(0, n_rounds))\n    default = dxgb.predict(client, booster, X)\n    np.testing.assert_allclose(full_predt.compute(), default.compute())\n\n\ndef test_killed_task_wo_hang(client: \"Client\") -> None:\n    # Test that aborting a worker doesn't lead to hang.\n    class Eve(xgb.callback.TrainingCallback):\n        def after_iteration(\n            self, model: xgb.Booster, epoch: int, evals_log: Dict\n        ) -> bool:\n            if coll.get_rank() == 1:\n                os.abort()\n            return False\n\n    X, y, _ = generate_array()\n    n_rounds = 10\n    dXy = dxgb.DaskDMatrix(client, X, y)\n    # The precise error message depends on Dask scheduler.\n    try:\n        dxgb.train(\n            client,\n            {\"tree_method\": \"hist\"},\n            dXy,\n            
num_boost_round=n_rounds,\n            callbacks=[Eve()],\n        )\n    except (ValueError, KilledWorker):\n        # These exceptions indicate that the killed worker caused training to fail\n        # promptly, which is the expected behavior for this test. We only verify\n        # that training does not hang, so the exceptions are intentionally ignored.\n        pass\n\n\ndef test_invalid_config(client: \"Client\") -> None:\n    X, y, _ = generate_array()\n    dtrain = DaskDMatrix(client, X, y)\n\n    with dask.config.set({\"xgboost.foo\": \"bar\"}):\n        with pytest.raises(ValueError, match=r\"Unknown configuration.*\"):\n            dxgb.train(client, {}, dtrain, num_boost_round=4)\n\n    with dask.config.set({\"xgboost.scheduler_address\": \"127.0.0.1:foo\"}):\n        with pytest.raises(socket.gaierror, match=r\".*not known.*\"):\n            dxgb.train(client, {}, dtrain, num_boost_round=1)\n\n    # No failure only because we are also using the Dask scheduler address.\n    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n        s.bind((\"127.0.0.1\", 0))\n        port = s.getsockname()[1]\n        cfg = CollConfig(tracker_host_ip=\"127.0.0.1\", tracker_port=port)\n        dxgb.train(client, {}, dtrain, num_boost_round=1, coll_cfg=cfg)\n\n    with pytest.raises(ValueError, match=r\"comm_group.*timeout >= 0.*\"):\n        cfg = CollConfig(tracker_host_ip=\"127.0.0.1\", tracker_port=0, timeout=-1)\n        dxgb.train(client, {}, dtrain, num_boost_round=1, coll_cfg=cfg)\n\n\ndef test_worker_port(client_one_worker: \"Client\") -> None:\n    from xgboost.testing.collective import get_avail_port\n\n    X, y, _ = generate_array()\n    dtrain = DaskDMatrix(client_one_worker, X, y)\n\n    cfg = CollConfig(worker_port=get_avail_port)\n    dxgb.train(client_one_worker, {}, dtrain, num_boost_round=4, coll_cfg=cfg)\n\n\nclass TestWithDask:\n    def test_dmatrix_binary(self, client: \"Client\", tmp_path: Path) -> None:\n        def 
save_dmatrix(rabit_args: Dict[str, Union[int, str]], tmpdir: str) -> None:\n            with dxgb.CommunicatorContext(**rabit_args):\n                rank = xgb.collective.get_rank()\n                X, y = tm.make_categorical(100, 4, 4, onehot=False)\n                Xy = xgb.DMatrix(X, y)\n                path = os.path.join(tmpdir, f\"{rank}.bin\")\n                Xy.save_binary(path)\n\n        def load_dmatrix(rabit_args: Dict[str, Union[int, str]], tmpdir: str) -> None:\n            with dxgb.CommunicatorContext(**rabit_args):\n                rank = xgb.collective.get_rank()\n                path = os.path.join(tmpdir, f\"{rank}.bin\")\n                Xy = xgb.DMatrix(path)\n                assert Xy.num_row() == 100\n                assert Xy.num_col() == 4\n\n        workers = tm.dask.get_client_workers(client)\n        rabit_args = get_rabit_args(client, len(workers))\n        futures = []\n        for w in workers:\n            # same argument for each worker, must set pure to False otherwise dask\n            # will try to reuse the result from the first worker and hang waiting\n            # for it.\n            f = client.submit(\n                save_dmatrix, rabit_args, str(tmp_path), workers=[w], pure=False\n            )\n            futures.append(f)\n        client.gather(futures)\n\n        rabit_args = get_rabit_args(client, len(workers))\n        futures = []\n        for w in workers:\n            f = client.submit(\n                load_dmatrix, rabit_args, str(tmp_path), workers=[w], pure=False\n            )\n            futures.append(f)\n        client.gather(futures)\n\n    @pytest.mark.parametrize(\n        \"config_key,config_value\", [(\"verbosity\", 0), (\"use_rmm\", True)]\n    )\n    def test_global_config(\n        self, client: \"Client\", config_key: str, config_value: Any\n    ) -> None:\n        X, y, _ = generate_array()\n        xgb.config.set_config(**{config_key: config_value})\n        dtrain = DaskDMatrix(client, X, 
y)\n        before_fname = \"./before_training-test_global_config\"\n        after_fname = \"./after_training-test_global_config\"\n\n        class TestCallback(xgb.callback.TrainingCallback):\n            def write_file(self, fname: str) -> None:\n                with open(fname, \"w\") as fd:\n                    fd.write(str(xgb.config.get_config()[config_key]))\n\n            def before_training(self, model: xgb.Booster) -> xgb.Booster:\n                self.write_file(before_fname)\n                assert xgb.config.get_config()[config_key] == config_value\n                return model\n\n            def after_training(self, model: xgb.Booster) -> xgb.Booster:\n                assert xgb.config.get_config()[config_key] == config_value\n                return model\n\n            def before_iteration(\n                self, model: xgb.Booster, epoch: int, evals_log: Dict\n            ) -> bool:\n                assert xgb.config.get_config()[config_key] == config_value\n                return False\n\n            def after_iteration(\n                self, model: xgb.Booster, epoch: int, evals_log: Dict\n            ) -> bool:\n                self.write_file(after_fname)\n                assert xgb.config.get_config()[config_key] == config_value\n                return False\n\n        dxgb.train(client, {}, dtrain, num_boost_round=4, callbacks=[TestCallback()])[\n            \"booster\"\n        ]\n\n        with open(before_fname, \"r\") as before, open(after_fname, \"r\") as after:\n            assert before.read() == str(config_value)\n            assert after.read() == str(config_value)\n\n        os.remove(before_fname)\n        os.remove(after_fname)\n\n    def run_updater_test(\n        self,\n        client: \"Client\",\n        params: Dict,\n        num_rounds: int,\n        dataset: tm.TestDataset,\n        tree_method: str,\n    ) -> None:\n        params[\"tree_method\"] = tree_method\n        params[\"debug_synchronize\"] = True\n        params 
= dataset.set_params(params)\n\n        # It doesn't make sense to distribute a completely empty dataset.\n        assume(dataset.X.shape[0] != 0)\n\n        chunk = 128\n        y_chunk = chunk if len(dataset.y.shape) == 1 else (chunk, dataset.y.shape[1])\n        X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1]))\n        y = da.from_array(dataset.y, chunks=y_chunk)\n        if dataset.w is not None:\n            w = da.from_array(dataset.w, chunks=(chunk,))\n        else:\n            w = None\n\n        m = dxgb.DaskDMatrix(client, data=X, label=y, weight=w)\n        history = dxgb.train(\n            client,\n            params=params,\n            dtrain=m,\n            num_boost_round=num_rounds,\n            evals=[(m, \"train\")],\n        )[\"history\"]\n        note(str(history))\n        history = history[\"train\"][dataset.metric]\n\n        def is_stump() -> bool:\n            return (\n                params.get(\"max_depth\", None) == 1\n                or params.get(\"max_leaves\", None) == 1\n            )\n\n        def minimum_bin() -> bool:\n            return \"max_bin\" in params and params[\"max_bin\"] == 2\n\n        # See note on `ObjFunction::UpdateTreeLeaf`.\n        update_leaf = dataset.name.endswith(\"-l1\")\n        if update_leaf and (is_stump() or minimum_bin()):\n            assert tm.non_increasing(history, tolerance=1e-2)\n            return\n        elif minimum_bin() and is_stump():\n            assert tm.non_increasing(history, tolerance=1e-3)\n        else:\n            assert tm.non_increasing(history)\n        # Make sure that it's decreasing\n        if is_stump():\n            # we might have already got the best score with base_score.\n            assert history[-1] <= history[0] + 1e-3\n        else:\n            assert history[-1] < history[0]\n\n    @given(\n        params=hist_parameter_strategy,\n        cache_param=hist_cache_strategy,\n        dataset=tm.make_dataset_strategy(),\n    )\n    
@settings(\n        deadline=None, max_examples=3, suppress_health_check=suppress, print_blob=True\n    )\n    def test_hist(\n        self,\n        params: Dict[str, Any],\n        cache_param: Dict[str, Any],\n        dataset: tm.TestDataset,\n        client: \"Client\",\n    ) -> None:\n        num_rounds = 10\n        params.update(cache_param)\n        self.run_updater_test(client, params, num_rounds, dataset, \"hist\")\n\n    def test_quantile_dmatrix(self, client: Client) -> None:\n        X, y = make_categorical(client, 3000, 30, 13)\n\n        Xy = dxgb.DaskDMatrix(client, X, y)\n        valid_Xy = dxgb.DaskDMatrix(client, X, y)\n\n        output = dxgb.train(\n            client,\n            {\"tree_method\": \"hist\"},\n            Xy,\n            num_boost_round=10,\n            evals=[(Xy, \"Train\"), (valid_Xy, \"Valid\")],\n        )\n        dmatrix_hist = output[\"history\"]\n\n        Xy = dxgb.DaskQuantileDMatrix(client, X, y)\n        valid_Xy = dxgb.DaskQuantileDMatrix(client, X, y, ref=Xy)\n\n        output = dxgb.train(\n            client,\n            {\"tree_method\": \"hist\"},\n            Xy,\n            num_boost_round=10,\n            evals=[(Xy, \"Train\"), (valid_Xy, \"Valid\")],\n        )\n        quantile_hist = output[\"history\"]\n\n        np.testing.assert_allclose(\n            quantile_hist[\"Train\"][\"rmse\"], dmatrix_hist[\"Train\"][\"rmse\"]\n        )\n        np.testing.assert_allclose(\n            quantile_hist[\"Valid\"][\"rmse\"], dmatrix_hist[\"Valid\"][\"rmse\"]\n        )\n\n    def test_empty_quantile_dmatrix(self, client: Client) -> None:\n        X, y = make_categorical(client, 1, 16, 4, onehot=True)\n        X_valid, y_valid = make_categorical(client, 2000, 16, 4, onehot=True)\n\n        Xy = dxgb.DaskQuantileDMatrix(client, X, y)\n        Xy_valid = dxgb.DaskQuantileDMatrix(client, X_valid, y_valid, ref=Xy)\n        result = dxgb.train(\n            client,\n            {\"tree_method\": \"hist\"},\n   
         Xy,\n            num_boost_round=10,\n            evals=[(Xy_valid, \"Valid\")],\n        )\n        predt = dxgb.inplace_predict(client, result[\"booster\"], X).compute()\n        np.testing.assert_allclose(y.compute(), predt)\n        rmse = result[\"history\"][\"Valid\"][\"rmse\"][-1]\n        assert rmse < 6.5\n\n    @given(\n        params=hist_parameter_strategy,\n        cache_param=hist_cache_strategy,\n        dataset=tm.make_dataset_strategy(),\n    )\n    @settings(\n        deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True\n    )\n    def test_approx(\n        self,\n        client: \"Client\",\n        params: Dict,\n        cache_param: Dict[str, Any],\n        dataset: tm.TestDataset,\n    ) -> None:\n        num_rounds = 10\n        params.update(cache_param)\n        self.run_updater_test(client, params, num_rounds, dataset, \"approx\")\n\n    def test_adaptive(self, client: \"Client\") -> None:\n        def local_test(rabit_args: Dict[str, Union[int, str]], worker_id: int) -> bool:\n            with dxgb.CommunicatorContext(**rabit_args):\n                if worker_id == 0:\n                    y = np.array([0.0, 0.0, 0.0])\n                    x = np.array([[0.0]] * 3)\n                else:\n                    y = np.array([1000.0])\n                    x = np.array(\n                        [\n                            [0.0],\n                        ]\n                    )\n\n                Xy = xgb.DMatrix(x, y)\n                booster = xgb.train(\n                    {\"tree_method\": \"hist\", \"objective\": \"reg:absoluteerror\"},\n                    Xy,\n                    num_boost_round=1,\n                )\n                config = json.loads(booster.save_config())\n                base_score = get_basescore(config)\n                assert base_score == [250.0]\n                return True\n\n        workers = tm.dask.get_client_workers(client)\n        rabit_args = 
get_rabit_args(client, len(workers))\n        futures = []\n        for i, _ in enumerate(workers):\n            f = client.submit(local_test, rabit_args, i)\n            futures.append(f)\n\n        results = client.gather(futures)\n        assert all(results)\n\n    def test_n_workers(self, client: \"Client\") -> None:\n        \"\"\"Check obtaining worker addresses using input data.\"\"\"\n\n        def from_delayed(fut: Any, x: np.ndarray) -> Any:\n            return da.from_delayed(fut, shape=x.shape, dtype=x.dtype)\n\n        def place_on_worker(data: np.ndarray, worker: str) -> Any:\n            return client.submit(\n                lambda x: x,\n                data,\n                workers=[worker],\n                allow_other_workers=False,\n                pure=False,\n            )\n\n        workers = tm.dask.get_client_workers(client)\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n\n        # Use client.submit to place data on specific workers with unique keys.\n        X_fut_0 = place_on_worker(X, workers[0])\n        y_fut_0 = place_on_worker(y, workers[0])\n        distributed.wait([X_fut_0, y_fut_0])\n        dX = from_delayed(X_fut_0, X)\n        dy = from_delayed(y_fut_0, y)\n        train = dxgb.DaskDMatrix(client, dX, dy)\n\n        X_fut_1 = place_on_worker(X, workers[1])\n        y_fut_1 = place_on_worker(y, workers[1])\n        distributed.wait([X_fut_1, y_fut_1])\n        dX_valid = from_delayed(X_fut_1, X)\n        dy_valid = from_delayed(y_fut_1, y)\n        valid = dxgb.DaskDMatrix(client, dX_valid, dy_valid)\n\n        merged = dxgb._get_workers_from_data(train, evals=[(valid, \"Valid\")])\n        assert len(merged) == 2\n\n    @pytest.mark.skipif(**tm.no_dask())\n    def test_feature_weights(self, client: \"Client\") -> None:\n        kRows = 1024\n        kCols = 64\n        rng = da.random.RandomState(1994)\n        X = rng.random_sample((kRows, kCols), chunks=(32, 
-1))\n        y = rng.random_sample(kRows, chunks=32)\n\n        fw = np.ones(shape=(kCols,))\n        for i in range(kCols):\n            fw[i] *= float(i)\n        fw = da.from_array(fw)\n        parser = os.path.join(tm.demo_dir(__file__), \"guide-python\", \"model_parser.py\")\n        poly_increasing = get_feature_weights(\n            X=X,\n            y=y,\n            fw=fw,\n            parser_path=parser,\n            tree_method=\"approx\",\n            model=dxgb.DaskXGBRegressor,\n        )\n\n        fw = np.ones(shape=(kCols,))\n        for i in range(kCols):\n            fw[i] *= float(kCols - i)\n        fw = da.from_array(fw)\n        poly_decreasing = get_feature_weights(\n            X=X,\n            y=y,\n            fw=fw,\n            parser_path=parser,\n            tree_method=\"approx\",\n            model=dxgb.DaskXGBRegressor,\n        )\n\n        # Approxmated test, this is dependent on the implementation of random\n        # number generator in std library.\n        assert poly_increasing[0] > 0.08\n        assert poly_decreasing[0] < -0.08\n\n    @pytest.mark.skipif(**tm.no_dask())\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_custom_objective(self, client: \"Client\", tmp_path: Path) -> None:\n        X, y = get_california_housing()\n        X, y = da.from_array(X), da.from_array(y)\n        rounds = 20\n\n        path = tmp_path / \"log\"\n\n        def sqr(\n            labels: np.ndarray, predts: np.ndarray\n        ) -> Tuple[np.ndarray, np.ndarray]:\n            with open(path, \"a\") as fd:\n                print(\"Running sqr\", file=fd)\n            grad = predts - labels\n            hess = np.ones(shape=labels.shape[0])\n            return grad, hess\n\n        reg = dxgb.DaskXGBRegressor(\n            n_estimators=rounds, objective=sqr, tree_method=\"hist\"\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n\n        # Check the obj is ran for rounds.\n        with open(path, \"r\") as fd:\n            out 
= fd.readlines()\n            assert len(out) == rounds\n\n        results_custom = reg.evals_result()\n\n        reg = dxgb.DaskXGBRegressor(\n            n_estimators=rounds, tree_method=\"hist\", base_score=0.5\n        )\n        reg.fit(X, y, eval_set=[(X, y)])\n        results_native = reg.evals_result()\n\n        np.testing.assert_allclose(\n            results_custom[\"validation_0\"][\"rmse\"],\n            results_native[\"validation_0\"][\"rmse\"],\n        )\n        tm.non_increasing(results_native[\"validation_0\"][\"rmse\"])\n\n        reg = dxgb.DaskXGBRegressor(\n            n_estimators=rounds, objective=tm.ls_obj, tree_method=\"hist\"\n        )\n        rng = da.random.RandomState(1994)\n        w = rng.uniform(low=0.0, high=1.0, size=y.shape[0])\n        reg.fit(X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w])\n        results_custom = reg.evals_result()\n        tm.non_increasing(results_custom[\"validation_0\"][\"rmse\"])\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_custom_metrics(self, client: \"Client\") -> None:\n        from sklearn.datasets import make_classification\n        from sklearn.metrics import hamming_loss, hinge_loss, log_loss\n\n        Xn, yn = make_classification(random_state=2025)\n        X, y = da.array(Xn), da.array(yn)\n\n        clf = dxgb.DaskXGBClassifier(\n            eval_metric=[\"logloss\", hinge_loss], n_estimators=2\n        )\n        clf.fit(X, y, eval_set=[(X, y)])\n        results = clf.evals_result()[\"validation_0\"]\n        assert \"logloss\" in results\n        assert \"hinge_loss\" in results\n\n        clf = dxgb.DaskXGBClassifier(\n            eval_metric=[hamming_loss, log_loss], n_estimators=2\n        )\n        with pytest.raises(\n            NotImplementedError, match=\"multiple custom metrics is not yet supported.\"\n        ):\n            clf.fit(X, y, eval_set=[(X, y)])\n\n    def test_no_duplicated_partition(self) -> None:\n        \"\"\"Assert each 
worker has the correct amount of data, and DMatrix initialization\n        doesn't generate unnecessary copies of data.\n\n        \"\"\"\n        n_threads = os.cpu_count()\n        assert n_threads is not None\n        # This test needs a fresh client\n        with LocalCluster(\n            n_workers=2,\n            threads_per_worker=max(1, n_threads // 2),\n            dashboard_address=\":0\",\n        ) as cluster:\n            with Client(cluster) as client:\n                X, y, _ = generate_array()\n                n_partitions = X.npartitions\n                m = dxgb.DaskDMatrix(client, X, y)\n                workers = tm.dask.get_client_workers(client)\n                rabit_args = get_rabit_args(client, len(workers))\n                n_workers = len(workers)\n\n                def worker_fn(worker_addr: str, data_ref: Dict) -> None:\n                    from xgboost.dask.data import _dmatrix_from_list_of_parts\n\n                    with dxgb.CommunicatorContext(**rabit_args):\n                        local_dtrain = _dmatrix_from_list_of_parts(\n                            **data_ref,\n                            nthread=7,\n                            model=None,\n                            Xy_cats=None,\n                        )\n                        total = np.array([local_dtrain.num_row()])\n                        total = xgb.collective.allreduce(total, xgb.collective.Op.SUM)\n                        assert total[0] == kRows\n\n                futures = []\n                for i in range(len(workers)):\n                    futures.append(\n                        client.submit(\n                            worker_fn,\n                            workers[i],\n                            m._create_fn_args(workers[i]),\n                            pure=False,\n                            workers=[workers[i]],\n                        )\n                    )\n                client.gather(futures)\n\n                has_what = 
client.has_what()\n                cnt = 0\n                data = set()\n                for k, v in has_what.items():\n                    for d in v:\n                        cnt += 1\n                        data.add(d)\n\n                assert len(data) == cnt\n                # Subtract the on disk resource from each worker\n                assert cnt - n_workers == n_partitions\n\n    def test_data_initialization(self, client: \"Client\") -> None:\n        \"\"\"assert that we don't create duplicated DMatrix\"\"\"\n        from sklearn.datasets import load_digits\n\n        X, y = load_digits(return_X_y=True)\n        X, _, y, _ = train_test_split(\n            X, y, train_size=200, stratify=y, random_state=1994\n        )\n        X, y = dd.from_array(X, chunksize=32), dd.from_array(y, chunksize=32)\n        validate_data_initialization(\n            dxgb.DaskQuantileDMatrix, dxgb.DaskXGBClassifier, X, y\n        )\n\n    def run_shap(\n        self, X: Any, y: Any, params: Dict[str, Any], client: \"Client\"\n    ) -> None:\n        rows = X.shape[0]\n        cols = X.shape[1]\n\n        def assert_shape(shape: Tuple[int, ...]) -> None:\n            assert shape[0] == rows\n            if \"num_class\" in params.keys():\n                assert shape[1] == params[\"num_class\"]\n                assert shape[2] == cols + 1\n            else:\n                assert shape[1] == cols + 1\n\n        X, y = da.from_array(X, chunks=(32, -1)), da.from_array(y, chunks=32)\n        Xy = dxgb.DaskDMatrix(client, X, y)\n        booster = dxgb.train(client, params, Xy, num_boost_round=10)[\"booster\"]\n\n        test_Xy = dxgb.DaskDMatrix(client, X, y)\n\n        shap = dxgb.predict(client, booster, test_Xy, pred_contribs=True).compute()\n        margin = dxgb.predict(client, booster, test_Xy, output_margin=True).compute()\n        assert_shape(shap.shape)\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)\n\n        shap = 
dxgb.predict(client, booster, X, pred_contribs=True).compute()\n        margin = dxgb.predict(client, booster, X, output_margin=True).compute()\n        assert_shape(shap.shape)\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)\n\n        if \"num_class\" not in params.keys():\n            X = dd.from_dask_array(X).repartition(npartitions=32)\n            y = dd.from_dask_array(y).repartition(npartitions=32)\n            shap_df = dxgb.predict(\n                client, booster, X, pred_contribs=True, validate_features=False\n            ).compute()\n            assert_shape(shap_df.shape)\n            assert np.allclose(\n                np.sum(shap_df, axis=len(shap_df.shape) - 1), margin, 1e-5, 1e-5\n            )\n\n    def run_shap_cls_sklearn(self, X: Any, y: Any, client: \"Client\") -> None:\n        X, y = da.from_array(X, chunks=(32, -1)), da.from_array(y, chunks=32)\n        cls = dxgb.DaskXGBClassifier(n_estimators=4)\n        cls.client = client\n        cls.fit(X, y)\n        booster = cls.get_booster()\n\n        test_Xy = dxgb.DaskDMatrix(client, X, y)\n\n        shap = dxgb.predict(client, booster, test_Xy, pred_contribs=True).compute()\n        margin = dxgb.predict(client, booster, test_Xy, output_margin=True).compute()\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)\n\n        shap = dxgb.predict(client, booster, X, pred_contribs=True).compute()\n        margin = dxgb.predict(client, booster, X, output_margin=True).compute()\n        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)\n\n    def test_shap(self, client: \"Client\") -> None:\n        from sklearn.datasets import load_diabetes, load_iris\n\n        X, y = load_diabetes(return_X_y=True)\n        params: Dict[str, Any] = {\"objective\": \"reg:squarederror\"}\n        self.run_shap(X, y, params, client)\n\n        X, y = load_iris(return_X_y=True)\n        params = {\"objective\": 
\"multi:softmax\", \"num_class\": 3}\n        self.run_shap(X, y, params, client)\n\n        params = {\"objective\": \"multi:softprob\", \"num_class\": 3}\n        self.run_shap(X, y, params, client)\n\n        self.run_shap_cls_sklearn(X, y, client)\n\n    def run_shap_interactions(\n        self, X: Any, y: Any, params: Dict[str, Any], client: \"Client\"\n    ) -> None:\n        rows = X.shape[0]\n        cols = X.shape[1]\n        X, y = da.from_array(X, chunks=(32, -1)), da.from_array(y, chunks=32)\n\n        Xy = dxgb.DaskDMatrix(client, X, y)\n        booster = dxgb.train(client, params, Xy, num_boost_round=10)[\"booster\"]\n\n        test_Xy = dxgb.DaskDMatrix(client, X, y)\n\n        shap = dxgb.predict(client, booster, test_Xy, pred_interactions=True).compute()\n\n        assert len(shap.shape) == 3\n        assert shap.shape[0] == rows\n        assert shap.shape[1] == cols + 1\n        assert shap.shape[2] == cols + 1\n\n        margin = dxgb.predict(client, booster, test_Xy, output_margin=True).compute()\n        assert np.allclose(\n            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),\n            margin,\n            1e-5,\n            1e-5,\n        )\n\n    def test_shap_interactions(self, client: \"Client\") -> None:\n        from sklearn.datasets import load_diabetes\n\n        X, y = load_diabetes(return_X_y=True)\n        params = {\"objective\": \"reg:squarederror\"}\n        self.run_shap_interactions(X, y, params, client)\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_sklearn_io(self, client: \"Client\", tmp_path: Path) -> None:\n        from sklearn.datasets import load_digits\n\n        X_, y_ = load_digits(return_X_y=True)\n        X, y = da.from_array(X_), da.from_array(y_)\n        cls = dxgb.DaskXGBClassifier(n_estimators=10)\n        cls.client = client\n        cls.fit(X, y)\n        predt_0 = cls.predict(X)\n        proba_0 = cls.predict_proba(X)\n\n        path = tmp_path / \"model.pkl\"\n        
with open(path, \"wb\") as fd:\n            pickle.dump(cls, fd)\n\n        with open(path, \"rb\") as fd:\n            cls = pickle.load(fd)\n        predt_1 = cls.predict(X)\n        proba_1 = cls.predict_proba(X)\n        np.testing.assert_allclose(predt_0.compute(), predt_1.compute())\n        np.testing.assert_allclose(proba_0.compute(), proba_1.compute())\n\n        path = tmp_path / \"cls.json\"\n        cls.save_model(path)\n\n        cls = dxgb.DaskXGBClassifier()\n        cls.load_model(path)\n        assert cls.n_classes_ == 10\n        predt_2 = cls.predict(X)\n        proba_2 = cls.predict_proba(X)\n\n        np.testing.assert_allclose(predt_0.compute(), predt_2.compute())\n        np.testing.assert_allclose(proba_0.compute(), proba_2.compute())\n\n        # Use single node to load\n        cls = xgb.XGBClassifier()\n        cls.load_model(path)\n        assert cls.n_classes_ == 10\n        predt_3 = cls.predict(X_)\n        proba_3 = cls.predict_proba(X_)\n\n        np.testing.assert_allclose(predt_0.compute(), predt_3)\n        np.testing.assert_allclose(proba_0.compute(), proba_3)\n\n\ndef test_dask_unsupported_features(client: \"Client\") -> None:\n    X, y, _ = generate_array()\n    # gblinear doesn't support distributed training.\n    with pytest.raises(NotImplementedError, match=\"gblinear\"):\n        dxgb.train(client, {\"booster\": \"gblinear\"}, dxgb.DaskDMatrix(client, X, y))\n\n\ndef test_parallel_submits(client: \"Client\") -> None:\n    \"\"\"Test for running multiple train simultaneously from single clients.\"\"\"\n    try:\n        from distributed import MultiLock  # NOQA\n    except ImportError:\n        pytest.skip(\"`distributed.MultiLock' is not available\")\n\n    from sklearn.datasets import load_digits\n\n    futures = []\n    workers = tm.dask.get_client_workers(client)\n    n_submits = len(workers)\n    for i in range(n_submits):\n        X_, y_ = load_digits(return_X_y=True)\n        X_, _, y_, _ = train_test_split(\n        
    X_, y_, train_size=300, stratify=y_, random_state=1994\n        )\n        X = dd.from_array(X_, chunksize=32)\n        y = dd.from_array(y_, chunksize=32)\n        cls = dxgb.DaskXGBClassifier(\n            verbosity=1,\n            n_estimators=i + 1,\n            eval_metric=\"merror\",\n        )\n        f = client.submit(cls.fit, X, y, pure=False)\n        futures.append(f)\n\n    classifiers = client.gather(futures)\n    assert len(classifiers) == n_submits\n    for i, cls in enumerate(classifiers):\n        assert cls.get_booster().num_boosted_rounds() == i + 1\n\n\ndef run_tree_stats(client: Client, tree_method: str, device: str) -> str:\n    \"\"\"Assert that the number of workers doesn't affect the summary statistics at the root.\"\"\"\n\n    def dask_train(\n        X: np.ndarray, y: np.ndarray, num_obs: int, num_features: int\n    ) -> Dict[str, Any]:\n        chunk_size = 100\n        X = da.from_array(X, chunks=(chunk_size, num_features))\n        y = da.from_array(y.reshape(num_obs, 1), chunks=(chunk_size, 1))\n        dtrain = dxgb.DaskDMatrix(client, X, y)\n\n        output = dxgb.train(\n            client,\n            {\n                \"verbosity\": 0,\n                \"tree_method\": tree_method,\n                \"device\": device,\n                \"objective\": \"reg:squarederror\",\n                \"max_depth\": 3,\n            },\n            dtrain,\n            num_boost_round=1,\n        )\n        dump_model = output[\"booster\"].get_dump(with_stats=True, dump_format=\"json\")[0]\n        return json.loads(dump_model)\n\n    num_obs = 1000\n    num_features = 10\n    X, y = make_regression(num_obs, num_features, random_state=777)\n    model = dask_train(X, y, num_obs, num_features)\n\n    # asserts children have correct cover.\n    stack = [model]\n    while stack:\n        node: dict = stack.pop()\n        if \"leaf\" in node.keys():\n            continue\n        cover = 0\n        for c in node[\"children\"]:\n            cover += 
c[\"cover\"]\n            stack.append(c)\n        assert cover == node[\"cover\"]\n\n    return model[\"cover\"]\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_tree_stats(\n    tree_method: str, client_one_worker: \"Client\", client: \"Client\"\n) -> None:\n    with client_one_worker.as_current():\n        local = run_tree_stats(client_one_worker, tree_method, \"cpu\")\n    with client.as_current():\n        distributed = run_tree_stats(client, tree_method, \"cpu\")\n\n    assert local == distributed\n\n\ndef test_parallel_submit_multi_clients() -> None:\n    \"\"\"Test for running multiple train simultaneously from multiple clients.\"\"\"\n    try:\n        from distributed import MultiLock  # NOQA\n    except ImportError:\n        pytest.skip(\"`distributed.MultiLock' is not available\")\n\n    from sklearn.datasets import load_digits\n\n    with LocalCluster(n_workers=4, dashboard_address=\":0\") as cluster:\n        with Client(cluster) as client:\n            workers = tm.dask.get_client_workers(client)\n\n        n_submits = len(workers)\n        assert n_submits == 4\n        futures = []\n\n        for i in range(n_submits):\n            client = Client(cluster)\n            X_, y_ = load_digits(return_X_y=True)\n            X_ += 1.0\n            X = dd.from_array(X_, chunksize=32)\n            y = dd.from_array(y_, chunksize=32)\n            cls = dxgb.DaskXGBClassifier(\n                verbosity=1,\n                n_estimators=i + 1,\n                eval_metric=\"merror\",\n            )\n            f = client.submit(cls.fit, X, y, pure=False)\n            futures.append((client, f))\n\n        t_futures = []\n        with ThreadPoolExecutor(max_workers=16) as e:\n            for i in range(n_submits):\n\n                def _() -> dxgb.DaskXGBClassifier:\n                    return futures[i][0].compute(futures[i][1]).result()\n\n                tf = e.submit(_)\n                t_futures.append(tf)\n\n        for 
i, tf in enumerate(t_futures):\n            assert tf.result().get_booster().num_boosted_rounds() == i + 1\n\n\ndef test_init_estimation(client: Client) -> None:\n    check_init_estimation(\"hist\", \"cpu\", client)\n\n\n@pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\ndef test_uneven_nan(tree_method: str, client: \"Client\") -> None:\n    n_workers = 2\n    check_uneven_nan(client, tree_method, \"cpu\", n_workers)\n\n\nclass TestDaskCallbacks:\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_early_stopping(self, client: \"Client\") -> None:\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        X, y = da.from_array(X), da.from_array(y)\n        m = dxgb.DaskDMatrix(client, X, y)\n\n        valid = dxgb.DaskDMatrix(client, X, y)\n        early_stopping_rounds = 5\n        booster = dxgb.train(\n            client,\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": \"error\",\n                \"tree_method\": \"hist\",\n            },\n            m,\n            evals=[(valid, \"Valid\")],\n            num_boost_round=1000,\n            early_stopping_rounds=early_stopping_rounds,\n        )[\"booster\"]\n        assert hasattr(booster, \"best_score\")\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n        valid_X, valid_y = load_breast_cancer(return_X_y=True)\n        valid_X, valid_y = da.from_array(valid_X), da.from_array(valid_y)\n        cls = dxgb.DaskXGBClassifier(\n            objective=\"binary:logistic\",\n            tree_method=\"hist\",\n            n_estimators=1000,\n            early_stopping_rounds=early_stopping_rounds,\n        )\n        cls.client = client\n        cls.fit(\n            X,\n            y,\n            eval_set=[(valid_X, valid_y)],\n        )\n        booster = cls.get_booster()\n        dump = 
booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n        # Specify the metric\n        cls = dxgb.DaskXGBClassifier(\n            objective=\"binary:logistic\",\n            tree_method=\"hist\",\n            n_estimators=1000,\n            early_stopping_rounds=early_stopping_rounds,\n            eval_metric=\"error\",\n        )\n        cls.client = client\n        cls.fit(\n            X,\n            y,\n            eval_set=[(valid_X, valid_y)],\n        )\n        assert tm.non_increasing(cls.evals_result()[\"validation_0\"][\"error\"])\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(cls.evals_result()[\"validation_0\"][\"error\"]) < 20\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_early_stopping_custom_eval(self, client: \"Client\") -> None:\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        X, y = da.from_array(X), da.from_array(y)\n        m = dxgb.DaskDMatrix(client, X, y)\n\n        def eval_error_metric(\n            predt: np.ndarray, dtrain: xgb.DMatrix\n        ) -> Tuple[str, np.float64]:\n            return tm.eval_error_metric(predt, dtrain, rev_link=False)\n\n        valid = dxgb.DaskDMatrix(client, X, y)\n        early_stopping_rounds = 5\n        booster = dxgb.train(\n            client,\n            {\n                \"objective\": \"binary:logistic\",\n                \"eval_metric\": \"error\",\n                \"tree_method\": \"hist\",\n            },\n            m,\n            evals=[(m, \"Train\"), (valid, \"Valid\")],\n            custom_metric=eval_error_metric,\n            num_boost_round=1000,\n            early_stopping_rounds=early_stopping_rounds,\n        )[\"booster\"]\n        assert hasattr(booster, \"best_score\")\n        
dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n        valid_X, valid_y = load_breast_cancer(return_X_y=True)\n        valid_X, valid_y = da.from_array(valid_X), da.from_array(valid_y)\n        cls = dxgb.DaskXGBClassifier(\n            objective=\"binary:logistic\",\n            tree_method=\"hist\",\n            n_estimators=1000,\n            eval_metric=tm.eval_error_metric_skl,\n            early_stopping_rounds=early_stopping_rounds,\n        )\n        cls.client = client\n        cls.fit(\n            X,\n            y,\n            eval_set=[(valid_X, valid_y)],\n        )\n        booster = cls.get_booster()\n        dump = booster.get_dump(dump_format=\"json\")\n        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1\n\n    @pytest.mark.skipif(**tm.no_sklearn())\n    def test_callback(self, client: \"Client\", tmp_path: Path) -> None:\n        from sklearn.datasets import load_breast_cancer\n\n        X, y = load_breast_cancer(return_X_y=True)\n        X, y = da.from_array(X), da.from_array(y)\n\n        cls = dxgb.DaskXGBClassifier(\n            objective=\"binary:logistic\",\n            tree_method=\"hist\",\n            n_estimators=10,\n            callbacks=[\n                xgb.callback.TrainingCheckPoint(\n                    directory=tmp_path, interval=1, name=\"model\"\n                )\n            ],\n        )\n        cls.client = client\n        cls.fit(\n            X,\n            y,\n        )\n        for i in range(1, 10):\n            assert (\n                tmp_path / f\"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}\"\n            ).exists()\n\n\n@gen_cluster(\n    client=True,\n    clean_kwargs={\"processes\": False, \"threads\": False},\n    allow_unclosed=True,\n)\n@pytest.mark.skip(reason=\"dmlc/xgboost#11405: test_worker_left is flaky\")\nasync def test_worker_left(c: Client, s: Scheduler, a: Worker, b: 
Worker) -> None:\n    async with Worker(s.address):\n        dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))\n        dy = da.random.random((1000,)).rechunk(chunks=(10,))\n        d_train = await dxgb.DaskDMatrix(\n            c,\n            dx,\n            dy,\n        )\n    await async_poll_for(lambda: len(s.workers) == 2, timeout=5)\n    with pytest.raises(RuntimeError, match=\"Missing\"):\n        await dxgb.train(\n            c,\n            {},\n            d_train,\n            evals=[(d_train, \"train\")],\n        )\n\n\n@gen_cluster(\n    client=True,\n    Worker=Nanny,\n    clean_kwargs={\"processes\": False, \"threads\": False},\n    allow_unclosed=True,\n)\n@pytest.mark.skip\nasync def test_worker_restarted(c: Client, s: Scheduler, a: Nanny, b: Nanny) -> None:\n    dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))\n    dy = da.random.random((1000,)).rechunk(chunks=(10,))\n    d_train = await dxgb.DaskDMatrix(\n        c,\n        dx,\n        dy,\n    )\n    await c.restart_workers([a.worker_address])\n    with pytest.raises(RuntimeError, match=\"Missing\"):\n        await dxgb.train(\n            c,\n            {},\n            d_train,\n            evals=[(d_train, \"train\")],\n        )\n\n\ndef test_doc_link() -> None:\n    for est in [\n        dxgb.DaskXGBRegressor(),\n        dxgb.DaskXGBClassifier(),\n        dxgb.DaskXGBRanker(),\n        dxgb.DaskXGBRFRegressor(),\n        dxgb.DaskXGBRFClassifier(),\n    ]:\n        name = est.__class__.__name__\n        link = est._get_doc_link()\n        assert f\"xgboost.dask.{name}\" in link\n"
  },
  {
    "path": "tests/test_distributed/test_with_spark/__init__.py",
    "content": ""
  },
  {
    "path": "tests/test_distributed/test_with_spark/discover_gpu.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\naddresses=$(\n  nvidia-smi --query-gpu=index --format=csv,noheader \\\n    | sed '/^[[:space:]]*$/d; s/^[[:space:]]*//; s/[[:space:]]*$//; s/.*/\"&\"/' \\\n    | paste -sd,\n)\nprintf '{\"name\":\"gpu\",\"addresses\":[%s]}\\n' \"${addresses}\"\n"
  },
  {
    "path": "tests/test_distributed/test_with_spark/test_data.py",
    "content": "from typing import List\n\nimport numpy as np\nimport pandas as pd\nimport pytest\nfrom xgboost import testing as tm\n\npytestmark = [pytest.mark.skipif(**tm.no_spark())]\n\nfrom xgboost import DMatrix, QuantileDMatrix\nfrom xgboost.spark.data import (\n    _read_csr_matrix_from_unwrapped_spark_vec,\n    alias,\n    create_dmatrix_from_partitions,\n    stack_series,\n)\n\n\ndef test_stack() -> None:\n    a = pd.DataFrame({\"a\": [[1, 2], [3, 4]]})\n    b = stack_series(a[\"a\"])\n    assert b.shape == (2, 2)\n\n    a = pd.DataFrame({\"a\": [[1], [3]]})\n    b = stack_series(a[\"a\"])\n    assert b.shape == (2, 1)\n\n    a = pd.DataFrame({\"a\": [np.array([1, 2]), np.array([3, 4])]})\n    b = stack_series(a[\"a\"])\n    assert b.shape == (2, 2)\n\n    a = pd.DataFrame({\"a\": [np.array([1]), np.array([3])]})\n    b = stack_series(a[\"a\"])\n    assert b.shape == (2, 1)\n\n\ndef run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:\n    rng = np.random.default_rng(0)\n    dfs: List[pd.DataFrame] = []\n    n_features = 16\n    n_samples_per_batch = 16\n    n_batches = 10\n    feature_types = [\"float\"] * n_features\n\n    for i in range(n_batches):\n        X = rng.normal(loc=0, size=256).reshape(n_samples_per_batch, n_features)\n        y = rng.normal(loc=0, size=n_samples_per_batch)\n        m = rng.normal(loc=0, size=n_samples_per_batch)\n        w = rng.normal(loc=0.5, scale=0.5, size=n_samples_per_batch)\n        w -= w.min()\n\n        valid = rng.binomial(n=1, p=0.5, size=16).astype(np.bool_)\n\n        df = pd.DataFrame(\n            {alias.label: y, alias.margin: m, alias.weight: w, alias.valid: valid}\n        )\n        if is_feature_cols:\n            for j in range(X.shape[1]):\n                df[f\"feat-{j}\"] = pd.Series(X[:, j])\n        else:\n            df[alias.data] = pd.Series(list(X))\n        dfs.append(df)\n\n    kwargs = {\"feature_types\": feature_types}\n    device_id = 0 if on_gpu else None\n    
cols = [f\"feat-{i}\" for i in range(n_features)]\n    feature_cols = cols if is_feature_cols else None\n    train_Xy, valid_Xy = create_dmatrix_from_partitions(\n        iterator=iter(dfs),\n        feature_cols=feature_cols,\n        dev_ordinal=device_id,\n        use_qdm=is_qdm,\n        kwargs=kwargs,\n        enable_sparse_data_optim=False,\n        has_validation_col=True,\n    )\n\n    if is_qdm:\n        assert isinstance(train_Xy, QuantileDMatrix)\n        assert isinstance(valid_Xy, QuantileDMatrix)\n    else:\n        assert not isinstance(train_Xy, QuantileDMatrix)\n        assert isinstance(train_Xy, DMatrix)\n        assert not isinstance(valid_Xy, QuantileDMatrix)\n        assert isinstance(valid_Xy, DMatrix)\n\n    assert valid_Xy is not None\n    assert valid_Xy.num_row() + train_Xy.num_row() == n_samples_per_batch * n_batches\n    assert train_Xy.num_col() == n_features\n    assert valid_Xy.num_col() == n_features\n\n    df = pd.concat(dfs, axis=0)\n    df_train = df.loc[~df[alias.valid], :]\n    df_valid = df.loc[df[alias.valid], :]\n\n    assert df_train.shape[0] == train_Xy.num_row()\n    assert df_valid.shape[0] == valid_Xy.num_row()\n\n    # margin\n    np.testing.assert_allclose(\n        df_train[alias.margin].to_numpy(), train_Xy.get_base_margin()\n    )\n    np.testing.assert_allclose(\n        df_valid[alias.margin].to_numpy(), valid_Xy.get_base_margin()\n    )\n    # weight\n    np.testing.assert_allclose(df_train[alias.weight].to_numpy(), train_Xy.get_weight())\n    np.testing.assert_allclose(df_valid[alias.weight].to_numpy(), valid_Xy.get_weight())\n    # label\n    np.testing.assert_allclose(df_train[alias.label].to_numpy(), train_Xy.get_label())\n    np.testing.assert_allclose(df_valid[alias.label].to_numpy(), valid_Xy.get_label())\n\n    np.testing.assert_equal(train_Xy.feature_types, feature_types)\n    np.testing.assert_equal(valid_Xy.feature_types, feature_types)\n\n\n@pytest.mark.parametrize(\n    \"is_feature_cols,is_qdm\",\n 
   [(True, True), (True, False), (False, True), (False, False)],\n)\ndef test_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool) -> None:\n    run_dmatrix_ctor(is_feature_cols, is_qdm, on_gpu=False)\n\n\n@pytest.mark.skipif(**tm.no_cudf())\n@tm.timeout(120)\n@pytest.mark.parametrize(\n    \"is_feature_cols,is_qdm\",\n    [(True, True), (True, False), (False, True), (False, False)],\n)\ndef test_dmatrix_ctor_gpu(is_feature_cols: bool, is_qdm: bool) -> None:\n    run_dmatrix_ctor(is_feature_cols, is_qdm, on_gpu=True)\n\n\ndef test_read_csr_matrix_from_unwrapped_spark_vec() -> None:\n    from scipy.sparse import csr_matrix\n\n    pd1 = pd.DataFrame(\n        {\n            \"featureVectorType\": [0, 1, 1, 0],\n            \"featureVectorSize\": [3, None, None, 3],\n            \"featureVectorIndices\": [\n                np.array([0, 2], dtype=np.int32),\n                None,\n                None,\n                np.array([1, 2], dtype=np.int32),\n            ],\n            \"featureVectorValues\": [\n                np.array([3.0, 0.0], dtype=np.float64),\n                np.array([13.0, 14.0, 0.0], dtype=np.float64),\n                np.array([0.0, 24.0, 25.0], dtype=np.float64),\n                np.array([0.0, 35.0], dtype=np.float64),\n            ],\n        }\n    )\n    sm = _read_csr_matrix_from_unwrapped_spark_vec(pd1)\n    assert isinstance(sm, csr_matrix)\n\n    np.testing.assert_array_equal(\n        sm.data, [3.0, 0.0, 13.0, 14.0, 0.0, 0.0, 24.0, 25.0, 0.0, 35.0]\n    )\n    np.testing.assert_array_equal(sm.indptr, [0, 2, 5, 8, 10])\n    np.testing.assert_array_equal(sm.indices, [0, 2, 0, 1, 2, 0, 1, 2, 1, 2])\n    assert sm.shape == (4, 3)\n"
  },
  {
    "path": "tests/test_distributed/test_with_spark/test_spark.py",
    "content": "import logging\nimport os\nimport subprocess\nfrom collections import namedtuple\nfrom pathlib import Path\nfrom typing import Generator, Iterable, List\n\nimport numpy as np\nimport pytest\nimport xgboost as xgb\nfrom pyspark import SparkConf\nfrom pyspark.ml import Pipeline, PipelineModel\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.feature import VectorAssembler\nfrom pyspark.ml.functions import vector_to_array\nfrom pyspark.ml.linalg import Vectors\nfrom pyspark.ml.tuning import CrossValidator, ParamGridBuilder\nfrom pyspark.sql import SparkSession\nfrom pyspark.sql import functions as spark_sql_func\nfrom xgboost import XGBClassifier, XGBRegressor\nfrom xgboost import testing as tm\nfrom xgboost.callback import LearningRateScheduler\nfrom xgboost.collective import Config\nfrom xgboost.spark import (\n    SparkXGBClassifier,\n    SparkXGBClassifierModel,\n    SparkXGBRanker,\n    SparkXGBRegressor,\n    SparkXGBRegressorModel,\n)\nfrom xgboost.spark.utils import _get_max_num_concurrent_tasks\nfrom xgboost.testing.collective import get_avail_port\n\nlogging.getLogger(\"py4j\").setLevel(logging.INFO)\n\npytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())]\n\n\ndef _to_bool(value: object) -> bool:\n    if isinstance(value, bool):\n        return value\n    return str(value).lower() in {\"1\", \"true\", \"on\", \"yes\"}\n\n\ndef _probe_gpu_addresses() -> List[str]:\n    if tm.no_spark()[\"condition\"]:\n        return []\n\n    info = xgb.build_info()\n    use_cuda = _to_bool(info.get(\"USE_CUDA\", False))\n    if not use_cuda:\n        return []\n\n    try:\n        completed = subprocess.run(\n            [\"nvidia-smi\", \"--query-gpu=index\", \"--format=csv,noheader\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n    except OSError:\n        return []\n\n    if completed.returncode != 0:\n        return []\n\n    addresses = [line.strip() for 
line in completed.stdout.splitlines() if line.strip()]\n    return addresses\n\n\n_GPU_ADDRESSES = _probe_gpu_addresses()\n_NUM_GPUS = len(_GPU_ADDRESSES)\n_GPU_DISCOVERY_SCRIPT = os.path.join(os.path.dirname(__file__), \"discover_gpu.sh\")\n_HAS_GPU_SPARK_MODE = bool(_GPU_ADDRESSES)\n_GPU_SKIP_REASON = (\n    \"local_cluster_gpu requires CUDA-enabled XGBoost and visible GPUs via nvidia-smi.\"\n)\n\nSPARK_MODES = [\n    pytest.param(\"local\", id=\"local\"),\n    pytest.param(\"local_cluster\", id=\"local_cluster\"),\n    pytest.param(\n        \"local_cluster_gpu\",\n        id=\"local_cluster_gpu\",\n        marks=[\n            tm.timeout(240),\n            pytest.mark.skipif(not _HAS_GPU_SPARK_MODE, reason=_GPU_SKIP_REASON),\n        ],\n    ),\n]\n\n\ndef no_sparse_unwrap() -> tm.PytestSkip:\n    try:\n        from pyspark.sql.functions import unwrap_udt\n\n    except ImportError:\n        return {\"reason\": \"PySpark<3.4\", \"condition\": True}\n\n    return {\"reason\": \"PySpark<3.4\", \"condition\": False}\n\n\ndef _spark_test_mode(spark: SparkSession) -> str:\n    return spark.sparkContext.getConf().get(\"spark.xgboost.test.mode\")\n\n\ndef _spark_test_device(spark: SparkSession) -> str:\n    if _spark_test_mode(spark) == \"local_cluster_gpu\":\n        return \"cuda\"\n    return \"cpu\"\n\n\n@pytest.fixture(scope=\"module\")\ndef spark(request: pytest.FixtureRequest) -> Generator[SparkSession, None, None]:\n    mode = getattr(request, \"param\", \"local\")\n    if mode not in {\"local\", \"local_cluster\", \"local_cluster_gpu\"}:\n        raise ValueError(f\"Unknown Spark test mode: {mode}\")\n    os.environ[\"XGBOOST_PYSPARK_SHARED_SESSION\"] = \"1\"\n    config = {\n        \"spark.master\": \"local[4]\",\n        \"spark.python.worker.reuse\": \"true\",\n        \"spark.driver.host\": \"127.0.0.1\",\n        \"spark.task.maxFailures\": \"1\",\n        \"spark.sql.shuffle.partitions\": \"4\",\n        
\"spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled\": \"false\",\n        \"spark.sql.pyspark.jvmStacktrace.enabled\": \"true\",\n        \"spark.ui.enabled\": \"false\",\n        \"spark.xgboost.test.mode\": mode,\n    }\n    if mode == \"local_cluster\":\n        config.update(\n            {\n                \"spark.master\": \"local-cluster[2, 1, 1024]\",\n                \"spark.cores.max\": \"2\",\n                \"spark.task.cpus\": \"1\",\n                \"spark.executor.cores\": \"1\",\n            }\n        )\n    elif mode == \"local_cluster_gpu\":\n        # Use all available GPUs with a single worker node.\n        config.update(\n            {\n                \"spark.master\": f\"local-cluster[1, {_NUM_GPUS}, 1024]\",\n                \"spark.python.worker.reuse\": \"false\",\n                \"spark.cores.max\": str(_NUM_GPUS),\n                \"spark.task.cpus\": \"1\",\n                \"spark.executor.cores\": str(_NUM_GPUS),\n                \"spark.default.parallelism\": str(_NUM_GPUS),\n                \"spark.worker.resource.gpu.amount\": str(_NUM_GPUS),\n                \"spark.task.resource.gpu.amount\": \"1\",\n                \"spark.executor.resource.gpu.amount\": str(_NUM_GPUS),\n                \"spark.worker.resource.gpu.discoveryScript\": _GPU_DISCOVERY_SCRIPT,\n            }\n        )\n\n    builder = SparkSession.builder.appName(\"XGBoost PySpark Python API Tests\")\n    for k, v in config.items():\n        builder.config(k, v)\n    logging.getLogger(\"pyspark\").setLevel(logging.INFO)\n    sess = builder.getOrCreate()\n    if mode in {\"local_cluster\", \"local_cluster_gpu\"}:\n        # Block until workers are connected.\n        num_slots = sess.sparkContext.defaultParallelism\n        (\n            sess.sparkContext.parallelize(range(num_slots), num_slots)\n            .barrier()\n            .mapPartitions(lambda _: [])\n            .collect()\n        )\n    try:\n        yield sess\n    finally:\n        
sess.stop()\n        sess.sparkContext.stop()\n        os.environ.pop(\"XGBOOST_PYSPARK_SHARED_SESSION\", None)\n\n\n@pytest.fixture(scope=\"module\")\ndef num_workers(spark: SparkSession) -> int:\n    if _spark_test_mode(spark) == \"local_cluster_gpu\":\n        return _NUM_GPUS\n    return _get_max_num_concurrent_tasks(spark.sparkContext)\n\n\nRegData = namedtuple(\n    \"RegData\",\n    (\n        \"X_train\",\n        \"X_test\",\n        \"y_train\",\n        \"y_test\",\n        \"weights\",\n        \"base_margin\",\n        \"is_val\",\n        \"X\",\n        \"y\",\n        \"df\",\n    ),\n)\n\n\nclass TestRegressor:\n    @pytest.fixture(scope=\"class\")\n    def reg_data(self, spark: SparkSession) -> RegData:\n        rng = np.random.default_rng(seed=42)\n        X = rng.random((100, 10))\n        # Make odd rows sparse with some random values to test both dense and sparse paths.\n        X[1::2, :] = 0.0\n        X[1::2, 1] = rng.random(len(X[1::2, 1]))\n        X[1::2, 2] = rng.random(len(X[1::2, 2]))\n        y = rng.random(100)\n        w = rng.random(100)\n        base_margin = rng.random(100)\n        is_val = rng.random(100) < 0.2\n        X_train, X_test = X[~is_val], X[is_val]\n        y_train, y_test = y[~is_val], y[is_val]\n        rows = []\n        for i in range(len(y)):\n            vec = (\n                Vectors.dense(X[i, :])\n                if i % 2 == 0\n                else Vectors.sparse(X.shape[1], {1: float(X[i, 1]), 2: float(X[i, 2])})\n            )\n            rows.append(\n                (\n                    i,\n                    vec,\n                    float(y[i]),\n                    float(w[i]),\n                    float(base_margin[i]),\n                    bool(is_val[i]),\n                )\n            )\n        df = spark.createDataFrame(\n            rows, [\"row_id\", \"features\", \"label\", \"weight\", \"base_margin\", \"is_val\"]\n        )\n        return RegData(\n            X_train, X_test, 
y_train, y_test, w, base_margin, is_val, X, y, df\n        )\n\n    @pytest.mark.parametrize(\"spark\", SPARK_MODES, indirect=True)\n    def test_regressor(\n        self, spark: SparkSession, reg_data: RegData, num_workers: int\n    ) -> None:\n        train_rows = np.where(~reg_data.is_val)[0]\n        validation_rows = np.where(reg_data.is_val)[0]\n        device = _spark_test_device(spark)\n\n        reg_param = {\n            \"n_estimators\": 10,\n            \"max_depth\": 5,\n            \"objective\": \"reg:squarederror\",\n            \"max_bin\": 9,\n            \"eval_metric\": \"rmse\",\n            \"early_stopping_rounds\": 1,\n            \"device\": device,\n        }\n        reg = XGBRegressor(**reg_param).fit(\n            reg_data.X_train,\n            reg_data.y_train,\n            sample_weight=reg_data.weights[train_rows],\n            eval_set=[(reg_data.X_test, reg_data.y_test)],\n            sample_weight_eval_set=[reg_data.weights[validation_rows]],\n        )\n        spark_regressor = SparkXGBRegressor(\n            pred_contrib_col=\"pred_contribs\",\n            weight_col=\"weight\",\n            validation_indicator_col=\"is_val\",\n            num_workers=num_workers,\n            **reg_param,\n        ).fit(reg_data.df)\n        pred_result = spark_regressor.transform(reg_data.df)\n        preds = (\n            pred_result.orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        pred_contribs = np.array(\n            pred_result.orderBy(\"row_id\")\n            .select(\"pred_contribs\")\n            .toPandas()[\"pred_contribs\"]\n            .tolist()\n        )\n        rounds = reg.get_booster().num_boosted_rounds()\n        iter_range = (0, max(1, min(5, rounds)))\n        spark_iter_regressor = SparkXGBRegressor(\n            weight_col=\"weight\",\n            validation_indicator_col=\"is_val\",\n            
iteration_range=iter_range,\n            num_workers=num_workers,\n            **reg_param,\n        ).fit(reg_data.df)\n        iter_preds = (\n            spark_iter_regressor.transform(reg_data.df)\n            .orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        score_atol = 1e-2\n        train_history = spark_regressor.training_summary.train_objective_history[\"rmse\"]\n        assert len(train_history) > 0\n        assert np.isfinite(train_history).all()\n        assert np.all(np.diff(train_history) <= 0.0)\n        assert np.allclose(\n            reg.best_score,\n            spark_regressor._xgb_sklearn_model.best_score,\n            atol=score_atol,\n        )\n        assert preds.shape == reg.predict(reg_data.X).shape\n        assert (\n            iter_preds.shape\n            == reg.predict(reg_data.X, iteration_range=iter_range).shape\n        )\n\n        assert np.allclose(pred_contribs.sum(axis=1), preds, rtol=1e-3)\n        assert np.allclose(\n            reg.evals_result()[\"validation_0\"][\"rmse\"],\n            spark_regressor.training_summary.validation_objective_history[\"rmse\"],\n            atol=score_atol,\n        )\n        assert np.allclose(\n            reg.best_score,\n            spark_regressor._xgb_sklearn_model.best_score,\n            atol=score_atol,\n        )\n\n    def test_training_continuation(self, reg_data: RegData) -> None:\n        params = {\n            \"max_depth\": 3,\n            \"objective\": \"reg:squarederror\",\n            \"eval_metric\": \"rmse\",\n        }\n\n        base = SparkXGBRegressor(n_estimators=2, **params).fit(reg_data.df)\n        continued = SparkXGBRegressor(\n            n_estimators=4, xgb_model=base.get_booster(), **params\n        ).fit(reg_data.df)\n\n        preds_base = (\n            base.transform(reg_data.df)\n            .select(\"prediction\")\n            
.toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        preds_cont = (\n            continued.transform(reg_data.df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        ref_base = XGBRegressor(n_estimators=2, **params).fit(reg_data.X, reg_data.y)\n        ref_cont = XGBRegressor(n_estimators=4, **params).fit(\n            reg_data.X, reg_data.y, xgb_model=ref_base.get_booster()\n        )\n\n        assert np.allclose(preds_cont, ref_cont.predict(reg_data.X), rtol=1e-3)\n        assert not np.allclose(preds_base, preds_cont, rtol=1e-6)\n\n    def test_regressor_with_base_margin(self, reg_data: RegData) -> None:\n        params = {\n            \"n_estimators\": 5,\n            \"max_depth\": 3,\n            \"objective\": \"reg:squarederror\",\n        }\n        spark_model = SparkXGBRegressor(base_margin_col=\"base_margin\", **params).fit(\n            reg_data.df\n        )\n        preds = (\n            spark_model.transform(\n                reg_data.df.select(\"row_id\", \"features\", \"base_margin\")\n            )\n            .orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        ref = XGBRegressor(**params).fit(\n            reg_data.X, reg_data.y, base_margin=reg_data.base_margin\n        )\n        expected = ref.predict(reg_data.X, base_margin=reg_data.base_margin)\n\n        assert np.allclose(preds, expected, rtol=1e-3)\n\n    def test_regressor_save_load(self, reg_data: RegData, tmp_path: Path) -> None:\n        train_df = reg_data.df.select(\"features\", \"label\")\n        model = SparkXGBRegressor(n_estimators=5, max_depth=3).fit(train_df)\n        preds_before = (\n            model.transform(train_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        path = str(tmp_path / 
\"spark-xgb-reg-model\")\n        model.save(path)\n        loaded = SparkXGBRegressorModel.load(path)\n        preds_after = (\n            loaded.transform(train_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        assert np.allclose(preds_before, preds_after, rtol=1e-6)\n\n    def test_regressor_params(self) -> None:\n        py_reg = SparkXGBRegressor()\n        assert hasattr(py_reg, \"n_estimators\")\n        assert py_reg.n_estimators.parent == py_reg.uid\n        assert not hasattr(py_reg, \"gpu_id\")\n        assert hasattr(py_reg, \"device\")\n        assert py_reg.getOrDefault(py_reg.n_estimators) == 100\n        assert py_reg.getOrDefault(getattr(py_reg, \"objective\")) == \"reg:squarederror\"\n        py_reg2 = SparkXGBRegressor(n_estimators=200)\n        assert py_reg2.getOrDefault(getattr(py_reg2, \"n_estimators\")) == 200\n        py_reg3 = py_reg2.copy({getattr(py_reg2, \"max_depth\"): 10})\n        assert py_reg3.getOrDefault(getattr(py_reg3, \"n_estimators\")) == 200\n        assert py_reg3.getOrDefault(getattr(py_reg3, \"max_depth\")) == 10\n        with pytest.raises(ValueError, match=\"Number of workers\"):\n            SparkXGBRegressor(num_workers=-1)._validate_params()\n        with pytest.raises(ValueError, match=\"Number of workers\"):\n            SparkXGBRegressor(num_workers=0)._validate_params()\n\n    def test_valid_type(self, spark: SparkSession) -> None:\n        df_train = spark.createDataFrame(\n            [\n                (Vectors.dense(1.0, 2.0, 3.0), 0, 0),\n                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 1, 0),\n                (Vectors.dense(4.0, 5.0, 6.0), 0, 1),\n                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),\n            ],\n            [\"features\", \"label\", \"isVal\"],\n        )\n        reg = SparkXGBRegressor(\n            features_col=\"features\",\n            label_col=\"label\",\n            
validation_indicator_col=\"isVal\",\n        )\n        with pytest.raises(TypeError, match=\"The validation indicator must be boolean\"):\n            reg.fit(df_train)\n\n    @pytest.mark.parametrize(\"spark\", SPARK_MODES, indirect=True)\n    def test_callbacks(\n        self, spark: SparkSession, reg_data: RegData, tmp_path: Path\n    ) -> None:\n        train_df = reg_data.df.select(\"row_id\", \"features\", \"label\")\n        device = _spark_test_device(spark)\n\n        def custom_lr(boosting_round: int) -> float:\n            return 1.0 / (boosting_round + 1)\n\n        reg_params = {\n            \"n_estimators\": 10,\n            \"max_depth\": 3,\n            \"objective\": \"reg:squarederror\",\n            \"eval_metric\": \"rmse\",\n            \"device\": device,\n        }\n\n        path = str(tmp_path / \"spark-xgb-reg-cb\")\n        regressor = SparkXGBRegressor(\n            callbacks=[LearningRateScheduler(custom_lr)], **reg_params\n        )\n        regressor.save(path)\n        regressor = SparkXGBRegressor.load(path)\n        loaded_callbacks = regressor.getOrDefault(regressor.callbacks)\n        assert loaded_callbacks is not None\n        assert len(loaded_callbacks) == 1\n\n        model = regressor.fit(train_df)\n        preds = (\n            model.transform(train_df)\n            .orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n\n        ref = XGBRegressor(\n            callbacks=[LearningRateScheduler(custom_lr)], **reg_params\n        ).fit(reg_data.X, reg_data.y)\n        assert np.allclose(preds, ref.predict(reg_data.X), rtol=1e-3)\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_empty_train_data(self, spark: SparkSession, tree_method: str) -> None:\n        df_train = spark.createDataFrame(\n            [\n                (Vectors.dense(10.1, 11.2, 11.3), 0, True),\n                (Vectors.dense(1, 1.2, 
1.3), 1, True),\n                (Vectors.dense(14.0, 15.0, 16.0), 0, True),\n                (Vectors.dense(1.1, 1.2, 1.3), 1, False),\n            ],\n            [\"features\", \"label\", \"val_col\"],\n        )\n        classifier = SparkXGBRegressor(\n            num_workers=2,\n            min_child_weight=0.0,\n            reg_alpha=0,\n            reg_lambda=0,\n            tree_method=tree_method,\n            validation_indicator_col=\"val_col\",\n        )\n        model = classifier.fit(df_train)\n        pred_result = model.transform(df_train).collect()\n        for row in pred_result:\n            assert row.prediction == 1.0\n\n\nClfData = namedtuple(\n    \"ClfData\",\n    (\n        \"X_train\",\n        \"X_test\",\n        \"y_train\",\n        \"y_test\",\n        \"weights\",\n        \"base_margin\",\n        \"is_val\",\n        \"X\",\n        \"y\",\n        \"df\",\n    ),\n)\n\n\nclass TestClassifier:\n    @pytest.fixture(scope=\"class\")\n    def clf_data(self, spark: SparkSession) -> ClfData:\n        rng = np.random.default_rng(seed=123)\n        X = rng.random((200, 10))\n        X[1::2, :] = 0.0\n        X[1::2, 1] = rng.random(len(X[1::2, 1]))\n        X[1::2, 2] = rng.random(len(X[1::2, 2]))\n        y = rng.integers(0, 2, size=200)\n        w = rng.random(200)\n        base_margin = rng.random(200)\n        is_val = rng.random(200) < 0.2\n        X_train, X_test = X[~is_val], X[is_val]\n        y_train, y_test = y[~is_val], y[is_val]\n        rows = []\n        for i in range(len(y)):\n            vec = (\n                Vectors.dense(X[i, :])\n                if i % 2 == 0\n                else Vectors.sparse(X.shape[1], {1: float(X[i, 1]), 2: float(X[i, 2])})\n            )\n            rows.append(\n                (\n                    i,\n                    vec,\n                    int(y[i]),\n                    float(w[i]),\n                    float(base_margin[i]),\n                    bool(is_val[i]),\n              
  )\n            )\n        df = spark.createDataFrame(\n            rows, [\"row_id\", \"features\", \"label\", \"weight\", \"base_margin\", \"is_val\"]\n        )\n        return ClfData(\n            X_train, X_test, y_train, y_test, w, base_margin, is_val, X, y, df\n        )\n\n    @pytest.mark.parametrize(\"spark\", SPARK_MODES, indirect=True)\n    def test_classifier(\n        self, spark: SparkSession, clf_data: ClfData, num_workers: int\n    ) -> None:\n        train_df = clf_data.df\n        X = clf_data.X\n        y = clf_data.y\n        weights = clf_data.weights\n        train_rows = np.where(~clf_data.is_val)[0]\n        validation_rows = np.where(clf_data.is_val)[0]\n        device = _spark_test_device(spark)\n\n        cls_params = {\n            \"n_estimators\": 10,\n            \"max_depth\": 5,\n            \"eval_metric\": \"logloss\",\n            \"device\": device,\n        }\n        ref = XGBClassifier(**cls_params).fit(\n            X[train_rows],\n            y[train_rows],\n            sample_weight=weights[train_rows],\n            eval_set=[\n                (X[train_rows], y[train_rows]),\n                (X[validation_rows], y[validation_rows]),\n            ],\n            sample_weight_eval_set=[weights[train_rows], weights[validation_rows]],\n        )\n\n        spark_cls = SparkXGBClassifier(\n            weight_col=\"weight\",\n            validation_indicator_col=\"is_val\",\n            num_workers=num_workers,\n            **cls_params,\n        ).fit(train_df)\n\n        pred_result = spark_cls.transform(train_df)\n        preds = (\n            pred_result.orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        proba = np.array(\n            pred_result.orderBy(\"row_id\")\n            .select(\"probability\")\n            .toPandas()[\"probability\"]\n            .tolist()\n        )\n\n        assert np.allclose(preds, ref.predict(X), 
rtol=1e-3)\n        assert np.allclose(proba, ref.predict_proba(X), rtol=1e-3)\n        assert np.allclose(\n            ref.evals_result()[\"validation_0\"][\"logloss\"],\n            spark_cls.training_summary.train_objective_history[\"logloss\"],\n            atol=1e-6,\n        )\n        assert np.allclose(\n            ref.evals_result()[\"validation_1\"][\"logloss\"],\n            spark_cls.training_summary.validation_objective_history[\"logloss\"],\n            atol=1e-6,\n        )\n\n    def test_classifier_model_save_load(\n        self, clf_data: ClfData, tmp_path: Path\n    ) -> None:\n        train_df = clf_data.df.select(\"features\", \"label\")\n        test_df = clf_data.df.select(\"features\")\n        path = str(tmp_path / \"spark-xgb-clf-model\")\n        clf = SparkXGBClassifier(n_estimators=5, max_depth=3)\n        model = clf.fit(train_df)\n        model.save(path)\n        loaded_model = SparkXGBClassifierModel.load(path)\n        assert model.uid == loaded_model.uid\n        pred_before = (\n            model.transform(test_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        pred_after = (\n            loaded_model.transform(test_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        assert np.allclose(pred_before, pred_after, rtol=1e-6)\n\n        with pytest.raises(AssertionError, match=\"Expected class name\"):\n            SparkXGBRegressorModel.load(path)\n\n    def test_classifier_model_pipeline_save_load(\n        self, clf_data: ClfData, tmp_path: Path\n    ) -> None:\n        train_df = clf_data.df.select(\"features\", \"label\")\n        test_df = clf_data.df.select(\"features\")\n        path = str(tmp_path / \"spark-xgb-clf-pipeline\")\n        classifier = SparkXGBClassifier()\n        pipeline = Pipeline(stages=[classifier])\n        pipeline = pipeline.copy(\n            extra={\n  
              getattr(classifier, k): v\n                for k, v in {\"max_depth\": 5, \"n_estimators\": 10}.items()\n            }\n        )\n        model = pipeline.fit(train_df)\n        model.save(path)\n\n        loaded_model = PipelineModel.load(path)\n        pred_before = (\n            model.transform(test_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        pred_after = (\n            loaded_model.transform(test_df)\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        assert np.allclose(pred_before, pred_after, rtol=1e-6)\n\n    def test_classifier_params(self) -> None:\n        py_clf = SparkXGBClassifier()\n        assert hasattr(py_clf, \"n_estimators\")\n        assert py_clf.n_estimators.parent == py_clf.uid\n        assert not hasattr(py_clf, \"gpu_id\")\n        assert hasattr(py_clf, \"device\")\n\n        assert py_clf.getOrDefault(py_clf.n_estimators) == 100\n        assert py_clf.getOrDefault(getattr(py_clf, \"objective\")) is None\n        py_clf2 = SparkXGBClassifier(n_estimators=200)\n        assert py_clf2.getOrDefault(getattr(py_clf2, \"n_estimators\")) == 200\n        py_clf3 = py_clf2.copy({getattr(py_clf2, \"max_depth\"): 10})\n        assert py_clf3.getOrDefault(getattr(py_clf3, \"n_estimators\")) == 200\n        assert py_clf3.getOrDefault(getattr(py_clf3, \"max_depth\")) == 10\n        with pytest.raises(ValueError, match=\"custom 'objective'\"):\n            SparkXGBClassifier(objective=\"binary:logistic\")._validate_params()\n        assert hasattr(py_clf, \"arbitrary_params_dict\")\n        assert py_clf.getOrDefault(py_clf.arbitrary_params_dict) == {}\n\n        # Testing overwritten params via setParams\n        py_clf_overwrite = SparkXGBClassifier()\n        py_clf_overwrite.setParams(x=1, y=2)\n        py_clf_overwrite.setParams(y=3, z=4)\n        xgb_params = 
py_clf_overwrite._gen_xgb_params_dict()\n        assert xgb_params[\"x\"] == 1\n        assert xgb_params[\"y\"] == 3\n        assert xgb_params[\"z\"] == 4\n        with pytest.raises(ValueError, match=\"evals_result\"):\n            SparkXGBClassifier(evals_result={})\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_empty_validation_data(self, spark: SparkSession, tree_method: str) -> None:\n        df_train = spark.createDataFrame(\n            [\n                (Vectors.dense(10.1, 11.2, 11.3), 0, False),\n                (Vectors.dense(1, 1.2, 1.3), 1, False),\n                (Vectors.dense(14.0, 15.0, 16.0), 0, False),\n                (Vectors.dense(1.1, 1.2, 1.3), 1, True),\n            ],\n            [\"features\", \"label\", \"val_col\"],\n        )\n        classifier = SparkXGBClassifier(\n            num_workers=2,\n            tree_method=tree_method,\n            min_child_weight=0.0,\n            reg_alpha=0,\n            reg_lambda=0,\n            validation_indicator_col=\"val_col\",\n            n_estimators=10,\n        )\n        model = classifier.fit(df_train)\n        pred_result = model.transform(df_train).collect()\n        for row in pred_result:\n            assert row.prediction == row.label\n\n    @pytest.mark.parametrize(\"tree_method\", [\"hist\", \"approx\"])\n    def test_empty_partition(self, spark: SparkSession, tree_method: str) -> None:\n        # raw_df.repartition(4) will result int severe data skew, actually,\n        # there is no any data in reducer partition 1, reducer partition 2\n        # see https://github.com/dmlc/xgboost/issues/8221\n        raw_df = spark.range(0, 40, 1, 50).withColumn(\n            \"label\",\n            spark_sql_func.when(spark_sql_func.rand(1) > 0.5, 1).otherwise(0),\n        )\n        vector_assembler = (\n            VectorAssembler().setInputCols([\"id\"]).setOutputCol(\"features\")\n        )\n        data_trans = 
vector_assembler.setHandleInvalid(\"keep\").transform(raw_df)\n        classifier = SparkXGBClassifier(\n            tree_method=tree_method,\n            n_estimators=10,\n        )\n        model = classifier.fit(data_trans)\n        pred_result = model.transform(data_trans).collect()\n        for row in pred_result:\n            assert row.prediction in [0.0, 1.0]\n\n    def test_classifier_with_cross_validator(self, clf_data: ClfData) -> None:\n        xgb_classifier = SparkXGBClassifier(n_estimators=1)\n        param_maps = (\n            ParamGridBuilder().addGrid(xgb_classifier.max_depth, [1, 2]).build()\n        )\n        cv_bin = CrossValidator(\n            estimator=xgb_classifier,\n            estimatorParamMaps=param_maps,\n            evaluator=BinaryClassificationEvaluator(),\n            seed=1,\n            parallelism=4,\n            numFolds=2,\n        )\n        cv_model = cv_bin.fit(clf_data.df.select(\"features\", \"label\"))\n        cv_model.transform(clf_data.df.select(\"features\"))\n\n    def test_convert_to_sklearn_model_clf(self, clf_data: ClfData) -> None:\n        classifier = SparkXGBClassifier(\n            n_estimators=10,\n            missing=2.0,\n            max_depth=3,\n            sketch_eps=0.5,\n        )\n        clf_model = classifier.fit(clf_data.df.select(\"features\", \"label\"))\n\n        # Check that regardless of what booster, _convert_to_model converts to the\n        # correct class type\n        sklearn_classifier = classifier._convert_to_sklearn_model(\n            clf_model.get_booster().save_raw(\"json\"),\n            clf_model.get_booster().save_config(),\n        )\n        assert isinstance(sklearn_classifier, XGBClassifier)\n        assert sklearn_classifier.n_estimators == 10\n        assert sklearn_classifier.missing == 2.0\n        assert sklearn_classifier.max_depth == 3\n        assert sklearn_classifier.get_params()[\"sketch_eps\"] == 0.5\n\n    def test_classifier_array_col_as_feature(self, 
clf_data: ClfData) -> None:\n        vector_train = clf_data.df.select(\"row_id\", \"features\", \"label\")\n        vector_test = clf_data.df.select(\"row_id\", \"features\")\n        train_dataset = vector_train.withColumn(\n            \"features\", vector_to_array(spark_sql_func.col(\"features\"))\n        )\n        test_dataset = vector_test.withColumn(\n            \"features\", vector_to_array(spark_sql_func.col(\"features\"))\n        )\n        params = {\"n_estimators\": 10, \"max_depth\": 3}\n        vector_model = SparkXGBClassifier(**params).fit(vector_train)\n        array_model = SparkXGBClassifier(**params).fit(train_dataset)\n\n        vector_pred = (\n            vector_model.transform(vector_test)\n            .orderBy(\"row_id\")\n            .select(\"prediction\", \"probability\")\n            .toPandas()\n        )\n        array_pred = (\n            array_model.transform(test_dataset)\n            .orderBy(\"row_id\")\n            .select(\"prediction\", \"probability\")\n            .toPandas()\n        )\n        vector_proba = np.array(vector_pred[\"probability\"].tolist())\n        array_proba = np.array(array_pred[\"probability\"].tolist())\n        array_label = array_pred[\"prediction\"].to_numpy()\n\n        assert np.allclose(\n            array_pred[\"prediction\"].to_numpy(), vector_pred[\"prediction\"].to_numpy()\n        )\n        assert np.allclose(array_proba, vector_proba, rtol=1e-3)\n        assert np.allclose(\n            array_proba.sum(axis=1), np.ones(array_proba.shape[0]), atol=1e-6\n        )\n        assert np.all((array_proba >= 0.0) & (array_proba <= 1.0))\n        assert np.allclose(array_label, np.argmax(array_proba, axis=1))\n\n    def test_classifier_with_feature_names_types(self, clf_data: ClfData) -> None:\n        n_features = clf_data.X.shape[1]\n        classifier = SparkXGBClassifier(\n            feature_names=[f\"f{i}\" for i in range(n_features)],\n            feature_types=[\"float\"] * 
n_features,\n            feature_weights=[float(i + 1) for i in range(n_features)],\n            n_estimators=10,\n        )\n        model = classifier.fit(clf_data.df.select(\"features\", \"label\"))\n        model.transform(clf_data.df.select(\"features\")).collect()\n\n    def test_early_stop_param_validation(self, clf_data: ClfData) -> None:\n        classifier = SparkXGBClassifier(early_stopping_rounds=1)\n        with pytest.raises(ValueError, match=\"early_stopping_rounds\"):\n            classifier.fit(clf_data.df.select(\"features\", \"label\"))\n\n    def test_classifier_with_list_eval_metric(self, clf_data: ClfData) -> None:\n        classifier = SparkXGBClassifier(eval_metric=[\"auc\", \"rmse\"], n_estimators=10)\n        model = classifier.fit(clf_data.df.select(\"features\", \"label\"))\n        model.transform(clf_data.df.select(\"features\")).collect()\n\n    @pytest.mark.skipif(**no_sparse_unwrap())\n    def test_classifier_with_sparse_optim(self, spark: SparkSession) -> None:\n        sparse_train = spark.createDataFrame(\n            [\n                (Vectors.dense(1.0, 0.0, 3.0, 0.0, 0.0), 0),\n                (Vectors.sparse(5, {1: 1.0, 3: 5.5}), 1),\n                (Vectors.sparse(5, {4: -3.0}), 0),\n            ]\n            * 5,\n            [\"features\", \"label\"],\n        )\n        cls = SparkXGBClassifier(missing=0.0, n_estimators=10)\n        model = cls.fit(sparse_train)\n        assert model._xgb_sklearn_model.missing == 0.0\n        pred_result = model.transform(sparse_train).collect()\n\n        # enable sparse optimization\n        cls2 = SparkXGBClassifier(\n            missing=0.0,\n            enable_sparse_data_optim=True,\n            n_estimators=10,\n        )\n        model2 = cls2.fit(sparse_train)\n        assert model2.getOrDefault(model2.enable_sparse_data_optim)\n        assert model2._xgb_sklearn_model.missing == 0.0\n        pred_result2 = model2.transform(sparse_train).collect()\n\n        for row1, row2 in 
zip(pred_result, pred_result2):\n            assert np.allclose(row1.probability, row2.probability, rtol=1e-3)\n\n    def test_param_alias(self) -> None:\n        py_cls = SparkXGBClassifier(features_col=\"f1\", label_col=\"l1\")\n        assert py_cls.getOrDefault(py_cls.featuresCol) == \"f1\"\n        assert py_cls.getOrDefault(py_cls.labelCol) == \"l1\"\n        with pytest.raises(\n            ValueError, match=\"Please use param name features_col instead\"\n        ):\n            SparkXGBClassifier(featuresCol=\"f1\")\n\n    def test_param_value_converter(self) -> None:\n        py_cls = SparkXGBClassifier(missing=np.float64(1.0), sketch_eps=np.float64(0.3))\n        # don't check by isinstance(v, float) because for numpy scalar it will also return True\n        assert py_cls.getOrDefault(py_cls.missing).__class__.__name__ == \"float\"\n        assert (\n            py_cls.getOrDefault(py_cls.arbitrary_params_dict)[\n                \"sketch_eps\"\n            ].__class__.__name__\n            == \"float64\"\n        )\n\n    def test_device_and_gpu_params(self, clf_data: ClfData) -> None:\n        clf = SparkXGBClassifier(device=\"cuda\", tree_method=\"exact\")\n        with pytest.raises(ValueError, match=\"not supported for distributed\"):\n            clf.fit(clf_data.df.select(\"features\", \"label\"))\n\n        clf = SparkXGBClassifier(device=\"cuda\", tree_method=\"approx\")\n        clf._validate_params()\n        clf = SparkXGBClassifier(device=\"cuda\")\n        clf._validate_params()\n\n        clf = SparkXGBClassifier()\n        assert not clf._run_on_gpu()\n\n        clf = SparkXGBClassifier(device=\"cuda\", tree_method=\"hist\")\n        assert clf._run_on_gpu()\n\n        clf = SparkXGBClassifier(device=\"cuda\")\n        assert clf._run_on_gpu()\n\n        clf = SparkXGBClassifier(tree_method=\"hist\")\n        assert not clf._run_on_gpu()\n\n        clf = SparkXGBClassifier(device=\"cuda\", tree_method=\"approx\")\n        assert 
clf._run_on_gpu()\n\n    def test_gpu_transform(self, clf_data: ClfData, tmp_path: Path) -> None:\n        \"\"\"local mode\"\"\"\n        classifier = SparkXGBClassifier(device=\"cpu\", n_estimators=10)\n        model: SparkXGBClassifierModel = classifier.fit(\n            clf_data.df.select(\"features\", \"label\")\n        )\n\n        path = \"file:\" + str(tmp_path)\n        model.write().overwrite().save(path)\n\n        # The model trained with CPU, transform defaults to cpu\n        assert not model._run_on_gpu()\n\n        # without error\n        model.transform(clf_data.df.select(\"features\")).collect()\n\n        model.set_device(\"cuda\")\n        assert model._run_on_gpu()\n\n        model_loaded = SparkXGBClassifierModel.load(path)\n\n        # The model trained with CPU, transform defaults to cpu\n        assert not model_loaded._run_on_gpu()\n        # without error\n        model_loaded.transform(clf_data.df.select(\"features\")).collect()\n\n        model_loaded.set_device(\"cuda\")\n        assert model_loaded._run_on_gpu()\n\n    def test_validate_gpu_params(self) -> None:\n        # Standalone\n        standalone_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        classifier_on_cpu = SparkXGBClassifier(device=\"cpu\")\n        classifier_on_gpu = SparkXGBClassifier(device=\"cuda\")\n\n        # No exception for classifier on CPU\n        classifier_on_cpu._validate_gpu_params(\"3.4.0\", standalone_conf)\n\n        with pytest.raises(\n            ValueError, match=\"XGBoost doesn't support GPU fractional configurations\"\n        ):\n            classifier_on_gpu._validate_gpu_params(\"3.3.0\", standalone_conf)\n\n        # No issues\n        
classifier_on_gpu._validate_gpu_params(\"3.4.0\", standalone_conf)\n        classifier_on_gpu._validate_gpu_params(\"3.4.1\", standalone_conf)\n        classifier_on_gpu._validate_gpu_params(\"3.5.0\", standalone_conf)\n        classifier_on_gpu._validate_gpu_params(\"3.5.1\", standalone_conf)\n\n        # no spark.executor.resource.gpu.amount\n        standalone_bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        msg_match = (\n            \"The `spark.executor.resource.gpu.amount` is required for training on GPU\"\n        )\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.3.0\", standalone_bad_conf)\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.4.0\", standalone_bad_conf)\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.4.1\", standalone_bad_conf)\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.5.0\", standalone_bad_conf)\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.5.1\", standalone_bad_conf)\n\n        standalone_bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n        )\n        msg_match = (\n            \"The `spark.task.resource.gpu.amount` is required for training on GPU\"\n        )\n        with pytest.raises(ValueError, match=msg_match):\n            classifier_on_gpu._validate_gpu_params(\"3.3.0\", standalone_bad_conf)\n\n        
classifier_on_gpu._validate_gpu_params(\"3.4.0\", standalone_bad_conf)\n        classifier_on_gpu._validate_gpu_params(\"3.5.0\", standalone_bad_conf)\n        classifier_on_gpu._validate_gpu_params(\"3.5.1\", standalone_bad_conf)\n\n        # Yarn and K8s mode\n        for mode in [\"yarn\", \"k8s://\"]:\n            conf = (\n                SparkConf()\n                .setMaster(mode)\n                .set(\"spark.executor.cores\", \"12\")\n                .set(\"spark.task.cpus\", \"1\")\n                .set(\"spark.executor.resource.gpu.amount\", \"1\")\n                .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n            )\n            with pytest.raises(\n                ValueError,\n                match=\"XGBoost doesn't support GPU fractional configurations\",\n            ):\n                classifier_on_gpu._validate_gpu_params(\"3.3.0\", conf)\n            with pytest.raises(\n                ValueError,\n                match=\"XGBoost doesn't support GPU fractional configurations\",\n            ):\n                classifier_on_gpu._validate_gpu_params(\"3.4.0\", conf)\n            with pytest.raises(\n                ValueError,\n                match=\"XGBoost doesn't support GPU fractional configurations\",\n            ):\n                classifier_on_gpu._validate_gpu_params(\"3.4.1\", conf)\n            with pytest.raises(\n                ValueError,\n                match=\"XGBoost doesn't support GPU fractional configurations\",\n            ):\n                classifier_on_gpu._validate_gpu_params(\"3.5.0\", conf)\n\n            classifier_on_gpu._validate_gpu_params(\"3.5.1\", conf)\n\n        for mode in [\"yarn\", \"k8s://\"]:\n            bad_conf = (\n                SparkConf()\n                .setMaster(mode)\n                .set(\"spark.executor.cores\", \"12\")\n                .set(\"spark.task.cpus\", \"1\")\n                .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            )\n            
msg_match = (\n                \"The `spark.task.resource.gpu.amount` is required for training on GPU\"\n            )\n            with pytest.raises(ValueError, match=msg_match):\n                classifier_on_gpu._validate_gpu_params(\"3.3.0\", bad_conf)\n            with pytest.raises(ValueError, match=msg_match):\n                classifier_on_gpu._validate_gpu_params(\"3.4.0\", bad_conf)\n            with pytest.raises(ValueError, match=msg_match):\n                classifier_on_gpu._validate_gpu_params(\"3.5.0\", bad_conf)\n\n            classifier_on_gpu._validate_gpu_params(\"3.5.1\", bad_conf)\n\n    def test_skip_stage_level_scheduling(self) -> None:\n        standalone_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n\n        classifier_on_cpu = SparkXGBClassifier(device=\"cpu\")\n        classifier_on_gpu = SparkXGBClassifier(device=\"cuda\")\n\n        # the correct configurations should not skip stage-level scheduling\n        assert not classifier_on_gpu._skip_stage_level_scheduling(\n            \"3.4.0\", standalone_conf\n        )\n        assert not classifier_on_gpu._skip_stage_level_scheduling(\n            \"3.4.1\", standalone_conf\n        )\n        assert not classifier_on_gpu._skip_stage_level_scheduling(\n            \"3.5.0\", standalone_conf\n        )\n        assert not classifier_on_gpu._skip_stage_level_scheduling(\n            \"3.5.1\", standalone_conf\n        )\n\n        # spark version < 3.4.0\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.3.0\", standalone_conf)\n        # not run on GPU\n        assert classifier_on_cpu._skip_stage_level_scheduling(\"3.4.0\", standalone_conf)\n\n        # spark.executor.cores is not set\n        bad_conf = 
(\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # spark.executor.cores=1\n        bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"1\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # spark.executor.resource.gpu.amount is not set\n        bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # spark.executor.resource.gpu.amount>1\n        bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"2\")\n            .set(\"spark.task.resource.gpu.amount\", \"0.08\")\n        )\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # spark.task.resource.gpu.amount is not set\n        bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n        )\n        assert not 
classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # spark.task.resource.gpu.amount=1\n        bad_conf = (\n            SparkConf()\n            .setMaster(\"spark://foo\")\n            .set(\"spark.executor.cores\", \"12\")\n            .set(\"spark.task.cpus\", \"1\")\n            .set(\"spark.executor.resource.gpu.amount\", \"1\")\n            .set(\"spark.task.resource.gpu.amount\", \"1\")\n        )\n        assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", bad_conf)\n\n        # For Yarn and K8S\n        for mode in [\"yarn\", \"k8s://\"]:\n            for gpu_amount in [\"0.08\", \"0.2\", \"1.0\"]:\n                conf = (\n                    SparkConf()\n                    .setMaster(mode)\n                    .set(\"spark.executor.cores\", \"12\")\n                    .set(\"spark.task.cpus\", \"1\")\n                    .set(\"spark.executor.resource.gpu.amount\", \"1\")\n                    .set(\"spark.task.resource.gpu.amount\", gpu_amount)\n                )\n                assert classifier_on_gpu._skip_stage_level_scheduling(\"3.3.0\", conf)\n                assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.0\", conf)\n                assert classifier_on_gpu._skip_stage_level_scheduling(\"3.4.1\", conf)\n                assert classifier_on_gpu._skip_stage_level_scheduling(\"3.5.0\", conf)\n\n                # This will be fixed when spark 4.0.0 is released.\n                if gpu_amount == \"1.0\":\n                    assert classifier_on_gpu._skip_stage_level_scheduling(\"3.5.1\", conf)\n                else:\n                    # Starting from 3.5.1+, stage-level scheduling is working for Yarn and K8s\n                    assert not classifier_on_gpu._skip_stage_level_scheduling(\n                        \"3.5.1\", conf\n                    )\n\n    def test_collective_conf(self, spark: SparkSession, tmp_path: Path) -> None:\n        classifier = SparkXGBClassifier(\n            
launch_tracker_on_driver=True,\n            coll_cfg=Config(tracker_host_ip=\"192.168.1.32\", tracker_port=59981),\n        )\n        with pytest.raises(Exception, match=\"Failed to bind socket\"):\n            classifier._get_tracker_args()\n\n        classifier = SparkXGBClassifier(\n            launch_tracker_on_driver=False,\n            coll_cfg=Config(tracker_host_ip=\"127.0.0.1\", tracker_port=58892),\n        )\n        with pytest.raises(\n            ValueError, match=\"You must enable launch_tracker_on_driver\"\n        ):\n            classifier._get_tracker_args()\n\n        classifier = SparkXGBClassifier(\n            launch_tracker_on_driver=True,\n            coll_cfg=Config(tracker_host_ip=\"127.0.0.1\", tracker_port=58893),\n            num_workers=2,\n        )\n        launch_tracker_on_driver, rabit_envs = classifier._get_tracker_args()\n        assert launch_tracker_on_driver is True\n        assert rabit_envs[\"n_workers\"] == 2\n        assert rabit_envs[\"dmlc_tracker_uri\"] == \"127.0.0.1\"\n        assert rabit_envs[\"dmlc_tracker_port\"] == 58893\n\n        path = \"file:\" + str(tmp_path)\n        port = get_avail_port()\n        classifier = SparkXGBClassifier(\n            launch_tracker_on_driver=True,\n            coll_cfg=Config(tracker_host_ip=\"127.0.0.1\", tracker_port=port),\n            num_workers=1,\n            n_estimators=1,\n        )\n\n        def check_conf(conf: Config) -> None:\n            assert conf.tracker_host_ip == \"127.0.0.1\"\n            assert conf.tracker_port == port\n\n        check_conf(classifier.getOrDefault(classifier.coll_cfg))\n        classifier.write().overwrite().save(path)\n\n        loaded_classifier = SparkXGBClassifier.load(path)\n        check_conf(loaded_classifier.getOrDefault(loaded_classifier.coll_cfg))\n\n        sparse_train = spark.createDataFrame(\n            [\n                (Vectors.dense(1.0, 0.0, 3.0, 0.0, 0.0), 0),\n                (Vectors.sparse(5, {1: 1.0, 3: 5.5}), 
1),\n                (Vectors.sparse(5, {4: -3.0}), 0),\n            ]\n            * 5,\n            [\"features\", \"label\"],\n        )\n        model = classifier.fit(sparse_train)\n        check_conf(model.getOrDefault(model.coll_cfg))\n\n        model.write().overwrite().save(path)\n        loaded_model = SparkXGBClassifierModel.load(path)\n        check_conf(loaded_model.getOrDefault(loaded_model.coll_cfg))\n\n\nLTRData = namedtuple(\n    \"LTRData\",\n    (\n        \"ranker_df\",\n        \"X_train\",\n        \"y_train\",\n        \"qid_train\",\n        \"X_test\",\n        \"y_test\",\n        \"qid_test\",\n    ),\n)\n\n\nclass TestPySparkLocalLETOR:\n    @pytest.fixture(scope=\"class\")\n    def ltr_data(self, spark: SparkSession) -> LTRData:\n        spark.conf.set(\"spark.sql.execution.arrow.maxRecordsPerBatch\", \"8\")\n        ranker_df = spark.createDataFrame(\n            [\n                (Vectors.dense(1.0, 2.0, 3.0), 0, 0, None, False),\n                (Vectors.dense(4.0, 5.0, 6.0), 1, 0, None, False),\n                (Vectors.dense(9.0, 4.0, 8.0), 2, 0, None, False),\n                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1, None, False),\n                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1, None, False),\n                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1, None, False),\n                (Vectors.dense(1.5, 2.0, 3.0), 1, 0, 0, True),\n                (Vectors.dense(4.5, 5.0, 6.0), 0, 0, 1, True),\n                (Vectors.dense(9.0, 4.5, 8.0), 2, 0, 2, True),\n                (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, 1, 3, True),\n                (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, 1, 4, True),\n                (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 2, 1, 5, True),\n            ],\n            [\"features\", \"label\", \"qid\", \"row_id\", \"isVal\"],\n        )\n        X_train = np.array(\n            [\n                [1.0, 2.0, 3.0],\n                [4.0, 5.0, 6.0],\n                [9.0, 4.0, 8.0],\n           
     [np.nan, 1.0, 5.5],\n                [np.nan, 6.0, 7.5],\n                [np.nan, 8.0, 9.5],\n            ]\n        )\n        qid_train = np.array([0, 0, 0, 1, 1, 1])\n        y_train = np.array([0, 1, 2, 0, 1, 2])\n\n        X_test = np.array(\n            [\n                [1.5, 2.0, 3.0],\n                [4.5, 5.0, 6.0],\n                [9.0, 4.5, 8.0],\n                [np.nan, 1.0, 6.0],\n                [np.nan, 6.0, 7.0],\n                [np.nan, 8.0, 10.5],\n            ]\n        )\n        qid_test = np.array([0, 0, 0, 1, 1, 1])\n        y_test = np.array([1, 0, 2, 1, 1, 2])\n\n        return LTRData(\n            ranker_df,\n            X_train,\n            y_train,\n            qid_train,\n            X_test,\n            y_test,\n            qid_test,\n        )\n\n    def test_ranker(self, ltr_data: LTRData) -> None:\n        ref = xgb.XGBRanker(\n            tree_method=\"approx\",\n            objective=\"rank:pairwise\",\n            n_estimators=10,\n        )\n        ref.fit(\n            ltr_data.X_train,\n            ltr_data.y_train,\n            qid=ltr_data.qid_train,\n            eval_set=[(ltr_data.X_test, ltr_data.y_test)],\n            eval_qid=[ltr_data.qid_test],\n        )\n        expected = ref.predict(ltr_data.X_test)\n\n        ranker = SparkXGBRanker(\n            qid_col=\"qid\",\n            tree_method=\"approx\",\n            objective=\"rank:pairwise\",\n            validation_indicator_col=\"isVal\",\n            n_estimators=10,\n        )\n        assert ranker.getOrDefault(ranker.objective) == \"rank:pairwise\"\n        model = ranker.fit(ltr_data.ranker_df)\n        test_df = ltr_data.ranker_df.where(spark_sql_func.col(\"isVal\"))\n        pred_result = (\n            model.transform(test_df)\n            .orderBy(\"row_id\")\n            .select(\"prediction\")\n            .toPandas()[\"prediction\"]\n            .to_numpy()\n        )\n        assert np.allclose(pred_result, expected, rtol=1e-3)\n\n    
def test_ranker_same_qid_in_same_partition(self, spark: SparkSession) -> None:\n        ranker_df_train = spark.createDataFrame(\n            [\n                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),\n                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),\n                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),\n                (Vectors.dense(1.0, 2.0, 3.0), 0, 8),\n                (Vectors.dense(4.0, 5.0, 6.0), 1, 8),\n                (Vectors.dense(9.0, 4.0, 8.0), 2, 8),\n                (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),\n                (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),\n                (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),\n                (Vectors.dense(1.0, 2.0, 3.0), 0, 6),\n                (Vectors.dense(4.0, 5.0, 6.0), 1, 6),\n                (Vectors.dense(9.0, 4.0, 8.0), 2, 6),\n            ]\n            * 4,\n            [\"features\", \"label\", \"qid\"],\n        )\n        ranker = SparkXGBRanker(qid_col=\"qid\", num_workers=4, force_repartition=True)\n        df, _ = ranker._prepare_input(ranker_df_train)\n\n        def f(iterator: Iterable) -> List[int]:\n            yield list(set(iterator))\n\n        rows = df.select(\"qid\").rdd.mapPartitions(f).collect()\n        assert len(rows) == 4\n        for row in rows:\n            assert len(row) == 1\n            assert row[0].qid in [6, 7, 8, 9]\n\n    def test_ranker_xgb_summary(self, ltr_data: LTRData) -> None:\n        spark_xgb_model = SparkXGBRanker(\n            tree_method=\"approx\",\n            qid_col=\"qid\",\n            objective=\"rank:pairwise\",\n            validation_indicator_col=\"isVal\",\n            n_estimators=10,\n        ).fit(ltr_data.ranker_df)\n\n        ref = xgb.XGBRanker(\n            tree_method=\"approx\",\n            objective=\"rank:pairwise\",\n            n_estimators=10,\n        )\n        ref.fit(\n            ltr_data.X_train,\n            ltr_data.y_train,\n            qid=ltr_data.qid_train,\n            
eval_set=[\n                (ltr_data.X_train, ltr_data.y_train),\n                (ltr_data.X_test, ltr_data.y_test),\n            ],\n            eval_qid=[ltr_data.qid_train, ltr_data.qid_test],\n        )\n\n        np.testing.assert_allclose(\n            ref.evals_result()[\"validation_0\"][\"ndcg@32\"],\n            spark_xgb_model.training_summary.train_objective_history[\"ndcg@32\"],\n            atol=1e-3,\n        )\n\n        np.testing.assert_allclose(\n            ref.evals_result()[\"validation_1\"][\"ndcg@32\"],\n            spark_xgb_model.training_summary.validation_objective_history[\"ndcg@32\"],\n            atol=1e-3,\n        )\n"
  },
  {
    "path": "tests/test_distributed/test_with_spark/utils.py",
    "content": "import unittest\n\nimport pytest\nfrom xgboost import testing as tm\n\npytestmark = [pytest.mark.skipif(**tm.no_spark())]\n\nfrom xgboost.spark.utils import _get_default_params_from_func\n\n\nclass UtilsTest(unittest.TestCase):\n    def test_get_default_params(self):\n        class Foo:\n            def func1(self, x, y, key1=None, key2=\"val2\", key3=0, key4=None):\n                pass\n\n        unsupported_params = {\"key2\", \"key4\"}\n        expected_default_params = {\n            \"key1\": None,\n            \"key3\": 0,\n        }\n        actual_default_params = _get_default_params_from_func(\n            Foo.func1, unsupported_params\n        )\n        self.assertEqual(\n            len(expected_default_params.keys()), len(actual_default_params.keys())\n        )\n        for k, v in actual_default_params.items():\n            self.assertEqual(expected_default_params[k], v)\n"
  }
]